diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,350101 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 26.07561929595828, - "eval_steps": 5000, - "global_step": 50000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0005215123859191656, - "grad_norm": Infinity, - "learning_rate": 0.0, - "loss": 9.3221, - "step": 1 - }, - { - "epoch": 0.0010430247718383311, - "grad_norm": 18.024211883544922, - "learning_rate": 2.0000000000000002e-07, - "loss": 9.0362, - "step": 2 - }, - { - "epoch": 0.0015645371577574967, - "grad_norm": 18.082225799560547, - "learning_rate": 4.0000000000000003e-07, - "loss": 9.3207, - "step": 3 - }, - { - "epoch": 0.0020860495436766623, - "grad_norm": 17.8996639251709, - "learning_rate": 6.000000000000001e-07, - "loss": 9.3106, - "step": 4 - }, - { - "epoch": 0.002607561929595828, - "grad_norm": 18.508499145507812, - "learning_rate": 8.000000000000001e-07, - "loss": 9.3007, - "step": 5 - }, - { - "epoch": 0.0031290743155149934, - "grad_norm": 19.56254768371582, - "learning_rate": 1.0000000000000002e-06, - "loss": 9.3288, - "step": 6 - }, - { - "epoch": 0.003650586701434159, - "grad_norm": 17.659278869628906, - "learning_rate": 1.2000000000000002e-06, - "loss": 9.2988, - "step": 7 - }, - { - "epoch": 0.0041720990873533245, - "grad_norm": 15.949824333190918, - "learning_rate": 1.4000000000000001e-06, - "loss": 9.3166, - "step": 8 - }, - { - "epoch": 0.00469361147327249, - "grad_norm": 14.140177726745605, - "learning_rate": 1.6000000000000001e-06, - "loss": 9.0064, - "step": 9 - }, - { - "epoch": 0.005215123859191656, - "grad_norm": 13.076690673828125, - "learning_rate": 1.8e-06, - "loss": 9.0732, - "step": 10 - }, - { - "epoch": 0.005736636245110821, - "grad_norm": 10.833678245544434, - "learning_rate": 2.0000000000000003e-06, - "loss": 9.1203, - "step": 11 - }, - { - "epoch": 0.006258148631029987, - "grad_norm": 9.970856666564941, - "learning_rate": 2.2e-06, - "loss": 8.9484, - "step": 12 - }, - { - "epoch": 0.006779661016949152, - "grad_norm": 9.647157669067383, - "learning_rate": 2.4000000000000003e-06, - "loss": 9.1161, - "step": 13 - }, - { - "epoch": 0.007301173402868318, - "grad_norm": 8.991828918457031, - "learning_rate": 2.6e-06, - "loss": 9.0114, - "step": 14 - }, - { - "epoch": 0.007822685788787484, - "grad_norm": 7.713730335235596, - "learning_rate": 2.8000000000000003e-06, - "loss": 8.9205, - "step": 15 - }, - { - "epoch": 0.008344198174706649, - "grad_norm": 7.970811367034912, - "learning_rate": 3e-06, - "loss": 8.9524, - "step": 16 - }, - { - "epoch": 0.008865710560625815, - "grad_norm": 7.163078784942627, - "learning_rate": 3.2000000000000003e-06, - "loss": 8.8868, - "step": 17 - }, - { - "epoch": 0.00938722294654498, - "grad_norm": 5.800992965698242, - "learning_rate": 3.4000000000000005e-06, - "loss": 8.8257, - "step": 18 - }, - { - "epoch": 0.009908735332464147, - "grad_norm": 5.590092658996582, - "learning_rate": 3.6e-06, - "loss": 8.821, - "step": 19 - }, - { - "epoch": 0.010430247718383311, - "grad_norm": 5.476081371307373, - "learning_rate": 3.8e-06, - "loss": 8.7248, - "step": 20 - }, - { - "epoch": 0.010951760104302478, - "grad_norm": 4.819453239440918, - "learning_rate": 4.000000000000001e-06, - "loss": 8.7742, - "step": 21 - }, - { - "epoch": 0.011473272490221642, - "grad_norm": 4.722670078277588, - "learning_rate": 4.2000000000000004e-06, - "loss": 8.7207, - "step": 22 - }, - { - "epoch": 0.011994784876140809, - "grad_norm": 4.683105945587158, - "learning_rate": 4.4e-06, - "loss": 8.6636, - "step": 23 - }, - { - "epoch": 0.012516297262059974, - "grad_norm": 4.900444507598877, - "learning_rate": 4.6e-06, - "loss": 8.6013, - "step": 24 - }, - { - "epoch": 0.01303780964797914, - "grad_norm": 4.241089820861816, - "learning_rate": 4.800000000000001e-06, - "loss": 8.5963, - "step": 25 - }, - { - "epoch": 0.013559322033898305, - "grad_norm": 4.1339874267578125, - "learning_rate": 5e-06, - "loss": 8.627, - "step": 26 - }, - { - "epoch": 0.014080834419817471, - "grad_norm": 4.197762489318848, - "learning_rate": 5.2e-06, - "loss": 8.5626, - "step": 27 - }, - { - "epoch": 0.014602346805736636, - "grad_norm": 4.505918979644775, - "learning_rate": 5.4e-06, - "loss": 8.4761, - "step": 28 - }, - { - "epoch": 0.015123859191655802, - "grad_norm": 4.118350982666016, - "learning_rate": 5.600000000000001e-06, - "loss": 8.4802, - "step": 29 - }, - { - "epoch": 0.01564537157757497, - "grad_norm": 4.4556660652160645, - "learning_rate": 5.8e-06, - "loss": 8.5576, - "step": 30 - }, - { - "epoch": 0.01616688396349413, - "grad_norm": 3.6120073795318604, - "learning_rate": 6e-06, - "loss": 8.4474, - "step": 31 - }, - { - "epoch": 0.016688396349413298, - "grad_norm": 5.583066940307617, - "learning_rate": 6.2e-06, - "loss": 7.4272, - "step": 32 - }, - { - "epoch": 0.017209908735332465, - "grad_norm": 5.695413589477539, - "learning_rate": 6.4000000000000006e-06, - "loss": 8.3017, - "step": 33 - }, - { - "epoch": 0.01773142112125163, - "grad_norm": 4.050053119659424, - "learning_rate": 6.6e-06, - "loss": 8.4141, - "step": 34 - }, - { - "epoch": 0.018252933507170794, - "grad_norm": 3.966257333755493, - "learning_rate": 6.800000000000001e-06, - "loss": 8.4491, - "step": 35 - }, - { - "epoch": 0.01877444589308996, - "grad_norm": 3.792633056640625, - "learning_rate": 7.000000000000001e-06, - "loss": 8.3827, - "step": 36 - }, - { - "epoch": 0.019295958279009127, - "grad_norm": 4.522724628448486, - "learning_rate": 7.2e-06, - "loss": 8.3003, - "step": 37 - }, - { - "epoch": 0.019817470664928293, - "grad_norm": 3.689426898956299, - "learning_rate": 7.4e-06, - "loss": 8.3769, - "step": 38 - }, - { - "epoch": 0.020338983050847456, - "grad_norm": 3.616961717605591, - "learning_rate": 7.6e-06, - "loss": 8.3962, - "step": 39 - }, - { - "epoch": 0.020860495436766623, - "grad_norm": 3.489570379257202, - "learning_rate": 7.8e-06, - "loss": 8.3578, - "step": 40 - }, - { - "epoch": 0.02138200782268579, - "grad_norm": 3.925410509109497, - "learning_rate": 8.000000000000001e-06, - "loss": 8.3282, - "step": 41 - }, - { - "epoch": 0.021903520208604955, - "grad_norm": 3.7634716033935547, - "learning_rate": 8.200000000000001e-06, - "loss": 8.2869, - "step": 42 - }, - { - "epoch": 0.02242503259452412, - "grad_norm": 3.7333362102508545, - "learning_rate": 8.400000000000001e-06, - "loss": 8.2405, - "step": 43 - }, - { - "epoch": 0.022946544980443285, - "grad_norm": 3.0992345809936523, - "learning_rate": 8.599999999999999e-06, - "loss": 8.2989, - "step": 44 - }, - { - "epoch": 0.02346805736636245, - "grad_norm": 6.670464515686035, - "learning_rate": 8.8e-06, - "loss": 8.003, - "step": 45 - }, - { - "epoch": 0.023989569752281618, - "grad_norm": 5.897899627685547, - "learning_rate": 9e-06, - "loss": 7.8498, - "step": 46 - }, - { - "epoch": 0.02451108213820078, - "grad_norm": 4.171645641326904, - "learning_rate": 9.2e-06, - "loss": 8.2926, - "step": 47 - }, - { - "epoch": 0.025032594524119947, - "grad_norm": 4.116384029388428, - "learning_rate": 9.4e-06, - "loss": 8.2173, - "step": 48 - }, - { - "epoch": 0.025554106910039114, - "grad_norm": 4.061080455780029, - "learning_rate": 9.600000000000001e-06, - "loss": 8.1813, - "step": 49 - }, - { - "epoch": 0.02607561929595828, - "grad_norm": 3.8002374172210693, - "learning_rate": 9.800000000000001e-06, - "loss": 8.162, - "step": 50 - }, - { - "epoch": 0.026597131681877443, - "grad_norm": 3.3633811473846436, - "learning_rate": 1e-05, - "loss": 8.1976, - "step": 51 - }, - { - "epoch": 0.02711864406779661, - "grad_norm": 3.8181605339050293, - "learning_rate": 1.02e-05, - "loss": 8.1911, - "step": 52 - }, - { - "epoch": 0.027640156453715776, - "grad_norm": 3.7845466136932373, - "learning_rate": 1.04e-05, - "loss": 8.0857, - "step": 53 - }, - { - "epoch": 0.028161668839634942, - "grad_norm": 3.9014339447021484, - "learning_rate": 1.06e-05, - "loss": 8.0618, - "step": 54 - }, - { - "epoch": 0.028683181225554105, - "grad_norm": 4.385609149932861, - "learning_rate": 1.08e-05, - "loss": 8.0047, - "step": 55 - }, - { - "epoch": 0.02920469361147327, - "grad_norm": 4.386277198791504, - "learning_rate": 1.1000000000000001e-05, - "loss": 7.9167, - "step": 56 - }, - { - "epoch": 0.029726205997392438, - "grad_norm": 5.018571853637695, - "learning_rate": 1.1200000000000001e-05, - "loss": 7.8501, - "step": 57 - }, - { - "epoch": 0.030247718383311605, - "grad_norm": 4.002060413360596, - "learning_rate": 1.1400000000000001e-05, - "loss": 8.0016, - "step": 58 - }, - { - "epoch": 0.03076923076923077, - "grad_norm": 3.611271381378174, - "learning_rate": 1.16e-05, - "loss": 8.0786, - "step": 59 - }, - { - "epoch": 0.03129074315514994, - "grad_norm": 4.341294288635254, - "learning_rate": 1.18e-05, - "loss": 7.9634, - "step": 60 - }, - { - "epoch": 0.031812255541069104, - "grad_norm": 4.467768669128418, - "learning_rate": 1.2e-05, - "loss": 7.9299, - "step": 61 - }, - { - "epoch": 0.03233376792698826, - "grad_norm": 3.67276668548584, - "learning_rate": 1.22e-05, - "loss": 7.969, - "step": 62 - }, - { - "epoch": 0.03285528031290743, - "grad_norm": 4.076155662536621, - "learning_rate": 1.24e-05, - "loss": 7.9558, - "step": 63 - }, - { - "epoch": 0.033376792698826596, - "grad_norm": 4.484344005584717, - "learning_rate": 1.2600000000000001e-05, - "loss": 7.9411, - "step": 64 - }, - { - "epoch": 0.03389830508474576, - "grad_norm": 4.032654285430908, - "learning_rate": 1.2800000000000001e-05, - "loss": 7.892, - "step": 65 - }, - { - "epoch": 0.03441981747066493, - "grad_norm": 4.794155120849609, - "learning_rate": 1.3000000000000001e-05, - "loss": 7.6581, - "step": 66 - }, - { - "epoch": 0.034941329856584095, - "grad_norm": 3.885824680328369, - "learning_rate": 1.32e-05, - "loss": 7.8953, - "step": 67 - }, - { - "epoch": 0.03546284224250326, - "grad_norm": 4.162301063537598, - "learning_rate": 1.3400000000000002e-05, - "loss": 7.797, - "step": 68 - }, - { - "epoch": 0.03598435462842243, - "grad_norm": 4.53833532333374, - "learning_rate": 1.3600000000000002e-05, - "loss": 7.8279, - "step": 69 - }, - { - "epoch": 0.03650586701434159, - "grad_norm": 3.926084041595459, - "learning_rate": 1.3800000000000002e-05, - "loss": 7.9131, - "step": 70 - }, - { - "epoch": 0.037027379400260754, - "grad_norm": 3.81913161277771, - "learning_rate": 1.4000000000000001e-05, - "loss": 7.8192, - "step": 71 - }, - { - "epoch": 0.03754889178617992, - "grad_norm": 3.9308505058288574, - "learning_rate": 1.42e-05, - "loss": 7.8109, - "step": 72 - }, - { - "epoch": 0.03807040417209909, - "grad_norm": 4.004209041595459, - "learning_rate": 1.44e-05, - "loss": 7.7685, - "step": 73 - }, - { - "epoch": 0.038591916558018254, - "grad_norm": 5.752688884735107, - "learning_rate": 1.4599999999999999e-05, - "loss": 7.0768, - "step": 74 - }, - { - "epoch": 0.03911342894393742, - "grad_norm": 5.2515788078308105, - "learning_rate": 1.48e-05, - "loss": 7.4493, - "step": 75 - }, - { - "epoch": 0.039634941329856586, - "grad_norm": 3.6931424140930176, - "learning_rate": 1.5e-05, - "loss": 7.7851, - "step": 76 - }, - { - "epoch": 0.04015645371577575, - "grad_norm": 4.673013687133789, - "learning_rate": 1.52e-05, - "loss": 7.2595, - "step": 77 - }, - { - "epoch": 0.04067796610169491, - "grad_norm": 3.719449043273926, - "learning_rate": 1.54e-05, - "loss": 7.7229, - "step": 78 - }, - { - "epoch": 0.04119947848761408, - "grad_norm": 4.387474060058594, - "learning_rate": 1.56e-05, - "loss": 7.5895, - "step": 79 - }, - { - "epoch": 0.041720990873533245, - "grad_norm": 3.777567148208618, - "learning_rate": 1.58e-05, - "loss": 7.6765, - "step": 80 - }, - { - "epoch": 0.04224250325945241, - "grad_norm": 4.529924392700195, - "learning_rate": 1.6000000000000003e-05, - "loss": 7.4633, - "step": 81 - }, - { - "epoch": 0.04276401564537158, - "grad_norm": 4.4846272468566895, - "learning_rate": 1.62e-05, - "loss": 7.596, - "step": 82 - }, - { - "epoch": 0.043285528031290744, - "grad_norm": 4.144165992736816, - "learning_rate": 1.6400000000000002e-05, - "loss": 7.5337, - "step": 83 - }, - { - "epoch": 0.04380704041720991, - "grad_norm": 4.42596435546875, - "learning_rate": 1.66e-05, - "loss": 7.4749, - "step": 84 - }, - { - "epoch": 0.04432855280312908, - "grad_norm": 4.529561519622803, - "learning_rate": 1.6800000000000002e-05, - "loss": 7.7078, - "step": 85 - }, - { - "epoch": 0.04485006518904824, - "grad_norm": 4.734699249267578, - "learning_rate": 1.7000000000000003e-05, - "loss": 7.4827, - "step": 86 - }, - { - "epoch": 0.0453715775749674, - "grad_norm": 3.5461740493774414, - "learning_rate": 1.7199999999999998e-05, - "loss": 7.6987, - "step": 87 - }, - { - "epoch": 0.04589308996088657, - "grad_norm": 4.5075812339782715, - "learning_rate": 1.74e-05, - "loss": 7.6847, - "step": 88 - }, - { - "epoch": 0.046414602346805736, - "grad_norm": 3.5770719051361084, - "learning_rate": 1.76e-05, - "loss": 7.6693, - "step": 89 - }, - { - "epoch": 0.0469361147327249, - "grad_norm": 3.6447737216949463, - "learning_rate": 1.78e-05, - "loss": 7.6698, - "step": 90 - }, - { - "epoch": 0.04745762711864407, - "grad_norm": 4.477330684661865, - "learning_rate": 1.8e-05, - "loss": 7.2415, - "step": 91 - }, - { - "epoch": 0.047979139504563235, - "grad_norm": 4.59464693069458, - "learning_rate": 1.8200000000000002e-05, - "loss": 7.5612, - "step": 92 - }, - { - "epoch": 0.0485006518904824, - "grad_norm": 4.561006546020508, - "learning_rate": 1.84e-05, - "loss": 7.3928, - "step": 93 - }, - { - "epoch": 0.04902216427640156, - "grad_norm": 3.7959651947021484, - "learning_rate": 1.86e-05, - "loss": 7.6621, - "step": 94 - }, - { - "epoch": 0.04954367666232073, - "grad_norm": 3.2018020153045654, - "learning_rate": 1.88e-05, - "loss": 7.461, - "step": 95 - }, - { - "epoch": 0.050065189048239894, - "grad_norm": 3.7467145919799805, - "learning_rate": 1.9e-05, - "loss": 7.5199, - "step": 96 - }, - { - "epoch": 0.05058670143415906, - "grad_norm": 3.6669843196868896, - "learning_rate": 1.9200000000000003e-05, - "loss": 7.4672, - "step": 97 - }, - { - "epoch": 0.05110821382007823, - "grad_norm": 3.8501787185668945, - "learning_rate": 1.94e-05, - "loss": 7.6553, - "step": 98 - }, - { - "epoch": 0.051629726205997394, - "grad_norm": 3.7197256088256836, - "learning_rate": 1.9600000000000002e-05, - "loss": 7.503, - "step": 99 - }, - { - "epoch": 0.05215123859191656, - "grad_norm": 3.496748924255371, - "learning_rate": 1.9800000000000004e-05, - "loss": 7.5369, - "step": 100 - }, - { - "epoch": 0.052672750977835726, - "grad_norm": 3.6881065368652344, - "learning_rate": 2e-05, - "loss": 7.6187, - "step": 101 - }, - { - "epoch": 0.053194263363754886, - "grad_norm": 3.723168134689331, - "learning_rate": 2.0200000000000003e-05, - "loss": 7.6025, - "step": 102 - }, - { - "epoch": 0.05371577574967405, - "grad_norm": 3.4807724952697754, - "learning_rate": 2.04e-05, - "loss": 7.5207, - "step": 103 - }, - { - "epoch": 0.05423728813559322, - "grad_norm": 5.443449974060059, - "learning_rate": 2.06e-05, - "loss": 7.6484, - "step": 104 - }, - { - "epoch": 0.054758800521512385, - "grad_norm": 4.682538986206055, - "learning_rate": 2.08e-05, - "loss": 7.466, - "step": 105 - }, - { - "epoch": 0.05528031290743155, - "grad_norm": 3.433757781982422, - "learning_rate": 2.1e-05, - "loss": 7.6806, - "step": 106 - }, - { - "epoch": 0.05580182529335072, - "grad_norm": 3.9352850914001465, - "learning_rate": 2.12e-05, - "loss": 7.4971, - "step": 107 - }, - { - "epoch": 0.056323337679269884, - "grad_norm": 4.322215557098389, - "learning_rate": 2.1400000000000002e-05, - "loss": 7.3326, - "step": 108 - }, - { - "epoch": 0.05684485006518905, - "grad_norm": 3.155344247817993, - "learning_rate": 2.16e-05, - "loss": 7.6482, - "step": 109 - }, - { - "epoch": 0.05736636245110821, - "grad_norm": 3.5959250926971436, - "learning_rate": 2.18e-05, - "loss": 7.4452, - "step": 110 - }, - { - "epoch": 0.05788787483702738, - "grad_norm": 3.504971742630005, - "learning_rate": 2.2000000000000003e-05, - "loss": 7.5226, - "step": 111 - }, - { - "epoch": 0.05840938722294654, - "grad_norm": 4.071589946746826, - "learning_rate": 2.22e-05, - "loss": 7.5775, - "step": 112 - }, - { - "epoch": 0.05893089960886571, - "grad_norm": 4.240148544311523, - "learning_rate": 2.2400000000000002e-05, - "loss": 7.3025, - "step": 113 - }, - { - "epoch": 0.059452411994784876, - "grad_norm": 2.76248836517334, - "learning_rate": 2.26e-05, - "loss": 7.6323, - "step": 114 - }, - { - "epoch": 0.05997392438070404, - "grad_norm": 2.944532632827759, - "learning_rate": 2.2800000000000002e-05, - "loss": 7.5755, - "step": 115 - }, - { - "epoch": 0.06049543676662321, - "grad_norm": 3.8600049018859863, - "learning_rate": 2.3000000000000003e-05, - "loss": 7.5862, - "step": 116 - }, - { - "epoch": 0.061016949152542375, - "grad_norm": 3.798712730407715, - "learning_rate": 2.32e-05, - "loss": 7.2015, - "step": 117 - }, - { - "epoch": 0.06153846153846154, - "grad_norm": 4.453895092010498, - "learning_rate": 2.3400000000000003e-05, - "loss": 7.4686, - "step": 118 - }, - { - "epoch": 0.0620599739243807, - "grad_norm": 3.261066198348999, - "learning_rate": 2.36e-05, - "loss": 7.4823, - "step": 119 - }, - { - "epoch": 0.06258148631029987, - "grad_norm": 4.026912689208984, - "learning_rate": 2.38e-05, - "loss": 6.8815, - "step": 120 - }, - { - "epoch": 0.06310299869621903, - "grad_norm": 5.84824800491333, - "learning_rate": 2.4e-05, - "loss": 6.57, - "step": 121 - }, - { - "epoch": 0.06362451108213821, - "grad_norm": 3.3494112491607666, - "learning_rate": 2.4200000000000002e-05, - "loss": 7.5444, - "step": 122 - }, - { - "epoch": 0.06414602346805737, - "grad_norm": 3.446805477142334, - "learning_rate": 2.44e-05, - "loss": 7.4487, - "step": 123 - }, - { - "epoch": 0.06466753585397653, - "grad_norm": 3.2302680015563965, - "learning_rate": 2.46e-05, - "loss": 7.5182, - "step": 124 - }, - { - "epoch": 0.0651890482398957, - "grad_norm": 3.4136993885040283, - "learning_rate": 2.48e-05, - "loss": 7.4691, - "step": 125 - }, - { - "epoch": 0.06571056062581486, - "grad_norm": 3.1575052738189697, - "learning_rate": 2.5e-05, - "loss": 7.587, - "step": 126 - }, - { - "epoch": 0.06623207301173403, - "grad_norm": 3.0641024112701416, - "learning_rate": 2.5200000000000003e-05, - "loss": 7.4983, - "step": 127 - }, - { - "epoch": 0.06675358539765319, - "grad_norm": 3.3988382816314697, - "learning_rate": 2.54e-05, - "loss": 7.4704, - "step": 128 - }, - { - "epoch": 0.06727509778357237, - "grad_norm": 3.5170507431030273, - "learning_rate": 2.5600000000000002e-05, - "loss": 7.4556, - "step": 129 - }, - { - "epoch": 0.06779661016949153, - "grad_norm": 3.233266830444336, - "learning_rate": 2.58e-05, - "loss": 7.5896, - "step": 130 - }, - { - "epoch": 0.06831812255541068, - "grad_norm": 5.319973468780518, - "learning_rate": 2.6000000000000002e-05, - "loss": 7.062, - "step": 131 - }, - { - "epoch": 0.06883963494132986, - "grad_norm": 4.150874137878418, - "learning_rate": 2.6200000000000003e-05, - "loss": 7.3406, - "step": 132 - }, - { - "epoch": 0.06936114732724902, - "grad_norm": 3.942434787750244, - "learning_rate": 2.64e-05, - "loss": 7.3488, - "step": 133 - }, - { - "epoch": 0.06988265971316819, - "grad_norm": 6.038638114929199, - "learning_rate": 2.6600000000000003e-05, - "loss": 7.2262, - "step": 134 - }, - { - "epoch": 0.07040417209908735, - "grad_norm": 4.91567325592041, - "learning_rate": 2.6800000000000004e-05, - "loss": 7.3306, - "step": 135 - }, - { - "epoch": 0.07092568448500652, - "grad_norm": 3.807650327682495, - "learning_rate": 2.7000000000000002e-05, - "loss": 7.233, - "step": 136 - }, - { - "epoch": 0.07144719687092568, - "grad_norm": 3.7115111351013184, - "learning_rate": 2.7200000000000004e-05, - "loss": 7.4093, - "step": 137 - }, - { - "epoch": 0.07196870925684486, - "grad_norm": 4.007230758666992, - "learning_rate": 2.7400000000000002e-05, - "loss": 7.3865, - "step": 138 - }, - { - "epoch": 0.07249022164276402, - "grad_norm": 4.70639705657959, - "learning_rate": 2.7600000000000003e-05, - "loss": 7.1557, - "step": 139 - }, - { - "epoch": 0.07301173402868318, - "grad_norm": 4.678992748260498, - "learning_rate": 2.7800000000000005e-05, - "loss": 7.5268, - "step": 140 - }, - { - "epoch": 0.07353324641460235, - "grad_norm": 5.117120742797852, - "learning_rate": 2.8000000000000003e-05, - "loss": 7.3199, - "step": 141 - }, - { - "epoch": 0.07405475880052151, - "grad_norm": 4.096368312835693, - "learning_rate": 2.8199999999999998e-05, - "loss": 7.322, - "step": 142 - }, - { - "epoch": 0.07457627118644068, - "grad_norm": 3.6049387454986572, - "learning_rate": 2.84e-05, - "loss": 7.4737, - "step": 143 - }, - { - "epoch": 0.07509778357235984, - "grad_norm": 3.5590572357177734, - "learning_rate": 2.86e-05, - "loss": 7.5124, - "step": 144 - }, - { - "epoch": 0.07561929595827901, - "grad_norm": 3.052095890045166, - "learning_rate": 2.88e-05, - "loss": 7.4555, - "step": 145 - }, - { - "epoch": 0.07614080834419817, - "grad_norm": 3.6913254261016846, - "learning_rate": 2.9e-05, - "loss": 7.5133, - "step": 146 - }, - { - "epoch": 0.07666232073011733, - "grad_norm": 2.9980180263519287, - "learning_rate": 2.9199999999999998e-05, - "loss": 7.5124, - "step": 147 - }, - { - "epoch": 0.07718383311603651, - "grad_norm": 3.3327064514160156, - "learning_rate": 2.94e-05, - "loss": 7.3965, - "step": 148 - }, - { - "epoch": 0.07770534550195567, - "grad_norm": 3.835602045059204, - "learning_rate": 2.96e-05, - "loss": 7.3371, - "step": 149 - }, - { - "epoch": 0.07822685788787484, - "grad_norm": 3.212507486343384, - "learning_rate": 2.98e-05, - "loss": 7.3495, - "step": 150 - }, - { - "epoch": 0.078748370273794, - "grad_norm": 3.987884759902954, - "learning_rate": 3e-05, - "loss": 6.7947, - "step": 151 - }, - { - "epoch": 0.07926988265971317, - "grad_norm": 3.093217611312866, - "learning_rate": 3.02e-05, - "loss": 7.2575, - "step": 152 - }, - { - "epoch": 0.07979139504563233, - "grad_norm": 3.0359363555908203, - "learning_rate": 3.04e-05, - "loss": 7.5516, - "step": 153 - }, - { - "epoch": 0.0803129074315515, - "grad_norm": 3.396735906600952, - "learning_rate": 3.06e-05, - "loss": 7.2979, - "step": 154 - }, - { - "epoch": 0.08083441981747067, - "grad_norm": 3.282459259033203, - "learning_rate": 3.08e-05, - "loss": 7.2913, - "step": 155 - }, - { - "epoch": 0.08135593220338982, - "grad_norm": 3.2505970001220703, - "learning_rate": 3.1e-05, - "loss": 7.6147, - "step": 156 - }, - { - "epoch": 0.081877444589309, - "grad_norm": 3.0416345596313477, - "learning_rate": 3.12e-05, - "loss": 7.5666, - "step": 157 - }, - { - "epoch": 0.08239895697522816, - "grad_norm": 3.6219711303710938, - "learning_rate": 3.1400000000000004e-05, - "loss": 7.4667, - "step": 158 - }, - { - "epoch": 0.08292046936114733, - "grad_norm": 2.634970188140869, - "learning_rate": 3.16e-05, - "loss": 7.5679, - "step": 159 - }, - { - "epoch": 0.08344198174706649, - "grad_norm": 3.3388631343841553, - "learning_rate": 3.18e-05, - "loss": 7.5941, - "step": 160 - }, - { - "epoch": 0.08396349413298566, - "grad_norm": 3.418192148208618, - "learning_rate": 3.2000000000000005e-05, - "loss": 7.3521, - "step": 161 - }, - { - "epoch": 0.08448500651890482, - "grad_norm": 4.8227105140686035, - "learning_rate": 3.2200000000000003e-05, - "loss": 6.8505, - "step": 162 - }, - { - "epoch": 0.08500651890482398, - "grad_norm": 3.985224723815918, - "learning_rate": 3.24e-05, - "loss": 7.463, - "step": 163 - }, - { - "epoch": 0.08552803129074316, - "grad_norm": 3.390418767929077, - "learning_rate": 3.26e-05, - "loss": 7.3933, - "step": 164 - }, - { - "epoch": 0.08604954367666232, - "grad_norm": 3.4502861499786377, - "learning_rate": 3.2800000000000004e-05, - "loss": 7.2525, - "step": 165 - }, - { - "epoch": 0.08657105606258149, - "grad_norm": 3.928596258163452, - "learning_rate": 3.3e-05, - "loss": 7.4648, - "step": 166 - }, - { - "epoch": 0.08709256844850065, - "grad_norm": 2.900792121887207, - "learning_rate": 3.32e-05, - "loss": 7.4054, - "step": 167 - }, - { - "epoch": 0.08761408083441982, - "grad_norm": 3.5969791412353516, - "learning_rate": 3.3400000000000005e-05, - "loss": 7.1412, - "step": 168 - }, - { - "epoch": 0.08813559322033898, - "grad_norm": 2.8989689350128174, - "learning_rate": 3.3600000000000004e-05, - "loss": 7.4241, - "step": 169 - }, - { - "epoch": 0.08865710560625815, - "grad_norm": 2.927574872970581, - "learning_rate": 3.38e-05, - "loss": 7.3529, - "step": 170 - }, - { - "epoch": 0.08917861799217731, - "grad_norm": 3.080883264541626, - "learning_rate": 3.4000000000000007e-05, - "loss": 7.3821, - "step": 171 - }, - { - "epoch": 0.08970013037809647, - "grad_norm": 3.5870022773742676, - "learning_rate": 3.4200000000000005e-05, - "loss": 7.0631, - "step": 172 - }, - { - "epoch": 0.09022164276401565, - "grad_norm": 3.4165422916412354, - "learning_rate": 3.4399999999999996e-05, - "loss": 7.4823, - "step": 173 - }, - { - "epoch": 0.0907431551499348, - "grad_norm": 3.157397985458374, - "learning_rate": 3.46e-05, - "loss": 7.3439, - "step": 174 - }, - { - "epoch": 0.09126466753585398, - "grad_norm": 3.3898069858551025, - "learning_rate": 3.48e-05, - "loss": 7.2797, - "step": 175 - }, - { - "epoch": 0.09178617992177314, - "grad_norm": 3.834183931350708, - "learning_rate": 3.5e-05, - "loss": 7.2583, - "step": 176 - }, - { - "epoch": 0.09230769230769231, - "grad_norm": 2.7050435543060303, - "learning_rate": 3.52e-05, - "loss": 7.5331, - "step": 177 - }, - { - "epoch": 0.09282920469361147, - "grad_norm": 2.892559766769409, - "learning_rate": 3.54e-05, - "loss": 7.5713, - "step": 178 - }, - { - "epoch": 0.09335071707953065, - "grad_norm": 3.3834376335144043, - "learning_rate": 3.56e-05, - "loss": 7.3731, - "step": 179 - }, - { - "epoch": 0.0938722294654498, - "grad_norm": 2.4844014644622803, - "learning_rate": 3.58e-05, - "loss": 7.576, - "step": 180 - }, - { - "epoch": 0.09439374185136896, - "grad_norm": 3.313626766204834, - "learning_rate": 3.6e-05, - "loss": 7.4822, - "step": 181 - }, - { - "epoch": 0.09491525423728814, - "grad_norm": 4.537646770477295, - "learning_rate": 3.62e-05, - "loss": 7.2434, - "step": 182 - }, - { - "epoch": 0.0954367666232073, - "grad_norm": 3.511963367462158, - "learning_rate": 3.6400000000000004e-05, - "loss": 7.4671, - "step": 183 - }, - { - "epoch": 0.09595827900912647, - "grad_norm": 3.1389901638031006, - "learning_rate": 3.66e-05, - "loss": 7.2465, - "step": 184 - }, - { - "epoch": 0.09647979139504563, - "grad_norm": 3.084373712539673, - "learning_rate": 3.68e-05, - "loss": 7.4383, - "step": 185 - }, - { - "epoch": 0.0970013037809648, - "grad_norm": 3.6729354858398438, - "learning_rate": 3.7e-05, - "loss": 7.1553, - "step": 186 - }, - { - "epoch": 0.09752281616688396, - "grad_norm": 3.3839831352233887, - "learning_rate": 3.72e-05, - "loss": 7.3268, - "step": 187 - }, - { - "epoch": 0.09804432855280312, - "grad_norm": 4.4720892906188965, - "learning_rate": 3.74e-05, - "loss": 7.4081, - "step": 188 - }, - { - "epoch": 0.0985658409387223, - "grad_norm": 3.6028249263763428, - "learning_rate": 3.76e-05, - "loss": 7.0773, - "step": 189 - }, - { - "epoch": 0.09908735332464146, - "grad_norm": 3.095641613006592, - "learning_rate": 3.7800000000000004e-05, - "loss": 7.5315, - "step": 190 - }, - { - "epoch": 0.09960886571056063, - "grad_norm": 2.9480347633361816, - "learning_rate": 3.8e-05, - "loss": 7.4514, - "step": 191 - }, - { - "epoch": 0.10013037809647979, - "grad_norm": 2.6473734378814697, - "learning_rate": 3.82e-05, - "loss": 7.5321, - "step": 192 - }, - { - "epoch": 0.10065189048239896, - "grad_norm": 4.760178089141846, - "learning_rate": 3.8400000000000005e-05, - "loss": 7.3236, - "step": 193 - }, - { - "epoch": 0.10117340286831812, - "grad_norm": 2.964203357696533, - "learning_rate": 3.86e-05, - "loss": 7.5497, - "step": 194 - }, - { - "epoch": 0.1016949152542373, - "grad_norm": 3.2622687816619873, - "learning_rate": 3.88e-05, - "loss": 7.2481, - "step": 195 - }, - { - "epoch": 0.10221642764015645, - "grad_norm": 3.5849318504333496, - "learning_rate": 3.9000000000000006e-05, - "loss": 6.9631, - "step": 196 - }, - { - "epoch": 0.10273794002607561, - "grad_norm": 2.8486852645874023, - "learning_rate": 3.9200000000000004e-05, - "loss": 7.3895, - "step": 197 - }, - { - "epoch": 0.10325945241199479, - "grad_norm": 4.567874431610107, - "learning_rate": 3.94e-05, - "loss": 7.0744, - "step": 198 - }, - { - "epoch": 0.10378096479791395, - "grad_norm": 3.6248340606689453, - "learning_rate": 3.960000000000001e-05, - "loss": 7.0411, - "step": 199 - }, - { - "epoch": 0.10430247718383312, - "grad_norm": 2.7067694664001465, - "learning_rate": 3.9800000000000005e-05, - "loss": 7.3924, - "step": 200 - }, - { - "epoch": 0.10482398956975228, - "grad_norm": 3.14599871635437, - "learning_rate": 4e-05, - "loss": 7.3167, - "step": 201 - }, - { - "epoch": 0.10534550195567145, - "grad_norm": 2.6617326736450195, - "learning_rate": 4.02e-05, - "loss": 7.5365, - "step": 202 - }, - { - "epoch": 0.10586701434159061, - "grad_norm": 4.169859886169434, - "learning_rate": 4.0400000000000006e-05, - "loss": 7.0596, - "step": 203 - }, - { - "epoch": 0.10638852672750977, - "grad_norm": 2.913005828857422, - "learning_rate": 4.0600000000000004e-05, - "loss": 7.475, - "step": 204 - }, - { - "epoch": 0.10691003911342895, - "grad_norm": 3.477138042449951, - "learning_rate": 4.08e-05, - "loss": 7.2434, - "step": 205 - }, - { - "epoch": 0.1074315514993481, - "grad_norm": 2.9127392768859863, - "learning_rate": 4.1e-05, - "loss": 7.4914, - "step": 206 - }, - { - "epoch": 0.10795306388526728, - "grad_norm": 3.4160966873168945, - "learning_rate": 4.12e-05, - "loss": 7.2338, - "step": 207 - }, - { - "epoch": 0.10847457627118644, - "grad_norm": 3.0882041454315186, - "learning_rate": 4.14e-05, - "loss": 7.2559, - "step": 208 - }, - { - "epoch": 0.10899608865710561, - "grad_norm": 3.879185438156128, - "learning_rate": 4.16e-05, - "loss": 7.0722, - "step": 209 - }, - { - "epoch": 0.10951760104302477, - "grad_norm": 2.9452905654907227, - "learning_rate": 4.18e-05, - "loss": 7.4291, - "step": 210 - }, - { - "epoch": 0.11003911342894394, - "grad_norm": 4.876804351806641, - "learning_rate": 4.2e-05, - "loss": 7.4369, - "step": 211 - }, - { - "epoch": 0.1105606258148631, - "grad_norm": 2.561800956726074, - "learning_rate": 4.22e-05, - "loss": 7.4774, - "step": 212 - }, - { - "epoch": 0.11108213820078226, - "grad_norm": 4.161262035369873, - "learning_rate": 4.24e-05, - "loss": 6.5584, - "step": 213 - }, - { - "epoch": 0.11160365058670144, - "grad_norm": 3.3121731281280518, - "learning_rate": 4.26e-05, - "loss": 7.2167, - "step": 214 - }, - { - "epoch": 0.1121251629726206, - "grad_norm": 3.4215073585510254, - "learning_rate": 4.2800000000000004e-05, - "loss": 7.0574, - "step": 215 - }, - { - "epoch": 0.11264667535853977, - "grad_norm": 2.8985512256622314, - "learning_rate": 4.3e-05, - "loss": 7.3702, - "step": 216 - }, - { - "epoch": 0.11316818774445893, - "grad_norm": 3.5382654666900635, - "learning_rate": 4.32e-05, - "loss": 7.128, - "step": 217 - }, - { - "epoch": 0.1136897001303781, - "grad_norm": 2.850515127182007, - "learning_rate": 4.3400000000000005e-05, - "loss": 7.4392, - "step": 218 - }, - { - "epoch": 0.11421121251629726, - "grad_norm": 3.423903703689575, - "learning_rate": 4.36e-05, - "loss": 7.3433, - "step": 219 - }, - { - "epoch": 0.11473272490221642, - "grad_norm": 5.7699360847473145, - "learning_rate": 4.38e-05, - "loss": 7.2612, - "step": 220 - }, - { - "epoch": 0.1152542372881356, - "grad_norm": 4.314525127410889, - "learning_rate": 4.4000000000000006e-05, - "loss": 7.3813, - "step": 221 - }, - { - "epoch": 0.11577574967405475, - "grad_norm": 4.324763774871826, - "learning_rate": 4.4200000000000004e-05, - "loss": 6.7292, - "step": 222 - }, - { - "epoch": 0.11629726205997393, - "grad_norm": 7.078699111938477, - "learning_rate": 4.44e-05, - "loss": 7.0438, - "step": 223 - }, - { - "epoch": 0.11681877444589309, - "grad_norm": 5.300456523895264, - "learning_rate": 4.46e-05, - "loss": 7.1135, - "step": 224 - }, - { - "epoch": 0.11734028683181226, - "grad_norm": 3.453906536102295, - "learning_rate": 4.4800000000000005e-05, - "loss": 7.3285, - "step": 225 - }, - { - "epoch": 0.11786179921773142, - "grad_norm": 5.6908183097839355, - "learning_rate": 4.5e-05, - "loss": 7.0077, - "step": 226 - }, - { - "epoch": 0.11838331160365059, - "grad_norm": 3.9178898334503174, - "learning_rate": 4.52e-05, - "loss": 7.3279, - "step": 227 - }, - { - "epoch": 0.11890482398956975, - "grad_norm": 3.508704900741577, - "learning_rate": 4.5400000000000006e-05, - "loss": 7.144, - "step": 228 - }, - { - "epoch": 0.11942633637548891, - "grad_norm": 4.181432247161865, - "learning_rate": 4.5600000000000004e-05, - "loss": 7.241, - "step": 229 - }, - { - "epoch": 0.11994784876140809, - "grad_norm": 2.5863866806030273, - "learning_rate": 4.58e-05, - "loss": 7.4811, - "step": 230 - }, - { - "epoch": 0.12046936114732724, - "grad_norm": 8.508010864257812, - "learning_rate": 4.600000000000001e-05, - "loss": 6.8705, - "step": 231 - }, - { - "epoch": 0.12099087353324642, - "grad_norm": 3.1672141551971436, - "learning_rate": 4.6200000000000005e-05, - "loss": 7.3065, - "step": 232 - }, - { - "epoch": 0.12151238591916558, - "grad_norm": 3.6299948692321777, - "learning_rate": 4.64e-05, - "loss": 7.1479, - "step": 233 - }, - { - "epoch": 0.12203389830508475, - "grad_norm": 4.699325084686279, - "learning_rate": 4.660000000000001e-05, - "loss": 6.7042, - "step": 234 - }, - { - "epoch": 0.12255541069100391, - "grad_norm": 3.7025651931762695, - "learning_rate": 4.6800000000000006e-05, - "loss": 7.3001, - "step": 235 - }, - { - "epoch": 0.12307692307692308, - "grad_norm": 3.0916712284088135, - "learning_rate": 4.7e-05, - "loss": 7.235, - "step": 236 - }, - { - "epoch": 0.12359843546284224, - "grad_norm": 4.043112277984619, - "learning_rate": 4.72e-05, - "loss": 6.7526, - "step": 237 - }, - { - "epoch": 0.1241199478487614, - "grad_norm": 3.4944348335266113, - "learning_rate": 4.74e-05, - "loss": 7.3302, - "step": 238 - }, - { - "epoch": 0.12464146023468058, - "grad_norm": 2.8404836654663086, - "learning_rate": 4.76e-05, - "loss": 7.398, - "step": 239 - }, - { - "epoch": 0.12516297262059975, - "grad_norm": 4.0313801765441895, - "learning_rate": 4.78e-05, - "loss": 6.9056, - "step": 240 - }, - { - "epoch": 0.1256844850065189, - "grad_norm": 3.906691789627075, - "learning_rate": 4.8e-05, - "loss": 6.644, - "step": 241 - }, - { - "epoch": 0.12620599739243807, - "grad_norm": 3.601623773574829, - "learning_rate": 4.82e-05, - "loss": 7.0927, - "step": 242 - }, - { - "epoch": 0.12672750977835723, - "grad_norm": 2.7283427715301514, - "learning_rate": 4.8400000000000004e-05, - "loss": 7.4099, - "step": 243 - }, - { - "epoch": 0.12724902216427642, - "grad_norm": 3.994126796722412, - "learning_rate": 4.86e-05, - "loss": 6.9068, - "step": 244 - }, - { - "epoch": 0.12777053455019557, - "grad_norm": 3.048358917236328, - "learning_rate": 4.88e-05, - "loss": 7.3069, - "step": 245 - }, - { - "epoch": 0.12829204693611473, - "grad_norm": 3.4312479496002197, - "learning_rate": 4.9e-05, - "loss": 7.2706, - "step": 246 - }, - { - "epoch": 0.1288135593220339, - "grad_norm": 3.605164051055908, - "learning_rate": 4.92e-05, - "loss": 7.1076, - "step": 247 - }, - { - "epoch": 0.12933507170795305, - "grad_norm": 5.564464569091797, - "learning_rate": 4.94e-05, - "loss": 6.2989, - "step": 248 - }, - { - "epoch": 0.12985658409387224, - "grad_norm": 3.8566784858703613, - "learning_rate": 4.96e-05, - "loss": 7.0423, - "step": 249 - }, - { - "epoch": 0.1303780964797914, - "grad_norm": 3.1338601112365723, - "learning_rate": 4.9800000000000004e-05, - "loss": 7.0444, - "step": 250 - }, - { - "epoch": 0.13089960886571056, - "grad_norm": 2.8090527057647705, - "learning_rate": 5e-05, - "loss": 7.4891, - "step": 251 - }, - { - "epoch": 0.13142112125162972, - "grad_norm": 7.6368632316589355, - "learning_rate": 5.02e-05, - "loss": 6.0822, - "step": 252 - }, - { - "epoch": 0.13194263363754888, - "grad_norm": 4.872903823852539, - "learning_rate": 5.0400000000000005e-05, - "loss": 7.0497, - "step": 253 - }, - { - "epoch": 0.13246414602346807, - "grad_norm": 5.52940559387207, - "learning_rate": 5.0600000000000003e-05, - "loss": 6.6472, - "step": 254 - }, - { - "epoch": 0.13298565840938723, - "grad_norm": 2.805499792098999, - "learning_rate": 5.08e-05, - "loss": 7.3001, - "step": 255 - }, - { - "epoch": 0.13350717079530638, - "grad_norm": 4.028730869293213, - "learning_rate": 5.1000000000000006e-05, - "loss": 6.8972, - "step": 256 - }, - { - "epoch": 0.13402868318122554, - "grad_norm": 4.018136024475098, - "learning_rate": 5.1200000000000004e-05, - "loss": 7.3112, - "step": 257 - }, - { - "epoch": 0.13455019556714473, - "grad_norm": 3.7548141479492188, - "learning_rate": 5.14e-05, - "loss": 7.1665, - "step": 258 - }, - { - "epoch": 0.1350717079530639, - "grad_norm": 3.9899773597717285, - "learning_rate": 5.16e-05, - "loss": 6.7029, - "step": 259 - }, - { - "epoch": 0.13559322033898305, - "grad_norm": 5.135863304138184, - "learning_rate": 5.1800000000000005e-05, - "loss": 7.2739, - "step": 260 - }, - { - "epoch": 0.1361147327249022, - "grad_norm": 2.649033784866333, - "learning_rate": 5.2000000000000004e-05, - "loss": 7.3011, - "step": 261 - }, - { - "epoch": 0.13663624511082137, - "grad_norm": 3.3404154777526855, - "learning_rate": 5.22e-05, - "loss": 7.3246, - "step": 262 - }, - { - "epoch": 0.13715775749674056, - "grad_norm": 3.8582091331481934, - "learning_rate": 5.2400000000000007e-05, - "loss": 7.0718, - "step": 263 - }, - { - "epoch": 0.13767926988265972, - "grad_norm": 2.9569761753082275, - "learning_rate": 5.2600000000000005e-05, - "loss": 7.1488, - "step": 264 - }, - { - "epoch": 0.13820078226857888, - "grad_norm": 2.694443702697754, - "learning_rate": 5.28e-05, - "loss": 7.366, - "step": 265 - }, - { - "epoch": 0.13872229465449804, - "grad_norm": 5.168875694274902, - "learning_rate": 5.300000000000001e-05, - "loss": 7.0269, - "step": 266 - }, - { - "epoch": 0.13924380704041722, - "grad_norm": 4.086500644683838, - "learning_rate": 5.3200000000000006e-05, - "loss": 7.2118, - "step": 267 - }, - { - "epoch": 0.13976531942633638, - "grad_norm": 4.048635959625244, - "learning_rate": 5.3400000000000004e-05, - "loss": 7.21, - "step": 268 - }, - { - "epoch": 0.14028683181225554, - "grad_norm": 4.643430233001709, - "learning_rate": 5.360000000000001e-05, - "loss": 6.8379, - "step": 269 - }, - { - "epoch": 0.1408083441981747, - "grad_norm": 4.895519733428955, - "learning_rate": 5.380000000000001e-05, - "loss": 6.992, - "step": 270 - }, - { - "epoch": 0.14132985658409386, - "grad_norm": 3.6653215885162354, - "learning_rate": 5.4000000000000005e-05, - "loss": 6.9136, - "step": 271 - }, - { - "epoch": 0.14185136897001305, - "grad_norm": 4.5786662101745605, - "learning_rate": 5.420000000000001e-05, - "loss": 6.9135, - "step": 272 - }, - { - "epoch": 0.1423728813559322, - "grad_norm": 4.77662992477417, - "learning_rate": 5.440000000000001e-05, - "loss": 6.5562, - "step": 273 - }, - { - "epoch": 0.14289439374185137, - "grad_norm": 3.748788595199585, - "learning_rate": 5.4600000000000006e-05, - "loss": 7.1313, - "step": 274 - }, - { - "epoch": 0.14341590612777053, - "grad_norm": 2.907221555709839, - "learning_rate": 5.4800000000000004e-05, - "loss": 7.2938, - "step": 275 - }, - { - "epoch": 0.1439374185136897, - "grad_norm": 4.719839096069336, - "learning_rate": 5.500000000000001e-05, - "loss": 7.179, - "step": 276 - }, - { - "epoch": 0.14445893089960887, - "grad_norm": 4.704406261444092, - "learning_rate": 5.520000000000001e-05, - "loss": 6.7314, - "step": 277 - }, - { - "epoch": 0.14498044328552803, - "grad_norm": 3.7895584106445312, - "learning_rate": 5.5400000000000005e-05, - "loss": 7.1991, - "step": 278 - }, - { - "epoch": 0.1455019556714472, - "grad_norm": 3.7524566650390625, - "learning_rate": 5.560000000000001e-05, - "loss": 7.1896, - "step": 279 - }, - { - "epoch": 0.14602346805736635, - "grad_norm": 3.232865810394287, - "learning_rate": 5.580000000000001e-05, - "loss": 7.4014, - "step": 280 - }, - { - "epoch": 0.14654498044328554, - "grad_norm": 2.797893762588501, - "learning_rate": 5.6000000000000006e-05, - "loss": 7.1049, - "step": 281 - }, - { - "epoch": 0.1470664928292047, - "grad_norm": 3.1756818294525146, - "learning_rate": 5.620000000000001e-05, - "loss": 6.8909, - "step": 282 - }, - { - "epoch": 0.14758800521512386, - "grad_norm": 3.7766144275665283, - "learning_rate": 5.6399999999999995e-05, - "loss": 6.999, - "step": 283 - }, - { - "epoch": 0.14810951760104302, - "grad_norm": 3.2954635620117188, - "learning_rate": 5.66e-05, - "loss": 6.9622, - "step": 284 - }, - { - "epoch": 0.14863102998696218, - "grad_norm": 2.8703413009643555, - "learning_rate": 5.68e-05, - "loss": 7.0317, - "step": 285 - }, - { - "epoch": 0.14915254237288136, - "grad_norm": 3.632871150970459, - "learning_rate": 5.6999999999999996e-05, - "loss": 7.4122, - "step": 286 - }, - { - "epoch": 0.14967405475880052, - "grad_norm": 3.128606081008911, - "learning_rate": 5.72e-05, - "loss": 7.2959, - "step": 287 - }, - { - "epoch": 0.15019556714471968, - "grad_norm": 3.604712724685669, - "learning_rate": 5.74e-05, - "loss": 6.9134, - "step": 288 - }, - { - "epoch": 0.15071707953063884, - "grad_norm": 3.8073525428771973, - "learning_rate": 5.76e-05, - "loss": 7.05, - "step": 289 - }, - { - "epoch": 0.15123859191655803, - "grad_norm": 3.9711482524871826, - "learning_rate": 5.7799999999999995e-05, - "loss": 7.0299, - "step": 290 - }, - { - "epoch": 0.1517601043024772, - "grad_norm": 3.2127535343170166, - "learning_rate": 5.8e-05, - "loss": 6.9698, - "step": 291 - }, - { - "epoch": 0.15228161668839635, - "grad_norm": 4.119180679321289, - "learning_rate": 5.82e-05, - "loss": 6.839, - "step": 292 - }, - { - "epoch": 0.1528031290743155, - "grad_norm": 2.627946615219116, - "learning_rate": 5.8399999999999997e-05, - "loss": 7.3496, - "step": 293 - }, - { - "epoch": 0.15332464146023467, - "grad_norm": 3.819568157196045, - "learning_rate": 5.86e-05, - "loss": 6.5435, - "step": 294 - }, - { - "epoch": 0.15384615384615385, - "grad_norm": 2.9470767974853516, - "learning_rate": 5.88e-05, - "loss": 7.1781, - "step": 295 - }, - { - "epoch": 0.15436766623207301, - "grad_norm": 2.706519603729248, - "learning_rate": 5.9e-05, - "loss": 7.0022, - "step": 296 - }, - { - "epoch": 0.15488917861799217, - "grad_norm": 2.6944267749786377, - "learning_rate": 5.92e-05, - "loss": 6.7678, - "step": 297 - }, - { - "epoch": 0.15541069100391133, - "grad_norm": 2.8793187141418457, - "learning_rate": 5.94e-05, - "loss": 6.9549, - "step": 298 - }, - { - "epoch": 0.15593220338983052, - "grad_norm": 3.094952344894409, - "learning_rate": 5.96e-05, - "loss": 6.7861, - "step": 299 - }, - { - "epoch": 0.15645371577574968, - "grad_norm": 2.9189517498016357, - "learning_rate": 5.9800000000000003e-05, - "loss": 7.1257, - "step": 300 - }, - { - "epoch": 0.15697522816166884, - "grad_norm": 3.8729801177978516, - "learning_rate": 6e-05, - "loss": 7.1658, - "step": 301 - }, - { - "epoch": 0.157496740547588, - "grad_norm": 3.2805135250091553, - "learning_rate": 6.02e-05, - "loss": 6.7815, - "step": 302 - }, - { - "epoch": 0.15801825293350716, - "grad_norm": 3.4292726516723633, - "learning_rate": 6.04e-05, - "loss": 7.0912, - "step": 303 - }, - { - "epoch": 0.15853976531942635, - "grad_norm": 5.145147800445557, - "learning_rate": 6.06e-05, - "loss": 6.5012, - "step": 304 - }, - { - "epoch": 0.1590612777053455, - "grad_norm": 2.6266815662384033, - "learning_rate": 6.08e-05, - "loss": 6.8954, - "step": 305 - }, - { - "epoch": 0.15958279009126466, - "grad_norm": 2.963740825653076, - "learning_rate": 6.1e-05, - "loss": 6.9509, - "step": 306 - }, - { - "epoch": 0.16010430247718382, - "grad_norm": 3.066110372543335, - "learning_rate": 6.12e-05, - "loss": 6.9598, - "step": 307 - }, - { - "epoch": 0.160625814863103, - "grad_norm": 3.2593555450439453, - "learning_rate": 6.14e-05, - "loss": 6.8141, - "step": 308 - }, - { - "epoch": 0.16114732724902217, - "grad_norm": 3.2543344497680664, - "learning_rate": 6.16e-05, - "loss": 6.9847, - "step": 309 - }, - { - "epoch": 0.16166883963494133, - "grad_norm": 3.4616024494171143, - "learning_rate": 6.18e-05, - "loss": 6.6924, - "step": 310 - }, - { - "epoch": 0.1621903520208605, - "grad_norm": 3.570981979370117, - "learning_rate": 6.2e-05, - "loss": 6.7445, - "step": 311 - }, - { - "epoch": 0.16271186440677965, - "grad_norm": 4.955770969390869, - "learning_rate": 6.220000000000001e-05, - "loss": 6.3881, - "step": 312 - }, - { - "epoch": 0.16323337679269884, - "grad_norm": 3.3917860984802246, - "learning_rate": 6.24e-05, - "loss": 6.5997, - "step": 313 - }, - { - "epoch": 0.163754889178618, - "grad_norm": 3.502500295639038, - "learning_rate": 6.26e-05, - "loss": 6.6838, - "step": 314 - }, - { - "epoch": 0.16427640156453716, - "grad_norm": 4.28438663482666, - "learning_rate": 6.280000000000001e-05, - "loss": 7.0345, - "step": 315 - }, - { - "epoch": 0.16479791395045632, - "grad_norm": 2.617302179336548, - "learning_rate": 6.3e-05, - "loss": 7.3267, - "step": 316 - }, - { - "epoch": 0.1653194263363755, - "grad_norm": 5.027987480163574, - "learning_rate": 6.32e-05, - "loss": 6.7456, - "step": 317 - }, - { - "epoch": 0.16584093872229466, - "grad_norm": 3.9923369884490967, - "learning_rate": 6.340000000000001e-05, - "loss": 6.9154, - "step": 318 - }, - { - "epoch": 0.16636245110821382, - "grad_norm": 3.381068229675293, - "learning_rate": 6.36e-05, - "loss": 6.7825, - "step": 319 - }, - { - "epoch": 0.16688396349413298, - "grad_norm": 3.726672410964966, - "learning_rate": 6.38e-05, - "loss": 7.0749, - "step": 320 - }, - { - "epoch": 0.16740547588005214, - "grad_norm": 3.0546324253082275, - "learning_rate": 6.400000000000001e-05, - "loss": 6.9388, - "step": 321 - }, - { - "epoch": 0.16792698826597133, - "grad_norm": 2.4811761379241943, - "learning_rate": 6.42e-05, - "loss": 7.3094, - "step": 322 - }, - { - "epoch": 0.1684485006518905, - "grad_norm": 4.7711710929870605, - "learning_rate": 6.440000000000001e-05, - "loss": 6.7317, - "step": 323 - }, - { - "epoch": 0.16897001303780965, - "grad_norm": 3.3538639545440674, - "learning_rate": 6.460000000000001e-05, - "loss": 7.0458, - "step": 324 - }, - { - "epoch": 0.1694915254237288, - "grad_norm": 3.4860734939575195, - "learning_rate": 6.48e-05, - "loss": 6.8537, - "step": 325 - }, - { - "epoch": 0.17001303780964797, - "grad_norm": 3.4439752101898193, - "learning_rate": 6.500000000000001e-05, - "loss": 6.6793, - "step": 326 - }, - { - "epoch": 0.17053455019556715, - "grad_norm": 2.9555447101593018, - "learning_rate": 6.52e-05, - "loss": 6.9347, - "step": 327 - }, - { - "epoch": 0.1710560625814863, - "grad_norm": 4.9550461769104, - "learning_rate": 6.54e-05, - "loss": 6.1947, - "step": 328 - }, - { - "epoch": 0.17157757496740547, - "grad_norm": 3.706544876098633, - "learning_rate": 6.560000000000001e-05, - "loss": 7.0336, - "step": 329 - }, - { - "epoch": 0.17209908735332463, - "grad_norm": 2.4361014366149902, - "learning_rate": 6.58e-05, - "loss": 7.2412, - "step": 330 - }, - { - "epoch": 0.17262059973924382, - "grad_norm": 4.105840682983398, - "learning_rate": 6.6e-05, - "loss": 7.0146, - "step": 331 - }, - { - "epoch": 0.17314211212516298, - "grad_norm": 2.911102294921875, - "learning_rate": 6.620000000000001e-05, - "loss": 7.1755, - "step": 332 - }, - { - "epoch": 0.17366362451108214, - "grad_norm": 4.427680015563965, - "learning_rate": 6.64e-05, - "loss": 7.1427, - "step": 333 - }, - { - "epoch": 0.1741851368970013, - "grad_norm": 3.9352667331695557, - "learning_rate": 6.66e-05, - "loss": 7.0795, - "step": 334 - }, - { - "epoch": 0.17470664928292046, - "grad_norm": 3.0695817470550537, - "learning_rate": 6.680000000000001e-05, - "loss": 6.519, - "step": 335 - }, - { - "epoch": 0.17522816166883964, - "grad_norm": 5.045691013336182, - "learning_rate": 6.7e-05, - "loss": 6.8514, - "step": 336 - }, - { - "epoch": 0.1757496740547588, - "grad_norm": 3.0756583213806152, - "learning_rate": 6.720000000000001e-05, - "loss": 6.8241, - "step": 337 - }, - { - "epoch": 0.17627118644067796, - "grad_norm": 2.9580092430114746, - "learning_rate": 6.740000000000001e-05, - "loss": 6.8762, - "step": 338 - }, - { - "epoch": 0.17679269882659712, - "grad_norm": 3.5179967880249023, - "learning_rate": 6.76e-05, - "loss": 6.3788, - "step": 339 - }, - { - "epoch": 0.1773142112125163, - "grad_norm": 2.7665321826934814, - "learning_rate": 6.780000000000001e-05, - "loss": 6.7854, - "step": 340 - }, - { - "epoch": 0.17783572359843547, - "grad_norm": 3.25581693649292, - "learning_rate": 6.800000000000001e-05, - "loss": 6.9394, - "step": 341 - }, - { - "epoch": 0.17835723598435463, - "grad_norm": 3.7985944747924805, - "learning_rate": 6.82e-05, - "loss": 6.7423, - "step": 342 - }, - { - "epoch": 0.1788787483702738, - "grad_norm": 2.765812635421753, - "learning_rate": 6.840000000000001e-05, - "loss": 7.1229, - "step": 343 - }, - { - "epoch": 0.17940026075619295, - "grad_norm": 3.301640748977661, - "learning_rate": 6.860000000000001e-05, - "loss": 6.761, - "step": 344 - }, - { - "epoch": 0.17992177314211213, - "grad_norm": 3.5700604915618896, - "learning_rate": 6.879999999999999e-05, - "loss": 7.0995, - "step": 345 - }, - { - "epoch": 0.1804432855280313, - "grad_norm": 3.4261698722839355, - "learning_rate": 6.9e-05, - "loss": 6.6845, - "step": 346 - }, - { - "epoch": 0.18096479791395045, - "grad_norm": 2.867846965789795, - "learning_rate": 6.92e-05, - "loss": 7.1234, - "step": 347 - }, - { - "epoch": 0.1814863102998696, - "grad_norm": 4.3554182052612305, - "learning_rate": 6.939999999999999e-05, - "loss": 6.3443, - "step": 348 - }, - { - "epoch": 0.1820078226857888, - "grad_norm": 4.192983627319336, - "learning_rate": 6.96e-05, - "loss": 6.6085, - "step": 349 - }, - { - "epoch": 0.18252933507170796, - "grad_norm": 3.7375059127807617, - "learning_rate": 6.98e-05, - "loss": 6.9625, - "step": 350 - }, - { - "epoch": 0.18305084745762712, - "grad_norm": 3.2049381732940674, - "learning_rate": 7e-05, - "loss": 6.6007, - "step": 351 - }, - { - "epoch": 0.18357235984354628, - "grad_norm": 3.154276132583618, - "learning_rate": 7.02e-05, - "loss": 6.1258, - "step": 352 - }, - { - "epoch": 0.18409387222946544, - "grad_norm": 3.293564796447754, - "learning_rate": 7.04e-05, - "loss": 6.8757, - "step": 353 - }, - { - "epoch": 0.18461538461538463, - "grad_norm": 3.0282490253448486, - "learning_rate": 7.06e-05, - "loss": 6.8665, - "step": 354 - }, - { - "epoch": 0.18513689700130379, - "grad_norm": 3.028792142868042, - "learning_rate": 7.08e-05, - "loss": 6.9648, - "step": 355 - }, - { - "epoch": 0.18565840938722294, - "grad_norm": 3.3097164630889893, - "learning_rate": 7.1e-05, - "loss": 6.6744, - "step": 356 - }, - { - "epoch": 0.1861799217731421, - "grad_norm": 2.978569746017456, - "learning_rate": 7.12e-05, - "loss": 6.827, - "step": 357 - }, - { - "epoch": 0.1867014341590613, - "grad_norm": 2.8361692428588867, - "learning_rate": 7.14e-05, - "loss": 7.1999, - "step": 358 - }, - { - "epoch": 0.18722294654498045, - "grad_norm": 2.434253454208374, - "learning_rate": 7.16e-05, - "loss": 7.1673, - "step": 359 - }, - { - "epoch": 0.1877444589308996, - "grad_norm": 3.8715171813964844, - "learning_rate": 7.18e-05, - "loss": 6.5985, - "step": 360 - }, - { - "epoch": 0.18826597131681877, - "grad_norm": 2.6759724617004395, - "learning_rate": 7.2e-05, - "loss": 6.9218, - "step": 361 - }, - { - "epoch": 0.18878748370273793, - "grad_norm": 3.4225521087646484, - "learning_rate": 7.22e-05, - "loss": 6.2147, - "step": 362 - }, - { - "epoch": 0.18930899608865712, - "grad_norm": 3.0106003284454346, - "learning_rate": 7.24e-05, - "loss": 6.539, - "step": 363 - }, - { - "epoch": 0.18983050847457628, - "grad_norm": 2.9981253147125244, - "learning_rate": 7.26e-05, - "loss": 6.8481, - "step": 364 - }, - { - "epoch": 0.19035202086049544, - "grad_norm": 2.7975802421569824, - "learning_rate": 7.280000000000001e-05, - "loss": 6.7999, - "step": 365 - }, - { - "epoch": 0.1908735332464146, - "grad_norm": 2.4574432373046875, - "learning_rate": 7.3e-05, - "loss": 7.1035, - "step": 366 - }, - { - "epoch": 0.19139504563233375, - "grad_norm": 4.905591011047363, - "learning_rate": 7.32e-05, - "loss": 6.0549, - "step": 367 - }, - { - "epoch": 0.19191655801825294, - "grad_norm": 3.8138203620910645, - "learning_rate": 7.340000000000001e-05, - "loss": 6.8213, - "step": 368 - }, - { - "epoch": 0.1924380704041721, - "grad_norm": 3.0640335083007812, - "learning_rate": 7.36e-05, - "loss": 6.7799, - "step": 369 - }, - { - "epoch": 0.19295958279009126, - "grad_norm": 3.3823201656341553, - "learning_rate": 7.38e-05, - "loss": 6.9555, - "step": 370 - }, - { - "epoch": 0.19348109517601042, - "grad_norm": 2.893047571182251, - "learning_rate": 7.4e-05, - "loss": 6.9372, - "step": 371 - }, - { - "epoch": 0.1940026075619296, - "grad_norm": 2.6517035961151123, - "learning_rate": 7.42e-05, - "loss": 7.2126, - "step": 372 - }, - { - "epoch": 0.19452411994784877, - "grad_norm": 2.4646477699279785, - "learning_rate": 7.44e-05, - "loss": 7.114, - "step": 373 - }, - { - "epoch": 0.19504563233376793, - "grad_norm": 4.219038009643555, - "learning_rate": 7.46e-05, - "loss": 6.3893, - "step": 374 - }, - { - "epoch": 0.19556714471968709, - "grad_norm": 2.6734843254089355, - "learning_rate": 7.48e-05, - "loss": 7.0357, - "step": 375 - }, - { - "epoch": 0.19608865710560625, - "grad_norm": 3.205026626586914, - "learning_rate": 7.500000000000001e-05, - "loss": 6.4775, - "step": 376 - }, - { - "epoch": 0.19661016949152543, - "grad_norm": 2.869208812713623, - "learning_rate": 7.52e-05, - "loss": 7.0149, - "step": 377 - }, - { - "epoch": 0.1971316818774446, - "grad_norm": 3.115974187850952, - "learning_rate": 7.54e-05, - "loss": 6.8281, - "step": 378 - }, - { - "epoch": 0.19765319426336375, - "grad_norm": 2.7429001331329346, - "learning_rate": 7.560000000000001e-05, - "loss": 7.1209, - "step": 379 - }, - { - "epoch": 0.1981747066492829, - "grad_norm": 2.84698748588562, - "learning_rate": 7.58e-05, - "loss": 6.5012, - "step": 380 - }, - { - "epoch": 0.1986962190352021, - "grad_norm": 3.101663112640381, - "learning_rate": 7.6e-05, - "loss": 6.759, - "step": 381 - }, - { - "epoch": 0.19921773142112126, - "grad_norm": 2.9939825534820557, - "learning_rate": 7.620000000000001e-05, - "loss": 6.9159, - "step": 382 - }, - { - "epoch": 0.19973924380704042, - "grad_norm": 2.900312662124634, - "learning_rate": 7.64e-05, - "loss": 7.1468, - "step": 383 - }, - { - "epoch": 0.20026075619295958, - "grad_norm": 3.3308403491973877, - "learning_rate": 7.66e-05, - "loss": 6.5847, - "step": 384 - }, - { - "epoch": 0.20078226857887874, - "grad_norm": 2.6935532093048096, - "learning_rate": 7.680000000000001e-05, - "loss": 7.1005, - "step": 385 - }, - { - "epoch": 0.20130378096479792, - "grad_norm": 3.0341670513153076, - "learning_rate": 7.7e-05, - "loss": 6.5444, - "step": 386 - }, - { - "epoch": 0.20182529335071708, - "grad_norm": 2.6228854656219482, - "learning_rate": 7.72e-05, - "loss": 6.927, - "step": 387 - }, - { - "epoch": 0.20234680573663624, - "grad_norm": 2.779094696044922, - "learning_rate": 7.740000000000001e-05, - "loss": 7.0569, - "step": 388 - }, - { - "epoch": 0.2028683181225554, - "grad_norm": 3.2601938247680664, - "learning_rate": 7.76e-05, - "loss": 6.7258, - "step": 389 - }, - { - "epoch": 0.2033898305084746, - "grad_norm": 3.3600635528564453, - "learning_rate": 7.780000000000001e-05, - "loss": 6.4443, - "step": 390 - }, - { - "epoch": 0.20391134289439375, - "grad_norm": 2.924651861190796, - "learning_rate": 7.800000000000001e-05, - "loss": 7.1191, - "step": 391 - }, - { - "epoch": 0.2044328552803129, - "grad_norm": 2.5779831409454346, - "learning_rate": 7.82e-05, - "loss": 7.0077, - "step": 392 - }, - { - "epoch": 0.20495436766623207, - "grad_norm": 3.5986835956573486, - "learning_rate": 7.840000000000001e-05, - "loss": 6.9599, - "step": 393 - }, - { - "epoch": 0.20547588005215123, - "grad_norm": 3.574509859085083, - "learning_rate": 7.860000000000001e-05, - "loss": 7.0468, - "step": 394 - }, - { - "epoch": 0.20599739243807041, - "grad_norm": 3.1655123233795166, - "learning_rate": 7.88e-05, - "loss": 6.8709, - "step": 395 - }, - { - "epoch": 0.20651890482398957, - "grad_norm": 3.0388529300689697, - "learning_rate": 7.900000000000001e-05, - "loss": 7.0886, - "step": 396 - }, - { - "epoch": 0.20704041720990873, - "grad_norm": 4.3477020263671875, - "learning_rate": 7.920000000000001e-05, - "loss": 6.6419, - "step": 397 - }, - { - "epoch": 0.2075619295958279, - "grad_norm": 2.8662290573120117, - "learning_rate": 7.94e-05, - "loss": 6.9051, - "step": 398 - }, - { - "epoch": 0.20808344198174705, - "grad_norm": 4.2950758934021, - "learning_rate": 7.960000000000001e-05, - "loss": 5.8443, - "step": 399 - }, - { - "epoch": 0.20860495436766624, - "grad_norm": 3.039278745651245, - "learning_rate": 7.98e-05, - "loss": 6.596, - "step": 400 - }, - { - "epoch": 0.2091264667535854, - "grad_norm": 3.4841458797454834, - "learning_rate": 8e-05, - "loss": 5.7305, - "step": 401 - }, - { - "epoch": 0.20964797913950456, - "grad_norm": 2.7502031326293945, - "learning_rate": 8.020000000000001e-05, - "loss": 6.7217, - "step": 402 - }, - { - "epoch": 0.21016949152542372, - "grad_norm": 3.4197518825531006, - "learning_rate": 8.04e-05, - "loss": 7.1718, - "step": 403 - }, - { - "epoch": 0.2106910039113429, - "grad_norm": 3.0966851711273193, - "learning_rate": 8.060000000000001e-05, - "loss": 6.3327, - "step": 404 - }, - { - "epoch": 0.21121251629726207, - "grad_norm": 2.639968156814575, - "learning_rate": 8.080000000000001e-05, - "loss": 6.8873, - "step": 405 - }, - { - "epoch": 0.21173402868318122, - "grad_norm": 3.080145835876465, - "learning_rate": 8.1e-05, - "loss": 6.34, - "step": 406 - }, - { - "epoch": 0.21225554106910038, - "grad_norm": 3.6168289184570312, - "learning_rate": 8.120000000000001e-05, - "loss": 6.4417, - "step": 407 - }, - { - "epoch": 0.21277705345501954, - "grad_norm": 3.0011940002441406, - "learning_rate": 8.14e-05, - "loss": 6.564, - "step": 408 - }, - { - "epoch": 0.21329856584093873, - "grad_norm": 3.090268135070801, - "learning_rate": 8.16e-05, - "loss": 6.8972, - "step": 409 - }, - { - "epoch": 0.2138200782268579, - "grad_norm": 2.9102587699890137, - "learning_rate": 8.18e-05, - "loss": 6.9113, - "step": 410 - }, - { - "epoch": 0.21434159061277705, - "grad_norm": 3.6984519958496094, - "learning_rate": 8.2e-05, - "loss": 6.0702, - "step": 411 - }, - { - "epoch": 0.2148631029986962, - "grad_norm": 2.714181661605835, - "learning_rate": 8.22e-05, - "loss": 6.946, - "step": 412 - }, - { - "epoch": 0.2153846153846154, - "grad_norm": 2.511662721633911, - "learning_rate": 8.24e-05, - "loss": 7.1757, - "step": 413 - }, - { - "epoch": 0.21590612777053456, - "grad_norm": 3.234900712966919, - "learning_rate": 8.26e-05, - "loss": 6.817, - "step": 414 - }, - { - "epoch": 0.21642764015645372, - "grad_norm": 3.1684656143188477, - "learning_rate": 8.28e-05, - "loss": 6.9316, - "step": 415 - }, - { - "epoch": 0.21694915254237288, - "grad_norm": 2.9769480228424072, - "learning_rate": 8.3e-05, - "loss": 6.6332, - "step": 416 - }, - { - "epoch": 0.21747066492829203, - "grad_norm": 3.721759080886841, - "learning_rate": 8.32e-05, - "loss": 6.3789, - "step": 417 - }, - { - "epoch": 0.21799217731421122, - "grad_norm": 3.035891056060791, - "learning_rate": 8.34e-05, - "loss": 7.0388, - "step": 418 - }, - { - "epoch": 0.21851368970013038, - "grad_norm": 2.604626178741455, - "learning_rate": 8.36e-05, - "loss": 7.0204, - "step": 419 - }, - { - "epoch": 0.21903520208604954, - "grad_norm": 3.0238819122314453, - "learning_rate": 8.38e-05, - "loss": 6.7262, - "step": 420 - }, - { - "epoch": 0.2195567144719687, - "grad_norm": 2.8538432121276855, - "learning_rate": 8.4e-05, - "loss": 6.625, - "step": 421 - }, - { - "epoch": 0.2200782268578879, - "grad_norm": 3.035665273666382, - "learning_rate": 8.42e-05, - "loss": 6.7138, - "step": 422 - }, - { - "epoch": 0.22059973924380705, - "grad_norm": 2.5486512184143066, - "learning_rate": 8.44e-05, - "loss": 6.8959, - "step": 423 - }, - { - "epoch": 0.2211212516297262, - "grad_norm": 3.3886094093322754, - "learning_rate": 8.46e-05, - "loss": 6.3696, - "step": 424 - }, - { - "epoch": 0.22164276401564537, - "grad_norm": 3.423654556274414, - "learning_rate": 8.48e-05, - "loss": 7.0734, - "step": 425 - }, - { - "epoch": 0.22216427640156453, - "grad_norm": 2.735738515853882, - "learning_rate": 8.5e-05, - "loss": 6.7616, - "step": 426 - }, - { - "epoch": 0.2226857887874837, - "grad_norm": 3.713524580001831, - "learning_rate": 8.52e-05, - "loss": 6.2687, - "step": 427 - }, - { - "epoch": 0.22320730117340287, - "grad_norm": 2.945415735244751, - "learning_rate": 8.54e-05, - "loss": 6.5513, - "step": 428 - }, - { - "epoch": 0.22372881355932203, - "grad_norm": 2.5077168941497803, - "learning_rate": 8.560000000000001e-05, - "loss": 6.7392, - "step": 429 - }, - { - "epoch": 0.2242503259452412, - "grad_norm": 2.3934662342071533, - "learning_rate": 8.58e-05, - "loss": 7.0249, - "step": 430 - }, - { - "epoch": 0.22477183833116038, - "grad_norm": 2.6413142681121826, - "learning_rate": 8.6e-05, - "loss": 6.8845, - "step": 431 - }, - { - "epoch": 0.22529335071707954, - "grad_norm": 2.520956516265869, - "learning_rate": 8.620000000000001e-05, - "loss": 6.8159, - "step": 432 - }, - { - "epoch": 0.2258148631029987, - "grad_norm": 2.6290929317474365, - "learning_rate": 8.64e-05, - "loss": 7.0407, - "step": 433 - }, - { - "epoch": 0.22633637548891786, - "grad_norm": 2.630131721496582, - "learning_rate": 8.66e-05, - "loss": 6.8931, - "step": 434 - }, - { - "epoch": 0.22685788787483702, - "grad_norm": 2.8762598037719727, - "learning_rate": 8.680000000000001e-05, - "loss": 6.8662, - "step": 435 - }, - { - "epoch": 0.2273794002607562, - "grad_norm": 2.833972930908203, - "learning_rate": 8.7e-05, - "loss": 6.8259, - "step": 436 - }, - { - "epoch": 0.22790091264667536, - "grad_norm": 2.7063074111938477, - "learning_rate": 8.72e-05, - "loss": 6.5592, - "step": 437 - }, - { - "epoch": 0.22842242503259452, - "grad_norm": 2.695197820663452, - "learning_rate": 8.740000000000001e-05, - "loss": 6.7381, - "step": 438 - }, - { - "epoch": 0.22894393741851368, - "grad_norm": 3.287299394607544, - "learning_rate": 8.76e-05, - "loss": 6.3144, - "step": 439 - }, - { - "epoch": 0.22946544980443284, - "grad_norm": 2.936960220336914, - "learning_rate": 8.78e-05, - "loss": 6.7655, - "step": 440 - }, - { - "epoch": 0.22998696219035203, - "grad_norm": 3.0835437774658203, - "learning_rate": 8.800000000000001e-05, - "loss": 6.7237, - "step": 441 - }, - { - "epoch": 0.2305084745762712, - "grad_norm": 2.654698371887207, - "learning_rate": 8.82e-05, - "loss": 7.0268, - "step": 442 - }, - { - "epoch": 0.23102998696219035, - "grad_norm": 15.455745697021484, - "learning_rate": 8.840000000000001e-05, - "loss": 5.8494, - "step": 443 - }, - { - "epoch": 0.2315514993481095, - "grad_norm": 3.2970926761627197, - "learning_rate": 8.86e-05, - "loss": 6.8435, - "step": 444 - }, - { - "epoch": 0.2320730117340287, - "grad_norm": 2.7584433555603027, - "learning_rate": 8.88e-05, - "loss": 6.4046, - "step": 445 - }, - { - "epoch": 0.23259452411994785, - "grad_norm": 2.5174078941345215, - "learning_rate": 8.900000000000001e-05, - "loss": 7.0402, - "step": 446 - }, - { - "epoch": 0.233116036505867, - "grad_norm": 2.509023904800415, - "learning_rate": 8.92e-05, - "loss": 6.7754, - "step": 447 - }, - { - "epoch": 0.23363754889178617, - "grad_norm": 4.349696636199951, - "learning_rate": 8.94e-05, - "loss": 6.8015, - "step": 448 - }, - { - "epoch": 0.23415906127770533, - "grad_norm": 2.6458146572113037, - "learning_rate": 8.960000000000001e-05, - "loss": 6.7895, - "step": 449 - }, - { - "epoch": 0.23468057366362452, - "grad_norm": 2.8139069080352783, - "learning_rate": 8.98e-05, - "loss": 6.9038, - "step": 450 - }, - { - "epoch": 0.23520208604954368, - "grad_norm": 3.1928818225860596, - "learning_rate": 9e-05, - "loss": 6.5367, - "step": 451 - }, - { - "epoch": 0.23572359843546284, - "grad_norm": 4.430820941925049, - "learning_rate": 9.020000000000001e-05, - "loss": 6.6013, - "step": 452 - }, - { - "epoch": 0.236245110821382, - "grad_norm": 4.031113624572754, - "learning_rate": 9.04e-05, - "loss": 6.9254, - "step": 453 - }, - { - "epoch": 0.23676662320730119, - "grad_norm": 3.2106876373291016, - "learning_rate": 9.06e-05, - "loss": 6.5961, - "step": 454 - }, - { - "epoch": 0.23728813559322035, - "grad_norm": 4.746469497680664, - "learning_rate": 9.080000000000001e-05, - "loss": 6.6948, - "step": 455 - }, - { - "epoch": 0.2378096479791395, - "grad_norm": 3.455411672592163, - "learning_rate": 9.1e-05, - "loss": 5.6938, - "step": 456 - }, - { - "epoch": 0.23833116036505866, - "grad_norm": 3.9724597930908203, - "learning_rate": 9.120000000000001e-05, - "loss": 6.8219, - "step": 457 - }, - { - "epoch": 0.23885267275097782, - "grad_norm": 2.8081107139587402, - "learning_rate": 9.140000000000001e-05, - "loss": 6.5813, - "step": 458 - }, - { - "epoch": 0.239374185136897, - "grad_norm": 3.2344539165496826, - "learning_rate": 9.16e-05, - "loss": 6.6187, - "step": 459 - }, - { - "epoch": 0.23989569752281617, - "grad_norm": 2.544550895690918, - "learning_rate": 9.180000000000001e-05, - "loss": 6.8158, - "step": 460 - }, - { - "epoch": 0.24041720990873533, - "grad_norm": 2.41996693611145, - "learning_rate": 9.200000000000001e-05, - "loss": 6.9909, - "step": 461 - }, - { - "epoch": 0.2409387222946545, - "grad_norm": 2.891789674758911, - "learning_rate": 9.22e-05, - "loss": 6.9594, - "step": 462 - }, - { - "epoch": 0.24146023468057368, - "grad_norm": 2.933687448501587, - "learning_rate": 9.240000000000001e-05, - "loss": 6.7971, - "step": 463 - }, - { - "epoch": 0.24198174706649284, - "grad_norm": 3.0920803546905518, - "learning_rate": 9.260000000000001e-05, - "loss": 6.1003, - "step": 464 - }, - { - "epoch": 0.242503259452412, - "grad_norm": 2.788957118988037, - "learning_rate": 9.28e-05, - "loss": 6.9173, - "step": 465 - }, - { - "epoch": 0.24302477183833116, - "grad_norm": 4.545304775238037, - "learning_rate": 9.300000000000001e-05, - "loss": 6.9782, - "step": 466 - }, - { - "epoch": 0.24354628422425031, - "grad_norm": 2.7203845977783203, - "learning_rate": 9.320000000000002e-05, - "loss": 6.703, - "step": 467 - }, - { - "epoch": 0.2440677966101695, - "grad_norm": 5.148874282836914, - "learning_rate": 9.340000000000001e-05, - "loss": 6.2721, - "step": 468 - }, - { - "epoch": 0.24458930899608866, - "grad_norm": 2.7999868392944336, - "learning_rate": 9.360000000000001e-05, - "loss": 6.975, - "step": 469 - }, - { - "epoch": 0.24511082138200782, - "grad_norm": 2.686619281768799, - "learning_rate": 9.38e-05, - "loss": 6.4811, - "step": 470 - }, - { - "epoch": 0.24563233376792698, - "grad_norm": 3.537447929382324, - "learning_rate": 9.4e-05, - "loss": 6.8074, - "step": 471 - }, - { - "epoch": 0.24615384615384617, - "grad_norm": 2.357633352279663, - "learning_rate": 9.42e-05, - "loss": 6.9754, - "step": 472 - }, - { - "epoch": 0.24667535853976533, - "grad_norm": 2.735482931137085, - "learning_rate": 9.44e-05, - "loss": 6.7566, - "step": 473 - }, - { - "epoch": 0.2471968709256845, - "grad_norm": 2.3248891830444336, - "learning_rate": 9.46e-05, - "loss": 6.822, - "step": 474 - }, - { - "epoch": 0.24771838331160365, - "grad_norm": 2.6541171073913574, - "learning_rate": 9.48e-05, - "loss": 6.7979, - "step": 475 - }, - { - "epoch": 0.2482398956975228, - "grad_norm": 2.36301851272583, - "learning_rate": 9.5e-05, - "loss": 6.9476, - "step": 476 - }, - { - "epoch": 0.248761408083442, - "grad_norm": 2.5697407722473145, - "learning_rate": 9.52e-05, - "loss": 6.8588, - "step": 477 - }, - { - "epoch": 0.24928292046936115, - "grad_norm": 2.7867579460144043, - "learning_rate": 9.54e-05, - "loss": 6.6956, - "step": 478 - }, - { - "epoch": 0.2498044328552803, - "grad_norm": 2.825334072113037, - "learning_rate": 9.56e-05, - "loss": 6.7513, - "step": 479 - }, - { - "epoch": 0.2503259452411995, - "grad_norm": 3.315460681915283, - "learning_rate": 9.58e-05, - "loss": 7.0593, - "step": 480 - }, - { - "epoch": 0.25084745762711863, - "grad_norm": 2.340892791748047, - "learning_rate": 9.6e-05, - "loss": 6.6995, - "step": 481 - }, - { - "epoch": 0.2513689700130378, - "grad_norm": 3.2200984954833984, - "learning_rate": 9.620000000000001e-05, - "loss": 6.5835, - "step": 482 - }, - { - "epoch": 0.25189048239895695, - "grad_norm": 3.7848949432373047, - "learning_rate": 9.64e-05, - "loss": 6.7126, - "step": 483 - }, - { - "epoch": 0.25241199478487614, - "grad_norm": 3.250565528869629, - "learning_rate": 9.66e-05, - "loss": 6.9432, - "step": 484 - }, - { - "epoch": 0.2529335071707953, - "grad_norm": 5.082596302032471, - "learning_rate": 9.680000000000001e-05, - "loss": 5.4362, - "step": 485 - }, - { - "epoch": 0.25345501955671446, - "grad_norm": 2.3779993057250977, - "learning_rate": 9.7e-05, - "loss": 6.9548, - "step": 486 - }, - { - "epoch": 0.25397653194263364, - "grad_norm": 2.4856936931610107, - "learning_rate": 9.72e-05, - "loss": 6.8634, - "step": 487 - }, - { - "epoch": 0.25449804432855283, - "grad_norm": 3.136725425720215, - "learning_rate": 9.74e-05, - "loss": 6.5332, - "step": 488 - }, - { - "epoch": 0.25501955671447196, - "grad_norm": 3.2251977920532227, - "learning_rate": 9.76e-05, - "loss": 7.0258, - "step": 489 - }, - { - "epoch": 0.25554106910039115, - "grad_norm": 4.4574875831604, - "learning_rate": 9.78e-05, - "loss": 6.5353, - "step": 490 - }, - { - "epoch": 0.2560625814863103, - "grad_norm": 4.009222507476807, - "learning_rate": 9.8e-05, - "loss": 6.259, - "step": 491 - }, - { - "epoch": 0.25658409387222947, - "grad_norm": 3.212877035140991, - "learning_rate": 9.82e-05, - "loss": 6.6794, - "step": 492 - }, - { - "epoch": 0.25710560625814866, - "grad_norm": 2.506984233856201, - "learning_rate": 9.84e-05, - "loss": 6.8881, - "step": 493 - }, - { - "epoch": 0.2576271186440678, - "grad_norm": 3.386939287185669, - "learning_rate": 9.86e-05, - "loss": 6.4301, - "step": 494 - }, - { - "epoch": 0.258148631029987, - "grad_norm": 3.279521942138672, - "learning_rate": 9.88e-05, - "loss": 6.2215, - "step": 495 - }, - { - "epoch": 0.2586701434159061, - "grad_norm": 3.4294681549072266, - "learning_rate": 9.900000000000001e-05, - "loss": 6.8048, - "step": 496 - }, - { - "epoch": 0.2591916558018253, - "grad_norm": 2.5611634254455566, - "learning_rate": 9.92e-05, - "loss": 6.566, - "step": 497 - }, - { - "epoch": 0.2597131681877445, - "grad_norm": 2.3411877155303955, - "learning_rate": 9.94e-05, - "loss": 7.0717, - "step": 498 - }, - { - "epoch": 0.2602346805736636, - "grad_norm": 2.4214110374450684, - "learning_rate": 9.960000000000001e-05, - "loss": 6.8023, - "step": 499 - }, - { - "epoch": 0.2607561929595828, - "grad_norm": 2.475004196166992, - "learning_rate": 9.98e-05, - "loss": 6.5057, - "step": 500 - }, - { - "epoch": 0.26127770534550193, - "grad_norm": 2.73582124710083, - "learning_rate": 0.0001, - "loss": 6.7666, - "step": 501 - }, - { - "epoch": 0.2617992177314211, - "grad_norm": 2.2623517513275146, - "learning_rate": 9.999899497487437e-05, - "loss": 6.8927, - "step": 502 - }, - { - "epoch": 0.2623207301173403, - "grad_norm": 3.2750511169433594, - "learning_rate": 9.999798994974875e-05, - "loss": 6.3848, - "step": 503 - }, - { - "epoch": 0.26284224250325944, - "grad_norm": 3.193834066390991, - "learning_rate": 9.999698492462311e-05, - "loss": 6.6813, - "step": 504 - }, - { - "epoch": 0.2633637548891786, - "grad_norm": 2.648184061050415, - "learning_rate": 9.999597989949749e-05, - "loss": 6.9224, - "step": 505 - }, - { - "epoch": 0.26388526727509776, - "grad_norm": 3.534034013748169, - "learning_rate": 9.999497487437187e-05, - "loss": 6.9134, - "step": 506 - }, - { - "epoch": 0.26440677966101694, - "grad_norm": 2.204660415649414, - "learning_rate": 9.999396984924624e-05, - "loss": 7.1112, - "step": 507 - }, - { - "epoch": 0.26492829204693613, - "grad_norm": 2.4047603607177734, - "learning_rate": 9.999296482412061e-05, - "loss": 6.8362, - "step": 508 - }, - { - "epoch": 0.26544980443285526, - "grad_norm": 2.6058859825134277, - "learning_rate": 9.999195979899499e-05, - "loss": 6.9067, - "step": 509 - }, - { - "epoch": 0.26597131681877445, - "grad_norm": 2.683767795562744, - "learning_rate": 9.999095477386935e-05, - "loss": 6.5132, - "step": 510 - }, - { - "epoch": 0.26649282920469364, - "grad_norm": 2.2093727588653564, - "learning_rate": 9.998994974874373e-05, - "loss": 6.7814, - "step": 511 - }, - { - "epoch": 0.26701434159061277, - "grad_norm": 2.3152880668640137, - "learning_rate": 9.998894472361809e-05, - "loss": 6.5789, - "step": 512 - }, - { - "epoch": 0.26753585397653196, - "grad_norm": 2.3612897396087646, - "learning_rate": 9.998793969849247e-05, - "loss": 6.7686, - "step": 513 - }, - { - "epoch": 0.2680573663624511, - "grad_norm": 2.7442212104797363, - "learning_rate": 9.998693467336684e-05, - "loss": 6.708, - "step": 514 - }, - { - "epoch": 0.2685788787483703, - "grad_norm": 3.004387140274048, - "learning_rate": 9.99859296482412e-05, - "loss": 6.1397, - "step": 515 - }, - { - "epoch": 0.26910039113428946, - "grad_norm": 2.567108631134033, - "learning_rate": 9.998492462311558e-05, - "loss": 6.7375, - "step": 516 - }, - { - "epoch": 0.2696219035202086, - "grad_norm": 2.468379497528076, - "learning_rate": 9.998391959798996e-05, - "loss": 6.7837, - "step": 517 - }, - { - "epoch": 0.2701434159061278, - "grad_norm": 3.2258684635162354, - "learning_rate": 9.998291457286433e-05, - "loss": 6.682, - "step": 518 - }, - { - "epoch": 0.2706649282920469, - "grad_norm": 2.224414587020874, - "learning_rate": 9.99819095477387e-05, - "loss": 6.912, - "step": 519 - }, - { - "epoch": 0.2711864406779661, - "grad_norm": 2.5131306648254395, - "learning_rate": 9.998090452261308e-05, - "loss": 7.0706, - "step": 520 - }, - { - "epoch": 0.2717079530638853, - "grad_norm": 2.430908679962158, - "learning_rate": 9.997989949748744e-05, - "loss": 6.9863, - "step": 521 - }, - { - "epoch": 0.2722294654498044, - "grad_norm": 2.695842742919922, - "learning_rate": 9.997889447236182e-05, - "loss": 6.9889, - "step": 522 - }, - { - "epoch": 0.2727509778357236, - "grad_norm": 3.565156936645508, - "learning_rate": 9.997788944723618e-05, - "loss": 6.2835, - "step": 523 - }, - { - "epoch": 0.27327249022164274, - "grad_norm": 2.9504036903381348, - "learning_rate": 9.997688442211056e-05, - "loss": 6.8601, - "step": 524 - }, - { - "epoch": 0.2737940026075619, - "grad_norm": 2.996612548828125, - "learning_rate": 9.997587939698492e-05, - "loss": 6.6386, - "step": 525 - }, - { - "epoch": 0.2743155149934811, - "grad_norm": 2.6341590881347656, - "learning_rate": 9.99748743718593e-05, - "loss": 6.7187, - "step": 526 - }, - { - "epoch": 0.27483702737940024, - "grad_norm": 2.4681437015533447, - "learning_rate": 9.997386934673368e-05, - "loss": 6.9114, - "step": 527 - }, - { - "epoch": 0.27535853976531943, - "grad_norm": 4.2253947257995605, - "learning_rate": 9.997286432160804e-05, - "loss": 6.4639, - "step": 528 - }, - { - "epoch": 0.2758800521512386, - "grad_norm": 4.804105758666992, - "learning_rate": 9.997185929648242e-05, - "loss": 6.5992, - "step": 529 - }, - { - "epoch": 0.27640156453715775, - "grad_norm": 3.748114824295044, - "learning_rate": 9.997085427135679e-05, - "loss": 6.7112, - "step": 530 - }, - { - "epoch": 0.27692307692307694, - "grad_norm": 3.581264019012451, - "learning_rate": 9.996984924623116e-05, - "loss": 6.478, - "step": 531 - }, - { - "epoch": 0.27744458930899607, - "grad_norm": 2.9176101684570312, - "learning_rate": 9.996884422110553e-05, - "loss": 6.5075, - "step": 532 - }, - { - "epoch": 0.27796610169491526, - "grad_norm": 4.499645233154297, - "learning_rate": 9.99678391959799e-05, - "loss": 6.7193, - "step": 533 - }, - { - "epoch": 0.27848761408083444, - "grad_norm": 3.435164451599121, - "learning_rate": 9.996683417085427e-05, - "loss": 6.6621, - "step": 534 - }, - { - "epoch": 0.2790091264667536, - "grad_norm": 2.5283384323120117, - "learning_rate": 9.996582914572865e-05, - "loss": 6.8493, - "step": 535 - }, - { - "epoch": 0.27953063885267276, - "grad_norm": 2.3399932384490967, - "learning_rate": 9.996482412060301e-05, - "loss": 6.5532, - "step": 536 - }, - { - "epoch": 0.2800521512385919, - "grad_norm": 3.006356716156006, - "learning_rate": 9.996381909547739e-05, - "loss": 6.7371, - "step": 537 - }, - { - "epoch": 0.2805736636245111, - "grad_norm": 2.1820175647735596, - "learning_rate": 9.996281407035177e-05, - "loss": 6.9289, - "step": 538 - }, - { - "epoch": 0.28109517601043027, - "grad_norm": 2.4511280059814453, - "learning_rate": 9.996180904522615e-05, - "loss": 6.4717, - "step": 539 - }, - { - "epoch": 0.2816166883963494, - "grad_norm": 2.5437686443328857, - "learning_rate": 9.996080402010051e-05, - "loss": 6.5271, - "step": 540 - }, - { - "epoch": 0.2821382007822686, - "grad_norm": 2.9577114582061768, - "learning_rate": 9.995979899497487e-05, - "loss": 6.0008, - "step": 541 - }, - { - "epoch": 0.2826597131681877, - "grad_norm": 3.3158369064331055, - "learning_rate": 9.995879396984925e-05, - "loss": 6.7803, - "step": 542 - }, - { - "epoch": 0.2831812255541069, - "grad_norm": 2.789043426513672, - "learning_rate": 9.995778894472362e-05, - "loss": 6.9305, - "step": 543 - }, - { - "epoch": 0.2837027379400261, - "grad_norm": 2.564784049987793, - "learning_rate": 9.9956783919598e-05, - "loss": 6.4814, - "step": 544 - }, - { - "epoch": 0.2842242503259452, - "grad_norm": 2.3991527557373047, - "learning_rate": 9.995577889447236e-05, - "loss": 6.6684, - "step": 545 - }, - { - "epoch": 0.2847457627118644, - "grad_norm": 2.5177085399627686, - "learning_rate": 9.995477386934674e-05, - "loss": 6.8058, - "step": 546 - }, - { - "epoch": 0.28526727509778355, - "grad_norm": 2.5932178497314453, - "learning_rate": 9.995376884422111e-05, - "loss": 6.4201, - "step": 547 - }, - { - "epoch": 0.28578878748370273, - "grad_norm": 2.2884035110473633, - "learning_rate": 9.995276381909549e-05, - "loss": 6.6815, - "step": 548 - }, - { - "epoch": 0.2863102998696219, - "grad_norm": 2.2769322395324707, - "learning_rate": 9.995175879396986e-05, - "loss": 6.7661, - "step": 549 - }, - { - "epoch": 0.28683181225554105, - "grad_norm": 2.198941230773926, - "learning_rate": 9.995075376884423e-05, - "loss": 6.8029, - "step": 550 - }, - { - "epoch": 0.28735332464146024, - "grad_norm": 2.370363235473633, - "learning_rate": 9.99497487437186e-05, - "loss": 6.5167, - "step": 551 - }, - { - "epoch": 0.2878748370273794, - "grad_norm": 2.798598289489746, - "learning_rate": 9.994874371859298e-05, - "loss": 6.6494, - "step": 552 - }, - { - "epoch": 0.28839634941329856, - "grad_norm": 2.337928056716919, - "learning_rate": 9.994773869346734e-05, - "loss": 6.9835, - "step": 553 - }, - { - "epoch": 0.28891786179921775, - "grad_norm": 2.35713791847229, - "learning_rate": 9.994673366834172e-05, - "loss": 6.9039, - "step": 554 - }, - { - "epoch": 0.2894393741851369, - "grad_norm": 2.443599224090576, - "learning_rate": 9.994572864321608e-05, - "loss": 6.6804, - "step": 555 - }, - { - "epoch": 0.28996088657105606, - "grad_norm": 2.2196035385131836, - "learning_rate": 9.994472361809045e-05, - "loss": 6.9186, - "step": 556 - }, - { - "epoch": 0.29048239895697525, - "grad_norm": 2.3514981269836426, - "learning_rate": 9.994371859296482e-05, - "loss": 6.8633, - "step": 557 - }, - { - "epoch": 0.2910039113428944, - "grad_norm": 2.501081943511963, - "learning_rate": 9.99427135678392e-05, - "loss": 6.8908, - "step": 558 - }, - { - "epoch": 0.29152542372881357, - "grad_norm": 2.3073713779449463, - "learning_rate": 9.994170854271358e-05, - "loss": 6.554, - "step": 559 - }, - { - "epoch": 0.2920469361147327, - "grad_norm": 2.7027711868286133, - "learning_rate": 9.994070351758794e-05, - "loss": 6.5762, - "step": 560 - }, - { - "epoch": 0.2925684485006519, - "grad_norm": 2.681204319000244, - "learning_rate": 9.993969849246232e-05, - "loss": 6.3981, - "step": 561 - }, - { - "epoch": 0.2930899608865711, - "grad_norm": 3.6177210807800293, - "learning_rate": 9.993869346733669e-05, - "loss": 6.3199, - "step": 562 - }, - { - "epoch": 0.2936114732724902, - "grad_norm": 2.3057701587677, - "learning_rate": 9.993768844221106e-05, - "loss": 6.8121, - "step": 563 - }, - { - "epoch": 0.2941329856584094, - "grad_norm": 2.4482412338256836, - "learning_rate": 9.993668341708543e-05, - "loss": 6.8837, - "step": 564 - }, - { - "epoch": 0.29465449804432853, - "grad_norm": 2.292982816696167, - "learning_rate": 9.99356783919598e-05, - "loss": 6.5851, - "step": 565 - }, - { - "epoch": 0.2951760104302477, - "grad_norm": 2.7785377502441406, - "learning_rate": 9.993467336683417e-05, - "loss": 6.4553, - "step": 566 - }, - { - "epoch": 0.2956975228161669, - "grad_norm": 3.0232276916503906, - "learning_rate": 9.993366834170855e-05, - "loss": 6.3535, - "step": 567 - }, - { - "epoch": 0.29621903520208603, - "grad_norm": 2.3795006275177, - "learning_rate": 9.993266331658293e-05, - "loss": 6.906, - "step": 568 - }, - { - "epoch": 0.2967405475880052, - "grad_norm": 2.426237106323242, - "learning_rate": 9.993165829145729e-05, - "loss": 5.973, - "step": 569 - }, - { - "epoch": 0.29726205997392435, - "grad_norm": 2.931170701980591, - "learning_rate": 9.993065326633167e-05, - "loss": 6.9815, - "step": 570 - }, - { - "epoch": 0.29778357235984354, - "grad_norm": 2.250025987625122, - "learning_rate": 9.992964824120603e-05, - "loss": 6.7264, - "step": 571 - }, - { - "epoch": 0.2983050847457627, - "grad_norm": 2.563656806945801, - "learning_rate": 9.992864321608041e-05, - "loss": 6.875, - "step": 572 - }, - { - "epoch": 0.29882659713168186, - "grad_norm": 2.0841214656829834, - "learning_rate": 9.992763819095477e-05, - "loss": 6.9514, - "step": 573 - }, - { - "epoch": 0.29934810951760105, - "grad_norm": 2.6887941360473633, - "learning_rate": 9.992663316582915e-05, - "loss": 6.3593, - "step": 574 - }, - { - "epoch": 0.29986962190352023, - "grad_norm": 3.577542543411255, - "learning_rate": 9.992562814070352e-05, - "loss": 6.1976, - "step": 575 - }, - { - "epoch": 0.30039113428943937, - "grad_norm": 2.6426141262054443, - "learning_rate": 9.99246231155779e-05, - "loss": 6.8532, - "step": 576 - }, - { - "epoch": 0.30091264667535855, - "grad_norm": 2.703559637069702, - "learning_rate": 9.992361809045226e-05, - "loss": 6.4574, - "step": 577 - }, - { - "epoch": 0.3014341590612777, - "grad_norm": 3.2348217964172363, - "learning_rate": 9.992261306532664e-05, - "loss": 6.8975, - "step": 578 - }, - { - "epoch": 0.30195567144719687, - "grad_norm": 3.617314338684082, - "learning_rate": 9.992160804020101e-05, - "loss": 6.2764, - "step": 579 - }, - { - "epoch": 0.30247718383311606, - "grad_norm": 2.5697176456451416, - "learning_rate": 9.992060301507539e-05, - "loss": 6.4337, - "step": 580 - }, - { - "epoch": 0.3029986962190352, - "grad_norm": 2.1568777561187744, - "learning_rate": 9.991959798994976e-05, - "loss": 7.0039, - "step": 581 - }, - { - "epoch": 0.3035202086049544, - "grad_norm": 2.116344690322876, - "learning_rate": 9.991859296482412e-05, - "loss": 6.7774, - "step": 582 - }, - { - "epoch": 0.3040417209908735, - "grad_norm": 2.5044496059417725, - "learning_rate": 9.99175879396985e-05, - "loss": 6.659, - "step": 583 - }, - { - "epoch": 0.3045632333767927, - "grad_norm": 3.5272934436798096, - "learning_rate": 9.991658291457286e-05, - "loss": 5.9951, - "step": 584 - }, - { - "epoch": 0.3050847457627119, - "grad_norm": 4.261949062347412, - "learning_rate": 9.991557788944724e-05, - "loss": 5.7512, - "step": 585 - }, - { - "epoch": 0.305606258148631, - "grad_norm": 2.383537769317627, - "learning_rate": 9.99145728643216e-05, - "loss": 6.6171, - "step": 586 - }, - { - "epoch": 0.3061277705345502, - "grad_norm": 2.4009881019592285, - "learning_rate": 9.991356783919598e-05, - "loss": 6.6186, - "step": 587 - }, - { - "epoch": 0.30664928292046933, - "grad_norm": 2.8905534744262695, - "learning_rate": 9.991256281407035e-05, - "loss": 6.5925, - "step": 588 - }, - { - "epoch": 0.3071707953063885, - "grad_norm": 2.1318790912628174, - "learning_rate": 9.991155778894473e-05, - "loss": 6.8829, - "step": 589 - }, - { - "epoch": 0.3076923076923077, - "grad_norm": 3.37378191947937, - "learning_rate": 9.99105527638191e-05, - "loss": 6.314, - "step": 590 - }, - { - "epoch": 0.30821382007822684, - "grad_norm": 2.8562064170837402, - "learning_rate": 9.990954773869348e-05, - "loss": 6.285, - "step": 591 - }, - { - "epoch": 0.30873533246414603, - "grad_norm": 2.5292999744415283, - "learning_rate": 9.990854271356785e-05, - "loss": 6.816, - "step": 592 - }, - { - "epoch": 0.3092568448500652, - "grad_norm": 2.5964102745056152, - "learning_rate": 9.990753768844222e-05, - "loss": 6.6583, - "step": 593 - }, - { - "epoch": 0.30977835723598435, - "grad_norm": 2.4368226528167725, - "learning_rate": 9.990653266331659e-05, - "loss": 6.8602, - "step": 594 - }, - { - "epoch": 0.31029986962190353, - "grad_norm": 2.9984607696533203, - "learning_rate": 9.990552763819095e-05, - "loss": 6.5601, - "step": 595 - }, - { - "epoch": 0.31082138200782267, - "grad_norm": 2.356990098953247, - "learning_rate": 9.990452261306533e-05, - "loss": 6.5403, - "step": 596 - }, - { - "epoch": 0.31134289439374185, - "grad_norm": 2.809706687927246, - "learning_rate": 9.99035175879397e-05, - "loss": 6.7957, - "step": 597 - }, - { - "epoch": 0.31186440677966104, - "grad_norm": 2.398435354232788, - "learning_rate": 9.990251256281407e-05, - "loss": 6.5643, - "step": 598 - }, - { - "epoch": 0.3123859191655802, - "grad_norm": 2.753350257873535, - "learning_rate": 9.990150753768845e-05, - "loss": 6.5292, - "step": 599 - }, - { - "epoch": 0.31290743155149936, - "grad_norm": 2.7954092025756836, - "learning_rate": 9.990050251256283e-05, - "loss": 6.7809, - "step": 600 - }, - { - "epoch": 0.3134289439374185, - "grad_norm": 2.3736412525177, - "learning_rate": 9.989949748743719e-05, - "loss": 6.6319, - "step": 601 - }, - { - "epoch": 0.3139504563233377, - "grad_norm": 2.091228723526001, - "learning_rate": 9.989849246231157e-05, - "loss": 6.7365, - "step": 602 - }, - { - "epoch": 0.31447196870925687, - "grad_norm": 2.360786199569702, - "learning_rate": 9.989748743718593e-05, - "loss": 6.5564, - "step": 603 - }, - { - "epoch": 0.314993481095176, - "grad_norm": 2.7194275856018066, - "learning_rate": 9.989648241206031e-05, - "loss": 6.5311, - "step": 604 - }, - { - "epoch": 0.3155149934810952, - "grad_norm": 2.5315980911254883, - "learning_rate": 9.989547738693468e-05, - "loss": 6.8755, - "step": 605 - }, - { - "epoch": 0.3160365058670143, - "grad_norm": 2.4782521724700928, - "learning_rate": 9.989447236180905e-05, - "loss": 6.6772, - "step": 606 - }, - { - "epoch": 0.3165580182529335, - "grad_norm": 2.5762054920196533, - "learning_rate": 9.989346733668342e-05, - "loss": 6.8217, - "step": 607 - }, - { - "epoch": 0.3170795306388527, - "grad_norm": 2.8427186012268066, - "learning_rate": 9.989246231155778e-05, - "loss": 6.1232, - "step": 608 - }, - { - "epoch": 0.3176010430247718, - "grad_norm": 2.958988666534424, - "learning_rate": 9.989145728643216e-05, - "loss": 6.7649, - "step": 609 - }, - { - "epoch": 0.318122555410691, - "grad_norm": 3.5217721462249756, - "learning_rate": 9.989045226130654e-05, - "loss": 6.7988, - "step": 610 - }, - { - "epoch": 0.31864406779661014, - "grad_norm": 2.9620628356933594, - "learning_rate": 9.988944723618092e-05, - "loss": 6.6146, - "step": 611 - }, - { - "epoch": 0.31916558018252933, - "grad_norm": 2.283308267593384, - "learning_rate": 9.988844221105528e-05, - "loss": 6.7742, - "step": 612 - }, - { - "epoch": 0.3196870925684485, - "grad_norm": 2.426337957382202, - "learning_rate": 9.988743718592966e-05, - "loss": 6.5583, - "step": 613 - }, - { - "epoch": 0.32020860495436765, - "grad_norm": 2.8968324661254883, - "learning_rate": 9.988643216080402e-05, - "loss": 6.9948, - "step": 614 - }, - { - "epoch": 0.32073011734028684, - "grad_norm": 2.178039073944092, - "learning_rate": 9.98854271356784e-05, - "loss": 6.5209, - "step": 615 - }, - { - "epoch": 0.321251629726206, - "grad_norm": 3.11246919631958, - "learning_rate": 9.988442211055276e-05, - "loss": 6.6784, - "step": 616 - }, - { - "epoch": 0.32177314211212515, - "grad_norm": 2.2160632610321045, - "learning_rate": 9.988341708542714e-05, - "loss": 6.8533, - "step": 617 - }, - { - "epoch": 0.32229465449804434, - "grad_norm": 2.8915889263153076, - "learning_rate": 9.98824120603015e-05, - "loss": 6.2564, - "step": 618 - }, - { - "epoch": 0.3228161668839635, - "grad_norm": 2.2910103797912598, - "learning_rate": 9.988140703517588e-05, - "loss": 6.8245, - "step": 619 - }, - { - "epoch": 0.32333767926988266, - "grad_norm": 5.047464370727539, - "learning_rate": 9.988040201005026e-05, - "loss": 5.2356, - "step": 620 - }, - { - "epoch": 0.32385919165580185, - "grad_norm": 3.101825475692749, - "learning_rate": 9.987939698492463e-05, - "loss": 6.2499, - "step": 621 - }, - { - "epoch": 0.324380704041721, - "grad_norm": 2.597949981689453, - "learning_rate": 9.9878391959799e-05, - "loss": 6.8516, - "step": 622 - }, - { - "epoch": 0.32490221642764017, - "grad_norm": 2.2827794551849365, - "learning_rate": 9.987738693467337e-05, - "loss": 6.699, - "step": 623 - }, - { - "epoch": 0.3254237288135593, - "grad_norm": 2.6016316413879395, - "learning_rate": 9.987638190954775e-05, - "loss": 6.6628, - "step": 624 - }, - { - "epoch": 0.3259452411994785, - "grad_norm": 2.5033156871795654, - "learning_rate": 9.987537688442211e-05, - "loss": 6.3799, - "step": 625 - }, - { - "epoch": 0.3264667535853977, - "grad_norm": 2.48293399810791, - "learning_rate": 9.987437185929649e-05, - "loss": 6.6465, - "step": 626 - }, - { - "epoch": 0.3269882659713168, - "grad_norm": 3.0237913131713867, - "learning_rate": 9.987336683417085e-05, - "loss": 6.106, - "step": 627 - }, - { - "epoch": 0.327509778357236, - "grad_norm": 2.6413562297821045, - "learning_rate": 9.987236180904523e-05, - "loss": 6.5199, - "step": 628 - }, - { - "epoch": 0.3280312907431551, - "grad_norm": 2.531050682067871, - "learning_rate": 9.98713567839196e-05, - "loss": 6.4822, - "step": 629 - }, - { - "epoch": 0.3285528031290743, - "grad_norm": 2.211895704269409, - "learning_rate": 9.987035175879397e-05, - "loss": 6.417, - "step": 630 - }, - { - "epoch": 0.3290743155149935, - "grad_norm": 2.857788562774658, - "learning_rate": 9.986934673366835e-05, - "loss": 6.3971, - "step": 631 - }, - { - "epoch": 0.32959582790091263, - "grad_norm": 2.409496784210205, - "learning_rate": 9.986834170854273e-05, - "loss": 6.6121, - "step": 632 - }, - { - "epoch": 0.3301173402868318, - "grad_norm": 3.8872246742248535, - "learning_rate": 9.986733668341709e-05, - "loss": 5.9356, - "step": 633 - }, - { - "epoch": 0.330638852672751, - "grad_norm": 3.0166659355163574, - "learning_rate": 9.986633165829146e-05, - "loss": 6.4228, - "step": 634 - }, - { - "epoch": 0.33116036505867014, - "grad_norm": 2.2669386863708496, - "learning_rate": 9.986532663316583e-05, - "loss": 6.3234, - "step": 635 - }, - { - "epoch": 0.3316818774445893, - "grad_norm": 2.7591681480407715, - "learning_rate": 9.98643216080402e-05, - "loss": 6.3055, - "step": 636 - }, - { - "epoch": 0.33220338983050846, - "grad_norm": 2.8966269493103027, - "learning_rate": 9.986331658291458e-05, - "loss": 6.2917, - "step": 637 - }, - { - "epoch": 0.33272490221642764, - "grad_norm": 2.310699462890625, - "learning_rate": 9.986231155778894e-05, - "loss": 6.4535, - "step": 638 - }, - { - "epoch": 0.33324641460234683, - "grad_norm": 2.1760547161102295, - "learning_rate": 9.986130653266332e-05, - "loss": 6.4803, - "step": 639 - }, - { - "epoch": 0.33376792698826596, - "grad_norm": 2.1403820514678955, - "learning_rate": 9.98603015075377e-05, - "loss": 6.9452, - "step": 640 - }, - { - "epoch": 0.33428943937418515, - "grad_norm": 2.1203603744506836, - "learning_rate": 9.985929648241207e-05, - "loss": 6.944, - "step": 641 - }, - { - "epoch": 0.3348109517601043, - "grad_norm": 2.634228467941284, - "learning_rate": 9.985829145728644e-05, - "loss": 6.2601, - "step": 642 - }, - { - "epoch": 0.33533246414602347, - "grad_norm": 2.673799753189087, - "learning_rate": 9.985728643216082e-05, - "loss": 6.4082, - "step": 643 - }, - { - "epoch": 0.33585397653194266, - "grad_norm": 2.0828588008880615, - "learning_rate": 9.985628140703518e-05, - "loss": 6.593, - "step": 644 - }, - { - "epoch": 0.3363754889178618, - "grad_norm": 2.9630815982818604, - "learning_rate": 9.985527638190956e-05, - "loss": 6.3006, - "step": 645 - }, - { - "epoch": 0.336897001303781, - "grad_norm": 2.163931369781494, - "learning_rate": 9.985427135678392e-05, - "loss": 6.8087, - "step": 646 - }, - { - "epoch": 0.3374185136897001, - "grad_norm": 2.118283987045288, - "learning_rate": 9.98532663316583e-05, - "loss": 6.6582, - "step": 647 - }, - { - "epoch": 0.3379400260756193, - "grad_norm": 2.1185507774353027, - "learning_rate": 9.985226130653266e-05, - "loss": 6.5589, - "step": 648 - }, - { - "epoch": 0.3384615384615385, - "grad_norm": 2.769584894180298, - "learning_rate": 9.985125628140703e-05, - "loss": 6.2612, - "step": 649 - }, - { - "epoch": 0.3389830508474576, - "grad_norm": 2.097851276397705, - "learning_rate": 9.98502512562814e-05, - "loss": 6.5514, - "step": 650 - }, - { - "epoch": 0.3395045632333768, - "grad_norm": 3.0042083263397217, - "learning_rate": 9.984924623115578e-05, - "loss": 6.6406, - "step": 651 - }, - { - "epoch": 0.34002607561929593, - "grad_norm": 2.238783359527588, - "learning_rate": 9.984824120603016e-05, - "loss": 6.4638, - "step": 652 - }, - { - "epoch": 0.3405475880052151, - "grad_norm": 2.270066261291504, - "learning_rate": 9.984723618090453e-05, - "loss": 6.2751, - "step": 653 - }, - { - "epoch": 0.3410691003911343, - "grad_norm": 2.1098685264587402, - "learning_rate": 9.98462311557789e-05, - "loss": 6.5967, - "step": 654 - }, - { - "epoch": 0.34159061277705344, - "grad_norm": 2.4665894508361816, - "learning_rate": 9.984522613065327e-05, - "loss": 5.9501, - "step": 655 - }, - { - "epoch": 0.3421121251629726, - "grad_norm": 2.433864116668701, - "learning_rate": 9.984422110552765e-05, - "loss": 6.4734, - "step": 656 - }, - { - "epoch": 0.3426336375488918, - "grad_norm": 2.2921559810638428, - "learning_rate": 9.984321608040201e-05, - "loss": 6.5201, - "step": 657 - }, - { - "epoch": 0.34315514993481094, - "grad_norm": 1.889911413192749, - "learning_rate": 9.984221105527639e-05, - "loss": 6.8002, - "step": 658 - }, - { - "epoch": 0.34367666232073013, - "grad_norm": 3.1400258541107178, - "learning_rate": 9.984120603015075e-05, - "loss": 6.0453, - "step": 659 - }, - { - "epoch": 0.34419817470664926, - "grad_norm": 2.0831573009490967, - "learning_rate": 9.984020100502513e-05, - "loss": 6.637, - "step": 660 - }, - { - "epoch": 0.34471968709256845, - "grad_norm": 2.1338939666748047, - "learning_rate": 9.983919597989951e-05, - "loss": 6.1151, - "step": 661 - }, - { - "epoch": 0.34524119947848764, - "grad_norm": 2.0769996643066406, - "learning_rate": 9.983819095477387e-05, - "loss": 6.8921, - "step": 662 - }, - { - "epoch": 0.34576271186440677, - "grad_norm": 2.5557219982147217, - "learning_rate": 9.983718592964825e-05, - "loss": 6.5197, - "step": 663 - }, - { - "epoch": 0.34628422425032596, - "grad_norm": 3.2359161376953125, - "learning_rate": 9.983618090452261e-05, - "loss": 6.3139, - "step": 664 - }, - { - "epoch": 0.3468057366362451, - "grad_norm": 2.768599510192871, - "learning_rate": 9.983517587939699e-05, - "loss": 5.7752, - "step": 665 - }, - { - "epoch": 0.3473272490221643, - "grad_norm": 2.582446575164795, - "learning_rate": 9.983417085427136e-05, - "loss": 6.3778, - "step": 666 - }, - { - "epoch": 0.34784876140808346, - "grad_norm": 2.22094988822937, - "learning_rate": 9.983316582914573e-05, - "loss": 6.5065, - "step": 667 - }, - { - "epoch": 0.3483702737940026, - "grad_norm": 2.9147276878356934, - "learning_rate": 9.98321608040201e-05, - "loss": 6.1764, - "step": 668 - }, - { - "epoch": 0.3488917861799218, - "grad_norm": 2.245464563369751, - "learning_rate": 9.983115577889448e-05, - "loss": 6.3914, - "step": 669 - }, - { - "epoch": 0.3494132985658409, - "grad_norm": 2.8328449726104736, - "learning_rate": 9.983015075376884e-05, - "loss": 5.925, - "step": 670 - }, - { - "epoch": 0.3499348109517601, - "grad_norm": 2.521390676498413, - "learning_rate": 9.982914572864322e-05, - "loss": 6.3792, - "step": 671 - }, - { - "epoch": 0.3504563233376793, - "grad_norm": 2.3779420852661133, - "learning_rate": 9.98281407035176e-05, - "loss": 6.1934, - "step": 672 - }, - { - "epoch": 0.3509778357235984, - "grad_norm": 2.066786289215088, - "learning_rate": 9.982713567839197e-05, - "loss": 6.9087, - "step": 673 - }, - { - "epoch": 0.3514993481095176, - "grad_norm": 2.0983738899230957, - "learning_rate": 9.982613065326634e-05, - "loss": 6.6262, - "step": 674 - }, - { - "epoch": 0.3520208604954368, - "grad_norm": 1.9429088830947876, - "learning_rate": 9.98251256281407e-05, - "loss": 6.483, - "step": 675 - }, - { - "epoch": 0.3525423728813559, - "grad_norm": 2.257582426071167, - "learning_rate": 9.982412060301508e-05, - "loss": 6.5038, - "step": 676 - }, - { - "epoch": 0.3530638852672751, - "grad_norm": 2.220712423324585, - "learning_rate": 9.982311557788945e-05, - "loss": 6.8907, - "step": 677 - }, - { - "epoch": 0.35358539765319424, - "grad_norm": 2.099017381668091, - "learning_rate": 9.982211055276382e-05, - "loss": 6.6702, - "step": 678 - }, - { - "epoch": 0.35410691003911343, - "grad_norm": 2.3172242641448975, - "learning_rate": 9.982110552763819e-05, - "loss": 6.9045, - "step": 679 - }, - { - "epoch": 0.3546284224250326, - "grad_norm": 2.212599277496338, - "learning_rate": 9.982010050251257e-05, - "loss": 6.4172, - "step": 680 - }, - { - "epoch": 0.35514993481095175, - "grad_norm": 2.87436580657959, - "learning_rate": 9.981909547738694e-05, - "loss": 6.7151, - "step": 681 - }, - { - "epoch": 0.35567144719687094, - "grad_norm": 2.53831148147583, - "learning_rate": 9.981809045226132e-05, - "loss": 6.4728, - "step": 682 - }, - { - "epoch": 0.35619295958279007, - "grad_norm": 2.370199203491211, - "learning_rate": 9.981708542713569e-05, - "loss": 6.7124, - "step": 683 - }, - { - "epoch": 0.35671447196870926, - "grad_norm": 2.186441421508789, - "learning_rate": 9.981608040201006e-05, - "loss": 6.4603, - "step": 684 - }, - { - "epoch": 0.35723598435462844, - "grad_norm": 2.382138252258301, - "learning_rate": 9.981507537688443e-05, - "loss": 6.3521, - "step": 685 - }, - { - "epoch": 0.3577574967405476, - "grad_norm": 2.411463737487793, - "learning_rate": 9.98140703517588e-05, - "loss": 6.6357, - "step": 686 - }, - { - "epoch": 0.35827900912646676, - "grad_norm": 2.4420485496520996, - "learning_rate": 9.981306532663317e-05, - "loss": 6.5282, - "step": 687 - }, - { - "epoch": 0.3588005215123859, - "grad_norm": 1.8494997024536133, - "learning_rate": 9.981206030150753e-05, - "loss": 6.8825, - "step": 688 - }, - { - "epoch": 0.3593220338983051, - "grad_norm": 1.9439504146575928, - "learning_rate": 9.981105527638191e-05, - "loss": 6.3554, - "step": 689 - }, - { - "epoch": 0.35984354628422427, - "grad_norm": 3.761709690093994, - "learning_rate": 9.981005025125628e-05, - "loss": 6.1133, - "step": 690 - }, - { - "epoch": 0.3603650586701434, - "grad_norm": 2.7955455780029297, - "learning_rate": 9.980904522613065e-05, - "loss": 6.2528, - "step": 691 - }, - { - "epoch": 0.3608865710560626, - "grad_norm": 3.0165700912475586, - "learning_rate": 9.980804020100503e-05, - "loss": 6.4799, - "step": 692 - }, - { - "epoch": 0.3614080834419817, - "grad_norm": 3.0634753704071045, - "learning_rate": 9.980703517587941e-05, - "loss": 6.5751, - "step": 693 - }, - { - "epoch": 0.3619295958279009, - "grad_norm": 2.1592202186584473, - "learning_rate": 9.980603015075377e-05, - "loss": 6.7329, - "step": 694 - }, - { - "epoch": 0.3624511082138201, - "grad_norm": 2.629218101501465, - "learning_rate": 9.980502512562815e-05, - "loss": 6.6906, - "step": 695 - }, - { - "epoch": 0.3629726205997392, - "grad_norm": 2.9310076236724854, - "learning_rate": 9.980402010050252e-05, - "loss": 6.6852, - "step": 696 - }, - { - "epoch": 0.3634941329856584, - "grad_norm": 2.3942654132843018, - "learning_rate": 9.98030150753769e-05, - "loss": 6.7677, - "step": 697 - }, - { - "epoch": 0.3640156453715776, - "grad_norm": 2.7797329425811768, - "learning_rate": 9.980201005025126e-05, - "loss": 6.842, - "step": 698 - }, - { - "epoch": 0.36453715775749673, - "grad_norm": 2.6364879608154297, - "learning_rate": 9.980100502512564e-05, - "loss": 6.7427, - "step": 699 - }, - { - "epoch": 0.3650586701434159, - "grad_norm": 2.9886577129364014, - "learning_rate": 9.98e-05, - "loss": 6.1343, - "step": 700 - }, - { - "epoch": 0.36558018252933505, - "grad_norm": 2.7793991565704346, - "learning_rate": 9.979899497487438e-05, - "loss": 6.3457, - "step": 701 - }, - { - "epoch": 0.36610169491525424, - "grad_norm": 2.1975605487823486, - "learning_rate": 9.979798994974876e-05, - "loss": 6.2639, - "step": 702 - }, - { - "epoch": 0.3666232073011734, - "grad_norm": 2.404585838317871, - "learning_rate": 9.979698492462312e-05, - "loss": 6.3425, - "step": 703 - }, - { - "epoch": 0.36714471968709256, - "grad_norm": 2.1477532386779785, - "learning_rate": 9.97959798994975e-05, - "loss": 6.8087, - "step": 704 - }, - { - "epoch": 0.36766623207301175, - "grad_norm": 1.9459115266799927, - "learning_rate": 9.979497487437186e-05, - "loss": 6.6319, - "step": 705 - }, - { - "epoch": 0.3681877444589309, - "grad_norm": 2.8141002655029297, - "learning_rate": 9.979396984924624e-05, - "loss": 6.683, - "step": 706 - }, - { - "epoch": 0.36870925684485006, - "grad_norm": 2.0927507877349854, - "learning_rate": 9.97929648241206e-05, - "loss": 6.3955, - "step": 707 - }, - { - "epoch": 0.36923076923076925, - "grad_norm": 2.4862911701202393, - "learning_rate": 9.979195979899498e-05, - "loss": 6.1181, - "step": 708 - }, - { - "epoch": 0.3697522816166884, - "grad_norm": 2.4091734886169434, - "learning_rate": 9.979095477386935e-05, - "loss": 6.4157, - "step": 709 - }, - { - "epoch": 0.37027379400260757, - "grad_norm": 2.641993761062622, - "learning_rate": 9.978994974874372e-05, - "loss": 5.8782, - "step": 710 - }, - { - "epoch": 0.3707953063885267, - "grad_norm": 2.108973264694214, - "learning_rate": 9.978894472361809e-05, - "loss": 6.7079, - "step": 711 - }, - { - "epoch": 0.3713168187744459, - "grad_norm": 2.3215339183807373, - "learning_rate": 9.978793969849247e-05, - "loss": 6.3592, - "step": 712 - }, - { - "epoch": 0.3718383311603651, - "grad_norm": 2.543102502822876, - "learning_rate": 9.978693467336684e-05, - "loss": 6.1239, - "step": 713 - }, - { - "epoch": 0.3723598435462842, - "grad_norm": 2.2113935947418213, - "learning_rate": 9.978592964824121e-05, - "loss": 6.6394, - "step": 714 - }, - { - "epoch": 0.3728813559322034, - "grad_norm": 2.406519651412964, - "learning_rate": 9.978492462311559e-05, - "loss": 6.7071, - "step": 715 - }, - { - "epoch": 0.3734028683181226, - "grad_norm": 2.3539154529571533, - "learning_rate": 9.978391959798995e-05, - "loss": 6.1004, - "step": 716 - }, - { - "epoch": 0.3739243807040417, - "grad_norm": 2.1021957397460938, - "learning_rate": 9.978291457286433e-05, - "loss": 6.8711, - "step": 717 - }, - { - "epoch": 0.3744458930899609, - "grad_norm": 2.0883538722991943, - "learning_rate": 9.978190954773869e-05, - "loss": 6.8728, - "step": 718 - }, - { - "epoch": 0.37496740547588003, - "grad_norm": 2.173269748687744, - "learning_rate": 9.978090452261307e-05, - "loss": 6.5409, - "step": 719 - }, - { - "epoch": 0.3754889178617992, - "grad_norm": 2.509430408477783, - "learning_rate": 9.977989949748743e-05, - "loss": 6.9534, - "step": 720 - }, - { - "epoch": 0.3760104302477184, - "grad_norm": 2.534581184387207, - "learning_rate": 9.977889447236181e-05, - "loss": 6.1659, - "step": 721 - }, - { - "epoch": 0.37653194263363754, - "grad_norm": 2.591604709625244, - "learning_rate": 9.977788944723619e-05, - "loss": 6.6487, - "step": 722 - }, - { - "epoch": 0.3770534550195567, - "grad_norm": 2.5615108013153076, - "learning_rate": 9.977688442211057e-05, - "loss": 6.3818, - "step": 723 - }, - { - "epoch": 0.37757496740547586, - "grad_norm": 2.3022994995117188, - "learning_rate": 9.977587939698493e-05, - "loss": 6.3038, - "step": 724 - }, - { - "epoch": 0.37809647979139505, - "grad_norm": 1.9913159608840942, - "learning_rate": 9.977487437185931e-05, - "loss": 6.8417, - "step": 725 - }, - { - "epoch": 0.37861799217731423, - "grad_norm": 2.830613136291504, - "learning_rate": 9.977386934673367e-05, - "loss": 6.3994, - "step": 726 - }, - { - "epoch": 0.37913950456323336, - "grad_norm": 2.2367968559265137, - "learning_rate": 9.977286432160805e-05, - "loss": 5.9519, - "step": 727 - }, - { - "epoch": 0.37966101694915255, - "grad_norm": 2.3432517051696777, - "learning_rate": 9.977185929648242e-05, - "loss": 6.4948, - "step": 728 - }, - { - "epoch": 0.3801825293350717, - "grad_norm": 2.349743127822876, - "learning_rate": 9.977085427135678e-05, - "loss": 6.4056, - "step": 729 - }, - { - "epoch": 0.38070404172099087, - "grad_norm": 1.8101089000701904, - "learning_rate": 9.976984924623116e-05, - "loss": 6.5163, - "step": 730 - }, - { - "epoch": 0.38122555410691006, - "grad_norm": 1.9853712320327759, - "learning_rate": 9.976884422110552e-05, - "loss": 6.7933, - "step": 731 - }, - { - "epoch": 0.3817470664928292, - "grad_norm": 2.36020827293396, - "learning_rate": 9.97678391959799e-05, - "loss": 6.0693, - "step": 732 - }, - { - "epoch": 0.3822685788787484, - "grad_norm": 2.329319477081299, - "learning_rate": 9.976683417085428e-05, - "loss": 6.4357, - "step": 733 - }, - { - "epoch": 0.3827900912646675, - "grad_norm": 2.8170721530914307, - "learning_rate": 9.976582914572866e-05, - "loss": 6.4844, - "step": 734 - }, - { - "epoch": 0.3833116036505867, - "grad_norm": 2.363882303237915, - "learning_rate": 9.976482412060302e-05, - "loss": 6.8889, - "step": 735 - }, - { - "epoch": 0.3838331160365059, - "grad_norm": 2.246117115020752, - "learning_rate": 9.97638190954774e-05, - "loss": 6.4265, - "step": 736 - }, - { - "epoch": 0.384354628422425, - "grad_norm": 2.067037343978882, - "learning_rate": 9.976281407035176e-05, - "loss": 6.5094, - "step": 737 - }, - { - "epoch": 0.3848761408083442, - "grad_norm": 2.13342547416687, - "learning_rate": 9.976180904522614e-05, - "loss": 6.6682, - "step": 738 - }, - { - "epoch": 0.3853976531942634, - "grad_norm": 2.1345455646514893, - "learning_rate": 9.97608040201005e-05, - "loss": 6.6607, - "step": 739 - }, - { - "epoch": 0.3859191655801825, - "grad_norm": 2.126084089279175, - "learning_rate": 9.975979899497488e-05, - "loss": 6.5625, - "step": 740 - }, - { - "epoch": 0.3864406779661017, - "grad_norm": 2.372014284133911, - "learning_rate": 9.975879396984925e-05, - "loss": 6.2901, - "step": 741 - }, - { - "epoch": 0.38696219035202084, - "grad_norm": 2.119297742843628, - "learning_rate": 9.975778894472362e-05, - "loss": 6.8048, - "step": 742 - }, - { - "epoch": 0.38748370273794003, - "grad_norm": 2.1009063720703125, - "learning_rate": 9.9756783919598e-05, - "loss": 6.3845, - "step": 743 - }, - { - "epoch": 0.3880052151238592, - "grad_norm": 2.2610244750976562, - "learning_rate": 9.975577889447237e-05, - "loss": 6.6487, - "step": 744 - }, - { - "epoch": 0.38852672750977835, - "grad_norm": 2.0792105197906494, - "learning_rate": 9.975477386934674e-05, - "loss": 6.3372, - "step": 745 - }, - { - "epoch": 0.38904823989569753, - "grad_norm": 3.173232316970825, - "learning_rate": 9.975376884422111e-05, - "loss": 6.1376, - "step": 746 - }, - { - "epoch": 0.38956975228161667, - "grad_norm": 2.279728651046753, - "learning_rate": 9.975276381909549e-05, - "loss": 6.2877, - "step": 747 - }, - { - "epoch": 0.39009126466753585, - "grad_norm": 2.0728516578674316, - "learning_rate": 9.975175879396985e-05, - "loss": 6.6308, - "step": 748 - }, - { - "epoch": 0.39061277705345504, - "grad_norm": 2.2767817974090576, - "learning_rate": 9.975075376884423e-05, - "loss": 6.4936, - "step": 749 - }, - { - "epoch": 0.39113428943937417, - "grad_norm": 2.400374412536621, - "learning_rate": 9.974974874371859e-05, - "loss": 6.607, - "step": 750 - }, - { - "epoch": 0.39165580182529336, - "grad_norm": 2.1324334144592285, - "learning_rate": 9.974874371859297e-05, - "loss": 6.1019, - "step": 751 - }, - { - "epoch": 0.3921773142112125, - "grad_norm": 2.673335075378418, - "learning_rate": 9.974773869346734e-05, - "loss": 6.3559, - "step": 752 - }, - { - "epoch": 0.3926988265971317, - "grad_norm": 2.1924960613250732, - "learning_rate": 9.974673366834171e-05, - "loss": 6.1355, - "step": 753 - }, - { - "epoch": 0.39322033898305087, - "grad_norm": 3.365873098373413, - "learning_rate": 9.974572864321609e-05, - "loss": 5.9735, - "step": 754 - }, - { - "epoch": 0.39374185136897, - "grad_norm": 2.3410911560058594, - "learning_rate": 9.974472361809046e-05, - "loss": 6.4903, - "step": 755 - }, - { - "epoch": 0.3942633637548892, - "grad_norm": 2.600425958633423, - "learning_rate": 9.974371859296483e-05, - "loss": 6.5502, - "step": 756 - }, - { - "epoch": 0.39478487614080837, - "grad_norm": 2.579627513885498, - "learning_rate": 9.97427135678392e-05, - "loss": 6.8163, - "step": 757 - }, - { - "epoch": 0.3953063885267275, - "grad_norm": 2.5041720867156982, - "learning_rate": 9.974170854271358e-05, - "loss": 6.0078, - "step": 758 - }, - { - "epoch": 0.3958279009126467, - "grad_norm": 2.5352189540863037, - "learning_rate": 9.974070351758794e-05, - "loss": 6.7737, - "step": 759 - }, - { - "epoch": 0.3963494132985658, - "grad_norm": 2.391979455947876, - "learning_rate": 9.973969849246232e-05, - "loss": 6.0156, - "step": 760 - }, - { - "epoch": 0.396870925684485, - "grad_norm": 2.4471120834350586, - "learning_rate": 9.973869346733668e-05, - "loss": 6.498, - "step": 761 - }, - { - "epoch": 0.3973924380704042, - "grad_norm": 2.259601593017578, - "learning_rate": 9.973768844221106e-05, - "loss": 6.6708, - "step": 762 - }, - { - "epoch": 0.39791395045632333, - "grad_norm": 2.0752549171447754, - "learning_rate": 9.973668341708542e-05, - "loss": 6.6007, - "step": 763 - }, - { - "epoch": 0.3984354628422425, - "grad_norm": 1.9034416675567627, - "learning_rate": 9.97356783919598e-05, - "loss": 6.7455, - "step": 764 - }, - { - "epoch": 0.39895697522816165, - "grad_norm": 2.1480019092559814, - "learning_rate": 9.973467336683418e-05, - "loss": 6.7664, - "step": 765 - }, - { - "epoch": 0.39947848761408083, - "grad_norm": 3.482856512069702, - "learning_rate": 9.973366834170856e-05, - "loss": 6.4067, - "step": 766 - }, - { - "epoch": 0.4, - "grad_norm": 4.285748481750488, - "learning_rate": 9.973266331658292e-05, - "loss": 6.2284, - "step": 767 - }, - { - "epoch": 0.40052151238591915, - "grad_norm": 3.017019033432007, - "learning_rate": 9.973165829145729e-05, - "loss": 6.0039, - "step": 768 - }, - { - "epoch": 0.40104302477183834, - "grad_norm": 2.2138030529022217, - "learning_rate": 9.973065326633166e-05, - "loss": 6.7996, - "step": 769 - }, - { - "epoch": 0.4015645371577575, - "grad_norm": 2.1760737895965576, - "learning_rate": 9.972964824120603e-05, - "loss": 5.9331, - "step": 770 - }, - { - "epoch": 0.40208604954367666, - "grad_norm": 2.1760194301605225, - "learning_rate": 9.97286432160804e-05, - "loss": 6.5415, - "step": 771 - }, - { - "epoch": 0.40260756192959585, - "grad_norm": 2.3232998847961426, - "learning_rate": 9.972763819095477e-05, - "loss": 6.1442, - "step": 772 - }, - { - "epoch": 0.403129074315515, - "grad_norm": 2.1887319087982178, - "learning_rate": 9.972663316582915e-05, - "loss": 6.4638, - "step": 773 - }, - { - "epoch": 0.40365058670143417, - "grad_norm": 2.012146234512329, - "learning_rate": 9.972562814070353e-05, - "loss": 6.5892, - "step": 774 - }, - { - "epoch": 0.4041720990873533, - "grad_norm": 2.053077220916748, - "learning_rate": 9.97246231155779e-05, - "loss": 6.6057, - "step": 775 - }, - { - "epoch": 0.4046936114732725, - "grad_norm": 2.199997901916504, - "learning_rate": 9.972361809045227e-05, - "loss": 6.4031, - "step": 776 - }, - { - "epoch": 0.4052151238591917, - "grad_norm": 2.076964855194092, - "learning_rate": 9.972261306532665e-05, - "loss": 6.6246, - "step": 777 - }, - { - "epoch": 0.4057366362451108, - "grad_norm": 2.625805377960205, - "learning_rate": 9.972160804020101e-05, - "loss": 5.8286, - "step": 778 - }, - { - "epoch": 0.40625814863103, - "grad_norm": 2.041363477706909, - "learning_rate": 9.972060301507539e-05, - "loss": 6.7248, - "step": 779 - }, - { - "epoch": 0.4067796610169492, - "grad_norm": 2.3348333835601807, - "learning_rate": 9.971959798994975e-05, - "loss": 6.7389, - "step": 780 - }, - { - "epoch": 0.4073011734028683, - "grad_norm": 2.599583625793457, - "learning_rate": 9.971859296482412e-05, - "loss": 6.1583, - "step": 781 - }, - { - "epoch": 0.4078226857887875, - "grad_norm": 2.1481375694274902, - "learning_rate": 9.97175879396985e-05, - "loss": 6.4425, - "step": 782 - }, - { - "epoch": 0.40834419817470663, - "grad_norm": 2.2883105278015137, - "learning_rate": 9.971658291457286e-05, - "loss": 6.2921, - "step": 783 - }, - { - "epoch": 0.4088657105606258, - "grad_norm": 2.9295544624328613, - "learning_rate": 9.971557788944724e-05, - "loss": 6.6697, - "step": 784 - }, - { - "epoch": 0.409387222946545, - "grad_norm": 3.041402816772461, - "learning_rate": 9.971457286432161e-05, - "loss": 6.5913, - "step": 785 - }, - { - "epoch": 0.40990873533246414, - "grad_norm": 2.110576868057251, - "learning_rate": 9.971356783919599e-05, - "loss": 6.7166, - "step": 786 - }, - { - "epoch": 0.4104302477183833, - "grad_norm": 2.453145980834961, - "learning_rate": 9.971256281407036e-05, - "loss": 6.4385, - "step": 787 - }, - { - "epoch": 0.41095176010430245, - "grad_norm": 2.2735378742218018, - "learning_rate": 9.971155778894473e-05, - "loss": 6.2105, - "step": 788 - }, - { - "epoch": 0.41147327249022164, - "grad_norm": 2.40952205657959, - "learning_rate": 9.97105527638191e-05, - "loss": 6.975, - "step": 789 - }, - { - "epoch": 0.41199478487614083, - "grad_norm": 2.142829418182373, - "learning_rate": 9.970954773869348e-05, - "loss": 6.33, - "step": 790 - }, - { - "epoch": 0.41251629726205996, - "grad_norm": 2.208801031112671, - "learning_rate": 9.970854271356784e-05, - "loss": 6.5763, - "step": 791 - }, - { - "epoch": 0.41303780964797915, - "grad_norm": 2.2279272079467773, - "learning_rate": 9.970753768844222e-05, - "loss": 6.7193, - "step": 792 - }, - { - "epoch": 0.4135593220338983, - "grad_norm": 2.445939064025879, - "learning_rate": 9.970653266331658e-05, - "loss": 5.6927, - "step": 793 - }, - { - "epoch": 0.41408083441981747, - "grad_norm": 2.1655194759368896, - "learning_rate": 9.970552763819096e-05, - "loss": 6.53, - "step": 794 - }, - { - "epoch": 0.41460234680573665, - "grad_norm": 2.0326120853424072, - "learning_rate": 9.970452261306534e-05, - "loss": 6.6162, - "step": 795 - }, - { - "epoch": 0.4151238591916558, - "grad_norm": 2.154041290283203, - "learning_rate": 9.97035175879397e-05, - "loss": 6.1554, - "step": 796 - }, - { - "epoch": 0.415645371577575, - "grad_norm": 2.786252021789551, - "learning_rate": 9.970251256281408e-05, - "loss": 6.5729, - "step": 797 - }, - { - "epoch": 0.4161668839634941, - "grad_norm": 2.426708936691284, - "learning_rate": 9.970150753768844e-05, - "loss": 6.3655, - "step": 798 - }, - { - "epoch": 0.4166883963494133, - "grad_norm": 2.152690887451172, - "learning_rate": 9.970050251256282e-05, - "loss": 6.4706, - "step": 799 - }, - { - "epoch": 0.4172099087353325, - "grad_norm": 2.748511791229248, - "learning_rate": 9.969949748743719e-05, - "loss": 5.6839, - "step": 800 - }, - { - "epoch": 0.4177314211212516, - "grad_norm": 2.1645658016204834, - "learning_rate": 9.969849246231156e-05, - "loss": 6.258, - "step": 801 - }, - { - "epoch": 0.4182529335071708, - "grad_norm": 1.7470781803131104, - "learning_rate": 9.969748743718593e-05, - "loss": 6.6934, - "step": 802 - }, - { - "epoch": 0.41877444589309, - "grad_norm": 2.1566150188446045, - "learning_rate": 9.96964824120603e-05, - "loss": 6.4163, - "step": 803 - }, - { - "epoch": 0.4192959582790091, - "grad_norm": 2.085911750793457, - "learning_rate": 9.969547738693467e-05, - "loss": 6.4241, - "step": 804 - }, - { - "epoch": 0.4198174706649283, - "grad_norm": 2.668271780014038, - "learning_rate": 9.969447236180905e-05, - "loss": 6.0993, - "step": 805 - }, - { - "epoch": 0.42033898305084744, - "grad_norm": 2.191254138946533, - "learning_rate": 9.969346733668343e-05, - "loss": 6.7284, - "step": 806 - }, - { - "epoch": 0.4208604954367666, - "grad_norm": 2.0428121089935303, - "learning_rate": 9.969246231155779e-05, - "loss": 6.4521, - "step": 807 - }, - { - "epoch": 0.4213820078226858, - "grad_norm": 2.4796793460845947, - "learning_rate": 9.969145728643217e-05, - "loss": 6.4149, - "step": 808 - }, - { - "epoch": 0.42190352020860494, - "grad_norm": 1.9883676767349243, - "learning_rate": 9.969045226130653e-05, - "loss": 6.7675, - "step": 809 - }, - { - "epoch": 0.42242503259452413, - "grad_norm": 2.1834592819213867, - "learning_rate": 9.968944723618091e-05, - "loss": 6.7581, - "step": 810 - }, - { - "epoch": 0.42294654498044326, - "grad_norm": 2.4649016857147217, - "learning_rate": 9.968844221105527e-05, - "loss": 6.3115, - "step": 811 - }, - { - "epoch": 0.42346805736636245, - "grad_norm": 2.1063711643218994, - "learning_rate": 9.968743718592965e-05, - "loss": 6.4236, - "step": 812 - }, - { - "epoch": 0.42398956975228164, - "grad_norm": 2.2991364002227783, - "learning_rate": 9.968643216080402e-05, - "loss": 6.5763, - "step": 813 - }, - { - "epoch": 0.42451108213820077, - "grad_norm": 2.2292230129241943, - "learning_rate": 9.96854271356784e-05, - "loss": 5.647, - "step": 814 - }, - { - "epoch": 0.42503259452411996, - "grad_norm": 2.248950481414795, - "learning_rate": 9.968442211055277e-05, - "loss": 6.0973, - "step": 815 - }, - { - "epoch": 0.4255541069100391, - "grad_norm": 2.0685508251190186, - "learning_rate": 9.968341708542715e-05, - "loss": 6.6272, - "step": 816 - }, - { - "epoch": 0.4260756192959583, - "grad_norm": 2.3320984840393066, - "learning_rate": 9.968241206030151e-05, - "loss": 6.0772, - "step": 817 - }, - { - "epoch": 0.42659713168187746, - "grad_norm": 2.918611526489258, - "learning_rate": 9.968140703517589e-05, - "loss": 6.1137, - "step": 818 - }, - { - "epoch": 0.4271186440677966, - "grad_norm": 2.216487169265747, - "learning_rate": 9.968040201005026e-05, - "loss": 6.4165, - "step": 819 - }, - { - "epoch": 0.4276401564537158, - "grad_norm": 2.4474472999572754, - "learning_rate": 9.967939698492463e-05, - "loss": 6.4032, - "step": 820 - }, - { - "epoch": 0.42816166883963497, - "grad_norm": 2.467963218688965, - "learning_rate": 9.9678391959799e-05, - "loss": 6.5711, - "step": 821 - }, - { - "epoch": 0.4286831812255541, - "grad_norm": 2.1296226978302, - "learning_rate": 9.967738693467336e-05, - "loss": 6.7074, - "step": 822 - }, - { - "epoch": 0.4292046936114733, - "grad_norm": 1.93131685256958, - "learning_rate": 9.967638190954774e-05, - "loss": 6.6004, - "step": 823 - }, - { - "epoch": 0.4297262059973924, - "grad_norm": 2.0198779106140137, - "learning_rate": 9.96753768844221e-05, - "loss": 6.3437, - "step": 824 - }, - { - "epoch": 0.4302477183833116, - "grad_norm": 2.828876495361328, - "learning_rate": 9.967437185929648e-05, - "loss": 6.3619, - "step": 825 - }, - { - "epoch": 0.4307692307692308, - "grad_norm": 2.0243163108825684, - "learning_rate": 9.967336683417086e-05, - "loss": 6.6018, - "step": 826 - }, - { - "epoch": 0.4312907431551499, - "grad_norm": 2.898697853088379, - "learning_rate": 9.967236180904524e-05, - "loss": 6.3946, - "step": 827 - }, - { - "epoch": 0.4318122555410691, - "grad_norm": 2.850396156311035, - "learning_rate": 9.96713567839196e-05, - "loss": 6.2984, - "step": 828 - }, - { - "epoch": 0.43233376792698824, - "grad_norm": 2.146773099899292, - "learning_rate": 9.967035175879398e-05, - "loss": 6.1838, - "step": 829 - }, - { - "epoch": 0.43285528031290743, - "grad_norm": 2.1175990104675293, - "learning_rate": 9.966934673366835e-05, - "loss": 6.2523, - "step": 830 - }, - { - "epoch": 0.4333767926988266, - "grad_norm": 2.2733535766601562, - "learning_rate": 9.966834170854272e-05, - "loss": 6.0775, - "step": 831 - }, - { - "epoch": 0.43389830508474575, - "grad_norm": 2.080181360244751, - "learning_rate": 9.966733668341709e-05, - "loss": 6.4696, - "step": 832 - }, - { - "epoch": 0.43441981747066494, - "grad_norm": 2.3921234607696533, - "learning_rate": 9.966633165829147e-05, - "loss": 6.4152, - "step": 833 - }, - { - "epoch": 0.43494132985658407, - "grad_norm": 2.218611478805542, - "learning_rate": 9.966532663316583e-05, - "loss": 6.0767, - "step": 834 - }, - { - "epoch": 0.43546284224250326, - "grad_norm": 2.674525737762451, - "learning_rate": 9.966432160804021e-05, - "loss": 6.377, - "step": 835 - }, - { - "epoch": 0.43598435462842244, - "grad_norm": 2.659274101257324, - "learning_rate": 9.966331658291458e-05, - "loss": 6.121, - "step": 836 - }, - { - "epoch": 0.4365058670143416, - "grad_norm": 2.6279726028442383, - "learning_rate": 9.966231155778895e-05, - "loss": 6.1335, - "step": 837 - }, - { - "epoch": 0.43702737940026076, - "grad_norm": 2.000652551651001, - "learning_rate": 9.966130653266333e-05, - "loss": 6.4938, - "step": 838 - }, - { - "epoch": 0.4375488917861799, - "grad_norm": 1.9366097450256348, - "learning_rate": 9.966030150753769e-05, - "loss": 6.5206, - "step": 839 - }, - { - "epoch": 0.4380704041720991, - "grad_norm": 2.2027428150177, - "learning_rate": 9.965929648241207e-05, - "loss": 6.7759, - "step": 840 - }, - { - "epoch": 0.43859191655801827, - "grad_norm": 2.574753522872925, - "learning_rate": 9.965829145728643e-05, - "loss": 6.3747, - "step": 841 - }, - { - "epoch": 0.4391134289439374, - "grad_norm": 1.8134504556655884, - "learning_rate": 9.965728643216081e-05, - "loss": 6.6625, - "step": 842 - }, - { - "epoch": 0.4396349413298566, - "grad_norm": 2.208388328552246, - "learning_rate": 9.965628140703518e-05, - "loss": 6.28, - "step": 843 - }, - { - "epoch": 0.4401564537157758, - "grad_norm": 2.0692834854125977, - "learning_rate": 9.965527638190955e-05, - "loss": 6.2188, - "step": 844 - }, - { - "epoch": 0.4406779661016949, - "grad_norm": 2.3833465576171875, - "learning_rate": 9.965427135678392e-05, - "loss": 6.6833, - "step": 845 - }, - { - "epoch": 0.4411994784876141, - "grad_norm": 2.536780834197998, - "learning_rate": 9.96532663316583e-05, - "loss": 6.4912, - "step": 846 - }, - { - "epoch": 0.4417209908735332, - "grad_norm": 2.005906343460083, - "learning_rate": 9.965226130653267e-05, - "loss": 6.3401, - "step": 847 - }, - { - "epoch": 0.4422425032594524, - "grad_norm": 2.355052947998047, - "learning_rate": 9.965125628140704e-05, - "loss": 6.5562, - "step": 848 - }, - { - "epoch": 0.4427640156453716, - "grad_norm": 2.3538742065429688, - "learning_rate": 9.965025125628142e-05, - "loss": 6.5453, - "step": 849 - }, - { - "epoch": 0.44328552803129073, - "grad_norm": 1.905880093574524, - "learning_rate": 9.964924623115578e-05, - "loss": 6.6314, - "step": 850 - }, - { - "epoch": 0.4438070404172099, - "grad_norm": 2.212345600128174, - "learning_rate": 9.964824120603016e-05, - "loss": 5.8242, - "step": 851 - }, - { - "epoch": 0.44432855280312905, - "grad_norm": 2.20920991897583, - "learning_rate": 9.964723618090452e-05, - "loss": 6.6775, - "step": 852 - }, - { - "epoch": 0.44485006518904824, - "grad_norm": 2.1129469871520996, - "learning_rate": 9.96462311557789e-05, - "loss": 6.4668, - "step": 853 - }, - { - "epoch": 0.4453715775749674, - "grad_norm": 1.9672009944915771, - "learning_rate": 9.964522613065326e-05, - "loss": 6.4218, - "step": 854 - }, - { - "epoch": 0.44589308996088656, - "grad_norm": 2.4036476612091064, - "learning_rate": 9.964422110552764e-05, - "loss": 6.1991, - "step": 855 - }, - { - "epoch": 0.44641460234680574, - "grad_norm": 2.628465414047241, - "learning_rate": 9.964321608040202e-05, - "loss": 6.1609, - "step": 856 - }, - { - "epoch": 0.4469361147327249, - "grad_norm": 2.142796039581299, - "learning_rate": 9.96422110552764e-05, - "loss": 6.3854, - "step": 857 - }, - { - "epoch": 0.44745762711864406, - "grad_norm": 2.1347718238830566, - "learning_rate": 9.964120603015076e-05, - "loss": 6.6483, - "step": 858 - }, - { - "epoch": 0.44797913950456325, - "grad_norm": 1.9581797122955322, - "learning_rate": 9.964020100502514e-05, - "loss": 6.5637, - "step": 859 - }, - { - "epoch": 0.4485006518904824, - "grad_norm": 2.314955472946167, - "learning_rate": 9.96391959798995e-05, - "loss": 5.8771, - "step": 860 - }, - { - "epoch": 0.44902216427640157, - "grad_norm": 2.3754324913024902, - "learning_rate": 9.963819095477387e-05, - "loss": 6.4736, - "step": 861 - }, - { - "epoch": 0.44954367666232076, - "grad_norm": 2.0141539573669434, - "learning_rate": 9.963718592964825e-05, - "loss": 5.9831, - "step": 862 - }, - { - "epoch": 0.4500651890482399, - "grad_norm": 2.220392942428589, - "learning_rate": 9.963618090452261e-05, - "loss": 6.4879, - "step": 863 - }, - { - "epoch": 0.4505867014341591, - "grad_norm": 2.6271748542785645, - "learning_rate": 9.963517587939699e-05, - "loss": 6.0724, - "step": 864 - }, - { - "epoch": 0.4511082138200782, - "grad_norm": 2.093454360961914, - "learning_rate": 9.963417085427135e-05, - "loss": 6.3358, - "step": 865 - }, - { - "epoch": 0.4516297262059974, - "grad_norm": 2.137228012084961, - "learning_rate": 9.963316582914573e-05, - "loss": 6.5176, - "step": 866 - }, - { - "epoch": 0.4521512385919166, - "grad_norm": 2.132906436920166, - "learning_rate": 9.963216080402011e-05, - "loss": 6.3885, - "step": 867 - }, - { - "epoch": 0.4526727509778357, - "grad_norm": 1.9319450855255127, - "learning_rate": 9.963115577889449e-05, - "loss": 6.6291, - "step": 868 - }, - { - "epoch": 0.4531942633637549, - "grad_norm": 2.303853750228882, - "learning_rate": 9.963015075376885e-05, - "loss": 6.2823, - "step": 869 - }, - { - "epoch": 0.45371577574967403, - "grad_norm": 2.088036298751831, - "learning_rate": 9.962914572864323e-05, - "loss": 6.4187, - "step": 870 - }, - { - "epoch": 0.4542372881355932, - "grad_norm": 1.9422059059143066, - "learning_rate": 9.962814070351759e-05, - "loss": 6.2571, - "step": 871 - }, - { - "epoch": 0.4547588005215124, - "grad_norm": 2.202895164489746, - "learning_rate": 9.962713567839197e-05, - "loss": 6.3058, - "step": 872 - }, - { - "epoch": 0.45528031290743154, - "grad_norm": 3.0498602390289307, - "learning_rate": 9.962613065326633e-05, - "loss": 6.3952, - "step": 873 - }, - { - "epoch": 0.4558018252933507, - "grad_norm": 2.319120168685913, - "learning_rate": 9.96251256281407e-05, - "loss": 6.1164, - "step": 874 - }, - { - "epoch": 0.45632333767926986, - "grad_norm": 2.3087847232818604, - "learning_rate": 9.962412060301508e-05, - "loss": 5.9293, - "step": 875 - }, - { - "epoch": 0.45684485006518905, - "grad_norm": 2.0732786655426025, - "learning_rate": 9.962311557788945e-05, - "loss": 6.2632, - "step": 876 - }, - { - "epoch": 0.45736636245110823, - "grad_norm": 2.2241642475128174, - "learning_rate": 9.962211055276383e-05, - "loss": 6.4525, - "step": 877 - }, - { - "epoch": 0.45788787483702736, - "grad_norm": 1.9518303871154785, - "learning_rate": 9.96211055276382e-05, - "loss": 5.958, - "step": 878 - }, - { - "epoch": 0.45840938722294655, - "grad_norm": 2.0389230251312256, - "learning_rate": 9.962010050251257e-05, - "loss": 6.6528, - "step": 879 - }, - { - "epoch": 0.4589308996088657, - "grad_norm": 2.852085590362549, - "learning_rate": 9.961909547738694e-05, - "loss": 5.5762, - "step": 880 - }, - { - "epoch": 0.45945241199478487, - "grad_norm": 2.5660274028778076, - "learning_rate": 9.961809045226132e-05, - "loss": 6.5803, - "step": 881 - }, - { - "epoch": 0.45997392438070406, - "grad_norm": 2.298530340194702, - "learning_rate": 9.961708542713568e-05, - "loss": 6.46, - "step": 882 - }, - { - "epoch": 0.4604954367666232, - "grad_norm": 2.127445697784424, - "learning_rate": 9.961608040201006e-05, - "loss": 6.4486, - "step": 883 - }, - { - "epoch": 0.4610169491525424, - "grad_norm": 1.897956132888794, - "learning_rate": 9.961507537688442e-05, - "loss": 6.7523, - "step": 884 - }, - { - "epoch": 0.46153846153846156, - "grad_norm": 4.019052028656006, - "learning_rate": 9.96140703517588e-05, - "loss": 6.1086, - "step": 885 - }, - { - "epoch": 0.4620599739243807, - "grad_norm": 1.951799988746643, - "learning_rate": 9.961306532663316e-05, - "loss": 6.7259, - "step": 886 - }, - { - "epoch": 0.4625814863102999, - "grad_norm": 1.9468040466308594, - "learning_rate": 9.961206030150754e-05, - "loss": 6.6451, - "step": 887 - }, - { - "epoch": 0.463102998696219, - "grad_norm": 2.5095787048339844, - "learning_rate": 9.961105527638192e-05, - "loss": 5.9871, - "step": 888 - }, - { - "epoch": 0.4636245110821382, - "grad_norm": 2.0893378257751465, - "learning_rate": 9.961005025125628e-05, - "loss": 6.3916, - "step": 889 - }, - { - "epoch": 0.4641460234680574, - "grad_norm": 2.000333070755005, - "learning_rate": 9.960904522613066e-05, - "loss": 6.5248, - "step": 890 - }, - { - "epoch": 0.4646675358539765, - "grad_norm": 2.454456090927124, - "learning_rate": 9.960804020100503e-05, - "loss": 6.4069, - "step": 891 - }, - { - "epoch": 0.4651890482398957, - "grad_norm": 2.1235005855560303, - "learning_rate": 9.96070351758794e-05, - "loss": 6.4154, - "step": 892 - }, - { - "epoch": 0.46571056062581484, - "grad_norm": 2.0405213832855225, - "learning_rate": 9.960603015075377e-05, - "loss": 6.624, - "step": 893 - }, - { - "epoch": 0.466232073011734, - "grad_norm": 2.1271770000457764, - "learning_rate": 9.960502512562815e-05, - "loss": 6.5836, - "step": 894 - }, - { - "epoch": 0.4667535853976532, - "grad_norm": 2.0283665657043457, - "learning_rate": 9.960402010050251e-05, - "loss": 6.6333, - "step": 895 - }, - { - "epoch": 0.46727509778357235, - "grad_norm": 3.0277304649353027, - "learning_rate": 9.960301507537689e-05, - "loss": 6.4652, - "step": 896 - }, - { - "epoch": 0.46779661016949153, - "grad_norm": 3.6672751903533936, - "learning_rate": 9.960201005025127e-05, - "loss": 6.3166, - "step": 897 - }, - { - "epoch": 0.46831812255541067, - "grad_norm": 2.3754241466522217, - "learning_rate": 9.960100502512564e-05, - "loss": 6.6083, - "step": 898 - }, - { - "epoch": 0.46883963494132985, - "grad_norm": 2.811269998550415, - "learning_rate": 9.960000000000001e-05, - "loss": 6.2006, - "step": 899 - }, - { - "epoch": 0.46936114732724904, - "grad_norm": 1.9116339683532715, - "learning_rate": 9.959899497487437e-05, - "loss": 6.7957, - "step": 900 - }, - { - "epoch": 0.46988265971316817, - "grad_norm": 1.9986357688903809, - "learning_rate": 9.959798994974875e-05, - "loss": 6.3889, - "step": 901 - }, - { - "epoch": 0.47040417209908736, - "grad_norm": 1.9240593910217285, - "learning_rate": 9.959698492462312e-05, - "loss": 6.5054, - "step": 902 - }, - { - "epoch": 0.47092568448500655, - "grad_norm": 1.8461600542068481, - "learning_rate": 9.959597989949749e-05, - "loss": 6.5647, - "step": 903 - }, - { - "epoch": 0.4714471968709257, - "grad_norm": 1.726055383682251, - "learning_rate": 9.959497487437186e-05, - "loss": 6.3581, - "step": 904 - }, - { - "epoch": 0.47196870925684486, - "grad_norm": 1.9957642555236816, - "learning_rate": 9.959396984924623e-05, - "loss": 6.472, - "step": 905 - }, - { - "epoch": 0.472490221642764, - "grad_norm": 1.817854404449463, - "learning_rate": 9.95929648241206e-05, - "loss": 6.4174, - "step": 906 - }, - { - "epoch": 0.4730117340286832, - "grad_norm": 2.026216506958008, - "learning_rate": 9.959195979899498e-05, - "loss": 6.4387, - "step": 907 - }, - { - "epoch": 0.47353324641460237, - "grad_norm": 1.8881945610046387, - "learning_rate": 9.959095477386935e-05, - "loss": 6.5739, - "step": 908 - }, - { - "epoch": 0.4740547588005215, - "grad_norm": 2.0653135776519775, - "learning_rate": 9.958994974874373e-05, - "loss": 6.6318, - "step": 909 - }, - { - "epoch": 0.4745762711864407, - "grad_norm": 2.2507307529449463, - "learning_rate": 9.95889447236181e-05, - "loss": 6.3731, - "step": 910 - }, - { - "epoch": 0.4750977835723598, - "grad_norm": 2.0260534286499023, - "learning_rate": 9.958793969849247e-05, - "loss": 6.1886, - "step": 911 - }, - { - "epoch": 0.475619295958279, - "grad_norm": 2.2879021167755127, - "learning_rate": 9.958693467336684e-05, - "loss": 5.9656, - "step": 912 - }, - { - "epoch": 0.4761408083441982, - "grad_norm": 1.8898496627807617, - "learning_rate": 9.958592964824122e-05, - "loss": 6.4944, - "step": 913 - }, - { - "epoch": 0.47666232073011733, - "grad_norm": 1.9814594984054565, - "learning_rate": 9.958492462311558e-05, - "loss": 6.361, - "step": 914 - }, - { - "epoch": 0.4771838331160365, - "grad_norm": 2.2697174549102783, - "learning_rate": 9.958391959798995e-05, - "loss": 6.4851, - "step": 915 - }, - { - "epoch": 0.47770534550195565, - "grad_norm": 2.321835517883301, - "learning_rate": 9.958291457286432e-05, - "loss": 5.9252, - "step": 916 - }, - { - "epoch": 0.47822685788787483, - "grad_norm": 2.1413791179656982, - "learning_rate": 9.958190954773869e-05, - "loss": 6.5192, - "step": 917 - }, - { - "epoch": 0.478748370273794, - "grad_norm": 2.2743680477142334, - "learning_rate": 9.958090452261307e-05, - "loss": 6.183, - "step": 918 - }, - { - "epoch": 0.47926988265971315, - "grad_norm": 2.1204566955566406, - "learning_rate": 9.957989949748744e-05, - "loss": 6.2142, - "step": 919 - }, - { - "epoch": 0.47979139504563234, - "grad_norm": 2.070068836212158, - "learning_rate": 9.957889447236182e-05, - "loss": 6.7194, - "step": 920 - }, - { - "epoch": 0.4803129074315515, - "grad_norm": 2.5954298973083496, - "learning_rate": 9.957788944723619e-05, - "loss": 5.7381, - "step": 921 - }, - { - "epoch": 0.48083441981747066, - "grad_norm": 1.9493515491485596, - "learning_rate": 9.957688442211056e-05, - "loss": 6.4862, - "step": 922 - }, - { - "epoch": 0.48135593220338985, - "grad_norm": 2.2104249000549316, - "learning_rate": 9.957587939698493e-05, - "loss": 6.1622, - "step": 923 - }, - { - "epoch": 0.481877444589309, - "grad_norm": 2.772524118423462, - "learning_rate": 9.95748743718593e-05, - "loss": 5.6473, - "step": 924 - }, - { - "epoch": 0.48239895697522817, - "grad_norm": 2.035775661468506, - "learning_rate": 9.957386934673367e-05, - "loss": 6.0676, - "step": 925 - }, - { - "epoch": 0.48292046936114735, - "grad_norm": 2.280177354812622, - "learning_rate": 9.957286432160805e-05, - "loss": 6.2864, - "step": 926 - }, - { - "epoch": 0.4834419817470665, - "grad_norm": 1.8228402137756348, - "learning_rate": 9.957185929648241e-05, - "loss": 6.6061, - "step": 927 - }, - { - "epoch": 0.48396349413298567, - "grad_norm": 2.4340789318084717, - "learning_rate": 9.957085427135679e-05, - "loss": 6.4215, - "step": 928 - }, - { - "epoch": 0.4844850065189048, - "grad_norm": 1.9269063472747803, - "learning_rate": 9.956984924623117e-05, - "loss": 6.2029, - "step": 929 - }, - { - "epoch": 0.485006518904824, - "grad_norm": 2.2546534538269043, - "learning_rate": 9.956884422110553e-05, - "loss": 6.2769, - "step": 930 - }, - { - "epoch": 0.4855280312907432, - "grad_norm": 1.8607932329177856, - "learning_rate": 9.956783919597991e-05, - "loss": 6.6443, - "step": 931 - }, - { - "epoch": 0.4860495436766623, - "grad_norm": 1.9565026760101318, - "learning_rate": 9.956683417085427e-05, - "loss": 6.5523, - "step": 932 - }, - { - "epoch": 0.4865710560625815, - "grad_norm": 1.8473814725875854, - "learning_rate": 9.956582914572865e-05, - "loss": 6.6484, - "step": 933 - }, - { - "epoch": 0.48709256844850063, - "grad_norm": 2.0442159175872803, - "learning_rate": 9.956482412060302e-05, - "loss": 6.6033, - "step": 934 - }, - { - "epoch": 0.4876140808344198, - "grad_norm": 1.9316771030426025, - "learning_rate": 9.95638190954774e-05, - "loss": 6.2141, - "step": 935 - }, - { - "epoch": 0.488135593220339, - "grad_norm": 1.9396896362304688, - "learning_rate": 9.956281407035176e-05, - "loss": 6.6897, - "step": 936 - }, - { - "epoch": 0.48865710560625814, - "grad_norm": 1.804550290107727, - "learning_rate": 9.956180904522614e-05, - "loss": 6.5316, - "step": 937 - }, - { - "epoch": 0.4891786179921773, - "grad_norm": 2.3102989196777344, - "learning_rate": 9.95608040201005e-05, - "loss": 6.467, - "step": 938 - }, - { - "epoch": 0.48970013037809645, - "grad_norm": 1.9410513639450073, - "learning_rate": 9.955979899497488e-05, - "loss": 6.698, - "step": 939 - }, - { - "epoch": 0.49022164276401564, - "grad_norm": 1.9564518928527832, - "learning_rate": 9.955879396984926e-05, - "loss": 6.4822, - "step": 940 - }, - { - "epoch": 0.49074315514993483, - "grad_norm": 1.8969272375106812, - "learning_rate": 9.955778894472362e-05, - "loss": 6.2348, - "step": 941 - }, - { - "epoch": 0.49126466753585396, - "grad_norm": 2.329587697982788, - "learning_rate": 9.9556783919598e-05, - "loss": 6.0454, - "step": 942 - }, - { - "epoch": 0.49178617992177315, - "grad_norm": 1.7472556829452515, - "learning_rate": 9.955577889447236e-05, - "loss": 6.4282, - "step": 943 - }, - { - "epoch": 0.49230769230769234, - "grad_norm": 1.8456318378448486, - "learning_rate": 9.955477386934674e-05, - "loss": 6.3436, - "step": 944 - }, - { - "epoch": 0.49282920469361147, - "grad_norm": 2.0255699157714844, - "learning_rate": 9.95537688442211e-05, - "loss": 6.1632, - "step": 945 - }, - { - "epoch": 0.49335071707953065, - "grad_norm": 2.161907911300659, - "learning_rate": 9.955276381909548e-05, - "loss": 6.2036, - "step": 946 - }, - { - "epoch": 0.4938722294654498, - "grad_norm": 2.113365411758423, - "learning_rate": 9.955175879396985e-05, - "loss": 6.1591, - "step": 947 - }, - { - "epoch": 0.494393741851369, - "grad_norm": 2.2957074642181396, - "learning_rate": 9.955075376884422e-05, - "loss": 6.2229, - "step": 948 - }, - { - "epoch": 0.49491525423728816, - "grad_norm": 2.9717864990234375, - "learning_rate": 9.95497487437186e-05, - "loss": 6.6651, - "step": 949 - }, - { - "epoch": 0.4954367666232073, - "grad_norm": 1.9626245498657227, - "learning_rate": 9.954874371859298e-05, - "loss": 6.6075, - "step": 950 - }, - { - "epoch": 0.4959582790091265, - "grad_norm": 3.1651618480682373, - "learning_rate": 9.954773869346734e-05, - "loss": 6.1353, - "step": 951 - }, - { - "epoch": 0.4964797913950456, - "grad_norm": 2.3175432682037354, - "learning_rate": 9.954673366834172e-05, - "loss": 6.5026, - "step": 952 - }, - { - "epoch": 0.4970013037809648, - "grad_norm": 2.5663411617279053, - "learning_rate": 9.954572864321609e-05, - "loss": 6.6599, - "step": 953 - }, - { - "epoch": 0.497522816166884, - "grad_norm": 2.2984795570373535, - "learning_rate": 9.954472361809045e-05, - "loss": 6.636, - "step": 954 - }, - { - "epoch": 0.4980443285528031, - "grad_norm": 2.301086664199829, - "learning_rate": 9.954371859296483e-05, - "loss": 6.476, - "step": 955 - }, - { - "epoch": 0.4985658409387223, - "grad_norm": 2.7066967487335205, - "learning_rate": 9.954271356783919e-05, - "loss": 6.0005, - "step": 956 - }, - { - "epoch": 0.49908735332464144, - "grad_norm": 2.74013352394104, - "learning_rate": 9.954170854271357e-05, - "loss": 6.6643, - "step": 957 - }, - { - "epoch": 0.4996088657105606, - "grad_norm": 1.9838446378707886, - "learning_rate": 9.954070351758793e-05, - "loss": 6.4186, - "step": 958 - }, - { - "epoch": 0.5001303780964798, - "grad_norm": 2.4673357009887695, - "learning_rate": 9.953969849246231e-05, - "loss": 6.5595, - "step": 959 - }, - { - "epoch": 0.500651890482399, - "grad_norm": 2.108759880065918, - "learning_rate": 9.953869346733669e-05, - "loss": 6.5914, - "step": 960 - }, - { - "epoch": 0.5011734028683181, - "grad_norm": 2.391068696975708, - "learning_rate": 9.953768844221107e-05, - "loss": 6.661, - "step": 961 - }, - { - "epoch": 0.5016949152542373, - "grad_norm": 2.1729373931884766, - "learning_rate": 9.953668341708543e-05, - "loss": 6.1647, - "step": 962 - }, - { - "epoch": 0.5022164276401565, - "grad_norm": 2.26346755027771, - "learning_rate": 9.953567839195981e-05, - "loss": 6.261, - "step": 963 - }, - { - "epoch": 0.5027379400260756, - "grad_norm": 2.5807383060455322, - "learning_rate": 9.953467336683417e-05, - "loss": 6.1459, - "step": 964 - }, - { - "epoch": 0.5032594524119948, - "grad_norm": 2.2451083660125732, - "learning_rate": 9.953366834170855e-05, - "loss": 6.489, - "step": 965 - }, - { - "epoch": 0.5037809647979139, - "grad_norm": 2.171003580093384, - "learning_rate": 9.953266331658292e-05, - "loss": 6.6078, - "step": 966 - }, - { - "epoch": 0.5043024771838331, - "grad_norm": 3.1965880393981934, - "learning_rate": 9.953165829145728e-05, - "loss": 5.5818, - "step": 967 - }, - { - "epoch": 0.5048239895697523, - "grad_norm": 2.145333766937256, - "learning_rate": 9.953065326633166e-05, - "loss": 6.2235, - "step": 968 - }, - { - "epoch": 0.5053455019556714, - "grad_norm": 2.572251558303833, - "learning_rate": 9.952964824120604e-05, - "loss": 6.2302, - "step": 969 - }, - { - "epoch": 0.5058670143415906, - "grad_norm": 2.1392300128936768, - "learning_rate": 9.952864321608041e-05, - "loss": 6.6791, - "step": 970 - }, - { - "epoch": 0.5063885267275098, - "grad_norm": 2.185953140258789, - "learning_rate": 9.952763819095478e-05, - "loss": 6.5173, - "step": 971 - }, - { - "epoch": 0.5069100391134289, - "grad_norm": 2.472808361053467, - "learning_rate": 9.952663316582916e-05, - "loss": 6.738, - "step": 972 - }, - { - "epoch": 0.5074315514993482, - "grad_norm": 2.662168025970459, - "learning_rate": 9.952562814070352e-05, - "loss": 6.2035, - "step": 973 - }, - { - "epoch": 0.5079530638852673, - "grad_norm": 2.2262117862701416, - "learning_rate": 9.95246231155779e-05, - "loss": 6.2722, - "step": 974 - }, - { - "epoch": 0.5084745762711864, - "grad_norm": 3.2270820140838623, - "learning_rate": 9.952361809045226e-05, - "loss": 6.3604, - "step": 975 - }, - { - "epoch": 0.5089960886571057, - "grad_norm": 2.8973543643951416, - "learning_rate": 9.952261306532664e-05, - "loss": 6.4082, - "step": 976 - }, - { - "epoch": 0.5095176010430248, - "grad_norm": 2.3042736053466797, - "learning_rate": 9.9521608040201e-05, - "loss": 6.3213, - "step": 977 - }, - { - "epoch": 0.5100391134289439, - "grad_norm": 2.7366580963134766, - "learning_rate": 9.952060301507538e-05, - "loss": 5.7565, - "step": 978 - }, - { - "epoch": 0.5105606258148631, - "grad_norm": 2.349928140640259, - "learning_rate": 9.951959798994975e-05, - "loss": 5.8961, - "step": 979 - }, - { - "epoch": 0.5110821382007823, - "grad_norm": 2.2525947093963623, - "learning_rate": 9.951859296482412e-05, - "loss": 6.388, - "step": 980 - }, - { - "epoch": 0.5116036505867014, - "grad_norm": 2.0856735706329346, - "learning_rate": 9.95175879396985e-05, - "loss": 6.6325, - "step": 981 - }, - { - "epoch": 0.5121251629726206, - "grad_norm": 2.2722132205963135, - "learning_rate": 9.951658291457287e-05, - "loss": 5.8759, - "step": 982 - }, - { - "epoch": 0.5126466753585398, - "grad_norm": 2.305325746536255, - "learning_rate": 9.951557788944724e-05, - "loss": 6.5283, - "step": 983 - }, - { - "epoch": 0.5131681877444589, - "grad_norm": 2.8892693519592285, - "learning_rate": 9.951457286432161e-05, - "loss": 5.6446, - "step": 984 - }, - { - "epoch": 0.5136897001303781, - "grad_norm": 2.2443737983703613, - "learning_rate": 9.951356783919599e-05, - "loss": 6.1867, - "step": 985 - }, - { - "epoch": 0.5142112125162973, - "grad_norm": 2.326246500015259, - "learning_rate": 9.951256281407035e-05, - "loss": 6.4661, - "step": 986 - }, - { - "epoch": 0.5147327249022164, - "grad_norm": 2.3732025623321533, - "learning_rate": 9.951155778894473e-05, - "loss": 6.0932, - "step": 987 - }, - { - "epoch": 0.5152542372881356, - "grad_norm": 2.115880250930786, - "learning_rate": 9.951055276381909e-05, - "loss": 6.2948, - "step": 988 - }, - { - "epoch": 0.5157757496740547, - "grad_norm": 2.152890682220459, - "learning_rate": 9.950954773869347e-05, - "loss": 6.5644, - "step": 989 - }, - { - "epoch": 0.516297262059974, - "grad_norm": 2.4016923904418945, - "learning_rate": 9.950854271356785e-05, - "loss": 6.6785, - "step": 990 - }, - { - "epoch": 0.5168187744458931, - "grad_norm": 2.1402158737182617, - "learning_rate": 9.950753768844223e-05, - "loss": 6.4978, - "step": 991 - }, - { - "epoch": 0.5173402868318122, - "grad_norm": 2.263880729675293, - "learning_rate": 9.950653266331659e-05, - "loss": 6.4149, - "step": 992 - }, - { - "epoch": 0.5178617992177315, - "grad_norm": 2.540372848510742, - "learning_rate": 9.950552763819096e-05, - "loss": 6.4855, - "step": 993 - }, - { - "epoch": 0.5183833116036506, - "grad_norm": 2.118459463119507, - "learning_rate": 9.950452261306533e-05, - "loss": 6.001, - "step": 994 - }, - { - "epoch": 0.5189048239895697, - "grad_norm": 1.845294713973999, - "learning_rate": 9.95035175879397e-05, - "loss": 6.5425, - "step": 995 - }, - { - "epoch": 0.519426336375489, - "grad_norm": 2.0339488983154297, - "learning_rate": 9.950251256281408e-05, - "loss": 6.5648, - "step": 996 - }, - { - "epoch": 0.5199478487614081, - "grad_norm": 2.080930233001709, - "learning_rate": 9.950150753768844e-05, - "loss": 5.9018, - "step": 997 - }, - { - "epoch": 0.5204693611473272, - "grad_norm": 2.14528489112854, - "learning_rate": 9.950050251256282e-05, - "loss": 6.103, - "step": 998 - }, - { - "epoch": 0.5209908735332465, - "grad_norm": 2.3449432849884033, - "learning_rate": 9.949949748743718e-05, - "loss": 6.5891, - "step": 999 - }, - { - "epoch": 0.5215123859191656, - "grad_norm": 1.9489444494247437, - "learning_rate": 9.949849246231156e-05, - "loss": 6.4501, - "step": 1000 - }, - { - "epoch": 0.5220338983050847, - "grad_norm": 2.270794153213501, - "learning_rate": 9.949748743718594e-05, - "loss": 6.4565, - "step": 1001 - }, - { - "epoch": 0.5225554106910039, - "grad_norm": 1.788223147392273, - "learning_rate": 9.949648241206032e-05, - "loss": 6.5355, - "step": 1002 - }, - { - "epoch": 0.5230769230769231, - "grad_norm": 1.8515621423721313, - "learning_rate": 9.949547738693468e-05, - "loss": 6.565, - "step": 1003 - }, - { - "epoch": 0.5235984354628422, - "grad_norm": 2.0171244144439697, - "learning_rate": 9.949447236180906e-05, - "loss": 6.5739, - "step": 1004 - }, - { - "epoch": 0.5241199478487614, - "grad_norm": 1.7724168300628662, - "learning_rate": 9.949346733668342e-05, - "loss": 6.2755, - "step": 1005 - }, - { - "epoch": 0.5246414602346806, - "grad_norm": 2.0184807777404785, - "learning_rate": 9.94924623115578e-05, - "loss": 6.457, - "step": 1006 - }, - { - "epoch": 0.5251629726205997, - "grad_norm": 2.0908501148223877, - "learning_rate": 9.949145728643216e-05, - "loss": 6.297, - "step": 1007 - }, - { - "epoch": 0.5256844850065189, - "grad_norm": 1.9920762777328491, - "learning_rate": 9.949045226130653e-05, - "loss": 6.3217, - "step": 1008 - }, - { - "epoch": 0.5262059973924381, - "grad_norm": 2.2184832096099854, - "learning_rate": 9.94894472361809e-05, - "loss": 5.7467, - "step": 1009 - }, - { - "epoch": 0.5267275097783573, - "grad_norm": 1.9815866947174072, - "learning_rate": 9.948844221105528e-05, - "loss": 6.1595, - "step": 1010 - }, - { - "epoch": 0.5272490221642764, - "grad_norm": 3.5510051250457764, - "learning_rate": 9.948743718592966e-05, - "loss": 5.9881, - "step": 1011 - }, - { - "epoch": 0.5277705345501955, - "grad_norm": 2.057077169418335, - "learning_rate": 9.948643216080403e-05, - "loss": 6.2812, - "step": 1012 - }, - { - "epoch": 0.5282920469361148, - "grad_norm": 2.4590835571289062, - "learning_rate": 9.94854271356784e-05, - "loss": 6.6273, - "step": 1013 - }, - { - "epoch": 0.5288135593220339, - "grad_norm": 1.7845070362091064, - "learning_rate": 9.948442211055277e-05, - "loss": 6.6754, - "step": 1014 - }, - { - "epoch": 0.529335071707953, - "grad_norm": 1.9927841424942017, - "learning_rate": 9.948341708542715e-05, - "loss": 6.1964, - "step": 1015 - }, - { - "epoch": 0.5298565840938723, - "grad_norm": 1.9606233835220337, - "learning_rate": 9.948241206030151e-05, - "loss": 5.9889, - "step": 1016 - }, - { - "epoch": 0.5303780964797914, - "grad_norm": 2.0790512561798096, - "learning_rate": 9.948140703517589e-05, - "loss": 6.4329, - "step": 1017 - }, - { - "epoch": 0.5308996088657105, - "grad_norm": 1.935746431350708, - "learning_rate": 9.948040201005025e-05, - "loss": 6.4808, - "step": 1018 - }, - { - "epoch": 0.5314211212516298, - "grad_norm": 1.5664710998535156, - "learning_rate": 9.947939698492463e-05, - "loss": 5.8561, - "step": 1019 - }, - { - "epoch": 0.5319426336375489, - "grad_norm": 2.1359148025512695, - "learning_rate": 9.9478391959799e-05, - "loss": 6.5126, - "step": 1020 - }, - { - "epoch": 0.532464146023468, - "grad_norm": 2.975498676300049, - "learning_rate": 9.947738693467337e-05, - "loss": 5.5307, - "step": 1021 - }, - { - "epoch": 0.5329856584093873, - "grad_norm": 2.526907444000244, - "learning_rate": 9.947638190954775e-05, - "loss": 6.3619, - "step": 1022 - }, - { - "epoch": 0.5335071707953064, - "grad_norm": 1.9787168502807617, - "learning_rate": 9.947537688442211e-05, - "loss": 6.4997, - "step": 1023 - }, - { - "epoch": 0.5340286831812255, - "grad_norm": 2.2634403705596924, - "learning_rate": 9.947437185929649e-05, - "loss": 6.4142, - "step": 1024 - }, - { - "epoch": 0.5345501955671447, - "grad_norm": 2.2375221252441406, - "learning_rate": 9.947336683417086e-05, - "loss": 6.6987, - "step": 1025 - }, - { - "epoch": 0.5350717079530639, - "grad_norm": 1.8052479028701782, - "learning_rate": 9.947236180904523e-05, - "loss": 6.7399, - "step": 1026 - }, - { - "epoch": 0.535593220338983, - "grad_norm": 2.8728318214416504, - "learning_rate": 9.94713567839196e-05, - "loss": 6.3754, - "step": 1027 - }, - { - "epoch": 0.5361147327249022, - "grad_norm": 2.69226336479187, - "learning_rate": 9.947035175879398e-05, - "loss": 6.2582, - "step": 1028 - }, - { - "epoch": 0.5366362451108214, - "grad_norm": 2.250656843185425, - "learning_rate": 9.946934673366834e-05, - "loss": 5.9589, - "step": 1029 - }, - { - "epoch": 0.5371577574967406, - "grad_norm": 2.127631425857544, - "learning_rate": 9.946834170854272e-05, - "loss": 5.9818, - "step": 1030 - }, - { - "epoch": 0.5376792698826597, - "grad_norm": 1.8615456819534302, - "learning_rate": 9.94673366834171e-05, - "loss": 6.4924, - "step": 1031 - }, - { - "epoch": 0.5382007822685789, - "grad_norm": 2.1967973709106445, - "learning_rate": 9.946633165829147e-05, - "loss": 6.3876, - "step": 1032 - }, - { - "epoch": 0.5387222946544981, - "grad_norm": 1.914030909538269, - "learning_rate": 9.946532663316584e-05, - "loss": 6.4738, - "step": 1033 - }, - { - "epoch": 0.5392438070404172, - "grad_norm": 2.0085256099700928, - "learning_rate": 9.94643216080402e-05, - "loss": 6.4261, - "step": 1034 - }, - { - "epoch": 0.5397653194263363, - "grad_norm": 1.7855981588363647, - "learning_rate": 9.946331658291458e-05, - "loss": 6.4943, - "step": 1035 - }, - { - "epoch": 0.5402868318122556, - "grad_norm": 1.8241933584213257, - "learning_rate": 9.946231155778894e-05, - "loss": 6.7336, - "step": 1036 - }, - { - "epoch": 0.5408083441981747, - "grad_norm": 2.0525999069213867, - "learning_rate": 9.946130653266332e-05, - "loss": 6.3461, - "step": 1037 - }, - { - "epoch": 0.5413298565840938, - "grad_norm": 2.5363781452178955, - "learning_rate": 9.946030150753769e-05, - "loss": 6.5937, - "step": 1038 - }, - { - "epoch": 0.5418513689700131, - "grad_norm": 2.140744209289551, - "learning_rate": 9.945929648241206e-05, - "loss": 6.5601, - "step": 1039 - }, - { - "epoch": 0.5423728813559322, - "grad_norm": 1.9791829586029053, - "learning_rate": 9.945829145728643e-05, - "loss": 5.9308, - "step": 1040 - }, - { - "epoch": 0.5428943937418513, - "grad_norm": 2.283832550048828, - "learning_rate": 9.94572864321608e-05, - "loss": 6.6413, - "step": 1041 - }, - { - "epoch": 0.5434159061277706, - "grad_norm": 1.9554219245910645, - "learning_rate": 9.945628140703518e-05, - "loss": 6.1505, - "step": 1042 - }, - { - "epoch": 0.5439374185136897, - "grad_norm": 2.2553982734680176, - "learning_rate": 9.945527638190956e-05, - "loss": 6.7015, - "step": 1043 - }, - { - "epoch": 0.5444589308996088, - "grad_norm": 2.226177453994751, - "learning_rate": 9.945427135678393e-05, - "loss": 6.4264, - "step": 1044 - }, - { - "epoch": 0.5449804432855281, - "grad_norm": 2.015780210494995, - "learning_rate": 9.94532663316583e-05, - "loss": 6.4164, - "step": 1045 - }, - { - "epoch": 0.5455019556714472, - "grad_norm": 2.360095977783203, - "learning_rate": 9.945226130653267e-05, - "loss": 6.4349, - "step": 1046 - }, - { - "epoch": 0.5460234680573663, - "grad_norm": 2.0980846881866455, - "learning_rate": 9.945125628140703e-05, - "loss": 6.1571, - "step": 1047 - }, - { - "epoch": 0.5465449804432855, - "grad_norm": 2.199629306793213, - "learning_rate": 9.945025125628141e-05, - "loss": 6.1847, - "step": 1048 - }, - { - "epoch": 0.5470664928292047, - "grad_norm": 2.2445294857025146, - "learning_rate": 9.944924623115577e-05, - "loss": 6.1854, - "step": 1049 - }, - { - "epoch": 0.5475880052151239, - "grad_norm": 2.2078707218170166, - "learning_rate": 9.944824120603015e-05, - "loss": 6.4354, - "step": 1050 - }, - { - "epoch": 0.548109517601043, - "grad_norm": 2.116070032119751, - "learning_rate": 9.944723618090453e-05, - "loss": 6.4002, - "step": 1051 - }, - { - "epoch": 0.5486310299869622, - "grad_norm": 2.0001003742218018, - "learning_rate": 9.944623115577891e-05, - "loss": 6.0909, - "step": 1052 - }, - { - "epoch": 0.5491525423728814, - "grad_norm": 1.7960060834884644, - "learning_rate": 9.944522613065327e-05, - "loss": 6.7664, - "step": 1053 - }, - { - "epoch": 0.5496740547588005, - "grad_norm": 1.8234988451004028, - "learning_rate": 9.944422110552765e-05, - "loss": 6.6887, - "step": 1054 - }, - { - "epoch": 0.5501955671447197, - "grad_norm": 2.5000674724578857, - "learning_rate": 9.944321608040201e-05, - "loss": 6.3386, - "step": 1055 - }, - { - "epoch": 0.5507170795306389, - "grad_norm": 2.168358087539673, - "learning_rate": 9.944221105527639e-05, - "loss": 6.6375, - "step": 1056 - }, - { - "epoch": 0.551238591916558, - "grad_norm": 1.9939749240875244, - "learning_rate": 9.944120603015076e-05, - "loss": 6.0951, - "step": 1057 - }, - { - "epoch": 0.5517601043024772, - "grad_norm": 2.2767982482910156, - "learning_rate": 9.944020100502513e-05, - "loss": 6.1074, - "step": 1058 - }, - { - "epoch": 0.5522816166883964, - "grad_norm": 2.473562002182007, - "learning_rate": 9.94391959798995e-05, - "loss": 6.2916, - "step": 1059 - }, - { - "epoch": 0.5528031290743155, - "grad_norm": 2.0120818614959717, - "learning_rate": 9.943819095477386e-05, - "loss": 5.6506, - "step": 1060 - }, - { - "epoch": 0.5533246414602346, - "grad_norm": 2.1312575340270996, - "learning_rate": 9.943718592964824e-05, - "loss": 6.1445, - "step": 1061 - }, - { - "epoch": 0.5538461538461539, - "grad_norm": 2.3592288494110107, - "learning_rate": 9.943618090452262e-05, - "loss": 6.2612, - "step": 1062 - }, - { - "epoch": 0.554367666232073, - "grad_norm": 1.9979387521743774, - "learning_rate": 9.9435175879397e-05, - "loss": 6.4108, - "step": 1063 - }, - { - "epoch": 0.5548891786179921, - "grad_norm": 2.145378828048706, - "learning_rate": 9.943417085427136e-05, - "loss": 5.9514, - "step": 1064 - }, - { - "epoch": 0.5554106910039114, - "grad_norm": 1.7572509050369263, - "learning_rate": 9.943316582914574e-05, - "loss": 6.4152, - "step": 1065 - }, - { - "epoch": 0.5559322033898305, - "grad_norm": 1.8876161575317383, - "learning_rate": 9.94321608040201e-05, - "loss": 6.4293, - "step": 1066 - }, - { - "epoch": 0.5564537157757496, - "grad_norm": 1.8606067895889282, - "learning_rate": 9.943115577889448e-05, - "loss": 6.6877, - "step": 1067 - }, - { - "epoch": 0.5569752281616689, - "grad_norm": 2.1855692863464355, - "learning_rate": 9.943015075376885e-05, - "loss": 6.1459, - "step": 1068 - }, - { - "epoch": 0.557496740547588, - "grad_norm": 2.0761561393737793, - "learning_rate": 9.942914572864322e-05, - "loss": 6.502, - "step": 1069 - }, - { - "epoch": 0.5580182529335072, - "grad_norm": 1.9480054378509521, - "learning_rate": 9.942814070351759e-05, - "loss": 6.5445, - "step": 1070 - }, - { - "epoch": 0.5585397653194263, - "grad_norm": 2.7435081005096436, - "learning_rate": 9.942713567839197e-05, - "loss": 5.4653, - "step": 1071 - }, - { - "epoch": 0.5590612777053455, - "grad_norm": 2.2142210006713867, - "learning_rate": 9.942613065326634e-05, - "loss": 5.6591, - "step": 1072 - }, - { - "epoch": 0.5595827900912647, - "grad_norm": 1.8588271141052246, - "learning_rate": 9.942512562814071e-05, - "loss": 6.497, - "step": 1073 - }, - { - "epoch": 0.5601043024771838, - "grad_norm": 2.1879067420959473, - "learning_rate": 9.942412060301508e-05, - "loss": 5.8484, - "step": 1074 - }, - { - "epoch": 0.560625814863103, - "grad_norm": 2.1053307056427, - "learning_rate": 9.942311557788945e-05, - "loss": 5.9358, - "step": 1075 - }, - { - "epoch": 0.5611473272490222, - "grad_norm": 2.069387197494507, - "learning_rate": 9.942211055276383e-05, - "loss": 6.5896, - "step": 1076 - }, - { - "epoch": 0.5616688396349413, - "grad_norm": 2.250141143798828, - "learning_rate": 9.942110552763819e-05, - "loss": 6.5784, - "step": 1077 - }, - { - "epoch": 0.5621903520208605, - "grad_norm": 1.9212855100631714, - "learning_rate": 9.942010050251257e-05, - "loss": 6.7726, - "step": 1078 - }, - { - "epoch": 0.5627118644067797, - "grad_norm": 1.8462995290756226, - "learning_rate": 9.941909547738693e-05, - "loss": 6.5867, - "step": 1079 - }, - { - "epoch": 0.5632333767926988, - "grad_norm": 2.0089070796966553, - "learning_rate": 9.941809045226131e-05, - "loss": 5.8652, - "step": 1080 - }, - { - "epoch": 0.563754889178618, - "grad_norm": 1.8268921375274658, - "learning_rate": 9.941708542713568e-05, - "loss": 6.4779, - "step": 1081 - }, - { - "epoch": 0.5642764015645372, - "grad_norm": 1.631845474243164, - "learning_rate": 9.941608040201005e-05, - "loss": 6.6743, - "step": 1082 - }, - { - "epoch": 0.5647979139504563, - "grad_norm": 1.6352670192718506, - "learning_rate": 9.941507537688443e-05, - "loss": 6.3974, - "step": 1083 - }, - { - "epoch": 0.5653194263363754, - "grad_norm": 1.7636839151382446, - "learning_rate": 9.941407035175881e-05, - "loss": 6.6397, - "step": 1084 - }, - { - "epoch": 0.5658409387222947, - "grad_norm": 2.027069330215454, - "learning_rate": 9.941306532663317e-05, - "loss": 6.1071, - "step": 1085 - }, - { - "epoch": 0.5663624511082138, - "grad_norm": 1.8907263278961182, - "learning_rate": 9.941206030150754e-05, - "loss": 6.5588, - "step": 1086 - }, - { - "epoch": 0.566883963494133, - "grad_norm": 1.6413065195083618, - "learning_rate": 9.941105527638192e-05, - "loss": 6.7847, - "step": 1087 - }, - { - "epoch": 0.5674054758800522, - "grad_norm": 1.6439162492752075, - "learning_rate": 9.941005025125628e-05, - "loss": 6.4677, - "step": 1088 - }, - { - "epoch": 0.5679269882659713, - "grad_norm": 1.8425122499465942, - "learning_rate": 9.940904522613066e-05, - "loss": 6.5117, - "step": 1089 - }, - { - "epoch": 0.5684485006518905, - "grad_norm": 2.7872750759124756, - "learning_rate": 9.940804020100502e-05, - "loss": 5.5311, - "step": 1090 - }, - { - "epoch": 0.5689700130378097, - "grad_norm": 2.0331318378448486, - "learning_rate": 9.94070351758794e-05, - "loss": 6.3681, - "step": 1091 - }, - { - "epoch": 0.5694915254237288, - "grad_norm": 2.093341588973999, - "learning_rate": 9.940603015075376e-05, - "loss": 6.4774, - "step": 1092 - }, - { - "epoch": 0.570013037809648, - "grad_norm": 2.029984712600708, - "learning_rate": 9.940502512562814e-05, - "loss": 6.2532, - "step": 1093 - }, - { - "epoch": 0.5705345501955671, - "grad_norm": 2.1496570110321045, - "learning_rate": 9.940402010050252e-05, - "loss": 6.373, - "step": 1094 - }, - { - "epoch": 0.5710560625814863, - "grad_norm": 2.257427453994751, - "learning_rate": 9.94030150753769e-05, - "loss": 5.5611, - "step": 1095 - }, - { - "epoch": 0.5715775749674055, - "grad_norm": 2.026304244995117, - "learning_rate": 9.940201005025126e-05, - "loss": 6.5535, - "step": 1096 - }, - { - "epoch": 0.5720990873533246, - "grad_norm": 2.959139823913574, - "learning_rate": 9.940100502512564e-05, - "loss": 6.1954, - "step": 1097 - }, - { - "epoch": 0.5726205997392438, - "grad_norm": 2.200242757797241, - "learning_rate": 9.94e-05, - "loss": 6.2894, - "step": 1098 - }, - { - "epoch": 0.573142112125163, - "grad_norm": 2.430525064468384, - "learning_rate": 9.939899497487438e-05, - "loss": 5.8289, - "step": 1099 - }, - { - "epoch": 0.5736636245110821, - "grad_norm": 1.819159984588623, - "learning_rate": 9.939798994974875e-05, - "loss": 6.5941, - "step": 1100 - }, - { - "epoch": 0.5741851368970013, - "grad_norm": 2.1125643253326416, - "learning_rate": 9.939698492462311e-05, - "loss": 6.5426, - "step": 1101 - }, - { - "epoch": 0.5747066492829205, - "grad_norm": 1.901065707206726, - "learning_rate": 9.939597989949749e-05, - "loss": 6.5261, - "step": 1102 - }, - { - "epoch": 0.5752281616688396, - "grad_norm": 2.6235954761505127, - "learning_rate": 9.939497487437187e-05, - "loss": 5.2949, - "step": 1103 - }, - { - "epoch": 0.5757496740547589, - "grad_norm": 2.304426670074463, - "learning_rate": 9.939396984924624e-05, - "loss": 6.2389, - "step": 1104 - }, - { - "epoch": 0.576271186440678, - "grad_norm": 1.906935453414917, - "learning_rate": 9.939296482412061e-05, - "loss": 6.1475, - "step": 1105 - }, - { - "epoch": 0.5767926988265971, - "grad_norm": 1.9593114852905273, - "learning_rate": 9.939195979899499e-05, - "loss": 6.3794, - "step": 1106 - }, - { - "epoch": 0.5773142112125162, - "grad_norm": 1.981544852256775, - "learning_rate": 9.939095477386935e-05, - "loss": 6.2827, - "step": 1107 - }, - { - "epoch": 0.5778357235984355, - "grad_norm": 1.8649400472640991, - "learning_rate": 9.938994974874373e-05, - "loss": 6.3705, - "step": 1108 - }, - { - "epoch": 0.5783572359843546, - "grad_norm": 1.8049644231796265, - "learning_rate": 9.938894472361809e-05, - "loss": 6.3085, - "step": 1109 - }, - { - "epoch": 0.5788787483702738, - "grad_norm": 2.325085163116455, - "learning_rate": 9.938793969849247e-05, - "loss": 6.0134, - "step": 1110 - }, - { - "epoch": 0.579400260756193, - "grad_norm": 1.9665300846099854, - "learning_rate": 9.938693467336683e-05, - "loss": 6.4956, - "step": 1111 - }, - { - "epoch": 0.5799217731421121, - "grad_norm": 2.0801262855529785, - "learning_rate": 9.938592964824121e-05, - "loss": 6.7524, - "step": 1112 - }, - { - "epoch": 0.5804432855280313, - "grad_norm": 1.720389485359192, - "learning_rate": 9.938492462311558e-05, - "loss": 6.6196, - "step": 1113 - }, - { - "epoch": 0.5809647979139505, - "grad_norm": 2.0624289512634277, - "learning_rate": 9.938391959798995e-05, - "loss": 6.532, - "step": 1114 - }, - { - "epoch": 0.5814863102998696, - "grad_norm": 2.2252767086029053, - "learning_rate": 9.938291457286433e-05, - "loss": 5.5149, - "step": 1115 - }, - { - "epoch": 0.5820078226857888, - "grad_norm": 1.7989685535430908, - "learning_rate": 9.93819095477387e-05, - "loss": 6.6338, - "step": 1116 - }, - { - "epoch": 0.5825293350717079, - "grad_norm": 2.1111786365509033, - "learning_rate": 9.938090452261307e-05, - "loss": 6.3048, - "step": 1117 - }, - { - "epoch": 0.5830508474576271, - "grad_norm": 1.7639018297195435, - "learning_rate": 9.937989949748744e-05, - "loss": 6.4724, - "step": 1118 - }, - { - "epoch": 0.5835723598435463, - "grad_norm": 1.664967656135559, - "learning_rate": 9.937889447236182e-05, - "loss": 6.7072, - "step": 1119 - }, - { - "epoch": 0.5840938722294654, - "grad_norm": 2.8242104053497314, - "learning_rate": 9.937788944723618e-05, - "loss": 5.9272, - "step": 1120 - }, - { - "epoch": 0.5846153846153846, - "grad_norm": 1.8767706155776978, - "learning_rate": 9.937688442211056e-05, - "loss": 6.6593, - "step": 1121 - }, - { - "epoch": 0.5851368970013038, - "grad_norm": 2.1955630779266357, - "learning_rate": 9.937587939698492e-05, - "loss": 6.3052, - "step": 1122 - }, - { - "epoch": 0.5856584093872229, - "grad_norm": 2.174100875854492, - "learning_rate": 9.93748743718593e-05, - "loss": 6.4872, - "step": 1123 - }, - { - "epoch": 0.5861799217731422, - "grad_norm": 2.197890043258667, - "learning_rate": 9.937386934673368e-05, - "loss": 6.3364, - "step": 1124 - }, - { - "epoch": 0.5867014341590613, - "grad_norm": 1.782468318939209, - "learning_rate": 9.937286432160806e-05, - "loss": 6.3847, - "step": 1125 - }, - { - "epoch": 0.5872229465449804, - "grad_norm": 2.293619394302368, - "learning_rate": 9.937185929648242e-05, - "loss": 6.2027, - "step": 1126 - }, - { - "epoch": 0.5877444589308997, - "grad_norm": 1.9395240545272827, - "learning_rate": 9.937085427135678e-05, - "loss": 6.1196, - "step": 1127 - }, - { - "epoch": 0.5882659713168188, - "grad_norm": 1.8216606378555298, - "learning_rate": 9.936984924623116e-05, - "loss": 6.3899, - "step": 1128 - }, - { - "epoch": 0.5887874837027379, - "grad_norm": 2.6699256896972656, - "learning_rate": 9.936884422110553e-05, - "loss": 5.9487, - "step": 1129 - }, - { - "epoch": 0.5893089960886571, - "grad_norm": 1.7982321977615356, - "learning_rate": 9.93678391959799e-05, - "loss": 6.0509, - "step": 1130 - }, - { - "epoch": 0.5898305084745763, - "grad_norm": 2.2850613594055176, - "learning_rate": 9.936683417085427e-05, - "loss": 6.0642, - "step": 1131 - }, - { - "epoch": 0.5903520208604954, - "grad_norm": 1.7728103399276733, - "learning_rate": 9.936582914572865e-05, - "loss": 6.3612, - "step": 1132 - }, - { - "epoch": 0.5908735332464146, - "grad_norm": 1.8440884351730347, - "learning_rate": 9.936482412060301e-05, - "loss": 6.4031, - "step": 1133 - }, - { - "epoch": 0.5913950456323338, - "grad_norm": 1.899545431137085, - "learning_rate": 9.936381909547739e-05, - "loss": 6.047, - "step": 1134 - }, - { - "epoch": 0.5919165580182529, - "grad_norm": 1.9492884874343872, - "learning_rate": 9.936281407035177e-05, - "loss": 6.229, - "step": 1135 - }, - { - "epoch": 0.5924380704041721, - "grad_norm": 2.0436065196990967, - "learning_rate": 9.936180904522614e-05, - "loss": 6.541, - "step": 1136 - }, - { - "epoch": 0.5929595827900913, - "grad_norm": 1.8518743515014648, - "learning_rate": 9.936080402010051e-05, - "loss": 6.4035, - "step": 1137 - }, - { - "epoch": 0.5934810951760104, - "grad_norm": 2.591897487640381, - "learning_rate": 9.935979899497489e-05, - "loss": 5.8126, - "step": 1138 - }, - { - "epoch": 0.5940026075619296, - "grad_norm": 1.6692888736724854, - "learning_rate": 9.935879396984925e-05, - "loss": 6.6656, - "step": 1139 - }, - { - "epoch": 0.5945241199478487, - "grad_norm": 1.7847602367401123, - "learning_rate": 9.935778894472362e-05, - "loss": 6.1819, - "step": 1140 - }, - { - "epoch": 0.595045632333768, - "grad_norm": 2.053975820541382, - "learning_rate": 9.935678391959799e-05, - "loss": 6.2015, - "step": 1141 - }, - { - "epoch": 0.5955671447196871, - "grad_norm": 2.16721510887146, - "learning_rate": 9.935577889447236e-05, - "loss": 6.322, - "step": 1142 - }, - { - "epoch": 0.5960886571056062, - "grad_norm": 2.0767982006073, - "learning_rate": 9.935477386934673e-05, - "loss": 5.7837, - "step": 1143 - }, - { - "epoch": 0.5966101694915255, - "grad_norm": 2.1614692211151123, - "learning_rate": 9.935376884422111e-05, - "loss": 6.2995, - "step": 1144 - }, - { - "epoch": 0.5971316818774446, - "grad_norm": 1.9652820825576782, - "learning_rate": 9.935276381909549e-05, - "loss": 6.2763, - "step": 1145 - }, - { - "epoch": 0.5976531942633637, - "grad_norm": 1.8606866598129272, - "learning_rate": 9.935175879396985e-05, - "loss": 6.3984, - "step": 1146 - }, - { - "epoch": 0.598174706649283, - "grad_norm": 3.1722075939178467, - "learning_rate": 9.935075376884423e-05, - "loss": 5.8785, - "step": 1147 - }, - { - "epoch": 0.5986962190352021, - "grad_norm": 1.7217590808868408, - "learning_rate": 9.93497487437186e-05, - "loss": 6.613, - "step": 1148 - }, - { - "epoch": 0.5992177314211212, - "grad_norm": 2.163100481033325, - "learning_rate": 9.934874371859297e-05, - "loss": 6.4155, - "step": 1149 - }, - { - "epoch": 0.5997392438070405, - "grad_norm": 1.902317762374878, - "learning_rate": 9.934773869346734e-05, - "loss": 6.3051, - "step": 1150 - }, - { - "epoch": 0.6002607561929596, - "grad_norm": 1.8997035026550293, - "learning_rate": 9.934673366834172e-05, - "loss": 6.5693, - "step": 1151 - }, - { - "epoch": 0.6007822685788787, - "grad_norm": 1.8708505630493164, - "learning_rate": 9.934572864321608e-05, - "loss": 6.4053, - "step": 1152 - }, - { - "epoch": 0.6013037809647979, - "grad_norm": 1.93083655834198, - "learning_rate": 9.934472361809045e-05, - "loss": 6.1654, - "step": 1153 - }, - { - "epoch": 0.6018252933507171, - "grad_norm": 1.918582558631897, - "learning_rate": 9.934371859296482e-05, - "loss": 5.8484, - "step": 1154 - }, - { - "epoch": 0.6023468057366362, - "grad_norm": 1.9105820655822754, - "learning_rate": 9.93427135678392e-05, - "loss": 6.4204, - "step": 1155 - }, - { - "epoch": 0.6028683181225554, - "grad_norm": 2.1229636669158936, - "learning_rate": 9.934170854271358e-05, - "loss": 6.3691, - "step": 1156 - }, - { - "epoch": 0.6033898305084746, - "grad_norm": 2.0528697967529297, - "learning_rate": 9.934070351758794e-05, - "loss": 6.735, - "step": 1157 - }, - { - "epoch": 0.6039113428943937, - "grad_norm": 2.188812732696533, - "learning_rate": 9.933969849246232e-05, - "loss": 6.3616, - "step": 1158 - }, - { - "epoch": 0.6044328552803129, - "grad_norm": 2.288559913635254, - "learning_rate": 9.933869346733669e-05, - "loss": 6.547, - "step": 1159 - }, - { - "epoch": 0.6049543676662321, - "grad_norm": 1.7310538291931152, - "learning_rate": 9.933768844221106e-05, - "loss": 6.6614, - "step": 1160 - }, - { - "epoch": 0.6054758800521512, - "grad_norm": 2.2639284133911133, - "learning_rate": 9.933668341708543e-05, - "loss": 6.3984, - "step": 1161 - }, - { - "epoch": 0.6059973924380704, - "grad_norm": 1.7019494771957397, - "learning_rate": 9.93356783919598e-05, - "loss": 6.5026, - "step": 1162 - }, - { - "epoch": 0.6065189048239896, - "grad_norm": 1.9687600135803223, - "learning_rate": 9.933467336683417e-05, - "loss": 6.4789, - "step": 1163 - }, - { - "epoch": 0.6070404172099088, - "grad_norm": 1.8984894752502441, - "learning_rate": 9.933366834170855e-05, - "loss": 6.4769, - "step": 1164 - }, - { - "epoch": 0.6075619295958279, - "grad_norm": 2.1345672607421875, - "learning_rate": 9.933266331658293e-05, - "loss": 6.1621, - "step": 1165 - }, - { - "epoch": 0.608083441981747, - "grad_norm": 2.157698392868042, - "learning_rate": 9.933165829145729e-05, - "loss": 6.2369, - "step": 1166 - }, - { - "epoch": 0.6086049543676663, - "grad_norm": 2.080808639526367, - "learning_rate": 9.933065326633167e-05, - "loss": 6.2962, - "step": 1167 - }, - { - "epoch": 0.6091264667535854, - "grad_norm": 1.8234134912490845, - "learning_rate": 9.932964824120603e-05, - "loss": 6.3768, - "step": 1168 - }, - { - "epoch": 0.6096479791395045, - "grad_norm": 1.9063458442687988, - "learning_rate": 9.932864321608041e-05, - "loss": 6.4957, - "step": 1169 - }, - { - "epoch": 0.6101694915254238, - "grad_norm": 1.779676914215088, - "learning_rate": 9.932763819095477e-05, - "loss": 6.2882, - "step": 1170 - }, - { - "epoch": 0.6106910039113429, - "grad_norm": 1.87655508518219, - "learning_rate": 9.932663316582915e-05, - "loss": 6.5468, - "step": 1171 - }, - { - "epoch": 0.611212516297262, - "grad_norm": 1.9955928325653076, - "learning_rate": 9.932562814070352e-05, - "loss": 5.9997, - "step": 1172 - }, - { - "epoch": 0.6117340286831813, - "grad_norm": 3.1170473098754883, - "learning_rate": 9.93246231155779e-05, - "loss": 6.168, - "step": 1173 - }, - { - "epoch": 0.6122555410691004, - "grad_norm": 2.2072954177856445, - "learning_rate": 9.932361809045226e-05, - "loss": 6.4796, - "step": 1174 - }, - { - "epoch": 0.6127770534550195, - "grad_norm": 2.2578601837158203, - "learning_rate": 9.932261306532664e-05, - "loss": 6.3757, - "step": 1175 - }, - { - "epoch": 0.6132985658409387, - "grad_norm": 2.2653486728668213, - "learning_rate": 9.932160804020101e-05, - "loss": 5.8629, - "step": 1176 - }, - { - "epoch": 0.6138200782268579, - "grad_norm": 2.1006174087524414, - "learning_rate": 9.932060301507539e-05, - "loss": 6.3708, - "step": 1177 - }, - { - "epoch": 0.614341590612777, - "grad_norm": 2.440850019454956, - "learning_rate": 9.931959798994976e-05, - "loss": 5.9661, - "step": 1178 - }, - { - "epoch": 0.6148631029986962, - "grad_norm": 1.9377793073654175, - "learning_rate": 9.931859296482413e-05, - "loss": 6.5853, - "step": 1179 - }, - { - "epoch": 0.6153846153846154, - "grad_norm": 1.788891077041626, - "learning_rate": 9.93175879396985e-05, - "loss": 6.4273, - "step": 1180 - }, - { - "epoch": 0.6159061277705346, - "grad_norm": 2.273021697998047, - "learning_rate": 9.931658291457286e-05, - "loss": 6.5507, - "step": 1181 - }, - { - "epoch": 0.6164276401564537, - "grad_norm": 1.8314498662948608, - "learning_rate": 9.931557788944724e-05, - "loss": 6.4072, - "step": 1182 - }, - { - "epoch": 0.6169491525423729, - "grad_norm": 1.9090126752853394, - "learning_rate": 9.93145728643216e-05, - "loss": 6.455, - "step": 1183 - }, - { - "epoch": 0.6174706649282921, - "grad_norm": 1.825119137763977, - "learning_rate": 9.931356783919598e-05, - "loss": 6.7681, - "step": 1184 - }, - { - "epoch": 0.6179921773142112, - "grad_norm": 1.8277357816696167, - "learning_rate": 9.931256281407036e-05, - "loss": 6.5272, - "step": 1185 - }, - { - "epoch": 0.6185136897001304, - "grad_norm": 2.247853994369507, - "learning_rate": 9.931155778894474e-05, - "loss": 5.9878, - "step": 1186 - }, - { - "epoch": 0.6190352020860496, - "grad_norm": 1.789474368095398, - "learning_rate": 9.93105527638191e-05, - "loss": 6.3125, - "step": 1187 - }, - { - "epoch": 0.6195567144719687, - "grad_norm": 2.32488751411438, - "learning_rate": 9.930954773869348e-05, - "loss": 5.7663, - "step": 1188 - }, - { - "epoch": 0.6200782268578878, - "grad_norm": 2.320885181427002, - "learning_rate": 9.930854271356784e-05, - "loss": 5.5406, - "step": 1189 - }, - { - "epoch": 0.6205997392438071, - "grad_norm": 1.9397251605987549, - "learning_rate": 9.930753768844222e-05, - "loss": 6.3136, - "step": 1190 - }, - { - "epoch": 0.6211212516297262, - "grad_norm": 1.7406009435653687, - "learning_rate": 9.930653266331659e-05, - "loss": 6.4718, - "step": 1191 - }, - { - "epoch": 0.6216427640156453, - "grad_norm": 2.0325028896331787, - "learning_rate": 9.930552763819096e-05, - "loss": 5.8915, - "step": 1192 - }, - { - "epoch": 0.6221642764015646, - "grad_norm": 2.1372628211975098, - "learning_rate": 9.930452261306533e-05, - "loss": 5.5932, - "step": 1193 - }, - { - "epoch": 0.6226857887874837, - "grad_norm": 2.163978099822998, - "learning_rate": 9.930351758793969e-05, - "loss": 5.8113, - "step": 1194 - }, - { - "epoch": 0.6232073011734028, - "grad_norm": 2.0743110179901123, - "learning_rate": 9.930251256281407e-05, - "loss": 6.4113, - "step": 1195 - }, - { - "epoch": 0.6237288135593221, - "grad_norm": 2.2374720573425293, - "learning_rate": 9.930150753768845e-05, - "loss": 6.1799, - "step": 1196 - }, - { - "epoch": 0.6242503259452412, - "grad_norm": 1.8585360050201416, - "learning_rate": 9.930050251256283e-05, - "loss": 6.4773, - "step": 1197 - }, - { - "epoch": 0.6247718383311603, - "grad_norm": 1.854195475578308, - "learning_rate": 9.929949748743719e-05, - "loss": 6.5368, - "step": 1198 - }, - { - "epoch": 0.6252933507170795, - "grad_norm": 1.9343338012695312, - "learning_rate": 9.929849246231157e-05, - "loss": 6.5308, - "step": 1199 - }, - { - "epoch": 0.6258148631029987, - "grad_norm": 1.9238635301589966, - "learning_rate": 9.929748743718593e-05, - "loss": 6.1491, - "step": 1200 - }, - { - "epoch": 0.6263363754889179, - "grad_norm": 2.309617280960083, - "learning_rate": 9.929648241206031e-05, - "loss": 6.5636, - "step": 1201 - }, - { - "epoch": 0.626857887874837, - "grad_norm": 1.8611011505126953, - "learning_rate": 9.929547738693467e-05, - "loss": 6.3989, - "step": 1202 - }, - { - "epoch": 0.6273794002607562, - "grad_norm": 2.3958685398101807, - "learning_rate": 9.929447236180905e-05, - "loss": 6.0467, - "step": 1203 - }, - { - "epoch": 0.6279009126466754, - "grad_norm": 2.3826165199279785, - "learning_rate": 9.929346733668342e-05, - "loss": 6.0121, - "step": 1204 - }, - { - "epoch": 0.6284224250325945, - "grad_norm": 2.1858012676239014, - "learning_rate": 9.92924623115578e-05, - "loss": 6.5982, - "step": 1205 - }, - { - "epoch": 0.6289439374185137, - "grad_norm": 1.8589776754379272, - "learning_rate": 9.929145728643217e-05, - "loss": 6.6164, - "step": 1206 - }, - { - "epoch": 0.6294654498044329, - "grad_norm": 1.6967905759811401, - "learning_rate": 9.929045226130654e-05, - "loss": 6.0849, - "step": 1207 - }, - { - "epoch": 0.629986962190352, - "grad_norm": 1.9245147705078125, - "learning_rate": 9.928944723618091e-05, - "loss": 6.126, - "step": 1208 - }, - { - "epoch": 0.6305084745762712, - "grad_norm": 1.9393119812011719, - "learning_rate": 9.928844221105528e-05, - "loss": 6.4905, - "step": 1209 - }, - { - "epoch": 0.6310299869621904, - "grad_norm": 1.8244280815124512, - "learning_rate": 9.928743718592966e-05, - "loss": 6.3695, - "step": 1210 - }, - { - "epoch": 0.6315514993481095, - "grad_norm": 2.2182328701019287, - "learning_rate": 9.928643216080402e-05, - "loss": 5.9904, - "step": 1211 - }, - { - "epoch": 0.6320730117340286, - "grad_norm": 1.8193329572677612, - "learning_rate": 9.92854271356784e-05, - "loss": 6.4533, - "step": 1212 - }, - { - "epoch": 0.6325945241199479, - "grad_norm": 1.874064326286316, - "learning_rate": 9.928442211055276e-05, - "loss": 6.4634, - "step": 1213 - }, - { - "epoch": 0.633116036505867, - "grad_norm": 2.1150290966033936, - "learning_rate": 9.928341708542714e-05, - "loss": 6.2769, - "step": 1214 - }, - { - "epoch": 0.6336375488917861, - "grad_norm": 1.9426897764205933, - "learning_rate": 9.92824120603015e-05, - "loss": 6.3897, - "step": 1215 - }, - { - "epoch": 0.6341590612777054, - "grad_norm": 1.9069862365722656, - "learning_rate": 9.928140703517588e-05, - "loss": 6.0429, - "step": 1216 - }, - { - "epoch": 0.6346805736636245, - "grad_norm": 1.8784018754959106, - "learning_rate": 9.928040201005026e-05, - "loss": 6.5544, - "step": 1217 - }, - { - "epoch": 0.6352020860495436, - "grad_norm": 1.7192256450653076, - "learning_rate": 9.927939698492464e-05, - "loss": 6.4387, - "step": 1218 - }, - { - "epoch": 0.6357235984354629, - "grad_norm": 1.593495488166809, - "learning_rate": 9.9278391959799e-05, - "loss": 6.5836, - "step": 1219 - }, - { - "epoch": 0.636245110821382, - "grad_norm": 1.7476557493209839, - "learning_rate": 9.927738693467337e-05, - "loss": 6.2185, - "step": 1220 - }, - { - "epoch": 0.6367666232073012, - "grad_norm": 1.6952474117279053, - "learning_rate": 9.927638190954774e-05, - "loss": 6.4145, - "step": 1221 - }, - { - "epoch": 0.6372881355932203, - "grad_norm": 1.852982521057129, - "learning_rate": 9.927537688442211e-05, - "loss": 6.4145, - "step": 1222 - }, - { - "epoch": 0.6378096479791395, - "grad_norm": 1.8660837411880493, - "learning_rate": 9.927437185929649e-05, - "loss": 6.0361, - "step": 1223 - }, - { - "epoch": 0.6383311603650587, - "grad_norm": 1.7795859575271606, - "learning_rate": 9.927336683417085e-05, - "loss": 6.2594, - "step": 1224 - }, - { - "epoch": 0.6388526727509778, - "grad_norm": 1.7390978336334229, - "learning_rate": 9.927236180904523e-05, - "loss": 6.2544, - "step": 1225 - }, - { - "epoch": 0.639374185136897, - "grad_norm": 1.9701423645019531, - "learning_rate": 9.927135678391961e-05, - "loss": 6.1094, - "step": 1226 - }, - { - "epoch": 0.6398956975228162, - "grad_norm": 2.1514768600463867, - "learning_rate": 9.927035175879398e-05, - "loss": 6.4946, - "step": 1227 - }, - { - "epoch": 0.6404172099087353, - "grad_norm": 1.6748924255371094, - "learning_rate": 9.926934673366835e-05, - "loss": 6.6379, - "step": 1228 - }, - { - "epoch": 0.6409387222946545, - "grad_norm": 2.049246311187744, - "learning_rate": 9.926834170854273e-05, - "loss": 6.0481, - "step": 1229 - }, - { - "epoch": 0.6414602346805737, - "grad_norm": 2.0611886978149414, - "learning_rate": 9.926733668341709e-05, - "loss": 6.5221, - "step": 1230 - }, - { - "epoch": 0.6419817470664928, - "grad_norm": 2.0248711109161377, - "learning_rate": 9.926633165829147e-05, - "loss": 6.4768, - "step": 1231 - }, - { - "epoch": 0.642503259452412, - "grad_norm": 1.9739201068878174, - "learning_rate": 9.926532663316583e-05, - "loss": 6.5902, - "step": 1232 - }, - { - "epoch": 0.6430247718383312, - "grad_norm": 2.2955453395843506, - "learning_rate": 9.92643216080402e-05, - "loss": 6.2011, - "step": 1233 - }, - { - "epoch": 0.6435462842242503, - "grad_norm": 2.43315052986145, - "learning_rate": 9.926331658291458e-05, - "loss": 6.4694, - "step": 1234 - }, - { - "epoch": 0.6440677966101694, - "grad_norm": 2.2616562843322754, - "learning_rate": 9.926231155778894e-05, - "loss": 6.0659, - "step": 1235 - }, - { - "epoch": 0.6445893089960887, - "grad_norm": 1.7571403980255127, - "learning_rate": 9.926130653266332e-05, - "loss": 6.574, - "step": 1236 - }, - { - "epoch": 0.6451108213820078, - "grad_norm": 2.2987818717956543, - "learning_rate": 9.92603015075377e-05, - "loss": 5.557, - "step": 1237 - }, - { - "epoch": 0.645632333767927, - "grad_norm": 1.8230011463165283, - "learning_rate": 9.925929648241207e-05, - "loss": 6.251, - "step": 1238 - }, - { - "epoch": 0.6461538461538462, - "grad_norm": 2.1173017024993896, - "learning_rate": 9.925829145728644e-05, - "loss": 6.4815, - "step": 1239 - }, - { - "epoch": 0.6466753585397653, - "grad_norm": 1.8613569736480713, - "learning_rate": 9.925728643216082e-05, - "loss": 6.6348, - "step": 1240 - }, - { - "epoch": 0.6471968709256845, - "grad_norm": 1.6213207244873047, - "learning_rate": 9.925628140703518e-05, - "loss": 6.5309, - "step": 1241 - }, - { - "epoch": 0.6477183833116037, - "grad_norm": 2.0249369144439697, - "learning_rate": 9.925527638190956e-05, - "loss": 6.5142, - "step": 1242 - }, - { - "epoch": 0.6482398956975228, - "grad_norm": 2.0371780395507812, - "learning_rate": 9.925427135678392e-05, - "loss": 6.3588, - "step": 1243 - }, - { - "epoch": 0.648761408083442, - "grad_norm": 2.447199583053589, - "learning_rate": 9.92532663316583e-05, - "loss": 6.3764, - "step": 1244 - }, - { - "epoch": 0.6492829204693612, - "grad_norm": 1.746205449104309, - "learning_rate": 9.925226130653266e-05, - "loss": 6.3675, - "step": 1245 - }, - { - "epoch": 0.6498044328552803, - "grad_norm": 2.0738072395324707, - "learning_rate": 9.925125628140703e-05, - "loss": 5.9435, - "step": 1246 - }, - { - "epoch": 0.6503259452411995, - "grad_norm": 2.0077099800109863, - "learning_rate": 9.92502512562814e-05, - "loss": 6.0831, - "step": 1247 - }, - { - "epoch": 0.6508474576271186, - "grad_norm": 1.974311351776123, - "learning_rate": 9.924924623115578e-05, - "loss": 6.3185, - "step": 1248 - }, - { - "epoch": 0.6513689700130378, - "grad_norm": 1.8912620544433594, - "learning_rate": 9.924824120603016e-05, - "loss": 6.7106, - "step": 1249 - }, - { - "epoch": 0.651890482398957, - "grad_norm": 1.8888311386108398, - "learning_rate": 9.924723618090453e-05, - "loss": 6.4595, - "step": 1250 - }, - { - "epoch": 0.6524119947848761, - "grad_norm": 1.9805779457092285, - "learning_rate": 9.92462311557789e-05, - "loss": 6.5483, - "step": 1251 - }, - { - "epoch": 0.6529335071707953, - "grad_norm": 2.192660093307495, - "learning_rate": 9.924522613065327e-05, - "loss": 6.4706, - "step": 1252 - }, - { - "epoch": 0.6534550195567145, - "grad_norm": 1.837544560432434, - "learning_rate": 9.924422110552765e-05, - "loss": 6.4827, - "step": 1253 - }, - { - "epoch": 0.6539765319426336, - "grad_norm": 1.8572523593902588, - "learning_rate": 9.924321608040201e-05, - "loss": 6.4135, - "step": 1254 - }, - { - "epoch": 0.6544980443285529, - "grad_norm": 2.07061767578125, - "learning_rate": 9.924221105527639e-05, - "loss": 6.0722, - "step": 1255 - }, - { - "epoch": 0.655019556714472, - "grad_norm": 2.2605385780334473, - "learning_rate": 9.924120603015075e-05, - "loss": 5.9388, - "step": 1256 - }, - { - "epoch": 0.6555410691003911, - "grad_norm": 2.2526824474334717, - "learning_rate": 9.924020100502513e-05, - "loss": 5.918, - "step": 1257 - }, - { - "epoch": 0.6560625814863102, - "grad_norm": 1.8854278326034546, - "learning_rate": 9.923919597989951e-05, - "loss": 6.0172, - "step": 1258 - }, - { - "epoch": 0.6565840938722295, - "grad_norm": 1.7376154661178589, - "learning_rate": 9.923819095477387e-05, - "loss": 5.8039, - "step": 1259 - }, - { - "epoch": 0.6571056062581486, - "grad_norm": 1.967217206954956, - "learning_rate": 9.923718592964825e-05, - "loss": 5.795, - "step": 1260 - }, - { - "epoch": 0.6576271186440678, - "grad_norm": 1.7940503358840942, - "learning_rate": 9.923618090452261e-05, - "loss": 6.5282, - "step": 1261 - }, - { - "epoch": 0.658148631029987, - "grad_norm": 1.6767634153366089, - "learning_rate": 9.923517587939699e-05, - "loss": 6.6692, - "step": 1262 - }, - { - "epoch": 0.6586701434159061, - "grad_norm": 2.1176092624664307, - "learning_rate": 9.923417085427136e-05, - "loss": 6.3115, - "step": 1263 - }, - { - "epoch": 0.6591916558018253, - "grad_norm": 2.351166248321533, - "learning_rate": 9.923316582914573e-05, - "loss": 6.158, - "step": 1264 - }, - { - "epoch": 0.6597131681877445, - "grad_norm": 2.062457323074341, - "learning_rate": 9.92321608040201e-05, - "loss": 4.877, - "step": 1265 - }, - { - "epoch": 0.6602346805736636, - "grad_norm": 2.187218427658081, - "learning_rate": 9.923115577889448e-05, - "loss": 6.1125, - "step": 1266 - }, - { - "epoch": 0.6607561929595828, - "grad_norm": 2.3859918117523193, - "learning_rate": 9.923015075376884e-05, - "loss": 5.9177, - "step": 1267 - }, - { - "epoch": 0.661277705345502, - "grad_norm": 2.109041213989258, - "learning_rate": 9.922914572864322e-05, - "loss": 6.5515, - "step": 1268 - }, - { - "epoch": 0.6617992177314211, - "grad_norm": 2.451289415359497, - "learning_rate": 9.92281407035176e-05, - "loss": 6.0999, - "step": 1269 - }, - { - "epoch": 0.6623207301173403, - "grad_norm": 1.7792937755584717, - "learning_rate": 9.922713567839197e-05, - "loss": 6.3901, - "step": 1270 - }, - { - "epoch": 0.6628422425032594, - "grad_norm": 2.1809239387512207, - "learning_rate": 9.922613065326634e-05, - "loss": 5.8402, - "step": 1271 - }, - { - "epoch": 0.6633637548891786, - "grad_norm": 1.8536064624786377, - "learning_rate": 9.922512562814072e-05, - "loss": 6.301, - "step": 1272 - }, - { - "epoch": 0.6638852672750978, - "grad_norm": 1.9039344787597656, - "learning_rate": 9.922412060301508e-05, - "loss": 6.2659, - "step": 1273 - }, - { - "epoch": 0.6644067796610169, - "grad_norm": 2.198302984237671, - "learning_rate": 9.922311557788944e-05, - "loss": 5.824, - "step": 1274 - }, - { - "epoch": 0.6649282920469362, - "grad_norm": 1.8464465141296387, - "learning_rate": 9.922211055276382e-05, - "loss": 6.401, - "step": 1275 - }, - { - "epoch": 0.6654498044328553, - "grad_norm": 1.6977571249008179, - "learning_rate": 9.922110552763819e-05, - "loss": 6.2889, - "step": 1276 - }, - { - "epoch": 0.6659713168187744, - "grad_norm": 1.9438045024871826, - "learning_rate": 9.922010050251256e-05, - "loss": 6.6902, - "step": 1277 - }, - { - "epoch": 0.6664928292046937, - "grad_norm": 1.8544456958770752, - "learning_rate": 9.921909547738694e-05, - "loss": 6.3197, - "step": 1278 - }, - { - "epoch": 0.6670143415906128, - "grad_norm": 2.1300861835479736, - "learning_rate": 9.921809045226132e-05, - "loss": 6.284, - "step": 1279 - }, - { - "epoch": 0.6675358539765319, - "grad_norm": 2.162370204925537, - "learning_rate": 9.921708542713568e-05, - "loss": 6.545, - "step": 1280 - }, - { - "epoch": 0.668057366362451, - "grad_norm": 2.0180537700653076, - "learning_rate": 9.921608040201006e-05, - "loss": 6.4824, - "step": 1281 - }, - { - "epoch": 0.6685788787483703, - "grad_norm": 1.8317381143569946, - "learning_rate": 9.921507537688443e-05, - "loss": 6.3651, - "step": 1282 - }, - { - "epoch": 0.6691003911342894, - "grad_norm": 1.8794056177139282, - "learning_rate": 9.92140703517588e-05, - "loss": 6.5026, - "step": 1283 - }, - { - "epoch": 0.6696219035202086, - "grad_norm": 1.8696671724319458, - "learning_rate": 9.921306532663317e-05, - "loss": 6.3876, - "step": 1284 - }, - { - "epoch": 0.6701434159061278, - "grad_norm": 1.8482470512390137, - "learning_rate": 9.921206030150755e-05, - "loss": 6.389, - "step": 1285 - }, - { - "epoch": 0.6706649282920469, - "grad_norm": 1.7337751388549805, - "learning_rate": 9.921105527638191e-05, - "loss": 6.4841, - "step": 1286 - }, - { - "epoch": 0.6711864406779661, - "grad_norm": 2.004901647567749, - "learning_rate": 9.921005025125627e-05, - "loss": 5.9165, - "step": 1287 - }, - { - "epoch": 0.6717079530638853, - "grad_norm": 2.050809383392334, - "learning_rate": 9.920904522613065e-05, - "loss": 6.1752, - "step": 1288 - }, - { - "epoch": 0.6722294654498044, - "grad_norm": 1.867122769355774, - "learning_rate": 9.920804020100503e-05, - "loss": 6.0666, - "step": 1289 - }, - { - "epoch": 0.6727509778357236, - "grad_norm": 1.7209299802780151, - "learning_rate": 9.920703517587941e-05, - "loss": 6.2589, - "step": 1290 - }, - { - "epoch": 0.6732724902216428, - "grad_norm": 1.833649754524231, - "learning_rate": 9.920603015075377e-05, - "loss": 6.3143, - "step": 1291 - }, - { - "epoch": 0.673794002607562, - "grad_norm": 1.895180344581604, - "learning_rate": 9.920502512562815e-05, - "loss": 6.3483, - "step": 1292 - }, - { - "epoch": 0.6743155149934811, - "grad_norm": 2.510172128677368, - "learning_rate": 9.920402010050251e-05, - "loss": 6.0177, - "step": 1293 - }, - { - "epoch": 0.6748370273794002, - "grad_norm": 1.880961298942566, - "learning_rate": 9.920301507537689e-05, - "loss": 6.184, - "step": 1294 - }, - { - "epoch": 0.6753585397653195, - "grad_norm": 1.997738242149353, - "learning_rate": 9.920201005025126e-05, - "loss": 6.4835, - "step": 1295 - }, - { - "epoch": 0.6758800521512386, - "grad_norm": 1.7524503469467163, - "learning_rate": 9.920100502512563e-05, - "loss": 6.5082, - "step": 1296 - }, - { - "epoch": 0.6764015645371577, - "grad_norm": 1.8719860315322876, - "learning_rate": 9.92e-05, - "loss": 6.1785, - "step": 1297 - }, - { - "epoch": 0.676923076923077, - "grad_norm": 2.0365631580352783, - "learning_rate": 9.919899497487438e-05, - "loss": 6.6612, - "step": 1298 - }, - { - "epoch": 0.6774445893089961, - "grad_norm": 1.6351118087768555, - "learning_rate": 9.919798994974875e-05, - "loss": 6.6928, - "step": 1299 - }, - { - "epoch": 0.6779661016949152, - "grad_norm": 2.222693920135498, - "learning_rate": 9.919698492462312e-05, - "loss": 6.0948, - "step": 1300 - }, - { - "epoch": 0.6784876140808345, - "grad_norm": 2.037600517272949, - "learning_rate": 9.91959798994975e-05, - "loss": 5.8376, - "step": 1301 - }, - { - "epoch": 0.6790091264667536, - "grad_norm": 1.7934489250183105, - "learning_rate": 9.919497487437186e-05, - "loss": 6.1895, - "step": 1302 - }, - { - "epoch": 0.6795306388526727, - "grad_norm": 1.9172230958938599, - "learning_rate": 9.919396984924624e-05, - "loss": 5.7558, - "step": 1303 - }, - { - "epoch": 0.6800521512385919, - "grad_norm": 2.754615068435669, - "learning_rate": 9.91929648241206e-05, - "loss": 5.9009, - "step": 1304 - }, - { - "epoch": 0.6805736636245111, - "grad_norm": 2.217742443084717, - "learning_rate": 9.919195979899498e-05, - "loss": 6.3688, - "step": 1305 - }, - { - "epoch": 0.6810951760104302, - "grad_norm": 1.9266620874404907, - "learning_rate": 9.919095477386935e-05, - "loss": 6.1365, - "step": 1306 - }, - { - "epoch": 0.6816166883963494, - "grad_norm": 2.358166217803955, - "learning_rate": 9.918994974874372e-05, - "loss": 6.5645, - "step": 1307 - }, - { - "epoch": 0.6821382007822686, - "grad_norm": 2.411017417907715, - "learning_rate": 9.918894472361809e-05, - "loss": 6.2203, - "step": 1308 - }, - { - "epoch": 0.6826597131681877, - "grad_norm": 1.6456941366195679, - "learning_rate": 9.918793969849247e-05, - "loss": 6.2156, - "step": 1309 - }, - { - "epoch": 0.6831812255541069, - "grad_norm": 2.295139789581299, - "learning_rate": 9.918693467336684e-05, - "loss": 6.4303, - "step": 1310 - }, - { - "epoch": 0.6837027379400261, - "grad_norm": 2.089681625366211, - "learning_rate": 9.918592964824122e-05, - "loss": 5.9316, - "step": 1311 - }, - { - "epoch": 0.6842242503259452, - "grad_norm": 2.998058319091797, - "learning_rate": 9.918492462311559e-05, - "loss": 6.3628, - "step": 1312 - }, - { - "epoch": 0.6847457627118644, - "grad_norm": 2.1547327041625977, - "learning_rate": 9.918391959798995e-05, - "loss": 5.8071, - "step": 1313 - }, - { - "epoch": 0.6852672750977836, - "grad_norm": 1.840842366218567, - "learning_rate": 9.918291457286433e-05, - "loss": 6.4315, - "step": 1314 - }, - { - "epoch": 0.6857887874837028, - "grad_norm": 2.0396242141723633, - "learning_rate": 9.918190954773869e-05, - "loss": 5.8021, - "step": 1315 - }, - { - "epoch": 0.6863102998696219, - "grad_norm": 1.7985059022903442, - "learning_rate": 9.918090452261307e-05, - "loss": 6.1555, - "step": 1316 - }, - { - "epoch": 0.686831812255541, - "grad_norm": 1.8200675249099731, - "learning_rate": 9.917989949748743e-05, - "loss": 6.2221, - "step": 1317 - }, - { - "epoch": 0.6873533246414603, - "grad_norm": 1.65859055519104, - "learning_rate": 9.917889447236181e-05, - "loss": 6.3274, - "step": 1318 - }, - { - "epoch": 0.6878748370273794, - "grad_norm": 2.2409865856170654, - "learning_rate": 9.917788944723619e-05, - "loss": 6.2698, - "step": 1319 - }, - { - "epoch": 0.6883963494132985, - "grad_norm": 1.8096379041671753, - "learning_rate": 9.917688442211057e-05, - "loss": 6.1951, - "step": 1320 - }, - { - "epoch": 0.6889178617992178, - "grad_norm": 1.733154058456421, - "learning_rate": 9.917587939698493e-05, - "loss": 6.5585, - "step": 1321 - }, - { - "epoch": 0.6894393741851369, - "grad_norm": 1.6778079271316528, - "learning_rate": 9.917487437185931e-05, - "loss": 6.3903, - "step": 1322 - }, - { - "epoch": 0.689960886571056, - "grad_norm": 1.8648881912231445, - "learning_rate": 9.917386934673367e-05, - "loss": 6.0461, - "step": 1323 - }, - { - "epoch": 0.6904823989569753, - "grad_norm": 1.7817151546478271, - "learning_rate": 9.917286432160805e-05, - "loss": 5.8143, - "step": 1324 - }, - { - "epoch": 0.6910039113428944, - "grad_norm": 1.9449816942214966, - "learning_rate": 9.917185929648242e-05, - "loss": 6.1508, - "step": 1325 - }, - { - "epoch": 0.6915254237288135, - "grad_norm": 1.79619562625885, - "learning_rate": 9.917085427135678e-05, - "loss": 6.3163, - "step": 1326 - }, - { - "epoch": 0.6920469361147328, - "grad_norm": 1.4900660514831543, - "learning_rate": 9.916984924623116e-05, - "loss": 6.5095, - "step": 1327 - }, - { - "epoch": 0.6925684485006519, - "grad_norm": 1.7384716272354126, - "learning_rate": 9.916884422110552e-05, - "loss": 6.5707, - "step": 1328 - }, - { - "epoch": 0.693089960886571, - "grad_norm": 1.6734282970428467, - "learning_rate": 9.91678391959799e-05, - "loss": 6.5893, - "step": 1329 - }, - { - "epoch": 0.6936114732724902, - "grad_norm": 1.81753671169281, - "learning_rate": 9.916683417085428e-05, - "loss": 5.9292, - "step": 1330 - }, - { - "epoch": 0.6941329856584094, - "grad_norm": 2.5046372413635254, - "learning_rate": 9.916582914572866e-05, - "loss": 5.8917, - "step": 1331 - }, - { - "epoch": 0.6946544980443285, - "grad_norm": 1.9453530311584473, - "learning_rate": 9.916482412060302e-05, - "loss": 6.4001, - "step": 1332 - }, - { - "epoch": 0.6951760104302477, - "grad_norm": 2.032595634460449, - "learning_rate": 9.91638190954774e-05, - "loss": 6.527, - "step": 1333 - }, - { - "epoch": 0.6956975228161669, - "grad_norm": 2.470912218093872, - "learning_rate": 9.916281407035176e-05, - "loss": 5.1709, - "step": 1334 - }, - { - "epoch": 0.6962190352020861, - "grad_norm": 2.646174430847168, - "learning_rate": 9.916180904522614e-05, - "loss": 6.122, - "step": 1335 - }, - { - "epoch": 0.6967405475880052, - "grad_norm": 2.1192989349365234, - "learning_rate": 9.91608040201005e-05, - "loss": 6.1945, - "step": 1336 - }, - { - "epoch": 0.6972620599739244, - "grad_norm": 2.0626115798950195, - "learning_rate": 9.915979899497488e-05, - "loss": 5.863, - "step": 1337 - }, - { - "epoch": 0.6977835723598436, - "grad_norm": 1.9882649183273315, - "learning_rate": 9.915879396984925e-05, - "loss": 6.5519, - "step": 1338 - }, - { - "epoch": 0.6983050847457627, - "grad_norm": 1.8993260860443115, - "learning_rate": 9.915778894472362e-05, - "loss": 6.4309, - "step": 1339 - }, - { - "epoch": 0.6988265971316818, - "grad_norm": 1.897606611251831, - "learning_rate": 9.9156783919598e-05, - "loss": 6.1618, - "step": 1340 - }, - { - "epoch": 0.6993481095176011, - "grad_norm": 1.8617669343948364, - "learning_rate": 9.915577889447237e-05, - "loss": 6.468, - "step": 1341 - }, - { - "epoch": 0.6998696219035202, - "grad_norm": 1.8829879760742188, - "learning_rate": 9.915477386934674e-05, - "loss": 6.144, - "step": 1342 - }, - { - "epoch": 0.7003911342894393, - "grad_norm": 2.290776491165161, - "learning_rate": 9.915376884422111e-05, - "loss": 6.3441, - "step": 1343 - }, - { - "epoch": 0.7009126466753586, - "grad_norm": 1.8587820529937744, - "learning_rate": 9.915276381909549e-05, - "loss": 6.2323, - "step": 1344 - }, - { - "epoch": 0.7014341590612777, - "grad_norm": 2.2007639408111572, - "learning_rate": 9.915175879396985e-05, - "loss": 6.229, - "step": 1345 - }, - { - "epoch": 0.7019556714471968, - "grad_norm": 1.9643776416778564, - "learning_rate": 9.915075376884423e-05, - "loss": 6.2255, - "step": 1346 - }, - { - "epoch": 0.7024771838331161, - "grad_norm": 1.7954367399215698, - "learning_rate": 9.914974874371859e-05, - "loss": 6.4048, - "step": 1347 - }, - { - "epoch": 0.7029986962190352, - "grad_norm": 1.8727362155914307, - "learning_rate": 9.914874371859297e-05, - "loss": 6.0571, - "step": 1348 - }, - { - "epoch": 0.7035202086049543, - "grad_norm": 1.7820097208023071, - "learning_rate": 9.914773869346733e-05, - "loss": 6.4511, - "step": 1349 - }, - { - "epoch": 0.7040417209908736, - "grad_norm": 1.9630566835403442, - "learning_rate": 9.914673366834171e-05, - "loss": 6.0497, - "step": 1350 - }, - { - "epoch": 0.7045632333767927, - "grad_norm": 2.033202648162842, - "learning_rate": 9.914572864321609e-05, - "loss": 5.5071, - "step": 1351 - }, - { - "epoch": 0.7050847457627119, - "grad_norm": 2.129666566848755, - "learning_rate": 9.914472361809045e-05, - "loss": 6.2073, - "step": 1352 - }, - { - "epoch": 0.705606258148631, - "grad_norm": 2.2263948917388916, - "learning_rate": 9.914371859296483e-05, - "loss": 6.3499, - "step": 1353 - }, - { - "epoch": 0.7061277705345502, - "grad_norm": 1.6847009658813477, - "learning_rate": 9.91427135678392e-05, - "loss": 6.4733, - "step": 1354 - }, - { - "epoch": 0.7066492829204694, - "grad_norm": 2.293405771255493, - "learning_rate": 9.914170854271357e-05, - "loss": 5.9952, - "step": 1355 - }, - { - "epoch": 0.7071707953063885, - "grad_norm": 1.9283884763717651, - "learning_rate": 9.914070351758794e-05, - "loss": 6.4086, - "step": 1356 - }, - { - "epoch": 0.7076923076923077, - "grad_norm": 2.0034866333007812, - "learning_rate": 9.913969849246232e-05, - "loss": 6.5055, - "step": 1357 - }, - { - "epoch": 0.7082138200782269, - "grad_norm": 1.8390480279922485, - "learning_rate": 9.913869346733668e-05, - "loss": 6.4887, - "step": 1358 - }, - { - "epoch": 0.708735332464146, - "grad_norm": 1.684147596359253, - "learning_rate": 9.913768844221106e-05, - "loss": 6.2046, - "step": 1359 - }, - { - "epoch": 0.7092568448500652, - "grad_norm": 2.04543399810791, - "learning_rate": 9.913668341708544e-05, - "loss": 6.0661, - "step": 1360 - }, - { - "epoch": 0.7097783572359844, - "grad_norm": 1.8137935400009155, - "learning_rate": 9.913567839195981e-05, - "loss": 6.0942, - "step": 1361 - }, - { - "epoch": 0.7102998696219035, - "grad_norm": 1.6837714910507202, - "learning_rate": 9.913467336683418e-05, - "loss": 6.2989, - "step": 1362 - }, - { - "epoch": 0.7108213820078226, - "grad_norm": 2.3682711124420166, - "learning_rate": 9.913366834170856e-05, - "loss": 6.2318, - "step": 1363 - }, - { - "epoch": 0.7113428943937419, - "grad_norm": 1.941892385482788, - "learning_rate": 9.913266331658292e-05, - "loss": 6.0932, - "step": 1364 - }, - { - "epoch": 0.711864406779661, - "grad_norm": 2.045935869216919, - "learning_rate": 9.91316582914573e-05, - "loss": 5.7107, - "step": 1365 - }, - { - "epoch": 0.7123859191655801, - "grad_norm": 1.923986792564392, - "learning_rate": 9.913065326633166e-05, - "loss": 6.3409, - "step": 1366 - }, - { - "epoch": 0.7129074315514994, - "grad_norm": 1.878115177154541, - "learning_rate": 9.912964824120603e-05, - "loss": 6.1724, - "step": 1367 - }, - { - "epoch": 0.7134289439374185, - "grad_norm": 1.9086147546768188, - "learning_rate": 9.91286432160804e-05, - "loss": 6.2958, - "step": 1368 - }, - { - "epoch": 0.7139504563233376, - "grad_norm": 1.8487077951431274, - "learning_rate": 9.912763819095477e-05, - "loss": 6.2885, - "step": 1369 - }, - { - "epoch": 0.7144719687092569, - "grad_norm": 1.9221127033233643, - "learning_rate": 9.912663316582915e-05, - "loss": 6.079, - "step": 1370 - }, - { - "epoch": 0.714993481095176, - "grad_norm": 1.7881758213043213, - "learning_rate": 9.912562814070352e-05, - "loss": 6.4793, - "step": 1371 - }, - { - "epoch": 0.7155149934810952, - "grad_norm": 2.078024387359619, - "learning_rate": 9.91246231155779e-05, - "loss": 6.5469, - "step": 1372 - }, - { - "epoch": 0.7160365058670144, - "grad_norm": 2.1184799671173096, - "learning_rate": 9.912361809045227e-05, - "loss": 6.1762, - "step": 1373 - }, - { - "epoch": 0.7165580182529335, - "grad_norm": 1.7050516605377197, - "learning_rate": 9.912261306532664e-05, - "loss": 6.5793, - "step": 1374 - }, - { - "epoch": 0.7170795306388527, - "grad_norm": 1.6978082656860352, - "learning_rate": 9.912160804020101e-05, - "loss": 6.4262, - "step": 1375 - }, - { - "epoch": 0.7176010430247718, - "grad_norm": 1.8031973838806152, - "learning_rate": 9.912060301507539e-05, - "loss": 6.4686, - "step": 1376 - }, - { - "epoch": 0.718122555410691, - "grad_norm": 2.0421676635742188, - "learning_rate": 9.911959798994975e-05, - "loss": 6.0667, - "step": 1377 - }, - { - "epoch": 0.7186440677966102, - "grad_norm": 1.668738842010498, - "learning_rate": 9.911859296482413e-05, - "loss": 6.6257, - "step": 1378 - }, - { - "epoch": 0.7191655801825293, - "grad_norm": 1.8086986541748047, - "learning_rate": 9.911758793969849e-05, - "loss": 6.1899, - "step": 1379 - }, - { - "epoch": 0.7196870925684485, - "grad_norm": 1.7863272428512573, - "learning_rate": 9.911658291457287e-05, - "loss": 6.4031, - "step": 1380 - }, - { - "epoch": 0.7202086049543677, - "grad_norm": 1.7395962476730347, - "learning_rate": 9.911557788944725e-05, - "loss": 6.5891, - "step": 1381 - }, - { - "epoch": 0.7207301173402868, - "grad_norm": 2.206829309463501, - "learning_rate": 9.911457286432161e-05, - "loss": 5.8696, - "step": 1382 - }, - { - "epoch": 0.721251629726206, - "grad_norm": 1.7793954610824585, - "learning_rate": 9.911356783919599e-05, - "loss": 6.4146, - "step": 1383 - }, - { - "epoch": 0.7217731421121252, - "grad_norm": 1.937822699546814, - "learning_rate": 9.911256281407035e-05, - "loss": 5.8145, - "step": 1384 - }, - { - "epoch": 0.7222946544980443, - "grad_norm": 1.963646650314331, - "learning_rate": 9.911155778894473e-05, - "loss": 6.1204, - "step": 1385 - }, - { - "epoch": 0.7228161668839634, - "grad_norm": 2.0328638553619385, - "learning_rate": 9.91105527638191e-05, - "loss": 6.1133, - "step": 1386 - }, - { - "epoch": 0.7233376792698827, - "grad_norm": 2.0667145252227783, - "learning_rate": 9.910954773869347e-05, - "loss": 6.0303, - "step": 1387 - }, - { - "epoch": 0.7238591916558018, - "grad_norm": 1.812839150428772, - "learning_rate": 9.910854271356784e-05, - "loss": 6.547, - "step": 1388 - }, - { - "epoch": 0.724380704041721, - "grad_norm": 1.7212921380996704, - "learning_rate": 9.910753768844222e-05, - "loss": 6.521, - "step": 1389 - }, - { - "epoch": 0.7249022164276402, - "grad_norm": 1.8122483491897583, - "learning_rate": 9.910653266331658e-05, - "loss": 6.5369, - "step": 1390 - }, - { - "epoch": 0.7254237288135593, - "grad_norm": 1.9334791898727417, - "learning_rate": 9.910552763819096e-05, - "loss": 5.997, - "step": 1391 - }, - { - "epoch": 0.7259452411994785, - "grad_norm": 1.8224554061889648, - "learning_rate": 9.910452261306534e-05, - "loss": 6.3573, - "step": 1392 - }, - { - "epoch": 0.7264667535853977, - "grad_norm": 1.8500584363937378, - "learning_rate": 9.91035175879397e-05, - "loss": 6.3884, - "step": 1393 - }, - { - "epoch": 0.7269882659713168, - "grad_norm": 2.17646861076355, - "learning_rate": 9.910251256281408e-05, - "loss": 6.4312, - "step": 1394 - }, - { - "epoch": 0.727509778357236, - "grad_norm": 2.113665819168091, - "learning_rate": 9.910150753768844e-05, - "loss": 6.0552, - "step": 1395 - }, - { - "epoch": 0.7280312907431552, - "grad_norm": 1.659449815750122, - "learning_rate": 9.910050251256282e-05, - "loss": 6.3511, - "step": 1396 - }, - { - "epoch": 0.7285528031290743, - "grad_norm": 1.7127867937088013, - "learning_rate": 9.909949748743719e-05, - "loss": 6.3485, - "step": 1397 - }, - { - "epoch": 0.7290743155149935, - "grad_norm": 1.88431715965271, - "learning_rate": 9.909849246231156e-05, - "loss": 6.5242, - "step": 1398 - }, - { - "epoch": 0.7295958279009126, - "grad_norm": 1.7146869897842407, - "learning_rate": 9.909748743718593e-05, - "loss": 6.4066, - "step": 1399 - }, - { - "epoch": 0.7301173402868318, - "grad_norm": 1.7314929962158203, - "learning_rate": 9.90964824120603e-05, - "loss": 6.2814, - "step": 1400 - }, - { - "epoch": 0.730638852672751, - "grad_norm": 1.812191367149353, - "learning_rate": 9.909547738693468e-05, - "loss": 6.0934, - "step": 1401 - }, - { - "epoch": 0.7311603650586701, - "grad_norm": 1.746126651763916, - "learning_rate": 9.909447236180906e-05, - "loss": 6.4359, - "step": 1402 - }, - { - "epoch": 0.7316818774445893, - "grad_norm": 2.3535757064819336, - "learning_rate": 9.909346733668343e-05, - "loss": 6.5909, - "step": 1403 - }, - { - "epoch": 0.7322033898305085, - "grad_norm": 1.9074891805648804, - "learning_rate": 9.90924623115578e-05, - "loss": 6.0014, - "step": 1404 - }, - { - "epoch": 0.7327249022164276, - "grad_norm": 1.8047109842300415, - "learning_rate": 9.909145728643217e-05, - "loss": 6.3232, - "step": 1405 - }, - { - "epoch": 0.7332464146023469, - "grad_norm": 1.673736572265625, - "learning_rate": 9.909045226130653e-05, - "loss": 6.6315, - "step": 1406 - }, - { - "epoch": 0.733767926988266, - "grad_norm": 1.802794337272644, - "learning_rate": 9.908944723618091e-05, - "loss": 5.9461, - "step": 1407 - }, - { - "epoch": 0.7342894393741851, - "grad_norm": 2.095456123352051, - "learning_rate": 9.908844221105527e-05, - "loss": 5.9153, - "step": 1408 - }, - { - "epoch": 0.7348109517601042, - "grad_norm": 2.0065979957580566, - "learning_rate": 9.908743718592965e-05, - "loss": 6.2348, - "step": 1409 - }, - { - "epoch": 0.7353324641460235, - "grad_norm": 1.992843747138977, - "learning_rate": 9.908643216080402e-05, - "loss": 6.4291, - "step": 1410 - }, - { - "epoch": 0.7358539765319426, - "grad_norm": 1.914299726486206, - "learning_rate": 9.90854271356784e-05, - "loss": 6.351, - "step": 1411 - }, - { - "epoch": 0.7363754889178618, - "grad_norm": 1.7779158353805542, - "learning_rate": 9.908442211055277e-05, - "loss": 6.261, - "step": 1412 - }, - { - "epoch": 0.736897001303781, - "grad_norm": 1.99246346950531, - "learning_rate": 9.908341708542715e-05, - "loss": 6.1437, - "step": 1413 - }, - { - "epoch": 0.7374185136897001, - "grad_norm": 2.0438947677612305, - "learning_rate": 9.908241206030151e-05, - "loss": 6.3093, - "step": 1414 - }, - { - "epoch": 0.7379400260756193, - "grad_norm": 2.0654006004333496, - "learning_rate": 9.908140703517589e-05, - "loss": 6.209, - "step": 1415 - }, - { - "epoch": 0.7384615384615385, - "grad_norm": 1.7370989322662354, - "learning_rate": 9.908040201005026e-05, - "loss": 6.3714, - "step": 1416 - }, - { - "epoch": 0.7389830508474576, - "grad_norm": 1.9075545072555542, - "learning_rate": 9.907939698492463e-05, - "loss": 6.3272, - "step": 1417 - }, - { - "epoch": 0.7395045632333768, - "grad_norm": 1.9900084733963013, - "learning_rate": 9.9078391959799e-05, - "loss": 6.4492, - "step": 1418 - }, - { - "epoch": 0.740026075619296, - "grad_norm": 2.02169132232666, - "learning_rate": 9.907738693467336e-05, - "loss": 6.4734, - "step": 1419 - }, - { - "epoch": 0.7405475880052151, - "grad_norm": 2.2172529697418213, - "learning_rate": 9.907638190954774e-05, - "loss": 6.0227, - "step": 1420 - }, - { - "epoch": 0.7410691003911343, - "grad_norm": 2.0385639667510986, - "learning_rate": 9.90753768844221e-05, - "loss": 6.2609, - "step": 1421 - }, - { - "epoch": 0.7415906127770534, - "grad_norm": 1.70989990234375, - "learning_rate": 9.907437185929648e-05, - "loss": 6.5667, - "step": 1422 - }, - { - "epoch": 0.7421121251629726, - "grad_norm": 1.5108897686004639, - "learning_rate": 9.907336683417086e-05, - "loss": 6.5336, - "step": 1423 - }, - { - "epoch": 0.7426336375488918, - "grad_norm": 1.7108489274978638, - "learning_rate": 9.907236180904524e-05, - "loss": 5.6998, - "step": 1424 - }, - { - "epoch": 0.7431551499348109, - "grad_norm": 2.171581745147705, - "learning_rate": 9.90713567839196e-05, - "loss": 5.648, - "step": 1425 - }, - { - "epoch": 0.7436766623207302, - "grad_norm": 1.7710875272750854, - "learning_rate": 9.907035175879398e-05, - "loss": 6.0819, - "step": 1426 - }, - { - "epoch": 0.7441981747066493, - "grad_norm": 1.806067705154419, - "learning_rate": 9.906934673366834e-05, - "loss": 6.199, - "step": 1427 - }, - { - "epoch": 0.7447196870925684, - "grad_norm": 1.7951360940933228, - "learning_rate": 9.906834170854272e-05, - "loss": 5.808, - "step": 1428 - }, - { - "epoch": 0.7452411994784877, - "grad_norm": 1.8740195035934448, - "learning_rate": 9.906733668341709e-05, - "loss": 6.062, - "step": 1429 - }, - { - "epoch": 0.7457627118644068, - "grad_norm": 2.0066068172454834, - "learning_rate": 9.906633165829146e-05, - "loss": 6.2449, - "step": 1430 - }, - { - "epoch": 0.7462842242503259, - "grad_norm": 1.889897346496582, - "learning_rate": 9.906532663316583e-05, - "loss": 6.3622, - "step": 1431 - }, - { - "epoch": 0.7468057366362452, - "grad_norm": 1.6075999736785889, - "learning_rate": 9.90643216080402e-05, - "loss": 6.4702, - "step": 1432 - }, - { - "epoch": 0.7473272490221643, - "grad_norm": 2.4462218284606934, - "learning_rate": 9.906331658291458e-05, - "loss": 5.7908, - "step": 1433 - }, - { - "epoch": 0.7478487614080834, - "grad_norm": 1.9184459447860718, - "learning_rate": 9.906231155778895e-05, - "loss": 6.1585, - "step": 1434 - }, - { - "epoch": 0.7483702737940026, - "grad_norm": 2.1779956817626953, - "learning_rate": 9.906130653266333e-05, - "loss": 6.1793, - "step": 1435 - }, - { - "epoch": 0.7488917861799218, - "grad_norm": 1.7527437210083008, - "learning_rate": 9.906030150753769e-05, - "loss": 6.449, - "step": 1436 - }, - { - "epoch": 0.7494132985658409, - "grad_norm": 1.6552367210388184, - "learning_rate": 9.905929648241207e-05, - "loss": 6.5836, - "step": 1437 - }, - { - "epoch": 0.7499348109517601, - "grad_norm": 1.90924072265625, - "learning_rate": 9.905829145728643e-05, - "loss": 5.9668, - "step": 1438 - }, - { - "epoch": 0.7504563233376793, - "grad_norm": 2.0400619506835938, - "learning_rate": 9.905728643216081e-05, - "loss": 6.2105, - "step": 1439 - }, - { - "epoch": 0.7509778357235984, - "grad_norm": 1.8455311059951782, - "learning_rate": 9.905628140703517e-05, - "loss": 6.3227, - "step": 1440 - }, - { - "epoch": 0.7514993481095176, - "grad_norm": 2.2235541343688965, - "learning_rate": 9.905527638190955e-05, - "loss": 6.1011, - "step": 1441 - }, - { - "epoch": 0.7520208604954368, - "grad_norm": 1.8395979404449463, - "learning_rate": 9.905427135678392e-05, - "loss": 6.1377, - "step": 1442 - }, - { - "epoch": 0.752542372881356, - "grad_norm": 1.9957846403121948, - "learning_rate": 9.90532663316583e-05, - "loss": 6.0979, - "step": 1443 - }, - { - "epoch": 0.7530638852672751, - "grad_norm": 1.9281573295593262, - "learning_rate": 9.905226130653267e-05, - "loss": 6.4314, - "step": 1444 - }, - { - "epoch": 0.7535853976531942, - "grad_norm": 1.8461557626724243, - "learning_rate": 9.905125628140704e-05, - "loss": 6.603, - "step": 1445 - }, - { - "epoch": 0.7541069100391135, - "grad_norm": 1.6056270599365234, - "learning_rate": 9.905025125628141e-05, - "loss": 6.6123, - "step": 1446 - }, - { - "epoch": 0.7546284224250326, - "grad_norm": 1.977894902229309, - "learning_rate": 9.904924623115578e-05, - "loss": 6.2871, - "step": 1447 - }, - { - "epoch": 0.7551499348109517, - "grad_norm": 1.59738290309906, - "learning_rate": 9.904824120603016e-05, - "loss": 6.4567, - "step": 1448 - }, - { - "epoch": 0.755671447196871, - "grad_norm": 1.7254303693771362, - "learning_rate": 9.904723618090452e-05, - "loss": 6.3123, - "step": 1449 - }, - { - "epoch": 0.7561929595827901, - "grad_norm": 2.0217103958129883, - "learning_rate": 9.90462311557789e-05, - "loss": 6.0582, - "step": 1450 - }, - { - "epoch": 0.7567144719687092, - "grad_norm": 1.7805087566375732, - "learning_rate": 9.904522613065326e-05, - "loss": 6.3635, - "step": 1451 - }, - { - "epoch": 0.7572359843546285, - "grad_norm": 1.9256607294082642, - "learning_rate": 9.904422110552764e-05, - "loss": 5.7504, - "step": 1452 - }, - { - "epoch": 0.7577574967405476, - "grad_norm": 1.9249347448349, - "learning_rate": 9.904321608040202e-05, - "loss": 6.099, - "step": 1453 - }, - { - "epoch": 0.7582790091264667, - "grad_norm": 2.34395694732666, - "learning_rate": 9.90422110552764e-05, - "loss": 6.1244, - "step": 1454 - }, - { - "epoch": 0.758800521512386, - "grad_norm": 2.0598037242889404, - "learning_rate": 9.904120603015076e-05, - "loss": 6.4953, - "step": 1455 - }, - { - "epoch": 0.7593220338983051, - "grad_norm": 1.7266490459442139, - "learning_rate": 9.904020100502514e-05, - "loss": 6.4625, - "step": 1456 - }, - { - "epoch": 0.7598435462842242, - "grad_norm": 1.6677218675613403, - "learning_rate": 9.90391959798995e-05, - "loss": 6.4217, - "step": 1457 - }, - { - "epoch": 0.7603650586701434, - "grad_norm": 1.7407574653625488, - "learning_rate": 9.903819095477388e-05, - "loss": 6.395, - "step": 1458 - }, - { - "epoch": 0.7608865710560626, - "grad_norm": 1.788590431213379, - "learning_rate": 9.903718592964824e-05, - "loss": 6.2999, - "step": 1459 - }, - { - "epoch": 0.7614080834419817, - "grad_norm": 1.6275875568389893, - "learning_rate": 9.903618090452261e-05, - "loss": 6.4773, - "step": 1460 - }, - { - "epoch": 0.7619295958279009, - "grad_norm": 1.848962664604187, - "learning_rate": 9.903517587939699e-05, - "loss": 6.1702, - "step": 1461 - }, - { - "epoch": 0.7624511082138201, - "grad_norm": 1.6216939687728882, - "learning_rate": 9.903417085427135e-05, - "loss": 6.4403, - "step": 1462 - }, - { - "epoch": 0.7629726205997392, - "grad_norm": 1.8353079557418823, - "learning_rate": 9.903316582914573e-05, - "loss": 6.4074, - "step": 1463 - }, - { - "epoch": 0.7634941329856584, - "grad_norm": 1.9631184339523315, - "learning_rate": 9.903216080402011e-05, - "loss": 6.3446, - "step": 1464 - }, - { - "epoch": 0.7640156453715776, - "grad_norm": 1.9883472919464111, - "learning_rate": 9.903115577889448e-05, - "loss": 6.1008, - "step": 1465 - }, - { - "epoch": 0.7645371577574968, - "grad_norm": 1.910007119178772, - "learning_rate": 9.903015075376885e-05, - "loss": 6.2482, - "step": 1466 - }, - { - "epoch": 0.7650586701434159, - "grad_norm": 2.2316036224365234, - "learning_rate": 9.902914572864323e-05, - "loss": 6.1602, - "step": 1467 - }, - { - "epoch": 0.765580182529335, - "grad_norm": 1.9672383069992065, - "learning_rate": 9.902814070351759e-05, - "loss": 6.3586, - "step": 1468 - }, - { - "epoch": 0.7661016949152543, - "grad_norm": 2.0362443923950195, - "learning_rate": 9.902713567839197e-05, - "loss": 5.8586, - "step": 1469 - }, - { - "epoch": 0.7666232073011734, - "grad_norm": 2.102182388305664, - "learning_rate": 9.902613065326633e-05, - "loss": 5.5886, - "step": 1470 - }, - { - "epoch": 0.7671447196870925, - "grad_norm": 1.752789855003357, - "learning_rate": 9.902512562814071e-05, - "loss": 6.1534, - "step": 1471 - }, - { - "epoch": 0.7676662320730118, - "grad_norm": 1.9371451139450073, - "learning_rate": 9.902412060301508e-05, - "loss": 6.203, - "step": 1472 - }, - { - "epoch": 0.7681877444589309, - "grad_norm": 1.8908559083938599, - "learning_rate": 9.902311557788945e-05, - "loss": 6.0585, - "step": 1473 - }, - { - "epoch": 0.76870925684485, - "grad_norm": 1.9075958728790283, - "learning_rate": 9.902211055276383e-05, - "loss": 5.9371, - "step": 1474 - }, - { - "epoch": 0.7692307692307693, - "grad_norm": 1.8455158472061157, - "learning_rate": 9.90211055276382e-05, - "loss": 6.4795, - "step": 1475 - }, - { - "epoch": 0.7697522816166884, - "grad_norm": 1.840221881866455, - "learning_rate": 9.902010050251257e-05, - "loss": 6.209, - "step": 1476 - }, - { - "epoch": 0.7702737940026075, - "grad_norm": 1.9002190828323364, - "learning_rate": 9.901909547738694e-05, - "loss": 6.4243, - "step": 1477 - }, - { - "epoch": 0.7707953063885268, - "grad_norm": 1.634083867073059, - "learning_rate": 9.901809045226132e-05, - "loss": 6.5637, - "step": 1478 - }, - { - "epoch": 0.7713168187744459, - "grad_norm": 2.265779972076416, - "learning_rate": 9.901708542713568e-05, - "loss": 6.3149, - "step": 1479 - }, - { - "epoch": 0.771838331160365, - "grad_norm": 2.1377058029174805, - "learning_rate": 9.901608040201006e-05, - "loss": 6.0884, - "step": 1480 - }, - { - "epoch": 0.7723598435462842, - "grad_norm": 1.5956249237060547, - "learning_rate": 9.901507537688442e-05, - "loss": 6.5332, - "step": 1481 - }, - { - "epoch": 0.7728813559322034, - "grad_norm": 1.7575913667678833, - "learning_rate": 9.90140703517588e-05, - "loss": 6.3354, - "step": 1482 - }, - { - "epoch": 0.7734028683181225, - "grad_norm": 1.7829210758209229, - "learning_rate": 9.901306532663316e-05, - "loss": 6.4074, - "step": 1483 - }, - { - "epoch": 0.7739243807040417, - "grad_norm": 1.781745195388794, - "learning_rate": 9.901206030150754e-05, - "loss": 6.3292, - "step": 1484 - }, - { - "epoch": 0.7744458930899609, - "grad_norm": 1.8603249788284302, - "learning_rate": 9.901105527638192e-05, - "loss": 6.3827, - "step": 1485 - }, - { - "epoch": 0.7749674054758801, - "grad_norm": 1.7212809324264526, - "learning_rate": 9.901005025125628e-05, - "loss": 6.3597, - "step": 1486 - }, - { - "epoch": 0.7754889178617992, - "grad_norm": 2.077568531036377, - "learning_rate": 9.900904522613066e-05, - "loss": 6.5001, - "step": 1487 - }, - { - "epoch": 0.7760104302477184, - "grad_norm": 1.6344174146652222, - "learning_rate": 9.900804020100503e-05, - "loss": 6.5129, - "step": 1488 - }, - { - "epoch": 0.7765319426336376, - "grad_norm": 1.760164499282837, - "learning_rate": 9.90070351758794e-05, - "loss": 6.2177, - "step": 1489 - }, - { - "epoch": 0.7770534550195567, - "grad_norm": 1.7045994997024536, - "learning_rate": 9.900603015075377e-05, - "loss": 6.3082, - "step": 1490 - }, - { - "epoch": 0.7775749674054758, - "grad_norm": 1.8701021671295166, - "learning_rate": 9.900502512562815e-05, - "loss": 6.5448, - "step": 1491 - }, - { - "epoch": 0.7780964797913951, - "grad_norm": 1.9008667469024658, - "learning_rate": 9.900402010050251e-05, - "loss": 6.1047, - "step": 1492 - }, - { - "epoch": 0.7786179921773142, - "grad_norm": 2.518509864807129, - "learning_rate": 9.900301507537689e-05, - "loss": 5.6636, - "step": 1493 - }, - { - "epoch": 0.7791395045632333, - "grad_norm": 2.122746467590332, - "learning_rate": 9.900201005025127e-05, - "loss": 6.311, - "step": 1494 - }, - { - "epoch": 0.7796610169491526, - "grad_norm": 1.750341534614563, - "learning_rate": 9.900100502512564e-05, - "loss": 6.4305, - "step": 1495 - }, - { - "epoch": 0.7801825293350717, - "grad_norm": 2.263638496398926, - "learning_rate": 9.900000000000001e-05, - "loss": 6.0462, - "step": 1496 - }, - { - "epoch": 0.7807040417209908, - "grad_norm": 1.6788009405136108, - "learning_rate": 9.899899497487439e-05, - "loss": 6.3432, - "step": 1497 - }, - { - "epoch": 0.7812255541069101, - "grad_norm": 2.6085753440856934, - "learning_rate": 9.899798994974875e-05, - "loss": 5.5262, - "step": 1498 - }, - { - "epoch": 0.7817470664928292, - "grad_norm": 2.0676863193511963, - "learning_rate": 9.899698492462311e-05, - "loss": 5.8053, - "step": 1499 - }, - { - "epoch": 0.7822685788787483, - "grad_norm": 2.021592140197754, - "learning_rate": 9.899597989949749e-05, - "loss": 6.2348, - "step": 1500 - }, - { - "epoch": 0.7827900912646676, - "grad_norm": 2.2871415615081787, - "learning_rate": 9.899497487437186e-05, - "loss": 6.4151, - "step": 1501 - }, - { - "epoch": 0.7833116036505867, - "grad_norm": 2.0189568996429443, - "learning_rate": 9.899396984924623e-05, - "loss": 6.3597, - "step": 1502 - }, - { - "epoch": 0.7838331160365059, - "grad_norm": 1.8244130611419678, - "learning_rate": 9.89929648241206e-05, - "loss": 6.258, - "step": 1503 - }, - { - "epoch": 0.784354628422425, - "grad_norm": 1.9418656826019287, - "learning_rate": 9.899195979899498e-05, - "loss": 5.831, - "step": 1504 - }, - { - "epoch": 0.7848761408083442, - "grad_norm": 1.8899316787719727, - "learning_rate": 9.899095477386935e-05, - "loss": 6.2857, - "step": 1505 - }, - { - "epoch": 0.7853976531942634, - "grad_norm": 2.0739128589630127, - "learning_rate": 9.898994974874373e-05, - "loss": 5.6871, - "step": 1506 - }, - { - "epoch": 0.7859191655801825, - "grad_norm": 1.8384994268417358, - "learning_rate": 9.89889447236181e-05, - "loss": 6.3722, - "step": 1507 - }, - { - "epoch": 0.7864406779661017, - "grad_norm": 2.0411200523376465, - "learning_rate": 9.898793969849247e-05, - "loss": 5.9428, - "step": 1508 - }, - { - "epoch": 0.7869621903520209, - "grad_norm": 2.0504939556121826, - "learning_rate": 9.898693467336684e-05, - "loss": 6.3496, - "step": 1509 - }, - { - "epoch": 0.78748370273794, - "grad_norm": 1.8186893463134766, - "learning_rate": 9.898592964824122e-05, - "loss": 6.5377, - "step": 1510 - }, - { - "epoch": 0.7880052151238592, - "grad_norm": 1.8714625835418701, - "learning_rate": 9.898492462311558e-05, - "loss": 6.3488, - "step": 1511 - }, - { - "epoch": 0.7885267275097784, - "grad_norm": 2.114046096801758, - "learning_rate": 9.898391959798994e-05, - "loss": 6.2311, - "step": 1512 - }, - { - "epoch": 0.7890482398956975, - "grad_norm": 1.7104909420013428, - "learning_rate": 9.898291457286432e-05, - "loss": 6.2372, - "step": 1513 - }, - { - "epoch": 0.7895697522816167, - "grad_norm": 2.1042661666870117, - "learning_rate": 9.89819095477387e-05, - "loss": 6.2205, - "step": 1514 - }, - { - "epoch": 0.7900912646675359, - "grad_norm": 1.9569180011749268, - "learning_rate": 9.898090452261308e-05, - "loss": 5.8999, - "step": 1515 - }, - { - "epoch": 0.790612777053455, - "grad_norm": 1.9890902042388916, - "learning_rate": 9.897989949748744e-05, - "loss": 6.4819, - "step": 1516 - }, - { - "epoch": 0.7911342894393741, - "grad_norm": 1.888201117515564, - "learning_rate": 9.897889447236182e-05, - "loss": 6.1938, - "step": 1517 - }, - { - "epoch": 0.7916558018252934, - "grad_norm": 1.9150131940841675, - "learning_rate": 9.897788944723618e-05, - "loss": 6.1976, - "step": 1518 - }, - { - "epoch": 0.7921773142112125, - "grad_norm": 1.835471272468567, - "learning_rate": 9.897688442211056e-05, - "loss": 6.3683, - "step": 1519 - }, - { - "epoch": 0.7926988265971316, - "grad_norm": 1.6649370193481445, - "learning_rate": 9.897587939698493e-05, - "loss": 6.2805, - "step": 1520 - }, - { - "epoch": 0.7932203389830509, - "grad_norm": 2.5710160732269287, - "learning_rate": 9.89748743718593e-05, - "loss": 5.6337, - "step": 1521 - }, - { - "epoch": 0.79374185136897, - "grad_norm": 1.8277206420898438, - "learning_rate": 9.897386934673367e-05, - "loss": 6.1163, - "step": 1522 - }, - { - "epoch": 0.7942633637548892, - "grad_norm": 1.8245021104812622, - "learning_rate": 9.897286432160805e-05, - "loss": 6.3545, - "step": 1523 - }, - { - "epoch": 0.7947848761408084, - "grad_norm": 1.7888435125350952, - "learning_rate": 9.897185929648241e-05, - "loss": 6.1207, - "step": 1524 - }, - { - "epoch": 0.7953063885267275, - "grad_norm": 1.6552172899246216, - "learning_rate": 9.897085427135679e-05, - "loss": 6.1833, - "step": 1525 - }, - { - "epoch": 0.7958279009126467, - "grad_norm": 2.000274658203125, - "learning_rate": 9.896984924623117e-05, - "loss": 6.3528, - "step": 1526 - }, - { - "epoch": 0.7963494132985658, - "grad_norm": 2.166152238845825, - "learning_rate": 9.896884422110553e-05, - "loss": 6.5508, - "step": 1527 - }, - { - "epoch": 0.796870925684485, - "grad_norm": 2.298640012741089, - "learning_rate": 9.896783919597991e-05, - "loss": 6.0207, - "step": 1528 - }, - { - "epoch": 0.7973924380704042, - "grad_norm": 2.129709005355835, - "learning_rate": 9.896683417085427e-05, - "loss": 6.1335, - "step": 1529 - }, - { - "epoch": 0.7979139504563233, - "grad_norm": 2.158891201019287, - "learning_rate": 9.896582914572865e-05, - "loss": 5.933, - "step": 1530 - }, - { - "epoch": 0.7984354628422425, - "grad_norm": 2.039499044418335, - "learning_rate": 9.896482412060301e-05, - "loss": 6.2648, - "step": 1531 - }, - { - "epoch": 0.7989569752281617, - "grad_norm": 1.9083620309829712, - "learning_rate": 9.896381909547739e-05, - "loss": 6.5334, - "step": 1532 - }, - { - "epoch": 0.7994784876140808, - "grad_norm": 1.8761316537857056, - "learning_rate": 9.896281407035176e-05, - "loss": 6.2449, - "step": 1533 - }, - { - "epoch": 0.8, - "grad_norm": 1.6660499572753906, - "learning_rate": 9.896180904522613e-05, - "loss": 6.3593, - "step": 1534 - }, - { - "epoch": 0.8005215123859192, - "grad_norm": 1.6793335676193237, - "learning_rate": 9.896080402010051e-05, - "loss": 6.3603, - "step": 1535 - }, - { - "epoch": 0.8010430247718383, - "grad_norm": 1.666662573814392, - "learning_rate": 9.895979899497489e-05, - "loss": 5.7815, - "step": 1536 - }, - { - "epoch": 0.8015645371577576, - "grad_norm": 1.8911926746368408, - "learning_rate": 9.895879396984925e-05, - "loss": 6.3802, - "step": 1537 - }, - { - "epoch": 0.8020860495436767, - "grad_norm": 1.7046828269958496, - "learning_rate": 9.895778894472363e-05, - "loss": 6.2818, - "step": 1538 - }, - { - "epoch": 0.8026075619295958, - "grad_norm": 1.654525637626648, - "learning_rate": 9.8956783919598e-05, - "loss": 6.3808, - "step": 1539 - }, - { - "epoch": 0.803129074315515, - "grad_norm": 1.7619636058807373, - "learning_rate": 9.895577889447236e-05, - "loss": 6.4523, - "step": 1540 - }, - { - "epoch": 0.8036505867014342, - "grad_norm": 1.7219228744506836, - "learning_rate": 9.895477386934674e-05, - "loss": 6.2557, - "step": 1541 - }, - { - "epoch": 0.8041720990873533, - "grad_norm": 1.713586449623108, - "learning_rate": 9.89537688442211e-05, - "loss": 6.2463, - "step": 1542 - }, - { - "epoch": 0.8046936114732725, - "grad_norm": 1.813874363899231, - "learning_rate": 9.895276381909548e-05, - "loss": 6.1161, - "step": 1543 - }, - { - "epoch": 0.8052151238591917, - "grad_norm": 1.9660282135009766, - "learning_rate": 9.895175879396985e-05, - "loss": 6.1216, - "step": 1544 - }, - { - "epoch": 0.8057366362451108, - "grad_norm": 1.6842600107192993, - "learning_rate": 9.895075376884422e-05, - "loss": 6.4891, - "step": 1545 - }, - { - "epoch": 0.80625814863103, - "grad_norm": 2.0831360816955566, - "learning_rate": 9.89497487437186e-05, - "loss": 6.5017, - "step": 1546 - }, - { - "epoch": 0.8067796610169492, - "grad_norm": 1.654707908630371, - "learning_rate": 9.894874371859298e-05, - "loss": 6.5324, - "step": 1547 - }, - { - "epoch": 0.8073011734028683, - "grad_norm": 1.885933756828308, - "learning_rate": 9.894773869346734e-05, - "loss": 5.9692, - "step": 1548 - }, - { - "epoch": 0.8078226857887875, - "grad_norm": 1.806748390197754, - "learning_rate": 9.894673366834172e-05, - "loss": 6.3875, - "step": 1549 - }, - { - "epoch": 0.8083441981747066, - "grad_norm": 1.5926307439804077, - "learning_rate": 9.894572864321609e-05, - "loss": 6.3195, - "step": 1550 - }, - { - "epoch": 0.8088657105606258, - "grad_norm": 1.7504496574401855, - "learning_rate": 9.894472361809046e-05, - "loss": 6.2834, - "step": 1551 - }, - { - "epoch": 0.809387222946545, - "grad_norm": 1.9168084859848022, - "learning_rate": 9.894371859296483e-05, - "loss": 6.1883, - "step": 1552 - }, - { - "epoch": 0.8099087353324641, - "grad_norm": 1.6733273267745972, - "learning_rate": 9.894271356783919e-05, - "loss": 6.5441, - "step": 1553 - }, - { - "epoch": 0.8104302477183833, - "grad_norm": 2.0163559913635254, - "learning_rate": 9.894170854271357e-05, - "loss": 6.3247, - "step": 1554 - }, - { - "epoch": 0.8109517601043025, - "grad_norm": 3.170121431350708, - "learning_rate": 9.894070351758795e-05, - "loss": 6.4189, - "step": 1555 - }, - { - "epoch": 0.8114732724902216, - "grad_norm": 1.7728058099746704, - "learning_rate": 9.893969849246232e-05, - "loss": 6.1033, - "step": 1556 - }, - { - "epoch": 0.8119947848761409, - "grad_norm": 1.6014310121536255, - "learning_rate": 9.893869346733669e-05, - "loss": 6.4606, - "step": 1557 - }, - { - "epoch": 0.81251629726206, - "grad_norm": 1.60207998752594, - "learning_rate": 9.893768844221107e-05, - "loss": 6.6052, - "step": 1558 - }, - { - "epoch": 0.8130378096479791, - "grad_norm": 1.8946715593338013, - "learning_rate": 9.893668341708543e-05, - "loss": 5.8825, - "step": 1559 - }, - { - "epoch": 0.8135593220338984, - "grad_norm": 1.6949716806411743, - "learning_rate": 9.893567839195981e-05, - "loss": 6.4048, - "step": 1560 - }, - { - "epoch": 0.8140808344198175, - "grad_norm": 1.7373712062835693, - "learning_rate": 9.893467336683417e-05, - "loss": 6.2614, - "step": 1561 - }, - { - "epoch": 0.8146023468057366, - "grad_norm": 2.1420915126800537, - "learning_rate": 9.893366834170855e-05, - "loss": 5.641, - "step": 1562 - }, - { - "epoch": 0.8151238591916558, - "grad_norm": 1.9140840768814087, - "learning_rate": 9.893266331658292e-05, - "loss": 6.2816, - "step": 1563 - }, - { - "epoch": 0.815645371577575, - "grad_norm": 1.9483697414398193, - "learning_rate": 9.89316582914573e-05, - "loss": 6.436, - "step": 1564 - }, - { - "epoch": 0.8161668839634941, - "grad_norm": 2.1768035888671875, - "learning_rate": 9.893065326633166e-05, - "loss": 5.3937, - "step": 1565 - }, - { - "epoch": 0.8166883963494133, - "grad_norm": 1.8576078414916992, - "learning_rate": 9.892964824120604e-05, - "loss": 6.343, - "step": 1566 - }, - { - "epoch": 0.8172099087353325, - "grad_norm": 1.9725664854049683, - "learning_rate": 9.892864321608041e-05, - "loss": 6.306, - "step": 1567 - }, - { - "epoch": 0.8177314211212516, - "grad_norm": 1.7392650842666626, - "learning_rate": 9.892763819095478e-05, - "loss": 6.2763, - "step": 1568 - }, - { - "epoch": 0.8182529335071708, - "grad_norm": 1.9523593187332153, - "learning_rate": 9.892663316582916e-05, - "loss": 5.9679, - "step": 1569 - }, - { - "epoch": 0.81877444589309, - "grad_norm": 2.391331195831299, - "learning_rate": 9.892562814070352e-05, - "loss": 6.0673, - "step": 1570 - }, - { - "epoch": 0.8192959582790091, - "grad_norm": 1.7008408308029175, - "learning_rate": 9.89246231155779e-05, - "loss": 6.2428, - "step": 1571 - }, - { - "epoch": 0.8198174706649283, - "grad_norm": 1.6180990934371948, - "learning_rate": 9.892361809045226e-05, - "loss": 6.3696, - "step": 1572 - }, - { - "epoch": 0.8203389830508474, - "grad_norm": 1.9037576913833618, - "learning_rate": 9.892261306532664e-05, - "loss": 6.2464, - "step": 1573 - }, - { - "epoch": 0.8208604954367666, - "grad_norm": 1.7359576225280762, - "learning_rate": 9.8921608040201e-05, - "loss": 6.2016, - "step": 1574 - }, - { - "epoch": 0.8213820078226858, - "grad_norm": 2.0656278133392334, - "learning_rate": 9.892060301507538e-05, - "loss": 6.0986, - "step": 1575 - }, - { - "epoch": 0.8219035202086049, - "grad_norm": 1.735777497291565, - "learning_rate": 9.891959798994975e-05, - "loss": 6.4396, - "step": 1576 - }, - { - "epoch": 0.8224250325945242, - "grad_norm": 1.8029255867004395, - "learning_rate": 9.891859296482412e-05, - "loss": 6.2857, - "step": 1577 - }, - { - "epoch": 0.8229465449804433, - "grad_norm": 1.8384455442428589, - "learning_rate": 9.89175879396985e-05, - "loss": 6.0151, - "step": 1578 - }, - { - "epoch": 0.8234680573663624, - "grad_norm": 1.951324701309204, - "learning_rate": 9.891658291457287e-05, - "loss": 5.8272, - "step": 1579 - }, - { - "epoch": 0.8239895697522817, - "grad_norm": 1.8120818138122559, - "learning_rate": 9.891557788944724e-05, - "loss": 6.0491, - "step": 1580 - }, - { - "epoch": 0.8245110821382008, - "grad_norm": 1.7207704782485962, - "learning_rate": 9.891457286432161e-05, - "loss": 6.3431, - "step": 1581 - }, - { - "epoch": 0.8250325945241199, - "grad_norm": 1.9370810985565186, - "learning_rate": 9.891356783919599e-05, - "loss": 5.8802, - "step": 1582 - }, - { - "epoch": 0.8255541069100392, - "grad_norm": 1.954039454460144, - "learning_rate": 9.891256281407035e-05, - "loss": 6.5588, - "step": 1583 - }, - { - "epoch": 0.8260756192959583, - "grad_norm": 1.8196903467178345, - "learning_rate": 9.891155778894473e-05, - "loss": 5.7277, - "step": 1584 - }, - { - "epoch": 0.8265971316818774, - "grad_norm": 1.8717683553695679, - "learning_rate": 9.891055276381909e-05, - "loss": 6.379, - "step": 1585 - }, - { - "epoch": 0.8271186440677966, - "grad_norm": 1.6302410364151, - "learning_rate": 9.890954773869347e-05, - "loss": 6.3934, - "step": 1586 - }, - { - "epoch": 0.8276401564537158, - "grad_norm": 2.919593572616577, - "learning_rate": 9.890854271356785e-05, - "loss": 5.6854, - "step": 1587 - }, - { - "epoch": 0.8281616688396349, - "grad_norm": 1.7433992624282837, - "learning_rate": 9.890753768844223e-05, - "loss": 6.1599, - "step": 1588 - }, - { - "epoch": 0.8286831812255541, - "grad_norm": 1.7605253458023071, - "learning_rate": 9.890653266331659e-05, - "loss": 6.1212, - "step": 1589 - }, - { - "epoch": 0.8292046936114733, - "grad_norm": 1.6788815259933472, - "learning_rate": 9.890552763819097e-05, - "loss": 6.406, - "step": 1590 - }, - { - "epoch": 0.8297262059973924, - "grad_norm": 1.7766562700271606, - "learning_rate": 9.890452261306533e-05, - "loss": 6.4508, - "step": 1591 - }, - { - "epoch": 0.8302477183833116, - "grad_norm": 1.7444632053375244, - "learning_rate": 9.89035175879397e-05, - "loss": 6.055, - "step": 1592 - }, - { - "epoch": 0.8307692307692308, - "grad_norm": 2.1119027137756348, - "learning_rate": 9.890251256281407e-05, - "loss": 6.2266, - "step": 1593 - }, - { - "epoch": 0.83129074315515, - "grad_norm": 1.8859652280807495, - "learning_rate": 9.890150753768844e-05, - "loss": 6.3608, - "step": 1594 - }, - { - "epoch": 0.8318122555410691, - "grad_norm": 1.8703123331069946, - "learning_rate": 9.890050251256282e-05, - "loss": 5.9862, - "step": 1595 - }, - { - "epoch": 0.8323337679269882, - "grad_norm": 1.7631269693374634, - "learning_rate": 9.889949748743718e-05, - "loss": 6.1231, - "step": 1596 - }, - { - "epoch": 0.8328552803129075, - "grad_norm": 1.702431321144104, - "learning_rate": 9.889849246231156e-05, - "loss": 6.218, - "step": 1597 - }, - { - "epoch": 0.8333767926988266, - "grad_norm": 1.5598454475402832, - "learning_rate": 9.889748743718594e-05, - "loss": 6.4459, - "step": 1598 - }, - { - "epoch": 0.8338983050847457, - "grad_norm": 3.122119903564453, - "learning_rate": 9.889648241206031e-05, - "loss": 5.9532, - "step": 1599 - }, - { - "epoch": 0.834419817470665, - "grad_norm": 1.9566999673843384, - "learning_rate": 9.889547738693468e-05, - "loss": 6.1945, - "step": 1600 - }, - { - "epoch": 0.8349413298565841, - "grad_norm": 2.015418291091919, - "learning_rate": 9.889447236180906e-05, - "loss": 6.0165, - "step": 1601 - }, - { - "epoch": 0.8354628422425032, - "grad_norm": 1.7766563892364502, - "learning_rate": 9.889346733668342e-05, - "loss": 6.2291, - "step": 1602 - }, - { - "epoch": 0.8359843546284225, - "grad_norm": 2.0525100231170654, - "learning_rate": 9.88924623115578e-05, - "loss": 6.2915, - "step": 1603 - }, - { - "epoch": 0.8365058670143416, - "grad_norm": 1.9141136407852173, - "learning_rate": 9.889145728643216e-05, - "loss": 6.4239, - "step": 1604 - }, - { - "epoch": 0.8370273794002607, - "grad_norm": 2.252941370010376, - "learning_rate": 9.889045226130653e-05, - "loss": 5.5331, - "step": 1605 - }, - { - "epoch": 0.83754889178618, - "grad_norm": 2.044296979904175, - "learning_rate": 9.88894472361809e-05, - "loss": 5.9919, - "step": 1606 - }, - { - "epoch": 0.8380704041720991, - "grad_norm": 1.8609423637390137, - "learning_rate": 9.888844221105528e-05, - "loss": 6.5144, - "step": 1607 - }, - { - "epoch": 0.8385919165580182, - "grad_norm": 1.6780518293380737, - "learning_rate": 9.888743718592966e-05, - "loss": 6.1254, - "step": 1608 - }, - { - "epoch": 0.8391134289439374, - "grad_norm": 1.7610026597976685, - "learning_rate": 9.888643216080402e-05, - "loss": 6.1459, - "step": 1609 - }, - { - "epoch": 0.8396349413298566, - "grad_norm": 2.063675880432129, - "learning_rate": 9.88854271356784e-05, - "loss": 5.8401, - "step": 1610 - }, - { - "epoch": 0.8401564537157757, - "grad_norm": 1.9090136289596558, - "learning_rate": 9.888442211055277e-05, - "loss": 6.1468, - "step": 1611 - }, - { - "epoch": 0.8406779661016949, - "grad_norm": 1.9683161973953247, - "learning_rate": 9.888341708542714e-05, - "loss": 6.0548, - "step": 1612 - }, - { - "epoch": 0.8411994784876141, - "grad_norm": 1.8758752346038818, - "learning_rate": 9.888241206030151e-05, - "loss": 6.0022, - "step": 1613 - }, - { - "epoch": 0.8417209908735332, - "grad_norm": 1.9879589080810547, - "learning_rate": 9.888140703517589e-05, - "loss": 6.4184, - "step": 1614 - }, - { - "epoch": 0.8422425032594524, - "grad_norm": 1.6938636302947998, - "learning_rate": 9.888040201005025e-05, - "loss": 6.3096, - "step": 1615 - }, - { - "epoch": 0.8427640156453716, - "grad_norm": 1.819214940071106, - "learning_rate": 9.887939698492463e-05, - "loss": 6.1078, - "step": 1616 - }, - { - "epoch": 0.8432855280312908, - "grad_norm": 2.409513235092163, - "learning_rate": 9.887839195979899e-05, - "loss": 6.4294, - "step": 1617 - }, - { - "epoch": 0.8438070404172099, - "grad_norm": 2.883535623550415, - "learning_rate": 9.887738693467337e-05, - "loss": 6.4456, - "step": 1618 - }, - { - "epoch": 0.8443285528031291, - "grad_norm": 2.0457053184509277, - "learning_rate": 9.887638190954775e-05, - "loss": 6.2745, - "step": 1619 - }, - { - "epoch": 0.8448500651890483, - "grad_norm": 1.9166165590286255, - "learning_rate": 9.887537688442211e-05, - "loss": 6.5612, - "step": 1620 - }, - { - "epoch": 0.8453715775749674, - "grad_norm": 2.0632543563842773, - "learning_rate": 9.887437185929649e-05, - "loss": 5.371, - "step": 1621 - }, - { - "epoch": 0.8458930899608865, - "grad_norm": 1.76840078830719, - "learning_rate": 9.887336683417085e-05, - "loss": 6.5503, - "step": 1622 - }, - { - "epoch": 0.8464146023468058, - "grad_norm": 1.7861356735229492, - "learning_rate": 9.887236180904523e-05, - "loss": 5.9609, - "step": 1623 - }, - { - "epoch": 0.8469361147327249, - "grad_norm": 1.7596338987350464, - "learning_rate": 9.88713567839196e-05, - "loss": 5.9131, - "step": 1624 - }, - { - "epoch": 0.847457627118644, - "grad_norm": 1.798568606376648, - "learning_rate": 9.887035175879397e-05, - "loss": 6.1214, - "step": 1625 - }, - { - "epoch": 0.8479791395045633, - "grad_norm": 1.8737748861312866, - "learning_rate": 9.886934673366834e-05, - "loss": 6.2013, - "step": 1626 - }, - { - "epoch": 0.8485006518904824, - "grad_norm": 1.8132468461990356, - "learning_rate": 9.886834170854272e-05, - "loss": 5.842, - "step": 1627 - }, - { - "epoch": 0.8490221642764015, - "grad_norm": 1.8197094202041626, - "learning_rate": 9.88673366834171e-05, - "loss": 6.5088, - "step": 1628 - }, - { - "epoch": 0.8495436766623208, - "grad_norm": 1.6862690448760986, - "learning_rate": 9.886633165829147e-05, - "loss": 6.409, - "step": 1629 - }, - { - "epoch": 0.8500651890482399, - "grad_norm": 1.6131439208984375, - "learning_rate": 9.886532663316584e-05, - "loss": 6.3657, - "step": 1630 - }, - { - "epoch": 0.850586701434159, - "grad_norm": 1.8240110874176025, - "learning_rate": 9.886432160804021e-05, - "loss": 6.0665, - "step": 1631 - }, - { - "epoch": 0.8511082138200782, - "grad_norm": 1.997011423110962, - "learning_rate": 9.886331658291458e-05, - "loss": 6.0481, - "step": 1632 - }, - { - "epoch": 0.8516297262059974, - "grad_norm": 1.7994962930679321, - "learning_rate": 9.886231155778894e-05, - "loss": 5.8835, - "step": 1633 - }, - { - "epoch": 0.8521512385919165, - "grad_norm": 1.7089227437973022, - "learning_rate": 9.886130653266332e-05, - "loss": 6.0958, - "step": 1634 - }, - { - "epoch": 0.8526727509778357, - "grad_norm": 1.5072299242019653, - "learning_rate": 9.886030150753769e-05, - "loss": 6.4401, - "step": 1635 - }, - { - "epoch": 0.8531942633637549, - "grad_norm": 1.7127587795257568, - "learning_rate": 9.885929648241206e-05, - "loss": 6.4359, - "step": 1636 - }, - { - "epoch": 0.853715775749674, - "grad_norm": 1.9178413152694702, - "learning_rate": 9.885829145728643e-05, - "loss": 5.9645, - "step": 1637 - }, - { - "epoch": 0.8542372881355932, - "grad_norm": 1.8929202556610107, - "learning_rate": 9.88572864321608e-05, - "loss": 6.2902, - "step": 1638 - }, - { - "epoch": 0.8547588005215124, - "grad_norm": 2.042419910430908, - "learning_rate": 9.885628140703518e-05, - "loss": 6.3096, - "step": 1639 - }, - { - "epoch": 0.8552803129074316, - "grad_norm": 1.7103806734085083, - "learning_rate": 9.885527638190956e-05, - "loss": 6.3807, - "step": 1640 - }, - { - "epoch": 0.8558018252933507, - "grad_norm": 1.9331402778625488, - "learning_rate": 9.885427135678393e-05, - "loss": 6.4526, - "step": 1641 - }, - { - "epoch": 0.8563233376792699, - "grad_norm": 1.9394752979278564, - "learning_rate": 9.88532663316583e-05, - "loss": 6.305, - "step": 1642 - }, - { - "epoch": 0.8568448500651891, - "grad_norm": 2.5800442695617676, - "learning_rate": 9.885226130653267e-05, - "loss": 5.2474, - "step": 1643 - }, - { - "epoch": 0.8573663624511082, - "grad_norm": 1.7617117166519165, - "learning_rate": 9.885125628140705e-05, - "loss": 6.1225, - "step": 1644 - }, - { - "epoch": 0.8578878748370273, - "grad_norm": 2.0699551105499268, - "learning_rate": 9.885025125628141e-05, - "loss": 6.1364, - "step": 1645 - }, - { - "epoch": 0.8584093872229466, - "grad_norm": 2.011145830154419, - "learning_rate": 9.884924623115577e-05, - "loss": 6.5612, - "step": 1646 - }, - { - "epoch": 0.8589308996088657, - "grad_norm": 2.022282600402832, - "learning_rate": 9.884824120603015e-05, - "loss": 6.0344, - "step": 1647 - }, - { - "epoch": 0.8594524119947848, - "grad_norm": 2.03702449798584, - "learning_rate": 9.884723618090453e-05, - "loss": 6.2099, - "step": 1648 - }, - { - "epoch": 0.8599739243807041, - "grad_norm": 1.7508972883224487, - "learning_rate": 9.884623115577891e-05, - "loss": 6.2943, - "step": 1649 - }, - { - "epoch": 0.8604954367666232, - "grad_norm": 1.8152028322219849, - "learning_rate": 9.884522613065327e-05, - "loss": 6.1205, - "step": 1650 - }, - { - "epoch": 0.8610169491525423, - "grad_norm": 1.9179080724716187, - "learning_rate": 9.884422110552765e-05, - "loss": 6.2267, - "step": 1651 - }, - { - "epoch": 0.8615384615384616, - "grad_norm": 1.5328307151794434, - "learning_rate": 9.884321608040201e-05, - "loss": 6.3627, - "step": 1652 - }, - { - "epoch": 0.8620599739243807, - "grad_norm": 1.8169989585876465, - "learning_rate": 9.884221105527639e-05, - "loss": 6.1228, - "step": 1653 - }, - { - "epoch": 0.8625814863102998, - "grad_norm": 2.0403926372528076, - "learning_rate": 9.884120603015076e-05, - "loss": 6.3445, - "step": 1654 - }, - { - "epoch": 0.863102998696219, - "grad_norm": 1.8902539014816284, - "learning_rate": 9.884020100502513e-05, - "loss": 6.2774, - "step": 1655 - }, - { - "epoch": 0.8636245110821382, - "grad_norm": 2.031467914581299, - "learning_rate": 9.88391959798995e-05, - "loss": 5.5188, - "step": 1656 - }, - { - "epoch": 0.8641460234680574, - "grad_norm": 1.8601702451705933, - "learning_rate": 9.883819095477388e-05, - "loss": 6.4218, - "step": 1657 - }, - { - "epoch": 0.8646675358539765, - "grad_norm": 1.8415976762771606, - "learning_rate": 9.883718592964824e-05, - "loss": 5.8598, - "step": 1658 - }, - { - "epoch": 0.8651890482398957, - "grad_norm": 1.6482434272766113, - "learning_rate": 9.883618090452262e-05, - "loss": 6.5083, - "step": 1659 - }, - { - "epoch": 0.8657105606258149, - "grad_norm": 1.7652984857559204, - "learning_rate": 9.8835175879397e-05, - "loss": 6.1357, - "step": 1660 - }, - { - "epoch": 0.866232073011734, - "grad_norm": 2.12864089012146, - "learning_rate": 9.883417085427136e-05, - "loss": 5.5445, - "step": 1661 - }, - { - "epoch": 0.8667535853976532, - "grad_norm": 2.5631587505340576, - "learning_rate": 9.883316582914574e-05, - "loss": 6.0426, - "step": 1662 - }, - { - "epoch": 0.8672750977835724, - "grad_norm": 1.9603437185287476, - "learning_rate": 9.88321608040201e-05, - "loss": 6.3445, - "step": 1663 - }, - { - "epoch": 0.8677966101694915, - "grad_norm": 1.6258248090744019, - "learning_rate": 9.883115577889448e-05, - "loss": 6.1017, - "step": 1664 - }, - { - "epoch": 0.8683181225554107, - "grad_norm": 1.8574259281158447, - "learning_rate": 9.883015075376884e-05, - "loss": 6.4319, - "step": 1665 - }, - { - "epoch": 0.8688396349413299, - "grad_norm": 2.0191900730133057, - "learning_rate": 9.882914572864322e-05, - "loss": 4.9855, - "step": 1666 - }, - { - "epoch": 0.869361147327249, - "grad_norm": 1.8582489490509033, - "learning_rate": 9.882814070351759e-05, - "loss": 6.4653, - "step": 1667 - }, - { - "epoch": 0.8698826597131681, - "grad_norm": 1.9659878015518188, - "learning_rate": 9.882713567839196e-05, - "loss": 6.531, - "step": 1668 - }, - { - "epoch": 0.8704041720990874, - "grad_norm": 1.6383056640625, - "learning_rate": 9.882613065326634e-05, - "loss": 6.5757, - "step": 1669 - }, - { - "epoch": 0.8709256844850065, - "grad_norm": 2.2755446434020996, - "learning_rate": 9.882512562814072e-05, - "loss": 5.7911, - "step": 1670 - }, - { - "epoch": 0.8714471968709256, - "grad_norm": 1.9824647903442383, - "learning_rate": 9.882412060301508e-05, - "loss": 6.3458, - "step": 1671 - }, - { - "epoch": 0.8719687092568449, - "grad_norm": 1.9701985120773315, - "learning_rate": 9.882311557788945e-05, - "loss": 6.5847, - "step": 1672 - }, - { - "epoch": 0.872490221642764, - "grad_norm": 2.0365712642669678, - "learning_rate": 9.882211055276383e-05, - "loss": 6.2689, - "step": 1673 - }, - { - "epoch": 0.8730117340286832, - "grad_norm": 2.18672776222229, - "learning_rate": 9.882110552763819e-05, - "loss": 5.9488, - "step": 1674 - }, - { - "epoch": 0.8735332464146024, - "grad_norm": 1.8200421333312988, - "learning_rate": 9.882010050251257e-05, - "loss": 5.9885, - "step": 1675 - }, - { - "epoch": 0.8740547588005215, - "grad_norm": 2.351130247116089, - "learning_rate": 9.881909547738693e-05, - "loss": 4.6779, - "step": 1676 - }, - { - "epoch": 0.8745762711864407, - "grad_norm": 2.151280641555786, - "learning_rate": 9.881809045226131e-05, - "loss": 5.6456, - "step": 1677 - }, - { - "epoch": 0.8750977835723598, - "grad_norm": 1.717529296875, - "learning_rate": 9.881708542713567e-05, - "loss": 5.7936, - "step": 1678 - }, - { - "epoch": 0.875619295958279, - "grad_norm": 2.2586164474487305, - "learning_rate": 9.881608040201005e-05, - "loss": 5.4351, - "step": 1679 - }, - { - "epoch": 0.8761408083441982, - "grad_norm": 2.082540512084961, - "learning_rate": 9.881507537688443e-05, - "loss": 6.1644, - "step": 1680 - }, - { - "epoch": 0.8766623207301173, - "grad_norm": 1.6716521978378296, - "learning_rate": 9.881407035175881e-05, - "loss": 6.2942, - "step": 1681 - }, - { - "epoch": 0.8771838331160365, - "grad_norm": 1.7701725959777832, - "learning_rate": 9.881306532663317e-05, - "loss": 5.9054, - "step": 1682 - }, - { - "epoch": 0.8777053455019557, - "grad_norm": 2.3273019790649414, - "learning_rate": 9.881206030150755e-05, - "loss": 5.3712, - "step": 1683 - }, - { - "epoch": 0.8782268578878748, - "grad_norm": 1.9791053533554077, - "learning_rate": 9.881105527638191e-05, - "loss": 6.2034, - "step": 1684 - }, - { - "epoch": 0.878748370273794, - "grad_norm": 1.683030605316162, - "learning_rate": 9.881005025125628e-05, - "loss": 6.5014, - "step": 1685 - }, - { - "epoch": 0.8792698826597132, - "grad_norm": 1.854114294052124, - "learning_rate": 9.880904522613066e-05, - "loss": 6.2767, - "step": 1686 - }, - { - "epoch": 0.8797913950456323, - "grad_norm": 1.6722493171691895, - "learning_rate": 9.880804020100502e-05, - "loss": 6.2782, - "step": 1687 - }, - { - "epoch": 0.8803129074315516, - "grad_norm": 1.731178879737854, - "learning_rate": 9.88070351758794e-05, - "loss": 6.1457, - "step": 1688 - }, - { - "epoch": 0.8808344198174707, - "grad_norm": 1.559341311454773, - "learning_rate": 9.880603015075378e-05, - "loss": 6.5441, - "step": 1689 - }, - { - "epoch": 0.8813559322033898, - "grad_norm": 1.5850564241409302, - "learning_rate": 9.880502512562815e-05, - "loss": 6.2866, - "step": 1690 - }, - { - "epoch": 0.881877444589309, - "grad_norm": 1.7074015140533447, - "learning_rate": 9.880402010050252e-05, - "loss": 6.5135, - "step": 1691 - }, - { - "epoch": 0.8823989569752282, - "grad_norm": 1.5369197130203247, - "learning_rate": 9.88030150753769e-05, - "loss": 6.221, - "step": 1692 - }, - { - "epoch": 0.8829204693611473, - "grad_norm": 1.7250388860702515, - "learning_rate": 9.880201005025126e-05, - "loss": 6.2168, - "step": 1693 - }, - { - "epoch": 0.8834419817470665, - "grad_norm": 1.8233838081359863, - "learning_rate": 9.880100502512564e-05, - "loss": 6.174, - "step": 1694 - }, - { - "epoch": 0.8839634941329857, - "grad_norm": 1.7374942302703857, - "learning_rate": 9.88e-05, - "loss": 5.7856, - "step": 1695 - }, - { - "epoch": 0.8844850065189048, - "grad_norm": 1.9016749858856201, - "learning_rate": 9.879899497487438e-05, - "loss": 6.3599, - "step": 1696 - }, - { - "epoch": 0.885006518904824, - "grad_norm": 1.8541254997253418, - "learning_rate": 9.879798994974874e-05, - "loss": 5.1571, - "step": 1697 - }, - { - "epoch": 0.8855280312907432, - "grad_norm": 1.592788577079773, - "learning_rate": 9.879698492462311e-05, - "loss": 6.4222, - "step": 1698 - }, - { - "epoch": 0.8860495436766623, - "grad_norm": 1.5975663661956787, - "learning_rate": 9.879597989949749e-05, - "loss": 6.2935, - "step": 1699 - }, - { - "epoch": 0.8865710560625815, - "grad_norm": 1.7215317487716675, - "learning_rate": 9.879497487437186e-05, - "loss": 6.3248, - "step": 1700 - }, - { - "epoch": 0.8870925684485007, - "grad_norm": 1.6796884536743164, - "learning_rate": 9.879396984924624e-05, - "loss": 6.1983, - "step": 1701 - }, - { - "epoch": 0.8876140808344198, - "grad_norm": 1.7115135192871094, - "learning_rate": 9.879296482412061e-05, - "loss": 6.421, - "step": 1702 - }, - { - "epoch": 0.888135593220339, - "grad_norm": 1.6824716329574585, - "learning_rate": 9.879195979899498e-05, - "loss": 6.3495, - "step": 1703 - }, - { - "epoch": 0.8886571056062581, - "grad_norm": 1.7323739528656006, - "learning_rate": 9.879095477386935e-05, - "loss": 6.4326, - "step": 1704 - }, - { - "epoch": 0.8891786179921773, - "grad_norm": 1.7057945728302002, - "learning_rate": 9.878994974874373e-05, - "loss": 6.3177, - "step": 1705 - }, - { - "epoch": 0.8897001303780965, - "grad_norm": 1.6935622692108154, - "learning_rate": 9.878894472361809e-05, - "loss": 6.4874, - "step": 1706 - }, - { - "epoch": 0.8902216427640156, - "grad_norm": 1.9630391597747803, - "learning_rate": 9.878793969849247e-05, - "loss": 6.5104, - "step": 1707 - }, - { - "epoch": 0.8907431551499349, - "grad_norm": 2.1287498474121094, - "learning_rate": 9.878693467336683e-05, - "loss": 6.3868, - "step": 1708 - }, - { - "epoch": 0.891264667535854, - "grad_norm": 1.9277119636535645, - "learning_rate": 9.878592964824121e-05, - "loss": 5.7459, - "step": 1709 - }, - { - "epoch": 0.8917861799217731, - "grad_norm": 1.9207557439804077, - "learning_rate": 9.878492462311559e-05, - "loss": 6.427, - "step": 1710 - }, - { - "epoch": 0.8923076923076924, - "grad_norm": 2.0179965496063232, - "learning_rate": 9.878391959798995e-05, - "loss": 6.3432, - "step": 1711 - }, - { - "epoch": 0.8928292046936115, - "grad_norm": 1.8834829330444336, - "learning_rate": 9.878291457286433e-05, - "loss": 6.0137, - "step": 1712 - }, - { - "epoch": 0.8933507170795306, - "grad_norm": 1.7018119096755981, - "learning_rate": 9.87819095477387e-05, - "loss": 6.4548, - "step": 1713 - }, - { - "epoch": 0.8938722294654498, - "grad_norm": 1.7061896324157715, - "learning_rate": 9.878090452261307e-05, - "loss": 5.9995, - "step": 1714 - }, - { - "epoch": 0.894393741851369, - "grad_norm": 1.7229599952697754, - "learning_rate": 9.877989949748744e-05, - "loss": 6.0517, - "step": 1715 - }, - { - "epoch": 0.8949152542372881, - "grad_norm": 1.5875067710876465, - "learning_rate": 9.877889447236182e-05, - "loss": 6.6317, - "step": 1716 - }, - { - "epoch": 0.8954367666232073, - "grad_norm": 1.7189629077911377, - "learning_rate": 9.877788944723618e-05, - "loss": 5.836, - "step": 1717 - }, - { - "epoch": 0.8959582790091265, - "grad_norm": 2.8782334327697754, - "learning_rate": 9.877688442211056e-05, - "loss": 6.144, - "step": 1718 - }, - { - "epoch": 0.8964797913950456, - "grad_norm": 1.9008127450942993, - "learning_rate": 9.877587939698492e-05, - "loss": 6.4061, - "step": 1719 - }, - { - "epoch": 0.8970013037809648, - "grad_norm": 2.296552896499634, - "learning_rate": 9.87748743718593e-05, - "loss": 5.9501, - "step": 1720 - }, - { - "epoch": 0.897522816166884, - "grad_norm": 1.9610203504562378, - "learning_rate": 9.877386934673368e-05, - "loss": 6.2823, - "step": 1721 - }, - { - "epoch": 0.8980443285528031, - "grad_norm": 1.9161206483840942, - "learning_rate": 9.877286432160806e-05, - "loss": 6.2341, - "step": 1722 - }, - { - "epoch": 0.8985658409387223, - "grad_norm": 1.5760002136230469, - "learning_rate": 9.877185929648242e-05, - "loss": 6.6321, - "step": 1723 - }, - { - "epoch": 0.8990873533246415, - "grad_norm": 1.7403117418289185, - "learning_rate": 9.87708542713568e-05, - "loss": 6.3808, - "step": 1724 - }, - { - "epoch": 0.8996088657105606, - "grad_norm": 1.792900800704956, - "learning_rate": 9.876984924623116e-05, - "loss": 6.1505, - "step": 1725 - }, - { - "epoch": 0.9001303780964798, - "grad_norm": 1.8007700443267822, - "learning_rate": 9.876884422110553e-05, - "loss": 6.4719, - "step": 1726 - }, - { - "epoch": 0.9006518904823989, - "grad_norm": 1.6678389310836792, - "learning_rate": 9.87678391959799e-05, - "loss": 6.3762, - "step": 1727 - }, - { - "epoch": 0.9011734028683182, - "grad_norm": 1.6664470434188843, - "learning_rate": 9.876683417085427e-05, - "loss": 6.3374, - "step": 1728 - }, - { - "epoch": 0.9016949152542373, - "grad_norm": 1.8322288990020752, - "learning_rate": 9.876582914572865e-05, - "loss": 6.1869, - "step": 1729 - }, - { - "epoch": 0.9022164276401564, - "grad_norm": 1.7103643417358398, - "learning_rate": 9.876482412060302e-05, - "loss": 6.0917, - "step": 1730 - }, - { - "epoch": 0.9027379400260757, - "grad_norm": 1.6477649211883545, - "learning_rate": 9.87638190954774e-05, - "loss": 5.822, - "step": 1731 - }, - { - "epoch": 0.9032594524119948, - "grad_norm": 1.7735110521316528, - "learning_rate": 9.876281407035177e-05, - "loss": 6.5537, - "step": 1732 - }, - { - "epoch": 0.9037809647979139, - "grad_norm": 1.7689518928527832, - "learning_rate": 9.876180904522614e-05, - "loss": 6.1209, - "step": 1733 - }, - { - "epoch": 0.9043024771838332, - "grad_norm": 1.736576795578003, - "learning_rate": 9.876080402010051e-05, - "loss": 6.1883, - "step": 1734 - }, - { - "epoch": 0.9048239895697523, - "grad_norm": 2.2251172065734863, - "learning_rate": 9.875979899497489e-05, - "loss": 6.0837, - "step": 1735 - }, - { - "epoch": 0.9053455019556714, - "grad_norm": 1.9843007326126099, - "learning_rate": 9.875879396984925e-05, - "loss": 6.1071, - "step": 1736 - }, - { - "epoch": 0.9058670143415906, - "grad_norm": 2.2417023181915283, - "learning_rate": 9.875778894472363e-05, - "loss": 5.9756, - "step": 1737 - }, - { - "epoch": 0.9063885267275098, - "grad_norm": 1.7956897020339966, - "learning_rate": 9.875678391959799e-05, - "loss": 6.0028, - "step": 1738 - }, - { - "epoch": 0.9069100391134289, - "grad_norm": 1.6691679954528809, - "learning_rate": 9.875577889447236e-05, - "loss": 6.2581, - "step": 1739 - }, - { - "epoch": 0.9074315514993481, - "grad_norm": 1.5963270664215088, - "learning_rate": 9.875477386934673e-05, - "loss": 6.0376, - "step": 1740 - }, - { - "epoch": 0.9079530638852673, - "grad_norm": 1.7119053602218628, - "learning_rate": 9.875376884422111e-05, - "loss": 6.2609, - "step": 1741 - }, - { - "epoch": 0.9084745762711864, - "grad_norm": 1.8534127473831177, - "learning_rate": 9.875276381909549e-05, - "loss": 6.2267, - "step": 1742 - }, - { - "epoch": 0.9089960886571056, - "grad_norm": 1.8750230073928833, - "learning_rate": 9.875175879396985e-05, - "loss": 6.3913, - "step": 1743 - }, - { - "epoch": 0.9095176010430248, - "grad_norm": 1.5363609790802002, - "learning_rate": 9.875075376884423e-05, - "loss": 6.5447, - "step": 1744 - }, - { - "epoch": 0.910039113428944, - "grad_norm": 2.0098559856414795, - "learning_rate": 9.87497487437186e-05, - "loss": 6.062, - "step": 1745 - }, - { - "epoch": 0.9105606258148631, - "grad_norm": 2.1519622802734375, - "learning_rate": 9.874874371859297e-05, - "loss": 5.6723, - "step": 1746 - }, - { - "epoch": 0.9110821382007823, - "grad_norm": 1.838989019393921, - "learning_rate": 9.874773869346734e-05, - "loss": 6.4871, - "step": 1747 - }, - { - "epoch": 0.9116036505867015, - "grad_norm": 1.7751922607421875, - "learning_rate": 9.874673366834172e-05, - "loss": 6.2561, - "step": 1748 - }, - { - "epoch": 0.9121251629726206, - "grad_norm": 1.711822271347046, - "learning_rate": 9.874572864321608e-05, - "loss": 6.4721, - "step": 1749 - }, - { - "epoch": 0.9126466753585397, - "grad_norm": 1.937978744506836, - "learning_rate": 9.874472361809046e-05, - "loss": 6.0697, - "step": 1750 - }, - { - "epoch": 0.913168187744459, - "grad_norm": 1.9958463907241821, - "learning_rate": 9.874371859296482e-05, - "loss": 6.0823, - "step": 1751 - }, - { - "epoch": 0.9136897001303781, - "grad_norm": 1.7063099145889282, - "learning_rate": 9.87427135678392e-05, - "loss": 6.4678, - "step": 1752 - }, - { - "epoch": 0.9142112125162972, - "grad_norm": 1.628139853477478, - "learning_rate": 9.874170854271358e-05, - "loss": 6.5647, - "step": 1753 - }, - { - "epoch": 0.9147327249022165, - "grad_norm": 1.9758220911026, - "learning_rate": 9.874070351758794e-05, - "loss": 6.2301, - "step": 1754 - }, - { - "epoch": 0.9152542372881356, - "grad_norm": 2.1318085193634033, - "learning_rate": 9.873969849246232e-05, - "loss": 5.5774, - "step": 1755 - }, - { - "epoch": 0.9157757496740547, - "grad_norm": 1.7579708099365234, - "learning_rate": 9.873869346733668e-05, - "loss": 5.7774, - "step": 1756 - }, - { - "epoch": 0.916297262059974, - "grad_norm": 1.7938499450683594, - "learning_rate": 9.873768844221106e-05, - "loss": 6.1348, - "step": 1757 - }, - { - "epoch": 0.9168187744458931, - "grad_norm": 1.8468101024627686, - "learning_rate": 9.873668341708543e-05, - "loss": 6.5261, - "step": 1758 - }, - { - "epoch": 0.9173402868318122, - "grad_norm": 1.8074986934661865, - "learning_rate": 9.87356783919598e-05, - "loss": 6.4841, - "step": 1759 - }, - { - "epoch": 0.9178617992177314, - "grad_norm": 1.7092223167419434, - "learning_rate": 9.873467336683417e-05, - "loss": 6.3841, - "step": 1760 - }, - { - "epoch": 0.9183833116036506, - "grad_norm": 1.7861287593841553, - "learning_rate": 9.873366834170855e-05, - "loss": 6.355, - "step": 1761 - }, - { - "epoch": 0.9189048239895697, - "grad_norm": 1.6930010318756104, - "learning_rate": 9.873266331658292e-05, - "loss": 6.3363, - "step": 1762 - }, - { - "epoch": 0.9194263363754889, - "grad_norm": 1.8475645780563354, - "learning_rate": 9.87316582914573e-05, - "loss": 5.8782, - "step": 1763 - }, - { - "epoch": 0.9199478487614081, - "grad_norm": 1.5209002494812012, - "learning_rate": 9.873065326633167e-05, - "loss": 6.0526, - "step": 1764 - }, - { - "epoch": 0.9204693611473272, - "grad_norm": 1.700175166130066, - "learning_rate": 9.872964824120603e-05, - "loss": 6.5621, - "step": 1765 - }, - { - "epoch": 0.9209908735332464, - "grad_norm": 1.5412355661392212, - "learning_rate": 9.872864321608041e-05, - "loss": 6.4583, - "step": 1766 - }, - { - "epoch": 0.9215123859191656, - "grad_norm": 1.6577399969100952, - "learning_rate": 9.872763819095477e-05, - "loss": 6.0804, - "step": 1767 - }, - { - "epoch": 0.9220338983050848, - "grad_norm": 1.7236900329589844, - "learning_rate": 9.872663316582915e-05, - "loss": 6.3388, - "step": 1768 - }, - { - "epoch": 0.9225554106910039, - "grad_norm": 1.93367338180542, - "learning_rate": 9.872562814070351e-05, - "loss": 5.9426, - "step": 1769 - }, - { - "epoch": 0.9230769230769231, - "grad_norm": 1.6736849546432495, - "learning_rate": 9.872462311557789e-05, - "loss": 6.1373, - "step": 1770 - }, - { - "epoch": 0.9235984354628423, - "grad_norm": 1.5711864233016968, - "learning_rate": 9.872361809045226e-05, - "loss": 6.2082, - "step": 1771 - }, - { - "epoch": 0.9241199478487614, - "grad_norm": 1.7657711505889893, - "learning_rate": 9.872261306532663e-05, - "loss": 5.958, - "step": 1772 - }, - { - "epoch": 0.9246414602346805, - "grad_norm": 1.7789695262908936, - "learning_rate": 9.872160804020101e-05, - "loss": 6.1682, - "step": 1773 - }, - { - "epoch": 0.9251629726205998, - "grad_norm": 1.6570029258728027, - "learning_rate": 9.872060301507539e-05, - "loss": 6.2757, - "step": 1774 - }, - { - "epoch": 0.9256844850065189, - "grad_norm": 1.5490208864212036, - "learning_rate": 9.871959798994975e-05, - "loss": 6.4814, - "step": 1775 - }, - { - "epoch": 0.926205997392438, - "grad_norm": 1.5855228900909424, - "learning_rate": 9.871859296482413e-05, - "loss": 6.3562, - "step": 1776 - }, - { - "epoch": 0.9267275097783573, - "grad_norm": 1.965667963027954, - "learning_rate": 9.87175879396985e-05, - "loss": 5.9576, - "step": 1777 - }, - { - "epoch": 0.9272490221642764, - "grad_norm": 1.893012285232544, - "learning_rate": 9.871658291457286e-05, - "loss": 5.8361, - "step": 1778 - }, - { - "epoch": 0.9277705345501955, - "grad_norm": 1.981179118156433, - "learning_rate": 9.871557788944724e-05, - "loss": 5.4058, - "step": 1779 - }, - { - "epoch": 0.9282920469361148, - "grad_norm": 2.0810556411743164, - "learning_rate": 9.87145728643216e-05, - "loss": 6.0086, - "step": 1780 - }, - { - "epoch": 0.9288135593220339, - "grad_norm": 1.7562342882156372, - "learning_rate": 9.871356783919598e-05, - "loss": 6.1404, - "step": 1781 - }, - { - "epoch": 0.929335071707953, - "grad_norm": 1.414299726486206, - "learning_rate": 9.871256281407036e-05, - "loss": 6.5221, - "step": 1782 - }, - { - "epoch": 0.9298565840938723, - "grad_norm": 2.073514461517334, - "learning_rate": 9.871155778894474e-05, - "loss": 5.9284, - "step": 1783 - }, - { - "epoch": 0.9303780964797914, - "grad_norm": 1.5229054689407349, - "learning_rate": 9.87105527638191e-05, - "loss": 6.6462, - "step": 1784 - }, - { - "epoch": 0.9308996088657105, - "grad_norm": 1.5980912446975708, - "learning_rate": 9.870954773869348e-05, - "loss": 6.0822, - "step": 1785 - }, - { - "epoch": 0.9314211212516297, - "grad_norm": 1.7462660074234009, - "learning_rate": 9.870854271356784e-05, - "loss": 6.36, - "step": 1786 - }, - { - "epoch": 0.9319426336375489, - "grad_norm": 2.215930700302124, - "learning_rate": 9.870753768844222e-05, - "loss": 5.7601, - "step": 1787 - }, - { - "epoch": 0.932464146023468, - "grad_norm": 1.588579773902893, - "learning_rate": 9.870653266331659e-05, - "loss": 6.3966, - "step": 1788 - }, - { - "epoch": 0.9329856584093872, - "grad_norm": 1.9071831703186035, - "learning_rate": 9.870552763819096e-05, - "loss": 6.2341, - "step": 1789 - }, - { - "epoch": 0.9335071707953064, - "grad_norm": 1.9192125797271729, - "learning_rate": 9.870452261306533e-05, - "loss": 6.0921, - "step": 1790 - }, - { - "epoch": 0.9340286831812256, - "grad_norm": 1.903688669204712, - "learning_rate": 9.870351758793969e-05, - "loss": 6.0152, - "step": 1791 - }, - { - "epoch": 0.9345501955671447, - "grad_norm": 2.2227704524993896, - "learning_rate": 9.870251256281407e-05, - "loss": 5.5872, - "step": 1792 - }, - { - "epoch": 0.9350717079530639, - "grad_norm": 2.658254623413086, - "learning_rate": 9.870150753768845e-05, - "loss": 6.2801, - "step": 1793 - }, - { - "epoch": 0.9355932203389831, - "grad_norm": 1.718497633934021, - "learning_rate": 9.870050251256282e-05, - "loss": 6.3553, - "step": 1794 - }, - { - "epoch": 0.9361147327249022, - "grad_norm": 1.6464396715164185, - "learning_rate": 9.869949748743719e-05, - "loss": 6.3048, - "step": 1795 - }, - { - "epoch": 0.9366362451108213, - "grad_norm": 1.729474425315857, - "learning_rate": 9.869849246231157e-05, - "loss": 6.1583, - "step": 1796 - }, - { - "epoch": 0.9371577574967406, - "grad_norm": 1.7387269735336304, - "learning_rate": 9.869748743718593e-05, - "loss": 6.2634, - "step": 1797 - }, - { - "epoch": 0.9376792698826597, - "grad_norm": 1.7279126644134521, - "learning_rate": 9.869648241206031e-05, - "loss": 6.073, - "step": 1798 - }, - { - "epoch": 0.9382007822685788, - "grad_norm": 1.6976224184036255, - "learning_rate": 9.869547738693467e-05, - "loss": 6.6056, - "step": 1799 - }, - { - "epoch": 0.9387222946544981, - "grad_norm": 1.962341547012329, - "learning_rate": 9.869447236180905e-05, - "loss": 6.3039, - "step": 1800 - }, - { - "epoch": 0.9392438070404172, - "grad_norm": 1.8708937168121338, - "learning_rate": 9.869346733668342e-05, - "loss": 6.4098, - "step": 1801 - }, - { - "epoch": 0.9397653194263363, - "grad_norm": 1.9174119234085083, - "learning_rate": 9.86924623115578e-05, - "loss": 6.2019, - "step": 1802 - }, - { - "epoch": 0.9402868318122556, - "grad_norm": 1.9023312330245972, - "learning_rate": 9.869145728643217e-05, - "loss": 6.0684, - "step": 1803 - }, - { - "epoch": 0.9408083441981747, - "grad_norm": 1.9731063842773438, - "learning_rate": 9.869045226130654e-05, - "loss": 6.2027, - "step": 1804 - }, - { - "epoch": 0.9413298565840938, - "grad_norm": 1.8011584281921387, - "learning_rate": 9.868944723618091e-05, - "loss": 6.3769, - "step": 1805 - }, - { - "epoch": 0.9418513689700131, - "grad_norm": 1.8728646039962769, - "learning_rate": 9.868844221105528e-05, - "loss": 5.9545, - "step": 1806 - }, - { - "epoch": 0.9423728813559322, - "grad_norm": 1.7673097848892212, - "learning_rate": 9.868743718592966e-05, - "loss": 6.1181, - "step": 1807 - }, - { - "epoch": 0.9428943937418514, - "grad_norm": 1.7257661819458008, - "learning_rate": 9.868643216080402e-05, - "loss": 6.4183, - "step": 1808 - }, - { - "epoch": 0.9434159061277705, - "grad_norm": 1.774137020111084, - "learning_rate": 9.86854271356784e-05, - "loss": 6.1167, - "step": 1809 - }, - { - "epoch": 0.9439374185136897, - "grad_norm": 1.786062479019165, - "learning_rate": 9.868442211055276e-05, - "loss": 5.9211, - "step": 1810 - }, - { - "epoch": 0.9444589308996089, - "grad_norm": 3.52620530128479, - "learning_rate": 9.868341708542714e-05, - "loss": 6.0918, - "step": 1811 - }, - { - "epoch": 0.944980443285528, - "grad_norm": 1.9542983770370483, - "learning_rate": 9.86824120603015e-05, - "loss": 6.0449, - "step": 1812 - }, - { - "epoch": 0.9455019556714472, - "grad_norm": 1.9136114120483398, - "learning_rate": 9.868140703517588e-05, - "loss": 6.4076, - "step": 1813 - }, - { - "epoch": 0.9460234680573664, - "grad_norm": 1.745273232460022, - "learning_rate": 9.868040201005026e-05, - "loss": 6.477, - "step": 1814 - }, - { - "epoch": 0.9465449804432855, - "grad_norm": 1.7708137035369873, - "learning_rate": 9.867939698492464e-05, - "loss": 6.3215, - "step": 1815 - }, - { - "epoch": 0.9470664928292047, - "grad_norm": 1.7978767156600952, - "learning_rate": 9.8678391959799e-05, - "loss": 6.2015, - "step": 1816 - }, - { - "epoch": 0.9475880052151239, - "grad_norm": 1.886091947555542, - "learning_rate": 9.867738693467338e-05, - "loss": 5.5708, - "step": 1817 - }, - { - "epoch": 0.948109517601043, - "grad_norm": 2.059328079223633, - "learning_rate": 9.867638190954774e-05, - "loss": 6.1685, - "step": 1818 - }, - { - "epoch": 0.9486310299869621, - "grad_norm": 1.7448359727859497, - "learning_rate": 9.867537688442211e-05, - "loss": 6.3786, - "step": 1819 - }, - { - "epoch": 0.9491525423728814, - "grad_norm": 1.4995218515396118, - "learning_rate": 9.867437185929649e-05, - "loss": 5.0233, - "step": 1820 - }, - { - "epoch": 0.9496740547588005, - "grad_norm": 1.9038867950439453, - "learning_rate": 9.867336683417085e-05, - "loss": 6.2677, - "step": 1821 - }, - { - "epoch": 0.9501955671447196, - "grad_norm": 1.964781403541565, - "learning_rate": 9.867236180904523e-05, - "loss": 6.0163, - "step": 1822 - }, - { - "epoch": 0.9507170795306389, - "grad_norm": 1.859845757484436, - "learning_rate": 9.86713567839196e-05, - "loss": 6.0896, - "step": 1823 - }, - { - "epoch": 0.951238591916558, - "grad_norm": 1.9396536350250244, - "learning_rate": 9.867035175879398e-05, - "loss": 6.2767, - "step": 1824 - }, - { - "epoch": 0.9517601043024772, - "grad_norm": 1.6548652648925781, - "learning_rate": 9.866934673366835e-05, - "loss": 6.3923, - "step": 1825 - }, - { - "epoch": 0.9522816166883964, - "grad_norm": 1.84656822681427, - "learning_rate": 9.866834170854273e-05, - "loss": 5.658, - "step": 1826 - }, - { - "epoch": 0.9528031290743155, - "grad_norm": 1.8334338665008545, - "learning_rate": 9.866733668341709e-05, - "loss": 6.1049, - "step": 1827 - }, - { - "epoch": 0.9533246414602347, - "grad_norm": 1.528545618057251, - "learning_rate": 9.866633165829147e-05, - "loss": 6.3846, - "step": 1828 - }, - { - "epoch": 0.9538461538461539, - "grad_norm": 1.863281488418579, - "learning_rate": 9.866532663316583e-05, - "loss": 6.0952, - "step": 1829 - }, - { - "epoch": 0.954367666232073, - "grad_norm": 1.7164326906204224, - "learning_rate": 9.866432160804021e-05, - "loss": 6.4763, - "step": 1830 - }, - { - "epoch": 0.9548891786179922, - "grad_norm": 1.736375331878662, - "learning_rate": 9.866331658291457e-05, - "loss": 6.2349, - "step": 1831 - }, - { - "epoch": 0.9554106910039113, - "grad_norm": 1.8414313793182373, - "learning_rate": 9.866231155778894e-05, - "loss": 6.3186, - "step": 1832 - }, - { - "epoch": 0.9559322033898305, - "grad_norm": 1.798867106437683, - "learning_rate": 9.866130653266332e-05, - "loss": 5.9766, - "step": 1833 - }, - { - "epoch": 0.9564537157757497, - "grad_norm": 1.835066318511963, - "learning_rate": 9.86603015075377e-05, - "loss": 6.1279, - "step": 1834 - }, - { - "epoch": 0.9569752281616688, - "grad_norm": 1.9193572998046875, - "learning_rate": 9.865929648241207e-05, - "loss": 6.0124, - "step": 1835 - }, - { - "epoch": 0.957496740547588, - "grad_norm": 1.6664605140686035, - "learning_rate": 9.865829145728644e-05, - "loss": 6.1305, - "step": 1836 - }, - { - "epoch": 0.9580182529335072, - "grad_norm": 2.0482826232910156, - "learning_rate": 9.865728643216081e-05, - "loss": 6.1755, - "step": 1837 - }, - { - "epoch": 0.9585397653194263, - "grad_norm": 1.9236359596252441, - "learning_rate": 9.865628140703518e-05, - "loss": 5.6478, - "step": 1838 - }, - { - "epoch": 0.9590612777053455, - "grad_norm": 1.6822118759155273, - "learning_rate": 9.865527638190956e-05, - "loss": 6.1067, - "step": 1839 - }, - { - "epoch": 0.9595827900912647, - "grad_norm": 1.6255333423614502, - "learning_rate": 9.865427135678392e-05, - "loss": 6.2461, - "step": 1840 - }, - { - "epoch": 0.9601043024771838, - "grad_norm": 1.6045290231704712, - "learning_rate": 9.86532663316583e-05, - "loss": 6.3111, - "step": 1841 - }, - { - "epoch": 0.960625814863103, - "grad_norm": 1.4453548192977905, - "learning_rate": 9.865226130653266e-05, - "loss": 6.453, - "step": 1842 - }, - { - "epoch": 0.9611473272490222, - "grad_norm": 1.9731504917144775, - "learning_rate": 9.865125628140704e-05, - "loss": 6.0372, - "step": 1843 - }, - { - "epoch": 0.9616688396349413, - "grad_norm": 1.813751220703125, - "learning_rate": 9.865025125628142e-05, - "loss": 6.0699, - "step": 1844 - }, - { - "epoch": 0.9621903520208605, - "grad_norm": 1.7470742464065552, - "learning_rate": 9.864924623115578e-05, - "loss": 6.2216, - "step": 1845 - }, - { - "epoch": 0.9627118644067797, - "grad_norm": 1.889413595199585, - "learning_rate": 9.864824120603016e-05, - "loss": 6.3146, - "step": 1846 - }, - { - "epoch": 0.9632333767926988, - "grad_norm": 1.9028747081756592, - "learning_rate": 9.864723618090452e-05, - "loss": 6.115, - "step": 1847 - }, - { - "epoch": 0.963754889178618, - "grad_norm": 1.8532224893569946, - "learning_rate": 9.86462311557789e-05, - "loss": 6.1386, - "step": 1848 - }, - { - "epoch": 0.9642764015645372, - "grad_norm": 1.599428415298462, - "learning_rate": 9.864522613065327e-05, - "loss": 6.4411, - "step": 1849 - }, - { - "epoch": 0.9647979139504563, - "grad_norm": 1.677685260772705, - "learning_rate": 9.864422110552764e-05, - "loss": 6.4012, - "step": 1850 - }, - { - "epoch": 0.9653194263363755, - "grad_norm": 1.7733136415481567, - "learning_rate": 9.864321608040201e-05, - "loss": 5.9693, - "step": 1851 - }, - { - "epoch": 0.9658409387222947, - "grad_norm": 1.55734121799469, - "learning_rate": 9.864221105527639e-05, - "loss": 6.2712, - "step": 1852 - }, - { - "epoch": 0.9663624511082138, - "grad_norm": 1.5608091354370117, - "learning_rate": 9.864120603015075e-05, - "loss": 6.07, - "step": 1853 - }, - { - "epoch": 0.966883963494133, - "grad_norm": 1.8081550598144531, - "learning_rate": 9.864020100502513e-05, - "loss": 6.1857, - "step": 1854 - }, - { - "epoch": 0.9674054758800521, - "grad_norm": 1.903599739074707, - "learning_rate": 9.86391959798995e-05, - "loss": 5.9202, - "step": 1855 - }, - { - "epoch": 0.9679269882659713, - "grad_norm": 2.073634624481201, - "learning_rate": 9.863819095477388e-05, - "loss": 6.4165, - "step": 1856 - }, - { - "epoch": 0.9684485006518905, - "grad_norm": 1.8562136888504028, - "learning_rate": 9.863718592964825e-05, - "loss": 5.9238, - "step": 1857 - }, - { - "epoch": 0.9689700130378096, - "grad_norm": 1.9968205690383911, - "learning_rate": 9.863618090452261e-05, - "loss": 5.905, - "step": 1858 - }, - { - "epoch": 0.9694915254237289, - "grad_norm": 1.8912436962127686, - "learning_rate": 9.863517587939699e-05, - "loss": 5.9879, - "step": 1859 - }, - { - "epoch": 0.970013037809648, - "grad_norm": 1.8201597929000854, - "learning_rate": 9.863417085427136e-05, - "loss": 6.3433, - "step": 1860 - }, - { - "epoch": 0.9705345501955671, - "grad_norm": 1.501621127128601, - "learning_rate": 9.863316582914573e-05, - "loss": 6.2233, - "step": 1861 - }, - { - "epoch": 0.9710560625814864, - "grad_norm": 1.7901490926742554, - "learning_rate": 9.86321608040201e-05, - "loss": 6.4165, - "step": 1862 - }, - { - "epoch": 0.9715775749674055, - "grad_norm": 1.8126524686813354, - "learning_rate": 9.863115577889447e-05, - "loss": 6.2119, - "step": 1863 - }, - { - "epoch": 0.9720990873533246, - "grad_norm": 1.8406113386154175, - "learning_rate": 9.863015075376885e-05, - "loss": 6.1828, - "step": 1864 - }, - { - "epoch": 0.9726205997392438, - "grad_norm": 1.9171116352081299, - "learning_rate": 9.862914572864323e-05, - "loss": 6.1778, - "step": 1865 - }, - { - "epoch": 0.973142112125163, - "grad_norm": 1.883215069770813, - "learning_rate": 9.86281407035176e-05, - "loss": 6.0915, - "step": 1866 - }, - { - "epoch": 0.9736636245110821, - "grad_norm": 1.798251986503601, - "learning_rate": 9.862713567839197e-05, - "loss": 6.4367, - "step": 1867 - }, - { - "epoch": 0.9741851368970013, - "grad_norm": 1.9052170515060425, - "learning_rate": 9.862613065326634e-05, - "loss": 5.5588, - "step": 1868 - }, - { - "epoch": 0.9747066492829205, - "grad_norm": 1.575830340385437, - "learning_rate": 9.862512562814071e-05, - "loss": 6.0571, - "step": 1869 - }, - { - "epoch": 0.9752281616688396, - "grad_norm": 1.853869915008545, - "learning_rate": 9.862412060301508e-05, - "loss": 6.0275, - "step": 1870 - }, - { - "epoch": 0.9757496740547588, - "grad_norm": 1.7869341373443604, - "learning_rate": 9.862311557788944e-05, - "loss": 6.3859, - "step": 1871 - }, - { - "epoch": 0.976271186440678, - "grad_norm": 1.8462800979614258, - "learning_rate": 9.862211055276382e-05, - "loss": 5.3611, - "step": 1872 - }, - { - "epoch": 0.9767926988265971, - "grad_norm": 1.9830440282821655, - "learning_rate": 9.862110552763819e-05, - "loss": 6.1347, - "step": 1873 - }, - { - "epoch": 0.9773142112125163, - "grad_norm": 1.848233938217163, - "learning_rate": 9.862010050251256e-05, - "loss": 5.7121, - "step": 1874 - }, - { - "epoch": 0.9778357235984355, - "grad_norm": 1.6460009813308716, - "learning_rate": 9.861909547738694e-05, - "loss": 6.2635, - "step": 1875 - }, - { - "epoch": 0.9783572359843546, - "grad_norm": 1.6736358404159546, - "learning_rate": 9.861809045226132e-05, - "loss": 6.3444, - "step": 1876 - }, - { - "epoch": 0.9788787483702738, - "grad_norm": 2.030003547668457, - "learning_rate": 9.861708542713568e-05, - "loss": 5.6073, - "step": 1877 - }, - { - "epoch": 0.9794002607561929, - "grad_norm": 1.8256900310516357, - "learning_rate": 9.861608040201006e-05, - "loss": 5.9125, - "step": 1878 - }, - { - "epoch": 0.9799217731421122, - "grad_norm": 1.9384441375732422, - "learning_rate": 9.861507537688443e-05, - "loss": 6.2115, - "step": 1879 - }, - { - "epoch": 0.9804432855280313, - "grad_norm": 1.758963942527771, - "learning_rate": 9.86140703517588e-05, - "loss": 6.1118, - "step": 1880 - }, - { - "epoch": 0.9809647979139504, - "grad_norm": 2.266641855239868, - "learning_rate": 9.861306532663317e-05, - "loss": 5.9316, - "step": 1881 - }, - { - "epoch": 0.9814863102998697, - "grad_norm": 2.2349464893341064, - "learning_rate": 9.861206030150755e-05, - "loss": 6.0164, - "step": 1882 - }, - { - "epoch": 0.9820078226857888, - "grad_norm": 2.1468758583068848, - "learning_rate": 9.861105527638191e-05, - "loss": 5.8991, - "step": 1883 - }, - { - "epoch": 0.9825293350717079, - "grad_norm": 2.0775907039642334, - "learning_rate": 9.861005025125629e-05, - "loss": 6.0994, - "step": 1884 - }, - { - "epoch": 0.9830508474576272, - "grad_norm": 1.7565312385559082, - "learning_rate": 9.860904522613067e-05, - "loss": 6.1407, - "step": 1885 - }, - { - "epoch": 0.9835723598435463, - "grad_norm": 1.8905107975006104, - "learning_rate": 9.860804020100503e-05, - "loss": 6.3397, - "step": 1886 - }, - { - "epoch": 0.9840938722294654, - "grad_norm": 1.8091247081756592, - "learning_rate": 9.860703517587941e-05, - "loss": 6.4454, - "step": 1887 - }, - { - "epoch": 0.9846153846153847, - "grad_norm": 1.7579909563064575, - "learning_rate": 9.860603015075377e-05, - "loss": 6.4286, - "step": 1888 - }, - { - "epoch": 0.9851368970013038, - "grad_norm": 1.901070237159729, - "learning_rate": 9.860502512562815e-05, - "loss": 6.3575, - "step": 1889 - }, - { - "epoch": 0.9856584093872229, - "grad_norm": 2.136735200881958, - "learning_rate": 9.860402010050251e-05, - "loss": 5.8651, - "step": 1890 - }, - { - "epoch": 0.9861799217731421, - "grad_norm": 1.7569983005523682, - "learning_rate": 9.860301507537689e-05, - "loss": 6.2668, - "step": 1891 - }, - { - "epoch": 0.9867014341590613, - "grad_norm": 1.9299886226654053, - "learning_rate": 9.860201005025126e-05, - "loss": 6.1899, - "step": 1892 - }, - { - "epoch": 0.9872229465449804, - "grad_norm": 1.8312844038009644, - "learning_rate": 9.860100502512563e-05, - "loss": 5.8749, - "step": 1893 - }, - { - "epoch": 0.9877444589308996, - "grad_norm": 1.7669068574905396, - "learning_rate": 9.86e-05, - "loss": 5.3984, - "step": 1894 - }, - { - "epoch": 0.9882659713168188, - "grad_norm": 2.1258013248443604, - "learning_rate": 9.859899497487438e-05, - "loss": 5.7357, - "step": 1895 - }, - { - "epoch": 0.988787483702738, - "grad_norm": 1.752524971961975, - "learning_rate": 9.859798994974875e-05, - "loss": 5.7311, - "step": 1896 - }, - { - "epoch": 0.9893089960886571, - "grad_norm": 1.7557425498962402, - "learning_rate": 9.859698492462312e-05, - "loss": 5.766, - "step": 1897 - }, - { - "epoch": 0.9898305084745763, - "grad_norm": 1.8874051570892334, - "learning_rate": 9.85959798994975e-05, - "loss": 6.0503, - "step": 1898 - }, - { - "epoch": 0.9903520208604955, - "grad_norm": 1.5621223449707031, - "learning_rate": 9.859497487437186e-05, - "loss": 6.5691, - "step": 1899 - }, - { - "epoch": 0.9908735332464146, - "grad_norm": 1.6794296503067017, - "learning_rate": 9.859396984924624e-05, - "loss": 6.4146, - "step": 1900 - }, - { - "epoch": 0.9913950456323337, - "grad_norm": 1.704735279083252, - "learning_rate": 9.85929648241206e-05, - "loss": 6.3028, - "step": 1901 - }, - { - "epoch": 0.991916558018253, - "grad_norm": 1.6560001373291016, - "learning_rate": 9.859195979899498e-05, - "loss": 6.0321, - "step": 1902 - }, - { - "epoch": 0.9924380704041721, - "grad_norm": 1.8004179000854492, - "learning_rate": 9.859095477386934e-05, - "loss": 5.9194, - "step": 1903 - }, - { - "epoch": 0.9929595827900912, - "grad_norm": 1.9367026090621948, - "learning_rate": 9.858994974874372e-05, - "loss": 6.1279, - "step": 1904 - }, - { - "epoch": 0.9934810951760105, - "grad_norm": 1.9425865411758423, - "learning_rate": 9.858894472361809e-05, - "loss": 6.5871, - "step": 1905 - }, - { - "epoch": 0.9940026075619296, - "grad_norm": 1.9378076791763306, - "learning_rate": 9.858793969849246e-05, - "loss": 6.3338, - "step": 1906 - }, - { - "epoch": 0.9945241199478487, - "grad_norm": 2.6838061809539795, - "learning_rate": 9.858693467336684e-05, - "loss": 5.4407, - "step": 1907 - }, - { - "epoch": 0.995045632333768, - "grad_norm": 2.263880729675293, - "learning_rate": 9.858592964824122e-05, - "loss": 5.8836, - "step": 1908 - }, - { - "epoch": 0.9955671447196871, - "grad_norm": 1.8455873727798462, - "learning_rate": 9.858492462311558e-05, - "loss": 6.3885, - "step": 1909 - }, - { - "epoch": 0.9960886571056062, - "grad_norm": 1.9014992713928223, - "learning_rate": 9.858391959798996e-05, - "loss": 6.3106, - "step": 1910 - }, - { - "epoch": 0.9966101694915255, - "grad_norm": 1.7918483018875122, - "learning_rate": 9.858291457286433e-05, - "loss": 6.2332, - "step": 1911 - }, - { - "epoch": 0.9971316818774446, - "grad_norm": 1.7935258150100708, - "learning_rate": 9.858190954773869e-05, - "loss": 6.0331, - "step": 1912 - }, - { - "epoch": 0.9976531942633637, - "grad_norm": 1.972747564315796, - "learning_rate": 9.858090452261307e-05, - "loss": 6.0963, - "step": 1913 - }, - { - "epoch": 0.9981747066492829, - "grad_norm": 1.479304552078247, - "learning_rate": 9.857989949748743e-05, - "loss": 6.2654, - "step": 1914 - }, - { - "epoch": 0.9986962190352021, - "grad_norm": 2.468888282775879, - "learning_rate": 9.857889447236181e-05, - "loss": 5.4459, - "step": 1915 - }, - { - "epoch": 0.9992177314211212, - "grad_norm": 2.081630229949951, - "learning_rate": 9.857788944723619e-05, - "loss": 6.3112, - "step": 1916 - }, - { - "epoch": 0.9997392438070404, - "grad_norm": 2.034188747406006, - "learning_rate": 9.857688442211057e-05, - "loss": 6.0643, - "step": 1917 - }, - { - "epoch": 1.0002607561929595, - "grad_norm": 1.9129623174667358, - "learning_rate": 9.857587939698493e-05, - "loss": 6.4628, - "step": 1918 - }, - { - "epoch": 1.0007822685788788, - "grad_norm": 1.8343724012374878, - "learning_rate": 9.857487437185931e-05, - "loss": 6.4646, - "step": 1919 - }, - { - "epoch": 1.001303780964798, - "grad_norm": 1.682604193687439, - "learning_rate": 9.857386934673367e-05, - "loss": 6.3218, - "step": 1920 - }, - { - "epoch": 1.001825293350717, - "grad_norm": 1.7998607158660889, - "learning_rate": 9.857286432160805e-05, - "loss": 6.4673, - "step": 1921 - }, - { - "epoch": 1.0023468057366363, - "grad_norm": 1.6766268014907837, - "learning_rate": 9.857185929648241e-05, - "loss": 6.4773, - "step": 1922 - }, - { - "epoch": 1.0028683181225555, - "grad_norm": 1.8383983373641968, - "learning_rate": 9.857085427135679e-05, - "loss": 6.0813, - "step": 1923 - }, - { - "epoch": 1.0033898305084745, - "grad_norm": 1.5827420949935913, - "learning_rate": 9.856984924623116e-05, - "loss": 6.298, - "step": 1924 - }, - { - "epoch": 1.0039113428943938, - "grad_norm": 1.828986644744873, - "learning_rate": 9.856884422110552e-05, - "loss": 6.0962, - "step": 1925 - }, - { - "epoch": 1.004432855280313, - "grad_norm": 1.7401952743530273, - "learning_rate": 9.85678391959799e-05, - "loss": 6.1504, - "step": 1926 - }, - { - "epoch": 1.004954367666232, - "grad_norm": 1.6379196643829346, - "learning_rate": 9.856683417085428e-05, - "loss": 6.0205, - "step": 1927 - }, - { - "epoch": 1.0054758800521513, - "grad_norm": 2.1414053440093994, - "learning_rate": 9.856582914572865e-05, - "loss": 5.9953, - "step": 1928 - }, - { - "epoch": 1.0059973924380705, - "grad_norm": 2.381213903427124, - "learning_rate": 9.856482412060302e-05, - "loss": 5.7121, - "step": 1929 - }, - { - "epoch": 1.0065189048239895, - "grad_norm": 1.7482136487960815, - "learning_rate": 9.85638190954774e-05, - "loss": 6.1058, - "step": 1930 - }, - { - "epoch": 1.0070404172099088, - "grad_norm": 1.5391474962234497, - "learning_rate": 9.856281407035176e-05, - "loss": 6.4011, - "step": 1931 - }, - { - "epoch": 1.0075619295958278, - "grad_norm": 1.7834244966506958, - "learning_rate": 9.856180904522614e-05, - "loss": 6.237, - "step": 1932 - }, - { - "epoch": 1.008083441981747, - "grad_norm": 1.662527084350586, - "learning_rate": 9.85608040201005e-05, - "loss": 6.2767, - "step": 1933 - }, - { - "epoch": 1.0086049543676663, - "grad_norm": 1.704943299293518, - "learning_rate": 9.855979899497488e-05, - "loss": 6.2224, - "step": 1934 - }, - { - "epoch": 1.0091264667535853, - "grad_norm": 1.9073781967163086, - "learning_rate": 9.855879396984924e-05, - "loss": 6.1934, - "step": 1935 - }, - { - "epoch": 1.0096479791395045, - "grad_norm": 2.1955313682556152, - "learning_rate": 9.855778894472362e-05, - "loss": 5.7247, - "step": 1936 - }, - { - "epoch": 1.0101694915254238, - "grad_norm": 1.8141535520553589, - "learning_rate": 9.8556783919598e-05, - "loss": 5.604, - "step": 1937 - }, - { - "epoch": 1.0106910039113428, - "grad_norm": 1.701280951499939, - "learning_rate": 9.855577889447236e-05, - "loss": 6.3704, - "step": 1938 - }, - { - "epoch": 1.011212516297262, - "grad_norm": 1.7259966135025024, - "learning_rate": 9.855477386934674e-05, - "loss": 6.2628, - "step": 1939 - }, - { - "epoch": 1.0117340286831813, - "grad_norm": 2.0503947734832764, - "learning_rate": 9.855376884422111e-05, - "loss": 5.9671, - "step": 1940 - }, - { - "epoch": 1.0122555410691003, - "grad_norm": 2.145129919052124, - "learning_rate": 9.855276381909548e-05, - "loss": 6.0575, - "step": 1941 - }, - { - "epoch": 1.0127770534550196, - "grad_norm": 1.6872786283493042, - "learning_rate": 9.855175879396985e-05, - "loss": 6.2608, - "step": 1942 - }, - { - "epoch": 1.0132985658409388, - "grad_norm": 1.89679753780365, - "learning_rate": 9.855075376884423e-05, - "loss": 6.0626, - "step": 1943 - }, - { - "epoch": 1.0138200782268578, - "grad_norm": 1.792041301727295, - "learning_rate": 9.854974874371859e-05, - "loss": 6.1741, - "step": 1944 - }, - { - "epoch": 1.014341590612777, - "grad_norm": 1.738544225692749, - "learning_rate": 9.854874371859297e-05, - "loss": 5.8878, - "step": 1945 - }, - { - "epoch": 1.0148631029986963, - "grad_norm": 1.7884182929992676, - "learning_rate": 9.854773869346733e-05, - "loss": 6.0474, - "step": 1946 - }, - { - "epoch": 1.0153846153846153, - "grad_norm": 1.6626036167144775, - "learning_rate": 9.854673366834171e-05, - "loss": 6.3142, - "step": 1947 - }, - { - "epoch": 1.0159061277705346, - "grad_norm": 1.596867561340332, - "learning_rate": 9.854572864321609e-05, - "loss": 6.4722, - "step": 1948 - }, - { - "epoch": 1.0164276401564538, - "grad_norm": 1.572496771812439, - "learning_rate": 9.854472361809047e-05, - "loss": 6.4497, - "step": 1949 - }, - { - "epoch": 1.0169491525423728, - "grad_norm": 2.112011432647705, - "learning_rate": 9.854371859296483e-05, - "loss": 5.6047, - "step": 1950 - }, - { - "epoch": 1.017470664928292, - "grad_norm": 1.6629061698913574, - "learning_rate": 9.85427135678392e-05, - "loss": 6.1408, - "step": 1951 - }, - { - "epoch": 1.0179921773142113, - "grad_norm": 2.0328640937805176, - "learning_rate": 9.854170854271357e-05, - "loss": 5.6916, - "step": 1952 - }, - { - "epoch": 1.0185136897001303, - "grad_norm": 1.8895622491836548, - "learning_rate": 9.854070351758794e-05, - "loss": 6.0915, - "step": 1953 - }, - { - "epoch": 1.0190352020860496, - "grad_norm": 1.845371127128601, - "learning_rate": 9.853969849246232e-05, - "loss": 6.0751, - "step": 1954 - }, - { - "epoch": 1.0195567144719686, - "grad_norm": 1.6103541851043701, - "learning_rate": 9.853869346733668e-05, - "loss": 6.2483, - "step": 1955 - }, - { - "epoch": 1.0200782268578878, - "grad_norm": 1.5887151956558228, - "learning_rate": 9.853768844221106e-05, - "loss": 6.1339, - "step": 1956 - }, - { - "epoch": 1.020599739243807, - "grad_norm": 1.7419342994689941, - "learning_rate": 9.853668341708544e-05, - "loss": 5.8927, - "step": 1957 - }, - { - "epoch": 1.0211212516297261, - "grad_norm": 1.63241708278656, - "learning_rate": 9.853567839195981e-05, - "loss": 5.7099, - "step": 1958 - }, - { - "epoch": 1.0216427640156454, - "grad_norm": 1.866214632987976, - "learning_rate": 9.853467336683418e-05, - "loss": 6.2121, - "step": 1959 - }, - { - "epoch": 1.0221642764015646, - "grad_norm": 2.0659101009368896, - "learning_rate": 9.853366834170856e-05, - "loss": 5.9859, - "step": 1960 - }, - { - "epoch": 1.0226857887874836, - "grad_norm": 1.8834388256072998, - "learning_rate": 9.853266331658292e-05, - "loss": 5.9066, - "step": 1961 - }, - { - "epoch": 1.0232073011734029, - "grad_norm": 2.102869749069214, - "learning_rate": 9.85316582914573e-05, - "loss": 6.0997, - "step": 1962 - }, - { - "epoch": 1.023728813559322, - "grad_norm": 1.770065188407898, - "learning_rate": 9.853065326633166e-05, - "loss": 6.3884, - "step": 1963 - }, - { - "epoch": 1.0242503259452411, - "grad_norm": 1.7065154314041138, - "learning_rate": 9.852964824120603e-05, - "loss": 6.0609, - "step": 1964 - }, - { - "epoch": 1.0247718383311604, - "grad_norm": 1.9834550619125366, - "learning_rate": 9.85286432160804e-05, - "loss": 6.1505, - "step": 1965 - }, - { - "epoch": 1.0252933507170796, - "grad_norm": 1.844692349433899, - "learning_rate": 9.852763819095477e-05, - "loss": 5.6711, - "step": 1966 - }, - { - "epoch": 1.0258148631029986, - "grad_norm": 1.8565480709075928, - "learning_rate": 9.852663316582915e-05, - "loss": 5.6849, - "step": 1967 - }, - { - "epoch": 1.0263363754889179, - "grad_norm": 2.0983030796051025, - "learning_rate": 9.852562814070352e-05, - "loss": 5.4211, - "step": 1968 - }, - { - "epoch": 1.0268578878748371, - "grad_norm": 1.8495198488235474, - "learning_rate": 9.85246231155779e-05, - "loss": 5.9789, - "step": 1969 - }, - { - "epoch": 1.0273794002607561, - "grad_norm": 2.0760555267333984, - "learning_rate": 9.852361809045227e-05, - "loss": 6.0637, - "step": 1970 - }, - { - "epoch": 1.0279009126466754, - "grad_norm": 1.6629621982574463, - "learning_rate": 9.852261306532664e-05, - "loss": 5.9882, - "step": 1971 - }, - { - "epoch": 1.0284224250325946, - "grad_norm": 1.7916992902755737, - "learning_rate": 9.852160804020101e-05, - "loss": 6.0562, - "step": 1972 - }, - { - "epoch": 1.0289439374185136, - "grad_norm": 1.7200196981430054, - "learning_rate": 9.852060301507539e-05, - "loss": 6.4633, - "step": 1973 - }, - { - "epoch": 1.0294654498044329, - "grad_norm": 1.4872539043426514, - "learning_rate": 9.851959798994975e-05, - "loss": 6.4146, - "step": 1974 - }, - { - "epoch": 1.0299869621903521, - "grad_norm": 1.9057978391647339, - "learning_rate": 9.851859296482413e-05, - "loss": 5.738, - "step": 1975 - }, - { - "epoch": 1.0305084745762711, - "grad_norm": 1.5664172172546387, - "learning_rate": 9.851758793969849e-05, - "loss": 6.2933, - "step": 1976 - }, - { - "epoch": 1.0310299869621904, - "grad_norm": 1.8218095302581787, - "learning_rate": 9.851658291457287e-05, - "loss": 6.2938, - "step": 1977 - }, - { - "epoch": 1.0315514993481094, - "grad_norm": 1.6508524417877197, - "learning_rate": 9.851557788944725e-05, - "loss": 6.0992, - "step": 1978 - }, - { - "epoch": 1.0320730117340287, - "grad_norm": 1.6507478952407837, - "learning_rate": 9.851457286432161e-05, - "loss": 5.8736, - "step": 1979 - }, - { - "epoch": 1.032594524119948, - "grad_norm": 1.7887682914733887, - "learning_rate": 9.851356783919599e-05, - "loss": 6.2563, - "step": 1980 - }, - { - "epoch": 1.033116036505867, - "grad_norm": 1.625353217124939, - "learning_rate": 9.851256281407035e-05, - "loss": 5.8274, - "step": 1981 - }, - { - "epoch": 1.0336375488917862, - "grad_norm": 1.6090606451034546, - "learning_rate": 9.851155778894473e-05, - "loss": 6.2335, - "step": 1982 - }, - { - "epoch": 1.0341590612777054, - "grad_norm": 1.6227985620498657, - "learning_rate": 9.85105527638191e-05, - "loss": 6.192, - "step": 1983 - }, - { - "epoch": 1.0346805736636244, - "grad_norm": 1.8747210502624512, - "learning_rate": 9.850954773869347e-05, - "loss": 5.7952, - "step": 1984 - }, - { - "epoch": 1.0352020860495437, - "grad_norm": 1.5406162738800049, - "learning_rate": 9.850854271356784e-05, - "loss": 6.3294, - "step": 1985 - }, - { - "epoch": 1.035723598435463, - "grad_norm": 1.755257248878479, - "learning_rate": 9.850753768844222e-05, - "loss": 5.9776, - "step": 1986 - }, - { - "epoch": 1.036245110821382, - "grad_norm": 1.7343206405639648, - "learning_rate": 9.850653266331658e-05, - "loss": 6.2135, - "step": 1987 - }, - { - "epoch": 1.0367666232073012, - "grad_norm": 1.4382730722427368, - "learning_rate": 9.850552763819096e-05, - "loss": 6.3149, - "step": 1988 - }, - { - "epoch": 1.0372881355932204, - "grad_norm": 1.5399811267852783, - "learning_rate": 9.850452261306534e-05, - "loss": 6.2842, - "step": 1989 - }, - { - "epoch": 1.0378096479791394, - "grad_norm": 1.7079685926437378, - "learning_rate": 9.850351758793971e-05, - "loss": 6.1115, - "step": 1990 - }, - { - "epoch": 1.0383311603650587, - "grad_norm": 2.1128201484680176, - "learning_rate": 9.850251256281408e-05, - "loss": 5.342, - "step": 1991 - }, - { - "epoch": 1.038852672750978, - "grad_norm": 1.9430609941482544, - "learning_rate": 9.850150753768844e-05, - "loss": 5.3026, - "step": 1992 - }, - { - "epoch": 1.039374185136897, - "grad_norm": 2.039355754852295, - "learning_rate": 9.850050251256282e-05, - "loss": 5.6604, - "step": 1993 - }, - { - "epoch": 1.0398956975228162, - "grad_norm": 1.564090609550476, - "learning_rate": 9.849949748743718e-05, - "loss": 6.3321, - "step": 1994 - }, - { - "epoch": 1.0404172099087354, - "grad_norm": 1.8027979135513306, - "learning_rate": 9.849849246231156e-05, - "loss": 5.8961, - "step": 1995 - }, - { - "epoch": 1.0409387222946545, - "grad_norm": 1.6895747184753418, - "learning_rate": 9.849748743718593e-05, - "loss": 6.015, - "step": 1996 - }, - { - "epoch": 1.0414602346805737, - "grad_norm": 1.9850399494171143, - "learning_rate": 9.84964824120603e-05, - "loss": 5.5114, - "step": 1997 - }, - { - "epoch": 1.041981747066493, - "grad_norm": 2.297036647796631, - "learning_rate": 9.849547738693468e-05, - "loss": 5.8444, - "step": 1998 - }, - { - "epoch": 1.042503259452412, - "grad_norm": 1.6363486051559448, - "learning_rate": 9.849447236180906e-05, - "loss": 6.3536, - "step": 1999 - }, - { - "epoch": 1.0430247718383312, - "grad_norm": 1.6470552682876587, - "learning_rate": 9.849346733668342e-05, - "loss": 6.5008, - "step": 2000 - }, - { - "epoch": 1.0435462842242502, - "grad_norm": 1.8797277212142944, - "learning_rate": 9.84924623115578e-05, - "loss": 5.9143, - "step": 2001 - }, - { - "epoch": 1.0440677966101695, - "grad_norm": 1.6308670043945312, - "learning_rate": 9.849145728643217e-05, - "loss": 6.1703, - "step": 2002 - }, - { - "epoch": 1.0445893089960887, - "grad_norm": 1.7901939153671265, - "learning_rate": 9.849045226130654e-05, - "loss": 6.0045, - "step": 2003 - }, - { - "epoch": 1.0451108213820077, - "grad_norm": 1.9173728227615356, - "learning_rate": 9.848944723618091e-05, - "loss": 5.8901, - "step": 2004 - }, - { - "epoch": 1.045632333767927, - "grad_norm": 2.012906551361084, - "learning_rate": 9.848844221105527e-05, - "loss": 5.8192, - "step": 2005 - }, - { - "epoch": 1.0461538461538462, - "grad_norm": 1.6332141160964966, - "learning_rate": 9.848743718592965e-05, - "loss": 6.1516, - "step": 2006 - }, - { - "epoch": 1.0466753585397652, - "grad_norm": 1.6701242923736572, - "learning_rate": 9.848643216080401e-05, - "loss": 6.4004, - "step": 2007 - }, - { - "epoch": 1.0471968709256845, - "grad_norm": 2.0260472297668457, - "learning_rate": 9.848542713567839e-05, - "loss": 5.5856, - "step": 2008 - }, - { - "epoch": 1.0477183833116037, - "grad_norm": 1.9220818281173706, - "learning_rate": 9.848442211055277e-05, - "loss": 5.6215, - "step": 2009 - }, - { - "epoch": 1.0482398956975227, - "grad_norm": 1.9030778408050537, - "learning_rate": 9.848341708542715e-05, - "loss": 6.1956, - "step": 2010 - }, - { - "epoch": 1.048761408083442, - "grad_norm": 1.7378175258636475, - "learning_rate": 9.848241206030151e-05, - "loss": 6.4719, - "step": 2011 - }, - { - "epoch": 1.0492829204693612, - "grad_norm": 1.8204840421676636, - "learning_rate": 9.848140703517589e-05, - "loss": 5.8105, - "step": 2012 - }, - { - "epoch": 1.0498044328552802, - "grad_norm": 1.8509920835494995, - "learning_rate": 9.848040201005025e-05, - "loss": 6.1854, - "step": 2013 - }, - { - "epoch": 1.0503259452411995, - "grad_norm": 1.8737764358520508, - "learning_rate": 9.847939698492463e-05, - "loss": 5.6766, - "step": 2014 - }, - { - "epoch": 1.0508474576271187, - "grad_norm": 1.9872692823410034, - "learning_rate": 9.8478391959799e-05, - "loss": 5.9299, - "step": 2015 - }, - { - "epoch": 1.0513689700130378, - "grad_norm": 1.6695271730422974, - "learning_rate": 9.847738693467337e-05, - "loss": 6.208, - "step": 2016 - }, - { - "epoch": 1.051890482398957, - "grad_norm": 1.674023151397705, - "learning_rate": 9.847638190954774e-05, - "loss": 6.0535, - "step": 2017 - }, - { - "epoch": 1.0524119947848762, - "grad_norm": 1.775860071182251, - "learning_rate": 9.847537688442212e-05, - "loss": 5.9867, - "step": 2018 - }, - { - "epoch": 1.0529335071707953, - "grad_norm": 1.8346490859985352, - "learning_rate": 9.84743718592965e-05, - "loss": 6.1779, - "step": 2019 - }, - { - "epoch": 1.0534550195567145, - "grad_norm": 1.7631299495697021, - "learning_rate": 9.847336683417086e-05, - "loss": 5.8033, - "step": 2020 - }, - { - "epoch": 1.0539765319426337, - "grad_norm": 1.8419535160064697, - "learning_rate": 9.847236180904524e-05, - "loss": 6.1076, - "step": 2021 - }, - { - "epoch": 1.0544980443285528, - "grad_norm": 1.8955037593841553, - "learning_rate": 9.84713567839196e-05, - "loss": 6.1954, - "step": 2022 - }, - { - "epoch": 1.055019556714472, - "grad_norm": 1.6946598291397095, - "learning_rate": 9.847035175879398e-05, - "loss": 6.133, - "step": 2023 - }, - { - "epoch": 1.055541069100391, - "grad_norm": 1.8254743814468384, - "learning_rate": 9.846934673366834e-05, - "loss": 5.9449, - "step": 2024 - }, - { - "epoch": 1.0560625814863103, - "grad_norm": 1.554898738861084, - "learning_rate": 9.846834170854272e-05, - "loss": 6.2816, - "step": 2025 - }, - { - "epoch": 1.0565840938722295, - "grad_norm": 1.5383116006851196, - "learning_rate": 9.846733668341709e-05, - "loss": 6.2263, - "step": 2026 - }, - { - "epoch": 1.0571056062581485, - "grad_norm": 1.6168657541275024, - "learning_rate": 9.846633165829146e-05, - "loss": 5.9276, - "step": 2027 - }, - { - "epoch": 1.0576271186440678, - "grad_norm": 1.5930066108703613, - "learning_rate": 9.846532663316583e-05, - "loss": 5.8445, - "step": 2028 - }, - { - "epoch": 1.058148631029987, - "grad_norm": 1.5730817317962646, - "learning_rate": 9.84643216080402e-05, - "loss": 5.9498, - "step": 2029 - }, - { - "epoch": 1.058670143415906, - "grad_norm": 1.4855321645736694, - "learning_rate": 9.846331658291458e-05, - "loss": 6.1839, - "step": 2030 - }, - { - "epoch": 1.0591916558018253, - "grad_norm": 1.5094448328018188, - "learning_rate": 9.846231155778895e-05, - "loss": 6.2985, - "step": 2031 - }, - { - "epoch": 1.0597131681877445, - "grad_norm": 1.7278530597686768, - "learning_rate": 9.846130653266332e-05, - "loss": 6.2531, - "step": 2032 - }, - { - "epoch": 1.0602346805736635, - "grad_norm": 1.5366487503051758, - "learning_rate": 9.846030150753769e-05, - "loss": 6.0652, - "step": 2033 - }, - { - "epoch": 1.0607561929595828, - "grad_norm": 1.62075674533844, - "learning_rate": 9.845929648241207e-05, - "loss": 6.0717, - "step": 2034 - }, - { - "epoch": 1.061277705345502, - "grad_norm": 1.766707181930542, - "learning_rate": 9.845829145728643e-05, - "loss": 6.0662, - "step": 2035 - }, - { - "epoch": 1.061799217731421, - "grad_norm": 1.8812587261199951, - "learning_rate": 9.845728643216081e-05, - "loss": 5.5153, - "step": 2036 - }, - { - "epoch": 1.0623207301173403, - "grad_norm": 1.743231177330017, - "learning_rate": 9.845628140703517e-05, - "loss": 5.9511, - "step": 2037 - }, - { - "epoch": 1.0628422425032595, - "grad_norm": 1.5841785669326782, - "learning_rate": 9.845527638190955e-05, - "loss": 6.2901, - "step": 2038 - }, - { - "epoch": 1.0633637548891786, - "grad_norm": 1.7085474729537964, - "learning_rate": 9.845427135678393e-05, - "loss": 6.2665, - "step": 2039 - }, - { - "epoch": 1.0638852672750978, - "grad_norm": 1.6257435083389282, - "learning_rate": 9.845326633165831e-05, - "loss": 6.323, - "step": 2040 - }, - { - "epoch": 1.064406779661017, - "grad_norm": 1.8219819068908691, - "learning_rate": 9.845226130653267e-05, - "loss": 6.0413, - "step": 2041 - }, - { - "epoch": 1.064928292046936, - "grad_norm": 1.6297686100006104, - "learning_rate": 9.845125628140705e-05, - "loss": 6.3621, - "step": 2042 - }, - { - "epoch": 1.0654498044328553, - "grad_norm": 1.575216293334961, - "learning_rate": 9.845025125628141e-05, - "loss": 6.237, - "step": 2043 - }, - { - "epoch": 1.0659713168187746, - "grad_norm": 1.653867244720459, - "learning_rate": 9.844924623115578e-05, - "loss": 6.2894, - "step": 2044 - }, - { - "epoch": 1.0664928292046936, - "grad_norm": 1.923910140991211, - "learning_rate": 9.844824120603016e-05, - "loss": 5.6098, - "step": 2045 - }, - { - "epoch": 1.0670143415906128, - "grad_norm": 1.6443042755126953, - "learning_rate": 9.844723618090452e-05, - "loss": 6.1054, - "step": 2046 - }, - { - "epoch": 1.067535853976532, - "grad_norm": 1.9147251844406128, - "learning_rate": 9.84462311557789e-05, - "loss": 5.5705, - "step": 2047 - }, - { - "epoch": 1.068057366362451, - "grad_norm": 1.740979790687561, - "learning_rate": 9.844522613065326e-05, - "loss": 6.2621, - "step": 2048 - }, - { - "epoch": 1.0685788787483703, - "grad_norm": 2.3516361713409424, - "learning_rate": 9.844422110552764e-05, - "loss": 6.0933, - "step": 2049 - }, - { - "epoch": 1.0691003911342893, - "grad_norm": 1.9072270393371582, - "learning_rate": 9.844321608040202e-05, - "loss": 6.2036, - "step": 2050 - }, - { - "epoch": 1.0696219035202086, - "grad_norm": 1.9166215658187866, - "learning_rate": 9.84422110552764e-05, - "loss": 5.9267, - "step": 2051 - }, - { - "epoch": 1.0701434159061278, - "grad_norm": 1.8280320167541504, - "learning_rate": 9.844120603015076e-05, - "loss": 5.9086, - "step": 2052 - }, - { - "epoch": 1.0706649282920468, - "grad_norm": 1.831407904624939, - "learning_rate": 9.844020100502514e-05, - "loss": 6.0674, - "step": 2053 - }, - { - "epoch": 1.071186440677966, - "grad_norm": 1.6701538562774658, - "learning_rate": 9.84391959798995e-05, - "loss": 6.2811, - "step": 2054 - }, - { - "epoch": 1.0717079530638853, - "grad_norm": 1.583001732826233, - "learning_rate": 9.843819095477388e-05, - "loss": 6.468, - "step": 2055 - }, - { - "epoch": 1.0722294654498044, - "grad_norm": 2.1178581714630127, - "learning_rate": 9.843718592964824e-05, - "loss": 5.3708, - "step": 2056 - }, - { - "epoch": 1.0727509778357236, - "grad_norm": 1.7624542713165283, - "learning_rate": 9.843618090452261e-05, - "loss": 5.998, - "step": 2057 - }, - { - "epoch": 1.0732724902216428, - "grad_norm": 1.7489049434661865, - "learning_rate": 9.843517587939699e-05, - "loss": 6.3201, - "step": 2058 - }, - { - "epoch": 1.0737940026075619, - "grad_norm": 1.6883853673934937, - "learning_rate": 9.843417085427136e-05, - "loss": 6.5115, - "step": 2059 - }, - { - "epoch": 1.074315514993481, - "grad_norm": 1.4730063676834106, - "learning_rate": 9.843316582914574e-05, - "loss": 6.0829, - "step": 2060 - }, - { - "epoch": 1.0748370273794003, - "grad_norm": 1.5787452459335327, - "learning_rate": 9.84321608040201e-05, - "loss": 6.1248, - "step": 2061 - }, - { - "epoch": 1.0753585397653194, - "grad_norm": 1.7233240604400635, - "learning_rate": 9.843115577889448e-05, - "loss": 6.2, - "step": 2062 - }, - { - "epoch": 1.0758800521512386, - "grad_norm": 1.7375123500823975, - "learning_rate": 9.843015075376885e-05, - "loss": 6.1556, - "step": 2063 - }, - { - "epoch": 1.0764015645371579, - "grad_norm": 1.6828275918960571, - "learning_rate": 9.842914572864323e-05, - "loss": 5.9103, - "step": 2064 - }, - { - "epoch": 1.0769230769230769, - "grad_norm": 1.6976250410079956, - "learning_rate": 9.842814070351759e-05, - "loss": 6.2481, - "step": 2065 - }, - { - "epoch": 1.0774445893089961, - "grad_norm": 1.8267648220062256, - "learning_rate": 9.842713567839197e-05, - "loss": 5.7245, - "step": 2066 - }, - { - "epoch": 1.0779661016949154, - "grad_norm": 1.7747575044631958, - "learning_rate": 9.842613065326633e-05, - "loss": 6.2221, - "step": 2067 - }, - { - "epoch": 1.0784876140808344, - "grad_norm": 1.6890941858291626, - "learning_rate": 9.842512562814071e-05, - "loss": 6.2296, - "step": 2068 - }, - { - "epoch": 1.0790091264667536, - "grad_norm": 2.056994676589966, - "learning_rate": 9.842412060301507e-05, - "loss": 6.1521, - "step": 2069 - }, - { - "epoch": 1.0795306388526726, - "grad_norm": 1.6816191673278809, - "learning_rate": 9.842311557788945e-05, - "loss": 5.9184, - "step": 2070 - }, - { - "epoch": 1.0800521512385919, - "grad_norm": 1.544165849685669, - "learning_rate": 9.842211055276383e-05, - "loss": 6.4803, - "step": 2071 - }, - { - "epoch": 1.0805736636245111, - "grad_norm": 1.6133559942245483, - "learning_rate": 9.84211055276382e-05, - "loss": 5.9038, - "step": 2072 - }, - { - "epoch": 1.0810951760104301, - "grad_norm": 1.7555313110351562, - "learning_rate": 9.842010050251257e-05, - "loss": 6.1311, - "step": 2073 - }, - { - "epoch": 1.0816166883963494, - "grad_norm": 1.6976627111434937, - "learning_rate": 9.841909547738694e-05, - "loss": 5.9387, - "step": 2074 - }, - { - "epoch": 1.0821382007822686, - "grad_norm": 1.5983151197433472, - "learning_rate": 9.841809045226131e-05, - "loss": 6.0487, - "step": 2075 - }, - { - "epoch": 1.0826597131681877, - "grad_norm": 1.5447509288787842, - "learning_rate": 9.841708542713568e-05, - "loss": 6.2851, - "step": 2076 - }, - { - "epoch": 1.083181225554107, - "grad_norm": 1.6700820922851562, - "learning_rate": 9.841608040201006e-05, - "loss": 6.3008, - "step": 2077 - }, - { - "epoch": 1.0837027379400261, - "grad_norm": 1.7985095977783203, - "learning_rate": 9.841507537688442e-05, - "loss": 6.0742, - "step": 2078 - }, - { - "epoch": 1.0842242503259452, - "grad_norm": 1.6672126054763794, - "learning_rate": 9.84140703517588e-05, - "loss": 5.897, - "step": 2079 - }, - { - "epoch": 1.0847457627118644, - "grad_norm": 1.5944784879684448, - "learning_rate": 9.841306532663316e-05, - "loss": 6.1446, - "step": 2080 - }, - { - "epoch": 1.0852672750977836, - "grad_norm": 2.028343915939331, - "learning_rate": 9.841206030150754e-05, - "loss": 5.6221, - "step": 2081 - }, - { - "epoch": 1.0857887874837027, - "grad_norm": 2.0334553718566895, - "learning_rate": 9.841105527638192e-05, - "loss": 5.7647, - "step": 2082 - }, - { - "epoch": 1.086310299869622, - "grad_norm": 1.9639347791671753, - "learning_rate": 9.84100502512563e-05, - "loss": 5.8936, - "step": 2083 - }, - { - "epoch": 1.0868318122555412, - "grad_norm": 1.745819330215454, - "learning_rate": 9.840904522613066e-05, - "loss": 6.4195, - "step": 2084 - }, - { - "epoch": 1.0873533246414602, - "grad_norm": 1.7406219244003296, - "learning_rate": 9.840804020100502e-05, - "loss": 6.1403, - "step": 2085 - }, - { - "epoch": 1.0878748370273794, - "grad_norm": 1.6571893692016602, - "learning_rate": 9.84070351758794e-05, - "loss": 6.3012, - "step": 2086 - }, - { - "epoch": 1.0883963494132987, - "grad_norm": 1.6557159423828125, - "learning_rate": 9.840603015075377e-05, - "loss": 6.3509, - "step": 2087 - }, - { - "epoch": 1.0889178617992177, - "grad_norm": 1.6120842695236206, - "learning_rate": 9.840502512562814e-05, - "loss": 6.4066, - "step": 2088 - }, - { - "epoch": 1.089439374185137, - "grad_norm": 1.6781662702560425, - "learning_rate": 9.840402010050251e-05, - "loss": 6.2072, - "step": 2089 - }, - { - "epoch": 1.0899608865710562, - "grad_norm": 1.6951066255569458, - "learning_rate": 9.840301507537689e-05, - "loss": 6.0058, - "step": 2090 - }, - { - "epoch": 1.0904823989569752, - "grad_norm": 1.7157716751098633, - "learning_rate": 9.840201005025126e-05, - "loss": 5.9738, - "step": 2091 - }, - { - "epoch": 1.0910039113428944, - "grad_norm": 1.5716623067855835, - "learning_rate": 9.840100502512564e-05, - "loss": 6.0578, - "step": 2092 - }, - { - "epoch": 1.0915254237288137, - "grad_norm": 1.7220311164855957, - "learning_rate": 9.84e-05, - "loss": 6.465, - "step": 2093 - }, - { - "epoch": 1.0920469361147327, - "grad_norm": 1.594516396522522, - "learning_rate": 9.839899497487438e-05, - "loss": 6.207, - "step": 2094 - }, - { - "epoch": 1.092568448500652, - "grad_norm": 1.5586062669754028, - "learning_rate": 9.839798994974875e-05, - "loss": 6.2873, - "step": 2095 - }, - { - "epoch": 1.093089960886571, - "grad_norm": 1.5276882648468018, - "learning_rate": 9.839698492462313e-05, - "loss": 6.3951, - "step": 2096 - }, - { - "epoch": 1.0936114732724902, - "grad_norm": 1.5596115589141846, - "learning_rate": 9.839597989949749e-05, - "loss": 6.3728, - "step": 2097 - }, - { - "epoch": 1.0941329856584094, - "grad_norm": 1.6365646123886108, - "learning_rate": 9.839497487437186e-05, - "loss": 6.323, - "step": 2098 - }, - { - "epoch": 1.0946544980443285, - "grad_norm": 1.506773591041565, - "learning_rate": 9.839396984924623e-05, - "loss": 6.0078, - "step": 2099 - }, - { - "epoch": 1.0951760104302477, - "grad_norm": 1.5022132396697998, - "learning_rate": 9.83929648241206e-05, - "loss": 6.2774, - "step": 2100 - }, - { - "epoch": 1.095697522816167, - "grad_norm": 1.5496234893798828, - "learning_rate": 9.839195979899497e-05, - "loss": 6.3331, - "step": 2101 - }, - { - "epoch": 1.096219035202086, - "grad_norm": 1.4517372846603394, - "learning_rate": 9.839095477386935e-05, - "loss": 6.4021, - "step": 2102 - }, - { - "epoch": 1.0967405475880052, - "grad_norm": 1.9331247806549072, - "learning_rate": 9.838994974874373e-05, - "loss": 5.8071, - "step": 2103 - }, - { - "epoch": 1.0972620599739245, - "grad_norm": 1.5102336406707764, - "learning_rate": 9.83889447236181e-05, - "loss": 6.1295, - "step": 2104 - }, - { - "epoch": 1.0977835723598435, - "grad_norm": 1.818704605102539, - "learning_rate": 9.838793969849247e-05, - "loss": 6.1369, - "step": 2105 - }, - { - "epoch": 1.0983050847457627, - "grad_norm": 2.0119788646698, - "learning_rate": 9.838693467336684e-05, - "loss": 6.287, - "step": 2106 - }, - { - "epoch": 1.098826597131682, - "grad_norm": 1.5393317937850952, - "learning_rate": 9.838592964824121e-05, - "loss": 6.3275, - "step": 2107 - }, - { - "epoch": 1.099348109517601, - "grad_norm": 1.4994674921035767, - "learning_rate": 9.838492462311558e-05, - "loss": 6.4564, - "step": 2108 - }, - { - "epoch": 1.0998696219035202, - "grad_norm": 1.8602712154388428, - "learning_rate": 9.838391959798996e-05, - "loss": 6.0075, - "step": 2109 - }, - { - "epoch": 1.1003911342894395, - "grad_norm": 1.671539545059204, - "learning_rate": 9.838291457286432e-05, - "loss": 5.6595, - "step": 2110 - }, - { - "epoch": 1.1009126466753585, - "grad_norm": 1.6117165088653564, - "learning_rate": 9.83819095477387e-05, - "loss": 6.0615, - "step": 2111 - }, - { - "epoch": 1.1014341590612777, - "grad_norm": 1.7274749279022217, - "learning_rate": 9.838090452261308e-05, - "loss": 5.3156, - "step": 2112 - }, - { - "epoch": 1.101955671447197, - "grad_norm": 1.6751445531845093, - "learning_rate": 9.837989949748744e-05, - "loss": 5.8146, - "step": 2113 - }, - { - "epoch": 1.102477183833116, - "grad_norm": 1.924877405166626, - "learning_rate": 9.837889447236182e-05, - "loss": 5.9772, - "step": 2114 - }, - { - "epoch": 1.1029986962190352, - "grad_norm": 1.8831536769866943, - "learning_rate": 9.837788944723618e-05, - "loss": 5.9623, - "step": 2115 - }, - { - "epoch": 1.1035202086049543, - "grad_norm": 1.9220843315124512, - "learning_rate": 9.837688442211056e-05, - "loss": 6.2552, - "step": 2116 - }, - { - "epoch": 1.1040417209908735, - "grad_norm": 1.7072629928588867, - "learning_rate": 9.837587939698493e-05, - "loss": 5.7738, - "step": 2117 - }, - { - "epoch": 1.1045632333767927, - "grad_norm": 1.7481555938720703, - "learning_rate": 9.83748743718593e-05, - "loss": 5.8876, - "step": 2118 - }, - { - "epoch": 1.1050847457627118, - "grad_norm": 1.7137829065322876, - "learning_rate": 9.837386934673367e-05, - "loss": 6.2407, - "step": 2119 - }, - { - "epoch": 1.105606258148631, - "grad_norm": 1.6986284255981445, - "learning_rate": 9.837286432160805e-05, - "loss": 5.7636, - "step": 2120 - }, - { - "epoch": 1.1061277705345502, - "grad_norm": 1.6503149271011353, - "learning_rate": 9.837185929648241e-05, - "loss": 6.4542, - "step": 2121 - }, - { - "epoch": 1.1066492829204693, - "grad_norm": 1.5618839263916016, - "learning_rate": 9.837085427135679e-05, - "loss": 6.0379, - "step": 2122 - }, - { - "epoch": 1.1071707953063885, - "grad_norm": 1.5384618043899536, - "learning_rate": 9.836984924623117e-05, - "loss": 6.4178, - "step": 2123 - }, - { - "epoch": 1.1076923076923078, - "grad_norm": 1.634857177734375, - "learning_rate": 9.836884422110553e-05, - "loss": 6.5396, - "step": 2124 - }, - { - "epoch": 1.1082138200782268, - "grad_norm": 1.5410327911376953, - "learning_rate": 9.836783919597991e-05, - "loss": 6.1464, - "step": 2125 - }, - { - "epoch": 1.108735332464146, - "grad_norm": 1.5957677364349365, - "learning_rate": 9.836683417085427e-05, - "loss": 6.3153, - "step": 2126 - }, - { - "epoch": 1.1092568448500653, - "grad_norm": 1.618376612663269, - "learning_rate": 9.836582914572865e-05, - "loss": 6.134, - "step": 2127 - }, - { - "epoch": 1.1097783572359843, - "grad_norm": 1.6335352659225464, - "learning_rate": 9.836482412060301e-05, - "loss": 6.1493, - "step": 2128 - }, - { - "epoch": 1.1102998696219035, - "grad_norm": 1.6532281637191772, - "learning_rate": 9.836381909547739e-05, - "loss": 6.1335, - "step": 2129 - }, - { - "epoch": 1.1108213820078228, - "grad_norm": 1.6897399425506592, - "learning_rate": 9.836281407035176e-05, - "loss": 6.0623, - "step": 2130 - }, - { - "epoch": 1.1113428943937418, - "grad_norm": 1.7886675596237183, - "learning_rate": 9.836180904522613e-05, - "loss": 5.7708, - "step": 2131 - }, - { - "epoch": 1.111864406779661, - "grad_norm": 1.5723931789398193, - "learning_rate": 9.836080402010051e-05, - "loss": 5.7262, - "step": 2132 - }, - { - "epoch": 1.1123859191655803, - "grad_norm": 1.8316543102264404, - "learning_rate": 9.835979899497489e-05, - "loss": 5.7699, - "step": 2133 - }, - { - "epoch": 1.1129074315514993, - "grad_norm": 1.6696882247924805, - "learning_rate": 9.835879396984925e-05, - "loss": 6.0161, - "step": 2134 - }, - { - "epoch": 1.1134289439374185, - "grad_norm": 1.565625786781311, - "learning_rate": 9.835778894472363e-05, - "loss": 6.2272, - "step": 2135 - }, - { - "epoch": 1.1139504563233378, - "grad_norm": 1.6374708414077759, - "learning_rate": 9.8356783919598e-05, - "loss": 6.0726, - "step": 2136 - }, - { - "epoch": 1.1144719687092568, - "grad_norm": 1.6318912506103516, - "learning_rate": 9.835577889447236e-05, - "loss": 6.0006, - "step": 2137 - }, - { - "epoch": 1.114993481095176, - "grad_norm": 1.750747561454773, - "learning_rate": 9.835477386934674e-05, - "loss": 6.0683, - "step": 2138 - }, - { - "epoch": 1.1155149934810953, - "grad_norm": 1.8892841339111328, - "learning_rate": 9.83537688442211e-05, - "loss": 6.0745, - "step": 2139 - }, - { - "epoch": 1.1160365058670143, - "grad_norm": 1.969397783279419, - "learning_rate": 9.835276381909548e-05, - "loss": 5.6781, - "step": 2140 - }, - { - "epoch": 1.1165580182529335, - "grad_norm": 1.6905620098114014, - "learning_rate": 9.835175879396984e-05, - "loss": 5.5594, - "step": 2141 - }, - { - "epoch": 1.1170795306388526, - "grad_norm": 1.5232009887695312, - "learning_rate": 9.835075376884422e-05, - "loss": 6.4654, - "step": 2142 - }, - { - "epoch": 1.1176010430247718, - "grad_norm": 1.677577257156372, - "learning_rate": 9.83497487437186e-05, - "loss": 5.988, - "step": 2143 - }, - { - "epoch": 1.118122555410691, - "grad_norm": 1.5536683797836304, - "learning_rate": 9.834874371859298e-05, - "loss": 6.2869, - "step": 2144 - }, - { - "epoch": 1.11864406779661, - "grad_norm": 1.7479469776153564, - "learning_rate": 9.834773869346734e-05, - "loss": 6.1856, - "step": 2145 - }, - { - "epoch": 1.1191655801825293, - "grad_norm": 1.7326782941818237, - "learning_rate": 9.834673366834172e-05, - "loss": 6.4232, - "step": 2146 - }, - { - "epoch": 1.1196870925684486, - "grad_norm": 1.7394683361053467, - "learning_rate": 9.834572864321608e-05, - "loss": 6.1438, - "step": 2147 - }, - { - "epoch": 1.1202086049543676, - "grad_norm": 1.8489806652069092, - "learning_rate": 9.834472361809046e-05, - "loss": 5.7594, - "step": 2148 - }, - { - "epoch": 1.1207301173402868, - "grad_norm": 1.8415873050689697, - "learning_rate": 9.834371859296483e-05, - "loss": 6.0798, - "step": 2149 - }, - { - "epoch": 1.121251629726206, - "grad_norm": 1.6918288469314575, - "learning_rate": 9.834271356783919e-05, - "loss": 6.1011, - "step": 2150 - }, - { - "epoch": 1.121773142112125, - "grad_norm": 2.09799861907959, - "learning_rate": 9.834170854271357e-05, - "loss": 6.0128, - "step": 2151 - }, - { - "epoch": 1.1222946544980443, - "grad_norm": 1.820911169052124, - "learning_rate": 9.834070351758795e-05, - "loss": 6.3198, - "step": 2152 - }, - { - "epoch": 1.1228161668839636, - "grad_norm": 1.6595430374145508, - "learning_rate": 9.833969849246232e-05, - "loss": 6.1062, - "step": 2153 - }, - { - "epoch": 1.1233376792698826, - "grad_norm": 1.874584436416626, - "learning_rate": 9.833869346733669e-05, - "loss": 5.7289, - "step": 2154 - }, - { - "epoch": 1.1238591916558018, - "grad_norm": 1.949552059173584, - "learning_rate": 9.833768844221107e-05, - "loss": 5.9235, - "step": 2155 - }, - { - "epoch": 1.124380704041721, - "grad_norm": 1.8642287254333496, - "learning_rate": 9.833668341708543e-05, - "loss": 5.7948, - "step": 2156 - }, - { - "epoch": 1.12490221642764, - "grad_norm": 1.6870375871658325, - "learning_rate": 9.833567839195981e-05, - "loss": 5.8126, - "step": 2157 - }, - { - "epoch": 1.1254237288135593, - "grad_norm": 1.5605230331420898, - "learning_rate": 9.833467336683417e-05, - "loss": 6.2091, - "step": 2158 - }, - { - "epoch": 1.1259452411994784, - "grad_norm": 1.9269726276397705, - "learning_rate": 9.833366834170855e-05, - "loss": 6.1305, - "step": 2159 - }, - { - "epoch": 1.1264667535853976, - "grad_norm": 1.8271476030349731, - "learning_rate": 9.833266331658291e-05, - "loss": 5.6783, - "step": 2160 - }, - { - "epoch": 1.1269882659713168, - "grad_norm": 1.9713389873504639, - "learning_rate": 9.833165829145729e-05, - "loss": 5.9957, - "step": 2161 - }, - { - "epoch": 1.1275097783572359, - "grad_norm": 1.7722227573394775, - "learning_rate": 9.833065326633166e-05, - "loss": 5.904, - "step": 2162 - }, - { - "epoch": 1.1280312907431551, - "grad_norm": 1.75780189037323, - "learning_rate": 9.832964824120603e-05, - "loss": 5.881, - "step": 2163 - }, - { - "epoch": 1.1285528031290744, - "grad_norm": 1.7344143390655518, - "learning_rate": 9.832864321608041e-05, - "loss": 5.6713, - "step": 2164 - }, - { - "epoch": 1.1290743155149934, - "grad_norm": 1.8423362970352173, - "learning_rate": 9.832763819095478e-05, - "loss": 6.0549, - "step": 2165 - }, - { - "epoch": 1.1295958279009126, - "grad_norm": 1.6502448320388794, - "learning_rate": 9.832663316582915e-05, - "loss": 6.2476, - "step": 2166 - }, - { - "epoch": 1.1301173402868319, - "grad_norm": 1.7386568784713745, - "learning_rate": 9.832562814070352e-05, - "loss": 5.9287, - "step": 2167 - }, - { - "epoch": 1.1306388526727509, - "grad_norm": 1.5258994102478027, - "learning_rate": 9.83246231155779e-05, - "loss": 6.2101, - "step": 2168 - }, - { - "epoch": 1.1311603650586701, - "grad_norm": 1.5508569478988647, - "learning_rate": 9.832361809045226e-05, - "loss": 6.3921, - "step": 2169 - }, - { - "epoch": 1.1316818774445894, - "grad_norm": 1.5998291969299316, - "learning_rate": 9.832261306532664e-05, - "loss": 5.9298, - "step": 2170 - }, - { - "epoch": 1.1322033898305084, - "grad_norm": 1.7097409963607788, - "learning_rate": 9.8321608040201e-05, - "loss": 5.8778, - "step": 2171 - }, - { - "epoch": 1.1327249022164276, - "grad_norm": 1.778180480003357, - "learning_rate": 9.832060301507538e-05, - "loss": 5.8236, - "step": 2172 - }, - { - "epoch": 1.1332464146023469, - "grad_norm": 1.5483494997024536, - "learning_rate": 9.831959798994976e-05, - "loss": 6.2395, - "step": 2173 - }, - { - "epoch": 1.133767926988266, - "grad_norm": 1.9308470487594604, - "learning_rate": 9.831859296482414e-05, - "loss": 6.0561, - "step": 2174 - }, - { - "epoch": 1.1342894393741851, - "grad_norm": 1.6938871145248413, - "learning_rate": 9.83175879396985e-05, - "loss": 6.0515, - "step": 2175 - }, - { - "epoch": 1.1348109517601044, - "grad_norm": 1.7431280612945557, - "learning_rate": 9.831658291457288e-05, - "loss": 6.0175, - "step": 2176 - }, - { - "epoch": 1.1353324641460234, - "grad_norm": 1.915162444114685, - "learning_rate": 9.831557788944724e-05, - "loss": 5.7712, - "step": 2177 - }, - { - "epoch": 1.1358539765319426, - "grad_norm": 1.7051831483840942, - "learning_rate": 9.831457286432161e-05, - "loss": 6.3093, - "step": 2178 - }, - { - "epoch": 1.1363754889178619, - "grad_norm": 1.555277705192566, - "learning_rate": 9.831356783919598e-05, - "loss": 6.2157, - "step": 2179 - }, - { - "epoch": 1.136897001303781, - "grad_norm": 1.62564218044281, - "learning_rate": 9.831256281407035e-05, - "loss": 6.2933, - "step": 2180 - }, - { - "epoch": 1.1374185136897002, - "grad_norm": 1.742859125137329, - "learning_rate": 9.831155778894473e-05, - "loss": 5.3673, - "step": 2181 - }, - { - "epoch": 1.1379400260756194, - "grad_norm": 1.5772731304168701, - "learning_rate": 9.831055276381909e-05, - "loss": 6.1485, - "step": 2182 - }, - { - "epoch": 1.1384615384615384, - "grad_norm": 1.573353886604309, - "learning_rate": 9.830954773869347e-05, - "loss": 6.462, - "step": 2183 - }, - { - "epoch": 1.1389830508474577, - "grad_norm": 1.5485851764678955, - "learning_rate": 9.830854271356785e-05, - "loss": 6.2419, - "step": 2184 - }, - { - "epoch": 1.139504563233377, - "grad_norm": 1.6806432008743286, - "learning_rate": 9.830753768844222e-05, - "loss": 5.8934, - "step": 2185 - }, - { - "epoch": 1.140026075619296, - "grad_norm": 1.5984195470809937, - "learning_rate": 9.830653266331659e-05, - "loss": 6.374, - "step": 2186 - }, - { - "epoch": 1.1405475880052152, - "grad_norm": 2.130748748779297, - "learning_rate": 9.830552763819097e-05, - "loss": 5.4443, - "step": 2187 - }, - { - "epoch": 1.1410691003911344, - "grad_norm": 1.83894944190979, - "learning_rate": 9.830452261306533e-05, - "loss": 6.4328, - "step": 2188 - }, - { - "epoch": 1.1415906127770534, - "grad_norm": 2.2462737560272217, - "learning_rate": 9.830351758793971e-05, - "loss": 5.9004, - "step": 2189 - }, - { - "epoch": 1.1421121251629727, - "grad_norm": 1.5884034633636475, - "learning_rate": 9.830251256281407e-05, - "loss": 6.3034, - "step": 2190 - }, - { - "epoch": 1.1426336375488917, - "grad_norm": 1.7714293003082275, - "learning_rate": 9.830150753768844e-05, - "loss": 6.036, - "step": 2191 - }, - { - "epoch": 1.143155149934811, - "grad_norm": 1.5858405828475952, - "learning_rate": 9.830050251256282e-05, - "loss": 6.1191, - "step": 2192 - }, - { - "epoch": 1.1436766623207302, - "grad_norm": 1.5486083030700684, - "learning_rate": 9.829949748743719e-05, - "loss": 6.6528, - "step": 2193 - }, - { - "epoch": 1.1441981747066492, - "grad_norm": 1.5546194314956665, - "learning_rate": 9.829849246231157e-05, - "loss": 6.1703, - "step": 2194 - }, - { - "epoch": 1.1447196870925684, - "grad_norm": 1.4341551065444946, - "learning_rate": 9.829748743718594e-05, - "loss": 6.1345, - "step": 2195 - }, - { - "epoch": 1.1452411994784877, - "grad_norm": 1.745427131652832, - "learning_rate": 9.829648241206031e-05, - "loss": 5.8823, - "step": 2196 - }, - { - "epoch": 1.1457627118644067, - "grad_norm": 1.6985986232757568, - "learning_rate": 9.829547738693468e-05, - "loss": 5.9667, - "step": 2197 - }, - { - "epoch": 1.146284224250326, - "grad_norm": 1.8772536516189575, - "learning_rate": 9.829447236180906e-05, - "loss": 6.0903, - "step": 2198 - }, - { - "epoch": 1.1468057366362452, - "grad_norm": 1.6111915111541748, - "learning_rate": 9.829346733668342e-05, - "loss": 6.3498, - "step": 2199 - }, - { - "epoch": 1.1473272490221642, - "grad_norm": 1.5966535806655884, - "learning_rate": 9.82924623115578e-05, - "loss": 6.2569, - "step": 2200 - }, - { - "epoch": 1.1478487614080835, - "grad_norm": 1.5252742767333984, - "learning_rate": 9.829145728643216e-05, - "loss": 6.2356, - "step": 2201 - }, - { - "epoch": 1.1483702737940027, - "grad_norm": 1.8468446731567383, - "learning_rate": 9.829045226130654e-05, - "loss": 6.1507, - "step": 2202 - }, - { - "epoch": 1.1488917861799217, - "grad_norm": 1.9033254384994507, - "learning_rate": 9.82894472361809e-05, - "loss": 6.0196, - "step": 2203 - }, - { - "epoch": 1.149413298565841, - "grad_norm": 1.7099493741989136, - "learning_rate": 9.828844221105528e-05, - "loss": 6.2645, - "step": 2204 - }, - { - "epoch": 1.1499348109517602, - "grad_norm": 1.6157017946243286, - "learning_rate": 9.828743718592966e-05, - "loss": 6.0945, - "step": 2205 - }, - { - "epoch": 1.1504563233376792, - "grad_norm": 1.555907130241394, - "learning_rate": 9.828643216080402e-05, - "loss": 6.2589, - "step": 2206 - }, - { - "epoch": 1.1509778357235985, - "grad_norm": 1.6805888414382935, - "learning_rate": 9.82854271356784e-05, - "loss": 6.2222, - "step": 2207 - }, - { - "epoch": 1.1514993481095175, - "grad_norm": 1.697525978088379, - "learning_rate": 9.828442211055277e-05, - "loss": 5.9605, - "step": 2208 - }, - { - "epoch": 1.1520208604954367, - "grad_norm": 1.5336872339248657, - "learning_rate": 9.828341708542714e-05, - "loss": 6.3101, - "step": 2209 - }, - { - "epoch": 1.152542372881356, - "grad_norm": 1.678194284439087, - "learning_rate": 9.828241206030151e-05, - "loss": 6.0563, - "step": 2210 - }, - { - "epoch": 1.153063885267275, - "grad_norm": 1.6272337436676025, - "learning_rate": 9.828140703517589e-05, - "loss": 6.054, - "step": 2211 - }, - { - "epoch": 1.1535853976531942, - "grad_norm": 1.6122015714645386, - "learning_rate": 9.828040201005025e-05, - "loss": 6.2147, - "step": 2212 - }, - { - "epoch": 1.1541069100391135, - "grad_norm": 1.390834093093872, - "learning_rate": 9.827939698492463e-05, - "loss": 6.5119, - "step": 2213 - }, - { - "epoch": 1.1546284224250325, - "grad_norm": 2.453835964202881, - "learning_rate": 9.8278391959799e-05, - "loss": 5.8751, - "step": 2214 - }, - { - "epoch": 1.1551499348109517, - "grad_norm": 1.8139581680297852, - "learning_rate": 9.827738693467338e-05, - "loss": 6.4075, - "step": 2215 - }, - { - "epoch": 1.155671447196871, - "grad_norm": 1.6349258422851562, - "learning_rate": 9.827638190954775e-05, - "loss": 6.074, - "step": 2216 - }, - { - "epoch": 1.15619295958279, - "grad_norm": 1.4726719856262207, - "learning_rate": 9.827537688442211e-05, - "loss": 6.5689, - "step": 2217 - }, - { - "epoch": 1.1567144719687092, - "grad_norm": 1.6071897745132446, - "learning_rate": 9.827437185929649e-05, - "loss": 6.4558, - "step": 2218 - }, - { - "epoch": 1.1572359843546285, - "grad_norm": 1.5450910329818726, - "learning_rate": 9.827336683417085e-05, - "loss": 6.2967, - "step": 2219 - }, - { - "epoch": 1.1577574967405475, - "grad_norm": 1.5636141300201416, - "learning_rate": 9.827236180904523e-05, - "loss": 6.094, - "step": 2220 - }, - { - "epoch": 1.1582790091264668, - "grad_norm": 1.7181307077407837, - "learning_rate": 9.82713567839196e-05, - "loss": 5.8776, - "step": 2221 - }, - { - "epoch": 1.158800521512386, - "grad_norm": 1.8026118278503418, - "learning_rate": 9.827035175879397e-05, - "loss": 5.842, - "step": 2222 - }, - { - "epoch": 1.159322033898305, - "grad_norm": 1.553421974182129, - "learning_rate": 9.826934673366834e-05, - "loss": 6.4097, - "step": 2223 - }, - { - "epoch": 1.1598435462842243, - "grad_norm": 1.5685358047485352, - "learning_rate": 9.826834170854272e-05, - "loss": 6.1269, - "step": 2224 - }, - { - "epoch": 1.1603650586701435, - "grad_norm": 1.5738389492034912, - "learning_rate": 9.82673366834171e-05, - "loss": 6.2047, - "step": 2225 - }, - { - "epoch": 1.1608865710560625, - "grad_norm": 1.6662055253982544, - "learning_rate": 9.826633165829147e-05, - "loss": 5.367, - "step": 2226 - }, - { - "epoch": 1.1614080834419818, - "grad_norm": 1.5968204736709595, - "learning_rate": 9.826532663316584e-05, - "loss": 6.1703, - "step": 2227 - }, - { - "epoch": 1.161929595827901, - "grad_norm": 1.5658279657363892, - "learning_rate": 9.826432160804021e-05, - "loss": 6.1592, - "step": 2228 - }, - { - "epoch": 1.16245110821382, - "grad_norm": 1.8439615964889526, - "learning_rate": 9.826331658291458e-05, - "loss": 5.7523, - "step": 2229 - }, - { - "epoch": 1.1629726205997393, - "grad_norm": 1.8611191511154175, - "learning_rate": 9.826231155778894e-05, - "loss": 6.0335, - "step": 2230 - }, - { - "epoch": 1.1634941329856585, - "grad_norm": 1.8219220638275146, - "learning_rate": 9.826130653266332e-05, - "loss": 5.6515, - "step": 2231 - }, - { - "epoch": 1.1640156453715775, - "grad_norm": 1.6783543825149536, - "learning_rate": 9.826030150753768e-05, - "loss": 6.0214, - "step": 2232 - }, - { - "epoch": 1.1645371577574968, - "grad_norm": 1.8528460264205933, - "learning_rate": 9.825929648241206e-05, - "loss": 5.8591, - "step": 2233 - }, - { - "epoch": 1.165058670143416, - "grad_norm": 1.5973658561706543, - "learning_rate": 9.825829145728643e-05, - "loss": 6.1476, - "step": 2234 - }, - { - "epoch": 1.165580182529335, - "grad_norm": 1.5532174110412598, - "learning_rate": 9.82572864321608e-05, - "loss": 5.6784, - "step": 2235 - }, - { - "epoch": 1.1661016949152543, - "grad_norm": 1.471919059753418, - "learning_rate": 9.825628140703518e-05, - "loss": 6.4345, - "step": 2236 - }, - { - "epoch": 1.1666232073011735, - "grad_norm": 1.7118595838546753, - "learning_rate": 9.825527638190956e-05, - "loss": 5.7913, - "step": 2237 - }, - { - "epoch": 1.1671447196870925, - "grad_norm": 1.6788328886032104, - "learning_rate": 9.825427135678392e-05, - "loss": 5.7096, - "step": 2238 - }, - { - "epoch": 1.1676662320730118, - "grad_norm": 1.8063212633132935, - "learning_rate": 9.82532663316583e-05, - "loss": 5.7509, - "step": 2239 - }, - { - "epoch": 1.1681877444589308, - "grad_norm": 1.606229305267334, - "learning_rate": 9.825226130653267e-05, - "loss": 6.0547, - "step": 2240 - }, - { - "epoch": 1.16870925684485, - "grad_norm": 1.6397027969360352, - "learning_rate": 9.825125628140704e-05, - "loss": 6.3072, - "step": 2241 - }, - { - "epoch": 1.1692307692307693, - "grad_norm": 1.6101173162460327, - "learning_rate": 9.825025125628141e-05, - "loss": 5.8815, - "step": 2242 - }, - { - "epoch": 1.1697522816166883, - "grad_norm": 1.7230368852615356, - "learning_rate": 9.824924623115577e-05, - "loss": 6.0834, - "step": 2243 - }, - { - "epoch": 1.1702737940026076, - "grad_norm": 1.6329448223114014, - "learning_rate": 9.824824120603015e-05, - "loss": 6.1478, - "step": 2244 - }, - { - "epoch": 1.1707953063885268, - "grad_norm": 1.643519401550293, - "learning_rate": 9.824723618090453e-05, - "loss": 6.2166, - "step": 2245 - }, - { - "epoch": 1.1713168187744458, - "grad_norm": 1.6363450288772583, - "learning_rate": 9.82462311557789e-05, - "loss": 6.4417, - "step": 2246 - }, - { - "epoch": 1.171838331160365, - "grad_norm": 1.8582649230957031, - "learning_rate": 9.824522613065327e-05, - "loss": 5.889, - "step": 2247 - }, - { - "epoch": 1.1723598435462843, - "grad_norm": 1.55232834815979, - "learning_rate": 9.824422110552765e-05, - "loss": 6.1526, - "step": 2248 - }, - { - "epoch": 1.1728813559322033, - "grad_norm": 1.8015625476837158, - "learning_rate": 9.824321608040201e-05, - "loss": 6.0543, - "step": 2249 - }, - { - "epoch": 1.1734028683181226, - "grad_norm": 1.7950050830841064, - "learning_rate": 9.824221105527639e-05, - "loss": 6.2373, - "step": 2250 - }, - { - "epoch": 1.1739243807040418, - "grad_norm": 1.7826024293899536, - "learning_rate": 9.824120603015075e-05, - "loss": 6.3744, - "step": 2251 - }, - { - "epoch": 1.1744458930899608, - "grad_norm": 1.687927484512329, - "learning_rate": 9.824020100502513e-05, - "loss": 5.8684, - "step": 2252 - }, - { - "epoch": 1.17496740547588, - "grad_norm": 2.5282304286956787, - "learning_rate": 9.82391959798995e-05, - "loss": 5.8711, - "step": 2253 - }, - { - "epoch": 1.175488917861799, - "grad_norm": 1.7576898336410522, - "learning_rate": 9.823819095477387e-05, - "loss": 6.3262, - "step": 2254 - }, - { - "epoch": 1.1760104302477183, - "grad_norm": 1.9101850986480713, - "learning_rate": 9.823718592964824e-05, - "loss": 6.0912, - "step": 2255 - }, - { - "epoch": 1.1765319426336376, - "grad_norm": 1.9231549501419067, - "learning_rate": 9.823618090452262e-05, - "loss": 5.0419, - "step": 2256 - }, - { - "epoch": 1.1770534550195566, - "grad_norm": 1.8675200939178467, - "learning_rate": 9.8235175879397e-05, - "loss": 6.3504, - "step": 2257 - }, - { - "epoch": 1.1775749674054758, - "grad_norm": 1.917932152748108, - "learning_rate": 9.823417085427136e-05, - "loss": 5.8987, - "step": 2258 - }, - { - "epoch": 1.178096479791395, - "grad_norm": 1.615600824356079, - "learning_rate": 9.823316582914574e-05, - "loss": 6.1797, - "step": 2259 - }, - { - "epoch": 1.1786179921773141, - "grad_norm": 1.7991294860839844, - "learning_rate": 9.82321608040201e-05, - "loss": 6.0582, - "step": 2260 - }, - { - "epoch": 1.1791395045632334, - "grad_norm": 1.6269344091415405, - "learning_rate": 9.823115577889448e-05, - "loss": 5.973, - "step": 2261 - }, - { - "epoch": 1.1796610169491526, - "grad_norm": 1.6376138925552368, - "learning_rate": 9.823015075376884e-05, - "loss": 6.1668, - "step": 2262 - }, - { - "epoch": 1.1801825293350716, - "grad_norm": 2.362882614135742, - "learning_rate": 9.822914572864322e-05, - "loss": 5.8695, - "step": 2263 - }, - { - "epoch": 1.1807040417209909, - "grad_norm": 2.2332370281219482, - "learning_rate": 9.822814070351759e-05, - "loss": 5.3695, - "step": 2264 - }, - { - "epoch": 1.18122555410691, - "grad_norm": 2.100804090499878, - "learning_rate": 9.822713567839196e-05, - "loss": 5.5639, - "step": 2265 - }, - { - "epoch": 1.1817470664928291, - "grad_norm": 1.9866795539855957, - "learning_rate": 9.822613065326634e-05, - "loss": 5.975, - "step": 2266 - }, - { - "epoch": 1.1822685788787484, - "grad_norm": 1.7337366342544556, - "learning_rate": 9.822512562814072e-05, - "loss": 5.9724, - "step": 2267 - }, - { - "epoch": 1.1827900912646676, - "grad_norm": 1.8712918758392334, - "learning_rate": 9.822412060301508e-05, - "loss": 5.5507, - "step": 2268 - }, - { - "epoch": 1.1833116036505866, - "grad_norm": 1.8291009664535522, - "learning_rate": 9.822311557788946e-05, - "loss": 5.9644, - "step": 2269 - }, - { - "epoch": 1.1838331160365059, - "grad_norm": 1.9850845336914062, - "learning_rate": 9.822211055276383e-05, - "loss": 6.1363, - "step": 2270 - }, - { - "epoch": 1.1843546284224251, - "grad_norm": 1.5565494298934937, - "learning_rate": 9.822110552763819e-05, - "loss": 6.2219, - "step": 2271 - }, - { - "epoch": 1.1848761408083441, - "grad_norm": 1.5349458456039429, - "learning_rate": 9.822010050251257e-05, - "loss": 6.291, - "step": 2272 - }, - { - "epoch": 1.1853976531942634, - "grad_norm": 1.640566110610962, - "learning_rate": 9.821909547738693e-05, - "loss": 6.1211, - "step": 2273 - }, - { - "epoch": 1.1859191655801826, - "grad_norm": 1.5279864072799683, - "learning_rate": 9.821809045226131e-05, - "loss": 6.3587, - "step": 2274 - }, - { - "epoch": 1.1864406779661016, - "grad_norm": 1.7557909488677979, - "learning_rate": 9.821708542713567e-05, - "loss": 5.8717, - "step": 2275 - }, - { - "epoch": 1.1869621903520209, - "grad_norm": 1.6266311407089233, - "learning_rate": 9.821608040201005e-05, - "loss": 6.334, - "step": 2276 - }, - { - "epoch": 1.1874837027379401, - "grad_norm": 2.151165246963501, - "learning_rate": 9.821507537688443e-05, - "loss": 6.0189, - "step": 2277 - }, - { - "epoch": 1.1880052151238591, - "grad_norm": 1.840430736541748, - "learning_rate": 9.821407035175881e-05, - "loss": 6.2953, - "step": 2278 - }, - { - "epoch": 1.1885267275097784, - "grad_norm": 1.5451712608337402, - "learning_rate": 9.821306532663317e-05, - "loss": 6.0759, - "step": 2279 - }, - { - "epoch": 1.1890482398956976, - "grad_norm": 1.6075654029846191, - "learning_rate": 9.821206030150755e-05, - "loss": 5.9847, - "step": 2280 - }, - { - "epoch": 1.1895697522816167, - "grad_norm": 1.6984021663665771, - "learning_rate": 9.821105527638191e-05, - "loss": 6.363, - "step": 2281 - }, - { - "epoch": 1.190091264667536, - "grad_norm": 1.7678803205490112, - "learning_rate": 9.821005025125629e-05, - "loss": 5.2538, - "step": 2282 - }, - { - "epoch": 1.1906127770534551, - "grad_norm": 1.713875412940979, - "learning_rate": 9.820904522613066e-05, - "loss": 5.8226, - "step": 2283 - }, - { - "epoch": 1.1911342894393742, - "grad_norm": 1.529335379600525, - "learning_rate": 9.820804020100502e-05, - "loss": 6.3554, - "step": 2284 - }, - { - "epoch": 1.1916558018252934, - "grad_norm": 1.929689884185791, - "learning_rate": 9.82070351758794e-05, - "loss": 5.81, - "step": 2285 - }, - { - "epoch": 1.1921773142112124, - "grad_norm": 1.424947738647461, - "learning_rate": 9.820603015075378e-05, - "loss": 6.2951, - "step": 2286 - }, - { - "epoch": 1.1926988265971317, - "grad_norm": 1.5082155466079712, - "learning_rate": 9.820502512562815e-05, - "loss": 6.3561, - "step": 2287 - }, - { - "epoch": 1.193220338983051, - "grad_norm": 1.5439660549163818, - "learning_rate": 9.820402010050252e-05, - "loss": 6.1057, - "step": 2288 - }, - { - "epoch": 1.19374185136897, - "grad_norm": 1.7151106595993042, - "learning_rate": 9.82030150753769e-05, - "loss": 6.0657, - "step": 2289 - }, - { - "epoch": 1.1942633637548892, - "grad_norm": 1.5136423110961914, - "learning_rate": 9.820201005025126e-05, - "loss": 6.3777, - "step": 2290 - }, - { - "epoch": 1.1947848761408084, - "grad_norm": 1.7190476655960083, - "learning_rate": 9.820100502512564e-05, - "loss": 6.0312, - "step": 2291 - }, - { - "epoch": 1.1953063885267274, - "grad_norm": 1.7018376588821411, - "learning_rate": 9.82e-05, - "loss": 5.7159, - "step": 2292 - }, - { - "epoch": 1.1958279009126467, - "grad_norm": 1.499851942062378, - "learning_rate": 9.819899497487438e-05, - "loss": 6.5096, - "step": 2293 - }, - { - "epoch": 1.196349413298566, - "grad_norm": 2.2462544441223145, - "learning_rate": 9.819798994974874e-05, - "loss": 5.727, - "step": 2294 - }, - { - "epoch": 1.196870925684485, - "grad_norm": 1.4919116497039795, - "learning_rate": 9.819698492462312e-05, - "loss": 6.4189, - "step": 2295 - }, - { - "epoch": 1.1973924380704042, - "grad_norm": 1.8870748281478882, - "learning_rate": 9.819597989949749e-05, - "loss": 5.6393, - "step": 2296 - }, - { - "epoch": 1.1979139504563234, - "grad_norm": 2.049379587173462, - "learning_rate": 9.819497487437186e-05, - "loss": 5.533, - "step": 2297 - }, - { - "epoch": 1.1984354628422424, - "grad_norm": 1.7870759963989258, - "learning_rate": 9.819396984924624e-05, - "loss": 5.9648, - "step": 2298 - }, - { - "epoch": 1.1989569752281617, - "grad_norm": 2.0743887424468994, - "learning_rate": 9.81929648241206e-05, - "loss": 6.079, - "step": 2299 - }, - { - "epoch": 1.1994784876140807, - "grad_norm": 1.709718108177185, - "learning_rate": 9.819195979899498e-05, - "loss": 5.927, - "step": 2300 - }, - { - "epoch": 1.2, - "grad_norm": 1.67744779586792, - "learning_rate": 9.819095477386935e-05, - "loss": 5.7817, - "step": 2301 - }, - { - "epoch": 1.2005215123859192, - "grad_norm": 1.8184869289398193, - "learning_rate": 9.818994974874373e-05, - "loss": 5.8901, - "step": 2302 - }, - { - "epoch": 1.2010430247718382, - "grad_norm": 1.5468182563781738, - "learning_rate": 9.818894472361809e-05, - "loss": 5.96, - "step": 2303 - }, - { - "epoch": 1.2015645371577575, - "grad_norm": 2.268282651901245, - "learning_rate": 9.818793969849247e-05, - "loss": 5.69, - "step": 2304 - }, - { - "epoch": 1.2020860495436767, - "grad_norm": 1.4362901449203491, - "learning_rate": 9.818693467336683e-05, - "loss": 6.3722, - "step": 2305 - }, - { - "epoch": 1.2026075619295957, - "grad_norm": 1.658185601234436, - "learning_rate": 9.818592964824121e-05, - "loss": 5.6416, - "step": 2306 - }, - { - "epoch": 1.203129074315515, - "grad_norm": 1.4815093278884888, - "learning_rate": 9.818492462311559e-05, - "loss": 6.0633, - "step": 2307 - }, - { - "epoch": 1.2036505867014342, - "grad_norm": 1.7652782201766968, - "learning_rate": 9.818391959798997e-05, - "loss": 6.3984, - "step": 2308 - }, - { - "epoch": 1.2041720990873532, - "grad_norm": 1.8604273796081543, - "learning_rate": 9.818291457286433e-05, - "loss": 5.2822, - "step": 2309 - }, - { - "epoch": 1.2046936114732725, - "grad_norm": 1.6468194723129272, - "learning_rate": 9.81819095477387e-05, - "loss": 5.9139, - "step": 2310 - }, - { - "epoch": 1.2052151238591917, - "grad_norm": 1.622148036956787, - "learning_rate": 9.818090452261307e-05, - "loss": 6.2985, - "step": 2311 - }, - { - "epoch": 1.2057366362451107, - "grad_norm": 1.872686743736267, - "learning_rate": 9.817989949748744e-05, - "loss": 6.0084, - "step": 2312 - }, - { - "epoch": 1.20625814863103, - "grad_norm": 1.625709891319275, - "learning_rate": 9.817889447236181e-05, - "loss": 6.1685, - "step": 2313 - }, - { - "epoch": 1.2067796610169492, - "grad_norm": 1.7757817506790161, - "learning_rate": 9.817788944723618e-05, - "loss": 5.9205, - "step": 2314 - }, - { - "epoch": 1.2073011734028682, - "grad_norm": 1.7141445875167847, - "learning_rate": 9.817688442211056e-05, - "loss": 5.8054, - "step": 2315 - }, - { - "epoch": 1.2078226857887875, - "grad_norm": 1.5678503513336182, - "learning_rate": 9.817587939698492e-05, - "loss": 6.079, - "step": 2316 - }, - { - "epoch": 1.2083441981747067, - "grad_norm": 1.7246036529541016, - "learning_rate": 9.81748743718593e-05, - "loss": 6.3806, - "step": 2317 - }, - { - "epoch": 1.2088657105606258, - "grad_norm": 2.032944440841675, - "learning_rate": 9.817386934673368e-05, - "loss": 6.0508, - "step": 2318 - }, - { - "epoch": 1.209387222946545, - "grad_norm": 1.5565643310546875, - "learning_rate": 9.817286432160805e-05, - "loss": 6.3019, - "step": 2319 - }, - { - "epoch": 1.2099087353324642, - "grad_norm": 2.0052428245544434, - "learning_rate": 9.817185929648242e-05, - "loss": 6.4018, - "step": 2320 - }, - { - "epoch": 1.2104302477183833, - "grad_norm": 1.7622815370559692, - "learning_rate": 9.81708542713568e-05, - "loss": 5.9461, - "step": 2321 - }, - { - "epoch": 1.2109517601043025, - "grad_norm": 1.7361843585968018, - "learning_rate": 9.816984924623116e-05, - "loss": 6.2536, - "step": 2322 - }, - { - "epoch": 1.2114732724902217, - "grad_norm": 1.6813410520553589, - "learning_rate": 9.816884422110552e-05, - "loss": 5.9063, - "step": 2323 - }, - { - "epoch": 1.2119947848761408, - "grad_norm": 1.498180866241455, - "learning_rate": 9.81678391959799e-05, - "loss": 6.2711, - "step": 2324 - }, - { - "epoch": 1.21251629726206, - "grad_norm": 1.8266832828521729, - "learning_rate": 9.816683417085427e-05, - "loss": 5.0716, - "step": 2325 - }, - { - "epoch": 1.2130378096479792, - "grad_norm": 1.962540864944458, - "learning_rate": 9.816582914572864e-05, - "loss": 6.0388, - "step": 2326 - }, - { - "epoch": 1.2135593220338983, - "grad_norm": 2.35485577583313, - "learning_rate": 9.816482412060302e-05, - "loss": 5.7462, - "step": 2327 - }, - { - "epoch": 1.2140808344198175, - "grad_norm": 2.106419801712036, - "learning_rate": 9.81638190954774e-05, - "loss": 5.8976, - "step": 2328 - }, - { - "epoch": 1.2146023468057368, - "grad_norm": 1.767941951751709, - "learning_rate": 9.816281407035176e-05, - "loss": 5.8703, - "step": 2329 - }, - { - "epoch": 1.2151238591916558, - "grad_norm": 1.8493388891220093, - "learning_rate": 9.816180904522614e-05, - "loss": 5.8767, - "step": 2330 - }, - { - "epoch": 1.215645371577575, - "grad_norm": 1.4994268417358398, - "learning_rate": 9.81608040201005e-05, - "loss": 6.0914, - "step": 2331 - }, - { - "epoch": 1.216166883963494, - "grad_norm": 1.5960980653762817, - "learning_rate": 9.815979899497488e-05, - "loss": 5.801, - "step": 2332 - }, - { - "epoch": 1.2166883963494133, - "grad_norm": 1.6593005657196045, - "learning_rate": 9.815879396984925e-05, - "loss": 6.13, - "step": 2333 - }, - { - "epoch": 1.2172099087353325, - "grad_norm": 1.76396906375885, - "learning_rate": 9.815778894472363e-05, - "loss": 6.12, - "step": 2334 - }, - { - "epoch": 1.2177314211212515, - "grad_norm": 1.8069300651550293, - "learning_rate": 9.815678391959799e-05, - "loss": 5.8941, - "step": 2335 - }, - { - "epoch": 1.2182529335071708, - "grad_norm": 1.8025990724563599, - "learning_rate": 9.815577889447236e-05, - "loss": 5.9834, - "step": 2336 - }, - { - "epoch": 1.21877444589309, - "grad_norm": 1.6424545049667358, - "learning_rate": 9.815477386934673e-05, - "loss": 5.8012, - "step": 2337 - }, - { - "epoch": 1.219295958279009, - "grad_norm": 1.5494656562805176, - "learning_rate": 9.815376884422111e-05, - "loss": 6.2522, - "step": 2338 - }, - { - "epoch": 1.2198174706649283, - "grad_norm": 1.567967414855957, - "learning_rate": 9.815276381909549e-05, - "loss": 6.3534, - "step": 2339 - }, - { - "epoch": 1.2203389830508475, - "grad_norm": 1.6258360147476196, - "learning_rate": 9.815175879396985e-05, - "loss": 6.4877, - "step": 2340 - }, - { - "epoch": 1.2208604954367666, - "grad_norm": 1.8733817338943481, - "learning_rate": 9.815075376884423e-05, - "loss": 5.8444, - "step": 2341 - }, - { - "epoch": 1.2213820078226858, - "grad_norm": 1.7575840950012207, - "learning_rate": 9.81497487437186e-05, - "loss": 5.7722, - "step": 2342 - }, - { - "epoch": 1.221903520208605, - "grad_norm": 2.2345924377441406, - "learning_rate": 9.814874371859297e-05, - "loss": 5.6587, - "step": 2343 - }, - { - "epoch": 1.222425032594524, - "grad_norm": 2.177804946899414, - "learning_rate": 9.814773869346734e-05, - "loss": 5.6759, - "step": 2344 - }, - { - "epoch": 1.2229465449804433, - "grad_norm": 1.8077000379562378, - "learning_rate": 9.814673366834171e-05, - "loss": 5.6903, - "step": 2345 - }, - { - "epoch": 1.2234680573663623, - "grad_norm": 1.6206762790679932, - "learning_rate": 9.814572864321608e-05, - "loss": 5.9817, - "step": 2346 - }, - { - "epoch": 1.2239895697522816, - "grad_norm": 1.6954774856567383, - "learning_rate": 9.814472361809046e-05, - "loss": 6.5568, - "step": 2347 - }, - { - "epoch": 1.2245110821382008, - "grad_norm": 1.815686821937561, - "learning_rate": 9.814371859296483e-05, - "loss": 6.1389, - "step": 2348 - }, - { - "epoch": 1.2250325945241198, - "grad_norm": 1.7051798105239868, - "learning_rate": 9.814271356783921e-05, - "loss": 6.1791, - "step": 2349 - }, - { - "epoch": 1.225554106910039, - "grad_norm": 1.7045036554336548, - "learning_rate": 9.814170854271358e-05, - "loss": 6.0514, - "step": 2350 - }, - { - "epoch": 1.2260756192959583, - "grad_norm": 1.4944511651992798, - "learning_rate": 9.814070351758794e-05, - "loss": 6.2937, - "step": 2351 - }, - { - "epoch": 1.2265971316818773, - "grad_norm": 1.5316293239593506, - "learning_rate": 9.813969849246232e-05, - "loss": 6.1102, - "step": 2352 - }, - { - "epoch": 1.2271186440677966, - "grad_norm": 1.761322021484375, - "learning_rate": 9.813869346733668e-05, - "loss": 6.0906, - "step": 2353 - }, - { - "epoch": 1.2276401564537158, - "grad_norm": 1.932059407234192, - "learning_rate": 9.813768844221106e-05, - "loss": 5.2294, - "step": 2354 - }, - { - "epoch": 1.2281616688396348, - "grad_norm": 1.5344874858856201, - "learning_rate": 9.813668341708543e-05, - "loss": 6.3834, - "step": 2355 - }, - { - "epoch": 1.228683181225554, - "grad_norm": 1.8602535724639893, - "learning_rate": 9.81356783919598e-05, - "loss": 6.1281, - "step": 2356 - }, - { - "epoch": 1.2292046936114733, - "grad_norm": 2.0995049476623535, - "learning_rate": 9.813467336683417e-05, - "loss": 5.8005, - "step": 2357 - }, - { - "epoch": 1.2297262059973924, - "grad_norm": 1.6905689239501953, - "learning_rate": 9.813366834170855e-05, - "loss": 5.8638, - "step": 2358 - }, - { - "epoch": 1.2302477183833116, - "grad_norm": 1.4666197299957275, - "learning_rate": 9.813266331658292e-05, - "loss": 5.8753, - "step": 2359 - }, - { - "epoch": 1.2307692307692308, - "grad_norm": 1.5940220355987549, - "learning_rate": 9.81316582914573e-05, - "loss": 6.2041, - "step": 2360 - }, - { - "epoch": 1.2312907431551499, - "grad_norm": 1.532310128211975, - "learning_rate": 9.813065326633167e-05, - "loss": 5.5097, - "step": 2361 - }, - { - "epoch": 1.231812255541069, - "grad_norm": 1.5657151937484741, - "learning_rate": 9.812964824120604e-05, - "loss": 6.1737, - "step": 2362 - }, - { - "epoch": 1.2323337679269883, - "grad_norm": 1.6417806148529053, - "learning_rate": 9.812864321608041e-05, - "loss": 6.1769, - "step": 2363 - }, - { - "epoch": 1.2328552803129074, - "grad_norm": 2.9208431243896484, - "learning_rate": 9.812763819095477e-05, - "loss": 6.0494, - "step": 2364 - }, - { - "epoch": 1.2333767926988266, - "grad_norm": 1.9619948863983154, - "learning_rate": 9.812663316582915e-05, - "loss": 5.8946, - "step": 2365 - }, - { - "epoch": 1.2338983050847459, - "grad_norm": 1.982285976409912, - "learning_rate": 9.812562814070351e-05, - "loss": 5.5907, - "step": 2366 - }, - { - "epoch": 1.2344198174706649, - "grad_norm": 1.9110360145568848, - "learning_rate": 9.812462311557789e-05, - "loss": 6.056, - "step": 2367 - }, - { - "epoch": 1.2349413298565841, - "grad_norm": 1.975943684577942, - "learning_rate": 9.812361809045227e-05, - "loss": 6.2207, - "step": 2368 - }, - { - "epoch": 1.2354628422425034, - "grad_norm": 1.454530954360962, - "learning_rate": 9.812261306532665e-05, - "loss": 5.6655, - "step": 2369 - }, - { - "epoch": 1.2359843546284224, - "grad_norm": 1.6475328207015991, - "learning_rate": 9.812160804020101e-05, - "loss": 6.2606, - "step": 2370 - }, - { - "epoch": 1.2365058670143416, - "grad_norm": 1.6005642414093018, - "learning_rate": 9.812060301507539e-05, - "loss": 6.033, - "step": 2371 - }, - { - "epoch": 1.2370273794002609, - "grad_norm": 1.6820613145828247, - "learning_rate": 9.811959798994975e-05, - "loss": 6.1884, - "step": 2372 - }, - { - "epoch": 1.2375488917861799, - "grad_norm": 1.7590827941894531, - "learning_rate": 9.811859296482413e-05, - "loss": 6.0102, - "step": 2373 - }, - { - "epoch": 1.2380704041720991, - "grad_norm": 1.5519553422927856, - "learning_rate": 9.81175879396985e-05, - "loss": 6.2487, - "step": 2374 - }, - { - "epoch": 1.2385919165580184, - "grad_norm": 1.5279432535171509, - "learning_rate": 9.811658291457287e-05, - "loss": 5.5374, - "step": 2375 - }, - { - "epoch": 1.2391134289439374, - "grad_norm": 1.8449634313583374, - "learning_rate": 9.811557788944724e-05, - "loss": 5.9623, - "step": 2376 - }, - { - "epoch": 1.2396349413298566, - "grad_norm": 2.171844720840454, - "learning_rate": 9.81145728643216e-05, - "loss": 5.9813, - "step": 2377 - }, - { - "epoch": 1.2401564537157757, - "grad_norm": 1.9095181226730347, - "learning_rate": 9.811356783919598e-05, - "loss": 5.5188, - "step": 2378 - }, - { - "epoch": 1.240677966101695, - "grad_norm": 2.523716688156128, - "learning_rate": 9.811256281407036e-05, - "loss": 5.7852, - "step": 2379 - }, - { - "epoch": 1.2411994784876141, - "grad_norm": 2.035066843032837, - "learning_rate": 9.811155778894474e-05, - "loss": 5.7163, - "step": 2380 - }, - { - "epoch": 1.2417209908735332, - "grad_norm": 1.975915789604187, - "learning_rate": 9.81105527638191e-05, - "loss": 6.0897, - "step": 2381 - }, - { - "epoch": 1.2422425032594524, - "grad_norm": 1.7738310098648071, - "learning_rate": 9.810954773869348e-05, - "loss": 6.2117, - "step": 2382 - }, - { - "epoch": 1.2427640156453716, - "grad_norm": 1.6086384057998657, - "learning_rate": 9.810854271356784e-05, - "loss": 6.1091, - "step": 2383 - }, - { - "epoch": 1.2432855280312907, - "grad_norm": 2.0317318439483643, - "learning_rate": 9.810753768844222e-05, - "loss": 5.345, - "step": 2384 - }, - { - "epoch": 1.24380704041721, - "grad_norm": 1.6118898391723633, - "learning_rate": 9.810653266331658e-05, - "loss": 6.2362, - "step": 2385 - }, - { - "epoch": 1.2443285528031292, - "grad_norm": 1.6175588369369507, - "learning_rate": 9.810552763819096e-05, - "loss": 6.144, - "step": 2386 - }, - { - "epoch": 1.2448500651890482, - "grad_norm": 1.6560720205307007, - "learning_rate": 9.810452261306533e-05, - "loss": 6.0237, - "step": 2387 - }, - { - "epoch": 1.2453715775749674, - "grad_norm": 1.6471009254455566, - "learning_rate": 9.81035175879397e-05, - "loss": 5.5321, - "step": 2388 - }, - { - "epoch": 1.2458930899608867, - "grad_norm": 2.081552505493164, - "learning_rate": 9.810251256281408e-05, - "loss": 5.8745, - "step": 2389 - }, - { - "epoch": 1.2464146023468057, - "grad_norm": 1.6420671939849854, - "learning_rate": 9.810150753768845e-05, - "loss": 6.3909, - "step": 2390 - }, - { - "epoch": 1.246936114732725, - "grad_norm": 1.7266751527786255, - "learning_rate": 9.810050251256282e-05, - "loss": 6.219, - "step": 2391 - }, - { - "epoch": 1.2474576271186442, - "grad_norm": 1.7193596363067627, - "learning_rate": 9.809949748743719e-05, - "loss": 6.1863, - "step": 2392 - }, - { - "epoch": 1.2479791395045632, - "grad_norm": 1.615049958229065, - "learning_rate": 9.809849246231157e-05, - "loss": 6.1865, - "step": 2393 - }, - { - "epoch": 1.2485006518904824, - "grad_norm": 1.4636505842208862, - "learning_rate": 9.809748743718593e-05, - "loss": 6.1721, - "step": 2394 - }, - { - "epoch": 1.2490221642764014, - "grad_norm": 1.4940389394760132, - "learning_rate": 9.809648241206031e-05, - "loss": 6.2341, - "step": 2395 - }, - { - "epoch": 1.2495436766623207, - "grad_norm": 1.5442814826965332, - "learning_rate": 9.809547738693467e-05, - "loss": 6.3287, - "step": 2396 - }, - { - "epoch": 1.25006518904824, - "grad_norm": 1.662725567817688, - "learning_rate": 9.809447236180905e-05, - "loss": 6.0666, - "step": 2397 - }, - { - "epoch": 1.250586701434159, - "grad_norm": 1.5695070028305054, - "learning_rate": 9.809346733668341e-05, - "loss": 5.967, - "step": 2398 - }, - { - "epoch": 1.2511082138200782, - "grad_norm": 1.7494882345199585, - "learning_rate": 9.809246231155779e-05, - "loss": 5.7404, - "step": 2399 - }, - { - "epoch": 1.2516297262059974, - "grad_norm": 1.7852306365966797, - "learning_rate": 9.809145728643217e-05, - "loss": 5.748, - "step": 2400 - }, - { - "epoch": 1.2521512385919165, - "grad_norm": 1.620023488998413, - "learning_rate": 9.809045226130655e-05, - "loss": 5.7975, - "step": 2401 - }, - { - "epoch": 1.2526727509778357, - "grad_norm": 1.6267998218536377, - "learning_rate": 9.808944723618091e-05, - "loss": 6.3073, - "step": 2402 - }, - { - "epoch": 1.253194263363755, - "grad_norm": 1.6103161573410034, - "learning_rate": 9.808844221105528e-05, - "loss": 6.3882, - "step": 2403 - }, - { - "epoch": 1.253715775749674, - "grad_norm": 1.8769530057907104, - "learning_rate": 9.808743718592965e-05, - "loss": 6.2069, - "step": 2404 - }, - { - "epoch": 1.2542372881355932, - "grad_norm": 1.8078742027282715, - "learning_rate": 9.808643216080402e-05, - "loss": 6.1111, - "step": 2405 - }, - { - "epoch": 1.2547588005215125, - "grad_norm": 1.5848397016525269, - "learning_rate": 9.80854271356784e-05, - "loss": 6.0275, - "step": 2406 - }, - { - "epoch": 1.2552803129074315, - "grad_norm": 1.5705652236938477, - "learning_rate": 9.808442211055276e-05, - "loss": 6.0868, - "step": 2407 - }, - { - "epoch": 1.2558018252933507, - "grad_norm": 1.5766164064407349, - "learning_rate": 9.808341708542714e-05, - "loss": 6.3224, - "step": 2408 - }, - { - "epoch": 1.25632333767927, - "grad_norm": 1.7578343152999878, - "learning_rate": 9.80824120603015e-05, - "loss": 6.0232, - "step": 2409 - }, - { - "epoch": 1.256844850065189, - "grad_norm": 1.6132627725601196, - "learning_rate": 9.808140703517588e-05, - "loss": 6.3296, - "step": 2410 - }, - { - "epoch": 1.2573663624511082, - "grad_norm": 1.480827808380127, - "learning_rate": 9.808040201005026e-05, - "loss": 5.3716, - "step": 2411 - }, - { - "epoch": 1.2578878748370275, - "grad_norm": 1.8271872997283936, - "learning_rate": 9.807939698492464e-05, - "loss": 6.3604, - "step": 2412 - }, - { - "epoch": 1.2584093872229465, - "grad_norm": 1.5612602233886719, - "learning_rate": 9.8078391959799e-05, - "loss": 6.0017, - "step": 2413 - }, - { - "epoch": 1.2589308996088657, - "grad_norm": 1.9115878343582153, - "learning_rate": 9.807738693467338e-05, - "loss": 6.1742, - "step": 2414 - }, - { - "epoch": 1.259452411994785, - "grad_norm": 2.0935845375061035, - "learning_rate": 9.807638190954774e-05, - "loss": 6.3859, - "step": 2415 - }, - { - "epoch": 1.259973924380704, - "grad_norm": 1.5691924095153809, - "learning_rate": 9.807537688442211e-05, - "loss": 6.0657, - "step": 2416 - }, - { - "epoch": 1.2604954367666232, - "grad_norm": 1.652215838432312, - "learning_rate": 9.807437185929648e-05, - "loss": 5.5103, - "step": 2417 - }, - { - "epoch": 1.2610169491525425, - "grad_norm": 1.9450608491897583, - "learning_rate": 9.807336683417085e-05, - "loss": 6.2519, - "step": 2418 - }, - { - "epoch": 1.2615384615384615, - "grad_norm": 1.692731499671936, - "learning_rate": 9.807236180904523e-05, - "loss": 6.0318, - "step": 2419 - }, - { - "epoch": 1.2620599739243807, - "grad_norm": 1.6469907760620117, - "learning_rate": 9.80713567839196e-05, - "loss": 6.1992, - "step": 2420 - }, - { - "epoch": 1.2625814863103, - "grad_norm": 1.776899814605713, - "learning_rate": 9.807035175879398e-05, - "loss": 6.1549, - "step": 2421 - }, - { - "epoch": 1.263102998696219, - "grad_norm": 1.7042206525802612, - "learning_rate": 9.806934673366835e-05, - "loss": 6.1194, - "step": 2422 - }, - { - "epoch": 1.2636245110821382, - "grad_norm": 1.4940481185913086, - "learning_rate": 9.806834170854272e-05, - "loss": 5.7737, - "step": 2423 - }, - { - "epoch": 1.2641460234680575, - "grad_norm": 1.8041486740112305, - "learning_rate": 9.806733668341709e-05, - "loss": 6.1321, - "step": 2424 - }, - { - "epoch": 1.2646675358539765, - "grad_norm": 1.6374828815460205, - "learning_rate": 9.806633165829147e-05, - "loss": 6.1351, - "step": 2425 - }, - { - "epoch": 1.2651890482398958, - "grad_norm": 1.774246096611023, - "learning_rate": 9.806532663316583e-05, - "loss": 6.0356, - "step": 2426 - }, - { - "epoch": 1.265710560625815, - "grad_norm": 2.0529778003692627, - "learning_rate": 9.806432160804021e-05, - "loss": 5.5112, - "step": 2427 - }, - { - "epoch": 1.266232073011734, - "grad_norm": 1.7884122133255005, - "learning_rate": 9.806331658291457e-05, - "loss": 5.346, - "step": 2428 - }, - { - "epoch": 1.2667535853976533, - "grad_norm": 1.6786189079284668, - "learning_rate": 9.806231155778894e-05, - "loss": 5.8765, - "step": 2429 - }, - { - "epoch": 1.2672750977835723, - "grad_norm": 1.947405457496643, - "learning_rate": 9.806130653266332e-05, - "loss": 6.323, - "step": 2430 - }, - { - "epoch": 1.2677966101694915, - "grad_norm": 1.6861083507537842, - "learning_rate": 9.806030150753769e-05, - "loss": 6.3398, - "step": 2431 - }, - { - "epoch": 1.2683181225554108, - "grad_norm": 1.694516897201538, - "learning_rate": 9.805929648241207e-05, - "loss": 6.252, - "step": 2432 - }, - { - "epoch": 1.2688396349413298, - "grad_norm": 1.7161301374435425, - "learning_rate": 9.805829145728644e-05, - "loss": 6.4414, - "step": 2433 - }, - { - "epoch": 1.269361147327249, - "grad_norm": 1.750016689300537, - "learning_rate": 9.805728643216081e-05, - "loss": 6.0129, - "step": 2434 - }, - { - "epoch": 1.269882659713168, - "grad_norm": 1.5770639181137085, - "learning_rate": 9.805628140703518e-05, - "loss": 6.1028, - "step": 2435 - }, - { - "epoch": 1.2704041720990873, - "grad_norm": 1.5977411270141602, - "learning_rate": 9.805527638190956e-05, - "loss": 6.2965, - "step": 2436 - }, - { - "epoch": 1.2709256844850065, - "grad_norm": 1.6670105457305908, - "learning_rate": 9.805427135678392e-05, - "loss": 6.159, - "step": 2437 - }, - { - "epoch": 1.2714471968709256, - "grad_norm": 2.2631120681762695, - "learning_rate": 9.80532663316583e-05, - "loss": 5.8192, - "step": 2438 - }, - { - "epoch": 1.2719687092568448, - "grad_norm": 1.9512810707092285, - "learning_rate": 9.805226130653266e-05, - "loss": 5.774, - "step": 2439 - }, - { - "epoch": 1.272490221642764, - "grad_norm": 1.7135931253433228, - "learning_rate": 9.805125628140704e-05, - "loss": 6.0227, - "step": 2440 - }, - { - "epoch": 1.273011734028683, - "grad_norm": 1.7273602485656738, - "learning_rate": 9.805025125628142e-05, - "loss": 5.9689, - "step": 2441 - }, - { - "epoch": 1.2735332464146023, - "grad_norm": 1.6565227508544922, - "learning_rate": 9.80492462311558e-05, - "loss": 6.0811, - "step": 2442 - }, - { - "epoch": 1.2740547588005215, - "grad_norm": 1.4586776494979858, - "learning_rate": 9.804824120603016e-05, - "loss": 6.3572, - "step": 2443 - }, - { - "epoch": 1.2745762711864406, - "grad_norm": 1.7837969064712524, - "learning_rate": 9.804723618090452e-05, - "loss": 5.8148, - "step": 2444 - }, - { - "epoch": 1.2750977835723598, - "grad_norm": 2.64665150642395, - "learning_rate": 9.80462311557789e-05, - "loss": 6.1503, - "step": 2445 - }, - { - "epoch": 1.275619295958279, - "grad_norm": 1.5442789793014526, - "learning_rate": 9.804522613065327e-05, - "loss": 6.0142, - "step": 2446 - }, - { - "epoch": 1.276140808344198, - "grad_norm": 1.8197027444839478, - "learning_rate": 9.804422110552764e-05, - "loss": 6.1549, - "step": 2447 - }, - { - "epoch": 1.2766623207301173, - "grad_norm": 1.8374695777893066, - "learning_rate": 9.804321608040201e-05, - "loss": 6.1212, - "step": 2448 - }, - { - "epoch": 1.2771838331160366, - "grad_norm": 1.674656867980957, - "learning_rate": 9.804221105527639e-05, - "loss": 6.2472, - "step": 2449 - }, - { - "epoch": 1.2777053455019556, - "grad_norm": 1.585371971130371, - "learning_rate": 9.804120603015075e-05, - "loss": 6.1531, - "step": 2450 - }, - { - "epoch": 1.2782268578878748, - "grad_norm": 1.6216912269592285, - "learning_rate": 9.804020100502513e-05, - "loss": 6.1949, - "step": 2451 - }, - { - "epoch": 1.278748370273794, - "grad_norm": 1.6730854511260986, - "learning_rate": 9.80391959798995e-05, - "loss": 5.6518, - "step": 2452 - }, - { - "epoch": 1.279269882659713, - "grad_norm": 1.676228404045105, - "learning_rate": 9.803819095477388e-05, - "loss": 6.2649, - "step": 2453 - }, - { - "epoch": 1.2797913950456323, - "grad_norm": 1.7399259805679321, - "learning_rate": 9.803718592964825e-05, - "loss": 5.9254, - "step": 2454 - }, - { - "epoch": 1.2803129074315516, - "grad_norm": 1.9783879518508911, - "learning_rate": 9.803618090452263e-05, - "loss": 5.6008, - "step": 2455 - }, - { - "epoch": 1.2808344198174706, - "grad_norm": 1.4143816232681274, - "learning_rate": 9.803517587939699e-05, - "loss": 6.3869, - "step": 2456 - }, - { - "epoch": 1.2813559322033898, - "grad_norm": 1.5272953510284424, - "learning_rate": 9.803417085427135e-05, - "loss": 6.5396, - "step": 2457 - }, - { - "epoch": 1.281877444589309, - "grad_norm": 1.6473231315612793, - "learning_rate": 9.803316582914573e-05, - "loss": 6.0603, - "step": 2458 - }, - { - "epoch": 1.282398956975228, - "grad_norm": 1.557220697402954, - "learning_rate": 9.80321608040201e-05, - "loss": 6.1595, - "step": 2459 - }, - { - "epoch": 1.2829204693611473, - "grad_norm": 1.5079889297485352, - "learning_rate": 9.803115577889447e-05, - "loss": 6.1603, - "step": 2460 - }, - { - "epoch": 1.2834419817470666, - "grad_norm": 1.6709891557693481, - "learning_rate": 9.803015075376885e-05, - "loss": 5.3529, - "step": 2461 - }, - { - "epoch": 1.2839634941329856, - "grad_norm": 1.5749216079711914, - "learning_rate": 9.802914572864323e-05, - "loss": 6.0981, - "step": 2462 - }, - { - "epoch": 1.2844850065189048, - "grad_norm": 1.7351951599121094, - "learning_rate": 9.80281407035176e-05, - "loss": 6.0681, - "step": 2463 - }, - { - "epoch": 1.285006518904824, - "grad_norm": 1.6309770345687866, - "learning_rate": 9.802713567839197e-05, - "loss": 6.3394, - "step": 2464 - }, - { - "epoch": 1.2855280312907431, - "grad_norm": 1.4928302764892578, - "learning_rate": 9.802613065326634e-05, - "loss": 6.3404, - "step": 2465 - }, - { - "epoch": 1.2860495436766624, - "grad_norm": 1.9041593074798584, - "learning_rate": 9.802512562814071e-05, - "loss": 5.887, - "step": 2466 - }, - { - "epoch": 1.2865710560625816, - "grad_norm": 1.6199983358383179, - "learning_rate": 9.802412060301508e-05, - "loss": 6.254, - "step": 2467 - }, - { - "epoch": 1.2870925684485006, - "grad_norm": 1.7301759719848633, - "learning_rate": 9.802311557788946e-05, - "loss": 5.9548, - "step": 2468 - }, - { - "epoch": 1.2876140808344199, - "grad_norm": 1.7285408973693848, - "learning_rate": 9.802211055276382e-05, - "loss": 5.7616, - "step": 2469 - }, - { - "epoch": 1.288135593220339, - "grad_norm": 1.7891991138458252, - "learning_rate": 9.802110552763818e-05, - "loss": 6.2355, - "step": 2470 - }, - { - "epoch": 1.2886571056062581, - "grad_norm": 1.5435711145401, - "learning_rate": 9.802010050251256e-05, - "loss": 6.3816, - "step": 2471 - }, - { - "epoch": 1.2891786179921774, - "grad_norm": 1.8156284093856812, - "learning_rate": 9.801909547738694e-05, - "loss": 5.5133, - "step": 2472 - }, - { - "epoch": 1.2897001303780966, - "grad_norm": 1.7439326047897339, - "learning_rate": 9.801809045226132e-05, - "loss": 6.2033, - "step": 2473 - }, - { - "epoch": 1.2902216427640156, - "grad_norm": 1.7019009590148926, - "learning_rate": 9.801708542713568e-05, - "loss": 5.5285, - "step": 2474 - }, - { - "epoch": 1.2907431551499349, - "grad_norm": 1.8198002576828003, - "learning_rate": 9.801608040201006e-05, - "loss": 5.458, - "step": 2475 - }, - { - "epoch": 1.291264667535854, - "grad_norm": 1.6428302526474, - "learning_rate": 9.801507537688442e-05, - "loss": 6.1403, - "step": 2476 - }, - { - "epoch": 1.2917861799217731, - "grad_norm": 1.5422180891036987, - "learning_rate": 9.80140703517588e-05, - "loss": 6.2856, - "step": 2477 - }, - { - "epoch": 1.2923076923076924, - "grad_norm": 1.499881625175476, - "learning_rate": 9.801306532663317e-05, - "loss": 6.1848, - "step": 2478 - }, - { - "epoch": 1.2928292046936114, - "grad_norm": 1.6463180780410767, - "learning_rate": 9.801206030150754e-05, - "loss": 5.4377, - "step": 2479 - }, - { - "epoch": 1.2933507170795306, - "grad_norm": 1.7342177629470825, - "learning_rate": 9.801105527638191e-05, - "loss": 5.959, - "step": 2480 - }, - { - "epoch": 1.2938722294654499, - "grad_norm": 1.9241501092910767, - "learning_rate": 9.801005025125629e-05, - "loss": 5.8093, - "step": 2481 - }, - { - "epoch": 1.294393741851369, - "grad_norm": 1.6937975883483887, - "learning_rate": 9.800904522613066e-05, - "loss": 5.5399, - "step": 2482 - }, - { - "epoch": 1.2949152542372881, - "grad_norm": 1.646341323852539, - "learning_rate": 9.800804020100503e-05, - "loss": 6.2984, - "step": 2483 - }, - { - "epoch": 1.2954367666232072, - "grad_norm": 1.5861973762512207, - "learning_rate": 9.80070351758794e-05, - "loss": 6.2272, - "step": 2484 - }, - { - "epoch": 1.2959582790091264, - "grad_norm": 1.4318344593048096, - "learning_rate": 9.800603015075377e-05, - "loss": 6.023, - "step": 2485 - }, - { - "epoch": 1.2964797913950457, - "grad_norm": 1.5936084985733032, - "learning_rate": 9.800502512562815e-05, - "loss": 6.2481, - "step": 2486 - }, - { - "epoch": 1.2970013037809647, - "grad_norm": 1.5022445917129517, - "learning_rate": 9.800402010050251e-05, - "loss": 6.2545, - "step": 2487 - }, - { - "epoch": 1.297522816166884, - "grad_norm": 1.5663511753082275, - "learning_rate": 9.800301507537689e-05, - "loss": 5.8146, - "step": 2488 - }, - { - "epoch": 1.2980443285528032, - "grad_norm": 1.574946641921997, - "learning_rate": 9.800201005025125e-05, - "loss": 5.9538, - "step": 2489 - }, - { - "epoch": 1.2985658409387222, - "grad_norm": 1.575602650642395, - "learning_rate": 9.800100502512563e-05, - "loss": 5.959, - "step": 2490 - }, - { - "epoch": 1.2990873533246414, - "grad_norm": 1.4218430519104004, - "learning_rate": 9.8e-05, - "loss": 6.1358, - "step": 2491 - }, - { - "epoch": 1.2996088657105607, - "grad_norm": 1.6269416809082031, - "learning_rate": 9.799899497487437e-05, - "loss": 5.571, - "step": 2492 - }, - { - "epoch": 1.3001303780964797, - "grad_norm": 1.6003565788269043, - "learning_rate": 9.799798994974875e-05, - "loss": 5.9888, - "step": 2493 - }, - { - "epoch": 1.300651890482399, - "grad_norm": 2.0037682056427, - "learning_rate": 9.799698492462313e-05, - "loss": 6.3128, - "step": 2494 - }, - { - "epoch": 1.3011734028683182, - "grad_norm": 1.8058274984359741, - "learning_rate": 9.79959798994975e-05, - "loss": 5.7687, - "step": 2495 - }, - { - "epoch": 1.3016949152542372, - "grad_norm": 1.7981421947479248, - "learning_rate": 9.799497487437186e-05, - "loss": 6.1458, - "step": 2496 - }, - { - "epoch": 1.3022164276401564, - "grad_norm": 1.8295743465423584, - "learning_rate": 9.799396984924624e-05, - "loss": 5.6928, - "step": 2497 - }, - { - "epoch": 1.3027379400260757, - "grad_norm": 1.759970784187317, - "learning_rate": 9.79929648241206e-05, - "loss": 5.9358, - "step": 2498 - }, - { - "epoch": 1.3032594524119947, - "grad_norm": 1.6202988624572754, - "learning_rate": 9.799195979899498e-05, - "loss": 6.4561, - "step": 2499 - }, - { - "epoch": 1.303780964797914, - "grad_norm": 1.6213043928146362, - "learning_rate": 9.799095477386934e-05, - "loss": 6.0728, - "step": 2500 - }, - { - "epoch": 1.3043024771838332, - "grad_norm": 1.6311516761779785, - "learning_rate": 9.798994974874372e-05, - "loss": 6.0394, - "step": 2501 - }, - { - "epoch": 1.3048239895697522, - "grad_norm": 1.4928587675094604, - "learning_rate": 9.79889447236181e-05, - "loss": 6.1494, - "step": 2502 - }, - { - "epoch": 1.3053455019556715, - "grad_norm": 1.6309846639633179, - "learning_rate": 9.798793969849248e-05, - "loss": 6.1334, - "step": 2503 - }, - { - "epoch": 1.3058670143415907, - "grad_norm": 1.5226510763168335, - "learning_rate": 9.798693467336684e-05, - "loss": 6.2621, - "step": 2504 - }, - { - "epoch": 1.3063885267275097, - "grad_norm": 1.6723437309265137, - "learning_rate": 9.798592964824122e-05, - "loss": 6.0395, - "step": 2505 - }, - { - "epoch": 1.306910039113429, - "grad_norm": 1.5453710556030273, - "learning_rate": 9.798492462311558e-05, - "loss": 6.2832, - "step": 2506 - }, - { - "epoch": 1.3074315514993482, - "grad_norm": 1.5942221879959106, - "learning_rate": 9.798391959798996e-05, - "loss": 5.8928, - "step": 2507 - }, - { - "epoch": 1.3079530638852672, - "grad_norm": 1.722939133644104, - "learning_rate": 9.798291457286433e-05, - "loss": 5.9817, - "step": 2508 - }, - { - "epoch": 1.3084745762711865, - "grad_norm": 1.8619356155395508, - "learning_rate": 9.798190954773869e-05, - "loss": 5.6537, - "step": 2509 - }, - { - "epoch": 1.3089960886571057, - "grad_norm": 1.6537522077560425, - "learning_rate": 9.798090452261307e-05, - "loss": 6.0491, - "step": 2510 - }, - { - "epoch": 1.3095176010430247, - "grad_norm": 1.5271246433258057, - "learning_rate": 9.797989949748743e-05, - "loss": 6.2063, - "step": 2511 - }, - { - "epoch": 1.310039113428944, - "grad_norm": 1.718353509902954, - "learning_rate": 9.797889447236181e-05, - "loss": 5.6789, - "step": 2512 - }, - { - "epoch": 1.3105606258148632, - "grad_norm": 1.6641216278076172, - "learning_rate": 9.797788944723619e-05, - "loss": 5.8507, - "step": 2513 - }, - { - "epoch": 1.3110821382007822, - "grad_norm": 1.6516773700714111, - "learning_rate": 9.797688442211056e-05, - "loss": 5.7553, - "step": 2514 - }, - { - "epoch": 1.3116036505867015, - "grad_norm": 1.5930336713790894, - "learning_rate": 9.797587939698493e-05, - "loss": 6.3542, - "step": 2515 - }, - { - "epoch": 1.3121251629726207, - "grad_norm": 1.6110321283340454, - "learning_rate": 9.797487437185931e-05, - "loss": 6.1247, - "step": 2516 - }, - { - "epoch": 1.3126466753585397, - "grad_norm": 1.4728533029556274, - "learning_rate": 9.797386934673367e-05, - "loss": 6.1848, - "step": 2517 - }, - { - "epoch": 1.313168187744459, - "grad_norm": 1.437720537185669, - "learning_rate": 9.797286432160805e-05, - "loss": 6.1445, - "step": 2518 - }, - { - "epoch": 1.3136897001303782, - "grad_norm": 1.5480064153671265, - "learning_rate": 9.797185929648241e-05, - "loss": 6.1675, - "step": 2519 - }, - { - "epoch": 1.3142112125162972, - "grad_norm": 1.5881060361862183, - "learning_rate": 9.797085427135679e-05, - "loss": 6.1074, - "step": 2520 - }, - { - "epoch": 1.3147327249022165, - "grad_norm": 1.677389144897461, - "learning_rate": 9.796984924623116e-05, - "loss": 6.2863, - "step": 2521 - }, - { - "epoch": 1.3152542372881357, - "grad_norm": 1.5233243703842163, - "learning_rate": 9.796884422110553e-05, - "loss": 5.4793, - "step": 2522 - }, - { - "epoch": 1.3157757496740548, - "grad_norm": 1.6342347860336304, - "learning_rate": 9.796783919597991e-05, - "loss": 6.1585, - "step": 2523 - }, - { - "epoch": 1.316297262059974, - "grad_norm": 1.5781776905059814, - "learning_rate": 9.796683417085428e-05, - "loss": 6.2741, - "step": 2524 - }, - { - "epoch": 1.316818774445893, - "grad_norm": 1.4817887544631958, - "learning_rate": 9.796582914572865e-05, - "loss": 6.3891, - "step": 2525 - }, - { - "epoch": 1.3173402868318123, - "grad_norm": 1.7219842672348022, - "learning_rate": 9.796482412060302e-05, - "loss": 5.3118, - "step": 2526 - }, - { - "epoch": 1.3178617992177315, - "grad_norm": 1.8795174360275269, - "learning_rate": 9.79638190954774e-05, - "loss": 5.8285, - "step": 2527 - }, - { - "epoch": 1.3183833116036505, - "grad_norm": 1.6331959962844849, - "learning_rate": 9.796281407035176e-05, - "loss": 6.4864, - "step": 2528 - }, - { - "epoch": 1.3189048239895698, - "grad_norm": 1.7642707824707031, - "learning_rate": 9.796180904522614e-05, - "loss": 5.4986, - "step": 2529 - }, - { - "epoch": 1.3194263363754888, - "grad_norm": 2.0098085403442383, - "learning_rate": 9.79608040201005e-05, - "loss": 5.6884, - "step": 2530 - }, - { - "epoch": 1.319947848761408, - "grad_norm": 1.6574918031692505, - "learning_rate": 9.795979899497488e-05, - "loss": 6.1445, - "step": 2531 - }, - { - "epoch": 1.3204693611473273, - "grad_norm": 1.724198341369629, - "learning_rate": 9.795879396984924e-05, - "loss": 5.874, - "step": 2532 - }, - { - "epoch": 1.3209908735332463, - "grad_norm": 1.5275824069976807, - "learning_rate": 9.795778894472362e-05, - "loss": 6.3019, - "step": 2533 - }, - { - "epoch": 1.3215123859191655, - "grad_norm": 1.919723391532898, - "learning_rate": 9.7956783919598e-05, - "loss": 5.8738, - "step": 2534 - }, - { - "epoch": 1.3220338983050848, - "grad_norm": 1.7140527963638306, - "learning_rate": 9.795577889447238e-05, - "loss": 5.602, - "step": 2535 - }, - { - "epoch": 1.3225554106910038, - "grad_norm": 1.700982928276062, - "learning_rate": 9.795477386934674e-05, - "loss": 6.0537, - "step": 2536 - }, - { - "epoch": 1.323076923076923, - "grad_norm": 1.9550904035568237, - "learning_rate": 9.79537688442211e-05, - "loss": 5.993, - "step": 2537 - }, - { - "epoch": 1.3235984354628423, - "grad_norm": 1.9019474983215332, - "learning_rate": 9.795276381909548e-05, - "loss": 5.8358, - "step": 2538 - }, - { - "epoch": 1.3241199478487613, - "grad_norm": 1.6109199523925781, - "learning_rate": 9.795175879396985e-05, - "loss": 6.1591, - "step": 2539 - }, - { - "epoch": 1.3246414602346805, - "grad_norm": 1.4759520292282104, - "learning_rate": 9.795075376884423e-05, - "loss": 6.2137, - "step": 2540 - }, - { - "epoch": 1.3251629726205998, - "grad_norm": 1.5282145738601685, - "learning_rate": 9.794974874371859e-05, - "loss": 6.0001, - "step": 2541 - }, - { - "epoch": 1.3256844850065188, - "grad_norm": 1.5583438873291016, - "learning_rate": 9.794874371859297e-05, - "loss": 6.2771, - "step": 2542 - }, - { - "epoch": 1.326205997392438, - "grad_norm": 1.5829055309295654, - "learning_rate": 9.794773869346735e-05, - "loss": 6.5383, - "step": 2543 - }, - { - "epoch": 1.3267275097783573, - "grad_norm": 1.4666547775268555, - "learning_rate": 9.794673366834172e-05, - "loss": 5.9638, - "step": 2544 - }, - { - "epoch": 1.3272490221642763, - "grad_norm": 1.8580710887908936, - "learning_rate": 9.794572864321609e-05, - "loss": 5.5906, - "step": 2545 - }, - { - "epoch": 1.3277705345501956, - "grad_norm": 1.7471016645431519, - "learning_rate": 9.794472361809047e-05, - "loss": 5.8475, - "step": 2546 - }, - { - "epoch": 1.3282920469361148, - "grad_norm": 1.5614352226257324, - "learning_rate": 9.794371859296483e-05, - "loss": 5.8226, - "step": 2547 - }, - { - "epoch": 1.3288135593220338, - "grad_norm": 1.8175265789031982, - "learning_rate": 9.794271356783921e-05, - "loss": 5.9907, - "step": 2548 - }, - { - "epoch": 1.329335071707953, - "grad_norm": 1.7280012369155884, - "learning_rate": 9.794170854271357e-05, - "loss": 6.093, - "step": 2549 - }, - { - "epoch": 1.3298565840938723, - "grad_norm": 1.6838841438293457, - "learning_rate": 9.794070351758794e-05, - "loss": 6.0929, - "step": 2550 - }, - { - "epoch": 1.3303780964797913, - "grad_norm": 1.689023494720459, - "learning_rate": 9.793969849246231e-05, - "loss": 6.2114, - "step": 2551 - }, - { - "epoch": 1.3308996088657106, - "grad_norm": 1.5648472309112549, - "learning_rate": 9.793869346733668e-05, - "loss": 6.1307, - "step": 2552 - }, - { - "epoch": 1.3314211212516298, - "grad_norm": 1.5117080211639404, - "learning_rate": 9.793768844221106e-05, - "loss": 6.0646, - "step": 2553 - }, - { - "epoch": 1.3319426336375488, - "grad_norm": 1.6181011199951172, - "learning_rate": 9.793668341708543e-05, - "loss": 6.2321, - "step": 2554 - }, - { - "epoch": 1.332464146023468, - "grad_norm": 1.7066125869750977, - "learning_rate": 9.793567839195981e-05, - "loss": 5.7869, - "step": 2555 - }, - { - "epoch": 1.3329856584093873, - "grad_norm": 1.660563349723816, - "learning_rate": 9.793467336683418e-05, - "loss": 5.5365, - "step": 2556 - }, - { - "epoch": 1.3335071707953063, - "grad_norm": 1.5499671697616577, - "learning_rate": 9.793366834170855e-05, - "loss": 6.2316, - "step": 2557 - }, - { - "epoch": 1.3340286831812256, - "grad_norm": 1.5472174882888794, - "learning_rate": 9.793266331658292e-05, - "loss": 6.2175, - "step": 2558 - }, - { - "epoch": 1.3345501955671448, - "grad_norm": 1.8386262655258179, - "learning_rate": 9.79316582914573e-05, - "loss": 5.3997, - "step": 2559 - }, - { - "epoch": 1.3350717079530638, - "grad_norm": 1.4272487163543701, - "learning_rate": 9.793065326633166e-05, - "loss": 6.3611, - "step": 2560 - }, - { - "epoch": 1.335593220338983, - "grad_norm": 1.4717625379562378, - "learning_rate": 9.792964824120604e-05, - "loss": 6.0026, - "step": 2561 - }, - { - "epoch": 1.3361147327249023, - "grad_norm": 1.599033236503601, - "learning_rate": 9.79286432160804e-05, - "loss": 6.1974, - "step": 2562 - }, - { - "epoch": 1.3366362451108214, - "grad_norm": 1.5725860595703125, - "learning_rate": 9.792763819095477e-05, - "loss": 6.3259, - "step": 2563 - }, - { - "epoch": 1.3371577574967406, - "grad_norm": 2.6072471141815186, - "learning_rate": 9.792663316582914e-05, - "loss": 4.7133, - "step": 2564 - }, - { - "epoch": 1.3376792698826598, - "grad_norm": 2.007253408432007, - "learning_rate": 9.792562814070352e-05, - "loss": 5.3914, - "step": 2565 - }, - { - "epoch": 1.3382007822685789, - "grad_norm": 1.5861899852752686, - "learning_rate": 9.79246231155779e-05, - "loss": 6.2114, - "step": 2566 - }, - { - "epoch": 1.338722294654498, - "grad_norm": 1.6070928573608398, - "learning_rate": 9.792361809045226e-05, - "loss": 6.3889, - "step": 2567 - }, - { - "epoch": 1.3392438070404173, - "grad_norm": 1.71126127243042, - "learning_rate": 9.792261306532664e-05, - "loss": 6.2217, - "step": 2568 - }, - { - "epoch": 1.3397653194263364, - "grad_norm": 1.8321564197540283, - "learning_rate": 9.7921608040201e-05, - "loss": 5.0663, - "step": 2569 - }, - { - "epoch": 1.3402868318122556, - "grad_norm": 1.8267775774002075, - "learning_rate": 9.792060301507538e-05, - "loss": 5.8484, - "step": 2570 - }, - { - "epoch": 1.3408083441981746, - "grad_norm": 1.884848713874817, - "learning_rate": 9.791959798994975e-05, - "loss": 6.1498, - "step": 2571 - }, - { - "epoch": 1.3413298565840939, - "grad_norm": 1.9376534223556519, - "learning_rate": 9.791859296482413e-05, - "loss": 5.8715, - "step": 2572 - }, - { - "epoch": 1.3418513689700131, - "grad_norm": 1.5505872964859009, - "learning_rate": 9.791758793969849e-05, - "loss": 5.8914, - "step": 2573 - }, - { - "epoch": 1.3423728813559321, - "grad_norm": 1.549850583076477, - "learning_rate": 9.791658291457287e-05, - "loss": 5.8212, - "step": 2574 - }, - { - "epoch": 1.3428943937418514, - "grad_norm": 1.6205334663391113, - "learning_rate": 9.791557788944725e-05, - "loss": 6.0149, - "step": 2575 - }, - { - "epoch": 1.3434159061277704, - "grad_norm": 1.5993939638137817, - "learning_rate": 9.791457286432161e-05, - "loss": 5.9101, - "step": 2576 - }, - { - "epoch": 1.3439374185136896, - "grad_norm": 1.6862393617630005, - "learning_rate": 9.791356783919599e-05, - "loss": 5.8717, - "step": 2577 - }, - { - "epoch": 1.3444589308996089, - "grad_norm": 1.8221981525421143, - "learning_rate": 9.791256281407035e-05, - "loss": 5.9455, - "step": 2578 - }, - { - "epoch": 1.344980443285528, - "grad_norm": 1.7727500200271606, - "learning_rate": 9.791155778894473e-05, - "loss": 6.0823, - "step": 2579 - }, - { - "epoch": 1.3455019556714471, - "grad_norm": 1.5663912296295166, - "learning_rate": 9.79105527638191e-05, - "loss": 5.9271, - "step": 2580 - }, - { - "epoch": 1.3460234680573664, - "grad_norm": 1.5444380044937134, - "learning_rate": 9.790954773869347e-05, - "loss": 6.2839, - "step": 2581 - }, - { - "epoch": 1.3465449804432854, - "grad_norm": 1.6440356969833374, - "learning_rate": 9.790854271356784e-05, - "loss": 5.8643, - "step": 2582 - }, - { - "epoch": 1.3470664928292047, - "grad_norm": 1.5112228393554688, - "learning_rate": 9.790753768844221e-05, - "loss": 6.2394, - "step": 2583 - }, - { - "epoch": 1.347588005215124, - "grad_norm": 1.6379401683807373, - "learning_rate": 9.790653266331658e-05, - "loss": 6.1044, - "step": 2584 - }, - { - "epoch": 1.348109517601043, - "grad_norm": 2.836538791656494, - "learning_rate": 9.790552763819096e-05, - "loss": 5.6336, - "step": 2585 - }, - { - "epoch": 1.3486310299869622, - "grad_norm": 1.7374294996261597, - "learning_rate": 9.790452261306533e-05, - "loss": 5.8256, - "step": 2586 - }, - { - "epoch": 1.3491525423728814, - "grad_norm": 1.745182991027832, - "learning_rate": 9.790351758793971e-05, - "loss": 6.1143, - "step": 2587 - }, - { - "epoch": 1.3496740547588004, - "grad_norm": 1.5580577850341797, - "learning_rate": 9.790251256281408e-05, - "loss": 5.5485, - "step": 2588 - }, - { - "epoch": 1.3501955671447197, - "grad_norm": 1.682558298110962, - "learning_rate": 9.790150753768844e-05, - "loss": 5.9472, - "step": 2589 - }, - { - "epoch": 1.350717079530639, - "grad_norm": 1.5831084251403809, - "learning_rate": 9.790050251256282e-05, - "loss": 5.9733, - "step": 2590 - }, - { - "epoch": 1.351238591916558, - "grad_norm": 1.820956826210022, - "learning_rate": 9.789949748743718e-05, - "loss": 5.5316, - "step": 2591 - }, - { - "epoch": 1.3517601043024772, - "grad_norm": 1.6184927225112915, - "learning_rate": 9.789849246231156e-05, - "loss": 5.986, - "step": 2592 - }, - { - "epoch": 1.3522816166883964, - "grad_norm": 1.6540985107421875, - "learning_rate": 9.789748743718593e-05, - "loss": 6.3191, - "step": 2593 - }, - { - "epoch": 1.3528031290743154, - "grad_norm": 1.6854184865951538, - "learning_rate": 9.78964824120603e-05, - "loss": 6.4295, - "step": 2594 - }, - { - "epoch": 1.3533246414602347, - "grad_norm": 1.6246769428253174, - "learning_rate": 9.789547738693468e-05, - "loss": 6.2036, - "step": 2595 - }, - { - "epoch": 1.353846153846154, - "grad_norm": 1.6471805572509766, - "learning_rate": 9.789447236180906e-05, - "loss": 6.0365, - "step": 2596 - }, - { - "epoch": 1.354367666232073, - "grad_norm": 1.5774614810943604, - "learning_rate": 9.789346733668342e-05, - "loss": 5.7276, - "step": 2597 - }, - { - "epoch": 1.3548891786179922, - "grad_norm": 1.7998982667922974, - "learning_rate": 9.78924623115578e-05, - "loss": 5.8563, - "step": 2598 - }, - { - "epoch": 1.3554106910039114, - "grad_norm": 1.6175079345703125, - "learning_rate": 9.789145728643217e-05, - "loss": 6.2784, - "step": 2599 - }, - { - "epoch": 1.3559322033898304, - "grad_norm": 1.7844960689544678, - "learning_rate": 9.789045226130654e-05, - "loss": 5.6735, - "step": 2600 - }, - { - "epoch": 1.3564537157757497, - "grad_norm": 1.676839828491211, - "learning_rate": 9.788944723618091e-05, - "loss": 5.9944, - "step": 2601 - }, - { - "epoch": 1.356975228161669, - "grad_norm": 1.8011897802352905, - "learning_rate": 9.788844221105527e-05, - "loss": 6.2693, - "step": 2602 - }, - { - "epoch": 1.357496740547588, - "grad_norm": 1.7827959060668945, - "learning_rate": 9.788743718592965e-05, - "loss": 4.96, - "step": 2603 - }, - { - "epoch": 1.3580182529335072, - "grad_norm": 1.8489748239517212, - "learning_rate": 9.788643216080401e-05, - "loss": 5.6311, - "step": 2604 - }, - { - "epoch": 1.3585397653194264, - "grad_norm": 1.7378653287887573, - "learning_rate": 9.788542713567839e-05, - "loss": 6.0367, - "step": 2605 - }, - { - "epoch": 1.3590612777053455, - "grad_norm": 1.6756761074066162, - "learning_rate": 9.788442211055277e-05, - "loss": 5.8726, - "step": 2606 - }, - { - "epoch": 1.3595827900912647, - "grad_norm": 1.737670660018921, - "learning_rate": 9.788341708542715e-05, - "loss": 5.6778, - "step": 2607 - }, - { - "epoch": 1.360104302477184, - "grad_norm": 1.5372390747070312, - "learning_rate": 9.788241206030151e-05, - "loss": 6.3004, - "step": 2608 - }, - { - "epoch": 1.360625814863103, - "grad_norm": 1.6686307191848755, - "learning_rate": 9.788140703517589e-05, - "loss": 5.7527, - "step": 2609 - }, - { - "epoch": 1.3611473272490222, - "grad_norm": 1.5270382165908813, - "learning_rate": 9.788040201005025e-05, - "loss": 6.2194, - "step": 2610 - }, - { - "epoch": 1.3616688396349415, - "grad_norm": 1.7912288904190063, - "learning_rate": 9.787939698492463e-05, - "loss": 5.5666, - "step": 2611 - }, - { - "epoch": 1.3621903520208605, - "grad_norm": 1.9645494222640991, - "learning_rate": 9.7878391959799e-05, - "loss": 5.7711, - "step": 2612 - }, - { - "epoch": 1.3627118644067797, - "grad_norm": 2.0565121173858643, - "learning_rate": 9.787738693467337e-05, - "loss": 5.6659, - "step": 2613 - }, - { - "epoch": 1.363233376792699, - "grad_norm": 2.1548197269439697, - "learning_rate": 9.787638190954774e-05, - "loss": 5.1618, - "step": 2614 - }, - { - "epoch": 1.363754889178618, - "grad_norm": 2.06243634223938, - "learning_rate": 9.787537688442212e-05, - "loss": 6.034, - "step": 2615 - }, - { - "epoch": 1.3642764015645372, - "grad_norm": 1.6652909517288208, - "learning_rate": 9.78743718592965e-05, - "loss": 6.0894, - "step": 2616 - }, - { - "epoch": 1.3647979139504562, - "grad_norm": 1.676997184753418, - "learning_rate": 9.787336683417086e-05, - "loss": 6.19, - "step": 2617 - }, - { - "epoch": 1.3653194263363755, - "grad_norm": 2.0292418003082275, - "learning_rate": 9.787236180904524e-05, - "loss": 5.9899, - "step": 2618 - }, - { - "epoch": 1.3658409387222947, - "grad_norm": 1.7349538803100586, - "learning_rate": 9.78713567839196e-05, - "loss": 5.9425, - "step": 2619 - }, - { - "epoch": 1.3663624511082137, - "grad_norm": 1.5623798370361328, - "learning_rate": 9.787035175879398e-05, - "loss": 6.1844, - "step": 2620 - }, - { - "epoch": 1.366883963494133, - "grad_norm": 1.6435014009475708, - "learning_rate": 9.786934673366834e-05, - "loss": 6.3674, - "step": 2621 - }, - { - "epoch": 1.367405475880052, - "grad_norm": 1.6038990020751953, - "learning_rate": 9.786834170854272e-05, - "loss": 5.898, - "step": 2622 - }, - { - "epoch": 1.3679269882659713, - "grad_norm": 1.5299803018569946, - "learning_rate": 9.786733668341708e-05, - "loss": 6.1502, - "step": 2623 - }, - { - "epoch": 1.3684485006518905, - "grad_norm": 1.4204024076461792, - "learning_rate": 9.786633165829146e-05, - "loss": 6.3264, - "step": 2624 - }, - { - "epoch": 1.3689700130378095, - "grad_norm": 1.6097935438156128, - "learning_rate": 9.786532663316583e-05, - "loss": 6.056, - "step": 2625 - }, - { - "epoch": 1.3694915254237288, - "grad_norm": 1.7671841382980347, - "learning_rate": 9.78643216080402e-05, - "loss": 6.0276, - "step": 2626 - }, - { - "epoch": 1.370013037809648, - "grad_norm": 1.5279979705810547, - "learning_rate": 9.786331658291458e-05, - "loss": 6.2113, - "step": 2627 - }, - { - "epoch": 1.370534550195567, - "grad_norm": 1.5320380926132202, - "learning_rate": 9.786231155778896e-05, - "loss": 5.7059, - "step": 2628 - }, - { - "epoch": 1.3710560625814863, - "grad_norm": 1.603962779045105, - "learning_rate": 9.786130653266332e-05, - "loss": 6.2179, - "step": 2629 - }, - { - "epoch": 1.3715775749674055, - "grad_norm": 1.5197679996490479, - "learning_rate": 9.786030150753769e-05, - "loss": 6.0437, - "step": 2630 - }, - { - "epoch": 1.3720990873533245, - "grad_norm": 1.4926389455795288, - "learning_rate": 9.785929648241207e-05, - "loss": 6.2209, - "step": 2631 - }, - { - "epoch": 1.3726205997392438, - "grad_norm": 1.773118019104004, - "learning_rate": 9.785829145728643e-05, - "loss": 5.7632, - "step": 2632 - }, - { - "epoch": 1.373142112125163, - "grad_norm": 1.9592387676239014, - "learning_rate": 9.785728643216081e-05, - "loss": 6.2943, - "step": 2633 - }, - { - "epoch": 1.373663624511082, - "grad_norm": 1.56898832321167, - "learning_rate": 9.785628140703517e-05, - "loss": 6.4302, - "step": 2634 - }, - { - "epoch": 1.3741851368970013, - "grad_norm": 1.6073887348175049, - "learning_rate": 9.785527638190955e-05, - "loss": 5.8032, - "step": 2635 - }, - { - "epoch": 1.3747066492829205, - "grad_norm": 1.637276291847229, - "learning_rate": 9.785427135678393e-05, - "loss": 6.316, - "step": 2636 - }, - { - "epoch": 1.3752281616688395, - "grad_norm": 1.6882108449935913, - "learning_rate": 9.78532663316583e-05, - "loss": 5.9569, - "step": 2637 - }, - { - "epoch": 1.3757496740547588, - "grad_norm": 1.5448179244995117, - "learning_rate": 9.785226130653267e-05, - "loss": 6.1033, - "step": 2638 - }, - { - "epoch": 1.376271186440678, - "grad_norm": 1.5533088445663452, - "learning_rate": 9.785125628140705e-05, - "loss": 6.2964, - "step": 2639 - }, - { - "epoch": 1.376792698826597, - "grad_norm": 1.6889455318450928, - "learning_rate": 9.785025125628141e-05, - "loss": 6.0242, - "step": 2640 - }, - { - "epoch": 1.3773142112125163, - "grad_norm": 1.469394564628601, - "learning_rate": 9.784924623115579e-05, - "loss": 6.2216, - "step": 2641 - }, - { - "epoch": 1.3778357235984355, - "grad_norm": 1.7211103439331055, - "learning_rate": 9.784824120603015e-05, - "loss": 6.2173, - "step": 2642 - }, - { - "epoch": 1.3783572359843546, - "grad_norm": 1.776984691619873, - "learning_rate": 9.784723618090452e-05, - "loss": 5.8262, - "step": 2643 - }, - { - "epoch": 1.3788787483702738, - "grad_norm": 1.743001103401184, - "learning_rate": 9.78462311557789e-05, - "loss": 5.6768, - "step": 2644 - }, - { - "epoch": 1.379400260756193, - "grad_norm": 1.719679832458496, - "learning_rate": 9.784522613065326e-05, - "loss": 5.7834, - "step": 2645 - }, - { - "epoch": 1.379921773142112, - "grad_norm": 1.7682908773422241, - "learning_rate": 9.784422110552764e-05, - "loss": 5.7643, - "step": 2646 - }, - { - "epoch": 1.3804432855280313, - "grad_norm": 1.6111764907836914, - "learning_rate": 9.784321608040202e-05, - "loss": 5.8891, - "step": 2647 - }, - { - "epoch": 1.3809647979139505, - "grad_norm": 1.6279090642929077, - "learning_rate": 9.78422110552764e-05, - "loss": 6.5447, - "step": 2648 - }, - { - "epoch": 1.3814863102998696, - "grad_norm": 1.7054316997528076, - "learning_rate": 9.784120603015076e-05, - "loss": 5.9229, - "step": 2649 - }, - { - "epoch": 1.3820078226857888, - "grad_norm": 1.66355562210083, - "learning_rate": 9.784020100502514e-05, - "loss": 6.3505, - "step": 2650 - }, - { - "epoch": 1.382529335071708, - "grad_norm": 1.6712533235549927, - "learning_rate": 9.78391959798995e-05, - "loss": 5.9786, - "step": 2651 - }, - { - "epoch": 1.383050847457627, - "grad_norm": 1.773200511932373, - "learning_rate": 9.783819095477388e-05, - "loss": 5.9672, - "step": 2652 - }, - { - "epoch": 1.3835723598435463, - "grad_norm": 1.87320077419281, - "learning_rate": 9.783718592964824e-05, - "loss": 6.2531, - "step": 2653 - }, - { - "epoch": 1.3840938722294656, - "grad_norm": 1.7074990272521973, - "learning_rate": 9.783618090452262e-05, - "loss": 6.1667, - "step": 2654 - }, - { - "epoch": 1.3846153846153846, - "grad_norm": 1.6075319051742554, - "learning_rate": 9.783517587939698e-05, - "loss": 6.4496, - "step": 2655 - }, - { - "epoch": 1.3851368970013038, - "grad_norm": 1.4664288759231567, - "learning_rate": 9.783417085427136e-05, - "loss": 6.3227, - "step": 2656 - }, - { - "epoch": 1.385658409387223, - "grad_norm": 1.835418701171875, - "learning_rate": 9.783316582914574e-05, - "loss": 5.9629, - "step": 2657 - }, - { - "epoch": 1.386179921773142, - "grad_norm": 1.5901494026184082, - "learning_rate": 9.78321608040201e-05, - "loss": 6.282, - "step": 2658 - }, - { - "epoch": 1.3867014341590613, - "grad_norm": 1.5904797315597534, - "learning_rate": 9.783115577889448e-05, - "loss": 6.1755, - "step": 2659 - }, - { - "epoch": 1.3872229465449806, - "grad_norm": 1.4861235618591309, - "learning_rate": 9.783015075376885e-05, - "loss": 6.2742, - "step": 2660 - }, - { - "epoch": 1.3877444589308996, - "grad_norm": 1.5851759910583496, - "learning_rate": 9.782914572864322e-05, - "loss": 5.6079, - "step": 2661 - }, - { - "epoch": 1.3882659713168188, - "grad_norm": 1.512555480003357, - "learning_rate": 9.782814070351759e-05, - "loss": 5.8674, - "step": 2662 - }, - { - "epoch": 1.3887874837027379, - "grad_norm": 1.62095046043396, - "learning_rate": 9.782713567839197e-05, - "loss": 6.1174, - "step": 2663 - }, - { - "epoch": 1.389308996088657, - "grad_norm": 1.9584076404571533, - "learning_rate": 9.782613065326633e-05, - "loss": 6.2369, - "step": 2664 - }, - { - "epoch": 1.3898305084745763, - "grad_norm": 1.4809750318527222, - "learning_rate": 9.782512562814071e-05, - "loss": 6.4352, - "step": 2665 - }, - { - "epoch": 1.3903520208604954, - "grad_norm": 1.7495768070220947, - "learning_rate": 9.782412060301507e-05, - "loss": 6.1479, - "step": 2666 - }, - { - "epoch": 1.3908735332464146, - "grad_norm": 1.6603760719299316, - "learning_rate": 9.782311557788945e-05, - "loss": 6.1712, - "step": 2667 - }, - { - "epoch": 1.3913950456323338, - "grad_norm": 1.6336594820022583, - "learning_rate": 9.782211055276383e-05, - "loss": 6.0849, - "step": 2668 - }, - { - "epoch": 1.3919165580182529, - "grad_norm": 1.7293792963027954, - "learning_rate": 9.782110552763819e-05, - "loss": 6.3651, - "step": 2669 - }, - { - "epoch": 1.3924380704041721, - "grad_norm": 1.815760850906372, - "learning_rate": 9.782010050251257e-05, - "loss": 5.8891, - "step": 2670 - }, - { - "epoch": 1.3929595827900911, - "grad_norm": 2.0131959915161133, - "learning_rate": 9.781909547738694e-05, - "loss": 5.8572, - "step": 2671 - }, - { - "epoch": 1.3934810951760104, - "grad_norm": 1.7193174362182617, - "learning_rate": 9.781809045226131e-05, - "loss": 6.1702, - "step": 2672 - }, - { - "epoch": 1.3940026075619296, - "grad_norm": 2.0554697513580322, - "learning_rate": 9.781708542713568e-05, - "loss": 5.1767, - "step": 2673 - }, - { - "epoch": 1.3945241199478486, - "grad_norm": 1.6696985960006714, - "learning_rate": 9.781608040201006e-05, - "loss": 5.9303, - "step": 2674 - }, - { - "epoch": 1.3950456323337679, - "grad_norm": 1.638900876045227, - "learning_rate": 9.781507537688442e-05, - "loss": 6.1467, - "step": 2675 - }, - { - "epoch": 1.3955671447196871, - "grad_norm": 1.4648538827896118, - "learning_rate": 9.78140703517588e-05, - "loss": 6.1998, - "step": 2676 - }, - { - "epoch": 1.3960886571056061, - "grad_norm": 1.5157066583633423, - "learning_rate": 9.781306532663318e-05, - "loss": 5.7231, - "step": 2677 - }, - { - "epoch": 1.3966101694915254, - "grad_norm": 1.7398536205291748, - "learning_rate": 9.781206030150755e-05, - "loss": 5.7624, - "step": 2678 - }, - { - "epoch": 1.3971316818774446, - "grad_norm": 1.8303221464157104, - "learning_rate": 9.781105527638192e-05, - "loss": 6.1247, - "step": 2679 - }, - { - "epoch": 1.3976531942633637, - "grad_norm": 2.1140778064727783, - "learning_rate": 9.78100502512563e-05, - "loss": 5.4852, - "step": 2680 - }, - { - "epoch": 1.398174706649283, - "grad_norm": 2.1840293407440186, - "learning_rate": 9.780904522613066e-05, - "loss": 5.8905, - "step": 2681 - }, - { - "epoch": 1.3986962190352021, - "grad_norm": 1.8187013864517212, - "learning_rate": 9.780804020100502e-05, - "loss": 6.2925, - "step": 2682 - }, - { - "epoch": 1.3992177314211212, - "grad_norm": 1.5236451625823975, - "learning_rate": 9.78070351758794e-05, - "loss": 6.1928, - "step": 2683 - }, - { - "epoch": 1.3997392438070404, - "grad_norm": 1.9740772247314453, - "learning_rate": 9.780603015075377e-05, - "loss": 5.9138, - "step": 2684 - }, - { - "epoch": 1.4002607561929596, - "grad_norm": 1.7422348260879517, - "learning_rate": 9.780502512562814e-05, - "loss": 5.9113, - "step": 2685 - }, - { - "epoch": 1.4007822685788787, - "grad_norm": 1.651078462600708, - "learning_rate": 9.780402010050251e-05, - "loss": 6.2694, - "step": 2686 - }, - { - "epoch": 1.401303780964798, - "grad_norm": 1.6408063173294067, - "learning_rate": 9.780301507537689e-05, - "loss": 5.7764, - "step": 2687 - }, - { - "epoch": 1.4018252933507171, - "grad_norm": 1.7678227424621582, - "learning_rate": 9.780201005025126e-05, - "loss": 6.1006, - "step": 2688 - }, - { - "epoch": 1.4023468057366362, - "grad_norm": 1.836049199104309, - "learning_rate": 9.780100502512564e-05, - "loss": 5.6566, - "step": 2689 - }, - { - "epoch": 1.4028683181225554, - "grad_norm": 1.9151856899261475, - "learning_rate": 9.78e-05, - "loss": 6.0176, - "step": 2690 - }, - { - "epoch": 1.4033898305084747, - "grad_norm": 1.669245719909668, - "learning_rate": 9.779899497487438e-05, - "loss": 5.9382, - "step": 2691 - }, - { - "epoch": 1.4039113428943937, - "grad_norm": 1.576112985610962, - "learning_rate": 9.779798994974875e-05, - "loss": 6.1099, - "step": 2692 - }, - { - "epoch": 1.404432855280313, - "grad_norm": 1.7071670293807983, - "learning_rate": 9.779698492462313e-05, - "loss": 6.1505, - "step": 2693 - }, - { - "epoch": 1.4049543676662322, - "grad_norm": 1.6029512882232666, - "learning_rate": 9.779597989949749e-05, - "loss": 6.0897, - "step": 2694 - }, - { - "epoch": 1.4054758800521512, - "grad_norm": 1.475574016571045, - "learning_rate": 9.779497487437185e-05, - "loss": 6.3107, - "step": 2695 - }, - { - "epoch": 1.4059973924380704, - "grad_norm": 1.6027474403381348, - "learning_rate": 9.779396984924623e-05, - "loss": 6.0719, - "step": 2696 - }, - { - "epoch": 1.4065189048239897, - "grad_norm": 1.8850823640823364, - "learning_rate": 9.779296482412061e-05, - "loss": 5.3675, - "step": 2697 - }, - { - "epoch": 1.4070404172099087, - "grad_norm": 1.5701134204864502, - "learning_rate": 9.779195979899499e-05, - "loss": 5.8423, - "step": 2698 - }, - { - "epoch": 1.407561929595828, - "grad_norm": 1.754403829574585, - "learning_rate": 9.779095477386935e-05, - "loss": 5.6602, - "step": 2699 - }, - { - "epoch": 1.4080834419817472, - "grad_norm": 1.9073467254638672, - "learning_rate": 9.778994974874373e-05, - "loss": 5.7155, - "step": 2700 - }, - { - "epoch": 1.4086049543676662, - "grad_norm": 1.5590382814407349, - "learning_rate": 9.77889447236181e-05, - "loss": 6.246, - "step": 2701 - }, - { - "epoch": 1.4091264667535854, - "grad_norm": 1.701776385307312, - "learning_rate": 9.778793969849247e-05, - "loss": 5.85, - "step": 2702 - }, - { - "epoch": 1.4096479791395047, - "grad_norm": 1.4830223321914673, - "learning_rate": 9.778693467336684e-05, - "loss": 6.0624, - "step": 2703 - }, - { - "epoch": 1.4101694915254237, - "grad_norm": 1.811763048171997, - "learning_rate": 9.778592964824121e-05, - "loss": 5.9242, - "step": 2704 - }, - { - "epoch": 1.410691003911343, - "grad_norm": 1.4055840969085693, - "learning_rate": 9.778492462311558e-05, - "loss": 5.6296, - "step": 2705 - }, - { - "epoch": 1.4112125162972622, - "grad_norm": 1.5805575847625732, - "learning_rate": 9.778391959798996e-05, - "loss": 5.8183, - "step": 2706 - }, - { - "epoch": 1.4117340286831812, - "grad_norm": 1.620363473892212, - "learning_rate": 9.778291457286432e-05, - "loss": 6.1067, - "step": 2707 - }, - { - "epoch": 1.4122555410691005, - "grad_norm": 1.528887391090393, - "learning_rate": 9.77819095477387e-05, - "loss": 6.1093, - "step": 2708 - }, - { - "epoch": 1.4127770534550197, - "grad_norm": 1.5594453811645508, - "learning_rate": 9.778090452261308e-05, - "loss": 6.4691, - "step": 2709 - }, - { - "epoch": 1.4132985658409387, - "grad_norm": 1.5446897745132446, - "learning_rate": 9.777989949748744e-05, - "loss": 6.1942, - "step": 2710 - }, - { - "epoch": 1.413820078226858, - "grad_norm": 1.3707178831100464, - "learning_rate": 9.777889447236182e-05, - "loss": 6.1318, - "step": 2711 - }, - { - "epoch": 1.414341590612777, - "grad_norm": 1.3916586637496948, - "learning_rate": 9.777788944723618e-05, - "loss": 5.9678, - "step": 2712 - }, - { - "epoch": 1.4148631029986962, - "grad_norm": 1.5228615999221802, - "learning_rate": 9.777688442211056e-05, - "loss": 6.245, - "step": 2713 - }, - { - "epoch": 1.4153846153846155, - "grad_norm": 1.6578292846679688, - "learning_rate": 9.777587939698492e-05, - "loss": 6.0219, - "step": 2714 - }, - { - "epoch": 1.4159061277705345, - "grad_norm": 1.4831353425979614, - "learning_rate": 9.77748743718593e-05, - "loss": 6.0208, - "step": 2715 - }, - { - "epoch": 1.4164276401564537, - "grad_norm": 1.5010249614715576, - "learning_rate": 9.777386934673367e-05, - "loss": 5.989, - "step": 2716 - }, - { - "epoch": 1.4169491525423727, - "grad_norm": 1.4538003206253052, - "learning_rate": 9.777286432160804e-05, - "loss": 6.2988, - "step": 2717 - }, - { - "epoch": 1.417470664928292, - "grad_norm": 1.6703176498413086, - "learning_rate": 9.777185929648242e-05, - "loss": 5.8872, - "step": 2718 - }, - { - "epoch": 1.4179921773142112, - "grad_norm": 1.7749847173690796, - "learning_rate": 9.77708542713568e-05, - "loss": 5.6526, - "step": 2719 - }, - { - "epoch": 1.4185136897001303, - "grad_norm": 1.5599079132080078, - "learning_rate": 9.776984924623116e-05, - "loss": 6.2287, - "step": 2720 - }, - { - "epoch": 1.4190352020860495, - "grad_norm": 1.6760694980621338, - "learning_rate": 9.776884422110554e-05, - "loss": 5.886, - "step": 2721 - }, - { - "epoch": 1.4195567144719687, - "grad_norm": 1.5350369215011597, - "learning_rate": 9.77678391959799e-05, - "loss": 6.3132, - "step": 2722 - }, - { - "epoch": 1.4200782268578878, - "grad_norm": 1.721165418624878, - "learning_rate": 9.776683417085427e-05, - "loss": 5.8893, - "step": 2723 - }, - { - "epoch": 1.420599739243807, - "grad_norm": 1.448067307472229, - "learning_rate": 9.776582914572865e-05, - "loss": 5.8933, - "step": 2724 - }, - { - "epoch": 1.4211212516297262, - "grad_norm": 2.8793842792510986, - "learning_rate": 9.776482412060301e-05, - "loss": 5.7259, - "step": 2725 - }, - { - "epoch": 1.4216427640156453, - "grad_norm": 1.7226959466934204, - "learning_rate": 9.776381909547739e-05, - "loss": 6.0488, - "step": 2726 - }, - { - "epoch": 1.4221642764015645, - "grad_norm": 1.6190671920776367, - "learning_rate": 9.776281407035175e-05, - "loss": 5.774, - "step": 2727 - }, - { - "epoch": 1.4226857887874838, - "grad_norm": 1.7586593627929688, - "learning_rate": 9.776180904522613e-05, - "loss": 5.5945, - "step": 2728 - }, - { - "epoch": 1.4232073011734028, - "grad_norm": 1.689913272857666, - "learning_rate": 9.776080402010051e-05, - "loss": 6.0817, - "step": 2729 - }, - { - "epoch": 1.423728813559322, - "grad_norm": 1.766281247138977, - "learning_rate": 9.775979899497489e-05, - "loss": 5.992, - "step": 2730 - }, - { - "epoch": 1.4242503259452413, - "grad_norm": 1.5891350507736206, - "learning_rate": 9.775879396984925e-05, - "loss": 6.1043, - "step": 2731 - }, - { - "epoch": 1.4247718383311603, - "grad_norm": 1.8605815172195435, - "learning_rate": 9.775778894472363e-05, - "loss": 5.7457, - "step": 2732 - }, - { - "epoch": 1.4252933507170795, - "grad_norm": 1.4682244062423706, - "learning_rate": 9.7756783919598e-05, - "loss": 6.2523, - "step": 2733 - }, - { - "epoch": 1.4258148631029988, - "grad_norm": 1.5217187404632568, - "learning_rate": 9.775577889447237e-05, - "loss": 5.8937, - "step": 2734 - }, - { - "epoch": 1.4263363754889178, - "grad_norm": 1.4801461696624756, - "learning_rate": 9.775477386934674e-05, - "loss": 6.2666, - "step": 2735 - }, - { - "epoch": 1.426857887874837, - "grad_norm": 1.696235179901123, - "learning_rate": 9.77537688442211e-05, - "loss": 6.2227, - "step": 2736 - }, - { - "epoch": 1.4273794002607563, - "grad_norm": 1.5992404222488403, - "learning_rate": 9.775276381909548e-05, - "loss": 6.2008, - "step": 2737 - }, - { - "epoch": 1.4279009126466753, - "grad_norm": 1.5044103860855103, - "learning_rate": 9.775175879396984e-05, - "loss": 6.2463, - "step": 2738 - }, - { - "epoch": 1.4284224250325945, - "grad_norm": 1.8731878995895386, - "learning_rate": 9.775075376884422e-05, - "loss": 5.9088, - "step": 2739 - }, - { - "epoch": 1.4289439374185138, - "grad_norm": 1.772975206375122, - "learning_rate": 9.77497487437186e-05, - "loss": 5.6106, - "step": 2740 - }, - { - "epoch": 1.4294654498044328, - "grad_norm": 1.5398154258728027, - "learning_rate": 9.774874371859298e-05, - "loss": 5.8087, - "step": 2741 - }, - { - "epoch": 1.429986962190352, - "grad_norm": 1.6439192295074463, - "learning_rate": 9.774773869346734e-05, - "loss": 6.1478, - "step": 2742 - }, - { - "epoch": 1.4305084745762713, - "grad_norm": 1.5353777408599854, - "learning_rate": 9.774673366834172e-05, - "loss": 6.1206, - "step": 2743 - }, - { - "epoch": 1.4310299869621903, - "grad_norm": 1.945823311805725, - "learning_rate": 9.774572864321608e-05, - "loss": 5.619, - "step": 2744 - }, - { - "epoch": 1.4315514993481095, - "grad_norm": 1.5973284244537354, - "learning_rate": 9.774472361809046e-05, - "loss": 5.9759, - "step": 2745 - }, - { - "epoch": 1.4320730117340288, - "grad_norm": 1.4522231817245483, - "learning_rate": 9.774371859296483e-05, - "loss": 6.3957, - "step": 2746 - }, - { - "epoch": 1.4325945241199478, - "grad_norm": 1.6614530086517334, - "learning_rate": 9.77427135678392e-05, - "loss": 6.0201, - "step": 2747 - }, - { - "epoch": 1.433116036505867, - "grad_norm": 1.624030351638794, - "learning_rate": 9.774170854271357e-05, - "loss": 6.4179, - "step": 2748 - }, - { - "epoch": 1.4336375488917863, - "grad_norm": 1.7130664587020874, - "learning_rate": 9.774070351758795e-05, - "loss": 6.2297, - "step": 2749 - }, - { - "epoch": 1.4341590612777053, - "grad_norm": 1.7549505233764648, - "learning_rate": 9.773969849246232e-05, - "loss": 6.1637, - "step": 2750 - }, - { - "epoch": 1.4346805736636246, - "grad_norm": 1.5874719619750977, - "learning_rate": 9.773869346733669e-05, - "loss": 5.8404, - "step": 2751 - }, - { - "epoch": 1.4352020860495438, - "grad_norm": 1.5968170166015625, - "learning_rate": 9.773768844221106e-05, - "loss": 6.0682, - "step": 2752 - }, - { - "epoch": 1.4357235984354628, - "grad_norm": 1.7284387350082397, - "learning_rate": 9.773668341708543e-05, - "loss": 6.0219, - "step": 2753 - }, - { - "epoch": 1.436245110821382, - "grad_norm": 1.857333779335022, - "learning_rate": 9.773567839195981e-05, - "loss": 5.5317, - "step": 2754 - }, - { - "epoch": 1.4367666232073013, - "grad_norm": 1.6999911069869995, - "learning_rate": 9.773467336683417e-05, - "loss": 6.1317, - "step": 2755 - }, - { - "epoch": 1.4372881355932203, - "grad_norm": 1.6325955390930176, - "learning_rate": 9.773366834170855e-05, - "loss": 6.3398, - "step": 2756 - }, - { - "epoch": 1.4378096479791396, - "grad_norm": 1.7335489988327026, - "learning_rate": 9.773266331658291e-05, - "loss": 5.9226, - "step": 2757 - }, - { - "epoch": 1.4383311603650586, - "grad_norm": 1.9692493677139282, - "learning_rate": 9.773165829145729e-05, - "loss": 6.0559, - "step": 2758 - }, - { - "epoch": 1.4388526727509778, - "grad_norm": 2.2112321853637695, - "learning_rate": 9.773065326633166e-05, - "loss": 5.7823, - "step": 2759 - }, - { - "epoch": 1.439374185136897, - "grad_norm": 1.7404147386550903, - "learning_rate": 9.772964824120603e-05, - "loss": 5.2362, - "step": 2760 - }, - { - "epoch": 1.439895697522816, - "grad_norm": 1.7193166017532349, - "learning_rate": 9.772864321608041e-05, - "loss": 6.2647, - "step": 2761 - }, - { - "epoch": 1.4404172099087353, - "grad_norm": 1.6993381977081299, - "learning_rate": 9.772763819095478e-05, - "loss": 5.7926, - "step": 2762 - }, - { - "epoch": 1.4409387222946544, - "grad_norm": 1.695243000984192, - "learning_rate": 9.772663316582915e-05, - "loss": 6.0883, - "step": 2763 - }, - { - "epoch": 1.4414602346805736, - "grad_norm": 1.5530554056167603, - "learning_rate": 9.772562814070352e-05, - "loss": 6.1312, - "step": 2764 - }, - { - "epoch": 1.4419817470664928, - "grad_norm": 1.697091817855835, - "learning_rate": 9.77246231155779e-05, - "loss": 6.063, - "step": 2765 - }, - { - "epoch": 1.4425032594524119, - "grad_norm": 1.5669046640396118, - "learning_rate": 9.772361809045226e-05, - "loss": 5.9498, - "step": 2766 - }, - { - "epoch": 1.443024771838331, - "grad_norm": 1.5745625495910645, - "learning_rate": 9.772261306532664e-05, - "loss": 6.099, - "step": 2767 - }, - { - "epoch": 1.4435462842242504, - "grad_norm": 1.4733214378356934, - "learning_rate": 9.7721608040201e-05, - "loss": 6.3993, - "step": 2768 - }, - { - "epoch": 1.4440677966101694, - "grad_norm": 1.5762883424758911, - "learning_rate": 9.772060301507538e-05, - "loss": 6.1406, - "step": 2769 - }, - { - "epoch": 1.4445893089960886, - "grad_norm": 1.6982038021087646, - "learning_rate": 9.771959798994976e-05, - "loss": 6.309, - "step": 2770 - }, - { - "epoch": 1.4451108213820079, - "grad_norm": 1.6006107330322266, - "learning_rate": 9.771859296482414e-05, - "loss": 6.1712, - "step": 2771 - }, - { - "epoch": 1.4456323337679269, - "grad_norm": 1.9190771579742432, - "learning_rate": 9.77175879396985e-05, - "loss": 5.6892, - "step": 2772 - }, - { - "epoch": 1.4461538461538461, - "grad_norm": 1.6955370903015137, - "learning_rate": 9.771658291457288e-05, - "loss": 6.2863, - "step": 2773 - }, - { - "epoch": 1.4466753585397654, - "grad_norm": 1.7755554914474487, - "learning_rate": 9.771557788944724e-05, - "loss": 5.2662, - "step": 2774 - }, - { - "epoch": 1.4471968709256844, - "grad_norm": 1.620536208152771, - "learning_rate": 9.77145728643216e-05, - "loss": 6.131, - "step": 2775 - }, - { - "epoch": 1.4477183833116036, - "grad_norm": 1.549432396888733, - "learning_rate": 9.771356783919598e-05, - "loss": 5.8983, - "step": 2776 - }, - { - "epoch": 1.4482398956975229, - "grad_norm": 1.6327165365219116, - "learning_rate": 9.771256281407035e-05, - "loss": 6.4203, - "step": 2777 - }, - { - "epoch": 1.448761408083442, - "grad_norm": 1.7144237756729126, - "learning_rate": 9.771155778894473e-05, - "loss": 5.8586, - "step": 2778 - }, - { - "epoch": 1.4492829204693611, - "grad_norm": 1.787179708480835, - "learning_rate": 9.771055276381909e-05, - "loss": 6.3244, - "step": 2779 - }, - { - "epoch": 1.4498044328552804, - "grad_norm": 1.541170597076416, - "learning_rate": 9.770954773869347e-05, - "loss": 6.2233, - "step": 2780 - }, - { - "epoch": 1.4503259452411994, - "grad_norm": 1.7627884149551392, - "learning_rate": 9.770854271356785e-05, - "loss": 6.0454, - "step": 2781 - }, - { - "epoch": 1.4508474576271186, - "grad_norm": 1.892922043800354, - "learning_rate": 9.770753768844222e-05, - "loss": 5.5029, - "step": 2782 - }, - { - "epoch": 1.4513689700130379, - "grad_norm": 1.7500706911087036, - "learning_rate": 9.770653266331659e-05, - "loss": 6.0293, - "step": 2783 - }, - { - "epoch": 1.451890482398957, - "grad_norm": 1.5444124937057495, - "learning_rate": 9.770552763819097e-05, - "loss": 6.1176, - "step": 2784 - }, - { - "epoch": 1.4524119947848761, - "grad_norm": 1.8232090473175049, - "learning_rate": 9.770452261306533e-05, - "loss": 5.5497, - "step": 2785 - }, - { - "epoch": 1.4529335071707954, - "grad_norm": 2.1292386054992676, - "learning_rate": 9.770351758793971e-05, - "loss": 5.7434, - "step": 2786 - }, - { - "epoch": 1.4534550195567144, - "grad_norm": 1.7782337665557861, - "learning_rate": 9.770251256281407e-05, - "loss": 6.1511, - "step": 2787 - }, - { - "epoch": 1.4539765319426337, - "grad_norm": 1.6669893264770508, - "learning_rate": 9.770150753768844e-05, - "loss": 5.9931, - "step": 2788 - }, - { - "epoch": 1.454498044328553, - "grad_norm": 1.544895052909851, - "learning_rate": 9.770050251256281e-05, - "loss": 5.963, - "step": 2789 - }, - { - "epoch": 1.455019556714472, - "grad_norm": 1.584064245223999, - "learning_rate": 9.769949748743719e-05, - "loss": 6.0779, - "step": 2790 - }, - { - "epoch": 1.4555410691003912, - "grad_norm": 1.780301809310913, - "learning_rate": 9.769849246231157e-05, - "loss": 6.0508, - "step": 2791 - }, - { - "epoch": 1.4560625814863104, - "grad_norm": 1.5685304403305054, - "learning_rate": 9.769748743718593e-05, - "loss": 5.5051, - "step": 2792 - }, - { - "epoch": 1.4565840938722294, - "grad_norm": 1.5297799110412598, - "learning_rate": 9.769648241206031e-05, - "loss": 6.2241, - "step": 2793 - }, - { - "epoch": 1.4571056062581487, - "grad_norm": 1.5676274299621582, - "learning_rate": 9.769547738693468e-05, - "loss": 5.9569, - "step": 2794 - }, - { - "epoch": 1.457627118644068, - "grad_norm": 1.5621898174285889, - "learning_rate": 9.769447236180905e-05, - "loss": 6.048, - "step": 2795 - }, - { - "epoch": 1.458148631029987, - "grad_norm": 1.7106941938400269, - "learning_rate": 9.769346733668342e-05, - "loss": 5.8046, - "step": 2796 - }, - { - "epoch": 1.4586701434159062, - "grad_norm": 1.6666656732559204, - "learning_rate": 9.76924623115578e-05, - "loss": 5.7507, - "step": 2797 - }, - { - "epoch": 1.4591916558018254, - "grad_norm": 1.7873709201812744, - "learning_rate": 9.769145728643216e-05, - "loss": 5.8235, - "step": 2798 - }, - { - "epoch": 1.4597131681877444, - "grad_norm": 1.6920788288116455, - "learning_rate": 9.769045226130654e-05, - "loss": 6.1177, - "step": 2799 - }, - { - "epoch": 1.4602346805736637, - "grad_norm": 1.6523910760879517, - "learning_rate": 9.76894472361809e-05, - "loss": 6.1985, - "step": 2800 - }, - { - "epoch": 1.460756192959583, - "grad_norm": 1.450553297996521, - "learning_rate": 9.768844221105528e-05, - "loss": 6.2034, - "step": 2801 - }, - { - "epoch": 1.461277705345502, - "grad_norm": 1.5712532997131348, - "learning_rate": 9.768743718592966e-05, - "loss": 5.3465, - "step": 2802 - }, - { - "epoch": 1.4617992177314212, - "grad_norm": 1.7631434202194214, - "learning_rate": 9.768643216080402e-05, - "loss": 5.8792, - "step": 2803 - }, - { - "epoch": 1.4623207301173402, - "grad_norm": 1.5004957914352417, - "learning_rate": 9.76854271356784e-05, - "loss": 6.1505, - "step": 2804 - }, - { - "epoch": 1.4628422425032594, - "grad_norm": 1.6706433296203613, - "learning_rate": 9.768442211055276e-05, - "loss": 6.3158, - "step": 2805 - }, - { - "epoch": 1.4633637548891787, - "grad_norm": 1.5519438982009888, - "learning_rate": 9.768341708542714e-05, - "loss": 6.0472, - "step": 2806 - }, - { - "epoch": 1.4638852672750977, - "grad_norm": 1.572006344795227, - "learning_rate": 9.76824120603015e-05, - "loss": 6.3187, - "step": 2807 - }, - { - "epoch": 1.464406779661017, - "grad_norm": 1.6571171283721924, - "learning_rate": 9.768140703517588e-05, - "loss": 6.2947, - "step": 2808 - }, - { - "epoch": 1.4649282920469362, - "grad_norm": 1.80170476436615, - "learning_rate": 9.768040201005025e-05, - "loss": 5.6707, - "step": 2809 - }, - { - "epoch": 1.4654498044328552, - "grad_norm": 1.6589888334274292, - "learning_rate": 9.767939698492463e-05, - "loss": 5.9271, - "step": 2810 - }, - { - "epoch": 1.4659713168187745, - "grad_norm": 1.5281791687011719, - "learning_rate": 9.7678391959799e-05, - "loss": 6.1576, - "step": 2811 - }, - { - "epoch": 1.4664928292046935, - "grad_norm": 1.7295739650726318, - "learning_rate": 9.767738693467338e-05, - "loss": 6.1883, - "step": 2812 - }, - { - "epoch": 1.4670143415906127, - "grad_norm": 1.55623459815979, - "learning_rate": 9.767638190954775e-05, - "loss": 6.1236, - "step": 2813 - }, - { - "epoch": 1.467535853976532, - "grad_norm": 1.4653511047363281, - "learning_rate": 9.767537688442212e-05, - "loss": 5.6758, - "step": 2814 - }, - { - "epoch": 1.468057366362451, - "grad_norm": 1.4706426858901978, - "learning_rate": 9.767437185929649e-05, - "loss": 6.071, - "step": 2815 - }, - { - "epoch": 1.4685788787483702, - "grad_norm": 1.8940315246582031, - "learning_rate": 9.767336683417085e-05, - "loss": 5.4638, - "step": 2816 - }, - { - "epoch": 1.4691003911342895, - "grad_norm": 1.5454559326171875, - "learning_rate": 9.767236180904523e-05, - "loss": 6.3419, - "step": 2817 - }, - { - "epoch": 1.4696219035202085, - "grad_norm": 1.528501033782959, - "learning_rate": 9.76713567839196e-05, - "loss": 6.2238, - "step": 2818 - }, - { - "epoch": 1.4701434159061277, - "grad_norm": 1.6060926914215088, - "learning_rate": 9.767035175879397e-05, - "loss": 6.1678, - "step": 2819 - }, - { - "epoch": 1.470664928292047, - "grad_norm": 1.8323644399642944, - "learning_rate": 9.766934673366834e-05, - "loss": 5.7754, - "step": 2820 - }, - { - "epoch": 1.471186440677966, - "grad_norm": 1.4659833908081055, - "learning_rate": 9.766834170854271e-05, - "loss": 6.2661, - "step": 2821 - }, - { - "epoch": 1.4717079530638852, - "grad_norm": 1.4832403659820557, - "learning_rate": 9.766733668341709e-05, - "loss": 6.2244, - "step": 2822 - }, - { - "epoch": 1.4722294654498045, - "grad_norm": 1.5771267414093018, - "learning_rate": 9.766633165829147e-05, - "loss": 6.141, - "step": 2823 - }, - { - "epoch": 1.4727509778357235, - "grad_norm": 1.6609662771224976, - "learning_rate": 9.766532663316583e-05, - "loss": 6.0953, - "step": 2824 - }, - { - "epoch": 1.4732724902216427, - "grad_norm": 1.6120318174362183, - "learning_rate": 9.766432160804021e-05, - "loss": 5.3603, - "step": 2825 - }, - { - "epoch": 1.473794002607562, - "grad_norm": 1.627646565437317, - "learning_rate": 9.766331658291458e-05, - "loss": 5.9943, - "step": 2826 - }, - { - "epoch": 1.474315514993481, - "grad_norm": 1.608627200126648, - "learning_rate": 9.766231155778895e-05, - "loss": 5.4972, - "step": 2827 - }, - { - "epoch": 1.4748370273794003, - "grad_norm": 1.4947052001953125, - "learning_rate": 9.766130653266332e-05, - "loss": 6.052, - "step": 2828 - }, - { - "epoch": 1.4753585397653195, - "grad_norm": 1.654911756515503, - "learning_rate": 9.766030150753768e-05, - "loss": 5.8259, - "step": 2829 - }, - { - "epoch": 1.4758800521512385, - "grad_norm": 1.4821324348449707, - "learning_rate": 9.765929648241206e-05, - "loss": 6.0947, - "step": 2830 - }, - { - "epoch": 1.4764015645371578, - "grad_norm": 1.5970264673233032, - "learning_rate": 9.765829145728644e-05, - "loss": 5.8676, - "step": 2831 - }, - { - "epoch": 1.476923076923077, - "grad_norm": 1.470488429069519, - "learning_rate": 9.765728643216082e-05, - "loss": 6.0662, - "step": 2832 - }, - { - "epoch": 1.477444589308996, - "grad_norm": 1.6056243181228638, - "learning_rate": 9.765628140703518e-05, - "loss": 6.0034, - "step": 2833 - }, - { - "epoch": 1.4779661016949153, - "grad_norm": 1.5965206623077393, - "learning_rate": 9.765527638190956e-05, - "loss": 6.1443, - "step": 2834 - }, - { - "epoch": 1.4784876140808345, - "grad_norm": 1.5480287075042725, - "learning_rate": 9.765427135678392e-05, - "loss": 6.2511, - "step": 2835 - }, - { - "epoch": 1.4790091264667535, - "grad_norm": 1.6252895593643188, - "learning_rate": 9.76532663316583e-05, - "loss": 5.8563, - "step": 2836 - }, - { - "epoch": 1.4795306388526728, - "grad_norm": 1.6706434488296509, - "learning_rate": 9.765226130653267e-05, - "loss": 5.8372, - "step": 2837 - }, - { - "epoch": 1.480052151238592, - "grad_norm": 1.8390557765960693, - "learning_rate": 9.765125628140704e-05, - "loss": 6.1495, - "step": 2838 - }, - { - "epoch": 1.480573663624511, - "grad_norm": 1.649429202079773, - "learning_rate": 9.765025125628141e-05, - "loss": 6.1639, - "step": 2839 - }, - { - "epoch": 1.4810951760104303, - "grad_norm": 1.5332372188568115, - "learning_rate": 9.764924623115579e-05, - "loss": 6.1565, - "step": 2840 - }, - { - "epoch": 1.4816166883963495, - "grad_norm": 1.7449291944503784, - "learning_rate": 9.764824120603015e-05, - "loss": 5.7117, - "step": 2841 - }, - { - "epoch": 1.4821382007822685, - "grad_norm": 1.5824756622314453, - "learning_rate": 9.764723618090453e-05, - "loss": 6.0968, - "step": 2842 - }, - { - "epoch": 1.4826597131681878, - "grad_norm": 1.5848503112792969, - "learning_rate": 9.76462311557789e-05, - "loss": 6.3909, - "step": 2843 - }, - { - "epoch": 1.483181225554107, - "grad_norm": 1.5621473789215088, - "learning_rate": 9.764522613065327e-05, - "loss": 6.2478, - "step": 2844 - }, - { - "epoch": 1.483702737940026, - "grad_norm": 2.170722007751465, - "learning_rate": 9.764422110552765e-05, - "loss": 5.9168, - "step": 2845 - }, - { - "epoch": 1.4842242503259453, - "grad_norm": 1.6070153713226318, - "learning_rate": 9.764321608040201e-05, - "loss": 6.374, - "step": 2846 - }, - { - "epoch": 1.4847457627118645, - "grad_norm": 1.587161898612976, - "learning_rate": 9.764221105527639e-05, - "loss": 6.1393, - "step": 2847 - }, - { - "epoch": 1.4852672750977836, - "grad_norm": 1.6479389667510986, - "learning_rate": 9.764120603015075e-05, - "loss": 6.1762, - "step": 2848 - }, - { - "epoch": 1.4857887874837028, - "grad_norm": 1.7014250755310059, - "learning_rate": 9.764020100502513e-05, - "loss": 5.653, - "step": 2849 - }, - { - "epoch": 1.4863102998696218, - "grad_norm": 2.1229970455169678, - "learning_rate": 9.76391959798995e-05, - "loss": 5.3146, - "step": 2850 - }, - { - "epoch": 1.486831812255541, - "grad_norm": 1.7179540395736694, - "learning_rate": 9.763819095477387e-05, - "loss": 6.4392, - "step": 2851 - }, - { - "epoch": 1.4873533246414603, - "grad_norm": 1.461183786392212, - "learning_rate": 9.763718592964825e-05, - "loss": 6.4847, - "step": 2852 - }, - { - "epoch": 1.4878748370273793, - "grad_norm": 1.5658162832260132, - "learning_rate": 9.763618090452263e-05, - "loss": 6.114, - "step": 2853 - }, - { - "epoch": 1.4883963494132986, - "grad_norm": 2.1612656116485596, - "learning_rate": 9.7635175879397e-05, - "loss": 5.9358, - "step": 2854 - }, - { - "epoch": 1.4889178617992178, - "grad_norm": 1.7783305644989014, - "learning_rate": 9.763417085427136e-05, - "loss": 5.9594, - "step": 2855 - }, - { - "epoch": 1.4894393741851368, - "grad_norm": 2.0530593395233154, - "learning_rate": 9.763316582914574e-05, - "loss": 5.9313, - "step": 2856 - }, - { - "epoch": 1.489960886571056, - "grad_norm": 1.7588746547698975, - "learning_rate": 9.76321608040201e-05, - "loss": 5.927, - "step": 2857 - }, - { - "epoch": 1.490482398956975, - "grad_norm": 1.7404680252075195, - "learning_rate": 9.763115577889448e-05, - "loss": 6.4275, - "step": 2858 - }, - { - "epoch": 1.4910039113428943, - "grad_norm": 1.607073187828064, - "learning_rate": 9.763015075376884e-05, - "loss": 6.0119, - "step": 2859 - }, - { - "epoch": 1.4915254237288136, - "grad_norm": 1.7869770526885986, - "learning_rate": 9.762914572864322e-05, - "loss": 5.8664, - "step": 2860 - }, - { - "epoch": 1.4920469361147326, - "grad_norm": 1.5147517919540405, - "learning_rate": 9.762814070351758e-05, - "loss": 6.2189, - "step": 2861 - }, - { - "epoch": 1.4925684485006518, - "grad_norm": 1.593854546546936, - "learning_rate": 9.762713567839196e-05, - "loss": 5.8206, - "step": 2862 - }, - { - "epoch": 1.493089960886571, - "grad_norm": 1.5516552925109863, - "learning_rate": 9.762613065326634e-05, - "loss": 5.8553, - "step": 2863 - }, - { - "epoch": 1.49361147327249, - "grad_norm": 1.632185935974121, - "learning_rate": 9.762512562814072e-05, - "loss": 5.8157, - "step": 2864 - }, - { - "epoch": 1.4941329856584094, - "grad_norm": 1.693359136581421, - "learning_rate": 9.762412060301508e-05, - "loss": 5.7293, - "step": 2865 - }, - { - "epoch": 1.4946544980443286, - "grad_norm": 1.6333162784576416, - "learning_rate": 9.762311557788946e-05, - "loss": 5.8918, - "step": 2866 - }, - { - "epoch": 1.4951760104302476, - "grad_norm": 1.72529137134552, - "learning_rate": 9.762211055276382e-05, - "loss": 5.5389, - "step": 2867 - }, - { - "epoch": 1.4956975228161669, - "grad_norm": 1.593548059463501, - "learning_rate": 9.762110552763819e-05, - "loss": 6.1077, - "step": 2868 - }, - { - "epoch": 1.496219035202086, - "grad_norm": 1.6674294471740723, - "learning_rate": 9.762010050251257e-05, - "loss": 6.1928, - "step": 2869 - }, - { - "epoch": 1.4967405475880051, - "grad_norm": 2.0850799083709717, - "learning_rate": 9.761909547738693e-05, - "loss": 5.5981, - "step": 2870 - }, - { - "epoch": 1.4972620599739244, - "grad_norm": 1.980785608291626, - "learning_rate": 9.761809045226131e-05, - "loss": 5.4568, - "step": 2871 - }, - { - "epoch": 1.4977835723598436, - "grad_norm": 1.558742642402649, - "learning_rate": 9.761708542713569e-05, - "loss": 6.1087, - "step": 2872 - }, - { - "epoch": 1.4983050847457626, - "grad_norm": 1.6420050859451294, - "learning_rate": 9.761608040201006e-05, - "loss": 5.937, - "step": 2873 - }, - { - "epoch": 1.4988265971316819, - "grad_norm": 1.5581105947494507, - "learning_rate": 9.761507537688443e-05, - "loss": 5.9875, - "step": 2874 - }, - { - "epoch": 1.4993481095176011, - "grad_norm": 1.6250531673431396, - "learning_rate": 9.76140703517588e-05, - "loss": 6.2148, - "step": 2875 - }, - { - "epoch": 1.4998696219035201, - "grad_norm": 1.7030606269836426, - "learning_rate": 9.761306532663317e-05, - "loss": 5.8734, - "step": 2876 - }, - { - "epoch": 1.5003911342894394, - "grad_norm": 1.5861607789993286, - "learning_rate": 9.761206030150755e-05, - "loss": 6.0202, - "step": 2877 - }, - { - "epoch": 1.5009126466753586, - "grad_norm": 1.554383397102356, - "learning_rate": 9.761105527638191e-05, - "loss": 6.1231, - "step": 2878 - }, - { - "epoch": 1.5014341590612776, - "grad_norm": 1.6653318405151367, - "learning_rate": 9.761005025125629e-05, - "loss": 5.8554, - "step": 2879 - }, - { - "epoch": 1.5019556714471969, - "grad_norm": 1.762291669845581, - "learning_rate": 9.760904522613065e-05, - "loss": 6.0767, - "step": 2880 - }, - { - "epoch": 1.5024771838331161, - "grad_norm": 1.4581154584884644, - "learning_rate": 9.760804020100502e-05, - "loss": 6.263, - "step": 2881 - }, - { - "epoch": 1.5029986962190351, - "grad_norm": 1.7813777923583984, - "learning_rate": 9.76070351758794e-05, - "loss": 5.5883, - "step": 2882 - }, - { - "epoch": 1.5035202086049544, - "grad_norm": 1.5640543699264526, - "learning_rate": 9.760603015075377e-05, - "loss": 5.9936, - "step": 2883 - }, - { - "epoch": 1.5040417209908736, - "grad_norm": 1.7181663513183594, - "learning_rate": 9.760502512562815e-05, - "loss": 5.5832, - "step": 2884 - }, - { - "epoch": 1.5045632333767927, - "grad_norm": 1.6281917095184326, - "learning_rate": 9.760402010050252e-05, - "loss": 5.8035, - "step": 2885 - }, - { - "epoch": 1.505084745762712, - "grad_norm": 1.650791883468628, - "learning_rate": 9.76030150753769e-05, - "loss": 5.8989, - "step": 2886 - }, - { - "epoch": 1.5056062581486311, - "grad_norm": 1.5169929265975952, - "learning_rate": 9.760201005025126e-05, - "loss": 6.0039, - "step": 2887 - }, - { - "epoch": 1.5061277705345502, - "grad_norm": 1.5851532220840454, - "learning_rate": 9.760100502512564e-05, - "loss": 6.2984, - "step": 2888 - }, - { - "epoch": 1.5066492829204694, - "grad_norm": 1.801254153251648, - "learning_rate": 9.76e-05, - "loss": 5.8432, - "step": 2889 - }, - { - "epoch": 1.5071707953063886, - "grad_norm": 2.037017583847046, - "learning_rate": 9.759899497487438e-05, - "loss": 5.7578, - "step": 2890 - }, - { - "epoch": 1.5076923076923077, - "grad_norm": 1.8728010654449463, - "learning_rate": 9.759798994974874e-05, - "loss": 5.7716, - "step": 2891 - }, - { - "epoch": 1.508213820078227, - "grad_norm": 1.9070972204208374, - "learning_rate": 9.759698492462312e-05, - "loss": 6.0053, - "step": 2892 - }, - { - "epoch": 1.5087353324641462, - "grad_norm": 1.8258485794067383, - "learning_rate": 9.75959798994975e-05, - "loss": 5.7379, - "step": 2893 - }, - { - "epoch": 1.5092568448500652, - "grad_norm": 1.6042457818984985, - "learning_rate": 9.759497487437188e-05, - "loss": 6.3639, - "step": 2894 - }, - { - "epoch": 1.5097783572359842, - "grad_norm": 1.659169316291809, - "learning_rate": 9.759396984924624e-05, - "loss": 5.841, - "step": 2895 - }, - { - "epoch": 1.5102998696219037, - "grad_norm": 1.4794549942016602, - "learning_rate": 9.75929648241206e-05, - "loss": 6.0871, - "step": 2896 - }, - { - "epoch": 1.5108213820078227, - "grad_norm": 1.6386024951934814, - "learning_rate": 9.759195979899498e-05, - "loss": 6.0702, - "step": 2897 - }, - { - "epoch": 1.5113428943937417, - "grad_norm": 2.325228214263916, - "learning_rate": 9.759095477386935e-05, - "loss": 5.6941, - "step": 2898 - }, - { - "epoch": 1.5118644067796612, - "grad_norm": 1.6784334182739258, - "learning_rate": 9.758994974874372e-05, - "loss": 5.9596, - "step": 2899 - }, - { - "epoch": 1.5123859191655802, - "grad_norm": 1.5968056917190552, - "learning_rate": 9.758894472361809e-05, - "loss": 5.8579, - "step": 2900 - }, - { - "epoch": 1.5129074315514992, - "grad_norm": 1.658576488494873, - "learning_rate": 9.758793969849247e-05, - "loss": 6.2242, - "step": 2901 - }, - { - "epoch": 1.5134289439374187, - "grad_norm": 1.8618217706680298, - "learning_rate": 9.758693467336683e-05, - "loss": 5.6366, - "step": 2902 - }, - { - "epoch": 1.5139504563233377, - "grad_norm": 1.681307315826416, - "learning_rate": 9.758592964824121e-05, - "loss": 6.0446, - "step": 2903 - }, - { - "epoch": 1.5144719687092567, - "grad_norm": 1.5531169176101685, - "learning_rate": 9.758492462311559e-05, - "loss": 6.3236, - "step": 2904 - }, - { - "epoch": 1.5149934810951762, - "grad_norm": 1.6814061403274536, - "learning_rate": 9.758391959798996e-05, - "loss": 5.9854, - "step": 2905 - }, - { - "epoch": 1.5155149934810952, - "grad_norm": 1.6177403926849365, - "learning_rate": 9.758291457286433e-05, - "loss": 5.8279, - "step": 2906 - }, - { - "epoch": 1.5160365058670142, - "grad_norm": 1.6217435598373413, - "learning_rate": 9.75819095477387e-05, - "loss": 5.7753, - "step": 2907 - }, - { - "epoch": 1.5165580182529335, - "grad_norm": 1.6294753551483154, - "learning_rate": 9.758090452261307e-05, - "loss": 5.8529, - "step": 2908 - }, - { - "epoch": 1.5170795306388527, - "grad_norm": 1.7994980812072754, - "learning_rate": 9.757989949748744e-05, - "loss": 6.033, - "step": 2909 - }, - { - "epoch": 1.5176010430247717, - "grad_norm": 1.6275047063827515, - "learning_rate": 9.757889447236181e-05, - "loss": 6.2808, - "step": 2910 - }, - { - "epoch": 1.518122555410691, - "grad_norm": 1.6710172891616821, - "learning_rate": 9.757788944723618e-05, - "loss": 5.7843, - "step": 2911 - }, - { - "epoch": 1.5186440677966102, - "grad_norm": 1.6221249103546143, - "learning_rate": 9.757688442211056e-05, - "loss": 6.0414, - "step": 2912 - }, - { - "epoch": 1.5191655801825292, - "grad_norm": 1.5152809619903564, - "learning_rate": 9.757587939698492e-05, - "loss": 6.0427, - "step": 2913 - }, - { - "epoch": 1.5196870925684485, - "grad_norm": 1.7017035484313965, - "learning_rate": 9.75748743718593e-05, - "loss": 5.9066, - "step": 2914 - }, - { - "epoch": 1.5202086049543677, - "grad_norm": 1.6341040134429932, - "learning_rate": 9.757386934673368e-05, - "loss": 5.5702, - "step": 2915 - }, - { - "epoch": 1.5207301173402867, - "grad_norm": 1.7104073762893677, - "learning_rate": 9.757286432160805e-05, - "loss": 5.8711, - "step": 2916 - }, - { - "epoch": 1.521251629726206, - "grad_norm": 1.750266194343567, - "learning_rate": 9.757185929648242e-05, - "loss": 5.5838, - "step": 2917 - }, - { - "epoch": 1.5217731421121252, - "grad_norm": 1.659591555595398, - "learning_rate": 9.75708542713568e-05, - "loss": 5.9391, - "step": 2918 - }, - { - "epoch": 1.5222946544980442, - "grad_norm": 1.646873116493225, - "learning_rate": 9.756984924623116e-05, - "loss": 5.9978, - "step": 2919 - }, - { - "epoch": 1.5228161668839635, - "grad_norm": 1.7272626161575317, - "learning_rate": 9.756884422110554e-05, - "loss": 5.5819, - "step": 2920 - }, - { - "epoch": 1.5233376792698827, - "grad_norm": 1.7887064218521118, - "learning_rate": 9.75678391959799e-05, - "loss": 5.846, - "step": 2921 - }, - { - "epoch": 1.5238591916558017, - "grad_norm": 1.9516392946243286, - "learning_rate": 9.756683417085427e-05, - "loss": 6.3186, - "step": 2922 - }, - { - "epoch": 1.524380704041721, - "grad_norm": 2.01957368850708, - "learning_rate": 9.756582914572864e-05, - "loss": 5.7755, - "step": 2923 - }, - { - "epoch": 1.5249022164276402, - "grad_norm": 1.7032673358917236, - "learning_rate": 9.756482412060302e-05, - "loss": 5.8643, - "step": 2924 - }, - { - "epoch": 1.5254237288135593, - "grad_norm": 1.404292106628418, - "learning_rate": 9.75638190954774e-05, - "loss": 6.3486, - "step": 2925 - }, - { - "epoch": 1.5259452411994785, - "grad_norm": 1.519274115562439, - "learning_rate": 9.756281407035176e-05, - "loss": 6.1759, - "step": 2926 - }, - { - "epoch": 1.5264667535853977, - "grad_norm": 1.6258536577224731, - "learning_rate": 9.756180904522614e-05, - "loss": 6.0833, - "step": 2927 - }, - { - "epoch": 1.5269882659713168, - "grad_norm": 1.7093464136123657, - "learning_rate": 9.75608040201005e-05, - "loss": 6.1114, - "step": 2928 - }, - { - "epoch": 1.527509778357236, - "grad_norm": 1.6579821109771729, - "learning_rate": 9.755979899497488e-05, - "loss": 5.83, - "step": 2929 - }, - { - "epoch": 1.5280312907431552, - "grad_norm": 1.6382776498794556, - "learning_rate": 9.755879396984925e-05, - "loss": 5.8508, - "step": 2930 - }, - { - "epoch": 1.5285528031290743, - "grad_norm": 1.6001276969909668, - "learning_rate": 9.755778894472363e-05, - "loss": 5.9308, - "step": 2931 - }, - { - "epoch": 1.5290743155149935, - "grad_norm": 1.5809142589569092, - "learning_rate": 9.755678391959799e-05, - "loss": 6.1609, - "step": 2932 - }, - { - "epoch": 1.5295958279009128, - "grad_norm": 1.8534437417984009, - "learning_rate": 9.755577889447237e-05, - "loss": 5.8663, - "step": 2933 - }, - { - "epoch": 1.5301173402868318, - "grad_norm": 1.6129528284072876, - "learning_rate": 9.755477386934673e-05, - "loss": 6.2429, - "step": 2934 - }, - { - "epoch": 1.530638852672751, - "grad_norm": 1.6421903371810913, - "learning_rate": 9.755376884422111e-05, - "loss": 5.9604, - "step": 2935 - }, - { - "epoch": 1.5311603650586703, - "grad_norm": 1.5983402729034424, - "learning_rate": 9.755276381909549e-05, - "loss": 5.666, - "step": 2936 - }, - { - "epoch": 1.5316818774445893, - "grad_norm": 1.7107360363006592, - "learning_rate": 9.755175879396985e-05, - "loss": 5.7312, - "step": 2937 - }, - { - "epoch": 1.5322033898305085, - "grad_norm": 1.8987083435058594, - "learning_rate": 9.755075376884423e-05, - "loss": 5.5693, - "step": 2938 - }, - { - "epoch": 1.5327249022164278, - "grad_norm": 1.514696478843689, - "learning_rate": 9.75497487437186e-05, - "loss": 6.2017, - "step": 2939 - }, - { - "epoch": 1.5332464146023468, - "grad_norm": 1.960425615310669, - "learning_rate": 9.754874371859297e-05, - "loss": 6.2485, - "step": 2940 - }, - { - "epoch": 1.5337679269882658, - "grad_norm": 1.7381919622421265, - "learning_rate": 9.754773869346734e-05, - "loss": 6.1301, - "step": 2941 - }, - { - "epoch": 1.5342894393741853, - "grad_norm": 1.4492655992507935, - "learning_rate": 9.754673366834171e-05, - "loss": 6.1159, - "step": 2942 - }, - { - "epoch": 1.5348109517601043, - "grad_norm": 1.6336946487426758, - "learning_rate": 9.754572864321608e-05, - "loss": 6.1576, - "step": 2943 - }, - { - "epoch": 1.5353324641460233, - "grad_norm": 1.7284420728683472, - "learning_rate": 9.754472361809046e-05, - "loss": 6.1785, - "step": 2944 - }, - { - "epoch": 1.5358539765319428, - "grad_norm": 1.6475152969360352, - "learning_rate": 9.754371859296483e-05, - "loss": 6.2428, - "step": 2945 - }, - { - "epoch": 1.5363754889178618, - "grad_norm": 1.6339117288589478, - "learning_rate": 9.754271356783921e-05, - "loss": 6.153, - "step": 2946 - }, - { - "epoch": 1.5368970013037808, - "grad_norm": 1.715817928314209, - "learning_rate": 9.754170854271358e-05, - "loss": 5.9706, - "step": 2947 - }, - { - "epoch": 1.5374185136897003, - "grad_norm": 1.686881184577942, - "learning_rate": 9.754070351758794e-05, - "loss": 6.1089, - "step": 2948 - }, - { - "epoch": 1.5379400260756193, - "grad_norm": 1.6033015251159668, - "learning_rate": 9.753969849246232e-05, - "loss": 6.0584, - "step": 2949 - }, - { - "epoch": 1.5384615384615383, - "grad_norm": 1.640824556350708, - "learning_rate": 9.753869346733668e-05, - "loss": 6.1831, - "step": 2950 - }, - { - "epoch": 1.5389830508474578, - "grad_norm": 1.4297370910644531, - "learning_rate": 9.753768844221106e-05, - "loss": 5.9394, - "step": 2951 - }, - { - "epoch": 1.5395045632333768, - "grad_norm": 1.4852330684661865, - "learning_rate": 9.753668341708542e-05, - "loss": 6.3877, - "step": 2952 - }, - { - "epoch": 1.5400260756192958, - "grad_norm": 1.996718168258667, - "learning_rate": 9.75356783919598e-05, - "loss": 5.4326, - "step": 2953 - }, - { - "epoch": 1.540547588005215, - "grad_norm": 1.5923837423324585, - "learning_rate": 9.753467336683417e-05, - "loss": 6.1394, - "step": 2954 - }, - { - "epoch": 1.5410691003911343, - "grad_norm": 1.5062754154205322, - "learning_rate": 9.753366834170854e-05, - "loss": 6.1537, - "step": 2955 - }, - { - "epoch": 1.5415906127770533, - "grad_norm": 1.5277647972106934, - "learning_rate": 9.753266331658292e-05, - "loss": 5.9143, - "step": 2956 - }, - { - "epoch": 1.5421121251629726, - "grad_norm": 1.3999603986740112, - "learning_rate": 9.75316582914573e-05, - "loss": 6.2244, - "step": 2957 - }, - { - "epoch": 1.5426336375488918, - "grad_norm": 1.6692534685134888, - "learning_rate": 9.753065326633166e-05, - "loss": 6.0189, - "step": 2958 - }, - { - "epoch": 1.5431551499348108, - "grad_norm": 1.6099843978881836, - "learning_rate": 9.752964824120604e-05, - "loss": 6.044, - "step": 2959 - }, - { - "epoch": 1.54367666232073, - "grad_norm": 1.4609355926513672, - "learning_rate": 9.75286432160804e-05, - "loss": 5.9263, - "step": 2960 - }, - { - "epoch": 1.5441981747066493, - "grad_norm": 1.9037601947784424, - "learning_rate": 9.752763819095477e-05, - "loss": 5.5976, - "step": 2961 - }, - { - "epoch": 1.5447196870925683, - "grad_norm": 1.6415376663208008, - "learning_rate": 9.752663316582915e-05, - "loss": 5.9765, - "step": 2962 - }, - { - "epoch": 1.5452411994784876, - "grad_norm": 1.714272141456604, - "learning_rate": 9.752562814070351e-05, - "loss": 5.4352, - "step": 2963 - }, - { - "epoch": 1.5457627118644068, - "grad_norm": 1.7286732196807861, - "learning_rate": 9.752462311557789e-05, - "loss": 5.9192, - "step": 2964 - }, - { - "epoch": 1.5462842242503259, - "grad_norm": 1.4834184646606445, - "learning_rate": 9.752361809045227e-05, - "loss": 6.2592, - "step": 2965 - }, - { - "epoch": 1.546805736636245, - "grad_norm": 1.61598539352417, - "learning_rate": 9.752261306532665e-05, - "loss": 5.8179, - "step": 2966 - }, - { - "epoch": 1.5473272490221643, - "grad_norm": 1.7281643152236938, - "learning_rate": 9.752160804020101e-05, - "loss": 6.0512, - "step": 2967 - }, - { - "epoch": 1.5478487614080834, - "grad_norm": 1.6342471837997437, - "learning_rate": 9.752060301507539e-05, - "loss": 6.0184, - "step": 2968 - }, - { - "epoch": 1.5483702737940026, - "grad_norm": 1.7903777360916138, - "learning_rate": 9.751959798994975e-05, - "loss": 5.3036, - "step": 2969 - }, - { - "epoch": 1.5488917861799218, - "grad_norm": 1.9066871404647827, - "learning_rate": 9.751859296482413e-05, - "loss": 5.4797, - "step": 2970 - }, - { - "epoch": 1.5494132985658409, - "grad_norm": 1.6245708465576172, - "learning_rate": 9.75175879396985e-05, - "loss": 6.0497, - "step": 2971 - }, - { - "epoch": 1.5499348109517601, - "grad_norm": 1.5795633792877197, - "learning_rate": 9.751658291457287e-05, - "loss": 5.8559, - "step": 2972 - }, - { - "epoch": 1.5504563233376794, - "grad_norm": 1.4975231885910034, - "learning_rate": 9.751557788944724e-05, - "loss": 6.3377, - "step": 2973 - }, - { - "epoch": 1.5509778357235984, - "grad_norm": 1.8085834980010986, - "learning_rate": 9.75145728643216e-05, - "loss": 5.9806, - "step": 2974 - }, - { - "epoch": 1.5514993481095176, - "grad_norm": 1.5248533487319946, - "learning_rate": 9.751356783919598e-05, - "loss": 6.3091, - "step": 2975 - }, - { - "epoch": 1.5520208604954369, - "grad_norm": 2.4740777015686035, - "learning_rate": 9.751256281407036e-05, - "loss": 5.5885, - "step": 2976 - }, - { - "epoch": 1.5525423728813559, - "grad_norm": 1.3745386600494385, - "learning_rate": 9.751155778894473e-05, - "loss": 6.216, - "step": 2977 - }, - { - "epoch": 1.5530638852672751, - "grad_norm": 1.5964089632034302, - "learning_rate": 9.75105527638191e-05, - "loss": 5.419, - "step": 2978 - }, - { - "epoch": 1.5535853976531944, - "grad_norm": 1.5235254764556885, - "learning_rate": 9.750954773869348e-05, - "loss": 6.1601, - "step": 2979 - }, - { - "epoch": 1.5541069100391134, - "grad_norm": 1.5818287134170532, - "learning_rate": 9.750854271356784e-05, - "loss": 5.849, - "step": 2980 - }, - { - "epoch": 1.5546284224250326, - "grad_norm": 1.5845973491668701, - "learning_rate": 9.750753768844222e-05, - "loss": 6.0963, - "step": 2981 - }, - { - "epoch": 1.5551499348109519, - "grad_norm": 1.655224084854126, - "learning_rate": 9.750653266331658e-05, - "loss": 5.596, - "step": 2982 - }, - { - "epoch": 1.555671447196871, - "grad_norm": 1.7107782363891602, - "learning_rate": 9.750552763819096e-05, - "loss": 5.8844, - "step": 2983 - }, - { - "epoch": 1.5561929595827901, - "grad_norm": 1.5279433727264404, - "learning_rate": 9.750452261306533e-05, - "loss": 5.7067, - "step": 2984 - }, - { - "epoch": 1.5567144719687094, - "grad_norm": 1.6591229438781738, - "learning_rate": 9.75035175879397e-05, - "loss": 6.0565, - "step": 2985 - }, - { - "epoch": 1.5572359843546284, - "grad_norm": 1.5764596462249756, - "learning_rate": 9.750251256281408e-05, - "loss": 5.9206, - "step": 2986 - }, - { - "epoch": 1.5577574967405476, - "grad_norm": 1.5098923444747925, - "learning_rate": 9.750150753768846e-05, - "loss": 6.392, - "step": 2987 - }, - { - "epoch": 1.5582790091264669, - "grad_norm": 1.9125800132751465, - "learning_rate": 9.750050251256282e-05, - "loss": 5.6939, - "step": 2988 - }, - { - "epoch": 1.558800521512386, - "grad_norm": 1.614362120628357, - "learning_rate": 9.749949748743719e-05, - "loss": 5.2786, - "step": 2989 - }, - { - "epoch": 1.559322033898305, - "grad_norm": 1.8829795122146606, - "learning_rate": 9.749849246231156e-05, - "loss": 5.7584, - "step": 2990 - }, - { - "epoch": 1.5598435462842244, - "grad_norm": 1.995069146156311, - "learning_rate": 9.749748743718593e-05, - "loss": 5.7102, - "step": 2991 - }, - { - "epoch": 1.5603650586701434, - "grad_norm": 1.6811987161636353, - "learning_rate": 9.749648241206031e-05, - "loss": 6.2956, - "step": 2992 - }, - { - "epoch": 1.5608865710560624, - "grad_norm": 1.592775583267212, - "learning_rate": 9.749547738693467e-05, - "loss": 6.1885, - "step": 2993 - }, - { - "epoch": 1.561408083441982, - "grad_norm": 2.03885555267334, - "learning_rate": 9.749447236180905e-05, - "loss": 5.5371, - "step": 2994 - }, - { - "epoch": 1.561929595827901, - "grad_norm": 1.621597409248352, - "learning_rate": 9.749346733668341e-05, - "loss": 5.6533, - "step": 2995 - }, - { - "epoch": 1.56245110821382, - "grad_norm": 1.7323951721191406, - "learning_rate": 9.749246231155779e-05, - "loss": 6.096, - "step": 2996 - }, - { - "epoch": 1.5629726205997394, - "grad_norm": 1.7036476135253906, - "learning_rate": 9.749145728643217e-05, - "loss": 5.7614, - "step": 2997 - }, - { - "epoch": 1.5634941329856584, - "grad_norm": 1.639918565750122, - "learning_rate": 9.749045226130655e-05, - "loss": 5.8986, - "step": 2998 - }, - { - "epoch": 1.5640156453715774, - "grad_norm": 1.728251338005066, - "learning_rate": 9.748944723618091e-05, - "loss": 6.0599, - "step": 2999 - }, - { - "epoch": 1.5645371577574967, - "grad_norm": 1.6250648498535156, - "learning_rate": 9.748844221105529e-05, - "loss": 6.2399, - "step": 3000 - }, - { - "epoch": 1.565058670143416, - "grad_norm": 2.3469226360321045, - "learning_rate": 9.748743718592965e-05, - "loss": 5.2829, - "step": 3001 - }, - { - "epoch": 1.565580182529335, - "grad_norm": 1.505710482597351, - "learning_rate": 9.748643216080402e-05, - "loss": 6.2257, - "step": 3002 - }, - { - "epoch": 1.5661016949152542, - "grad_norm": 1.5264219045639038, - "learning_rate": 9.74854271356784e-05, - "loss": 5.8644, - "step": 3003 - }, - { - "epoch": 1.5666232073011734, - "grad_norm": 1.6579408645629883, - "learning_rate": 9.748442211055276e-05, - "loss": 5.711, - "step": 3004 - }, - { - "epoch": 1.5671447196870925, - "grad_norm": 1.549682855606079, - "learning_rate": 9.748341708542714e-05, - "loss": 6.0775, - "step": 3005 - }, - { - "epoch": 1.5676662320730117, - "grad_norm": 1.4245432615280151, - "learning_rate": 9.748241206030152e-05, - "loss": 6.2478, - "step": 3006 - }, - { - "epoch": 1.568187744458931, - "grad_norm": 1.4779332876205444, - "learning_rate": 9.748140703517589e-05, - "loss": 6.167, - "step": 3007 - }, - { - "epoch": 1.56870925684485, - "grad_norm": 1.5079220533370972, - "learning_rate": 9.748040201005026e-05, - "loss": 6.2643, - "step": 3008 - }, - { - "epoch": 1.5692307692307692, - "grad_norm": 1.641533613204956, - "learning_rate": 9.747939698492464e-05, - "loss": 6.1743, - "step": 3009 - }, - { - "epoch": 1.5697522816166884, - "grad_norm": 1.50382661819458, - "learning_rate": 9.7478391959799e-05, - "loss": 6.3332, - "step": 3010 - }, - { - "epoch": 1.5702737940026075, - "grad_norm": 1.461124062538147, - "learning_rate": 9.747738693467338e-05, - "loss": 6.0997, - "step": 3011 - }, - { - "epoch": 1.5707953063885267, - "grad_norm": 1.540845274925232, - "learning_rate": 9.747638190954774e-05, - "loss": 6.2184, - "step": 3012 - }, - { - "epoch": 1.571316818774446, - "grad_norm": 1.6550092697143555, - "learning_rate": 9.747537688442212e-05, - "loss": 6.1339, - "step": 3013 - }, - { - "epoch": 1.571838331160365, - "grad_norm": 1.4923017024993896, - "learning_rate": 9.747437185929648e-05, - "loss": 6.4135, - "step": 3014 - }, - { - "epoch": 1.5723598435462842, - "grad_norm": 1.6147205829620361, - "learning_rate": 9.747336683417085e-05, - "loss": 6.1525, - "step": 3015 - }, - { - "epoch": 1.5728813559322035, - "grad_norm": 1.5925735235214233, - "learning_rate": 9.747236180904523e-05, - "loss": 6.1855, - "step": 3016 - }, - { - "epoch": 1.5734028683181225, - "grad_norm": 1.4535584449768066, - "learning_rate": 9.74713567839196e-05, - "loss": 6.3462, - "step": 3017 - }, - { - "epoch": 1.5739243807040417, - "grad_norm": 1.6119768619537354, - "learning_rate": 9.747035175879398e-05, - "loss": 6.2605, - "step": 3018 - }, - { - "epoch": 1.574445893089961, - "grad_norm": 1.7026129961013794, - "learning_rate": 9.746934673366835e-05, - "loss": 5.8512, - "step": 3019 - }, - { - "epoch": 1.57496740547588, - "grad_norm": 1.547124981880188, - "learning_rate": 9.746834170854272e-05, - "loss": 5.981, - "step": 3020 - }, - { - "epoch": 1.5754889178617992, - "grad_norm": 1.8274706602096558, - "learning_rate": 9.746733668341709e-05, - "loss": 5.6709, - "step": 3021 - }, - { - "epoch": 1.5760104302477185, - "grad_norm": 1.559336543083191, - "learning_rate": 9.746633165829147e-05, - "loss": 6.1293, - "step": 3022 - }, - { - "epoch": 1.5765319426336375, - "grad_norm": 1.7963043451309204, - "learning_rate": 9.746532663316583e-05, - "loss": 6.0766, - "step": 3023 - }, - { - "epoch": 1.5770534550195567, - "grad_norm": 1.5406216382980347, - "learning_rate": 9.746432160804021e-05, - "loss": 5.9616, - "step": 3024 - }, - { - "epoch": 1.577574967405476, - "grad_norm": 1.4781417846679688, - "learning_rate": 9.746331658291457e-05, - "loss": 5.9903, - "step": 3025 - }, - { - "epoch": 1.578096479791395, - "grad_norm": 1.6290030479431152, - "learning_rate": 9.746231155778895e-05, - "loss": 6.0626, - "step": 3026 - }, - { - "epoch": 1.5786179921773142, - "grad_norm": 1.7072821855545044, - "learning_rate": 9.746130653266333e-05, - "loss": 6.2445, - "step": 3027 - }, - { - "epoch": 1.5791395045632335, - "grad_norm": 1.7702466249465942, - "learning_rate": 9.746030150753769e-05, - "loss": 5.8532, - "step": 3028 - }, - { - "epoch": 1.5796610169491525, - "grad_norm": 1.4710116386413574, - "learning_rate": 9.745929648241207e-05, - "loss": 5.9021, - "step": 3029 - }, - { - "epoch": 1.5801825293350718, - "grad_norm": 1.680172324180603, - "learning_rate": 9.745829145728643e-05, - "loss": 5.7276, - "step": 3030 - }, - { - "epoch": 1.580704041720991, - "grad_norm": 1.5179733037948608, - "learning_rate": 9.745728643216081e-05, - "loss": 5.9933, - "step": 3031 - }, - { - "epoch": 1.58122555410691, - "grad_norm": 1.4295916557312012, - "learning_rate": 9.745628140703518e-05, - "loss": 6.1653, - "step": 3032 - }, - { - "epoch": 1.5817470664928293, - "grad_norm": 2.4275026321411133, - "learning_rate": 9.745527638190955e-05, - "loss": 6.0781, - "step": 3033 - }, - { - "epoch": 1.5822685788787485, - "grad_norm": 1.754798173904419, - "learning_rate": 9.745427135678392e-05, - "loss": 5.9422, - "step": 3034 - }, - { - "epoch": 1.5827900912646675, - "grad_norm": 1.4458229541778564, - "learning_rate": 9.74532663316583e-05, - "loss": 6.2068, - "step": 3035 - }, - { - "epoch": 1.5833116036505865, - "grad_norm": 1.5155539512634277, - "learning_rate": 9.745226130653266e-05, - "loss": 6.0428, - "step": 3036 - }, - { - "epoch": 1.583833116036506, - "grad_norm": 1.578027606010437, - "learning_rate": 9.745125628140704e-05, - "loss": 6.3008, - "step": 3037 - }, - { - "epoch": 1.584354628422425, - "grad_norm": 1.6520020961761475, - "learning_rate": 9.745025125628142e-05, - "loss": 5.924, - "step": 3038 - }, - { - "epoch": 1.584876140808344, - "grad_norm": 1.5327036380767822, - "learning_rate": 9.74492462311558e-05, - "loss": 6.187, - "step": 3039 - }, - { - "epoch": 1.5853976531942635, - "grad_norm": 1.4840681552886963, - "learning_rate": 9.744824120603016e-05, - "loss": 6.0787, - "step": 3040 - }, - { - "epoch": 1.5859191655801825, - "grad_norm": 1.6682302951812744, - "learning_rate": 9.744723618090452e-05, - "loss": 5.9859, - "step": 3041 - }, - { - "epoch": 1.5864406779661016, - "grad_norm": 1.7628681659698486, - "learning_rate": 9.74462311557789e-05, - "loss": 5.0342, - "step": 3042 - }, - { - "epoch": 1.586962190352021, - "grad_norm": 1.3723901510238647, - "learning_rate": 9.744522613065326e-05, - "loss": 6.2808, - "step": 3043 - }, - { - "epoch": 1.58748370273794, - "grad_norm": 1.7502182722091675, - "learning_rate": 9.744422110552764e-05, - "loss": 5.5828, - "step": 3044 - }, - { - "epoch": 1.588005215123859, - "grad_norm": 1.4252878427505493, - "learning_rate": 9.7443216080402e-05, - "loss": 5.8077, - "step": 3045 - }, - { - "epoch": 1.5885267275097783, - "grad_norm": 1.544830083847046, - "learning_rate": 9.744221105527638e-05, - "loss": 5.8496, - "step": 3046 - }, - { - "epoch": 1.5890482398956975, - "grad_norm": 1.733964443206787, - "learning_rate": 9.744120603015076e-05, - "loss": 5.1959, - "step": 3047 - }, - { - "epoch": 1.5895697522816166, - "grad_norm": 2.145663022994995, - "learning_rate": 9.744020100502514e-05, - "loss": 5.6511, - "step": 3048 - }, - { - "epoch": 1.5900912646675358, - "grad_norm": 1.7752289772033691, - "learning_rate": 9.74391959798995e-05, - "loss": 5.4873, - "step": 3049 - }, - { - "epoch": 1.590612777053455, - "grad_norm": 1.7373502254486084, - "learning_rate": 9.743819095477388e-05, - "loss": 5.6868, - "step": 3050 - }, - { - "epoch": 1.591134289439374, - "grad_norm": 1.5320210456848145, - "learning_rate": 9.743718592964825e-05, - "loss": 6.3112, - "step": 3051 - }, - { - "epoch": 1.5916558018252933, - "grad_norm": 1.4615966081619263, - "learning_rate": 9.743618090452262e-05, - "loss": 6.4131, - "step": 3052 - }, - { - "epoch": 1.5921773142112126, - "grad_norm": 1.70128333568573, - "learning_rate": 9.743517587939699e-05, - "loss": 5.7769, - "step": 3053 - }, - { - "epoch": 1.5926988265971316, - "grad_norm": 1.4361833333969116, - "learning_rate": 9.743417085427135e-05, - "loss": 6.1996, - "step": 3054 - }, - { - "epoch": 1.5932203389830508, - "grad_norm": 1.9747556447982788, - "learning_rate": 9.743316582914573e-05, - "loss": 5.4913, - "step": 3055 - }, - { - "epoch": 1.59374185136897, - "grad_norm": 1.5286611318588257, - "learning_rate": 9.74321608040201e-05, - "loss": 5.9351, - "step": 3056 - }, - { - "epoch": 1.594263363754889, - "grad_norm": 1.548088550567627, - "learning_rate": 9.743115577889447e-05, - "loss": 5.9806, - "step": 3057 - }, - { - "epoch": 1.5947848761408083, - "grad_norm": 1.812968134880066, - "learning_rate": 9.743015075376885e-05, - "loss": 6.0099, - "step": 3058 - }, - { - "epoch": 1.5953063885267276, - "grad_norm": 1.438568353652954, - "learning_rate": 9.742914572864323e-05, - "loss": 6.2415, - "step": 3059 - }, - { - "epoch": 1.5958279009126466, - "grad_norm": 1.4927949905395508, - "learning_rate": 9.742814070351759e-05, - "loss": 5.6877, - "step": 3060 - }, - { - "epoch": 1.5963494132985658, - "grad_norm": 1.5806691646575928, - "learning_rate": 9.742713567839197e-05, - "loss": 6.2952, - "step": 3061 - }, - { - "epoch": 1.596870925684485, - "grad_norm": 1.5213185548782349, - "learning_rate": 9.742613065326633e-05, - "loss": 5.9267, - "step": 3062 - }, - { - "epoch": 1.597392438070404, - "grad_norm": 1.5299967527389526, - "learning_rate": 9.742512562814071e-05, - "loss": 5.8624, - "step": 3063 - }, - { - "epoch": 1.5979139504563233, - "grad_norm": 2.186304807662964, - "learning_rate": 9.742412060301508e-05, - "loss": 5.6364, - "step": 3064 - }, - { - "epoch": 1.5984354628422426, - "grad_norm": 1.8593779802322388, - "learning_rate": 9.742311557788945e-05, - "loss": 5.7762, - "step": 3065 - }, - { - "epoch": 1.5989569752281616, - "grad_norm": 1.5214787721633911, - "learning_rate": 9.742211055276382e-05, - "loss": 5.6702, - "step": 3066 - }, - { - "epoch": 1.5994784876140808, - "grad_norm": 1.5965590476989746, - "learning_rate": 9.742110552763818e-05, - "loss": 6.1784, - "step": 3067 - }, - { - "epoch": 1.6, - "grad_norm": 2.020270586013794, - "learning_rate": 9.742010050251256e-05, - "loss": 6.126, - "step": 3068 - }, - { - "epoch": 1.600521512385919, - "grad_norm": 1.525932788848877, - "learning_rate": 9.741909547738694e-05, - "loss": 6.2303, - "step": 3069 - }, - { - "epoch": 1.6010430247718384, - "grad_norm": 1.7978794574737549, - "learning_rate": 9.741809045226132e-05, - "loss": 5.7706, - "step": 3070 - }, - { - "epoch": 1.6015645371577576, - "grad_norm": 1.65778648853302, - "learning_rate": 9.741708542713568e-05, - "loss": 5.6908, - "step": 3071 - }, - { - "epoch": 1.6020860495436766, - "grad_norm": 1.375840663909912, - "learning_rate": 9.741608040201006e-05, - "loss": 6.1165, - "step": 3072 - }, - { - "epoch": 1.6026075619295959, - "grad_norm": 1.5219671726226807, - "learning_rate": 9.741507537688442e-05, - "loss": 6.0988, - "step": 3073 - }, - { - "epoch": 1.603129074315515, - "grad_norm": 1.7993241548538208, - "learning_rate": 9.74140703517588e-05, - "loss": 5.8465, - "step": 3074 - }, - { - "epoch": 1.6036505867014341, - "grad_norm": 1.4790480136871338, - "learning_rate": 9.741306532663317e-05, - "loss": 6.1904, - "step": 3075 - }, - { - "epoch": 1.6041720990873534, - "grad_norm": 1.670688509941101, - "learning_rate": 9.741206030150754e-05, - "loss": 5.8278, - "step": 3076 - }, - { - "epoch": 1.6046936114732726, - "grad_norm": 1.8832173347473145, - "learning_rate": 9.741105527638191e-05, - "loss": 5.718, - "step": 3077 - }, - { - "epoch": 1.6052151238591916, - "grad_norm": 1.6235421895980835, - "learning_rate": 9.741005025125629e-05, - "loss": 6.0965, - "step": 3078 - }, - { - "epoch": 1.6057366362451109, - "grad_norm": 1.6298588514328003, - "learning_rate": 9.740904522613066e-05, - "loss": 6.049, - "step": 3079 - }, - { - "epoch": 1.6062581486310301, - "grad_norm": 1.5675996541976929, - "learning_rate": 9.740804020100504e-05, - "loss": 5.7409, - "step": 3080 - }, - { - "epoch": 1.6067796610169491, - "grad_norm": 1.7733455896377563, - "learning_rate": 9.74070351758794e-05, - "loss": 5.7855, - "step": 3081 - }, - { - "epoch": 1.6073011734028682, - "grad_norm": 1.679229974746704, - "learning_rate": 9.740603015075377e-05, - "loss": 6.0233, - "step": 3082 - }, - { - "epoch": 1.6078226857887876, - "grad_norm": 1.6783993244171143, - "learning_rate": 9.740502512562815e-05, - "loss": 5.5203, - "step": 3083 - }, - { - "epoch": 1.6083441981747066, - "grad_norm": 1.510166049003601, - "learning_rate": 9.740402010050251e-05, - "loss": 6.3932, - "step": 3084 - }, - { - "epoch": 1.6088657105606257, - "grad_norm": 1.5554659366607666, - "learning_rate": 9.740301507537689e-05, - "loss": 6.0891, - "step": 3085 - }, - { - "epoch": 1.6093872229465451, - "grad_norm": 1.4308596849441528, - "learning_rate": 9.740201005025125e-05, - "loss": 6.2292, - "step": 3086 - }, - { - "epoch": 1.6099087353324641, - "grad_norm": 1.6161266565322876, - "learning_rate": 9.740100502512563e-05, - "loss": 5.7356, - "step": 3087 - }, - { - "epoch": 1.6104302477183832, - "grad_norm": 1.5871121883392334, - "learning_rate": 9.74e-05, - "loss": 5.7743, - "step": 3088 - }, - { - "epoch": 1.6109517601043026, - "grad_norm": 1.6464309692382812, - "learning_rate": 9.739899497487437e-05, - "loss": 6.06, - "step": 3089 - }, - { - "epoch": 1.6114732724902217, - "grad_norm": 1.6074179410934448, - "learning_rate": 9.739798994974875e-05, - "loss": 6.275, - "step": 3090 - }, - { - "epoch": 1.6119947848761407, - "grad_norm": 1.4984294176101685, - "learning_rate": 9.739698492462313e-05, - "loss": 6.1576, - "step": 3091 - }, - { - "epoch": 1.6125162972620601, - "grad_norm": 1.8707150220870972, - "learning_rate": 9.73959798994975e-05, - "loss": 5.5464, - "step": 3092 - }, - { - "epoch": 1.6130378096479792, - "grad_norm": 1.5556368827819824, - "learning_rate": 9.739497487437187e-05, - "loss": 5.8181, - "step": 3093 - }, - { - "epoch": 1.6135593220338982, - "grad_norm": 1.747147560119629, - "learning_rate": 9.739396984924624e-05, - "loss": 5.8047, - "step": 3094 - }, - { - "epoch": 1.6140808344198174, - "grad_norm": 1.6061432361602783, - "learning_rate": 9.73929648241206e-05, - "loss": 6.1276, - "step": 3095 - }, - { - "epoch": 1.6146023468057367, - "grad_norm": 1.7293413877487183, - "learning_rate": 9.739195979899498e-05, - "loss": 5.3286, - "step": 3096 - }, - { - "epoch": 1.6151238591916557, - "grad_norm": 1.447916030883789, - "learning_rate": 9.739095477386934e-05, - "loss": 5.6049, - "step": 3097 - }, - { - "epoch": 1.615645371577575, - "grad_norm": 1.6178771257400513, - "learning_rate": 9.738994974874372e-05, - "loss": 5.8783, - "step": 3098 - }, - { - "epoch": 1.6161668839634942, - "grad_norm": 1.8009456396102905, - "learning_rate": 9.73889447236181e-05, - "loss": 5.924, - "step": 3099 - }, - { - "epoch": 1.6166883963494132, - "grad_norm": 1.552306056022644, - "learning_rate": 9.738793969849248e-05, - "loss": 6.1086, - "step": 3100 - }, - { - "epoch": 1.6172099087353324, - "grad_norm": 1.544073224067688, - "learning_rate": 9.738693467336684e-05, - "loss": 5.2937, - "step": 3101 - }, - { - "epoch": 1.6177314211212517, - "grad_norm": 1.6039420366287231, - "learning_rate": 9.738592964824122e-05, - "loss": 6.1915, - "step": 3102 - }, - { - "epoch": 1.6182529335071707, - "grad_norm": 1.5217640399932861, - "learning_rate": 9.738492462311558e-05, - "loss": 6.0091, - "step": 3103 - }, - { - "epoch": 1.61877444589309, - "grad_norm": 1.600091814994812, - "learning_rate": 9.738391959798996e-05, - "loss": 6.2108, - "step": 3104 - }, - { - "epoch": 1.6192959582790092, - "grad_norm": 1.5660648345947266, - "learning_rate": 9.738291457286432e-05, - "loss": 5.9596, - "step": 3105 - }, - { - "epoch": 1.6198174706649282, - "grad_norm": 1.6846814155578613, - "learning_rate": 9.73819095477387e-05, - "loss": 5.7896, - "step": 3106 - }, - { - "epoch": 1.6203389830508474, - "grad_norm": 1.575038194656372, - "learning_rate": 9.738090452261307e-05, - "loss": 6.0597, - "step": 3107 - }, - { - "epoch": 1.6208604954367667, - "grad_norm": 1.5629881620407104, - "learning_rate": 9.737989949748743e-05, - "loss": 6.0841, - "step": 3108 - }, - { - "epoch": 1.6213820078226857, - "grad_norm": 1.6237661838531494, - "learning_rate": 9.737889447236181e-05, - "loss": 5.8951, - "step": 3109 - }, - { - "epoch": 1.621903520208605, - "grad_norm": 1.5605125427246094, - "learning_rate": 9.737788944723619e-05, - "loss": 6.056, - "step": 3110 - }, - { - "epoch": 1.6224250325945242, - "grad_norm": 1.6115822792053223, - "learning_rate": 9.737688442211056e-05, - "loss": 5.8353, - "step": 3111 - }, - { - "epoch": 1.6229465449804432, - "grad_norm": 1.6092780828475952, - "learning_rate": 9.737587939698493e-05, - "loss": 6.2188, - "step": 3112 - }, - { - "epoch": 1.6234680573663625, - "grad_norm": 1.494054913520813, - "learning_rate": 9.73748743718593e-05, - "loss": 6.2143, - "step": 3113 - }, - { - "epoch": 1.6239895697522817, - "grad_norm": 1.4322123527526855, - "learning_rate": 9.737386934673367e-05, - "loss": 6.3, - "step": 3114 - }, - { - "epoch": 1.6245110821382007, - "grad_norm": 1.6598525047302246, - "learning_rate": 9.737286432160805e-05, - "loss": 5.5408, - "step": 3115 - }, - { - "epoch": 1.62503259452412, - "grad_norm": 1.647043228149414, - "learning_rate": 9.737185929648241e-05, - "loss": 6.0035, - "step": 3116 - }, - { - "epoch": 1.6255541069100392, - "grad_norm": 1.543968677520752, - "learning_rate": 9.737085427135679e-05, - "loss": 6.0252, - "step": 3117 - }, - { - "epoch": 1.6260756192959582, - "grad_norm": 1.6839088201522827, - "learning_rate": 9.736984924623115e-05, - "loss": 6.2788, - "step": 3118 - }, - { - "epoch": 1.6265971316818775, - "grad_norm": 1.6307122707366943, - "learning_rate": 9.736884422110553e-05, - "loss": 6.0629, - "step": 3119 - }, - { - "epoch": 1.6271186440677967, - "grad_norm": 1.541347861289978, - "learning_rate": 9.736783919597991e-05, - "loss": 6.0879, - "step": 3120 - }, - { - "epoch": 1.6276401564537157, - "grad_norm": 1.6512030363082886, - "learning_rate": 9.736683417085427e-05, - "loss": 5.8473, - "step": 3121 - }, - { - "epoch": 1.628161668839635, - "grad_norm": 1.576765775680542, - "learning_rate": 9.736582914572865e-05, - "loss": 6.1983, - "step": 3122 - }, - { - "epoch": 1.6286831812255542, - "grad_norm": 1.4205610752105713, - "learning_rate": 9.736482412060302e-05, - "loss": 6.1726, - "step": 3123 - }, - { - "epoch": 1.6292046936114732, - "grad_norm": 1.5198034048080444, - "learning_rate": 9.73638190954774e-05, - "loss": 5.8932, - "step": 3124 - }, - { - "epoch": 1.6297262059973925, - "grad_norm": 1.707255244255066, - "learning_rate": 9.736281407035176e-05, - "loss": 5.5705, - "step": 3125 - }, - { - "epoch": 1.6302477183833117, - "grad_norm": 1.4681293964385986, - "learning_rate": 9.736180904522614e-05, - "loss": 6.2079, - "step": 3126 - }, - { - "epoch": 1.6307692307692307, - "grad_norm": 1.739350438117981, - "learning_rate": 9.73608040201005e-05, - "loss": 5.7466, - "step": 3127 - }, - { - "epoch": 1.6312907431551498, - "grad_norm": 1.8174374103546143, - "learning_rate": 9.735979899497488e-05, - "loss": 6.0534, - "step": 3128 - }, - { - "epoch": 1.6318122555410692, - "grad_norm": 1.691854476928711, - "learning_rate": 9.735879396984924e-05, - "loss": 5.692, - "step": 3129 - }, - { - "epoch": 1.6323337679269883, - "grad_norm": 1.603854775428772, - "learning_rate": 9.735778894472362e-05, - "loss": 6.09, - "step": 3130 - }, - { - "epoch": 1.6328552803129073, - "grad_norm": 1.5197350978851318, - "learning_rate": 9.7356783919598e-05, - "loss": 5.7362, - "step": 3131 - }, - { - "epoch": 1.6333767926988267, - "grad_norm": 1.5920674800872803, - "learning_rate": 9.735577889447238e-05, - "loss": 5.8172, - "step": 3132 - }, - { - "epoch": 1.6338983050847458, - "grad_norm": 1.6657129526138306, - "learning_rate": 9.735477386934674e-05, - "loss": 5.9969, - "step": 3133 - }, - { - "epoch": 1.6344198174706648, - "grad_norm": 1.5910208225250244, - "learning_rate": 9.73537688442211e-05, - "loss": 6.0633, - "step": 3134 - }, - { - "epoch": 1.6349413298565842, - "grad_norm": 1.6786820888519287, - "learning_rate": 9.735276381909548e-05, - "loss": 5.8129, - "step": 3135 - }, - { - "epoch": 1.6354628422425033, - "grad_norm": 1.702570915222168, - "learning_rate": 9.735175879396985e-05, - "loss": 6.4482, - "step": 3136 - }, - { - "epoch": 1.6359843546284223, - "grad_norm": 1.7444863319396973, - "learning_rate": 9.735075376884422e-05, - "loss": 5.9886, - "step": 3137 - }, - { - "epoch": 1.6365058670143418, - "grad_norm": 1.555273175239563, - "learning_rate": 9.734974874371859e-05, - "loss": 6.1227, - "step": 3138 - }, - { - "epoch": 1.6370273794002608, - "grad_norm": 1.495219111442566, - "learning_rate": 9.734874371859297e-05, - "loss": 6.3125, - "step": 3139 - }, - { - "epoch": 1.6375488917861798, - "grad_norm": 1.6466037034988403, - "learning_rate": 9.734773869346734e-05, - "loss": 6.2784, - "step": 3140 - }, - { - "epoch": 1.638070404172099, - "grad_norm": 1.6692683696746826, - "learning_rate": 9.734673366834172e-05, - "loss": 6.3513, - "step": 3141 - }, - { - "epoch": 1.6385919165580183, - "grad_norm": 1.5703704357147217, - "learning_rate": 9.734572864321609e-05, - "loss": 6.1411, - "step": 3142 - }, - { - "epoch": 1.6391134289439373, - "grad_norm": 1.8088418245315552, - "learning_rate": 9.734472361809046e-05, - "loss": 5.4836, - "step": 3143 - }, - { - "epoch": 1.6396349413298565, - "grad_norm": 1.7659289836883545, - "learning_rate": 9.734371859296483e-05, - "loss": 5.6414, - "step": 3144 - }, - { - "epoch": 1.6401564537157758, - "grad_norm": 1.4511542320251465, - "learning_rate": 9.73427135678392e-05, - "loss": 5.3162, - "step": 3145 - }, - { - "epoch": 1.6406779661016948, - "grad_norm": 1.4907194375991821, - "learning_rate": 9.734170854271357e-05, - "loss": 5.9429, - "step": 3146 - }, - { - "epoch": 1.641199478487614, - "grad_norm": 1.6705886125564575, - "learning_rate": 9.734070351758794e-05, - "loss": 5.7635, - "step": 3147 - }, - { - "epoch": 1.6417209908735333, - "grad_norm": 1.6852425336837769, - "learning_rate": 9.733969849246231e-05, - "loss": 6.226, - "step": 3148 - }, - { - "epoch": 1.6422425032594523, - "grad_norm": 1.6095467805862427, - "learning_rate": 9.733869346733668e-05, - "loss": 6.2492, - "step": 3149 - }, - { - "epoch": 1.6427640156453716, - "grad_norm": 1.592104434967041, - "learning_rate": 9.733768844221106e-05, - "loss": 5.9898, - "step": 3150 - }, - { - "epoch": 1.6432855280312908, - "grad_norm": 1.6531606912612915, - "learning_rate": 9.733668341708543e-05, - "loss": 6.0379, - "step": 3151 - }, - { - "epoch": 1.6438070404172098, - "grad_norm": 2.132061004638672, - "learning_rate": 9.733567839195981e-05, - "loss": 6.0662, - "step": 3152 - }, - { - "epoch": 1.644328552803129, - "grad_norm": 1.8207018375396729, - "learning_rate": 9.733467336683418e-05, - "loss": 5.7647, - "step": 3153 - }, - { - "epoch": 1.6448500651890483, - "grad_norm": 1.7841594219207764, - "learning_rate": 9.733366834170855e-05, - "loss": 6.0311, - "step": 3154 - }, - { - "epoch": 1.6453715775749673, - "grad_norm": 2.0698485374450684, - "learning_rate": 9.733266331658292e-05, - "loss": 6.168, - "step": 3155 - }, - { - "epoch": 1.6458930899608866, - "grad_norm": 1.9563928842544556, - "learning_rate": 9.73316582914573e-05, - "loss": 5.2311, - "step": 3156 - }, - { - "epoch": 1.6464146023468058, - "grad_norm": 1.6322273015975952, - "learning_rate": 9.733065326633166e-05, - "loss": 6.0019, - "step": 3157 - }, - { - "epoch": 1.6469361147327248, - "grad_norm": 1.7840666770935059, - "learning_rate": 9.732964824120604e-05, - "loss": 5.8147, - "step": 3158 - }, - { - "epoch": 1.647457627118644, - "grad_norm": 1.6237585544586182, - "learning_rate": 9.73286432160804e-05, - "loss": 6.1855, - "step": 3159 - }, - { - "epoch": 1.6479791395045633, - "grad_norm": 1.6893723011016846, - "learning_rate": 9.732763819095478e-05, - "loss": 5.4287, - "step": 3160 - }, - { - "epoch": 1.6485006518904823, - "grad_norm": 1.9960300922393799, - "learning_rate": 9.732663316582916e-05, - "loss": 6.0214, - "step": 3161 - }, - { - "epoch": 1.6490221642764016, - "grad_norm": 1.9659291505813599, - "learning_rate": 9.732562814070352e-05, - "loss": 6.0264, - "step": 3162 - }, - { - "epoch": 1.6495436766623208, - "grad_norm": 1.6435083150863647, - "learning_rate": 9.73246231155779e-05, - "loss": 6.1167, - "step": 3163 - }, - { - "epoch": 1.6500651890482398, - "grad_norm": 1.759752869606018, - "learning_rate": 9.732361809045226e-05, - "loss": 5.6, - "step": 3164 - }, - { - "epoch": 1.650586701434159, - "grad_norm": 1.8139715194702148, - "learning_rate": 9.732261306532664e-05, - "loss": 6.154, - "step": 3165 - }, - { - "epoch": 1.6511082138200783, - "grad_norm": 1.6587743759155273, - "learning_rate": 9.7321608040201e-05, - "loss": 6.0956, - "step": 3166 - }, - { - "epoch": 1.6516297262059974, - "grad_norm": 1.5413472652435303, - "learning_rate": 9.732060301507538e-05, - "loss": 6.356, - "step": 3167 - }, - { - "epoch": 1.6521512385919166, - "grad_norm": 2.0291459560394287, - "learning_rate": 9.731959798994975e-05, - "loss": 5.2967, - "step": 3168 - }, - { - "epoch": 1.6526727509778358, - "grad_norm": 1.5828510522842407, - "learning_rate": 9.731859296482413e-05, - "loss": 5.9587, - "step": 3169 - }, - { - "epoch": 1.6531942633637549, - "grad_norm": 1.633156180381775, - "learning_rate": 9.731758793969849e-05, - "loss": 5.6907, - "step": 3170 - }, - { - "epoch": 1.653715775749674, - "grad_norm": 1.5092873573303223, - "learning_rate": 9.731658291457287e-05, - "loss": 5.9621, - "step": 3171 - }, - { - "epoch": 1.6542372881355933, - "grad_norm": 1.314452052116394, - "learning_rate": 9.731557788944725e-05, - "loss": 6.321, - "step": 3172 - }, - { - "epoch": 1.6547588005215124, - "grad_norm": 1.517844319343567, - "learning_rate": 9.731457286432162e-05, - "loss": 5.93, - "step": 3173 - }, - { - "epoch": 1.6552803129074316, - "grad_norm": 1.4646230936050415, - "learning_rate": 9.731356783919599e-05, - "loss": 6.2153, - "step": 3174 - }, - { - "epoch": 1.6558018252933508, - "grad_norm": 1.441266417503357, - "learning_rate": 9.731256281407035e-05, - "loss": 6.0711, - "step": 3175 - }, - { - "epoch": 1.6563233376792699, - "grad_norm": 1.4708000421524048, - "learning_rate": 9.731155778894473e-05, - "loss": 5.9965, - "step": 3176 - }, - { - "epoch": 1.656844850065189, - "grad_norm": 1.5106927156448364, - "learning_rate": 9.73105527638191e-05, - "loss": 6.0369, - "step": 3177 - }, - { - "epoch": 1.6573663624511084, - "grad_norm": 1.526909589767456, - "learning_rate": 9.730954773869347e-05, - "loss": 5.6303, - "step": 3178 - }, - { - "epoch": 1.6578878748370274, - "grad_norm": 1.5698353052139282, - "learning_rate": 9.730854271356784e-05, - "loss": 5.858, - "step": 3179 - }, - { - "epoch": 1.6584093872229464, - "grad_norm": 1.653381586074829, - "learning_rate": 9.730753768844221e-05, - "loss": 5.6504, - "step": 3180 - }, - { - "epoch": 1.6589308996088659, - "grad_norm": 1.536844253540039, - "learning_rate": 9.730653266331659e-05, - "loss": 5.8652, - "step": 3181 - }, - { - "epoch": 1.6594524119947849, - "grad_norm": 1.779517650604248, - "learning_rate": 9.730552763819097e-05, - "loss": 5.9035, - "step": 3182 - }, - { - "epoch": 1.659973924380704, - "grad_norm": 1.669467806816101, - "learning_rate": 9.730452261306533e-05, - "loss": 5.2017, - "step": 3183 - }, - { - "epoch": 1.6604954367666234, - "grad_norm": 1.5142687559127808, - "learning_rate": 9.730351758793971e-05, - "loss": 6.1466, - "step": 3184 - }, - { - "epoch": 1.6610169491525424, - "grad_norm": 1.5393387079238892, - "learning_rate": 9.730251256281408e-05, - "loss": 6.1589, - "step": 3185 - }, - { - "epoch": 1.6615384615384614, - "grad_norm": 1.948114037513733, - "learning_rate": 9.730150753768845e-05, - "loss": 5.5209, - "step": 3186 - }, - { - "epoch": 1.6620599739243807, - "grad_norm": 1.6124712228775024, - "learning_rate": 9.730050251256282e-05, - "loss": 5.79, - "step": 3187 - }, - { - "epoch": 1.6625814863103, - "grad_norm": 1.585602879524231, - "learning_rate": 9.729949748743718e-05, - "loss": 6.0741, - "step": 3188 - }, - { - "epoch": 1.663102998696219, - "grad_norm": 1.609656572341919, - "learning_rate": 9.729849246231156e-05, - "loss": 5.7526, - "step": 3189 - }, - { - "epoch": 1.6636245110821382, - "grad_norm": 1.617051362991333, - "learning_rate": 9.729748743718592e-05, - "loss": 6.0574, - "step": 3190 - }, - { - "epoch": 1.6641460234680574, - "grad_norm": 1.704528570175171, - "learning_rate": 9.72964824120603e-05, - "loss": 5.5729, - "step": 3191 - }, - { - "epoch": 1.6646675358539764, - "grad_norm": 1.463706374168396, - "learning_rate": 9.729547738693468e-05, - "loss": 5.8071, - "step": 3192 - }, - { - "epoch": 1.6651890482398957, - "grad_norm": 1.5594342947006226, - "learning_rate": 9.729447236180906e-05, - "loss": 6.0381, - "step": 3193 - }, - { - "epoch": 1.665710560625815, - "grad_norm": 1.6204968690872192, - "learning_rate": 9.729346733668342e-05, - "loss": 5.994, - "step": 3194 - }, - { - "epoch": 1.666232073011734, - "grad_norm": 1.850070834159851, - "learning_rate": 9.72924623115578e-05, - "loss": 6.234, - "step": 3195 - }, - { - "epoch": 1.6667535853976532, - "grad_norm": 1.6641219854354858, - "learning_rate": 9.729145728643216e-05, - "loss": 5.5622, - "step": 3196 - }, - { - "epoch": 1.6672750977835724, - "grad_norm": 1.4465763568878174, - "learning_rate": 9.729045226130654e-05, - "loss": 5.7803, - "step": 3197 - }, - { - "epoch": 1.6677966101694914, - "grad_norm": 1.6652206182479858, - "learning_rate": 9.72894472361809e-05, - "loss": 5.7948, - "step": 3198 - }, - { - "epoch": 1.6683181225554107, - "grad_norm": 1.6243849992752075, - "learning_rate": 9.728844221105528e-05, - "loss": 6.0146, - "step": 3199 - }, - { - "epoch": 1.66883963494133, - "grad_norm": 1.4728500843048096, - "learning_rate": 9.728743718592965e-05, - "loss": 5.8717, - "step": 3200 - }, - { - "epoch": 1.669361147327249, - "grad_norm": 1.6355260610580444, - "learning_rate": 9.728643216080403e-05, - "loss": 6.2207, - "step": 3201 - }, - { - "epoch": 1.6698826597131682, - "grad_norm": 1.5135046243667603, - "learning_rate": 9.72854271356784e-05, - "loss": 6.2645, - "step": 3202 - }, - { - "epoch": 1.6704041720990874, - "grad_norm": 1.6245781183242798, - "learning_rate": 9.728442211055277e-05, - "loss": 5.8798, - "step": 3203 - }, - { - "epoch": 1.6709256844850064, - "grad_norm": 1.7407125234603882, - "learning_rate": 9.728341708542715e-05, - "loss": 5.881, - "step": 3204 - }, - { - "epoch": 1.6714471968709257, - "grad_norm": 2.1430344581604004, - "learning_rate": 9.728241206030151e-05, - "loss": 5.3271, - "step": 3205 - }, - { - "epoch": 1.671968709256845, - "grad_norm": 1.6417948007583618, - "learning_rate": 9.728140703517589e-05, - "loss": 5.8358, - "step": 3206 - }, - { - "epoch": 1.672490221642764, - "grad_norm": 1.7306737899780273, - "learning_rate": 9.728040201005025e-05, - "loss": 6.0933, - "step": 3207 - }, - { - "epoch": 1.6730117340286832, - "grad_norm": 1.6175457239151, - "learning_rate": 9.727939698492463e-05, - "loss": 6.1129, - "step": 3208 - }, - { - "epoch": 1.6735332464146024, - "grad_norm": 1.6006898880004883, - "learning_rate": 9.7278391959799e-05, - "loss": 5.7058, - "step": 3209 - }, - { - "epoch": 1.6740547588005215, - "grad_norm": 1.5916205644607544, - "learning_rate": 9.727738693467337e-05, - "loss": 5.6739, - "step": 3210 - }, - { - "epoch": 1.6745762711864407, - "grad_norm": 1.5880261659622192, - "learning_rate": 9.727638190954774e-05, - "loss": 6.168, - "step": 3211 - }, - { - "epoch": 1.67509778357236, - "grad_norm": 1.5823211669921875, - "learning_rate": 9.727537688442211e-05, - "loss": 5.5296, - "step": 3212 - }, - { - "epoch": 1.675619295958279, - "grad_norm": 1.749687671661377, - "learning_rate": 9.727437185929649e-05, - "loss": 6.1563, - "step": 3213 - }, - { - "epoch": 1.6761408083441982, - "grad_norm": 1.472648024559021, - "learning_rate": 9.727336683417086e-05, - "loss": 6.03, - "step": 3214 - }, - { - "epoch": 1.6766623207301175, - "grad_norm": 1.5022220611572266, - "learning_rate": 9.727236180904523e-05, - "loss": 6.3691, - "step": 3215 - }, - { - "epoch": 1.6771838331160365, - "grad_norm": 1.4343161582946777, - "learning_rate": 9.72713567839196e-05, - "loss": 6.2584, - "step": 3216 - }, - { - "epoch": 1.6777053455019557, - "grad_norm": 1.6226125955581665, - "learning_rate": 9.727035175879398e-05, - "loss": 5.626, - "step": 3217 - }, - { - "epoch": 1.678226857887875, - "grad_norm": 1.6637554168701172, - "learning_rate": 9.726934673366834e-05, - "loss": 6.1724, - "step": 3218 - }, - { - "epoch": 1.678748370273794, - "grad_norm": 1.526485800743103, - "learning_rate": 9.726834170854272e-05, - "loss": 5.9241, - "step": 3219 - }, - { - "epoch": 1.6792698826597132, - "grad_norm": 1.4458792209625244, - "learning_rate": 9.726733668341708e-05, - "loss": 6.0839, - "step": 3220 - }, - { - "epoch": 1.6797913950456325, - "grad_norm": 1.759611964225769, - "learning_rate": 9.726633165829146e-05, - "loss": 6.1272, - "step": 3221 - }, - { - "epoch": 1.6803129074315515, - "grad_norm": 1.697718620300293, - "learning_rate": 9.726532663316584e-05, - "loss": 5.2536, - "step": 3222 - }, - { - "epoch": 1.6808344198174705, - "grad_norm": 1.6743053197860718, - "learning_rate": 9.726432160804022e-05, - "loss": 5.4143, - "step": 3223 - }, - { - "epoch": 1.68135593220339, - "grad_norm": 1.784546971321106, - "learning_rate": 9.726331658291458e-05, - "loss": 6.0877, - "step": 3224 - }, - { - "epoch": 1.681877444589309, - "grad_norm": 1.6950150728225708, - "learning_rate": 9.726231155778896e-05, - "loss": 5.778, - "step": 3225 - }, - { - "epoch": 1.682398956975228, - "grad_norm": 1.5330315828323364, - "learning_rate": 9.726130653266332e-05, - "loss": 6.0452, - "step": 3226 - }, - { - "epoch": 1.6829204693611475, - "grad_norm": 1.6510361433029175, - "learning_rate": 9.726030150753769e-05, - "loss": 6.0363, - "step": 3227 - }, - { - "epoch": 1.6834419817470665, - "grad_norm": 1.49409818649292, - "learning_rate": 9.725929648241207e-05, - "loss": 5.5592, - "step": 3228 - }, - { - "epoch": 1.6839634941329855, - "grad_norm": 1.4729665517807007, - "learning_rate": 9.725829145728643e-05, - "loss": 6.1572, - "step": 3229 - }, - { - "epoch": 1.684485006518905, - "grad_norm": 1.8135627508163452, - "learning_rate": 9.725728643216081e-05, - "loss": 5.8689, - "step": 3230 - }, - { - "epoch": 1.685006518904824, - "grad_norm": 1.5554484128952026, - "learning_rate": 9.725628140703517e-05, - "loss": 6.1711, - "step": 3231 - }, - { - "epoch": 1.685528031290743, - "grad_norm": 1.8536498546600342, - "learning_rate": 9.725527638190955e-05, - "loss": 5.6095, - "step": 3232 - }, - { - "epoch": 1.6860495436766623, - "grad_norm": 1.6792150735855103, - "learning_rate": 9.725427135678393e-05, - "loss": 6.177, - "step": 3233 - }, - { - "epoch": 1.6865710560625815, - "grad_norm": 1.4636952877044678, - "learning_rate": 9.72532663316583e-05, - "loss": 5.9558, - "step": 3234 - }, - { - "epoch": 1.6870925684485005, - "grad_norm": 1.5282926559448242, - "learning_rate": 9.725226130653267e-05, - "loss": 6.1731, - "step": 3235 - }, - { - "epoch": 1.6876140808344198, - "grad_norm": 1.7123357057571411, - "learning_rate": 9.725125628140705e-05, - "loss": 5.9655, - "step": 3236 - }, - { - "epoch": 1.688135593220339, - "grad_norm": 1.8475371599197388, - "learning_rate": 9.725025125628141e-05, - "loss": 6.1073, - "step": 3237 - }, - { - "epoch": 1.688657105606258, - "grad_norm": 1.6527007818222046, - "learning_rate": 9.724924623115579e-05, - "loss": 5.6579, - "step": 3238 - }, - { - "epoch": 1.6891786179921773, - "grad_norm": 1.5052242279052734, - "learning_rate": 9.724824120603015e-05, - "loss": 6.1953, - "step": 3239 - }, - { - "epoch": 1.6897001303780965, - "grad_norm": 1.5291155576705933, - "learning_rate": 9.724723618090452e-05, - "loss": 6.3444, - "step": 3240 - }, - { - "epoch": 1.6902216427640155, - "grad_norm": 2.2314364910125732, - "learning_rate": 9.72462311557789e-05, - "loss": 4.9503, - "step": 3241 - }, - { - "epoch": 1.6907431551499348, - "grad_norm": 1.610428810119629, - "learning_rate": 9.724522613065326e-05, - "loss": 6.2835, - "step": 3242 - }, - { - "epoch": 1.691264667535854, - "grad_norm": 1.706899642944336, - "learning_rate": 9.724422110552764e-05, - "loss": 5.3784, - "step": 3243 - }, - { - "epoch": 1.691786179921773, - "grad_norm": 1.4577761888504028, - "learning_rate": 9.724321608040202e-05, - "loss": 5.8671, - "step": 3244 - }, - { - "epoch": 1.6923076923076923, - "grad_norm": 1.4243916273117065, - "learning_rate": 9.724221105527639e-05, - "loss": 6.0417, - "step": 3245 - }, - { - "epoch": 1.6928292046936115, - "grad_norm": 1.9628136157989502, - "learning_rate": 9.724120603015076e-05, - "loss": 5.5775, - "step": 3246 - }, - { - "epoch": 1.6933507170795306, - "grad_norm": 2.1769423484802246, - "learning_rate": 9.724020100502514e-05, - "loss": 5.0881, - "step": 3247 - }, - { - "epoch": 1.6938722294654498, - "grad_norm": 1.6167597770690918, - "learning_rate": 9.72391959798995e-05, - "loss": 6.1707, - "step": 3248 - }, - { - "epoch": 1.694393741851369, - "grad_norm": 1.4802266359329224, - "learning_rate": 9.723819095477388e-05, - "loss": 6.2948, - "step": 3249 - }, - { - "epoch": 1.694915254237288, - "grad_norm": 1.6667572259902954, - "learning_rate": 9.723718592964824e-05, - "loss": 5.7585, - "step": 3250 - }, - { - "epoch": 1.6954367666232073, - "grad_norm": 1.5481176376342773, - "learning_rate": 9.723618090452262e-05, - "loss": 5.9769, - "step": 3251 - }, - { - "epoch": 1.6959582790091265, - "grad_norm": 1.5769729614257812, - "learning_rate": 9.723517587939698e-05, - "loss": 6.1965, - "step": 3252 - }, - { - "epoch": 1.6964797913950456, - "grad_norm": 1.7354010343551636, - "learning_rate": 9.723417085427136e-05, - "loss": 5.8371, - "step": 3253 - }, - { - "epoch": 1.6970013037809648, - "grad_norm": 1.6017951965332031, - "learning_rate": 9.723316582914574e-05, - "loss": 5.5837, - "step": 3254 - }, - { - "epoch": 1.697522816166884, - "grad_norm": 1.2437846660614014, - "learning_rate": 9.72321608040201e-05, - "loss": 6.2058, - "step": 3255 - }, - { - "epoch": 1.698044328552803, - "grad_norm": 1.50468909740448, - "learning_rate": 9.723115577889448e-05, - "loss": 6.2282, - "step": 3256 - }, - { - "epoch": 1.6985658409387223, - "grad_norm": 1.3210327625274658, - "learning_rate": 9.723015075376885e-05, - "loss": 6.1736, - "step": 3257 - }, - { - "epoch": 1.6990873533246416, - "grad_norm": 1.5534708499908447, - "learning_rate": 9.722914572864322e-05, - "loss": 5.6591, - "step": 3258 - }, - { - "epoch": 1.6996088657105606, - "grad_norm": 1.5405446290969849, - "learning_rate": 9.722814070351759e-05, - "loss": 5.7173, - "step": 3259 - }, - { - "epoch": 1.7001303780964798, - "grad_norm": 1.3668922185897827, - "learning_rate": 9.722713567839197e-05, - "loss": 6.2671, - "step": 3260 - }, - { - "epoch": 1.700651890482399, - "grad_norm": 1.8384426832199097, - "learning_rate": 9.722613065326633e-05, - "loss": 6.1787, - "step": 3261 - }, - { - "epoch": 1.701173402868318, - "grad_norm": 1.519514799118042, - "learning_rate": 9.722512562814071e-05, - "loss": 6.1819, - "step": 3262 - }, - { - "epoch": 1.7016949152542373, - "grad_norm": 1.448196530342102, - "learning_rate": 9.722412060301507e-05, - "loss": 5.9922, - "step": 3263 - }, - { - "epoch": 1.7022164276401566, - "grad_norm": 1.866915225982666, - "learning_rate": 9.722311557788945e-05, - "loss": 5.6248, - "step": 3264 - }, - { - "epoch": 1.7027379400260756, - "grad_norm": 1.9794881343841553, - "learning_rate": 9.722211055276383e-05, - "loss": 6.1928, - "step": 3265 - }, - { - "epoch": 1.7032594524119948, - "grad_norm": 1.7719507217407227, - "learning_rate": 9.72211055276382e-05, - "loss": 5.8398, - "step": 3266 - }, - { - "epoch": 1.703780964797914, - "grad_norm": 1.4759310483932495, - "learning_rate": 9.722010050251257e-05, - "loss": 6.2204, - "step": 3267 - }, - { - "epoch": 1.704302477183833, - "grad_norm": 1.6018708944320679, - "learning_rate": 9.721909547738693e-05, - "loss": 5.3853, - "step": 3268 - }, - { - "epoch": 1.7048239895697521, - "grad_norm": 1.46084725856781, - "learning_rate": 9.721809045226131e-05, - "loss": 6.1681, - "step": 3269 - }, - { - "epoch": 1.7053455019556716, - "grad_norm": 1.3885011672973633, - "learning_rate": 9.721708542713568e-05, - "loss": 6.2089, - "step": 3270 - }, - { - "epoch": 1.7058670143415906, - "grad_norm": 1.4984090328216553, - "learning_rate": 9.721608040201005e-05, - "loss": 6.1931, - "step": 3271 - }, - { - "epoch": 1.7063885267275096, - "grad_norm": 1.3669795989990234, - "learning_rate": 9.721507537688442e-05, - "loss": 6.1169, - "step": 3272 - }, - { - "epoch": 1.706910039113429, - "grad_norm": 1.523040533065796, - "learning_rate": 9.72140703517588e-05, - "loss": 5.8951, - "step": 3273 - }, - { - "epoch": 1.707431551499348, - "grad_norm": 1.7961832284927368, - "learning_rate": 9.721306532663317e-05, - "loss": 5.721, - "step": 3274 - }, - { - "epoch": 1.7079530638852671, - "grad_norm": 1.6705384254455566, - "learning_rate": 9.721206030150755e-05, - "loss": 5.9702, - "step": 3275 - }, - { - "epoch": 1.7084745762711866, - "grad_norm": 1.9442307949066162, - "learning_rate": 9.721105527638192e-05, - "loss": 5.9098, - "step": 3276 - }, - { - "epoch": 1.7089960886571056, - "grad_norm": 1.4218543767929077, - "learning_rate": 9.72100502512563e-05, - "loss": 6.2064, - "step": 3277 - }, - { - "epoch": 1.7095176010430246, - "grad_norm": 1.6957820653915405, - "learning_rate": 9.720904522613066e-05, - "loss": 5.8875, - "step": 3278 - }, - { - "epoch": 1.710039113428944, - "grad_norm": 1.9108747243881226, - "learning_rate": 9.720804020100504e-05, - "loss": 5.4027, - "step": 3279 - }, - { - "epoch": 1.7105606258148631, - "grad_norm": 1.7203845977783203, - "learning_rate": 9.72070351758794e-05, - "loss": 5.8904, - "step": 3280 - }, - { - "epoch": 1.7110821382007821, - "grad_norm": 1.6585044860839844, - "learning_rate": 9.720603015075376e-05, - "loss": 6.0254, - "step": 3281 - }, - { - "epoch": 1.7116036505867014, - "grad_norm": 1.764907717704773, - "learning_rate": 9.720502512562814e-05, - "loss": 5.9133, - "step": 3282 - }, - { - "epoch": 1.7121251629726206, - "grad_norm": 1.5757007598876953, - "learning_rate": 9.72040201005025e-05, - "loss": 5.8941, - "step": 3283 - }, - { - "epoch": 1.7126466753585396, - "grad_norm": 1.5602794885635376, - "learning_rate": 9.720301507537688e-05, - "loss": 5.9031, - "step": 3284 - }, - { - "epoch": 1.713168187744459, - "grad_norm": 1.8277525901794434, - "learning_rate": 9.720201005025126e-05, - "loss": 5.226, - "step": 3285 - }, - { - "epoch": 1.7136897001303781, - "grad_norm": 1.4694033861160278, - "learning_rate": 9.720100502512564e-05, - "loss": 6.3207, - "step": 3286 - }, - { - "epoch": 1.7142112125162972, - "grad_norm": 1.516701579093933, - "learning_rate": 9.72e-05, - "loss": 6.1813, - "step": 3287 - }, - { - "epoch": 1.7147327249022164, - "grad_norm": 1.7726422548294067, - "learning_rate": 9.719899497487438e-05, - "loss": 6.0361, - "step": 3288 - }, - { - "epoch": 1.7152542372881356, - "grad_norm": 1.670492172241211, - "learning_rate": 9.719798994974875e-05, - "loss": 5.9087, - "step": 3289 - }, - { - "epoch": 1.7157757496740547, - "grad_norm": 1.8022445440292358, - "learning_rate": 9.719698492462312e-05, - "loss": 5.8934, - "step": 3290 - }, - { - "epoch": 1.716297262059974, - "grad_norm": 1.7031400203704834, - "learning_rate": 9.719597989949749e-05, - "loss": 6.0479, - "step": 3291 - }, - { - "epoch": 1.7168187744458931, - "grad_norm": 1.783332109451294, - "learning_rate": 9.719497487437187e-05, - "loss": 5.9079, - "step": 3292 - }, - { - "epoch": 1.7173402868318122, - "grad_norm": 1.495786190032959, - "learning_rate": 9.719396984924623e-05, - "loss": 6.118, - "step": 3293 - }, - { - "epoch": 1.7178617992177314, - "grad_norm": 1.5326268672943115, - "learning_rate": 9.719296482412061e-05, - "loss": 6.096, - "step": 3294 - }, - { - "epoch": 1.7183833116036507, - "grad_norm": 1.570668339729309, - "learning_rate": 9.719195979899499e-05, - "loss": 6.2606, - "step": 3295 - }, - { - "epoch": 1.7189048239895697, - "grad_norm": 1.4823070764541626, - "learning_rate": 9.719095477386935e-05, - "loss": 6.1616, - "step": 3296 - }, - { - "epoch": 1.719426336375489, - "grad_norm": 1.6309150457382202, - "learning_rate": 9.718994974874373e-05, - "loss": 5.8717, - "step": 3297 - }, - { - "epoch": 1.7199478487614082, - "grad_norm": 1.5561985969543457, - "learning_rate": 9.718894472361809e-05, - "loss": 6.2413, - "step": 3298 - }, - { - "epoch": 1.7204693611473272, - "grad_norm": 1.670965552330017, - "learning_rate": 9.718793969849247e-05, - "loss": 6.2541, - "step": 3299 - }, - { - "epoch": 1.7209908735332464, - "grad_norm": 1.5011696815490723, - "learning_rate": 9.718693467336683e-05, - "loss": 5.779, - "step": 3300 - }, - { - "epoch": 1.7215123859191657, - "grad_norm": 1.6187682151794434, - "learning_rate": 9.718592964824121e-05, - "loss": 5.8724, - "step": 3301 - }, - { - "epoch": 1.7220338983050847, - "grad_norm": 1.737080454826355, - "learning_rate": 9.718492462311558e-05, - "loss": 6.0139, - "step": 3302 - }, - { - "epoch": 1.722555410691004, - "grad_norm": 3.3389365673065186, - "learning_rate": 9.718391959798995e-05, - "loss": 5.8996, - "step": 3303 - }, - { - "epoch": 1.7230769230769232, - "grad_norm": 1.7126438617706299, - "learning_rate": 9.718291457286432e-05, - "loss": 6.0762, - "step": 3304 - }, - { - "epoch": 1.7235984354628422, - "grad_norm": 1.518081784248352, - "learning_rate": 9.71819095477387e-05, - "loss": 6.3283, - "step": 3305 - }, - { - "epoch": 1.7241199478487614, - "grad_norm": 1.5217375755310059, - "learning_rate": 9.718090452261307e-05, - "loss": 5.7146, - "step": 3306 - }, - { - "epoch": 1.7246414602346807, - "grad_norm": 1.6588184833526611, - "learning_rate": 9.717989949748744e-05, - "loss": 4.9533, - "step": 3307 - }, - { - "epoch": 1.7251629726205997, - "grad_norm": 1.5553386211395264, - "learning_rate": 9.717889447236182e-05, - "loss": 6.3043, - "step": 3308 - }, - { - "epoch": 1.725684485006519, - "grad_norm": 1.6457551717758179, - "learning_rate": 9.717788944723618e-05, - "loss": 5.8313, - "step": 3309 - }, - { - "epoch": 1.7262059973924382, - "grad_norm": 2.029294729232788, - "learning_rate": 9.717688442211056e-05, - "loss": 5.214, - "step": 3310 - }, - { - "epoch": 1.7267275097783572, - "grad_norm": 1.8718783855438232, - "learning_rate": 9.717587939698492e-05, - "loss": 6.2013, - "step": 3311 - }, - { - "epoch": 1.7272490221642764, - "grad_norm": 1.6630576848983765, - "learning_rate": 9.71748743718593e-05, - "loss": 6.0053, - "step": 3312 - }, - { - "epoch": 1.7277705345501957, - "grad_norm": 1.5276439189910889, - "learning_rate": 9.717386934673367e-05, - "loss": 6.0402, - "step": 3313 - }, - { - "epoch": 1.7282920469361147, - "grad_norm": 1.8371270895004272, - "learning_rate": 9.717286432160804e-05, - "loss": 6.0884, - "step": 3314 - }, - { - "epoch": 1.7288135593220337, - "grad_norm": 1.7210265398025513, - "learning_rate": 9.717185929648242e-05, - "loss": 5.9401, - "step": 3315 - }, - { - "epoch": 1.7293350717079532, - "grad_norm": 2.0925912857055664, - "learning_rate": 9.71708542713568e-05, - "loss": 5.7091, - "step": 3316 - }, - { - "epoch": 1.7298565840938722, - "grad_norm": 1.5949950218200684, - "learning_rate": 9.716984924623116e-05, - "loss": 5.8663, - "step": 3317 - }, - { - "epoch": 1.7303780964797912, - "grad_norm": 1.8782790899276733, - "learning_rate": 9.716884422110554e-05, - "loss": 5.7639, - "step": 3318 - }, - { - "epoch": 1.7308996088657107, - "grad_norm": 1.514481782913208, - "learning_rate": 9.71678391959799e-05, - "loss": 6.0857, - "step": 3319 - }, - { - "epoch": 1.7314211212516297, - "grad_norm": 1.7982304096221924, - "learning_rate": 9.716683417085427e-05, - "loss": 5.6281, - "step": 3320 - }, - { - "epoch": 1.7319426336375487, - "grad_norm": 1.679223895072937, - "learning_rate": 9.716582914572865e-05, - "loss": 5.1745, - "step": 3321 - }, - { - "epoch": 1.7324641460234682, - "grad_norm": 2.737560272216797, - "learning_rate": 9.716482412060301e-05, - "loss": 5.4417, - "step": 3322 - }, - { - "epoch": 1.7329856584093872, - "grad_norm": 1.8556020259857178, - "learning_rate": 9.716381909547739e-05, - "loss": 5.3947, - "step": 3323 - }, - { - "epoch": 1.7335071707953063, - "grad_norm": 1.6604238748550415, - "learning_rate": 9.716281407035175e-05, - "loss": 6.0392, - "step": 3324 - }, - { - "epoch": 1.7340286831812257, - "grad_norm": 1.582079529762268, - "learning_rate": 9.716180904522613e-05, - "loss": 4.9405, - "step": 3325 - }, - { - "epoch": 1.7345501955671447, - "grad_norm": 1.5796748399734497, - "learning_rate": 9.716080402010051e-05, - "loss": 6.1195, - "step": 3326 - }, - { - "epoch": 1.7350717079530638, - "grad_norm": 1.5330551862716675, - "learning_rate": 9.715979899497489e-05, - "loss": 6.4154, - "step": 3327 - }, - { - "epoch": 1.735593220338983, - "grad_norm": 1.6535720825195312, - "learning_rate": 9.715879396984925e-05, - "loss": 6.2119, - "step": 3328 - }, - { - "epoch": 1.7361147327249022, - "grad_norm": 1.4664764404296875, - "learning_rate": 9.715778894472363e-05, - "loss": 6.0851, - "step": 3329 - }, - { - "epoch": 1.7366362451108213, - "grad_norm": 1.472206950187683, - "learning_rate": 9.7156783919598e-05, - "loss": 5.9087, - "step": 3330 - }, - { - "epoch": 1.7371577574967405, - "grad_norm": 1.4665530920028687, - "learning_rate": 9.715577889447237e-05, - "loss": 6.3229, - "step": 3331 - }, - { - "epoch": 1.7376792698826597, - "grad_norm": 1.3584777116775513, - "learning_rate": 9.715477386934674e-05, - "loss": 5.3922, - "step": 3332 - }, - { - "epoch": 1.7382007822685788, - "grad_norm": 1.5922040939331055, - "learning_rate": 9.71537688442211e-05, - "loss": 5.9226, - "step": 3333 - }, - { - "epoch": 1.738722294654498, - "grad_norm": 1.6438482999801636, - "learning_rate": 9.715276381909548e-05, - "loss": 6.1637, - "step": 3334 - }, - { - "epoch": 1.7392438070404173, - "grad_norm": 1.4272111654281616, - "learning_rate": 9.715175879396986e-05, - "loss": 6.209, - "step": 3335 - }, - { - "epoch": 1.7397653194263363, - "grad_norm": 1.4767323732376099, - "learning_rate": 9.715075376884423e-05, - "loss": 5.3347, - "step": 3336 - }, - { - "epoch": 1.7402868318122555, - "grad_norm": 1.3619269132614136, - "learning_rate": 9.71497487437186e-05, - "loss": 6.0317, - "step": 3337 - }, - { - "epoch": 1.7408083441981748, - "grad_norm": 1.4023441076278687, - "learning_rate": 9.714874371859298e-05, - "loss": 6.1441, - "step": 3338 - }, - { - "epoch": 1.7413298565840938, - "grad_norm": 1.679480791091919, - "learning_rate": 9.714773869346734e-05, - "loss": 5.7566, - "step": 3339 - }, - { - "epoch": 1.741851368970013, - "grad_norm": 1.4111641645431519, - "learning_rate": 9.714673366834172e-05, - "loss": 6.1269, - "step": 3340 - }, - { - "epoch": 1.7423728813559323, - "grad_norm": 1.4637426137924194, - "learning_rate": 9.714572864321608e-05, - "loss": 6.3632, - "step": 3341 - }, - { - "epoch": 1.7428943937418513, - "grad_norm": 1.6454591751098633, - "learning_rate": 9.714472361809046e-05, - "loss": 5.5887, - "step": 3342 - }, - { - "epoch": 1.7434159061277705, - "grad_norm": 1.7154889106750488, - "learning_rate": 9.714371859296482e-05, - "loss": 6.1023, - "step": 3343 - }, - { - "epoch": 1.7439374185136898, - "grad_norm": 1.5314412117004395, - "learning_rate": 9.71427135678392e-05, - "loss": 6.0471, - "step": 3344 - }, - { - "epoch": 1.7444589308996088, - "grad_norm": 1.543838381767273, - "learning_rate": 9.714170854271357e-05, - "loss": 6.1968, - "step": 3345 - }, - { - "epoch": 1.744980443285528, - "grad_norm": 1.5923969745635986, - "learning_rate": 9.714070351758794e-05, - "loss": 5.5729, - "step": 3346 - }, - { - "epoch": 1.7455019556714473, - "grad_norm": 1.6715855598449707, - "learning_rate": 9.713969849246232e-05, - "loss": 6.4064, - "step": 3347 - }, - { - "epoch": 1.7460234680573663, - "grad_norm": 1.6406844854354858, - "learning_rate": 9.713869346733669e-05, - "loss": 5.9657, - "step": 3348 - }, - { - "epoch": 1.7465449804432855, - "grad_norm": 1.6807109117507935, - "learning_rate": 9.713768844221106e-05, - "loss": 5.4293, - "step": 3349 - }, - { - "epoch": 1.7470664928292048, - "grad_norm": 1.5913708209991455, - "learning_rate": 9.713668341708543e-05, - "loss": 6.1809, - "step": 3350 - }, - { - "epoch": 1.7475880052151238, - "grad_norm": 1.7646077871322632, - "learning_rate": 9.71356783919598e-05, - "loss": 5.509, - "step": 3351 - }, - { - "epoch": 1.748109517601043, - "grad_norm": 1.5574208498001099, - "learning_rate": 9.713467336683417e-05, - "loss": 5.5986, - "step": 3352 - }, - { - "epoch": 1.7486310299869623, - "grad_norm": 1.732813835144043, - "learning_rate": 9.713366834170855e-05, - "loss": 5.7303, - "step": 3353 - }, - { - "epoch": 1.7491525423728813, - "grad_norm": 1.608209252357483, - "learning_rate": 9.713266331658291e-05, - "loss": 6.1821, - "step": 3354 - }, - { - "epoch": 1.7496740547588006, - "grad_norm": 1.464309811592102, - "learning_rate": 9.713165829145729e-05, - "loss": 6.1374, - "step": 3355 - }, - { - "epoch": 1.7501955671447198, - "grad_norm": 1.570180892944336, - "learning_rate": 9.713065326633167e-05, - "loss": 6.1028, - "step": 3356 - }, - { - "epoch": 1.7507170795306388, - "grad_norm": 1.7300324440002441, - "learning_rate": 9.712964824120605e-05, - "loss": 5.7946, - "step": 3357 - }, - { - "epoch": 1.751238591916558, - "grad_norm": 1.4357713460922241, - "learning_rate": 9.712864321608041e-05, - "loss": 6.3007, - "step": 3358 - }, - { - "epoch": 1.7517601043024773, - "grad_norm": 1.6414669752120972, - "learning_rate": 9.712763819095479e-05, - "loss": 5.4064, - "step": 3359 - }, - { - "epoch": 1.7522816166883963, - "grad_norm": 1.6113923788070679, - "learning_rate": 9.712663316582915e-05, - "loss": 5.6724, - "step": 3360 - }, - { - "epoch": 1.7528031290743156, - "grad_norm": 1.573392391204834, - "learning_rate": 9.712562814070352e-05, - "loss": 6.0677, - "step": 3361 - }, - { - "epoch": 1.7533246414602348, - "grad_norm": 1.553194522857666, - "learning_rate": 9.71246231155779e-05, - "loss": 5.9444, - "step": 3362 - }, - { - "epoch": 1.7538461538461538, - "grad_norm": 1.4644821882247925, - "learning_rate": 9.712361809045226e-05, - "loss": 6.3711, - "step": 3363 - }, - { - "epoch": 1.7543676662320729, - "grad_norm": 1.4744619131088257, - "learning_rate": 9.712261306532664e-05, - "loss": 6.0969, - "step": 3364 - }, - { - "epoch": 1.7548891786179923, - "grad_norm": 1.6132123470306396, - "learning_rate": 9.7121608040201e-05, - "loss": 6.2089, - "step": 3365 - }, - { - "epoch": 1.7554106910039113, - "grad_norm": 1.6961557865142822, - "learning_rate": 9.712060301507538e-05, - "loss": 5.9151, - "step": 3366 - }, - { - "epoch": 1.7559322033898304, - "grad_norm": 1.5807000398635864, - "learning_rate": 9.711959798994976e-05, - "loss": 5.8969, - "step": 3367 - }, - { - "epoch": 1.7564537157757498, - "grad_norm": 1.5537190437316895, - "learning_rate": 9.711859296482413e-05, - "loss": 6.2752, - "step": 3368 - }, - { - "epoch": 1.7569752281616688, - "grad_norm": 1.529908299446106, - "learning_rate": 9.71175879396985e-05, - "loss": 6.1472, - "step": 3369 - }, - { - "epoch": 1.7574967405475879, - "grad_norm": 1.5507049560546875, - "learning_rate": 9.711658291457288e-05, - "loss": 6.1645, - "step": 3370 - }, - { - "epoch": 1.7580182529335073, - "grad_norm": 1.8250657320022583, - "learning_rate": 9.711557788944724e-05, - "loss": 6.0594, - "step": 3371 - }, - { - "epoch": 1.7585397653194264, - "grad_norm": 1.6674892902374268, - "learning_rate": 9.711457286432162e-05, - "loss": 5.9769, - "step": 3372 - }, - { - "epoch": 1.7590612777053454, - "grad_norm": 1.6310733556747437, - "learning_rate": 9.711356783919598e-05, - "loss": 5.8109, - "step": 3373 - }, - { - "epoch": 1.7595827900912646, - "grad_norm": 1.7524957656860352, - "learning_rate": 9.711256281407035e-05, - "loss": 6.1912, - "step": 3374 - }, - { - "epoch": 1.7601043024771839, - "grad_norm": 1.4561899900436401, - "learning_rate": 9.711155778894472e-05, - "loss": 6.1365, - "step": 3375 - }, - { - "epoch": 1.7606258148631029, - "grad_norm": 1.5613276958465576, - "learning_rate": 9.71105527638191e-05, - "loss": 5.9976, - "step": 3376 - }, - { - "epoch": 1.7611473272490221, - "grad_norm": 1.4331763982772827, - "learning_rate": 9.710954773869348e-05, - "loss": 6.2593, - "step": 3377 - }, - { - "epoch": 1.7616688396349414, - "grad_norm": 2.2194039821624756, - "learning_rate": 9.710854271356784e-05, - "loss": 5.8111, - "step": 3378 - }, - { - "epoch": 1.7621903520208604, - "grad_norm": 1.4964829683303833, - "learning_rate": 9.710753768844222e-05, - "loss": 6.0591, - "step": 3379 - }, - { - "epoch": 1.7627118644067796, - "grad_norm": 1.4492355585098267, - "learning_rate": 9.710653266331659e-05, - "loss": 6.2314, - "step": 3380 - }, - { - "epoch": 1.7632333767926989, - "grad_norm": 1.5991970300674438, - "learning_rate": 9.710552763819096e-05, - "loss": 6.2326, - "step": 3381 - }, - { - "epoch": 1.763754889178618, - "grad_norm": 1.5764954090118408, - "learning_rate": 9.710452261306533e-05, - "loss": 6.1551, - "step": 3382 - }, - { - "epoch": 1.7642764015645371, - "grad_norm": 1.803238034248352, - "learning_rate": 9.71035175879397e-05, - "loss": 5.6768, - "step": 3383 - }, - { - "epoch": 1.7647979139504564, - "grad_norm": 1.6471593379974365, - "learning_rate": 9.710251256281407e-05, - "loss": 5.4339, - "step": 3384 - }, - { - "epoch": 1.7653194263363754, - "grad_norm": 1.7341952323913574, - "learning_rate": 9.710150753768845e-05, - "loss": 6.0701, - "step": 3385 - }, - { - "epoch": 1.7658409387222946, - "grad_norm": 1.4746848344802856, - "learning_rate": 9.710050251256281e-05, - "loss": 6.2508, - "step": 3386 - }, - { - "epoch": 1.7663624511082139, - "grad_norm": 1.5259249210357666, - "learning_rate": 9.709949748743719e-05, - "loss": 5.7826, - "step": 3387 - }, - { - "epoch": 1.766883963494133, - "grad_norm": 1.4762948751449585, - "learning_rate": 9.709849246231157e-05, - "loss": 5.9028, - "step": 3388 - }, - { - "epoch": 1.7674054758800521, - "grad_norm": 1.5306518077850342, - "learning_rate": 9.709748743718593e-05, - "loss": 5.9101, - "step": 3389 - }, - { - "epoch": 1.7679269882659714, - "grad_norm": 1.5504504442214966, - "learning_rate": 9.709648241206031e-05, - "loss": 6.2105, - "step": 3390 - }, - { - "epoch": 1.7684485006518904, - "grad_norm": 1.6335573196411133, - "learning_rate": 9.709547738693468e-05, - "loss": 6.0706, - "step": 3391 - }, - { - "epoch": 1.7689700130378097, - "grad_norm": 1.774306058883667, - "learning_rate": 9.709447236180905e-05, - "loss": 5.4662, - "step": 3392 - }, - { - "epoch": 1.769491525423729, - "grad_norm": 1.3517709970474243, - "learning_rate": 9.709346733668342e-05, - "loss": 6.2963, - "step": 3393 - }, - { - "epoch": 1.770013037809648, - "grad_norm": 1.4020494222640991, - "learning_rate": 9.70924623115578e-05, - "loss": 6.194, - "step": 3394 - }, - { - "epoch": 1.7705345501955672, - "grad_norm": 1.624859094619751, - "learning_rate": 9.709145728643216e-05, - "loss": 5.4284, - "step": 3395 - }, - { - "epoch": 1.7710560625814864, - "grad_norm": 1.7723333835601807, - "learning_rate": 9.709045226130654e-05, - "loss": 6.0049, - "step": 3396 - }, - { - "epoch": 1.7715775749674054, - "grad_norm": 1.8757661581039429, - "learning_rate": 9.70894472361809e-05, - "loss": 6.0504, - "step": 3397 - }, - { - "epoch": 1.7720990873533247, - "grad_norm": 1.5301741361618042, - "learning_rate": 9.708844221105528e-05, - "loss": 6.1439, - "step": 3398 - }, - { - "epoch": 1.772620599739244, - "grad_norm": 1.6773927211761475, - "learning_rate": 9.708743718592966e-05, - "loss": 5.8625, - "step": 3399 - }, - { - "epoch": 1.773142112125163, - "grad_norm": 1.6877857446670532, - "learning_rate": 9.708643216080402e-05, - "loss": 5.5765, - "step": 3400 - }, - { - "epoch": 1.7736636245110822, - "grad_norm": 1.3745710849761963, - "learning_rate": 9.70854271356784e-05, - "loss": 6.0789, - "step": 3401 - }, - { - "epoch": 1.7741851368970014, - "grad_norm": 1.8772443532943726, - "learning_rate": 9.708442211055276e-05, - "loss": 5.6929, - "step": 3402 - }, - { - "epoch": 1.7747066492829204, - "grad_norm": 1.653212308883667, - "learning_rate": 9.708341708542714e-05, - "loss": 6.0236, - "step": 3403 - }, - { - "epoch": 1.7752281616688397, - "grad_norm": 1.6786621809005737, - "learning_rate": 9.70824120603015e-05, - "loss": 5.7432, - "step": 3404 - }, - { - "epoch": 1.775749674054759, - "grad_norm": 1.7271103858947754, - "learning_rate": 9.708140703517588e-05, - "loss": 5.6483, - "step": 3405 - }, - { - "epoch": 1.776271186440678, - "grad_norm": 1.6076699495315552, - "learning_rate": 9.708040201005025e-05, - "loss": 5.8815, - "step": 3406 - }, - { - "epoch": 1.7767926988265972, - "grad_norm": 1.5660747289657593, - "learning_rate": 9.707939698492463e-05, - "loss": 5.8855, - "step": 3407 - }, - { - "epoch": 1.7773142112125164, - "grad_norm": 1.5821980237960815, - "learning_rate": 9.7078391959799e-05, - "loss": 6.2526, - "step": 3408 - }, - { - "epoch": 1.7778357235984354, - "grad_norm": 1.4485617876052856, - "learning_rate": 9.707738693467338e-05, - "loss": 6.0431, - "step": 3409 - }, - { - "epoch": 1.7783572359843545, - "grad_norm": 1.438031792640686, - "learning_rate": 9.707638190954775e-05, - "loss": 6.095, - "step": 3410 - }, - { - "epoch": 1.778878748370274, - "grad_norm": 1.4515784978866577, - "learning_rate": 9.707537688442212e-05, - "loss": 6.0939, - "step": 3411 - }, - { - "epoch": 1.779400260756193, - "grad_norm": 1.772674560546875, - "learning_rate": 9.707437185929649e-05, - "loss": 5.6474, - "step": 3412 - }, - { - "epoch": 1.779921773142112, - "grad_norm": 1.5566948652267456, - "learning_rate": 9.707336683417085e-05, - "loss": 6.1361, - "step": 3413 - }, - { - "epoch": 1.7804432855280314, - "grad_norm": 1.7735984325408936, - "learning_rate": 9.707236180904523e-05, - "loss": 5.5231, - "step": 3414 - }, - { - "epoch": 1.7809647979139505, - "grad_norm": 1.4642142057418823, - "learning_rate": 9.70713567839196e-05, - "loss": 6.0834, - "step": 3415 - }, - { - "epoch": 1.7814863102998695, - "grad_norm": 1.6048023700714111, - "learning_rate": 9.707035175879397e-05, - "loss": 5.869, - "step": 3416 - }, - { - "epoch": 1.782007822685789, - "grad_norm": 1.6047550439834595, - "learning_rate": 9.706934673366834e-05, - "loss": 6.0868, - "step": 3417 - }, - { - "epoch": 1.782529335071708, - "grad_norm": 1.5889705419540405, - "learning_rate": 9.706834170854271e-05, - "loss": 6.1874, - "step": 3418 - }, - { - "epoch": 1.783050847457627, - "grad_norm": 1.7397993803024292, - "learning_rate": 9.706733668341709e-05, - "loss": 5.8478, - "step": 3419 - }, - { - "epoch": 1.7835723598435462, - "grad_norm": 1.524584412574768, - "learning_rate": 9.706633165829147e-05, - "loss": 6.2041, - "step": 3420 - }, - { - "epoch": 1.7840938722294655, - "grad_norm": 1.886851191520691, - "learning_rate": 9.706532663316583e-05, - "loss": 6.0316, - "step": 3421 - }, - { - "epoch": 1.7846153846153845, - "grad_norm": 1.659754991531372, - "learning_rate": 9.706432160804021e-05, - "loss": 6.0272, - "step": 3422 - }, - { - "epoch": 1.7851368970013037, - "grad_norm": 1.5530073642730713, - "learning_rate": 9.706331658291458e-05, - "loss": 6.065, - "step": 3423 - }, - { - "epoch": 1.785658409387223, - "grad_norm": 1.7824958562850952, - "learning_rate": 9.706231155778895e-05, - "loss": 5.8887, - "step": 3424 - }, - { - "epoch": 1.786179921773142, - "grad_norm": 1.7361865043640137, - "learning_rate": 9.706130653266332e-05, - "loss": 5.7271, - "step": 3425 - }, - { - "epoch": 1.7867014341590612, - "grad_norm": 1.4935905933380127, - "learning_rate": 9.706030150753768e-05, - "loss": 6.2631, - "step": 3426 - }, - { - "epoch": 1.7872229465449805, - "grad_norm": 1.4938560724258423, - "learning_rate": 9.705929648241206e-05, - "loss": 6.0801, - "step": 3427 - }, - { - "epoch": 1.7877444589308995, - "grad_norm": 1.551841139793396, - "learning_rate": 9.705829145728644e-05, - "loss": 5.8064, - "step": 3428 - }, - { - "epoch": 1.7882659713168187, - "grad_norm": 1.4372341632843018, - "learning_rate": 9.705728643216082e-05, - "loss": 6.0318, - "step": 3429 - }, - { - "epoch": 1.788787483702738, - "grad_norm": 1.7153056859970093, - "learning_rate": 9.705628140703518e-05, - "loss": 5.5594, - "step": 3430 - }, - { - "epoch": 1.789308996088657, - "grad_norm": 1.4397196769714355, - "learning_rate": 9.705527638190956e-05, - "loss": 5.8875, - "step": 3431 - }, - { - "epoch": 1.7898305084745763, - "grad_norm": 1.4717869758605957, - "learning_rate": 9.705427135678392e-05, - "loss": 6.0481, - "step": 3432 - }, - { - "epoch": 1.7903520208604955, - "grad_norm": 1.4425760507583618, - "learning_rate": 9.70532663316583e-05, - "loss": 6.4561, - "step": 3433 - }, - { - "epoch": 1.7908735332464145, - "grad_norm": 1.4299579858779907, - "learning_rate": 9.705226130653266e-05, - "loss": 5.8588, - "step": 3434 - }, - { - "epoch": 1.7913950456323338, - "grad_norm": 1.4884129762649536, - "learning_rate": 9.705125628140704e-05, - "loss": 5.8936, - "step": 3435 - }, - { - "epoch": 1.791916558018253, - "grad_norm": 1.5652891397476196, - "learning_rate": 9.70502512562814e-05, - "loss": 5.9654, - "step": 3436 - }, - { - "epoch": 1.792438070404172, - "grad_norm": 1.4190223217010498, - "learning_rate": 9.704924623115578e-05, - "loss": 6.109, - "step": 3437 - }, - { - "epoch": 1.7929595827900913, - "grad_norm": 1.406278371810913, - "learning_rate": 9.704824120603015e-05, - "loss": 6.2161, - "step": 3438 - }, - { - "epoch": 1.7934810951760105, - "grad_norm": 1.5271124839782715, - "learning_rate": 9.704723618090453e-05, - "loss": 6.2097, - "step": 3439 - }, - { - "epoch": 1.7940026075619295, - "grad_norm": 1.7708873748779297, - "learning_rate": 9.70462311557789e-05, - "loss": 5.065, - "step": 3440 - }, - { - "epoch": 1.7945241199478488, - "grad_norm": 1.5382492542266846, - "learning_rate": 9.704522613065327e-05, - "loss": 6.0505, - "step": 3441 - }, - { - "epoch": 1.795045632333768, - "grad_norm": 1.623565673828125, - "learning_rate": 9.704422110552765e-05, - "loss": 5.8352, - "step": 3442 - }, - { - "epoch": 1.795567144719687, - "grad_norm": 1.801328182220459, - "learning_rate": 9.704321608040201e-05, - "loss": 4.7086, - "step": 3443 - }, - { - "epoch": 1.7960886571056063, - "grad_norm": 1.8106749057769775, - "learning_rate": 9.704221105527639e-05, - "loss": 5.8866, - "step": 3444 - }, - { - "epoch": 1.7966101694915255, - "grad_norm": 1.5402570962905884, - "learning_rate": 9.704120603015075e-05, - "loss": 5.9812, - "step": 3445 - }, - { - "epoch": 1.7971316818774445, - "grad_norm": 1.6236382722854614, - "learning_rate": 9.704020100502513e-05, - "loss": 5.5292, - "step": 3446 - }, - { - "epoch": 1.7976531942633638, - "grad_norm": 1.6535191535949707, - "learning_rate": 9.70391959798995e-05, - "loss": 6.0806, - "step": 3447 - }, - { - "epoch": 1.798174706649283, - "grad_norm": 1.5745586156845093, - "learning_rate": 9.703819095477387e-05, - "loss": 5.8841, - "step": 3448 - }, - { - "epoch": 1.798696219035202, - "grad_norm": 1.4843578338623047, - "learning_rate": 9.703718592964825e-05, - "loss": 6.2798, - "step": 3449 - }, - { - "epoch": 1.7992177314211213, - "grad_norm": 1.9440903663635254, - "learning_rate": 9.703618090452263e-05, - "loss": 5.1926, - "step": 3450 - }, - { - "epoch": 1.7997392438070405, - "grad_norm": 1.854526162147522, - "learning_rate": 9.703517587939699e-05, - "loss": 5.785, - "step": 3451 - }, - { - "epoch": 1.8002607561929596, - "grad_norm": 1.6898843050003052, - "learning_rate": 9.703417085427137e-05, - "loss": 5.9249, - "step": 3452 - }, - { - "epoch": 1.8007822685788788, - "grad_norm": 1.592730164527893, - "learning_rate": 9.703316582914573e-05, - "loss": 5.89, - "step": 3453 - }, - { - "epoch": 1.801303780964798, - "grad_norm": 1.7539761066436768, - "learning_rate": 9.70321608040201e-05, - "loss": 6.1321, - "step": 3454 - }, - { - "epoch": 1.801825293350717, - "grad_norm": 1.6388393640518188, - "learning_rate": 9.703115577889448e-05, - "loss": 5.9846, - "step": 3455 - }, - { - "epoch": 1.802346805736636, - "grad_norm": 1.582400918006897, - "learning_rate": 9.703015075376884e-05, - "loss": 6.0675, - "step": 3456 - }, - { - "epoch": 1.8028683181225555, - "grad_norm": 2.0983474254608154, - "learning_rate": 9.702914572864322e-05, - "loss": 5.7575, - "step": 3457 - }, - { - "epoch": 1.8033898305084746, - "grad_norm": 1.803412675857544, - "learning_rate": 9.702814070351758e-05, - "loss": 6.1308, - "step": 3458 - }, - { - "epoch": 1.8039113428943936, - "grad_norm": 1.8067283630371094, - "learning_rate": 9.702713567839196e-05, - "loss": 5.3885, - "step": 3459 - }, - { - "epoch": 1.804432855280313, - "grad_norm": 1.5002249479293823, - "learning_rate": 9.702613065326634e-05, - "loss": 6.2755, - "step": 3460 - }, - { - "epoch": 1.804954367666232, - "grad_norm": 1.6799871921539307, - "learning_rate": 9.702512562814072e-05, - "loss": 5.9691, - "step": 3461 - }, - { - "epoch": 1.805475880052151, - "grad_norm": 1.5397225618362427, - "learning_rate": 9.702412060301508e-05, - "loss": 6.2171, - "step": 3462 - }, - { - "epoch": 1.8059973924380706, - "grad_norm": 1.6402848958969116, - "learning_rate": 9.702311557788946e-05, - "loss": 5.8595, - "step": 3463 - }, - { - "epoch": 1.8065189048239896, - "grad_norm": 2.529421806335449, - "learning_rate": 9.702211055276382e-05, - "loss": 5.6316, - "step": 3464 - }, - { - "epoch": 1.8070404172099086, - "grad_norm": 1.891155481338501, - "learning_rate": 9.70211055276382e-05, - "loss": 6.0724, - "step": 3465 - }, - { - "epoch": 1.807561929595828, - "grad_norm": 1.5303215980529785, - "learning_rate": 9.702010050251257e-05, - "loss": 6.0312, - "step": 3466 - }, - { - "epoch": 1.808083441981747, - "grad_norm": 1.5245964527130127, - "learning_rate": 9.701909547738693e-05, - "loss": 6.1743, - "step": 3467 - }, - { - "epoch": 1.808604954367666, - "grad_norm": 1.6625903844833374, - "learning_rate": 9.701809045226131e-05, - "loss": 5.9876, - "step": 3468 - }, - { - "epoch": 1.8091264667535853, - "grad_norm": 1.609916090965271, - "learning_rate": 9.701708542713568e-05, - "loss": 5.8721, - "step": 3469 - }, - { - "epoch": 1.8096479791395046, - "grad_norm": 1.5482451915740967, - "learning_rate": 9.701608040201006e-05, - "loss": 5.9456, - "step": 3470 - }, - { - "epoch": 1.8101694915254236, - "grad_norm": 1.290881872177124, - "learning_rate": 9.701507537688443e-05, - "loss": 5.8151, - "step": 3471 - }, - { - "epoch": 1.8106910039113429, - "grad_norm": 1.5847201347351074, - "learning_rate": 9.70140703517588e-05, - "loss": 5.8954, - "step": 3472 - }, - { - "epoch": 1.811212516297262, - "grad_norm": 1.5597525835037231, - "learning_rate": 9.701306532663317e-05, - "loss": 5.3822, - "step": 3473 - }, - { - "epoch": 1.8117340286831811, - "grad_norm": 1.6361896991729736, - "learning_rate": 9.701206030150755e-05, - "loss": 5.9887, - "step": 3474 - }, - { - "epoch": 1.8122555410691004, - "grad_norm": 1.5049463510513306, - "learning_rate": 9.701105527638191e-05, - "loss": 6.4319, - "step": 3475 - }, - { - "epoch": 1.8127770534550196, - "grad_norm": 1.4886294603347778, - "learning_rate": 9.701005025125629e-05, - "loss": 6.2015, - "step": 3476 - }, - { - "epoch": 1.8132985658409386, - "grad_norm": 1.7461503744125366, - "learning_rate": 9.700904522613065e-05, - "loss": 5.7579, - "step": 3477 - }, - { - "epoch": 1.8138200782268579, - "grad_norm": 1.7426531314849854, - "learning_rate": 9.700804020100503e-05, - "loss": 6.0545, - "step": 3478 - }, - { - "epoch": 1.814341590612777, - "grad_norm": 1.6803030967712402, - "learning_rate": 9.70070351758794e-05, - "loss": 6.2379, - "step": 3479 - }, - { - "epoch": 1.8148631029986961, - "grad_norm": 1.469314455986023, - "learning_rate": 9.700603015075377e-05, - "loss": 6.2643, - "step": 3480 - }, - { - "epoch": 1.8153846153846154, - "grad_norm": 1.507590413093567, - "learning_rate": 9.700502512562815e-05, - "loss": 6.1819, - "step": 3481 - }, - { - "epoch": 1.8159061277705346, - "grad_norm": 2.1014201641082764, - "learning_rate": 9.700402010050252e-05, - "loss": 5.8218, - "step": 3482 - }, - { - "epoch": 1.8164276401564536, - "grad_norm": 2.0744240283966064, - "learning_rate": 9.70030150753769e-05, - "loss": 5.8149, - "step": 3483 - }, - { - "epoch": 1.8169491525423729, - "grad_norm": 1.7443263530731201, - "learning_rate": 9.700201005025126e-05, - "loss": 6.0395, - "step": 3484 - }, - { - "epoch": 1.8174706649282921, - "grad_norm": 1.690757393836975, - "learning_rate": 9.700100502512564e-05, - "loss": 6.3058, - "step": 3485 - }, - { - "epoch": 1.8179921773142111, - "grad_norm": 1.668442964553833, - "learning_rate": 9.7e-05, - "loss": 5.9292, - "step": 3486 - }, - { - "epoch": 1.8185136897001304, - "grad_norm": 1.6220622062683105, - "learning_rate": 9.699899497487438e-05, - "loss": 5.8296, - "step": 3487 - }, - { - "epoch": 1.8190352020860496, - "grad_norm": 1.5136754512786865, - "learning_rate": 9.699798994974874e-05, - "loss": 6.0959, - "step": 3488 - }, - { - "epoch": 1.8195567144719687, - "grad_norm": 1.5258347988128662, - "learning_rate": 9.699698492462312e-05, - "loss": 6.4227, - "step": 3489 - }, - { - "epoch": 1.820078226857888, - "grad_norm": 1.4603675603866577, - "learning_rate": 9.69959798994975e-05, - "loss": 6.0721, - "step": 3490 - }, - { - "epoch": 1.8205997392438071, - "grad_norm": 1.6585214138031006, - "learning_rate": 9.699497487437188e-05, - "loss": 5.8413, - "step": 3491 - }, - { - "epoch": 1.8211212516297262, - "grad_norm": 1.7849349975585938, - "learning_rate": 9.699396984924624e-05, - "loss": 5.9434, - "step": 3492 - }, - { - "epoch": 1.8216427640156454, - "grad_norm": 1.6531637907028198, - "learning_rate": 9.69929648241206e-05, - "loss": 6.0698, - "step": 3493 - }, - { - "epoch": 1.8221642764015646, - "grad_norm": 1.6902382373809814, - "learning_rate": 9.699195979899498e-05, - "loss": 5.7329, - "step": 3494 - }, - { - "epoch": 1.8226857887874837, - "grad_norm": 1.61663818359375, - "learning_rate": 9.699095477386935e-05, - "loss": 5.8391, - "step": 3495 - }, - { - "epoch": 1.823207301173403, - "grad_norm": 1.6554762125015259, - "learning_rate": 9.698994974874372e-05, - "loss": 6.0434, - "step": 3496 - }, - { - "epoch": 1.8237288135593221, - "grad_norm": 1.7411292791366577, - "learning_rate": 9.698894472361809e-05, - "loss": 5.5667, - "step": 3497 - }, - { - "epoch": 1.8242503259452412, - "grad_norm": 1.6766211986541748, - "learning_rate": 9.698793969849247e-05, - "loss": 6.034, - "step": 3498 - }, - { - "epoch": 1.8247718383311604, - "grad_norm": 1.5605521202087402, - "learning_rate": 9.698693467336683e-05, - "loss": 6.2319, - "step": 3499 - }, - { - "epoch": 1.8252933507170797, - "grad_norm": 1.7649636268615723, - "learning_rate": 9.698592964824121e-05, - "loss": 5.566, - "step": 3500 - }, - { - "epoch": 1.8258148631029987, - "grad_norm": 1.5264796018600464, - "learning_rate": 9.698492462311559e-05, - "loss": 5.5563, - "step": 3501 - }, - { - "epoch": 1.8263363754889177, - "grad_norm": 1.58991539478302, - "learning_rate": 9.698391959798996e-05, - "loss": 5.8224, - "step": 3502 - }, - { - "epoch": 1.8268578878748372, - "grad_norm": 1.978438138961792, - "learning_rate": 9.698291457286433e-05, - "loss": 5.8882, - "step": 3503 - }, - { - "epoch": 1.8273794002607562, - "grad_norm": 1.487432599067688, - "learning_rate": 9.69819095477387e-05, - "loss": 6.0081, - "step": 3504 - }, - { - "epoch": 1.8279009126466752, - "grad_norm": 1.5642988681793213, - "learning_rate": 9.698090452261307e-05, - "loss": 6.0111, - "step": 3505 - }, - { - "epoch": 1.8284224250325947, - "grad_norm": 1.683190107345581, - "learning_rate": 9.697989949748743e-05, - "loss": 6.2369, - "step": 3506 - }, - { - "epoch": 1.8289439374185137, - "grad_norm": 1.4596670866012573, - "learning_rate": 9.697889447236181e-05, - "loss": 6.2156, - "step": 3507 - }, - { - "epoch": 1.8294654498044327, - "grad_norm": 1.307924509048462, - "learning_rate": 9.697788944723618e-05, - "loss": 5.8237, - "step": 3508 - }, - { - "epoch": 1.8299869621903522, - "grad_norm": 1.692699909210205, - "learning_rate": 9.697688442211055e-05, - "loss": 5.6566, - "step": 3509 - }, - { - "epoch": 1.8305084745762712, - "grad_norm": 1.6478583812713623, - "learning_rate": 9.697587939698493e-05, - "loss": 5.3311, - "step": 3510 - }, - { - "epoch": 1.8310299869621902, - "grad_norm": 1.4691568613052368, - "learning_rate": 9.697487437185931e-05, - "loss": 6.2814, - "step": 3511 - }, - { - "epoch": 1.8315514993481097, - "grad_norm": 1.7584285736083984, - "learning_rate": 9.697386934673367e-05, - "loss": 6.0498, - "step": 3512 - }, - { - "epoch": 1.8320730117340287, - "grad_norm": 1.5221638679504395, - "learning_rate": 9.697286432160805e-05, - "loss": 6.0866, - "step": 3513 - }, - { - "epoch": 1.8325945241199477, - "grad_norm": 1.6671223640441895, - "learning_rate": 9.697185929648242e-05, - "loss": 5.5608, - "step": 3514 - }, - { - "epoch": 1.833116036505867, - "grad_norm": 1.5549525022506714, - "learning_rate": 9.69708542713568e-05, - "loss": 6.2136, - "step": 3515 - }, - { - "epoch": 1.8336375488917862, - "grad_norm": 1.5825947523117065, - "learning_rate": 9.696984924623116e-05, - "loss": 5.8405, - "step": 3516 - }, - { - "epoch": 1.8341590612777052, - "grad_norm": 1.693906545639038, - "learning_rate": 9.696884422110554e-05, - "loss": 5.5286, - "step": 3517 - }, - { - "epoch": 1.8346805736636245, - "grad_norm": 1.7293037176132202, - "learning_rate": 9.69678391959799e-05, - "loss": 6.0295, - "step": 3518 - }, - { - "epoch": 1.8352020860495437, - "grad_norm": 1.5592045783996582, - "learning_rate": 9.696683417085426e-05, - "loss": 6.2459, - "step": 3519 - }, - { - "epoch": 1.8357235984354627, - "grad_norm": 1.4750192165374756, - "learning_rate": 9.696582914572864e-05, - "loss": 5.811, - "step": 3520 - }, - { - "epoch": 1.836245110821382, - "grad_norm": 1.6404991149902344, - "learning_rate": 9.696482412060302e-05, - "loss": 5.3284, - "step": 3521 - }, - { - "epoch": 1.8367666232073012, - "grad_norm": 1.6890718936920166, - "learning_rate": 9.69638190954774e-05, - "loss": 6.0738, - "step": 3522 - }, - { - "epoch": 1.8372881355932202, - "grad_norm": 1.5716408491134644, - "learning_rate": 9.696281407035176e-05, - "loss": 5.7744, - "step": 3523 - }, - { - "epoch": 1.8378096479791395, - "grad_norm": 1.584784984588623, - "learning_rate": 9.696180904522614e-05, - "loss": 6.1147, - "step": 3524 - }, - { - "epoch": 1.8383311603650587, - "grad_norm": 1.6987148523330688, - "learning_rate": 9.69608040201005e-05, - "loss": 5.9771, - "step": 3525 - }, - { - "epoch": 1.8388526727509777, - "grad_norm": 1.8942301273345947, - "learning_rate": 9.695979899497488e-05, - "loss": 5.7769, - "step": 3526 - }, - { - "epoch": 1.839374185136897, - "grad_norm": 1.4818071126937866, - "learning_rate": 9.695879396984925e-05, - "loss": 6.1555, - "step": 3527 - }, - { - "epoch": 1.8398956975228162, - "grad_norm": 1.7362920045852661, - "learning_rate": 9.695778894472362e-05, - "loss": 5.8292, - "step": 3528 - }, - { - "epoch": 1.8404172099087353, - "grad_norm": 1.4890836477279663, - "learning_rate": 9.695678391959799e-05, - "loss": 5.929, - "step": 3529 - }, - { - "epoch": 1.8409387222946545, - "grad_norm": 1.589530348777771, - "learning_rate": 9.695577889447237e-05, - "loss": 5.9247, - "step": 3530 - }, - { - "epoch": 1.8414602346805737, - "grad_norm": 1.4957821369171143, - "learning_rate": 9.695477386934674e-05, - "loss": 6.0141, - "step": 3531 - }, - { - "epoch": 1.8419817470664928, - "grad_norm": 1.649237036705017, - "learning_rate": 9.695376884422112e-05, - "loss": 6.0488, - "step": 3532 - }, - { - "epoch": 1.842503259452412, - "grad_norm": 1.5398844480514526, - "learning_rate": 9.695276381909549e-05, - "loss": 5.5527, - "step": 3533 - }, - { - "epoch": 1.8430247718383312, - "grad_norm": 1.643724799156189, - "learning_rate": 9.695175879396985e-05, - "loss": 6.1538, - "step": 3534 - }, - { - "epoch": 1.8435462842242503, - "grad_norm": 1.4018816947937012, - "learning_rate": 9.695075376884423e-05, - "loss": 6.3606, - "step": 3535 - }, - { - "epoch": 1.8440677966101695, - "grad_norm": 1.5994594097137451, - "learning_rate": 9.694974874371859e-05, - "loss": 5.717, - "step": 3536 - }, - { - "epoch": 1.8445893089960888, - "grad_norm": 1.5260415077209473, - "learning_rate": 9.694874371859297e-05, - "loss": 6.0545, - "step": 3537 - }, - { - "epoch": 1.8451108213820078, - "grad_norm": 1.6104403734207153, - "learning_rate": 9.694773869346733e-05, - "loss": 5.9096, - "step": 3538 - }, - { - "epoch": 1.845632333767927, - "grad_norm": 1.954505443572998, - "learning_rate": 9.694673366834171e-05, - "loss": 5.7639, - "step": 3539 - }, - { - "epoch": 1.8461538461538463, - "grad_norm": 1.5488784313201904, - "learning_rate": 9.694572864321608e-05, - "loss": 5.9992, - "step": 3540 - }, - { - "epoch": 1.8466753585397653, - "grad_norm": 1.4249992370605469, - "learning_rate": 9.694472361809045e-05, - "loss": 6.205, - "step": 3541 - }, - { - "epoch": 1.8471968709256845, - "grad_norm": 1.4550275802612305, - "learning_rate": 9.694371859296483e-05, - "loss": 6.0722, - "step": 3542 - }, - { - "epoch": 1.8477183833116038, - "grad_norm": 1.8217707872390747, - "learning_rate": 9.694271356783921e-05, - "loss": 5.5045, - "step": 3543 - }, - { - "epoch": 1.8482398956975228, - "grad_norm": 1.561005711555481, - "learning_rate": 9.694170854271357e-05, - "loss": 6.2676, - "step": 3544 - }, - { - "epoch": 1.848761408083442, - "grad_norm": 1.5794198513031006, - "learning_rate": 9.694070351758795e-05, - "loss": 5.897, - "step": 3545 - }, - { - "epoch": 1.8492829204693613, - "grad_norm": 1.5415103435516357, - "learning_rate": 9.693969849246232e-05, - "loss": 5.985, - "step": 3546 - }, - { - "epoch": 1.8498044328552803, - "grad_norm": 1.5353405475616455, - "learning_rate": 9.693869346733668e-05, - "loss": 6.0127, - "step": 3547 - }, - { - "epoch": 1.8503259452411995, - "grad_norm": 1.559869647026062, - "learning_rate": 9.693768844221106e-05, - "loss": 6.1704, - "step": 3548 - }, - { - "epoch": 1.8508474576271188, - "grad_norm": 1.6556882858276367, - "learning_rate": 9.693668341708542e-05, - "loss": 5.8365, - "step": 3549 - }, - { - "epoch": 1.8513689700130378, - "grad_norm": 1.6140103340148926, - "learning_rate": 9.69356783919598e-05, - "loss": 5.6238, - "step": 3550 - }, - { - "epoch": 1.8518904823989568, - "grad_norm": 1.4674066305160522, - "learning_rate": 9.693467336683418e-05, - "loss": 6.0331, - "step": 3551 - }, - { - "epoch": 1.8524119947848763, - "grad_norm": 1.599793791770935, - "learning_rate": 9.693366834170856e-05, - "loss": 5.2199, - "step": 3552 - }, - { - "epoch": 1.8529335071707953, - "grad_norm": 1.4060035943984985, - "learning_rate": 9.693266331658292e-05, - "loss": 5.7664, - "step": 3553 - }, - { - "epoch": 1.8534550195567143, - "grad_norm": 1.574790358543396, - "learning_rate": 9.69316582914573e-05, - "loss": 6.0178, - "step": 3554 - }, - { - "epoch": 1.8539765319426338, - "grad_norm": 1.4803346395492554, - "learning_rate": 9.693065326633166e-05, - "loss": 6.2984, - "step": 3555 - }, - { - "epoch": 1.8544980443285528, - "grad_norm": 1.8222908973693848, - "learning_rate": 9.692964824120604e-05, - "loss": 5.6655, - "step": 3556 - }, - { - "epoch": 1.8550195567144718, - "grad_norm": 1.6952451467514038, - "learning_rate": 9.69286432160804e-05, - "loss": 5.886, - "step": 3557 - }, - { - "epoch": 1.8555410691003913, - "grad_norm": 1.5570813417434692, - "learning_rate": 9.692763819095478e-05, - "loss": 6.1473, - "step": 3558 - }, - { - "epoch": 1.8560625814863103, - "grad_norm": 1.4632219076156616, - "learning_rate": 9.692663316582915e-05, - "loss": 5.9331, - "step": 3559 - }, - { - "epoch": 1.8565840938722293, - "grad_norm": 1.6619189977645874, - "learning_rate": 9.692562814070351e-05, - "loss": 5.6498, - "step": 3560 - }, - { - "epoch": 1.8571056062581486, - "grad_norm": 1.6818832159042358, - "learning_rate": 9.692462311557789e-05, - "loss": 5.9141, - "step": 3561 - }, - { - "epoch": 1.8576271186440678, - "grad_norm": 1.857456922531128, - "learning_rate": 9.692361809045227e-05, - "loss": 5.522, - "step": 3562 - }, - { - "epoch": 1.8581486310299868, - "grad_norm": 1.5724027156829834, - "learning_rate": 9.692261306532665e-05, - "loss": 5.8487, - "step": 3563 - }, - { - "epoch": 1.858670143415906, - "grad_norm": 1.577483057975769, - "learning_rate": 9.692160804020101e-05, - "loss": 6.0312, - "step": 3564 - }, - { - "epoch": 1.8591916558018253, - "grad_norm": 1.5000864267349243, - "learning_rate": 9.692060301507539e-05, - "loss": 6.1475, - "step": 3565 - }, - { - "epoch": 1.8597131681877443, - "grad_norm": 1.5401833057403564, - "learning_rate": 9.691959798994975e-05, - "loss": 5.7778, - "step": 3566 - }, - { - "epoch": 1.8602346805736636, - "grad_norm": 1.595280408859253, - "learning_rate": 9.691859296482413e-05, - "loss": 5.6764, - "step": 3567 - }, - { - "epoch": 1.8607561929595828, - "grad_norm": 1.5504183769226074, - "learning_rate": 9.69175879396985e-05, - "loss": 6.2006, - "step": 3568 - }, - { - "epoch": 1.8612777053455019, - "grad_norm": 1.5815843343734741, - "learning_rate": 9.691658291457287e-05, - "loss": 6.287, - "step": 3569 - }, - { - "epoch": 1.861799217731421, - "grad_norm": 1.5669087171554565, - "learning_rate": 9.691557788944724e-05, - "loss": 6.1947, - "step": 3570 - }, - { - "epoch": 1.8623207301173403, - "grad_norm": 1.7047145366668701, - "learning_rate": 9.691457286432161e-05, - "loss": 5.807, - "step": 3571 - }, - { - "epoch": 1.8628422425032594, - "grad_norm": 1.723549246788025, - "learning_rate": 9.691356783919598e-05, - "loss": 5.7011, - "step": 3572 - }, - { - "epoch": 1.8633637548891786, - "grad_norm": 1.532957673072815, - "learning_rate": 9.691256281407036e-05, - "loss": 6.1912, - "step": 3573 - }, - { - "epoch": 1.8638852672750978, - "grad_norm": 1.662353754043579, - "learning_rate": 9.691155778894473e-05, - "loss": 6.0034, - "step": 3574 - }, - { - "epoch": 1.8644067796610169, - "grad_norm": 1.577138066291809, - "learning_rate": 9.69105527638191e-05, - "loss": 6.0462, - "step": 3575 - }, - { - "epoch": 1.864928292046936, - "grad_norm": 1.9192328453063965, - "learning_rate": 9.690954773869348e-05, - "loss": 5.8168, - "step": 3576 - }, - { - "epoch": 1.8654498044328554, - "grad_norm": 2.001338481903076, - "learning_rate": 9.690854271356784e-05, - "loss": 5.673, - "step": 3577 - }, - { - "epoch": 1.8659713168187744, - "grad_norm": 1.696831464767456, - "learning_rate": 9.690753768844222e-05, - "loss": 5.8345, - "step": 3578 - }, - { - "epoch": 1.8664928292046936, - "grad_norm": 1.3572980165481567, - "learning_rate": 9.690653266331658e-05, - "loss": 6.1702, - "step": 3579 - }, - { - "epoch": 1.8670143415906129, - "grad_norm": 1.3938509225845337, - "learning_rate": 9.690552763819096e-05, - "loss": 6.0875, - "step": 3580 - }, - { - "epoch": 1.8675358539765319, - "grad_norm": 1.474871277809143, - "learning_rate": 9.690452261306532e-05, - "loss": 5.8459, - "step": 3581 - }, - { - "epoch": 1.8680573663624511, - "grad_norm": 1.4626671075820923, - "learning_rate": 9.69035175879397e-05, - "loss": 6.2527, - "step": 3582 - }, - { - "epoch": 1.8685788787483704, - "grad_norm": 1.4968199729919434, - "learning_rate": 9.690251256281408e-05, - "loss": 5.7092, - "step": 3583 - }, - { - "epoch": 1.8691003911342894, - "grad_norm": 1.5720598697662354, - "learning_rate": 9.690150753768846e-05, - "loss": 5.9111, - "step": 3584 - }, - { - "epoch": 1.8696219035202086, - "grad_norm": 1.4595754146575928, - "learning_rate": 9.690050251256282e-05, - "loss": 6.2048, - "step": 3585 - }, - { - "epoch": 1.8701434159061279, - "grad_norm": 1.3937532901763916, - "learning_rate": 9.689949748743719e-05, - "loss": 6.111, - "step": 3586 - }, - { - "epoch": 1.870664928292047, - "grad_norm": 1.5596139430999756, - "learning_rate": 9.689849246231156e-05, - "loss": 5.6974, - "step": 3587 - }, - { - "epoch": 1.8711864406779661, - "grad_norm": 1.8474706411361694, - "learning_rate": 9.689748743718593e-05, - "loss": 5.6772, - "step": 3588 - }, - { - "epoch": 1.8717079530638854, - "grad_norm": 1.7659916877746582, - "learning_rate": 9.68964824120603e-05, - "loss": 6.1995, - "step": 3589 - }, - { - "epoch": 1.8722294654498044, - "grad_norm": 1.4868820905685425, - "learning_rate": 9.689547738693467e-05, - "loss": 6.3916, - "step": 3590 - }, - { - "epoch": 1.8727509778357236, - "grad_norm": 1.5156104564666748, - "learning_rate": 9.689447236180905e-05, - "loss": 5.9658, - "step": 3591 - }, - { - "epoch": 1.8732724902216429, - "grad_norm": 1.9636346101760864, - "learning_rate": 9.689346733668341e-05, - "loss": 5.6547, - "step": 3592 - }, - { - "epoch": 1.873794002607562, - "grad_norm": 1.7381705045700073, - "learning_rate": 9.689246231155779e-05, - "loss": 6.1177, - "step": 3593 - }, - { - "epoch": 1.8743155149934811, - "grad_norm": 1.7234363555908203, - "learning_rate": 9.689145728643217e-05, - "loss": 5.7103, - "step": 3594 - }, - { - "epoch": 1.8748370273794004, - "grad_norm": 1.4314123392105103, - "learning_rate": 9.689045226130655e-05, - "loss": 6.0758, - "step": 3595 - }, - { - "epoch": 1.8753585397653194, - "grad_norm": 1.7011404037475586, - "learning_rate": 9.688944723618091e-05, - "loss": 6.1152, - "step": 3596 - }, - { - "epoch": 1.8758800521512384, - "grad_norm": 1.7060736417770386, - "learning_rate": 9.688844221105529e-05, - "loss": 6.0837, - "step": 3597 - }, - { - "epoch": 1.876401564537158, - "grad_norm": 1.57675302028656, - "learning_rate": 9.688743718592965e-05, - "loss": 6.4183, - "step": 3598 - }, - { - "epoch": 1.876923076923077, - "grad_norm": 1.592848539352417, - "learning_rate": 9.688643216080402e-05, - "loss": 5.8429, - "step": 3599 - }, - { - "epoch": 1.877444589308996, - "grad_norm": 2.520181894302368, - "learning_rate": 9.68854271356784e-05, - "loss": 5.3106, - "step": 3600 - }, - { - "epoch": 1.8779661016949154, - "grad_norm": 1.861150860786438, - "learning_rate": 9.688442211055276e-05, - "loss": 5.5475, - "step": 3601 - }, - { - "epoch": 1.8784876140808344, - "grad_norm": 1.7574419975280762, - "learning_rate": 9.688341708542714e-05, - "loss": 5.9741, - "step": 3602 - }, - { - "epoch": 1.8790091264667534, - "grad_norm": 1.8051307201385498, - "learning_rate": 9.688241206030151e-05, - "loss": 5.9724, - "step": 3603 - }, - { - "epoch": 1.879530638852673, - "grad_norm": 1.9332389831542969, - "learning_rate": 9.688140703517589e-05, - "loss": 5.9075, - "step": 3604 - }, - { - "epoch": 1.880052151238592, - "grad_norm": 1.6104470491409302, - "learning_rate": 9.688040201005026e-05, - "loss": 5.9914, - "step": 3605 - }, - { - "epoch": 1.880573663624511, - "grad_norm": 1.6659700870513916, - "learning_rate": 9.687939698492463e-05, - "loss": 6.073, - "step": 3606 - }, - { - "epoch": 1.8810951760104304, - "grad_norm": 1.621062994003296, - "learning_rate": 9.6878391959799e-05, - "loss": 5.5826, - "step": 3607 - }, - { - "epoch": 1.8816166883963494, - "grad_norm": 1.5182889699935913, - "learning_rate": 9.687738693467338e-05, - "loss": 6.1019, - "step": 3608 - }, - { - "epoch": 1.8821382007822685, - "grad_norm": 1.5832126140594482, - "learning_rate": 9.687638190954774e-05, - "loss": 6.1679, - "step": 3609 - }, - { - "epoch": 1.8826597131681877, - "grad_norm": 1.609300136566162, - "learning_rate": 9.687537688442212e-05, - "loss": 6.16, - "step": 3610 - }, - { - "epoch": 1.883181225554107, - "grad_norm": 1.4409281015396118, - "learning_rate": 9.687437185929648e-05, - "loss": 6.409, - "step": 3611 - }, - { - "epoch": 1.883702737940026, - "grad_norm": 1.9251627922058105, - "learning_rate": 9.687336683417086e-05, - "loss": 5.3217, - "step": 3612 - }, - { - "epoch": 1.8842242503259452, - "grad_norm": 1.660509467124939, - "learning_rate": 9.687236180904522e-05, - "loss": 5.8955, - "step": 3613 - }, - { - "epoch": 1.8847457627118644, - "grad_norm": 1.8304067850112915, - "learning_rate": 9.68713567839196e-05, - "loss": 5.4229, - "step": 3614 - }, - { - "epoch": 1.8852672750977835, - "grad_norm": 1.6469128131866455, - "learning_rate": 9.687035175879398e-05, - "loss": 6.1424, - "step": 3615 - }, - { - "epoch": 1.8857887874837027, - "grad_norm": 1.35207998752594, - "learning_rate": 9.686934673366834e-05, - "loss": 5.8327, - "step": 3616 - }, - { - "epoch": 1.886310299869622, - "grad_norm": 1.7950124740600586, - "learning_rate": 9.686834170854272e-05, - "loss": 5.6104, - "step": 3617 - }, - { - "epoch": 1.886831812255541, - "grad_norm": 1.408860445022583, - "learning_rate": 9.686733668341709e-05, - "loss": 6.1429, - "step": 3618 - }, - { - "epoch": 1.8873533246414602, - "grad_norm": 1.6640617847442627, - "learning_rate": 9.686633165829146e-05, - "loss": 5.9708, - "step": 3619 - }, - { - "epoch": 1.8878748370273795, - "grad_norm": 1.5460660457611084, - "learning_rate": 9.686532663316583e-05, - "loss": 6.082, - "step": 3620 - }, - { - "epoch": 1.8883963494132985, - "grad_norm": 1.579760193824768, - "learning_rate": 9.686432160804021e-05, - "loss": 6.1989, - "step": 3621 - }, - { - "epoch": 1.8889178617992177, - "grad_norm": 1.460542917251587, - "learning_rate": 9.686331658291457e-05, - "loss": 6.0314, - "step": 3622 - }, - { - "epoch": 1.889439374185137, - "grad_norm": 1.4485174417495728, - "learning_rate": 9.686231155778895e-05, - "loss": 6.1999, - "step": 3623 - }, - { - "epoch": 1.889960886571056, - "grad_norm": 1.6901365518569946, - "learning_rate": 9.686130653266333e-05, - "loss": 5.3317, - "step": 3624 - }, - { - "epoch": 1.8904823989569752, - "grad_norm": 1.723552942276001, - "learning_rate": 9.68603015075377e-05, - "loss": 5.6563, - "step": 3625 - }, - { - "epoch": 1.8910039113428945, - "grad_norm": 2.3357629776000977, - "learning_rate": 9.685929648241207e-05, - "loss": 5.672, - "step": 3626 - }, - { - "epoch": 1.8915254237288135, - "grad_norm": 1.4890269041061401, - "learning_rate": 9.685829145728643e-05, - "loss": 6.1165, - "step": 3627 - }, - { - "epoch": 1.8920469361147327, - "grad_norm": 2.029008388519287, - "learning_rate": 9.685728643216081e-05, - "loss": 5.5383, - "step": 3628 - }, - { - "epoch": 1.892568448500652, - "grad_norm": 1.7153542041778564, - "learning_rate": 9.685628140703518e-05, - "loss": 5.7703, - "step": 3629 - }, - { - "epoch": 1.893089960886571, - "grad_norm": 1.4662227630615234, - "learning_rate": 9.685527638190955e-05, - "loss": 6.0931, - "step": 3630 - }, - { - "epoch": 1.8936114732724902, - "grad_norm": 1.8050633668899536, - "learning_rate": 9.685427135678392e-05, - "loss": 5.6545, - "step": 3631 - }, - { - "epoch": 1.8941329856584095, - "grad_norm": 1.5558801889419556, - "learning_rate": 9.68532663316583e-05, - "loss": 5.9254, - "step": 3632 - }, - { - "epoch": 1.8946544980443285, - "grad_norm": 1.4596816301345825, - "learning_rate": 9.685226130653266e-05, - "loss": 6.1951, - "step": 3633 - }, - { - "epoch": 1.8951760104302477, - "grad_norm": 1.4725292921066284, - "learning_rate": 9.685125628140704e-05, - "loss": 6.156, - "step": 3634 - }, - { - "epoch": 1.895697522816167, - "grad_norm": 1.5823264122009277, - "learning_rate": 9.685025125628142e-05, - "loss": 6.1368, - "step": 3635 - }, - { - "epoch": 1.896219035202086, - "grad_norm": 1.4274561405181885, - "learning_rate": 9.684924623115579e-05, - "loss": 6.1916, - "step": 3636 - }, - { - "epoch": 1.8967405475880053, - "grad_norm": 1.5348559617996216, - "learning_rate": 9.684824120603016e-05, - "loss": 6.1261, - "step": 3637 - }, - { - "epoch": 1.8972620599739245, - "grad_norm": 1.5813374519348145, - "learning_rate": 9.684723618090454e-05, - "loss": 6.1783, - "step": 3638 - }, - { - "epoch": 1.8977835723598435, - "grad_norm": 1.9786345958709717, - "learning_rate": 9.68462311557789e-05, - "loss": 5.6011, - "step": 3639 - }, - { - "epoch": 1.8983050847457628, - "grad_norm": 1.7165980339050293, - "learning_rate": 9.684522613065326e-05, - "loss": 5.8182, - "step": 3640 - }, - { - "epoch": 1.898826597131682, - "grad_norm": 1.8394607305526733, - "learning_rate": 9.684422110552764e-05, - "loss": 5.8259, - "step": 3641 - }, - { - "epoch": 1.899348109517601, - "grad_norm": 1.7221581935882568, - "learning_rate": 9.6843216080402e-05, - "loss": 5.7388, - "step": 3642 - }, - { - "epoch": 1.89986962190352, - "grad_norm": 1.5938208103179932, - "learning_rate": 9.684221105527638e-05, - "loss": 5.613, - "step": 3643 - }, - { - "epoch": 1.9003911342894395, - "grad_norm": 1.4545577764511108, - "learning_rate": 9.684120603015076e-05, - "loss": 5.9846, - "step": 3644 - }, - { - "epoch": 1.9009126466753585, - "grad_norm": 1.4908982515335083, - "learning_rate": 9.684020100502514e-05, - "loss": 5.8721, - "step": 3645 - }, - { - "epoch": 1.9014341590612776, - "grad_norm": 1.6030458211898804, - "learning_rate": 9.68391959798995e-05, - "loss": 6.164, - "step": 3646 - }, - { - "epoch": 1.901955671447197, - "grad_norm": 1.5814929008483887, - "learning_rate": 9.683819095477388e-05, - "loss": 5.4399, - "step": 3647 - }, - { - "epoch": 1.902477183833116, - "grad_norm": 1.5326217412948608, - "learning_rate": 9.683718592964825e-05, - "loss": 6.2265, - "step": 3648 - }, - { - "epoch": 1.902998696219035, - "grad_norm": 1.4068875312805176, - "learning_rate": 9.683618090452262e-05, - "loss": 6.3311, - "step": 3649 - }, - { - "epoch": 1.9035202086049545, - "grad_norm": 1.43031644821167, - "learning_rate": 9.683517587939699e-05, - "loss": 6.3475, - "step": 3650 - }, - { - "epoch": 1.9040417209908735, - "grad_norm": 1.4738000631332397, - "learning_rate": 9.683417085427137e-05, - "loss": 5.8946, - "step": 3651 - }, - { - "epoch": 1.9045632333767926, - "grad_norm": 1.7692428827285767, - "learning_rate": 9.683316582914573e-05, - "loss": 5.363, - "step": 3652 - }, - { - "epoch": 1.905084745762712, - "grad_norm": 1.4844504594802856, - "learning_rate": 9.68321608040201e-05, - "loss": 5.8269, - "step": 3653 - }, - { - "epoch": 1.905606258148631, - "grad_norm": 1.7501931190490723, - "learning_rate": 9.683115577889447e-05, - "loss": 6.0528, - "step": 3654 - }, - { - "epoch": 1.90612777053455, - "grad_norm": 1.7964050769805908, - "learning_rate": 9.683015075376885e-05, - "loss": 5.849, - "step": 3655 - }, - { - "epoch": 1.9066492829204693, - "grad_norm": 1.3265413045883179, - "learning_rate": 9.682914572864323e-05, - "loss": 6.1906, - "step": 3656 - }, - { - "epoch": 1.9071707953063886, - "grad_norm": 1.7927899360656738, - "learning_rate": 9.682814070351759e-05, - "loss": 5.7423, - "step": 3657 - }, - { - "epoch": 1.9076923076923076, - "grad_norm": 1.301763653755188, - "learning_rate": 9.682713567839197e-05, - "loss": 6.3257, - "step": 3658 - }, - { - "epoch": 1.9082138200782268, - "grad_norm": 1.6177808046340942, - "learning_rate": 9.682613065326633e-05, - "loss": 5.9454, - "step": 3659 - }, - { - "epoch": 1.908735332464146, - "grad_norm": 2.0057876110076904, - "learning_rate": 9.682512562814071e-05, - "loss": 5.9168, - "step": 3660 - }, - { - "epoch": 1.909256844850065, - "grad_norm": 1.649571180343628, - "learning_rate": 9.682412060301508e-05, - "loss": 5.9585, - "step": 3661 - }, - { - "epoch": 1.9097783572359843, - "grad_norm": 1.9685922861099243, - "learning_rate": 9.682311557788945e-05, - "loss": 5.5481, - "step": 3662 - }, - { - "epoch": 1.9102998696219036, - "grad_norm": 1.5866222381591797, - "learning_rate": 9.682211055276382e-05, - "loss": 5.9588, - "step": 3663 - }, - { - "epoch": 1.9108213820078226, - "grad_norm": 1.67631196975708, - "learning_rate": 9.68211055276382e-05, - "loss": 6.1244, - "step": 3664 - }, - { - "epoch": 1.9113428943937418, - "grad_norm": 1.4832837581634521, - "learning_rate": 9.682010050251257e-05, - "loss": 5.9704, - "step": 3665 - }, - { - "epoch": 1.911864406779661, - "grad_norm": 1.8317663669586182, - "learning_rate": 9.681909547738694e-05, - "loss": 5.3972, - "step": 3666 - }, - { - "epoch": 1.91238591916558, - "grad_norm": 1.7771217823028564, - "learning_rate": 9.681809045226132e-05, - "loss": 6.0876, - "step": 3667 - }, - { - "epoch": 1.9129074315514993, - "grad_norm": 1.3874270915985107, - "learning_rate": 9.681708542713568e-05, - "loss": 5.7121, - "step": 3668 - }, - { - "epoch": 1.9134289439374186, - "grad_norm": 1.506945252418518, - "learning_rate": 9.681608040201006e-05, - "loss": 6.0447, - "step": 3669 - }, - { - "epoch": 1.9139504563233376, - "grad_norm": 1.4367058277130127, - "learning_rate": 9.681507537688442e-05, - "loss": 5.656, - "step": 3670 - }, - { - "epoch": 1.9144719687092568, - "grad_norm": 1.5422313213348389, - "learning_rate": 9.68140703517588e-05, - "loss": 5.9431, - "step": 3671 - }, - { - "epoch": 1.914993481095176, - "grad_norm": 1.6148920059204102, - "learning_rate": 9.681306532663316e-05, - "loss": 5.8026, - "step": 3672 - }, - { - "epoch": 1.915514993481095, - "grad_norm": 1.6064120531082153, - "learning_rate": 9.681206030150754e-05, - "loss": 5.8293, - "step": 3673 - }, - { - "epoch": 1.9160365058670144, - "grad_norm": 1.6061739921569824, - "learning_rate": 9.68110552763819e-05, - "loss": 5.6975, - "step": 3674 - }, - { - "epoch": 1.9165580182529336, - "grad_norm": 1.921130657196045, - "learning_rate": 9.681005025125628e-05, - "loss": 5.4473, - "step": 3675 - }, - { - "epoch": 1.9170795306388526, - "grad_norm": 1.4987750053405762, - "learning_rate": 9.680904522613066e-05, - "loss": 6.0276, - "step": 3676 - }, - { - "epoch": 1.9176010430247719, - "grad_norm": 1.5462708473205566, - "learning_rate": 9.680804020100504e-05, - "loss": 6.0424, - "step": 3677 - }, - { - "epoch": 1.918122555410691, - "grad_norm": 1.6817291975021362, - "learning_rate": 9.68070351758794e-05, - "loss": 5.3902, - "step": 3678 - }, - { - "epoch": 1.9186440677966101, - "grad_norm": 1.459525227546692, - "learning_rate": 9.680603015075377e-05, - "loss": 5.698, - "step": 3679 - }, - { - "epoch": 1.9191655801825294, - "grad_norm": 1.5248008966445923, - "learning_rate": 9.680502512562815e-05, - "loss": 5.7205, - "step": 3680 - }, - { - "epoch": 1.9196870925684486, - "grad_norm": 1.4505418539047241, - "learning_rate": 9.680402010050251e-05, - "loss": 5.5123, - "step": 3681 - }, - { - "epoch": 1.9202086049543676, - "grad_norm": 1.4754718542099, - "learning_rate": 9.680301507537689e-05, - "loss": 6.1092, - "step": 3682 - }, - { - "epoch": 1.9207301173402869, - "grad_norm": 1.613889455795288, - "learning_rate": 9.680201005025125e-05, - "loss": 6.1246, - "step": 3683 - }, - { - "epoch": 1.9212516297262061, - "grad_norm": 1.6242589950561523, - "learning_rate": 9.680100502512563e-05, - "loss": 5.9992, - "step": 3684 - }, - { - "epoch": 1.9217731421121251, - "grad_norm": 1.5692026615142822, - "learning_rate": 9.680000000000001e-05, - "loss": 5.8122, - "step": 3685 - }, - { - "epoch": 1.9222946544980444, - "grad_norm": 1.7350645065307617, - "learning_rate": 9.679899497487439e-05, - "loss": 5.7234, - "step": 3686 - }, - { - "epoch": 1.9228161668839636, - "grad_norm": 1.5124965906143188, - "learning_rate": 9.679798994974875e-05, - "loss": 5.7415, - "step": 3687 - }, - { - "epoch": 1.9233376792698826, - "grad_norm": 1.4327722787857056, - "learning_rate": 9.679698492462313e-05, - "loss": 6.4429, - "step": 3688 - }, - { - "epoch": 1.9238591916558017, - "grad_norm": 1.4378658533096313, - "learning_rate": 9.679597989949749e-05, - "loss": 6.2302, - "step": 3689 - }, - { - "epoch": 1.9243807040417211, - "grad_norm": 1.5136994123458862, - "learning_rate": 9.679497487437187e-05, - "loss": 5.8949, - "step": 3690 - }, - { - "epoch": 1.9249022164276401, - "grad_norm": 1.6525558233261108, - "learning_rate": 9.679396984924623e-05, - "loss": 5.7527, - "step": 3691 - }, - { - "epoch": 1.9254237288135592, - "grad_norm": 1.4742815494537354, - "learning_rate": 9.67929648241206e-05, - "loss": 5.6675, - "step": 3692 - }, - { - "epoch": 1.9259452411994786, - "grad_norm": 1.4292960166931152, - "learning_rate": 9.679195979899498e-05, - "loss": 6.0268, - "step": 3693 - }, - { - "epoch": 1.9264667535853977, - "grad_norm": 1.3192503452301025, - "learning_rate": 9.679095477386934e-05, - "loss": 6.151, - "step": 3694 - }, - { - "epoch": 1.9269882659713167, - "grad_norm": 1.5001747608184814, - "learning_rate": 9.678994974874372e-05, - "loss": 6.0082, - "step": 3695 - }, - { - "epoch": 1.9275097783572361, - "grad_norm": 1.9304540157318115, - "learning_rate": 9.67889447236181e-05, - "loss": 5.8408, - "step": 3696 - }, - { - "epoch": 1.9280312907431552, - "grad_norm": 1.487881064414978, - "learning_rate": 9.678793969849247e-05, - "loss": 6.2902, - "step": 3697 - }, - { - "epoch": 1.9285528031290742, - "grad_norm": 1.7343789339065552, - "learning_rate": 9.678693467336684e-05, - "loss": 6.0957, - "step": 3698 - }, - { - "epoch": 1.9290743155149936, - "grad_norm": 1.4530911445617676, - "learning_rate": 9.678592964824122e-05, - "loss": 6.4411, - "step": 3699 - }, - { - "epoch": 1.9295958279009127, - "grad_norm": 1.397850751876831, - "learning_rate": 9.678492462311558e-05, - "loss": 5.3955, - "step": 3700 - }, - { - "epoch": 1.9301173402868317, - "grad_norm": 1.5376310348510742, - "learning_rate": 9.678391959798996e-05, - "loss": 6.1986, - "step": 3701 - }, - { - "epoch": 1.930638852672751, - "grad_norm": 1.6636452674865723, - "learning_rate": 9.678291457286432e-05, - "loss": 5.9068, - "step": 3702 - }, - { - "epoch": 1.9311603650586702, - "grad_norm": 1.582450270652771, - "learning_rate": 9.67819095477387e-05, - "loss": 5.9694, - "step": 3703 - }, - { - "epoch": 1.9316818774445892, - "grad_norm": 1.6411999464035034, - "learning_rate": 9.678090452261307e-05, - "loss": 6.144, - "step": 3704 - }, - { - "epoch": 1.9322033898305084, - "grad_norm": 1.5312954187393188, - "learning_rate": 9.677989949748744e-05, - "loss": 6.1232, - "step": 3705 - }, - { - "epoch": 1.9327249022164277, - "grad_norm": 1.5187140703201294, - "learning_rate": 9.677889447236182e-05, - "loss": 6.0082, - "step": 3706 - }, - { - "epoch": 1.9332464146023467, - "grad_norm": 1.7073029279708862, - "learning_rate": 9.677788944723619e-05, - "loss": 6.1666, - "step": 3707 - }, - { - "epoch": 1.933767926988266, - "grad_norm": 1.5173527002334595, - "learning_rate": 9.677688442211056e-05, - "loss": 6.3999, - "step": 3708 - }, - { - "epoch": 1.9342894393741852, - "grad_norm": 1.805742859840393, - "learning_rate": 9.677587939698493e-05, - "loss": 5.6408, - "step": 3709 - }, - { - "epoch": 1.9348109517601042, - "grad_norm": 1.513910174369812, - "learning_rate": 9.67748743718593e-05, - "loss": 6.1759, - "step": 3710 - }, - { - "epoch": 1.9353324641460234, - "grad_norm": 1.7959589958190918, - "learning_rate": 9.677386934673367e-05, - "loss": 5.3493, - "step": 3711 - }, - { - "epoch": 1.9358539765319427, - "grad_norm": 1.4577901363372803, - "learning_rate": 9.677286432160805e-05, - "loss": 6.3691, - "step": 3712 - }, - { - "epoch": 1.9363754889178617, - "grad_norm": 1.5365177392959595, - "learning_rate": 9.677185929648241e-05, - "loss": 5.9217, - "step": 3713 - }, - { - "epoch": 1.936897001303781, - "grad_norm": 1.5541095733642578, - "learning_rate": 9.677085427135679e-05, - "loss": 5.951, - "step": 3714 - }, - { - "epoch": 1.9374185136897002, - "grad_norm": 1.780725121498108, - "learning_rate": 9.676984924623115e-05, - "loss": 5.321, - "step": 3715 - }, - { - "epoch": 1.9379400260756192, - "grad_norm": 1.5229734182357788, - "learning_rate": 9.676884422110553e-05, - "loss": 5.98, - "step": 3716 - }, - { - "epoch": 1.9384615384615385, - "grad_norm": 1.438604712486267, - "learning_rate": 9.676783919597991e-05, - "loss": 6.3953, - "step": 3717 - }, - { - "epoch": 1.9389830508474577, - "grad_norm": 1.3150635957717896, - "learning_rate": 9.676683417085429e-05, - "loss": 6.2537, - "step": 3718 - }, - { - "epoch": 1.9395045632333767, - "grad_norm": 1.6807818412780762, - "learning_rate": 9.676582914572865e-05, - "loss": 5.6899, - "step": 3719 - }, - { - "epoch": 1.940026075619296, - "grad_norm": 1.4965217113494873, - "learning_rate": 9.676482412060302e-05, - "loss": 6.1998, - "step": 3720 - }, - { - "epoch": 1.9405475880052152, - "grad_norm": 1.3748282194137573, - "learning_rate": 9.67638190954774e-05, - "loss": 6.2531, - "step": 3721 - }, - { - "epoch": 1.9410691003911342, - "grad_norm": 1.4825294017791748, - "learning_rate": 9.676281407035176e-05, - "loss": 6.174, - "step": 3722 - }, - { - "epoch": 1.9415906127770535, - "grad_norm": 1.6296523809432983, - "learning_rate": 9.676180904522614e-05, - "loss": 5.9069, - "step": 3723 - }, - { - "epoch": 1.9421121251629727, - "grad_norm": 1.5608360767364502, - "learning_rate": 9.67608040201005e-05, - "loss": 5.8187, - "step": 3724 - }, - { - "epoch": 1.9426336375488917, - "grad_norm": 1.426539659500122, - "learning_rate": 9.675979899497488e-05, - "loss": 6.1302, - "step": 3725 - }, - { - "epoch": 1.943155149934811, - "grad_norm": 1.6397687196731567, - "learning_rate": 9.675879396984924e-05, - "loss": 5.624, - "step": 3726 - }, - { - "epoch": 1.9436766623207302, - "grad_norm": 1.4884954690933228, - "learning_rate": 9.675778894472362e-05, - "loss": 6.1793, - "step": 3727 - }, - { - "epoch": 1.9441981747066492, - "grad_norm": 1.4461029767990112, - "learning_rate": 9.6756783919598e-05, - "loss": 6.4219, - "step": 3728 - }, - { - "epoch": 1.9447196870925685, - "grad_norm": 1.827540397644043, - "learning_rate": 9.675577889447238e-05, - "loss": 5.2261, - "step": 3729 - }, - { - "epoch": 1.9452411994784877, - "grad_norm": 1.523616075515747, - "learning_rate": 9.675477386934674e-05, - "loss": 5.3159, - "step": 3730 - }, - { - "epoch": 1.9457627118644067, - "grad_norm": 1.957961916923523, - "learning_rate": 9.675376884422112e-05, - "loss": 5.4794, - "step": 3731 - }, - { - "epoch": 1.946284224250326, - "grad_norm": 1.5601928234100342, - "learning_rate": 9.675276381909548e-05, - "loss": 6.1306, - "step": 3732 - }, - { - "epoch": 1.9468057366362452, - "grad_norm": 1.561591386795044, - "learning_rate": 9.675175879396985e-05, - "loss": 5.7421, - "step": 3733 - }, - { - "epoch": 1.9473272490221643, - "grad_norm": 1.6185028553009033, - "learning_rate": 9.675075376884422e-05, - "loss": 5.677, - "step": 3734 - }, - { - "epoch": 1.9478487614080835, - "grad_norm": 1.4494253396987915, - "learning_rate": 9.674974874371859e-05, - "loss": 6.225, - "step": 3735 - }, - { - "epoch": 1.9483702737940027, - "grad_norm": 1.5567940473556519, - "learning_rate": 9.674874371859297e-05, - "loss": 6.0748, - "step": 3736 - }, - { - "epoch": 1.9488917861799218, - "grad_norm": 1.3867876529693604, - "learning_rate": 9.674773869346734e-05, - "loss": 6.2342, - "step": 3737 - }, - { - "epoch": 1.9494132985658408, - "grad_norm": 1.466679334640503, - "learning_rate": 9.674673366834172e-05, - "loss": 6.1619, - "step": 3738 - }, - { - "epoch": 1.9499348109517602, - "grad_norm": 1.555573582649231, - "learning_rate": 9.674572864321609e-05, - "loss": 5.7609, - "step": 3739 - }, - { - "epoch": 1.9504563233376793, - "grad_norm": 1.586954116821289, - "learning_rate": 9.674472361809046e-05, - "loss": 5.9281, - "step": 3740 - }, - { - "epoch": 1.9509778357235983, - "grad_norm": 1.5040411949157715, - "learning_rate": 9.674371859296483e-05, - "loss": 5.9583, - "step": 3741 - }, - { - "epoch": 1.9514993481095178, - "grad_norm": 1.2743133306503296, - "learning_rate": 9.67427135678392e-05, - "loss": 6.1137, - "step": 3742 - }, - { - "epoch": 1.9520208604954368, - "grad_norm": 1.4357311725616455, - "learning_rate": 9.674170854271357e-05, - "loss": 6.1235, - "step": 3743 - }, - { - "epoch": 1.9525423728813558, - "grad_norm": 1.805456519126892, - "learning_rate": 9.674070351758795e-05, - "loss": 5.7003, - "step": 3744 - }, - { - "epoch": 1.9530638852672753, - "grad_norm": 1.77345609664917, - "learning_rate": 9.673969849246231e-05, - "loss": 6.123, - "step": 3745 - }, - { - "epoch": 1.9535853976531943, - "grad_norm": 1.6671754121780396, - "learning_rate": 9.673869346733668e-05, - "loss": 5.6419, - "step": 3746 - }, - { - "epoch": 1.9541069100391133, - "grad_norm": 1.8957421779632568, - "learning_rate": 9.673768844221105e-05, - "loss": 5.7793, - "step": 3747 - }, - { - "epoch": 1.9546284224250325, - "grad_norm": 1.832861065864563, - "learning_rate": 9.673668341708543e-05, - "loss": 5.7934, - "step": 3748 - }, - { - "epoch": 1.9551499348109518, - "grad_norm": 1.5750383138656616, - "learning_rate": 9.673567839195981e-05, - "loss": 5.48, - "step": 3749 - }, - { - "epoch": 1.9556714471968708, - "grad_norm": 1.452635645866394, - "learning_rate": 9.673467336683417e-05, - "loss": 5.8993, - "step": 3750 - }, - { - "epoch": 1.95619295958279, - "grad_norm": 1.7086045742034912, - "learning_rate": 9.673366834170855e-05, - "loss": 5.4557, - "step": 3751 - }, - { - "epoch": 1.9567144719687093, - "grad_norm": 1.454647183418274, - "learning_rate": 9.673266331658292e-05, - "loss": 6.1394, - "step": 3752 - }, - { - "epoch": 1.9572359843546283, - "grad_norm": 1.5458574295043945, - "learning_rate": 9.67316582914573e-05, - "loss": 6.1411, - "step": 3753 - }, - { - "epoch": 1.9577574967405476, - "grad_norm": 1.579566478729248, - "learning_rate": 9.673065326633166e-05, - "loss": 5.6519, - "step": 3754 - }, - { - "epoch": 1.9582790091264668, - "grad_norm": 1.4239953756332397, - "learning_rate": 9.672964824120604e-05, - "loss": 6.176, - "step": 3755 - }, - { - "epoch": 1.9588005215123858, - "grad_norm": 1.6183122396469116, - "learning_rate": 9.67286432160804e-05, - "loss": 5.2611, - "step": 3756 - }, - { - "epoch": 1.959322033898305, - "grad_norm": 1.4526921510696411, - "learning_rate": 9.672763819095478e-05, - "loss": 6.4183, - "step": 3757 - }, - { - "epoch": 1.9598435462842243, - "grad_norm": 1.457470178604126, - "learning_rate": 9.672663316582916e-05, - "loss": 6.1146, - "step": 3758 - }, - { - "epoch": 1.9603650586701433, - "grad_norm": 1.4826791286468506, - "learning_rate": 9.672562814070352e-05, - "loss": 6.2853, - "step": 3759 - }, - { - "epoch": 1.9608865710560626, - "grad_norm": 1.5994478464126587, - "learning_rate": 9.67246231155779e-05, - "loss": 5.5664, - "step": 3760 - }, - { - "epoch": 1.9614080834419818, - "grad_norm": 1.586102843284607, - "learning_rate": 9.672361809045226e-05, - "loss": 5.9162, - "step": 3761 - }, - { - "epoch": 1.9619295958279008, - "grad_norm": 1.4627211093902588, - "learning_rate": 9.672261306532664e-05, - "loss": 5.5841, - "step": 3762 - }, - { - "epoch": 1.96245110821382, - "grad_norm": 1.7927237749099731, - "learning_rate": 9.6721608040201e-05, - "loss": 5.6951, - "step": 3763 - }, - { - "epoch": 1.9629726205997393, - "grad_norm": 1.4786993265151978, - "learning_rate": 9.672060301507538e-05, - "loss": 6.142, - "step": 3764 - }, - { - "epoch": 1.9634941329856583, - "grad_norm": 1.4124550819396973, - "learning_rate": 9.671959798994975e-05, - "loss": 6.111, - "step": 3765 - }, - { - "epoch": 1.9640156453715776, - "grad_norm": 1.4885871410369873, - "learning_rate": 9.671859296482412e-05, - "loss": 6.1683, - "step": 3766 - }, - { - "epoch": 1.9645371577574968, - "grad_norm": 1.4775676727294922, - "learning_rate": 9.671758793969849e-05, - "loss": 5.9439, - "step": 3767 - }, - { - "epoch": 1.9650586701434158, - "grad_norm": 1.5198376178741455, - "learning_rate": 9.671658291457287e-05, - "loss": 6.0779, - "step": 3768 - }, - { - "epoch": 1.965580182529335, - "grad_norm": 1.6087232828140259, - "learning_rate": 9.671557788944724e-05, - "loss": 6.021, - "step": 3769 - }, - { - "epoch": 1.9661016949152543, - "grad_norm": 1.5046133995056152, - "learning_rate": 9.671457286432162e-05, - "loss": 6.2484, - "step": 3770 - }, - { - "epoch": 1.9666232073011733, - "grad_norm": 1.5241562128067017, - "learning_rate": 9.671356783919599e-05, - "loss": 6.2958, - "step": 3771 - }, - { - "epoch": 1.9671447196870926, - "grad_norm": 1.6915507316589355, - "learning_rate": 9.671256281407035e-05, - "loss": 5.7114, - "step": 3772 - }, - { - "epoch": 1.9676662320730118, - "grad_norm": 1.9184249639511108, - "learning_rate": 9.671155778894473e-05, - "loss": 6.0257, - "step": 3773 - }, - { - "epoch": 1.9681877444589309, - "grad_norm": 1.6501777172088623, - "learning_rate": 9.671055276381909e-05, - "loss": 5.6487, - "step": 3774 - }, - { - "epoch": 1.96870925684485, - "grad_norm": 1.6688206195831299, - "learning_rate": 9.670954773869347e-05, - "loss": 6.1452, - "step": 3775 - }, - { - "epoch": 1.9692307692307693, - "grad_norm": 1.5813205242156982, - "learning_rate": 9.670854271356784e-05, - "loss": 6.1194, - "step": 3776 - }, - { - "epoch": 1.9697522816166884, - "grad_norm": 1.46859610080719, - "learning_rate": 9.670753768844221e-05, - "loss": 6.1389, - "step": 3777 - }, - { - "epoch": 1.9702737940026076, - "grad_norm": 1.5424631834030151, - "learning_rate": 9.670653266331659e-05, - "loss": 6.0538, - "step": 3778 - }, - { - "epoch": 1.9707953063885268, - "grad_norm": 1.7152600288391113, - "learning_rate": 9.670552763819097e-05, - "loss": 5.8784, - "step": 3779 - }, - { - "epoch": 1.9713168187744459, - "grad_norm": 1.6198248863220215, - "learning_rate": 9.670452261306533e-05, - "loss": 6.0991, - "step": 3780 - }, - { - "epoch": 1.971838331160365, - "grad_norm": 1.5231430530548096, - "learning_rate": 9.670351758793971e-05, - "loss": 6.2892, - "step": 3781 - }, - { - "epoch": 1.9723598435462844, - "grad_norm": 1.3140794038772583, - "learning_rate": 9.670251256281407e-05, - "loss": 6.2536, - "step": 3782 - }, - { - "epoch": 1.9728813559322034, - "grad_norm": 1.7115626335144043, - "learning_rate": 9.670150753768845e-05, - "loss": 5.8252, - "step": 3783 - }, - { - "epoch": 1.9734028683181224, - "grad_norm": 1.4999061822891235, - "learning_rate": 9.670050251256282e-05, - "loss": 6.0298, - "step": 3784 - }, - { - "epoch": 1.9739243807040419, - "grad_norm": 1.4897143840789795, - "learning_rate": 9.669949748743718e-05, - "loss": 5.8075, - "step": 3785 - }, - { - "epoch": 1.9744458930899609, - "grad_norm": 1.5358606576919556, - "learning_rate": 9.669849246231156e-05, - "loss": 5.7169, - "step": 3786 - }, - { - "epoch": 1.97496740547588, - "grad_norm": 1.6340683698654175, - "learning_rate": 9.669748743718592e-05, - "loss": 5.4938, - "step": 3787 - }, - { - "epoch": 1.9754889178617994, - "grad_norm": 1.6434447765350342, - "learning_rate": 9.66964824120603e-05, - "loss": 5.9882, - "step": 3788 - }, - { - "epoch": 1.9760104302477184, - "grad_norm": 1.490289330482483, - "learning_rate": 9.669547738693468e-05, - "loss": 5.9047, - "step": 3789 - }, - { - "epoch": 1.9765319426336374, - "grad_norm": 1.490796446800232, - "learning_rate": 9.669447236180906e-05, - "loss": 5.6832, - "step": 3790 - }, - { - "epoch": 1.9770534550195569, - "grad_norm": 1.7789455652236938, - "learning_rate": 9.669346733668342e-05, - "loss": 5.7787, - "step": 3791 - }, - { - "epoch": 1.977574967405476, - "grad_norm": 1.4790477752685547, - "learning_rate": 9.66924623115578e-05, - "loss": 5.9383, - "step": 3792 - }, - { - "epoch": 1.978096479791395, - "grad_norm": 1.644132375717163, - "learning_rate": 9.669145728643216e-05, - "loss": 6.1026, - "step": 3793 - }, - { - "epoch": 1.9786179921773144, - "grad_norm": 1.5769768953323364, - "learning_rate": 9.669045226130654e-05, - "loss": 5.7081, - "step": 3794 - }, - { - "epoch": 1.9791395045632334, - "grad_norm": 1.4699238538742065, - "learning_rate": 9.66894472361809e-05, - "loss": 5.7658, - "step": 3795 - }, - { - "epoch": 1.9796610169491524, - "grad_norm": 1.4182850122451782, - "learning_rate": 9.668844221105528e-05, - "loss": 5.9603, - "step": 3796 - }, - { - "epoch": 1.9801825293350717, - "grad_norm": 1.4898478984832764, - "learning_rate": 9.668743718592965e-05, - "loss": 6.0428, - "step": 3797 - }, - { - "epoch": 1.980704041720991, - "grad_norm": 1.700779914855957, - "learning_rate": 9.668643216080403e-05, - "loss": 5.9131, - "step": 3798 - }, - { - "epoch": 1.98122555410691, - "grad_norm": 1.4077318906784058, - "learning_rate": 9.66854271356784e-05, - "loss": 5.9479, - "step": 3799 - }, - { - "epoch": 1.9817470664928292, - "grad_norm": 1.3818029165267944, - "learning_rate": 9.668442211055277e-05, - "loss": 6.0141, - "step": 3800 - }, - { - "epoch": 1.9822685788787484, - "grad_norm": 1.4354159832000732, - "learning_rate": 9.668341708542715e-05, - "loss": 5.8762, - "step": 3801 - }, - { - "epoch": 1.9827900912646674, - "grad_norm": 1.5172733068466187, - "learning_rate": 9.668241206030151e-05, - "loss": 5.8266, - "step": 3802 - }, - { - "epoch": 1.9833116036505867, - "grad_norm": 1.4415496587753296, - "learning_rate": 9.668140703517589e-05, - "loss": 6.0618, - "step": 3803 - }, - { - "epoch": 1.983833116036506, - "grad_norm": 1.5004628896713257, - "learning_rate": 9.668040201005025e-05, - "loss": 5.4733, - "step": 3804 - }, - { - "epoch": 1.984354628422425, - "grad_norm": 1.3964412212371826, - "learning_rate": 9.667939698492463e-05, - "loss": 5.9146, - "step": 3805 - }, - { - "epoch": 1.9848761408083442, - "grad_norm": 1.3824177980422974, - "learning_rate": 9.6678391959799e-05, - "loss": 6.2999, - "step": 3806 - }, - { - "epoch": 1.9853976531942634, - "grad_norm": 1.4919296503067017, - "learning_rate": 9.667738693467337e-05, - "loss": 6.042, - "step": 3807 - }, - { - "epoch": 1.9859191655801824, - "grad_norm": 1.5292043685913086, - "learning_rate": 9.667638190954774e-05, - "loss": 6.1426, - "step": 3808 - }, - { - "epoch": 1.9864406779661017, - "grad_norm": 1.6136348247528076, - "learning_rate": 9.667537688442211e-05, - "loss": 5.7518, - "step": 3809 - }, - { - "epoch": 1.986962190352021, - "grad_norm": 1.4757779836654663, - "learning_rate": 9.667437185929649e-05, - "loss": 6.2082, - "step": 3810 - }, - { - "epoch": 1.98748370273794, - "grad_norm": 1.518336296081543, - "learning_rate": 9.667336683417087e-05, - "loss": 5.8855, - "step": 3811 - }, - { - "epoch": 1.9880052151238592, - "grad_norm": 1.4585412740707397, - "learning_rate": 9.667236180904523e-05, - "loss": 5.9526, - "step": 3812 - }, - { - "epoch": 1.9885267275097784, - "grad_norm": 1.7243646383285522, - "learning_rate": 9.66713567839196e-05, - "loss": 5.7422, - "step": 3813 - }, - { - "epoch": 1.9890482398956975, - "grad_norm": 1.3863049745559692, - "learning_rate": 9.667035175879398e-05, - "loss": 6.2211, - "step": 3814 - }, - { - "epoch": 1.9895697522816167, - "grad_norm": 1.8290050029754639, - "learning_rate": 9.666934673366834e-05, - "loss": 5.6805, - "step": 3815 - }, - { - "epoch": 1.990091264667536, - "grad_norm": 1.4061254262924194, - "learning_rate": 9.666834170854272e-05, - "loss": 6.1466, - "step": 3816 - }, - { - "epoch": 1.990612777053455, - "grad_norm": 1.6117753982543945, - "learning_rate": 9.666733668341708e-05, - "loss": 5.9243, - "step": 3817 - }, - { - "epoch": 1.9911342894393742, - "grad_norm": 1.3681560754776, - "learning_rate": 9.666633165829146e-05, - "loss": 6.0134, - "step": 3818 - }, - { - "epoch": 1.9916558018252934, - "grad_norm": 1.6231834888458252, - "learning_rate": 9.666532663316584e-05, - "loss": 5.595, - "step": 3819 - }, - { - "epoch": 1.9921773142112125, - "grad_norm": 1.4839259386062622, - "learning_rate": 9.666432160804022e-05, - "loss": 6.0455, - "step": 3820 - }, - { - "epoch": 1.9926988265971317, - "grad_norm": 1.4152849912643433, - "learning_rate": 9.666331658291458e-05, - "loss": 5.9413, - "step": 3821 - }, - { - "epoch": 1.993220338983051, - "grad_norm": 1.4076203107833862, - "learning_rate": 9.666231155778896e-05, - "loss": 6.0805, - "step": 3822 - }, - { - "epoch": 1.99374185136897, - "grad_norm": 1.4839211702346802, - "learning_rate": 9.666130653266332e-05, - "loss": 6.1253, - "step": 3823 - }, - { - "epoch": 1.9942633637548892, - "grad_norm": 1.455725073814392, - "learning_rate": 9.66603015075377e-05, - "loss": 6.0291, - "step": 3824 - }, - { - "epoch": 1.9947848761408085, - "grad_norm": 1.6449155807495117, - "learning_rate": 9.665929648241206e-05, - "loss": 6.0575, - "step": 3825 - }, - { - "epoch": 1.9953063885267275, - "grad_norm": 1.462751865386963, - "learning_rate": 9.665829145728643e-05, - "loss": 6.1895, - "step": 3826 - }, - { - "epoch": 1.9958279009126467, - "grad_norm": 1.4507899284362793, - "learning_rate": 9.66572864321608e-05, - "loss": 6.1622, - "step": 3827 - }, - { - "epoch": 1.996349413298566, - "grad_norm": 1.6966131925582886, - "learning_rate": 9.665628140703517e-05, - "loss": 4.9905, - "step": 3828 - }, - { - "epoch": 1.996870925684485, - "grad_norm": 1.7638691663742065, - "learning_rate": 9.665527638190955e-05, - "loss": 5.4357, - "step": 3829 - }, - { - "epoch": 1.997392438070404, - "grad_norm": 1.645431637763977, - "learning_rate": 9.665427135678393e-05, - "loss": 5.9473, - "step": 3830 - }, - { - "epoch": 1.9979139504563235, - "grad_norm": 1.3116331100463867, - "learning_rate": 9.66532663316583e-05, - "loss": 6.1634, - "step": 3831 - }, - { - "epoch": 1.9984354628422425, - "grad_norm": 1.5586097240447998, - "learning_rate": 9.665226130653267e-05, - "loss": 5.2557, - "step": 3832 - }, - { - "epoch": 1.9989569752281615, - "grad_norm": 1.672777771949768, - "learning_rate": 9.665125628140705e-05, - "loss": 5.6665, - "step": 3833 - }, - { - "epoch": 1.999478487614081, - "grad_norm": 1.5142594575881958, - "learning_rate": 9.665025125628141e-05, - "loss": 5.9705, - "step": 3834 - }, - { - "epoch": 2.0, - "grad_norm": 1.6594988107681274, - "learning_rate": 9.664924623115579e-05, - "loss": 5.7251, - "step": 3835 - }, - { - "epoch": 2.000521512385919, - "grad_norm": 1.591359257698059, - "learning_rate": 9.664824120603015e-05, - "loss": 6.2197, - "step": 3836 - }, - { - "epoch": 2.0010430247718385, - "grad_norm": 1.522283673286438, - "learning_rate": 9.664723618090453e-05, - "loss": 5.9446, - "step": 3837 - }, - { - "epoch": 2.0015645371577575, - "grad_norm": 1.6052857637405396, - "learning_rate": 9.66462311557789e-05, - "loss": 5.6827, - "step": 3838 - }, - { - "epoch": 2.0020860495436765, - "grad_norm": 1.4782124757766724, - "learning_rate": 9.664522613065327e-05, - "loss": 6.2934, - "step": 3839 - }, - { - "epoch": 2.002607561929596, - "grad_norm": 1.678267240524292, - "learning_rate": 9.664422110552765e-05, - "loss": 5.896, - "step": 3840 - }, - { - "epoch": 2.003129074315515, - "grad_norm": 1.5414811372756958, - "learning_rate": 9.664321608040201e-05, - "loss": 6.1196, - "step": 3841 - }, - { - "epoch": 2.003650586701434, - "grad_norm": 1.3931604623794556, - "learning_rate": 9.664221105527639e-05, - "loss": 6.2276, - "step": 3842 - }, - { - "epoch": 2.0041720990873535, - "grad_norm": 1.4502414464950562, - "learning_rate": 9.664120603015076e-05, - "loss": 6.0468, - "step": 3843 - }, - { - "epoch": 2.0046936114732725, - "grad_norm": 1.5150848627090454, - "learning_rate": 9.664020100502513e-05, - "loss": 6.2001, - "step": 3844 - }, - { - "epoch": 2.0052151238591915, - "grad_norm": 1.566394329071045, - "learning_rate": 9.66391959798995e-05, - "loss": 5.3592, - "step": 3845 - }, - { - "epoch": 2.005736636245111, - "grad_norm": 1.4968899488449097, - "learning_rate": 9.663819095477388e-05, - "loss": 5.6579, - "step": 3846 - }, - { - "epoch": 2.00625814863103, - "grad_norm": 1.525282621383667, - "learning_rate": 9.663718592964824e-05, - "loss": 5.6269, - "step": 3847 - }, - { - "epoch": 2.006779661016949, - "grad_norm": 1.497970461845398, - "learning_rate": 9.663618090452262e-05, - "loss": 6.0927, - "step": 3848 - }, - { - "epoch": 2.0073011734028685, - "grad_norm": 1.5611066818237305, - "learning_rate": 9.663517587939698e-05, - "loss": 6.1371, - "step": 3849 - }, - { - "epoch": 2.0078226857887875, - "grad_norm": 1.592439889907837, - "learning_rate": 9.663417085427136e-05, - "loss": 5.9127, - "step": 3850 - }, - { - "epoch": 2.0083441981747066, - "grad_norm": 1.4829678535461426, - "learning_rate": 9.663316582914574e-05, - "loss": 5.91, - "step": 3851 - }, - { - "epoch": 2.008865710560626, - "grad_norm": 1.4874320030212402, - "learning_rate": 9.66321608040201e-05, - "loss": 5.9968, - "step": 3852 - }, - { - "epoch": 2.009387222946545, - "grad_norm": 1.4022375345230103, - "learning_rate": 9.663115577889448e-05, - "loss": 5.5431, - "step": 3853 - }, - { - "epoch": 2.009908735332464, - "grad_norm": 1.426586627960205, - "learning_rate": 9.663015075376884e-05, - "loss": 5.9981, - "step": 3854 - }, - { - "epoch": 2.0104302477183835, - "grad_norm": 1.6040619611740112, - "learning_rate": 9.662914572864322e-05, - "loss": 5.7153, - "step": 3855 - }, - { - "epoch": 2.0109517601043025, - "grad_norm": 1.6716164350509644, - "learning_rate": 9.662814070351759e-05, - "loss": 6.1254, - "step": 3856 - }, - { - "epoch": 2.0114732724902216, - "grad_norm": 1.7159833908081055, - "learning_rate": 9.662713567839196e-05, - "loss": 5.5416, - "step": 3857 - }, - { - "epoch": 2.011994784876141, - "grad_norm": 1.431359887123108, - "learning_rate": 9.662613065326633e-05, - "loss": 6.0572, - "step": 3858 - }, - { - "epoch": 2.01251629726206, - "grad_norm": 1.6079773902893066, - "learning_rate": 9.662512562814071e-05, - "loss": 6.0283, - "step": 3859 - }, - { - "epoch": 2.013037809647979, - "grad_norm": 1.5487617254257202, - "learning_rate": 9.662412060301508e-05, - "loss": 6.2086, - "step": 3860 - }, - { - "epoch": 2.013559322033898, - "grad_norm": 1.469484806060791, - "learning_rate": 9.662311557788946e-05, - "loss": 5.9104, - "step": 3861 - }, - { - "epoch": 2.0140808344198176, - "grad_norm": 1.5899367332458496, - "learning_rate": 9.662211055276383e-05, - "loss": 5.9402, - "step": 3862 - }, - { - "epoch": 2.0146023468057366, - "grad_norm": 1.7232006788253784, - "learning_rate": 9.66211055276382e-05, - "loss": 5.7653, - "step": 3863 - }, - { - "epoch": 2.0151238591916556, - "grad_norm": 1.868619441986084, - "learning_rate": 9.662010050251257e-05, - "loss": 5.4457, - "step": 3864 - }, - { - "epoch": 2.015645371577575, - "grad_norm": 1.5389204025268555, - "learning_rate": 9.661909547738693e-05, - "loss": 5.9007, - "step": 3865 - }, - { - "epoch": 2.016166883963494, - "grad_norm": 1.610851764678955, - "learning_rate": 9.661809045226131e-05, - "loss": 5.8552, - "step": 3866 - }, - { - "epoch": 2.016688396349413, - "grad_norm": 1.612640619277954, - "learning_rate": 9.661708542713568e-05, - "loss": 6.0428, - "step": 3867 - }, - { - "epoch": 2.0172099087353326, - "grad_norm": 1.695522427558899, - "learning_rate": 9.661608040201005e-05, - "loss": 5.5925, - "step": 3868 - }, - { - "epoch": 2.0177314211212516, - "grad_norm": 1.6532551050186157, - "learning_rate": 9.661507537688442e-05, - "loss": 5.7311, - "step": 3869 - }, - { - "epoch": 2.0182529335071706, - "grad_norm": 1.60237455368042, - "learning_rate": 9.66140703517588e-05, - "loss": 5.9292, - "step": 3870 - }, - { - "epoch": 2.01877444589309, - "grad_norm": 1.5588821172714233, - "learning_rate": 9.661306532663317e-05, - "loss": 6.1017, - "step": 3871 - }, - { - "epoch": 2.019295958279009, - "grad_norm": 1.529531478881836, - "learning_rate": 9.661206030150755e-05, - "loss": 5.6163, - "step": 3872 - }, - { - "epoch": 2.019817470664928, - "grad_norm": 1.6026877164840698, - "learning_rate": 9.661105527638192e-05, - "loss": 6.1441, - "step": 3873 - }, - { - "epoch": 2.0203389830508476, - "grad_norm": 1.5312128067016602, - "learning_rate": 9.661005025125629e-05, - "loss": 5.8343, - "step": 3874 - }, - { - "epoch": 2.0208604954367666, - "grad_norm": 1.5682915449142456, - "learning_rate": 9.660904522613066e-05, - "loss": 5.9322, - "step": 3875 - }, - { - "epoch": 2.0213820078226856, - "grad_norm": 1.4330214262008667, - "learning_rate": 9.660804020100504e-05, - "loss": 5.642, - "step": 3876 - }, - { - "epoch": 2.021903520208605, - "grad_norm": 1.4143586158752441, - "learning_rate": 9.66070351758794e-05, - "loss": 5.9079, - "step": 3877 - }, - { - "epoch": 2.022425032594524, - "grad_norm": 1.4480289220809937, - "learning_rate": 9.660603015075376e-05, - "loss": 6.0554, - "step": 3878 - }, - { - "epoch": 2.022946544980443, - "grad_norm": 1.5762571096420288, - "learning_rate": 9.660502512562814e-05, - "loss": 5.8765, - "step": 3879 - }, - { - "epoch": 2.0234680573663626, - "grad_norm": 1.5856318473815918, - "learning_rate": 9.660402010050252e-05, - "loss": 5.3811, - "step": 3880 - }, - { - "epoch": 2.0239895697522816, - "grad_norm": 1.4558180570602417, - "learning_rate": 9.66030150753769e-05, - "loss": 5.811, - "step": 3881 - }, - { - "epoch": 2.0245110821382006, - "grad_norm": 1.688279151916504, - "learning_rate": 9.660201005025126e-05, - "loss": 5.6806, - "step": 3882 - }, - { - "epoch": 2.02503259452412, - "grad_norm": 1.5003595352172852, - "learning_rate": 9.660100502512564e-05, - "loss": 5.928, - "step": 3883 - }, - { - "epoch": 2.025554106910039, - "grad_norm": 1.7635873556137085, - "learning_rate": 9.66e-05, - "loss": 5.3602, - "step": 3884 - }, - { - "epoch": 2.026075619295958, - "grad_norm": 1.4176634550094604, - "learning_rate": 9.659899497487438e-05, - "loss": 6.0984, - "step": 3885 - }, - { - "epoch": 2.0265971316818776, - "grad_norm": 1.5006334781646729, - "learning_rate": 9.659798994974875e-05, - "loss": 5.7193, - "step": 3886 - }, - { - "epoch": 2.0271186440677966, - "grad_norm": 1.4504597187042236, - "learning_rate": 9.659698492462312e-05, - "loss": 5.9575, - "step": 3887 - }, - { - "epoch": 2.0276401564537156, - "grad_norm": 1.6058359146118164, - "learning_rate": 9.659597989949749e-05, - "loss": 6.0705, - "step": 3888 - }, - { - "epoch": 2.028161668839635, - "grad_norm": 1.6142563819885254, - "learning_rate": 9.659497487437187e-05, - "loss": 5.5714, - "step": 3889 - }, - { - "epoch": 2.028683181225554, - "grad_norm": 1.6901997327804565, - "learning_rate": 9.659396984924623e-05, - "loss": 5.81, - "step": 3890 - }, - { - "epoch": 2.029204693611473, - "grad_norm": 1.5869756937026978, - "learning_rate": 9.659296482412061e-05, - "loss": 5.9748, - "step": 3891 - }, - { - "epoch": 2.0297262059973926, - "grad_norm": 1.6090866327285767, - "learning_rate": 9.659195979899499e-05, - "loss": 6.1625, - "step": 3892 - }, - { - "epoch": 2.0302477183833116, - "grad_norm": 1.4449563026428223, - "learning_rate": 9.659095477386935e-05, - "loss": 5.9832, - "step": 3893 - }, - { - "epoch": 2.0307692307692307, - "grad_norm": 1.5957400798797607, - "learning_rate": 9.658994974874373e-05, - "loss": 6.0209, - "step": 3894 - }, - { - "epoch": 2.03129074315515, - "grad_norm": 1.4640491008758545, - "learning_rate": 9.658894472361809e-05, - "loss": 5.9661, - "step": 3895 - }, - { - "epoch": 2.031812255541069, - "grad_norm": 1.554290771484375, - "learning_rate": 9.658793969849247e-05, - "loss": 6.0164, - "step": 3896 - }, - { - "epoch": 2.032333767926988, - "grad_norm": 1.6988391876220703, - "learning_rate": 9.658693467336683e-05, - "loss": 5.5334, - "step": 3897 - }, - { - "epoch": 2.0328552803129076, - "grad_norm": 1.529518723487854, - "learning_rate": 9.658592964824121e-05, - "loss": 6.1945, - "step": 3898 - }, - { - "epoch": 2.0333767926988267, - "grad_norm": 1.6130419969558716, - "learning_rate": 9.658492462311558e-05, - "loss": 5.6871, - "step": 3899 - }, - { - "epoch": 2.0338983050847457, - "grad_norm": 1.4580706357955933, - "learning_rate": 9.658391959798995e-05, - "loss": 6.0258, - "step": 3900 - }, - { - "epoch": 2.034419817470665, - "grad_norm": 1.4608551263809204, - "learning_rate": 9.658291457286432e-05, - "loss": 6.0448, - "step": 3901 - }, - { - "epoch": 2.034941329856584, - "grad_norm": 1.5851224660873413, - "learning_rate": 9.65819095477387e-05, - "loss": 6.0628, - "step": 3902 - }, - { - "epoch": 2.035462842242503, - "grad_norm": 1.6293730735778809, - "learning_rate": 9.658090452261307e-05, - "loss": 5.6049, - "step": 3903 - }, - { - "epoch": 2.0359843546284226, - "grad_norm": 1.6573991775512695, - "learning_rate": 9.657989949748745e-05, - "loss": 5.5879, - "step": 3904 - }, - { - "epoch": 2.0365058670143417, - "grad_norm": 1.5741170644760132, - "learning_rate": 9.657889447236182e-05, - "loss": 5.6968, - "step": 3905 - }, - { - "epoch": 2.0370273794002607, - "grad_norm": 1.6803165674209595, - "learning_rate": 9.657788944723618e-05, - "loss": 5.8301, - "step": 3906 - }, - { - "epoch": 2.0375488917861797, - "grad_norm": 1.5366028547286987, - "learning_rate": 9.657688442211056e-05, - "loss": 5.3497, - "step": 3907 - }, - { - "epoch": 2.038070404172099, - "grad_norm": 1.7429250478744507, - "learning_rate": 9.657587939698492e-05, - "loss": 5.9486, - "step": 3908 - }, - { - "epoch": 2.038591916558018, - "grad_norm": 1.690645694732666, - "learning_rate": 9.65748743718593e-05, - "loss": 6.1225, - "step": 3909 - }, - { - "epoch": 2.039113428943937, - "grad_norm": 1.5855175256729126, - "learning_rate": 9.657386934673366e-05, - "loss": 4.9894, - "step": 3910 - }, - { - "epoch": 2.0396349413298567, - "grad_norm": 1.6470962762832642, - "learning_rate": 9.657286432160804e-05, - "loss": 6.0837, - "step": 3911 - }, - { - "epoch": 2.0401564537157757, - "grad_norm": 1.7859163284301758, - "learning_rate": 9.657185929648242e-05, - "loss": 5.9401, - "step": 3912 - }, - { - "epoch": 2.0406779661016947, - "grad_norm": 1.4241307973861694, - "learning_rate": 9.65708542713568e-05, - "loss": 6.3431, - "step": 3913 - }, - { - "epoch": 2.041199478487614, - "grad_norm": 1.505110263824463, - "learning_rate": 9.656984924623116e-05, - "loss": 5.9135, - "step": 3914 - }, - { - "epoch": 2.041720990873533, - "grad_norm": 1.7363044023513794, - "learning_rate": 9.656884422110554e-05, - "loss": 6.1511, - "step": 3915 - }, - { - "epoch": 2.0422425032594522, - "grad_norm": 1.6521581411361694, - "learning_rate": 9.65678391959799e-05, - "loss": 5.6167, - "step": 3916 - }, - { - "epoch": 2.0427640156453717, - "grad_norm": 1.510424017906189, - "learning_rate": 9.656683417085428e-05, - "loss": 5.6457, - "step": 3917 - }, - { - "epoch": 2.0432855280312907, - "grad_norm": 1.8391637802124023, - "learning_rate": 9.656582914572865e-05, - "loss": 6.0547, - "step": 3918 - }, - { - "epoch": 2.0438070404172097, - "grad_norm": 1.5372259616851807, - "learning_rate": 9.656482412060301e-05, - "loss": 5.4644, - "step": 3919 - }, - { - "epoch": 2.044328552803129, - "grad_norm": 1.5180379152297974, - "learning_rate": 9.656381909547739e-05, - "loss": 5.8849, - "step": 3920 - }, - { - "epoch": 2.044850065189048, - "grad_norm": 1.5068578720092773, - "learning_rate": 9.656281407035175e-05, - "loss": 5.9186, - "step": 3921 - }, - { - "epoch": 2.0453715775749672, - "grad_norm": 1.4703480005264282, - "learning_rate": 9.656180904522613e-05, - "loss": 6.3898, - "step": 3922 - }, - { - "epoch": 2.0458930899608867, - "grad_norm": 1.5425384044647217, - "learning_rate": 9.656080402010051e-05, - "loss": 5.5468, - "step": 3923 - }, - { - "epoch": 2.0464146023468057, - "grad_norm": 1.9228389263153076, - "learning_rate": 9.655979899497489e-05, - "loss": 5.6935, - "step": 3924 - }, - { - "epoch": 2.0469361147327247, - "grad_norm": 1.5889586210250854, - "learning_rate": 9.655879396984925e-05, - "loss": 5.598, - "step": 3925 - }, - { - "epoch": 2.047457627118644, - "grad_norm": 1.8198912143707275, - "learning_rate": 9.655778894472363e-05, - "loss": 5.7975, - "step": 3926 - }, - { - "epoch": 2.0479791395045632, - "grad_norm": 1.7293827533721924, - "learning_rate": 9.655678391959799e-05, - "loss": 5.581, - "step": 3927 - }, - { - "epoch": 2.0485006518904822, - "grad_norm": 1.5769946575164795, - "learning_rate": 9.655577889447237e-05, - "loss": 6.0248, - "step": 3928 - }, - { - "epoch": 2.0490221642764017, - "grad_norm": 1.509264588356018, - "learning_rate": 9.655477386934673e-05, - "loss": 5.905, - "step": 3929 - }, - { - "epoch": 2.0495436766623207, - "grad_norm": 1.5452791452407837, - "learning_rate": 9.655376884422111e-05, - "loss": 5.8895, - "step": 3930 - }, - { - "epoch": 2.0500651890482398, - "grad_norm": 1.654995083808899, - "learning_rate": 9.655276381909548e-05, - "loss": 5.5559, - "step": 3931 - }, - { - "epoch": 2.050586701434159, - "grad_norm": 1.5105726718902588, - "learning_rate": 9.655175879396985e-05, - "loss": 5.8949, - "step": 3932 - }, - { - "epoch": 2.0511082138200782, - "grad_norm": 1.3797494173049927, - "learning_rate": 9.655075376884423e-05, - "loss": 6.4024, - "step": 3933 - }, - { - "epoch": 2.0516297262059973, - "grad_norm": 1.5611279010772705, - "learning_rate": 9.65497487437186e-05, - "loss": 5.719, - "step": 3934 - }, - { - "epoch": 2.0521512385919167, - "grad_norm": 1.4523956775665283, - "learning_rate": 9.654874371859297e-05, - "loss": 6.3847, - "step": 3935 - }, - { - "epoch": 2.0526727509778357, - "grad_norm": 1.584633231163025, - "learning_rate": 9.654773869346734e-05, - "loss": 5.9134, - "step": 3936 - }, - { - "epoch": 2.0531942633637548, - "grad_norm": 1.4611663818359375, - "learning_rate": 9.654673366834172e-05, - "loss": 5.838, - "step": 3937 - }, - { - "epoch": 2.0537157757496742, - "grad_norm": 1.6152817010879517, - "learning_rate": 9.654572864321608e-05, - "loss": 6.1164, - "step": 3938 - }, - { - "epoch": 2.0542372881355933, - "grad_norm": 1.4726665019989014, - "learning_rate": 9.654472361809046e-05, - "loss": 5.9445, - "step": 3939 - }, - { - "epoch": 2.0547588005215123, - "grad_norm": 1.4976749420166016, - "learning_rate": 9.654371859296482e-05, - "loss": 5.5959, - "step": 3940 - }, - { - "epoch": 2.0552803129074317, - "grad_norm": 1.5572130680084229, - "learning_rate": 9.65427135678392e-05, - "loss": 5.8448, - "step": 3941 - }, - { - "epoch": 2.0558018252933508, - "grad_norm": 1.5943560600280762, - "learning_rate": 9.654170854271357e-05, - "loss": 6.1084, - "step": 3942 - }, - { - "epoch": 2.05632333767927, - "grad_norm": 1.4644074440002441, - "learning_rate": 9.654070351758794e-05, - "loss": 6.2114, - "step": 3943 - }, - { - "epoch": 2.0568448500651892, - "grad_norm": 1.5441384315490723, - "learning_rate": 9.653969849246232e-05, - "loss": 5.6302, - "step": 3944 - }, - { - "epoch": 2.0573663624511083, - "grad_norm": 1.536192536354065, - "learning_rate": 9.653869346733669e-05, - "loss": 6.0086, - "step": 3945 - }, - { - "epoch": 2.0578878748370273, - "grad_norm": 1.895548701286316, - "learning_rate": 9.653768844221106e-05, - "loss": 5.5938, - "step": 3946 - }, - { - "epoch": 2.0584093872229468, - "grad_norm": 1.5269862413406372, - "learning_rate": 9.653668341708543e-05, - "loss": 5.9658, - "step": 3947 - }, - { - "epoch": 2.0589308996088658, - "grad_norm": 1.6709858179092407, - "learning_rate": 9.65356783919598e-05, - "loss": 5.8162, - "step": 3948 - }, - { - "epoch": 2.059452411994785, - "grad_norm": 1.7584911584854126, - "learning_rate": 9.653467336683417e-05, - "loss": 5.8389, - "step": 3949 - }, - { - "epoch": 2.0599739243807043, - "grad_norm": 1.6268733739852905, - "learning_rate": 9.653366834170855e-05, - "loss": 5.7966, - "step": 3950 - }, - { - "epoch": 2.0604954367666233, - "grad_norm": 1.7307806015014648, - "learning_rate": 9.653266331658291e-05, - "loss": 5.8234, - "step": 3951 - }, - { - "epoch": 2.0610169491525423, - "grad_norm": 1.8314112424850464, - "learning_rate": 9.653165829145729e-05, - "loss": 5.7399, - "step": 3952 - }, - { - "epoch": 2.0615384615384613, - "grad_norm": 1.503750205039978, - "learning_rate": 9.653065326633167e-05, - "loss": 5.6829, - "step": 3953 - }, - { - "epoch": 2.062059973924381, - "grad_norm": 1.414094090461731, - "learning_rate": 9.652964824120604e-05, - "loss": 6.158, - "step": 3954 - }, - { - "epoch": 2.0625814863103, - "grad_norm": 1.7355551719665527, - "learning_rate": 9.652864321608041e-05, - "loss": 5.5072, - "step": 3955 - }, - { - "epoch": 2.063102998696219, - "grad_norm": 1.463829517364502, - "learning_rate": 9.652763819095479e-05, - "loss": 6.0604, - "step": 3956 - }, - { - "epoch": 2.0636245110821383, - "grad_norm": 1.6242479085922241, - "learning_rate": 9.652663316582915e-05, - "loss": 6.0493, - "step": 3957 - }, - { - "epoch": 2.0641460234680573, - "grad_norm": 1.633217692375183, - "learning_rate": 9.652562814070352e-05, - "loss": 5.8793, - "step": 3958 - }, - { - "epoch": 2.0646675358539763, - "grad_norm": 1.720479965209961, - "learning_rate": 9.65246231155779e-05, - "loss": 6.0504, - "step": 3959 - }, - { - "epoch": 2.065189048239896, - "grad_norm": 1.557276725769043, - "learning_rate": 9.652361809045226e-05, - "loss": 5.9358, - "step": 3960 - }, - { - "epoch": 2.065710560625815, - "grad_norm": 1.6484123468399048, - "learning_rate": 9.652261306532664e-05, - "loss": 5.8154, - "step": 3961 - }, - { - "epoch": 2.066232073011734, - "grad_norm": 1.497696876525879, - "learning_rate": 9.6521608040201e-05, - "loss": 6.0611, - "step": 3962 - }, - { - "epoch": 2.0667535853976533, - "grad_norm": 1.5143600702285767, - "learning_rate": 9.652060301507538e-05, - "loss": 5.915, - "step": 3963 - }, - { - "epoch": 2.0672750977835723, - "grad_norm": 1.6158775091171265, - "learning_rate": 9.651959798994976e-05, - "loss": 5.9119, - "step": 3964 - }, - { - "epoch": 2.0677966101694913, - "grad_norm": 1.7102049589157104, - "learning_rate": 9.651859296482413e-05, - "loss": 5.7317, - "step": 3965 - }, - { - "epoch": 2.068318122555411, - "grad_norm": 1.4505635499954224, - "learning_rate": 9.65175879396985e-05, - "loss": 6.0799, - "step": 3966 - }, - { - "epoch": 2.06883963494133, - "grad_norm": 1.8569592237472534, - "learning_rate": 9.651658291457288e-05, - "loss": 5.7805, - "step": 3967 - }, - { - "epoch": 2.069361147327249, - "grad_norm": 1.5475157499313354, - "learning_rate": 9.651557788944724e-05, - "loss": 6.171, - "step": 3968 - }, - { - "epoch": 2.0698826597131683, - "grad_norm": 1.5666013956069946, - "learning_rate": 9.651457286432162e-05, - "loss": 5.9443, - "step": 3969 - }, - { - "epoch": 2.0704041720990873, - "grad_norm": 1.365666151046753, - "learning_rate": 9.651356783919598e-05, - "loss": 6.1164, - "step": 3970 - }, - { - "epoch": 2.0709256844850064, - "grad_norm": 1.5494128465652466, - "learning_rate": 9.651256281407036e-05, - "loss": 5.8838, - "step": 3971 - }, - { - "epoch": 2.071447196870926, - "grad_norm": 1.8812227249145508, - "learning_rate": 9.651155778894472e-05, - "loss": 5.8664, - "step": 3972 - }, - { - "epoch": 2.071968709256845, - "grad_norm": 1.7095237970352173, - "learning_rate": 9.65105527638191e-05, - "loss": 5.9277, - "step": 3973 - }, - { - "epoch": 2.072490221642764, - "grad_norm": 1.5555728673934937, - "learning_rate": 9.650954773869348e-05, - "loss": 6.0149, - "step": 3974 - }, - { - "epoch": 2.0730117340286833, - "grad_norm": 1.5653584003448486, - "learning_rate": 9.650854271356784e-05, - "loss": 5.8689, - "step": 3975 - }, - { - "epoch": 2.0735332464146023, - "grad_norm": 1.4866209030151367, - "learning_rate": 9.650753768844222e-05, - "loss": 5.9425, - "step": 3976 - }, - { - "epoch": 2.0740547588005214, - "grad_norm": 2.0871634483337402, - "learning_rate": 9.650653266331659e-05, - "loss": 5.2804, - "step": 3977 - }, - { - "epoch": 2.074576271186441, - "grad_norm": 1.7012065649032593, - "learning_rate": 9.650552763819096e-05, - "loss": 5.672, - "step": 3978 - }, - { - "epoch": 2.07509778357236, - "grad_norm": 1.4521323442459106, - "learning_rate": 9.650452261306533e-05, - "loss": 6.3439, - "step": 3979 - }, - { - "epoch": 2.075619295958279, - "grad_norm": 1.5571131706237793, - "learning_rate": 9.65035175879397e-05, - "loss": 6.0636, - "step": 3980 - }, - { - "epoch": 2.0761408083441983, - "grad_norm": 1.4965920448303223, - "learning_rate": 9.650251256281407e-05, - "loss": 5.7312, - "step": 3981 - }, - { - "epoch": 2.0766623207301174, - "grad_norm": 1.4877055883407593, - "learning_rate": 9.650150753768845e-05, - "loss": 5.7582, - "step": 3982 - }, - { - "epoch": 2.0771838331160364, - "grad_norm": 1.5904436111450195, - "learning_rate": 9.650050251256281e-05, - "loss": 5.2658, - "step": 3983 - }, - { - "epoch": 2.077705345501956, - "grad_norm": 1.57761549949646, - "learning_rate": 9.649949748743719e-05, - "loss": 6.02, - "step": 3984 - }, - { - "epoch": 2.078226857887875, - "grad_norm": 1.5657042264938354, - "learning_rate": 9.649849246231157e-05, - "loss": 5.9347, - "step": 3985 - }, - { - "epoch": 2.078748370273794, - "grad_norm": 1.5054913759231567, - "learning_rate": 9.649748743718593e-05, - "loss": 6.1691, - "step": 3986 - }, - { - "epoch": 2.0792698826597134, - "grad_norm": 1.5268501043319702, - "learning_rate": 9.649648241206031e-05, - "loss": 5.8295, - "step": 3987 - }, - { - "epoch": 2.0797913950456324, - "grad_norm": 1.723880410194397, - "learning_rate": 9.649547738693467e-05, - "loss": 5.8641, - "step": 3988 - }, - { - "epoch": 2.0803129074315514, - "grad_norm": 1.9011268615722656, - "learning_rate": 9.649447236180905e-05, - "loss": 6.0203, - "step": 3989 - }, - { - "epoch": 2.080834419817471, - "grad_norm": 1.5875879526138306, - "learning_rate": 9.649346733668342e-05, - "loss": 5.5395, - "step": 3990 - }, - { - "epoch": 2.08135593220339, - "grad_norm": 1.588598370552063, - "learning_rate": 9.64924623115578e-05, - "loss": 5.9627, - "step": 3991 - }, - { - "epoch": 2.081877444589309, - "grad_norm": 1.5612637996673584, - "learning_rate": 9.649145728643216e-05, - "loss": 5.4503, - "step": 3992 - }, - { - "epoch": 2.0823989569752284, - "grad_norm": 1.4996964931488037, - "learning_rate": 9.649045226130654e-05, - "loss": 5.8918, - "step": 3993 - }, - { - "epoch": 2.0829204693611474, - "grad_norm": 1.5235801935195923, - "learning_rate": 9.648944723618091e-05, - "loss": 6.0025, - "step": 3994 - }, - { - "epoch": 2.0834419817470664, - "grad_norm": 1.4258363246917725, - "learning_rate": 9.648844221105529e-05, - "loss": 5.8052, - "step": 3995 - }, - { - "epoch": 2.083963494132986, - "grad_norm": 1.5788785219192505, - "learning_rate": 9.648743718592966e-05, - "loss": 5.9388, - "step": 3996 - }, - { - "epoch": 2.084485006518905, - "grad_norm": 1.5934689044952393, - "learning_rate": 9.648643216080403e-05, - "loss": 5.2253, - "step": 3997 - }, - { - "epoch": 2.085006518904824, - "grad_norm": 1.4850819110870361, - "learning_rate": 9.64854271356784e-05, - "loss": 5.9429, - "step": 3998 - }, - { - "epoch": 2.0855280312907434, - "grad_norm": 1.6332536935806274, - "learning_rate": 9.648442211055276e-05, - "loss": 6.106, - "step": 3999 - }, - { - "epoch": 2.0860495436766624, - "grad_norm": 1.7181698083877563, - "learning_rate": 9.648341708542714e-05, - "loss": 5.8514, - "step": 4000 - }, - { - "epoch": 2.0865710560625814, - "grad_norm": 1.4431015253067017, - "learning_rate": 9.64824120603015e-05, - "loss": 6.1348, - "step": 4001 - }, - { - "epoch": 2.0870925684485004, - "grad_norm": 1.458450198173523, - "learning_rate": 9.648140703517588e-05, - "loss": 5.9369, - "step": 4002 - }, - { - "epoch": 2.08761408083442, - "grad_norm": 1.7926479578018188, - "learning_rate": 9.648040201005025e-05, - "loss": 5.5524, - "step": 4003 - }, - { - "epoch": 2.088135593220339, - "grad_norm": 1.5437053442001343, - "learning_rate": 9.647939698492462e-05, - "loss": 5.9147, - "step": 4004 - }, - { - "epoch": 2.088657105606258, - "grad_norm": 1.5385366678237915, - "learning_rate": 9.6478391959799e-05, - "loss": 5.8543, - "step": 4005 - }, - { - "epoch": 2.0891786179921774, - "grad_norm": 1.580931305885315, - "learning_rate": 9.647738693467338e-05, - "loss": 5.8669, - "step": 4006 - }, - { - "epoch": 2.0897001303780964, - "grad_norm": 1.5132478475570679, - "learning_rate": 9.647638190954774e-05, - "loss": 5.99, - "step": 4007 - }, - { - "epoch": 2.0902216427640155, - "grad_norm": 1.4598722457885742, - "learning_rate": 9.647537688442212e-05, - "loss": 6.3002, - "step": 4008 - }, - { - "epoch": 2.090743155149935, - "grad_norm": 1.5921202898025513, - "learning_rate": 9.647437185929649e-05, - "loss": 5.7844, - "step": 4009 - }, - { - "epoch": 2.091264667535854, - "grad_norm": 2.1312170028686523, - "learning_rate": 9.647336683417086e-05, - "loss": 5.0035, - "step": 4010 - }, - { - "epoch": 2.091786179921773, - "grad_norm": 1.5497820377349854, - "learning_rate": 9.647236180904523e-05, - "loss": 6.1362, - "step": 4011 - }, - { - "epoch": 2.0923076923076924, - "grad_norm": 1.6374157667160034, - "learning_rate": 9.647135678391959e-05, - "loss": 5.308, - "step": 4012 - }, - { - "epoch": 2.0928292046936114, - "grad_norm": 1.6109650135040283, - "learning_rate": 9.647035175879397e-05, - "loss": 5.9434, - "step": 4013 - }, - { - "epoch": 2.0933507170795305, - "grad_norm": 1.5867388248443604, - "learning_rate": 9.646934673366835e-05, - "loss": 6.306, - "step": 4014 - }, - { - "epoch": 2.09387222946545, - "grad_norm": 1.6429471969604492, - "learning_rate": 9.646834170854273e-05, - "loss": 5.8896, - "step": 4015 - }, - { - "epoch": 2.094393741851369, - "grad_norm": 1.5101276636123657, - "learning_rate": 9.646733668341709e-05, - "loss": 6.1561, - "step": 4016 - }, - { - "epoch": 2.094915254237288, - "grad_norm": 1.5615636110305786, - "learning_rate": 9.646633165829147e-05, - "loss": 5.803, - "step": 4017 - }, - { - "epoch": 2.0954367666232074, - "grad_norm": 1.7508108615875244, - "learning_rate": 9.646532663316583e-05, - "loss": 6.0263, - "step": 4018 - }, - { - "epoch": 2.0959582790091265, - "grad_norm": 1.6000175476074219, - "learning_rate": 9.646432160804021e-05, - "loss": 5.9665, - "step": 4019 - }, - { - "epoch": 2.0964797913950455, - "grad_norm": 1.7278801202774048, - "learning_rate": 9.646331658291457e-05, - "loss": 5.7844, - "step": 4020 - }, - { - "epoch": 2.097001303780965, - "grad_norm": 1.519051432609558, - "learning_rate": 9.646231155778895e-05, - "loss": 6.0921, - "step": 4021 - }, - { - "epoch": 2.097522816166884, - "grad_norm": 1.4523789882659912, - "learning_rate": 9.646130653266332e-05, - "loss": 6.0326, - "step": 4022 - }, - { - "epoch": 2.098044328552803, - "grad_norm": 1.4682538509368896, - "learning_rate": 9.64603015075377e-05, - "loss": 5.7331, - "step": 4023 - }, - { - "epoch": 2.0985658409387224, - "grad_norm": 1.566469669342041, - "learning_rate": 9.645929648241206e-05, - "loss": 6.1406, - "step": 4024 - }, - { - "epoch": 2.0990873533246415, - "grad_norm": 1.5545958280563354, - "learning_rate": 9.645829145728644e-05, - "loss": 5.5258, - "step": 4025 - }, - { - "epoch": 2.0996088657105605, - "grad_norm": 1.524930477142334, - "learning_rate": 9.645728643216081e-05, - "loss": 5.9815, - "step": 4026 - }, - { - "epoch": 2.10013037809648, - "grad_norm": 1.4868464469909668, - "learning_rate": 9.645628140703518e-05, - "loss": 6.1474, - "step": 4027 - }, - { - "epoch": 2.100651890482399, - "grad_norm": 1.7345813512802124, - "learning_rate": 9.645527638190956e-05, - "loss": 5.9141, - "step": 4028 - }, - { - "epoch": 2.101173402868318, - "grad_norm": 1.4309501647949219, - "learning_rate": 9.645427135678392e-05, - "loss": 6.205, - "step": 4029 - }, - { - "epoch": 2.1016949152542375, - "grad_norm": 1.3851560354232788, - "learning_rate": 9.64532663316583e-05, - "loss": 6.2263, - "step": 4030 - }, - { - "epoch": 2.1022164276401565, - "grad_norm": 1.5849758386611938, - "learning_rate": 9.645226130653266e-05, - "loss": 5.774, - "step": 4031 - }, - { - "epoch": 2.1027379400260755, - "grad_norm": 1.5967957973480225, - "learning_rate": 9.645125628140704e-05, - "loss": 5.7253, - "step": 4032 - }, - { - "epoch": 2.103259452411995, - "grad_norm": 1.5128581523895264, - "learning_rate": 9.64502512562814e-05, - "loss": 5.7215, - "step": 4033 - }, - { - "epoch": 2.103780964797914, - "grad_norm": 1.4798904657363892, - "learning_rate": 9.644924623115578e-05, - "loss": 6.0232, - "step": 4034 - }, - { - "epoch": 2.104302477183833, - "grad_norm": 1.59605073928833, - "learning_rate": 9.644824120603016e-05, - "loss": 5.4373, - "step": 4035 - }, - { - "epoch": 2.1048239895697525, - "grad_norm": 1.5852055549621582, - "learning_rate": 9.644723618090454e-05, - "loss": 5.9024, - "step": 4036 - }, - { - "epoch": 2.1053455019556715, - "grad_norm": 1.6397496461868286, - "learning_rate": 9.64462311557789e-05, - "loss": 5.8841, - "step": 4037 - }, - { - "epoch": 2.1058670143415905, - "grad_norm": 1.6135516166687012, - "learning_rate": 9.644522613065327e-05, - "loss": 5.7623, - "step": 4038 - }, - { - "epoch": 2.10638852672751, - "grad_norm": 1.404086947441101, - "learning_rate": 9.644422110552765e-05, - "loss": 6.382, - "step": 4039 - }, - { - "epoch": 2.106910039113429, - "grad_norm": 1.554582118988037, - "learning_rate": 9.644321608040201e-05, - "loss": 5.9852, - "step": 4040 - }, - { - "epoch": 2.107431551499348, - "grad_norm": 1.5648540258407593, - "learning_rate": 9.644221105527639e-05, - "loss": 6.0648, - "step": 4041 - }, - { - "epoch": 2.1079530638852675, - "grad_norm": 1.491792917251587, - "learning_rate": 9.644120603015075e-05, - "loss": 6.0059, - "step": 4042 - }, - { - "epoch": 2.1084745762711865, - "grad_norm": 1.5058050155639648, - "learning_rate": 9.644020100502513e-05, - "loss": 6.0285, - "step": 4043 - }, - { - "epoch": 2.1089960886571055, - "grad_norm": 1.7225233316421509, - "learning_rate": 9.64391959798995e-05, - "loss": 5.6989, - "step": 4044 - }, - { - "epoch": 2.1095176010430245, - "grad_norm": 1.6390562057495117, - "learning_rate": 9.643819095477387e-05, - "loss": 5.8308, - "step": 4045 - }, - { - "epoch": 2.110039113428944, - "grad_norm": 1.5344575643539429, - "learning_rate": 9.643718592964825e-05, - "loss": 5.9846, - "step": 4046 - }, - { - "epoch": 2.110560625814863, - "grad_norm": 1.4064332246780396, - "learning_rate": 9.643618090452263e-05, - "loss": 6.068, - "step": 4047 - }, - { - "epoch": 2.111082138200782, - "grad_norm": 1.471551537513733, - "learning_rate": 9.643517587939699e-05, - "loss": 5.8043, - "step": 4048 - }, - { - "epoch": 2.1116036505867015, - "grad_norm": 1.5230891704559326, - "learning_rate": 9.643417085427137e-05, - "loss": 5.992, - "step": 4049 - }, - { - "epoch": 2.1121251629726205, - "grad_norm": 1.6296758651733398, - "learning_rate": 9.643316582914573e-05, - "loss": 5.6869, - "step": 4050 - }, - { - "epoch": 2.1126466753585396, - "grad_norm": 1.5253201723098755, - "learning_rate": 9.64321608040201e-05, - "loss": 5.9923, - "step": 4051 - }, - { - "epoch": 2.113168187744459, - "grad_norm": 1.433785319328308, - "learning_rate": 9.643115577889448e-05, - "loss": 5.8441, - "step": 4052 - }, - { - "epoch": 2.113689700130378, - "grad_norm": 1.6340131759643555, - "learning_rate": 9.643015075376884e-05, - "loss": 5.7386, - "step": 4053 - }, - { - "epoch": 2.114211212516297, - "grad_norm": 1.5894780158996582, - "learning_rate": 9.642914572864322e-05, - "loss": 6.0266, - "step": 4054 - }, - { - "epoch": 2.1147327249022165, - "grad_norm": 1.4413528442382812, - "learning_rate": 9.642814070351758e-05, - "loss": 6.1539, - "step": 4055 - }, - { - "epoch": 2.1152542372881356, - "grad_norm": 1.5714190006256104, - "learning_rate": 9.642713567839196e-05, - "loss": 5.7521, - "step": 4056 - }, - { - "epoch": 2.1157757496740546, - "grad_norm": 1.5080009698867798, - "learning_rate": 9.642613065326634e-05, - "loss": 6.0576, - "step": 4057 - }, - { - "epoch": 2.116297262059974, - "grad_norm": 1.5810869932174683, - "learning_rate": 9.642512562814072e-05, - "loss": 5.8681, - "step": 4058 - }, - { - "epoch": 2.116818774445893, - "grad_norm": 1.5400056838989258, - "learning_rate": 9.642412060301508e-05, - "loss": 5.7973, - "step": 4059 - }, - { - "epoch": 2.117340286831812, - "grad_norm": 1.499199390411377, - "learning_rate": 9.642311557788946e-05, - "loss": 5.8028, - "step": 4060 - }, - { - "epoch": 2.1178617992177315, - "grad_norm": 1.5160057544708252, - "learning_rate": 9.642211055276382e-05, - "loss": 5.9625, - "step": 4061 - }, - { - "epoch": 2.1183833116036506, - "grad_norm": 1.5690933465957642, - "learning_rate": 9.64211055276382e-05, - "loss": 5.8115, - "step": 4062 - }, - { - "epoch": 2.1189048239895696, - "grad_norm": 1.6263052225112915, - "learning_rate": 9.642010050251256e-05, - "loss": 5.7829, - "step": 4063 - }, - { - "epoch": 2.119426336375489, - "grad_norm": 1.5997531414031982, - "learning_rate": 9.641909547738694e-05, - "loss": 6.0098, - "step": 4064 - }, - { - "epoch": 2.119947848761408, - "grad_norm": 1.6260979175567627, - "learning_rate": 9.64180904522613e-05, - "loss": 5.7525, - "step": 4065 - }, - { - "epoch": 2.120469361147327, - "grad_norm": 1.4114818572998047, - "learning_rate": 9.641708542713568e-05, - "loss": 6.1736, - "step": 4066 - }, - { - "epoch": 2.1209908735332466, - "grad_norm": 1.5938032865524292, - "learning_rate": 9.641608040201006e-05, - "loss": 5.7228, - "step": 4067 - }, - { - "epoch": 2.1215123859191656, - "grad_norm": 1.5444624423980713, - "learning_rate": 9.641507537688443e-05, - "loss": 6.1097, - "step": 4068 - }, - { - "epoch": 2.1220338983050846, - "grad_norm": 1.4992380142211914, - "learning_rate": 9.64140703517588e-05, - "loss": 5.7934, - "step": 4069 - }, - { - "epoch": 2.122555410691004, - "grad_norm": 1.4691975116729736, - "learning_rate": 9.641306532663317e-05, - "loss": 6.1119, - "step": 4070 - }, - { - "epoch": 2.123076923076923, - "grad_norm": 1.4008792638778687, - "learning_rate": 9.641206030150755e-05, - "loss": 6.3395, - "step": 4071 - }, - { - "epoch": 2.123598435462842, - "grad_norm": 1.6199954748153687, - "learning_rate": 9.641105527638191e-05, - "loss": 6.1734, - "step": 4072 - }, - { - "epoch": 2.1241199478487616, - "grad_norm": 1.6932867765426636, - "learning_rate": 9.641005025125629e-05, - "loss": 5.2122, - "step": 4073 - }, - { - "epoch": 2.1246414602346806, - "grad_norm": 1.4030659198760986, - "learning_rate": 9.640904522613065e-05, - "loss": 5.9789, - "step": 4074 - }, - { - "epoch": 2.1251629726205996, - "grad_norm": 1.6059961318969727, - "learning_rate": 9.640804020100503e-05, - "loss": 6.0803, - "step": 4075 - }, - { - "epoch": 2.125684485006519, - "grad_norm": 1.414086937904358, - "learning_rate": 9.64070351758794e-05, - "loss": 6.0654, - "step": 4076 - }, - { - "epoch": 2.126205997392438, - "grad_norm": 1.471876859664917, - "learning_rate": 9.640603015075377e-05, - "loss": 6.2065, - "step": 4077 - }, - { - "epoch": 2.126727509778357, - "grad_norm": 1.6456488370895386, - "learning_rate": 9.640502512562815e-05, - "loss": 5.7527, - "step": 4078 - }, - { - "epoch": 2.1272490221642766, - "grad_norm": 1.4757113456726074, - "learning_rate": 9.640402010050251e-05, - "loss": 5.732, - "step": 4079 - }, - { - "epoch": 2.1277705345501956, - "grad_norm": 1.602286458015442, - "learning_rate": 9.640301507537689e-05, - "loss": 6.1571, - "step": 4080 - }, - { - "epoch": 2.1282920469361146, - "grad_norm": 1.5600515604019165, - "learning_rate": 9.640201005025126e-05, - "loss": 5.5636, - "step": 4081 - }, - { - "epoch": 2.128813559322034, - "grad_norm": 1.5856138467788696, - "learning_rate": 9.640100502512563e-05, - "loss": 5.9468, - "step": 4082 - }, - { - "epoch": 2.129335071707953, - "grad_norm": 1.6452568769454956, - "learning_rate": 9.64e-05, - "loss": 6.0919, - "step": 4083 - }, - { - "epoch": 2.129856584093872, - "grad_norm": 1.4803102016448975, - "learning_rate": 9.639899497487438e-05, - "loss": 5.9781, - "step": 4084 - }, - { - "epoch": 2.1303780964797916, - "grad_norm": 1.5239051580429077, - "learning_rate": 9.639798994974874e-05, - "loss": 5.4678, - "step": 4085 - }, - { - "epoch": 2.1308996088657106, - "grad_norm": 1.4939051866531372, - "learning_rate": 9.639698492462312e-05, - "loss": 6.0916, - "step": 4086 - }, - { - "epoch": 2.1314211212516296, - "grad_norm": 1.4965680837631226, - "learning_rate": 9.63959798994975e-05, - "loss": 5.982, - "step": 4087 - }, - { - "epoch": 2.131942633637549, - "grad_norm": 1.720604658126831, - "learning_rate": 9.639497487437187e-05, - "loss": 5.6354, - "step": 4088 - }, - { - "epoch": 2.132464146023468, - "grad_norm": 1.5248960256576538, - "learning_rate": 9.639396984924624e-05, - "loss": 6.2344, - "step": 4089 - }, - { - "epoch": 2.132985658409387, - "grad_norm": 1.582960844039917, - "learning_rate": 9.639296482412062e-05, - "loss": 5.7565, - "step": 4090 - }, - { - "epoch": 2.1335071707953066, - "grad_norm": 1.611987590789795, - "learning_rate": 9.639195979899498e-05, - "loss": 5.0651, - "step": 4091 - }, - { - "epoch": 2.1340286831812256, - "grad_norm": 1.4891964197158813, - "learning_rate": 9.639095477386934e-05, - "loss": 5.9972, - "step": 4092 - }, - { - "epoch": 2.1345501955671446, - "grad_norm": 1.5517021417617798, - "learning_rate": 9.638994974874372e-05, - "loss": 6.1375, - "step": 4093 - }, - { - "epoch": 2.135071707953064, - "grad_norm": 2.0423271656036377, - "learning_rate": 9.638894472361809e-05, - "loss": 5.8264, - "step": 4094 - }, - { - "epoch": 2.135593220338983, - "grad_norm": 1.6766407489776611, - "learning_rate": 9.638793969849246e-05, - "loss": 5.5975, - "step": 4095 - }, - { - "epoch": 2.136114732724902, - "grad_norm": 1.561306118965149, - "learning_rate": 9.638693467336683e-05, - "loss": 5.8673, - "step": 4096 - }, - { - "epoch": 2.136636245110821, - "grad_norm": 1.6276477575302124, - "learning_rate": 9.638592964824121e-05, - "loss": 5.8063, - "step": 4097 - }, - { - "epoch": 2.1371577574967406, - "grad_norm": 1.4295010566711426, - "learning_rate": 9.638492462311558e-05, - "loss": 6.0659, - "step": 4098 - }, - { - "epoch": 2.1376792698826597, - "grad_norm": 1.5658425092697144, - "learning_rate": 9.638391959798996e-05, - "loss": 5.5665, - "step": 4099 - }, - { - "epoch": 2.1382007822685787, - "grad_norm": 1.420542597770691, - "learning_rate": 9.638291457286433e-05, - "loss": 6.2492, - "step": 4100 - }, - { - "epoch": 2.138722294654498, - "grad_norm": 1.4702461957931519, - "learning_rate": 9.63819095477387e-05, - "loss": 5.9868, - "step": 4101 - }, - { - "epoch": 2.139243807040417, - "grad_norm": 1.813607931137085, - "learning_rate": 9.638090452261307e-05, - "loss": 5.7942, - "step": 4102 - }, - { - "epoch": 2.139765319426336, - "grad_norm": 1.6354705095291138, - "learning_rate": 9.637989949748745e-05, - "loss": 5.7793, - "step": 4103 - }, - { - "epoch": 2.1402868318122557, - "grad_norm": 1.837010145187378, - "learning_rate": 9.637889447236181e-05, - "loss": 5.8024, - "step": 4104 - }, - { - "epoch": 2.1408083441981747, - "grad_norm": 1.3408119678497314, - "learning_rate": 9.637788944723618e-05, - "loss": 5.8835, - "step": 4105 - }, - { - "epoch": 2.1413298565840937, - "grad_norm": 1.3725417852401733, - "learning_rate": 9.637688442211055e-05, - "loss": 6.3886, - "step": 4106 - }, - { - "epoch": 2.141851368970013, - "grad_norm": 1.642970085144043, - "learning_rate": 9.637587939698493e-05, - "loss": 5.8507, - "step": 4107 - }, - { - "epoch": 2.142372881355932, - "grad_norm": 1.8076266050338745, - "learning_rate": 9.637487437185931e-05, - "loss": 5.1372, - "step": 4108 - }, - { - "epoch": 2.142894393741851, - "grad_norm": 1.6173713207244873, - "learning_rate": 9.637386934673367e-05, - "loss": 5.9407, - "step": 4109 - }, - { - "epoch": 2.1434159061277707, - "grad_norm": 1.4310003519058228, - "learning_rate": 9.637286432160805e-05, - "loss": 5.8436, - "step": 4110 - }, - { - "epoch": 2.1439374185136897, - "grad_norm": 1.4432495832443237, - "learning_rate": 9.637185929648242e-05, - "loss": 5.8959, - "step": 4111 - }, - { - "epoch": 2.1444589308996087, - "grad_norm": 1.4538654088974, - "learning_rate": 9.637085427135679e-05, - "loss": 5.8117, - "step": 4112 - }, - { - "epoch": 2.144980443285528, - "grad_norm": 1.5540939569473267, - "learning_rate": 9.636984924623116e-05, - "loss": 5.7308, - "step": 4113 - }, - { - "epoch": 2.145501955671447, - "grad_norm": 1.3418340682983398, - "learning_rate": 9.636884422110554e-05, - "loss": 5.6499, - "step": 4114 - }, - { - "epoch": 2.146023468057366, - "grad_norm": 1.488295555114746, - "learning_rate": 9.63678391959799e-05, - "loss": 5.4413, - "step": 4115 - }, - { - "epoch": 2.1465449804432857, - "grad_norm": 1.5624216794967651, - "learning_rate": 9.636683417085428e-05, - "loss": 5.6711, - "step": 4116 - }, - { - "epoch": 2.1470664928292047, - "grad_norm": 2.7317700386047363, - "learning_rate": 9.636582914572864e-05, - "loss": 4.9592, - "step": 4117 - }, - { - "epoch": 2.1475880052151237, - "grad_norm": 1.4838392734527588, - "learning_rate": 9.636482412060302e-05, - "loss": 5.8295, - "step": 4118 - }, - { - "epoch": 2.148109517601043, - "grad_norm": 1.5660083293914795, - "learning_rate": 9.63638190954774e-05, - "loss": 5.8831, - "step": 4119 - }, - { - "epoch": 2.148631029986962, - "grad_norm": 1.6533023118972778, - "learning_rate": 9.636281407035176e-05, - "loss": 5.7993, - "step": 4120 - }, - { - "epoch": 2.1491525423728812, - "grad_norm": 1.318649411201477, - "learning_rate": 9.636180904522614e-05, - "loss": 5.2913, - "step": 4121 - }, - { - "epoch": 2.1496740547588007, - "grad_norm": 1.4923579692840576, - "learning_rate": 9.63608040201005e-05, - "loss": 5.6565, - "step": 4122 - }, - { - "epoch": 2.1501955671447197, - "grad_norm": 1.674963355064392, - "learning_rate": 9.635979899497488e-05, - "loss": 5.713, - "step": 4123 - }, - { - "epoch": 2.1507170795306387, - "grad_norm": 1.5670394897460938, - "learning_rate": 9.635879396984925e-05, - "loss": 6.189, - "step": 4124 - }, - { - "epoch": 2.151238591916558, - "grad_norm": 1.4565386772155762, - "learning_rate": 9.635778894472362e-05, - "loss": 5.8786, - "step": 4125 - }, - { - "epoch": 2.151760104302477, - "grad_norm": 1.587632179260254, - "learning_rate": 9.635678391959799e-05, - "loss": 5.4922, - "step": 4126 - }, - { - "epoch": 2.1522816166883962, - "grad_norm": 1.6793723106384277, - "learning_rate": 9.635577889447237e-05, - "loss": 5.9624, - "step": 4127 - }, - { - "epoch": 2.1528031290743157, - "grad_norm": 1.5574718713760376, - "learning_rate": 9.635477386934674e-05, - "loss": 5.9757, - "step": 4128 - }, - { - "epoch": 2.1533246414602347, - "grad_norm": 1.498686671257019, - "learning_rate": 9.635376884422112e-05, - "loss": 6.2249, - "step": 4129 - }, - { - "epoch": 2.1538461538461537, - "grad_norm": 1.4474456310272217, - "learning_rate": 9.635276381909549e-05, - "loss": 6.1448, - "step": 4130 - }, - { - "epoch": 2.154367666232073, - "grad_norm": 1.4918137788772583, - "learning_rate": 9.635175879396985e-05, - "loss": 5.8211, - "step": 4131 - }, - { - "epoch": 2.1548891786179922, - "grad_norm": 1.6043626070022583, - "learning_rate": 9.635075376884423e-05, - "loss": 6.2438, - "step": 4132 - }, - { - "epoch": 2.1554106910039112, - "grad_norm": 1.5281509160995483, - "learning_rate": 9.634974874371859e-05, - "loss": 5.9261, - "step": 4133 - }, - { - "epoch": 2.1559322033898307, - "grad_norm": 1.6100736856460571, - "learning_rate": 9.634874371859297e-05, - "loss": 5.8531, - "step": 4134 - }, - { - "epoch": 2.1564537157757497, - "grad_norm": 1.5253342390060425, - "learning_rate": 9.634773869346733e-05, - "loss": 5.9399, - "step": 4135 - }, - { - "epoch": 2.1569752281616688, - "grad_norm": 1.6915301084518433, - "learning_rate": 9.634673366834171e-05, - "loss": 5.7574, - "step": 4136 - }, - { - "epoch": 2.1574967405475878, - "grad_norm": 1.4801241159439087, - "learning_rate": 9.634572864321608e-05, - "loss": 5.8196, - "step": 4137 - }, - { - "epoch": 2.1580182529335072, - "grad_norm": 1.3558902740478516, - "learning_rate": 9.634472361809045e-05, - "loss": 6.2963, - "step": 4138 - }, - { - "epoch": 2.1585397653194263, - "grad_norm": 1.705980658531189, - "learning_rate": 9.634371859296483e-05, - "loss": 5.8074, - "step": 4139 - }, - { - "epoch": 2.1590612777053453, - "grad_norm": 1.4758377075195312, - "learning_rate": 9.634271356783921e-05, - "loss": 5.9884, - "step": 4140 - }, - { - "epoch": 2.1595827900912647, - "grad_norm": 1.509399175643921, - "learning_rate": 9.634170854271357e-05, - "loss": 5.996, - "step": 4141 - }, - { - "epoch": 2.1601043024771838, - "grad_norm": 1.4701310396194458, - "learning_rate": 9.634070351758795e-05, - "loss": 6.2279, - "step": 4142 - }, - { - "epoch": 2.160625814863103, - "grad_norm": 1.4827938079833984, - "learning_rate": 9.633969849246232e-05, - "loss": 5.6108, - "step": 4143 - }, - { - "epoch": 2.1611473272490223, - "grad_norm": 1.4241420030593872, - "learning_rate": 9.633869346733668e-05, - "loss": 6.2766, - "step": 4144 - }, - { - "epoch": 2.1616688396349413, - "grad_norm": 1.5790462493896484, - "learning_rate": 9.633768844221106e-05, - "loss": 5.6752, - "step": 4145 - }, - { - "epoch": 2.1621903520208603, - "grad_norm": 1.5406605005264282, - "learning_rate": 9.633668341708542e-05, - "loss": 5.8458, - "step": 4146 - }, - { - "epoch": 2.1627118644067798, - "grad_norm": 1.4286932945251465, - "learning_rate": 9.63356783919598e-05, - "loss": 5.7916, - "step": 4147 - }, - { - "epoch": 2.163233376792699, - "grad_norm": 1.7935925722122192, - "learning_rate": 9.633467336683418e-05, - "loss": 5.4178, - "step": 4148 - }, - { - "epoch": 2.163754889178618, - "grad_norm": 1.5130184888839722, - "learning_rate": 9.633366834170856e-05, - "loss": 6.0685, - "step": 4149 - }, - { - "epoch": 2.1642764015645373, - "grad_norm": 1.5140042304992676, - "learning_rate": 9.633266331658292e-05, - "loss": 5.8385, - "step": 4150 - }, - { - "epoch": 2.1647979139504563, - "grad_norm": 1.3754451274871826, - "learning_rate": 9.63316582914573e-05, - "loss": 6.1534, - "step": 4151 - }, - { - "epoch": 2.1653194263363753, - "grad_norm": 1.4569108486175537, - "learning_rate": 9.633065326633166e-05, - "loss": 5.857, - "step": 4152 - }, - { - "epoch": 2.1658409387222948, - "grad_norm": 1.430336356163025, - "learning_rate": 9.632964824120604e-05, - "loss": 6.0599, - "step": 4153 - }, - { - "epoch": 2.166362451108214, - "grad_norm": 1.5880956649780273, - "learning_rate": 9.63286432160804e-05, - "loss": 5.6173, - "step": 4154 - }, - { - "epoch": 2.166883963494133, - "grad_norm": 1.4856092929840088, - "learning_rate": 9.632763819095478e-05, - "loss": 5.8909, - "step": 4155 - }, - { - "epoch": 2.1674054758800523, - "grad_norm": 1.473040223121643, - "learning_rate": 9.632663316582915e-05, - "loss": 5.9461, - "step": 4156 - }, - { - "epoch": 2.1679269882659713, - "grad_norm": 1.711106300354004, - "learning_rate": 9.632562814070352e-05, - "loss": 5.6823, - "step": 4157 - }, - { - "epoch": 2.1684485006518903, - "grad_norm": 1.7813776731491089, - "learning_rate": 9.632462311557789e-05, - "loss": 5.4334, - "step": 4158 - }, - { - "epoch": 2.16897001303781, - "grad_norm": 1.412376880645752, - "learning_rate": 9.632361809045227e-05, - "loss": 5.9774, - "step": 4159 - }, - { - "epoch": 2.169491525423729, - "grad_norm": 1.4302338361740112, - "learning_rate": 9.632261306532664e-05, - "loss": 5.8441, - "step": 4160 - }, - { - "epoch": 2.170013037809648, - "grad_norm": 1.491858959197998, - "learning_rate": 9.632160804020101e-05, - "loss": 6.0293, - "step": 4161 - }, - { - "epoch": 2.1705345501955673, - "grad_norm": 1.7326774597167969, - "learning_rate": 9.632060301507539e-05, - "loss": 5.5771, - "step": 4162 - }, - { - "epoch": 2.1710560625814863, - "grad_norm": 1.4889442920684814, - "learning_rate": 9.631959798994975e-05, - "loss": 5.6667, - "step": 4163 - }, - { - "epoch": 2.1715775749674053, - "grad_norm": 1.4325683116912842, - "learning_rate": 9.631859296482413e-05, - "loss": 6.184, - "step": 4164 - }, - { - "epoch": 2.172099087353325, - "grad_norm": 1.5356519222259521, - "learning_rate": 9.631758793969849e-05, - "loss": 5.3529, - "step": 4165 - }, - { - "epoch": 2.172620599739244, - "grad_norm": 1.4379349946975708, - "learning_rate": 9.631658291457287e-05, - "loss": 6.2292, - "step": 4166 - }, - { - "epoch": 2.173142112125163, - "grad_norm": 1.4488633871078491, - "learning_rate": 9.631557788944723e-05, - "loss": 6.2015, - "step": 4167 - }, - { - "epoch": 2.1736636245110823, - "grad_norm": 1.5173065662384033, - "learning_rate": 9.631457286432161e-05, - "loss": 6.22, - "step": 4168 - }, - { - "epoch": 2.1741851368970013, - "grad_norm": 1.4114947319030762, - "learning_rate": 9.631356783919599e-05, - "loss": 6.0998, - "step": 4169 - }, - { - "epoch": 2.1747066492829203, - "grad_norm": 1.4875038862228394, - "learning_rate": 9.631256281407037e-05, - "loss": 5.9362, - "step": 4170 - }, - { - "epoch": 2.17522816166884, - "grad_norm": 1.5078508853912354, - "learning_rate": 9.631155778894473e-05, - "loss": 5.7843, - "step": 4171 - }, - { - "epoch": 2.175749674054759, - "grad_norm": 1.5399279594421387, - "learning_rate": 9.63105527638191e-05, - "loss": 6.0644, - "step": 4172 - }, - { - "epoch": 2.176271186440678, - "grad_norm": 1.6153684854507446, - "learning_rate": 9.630954773869347e-05, - "loss": 5.9724, - "step": 4173 - }, - { - "epoch": 2.1767926988265973, - "grad_norm": 1.5140578746795654, - "learning_rate": 9.630854271356784e-05, - "loss": 5.2287, - "step": 4174 - }, - { - "epoch": 2.1773142112125163, - "grad_norm": 1.5339590311050415, - "learning_rate": 9.630753768844222e-05, - "loss": 5.7597, - "step": 4175 - }, - { - "epoch": 2.1778357235984354, - "grad_norm": 1.5767537355422974, - "learning_rate": 9.630653266331658e-05, - "loss": 5.9243, - "step": 4176 - }, - { - "epoch": 2.178357235984355, - "grad_norm": 1.3952763080596924, - "learning_rate": 9.630552763819096e-05, - "loss": 6.1219, - "step": 4177 - }, - { - "epoch": 2.178878748370274, - "grad_norm": 1.3526992797851562, - "learning_rate": 9.630452261306532e-05, - "loss": 6.0543, - "step": 4178 - }, - { - "epoch": 2.179400260756193, - "grad_norm": 1.4731237888336182, - "learning_rate": 9.63035175879397e-05, - "loss": 5.0264, - "step": 4179 - }, - { - "epoch": 2.1799217731421123, - "grad_norm": 1.5704865455627441, - "learning_rate": 9.630251256281408e-05, - "loss": 6.0201, - "step": 4180 - }, - { - "epoch": 2.1804432855280313, - "grad_norm": 1.5120298862457275, - "learning_rate": 9.630150753768846e-05, - "loss": 5.8322, - "step": 4181 - }, - { - "epoch": 2.1809647979139504, - "grad_norm": 1.4749548435211182, - "learning_rate": 9.630050251256282e-05, - "loss": 5.7964, - "step": 4182 - }, - { - "epoch": 2.18148631029987, - "grad_norm": 1.5404901504516602, - "learning_rate": 9.62994974874372e-05, - "loss": 6.0657, - "step": 4183 - }, - { - "epoch": 2.182007822685789, - "grad_norm": 1.4624485969543457, - "learning_rate": 9.629849246231156e-05, - "loss": 5.8291, - "step": 4184 - }, - { - "epoch": 2.182529335071708, - "grad_norm": 1.398445963859558, - "learning_rate": 9.629748743718593e-05, - "loss": 5.9797, - "step": 4185 - }, - { - "epoch": 2.1830508474576273, - "grad_norm": 1.6162575483322144, - "learning_rate": 9.62964824120603e-05, - "loss": 5.6593, - "step": 4186 - }, - { - "epoch": 2.1835723598435464, - "grad_norm": 1.8853237628936768, - "learning_rate": 9.629547738693467e-05, - "loss": 5.6815, - "step": 4187 - }, - { - "epoch": 2.1840938722294654, - "grad_norm": 1.5584781169891357, - "learning_rate": 9.629447236180905e-05, - "loss": 5.3786, - "step": 4188 - }, - { - "epoch": 2.184615384615385, - "grad_norm": 1.5942392349243164, - "learning_rate": 9.629346733668342e-05, - "loss": 5.5381, - "step": 4189 - }, - { - "epoch": 2.185136897001304, - "grad_norm": 1.6615869998931885, - "learning_rate": 9.62924623115578e-05, - "loss": 5.9774, - "step": 4190 - }, - { - "epoch": 2.185658409387223, - "grad_norm": 1.5335358381271362, - "learning_rate": 9.629145728643217e-05, - "loss": 5.5768, - "step": 4191 - }, - { - "epoch": 2.186179921773142, - "grad_norm": 1.5381380319595337, - "learning_rate": 9.629045226130654e-05, - "loss": 6.1425, - "step": 4192 - }, - { - "epoch": 2.1867014341590614, - "grad_norm": 1.5397645235061646, - "learning_rate": 9.628944723618091e-05, - "loss": 6.2133, - "step": 4193 - }, - { - "epoch": 2.1872229465449804, - "grad_norm": 1.8237745761871338, - "learning_rate": 9.628844221105529e-05, - "loss": 5.9163, - "step": 4194 - }, - { - "epoch": 2.1877444589308994, - "grad_norm": 1.562219262123108, - "learning_rate": 9.628743718592965e-05, - "loss": 5.9384, - "step": 4195 - }, - { - "epoch": 2.188265971316819, - "grad_norm": 1.6798456907272339, - "learning_rate": 9.628643216080403e-05, - "loss": 5.6887, - "step": 4196 - }, - { - "epoch": 2.188787483702738, - "grad_norm": 1.5610941648483276, - "learning_rate": 9.62854271356784e-05, - "loss": 5.8478, - "step": 4197 - }, - { - "epoch": 2.189308996088657, - "grad_norm": 1.6944063901901245, - "learning_rate": 9.628442211055276e-05, - "loss": 5.792, - "step": 4198 - }, - { - "epoch": 2.1898305084745764, - "grad_norm": 1.4909008741378784, - "learning_rate": 9.628341708542714e-05, - "loss": 6.2984, - "step": 4199 - }, - { - "epoch": 2.1903520208604954, - "grad_norm": 1.4918161630630493, - "learning_rate": 9.628241206030151e-05, - "loss": 6.1658, - "step": 4200 - }, - { - "epoch": 2.1908735332464144, - "grad_norm": 1.5655443668365479, - "learning_rate": 9.628140703517589e-05, - "loss": 5.8382, - "step": 4201 - }, - { - "epoch": 2.191395045632334, - "grad_norm": 1.6505444049835205, - "learning_rate": 9.628040201005026e-05, - "loss": 5.6874, - "step": 4202 - }, - { - "epoch": 2.191916558018253, - "grad_norm": 1.5654683113098145, - "learning_rate": 9.627939698492463e-05, - "loss": 6.027, - "step": 4203 - }, - { - "epoch": 2.192438070404172, - "grad_norm": 1.4687938690185547, - "learning_rate": 9.6278391959799e-05, - "loss": 6.0516, - "step": 4204 - }, - { - "epoch": 2.1929595827900914, - "grad_norm": 1.531295657157898, - "learning_rate": 9.627738693467338e-05, - "loss": 6.0373, - "step": 4205 - }, - { - "epoch": 2.1934810951760104, - "grad_norm": 1.4420169591903687, - "learning_rate": 9.627638190954774e-05, - "loss": 6.207, - "step": 4206 - }, - { - "epoch": 2.1940026075619294, - "grad_norm": 1.6116113662719727, - "learning_rate": 9.627537688442212e-05, - "loss": 5.8803, - "step": 4207 - }, - { - "epoch": 2.194524119947849, - "grad_norm": 1.4752699136734009, - "learning_rate": 9.627437185929648e-05, - "loss": 5.9457, - "step": 4208 - }, - { - "epoch": 2.195045632333768, - "grad_norm": 1.4827733039855957, - "learning_rate": 9.627336683417086e-05, - "loss": 6.2149, - "step": 4209 - }, - { - "epoch": 2.195567144719687, - "grad_norm": 1.7995855808258057, - "learning_rate": 9.627236180904524e-05, - "loss": 5.9222, - "step": 4210 - }, - { - "epoch": 2.1960886571056064, - "grad_norm": 1.5062789916992188, - "learning_rate": 9.62713567839196e-05, - "loss": 6.1665, - "step": 4211 - }, - { - "epoch": 2.1966101694915254, - "grad_norm": 1.385849952697754, - "learning_rate": 9.627035175879398e-05, - "loss": 5.8802, - "step": 4212 - }, - { - "epoch": 2.1971316818774445, - "grad_norm": 1.4672178030014038, - "learning_rate": 9.626934673366834e-05, - "loss": 6.105, - "step": 4213 - }, - { - "epoch": 2.197653194263364, - "grad_norm": 1.5439850091934204, - "learning_rate": 9.626834170854272e-05, - "loss": 5.5583, - "step": 4214 - }, - { - "epoch": 2.198174706649283, - "grad_norm": 1.5459290742874146, - "learning_rate": 9.626733668341709e-05, - "loss": 6.0693, - "step": 4215 - }, - { - "epoch": 2.198696219035202, - "grad_norm": 1.5792269706726074, - "learning_rate": 9.626633165829146e-05, - "loss": 5.6666, - "step": 4216 - }, - { - "epoch": 2.1992177314211214, - "grad_norm": 1.4074305295944214, - "learning_rate": 9.626532663316583e-05, - "loss": 6.1492, - "step": 4217 - }, - { - "epoch": 2.1997392438070404, - "grad_norm": 1.566886067390442, - "learning_rate": 9.62643216080402e-05, - "loss": 6.0673, - "step": 4218 - }, - { - "epoch": 2.2002607561929595, - "grad_norm": 1.395219087600708, - "learning_rate": 9.626331658291457e-05, - "loss": 5.7235, - "step": 4219 - }, - { - "epoch": 2.200782268578879, - "grad_norm": 1.5499751567840576, - "learning_rate": 9.626231155778895e-05, - "loss": 5.6857, - "step": 4220 - }, - { - "epoch": 2.201303780964798, - "grad_norm": 1.4168339967727661, - "learning_rate": 9.626130653266333e-05, - "loss": 6.2124, - "step": 4221 - }, - { - "epoch": 2.201825293350717, - "grad_norm": 1.5315395593643188, - "learning_rate": 9.62603015075377e-05, - "loss": 5.4852, - "step": 4222 - }, - { - "epoch": 2.2023468057366364, - "grad_norm": 1.511814832687378, - "learning_rate": 9.625929648241207e-05, - "loss": 6.0523, - "step": 4223 - }, - { - "epoch": 2.2028683181225555, - "grad_norm": 1.5781643390655518, - "learning_rate": 9.625829145728643e-05, - "loss": 5.4806, - "step": 4224 - }, - { - "epoch": 2.2033898305084745, - "grad_norm": 1.5573172569274902, - "learning_rate": 9.625728643216081e-05, - "loss": 5.9037, - "step": 4225 - }, - { - "epoch": 2.203911342894394, - "grad_norm": 1.4940944910049438, - "learning_rate": 9.625628140703517e-05, - "loss": 5.7336, - "step": 4226 - }, - { - "epoch": 2.204432855280313, - "grad_norm": 1.5248242616653442, - "learning_rate": 9.625527638190955e-05, - "loss": 5.8876, - "step": 4227 - }, - { - "epoch": 2.204954367666232, - "grad_norm": 1.4782854318618774, - "learning_rate": 9.625427135678392e-05, - "loss": 5.993, - "step": 4228 - }, - { - "epoch": 2.205475880052151, - "grad_norm": 1.724940538406372, - "learning_rate": 9.62532663316583e-05, - "loss": 5.3682, - "step": 4229 - }, - { - "epoch": 2.2059973924380705, - "grad_norm": 1.558679461479187, - "learning_rate": 9.625226130653266e-05, - "loss": 5.9558, - "step": 4230 - }, - { - "epoch": 2.2065189048239895, - "grad_norm": 1.523055076599121, - "learning_rate": 9.625125628140704e-05, - "loss": 6.1006, - "step": 4231 - }, - { - "epoch": 2.2070404172099085, - "grad_norm": 1.6131079196929932, - "learning_rate": 9.625025125628141e-05, - "loss": 5.4843, - "step": 4232 - }, - { - "epoch": 2.207561929595828, - "grad_norm": 1.5662521123886108, - "learning_rate": 9.624924623115579e-05, - "loss": 5.4823, - "step": 4233 - }, - { - "epoch": 2.208083441981747, - "grad_norm": 1.6755551099777222, - "learning_rate": 9.624824120603016e-05, - "loss": 6.1826, - "step": 4234 - }, - { - "epoch": 2.208604954367666, - "grad_norm": 1.504688024520874, - "learning_rate": 9.624723618090453e-05, - "loss": 6.2576, - "step": 4235 - }, - { - "epoch": 2.2091264667535855, - "grad_norm": 1.5661379098892212, - "learning_rate": 9.62462311557789e-05, - "loss": 5.929, - "step": 4236 - }, - { - "epoch": 2.2096479791395045, - "grad_norm": 1.617279052734375, - "learning_rate": 9.624522613065326e-05, - "loss": 6.0677, - "step": 4237 - }, - { - "epoch": 2.2101694915254235, - "grad_norm": 1.54302978515625, - "learning_rate": 9.624422110552764e-05, - "loss": 6.288, - "step": 4238 - }, - { - "epoch": 2.210691003911343, - "grad_norm": 1.4098293781280518, - "learning_rate": 9.6243216080402e-05, - "loss": 5.7342, - "step": 4239 - }, - { - "epoch": 2.211212516297262, - "grad_norm": 1.533058524131775, - "learning_rate": 9.624221105527638e-05, - "loss": 6.125, - "step": 4240 - }, - { - "epoch": 2.211734028683181, - "grad_norm": 1.825698971748352, - "learning_rate": 9.624120603015076e-05, - "loss": 5.6156, - "step": 4241 - }, - { - "epoch": 2.2122555410691005, - "grad_norm": 1.5709443092346191, - "learning_rate": 9.624020100502514e-05, - "loss": 5.8662, - "step": 4242 - }, - { - "epoch": 2.2127770534550195, - "grad_norm": 1.6004518270492554, - "learning_rate": 9.62391959798995e-05, - "loss": 5.9142, - "step": 4243 - }, - { - "epoch": 2.2132985658409385, - "grad_norm": 1.69973623752594, - "learning_rate": 9.623819095477388e-05, - "loss": 5.9341, - "step": 4244 - }, - { - "epoch": 2.213820078226858, - "grad_norm": 1.8616737127304077, - "learning_rate": 9.623718592964824e-05, - "loss": 5.1441, - "step": 4245 - }, - { - "epoch": 2.214341590612777, - "grad_norm": 1.5581775903701782, - "learning_rate": 9.623618090452262e-05, - "loss": 5.7673, - "step": 4246 - }, - { - "epoch": 2.214863102998696, - "grad_norm": 1.606339454650879, - "learning_rate": 9.623517587939699e-05, - "loss": 6.0073, - "step": 4247 - }, - { - "epoch": 2.2153846153846155, - "grad_norm": 1.6371214389801025, - "learning_rate": 9.623417085427136e-05, - "loss": 5.9918, - "step": 4248 - }, - { - "epoch": 2.2159061277705345, - "grad_norm": 1.6329894065856934, - "learning_rate": 9.623316582914573e-05, - "loss": 5.7093, - "step": 4249 - }, - { - "epoch": 2.2164276401564535, - "grad_norm": 1.369614601135254, - "learning_rate": 9.62321608040201e-05, - "loss": 6.1167, - "step": 4250 - }, - { - "epoch": 2.216949152542373, - "grad_norm": 1.6419516801834106, - "learning_rate": 9.623115577889447e-05, - "loss": 6.0394, - "step": 4251 - }, - { - "epoch": 2.217470664928292, - "grad_norm": 1.577561855316162, - "learning_rate": 9.623015075376885e-05, - "loss": 5.7569, - "step": 4252 - }, - { - "epoch": 2.217992177314211, - "grad_norm": 1.5859085321426392, - "learning_rate": 9.622914572864323e-05, - "loss": 5.5057, - "step": 4253 - }, - { - "epoch": 2.2185136897001305, - "grad_norm": 1.4830831289291382, - "learning_rate": 9.622814070351759e-05, - "loss": 5.5024, - "step": 4254 - }, - { - "epoch": 2.2190352020860495, - "grad_norm": 1.8068671226501465, - "learning_rate": 9.622713567839197e-05, - "loss": 6.0393, - "step": 4255 - }, - { - "epoch": 2.2195567144719686, - "grad_norm": 1.6043729782104492, - "learning_rate": 9.622613065326633e-05, - "loss": 5.5409, - "step": 4256 - }, - { - "epoch": 2.220078226857888, - "grad_norm": 1.5886725187301636, - "learning_rate": 9.622512562814071e-05, - "loss": 5.6257, - "step": 4257 - }, - { - "epoch": 2.220599739243807, - "grad_norm": 1.414002776145935, - "learning_rate": 9.622412060301507e-05, - "loss": 6.2903, - "step": 4258 - }, - { - "epoch": 2.221121251629726, - "grad_norm": 1.5920710563659668, - "learning_rate": 9.622311557788945e-05, - "loss": 5.7007, - "step": 4259 - }, - { - "epoch": 2.2216427640156455, - "grad_norm": 1.6425975561141968, - "learning_rate": 9.622211055276382e-05, - "loss": 6.0939, - "step": 4260 - }, - { - "epoch": 2.2221642764015646, - "grad_norm": 1.7067420482635498, - "learning_rate": 9.62211055276382e-05, - "loss": 5.2148, - "step": 4261 - }, - { - "epoch": 2.2226857887874836, - "grad_norm": 1.5603572130203247, - "learning_rate": 9.622010050251257e-05, - "loss": 5.9083, - "step": 4262 - }, - { - "epoch": 2.223207301173403, - "grad_norm": 1.7151682376861572, - "learning_rate": 9.621909547738695e-05, - "loss": 5.5316, - "step": 4263 - }, - { - "epoch": 2.223728813559322, - "grad_norm": 1.7428044080734253, - "learning_rate": 9.621809045226131e-05, - "loss": 5.6826, - "step": 4264 - }, - { - "epoch": 2.224250325945241, - "grad_norm": 1.6141570806503296, - "learning_rate": 9.621708542713568e-05, - "loss": 5.972, - "step": 4265 - }, - { - "epoch": 2.2247718383311605, - "grad_norm": 1.4812819957733154, - "learning_rate": 9.621608040201006e-05, - "loss": 6.1273, - "step": 4266 - }, - { - "epoch": 2.2252933507170796, - "grad_norm": 1.4849430322647095, - "learning_rate": 9.621507537688442e-05, - "loss": 5.8146, - "step": 4267 - }, - { - "epoch": 2.2258148631029986, - "grad_norm": 1.6830472946166992, - "learning_rate": 9.62140703517588e-05, - "loss": 6.216, - "step": 4268 - }, - { - "epoch": 2.226336375488918, - "grad_norm": 1.4764950275421143, - "learning_rate": 9.621306532663316e-05, - "loss": 5.8217, - "step": 4269 - }, - { - "epoch": 2.226857887874837, - "grad_norm": 1.5452719926834106, - "learning_rate": 9.621206030150754e-05, - "loss": 5.3149, - "step": 4270 - }, - { - "epoch": 2.227379400260756, - "grad_norm": 1.4844051599502563, - "learning_rate": 9.62110552763819e-05, - "loss": 5.7902, - "step": 4271 - }, - { - "epoch": 2.2279009126466756, - "grad_norm": 2.1199533939361572, - "learning_rate": 9.621005025125628e-05, - "loss": 5.6594, - "step": 4272 - }, - { - "epoch": 2.2284224250325946, - "grad_norm": 1.4286423921585083, - "learning_rate": 9.620904522613066e-05, - "loss": 5.6212, - "step": 4273 - }, - { - "epoch": 2.2289439374185136, - "grad_norm": 1.4109450578689575, - "learning_rate": 9.620804020100504e-05, - "loss": 6.1969, - "step": 4274 - }, - { - "epoch": 2.229465449804433, - "grad_norm": 1.4942718744277954, - "learning_rate": 9.62070351758794e-05, - "loss": 6.1276, - "step": 4275 - }, - { - "epoch": 2.229986962190352, - "grad_norm": 1.536807656288147, - "learning_rate": 9.620603015075378e-05, - "loss": 5.9792, - "step": 4276 - }, - { - "epoch": 2.230508474576271, - "grad_norm": 1.5497015714645386, - "learning_rate": 9.620502512562815e-05, - "loss": 5.8802, - "step": 4277 - }, - { - "epoch": 2.2310299869621906, - "grad_norm": 1.4245916604995728, - "learning_rate": 9.620402010050251e-05, - "loss": 5.9729, - "step": 4278 - }, - { - "epoch": 2.2315514993481096, - "grad_norm": 1.392893671989441, - "learning_rate": 9.620301507537689e-05, - "loss": 6.044, - "step": 4279 - }, - { - "epoch": 2.2320730117340286, - "grad_norm": 1.6448756456375122, - "learning_rate": 9.620201005025125e-05, - "loss": 5.9053, - "step": 4280 - }, - { - "epoch": 2.232594524119948, - "grad_norm": 1.5301905870437622, - "learning_rate": 9.620100502512563e-05, - "loss": 5.609, - "step": 4281 - }, - { - "epoch": 2.233116036505867, - "grad_norm": 1.4638302326202393, - "learning_rate": 9.620000000000001e-05, - "loss": 5.8859, - "step": 4282 - }, - { - "epoch": 2.233637548891786, - "grad_norm": 1.5584371089935303, - "learning_rate": 9.619899497487439e-05, - "loss": 5.6482, - "step": 4283 - }, - { - "epoch": 2.234159061277705, - "grad_norm": 1.4065990447998047, - "learning_rate": 9.619798994974875e-05, - "loss": 6.1765, - "step": 4284 - }, - { - "epoch": 2.2346805736636246, - "grad_norm": 1.478596568107605, - "learning_rate": 9.619698492462313e-05, - "loss": 6.3193, - "step": 4285 - }, - { - "epoch": 2.2352020860495436, - "grad_norm": 1.3523467779159546, - "learning_rate": 9.619597989949749e-05, - "loss": 6.1597, - "step": 4286 - }, - { - "epoch": 2.2357235984354626, - "grad_norm": 1.549220323562622, - "learning_rate": 9.619497487437187e-05, - "loss": 5.9618, - "step": 4287 - }, - { - "epoch": 2.236245110821382, - "grad_norm": 1.5357836484909058, - "learning_rate": 9.619396984924623e-05, - "loss": 5.9269, - "step": 4288 - }, - { - "epoch": 2.236766623207301, - "grad_norm": 1.3849581480026245, - "learning_rate": 9.619296482412061e-05, - "loss": 6.1708, - "step": 4289 - }, - { - "epoch": 2.23728813559322, - "grad_norm": 1.5183906555175781, - "learning_rate": 9.619195979899498e-05, - "loss": 5.8385, - "step": 4290 - }, - { - "epoch": 2.2378096479791396, - "grad_norm": 1.7050729990005493, - "learning_rate": 9.619095477386934e-05, - "loss": 5.7082, - "step": 4291 - }, - { - "epoch": 2.2383311603650586, - "grad_norm": 1.8123122453689575, - "learning_rate": 9.618994974874372e-05, - "loss": 5.8841, - "step": 4292 - }, - { - "epoch": 2.2388526727509777, - "grad_norm": 1.5362670421600342, - "learning_rate": 9.61889447236181e-05, - "loss": 6.1838, - "step": 4293 - }, - { - "epoch": 2.239374185136897, - "grad_norm": 1.494167685508728, - "learning_rate": 9.618793969849247e-05, - "loss": 5.8561, - "step": 4294 - }, - { - "epoch": 2.239895697522816, - "grad_norm": 1.6800209283828735, - "learning_rate": 9.618693467336684e-05, - "loss": 5.9858, - "step": 4295 - }, - { - "epoch": 2.240417209908735, - "grad_norm": 1.3043183088302612, - "learning_rate": 9.618592964824122e-05, - "loss": 6.1153, - "step": 4296 - }, - { - "epoch": 2.2409387222946546, - "grad_norm": 1.5613501071929932, - "learning_rate": 9.618492462311558e-05, - "loss": 5.8189, - "step": 4297 - }, - { - "epoch": 2.2414602346805736, - "grad_norm": 1.4424045085906982, - "learning_rate": 9.618391959798996e-05, - "loss": 6.1198, - "step": 4298 - }, - { - "epoch": 2.2419817470664927, - "grad_norm": 1.499439001083374, - "learning_rate": 9.618291457286432e-05, - "loss": 5.7946, - "step": 4299 - }, - { - "epoch": 2.242503259452412, - "grad_norm": 1.6948657035827637, - "learning_rate": 9.61819095477387e-05, - "loss": 6.1627, - "step": 4300 - }, - { - "epoch": 2.243024771838331, - "grad_norm": 1.5387322902679443, - "learning_rate": 9.618090452261306e-05, - "loss": 6.0627, - "step": 4301 - }, - { - "epoch": 2.24354628422425, - "grad_norm": 1.4642891883850098, - "learning_rate": 9.617989949748744e-05, - "loss": 6.2121, - "step": 4302 - }, - { - "epoch": 2.2440677966101696, - "grad_norm": 1.7481859922409058, - "learning_rate": 9.617889447236182e-05, - "loss": 5.4606, - "step": 4303 - }, - { - "epoch": 2.2445893089960887, - "grad_norm": 1.5753322839736938, - "learning_rate": 9.617788944723618e-05, - "loss": 6.0436, - "step": 4304 - }, - { - "epoch": 2.2451108213820077, - "grad_norm": 1.5423458814620972, - "learning_rate": 9.617688442211056e-05, - "loss": 6.1791, - "step": 4305 - }, - { - "epoch": 2.245632333767927, - "grad_norm": 1.5293922424316406, - "learning_rate": 9.617587939698493e-05, - "loss": 6.0013, - "step": 4306 - }, - { - "epoch": 2.246153846153846, - "grad_norm": 1.3917522430419922, - "learning_rate": 9.61748743718593e-05, - "loss": 5.7268, - "step": 4307 - }, - { - "epoch": 2.246675358539765, - "grad_norm": 1.5378646850585938, - "learning_rate": 9.617386934673367e-05, - "loss": 5.9108, - "step": 4308 - }, - { - "epoch": 2.2471968709256847, - "grad_norm": 1.5497835874557495, - "learning_rate": 9.617286432160805e-05, - "loss": 6.2001, - "step": 4309 - }, - { - "epoch": 2.2477183833116037, - "grad_norm": 1.397845983505249, - "learning_rate": 9.617185929648241e-05, - "loss": 6.1975, - "step": 4310 - }, - { - "epoch": 2.2482398956975227, - "grad_norm": 1.3602240085601807, - "learning_rate": 9.617085427135679e-05, - "loss": 5.8275, - "step": 4311 - }, - { - "epoch": 2.248761408083442, - "grad_norm": 1.4670957326889038, - "learning_rate": 9.616984924623115e-05, - "loss": 5.7987, - "step": 4312 - }, - { - "epoch": 2.249282920469361, - "grad_norm": 1.549096703529358, - "learning_rate": 9.616884422110553e-05, - "loss": 5.7638, - "step": 4313 - }, - { - "epoch": 2.24980443285528, - "grad_norm": 1.3964394330978394, - "learning_rate": 9.616783919597991e-05, - "loss": 6.2001, - "step": 4314 - }, - { - "epoch": 2.2503259452411997, - "grad_norm": 1.4519095420837402, - "learning_rate": 9.616683417085429e-05, - "loss": 5.9808, - "step": 4315 - }, - { - "epoch": 2.2508474576271187, - "grad_norm": 1.6788465976715088, - "learning_rate": 9.616582914572865e-05, - "loss": 5.3371, - "step": 4316 - }, - { - "epoch": 2.2513689700130377, - "grad_norm": 1.4285285472869873, - "learning_rate": 9.616482412060301e-05, - "loss": 5.7738, - "step": 4317 - }, - { - "epoch": 2.2518904823989567, - "grad_norm": 1.3309590816497803, - "learning_rate": 9.616381909547739e-05, - "loss": 6.2485, - "step": 4318 - }, - { - "epoch": 2.252411994784876, - "grad_norm": 1.6147722005844116, - "learning_rate": 9.616281407035176e-05, - "loss": 5.5363, - "step": 4319 - }, - { - "epoch": 2.252933507170795, - "grad_norm": 1.5292545557022095, - "learning_rate": 9.616180904522613e-05, - "loss": 5.9387, - "step": 4320 - }, - { - "epoch": 2.2534550195567142, - "grad_norm": 1.735137939453125, - "learning_rate": 9.61608040201005e-05, - "loss": 5.7481, - "step": 4321 - }, - { - "epoch": 2.2539765319426337, - "grad_norm": 1.6055974960327148, - "learning_rate": 9.615979899497488e-05, - "loss": 5.6314, - "step": 4322 - }, - { - "epoch": 2.2544980443285527, - "grad_norm": 1.7140007019042969, - "learning_rate": 9.615879396984925e-05, - "loss": 5.6946, - "step": 4323 - }, - { - "epoch": 2.2550195567144717, - "grad_norm": 1.3877720832824707, - "learning_rate": 9.615778894472363e-05, - "loss": 6.141, - "step": 4324 - }, - { - "epoch": 2.255541069100391, - "grad_norm": 1.5285241603851318, - "learning_rate": 9.6156783919598e-05, - "loss": 5.9295, - "step": 4325 - }, - { - "epoch": 2.2560625814863102, - "grad_norm": 1.7423820495605469, - "learning_rate": 9.615577889447237e-05, - "loss": 5.8851, - "step": 4326 - }, - { - "epoch": 2.2565840938722292, - "grad_norm": 1.3713252544403076, - "learning_rate": 9.615477386934674e-05, - "loss": 5.5109, - "step": 4327 - }, - { - "epoch": 2.2571056062581487, - "grad_norm": 1.3748435974121094, - "learning_rate": 9.615376884422112e-05, - "loss": 6.1063, - "step": 4328 - }, - { - "epoch": 2.2576271186440677, - "grad_norm": 1.3678829669952393, - "learning_rate": 9.615276381909548e-05, - "loss": 5.9995, - "step": 4329 - }, - { - "epoch": 2.2581486310299868, - "grad_norm": 1.4961903095245361, - "learning_rate": 9.615175879396984e-05, - "loss": 5.7581, - "step": 4330 - }, - { - "epoch": 2.258670143415906, - "grad_norm": 1.9454693794250488, - "learning_rate": 9.615075376884422e-05, - "loss": 4.9097, - "step": 4331 - }, - { - "epoch": 2.2591916558018252, - "grad_norm": 1.4550645351409912, - "learning_rate": 9.614974874371859e-05, - "loss": 6.2048, - "step": 4332 - }, - { - "epoch": 2.2597131681877443, - "grad_norm": 1.4141511917114258, - "learning_rate": 9.614874371859296e-05, - "loss": 5.699, - "step": 4333 - }, - { - "epoch": 2.2602346805736637, - "grad_norm": 1.5122029781341553, - "learning_rate": 9.614773869346734e-05, - "loss": 5.9592, - "step": 4334 - }, - { - "epoch": 2.2607561929595827, - "grad_norm": 1.4960200786590576, - "learning_rate": 9.614673366834172e-05, - "loss": 5.8384, - "step": 4335 - }, - { - "epoch": 2.2612777053455018, - "grad_norm": 1.6694279909133911, - "learning_rate": 9.614572864321608e-05, - "loss": 5.6613, - "step": 4336 - }, - { - "epoch": 2.2617992177314212, - "grad_norm": 1.4061635732650757, - "learning_rate": 9.614472361809046e-05, - "loss": 6.1611, - "step": 4337 - }, - { - "epoch": 2.2623207301173403, - "grad_norm": 1.5312711000442505, - "learning_rate": 9.614371859296483e-05, - "loss": 5.5652, - "step": 4338 - }, - { - "epoch": 2.2628422425032593, - "grad_norm": 1.5969191789627075, - "learning_rate": 9.61427135678392e-05, - "loss": 5.5144, - "step": 4339 - }, - { - "epoch": 2.2633637548891787, - "grad_norm": 1.5830482244491577, - "learning_rate": 9.614170854271357e-05, - "loss": 6.021, - "step": 4340 - }, - { - "epoch": 2.2638852672750978, - "grad_norm": 1.474297046661377, - "learning_rate": 9.614070351758795e-05, - "loss": 5.7039, - "step": 4341 - }, - { - "epoch": 2.2644067796610168, - "grad_norm": 1.695778250694275, - "learning_rate": 9.613969849246231e-05, - "loss": 5.4556, - "step": 4342 - }, - { - "epoch": 2.2649282920469362, - "grad_norm": 1.3813631534576416, - "learning_rate": 9.613869346733669e-05, - "loss": 6.0123, - "step": 4343 - }, - { - "epoch": 2.2654498044328553, - "grad_norm": 1.7414000034332275, - "learning_rate": 9.613768844221107e-05, - "loss": 5.8064, - "step": 4344 - }, - { - "epoch": 2.2659713168187743, - "grad_norm": 1.7448735237121582, - "learning_rate": 9.613668341708543e-05, - "loss": 5.5019, - "step": 4345 - }, - { - "epoch": 2.2664928292046937, - "grad_norm": 1.7771391868591309, - "learning_rate": 9.613567839195981e-05, - "loss": 5.6437, - "step": 4346 - }, - { - "epoch": 2.2670143415906128, - "grad_norm": 1.4635871648788452, - "learning_rate": 9.613467336683417e-05, - "loss": 5.8773, - "step": 4347 - }, - { - "epoch": 2.267535853976532, - "grad_norm": 1.4158730506896973, - "learning_rate": 9.613366834170855e-05, - "loss": 6.1698, - "step": 4348 - }, - { - "epoch": 2.2680573663624513, - "grad_norm": 1.696439504623413, - "learning_rate": 9.613266331658292e-05, - "loss": 5.4449, - "step": 4349 - }, - { - "epoch": 2.2685788787483703, - "grad_norm": 1.5143623352050781, - "learning_rate": 9.613165829145729e-05, - "loss": 5.5649, - "step": 4350 - }, - { - "epoch": 2.2691003911342893, - "grad_norm": 1.6823991537094116, - "learning_rate": 9.613065326633166e-05, - "loss": 5.7486, - "step": 4351 - }, - { - "epoch": 2.2696219035202088, - "grad_norm": 1.483682632446289, - "learning_rate": 9.612964824120604e-05, - "loss": 5.7702, - "step": 4352 - }, - { - "epoch": 2.270143415906128, - "grad_norm": 1.4481534957885742, - "learning_rate": 9.61286432160804e-05, - "loss": 6.1517, - "step": 4353 - }, - { - "epoch": 2.270664928292047, - "grad_norm": 1.407548189163208, - "learning_rate": 9.612763819095478e-05, - "loss": 5.9806, - "step": 4354 - }, - { - "epoch": 2.2711864406779663, - "grad_norm": 1.3390120267868042, - "learning_rate": 9.612663316582916e-05, - "loss": 5.8685, - "step": 4355 - }, - { - "epoch": 2.2717079530638853, - "grad_norm": 1.4609589576721191, - "learning_rate": 9.612562814070353e-05, - "loss": 5.9152, - "step": 4356 - }, - { - "epoch": 2.2722294654498043, - "grad_norm": 1.6170445680618286, - "learning_rate": 9.61246231155779e-05, - "loss": 5.884, - "step": 4357 - }, - { - "epoch": 2.2727509778357238, - "grad_norm": 1.502043604850769, - "learning_rate": 9.612361809045226e-05, - "loss": 5.8997, - "step": 4358 - }, - { - "epoch": 2.273272490221643, - "grad_norm": 1.4751697778701782, - "learning_rate": 9.612261306532664e-05, - "loss": 5.9774, - "step": 4359 - }, - { - "epoch": 2.273794002607562, - "grad_norm": 1.401495337486267, - "learning_rate": 9.6121608040201e-05, - "loss": 6.1311, - "step": 4360 - }, - { - "epoch": 2.2743155149934813, - "grad_norm": 1.4665179252624512, - "learning_rate": 9.612060301507538e-05, - "loss": 5.8177, - "step": 4361 - }, - { - "epoch": 2.2748370273794003, - "grad_norm": 1.6854355335235596, - "learning_rate": 9.611959798994975e-05, - "loss": 5.8978, - "step": 4362 - }, - { - "epoch": 2.2753585397653193, - "grad_norm": 1.5078668594360352, - "learning_rate": 9.611859296482412e-05, - "loss": 6.052, - "step": 4363 - }, - { - "epoch": 2.275880052151239, - "grad_norm": 1.5174840688705444, - "learning_rate": 9.61175879396985e-05, - "loss": 5.8474, - "step": 4364 - }, - { - "epoch": 2.276401564537158, - "grad_norm": 1.5173968076705933, - "learning_rate": 9.611658291457288e-05, - "loss": 5.7233, - "step": 4365 - }, - { - "epoch": 2.276923076923077, - "grad_norm": 1.9840943813323975, - "learning_rate": 9.611557788944724e-05, - "loss": 5.6917, - "step": 4366 - }, - { - "epoch": 2.2774445893089963, - "grad_norm": 1.3654333353042603, - "learning_rate": 9.611457286432162e-05, - "loss": 6.3043, - "step": 4367 - }, - { - "epoch": 2.2779661016949153, - "grad_norm": 1.4341760873794556, - "learning_rate": 9.611356783919599e-05, - "loss": 6.2549, - "step": 4368 - }, - { - "epoch": 2.2784876140808343, - "grad_norm": 1.5898112058639526, - "learning_rate": 9.611256281407036e-05, - "loss": 5.7252, - "step": 4369 - }, - { - "epoch": 2.279009126466754, - "grad_norm": 1.5204517841339111, - "learning_rate": 9.611155778894473e-05, - "loss": 5.709, - "step": 4370 - }, - { - "epoch": 2.279530638852673, - "grad_norm": 1.7262276411056519, - "learning_rate": 9.611055276381909e-05, - "loss": 6.2853, - "step": 4371 - }, - { - "epoch": 2.280052151238592, - "grad_norm": 1.6239138841629028, - "learning_rate": 9.610954773869347e-05, - "loss": 6.1104, - "step": 4372 - }, - { - "epoch": 2.2805736636245113, - "grad_norm": 1.4525542259216309, - "learning_rate": 9.610854271356783e-05, - "loss": 5.9999, - "step": 4373 - }, - { - "epoch": 2.2810951760104303, - "grad_norm": 1.5398870706558228, - "learning_rate": 9.610753768844221e-05, - "loss": 5.8398, - "step": 4374 - }, - { - "epoch": 2.2816166883963493, - "grad_norm": 1.5358282327651978, - "learning_rate": 9.610653266331659e-05, - "loss": 5.3313, - "step": 4375 - }, - { - "epoch": 2.282138200782269, - "grad_norm": 1.573818325996399, - "learning_rate": 9.610552763819097e-05, - "loss": 5.7179, - "step": 4376 - }, - { - "epoch": 2.282659713168188, - "grad_norm": 1.6951656341552734, - "learning_rate": 9.610452261306533e-05, - "loss": 5.0575, - "step": 4377 - }, - { - "epoch": 2.283181225554107, - "grad_norm": 1.4231294393539429, - "learning_rate": 9.610351758793971e-05, - "loss": 5.9981, - "step": 4378 - }, - { - "epoch": 2.2837027379400263, - "grad_norm": 1.4839390516281128, - "learning_rate": 9.610251256281407e-05, - "loss": 5.2833, - "step": 4379 - }, - { - "epoch": 2.2842242503259453, - "grad_norm": 1.5798044204711914, - "learning_rate": 9.610150753768845e-05, - "loss": 5.6201, - "step": 4380 - }, - { - "epoch": 2.2847457627118644, - "grad_norm": 1.5019915103912354, - "learning_rate": 9.610050251256282e-05, - "loss": 5.5449, - "step": 4381 - }, - { - "epoch": 2.2852672750977834, - "grad_norm": 1.5175471305847168, - "learning_rate": 9.60994974874372e-05, - "loss": 6.1084, - "step": 4382 - }, - { - "epoch": 2.285788787483703, - "grad_norm": 1.4830224514007568, - "learning_rate": 9.609849246231156e-05, - "loss": 6.0644, - "step": 4383 - }, - { - "epoch": 2.286310299869622, - "grad_norm": 1.5157499313354492, - "learning_rate": 9.609748743718592e-05, - "loss": 6.0146, - "step": 4384 - }, - { - "epoch": 2.286831812255541, - "grad_norm": 1.4522380828857422, - "learning_rate": 9.60964824120603e-05, - "loss": 6.2253, - "step": 4385 - }, - { - "epoch": 2.2873533246414604, - "grad_norm": 1.4442965984344482, - "learning_rate": 9.609547738693468e-05, - "loss": 5.7933, - "step": 4386 - }, - { - "epoch": 2.2878748370273794, - "grad_norm": 1.7577446699142456, - "learning_rate": 9.609447236180906e-05, - "loss": 5.9946, - "step": 4387 - }, - { - "epoch": 2.2883963494132984, - "grad_norm": 1.5554211139678955, - "learning_rate": 9.609346733668342e-05, - "loss": 5.7703, - "step": 4388 - }, - { - "epoch": 2.288917861799218, - "grad_norm": 1.5655378103256226, - "learning_rate": 9.60924623115578e-05, - "loss": 5.6236, - "step": 4389 - }, - { - "epoch": 2.289439374185137, - "grad_norm": 1.6425355672836304, - "learning_rate": 9.609145728643216e-05, - "loss": 5.6981, - "step": 4390 - }, - { - "epoch": 2.289960886571056, - "grad_norm": 2.027571201324463, - "learning_rate": 9.609045226130654e-05, - "loss": 4.8238, - "step": 4391 - }, - { - "epoch": 2.2904823989569754, - "grad_norm": 1.6873457431793213, - "learning_rate": 9.60894472361809e-05, - "loss": 5.9091, - "step": 4392 - }, - { - "epoch": 2.2910039113428944, - "grad_norm": 1.572218894958496, - "learning_rate": 9.608844221105528e-05, - "loss": 5.819, - "step": 4393 - }, - { - "epoch": 2.2915254237288134, - "grad_norm": 1.587289571762085, - "learning_rate": 9.608743718592965e-05, - "loss": 5.7624, - "step": 4394 - }, - { - "epoch": 2.292046936114733, - "grad_norm": 1.6663979291915894, - "learning_rate": 9.608643216080402e-05, - "loss": 5.7209, - "step": 4395 - }, - { - "epoch": 2.292568448500652, - "grad_norm": 1.6945639848709106, - "learning_rate": 9.60854271356784e-05, - "loss": 5.5485, - "step": 4396 - }, - { - "epoch": 2.293089960886571, - "grad_norm": 1.6336361169815063, - "learning_rate": 9.608442211055277e-05, - "loss": 6.0035, - "step": 4397 - }, - { - "epoch": 2.2936114732724904, - "grad_norm": 1.7970013618469238, - "learning_rate": 9.608341708542714e-05, - "loss": 6.0355, - "step": 4398 - }, - { - "epoch": 2.2941329856584094, - "grad_norm": 1.6688491106033325, - "learning_rate": 9.608241206030151e-05, - "loss": 5.8533, - "step": 4399 - }, - { - "epoch": 2.2946544980443284, - "grad_norm": 2.2948222160339355, - "learning_rate": 9.608140703517589e-05, - "loss": 5.6316, - "step": 4400 - }, - { - "epoch": 2.295176010430248, - "grad_norm": 1.535705804824829, - "learning_rate": 9.608040201005025e-05, - "loss": 6.1406, - "step": 4401 - }, - { - "epoch": 2.295697522816167, - "grad_norm": 1.7565983533859253, - "learning_rate": 9.607939698492463e-05, - "loss": 5.6555, - "step": 4402 - }, - { - "epoch": 2.296219035202086, - "grad_norm": 1.6478540897369385, - "learning_rate": 9.607839195979899e-05, - "loss": 5.8054, - "step": 4403 - }, - { - "epoch": 2.2967405475880054, - "grad_norm": 1.502612829208374, - "learning_rate": 9.607738693467337e-05, - "loss": 5.9685, - "step": 4404 - }, - { - "epoch": 2.2972620599739244, - "grad_norm": 1.5802887678146362, - "learning_rate": 9.607638190954773e-05, - "loss": 6.1117, - "step": 4405 - }, - { - "epoch": 2.2977835723598434, - "grad_norm": 1.6156667470932007, - "learning_rate": 9.607537688442211e-05, - "loss": 5.6882, - "step": 4406 - }, - { - "epoch": 2.298305084745763, - "grad_norm": 1.5162800550460815, - "learning_rate": 9.607437185929649e-05, - "loss": 5.8482, - "step": 4407 - }, - { - "epoch": 2.298826597131682, - "grad_norm": 1.5501666069030762, - "learning_rate": 9.607336683417087e-05, - "loss": 6.0215, - "step": 4408 - }, - { - "epoch": 2.299348109517601, - "grad_norm": 1.4644646644592285, - "learning_rate": 9.607236180904523e-05, - "loss": 6.3952, - "step": 4409 - }, - { - "epoch": 2.2998696219035204, - "grad_norm": 1.480393886566162, - "learning_rate": 9.60713567839196e-05, - "loss": 5.9113, - "step": 4410 - }, - { - "epoch": 2.3003911342894394, - "grad_norm": 1.51812744140625, - "learning_rate": 9.607035175879397e-05, - "loss": 5.7259, - "step": 4411 - }, - { - "epoch": 2.3009126466753584, - "grad_norm": 1.4744949340820312, - "learning_rate": 9.606934673366834e-05, - "loss": 6.2802, - "step": 4412 - }, - { - "epoch": 2.3014341590612775, - "grad_norm": 1.4951553344726562, - "learning_rate": 9.606834170854272e-05, - "loss": 5.7906, - "step": 4413 - }, - { - "epoch": 2.301955671447197, - "grad_norm": 1.4387564659118652, - "learning_rate": 9.606733668341708e-05, - "loss": 6.2377, - "step": 4414 - }, - { - "epoch": 2.302477183833116, - "grad_norm": 1.440932273864746, - "learning_rate": 9.606633165829146e-05, - "loss": 5.518, - "step": 4415 - }, - { - "epoch": 2.302998696219035, - "grad_norm": 1.5527325868606567, - "learning_rate": 9.606532663316584e-05, - "loss": 5.9647, - "step": 4416 - }, - { - "epoch": 2.3035202086049544, - "grad_norm": 1.4493411779403687, - "learning_rate": 9.606432160804021e-05, - "loss": 6.1405, - "step": 4417 - }, - { - "epoch": 2.3040417209908735, - "grad_norm": 1.4464961290359497, - "learning_rate": 9.606331658291458e-05, - "loss": 5.8679, - "step": 4418 - }, - { - "epoch": 2.3045632333767925, - "grad_norm": 1.521118402481079, - "learning_rate": 9.606231155778896e-05, - "loss": 5.7272, - "step": 4419 - }, - { - "epoch": 2.305084745762712, - "grad_norm": 1.5399384498596191, - "learning_rate": 9.606130653266332e-05, - "loss": 6.2709, - "step": 4420 - }, - { - "epoch": 2.305606258148631, - "grad_norm": 1.576090693473816, - "learning_rate": 9.60603015075377e-05, - "loss": 5.8097, - "step": 4421 - }, - { - "epoch": 2.30612777053455, - "grad_norm": 1.4556787014007568, - "learning_rate": 9.605929648241206e-05, - "loss": 6.0951, - "step": 4422 - }, - { - "epoch": 2.3066492829204694, - "grad_norm": 1.5314087867736816, - "learning_rate": 9.605829145728644e-05, - "loss": 5.9041, - "step": 4423 - }, - { - "epoch": 2.3071707953063885, - "grad_norm": 1.5983115434646606, - "learning_rate": 9.60572864321608e-05, - "loss": 5.8668, - "step": 4424 - }, - { - "epoch": 2.3076923076923075, - "grad_norm": 1.3969697952270508, - "learning_rate": 9.605628140703517e-05, - "loss": 5.8842, - "step": 4425 - }, - { - "epoch": 2.308213820078227, - "grad_norm": 1.4701638221740723, - "learning_rate": 9.605527638190955e-05, - "loss": 5.8686, - "step": 4426 - }, - { - "epoch": 2.308735332464146, - "grad_norm": 1.4844608306884766, - "learning_rate": 9.605427135678392e-05, - "loss": 6.0574, - "step": 4427 - }, - { - "epoch": 2.309256844850065, - "grad_norm": 1.4334423542022705, - "learning_rate": 9.60532663316583e-05, - "loss": 6.0693, - "step": 4428 - }, - { - "epoch": 2.3097783572359845, - "grad_norm": 1.4853509664535522, - "learning_rate": 9.605226130653267e-05, - "loss": 5.8437, - "step": 4429 - }, - { - "epoch": 2.3102998696219035, - "grad_norm": 1.6010940074920654, - "learning_rate": 9.605125628140704e-05, - "loss": 5.6738, - "step": 4430 - }, - { - "epoch": 2.3108213820078225, - "grad_norm": 1.3954604864120483, - "learning_rate": 9.605025125628141e-05, - "loss": 6.2042, - "step": 4431 - }, - { - "epoch": 2.311342894393742, - "grad_norm": 1.596882939338684, - "learning_rate": 9.604924623115579e-05, - "loss": 5.6366, - "step": 4432 - }, - { - "epoch": 2.311864406779661, - "grad_norm": 1.5305641889572144, - "learning_rate": 9.604824120603015e-05, - "loss": 5.9767, - "step": 4433 - }, - { - "epoch": 2.31238591916558, - "grad_norm": 1.6144206523895264, - "learning_rate": 9.604723618090453e-05, - "loss": 5.8965, - "step": 4434 - }, - { - "epoch": 2.3129074315514995, - "grad_norm": 1.5516972541809082, - "learning_rate": 9.60462311557789e-05, - "loss": 5.8671, - "step": 4435 - }, - { - "epoch": 2.3134289439374185, - "grad_norm": 1.5360769033432007, - "learning_rate": 9.604522613065327e-05, - "loss": 5.6933, - "step": 4436 - }, - { - "epoch": 2.3139504563233375, - "grad_norm": 1.4740028381347656, - "learning_rate": 9.604422110552765e-05, - "loss": 6.2965, - "step": 4437 - }, - { - "epoch": 2.314471968709257, - "grad_norm": 1.4330319166183472, - "learning_rate": 9.604321608040201e-05, - "loss": 5.9253, - "step": 4438 - }, - { - "epoch": 2.314993481095176, - "grad_norm": 1.765086054801941, - "learning_rate": 9.604221105527639e-05, - "loss": 5.5736, - "step": 4439 - }, - { - "epoch": 2.315514993481095, - "grad_norm": 1.4596048593521118, - "learning_rate": 9.604120603015076e-05, - "loss": 5.88, - "step": 4440 - }, - { - "epoch": 2.3160365058670145, - "grad_norm": 1.415076494216919, - "learning_rate": 9.604020100502513e-05, - "loss": 6.061, - "step": 4441 - }, - { - "epoch": 2.3165580182529335, - "grad_norm": 1.5891660451889038, - "learning_rate": 9.60391959798995e-05, - "loss": 6.1773, - "step": 4442 - }, - { - "epoch": 2.3170795306388525, - "grad_norm": 1.5754941701889038, - "learning_rate": 9.603819095477388e-05, - "loss": 5.922, - "step": 4443 - }, - { - "epoch": 2.317601043024772, - "grad_norm": 1.4644241333007812, - "learning_rate": 9.603718592964824e-05, - "loss": 5.897, - "step": 4444 - }, - { - "epoch": 2.318122555410691, - "grad_norm": 1.592297077178955, - "learning_rate": 9.603618090452262e-05, - "loss": 5.7542, - "step": 4445 - }, - { - "epoch": 2.31864406779661, - "grad_norm": 1.6834914684295654, - "learning_rate": 9.603517587939698e-05, - "loss": 5.8045, - "step": 4446 - }, - { - "epoch": 2.3191655801825295, - "grad_norm": 1.720765233039856, - "learning_rate": 9.603417085427136e-05, - "loss": 5.8312, - "step": 4447 - }, - { - "epoch": 2.3196870925684485, - "grad_norm": 1.7956619262695312, - "learning_rate": 9.603316582914574e-05, - "loss": 5.4393, - "step": 4448 - }, - { - "epoch": 2.3202086049543675, - "grad_norm": 1.8300572633743286, - "learning_rate": 9.603216080402012e-05, - "loss": 5.7789, - "step": 4449 - }, - { - "epoch": 2.320730117340287, - "grad_norm": 1.712662935256958, - "learning_rate": 9.603115577889448e-05, - "loss": 5.8468, - "step": 4450 - }, - { - "epoch": 2.321251629726206, - "grad_norm": 1.781042218208313, - "learning_rate": 9.603015075376884e-05, - "loss": 5.2116, - "step": 4451 - }, - { - "epoch": 2.321773142112125, - "grad_norm": 1.5941672325134277, - "learning_rate": 9.602914572864322e-05, - "loss": 5.477, - "step": 4452 - }, - { - "epoch": 2.3222946544980445, - "grad_norm": 2.090144395828247, - "learning_rate": 9.602814070351759e-05, - "loss": 4.8318, - "step": 4453 - }, - { - "epoch": 2.3228161668839635, - "grad_norm": 1.7761808633804321, - "learning_rate": 9.602713567839196e-05, - "loss": 5.8825, - "step": 4454 - }, - { - "epoch": 2.3233376792698825, - "grad_norm": 1.5097930431365967, - "learning_rate": 9.602613065326633e-05, - "loss": 5.6314, - "step": 4455 - }, - { - "epoch": 2.323859191655802, - "grad_norm": 1.5698878765106201, - "learning_rate": 9.60251256281407e-05, - "loss": 5.8268, - "step": 4456 - }, - { - "epoch": 2.324380704041721, - "grad_norm": 1.5804098844528198, - "learning_rate": 9.602412060301508e-05, - "loss": 6.1351, - "step": 4457 - }, - { - "epoch": 2.32490221642764, - "grad_norm": 2.0465869903564453, - "learning_rate": 9.602311557788946e-05, - "loss": 5.3564, - "step": 4458 - }, - { - "epoch": 2.3254237288135595, - "grad_norm": 1.5301759243011475, - "learning_rate": 9.602211055276383e-05, - "loss": 5.8345, - "step": 4459 - }, - { - "epoch": 2.3259452411994785, - "grad_norm": 1.6353880167007446, - "learning_rate": 9.60211055276382e-05, - "loss": 5.7697, - "step": 4460 - }, - { - "epoch": 2.3264667535853976, - "grad_norm": 1.5556309223175049, - "learning_rate": 9.602010050251257e-05, - "loss": 5.9247, - "step": 4461 - }, - { - "epoch": 2.326988265971317, - "grad_norm": 1.3763997554779053, - "learning_rate": 9.601909547738695e-05, - "loss": 6.1411, - "step": 4462 - }, - { - "epoch": 2.327509778357236, - "grad_norm": 1.5687496662139893, - "learning_rate": 9.601809045226131e-05, - "loss": 5.6664, - "step": 4463 - }, - { - "epoch": 2.328031290743155, - "grad_norm": 1.8230608701705933, - "learning_rate": 9.601708542713567e-05, - "loss": 5.7319, - "step": 4464 - }, - { - "epoch": 2.3285528031290745, - "grad_norm": 1.5991131067276, - "learning_rate": 9.601608040201005e-05, - "loss": 5.59, - "step": 4465 - }, - { - "epoch": 2.3290743155149936, - "grad_norm": 1.5195105075836182, - "learning_rate": 9.601507537688442e-05, - "loss": 5.876, - "step": 4466 - }, - { - "epoch": 2.3295958279009126, - "grad_norm": 1.5950733423233032, - "learning_rate": 9.60140703517588e-05, - "loss": 6.0194, - "step": 4467 - }, - { - "epoch": 2.330117340286832, - "grad_norm": 1.636050820350647, - "learning_rate": 9.601306532663317e-05, - "loss": 5.7971, - "step": 4468 - }, - { - "epoch": 2.330638852672751, - "grad_norm": 1.6218888759613037, - "learning_rate": 9.601206030150755e-05, - "loss": 6.1563, - "step": 4469 - }, - { - "epoch": 2.33116036505867, - "grad_norm": 1.6371983289718628, - "learning_rate": 9.601105527638191e-05, - "loss": 6.0322, - "step": 4470 - }, - { - "epoch": 2.3316818774445895, - "grad_norm": 1.8263182640075684, - "learning_rate": 9.601005025125629e-05, - "loss": 5.9884, - "step": 4471 - }, - { - "epoch": 2.3322033898305086, - "grad_norm": 1.5905038118362427, - "learning_rate": 9.600904522613066e-05, - "loss": 5.7703, - "step": 4472 - }, - { - "epoch": 2.3327249022164276, - "grad_norm": 1.7052695751190186, - "learning_rate": 9.600804020100503e-05, - "loss": 5.8047, - "step": 4473 - }, - { - "epoch": 2.333246414602347, - "grad_norm": 1.6596468687057495, - "learning_rate": 9.60070351758794e-05, - "loss": 5.9574, - "step": 4474 - }, - { - "epoch": 2.333767926988266, - "grad_norm": 1.6254647970199585, - "learning_rate": 9.600603015075378e-05, - "loss": 5.6522, - "step": 4475 - }, - { - "epoch": 2.334289439374185, - "grad_norm": 2.4358134269714355, - "learning_rate": 9.600502512562814e-05, - "loss": 5.3955, - "step": 4476 - }, - { - "epoch": 2.334810951760104, - "grad_norm": 1.777006983757019, - "learning_rate": 9.600402010050252e-05, - "loss": 5.9254, - "step": 4477 - }, - { - "epoch": 2.3353324641460236, - "grad_norm": 2.2574398517608643, - "learning_rate": 9.60030150753769e-05, - "loss": 5.7396, - "step": 4478 - }, - { - "epoch": 2.3358539765319426, - "grad_norm": 1.642750859260559, - "learning_rate": 9.600201005025126e-05, - "loss": 6.1626, - "step": 4479 - }, - { - "epoch": 2.3363754889178616, - "grad_norm": 1.462148666381836, - "learning_rate": 9.600100502512564e-05, - "loss": 5.7837, - "step": 4480 - }, - { - "epoch": 2.336897001303781, - "grad_norm": 1.365599274635315, - "learning_rate": 9.6e-05, - "loss": 6.0614, - "step": 4481 - }, - { - "epoch": 2.3374185136897, - "grad_norm": 1.2903046607971191, - "learning_rate": 9.599899497487438e-05, - "loss": 5.2908, - "step": 4482 - }, - { - "epoch": 2.337940026075619, - "grad_norm": 1.455536127090454, - "learning_rate": 9.599798994974874e-05, - "loss": 6.0224, - "step": 4483 - }, - { - "epoch": 2.3384615384615386, - "grad_norm": 1.3664742708206177, - "learning_rate": 9.599698492462312e-05, - "loss": 6.1121, - "step": 4484 - }, - { - "epoch": 2.3389830508474576, - "grad_norm": 1.528079867362976, - "learning_rate": 9.599597989949749e-05, - "loss": 5.9836, - "step": 4485 - }, - { - "epoch": 2.3395045632333766, - "grad_norm": 1.520763874053955, - "learning_rate": 9.599497487437186e-05, - "loss": 5.2677, - "step": 4486 - }, - { - "epoch": 2.340026075619296, - "grad_norm": 1.4886879920959473, - "learning_rate": 9.599396984924623e-05, - "loss": 6.1658, - "step": 4487 - }, - { - "epoch": 2.340547588005215, - "grad_norm": 1.5882923603057861, - "learning_rate": 9.59929648241206e-05, - "loss": 5.6774, - "step": 4488 - }, - { - "epoch": 2.341069100391134, - "grad_norm": 1.6901179552078247, - "learning_rate": 9.599195979899498e-05, - "loss": 4.8573, - "step": 4489 - }, - { - "epoch": 2.3415906127770536, - "grad_norm": 1.4726617336273193, - "learning_rate": 9.599095477386935e-05, - "loss": 6.0376, - "step": 4490 - }, - { - "epoch": 2.3421121251629726, - "grad_norm": 1.5287299156188965, - "learning_rate": 9.598994974874373e-05, - "loss": 5.7189, - "step": 4491 - }, - { - "epoch": 2.3426336375488916, - "grad_norm": 2.0952367782592773, - "learning_rate": 9.598894472361809e-05, - "loss": 4.8662, - "step": 4492 - }, - { - "epoch": 2.343155149934811, - "grad_norm": 1.6680235862731934, - "learning_rate": 9.598793969849247e-05, - "loss": 5.654, - "step": 4493 - }, - { - "epoch": 2.34367666232073, - "grad_norm": 1.5571452379226685, - "learning_rate": 9.598693467336683e-05, - "loss": 5.8913, - "step": 4494 - }, - { - "epoch": 2.344198174706649, - "grad_norm": 1.5031296014785767, - "learning_rate": 9.598592964824121e-05, - "loss": 5.9042, - "step": 4495 - }, - { - "epoch": 2.3447196870925686, - "grad_norm": 1.675705075263977, - "learning_rate": 9.598492462311557e-05, - "loss": 6.2285, - "step": 4496 - }, - { - "epoch": 2.3452411994784876, - "grad_norm": 1.730527400970459, - "learning_rate": 9.598391959798995e-05, - "loss": 6.115, - "step": 4497 - }, - { - "epoch": 2.3457627118644067, - "grad_norm": 1.8623813390731812, - "learning_rate": 9.598291457286433e-05, - "loss": 5.7115, - "step": 4498 - }, - { - "epoch": 2.346284224250326, - "grad_norm": 1.4876582622528076, - "learning_rate": 9.598190954773871e-05, - "loss": 5.8698, - "step": 4499 - }, - { - "epoch": 2.346805736636245, - "grad_norm": 1.4324908256530762, - "learning_rate": 9.598090452261307e-05, - "loss": 5.9155, - "step": 4500 - }, - { - "epoch": 2.347327249022164, - "grad_norm": 1.574563980102539, - "learning_rate": 9.597989949748745e-05, - "loss": 4.9874, - "step": 4501 - }, - { - "epoch": 2.3478487614080836, - "grad_norm": 1.4200713634490967, - "learning_rate": 9.597889447236181e-05, - "loss": 6.0569, - "step": 4502 - }, - { - "epoch": 2.3483702737940026, - "grad_norm": 1.4853901863098145, - "learning_rate": 9.597788944723618e-05, - "loss": 5.8588, - "step": 4503 - }, - { - "epoch": 2.3488917861799217, - "grad_norm": 1.4420915842056274, - "learning_rate": 9.597688442211056e-05, - "loss": 5.7611, - "step": 4504 - }, - { - "epoch": 2.3494132985658407, - "grad_norm": 1.532476544380188, - "learning_rate": 9.597587939698492e-05, - "loss": 5.1777, - "step": 4505 - }, - { - "epoch": 2.34993481095176, - "grad_norm": 1.428236722946167, - "learning_rate": 9.59748743718593e-05, - "loss": 6.1347, - "step": 4506 - }, - { - "epoch": 2.350456323337679, - "grad_norm": 1.5343890190124512, - "learning_rate": 9.597386934673366e-05, - "loss": 6.329, - "step": 4507 - }, - { - "epoch": 2.350977835723598, - "grad_norm": 1.4280807971954346, - "learning_rate": 9.597286432160804e-05, - "loss": 5.8936, - "step": 4508 - }, - { - "epoch": 2.3514993481095177, - "grad_norm": 1.4378780126571655, - "learning_rate": 9.597185929648242e-05, - "loss": 5.7711, - "step": 4509 - }, - { - "epoch": 2.3520208604954367, - "grad_norm": 1.4348413944244385, - "learning_rate": 9.59708542713568e-05, - "loss": 6.2966, - "step": 4510 - }, - { - "epoch": 2.3525423728813557, - "grad_norm": 1.4434428215026855, - "learning_rate": 9.596984924623116e-05, - "loss": 5.8932, - "step": 4511 - }, - { - "epoch": 2.353063885267275, - "grad_norm": 1.5675967931747437, - "learning_rate": 9.596884422110554e-05, - "loss": 5.9126, - "step": 4512 - }, - { - "epoch": 2.353585397653194, - "grad_norm": 1.4478498697280884, - "learning_rate": 9.59678391959799e-05, - "loss": 5.7973, - "step": 4513 - }, - { - "epoch": 2.354106910039113, - "grad_norm": 1.6586133241653442, - "learning_rate": 9.596683417085428e-05, - "loss": 5.6017, - "step": 4514 - }, - { - "epoch": 2.3546284224250327, - "grad_norm": 1.5454996824264526, - "learning_rate": 9.596582914572865e-05, - "loss": 5.6388, - "step": 4515 - }, - { - "epoch": 2.3551499348109517, - "grad_norm": 1.5169801712036133, - "learning_rate": 9.596482412060302e-05, - "loss": 6.0144, - "step": 4516 - }, - { - "epoch": 2.3556714471968707, - "grad_norm": 1.511386513710022, - "learning_rate": 9.596381909547739e-05, - "loss": 6.0205, - "step": 4517 - }, - { - "epoch": 2.35619295958279, - "grad_norm": 1.4136213064193726, - "learning_rate": 9.596281407035177e-05, - "loss": 6.3008, - "step": 4518 - }, - { - "epoch": 2.356714471968709, - "grad_norm": 1.461074948310852, - "learning_rate": 9.596180904522614e-05, - "loss": 6.0524, - "step": 4519 - }, - { - "epoch": 2.3572359843546282, - "grad_norm": 1.5305243730545044, - "learning_rate": 9.596080402010051e-05, - "loss": 5.3605, - "step": 4520 - }, - { - "epoch": 2.3577574967405477, - "grad_norm": 1.663451075553894, - "learning_rate": 9.595979899497489e-05, - "loss": 5.4035, - "step": 4521 - }, - { - "epoch": 2.3582790091264667, - "grad_norm": 1.4723806381225586, - "learning_rate": 9.595879396984925e-05, - "loss": 6.159, - "step": 4522 - }, - { - "epoch": 2.3588005215123857, - "grad_norm": 1.403035283088684, - "learning_rate": 9.595778894472363e-05, - "loss": 6.0607, - "step": 4523 - }, - { - "epoch": 2.359322033898305, - "grad_norm": 1.5772836208343506, - "learning_rate": 9.595678391959799e-05, - "loss": 6.0094, - "step": 4524 - }, - { - "epoch": 2.359843546284224, - "grad_norm": 1.7120325565338135, - "learning_rate": 9.595577889447237e-05, - "loss": 5.5456, - "step": 4525 - }, - { - "epoch": 2.3603650586701432, - "grad_norm": 1.5275436639785767, - "learning_rate": 9.595477386934673e-05, - "loss": 5.7637, - "step": 4526 - }, - { - "epoch": 2.3608865710560627, - "grad_norm": 1.5996558666229248, - "learning_rate": 9.595376884422111e-05, - "loss": 5.6699, - "step": 4527 - }, - { - "epoch": 2.3614080834419817, - "grad_norm": 2.269113302230835, - "learning_rate": 9.595276381909548e-05, - "loss": 6.0552, - "step": 4528 - }, - { - "epoch": 2.3619295958279007, - "grad_norm": 1.7253103256225586, - "learning_rate": 9.595175879396985e-05, - "loss": 5.8906, - "step": 4529 - }, - { - "epoch": 2.36245110821382, - "grad_norm": 1.7174782752990723, - "learning_rate": 9.595075376884423e-05, - "loss": 6.0944, - "step": 4530 - }, - { - "epoch": 2.3629726205997392, - "grad_norm": 1.4801464080810547, - "learning_rate": 9.59497487437186e-05, - "loss": 6.0059, - "step": 4531 - }, - { - "epoch": 2.3634941329856582, - "grad_norm": 1.729068636894226, - "learning_rate": 9.594874371859297e-05, - "loss": 5.7179, - "step": 4532 - }, - { - "epoch": 2.3640156453715777, - "grad_norm": 1.4728573560714722, - "learning_rate": 9.594773869346734e-05, - "loss": 5.7603, - "step": 4533 - }, - { - "epoch": 2.3645371577574967, - "grad_norm": 1.4392963647842407, - "learning_rate": 9.594673366834172e-05, - "loss": 6.1734, - "step": 4534 - }, - { - "epoch": 2.3650586701434158, - "grad_norm": 1.7286458015441895, - "learning_rate": 9.594572864321608e-05, - "loss": 5.1894, - "step": 4535 - }, - { - "epoch": 2.365580182529335, - "grad_norm": 1.550196886062622, - "learning_rate": 9.594472361809046e-05, - "loss": 5.9731, - "step": 4536 - }, - { - "epoch": 2.3661016949152542, - "grad_norm": 1.5125144720077515, - "learning_rate": 9.594371859296482e-05, - "loss": 6.0515, - "step": 4537 - }, - { - "epoch": 2.3666232073011733, - "grad_norm": 1.3853062391281128, - "learning_rate": 9.59427135678392e-05, - "loss": 6.0473, - "step": 4538 - }, - { - "epoch": 2.3671447196870927, - "grad_norm": 1.6001782417297363, - "learning_rate": 9.594170854271358e-05, - "loss": 5.7747, - "step": 4539 - }, - { - "epoch": 2.3676662320730117, - "grad_norm": 1.567945122718811, - "learning_rate": 9.594070351758796e-05, - "loss": 5.8247, - "step": 4540 - }, - { - "epoch": 2.3681877444589308, - "grad_norm": 1.7327954769134521, - "learning_rate": 9.593969849246232e-05, - "loss": 4.9337, - "step": 4541 - }, - { - "epoch": 2.3687092568448502, - "grad_norm": 1.6749775409698486, - "learning_rate": 9.59386934673367e-05, - "loss": 5.8816, - "step": 4542 - }, - { - "epoch": 2.3692307692307693, - "grad_norm": 1.703731656074524, - "learning_rate": 9.593768844221106e-05, - "loss": 5.3507, - "step": 4543 - }, - { - "epoch": 2.3697522816166883, - "grad_norm": 1.559857964515686, - "learning_rate": 9.593668341708543e-05, - "loss": 5.9921, - "step": 4544 - }, - { - "epoch": 2.3702737940026077, - "grad_norm": 1.3558025360107422, - "learning_rate": 9.59356783919598e-05, - "loss": 6.0457, - "step": 4545 - }, - { - "epoch": 2.3707953063885268, - "grad_norm": 1.5474129915237427, - "learning_rate": 9.593467336683417e-05, - "loss": 5.7703, - "step": 4546 - }, - { - "epoch": 2.371316818774446, - "grad_norm": 1.417605996131897, - "learning_rate": 9.593366834170855e-05, - "loss": 5.9026, - "step": 4547 - }, - { - "epoch": 2.3718383311603652, - "grad_norm": 1.5283540487289429, - "learning_rate": 9.593266331658291e-05, - "loss": 5.7226, - "step": 4548 - }, - { - "epoch": 2.3723598435462843, - "grad_norm": 1.4524253606796265, - "learning_rate": 9.593165829145729e-05, - "loss": 5.824, - "step": 4549 - }, - { - "epoch": 2.3728813559322033, - "grad_norm": 1.6348868608474731, - "learning_rate": 9.593065326633167e-05, - "loss": 5.349, - "step": 4550 - }, - { - "epoch": 2.3734028683181227, - "grad_norm": 1.6062861680984497, - "learning_rate": 9.592964824120604e-05, - "loss": 5.9257, - "step": 4551 - }, - { - "epoch": 2.3739243807040418, - "grad_norm": 1.508410930633545, - "learning_rate": 9.592864321608041e-05, - "loss": 5.6424, - "step": 4552 - }, - { - "epoch": 2.374445893089961, - "grad_norm": 1.4030510187149048, - "learning_rate": 9.592763819095479e-05, - "loss": 6.3982, - "step": 4553 - }, - { - "epoch": 2.3749674054758803, - "grad_norm": 1.8900052309036255, - "learning_rate": 9.592663316582915e-05, - "loss": 5.3418, - "step": 4554 - }, - { - "epoch": 2.3754889178617993, - "grad_norm": 1.3713457584381104, - "learning_rate": 9.592562814070353e-05, - "loss": 5.8274, - "step": 4555 - }, - { - "epoch": 2.3760104302477183, - "grad_norm": 1.3878411054611206, - "learning_rate": 9.592462311557789e-05, - "loss": 6.1804, - "step": 4556 - }, - { - "epoch": 2.3765319426336378, - "grad_norm": 1.6658408641815186, - "learning_rate": 9.592361809045226e-05, - "loss": 5.4819, - "step": 4557 - }, - { - "epoch": 2.377053455019557, - "grad_norm": 1.5359734296798706, - "learning_rate": 9.592261306532663e-05, - "loss": 5.8124, - "step": 4558 - }, - { - "epoch": 2.377574967405476, - "grad_norm": 1.6980619430541992, - "learning_rate": 9.5921608040201e-05, - "loss": 6.0929, - "step": 4559 - }, - { - "epoch": 2.3780964797913953, - "grad_norm": 1.5610415935516357, - "learning_rate": 9.592060301507538e-05, - "loss": 5.8385, - "step": 4560 - }, - { - "epoch": 2.3786179921773143, - "grad_norm": 1.5844590663909912, - "learning_rate": 9.591959798994975e-05, - "loss": 5.5091, - "step": 4561 - }, - { - "epoch": 2.3791395045632333, - "grad_norm": 1.5414739847183228, - "learning_rate": 9.591859296482413e-05, - "loss": 5.7191, - "step": 4562 - }, - { - "epoch": 2.3796610169491528, - "grad_norm": 1.387691617012024, - "learning_rate": 9.59175879396985e-05, - "loss": 6.0045, - "step": 4563 - }, - { - "epoch": 2.380182529335072, - "grad_norm": 1.513169527053833, - "learning_rate": 9.591658291457287e-05, - "loss": 5.7804, - "step": 4564 - }, - { - "epoch": 2.380704041720991, - "grad_norm": 1.8554631471633911, - "learning_rate": 9.591557788944724e-05, - "loss": 5.3349, - "step": 4565 - }, - { - "epoch": 2.3812255541069103, - "grad_norm": 1.8047826290130615, - "learning_rate": 9.591457286432162e-05, - "loss": 5.9193, - "step": 4566 - }, - { - "epoch": 2.3817470664928293, - "grad_norm": 1.5484076738357544, - "learning_rate": 9.591356783919598e-05, - "loss": 5.8028, - "step": 4567 - }, - { - "epoch": 2.3822685788787483, - "grad_norm": 1.5238128900527954, - "learning_rate": 9.591256281407036e-05, - "loss": 6.213, - "step": 4568 - }, - { - "epoch": 2.3827900912646673, - "grad_norm": 1.4808193445205688, - "learning_rate": 9.591155778894472e-05, - "loss": 5.8315, - "step": 4569 - }, - { - "epoch": 2.383311603650587, - "grad_norm": 1.717157244682312, - "learning_rate": 9.59105527638191e-05, - "loss": 5.318, - "step": 4570 - }, - { - "epoch": 2.383833116036506, - "grad_norm": 1.5507549047470093, - "learning_rate": 9.590954773869348e-05, - "loss": 5.8774, - "step": 4571 - }, - { - "epoch": 2.384354628422425, - "grad_norm": 1.53330659866333, - "learning_rate": 9.590854271356784e-05, - "loss": 5.9961, - "step": 4572 - }, - { - "epoch": 2.3848761408083443, - "grad_norm": 1.7568445205688477, - "learning_rate": 9.590753768844222e-05, - "loss": 6.0764, - "step": 4573 - }, - { - "epoch": 2.3853976531942633, - "grad_norm": 1.6111587285995483, - "learning_rate": 9.590653266331658e-05, - "loss": 5.6564, - "step": 4574 - }, - { - "epoch": 2.3859191655801824, - "grad_norm": 1.4940314292907715, - "learning_rate": 9.590552763819096e-05, - "loss": 5.5372, - "step": 4575 - }, - { - "epoch": 2.386440677966102, - "grad_norm": 1.6053948402404785, - "learning_rate": 9.590452261306533e-05, - "loss": 6.1279, - "step": 4576 - }, - { - "epoch": 2.386962190352021, - "grad_norm": 1.5130175352096558, - "learning_rate": 9.59035175879397e-05, - "loss": 5.9668, - "step": 4577 - }, - { - "epoch": 2.38748370273794, - "grad_norm": 1.5389124155044556, - "learning_rate": 9.590251256281407e-05, - "loss": 5.811, - "step": 4578 - }, - { - "epoch": 2.3880052151238593, - "grad_norm": 1.3652188777923584, - "learning_rate": 9.590150753768845e-05, - "loss": 5.9825, - "step": 4579 - }, - { - "epoch": 2.3885267275097783, - "grad_norm": 1.3439152240753174, - "learning_rate": 9.590050251256281e-05, - "loss": 6.0908, - "step": 4580 - }, - { - "epoch": 2.3890482398956974, - "grad_norm": 1.574779748916626, - "learning_rate": 9.589949748743719e-05, - "loss": 5.7468, - "step": 4581 - }, - { - "epoch": 2.389569752281617, - "grad_norm": 1.4520387649536133, - "learning_rate": 9.589849246231157e-05, - "loss": 6.0826, - "step": 4582 - }, - { - "epoch": 2.390091264667536, - "grad_norm": 1.4827810525894165, - "learning_rate": 9.589748743718593e-05, - "loss": 6.0136, - "step": 4583 - }, - { - "epoch": 2.390612777053455, - "grad_norm": 1.5205166339874268, - "learning_rate": 9.589648241206031e-05, - "loss": 5.6181, - "step": 4584 - }, - { - "epoch": 2.3911342894393743, - "grad_norm": 2.024791955947876, - "learning_rate": 9.589547738693467e-05, - "loss": 6.0385, - "step": 4585 - }, - { - "epoch": 2.3916558018252934, - "grad_norm": 1.4049091339111328, - "learning_rate": 9.589447236180905e-05, - "loss": 6.0032, - "step": 4586 - }, - { - "epoch": 2.3921773142112124, - "grad_norm": 1.4850876331329346, - "learning_rate": 9.589346733668342e-05, - "loss": 5.8961, - "step": 4587 - }, - { - "epoch": 2.392698826597132, - "grad_norm": 1.6333667039871216, - "learning_rate": 9.589246231155779e-05, - "loss": 5.7758, - "step": 4588 - }, - { - "epoch": 2.393220338983051, - "grad_norm": 1.673466444015503, - "learning_rate": 9.589145728643216e-05, - "loss": 5.5469, - "step": 4589 - }, - { - "epoch": 2.39374185136897, - "grad_norm": 1.4635727405548096, - "learning_rate": 9.589045226130654e-05, - "loss": 6.0237, - "step": 4590 - }, - { - "epoch": 2.3942633637548894, - "grad_norm": 1.6333473920822144, - "learning_rate": 9.588944723618091e-05, - "loss": 5.1516, - "step": 4591 - }, - { - "epoch": 2.3947848761408084, - "grad_norm": 1.5557090044021606, - "learning_rate": 9.588844221105529e-05, - "loss": 5.5709, - "step": 4592 - }, - { - "epoch": 2.3953063885267274, - "grad_norm": 1.6182422637939453, - "learning_rate": 9.588743718592966e-05, - "loss": 5.4256, - "step": 4593 - }, - { - "epoch": 2.395827900912647, - "grad_norm": 1.4957574605941772, - "learning_rate": 9.588643216080403e-05, - "loss": 6.0476, - "step": 4594 - }, - { - "epoch": 2.396349413298566, - "grad_norm": 1.7536898851394653, - "learning_rate": 9.58854271356784e-05, - "loss": 5.1066, - "step": 4595 - }, - { - "epoch": 2.396870925684485, - "grad_norm": 1.4505441188812256, - "learning_rate": 9.588442211055276e-05, - "loss": 5.4728, - "step": 4596 - }, - { - "epoch": 2.3973924380704044, - "grad_norm": 1.4545371532440186, - "learning_rate": 9.588341708542714e-05, - "loss": 6.0833, - "step": 4597 - }, - { - "epoch": 2.3979139504563234, - "grad_norm": 1.5019114017486572, - "learning_rate": 9.58824120603015e-05, - "loss": 5.6848, - "step": 4598 - }, - { - "epoch": 2.3984354628422424, - "grad_norm": 1.6857526302337646, - "learning_rate": 9.588140703517588e-05, - "loss": 5.5029, - "step": 4599 - }, - { - "epoch": 2.3989569752281614, - "grad_norm": 2.2362821102142334, - "learning_rate": 9.588040201005025e-05, - "loss": 5.8717, - "step": 4600 - }, - { - "epoch": 2.399478487614081, - "grad_norm": 1.6894989013671875, - "learning_rate": 9.587939698492462e-05, - "loss": 5.8724, - "step": 4601 - }, - { - "epoch": 2.4, - "grad_norm": 1.631055474281311, - "learning_rate": 9.5878391959799e-05, - "loss": 5.8487, - "step": 4602 - }, - { - "epoch": 2.400521512385919, - "grad_norm": 1.458709716796875, - "learning_rate": 9.587738693467338e-05, - "loss": 6.1201, - "step": 4603 - }, - { - "epoch": 2.4010430247718384, - "grad_norm": 1.5102814435958862, - "learning_rate": 9.587638190954774e-05, - "loss": 5.9145, - "step": 4604 - }, - { - "epoch": 2.4015645371577574, - "grad_norm": 1.3990081548690796, - "learning_rate": 9.587537688442212e-05, - "loss": 6.0381, - "step": 4605 - }, - { - "epoch": 2.4020860495436764, - "grad_norm": 1.5177686214447021, - "learning_rate": 9.587437185929649e-05, - "loss": 5.8868, - "step": 4606 - }, - { - "epoch": 2.402607561929596, - "grad_norm": 1.540640950202942, - "learning_rate": 9.587336683417086e-05, - "loss": 5.7346, - "step": 4607 - }, - { - "epoch": 2.403129074315515, - "grad_norm": 1.5567206144332886, - "learning_rate": 9.587236180904523e-05, - "loss": 5.985, - "step": 4608 - }, - { - "epoch": 2.403650586701434, - "grad_norm": 1.5360157489776611, - "learning_rate": 9.58713567839196e-05, - "loss": 5.4928, - "step": 4609 - }, - { - "epoch": 2.4041720990873534, - "grad_norm": 1.5946108102798462, - "learning_rate": 9.587035175879397e-05, - "loss": 5.6713, - "step": 4610 - }, - { - "epoch": 2.4046936114732724, - "grad_norm": 1.4823250770568848, - "learning_rate": 9.586934673366835e-05, - "loss": 6.1183, - "step": 4611 - }, - { - "epoch": 2.4052151238591915, - "grad_norm": 1.450477123260498, - "learning_rate": 9.586834170854273e-05, - "loss": 6.335, - "step": 4612 - }, - { - "epoch": 2.405736636245111, - "grad_norm": 1.6137751340866089, - "learning_rate": 9.586733668341709e-05, - "loss": 5.6883, - "step": 4613 - }, - { - "epoch": 2.40625814863103, - "grad_norm": 1.3729891777038574, - "learning_rate": 9.586633165829147e-05, - "loss": 6.294, - "step": 4614 - }, - { - "epoch": 2.406779661016949, - "grad_norm": 1.6082178354263306, - "learning_rate": 9.586532663316583e-05, - "loss": 5.831, - "step": 4615 - }, - { - "epoch": 2.4073011734028684, - "grad_norm": 1.5930973291397095, - "learning_rate": 9.586432160804021e-05, - "loss": 5.4537, - "step": 4616 - }, - { - "epoch": 2.4078226857887874, - "grad_norm": 1.6211504936218262, - "learning_rate": 9.586331658291457e-05, - "loss": 5.7793, - "step": 4617 - }, - { - "epoch": 2.4083441981747065, - "grad_norm": 1.5887620449066162, - "learning_rate": 9.586231155778895e-05, - "loss": 5.4626, - "step": 4618 - }, - { - "epoch": 2.408865710560626, - "grad_norm": 1.5088756084442139, - "learning_rate": 9.586130653266332e-05, - "loss": 5.4662, - "step": 4619 - }, - { - "epoch": 2.409387222946545, - "grad_norm": 1.4432193040847778, - "learning_rate": 9.58603015075377e-05, - "loss": 6.064, - "step": 4620 - }, - { - "epoch": 2.409908735332464, - "grad_norm": 1.4517239332199097, - "learning_rate": 9.585929648241206e-05, - "loss": 6.1191, - "step": 4621 - }, - { - "epoch": 2.4104302477183834, - "grad_norm": 1.4885787963867188, - "learning_rate": 9.585829145728644e-05, - "loss": 5.9381, - "step": 4622 - }, - { - "epoch": 2.4109517601043025, - "grad_norm": 1.346200942993164, - "learning_rate": 9.585728643216081e-05, - "loss": 6.2227, - "step": 4623 - }, - { - "epoch": 2.4114732724902215, - "grad_norm": 1.4736666679382324, - "learning_rate": 9.585628140703518e-05, - "loss": 5.7237, - "step": 4624 - }, - { - "epoch": 2.411994784876141, - "grad_norm": 1.4049171209335327, - "learning_rate": 9.585527638190956e-05, - "loss": 6.0698, - "step": 4625 - }, - { - "epoch": 2.41251629726206, - "grad_norm": 1.551905870437622, - "learning_rate": 9.585427135678392e-05, - "loss": 5.675, - "step": 4626 - }, - { - "epoch": 2.413037809647979, - "grad_norm": 1.4387238025665283, - "learning_rate": 9.58532663316583e-05, - "loss": 6.113, - "step": 4627 - }, - { - "epoch": 2.4135593220338984, - "grad_norm": 1.519805908203125, - "learning_rate": 9.585226130653266e-05, - "loss": 5.8956, - "step": 4628 - }, - { - "epoch": 2.4140808344198175, - "grad_norm": 1.4321504831314087, - "learning_rate": 9.585125628140704e-05, - "loss": 5.9626, - "step": 4629 - }, - { - "epoch": 2.4146023468057365, - "grad_norm": 1.4967197179794312, - "learning_rate": 9.58502512562814e-05, - "loss": 6.1177, - "step": 4630 - }, - { - "epoch": 2.415123859191656, - "grad_norm": 1.3577544689178467, - "learning_rate": 9.584924623115578e-05, - "loss": 5.5475, - "step": 4631 - }, - { - "epoch": 2.415645371577575, - "grad_norm": 1.4306024312973022, - "learning_rate": 9.584824120603016e-05, - "loss": 5.9575, - "step": 4632 - }, - { - "epoch": 2.416166883963494, - "grad_norm": 1.3611029386520386, - "learning_rate": 9.584723618090454e-05, - "loss": 6.1253, - "step": 4633 - }, - { - "epoch": 2.4166883963494135, - "grad_norm": 1.4228655099868774, - "learning_rate": 9.58462311557789e-05, - "loss": 6.0514, - "step": 4634 - }, - { - "epoch": 2.4172099087353325, - "grad_norm": 1.5801129341125488, - "learning_rate": 9.584522613065328e-05, - "loss": 5.8469, - "step": 4635 - }, - { - "epoch": 2.4177314211212515, - "grad_norm": 1.451980471611023, - "learning_rate": 9.584422110552764e-05, - "loss": 6.1922, - "step": 4636 - }, - { - "epoch": 2.418252933507171, - "grad_norm": 1.3929085731506348, - "learning_rate": 9.584321608040201e-05, - "loss": 6.1539, - "step": 4637 - }, - { - "epoch": 2.41877444589309, - "grad_norm": 1.604539394378662, - "learning_rate": 9.584221105527639e-05, - "loss": 5.7634, - "step": 4638 - }, - { - "epoch": 2.419295958279009, - "grad_norm": 1.4325387477874756, - "learning_rate": 9.584120603015075e-05, - "loss": 6.032, - "step": 4639 - }, - { - "epoch": 2.4198174706649285, - "grad_norm": 1.3736181259155273, - "learning_rate": 9.584020100502513e-05, - "loss": 6.0197, - "step": 4640 - }, - { - "epoch": 2.4203389830508475, - "grad_norm": 1.39095938205719, - "learning_rate": 9.583919597989949e-05, - "loss": 5.8531, - "step": 4641 - }, - { - "epoch": 2.4208604954367665, - "grad_norm": 1.4647318124771118, - "learning_rate": 9.583819095477387e-05, - "loss": 6.0885, - "step": 4642 - }, - { - "epoch": 2.421382007822686, - "grad_norm": 1.495478868484497, - "learning_rate": 9.583718592964825e-05, - "loss": 5.6628, - "step": 4643 - }, - { - "epoch": 2.421903520208605, - "grad_norm": 1.4674917459487915, - "learning_rate": 9.583618090452263e-05, - "loss": 5.8017, - "step": 4644 - }, - { - "epoch": 2.422425032594524, - "grad_norm": 1.6027084589004517, - "learning_rate": 9.583517587939699e-05, - "loss": 5.8724, - "step": 4645 - }, - { - "epoch": 2.4229465449804435, - "grad_norm": 1.400303840637207, - "learning_rate": 9.583417085427137e-05, - "loss": 6.215, - "step": 4646 - }, - { - "epoch": 2.4234680573663625, - "grad_norm": 1.4617226123809814, - "learning_rate": 9.583316582914573e-05, - "loss": 6.1428, - "step": 4647 - }, - { - "epoch": 2.4239895697522815, - "grad_norm": 1.8257728815078735, - "learning_rate": 9.583216080402011e-05, - "loss": 5.3695, - "step": 4648 - }, - { - "epoch": 2.424511082138201, - "grad_norm": 1.6658849716186523, - "learning_rate": 9.583115577889447e-05, - "loss": 5.6682, - "step": 4649 - }, - { - "epoch": 2.42503259452412, - "grad_norm": 1.6101568937301636, - "learning_rate": 9.583015075376884e-05, - "loss": 6.2043, - "step": 4650 - }, - { - "epoch": 2.425554106910039, - "grad_norm": 1.4413847923278809, - "learning_rate": 9.582914572864322e-05, - "loss": 5.9258, - "step": 4651 - }, - { - "epoch": 2.4260756192959585, - "grad_norm": 1.541852355003357, - "learning_rate": 9.58281407035176e-05, - "loss": 5.7365, - "step": 4652 - }, - { - "epoch": 2.4265971316818775, - "grad_norm": 1.359946608543396, - "learning_rate": 9.582713567839197e-05, - "loss": 6.0591, - "step": 4653 - }, - { - "epoch": 2.4271186440677965, - "grad_norm": 1.4947724342346191, - "learning_rate": 9.582613065326634e-05, - "loss": 6.0119, - "step": 4654 - }, - { - "epoch": 2.427640156453716, - "grad_norm": 1.4753408432006836, - "learning_rate": 9.582512562814071e-05, - "loss": 5.9772, - "step": 4655 - }, - { - "epoch": 2.428161668839635, - "grad_norm": 1.4500113725662231, - "learning_rate": 9.582412060301508e-05, - "loss": 6.1391, - "step": 4656 - }, - { - "epoch": 2.428683181225554, - "grad_norm": 1.4572314023971558, - "learning_rate": 9.582311557788946e-05, - "loss": 5.8599, - "step": 4657 - }, - { - "epoch": 2.4292046936114735, - "grad_norm": 1.4415271282196045, - "learning_rate": 9.582211055276382e-05, - "loss": 6.0784, - "step": 4658 - }, - { - "epoch": 2.4297262059973925, - "grad_norm": 1.4185330867767334, - "learning_rate": 9.58211055276382e-05, - "loss": 6.1459, - "step": 4659 - }, - { - "epoch": 2.4302477183833116, - "grad_norm": 1.424500584602356, - "learning_rate": 9.582010050251256e-05, - "loss": 5.7663, - "step": 4660 - }, - { - "epoch": 2.430769230769231, - "grad_norm": 1.5055536031723022, - "learning_rate": 9.581909547738694e-05, - "loss": 5.8789, - "step": 4661 - }, - { - "epoch": 2.43129074315515, - "grad_norm": 1.6713930368423462, - "learning_rate": 9.58180904522613e-05, - "loss": 5.4247, - "step": 4662 - }, - { - "epoch": 2.431812255541069, - "grad_norm": 1.8298808336257935, - "learning_rate": 9.581708542713568e-05, - "loss": 5.7908, - "step": 4663 - }, - { - "epoch": 2.432333767926988, - "grad_norm": 1.4650450944900513, - "learning_rate": 9.581608040201006e-05, - "loss": 5.9735, - "step": 4664 - }, - { - "epoch": 2.4328552803129075, - "grad_norm": 1.2734521627426147, - "learning_rate": 9.581507537688443e-05, - "loss": 6.0537, - "step": 4665 - }, - { - "epoch": 2.4333767926988266, - "grad_norm": 1.4770925045013428, - "learning_rate": 9.58140703517588e-05, - "loss": 5.8323, - "step": 4666 - }, - { - "epoch": 2.4338983050847456, - "grad_norm": 1.4586842060089111, - "learning_rate": 9.581306532663317e-05, - "loss": 6.236, - "step": 4667 - }, - { - "epoch": 2.434419817470665, - "grad_norm": 1.4851281642913818, - "learning_rate": 9.581206030150754e-05, - "loss": 5.8193, - "step": 4668 - }, - { - "epoch": 2.434941329856584, - "grad_norm": 1.426017165184021, - "learning_rate": 9.581105527638191e-05, - "loss": 5.9804, - "step": 4669 - }, - { - "epoch": 2.435462842242503, - "grad_norm": 1.4702129364013672, - "learning_rate": 9.581005025125629e-05, - "loss": 5.8185, - "step": 4670 - }, - { - "epoch": 2.4359843546284226, - "grad_norm": 1.3530999422073364, - "learning_rate": 9.580904522613065e-05, - "loss": 6.1128, - "step": 4671 - }, - { - "epoch": 2.4365058670143416, - "grad_norm": 1.3138591051101685, - "learning_rate": 9.580804020100503e-05, - "loss": 6.0128, - "step": 4672 - }, - { - "epoch": 2.4370273794002606, - "grad_norm": 1.565187931060791, - "learning_rate": 9.580703517587941e-05, - "loss": 5.8332, - "step": 4673 - }, - { - "epoch": 2.43754889178618, - "grad_norm": 1.5213370323181152, - "learning_rate": 9.580603015075378e-05, - "loss": 5.6769, - "step": 4674 - }, - { - "epoch": 2.438070404172099, - "grad_norm": 1.5922952890396118, - "learning_rate": 9.580502512562815e-05, - "loss": 4.7799, - "step": 4675 - }, - { - "epoch": 2.438591916558018, - "grad_norm": 1.5340605974197388, - "learning_rate": 9.580402010050251e-05, - "loss": 6.0069, - "step": 4676 - }, - { - "epoch": 2.4391134289439376, - "grad_norm": 1.5610226392745972, - "learning_rate": 9.580301507537689e-05, - "loss": 5.7036, - "step": 4677 - }, - { - "epoch": 2.4396349413298566, - "grad_norm": 1.5556902885437012, - "learning_rate": 9.580201005025126e-05, - "loss": 5.7152, - "step": 4678 - }, - { - "epoch": 2.4401564537157756, - "grad_norm": 1.5280637741088867, - "learning_rate": 9.580100502512563e-05, - "loss": 5.395, - "step": 4679 - }, - { - "epoch": 2.440677966101695, - "grad_norm": 1.4267419576644897, - "learning_rate": 9.58e-05, - "loss": 6.193, - "step": 4680 - }, - { - "epoch": 2.441199478487614, - "grad_norm": 1.59563410282135, - "learning_rate": 9.579899497487438e-05, - "loss": 5.9021, - "step": 4681 - }, - { - "epoch": 2.441720990873533, - "grad_norm": 1.6640350818634033, - "learning_rate": 9.579798994974874e-05, - "loss": 5.3433, - "step": 4682 - }, - { - "epoch": 2.4422425032594526, - "grad_norm": 1.5032236576080322, - "learning_rate": 9.579698492462312e-05, - "loss": 5.204, - "step": 4683 - }, - { - "epoch": 2.4427640156453716, - "grad_norm": 1.8647037744522095, - "learning_rate": 9.57959798994975e-05, - "loss": 5.2493, - "step": 4684 - }, - { - "epoch": 2.4432855280312906, - "grad_norm": 1.6246224641799927, - "learning_rate": 9.579497487437187e-05, - "loss": 5.9873, - "step": 4685 - }, - { - "epoch": 2.44380704041721, - "grad_norm": 1.489141583442688, - "learning_rate": 9.579396984924624e-05, - "loss": 5.2149, - "step": 4686 - }, - { - "epoch": 2.444328552803129, - "grad_norm": 1.4809342622756958, - "learning_rate": 9.579296482412062e-05, - "loss": 5.49, - "step": 4687 - }, - { - "epoch": 2.444850065189048, - "grad_norm": 1.4862064123153687, - "learning_rate": 9.579195979899498e-05, - "loss": 5.7145, - "step": 4688 - }, - { - "epoch": 2.4453715775749676, - "grad_norm": 1.4272385835647583, - "learning_rate": 9.579095477386934e-05, - "loss": 6.1677, - "step": 4689 - }, - { - "epoch": 2.4458930899608866, - "grad_norm": 1.5657410621643066, - "learning_rate": 9.578994974874372e-05, - "loss": 5.5093, - "step": 4690 - }, - { - "epoch": 2.4464146023468056, - "grad_norm": 1.6081621646881104, - "learning_rate": 9.578894472361809e-05, - "loss": 4.6397, - "step": 4691 - }, - { - "epoch": 2.4469361147327247, - "grad_norm": 1.640946388244629, - "learning_rate": 9.578793969849246e-05, - "loss": 5.8015, - "step": 4692 - }, - { - "epoch": 2.447457627118644, - "grad_norm": 1.390912652015686, - "learning_rate": 9.578693467336684e-05, - "loss": 6.375, - "step": 4693 - }, - { - "epoch": 2.447979139504563, - "grad_norm": 1.6535742282867432, - "learning_rate": 9.578592964824122e-05, - "loss": 5.1161, - "step": 4694 - }, - { - "epoch": 2.448500651890482, - "grad_norm": 1.5865446329116821, - "learning_rate": 9.578492462311558e-05, - "loss": 5.1061, - "step": 4695 - }, - { - "epoch": 2.4490221642764016, - "grad_norm": 1.513089895248413, - "learning_rate": 9.578391959798996e-05, - "loss": 5.5305, - "step": 4696 - }, - { - "epoch": 2.4495436766623206, - "grad_norm": 1.5821477174758911, - "learning_rate": 9.578291457286433e-05, - "loss": 5.9291, - "step": 4697 - }, - { - "epoch": 2.4500651890482397, - "grad_norm": 1.3279614448547363, - "learning_rate": 9.57819095477387e-05, - "loss": 6.1858, - "step": 4698 - }, - { - "epoch": 2.450586701434159, - "grad_norm": 1.6804585456848145, - "learning_rate": 9.578090452261307e-05, - "loss": 5.3348, - "step": 4699 - }, - { - "epoch": 2.451108213820078, - "grad_norm": 1.386252522468567, - "learning_rate": 9.577989949748745e-05, - "loss": 6.0966, - "step": 4700 - }, - { - "epoch": 2.451629726205997, - "grad_norm": 1.5297173261642456, - "learning_rate": 9.577889447236181e-05, - "loss": 6.1776, - "step": 4701 - }, - { - "epoch": 2.4521512385919166, - "grad_norm": 1.6720232963562012, - "learning_rate": 9.577788944723619e-05, - "loss": 5.7404, - "step": 4702 - }, - { - "epoch": 2.4526727509778357, - "grad_norm": 1.7850764989852905, - "learning_rate": 9.577688442211055e-05, - "loss": 5.8397, - "step": 4703 - }, - { - "epoch": 2.4531942633637547, - "grad_norm": 1.8239599466323853, - "learning_rate": 9.577587939698493e-05, - "loss": 5.5024, - "step": 4704 - }, - { - "epoch": 2.453715775749674, - "grad_norm": 1.4998780488967896, - "learning_rate": 9.577487437185931e-05, - "loss": 6.064, - "step": 4705 - }, - { - "epoch": 2.454237288135593, - "grad_norm": 1.599571704864502, - "learning_rate": 9.577386934673367e-05, - "loss": 5.8617, - "step": 4706 - }, - { - "epoch": 2.454758800521512, - "grad_norm": 1.620847463607788, - "learning_rate": 9.577286432160805e-05, - "loss": 5.819, - "step": 4707 - }, - { - "epoch": 2.4552803129074317, - "grad_norm": 1.6738375425338745, - "learning_rate": 9.577185929648241e-05, - "loss": 5.762, - "step": 4708 - }, - { - "epoch": 2.4558018252933507, - "grad_norm": 1.5590448379516602, - "learning_rate": 9.577085427135679e-05, - "loss": 6.0509, - "step": 4709 - }, - { - "epoch": 2.4563233376792697, - "grad_norm": 1.493691325187683, - "learning_rate": 9.576984924623116e-05, - "loss": 6.1952, - "step": 4710 - }, - { - "epoch": 2.456844850065189, - "grad_norm": 1.7104767560958862, - "learning_rate": 9.576884422110553e-05, - "loss": 5.5211, - "step": 4711 - }, - { - "epoch": 2.457366362451108, - "grad_norm": 1.7929563522338867, - "learning_rate": 9.57678391959799e-05, - "loss": 5.7398, - "step": 4712 - }, - { - "epoch": 2.457887874837027, - "grad_norm": 1.6599271297454834, - "learning_rate": 9.576683417085428e-05, - "loss": 5.7055, - "step": 4713 - }, - { - "epoch": 2.4584093872229467, - "grad_norm": 1.6074079275131226, - "learning_rate": 9.576582914572864e-05, - "loss": 6.1165, - "step": 4714 - }, - { - "epoch": 2.4589308996088657, - "grad_norm": 1.6088043451309204, - "learning_rate": 9.576482412060302e-05, - "loss": 5.8875, - "step": 4715 - }, - { - "epoch": 2.4594524119947847, - "grad_norm": 1.594504952430725, - "learning_rate": 9.57638190954774e-05, - "loss": 6.0006, - "step": 4716 - }, - { - "epoch": 2.459973924380704, - "grad_norm": 1.6713759899139404, - "learning_rate": 9.576281407035176e-05, - "loss": 5.3991, - "step": 4717 - }, - { - "epoch": 2.460495436766623, - "grad_norm": 2.031285285949707, - "learning_rate": 9.576180904522614e-05, - "loss": 5.3336, - "step": 4718 - }, - { - "epoch": 2.461016949152542, - "grad_norm": 1.549748182296753, - "learning_rate": 9.57608040201005e-05, - "loss": 5.8627, - "step": 4719 - }, - { - "epoch": 2.4615384615384617, - "grad_norm": 1.3644412755966187, - "learning_rate": 9.575979899497488e-05, - "loss": 5.841, - "step": 4720 - }, - { - "epoch": 2.4620599739243807, - "grad_norm": 1.56881844997406, - "learning_rate": 9.575879396984924e-05, - "loss": 6.0316, - "step": 4721 - }, - { - "epoch": 2.4625814863102997, - "grad_norm": 1.541034460067749, - "learning_rate": 9.575778894472362e-05, - "loss": 5.7192, - "step": 4722 - }, - { - "epoch": 2.463102998696219, - "grad_norm": 1.4114419221878052, - "learning_rate": 9.575678391959799e-05, - "loss": 6.1718, - "step": 4723 - }, - { - "epoch": 2.463624511082138, - "grad_norm": 1.4744092226028442, - "learning_rate": 9.575577889447236e-05, - "loss": 5.459, - "step": 4724 - }, - { - "epoch": 2.4641460234680572, - "grad_norm": 1.394659161567688, - "learning_rate": 9.575477386934674e-05, - "loss": 6.0779, - "step": 4725 - }, - { - "epoch": 2.4646675358539767, - "grad_norm": 1.498979091644287, - "learning_rate": 9.575376884422112e-05, - "loss": 5.7417, - "step": 4726 - }, - { - "epoch": 2.4651890482398957, - "grad_norm": 1.8554108142852783, - "learning_rate": 9.575276381909548e-05, - "loss": 5.2206, - "step": 4727 - }, - { - "epoch": 2.4657105606258147, - "grad_norm": 1.4611144065856934, - "learning_rate": 9.575175879396986e-05, - "loss": 5.6197, - "step": 4728 - }, - { - "epoch": 2.466232073011734, - "grad_norm": 1.5486135482788086, - "learning_rate": 9.575075376884423e-05, - "loss": 6.0438, - "step": 4729 - }, - { - "epoch": 2.466753585397653, - "grad_norm": 1.507443904876709, - "learning_rate": 9.574974874371859e-05, - "loss": 5.8684, - "step": 4730 - }, - { - "epoch": 2.4672750977835722, - "grad_norm": 1.5569655895233154, - "learning_rate": 9.574874371859297e-05, - "loss": 5.813, - "step": 4731 - }, - { - "epoch": 2.4677966101694917, - "grad_norm": 1.544553518295288, - "learning_rate": 9.574773869346733e-05, - "loss": 5.7935, - "step": 4732 - }, - { - "epoch": 2.4683181225554107, - "grad_norm": 1.3460643291473389, - "learning_rate": 9.574673366834171e-05, - "loss": 6.0375, - "step": 4733 - }, - { - "epoch": 2.4688396349413297, - "grad_norm": 1.3455497026443481, - "learning_rate": 9.574572864321608e-05, - "loss": 5.7537, - "step": 4734 - }, - { - "epoch": 2.469361147327249, - "grad_norm": 1.5775635242462158, - "learning_rate": 9.574472361809045e-05, - "loss": 5.754, - "step": 4735 - }, - { - "epoch": 2.4698826597131682, - "grad_norm": 1.4673882722854614, - "learning_rate": 9.574371859296483e-05, - "loss": 6.143, - "step": 4736 - }, - { - "epoch": 2.4704041720990872, - "grad_norm": 1.4998196363449097, - "learning_rate": 9.574271356783921e-05, - "loss": 5.946, - "step": 4737 - }, - { - "epoch": 2.4709256844850067, - "grad_norm": 1.546087384223938, - "learning_rate": 9.574170854271357e-05, - "loss": 5.8402, - "step": 4738 - }, - { - "epoch": 2.4714471968709257, - "grad_norm": 1.560832142829895, - "learning_rate": 9.574070351758795e-05, - "loss": 5.5857, - "step": 4739 - }, - { - "epoch": 2.4719687092568448, - "grad_norm": 1.7449475526809692, - "learning_rate": 9.573969849246231e-05, - "loss": 5.7353, - "step": 4740 - }, - { - "epoch": 2.472490221642764, - "grad_norm": 1.5870628356933594, - "learning_rate": 9.573869346733669e-05, - "loss": 5.5628, - "step": 4741 - }, - { - "epoch": 2.4730117340286832, - "grad_norm": 1.463445782661438, - "learning_rate": 9.573768844221106e-05, - "loss": 5.5606, - "step": 4742 - }, - { - "epoch": 2.4735332464146023, - "grad_norm": 1.6449066400527954, - "learning_rate": 9.573668341708542e-05, - "loss": 5.1979, - "step": 4743 - }, - { - "epoch": 2.4740547588005217, - "grad_norm": 1.6277861595153809, - "learning_rate": 9.57356783919598e-05, - "loss": 5.5052, - "step": 4744 - }, - { - "epoch": 2.4745762711864407, - "grad_norm": 1.63543701171875, - "learning_rate": 9.573467336683418e-05, - "loss": 5.4885, - "step": 4745 - }, - { - "epoch": 2.4750977835723598, - "grad_norm": 1.6254291534423828, - "learning_rate": 9.573366834170855e-05, - "loss": 5.6125, - "step": 4746 - }, - { - "epoch": 2.4756192959582792, - "grad_norm": 1.6313624382019043, - "learning_rate": 9.573266331658292e-05, - "loss": 5.7128, - "step": 4747 - }, - { - "epoch": 2.4761408083441983, - "grad_norm": 1.4559804201126099, - "learning_rate": 9.57316582914573e-05, - "loss": 5.7288, - "step": 4748 - }, - { - "epoch": 2.4766623207301173, - "grad_norm": 2.1019771099090576, - "learning_rate": 9.573065326633166e-05, - "loss": 5.9452, - "step": 4749 - }, - { - "epoch": 2.4771838331160367, - "grad_norm": 1.559616208076477, - "learning_rate": 9.572964824120604e-05, - "loss": 5.5625, - "step": 4750 - }, - { - "epoch": 2.4777053455019558, - "grad_norm": 1.6559454202651978, - "learning_rate": 9.57286432160804e-05, - "loss": 5.727, - "step": 4751 - }, - { - "epoch": 2.478226857887875, - "grad_norm": 1.516391634941101, - "learning_rate": 9.572763819095478e-05, - "loss": 5.9819, - "step": 4752 - }, - { - "epoch": 2.4787483702737942, - "grad_norm": 1.4842084646224976, - "learning_rate": 9.572663316582915e-05, - "loss": 5.7383, - "step": 4753 - }, - { - "epoch": 2.4792698826597133, - "grad_norm": 1.40224289894104, - "learning_rate": 9.572562814070352e-05, - "loss": 6.0331, - "step": 4754 - }, - { - "epoch": 2.4797913950456323, - "grad_norm": 1.5375807285308838, - "learning_rate": 9.572462311557789e-05, - "loss": 5.6231, - "step": 4755 - }, - { - "epoch": 2.4803129074315513, - "grad_norm": 1.5442537069320679, - "learning_rate": 9.572361809045227e-05, - "loss": 5.702, - "step": 4756 - }, - { - "epoch": 2.4808344198174708, - "grad_norm": 1.5787838697433472, - "learning_rate": 9.572261306532664e-05, - "loss": 5.6647, - "step": 4757 - }, - { - "epoch": 2.48135593220339, - "grad_norm": 1.7905805110931396, - "learning_rate": 9.572160804020101e-05, - "loss": 5.2894, - "step": 4758 - }, - { - "epoch": 2.481877444589309, - "grad_norm": 1.6314866542816162, - "learning_rate": 9.572060301507539e-05, - "loss": 5.9956, - "step": 4759 - }, - { - "epoch": 2.4823989569752283, - "grad_norm": 1.91982901096344, - "learning_rate": 9.571959798994975e-05, - "loss": 5.6837, - "step": 4760 - }, - { - "epoch": 2.4829204693611473, - "grad_norm": 1.7811248302459717, - "learning_rate": 9.571859296482413e-05, - "loss": 6.1067, - "step": 4761 - }, - { - "epoch": 2.4834419817470663, - "grad_norm": 1.5440597534179688, - "learning_rate": 9.571758793969849e-05, - "loss": 5.9178, - "step": 4762 - }, - { - "epoch": 2.483963494132986, - "grad_norm": 1.6911349296569824, - "learning_rate": 9.571658291457287e-05, - "loss": 5.6568, - "step": 4763 - }, - { - "epoch": 2.484485006518905, - "grad_norm": 1.6182178258895874, - "learning_rate": 9.571557788944723e-05, - "loss": 5.8058, - "step": 4764 - }, - { - "epoch": 2.485006518904824, - "grad_norm": 1.519123911857605, - "learning_rate": 9.571457286432161e-05, - "loss": 5.7598, - "step": 4765 - }, - { - "epoch": 2.4855280312907433, - "grad_norm": 1.5415984392166138, - "learning_rate": 9.571356783919599e-05, - "loss": 5.8972, - "step": 4766 - }, - { - "epoch": 2.4860495436766623, - "grad_norm": 1.5686134099960327, - "learning_rate": 9.571256281407037e-05, - "loss": 5.7374, - "step": 4767 - }, - { - "epoch": 2.4865710560625813, - "grad_norm": 2.31042742729187, - "learning_rate": 9.571155778894473e-05, - "loss": 5.3739, - "step": 4768 - }, - { - "epoch": 2.487092568448501, - "grad_norm": 1.6043740510940552, - "learning_rate": 9.57105527638191e-05, - "loss": 5.6459, - "step": 4769 - }, - { - "epoch": 2.48761408083442, - "grad_norm": 1.3261629343032837, - "learning_rate": 9.570954773869347e-05, - "loss": 5.8653, - "step": 4770 - }, - { - "epoch": 2.488135593220339, - "grad_norm": 1.441320776939392, - "learning_rate": 9.570854271356784e-05, - "loss": 5.873, - "step": 4771 - }, - { - "epoch": 2.4886571056062583, - "grad_norm": 1.447891354560852, - "learning_rate": 9.570753768844222e-05, - "loss": 6.0027, - "step": 4772 - }, - { - "epoch": 2.4891786179921773, - "grad_norm": 1.6774041652679443, - "learning_rate": 9.570653266331658e-05, - "loss": 5.1079, - "step": 4773 - }, - { - "epoch": 2.4897001303780963, - "grad_norm": 1.5251694917678833, - "learning_rate": 9.570552763819096e-05, - "loss": 5.8963, - "step": 4774 - }, - { - "epoch": 2.490221642764016, - "grad_norm": 1.3824256658554077, - "learning_rate": 9.570452261306532e-05, - "loss": 6.0796, - "step": 4775 - }, - { - "epoch": 2.490743155149935, - "grad_norm": 1.3858067989349365, - "learning_rate": 9.57035175879397e-05, - "loss": 5.8237, - "step": 4776 - }, - { - "epoch": 2.491264667535854, - "grad_norm": 1.51829195022583, - "learning_rate": 9.570251256281408e-05, - "loss": 5.5509, - "step": 4777 - }, - { - "epoch": 2.4917861799217733, - "grad_norm": 1.477586030960083, - "learning_rate": 9.570150753768846e-05, - "loss": 5.6836, - "step": 4778 - }, - { - "epoch": 2.4923076923076923, - "grad_norm": 1.3985092639923096, - "learning_rate": 9.570050251256282e-05, - "loss": 6.1285, - "step": 4779 - }, - { - "epoch": 2.4928292046936114, - "grad_norm": 1.4266798496246338, - "learning_rate": 9.56994974874372e-05, - "loss": 6.2102, - "step": 4780 - }, - { - "epoch": 2.493350717079531, - "grad_norm": 1.603654384613037, - "learning_rate": 9.569849246231156e-05, - "loss": 5.817, - "step": 4781 - }, - { - "epoch": 2.49387222946545, - "grad_norm": 1.3765369653701782, - "learning_rate": 9.569748743718593e-05, - "loss": 6.139, - "step": 4782 - }, - { - "epoch": 2.494393741851369, - "grad_norm": 1.5857133865356445, - "learning_rate": 9.56964824120603e-05, - "loss": 5.8695, - "step": 4783 - }, - { - "epoch": 2.4949152542372883, - "grad_norm": 1.5431194305419922, - "learning_rate": 9.569547738693467e-05, - "loss": 6.276, - "step": 4784 - }, - { - "epoch": 2.4954367666232073, - "grad_norm": 1.583506464958191, - "learning_rate": 9.569447236180905e-05, - "loss": 5.5877, - "step": 4785 - }, - { - "epoch": 2.4959582790091264, - "grad_norm": 1.578717589378357, - "learning_rate": 9.569346733668342e-05, - "loss": 5.7058, - "step": 4786 - }, - { - "epoch": 2.4964797913950454, - "grad_norm": 1.6609982252120972, - "learning_rate": 9.56924623115578e-05, - "loss": 5.4426, - "step": 4787 - }, - { - "epoch": 2.497001303780965, - "grad_norm": 1.5315263271331787, - "learning_rate": 9.569145728643217e-05, - "loss": 5.5813, - "step": 4788 - }, - { - "epoch": 2.497522816166884, - "grad_norm": 1.8720115423202515, - "learning_rate": 9.569045226130654e-05, - "loss": 5.769, - "step": 4789 - }, - { - "epoch": 2.498044328552803, - "grad_norm": 1.5439645051956177, - "learning_rate": 9.568944723618091e-05, - "loss": 6.0571, - "step": 4790 - }, - { - "epoch": 2.4985658409387224, - "grad_norm": 1.4756160974502563, - "learning_rate": 9.568844221105529e-05, - "loss": 6.154, - "step": 4791 - }, - { - "epoch": 2.4990873533246414, - "grad_norm": 1.6719236373901367, - "learning_rate": 9.568743718592965e-05, - "loss": 5.434, - "step": 4792 - }, - { - "epoch": 2.4996088657105604, - "grad_norm": 2.0609192848205566, - "learning_rate": 9.568643216080403e-05, - "loss": 5.8785, - "step": 4793 - }, - { - "epoch": 2.50013037809648, - "grad_norm": 1.601980447769165, - "learning_rate": 9.568542713567839e-05, - "loss": 5.8929, - "step": 4794 - }, - { - "epoch": 2.500651890482399, - "grad_norm": 1.697913408279419, - "learning_rate": 9.568442211055277e-05, - "loss": 5.5296, - "step": 4795 - }, - { - "epoch": 2.501173402868318, - "grad_norm": 1.605042815208435, - "learning_rate": 9.568341708542713e-05, - "loss": 5.5867, - "step": 4796 - }, - { - "epoch": 2.5016949152542374, - "grad_norm": 1.446962833404541, - "learning_rate": 9.568241206030151e-05, - "loss": 6.0315, - "step": 4797 - }, - { - "epoch": 2.5022164276401564, - "grad_norm": 1.5430487394332886, - "learning_rate": 9.568140703517589e-05, - "loss": 5.7864, - "step": 4798 - }, - { - "epoch": 2.5027379400260754, - "grad_norm": 1.6912599802017212, - "learning_rate": 9.568040201005025e-05, - "loss": 5.8151, - "step": 4799 - }, - { - "epoch": 2.503259452411995, - "grad_norm": 1.7032774686813354, - "learning_rate": 9.567939698492463e-05, - "loss": 5.856, - "step": 4800 - }, - { - "epoch": 2.503780964797914, - "grad_norm": 2.432194948196411, - "learning_rate": 9.5678391959799e-05, - "loss": 5.803, - "step": 4801 - }, - { - "epoch": 2.504302477183833, - "grad_norm": 1.4741883277893066, - "learning_rate": 9.567738693467337e-05, - "loss": 5.8889, - "step": 4802 - }, - { - "epoch": 2.5048239895697524, - "grad_norm": 1.4171416759490967, - "learning_rate": 9.567638190954774e-05, - "loss": 5.7609, - "step": 4803 - }, - { - "epoch": 2.5053455019556714, - "grad_norm": 1.400147557258606, - "learning_rate": 9.567537688442212e-05, - "loss": 5.9252, - "step": 4804 - }, - { - "epoch": 2.5058670143415904, - "grad_norm": 1.4549416303634644, - "learning_rate": 9.567437185929648e-05, - "loss": 5.7882, - "step": 4805 - }, - { - "epoch": 2.50638852672751, - "grad_norm": 1.640956163406372, - "learning_rate": 9.567336683417086e-05, - "loss": 5.6103, - "step": 4806 - }, - { - "epoch": 2.506910039113429, - "grad_norm": 1.5338428020477295, - "learning_rate": 9.567236180904524e-05, - "loss": 5.8763, - "step": 4807 - }, - { - "epoch": 2.507431551499348, - "grad_norm": 1.457040786743164, - "learning_rate": 9.567135678391961e-05, - "loss": 6.2281, - "step": 4808 - }, - { - "epoch": 2.5079530638852674, - "grad_norm": 1.5344401597976685, - "learning_rate": 9.567035175879398e-05, - "loss": 5.9402, - "step": 4809 - }, - { - "epoch": 2.5084745762711864, - "grad_norm": 1.7055158615112305, - "learning_rate": 9.566934673366834e-05, - "loss": 5.8292, - "step": 4810 - }, - { - "epoch": 2.5089960886571054, - "grad_norm": 1.4953925609588623, - "learning_rate": 9.566834170854272e-05, - "loss": 6.1041, - "step": 4811 - }, - { - "epoch": 2.509517601043025, - "grad_norm": 1.5839004516601562, - "learning_rate": 9.566733668341708e-05, - "loss": 5.8289, - "step": 4812 - }, - { - "epoch": 2.510039113428944, - "grad_norm": 1.5163946151733398, - "learning_rate": 9.566633165829146e-05, - "loss": 5.2557, - "step": 4813 - }, - { - "epoch": 2.510560625814863, - "grad_norm": 1.5256576538085938, - "learning_rate": 9.566532663316583e-05, - "loss": 6.2139, - "step": 4814 - }, - { - "epoch": 2.5110821382007824, - "grad_norm": 1.6333038806915283, - "learning_rate": 9.56643216080402e-05, - "loss": 5.7649, - "step": 4815 - }, - { - "epoch": 2.5116036505867014, - "grad_norm": 1.4789555072784424, - "learning_rate": 9.566331658291457e-05, - "loss": 5.9014, - "step": 4816 - }, - { - "epoch": 2.5121251629726205, - "grad_norm": 1.6760848760604858, - "learning_rate": 9.566231155778895e-05, - "loss": 5.7538, - "step": 4817 - }, - { - "epoch": 2.51264667535854, - "grad_norm": 1.4620388746261597, - "learning_rate": 9.566130653266332e-05, - "loss": 5.9263, - "step": 4818 - }, - { - "epoch": 2.513168187744459, - "grad_norm": 1.491621971130371, - "learning_rate": 9.56603015075377e-05, - "loss": 6.107, - "step": 4819 - }, - { - "epoch": 2.513689700130378, - "grad_norm": 1.4770426750183105, - "learning_rate": 9.565929648241207e-05, - "loss": 5.9551, - "step": 4820 - }, - { - "epoch": 2.5142112125162974, - "grad_norm": 1.3915717601776123, - "learning_rate": 9.565829145728644e-05, - "loss": 5.9464, - "step": 4821 - }, - { - "epoch": 2.5147327249022164, - "grad_norm": 1.5212647914886475, - "learning_rate": 9.565728643216081e-05, - "loss": 5.9409, - "step": 4822 - }, - { - "epoch": 2.5152542372881355, - "grad_norm": 1.5616729259490967, - "learning_rate": 9.565628140703517e-05, - "loss": 5.487, - "step": 4823 - }, - { - "epoch": 2.515775749674055, - "grad_norm": 1.6819775104522705, - "learning_rate": 9.565527638190955e-05, - "loss": 5.4623, - "step": 4824 - }, - { - "epoch": 2.516297262059974, - "grad_norm": 1.412816047668457, - "learning_rate": 9.565427135678392e-05, - "loss": 5.7539, - "step": 4825 - }, - { - "epoch": 2.516818774445893, - "grad_norm": 1.454261064529419, - "learning_rate": 9.565326633165829e-05, - "loss": 6.1526, - "step": 4826 - }, - { - "epoch": 2.5173402868318124, - "grad_norm": 1.3960659503936768, - "learning_rate": 9.565226130653267e-05, - "loss": 6.3605, - "step": 4827 - }, - { - "epoch": 2.5178617992177315, - "grad_norm": 1.6657761335372925, - "learning_rate": 9.565125628140705e-05, - "loss": 4.9502, - "step": 4828 - }, - { - "epoch": 2.5183833116036505, - "grad_norm": 1.5701698064804077, - "learning_rate": 9.565025125628141e-05, - "loss": 6.1012, - "step": 4829 - }, - { - "epoch": 2.51890482398957, - "grad_norm": 1.5692607164382935, - "learning_rate": 9.564924623115579e-05, - "loss": 5.4425, - "step": 4830 - }, - { - "epoch": 2.519426336375489, - "grad_norm": 1.587478756904602, - "learning_rate": 9.564824120603016e-05, - "loss": 5.3714, - "step": 4831 - }, - { - "epoch": 2.519947848761408, - "grad_norm": 1.453775405883789, - "learning_rate": 9.564723618090453e-05, - "loss": 6.014, - "step": 4832 - }, - { - "epoch": 2.5204693611473274, - "grad_norm": 1.4537262916564941, - "learning_rate": 9.56462311557789e-05, - "loss": 5.9701, - "step": 4833 - }, - { - "epoch": 2.5209908735332465, - "grad_norm": 1.4299412965774536, - "learning_rate": 9.564522613065328e-05, - "loss": 6.1377, - "step": 4834 - }, - { - "epoch": 2.5215123859191655, - "grad_norm": 1.6620330810546875, - "learning_rate": 9.564422110552764e-05, - "loss": 5.7877, - "step": 4835 - }, - { - "epoch": 2.522033898305085, - "grad_norm": 1.4737210273742676, - "learning_rate": 9.5643216080402e-05, - "loss": 5.9387, - "step": 4836 - }, - { - "epoch": 2.522555410691004, - "grad_norm": 1.4558378458023071, - "learning_rate": 9.564221105527638e-05, - "loss": 5.8647, - "step": 4837 - }, - { - "epoch": 2.523076923076923, - "grad_norm": 1.3782732486724854, - "learning_rate": 9.564120603015076e-05, - "loss": 5.9465, - "step": 4838 - }, - { - "epoch": 2.5235984354628425, - "grad_norm": 1.4266347885131836, - "learning_rate": 9.564020100502514e-05, - "loss": 6.1785, - "step": 4839 - }, - { - "epoch": 2.5241199478487615, - "grad_norm": 1.575951337814331, - "learning_rate": 9.56391959798995e-05, - "loss": 5.886, - "step": 4840 - }, - { - "epoch": 2.5246414602346805, - "grad_norm": 1.402287244796753, - "learning_rate": 9.563819095477388e-05, - "loss": 6.1753, - "step": 4841 - }, - { - "epoch": 2.5251629726206, - "grad_norm": 1.5526705980300903, - "learning_rate": 9.563718592964824e-05, - "loss": 5.5445, - "step": 4842 - }, - { - "epoch": 2.525684485006519, - "grad_norm": 1.9571220874786377, - "learning_rate": 9.563618090452262e-05, - "loss": 5.5883, - "step": 4843 - }, - { - "epoch": 2.526205997392438, - "grad_norm": 1.5122853517532349, - "learning_rate": 9.563517587939699e-05, - "loss": 5.6845, - "step": 4844 - }, - { - "epoch": 2.5267275097783575, - "grad_norm": 1.51768159866333, - "learning_rate": 9.563417085427136e-05, - "loss": 5.5043, - "step": 4845 - }, - { - "epoch": 2.5272490221642765, - "grad_norm": 1.6747007369995117, - "learning_rate": 9.563316582914573e-05, - "loss": 5.6175, - "step": 4846 - }, - { - "epoch": 2.5277705345501955, - "grad_norm": 1.3852779865264893, - "learning_rate": 9.56321608040201e-05, - "loss": 5.893, - "step": 4847 - }, - { - "epoch": 2.528292046936115, - "grad_norm": 1.5290780067443848, - "learning_rate": 9.563115577889448e-05, - "loss": 5.6784, - "step": 4848 - }, - { - "epoch": 2.528813559322034, - "grad_norm": 1.4161065816879272, - "learning_rate": 9.563015075376885e-05, - "loss": 5.8204, - "step": 4849 - }, - { - "epoch": 2.529335071707953, - "grad_norm": 1.4041119813919067, - "learning_rate": 9.562914572864323e-05, - "loss": 5.5141, - "step": 4850 - }, - { - "epoch": 2.5298565840938725, - "grad_norm": 1.652336597442627, - "learning_rate": 9.562814070351759e-05, - "loss": 5.8498, - "step": 4851 - }, - { - "epoch": 2.5303780964797915, - "grad_norm": 1.5477467775344849, - "learning_rate": 9.562713567839197e-05, - "loss": 5.3674, - "step": 4852 - }, - { - "epoch": 2.5308996088657105, - "grad_norm": 1.5873663425445557, - "learning_rate": 9.562613065326633e-05, - "loss": 5.9149, - "step": 4853 - }, - { - "epoch": 2.53142112125163, - "grad_norm": 1.652782917022705, - "learning_rate": 9.562512562814071e-05, - "loss": 6.099, - "step": 4854 - }, - { - "epoch": 2.531942633637549, - "grad_norm": 1.3967654705047607, - "learning_rate": 9.562412060301507e-05, - "loss": 6.0186, - "step": 4855 - }, - { - "epoch": 2.532464146023468, - "grad_norm": 1.523650050163269, - "learning_rate": 9.562311557788945e-05, - "loss": 5.6946, - "step": 4856 - }, - { - "epoch": 2.5329856584093875, - "grad_norm": 1.7817187309265137, - "learning_rate": 9.562211055276382e-05, - "loss": 5.7264, - "step": 4857 - }, - { - "epoch": 2.5335071707953065, - "grad_norm": 1.386020541191101, - "learning_rate": 9.56211055276382e-05, - "loss": 6.0123, - "step": 4858 - }, - { - "epoch": 2.5340286831812255, - "grad_norm": 1.3775678873062134, - "learning_rate": 9.562010050251257e-05, - "loss": 6.342, - "step": 4859 - }, - { - "epoch": 2.5345501955671446, - "grad_norm": 1.7185759544372559, - "learning_rate": 9.561909547738695e-05, - "loss": 5.9197, - "step": 4860 - }, - { - "epoch": 2.535071707953064, - "grad_norm": 1.3208074569702148, - "learning_rate": 9.561809045226131e-05, - "loss": 6.4258, - "step": 4861 - }, - { - "epoch": 2.535593220338983, - "grad_norm": 1.3878352642059326, - "learning_rate": 9.561708542713568e-05, - "loss": 5.7574, - "step": 4862 - }, - { - "epoch": 2.536114732724902, - "grad_norm": 1.3933541774749756, - "learning_rate": 9.561608040201006e-05, - "loss": 5.9771, - "step": 4863 - }, - { - "epoch": 2.5366362451108215, - "grad_norm": 1.628386378288269, - "learning_rate": 9.561507537688442e-05, - "loss": 5.9234, - "step": 4864 - }, - { - "epoch": 2.5371577574967406, - "grad_norm": 1.4690829515457153, - "learning_rate": 9.56140703517588e-05, - "loss": 5.7361, - "step": 4865 - }, - { - "epoch": 2.5376792698826596, - "grad_norm": 1.6475598812103271, - "learning_rate": 9.561306532663316e-05, - "loss": 5.643, - "step": 4866 - }, - { - "epoch": 2.538200782268579, - "grad_norm": 1.423583745956421, - "learning_rate": 9.561206030150754e-05, - "loss": 5.9888, - "step": 4867 - }, - { - "epoch": 2.538722294654498, - "grad_norm": 1.5557703971862793, - "learning_rate": 9.561105527638192e-05, - "loss": 5.5081, - "step": 4868 - }, - { - "epoch": 2.539243807040417, - "grad_norm": 1.4079763889312744, - "learning_rate": 9.56100502512563e-05, - "loss": 5.9362, - "step": 4869 - }, - { - "epoch": 2.539765319426336, - "grad_norm": 1.3977445363998413, - "learning_rate": 9.560904522613066e-05, - "loss": 5.9808, - "step": 4870 - }, - { - "epoch": 2.5402868318122556, - "grad_norm": 1.5023306608200073, - "learning_rate": 9.560804020100504e-05, - "loss": 5.756, - "step": 4871 - }, - { - "epoch": 2.5408083441981746, - "grad_norm": 1.4479445219039917, - "learning_rate": 9.56070351758794e-05, - "loss": 5.8998, - "step": 4872 - }, - { - "epoch": 2.5413298565840936, - "grad_norm": 1.3944220542907715, - "learning_rate": 9.560603015075378e-05, - "loss": 5.8181, - "step": 4873 - }, - { - "epoch": 2.541851368970013, - "grad_norm": 1.6817361116409302, - "learning_rate": 9.560502512562814e-05, - "loss": 5.5604, - "step": 4874 - }, - { - "epoch": 2.542372881355932, - "grad_norm": 1.4943729639053345, - "learning_rate": 9.560402010050252e-05, - "loss": 6.3862, - "step": 4875 - }, - { - "epoch": 2.542894393741851, - "grad_norm": 1.6542384624481201, - "learning_rate": 9.560301507537689e-05, - "loss": 5.9109, - "step": 4876 - }, - { - "epoch": 2.5434159061277706, - "grad_norm": 1.7097663879394531, - "learning_rate": 9.560201005025125e-05, - "loss": 5.5542, - "step": 4877 - }, - { - "epoch": 2.5439374185136896, - "grad_norm": 1.5016015768051147, - "learning_rate": 9.560100502512563e-05, - "loss": 5.5871, - "step": 4878 - }, - { - "epoch": 2.5444589308996086, - "grad_norm": 1.5243148803710938, - "learning_rate": 9.56e-05, - "loss": 6.1649, - "step": 4879 - }, - { - "epoch": 2.544980443285528, - "grad_norm": 1.3934766054153442, - "learning_rate": 9.559899497487438e-05, - "loss": 6.0201, - "step": 4880 - }, - { - "epoch": 2.545501955671447, - "grad_norm": 1.353205919265747, - "learning_rate": 9.559798994974875e-05, - "loss": 6.1229, - "step": 4881 - }, - { - "epoch": 2.546023468057366, - "grad_norm": 1.8142156600952148, - "learning_rate": 9.559698492462313e-05, - "loss": 5.572, - "step": 4882 - }, - { - "epoch": 2.5465449804432856, - "grad_norm": 1.532049298286438, - "learning_rate": 9.559597989949749e-05, - "loss": 5.893, - "step": 4883 - }, - { - "epoch": 2.5470664928292046, - "grad_norm": 1.498363733291626, - "learning_rate": 9.559497487437187e-05, - "loss": 5.809, - "step": 4884 - }, - { - "epoch": 2.5475880052151236, - "grad_norm": 1.379697322845459, - "learning_rate": 9.559396984924623e-05, - "loss": 5.6439, - "step": 4885 - }, - { - "epoch": 2.548109517601043, - "grad_norm": 1.4061715602874756, - "learning_rate": 9.559296482412061e-05, - "loss": 5.8238, - "step": 4886 - }, - { - "epoch": 2.548631029986962, - "grad_norm": 1.4150760173797607, - "learning_rate": 9.559195979899497e-05, - "loss": 5.9641, - "step": 4887 - }, - { - "epoch": 2.549152542372881, - "grad_norm": 1.4395184516906738, - "learning_rate": 9.559095477386935e-05, - "loss": 5.2822, - "step": 4888 - }, - { - "epoch": 2.5496740547588006, - "grad_norm": 1.5797990560531616, - "learning_rate": 9.558994974874372e-05, - "loss": 5.9153, - "step": 4889 - }, - { - "epoch": 2.5501955671447196, - "grad_norm": 1.3622783422470093, - "learning_rate": 9.55889447236181e-05, - "loss": 6.1547, - "step": 4890 - }, - { - "epoch": 2.5507170795306386, - "grad_norm": 1.7607394456863403, - "learning_rate": 9.558793969849247e-05, - "loss": 5.386, - "step": 4891 - }, - { - "epoch": 2.551238591916558, - "grad_norm": 1.645982027053833, - "learning_rate": 9.558693467336684e-05, - "loss": 5.7626, - "step": 4892 - }, - { - "epoch": 2.551760104302477, - "grad_norm": 1.4103668928146362, - "learning_rate": 9.558592964824121e-05, - "loss": 5.9749, - "step": 4893 - }, - { - "epoch": 2.552281616688396, - "grad_norm": 1.505454659461975, - "learning_rate": 9.558492462311558e-05, - "loss": 5.9183, - "step": 4894 - }, - { - "epoch": 2.5528031290743156, - "grad_norm": 1.3817424774169922, - "learning_rate": 9.558391959798996e-05, - "loss": 6.1864, - "step": 4895 - }, - { - "epoch": 2.5533246414602346, - "grad_norm": 1.4669145345687866, - "learning_rate": 9.558291457286432e-05, - "loss": 6.0256, - "step": 4896 - }, - { - "epoch": 2.5538461538461537, - "grad_norm": 1.3954702615737915, - "learning_rate": 9.55819095477387e-05, - "loss": 6.2333, - "step": 4897 - }, - { - "epoch": 2.554367666232073, - "grad_norm": 1.648632526397705, - "learning_rate": 9.558090452261306e-05, - "loss": 5.8438, - "step": 4898 - }, - { - "epoch": 2.554889178617992, - "grad_norm": 1.800253987312317, - "learning_rate": 9.557989949748744e-05, - "loss": 5.2236, - "step": 4899 - }, - { - "epoch": 2.555410691003911, - "grad_norm": 1.7411351203918457, - "learning_rate": 9.557889447236182e-05, - "loss": 5.938, - "step": 4900 - }, - { - "epoch": 2.5559322033898306, - "grad_norm": 1.5227670669555664, - "learning_rate": 9.55778894472362e-05, - "loss": 5.787, - "step": 4901 - }, - { - "epoch": 2.5564537157757496, - "grad_norm": 1.5491819381713867, - "learning_rate": 9.557688442211056e-05, - "loss": 5.7506, - "step": 4902 - }, - { - "epoch": 2.5569752281616687, - "grad_norm": 1.4868522882461548, - "learning_rate": 9.557587939698493e-05, - "loss": 5.6488, - "step": 4903 - }, - { - "epoch": 2.557496740547588, - "grad_norm": 1.8262728452682495, - "learning_rate": 9.55748743718593e-05, - "loss": 5.7859, - "step": 4904 - }, - { - "epoch": 2.558018252933507, - "grad_norm": 1.544114112854004, - "learning_rate": 9.557386934673367e-05, - "loss": 5.5315, - "step": 4905 - }, - { - "epoch": 2.558539765319426, - "grad_norm": 1.5082577466964722, - "learning_rate": 9.557286432160804e-05, - "loss": 5.6623, - "step": 4906 - }, - { - "epoch": 2.5590612777053456, - "grad_norm": 1.4401352405548096, - "learning_rate": 9.557185929648241e-05, - "loss": 6.0024, - "step": 4907 - }, - { - "epoch": 2.5595827900912647, - "grad_norm": 1.5661818981170654, - "learning_rate": 9.557085427135679e-05, - "loss": 5.4126, - "step": 4908 - }, - { - "epoch": 2.5601043024771837, - "grad_norm": 1.4719387292861938, - "learning_rate": 9.556984924623115e-05, - "loss": 5.7271, - "step": 4909 - }, - { - "epoch": 2.560625814863103, - "grad_norm": 1.5360444784164429, - "learning_rate": 9.556884422110553e-05, - "loss": 5.9958, - "step": 4910 - }, - { - "epoch": 2.561147327249022, - "grad_norm": 1.5458017587661743, - "learning_rate": 9.556783919597991e-05, - "loss": 5.9146, - "step": 4911 - }, - { - "epoch": 2.561668839634941, - "grad_norm": 1.454142689704895, - "learning_rate": 9.556683417085428e-05, - "loss": 5.6069, - "step": 4912 - }, - { - "epoch": 2.5621903520208607, - "grad_norm": 1.5150444507598877, - "learning_rate": 9.556582914572865e-05, - "loss": 6.0435, - "step": 4913 - }, - { - "epoch": 2.5627118644067797, - "grad_norm": 1.492445707321167, - "learning_rate": 9.556482412060303e-05, - "loss": 5.8739, - "step": 4914 - }, - { - "epoch": 2.5632333767926987, - "grad_norm": 1.5032119750976562, - "learning_rate": 9.556381909547739e-05, - "loss": 5.6988, - "step": 4915 - }, - { - "epoch": 2.563754889178618, - "grad_norm": 1.543714165687561, - "learning_rate": 9.556281407035176e-05, - "loss": 5.8963, - "step": 4916 - }, - { - "epoch": 2.564276401564537, - "grad_norm": 1.3285914659500122, - "learning_rate": 9.556180904522613e-05, - "loss": 6.3071, - "step": 4917 - }, - { - "epoch": 2.564797913950456, - "grad_norm": 1.9831669330596924, - "learning_rate": 9.55608040201005e-05, - "loss": 4.7935, - "step": 4918 - }, - { - "epoch": 2.5653194263363757, - "grad_norm": 1.4202433824539185, - "learning_rate": 9.555979899497488e-05, - "loss": 6.1302, - "step": 4919 - }, - { - "epoch": 2.5658409387222947, - "grad_norm": 1.6701037883758545, - "learning_rate": 9.555879396984925e-05, - "loss": 5.6922, - "step": 4920 - }, - { - "epoch": 2.5663624511082137, - "grad_norm": 1.4507468938827515, - "learning_rate": 9.555778894472363e-05, - "loss": 6.1946, - "step": 4921 - }, - { - "epoch": 2.566883963494133, - "grad_norm": 1.585044026374817, - "learning_rate": 9.5556783919598e-05, - "loss": 5.6405, - "step": 4922 - }, - { - "epoch": 2.567405475880052, - "grad_norm": 1.5023295879364014, - "learning_rate": 9.555577889447237e-05, - "loss": 5.7222, - "step": 4923 - }, - { - "epoch": 2.567926988265971, - "grad_norm": 1.506818413734436, - "learning_rate": 9.555477386934674e-05, - "loss": 6.2653, - "step": 4924 - }, - { - "epoch": 2.5684485006518907, - "grad_norm": 1.4110885858535767, - "learning_rate": 9.555376884422112e-05, - "loss": 6.1549, - "step": 4925 - }, - { - "epoch": 2.5689700130378097, - "grad_norm": 1.5171003341674805, - "learning_rate": 9.555276381909548e-05, - "loss": 6.171, - "step": 4926 - }, - { - "epoch": 2.5694915254237287, - "grad_norm": 1.512742042541504, - "learning_rate": 9.555175879396986e-05, - "loss": 5.8396, - "step": 4927 - }, - { - "epoch": 2.570013037809648, - "grad_norm": 1.3136080503463745, - "learning_rate": 9.555075376884422e-05, - "loss": 6.2355, - "step": 4928 - }, - { - "epoch": 2.570534550195567, - "grad_norm": 1.3885431289672852, - "learning_rate": 9.554974874371859e-05, - "loss": 6.1975, - "step": 4929 - }, - { - "epoch": 2.5710560625814862, - "grad_norm": 1.5574791431427002, - "learning_rate": 9.554874371859296e-05, - "loss": 5.9415, - "step": 4930 - }, - { - "epoch": 2.5715775749674057, - "grad_norm": 1.5767273902893066, - "learning_rate": 9.554773869346734e-05, - "loss": 5.2698, - "step": 4931 - }, - { - "epoch": 2.5720990873533247, - "grad_norm": 1.4684633016586304, - "learning_rate": 9.554673366834172e-05, - "loss": 5.976, - "step": 4932 - }, - { - "epoch": 2.5726205997392437, - "grad_norm": 1.5518721342086792, - "learning_rate": 9.554572864321608e-05, - "loss": 5.7763, - "step": 4933 - }, - { - "epoch": 2.573142112125163, - "grad_norm": 1.42588472366333, - "learning_rate": 9.554472361809046e-05, - "loss": 6.045, - "step": 4934 - }, - { - "epoch": 2.573663624511082, - "grad_norm": 1.3854042291641235, - "learning_rate": 9.554371859296483e-05, - "loss": 6.1132, - "step": 4935 - }, - { - "epoch": 2.5741851368970012, - "grad_norm": 1.5302084684371948, - "learning_rate": 9.55427135678392e-05, - "loss": 5.8721, - "step": 4936 - }, - { - "epoch": 2.5747066492829207, - "grad_norm": 1.9314842224121094, - "learning_rate": 9.554170854271357e-05, - "loss": 5.1593, - "step": 4937 - }, - { - "epoch": 2.5752281616688397, - "grad_norm": 1.5143386125564575, - "learning_rate": 9.554070351758795e-05, - "loss": 6.1635, - "step": 4938 - }, - { - "epoch": 2.5757496740547587, - "grad_norm": 1.7665318250656128, - "learning_rate": 9.553969849246231e-05, - "loss": 5.8289, - "step": 4939 - }, - { - "epoch": 2.576271186440678, - "grad_norm": 1.4895192384719849, - "learning_rate": 9.553869346733669e-05, - "loss": 5.7266, - "step": 4940 - }, - { - "epoch": 2.5767926988265972, - "grad_norm": 1.491462230682373, - "learning_rate": 9.553768844221107e-05, - "loss": 6.2341, - "step": 4941 - }, - { - "epoch": 2.5773142112125162, - "grad_norm": 1.443203091621399, - "learning_rate": 9.553668341708543e-05, - "loss": 6.0857, - "step": 4942 - }, - { - "epoch": 2.5778357235984357, - "grad_norm": 1.5826008319854736, - "learning_rate": 9.553567839195981e-05, - "loss": 5.712, - "step": 4943 - }, - { - "epoch": 2.5783572359843547, - "grad_norm": 1.6070513725280762, - "learning_rate": 9.553467336683417e-05, - "loss": 5.5146, - "step": 4944 - }, - { - "epoch": 2.5788787483702738, - "grad_norm": 1.7267638444900513, - "learning_rate": 9.553366834170855e-05, - "loss": 5.3383, - "step": 4945 - }, - { - "epoch": 2.579400260756193, - "grad_norm": 1.65509831905365, - "learning_rate": 9.553266331658291e-05, - "loss": 5.8969, - "step": 4946 - }, - { - "epoch": 2.5799217731421122, - "grad_norm": 1.5705424547195435, - "learning_rate": 9.553165829145729e-05, - "loss": 6.2231, - "step": 4947 - }, - { - "epoch": 2.5804432855280313, - "grad_norm": 1.3533644676208496, - "learning_rate": 9.553065326633166e-05, - "loss": 6.1414, - "step": 4948 - }, - { - "epoch": 2.5809647979139507, - "grad_norm": 1.3411778211593628, - "learning_rate": 9.552964824120603e-05, - "loss": 5.8285, - "step": 4949 - }, - { - "epoch": 2.5814863102998697, - "grad_norm": 1.6119413375854492, - "learning_rate": 9.55286432160804e-05, - "loss": 5.6827, - "step": 4950 - }, - { - "epoch": 2.5820078226857888, - "grad_norm": 1.5210593938827515, - "learning_rate": 9.552763819095478e-05, - "loss": 5.9937, - "step": 4951 - }, - { - "epoch": 2.582529335071708, - "grad_norm": 1.3873943090438843, - "learning_rate": 9.552663316582915e-05, - "loss": 6.1973, - "step": 4952 - }, - { - "epoch": 2.5830508474576273, - "grad_norm": 1.7004578113555908, - "learning_rate": 9.552562814070353e-05, - "loss": 5.7198, - "step": 4953 - }, - { - "epoch": 2.5835723598435463, - "grad_norm": 1.4841238260269165, - "learning_rate": 9.55246231155779e-05, - "loss": 5.9674, - "step": 4954 - }, - { - "epoch": 2.5840938722294653, - "grad_norm": 1.6378461122512817, - "learning_rate": 9.552361809045226e-05, - "loss": 5.6279, - "step": 4955 - }, - { - "epoch": 2.5846153846153848, - "grad_norm": 1.4548827409744263, - "learning_rate": 9.552261306532664e-05, - "loss": 5.8073, - "step": 4956 - }, - { - "epoch": 2.585136897001304, - "grad_norm": 1.4955155849456787, - "learning_rate": 9.5521608040201e-05, - "loss": 6.1286, - "step": 4957 - }, - { - "epoch": 2.585658409387223, - "grad_norm": 1.456142783164978, - "learning_rate": 9.552060301507538e-05, - "loss": 6.2438, - "step": 4958 - }, - { - "epoch": 2.5861799217731423, - "grad_norm": 1.8174327611923218, - "learning_rate": 9.551959798994974e-05, - "loss": 5.1813, - "step": 4959 - }, - { - "epoch": 2.5867014341590613, - "grad_norm": 1.562380075454712, - "learning_rate": 9.551859296482412e-05, - "loss": 5.9697, - "step": 4960 - }, - { - "epoch": 2.5872229465449803, - "grad_norm": 1.456675410270691, - "learning_rate": 9.55175879396985e-05, - "loss": 5.9005, - "step": 4961 - }, - { - "epoch": 2.5877444589308998, - "grad_norm": 1.533780574798584, - "learning_rate": 9.551658291457288e-05, - "loss": 5.5308, - "step": 4962 - }, - { - "epoch": 2.588265971316819, - "grad_norm": 1.4614955186843872, - "learning_rate": 9.551557788944724e-05, - "loss": 6.1012, - "step": 4963 - }, - { - "epoch": 2.588787483702738, - "grad_norm": 1.6414686441421509, - "learning_rate": 9.551457286432162e-05, - "loss": 5.7823, - "step": 4964 - }, - { - "epoch": 2.589308996088657, - "grad_norm": 1.6913644075393677, - "learning_rate": 9.551356783919598e-05, - "loss": 6.14, - "step": 4965 - }, - { - "epoch": 2.5898305084745763, - "grad_norm": 1.6518419981002808, - "learning_rate": 9.551256281407036e-05, - "loss": 5.7146, - "step": 4966 - }, - { - "epoch": 2.5903520208604953, - "grad_norm": 1.5552659034729004, - "learning_rate": 9.551155778894473e-05, - "loss": 6.2022, - "step": 4967 - }, - { - "epoch": 2.5908735332464143, - "grad_norm": 1.525368571281433, - "learning_rate": 9.55105527638191e-05, - "loss": 5.6289, - "step": 4968 - }, - { - "epoch": 2.591395045632334, - "grad_norm": 1.6114280223846436, - "learning_rate": 9.550954773869347e-05, - "loss": 5.5026, - "step": 4969 - }, - { - "epoch": 2.591916558018253, - "grad_norm": 1.5574963092803955, - "learning_rate": 9.550854271356783e-05, - "loss": 5.8758, - "step": 4970 - }, - { - "epoch": 2.592438070404172, - "grad_norm": 1.4775002002716064, - "learning_rate": 9.550753768844221e-05, - "loss": 5.7687, - "step": 4971 - }, - { - "epoch": 2.5929595827900913, - "grad_norm": 1.3690000772476196, - "learning_rate": 9.550653266331659e-05, - "loss": 6.1546, - "step": 4972 - }, - { - "epoch": 2.5934810951760103, - "grad_norm": 1.43038010597229, - "learning_rate": 9.550552763819097e-05, - "loss": 6.0218, - "step": 4973 - }, - { - "epoch": 2.5940026075619294, - "grad_norm": 1.4997000694274902, - "learning_rate": 9.550452261306533e-05, - "loss": 5.9948, - "step": 4974 - }, - { - "epoch": 2.594524119947849, - "grad_norm": 1.4385737180709839, - "learning_rate": 9.550351758793971e-05, - "loss": 6.0313, - "step": 4975 - }, - { - "epoch": 2.595045632333768, - "grad_norm": 1.5429884195327759, - "learning_rate": 9.550251256281407e-05, - "loss": 6.1105, - "step": 4976 - }, - { - "epoch": 2.595567144719687, - "grad_norm": 1.461922526359558, - "learning_rate": 9.550150753768845e-05, - "loss": 6.0955, - "step": 4977 - }, - { - "epoch": 2.5960886571056063, - "grad_norm": 1.4797965288162231, - "learning_rate": 9.550050251256281e-05, - "loss": 5.7342, - "step": 4978 - }, - { - "epoch": 2.5966101694915253, - "grad_norm": 1.434741497039795, - "learning_rate": 9.549949748743719e-05, - "loss": 6.1408, - "step": 4979 - }, - { - "epoch": 2.5971316818774444, - "grad_norm": 1.3131860494613647, - "learning_rate": 9.549849246231156e-05, - "loss": 6.1472, - "step": 4980 - }, - { - "epoch": 2.597653194263364, - "grad_norm": 1.3303923606872559, - "learning_rate": 9.549748743718593e-05, - "loss": 5.994, - "step": 4981 - }, - { - "epoch": 2.598174706649283, - "grad_norm": 1.5733730792999268, - "learning_rate": 9.549648241206031e-05, - "loss": 5.9427, - "step": 4982 - }, - { - "epoch": 2.598696219035202, - "grad_norm": 1.4695848226547241, - "learning_rate": 9.549547738693468e-05, - "loss": 5.7732, - "step": 4983 - }, - { - "epoch": 2.5992177314211213, - "grad_norm": 1.4286550283432007, - "learning_rate": 9.549447236180905e-05, - "loss": 5.9407, - "step": 4984 - }, - { - "epoch": 2.5997392438070404, - "grad_norm": 1.4792206287384033, - "learning_rate": 9.549346733668342e-05, - "loss": 5.8254, - "step": 4985 - }, - { - "epoch": 2.6002607561929594, - "grad_norm": 1.4887049198150635, - "learning_rate": 9.54924623115578e-05, - "loss": 6.0146, - "step": 4986 - }, - { - "epoch": 2.600782268578879, - "grad_norm": 1.3689192533493042, - "learning_rate": 9.549145728643216e-05, - "loss": 5.6423, - "step": 4987 - }, - { - "epoch": 2.601303780964798, - "grad_norm": 1.4413626194000244, - "learning_rate": 9.549045226130654e-05, - "loss": 5.9723, - "step": 4988 - }, - { - "epoch": 2.601825293350717, - "grad_norm": 1.3952202796936035, - "learning_rate": 9.54894472361809e-05, - "loss": 5.8967, - "step": 4989 - }, - { - "epoch": 2.6023468057366363, - "grad_norm": 1.548154354095459, - "learning_rate": 9.548844221105528e-05, - "loss": 5.8004, - "step": 4990 - }, - { - "epoch": 2.6028683181225554, - "grad_norm": 1.4952946901321411, - "learning_rate": 9.548743718592965e-05, - "loss": 5.6286, - "step": 4991 - }, - { - "epoch": 2.6033898305084744, - "grad_norm": 1.5337224006652832, - "learning_rate": 9.548643216080402e-05, - "loss": 5.627, - "step": 4992 - }, - { - "epoch": 2.603911342894394, - "grad_norm": 1.517376184463501, - "learning_rate": 9.54854271356784e-05, - "loss": 6.2875, - "step": 4993 - }, - { - "epoch": 2.604432855280313, - "grad_norm": 1.521404504776001, - "learning_rate": 9.548442211055278e-05, - "loss": 6.0356, - "step": 4994 - }, - { - "epoch": 2.604954367666232, - "grad_norm": 1.956773042678833, - "learning_rate": 9.548341708542714e-05, - "loss": 5.4372, - "step": 4995 - }, - { - "epoch": 2.6054758800521514, - "grad_norm": 1.4439547061920166, - "learning_rate": 9.548241206030151e-05, - "loss": 6.24, - "step": 4996 - }, - { - "epoch": 2.6059973924380704, - "grad_norm": 1.5614346265792847, - "learning_rate": 9.548140703517589e-05, - "loss": 5.7138, - "step": 4997 - }, - { - "epoch": 2.6065189048239894, - "grad_norm": 1.5995956659317017, - "learning_rate": 9.548040201005025e-05, - "loss": 5.7402, - "step": 4998 - }, - { - "epoch": 2.607040417209909, - "grad_norm": 1.6731740236282349, - "learning_rate": 9.547939698492463e-05, - "loss": 5.6629, - "step": 4999 - }, - { - "epoch": 2.607561929595828, - "grad_norm": 1.6680164337158203, - "learning_rate": 9.547839195979899e-05, - "loss": 5.5758, - "step": 5000 - }, - { - "epoch": 2.607561929595828, - "eval_loss": 5.870407581329346, - "eval_runtime": 42.6912, - "eval_samples_per_second": 28.718, - "eval_steps_per_second": 3.607, - "step": 5000 - }, - { - "epoch": 2.608083441981747, - "grad_norm": 1.472576379776001, - "learning_rate": 9.547738693467337e-05, - "loss": 5.9558, - "step": 5001 - }, - { - "epoch": 2.6086049543676664, - "grad_norm": 1.6486186981201172, - "learning_rate": 9.547638190954775e-05, - "loss": 5.9835, - "step": 5002 - }, - { - "epoch": 2.6091264667535854, - "grad_norm": 1.7186744213104248, - "learning_rate": 9.547537688442213e-05, - "loss": 5.8526, - "step": 5003 - }, - { - "epoch": 2.6096479791395044, - "grad_norm": 1.8182179927825928, - "learning_rate": 9.547437185929649e-05, - "loss": 5.8459, - "step": 5004 - }, - { - "epoch": 2.610169491525424, - "grad_norm": 1.5836211442947388, - "learning_rate": 9.547336683417087e-05, - "loss": 5.7555, - "step": 5005 - }, - { - "epoch": 2.610691003911343, - "grad_norm": 1.366097092628479, - "learning_rate": 9.547236180904523e-05, - "loss": 5.4636, - "step": 5006 - }, - { - "epoch": 2.611212516297262, - "grad_norm": 1.4980669021606445, - "learning_rate": 9.547135678391961e-05, - "loss": 5.9617, - "step": 5007 - }, - { - "epoch": 2.6117340286831814, - "grad_norm": 1.5618220567703247, - "learning_rate": 9.547035175879397e-05, - "loss": 5.6871, - "step": 5008 - }, - { - "epoch": 2.6122555410691004, - "grad_norm": 1.6605079174041748, - "learning_rate": 9.546934673366834e-05, - "loss": 5.9256, - "step": 5009 - }, - { - "epoch": 2.6127770534550194, - "grad_norm": 1.5125423669815063, - "learning_rate": 9.546834170854272e-05, - "loss": 6.1618, - "step": 5010 - }, - { - "epoch": 2.613298565840939, - "grad_norm": 1.5087411403656006, - "learning_rate": 9.546733668341708e-05, - "loss": 5.7258, - "step": 5011 - }, - { - "epoch": 2.613820078226858, - "grad_norm": 1.5925954580307007, - "learning_rate": 9.546633165829146e-05, - "loss": 6.0616, - "step": 5012 - }, - { - "epoch": 2.614341590612777, - "grad_norm": 1.599152684211731, - "learning_rate": 9.546532663316584e-05, - "loss": 5.9506, - "step": 5013 - }, - { - "epoch": 2.6148631029986964, - "grad_norm": 1.51369309425354, - "learning_rate": 9.546432160804021e-05, - "loss": 6.0551, - "step": 5014 - }, - { - "epoch": 2.6153846153846154, - "grad_norm": 1.5197774171829224, - "learning_rate": 9.546331658291458e-05, - "loss": 5.7489, - "step": 5015 - }, - { - "epoch": 2.6159061277705344, - "grad_norm": 1.4035998582839966, - "learning_rate": 9.546231155778896e-05, - "loss": 6.1557, - "step": 5016 - }, - { - "epoch": 2.616427640156454, - "grad_norm": 1.3745861053466797, - "learning_rate": 9.546130653266332e-05, - "loss": 5.994, - "step": 5017 - }, - { - "epoch": 2.616949152542373, - "grad_norm": 1.6477463245391846, - "learning_rate": 9.54603015075377e-05, - "loss": 5.7367, - "step": 5018 - }, - { - "epoch": 2.617470664928292, - "grad_norm": 1.6669809818267822, - "learning_rate": 9.545929648241206e-05, - "loss": 5.7648, - "step": 5019 - }, - { - "epoch": 2.6179921773142114, - "grad_norm": 1.3900141716003418, - "learning_rate": 9.545829145728644e-05, - "loss": 5.9756, - "step": 5020 - }, - { - "epoch": 2.6185136897001304, - "grad_norm": 1.6460955142974854, - "learning_rate": 9.54572864321608e-05, - "loss": 5.465, - "step": 5021 - }, - { - "epoch": 2.6190352020860495, - "grad_norm": 1.5359174013137817, - "learning_rate": 9.545628140703518e-05, - "loss": 5.9153, - "step": 5022 - }, - { - "epoch": 2.619556714471969, - "grad_norm": 1.3302761316299438, - "learning_rate": 9.545527638190956e-05, - "loss": 6.1481, - "step": 5023 - }, - { - "epoch": 2.620078226857888, - "grad_norm": 1.5097354650497437, - "learning_rate": 9.545427135678392e-05, - "loss": 5.9412, - "step": 5024 - }, - { - "epoch": 2.620599739243807, - "grad_norm": 1.4498836994171143, - "learning_rate": 9.54532663316583e-05, - "loss": 6.0881, - "step": 5025 - }, - { - "epoch": 2.6211212516297264, - "grad_norm": 1.3311947584152222, - "learning_rate": 9.545226130653267e-05, - "loss": 6.0251, - "step": 5026 - }, - { - "epoch": 2.6216427640156454, - "grad_norm": 1.3834559917449951, - "learning_rate": 9.545125628140704e-05, - "loss": 5.1921, - "step": 5027 - }, - { - "epoch": 2.6221642764015645, - "grad_norm": 1.5339542627334595, - "learning_rate": 9.545025125628141e-05, - "loss": 5.9142, - "step": 5028 - }, - { - "epoch": 2.622685788787484, - "grad_norm": 1.3102749586105347, - "learning_rate": 9.544924623115579e-05, - "loss": 6.201, - "step": 5029 - }, - { - "epoch": 2.623207301173403, - "grad_norm": 1.4846876859664917, - "learning_rate": 9.544824120603015e-05, - "loss": 5.789, - "step": 5030 - }, - { - "epoch": 2.623728813559322, - "grad_norm": 1.4042407274246216, - "learning_rate": 9.544723618090453e-05, - "loss": 6.146, - "step": 5031 - }, - { - "epoch": 2.6242503259452414, - "grad_norm": 1.5999616384506226, - "learning_rate": 9.544623115577889e-05, - "loss": 5.6465, - "step": 5032 - }, - { - "epoch": 2.6247718383311605, - "grad_norm": 1.5846381187438965, - "learning_rate": 9.544522613065327e-05, - "loss": 5.5475, - "step": 5033 - }, - { - "epoch": 2.6252933507170795, - "grad_norm": 1.3971928358078003, - "learning_rate": 9.544422110552765e-05, - "loss": 6.2333, - "step": 5034 - }, - { - "epoch": 2.625814863102999, - "grad_norm": 1.4666469097137451, - "learning_rate": 9.544321608040201e-05, - "loss": 5.4194, - "step": 5035 - }, - { - "epoch": 2.626336375488918, - "grad_norm": 1.5029730796813965, - "learning_rate": 9.544221105527639e-05, - "loss": 5.7225, - "step": 5036 - }, - { - "epoch": 2.626857887874837, - "grad_norm": 1.3892558813095093, - "learning_rate": 9.544120603015075e-05, - "loss": 6.0422, - "step": 5037 - }, - { - "epoch": 2.6273794002607564, - "grad_norm": 1.5366686582565308, - "learning_rate": 9.544020100502513e-05, - "loss": 5.9961, - "step": 5038 - }, - { - "epoch": 2.6279009126466755, - "grad_norm": 1.545335054397583, - "learning_rate": 9.54391959798995e-05, - "loss": 5.7841, - "step": 5039 - }, - { - "epoch": 2.6284224250325945, - "grad_norm": 1.6340769529342651, - "learning_rate": 9.543819095477387e-05, - "loss": 5.5915, - "step": 5040 - }, - { - "epoch": 2.628943937418514, - "grad_norm": 1.677231788635254, - "learning_rate": 9.543718592964824e-05, - "loss": 5.713, - "step": 5041 - }, - { - "epoch": 2.629465449804433, - "grad_norm": 1.5500590801239014, - "learning_rate": 9.543618090452262e-05, - "loss": 5.8206, - "step": 5042 - }, - { - "epoch": 2.629986962190352, - "grad_norm": 1.632042407989502, - "learning_rate": 9.543517587939698e-05, - "loss": 5.4749, - "step": 5043 - }, - { - "epoch": 2.6305084745762715, - "grad_norm": 1.4213882684707642, - "learning_rate": 9.543417085427136e-05, - "loss": 5.806, - "step": 5044 - }, - { - "epoch": 2.6310299869621905, - "grad_norm": 1.5072643756866455, - "learning_rate": 9.543316582914574e-05, - "loss": 5.9314, - "step": 5045 - }, - { - "epoch": 2.6315514993481095, - "grad_norm": 1.4365506172180176, - "learning_rate": 9.543216080402011e-05, - "loss": 5.9133, - "step": 5046 - }, - { - "epoch": 2.6320730117340285, - "grad_norm": 1.436258316040039, - "learning_rate": 9.543115577889448e-05, - "loss": 6.0337, - "step": 5047 - }, - { - "epoch": 2.632594524119948, - "grad_norm": 1.5551789999008179, - "learning_rate": 9.543015075376884e-05, - "loss": 5.9603, - "step": 5048 - }, - { - "epoch": 2.633116036505867, - "grad_norm": 1.539718747138977, - "learning_rate": 9.542914572864322e-05, - "loss": 5.6086, - "step": 5049 - }, - { - "epoch": 2.633637548891786, - "grad_norm": 1.736962080001831, - "learning_rate": 9.542814070351758e-05, - "loss": 5.7569, - "step": 5050 - }, - { - "epoch": 2.6341590612777055, - "grad_norm": 1.513782024383545, - "learning_rate": 9.542713567839196e-05, - "loss": 5.7692, - "step": 5051 - }, - { - "epoch": 2.6346805736636245, - "grad_norm": 1.8075569868087769, - "learning_rate": 9.542613065326633e-05, - "loss": 5.5231, - "step": 5052 - }, - { - "epoch": 2.6352020860495435, - "grad_norm": 1.522943377494812, - "learning_rate": 9.54251256281407e-05, - "loss": 5.656, - "step": 5053 - }, - { - "epoch": 2.635723598435463, - "grad_norm": 1.5258946418762207, - "learning_rate": 9.542412060301508e-05, - "loss": 5.6287, - "step": 5054 - }, - { - "epoch": 2.636245110821382, - "grad_norm": 1.4363160133361816, - "learning_rate": 9.542311557788946e-05, - "loss": 5.8832, - "step": 5055 - }, - { - "epoch": 2.636766623207301, - "grad_norm": 1.6591776609420776, - "learning_rate": 9.542211055276382e-05, - "loss": 5.7782, - "step": 5056 - }, - { - "epoch": 2.63728813559322, - "grad_norm": 1.5033172369003296, - "learning_rate": 9.54211055276382e-05, - "loss": 5.529, - "step": 5057 - }, - { - "epoch": 2.6378096479791395, - "grad_norm": 1.5052874088287354, - "learning_rate": 9.542010050251257e-05, - "loss": 6.0534, - "step": 5058 - }, - { - "epoch": 2.6383311603650585, - "grad_norm": 1.443816900253296, - "learning_rate": 9.541909547738694e-05, - "loss": 6.0019, - "step": 5059 - }, - { - "epoch": 2.6388526727509776, - "grad_norm": 1.3907922506332397, - "learning_rate": 9.541809045226131e-05, - "loss": 6.1866, - "step": 5060 - }, - { - "epoch": 2.639374185136897, - "grad_norm": 1.5504872798919678, - "learning_rate": 9.541708542713569e-05, - "loss": 6.1834, - "step": 5061 - }, - { - "epoch": 2.639895697522816, - "grad_norm": 1.9608761072158813, - "learning_rate": 9.541608040201005e-05, - "loss": 5.7587, - "step": 5062 - }, - { - "epoch": 2.640417209908735, - "grad_norm": 1.4263288974761963, - "learning_rate": 9.541507537688442e-05, - "loss": 6.0363, - "step": 5063 - }, - { - "epoch": 2.6409387222946545, - "grad_norm": 1.5398608446121216, - "learning_rate": 9.541407035175879e-05, - "loss": 5.8031, - "step": 5064 - }, - { - "epoch": 2.6414602346805736, - "grad_norm": 1.4484831094741821, - "learning_rate": 9.541306532663317e-05, - "loss": 6.164, - "step": 5065 - }, - { - "epoch": 2.6419817470664926, - "grad_norm": 1.34367835521698, - "learning_rate": 9.541206030150755e-05, - "loss": 6.0677, - "step": 5066 - }, - { - "epoch": 2.642503259452412, - "grad_norm": 1.5244886875152588, - "learning_rate": 9.541105527638191e-05, - "loss": 5.6891, - "step": 5067 - }, - { - "epoch": 2.643024771838331, - "grad_norm": 1.5506577491760254, - "learning_rate": 9.541005025125629e-05, - "loss": 5.5522, - "step": 5068 - }, - { - "epoch": 2.64354628422425, - "grad_norm": 1.6631300449371338, - "learning_rate": 9.540904522613066e-05, - "loss": 5.4891, - "step": 5069 - }, - { - "epoch": 2.6440677966101696, - "grad_norm": 1.4600266218185425, - "learning_rate": 9.540804020100503e-05, - "loss": 5.6986, - "step": 5070 - }, - { - "epoch": 2.6445893089960886, - "grad_norm": 1.6264759302139282, - "learning_rate": 9.54070351758794e-05, - "loss": 5.4695, - "step": 5071 - }, - { - "epoch": 2.6451108213820076, - "grad_norm": 1.4172817468643188, - "learning_rate": 9.540603015075378e-05, - "loss": 5.8627, - "step": 5072 - }, - { - "epoch": 2.645632333767927, - "grad_norm": 1.5001908540725708, - "learning_rate": 9.540502512562814e-05, - "loss": 5.7113, - "step": 5073 - }, - { - "epoch": 2.646153846153846, - "grad_norm": 1.6415660381317139, - "learning_rate": 9.540402010050252e-05, - "loss": 5.4965, - "step": 5074 - }, - { - "epoch": 2.646675358539765, - "grad_norm": 1.3839116096496582, - "learning_rate": 9.54030150753769e-05, - "loss": 5.9701, - "step": 5075 - }, - { - "epoch": 2.6471968709256846, - "grad_norm": 1.4515818357467651, - "learning_rate": 9.540201005025126e-05, - "loss": 6.1795, - "step": 5076 - }, - { - "epoch": 2.6477183833116036, - "grad_norm": 1.5052530765533447, - "learning_rate": 9.540100502512564e-05, - "loss": 5.8367, - "step": 5077 - }, - { - "epoch": 2.6482398956975226, - "grad_norm": 1.445517659187317, - "learning_rate": 9.54e-05, - "loss": 5.8987, - "step": 5078 - }, - { - "epoch": 2.648761408083442, - "grad_norm": 1.46699059009552, - "learning_rate": 9.539899497487438e-05, - "loss": 6.0236, - "step": 5079 - }, - { - "epoch": 2.649282920469361, - "grad_norm": 1.8584587574005127, - "learning_rate": 9.539798994974874e-05, - "loss": 5.1286, - "step": 5080 - }, - { - "epoch": 2.64980443285528, - "grad_norm": 1.3936880826950073, - "learning_rate": 9.539698492462312e-05, - "loss": 6.0714, - "step": 5081 - }, - { - "epoch": 2.6503259452411996, - "grad_norm": 2.976459264755249, - "learning_rate": 9.539597989949749e-05, - "loss": 4.9812, - "step": 5082 - }, - { - "epoch": 2.6508474576271186, - "grad_norm": 1.6329282522201538, - "learning_rate": 9.539497487437186e-05, - "loss": 5.9051, - "step": 5083 - }, - { - "epoch": 2.6513689700130376, - "grad_norm": 1.4490526914596558, - "learning_rate": 9.539396984924623e-05, - "loss": 5.8654, - "step": 5084 - }, - { - "epoch": 2.651890482398957, - "grad_norm": 1.683050274848938, - "learning_rate": 9.53929648241206e-05, - "loss": 5.5123, - "step": 5085 - }, - { - "epoch": 2.652411994784876, - "grad_norm": 1.479514241218567, - "learning_rate": 9.539195979899498e-05, - "loss": 6.0158, - "step": 5086 - }, - { - "epoch": 2.652933507170795, - "grad_norm": 1.6067718267440796, - "learning_rate": 9.539095477386936e-05, - "loss": 5.8476, - "step": 5087 - }, - { - "epoch": 2.6534550195567146, - "grad_norm": 1.6182746887207031, - "learning_rate": 9.538994974874373e-05, - "loss": 5.991, - "step": 5088 - }, - { - "epoch": 2.6539765319426336, - "grad_norm": 1.6602853536605835, - "learning_rate": 9.538894472361809e-05, - "loss": 5.8801, - "step": 5089 - }, - { - "epoch": 2.6544980443285526, - "grad_norm": 1.5242979526519775, - "learning_rate": 9.538793969849247e-05, - "loss": 6.0922, - "step": 5090 - }, - { - "epoch": 2.655019556714472, - "grad_norm": 1.442383885383606, - "learning_rate": 9.538693467336683e-05, - "loss": 5.8011, - "step": 5091 - }, - { - "epoch": 2.655541069100391, - "grad_norm": 1.4333420991897583, - "learning_rate": 9.538592964824121e-05, - "loss": 5.7155, - "step": 5092 - }, - { - "epoch": 2.65606258148631, - "grad_norm": 1.4986058473587036, - "learning_rate": 9.538492462311557e-05, - "loss": 5.5191, - "step": 5093 - }, - { - "epoch": 2.6565840938722296, - "grad_norm": 1.3976603746414185, - "learning_rate": 9.538391959798995e-05, - "loss": 6.0328, - "step": 5094 - }, - { - "epoch": 2.6571056062581486, - "grad_norm": 1.400056004524231, - "learning_rate": 9.538291457286433e-05, - "loss": 6.127, - "step": 5095 - }, - { - "epoch": 2.6576271186440676, - "grad_norm": 1.5347181558609009, - "learning_rate": 9.538190954773871e-05, - "loss": 5.5926, - "step": 5096 - }, - { - "epoch": 2.658148631029987, - "grad_norm": 1.5451070070266724, - "learning_rate": 9.538090452261307e-05, - "loss": 5.6362, - "step": 5097 - }, - { - "epoch": 2.658670143415906, - "grad_norm": 1.563220739364624, - "learning_rate": 9.537989949748745e-05, - "loss": 6.0911, - "step": 5098 - }, - { - "epoch": 2.659191655801825, - "grad_norm": 1.447637915611267, - "learning_rate": 9.537889447236181e-05, - "loss": 5.7648, - "step": 5099 - }, - { - "epoch": 2.6597131681877446, - "grad_norm": 1.5903068780899048, - "learning_rate": 9.537788944723619e-05, - "loss": 5.8409, - "step": 5100 - }, - { - "epoch": 2.6602346805736636, - "grad_norm": 1.5306520462036133, - "learning_rate": 9.537688442211056e-05, - "loss": 5.929, - "step": 5101 - }, - { - "epoch": 2.6607561929595827, - "grad_norm": 1.5478739738464355, - "learning_rate": 9.537587939698492e-05, - "loss": 5.774, - "step": 5102 - }, - { - "epoch": 2.661277705345502, - "grad_norm": 1.6153430938720703, - "learning_rate": 9.53748743718593e-05, - "loss": 5.6799, - "step": 5103 - }, - { - "epoch": 2.661799217731421, - "grad_norm": 1.8335466384887695, - "learning_rate": 9.537386934673366e-05, - "loss": 5.3785, - "step": 5104 - }, - { - "epoch": 2.66232073011734, - "grad_norm": 1.7194207906723022, - "learning_rate": 9.537286432160804e-05, - "loss": 5.7453, - "step": 5105 - }, - { - "epoch": 2.6628422425032596, - "grad_norm": 1.543682336807251, - "learning_rate": 9.537185929648242e-05, - "loss": 5.8407, - "step": 5106 - }, - { - "epoch": 2.6633637548891786, - "grad_norm": 1.4730786085128784, - "learning_rate": 9.53708542713568e-05, - "loss": 5.7341, - "step": 5107 - }, - { - "epoch": 2.6638852672750977, - "grad_norm": 1.3241785764694214, - "learning_rate": 9.536984924623116e-05, - "loss": 6.1994, - "step": 5108 - }, - { - "epoch": 2.664406779661017, - "grad_norm": 1.4168648719787598, - "learning_rate": 9.536884422110554e-05, - "loss": 5.6842, - "step": 5109 - }, - { - "epoch": 2.664928292046936, - "grad_norm": 1.5501210689544678, - "learning_rate": 9.53678391959799e-05, - "loss": 5.6424, - "step": 5110 - }, - { - "epoch": 2.665449804432855, - "grad_norm": 1.5539036989212036, - "learning_rate": 9.536683417085428e-05, - "loss": 5.6763, - "step": 5111 - }, - { - "epoch": 2.6659713168187746, - "grad_norm": 1.6299200057983398, - "learning_rate": 9.536582914572864e-05, - "loss": 5.732, - "step": 5112 - }, - { - "epoch": 2.6664928292046937, - "grad_norm": 1.5042723417282104, - "learning_rate": 9.536482412060302e-05, - "loss": 5.9362, - "step": 5113 - }, - { - "epoch": 2.6670143415906127, - "grad_norm": 1.5157870054244995, - "learning_rate": 9.536381909547739e-05, - "loss": 5.3122, - "step": 5114 - }, - { - "epoch": 2.667535853976532, - "grad_norm": 1.6283293962478638, - "learning_rate": 9.536281407035176e-05, - "loss": 5.8761, - "step": 5115 - }, - { - "epoch": 2.668057366362451, - "grad_norm": 1.4683616161346436, - "learning_rate": 9.536180904522614e-05, - "loss": 6.0856, - "step": 5116 - }, - { - "epoch": 2.66857887874837, - "grad_norm": 1.3522953987121582, - "learning_rate": 9.53608040201005e-05, - "loss": 6.1401, - "step": 5117 - }, - { - "epoch": 2.6691003911342897, - "grad_norm": 1.7956067323684692, - "learning_rate": 9.535979899497488e-05, - "loss": 5.5571, - "step": 5118 - }, - { - "epoch": 2.6696219035202087, - "grad_norm": 1.7063608169555664, - "learning_rate": 9.535879396984925e-05, - "loss": 5.7644, - "step": 5119 - }, - { - "epoch": 2.6701434159061277, - "grad_norm": 1.540897011756897, - "learning_rate": 9.535778894472363e-05, - "loss": 5.69, - "step": 5120 - }, - { - "epoch": 2.670664928292047, - "grad_norm": 1.49859619140625, - "learning_rate": 9.535678391959799e-05, - "loss": 5.5124, - "step": 5121 - }, - { - "epoch": 2.671186440677966, - "grad_norm": 1.5233063697814941, - "learning_rate": 9.535577889447237e-05, - "loss": 5.5869, - "step": 5122 - }, - { - "epoch": 2.671707953063885, - "grad_norm": 1.841001272201538, - "learning_rate": 9.535477386934673e-05, - "loss": 5.5394, - "step": 5123 - }, - { - "epoch": 2.6722294654498047, - "grad_norm": 1.5955227613449097, - "learning_rate": 9.535376884422111e-05, - "loss": 6.1203, - "step": 5124 - }, - { - "epoch": 2.6727509778357237, - "grad_norm": 1.506130576133728, - "learning_rate": 9.535276381909547e-05, - "loss": 6.0362, - "step": 5125 - }, - { - "epoch": 2.6732724902216427, - "grad_norm": 1.6731722354888916, - "learning_rate": 9.535175879396985e-05, - "loss": 5.1234, - "step": 5126 - }, - { - "epoch": 2.673794002607562, - "grad_norm": 1.5869596004486084, - "learning_rate": 9.535075376884423e-05, - "loss": 6.0809, - "step": 5127 - }, - { - "epoch": 2.674315514993481, - "grad_norm": 1.615325927734375, - "learning_rate": 9.53497487437186e-05, - "loss": 5.6983, - "step": 5128 - }, - { - "epoch": 2.6748370273794, - "grad_norm": 1.5391204357147217, - "learning_rate": 9.534874371859297e-05, - "loss": 5.827, - "step": 5129 - }, - { - "epoch": 2.6753585397653197, - "grad_norm": 1.4242204427719116, - "learning_rate": 9.534773869346734e-05, - "loss": 6.3313, - "step": 5130 - }, - { - "epoch": 2.6758800521512387, - "grad_norm": 1.5119777917861938, - "learning_rate": 9.534673366834171e-05, - "loss": 5.3284, - "step": 5131 - }, - { - "epoch": 2.6764015645371577, - "grad_norm": 1.461450219154358, - "learning_rate": 9.534572864321608e-05, - "loss": 6.1421, - "step": 5132 - }, - { - "epoch": 2.676923076923077, - "grad_norm": 1.3794804811477661, - "learning_rate": 9.534472361809046e-05, - "loss": 6.1087, - "step": 5133 - }, - { - "epoch": 2.677444589308996, - "grad_norm": 1.720292091369629, - "learning_rate": 9.534371859296482e-05, - "loss": 5.5028, - "step": 5134 - }, - { - "epoch": 2.6779661016949152, - "grad_norm": 1.5049498081207275, - "learning_rate": 9.53427135678392e-05, - "loss": 5.955, - "step": 5135 - }, - { - "epoch": 2.6784876140808347, - "grad_norm": 1.4400975704193115, - "learning_rate": 9.534170854271358e-05, - "loss": 5.8641, - "step": 5136 - }, - { - "epoch": 2.6790091264667537, - "grad_norm": 1.5083614587783813, - "learning_rate": 9.534070351758795e-05, - "loss": 5.3277, - "step": 5137 - }, - { - "epoch": 2.6795306388526727, - "grad_norm": 1.5147428512573242, - "learning_rate": 9.533969849246232e-05, - "loss": 5.3186, - "step": 5138 - }, - { - "epoch": 2.6800521512385918, - "grad_norm": 1.4857791662216187, - "learning_rate": 9.53386934673367e-05, - "loss": 5.739, - "step": 5139 - }, - { - "epoch": 2.680573663624511, - "grad_norm": 1.4180806875228882, - "learning_rate": 9.533768844221106e-05, - "loss": 5.8829, - "step": 5140 - }, - { - "epoch": 2.6810951760104302, - "grad_norm": 1.3846855163574219, - "learning_rate": 9.533668341708543e-05, - "loss": 5.905, - "step": 5141 - }, - { - "epoch": 2.6816166883963493, - "grad_norm": 1.390498161315918, - "learning_rate": 9.53356783919598e-05, - "loss": 5.9663, - "step": 5142 - }, - { - "epoch": 2.6821382007822687, - "grad_norm": 1.3757820129394531, - "learning_rate": 9.533467336683417e-05, - "loss": 5.9549, - "step": 5143 - }, - { - "epoch": 2.6826597131681877, - "grad_norm": 1.4589306116104126, - "learning_rate": 9.533366834170855e-05, - "loss": 5.8502, - "step": 5144 - }, - { - "epoch": 2.6831812255541068, - "grad_norm": 1.3386086225509644, - "learning_rate": 9.533266331658291e-05, - "loss": 5.9957, - "step": 5145 - }, - { - "epoch": 2.6837027379400262, - "grad_norm": 1.4587695598602295, - "learning_rate": 9.533165829145729e-05, - "loss": 5.8309, - "step": 5146 - }, - { - "epoch": 2.6842242503259452, - "grad_norm": 1.360872745513916, - "learning_rate": 9.533065326633166e-05, - "loss": 5.8577, - "step": 5147 - }, - { - "epoch": 2.6847457627118643, - "grad_norm": 1.595858097076416, - "learning_rate": 9.532964824120604e-05, - "loss": 5.6721, - "step": 5148 - }, - { - "epoch": 2.6852672750977837, - "grad_norm": 1.4469940662384033, - "learning_rate": 9.532864321608041e-05, - "loss": 6.0894, - "step": 5149 - }, - { - "epoch": 2.6857887874837028, - "grad_norm": 1.5330045223236084, - "learning_rate": 9.532763819095478e-05, - "loss": 5.9846, - "step": 5150 - }, - { - "epoch": 2.6863102998696218, - "grad_norm": 1.7494968175888062, - "learning_rate": 9.532663316582915e-05, - "loss": 5.6543, - "step": 5151 - }, - { - "epoch": 2.686831812255541, - "grad_norm": 1.5228216648101807, - "learning_rate": 9.532562814070353e-05, - "loss": 5.7057, - "step": 5152 - }, - { - "epoch": 2.6873533246414603, - "grad_norm": 1.5397599935531616, - "learning_rate": 9.532462311557789e-05, - "loss": 5.7234, - "step": 5153 - }, - { - "epoch": 2.6878748370273793, - "grad_norm": 1.4691836833953857, - "learning_rate": 9.532361809045227e-05, - "loss": 5.7489, - "step": 5154 - }, - { - "epoch": 2.6883963494132983, - "grad_norm": 1.2525509595870972, - "learning_rate": 9.532261306532663e-05, - "loss": 5.9964, - "step": 5155 - }, - { - "epoch": 2.6889178617992178, - "grad_norm": 1.3811547756195068, - "learning_rate": 9.532160804020101e-05, - "loss": 6.0054, - "step": 5156 - }, - { - "epoch": 2.689439374185137, - "grad_norm": 1.4274972677230835, - "learning_rate": 9.532060301507539e-05, - "loss": 5.8716, - "step": 5157 - }, - { - "epoch": 2.689960886571056, - "grad_norm": 1.4014118909835815, - "learning_rate": 9.531959798994975e-05, - "loss": 5.8567, - "step": 5158 - }, - { - "epoch": 2.6904823989569753, - "grad_norm": 1.4200385808944702, - "learning_rate": 9.531859296482413e-05, - "loss": 5.802, - "step": 5159 - }, - { - "epoch": 2.6910039113428943, - "grad_norm": 1.6902804374694824, - "learning_rate": 9.53175879396985e-05, - "loss": 5.4695, - "step": 5160 - }, - { - "epoch": 2.6915254237288133, - "grad_norm": 1.6169764995574951, - "learning_rate": 9.531658291457287e-05, - "loss": 5.5174, - "step": 5161 - }, - { - "epoch": 2.692046936114733, - "grad_norm": 1.5280276536941528, - "learning_rate": 9.531557788944724e-05, - "loss": 6.0423, - "step": 5162 - }, - { - "epoch": 2.692568448500652, - "grad_norm": 1.4429396390914917, - "learning_rate": 9.531457286432162e-05, - "loss": 6.2644, - "step": 5163 - }, - { - "epoch": 2.693089960886571, - "grad_norm": 1.4513996839523315, - "learning_rate": 9.531356783919598e-05, - "loss": 6.1521, - "step": 5164 - }, - { - "epoch": 2.6936114732724903, - "grad_norm": 1.4250222444534302, - "learning_rate": 9.531256281407036e-05, - "loss": 6.0255, - "step": 5165 - }, - { - "epoch": 2.6941329856584093, - "grad_norm": 1.502251386642456, - "learning_rate": 9.531155778894472e-05, - "loss": 5.8056, - "step": 5166 - }, - { - "epoch": 2.6946544980443283, - "grad_norm": 1.4401469230651855, - "learning_rate": 9.53105527638191e-05, - "loss": 5.7866, - "step": 5167 - }, - { - "epoch": 2.695176010430248, - "grad_norm": 1.6457639932632446, - "learning_rate": 9.530954773869348e-05, - "loss": 5.9127, - "step": 5168 - }, - { - "epoch": 2.695697522816167, - "grad_norm": 1.3917865753173828, - "learning_rate": 9.530854271356784e-05, - "loss": 6.0433, - "step": 5169 - }, - { - "epoch": 2.696219035202086, - "grad_norm": 1.559657096862793, - "learning_rate": 9.530753768844222e-05, - "loss": 5.6674, - "step": 5170 - }, - { - "epoch": 2.6967405475880053, - "grad_norm": 1.5140495300292969, - "learning_rate": 9.530653266331658e-05, - "loss": 6.1845, - "step": 5171 - }, - { - "epoch": 2.6972620599739243, - "grad_norm": 1.5789153575897217, - "learning_rate": 9.530552763819096e-05, - "loss": 5.4262, - "step": 5172 - }, - { - "epoch": 2.6977835723598433, - "grad_norm": 1.3890787363052368, - "learning_rate": 9.530452261306533e-05, - "loss": 6.1416, - "step": 5173 - }, - { - "epoch": 2.698305084745763, - "grad_norm": 1.504050612449646, - "learning_rate": 9.53035175879397e-05, - "loss": 6.0222, - "step": 5174 - }, - { - "epoch": 2.698826597131682, - "grad_norm": 1.6642489433288574, - "learning_rate": 9.530251256281407e-05, - "loss": 5.6307, - "step": 5175 - }, - { - "epoch": 2.699348109517601, - "grad_norm": 1.6988753080368042, - "learning_rate": 9.530150753768845e-05, - "loss": 5.6507, - "step": 5176 - }, - { - "epoch": 2.6998696219035203, - "grad_norm": 1.7431178092956543, - "learning_rate": 9.530050251256282e-05, - "loss": 5.2001, - "step": 5177 - }, - { - "epoch": 2.7003911342894393, - "grad_norm": 1.5542442798614502, - "learning_rate": 9.52994974874372e-05, - "loss": 5.9271, - "step": 5178 - }, - { - "epoch": 2.7009126466753584, - "grad_norm": 1.6295182704925537, - "learning_rate": 9.529849246231157e-05, - "loss": 5.9589, - "step": 5179 - }, - { - "epoch": 2.701434159061278, - "grad_norm": 1.5354334115982056, - "learning_rate": 9.529748743718594e-05, - "loss": 5.4012, - "step": 5180 - }, - { - "epoch": 2.701955671447197, - "grad_norm": 1.575677752494812, - "learning_rate": 9.529648241206031e-05, - "loss": 5.685, - "step": 5181 - }, - { - "epoch": 2.702477183833116, - "grad_norm": 1.5911293029785156, - "learning_rate": 9.529547738693467e-05, - "loss": 5.9932, - "step": 5182 - }, - { - "epoch": 2.7029986962190353, - "grad_norm": 1.590155005455017, - "learning_rate": 9.529447236180905e-05, - "loss": 5.5379, - "step": 5183 - }, - { - "epoch": 2.7035202086049543, - "grad_norm": 1.3691349029541016, - "learning_rate": 9.529346733668341e-05, - "loss": 5.8124, - "step": 5184 - }, - { - "epoch": 2.7040417209908734, - "grad_norm": 1.5409071445465088, - "learning_rate": 9.529246231155779e-05, - "loss": 6.0204, - "step": 5185 - }, - { - "epoch": 2.704563233376793, - "grad_norm": 1.450858473777771, - "learning_rate": 9.529145728643216e-05, - "loss": 5.7684, - "step": 5186 - }, - { - "epoch": 2.705084745762712, - "grad_norm": 1.5022752285003662, - "learning_rate": 9.529045226130653e-05, - "loss": 5.687, - "step": 5187 - }, - { - "epoch": 2.705606258148631, - "grad_norm": 1.5702344179153442, - "learning_rate": 9.528944723618091e-05, - "loss": 5.7013, - "step": 5188 - }, - { - "epoch": 2.7061277705345503, - "grad_norm": 1.4816606044769287, - "learning_rate": 9.528844221105529e-05, - "loss": 6.062, - "step": 5189 - }, - { - "epoch": 2.7066492829204694, - "grad_norm": 1.5153523683547974, - "learning_rate": 9.528743718592965e-05, - "loss": 5.7961, - "step": 5190 - }, - { - "epoch": 2.7071707953063884, - "grad_norm": 1.3507033586502075, - "learning_rate": 9.528643216080403e-05, - "loss": 5.9947, - "step": 5191 - }, - { - "epoch": 2.707692307692308, - "grad_norm": 1.6690802574157715, - "learning_rate": 9.52854271356784e-05, - "loss": 5.3276, - "step": 5192 - }, - { - "epoch": 2.708213820078227, - "grad_norm": 1.5505234003067017, - "learning_rate": 9.528442211055277e-05, - "loss": 5.9616, - "step": 5193 - }, - { - "epoch": 2.708735332464146, - "grad_norm": 1.3932942152023315, - "learning_rate": 9.528341708542714e-05, - "loss": 6.2604, - "step": 5194 - }, - { - "epoch": 2.7092568448500653, - "grad_norm": 1.5112119913101196, - "learning_rate": 9.52824120603015e-05, - "loss": 5.7811, - "step": 5195 - }, - { - "epoch": 2.7097783572359844, - "grad_norm": 1.6142544746398926, - "learning_rate": 9.528140703517588e-05, - "loss": 5.9023, - "step": 5196 - }, - { - "epoch": 2.7102998696219034, - "grad_norm": 1.3298146724700928, - "learning_rate": 9.528040201005026e-05, - "loss": 6.2009, - "step": 5197 - }, - { - "epoch": 2.710821382007823, - "grad_norm": 1.4620105028152466, - "learning_rate": 9.527939698492464e-05, - "loss": 5.9448, - "step": 5198 - }, - { - "epoch": 2.711342894393742, - "grad_norm": 1.51407790184021, - "learning_rate": 9.5278391959799e-05, - "loss": 5.7596, - "step": 5199 - }, - { - "epoch": 2.711864406779661, - "grad_norm": 1.4731444120407104, - "learning_rate": 9.527738693467338e-05, - "loss": 5.8057, - "step": 5200 - }, - { - "epoch": 2.7123859191655804, - "grad_norm": 1.6514036655426025, - "learning_rate": 9.527638190954774e-05, - "loss": 6.2216, - "step": 5201 - }, - { - "epoch": 2.7129074315514994, - "grad_norm": 1.6662817001342773, - "learning_rate": 9.527537688442212e-05, - "loss": 5.9857, - "step": 5202 - }, - { - "epoch": 2.7134289439374184, - "grad_norm": 1.5811858177185059, - "learning_rate": 9.527437185929648e-05, - "loss": 5.6259, - "step": 5203 - }, - { - "epoch": 2.713950456323338, - "grad_norm": 1.4241440296173096, - "learning_rate": 9.527336683417086e-05, - "loss": 5.5792, - "step": 5204 - }, - { - "epoch": 2.714471968709257, - "grad_norm": 1.6506435871124268, - "learning_rate": 9.527236180904523e-05, - "loss": 5.7458, - "step": 5205 - }, - { - "epoch": 2.714993481095176, - "grad_norm": 1.456315517425537, - "learning_rate": 9.52713567839196e-05, - "loss": 5.8962, - "step": 5206 - }, - { - "epoch": 2.7155149934810954, - "grad_norm": 1.5771361589431763, - "learning_rate": 9.527035175879397e-05, - "loss": 5.98, - "step": 5207 - }, - { - "epoch": 2.7160365058670144, - "grad_norm": 1.5656245946884155, - "learning_rate": 9.526934673366835e-05, - "loss": 6.1064, - "step": 5208 - }, - { - "epoch": 2.7165580182529334, - "grad_norm": 1.4260226488113403, - "learning_rate": 9.526834170854272e-05, - "loss": 5.9251, - "step": 5209 - }, - { - "epoch": 2.717079530638853, - "grad_norm": 1.6678003072738647, - "learning_rate": 9.526733668341709e-05, - "loss": 5.5837, - "step": 5210 - }, - { - "epoch": 2.717601043024772, - "grad_norm": 1.546895146369934, - "learning_rate": 9.526633165829147e-05, - "loss": 6.3094, - "step": 5211 - }, - { - "epoch": 2.718122555410691, - "grad_norm": 1.4008996486663818, - "learning_rate": 9.526532663316583e-05, - "loss": 6.0016, - "step": 5212 - }, - { - "epoch": 2.7186440677966104, - "grad_norm": 1.5887113809585571, - "learning_rate": 9.526432160804021e-05, - "loss": 5.7557, - "step": 5213 - }, - { - "epoch": 2.7191655801825294, - "grad_norm": 1.6973627805709839, - "learning_rate": 9.526331658291457e-05, - "loss": 5.9702, - "step": 5214 - }, - { - "epoch": 2.7196870925684484, - "grad_norm": 1.6317579746246338, - "learning_rate": 9.526231155778895e-05, - "loss": 5.6162, - "step": 5215 - }, - { - "epoch": 2.720208604954368, - "grad_norm": 1.635650396347046, - "learning_rate": 9.526130653266331e-05, - "loss": 6.1131, - "step": 5216 - }, - { - "epoch": 2.720730117340287, - "grad_norm": 1.7450494766235352, - "learning_rate": 9.526030150753769e-05, - "loss": 5.6754, - "step": 5217 - }, - { - "epoch": 2.721251629726206, - "grad_norm": 1.7048437595367432, - "learning_rate": 9.525929648241206e-05, - "loss": 5.5702, - "step": 5218 - }, - { - "epoch": 2.7217731421121254, - "grad_norm": 1.4270488023757935, - "learning_rate": 9.525829145728643e-05, - "loss": 5.9984, - "step": 5219 - }, - { - "epoch": 2.7222946544980444, - "grad_norm": 1.9475847482681274, - "learning_rate": 9.525728643216081e-05, - "loss": 5.6623, - "step": 5220 - }, - { - "epoch": 2.7228161668839634, - "grad_norm": 1.6363919973373413, - "learning_rate": 9.525628140703518e-05, - "loss": 5.2335, - "step": 5221 - }, - { - "epoch": 2.723337679269883, - "grad_norm": 1.4743471145629883, - "learning_rate": 9.525527638190955e-05, - "loss": 5.9159, - "step": 5222 - }, - { - "epoch": 2.723859191655802, - "grad_norm": 1.596564531326294, - "learning_rate": 9.525427135678392e-05, - "loss": 5.4452, - "step": 5223 - }, - { - "epoch": 2.724380704041721, - "grad_norm": 1.6014529466629028, - "learning_rate": 9.52532663316583e-05, - "loss": 5.9816, - "step": 5224 - }, - { - "epoch": 2.7249022164276404, - "grad_norm": 1.4191795587539673, - "learning_rate": 9.525226130653266e-05, - "loss": 5.5061, - "step": 5225 - }, - { - "epoch": 2.7254237288135594, - "grad_norm": 1.6846827268600464, - "learning_rate": 9.525125628140704e-05, - "loss": 5.4852, - "step": 5226 - }, - { - "epoch": 2.7259452411994785, - "grad_norm": 1.5207194089889526, - "learning_rate": 9.52502512562814e-05, - "loss": 5.8801, - "step": 5227 - }, - { - "epoch": 2.726466753585398, - "grad_norm": 1.5339123010635376, - "learning_rate": 9.524924623115578e-05, - "loss": 5.5618, - "step": 5228 - }, - { - "epoch": 2.726988265971317, - "grad_norm": 1.5552427768707275, - "learning_rate": 9.524824120603016e-05, - "loss": 5.8779, - "step": 5229 - }, - { - "epoch": 2.727509778357236, - "grad_norm": 1.5081803798675537, - "learning_rate": 9.524723618090454e-05, - "loss": 6.1867, - "step": 5230 - }, - { - "epoch": 2.7280312907431554, - "grad_norm": 1.4801026582717896, - "learning_rate": 9.52462311557789e-05, - "loss": 5.9867, - "step": 5231 - }, - { - "epoch": 2.7285528031290744, - "grad_norm": 1.6295626163482666, - "learning_rate": 9.524522613065328e-05, - "loss": 5.6305, - "step": 5232 - }, - { - "epoch": 2.7290743155149935, - "grad_norm": 1.6634424924850464, - "learning_rate": 9.524422110552764e-05, - "loss": 5.8667, - "step": 5233 - }, - { - "epoch": 2.7295958279009125, - "grad_norm": 1.5615614652633667, - "learning_rate": 9.524321608040202e-05, - "loss": 5.5773, - "step": 5234 - }, - { - "epoch": 2.730117340286832, - "grad_norm": 1.3656904697418213, - "learning_rate": 9.524221105527639e-05, - "loss": 5.8018, - "step": 5235 - }, - { - "epoch": 2.730638852672751, - "grad_norm": 1.6661819219589233, - "learning_rate": 9.524120603015075e-05, - "loss": 5.3977, - "step": 5236 - }, - { - "epoch": 2.73116036505867, - "grad_norm": 1.5089644193649292, - "learning_rate": 9.524020100502513e-05, - "loss": 6.0428, - "step": 5237 - }, - { - "epoch": 2.7316818774445895, - "grad_norm": 1.4739046096801758, - "learning_rate": 9.523919597989949e-05, - "loss": 5.6986, - "step": 5238 - }, - { - "epoch": 2.7322033898305085, - "grad_norm": 1.7612227201461792, - "learning_rate": 9.523819095477387e-05, - "loss": 5.9634, - "step": 5239 - }, - { - "epoch": 2.7327249022164275, - "grad_norm": 1.6065841913223267, - "learning_rate": 9.523718592964825e-05, - "loss": 5.8587, - "step": 5240 - }, - { - "epoch": 2.733246414602347, - "grad_norm": 1.4992141723632812, - "learning_rate": 9.523618090452263e-05, - "loss": 5.8403, - "step": 5241 - }, - { - "epoch": 2.733767926988266, - "grad_norm": 1.5832908153533936, - "learning_rate": 9.523517587939699e-05, - "loss": 5.7498, - "step": 5242 - }, - { - "epoch": 2.734289439374185, - "grad_norm": 1.5719740390777588, - "learning_rate": 9.523417085427137e-05, - "loss": 5.6622, - "step": 5243 - }, - { - "epoch": 2.734810951760104, - "grad_norm": 1.4680424928665161, - "learning_rate": 9.523316582914573e-05, - "loss": 6.0262, - "step": 5244 - }, - { - "epoch": 2.7353324641460235, - "grad_norm": 1.319040298461914, - "learning_rate": 9.523216080402011e-05, - "loss": 5.6795, - "step": 5245 - }, - { - "epoch": 2.7358539765319425, - "grad_norm": 1.4283701181411743, - "learning_rate": 9.523115577889447e-05, - "loss": 5.9524, - "step": 5246 - }, - { - "epoch": 2.7363754889178615, - "grad_norm": 1.646993637084961, - "learning_rate": 9.523015075376885e-05, - "loss": 5.7868, - "step": 5247 - }, - { - "epoch": 2.736897001303781, - "grad_norm": 1.6843148469924927, - "learning_rate": 9.522914572864322e-05, - "loss": 5.2376, - "step": 5248 - }, - { - "epoch": 2.7374185136897, - "grad_norm": 1.5444928407669067, - "learning_rate": 9.52281407035176e-05, - "loss": 5.7794, - "step": 5249 - }, - { - "epoch": 2.737940026075619, - "grad_norm": 1.584246277809143, - "learning_rate": 9.522713567839197e-05, - "loss": 5.7508, - "step": 5250 - }, - { - "epoch": 2.7384615384615385, - "grad_norm": 1.4963221549987793, - "learning_rate": 9.522613065326634e-05, - "loss": 5.7942, - "step": 5251 - }, - { - "epoch": 2.7389830508474575, - "grad_norm": 1.556229829788208, - "learning_rate": 9.522512562814071e-05, - "loss": 6.1317, - "step": 5252 - }, - { - "epoch": 2.7395045632333765, - "grad_norm": 1.2980328798294067, - "learning_rate": 9.522412060301508e-05, - "loss": 6.0471, - "step": 5253 - }, - { - "epoch": 2.740026075619296, - "grad_norm": 1.4990075826644897, - "learning_rate": 9.522311557788946e-05, - "loss": 5.8681, - "step": 5254 - }, - { - "epoch": 2.740547588005215, - "grad_norm": 1.4045356512069702, - "learning_rate": 9.522211055276382e-05, - "loss": 5.7768, - "step": 5255 - }, - { - "epoch": 2.741069100391134, - "grad_norm": 1.508265733718872, - "learning_rate": 9.52211055276382e-05, - "loss": 6.0307, - "step": 5256 - }, - { - "epoch": 2.7415906127770535, - "grad_norm": 1.4944521188735962, - "learning_rate": 9.522010050251256e-05, - "loss": 5.8939, - "step": 5257 - }, - { - "epoch": 2.7421121251629725, - "grad_norm": 1.5403271913528442, - "learning_rate": 9.521909547738694e-05, - "loss": 5.7276, - "step": 5258 - }, - { - "epoch": 2.7426336375488916, - "grad_norm": 1.3685622215270996, - "learning_rate": 9.52180904522613e-05, - "loss": 5.5307, - "step": 5259 - }, - { - "epoch": 2.743155149934811, - "grad_norm": 1.7211834192276, - "learning_rate": 9.521708542713568e-05, - "loss": 6.1457, - "step": 5260 - }, - { - "epoch": 2.74367666232073, - "grad_norm": 1.4081915616989136, - "learning_rate": 9.521608040201006e-05, - "loss": 6.0878, - "step": 5261 - }, - { - "epoch": 2.744198174706649, - "grad_norm": 1.3571991920471191, - "learning_rate": 9.521507537688442e-05, - "loss": 5.8913, - "step": 5262 - }, - { - "epoch": 2.7447196870925685, - "grad_norm": 1.3831044435501099, - "learning_rate": 9.52140703517588e-05, - "loss": 6.1563, - "step": 5263 - }, - { - "epoch": 2.7452411994784875, - "grad_norm": 1.5365378856658936, - "learning_rate": 9.521306532663317e-05, - "loss": 5.8124, - "step": 5264 - }, - { - "epoch": 2.7457627118644066, - "grad_norm": 1.4498540163040161, - "learning_rate": 9.521206030150754e-05, - "loss": 5.812, - "step": 5265 - }, - { - "epoch": 2.746284224250326, - "grad_norm": 1.5595202445983887, - "learning_rate": 9.521105527638191e-05, - "loss": 5.526, - "step": 5266 - }, - { - "epoch": 2.746805736636245, - "grad_norm": 1.626511573791504, - "learning_rate": 9.521005025125629e-05, - "loss": 5.2402, - "step": 5267 - }, - { - "epoch": 2.747327249022164, - "grad_norm": 1.680371642112732, - "learning_rate": 9.520904522613065e-05, - "loss": 5.7109, - "step": 5268 - }, - { - "epoch": 2.7478487614080835, - "grad_norm": 1.5864266157150269, - "learning_rate": 9.520804020100503e-05, - "loss": 5.8349, - "step": 5269 - }, - { - "epoch": 2.7483702737940026, - "grad_norm": 1.446987271308899, - "learning_rate": 9.52070351758794e-05, - "loss": 6.3372, - "step": 5270 - }, - { - "epoch": 2.7488917861799216, - "grad_norm": 1.6633707284927368, - "learning_rate": 9.520603015075378e-05, - "loss": 5.4647, - "step": 5271 - }, - { - "epoch": 2.749413298565841, - "grad_norm": 1.7349506616592407, - "learning_rate": 9.520502512562815e-05, - "loss": 5.6277, - "step": 5272 - }, - { - "epoch": 2.74993481095176, - "grad_norm": 1.840499758720398, - "learning_rate": 9.520402010050253e-05, - "loss": 5.8586, - "step": 5273 - }, - { - "epoch": 2.750456323337679, - "grad_norm": 1.4551581144332886, - "learning_rate": 9.520301507537689e-05, - "loss": 5.6955, - "step": 5274 - }, - { - "epoch": 2.7509778357235986, - "grad_norm": 1.585686445236206, - "learning_rate": 9.520201005025125e-05, - "loss": 6.0729, - "step": 5275 - }, - { - "epoch": 2.7514993481095176, - "grad_norm": 1.6242752075195312, - "learning_rate": 9.520100502512563e-05, - "loss": 6.153, - "step": 5276 - }, - { - "epoch": 2.7520208604954366, - "grad_norm": 1.4138590097427368, - "learning_rate": 9.52e-05, - "loss": 4.8482, - "step": 5277 - }, - { - "epoch": 2.752542372881356, - "grad_norm": 1.6448571681976318, - "learning_rate": 9.519899497487437e-05, - "loss": 5.9421, - "step": 5278 - }, - { - "epoch": 2.753063885267275, - "grad_norm": 1.4570629596710205, - "learning_rate": 9.519798994974874e-05, - "loss": 6.1594, - "step": 5279 - }, - { - "epoch": 2.753585397653194, - "grad_norm": 1.3304738998413086, - "learning_rate": 9.519698492462312e-05, - "loss": 6.165, - "step": 5280 - }, - { - "epoch": 2.7541069100391136, - "grad_norm": 1.4532541036605835, - "learning_rate": 9.51959798994975e-05, - "loss": 5.5049, - "step": 5281 - }, - { - "epoch": 2.7546284224250326, - "grad_norm": 1.843823790550232, - "learning_rate": 9.519497487437187e-05, - "loss": 5.5123, - "step": 5282 - }, - { - "epoch": 2.7551499348109516, - "grad_norm": 1.6437034606933594, - "learning_rate": 9.519396984924624e-05, - "loss": 5.7397, - "step": 5283 - }, - { - "epoch": 2.755671447196871, - "grad_norm": 1.440700888633728, - "learning_rate": 9.519296482412061e-05, - "loss": 5.8074, - "step": 5284 - }, - { - "epoch": 2.75619295958279, - "grad_norm": 1.4381103515625, - "learning_rate": 9.519195979899498e-05, - "loss": 5.9209, - "step": 5285 - }, - { - "epoch": 2.756714471968709, - "grad_norm": 1.539421796798706, - "learning_rate": 9.519095477386936e-05, - "loss": 6.116, - "step": 5286 - }, - { - "epoch": 2.7572359843546286, - "grad_norm": 1.4918252229690552, - "learning_rate": 9.518994974874372e-05, - "loss": 6.007, - "step": 5287 - }, - { - "epoch": 2.7577574967405476, - "grad_norm": 1.6424527168273926, - "learning_rate": 9.518894472361808e-05, - "loss": 5.4255, - "step": 5288 - }, - { - "epoch": 2.7582790091264666, - "grad_norm": 1.709058403968811, - "learning_rate": 9.518793969849246e-05, - "loss": 5.2981, - "step": 5289 - }, - { - "epoch": 2.758800521512386, - "grad_norm": 1.7231090068817139, - "learning_rate": 9.518693467336684e-05, - "loss": 6.138, - "step": 5290 - }, - { - "epoch": 2.759322033898305, - "grad_norm": 1.4244486093521118, - "learning_rate": 9.518592964824122e-05, - "loss": 6.224, - "step": 5291 - }, - { - "epoch": 2.759843546284224, - "grad_norm": 1.5862233638763428, - "learning_rate": 9.518492462311558e-05, - "loss": 5.9853, - "step": 5292 - }, - { - "epoch": 2.7603650586701436, - "grad_norm": 1.7865623235702515, - "learning_rate": 9.518391959798996e-05, - "loss": 5.917, - "step": 5293 - }, - { - "epoch": 2.7608865710560626, - "grad_norm": 1.4637805223464966, - "learning_rate": 9.518291457286432e-05, - "loss": 5.9428, - "step": 5294 - }, - { - "epoch": 2.7614080834419816, - "grad_norm": 1.5762534141540527, - "learning_rate": 9.51819095477387e-05, - "loss": 6.1152, - "step": 5295 - }, - { - "epoch": 2.761929595827901, - "grad_norm": 1.545445442199707, - "learning_rate": 9.518090452261307e-05, - "loss": 6.152, - "step": 5296 - }, - { - "epoch": 2.76245110821382, - "grad_norm": 1.57672119140625, - "learning_rate": 9.517989949748744e-05, - "loss": 5.6974, - "step": 5297 - }, - { - "epoch": 2.762972620599739, - "grad_norm": 1.5162392854690552, - "learning_rate": 9.517889447236181e-05, - "loss": 5.714, - "step": 5298 - }, - { - "epoch": 2.7634941329856586, - "grad_norm": 1.56229829788208, - "learning_rate": 9.517788944723619e-05, - "loss": 5.6277, - "step": 5299 - }, - { - "epoch": 2.7640156453715776, - "grad_norm": 1.5812842845916748, - "learning_rate": 9.517688442211055e-05, - "loss": 4.6416, - "step": 5300 - }, - { - "epoch": 2.7645371577574966, - "grad_norm": 1.3698291778564453, - "learning_rate": 9.517587939698493e-05, - "loss": 5.9934, - "step": 5301 - }, - { - "epoch": 2.765058670143416, - "grad_norm": 1.4683197736740112, - "learning_rate": 9.51748743718593e-05, - "loss": 5.9069, - "step": 5302 - }, - { - "epoch": 2.765580182529335, - "grad_norm": 1.49152410030365, - "learning_rate": 9.517386934673367e-05, - "loss": 5.5447, - "step": 5303 - }, - { - "epoch": 2.766101694915254, - "grad_norm": 1.404457688331604, - "learning_rate": 9.517286432160805e-05, - "loss": 5.9199, - "step": 5304 - }, - { - "epoch": 2.7666232073011736, - "grad_norm": 1.3284046649932861, - "learning_rate": 9.517185929648241e-05, - "loss": 6.0208, - "step": 5305 - }, - { - "epoch": 2.7671447196870926, - "grad_norm": 1.4136748313903809, - "learning_rate": 9.517085427135679e-05, - "loss": 5.6044, - "step": 5306 - }, - { - "epoch": 2.7676662320730117, - "grad_norm": 1.5955919027328491, - "learning_rate": 9.516984924623116e-05, - "loss": 5.7154, - "step": 5307 - }, - { - "epoch": 2.768187744458931, - "grad_norm": 1.4675759077072144, - "learning_rate": 9.516884422110553e-05, - "loss": 5.6239, - "step": 5308 - }, - { - "epoch": 2.76870925684485, - "grad_norm": 1.539570689201355, - "learning_rate": 9.51678391959799e-05, - "loss": 5.4812, - "step": 5309 - }, - { - "epoch": 2.769230769230769, - "grad_norm": 1.4700385332107544, - "learning_rate": 9.516683417085428e-05, - "loss": 5.9133, - "step": 5310 - }, - { - "epoch": 2.7697522816166886, - "grad_norm": 1.6473584175109863, - "learning_rate": 9.516582914572865e-05, - "loss": 5.2062, - "step": 5311 - }, - { - "epoch": 2.7702737940026076, - "grad_norm": 1.4371368885040283, - "learning_rate": 9.516482412060303e-05, - "loss": 5.9706, - "step": 5312 - }, - { - "epoch": 2.7707953063885267, - "grad_norm": 1.3796151876449585, - "learning_rate": 9.51638190954774e-05, - "loss": 5.8902, - "step": 5313 - }, - { - "epoch": 2.771316818774446, - "grad_norm": 1.6886078119277954, - "learning_rate": 9.516281407035176e-05, - "loss": 4.981, - "step": 5314 - }, - { - "epoch": 2.771838331160365, - "grad_norm": 1.6681435108184814, - "learning_rate": 9.516180904522614e-05, - "loss": 6.0854, - "step": 5315 - }, - { - "epoch": 2.772359843546284, - "grad_norm": 1.42898428440094, - "learning_rate": 9.51608040201005e-05, - "loss": 5.865, - "step": 5316 - }, - { - "epoch": 2.7728813559322036, - "grad_norm": 1.4149826765060425, - "learning_rate": 9.515979899497488e-05, - "loss": 6.04, - "step": 5317 - }, - { - "epoch": 2.7734028683181227, - "grad_norm": 1.6551878452301025, - "learning_rate": 9.515879396984924e-05, - "loss": 5.4065, - "step": 5318 - }, - { - "epoch": 2.7739243807040417, - "grad_norm": 1.6741372346878052, - "learning_rate": 9.515778894472362e-05, - "loss": 5.5303, - "step": 5319 - }, - { - "epoch": 2.774445893089961, - "grad_norm": 1.5373040437698364, - "learning_rate": 9.515678391959799e-05, - "loss": 5.3272, - "step": 5320 - }, - { - "epoch": 2.77496740547588, - "grad_norm": 1.3805116415023804, - "learning_rate": 9.515577889447236e-05, - "loss": 6.1417, - "step": 5321 - }, - { - "epoch": 2.775488917861799, - "grad_norm": 1.7865455150604248, - "learning_rate": 9.515477386934674e-05, - "loss": 5.8941, - "step": 5322 - }, - { - "epoch": 2.7760104302477187, - "grad_norm": 1.5107587575912476, - "learning_rate": 9.515376884422112e-05, - "loss": 5.6905, - "step": 5323 - }, - { - "epoch": 2.7765319426336377, - "grad_norm": 1.3821722269058228, - "learning_rate": 9.515276381909548e-05, - "loss": 5.8651, - "step": 5324 - }, - { - "epoch": 2.7770534550195567, - "grad_norm": 1.5068247318267822, - "learning_rate": 9.515175879396986e-05, - "loss": 5.7301, - "step": 5325 - }, - { - "epoch": 2.7775749674054757, - "grad_norm": 1.6178607940673828, - "learning_rate": 9.515075376884423e-05, - "loss": 5.8373, - "step": 5326 - }, - { - "epoch": 2.778096479791395, - "grad_norm": 1.5161018371582031, - "learning_rate": 9.51497487437186e-05, - "loss": 5.9352, - "step": 5327 - }, - { - "epoch": 2.778617992177314, - "grad_norm": 1.3542464971542358, - "learning_rate": 9.514874371859297e-05, - "loss": 6.1018, - "step": 5328 - }, - { - "epoch": 2.779139504563233, - "grad_norm": 1.4921947717666626, - "learning_rate": 9.514773869346733e-05, - "loss": 5.8118, - "step": 5329 - }, - { - "epoch": 2.7796610169491527, - "grad_norm": 1.4626189470291138, - "learning_rate": 9.514673366834171e-05, - "loss": 5.637, - "step": 5330 - }, - { - "epoch": 2.7801825293350717, - "grad_norm": 1.5856717824935913, - "learning_rate": 9.514572864321609e-05, - "loss": 5.7706, - "step": 5331 - }, - { - "epoch": 2.7807040417209907, - "grad_norm": 1.7111527919769287, - "learning_rate": 9.514472361809047e-05, - "loss": 5.4238, - "step": 5332 - }, - { - "epoch": 2.78122555410691, - "grad_norm": 1.4562779664993286, - "learning_rate": 9.514371859296483e-05, - "loss": 5.7839, - "step": 5333 - }, - { - "epoch": 2.781747066492829, - "grad_norm": 1.4199082851409912, - "learning_rate": 9.514271356783921e-05, - "loss": 6.2704, - "step": 5334 - }, - { - "epoch": 2.7822685788787482, - "grad_norm": 1.3992305994033813, - "learning_rate": 9.514170854271357e-05, - "loss": 5.6577, - "step": 5335 - }, - { - "epoch": 2.7827900912646677, - "grad_norm": 1.749971628189087, - "learning_rate": 9.514070351758795e-05, - "loss": 5.7725, - "step": 5336 - }, - { - "epoch": 2.7833116036505867, - "grad_norm": 1.4763962030410767, - "learning_rate": 9.513969849246231e-05, - "loss": 5.9232, - "step": 5337 - }, - { - "epoch": 2.7838331160365057, - "grad_norm": 1.4396134614944458, - "learning_rate": 9.513869346733669e-05, - "loss": 6.0913, - "step": 5338 - }, - { - "epoch": 2.7843546284224248, - "grad_norm": 1.7151845693588257, - "learning_rate": 9.513768844221106e-05, - "loss": 5.6339, - "step": 5339 - }, - { - "epoch": 2.7848761408083442, - "grad_norm": 1.5447626113891602, - "learning_rate": 9.513668341708543e-05, - "loss": 5.652, - "step": 5340 - }, - { - "epoch": 2.7853976531942632, - "grad_norm": 1.5760295391082764, - "learning_rate": 9.51356783919598e-05, - "loss": 5.9678, - "step": 5341 - }, - { - "epoch": 2.7859191655801823, - "grad_norm": 1.720670461654663, - "learning_rate": 9.513467336683418e-05, - "loss": 5.6917, - "step": 5342 - }, - { - "epoch": 2.7864406779661017, - "grad_norm": 1.5652952194213867, - "learning_rate": 9.513366834170855e-05, - "loss": 5.9148, - "step": 5343 - }, - { - "epoch": 2.7869621903520208, - "grad_norm": 1.4579381942749023, - "learning_rate": 9.513266331658292e-05, - "loss": 6.1761, - "step": 5344 - }, - { - "epoch": 2.7874837027379398, - "grad_norm": 1.4980690479278564, - "learning_rate": 9.51316582914573e-05, - "loss": 6.0517, - "step": 5345 - }, - { - "epoch": 2.7880052151238592, - "grad_norm": 2.142726182937622, - "learning_rate": 9.513065326633166e-05, - "loss": 5.4164, - "step": 5346 - }, - { - "epoch": 2.7885267275097783, - "grad_norm": 1.4630898237228394, - "learning_rate": 9.512964824120604e-05, - "loss": 5.9949, - "step": 5347 - }, - { - "epoch": 2.7890482398956973, - "grad_norm": 1.5615463256835938, - "learning_rate": 9.51286432160804e-05, - "loss": 5.5716, - "step": 5348 - }, - { - "epoch": 2.7895697522816167, - "grad_norm": 1.5081136226654053, - "learning_rate": 9.512763819095478e-05, - "loss": 5.9938, - "step": 5349 - }, - { - "epoch": 2.7900912646675358, - "grad_norm": 1.423330307006836, - "learning_rate": 9.512663316582914e-05, - "loss": 6.0489, - "step": 5350 - }, - { - "epoch": 2.790612777053455, - "grad_norm": 1.4629098176956177, - "learning_rate": 9.512562814070352e-05, - "loss": 5.9993, - "step": 5351 - }, - { - "epoch": 2.7911342894393742, - "grad_norm": 1.6566526889801025, - "learning_rate": 9.51246231155779e-05, - "loss": 5.8187, - "step": 5352 - }, - { - "epoch": 2.7916558018252933, - "grad_norm": 1.7587112188339233, - "learning_rate": 9.512361809045228e-05, - "loss": 5.4436, - "step": 5353 - }, - { - "epoch": 2.7921773142112123, - "grad_norm": 1.542237639427185, - "learning_rate": 9.512261306532664e-05, - "loss": 5.6578, - "step": 5354 - }, - { - "epoch": 2.7926988265971318, - "grad_norm": 1.5793741941452026, - "learning_rate": 9.5121608040201e-05, - "loss": 5.5994, - "step": 5355 - }, - { - "epoch": 2.7932203389830508, - "grad_norm": 1.7326124906539917, - "learning_rate": 9.512060301507538e-05, - "loss": 5.4092, - "step": 5356 - }, - { - "epoch": 2.79374185136897, - "grad_norm": 1.6850502490997314, - "learning_rate": 9.511959798994975e-05, - "loss": 5.8681, - "step": 5357 - }, - { - "epoch": 2.7942633637548893, - "grad_norm": 1.6342360973358154, - "learning_rate": 9.511859296482413e-05, - "loss": 5.7698, - "step": 5358 - }, - { - "epoch": 2.7947848761408083, - "grad_norm": 2.0453426837921143, - "learning_rate": 9.511758793969849e-05, - "loss": 5.513, - "step": 5359 - }, - { - "epoch": 2.7953063885267273, - "grad_norm": 1.3269829750061035, - "learning_rate": 9.511658291457287e-05, - "loss": 6.1498, - "step": 5360 - }, - { - "epoch": 2.7958279009126468, - "grad_norm": 1.7415722608566284, - "learning_rate": 9.511557788944723e-05, - "loss": 5.6446, - "step": 5361 - }, - { - "epoch": 2.796349413298566, - "grad_norm": 1.816104531288147, - "learning_rate": 9.511457286432161e-05, - "loss": 5.5064, - "step": 5362 - }, - { - "epoch": 2.796870925684485, - "grad_norm": 1.4518098831176758, - "learning_rate": 9.511356783919599e-05, - "loss": 6.2357, - "step": 5363 - }, - { - "epoch": 2.7973924380704043, - "grad_norm": 1.3511797189712524, - "learning_rate": 9.511256281407037e-05, - "loss": 6.2815, - "step": 5364 - }, - { - "epoch": 2.7979139504563233, - "grad_norm": 1.592790126800537, - "learning_rate": 9.511155778894473e-05, - "loss": 5.4512, - "step": 5365 - }, - { - "epoch": 2.7984354628422423, - "grad_norm": 1.5891486406326294, - "learning_rate": 9.511055276381911e-05, - "loss": 5.8074, - "step": 5366 - }, - { - "epoch": 2.798956975228162, - "grad_norm": 1.4719278812408447, - "learning_rate": 9.510954773869347e-05, - "loss": 6.0028, - "step": 5367 - }, - { - "epoch": 2.799478487614081, - "grad_norm": 1.3128036260604858, - "learning_rate": 9.510854271356784e-05, - "loss": 5.9743, - "step": 5368 - }, - { - "epoch": 2.8, - "grad_norm": 1.3965797424316406, - "learning_rate": 9.510753768844221e-05, - "loss": 5.5463, - "step": 5369 - }, - { - "epoch": 2.8005215123859193, - "grad_norm": 1.5687730312347412, - "learning_rate": 9.510653266331658e-05, - "loss": 5.6928, - "step": 5370 - }, - { - "epoch": 2.8010430247718383, - "grad_norm": 1.8856099843978882, - "learning_rate": 9.510552763819096e-05, - "loss": 5.4445, - "step": 5371 - }, - { - "epoch": 2.8015645371577573, - "grad_norm": 1.4559097290039062, - "learning_rate": 9.510452261306532e-05, - "loss": 5.9814, - "step": 5372 - }, - { - "epoch": 2.802086049543677, - "grad_norm": 1.4248132705688477, - "learning_rate": 9.51035175879397e-05, - "loss": 5.853, - "step": 5373 - }, - { - "epoch": 2.802607561929596, - "grad_norm": 1.5703685283660889, - "learning_rate": 9.510251256281408e-05, - "loss": 5.6143, - "step": 5374 - }, - { - "epoch": 2.803129074315515, - "grad_norm": 1.511752963066101, - "learning_rate": 9.510150753768845e-05, - "loss": 5.9094, - "step": 5375 - }, - { - "epoch": 2.8036505867014343, - "grad_norm": 1.4544997215270996, - "learning_rate": 9.510050251256282e-05, - "loss": 5.8296, - "step": 5376 - }, - { - "epoch": 2.8041720990873533, - "grad_norm": 1.4706684350967407, - "learning_rate": 9.50994974874372e-05, - "loss": 5.8256, - "step": 5377 - }, - { - "epoch": 2.8046936114732723, - "grad_norm": 1.5199040174484253, - "learning_rate": 9.509849246231156e-05, - "loss": 5.6294, - "step": 5378 - }, - { - "epoch": 2.805215123859192, - "grad_norm": 1.4112098217010498, - "learning_rate": 9.509748743718594e-05, - "loss": 5.9636, - "step": 5379 - }, - { - "epoch": 2.805736636245111, - "grad_norm": 1.7174955606460571, - "learning_rate": 9.50964824120603e-05, - "loss": 5.4974, - "step": 5380 - }, - { - "epoch": 2.80625814863103, - "grad_norm": 1.4473001956939697, - "learning_rate": 9.509547738693467e-05, - "loss": 5.9944, - "step": 5381 - }, - { - "epoch": 2.8067796610169493, - "grad_norm": 1.4157061576843262, - "learning_rate": 9.509447236180905e-05, - "loss": 6.0787, - "step": 5382 - }, - { - "epoch": 2.8073011734028683, - "grad_norm": 1.3892755508422852, - "learning_rate": 9.509346733668342e-05, - "loss": 6.0385, - "step": 5383 - }, - { - "epoch": 2.8078226857887874, - "grad_norm": 1.507362961769104, - "learning_rate": 9.50924623115578e-05, - "loss": 5.6126, - "step": 5384 - }, - { - "epoch": 2.808344198174707, - "grad_norm": 1.5747789144515991, - "learning_rate": 9.509145728643216e-05, - "loss": 5.4299, - "step": 5385 - }, - { - "epoch": 2.808865710560626, - "grad_norm": 1.4668720960617065, - "learning_rate": 9.509045226130654e-05, - "loss": 5.503, - "step": 5386 - }, - { - "epoch": 2.809387222946545, - "grad_norm": 1.4045443534851074, - "learning_rate": 9.508944723618091e-05, - "loss": 6.026, - "step": 5387 - }, - { - "epoch": 2.8099087353324643, - "grad_norm": 1.4384702444076538, - "learning_rate": 9.508844221105528e-05, - "loss": 5.8359, - "step": 5388 - }, - { - "epoch": 2.8104302477183833, - "grad_norm": 1.3221298456192017, - "learning_rate": 9.508743718592965e-05, - "loss": 6.1395, - "step": 5389 - }, - { - "epoch": 2.8109517601043024, - "grad_norm": 1.432021141052246, - "learning_rate": 9.508643216080403e-05, - "loss": 5.8207, - "step": 5390 - }, - { - "epoch": 2.811473272490222, - "grad_norm": 1.4086962938308716, - "learning_rate": 9.508542713567839e-05, - "loss": 5.8159, - "step": 5391 - }, - { - "epoch": 2.811994784876141, - "grad_norm": 1.6543505191802979, - "learning_rate": 9.508442211055277e-05, - "loss": 5.2449, - "step": 5392 - }, - { - "epoch": 2.81251629726206, - "grad_norm": 1.4830341339111328, - "learning_rate": 9.508341708542713e-05, - "loss": 5.7108, - "step": 5393 - }, - { - "epoch": 2.8130378096479793, - "grad_norm": 1.4738376140594482, - "learning_rate": 9.508241206030151e-05, - "loss": 5.8759, - "step": 5394 - }, - { - "epoch": 2.8135593220338984, - "grad_norm": 1.4974769353866577, - "learning_rate": 9.508140703517589e-05, - "loss": 5.9035, - "step": 5395 - }, - { - "epoch": 2.8140808344198174, - "grad_norm": 1.7222765684127808, - "learning_rate": 9.508040201005025e-05, - "loss": 5.5961, - "step": 5396 - }, - { - "epoch": 2.814602346805737, - "grad_norm": 1.488580346107483, - "learning_rate": 9.507939698492463e-05, - "loss": 6.1087, - "step": 5397 - }, - { - "epoch": 2.815123859191656, - "grad_norm": 1.7291523218154907, - "learning_rate": 9.5078391959799e-05, - "loss": 5.0922, - "step": 5398 - }, - { - "epoch": 2.815645371577575, - "grad_norm": 1.4147628545761108, - "learning_rate": 9.507738693467337e-05, - "loss": 5.8352, - "step": 5399 - }, - { - "epoch": 2.8161668839634943, - "grad_norm": 1.519407033920288, - "learning_rate": 9.507638190954774e-05, - "loss": 5.809, - "step": 5400 - }, - { - "epoch": 2.8166883963494134, - "grad_norm": 1.5176655054092407, - "learning_rate": 9.507537688442212e-05, - "loss": 5.843, - "step": 5401 - }, - { - "epoch": 2.8172099087353324, - "grad_norm": 1.625676155090332, - "learning_rate": 9.507437185929648e-05, - "loss": 5.8345, - "step": 5402 - }, - { - "epoch": 2.817731421121252, - "grad_norm": 1.5023150444030762, - "learning_rate": 9.507336683417086e-05, - "loss": 5.7397, - "step": 5403 - }, - { - "epoch": 2.818252933507171, - "grad_norm": 1.5248651504516602, - "learning_rate": 9.507236180904524e-05, - "loss": 5.8339, - "step": 5404 - }, - { - "epoch": 2.81877444589309, - "grad_norm": 1.570803165435791, - "learning_rate": 9.507135678391961e-05, - "loss": 5.94, - "step": 5405 - }, - { - "epoch": 2.8192959582790094, - "grad_norm": 1.7953946590423584, - "learning_rate": 9.507035175879398e-05, - "loss": 5.2396, - "step": 5406 - }, - { - "epoch": 2.8198174706649284, - "grad_norm": 1.4574475288391113, - "learning_rate": 9.506934673366834e-05, - "loss": 5.9126, - "step": 5407 - }, - { - "epoch": 2.8203389830508474, - "grad_norm": 1.7406460046768188, - "learning_rate": 9.506834170854272e-05, - "loss": 5.4409, - "step": 5408 - }, - { - "epoch": 2.820860495436767, - "grad_norm": 1.6594797372817993, - "learning_rate": 9.506733668341708e-05, - "loss": 5.1006, - "step": 5409 - }, - { - "epoch": 2.821382007822686, - "grad_norm": 1.917070984840393, - "learning_rate": 9.506633165829146e-05, - "loss": 5.7658, - "step": 5410 - }, - { - "epoch": 2.821903520208605, - "grad_norm": 1.6266204118728638, - "learning_rate": 9.506532663316583e-05, - "loss": 5.5152, - "step": 5411 - }, - { - "epoch": 2.8224250325945244, - "grad_norm": 1.4641432762145996, - "learning_rate": 9.50643216080402e-05, - "loss": 5.8899, - "step": 5412 - }, - { - "epoch": 2.8229465449804434, - "grad_norm": 1.5079774856567383, - "learning_rate": 9.506331658291457e-05, - "loss": 6.0276, - "step": 5413 - }, - { - "epoch": 2.8234680573663624, - "grad_norm": 1.505654215812683, - "learning_rate": 9.506231155778895e-05, - "loss": 6.0536, - "step": 5414 - }, - { - "epoch": 2.823989569752282, - "grad_norm": 1.4815384149551392, - "learning_rate": 9.506130653266332e-05, - "loss": 6.0923, - "step": 5415 - }, - { - "epoch": 2.824511082138201, - "grad_norm": 1.402385950088501, - "learning_rate": 9.50603015075377e-05, - "loss": 6.1574, - "step": 5416 - }, - { - "epoch": 2.82503259452412, - "grad_norm": 1.7535570859909058, - "learning_rate": 9.505929648241207e-05, - "loss": 5.5894, - "step": 5417 - }, - { - "epoch": 2.8255541069100394, - "grad_norm": 1.4100937843322754, - "learning_rate": 9.505829145728644e-05, - "loss": 5.819, - "step": 5418 - }, - { - "epoch": 2.8260756192959584, - "grad_norm": 1.7048600912094116, - "learning_rate": 9.505728643216081e-05, - "loss": 5.6877, - "step": 5419 - }, - { - "epoch": 2.8265971316818774, - "grad_norm": 1.4972076416015625, - "learning_rate": 9.505628140703519e-05, - "loss": 6.0138, - "step": 5420 - }, - { - "epoch": 2.8271186440677964, - "grad_norm": 1.4135085344314575, - "learning_rate": 9.505527638190955e-05, - "loss": 6.0942, - "step": 5421 - }, - { - "epoch": 2.827640156453716, - "grad_norm": 1.540353775024414, - "learning_rate": 9.505427135678391e-05, - "loss": 5.8088, - "step": 5422 - }, - { - "epoch": 2.828161668839635, - "grad_norm": 1.3191885948181152, - "learning_rate": 9.505326633165829e-05, - "loss": 5.9692, - "step": 5423 - }, - { - "epoch": 2.828683181225554, - "grad_norm": 1.4066498279571533, - "learning_rate": 9.505226130653267e-05, - "loss": 5.9971, - "step": 5424 - }, - { - "epoch": 2.8292046936114734, - "grad_norm": 1.4812179803848267, - "learning_rate": 9.505125628140705e-05, - "loss": 5.6833, - "step": 5425 - }, - { - "epoch": 2.8297262059973924, - "grad_norm": 1.6258361339569092, - "learning_rate": 9.505025125628141e-05, - "loss": 5.7476, - "step": 5426 - }, - { - "epoch": 2.8302477183833115, - "grad_norm": 1.734784722328186, - "learning_rate": 9.504924623115579e-05, - "loss": 5.3781, - "step": 5427 - }, - { - "epoch": 2.830769230769231, - "grad_norm": 1.6339596509933472, - "learning_rate": 9.504824120603015e-05, - "loss": 5.7235, - "step": 5428 - }, - { - "epoch": 2.83129074315515, - "grad_norm": 1.5125846862792969, - "learning_rate": 9.504723618090453e-05, - "loss": 5.7626, - "step": 5429 - }, - { - "epoch": 2.831812255541069, - "grad_norm": 1.3996344804763794, - "learning_rate": 9.50462311557789e-05, - "loss": 5.8532, - "step": 5430 - }, - { - "epoch": 2.832333767926988, - "grad_norm": 1.362195611000061, - "learning_rate": 9.504522613065327e-05, - "loss": 5.97, - "step": 5431 - }, - { - "epoch": 2.8328552803129075, - "grad_norm": 1.4344533681869507, - "learning_rate": 9.504422110552764e-05, - "loss": 5.9257, - "step": 5432 - }, - { - "epoch": 2.8333767926988265, - "grad_norm": 1.478940725326538, - "learning_rate": 9.504321608040202e-05, - "loss": 5.8004, - "step": 5433 - }, - { - "epoch": 2.8338983050847455, - "grad_norm": 1.429983377456665, - "learning_rate": 9.504221105527638e-05, - "loss": 5.5731, - "step": 5434 - }, - { - "epoch": 2.834419817470665, - "grad_norm": 1.4382768869400024, - "learning_rate": 9.504120603015076e-05, - "loss": 6.0552, - "step": 5435 - }, - { - "epoch": 2.834941329856584, - "grad_norm": 1.300857663154602, - "learning_rate": 9.504020100502514e-05, - "loss": 5.9213, - "step": 5436 - }, - { - "epoch": 2.835462842242503, - "grad_norm": 1.4121084213256836, - "learning_rate": 9.50391959798995e-05, - "loss": 5.699, - "step": 5437 - }, - { - "epoch": 2.8359843546284225, - "grad_norm": 1.455958366394043, - "learning_rate": 9.503819095477388e-05, - "loss": 5.7875, - "step": 5438 - }, - { - "epoch": 2.8365058670143415, - "grad_norm": 1.4203109741210938, - "learning_rate": 9.503718592964824e-05, - "loss": 5.9604, - "step": 5439 - }, - { - "epoch": 2.8370273794002605, - "grad_norm": 1.6025632619857788, - "learning_rate": 9.503618090452262e-05, - "loss": 5.7122, - "step": 5440 - }, - { - "epoch": 2.83754889178618, - "grad_norm": 1.4892053604125977, - "learning_rate": 9.503517587939698e-05, - "loss": 5.8097, - "step": 5441 - }, - { - "epoch": 2.838070404172099, - "grad_norm": 1.4848047494888306, - "learning_rate": 9.503417085427136e-05, - "loss": 5.4508, - "step": 5442 - }, - { - "epoch": 2.838591916558018, - "grad_norm": 1.587677240371704, - "learning_rate": 9.503316582914573e-05, - "loss": 5.6504, - "step": 5443 - }, - { - "epoch": 2.8391134289439375, - "grad_norm": 1.5530227422714233, - "learning_rate": 9.50321608040201e-05, - "loss": 6.0686, - "step": 5444 - }, - { - "epoch": 2.8396349413298565, - "grad_norm": 1.4392659664154053, - "learning_rate": 9.503115577889448e-05, - "loss": 5.484, - "step": 5445 - }, - { - "epoch": 2.8401564537157755, - "grad_norm": 1.4964516162872314, - "learning_rate": 9.503015075376886e-05, - "loss": 6.1956, - "step": 5446 - }, - { - "epoch": 2.840677966101695, - "grad_norm": 1.532198190689087, - "learning_rate": 9.502914572864322e-05, - "loss": 5.9125, - "step": 5447 - }, - { - "epoch": 2.841199478487614, - "grad_norm": 1.5567309856414795, - "learning_rate": 9.502814070351759e-05, - "loss": 5.6818, - "step": 5448 - }, - { - "epoch": 2.841720990873533, - "grad_norm": 1.6278194189071655, - "learning_rate": 9.502713567839197e-05, - "loss": 5.6605, - "step": 5449 - }, - { - "epoch": 2.8422425032594525, - "grad_norm": 1.5022722482681274, - "learning_rate": 9.502613065326633e-05, - "loss": 5.6544, - "step": 5450 - }, - { - "epoch": 2.8427640156453715, - "grad_norm": 1.595187783241272, - "learning_rate": 9.502512562814071e-05, - "loss": 5.3391, - "step": 5451 - }, - { - "epoch": 2.8432855280312905, - "grad_norm": 1.3358861207962036, - "learning_rate": 9.502412060301507e-05, - "loss": 5.6795, - "step": 5452 - }, - { - "epoch": 2.84380704041721, - "grad_norm": 1.5346157550811768, - "learning_rate": 9.502311557788945e-05, - "loss": 5.3749, - "step": 5453 - }, - { - "epoch": 2.844328552803129, - "grad_norm": 1.4484132528305054, - "learning_rate": 9.502211055276381e-05, - "loss": 5.903, - "step": 5454 - }, - { - "epoch": 2.844850065189048, - "grad_norm": 1.3442084789276123, - "learning_rate": 9.502110552763819e-05, - "loss": 6.1121, - "step": 5455 - }, - { - "epoch": 2.8453715775749675, - "grad_norm": 1.3326928615570068, - "learning_rate": 9.502010050251257e-05, - "loss": 4.9753, - "step": 5456 - }, - { - "epoch": 2.8458930899608865, - "grad_norm": 1.4835693836212158, - "learning_rate": 9.501909547738695e-05, - "loss": 5.9082, - "step": 5457 - }, - { - "epoch": 2.8464146023468055, - "grad_norm": 1.3572524785995483, - "learning_rate": 9.501809045226131e-05, - "loss": 5.2772, - "step": 5458 - }, - { - "epoch": 2.846936114732725, - "grad_norm": 1.4405840635299683, - "learning_rate": 9.501708542713569e-05, - "loss": 5.9282, - "step": 5459 - }, - { - "epoch": 2.847457627118644, - "grad_norm": 1.5230143070220947, - "learning_rate": 9.501608040201005e-05, - "loss": 5.8626, - "step": 5460 - }, - { - "epoch": 2.847979139504563, - "grad_norm": 1.3778576850891113, - "learning_rate": 9.501507537688442e-05, - "loss": 5.7872, - "step": 5461 - }, - { - "epoch": 2.8485006518904825, - "grad_norm": 1.5256186723709106, - "learning_rate": 9.50140703517588e-05, - "loss": 5.7536, - "step": 5462 - }, - { - "epoch": 2.8490221642764015, - "grad_norm": 1.4342402219772339, - "learning_rate": 9.501306532663316e-05, - "loss": 5.558, - "step": 5463 - }, - { - "epoch": 2.8495436766623206, - "grad_norm": 1.6662518978118896, - "learning_rate": 9.501206030150754e-05, - "loss": 5.4814, - "step": 5464 - }, - { - "epoch": 2.85006518904824, - "grad_norm": 1.5614107847213745, - "learning_rate": 9.501105527638192e-05, - "loss": 5.8797, - "step": 5465 - }, - { - "epoch": 2.850586701434159, - "grad_norm": 1.4563586711883545, - "learning_rate": 9.50100502512563e-05, - "loss": 5.6478, - "step": 5466 - }, - { - "epoch": 2.851108213820078, - "grad_norm": 1.4055339097976685, - "learning_rate": 9.500904522613066e-05, - "loss": 5.8571, - "step": 5467 - }, - { - "epoch": 2.8516297262059975, - "grad_norm": 1.4934945106506348, - "learning_rate": 9.500804020100504e-05, - "loss": 5.4422, - "step": 5468 - }, - { - "epoch": 2.8521512385919165, - "grad_norm": 1.5744796991348267, - "learning_rate": 9.50070351758794e-05, - "loss": 5.7252, - "step": 5469 - }, - { - "epoch": 2.8526727509778356, - "grad_norm": 1.7383615970611572, - "learning_rate": 9.500603015075378e-05, - "loss": 5.3504, - "step": 5470 - }, - { - "epoch": 2.853194263363755, - "grad_norm": 1.5227082967758179, - "learning_rate": 9.500502512562814e-05, - "loss": 5.8242, - "step": 5471 - }, - { - "epoch": 2.853715775749674, - "grad_norm": 1.5848947763442993, - "learning_rate": 9.500402010050252e-05, - "loss": 5.4656, - "step": 5472 - }, - { - "epoch": 2.854237288135593, - "grad_norm": 1.5149763822555542, - "learning_rate": 9.500301507537689e-05, - "loss": 5.7653, - "step": 5473 - }, - { - "epoch": 2.8547588005215125, - "grad_norm": 1.5359797477722168, - "learning_rate": 9.500201005025125e-05, - "loss": 5.9524, - "step": 5474 - }, - { - "epoch": 2.8552803129074316, - "grad_norm": 1.417772650718689, - "learning_rate": 9.500100502512563e-05, - "loss": 5.9466, - "step": 5475 - }, - { - "epoch": 2.8558018252933506, - "grad_norm": 1.5595239400863647, - "learning_rate": 9.5e-05, - "loss": 5.5856, - "step": 5476 - }, - { - "epoch": 2.85632333767927, - "grad_norm": 1.4452238082885742, - "learning_rate": 9.499899497487438e-05, - "loss": 6.2531, - "step": 5477 - }, - { - "epoch": 2.856844850065189, - "grad_norm": 1.556312084197998, - "learning_rate": 9.499798994974875e-05, - "loss": 5.6933, - "step": 5478 - }, - { - "epoch": 2.857366362451108, - "grad_norm": 1.6568973064422607, - "learning_rate": 9.499698492462313e-05, - "loss": 5.1634, - "step": 5479 - }, - { - "epoch": 2.8578878748370276, - "grad_norm": 1.5206397771835327, - "learning_rate": 9.499597989949749e-05, - "loss": 6.1232, - "step": 5480 - }, - { - "epoch": 2.8584093872229466, - "grad_norm": 1.4250537157058716, - "learning_rate": 9.499497487437187e-05, - "loss": 5.292, - "step": 5481 - }, - { - "epoch": 2.8589308996088656, - "grad_norm": 1.371863603591919, - "learning_rate": 9.499396984924623e-05, - "loss": 5.7788, - "step": 5482 - }, - { - "epoch": 2.859452411994785, - "grad_norm": 1.4204965829849243, - "learning_rate": 9.499296482412061e-05, - "loss": 5.8184, - "step": 5483 - }, - { - "epoch": 2.859973924380704, - "grad_norm": 1.381773591041565, - "learning_rate": 9.499195979899497e-05, - "loss": 6.055, - "step": 5484 - }, - { - "epoch": 2.860495436766623, - "grad_norm": 1.3773912191390991, - "learning_rate": 9.499095477386935e-05, - "loss": 5.5454, - "step": 5485 - }, - { - "epoch": 2.8610169491525426, - "grad_norm": 1.4980450868606567, - "learning_rate": 9.498994974874373e-05, - "loss": 6.307, - "step": 5486 - }, - { - "epoch": 2.8615384615384616, - "grad_norm": 1.544684648513794, - "learning_rate": 9.49889447236181e-05, - "loss": 5.4023, - "step": 5487 - }, - { - "epoch": 2.8620599739243806, - "grad_norm": 1.5524007081985474, - "learning_rate": 9.498793969849247e-05, - "loss": 6.016, - "step": 5488 - }, - { - "epoch": 2.8625814863103, - "grad_norm": 1.5435796976089478, - "learning_rate": 9.498693467336684e-05, - "loss": 6.1719, - "step": 5489 - }, - { - "epoch": 2.863102998696219, - "grad_norm": 1.579563856124878, - "learning_rate": 9.498592964824121e-05, - "loss": 5.7286, - "step": 5490 - }, - { - "epoch": 2.863624511082138, - "grad_norm": 1.4664534330368042, - "learning_rate": 9.498492462311558e-05, - "loss": 6.0726, - "step": 5491 - }, - { - "epoch": 2.8641460234680576, - "grad_norm": 1.6674902439117432, - "learning_rate": 9.498391959798996e-05, - "loss": 5.5623, - "step": 5492 - }, - { - "epoch": 2.8646675358539766, - "grad_norm": 1.636983036994934, - "learning_rate": 9.498291457286432e-05, - "loss": 5.7435, - "step": 5493 - }, - { - "epoch": 2.8651890482398956, - "grad_norm": 1.6069456338882446, - "learning_rate": 9.49819095477387e-05, - "loss": 5.6692, - "step": 5494 - }, - { - "epoch": 2.865710560625815, - "grad_norm": 1.4391626119613647, - "learning_rate": 9.498090452261306e-05, - "loss": 6.0863, - "step": 5495 - }, - { - "epoch": 2.866232073011734, - "grad_norm": 1.5208591222763062, - "learning_rate": 9.497989949748744e-05, - "loss": 5.5298, - "step": 5496 - }, - { - "epoch": 2.866753585397653, - "grad_norm": 1.386375904083252, - "learning_rate": 9.497889447236182e-05, - "loss": 5.9272, - "step": 5497 - }, - { - "epoch": 2.8672750977835726, - "grad_norm": 1.4868290424346924, - "learning_rate": 9.49778894472362e-05, - "loss": 5.9604, - "step": 5498 - }, - { - "epoch": 2.8677966101694916, - "grad_norm": 1.542762279510498, - "learning_rate": 9.497688442211056e-05, - "loss": 5.7142, - "step": 5499 - }, - { - "epoch": 2.8683181225554106, - "grad_norm": 1.5957883596420288, - "learning_rate": 9.497587939698492e-05, - "loss": 5.3213, - "step": 5500 - }, - { - "epoch": 2.86883963494133, - "grad_norm": 1.4624228477478027, - "learning_rate": 9.49748743718593e-05, - "loss": 5.7974, - "step": 5501 - }, - { - "epoch": 2.869361147327249, - "grad_norm": 1.7716432809829712, - "learning_rate": 9.497386934673367e-05, - "loss": 5.3457, - "step": 5502 - }, - { - "epoch": 2.869882659713168, - "grad_norm": 1.6208593845367432, - "learning_rate": 9.497286432160804e-05, - "loss": 5.5484, - "step": 5503 - }, - { - "epoch": 2.8704041720990876, - "grad_norm": 1.4539518356323242, - "learning_rate": 9.497185929648241e-05, - "loss": 5.7758, - "step": 5504 - }, - { - "epoch": 2.8709256844850066, - "grad_norm": 1.8079545497894287, - "learning_rate": 9.497085427135679e-05, - "loss": 5.5796, - "step": 5505 - }, - { - "epoch": 2.8714471968709256, - "grad_norm": 1.576878309249878, - "learning_rate": 9.496984924623116e-05, - "loss": 5.7139, - "step": 5506 - }, - { - "epoch": 2.871968709256845, - "grad_norm": 1.630937933921814, - "learning_rate": 9.496884422110554e-05, - "loss": 5.637, - "step": 5507 - }, - { - "epoch": 2.872490221642764, - "grad_norm": 1.4767199754714966, - "learning_rate": 9.49678391959799e-05, - "loss": 6.0818, - "step": 5508 - }, - { - "epoch": 2.873011734028683, - "grad_norm": 1.413651466369629, - "learning_rate": 9.496683417085428e-05, - "loss": 5.5064, - "step": 5509 - }, - { - "epoch": 2.8735332464146026, - "grad_norm": 1.2960796356201172, - "learning_rate": 9.496582914572865e-05, - "loss": 5.967, - "step": 5510 - }, - { - "epoch": 2.8740547588005216, - "grad_norm": 1.406331181526184, - "learning_rate": 9.496482412060303e-05, - "loss": 6.0891, - "step": 5511 - }, - { - "epoch": 2.8745762711864407, - "grad_norm": 1.3793679475784302, - "learning_rate": 9.496381909547739e-05, - "loss": 6.0884, - "step": 5512 - }, - { - "epoch": 2.8750977835723597, - "grad_norm": 1.4555490016937256, - "learning_rate": 9.496281407035177e-05, - "loss": 5.9107, - "step": 5513 - }, - { - "epoch": 2.875619295958279, - "grad_norm": 1.5267013311386108, - "learning_rate": 9.496180904522613e-05, - "loss": 5.5727, - "step": 5514 - }, - { - "epoch": 2.876140808344198, - "grad_norm": 1.5769462585449219, - "learning_rate": 9.49608040201005e-05, - "loss": 5.3566, - "step": 5515 - }, - { - "epoch": 2.876662320730117, - "grad_norm": 1.6439229249954224, - "learning_rate": 9.495979899497487e-05, - "loss": 5.8007, - "step": 5516 - }, - { - "epoch": 2.8771838331160366, - "grad_norm": 1.8192572593688965, - "learning_rate": 9.495879396984925e-05, - "loss": 5.8796, - "step": 5517 - }, - { - "epoch": 2.8777053455019557, - "grad_norm": 1.9187562465667725, - "learning_rate": 9.495778894472363e-05, - "loss": 4.946, - "step": 5518 - }, - { - "epoch": 2.8782268578878747, - "grad_norm": 1.513122320175171, - "learning_rate": 9.4956783919598e-05, - "loss": 5.9961, - "step": 5519 - }, - { - "epoch": 2.878748370273794, - "grad_norm": 1.560675024986267, - "learning_rate": 9.495577889447237e-05, - "loss": 6.0901, - "step": 5520 - }, - { - "epoch": 2.879269882659713, - "grad_norm": 1.5983387231826782, - "learning_rate": 9.495477386934674e-05, - "loss": 5.7216, - "step": 5521 - }, - { - "epoch": 2.879791395045632, - "grad_norm": 1.3903577327728271, - "learning_rate": 9.495376884422111e-05, - "loss": 5.9275, - "step": 5522 - }, - { - "epoch": 2.8803129074315517, - "grad_norm": 1.4374443292617798, - "learning_rate": 9.495276381909548e-05, - "loss": 6.0819, - "step": 5523 - }, - { - "epoch": 2.8808344198174707, - "grad_norm": 1.6240648031234741, - "learning_rate": 9.495175879396986e-05, - "loss": 5.5084, - "step": 5524 - }, - { - "epoch": 2.8813559322033897, - "grad_norm": 1.4976316690444946, - "learning_rate": 9.495075376884422e-05, - "loss": 6.0516, - "step": 5525 - }, - { - "epoch": 2.8818774445893087, - "grad_norm": 1.41958487033844, - "learning_rate": 9.49497487437186e-05, - "loss": 6.0718, - "step": 5526 - }, - { - "epoch": 2.882398956975228, - "grad_norm": 1.4065748453140259, - "learning_rate": 9.494874371859298e-05, - "loss": 6.1582, - "step": 5527 - }, - { - "epoch": 2.882920469361147, - "grad_norm": 1.6554980278015137, - "learning_rate": 9.494773869346734e-05, - "loss": 4.9616, - "step": 5528 - }, - { - "epoch": 2.8834419817470662, - "grad_norm": 1.546533226966858, - "learning_rate": 9.494673366834172e-05, - "loss": 5.572, - "step": 5529 - }, - { - "epoch": 2.8839634941329857, - "grad_norm": 1.5250715017318726, - "learning_rate": 9.494572864321608e-05, - "loss": 5.7914, - "step": 5530 - }, - { - "epoch": 2.8844850065189047, - "grad_norm": 1.5076898336410522, - "learning_rate": 9.494472361809046e-05, - "loss": 5.8171, - "step": 5531 - }, - { - "epoch": 2.8850065189048237, - "grad_norm": 1.508210301399231, - "learning_rate": 9.494371859296482e-05, - "loss": 6.1137, - "step": 5532 - }, - { - "epoch": 2.885528031290743, - "grad_norm": 1.399817943572998, - "learning_rate": 9.49427135678392e-05, - "loss": 5.9738, - "step": 5533 - }, - { - "epoch": 2.886049543676662, - "grad_norm": 1.5446008443832397, - "learning_rate": 9.494170854271357e-05, - "loss": 5.9479, - "step": 5534 - }, - { - "epoch": 2.8865710560625812, - "grad_norm": 1.7366011142730713, - "learning_rate": 9.494070351758794e-05, - "loss": 5.7081, - "step": 5535 - }, - { - "epoch": 2.8870925684485007, - "grad_norm": 1.6573882102966309, - "learning_rate": 9.493969849246231e-05, - "loss": 5.2053, - "step": 5536 - }, - { - "epoch": 2.8876140808344197, - "grad_norm": 1.385514497756958, - "learning_rate": 9.493869346733669e-05, - "loss": 5.9661, - "step": 5537 - }, - { - "epoch": 2.8881355932203387, - "grad_norm": 1.395749568939209, - "learning_rate": 9.493768844221106e-05, - "loss": 5.8266, - "step": 5538 - }, - { - "epoch": 2.888657105606258, - "grad_norm": 1.4438681602478027, - "learning_rate": 9.493668341708544e-05, - "loss": 5.8758, - "step": 5539 - }, - { - "epoch": 2.8891786179921772, - "grad_norm": 1.4077194929122925, - "learning_rate": 9.49356783919598e-05, - "loss": 6.2478, - "step": 5540 - }, - { - "epoch": 2.8897001303780963, - "grad_norm": 1.8659459352493286, - "learning_rate": 9.493467336683417e-05, - "loss": 4.9709, - "step": 5541 - }, - { - "epoch": 2.8902216427640157, - "grad_norm": 1.4451112747192383, - "learning_rate": 9.493366834170855e-05, - "loss": 6.0871, - "step": 5542 - }, - { - "epoch": 2.8907431551499347, - "grad_norm": 1.5694302320480347, - "learning_rate": 9.493266331658291e-05, - "loss": 5.3811, - "step": 5543 - }, - { - "epoch": 2.8912646675358538, - "grad_norm": 1.551498293876648, - "learning_rate": 9.493165829145729e-05, - "loss": 5.3843, - "step": 5544 - }, - { - "epoch": 2.8917861799217732, - "grad_norm": 1.49081289768219, - "learning_rate": 9.493065326633166e-05, - "loss": 6.0154, - "step": 5545 - }, - { - "epoch": 2.8923076923076922, - "grad_norm": 1.5730396509170532, - "learning_rate": 9.492964824120603e-05, - "loss": 5.5861, - "step": 5546 - }, - { - "epoch": 2.8928292046936113, - "grad_norm": 1.5991287231445312, - "learning_rate": 9.49286432160804e-05, - "loss": 6.1216, - "step": 5547 - }, - { - "epoch": 2.8933507170795307, - "grad_norm": 1.5304365158081055, - "learning_rate": 9.492763819095478e-05, - "loss": 6.0717, - "step": 5548 - }, - { - "epoch": 2.8938722294654498, - "grad_norm": 1.4365475177764893, - "learning_rate": 9.492663316582915e-05, - "loss": 5.9597, - "step": 5549 - }, - { - "epoch": 2.8943937418513688, - "grad_norm": 1.5242412090301514, - "learning_rate": 9.492562814070353e-05, - "loss": 5.8683, - "step": 5550 - }, - { - "epoch": 2.8949152542372882, - "grad_norm": 1.6503796577453613, - "learning_rate": 9.49246231155779e-05, - "loss": 5.5276, - "step": 5551 - }, - { - "epoch": 2.8954367666232073, - "grad_norm": 1.4438531398773193, - "learning_rate": 9.492361809045227e-05, - "loss": 5.6723, - "step": 5552 - }, - { - "epoch": 2.8959582790091263, - "grad_norm": 1.5143316984176636, - "learning_rate": 9.492261306532664e-05, - "loss": 5.2545, - "step": 5553 - }, - { - "epoch": 2.8964797913950457, - "grad_norm": 1.7184367179870605, - "learning_rate": 9.4921608040201e-05, - "loss": 5.6888, - "step": 5554 - }, - { - "epoch": 2.8970013037809648, - "grad_norm": 1.4547812938690186, - "learning_rate": 9.492060301507538e-05, - "loss": 6.1858, - "step": 5555 - }, - { - "epoch": 2.897522816166884, - "grad_norm": 1.58124577999115, - "learning_rate": 9.491959798994974e-05, - "loss": 5.7511, - "step": 5556 - }, - { - "epoch": 2.8980443285528033, - "grad_norm": 1.4802014827728271, - "learning_rate": 9.491859296482412e-05, - "loss": 6.0066, - "step": 5557 - }, - { - "epoch": 2.8985658409387223, - "grad_norm": 1.438165545463562, - "learning_rate": 9.49175879396985e-05, - "loss": 5.8155, - "step": 5558 - }, - { - "epoch": 2.8990873533246413, - "grad_norm": 1.6287916898727417, - "learning_rate": 9.491658291457288e-05, - "loss": 5.7862, - "step": 5559 - }, - { - "epoch": 2.8996088657105608, - "grad_norm": 1.4050424098968506, - "learning_rate": 9.491557788944724e-05, - "loss": 5.8399, - "step": 5560 - }, - { - "epoch": 2.9001303780964798, - "grad_norm": 1.3505676984786987, - "learning_rate": 9.491457286432162e-05, - "loss": 6.1767, - "step": 5561 - }, - { - "epoch": 2.900651890482399, - "grad_norm": 1.541554570198059, - "learning_rate": 9.491356783919598e-05, - "loss": 5.3691, - "step": 5562 - }, - { - "epoch": 2.9011734028683183, - "grad_norm": 1.5259424448013306, - "learning_rate": 9.491256281407036e-05, - "loss": 4.9509, - "step": 5563 - }, - { - "epoch": 2.9016949152542373, - "grad_norm": 1.597011685371399, - "learning_rate": 9.491155778894473e-05, - "loss": 5.4731, - "step": 5564 - }, - { - "epoch": 2.9022164276401563, - "grad_norm": 1.4970884323120117, - "learning_rate": 9.49105527638191e-05, - "loss": 5.7372, - "step": 5565 - }, - { - "epoch": 2.9027379400260758, - "grad_norm": 1.469681739807129, - "learning_rate": 9.490954773869347e-05, - "loss": 5.6845, - "step": 5566 - }, - { - "epoch": 2.903259452411995, - "grad_norm": 1.468544602394104, - "learning_rate": 9.490854271356783e-05, - "loss": 5.9744, - "step": 5567 - }, - { - "epoch": 2.903780964797914, - "grad_norm": 1.6005274057388306, - "learning_rate": 9.490753768844221e-05, - "loss": 5.7703, - "step": 5568 - }, - { - "epoch": 2.9043024771838333, - "grad_norm": 1.3922381401062012, - "learning_rate": 9.490653266331659e-05, - "loss": 6.1833, - "step": 5569 - }, - { - "epoch": 2.9048239895697523, - "grad_norm": 1.4666475057601929, - "learning_rate": 9.490552763819097e-05, - "loss": 5.9374, - "step": 5570 - }, - { - "epoch": 2.9053455019556713, - "grad_norm": 1.5384091138839722, - "learning_rate": 9.490452261306533e-05, - "loss": 6.1484, - "step": 5571 - }, - { - "epoch": 2.905867014341591, - "grad_norm": 1.4271762371063232, - "learning_rate": 9.490351758793971e-05, - "loss": 5.9283, - "step": 5572 - }, - { - "epoch": 2.90638852672751, - "grad_norm": 1.4915387630462646, - "learning_rate": 9.490251256281407e-05, - "loss": 5.9849, - "step": 5573 - }, - { - "epoch": 2.906910039113429, - "grad_norm": 1.3377097845077515, - "learning_rate": 9.490150753768845e-05, - "loss": 6.1545, - "step": 5574 - }, - { - "epoch": 2.9074315514993483, - "grad_norm": 1.5475226640701294, - "learning_rate": 9.490050251256281e-05, - "loss": 5.633, - "step": 5575 - }, - { - "epoch": 2.9079530638852673, - "grad_norm": 1.4059722423553467, - "learning_rate": 9.489949748743719e-05, - "loss": 5.7741, - "step": 5576 - }, - { - "epoch": 2.9084745762711863, - "grad_norm": 1.4943660497665405, - "learning_rate": 9.489849246231156e-05, - "loss": 5.6764, - "step": 5577 - }, - { - "epoch": 2.908996088657106, - "grad_norm": 1.7238649129867554, - "learning_rate": 9.489748743718593e-05, - "loss": 5.6739, - "step": 5578 - }, - { - "epoch": 2.909517601043025, - "grad_norm": 1.4704395532608032, - "learning_rate": 9.489648241206031e-05, - "loss": 6.1874, - "step": 5579 - }, - { - "epoch": 2.910039113428944, - "grad_norm": 1.4585376977920532, - "learning_rate": 9.489547738693468e-05, - "loss": 5.777, - "step": 5580 - }, - { - "epoch": 2.9105606258148633, - "grad_norm": 1.5897916555404663, - "learning_rate": 9.489447236180905e-05, - "loss": 5.9803, - "step": 5581 - }, - { - "epoch": 2.9110821382007823, - "grad_norm": 1.5488941669464111, - "learning_rate": 9.489346733668342e-05, - "loss": 5.4327, - "step": 5582 - }, - { - "epoch": 2.9116036505867013, - "grad_norm": 1.34518563747406, - "learning_rate": 9.48924623115578e-05, - "loss": 6.1353, - "step": 5583 - }, - { - "epoch": 2.912125162972621, - "grad_norm": 1.4807626008987427, - "learning_rate": 9.489145728643216e-05, - "loss": 6.1081, - "step": 5584 - }, - { - "epoch": 2.91264667535854, - "grad_norm": 1.587143063545227, - "learning_rate": 9.489045226130654e-05, - "loss": 5.3597, - "step": 5585 - }, - { - "epoch": 2.913168187744459, - "grad_norm": 1.497709035873413, - "learning_rate": 9.48894472361809e-05, - "loss": 6.0506, - "step": 5586 - }, - { - "epoch": 2.9136897001303783, - "grad_norm": 1.4687743186950684, - "learning_rate": 9.488844221105528e-05, - "loss": 5.8789, - "step": 5587 - }, - { - "epoch": 2.9142112125162973, - "grad_norm": 1.4672162532806396, - "learning_rate": 9.488743718592964e-05, - "loss": 6.0882, - "step": 5588 - }, - { - "epoch": 2.9147327249022164, - "grad_norm": 1.664939284324646, - "learning_rate": 9.488643216080402e-05, - "loss": 5.747, - "step": 5589 - }, - { - "epoch": 2.915254237288136, - "grad_norm": 1.4529097080230713, - "learning_rate": 9.48854271356784e-05, - "loss": 5.9817, - "step": 5590 - }, - { - "epoch": 2.915775749674055, - "grad_norm": 1.486401081085205, - "learning_rate": 9.488442211055278e-05, - "loss": 6.0475, - "step": 5591 - }, - { - "epoch": 2.916297262059974, - "grad_norm": 1.4367727041244507, - "learning_rate": 9.488341708542714e-05, - "loss": 5.6162, - "step": 5592 - }, - { - "epoch": 2.9168187744458933, - "grad_norm": 1.50130033493042, - "learning_rate": 9.48824120603015e-05, - "loss": 5.8907, - "step": 5593 - }, - { - "epoch": 2.9173402868318123, - "grad_norm": 1.5267356634140015, - "learning_rate": 9.488140703517588e-05, - "loss": 5.7765, - "step": 5594 - }, - { - "epoch": 2.9178617992177314, - "grad_norm": 1.402603030204773, - "learning_rate": 9.488040201005025e-05, - "loss": 5.7163, - "step": 5595 - }, - { - "epoch": 2.918383311603651, - "grad_norm": 1.6174359321594238, - "learning_rate": 9.487939698492463e-05, - "loss": 5.3703, - "step": 5596 - }, - { - "epoch": 2.91890482398957, - "grad_norm": 1.472778081893921, - "learning_rate": 9.487839195979899e-05, - "loss": 5.5394, - "step": 5597 - }, - { - "epoch": 2.919426336375489, - "grad_norm": 1.5636790990829468, - "learning_rate": 9.487738693467337e-05, - "loss": 5.8308, - "step": 5598 - }, - { - "epoch": 2.9199478487614083, - "grad_norm": 1.8219919204711914, - "learning_rate": 9.487638190954775e-05, - "loss": 5.464, - "step": 5599 - }, - { - "epoch": 2.9204693611473274, - "grad_norm": 1.4276323318481445, - "learning_rate": 9.487537688442212e-05, - "loss": 5.9445, - "step": 5600 - }, - { - "epoch": 2.9209908735332464, - "grad_norm": 1.461767554283142, - "learning_rate": 9.487437185929649e-05, - "loss": 5.9794, - "step": 5601 - }, - { - "epoch": 2.921512385919166, - "grad_norm": 1.6382936239242554, - "learning_rate": 9.487336683417087e-05, - "loss": 5.6286, - "step": 5602 - }, - { - "epoch": 2.922033898305085, - "grad_norm": 1.7635223865509033, - "learning_rate": 9.487236180904523e-05, - "loss": 5.6945, - "step": 5603 - }, - { - "epoch": 2.922555410691004, - "grad_norm": 1.4411667585372925, - "learning_rate": 9.487135678391961e-05, - "loss": 5.9232, - "step": 5604 - }, - { - "epoch": 2.9230769230769234, - "grad_norm": 1.3225001096725464, - "learning_rate": 9.487035175879397e-05, - "loss": 6.1514, - "step": 5605 - }, - { - "epoch": 2.9235984354628424, - "grad_norm": 1.552101492881775, - "learning_rate": 9.486934673366835e-05, - "loss": 5.9468, - "step": 5606 - }, - { - "epoch": 2.9241199478487614, - "grad_norm": 1.7488456964492798, - "learning_rate": 9.486834170854271e-05, - "loss": 5.2925, - "step": 5607 - }, - { - "epoch": 2.9246414602346804, - "grad_norm": 1.5159887075424194, - "learning_rate": 9.486733668341708e-05, - "loss": 5.9201, - "step": 5608 - }, - { - "epoch": 2.9251629726206, - "grad_norm": 1.5896421670913696, - "learning_rate": 9.486633165829146e-05, - "loss": 5.7753, - "step": 5609 - }, - { - "epoch": 2.925684485006519, - "grad_norm": 1.5869134664535522, - "learning_rate": 9.486532663316583e-05, - "loss": 5.5884, - "step": 5610 - }, - { - "epoch": 2.926205997392438, - "grad_norm": 1.4332365989685059, - "learning_rate": 9.486432160804021e-05, - "loss": 5.6749, - "step": 5611 - }, - { - "epoch": 2.9267275097783574, - "grad_norm": 1.5491622686386108, - "learning_rate": 9.486331658291458e-05, - "loss": 5.7502, - "step": 5612 - }, - { - "epoch": 2.9272490221642764, - "grad_norm": 1.4510512351989746, - "learning_rate": 9.486231155778895e-05, - "loss": 5.7301, - "step": 5613 - }, - { - "epoch": 2.9277705345501954, - "grad_norm": 1.3961373567581177, - "learning_rate": 9.486130653266332e-05, - "loss": 5.9144, - "step": 5614 - }, - { - "epoch": 2.928292046936115, - "grad_norm": 1.7326092720031738, - "learning_rate": 9.48603015075377e-05, - "loss": 4.9302, - "step": 5615 - }, - { - "epoch": 2.928813559322034, - "grad_norm": 1.5977157354354858, - "learning_rate": 9.485929648241206e-05, - "loss": 5.8305, - "step": 5616 - }, - { - "epoch": 2.929335071707953, - "grad_norm": 1.4200847148895264, - "learning_rate": 9.485829145728644e-05, - "loss": 6.0305, - "step": 5617 - }, - { - "epoch": 2.9298565840938724, - "grad_norm": 1.375240445137024, - "learning_rate": 9.48572864321608e-05, - "loss": 5.9016, - "step": 5618 - }, - { - "epoch": 2.9303780964797914, - "grad_norm": 1.414204478263855, - "learning_rate": 9.485628140703518e-05, - "loss": 6.1293, - "step": 5619 - }, - { - "epoch": 2.9308996088657104, - "grad_norm": 1.5879254341125488, - "learning_rate": 9.485527638190956e-05, - "loss": 5.8432, - "step": 5620 - }, - { - "epoch": 2.9314211212516295, - "grad_norm": 1.3899239301681519, - "learning_rate": 9.485427135678392e-05, - "loss": 5.6386, - "step": 5621 - }, - { - "epoch": 2.931942633637549, - "grad_norm": 1.6143795251846313, - "learning_rate": 9.48532663316583e-05, - "loss": 6.0487, - "step": 5622 - }, - { - "epoch": 2.932464146023468, - "grad_norm": 1.525539517402649, - "learning_rate": 9.485226130653267e-05, - "loss": 5.9726, - "step": 5623 - }, - { - "epoch": 2.932985658409387, - "grad_norm": 1.4849265813827515, - "learning_rate": 9.485125628140704e-05, - "loss": 5.2215, - "step": 5624 - }, - { - "epoch": 2.9335071707953064, - "grad_norm": 1.5697646141052246, - "learning_rate": 9.485025125628141e-05, - "loss": 5.8088, - "step": 5625 - }, - { - "epoch": 2.9340286831812254, - "grad_norm": 1.5876095294952393, - "learning_rate": 9.484924623115578e-05, - "loss": 6.2564, - "step": 5626 - }, - { - "epoch": 2.9345501955671445, - "grad_norm": 1.7351067066192627, - "learning_rate": 9.484824120603015e-05, - "loss": 6.0734, - "step": 5627 - }, - { - "epoch": 2.935071707953064, - "grad_norm": 1.461938738822937, - "learning_rate": 9.484723618090453e-05, - "loss": 5.7064, - "step": 5628 - }, - { - "epoch": 2.935593220338983, - "grad_norm": 1.3551006317138672, - "learning_rate": 9.484623115577889e-05, - "loss": 6.125, - "step": 5629 - }, - { - "epoch": 2.936114732724902, - "grad_norm": 1.4544097185134888, - "learning_rate": 9.484522613065327e-05, - "loss": 5.6953, - "step": 5630 - }, - { - "epoch": 2.9366362451108214, - "grad_norm": 1.7247930765151978, - "learning_rate": 9.484422110552765e-05, - "loss": 5.3649, - "step": 5631 - }, - { - "epoch": 2.9371577574967405, - "grad_norm": 1.7665141820907593, - "learning_rate": 9.484321608040202e-05, - "loss": 5.5419, - "step": 5632 - }, - { - "epoch": 2.9376792698826595, - "grad_norm": 1.472756266593933, - "learning_rate": 9.484221105527639e-05, - "loss": 5.3947, - "step": 5633 - }, - { - "epoch": 2.938200782268579, - "grad_norm": 1.4072251319885254, - "learning_rate": 9.484120603015075e-05, - "loss": 5.791, - "step": 5634 - }, - { - "epoch": 2.938722294654498, - "grad_norm": 1.3676085472106934, - "learning_rate": 9.484020100502513e-05, - "loss": 5.3628, - "step": 5635 - }, - { - "epoch": 2.939243807040417, - "grad_norm": 1.455832839012146, - "learning_rate": 9.48391959798995e-05, - "loss": 6.0159, - "step": 5636 - }, - { - "epoch": 2.9397653194263365, - "grad_norm": 1.473459243774414, - "learning_rate": 9.483819095477387e-05, - "loss": 5.7373, - "step": 5637 - }, - { - "epoch": 2.9402868318122555, - "grad_norm": 1.543699026107788, - "learning_rate": 9.483718592964824e-05, - "loss": 5.8528, - "step": 5638 - }, - { - "epoch": 2.9408083441981745, - "grad_norm": 1.4179481267929077, - "learning_rate": 9.483618090452262e-05, - "loss": 6.2253, - "step": 5639 - }, - { - "epoch": 2.941329856584094, - "grad_norm": 1.4675509929656982, - "learning_rate": 9.4835175879397e-05, - "loss": 5.6815, - "step": 5640 - }, - { - "epoch": 2.941851368970013, - "grad_norm": 1.6443326473236084, - "learning_rate": 9.483417085427137e-05, - "loss": 5.5112, - "step": 5641 - }, - { - "epoch": 2.942372881355932, - "grad_norm": 1.6505610942840576, - "learning_rate": 9.483316582914574e-05, - "loss": 5.8538, - "step": 5642 - }, - { - "epoch": 2.9428943937418515, - "grad_norm": 1.365644097328186, - "learning_rate": 9.483216080402011e-05, - "loss": 5.9482, - "step": 5643 - }, - { - "epoch": 2.9434159061277705, - "grad_norm": 1.3721506595611572, - "learning_rate": 9.483115577889448e-05, - "loss": 5.7317, - "step": 5644 - }, - { - "epoch": 2.9439374185136895, - "grad_norm": 1.5284404754638672, - "learning_rate": 9.483015075376886e-05, - "loss": 5.5124, - "step": 5645 - }, - { - "epoch": 2.944458930899609, - "grad_norm": 1.693989634513855, - "learning_rate": 9.482914572864322e-05, - "loss": 5.5377, - "step": 5646 - }, - { - "epoch": 2.944980443285528, - "grad_norm": 1.6544488668441772, - "learning_rate": 9.482814070351758e-05, - "loss": 5.9952, - "step": 5647 - }, - { - "epoch": 2.945501955671447, - "grad_norm": 1.486353874206543, - "learning_rate": 9.482713567839196e-05, - "loss": 6.2196, - "step": 5648 - }, - { - "epoch": 2.9460234680573665, - "grad_norm": 1.6713837385177612, - "learning_rate": 9.482613065326633e-05, - "loss": 5.5003, - "step": 5649 - }, - { - "epoch": 2.9465449804432855, - "grad_norm": 1.5086448192596436, - "learning_rate": 9.48251256281407e-05, - "loss": 5.8956, - "step": 5650 - }, - { - "epoch": 2.9470664928292045, - "grad_norm": 1.4716354608535767, - "learning_rate": 9.482412060301508e-05, - "loss": 5.8852, - "step": 5651 - }, - { - "epoch": 2.947588005215124, - "grad_norm": 1.5243581533432007, - "learning_rate": 9.482311557788946e-05, - "loss": 5.7754, - "step": 5652 - }, - { - "epoch": 2.948109517601043, - "grad_norm": 1.5727814435958862, - "learning_rate": 9.482211055276382e-05, - "loss": 5.9325, - "step": 5653 - }, - { - "epoch": 2.948631029986962, - "grad_norm": 1.5930489301681519, - "learning_rate": 9.48211055276382e-05, - "loss": 5.5824, - "step": 5654 - }, - { - "epoch": 2.9491525423728815, - "grad_norm": 1.499829888343811, - "learning_rate": 9.482010050251257e-05, - "loss": 5.6296, - "step": 5655 - }, - { - "epoch": 2.9496740547588005, - "grad_norm": 1.6006007194519043, - "learning_rate": 9.481909547738694e-05, - "loss": 5.3989, - "step": 5656 - }, - { - "epoch": 2.9501955671447195, - "grad_norm": 1.5014506578445435, - "learning_rate": 9.481809045226131e-05, - "loss": 5.7784, - "step": 5657 - }, - { - "epoch": 2.950717079530639, - "grad_norm": 1.5451650619506836, - "learning_rate": 9.481708542713569e-05, - "loss": 5.7072, - "step": 5658 - }, - { - "epoch": 2.951238591916558, - "grad_norm": 1.3398109674453735, - "learning_rate": 9.481608040201005e-05, - "loss": 5.937, - "step": 5659 - }, - { - "epoch": 2.951760104302477, - "grad_norm": 1.3988779783248901, - "learning_rate": 9.481507537688443e-05, - "loss": 5.9181, - "step": 5660 - }, - { - "epoch": 2.9522816166883965, - "grad_norm": 1.4654664993286133, - "learning_rate": 9.48140703517588e-05, - "loss": 6.2438, - "step": 5661 - }, - { - "epoch": 2.9528031290743155, - "grad_norm": 1.4928780794143677, - "learning_rate": 9.481306532663317e-05, - "loss": 5.0629, - "step": 5662 - }, - { - "epoch": 2.9533246414602345, - "grad_norm": 1.5862302780151367, - "learning_rate": 9.481206030150755e-05, - "loss": 5.6133, - "step": 5663 - }, - { - "epoch": 2.953846153846154, - "grad_norm": 1.4297378063201904, - "learning_rate": 9.481105527638191e-05, - "loss": 5.9868, - "step": 5664 - }, - { - "epoch": 2.954367666232073, - "grad_norm": 1.5494303703308105, - "learning_rate": 9.481005025125629e-05, - "loss": 5.7933, - "step": 5665 - }, - { - "epoch": 2.954889178617992, - "grad_norm": 1.5773588418960571, - "learning_rate": 9.480904522613065e-05, - "loss": 5.092, - "step": 5666 - }, - { - "epoch": 2.9554106910039115, - "grad_norm": 1.5671412944793701, - "learning_rate": 9.480804020100503e-05, - "loss": 5.5462, - "step": 5667 - }, - { - "epoch": 2.9559322033898305, - "grad_norm": 1.572758674621582, - "learning_rate": 9.48070351758794e-05, - "loss": 6.1114, - "step": 5668 - }, - { - "epoch": 2.9564537157757496, - "grad_norm": 1.4552154541015625, - "learning_rate": 9.480603015075377e-05, - "loss": 5.6481, - "step": 5669 - }, - { - "epoch": 2.956975228161669, - "grad_norm": 1.5403410196304321, - "learning_rate": 9.480502512562814e-05, - "loss": 5.3032, - "step": 5670 - }, - { - "epoch": 2.957496740547588, - "grad_norm": 1.6212360858917236, - "learning_rate": 9.480402010050252e-05, - "loss": 5.7797, - "step": 5671 - }, - { - "epoch": 2.958018252933507, - "grad_norm": 1.4734113216400146, - "learning_rate": 9.48030150753769e-05, - "loss": 5.4639, - "step": 5672 - }, - { - "epoch": 2.9585397653194265, - "grad_norm": 1.2675044536590576, - "learning_rate": 9.480201005025126e-05, - "loss": 6.1917, - "step": 5673 - }, - { - "epoch": 2.9590612777053455, - "grad_norm": 1.4518061876296997, - "learning_rate": 9.480100502512564e-05, - "loss": 5.8391, - "step": 5674 - }, - { - "epoch": 2.9595827900912646, - "grad_norm": 1.5727113485336304, - "learning_rate": 9.48e-05, - "loss": 5.9255, - "step": 5675 - }, - { - "epoch": 2.960104302477184, - "grad_norm": 1.3488913774490356, - "learning_rate": 9.479899497487438e-05, - "loss": 5.9383, - "step": 5676 - }, - { - "epoch": 2.960625814863103, - "grad_norm": 1.4377706050872803, - "learning_rate": 9.479798994974874e-05, - "loss": 5.9509, - "step": 5677 - }, - { - "epoch": 2.961147327249022, - "grad_norm": 1.599881649017334, - "learning_rate": 9.479698492462312e-05, - "loss": 5.8221, - "step": 5678 - }, - { - "epoch": 2.9616688396349415, - "grad_norm": 1.5068464279174805, - "learning_rate": 9.479597989949748e-05, - "loss": 5.9585, - "step": 5679 - }, - { - "epoch": 2.9621903520208606, - "grad_norm": 1.545324444770813, - "learning_rate": 9.479497487437186e-05, - "loss": 5.7056, - "step": 5680 - }, - { - "epoch": 2.9627118644067796, - "grad_norm": 1.5406014919281006, - "learning_rate": 9.479396984924624e-05, - "loss": 5.1505, - "step": 5681 - }, - { - "epoch": 2.963233376792699, - "grad_norm": 1.4956676959991455, - "learning_rate": 9.479296482412062e-05, - "loss": 5.7456, - "step": 5682 - }, - { - "epoch": 2.963754889178618, - "grad_norm": 1.6768978834152222, - "learning_rate": 9.479195979899498e-05, - "loss": 5.4777, - "step": 5683 - }, - { - "epoch": 2.964276401564537, - "grad_norm": 1.5714713335037231, - "learning_rate": 9.479095477386936e-05, - "loss": 5.7941, - "step": 5684 - }, - { - "epoch": 2.9647979139504566, - "grad_norm": 1.4647243022918701, - "learning_rate": 9.478994974874372e-05, - "loss": 5.8484, - "step": 5685 - }, - { - "epoch": 2.9653194263363756, - "grad_norm": 1.5708014965057373, - "learning_rate": 9.47889447236181e-05, - "loss": 5.4629, - "step": 5686 - }, - { - "epoch": 2.9658409387222946, - "grad_norm": 1.4093737602233887, - "learning_rate": 9.478793969849247e-05, - "loss": 6.0379, - "step": 5687 - }, - { - "epoch": 2.966362451108214, - "grad_norm": 1.462388038635254, - "learning_rate": 9.478693467336683e-05, - "loss": 5.891, - "step": 5688 - }, - { - "epoch": 2.966883963494133, - "grad_norm": 1.395670771598816, - "learning_rate": 9.478592964824121e-05, - "loss": 5.9153, - "step": 5689 - }, - { - "epoch": 2.967405475880052, - "grad_norm": 1.5872045755386353, - "learning_rate": 9.478492462311557e-05, - "loss": 5.8976, - "step": 5690 - }, - { - "epoch": 2.9679269882659716, - "grad_norm": 1.495665192604065, - "learning_rate": 9.478391959798995e-05, - "loss": 5.6124, - "step": 5691 - }, - { - "epoch": 2.9684485006518906, - "grad_norm": 1.4160020351409912, - "learning_rate": 9.478291457286433e-05, - "loss": 6.3358, - "step": 5692 - }, - { - "epoch": 2.9689700130378096, - "grad_norm": 1.5315049886703491, - "learning_rate": 9.47819095477387e-05, - "loss": 5.8767, - "step": 5693 - }, - { - "epoch": 2.969491525423729, - "grad_norm": 1.5887882709503174, - "learning_rate": 9.478090452261307e-05, - "loss": 5.7797, - "step": 5694 - }, - { - "epoch": 2.970013037809648, - "grad_norm": 1.4905885457992554, - "learning_rate": 9.477989949748745e-05, - "loss": 5.9486, - "step": 5695 - }, - { - "epoch": 2.970534550195567, - "grad_norm": 1.5904028415679932, - "learning_rate": 9.477889447236181e-05, - "loss": 5.572, - "step": 5696 - }, - { - "epoch": 2.9710560625814866, - "grad_norm": 1.4948248863220215, - "learning_rate": 9.477788944723619e-05, - "loss": 5.6951, - "step": 5697 - }, - { - "epoch": 2.9715775749674056, - "grad_norm": 1.6443378925323486, - "learning_rate": 9.477688442211055e-05, - "loss": 5.5549, - "step": 5698 - }, - { - "epoch": 2.9720990873533246, - "grad_norm": 1.3996539115905762, - "learning_rate": 9.477587939698493e-05, - "loss": 5.9622, - "step": 5699 - }, - { - "epoch": 2.9726205997392436, - "grad_norm": 1.5761632919311523, - "learning_rate": 9.47748743718593e-05, - "loss": 5.3111, - "step": 5700 - }, - { - "epoch": 2.973142112125163, - "grad_norm": 1.4086540937423706, - "learning_rate": 9.477386934673366e-05, - "loss": 6.1915, - "step": 5701 - }, - { - "epoch": 2.973663624511082, - "grad_norm": 1.6071149110794067, - "learning_rate": 9.477286432160804e-05, - "loss": 5.6636, - "step": 5702 - }, - { - "epoch": 2.974185136897001, - "grad_norm": 1.5928254127502441, - "learning_rate": 9.477185929648242e-05, - "loss": 5.4637, - "step": 5703 - }, - { - "epoch": 2.9747066492829206, - "grad_norm": 1.5876655578613281, - "learning_rate": 9.47708542713568e-05, - "loss": 5.4165, - "step": 5704 - }, - { - "epoch": 2.9752281616688396, - "grad_norm": 1.4683613777160645, - "learning_rate": 9.476984924623116e-05, - "loss": 5.7055, - "step": 5705 - }, - { - "epoch": 2.9757496740547587, - "grad_norm": 1.523417353630066, - "learning_rate": 9.476884422110554e-05, - "loss": 5.5511, - "step": 5706 - }, - { - "epoch": 2.976271186440678, - "grad_norm": 1.6262173652648926, - "learning_rate": 9.47678391959799e-05, - "loss": 5.789, - "step": 5707 - }, - { - "epoch": 2.976792698826597, - "grad_norm": 1.5964269638061523, - "learning_rate": 9.476683417085428e-05, - "loss": 5.5677, - "step": 5708 - }, - { - "epoch": 2.977314211212516, - "grad_norm": 1.3546854257583618, - "learning_rate": 9.476582914572864e-05, - "loss": 5.9315, - "step": 5709 - }, - { - "epoch": 2.9778357235984356, - "grad_norm": 1.3683171272277832, - "learning_rate": 9.476482412060302e-05, - "loss": 5.9894, - "step": 5710 - }, - { - "epoch": 2.9783572359843546, - "grad_norm": 1.3763943910598755, - "learning_rate": 9.476381909547739e-05, - "loss": 6.0469, - "step": 5711 - }, - { - "epoch": 2.9788787483702737, - "grad_norm": 1.3965799808502197, - "learning_rate": 9.476281407035176e-05, - "loss": 6.0733, - "step": 5712 - }, - { - "epoch": 2.9794002607561927, - "grad_norm": 1.6557425260543823, - "learning_rate": 9.476180904522614e-05, - "loss": 5.1168, - "step": 5713 - }, - { - "epoch": 2.979921773142112, - "grad_norm": 1.378940224647522, - "learning_rate": 9.47608040201005e-05, - "loss": 5.8397, - "step": 5714 - }, - { - "epoch": 2.980443285528031, - "grad_norm": 1.3388956785202026, - "learning_rate": 9.475979899497488e-05, - "loss": 5.8605, - "step": 5715 - }, - { - "epoch": 2.98096479791395, - "grad_norm": 1.4287981986999512, - "learning_rate": 9.475879396984925e-05, - "loss": 6.075, - "step": 5716 - }, - { - "epoch": 2.9814863102998697, - "grad_norm": 1.383670449256897, - "learning_rate": 9.475778894472363e-05, - "loss": 5.9189, - "step": 5717 - }, - { - "epoch": 2.9820078226857887, - "grad_norm": 1.4722269773483276, - "learning_rate": 9.475678391959799e-05, - "loss": 5.6182, - "step": 5718 - }, - { - "epoch": 2.9825293350717077, - "grad_norm": 1.6591142416000366, - "learning_rate": 9.475577889447237e-05, - "loss": 5.6252, - "step": 5719 - }, - { - "epoch": 2.983050847457627, - "grad_norm": 1.6826410293579102, - "learning_rate": 9.475477386934673e-05, - "loss": 5.4618, - "step": 5720 - }, - { - "epoch": 2.983572359843546, - "grad_norm": 1.5898313522338867, - "learning_rate": 9.475376884422111e-05, - "loss": 5.5191, - "step": 5721 - }, - { - "epoch": 2.984093872229465, - "grad_norm": 1.4392000436782837, - "learning_rate": 9.475276381909547e-05, - "loss": 6.0886, - "step": 5722 - }, - { - "epoch": 2.9846153846153847, - "grad_norm": 1.5297683477401733, - "learning_rate": 9.475175879396985e-05, - "loss": 4.875, - "step": 5723 - }, - { - "epoch": 2.9851368970013037, - "grad_norm": 2.012697696685791, - "learning_rate": 9.475075376884423e-05, - "loss": 5.5471, - "step": 5724 - }, - { - "epoch": 2.9856584093872227, - "grad_norm": 1.5634838342666626, - "learning_rate": 9.474974874371861e-05, - "loss": 5.6011, - "step": 5725 - }, - { - "epoch": 2.986179921773142, - "grad_norm": 2.0749900341033936, - "learning_rate": 9.474874371859297e-05, - "loss": 4.8957, - "step": 5726 - }, - { - "epoch": 2.986701434159061, - "grad_norm": 1.7019398212432861, - "learning_rate": 9.474773869346734e-05, - "loss": 5.7448, - "step": 5727 - }, - { - "epoch": 2.98722294654498, - "grad_norm": 1.5106923580169678, - "learning_rate": 9.474673366834171e-05, - "loss": 5.8263, - "step": 5728 - }, - { - "epoch": 2.9877444589308997, - "grad_norm": 1.9888776540756226, - "learning_rate": 9.474572864321608e-05, - "loss": 5.0353, - "step": 5729 - }, - { - "epoch": 2.9882659713168187, - "grad_norm": 1.6606978178024292, - "learning_rate": 9.474472361809046e-05, - "loss": 5.5123, - "step": 5730 - }, - { - "epoch": 2.9887874837027377, - "grad_norm": 1.3773466348648071, - "learning_rate": 9.474371859296482e-05, - "loss": 5.9856, - "step": 5731 - }, - { - "epoch": 2.989308996088657, - "grad_norm": 1.4099878072738647, - "learning_rate": 9.47427135678392e-05, - "loss": 6.1027, - "step": 5732 - }, - { - "epoch": 2.989830508474576, - "grad_norm": 1.4960269927978516, - "learning_rate": 9.474170854271358e-05, - "loss": 5.488, - "step": 5733 - }, - { - "epoch": 2.9903520208604952, - "grad_norm": 1.3266113996505737, - "learning_rate": 9.474070351758795e-05, - "loss": 5.6943, - "step": 5734 - }, - { - "epoch": 2.9908735332464147, - "grad_norm": 1.333306908607483, - "learning_rate": 9.473969849246232e-05, - "loss": 5.8785, - "step": 5735 - }, - { - "epoch": 2.9913950456323337, - "grad_norm": 1.577088475227356, - "learning_rate": 9.47386934673367e-05, - "loss": 5.628, - "step": 5736 - }, - { - "epoch": 2.9919165580182527, - "grad_norm": 1.6364824771881104, - "learning_rate": 9.473768844221106e-05, - "loss": 5.3675, - "step": 5737 - }, - { - "epoch": 2.992438070404172, - "grad_norm": 1.5676016807556152, - "learning_rate": 9.473668341708544e-05, - "loss": 5.5322, - "step": 5738 - }, - { - "epoch": 2.992959582790091, - "grad_norm": 1.3868201971054077, - "learning_rate": 9.47356783919598e-05, - "loss": 5.7354, - "step": 5739 - }, - { - "epoch": 2.9934810951760102, - "grad_norm": 1.366782307624817, - "learning_rate": 9.473467336683417e-05, - "loss": 6.0451, - "step": 5740 - }, - { - "epoch": 2.9940026075619297, - "grad_norm": 1.390046238899231, - "learning_rate": 9.473366834170854e-05, - "loss": 6.0958, - "step": 5741 - }, - { - "epoch": 2.9945241199478487, - "grad_norm": 1.3986254930496216, - "learning_rate": 9.473266331658291e-05, - "loss": 6.0261, - "step": 5742 - }, - { - "epoch": 2.9950456323337677, - "grad_norm": 1.651505947113037, - "learning_rate": 9.473165829145729e-05, - "loss": 5.7223, - "step": 5743 - }, - { - "epoch": 2.995567144719687, - "grad_norm": 1.5541599988937378, - "learning_rate": 9.473065326633166e-05, - "loss": 5.4382, - "step": 5744 - }, - { - "epoch": 2.9960886571056062, - "grad_norm": 1.4342402219772339, - "learning_rate": 9.472964824120604e-05, - "loss": 6.1019, - "step": 5745 - }, - { - "epoch": 2.9966101694915253, - "grad_norm": 1.5408830642700195, - "learning_rate": 9.47286432160804e-05, - "loss": 5.6497, - "step": 5746 - }, - { - "epoch": 2.9971316818774447, - "grad_norm": 1.456231713294983, - "learning_rate": 9.472763819095478e-05, - "loss": 6.179, - "step": 5747 - }, - { - "epoch": 2.9976531942633637, - "grad_norm": 1.5744142532348633, - "learning_rate": 9.472663316582915e-05, - "loss": 5.7413, - "step": 5748 - }, - { - "epoch": 2.9981747066492828, - "grad_norm": 1.5043144226074219, - "learning_rate": 9.472562814070353e-05, - "loss": 5.3917, - "step": 5749 - }, - { - "epoch": 2.9986962190352022, - "grad_norm": 1.4089245796203613, - "learning_rate": 9.472462311557789e-05, - "loss": 6.1994, - "step": 5750 - }, - { - "epoch": 2.9992177314211212, - "grad_norm": 1.372663140296936, - "learning_rate": 9.472361809045227e-05, - "loss": 5.8687, - "step": 5751 - }, - { - "epoch": 2.9997392438070403, - "grad_norm": 1.5765117406845093, - "learning_rate": 9.472261306532663e-05, - "loss": 5.8483, - "step": 5752 - }, - { - "epoch": 3.0002607561929597, - "grad_norm": 1.4377204179763794, - "learning_rate": 9.472160804020101e-05, - "loss": 5.9295, - "step": 5753 - }, - { - "epoch": 3.0007822685788788, - "grad_norm": 1.3878682851791382, - "learning_rate": 9.472060301507539e-05, - "loss": 6.1257, - "step": 5754 - }, - { - "epoch": 3.0013037809647978, - "grad_norm": 1.4637115001678467, - "learning_rate": 9.471959798994975e-05, - "loss": 6.0069, - "step": 5755 - }, - { - "epoch": 3.0018252933507172, - "grad_norm": 1.5836331844329834, - "learning_rate": 9.471859296482413e-05, - "loss": 5.7355, - "step": 5756 - }, - { - "epoch": 3.0023468057366363, - "grad_norm": 1.6220051050186157, - "learning_rate": 9.47175879396985e-05, - "loss": 5.6817, - "step": 5757 - }, - { - "epoch": 3.0028683181225553, - "grad_norm": 1.4258496761322021, - "learning_rate": 9.471658291457287e-05, - "loss": 6.0942, - "step": 5758 - }, - { - "epoch": 3.0033898305084747, - "grad_norm": 1.5069963932037354, - "learning_rate": 9.471557788944724e-05, - "loss": 5.5966, - "step": 5759 - }, - { - "epoch": 3.0039113428943938, - "grad_norm": 1.6024360656738281, - "learning_rate": 9.471457286432161e-05, - "loss": 5.8251, - "step": 5760 - }, - { - "epoch": 3.004432855280313, - "grad_norm": 1.5853402614593506, - "learning_rate": 9.471356783919598e-05, - "loss": 5.7461, - "step": 5761 - }, - { - "epoch": 3.0049543676662323, - "grad_norm": 1.4520171880722046, - "learning_rate": 9.471256281407036e-05, - "loss": 6.1088, - "step": 5762 - }, - { - "epoch": 3.0054758800521513, - "grad_norm": 1.450812816619873, - "learning_rate": 9.471155778894472e-05, - "loss": 5.6208, - "step": 5763 - }, - { - "epoch": 3.0059973924380703, - "grad_norm": 1.7311595678329468, - "learning_rate": 9.47105527638191e-05, - "loss": 5.4351, - "step": 5764 - }, - { - "epoch": 3.0065189048239898, - "grad_norm": 1.3856539726257324, - "learning_rate": 9.470954773869348e-05, - "loss": 5.4222, - "step": 5765 - }, - { - "epoch": 3.007040417209909, - "grad_norm": 1.4523017406463623, - "learning_rate": 9.470854271356784e-05, - "loss": 5.6188, - "step": 5766 - }, - { - "epoch": 3.007561929595828, - "grad_norm": 1.5164885520935059, - "learning_rate": 9.470753768844222e-05, - "loss": 5.792, - "step": 5767 - }, - { - "epoch": 3.0080834419817473, - "grad_norm": 1.5523091554641724, - "learning_rate": 9.470653266331658e-05, - "loss": 5.5296, - "step": 5768 - }, - { - "epoch": 3.0086049543676663, - "grad_norm": 1.5385515689849854, - "learning_rate": 9.470552763819096e-05, - "loss": 5.3755, - "step": 5769 - }, - { - "epoch": 3.0091264667535853, - "grad_norm": 1.4706642627716064, - "learning_rate": 9.470452261306532e-05, - "loss": 5.9415, - "step": 5770 - }, - { - "epoch": 3.0096479791395048, - "grad_norm": 1.4606150388717651, - "learning_rate": 9.47035175879397e-05, - "loss": 5.9363, - "step": 5771 - }, - { - "epoch": 3.010169491525424, - "grad_norm": 1.360920786857605, - "learning_rate": 9.470251256281407e-05, - "loss": 5.6328, - "step": 5772 - }, - { - "epoch": 3.010691003911343, - "grad_norm": 1.5204272270202637, - "learning_rate": 9.470150753768844e-05, - "loss": 5.9155, - "step": 5773 - }, - { - "epoch": 3.0112125162972623, - "grad_norm": 1.4005473852157593, - "learning_rate": 9.470050251256282e-05, - "loss": 5.949, - "step": 5774 - }, - { - "epoch": 3.0117340286831813, - "grad_norm": 1.3721369504928589, - "learning_rate": 9.46994974874372e-05, - "loss": 6.1523, - "step": 5775 - }, - { - "epoch": 3.0122555410691003, - "grad_norm": 1.598720908164978, - "learning_rate": 9.469849246231156e-05, - "loss": 5.3843, - "step": 5776 - }, - { - "epoch": 3.0127770534550193, - "grad_norm": 1.5146353244781494, - "learning_rate": 9.469748743718594e-05, - "loss": 5.4694, - "step": 5777 - }, - { - "epoch": 3.013298565840939, - "grad_norm": 1.5398303270339966, - "learning_rate": 9.469648241206031e-05, - "loss": 5.889, - "step": 5778 - }, - { - "epoch": 3.013820078226858, - "grad_norm": 1.4430100917816162, - "learning_rate": 9.469547738693468e-05, - "loss": 5.8776, - "step": 5779 - }, - { - "epoch": 3.014341590612777, - "grad_norm": 1.5189950466156006, - "learning_rate": 9.469447236180905e-05, - "loss": 6.0229, - "step": 5780 - }, - { - "epoch": 3.0148631029986963, - "grad_norm": 1.3621580600738525, - "learning_rate": 9.469346733668341e-05, - "loss": 5.2735, - "step": 5781 - }, - { - "epoch": 3.0153846153846153, - "grad_norm": 1.4718878269195557, - "learning_rate": 9.469246231155779e-05, - "loss": 5.9845, - "step": 5782 - }, - { - "epoch": 3.0159061277705344, - "grad_norm": 1.358791708946228, - "learning_rate": 9.469145728643216e-05, - "loss": 5.2418, - "step": 5783 - }, - { - "epoch": 3.016427640156454, - "grad_norm": 1.4125661849975586, - "learning_rate": 9.469045226130653e-05, - "loss": 5.902, - "step": 5784 - }, - { - "epoch": 3.016949152542373, - "grad_norm": 1.5206825733184814, - "learning_rate": 9.468944723618091e-05, - "loss": 5.7103, - "step": 5785 - }, - { - "epoch": 3.017470664928292, - "grad_norm": 1.505163311958313, - "learning_rate": 9.468844221105529e-05, - "loss": 5.9654, - "step": 5786 - }, - { - "epoch": 3.0179921773142113, - "grad_norm": 1.4645029306411743, - "learning_rate": 9.468743718592965e-05, - "loss": 5.945, - "step": 5787 - }, - { - "epoch": 3.0185136897001303, - "grad_norm": 1.6592308282852173, - "learning_rate": 9.468643216080403e-05, - "loss": 5.6577, - "step": 5788 - }, - { - "epoch": 3.0190352020860494, - "grad_norm": 1.4215037822723389, - "learning_rate": 9.46854271356784e-05, - "loss": 6.1287, - "step": 5789 - }, - { - "epoch": 3.019556714471969, - "grad_norm": 1.3522984981536865, - "learning_rate": 9.468442211055277e-05, - "loss": 5.6766, - "step": 5790 - }, - { - "epoch": 3.020078226857888, - "grad_norm": 1.4199585914611816, - "learning_rate": 9.468341708542714e-05, - "loss": 5.8297, - "step": 5791 - }, - { - "epoch": 3.020599739243807, - "grad_norm": 1.475765585899353, - "learning_rate": 9.468241206030152e-05, - "loss": 5.9101, - "step": 5792 - }, - { - "epoch": 3.0211212516297263, - "grad_norm": 1.586753010749817, - "learning_rate": 9.468140703517588e-05, - "loss": 5.7375, - "step": 5793 - }, - { - "epoch": 3.0216427640156454, - "grad_norm": 1.5540598630905151, - "learning_rate": 9.468040201005026e-05, - "loss": 5.4643, - "step": 5794 - }, - { - "epoch": 3.0221642764015644, - "grad_norm": 1.4363815784454346, - "learning_rate": 9.467939698492463e-05, - "loss": 5.5748, - "step": 5795 - }, - { - "epoch": 3.022685788787484, - "grad_norm": 1.395122766494751, - "learning_rate": 9.4678391959799e-05, - "loss": 5.4364, - "step": 5796 - }, - { - "epoch": 3.023207301173403, - "grad_norm": 1.4867584705352783, - "learning_rate": 9.467738693467338e-05, - "loss": 6.1721, - "step": 5797 - }, - { - "epoch": 3.023728813559322, - "grad_norm": 1.4129140377044678, - "learning_rate": 9.467638190954774e-05, - "loss": 5.6208, - "step": 5798 - }, - { - "epoch": 3.0242503259452413, - "grad_norm": 1.4504239559173584, - "learning_rate": 9.467537688442212e-05, - "loss": 5.6813, - "step": 5799 - }, - { - "epoch": 3.0247718383311604, - "grad_norm": 1.2899487018585205, - "learning_rate": 9.467437185929648e-05, - "loss": 6.0924, - "step": 5800 - }, - { - "epoch": 3.0252933507170794, - "grad_norm": 1.5992485284805298, - "learning_rate": 9.467336683417086e-05, - "loss": 5.3625, - "step": 5801 - }, - { - "epoch": 3.025814863102999, - "grad_norm": 1.5796823501586914, - "learning_rate": 9.467236180904523e-05, - "loss": 5.3334, - "step": 5802 - }, - { - "epoch": 3.026336375488918, - "grad_norm": 1.6074366569519043, - "learning_rate": 9.46713567839196e-05, - "loss": 5.4262, - "step": 5803 - }, - { - "epoch": 3.026857887874837, - "grad_norm": 1.643280029296875, - "learning_rate": 9.467035175879397e-05, - "loss": 5.3232, - "step": 5804 - }, - { - "epoch": 3.0273794002607564, - "grad_norm": 1.5248814821243286, - "learning_rate": 9.466934673366835e-05, - "loss": 6.1436, - "step": 5805 - }, - { - "epoch": 3.0279009126466754, - "grad_norm": 1.3710942268371582, - "learning_rate": 9.466834170854272e-05, - "loss": 6.0537, - "step": 5806 - }, - { - "epoch": 3.0284224250325944, - "grad_norm": 1.504469394683838, - "learning_rate": 9.466733668341709e-05, - "loss": 5.7787, - "step": 5807 - }, - { - "epoch": 3.028943937418514, - "grad_norm": 1.4599465131759644, - "learning_rate": 9.466633165829147e-05, - "loss": 5.7793, - "step": 5808 - }, - { - "epoch": 3.029465449804433, - "grad_norm": 1.4530867338180542, - "learning_rate": 9.466532663316583e-05, - "loss": 5.881, - "step": 5809 - }, - { - "epoch": 3.029986962190352, - "grad_norm": 1.5725669860839844, - "learning_rate": 9.466432160804021e-05, - "loss": 5.5362, - "step": 5810 - }, - { - "epoch": 3.0305084745762714, - "grad_norm": 1.4958685636520386, - "learning_rate": 9.466331658291457e-05, - "loss": 5.4548, - "step": 5811 - }, - { - "epoch": 3.0310299869621904, - "grad_norm": 1.6312061548233032, - "learning_rate": 9.466231155778895e-05, - "loss": 5.6883, - "step": 5812 - }, - { - "epoch": 3.0315514993481094, - "grad_norm": 1.619693398475647, - "learning_rate": 9.466130653266331e-05, - "loss": 5.4717, - "step": 5813 - }, - { - "epoch": 3.032073011734029, - "grad_norm": 1.4062529802322388, - "learning_rate": 9.466030150753769e-05, - "loss": 5.8903, - "step": 5814 - }, - { - "epoch": 3.032594524119948, - "grad_norm": 1.5683705806732178, - "learning_rate": 9.465929648241207e-05, - "loss": 5.7117, - "step": 5815 - }, - { - "epoch": 3.033116036505867, - "grad_norm": 1.5052239894866943, - "learning_rate": 9.465829145728645e-05, - "loss": 5.6858, - "step": 5816 - }, - { - "epoch": 3.0336375488917864, - "grad_norm": 1.899996042251587, - "learning_rate": 9.465728643216081e-05, - "loss": 4.9304, - "step": 5817 - }, - { - "epoch": 3.0341590612777054, - "grad_norm": 1.5951212644577026, - "learning_rate": 9.465628140703519e-05, - "loss": 5.7052, - "step": 5818 - }, - { - "epoch": 3.0346805736636244, - "grad_norm": 1.5659523010253906, - "learning_rate": 9.465527638190955e-05, - "loss": 5.6977, - "step": 5819 - }, - { - "epoch": 3.035202086049544, - "grad_norm": 1.3789373636245728, - "learning_rate": 9.465427135678392e-05, - "loss": 6.1535, - "step": 5820 - }, - { - "epoch": 3.035723598435463, - "grad_norm": 1.4808989763259888, - "learning_rate": 9.46532663316583e-05, - "loss": 5.886, - "step": 5821 - }, - { - "epoch": 3.036245110821382, - "grad_norm": 1.3488909006118774, - "learning_rate": 9.465226130653266e-05, - "loss": 6.0475, - "step": 5822 - }, - { - "epoch": 3.036766623207301, - "grad_norm": 1.7176576852798462, - "learning_rate": 9.465125628140704e-05, - "loss": 5.4142, - "step": 5823 - }, - { - "epoch": 3.0372881355932204, - "grad_norm": 1.5322469472885132, - "learning_rate": 9.46502512562814e-05, - "loss": 5.9665, - "step": 5824 - }, - { - "epoch": 3.0378096479791394, - "grad_norm": 1.3634486198425293, - "learning_rate": 9.464924623115578e-05, - "loss": 5.8839, - "step": 5825 - }, - { - "epoch": 3.0383311603650585, - "grad_norm": 1.4671117067337036, - "learning_rate": 9.464824120603016e-05, - "loss": 5.7509, - "step": 5826 - }, - { - "epoch": 3.038852672750978, - "grad_norm": 1.3588237762451172, - "learning_rate": 9.464723618090454e-05, - "loss": 6.0299, - "step": 5827 - }, - { - "epoch": 3.039374185136897, - "grad_norm": 1.5312944650650024, - "learning_rate": 9.46462311557789e-05, - "loss": 5.9171, - "step": 5828 - }, - { - "epoch": 3.039895697522816, - "grad_norm": 1.436220645904541, - "learning_rate": 9.464522613065328e-05, - "loss": 6.023, - "step": 5829 - }, - { - "epoch": 3.0404172099087354, - "grad_norm": 1.474253535270691, - "learning_rate": 9.464422110552764e-05, - "loss": 5.8057, - "step": 5830 - }, - { - "epoch": 3.0409387222946545, - "grad_norm": 1.4670852422714233, - "learning_rate": 9.464321608040202e-05, - "loss": 5.8795, - "step": 5831 - }, - { - "epoch": 3.0414602346805735, - "grad_norm": 1.5747942924499512, - "learning_rate": 9.464221105527638e-05, - "loss": 6.0905, - "step": 5832 - }, - { - "epoch": 3.041981747066493, - "grad_norm": 1.484284520149231, - "learning_rate": 9.464120603015075e-05, - "loss": 5.7966, - "step": 5833 - }, - { - "epoch": 3.042503259452412, - "grad_norm": 1.5133543014526367, - "learning_rate": 9.464020100502513e-05, - "loss": 6.0068, - "step": 5834 - }, - { - "epoch": 3.043024771838331, - "grad_norm": 1.4207285642623901, - "learning_rate": 9.46391959798995e-05, - "loss": 5.8866, - "step": 5835 - }, - { - "epoch": 3.0435462842242504, - "grad_norm": 1.7068456411361694, - "learning_rate": 9.463819095477388e-05, - "loss": 5.5827, - "step": 5836 - }, - { - "epoch": 3.0440677966101695, - "grad_norm": 1.7409515380859375, - "learning_rate": 9.463718592964825e-05, - "loss": 5.5897, - "step": 5837 - }, - { - "epoch": 3.0445893089960885, - "grad_norm": 1.513808012008667, - "learning_rate": 9.463618090452262e-05, - "loss": 4.9028, - "step": 5838 - }, - { - "epoch": 3.045110821382008, - "grad_norm": 1.4568588733673096, - "learning_rate": 9.463517587939699e-05, - "loss": 5.8086, - "step": 5839 - }, - { - "epoch": 3.045632333767927, - "grad_norm": 1.5937316417694092, - "learning_rate": 9.463417085427137e-05, - "loss": 5.6933, - "step": 5840 - }, - { - "epoch": 3.046153846153846, - "grad_norm": 1.5717334747314453, - "learning_rate": 9.463316582914573e-05, - "loss": 5.3998, - "step": 5841 - }, - { - "epoch": 3.0466753585397655, - "grad_norm": 1.5751968622207642, - "learning_rate": 9.463216080402011e-05, - "loss": 5.6339, - "step": 5842 - }, - { - "epoch": 3.0471968709256845, - "grad_norm": 1.5425190925598145, - "learning_rate": 9.463115577889447e-05, - "loss": 6.1554, - "step": 5843 - }, - { - "epoch": 3.0477183833116035, - "grad_norm": 1.519472360610962, - "learning_rate": 9.463015075376885e-05, - "loss": 5.9995, - "step": 5844 - }, - { - "epoch": 3.048239895697523, - "grad_norm": 1.3662327527999878, - "learning_rate": 9.462914572864321e-05, - "loss": 5.3337, - "step": 5845 - }, - { - "epoch": 3.048761408083442, - "grad_norm": 1.5622271299362183, - "learning_rate": 9.462814070351759e-05, - "loss": 5.4109, - "step": 5846 - }, - { - "epoch": 3.049282920469361, - "grad_norm": 1.5849497318267822, - "learning_rate": 9.462713567839197e-05, - "loss": 6.1005, - "step": 5847 - }, - { - "epoch": 3.0498044328552805, - "grad_norm": 1.4504023790359497, - "learning_rate": 9.462613065326633e-05, - "loss": 5.6537, - "step": 5848 - }, - { - "epoch": 3.0503259452411995, - "grad_norm": 1.4177052974700928, - "learning_rate": 9.462512562814071e-05, - "loss": 5.8151, - "step": 5849 - }, - { - "epoch": 3.0508474576271185, - "grad_norm": 1.560486078262329, - "learning_rate": 9.462412060301508e-05, - "loss": 5.9267, - "step": 5850 - }, - { - "epoch": 3.051368970013038, - "grad_norm": 1.4382741451263428, - "learning_rate": 9.462311557788945e-05, - "loss": 5.7341, - "step": 5851 - }, - { - "epoch": 3.051890482398957, - "grad_norm": 1.557877779006958, - "learning_rate": 9.462211055276382e-05, - "loss": 5.5543, - "step": 5852 - }, - { - "epoch": 3.052411994784876, - "grad_norm": 1.4534897804260254, - "learning_rate": 9.46211055276382e-05, - "loss": 5.9318, - "step": 5853 - }, - { - "epoch": 3.0529335071707955, - "grad_norm": 1.612915277481079, - "learning_rate": 9.462010050251256e-05, - "loss": 5.547, - "step": 5854 - }, - { - "epoch": 3.0534550195567145, - "grad_norm": 1.7588146924972534, - "learning_rate": 9.461909547738694e-05, - "loss": 5.4807, - "step": 5855 - }, - { - "epoch": 3.0539765319426335, - "grad_norm": 1.558422565460205, - "learning_rate": 9.461809045226132e-05, - "loss": 5.4864, - "step": 5856 - }, - { - "epoch": 3.054498044328553, - "grad_norm": 1.5462599992752075, - "learning_rate": 9.46170854271357e-05, - "loss": 4.9951, - "step": 5857 - }, - { - "epoch": 3.055019556714472, - "grad_norm": 1.4371531009674072, - "learning_rate": 9.461608040201006e-05, - "loss": 5.763, - "step": 5858 - }, - { - "epoch": 3.055541069100391, - "grad_norm": 1.5271117687225342, - "learning_rate": 9.461507537688442e-05, - "loss": 5.6543, - "step": 5859 - }, - { - "epoch": 3.0560625814863105, - "grad_norm": 1.3888907432556152, - "learning_rate": 9.46140703517588e-05, - "loss": 5.9318, - "step": 5860 - }, - { - "epoch": 3.0565840938722295, - "grad_norm": 1.4006364345550537, - "learning_rate": 9.461306532663317e-05, - "loss": 5.802, - "step": 5861 - }, - { - "epoch": 3.0571056062581485, - "grad_norm": 1.4768800735473633, - "learning_rate": 9.461206030150754e-05, - "loss": 5.6076, - "step": 5862 - }, - { - "epoch": 3.057627118644068, - "grad_norm": 1.4391425848007202, - "learning_rate": 9.461105527638191e-05, - "loss": 5.9272, - "step": 5863 - }, - { - "epoch": 3.058148631029987, - "grad_norm": 1.5824787616729736, - "learning_rate": 9.461005025125628e-05, - "loss": 5.7183, - "step": 5864 - }, - { - "epoch": 3.058670143415906, - "grad_norm": 1.392802357673645, - "learning_rate": 9.460904522613065e-05, - "loss": 5.9929, - "step": 5865 - }, - { - "epoch": 3.0591916558018255, - "grad_norm": 1.3415416479110718, - "learning_rate": 9.460804020100503e-05, - "loss": 6.0666, - "step": 5866 - }, - { - "epoch": 3.0597131681877445, - "grad_norm": 1.5675480365753174, - "learning_rate": 9.46070351758794e-05, - "loss": 5.7511, - "step": 5867 - }, - { - "epoch": 3.0602346805736635, - "grad_norm": 1.5871272087097168, - "learning_rate": 9.460603015075378e-05, - "loss": 5.2832, - "step": 5868 - }, - { - "epoch": 3.060756192959583, - "grad_norm": 1.638749361038208, - "learning_rate": 9.460502512562815e-05, - "loss": 5.6168, - "step": 5869 - }, - { - "epoch": 3.061277705345502, - "grad_norm": 1.512995958328247, - "learning_rate": 9.460402010050252e-05, - "loss": 5.6994, - "step": 5870 - }, - { - "epoch": 3.061799217731421, - "grad_norm": 1.4366130828857422, - "learning_rate": 9.460301507537689e-05, - "loss": 5.8468, - "step": 5871 - }, - { - "epoch": 3.06232073011734, - "grad_norm": 1.6672816276550293, - "learning_rate": 9.460201005025127e-05, - "loss": 5.3897, - "step": 5872 - }, - { - "epoch": 3.0628422425032595, - "grad_norm": 1.3812741041183472, - "learning_rate": 9.460100502512563e-05, - "loss": 6.0461, - "step": 5873 - }, - { - "epoch": 3.0633637548891786, - "grad_norm": 1.6512980461120605, - "learning_rate": 9.46e-05, - "loss": 5.7461, - "step": 5874 - }, - { - "epoch": 3.0638852672750976, - "grad_norm": 1.5828953981399536, - "learning_rate": 9.459899497487437e-05, - "loss": 5.2683, - "step": 5875 - }, - { - "epoch": 3.064406779661017, - "grad_norm": 1.509559154510498, - "learning_rate": 9.459798994974874e-05, - "loss": 5.9163, - "step": 5876 - }, - { - "epoch": 3.064928292046936, - "grad_norm": 2.1858792304992676, - "learning_rate": 9.459698492462312e-05, - "loss": 5.6758, - "step": 5877 - }, - { - "epoch": 3.065449804432855, - "grad_norm": 1.5580583810806274, - "learning_rate": 9.45959798994975e-05, - "loss": 5.9811, - "step": 5878 - }, - { - "epoch": 3.0659713168187746, - "grad_norm": 1.4582642316818237, - "learning_rate": 9.459497487437187e-05, - "loss": 5.7829, - "step": 5879 - }, - { - "epoch": 3.0664928292046936, - "grad_norm": 1.4365782737731934, - "learning_rate": 9.459396984924624e-05, - "loss": 5.6398, - "step": 5880 - }, - { - "epoch": 3.0670143415906126, - "grad_norm": 2.1087427139282227, - "learning_rate": 9.459296482412061e-05, - "loss": 5.741, - "step": 5881 - }, - { - "epoch": 3.067535853976532, - "grad_norm": 1.5821714401245117, - "learning_rate": 9.459195979899498e-05, - "loss": 5.6478, - "step": 5882 - }, - { - "epoch": 3.068057366362451, - "grad_norm": 1.4344381093978882, - "learning_rate": 9.459095477386936e-05, - "loss": 6.1032, - "step": 5883 - }, - { - "epoch": 3.06857887874837, - "grad_norm": 1.5576742887496948, - "learning_rate": 9.458994974874372e-05, - "loss": 5.7646, - "step": 5884 - }, - { - "epoch": 3.0691003911342896, - "grad_norm": 1.5506314039230347, - "learning_rate": 9.45889447236181e-05, - "loss": 5.534, - "step": 5885 - }, - { - "epoch": 3.0696219035202086, - "grad_norm": 1.4927634000778198, - "learning_rate": 9.458793969849246e-05, - "loss": 6.0125, - "step": 5886 - }, - { - "epoch": 3.0701434159061276, - "grad_norm": 1.6473774909973145, - "learning_rate": 9.458693467336684e-05, - "loss": 5.9649, - "step": 5887 - }, - { - "epoch": 3.070664928292047, - "grad_norm": 1.560056209564209, - "learning_rate": 9.458592964824122e-05, - "loss": 5.3517, - "step": 5888 - }, - { - "epoch": 3.071186440677966, - "grad_norm": 1.6311829090118408, - "learning_rate": 9.458492462311558e-05, - "loss": 5.6599, - "step": 5889 - }, - { - "epoch": 3.071707953063885, - "grad_norm": 1.6032822132110596, - "learning_rate": 9.458391959798996e-05, - "loss": 5.4932, - "step": 5890 - }, - { - "epoch": 3.0722294654498046, - "grad_norm": 1.6072977781295776, - "learning_rate": 9.458291457286432e-05, - "loss": 5.2739, - "step": 5891 - }, - { - "epoch": 3.0727509778357236, - "grad_norm": 1.4746280908584595, - "learning_rate": 9.45819095477387e-05, - "loss": 5.7097, - "step": 5892 - }, - { - "epoch": 3.0732724902216426, - "grad_norm": 1.3764489889144897, - "learning_rate": 9.458090452261307e-05, - "loss": 5.8225, - "step": 5893 - }, - { - "epoch": 3.073794002607562, - "grad_norm": 1.7624118328094482, - "learning_rate": 9.457989949748744e-05, - "loss": 5.2923, - "step": 5894 - }, - { - "epoch": 3.074315514993481, - "grad_norm": 1.4760856628417969, - "learning_rate": 9.457889447236181e-05, - "loss": 5.7976, - "step": 5895 - }, - { - "epoch": 3.0748370273794, - "grad_norm": 1.5168676376342773, - "learning_rate": 9.457788944723619e-05, - "loss": 5.4996, - "step": 5896 - }, - { - "epoch": 3.0753585397653196, - "grad_norm": 1.5580554008483887, - "learning_rate": 9.457688442211055e-05, - "loss": 5.7098, - "step": 5897 - }, - { - "epoch": 3.0758800521512386, - "grad_norm": 1.4205609560012817, - "learning_rate": 9.457587939698493e-05, - "loss": 6.0651, - "step": 5898 - }, - { - "epoch": 3.0764015645371576, - "grad_norm": 1.4074150323867798, - "learning_rate": 9.45748743718593e-05, - "loss": 6.098, - "step": 5899 - }, - { - "epoch": 3.076923076923077, - "grad_norm": 1.58516263961792, - "learning_rate": 9.457386934673367e-05, - "loss": 5.526, - "step": 5900 - }, - { - "epoch": 3.077444589308996, - "grad_norm": 1.6538965702056885, - "learning_rate": 9.457286432160805e-05, - "loss": 5.2366, - "step": 5901 - }, - { - "epoch": 3.077966101694915, - "grad_norm": 1.427926778793335, - "learning_rate": 9.457185929648241e-05, - "loss": 5.9894, - "step": 5902 - }, - { - "epoch": 3.0784876140808346, - "grad_norm": 1.4214593172073364, - "learning_rate": 9.457085427135679e-05, - "loss": 5.6836, - "step": 5903 - }, - { - "epoch": 3.0790091264667536, - "grad_norm": 1.6015725135803223, - "learning_rate": 9.456984924623115e-05, - "loss": 5.4728, - "step": 5904 - }, - { - "epoch": 3.0795306388526726, - "grad_norm": 1.6532886028289795, - "learning_rate": 9.456884422110553e-05, - "loss": 5.4083, - "step": 5905 - }, - { - "epoch": 3.080052151238592, - "grad_norm": 1.7719734907150269, - "learning_rate": 9.45678391959799e-05, - "loss": 5.181, - "step": 5906 - }, - { - "epoch": 3.080573663624511, - "grad_norm": 1.5277974605560303, - "learning_rate": 9.456683417085427e-05, - "loss": 5.5575, - "step": 5907 - }, - { - "epoch": 3.08109517601043, - "grad_norm": 1.488882303237915, - "learning_rate": 9.456582914572865e-05, - "loss": 6.0537, - "step": 5908 - }, - { - "epoch": 3.0816166883963496, - "grad_norm": 1.6720529794692993, - "learning_rate": 9.456482412060303e-05, - "loss": 5.5987, - "step": 5909 - }, - { - "epoch": 3.0821382007822686, - "grad_norm": 1.5618151426315308, - "learning_rate": 9.45638190954774e-05, - "loss": 5.3473, - "step": 5910 - }, - { - "epoch": 3.0826597131681877, - "grad_norm": 1.6426540613174438, - "learning_rate": 9.456281407035177e-05, - "loss": 5.8116, - "step": 5911 - }, - { - "epoch": 3.083181225554107, - "grad_norm": 1.4284110069274902, - "learning_rate": 9.456180904522614e-05, - "loss": 5.8414, - "step": 5912 - }, - { - "epoch": 3.083702737940026, - "grad_norm": 1.4357808828353882, - "learning_rate": 9.45608040201005e-05, - "loss": 6.0882, - "step": 5913 - }, - { - "epoch": 3.084224250325945, - "grad_norm": 1.4815322160720825, - "learning_rate": 9.455979899497488e-05, - "loss": 5.6096, - "step": 5914 - }, - { - "epoch": 3.084745762711864, - "grad_norm": 1.3797491788864136, - "learning_rate": 9.455879396984924e-05, - "loss": 6.2326, - "step": 5915 - }, - { - "epoch": 3.0852672750977836, - "grad_norm": 1.5625903606414795, - "learning_rate": 9.455778894472362e-05, - "loss": 5.3565, - "step": 5916 - }, - { - "epoch": 3.0857887874837027, - "grad_norm": 1.6335209608078003, - "learning_rate": 9.455678391959798e-05, - "loss": 5.8099, - "step": 5917 - }, - { - "epoch": 3.0863102998696217, - "grad_norm": 1.396146535873413, - "learning_rate": 9.455577889447236e-05, - "loss": 6.2562, - "step": 5918 - }, - { - "epoch": 3.086831812255541, - "grad_norm": 1.334381103515625, - "learning_rate": 9.455477386934674e-05, - "loss": 6.1637, - "step": 5919 - }, - { - "epoch": 3.08735332464146, - "grad_norm": 1.219536304473877, - "learning_rate": 9.455376884422112e-05, - "loss": 6.2134, - "step": 5920 - }, - { - "epoch": 3.087874837027379, - "grad_norm": 1.2963420152664185, - "learning_rate": 9.455276381909548e-05, - "loss": 6.208, - "step": 5921 - }, - { - "epoch": 3.0883963494132987, - "grad_norm": 1.5001893043518066, - "learning_rate": 9.455175879396986e-05, - "loss": 5.8838, - "step": 5922 - }, - { - "epoch": 3.0889178617992177, - "grad_norm": 1.4513239860534668, - "learning_rate": 9.455075376884422e-05, - "loss": 5.2857, - "step": 5923 - }, - { - "epoch": 3.0894393741851367, - "grad_norm": 1.58176589012146, - "learning_rate": 9.45497487437186e-05, - "loss": 5.5655, - "step": 5924 - }, - { - "epoch": 3.089960886571056, - "grad_norm": 1.5114370584487915, - "learning_rate": 9.454874371859297e-05, - "loss": 5.9633, - "step": 5925 - }, - { - "epoch": 3.090482398956975, - "grad_norm": 1.4420485496520996, - "learning_rate": 9.454773869346733e-05, - "loss": 6.0799, - "step": 5926 - }, - { - "epoch": 3.091003911342894, - "grad_norm": 1.4343568086624146, - "learning_rate": 9.454673366834171e-05, - "loss": 5.9314, - "step": 5927 - }, - { - "epoch": 3.0915254237288137, - "grad_norm": 1.4300570487976074, - "learning_rate": 9.454572864321609e-05, - "loss": 5.5417, - "step": 5928 - }, - { - "epoch": 3.0920469361147327, - "grad_norm": 1.4604412317276, - "learning_rate": 9.454472361809046e-05, - "loss": 5.9906, - "step": 5929 - }, - { - "epoch": 3.0925684485006517, - "grad_norm": 1.4193544387817383, - "learning_rate": 9.454371859296483e-05, - "loss": 5.7321, - "step": 5930 - }, - { - "epoch": 3.093089960886571, - "grad_norm": 1.4256242513656616, - "learning_rate": 9.45427135678392e-05, - "loss": 6.0287, - "step": 5931 - }, - { - "epoch": 3.09361147327249, - "grad_norm": 1.4964123964309692, - "learning_rate": 9.454170854271357e-05, - "loss": 5.8263, - "step": 5932 - }, - { - "epoch": 3.094132985658409, - "grad_norm": 1.5592036247253418, - "learning_rate": 9.454070351758795e-05, - "loss": 5.6518, - "step": 5933 - }, - { - "epoch": 3.0946544980443287, - "grad_norm": 1.5268688201904297, - "learning_rate": 9.453969849246231e-05, - "loss": 5.8864, - "step": 5934 - }, - { - "epoch": 3.0951760104302477, - "grad_norm": 1.6359882354736328, - "learning_rate": 9.453869346733669e-05, - "loss": 5.8274, - "step": 5935 - }, - { - "epoch": 3.0956975228161667, - "grad_norm": 1.4909464120864868, - "learning_rate": 9.453768844221105e-05, - "loss": 5.9757, - "step": 5936 - }, - { - "epoch": 3.096219035202086, - "grad_norm": 1.5061111450195312, - "learning_rate": 9.453668341708543e-05, - "loss": 5.7473, - "step": 5937 - }, - { - "epoch": 3.096740547588005, - "grad_norm": 1.6529176235198975, - "learning_rate": 9.45356783919598e-05, - "loss": 5.7972, - "step": 5938 - }, - { - "epoch": 3.0972620599739242, - "grad_norm": 1.5472520589828491, - "learning_rate": 9.453467336683417e-05, - "loss": 5.8479, - "step": 5939 - }, - { - "epoch": 3.0977835723598437, - "grad_norm": 1.8916460275650024, - "learning_rate": 9.453366834170855e-05, - "loss": 5.0422, - "step": 5940 - }, - { - "epoch": 3.0983050847457627, - "grad_norm": 1.5509400367736816, - "learning_rate": 9.453266331658292e-05, - "loss": 6.0129, - "step": 5941 - }, - { - "epoch": 3.0988265971316817, - "grad_norm": 1.6247501373291016, - "learning_rate": 9.45316582914573e-05, - "loss": 5.5638, - "step": 5942 - }, - { - "epoch": 3.099348109517601, - "grad_norm": 1.4283233880996704, - "learning_rate": 9.453065326633166e-05, - "loss": 5.3822, - "step": 5943 - }, - { - "epoch": 3.0998696219035202, - "grad_norm": 1.5038272142410278, - "learning_rate": 9.452964824120604e-05, - "loss": 5.6856, - "step": 5944 - }, - { - "epoch": 3.1003911342894392, - "grad_norm": 1.495367169380188, - "learning_rate": 9.45286432160804e-05, - "loss": 6.0635, - "step": 5945 - }, - { - "epoch": 3.1009126466753587, - "grad_norm": 1.4465676546096802, - "learning_rate": 9.452763819095478e-05, - "loss": 5.7256, - "step": 5946 - }, - { - "epoch": 3.1014341590612777, - "grad_norm": 1.538765788078308, - "learning_rate": 9.452663316582914e-05, - "loss": 6.0468, - "step": 5947 - }, - { - "epoch": 3.1019556714471967, - "grad_norm": 1.4355207681655884, - "learning_rate": 9.452562814070352e-05, - "loss": 6.3044, - "step": 5948 - }, - { - "epoch": 3.102477183833116, - "grad_norm": 1.756052017211914, - "learning_rate": 9.45246231155779e-05, - "loss": 5.1981, - "step": 5949 - }, - { - "epoch": 3.1029986962190352, - "grad_norm": 1.6237757205963135, - "learning_rate": 9.452361809045228e-05, - "loss": 5.9434, - "step": 5950 - }, - { - "epoch": 3.1035202086049543, - "grad_norm": 1.4441238641738892, - "learning_rate": 9.452261306532664e-05, - "loss": 5.9907, - "step": 5951 - }, - { - "epoch": 3.1040417209908737, - "grad_norm": 1.4781140089035034, - "learning_rate": 9.4521608040201e-05, - "loss": 5.1045, - "step": 5952 - }, - { - "epoch": 3.1045632333767927, - "grad_norm": 1.5835574865341187, - "learning_rate": 9.452060301507538e-05, - "loss": 5.6959, - "step": 5953 - }, - { - "epoch": 3.1050847457627118, - "grad_norm": 1.4697521924972534, - "learning_rate": 9.451959798994975e-05, - "loss": 5.6298, - "step": 5954 - }, - { - "epoch": 3.1056062581486312, - "grad_norm": 1.340589165687561, - "learning_rate": 9.451859296482413e-05, - "loss": 5.8457, - "step": 5955 - }, - { - "epoch": 3.1061277705345502, - "grad_norm": 1.4671962261199951, - "learning_rate": 9.451758793969849e-05, - "loss": 5.5994, - "step": 5956 - }, - { - "epoch": 3.1066492829204693, - "grad_norm": 1.4438464641571045, - "learning_rate": 9.451658291457287e-05, - "loss": 6.0035, - "step": 5957 - }, - { - "epoch": 3.1071707953063887, - "grad_norm": 1.3971376419067383, - "learning_rate": 9.451557788944723e-05, - "loss": 5.5575, - "step": 5958 - }, - { - "epoch": 3.1076923076923078, - "grad_norm": 1.5090712308883667, - "learning_rate": 9.451457286432161e-05, - "loss": 5.684, - "step": 5959 - }, - { - "epoch": 3.1082138200782268, - "grad_norm": 1.3829509019851685, - "learning_rate": 9.451356783919599e-05, - "loss": 5.5098, - "step": 5960 - }, - { - "epoch": 3.1087353324641462, - "grad_norm": 1.4362118244171143, - "learning_rate": 9.451256281407037e-05, - "loss": 5.9396, - "step": 5961 - }, - { - "epoch": 3.1092568448500653, - "grad_norm": 1.3199585676193237, - "learning_rate": 9.451155778894473e-05, - "loss": 5.3113, - "step": 5962 - }, - { - "epoch": 3.1097783572359843, - "grad_norm": 1.462471604347229, - "learning_rate": 9.451055276381911e-05, - "loss": 5.4175, - "step": 5963 - }, - { - "epoch": 3.1102998696219037, - "grad_norm": 1.5190324783325195, - "learning_rate": 9.450954773869347e-05, - "loss": 5.6788, - "step": 5964 - }, - { - "epoch": 3.1108213820078228, - "grad_norm": 1.3350346088409424, - "learning_rate": 9.450854271356785e-05, - "loss": 5.9516, - "step": 5965 - }, - { - "epoch": 3.111342894393742, - "grad_norm": 1.3691679239273071, - "learning_rate": 9.450753768844221e-05, - "loss": 5.9994, - "step": 5966 - }, - { - "epoch": 3.111864406779661, - "grad_norm": 1.391628384590149, - "learning_rate": 9.450653266331658e-05, - "loss": 5.8559, - "step": 5967 - }, - { - "epoch": 3.1123859191655803, - "grad_norm": 1.37540864944458, - "learning_rate": 9.450552763819096e-05, - "loss": 6.054, - "step": 5968 - }, - { - "epoch": 3.1129074315514993, - "grad_norm": 1.538871169090271, - "learning_rate": 9.450452261306533e-05, - "loss": 5.0338, - "step": 5969 - }, - { - "epoch": 3.1134289439374183, - "grad_norm": 1.582134485244751, - "learning_rate": 9.450351758793971e-05, - "loss": 5.8991, - "step": 5970 - }, - { - "epoch": 3.113950456323338, - "grad_norm": 1.5040698051452637, - "learning_rate": 9.450251256281408e-05, - "loss": 5.785, - "step": 5971 - }, - { - "epoch": 3.114471968709257, - "grad_norm": 1.6494308710098267, - "learning_rate": 9.450150753768845e-05, - "loss": 5.7682, - "step": 5972 - }, - { - "epoch": 3.114993481095176, - "grad_norm": 1.397558569908142, - "learning_rate": 9.450050251256282e-05, - "loss": 6.094, - "step": 5973 - }, - { - "epoch": 3.1155149934810953, - "grad_norm": 1.4531022310256958, - "learning_rate": 9.44994974874372e-05, - "loss": 5.9844, - "step": 5974 - }, - { - "epoch": 3.1160365058670143, - "grad_norm": 1.4732410907745361, - "learning_rate": 9.449849246231156e-05, - "loss": 5.7983, - "step": 5975 - }, - { - "epoch": 3.1165580182529333, - "grad_norm": 1.818155288696289, - "learning_rate": 9.449748743718594e-05, - "loss": 5.2632, - "step": 5976 - }, - { - "epoch": 3.117079530638853, - "grad_norm": 1.5746917724609375, - "learning_rate": 9.44964824120603e-05, - "loss": 5.5873, - "step": 5977 - }, - { - "epoch": 3.117601043024772, - "grad_norm": 1.5462315082550049, - "learning_rate": 9.449547738693468e-05, - "loss": 5.7091, - "step": 5978 - }, - { - "epoch": 3.118122555410691, - "grad_norm": 1.8027981519699097, - "learning_rate": 9.449447236180904e-05, - "loss": 5.3698, - "step": 5979 - }, - { - "epoch": 3.1186440677966103, - "grad_norm": 1.6408286094665527, - "learning_rate": 9.449346733668342e-05, - "loss": 5.8103, - "step": 5980 - }, - { - "epoch": 3.1191655801825293, - "grad_norm": 1.5473012924194336, - "learning_rate": 9.44924623115578e-05, - "loss": 5.6519, - "step": 5981 - }, - { - "epoch": 3.1196870925684483, - "grad_norm": 1.5296783447265625, - "learning_rate": 9.449145728643216e-05, - "loss": 5.4603, - "step": 5982 - }, - { - "epoch": 3.120208604954368, - "grad_norm": 1.4732483625411987, - "learning_rate": 9.449045226130654e-05, - "loss": 6.0197, - "step": 5983 - }, - { - "epoch": 3.120730117340287, - "grad_norm": 1.3387699127197266, - "learning_rate": 9.44894472361809e-05, - "loss": 6.1192, - "step": 5984 - }, - { - "epoch": 3.121251629726206, - "grad_norm": 1.6230127811431885, - "learning_rate": 9.448844221105528e-05, - "loss": 5.1001, - "step": 5985 - }, - { - "epoch": 3.1217731421121253, - "grad_norm": 1.7012475728988647, - "learning_rate": 9.448743718592965e-05, - "loss": 5.387, - "step": 5986 - }, - { - "epoch": 3.1222946544980443, - "grad_norm": 1.5545032024383545, - "learning_rate": 9.448643216080403e-05, - "loss": 5.7839, - "step": 5987 - }, - { - "epoch": 3.1228161668839634, - "grad_norm": 1.654433250427246, - "learning_rate": 9.448542713567839e-05, - "loss": 5.4428, - "step": 5988 - }, - { - "epoch": 3.123337679269883, - "grad_norm": 1.5025675296783447, - "learning_rate": 9.448442211055277e-05, - "loss": 5.4167, - "step": 5989 - }, - { - "epoch": 3.123859191655802, - "grad_norm": 1.5476739406585693, - "learning_rate": 9.448341708542715e-05, - "loss": 5.8965, - "step": 5990 - }, - { - "epoch": 3.124380704041721, - "grad_norm": 1.4096719026565552, - "learning_rate": 9.448241206030152e-05, - "loss": 6.1156, - "step": 5991 - }, - { - "epoch": 3.1249022164276403, - "grad_norm": 1.653701663017273, - "learning_rate": 9.448140703517589e-05, - "loss": 5.9378, - "step": 5992 - }, - { - "epoch": 3.1254237288135593, - "grad_norm": 1.6070947647094727, - "learning_rate": 9.448040201005025e-05, - "loss": 5.999, - "step": 5993 - }, - { - "epoch": 3.1259452411994784, - "grad_norm": 1.4291785955429077, - "learning_rate": 9.447939698492463e-05, - "loss": 5.1386, - "step": 5994 - }, - { - "epoch": 3.126466753585398, - "grad_norm": 1.5960363149642944, - "learning_rate": 9.4478391959799e-05, - "loss": 5.6513, - "step": 5995 - }, - { - "epoch": 3.126988265971317, - "grad_norm": 1.8519086837768555, - "learning_rate": 9.447738693467337e-05, - "loss": 5.1645, - "step": 5996 - }, - { - "epoch": 3.127509778357236, - "grad_norm": 1.8942108154296875, - "learning_rate": 9.447638190954774e-05, - "loss": 5.519, - "step": 5997 - }, - { - "epoch": 3.1280312907431553, - "grad_norm": 1.4592946767807007, - "learning_rate": 9.447537688442211e-05, - "loss": 5.8364, - "step": 5998 - }, - { - "epoch": 3.1285528031290744, - "grad_norm": 1.4667688608169556, - "learning_rate": 9.447437185929648e-05, - "loss": 5.924, - "step": 5999 - }, - { - "epoch": 3.1290743155149934, - "grad_norm": 1.4714646339416504, - "learning_rate": 9.447336683417086e-05, - "loss": 5.7784, - "step": 6000 - }, - { - "epoch": 3.129595827900913, - "grad_norm": 1.8773066997528076, - "learning_rate": 9.447236180904523e-05, - "loss": 5.0069, - "step": 6001 - }, - { - "epoch": 3.130117340286832, - "grad_norm": 1.6906769275665283, - "learning_rate": 9.447135678391961e-05, - "loss": 5.9346, - "step": 6002 - }, - { - "epoch": 3.130638852672751, - "grad_norm": 1.8142781257629395, - "learning_rate": 9.447035175879398e-05, - "loss": 5.5597, - "step": 6003 - }, - { - "epoch": 3.1311603650586703, - "grad_norm": 1.3168134689331055, - "learning_rate": 9.446934673366835e-05, - "loss": 5.346, - "step": 6004 - }, - { - "epoch": 3.1316818774445894, - "grad_norm": 1.4408619403839111, - "learning_rate": 9.446834170854272e-05, - "loss": 5.6314, - "step": 6005 - }, - { - "epoch": 3.1322033898305084, - "grad_norm": 1.3345054388046265, - "learning_rate": 9.446733668341708e-05, - "loss": 6.0806, - "step": 6006 - }, - { - "epoch": 3.1327249022164274, - "grad_norm": 1.5057436227798462, - "learning_rate": 9.446633165829146e-05, - "loss": 5.7438, - "step": 6007 - }, - { - "epoch": 3.133246414602347, - "grad_norm": 1.3786718845367432, - "learning_rate": 9.446532663316582e-05, - "loss": 5.8939, - "step": 6008 - }, - { - "epoch": 3.133767926988266, - "grad_norm": 1.5443403720855713, - "learning_rate": 9.44643216080402e-05, - "loss": 5.6266, - "step": 6009 - }, - { - "epoch": 3.134289439374185, - "grad_norm": 1.5332121849060059, - "learning_rate": 9.446331658291458e-05, - "loss": 5.8174, - "step": 6010 - }, - { - "epoch": 3.1348109517601044, - "grad_norm": 1.6315749883651733, - "learning_rate": 9.446231155778896e-05, - "loss": 5.7356, - "step": 6011 - }, - { - "epoch": 3.1353324641460234, - "grad_norm": 1.5787394046783447, - "learning_rate": 9.446130653266332e-05, - "loss": 5.9696, - "step": 6012 - }, - { - "epoch": 3.1358539765319424, - "grad_norm": 1.541546106338501, - "learning_rate": 9.44603015075377e-05, - "loss": 5.5079, - "step": 6013 - }, - { - "epoch": 3.136375488917862, - "grad_norm": 1.458977460861206, - "learning_rate": 9.445929648241206e-05, - "loss": 6.0796, - "step": 6014 - }, - { - "epoch": 3.136897001303781, - "grad_norm": 1.4625805616378784, - "learning_rate": 9.445829145728644e-05, - "loss": 4.9626, - "step": 6015 - }, - { - "epoch": 3.1374185136897, - "grad_norm": 1.3486944437026978, - "learning_rate": 9.445728643216081e-05, - "loss": 6.0129, - "step": 6016 - }, - { - "epoch": 3.1379400260756194, - "grad_norm": 1.6166188716888428, - "learning_rate": 9.445628140703518e-05, - "loss": 5.2544, - "step": 6017 - }, - { - "epoch": 3.1384615384615384, - "grad_norm": 1.4961820840835571, - "learning_rate": 9.445527638190955e-05, - "loss": 5.3073, - "step": 6018 - }, - { - "epoch": 3.1389830508474574, - "grad_norm": 1.5515804290771484, - "learning_rate": 9.445427135678391e-05, - "loss": 5.8037, - "step": 6019 - }, - { - "epoch": 3.139504563233377, - "grad_norm": 1.4997084140777588, - "learning_rate": 9.445326633165829e-05, - "loss": 5.4583, - "step": 6020 - }, - { - "epoch": 3.140026075619296, - "grad_norm": 1.5628770589828491, - "learning_rate": 9.445226130653267e-05, - "loss": 5.7477, - "step": 6021 - }, - { - "epoch": 3.140547588005215, - "grad_norm": 1.3544843196868896, - "learning_rate": 9.445125628140705e-05, - "loss": 5.2025, - "step": 6022 - }, - { - "epoch": 3.1410691003911344, - "grad_norm": 1.4447132349014282, - "learning_rate": 9.445025125628141e-05, - "loss": 6.1132, - "step": 6023 - }, - { - "epoch": 3.1415906127770534, - "grad_norm": 1.4827256202697754, - "learning_rate": 9.444924623115579e-05, - "loss": 5.7322, - "step": 6024 - }, - { - "epoch": 3.1421121251629724, - "grad_norm": 1.6576439142227173, - "learning_rate": 9.444824120603015e-05, - "loss": 5.4234, - "step": 6025 - }, - { - "epoch": 3.142633637548892, - "grad_norm": 1.700350046157837, - "learning_rate": 9.444723618090453e-05, - "loss": 5.7305, - "step": 6026 - }, - { - "epoch": 3.143155149934811, - "grad_norm": 1.5395225286483765, - "learning_rate": 9.44462311557789e-05, - "loss": 5.45, - "step": 6027 - }, - { - "epoch": 3.14367666232073, - "grad_norm": 1.5126898288726807, - "learning_rate": 9.444522613065327e-05, - "loss": 5.8802, - "step": 6028 - }, - { - "epoch": 3.1441981747066494, - "grad_norm": 1.5107595920562744, - "learning_rate": 9.444422110552764e-05, - "loss": 5.9033, - "step": 6029 - }, - { - "epoch": 3.1447196870925684, - "grad_norm": 1.5929425954818726, - "learning_rate": 9.444321608040202e-05, - "loss": 5.9111, - "step": 6030 - }, - { - "epoch": 3.1452411994784875, - "grad_norm": 1.576727032661438, - "learning_rate": 9.444221105527638e-05, - "loss": 5.655, - "step": 6031 - }, - { - "epoch": 3.145762711864407, - "grad_norm": 1.4252485036849976, - "learning_rate": 9.444120603015076e-05, - "loss": 5.9926, - "step": 6032 - }, - { - "epoch": 3.146284224250326, - "grad_norm": 1.6902813911437988, - "learning_rate": 9.444020100502514e-05, - "loss": 5.5387, - "step": 6033 - }, - { - "epoch": 3.146805736636245, - "grad_norm": 1.4553967714309692, - "learning_rate": 9.44391959798995e-05, - "loss": 5.8227, - "step": 6034 - }, - { - "epoch": 3.1473272490221644, - "grad_norm": 1.5301074981689453, - "learning_rate": 9.443819095477388e-05, - "loss": 5.8581, - "step": 6035 - }, - { - "epoch": 3.1478487614080835, - "grad_norm": 1.3797202110290527, - "learning_rate": 9.443718592964824e-05, - "loss": 5.7593, - "step": 6036 - }, - { - "epoch": 3.1483702737940025, - "grad_norm": 1.5734614133834839, - "learning_rate": 9.443618090452262e-05, - "loss": 5.7256, - "step": 6037 - }, - { - "epoch": 3.148891786179922, - "grad_norm": 1.495334506034851, - "learning_rate": 9.443517587939698e-05, - "loss": 5.8223, - "step": 6038 - }, - { - "epoch": 3.149413298565841, - "grad_norm": 1.4399739503860474, - "learning_rate": 9.443417085427136e-05, - "loss": 5.8111, - "step": 6039 - }, - { - "epoch": 3.14993481095176, - "grad_norm": 1.624007225036621, - "learning_rate": 9.443316582914573e-05, - "loss": 4.9693, - "step": 6040 - }, - { - "epoch": 3.1504563233376794, - "grad_norm": 1.446977972984314, - "learning_rate": 9.44321608040201e-05, - "loss": 6.0927, - "step": 6041 - }, - { - "epoch": 3.1509778357235985, - "grad_norm": 1.4708313941955566, - "learning_rate": 9.443115577889448e-05, - "loss": 5.7453, - "step": 6042 - }, - { - "epoch": 3.1514993481095175, - "grad_norm": 1.644184947013855, - "learning_rate": 9.443015075376886e-05, - "loss": 5.6096, - "step": 6043 - }, - { - "epoch": 3.152020860495437, - "grad_norm": 1.5620064735412598, - "learning_rate": 9.442914572864322e-05, - "loss": 6.174, - "step": 6044 - }, - { - "epoch": 3.152542372881356, - "grad_norm": 1.5655866861343384, - "learning_rate": 9.44281407035176e-05, - "loss": 5.6261, - "step": 6045 - }, - { - "epoch": 3.153063885267275, - "grad_norm": 1.477390170097351, - "learning_rate": 9.442713567839197e-05, - "loss": 5.3152, - "step": 6046 - }, - { - "epoch": 3.1535853976531945, - "grad_norm": 1.8749686479568481, - "learning_rate": 9.442613065326633e-05, - "loss": 5.4449, - "step": 6047 - }, - { - "epoch": 3.1541069100391135, - "grad_norm": 1.433998942375183, - "learning_rate": 9.442512562814071e-05, - "loss": 5.8313, - "step": 6048 - }, - { - "epoch": 3.1546284224250325, - "grad_norm": 1.5486464500427246, - "learning_rate": 9.442412060301507e-05, - "loss": 5.7931, - "step": 6049 - }, - { - "epoch": 3.155149934810952, - "grad_norm": 1.4365578889846802, - "learning_rate": 9.442311557788945e-05, - "loss": 5.7501, - "step": 6050 - }, - { - "epoch": 3.155671447196871, - "grad_norm": 1.441725254058838, - "learning_rate": 9.442211055276381e-05, - "loss": 5.5554, - "step": 6051 - }, - { - "epoch": 3.15619295958279, - "grad_norm": 1.3874943256378174, - "learning_rate": 9.442110552763819e-05, - "loss": 5.989, - "step": 6052 - }, - { - "epoch": 3.1567144719687095, - "grad_norm": 1.451522946357727, - "learning_rate": 9.442010050251257e-05, - "loss": 5.662, - "step": 6053 - }, - { - "epoch": 3.1572359843546285, - "grad_norm": 1.3636561632156372, - "learning_rate": 9.441909547738695e-05, - "loss": 5.8379, - "step": 6054 - }, - { - "epoch": 3.1577574967405475, - "grad_norm": 1.2279136180877686, - "learning_rate": 9.441809045226131e-05, - "loss": 5.2987, - "step": 6055 - }, - { - "epoch": 3.158279009126467, - "grad_norm": 1.4659367799758911, - "learning_rate": 9.441708542713569e-05, - "loss": 5.5469, - "step": 6056 - }, - { - "epoch": 3.158800521512386, - "grad_norm": 1.4097001552581787, - "learning_rate": 9.441608040201005e-05, - "loss": 5.7817, - "step": 6057 - }, - { - "epoch": 3.159322033898305, - "grad_norm": 1.4194533824920654, - "learning_rate": 9.441507537688443e-05, - "loss": 5.8874, - "step": 6058 - }, - { - "epoch": 3.1598435462842245, - "grad_norm": 1.4247093200683594, - "learning_rate": 9.44140703517588e-05, - "loss": 5.6, - "step": 6059 - }, - { - "epoch": 3.1603650586701435, - "grad_norm": 1.4381343126296997, - "learning_rate": 9.441306532663316e-05, - "loss": 6.1708, - "step": 6060 - }, - { - "epoch": 3.1608865710560625, - "grad_norm": 1.387538194656372, - "learning_rate": 9.441206030150754e-05, - "loss": 5.8294, - "step": 6061 - }, - { - "epoch": 3.1614080834419815, - "grad_norm": 1.4847896099090576, - "learning_rate": 9.441105527638192e-05, - "loss": 5.7407, - "step": 6062 - }, - { - "epoch": 3.161929595827901, - "grad_norm": 1.4319195747375488, - "learning_rate": 9.44100502512563e-05, - "loss": 5.8631, - "step": 6063 - }, - { - "epoch": 3.16245110821382, - "grad_norm": 1.426947832107544, - "learning_rate": 9.440904522613066e-05, - "loss": 5.4207, - "step": 6064 - }, - { - "epoch": 3.162972620599739, - "grad_norm": 1.5201994180679321, - "learning_rate": 9.440804020100504e-05, - "loss": 5.586, - "step": 6065 - }, - { - "epoch": 3.1634941329856585, - "grad_norm": 1.363964319229126, - "learning_rate": 9.44070351758794e-05, - "loss": 5.9386, - "step": 6066 - }, - { - "epoch": 3.1640156453715775, - "grad_norm": 1.4356647729873657, - "learning_rate": 9.440603015075378e-05, - "loss": 5.8257, - "step": 6067 - }, - { - "epoch": 3.1645371577574966, - "grad_norm": 1.3996728658676147, - "learning_rate": 9.440502512562814e-05, - "loss": 5.8291, - "step": 6068 - }, - { - "epoch": 3.165058670143416, - "grad_norm": 1.3440876007080078, - "learning_rate": 9.440402010050252e-05, - "loss": 6.0764, - "step": 6069 - }, - { - "epoch": 3.165580182529335, - "grad_norm": 1.410819172859192, - "learning_rate": 9.440301507537688e-05, - "loss": 6.0274, - "step": 6070 - }, - { - "epoch": 3.166101694915254, - "grad_norm": 1.5255370140075684, - "learning_rate": 9.440201005025126e-05, - "loss": 5.768, - "step": 6071 - }, - { - "epoch": 3.1666232073011735, - "grad_norm": 1.7141236066818237, - "learning_rate": 9.440100502512563e-05, - "loss": 5.0864, - "step": 6072 - }, - { - "epoch": 3.1671447196870925, - "grad_norm": 1.5139796733856201, - "learning_rate": 9.44e-05, - "loss": 5.9771, - "step": 6073 - }, - { - "epoch": 3.1676662320730116, - "grad_norm": 1.5054020881652832, - "learning_rate": 9.439899497487438e-05, - "loss": 5.8732, - "step": 6074 - }, - { - "epoch": 3.168187744458931, - "grad_norm": 1.3833472728729248, - "learning_rate": 9.439798994974875e-05, - "loss": 6.0313, - "step": 6075 - }, - { - "epoch": 3.16870925684485, - "grad_norm": 1.567466378211975, - "learning_rate": 9.439698492462312e-05, - "loss": 5.4994, - "step": 6076 - }, - { - "epoch": 3.169230769230769, - "grad_norm": 1.5985236167907715, - "learning_rate": 9.439597989949749e-05, - "loss": 5.6879, - "step": 6077 - }, - { - "epoch": 3.1697522816166885, - "grad_norm": 1.7675803899765015, - "learning_rate": 9.439497487437187e-05, - "loss": 5.1523, - "step": 6078 - }, - { - "epoch": 3.1702737940026076, - "grad_norm": 1.955353021621704, - "learning_rate": 9.439396984924623e-05, - "loss": 6.0882, - "step": 6079 - }, - { - "epoch": 3.1707953063885266, - "grad_norm": 1.4026310443878174, - "learning_rate": 9.439296482412061e-05, - "loss": 5.9947, - "step": 6080 - }, - { - "epoch": 3.171316818774446, - "grad_norm": 1.45640230178833, - "learning_rate": 9.439195979899497e-05, - "loss": 5.7971, - "step": 6081 - }, - { - "epoch": 3.171838331160365, - "grad_norm": 1.6185758113861084, - "learning_rate": 9.439095477386935e-05, - "loss": 5.7397, - "step": 6082 - }, - { - "epoch": 3.172359843546284, - "grad_norm": 1.566951870918274, - "learning_rate": 9.438994974874373e-05, - "loss": 5.5345, - "step": 6083 - }, - { - "epoch": 3.1728813559322036, - "grad_norm": 1.386574387550354, - "learning_rate": 9.43889447236181e-05, - "loss": 6.0292, - "step": 6084 - }, - { - "epoch": 3.1734028683181226, - "grad_norm": 1.3778495788574219, - "learning_rate": 9.438793969849247e-05, - "loss": 6.2037, - "step": 6085 - }, - { - "epoch": 3.1739243807040416, - "grad_norm": 1.4927259683609009, - "learning_rate": 9.438693467336683e-05, - "loss": 5.8481, - "step": 6086 - }, - { - "epoch": 3.174445893089961, - "grad_norm": 1.4684858322143555, - "learning_rate": 9.438592964824121e-05, - "loss": 5.8527, - "step": 6087 - }, - { - "epoch": 3.17496740547588, - "grad_norm": 1.4262646436691284, - "learning_rate": 9.438492462311558e-05, - "loss": 5.9398, - "step": 6088 - }, - { - "epoch": 3.175488917861799, - "grad_norm": 1.5022201538085938, - "learning_rate": 9.438391959798995e-05, - "loss": 5.6911, - "step": 6089 - }, - { - "epoch": 3.1760104302477186, - "grad_norm": 1.6021486520767212, - "learning_rate": 9.438291457286432e-05, - "loss": 5.4866, - "step": 6090 - }, - { - "epoch": 3.1765319426336376, - "grad_norm": 1.419243574142456, - "learning_rate": 9.43819095477387e-05, - "loss": 5.9273, - "step": 6091 - }, - { - "epoch": 3.1770534550195566, - "grad_norm": 1.5401893854141235, - "learning_rate": 9.438090452261306e-05, - "loss": 5.6473, - "step": 6092 - }, - { - "epoch": 3.177574967405476, - "grad_norm": 1.4306854009628296, - "learning_rate": 9.437989949748744e-05, - "loss": 6.0221, - "step": 6093 - }, - { - "epoch": 3.178096479791395, - "grad_norm": 1.4795480966567993, - "learning_rate": 9.437889447236182e-05, - "loss": 5.8446, - "step": 6094 - }, - { - "epoch": 3.178617992177314, - "grad_norm": 1.435036063194275, - "learning_rate": 9.43778894472362e-05, - "loss": 6.0145, - "step": 6095 - }, - { - "epoch": 3.1791395045632336, - "grad_norm": 1.466265082359314, - "learning_rate": 9.437688442211056e-05, - "loss": 5.4105, - "step": 6096 - }, - { - "epoch": 3.1796610169491526, - "grad_norm": 1.4791849851608276, - "learning_rate": 9.437587939698494e-05, - "loss": 5.816, - "step": 6097 - }, - { - "epoch": 3.1801825293350716, - "grad_norm": 1.4403960704803467, - "learning_rate": 9.43748743718593e-05, - "loss": 5.8518, - "step": 6098 - }, - { - "epoch": 3.1807040417209906, - "grad_norm": 1.3632322549819946, - "learning_rate": 9.437386934673367e-05, - "loss": 6.1304, - "step": 6099 - }, - { - "epoch": 3.18122555410691, - "grad_norm": 1.5270798206329346, - "learning_rate": 9.437286432160804e-05, - "loss": 5.7269, - "step": 6100 - }, - { - "epoch": 3.181747066492829, - "grad_norm": 1.3566936254501343, - "learning_rate": 9.437185929648241e-05, - "loss": 5.7453, - "step": 6101 - }, - { - "epoch": 3.182268578878748, - "grad_norm": 1.4867922067642212, - "learning_rate": 9.437085427135679e-05, - "loss": 5.7366, - "step": 6102 - }, - { - "epoch": 3.1827900912646676, - "grad_norm": 1.4395008087158203, - "learning_rate": 9.436984924623116e-05, - "loss": 6.0992, - "step": 6103 - }, - { - "epoch": 3.1833116036505866, - "grad_norm": 1.4493725299835205, - "learning_rate": 9.436884422110554e-05, - "loss": 5.7234, - "step": 6104 - }, - { - "epoch": 3.1838331160365057, - "grad_norm": 1.5806353092193604, - "learning_rate": 9.43678391959799e-05, - "loss": 5.7826, - "step": 6105 - }, - { - "epoch": 3.184354628422425, - "grad_norm": 1.5170636177062988, - "learning_rate": 9.436683417085428e-05, - "loss": 5.876, - "step": 6106 - }, - { - "epoch": 3.184876140808344, - "grad_norm": 1.3262099027633667, - "learning_rate": 9.436582914572865e-05, - "loss": 5.4689, - "step": 6107 - }, - { - "epoch": 3.185397653194263, - "grad_norm": 1.426417589187622, - "learning_rate": 9.436482412060302e-05, - "loss": 5.9184, - "step": 6108 - }, - { - "epoch": 3.1859191655801826, - "grad_norm": 1.5642647743225098, - "learning_rate": 9.436381909547739e-05, - "loss": 5.6027, - "step": 6109 - }, - { - "epoch": 3.1864406779661016, - "grad_norm": 1.4244892597198486, - "learning_rate": 9.436281407035177e-05, - "loss": 5.8159, - "step": 6110 - }, - { - "epoch": 3.1869621903520207, - "grad_norm": 1.5422443151474, - "learning_rate": 9.436180904522613e-05, - "loss": 5.9187, - "step": 6111 - }, - { - "epoch": 3.18748370273794, - "grad_norm": 2.1564443111419678, - "learning_rate": 9.43608040201005e-05, - "loss": 5.8057, - "step": 6112 - }, - { - "epoch": 3.188005215123859, - "grad_norm": 1.6485815048217773, - "learning_rate": 9.435979899497487e-05, - "loss": 5.4979, - "step": 6113 - }, - { - "epoch": 3.188526727509778, - "grad_norm": 1.616163969039917, - "learning_rate": 9.435879396984925e-05, - "loss": 5.7881, - "step": 6114 - }, - { - "epoch": 3.1890482398956976, - "grad_norm": 1.4401814937591553, - "learning_rate": 9.435778894472363e-05, - "loss": 5.9127, - "step": 6115 - }, - { - "epoch": 3.1895697522816167, - "grad_norm": 1.4818191528320312, - "learning_rate": 9.4356783919598e-05, - "loss": 5.5766, - "step": 6116 - }, - { - "epoch": 3.1900912646675357, - "grad_norm": 1.3276259899139404, - "learning_rate": 9.435577889447237e-05, - "loss": 5.4775, - "step": 6117 - }, - { - "epoch": 3.190612777053455, - "grad_norm": 1.566565752029419, - "learning_rate": 9.435477386934674e-05, - "loss": 5.9043, - "step": 6118 - }, - { - "epoch": 3.191134289439374, - "grad_norm": 1.5047739744186401, - "learning_rate": 9.435376884422111e-05, - "loss": 6.1088, - "step": 6119 - }, - { - "epoch": 3.191655801825293, - "grad_norm": 1.364888310432434, - "learning_rate": 9.435276381909548e-05, - "loss": 5.96, - "step": 6120 - }, - { - "epoch": 3.1921773142112126, - "grad_norm": 1.382431983947754, - "learning_rate": 9.435175879396986e-05, - "loss": 6.1059, - "step": 6121 - }, - { - "epoch": 3.1926988265971317, - "grad_norm": 1.442171573638916, - "learning_rate": 9.435075376884422e-05, - "loss": 5.898, - "step": 6122 - }, - { - "epoch": 3.1932203389830507, - "grad_norm": 1.4232732057571411, - "learning_rate": 9.43497487437186e-05, - "loss": 5.4687, - "step": 6123 - }, - { - "epoch": 3.19374185136897, - "grad_norm": 1.4841036796569824, - "learning_rate": 9.434874371859298e-05, - "loss": 5.698, - "step": 6124 - }, - { - "epoch": 3.194263363754889, - "grad_norm": 1.4730920791625977, - "learning_rate": 9.434773869346734e-05, - "loss": 5.8187, - "step": 6125 - }, - { - "epoch": 3.194784876140808, - "grad_norm": 1.4859837293624878, - "learning_rate": 9.434673366834172e-05, - "loss": 5.7071, - "step": 6126 - }, - { - "epoch": 3.1953063885267277, - "grad_norm": 1.4210513830184937, - "learning_rate": 9.434572864321608e-05, - "loss": 6.1255, - "step": 6127 - }, - { - "epoch": 3.1958279009126467, - "grad_norm": 1.3236411809921265, - "learning_rate": 9.434472361809046e-05, - "loss": 6.0863, - "step": 6128 - }, - { - "epoch": 3.1963494132985657, - "grad_norm": 1.4877365827560425, - "learning_rate": 9.434371859296482e-05, - "loss": 5.6671, - "step": 6129 - }, - { - "epoch": 3.196870925684485, - "grad_norm": 1.759805679321289, - "learning_rate": 9.43427135678392e-05, - "loss": 5.6058, - "step": 6130 - }, - { - "epoch": 3.197392438070404, - "grad_norm": 1.6147934198379517, - "learning_rate": 9.434170854271357e-05, - "loss": 5.3863, - "step": 6131 - }, - { - "epoch": 3.197913950456323, - "grad_norm": 1.4364882707595825, - "learning_rate": 9.434070351758794e-05, - "loss": 5.9259, - "step": 6132 - }, - { - "epoch": 3.1984354628422427, - "grad_norm": 1.528192400932312, - "learning_rate": 9.433969849246231e-05, - "loss": 5.5135, - "step": 6133 - }, - { - "epoch": 3.1989569752281617, - "grad_norm": 1.563140869140625, - "learning_rate": 9.433869346733669e-05, - "loss": 5.6918, - "step": 6134 - }, - { - "epoch": 3.1994784876140807, - "grad_norm": 1.3805739879608154, - "learning_rate": 9.433768844221106e-05, - "loss": 6.0711, - "step": 6135 - }, - { - "epoch": 3.2, - "grad_norm": 1.4779897928237915, - "learning_rate": 9.433668341708544e-05, - "loss": 5.8135, - "step": 6136 - }, - { - "epoch": 3.200521512385919, - "grad_norm": 1.4872703552246094, - "learning_rate": 9.43356783919598e-05, - "loss": 5.7767, - "step": 6137 - }, - { - "epoch": 3.201043024771838, - "grad_norm": 1.5415042638778687, - "learning_rate": 9.433467336683418e-05, - "loss": 5.4083, - "step": 6138 - }, - { - "epoch": 3.2015645371577577, - "grad_norm": 1.3856995105743408, - "learning_rate": 9.433366834170855e-05, - "loss": 5.1743, - "step": 6139 - }, - { - "epoch": 3.2020860495436767, - "grad_norm": 1.4336004257202148, - "learning_rate": 9.433266331658291e-05, - "loss": 5.9174, - "step": 6140 - }, - { - "epoch": 3.2026075619295957, - "grad_norm": 1.4959810972213745, - "learning_rate": 9.433165829145729e-05, - "loss": 5.668, - "step": 6141 - }, - { - "epoch": 3.203129074315515, - "grad_norm": 1.725293517112732, - "learning_rate": 9.433065326633165e-05, - "loss": 5.4051, - "step": 6142 - }, - { - "epoch": 3.203650586701434, - "grad_norm": 1.5430099964141846, - "learning_rate": 9.432964824120603e-05, - "loss": 5.8292, - "step": 6143 - }, - { - "epoch": 3.2041720990873532, - "grad_norm": 1.504542350769043, - "learning_rate": 9.432864321608041e-05, - "loss": 5.9079, - "step": 6144 - }, - { - "epoch": 3.2046936114732727, - "grad_norm": 1.5091444253921509, - "learning_rate": 9.432763819095479e-05, - "loss": 5.9387, - "step": 6145 - }, - { - "epoch": 3.2052151238591917, - "grad_norm": 1.4000188112258911, - "learning_rate": 9.432663316582915e-05, - "loss": 6.0136, - "step": 6146 - }, - { - "epoch": 3.2057366362451107, - "grad_norm": 1.5463130474090576, - "learning_rate": 9.432562814070353e-05, - "loss": 6.0266, - "step": 6147 - }, - { - "epoch": 3.20625814863103, - "grad_norm": 1.504245638847351, - "learning_rate": 9.43246231155779e-05, - "loss": 6.0189, - "step": 6148 - }, - { - "epoch": 3.2067796610169492, - "grad_norm": 1.4624865055084229, - "learning_rate": 9.432361809045227e-05, - "loss": 5.9523, - "step": 6149 - }, - { - "epoch": 3.2073011734028682, - "grad_norm": 1.4019678831100464, - "learning_rate": 9.432261306532664e-05, - "loss": 5.9323, - "step": 6150 - }, - { - "epoch": 3.2078226857887877, - "grad_norm": 1.5969626903533936, - "learning_rate": 9.432160804020101e-05, - "loss": 5.4796, - "step": 6151 - }, - { - "epoch": 3.2083441981747067, - "grad_norm": 1.4079861640930176, - "learning_rate": 9.432060301507538e-05, - "loss": 5.9885, - "step": 6152 - }, - { - "epoch": 3.2088657105606258, - "grad_norm": 1.4934377670288086, - "learning_rate": 9.431959798994974e-05, - "loss": 5.8049, - "step": 6153 - }, - { - "epoch": 3.209387222946545, - "grad_norm": 1.526559591293335, - "learning_rate": 9.431859296482412e-05, - "loss": 5.6143, - "step": 6154 - }, - { - "epoch": 3.2099087353324642, - "grad_norm": Infinity, - "learning_rate": 9.431859296482412e-05, - "loss": 5.4821, - "step": 6155 - }, - { - "epoch": 3.2104302477183833, - "grad_norm": 1.4396079778671265, - "learning_rate": 9.43175879396985e-05, - "loss": 5.8192, - "step": 6156 - }, - { - "epoch": 3.2109517601043023, - "grad_norm": 1.6479132175445557, - "learning_rate": 9.431658291457288e-05, - "loss": 5.1891, - "step": 6157 - }, - { - "epoch": 3.2114732724902217, - "grad_norm": 1.5770562887191772, - "learning_rate": 9.431557788944724e-05, - "loss": 5.7889, - "step": 6158 - }, - { - "epoch": 3.2119947848761408, - "grad_norm": 1.4472016096115112, - "learning_rate": 9.431457286432162e-05, - "loss": 6.1605, - "step": 6159 - }, - { - "epoch": 3.21251629726206, - "grad_norm": 1.5895344018936157, - "learning_rate": 9.431356783919598e-05, - "loss": 5.8367, - "step": 6160 - }, - { - "epoch": 3.2130378096479792, - "grad_norm": 1.6122419834136963, - "learning_rate": 9.431256281407036e-05, - "loss": 5.8029, - "step": 6161 - }, - { - "epoch": 3.2135593220338983, - "grad_norm": 1.5912319421768188, - "learning_rate": 9.431155778894472e-05, - "loss": 5.8672, - "step": 6162 - }, - { - "epoch": 3.2140808344198173, - "grad_norm": 1.5507028102874756, - "learning_rate": 9.43105527638191e-05, - "loss": 5.7505, - "step": 6163 - }, - { - "epoch": 3.2146023468057368, - "grad_norm": 1.6034657955169678, - "learning_rate": 9.430954773869347e-05, - "loss": 5.8018, - "step": 6164 - }, - { - "epoch": 3.2151238591916558, - "grad_norm": 1.3653719425201416, - "learning_rate": 9.430854271356784e-05, - "loss": 5.7991, - "step": 6165 - }, - { - "epoch": 3.215645371577575, - "grad_norm": 1.4457855224609375, - "learning_rate": 9.430753768844222e-05, - "loss": 5.9308, - "step": 6166 - }, - { - "epoch": 3.2161668839634943, - "grad_norm": 1.4226210117340088, - "learning_rate": 9.430653266331659e-05, - "loss": 5.8975, - "step": 6167 - }, - { - "epoch": 3.2166883963494133, - "grad_norm": 1.3609356880187988, - "learning_rate": 9.430552763819096e-05, - "loss": 6.0341, - "step": 6168 - }, - { - "epoch": 3.2172099087353323, - "grad_norm": 1.5904911756515503, - "learning_rate": 9.430452261306533e-05, - "loss": 5.1053, - "step": 6169 - }, - { - "epoch": 3.2177314211212518, - "grad_norm": 1.5178179740905762, - "learning_rate": 9.43035175879397e-05, - "loss": 5.9833, - "step": 6170 - }, - { - "epoch": 3.218252933507171, - "grad_norm": 1.5483949184417725, - "learning_rate": 9.430251256281407e-05, - "loss": 5.8001, - "step": 6171 - }, - { - "epoch": 3.21877444589309, - "grad_norm": 1.507005214691162, - "learning_rate": 9.430150753768845e-05, - "loss": 5.8381, - "step": 6172 - }, - { - "epoch": 3.2192959582790093, - "grad_norm": 1.5757869482040405, - "learning_rate": 9.430050251256281e-05, - "loss": 5.9566, - "step": 6173 - }, - { - "epoch": 3.2198174706649283, - "grad_norm": 1.5093121528625488, - "learning_rate": 9.429949748743719e-05, - "loss": 5.8179, - "step": 6174 - }, - { - "epoch": 3.2203389830508473, - "grad_norm": 1.487396001815796, - "learning_rate": 9.429849246231155e-05, - "loss": 6.0699, - "step": 6175 - }, - { - "epoch": 3.220860495436767, - "grad_norm": 1.5610569715499878, - "learning_rate": 9.429748743718593e-05, - "loss": 5.8325, - "step": 6176 - }, - { - "epoch": 3.221382007822686, - "grad_norm": 1.5405446290969849, - "learning_rate": 9.429648241206031e-05, - "loss": 5.539, - "step": 6177 - }, - { - "epoch": 3.221903520208605, - "grad_norm": 1.6973882913589478, - "learning_rate": 9.429547738693469e-05, - "loss": 5.1859, - "step": 6178 - }, - { - "epoch": 3.2224250325945243, - "grad_norm": 1.4973224401474, - "learning_rate": 9.429447236180905e-05, - "loss": 6.0823, - "step": 6179 - }, - { - "epoch": 3.2229465449804433, - "grad_norm": 1.4997273683547974, - "learning_rate": 9.429346733668342e-05, - "loss": 5.795, - "step": 6180 - }, - { - "epoch": 3.2234680573663623, - "grad_norm": 1.463489294052124, - "learning_rate": 9.42924623115578e-05, - "loss": 5.5135, - "step": 6181 - }, - { - "epoch": 3.223989569752282, - "grad_norm": 1.7521713972091675, - "learning_rate": 9.429145728643216e-05, - "loss": 5.8986, - "step": 6182 - }, - { - "epoch": 3.224511082138201, - "grad_norm": 1.4429104328155518, - "learning_rate": 9.429045226130654e-05, - "loss": 6.2022, - "step": 6183 - }, - { - "epoch": 3.22503259452412, - "grad_norm": 1.5415095090866089, - "learning_rate": 9.42894472361809e-05, - "loss": 5.8418, - "step": 6184 - }, - { - "epoch": 3.2255541069100393, - "grad_norm": 1.4995697736740112, - "learning_rate": 9.428844221105528e-05, - "loss": 5.501, - "step": 6185 - }, - { - "epoch": 3.2260756192959583, - "grad_norm": 1.4442633390426636, - "learning_rate": 9.428743718592966e-05, - "loss": 5.9899, - "step": 6186 - }, - { - "epoch": 3.2265971316818773, - "grad_norm": 1.5890473127365112, - "learning_rate": 9.428643216080403e-05, - "loss": 5.6381, - "step": 6187 - }, - { - "epoch": 3.227118644067797, - "grad_norm": 1.4777387380599976, - "learning_rate": 9.42854271356784e-05, - "loss": 5.3432, - "step": 6188 - }, - { - "epoch": 3.227640156453716, - "grad_norm": 1.4187445640563965, - "learning_rate": 9.428442211055278e-05, - "loss": 5.7174, - "step": 6189 - }, - { - "epoch": 3.228161668839635, - "grad_norm": 1.4590586423873901, - "learning_rate": 9.428341708542714e-05, - "loss": 5.5079, - "step": 6190 - }, - { - "epoch": 3.2286831812255543, - "grad_norm": 1.3635677099227905, - "learning_rate": 9.428241206030152e-05, - "loss": 5.7444, - "step": 6191 - }, - { - "epoch": 3.2292046936114733, - "grad_norm": 1.4760468006134033, - "learning_rate": 9.428140703517588e-05, - "loss": 5.7087, - "step": 6192 - }, - { - "epoch": 3.2297262059973924, - "grad_norm": 1.5209574699401855, - "learning_rate": 9.428040201005025e-05, - "loss": 5.0851, - "step": 6193 - }, - { - "epoch": 3.2302477183833114, - "grad_norm": 1.878067970275879, - "learning_rate": 9.427939698492463e-05, - "loss": 5.6347, - "step": 6194 - }, - { - "epoch": 3.230769230769231, - "grad_norm": 1.3821834325790405, - "learning_rate": 9.427839195979899e-05, - "loss": 5.679, - "step": 6195 - }, - { - "epoch": 3.23129074315515, - "grad_norm": 1.4105803966522217, - "learning_rate": 9.427738693467337e-05, - "loss": 5.7259, - "step": 6196 - }, - { - "epoch": 3.231812255541069, - "grad_norm": 1.5469595193862915, - "learning_rate": 9.427638190954775e-05, - "loss": 5.6666, - "step": 6197 - }, - { - "epoch": 3.2323337679269883, - "grad_norm": 1.4934452772140503, - "learning_rate": 9.427537688442212e-05, - "loss": 5.738, - "step": 6198 - }, - { - "epoch": 3.2328552803129074, - "grad_norm": 1.5218204259872437, - "learning_rate": 9.427437185929649e-05, - "loss": 5.7836, - "step": 6199 - }, - { - "epoch": 3.2333767926988264, - "grad_norm": 1.5871773958206177, - "learning_rate": 9.427336683417087e-05, - "loss": 5.9141, - "step": 6200 - }, - { - "epoch": 3.233898305084746, - "grad_norm": 1.7273310422897339, - "learning_rate": 9.427236180904523e-05, - "loss": 5.7793, - "step": 6201 - }, - { - "epoch": 3.234419817470665, - "grad_norm": 1.4747353792190552, - "learning_rate": 9.427135678391961e-05, - "loss": 5.7821, - "step": 6202 - }, - { - "epoch": 3.234941329856584, - "grad_norm": 1.597123384475708, - "learning_rate": 9.427035175879397e-05, - "loss": 5.7171, - "step": 6203 - }, - { - "epoch": 3.2354628422425034, - "grad_norm": 1.7552379369735718, - "learning_rate": 9.426934673366835e-05, - "loss": 5.4173, - "step": 6204 - }, - { - "epoch": 3.2359843546284224, - "grad_norm": 1.599637508392334, - "learning_rate": 9.426834170854271e-05, - "loss": 5.8695, - "step": 6205 - }, - { - "epoch": 3.2365058670143414, - "grad_norm": 1.4572144746780396, - "learning_rate": 9.426733668341708e-05, - "loss": 5.8079, - "step": 6206 - }, - { - "epoch": 3.237027379400261, - "grad_norm": 1.5812299251556396, - "learning_rate": 9.426633165829146e-05, - "loss": 5.6675, - "step": 6207 - }, - { - "epoch": 3.23754889178618, - "grad_norm": 1.8448617458343506, - "learning_rate": 9.426532663316583e-05, - "loss": 5.1875, - "step": 6208 - }, - { - "epoch": 3.238070404172099, - "grad_norm": 1.4621716737747192, - "learning_rate": 9.426432160804021e-05, - "loss": 5.9924, - "step": 6209 - }, - { - "epoch": 3.2385919165580184, - "grad_norm": 1.5728709697723389, - "learning_rate": 9.426331658291458e-05, - "loss": 5.5001, - "step": 6210 - }, - { - "epoch": 3.2391134289439374, - "grad_norm": 1.4774038791656494, - "learning_rate": 9.426231155778895e-05, - "loss": 5.579, - "step": 6211 - }, - { - "epoch": 3.2396349413298564, - "grad_norm": 1.3596423864364624, - "learning_rate": 9.426130653266332e-05, - "loss": 5.9127, - "step": 6212 - }, - { - "epoch": 3.240156453715776, - "grad_norm": 1.4205951690673828, - "learning_rate": 9.42603015075377e-05, - "loss": 6.031, - "step": 6213 - }, - { - "epoch": 3.240677966101695, - "grad_norm": 1.4626179933547974, - "learning_rate": 9.425929648241206e-05, - "loss": 5.7473, - "step": 6214 - }, - { - "epoch": 3.241199478487614, - "grad_norm": 1.5989692211151123, - "learning_rate": 9.425829145728644e-05, - "loss": 6.1115, - "step": 6215 - }, - { - "epoch": 3.2417209908735334, - "grad_norm": 1.9289218187332153, - "learning_rate": 9.42572864321608e-05, - "loss": 5.3352, - "step": 6216 - }, - { - "epoch": 3.2422425032594524, - "grad_norm": 1.7610050439834595, - "learning_rate": 9.425628140703518e-05, - "loss": 5.5337, - "step": 6217 - }, - { - "epoch": 3.2427640156453714, - "grad_norm": 1.6252809762954712, - "learning_rate": 9.425527638190956e-05, - "loss": 5.7601, - "step": 6218 - }, - { - "epoch": 3.243285528031291, - "grad_norm": 2.0292553901672363, - "learning_rate": 9.425427135678392e-05, - "loss": 5.61, - "step": 6219 - }, - { - "epoch": 3.24380704041721, - "grad_norm": 1.5304805040359497, - "learning_rate": 9.42532663316583e-05, - "loss": 5.5402, - "step": 6220 - }, - { - "epoch": 3.244328552803129, - "grad_norm": 2.130779266357422, - "learning_rate": 9.425226130653266e-05, - "loss": 5.5874, - "step": 6221 - }, - { - "epoch": 3.2448500651890484, - "grad_norm": 1.6013003587722778, - "learning_rate": 9.425125628140704e-05, - "loss": 5.8521, - "step": 6222 - }, - { - "epoch": 3.2453715775749674, - "grad_norm": 1.3950397968292236, - "learning_rate": 9.42502512562814e-05, - "loss": 6.0946, - "step": 6223 - }, - { - "epoch": 3.2458930899608864, - "grad_norm": 1.4838836193084717, - "learning_rate": 9.424924623115578e-05, - "loss": 5.8705, - "step": 6224 - }, - { - "epoch": 3.246414602346806, - "grad_norm": 1.5574755668640137, - "learning_rate": 9.424824120603015e-05, - "loss": 5.2937, - "step": 6225 - }, - { - "epoch": 3.246936114732725, - "grad_norm": 1.6120214462280273, - "learning_rate": 9.424723618090453e-05, - "loss": 5.7608, - "step": 6226 - }, - { - "epoch": 3.247457627118644, - "grad_norm": 2.0517330169677734, - "learning_rate": 9.424623115577889e-05, - "loss": 5.3524, - "step": 6227 - }, - { - "epoch": 3.2479791395045634, - "grad_norm": 1.5514570474624634, - "learning_rate": 9.424522613065327e-05, - "loss": 5.5999, - "step": 6228 - }, - { - "epoch": 3.2485006518904824, - "grad_norm": 1.4709736108779907, - "learning_rate": 9.424422110552765e-05, - "loss": 5.0958, - "step": 6229 - }, - { - "epoch": 3.2490221642764014, - "grad_norm": 1.4478611946105957, - "learning_rate": 9.424321608040202e-05, - "loss": 5.348, - "step": 6230 - }, - { - "epoch": 3.249543676662321, - "grad_norm": 1.3686213493347168, - "learning_rate": 9.424221105527639e-05, - "loss": 5.5376, - "step": 6231 - }, - { - "epoch": 3.25006518904824, - "grad_norm": 1.3932489156723022, - "learning_rate": 9.424120603015077e-05, - "loss": 5.7222, - "step": 6232 - }, - { - "epoch": 3.250586701434159, - "grad_norm": 1.4102818965911865, - "learning_rate": 9.424020100502513e-05, - "loss": 5.9446, - "step": 6233 - }, - { - "epoch": 3.2511082138200784, - "grad_norm": 1.4870766401290894, - "learning_rate": 9.42391959798995e-05, - "loss": 5.6183, - "step": 6234 - }, - { - "epoch": 3.2516297262059974, - "grad_norm": 1.5106326341629028, - "learning_rate": 9.423819095477387e-05, - "loss": 5.3844, - "step": 6235 - }, - { - "epoch": 3.2521512385919165, - "grad_norm": 1.4735982418060303, - "learning_rate": 9.423718592964824e-05, - "loss": 5.4775, - "step": 6236 - }, - { - "epoch": 3.252672750977836, - "grad_norm": 1.5121105909347534, - "learning_rate": 9.423618090452261e-05, - "loss": 5.6267, - "step": 6237 - }, - { - "epoch": 3.253194263363755, - "grad_norm": 1.3464893102645874, - "learning_rate": 9.423517587939699e-05, - "loss": 5.8976, - "step": 6238 - }, - { - "epoch": 3.253715775749674, - "grad_norm": 1.4475207328796387, - "learning_rate": 9.423417085427137e-05, - "loss": 6.142, - "step": 6239 - }, - { - "epoch": 3.2542372881355934, - "grad_norm": 1.5315498113632202, - "learning_rate": 9.423316582914573e-05, - "loss": 5.5301, - "step": 6240 - }, - { - "epoch": 3.2547588005215125, - "grad_norm": 1.4776184558868408, - "learning_rate": 9.423216080402011e-05, - "loss": 6.0038, - "step": 6241 - }, - { - "epoch": 3.2552803129074315, - "grad_norm": 1.9797841310501099, - "learning_rate": 9.423115577889448e-05, - "loss": 5.6206, - "step": 6242 - }, - { - "epoch": 3.255801825293351, - "grad_norm": 1.600041151046753, - "learning_rate": 9.423015075376885e-05, - "loss": 5.4084, - "step": 6243 - }, - { - "epoch": 3.25632333767927, - "grad_norm": 1.8355685472488403, - "learning_rate": 9.422914572864322e-05, - "loss": 5.497, - "step": 6244 - }, - { - "epoch": 3.256844850065189, - "grad_norm": 1.7045159339904785, - "learning_rate": 9.42281407035176e-05, - "loss": 5.5723, - "step": 6245 - }, - { - "epoch": 3.2573663624511084, - "grad_norm": 1.6835495233535767, - "learning_rate": 9.422713567839196e-05, - "loss": 5.5444, - "step": 6246 - }, - { - "epoch": 3.2578878748370275, - "grad_norm": 1.5292305946350098, - "learning_rate": 9.422613065326632e-05, - "loss": 5.8151, - "step": 6247 - }, - { - "epoch": 3.2584093872229465, - "grad_norm": 1.321090817451477, - "learning_rate": 9.42251256281407e-05, - "loss": 5.5419, - "step": 6248 - }, - { - "epoch": 3.258930899608866, - "grad_norm": 1.6109527349472046, - "learning_rate": 9.422412060301508e-05, - "loss": 5.3523, - "step": 6249 - }, - { - "epoch": 3.259452411994785, - "grad_norm": 1.4249200820922852, - "learning_rate": 9.422311557788946e-05, - "loss": 5.9587, - "step": 6250 - }, - { - "epoch": 3.259973924380704, - "grad_norm": 1.5037729740142822, - "learning_rate": 9.422211055276382e-05, - "loss": 5.8155, - "step": 6251 - }, - { - "epoch": 3.2604954367666235, - "grad_norm": 1.536786437034607, - "learning_rate": 9.42211055276382e-05, - "loss": 5.7871, - "step": 6252 - }, - { - "epoch": 3.2610169491525425, - "grad_norm": 1.4061353206634521, - "learning_rate": 9.422010050251256e-05, - "loss": 5.838, - "step": 6253 - }, - { - "epoch": 3.2615384615384615, - "grad_norm": 1.4665380716323853, - "learning_rate": 9.421909547738694e-05, - "loss": 5.5961, - "step": 6254 - }, - { - "epoch": 3.2620599739243805, - "grad_norm": 1.559973120689392, - "learning_rate": 9.421809045226131e-05, - "loss": 5.863, - "step": 6255 - }, - { - "epoch": 3.2625814863103, - "grad_norm": 1.5399184226989746, - "learning_rate": 9.421708542713568e-05, - "loss": 5.9697, - "step": 6256 - }, - { - "epoch": 3.263102998696219, - "grad_norm": 1.605275273323059, - "learning_rate": 9.421608040201005e-05, - "loss": 5.6683, - "step": 6257 - }, - { - "epoch": 3.263624511082138, - "grad_norm": 1.561556100845337, - "learning_rate": 9.421507537688443e-05, - "loss": 5.604, - "step": 6258 - }, - { - "epoch": 3.2641460234680575, - "grad_norm": 1.568371057510376, - "learning_rate": 9.42140703517588e-05, - "loss": 5.4717, - "step": 6259 - }, - { - "epoch": 3.2646675358539765, - "grad_norm": 1.5287699699401855, - "learning_rate": 9.421306532663317e-05, - "loss": 5.5309, - "step": 6260 - }, - { - "epoch": 3.2651890482398955, - "grad_norm": 1.4565407037734985, - "learning_rate": 9.421206030150755e-05, - "loss": 5.991, - "step": 6261 - }, - { - "epoch": 3.265710560625815, - "grad_norm": 1.5254547595977783, - "learning_rate": 9.421105527638191e-05, - "loss": 5.6364, - "step": 6262 - }, - { - "epoch": 3.266232073011734, - "grad_norm": 1.5330129861831665, - "learning_rate": 9.421005025125629e-05, - "loss": 5.8375, - "step": 6263 - }, - { - "epoch": 3.266753585397653, - "grad_norm": 1.5813093185424805, - "learning_rate": 9.420904522613065e-05, - "loss": 5.5621, - "step": 6264 - }, - { - "epoch": 3.2672750977835725, - "grad_norm": 1.4711748361587524, - "learning_rate": 9.420804020100503e-05, - "loss": 5.9392, - "step": 6265 - }, - { - "epoch": 3.2677966101694915, - "grad_norm": 1.3868285417556763, - "learning_rate": 9.42070351758794e-05, - "loss": 6.0265, - "step": 6266 - }, - { - "epoch": 3.2683181225554105, - "grad_norm": 1.5267021656036377, - "learning_rate": 9.420603015075377e-05, - "loss": 5.3824, - "step": 6267 - }, - { - "epoch": 3.26883963494133, - "grad_norm": 1.511199951171875, - "learning_rate": 9.420502512562814e-05, - "loss": 5.5165, - "step": 6268 - }, - { - "epoch": 3.269361147327249, - "grad_norm": 1.5234041213989258, - "learning_rate": 9.420402010050252e-05, - "loss": 5.3421, - "step": 6269 - }, - { - "epoch": 3.269882659713168, - "grad_norm": 1.406374216079712, - "learning_rate": 9.420301507537689e-05, - "loss": 5.9479, - "step": 6270 - }, - { - "epoch": 3.2704041720990875, - "grad_norm": 1.6851351261138916, - "learning_rate": 9.420201005025127e-05, - "loss": 5.4759, - "step": 6271 - }, - { - "epoch": 3.2709256844850065, - "grad_norm": 1.625986099243164, - "learning_rate": 9.420100502512564e-05, - "loss": 5.9714, - "step": 6272 - }, - { - "epoch": 3.2714471968709256, - "grad_norm": 1.6122866868972778, - "learning_rate": 9.42e-05, - "loss": 5.7673, - "step": 6273 - }, - { - "epoch": 3.271968709256845, - "grad_norm": 1.575973391532898, - "learning_rate": 9.419899497487438e-05, - "loss": 5.3833, - "step": 6274 - }, - { - "epoch": 3.272490221642764, - "grad_norm": 1.5484036207199097, - "learning_rate": 9.419798994974874e-05, - "loss": 5.8943, - "step": 6275 - }, - { - "epoch": 3.273011734028683, - "grad_norm": 1.5021764039993286, - "learning_rate": 9.419698492462312e-05, - "loss": 5.7167, - "step": 6276 - }, - { - "epoch": 3.2735332464146025, - "grad_norm": 1.3298256397247314, - "learning_rate": 9.419597989949748e-05, - "loss": 5.8269, - "step": 6277 - }, - { - "epoch": 3.2740547588005215, - "grad_norm": 1.4415249824523926, - "learning_rate": 9.419497487437186e-05, - "loss": 5.8175, - "step": 6278 - }, - { - "epoch": 3.2745762711864406, - "grad_norm": 1.510022521018982, - "learning_rate": 9.419396984924624e-05, - "loss": 5.9399, - "step": 6279 - }, - { - "epoch": 3.27509778357236, - "grad_norm": 1.5080536603927612, - "learning_rate": 9.419296482412062e-05, - "loss": 6.111, - "step": 6280 - }, - { - "epoch": 3.275619295958279, - "grad_norm": 1.4757399559020996, - "learning_rate": 9.419195979899498e-05, - "loss": 5.955, - "step": 6281 - }, - { - "epoch": 3.276140808344198, - "grad_norm": 1.520505428314209, - "learning_rate": 9.419095477386936e-05, - "loss": 5.5962, - "step": 6282 - }, - { - "epoch": 3.276662320730117, - "grad_norm": 1.5172866582870483, - "learning_rate": 9.418994974874372e-05, - "loss": 5.8843, - "step": 6283 - }, - { - "epoch": 3.2771838331160366, - "grad_norm": 1.4995776414871216, - "learning_rate": 9.41889447236181e-05, - "loss": 5.85, - "step": 6284 - }, - { - "epoch": 3.2777053455019556, - "grad_norm": 1.474346399307251, - "learning_rate": 9.418793969849247e-05, - "loss": 5.6411, - "step": 6285 - }, - { - "epoch": 3.2782268578878746, - "grad_norm": 1.4377052783966064, - "learning_rate": 9.418693467336683e-05, - "loss": 6.004, - "step": 6286 - }, - { - "epoch": 3.278748370273794, - "grad_norm": 1.3709311485290527, - "learning_rate": 9.418592964824121e-05, - "loss": 6.1462, - "step": 6287 - }, - { - "epoch": 3.279269882659713, - "grad_norm": 1.5570496320724487, - "learning_rate": 9.418492462311557e-05, - "loss": 5.7198, - "step": 6288 - }, - { - "epoch": 3.279791395045632, - "grad_norm": 1.5457420349121094, - "learning_rate": 9.418391959798995e-05, - "loss": 5.7194, - "step": 6289 - }, - { - "epoch": 3.2803129074315516, - "grad_norm": 1.5750361680984497, - "learning_rate": 9.418291457286433e-05, - "loss": 5.8121, - "step": 6290 - }, - { - "epoch": 3.2808344198174706, - "grad_norm": 1.5564978122711182, - "learning_rate": 9.41819095477387e-05, - "loss": 6.1313, - "step": 6291 - }, - { - "epoch": 3.2813559322033896, - "grad_norm": 1.36670982837677, - "learning_rate": 9.418090452261307e-05, - "loss": 5.7456, - "step": 6292 - }, - { - "epoch": 3.281877444589309, - "grad_norm": 1.5152801275253296, - "learning_rate": 9.417989949748745e-05, - "loss": 5.859, - "step": 6293 - }, - { - "epoch": 3.282398956975228, - "grad_norm": 1.5515871047973633, - "learning_rate": 9.417889447236181e-05, - "loss": 5.7316, - "step": 6294 - }, - { - "epoch": 3.282920469361147, - "grad_norm": 1.3828035593032837, - "learning_rate": 9.417788944723619e-05, - "loss": 5.8897, - "step": 6295 - }, - { - "epoch": 3.2834419817470666, - "grad_norm": 1.3857958316802979, - "learning_rate": 9.417688442211055e-05, - "loss": 6.0004, - "step": 6296 - }, - { - "epoch": 3.2839634941329856, - "grad_norm": 1.5058804750442505, - "learning_rate": 9.417587939698493e-05, - "loss": 5.8873, - "step": 6297 - }, - { - "epoch": 3.2844850065189046, - "grad_norm": 1.6497896909713745, - "learning_rate": 9.41748743718593e-05, - "loss": 5.5469, - "step": 6298 - }, - { - "epoch": 3.285006518904824, - "grad_norm": 1.5003020763397217, - "learning_rate": 9.417386934673367e-05, - "loss": 5.9339, - "step": 6299 - }, - { - "epoch": 3.285528031290743, - "grad_norm": 1.633744239807129, - "learning_rate": 9.417286432160805e-05, - "loss": 5.5447, - "step": 6300 - }, - { - "epoch": 3.286049543676662, - "grad_norm": 1.451022744178772, - "learning_rate": 9.417185929648242e-05, - "loss": 6.1105, - "step": 6301 - }, - { - "epoch": 3.2865710560625816, - "grad_norm": 1.4520457983016968, - "learning_rate": 9.41708542713568e-05, - "loss": 5.7056, - "step": 6302 - }, - { - "epoch": 3.2870925684485006, - "grad_norm": 1.5921359062194824, - "learning_rate": 9.416984924623116e-05, - "loss": 5.5969, - "step": 6303 - }, - { - "epoch": 3.2876140808344196, - "grad_norm": 1.4548046588897705, - "learning_rate": 9.416884422110554e-05, - "loss": 5.8507, - "step": 6304 - }, - { - "epoch": 3.288135593220339, - "grad_norm": 1.4354314804077148, - "learning_rate": 9.41678391959799e-05, - "loss": 5.9224, - "step": 6305 - }, - { - "epoch": 3.288657105606258, - "grad_norm": 1.568544864654541, - "learning_rate": 9.416683417085428e-05, - "loss": 5.6933, - "step": 6306 - }, - { - "epoch": 3.289178617992177, - "grad_norm": 1.408516526222229, - "learning_rate": 9.416582914572864e-05, - "loss": 5.3399, - "step": 6307 - }, - { - "epoch": 3.2897001303780966, - "grad_norm": 1.516940951347351, - "learning_rate": 9.416482412060302e-05, - "loss": 6.087, - "step": 6308 - }, - { - "epoch": 3.2902216427640156, - "grad_norm": 1.546155333518982, - "learning_rate": 9.416381909547738e-05, - "loss": 5.8785, - "step": 6309 - }, - { - "epoch": 3.2907431551499347, - "grad_norm": 1.515830159187317, - "learning_rate": 9.416281407035176e-05, - "loss": 5.8744, - "step": 6310 - }, - { - "epoch": 3.291264667535854, - "grad_norm": 1.4667038917541504, - "learning_rate": 9.416180904522614e-05, - "loss": 5.7633, - "step": 6311 - }, - { - "epoch": 3.291786179921773, - "grad_norm": 1.4612380266189575, - "learning_rate": 9.41608040201005e-05, - "loss": 5.5035, - "step": 6312 - }, - { - "epoch": 3.292307692307692, - "grad_norm": 1.676938533782959, - "learning_rate": 9.415979899497488e-05, - "loss": 5.3423, - "step": 6313 - }, - { - "epoch": 3.2928292046936116, - "grad_norm": 1.6605854034423828, - "learning_rate": 9.415879396984925e-05, - "loss": 6.0312, - "step": 6314 - }, - { - "epoch": 3.2933507170795306, - "grad_norm": 1.5755916833877563, - "learning_rate": 9.415778894472362e-05, - "loss": 5.7056, - "step": 6315 - }, - { - "epoch": 3.2938722294654497, - "grad_norm": 1.5381625890731812, - "learning_rate": 9.415678391959799e-05, - "loss": 5.7779, - "step": 6316 - }, - { - "epoch": 3.294393741851369, - "grad_norm": 1.5037357807159424, - "learning_rate": 9.415577889447237e-05, - "loss": 5.9729, - "step": 6317 - }, - { - "epoch": 3.294915254237288, - "grad_norm": 1.6524542570114136, - "learning_rate": 9.415477386934673e-05, - "loss": 5.197, - "step": 6318 - }, - { - "epoch": 3.295436766623207, - "grad_norm": 1.7889353036880493, - "learning_rate": 9.415376884422111e-05, - "loss": 5.5201, - "step": 6319 - }, - { - "epoch": 3.2959582790091266, - "grad_norm": 1.4422242641448975, - "learning_rate": 9.415276381909549e-05, - "loss": 5.5314, - "step": 6320 - }, - { - "epoch": 3.2964797913950457, - "grad_norm": 1.4173004627227783, - "learning_rate": 9.415175879396986e-05, - "loss": 6.0459, - "step": 6321 - }, - { - "epoch": 3.2970013037809647, - "grad_norm": 1.5218665599822998, - "learning_rate": 9.415075376884423e-05, - "loss": 5.8927, - "step": 6322 - }, - { - "epoch": 3.297522816166884, - "grad_norm": 1.5410388708114624, - "learning_rate": 9.41497487437186e-05, - "loss": 5.8328, - "step": 6323 - }, - { - "epoch": 3.298044328552803, - "grad_norm": 1.4395819902420044, - "learning_rate": 9.414874371859297e-05, - "loss": 5.5142, - "step": 6324 - }, - { - "epoch": 3.298565840938722, - "grad_norm": 1.6833597421646118, - "learning_rate": 9.414773869346735e-05, - "loss": 5.1359, - "step": 6325 - }, - { - "epoch": 3.2990873533246416, - "grad_norm": 1.6448813676834106, - "learning_rate": 9.414673366834171e-05, - "loss": 5.4546, - "step": 6326 - }, - { - "epoch": 3.2996088657105607, - "grad_norm": 1.4420127868652344, - "learning_rate": 9.414572864321608e-05, - "loss": 5.8197, - "step": 6327 - }, - { - "epoch": 3.3001303780964797, - "grad_norm": 1.7373369932174683, - "learning_rate": 9.414472361809045e-05, - "loss": 5.5324, - "step": 6328 - }, - { - "epoch": 3.300651890482399, - "grad_norm": 1.632926106452942, - "learning_rate": 9.414371859296482e-05, - "loss": 4.6859, - "step": 6329 - }, - { - "epoch": 3.301173402868318, - "grad_norm": 1.5780954360961914, - "learning_rate": 9.41427135678392e-05, - "loss": 5.6283, - "step": 6330 - }, - { - "epoch": 3.301694915254237, - "grad_norm": 1.4100595712661743, - "learning_rate": 9.414170854271357e-05, - "loss": 5.762, - "step": 6331 - }, - { - "epoch": 3.3022164276401567, - "grad_norm": 1.4546133279800415, - "learning_rate": 9.414070351758795e-05, - "loss": 5.5743, - "step": 6332 - }, - { - "epoch": 3.3027379400260757, - "grad_norm": 1.309650182723999, - "learning_rate": 9.413969849246232e-05, - "loss": 5.936, - "step": 6333 - }, - { - "epoch": 3.3032594524119947, - "grad_norm": 1.7646243572235107, - "learning_rate": 9.41386934673367e-05, - "loss": 5.2422, - "step": 6334 - }, - { - "epoch": 3.303780964797914, - "grad_norm": 1.6041548252105713, - "learning_rate": 9.413768844221106e-05, - "loss": 5.9187, - "step": 6335 - }, - { - "epoch": 3.304302477183833, - "grad_norm": 1.5420531034469604, - "learning_rate": 9.413668341708544e-05, - "loss": 6.1333, - "step": 6336 - }, - { - "epoch": 3.304823989569752, - "grad_norm": 1.463961124420166, - "learning_rate": 9.41356783919598e-05, - "loss": 5.7786, - "step": 6337 - }, - { - "epoch": 3.3053455019556717, - "grad_norm": 1.6471130847930908, - "learning_rate": 9.413467336683418e-05, - "loss": 5.2185, - "step": 6338 - }, - { - "epoch": 3.3058670143415907, - "grad_norm": 1.5845367908477783, - "learning_rate": 9.413366834170854e-05, - "loss": 5.9985, - "step": 6339 - }, - { - "epoch": 3.3063885267275097, - "grad_norm": 1.623010277748108, - "learning_rate": 9.413266331658292e-05, - "loss": 5.1589, - "step": 6340 - }, - { - "epoch": 3.306910039113429, - "grad_norm": 1.464404582977295, - "learning_rate": 9.41316582914573e-05, - "loss": 5.8392, - "step": 6341 - }, - { - "epoch": 3.307431551499348, - "grad_norm": 1.41786789894104, - "learning_rate": 9.413065326633166e-05, - "loss": 6.0195, - "step": 6342 - }, - { - "epoch": 3.307953063885267, - "grad_norm": 1.5298436880111694, - "learning_rate": 9.412964824120604e-05, - "loss": 5.4, - "step": 6343 - }, - { - "epoch": 3.3084745762711867, - "grad_norm": 1.4281961917877197, - "learning_rate": 9.41286432160804e-05, - "loss": 5.7986, - "step": 6344 - }, - { - "epoch": 3.3089960886571057, - "grad_norm": 1.468389868736267, - "learning_rate": 9.412763819095478e-05, - "loss": 5.7825, - "step": 6345 - }, - { - "epoch": 3.3095176010430247, - "grad_norm": 1.503420114517212, - "learning_rate": 9.412663316582915e-05, - "loss": 5.8598, - "step": 6346 - }, - { - "epoch": 3.3100391134289437, - "grad_norm": 1.594232201576233, - "learning_rate": 9.412562814070352e-05, - "loss": 5.7053, - "step": 6347 - }, - { - "epoch": 3.310560625814863, - "grad_norm": 1.617573618888855, - "learning_rate": 9.412462311557789e-05, - "loss": 5.9103, - "step": 6348 - }, - { - "epoch": 3.3110821382007822, - "grad_norm": 1.515859603881836, - "learning_rate": 9.412361809045227e-05, - "loss": 5.7651, - "step": 6349 - }, - { - "epoch": 3.3116036505867013, - "grad_norm": 1.4639352560043335, - "learning_rate": 9.412261306532663e-05, - "loss": 6.1045, - "step": 6350 - }, - { - "epoch": 3.3121251629726207, - "grad_norm": 1.443021535873413, - "learning_rate": 9.412160804020101e-05, - "loss": 5.7979, - "step": 6351 - }, - { - "epoch": 3.3126466753585397, - "grad_norm": 1.6071858406066895, - "learning_rate": 9.412060301507539e-05, - "loss": 5.7652, - "step": 6352 - }, - { - "epoch": 3.3131681877444588, - "grad_norm": 1.4958491325378418, - "learning_rate": 9.411959798994975e-05, - "loss": 5.5006, - "step": 6353 - }, - { - "epoch": 3.3136897001303782, - "grad_norm": 1.4827463626861572, - "learning_rate": 9.411859296482413e-05, - "loss": 5.3885, - "step": 6354 - }, - { - "epoch": 3.3142112125162972, - "grad_norm": 1.5755757093429565, - "learning_rate": 9.41175879396985e-05, - "loss": 5.8804, - "step": 6355 - }, - { - "epoch": 3.3147327249022163, - "grad_norm": 1.6555750370025635, - "learning_rate": 9.411658291457287e-05, - "loss": 6.029, - "step": 6356 - }, - { - "epoch": 3.3152542372881357, - "grad_norm": 1.5130627155303955, - "learning_rate": 9.411557788944724e-05, - "loss": 6.1876, - "step": 6357 - }, - { - "epoch": 3.3157757496740548, - "grad_norm": 1.5127336978912354, - "learning_rate": 9.411457286432161e-05, - "loss": 5.6832, - "step": 6358 - }, - { - "epoch": 3.3162972620599738, - "grad_norm": 1.5517032146453857, - "learning_rate": 9.411356783919598e-05, - "loss": 5.6401, - "step": 6359 - }, - { - "epoch": 3.3168187744458932, - "grad_norm": 1.5533636808395386, - "learning_rate": 9.411256281407036e-05, - "loss": 5.794, - "step": 6360 - }, - { - "epoch": 3.3173402868318123, - "grad_norm": 1.5055230855941772, - "learning_rate": 9.411155778894472e-05, - "loss": 5.4846, - "step": 6361 - }, - { - "epoch": 3.3178617992177313, - "grad_norm": 1.5254874229431152, - "learning_rate": 9.41105527638191e-05, - "loss": 5.553, - "step": 6362 - }, - { - "epoch": 3.3183833116036507, - "grad_norm": 1.4541101455688477, - "learning_rate": 9.410954773869348e-05, - "loss": 5.512, - "step": 6363 - }, - { - "epoch": 3.3189048239895698, - "grad_norm": 1.4146724939346313, - "learning_rate": 9.410854271356785e-05, - "loss": 5.8886, - "step": 6364 - }, - { - "epoch": 3.319426336375489, - "grad_norm": 1.3644574880599976, - "learning_rate": 9.410753768844222e-05, - "loss": 6.21, - "step": 6365 - }, - { - "epoch": 3.3199478487614082, - "grad_norm": 1.454546570777893, - "learning_rate": 9.410653266331658e-05, - "loss": 6.128, - "step": 6366 - }, - { - "epoch": 3.3204693611473273, - "grad_norm": 1.4316980838775635, - "learning_rate": 9.410552763819096e-05, - "loss": 5.9424, - "step": 6367 - }, - { - "epoch": 3.3209908735332463, - "grad_norm": 1.4441211223602295, - "learning_rate": 9.410452261306532e-05, - "loss": 5.8854, - "step": 6368 - }, - { - "epoch": 3.3215123859191658, - "grad_norm": 1.5681601762771606, - "learning_rate": 9.41035175879397e-05, - "loss": 5.5148, - "step": 6369 - }, - { - "epoch": 3.3220338983050848, - "grad_norm": 1.6701209545135498, - "learning_rate": 9.410251256281407e-05, - "loss": 5.7655, - "step": 6370 - }, - { - "epoch": 3.322555410691004, - "grad_norm": 1.4533343315124512, - "learning_rate": 9.410150753768844e-05, - "loss": 6.0016, - "step": 6371 - }, - { - "epoch": 3.3230769230769233, - "grad_norm": 1.57914137840271, - "learning_rate": 9.410050251256282e-05, - "loss": 5.161, - "step": 6372 - }, - { - "epoch": 3.3235984354628423, - "grad_norm": 1.5068385601043701, - "learning_rate": 9.40994974874372e-05, - "loss": 5.0814, - "step": 6373 - }, - { - "epoch": 3.3241199478487613, - "grad_norm": 1.561515212059021, - "learning_rate": 9.409849246231156e-05, - "loss": 5.7566, - "step": 6374 - }, - { - "epoch": 3.3246414602346808, - "grad_norm": 1.3420336246490479, - "learning_rate": 9.409748743718594e-05, - "loss": 6.1102, - "step": 6375 - }, - { - "epoch": 3.3251629726206, - "grad_norm": 1.457801342010498, - "learning_rate": 9.40964824120603e-05, - "loss": 5.6199, - "step": 6376 - }, - { - "epoch": 3.325684485006519, - "grad_norm": 1.455648422241211, - "learning_rate": 9.409547738693468e-05, - "loss": 5.6653, - "step": 6377 - }, - { - "epoch": 3.326205997392438, - "grad_norm": 1.5172191858291626, - "learning_rate": 9.409447236180905e-05, - "loss": 5.2972, - "step": 6378 - }, - { - "epoch": 3.3267275097783573, - "grad_norm": 1.5300291776657104, - "learning_rate": 9.409346733668341e-05, - "loss": 5.4847, - "step": 6379 - }, - { - "epoch": 3.3272490221642763, - "grad_norm": 1.5404859781265259, - "learning_rate": 9.409246231155779e-05, - "loss": 5.604, - "step": 6380 - }, - { - "epoch": 3.3277705345501953, - "grad_norm": 1.6615705490112305, - "learning_rate": 9.409145728643215e-05, - "loss": 5.5327, - "step": 6381 - }, - { - "epoch": 3.328292046936115, - "grad_norm": 1.4334287643432617, - "learning_rate": 9.409045226130653e-05, - "loss": 5.7275, - "step": 6382 - }, - { - "epoch": 3.328813559322034, - "grad_norm": 1.4742642641067505, - "learning_rate": 9.408944723618091e-05, - "loss": 5.8876, - "step": 6383 - }, - { - "epoch": 3.329335071707953, - "grad_norm": 1.4157058000564575, - "learning_rate": 9.408844221105529e-05, - "loss": 5.9304, - "step": 6384 - }, - { - "epoch": 3.3298565840938723, - "grad_norm": 1.482864260673523, - "learning_rate": 9.408743718592965e-05, - "loss": 5.686, - "step": 6385 - }, - { - "epoch": 3.3303780964797913, - "grad_norm": 1.4490660429000854, - "learning_rate": 9.408643216080403e-05, - "loss": 5.7379, - "step": 6386 - }, - { - "epoch": 3.3308996088657103, - "grad_norm": 1.3994752168655396, - "learning_rate": 9.40854271356784e-05, - "loss": 6.064, - "step": 6387 - }, - { - "epoch": 3.33142112125163, - "grad_norm": 1.5044711828231812, - "learning_rate": 9.408442211055277e-05, - "loss": 5.9965, - "step": 6388 - }, - { - "epoch": 3.331942633637549, - "grad_norm": 1.583184003829956, - "learning_rate": 9.408341708542714e-05, - "loss": 5.3363, - "step": 6389 - }, - { - "epoch": 3.332464146023468, - "grad_norm": 1.439503788948059, - "learning_rate": 9.408241206030151e-05, - "loss": 5.6403, - "step": 6390 - }, - { - "epoch": 3.3329856584093873, - "grad_norm": 1.473559856414795, - "learning_rate": 9.408140703517588e-05, - "loss": 5.6616, - "step": 6391 - }, - { - "epoch": 3.3335071707953063, - "grad_norm": 1.3508695363998413, - "learning_rate": 9.408040201005026e-05, - "loss": 6.0295, - "step": 6392 - }, - { - "epoch": 3.3340286831812254, - "grad_norm": 1.5003570318222046, - "learning_rate": 9.407939698492463e-05, - "loss": 5.7976, - "step": 6393 - }, - { - "epoch": 3.334550195567145, - "grad_norm": 1.38614821434021, - "learning_rate": 9.4078391959799e-05, - "loss": 5.8254, - "step": 6394 - }, - { - "epoch": 3.335071707953064, - "grad_norm": 1.4649542570114136, - "learning_rate": 9.407738693467338e-05, - "loss": 5.6401, - "step": 6395 - }, - { - "epoch": 3.335593220338983, - "grad_norm": 1.518234372138977, - "learning_rate": 9.407638190954774e-05, - "loss": 5.7988, - "step": 6396 - }, - { - "epoch": 3.3361147327249023, - "grad_norm": 1.447389841079712, - "learning_rate": 9.407537688442212e-05, - "loss": 5.836, - "step": 6397 - }, - { - "epoch": 3.3366362451108214, - "grad_norm": 1.6171414852142334, - "learning_rate": 9.407437185929648e-05, - "loss": 5.657, - "step": 6398 - }, - { - "epoch": 3.3371577574967404, - "grad_norm": 1.524062156677246, - "learning_rate": 9.407336683417086e-05, - "loss": 5.655, - "step": 6399 - }, - { - "epoch": 3.33767926988266, - "grad_norm": 1.4842967987060547, - "learning_rate": 9.407236180904522e-05, - "loss": 5.5815, - "step": 6400 - }, - { - "epoch": 3.338200782268579, - "grad_norm": 1.468058705329895, - "learning_rate": 9.40713567839196e-05, - "loss": 5.9616, - "step": 6401 - }, - { - "epoch": 3.338722294654498, - "grad_norm": 1.4441412687301636, - "learning_rate": 9.407035175879397e-05, - "loss": 5.9398, - "step": 6402 - }, - { - "epoch": 3.3392438070404173, - "grad_norm": 1.589705467224121, - "learning_rate": 9.406934673366834e-05, - "loss": 5.4792, - "step": 6403 - }, - { - "epoch": 3.3397653194263364, - "grad_norm": 1.590893268585205, - "learning_rate": 9.406834170854272e-05, - "loss": 5.1545, - "step": 6404 - }, - { - "epoch": 3.3402868318122554, - "grad_norm": 1.415752649307251, - "learning_rate": 9.406733668341709e-05, - "loss": 5.5264, - "step": 6405 - }, - { - "epoch": 3.340808344198175, - "grad_norm": 1.5852222442626953, - "learning_rate": 9.406633165829146e-05, - "loss": 5.748, - "step": 6406 - }, - { - "epoch": 3.341329856584094, - "grad_norm": 1.490362524986267, - "learning_rate": 9.406532663316583e-05, - "loss": 5.8458, - "step": 6407 - }, - { - "epoch": 3.341851368970013, - "grad_norm": 1.492091178894043, - "learning_rate": 9.40643216080402e-05, - "loss": 5.3645, - "step": 6408 - }, - { - "epoch": 3.3423728813559324, - "grad_norm": 1.5137943029403687, - "learning_rate": 9.406331658291457e-05, - "loss": 5.9262, - "step": 6409 - }, - { - "epoch": 3.3428943937418514, - "grad_norm": 1.6387317180633545, - "learning_rate": 9.406231155778895e-05, - "loss": 5.7096, - "step": 6410 - }, - { - "epoch": 3.3434159061277704, - "grad_norm": 1.3785731792449951, - "learning_rate": 9.406130653266331e-05, - "loss": 5.525, - "step": 6411 - }, - { - "epoch": 3.34393741851369, - "grad_norm": 1.3889933824539185, - "learning_rate": 9.406030150753769e-05, - "loss": 5.8437, - "step": 6412 - }, - { - "epoch": 3.344458930899609, - "grad_norm": 1.3700517416000366, - "learning_rate": 9.405929648241207e-05, - "loss": 5.885, - "step": 6413 - }, - { - "epoch": 3.344980443285528, - "grad_norm": 1.4141086339950562, - "learning_rate": 9.405829145728645e-05, - "loss": 5.9811, - "step": 6414 - }, - { - "epoch": 3.3455019556714474, - "grad_norm": 1.4639480113983154, - "learning_rate": 9.405728643216081e-05, - "loss": 6.0593, - "step": 6415 - }, - { - "epoch": 3.3460234680573664, - "grad_norm": 1.5258677005767822, - "learning_rate": 9.405628140703519e-05, - "loss": 5.813, - "step": 6416 - }, - { - "epoch": 3.3465449804432854, - "grad_norm": 1.773355484008789, - "learning_rate": 9.405527638190955e-05, - "loss": 5.2558, - "step": 6417 - }, - { - "epoch": 3.347066492829205, - "grad_norm": 1.4043976068496704, - "learning_rate": 9.405427135678393e-05, - "loss": 6.0572, - "step": 6418 - }, - { - "epoch": 3.347588005215124, - "grad_norm": 1.3784087896347046, - "learning_rate": 9.40532663316583e-05, - "loss": 5.7787, - "step": 6419 - }, - { - "epoch": 3.348109517601043, - "grad_norm": 1.4332334995269775, - "learning_rate": 9.405226130653266e-05, - "loss": 5.7922, - "step": 6420 - }, - { - "epoch": 3.3486310299869624, - "grad_norm": 1.5401835441589355, - "learning_rate": 9.405125628140704e-05, - "loss": 5.1052, - "step": 6421 - }, - { - "epoch": 3.3491525423728814, - "grad_norm": 1.4492846727371216, - "learning_rate": 9.40502512562814e-05, - "loss": 5.6992, - "step": 6422 - }, - { - "epoch": 3.3496740547588004, - "grad_norm": 1.3719035387039185, - "learning_rate": 9.404924623115578e-05, - "loss": 5.9632, - "step": 6423 - }, - { - "epoch": 3.35019556714472, - "grad_norm": 1.5205992460250854, - "learning_rate": 9.404824120603016e-05, - "loss": 5.8245, - "step": 6424 - }, - { - "epoch": 3.350717079530639, - "grad_norm": 1.5881708860397339, - "learning_rate": 9.404723618090453e-05, - "loss": 5.6207, - "step": 6425 - }, - { - "epoch": 3.351238591916558, - "grad_norm": 1.5960251092910767, - "learning_rate": 9.40462311557789e-05, - "loss": 5.5833, - "step": 6426 - }, - { - "epoch": 3.3517601043024774, - "grad_norm": 1.4527392387390137, - "learning_rate": 9.404522613065328e-05, - "loss": 5.8865, - "step": 6427 - }, - { - "epoch": 3.3522816166883964, - "grad_norm": 1.6216917037963867, - "learning_rate": 9.404422110552764e-05, - "loss": 5.0898, - "step": 6428 - }, - { - "epoch": 3.3528031290743154, - "grad_norm": 1.5218192338943481, - "learning_rate": 9.404321608040202e-05, - "loss": 6.2305, - "step": 6429 - }, - { - "epoch": 3.353324641460235, - "grad_norm": 1.7974135875701904, - "learning_rate": 9.404221105527638e-05, - "loss": 5.3839, - "step": 6430 - }, - { - "epoch": 3.353846153846154, - "grad_norm": 1.4692200422286987, - "learning_rate": 9.404120603015076e-05, - "loss": 5.8771, - "step": 6431 - }, - { - "epoch": 3.354367666232073, - "grad_norm": 1.4745904207229614, - "learning_rate": 9.404020100502513e-05, - "loss": 5.6909, - "step": 6432 - }, - { - "epoch": 3.3548891786179924, - "grad_norm": 1.6213864088058472, - "learning_rate": 9.40391959798995e-05, - "loss": 5.9198, - "step": 6433 - }, - { - "epoch": 3.3554106910039114, - "grad_norm": 1.6370612382888794, - "learning_rate": 9.403819095477388e-05, - "loss": 5.6473, - "step": 6434 - }, - { - "epoch": 3.3559322033898304, - "grad_norm": 1.5853809118270874, - "learning_rate": 9.403718592964825e-05, - "loss": 5.4175, - "step": 6435 - }, - { - "epoch": 3.35645371577575, - "grad_norm": 1.519562840461731, - "learning_rate": 9.403618090452262e-05, - "loss": 5.8427, - "step": 6436 - }, - { - "epoch": 3.356975228161669, - "grad_norm": 1.610128402709961, - "learning_rate": 9.403517587939699e-05, - "loss": 5.8247, - "step": 6437 - }, - { - "epoch": 3.357496740547588, - "grad_norm": 1.520509123802185, - "learning_rate": 9.403417085427137e-05, - "loss": 5.4194, - "step": 6438 - }, - { - "epoch": 3.3580182529335074, - "grad_norm": 1.6838332414627075, - "learning_rate": 9.403316582914573e-05, - "loss": 5.5234, - "step": 6439 - }, - { - "epoch": 3.3585397653194264, - "grad_norm": 1.7237896919250488, - "learning_rate": 9.403216080402011e-05, - "loss": 5.2745, - "step": 6440 - }, - { - "epoch": 3.3590612777053455, - "grad_norm": 1.5761135816574097, - "learning_rate": 9.403115577889447e-05, - "loss": 5.6603, - "step": 6441 - }, - { - "epoch": 3.3595827900912645, - "grad_norm": 1.7206511497497559, - "learning_rate": 9.403015075376885e-05, - "loss": 5.4105, - "step": 6442 - }, - { - "epoch": 3.360104302477184, - "grad_norm": 1.7045494318008423, - "learning_rate": 9.402914572864321e-05, - "loss": 5.6755, - "step": 6443 - }, - { - "epoch": 3.360625814863103, - "grad_norm": 1.617280125617981, - "learning_rate": 9.402814070351759e-05, - "loss": 5.2217, - "step": 6444 - }, - { - "epoch": 3.361147327249022, - "grad_norm": 1.490423321723938, - "learning_rate": 9.402713567839197e-05, - "loss": 5.6791, - "step": 6445 - }, - { - "epoch": 3.3616688396349415, - "grad_norm": 1.5271817445755005, - "learning_rate": 9.402613065326633e-05, - "loss": 5.9129, - "step": 6446 - }, - { - "epoch": 3.3621903520208605, - "grad_norm": 1.3382233381271362, - "learning_rate": 9.402512562814071e-05, - "loss": 5.9247, - "step": 6447 - }, - { - "epoch": 3.3627118644067795, - "grad_norm": 1.5612832307815552, - "learning_rate": 9.402412060301508e-05, - "loss": 5.3969, - "step": 6448 - }, - { - "epoch": 3.363233376792699, - "grad_norm": 1.5798447132110596, - "learning_rate": 9.402311557788945e-05, - "loss": 5.9233, - "step": 6449 - }, - { - "epoch": 3.363754889178618, - "grad_norm": 1.377052664756775, - "learning_rate": 9.402211055276382e-05, - "loss": 6.0737, - "step": 6450 - }, - { - "epoch": 3.364276401564537, - "grad_norm": 1.3942419290542603, - "learning_rate": 9.40211055276382e-05, - "loss": 6.023, - "step": 6451 - }, - { - "epoch": 3.3647979139504565, - "grad_norm": 1.417812705039978, - "learning_rate": 9.402010050251256e-05, - "loss": 6.1282, - "step": 6452 - }, - { - "epoch": 3.3653194263363755, - "grad_norm": 1.3771721124649048, - "learning_rate": 9.401909547738694e-05, - "loss": 5.6269, - "step": 6453 - }, - { - "epoch": 3.3658409387222945, - "grad_norm": 1.456269383430481, - "learning_rate": 9.401809045226132e-05, - "loss": 6.0227, - "step": 6454 - }, - { - "epoch": 3.366362451108214, - "grad_norm": 1.4427556991577148, - "learning_rate": 9.40170854271357e-05, - "loss": 5.5068, - "step": 6455 - }, - { - "epoch": 3.366883963494133, - "grad_norm": 1.510710597038269, - "learning_rate": 9.401608040201006e-05, - "loss": 5.8937, - "step": 6456 - }, - { - "epoch": 3.367405475880052, - "grad_norm": 1.6357951164245605, - "learning_rate": 9.401507537688444e-05, - "loss": 5.8729, - "step": 6457 - }, - { - "epoch": 3.3679269882659715, - "grad_norm": 1.6245770454406738, - "learning_rate": 9.40140703517588e-05, - "loss": 5.7556, - "step": 6458 - }, - { - "epoch": 3.3684485006518905, - "grad_norm": 1.7717359066009521, - "learning_rate": 9.401306532663316e-05, - "loss": 5.8173, - "step": 6459 - }, - { - "epoch": 3.3689700130378095, - "grad_norm": 1.8022582530975342, - "learning_rate": 9.401206030150754e-05, - "loss": 4.9304, - "step": 6460 - }, - { - "epoch": 3.369491525423729, - "grad_norm": 1.3410917520523071, - "learning_rate": 9.40110552763819e-05, - "loss": 5.3652, - "step": 6461 - }, - { - "epoch": 3.370013037809648, - "grad_norm": 1.4655345678329468, - "learning_rate": 9.401005025125628e-05, - "loss": 6.0267, - "step": 6462 - }, - { - "epoch": 3.370534550195567, - "grad_norm": 1.4135409593582153, - "learning_rate": 9.400904522613065e-05, - "loss": 5.74, - "step": 6463 - }, - { - "epoch": 3.3710560625814865, - "grad_norm": 1.4556366205215454, - "learning_rate": 9.400804020100503e-05, - "loss": 5.7254, - "step": 6464 - }, - { - "epoch": 3.3715775749674055, - "grad_norm": 1.4365060329437256, - "learning_rate": 9.40070351758794e-05, - "loss": 5.8429, - "step": 6465 - }, - { - "epoch": 3.3720990873533245, - "grad_norm": 1.479904055595398, - "learning_rate": 9.400603015075378e-05, - "loss": 5.8722, - "step": 6466 - }, - { - "epoch": 3.372620599739244, - "grad_norm": 1.4162895679473877, - "learning_rate": 9.400502512562815e-05, - "loss": 6.2072, - "step": 6467 - }, - { - "epoch": 3.373142112125163, - "grad_norm": 1.381284236907959, - "learning_rate": 9.400402010050252e-05, - "loss": 5.2954, - "step": 6468 - }, - { - "epoch": 3.373663624511082, - "grad_norm": 1.4764337539672852, - "learning_rate": 9.400301507537689e-05, - "loss": 5.9313, - "step": 6469 - }, - { - "epoch": 3.374185136897001, - "grad_norm": 1.445488691329956, - "learning_rate": 9.400201005025127e-05, - "loss": 5.6215, - "step": 6470 - }, - { - "epoch": 3.3747066492829205, - "grad_norm": 1.3670355081558228, - "learning_rate": 9.400100502512563e-05, - "loss": 5.8369, - "step": 6471 - }, - { - "epoch": 3.3752281616688395, - "grad_norm": 1.4261252880096436, - "learning_rate": 9.4e-05, - "loss": 6.0461, - "step": 6472 - }, - { - "epoch": 3.3757496740547586, - "grad_norm": 1.5171678066253662, - "learning_rate": 9.399899497487437e-05, - "loss": 5.4054, - "step": 6473 - }, - { - "epoch": 3.376271186440678, - "grad_norm": 1.752041220664978, - "learning_rate": 9.399798994974875e-05, - "loss": 5.5749, - "step": 6474 - }, - { - "epoch": 3.376792698826597, - "grad_norm": 1.5243462324142456, - "learning_rate": 9.399698492462313e-05, - "loss": 5.3607, - "step": 6475 - }, - { - "epoch": 3.377314211212516, - "grad_norm": 1.493709921836853, - "learning_rate": 9.399597989949749e-05, - "loss": 5.7654, - "step": 6476 - }, - { - "epoch": 3.3778357235984355, - "grad_norm": 1.4140244722366333, - "learning_rate": 9.399497487437187e-05, - "loss": 5.4763, - "step": 6477 - }, - { - "epoch": 3.3783572359843546, - "grad_norm": 1.474185824394226, - "learning_rate": 9.399396984924623e-05, - "loss": 5.7317, - "step": 6478 - }, - { - "epoch": 3.3788787483702736, - "grad_norm": 1.5064306259155273, - "learning_rate": 9.399296482412061e-05, - "loss": 5.9905, - "step": 6479 - }, - { - "epoch": 3.379400260756193, - "grad_norm": 1.48843514919281, - "learning_rate": 9.399195979899498e-05, - "loss": 5.7787, - "step": 6480 - }, - { - "epoch": 3.379921773142112, - "grad_norm": 1.448557734489441, - "learning_rate": 9.399095477386935e-05, - "loss": 5.6171, - "step": 6481 - }, - { - "epoch": 3.380443285528031, - "grad_norm": 1.4944267272949219, - "learning_rate": 9.398994974874372e-05, - "loss": 5.8917, - "step": 6482 - }, - { - "epoch": 3.3809647979139505, - "grad_norm": 1.4344645738601685, - "learning_rate": 9.39889447236181e-05, - "loss": 5.8834, - "step": 6483 - }, - { - "epoch": 3.3814863102998696, - "grad_norm": 1.4446418285369873, - "learning_rate": 9.398793969849246e-05, - "loss": 5.7758, - "step": 6484 - }, - { - "epoch": 3.3820078226857886, - "grad_norm": 1.5224469900131226, - "learning_rate": 9.398693467336684e-05, - "loss": 6.1677, - "step": 6485 - }, - { - "epoch": 3.382529335071708, - "grad_norm": 1.6215016841888428, - "learning_rate": 9.398592964824122e-05, - "loss": 5.537, - "step": 6486 - }, - { - "epoch": 3.383050847457627, - "grad_norm": 1.5413540601730347, - "learning_rate": 9.398492462311558e-05, - "loss": 5.9245, - "step": 6487 - }, - { - "epoch": 3.383572359843546, - "grad_norm": 1.5161811113357544, - "learning_rate": 9.398391959798996e-05, - "loss": 5.6298, - "step": 6488 - }, - { - "epoch": 3.3840938722294656, - "grad_norm": 1.4435259103775024, - "learning_rate": 9.398291457286432e-05, - "loss": 6.1302, - "step": 6489 - }, - { - "epoch": 3.3846153846153846, - "grad_norm": 1.6425511837005615, - "learning_rate": 9.39819095477387e-05, - "loss": 5.3557, - "step": 6490 - }, - { - "epoch": 3.3851368970013036, - "grad_norm": 1.4682636260986328, - "learning_rate": 9.398090452261306e-05, - "loss": 5.6554, - "step": 6491 - }, - { - "epoch": 3.385658409387223, - "grad_norm": 1.4465373754501343, - "learning_rate": 9.397989949748744e-05, - "loss": 5.4763, - "step": 6492 - }, - { - "epoch": 3.386179921773142, - "grad_norm": 1.519806146621704, - "learning_rate": 9.397889447236181e-05, - "loss": 5.9476, - "step": 6493 - }, - { - "epoch": 3.386701434159061, - "grad_norm": 1.5260028839111328, - "learning_rate": 9.397788944723618e-05, - "loss": 5.5217, - "step": 6494 - }, - { - "epoch": 3.3872229465449806, - "grad_norm": 1.82406747341156, - "learning_rate": 9.397688442211056e-05, - "loss": 4.9978, - "step": 6495 - }, - { - "epoch": 3.3877444589308996, - "grad_norm": 1.5789798498153687, - "learning_rate": 9.397587939698494e-05, - "loss": 5.2265, - "step": 6496 - }, - { - "epoch": 3.3882659713168186, - "grad_norm": 1.605324625968933, - "learning_rate": 9.39748743718593e-05, - "loss": 5.3143, - "step": 6497 - }, - { - "epoch": 3.388787483702738, - "grad_norm": 1.4530377388000488, - "learning_rate": 9.397386934673368e-05, - "loss": 6.0444, - "step": 6498 - }, - { - "epoch": 3.389308996088657, - "grad_norm": 1.5502046346664429, - "learning_rate": 9.397286432160805e-05, - "loss": 5.7193, - "step": 6499 - }, - { - "epoch": 3.389830508474576, - "grad_norm": 1.5165114402770996, - "learning_rate": 9.397185929648241e-05, - "loss": 5.3546, - "step": 6500 - }, - { - "epoch": 3.3903520208604956, - "grad_norm": 1.553282380104065, - "learning_rate": 9.397085427135679e-05, - "loss": 6.0829, - "step": 6501 - }, - { - "epoch": 3.3908735332464146, - "grad_norm": 1.8076362609863281, - "learning_rate": 9.396984924623115e-05, - "loss": 5.6219, - "step": 6502 - }, - { - "epoch": 3.3913950456323336, - "grad_norm": 1.6001783609390259, - "learning_rate": 9.396884422110553e-05, - "loss": 5.7384, - "step": 6503 - }, - { - "epoch": 3.391916558018253, - "grad_norm": 1.4761345386505127, - "learning_rate": 9.39678391959799e-05, - "loss": 5.4711, - "step": 6504 - }, - { - "epoch": 3.392438070404172, - "grad_norm": 1.4391748905181885, - "learning_rate": 9.396683417085427e-05, - "loss": 5.7536, - "step": 6505 - }, - { - "epoch": 3.392959582790091, - "grad_norm": 1.533321738243103, - "learning_rate": 9.396582914572865e-05, - "loss": 5.9117, - "step": 6506 - }, - { - "epoch": 3.3934810951760106, - "grad_norm": 1.4328850507736206, - "learning_rate": 9.396482412060303e-05, - "loss": 5.7987, - "step": 6507 - }, - { - "epoch": 3.3940026075619296, - "grad_norm": 1.489643931388855, - "learning_rate": 9.396381909547739e-05, - "loss": 6.01, - "step": 6508 - }, - { - "epoch": 3.3945241199478486, - "grad_norm": 1.563175916671753, - "learning_rate": 9.396281407035177e-05, - "loss": 5.6983, - "step": 6509 - }, - { - "epoch": 3.395045632333768, - "grad_norm": 1.548081636428833, - "learning_rate": 9.396180904522614e-05, - "loss": 5.7492, - "step": 6510 - }, - { - "epoch": 3.395567144719687, - "grad_norm": 1.541178584098816, - "learning_rate": 9.396080402010051e-05, - "loss": 5.8637, - "step": 6511 - }, - { - "epoch": 3.396088657105606, - "grad_norm": 1.621647834777832, - "learning_rate": 9.395979899497488e-05, - "loss": 5.3276, - "step": 6512 - }, - { - "epoch": 3.3966101694915256, - "grad_norm": 1.3239518404006958, - "learning_rate": 9.395879396984924e-05, - "loss": 6.1462, - "step": 6513 - }, - { - "epoch": 3.3971316818774446, - "grad_norm": 1.3908641338348389, - "learning_rate": 9.395778894472362e-05, - "loss": 5.8648, - "step": 6514 - }, - { - "epoch": 3.3976531942633637, - "grad_norm": 1.498016595840454, - "learning_rate": 9.3956783919598e-05, - "loss": 5.8555, - "step": 6515 - }, - { - "epoch": 3.398174706649283, - "grad_norm": 1.5761386156082153, - "learning_rate": 9.395577889447237e-05, - "loss": 6.0078, - "step": 6516 - }, - { - "epoch": 3.398696219035202, - "grad_norm": 1.7212343215942383, - "learning_rate": 9.395477386934674e-05, - "loss": 5.2859, - "step": 6517 - }, - { - "epoch": 3.399217731421121, - "grad_norm": 1.5033856630325317, - "learning_rate": 9.395376884422112e-05, - "loss": 5.7599, - "step": 6518 - }, - { - "epoch": 3.3997392438070406, - "grad_norm": 1.6325711011886597, - "learning_rate": 9.395276381909548e-05, - "loss": 5.7241, - "step": 6519 - }, - { - "epoch": 3.4002607561929596, - "grad_norm": 1.7897790670394897, - "learning_rate": 9.395175879396986e-05, - "loss": 5.9737, - "step": 6520 - }, - { - "epoch": 3.4007822685788787, - "grad_norm": 1.4545655250549316, - "learning_rate": 9.395075376884422e-05, - "loss": 5.4988, - "step": 6521 - }, - { - "epoch": 3.401303780964798, - "grad_norm": 1.5968705415725708, - "learning_rate": 9.39497487437186e-05, - "loss": 5.6458, - "step": 6522 - }, - { - "epoch": 3.401825293350717, - "grad_norm": 1.4853588342666626, - "learning_rate": 9.394874371859297e-05, - "loss": 6.0466, - "step": 6523 - }, - { - "epoch": 3.402346805736636, - "grad_norm": 1.5399503707885742, - "learning_rate": 9.394773869346734e-05, - "loss": 5.9619, - "step": 6524 - }, - { - "epoch": 3.4028683181225556, - "grad_norm": 1.882503628730774, - "learning_rate": 9.394673366834171e-05, - "loss": 5.6956, - "step": 6525 - }, - { - "epoch": 3.4033898305084747, - "grad_norm": 1.6144382953643799, - "learning_rate": 9.394572864321609e-05, - "loss": 5.5976, - "step": 6526 - }, - { - "epoch": 3.4039113428943937, - "grad_norm": 1.5639595985412598, - "learning_rate": 9.394472361809046e-05, - "loss": 5.8781, - "step": 6527 - }, - { - "epoch": 3.404432855280313, - "grad_norm": 1.5439175367355347, - "learning_rate": 9.394371859296483e-05, - "loss": 5.8828, - "step": 6528 - }, - { - "epoch": 3.404954367666232, - "grad_norm": 1.5717159509658813, - "learning_rate": 9.39427135678392e-05, - "loss": 5.7316, - "step": 6529 - }, - { - "epoch": 3.405475880052151, - "grad_norm": 1.4159138202667236, - "learning_rate": 9.394170854271357e-05, - "loss": 6.0799, - "step": 6530 - }, - { - "epoch": 3.4059973924380706, - "grad_norm": 1.426571011543274, - "learning_rate": 9.394070351758795e-05, - "loss": 6.1232, - "step": 6531 - }, - { - "epoch": 3.4065189048239897, - "grad_norm": 1.424534797668457, - "learning_rate": 9.393969849246231e-05, - "loss": 5.9758, - "step": 6532 - }, - { - "epoch": 3.4070404172099087, - "grad_norm": 1.558273434638977, - "learning_rate": 9.393869346733669e-05, - "loss": 5.9933, - "step": 6533 - }, - { - "epoch": 3.4075619295958277, - "grad_norm": 1.4258747100830078, - "learning_rate": 9.393768844221105e-05, - "loss": 6.088, - "step": 6534 - }, - { - "epoch": 3.408083441981747, - "grad_norm": 1.4185106754302979, - "learning_rate": 9.393668341708543e-05, - "loss": 5.5963, - "step": 6535 - }, - { - "epoch": 3.408604954367666, - "grad_norm": 1.3699321746826172, - "learning_rate": 9.39356783919598e-05, - "loss": 5.5412, - "step": 6536 - }, - { - "epoch": 3.409126466753585, - "grad_norm": 1.414751648902893, - "learning_rate": 9.393467336683417e-05, - "loss": 5.8081, - "step": 6537 - }, - { - "epoch": 3.4096479791395047, - "grad_norm": 1.5143978595733643, - "learning_rate": 9.393366834170855e-05, - "loss": 5.9845, - "step": 6538 - }, - { - "epoch": 3.4101694915254237, - "grad_norm": 1.5359721183776855, - "learning_rate": 9.393266331658292e-05, - "loss": 5.924, - "step": 6539 - }, - { - "epoch": 3.4106910039113427, - "grad_norm": 1.9311178922653198, - "learning_rate": 9.39316582914573e-05, - "loss": 4.8777, - "step": 6540 - }, - { - "epoch": 3.411212516297262, - "grad_norm": 1.5462849140167236, - "learning_rate": 9.393065326633166e-05, - "loss": 5.5404, - "step": 6541 - }, - { - "epoch": 3.411734028683181, - "grad_norm": 1.6157095432281494, - "learning_rate": 9.392964824120604e-05, - "loss": 5.6673, - "step": 6542 - }, - { - "epoch": 3.4122555410691002, - "grad_norm": 1.5912237167358398, - "learning_rate": 9.39286432160804e-05, - "loss": 5.4115, - "step": 6543 - }, - { - "epoch": 3.4127770534550197, - "grad_norm": 1.451368808746338, - "learning_rate": 9.392763819095478e-05, - "loss": 5.9312, - "step": 6544 - }, - { - "epoch": 3.4132985658409387, - "grad_norm": 1.5446959733963013, - "learning_rate": 9.392663316582914e-05, - "loss": 5.761, - "step": 6545 - }, - { - "epoch": 3.4138200782268577, - "grad_norm": 1.6865609884262085, - "learning_rate": 9.392562814070352e-05, - "loss": 5.8478, - "step": 6546 - }, - { - "epoch": 3.414341590612777, - "grad_norm": 1.4885061979293823, - "learning_rate": 9.39246231155779e-05, - "loss": 6.0549, - "step": 6547 - }, - { - "epoch": 3.414863102998696, - "grad_norm": 1.610979437828064, - "learning_rate": 9.392361809045228e-05, - "loss": 5.7821, - "step": 6548 - }, - { - "epoch": 3.4153846153846152, - "grad_norm": 1.5412813425064087, - "learning_rate": 9.392261306532664e-05, - "loss": 5.5426, - "step": 6549 - }, - { - "epoch": 3.4159061277705347, - "grad_norm": 1.6531035900115967, - "learning_rate": 9.392160804020102e-05, - "loss": 5.5665, - "step": 6550 - }, - { - "epoch": 3.4164276401564537, - "grad_norm": 1.7348772287368774, - "learning_rate": 9.392060301507538e-05, - "loss": 5.4497, - "step": 6551 - }, - { - "epoch": 3.4169491525423727, - "grad_norm": 1.4983173608779907, - "learning_rate": 9.391959798994975e-05, - "loss": 5.9426, - "step": 6552 - }, - { - "epoch": 3.417470664928292, - "grad_norm": 1.392858862876892, - "learning_rate": 9.391859296482412e-05, - "loss": 5.7955, - "step": 6553 - }, - { - "epoch": 3.4179921773142112, - "grad_norm": 1.4265427589416504, - "learning_rate": 9.391758793969849e-05, - "loss": 5.5697, - "step": 6554 - }, - { - "epoch": 3.4185136897001303, - "grad_norm": 1.582919955253601, - "learning_rate": 9.391658291457287e-05, - "loss": 5.4613, - "step": 6555 - }, - { - "epoch": 3.4190352020860497, - "grad_norm": 1.8276000022888184, - "learning_rate": 9.391557788944723e-05, - "loss": 4.9726, - "step": 6556 - }, - { - "epoch": 3.4195567144719687, - "grad_norm": 1.6476658582687378, - "learning_rate": 9.391457286432161e-05, - "loss": 5.5401, - "step": 6557 - }, - { - "epoch": 3.4200782268578878, - "grad_norm": 1.3307180404663086, - "learning_rate": 9.391356783919599e-05, - "loss": 6.0084, - "step": 6558 - }, - { - "epoch": 3.4205997392438072, - "grad_norm": 1.3770465850830078, - "learning_rate": 9.391256281407036e-05, - "loss": 5.7313, - "step": 6559 - }, - { - "epoch": 3.4211212516297262, - "grad_norm": 1.4043294191360474, - "learning_rate": 9.391155778894473e-05, - "loss": 6.1037, - "step": 6560 - }, - { - "epoch": 3.4216427640156453, - "grad_norm": 1.3740513324737549, - "learning_rate": 9.39105527638191e-05, - "loss": 5.842, - "step": 6561 - }, - { - "epoch": 3.4221642764015647, - "grad_norm": 1.4339752197265625, - "learning_rate": 9.390954773869347e-05, - "loss": 5.5815, - "step": 6562 - }, - { - "epoch": 3.4226857887874838, - "grad_norm": 1.508982539176941, - "learning_rate": 9.390854271356785e-05, - "loss": 5.7548, - "step": 6563 - }, - { - "epoch": 3.4232073011734028, - "grad_norm": 1.3650470972061157, - "learning_rate": 9.390753768844221e-05, - "loss": 5.8276, - "step": 6564 - }, - { - "epoch": 3.423728813559322, - "grad_norm": 1.4773926734924316, - "learning_rate": 9.390653266331658e-05, - "loss": 5.7884, - "step": 6565 - }, - { - "epoch": 3.4242503259452413, - "grad_norm": 1.4572947025299072, - "learning_rate": 9.390552763819095e-05, - "loss": 5.658, - "step": 6566 - }, - { - "epoch": 3.4247718383311603, - "grad_norm": 1.6104774475097656, - "learning_rate": 9.390452261306533e-05, - "loss": 5.6263, - "step": 6567 - }, - { - "epoch": 3.4252933507170793, - "grad_norm": 1.599902629852295, - "learning_rate": 9.390351758793971e-05, - "loss": 5.3852, - "step": 6568 - }, - { - "epoch": 3.4258148631029988, - "grad_norm": 1.4869414567947388, - "learning_rate": 9.390251256281407e-05, - "loss": 5.8993, - "step": 6569 - }, - { - "epoch": 3.426336375488918, - "grad_norm": 1.6254209280014038, - "learning_rate": 9.390150753768845e-05, - "loss": 5.5851, - "step": 6570 - }, - { - "epoch": 3.426857887874837, - "grad_norm": 1.516729712486267, - "learning_rate": 9.390050251256282e-05, - "loss": 5.9488, - "step": 6571 - }, - { - "epoch": 3.4273794002607563, - "grad_norm": 1.5659940242767334, - "learning_rate": 9.38994974874372e-05, - "loss": 5.7598, - "step": 6572 - }, - { - "epoch": 3.4279009126466753, - "grad_norm": 1.5914667844772339, - "learning_rate": 9.389849246231156e-05, - "loss": 5.8017, - "step": 6573 - }, - { - "epoch": 3.4284224250325943, - "grad_norm": 1.6391860246658325, - "learning_rate": 9.389748743718594e-05, - "loss": 5.5948, - "step": 6574 - }, - { - "epoch": 3.4289439374185138, - "grad_norm": 1.673113226890564, - "learning_rate": 9.38964824120603e-05, - "loss": 5.4251, - "step": 6575 - }, - { - "epoch": 3.429465449804433, - "grad_norm": 1.6293518543243408, - "learning_rate": 9.389547738693468e-05, - "loss": 5.9745, - "step": 6576 - }, - { - "epoch": 3.429986962190352, - "grad_norm": 1.4809448719024658, - "learning_rate": 9.389447236180904e-05, - "loss": 6.0293, - "step": 6577 - }, - { - "epoch": 3.4305084745762713, - "grad_norm": 1.5601394176483154, - "learning_rate": 9.389346733668342e-05, - "loss": 5.4166, - "step": 6578 - }, - { - "epoch": 3.4310299869621903, - "grad_norm": 1.5302101373672485, - "learning_rate": 9.38924623115578e-05, - "loss": 5.4488, - "step": 6579 - }, - { - "epoch": 3.4315514993481093, - "grad_norm": 1.5051119327545166, - "learning_rate": 9.389145728643216e-05, - "loss": 5.895, - "step": 6580 - }, - { - "epoch": 3.432073011734029, - "grad_norm": 1.509310007095337, - "learning_rate": 9.389045226130654e-05, - "loss": 5.8249, - "step": 6581 - }, - { - "epoch": 3.432594524119948, - "grad_norm": 1.4073339700698853, - "learning_rate": 9.38894472361809e-05, - "loss": 5.7177, - "step": 6582 - }, - { - "epoch": 3.433116036505867, - "grad_norm": 1.4969583749771118, - "learning_rate": 9.388844221105528e-05, - "loss": 5.8456, - "step": 6583 - }, - { - "epoch": 3.4336375488917863, - "grad_norm": 1.3901371955871582, - "learning_rate": 9.388743718592965e-05, - "loss": 5.9751, - "step": 6584 - }, - { - "epoch": 3.4341590612777053, - "grad_norm": 1.7261689901351929, - "learning_rate": 9.388643216080402e-05, - "loss": 5.8478, - "step": 6585 - }, - { - "epoch": 3.4346805736636243, - "grad_norm": 1.5623786449432373, - "learning_rate": 9.388542713567839e-05, - "loss": 5.8553, - "step": 6586 - }, - { - "epoch": 3.435202086049544, - "grad_norm": 1.550999402999878, - "learning_rate": 9.388442211055277e-05, - "loss": 5.6507, - "step": 6587 - }, - { - "epoch": 3.435723598435463, - "grad_norm": 1.5739843845367432, - "learning_rate": 9.388341708542714e-05, - "loss": 5.953, - "step": 6588 - }, - { - "epoch": 3.436245110821382, - "grad_norm": 1.5290378332138062, - "learning_rate": 9.388241206030152e-05, - "loss": 5.6624, - "step": 6589 - }, - { - "epoch": 3.4367666232073013, - "grad_norm": 1.5524166822433472, - "learning_rate": 9.388140703517589e-05, - "loss": 5.5577, - "step": 6590 - }, - { - "epoch": 3.4372881355932203, - "grad_norm": 1.6305341720581055, - "learning_rate": 9.388040201005026e-05, - "loss": 5.3048, - "step": 6591 - }, - { - "epoch": 3.4378096479791393, - "grad_norm": 1.6362624168395996, - "learning_rate": 9.387939698492463e-05, - "loss": 5.5064, - "step": 6592 - }, - { - "epoch": 3.438331160365059, - "grad_norm": 1.5737831592559814, - "learning_rate": 9.3878391959799e-05, - "loss": 5.8229, - "step": 6593 - }, - { - "epoch": 3.438852672750978, - "grad_norm": 1.4959999322891235, - "learning_rate": 9.387738693467337e-05, - "loss": 5.936, - "step": 6594 - }, - { - "epoch": 3.439374185136897, - "grad_norm": 1.396419644355774, - "learning_rate": 9.387638190954774e-05, - "loss": 6.1069, - "step": 6595 - }, - { - "epoch": 3.4398956975228163, - "grad_norm": 1.880556344985962, - "learning_rate": 9.387537688442211e-05, - "loss": 5.3327, - "step": 6596 - }, - { - "epoch": 3.4404172099087353, - "grad_norm": 1.6038509607315063, - "learning_rate": 9.387437185929648e-05, - "loss": 5.8171, - "step": 6597 - }, - { - "epoch": 3.4409387222946544, - "grad_norm": 1.666276216506958, - "learning_rate": 9.387336683417086e-05, - "loss": 5.7179, - "step": 6598 - }, - { - "epoch": 3.441460234680574, - "grad_norm": 1.4037463665008545, - "learning_rate": 9.387236180904523e-05, - "loss": 5.7841, - "step": 6599 - }, - { - "epoch": 3.441981747066493, - "grad_norm": 1.4923604726791382, - "learning_rate": 9.387135678391961e-05, - "loss": 5.8195, - "step": 6600 - }, - { - "epoch": 3.442503259452412, - "grad_norm": 1.5877501964569092, - "learning_rate": 9.387035175879398e-05, - "loss": 6.0516, - "step": 6601 - }, - { - "epoch": 3.4430247718383313, - "grad_norm": 1.499282717704773, - "learning_rate": 9.386934673366835e-05, - "loss": 5.5857, - "step": 6602 - }, - { - "epoch": 3.4435462842242504, - "grad_norm": 1.5643787384033203, - "learning_rate": 9.386834170854272e-05, - "loss": 5.4177, - "step": 6603 - }, - { - "epoch": 3.4440677966101694, - "grad_norm": 1.4347288608551025, - "learning_rate": 9.38673366834171e-05, - "loss": 5.8908, - "step": 6604 - }, - { - "epoch": 3.444589308996089, - "grad_norm": 1.5068795680999756, - "learning_rate": 9.386633165829146e-05, - "loss": 5.4803, - "step": 6605 - }, - { - "epoch": 3.445110821382008, - "grad_norm": 1.438913345336914, - "learning_rate": 9.386532663316582e-05, - "loss": 5.8202, - "step": 6606 - }, - { - "epoch": 3.445632333767927, - "grad_norm": 1.4150618314743042, - "learning_rate": 9.38643216080402e-05, - "loss": 5.8957, - "step": 6607 - }, - { - "epoch": 3.4461538461538463, - "grad_norm": 1.514570713043213, - "learning_rate": 9.386331658291458e-05, - "loss": 5.7693, - "step": 6608 - }, - { - "epoch": 3.4466753585397654, - "grad_norm": 1.6335532665252686, - "learning_rate": 9.386231155778896e-05, - "loss": 5.4071, - "step": 6609 - }, - { - "epoch": 3.4471968709256844, - "grad_norm": 1.3357943296432495, - "learning_rate": 9.386130653266332e-05, - "loss": 5.7355, - "step": 6610 - }, - { - "epoch": 3.447718383311604, - "grad_norm": 1.7022035121917725, - "learning_rate": 9.38603015075377e-05, - "loss": 5.4866, - "step": 6611 - }, - { - "epoch": 3.448239895697523, - "grad_norm": 1.5016149282455444, - "learning_rate": 9.385929648241206e-05, - "loss": 5.7188, - "step": 6612 - }, - { - "epoch": 3.448761408083442, - "grad_norm": 1.4516148567199707, - "learning_rate": 9.385829145728644e-05, - "loss": 5.8095, - "step": 6613 - }, - { - "epoch": 3.4492829204693614, - "grad_norm": 1.4787834882736206, - "learning_rate": 9.38572864321608e-05, - "loss": 5.3873, - "step": 6614 - }, - { - "epoch": 3.4498044328552804, - "grad_norm": 1.458290696144104, - "learning_rate": 9.385628140703518e-05, - "loss": 5.9995, - "step": 6615 - }, - { - "epoch": 3.4503259452411994, - "grad_norm": 1.7118620872497559, - "learning_rate": 9.385527638190955e-05, - "loss": 6.0057, - "step": 6616 - }, - { - "epoch": 3.450847457627119, - "grad_norm": 1.298499345779419, - "learning_rate": 9.385427135678393e-05, - "loss": 6.2426, - "step": 6617 - }, - { - "epoch": 3.451368970013038, - "grad_norm": 1.4216827154159546, - "learning_rate": 9.385326633165829e-05, - "loss": 6.0461, - "step": 6618 - }, - { - "epoch": 3.451890482398957, - "grad_norm": 1.681530475616455, - "learning_rate": 9.385226130653267e-05, - "loss": 5.5226, - "step": 6619 - }, - { - "epoch": 3.4524119947848764, - "grad_norm": 1.4151504039764404, - "learning_rate": 9.385125628140705e-05, - "loss": 5.7454, - "step": 6620 - }, - { - "epoch": 3.4529335071707954, - "grad_norm": 1.3347724676132202, - "learning_rate": 9.385025125628141e-05, - "loss": 5.8242, - "step": 6621 - }, - { - "epoch": 3.4534550195567144, - "grad_norm": 1.4965983629226685, - "learning_rate": 9.384924623115579e-05, - "loss": 6.2195, - "step": 6622 - }, - { - "epoch": 3.453976531942634, - "grad_norm": 1.553650975227356, - "learning_rate": 9.384824120603015e-05, - "loss": 5.5846, - "step": 6623 - }, - { - "epoch": 3.454498044328553, - "grad_norm": 1.4847723245620728, - "learning_rate": 9.384723618090453e-05, - "loss": 5.9112, - "step": 6624 - }, - { - "epoch": 3.455019556714472, - "grad_norm": 1.516342043876648, - "learning_rate": 9.38462311557789e-05, - "loss": 5.7741, - "step": 6625 - }, - { - "epoch": 3.4555410691003914, - "grad_norm": 1.5443488359451294, - "learning_rate": 9.384522613065327e-05, - "loss": 5.3263, - "step": 6626 - }, - { - "epoch": 3.4560625814863104, - "grad_norm": 1.4456732273101807, - "learning_rate": 9.384422110552764e-05, - "loss": 6.1292, - "step": 6627 - }, - { - "epoch": 3.4565840938722294, - "grad_norm": 1.902841567993164, - "learning_rate": 9.384321608040201e-05, - "loss": 5.0048, - "step": 6628 - }, - { - "epoch": 3.4571056062581484, - "grad_norm": 1.8350127935409546, - "learning_rate": 9.384221105527639e-05, - "loss": 5.5224, - "step": 6629 - }, - { - "epoch": 3.457627118644068, - "grad_norm": 1.5191256999969482, - "learning_rate": 9.384120603015077e-05, - "loss": 6.043, - "step": 6630 - }, - { - "epoch": 3.458148631029987, - "grad_norm": 2.2479302883148193, - "learning_rate": 9.384020100502513e-05, - "loss": 5.6505, - "step": 6631 - }, - { - "epoch": 3.458670143415906, - "grad_norm": 1.5241508483886719, - "learning_rate": 9.38391959798995e-05, - "loss": 5.8403, - "step": 6632 - }, - { - "epoch": 3.4591916558018254, - "grad_norm": 1.5110018253326416, - "learning_rate": 9.383819095477388e-05, - "loss": 5.7992, - "step": 6633 - }, - { - "epoch": 3.4597131681877444, - "grad_norm": 1.4047322273254395, - "learning_rate": 9.383718592964824e-05, - "loss": 6.1065, - "step": 6634 - }, - { - "epoch": 3.4602346805736635, - "grad_norm": 1.509946346282959, - "learning_rate": 9.383618090452262e-05, - "loss": 5.6617, - "step": 6635 - }, - { - "epoch": 3.460756192959583, - "grad_norm": 1.503730058670044, - "learning_rate": 9.383517587939698e-05, - "loss": 6.0458, - "step": 6636 - }, - { - "epoch": 3.461277705345502, - "grad_norm": 1.4189656972885132, - "learning_rate": 9.383417085427136e-05, - "loss": 5.8626, - "step": 6637 - }, - { - "epoch": 3.461799217731421, - "grad_norm": 1.3749526739120483, - "learning_rate": 9.383316582914572e-05, - "loss": 5.6927, - "step": 6638 - }, - { - "epoch": 3.4623207301173404, - "grad_norm": 1.45967435836792, - "learning_rate": 9.38321608040201e-05, - "loss": 5.5792, - "step": 6639 - }, - { - "epoch": 3.4628422425032594, - "grad_norm": 1.6039050817489624, - "learning_rate": 9.383115577889448e-05, - "loss": 5.8064, - "step": 6640 - }, - { - "epoch": 3.4633637548891785, - "grad_norm": 1.4519388675689697, - "learning_rate": 9.383015075376886e-05, - "loss": 6.2917, - "step": 6641 - }, - { - "epoch": 3.463885267275098, - "grad_norm": 1.4016318321228027, - "learning_rate": 9.382914572864322e-05, - "loss": 5.9879, - "step": 6642 - }, - { - "epoch": 3.464406779661017, - "grad_norm": 1.621645450592041, - "learning_rate": 9.38281407035176e-05, - "loss": 5.5346, - "step": 6643 - }, - { - "epoch": 3.464928292046936, - "grad_norm": 1.639356017112732, - "learning_rate": 9.382713567839196e-05, - "loss": 5.3508, - "step": 6644 - }, - { - "epoch": 3.4654498044328554, - "grad_norm": 2.164665937423706, - "learning_rate": 9.382613065326633e-05, - "loss": 5.4644, - "step": 6645 - }, - { - "epoch": 3.4659713168187745, - "grad_norm": 1.4670137166976929, - "learning_rate": 9.38251256281407e-05, - "loss": 6.1142, - "step": 6646 - }, - { - "epoch": 3.4664928292046935, - "grad_norm": 1.4563056230545044, - "learning_rate": 9.382412060301507e-05, - "loss": 5.7108, - "step": 6647 - }, - { - "epoch": 3.467014341590613, - "grad_norm": 1.355540156364441, - "learning_rate": 9.382311557788945e-05, - "loss": 6.0788, - "step": 6648 - }, - { - "epoch": 3.467535853976532, - "grad_norm": 1.5267772674560547, - "learning_rate": 9.382211055276383e-05, - "loss": 5.3276, - "step": 6649 - }, - { - "epoch": 3.468057366362451, - "grad_norm": 1.5112308263778687, - "learning_rate": 9.38211055276382e-05, - "loss": 5.6088, - "step": 6650 - }, - { - "epoch": 3.4685788787483705, - "grad_norm": 1.426392674446106, - "learning_rate": 9.382010050251257e-05, - "loss": 6.0311, - "step": 6651 - }, - { - "epoch": 3.4691003911342895, - "grad_norm": 1.5667036771774292, - "learning_rate": 9.381909547738695e-05, - "loss": 5.7174, - "step": 6652 - }, - { - "epoch": 3.4696219035202085, - "grad_norm": 1.5902961492538452, - "learning_rate": 9.381809045226131e-05, - "loss": 5.4754, - "step": 6653 - }, - { - "epoch": 3.470143415906128, - "grad_norm": 1.343227505683899, - "learning_rate": 9.381708542713569e-05, - "loss": 6.0112, - "step": 6654 - }, - { - "epoch": 3.470664928292047, - "grad_norm": 1.4775993824005127, - "learning_rate": 9.381608040201005e-05, - "loss": 5.752, - "step": 6655 - }, - { - "epoch": 3.471186440677966, - "grad_norm": 1.455881953239441, - "learning_rate": 9.381507537688443e-05, - "loss": 5.8514, - "step": 6656 - }, - { - "epoch": 3.471707953063885, - "grad_norm": 1.5065813064575195, - "learning_rate": 9.38140703517588e-05, - "loss": 5.8573, - "step": 6657 - }, - { - "epoch": 3.4722294654498045, - "grad_norm": 1.4667167663574219, - "learning_rate": 9.381306532663316e-05, - "loss": 5.893, - "step": 6658 - }, - { - "epoch": 3.4727509778357235, - "grad_norm": 1.4245109558105469, - "learning_rate": 9.381206030150754e-05, - "loss": 5.7042, - "step": 6659 - }, - { - "epoch": 3.4732724902216425, - "grad_norm": 1.5085673332214355, - "learning_rate": 9.381105527638191e-05, - "loss": 5.576, - "step": 6660 - }, - { - "epoch": 3.473794002607562, - "grad_norm": 1.4945487976074219, - "learning_rate": 9.381005025125629e-05, - "loss": 5.9713, - "step": 6661 - }, - { - "epoch": 3.474315514993481, - "grad_norm": 1.5869314670562744, - "learning_rate": 9.380904522613066e-05, - "loss": 5.6719, - "step": 6662 - }, - { - "epoch": 3.4748370273794, - "grad_norm": 1.330153226852417, - "learning_rate": 9.380804020100503e-05, - "loss": 5.9293, - "step": 6663 - }, - { - "epoch": 3.4753585397653195, - "grad_norm": 1.4297603368759155, - "learning_rate": 9.38070351758794e-05, - "loss": 5.5187, - "step": 6664 - }, - { - "epoch": 3.4758800521512385, - "grad_norm": 1.3853750228881836, - "learning_rate": 9.380603015075378e-05, - "loss": 5.9606, - "step": 6665 - }, - { - "epoch": 3.4764015645371575, - "grad_norm": 1.3818566799163818, - "learning_rate": 9.380502512562814e-05, - "loss": 5.628, - "step": 6666 - }, - { - "epoch": 3.476923076923077, - "grad_norm": 1.4198203086853027, - "learning_rate": 9.380402010050252e-05, - "loss": 5.9571, - "step": 6667 - }, - { - "epoch": 3.477444589308996, - "grad_norm": 1.3652359247207642, - "learning_rate": 9.380301507537688e-05, - "loss": 5.789, - "step": 6668 - }, - { - "epoch": 3.477966101694915, - "grad_norm": 1.4546282291412354, - "learning_rate": 9.380201005025126e-05, - "loss": 6.0332, - "step": 6669 - }, - { - "epoch": 3.4784876140808345, - "grad_norm": 1.629219889640808, - "learning_rate": 9.380100502512564e-05, - "loss": 4.7585, - "step": 6670 - }, - { - "epoch": 3.4790091264667535, - "grad_norm": 1.3091163635253906, - "learning_rate": 9.38e-05, - "loss": 6.0605, - "step": 6671 - }, - { - "epoch": 3.4795306388526726, - "grad_norm": 1.4081790447235107, - "learning_rate": 9.379899497487438e-05, - "loss": 5.7456, - "step": 6672 - }, - { - "epoch": 3.480052151238592, - "grad_norm": 1.6523429155349731, - "learning_rate": 9.379798994974875e-05, - "loss": 5.0601, - "step": 6673 - }, - { - "epoch": 3.480573663624511, - "grad_norm": 1.5568499565124512, - "learning_rate": 9.379698492462312e-05, - "loss": 5.7539, - "step": 6674 - }, - { - "epoch": 3.48109517601043, - "grad_norm": 1.4979923963546753, - "learning_rate": 9.379597989949749e-05, - "loss": 5.6272, - "step": 6675 - }, - { - "epoch": 3.4816166883963495, - "grad_norm": 1.6634958982467651, - "learning_rate": 9.379497487437187e-05, - "loss": 5.1706, - "step": 6676 - }, - { - "epoch": 3.4821382007822685, - "grad_norm": 1.3872904777526855, - "learning_rate": 9.379396984924623e-05, - "loss": 5.744, - "step": 6677 - }, - { - "epoch": 3.4826597131681876, - "grad_norm": 1.6882271766662598, - "learning_rate": 9.379296482412061e-05, - "loss": 5.5906, - "step": 6678 - }, - { - "epoch": 3.483181225554107, - "grad_norm": 1.5967460870742798, - "learning_rate": 9.379195979899497e-05, - "loss": 5.219, - "step": 6679 - }, - { - "epoch": 3.483702737940026, - "grad_norm": 1.473443865776062, - "learning_rate": 9.379095477386935e-05, - "loss": 5.8302, - "step": 6680 - }, - { - "epoch": 3.484224250325945, - "grad_norm": 1.4015426635742188, - "learning_rate": 9.378994974874373e-05, - "loss": 5.5922, - "step": 6681 - }, - { - "epoch": 3.4847457627118645, - "grad_norm": 1.4231053590774536, - "learning_rate": 9.37889447236181e-05, - "loss": 6.0124, - "step": 6682 - }, - { - "epoch": 3.4852672750977836, - "grad_norm": 1.4743679761886597, - "learning_rate": 9.378793969849247e-05, - "loss": 5.414, - "step": 6683 - }, - { - "epoch": 3.4857887874837026, - "grad_norm": 1.5142967700958252, - "learning_rate": 9.378693467336685e-05, - "loss": 5.4161, - "step": 6684 - }, - { - "epoch": 3.486310299869622, - "grad_norm": 1.5752254724502563, - "learning_rate": 9.378592964824121e-05, - "loss": 5.767, - "step": 6685 - }, - { - "epoch": 3.486831812255541, - "grad_norm": 1.5675541162490845, - "learning_rate": 9.378492462311558e-05, - "loss": 5.7705, - "step": 6686 - }, - { - "epoch": 3.48735332464146, - "grad_norm": 2.1700732707977295, - "learning_rate": 9.378391959798995e-05, - "loss": 5.4788, - "step": 6687 - }, - { - "epoch": 3.4878748370273795, - "grad_norm": 1.623518705368042, - "learning_rate": 9.378291457286432e-05, - "loss": 6.2171, - "step": 6688 - }, - { - "epoch": 3.4883963494132986, - "grad_norm": 1.8324391841888428, - "learning_rate": 9.37819095477387e-05, - "loss": 5.7916, - "step": 6689 - }, - { - "epoch": 3.4889178617992176, - "grad_norm": 1.6197644472122192, - "learning_rate": 9.378090452261306e-05, - "loss": 5.7418, - "step": 6690 - }, - { - "epoch": 3.489439374185137, - "grad_norm": 1.7779282331466675, - "learning_rate": 9.377989949748744e-05, - "loss": 5.3195, - "step": 6691 - }, - { - "epoch": 3.489960886571056, - "grad_norm": 1.742152214050293, - "learning_rate": 9.377889447236182e-05, - "loss": 5.2932, - "step": 6692 - }, - { - "epoch": 3.490482398956975, - "grad_norm": 1.5503824949264526, - "learning_rate": 9.37778894472362e-05, - "loss": 5.6906, - "step": 6693 - }, - { - "epoch": 3.4910039113428946, - "grad_norm": 1.3012527227401733, - "learning_rate": 9.377688442211056e-05, - "loss": 6.0423, - "step": 6694 - }, - { - "epoch": 3.4915254237288136, - "grad_norm": 1.6402363777160645, - "learning_rate": 9.377587939698494e-05, - "loss": 5.1606, - "step": 6695 - }, - { - "epoch": 3.4920469361147326, - "grad_norm": 1.5132253170013428, - "learning_rate": 9.37748743718593e-05, - "loss": 5.9895, - "step": 6696 - }, - { - "epoch": 3.492568448500652, - "grad_norm": 1.6209053993225098, - "learning_rate": 9.377386934673368e-05, - "loss": 5.8754, - "step": 6697 - }, - { - "epoch": 3.493089960886571, - "grad_norm": 1.5189119577407837, - "learning_rate": 9.377286432160804e-05, - "loss": 5.8027, - "step": 6698 - }, - { - "epoch": 3.49361147327249, - "grad_norm": 1.549461007118225, - "learning_rate": 9.37718592964824e-05, - "loss": 5.6424, - "step": 6699 - }, - { - "epoch": 3.4941329856584096, - "grad_norm": 1.4738787412643433, - "learning_rate": 9.377085427135678e-05, - "loss": 5.7068, - "step": 6700 - }, - { - "epoch": 3.4946544980443286, - "grad_norm": 1.5362428426742554, - "learning_rate": 9.376984924623116e-05, - "loss": 5.8443, - "step": 6701 - }, - { - "epoch": 3.4951760104302476, - "grad_norm": 1.389529824256897, - "learning_rate": 9.376884422110554e-05, - "loss": 6.0809, - "step": 6702 - }, - { - "epoch": 3.495697522816167, - "grad_norm": 1.5592011213302612, - "learning_rate": 9.37678391959799e-05, - "loss": 5.4351, - "step": 6703 - }, - { - "epoch": 3.496219035202086, - "grad_norm": 1.6276932954788208, - "learning_rate": 9.376683417085428e-05, - "loss": 5.5809, - "step": 6704 - }, - { - "epoch": 3.496740547588005, - "grad_norm": 1.4372819662094116, - "learning_rate": 9.376582914572865e-05, - "loss": 6.0987, - "step": 6705 - }, - { - "epoch": 3.4972620599739246, - "grad_norm": 1.4826847314834595, - "learning_rate": 9.376482412060302e-05, - "loss": 5.9527, - "step": 6706 - }, - { - "epoch": 3.4977835723598436, - "grad_norm": 1.3536359071731567, - "learning_rate": 9.376381909547739e-05, - "loss": 5.9444, - "step": 6707 - }, - { - "epoch": 3.4983050847457626, - "grad_norm": 1.4969521760940552, - "learning_rate": 9.376281407035177e-05, - "loss": 6.0206, - "step": 6708 - }, - { - "epoch": 3.498826597131682, - "grad_norm": 1.5260578393936157, - "learning_rate": 9.376180904522613e-05, - "loss": 5.969, - "step": 6709 - }, - { - "epoch": 3.499348109517601, - "grad_norm": 1.7617967128753662, - "learning_rate": 9.376080402010051e-05, - "loss": 5.4568, - "step": 6710 - }, - { - "epoch": 3.49986962190352, - "grad_norm": 1.4503662586212158, - "learning_rate": 9.375979899497487e-05, - "loss": 5.8123, - "step": 6711 - }, - { - "epoch": 3.5003911342894396, - "grad_norm": 1.5299630165100098, - "learning_rate": 9.375879396984925e-05, - "loss": 5.6094, - "step": 6712 - }, - { - "epoch": 3.5009126466753586, - "grad_norm": 1.5533326864242554, - "learning_rate": 9.375778894472363e-05, - "loss": 5.8498, - "step": 6713 - }, - { - "epoch": 3.5014341590612776, - "grad_norm": 1.3899163007736206, - "learning_rate": 9.375678391959799e-05, - "loss": 6.0214, - "step": 6714 - }, - { - "epoch": 3.501955671447197, - "grad_norm": 1.3556277751922607, - "learning_rate": 9.375577889447237e-05, - "loss": 5.9884, - "step": 6715 - }, - { - "epoch": 3.502477183833116, - "grad_norm": 1.4719994068145752, - "learning_rate": 9.375477386934673e-05, - "loss": 5.4115, - "step": 6716 - }, - { - "epoch": 3.502998696219035, - "grad_norm": 1.6984734535217285, - "learning_rate": 9.375376884422111e-05, - "loss": 5.6738, - "step": 6717 - }, - { - "epoch": 3.5035202086049546, - "grad_norm": 1.3937760591506958, - "learning_rate": 9.375276381909548e-05, - "loss": 5.7454, - "step": 6718 - }, - { - "epoch": 3.5040417209908736, - "grad_norm": 1.308585286140442, - "learning_rate": 9.375175879396985e-05, - "loss": 5.9364, - "step": 6719 - }, - { - "epoch": 3.5045632333767927, - "grad_norm": 1.5324649810791016, - "learning_rate": 9.375075376884422e-05, - "loss": 5.8499, - "step": 6720 - }, - { - "epoch": 3.505084745762712, - "grad_norm": 1.3970049619674683, - "learning_rate": 9.37497487437186e-05, - "loss": 5.5446, - "step": 6721 - }, - { - "epoch": 3.505606258148631, - "grad_norm": 1.3376753330230713, - "learning_rate": 9.374874371859297e-05, - "loss": 6.193, - "step": 6722 - }, - { - "epoch": 3.50612777053455, - "grad_norm": 1.3524177074432373, - "learning_rate": 9.374773869346735e-05, - "loss": 6.0968, - "step": 6723 - }, - { - "epoch": 3.5066492829204696, - "grad_norm": 1.4086419343948364, - "learning_rate": 9.374673366834172e-05, - "loss": 6.0634, - "step": 6724 - }, - { - "epoch": 3.5071707953063886, - "grad_norm": 2.1302547454833984, - "learning_rate": 9.374572864321608e-05, - "loss": 5.4294, - "step": 6725 - }, - { - "epoch": 3.5076923076923077, - "grad_norm": 1.6257654428482056, - "learning_rate": 9.374472361809046e-05, - "loss": 5.8636, - "step": 6726 - }, - { - "epoch": 3.508213820078227, - "grad_norm": 1.6204512119293213, - "learning_rate": 9.374371859296482e-05, - "loss": 5.218, - "step": 6727 - }, - { - "epoch": 3.508735332464146, - "grad_norm": 1.6032049655914307, - "learning_rate": 9.37427135678392e-05, - "loss": 5.3196, - "step": 6728 - }, - { - "epoch": 3.509256844850065, - "grad_norm": 1.4963319301605225, - "learning_rate": 9.374170854271356e-05, - "loss": 5.8199, - "step": 6729 - }, - { - "epoch": 3.509778357235984, - "grad_norm": 1.4500224590301514, - "learning_rate": 9.374070351758794e-05, - "loss": 5.8819, - "step": 6730 - }, - { - "epoch": 3.5102998696219037, - "grad_norm": 1.35861074924469, - "learning_rate": 9.373969849246231e-05, - "loss": 6.1633, - "step": 6731 - }, - { - "epoch": 3.5108213820078227, - "grad_norm": 1.608127474784851, - "learning_rate": 9.373869346733668e-05, - "loss": 5.6522, - "step": 6732 - }, - { - "epoch": 3.5113428943937417, - "grad_norm": 1.5624979734420776, - "learning_rate": 9.373768844221106e-05, - "loss": 5.4789, - "step": 6733 - }, - { - "epoch": 3.511864406779661, - "grad_norm": 1.612146258354187, - "learning_rate": 9.373668341708544e-05, - "loss": 5.5371, - "step": 6734 - }, - { - "epoch": 3.51238591916558, - "grad_norm": 1.3103408813476562, - "learning_rate": 9.37356783919598e-05, - "loss": 6.1346, - "step": 6735 - }, - { - "epoch": 3.512907431551499, - "grad_norm": 2.056246280670166, - "learning_rate": 9.373467336683418e-05, - "loss": 5.5654, - "step": 6736 - }, - { - "epoch": 3.5134289439374187, - "grad_norm": 1.4977360963821411, - "learning_rate": 9.373366834170855e-05, - "loss": 5.7089, - "step": 6737 - }, - { - "epoch": 3.5139504563233377, - "grad_norm": 1.6363375186920166, - "learning_rate": 9.373266331658291e-05, - "loss": 5.7208, - "step": 6738 - }, - { - "epoch": 3.5144719687092567, - "grad_norm": 1.3955926895141602, - "learning_rate": 9.373165829145729e-05, - "loss": 6.0194, - "step": 6739 - }, - { - "epoch": 3.514993481095176, - "grad_norm": 1.450245976448059, - "learning_rate": 9.373065326633165e-05, - "loss": 5.9459, - "step": 6740 - }, - { - "epoch": 3.515514993481095, - "grad_norm": 1.554410457611084, - "learning_rate": 9.372964824120603e-05, - "loss": 5.4273, - "step": 6741 - }, - { - "epoch": 3.516036505867014, - "grad_norm": 1.5028961896896362, - "learning_rate": 9.372864321608041e-05, - "loss": 6.2075, - "step": 6742 - }, - { - "epoch": 3.5165580182529332, - "grad_norm": 1.414805293083191, - "learning_rate": 9.372763819095479e-05, - "loss": 5.9259, - "step": 6743 - }, - { - "epoch": 3.5170795306388527, - "grad_norm": 1.5399208068847656, - "learning_rate": 9.372663316582915e-05, - "loss": 5.663, - "step": 6744 - }, - { - "epoch": 3.5176010430247717, - "grad_norm": 1.3553940057754517, - "learning_rate": 9.372562814070353e-05, - "loss": 5.4974, - "step": 6745 - }, - { - "epoch": 3.5181225554106907, - "grad_norm": 1.5037813186645508, - "learning_rate": 9.372462311557789e-05, - "loss": 5.8822, - "step": 6746 - }, - { - "epoch": 3.51864406779661, - "grad_norm": 1.302720308303833, - "learning_rate": 9.372361809045227e-05, - "loss": 5.7823, - "step": 6747 - }, - { - "epoch": 3.5191655801825292, - "grad_norm": 1.6097068786621094, - "learning_rate": 9.372261306532664e-05, - "loss": 5.5633, - "step": 6748 - }, - { - "epoch": 3.5196870925684483, - "grad_norm": 1.4835314750671387, - "learning_rate": 9.372160804020101e-05, - "loss": 5.9501, - "step": 6749 - }, - { - "epoch": 3.5202086049543677, - "grad_norm": 1.4716057777404785, - "learning_rate": 9.372060301507538e-05, - "loss": 5.5612, - "step": 6750 - }, - { - "epoch": 3.5207301173402867, - "grad_norm": 1.5794897079467773, - "learning_rate": 9.371959798994974e-05, - "loss": 5.5504, - "step": 6751 - }, - { - "epoch": 3.5212516297262058, - "grad_norm": 1.5934181213378906, - "learning_rate": 9.371859296482412e-05, - "loss": 5.4545, - "step": 6752 - }, - { - "epoch": 3.521773142112125, - "grad_norm": 1.5200958251953125, - "learning_rate": 9.37175879396985e-05, - "loss": 5.7425, - "step": 6753 - }, - { - "epoch": 3.5222946544980442, - "grad_norm": 1.5170706510543823, - "learning_rate": 9.371658291457287e-05, - "loss": 5.7668, - "step": 6754 - }, - { - "epoch": 3.5228161668839633, - "grad_norm": 1.6036436557769775, - "learning_rate": 9.371557788944724e-05, - "loss": 4.8532, - "step": 6755 - }, - { - "epoch": 3.5233376792698827, - "grad_norm": 1.6023293733596802, - "learning_rate": 9.371457286432162e-05, - "loss": 5.407, - "step": 6756 - }, - { - "epoch": 3.5238591916558017, - "grad_norm": 1.491373896598816, - "learning_rate": 9.371356783919598e-05, - "loss": 5.9984, - "step": 6757 - }, - { - "epoch": 3.5243807040417208, - "grad_norm": 1.4562264680862427, - "learning_rate": 9.371256281407036e-05, - "loss": 5.5326, - "step": 6758 - }, - { - "epoch": 3.5249022164276402, - "grad_norm": 1.5026049613952637, - "learning_rate": 9.371155778894472e-05, - "loss": 5.6318, - "step": 6759 - }, - { - "epoch": 3.5254237288135593, - "grad_norm": 1.623980164527893, - "learning_rate": 9.37105527638191e-05, - "loss": 5.5478, - "step": 6760 - }, - { - "epoch": 3.5259452411994783, - "grad_norm": 1.494152307510376, - "learning_rate": 9.370954773869347e-05, - "loss": 5.8064, - "step": 6761 - }, - { - "epoch": 3.5264667535853977, - "grad_norm": 1.517565369606018, - "learning_rate": 9.370854271356784e-05, - "loss": 5.4861, - "step": 6762 - }, - { - "epoch": 3.5269882659713168, - "grad_norm": 1.523149847984314, - "learning_rate": 9.370753768844222e-05, - "loss": 5.8954, - "step": 6763 - }, - { - "epoch": 3.527509778357236, - "grad_norm": 1.4593862295150757, - "learning_rate": 9.370653266331659e-05, - "loss": 5.9466, - "step": 6764 - }, - { - "epoch": 3.5280312907431552, - "grad_norm": 1.3891209363937378, - "learning_rate": 9.370552763819096e-05, - "loss": 5.6893, - "step": 6765 - }, - { - "epoch": 3.5285528031290743, - "grad_norm": 1.4780783653259277, - "learning_rate": 9.370452261306533e-05, - "loss": 5.7885, - "step": 6766 - }, - { - "epoch": 3.5290743155149933, - "grad_norm": 1.550339698791504, - "learning_rate": 9.37035175879397e-05, - "loss": 5.8575, - "step": 6767 - }, - { - "epoch": 3.5295958279009128, - "grad_norm": 1.528484582901001, - "learning_rate": 9.370251256281407e-05, - "loss": 5.8926, - "step": 6768 - }, - { - "epoch": 3.5301173402868318, - "grad_norm": 1.460328221321106, - "learning_rate": 9.370150753768845e-05, - "loss": 5.6154, - "step": 6769 - }, - { - "epoch": 3.530638852672751, - "grad_norm": 1.518493890762329, - "learning_rate": 9.370050251256281e-05, - "loss": 5.7375, - "step": 6770 - }, - { - "epoch": 3.5311603650586703, - "grad_norm": 1.3918448686599731, - "learning_rate": 9.369949748743719e-05, - "loss": 6.1041, - "step": 6771 - }, - { - "epoch": 3.5316818774445893, - "grad_norm": 1.6287676095962524, - "learning_rate": 9.369849246231155e-05, - "loss": 5.8486, - "step": 6772 - }, - { - "epoch": 3.5322033898305083, - "grad_norm": 1.4579936265945435, - "learning_rate": 9.369748743718593e-05, - "loss": 6.1125, - "step": 6773 - }, - { - "epoch": 3.5327249022164278, - "grad_norm": 1.5070048570632935, - "learning_rate": 9.369648241206031e-05, - "loss": 5.9285, - "step": 6774 - }, - { - "epoch": 3.533246414602347, - "grad_norm": 1.5988515615463257, - "learning_rate": 9.369547738693469e-05, - "loss": 5.8352, - "step": 6775 - }, - { - "epoch": 3.533767926988266, - "grad_norm": 1.4927008152008057, - "learning_rate": 9.369447236180905e-05, - "loss": 5.5095, - "step": 6776 - }, - { - "epoch": 3.5342894393741853, - "grad_norm": 1.4838271141052246, - "learning_rate": 9.369346733668343e-05, - "loss": 5.7209, - "step": 6777 - }, - { - "epoch": 3.5348109517601043, - "grad_norm": 1.5043238401412964, - "learning_rate": 9.36924623115578e-05, - "loss": 4.9815, - "step": 6778 - }, - { - "epoch": 3.5353324641460233, - "grad_norm": 1.4178813695907593, - "learning_rate": 9.369145728643216e-05, - "loss": 5.7824, - "step": 6779 - }, - { - "epoch": 3.5358539765319428, - "grad_norm": 1.4742341041564941, - "learning_rate": 9.369045226130654e-05, - "loss": 5.7549, - "step": 6780 - }, - { - "epoch": 3.536375488917862, - "grad_norm": 1.5698914527893066, - "learning_rate": 9.36894472361809e-05, - "loss": 5.7693, - "step": 6781 - }, - { - "epoch": 3.536897001303781, - "grad_norm": 1.557430386543274, - "learning_rate": 9.368844221105528e-05, - "loss": 5.3739, - "step": 6782 - }, - { - "epoch": 3.5374185136897003, - "grad_norm": 1.4710289239883423, - "learning_rate": 9.368743718592966e-05, - "loss": 5.6676, - "step": 6783 - }, - { - "epoch": 3.5379400260756193, - "grad_norm": 1.4429258108139038, - "learning_rate": 9.368643216080403e-05, - "loss": 5.9515, - "step": 6784 - }, - { - "epoch": 3.5384615384615383, - "grad_norm": 1.5355881452560425, - "learning_rate": 9.36854271356784e-05, - "loss": 5.8846, - "step": 6785 - }, - { - "epoch": 3.538983050847458, - "grad_norm": 1.5289205312728882, - "learning_rate": 9.368442211055278e-05, - "loss": 6.0205, - "step": 6786 - }, - { - "epoch": 3.539504563233377, - "grad_norm": 1.4295034408569336, - "learning_rate": 9.368341708542714e-05, - "loss": 6.122, - "step": 6787 - }, - { - "epoch": 3.540026075619296, - "grad_norm": 1.6144932508468628, - "learning_rate": 9.368241206030152e-05, - "loss": 5.6577, - "step": 6788 - }, - { - "epoch": 3.5405475880052153, - "grad_norm": 1.5656191110610962, - "learning_rate": 9.368140703517588e-05, - "loss": 5.9539, - "step": 6789 - }, - { - "epoch": 3.5410691003911343, - "grad_norm": 1.4656474590301514, - "learning_rate": 9.368040201005026e-05, - "loss": 5.8874, - "step": 6790 - }, - { - "epoch": 3.5415906127770533, - "grad_norm": 1.5593163967132568, - "learning_rate": 9.367939698492462e-05, - "loss": 5.7532, - "step": 6791 - }, - { - "epoch": 3.542112125162973, - "grad_norm": 1.5778427124023438, - "learning_rate": 9.367839195979899e-05, - "loss": 6.2578, - "step": 6792 - }, - { - "epoch": 3.542633637548892, - "grad_norm": 1.5305495262145996, - "learning_rate": 9.367738693467337e-05, - "loss": 5.6411, - "step": 6793 - }, - { - "epoch": 3.543155149934811, - "grad_norm": 1.437207579612732, - "learning_rate": 9.367638190954774e-05, - "loss": 5.6023, - "step": 6794 - }, - { - "epoch": 3.5436766623207303, - "grad_norm": 1.6374677419662476, - "learning_rate": 9.367537688442212e-05, - "loss": 5.7315, - "step": 6795 - }, - { - "epoch": 3.5441981747066493, - "grad_norm": 1.4308102130889893, - "learning_rate": 9.367437185929649e-05, - "loss": 5.8646, - "step": 6796 - }, - { - "epoch": 3.5447196870925683, - "grad_norm": 1.4962522983551025, - "learning_rate": 9.367336683417086e-05, - "loss": 6.0144, - "step": 6797 - }, - { - "epoch": 3.545241199478488, - "grad_norm": 1.3831360340118408, - "learning_rate": 9.367236180904523e-05, - "loss": 5.9922, - "step": 6798 - }, - { - "epoch": 3.545762711864407, - "grad_norm": 1.5433286428451538, - "learning_rate": 9.36713567839196e-05, - "loss": 5.6331, - "step": 6799 - }, - { - "epoch": 3.546284224250326, - "grad_norm": 1.393429160118103, - "learning_rate": 9.367035175879397e-05, - "loss": 5.812, - "step": 6800 - }, - { - "epoch": 3.5468057366362453, - "grad_norm": 1.422887921333313, - "learning_rate": 9.366934673366835e-05, - "loss": 5.8153, - "step": 6801 - }, - { - "epoch": 3.5473272490221643, - "grad_norm": 1.4431641101837158, - "learning_rate": 9.366834170854271e-05, - "loss": 5.4828, - "step": 6802 - }, - { - "epoch": 3.5478487614080834, - "grad_norm": 1.4801768064498901, - "learning_rate": 9.366733668341709e-05, - "loss": 5.6167, - "step": 6803 - }, - { - "epoch": 3.548370273794003, - "grad_norm": 1.4948431253433228, - "learning_rate": 9.366633165829147e-05, - "loss": 5.7848, - "step": 6804 - }, - { - "epoch": 3.548891786179922, - "grad_norm": 1.540388822555542, - "learning_rate": 9.366532663316583e-05, - "loss": 5.5861, - "step": 6805 - }, - { - "epoch": 3.549413298565841, - "grad_norm": 1.3801501989364624, - "learning_rate": 9.366432160804021e-05, - "loss": 6.0132, - "step": 6806 - }, - { - "epoch": 3.5499348109517603, - "grad_norm": 1.4095205068588257, - "learning_rate": 9.366331658291457e-05, - "loss": 6.1008, - "step": 6807 - }, - { - "epoch": 3.5504563233376794, - "grad_norm": 1.5053099393844604, - "learning_rate": 9.366231155778895e-05, - "loss": 6.002, - "step": 6808 - }, - { - "epoch": 3.5509778357235984, - "grad_norm": 1.7069369554519653, - "learning_rate": 9.366130653266332e-05, - "loss": 5.2743, - "step": 6809 - }, - { - "epoch": 3.551499348109518, - "grad_norm": 1.5175182819366455, - "learning_rate": 9.36603015075377e-05, - "loss": 5.757, - "step": 6810 - }, - { - "epoch": 3.552020860495437, - "grad_norm": 1.5140146017074585, - "learning_rate": 9.365929648241206e-05, - "loss": 6.0598, - "step": 6811 - }, - { - "epoch": 3.552542372881356, - "grad_norm": 1.3476978540420532, - "learning_rate": 9.365829145728644e-05, - "loss": 5.9597, - "step": 6812 - }, - { - "epoch": 3.5530638852672753, - "grad_norm": 1.5037477016448975, - "learning_rate": 9.36572864321608e-05, - "loss": 5.5444, - "step": 6813 - }, - { - "epoch": 3.5535853976531944, - "grad_norm": 1.496700644493103, - "learning_rate": 9.365628140703518e-05, - "loss": 5.774, - "step": 6814 - }, - { - "epoch": 3.5541069100391134, - "grad_norm": 1.504744291305542, - "learning_rate": 9.365527638190956e-05, - "loss": 5.3697, - "step": 6815 - }, - { - "epoch": 3.554628422425033, - "grad_norm": 1.3509753942489624, - "learning_rate": 9.365427135678393e-05, - "loss": 5.8862, - "step": 6816 - }, - { - "epoch": 3.555149934810952, - "grad_norm": 1.4837826490402222, - "learning_rate": 9.36532663316583e-05, - "loss": 5.7488, - "step": 6817 - }, - { - "epoch": 3.555671447196871, - "grad_norm": 1.4449288845062256, - "learning_rate": 9.365226130653266e-05, - "loss": 5.8544, - "step": 6818 - }, - { - "epoch": 3.5561929595827904, - "grad_norm": 1.4951460361480713, - "learning_rate": 9.365125628140704e-05, - "loss": 6.1774, - "step": 6819 - }, - { - "epoch": 3.5567144719687094, - "grad_norm": 1.4116355180740356, - "learning_rate": 9.36502512562814e-05, - "loss": 6.0866, - "step": 6820 - }, - { - "epoch": 3.5572359843546284, - "grad_norm": 1.5097860097885132, - "learning_rate": 9.364924623115578e-05, - "loss": 5.7212, - "step": 6821 - }, - { - "epoch": 3.557757496740548, - "grad_norm": 1.4505469799041748, - "learning_rate": 9.364824120603015e-05, - "loss": 5.7694, - "step": 6822 - }, - { - "epoch": 3.558279009126467, - "grad_norm": 1.3850070238113403, - "learning_rate": 9.364723618090452e-05, - "loss": 6.0401, - "step": 6823 - }, - { - "epoch": 3.558800521512386, - "grad_norm": 1.5888214111328125, - "learning_rate": 9.36462311557789e-05, - "loss": 5.4822, - "step": 6824 - }, - { - "epoch": 3.559322033898305, - "grad_norm": 1.5971285104751587, - "learning_rate": 9.364522613065328e-05, - "loss": 5.9007, - "step": 6825 - }, - { - "epoch": 3.5598435462842244, - "grad_norm": 1.5771061182022095, - "learning_rate": 9.364422110552764e-05, - "loss": 5.8294, - "step": 6826 - }, - { - "epoch": 3.5603650586701434, - "grad_norm": 1.426080584526062, - "learning_rate": 9.364321608040202e-05, - "loss": 5.6197, - "step": 6827 - }, - { - "epoch": 3.5608865710560624, - "grad_norm": 1.4910591840744019, - "learning_rate": 9.364221105527639e-05, - "loss": 5.7713, - "step": 6828 - }, - { - "epoch": 3.561408083441982, - "grad_norm": 1.4725879430770874, - "learning_rate": 9.364120603015076e-05, - "loss": 5.4805, - "step": 6829 - }, - { - "epoch": 3.561929595827901, - "grad_norm": 1.406074047088623, - "learning_rate": 9.364020100502513e-05, - "loss": 5.8328, - "step": 6830 - }, - { - "epoch": 3.56245110821382, - "grad_norm": 1.671393632888794, - "learning_rate": 9.36391959798995e-05, - "loss": 5.7102, - "step": 6831 - }, - { - "epoch": 3.5629726205997394, - "grad_norm": 1.501727819442749, - "learning_rate": 9.363819095477387e-05, - "loss": 5.6989, - "step": 6832 - }, - { - "epoch": 3.5634941329856584, - "grad_norm": 1.5371509790420532, - "learning_rate": 9.363718592964824e-05, - "loss": 5.9144, - "step": 6833 - }, - { - "epoch": 3.5640156453715774, - "grad_norm": 1.4923715591430664, - "learning_rate": 9.363618090452261e-05, - "loss": 5.5012, - "step": 6834 - }, - { - "epoch": 3.5645371577574965, - "grad_norm": 1.4875831604003906, - "learning_rate": 9.363517587939699e-05, - "loss": 5.8962, - "step": 6835 - }, - { - "epoch": 3.565058670143416, - "grad_norm": 1.5725582838058472, - "learning_rate": 9.363417085427137e-05, - "loss": 5.7393, - "step": 6836 - }, - { - "epoch": 3.565580182529335, - "grad_norm": 1.466874599456787, - "learning_rate": 9.363316582914573e-05, - "loss": 5.6146, - "step": 6837 - }, - { - "epoch": 3.566101694915254, - "grad_norm": 1.498960018157959, - "learning_rate": 9.363216080402011e-05, - "loss": 5.607, - "step": 6838 - }, - { - "epoch": 3.5666232073011734, - "grad_norm": 1.6629704236984253, - "learning_rate": 9.363115577889448e-05, - "loss": 5.7833, - "step": 6839 - }, - { - "epoch": 3.5671447196870925, - "grad_norm": 1.5827151536941528, - "learning_rate": 9.363015075376885e-05, - "loss": 5.9138, - "step": 6840 - }, - { - "epoch": 3.5676662320730115, - "grad_norm": 1.405080795288086, - "learning_rate": 9.362914572864322e-05, - "loss": 6.0245, - "step": 6841 - }, - { - "epoch": 3.568187744458931, - "grad_norm": 1.703434944152832, - "learning_rate": 9.36281407035176e-05, - "loss": 5.7502, - "step": 6842 - }, - { - "epoch": 3.56870925684485, - "grad_norm": 1.3881704807281494, - "learning_rate": 9.362713567839196e-05, - "loss": 6.0196, - "step": 6843 - }, - { - "epoch": 3.569230769230769, - "grad_norm": 1.3772093057632446, - "learning_rate": 9.362613065326634e-05, - "loss": 5.6336, - "step": 6844 - }, - { - "epoch": 3.5697522816166884, - "grad_norm": 1.7843724489212036, - "learning_rate": 9.362512562814072e-05, - "loss": 5.5352, - "step": 6845 - }, - { - "epoch": 3.5702737940026075, - "grad_norm": 1.608591914176941, - "learning_rate": 9.362412060301508e-05, - "loss": 5.9506, - "step": 6846 - }, - { - "epoch": 3.5707953063885265, - "grad_norm": 1.453364610671997, - "learning_rate": 9.362311557788946e-05, - "loss": 5.8615, - "step": 6847 - }, - { - "epoch": 3.571316818774446, - "grad_norm": 1.5107251405715942, - "learning_rate": 9.362211055276382e-05, - "loss": 5.7107, - "step": 6848 - }, - { - "epoch": 3.571838331160365, - "grad_norm": 1.525380253791809, - "learning_rate": 9.36211055276382e-05, - "loss": 5.8206, - "step": 6849 - }, - { - "epoch": 3.572359843546284, - "grad_norm": 1.5033036470413208, - "learning_rate": 9.362010050251256e-05, - "loss": 5.3721, - "step": 6850 - }, - { - "epoch": 3.5728813559322035, - "grad_norm": 1.7732305526733398, - "learning_rate": 9.361909547738694e-05, - "loss": 5.4542, - "step": 6851 - }, - { - "epoch": 3.5734028683181225, - "grad_norm": 1.5028077363967896, - "learning_rate": 9.36180904522613e-05, - "loss": 4.9389, - "step": 6852 - }, - { - "epoch": 3.5739243807040415, - "grad_norm": 1.6341519355773926, - "learning_rate": 9.361708542713568e-05, - "loss": 5.7513, - "step": 6853 - }, - { - "epoch": 3.574445893089961, - "grad_norm": 1.5568238496780396, - "learning_rate": 9.361608040201005e-05, - "loss": 5.8738, - "step": 6854 - }, - { - "epoch": 3.57496740547588, - "grad_norm": 1.59091317653656, - "learning_rate": 9.361507537688443e-05, - "loss": 5.6566, - "step": 6855 - }, - { - "epoch": 3.575488917861799, - "grad_norm": 1.5449665784835815, - "learning_rate": 9.36140703517588e-05, - "loss": 5.7875, - "step": 6856 - }, - { - "epoch": 3.5760104302477185, - "grad_norm": 1.396132469177246, - "learning_rate": 9.361306532663318e-05, - "loss": 5.7099, - "step": 6857 - }, - { - "epoch": 3.5765319426336375, - "grad_norm": 1.68631112575531, - "learning_rate": 9.361206030150755e-05, - "loss": 5.9687, - "step": 6858 - }, - { - "epoch": 3.5770534550195565, - "grad_norm": 1.5919063091278076, - "learning_rate": 9.361105527638191e-05, - "loss": 5.6823, - "step": 6859 - }, - { - "epoch": 3.577574967405476, - "grad_norm": 1.4949712753295898, - "learning_rate": 9.361005025125629e-05, - "loss": 5.241, - "step": 6860 - }, - { - "epoch": 3.578096479791395, - "grad_norm": 1.6010483503341675, - "learning_rate": 9.360904522613065e-05, - "loss": 5.8532, - "step": 6861 - }, - { - "epoch": 3.578617992177314, - "grad_norm": 1.5974032878875732, - "learning_rate": 9.360804020100503e-05, - "loss": 6.2063, - "step": 6862 - }, - { - "epoch": 3.5791395045632335, - "grad_norm": 1.5263973474502563, - "learning_rate": 9.36070351758794e-05, - "loss": 5.7751, - "step": 6863 - }, - { - "epoch": 3.5796610169491525, - "grad_norm": 1.4749189615249634, - "learning_rate": 9.360603015075377e-05, - "loss": 5.9876, - "step": 6864 - }, - { - "epoch": 3.5801825293350715, - "grad_norm": 1.5189263820648193, - "learning_rate": 9.360502512562814e-05, - "loss": 6.0059, - "step": 6865 - }, - { - "epoch": 3.580704041720991, - "grad_norm": 1.3961609601974487, - "learning_rate": 9.360402010050251e-05, - "loss": 6.2034, - "step": 6866 - }, - { - "epoch": 3.58122555410691, - "grad_norm": 1.4077609777450562, - "learning_rate": 9.360301507537689e-05, - "loss": 5.9095, - "step": 6867 - }, - { - "epoch": 3.581747066492829, - "grad_norm": 1.5758384466171265, - "learning_rate": 9.360201005025127e-05, - "loss": 5.62, - "step": 6868 - }, - { - "epoch": 3.5822685788787485, - "grad_norm": 1.7166107892990112, - "learning_rate": 9.360100502512563e-05, - "loss": 5.2069, - "step": 6869 - }, - { - "epoch": 3.5827900912646675, - "grad_norm": 1.4898594617843628, - "learning_rate": 9.360000000000001e-05, - "loss": 5.6262, - "step": 6870 - }, - { - "epoch": 3.5833116036505865, - "grad_norm": 1.413983941078186, - "learning_rate": 9.359899497487438e-05, - "loss": 5.7946, - "step": 6871 - }, - { - "epoch": 3.583833116036506, - "grad_norm": 1.4677363634109497, - "learning_rate": 9.359798994974874e-05, - "loss": 5.7692, - "step": 6872 - }, - { - "epoch": 3.584354628422425, - "grad_norm": 1.4581385850906372, - "learning_rate": 9.359698492462312e-05, - "loss": 5.9509, - "step": 6873 - }, - { - "epoch": 3.584876140808344, - "grad_norm": 1.4012049436569214, - "learning_rate": 9.359597989949748e-05, - "loss": 5.1732, - "step": 6874 - }, - { - "epoch": 3.5853976531942635, - "grad_norm": 1.3268972635269165, - "learning_rate": 9.359497487437186e-05, - "loss": 5.7503, - "step": 6875 - }, - { - "epoch": 3.5859191655801825, - "grad_norm": 1.4763849973678589, - "learning_rate": 9.359396984924624e-05, - "loss": 6.001, - "step": 6876 - }, - { - "epoch": 3.5864406779661016, - "grad_norm": 1.4205474853515625, - "learning_rate": 9.359296482412062e-05, - "loss": 6.052, - "step": 6877 - }, - { - "epoch": 3.586962190352021, - "grad_norm": 1.562045693397522, - "learning_rate": 9.359195979899498e-05, - "loss": 5.8459, - "step": 6878 - }, - { - "epoch": 3.58748370273794, - "grad_norm": 1.4696348905563354, - "learning_rate": 9.359095477386936e-05, - "loss": 5.9648, - "step": 6879 - }, - { - "epoch": 3.588005215123859, - "grad_norm": 1.4018970727920532, - "learning_rate": 9.358994974874372e-05, - "loss": 5.7933, - "step": 6880 - }, - { - "epoch": 3.5885267275097785, - "grad_norm": 1.4879355430603027, - "learning_rate": 9.35889447236181e-05, - "loss": 5.6376, - "step": 6881 - }, - { - "epoch": 3.5890482398956975, - "grad_norm": 1.4292739629745483, - "learning_rate": 9.358793969849246e-05, - "loss": 6.1167, - "step": 6882 - }, - { - "epoch": 3.5895697522816166, - "grad_norm": 1.533405065536499, - "learning_rate": 9.358693467336684e-05, - "loss": 6.136, - "step": 6883 - }, - { - "epoch": 3.590091264667536, - "grad_norm": 1.3921878337860107, - "learning_rate": 9.35859296482412e-05, - "loss": 5.8444, - "step": 6884 - }, - { - "epoch": 3.590612777053455, - "grad_norm": 1.4075108766555786, - "learning_rate": 9.358492462311557e-05, - "loss": 5.9878, - "step": 6885 - }, - { - "epoch": 3.591134289439374, - "grad_norm": 1.324178695678711, - "learning_rate": 9.358391959798995e-05, - "loss": 6.1119, - "step": 6886 - }, - { - "epoch": 3.5916558018252935, - "grad_norm": 1.3948732614517212, - "learning_rate": 9.358291457286433e-05, - "loss": 5.9962, - "step": 6887 - }, - { - "epoch": 3.5921773142112126, - "grad_norm": 1.4392926692962646, - "learning_rate": 9.35819095477387e-05, - "loss": 5.9424, - "step": 6888 - }, - { - "epoch": 3.5926988265971316, - "grad_norm": 1.5435951948165894, - "learning_rate": 9.358090452261307e-05, - "loss": 5.6765, - "step": 6889 - }, - { - "epoch": 3.593220338983051, - "grad_norm": 1.6878023147583008, - "learning_rate": 9.357989949748745e-05, - "loss": 5.2532, - "step": 6890 - }, - { - "epoch": 3.59374185136897, - "grad_norm": 1.4812532663345337, - "learning_rate": 9.357889447236181e-05, - "loss": 5.6591, - "step": 6891 - }, - { - "epoch": 3.594263363754889, - "grad_norm": 1.420653223991394, - "learning_rate": 9.357788944723619e-05, - "loss": 5.7632, - "step": 6892 - }, - { - "epoch": 3.5947848761408085, - "grad_norm": 1.4115720987319946, - "learning_rate": 9.357688442211055e-05, - "loss": 5.798, - "step": 6893 - }, - { - "epoch": 3.5953063885267276, - "grad_norm": 1.5068799257278442, - "learning_rate": 9.357587939698493e-05, - "loss": 5.7412, - "step": 6894 - }, - { - "epoch": 3.5958279009126466, - "grad_norm": 1.433834195137024, - "learning_rate": 9.35748743718593e-05, - "loss": 6.0532, - "step": 6895 - }, - { - "epoch": 3.596349413298566, - "grad_norm": 1.4348320960998535, - "learning_rate": 9.357386934673367e-05, - "loss": 5.8764, - "step": 6896 - }, - { - "epoch": 3.596870925684485, - "grad_norm": 1.502423882484436, - "learning_rate": 9.357286432160805e-05, - "loss": 5.6252, - "step": 6897 - }, - { - "epoch": 3.597392438070404, - "grad_norm": 1.4934662580490112, - "learning_rate": 9.357185929648241e-05, - "loss": 5.7368, - "step": 6898 - }, - { - "epoch": 3.5979139504563236, - "grad_norm": 1.5547089576721191, - "learning_rate": 9.357085427135679e-05, - "loss": 5.0837, - "step": 6899 - }, - { - "epoch": 3.5984354628422426, - "grad_norm": 1.4599767923355103, - "learning_rate": 9.356984924623116e-05, - "loss": 5.6357, - "step": 6900 - }, - { - "epoch": 3.5989569752281616, - "grad_norm": 1.3981791734695435, - "learning_rate": 9.356884422110553e-05, - "loss": 5.6084, - "step": 6901 - }, - { - "epoch": 3.599478487614081, - "grad_norm": 1.4656355381011963, - "learning_rate": 9.35678391959799e-05, - "loss": 6.0239, - "step": 6902 - }, - { - "epoch": 3.6, - "grad_norm": 1.649858832359314, - "learning_rate": 9.356683417085428e-05, - "loss": 5.6213, - "step": 6903 - }, - { - "epoch": 3.600521512385919, - "grad_norm": 1.4889661073684692, - "learning_rate": 9.356582914572864e-05, - "loss": 5.7472, - "step": 6904 - }, - { - "epoch": 3.6010430247718386, - "grad_norm": 1.5171128511428833, - "learning_rate": 9.356482412060302e-05, - "loss": 6.0173, - "step": 6905 - }, - { - "epoch": 3.6015645371577576, - "grad_norm": 1.4820151329040527, - "learning_rate": 9.356381909547738e-05, - "loss": 5.6385, - "step": 6906 - }, - { - "epoch": 3.6020860495436766, - "grad_norm": 1.39638090133667, - "learning_rate": 9.356281407035176e-05, - "loss": 5.7745, - "step": 6907 - }, - { - "epoch": 3.602607561929596, - "grad_norm": 1.3798282146453857, - "learning_rate": 9.356180904522614e-05, - "loss": 5.9719, - "step": 6908 - }, - { - "epoch": 3.603129074315515, - "grad_norm": 1.4505019187927246, - "learning_rate": 9.356080402010052e-05, - "loss": 6.0753, - "step": 6909 - }, - { - "epoch": 3.603650586701434, - "grad_norm": 1.4945056438446045, - "learning_rate": 9.355979899497488e-05, - "loss": 5.6782, - "step": 6910 - }, - { - "epoch": 3.6041720990873536, - "grad_norm": 1.2827863693237305, - "learning_rate": 9.355879396984925e-05, - "loss": 6.128, - "step": 6911 - }, - { - "epoch": 3.6046936114732726, - "grad_norm": 1.4397509098052979, - "learning_rate": 9.355778894472362e-05, - "loss": 5.9843, - "step": 6912 - }, - { - "epoch": 3.6052151238591916, - "grad_norm": 1.3544766902923584, - "learning_rate": 9.355678391959799e-05, - "loss": 5.5331, - "step": 6913 - }, - { - "epoch": 3.605736636245111, - "grad_norm": 1.6406874656677246, - "learning_rate": 9.355577889447237e-05, - "loss": 5.7444, - "step": 6914 - }, - { - "epoch": 3.60625814863103, - "grad_norm": 1.4029700756072998, - "learning_rate": 9.355477386934673e-05, - "loss": 5.6262, - "step": 6915 - }, - { - "epoch": 3.606779661016949, - "grad_norm": 1.5530447959899902, - "learning_rate": 9.355376884422111e-05, - "loss": 6.0757, - "step": 6916 - }, - { - "epoch": 3.607301173402868, - "grad_norm": 1.4106826782226562, - "learning_rate": 9.355276381909549e-05, - "loss": 5.9752, - "step": 6917 - }, - { - "epoch": 3.6078226857887876, - "grad_norm": 1.4556238651275635, - "learning_rate": 9.355175879396986e-05, - "loss": 5.86, - "step": 6918 - }, - { - "epoch": 3.6083441981747066, - "grad_norm": 1.3775568008422852, - "learning_rate": 9.355075376884423e-05, - "loss": 5.8982, - "step": 6919 - }, - { - "epoch": 3.6088657105606257, - "grad_norm": 1.5369001626968384, - "learning_rate": 9.35497487437186e-05, - "loss": 5.4901, - "step": 6920 - }, - { - "epoch": 3.609387222946545, - "grad_norm": 1.3803353309631348, - "learning_rate": 9.354874371859297e-05, - "loss": 5.9655, - "step": 6921 - }, - { - "epoch": 3.609908735332464, - "grad_norm": 1.7061947584152222, - "learning_rate": 9.354773869346735e-05, - "loss": 5.7856, - "step": 6922 - }, - { - "epoch": 3.610430247718383, - "grad_norm": 1.4817575216293335, - "learning_rate": 9.354673366834171e-05, - "loss": 5.8383, - "step": 6923 - }, - { - "epoch": 3.6109517601043026, - "grad_norm": 1.4101464748382568, - "learning_rate": 9.354572864321608e-05, - "loss": 6.004, - "step": 6924 - }, - { - "epoch": 3.6114732724902217, - "grad_norm": 1.5287854671478271, - "learning_rate": 9.354472361809045e-05, - "loss": 6.1112, - "step": 6925 - }, - { - "epoch": 3.6119947848761407, - "grad_norm": 1.5323861837387085, - "learning_rate": 9.354371859296482e-05, - "loss": 5.6673, - "step": 6926 - }, - { - "epoch": 3.61251629726206, - "grad_norm": 1.5977439880371094, - "learning_rate": 9.35427135678392e-05, - "loss": 5.4685, - "step": 6927 - }, - { - "epoch": 3.613037809647979, - "grad_norm": 1.4773221015930176, - "learning_rate": 9.354170854271357e-05, - "loss": 5.8095, - "step": 6928 - }, - { - "epoch": 3.613559322033898, - "grad_norm": 1.5083141326904297, - "learning_rate": 9.354070351758795e-05, - "loss": 5.8477, - "step": 6929 - }, - { - "epoch": 3.614080834419817, - "grad_norm": 1.474170446395874, - "learning_rate": 9.353969849246232e-05, - "loss": 5.9531, - "step": 6930 - }, - { - "epoch": 3.6146023468057367, - "grad_norm": 1.3986929655075073, - "learning_rate": 9.35386934673367e-05, - "loss": 5.7439, - "step": 6931 - }, - { - "epoch": 3.6151238591916557, - "grad_norm": 1.4565824270248413, - "learning_rate": 9.353768844221106e-05, - "loss": 5.8178, - "step": 6932 - }, - { - "epoch": 3.6156453715775747, - "grad_norm": 1.601528286933899, - "learning_rate": 9.353668341708544e-05, - "loss": 5.5391, - "step": 6933 - }, - { - "epoch": 3.616166883963494, - "grad_norm": 1.6320865154266357, - "learning_rate": 9.35356783919598e-05, - "loss": 5.8251, - "step": 6934 - }, - { - "epoch": 3.616688396349413, - "grad_norm": 1.5504592657089233, - "learning_rate": 9.353467336683418e-05, - "loss": 5.7701, - "step": 6935 - }, - { - "epoch": 3.617209908735332, - "grad_norm": 1.602881908416748, - "learning_rate": 9.353366834170854e-05, - "loss": 5.8008, - "step": 6936 - }, - { - "epoch": 3.6177314211212517, - "grad_norm": 1.4159990549087524, - "learning_rate": 9.353266331658292e-05, - "loss": 6.053, - "step": 6937 - }, - { - "epoch": 3.6182529335071707, - "grad_norm": 1.5103563070297241, - "learning_rate": 9.35316582914573e-05, - "loss": 6.0597, - "step": 6938 - }, - { - "epoch": 3.6187744458930897, - "grad_norm": 1.4954168796539307, - "learning_rate": 9.353065326633166e-05, - "loss": 5.4636, - "step": 6939 - }, - { - "epoch": 3.619295958279009, - "grad_norm": 1.426872968673706, - "learning_rate": 9.352964824120604e-05, - "loss": 6.1467, - "step": 6940 - }, - { - "epoch": 3.619817470664928, - "grad_norm": 1.646280288696289, - "learning_rate": 9.35286432160804e-05, - "loss": 5.3514, - "step": 6941 - }, - { - "epoch": 3.6203389830508472, - "grad_norm": 1.4335969686508179, - "learning_rate": 9.352763819095478e-05, - "loss": 6.1869, - "step": 6942 - }, - { - "epoch": 3.6208604954367667, - "grad_norm": 1.4257240295410156, - "learning_rate": 9.352663316582915e-05, - "loss": 6.0428, - "step": 6943 - }, - { - "epoch": 3.6213820078226857, - "grad_norm": 1.5464763641357422, - "learning_rate": 9.352562814070352e-05, - "loss": 5.5642, - "step": 6944 - }, - { - "epoch": 3.6219035202086047, - "grad_norm": 1.3919329643249512, - "learning_rate": 9.352462311557789e-05, - "loss": 6.0843, - "step": 6945 - }, - { - "epoch": 3.622425032594524, - "grad_norm": 1.4617398977279663, - "learning_rate": 9.352361809045227e-05, - "loss": 5.7949, - "step": 6946 - }, - { - "epoch": 3.622946544980443, - "grad_norm": 1.5851168632507324, - "learning_rate": 9.352261306532663e-05, - "loss": 5.9043, - "step": 6947 - }, - { - "epoch": 3.6234680573663622, - "grad_norm": 1.373176097869873, - "learning_rate": 9.352160804020101e-05, - "loss": 5.7437, - "step": 6948 - }, - { - "epoch": 3.6239895697522817, - "grad_norm": 1.6997206211090088, - "learning_rate": 9.352060301507539e-05, - "loss": 5.6051, - "step": 6949 - }, - { - "epoch": 3.6245110821382007, - "grad_norm": 1.4387316703796387, - "learning_rate": 9.351959798994976e-05, - "loss": 6.0871, - "step": 6950 - }, - { - "epoch": 3.6250325945241197, - "grad_norm": 1.4237005710601807, - "learning_rate": 9.351859296482413e-05, - "loss": 6.025, - "step": 6951 - }, - { - "epoch": 3.625554106910039, - "grad_norm": 1.6108957529067993, - "learning_rate": 9.351758793969849e-05, - "loss": 5.8635, - "step": 6952 - }, - { - "epoch": 3.6260756192959582, - "grad_norm": 1.6192713975906372, - "learning_rate": 9.351658291457287e-05, - "loss": 5.932, - "step": 6953 - }, - { - "epoch": 3.6265971316818773, - "grad_norm": 1.6241966485977173, - "learning_rate": 9.351557788944723e-05, - "loss": 5.3448, - "step": 6954 - }, - { - "epoch": 3.6271186440677967, - "grad_norm": 1.5147408246994019, - "learning_rate": 9.351457286432161e-05, - "loss": 5.8407, - "step": 6955 - }, - { - "epoch": 3.6276401564537157, - "grad_norm": 1.494148850440979, - "learning_rate": 9.351356783919598e-05, - "loss": 5.9548, - "step": 6956 - }, - { - "epoch": 3.6281616688396348, - "grad_norm": 1.5650743246078491, - "learning_rate": 9.351256281407035e-05, - "loss": 5.8892, - "step": 6957 - }, - { - "epoch": 3.628683181225554, - "grad_norm": 1.5746864080429077, - "learning_rate": 9.351155778894473e-05, - "loss": 5.5157, - "step": 6958 - }, - { - "epoch": 3.6292046936114732, - "grad_norm": 1.6509934663772583, - "learning_rate": 9.351055276381911e-05, - "loss": 5.3898, - "step": 6959 - }, - { - "epoch": 3.6297262059973923, - "grad_norm": 1.4974578619003296, - "learning_rate": 9.350954773869347e-05, - "loss": 5.8562, - "step": 6960 - }, - { - "epoch": 3.6302477183833117, - "grad_norm": 1.46342933177948, - "learning_rate": 9.350854271356785e-05, - "loss": 5.8228, - "step": 6961 - }, - { - "epoch": 3.6307692307692307, - "grad_norm": 1.5128607749938965, - "learning_rate": 9.350753768844222e-05, - "loss": 5.7642, - "step": 6962 - }, - { - "epoch": 3.6312907431551498, - "grad_norm": 1.3214689493179321, - "learning_rate": 9.35065326633166e-05, - "loss": 5.9902, - "step": 6963 - }, - { - "epoch": 3.6318122555410692, - "grad_norm": 1.5082287788391113, - "learning_rate": 9.350552763819096e-05, - "loss": 5.4666, - "step": 6964 - }, - { - "epoch": 3.6323337679269883, - "grad_norm": 1.5935252904891968, - "learning_rate": 9.350452261306532e-05, - "loss": 5.7479, - "step": 6965 - }, - { - "epoch": 3.6328552803129073, - "grad_norm": 1.4112643003463745, - "learning_rate": 9.35035175879397e-05, - "loss": 5.9646, - "step": 6966 - }, - { - "epoch": 3.6333767926988267, - "grad_norm": 1.51386559009552, - "learning_rate": 9.350251256281406e-05, - "loss": 5.7332, - "step": 6967 - }, - { - "epoch": 3.6338983050847458, - "grad_norm": 1.3717272281646729, - "learning_rate": 9.350150753768844e-05, - "loss": 6.0705, - "step": 6968 - }, - { - "epoch": 3.634419817470665, - "grad_norm": 1.387502670288086, - "learning_rate": 9.350050251256282e-05, - "loss": 5.9247, - "step": 6969 - }, - { - "epoch": 3.6349413298565842, - "grad_norm": 1.6512454748153687, - "learning_rate": 9.34994974874372e-05, - "loss": 5.5091, - "step": 6970 - }, - { - "epoch": 3.6354628422425033, - "grad_norm": 1.5581622123718262, - "learning_rate": 9.349849246231156e-05, - "loss": 5.4997, - "step": 6971 - }, - { - "epoch": 3.6359843546284223, - "grad_norm": 1.5530822277069092, - "learning_rate": 9.349748743718594e-05, - "loss": 5.696, - "step": 6972 - }, - { - "epoch": 3.6365058670143418, - "grad_norm": 1.4497745037078857, - "learning_rate": 9.34964824120603e-05, - "loss": 5.6685, - "step": 6973 - }, - { - "epoch": 3.6370273794002608, - "grad_norm": 1.4639405012130737, - "learning_rate": 9.349547738693468e-05, - "loss": 5.6884, - "step": 6974 - }, - { - "epoch": 3.63754889178618, - "grad_norm": 1.4224026203155518, - "learning_rate": 9.349447236180905e-05, - "loss": 5.7743, - "step": 6975 - }, - { - "epoch": 3.6380704041720993, - "grad_norm": 1.544215202331543, - "learning_rate": 9.349346733668342e-05, - "loss": 5.6067, - "step": 6976 - }, - { - "epoch": 3.6385919165580183, - "grad_norm": 1.4786514043807983, - "learning_rate": 9.349246231155779e-05, - "loss": 5.7619, - "step": 6977 - }, - { - "epoch": 3.6391134289439373, - "grad_norm": 1.4969890117645264, - "learning_rate": 9.349145728643217e-05, - "loss": 6.003, - "step": 6978 - }, - { - "epoch": 3.6396349413298568, - "grad_norm": 1.5170743465423584, - "learning_rate": 9.349045226130654e-05, - "loss": 5.7452, - "step": 6979 - }, - { - "epoch": 3.640156453715776, - "grad_norm": 1.6823762655258179, - "learning_rate": 9.348944723618091e-05, - "loss": 5.653, - "step": 6980 - }, - { - "epoch": 3.640677966101695, - "grad_norm": 1.380423665046692, - "learning_rate": 9.348844221105529e-05, - "loss": 5.0421, - "step": 6981 - }, - { - "epoch": 3.6411994784876143, - "grad_norm": 1.5066813230514526, - "learning_rate": 9.348743718592965e-05, - "loss": 5.7063, - "step": 6982 - }, - { - "epoch": 3.6417209908735333, - "grad_norm": 1.5106076002120972, - "learning_rate": 9.348643216080403e-05, - "loss": 5.8842, - "step": 6983 - }, - { - "epoch": 3.6422425032594523, - "grad_norm": 1.4765019416809082, - "learning_rate": 9.348542713567839e-05, - "loss": 5.7175, - "step": 6984 - }, - { - "epoch": 3.6427640156453718, - "grad_norm": 1.6228536367416382, - "learning_rate": 9.348442211055277e-05, - "loss": 5.2125, - "step": 6985 - }, - { - "epoch": 3.643285528031291, - "grad_norm": 1.3125760555267334, - "learning_rate": 9.348341708542714e-05, - "loss": 6.1559, - "step": 6986 - }, - { - "epoch": 3.64380704041721, - "grad_norm": 1.5719709396362305, - "learning_rate": 9.348241206030151e-05, - "loss": 5.8974, - "step": 6987 - }, - { - "epoch": 3.6443285528031293, - "grad_norm": 1.5354093313217163, - "learning_rate": 9.348140703517588e-05, - "loss": 5.4638, - "step": 6988 - }, - { - "epoch": 3.6448500651890483, - "grad_norm": 1.613311529159546, - "learning_rate": 9.348040201005026e-05, - "loss": 5.4519, - "step": 6989 - }, - { - "epoch": 3.6453715775749673, - "grad_norm": 1.497916579246521, - "learning_rate": 9.347939698492463e-05, - "loss": 5.4487, - "step": 6990 - }, - { - "epoch": 3.645893089960887, - "grad_norm": 1.4887266159057617, - "learning_rate": 9.3478391959799e-05, - "loss": 5.6188, - "step": 6991 - }, - { - "epoch": 3.646414602346806, - "grad_norm": 1.5240713357925415, - "learning_rate": 9.347738693467338e-05, - "loss": 5.9876, - "step": 6992 - }, - { - "epoch": 3.646936114732725, - "grad_norm": 1.4290990829467773, - "learning_rate": 9.347638190954774e-05, - "loss": 5.8446, - "step": 6993 - }, - { - "epoch": 3.6474576271186443, - "grad_norm": 1.5863919258117676, - "learning_rate": 9.347537688442212e-05, - "loss": 5.6336, - "step": 6994 - }, - { - "epoch": 3.6479791395045633, - "grad_norm": 1.3621937036514282, - "learning_rate": 9.347437185929648e-05, - "loss": 6.2207, - "step": 6995 - }, - { - "epoch": 3.6485006518904823, - "grad_norm": 1.3958585262298584, - "learning_rate": 9.347336683417086e-05, - "loss": 6.0533, - "step": 6996 - }, - { - "epoch": 3.649022164276402, - "grad_norm": 1.386458158493042, - "learning_rate": 9.347236180904522e-05, - "loss": 5.9849, - "step": 6997 - }, - { - "epoch": 3.649543676662321, - "grad_norm": 1.4610226154327393, - "learning_rate": 9.34713567839196e-05, - "loss": 5.4203, - "step": 6998 - }, - { - "epoch": 3.65006518904824, - "grad_norm": 1.4660662412643433, - "learning_rate": 9.347035175879398e-05, - "loss": 5.7817, - "step": 6999 - }, - { - "epoch": 3.6505867014341593, - "grad_norm": 1.5710549354553223, - "learning_rate": 9.346934673366836e-05, - "loss": 5.1513, - "step": 7000 - }, - { - "epoch": 3.6511082138200783, - "grad_norm": 1.4879204034805298, - "learning_rate": 9.346834170854272e-05, - "loss": 5.897, - "step": 7001 - }, - { - "epoch": 3.6516297262059974, - "grad_norm": 1.4350230693817139, - "learning_rate": 9.34673366834171e-05, - "loss": 5.8321, - "step": 7002 - }, - { - "epoch": 3.652151238591917, - "grad_norm": 1.4425678253173828, - "learning_rate": 9.346633165829146e-05, - "loss": 5.7894, - "step": 7003 - }, - { - "epoch": 3.652672750977836, - "grad_norm": 1.3172986507415771, - "learning_rate": 9.346532663316583e-05, - "loss": 5.9681, - "step": 7004 - }, - { - "epoch": 3.653194263363755, - "grad_norm": 1.5918426513671875, - "learning_rate": 9.34643216080402e-05, - "loss": 5.0853, - "step": 7005 - }, - { - "epoch": 3.6537157757496743, - "grad_norm": 1.641069769859314, - "learning_rate": 9.346331658291457e-05, - "loss": 5.8079, - "step": 7006 - }, - { - "epoch": 3.6542372881355933, - "grad_norm": 1.4998801946640015, - "learning_rate": 9.346231155778895e-05, - "loss": 5.9092, - "step": 7007 - }, - { - "epoch": 3.6547588005215124, - "grad_norm": 1.6488269567489624, - "learning_rate": 9.346130653266331e-05, - "loss": 5.709, - "step": 7008 - }, - { - "epoch": 3.655280312907432, - "grad_norm": 1.4858925342559814, - "learning_rate": 9.346030150753769e-05, - "loss": 5.5682, - "step": 7009 - }, - { - "epoch": 3.655801825293351, - "grad_norm": 1.4368996620178223, - "learning_rate": 9.345929648241207e-05, - "loss": 5.9772, - "step": 7010 - }, - { - "epoch": 3.65632333767927, - "grad_norm": 1.4265004396438599, - "learning_rate": 9.345829145728645e-05, - "loss": 4.7863, - "step": 7011 - }, - { - "epoch": 3.656844850065189, - "grad_norm": 1.5192221403121948, - "learning_rate": 9.345728643216081e-05, - "loss": 5.8857, - "step": 7012 - }, - { - "epoch": 3.6573663624511084, - "grad_norm": 1.505833625793457, - "learning_rate": 9.345628140703519e-05, - "loss": 6.1111, - "step": 7013 - }, - { - "epoch": 3.6578878748370274, - "grad_norm": 1.5164717435836792, - "learning_rate": 9.345527638190955e-05, - "loss": 5.1709, - "step": 7014 - }, - { - "epoch": 3.6584093872229464, - "grad_norm": 1.4406355619430542, - "learning_rate": 9.345427135678393e-05, - "loss": 5.8385, - "step": 7015 - }, - { - "epoch": 3.658930899608866, - "grad_norm": 1.5299161672592163, - "learning_rate": 9.34532663316583e-05, - "loss": 5.6954, - "step": 7016 - }, - { - "epoch": 3.659452411994785, - "grad_norm": 1.488677740097046, - "learning_rate": 9.345226130653266e-05, - "loss": 5.8737, - "step": 7017 - }, - { - "epoch": 3.659973924380704, - "grad_norm": 1.5530858039855957, - "learning_rate": 9.345125628140704e-05, - "loss": 5.8156, - "step": 7018 - }, - { - "epoch": 3.6604954367666234, - "grad_norm": 1.3230679035186768, - "learning_rate": 9.345025125628141e-05, - "loss": 6.2401, - "step": 7019 - }, - { - "epoch": 3.6610169491525424, - "grad_norm": 1.3270398378372192, - "learning_rate": 9.344924623115579e-05, - "loss": 6.1277, - "step": 7020 - }, - { - "epoch": 3.6615384615384614, - "grad_norm": 1.384616732597351, - "learning_rate": 9.344824120603016e-05, - "loss": 5.9564, - "step": 7021 - }, - { - "epoch": 3.6620599739243804, - "grad_norm": 1.5136234760284424, - "learning_rate": 9.344723618090453e-05, - "loss": 5.829, - "step": 7022 - }, - { - "epoch": 3.6625814863103, - "grad_norm": 1.5446760654449463, - "learning_rate": 9.34462311557789e-05, - "loss": 5.339, - "step": 7023 - }, - { - "epoch": 3.663102998696219, - "grad_norm": 1.3896344900131226, - "learning_rate": 9.344522613065328e-05, - "loss": 6.0225, - "step": 7024 - }, - { - "epoch": 3.663624511082138, - "grad_norm": 1.495092749595642, - "learning_rate": 9.344422110552764e-05, - "loss": 5.5607, - "step": 7025 - }, - { - "epoch": 3.6641460234680574, - "grad_norm": 1.6290724277496338, - "learning_rate": 9.344321608040202e-05, - "loss": 5.3952, - "step": 7026 - }, - { - "epoch": 3.6646675358539764, - "grad_norm": 1.4316827058792114, - "learning_rate": 9.344221105527638e-05, - "loss": 5.9642, - "step": 7027 - }, - { - "epoch": 3.6651890482398954, - "grad_norm": 1.439576268196106, - "learning_rate": 9.344120603015076e-05, - "loss": 5.4803, - "step": 7028 - }, - { - "epoch": 3.665710560625815, - "grad_norm": 1.4314627647399902, - "learning_rate": 9.344020100502512e-05, - "loss": 5.7772, - "step": 7029 - }, - { - "epoch": 3.666232073011734, - "grad_norm": 1.5099279880523682, - "learning_rate": 9.34391959798995e-05, - "loss": 5.6698, - "step": 7030 - }, - { - "epoch": 3.666753585397653, - "grad_norm": 1.5364707708358765, - "learning_rate": 9.343819095477388e-05, - "loss": 5.5044, - "step": 7031 - }, - { - "epoch": 3.6672750977835724, - "grad_norm": 1.4501142501831055, - "learning_rate": 9.343718592964824e-05, - "loss": 5.6192, - "step": 7032 - }, - { - "epoch": 3.6677966101694914, - "grad_norm": 1.4350626468658447, - "learning_rate": 9.343618090452262e-05, - "loss": 5.3731, - "step": 7033 - }, - { - "epoch": 3.6683181225554105, - "grad_norm": 1.4982316493988037, - "learning_rate": 9.343517587939699e-05, - "loss": 5.8832, - "step": 7034 - }, - { - "epoch": 3.66883963494133, - "grad_norm": 1.384792685508728, - "learning_rate": 9.343417085427136e-05, - "loss": 5.831, - "step": 7035 - }, - { - "epoch": 3.669361147327249, - "grad_norm": 1.438024640083313, - "learning_rate": 9.343316582914573e-05, - "loss": 5.4215, - "step": 7036 - }, - { - "epoch": 3.669882659713168, - "grad_norm": 1.4174267053604126, - "learning_rate": 9.34321608040201e-05, - "loss": 5.9711, - "step": 7037 - }, - { - "epoch": 3.6704041720990874, - "grad_norm": 1.354045033454895, - "learning_rate": 9.343115577889447e-05, - "loss": 6.183, - "step": 7038 - }, - { - "epoch": 3.6709256844850064, - "grad_norm": 1.460918664932251, - "learning_rate": 9.343015075376885e-05, - "loss": 6.2018, - "step": 7039 - }, - { - "epoch": 3.6714471968709255, - "grad_norm": 1.8349027633666992, - "learning_rate": 9.342914572864321e-05, - "loss": 5.0873, - "step": 7040 - }, - { - "epoch": 3.671968709256845, - "grad_norm": 1.5097715854644775, - "learning_rate": 9.342814070351759e-05, - "loss": 6.0222, - "step": 7041 - }, - { - "epoch": 3.672490221642764, - "grad_norm": 1.5525531768798828, - "learning_rate": 9.342713567839197e-05, - "loss": 5.6614, - "step": 7042 - }, - { - "epoch": 3.673011734028683, - "grad_norm": 1.4713733196258545, - "learning_rate": 9.342613065326635e-05, - "loss": 5.3081, - "step": 7043 - }, - { - "epoch": 3.6735332464146024, - "grad_norm": 1.4880425930023193, - "learning_rate": 9.342512562814071e-05, - "loss": 5.7003, - "step": 7044 - }, - { - "epoch": 3.6740547588005215, - "grad_norm": 1.4369738101959229, - "learning_rate": 9.342412060301507e-05, - "loss": 5.4863, - "step": 7045 - }, - { - "epoch": 3.6745762711864405, - "grad_norm": 1.4913148880004883, - "learning_rate": 9.342311557788945e-05, - "loss": 5.5362, - "step": 7046 - }, - { - "epoch": 3.67509778357236, - "grad_norm": 1.344566822052002, - "learning_rate": 9.342211055276382e-05, - "loss": 6.1305, - "step": 7047 - }, - { - "epoch": 3.675619295958279, - "grad_norm": 1.4426178932189941, - "learning_rate": 9.34211055276382e-05, - "loss": 4.6851, - "step": 7048 - }, - { - "epoch": 3.676140808344198, - "grad_norm": 1.4049689769744873, - "learning_rate": 9.342010050251256e-05, - "loss": 5.5067, - "step": 7049 - }, - { - "epoch": 3.6766623207301175, - "grad_norm": 1.4476603269577026, - "learning_rate": 9.341909547738694e-05, - "loss": 5.8957, - "step": 7050 - }, - { - "epoch": 3.6771838331160365, - "grad_norm": 1.3590470552444458, - "learning_rate": 9.341809045226131e-05, - "loss": 6.0658, - "step": 7051 - }, - { - "epoch": 3.6777053455019555, - "grad_norm": 1.5423229932785034, - "learning_rate": 9.341708542713569e-05, - "loss": 5.4093, - "step": 7052 - }, - { - "epoch": 3.678226857887875, - "grad_norm": 1.639768362045288, - "learning_rate": 9.341608040201006e-05, - "loss": 5.525, - "step": 7053 - }, - { - "epoch": 3.678748370273794, - "grad_norm": 1.4362624883651733, - "learning_rate": 9.341507537688443e-05, - "loss": 5.879, - "step": 7054 - }, - { - "epoch": 3.679269882659713, - "grad_norm": 1.3900130987167358, - "learning_rate": 9.34140703517588e-05, - "loss": 5.7807, - "step": 7055 - }, - { - "epoch": 3.6797913950456325, - "grad_norm": 1.4527782201766968, - "learning_rate": 9.341306532663318e-05, - "loss": 4.9261, - "step": 7056 - }, - { - "epoch": 3.6803129074315515, - "grad_norm": 1.4593982696533203, - "learning_rate": 9.341206030150754e-05, - "loss": 5.9209, - "step": 7057 - }, - { - "epoch": 3.6808344198174705, - "grad_norm": 1.4559870958328247, - "learning_rate": 9.34110552763819e-05, - "loss": 5.8339, - "step": 7058 - }, - { - "epoch": 3.68135593220339, - "grad_norm": 1.3414433002471924, - "learning_rate": 9.341005025125628e-05, - "loss": 5.5587, - "step": 7059 - }, - { - "epoch": 3.681877444589309, - "grad_norm": 1.4395246505737305, - "learning_rate": 9.340904522613065e-05, - "loss": 5.7092, - "step": 7060 - }, - { - "epoch": 3.682398956975228, - "grad_norm": 1.5973732471466064, - "learning_rate": 9.340804020100503e-05, - "loss": 5.2823, - "step": 7061 - }, - { - "epoch": 3.6829204693611475, - "grad_norm": 1.5057066679000854, - "learning_rate": 9.34070351758794e-05, - "loss": 5.2951, - "step": 7062 - }, - { - "epoch": 3.6834419817470665, - "grad_norm": 1.3808677196502686, - "learning_rate": 9.340603015075378e-05, - "loss": 5.8972, - "step": 7063 - }, - { - "epoch": 3.6839634941329855, - "grad_norm": 1.3882790803909302, - "learning_rate": 9.340502512562814e-05, - "loss": 5.8086, - "step": 7064 - }, - { - "epoch": 3.684485006518905, - "grad_norm": 1.5697991847991943, - "learning_rate": 9.340402010050252e-05, - "loss": 5.7429, - "step": 7065 - }, - { - "epoch": 3.685006518904824, - "grad_norm": 1.5416051149368286, - "learning_rate": 9.340301507537689e-05, - "loss": 5.687, - "step": 7066 - }, - { - "epoch": 3.685528031290743, - "grad_norm": 1.4893791675567627, - "learning_rate": 9.340201005025126e-05, - "loss": 5.7826, - "step": 7067 - }, - { - "epoch": 3.6860495436766625, - "grad_norm": 1.418459177017212, - "learning_rate": 9.340100502512563e-05, - "loss": 5.9258, - "step": 7068 - }, - { - "epoch": 3.6865710560625815, - "grad_norm": 1.4895086288452148, - "learning_rate": 9.340000000000001e-05, - "loss": 5.4678, - "step": 7069 - }, - { - "epoch": 3.6870925684485005, - "grad_norm": 1.5759357213974, - "learning_rate": 9.339899497487437e-05, - "loss": 5.8896, - "step": 7070 - }, - { - "epoch": 3.68761408083442, - "grad_norm": 1.6505433320999146, - "learning_rate": 9.339798994974875e-05, - "loss": 5.4788, - "step": 7071 - }, - { - "epoch": 3.688135593220339, - "grad_norm": 1.3575966358184814, - "learning_rate": 9.339698492462313e-05, - "loss": 6.0762, - "step": 7072 - }, - { - "epoch": 3.688657105606258, - "grad_norm": 1.289697289466858, - "learning_rate": 9.339597989949749e-05, - "loss": 5.3123, - "step": 7073 - }, - { - "epoch": 3.6891786179921775, - "grad_norm": 1.7307019233703613, - "learning_rate": 9.339497487437187e-05, - "loss": 5.3798, - "step": 7074 - }, - { - "epoch": 3.6897001303780965, - "grad_norm": 1.558272123336792, - "learning_rate": 9.339396984924623e-05, - "loss": 5.5509, - "step": 7075 - }, - { - "epoch": 3.6902216427640155, - "grad_norm": 1.4052753448486328, - "learning_rate": 9.339296482412061e-05, - "loss": 5.7596, - "step": 7076 - }, - { - "epoch": 3.690743155149935, - "grad_norm": 1.4011518955230713, - "learning_rate": 9.339195979899498e-05, - "loss": 5.7918, - "step": 7077 - }, - { - "epoch": 3.691264667535854, - "grad_norm": 1.5099977254867554, - "learning_rate": 9.339095477386935e-05, - "loss": 5.8228, - "step": 7078 - }, - { - "epoch": 3.691786179921773, - "grad_norm": 1.541614294052124, - "learning_rate": 9.338994974874372e-05, - "loss": 5.3396, - "step": 7079 - }, - { - "epoch": 3.6923076923076925, - "grad_norm": 1.4532945156097412, - "learning_rate": 9.33889447236181e-05, - "loss": 5.6242, - "step": 7080 - }, - { - "epoch": 3.6928292046936115, - "grad_norm": 1.4778698682785034, - "learning_rate": 9.338793969849246e-05, - "loss": 5.9388, - "step": 7081 - }, - { - "epoch": 3.6933507170795306, - "grad_norm": 1.5695003271102905, - "learning_rate": 9.338693467336684e-05, - "loss": 5.2572, - "step": 7082 - }, - { - "epoch": 3.69387222946545, - "grad_norm": 1.472528338432312, - "learning_rate": 9.338592964824122e-05, - "loss": 5.444, - "step": 7083 - }, - { - "epoch": 3.694393741851369, - "grad_norm": 1.4267178773880005, - "learning_rate": 9.338492462311558e-05, - "loss": 6.0963, - "step": 7084 - }, - { - "epoch": 3.694915254237288, - "grad_norm": 1.4427791833877563, - "learning_rate": 9.338391959798996e-05, - "loss": 5.8147, - "step": 7085 - }, - { - "epoch": 3.6954367666232075, - "grad_norm": 1.495205044746399, - "learning_rate": 9.338291457286432e-05, - "loss": 5.8751, - "step": 7086 - }, - { - "epoch": 3.6959582790091265, - "grad_norm": 1.6204521656036377, - "learning_rate": 9.33819095477387e-05, - "loss": 5.5026, - "step": 7087 - }, - { - "epoch": 3.6964797913950456, - "grad_norm": 1.5209815502166748, - "learning_rate": 9.338090452261306e-05, - "loss": 5.3233, - "step": 7088 - }, - { - "epoch": 3.697001303780965, - "grad_norm": 1.6296061277389526, - "learning_rate": 9.337989949748744e-05, - "loss": 5.6748, - "step": 7089 - }, - { - "epoch": 3.697522816166884, - "grad_norm": 1.5483607053756714, - "learning_rate": 9.33788944723618e-05, - "loss": 5.5865, - "step": 7090 - }, - { - "epoch": 3.698044328552803, - "grad_norm": 1.5365935564041138, - "learning_rate": 9.337788944723618e-05, - "loss": 5.4778, - "step": 7091 - }, - { - "epoch": 3.6985658409387225, - "grad_norm": 1.5664517879486084, - "learning_rate": 9.337688442211056e-05, - "loss": 5.837, - "step": 7092 - }, - { - "epoch": 3.6990873533246416, - "grad_norm": 1.3794665336608887, - "learning_rate": 9.337587939698494e-05, - "loss": 5.8759, - "step": 7093 - }, - { - "epoch": 3.6996088657105606, - "grad_norm": 1.7324854135513306, - "learning_rate": 9.33748743718593e-05, - "loss": 5.4481, - "step": 7094 - }, - { - "epoch": 3.70013037809648, - "grad_norm": 1.5653431415557861, - "learning_rate": 9.337386934673368e-05, - "loss": 5.6578, - "step": 7095 - }, - { - "epoch": 3.700651890482399, - "grad_norm": 1.7156696319580078, - "learning_rate": 9.337286432160805e-05, - "loss": 5.1855, - "step": 7096 - }, - { - "epoch": 3.701173402868318, - "grad_norm": 1.4905757904052734, - "learning_rate": 9.337185929648241e-05, - "loss": 5.448, - "step": 7097 - }, - { - "epoch": 3.7016949152542376, - "grad_norm": 1.4102139472961426, - "learning_rate": 9.337085427135679e-05, - "loss": 5.8454, - "step": 7098 - }, - { - "epoch": 3.7022164276401566, - "grad_norm": 1.7421170473098755, - "learning_rate": 9.336984924623115e-05, - "loss": 5.1253, - "step": 7099 - }, - { - "epoch": 3.7027379400260756, - "grad_norm": 1.5678882598876953, - "learning_rate": 9.336884422110553e-05, - "loss": 5.998, - "step": 7100 - }, - { - "epoch": 3.703259452411995, - "grad_norm": 1.7802516222000122, - "learning_rate": 9.33678391959799e-05, - "loss": 5.5837, - "step": 7101 - }, - { - "epoch": 3.703780964797914, - "grad_norm": 1.3431756496429443, - "learning_rate": 9.336683417085427e-05, - "loss": 5.6267, - "step": 7102 - }, - { - "epoch": 3.704302477183833, - "grad_norm": 1.5176632404327393, - "learning_rate": 9.336582914572865e-05, - "loss": 5.7622, - "step": 7103 - }, - { - "epoch": 3.704823989569752, - "grad_norm": 1.433733582496643, - "learning_rate": 9.336482412060303e-05, - "loss": 5.8306, - "step": 7104 - }, - { - "epoch": 3.7053455019556716, - "grad_norm": 1.4294573068618774, - "learning_rate": 9.336381909547739e-05, - "loss": 6.1294, - "step": 7105 - }, - { - "epoch": 3.7058670143415906, - "grad_norm": 1.5673069953918457, - "learning_rate": 9.336281407035177e-05, - "loss": 5.3017, - "step": 7106 - }, - { - "epoch": 3.7063885267275096, - "grad_norm": 1.733873724937439, - "learning_rate": 9.336180904522613e-05, - "loss": 5.9482, - "step": 7107 - }, - { - "epoch": 3.706910039113429, - "grad_norm": 1.4529964923858643, - "learning_rate": 9.336080402010051e-05, - "loss": 5.7361, - "step": 7108 - }, - { - "epoch": 3.707431551499348, - "grad_norm": 1.461370587348938, - "learning_rate": 9.335979899497488e-05, - "loss": 5.9837, - "step": 7109 - }, - { - "epoch": 3.707953063885267, - "grad_norm": 1.5137066841125488, - "learning_rate": 9.335879396984924e-05, - "loss": 5.6109, - "step": 7110 - }, - { - "epoch": 3.7084745762711866, - "grad_norm": 1.4195433855056763, - "learning_rate": 9.335778894472362e-05, - "loss": 5.7316, - "step": 7111 - }, - { - "epoch": 3.7089960886571056, - "grad_norm": 1.3399107456207275, - "learning_rate": 9.3356783919598e-05, - "loss": 6.1522, - "step": 7112 - }, - { - "epoch": 3.7095176010430246, - "grad_norm": 1.4708726406097412, - "learning_rate": 9.335577889447237e-05, - "loss": 5.6083, - "step": 7113 - }, - { - "epoch": 3.710039113428944, - "grad_norm": 1.3465808629989624, - "learning_rate": 9.335477386934674e-05, - "loss": 5.6968, - "step": 7114 - }, - { - "epoch": 3.710560625814863, - "grad_norm": 1.5656622648239136, - "learning_rate": 9.335376884422112e-05, - "loss": 5.6475, - "step": 7115 - }, - { - "epoch": 3.711082138200782, - "grad_norm": 1.558059811592102, - "learning_rate": 9.335276381909548e-05, - "loss": 4.6389, - "step": 7116 - }, - { - "epoch": 3.711603650586701, - "grad_norm": 1.4750794172286987, - "learning_rate": 9.335175879396986e-05, - "loss": 5.6088, - "step": 7117 - }, - { - "epoch": 3.7121251629726206, - "grad_norm": 1.557699203491211, - "learning_rate": 9.335075376884422e-05, - "loss": 5.8315, - "step": 7118 - }, - { - "epoch": 3.7126466753585396, - "grad_norm": 1.6379343271255493, - "learning_rate": 9.33497487437186e-05, - "loss": 6.1365, - "step": 7119 - }, - { - "epoch": 3.7131681877444587, - "grad_norm": 1.4784435033798218, - "learning_rate": 9.334874371859296e-05, - "loss": 5.7336, - "step": 7120 - }, - { - "epoch": 3.713689700130378, - "grad_norm": 1.3685170412063599, - "learning_rate": 9.334773869346734e-05, - "loss": 6.0237, - "step": 7121 - }, - { - "epoch": 3.714211212516297, - "grad_norm": 1.3943976163864136, - "learning_rate": 9.33467336683417e-05, - "loss": 5.8507, - "step": 7122 - }, - { - "epoch": 3.714732724902216, - "grad_norm": 1.4288177490234375, - "learning_rate": 9.334572864321608e-05, - "loss": 5.337, - "step": 7123 - }, - { - "epoch": 3.7152542372881356, - "grad_norm": 1.4738540649414062, - "learning_rate": 9.334472361809046e-05, - "loss": 5.6156, - "step": 7124 - }, - { - "epoch": 3.7157757496740547, - "grad_norm": 1.4455350637435913, - "learning_rate": 9.334371859296483e-05, - "loss": 5.6818, - "step": 7125 - }, - { - "epoch": 3.7162972620599737, - "grad_norm": 1.4057294130325317, - "learning_rate": 9.33427135678392e-05, - "loss": 5.8931, - "step": 7126 - }, - { - "epoch": 3.716818774445893, - "grad_norm": 1.3574397563934326, - "learning_rate": 9.334170854271357e-05, - "loss": 5.7867, - "step": 7127 - }, - { - "epoch": 3.717340286831812, - "grad_norm": 1.3998303413391113, - "learning_rate": 9.334070351758795e-05, - "loss": 5.6385, - "step": 7128 - }, - { - "epoch": 3.717861799217731, - "grad_norm": 1.4585435390472412, - "learning_rate": 9.333969849246231e-05, - "loss": 5.9325, - "step": 7129 - }, - { - "epoch": 3.7183833116036507, - "grad_norm": 1.439855933189392, - "learning_rate": 9.333869346733669e-05, - "loss": 6.0231, - "step": 7130 - }, - { - "epoch": 3.7189048239895697, - "grad_norm": 1.503981351852417, - "learning_rate": 9.333768844221105e-05, - "loss": 5.5094, - "step": 7131 - }, - { - "epoch": 3.7194263363754887, - "grad_norm": 1.5188429355621338, - "learning_rate": 9.333668341708543e-05, - "loss": 5.9689, - "step": 7132 - }, - { - "epoch": 3.719947848761408, - "grad_norm": 1.443252444267273, - "learning_rate": 9.333567839195981e-05, - "loss": 5.6526, - "step": 7133 - }, - { - "epoch": 3.720469361147327, - "grad_norm": 1.598919153213501, - "learning_rate": 9.333467336683419e-05, - "loss": 5.6078, - "step": 7134 - }, - { - "epoch": 3.720990873533246, - "grad_norm": 1.4989668130874634, - "learning_rate": 9.333366834170855e-05, - "loss": 5.6883, - "step": 7135 - }, - { - "epoch": 3.7215123859191657, - "grad_norm": 1.5961953401565552, - "learning_rate": 9.333266331658293e-05, - "loss": 5.4955, - "step": 7136 - }, - { - "epoch": 3.7220338983050847, - "grad_norm": 1.37103271484375, - "learning_rate": 9.333165829145729e-05, - "loss": 5.6774, - "step": 7137 - }, - { - "epoch": 3.7225554106910037, - "grad_norm": 1.4646453857421875, - "learning_rate": 9.333065326633166e-05, - "loss": 5.9224, - "step": 7138 - }, - { - "epoch": 3.723076923076923, - "grad_norm": 1.4542596340179443, - "learning_rate": 9.332964824120603e-05, - "loss": 5.8611, - "step": 7139 - }, - { - "epoch": 3.723598435462842, - "grad_norm": 1.4691792726516724, - "learning_rate": 9.33286432160804e-05, - "loss": 5.7376, - "step": 7140 - }, - { - "epoch": 3.724119947848761, - "grad_norm": 1.433346152305603, - "learning_rate": 9.332763819095478e-05, - "loss": 5.8587, - "step": 7141 - }, - { - "epoch": 3.7246414602346807, - "grad_norm": 1.317259430885315, - "learning_rate": 9.332663316582914e-05, - "loss": 6.1521, - "step": 7142 - }, - { - "epoch": 3.7251629726205997, - "grad_norm": 1.4735013246536255, - "learning_rate": 9.332562814070352e-05, - "loss": 5.9624, - "step": 7143 - }, - { - "epoch": 3.7256844850065187, - "grad_norm": 1.5436524152755737, - "learning_rate": 9.33246231155779e-05, - "loss": 5.2357, - "step": 7144 - }, - { - "epoch": 3.726205997392438, - "grad_norm": 1.4885532855987549, - "learning_rate": 9.332361809045227e-05, - "loss": 5.7375, - "step": 7145 - }, - { - "epoch": 3.726727509778357, - "grad_norm": 1.477995753288269, - "learning_rate": 9.332261306532664e-05, - "loss": 5.643, - "step": 7146 - }, - { - "epoch": 3.7272490221642762, - "grad_norm": 1.4834468364715576, - "learning_rate": 9.332160804020102e-05, - "loss": 5.8137, - "step": 7147 - }, - { - "epoch": 3.7277705345501957, - "grad_norm": 1.504822015762329, - "learning_rate": 9.332060301507538e-05, - "loss": 5.8265, - "step": 7148 - }, - { - "epoch": 3.7282920469361147, - "grad_norm": 1.5279245376586914, - "learning_rate": 9.331959798994976e-05, - "loss": 5.7722, - "step": 7149 - }, - { - "epoch": 3.7288135593220337, - "grad_norm": 1.4921904802322388, - "learning_rate": 9.331859296482412e-05, - "loss": 5.8326, - "step": 7150 - }, - { - "epoch": 3.729335071707953, - "grad_norm": 1.4843816757202148, - "learning_rate": 9.331758793969849e-05, - "loss": 5.2011, - "step": 7151 - }, - { - "epoch": 3.729856584093872, - "grad_norm": 1.505672812461853, - "learning_rate": 9.331658291457287e-05, - "loss": 6.1325, - "step": 7152 - }, - { - "epoch": 3.7303780964797912, - "grad_norm": 1.4861228466033936, - "learning_rate": 9.331557788944724e-05, - "loss": 5.906, - "step": 7153 - }, - { - "epoch": 3.7308996088657107, - "grad_norm": 1.4283781051635742, - "learning_rate": 9.331457286432162e-05, - "loss": 5.8118, - "step": 7154 - }, - { - "epoch": 3.7314211212516297, - "grad_norm": 1.4170303344726562, - "learning_rate": 9.331356783919599e-05, - "loss": 6.1711, - "step": 7155 - }, - { - "epoch": 3.7319426336375487, - "grad_norm": 1.455032467842102, - "learning_rate": 9.331256281407036e-05, - "loss": 5.7403, - "step": 7156 - }, - { - "epoch": 3.732464146023468, - "grad_norm": 1.6105223894119263, - "learning_rate": 9.331155778894473e-05, - "loss": 5.7563, - "step": 7157 - }, - { - "epoch": 3.7329856584093872, - "grad_norm": 1.6175833940505981, - "learning_rate": 9.33105527638191e-05, - "loss": 5.7889, - "step": 7158 - }, - { - "epoch": 3.7335071707953063, - "grad_norm": 1.622033715248108, - "learning_rate": 9.330954773869347e-05, - "loss": 5.825, - "step": 7159 - }, - { - "epoch": 3.7340286831812257, - "grad_norm": 1.4206181764602661, - "learning_rate": 9.330854271356785e-05, - "loss": 5.7173, - "step": 7160 - }, - { - "epoch": 3.7345501955671447, - "grad_norm": 1.5119035243988037, - "learning_rate": 9.330753768844221e-05, - "loss": 5.3716, - "step": 7161 - }, - { - "epoch": 3.7350717079530638, - "grad_norm": 1.4509031772613525, - "learning_rate": 9.330653266331659e-05, - "loss": 6.0847, - "step": 7162 - }, - { - "epoch": 3.7355932203389832, - "grad_norm": 1.4662338495254517, - "learning_rate": 9.330552763819095e-05, - "loss": 6.0955, - "step": 7163 - }, - { - "epoch": 3.7361147327249022, - "grad_norm": 1.455979585647583, - "learning_rate": 9.330452261306533e-05, - "loss": 6.0037, - "step": 7164 - }, - { - "epoch": 3.7366362451108213, - "grad_norm": 1.4780399799346924, - "learning_rate": 9.330351758793971e-05, - "loss": 5.7048, - "step": 7165 - }, - { - "epoch": 3.7371577574967407, - "grad_norm": 1.5267775058746338, - "learning_rate": 9.330251256281407e-05, - "loss": 5.4724, - "step": 7166 - }, - { - "epoch": 3.7376792698826597, - "grad_norm": 1.3959031105041504, - "learning_rate": 9.330150753768845e-05, - "loss": 5.4785, - "step": 7167 - }, - { - "epoch": 3.7382007822685788, - "grad_norm": 1.5438158512115479, - "learning_rate": 9.330050251256282e-05, - "loss": 5.5544, - "step": 7168 - }, - { - "epoch": 3.7387222946544982, - "grad_norm": 1.5348466634750366, - "learning_rate": 9.32994974874372e-05, - "loss": 5.9541, - "step": 7169 - }, - { - "epoch": 3.7392438070404173, - "grad_norm": 1.3930391073226929, - "learning_rate": 9.329849246231156e-05, - "loss": 5.9569, - "step": 7170 - }, - { - "epoch": 3.7397653194263363, - "grad_norm": 1.515562653541565, - "learning_rate": 9.329748743718594e-05, - "loss": 5.8101, - "step": 7171 - }, - { - "epoch": 3.7402868318122557, - "grad_norm": 1.501370906829834, - "learning_rate": 9.32964824120603e-05, - "loss": 5.5987, - "step": 7172 - }, - { - "epoch": 3.7408083441981748, - "grad_norm": 1.5222851037979126, - "learning_rate": 9.329547738693468e-05, - "loss": 5.6979, - "step": 7173 - }, - { - "epoch": 3.741329856584094, - "grad_norm": 1.3771880865097046, - "learning_rate": 9.329447236180906e-05, - "loss": 5.9444, - "step": 7174 - }, - { - "epoch": 3.7418513689700132, - "grad_norm": 1.4558560848236084, - "learning_rate": 9.329346733668343e-05, - "loss": 5.8133, - "step": 7175 - }, - { - "epoch": 3.7423728813559323, - "grad_norm": 1.453021764755249, - "learning_rate": 9.32924623115578e-05, - "loss": 5.996, - "step": 7176 - }, - { - "epoch": 3.7428943937418513, - "grad_norm": 1.611220359802246, - "learning_rate": 9.329145728643216e-05, - "loss": 5.7464, - "step": 7177 - }, - { - "epoch": 3.7434159061277708, - "grad_norm": 1.5124820470809937, - "learning_rate": 9.329045226130654e-05, - "loss": 5.4252, - "step": 7178 - }, - { - "epoch": 3.7439374185136898, - "grad_norm": 1.552105188369751, - "learning_rate": 9.32894472361809e-05, - "loss": 6.0202, - "step": 7179 - }, - { - "epoch": 3.744458930899609, - "grad_norm": 1.4051285982131958, - "learning_rate": 9.328844221105528e-05, - "loss": 6.0398, - "step": 7180 - }, - { - "epoch": 3.7449804432855283, - "grad_norm": 1.3960286378860474, - "learning_rate": 9.328743718592965e-05, - "loss": 5.9059, - "step": 7181 - }, - { - "epoch": 3.7455019556714473, - "grad_norm": 1.5719902515411377, - "learning_rate": 9.328643216080402e-05, - "loss": 5.3359, - "step": 7182 - }, - { - "epoch": 3.7460234680573663, - "grad_norm": 1.4371936321258545, - "learning_rate": 9.328542713567839e-05, - "loss": 5.9243, - "step": 7183 - }, - { - "epoch": 3.7465449804432858, - "grad_norm": 1.4647128582000732, - "learning_rate": 9.328442211055277e-05, - "loss": 6.0369, - "step": 7184 - }, - { - "epoch": 3.747066492829205, - "grad_norm": 1.344878911972046, - "learning_rate": 9.328341708542714e-05, - "loss": 6.0636, - "step": 7185 - }, - { - "epoch": 3.747588005215124, - "grad_norm": 1.4798606634140015, - "learning_rate": 9.328241206030152e-05, - "loss": 5.7759, - "step": 7186 - }, - { - "epoch": 3.7481095176010433, - "grad_norm": 1.374454140663147, - "learning_rate": 9.328140703517589e-05, - "loss": 6.09, - "step": 7187 - }, - { - "epoch": 3.7486310299869623, - "grad_norm": 1.3482415676116943, - "learning_rate": 9.328040201005026e-05, - "loss": 5.8672, - "step": 7188 - }, - { - "epoch": 3.7491525423728813, - "grad_norm": 1.3266233205795288, - "learning_rate": 9.327939698492463e-05, - "loss": 5.8613, - "step": 7189 - }, - { - "epoch": 3.749674054758801, - "grad_norm": 1.8683648109436035, - "learning_rate": 9.327839195979899e-05, - "loss": 5.1099, - "step": 7190 - }, - { - "epoch": 3.75019556714472, - "grad_norm": 1.4043333530426025, - "learning_rate": 9.327738693467337e-05, - "loss": 5.8037, - "step": 7191 - }, - { - "epoch": 3.750717079530639, - "grad_norm": 1.4409458637237549, - "learning_rate": 9.327638190954773e-05, - "loss": 4.9895, - "step": 7192 - }, - { - "epoch": 3.7512385919165583, - "grad_norm": 1.4674501419067383, - "learning_rate": 9.327537688442211e-05, - "loss": 5.6206, - "step": 7193 - }, - { - "epoch": 3.7517601043024773, - "grad_norm": 1.5053274631500244, - "learning_rate": 9.327437185929648e-05, - "loss": 5.702, - "step": 7194 - }, - { - "epoch": 3.7522816166883963, - "grad_norm": 1.4777790307998657, - "learning_rate": 9.327336683417085e-05, - "loss": 5.8974, - "step": 7195 - }, - { - "epoch": 3.752803129074316, - "grad_norm": 1.8290059566497803, - "learning_rate": 9.327236180904523e-05, - "loss": 5.56, - "step": 7196 - }, - { - "epoch": 3.753324641460235, - "grad_norm": 1.438625693321228, - "learning_rate": 9.327135678391961e-05, - "loss": 5.5376, - "step": 7197 - }, - { - "epoch": 3.753846153846154, - "grad_norm": 1.3616231679916382, - "learning_rate": 9.327035175879397e-05, - "loss": 6.0821, - "step": 7198 - }, - { - "epoch": 3.754367666232073, - "grad_norm": 1.5707908868789673, - "learning_rate": 9.326934673366835e-05, - "loss": 5.2319, - "step": 7199 - }, - { - "epoch": 3.7548891786179923, - "grad_norm": 1.7347354888916016, - "learning_rate": 9.326834170854272e-05, - "loss": 5.4574, - "step": 7200 - }, - { - "epoch": 3.7554106910039113, - "grad_norm": 1.454602837562561, - "learning_rate": 9.32673366834171e-05, - "loss": 5.5007, - "step": 7201 - }, - { - "epoch": 3.7559322033898304, - "grad_norm": 1.3563610315322876, - "learning_rate": 9.326633165829146e-05, - "loss": 5.788, - "step": 7202 - }, - { - "epoch": 3.75645371577575, - "grad_norm": 1.3581563234329224, - "learning_rate": 9.326532663316582e-05, - "loss": 5.7897, - "step": 7203 - }, - { - "epoch": 3.756975228161669, - "grad_norm": 1.5297315120697021, - "learning_rate": 9.32643216080402e-05, - "loss": 5.8699, - "step": 7204 - }, - { - "epoch": 3.757496740547588, - "grad_norm": 1.4880070686340332, - "learning_rate": 9.326331658291458e-05, - "loss": 5.7665, - "step": 7205 - }, - { - "epoch": 3.7580182529335073, - "grad_norm": 1.4058812856674194, - "learning_rate": 9.326231155778896e-05, - "loss": 5.7026, - "step": 7206 - }, - { - "epoch": 3.7585397653194264, - "grad_norm": 1.4285526275634766, - "learning_rate": 9.326130653266332e-05, - "loss": 6.0716, - "step": 7207 - }, - { - "epoch": 3.7590612777053454, - "grad_norm": 1.393763542175293, - "learning_rate": 9.32603015075377e-05, - "loss": 5.93, - "step": 7208 - }, - { - "epoch": 3.7595827900912644, - "grad_norm": 1.5102362632751465, - "learning_rate": 9.325929648241206e-05, - "loss": 5.5759, - "step": 7209 - }, - { - "epoch": 3.760104302477184, - "grad_norm": 1.4420245885849, - "learning_rate": 9.325829145728644e-05, - "loss": 5.6187, - "step": 7210 - }, - { - "epoch": 3.760625814863103, - "grad_norm": 1.4810888767242432, - "learning_rate": 9.32572864321608e-05, - "loss": 5.6117, - "step": 7211 - }, - { - "epoch": 3.761147327249022, - "grad_norm": 1.4028464555740356, - "learning_rate": 9.325628140703518e-05, - "loss": 5.9744, - "step": 7212 - }, - { - "epoch": 3.7616688396349414, - "grad_norm": 1.429746150970459, - "learning_rate": 9.325527638190955e-05, - "loss": 5.5089, - "step": 7213 - }, - { - "epoch": 3.7621903520208604, - "grad_norm": 1.6163625717163086, - "learning_rate": 9.325427135678392e-05, - "loss": 5.779, - "step": 7214 - }, - { - "epoch": 3.7627118644067794, - "grad_norm": 1.8043291568756104, - "learning_rate": 9.325326633165829e-05, - "loss": 4.8775, - "step": 7215 - }, - { - "epoch": 3.763233376792699, - "grad_norm": 1.5143229961395264, - "learning_rate": 9.325226130653267e-05, - "loss": 5.9827, - "step": 7216 - }, - { - "epoch": 3.763754889178618, - "grad_norm": 1.4556866884231567, - "learning_rate": 9.325125628140704e-05, - "loss": 5.7868, - "step": 7217 - }, - { - "epoch": 3.764276401564537, - "grad_norm": 1.5184823274612427, - "learning_rate": 9.325025125628141e-05, - "loss": 5.5276, - "step": 7218 - }, - { - "epoch": 3.7647979139504564, - "grad_norm": 1.5395252704620361, - "learning_rate": 9.324924623115579e-05, - "loss": 5.4721, - "step": 7219 - }, - { - "epoch": 3.7653194263363754, - "grad_norm": 1.6493306159973145, - "learning_rate": 9.324824120603015e-05, - "loss": 5.561, - "step": 7220 - }, - { - "epoch": 3.7658409387222944, - "grad_norm": 1.4607335329055786, - "learning_rate": 9.324723618090453e-05, - "loss": 5.5343, - "step": 7221 - }, - { - "epoch": 3.766362451108214, - "grad_norm": 1.5821516513824463, - "learning_rate": 9.324623115577889e-05, - "loss": 5.3913, - "step": 7222 - }, - { - "epoch": 3.766883963494133, - "grad_norm": 1.551813006401062, - "learning_rate": 9.324522613065327e-05, - "loss": 5.892, - "step": 7223 - }, - { - "epoch": 3.767405475880052, - "grad_norm": 1.3982185125350952, - "learning_rate": 9.324422110552764e-05, - "loss": 5.5583, - "step": 7224 - }, - { - "epoch": 3.7679269882659714, - "grad_norm": 1.5022096633911133, - "learning_rate": 9.324321608040201e-05, - "loss": 5.9607, - "step": 7225 - }, - { - "epoch": 3.7684485006518904, - "grad_norm": 1.5564601421356201, - "learning_rate": 9.324221105527639e-05, - "loss": 5.6197, - "step": 7226 - }, - { - "epoch": 3.7689700130378094, - "grad_norm": 1.539781928062439, - "learning_rate": 9.324120603015077e-05, - "loss": 5.7089, - "step": 7227 - }, - { - "epoch": 3.769491525423729, - "grad_norm": 1.4592862129211426, - "learning_rate": 9.324020100502513e-05, - "loss": 5.7603, - "step": 7228 - }, - { - "epoch": 3.770013037809648, - "grad_norm": 1.4448012113571167, - "learning_rate": 9.323919597989951e-05, - "loss": 5.7562, - "step": 7229 - }, - { - "epoch": 3.770534550195567, - "grad_norm": 1.5434327125549316, - "learning_rate": 9.323819095477388e-05, - "loss": 5.8547, - "step": 7230 - }, - { - "epoch": 3.7710560625814864, - "grad_norm": 1.5035698413848877, - "learning_rate": 9.323718592964824e-05, - "loss": 5.2588, - "step": 7231 - }, - { - "epoch": 3.7715775749674054, - "grad_norm": 1.3654375076293945, - "learning_rate": 9.323618090452262e-05, - "loss": 6.1657, - "step": 7232 - }, - { - "epoch": 3.7720990873533244, - "grad_norm": 1.5737384557724, - "learning_rate": 9.323517587939698e-05, - "loss": 5.2929, - "step": 7233 - }, - { - "epoch": 3.772620599739244, - "grad_norm": 1.4058940410614014, - "learning_rate": 9.323417085427136e-05, - "loss": 4.9742, - "step": 7234 - }, - { - "epoch": 3.773142112125163, - "grad_norm": 1.484915018081665, - "learning_rate": 9.323316582914572e-05, - "loss": 5.8895, - "step": 7235 - }, - { - "epoch": 3.773663624511082, - "grad_norm": 1.4383556842803955, - "learning_rate": 9.32321608040201e-05, - "loss": 5.7523, - "step": 7236 - }, - { - "epoch": 3.7741851368970014, - "grad_norm": 1.45618736743927, - "learning_rate": 9.323115577889448e-05, - "loss": 5.9138, - "step": 7237 - }, - { - "epoch": 3.7747066492829204, - "grad_norm": 1.5671652555465698, - "learning_rate": 9.323015075376886e-05, - "loss": 5.4332, - "step": 7238 - }, - { - "epoch": 3.7752281616688395, - "grad_norm": 1.4395960569381714, - "learning_rate": 9.322914572864322e-05, - "loss": 5.4964, - "step": 7239 - }, - { - "epoch": 3.775749674054759, - "grad_norm": 1.4388784170150757, - "learning_rate": 9.32281407035176e-05, - "loss": 6.074, - "step": 7240 - }, - { - "epoch": 3.776271186440678, - "grad_norm": 1.5895932912826538, - "learning_rate": 9.322713567839196e-05, - "loss": 5.6353, - "step": 7241 - }, - { - "epoch": 3.776792698826597, - "grad_norm": 1.6872508525848389, - "learning_rate": 9.322613065326634e-05, - "loss": 5.3505, - "step": 7242 - }, - { - "epoch": 3.7773142112125164, - "grad_norm": 1.4466300010681152, - "learning_rate": 9.32251256281407e-05, - "loss": 5.841, - "step": 7243 - }, - { - "epoch": 3.7778357235984354, - "grad_norm": 1.572801947593689, - "learning_rate": 9.322412060301507e-05, - "loss": 5.8432, - "step": 7244 - }, - { - "epoch": 3.7783572359843545, - "grad_norm": 1.3997225761413574, - "learning_rate": 9.322311557788945e-05, - "loss": 5.8922, - "step": 7245 - }, - { - "epoch": 3.778878748370274, - "grad_norm": 1.5943304300308228, - "learning_rate": 9.322211055276383e-05, - "loss": 5.4997, - "step": 7246 - }, - { - "epoch": 3.779400260756193, - "grad_norm": 1.7823513746261597, - "learning_rate": 9.32211055276382e-05, - "loss": 5.6113, - "step": 7247 - }, - { - "epoch": 3.779921773142112, - "grad_norm": 1.5381051301956177, - "learning_rate": 9.322010050251257e-05, - "loss": 5.6708, - "step": 7248 - }, - { - "epoch": 3.7804432855280314, - "grad_norm": 1.616721510887146, - "learning_rate": 9.321909547738695e-05, - "loss": 5.4968, - "step": 7249 - }, - { - "epoch": 3.7809647979139505, - "grad_norm": 1.6024445295333862, - "learning_rate": 9.321809045226131e-05, - "loss": 5.5316, - "step": 7250 - }, - { - "epoch": 3.7814863102998695, - "grad_norm": 1.5536161661148071, - "learning_rate": 9.321708542713569e-05, - "loss": 5.4711, - "step": 7251 - }, - { - "epoch": 3.782007822685789, - "grad_norm": 1.4964324235916138, - "learning_rate": 9.321608040201005e-05, - "loss": 5.8638, - "step": 7252 - }, - { - "epoch": 3.782529335071708, - "grad_norm": 1.395397663116455, - "learning_rate": 9.321507537688443e-05, - "loss": 5.9046, - "step": 7253 - }, - { - "epoch": 3.783050847457627, - "grad_norm": 1.3754669427871704, - "learning_rate": 9.32140703517588e-05, - "loss": 5.8539, - "step": 7254 - }, - { - "epoch": 3.7835723598435465, - "grad_norm": 1.4803260564804077, - "learning_rate": 9.321306532663317e-05, - "loss": 5.8478, - "step": 7255 - }, - { - "epoch": 3.7840938722294655, - "grad_norm": 1.5437946319580078, - "learning_rate": 9.321206030150754e-05, - "loss": 5.6587, - "step": 7256 - }, - { - "epoch": 3.7846153846153845, - "grad_norm": 1.576158881187439, - "learning_rate": 9.321105527638191e-05, - "loss": 5.7187, - "step": 7257 - }, - { - "epoch": 3.785136897001304, - "grad_norm": 1.4382407665252686, - "learning_rate": 9.321005025125629e-05, - "loss": 5.7485, - "step": 7258 - }, - { - "epoch": 3.785658409387223, - "grad_norm": 1.462908387184143, - "learning_rate": 9.320904522613066e-05, - "loss": 5.7715, - "step": 7259 - }, - { - "epoch": 3.786179921773142, - "grad_norm": 1.4529008865356445, - "learning_rate": 9.320804020100503e-05, - "loss": 5.806, - "step": 7260 - }, - { - "epoch": 3.7867014341590615, - "grad_norm": 1.6221095323562622, - "learning_rate": 9.32070351758794e-05, - "loss": 5.1466, - "step": 7261 - }, - { - "epoch": 3.7872229465449805, - "grad_norm": 1.6239399909973145, - "learning_rate": 9.320603015075378e-05, - "loss": 5.6495, - "step": 7262 - }, - { - "epoch": 3.7877444589308995, - "grad_norm": 1.593031644821167, - "learning_rate": 9.320502512562814e-05, - "loss": 5.696, - "step": 7263 - }, - { - "epoch": 3.788265971316819, - "grad_norm": 1.4813724756240845, - "learning_rate": 9.320402010050252e-05, - "loss": 5.4802, - "step": 7264 - }, - { - "epoch": 3.788787483702738, - "grad_norm": 1.6157581806182861, - "learning_rate": 9.320301507537688e-05, - "loss": 5.2086, - "step": 7265 - }, - { - "epoch": 3.789308996088657, - "grad_norm": 1.5502675771713257, - "learning_rate": 9.320201005025126e-05, - "loss": 6.0485, - "step": 7266 - }, - { - "epoch": 3.7898305084745765, - "grad_norm": 1.2895877361297607, - "learning_rate": 9.320100502512564e-05, - "loss": 6.1049, - "step": 7267 - }, - { - "epoch": 3.7903520208604955, - "grad_norm": 1.4142566919326782, - "learning_rate": 9.320000000000002e-05, - "loss": 5.861, - "step": 7268 - }, - { - "epoch": 3.7908735332464145, - "grad_norm": 1.5140761137008667, - "learning_rate": 9.319899497487438e-05, - "loss": 5.7137, - "step": 7269 - }, - { - "epoch": 3.791395045632334, - "grad_norm": 1.4101840257644653, - "learning_rate": 9.319798994974874e-05, - "loss": 5.823, - "step": 7270 - }, - { - "epoch": 3.791916558018253, - "grad_norm": 1.2947514057159424, - "learning_rate": 9.319698492462312e-05, - "loss": 5.8618, - "step": 7271 - }, - { - "epoch": 3.792438070404172, - "grad_norm": 1.4608880281448364, - "learning_rate": 9.319597989949749e-05, - "loss": 5.5652, - "step": 7272 - }, - { - "epoch": 3.7929595827900915, - "grad_norm": 1.518391728401184, - "learning_rate": 9.319497487437186e-05, - "loss": 5.4611, - "step": 7273 - }, - { - "epoch": 3.7934810951760105, - "grad_norm": 1.664723515510559, - "learning_rate": 9.319396984924623e-05, - "loss": 5.4504, - "step": 7274 - }, - { - "epoch": 3.7940026075619295, - "grad_norm": 1.416684865951538, - "learning_rate": 9.31929648241206e-05, - "loss": 5.7277, - "step": 7275 - }, - { - "epoch": 3.794524119947849, - "grad_norm": 1.4352178573608398, - "learning_rate": 9.319195979899497e-05, - "loss": 5.91, - "step": 7276 - }, - { - "epoch": 3.795045632333768, - "grad_norm": 1.4008833169937134, - "learning_rate": 9.319095477386935e-05, - "loss": 5.6066, - "step": 7277 - }, - { - "epoch": 3.795567144719687, - "grad_norm": 1.3875792026519775, - "learning_rate": 9.318994974874373e-05, - "loss": 5.7657, - "step": 7278 - }, - { - "epoch": 3.7960886571056065, - "grad_norm": 1.8859283924102783, - "learning_rate": 9.31889447236181e-05, - "loss": 5.284, - "step": 7279 - }, - { - "epoch": 3.7966101694915255, - "grad_norm": 1.4276055097579956, - "learning_rate": 9.318793969849247e-05, - "loss": 5.8303, - "step": 7280 - }, - { - "epoch": 3.7971316818774445, - "grad_norm": 1.4568891525268555, - "learning_rate": 9.318693467336685e-05, - "loss": 5.6691, - "step": 7281 - }, - { - "epoch": 3.797653194263364, - "grad_norm": 1.5402214527130127, - "learning_rate": 9.318592964824121e-05, - "loss": 5.2943, - "step": 7282 - }, - { - "epoch": 3.798174706649283, - "grad_norm": 1.5098837614059448, - "learning_rate": 9.318492462311557e-05, - "loss": 5.7724, - "step": 7283 - }, - { - "epoch": 3.798696219035202, - "grad_norm": 1.5579032897949219, - "learning_rate": 9.318391959798995e-05, - "loss": 5.4571, - "step": 7284 - }, - { - "epoch": 3.7992177314211215, - "grad_norm": 1.5444303750991821, - "learning_rate": 9.318291457286432e-05, - "loss": 5.6682, - "step": 7285 - }, - { - "epoch": 3.7997392438070405, - "grad_norm": 1.6458990573883057, - "learning_rate": 9.31819095477387e-05, - "loss": 5.4333, - "step": 7286 - }, - { - "epoch": 3.8002607561929596, - "grad_norm": 1.695273995399475, - "learning_rate": 9.318090452261307e-05, - "loss": 5.3142, - "step": 7287 - }, - { - "epoch": 3.800782268578879, - "grad_norm": 1.6122409105300903, - "learning_rate": 9.317989949748745e-05, - "loss": 6.178, - "step": 7288 - }, - { - "epoch": 3.801303780964798, - "grad_norm": 1.466016173362732, - "learning_rate": 9.317889447236181e-05, - "loss": 5.9777, - "step": 7289 - }, - { - "epoch": 3.801825293350717, - "grad_norm": 1.5747724771499634, - "learning_rate": 9.317788944723619e-05, - "loss": 5.4203, - "step": 7290 - }, - { - "epoch": 3.802346805736636, - "grad_norm": 1.5811963081359863, - "learning_rate": 9.317688442211056e-05, - "loss": 5.5837, - "step": 7291 - }, - { - "epoch": 3.8028683181225555, - "grad_norm": 1.588461995124817, - "learning_rate": 9.317587939698493e-05, - "loss": 5.3391, - "step": 7292 - }, - { - "epoch": 3.8033898305084746, - "grad_norm": 1.4210489988327026, - "learning_rate": 9.31748743718593e-05, - "loss": 5.7186, - "step": 7293 - }, - { - "epoch": 3.8039113428943936, - "grad_norm": 1.428564190864563, - "learning_rate": 9.317386934673368e-05, - "loss": 5.7963, - "step": 7294 - }, - { - "epoch": 3.804432855280313, - "grad_norm": 1.6226942539215088, - "learning_rate": 9.317286432160804e-05, - "loss": 5.549, - "step": 7295 - }, - { - "epoch": 3.804954367666232, - "grad_norm": 1.4528485536575317, - "learning_rate": 9.31718592964824e-05, - "loss": 5.7482, - "step": 7296 - }, - { - "epoch": 3.805475880052151, - "grad_norm": 1.4543788433074951, - "learning_rate": 9.317085427135678e-05, - "loss": 5.8548, - "step": 7297 - }, - { - "epoch": 3.8059973924380706, - "grad_norm": 1.6189948320388794, - "learning_rate": 9.316984924623116e-05, - "loss": 5.3922, - "step": 7298 - }, - { - "epoch": 3.8065189048239896, - "grad_norm": 1.5571082830429077, - "learning_rate": 9.316884422110554e-05, - "loss": 5.8023, - "step": 7299 - }, - { - "epoch": 3.8070404172099086, - "grad_norm": 1.3986806869506836, - "learning_rate": 9.31678391959799e-05, - "loss": 6.1645, - "step": 7300 - }, - { - "epoch": 3.807561929595828, - "grad_norm": 1.3392630815505981, - "learning_rate": 9.316683417085428e-05, - "loss": 5.9051, - "step": 7301 - }, - { - "epoch": 3.808083441981747, - "grad_norm": 1.4621467590332031, - "learning_rate": 9.316582914572864e-05, - "loss": 5.8731, - "step": 7302 - }, - { - "epoch": 3.808604954367666, - "grad_norm": 1.4365191459655762, - "learning_rate": 9.316482412060302e-05, - "loss": 5.5736, - "step": 7303 - }, - { - "epoch": 3.809126466753585, - "grad_norm": 1.5669788122177124, - "learning_rate": 9.316381909547739e-05, - "loss": 5.4309, - "step": 7304 - }, - { - "epoch": 3.8096479791395046, - "grad_norm": 1.4692438840866089, - "learning_rate": 9.316281407035176e-05, - "loss": 5.6156, - "step": 7305 - }, - { - "epoch": 3.8101694915254236, - "grad_norm": 1.5946383476257324, - "learning_rate": 9.316180904522613e-05, - "loss": 5.6235, - "step": 7306 - }, - { - "epoch": 3.8106910039113426, - "grad_norm": 1.4411476850509644, - "learning_rate": 9.316080402010051e-05, - "loss": 5.474, - "step": 7307 - }, - { - "epoch": 3.811212516297262, - "grad_norm": 1.5943909883499146, - "learning_rate": 9.315979899497488e-05, - "loss": 5.7842, - "step": 7308 - }, - { - "epoch": 3.811734028683181, - "grad_norm": 1.4566446542739868, - "learning_rate": 9.315879396984926e-05, - "loss": 5.8226, - "step": 7309 - }, - { - "epoch": 3.8122555410691, - "grad_norm": 1.4428954124450684, - "learning_rate": 9.315778894472363e-05, - "loss": 6.093, - "step": 7310 - }, - { - "epoch": 3.8127770534550196, - "grad_norm": 1.6049491167068481, - "learning_rate": 9.315678391959799e-05, - "loss": 5.8609, - "step": 7311 - }, - { - "epoch": 3.8132985658409386, - "grad_norm": 1.4038115739822388, - "learning_rate": 9.315577889447237e-05, - "loss": 5.271, - "step": 7312 - }, - { - "epoch": 3.8138200782268576, - "grad_norm": 1.8441298007965088, - "learning_rate": 9.315477386934673e-05, - "loss": 5.5178, - "step": 7313 - }, - { - "epoch": 3.814341590612777, - "grad_norm": 1.3638814687728882, - "learning_rate": 9.315376884422111e-05, - "loss": 5.865, - "step": 7314 - }, - { - "epoch": 3.814863102998696, - "grad_norm": 1.3874976634979248, - "learning_rate": 9.315276381909548e-05, - "loss": 6.179, - "step": 7315 - }, - { - "epoch": 3.815384615384615, - "grad_norm": 1.5154038667678833, - "learning_rate": 9.315175879396985e-05, - "loss": 5.6758, - "step": 7316 - }, - { - "epoch": 3.8159061277705346, - "grad_norm": 1.4961614608764648, - "learning_rate": 9.315075376884422e-05, - "loss": 5.4213, - "step": 7317 - }, - { - "epoch": 3.8164276401564536, - "grad_norm": 1.4776335954666138, - "learning_rate": 9.31497487437186e-05, - "loss": 5.8478, - "step": 7318 - }, - { - "epoch": 3.8169491525423727, - "grad_norm": 1.537476658821106, - "learning_rate": 9.314874371859297e-05, - "loss": 5.9095, - "step": 7319 - }, - { - "epoch": 3.817470664928292, - "grad_norm": 1.3879420757293701, - "learning_rate": 9.314773869346735e-05, - "loss": 5.9753, - "step": 7320 - }, - { - "epoch": 3.817992177314211, - "grad_norm": 1.4604583978652954, - "learning_rate": 9.314673366834172e-05, - "loss": 5.3277, - "step": 7321 - }, - { - "epoch": 3.81851368970013, - "grad_norm": 1.5530931949615479, - "learning_rate": 9.314572864321609e-05, - "loss": 5.344, - "step": 7322 - }, - { - "epoch": 3.8190352020860496, - "grad_norm": 1.7422301769256592, - "learning_rate": 9.314472361809046e-05, - "loss": 5.529, - "step": 7323 - }, - { - "epoch": 3.8195567144719687, - "grad_norm": 1.6281100511550903, - "learning_rate": 9.314371859296482e-05, - "loss": 4.8211, - "step": 7324 - }, - { - "epoch": 3.8200782268578877, - "grad_norm": 1.9276742935180664, - "learning_rate": 9.31427135678392e-05, - "loss": 5.0726, - "step": 7325 - }, - { - "epoch": 3.820599739243807, - "grad_norm": 1.4983651638031006, - "learning_rate": 9.314170854271356e-05, - "loss": 5.4877, - "step": 7326 - }, - { - "epoch": 3.821121251629726, - "grad_norm": 1.5378259420394897, - "learning_rate": 9.314070351758794e-05, - "loss": 5.5339, - "step": 7327 - }, - { - "epoch": 3.821642764015645, - "grad_norm": 1.4841055870056152, - "learning_rate": 9.313969849246232e-05, - "loss": 5.7306, - "step": 7328 - }, - { - "epoch": 3.8221642764015646, - "grad_norm": 1.5960055589675903, - "learning_rate": 9.31386934673367e-05, - "loss": 5.159, - "step": 7329 - }, - { - "epoch": 3.8226857887874837, - "grad_norm": 1.4041827917099, - "learning_rate": 9.313768844221106e-05, - "loss": 5.6839, - "step": 7330 - }, - { - "epoch": 3.8232073011734027, - "grad_norm": 1.4106749296188354, - "learning_rate": 9.313668341708544e-05, - "loss": 5.8108, - "step": 7331 - }, - { - "epoch": 3.823728813559322, - "grad_norm": 1.524141788482666, - "learning_rate": 9.31356783919598e-05, - "loss": 5.0341, - "step": 7332 - }, - { - "epoch": 3.824250325945241, - "grad_norm": 1.4767173528671265, - "learning_rate": 9.313467336683418e-05, - "loss": 5.8565, - "step": 7333 - }, - { - "epoch": 3.82477183833116, - "grad_norm": 1.259976863861084, - "learning_rate": 9.313366834170855e-05, - "loss": 5.2375, - "step": 7334 - }, - { - "epoch": 3.8252933507170797, - "grad_norm": 1.5163867473602295, - "learning_rate": 9.313266331658292e-05, - "loss": 5.7012, - "step": 7335 - }, - { - "epoch": 3.8258148631029987, - "grad_norm": 1.5600740909576416, - "learning_rate": 9.313165829145729e-05, - "loss": 5.5869, - "step": 7336 - }, - { - "epoch": 3.8263363754889177, - "grad_norm": 1.5768271684646606, - "learning_rate": 9.313065326633165e-05, - "loss": 5.86, - "step": 7337 - }, - { - "epoch": 3.826857887874837, - "grad_norm": 1.4443084001541138, - "learning_rate": 9.312964824120603e-05, - "loss": 5.8948, - "step": 7338 - }, - { - "epoch": 3.827379400260756, - "grad_norm": 1.5020993947982788, - "learning_rate": 9.312864321608041e-05, - "loss": 5.8936, - "step": 7339 - }, - { - "epoch": 3.827900912646675, - "grad_norm": 1.5863524675369263, - "learning_rate": 9.312763819095479e-05, - "loss": 5.917, - "step": 7340 - }, - { - "epoch": 3.8284224250325947, - "grad_norm": 1.5403082370758057, - "learning_rate": 9.312663316582915e-05, - "loss": 5.2954, - "step": 7341 - }, - { - "epoch": 3.8289439374185137, - "grad_norm": 1.7329519987106323, - "learning_rate": 9.312562814070353e-05, - "loss": 5.965, - "step": 7342 - }, - { - "epoch": 3.8294654498044327, - "grad_norm": 1.5207000970840454, - "learning_rate": 9.312462311557789e-05, - "loss": 5.9517, - "step": 7343 - }, - { - "epoch": 3.829986962190352, - "grad_norm": 1.4660696983337402, - "learning_rate": 9.312361809045227e-05, - "loss": 5.368, - "step": 7344 - }, - { - "epoch": 3.830508474576271, - "grad_norm": 1.6758716106414795, - "learning_rate": 9.312261306532663e-05, - "loss": 5.7404, - "step": 7345 - }, - { - "epoch": 3.83102998696219, - "grad_norm": 1.622606873512268, - "learning_rate": 9.312160804020101e-05, - "loss": 6.0296, - "step": 7346 - }, - { - "epoch": 3.8315514993481097, - "grad_norm": 1.5950168371200562, - "learning_rate": 9.312060301507538e-05, - "loss": 5.636, - "step": 7347 - }, - { - "epoch": 3.8320730117340287, - "grad_norm": 1.4020154476165771, - "learning_rate": 9.311959798994975e-05, - "loss": 5.795, - "step": 7348 - }, - { - "epoch": 3.8325945241199477, - "grad_norm": 1.5346837043762207, - "learning_rate": 9.311859296482413e-05, - "loss": 5.9293, - "step": 7349 - }, - { - "epoch": 3.833116036505867, - "grad_norm": 1.5926657915115356, - "learning_rate": 9.31175879396985e-05, - "loss": 5.702, - "step": 7350 - }, - { - "epoch": 3.833637548891786, - "grad_norm": 1.3878870010375977, - "learning_rate": 9.311658291457287e-05, - "loss": 6.0429, - "step": 7351 - }, - { - "epoch": 3.8341590612777052, - "grad_norm": 1.4329559803009033, - "learning_rate": 9.311557788944724e-05, - "loss": 6.0858, - "step": 7352 - }, - { - "epoch": 3.8346805736636247, - "grad_norm": 1.5703593492507935, - "learning_rate": 9.311457286432162e-05, - "loss": 5.2387, - "step": 7353 - }, - { - "epoch": 3.8352020860495437, - "grad_norm": 1.588128924369812, - "learning_rate": 9.311356783919598e-05, - "loss": 5.7643, - "step": 7354 - }, - { - "epoch": 3.8357235984354627, - "grad_norm": 1.440496563911438, - "learning_rate": 9.311256281407036e-05, - "loss": 5.7707, - "step": 7355 - }, - { - "epoch": 3.836245110821382, - "grad_norm": 1.3987655639648438, - "learning_rate": 9.311155778894472e-05, - "loss": 5.7321, - "step": 7356 - }, - { - "epoch": 3.836766623207301, - "grad_norm": 1.4961631298065186, - "learning_rate": 9.31105527638191e-05, - "loss": 5.9595, - "step": 7357 - }, - { - "epoch": 3.8372881355932202, - "grad_norm": 1.5718039274215698, - "learning_rate": 9.310954773869346e-05, - "loss": 5.7678, - "step": 7358 - }, - { - "epoch": 3.8378096479791397, - "grad_norm": 1.4814274311065674, - "learning_rate": 9.310854271356784e-05, - "loss": 5.7824, - "step": 7359 - }, - { - "epoch": 3.8383311603650587, - "grad_norm": 1.5473933219909668, - "learning_rate": 9.310753768844222e-05, - "loss": 5.5492, - "step": 7360 - }, - { - "epoch": 3.8388526727509777, - "grad_norm": 1.5744168758392334, - "learning_rate": 9.31065326633166e-05, - "loss": 5.9439, - "step": 7361 - }, - { - "epoch": 3.839374185136897, - "grad_norm": 1.817595362663269, - "learning_rate": 9.310552763819096e-05, - "loss": 5.8264, - "step": 7362 - }, - { - "epoch": 3.8398956975228162, - "grad_norm": 1.365769863128662, - "learning_rate": 9.310452261306533e-05, - "loss": 5.8306, - "step": 7363 - }, - { - "epoch": 3.8404172099087353, - "grad_norm": 1.4149446487426758, - "learning_rate": 9.31035175879397e-05, - "loss": 5.696, - "step": 7364 - }, - { - "epoch": 3.8409387222946547, - "grad_norm": 1.5193815231323242, - "learning_rate": 9.310251256281407e-05, - "loss": 5.7487, - "step": 7365 - }, - { - "epoch": 3.8414602346805737, - "grad_norm": 1.3934236764907837, - "learning_rate": 9.310150753768845e-05, - "loss": 5.9013, - "step": 7366 - }, - { - "epoch": 3.8419817470664928, - "grad_norm": 1.521831750869751, - "learning_rate": 9.310050251256281e-05, - "loss": 5.6412, - "step": 7367 - }, - { - "epoch": 3.8425032594524122, - "grad_norm": 1.4701393842697144, - "learning_rate": 9.309949748743719e-05, - "loss": 6.2105, - "step": 7368 - }, - { - "epoch": 3.8430247718383312, - "grad_norm": 1.4762911796569824, - "learning_rate": 9.309849246231155e-05, - "loss": 5.265, - "step": 7369 - }, - { - "epoch": 3.8435462842242503, - "grad_norm": 1.4623039960861206, - "learning_rate": 9.309748743718593e-05, - "loss": 5.7053, - "step": 7370 - }, - { - "epoch": 3.8440677966101697, - "grad_norm": 1.442940354347229, - "learning_rate": 9.309648241206031e-05, - "loss": 5.6107, - "step": 7371 - }, - { - "epoch": 3.8445893089960888, - "grad_norm": 1.4975831508636475, - "learning_rate": 9.309547738693469e-05, - "loss": 5.7626, - "step": 7372 - }, - { - "epoch": 3.8451108213820078, - "grad_norm": 1.4760650396347046, - "learning_rate": 9.309447236180905e-05, - "loss": 5.8203, - "step": 7373 - }, - { - "epoch": 3.8456323337679272, - "grad_norm": 1.346336841583252, - "learning_rate": 9.309346733668343e-05, - "loss": 5.7178, - "step": 7374 - }, - { - "epoch": 3.8461538461538463, - "grad_norm": 1.4331563711166382, - "learning_rate": 9.309246231155779e-05, - "loss": 5.5786, - "step": 7375 - }, - { - "epoch": 3.8466753585397653, - "grad_norm": 1.5538933277130127, - "learning_rate": 9.309145728643216e-05, - "loss": 5.1849, - "step": 7376 - }, - { - "epoch": 3.8471968709256847, - "grad_norm": 1.509332299232483, - "learning_rate": 9.309045226130653e-05, - "loss": 5.6361, - "step": 7377 - }, - { - "epoch": 3.8477183833116038, - "grad_norm": 1.5013967752456665, - "learning_rate": 9.30894472361809e-05, - "loss": 5.8558, - "step": 7378 - }, - { - "epoch": 3.848239895697523, - "grad_norm": 1.5540711879730225, - "learning_rate": 9.308844221105528e-05, - "loss": 5.8808, - "step": 7379 - }, - { - "epoch": 3.8487614080834422, - "grad_norm": 1.3763415813446045, - "learning_rate": 9.308743718592965e-05, - "loss": 5.7128, - "step": 7380 - }, - { - "epoch": 3.8492829204693613, - "grad_norm": 1.5063140392303467, - "learning_rate": 9.308643216080403e-05, - "loss": 5.5197, - "step": 7381 - }, - { - "epoch": 3.8498044328552803, - "grad_norm": 1.6199727058410645, - "learning_rate": 9.30854271356784e-05, - "loss": 5.6432, - "step": 7382 - }, - { - "epoch": 3.8503259452411998, - "grad_norm": 1.4238388538360596, - "learning_rate": 9.308442211055277e-05, - "loss": 5.7256, - "step": 7383 - }, - { - "epoch": 3.8508474576271188, - "grad_norm": 1.488381266593933, - "learning_rate": 9.308341708542714e-05, - "loss": 5.7625, - "step": 7384 - }, - { - "epoch": 3.851368970013038, - "grad_norm": 1.540092945098877, - "learning_rate": 9.308241206030152e-05, - "loss": 5.6911, - "step": 7385 - }, - { - "epoch": 3.851890482398957, - "grad_norm": 1.4613125324249268, - "learning_rate": 9.308140703517588e-05, - "loss": 5.5411, - "step": 7386 - }, - { - "epoch": 3.8524119947848763, - "grad_norm": 1.5402295589447021, - "learning_rate": 9.308040201005026e-05, - "loss": 5.9203, - "step": 7387 - }, - { - "epoch": 3.8529335071707953, - "grad_norm": 1.3402388095855713, - "learning_rate": 9.307939698492462e-05, - "loss": 6.102, - "step": 7388 - }, - { - "epoch": 3.8534550195567143, - "grad_norm": 1.401375412940979, - "learning_rate": 9.307839195979899e-05, - "loss": 5.8722, - "step": 7389 - }, - { - "epoch": 3.853976531942634, - "grad_norm": 1.422985315322876, - "learning_rate": 9.307738693467337e-05, - "loss": 6.1178, - "step": 7390 - }, - { - "epoch": 3.854498044328553, - "grad_norm": 1.3574943542480469, - "learning_rate": 9.307638190954774e-05, - "loss": 5.8447, - "step": 7391 - }, - { - "epoch": 3.855019556714472, - "grad_norm": 1.3394254446029663, - "learning_rate": 9.307537688442212e-05, - "loss": 6.0167, - "step": 7392 - }, - { - "epoch": 3.8555410691003913, - "grad_norm": 1.533672571182251, - "learning_rate": 9.307437185929649e-05, - "loss": 5.7297, - "step": 7393 - }, - { - "epoch": 3.8560625814863103, - "grad_norm": 1.4482028484344482, - "learning_rate": 9.307336683417086e-05, - "loss": 5.8843, - "step": 7394 - }, - { - "epoch": 3.8565840938722293, - "grad_norm": 1.3827320337295532, - "learning_rate": 9.307236180904523e-05, - "loss": 6.0301, - "step": 7395 - }, - { - "epoch": 3.8571056062581484, - "grad_norm": 1.476450800895691, - "learning_rate": 9.30713567839196e-05, - "loss": 5.9115, - "step": 7396 - }, - { - "epoch": 3.857627118644068, - "grad_norm": 1.450947642326355, - "learning_rate": 9.307035175879397e-05, - "loss": 5.7652, - "step": 7397 - }, - { - "epoch": 3.858148631029987, - "grad_norm": 1.5619162321090698, - "learning_rate": 9.306934673366835e-05, - "loss": 5.8607, - "step": 7398 - }, - { - "epoch": 3.858670143415906, - "grad_norm": 1.583642840385437, - "learning_rate": 9.306834170854271e-05, - "loss": 5.5721, - "step": 7399 - }, - { - "epoch": 3.8591916558018253, - "grad_norm": 1.614371657371521, - "learning_rate": 9.306733668341709e-05, - "loss": 5.2191, - "step": 7400 - }, - { - "epoch": 3.8597131681877443, - "grad_norm": 1.3573259115219116, - "learning_rate": 9.306633165829147e-05, - "loss": 5.1005, - "step": 7401 - }, - { - "epoch": 3.8602346805736634, - "grad_norm": 1.3951905965805054, - "learning_rate": 9.306532663316585e-05, - "loss": 5.7928, - "step": 7402 - }, - { - "epoch": 3.860756192959583, - "grad_norm": 1.4096719026565552, - "learning_rate": 9.306432160804021e-05, - "loss": 5.4934, - "step": 7403 - }, - { - "epoch": 3.861277705345502, - "grad_norm": 1.4047895669937134, - "learning_rate": 9.306331658291457e-05, - "loss": 5.6583, - "step": 7404 - }, - { - "epoch": 3.861799217731421, - "grad_norm": 1.5388154983520508, - "learning_rate": 9.306231155778895e-05, - "loss": 5.2656, - "step": 7405 - }, - { - "epoch": 3.8623207301173403, - "grad_norm": 1.5309275388717651, - "learning_rate": 9.306130653266332e-05, - "loss": 5.6995, - "step": 7406 - }, - { - "epoch": 3.8628422425032594, - "grad_norm": 1.4648752212524414, - "learning_rate": 9.30603015075377e-05, - "loss": 5.8571, - "step": 7407 - }, - { - "epoch": 3.8633637548891784, - "grad_norm": 1.5527669191360474, - "learning_rate": 9.305929648241206e-05, - "loss": 5.7188, - "step": 7408 - }, - { - "epoch": 3.863885267275098, - "grad_norm": 1.4530038833618164, - "learning_rate": 9.305829145728644e-05, - "loss": 6.1491, - "step": 7409 - }, - { - "epoch": 3.864406779661017, - "grad_norm": 1.472064733505249, - "learning_rate": 9.30572864321608e-05, - "loss": 5.4047, - "step": 7410 - }, - { - "epoch": 3.864928292046936, - "grad_norm": 1.478812575340271, - "learning_rate": 9.305628140703518e-05, - "loss": 5.6873, - "step": 7411 - }, - { - "epoch": 3.8654498044328554, - "grad_norm": 1.4151957035064697, - "learning_rate": 9.305527638190956e-05, - "loss": 5.813, - "step": 7412 - }, - { - "epoch": 3.8659713168187744, - "grad_norm": 1.4414981603622437, - "learning_rate": 9.305427135678393e-05, - "loss": 5.7353, - "step": 7413 - }, - { - "epoch": 3.8664928292046934, - "grad_norm": 1.5408568382263184, - "learning_rate": 9.30532663316583e-05, - "loss": 6.1171, - "step": 7414 - }, - { - "epoch": 3.867014341590613, - "grad_norm": 1.4267795085906982, - "learning_rate": 9.305226130653268e-05, - "loss": 5.9389, - "step": 7415 - }, - { - "epoch": 3.867535853976532, - "grad_norm": 1.6250139474868774, - "learning_rate": 9.305125628140704e-05, - "loss": 5.6821, - "step": 7416 - }, - { - "epoch": 3.868057366362451, - "grad_norm": 1.6536126136779785, - "learning_rate": 9.30502512562814e-05, - "loss": 5.3526, - "step": 7417 - }, - { - "epoch": 3.8685788787483704, - "grad_norm": 1.5202834606170654, - "learning_rate": 9.304924623115578e-05, - "loss": 6.027, - "step": 7418 - }, - { - "epoch": 3.8691003911342894, - "grad_norm": 1.5742031335830688, - "learning_rate": 9.304824120603015e-05, - "loss": 5.4087, - "step": 7419 - }, - { - "epoch": 3.8696219035202084, - "grad_norm": 1.4938205480575562, - "learning_rate": 9.304723618090452e-05, - "loss": 5.4023, - "step": 7420 - }, - { - "epoch": 3.870143415906128, - "grad_norm": 1.5057107210159302, - "learning_rate": 9.30462311557789e-05, - "loss": 5.6376, - "step": 7421 - }, - { - "epoch": 3.870664928292047, - "grad_norm": 1.477750301361084, - "learning_rate": 9.304522613065328e-05, - "loss": 5.9479, - "step": 7422 - }, - { - "epoch": 3.871186440677966, - "grad_norm": 1.5440618991851807, - "learning_rate": 9.304422110552764e-05, - "loss": 5.5301, - "step": 7423 - }, - { - "epoch": 3.8717079530638854, - "grad_norm": 1.5467777252197266, - "learning_rate": 9.304321608040202e-05, - "loss": 5.9813, - "step": 7424 - }, - { - "epoch": 3.8722294654498044, - "grad_norm": 1.5010435581207275, - "learning_rate": 9.304221105527639e-05, - "loss": 5.5503, - "step": 7425 - }, - { - "epoch": 3.8727509778357234, - "grad_norm": 1.6053048372268677, - "learning_rate": 9.304120603015076e-05, - "loss": 5.39, - "step": 7426 - }, - { - "epoch": 3.873272490221643, - "grad_norm": 1.4336978197097778, - "learning_rate": 9.304020100502513e-05, - "loss": 5.6162, - "step": 7427 - }, - { - "epoch": 3.873794002607562, - "grad_norm": 1.4311177730560303, - "learning_rate": 9.30391959798995e-05, - "loss": 5.8641, - "step": 7428 - }, - { - "epoch": 3.874315514993481, - "grad_norm": 1.3721880912780762, - "learning_rate": 9.303819095477387e-05, - "loss": 5.9185, - "step": 7429 - }, - { - "epoch": 3.8748370273794004, - "grad_norm": 1.6035172939300537, - "learning_rate": 9.303718592964823e-05, - "loss": 5.5294, - "step": 7430 - }, - { - "epoch": 3.8753585397653194, - "grad_norm": 1.346137285232544, - "learning_rate": 9.303618090452261e-05, - "loss": 6.0711, - "step": 7431 - }, - { - "epoch": 3.8758800521512384, - "grad_norm": 1.4295613765716553, - "learning_rate": 9.303517587939699e-05, - "loss": 5.7064, - "step": 7432 - }, - { - "epoch": 3.876401564537158, - "grad_norm": 1.6339564323425293, - "learning_rate": 9.303417085427137e-05, - "loss": 5.0129, - "step": 7433 - }, - { - "epoch": 3.876923076923077, - "grad_norm": 1.5336120128631592, - "learning_rate": 9.303316582914573e-05, - "loss": 5.7114, - "step": 7434 - }, - { - "epoch": 3.877444589308996, - "grad_norm": 1.6509746313095093, - "learning_rate": 9.303216080402011e-05, - "loss": 5.0414, - "step": 7435 - }, - { - "epoch": 3.8779661016949154, - "grad_norm": 1.4861316680908203, - "learning_rate": 9.303115577889447e-05, - "loss": 5.6393, - "step": 7436 - }, - { - "epoch": 3.8784876140808344, - "grad_norm": 1.4018250703811646, - "learning_rate": 9.303015075376885e-05, - "loss": 6.0318, - "step": 7437 - }, - { - "epoch": 3.8790091264667534, - "grad_norm": 1.2611461877822876, - "learning_rate": 9.302914572864322e-05, - "loss": 6.0721, - "step": 7438 - }, - { - "epoch": 3.879530638852673, - "grad_norm": 1.6758310794830322, - "learning_rate": 9.30281407035176e-05, - "loss": 5.3176, - "step": 7439 - }, - { - "epoch": 3.880052151238592, - "grad_norm": 1.5434646606445312, - "learning_rate": 9.302713567839196e-05, - "loss": 5.3169, - "step": 7440 - }, - { - "epoch": 3.880573663624511, - "grad_norm": 1.5318260192871094, - "learning_rate": 9.302613065326634e-05, - "loss": 6.0378, - "step": 7441 - }, - { - "epoch": 3.8810951760104304, - "grad_norm": 1.4274393320083618, - "learning_rate": 9.302512562814071e-05, - "loss": 5.8074, - "step": 7442 - }, - { - "epoch": 3.8816166883963494, - "grad_norm": 1.5367181301116943, - "learning_rate": 9.302412060301508e-05, - "loss": 5.6289, - "step": 7443 - }, - { - "epoch": 3.8821382007822685, - "grad_norm": 1.608225703239441, - "learning_rate": 9.302311557788946e-05, - "loss": 5.5976, - "step": 7444 - }, - { - "epoch": 3.882659713168188, - "grad_norm": 1.3789702653884888, - "learning_rate": 9.302211055276382e-05, - "loss": 5.7309, - "step": 7445 - }, - { - "epoch": 3.883181225554107, - "grad_norm": 1.4663275480270386, - "learning_rate": 9.30211055276382e-05, - "loss": 5.7647, - "step": 7446 - }, - { - "epoch": 3.883702737940026, - "grad_norm": 1.5276672840118408, - "learning_rate": 9.302010050251256e-05, - "loss": 5.6749, - "step": 7447 - }, - { - "epoch": 3.8842242503259454, - "grad_norm": 1.5292439460754395, - "learning_rate": 9.301909547738694e-05, - "loss": 5.7349, - "step": 7448 - }, - { - "epoch": 3.8847457627118644, - "grad_norm": 1.4629054069519043, - "learning_rate": 9.30180904522613e-05, - "loss": 6.0572, - "step": 7449 - }, - { - "epoch": 3.8852672750977835, - "grad_norm": 1.4691002368927002, - "learning_rate": 9.301708542713568e-05, - "loss": 5.836, - "step": 7450 - }, - { - "epoch": 3.885788787483703, - "grad_norm": 1.5686439275741577, - "learning_rate": 9.301608040201005e-05, - "loss": 5.5289, - "step": 7451 - }, - { - "epoch": 3.886310299869622, - "grad_norm": 1.5806677341461182, - "learning_rate": 9.301507537688442e-05, - "loss": 5.5114, - "step": 7452 - }, - { - "epoch": 3.886831812255541, - "grad_norm": 1.3830044269561768, - "learning_rate": 9.30140703517588e-05, - "loss": 6.1422, - "step": 7453 - }, - { - "epoch": 3.8873533246414604, - "grad_norm": 1.4468969106674194, - "learning_rate": 9.301306532663318e-05, - "loss": 5.8428, - "step": 7454 - }, - { - "epoch": 3.8878748370273795, - "grad_norm": 1.5743839740753174, - "learning_rate": 9.301206030150754e-05, - "loss": 5.1823, - "step": 7455 - }, - { - "epoch": 3.8883963494132985, - "grad_norm": 1.662235975265503, - "learning_rate": 9.301105527638191e-05, - "loss": 5.1281, - "step": 7456 - }, - { - "epoch": 3.888917861799218, - "grad_norm": 1.4992676973342896, - "learning_rate": 9.301005025125629e-05, - "loss": 5.6663, - "step": 7457 - }, - { - "epoch": 3.889439374185137, - "grad_norm": 1.5631073713302612, - "learning_rate": 9.300904522613065e-05, - "loss": 5.6072, - "step": 7458 - }, - { - "epoch": 3.889960886571056, - "grad_norm": 1.5182746648788452, - "learning_rate": 9.300804020100503e-05, - "loss": 5.3214, - "step": 7459 - }, - { - "epoch": 3.8904823989569755, - "grad_norm": 1.403206706047058, - "learning_rate": 9.300703517587939e-05, - "loss": 5.8482, - "step": 7460 - }, - { - "epoch": 3.8910039113428945, - "grad_norm": 1.4329872131347656, - "learning_rate": 9.300603015075377e-05, - "loss": 5.6059, - "step": 7461 - }, - { - "epoch": 3.8915254237288135, - "grad_norm": 1.5377227067947388, - "learning_rate": 9.300502512562815e-05, - "loss": 5.9023, - "step": 7462 - }, - { - "epoch": 3.892046936114733, - "grad_norm": 1.6596980094909668, - "learning_rate": 9.300402010050253e-05, - "loss": 5.1913, - "step": 7463 - }, - { - "epoch": 3.892568448500652, - "grad_norm": 1.5277000665664673, - "learning_rate": 9.300301507537689e-05, - "loss": 5.6356, - "step": 7464 - }, - { - "epoch": 3.893089960886571, - "grad_norm": 1.583370327949524, - "learning_rate": 9.300201005025127e-05, - "loss": 5.6137, - "step": 7465 - }, - { - "epoch": 3.8936114732724905, - "grad_norm": 1.6872515678405762, - "learning_rate": 9.300100502512563e-05, - "loss": 5.2131, - "step": 7466 - }, - { - "epoch": 3.8941329856584095, - "grad_norm": 1.495287299156189, - "learning_rate": 9.300000000000001e-05, - "loss": 5.5891, - "step": 7467 - }, - { - "epoch": 3.8946544980443285, - "grad_norm": 1.4534456729888916, - "learning_rate": 9.299899497487438e-05, - "loss": 5.5515, - "step": 7468 - }, - { - "epoch": 3.895176010430248, - "grad_norm": 1.6854496002197266, - "learning_rate": 9.299798994974874e-05, - "loss": 5.3055, - "step": 7469 - }, - { - "epoch": 3.895697522816167, - "grad_norm": 1.9250746965408325, - "learning_rate": 9.299698492462312e-05, - "loss": 4.9859, - "step": 7470 - }, - { - "epoch": 3.896219035202086, - "grad_norm": 1.4997434616088867, - "learning_rate": 9.299597989949748e-05, - "loss": 5.4935, - "step": 7471 - }, - { - "epoch": 3.8967405475880055, - "grad_norm": 1.476922869682312, - "learning_rate": 9.299497487437186e-05, - "loss": 5.6764, - "step": 7472 - }, - { - "epoch": 3.8972620599739245, - "grad_norm": 1.4271098375320435, - "learning_rate": 9.299396984924624e-05, - "loss": 5.7544, - "step": 7473 - }, - { - "epoch": 3.8977835723598435, - "grad_norm": 1.4630951881408691, - "learning_rate": 9.299296482412061e-05, - "loss": 6.0254, - "step": 7474 - }, - { - "epoch": 3.898305084745763, - "grad_norm": 1.4139411449432373, - "learning_rate": 9.299195979899498e-05, - "loss": 6.1193, - "step": 7475 - }, - { - "epoch": 3.898826597131682, - "grad_norm": 1.588281273841858, - "learning_rate": 9.299095477386936e-05, - "loss": 5.5082, - "step": 7476 - }, - { - "epoch": 3.899348109517601, - "grad_norm": 2.37221097946167, - "learning_rate": 9.298994974874372e-05, - "loss": 5.4785, - "step": 7477 - }, - { - "epoch": 3.89986962190352, - "grad_norm": 1.4668819904327393, - "learning_rate": 9.29889447236181e-05, - "loss": 6.075, - "step": 7478 - }, - { - "epoch": 3.9003911342894395, - "grad_norm": 1.547419548034668, - "learning_rate": 9.298793969849246e-05, - "loss": 5.7685, - "step": 7479 - }, - { - "epoch": 3.9009126466753585, - "grad_norm": 1.425195336341858, - "learning_rate": 9.298693467336684e-05, - "loss": 5.6616, - "step": 7480 - }, - { - "epoch": 3.9014341590612776, - "grad_norm": 1.563821792602539, - "learning_rate": 9.29859296482412e-05, - "loss": 5.8288, - "step": 7481 - }, - { - "epoch": 3.901955671447197, - "grad_norm": 1.326955795288086, - "learning_rate": 9.298492462311558e-05, - "loss": 6.1041, - "step": 7482 - }, - { - "epoch": 3.902477183833116, - "grad_norm": 1.5521398782730103, - "learning_rate": 9.298391959798996e-05, - "loss": 5.8085, - "step": 7483 - }, - { - "epoch": 3.902998696219035, - "grad_norm": 1.4717357158660889, - "learning_rate": 9.298291457286433e-05, - "loss": 5.7656, - "step": 7484 - }, - { - "epoch": 3.9035202086049545, - "grad_norm": 1.5879474878311157, - "learning_rate": 9.29819095477387e-05, - "loss": 5.393, - "step": 7485 - }, - { - "epoch": 3.9040417209908735, - "grad_norm": 1.4940433502197266, - "learning_rate": 9.298090452261307e-05, - "loss": 5.7401, - "step": 7486 - }, - { - "epoch": 3.9045632333767926, - "grad_norm": 1.3808059692382812, - "learning_rate": 9.297989949748745e-05, - "loss": 6.0001, - "step": 7487 - }, - { - "epoch": 3.905084745762712, - "grad_norm": 1.3587883710861206, - "learning_rate": 9.297889447236181e-05, - "loss": 5.74, - "step": 7488 - }, - { - "epoch": 3.905606258148631, - "grad_norm": 1.4871399402618408, - "learning_rate": 9.297788944723619e-05, - "loss": 5.7021, - "step": 7489 - }, - { - "epoch": 3.90612777053455, - "grad_norm": 1.3692749738693237, - "learning_rate": 9.297688442211055e-05, - "loss": 5.6565, - "step": 7490 - }, - { - "epoch": 3.906649282920469, - "grad_norm": 1.6146240234375, - "learning_rate": 9.297587939698493e-05, - "loss": 5.4211, - "step": 7491 - }, - { - "epoch": 3.9071707953063886, - "grad_norm": 1.6459628343582153, - "learning_rate": 9.29748743718593e-05, - "loss": 6.0388, - "step": 7492 - }, - { - "epoch": 3.9076923076923076, - "grad_norm": 1.500594973564148, - "learning_rate": 9.297386934673367e-05, - "loss": 5.5474, - "step": 7493 - }, - { - "epoch": 3.9082138200782266, - "grad_norm": 1.5119051933288574, - "learning_rate": 9.297286432160805e-05, - "loss": 5.8912, - "step": 7494 - }, - { - "epoch": 3.908735332464146, - "grad_norm": 1.5113798379898071, - "learning_rate": 9.297185929648243e-05, - "loss": 5.7602, - "step": 7495 - }, - { - "epoch": 3.909256844850065, - "grad_norm": 1.6350038051605225, - "learning_rate": 9.297085427135679e-05, - "loss": 5.5792, - "step": 7496 - }, - { - "epoch": 3.909778357235984, - "grad_norm": 1.4959030151367188, - "learning_rate": 9.296984924623116e-05, - "loss": 5.6566, - "step": 7497 - }, - { - "epoch": 3.9102998696219036, - "grad_norm": 1.2636975049972534, - "learning_rate": 9.296884422110553e-05, - "loss": 6.0959, - "step": 7498 - }, - { - "epoch": 3.9108213820078226, - "grad_norm": 1.5478317737579346, - "learning_rate": 9.29678391959799e-05, - "loss": 5.6066, - "step": 7499 - }, - { - "epoch": 3.9113428943937416, - "grad_norm": 1.7196043729782104, - "learning_rate": 9.296683417085428e-05, - "loss": 5.6258, - "step": 7500 - }, - { - "epoch": 3.911864406779661, - "grad_norm": 1.3744937181472778, - "learning_rate": 9.296582914572864e-05, - "loss": 6.0598, - "step": 7501 - }, - { - "epoch": 3.91238591916558, - "grad_norm": 1.4504047632217407, - "learning_rate": 9.296482412060302e-05, - "loss": 5.9541, - "step": 7502 - }, - { - "epoch": 3.912907431551499, - "grad_norm": 1.5273666381835938, - "learning_rate": 9.29638190954774e-05, - "loss": 5.355, - "step": 7503 - }, - { - "epoch": 3.9134289439374186, - "grad_norm": 1.4829984903335571, - "learning_rate": 9.296281407035177e-05, - "loss": 5.9562, - "step": 7504 - }, - { - "epoch": 3.9139504563233376, - "grad_norm": 1.4446399211883545, - "learning_rate": 9.296180904522614e-05, - "loss": 5.2616, - "step": 7505 - }, - { - "epoch": 3.9144719687092566, - "grad_norm": 1.4765549898147583, - "learning_rate": 9.296080402010052e-05, - "loss": 6.0661, - "step": 7506 - }, - { - "epoch": 3.914993481095176, - "grad_norm": 1.4246501922607422, - "learning_rate": 9.295979899497488e-05, - "loss": 5.8992, - "step": 7507 - }, - { - "epoch": 3.915514993481095, - "grad_norm": 1.4726570844650269, - "learning_rate": 9.295879396984926e-05, - "loss": 5.7851, - "step": 7508 - }, - { - "epoch": 3.916036505867014, - "grad_norm": 1.4989628791809082, - "learning_rate": 9.295778894472362e-05, - "loss": 5.4837, - "step": 7509 - }, - { - "epoch": 3.9165580182529336, - "grad_norm": 1.3995634317398071, - "learning_rate": 9.295678391959799e-05, - "loss": 5.8205, - "step": 7510 - }, - { - "epoch": 3.9170795306388526, - "grad_norm": 1.5223127603530884, - "learning_rate": 9.295577889447236e-05, - "loss": 5.7295, - "step": 7511 - }, - { - "epoch": 3.9176010430247716, - "grad_norm": 1.5087918043136597, - "learning_rate": 9.295477386934673e-05, - "loss": 5.3821, - "step": 7512 - }, - { - "epoch": 3.918122555410691, - "grad_norm": 1.348781704902649, - "learning_rate": 9.29537688442211e-05, - "loss": 5.9685, - "step": 7513 - }, - { - "epoch": 3.91864406779661, - "grad_norm": 1.4479998350143433, - "learning_rate": 9.295276381909548e-05, - "loss": 5.7061, - "step": 7514 - }, - { - "epoch": 3.919165580182529, - "grad_norm": 1.9498530626296997, - "learning_rate": 9.295175879396986e-05, - "loss": 5.7486, - "step": 7515 - }, - { - "epoch": 3.9196870925684486, - "grad_norm": 1.5440491437911987, - "learning_rate": 9.295075376884423e-05, - "loss": 5.3503, - "step": 7516 - }, - { - "epoch": 3.9202086049543676, - "grad_norm": 1.404083013534546, - "learning_rate": 9.29497487437186e-05, - "loss": 5.7557, - "step": 7517 - }, - { - "epoch": 3.9207301173402866, - "grad_norm": 1.5156038999557495, - "learning_rate": 9.294874371859297e-05, - "loss": 5.9383, - "step": 7518 - }, - { - "epoch": 3.921251629726206, - "grad_norm": 1.5546176433563232, - "learning_rate": 9.294773869346735e-05, - "loss": 5.7311, - "step": 7519 - }, - { - "epoch": 3.921773142112125, - "grad_norm": 1.4764430522918701, - "learning_rate": 9.294673366834171e-05, - "loss": 5.631, - "step": 7520 - }, - { - "epoch": 3.922294654498044, - "grad_norm": 1.4786019325256348, - "learning_rate": 9.294572864321609e-05, - "loss": 5.515, - "step": 7521 - }, - { - "epoch": 3.9228161668839636, - "grad_norm": 1.3405482769012451, - "learning_rate": 9.294472361809045e-05, - "loss": 6.0299, - "step": 7522 - }, - { - "epoch": 3.9233376792698826, - "grad_norm": 1.5859111547470093, - "learning_rate": 9.294371859296482e-05, - "loss": 5.4544, - "step": 7523 - }, - { - "epoch": 3.9238591916558017, - "grad_norm": 1.3977775573730469, - "learning_rate": 9.29427135678392e-05, - "loss": 4.9013, - "step": 7524 - }, - { - "epoch": 3.924380704041721, - "grad_norm": 1.3908288478851318, - "learning_rate": 9.294170854271357e-05, - "loss": 5.9341, - "step": 7525 - }, - { - "epoch": 3.92490221642764, - "grad_norm": 1.5150518417358398, - "learning_rate": 9.294070351758795e-05, - "loss": 5.7293, - "step": 7526 - }, - { - "epoch": 3.925423728813559, - "grad_norm": 1.4363093376159668, - "learning_rate": 9.293969849246231e-05, - "loss": 5.6272, - "step": 7527 - }, - { - "epoch": 3.9259452411994786, - "grad_norm": 1.4544016122817993, - "learning_rate": 9.293869346733669e-05, - "loss": 5.9819, - "step": 7528 - }, - { - "epoch": 3.9264667535853977, - "grad_norm": 1.5386769771575928, - "learning_rate": 9.293768844221106e-05, - "loss": 5.8177, - "step": 7529 - }, - { - "epoch": 3.9269882659713167, - "grad_norm": 1.4453848600387573, - "learning_rate": 9.293668341708543e-05, - "loss": 5.665, - "step": 7530 - }, - { - "epoch": 3.927509778357236, - "grad_norm": 1.5423554182052612, - "learning_rate": 9.29356783919598e-05, - "loss": 5.5903, - "step": 7531 - }, - { - "epoch": 3.928031290743155, - "grad_norm": 1.6550586223602295, - "learning_rate": 9.293467336683418e-05, - "loss": 5.5713, - "step": 7532 - }, - { - "epoch": 3.928552803129074, - "grad_norm": 1.6271883249282837, - "learning_rate": 9.293366834170854e-05, - "loss": 5.5668, - "step": 7533 - }, - { - "epoch": 3.9290743155149936, - "grad_norm": 1.4324886798858643, - "learning_rate": 9.293266331658292e-05, - "loss": 5.4394, - "step": 7534 - }, - { - "epoch": 3.9295958279009127, - "grad_norm": 1.427464246749878, - "learning_rate": 9.29316582914573e-05, - "loss": 5.9379, - "step": 7535 - }, - { - "epoch": 3.9301173402868317, - "grad_norm": 1.3041108846664429, - "learning_rate": 9.293065326633166e-05, - "loss": 6.0153, - "step": 7536 - }, - { - "epoch": 3.930638852672751, - "grad_norm": 1.4490835666656494, - "learning_rate": 9.292964824120604e-05, - "loss": 5.9307, - "step": 7537 - }, - { - "epoch": 3.93116036505867, - "grad_norm": 1.6295241117477417, - "learning_rate": 9.29286432160804e-05, - "loss": 5.054, - "step": 7538 - }, - { - "epoch": 3.931681877444589, - "grad_norm": 1.4586610794067383, - "learning_rate": 9.292763819095478e-05, - "loss": 5.9426, - "step": 7539 - }, - { - "epoch": 3.9322033898305087, - "grad_norm": 1.8300080299377441, - "learning_rate": 9.292663316582915e-05, - "loss": 5.0026, - "step": 7540 - }, - { - "epoch": 3.9327249022164277, - "grad_norm": 1.4347162246704102, - "learning_rate": 9.292562814070352e-05, - "loss": 6.0806, - "step": 7541 - }, - { - "epoch": 3.9332464146023467, - "grad_norm": 1.4386295080184937, - "learning_rate": 9.292462311557789e-05, - "loss": 5.8059, - "step": 7542 - }, - { - "epoch": 3.933767926988266, - "grad_norm": 1.4731091260910034, - "learning_rate": 9.292361809045226e-05, - "loss": 5.6268, - "step": 7543 - }, - { - "epoch": 3.934289439374185, - "grad_norm": 1.5240801572799683, - "learning_rate": 9.292261306532663e-05, - "loss": 5.7642, - "step": 7544 - }, - { - "epoch": 3.934810951760104, - "grad_norm": 1.5005446672439575, - "learning_rate": 9.292160804020101e-05, - "loss": 5.4764, - "step": 7545 - }, - { - "epoch": 3.9353324641460237, - "grad_norm": 1.4473650455474854, - "learning_rate": 9.292060301507538e-05, - "loss": 5.8381, - "step": 7546 - }, - { - "epoch": 3.9358539765319427, - "grad_norm": 1.4751291275024414, - "learning_rate": 9.291959798994976e-05, - "loss": 5.7636, - "step": 7547 - }, - { - "epoch": 3.9363754889178617, - "grad_norm": 1.3723390102386475, - "learning_rate": 9.291859296482413e-05, - "loss": 5.882, - "step": 7548 - }, - { - "epoch": 3.936897001303781, - "grad_norm": 1.585329294204712, - "learning_rate": 9.291758793969849e-05, - "loss": 5.4485, - "step": 7549 - }, - { - "epoch": 3.9374185136897, - "grad_norm": 1.4724208116531372, - "learning_rate": 9.291658291457287e-05, - "loss": 5.9485, - "step": 7550 - }, - { - "epoch": 3.937940026075619, - "grad_norm": 1.5824873447418213, - "learning_rate": 9.291557788944723e-05, - "loss": 5.6632, - "step": 7551 - }, - { - "epoch": 3.9384615384615387, - "grad_norm": 1.585875391960144, - "learning_rate": 9.291457286432161e-05, - "loss": 5.5688, - "step": 7552 - }, - { - "epoch": 3.9389830508474577, - "grad_norm": 1.4935669898986816, - "learning_rate": 9.291356783919598e-05, - "loss": 5.7277, - "step": 7553 - }, - { - "epoch": 3.9395045632333767, - "grad_norm": 1.6523576974868774, - "learning_rate": 9.291256281407035e-05, - "loss": 6.1026, - "step": 7554 - }, - { - "epoch": 3.940026075619296, - "grad_norm": 1.4205392599105835, - "learning_rate": 9.291155778894473e-05, - "loss": 5.8633, - "step": 7555 - }, - { - "epoch": 3.940547588005215, - "grad_norm": 1.318536400794983, - "learning_rate": 9.291055276381911e-05, - "loss": 6.0398, - "step": 7556 - }, - { - "epoch": 3.9410691003911342, - "grad_norm": 1.6051002740859985, - "learning_rate": 9.290954773869347e-05, - "loss": 5.284, - "step": 7557 - }, - { - "epoch": 3.9415906127770537, - "grad_norm": 1.5022789239883423, - "learning_rate": 9.290854271356785e-05, - "loss": 5.8473, - "step": 7558 - }, - { - "epoch": 3.9421121251629727, - "grad_norm": 1.5657141208648682, - "learning_rate": 9.290753768844222e-05, - "loss": 5.5074, - "step": 7559 - }, - { - "epoch": 3.9426336375488917, - "grad_norm": 1.459274411201477, - "learning_rate": 9.290653266331659e-05, - "loss": 5.7827, - "step": 7560 - }, - { - "epoch": 3.943155149934811, - "grad_norm": 1.405693531036377, - "learning_rate": 9.290552763819096e-05, - "loss": 5.2736, - "step": 7561 - }, - { - "epoch": 3.94367666232073, - "grad_norm": 1.511597752571106, - "learning_rate": 9.290452261306532e-05, - "loss": 5.6205, - "step": 7562 - }, - { - "epoch": 3.9441981747066492, - "grad_norm": 1.6349049806594849, - "learning_rate": 9.29035175879397e-05, - "loss": 5.7479, - "step": 7563 - }, - { - "epoch": 3.9447196870925687, - "grad_norm": 1.4813382625579834, - "learning_rate": 9.290251256281406e-05, - "loss": 5.7309, - "step": 7564 - }, - { - "epoch": 3.9452411994784877, - "grad_norm": 1.4299134016036987, - "learning_rate": 9.290150753768844e-05, - "loss": 6.0001, - "step": 7565 - }, - { - "epoch": 3.9457627118644067, - "grad_norm": 1.4920083284378052, - "learning_rate": 9.290050251256282e-05, - "loss": 5.8295, - "step": 7566 - }, - { - "epoch": 3.946284224250326, - "grad_norm": 1.468429446220398, - "learning_rate": 9.28994974874372e-05, - "loss": 5.9551, - "step": 7567 - }, - { - "epoch": 3.9468057366362452, - "grad_norm": 1.422792673110962, - "learning_rate": 9.289849246231156e-05, - "loss": 5.9708, - "step": 7568 - }, - { - "epoch": 3.9473272490221643, - "grad_norm": 1.548006534576416, - "learning_rate": 9.289748743718594e-05, - "loss": 5.8878, - "step": 7569 - }, - { - "epoch": 3.9478487614080837, - "grad_norm": 1.5350459814071655, - "learning_rate": 9.28964824120603e-05, - "loss": 5.713, - "step": 7570 - }, - { - "epoch": 3.9483702737940027, - "grad_norm": 1.5185812711715698, - "learning_rate": 9.289547738693468e-05, - "loss": 6.029, - "step": 7571 - }, - { - "epoch": 3.9488917861799218, - "grad_norm": 1.4402681589126587, - "learning_rate": 9.289447236180905e-05, - "loss": 5.7709, - "step": 7572 - }, - { - "epoch": 3.949413298565841, - "grad_norm": 1.627800464630127, - "learning_rate": 9.289346733668342e-05, - "loss": 5.2502, - "step": 7573 - }, - { - "epoch": 3.9499348109517602, - "grad_norm": 1.446333885192871, - "learning_rate": 9.289246231155779e-05, - "loss": 6.0545, - "step": 7574 - }, - { - "epoch": 3.9504563233376793, - "grad_norm": 1.5081079006195068, - "learning_rate": 9.289145728643217e-05, - "loss": 5.4392, - "step": 7575 - }, - { - "epoch": 3.9509778357235983, - "grad_norm": 1.4688866138458252, - "learning_rate": 9.289045226130654e-05, - "loss": 6.1615, - "step": 7576 - }, - { - "epoch": 3.9514993481095178, - "grad_norm": 1.60893714427948, - "learning_rate": 9.288944723618091e-05, - "loss": 5.0862, - "step": 7577 - }, - { - "epoch": 3.9520208604954368, - "grad_norm": 1.5656142234802246, - "learning_rate": 9.288844221105529e-05, - "loss": 5.4297, - "step": 7578 - }, - { - "epoch": 3.952542372881356, - "grad_norm": 1.430444359779358, - "learning_rate": 9.288743718592965e-05, - "loss": 6.1117, - "step": 7579 - }, - { - "epoch": 3.9530638852672753, - "grad_norm": 1.396181583404541, - "learning_rate": 9.288643216080403e-05, - "loss": 6.0451, - "step": 7580 - }, - { - "epoch": 3.9535853976531943, - "grad_norm": 1.4671739339828491, - "learning_rate": 9.288542713567839e-05, - "loss": 5.6149, - "step": 7581 - }, - { - "epoch": 3.9541069100391133, - "grad_norm": 1.406620740890503, - "learning_rate": 9.288442211055277e-05, - "loss": 5.9762, - "step": 7582 - }, - { - "epoch": 3.9546284224250323, - "grad_norm": 1.5574852228164673, - "learning_rate": 9.288341708542713e-05, - "loss": 5.4051, - "step": 7583 - }, - { - "epoch": 3.955149934810952, - "grad_norm": 1.5554401874542236, - "learning_rate": 9.288241206030151e-05, - "loss": 5.6875, - "step": 7584 - }, - { - "epoch": 3.955671447196871, - "grad_norm": 1.449561595916748, - "learning_rate": 9.288140703517588e-05, - "loss": 6.2057, - "step": 7585 - }, - { - "epoch": 3.95619295958279, - "grad_norm": 1.4294153451919556, - "learning_rate": 9.288040201005025e-05, - "loss": 5.3888, - "step": 7586 - }, - { - "epoch": 3.9567144719687093, - "grad_norm": 1.441821575164795, - "learning_rate": 9.287939698492463e-05, - "loss": 5.9233, - "step": 7587 - }, - { - "epoch": 3.9572359843546283, - "grad_norm": 1.5153042078018188, - "learning_rate": 9.287839195979901e-05, - "loss": 5.5207, - "step": 7588 - }, - { - "epoch": 3.9577574967405473, - "grad_norm": 1.571147084236145, - "learning_rate": 9.287738693467337e-05, - "loss": 5.713, - "step": 7589 - }, - { - "epoch": 3.958279009126467, - "grad_norm": 1.419402837753296, - "learning_rate": 9.287638190954774e-05, - "loss": 5.6989, - "step": 7590 - }, - { - "epoch": 3.958800521512386, - "grad_norm": 1.5178238153457642, - "learning_rate": 9.287537688442212e-05, - "loss": 5.8249, - "step": 7591 - }, - { - "epoch": 3.959322033898305, - "grad_norm": 1.6613214015960693, - "learning_rate": 9.287437185929648e-05, - "loss": 5.9029, - "step": 7592 - }, - { - "epoch": 3.9598435462842243, - "grad_norm": 1.4186713695526123, - "learning_rate": 9.287336683417086e-05, - "loss": 5.6948, - "step": 7593 - }, - { - "epoch": 3.9603650586701433, - "grad_norm": 1.4362927675247192, - "learning_rate": 9.287236180904522e-05, - "loss": 4.9006, - "step": 7594 - }, - { - "epoch": 3.9608865710560623, - "grad_norm": 1.539212703704834, - "learning_rate": 9.28713567839196e-05, - "loss": 5.3297, - "step": 7595 - }, - { - "epoch": 3.961408083441982, - "grad_norm": 1.5027996301651, - "learning_rate": 9.287035175879398e-05, - "loss": 5.7344, - "step": 7596 - }, - { - "epoch": 3.961929595827901, - "grad_norm": 1.4980460405349731, - "learning_rate": 9.286934673366836e-05, - "loss": 5.6767, - "step": 7597 - }, - { - "epoch": 3.96245110821382, - "grad_norm": 1.6255090236663818, - "learning_rate": 9.286834170854272e-05, - "loss": 5.645, - "step": 7598 - }, - { - "epoch": 3.9629726205997393, - "grad_norm": 1.4851247072219849, - "learning_rate": 9.28673366834171e-05, - "loss": 5.7222, - "step": 7599 - }, - { - "epoch": 3.9634941329856583, - "grad_norm": 1.484117865562439, - "learning_rate": 9.286633165829146e-05, - "loss": 5.0496, - "step": 7600 - }, - { - "epoch": 3.9640156453715774, - "grad_norm": 1.484295129776001, - "learning_rate": 9.286532663316584e-05, - "loss": 5.9216, - "step": 7601 - }, - { - "epoch": 3.964537157757497, - "grad_norm": 1.51368248462677, - "learning_rate": 9.28643216080402e-05, - "loss": 6.0834, - "step": 7602 - }, - { - "epoch": 3.965058670143416, - "grad_norm": 1.5821075439453125, - "learning_rate": 9.286331658291457e-05, - "loss": 5.5843, - "step": 7603 - }, - { - "epoch": 3.965580182529335, - "grad_norm": 1.4412554502487183, - "learning_rate": 9.286231155778895e-05, - "loss": 5.9201, - "step": 7604 - }, - { - "epoch": 3.9661016949152543, - "grad_norm": 1.4975872039794922, - "learning_rate": 9.286130653266331e-05, - "loss": 5.4509, - "step": 7605 - }, - { - "epoch": 3.9666232073011733, - "grad_norm": 1.629420518875122, - "learning_rate": 9.286030150753769e-05, - "loss": 5.0407, - "step": 7606 - }, - { - "epoch": 3.9671447196870924, - "grad_norm": 1.5303471088409424, - "learning_rate": 9.285929648241207e-05, - "loss": 5.9024, - "step": 7607 - }, - { - "epoch": 3.967666232073012, - "grad_norm": 1.8433928489685059, - "learning_rate": 9.285829145728644e-05, - "loss": 5.3276, - "step": 7608 - }, - { - "epoch": 3.968187744458931, - "grad_norm": 1.5610548257827759, - "learning_rate": 9.285728643216081e-05, - "loss": 5.7488, - "step": 7609 - }, - { - "epoch": 3.96870925684485, - "grad_norm": 1.3613871335983276, - "learning_rate": 9.285628140703519e-05, - "loss": 5.9179, - "step": 7610 - }, - { - "epoch": 3.9692307692307693, - "grad_norm": 1.537189245223999, - "learning_rate": 9.285527638190955e-05, - "loss": 5.7852, - "step": 7611 - }, - { - "epoch": 3.9697522816166884, - "grad_norm": 1.3163654804229736, - "learning_rate": 9.285427135678393e-05, - "loss": 5.898, - "step": 7612 - }, - { - "epoch": 3.9702737940026074, - "grad_norm": 1.590118408203125, - "learning_rate": 9.285326633165829e-05, - "loss": 5.6266, - "step": 7613 - }, - { - "epoch": 3.970795306388527, - "grad_norm": 1.457840085029602, - "learning_rate": 9.285226130653267e-05, - "loss": 5.5584, - "step": 7614 - }, - { - "epoch": 3.971316818774446, - "grad_norm": 1.3191081285476685, - "learning_rate": 9.285125628140703e-05, - "loss": 6.1224, - "step": 7615 - }, - { - "epoch": 3.971838331160365, - "grad_norm": 1.3947561979293823, - "learning_rate": 9.285025125628141e-05, - "loss": 5.7126, - "step": 7616 - }, - { - "epoch": 3.9723598435462844, - "grad_norm": 1.405626893043518, - "learning_rate": 9.284924623115579e-05, - "loss": 5.4398, - "step": 7617 - }, - { - "epoch": 3.9728813559322034, - "grad_norm": 1.3092596530914307, - "learning_rate": 9.284824120603015e-05, - "loss": 6.0616, - "step": 7618 - }, - { - "epoch": 3.9734028683181224, - "grad_norm": 1.5253963470458984, - "learning_rate": 9.284723618090453e-05, - "loss": 5.5582, - "step": 7619 - }, - { - "epoch": 3.973924380704042, - "grad_norm": 1.3398442268371582, - "learning_rate": 9.28462311557789e-05, - "loss": 5.2515, - "step": 7620 - }, - { - "epoch": 3.974445893089961, - "grad_norm": 1.4413524866104126, - "learning_rate": 9.284522613065327e-05, - "loss": 5.5538, - "step": 7621 - }, - { - "epoch": 3.97496740547588, - "grad_norm": 1.470799207687378, - "learning_rate": 9.284422110552764e-05, - "loss": 5.2757, - "step": 7622 - }, - { - "epoch": 3.9754889178617994, - "grad_norm": 1.5957432985305786, - "learning_rate": 9.284321608040202e-05, - "loss": 5.6332, - "step": 7623 - }, - { - "epoch": 3.9760104302477184, - "grad_norm": 1.3365963697433472, - "learning_rate": 9.284221105527638e-05, - "loss": 5.8816, - "step": 7624 - }, - { - "epoch": 3.9765319426336374, - "grad_norm": 1.533018946647644, - "learning_rate": 9.284120603015076e-05, - "loss": 5.6524, - "step": 7625 - }, - { - "epoch": 3.977053455019557, - "grad_norm": 1.4953924417495728, - "learning_rate": 9.284020100502512e-05, - "loss": 6.1326, - "step": 7626 - }, - { - "epoch": 3.977574967405476, - "grad_norm": 1.4917718172073364, - "learning_rate": 9.28391959798995e-05, - "loss": 5.7034, - "step": 7627 - }, - { - "epoch": 3.978096479791395, - "grad_norm": 1.8134647607803345, - "learning_rate": 9.283819095477388e-05, - "loss": 5.4472, - "step": 7628 - }, - { - "epoch": 3.9786179921773144, - "grad_norm": 1.3935800790786743, - "learning_rate": 9.283718592964824e-05, - "loss": 5.8886, - "step": 7629 - }, - { - "epoch": 3.9791395045632334, - "grad_norm": 1.4648017883300781, - "learning_rate": 9.283618090452262e-05, - "loss": 5.8747, - "step": 7630 - }, - { - "epoch": 3.9796610169491524, - "grad_norm": 1.7654238939285278, - "learning_rate": 9.283517587939699e-05, - "loss": 6.0085, - "step": 7631 - }, - { - "epoch": 3.980182529335072, - "grad_norm": 1.5801446437835693, - "learning_rate": 9.283417085427136e-05, - "loss": 5.9736, - "step": 7632 - }, - { - "epoch": 3.980704041720991, - "grad_norm": 1.424108862876892, - "learning_rate": 9.283316582914573e-05, - "loss": 6.044, - "step": 7633 - }, - { - "epoch": 3.98122555410691, - "grad_norm": 1.3887425661087036, - "learning_rate": 9.28321608040201e-05, - "loss": 5.5715, - "step": 7634 - }, - { - "epoch": 3.9817470664928294, - "grad_norm": 1.8764564990997314, - "learning_rate": 9.283115577889447e-05, - "loss": 4.9603, - "step": 7635 - }, - { - "epoch": 3.9822685788787484, - "grad_norm": 1.5525306463241577, - "learning_rate": 9.283015075376885e-05, - "loss": 5.6231, - "step": 7636 - }, - { - "epoch": 3.9827900912646674, - "grad_norm": 1.5488239526748657, - "learning_rate": 9.282914572864323e-05, - "loss": 5.716, - "step": 7637 - }, - { - "epoch": 3.983311603650587, - "grad_norm": 1.4555163383483887, - "learning_rate": 9.28281407035176e-05, - "loss": 6.0467, - "step": 7638 - }, - { - "epoch": 3.983833116036506, - "grad_norm": 1.4187097549438477, - "learning_rate": 9.282713567839197e-05, - "loss": 5.8483, - "step": 7639 - }, - { - "epoch": 3.984354628422425, - "grad_norm": 1.5016056299209595, - "learning_rate": 9.282613065326635e-05, - "loss": 5.8125, - "step": 7640 - }, - { - "epoch": 3.9848761408083444, - "grad_norm": 1.5796687602996826, - "learning_rate": 9.282512562814071e-05, - "loss": 5.6155, - "step": 7641 - }, - { - "epoch": 3.9853976531942634, - "grad_norm": 1.368722915649414, - "learning_rate": 9.282412060301507e-05, - "loss": 5.9012, - "step": 7642 - }, - { - "epoch": 3.9859191655801824, - "grad_norm": 1.3489478826522827, - "learning_rate": 9.282311557788945e-05, - "loss": 6.0632, - "step": 7643 - }, - { - "epoch": 3.986440677966102, - "grad_norm": 1.5053138732910156, - "learning_rate": 9.282211055276382e-05, - "loss": 5.1006, - "step": 7644 - }, - { - "epoch": 3.986962190352021, - "grad_norm": 1.7075389623641968, - "learning_rate": 9.28211055276382e-05, - "loss": 5.4289, - "step": 7645 - }, - { - "epoch": 3.98748370273794, - "grad_norm": 1.3612264394760132, - "learning_rate": 9.282010050251256e-05, - "loss": 5.8387, - "step": 7646 - }, - { - "epoch": 3.9880052151238594, - "grad_norm": 1.3387603759765625, - "learning_rate": 9.281909547738694e-05, - "loss": 5.8833, - "step": 7647 - }, - { - "epoch": 3.9885267275097784, - "grad_norm": 1.4316917657852173, - "learning_rate": 9.281809045226131e-05, - "loss": 5.7647, - "step": 7648 - }, - { - "epoch": 3.9890482398956975, - "grad_norm": 1.4199106693267822, - "learning_rate": 9.281708542713569e-05, - "loss": 5.6353, - "step": 7649 - }, - { - "epoch": 3.989569752281617, - "grad_norm": 1.523295283317566, - "learning_rate": 9.281608040201006e-05, - "loss": 5.4034, - "step": 7650 - }, - { - "epoch": 3.990091264667536, - "grad_norm": 1.4094607830047607, - "learning_rate": 9.281507537688443e-05, - "loss": 5.8889, - "step": 7651 - }, - { - "epoch": 3.990612777053455, - "grad_norm": 1.4275928735733032, - "learning_rate": 9.28140703517588e-05, - "loss": 5.5642, - "step": 7652 - }, - { - "epoch": 3.9911342894393744, - "grad_norm": 1.4821397066116333, - "learning_rate": 9.281306532663318e-05, - "loss": 5.8514, - "step": 7653 - }, - { - "epoch": 3.9916558018252934, - "grad_norm": 1.406541109085083, - "learning_rate": 9.281206030150754e-05, - "loss": 5.6963, - "step": 7654 - }, - { - "epoch": 3.9921773142112125, - "grad_norm": 1.3825905323028564, - "learning_rate": 9.28110552763819e-05, - "loss": 5.9992, - "step": 7655 - }, - { - "epoch": 3.992698826597132, - "grad_norm": 1.6371179819107056, - "learning_rate": 9.281005025125628e-05, - "loss": 5.877, - "step": 7656 - }, - { - "epoch": 3.993220338983051, - "grad_norm": 1.6671850681304932, - "learning_rate": 9.280904522613066e-05, - "loss": 5.1487, - "step": 7657 - }, - { - "epoch": 3.99374185136897, - "grad_norm": 1.4873112440109253, - "learning_rate": 9.280804020100504e-05, - "loss": 5.6765, - "step": 7658 - }, - { - "epoch": 3.9942633637548894, - "grad_norm": 1.481482982635498, - "learning_rate": 9.28070351758794e-05, - "loss": 5.5725, - "step": 7659 - }, - { - "epoch": 3.9947848761408085, - "grad_norm": 1.6333673000335693, - "learning_rate": 9.280603015075378e-05, - "loss": 5.7679, - "step": 7660 - }, - { - "epoch": 3.9953063885267275, - "grad_norm": 1.4642921686172485, - "learning_rate": 9.280502512562814e-05, - "loss": 5.5233, - "step": 7661 - }, - { - "epoch": 3.995827900912647, - "grad_norm": 1.3596277236938477, - "learning_rate": 9.280402010050252e-05, - "loss": 5.7152, - "step": 7662 - }, - { - "epoch": 3.996349413298566, - "grad_norm": 1.4807305335998535, - "learning_rate": 9.280301507537689e-05, - "loss": 5.6184, - "step": 7663 - }, - { - "epoch": 3.996870925684485, - "grad_norm": 1.4810280799865723, - "learning_rate": 9.280201005025126e-05, - "loss": 5.3688, - "step": 7664 - }, - { - "epoch": 3.997392438070404, - "grad_norm": 1.5020126104354858, - "learning_rate": 9.280100502512563e-05, - "loss": 5.7442, - "step": 7665 - }, - { - "epoch": 3.9979139504563235, - "grad_norm": 1.5125086307525635, - "learning_rate": 9.28e-05, - "loss": 5.6178, - "step": 7666 - }, - { - "epoch": 3.9984354628422425, - "grad_norm": 1.5281710624694824, - "learning_rate": 9.279899497487437e-05, - "loss": 5.647, - "step": 7667 - }, - { - "epoch": 3.9989569752281615, - "grad_norm": 1.6058510541915894, - "learning_rate": 9.279798994974875e-05, - "loss": 5.0744, - "step": 7668 - }, - { - "epoch": 3.999478487614081, - "grad_norm": 1.623013973236084, - "learning_rate": 9.279698492462313e-05, - "loss": 5.2234, - "step": 7669 - }, - { - "epoch": 4.0, - "grad_norm": 1.7330044507980347, - "learning_rate": 9.279597989949749e-05, - "loss": 5.5358, - "step": 7670 - }, - { - "epoch": 4.0005215123859195, - "grad_norm": 1.608900785446167, - "learning_rate": 9.279497487437187e-05, - "loss": 5.4956, - "step": 7671 - }, - { - "epoch": 4.001043024771838, - "grad_norm": 1.503663182258606, - "learning_rate": 9.279396984924623e-05, - "loss": 5.6129, - "step": 7672 - }, - { - "epoch": 4.0015645371577575, - "grad_norm": 1.5316232442855835, - "learning_rate": 9.279296482412061e-05, - "loss": 5.455, - "step": 7673 - }, - { - "epoch": 4.002086049543677, - "grad_norm": 1.6079626083374023, - "learning_rate": 9.279195979899497e-05, - "loss": 5.8496, - "step": 7674 - }, - { - "epoch": 4.0026075619295955, - "grad_norm": 1.5660747289657593, - "learning_rate": 9.279095477386935e-05, - "loss": 5.5557, - "step": 7675 - }, - { - "epoch": 4.003129074315515, - "grad_norm": 1.5197049379348755, - "learning_rate": 9.278994974874372e-05, - "loss": 5.859, - "step": 7676 - }, - { - "epoch": 4.0036505867014345, - "grad_norm": 1.54172945022583, - "learning_rate": 9.27889447236181e-05, - "loss": 5.6918, - "step": 7677 - }, - { - "epoch": 4.004172099087353, - "grad_norm": 1.5502054691314697, - "learning_rate": 9.278793969849247e-05, - "loss": 5.5953, - "step": 7678 - }, - { - "epoch": 4.0046936114732725, - "grad_norm": 1.424559235572815, - "learning_rate": 9.278693467336685e-05, - "loss": 5.894, - "step": 7679 - }, - { - "epoch": 4.005215123859192, - "grad_norm": 1.5219298601150513, - "learning_rate": 9.278592964824121e-05, - "loss": 5.5015, - "step": 7680 - }, - { - "epoch": 4.005736636245111, - "grad_norm": 1.484984278678894, - "learning_rate": 9.278492462311559e-05, - "loss": 5.6447, - "step": 7681 - }, - { - "epoch": 4.00625814863103, - "grad_norm": 1.5557782649993896, - "learning_rate": 9.278391959798996e-05, - "loss": 5.4462, - "step": 7682 - }, - { - "epoch": 4.0067796610169495, - "grad_norm": 1.6625183820724487, - "learning_rate": 9.278291457286432e-05, - "loss": 5.2324, - "step": 7683 - }, - { - "epoch": 4.007301173402868, - "grad_norm": 1.4944870471954346, - "learning_rate": 9.27819095477387e-05, - "loss": 5.8019, - "step": 7684 - }, - { - "epoch": 4.0078226857887875, - "grad_norm": 1.5170711278915405, - "learning_rate": 9.278090452261306e-05, - "loss": 5.6584, - "step": 7685 - }, - { - "epoch": 4.008344198174707, - "grad_norm": 1.423714280128479, - "learning_rate": 9.277989949748744e-05, - "loss": 5.9605, - "step": 7686 - }, - { - "epoch": 4.008865710560626, - "grad_norm": 1.6840181350708008, - "learning_rate": 9.27788944723618e-05, - "loss": 5.4314, - "step": 7687 - }, - { - "epoch": 4.009387222946545, - "grad_norm": 1.5774157047271729, - "learning_rate": 9.277788944723618e-05, - "loss": 5.2665, - "step": 7688 - }, - { - "epoch": 4.0099087353324645, - "grad_norm": 1.5384485721588135, - "learning_rate": 9.277688442211056e-05, - "loss": 5.6497, - "step": 7689 - }, - { - "epoch": 4.010430247718383, - "grad_norm": 1.4617236852645874, - "learning_rate": 9.277587939698494e-05, - "loss": 5.6907, - "step": 7690 - }, - { - "epoch": 4.0109517601043025, - "grad_norm": 1.4371092319488525, - "learning_rate": 9.27748743718593e-05, - "loss": 5.6221, - "step": 7691 - }, - { - "epoch": 4.011473272490222, - "grad_norm": 1.3725258111953735, - "learning_rate": 9.277386934673368e-05, - "loss": 6.0872, - "step": 7692 - }, - { - "epoch": 4.011994784876141, - "grad_norm": 1.4017165899276733, - "learning_rate": 9.277286432160804e-05, - "loss": 5.6827, - "step": 7693 - }, - { - "epoch": 4.01251629726206, - "grad_norm": 1.4511051177978516, - "learning_rate": 9.277185929648242e-05, - "loss": 5.8644, - "step": 7694 - }, - { - "epoch": 4.0130378096479795, - "grad_norm": 1.7444595098495483, - "learning_rate": 9.277085427135679e-05, - "loss": 5.6013, - "step": 7695 - }, - { - "epoch": 4.013559322033898, - "grad_norm": 1.5286741256713867, - "learning_rate": 9.276984924623115e-05, - "loss": 5.2085, - "step": 7696 - }, - { - "epoch": 4.014080834419818, - "grad_norm": 1.4398947954177856, - "learning_rate": 9.276884422110553e-05, - "loss": 5.8112, - "step": 7697 - }, - { - "epoch": 4.014602346805737, - "grad_norm": 1.4774574041366577, - "learning_rate": 9.276783919597989e-05, - "loss": 5.8915, - "step": 7698 - }, - { - "epoch": 4.015123859191656, - "grad_norm": 1.4351156949996948, - "learning_rate": 9.276683417085427e-05, - "loss": 5.9567, - "step": 7699 - }, - { - "epoch": 4.015645371577575, - "grad_norm": 1.4024503231048584, - "learning_rate": 9.276582914572865e-05, - "loss": 5.6869, - "step": 7700 - }, - { - "epoch": 4.0161668839634945, - "grad_norm": 1.6496344804763794, - "learning_rate": 9.276482412060303e-05, - "loss": 5.6466, - "step": 7701 - }, - { - "epoch": 4.016688396349413, - "grad_norm": 1.4414211511611938, - "learning_rate": 9.276381909547739e-05, - "loss": 5.9198, - "step": 7702 - }, - { - "epoch": 4.017209908735333, - "grad_norm": 1.5342429876327515, - "learning_rate": 9.276281407035177e-05, - "loss": 5.4157, - "step": 7703 - }, - { - "epoch": 4.017731421121252, - "grad_norm": 1.4718905687332153, - "learning_rate": 9.276180904522613e-05, - "loss": 5.7798, - "step": 7704 - }, - { - "epoch": 4.018252933507171, - "grad_norm": 1.5217394828796387, - "learning_rate": 9.276080402010051e-05, - "loss": 5.8179, - "step": 7705 - }, - { - "epoch": 4.01877444589309, - "grad_norm": 1.467195987701416, - "learning_rate": 9.275979899497488e-05, - "loss": 5.5417, - "step": 7706 - }, - { - "epoch": 4.0192959582790095, - "grad_norm": 1.5121022462844849, - "learning_rate": 9.275879396984925e-05, - "loss": 5.8053, - "step": 7707 - }, - { - "epoch": 4.019817470664928, - "grad_norm": 1.4933853149414062, - "learning_rate": 9.275778894472362e-05, - "loss": 5.6759, - "step": 7708 - }, - { - "epoch": 4.020338983050848, - "grad_norm": 1.4149904251098633, - "learning_rate": 9.2756783919598e-05, - "loss": 5.8774, - "step": 7709 - }, - { - "epoch": 4.020860495436767, - "grad_norm": 1.48702073097229, - "learning_rate": 9.275577889447237e-05, - "loss": 5.6804, - "step": 7710 - }, - { - "epoch": 4.021382007822686, - "grad_norm": 1.4852831363677979, - "learning_rate": 9.275477386934674e-05, - "loss": 5.4904, - "step": 7711 - }, - { - "epoch": 4.021903520208605, - "grad_norm": 1.5853972434997559, - "learning_rate": 9.275376884422111e-05, - "loss": 5.4615, - "step": 7712 - }, - { - "epoch": 4.0224250325945246, - "grad_norm": 1.7149549722671509, - "learning_rate": 9.275276381909548e-05, - "loss": 5.9469, - "step": 7713 - }, - { - "epoch": 4.022946544980443, - "grad_norm": 1.577808141708374, - "learning_rate": 9.275175879396986e-05, - "loss": 5.6752, - "step": 7714 - }, - { - "epoch": 4.023468057366363, - "grad_norm": 1.4081953763961792, - "learning_rate": 9.275075376884422e-05, - "loss": 6.0338, - "step": 7715 - }, - { - "epoch": 4.023989569752282, - "grad_norm": 1.3134089708328247, - "learning_rate": 9.27497487437186e-05, - "loss": 6.0395, - "step": 7716 - }, - { - "epoch": 4.024511082138201, - "grad_norm": 1.3429710865020752, - "learning_rate": 9.274874371859296e-05, - "loss": 5.9513, - "step": 7717 - }, - { - "epoch": 4.02503259452412, - "grad_norm": 1.4886252880096436, - "learning_rate": 9.274773869346734e-05, - "loss": 5.949, - "step": 7718 - }, - { - "epoch": 4.025554106910039, - "grad_norm": 1.3194868564605713, - "learning_rate": 9.27467336683417e-05, - "loss": 6.03, - "step": 7719 - }, - { - "epoch": 4.026075619295958, - "grad_norm": 1.3274924755096436, - "learning_rate": 9.274572864321608e-05, - "loss": 6.1435, - "step": 7720 - }, - { - "epoch": 4.026597131681878, - "grad_norm": 1.4881117343902588, - "learning_rate": 9.274472361809046e-05, - "loss": 5.7653, - "step": 7721 - }, - { - "epoch": 4.027118644067796, - "grad_norm": 1.5620120763778687, - "learning_rate": 9.274371859296483e-05, - "loss": 5.6183, - "step": 7722 - }, - { - "epoch": 4.027640156453716, - "grad_norm": 1.4780586957931519, - "learning_rate": 9.27427135678392e-05, - "loss": 5.7696, - "step": 7723 - }, - { - "epoch": 4.028161668839635, - "grad_norm": 1.4265094995498657, - "learning_rate": 9.274170854271357e-05, - "loss": 5.4559, - "step": 7724 - }, - { - "epoch": 4.028683181225554, - "grad_norm": 1.5559715032577515, - "learning_rate": 9.274070351758795e-05, - "loss": 5.5493, - "step": 7725 - }, - { - "epoch": 4.029204693611473, - "grad_norm": 1.4153403043746948, - "learning_rate": 9.273969849246231e-05, - "loss": 5.916, - "step": 7726 - }, - { - "epoch": 4.029726205997393, - "grad_norm": 1.4770768880844116, - "learning_rate": 9.273869346733669e-05, - "loss": 5.3997, - "step": 7727 - }, - { - "epoch": 4.030247718383311, - "grad_norm": 1.426910400390625, - "learning_rate": 9.273768844221105e-05, - "loss": 5.9063, - "step": 7728 - }, - { - "epoch": 4.030769230769231, - "grad_norm": 1.796269178390503, - "learning_rate": 9.273668341708543e-05, - "loss": 5.6479, - "step": 7729 - }, - { - "epoch": 4.03129074315515, - "grad_norm": 1.398111343383789, - "learning_rate": 9.273567839195981e-05, - "loss": 5.7758, - "step": 7730 - }, - { - "epoch": 4.031812255541069, - "grad_norm": 1.3928663730621338, - "learning_rate": 9.273467336683419e-05, - "loss": 5.7595, - "step": 7731 - }, - { - "epoch": 4.032333767926988, - "grad_norm": 1.5666611194610596, - "learning_rate": 9.273366834170855e-05, - "loss": 5.4759, - "step": 7732 - }, - { - "epoch": 4.032855280312908, - "grad_norm": 1.4694808721542358, - "learning_rate": 9.273266331658293e-05, - "loss": 6.1685, - "step": 7733 - }, - { - "epoch": 4.033376792698826, - "grad_norm": 1.5558421611785889, - "learning_rate": 9.273165829145729e-05, - "loss": 5.801, - "step": 7734 - }, - { - "epoch": 4.033898305084746, - "grad_norm": 1.416417121887207, - "learning_rate": 9.273065326633166e-05, - "loss": 5.8836, - "step": 7735 - }, - { - "epoch": 4.034419817470665, - "grad_norm": 1.4595131874084473, - "learning_rate": 9.272964824120603e-05, - "loss": 5.9976, - "step": 7736 - }, - { - "epoch": 4.034941329856584, - "grad_norm": 1.4396376609802246, - "learning_rate": 9.27286432160804e-05, - "loss": 5.7767, - "step": 7737 - }, - { - "epoch": 4.035462842242503, - "grad_norm": 1.5201600790023804, - "learning_rate": 9.272763819095478e-05, - "loss": 5.4103, - "step": 7738 - }, - { - "epoch": 4.035984354628423, - "grad_norm": 1.4653089046478271, - "learning_rate": 9.272663316582914e-05, - "loss": 5.6577, - "step": 7739 - }, - { - "epoch": 4.036505867014341, - "grad_norm": 1.4416295289993286, - "learning_rate": 9.272562814070352e-05, - "loss": 5.8298, - "step": 7740 - }, - { - "epoch": 4.037027379400261, - "grad_norm": 1.5999308824539185, - "learning_rate": 9.27246231155779e-05, - "loss": 5.5366, - "step": 7741 - }, - { - "epoch": 4.03754889178618, - "grad_norm": 1.4435508251190186, - "learning_rate": 9.272361809045227e-05, - "loss": 5.8591, - "step": 7742 - }, - { - "epoch": 4.038070404172099, - "grad_norm": 1.6125752925872803, - "learning_rate": 9.272261306532664e-05, - "loss": 5.5196, - "step": 7743 - }, - { - "epoch": 4.038591916558018, - "grad_norm": 1.7814494371414185, - "learning_rate": 9.272160804020102e-05, - "loss": 5.7281, - "step": 7744 - }, - { - "epoch": 4.039113428943938, - "grad_norm": 1.517842411994934, - "learning_rate": 9.272060301507538e-05, - "loss": 5.4989, - "step": 7745 - }, - { - "epoch": 4.039634941329856, - "grad_norm": 1.4527209997177124, - "learning_rate": 9.271959798994976e-05, - "loss": 6.0198, - "step": 7746 - }, - { - "epoch": 4.040156453715776, - "grad_norm": 1.3927206993103027, - "learning_rate": 9.271859296482412e-05, - "loss": 5.6146, - "step": 7747 - }, - { - "epoch": 4.040677966101695, - "grad_norm": 1.4075472354888916, - "learning_rate": 9.271758793969849e-05, - "loss": 5.6879, - "step": 7748 - }, - { - "epoch": 4.041199478487614, - "grad_norm": 1.5470061302185059, - "learning_rate": 9.271658291457286e-05, - "loss": 5.637, - "step": 7749 - }, - { - "epoch": 4.041720990873533, - "grad_norm": 1.4664270877838135, - "learning_rate": 9.271557788944724e-05, - "loss": 5.5054, - "step": 7750 - }, - { - "epoch": 4.042242503259453, - "grad_norm": 1.5448638200759888, - "learning_rate": 9.271457286432162e-05, - "loss": 6.0242, - "step": 7751 - }, - { - "epoch": 4.042764015645371, - "grad_norm": 1.4784748554229736, - "learning_rate": 9.271356783919598e-05, - "loss": 6.1007, - "step": 7752 - }, - { - "epoch": 4.043285528031291, - "grad_norm": 1.4604638814926147, - "learning_rate": 9.271256281407036e-05, - "loss": 5.5787, - "step": 7753 - }, - { - "epoch": 4.04380704041721, - "grad_norm": 1.4200048446655273, - "learning_rate": 9.271155778894473e-05, - "loss": 5.7969, - "step": 7754 - }, - { - "epoch": 4.044328552803129, - "grad_norm": 1.2903281450271606, - "learning_rate": 9.27105527638191e-05, - "loss": 6.1428, - "step": 7755 - }, - { - "epoch": 4.044850065189048, - "grad_norm": 2.5602188110351562, - "learning_rate": 9.270954773869347e-05, - "loss": 5.1813, - "step": 7756 - }, - { - "epoch": 4.045371577574968, - "grad_norm": 1.551783800125122, - "learning_rate": 9.270854271356785e-05, - "loss": 5.324, - "step": 7757 - }, - { - "epoch": 4.045893089960886, - "grad_norm": 1.48075532913208, - "learning_rate": 9.270753768844221e-05, - "loss": 5.7773, - "step": 7758 - }, - { - "epoch": 4.046414602346806, - "grad_norm": 1.6551659107208252, - "learning_rate": 9.270653266331659e-05, - "loss": 5.6422, - "step": 7759 - }, - { - "epoch": 4.046936114732725, - "grad_norm": 1.4410001039505005, - "learning_rate": 9.270552763819095e-05, - "loss": 5.8324, - "step": 7760 - }, - { - "epoch": 4.047457627118644, - "grad_norm": 1.372829794883728, - "learning_rate": 9.270452261306533e-05, - "loss": 5.9677, - "step": 7761 - }, - { - "epoch": 4.047979139504563, - "grad_norm": 1.5349671840667725, - "learning_rate": 9.270351758793971e-05, - "loss": 5.3899, - "step": 7762 - }, - { - "epoch": 4.048500651890483, - "grad_norm": 1.4967706203460693, - "learning_rate": 9.270251256281407e-05, - "loss": 5.989, - "step": 7763 - }, - { - "epoch": 4.049022164276401, - "grad_norm": 1.472976565361023, - "learning_rate": 9.270150753768845e-05, - "loss": 5.8665, - "step": 7764 - }, - { - "epoch": 4.049543676662321, - "grad_norm": 1.645326852798462, - "learning_rate": 9.270050251256281e-05, - "loss": 5.4161, - "step": 7765 - }, - { - "epoch": 4.05006518904824, - "grad_norm": 1.3409143686294556, - "learning_rate": 9.269949748743719e-05, - "loss": 5.9341, - "step": 7766 - }, - { - "epoch": 4.050586701434159, - "grad_norm": 1.5240248441696167, - "learning_rate": 9.269849246231156e-05, - "loss": 5.8171, - "step": 7767 - }, - { - "epoch": 4.051108213820078, - "grad_norm": 1.3809316158294678, - "learning_rate": 9.269748743718593e-05, - "loss": 5.6442, - "step": 7768 - }, - { - "epoch": 4.051629726205998, - "grad_norm": 1.505065679550171, - "learning_rate": 9.26964824120603e-05, - "loss": 5.4101, - "step": 7769 - }, - { - "epoch": 4.052151238591916, - "grad_norm": 1.3311412334442139, - "learning_rate": 9.269547738693468e-05, - "loss": 5.9654, - "step": 7770 - }, - { - "epoch": 4.052672750977836, - "grad_norm": 1.7631900310516357, - "learning_rate": 9.269447236180905e-05, - "loss": 4.6755, - "step": 7771 - }, - { - "epoch": 4.053194263363755, - "grad_norm": 1.4635214805603027, - "learning_rate": 9.269346733668343e-05, - "loss": 5.8484, - "step": 7772 - }, - { - "epoch": 4.053715775749674, - "grad_norm": 1.4534518718719482, - "learning_rate": 9.26924623115578e-05, - "loss": 5.8568, - "step": 7773 - }, - { - "epoch": 4.054237288135593, - "grad_norm": 1.387290596961975, - "learning_rate": 9.269145728643217e-05, - "loss": 5.5539, - "step": 7774 - }, - { - "epoch": 4.054758800521513, - "grad_norm": 1.5100772380828857, - "learning_rate": 9.269045226130654e-05, - "loss": 5.4018, - "step": 7775 - }, - { - "epoch": 4.055280312907431, - "grad_norm": 1.3560349941253662, - "learning_rate": 9.26894472361809e-05, - "loss": 5.7, - "step": 7776 - }, - { - "epoch": 4.055801825293351, - "grad_norm": 1.522064208984375, - "learning_rate": 9.268844221105528e-05, - "loss": 5.534, - "step": 7777 - }, - { - "epoch": 4.05632333767927, - "grad_norm": 1.3884671926498413, - "learning_rate": 9.268743718592965e-05, - "loss": 5.9232, - "step": 7778 - }, - { - "epoch": 4.056844850065189, - "grad_norm": 1.495206356048584, - "learning_rate": 9.268643216080402e-05, - "loss": 5.8095, - "step": 7779 - }, - { - "epoch": 4.057366362451108, - "grad_norm": 1.2516480684280396, - "learning_rate": 9.268542713567839e-05, - "loss": 6.1777, - "step": 7780 - }, - { - "epoch": 4.057887874837028, - "grad_norm": 1.534379482269287, - "learning_rate": 9.268442211055276e-05, - "loss": 5.7913, - "step": 7781 - }, - { - "epoch": 4.058409387222946, - "grad_norm": 1.5259021520614624, - "learning_rate": 9.268341708542714e-05, - "loss": 6.1763, - "step": 7782 - }, - { - "epoch": 4.058930899608866, - "grad_norm": 1.428620457649231, - "learning_rate": 9.268241206030152e-05, - "loss": 5.8834, - "step": 7783 - }, - { - "epoch": 4.059452411994785, - "grad_norm": 1.426031231880188, - "learning_rate": 9.268140703517588e-05, - "loss": 5.4946, - "step": 7784 - }, - { - "epoch": 4.059973924380704, - "grad_norm": 1.348958134651184, - "learning_rate": 9.268040201005026e-05, - "loss": 5.8724, - "step": 7785 - }, - { - "epoch": 4.060495436766623, - "grad_norm": 1.5968679189682007, - "learning_rate": 9.267939698492463e-05, - "loss": 5.1261, - "step": 7786 - }, - { - "epoch": 4.061016949152543, - "grad_norm": 1.514798641204834, - "learning_rate": 9.2678391959799e-05, - "loss": 5.1686, - "step": 7787 - }, - { - "epoch": 4.061538461538461, - "grad_norm": 1.512370228767395, - "learning_rate": 9.267738693467337e-05, - "loss": 5.6299, - "step": 7788 - }, - { - "epoch": 4.062059973924381, - "grad_norm": 1.5720722675323486, - "learning_rate": 9.267638190954773e-05, - "loss": 5.8612, - "step": 7789 - }, - { - "epoch": 4.0625814863103, - "grad_norm": 1.3575310707092285, - "learning_rate": 9.267537688442211e-05, - "loss": 5.5367, - "step": 7790 - }, - { - "epoch": 4.063102998696219, - "grad_norm": 1.6446588039398193, - "learning_rate": 9.267437185929649e-05, - "loss": 5.6676, - "step": 7791 - }, - { - "epoch": 4.063624511082138, - "grad_norm": 1.4908177852630615, - "learning_rate": 9.267336683417087e-05, - "loss": 5.5288, - "step": 7792 - }, - { - "epoch": 4.064146023468058, - "grad_norm": 1.4198685884475708, - "learning_rate": 9.267236180904523e-05, - "loss": 6.0277, - "step": 7793 - }, - { - "epoch": 4.064667535853976, - "grad_norm": 1.4246426820755005, - "learning_rate": 9.267135678391961e-05, - "loss": 6.0894, - "step": 7794 - }, - { - "epoch": 4.065189048239896, - "grad_norm": 1.3967424631118774, - "learning_rate": 9.267035175879397e-05, - "loss": 5.9353, - "step": 7795 - }, - { - "epoch": 4.065710560625815, - "grad_norm": 1.4990264177322388, - "learning_rate": 9.266934673366835e-05, - "loss": 5.5904, - "step": 7796 - }, - { - "epoch": 4.066232073011734, - "grad_norm": 1.6803250312805176, - "learning_rate": 9.266834170854272e-05, - "loss": 5.3202, - "step": 7797 - }, - { - "epoch": 4.066753585397653, - "grad_norm": 1.5388721227645874, - "learning_rate": 9.266733668341709e-05, - "loss": 5.2679, - "step": 7798 - }, - { - "epoch": 4.067275097783573, - "grad_norm": 1.4957226514816284, - "learning_rate": 9.266633165829146e-05, - "loss": 5.7429, - "step": 7799 - }, - { - "epoch": 4.067796610169491, - "grad_norm": 1.5074998140335083, - "learning_rate": 9.266532663316584e-05, - "loss": 5.2797, - "step": 7800 - }, - { - "epoch": 4.068318122555411, - "grad_norm": 1.4297837018966675, - "learning_rate": 9.26643216080402e-05, - "loss": 6.0692, - "step": 7801 - }, - { - "epoch": 4.06883963494133, - "grad_norm": 1.4485712051391602, - "learning_rate": 9.266331658291458e-05, - "loss": 5.9786, - "step": 7802 - }, - { - "epoch": 4.069361147327249, - "grad_norm": 1.522100567817688, - "learning_rate": 9.266231155778896e-05, - "loss": 5.9051, - "step": 7803 - }, - { - "epoch": 4.069882659713168, - "grad_norm": 1.452163815498352, - "learning_rate": 9.266130653266332e-05, - "loss": 5.6852, - "step": 7804 - }, - { - "epoch": 4.070404172099088, - "grad_norm": 1.3876086473464966, - "learning_rate": 9.26603015075377e-05, - "loss": 5.5676, - "step": 7805 - }, - { - "epoch": 4.070925684485006, - "grad_norm": 1.4466336965560913, - "learning_rate": 9.265929648241206e-05, - "loss": 5.7301, - "step": 7806 - }, - { - "epoch": 4.071447196870926, - "grad_norm": 1.5619091987609863, - "learning_rate": 9.265829145728644e-05, - "loss": 5.4527, - "step": 7807 - }, - { - "epoch": 4.071968709256845, - "grad_norm": 1.4330257177352905, - "learning_rate": 9.26572864321608e-05, - "loss": 5.9768, - "step": 7808 - }, - { - "epoch": 4.072490221642764, - "grad_norm": 1.467200517654419, - "learning_rate": 9.265628140703518e-05, - "loss": 5.9427, - "step": 7809 - }, - { - "epoch": 4.073011734028683, - "grad_norm": 1.4852330684661865, - "learning_rate": 9.265527638190955e-05, - "loss": 5.6571, - "step": 7810 - }, - { - "epoch": 4.073533246414602, - "grad_norm": 1.4207968711853027, - "learning_rate": 9.265427135678392e-05, - "loss": 5.8103, - "step": 7811 - }, - { - "epoch": 4.074054758800521, - "grad_norm": 1.3855139017105103, - "learning_rate": 9.26532663316583e-05, - "loss": 5.9616, - "step": 7812 - }, - { - "epoch": 4.074576271186441, - "grad_norm": 1.5995575189590454, - "learning_rate": 9.265226130653268e-05, - "loss": 5.2768, - "step": 7813 - }, - { - "epoch": 4.075097783572359, - "grad_norm": 1.411941647529602, - "learning_rate": 9.265125628140704e-05, - "loss": 5.7762, - "step": 7814 - }, - { - "epoch": 4.075619295958279, - "grad_norm": 1.6782230138778687, - "learning_rate": 9.265025125628141e-05, - "loss": 5.3224, - "step": 7815 - }, - { - "epoch": 4.076140808344198, - "grad_norm": 1.4913586378097534, - "learning_rate": 9.264924623115579e-05, - "loss": 5.5944, - "step": 7816 - }, - { - "epoch": 4.076662320730117, - "grad_norm": 1.3571511507034302, - "learning_rate": 9.264824120603015e-05, - "loss": 5.5187, - "step": 7817 - }, - { - "epoch": 4.077183833116036, - "grad_norm": 1.529752492904663, - "learning_rate": 9.264723618090453e-05, - "loss": 5.3579, - "step": 7818 - }, - { - "epoch": 4.077705345501956, - "grad_norm": 1.3558462858200073, - "learning_rate": 9.264623115577889e-05, - "loss": 5.8429, - "step": 7819 - }, - { - "epoch": 4.078226857887874, - "grad_norm": 1.4224931001663208, - "learning_rate": 9.264522613065327e-05, - "loss": 5.6495, - "step": 7820 - }, - { - "epoch": 4.078748370273794, - "grad_norm": 1.3902943134307861, - "learning_rate": 9.264422110552763e-05, - "loss": 5.5789, - "step": 7821 - }, - { - "epoch": 4.079269882659713, - "grad_norm": 1.378652572631836, - "learning_rate": 9.264321608040201e-05, - "loss": 5.879, - "step": 7822 - }, - { - "epoch": 4.079791395045632, - "grad_norm": 1.3300321102142334, - "learning_rate": 9.264221105527639e-05, - "loss": 6.2782, - "step": 7823 - }, - { - "epoch": 4.080312907431551, - "grad_norm": 1.613664984703064, - "learning_rate": 9.264120603015077e-05, - "loss": 5.3662, - "step": 7824 - }, - { - "epoch": 4.080834419817471, - "grad_norm": 1.6215964555740356, - "learning_rate": 9.264020100502513e-05, - "loss": 5.6516, - "step": 7825 - }, - { - "epoch": 4.081355932203389, - "grad_norm": 1.4671005010604858, - "learning_rate": 9.263919597989951e-05, - "loss": 5.9505, - "step": 7826 - }, - { - "epoch": 4.081877444589309, - "grad_norm": 1.51253342628479, - "learning_rate": 9.263819095477387e-05, - "loss": 6.139, - "step": 7827 - }, - { - "epoch": 4.082398956975228, - "grad_norm": 1.5839118957519531, - "learning_rate": 9.263718592964824e-05, - "loss": 5.4195, - "step": 7828 - }, - { - "epoch": 4.082920469361147, - "grad_norm": 1.4242699146270752, - "learning_rate": 9.263618090452262e-05, - "loss": 5.6728, - "step": 7829 - }, - { - "epoch": 4.083441981747066, - "grad_norm": 1.3379266262054443, - "learning_rate": 9.263517587939698e-05, - "loss": 5.9512, - "step": 7830 - }, - { - "epoch": 4.083963494132986, - "grad_norm": 1.6825493574142456, - "learning_rate": 9.263417085427136e-05, - "loss": 5.5446, - "step": 7831 - }, - { - "epoch": 4.0844850065189044, - "grad_norm": 1.4088844060897827, - "learning_rate": 9.263316582914574e-05, - "loss": 6.1129, - "step": 7832 - }, - { - "epoch": 4.085006518904824, - "grad_norm": 1.4431238174438477, - "learning_rate": 9.263216080402011e-05, - "loss": 5.9501, - "step": 7833 - }, - { - "epoch": 4.085528031290743, - "grad_norm": 1.501042366027832, - "learning_rate": 9.263115577889448e-05, - "loss": 5.7203, - "step": 7834 - }, - { - "epoch": 4.086049543676662, - "grad_norm": 1.5105688571929932, - "learning_rate": 9.263015075376886e-05, - "loss": 5.2921, - "step": 7835 - }, - { - "epoch": 4.086571056062581, - "grad_norm": 1.484744906425476, - "learning_rate": 9.262914572864322e-05, - "loss": 5.3079, - "step": 7836 - }, - { - "epoch": 4.087092568448501, - "grad_norm": 1.466486930847168, - "learning_rate": 9.26281407035176e-05, - "loss": 5.6513, - "step": 7837 - }, - { - "epoch": 4.0876140808344195, - "grad_norm": 1.4407843351364136, - "learning_rate": 9.262713567839196e-05, - "loss": 5.8353, - "step": 7838 - }, - { - "epoch": 4.088135593220339, - "grad_norm": 1.3655436038970947, - "learning_rate": 9.262613065326634e-05, - "loss": 5.5426, - "step": 7839 - }, - { - "epoch": 4.088657105606258, - "grad_norm": 1.4706255197525024, - "learning_rate": 9.26251256281407e-05, - "loss": 5.5358, - "step": 7840 - }, - { - "epoch": 4.089178617992177, - "grad_norm": 1.5802680253982544, - "learning_rate": 9.262412060301507e-05, - "loss": 5.6086, - "step": 7841 - }, - { - "epoch": 4.089700130378096, - "grad_norm": 1.4216445684432983, - "learning_rate": 9.262311557788945e-05, - "loss": 5.7229, - "step": 7842 - }, - { - "epoch": 4.090221642764016, - "grad_norm": 1.4805076122283936, - "learning_rate": 9.262211055276382e-05, - "loss": 5.7629, - "step": 7843 - }, - { - "epoch": 4.0907431551499345, - "grad_norm": 1.5099854469299316, - "learning_rate": 9.26211055276382e-05, - "loss": 5.8572, - "step": 7844 - }, - { - "epoch": 4.091264667535854, - "grad_norm": 1.5518715381622314, - "learning_rate": 9.262010050251257e-05, - "loss": 5.8294, - "step": 7845 - }, - { - "epoch": 4.091786179921773, - "grad_norm": 1.448813557624817, - "learning_rate": 9.261909547738694e-05, - "loss": 5.8629, - "step": 7846 - }, - { - "epoch": 4.092307692307692, - "grad_norm": 1.4683127403259277, - "learning_rate": 9.261809045226131e-05, - "loss": 5.9436, - "step": 7847 - }, - { - "epoch": 4.0928292046936114, - "grad_norm": 1.5080376863479614, - "learning_rate": 9.261708542713569e-05, - "loss": 5.6822, - "step": 7848 - }, - { - "epoch": 4.093350717079531, - "grad_norm": 1.5682848691940308, - "learning_rate": 9.261608040201005e-05, - "loss": 5.3844, - "step": 7849 - }, - { - "epoch": 4.0938722294654495, - "grad_norm": 1.3002808094024658, - "learning_rate": 9.261507537688443e-05, - "loss": 5.9643, - "step": 7850 - }, - { - "epoch": 4.094393741851369, - "grad_norm": 1.480974793434143, - "learning_rate": 9.261407035175879e-05, - "loss": 5.379, - "step": 7851 - }, - { - "epoch": 4.094915254237288, - "grad_norm": 1.5606924295425415, - "learning_rate": 9.261306532663317e-05, - "loss": 6.0206, - "step": 7852 - }, - { - "epoch": 4.095436766623207, - "grad_norm": 1.7071213722229004, - "learning_rate": 9.261206030150753e-05, - "loss": 6.0218, - "step": 7853 - }, - { - "epoch": 4.0959582790091265, - "grad_norm": 1.4019232988357544, - "learning_rate": 9.261105527638191e-05, - "loss": 5.3954, - "step": 7854 - }, - { - "epoch": 4.096479791395046, - "grad_norm": 1.5293500423431396, - "learning_rate": 9.261005025125629e-05, - "loss": 5.7789, - "step": 7855 - }, - { - "epoch": 4.0970013037809645, - "grad_norm": 1.4657803773880005, - "learning_rate": 9.260904522613065e-05, - "loss": 5.5836, - "step": 7856 - }, - { - "epoch": 4.097522816166884, - "grad_norm": 1.630781888961792, - "learning_rate": 9.260804020100503e-05, - "loss": 5.8042, - "step": 7857 - }, - { - "epoch": 4.098044328552803, - "grad_norm": 1.4896132946014404, - "learning_rate": 9.26070351758794e-05, - "loss": 4.7895, - "step": 7858 - }, - { - "epoch": 4.098565840938722, - "grad_norm": 1.4963663816452026, - "learning_rate": 9.260603015075377e-05, - "loss": 5.6306, - "step": 7859 - }, - { - "epoch": 4.0990873533246415, - "grad_norm": 1.4638421535491943, - "learning_rate": 9.260502512562814e-05, - "loss": 5.8229, - "step": 7860 - }, - { - "epoch": 4.099608865710561, - "grad_norm": 1.4387476444244385, - "learning_rate": 9.260402010050252e-05, - "loss": 5.3654, - "step": 7861 - }, - { - "epoch": 4.1001303780964795, - "grad_norm": 1.4353965520858765, - "learning_rate": 9.260301507537688e-05, - "loss": 5.1393, - "step": 7862 - }, - { - "epoch": 4.100651890482399, - "grad_norm": 1.6442482471466064, - "learning_rate": 9.260201005025126e-05, - "loss": 4.5064, - "step": 7863 - }, - { - "epoch": 4.101173402868318, - "grad_norm": 1.5158424377441406, - "learning_rate": 9.260100502512564e-05, - "loss": 5.8922, - "step": 7864 - }, - { - "epoch": 4.101694915254237, - "grad_norm": 1.569647192955017, - "learning_rate": 9.260000000000001e-05, - "loss": 5.7434, - "step": 7865 - }, - { - "epoch": 4.1022164276401565, - "grad_norm": 1.4959419965744019, - "learning_rate": 9.259899497487438e-05, - "loss": 5.4061, - "step": 7866 - }, - { - "epoch": 4.102737940026076, - "grad_norm": 1.466566562652588, - "learning_rate": 9.259798994974876e-05, - "loss": 5.6213, - "step": 7867 - }, - { - "epoch": 4.1032594524119945, - "grad_norm": 1.5008877515792847, - "learning_rate": 9.259698492462312e-05, - "loss": 5.4114, - "step": 7868 - }, - { - "epoch": 4.103780964797914, - "grad_norm": 1.4613921642303467, - "learning_rate": 9.259597989949749e-05, - "loss": 5.6441, - "step": 7869 - }, - { - "epoch": 4.1043024771838335, - "grad_norm": 1.4618943929672241, - "learning_rate": 9.259497487437186e-05, - "loss": 5.6818, - "step": 7870 - }, - { - "epoch": 4.104823989569752, - "grad_norm": 1.4624210596084595, - "learning_rate": 9.259396984924623e-05, - "loss": 5.7114, - "step": 7871 - }, - { - "epoch": 4.1053455019556715, - "grad_norm": 1.4972069263458252, - "learning_rate": 9.25929648241206e-05, - "loss": 5.5877, - "step": 7872 - }, - { - "epoch": 4.105867014341591, - "grad_norm": 1.4567420482635498, - "learning_rate": 9.259195979899497e-05, - "loss": 6.0044, - "step": 7873 - }, - { - "epoch": 4.1063885267275095, - "grad_norm": 1.4059394598007202, - "learning_rate": 9.259095477386935e-05, - "loss": 5.9232, - "step": 7874 - }, - { - "epoch": 4.106910039113429, - "grad_norm": 1.5342375040054321, - "learning_rate": 9.258994974874373e-05, - "loss": 4.8226, - "step": 7875 - }, - { - "epoch": 4.1074315514993485, - "grad_norm": 1.4270868301391602, - "learning_rate": 9.25889447236181e-05, - "loss": 5.627, - "step": 7876 - }, - { - "epoch": 4.107953063885267, - "grad_norm": 1.4449658393859863, - "learning_rate": 9.258793969849247e-05, - "loss": 5.6322, - "step": 7877 - }, - { - "epoch": 4.1084745762711865, - "grad_norm": 1.4410638809204102, - "learning_rate": 9.258693467336685e-05, - "loss": 5.9928, - "step": 7878 - }, - { - "epoch": 4.108996088657106, - "grad_norm": 1.3757808208465576, - "learning_rate": 9.258592964824121e-05, - "loss": 5.8552, - "step": 7879 - }, - { - "epoch": 4.1095176010430245, - "grad_norm": 1.4711267948150635, - "learning_rate": 9.258492462311559e-05, - "loss": 5.2836, - "step": 7880 - }, - { - "epoch": 4.110039113428944, - "grad_norm": 1.507796049118042, - "learning_rate": 9.258391959798995e-05, - "loss": 5.7032, - "step": 7881 - }, - { - "epoch": 4.1105606258148635, - "grad_norm": 1.633172631263733, - "learning_rate": 9.258291457286432e-05, - "loss": 5.9098, - "step": 7882 - }, - { - "epoch": 4.111082138200782, - "grad_norm": 1.473927617073059, - "learning_rate": 9.25819095477387e-05, - "loss": 5.733, - "step": 7883 - }, - { - "epoch": 4.1116036505867015, - "grad_norm": 1.3980087041854858, - "learning_rate": 9.258090452261307e-05, - "loss": 5.7791, - "step": 7884 - }, - { - "epoch": 4.112125162972621, - "grad_norm": 1.584582805633545, - "learning_rate": 9.257989949748745e-05, - "loss": 5.4682, - "step": 7885 - }, - { - "epoch": 4.11264667535854, - "grad_norm": 1.5834003686904907, - "learning_rate": 9.257889447236181e-05, - "loss": 5.6921, - "step": 7886 - }, - { - "epoch": 4.113168187744459, - "grad_norm": 1.579816460609436, - "learning_rate": 9.257788944723619e-05, - "loss": 5.8401, - "step": 7887 - }, - { - "epoch": 4.1136897001303785, - "grad_norm": 1.3022929430007935, - "learning_rate": 9.257688442211056e-05, - "loss": 5.2846, - "step": 7888 - }, - { - "epoch": 4.114211212516297, - "grad_norm": 1.5924038887023926, - "learning_rate": 9.257587939698493e-05, - "loss": 5.3418, - "step": 7889 - }, - { - "epoch": 4.1147327249022165, - "grad_norm": 1.6345165967941284, - "learning_rate": 9.25748743718593e-05, - "loss": 5.3985, - "step": 7890 - }, - { - "epoch": 4.115254237288136, - "grad_norm": 1.5700654983520508, - "learning_rate": 9.257386934673368e-05, - "loss": 5.7724, - "step": 7891 - }, - { - "epoch": 4.115775749674055, - "grad_norm": 1.3302252292633057, - "learning_rate": 9.257286432160804e-05, - "loss": 5.8437, - "step": 7892 - }, - { - "epoch": 4.116297262059974, - "grad_norm": 1.578761100769043, - "learning_rate": 9.257185929648242e-05, - "loss": 5.3953, - "step": 7893 - }, - { - "epoch": 4.1168187744458935, - "grad_norm": 1.4199178218841553, - "learning_rate": 9.257085427135678e-05, - "loss": 5.8331, - "step": 7894 - }, - { - "epoch": 4.117340286831812, - "grad_norm": 1.4709476232528687, - "learning_rate": 9.256984924623116e-05, - "loss": 5.947, - "step": 7895 - }, - { - "epoch": 4.1178617992177315, - "grad_norm": 1.4902665615081787, - "learning_rate": 9.256884422110554e-05, - "loss": 5.5397, - "step": 7896 - }, - { - "epoch": 4.118383311603651, - "grad_norm": 1.3731474876403809, - "learning_rate": 9.25678391959799e-05, - "loss": 5.9759, - "step": 7897 - }, - { - "epoch": 4.11890482398957, - "grad_norm": 1.4054491519927979, - "learning_rate": 9.256683417085428e-05, - "loss": 5.5073, - "step": 7898 - }, - { - "epoch": 4.119426336375489, - "grad_norm": 1.5316388607025146, - "learning_rate": 9.256582914572864e-05, - "loss": 5.6306, - "step": 7899 - }, - { - "epoch": 4.1199478487614085, - "grad_norm": 1.4608274698257446, - "learning_rate": 9.256482412060302e-05, - "loss": 5.7544, - "step": 7900 - }, - { - "epoch": 4.120469361147327, - "grad_norm": 1.5278129577636719, - "learning_rate": 9.256381909547739e-05, - "loss": 5.2985, - "step": 7901 - }, - { - "epoch": 4.120990873533247, - "grad_norm": 1.323892593383789, - "learning_rate": 9.256281407035176e-05, - "loss": 6.077, - "step": 7902 - }, - { - "epoch": 4.121512385919166, - "grad_norm": 1.436211109161377, - "learning_rate": 9.256180904522613e-05, - "loss": 5.9673, - "step": 7903 - }, - { - "epoch": 4.122033898305085, - "grad_norm": 1.5019803047180176, - "learning_rate": 9.25608040201005e-05, - "loss": 5.4464, - "step": 7904 - }, - { - "epoch": 4.122555410691004, - "grad_norm": 1.529310941696167, - "learning_rate": 9.255979899497488e-05, - "loss": 5.8382, - "step": 7905 - }, - { - "epoch": 4.123076923076923, - "grad_norm": 1.551797866821289, - "learning_rate": 9.255879396984926e-05, - "loss": 5.8559, - "step": 7906 - }, - { - "epoch": 4.123598435462842, - "grad_norm": 1.4375746250152588, - "learning_rate": 9.255778894472363e-05, - "loss": 5.558, - "step": 7907 - }, - { - "epoch": 4.124119947848762, - "grad_norm": 1.5525685548782349, - "learning_rate": 9.255678391959799e-05, - "loss": 5.3522, - "step": 7908 - }, - { - "epoch": 4.12464146023468, - "grad_norm": 1.4690592288970947, - "learning_rate": 9.255577889447237e-05, - "loss": 5.5808, - "step": 7909 - }, - { - "epoch": 4.1251629726206, - "grad_norm": 1.3985209465026855, - "learning_rate": 9.255477386934673e-05, - "loss": 5.4361, - "step": 7910 - }, - { - "epoch": 4.125684485006519, - "grad_norm": 1.4502190351486206, - "learning_rate": 9.255376884422111e-05, - "loss": 6.0109, - "step": 7911 - }, - { - "epoch": 4.126205997392438, - "grad_norm": 1.4844197034835815, - "learning_rate": 9.255276381909547e-05, - "loss": 5.6406, - "step": 7912 - }, - { - "epoch": 4.126727509778357, - "grad_norm": 1.3644835948944092, - "learning_rate": 9.255175879396985e-05, - "loss": 6.0641, - "step": 7913 - }, - { - "epoch": 4.127249022164277, - "grad_norm": 1.5031625032424927, - "learning_rate": 9.255075376884422e-05, - "loss": 5.1412, - "step": 7914 - }, - { - "epoch": 4.127770534550195, - "grad_norm": 1.5173677206039429, - "learning_rate": 9.25497487437186e-05, - "loss": 5.8119, - "step": 7915 - }, - { - "epoch": 4.128292046936115, - "grad_norm": 1.4309849739074707, - "learning_rate": 9.254874371859297e-05, - "loss": 5.8947, - "step": 7916 - }, - { - "epoch": 4.128813559322034, - "grad_norm": 1.5204359292984009, - "learning_rate": 9.254773869346735e-05, - "loss": 5.3283, - "step": 7917 - }, - { - "epoch": 4.129335071707953, - "grad_norm": 1.494659185409546, - "learning_rate": 9.254673366834171e-05, - "loss": 5.0787, - "step": 7918 - }, - { - "epoch": 4.129856584093872, - "grad_norm": 1.6365699768066406, - "learning_rate": 9.254572864321609e-05, - "loss": 5.115, - "step": 7919 - }, - { - "epoch": 4.130378096479792, - "grad_norm": 1.482642650604248, - "learning_rate": 9.254472361809046e-05, - "loss": 5.1921, - "step": 7920 - }, - { - "epoch": 4.13089960886571, - "grad_norm": 1.5191080570220947, - "learning_rate": 9.254371859296482e-05, - "loss": 5.9552, - "step": 7921 - }, - { - "epoch": 4.13142112125163, - "grad_norm": 1.465709924697876, - "learning_rate": 9.25427135678392e-05, - "loss": 5.9255, - "step": 7922 - }, - { - "epoch": 4.131942633637549, - "grad_norm": 1.558511734008789, - "learning_rate": 9.254170854271356e-05, - "loss": 5.9157, - "step": 7923 - }, - { - "epoch": 4.132464146023468, - "grad_norm": 1.5937254428863525, - "learning_rate": 9.254070351758794e-05, - "loss": 5.5093, - "step": 7924 - }, - { - "epoch": 4.132985658409387, - "grad_norm": 1.5068422555923462, - "learning_rate": 9.253969849246232e-05, - "loss": 5.5133, - "step": 7925 - }, - { - "epoch": 4.133507170795307, - "grad_norm": 1.5828957557678223, - "learning_rate": 9.25386934673367e-05, - "loss": 5.9056, - "step": 7926 - }, - { - "epoch": 4.134028683181225, - "grad_norm": 1.4433095455169678, - "learning_rate": 9.253768844221106e-05, - "loss": 5.2279, - "step": 7927 - }, - { - "epoch": 4.134550195567145, - "grad_norm": 1.802423119544983, - "learning_rate": 9.253668341708544e-05, - "loss": 5.5105, - "step": 7928 - }, - { - "epoch": 4.135071707953064, - "grad_norm": 1.664573073387146, - "learning_rate": 9.25356783919598e-05, - "loss": 5.4618, - "step": 7929 - }, - { - "epoch": 4.135593220338983, - "grad_norm": 1.4259589910507202, - "learning_rate": 9.253467336683418e-05, - "loss": 5.81, - "step": 7930 - }, - { - "epoch": 4.136114732724902, - "grad_norm": 1.652590274810791, - "learning_rate": 9.253366834170854e-05, - "loss": 5.2233, - "step": 7931 - }, - { - "epoch": 4.136636245110822, - "grad_norm": 1.51335608959198, - "learning_rate": 9.253266331658292e-05, - "loss": 6.0791, - "step": 7932 - }, - { - "epoch": 4.13715775749674, - "grad_norm": 1.384605050086975, - "learning_rate": 9.253165829145729e-05, - "loss": 6.1452, - "step": 7933 - }, - { - "epoch": 4.13767926988266, - "grad_norm": 1.392144799232483, - "learning_rate": 9.253065326633165e-05, - "loss": 5.93, - "step": 7934 - }, - { - "epoch": 4.138200782268579, - "grad_norm": 1.40146005153656, - "learning_rate": 9.252964824120603e-05, - "loss": 5.8465, - "step": 7935 - }, - { - "epoch": 4.138722294654498, - "grad_norm": 1.5170398950576782, - "learning_rate": 9.25286432160804e-05, - "loss": 5.6845, - "step": 7936 - }, - { - "epoch": 4.139243807040417, - "grad_norm": 1.6018669605255127, - "learning_rate": 9.252763819095478e-05, - "loss": 5.7977, - "step": 7937 - }, - { - "epoch": 4.139765319426337, - "grad_norm": 1.4677003622055054, - "learning_rate": 9.252663316582915e-05, - "loss": 5.8254, - "step": 7938 - }, - { - "epoch": 4.140286831812255, - "grad_norm": 1.37051260471344, - "learning_rate": 9.252562814070353e-05, - "loss": 5.9204, - "step": 7939 - }, - { - "epoch": 4.140808344198175, - "grad_norm": 1.4085888862609863, - "learning_rate": 9.252462311557789e-05, - "loss": 5.9496, - "step": 7940 - }, - { - "epoch": 4.141329856584094, - "grad_norm": 1.4396485090255737, - "learning_rate": 9.252361809045227e-05, - "loss": 5.2377, - "step": 7941 - }, - { - "epoch": 4.141851368970013, - "grad_norm": 1.4618613719940186, - "learning_rate": 9.252261306532663e-05, - "loss": 5.6297, - "step": 7942 - }, - { - "epoch": 4.142372881355932, - "grad_norm": 1.4706356525421143, - "learning_rate": 9.252160804020101e-05, - "loss": 6.1027, - "step": 7943 - }, - { - "epoch": 4.142894393741852, - "grad_norm": 1.4448494911193848, - "learning_rate": 9.252060301507538e-05, - "loss": 5.1612, - "step": 7944 - }, - { - "epoch": 4.14341590612777, - "grad_norm": 1.3651525974273682, - "learning_rate": 9.251959798994975e-05, - "loss": 5.8663, - "step": 7945 - }, - { - "epoch": 4.14393741851369, - "grad_norm": 1.5383317470550537, - "learning_rate": 9.251859296482413e-05, - "loss": 5.5437, - "step": 7946 - }, - { - "epoch": 4.144458930899609, - "grad_norm": 1.5273597240447998, - "learning_rate": 9.251758793969851e-05, - "loss": 5.4389, - "step": 7947 - }, - { - "epoch": 4.144980443285528, - "grad_norm": 1.3910081386566162, - "learning_rate": 9.251658291457287e-05, - "loss": 5.9814, - "step": 7948 - }, - { - "epoch": 4.145501955671447, - "grad_norm": 1.5585614442825317, - "learning_rate": 9.251557788944724e-05, - "loss": 5.5628, - "step": 7949 - }, - { - "epoch": 4.146023468057367, - "grad_norm": 1.479105830192566, - "learning_rate": 9.251457286432162e-05, - "loss": 5.8745, - "step": 7950 - }, - { - "epoch": 4.146544980443285, - "grad_norm": 1.5561717748641968, - "learning_rate": 9.251356783919598e-05, - "loss": 5.1674, - "step": 7951 - }, - { - "epoch": 4.147066492829205, - "grad_norm": 1.5374099016189575, - "learning_rate": 9.251256281407036e-05, - "loss": 5.4857, - "step": 7952 - }, - { - "epoch": 4.147588005215124, - "grad_norm": 1.5257409811019897, - "learning_rate": 9.251155778894472e-05, - "loss": 5.5494, - "step": 7953 - }, - { - "epoch": 4.148109517601043, - "grad_norm": 1.6378060579299927, - "learning_rate": 9.25105527638191e-05, - "loss": 5.6747, - "step": 7954 - }, - { - "epoch": 4.148631029986962, - "grad_norm": 1.3717561960220337, - "learning_rate": 9.250954773869346e-05, - "loss": 5.891, - "step": 7955 - }, - { - "epoch": 4.149152542372882, - "grad_norm": 1.407088279724121, - "learning_rate": 9.250854271356784e-05, - "loss": 5.881, - "step": 7956 - }, - { - "epoch": 4.1496740547588, - "grad_norm": 1.562442421913147, - "learning_rate": 9.250753768844222e-05, - "loss": 5.8994, - "step": 7957 - }, - { - "epoch": 4.15019556714472, - "grad_norm": 1.3944790363311768, - "learning_rate": 9.25065326633166e-05, - "loss": 5.7611, - "step": 7958 - }, - { - "epoch": 4.150717079530639, - "grad_norm": 1.3896006345748901, - "learning_rate": 9.250552763819096e-05, - "loss": 6.0281, - "step": 7959 - }, - { - "epoch": 4.151238591916558, - "grad_norm": 1.4281668663024902, - "learning_rate": 9.250452261306534e-05, - "loss": 5.382, - "step": 7960 - }, - { - "epoch": 4.151760104302477, - "grad_norm": 1.5073614120483398, - "learning_rate": 9.25035175879397e-05, - "loss": 5.5142, - "step": 7961 - }, - { - "epoch": 4.152281616688397, - "grad_norm": 1.5959811210632324, - "learning_rate": 9.250251256281407e-05, - "loss": 4.9596, - "step": 7962 - }, - { - "epoch": 4.152803129074315, - "grad_norm": 1.3472485542297363, - "learning_rate": 9.250150753768845e-05, - "loss": 5.7849, - "step": 7963 - }, - { - "epoch": 4.153324641460235, - "grad_norm": 1.5522563457489014, - "learning_rate": 9.250050251256281e-05, - "loss": 5.6841, - "step": 7964 - }, - { - "epoch": 4.153846153846154, - "grad_norm": 1.5052525997161865, - "learning_rate": 9.249949748743719e-05, - "loss": 5.6975, - "step": 7965 - }, - { - "epoch": 4.154367666232073, - "grad_norm": 1.6255697011947632, - "learning_rate": 9.249849246231157e-05, - "loss": 5.8057, - "step": 7966 - }, - { - "epoch": 4.154889178617992, - "grad_norm": 1.364441156387329, - "learning_rate": 9.249748743718594e-05, - "loss": 5.9097, - "step": 7967 - }, - { - "epoch": 4.155410691003912, - "grad_norm": 1.4942996501922607, - "learning_rate": 9.249648241206031e-05, - "loss": 5.0428, - "step": 7968 - }, - { - "epoch": 4.15593220338983, - "grad_norm": 1.4311703443527222, - "learning_rate": 9.249547738693469e-05, - "loss": 5.575, - "step": 7969 - }, - { - "epoch": 4.15645371577575, - "grad_norm": 1.424970269203186, - "learning_rate": 9.249447236180905e-05, - "loss": 5.6306, - "step": 7970 - }, - { - "epoch": 4.156975228161669, - "grad_norm": 1.4862653017044067, - "learning_rate": 9.249346733668343e-05, - "loss": 5.15, - "step": 7971 - }, - { - "epoch": 4.157496740547588, - "grad_norm": 1.4854646921157837, - "learning_rate": 9.249246231155779e-05, - "loss": 5.7152, - "step": 7972 - }, - { - "epoch": 4.158018252933507, - "grad_norm": 1.49619722366333, - "learning_rate": 9.249145728643217e-05, - "loss": 5.9255, - "step": 7973 - }, - { - "epoch": 4.158539765319427, - "grad_norm": 1.3058419227600098, - "learning_rate": 9.249045226130653e-05, - "loss": 5.7663, - "step": 7974 - }, - { - "epoch": 4.159061277705345, - "grad_norm": 1.4758565425872803, - "learning_rate": 9.24894472361809e-05, - "loss": 5.8576, - "step": 7975 - }, - { - "epoch": 4.159582790091265, - "grad_norm": 1.5442945957183838, - "learning_rate": 9.248844221105528e-05, - "loss": 5.1784, - "step": 7976 - }, - { - "epoch": 4.160104302477184, - "grad_norm": 1.3577169179916382, - "learning_rate": 9.248743718592965e-05, - "loss": 5.7748, - "step": 7977 - }, - { - "epoch": 4.160625814863103, - "grad_norm": 1.4710315465927124, - "learning_rate": 9.248643216080403e-05, - "loss": 5.5217, - "step": 7978 - }, - { - "epoch": 4.161147327249022, - "grad_norm": 1.5290608406066895, - "learning_rate": 9.24854271356784e-05, - "loss": 5.7468, - "step": 7979 - }, - { - "epoch": 4.161668839634942, - "grad_norm": 1.4326730966567993, - "learning_rate": 9.248442211055277e-05, - "loss": 5.68, - "step": 7980 - }, - { - "epoch": 4.16219035202086, - "grad_norm": 1.4315330982208252, - "learning_rate": 9.248341708542714e-05, - "loss": 5.2461, - "step": 7981 - }, - { - "epoch": 4.16271186440678, - "grad_norm": 1.4383541345596313, - "learning_rate": 9.248241206030152e-05, - "loss": 5.766, - "step": 7982 - }, - { - "epoch": 4.163233376792699, - "grad_norm": 1.3988003730773926, - "learning_rate": 9.248140703517588e-05, - "loss": 5.7453, - "step": 7983 - }, - { - "epoch": 4.163754889178618, - "grad_norm": 1.5318245887756348, - "learning_rate": 9.248040201005026e-05, - "loss": 5.4455, - "step": 7984 - }, - { - "epoch": 4.164276401564537, - "grad_norm": 1.4302725791931152, - "learning_rate": 9.247939698492462e-05, - "loss": 5.8479, - "step": 7985 - }, - { - "epoch": 4.164797913950457, - "grad_norm": 1.4330464601516724, - "learning_rate": 9.2478391959799e-05, - "loss": 5.6143, - "step": 7986 - }, - { - "epoch": 4.165319426336375, - "grad_norm": 1.5396825075149536, - "learning_rate": 9.247738693467338e-05, - "loss": 5.7541, - "step": 7987 - }, - { - "epoch": 4.165840938722295, - "grad_norm": 1.403669834136963, - "learning_rate": 9.247638190954774e-05, - "loss": 5.8228, - "step": 7988 - }, - { - "epoch": 4.166362451108214, - "grad_norm": 1.4943293333053589, - "learning_rate": 9.247537688442212e-05, - "loss": 5.5104, - "step": 7989 - }, - { - "epoch": 4.166883963494133, - "grad_norm": 1.4229488372802734, - "learning_rate": 9.247437185929648e-05, - "loss": 5.8853, - "step": 7990 - }, - { - "epoch": 4.167405475880052, - "grad_norm": 1.4458130598068237, - "learning_rate": 9.247336683417086e-05, - "loss": 5.7799, - "step": 7991 - }, - { - "epoch": 4.167926988265972, - "grad_norm": 1.358916997909546, - "learning_rate": 9.247236180904523e-05, - "loss": 5.8871, - "step": 7992 - }, - { - "epoch": 4.16844850065189, - "grad_norm": 1.5322297811508179, - "learning_rate": 9.24713567839196e-05, - "loss": 5.6711, - "step": 7993 - }, - { - "epoch": 4.16897001303781, - "grad_norm": 1.5065218210220337, - "learning_rate": 9.247035175879397e-05, - "loss": 5.3637, - "step": 7994 - }, - { - "epoch": 4.169491525423728, - "grad_norm": 1.64816153049469, - "learning_rate": 9.246934673366835e-05, - "loss": 5.7693, - "step": 7995 - }, - { - "epoch": 4.170013037809648, - "grad_norm": 1.508472204208374, - "learning_rate": 9.246834170854271e-05, - "loss": 5.9295, - "step": 7996 - }, - { - "epoch": 4.170534550195567, - "grad_norm": 1.5284219980239868, - "learning_rate": 9.246733668341709e-05, - "loss": 5.7533, - "step": 7997 - }, - { - "epoch": 4.171056062581487, - "grad_norm": 1.4177913665771484, - "learning_rate": 9.246633165829147e-05, - "loss": 5.7313, - "step": 7998 - }, - { - "epoch": 4.171577574967405, - "grad_norm": 1.3781781196594238, - "learning_rate": 9.246532663316584e-05, - "loss": 5.5933, - "step": 7999 - }, - { - "epoch": 4.172099087353325, - "grad_norm": 1.5088404417037964, - "learning_rate": 9.246432160804021e-05, - "loss": 5.614, - "step": 8000 - }, - { - "epoch": 4.172620599739243, - "grad_norm": 1.5555975437164307, - "learning_rate": 9.246331658291457e-05, - "loss": 5.4365, - "step": 8001 - }, - { - "epoch": 4.173142112125163, - "grad_norm": 1.69544517993927, - "learning_rate": 9.246231155778895e-05, - "loss": 6.0654, - "step": 8002 - }, - { - "epoch": 4.173663624511082, - "grad_norm": 1.627017855644226, - "learning_rate": 9.246130653266331e-05, - "loss": 5.5741, - "step": 8003 - }, - { - "epoch": 4.174185136897001, - "grad_norm": 1.559555172920227, - "learning_rate": 9.246030150753769e-05, - "loss": 5.8388, - "step": 8004 - }, - { - "epoch": 4.17470664928292, - "grad_norm": 1.5767152309417725, - "learning_rate": 9.245929648241206e-05, - "loss": 5.0984, - "step": 8005 - }, - { - "epoch": 4.17522816166884, - "grad_norm": 1.6931376457214355, - "learning_rate": 9.245829145728643e-05, - "loss": 6.188, - "step": 8006 - }, - { - "epoch": 4.175749674054758, - "grad_norm": 1.7225605249404907, - "learning_rate": 9.245728643216081e-05, - "loss": 5.2533, - "step": 8007 - }, - { - "epoch": 4.176271186440678, - "grad_norm": 1.493012547492981, - "learning_rate": 9.245628140703519e-05, - "loss": 6.1509, - "step": 8008 - }, - { - "epoch": 4.176792698826597, - "grad_norm": 1.5385997295379639, - "learning_rate": 9.245527638190955e-05, - "loss": 5.5863, - "step": 8009 - }, - { - "epoch": 4.177314211212516, - "grad_norm": 1.5017783641815186, - "learning_rate": 9.245427135678393e-05, - "loss": 5.4661, - "step": 8010 - }, - { - "epoch": 4.177835723598435, - "grad_norm": 1.472206950187683, - "learning_rate": 9.24532663316583e-05, - "loss": 5.2551, - "step": 8011 - }, - { - "epoch": 4.178357235984355, - "grad_norm": 1.410810112953186, - "learning_rate": 9.245226130653267e-05, - "loss": 5.3972, - "step": 8012 - }, - { - "epoch": 4.178878748370273, - "grad_norm": 1.5154269933700562, - "learning_rate": 9.245125628140704e-05, - "loss": 5.6251, - "step": 8013 - }, - { - "epoch": 4.179400260756193, - "grad_norm": 1.6027485132217407, - "learning_rate": 9.24502512562814e-05, - "loss": 5.5104, - "step": 8014 - }, - { - "epoch": 4.179921773142112, - "grad_norm": 1.5796518325805664, - "learning_rate": 9.244924623115578e-05, - "loss": 5.6809, - "step": 8015 - }, - { - "epoch": 4.180443285528031, - "grad_norm": 1.4736182689666748, - "learning_rate": 9.244824120603015e-05, - "loss": 5.6487, - "step": 8016 - }, - { - "epoch": 4.18096479791395, - "grad_norm": 1.4706374406814575, - "learning_rate": 9.244723618090452e-05, - "loss": 5.7969, - "step": 8017 - }, - { - "epoch": 4.18148631029987, - "grad_norm": 1.2628108263015747, - "learning_rate": 9.24462311557789e-05, - "loss": 6.1244, - "step": 8018 - }, - { - "epoch": 4.182007822685788, - "grad_norm": 1.364096999168396, - "learning_rate": 9.244522613065328e-05, - "loss": 5.9715, - "step": 8019 - }, - { - "epoch": 4.182529335071708, - "grad_norm": 1.9296715259552002, - "learning_rate": 9.244422110552764e-05, - "loss": 5.1742, - "step": 8020 - }, - { - "epoch": 4.183050847457627, - "grad_norm": 1.4944796562194824, - "learning_rate": 9.244321608040202e-05, - "loss": 5.7869, - "step": 8021 - }, - { - "epoch": 4.183572359843546, - "grad_norm": 1.5324578285217285, - "learning_rate": 9.244221105527638e-05, - "loss": 5.2627, - "step": 8022 - }, - { - "epoch": 4.184093872229465, - "grad_norm": 1.5570334196090698, - "learning_rate": 9.244120603015076e-05, - "loss": 5.2615, - "step": 8023 - }, - { - "epoch": 4.184615384615385, - "grad_norm": 1.636074185371399, - "learning_rate": 9.244020100502513e-05, - "loss": 6.0295, - "step": 8024 - }, - { - "epoch": 4.185136897001303, - "grad_norm": 1.5826523303985596, - "learning_rate": 9.24391959798995e-05, - "loss": 5.3125, - "step": 8025 - }, - { - "epoch": 4.185658409387223, - "grad_norm": 1.5885404348373413, - "learning_rate": 9.243819095477387e-05, - "loss": 5.1305, - "step": 8026 - }, - { - "epoch": 4.186179921773142, - "grad_norm": 1.5010654926300049, - "learning_rate": 9.243718592964823e-05, - "loss": 5.6081, - "step": 8027 - }, - { - "epoch": 4.186701434159061, - "grad_norm": 1.4546791315078735, - "learning_rate": 9.243618090452261e-05, - "loss": 4.908, - "step": 8028 - }, - { - "epoch": 4.18722294654498, - "grad_norm": 1.3269590139389038, - "learning_rate": 9.243517587939699e-05, - "loss": 5.6341, - "step": 8029 - }, - { - "epoch": 4.1877444589309, - "grad_norm": 1.4500688314437866, - "learning_rate": 9.243417085427137e-05, - "loss": 5.6559, - "step": 8030 - }, - { - "epoch": 4.188265971316818, - "grad_norm": 1.6876097917556763, - "learning_rate": 9.243316582914573e-05, - "loss": 5.7327, - "step": 8031 - }, - { - "epoch": 4.188787483702738, - "grad_norm": 1.7296757698059082, - "learning_rate": 9.243216080402011e-05, - "loss": 5.6283, - "step": 8032 - }, - { - "epoch": 4.189308996088657, - "grad_norm": 1.5519893169403076, - "learning_rate": 9.243115577889447e-05, - "loss": 5.474, - "step": 8033 - }, - { - "epoch": 4.189830508474576, - "grad_norm": 1.4642395973205566, - "learning_rate": 9.243015075376885e-05, - "loss": 5.7745, - "step": 8034 - }, - { - "epoch": 4.190352020860495, - "grad_norm": 1.4915616512298584, - "learning_rate": 9.242914572864322e-05, - "loss": 5.6578, - "step": 8035 - }, - { - "epoch": 4.190873533246415, - "grad_norm": 1.5998424291610718, - "learning_rate": 9.24281407035176e-05, - "loss": 5.8621, - "step": 8036 - }, - { - "epoch": 4.1913950456323334, - "grad_norm": 1.5893317461013794, - "learning_rate": 9.242713567839196e-05, - "loss": 5.7653, - "step": 8037 - }, - { - "epoch": 4.191916558018253, - "grad_norm": 1.6256020069122314, - "learning_rate": 9.242613065326634e-05, - "loss": 5.5599, - "step": 8038 - }, - { - "epoch": 4.192438070404172, - "grad_norm": 1.5049679279327393, - "learning_rate": 9.242512562814071e-05, - "loss": 5.6968, - "step": 8039 - }, - { - "epoch": 4.192959582790091, - "grad_norm": 1.598910927772522, - "learning_rate": 9.242412060301509e-05, - "loss": 5.2, - "step": 8040 - }, - { - "epoch": 4.19348109517601, - "grad_norm": 1.5259355306625366, - "learning_rate": 9.242311557788946e-05, - "loss": 5.9417, - "step": 8041 - }, - { - "epoch": 4.19400260756193, - "grad_norm": 1.4717252254486084, - "learning_rate": 9.242211055276382e-05, - "loss": 5.6665, - "step": 8042 - }, - { - "epoch": 4.1945241199478485, - "grad_norm": 1.3602797985076904, - "learning_rate": 9.24211055276382e-05, - "loss": 5.8723, - "step": 8043 - }, - { - "epoch": 4.195045632333768, - "grad_norm": 1.5320335626602173, - "learning_rate": 9.242010050251256e-05, - "loss": 5.8053, - "step": 8044 - }, - { - "epoch": 4.195567144719687, - "grad_norm": 1.3722988367080688, - "learning_rate": 9.241909547738694e-05, - "loss": 5.7699, - "step": 8045 - }, - { - "epoch": 4.196088657105606, - "grad_norm": 1.485425353050232, - "learning_rate": 9.24180904522613e-05, - "loss": 5.5694, - "step": 8046 - }, - { - "epoch": 4.196610169491525, - "grad_norm": 1.444112777709961, - "learning_rate": 9.241708542713568e-05, - "loss": 5.9495, - "step": 8047 - }, - { - "epoch": 4.197131681877445, - "grad_norm": 1.403891682624817, - "learning_rate": 9.241608040201005e-05, - "loss": 5.7753, - "step": 8048 - }, - { - "epoch": 4.1976531942633635, - "grad_norm": 1.435088872909546, - "learning_rate": 9.241507537688442e-05, - "loss": 5.7373, - "step": 8049 - }, - { - "epoch": 4.198174706649283, - "grad_norm": 1.491249918937683, - "learning_rate": 9.24140703517588e-05, - "loss": 5.9124, - "step": 8050 - }, - { - "epoch": 4.198696219035202, - "grad_norm": 1.3308773040771484, - "learning_rate": 9.241306532663318e-05, - "loss": 6.0038, - "step": 8051 - }, - { - "epoch": 4.199217731421121, - "grad_norm": 1.4706188440322876, - "learning_rate": 9.241206030150754e-05, - "loss": 5.6725, - "step": 8052 - }, - { - "epoch": 4.1997392438070404, - "grad_norm": 1.4802765846252441, - "learning_rate": 9.241105527638192e-05, - "loss": 5.5719, - "step": 8053 - }, - { - "epoch": 4.20026075619296, - "grad_norm": 1.4592845439910889, - "learning_rate": 9.241005025125629e-05, - "loss": 5.7131, - "step": 8054 - }, - { - "epoch": 4.2007822685788785, - "grad_norm": 1.630377173423767, - "learning_rate": 9.240904522613065e-05, - "loss": 5.362, - "step": 8055 - }, - { - "epoch": 4.201303780964798, - "grad_norm": 1.4707138538360596, - "learning_rate": 9.240804020100503e-05, - "loss": 5.6395, - "step": 8056 - }, - { - "epoch": 4.201825293350717, - "grad_norm": 1.4473018646240234, - "learning_rate": 9.240703517587939e-05, - "loss": 5.2294, - "step": 8057 - }, - { - "epoch": 4.202346805736636, - "grad_norm": 1.4597961902618408, - "learning_rate": 9.240603015075377e-05, - "loss": 5.665, - "step": 8058 - }, - { - "epoch": 4.2028683181225555, - "grad_norm": 1.6309971809387207, - "learning_rate": 9.240502512562815e-05, - "loss": 5.4593, - "step": 8059 - }, - { - "epoch": 4.203389830508475, - "grad_norm": 1.3604625463485718, - "learning_rate": 9.240402010050253e-05, - "loss": 5.9838, - "step": 8060 - }, - { - "epoch": 4.2039113428943935, - "grad_norm": 1.3801305294036865, - "learning_rate": 9.240301507537689e-05, - "loss": 5.9604, - "step": 8061 - }, - { - "epoch": 4.204432855280313, - "grad_norm": 1.6933174133300781, - "learning_rate": 9.240201005025127e-05, - "loss": 5.5815, - "step": 8062 - }, - { - "epoch": 4.204954367666232, - "grad_norm": 1.545485019683838, - "learning_rate": 9.240100502512563e-05, - "loss": 5.7194, - "step": 8063 - }, - { - "epoch": 4.205475880052151, - "grad_norm": 1.4869271516799927, - "learning_rate": 9.240000000000001e-05, - "loss": 5.5889, - "step": 8064 - }, - { - "epoch": 4.2059973924380705, - "grad_norm": 1.4415230751037598, - "learning_rate": 9.239899497487437e-05, - "loss": 5.8783, - "step": 8065 - }, - { - "epoch": 4.20651890482399, - "grad_norm": 1.5789380073547363, - "learning_rate": 9.239798994974875e-05, - "loss": 5.5629, - "step": 8066 - }, - { - "epoch": 4.2070404172099085, - "grad_norm": 1.4147603511810303, - "learning_rate": 9.239698492462312e-05, - "loss": 5.7243, - "step": 8067 - }, - { - "epoch": 4.207561929595828, - "grad_norm": 1.3588975667953491, - "learning_rate": 9.239597989949748e-05, - "loss": 5.8473, - "step": 8068 - }, - { - "epoch": 4.208083441981747, - "grad_norm": 1.413963794708252, - "learning_rate": 9.239497487437186e-05, - "loss": 5.7056, - "step": 8069 - }, - { - "epoch": 4.208604954367666, - "grad_norm": 1.479322075843811, - "learning_rate": 9.239396984924624e-05, - "loss": 5.7815, - "step": 8070 - }, - { - "epoch": 4.2091264667535855, - "grad_norm": 1.493443489074707, - "learning_rate": 9.239296482412061e-05, - "loss": 5.6818, - "step": 8071 - }, - { - "epoch": 4.209647979139505, - "grad_norm": 1.341110110282898, - "learning_rate": 9.239195979899498e-05, - "loss": 5.9466, - "step": 8072 - }, - { - "epoch": 4.2101694915254235, - "grad_norm": 1.4842380285263062, - "learning_rate": 9.239095477386936e-05, - "loss": 5.8909, - "step": 8073 - }, - { - "epoch": 4.210691003911343, - "grad_norm": 1.4551920890808105, - "learning_rate": 9.238994974874372e-05, - "loss": 5.5545, - "step": 8074 - }, - { - "epoch": 4.2112125162972625, - "grad_norm": 1.4633193016052246, - "learning_rate": 9.23889447236181e-05, - "loss": 5.3482, - "step": 8075 - }, - { - "epoch": 4.211734028683181, - "grad_norm": 1.6214423179626465, - "learning_rate": 9.238793969849246e-05, - "loss": 5.0165, - "step": 8076 - }, - { - "epoch": 4.2122555410691005, - "grad_norm": 1.3305046558380127, - "learning_rate": 9.238693467336684e-05, - "loss": 6.0842, - "step": 8077 - }, - { - "epoch": 4.21277705345502, - "grad_norm": 1.534300446510315, - "learning_rate": 9.23859296482412e-05, - "loss": 5.573, - "step": 8078 - }, - { - "epoch": 4.2132985658409385, - "grad_norm": 1.4500904083251953, - "learning_rate": 9.238492462311558e-05, - "loss": 5.5678, - "step": 8079 - }, - { - "epoch": 4.213820078226858, - "grad_norm": 1.4248017072677612, - "learning_rate": 9.238391959798996e-05, - "loss": 5.6908, - "step": 8080 - }, - { - "epoch": 4.2143415906127775, - "grad_norm": 1.4029027223587036, - "learning_rate": 9.238291457286432e-05, - "loss": 5.1869, - "step": 8081 - }, - { - "epoch": 4.214863102998696, - "grad_norm": 1.471298336982727, - "learning_rate": 9.23819095477387e-05, - "loss": 5.8501, - "step": 8082 - }, - { - "epoch": 4.2153846153846155, - "grad_norm": 1.5383131504058838, - "learning_rate": 9.238090452261307e-05, - "loss": 5.3858, - "step": 8083 - }, - { - "epoch": 4.215906127770535, - "grad_norm": 1.4498381614685059, - "learning_rate": 9.237989949748744e-05, - "loss": 5.6163, - "step": 8084 - }, - { - "epoch": 4.2164276401564535, - "grad_norm": 1.5453029870986938, - "learning_rate": 9.237889447236181e-05, - "loss": 5.5652, - "step": 8085 - }, - { - "epoch": 4.216949152542373, - "grad_norm": 1.442050814628601, - "learning_rate": 9.237788944723619e-05, - "loss": 5.879, - "step": 8086 - }, - { - "epoch": 4.2174706649282925, - "grad_norm": 1.4322459697723389, - "learning_rate": 9.237688442211055e-05, - "loss": 6.0147, - "step": 8087 - }, - { - "epoch": 4.217992177314211, - "grad_norm": 1.515594720840454, - "learning_rate": 9.237587939698493e-05, - "loss": 5.2921, - "step": 8088 - }, - { - "epoch": 4.2185136897001305, - "grad_norm": 1.3660866022109985, - "learning_rate": 9.237487437185929e-05, - "loss": 5.762, - "step": 8089 - }, - { - "epoch": 4.219035202086049, - "grad_norm": 1.4891606569290161, - "learning_rate": 9.237386934673367e-05, - "loss": 4.9394, - "step": 8090 - }, - { - "epoch": 4.219556714471969, - "grad_norm": 1.5223805904388428, - "learning_rate": 9.237286432160805e-05, - "loss": 5.6508, - "step": 8091 - }, - { - "epoch": 4.220078226857888, - "grad_norm": 1.5101184844970703, - "learning_rate": 9.237185929648243e-05, - "loss": 5.4397, - "step": 8092 - }, - { - "epoch": 4.2205997392438075, - "grad_norm": 1.6051876544952393, - "learning_rate": 9.237085427135679e-05, - "loss": 5.8335, - "step": 8093 - }, - { - "epoch": 4.221121251629726, - "grad_norm": 1.9319525957107544, - "learning_rate": 9.236984924623115e-05, - "loss": 5.1592, - "step": 8094 - }, - { - "epoch": 4.2216427640156455, - "grad_norm": 1.6756622791290283, - "learning_rate": 9.236884422110553e-05, - "loss": 5.4938, - "step": 8095 - }, - { - "epoch": 4.222164276401564, - "grad_norm": 1.4761368036270142, - "learning_rate": 9.23678391959799e-05, - "loss": 5.5827, - "step": 8096 - }, - { - "epoch": 4.222685788787484, - "grad_norm": 1.4994632005691528, - "learning_rate": 9.236683417085427e-05, - "loss": 5.9181, - "step": 8097 - }, - { - "epoch": 4.223207301173403, - "grad_norm": 1.5741597414016724, - "learning_rate": 9.236582914572864e-05, - "loss": 5.3733, - "step": 8098 - }, - { - "epoch": 4.223728813559322, - "grad_norm": 1.4558801651000977, - "learning_rate": 9.236482412060302e-05, - "loss": 5.8769, - "step": 8099 - }, - { - "epoch": 4.224250325945241, - "grad_norm": 1.7334845066070557, - "learning_rate": 9.23638190954774e-05, - "loss": 5.1694, - "step": 8100 - }, - { - "epoch": 4.2247718383311605, - "grad_norm": 1.4909974336624146, - "learning_rate": 9.236281407035177e-05, - "loss": 4.9027, - "step": 8101 - }, - { - "epoch": 4.225293350717079, - "grad_norm": 1.4431517124176025, - "learning_rate": 9.236180904522614e-05, - "loss": 5.9496, - "step": 8102 - }, - { - "epoch": 4.225814863102999, - "grad_norm": 1.3642315864562988, - "learning_rate": 9.236080402010051e-05, - "loss": 5.9466, - "step": 8103 - }, - { - "epoch": 4.226336375488918, - "grad_norm": 1.3197346925735474, - "learning_rate": 9.235979899497488e-05, - "loss": 6.1465, - "step": 8104 - }, - { - "epoch": 4.226857887874837, - "grad_norm": 2.0636520385742188, - "learning_rate": 9.235879396984926e-05, - "loss": 3.8172, - "step": 8105 - }, - { - "epoch": 4.227379400260756, - "grad_norm": 1.411077857017517, - "learning_rate": 9.235778894472362e-05, - "loss": 6.0691, - "step": 8106 - }, - { - "epoch": 4.227900912646676, - "grad_norm": 1.543269395828247, - "learning_rate": 9.235678391959799e-05, - "loss": 5.7796, - "step": 8107 - }, - { - "epoch": 4.228422425032594, - "grad_norm": 1.3450006246566772, - "learning_rate": 9.235577889447236e-05, - "loss": 5.8724, - "step": 8108 - }, - { - "epoch": 4.228943937418514, - "grad_norm": 1.3910506963729858, - "learning_rate": 9.235477386934673e-05, - "loss": 5.9735, - "step": 8109 - }, - { - "epoch": 4.229465449804433, - "grad_norm": 1.4848816394805908, - "learning_rate": 9.23537688442211e-05, - "loss": 5.5918, - "step": 8110 - }, - { - "epoch": 4.229986962190352, - "grad_norm": 1.3522087335586548, - "learning_rate": 9.235276381909548e-05, - "loss": 5.9402, - "step": 8111 - }, - { - "epoch": 4.230508474576271, - "grad_norm": 1.4571064710617065, - "learning_rate": 9.235175879396986e-05, - "loss": 5.9054, - "step": 8112 - }, - { - "epoch": 4.231029986962191, - "grad_norm": 1.4212530851364136, - "learning_rate": 9.235075376884423e-05, - "loss": 6.0357, - "step": 8113 - }, - { - "epoch": 4.231551499348109, - "grad_norm": 1.387215495109558, - "learning_rate": 9.23497487437186e-05, - "loss": 5.8226, - "step": 8114 - }, - { - "epoch": 4.232073011734029, - "grad_norm": 1.4048011302947998, - "learning_rate": 9.234874371859297e-05, - "loss": 5.9157, - "step": 8115 - }, - { - "epoch": 4.232594524119948, - "grad_norm": 1.3140244483947754, - "learning_rate": 9.234773869346735e-05, - "loss": 5.9048, - "step": 8116 - }, - { - "epoch": 4.233116036505867, - "grad_norm": 1.5777630805969238, - "learning_rate": 9.234673366834171e-05, - "loss": 5.5979, - "step": 8117 - }, - { - "epoch": 4.233637548891786, - "grad_norm": 1.4291855096817017, - "learning_rate": 9.234572864321609e-05, - "loss": 5.9305, - "step": 8118 - }, - { - "epoch": 4.234159061277706, - "grad_norm": 1.5020042657852173, - "learning_rate": 9.234472361809045e-05, - "loss": 5.4507, - "step": 8119 - }, - { - "epoch": 4.234680573663624, - "grad_norm": 1.6197021007537842, - "learning_rate": 9.234371859296483e-05, - "loss": 5.8418, - "step": 8120 - }, - { - "epoch": 4.235202086049544, - "grad_norm": 1.4735828638076782, - "learning_rate": 9.234271356783921e-05, - "loss": 5.5402, - "step": 8121 - }, - { - "epoch": 4.235723598435463, - "grad_norm": 1.4384886026382446, - "learning_rate": 9.234170854271357e-05, - "loss": 5.7671, - "step": 8122 - }, - { - "epoch": 4.236245110821382, - "grad_norm": 1.551974892616272, - "learning_rate": 9.234070351758795e-05, - "loss": 5.6608, - "step": 8123 - }, - { - "epoch": 4.236766623207301, - "grad_norm": 1.574962854385376, - "learning_rate": 9.233969849246231e-05, - "loss": 5.7942, - "step": 8124 - }, - { - "epoch": 4.237288135593221, - "grad_norm": 1.3233717679977417, - "learning_rate": 9.233869346733669e-05, - "loss": 5.9314, - "step": 8125 - }, - { - "epoch": 4.237809647979139, - "grad_norm": 1.480355143547058, - "learning_rate": 9.233768844221106e-05, - "loss": 5.6188, - "step": 8126 - }, - { - "epoch": 4.238331160365059, - "grad_norm": 1.4255951642990112, - "learning_rate": 9.233668341708543e-05, - "loss": 5.8434, - "step": 8127 - }, - { - "epoch": 4.238852672750978, - "grad_norm": 1.6663564443588257, - "learning_rate": 9.23356783919598e-05, - "loss": 5.9194, - "step": 8128 - }, - { - "epoch": 4.239374185136897, - "grad_norm": 1.5636684894561768, - "learning_rate": 9.233467336683418e-05, - "loss": 5.7119, - "step": 8129 - }, - { - "epoch": 4.239895697522816, - "grad_norm": 1.4824167490005493, - "learning_rate": 9.233366834170854e-05, - "loss": 5.5152, - "step": 8130 - }, - { - "epoch": 4.240417209908736, - "grad_norm": 1.6236249208450317, - "learning_rate": 9.233266331658292e-05, - "loss": 5.6748, - "step": 8131 - }, - { - "epoch": 4.240938722294654, - "grad_norm": 1.6402028799057007, - "learning_rate": 9.23316582914573e-05, - "loss": 5.7157, - "step": 8132 - }, - { - "epoch": 4.241460234680574, - "grad_norm": 1.5611157417297363, - "learning_rate": 9.233065326633167e-05, - "loss": 5.6647, - "step": 8133 - }, - { - "epoch": 4.241981747066493, - "grad_norm": 1.4712896347045898, - "learning_rate": 9.232964824120604e-05, - "loss": 5.8031, - "step": 8134 - }, - { - "epoch": 4.242503259452412, - "grad_norm": 1.3820230960845947, - "learning_rate": 9.23286432160804e-05, - "loss": 6.2216, - "step": 8135 - }, - { - "epoch": 4.243024771838331, - "grad_norm": 1.7106844186782837, - "learning_rate": 9.232763819095478e-05, - "loss": 5.2614, - "step": 8136 - }, - { - "epoch": 4.243546284224251, - "grad_norm": 1.5530357360839844, - "learning_rate": 9.232663316582914e-05, - "loss": 5.8776, - "step": 8137 - }, - { - "epoch": 4.244067796610169, - "grad_norm": 1.416183352470398, - "learning_rate": 9.232562814070352e-05, - "loss": 5.216, - "step": 8138 - }, - { - "epoch": 4.244589308996089, - "grad_norm": 1.6258443593978882, - "learning_rate": 9.232462311557789e-05, - "loss": 5.6457, - "step": 8139 - }, - { - "epoch": 4.245110821382008, - "grad_norm": 1.585181474685669, - "learning_rate": 9.232361809045226e-05, - "loss": 5.2485, - "step": 8140 - }, - { - "epoch": 4.245632333767927, - "grad_norm": 1.6142514944076538, - "learning_rate": 9.232261306532664e-05, - "loss": 5.9728, - "step": 8141 - }, - { - "epoch": 4.246153846153846, - "grad_norm": 1.4848946332931519, - "learning_rate": 9.232160804020102e-05, - "loss": 5.6655, - "step": 8142 - }, - { - "epoch": 4.246675358539766, - "grad_norm": 1.508735179901123, - "learning_rate": 9.232060301507538e-05, - "loss": 5.5072, - "step": 8143 - }, - { - "epoch": 4.247196870925684, - "grad_norm": 1.488559365272522, - "learning_rate": 9.231959798994976e-05, - "loss": 5.1086, - "step": 8144 - }, - { - "epoch": 4.247718383311604, - "grad_norm": 1.5528466701507568, - "learning_rate": 9.231859296482413e-05, - "loss": 5.7921, - "step": 8145 - }, - { - "epoch": 4.248239895697523, - "grad_norm": 1.594647765159607, - "learning_rate": 9.23175879396985e-05, - "loss": 5.8262, - "step": 8146 - }, - { - "epoch": 4.248761408083442, - "grad_norm": 1.522204875946045, - "learning_rate": 9.231658291457287e-05, - "loss": 5.453, - "step": 8147 - }, - { - "epoch": 4.249282920469361, - "grad_norm": 1.4430971145629883, - "learning_rate": 9.231557788944723e-05, - "loss": 5.8618, - "step": 8148 - }, - { - "epoch": 4.249804432855281, - "grad_norm": 1.510711669921875, - "learning_rate": 9.231457286432161e-05, - "loss": 5.7433, - "step": 8149 - }, - { - "epoch": 4.250325945241199, - "grad_norm": 1.4264568090438843, - "learning_rate": 9.231356783919597e-05, - "loss": 6.0582, - "step": 8150 - }, - { - "epoch": 4.250847457627119, - "grad_norm": 1.5914751291275024, - "learning_rate": 9.231256281407035e-05, - "loss": 5.5834, - "step": 8151 - }, - { - "epoch": 4.251368970013038, - "grad_norm": 1.624890923500061, - "learning_rate": 9.231155778894473e-05, - "loss": 5.4993, - "step": 8152 - }, - { - "epoch": 4.251890482398957, - "grad_norm": 1.559191346168518, - "learning_rate": 9.231055276381911e-05, - "loss": 5.2716, - "step": 8153 - }, - { - "epoch": 4.252411994784876, - "grad_norm": 1.663712501525879, - "learning_rate": 9.230954773869347e-05, - "loss": 5.8165, - "step": 8154 - }, - { - "epoch": 4.252933507170796, - "grad_norm": 1.4753141403198242, - "learning_rate": 9.230854271356785e-05, - "loss": 5.3953, - "step": 8155 - }, - { - "epoch": 4.253455019556714, - "grad_norm": 1.5185589790344238, - "learning_rate": 9.230753768844221e-05, - "loss": 5.5593, - "step": 8156 - }, - { - "epoch": 4.253976531942634, - "grad_norm": 1.4911926984786987, - "learning_rate": 9.230653266331659e-05, - "loss": 5.7487, - "step": 8157 - }, - { - "epoch": 4.254498044328553, - "grad_norm": 1.5621613264083862, - "learning_rate": 9.230552763819096e-05, - "loss": 5.432, - "step": 8158 - }, - { - "epoch": 4.255019556714472, - "grad_norm": 1.4591282606124878, - "learning_rate": 9.230452261306533e-05, - "loss": 5.4393, - "step": 8159 - }, - { - "epoch": 4.255541069100391, - "grad_norm": 1.4202383756637573, - "learning_rate": 9.23035175879397e-05, - "loss": 5.5061, - "step": 8160 - }, - { - "epoch": 4.256062581486311, - "grad_norm": 1.8698112964630127, - "learning_rate": 9.230251256281408e-05, - "loss": 5.3721, - "step": 8161 - }, - { - "epoch": 4.256584093872229, - "grad_norm": 1.4089279174804688, - "learning_rate": 9.230150753768845e-05, - "loss": 5.9104, - "step": 8162 - }, - { - "epoch": 4.257105606258149, - "grad_norm": 1.3710532188415527, - "learning_rate": 9.230050251256282e-05, - "loss": 5.7923, - "step": 8163 - }, - { - "epoch": 4.257627118644068, - "grad_norm": 1.6177845001220703, - "learning_rate": 9.22994974874372e-05, - "loss": 5.5642, - "step": 8164 - }, - { - "epoch": 4.258148631029987, - "grad_norm": 1.7065272331237793, - "learning_rate": 9.229849246231156e-05, - "loss": 5.3446, - "step": 8165 - }, - { - "epoch": 4.258670143415906, - "grad_norm": 1.9377055168151855, - "learning_rate": 9.229748743718594e-05, - "loss": 5.4428, - "step": 8166 - }, - { - "epoch": 4.259191655801826, - "grad_norm": 1.6348875761032104, - "learning_rate": 9.22964824120603e-05, - "loss": 5.5372, - "step": 8167 - }, - { - "epoch": 4.259713168187744, - "grad_norm": 1.6998919248580933, - "learning_rate": 9.229547738693468e-05, - "loss": 5.7976, - "step": 8168 - }, - { - "epoch": 4.260234680573664, - "grad_norm": 1.843271255493164, - "learning_rate": 9.229447236180904e-05, - "loss": 5.8798, - "step": 8169 - }, - { - "epoch": 4.260756192959583, - "grad_norm": 1.5218173265457153, - "learning_rate": 9.229346733668342e-05, - "loss": 5.9107, - "step": 8170 - }, - { - "epoch": 4.261277705345502, - "grad_norm": 1.6195333003997803, - "learning_rate": 9.229246231155779e-05, - "loss": 5.3999, - "step": 8171 - }, - { - "epoch": 4.261799217731421, - "grad_norm": 1.7671188116073608, - "learning_rate": 9.229145728643216e-05, - "loss": 5.5851, - "step": 8172 - }, - { - "epoch": 4.262320730117341, - "grad_norm": 1.436708688735962, - "learning_rate": 9.229045226130654e-05, - "loss": 5.9291, - "step": 8173 - }, - { - "epoch": 4.262842242503259, - "grad_norm": 1.337443470954895, - "learning_rate": 9.228944723618091e-05, - "loss": 5.9242, - "step": 8174 - }, - { - "epoch": 4.263363754889179, - "grad_norm": 1.7119444608688354, - "learning_rate": 9.228844221105528e-05, - "loss": 5.3234, - "step": 8175 - }, - { - "epoch": 4.263885267275098, - "grad_norm": 1.4962674379348755, - "learning_rate": 9.228743718592965e-05, - "loss": 5.6466, - "step": 8176 - }, - { - "epoch": 4.264406779661017, - "grad_norm": 1.4163269996643066, - "learning_rate": 9.228643216080403e-05, - "loss": 5.6879, - "step": 8177 - }, - { - "epoch": 4.264928292046936, - "grad_norm": 1.393664836883545, - "learning_rate": 9.228542713567839e-05, - "loss": 5.5205, - "step": 8178 - }, - { - "epoch": 4.265449804432855, - "grad_norm": 1.4195810556411743, - "learning_rate": 9.228442211055277e-05, - "loss": 5.6404, - "step": 8179 - }, - { - "epoch": 4.265971316818774, - "grad_norm": 1.4232943058013916, - "learning_rate": 9.228341708542713e-05, - "loss": 5.925, - "step": 8180 - }, - { - "epoch": 4.266492829204694, - "grad_norm": 1.4507029056549072, - "learning_rate": 9.228241206030151e-05, - "loss": 5.5794, - "step": 8181 - }, - { - "epoch": 4.267014341590613, - "grad_norm": 1.7533091306686401, - "learning_rate": 9.228140703517588e-05, - "loss": 5.5335, - "step": 8182 - }, - { - "epoch": 4.267535853976532, - "grad_norm": 1.7871967554092407, - "learning_rate": 9.228040201005025e-05, - "loss": 5.5422, - "step": 8183 - }, - { - "epoch": 4.268057366362451, - "grad_norm": 1.6704190969467163, - "learning_rate": 9.227939698492463e-05, - "loss": 5.6682, - "step": 8184 - }, - { - "epoch": 4.26857887874837, - "grad_norm": 1.37361478805542, - "learning_rate": 9.227839195979901e-05, - "loss": 5.9673, - "step": 8185 - }, - { - "epoch": 4.269100391134289, - "grad_norm": 1.5022215843200684, - "learning_rate": 9.227738693467337e-05, - "loss": 6.0113, - "step": 8186 - }, - { - "epoch": 4.269621903520209, - "grad_norm": 1.3324862718582153, - "learning_rate": 9.227638190954774e-05, - "loss": 6.1827, - "step": 8187 - }, - { - "epoch": 4.270143415906128, - "grad_norm": 1.4685600996017456, - "learning_rate": 9.227537688442212e-05, - "loss": 5.808, - "step": 8188 - }, - { - "epoch": 4.270664928292047, - "grad_norm": 1.6173632144927979, - "learning_rate": 9.227437185929648e-05, - "loss": 5.374, - "step": 8189 - }, - { - "epoch": 4.271186440677966, - "grad_norm": 1.5838748216629028, - "learning_rate": 9.227336683417086e-05, - "loss": 5.4901, - "step": 8190 - }, - { - "epoch": 4.271707953063885, - "grad_norm": 1.6378674507141113, - "learning_rate": 9.227236180904522e-05, - "loss": 5.0881, - "step": 8191 - }, - { - "epoch": 4.272229465449804, - "grad_norm": 1.5104975700378418, - "learning_rate": 9.22713567839196e-05, - "loss": 5.7734, - "step": 8192 - }, - { - "epoch": 4.272750977835724, - "grad_norm": 1.5110844373703003, - "learning_rate": 9.227035175879398e-05, - "loss": 5.7168, - "step": 8193 - }, - { - "epoch": 4.273272490221642, - "grad_norm": 1.524345874786377, - "learning_rate": 9.226934673366835e-05, - "loss": 5.417, - "step": 8194 - }, - { - "epoch": 4.273794002607562, - "grad_norm": 1.5579891204833984, - "learning_rate": 9.226834170854272e-05, - "loss": 5.137, - "step": 8195 - }, - { - "epoch": 4.274315514993481, - "grad_norm": 1.3418807983398438, - "learning_rate": 9.22673366834171e-05, - "loss": 6.0523, - "step": 8196 - }, - { - "epoch": 4.2748370273794, - "grad_norm": 1.3959513902664185, - "learning_rate": 9.226633165829146e-05, - "loss": 5.7976, - "step": 8197 - }, - { - "epoch": 4.275358539765319, - "grad_norm": 1.454040288925171, - "learning_rate": 9.226532663316584e-05, - "loss": 5.9359, - "step": 8198 - }, - { - "epoch": 4.275880052151239, - "grad_norm": 1.4026768207550049, - "learning_rate": 9.22643216080402e-05, - "loss": 5.7754, - "step": 8199 - }, - { - "epoch": 4.276401564537157, - "grad_norm": 1.3072502613067627, - "learning_rate": 9.226331658291457e-05, - "loss": 5.8386, - "step": 8200 - }, - { - "epoch": 4.276923076923077, - "grad_norm": 1.4089983701705933, - "learning_rate": 9.226231155778895e-05, - "loss": 5.7708, - "step": 8201 - }, - { - "epoch": 4.277444589308996, - "grad_norm": 1.3349277973175049, - "learning_rate": 9.226130653266331e-05, - "loss": 5.6967, - "step": 8202 - }, - { - "epoch": 4.277966101694915, - "grad_norm": 1.459149718284607, - "learning_rate": 9.226030150753769e-05, - "loss": 5.6941, - "step": 8203 - }, - { - "epoch": 4.278487614080834, - "grad_norm": 1.5814669132232666, - "learning_rate": 9.225929648241207e-05, - "loss": 5.2754, - "step": 8204 - }, - { - "epoch": 4.279009126466754, - "grad_norm": 1.4944489002227783, - "learning_rate": 9.225829145728644e-05, - "loss": 5.6699, - "step": 8205 - }, - { - "epoch": 4.279530638852672, - "grad_norm": 1.438774585723877, - "learning_rate": 9.225728643216081e-05, - "loss": 5.5953, - "step": 8206 - }, - { - "epoch": 4.280052151238592, - "grad_norm": 1.5741498470306396, - "learning_rate": 9.225628140703519e-05, - "loss": 5.5386, - "step": 8207 - }, - { - "epoch": 4.280573663624511, - "grad_norm": 1.5372480154037476, - "learning_rate": 9.225527638190955e-05, - "loss": 5.795, - "step": 8208 - }, - { - "epoch": 4.28109517601043, - "grad_norm": 1.536525845527649, - "learning_rate": 9.225427135678393e-05, - "loss": 5.6796, - "step": 8209 - }, - { - "epoch": 4.281616688396349, - "grad_norm": 1.7071441411972046, - "learning_rate": 9.225326633165829e-05, - "loss": 5.4935, - "step": 8210 - }, - { - "epoch": 4.282138200782269, - "grad_norm": 1.4764519929885864, - "learning_rate": 9.225226130653267e-05, - "loss": 6.0142, - "step": 8211 - }, - { - "epoch": 4.282659713168187, - "grad_norm": 1.614942193031311, - "learning_rate": 9.225125628140703e-05, - "loss": 5.4503, - "step": 8212 - }, - { - "epoch": 4.283181225554107, - "grad_norm": 1.6166187524795532, - "learning_rate": 9.225025125628141e-05, - "loss": 5.2353, - "step": 8213 - }, - { - "epoch": 4.283702737940026, - "grad_norm": 1.451638102531433, - "learning_rate": 9.224924623115579e-05, - "loss": 5.6093, - "step": 8214 - }, - { - "epoch": 4.284224250325945, - "grad_norm": 1.3889650106430054, - "learning_rate": 9.224824120603015e-05, - "loss": 5.9482, - "step": 8215 - }, - { - "epoch": 4.284745762711864, - "grad_norm": 1.9577572345733643, - "learning_rate": 9.224723618090453e-05, - "loss": 5.2527, - "step": 8216 - }, - { - "epoch": 4.285267275097784, - "grad_norm": 1.489493727684021, - "learning_rate": 9.22462311557789e-05, - "loss": 5.8727, - "step": 8217 - }, - { - "epoch": 4.285788787483702, - "grad_norm": 1.4302802085876465, - "learning_rate": 9.224522613065327e-05, - "loss": 6.0055, - "step": 8218 - }, - { - "epoch": 4.286310299869622, - "grad_norm": 1.3200981616973877, - "learning_rate": 9.224422110552764e-05, - "loss": 5.8409, - "step": 8219 - }, - { - "epoch": 4.286831812255541, - "grad_norm": 1.5777288675308228, - "learning_rate": 9.224321608040202e-05, - "loss": 5.2896, - "step": 8220 - }, - { - "epoch": 4.28735332464146, - "grad_norm": 1.763900637626648, - "learning_rate": 9.224221105527638e-05, - "loss": 4.8608, - "step": 8221 - }, - { - "epoch": 4.287874837027379, - "grad_norm": 1.4336694478988647, - "learning_rate": 9.224120603015076e-05, - "loss": 5.8655, - "step": 8222 - }, - { - "epoch": 4.288396349413299, - "grad_norm": 1.3968309164047241, - "learning_rate": 9.224020100502512e-05, - "loss": 5.249, - "step": 8223 - }, - { - "epoch": 4.288917861799217, - "grad_norm": 1.4851211309432983, - "learning_rate": 9.22391959798995e-05, - "loss": 6.0764, - "step": 8224 - }, - { - "epoch": 4.289439374185137, - "grad_norm": 1.6122325658798218, - "learning_rate": 9.223819095477388e-05, - "loss": 5.6006, - "step": 8225 - }, - { - "epoch": 4.289960886571056, - "grad_norm": 1.5910156965255737, - "learning_rate": 9.223718592964826e-05, - "loss": 5.6395, - "step": 8226 - }, - { - "epoch": 4.290482398956975, - "grad_norm": 1.6631559133529663, - "learning_rate": 9.223618090452262e-05, - "loss": 5.5392, - "step": 8227 - }, - { - "epoch": 4.291003911342894, - "grad_norm": 1.510602355003357, - "learning_rate": 9.223517587939698e-05, - "loss": 5.9885, - "step": 8228 - }, - { - "epoch": 4.291525423728814, - "grad_norm": 1.6547300815582275, - "learning_rate": 9.223417085427136e-05, - "loss": 5.4395, - "step": 8229 - }, - { - "epoch": 4.292046936114732, - "grad_norm": 1.6233593225479126, - "learning_rate": 9.223316582914573e-05, - "loss": 5.3968, - "step": 8230 - }, - { - "epoch": 4.292568448500652, - "grad_norm": 1.5003654956817627, - "learning_rate": 9.22321608040201e-05, - "loss": 5.7329, - "step": 8231 - }, - { - "epoch": 4.293089960886571, - "grad_norm": 1.7074711322784424, - "learning_rate": 9.223115577889447e-05, - "loss": 5.562, - "step": 8232 - }, - { - "epoch": 4.29361147327249, - "grad_norm": 1.3998714685440063, - "learning_rate": 9.223015075376885e-05, - "loss": 5.7568, - "step": 8233 - }, - { - "epoch": 4.294132985658409, - "grad_norm": 1.3641412258148193, - "learning_rate": 9.222914572864322e-05, - "loss": 5.9578, - "step": 8234 - }, - { - "epoch": 4.294654498044329, - "grad_norm": 1.5097975730895996, - "learning_rate": 9.22281407035176e-05, - "loss": 5.4587, - "step": 8235 - }, - { - "epoch": 4.295176010430247, - "grad_norm": 1.4471375942230225, - "learning_rate": 9.222713567839197e-05, - "loss": 5.8648, - "step": 8236 - }, - { - "epoch": 4.295697522816167, - "grad_norm": 1.5024443864822388, - "learning_rate": 9.222613065326634e-05, - "loss": 5.4659, - "step": 8237 - }, - { - "epoch": 4.296219035202086, - "grad_norm": 1.5273675918579102, - "learning_rate": 9.222512562814071e-05, - "loss": 5.8314, - "step": 8238 - }, - { - "epoch": 4.296740547588005, - "grad_norm": 1.409576177597046, - "learning_rate": 9.222412060301509e-05, - "loss": 5.9524, - "step": 8239 - }, - { - "epoch": 4.297262059973924, - "grad_norm": 1.3998360633850098, - "learning_rate": 9.222311557788945e-05, - "loss": 5.7659, - "step": 8240 - }, - { - "epoch": 4.297783572359844, - "grad_norm": 1.4987726211547852, - "learning_rate": 9.222211055276381e-05, - "loss": 5.7813, - "step": 8241 - }, - { - "epoch": 4.2983050847457624, - "grad_norm": 1.4891552925109863, - "learning_rate": 9.222110552763819e-05, - "loss": 5.8661, - "step": 8242 - }, - { - "epoch": 4.298826597131682, - "grad_norm": 1.4616069793701172, - "learning_rate": 9.222010050251256e-05, - "loss": 5.3899, - "step": 8243 - }, - { - "epoch": 4.299348109517601, - "grad_norm": 1.7069586515426636, - "learning_rate": 9.221909547738693e-05, - "loss": 5.781, - "step": 8244 - }, - { - "epoch": 4.29986962190352, - "grad_norm": 1.4463789463043213, - "learning_rate": 9.221809045226131e-05, - "loss": 5.7591, - "step": 8245 - }, - { - "epoch": 4.300391134289439, - "grad_norm": 1.6459046602249146, - "learning_rate": 9.221708542713569e-05, - "loss": 5.2271, - "step": 8246 - }, - { - "epoch": 4.300912646675359, - "grad_norm": 1.41484797000885, - "learning_rate": 9.221608040201005e-05, - "loss": 6.163, - "step": 8247 - }, - { - "epoch": 4.3014341590612775, - "grad_norm": 1.3951733112335205, - "learning_rate": 9.221507537688443e-05, - "loss": 5.7739, - "step": 8248 - }, - { - "epoch": 4.301955671447197, - "grad_norm": 1.483382225036621, - "learning_rate": 9.22140703517588e-05, - "loss": 5.7216, - "step": 8249 - }, - { - "epoch": 4.302477183833116, - "grad_norm": 1.5166797637939453, - "learning_rate": 9.221306532663317e-05, - "loss": 5.828, - "step": 8250 - }, - { - "epoch": 4.302998696219035, - "grad_norm": 1.7322787046432495, - "learning_rate": 9.221206030150754e-05, - "loss": 5.1868, - "step": 8251 - }, - { - "epoch": 4.303520208604954, - "grad_norm": 1.4843647480010986, - "learning_rate": 9.221105527638192e-05, - "loss": 5.3207, - "step": 8252 - }, - { - "epoch": 4.304041720990874, - "grad_norm": 1.5308222770690918, - "learning_rate": 9.221005025125628e-05, - "loss": 5.4294, - "step": 8253 - }, - { - "epoch": 4.3045632333767925, - "grad_norm": 1.370050311088562, - "learning_rate": 9.220904522613066e-05, - "loss": 5.8593, - "step": 8254 - }, - { - "epoch": 4.305084745762712, - "grad_norm": 1.439728021621704, - "learning_rate": 9.220804020100504e-05, - "loss": 5.2714, - "step": 8255 - }, - { - "epoch": 4.305606258148631, - "grad_norm": 1.3228998184204102, - "learning_rate": 9.22070351758794e-05, - "loss": 5.8065, - "step": 8256 - }, - { - "epoch": 4.30612777053455, - "grad_norm": 1.4349212646484375, - "learning_rate": 9.220603015075378e-05, - "loss": 5.8353, - "step": 8257 - }, - { - "epoch": 4.3066492829204694, - "grad_norm": 1.3288393020629883, - "learning_rate": 9.220502512562814e-05, - "loss": 6.1164, - "step": 8258 - }, - { - "epoch": 4.307170795306389, - "grad_norm": 1.3884156942367554, - "learning_rate": 9.220402010050252e-05, - "loss": 5.8933, - "step": 8259 - }, - { - "epoch": 4.3076923076923075, - "grad_norm": 1.8922733068466187, - "learning_rate": 9.220301507537689e-05, - "loss": 5.9789, - "step": 8260 - }, - { - "epoch": 4.308213820078227, - "grad_norm": 1.3610353469848633, - "learning_rate": 9.220201005025126e-05, - "loss": 5.9777, - "step": 8261 - }, - { - "epoch": 4.308735332464146, - "grad_norm": 1.5024555921554565, - "learning_rate": 9.220100502512563e-05, - "loss": 5.9569, - "step": 8262 - }, - { - "epoch": 4.309256844850065, - "grad_norm": 1.4392033815383911, - "learning_rate": 9.22e-05, - "loss": 5.6902, - "step": 8263 - }, - { - "epoch": 4.3097783572359845, - "grad_norm": 1.544990062713623, - "learning_rate": 9.219899497487437e-05, - "loss": 5.5373, - "step": 8264 - }, - { - "epoch": 4.310299869621904, - "grad_norm": 1.5070922374725342, - "learning_rate": 9.219798994974875e-05, - "loss": 5.6636, - "step": 8265 - }, - { - "epoch": 4.3108213820078225, - "grad_norm": 1.4082627296447754, - "learning_rate": 9.219698492462312e-05, - "loss": 5.9458, - "step": 8266 - }, - { - "epoch": 4.311342894393742, - "grad_norm": 1.4262490272521973, - "learning_rate": 9.219597989949749e-05, - "loss": 5.9827, - "step": 8267 - }, - { - "epoch": 4.311864406779661, - "grad_norm": 1.4335191249847412, - "learning_rate": 9.219497487437187e-05, - "loss": 5.4422, - "step": 8268 - }, - { - "epoch": 4.31238591916558, - "grad_norm": 1.3269152641296387, - "learning_rate": 9.219396984924623e-05, - "loss": 5.6946, - "step": 8269 - }, - { - "epoch": 4.3129074315514995, - "grad_norm": 1.596197485923767, - "learning_rate": 9.219296482412061e-05, - "loss": 5.2704, - "step": 8270 - }, - { - "epoch": 4.313428943937419, - "grad_norm": 1.4878579378128052, - "learning_rate": 9.219195979899497e-05, - "loss": 5.6245, - "step": 8271 - }, - { - "epoch": 4.3139504563233375, - "grad_norm": 1.4377573728561401, - "learning_rate": 9.219095477386935e-05, - "loss": 5.5765, - "step": 8272 - }, - { - "epoch": 4.314471968709257, - "grad_norm": 1.7823678255081177, - "learning_rate": 9.218994974874372e-05, - "loss": 4.9428, - "step": 8273 - }, - { - "epoch": 4.3149934810951756, - "grad_norm": 1.48689603805542, - "learning_rate": 9.21889447236181e-05, - "loss": 5.6034, - "step": 8274 - }, - { - "epoch": 4.315514993481095, - "grad_norm": 1.5591906309127808, - "learning_rate": 9.218793969849247e-05, - "loss": 5.7031, - "step": 8275 - }, - { - "epoch": 4.3160365058670145, - "grad_norm": 1.5318689346313477, - "learning_rate": 9.218693467336685e-05, - "loss": 5.032, - "step": 8276 - }, - { - "epoch": 4.316558018252934, - "grad_norm": 1.5653507709503174, - "learning_rate": 9.218592964824121e-05, - "loss": 5.8399, - "step": 8277 - }, - { - "epoch": 4.3170795306388525, - "grad_norm": 1.4897016286849976, - "learning_rate": 9.218492462311559e-05, - "loss": 5.8772, - "step": 8278 - }, - { - "epoch": 4.317601043024772, - "grad_norm": 1.5363280773162842, - "learning_rate": 9.218391959798996e-05, - "loss": 5.0585, - "step": 8279 - }, - { - "epoch": 4.318122555410691, - "grad_norm": 1.4643422365188599, - "learning_rate": 9.218291457286432e-05, - "loss": 5.839, - "step": 8280 - }, - { - "epoch": 4.31864406779661, - "grad_norm": 1.350993037223816, - "learning_rate": 9.21819095477387e-05, - "loss": 5.4726, - "step": 8281 - }, - { - "epoch": 4.3191655801825295, - "grad_norm": 1.6100324392318726, - "learning_rate": 9.218090452261306e-05, - "loss": 5.5356, - "step": 8282 - }, - { - "epoch": 4.319687092568449, - "grad_norm": 1.4671727418899536, - "learning_rate": 9.217989949748744e-05, - "loss": 5.4441, - "step": 8283 - }, - { - "epoch": 4.3202086049543675, - "grad_norm": 1.7861403226852417, - "learning_rate": 9.21788944723618e-05, - "loss": 5.6657, - "step": 8284 - }, - { - "epoch": 4.320730117340287, - "grad_norm": 1.4524741172790527, - "learning_rate": 9.217788944723618e-05, - "loss": 5.9449, - "step": 8285 - }, - { - "epoch": 4.321251629726206, - "grad_norm": 1.5441502332687378, - "learning_rate": 9.217688442211056e-05, - "loss": 5.8028, - "step": 8286 - }, - { - "epoch": 4.321773142112125, - "grad_norm": 1.441234827041626, - "learning_rate": 9.217587939698494e-05, - "loss": 5.4962, - "step": 8287 - }, - { - "epoch": 4.3222946544980445, - "grad_norm": 1.4963382482528687, - "learning_rate": 9.21748743718593e-05, - "loss": 5.8134, - "step": 8288 - }, - { - "epoch": 4.322816166883963, - "grad_norm": 1.4531426429748535, - "learning_rate": 9.217386934673368e-05, - "loss": 5.5566, - "step": 8289 - }, - { - "epoch": 4.3233376792698825, - "grad_norm": 1.3265399932861328, - "learning_rate": 9.217286432160804e-05, - "loss": 5.8595, - "step": 8290 - }, - { - "epoch": 4.323859191655802, - "grad_norm": 1.561715841293335, - "learning_rate": 9.217185929648242e-05, - "loss": 5.337, - "step": 8291 - }, - { - "epoch": 4.324380704041721, - "grad_norm": 1.500207543373108, - "learning_rate": 9.217085427135679e-05, - "loss": 5.9507, - "step": 8292 - }, - { - "epoch": 4.32490221642764, - "grad_norm": 1.454492449760437, - "learning_rate": 9.216984924623115e-05, - "loss": 5.8808, - "step": 8293 - }, - { - "epoch": 4.3254237288135595, - "grad_norm": 1.433907389640808, - "learning_rate": 9.216884422110553e-05, - "loss": 5.4905, - "step": 8294 - }, - { - "epoch": 4.325945241199478, - "grad_norm": 1.5488888025283813, - "learning_rate": 9.21678391959799e-05, - "loss": 5.8872, - "step": 8295 - }, - { - "epoch": 4.326466753585398, - "grad_norm": 1.6049820184707642, - "learning_rate": 9.216683417085428e-05, - "loss": 5.4669, - "step": 8296 - }, - { - "epoch": 4.326988265971317, - "grad_norm": 1.405737042427063, - "learning_rate": 9.216582914572865e-05, - "loss": 5.8427, - "step": 8297 - }, - { - "epoch": 4.327509778357236, - "grad_norm": 1.5014399290084839, - "learning_rate": 9.216482412060303e-05, - "loss": 5.7054, - "step": 8298 - }, - { - "epoch": 4.328031290743155, - "grad_norm": 1.642096996307373, - "learning_rate": 9.216381909547739e-05, - "loss": 5.5729, - "step": 8299 - }, - { - "epoch": 4.3285528031290745, - "grad_norm": 1.4084992408752441, - "learning_rate": 9.216281407035177e-05, - "loss": 5.9822, - "step": 8300 - }, - { - "epoch": 4.329074315514993, - "grad_norm": 1.5618395805358887, - "learning_rate": 9.216180904522613e-05, - "loss": 5.9319, - "step": 8301 - }, - { - "epoch": 4.329595827900913, - "grad_norm": 1.4172029495239258, - "learning_rate": 9.216080402010051e-05, - "loss": 5.7945, - "step": 8302 - }, - { - "epoch": 4.330117340286832, - "grad_norm": 1.3057911396026611, - "learning_rate": 9.215979899497487e-05, - "loss": 6.0817, - "step": 8303 - }, - { - "epoch": 4.330638852672751, - "grad_norm": 1.4400438070297241, - "learning_rate": 9.215879396984925e-05, - "loss": 5.963, - "step": 8304 - }, - { - "epoch": 4.33116036505867, - "grad_norm": 1.3010063171386719, - "learning_rate": 9.215778894472362e-05, - "loss": 5.8811, - "step": 8305 - }, - { - "epoch": 4.3316818774445895, - "grad_norm": 1.3969155550003052, - "learning_rate": 9.2156783919598e-05, - "loss": 5.9768, - "step": 8306 - }, - { - "epoch": 4.332203389830508, - "grad_norm": 1.3720359802246094, - "learning_rate": 9.215577889447237e-05, - "loss": 5.6884, - "step": 8307 - }, - { - "epoch": 4.332724902216428, - "grad_norm": 1.526016116142273, - "learning_rate": 9.215477386934674e-05, - "loss": 5.4623, - "step": 8308 - }, - { - "epoch": 4.333246414602347, - "grad_norm": 1.5156179666519165, - "learning_rate": 9.215376884422111e-05, - "loss": 5.4928, - "step": 8309 - }, - { - "epoch": 4.333767926988266, - "grad_norm": 1.4007017612457275, - "learning_rate": 9.215276381909548e-05, - "loss": 5.6169, - "step": 8310 - }, - { - "epoch": 4.334289439374185, - "grad_norm": 1.4022386074066162, - "learning_rate": 9.215175879396986e-05, - "loss": 5.9981, - "step": 8311 - }, - { - "epoch": 4.334810951760105, - "grad_norm": 1.5535482168197632, - "learning_rate": 9.215075376884422e-05, - "loss": 5.5519, - "step": 8312 - }, - { - "epoch": 4.335332464146023, - "grad_norm": 1.3952038288116455, - "learning_rate": 9.21497487437186e-05, - "loss": 5.9426, - "step": 8313 - }, - { - "epoch": 4.335853976531943, - "grad_norm": 1.3793989419937134, - "learning_rate": 9.214874371859296e-05, - "loss": 5.999, - "step": 8314 - }, - { - "epoch": 4.336375488917862, - "grad_norm": 1.4024592638015747, - "learning_rate": 9.214773869346734e-05, - "loss": 6.0287, - "step": 8315 - }, - { - "epoch": 4.336897001303781, - "grad_norm": 1.5889145135879517, - "learning_rate": 9.214673366834172e-05, - "loss": 5.5329, - "step": 8316 - }, - { - "epoch": 4.3374185136897, - "grad_norm": 1.406420350074768, - "learning_rate": 9.21457286432161e-05, - "loss": 5.6124, - "step": 8317 - }, - { - "epoch": 4.33794002607562, - "grad_norm": 1.611305832862854, - "learning_rate": 9.214472361809046e-05, - "loss": 5.2362, - "step": 8318 - }, - { - "epoch": 4.338461538461538, - "grad_norm": 1.5082974433898926, - "learning_rate": 9.214371859296484e-05, - "loss": 5.7524, - "step": 8319 - }, - { - "epoch": 4.338983050847458, - "grad_norm": 1.4624544382095337, - "learning_rate": 9.21427135678392e-05, - "loss": 5.2791, - "step": 8320 - }, - { - "epoch": 4.339504563233377, - "grad_norm": 1.4520232677459717, - "learning_rate": 9.214170854271357e-05, - "loss": 5.7896, - "step": 8321 - }, - { - "epoch": 4.340026075619296, - "grad_norm": 1.5389471054077148, - "learning_rate": 9.214070351758794e-05, - "loss": 5.5861, - "step": 8322 - }, - { - "epoch": 4.340547588005215, - "grad_norm": 1.5276226997375488, - "learning_rate": 9.213969849246231e-05, - "loss": 5.0941, - "step": 8323 - }, - { - "epoch": 4.341069100391135, - "grad_norm": 1.5981365442276, - "learning_rate": 9.213869346733669e-05, - "loss": 5.8451, - "step": 8324 - }, - { - "epoch": 4.341590612777053, - "grad_norm": 1.3374897241592407, - "learning_rate": 9.213768844221105e-05, - "loss": 6.0184, - "step": 8325 - }, - { - "epoch": 4.342112125162973, - "grad_norm": 1.3429638147354126, - "learning_rate": 9.213668341708543e-05, - "loss": 5.8044, - "step": 8326 - }, - { - "epoch": 4.342633637548892, - "grad_norm": 1.4750995635986328, - "learning_rate": 9.21356783919598e-05, - "loss": 5.4614, - "step": 8327 - }, - { - "epoch": 4.343155149934811, - "grad_norm": 1.5890671014785767, - "learning_rate": 9.213467336683418e-05, - "loss": 5.8785, - "step": 8328 - }, - { - "epoch": 4.34367666232073, - "grad_norm": 1.6134897470474243, - "learning_rate": 9.213366834170855e-05, - "loss": 5.6074, - "step": 8329 - }, - { - "epoch": 4.34419817470665, - "grad_norm": 1.398579716682434, - "learning_rate": 9.213266331658293e-05, - "loss": 5.3171, - "step": 8330 - }, - { - "epoch": 4.344719687092568, - "grad_norm": 1.5976992845535278, - "learning_rate": 9.213165829145729e-05, - "loss": 5.8038, - "step": 8331 - }, - { - "epoch": 4.345241199478488, - "grad_norm": 1.424699068069458, - "learning_rate": 9.213065326633167e-05, - "loss": 5.6848, - "step": 8332 - }, - { - "epoch": 4.345762711864407, - "grad_norm": 1.4674835205078125, - "learning_rate": 9.212964824120603e-05, - "loss": 5.7585, - "step": 8333 - }, - { - "epoch": 4.346284224250326, - "grad_norm": 1.492355227470398, - "learning_rate": 9.21286432160804e-05, - "loss": 5.9154, - "step": 8334 - }, - { - "epoch": 4.346805736636245, - "grad_norm": 1.5886064767837524, - "learning_rate": 9.212763819095477e-05, - "loss": 5.5573, - "step": 8335 - }, - { - "epoch": 4.347327249022165, - "grad_norm": 1.3893243074417114, - "learning_rate": 9.212663316582915e-05, - "loss": 5.8334, - "step": 8336 - }, - { - "epoch": 4.347848761408083, - "grad_norm": 1.530548095703125, - "learning_rate": 9.212562814070353e-05, - "loss": 5.563, - "step": 8337 - }, - { - "epoch": 4.348370273794003, - "grad_norm": 1.5304397344589233, - "learning_rate": 9.21246231155779e-05, - "loss": 5.6397, - "step": 8338 - }, - { - "epoch": 4.348891786179922, - "grad_norm": 1.4951177835464478, - "learning_rate": 9.212361809045227e-05, - "loss": 5.7045, - "step": 8339 - }, - { - "epoch": 4.349413298565841, - "grad_norm": 1.5254300832748413, - "learning_rate": 9.212261306532664e-05, - "loss": 5.2991, - "step": 8340 - }, - { - "epoch": 4.34993481095176, - "grad_norm": 1.5525823831558228, - "learning_rate": 9.212160804020101e-05, - "loss": 5.7492, - "step": 8341 - }, - { - "epoch": 4.35045632333768, - "grad_norm": 1.5241031646728516, - "learning_rate": 9.212060301507538e-05, - "loss": 5.3788, - "step": 8342 - }, - { - "epoch": 4.350977835723598, - "grad_norm": 1.4791374206542969, - "learning_rate": 9.211959798994976e-05, - "loss": 5.7003, - "step": 8343 - }, - { - "epoch": 4.351499348109518, - "grad_norm": 1.3853644132614136, - "learning_rate": 9.211859296482412e-05, - "loss": 5.5897, - "step": 8344 - }, - { - "epoch": 4.352020860495437, - "grad_norm": 1.4254686832427979, - "learning_rate": 9.21175879396985e-05, - "loss": 5.8085, - "step": 8345 - }, - { - "epoch": 4.352542372881356, - "grad_norm": 1.519871711730957, - "learning_rate": 9.211658291457286e-05, - "loss": 5.6836, - "step": 8346 - }, - { - "epoch": 4.353063885267275, - "grad_norm": 1.4386110305786133, - "learning_rate": 9.211557788944724e-05, - "loss": 5.8079, - "step": 8347 - }, - { - "epoch": 4.353585397653195, - "grad_norm": 1.5834639072418213, - "learning_rate": 9.211457286432162e-05, - "loss": 5.5403, - "step": 8348 - }, - { - "epoch": 4.354106910039113, - "grad_norm": 1.3099068403244019, - "learning_rate": 9.211356783919598e-05, - "loss": 6.0781, - "step": 8349 - }, - { - "epoch": 4.354628422425033, - "grad_norm": 1.4098165035247803, - "learning_rate": 9.211256281407036e-05, - "loss": 5.5207, - "step": 8350 - }, - { - "epoch": 4.355149934810952, - "grad_norm": 1.5285649299621582, - "learning_rate": 9.211155778894473e-05, - "loss": 5.6153, - "step": 8351 - }, - { - "epoch": 4.355671447196871, - "grad_norm": 1.410719394683838, - "learning_rate": 9.21105527638191e-05, - "loss": 5.8071, - "step": 8352 - }, - { - "epoch": 4.35619295958279, - "grad_norm": 1.394924521446228, - "learning_rate": 9.210954773869347e-05, - "loss": 5.8622, - "step": 8353 - }, - { - "epoch": 4.35671447196871, - "grad_norm": 1.5606770515441895, - "learning_rate": 9.210854271356785e-05, - "loss": 5.902, - "step": 8354 - }, - { - "epoch": 4.357235984354628, - "grad_norm": 1.6630443334579468, - "learning_rate": 9.210753768844221e-05, - "loss": 5.5758, - "step": 8355 - }, - { - "epoch": 4.357757496740548, - "grad_norm": 1.7421988248825073, - "learning_rate": 9.210653266331659e-05, - "loss": 5.5141, - "step": 8356 - }, - { - "epoch": 4.358279009126467, - "grad_norm": 1.5668036937713623, - "learning_rate": 9.210552763819095e-05, - "loss": 5.7357, - "step": 8357 - }, - { - "epoch": 4.358800521512386, - "grad_norm": 1.4011017084121704, - "learning_rate": 9.210452261306533e-05, - "loss": 5.433, - "step": 8358 - }, - { - "epoch": 4.359322033898305, - "grad_norm": 1.5381667613983154, - "learning_rate": 9.210351758793971e-05, - "loss": 5.9227, - "step": 8359 - }, - { - "epoch": 4.359843546284225, - "grad_norm": 1.486783742904663, - "learning_rate": 9.210251256281407e-05, - "loss": 5.8329, - "step": 8360 - }, - { - "epoch": 4.360365058670143, - "grad_norm": 1.5890350341796875, - "learning_rate": 9.210150753768845e-05, - "loss": 5.393, - "step": 8361 - }, - { - "epoch": 4.360886571056063, - "grad_norm": 1.354018211364746, - "learning_rate": 9.210050251256281e-05, - "loss": 5.428, - "step": 8362 - }, - { - "epoch": 4.361408083441981, - "grad_norm": 1.4974853992462158, - "learning_rate": 9.209949748743719e-05, - "loss": 5.8411, - "step": 8363 - }, - { - "epoch": 4.361929595827901, - "grad_norm": 1.6185853481292725, - "learning_rate": 9.209849246231156e-05, - "loss": 4.7514, - "step": 8364 - }, - { - "epoch": 4.36245110821382, - "grad_norm": 1.6668237447738647, - "learning_rate": 9.209748743718593e-05, - "loss": 5.3646, - "step": 8365 - }, - { - "epoch": 4.36297262059974, - "grad_norm": 1.7495858669281006, - "learning_rate": 9.20964824120603e-05, - "loss": 5.6432, - "step": 8366 - }, - { - "epoch": 4.363494132985658, - "grad_norm": 1.4996405839920044, - "learning_rate": 9.209547738693468e-05, - "loss": 5.3552, - "step": 8367 - }, - { - "epoch": 4.364015645371578, - "grad_norm": 1.5243061780929565, - "learning_rate": 9.209447236180905e-05, - "loss": 5.5889, - "step": 8368 - }, - { - "epoch": 4.364537157757496, - "grad_norm": 1.3953418731689453, - "learning_rate": 9.209346733668343e-05, - "loss": 5.8574, - "step": 8369 - }, - { - "epoch": 4.365058670143416, - "grad_norm": 1.4753308296203613, - "learning_rate": 9.20924623115578e-05, - "loss": 5.7372, - "step": 8370 - }, - { - "epoch": 4.365580182529335, - "grad_norm": 1.351660966873169, - "learning_rate": 9.209145728643217e-05, - "loss": 4.9903, - "step": 8371 - }, - { - "epoch": 4.366101694915255, - "grad_norm": 1.3709269762039185, - "learning_rate": 9.209045226130654e-05, - "loss": 5.9865, - "step": 8372 - }, - { - "epoch": 4.366623207301173, - "grad_norm": 1.3364819288253784, - "learning_rate": 9.20894472361809e-05, - "loss": 5.7665, - "step": 8373 - }, - { - "epoch": 4.367144719687093, - "grad_norm": 1.4243733882904053, - "learning_rate": 9.208844221105528e-05, - "loss": 5.3551, - "step": 8374 - }, - { - "epoch": 4.367666232073011, - "grad_norm": 1.391890525817871, - "learning_rate": 9.208743718592964e-05, - "loss": 6.086, - "step": 8375 - }, - { - "epoch": 4.368187744458931, - "grad_norm": 1.4085190296173096, - "learning_rate": 9.208643216080402e-05, - "loss": 5.7305, - "step": 8376 - }, - { - "epoch": 4.36870925684485, - "grad_norm": 1.4323530197143555, - "learning_rate": 9.208542713567839e-05, - "loss": 5.4991, - "step": 8377 - }, - { - "epoch": 4.36923076923077, - "grad_norm": 1.4663281440734863, - "learning_rate": 9.208442211055276e-05, - "loss": 5.7581, - "step": 8378 - }, - { - "epoch": 4.369752281616688, - "grad_norm": 1.542172908782959, - "learning_rate": 9.208341708542714e-05, - "loss": 5.6001, - "step": 8379 - }, - { - "epoch": 4.370273794002608, - "grad_norm": 1.4254447221755981, - "learning_rate": 9.208241206030152e-05, - "loss": 6.0444, - "step": 8380 - }, - { - "epoch": 4.370795306388526, - "grad_norm": 1.5768224000930786, - "learning_rate": 9.208140703517588e-05, - "loss": 5.5014, - "step": 8381 - }, - { - "epoch": 4.371316818774446, - "grad_norm": 1.4610263109207153, - "learning_rate": 9.208040201005026e-05, - "loss": 5.3423, - "step": 8382 - }, - { - "epoch": 4.371838331160365, - "grad_norm": 1.477096438407898, - "learning_rate": 9.207939698492463e-05, - "loss": 5.5269, - "step": 8383 - }, - { - "epoch": 4.372359843546284, - "grad_norm": 1.5048654079437256, - "learning_rate": 9.2078391959799e-05, - "loss": 5.8143, - "step": 8384 - }, - { - "epoch": 4.372881355932203, - "grad_norm": 1.406623125076294, - "learning_rate": 9.207738693467337e-05, - "loss": 5.9251, - "step": 8385 - }, - { - "epoch": 4.373402868318123, - "grad_norm": 1.4663423299789429, - "learning_rate": 9.207638190954773e-05, - "loss": 5.3457, - "step": 8386 - }, - { - "epoch": 4.373924380704041, - "grad_norm": 1.526286005973816, - "learning_rate": 9.207537688442211e-05, - "loss": 5.7158, - "step": 8387 - }, - { - "epoch": 4.374445893089961, - "grad_norm": 1.4195431470870972, - "learning_rate": 9.207437185929649e-05, - "loss": 5.9976, - "step": 8388 - }, - { - "epoch": 4.37496740547588, - "grad_norm": 1.8005800247192383, - "learning_rate": 9.207336683417087e-05, - "loss": 5.5613, - "step": 8389 - }, - { - "epoch": 4.375488917861799, - "grad_norm": 1.5153237581253052, - "learning_rate": 9.207236180904523e-05, - "loss": 5.5277, - "step": 8390 - }, - { - "epoch": 4.376010430247718, - "grad_norm": 1.590010166168213, - "learning_rate": 9.207135678391961e-05, - "loss": 5.2405, - "step": 8391 - }, - { - "epoch": 4.376531942633638, - "grad_norm": 1.463494062423706, - "learning_rate": 9.207035175879397e-05, - "loss": 5.9051, - "step": 8392 - }, - { - "epoch": 4.377053455019556, - "grad_norm": 1.7580204010009766, - "learning_rate": 9.206934673366835e-05, - "loss": 5.3555, - "step": 8393 - }, - { - "epoch": 4.377574967405476, - "grad_norm": 1.3823187351226807, - "learning_rate": 9.206834170854271e-05, - "loss": 5.576, - "step": 8394 - }, - { - "epoch": 4.378096479791395, - "grad_norm": 1.4613856077194214, - "learning_rate": 9.206733668341709e-05, - "loss": 5.8373, - "step": 8395 - }, - { - "epoch": 4.378617992177314, - "grad_norm": 1.7275230884552002, - "learning_rate": 9.206633165829146e-05, - "loss": 5.4033, - "step": 8396 - }, - { - "epoch": 4.379139504563233, - "grad_norm": 1.3858929872512817, - "learning_rate": 9.206532663316583e-05, - "loss": 5.7299, - "step": 8397 - }, - { - "epoch": 4.379661016949153, - "grad_norm": 1.3780758380889893, - "learning_rate": 9.20643216080402e-05, - "loss": 5.643, - "step": 8398 - }, - { - "epoch": 4.380182529335071, - "grad_norm": 1.374350666999817, - "learning_rate": 9.206331658291458e-05, - "loss": 5.8481, - "step": 8399 - }, - { - "epoch": 4.380704041720991, - "grad_norm": 1.5050560235977173, - "learning_rate": 9.206231155778895e-05, - "loss": 5.2694, - "step": 8400 - }, - { - "epoch": 4.38122555410691, - "grad_norm": 1.3673675060272217, - "learning_rate": 9.206130653266332e-05, - "loss": 4.9404, - "step": 8401 - }, - { - "epoch": 4.381747066492829, - "grad_norm": 1.3309860229492188, - "learning_rate": 9.20603015075377e-05, - "loss": 5.8157, - "step": 8402 - }, - { - "epoch": 4.382268578878748, - "grad_norm": 1.5175151824951172, - "learning_rate": 9.205929648241206e-05, - "loss": 5.4714, - "step": 8403 - }, - { - "epoch": 4.382790091264668, - "grad_norm": 1.5734177827835083, - "learning_rate": 9.205829145728644e-05, - "loss": 5.0133, - "step": 8404 - }, - { - "epoch": 4.383311603650586, - "grad_norm": 1.4373915195465088, - "learning_rate": 9.20572864321608e-05, - "loss": 5.6947, - "step": 8405 - }, - { - "epoch": 4.383833116036506, - "grad_norm": 1.4614757299423218, - "learning_rate": 9.205628140703518e-05, - "loss": 5.6787, - "step": 8406 - }, - { - "epoch": 4.384354628422425, - "grad_norm": 1.3880873918533325, - "learning_rate": 9.205527638190954e-05, - "loss": 5.4356, - "step": 8407 - }, - { - "epoch": 4.384876140808344, - "grad_norm": 1.4495278596878052, - "learning_rate": 9.205427135678392e-05, - "loss": 6.1845, - "step": 8408 - }, - { - "epoch": 4.385397653194263, - "grad_norm": 1.4518636465072632, - "learning_rate": 9.20532663316583e-05, - "loss": 5.6366, - "step": 8409 - }, - { - "epoch": 4.385919165580183, - "grad_norm": 1.5414338111877441, - "learning_rate": 9.205226130653268e-05, - "loss": 5.6159, - "step": 8410 - }, - { - "epoch": 4.386440677966101, - "grad_norm": 1.560157299041748, - "learning_rate": 9.205125628140704e-05, - "loss": 5.6532, - "step": 8411 - }, - { - "epoch": 4.386962190352021, - "grad_norm": 1.695234775543213, - "learning_rate": 9.205025125628142e-05, - "loss": 5.7587, - "step": 8412 - }, - { - "epoch": 4.38748370273794, - "grad_norm": 1.6355156898498535, - "learning_rate": 9.204924623115578e-05, - "loss": 5.7145, - "step": 8413 - }, - { - "epoch": 4.388005215123859, - "grad_norm": 1.4882831573486328, - "learning_rate": 9.204824120603015e-05, - "loss": 5.1573, - "step": 8414 - }, - { - "epoch": 4.388526727509778, - "grad_norm": 1.3983027935028076, - "learning_rate": 9.204723618090453e-05, - "loss": 5.6067, - "step": 8415 - }, - { - "epoch": 4.389048239895698, - "grad_norm": 1.521182656288147, - "learning_rate": 9.204623115577889e-05, - "loss": 5.9549, - "step": 8416 - }, - { - "epoch": 4.389569752281616, - "grad_norm": 1.403138518333435, - "learning_rate": 9.204522613065327e-05, - "loss": 5.6383, - "step": 8417 - }, - { - "epoch": 4.390091264667536, - "grad_norm": 1.5549063682556152, - "learning_rate": 9.204422110552763e-05, - "loss": 5.5345, - "step": 8418 - }, - { - "epoch": 4.390612777053455, - "grad_norm": 1.3787462711334229, - "learning_rate": 9.204321608040201e-05, - "loss": 5.7699, - "step": 8419 - }, - { - "epoch": 4.391134289439374, - "grad_norm": 2.512211799621582, - "learning_rate": 9.204221105527639e-05, - "loss": 4.9544, - "step": 8420 - }, - { - "epoch": 4.391655801825293, - "grad_norm": 1.5605902671813965, - "learning_rate": 9.204120603015077e-05, - "loss": 5.5965, - "step": 8421 - }, - { - "epoch": 4.392177314211213, - "grad_norm": 1.5183457136154175, - "learning_rate": 9.204020100502513e-05, - "loss": 5.7996, - "step": 8422 - }, - { - "epoch": 4.392698826597131, - "grad_norm": 1.361090064048767, - "learning_rate": 9.203919597989951e-05, - "loss": 6.0559, - "step": 8423 - }, - { - "epoch": 4.393220338983051, - "grad_norm": 1.4565188884735107, - "learning_rate": 9.203819095477387e-05, - "loss": 5.434, - "step": 8424 - }, - { - "epoch": 4.39374185136897, - "grad_norm": 1.4279438257217407, - "learning_rate": 9.203718592964825e-05, - "loss": 5.9133, - "step": 8425 - }, - { - "epoch": 4.394263363754889, - "grad_norm": 1.3095670938491821, - "learning_rate": 9.203618090452262e-05, - "loss": 6.0918, - "step": 8426 - }, - { - "epoch": 4.394784876140808, - "grad_norm": 1.5468206405639648, - "learning_rate": 9.203517587939698e-05, - "loss": 5.5588, - "step": 8427 - }, - { - "epoch": 4.395306388526728, - "grad_norm": 1.5258855819702148, - "learning_rate": 9.203417085427136e-05, - "loss": 5.4129, - "step": 8428 - }, - { - "epoch": 4.395827900912646, - "grad_norm": 1.4535568952560425, - "learning_rate": 9.203316582914574e-05, - "loss": 5.5887, - "step": 8429 - }, - { - "epoch": 4.396349413298566, - "grad_norm": 1.3691370487213135, - "learning_rate": 9.203216080402011e-05, - "loss": 5.8739, - "step": 8430 - }, - { - "epoch": 4.396870925684485, - "grad_norm": 1.5456101894378662, - "learning_rate": 9.203115577889448e-05, - "loss": 5.6534, - "step": 8431 - }, - { - "epoch": 4.397392438070404, - "grad_norm": 1.592221736907959, - "learning_rate": 9.203015075376885e-05, - "loss": 4.8166, - "step": 8432 - }, - { - "epoch": 4.397913950456323, - "grad_norm": 1.545013189315796, - "learning_rate": 9.202914572864322e-05, - "loss": 5.7012, - "step": 8433 - }, - { - "epoch": 4.398435462842243, - "grad_norm": 1.4341459274291992, - "learning_rate": 9.20281407035176e-05, - "loss": 5.5957, - "step": 8434 - }, - { - "epoch": 4.398956975228161, - "grad_norm": 1.4897041320800781, - "learning_rate": 9.202713567839196e-05, - "loss": 5.5212, - "step": 8435 - }, - { - "epoch": 4.399478487614081, - "grad_norm": 1.4104868173599243, - "learning_rate": 9.202613065326634e-05, - "loss": 5.3544, - "step": 8436 - }, - { - "epoch": 4.4, - "grad_norm": 1.4893500804901123, - "learning_rate": 9.20251256281407e-05, - "loss": 5.6922, - "step": 8437 - }, - { - "epoch": 4.400521512385919, - "grad_norm": 1.5361658334732056, - "learning_rate": 9.202412060301508e-05, - "loss": 5.4917, - "step": 8438 - }, - { - "epoch": 4.401043024771838, - "grad_norm": 1.4369691610336304, - "learning_rate": 9.202311557788945e-05, - "loss": 5.7396, - "step": 8439 - }, - { - "epoch": 4.401564537157758, - "grad_norm": 1.6548476219177246, - "learning_rate": 9.202211055276382e-05, - "loss": 5.4319, - "step": 8440 - }, - { - "epoch": 4.402086049543676, - "grad_norm": 1.457098126411438, - "learning_rate": 9.20211055276382e-05, - "loss": 5.8626, - "step": 8441 - }, - { - "epoch": 4.402607561929596, - "grad_norm": 1.4279136657714844, - "learning_rate": 9.202010050251257e-05, - "loss": 5.7792, - "step": 8442 - }, - { - "epoch": 4.403129074315515, - "grad_norm": 1.5216660499572754, - "learning_rate": 9.201909547738694e-05, - "loss": 5.8591, - "step": 8443 - }, - { - "epoch": 4.403650586701434, - "grad_norm": 1.7144232988357544, - "learning_rate": 9.201809045226131e-05, - "loss": 4.9994, - "step": 8444 - }, - { - "epoch": 4.404172099087353, - "grad_norm": 1.5926251411437988, - "learning_rate": 9.201708542713569e-05, - "loss": 5.5904, - "step": 8445 - }, - { - "epoch": 4.404693611473273, - "grad_norm": 1.6200439929962158, - "learning_rate": 9.201608040201005e-05, - "loss": 5.2469, - "step": 8446 - }, - { - "epoch": 4.4052151238591915, - "grad_norm": 1.4537465572357178, - "learning_rate": 9.201507537688443e-05, - "loss": 5.9686, - "step": 8447 - }, - { - "epoch": 4.405736636245111, - "grad_norm": 1.345384955406189, - "learning_rate": 9.201407035175879e-05, - "loss": 6.07, - "step": 8448 - }, - { - "epoch": 4.40625814863103, - "grad_norm": 1.4377410411834717, - "learning_rate": 9.201306532663317e-05, - "loss": 5.953, - "step": 8449 - }, - { - "epoch": 4.406779661016949, - "grad_norm": 1.4386775493621826, - "learning_rate": 9.201206030150755e-05, - "loss": 5.8901, - "step": 8450 - }, - { - "epoch": 4.407301173402868, - "grad_norm": 1.4346686601638794, - "learning_rate": 9.201105527638193e-05, - "loss": 6.0064, - "step": 8451 - }, - { - "epoch": 4.407822685788788, - "grad_norm": 1.4705733060836792, - "learning_rate": 9.201005025125629e-05, - "loss": 5.8569, - "step": 8452 - }, - { - "epoch": 4.4083441981747065, - "grad_norm": 1.4149116277694702, - "learning_rate": 9.200904522613065e-05, - "loss": 5.9548, - "step": 8453 - }, - { - "epoch": 4.408865710560626, - "grad_norm": 1.5103158950805664, - "learning_rate": 9.200804020100503e-05, - "loss": 5.9015, - "step": 8454 - }, - { - "epoch": 4.409387222946545, - "grad_norm": 1.4623289108276367, - "learning_rate": 9.20070351758794e-05, - "loss": 5.5835, - "step": 8455 - }, - { - "epoch": 4.409908735332464, - "grad_norm": 1.57591712474823, - "learning_rate": 9.200603015075377e-05, - "loss": 5.148, - "step": 8456 - }, - { - "epoch": 4.410430247718383, - "grad_norm": 1.4319919347763062, - "learning_rate": 9.200502512562814e-05, - "loss": 5.8617, - "step": 8457 - }, - { - "epoch": 4.410951760104302, - "grad_norm": 1.5666691064834595, - "learning_rate": 9.200402010050252e-05, - "loss": 4.8042, - "step": 8458 - }, - { - "epoch": 4.4114732724902215, - "grad_norm": 1.3124778270721436, - "learning_rate": 9.200301507537688e-05, - "loss": 5.9753, - "step": 8459 - }, - { - "epoch": 4.411994784876141, - "grad_norm": 1.4362339973449707, - "learning_rate": 9.200201005025126e-05, - "loss": 5.7913, - "step": 8460 - }, - { - "epoch": 4.41251629726206, - "grad_norm": 1.4630024433135986, - "learning_rate": 9.200100502512564e-05, - "loss": 5.9437, - "step": 8461 - }, - { - "epoch": 4.413037809647979, - "grad_norm": 1.4020715951919556, - "learning_rate": 9.200000000000001e-05, - "loss": 5.4026, - "step": 8462 - }, - { - "epoch": 4.4135593220338984, - "grad_norm": 1.4005388021469116, - "learning_rate": 9.199899497487438e-05, - "loss": 5.8785, - "step": 8463 - }, - { - "epoch": 4.414080834419817, - "grad_norm": 1.3915948867797852, - "learning_rate": 9.199798994974876e-05, - "loss": 6.0384, - "step": 8464 - }, - { - "epoch": 4.4146023468057365, - "grad_norm": 1.4855458736419678, - "learning_rate": 9.199698492462312e-05, - "loss": 5.8079, - "step": 8465 - }, - { - "epoch": 4.415123859191656, - "grad_norm": 1.4312002658843994, - "learning_rate": 9.199597989949748e-05, - "loss": 5.6462, - "step": 8466 - }, - { - "epoch": 4.415645371577575, - "grad_norm": 1.4861952066421509, - "learning_rate": 9.199497487437186e-05, - "loss": 5.4427, - "step": 8467 - }, - { - "epoch": 4.416166883963494, - "grad_norm": 1.4727098941802979, - "learning_rate": 9.199396984924623e-05, - "loss": 5.9071, - "step": 8468 - }, - { - "epoch": 4.4166883963494135, - "grad_norm": 1.5043220520019531, - "learning_rate": 9.19929648241206e-05, - "loss": 5.9624, - "step": 8469 - }, - { - "epoch": 4.417209908735332, - "grad_norm": 1.3935840129852295, - "learning_rate": 9.199195979899498e-05, - "loss": 5.869, - "step": 8470 - }, - { - "epoch": 4.4177314211212515, - "grad_norm": 1.4584107398986816, - "learning_rate": 9.199095477386936e-05, - "loss": 5.9459, - "step": 8471 - }, - { - "epoch": 4.418252933507171, - "grad_norm": 1.551141619682312, - "learning_rate": 9.198994974874372e-05, - "loss": 5.5913, - "step": 8472 - }, - { - "epoch": 4.41877444589309, - "grad_norm": 1.510519027709961, - "learning_rate": 9.19889447236181e-05, - "loss": 5.4905, - "step": 8473 - }, - { - "epoch": 4.419295958279009, - "grad_norm": 1.5230745077133179, - "learning_rate": 9.198793969849247e-05, - "loss": 4.9994, - "step": 8474 - }, - { - "epoch": 4.4198174706649285, - "grad_norm": 1.6600494384765625, - "learning_rate": 9.198693467336684e-05, - "loss": 5.5652, - "step": 8475 - }, - { - "epoch": 4.420338983050847, - "grad_norm": 1.615227460861206, - "learning_rate": 9.198592964824121e-05, - "loss": 5.6311, - "step": 8476 - }, - { - "epoch": 4.4208604954367665, - "grad_norm": 1.4867579936981201, - "learning_rate": 9.198492462311559e-05, - "loss": 5.8333, - "step": 8477 - }, - { - "epoch": 4.421382007822686, - "grad_norm": 1.6071964502334595, - "learning_rate": 9.198391959798995e-05, - "loss": 5.53, - "step": 8478 - }, - { - "epoch": 4.4219035202086046, - "grad_norm": 1.3992449045181274, - "learning_rate": 9.198291457286433e-05, - "loss": 5.9556, - "step": 8479 - }, - { - "epoch": 4.422425032594524, - "grad_norm": 1.4068150520324707, - "learning_rate": 9.198190954773869e-05, - "loss": 5.9768, - "step": 8480 - }, - { - "epoch": 4.4229465449804435, - "grad_norm": 1.6571662425994873, - "learning_rate": 9.198090452261307e-05, - "loss": 5.2103, - "step": 8481 - }, - { - "epoch": 4.423468057366362, - "grad_norm": 1.4443618059158325, - "learning_rate": 9.197989949748745e-05, - "loss": 5.6709, - "step": 8482 - }, - { - "epoch": 4.4239895697522815, - "grad_norm": 1.5400941371917725, - "learning_rate": 9.197889447236181e-05, - "loss": 5.781, - "step": 8483 - }, - { - "epoch": 4.424511082138201, - "grad_norm": 1.4461978673934937, - "learning_rate": 9.197788944723619e-05, - "loss": 5.9138, - "step": 8484 - }, - { - "epoch": 4.42503259452412, - "grad_norm": 1.4809643030166626, - "learning_rate": 9.197688442211055e-05, - "loss": 5.9468, - "step": 8485 - }, - { - "epoch": 4.425554106910039, - "grad_norm": 1.3932218551635742, - "learning_rate": 9.197587939698493e-05, - "loss": 6.0351, - "step": 8486 - }, - { - "epoch": 4.4260756192959585, - "grad_norm": 1.3488879203796387, - "learning_rate": 9.19748743718593e-05, - "loss": 5.4122, - "step": 8487 - }, - { - "epoch": 4.426597131681877, - "grad_norm": 1.7205002307891846, - "learning_rate": 9.197386934673367e-05, - "loss": 5.3307, - "step": 8488 - }, - { - "epoch": 4.4271186440677965, - "grad_norm": 1.49896240234375, - "learning_rate": 9.197286432160804e-05, - "loss": 5.7515, - "step": 8489 - }, - { - "epoch": 4.427640156453716, - "grad_norm": 1.4991713762283325, - "learning_rate": 9.197185929648242e-05, - "loss": 5.8296, - "step": 8490 - }, - { - "epoch": 4.428161668839635, - "grad_norm": 1.3671789169311523, - "learning_rate": 9.19708542713568e-05, - "loss": 5.7061, - "step": 8491 - }, - { - "epoch": 4.428683181225554, - "grad_norm": 1.4467412233352661, - "learning_rate": 9.196984924623117e-05, - "loss": 5.9015, - "step": 8492 - }, - { - "epoch": 4.4292046936114735, - "grad_norm": 1.3666800260543823, - "learning_rate": 9.196884422110554e-05, - "loss": 6.086, - "step": 8493 - }, - { - "epoch": 4.429726205997392, - "grad_norm": 1.439660906791687, - "learning_rate": 9.19678391959799e-05, - "loss": 5.7588, - "step": 8494 - }, - { - "epoch": 4.4302477183833116, - "grad_norm": 1.4936786890029907, - "learning_rate": 9.196683417085428e-05, - "loss": 5.8316, - "step": 8495 - }, - { - "epoch": 4.430769230769231, - "grad_norm": 1.4797965288162231, - "learning_rate": 9.196582914572864e-05, - "loss": 5.9401, - "step": 8496 - }, - { - "epoch": 4.43129074315515, - "grad_norm": 1.413234829902649, - "learning_rate": 9.196482412060302e-05, - "loss": 5.7898, - "step": 8497 - }, - { - "epoch": 4.431812255541069, - "grad_norm": 1.5038443803787231, - "learning_rate": 9.196381909547739e-05, - "loss": 5.3227, - "step": 8498 - }, - { - "epoch": 4.4323337679269885, - "grad_norm": 1.6399686336517334, - "learning_rate": 9.196281407035176e-05, - "loss": 5.254, - "step": 8499 - }, - { - "epoch": 4.432855280312907, - "grad_norm": 1.483466386795044, - "learning_rate": 9.196180904522613e-05, - "loss": 5.5794, - "step": 8500 - }, - { - "epoch": 4.433376792698827, - "grad_norm": 1.499801754951477, - "learning_rate": 9.19608040201005e-05, - "loss": 5.3308, - "step": 8501 - }, - { - "epoch": 4.433898305084746, - "grad_norm": 1.4119614362716675, - "learning_rate": 9.195979899497488e-05, - "loss": 5.641, - "step": 8502 - }, - { - "epoch": 4.434419817470665, - "grad_norm": 1.4367282390594482, - "learning_rate": 9.195879396984926e-05, - "loss": 5.9951, - "step": 8503 - }, - { - "epoch": 4.434941329856584, - "grad_norm": 1.3815664052963257, - "learning_rate": 9.195778894472362e-05, - "loss": 5.8525, - "step": 8504 - }, - { - "epoch": 4.4354628422425035, - "grad_norm": 1.451997995376587, - "learning_rate": 9.1956783919598e-05, - "loss": 5.5217, - "step": 8505 - }, - { - "epoch": 4.435984354628422, - "grad_norm": 1.4958455562591553, - "learning_rate": 9.195577889447237e-05, - "loss": 5.5782, - "step": 8506 - }, - { - "epoch": 4.436505867014342, - "grad_norm": 1.344444990158081, - "learning_rate": 9.195477386934673e-05, - "loss": 5.9074, - "step": 8507 - }, - { - "epoch": 4.437027379400261, - "grad_norm": 1.3787497282028198, - "learning_rate": 9.195376884422111e-05, - "loss": 6.2664, - "step": 8508 - }, - { - "epoch": 4.43754889178618, - "grad_norm": 1.5069832801818848, - "learning_rate": 9.195276381909547e-05, - "loss": 5.8114, - "step": 8509 - }, - { - "epoch": 4.438070404172099, - "grad_norm": 1.4131133556365967, - "learning_rate": 9.195175879396985e-05, - "loss": 6.0574, - "step": 8510 - }, - { - "epoch": 4.4385919165580185, - "grad_norm": 1.4292198419570923, - "learning_rate": 9.195075376884422e-05, - "loss": 5.8268, - "step": 8511 - }, - { - "epoch": 4.439113428943937, - "grad_norm": 1.4499297142028809, - "learning_rate": 9.19497487437186e-05, - "loss": 5.9782, - "step": 8512 - }, - { - "epoch": 4.439634941329857, - "grad_norm": 1.5884841680526733, - "learning_rate": 9.194874371859297e-05, - "loss": 5.8748, - "step": 8513 - }, - { - "epoch": 4.440156453715776, - "grad_norm": 1.465819001197815, - "learning_rate": 9.194773869346735e-05, - "loss": 5.8577, - "step": 8514 - }, - { - "epoch": 4.440677966101695, - "grad_norm": 1.4402958154678345, - "learning_rate": 9.194673366834171e-05, - "loss": 5.5536, - "step": 8515 - }, - { - "epoch": 4.441199478487614, - "grad_norm": 1.5382643938064575, - "learning_rate": 9.194572864321609e-05, - "loss": 6.157, - "step": 8516 - }, - { - "epoch": 4.441720990873534, - "grad_norm": 1.4057233333587646, - "learning_rate": 9.194472361809046e-05, - "loss": 5.8058, - "step": 8517 - }, - { - "epoch": 4.442242503259452, - "grad_norm": 1.4820387363433838, - "learning_rate": 9.194371859296483e-05, - "loss": 5.8308, - "step": 8518 - }, - { - "epoch": 4.442764015645372, - "grad_norm": 1.5606887340545654, - "learning_rate": 9.19427135678392e-05, - "loss": 5.6488, - "step": 8519 - }, - { - "epoch": 4.443285528031291, - "grad_norm": 1.631502389907837, - "learning_rate": 9.194170854271356e-05, - "loss": 5.4746, - "step": 8520 - }, - { - "epoch": 4.44380704041721, - "grad_norm": 1.4100278615951538, - "learning_rate": 9.194070351758794e-05, - "loss": 5.7006, - "step": 8521 - }, - { - "epoch": 4.444328552803129, - "grad_norm": 1.665969967842102, - "learning_rate": 9.193969849246232e-05, - "loss": 5.7621, - "step": 8522 - }, - { - "epoch": 4.444850065189049, - "grad_norm": 1.6897965669631958, - "learning_rate": 9.19386934673367e-05, - "loss": 4.9844, - "step": 8523 - }, - { - "epoch": 4.445371577574967, - "grad_norm": 1.3030211925506592, - "learning_rate": 9.193768844221106e-05, - "loss": 5.2708, - "step": 8524 - }, - { - "epoch": 4.445893089960887, - "grad_norm": 1.5535478591918945, - "learning_rate": 9.193668341708544e-05, - "loss": 5.5621, - "step": 8525 - }, - { - "epoch": 4.446414602346806, - "grad_norm": 1.4582056999206543, - "learning_rate": 9.19356783919598e-05, - "loss": 5.7835, - "step": 8526 - }, - { - "epoch": 4.446936114732725, - "grad_norm": 1.5168739557266235, - "learning_rate": 9.193467336683418e-05, - "loss": 5.5721, - "step": 8527 - }, - { - "epoch": 4.447457627118644, - "grad_norm": 1.4553214311599731, - "learning_rate": 9.193366834170854e-05, - "loss": 5.6738, - "step": 8528 - }, - { - "epoch": 4.447979139504564, - "grad_norm": 1.5080841779708862, - "learning_rate": 9.193266331658292e-05, - "loss": 6.1544, - "step": 8529 - }, - { - "epoch": 4.448500651890482, - "grad_norm": 1.4700599908828735, - "learning_rate": 9.193165829145729e-05, - "loss": 6.0179, - "step": 8530 - }, - { - "epoch": 4.449022164276402, - "grad_norm": 1.6179734468460083, - "learning_rate": 9.193065326633166e-05, - "loss": 5.7113, - "step": 8531 - }, - { - "epoch": 4.449543676662321, - "grad_norm": 1.3442050218582153, - "learning_rate": 9.192964824120603e-05, - "loss": 6.1887, - "step": 8532 - }, - { - "epoch": 4.45006518904824, - "grad_norm": 1.412664532661438, - "learning_rate": 9.19286432160804e-05, - "loss": 5.4351, - "step": 8533 - }, - { - "epoch": 4.450586701434159, - "grad_norm": 1.536219596862793, - "learning_rate": 9.192763819095478e-05, - "loss": 5.2847, - "step": 8534 - }, - { - "epoch": 4.451108213820079, - "grad_norm": 1.3760039806365967, - "learning_rate": 9.192663316582915e-05, - "loss": 5.6688, - "step": 8535 - }, - { - "epoch": 4.451629726205997, - "grad_norm": 1.4286408424377441, - "learning_rate": 9.192562814070353e-05, - "loss": 5.7638, - "step": 8536 - }, - { - "epoch": 4.452151238591917, - "grad_norm": 1.4032690525054932, - "learning_rate": 9.192462311557789e-05, - "loss": 6.0695, - "step": 8537 - }, - { - "epoch": 4.452672750977836, - "grad_norm": 1.4611767530441284, - "learning_rate": 9.192361809045227e-05, - "loss": 5.6933, - "step": 8538 - }, - { - "epoch": 4.453194263363755, - "grad_norm": 2.585259199142456, - "learning_rate": 9.192261306532663e-05, - "loss": 5.4461, - "step": 8539 - }, - { - "epoch": 4.453715775749674, - "grad_norm": 1.4198665618896484, - "learning_rate": 9.192160804020101e-05, - "loss": 5.8548, - "step": 8540 - }, - { - "epoch": 4.454237288135594, - "grad_norm": 1.4937433004379272, - "learning_rate": 9.192060301507537e-05, - "loss": 5.1945, - "step": 8541 - }, - { - "epoch": 4.454758800521512, - "grad_norm": 1.4448587894439697, - "learning_rate": 9.191959798994975e-05, - "loss": 5.9158, - "step": 8542 - }, - { - "epoch": 4.455280312907432, - "grad_norm": 1.4473978281021118, - "learning_rate": 9.191859296482413e-05, - "loss": 5.8868, - "step": 8543 - }, - { - "epoch": 4.455801825293351, - "grad_norm": 1.4440726041793823, - "learning_rate": 9.191758793969851e-05, - "loss": 5.8285, - "step": 8544 - }, - { - "epoch": 4.45632333767927, - "grad_norm": 1.518088459968567, - "learning_rate": 9.191658291457287e-05, - "loss": 5.5903, - "step": 8545 - }, - { - "epoch": 4.456844850065189, - "grad_norm": 1.5184831619262695, - "learning_rate": 9.191557788944724e-05, - "loss": 5.8502, - "step": 8546 - }, - { - "epoch": 4.457366362451109, - "grad_norm": 1.522686243057251, - "learning_rate": 9.191457286432161e-05, - "loss": 5.3082, - "step": 8547 - }, - { - "epoch": 4.457887874837027, - "grad_norm": 1.6005076169967651, - "learning_rate": 9.191356783919598e-05, - "loss": 5.9446, - "step": 8548 - }, - { - "epoch": 4.458409387222947, - "grad_norm": 1.3821306228637695, - "learning_rate": 9.191256281407036e-05, - "loss": 5.7349, - "step": 8549 - }, - { - "epoch": 4.458930899608866, - "grad_norm": 1.351000428199768, - "learning_rate": 9.191155778894472e-05, - "loss": 5.8158, - "step": 8550 - }, - { - "epoch": 4.459452411994785, - "grad_norm": 1.5363116264343262, - "learning_rate": 9.19105527638191e-05, - "loss": 5.8607, - "step": 8551 - }, - { - "epoch": 4.459973924380704, - "grad_norm": 1.4856208562850952, - "learning_rate": 9.190954773869346e-05, - "loss": 5.8489, - "step": 8552 - }, - { - "epoch": 4.460495436766623, - "grad_norm": 1.3818703889846802, - "learning_rate": 9.190854271356784e-05, - "loss": 5.886, - "step": 8553 - }, - { - "epoch": 4.461016949152542, - "grad_norm": 1.5135165452957153, - "learning_rate": 9.190753768844222e-05, - "loss": 5.3485, - "step": 8554 - }, - { - "epoch": 4.461538461538462, - "grad_norm": 1.5400102138519287, - "learning_rate": 9.19065326633166e-05, - "loss": 5.3456, - "step": 8555 - }, - { - "epoch": 4.462059973924381, - "grad_norm": 1.5678253173828125, - "learning_rate": 9.190552763819096e-05, - "loss": 5.5728, - "step": 8556 - }, - { - "epoch": 4.4625814863103, - "grad_norm": 1.3783069849014282, - "learning_rate": 9.190452261306534e-05, - "loss": 6.0736, - "step": 8557 - }, - { - "epoch": 4.463102998696219, - "grad_norm": 1.3344589471817017, - "learning_rate": 9.19035175879397e-05, - "loss": 5.6555, - "step": 8558 - }, - { - "epoch": 4.463624511082138, - "grad_norm": 1.4527411460876465, - "learning_rate": 9.190251256281407e-05, - "loss": 5.8316, - "step": 8559 - }, - { - "epoch": 4.464146023468057, - "grad_norm": 1.5172758102416992, - "learning_rate": 9.190150753768844e-05, - "loss": 5.5564, - "step": 8560 - }, - { - "epoch": 4.464667535853977, - "grad_norm": 1.414687156677246, - "learning_rate": 9.190050251256281e-05, - "loss": 5.6541, - "step": 8561 - }, - { - "epoch": 4.465189048239896, - "grad_norm": 1.5021504163742065, - "learning_rate": 9.189949748743719e-05, - "loss": 5.6541, - "step": 8562 - }, - { - "epoch": 4.465710560625815, - "grad_norm": 1.6022124290466309, - "learning_rate": 9.189849246231156e-05, - "loss": 5.3845, - "step": 8563 - }, - { - "epoch": 4.466232073011734, - "grad_norm": 1.5220342874526978, - "learning_rate": 9.189748743718594e-05, - "loss": 5.9164, - "step": 8564 - }, - { - "epoch": 4.466753585397653, - "grad_norm": 1.5480796098709106, - "learning_rate": 9.18964824120603e-05, - "loss": 5.1833, - "step": 8565 - }, - { - "epoch": 4.467275097783572, - "grad_norm": 1.496398687362671, - "learning_rate": 9.189547738693468e-05, - "loss": 5.5376, - "step": 8566 - }, - { - "epoch": 4.467796610169492, - "grad_norm": 1.3889068365097046, - "learning_rate": 9.189447236180905e-05, - "loss": 6.2016, - "step": 8567 - }, - { - "epoch": 4.46831812255541, - "grad_norm": 1.4930278062820435, - "learning_rate": 9.189346733668343e-05, - "loss": 5.5242, - "step": 8568 - }, - { - "epoch": 4.46883963494133, - "grad_norm": 1.5703706741333008, - "learning_rate": 9.189246231155779e-05, - "loss": 5.8332, - "step": 8569 - }, - { - "epoch": 4.469361147327249, - "grad_norm": 1.769647240638733, - "learning_rate": 9.189145728643217e-05, - "loss": 5.1563, - "step": 8570 - }, - { - "epoch": 4.469882659713168, - "grad_norm": 1.5287221670150757, - "learning_rate": 9.189045226130653e-05, - "loss": 5.2833, - "step": 8571 - }, - { - "epoch": 4.470404172099087, - "grad_norm": 1.3657301664352417, - "learning_rate": 9.188944723618091e-05, - "loss": 5.9854, - "step": 8572 - }, - { - "epoch": 4.470925684485007, - "grad_norm": 1.5355093479156494, - "learning_rate": 9.188844221105527e-05, - "loss": 5.4672, - "step": 8573 - }, - { - "epoch": 4.471447196870925, - "grad_norm": 1.5090543031692505, - "learning_rate": 9.188743718592965e-05, - "loss": 5.6899, - "step": 8574 - }, - { - "epoch": 4.471968709256845, - "grad_norm": 1.5218592882156372, - "learning_rate": 9.188643216080403e-05, - "loss": 5.829, - "step": 8575 - }, - { - "epoch": 4.472490221642764, - "grad_norm": 1.5560450553894043, - "learning_rate": 9.18854271356784e-05, - "loss": 5.7415, - "step": 8576 - }, - { - "epoch": 4.473011734028683, - "grad_norm": 1.5394688844680786, - "learning_rate": 9.188442211055277e-05, - "loss": 5.7078, - "step": 8577 - }, - { - "epoch": 4.473533246414602, - "grad_norm": 1.5446885824203491, - "learning_rate": 9.188341708542714e-05, - "loss": 5.477, - "step": 8578 - }, - { - "epoch": 4.474054758800522, - "grad_norm": 1.4566049575805664, - "learning_rate": 9.188241206030151e-05, - "loss": 5.9237, - "step": 8579 - }, - { - "epoch": 4.47457627118644, - "grad_norm": 1.3754338026046753, - "learning_rate": 9.188140703517588e-05, - "loss": 6.1446, - "step": 8580 - }, - { - "epoch": 4.47509778357236, - "grad_norm": 1.3557640314102173, - "learning_rate": 9.188040201005026e-05, - "loss": 5.6307, - "step": 8581 - }, - { - "epoch": 4.475619295958279, - "grad_norm": 1.559217095375061, - "learning_rate": 9.187939698492462e-05, - "loss": 5.8377, - "step": 8582 - }, - { - "epoch": 4.476140808344198, - "grad_norm": 1.4263664484024048, - "learning_rate": 9.1878391959799e-05, - "loss": 5.8633, - "step": 8583 - }, - { - "epoch": 4.476662320730117, - "grad_norm": 1.6580474376678467, - "learning_rate": 9.187738693467338e-05, - "loss": 5.8852, - "step": 8584 - }, - { - "epoch": 4.477183833116037, - "grad_norm": 1.4045087099075317, - "learning_rate": 9.187638190954775e-05, - "loss": 5.8492, - "step": 8585 - }, - { - "epoch": 4.477705345501955, - "grad_norm": 1.6569414138793945, - "learning_rate": 9.187537688442212e-05, - "loss": 5.9031, - "step": 8586 - }, - { - "epoch": 4.478226857887875, - "grad_norm": 1.3638617992401123, - "learning_rate": 9.187437185929648e-05, - "loss": 5.8312, - "step": 8587 - }, - { - "epoch": 4.478748370273794, - "grad_norm": 1.5572504997253418, - "learning_rate": 9.187336683417086e-05, - "loss": 5.9045, - "step": 8588 - }, - { - "epoch": 4.479269882659713, - "grad_norm": 1.5944182872772217, - "learning_rate": 9.187236180904523e-05, - "loss": 5.9593, - "step": 8589 - }, - { - "epoch": 4.479791395045632, - "grad_norm": 1.414777159690857, - "learning_rate": 9.18713567839196e-05, - "loss": 5.6462, - "step": 8590 - }, - { - "epoch": 4.480312907431552, - "grad_norm": 1.508028268814087, - "learning_rate": 9.187035175879397e-05, - "loss": 5.641, - "step": 8591 - }, - { - "epoch": 4.48083441981747, - "grad_norm": 1.5503356456756592, - "learning_rate": 9.186934673366835e-05, - "loss": 5.8132, - "step": 8592 - }, - { - "epoch": 4.48135593220339, - "grad_norm": 1.3871147632598877, - "learning_rate": 9.186834170854271e-05, - "loss": 6.0496, - "step": 8593 - }, - { - "epoch": 4.481877444589309, - "grad_norm": 1.483432412147522, - "learning_rate": 9.186733668341709e-05, - "loss": 5.8166, - "step": 8594 - }, - { - "epoch": 4.482398956975228, - "grad_norm": 1.35496187210083, - "learning_rate": 9.186633165829147e-05, - "loss": 5.6819, - "step": 8595 - }, - { - "epoch": 4.482920469361147, - "grad_norm": 1.3960988521575928, - "learning_rate": 9.186532663316584e-05, - "loss": 5.728, - "step": 8596 - }, - { - "epoch": 4.483441981747067, - "grad_norm": 1.4727298021316528, - "learning_rate": 9.186432160804021e-05, - "loss": 5.817, - "step": 8597 - }, - { - "epoch": 4.483963494132985, - "grad_norm": 1.4193438291549683, - "learning_rate": 9.186331658291459e-05, - "loss": 5.9039, - "step": 8598 - }, - { - "epoch": 4.484485006518905, - "grad_norm": 1.3565367460250854, - "learning_rate": 9.186231155778895e-05, - "loss": 5.7083, - "step": 8599 - }, - { - "epoch": 4.485006518904824, - "grad_norm": 1.557286262512207, - "learning_rate": 9.186130653266331e-05, - "loss": 5.1771, - "step": 8600 - }, - { - "epoch": 4.485528031290743, - "grad_norm": 1.5794085264205933, - "learning_rate": 9.186030150753769e-05, - "loss": 5.7519, - "step": 8601 - }, - { - "epoch": 4.486049543676662, - "grad_norm": 1.3786311149597168, - "learning_rate": 9.185929648241206e-05, - "loss": 5.8461, - "step": 8602 - }, - { - "epoch": 4.486571056062582, - "grad_norm": 1.415019154548645, - "learning_rate": 9.185829145728643e-05, - "loss": 5.9663, - "step": 8603 - }, - { - "epoch": 4.4870925684485, - "grad_norm": 1.3741525411605835, - "learning_rate": 9.185728643216081e-05, - "loss": 6.0055, - "step": 8604 - }, - { - "epoch": 4.48761408083442, - "grad_norm": 1.4780945777893066, - "learning_rate": 9.185628140703519e-05, - "loss": 5.9083, - "step": 8605 - }, - { - "epoch": 4.488135593220339, - "grad_norm": 1.440409541130066, - "learning_rate": 9.185527638190955e-05, - "loss": 5.7522, - "step": 8606 - }, - { - "epoch": 4.488657105606258, - "grad_norm": 1.4269981384277344, - "learning_rate": 9.185427135678393e-05, - "loss": 5.6507, - "step": 8607 - }, - { - "epoch": 4.489178617992177, - "grad_norm": 1.4798200130462646, - "learning_rate": 9.18532663316583e-05, - "loss": 5.9282, - "step": 8608 - }, - { - "epoch": 4.489700130378097, - "grad_norm": 1.5554476976394653, - "learning_rate": 9.185226130653267e-05, - "loss": 5.68, - "step": 8609 - }, - { - "epoch": 4.490221642764015, - "grad_norm": 1.3552969694137573, - "learning_rate": 9.185125628140704e-05, - "loss": 5.8993, - "step": 8610 - }, - { - "epoch": 4.490743155149935, - "grad_norm": 1.3467867374420166, - "learning_rate": 9.185025125628142e-05, - "loss": 5.9724, - "step": 8611 - }, - { - "epoch": 4.491264667535854, - "grad_norm": 1.4372122287750244, - "learning_rate": 9.184924623115578e-05, - "loss": 5.7207, - "step": 8612 - }, - { - "epoch": 4.491786179921773, - "grad_norm": 1.3132340908050537, - "learning_rate": 9.184824120603014e-05, - "loss": 5.6983, - "step": 8613 - }, - { - "epoch": 4.492307692307692, - "grad_norm": 1.4207745790481567, - "learning_rate": 9.184723618090452e-05, - "loss": 5.7059, - "step": 8614 - }, - { - "epoch": 4.492829204693612, - "grad_norm": 1.5269960165023804, - "learning_rate": 9.18462311557789e-05, - "loss": 5.6554, - "step": 8615 - }, - { - "epoch": 4.49335071707953, - "grad_norm": 1.3541020154953003, - "learning_rate": 9.184522613065328e-05, - "loss": 5.7282, - "step": 8616 - }, - { - "epoch": 4.49387222946545, - "grad_norm": 1.4565259218215942, - "learning_rate": 9.184422110552764e-05, - "loss": 5.4086, - "step": 8617 - }, - { - "epoch": 4.494393741851369, - "grad_norm": 1.5182610750198364, - "learning_rate": 9.184321608040202e-05, - "loss": 5.6801, - "step": 8618 - }, - { - "epoch": 4.494915254237288, - "grad_norm": 1.4594647884368896, - "learning_rate": 9.184221105527638e-05, - "loss": 5.8581, - "step": 8619 - }, - { - "epoch": 4.495436766623207, - "grad_norm": 1.360872745513916, - "learning_rate": 9.184120603015076e-05, - "loss": 5.8351, - "step": 8620 - }, - { - "epoch": 4.495958279009127, - "grad_norm": 1.3282561302185059, - "learning_rate": 9.184020100502513e-05, - "loss": 5.9004, - "step": 8621 - }, - { - "epoch": 4.496479791395045, - "grad_norm": 1.5173332691192627, - "learning_rate": 9.18391959798995e-05, - "loss": 5.5809, - "step": 8622 - }, - { - "epoch": 4.497001303780965, - "grad_norm": 1.4153659343719482, - "learning_rate": 9.183819095477387e-05, - "loss": 5.9949, - "step": 8623 - }, - { - "epoch": 4.497522816166884, - "grad_norm": 1.3907524347305298, - "learning_rate": 9.183718592964825e-05, - "loss": 5.7586, - "step": 8624 - }, - { - "epoch": 4.498044328552803, - "grad_norm": 1.4530448913574219, - "learning_rate": 9.183618090452262e-05, - "loss": 5.7852, - "step": 8625 - }, - { - "epoch": 4.498565840938722, - "grad_norm": 1.3848886489868164, - "learning_rate": 9.183517587939699e-05, - "loss": 6.0236, - "step": 8626 - }, - { - "epoch": 4.499087353324642, - "grad_norm": 1.8457201719284058, - "learning_rate": 9.183417085427137e-05, - "loss": 5.8028, - "step": 8627 - }, - { - "epoch": 4.49960886571056, - "grad_norm": 1.6940728425979614, - "learning_rate": 9.183316582914573e-05, - "loss": 5.3094, - "step": 8628 - }, - { - "epoch": 4.50013037809648, - "grad_norm": 1.5547945499420166, - "learning_rate": 9.183216080402011e-05, - "loss": 5.1087, - "step": 8629 - }, - { - "epoch": 4.500651890482399, - "grad_norm": 1.7104084491729736, - "learning_rate": 9.183115577889447e-05, - "loss": 5.1528, - "step": 8630 - }, - { - "epoch": 4.501173402868318, - "grad_norm": 1.597961187362671, - "learning_rate": 9.183015075376885e-05, - "loss": 5.5687, - "step": 8631 - }, - { - "epoch": 4.501694915254237, - "grad_norm": 1.573063611984253, - "learning_rate": 9.182914572864321e-05, - "loss": 5.5501, - "step": 8632 - }, - { - "epoch": 4.502216427640157, - "grad_norm": 1.6524958610534668, - "learning_rate": 9.182814070351759e-05, - "loss": 5.6431, - "step": 8633 - }, - { - "epoch": 4.502737940026075, - "grad_norm": 1.6049894094467163, - "learning_rate": 9.182713567839196e-05, - "loss": 5.3403, - "step": 8634 - }, - { - "epoch": 4.503259452411995, - "grad_norm": 1.5342134237289429, - "learning_rate": 9.182613065326633e-05, - "loss": 5.6237, - "step": 8635 - }, - { - "epoch": 4.5037809647979135, - "grad_norm": 1.4730374813079834, - "learning_rate": 9.182512562814071e-05, - "loss": 5.7087, - "step": 8636 - }, - { - "epoch": 4.504302477183833, - "grad_norm": 1.5002256631851196, - "learning_rate": 9.182412060301509e-05, - "loss": 5.7181, - "step": 8637 - }, - { - "epoch": 4.504823989569752, - "grad_norm": 1.4677200317382812, - "learning_rate": 9.182311557788945e-05, - "loss": 5.7041, - "step": 8638 - }, - { - "epoch": 4.505345501955672, - "grad_norm": 1.510989785194397, - "learning_rate": 9.182211055276382e-05, - "loss": 5.608, - "step": 8639 - }, - { - "epoch": 4.50586701434159, - "grad_norm": 1.3488264083862305, - "learning_rate": 9.18211055276382e-05, - "loss": 5.9633, - "step": 8640 - }, - { - "epoch": 4.50638852672751, - "grad_norm": 1.6377688646316528, - "learning_rate": 9.182010050251256e-05, - "loss": 5.2273, - "step": 8641 - }, - { - "epoch": 4.5069100391134285, - "grad_norm": 1.4688682556152344, - "learning_rate": 9.181909547738694e-05, - "loss": 5.703, - "step": 8642 - }, - { - "epoch": 4.507431551499348, - "grad_norm": 1.4690124988555908, - "learning_rate": 9.18180904522613e-05, - "loss": 5.7019, - "step": 8643 - }, - { - "epoch": 4.507953063885267, - "grad_norm": 1.4913052320480347, - "learning_rate": 9.181708542713568e-05, - "loss": 5.3814, - "step": 8644 - }, - { - "epoch": 4.508474576271187, - "grad_norm": 1.340436339378357, - "learning_rate": 9.181608040201006e-05, - "loss": 5.9409, - "step": 8645 - }, - { - "epoch": 4.508996088657105, - "grad_norm": 1.4594584703445435, - "learning_rate": 9.181507537688444e-05, - "loss": 5.4165, - "step": 8646 - }, - { - "epoch": 4.509517601043025, - "grad_norm": 1.4875481128692627, - "learning_rate": 9.18140703517588e-05, - "loss": 5.7921, - "step": 8647 - }, - { - "epoch": 4.5100391134289435, - "grad_norm": 1.6275416612625122, - "learning_rate": 9.181306532663318e-05, - "loss": 5.1119, - "step": 8648 - }, - { - "epoch": 4.510560625814863, - "grad_norm": 1.4357553720474243, - "learning_rate": 9.181206030150754e-05, - "loss": 5.7113, - "step": 8649 - }, - { - "epoch": 4.511082138200782, - "grad_norm": 1.4437183141708374, - "learning_rate": 9.181105527638192e-05, - "loss": 5.6713, - "step": 8650 - }, - { - "epoch": 4.511603650586702, - "grad_norm": 1.3711045980453491, - "learning_rate": 9.181005025125628e-05, - "loss": 5.7976, - "step": 8651 - }, - { - "epoch": 4.5121251629726205, - "grad_norm": 1.3479400873184204, - "learning_rate": 9.180904522613065e-05, - "loss": 6.1029, - "step": 8652 - }, - { - "epoch": 4.51264667535854, - "grad_norm": 1.584433913230896, - "learning_rate": 9.180804020100503e-05, - "loss": 5.6639, - "step": 8653 - }, - { - "epoch": 4.5131681877444585, - "grad_norm": 1.4037836790084839, - "learning_rate": 9.180703517587939e-05, - "loss": 5.6903, - "step": 8654 - }, - { - "epoch": 4.513689700130378, - "grad_norm": 1.3228816986083984, - "learning_rate": 9.180603015075377e-05, - "loss": 4.6655, - "step": 8655 - }, - { - "epoch": 4.514211212516297, - "grad_norm": 1.471509575843811, - "learning_rate": 9.180502512562815e-05, - "loss": 5.8747, - "step": 8656 - }, - { - "epoch": 4.514732724902217, - "grad_norm": 1.4782936573028564, - "learning_rate": 9.180402010050252e-05, - "loss": 5.5354, - "step": 8657 - }, - { - "epoch": 4.5152542372881355, - "grad_norm": 1.483424425125122, - "learning_rate": 9.180301507537689e-05, - "loss": 5.7327, - "step": 8658 - }, - { - "epoch": 4.515775749674055, - "grad_norm": 1.429533839225769, - "learning_rate": 9.180201005025127e-05, - "loss": 5.4311, - "step": 8659 - }, - { - "epoch": 4.5162972620599735, - "grad_norm": 1.3572803735733032, - "learning_rate": 9.180100502512563e-05, - "loss": 5.7835, - "step": 8660 - }, - { - "epoch": 4.516818774445893, - "grad_norm": 1.533363938331604, - "learning_rate": 9.180000000000001e-05, - "loss": 5.5389, - "step": 8661 - }, - { - "epoch": 4.517340286831812, - "grad_norm": 1.574392557144165, - "learning_rate": 9.179899497487437e-05, - "loss": 5.4781, - "step": 8662 - }, - { - "epoch": 4.517861799217732, - "grad_norm": 1.4260330200195312, - "learning_rate": 9.179798994974875e-05, - "loss": 6.0582, - "step": 8663 - }, - { - "epoch": 4.5183833116036505, - "grad_norm": 1.4073082208633423, - "learning_rate": 9.179698492462312e-05, - "loss": 5.5068, - "step": 8664 - }, - { - "epoch": 4.51890482398957, - "grad_norm": 1.3972843885421753, - "learning_rate": 9.179597989949749e-05, - "loss": 5.8013, - "step": 8665 - }, - { - "epoch": 4.5194263363754885, - "grad_norm": 1.3590000867843628, - "learning_rate": 9.179497487437187e-05, - "loss": 5.8535, - "step": 8666 - }, - { - "epoch": 4.519947848761408, - "grad_norm": 1.4307937622070312, - "learning_rate": 9.179396984924624e-05, - "loss": 5.9192, - "step": 8667 - }, - { - "epoch": 4.5204693611473274, - "grad_norm": 1.2858229875564575, - "learning_rate": 9.179296482412061e-05, - "loss": 5.9523, - "step": 8668 - }, - { - "epoch": 4.520990873533247, - "grad_norm": 1.4741249084472656, - "learning_rate": 9.179195979899498e-05, - "loss": 5.7912, - "step": 8669 - }, - { - "epoch": 4.5215123859191655, - "grad_norm": 1.599625825881958, - "learning_rate": 9.179095477386936e-05, - "loss": 5.5054, - "step": 8670 - }, - { - "epoch": 4.522033898305085, - "grad_norm": 1.544257640838623, - "learning_rate": 9.178994974874372e-05, - "loss": 5.7665, - "step": 8671 - }, - { - "epoch": 4.5225554106910035, - "grad_norm": 1.4466798305511475, - "learning_rate": 9.17889447236181e-05, - "loss": 5.7608, - "step": 8672 - }, - { - "epoch": 4.523076923076923, - "grad_norm": 1.3976002931594849, - "learning_rate": 9.178793969849246e-05, - "loss": 5.5555, - "step": 8673 - }, - { - "epoch": 4.5235984354628425, - "grad_norm": 1.2706423997879028, - "learning_rate": 9.178693467336684e-05, - "loss": 6.121, - "step": 8674 - }, - { - "epoch": 4.524119947848761, - "grad_norm": 1.340128779411316, - "learning_rate": 9.17859296482412e-05, - "loss": 5.8943, - "step": 8675 - }, - { - "epoch": 4.5246414602346805, - "grad_norm": 1.560336947441101, - "learning_rate": 9.178492462311558e-05, - "loss": 5.7664, - "step": 8676 - }, - { - "epoch": 4.5251629726206, - "grad_norm": 1.4174737930297852, - "learning_rate": 9.178391959798996e-05, - "loss": 5.7851, - "step": 8677 - }, - { - "epoch": 4.5256844850065185, - "grad_norm": 1.4999704360961914, - "learning_rate": 9.178291457286434e-05, - "loss": 5.8814, - "step": 8678 - }, - { - "epoch": 4.526205997392438, - "grad_norm": 1.5226984024047852, - "learning_rate": 9.17819095477387e-05, - "loss": 5.5445, - "step": 8679 - }, - { - "epoch": 4.5267275097783575, - "grad_norm": 1.4151042699813843, - "learning_rate": 9.178090452261307e-05, - "loss": 5.4419, - "step": 8680 - }, - { - "epoch": 4.527249022164276, - "grad_norm": 1.3811421394348145, - "learning_rate": 9.177989949748744e-05, - "loss": 5.6137, - "step": 8681 - }, - { - "epoch": 4.5277705345501955, - "grad_norm": 1.4657293558120728, - "learning_rate": 9.177889447236181e-05, - "loss": 5.4847, - "step": 8682 - }, - { - "epoch": 4.528292046936115, - "grad_norm": 1.3787561655044556, - "learning_rate": 9.177788944723619e-05, - "loss": 5.6494, - "step": 8683 - }, - { - "epoch": 4.5288135593220336, - "grad_norm": 1.469744086265564, - "learning_rate": 9.177688442211055e-05, - "loss": 5.4992, - "step": 8684 - }, - { - "epoch": 4.529335071707953, - "grad_norm": 1.4011155366897583, - "learning_rate": 9.177587939698493e-05, - "loss": 5.9485, - "step": 8685 - }, - { - "epoch": 4.5298565840938725, - "grad_norm": 1.3122327327728271, - "learning_rate": 9.177487437185929e-05, - "loss": 5.9295, - "step": 8686 - }, - { - "epoch": 4.530378096479791, - "grad_norm": 1.4872660636901855, - "learning_rate": 9.177386934673367e-05, - "loss": 5.2667, - "step": 8687 - }, - { - "epoch": 4.5308996088657105, - "grad_norm": 1.4262043237686157, - "learning_rate": 9.177286432160805e-05, - "loss": 5.6177, - "step": 8688 - }, - { - "epoch": 4.53142112125163, - "grad_norm": 1.4216456413269043, - "learning_rate": 9.177185929648243e-05, - "loss": 5.7877, - "step": 8689 - }, - { - "epoch": 4.531942633637549, - "grad_norm": 1.4967772960662842, - "learning_rate": 9.177085427135679e-05, - "loss": 5.9375, - "step": 8690 - }, - { - "epoch": 4.532464146023468, - "grad_norm": 1.4880205392837524, - "learning_rate": 9.176984924623117e-05, - "loss": 5.7093, - "step": 8691 - }, - { - "epoch": 4.5329856584093875, - "grad_norm": 1.71427583694458, - "learning_rate": 9.176884422110553e-05, - "loss": 6.1252, - "step": 8692 - }, - { - "epoch": 4.533507170795306, - "grad_norm": 1.421834111213684, - "learning_rate": 9.17678391959799e-05, - "loss": 6.0578, - "step": 8693 - }, - { - "epoch": 4.5340286831812255, - "grad_norm": 1.4830957651138306, - "learning_rate": 9.176683417085427e-05, - "loss": 5.621, - "step": 8694 - }, - { - "epoch": 4.534550195567145, - "grad_norm": 1.423503041267395, - "learning_rate": 9.176582914572864e-05, - "loss": 6.1092, - "step": 8695 - }, - { - "epoch": 4.535071707953064, - "grad_norm": 1.677391529083252, - "learning_rate": 9.176482412060302e-05, - "loss": 5.8529, - "step": 8696 - }, - { - "epoch": 4.535593220338983, - "grad_norm": 1.4934273958206177, - "learning_rate": 9.17638190954774e-05, - "loss": 5.6132, - "step": 8697 - }, - { - "epoch": 4.5361147327249025, - "grad_norm": 1.444324254989624, - "learning_rate": 9.176281407035177e-05, - "loss": 5.8703, - "step": 8698 - }, - { - "epoch": 4.536636245110821, - "grad_norm": 1.494888186454773, - "learning_rate": 9.176180904522614e-05, - "loss": 5.6694, - "step": 8699 - }, - { - "epoch": 4.5371577574967406, - "grad_norm": 1.6088695526123047, - "learning_rate": 9.176080402010051e-05, - "loss": 5.4731, - "step": 8700 - }, - { - "epoch": 4.53767926988266, - "grad_norm": 1.5434436798095703, - "learning_rate": 9.175979899497488e-05, - "loss": 5.7504, - "step": 8701 - }, - { - "epoch": 4.538200782268579, - "grad_norm": 1.498025894165039, - "learning_rate": 9.175879396984926e-05, - "loss": 5.8716, - "step": 8702 - }, - { - "epoch": 4.538722294654498, - "grad_norm": 1.4497346878051758, - "learning_rate": 9.175778894472362e-05, - "loss": 6.0863, - "step": 8703 - }, - { - "epoch": 4.5392438070404175, - "grad_norm": 1.4767022132873535, - "learning_rate": 9.1756783919598e-05, - "loss": 5.2402, - "step": 8704 - }, - { - "epoch": 4.539765319426336, - "grad_norm": 1.3352497816085815, - "learning_rate": 9.175577889447236e-05, - "loss": 5.8142, - "step": 8705 - }, - { - "epoch": 4.540286831812256, - "grad_norm": 1.5846811532974243, - "learning_rate": 9.175477386934673e-05, - "loss": 5.515, - "step": 8706 - }, - { - "epoch": 4.540808344198175, - "grad_norm": 1.4087646007537842, - "learning_rate": 9.17537688442211e-05, - "loss": 5.7846, - "step": 8707 - }, - { - "epoch": 4.541329856584094, - "grad_norm": 1.5147112607955933, - "learning_rate": 9.175276381909548e-05, - "loss": 5.66, - "step": 8708 - }, - { - "epoch": 4.541851368970013, - "grad_norm": 1.4473127126693726, - "learning_rate": 9.175175879396986e-05, - "loss": 5.5291, - "step": 8709 - }, - { - "epoch": 4.5423728813559325, - "grad_norm": 1.4192097187042236, - "learning_rate": 9.175075376884422e-05, - "loss": 5.5304, - "step": 8710 - }, - { - "epoch": 4.542894393741851, - "grad_norm": 1.4624630212783813, - "learning_rate": 9.17497487437186e-05, - "loss": 5.9057, - "step": 8711 - }, - { - "epoch": 4.543415906127771, - "grad_norm": 1.585584282875061, - "learning_rate": 9.174874371859297e-05, - "loss": 5.3852, - "step": 8712 - }, - { - "epoch": 4.54393741851369, - "grad_norm": 1.5647541284561157, - "learning_rate": 9.174773869346734e-05, - "loss": 5.2074, - "step": 8713 - }, - { - "epoch": 4.544458930899609, - "grad_norm": 1.639654278755188, - "learning_rate": 9.174673366834171e-05, - "loss": 5.5947, - "step": 8714 - }, - { - "epoch": 4.544980443285528, - "grad_norm": 1.6701154708862305, - "learning_rate": 9.174572864321609e-05, - "loss": 5.25, - "step": 8715 - }, - { - "epoch": 4.5455019556714475, - "grad_norm": 1.5566840171813965, - "learning_rate": 9.174472361809045e-05, - "loss": 5.9526, - "step": 8716 - }, - { - "epoch": 4.546023468057366, - "grad_norm": 1.5511070489883423, - "learning_rate": 9.174371859296483e-05, - "loss": 5.9813, - "step": 8717 - }, - { - "epoch": 4.546544980443286, - "grad_norm": 1.4521782398223877, - "learning_rate": 9.17427135678392e-05, - "loss": 5.3153, - "step": 8718 - }, - { - "epoch": 4.547066492829205, - "grad_norm": 1.3967090845108032, - "learning_rate": 9.174170854271357e-05, - "loss": 5.7453, - "step": 8719 - }, - { - "epoch": 4.547588005215124, - "grad_norm": 1.680825114250183, - "learning_rate": 9.174070351758795e-05, - "loss": 5.264, - "step": 8720 - }, - { - "epoch": 4.548109517601043, - "grad_norm": 1.3267009258270264, - "learning_rate": 9.173969849246231e-05, - "loss": 6.1034, - "step": 8721 - }, - { - "epoch": 4.548631029986963, - "grad_norm": 1.4496004581451416, - "learning_rate": 9.173869346733669e-05, - "loss": 5.6644, - "step": 8722 - }, - { - "epoch": 4.549152542372881, - "grad_norm": 1.4133682250976562, - "learning_rate": 9.173768844221105e-05, - "loss": 6.1245, - "step": 8723 - }, - { - "epoch": 4.549674054758801, - "grad_norm": 1.5146615505218506, - "learning_rate": 9.173668341708543e-05, - "loss": 5.8531, - "step": 8724 - }, - { - "epoch": 4.55019556714472, - "grad_norm": 1.3808984756469727, - "learning_rate": 9.17356783919598e-05, - "loss": 5.6086, - "step": 8725 - }, - { - "epoch": 4.550717079530639, - "grad_norm": 1.46841299533844, - "learning_rate": 9.173467336683417e-05, - "loss": 5.4893, - "step": 8726 - }, - { - "epoch": 4.551238591916558, - "grad_norm": 1.4762296676635742, - "learning_rate": 9.173366834170854e-05, - "loss": 5.6428, - "step": 8727 - }, - { - "epoch": 4.551760104302478, - "grad_norm": 1.4300376176834106, - "learning_rate": 9.173266331658292e-05, - "loss": 5.5641, - "step": 8728 - }, - { - "epoch": 4.552281616688396, - "grad_norm": 1.4669941663742065, - "learning_rate": 9.17316582914573e-05, - "loss": 5.8773, - "step": 8729 - }, - { - "epoch": 4.552803129074316, - "grad_norm": 1.5359550714492798, - "learning_rate": 9.173065326633167e-05, - "loss": 5.8035, - "step": 8730 - }, - { - "epoch": 4.553324641460234, - "grad_norm": 1.5147991180419922, - "learning_rate": 9.172964824120604e-05, - "loss": 5.7443, - "step": 8731 - }, - { - "epoch": 4.553846153846154, - "grad_norm": 1.5537841320037842, - "learning_rate": 9.17286432160804e-05, - "loss": 5.622, - "step": 8732 - }, - { - "epoch": 4.554367666232073, - "grad_norm": 1.6170248985290527, - "learning_rate": 9.172763819095478e-05, - "loss": 5.7587, - "step": 8733 - }, - { - "epoch": 4.554889178617993, - "grad_norm": 1.4892398118972778, - "learning_rate": 9.172663316582914e-05, - "loss": 5.5878, - "step": 8734 - }, - { - "epoch": 4.555410691003911, - "grad_norm": 1.4726505279541016, - "learning_rate": 9.172562814070352e-05, - "loss": 5.0978, - "step": 8735 - }, - { - "epoch": 4.555932203389831, - "grad_norm": 1.3390599489212036, - "learning_rate": 9.172462311557789e-05, - "loss": 6.1221, - "step": 8736 - }, - { - "epoch": 4.556453715775749, - "grad_norm": 1.5746928453445435, - "learning_rate": 9.172361809045226e-05, - "loss": 5.3129, - "step": 8737 - }, - { - "epoch": 4.556975228161669, - "grad_norm": 1.510516881942749, - "learning_rate": 9.172261306532664e-05, - "loss": 5.6887, - "step": 8738 - }, - { - "epoch": 4.557496740547588, - "grad_norm": 1.4536935091018677, - "learning_rate": 9.172160804020102e-05, - "loss": 5.9217, - "step": 8739 - }, - { - "epoch": 4.558018252933508, - "grad_norm": 1.5286935567855835, - "learning_rate": 9.172060301507538e-05, - "loss": 5.4753, - "step": 8740 - }, - { - "epoch": 4.558539765319426, - "grad_norm": 1.454332947731018, - "learning_rate": 9.171959798994976e-05, - "loss": 5.8488, - "step": 8741 - }, - { - "epoch": 4.559061277705346, - "grad_norm": 1.4274616241455078, - "learning_rate": 9.171859296482412e-05, - "loss": 5.5225, - "step": 8742 - }, - { - "epoch": 4.559582790091264, - "grad_norm": 1.7226529121398926, - "learning_rate": 9.17175879396985e-05, - "loss": 5.6195, - "step": 8743 - }, - { - "epoch": 4.560104302477184, - "grad_norm": 1.4996707439422607, - "learning_rate": 9.171658291457287e-05, - "loss": 5.9606, - "step": 8744 - }, - { - "epoch": 4.560625814863103, - "grad_norm": 1.4412786960601807, - "learning_rate": 9.171557788944723e-05, - "loss": 5.7576, - "step": 8745 - }, - { - "epoch": 4.561147327249023, - "grad_norm": 1.3681682348251343, - "learning_rate": 9.171457286432161e-05, - "loss": 4.7421, - "step": 8746 - }, - { - "epoch": 4.561668839634941, - "grad_norm": 1.6577523946762085, - "learning_rate": 9.171356783919597e-05, - "loss": 5.7554, - "step": 8747 - }, - { - "epoch": 4.562190352020861, - "grad_norm": 1.4363926649093628, - "learning_rate": 9.171256281407035e-05, - "loss": 5.509, - "step": 8748 - }, - { - "epoch": 4.562711864406779, - "grad_norm": 1.6629704236984253, - "learning_rate": 9.171155778894473e-05, - "loss": 5.6517, - "step": 8749 - }, - { - "epoch": 4.563233376792699, - "grad_norm": 1.522254228591919, - "learning_rate": 9.171055276381911e-05, - "loss": 5.7574, - "step": 8750 - }, - { - "epoch": 4.563754889178618, - "grad_norm": 1.5916715860366821, - "learning_rate": 9.170954773869347e-05, - "loss": 5.2891, - "step": 8751 - }, - { - "epoch": 4.564276401564538, - "grad_norm": 1.6371300220489502, - "learning_rate": 9.170854271356785e-05, - "loss": 5.4513, - "step": 8752 - }, - { - "epoch": 4.564797913950456, - "grad_norm": 1.5032769441604614, - "learning_rate": 9.170753768844221e-05, - "loss": 5.8863, - "step": 8753 - }, - { - "epoch": 4.565319426336376, - "grad_norm": 1.6248977184295654, - "learning_rate": 9.170653266331659e-05, - "loss": 5.9064, - "step": 8754 - }, - { - "epoch": 4.565840938722294, - "grad_norm": 1.5138685703277588, - "learning_rate": 9.170552763819096e-05, - "loss": 5.5959, - "step": 8755 - }, - { - "epoch": 4.566362451108214, - "grad_norm": 1.5090042352676392, - "learning_rate": 9.170452261306533e-05, - "loss": 4.9782, - "step": 8756 - }, - { - "epoch": 4.566883963494133, - "grad_norm": 1.3818196058273315, - "learning_rate": 9.17035175879397e-05, - "loss": 6.0381, - "step": 8757 - }, - { - "epoch": 4.567405475880053, - "grad_norm": 1.4270144701004028, - "learning_rate": 9.170251256281408e-05, - "loss": 5.691, - "step": 8758 - }, - { - "epoch": 4.567926988265971, - "grad_norm": 1.3970242738723755, - "learning_rate": 9.170150753768845e-05, - "loss": 6.071, - "step": 8759 - }, - { - "epoch": 4.568448500651891, - "grad_norm": 1.5757447481155396, - "learning_rate": 9.170050251256282e-05, - "loss": 5.5438, - "step": 8760 - }, - { - "epoch": 4.568970013037809, - "grad_norm": 1.360434651374817, - "learning_rate": 9.16994974874372e-05, - "loss": 5.9421, - "step": 8761 - }, - { - "epoch": 4.569491525423729, - "grad_norm": 1.675575852394104, - "learning_rate": 9.169849246231156e-05, - "loss": 5.5551, - "step": 8762 - }, - { - "epoch": 4.570013037809648, - "grad_norm": 1.4929884672164917, - "learning_rate": 9.169748743718594e-05, - "loss": 5.7918, - "step": 8763 - }, - { - "epoch": 4.570534550195567, - "grad_norm": 1.4243988990783691, - "learning_rate": 9.16964824120603e-05, - "loss": 5.5457, - "step": 8764 - }, - { - "epoch": 4.571056062581486, - "grad_norm": 1.3796207904815674, - "learning_rate": 9.169547738693468e-05, - "loss": 5.7656, - "step": 8765 - }, - { - "epoch": 4.571577574967406, - "grad_norm": 1.4633604288101196, - "learning_rate": 9.169447236180904e-05, - "loss": 5.8621, - "step": 8766 - }, - { - "epoch": 4.572099087353324, - "grad_norm": 1.530441403388977, - "learning_rate": 9.169346733668342e-05, - "loss": 5.0478, - "step": 8767 - }, - { - "epoch": 4.572620599739244, - "grad_norm": 1.3494932651519775, - "learning_rate": 9.169246231155779e-05, - "loss": 6.0426, - "step": 8768 - }, - { - "epoch": 4.573142112125163, - "grad_norm": 1.4170658588409424, - "learning_rate": 9.169145728643216e-05, - "loss": 5.6762, - "step": 8769 - }, - { - "epoch": 4.573663624511082, - "grad_norm": 1.33936607837677, - "learning_rate": 9.169045226130654e-05, - "loss": 5.9197, - "step": 8770 - }, - { - "epoch": 4.574185136897001, - "grad_norm": 1.5455797910690308, - "learning_rate": 9.168944723618092e-05, - "loss": 5.2322, - "step": 8771 - }, - { - "epoch": 4.574706649282921, - "grad_norm": 1.478094458580017, - "learning_rate": 9.168844221105528e-05, - "loss": 5.6736, - "step": 8772 - }, - { - "epoch": 4.575228161668839, - "grad_norm": 1.526576042175293, - "learning_rate": 9.168743718592965e-05, - "loss": 4.5858, - "step": 8773 - }, - { - "epoch": 4.575749674054759, - "grad_norm": 1.5601142644882202, - "learning_rate": 9.168643216080403e-05, - "loss": 5.8187, - "step": 8774 - }, - { - "epoch": 4.576271186440678, - "grad_norm": 1.391241192817688, - "learning_rate": 9.168542713567839e-05, - "loss": 5.9999, - "step": 8775 - }, - { - "epoch": 4.576792698826597, - "grad_norm": 1.4582111835479736, - "learning_rate": 9.168442211055277e-05, - "loss": 5.5618, - "step": 8776 - }, - { - "epoch": 4.577314211212516, - "grad_norm": 1.4616389274597168, - "learning_rate": 9.168341708542713e-05, - "loss": 5.7089, - "step": 8777 - }, - { - "epoch": 4.577835723598436, - "grad_norm": 1.4980223178863525, - "learning_rate": 9.168241206030151e-05, - "loss": 5.3176, - "step": 8778 - }, - { - "epoch": 4.578357235984354, - "grad_norm": 1.4985878467559814, - "learning_rate": 9.168140703517589e-05, - "loss": 5.7584, - "step": 8779 - }, - { - "epoch": 4.578878748370274, - "grad_norm": 1.4382927417755127, - "learning_rate": 9.168040201005027e-05, - "loss": 5.9466, - "step": 8780 - }, - { - "epoch": 4.579400260756193, - "grad_norm": 1.5502241849899292, - "learning_rate": 9.167939698492463e-05, - "loss": 5.6022, - "step": 8781 - }, - { - "epoch": 4.579921773142112, - "grad_norm": 1.378084659576416, - "learning_rate": 9.167839195979901e-05, - "loss": 5.9144, - "step": 8782 - }, - { - "epoch": 4.580443285528031, - "grad_norm": 1.5262136459350586, - "learning_rate": 9.167738693467337e-05, - "loss": 5.6609, - "step": 8783 - }, - { - "epoch": 4.580964797913951, - "grad_norm": 1.5146045684814453, - "learning_rate": 9.167638190954775e-05, - "loss": 5.7288, - "step": 8784 - }, - { - "epoch": 4.581486310299869, - "grad_norm": 1.528562307357788, - "learning_rate": 9.167537688442211e-05, - "loss": 5.3503, - "step": 8785 - }, - { - "epoch": 4.582007822685789, - "grad_norm": 1.5825990438461304, - "learning_rate": 9.167437185929648e-05, - "loss": 5.439, - "step": 8786 - }, - { - "epoch": 4.582529335071708, - "grad_norm": 1.4602211713790894, - "learning_rate": 9.167336683417086e-05, - "loss": 5.4332, - "step": 8787 - }, - { - "epoch": 4.583050847457627, - "grad_norm": 1.5268741846084595, - "learning_rate": 9.167236180904522e-05, - "loss": 5.9975, - "step": 8788 - }, - { - "epoch": 4.583572359843546, - "grad_norm": 1.4936295747756958, - "learning_rate": 9.16713567839196e-05, - "loss": 5.3616, - "step": 8789 - }, - { - "epoch": 4.584093872229466, - "grad_norm": 1.466723084449768, - "learning_rate": 9.167035175879398e-05, - "loss": 5.3297, - "step": 8790 - }, - { - "epoch": 4.584615384615384, - "grad_norm": 1.4672462940216064, - "learning_rate": 9.166934673366835e-05, - "loss": 5.889, - "step": 8791 - }, - { - "epoch": 4.585136897001304, - "grad_norm": 1.6096230745315552, - "learning_rate": 9.166834170854272e-05, - "loss": 5.3103, - "step": 8792 - }, - { - "epoch": 4.585658409387223, - "grad_norm": 1.534489631652832, - "learning_rate": 9.16673366834171e-05, - "loss": 5.4349, - "step": 8793 - }, - { - "epoch": 4.586179921773142, - "grad_norm": 1.3953675031661987, - "learning_rate": 9.166633165829146e-05, - "loss": 5.828, - "step": 8794 - }, - { - "epoch": 4.586701434159061, - "grad_norm": 1.4405044317245483, - "learning_rate": 9.166532663316584e-05, - "loss": 5.6225, - "step": 8795 - }, - { - "epoch": 4.587222946544981, - "grad_norm": 1.6320164203643799, - "learning_rate": 9.16643216080402e-05, - "loss": 5.0623, - "step": 8796 - }, - { - "epoch": 4.587744458930899, - "grad_norm": 1.4186238050460815, - "learning_rate": 9.166331658291458e-05, - "loss": 5.7288, - "step": 8797 - }, - { - "epoch": 4.588265971316819, - "grad_norm": 1.3992280960083008, - "learning_rate": 9.166231155778894e-05, - "loss": 5.1396, - "step": 8798 - }, - { - "epoch": 4.588787483702738, - "grad_norm": 1.515467643737793, - "learning_rate": 9.166130653266332e-05, - "loss": 5.5447, - "step": 8799 - }, - { - "epoch": 4.589308996088657, - "grad_norm": 1.4290887117385864, - "learning_rate": 9.16603015075377e-05, - "loss": 5.8123, - "step": 8800 - }, - { - "epoch": 4.589830508474576, - "grad_norm": 1.3807953596115112, - "learning_rate": 9.165929648241206e-05, - "loss": 5.7783, - "step": 8801 - }, - { - "epoch": 4.590352020860496, - "grad_norm": 1.4325942993164062, - "learning_rate": 9.165829145728644e-05, - "loss": 4.8434, - "step": 8802 - }, - { - "epoch": 4.590873533246414, - "grad_norm": 1.4220232963562012, - "learning_rate": 9.16572864321608e-05, - "loss": 5.678, - "step": 8803 - }, - { - "epoch": 4.591395045632334, - "grad_norm": 1.4154174327850342, - "learning_rate": 9.165628140703518e-05, - "loss": 5.6142, - "step": 8804 - }, - { - "epoch": 4.591916558018253, - "grad_norm": 1.3722482919692993, - "learning_rate": 9.165527638190955e-05, - "loss": 5.5608, - "step": 8805 - }, - { - "epoch": 4.592438070404172, - "grad_norm": 1.2543498277664185, - "learning_rate": 9.165427135678393e-05, - "loss": 5.947, - "step": 8806 - }, - { - "epoch": 4.592959582790091, - "grad_norm": 1.3852460384368896, - "learning_rate": 9.165326633165829e-05, - "loss": 5.4052, - "step": 8807 - }, - { - "epoch": 4.593481095176011, - "grad_norm": 1.495202660560608, - "learning_rate": 9.165226130653267e-05, - "loss": 5.2904, - "step": 8808 - }, - { - "epoch": 4.594002607561929, - "grad_norm": 1.4949921369552612, - "learning_rate": 9.165125628140703e-05, - "loss": 5.9613, - "step": 8809 - }, - { - "epoch": 4.594524119947849, - "grad_norm": 1.5349218845367432, - "learning_rate": 9.165025125628141e-05, - "loss": 6.0211, - "step": 8810 - }, - { - "epoch": 4.595045632333768, - "grad_norm": 1.3602027893066406, - "learning_rate": 9.164924623115579e-05, - "loss": 5.7767, - "step": 8811 - }, - { - "epoch": 4.595567144719687, - "grad_norm": 1.56438410282135, - "learning_rate": 9.164824120603015e-05, - "loss": 5.3285, - "step": 8812 - }, - { - "epoch": 4.596088657105606, - "grad_norm": 1.4626444578170776, - "learning_rate": 9.164723618090453e-05, - "loss": 5.7047, - "step": 8813 - }, - { - "epoch": 4.596610169491526, - "grad_norm": 1.467127799987793, - "learning_rate": 9.16462311557789e-05, - "loss": 5.6033, - "step": 8814 - }, - { - "epoch": 4.597131681877444, - "grad_norm": 1.5471903085708618, - "learning_rate": 9.164522613065327e-05, - "loss": 5.9021, - "step": 8815 - }, - { - "epoch": 4.597653194263364, - "grad_norm": 1.5832263231277466, - "learning_rate": 9.164422110552764e-05, - "loss": 5.0782, - "step": 8816 - }, - { - "epoch": 4.598174706649283, - "grad_norm": 1.5199694633483887, - "learning_rate": 9.164321608040201e-05, - "loss": 5.5607, - "step": 8817 - }, - { - "epoch": 4.598696219035202, - "grad_norm": 1.6109683513641357, - "learning_rate": 9.164221105527638e-05, - "loss": 5.4597, - "step": 8818 - }, - { - "epoch": 4.599217731421121, - "grad_norm": 1.409184217453003, - "learning_rate": 9.164120603015076e-05, - "loss": 5.6409, - "step": 8819 - }, - { - "epoch": 4.599739243807041, - "grad_norm": 1.5530985593795776, - "learning_rate": 9.164020100502513e-05, - "loss": 5.8951, - "step": 8820 - }, - { - "epoch": 4.600260756192959, - "grad_norm": 1.4365237951278687, - "learning_rate": 9.163919597989951e-05, - "loss": 5.9634, - "step": 8821 - }, - { - "epoch": 4.600782268578879, - "grad_norm": 1.4714112281799316, - "learning_rate": 9.163819095477388e-05, - "loss": 5.97, - "step": 8822 - }, - { - "epoch": 4.601303780964798, - "grad_norm": 1.5788793563842773, - "learning_rate": 9.163718592964825e-05, - "loss": 5.389, - "step": 8823 - }, - { - "epoch": 4.601825293350717, - "grad_norm": 1.344529628753662, - "learning_rate": 9.163618090452262e-05, - "loss": 6.0297, - "step": 8824 - }, - { - "epoch": 4.602346805736636, - "grad_norm": 1.5211730003356934, - "learning_rate": 9.163517587939698e-05, - "loss": 5.393, - "step": 8825 - }, - { - "epoch": 4.602868318122555, - "grad_norm": 1.4628199338912964, - "learning_rate": 9.163417085427136e-05, - "loss": 5.5329, - "step": 8826 - }, - { - "epoch": 4.603389830508474, - "grad_norm": 1.4561302661895752, - "learning_rate": 9.163316582914573e-05, - "loss": 5.1572, - "step": 8827 - }, - { - "epoch": 4.603911342894394, - "grad_norm": 1.4034607410430908, - "learning_rate": 9.16321608040201e-05, - "loss": 5.8246, - "step": 8828 - }, - { - "epoch": 4.604432855280313, - "grad_norm": 1.6075559854507446, - "learning_rate": 9.163115577889447e-05, - "loss": 5.1842, - "step": 8829 - }, - { - "epoch": 4.604954367666232, - "grad_norm": 1.4692732095718384, - "learning_rate": 9.163015075376885e-05, - "loss": 5.186, - "step": 8830 - }, - { - "epoch": 4.605475880052151, - "grad_norm": 1.6216390132904053, - "learning_rate": 9.162914572864322e-05, - "loss": 5.104, - "step": 8831 - }, - { - "epoch": 4.60599739243807, - "grad_norm": 1.5397733449935913, - "learning_rate": 9.16281407035176e-05, - "loss": 5.4522, - "step": 8832 - }, - { - "epoch": 4.606518904823989, - "grad_norm": 1.5935832262039185, - "learning_rate": 9.162713567839197e-05, - "loss": 5.5685, - "step": 8833 - }, - { - "epoch": 4.607040417209909, - "grad_norm": 1.5095871686935425, - "learning_rate": 9.162613065326634e-05, - "loss": 5.6136, - "step": 8834 - }, - { - "epoch": 4.607561929595828, - "grad_norm": 1.358251929283142, - "learning_rate": 9.162512562814071e-05, - "loss": 5.7297, - "step": 8835 - }, - { - "epoch": 4.608083441981747, - "grad_norm": 1.468448281288147, - "learning_rate": 9.162412060301509e-05, - "loss": 5.2262, - "step": 8836 - }, - { - "epoch": 4.608604954367666, - "grad_norm": 1.5538736581802368, - "learning_rate": 9.162311557788945e-05, - "loss": 5.4248, - "step": 8837 - }, - { - "epoch": 4.609126466753585, - "grad_norm": 1.5137683153152466, - "learning_rate": 9.162211055276381e-05, - "loss": 5.505, - "step": 8838 - }, - { - "epoch": 4.609647979139504, - "grad_norm": 1.4768542051315308, - "learning_rate": 9.162110552763819e-05, - "loss": 5.9134, - "step": 8839 - }, - { - "epoch": 4.610169491525424, - "grad_norm": 1.5216604471206665, - "learning_rate": 9.162010050251256e-05, - "loss": 4.7665, - "step": 8840 - }, - { - "epoch": 4.610691003911343, - "grad_norm": 1.5291028022766113, - "learning_rate": 9.161909547738693e-05, - "loss": 5.8653, - "step": 8841 - }, - { - "epoch": 4.611212516297262, - "grad_norm": 1.5087006092071533, - "learning_rate": 9.161809045226131e-05, - "loss": 5.7902, - "step": 8842 - }, - { - "epoch": 4.611734028683181, - "grad_norm": 1.3554452657699585, - "learning_rate": 9.161708542713569e-05, - "loss": 5.9592, - "step": 8843 - }, - { - "epoch": 4.6122555410691, - "grad_norm": 1.570695161819458, - "learning_rate": 9.161608040201005e-05, - "loss": 5.5813, - "step": 8844 - }, - { - "epoch": 4.612777053455019, - "grad_norm": 1.385163426399231, - "learning_rate": 9.161507537688443e-05, - "loss": 5.9122, - "step": 8845 - }, - { - "epoch": 4.613298565840939, - "grad_norm": 1.5171250104904175, - "learning_rate": 9.16140703517588e-05, - "loss": 4.9443, - "step": 8846 - }, - { - "epoch": 4.613820078226858, - "grad_norm": 1.4526631832122803, - "learning_rate": 9.161306532663317e-05, - "loss": 5.8144, - "step": 8847 - }, - { - "epoch": 4.614341590612777, - "grad_norm": 1.7378019094467163, - "learning_rate": 9.161206030150754e-05, - "loss": 5.1773, - "step": 8848 - }, - { - "epoch": 4.614863102998696, - "grad_norm": 1.5640228986740112, - "learning_rate": 9.161105527638192e-05, - "loss": 4.9637, - "step": 8849 - }, - { - "epoch": 4.615384615384615, - "grad_norm": 1.5026824474334717, - "learning_rate": 9.161005025125628e-05, - "loss": 5.8132, - "step": 8850 - }, - { - "epoch": 4.615906127770534, - "grad_norm": 1.4358112812042236, - "learning_rate": 9.160904522613066e-05, - "loss": 5.5209, - "step": 8851 - }, - { - "epoch": 4.616427640156454, - "grad_norm": 1.3528293371200562, - "learning_rate": 9.160804020100504e-05, - "loss": 6.2289, - "step": 8852 - }, - { - "epoch": 4.616949152542373, - "grad_norm": 1.5077273845672607, - "learning_rate": 9.16070351758794e-05, - "loss": 5.4336, - "step": 8853 - }, - { - "epoch": 4.617470664928292, - "grad_norm": 1.4363735914230347, - "learning_rate": 9.160603015075378e-05, - "loss": 5.7049, - "step": 8854 - }, - { - "epoch": 4.617992177314211, - "grad_norm": 1.5072983503341675, - "learning_rate": 9.160502512562814e-05, - "loss": 5.6564, - "step": 8855 - }, - { - "epoch": 4.61851368970013, - "grad_norm": 1.529957890510559, - "learning_rate": 9.160402010050252e-05, - "loss": 5.6295, - "step": 8856 - }, - { - "epoch": 4.6190352020860495, - "grad_norm": 1.4247279167175293, - "learning_rate": 9.160301507537688e-05, - "loss": 4.949, - "step": 8857 - }, - { - "epoch": 4.619556714471969, - "grad_norm": 1.7308032512664795, - "learning_rate": 9.160201005025126e-05, - "loss": 5.3786, - "step": 8858 - }, - { - "epoch": 4.6200782268578875, - "grad_norm": 1.5831295251846313, - "learning_rate": 9.160100502512563e-05, - "loss": 5.139, - "step": 8859 - }, - { - "epoch": 4.620599739243807, - "grad_norm": 1.5297378301620483, - "learning_rate": 9.16e-05, - "loss": 5.9431, - "step": 8860 - }, - { - "epoch": 4.621121251629726, - "grad_norm": 1.801652193069458, - "learning_rate": 9.159899497487437e-05, - "loss": 5.2146, - "step": 8861 - }, - { - "epoch": 4.621642764015645, - "grad_norm": 1.4471423625946045, - "learning_rate": 9.159798994974875e-05, - "loss": 5.8936, - "step": 8862 - }, - { - "epoch": 4.6221642764015645, - "grad_norm": 1.5732618570327759, - "learning_rate": 9.159698492462312e-05, - "loss": 5.3459, - "step": 8863 - }, - { - "epoch": 4.622685788787484, - "grad_norm": 1.4443753957748413, - "learning_rate": 9.15959798994975e-05, - "loss": 5.8642, - "step": 8864 - }, - { - "epoch": 4.6232073011734025, - "grad_norm": 1.3780276775360107, - "learning_rate": 9.159497487437187e-05, - "loss": 6.192, - "step": 8865 - }, - { - "epoch": 4.623728813559322, - "grad_norm": 1.328078269958496, - "learning_rate": 9.159396984924623e-05, - "loss": 6.1086, - "step": 8866 - }, - { - "epoch": 4.624250325945241, - "grad_norm": 1.5539742708206177, - "learning_rate": 9.159296482412061e-05, - "loss": 5.7669, - "step": 8867 - }, - { - "epoch": 4.62477183833116, - "grad_norm": 1.398806095123291, - "learning_rate": 9.159195979899497e-05, - "loss": 6.0357, - "step": 8868 - }, - { - "epoch": 4.6252933507170795, - "grad_norm": 1.3689343929290771, - "learning_rate": 9.159095477386935e-05, - "loss": 5.6601, - "step": 8869 - }, - { - "epoch": 4.625814863102999, - "grad_norm": 1.3714078664779663, - "learning_rate": 9.158994974874371e-05, - "loss": 5.8136, - "step": 8870 - }, - { - "epoch": 4.6263363754889175, - "grad_norm": 1.5395281314849854, - "learning_rate": 9.158894472361809e-05, - "loss": 5.3022, - "step": 8871 - }, - { - "epoch": 4.626857887874837, - "grad_norm": 1.399531364440918, - "learning_rate": 9.158793969849247e-05, - "loss": 5.8124, - "step": 8872 - }, - { - "epoch": 4.6273794002607564, - "grad_norm": 1.7446609735488892, - "learning_rate": 9.158693467336685e-05, - "loss": 5.3629, - "step": 8873 - }, - { - "epoch": 4.627900912646675, - "grad_norm": 1.4858002662658691, - "learning_rate": 9.158592964824121e-05, - "loss": 5.3155, - "step": 8874 - }, - { - "epoch": 4.6284224250325945, - "grad_norm": 1.4753323793411255, - "learning_rate": 9.158492462311559e-05, - "loss": 5.7798, - "step": 8875 - }, - { - "epoch": 4.628943937418514, - "grad_norm": 2.341142416000366, - "learning_rate": 9.158391959798995e-05, - "loss": 5.4353, - "step": 8876 - }, - { - "epoch": 4.6294654498044325, - "grad_norm": 1.5102437734603882, - "learning_rate": 9.158291457286433e-05, - "loss": 5.674, - "step": 8877 - }, - { - "epoch": 4.629986962190352, - "grad_norm": 1.4230986833572388, - "learning_rate": 9.15819095477387e-05, - "loss": 5.5732, - "step": 8878 - }, - { - "epoch": 4.6305084745762715, - "grad_norm": 1.5111114978790283, - "learning_rate": 9.158090452261306e-05, - "loss": 5.5511, - "step": 8879 - }, - { - "epoch": 4.63102998696219, - "grad_norm": 1.5336503982543945, - "learning_rate": 9.157989949748744e-05, - "loss": 5.4364, - "step": 8880 - }, - { - "epoch": 4.6315514993481095, - "grad_norm": 1.5581824779510498, - "learning_rate": 9.15788944723618e-05, - "loss": 5.2983, - "step": 8881 - }, - { - "epoch": 4.632073011734029, - "grad_norm": 1.4625818729400635, - "learning_rate": 9.157788944723618e-05, - "loss": 5.6499, - "step": 8882 - }, - { - "epoch": 4.6325945241199475, - "grad_norm": 1.4452638626098633, - "learning_rate": 9.157688442211056e-05, - "loss": 5.5408, - "step": 8883 - }, - { - "epoch": 4.633116036505867, - "grad_norm": 1.665658950805664, - "learning_rate": 9.157587939698494e-05, - "loss": 5.1822, - "step": 8884 - }, - { - "epoch": 4.6336375488917865, - "grad_norm": 1.4525249004364014, - "learning_rate": 9.15748743718593e-05, - "loss": 5.7062, - "step": 8885 - }, - { - "epoch": 4.634159061277705, - "grad_norm": 1.691056251525879, - "learning_rate": 9.157386934673368e-05, - "loss": 5.7051, - "step": 8886 - }, - { - "epoch": 4.6346805736636245, - "grad_norm": 1.7112469673156738, - "learning_rate": 9.157286432160804e-05, - "loss": 5.4843, - "step": 8887 - }, - { - "epoch": 4.635202086049544, - "grad_norm": 1.4338489770889282, - "learning_rate": 9.157185929648242e-05, - "loss": 5.8355, - "step": 8888 - }, - { - "epoch": 4.6357235984354626, - "grad_norm": 1.7948569059371948, - "learning_rate": 9.157085427135678e-05, - "loss": 4.9228, - "step": 8889 - }, - { - "epoch": 4.636245110821382, - "grad_norm": 1.440433382987976, - "learning_rate": 9.156984924623116e-05, - "loss": 5.9302, - "step": 8890 - }, - { - "epoch": 4.6367666232073015, - "grad_norm": 1.4503041505813599, - "learning_rate": 9.156884422110553e-05, - "loss": 5.5507, - "step": 8891 - }, - { - "epoch": 4.63728813559322, - "grad_norm": 1.3866554498672485, - "learning_rate": 9.15678391959799e-05, - "loss": 5.7597, - "step": 8892 - }, - { - "epoch": 4.6378096479791395, - "grad_norm": 1.3843916654586792, - "learning_rate": 9.156683417085428e-05, - "loss": 6.0364, - "step": 8893 - }, - { - "epoch": 4.638331160365059, - "grad_norm": 1.2612018585205078, - "learning_rate": 9.156582914572865e-05, - "loss": 5.4888, - "step": 8894 - }, - { - "epoch": 4.638852672750978, - "grad_norm": 1.5215156078338623, - "learning_rate": 9.156482412060302e-05, - "loss": 5.6918, - "step": 8895 - }, - { - "epoch": 4.639374185136897, - "grad_norm": 1.3958278894424438, - "learning_rate": 9.156381909547739e-05, - "loss": 5.8621, - "step": 8896 - }, - { - "epoch": 4.6398956975228165, - "grad_norm": 1.4099003076553345, - "learning_rate": 9.156281407035177e-05, - "loss": 5.8216, - "step": 8897 - }, - { - "epoch": 4.640417209908735, - "grad_norm": 1.3947296142578125, - "learning_rate": 9.156180904522613e-05, - "loss": 5.4328, - "step": 8898 - }, - { - "epoch": 4.6409387222946545, - "grad_norm": 1.5159941911697388, - "learning_rate": 9.156080402010051e-05, - "loss": 5.9689, - "step": 8899 - }, - { - "epoch": 4.641460234680574, - "grad_norm": 1.307809829711914, - "learning_rate": 9.155979899497487e-05, - "loss": 4.9593, - "step": 8900 - }, - { - "epoch": 4.641981747066493, - "grad_norm": 1.4676752090454102, - "learning_rate": 9.155879396984925e-05, - "loss": 5.7988, - "step": 8901 - }, - { - "epoch": 4.642503259452412, - "grad_norm": 1.4932525157928467, - "learning_rate": 9.155778894472362e-05, - "loss": 5.8482, - "step": 8902 - }, - { - "epoch": 4.6430247718383315, - "grad_norm": 1.4859497547149658, - "learning_rate": 9.155678391959799e-05, - "loss": 5.5615, - "step": 8903 - }, - { - "epoch": 4.64354628422425, - "grad_norm": 1.3487786054611206, - "learning_rate": 9.155577889447237e-05, - "loss": 5.7663, - "step": 8904 - }, - { - "epoch": 4.6440677966101696, - "grad_norm": 1.3294057846069336, - "learning_rate": 9.155477386934674e-05, - "loss": 5.8078, - "step": 8905 - }, - { - "epoch": 4.644589308996089, - "grad_norm": 1.4576622247695923, - "learning_rate": 9.155376884422111e-05, - "loss": 5.7387, - "step": 8906 - }, - { - "epoch": 4.645110821382008, - "grad_norm": 1.5576660633087158, - "learning_rate": 9.155276381909548e-05, - "loss": 5.4381, - "step": 8907 - }, - { - "epoch": 4.645632333767927, - "grad_norm": 2.2306060791015625, - "learning_rate": 9.155175879396986e-05, - "loss": 5.074, - "step": 8908 - }, - { - "epoch": 4.6461538461538465, - "grad_norm": 1.3704370260238647, - "learning_rate": 9.155075376884422e-05, - "loss": 6.1558, - "step": 8909 - }, - { - "epoch": 4.646675358539765, - "grad_norm": 1.4179614782333374, - "learning_rate": 9.15497487437186e-05, - "loss": 5.7717, - "step": 8910 - }, - { - "epoch": 4.647196870925685, - "grad_norm": 1.6250797510147095, - "learning_rate": 9.154874371859296e-05, - "loss": 5.2835, - "step": 8911 - }, - { - "epoch": 4.647718383311604, - "grad_norm": 1.549228310585022, - "learning_rate": 9.154773869346734e-05, - "loss": 5.6355, - "step": 8912 - }, - { - "epoch": 4.648239895697523, - "grad_norm": 1.3285315036773682, - "learning_rate": 9.154673366834172e-05, - "loss": 5.9905, - "step": 8913 - }, - { - "epoch": 4.648761408083442, - "grad_norm": 1.4692710638046265, - "learning_rate": 9.15457286432161e-05, - "loss": 5.7967, - "step": 8914 - }, - { - "epoch": 4.6492829204693615, - "grad_norm": 1.5902204513549805, - "learning_rate": 9.154472361809046e-05, - "loss": 5.9225, - "step": 8915 - }, - { - "epoch": 4.64980443285528, - "grad_norm": 1.3674863576889038, - "learning_rate": 9.154371859296484e-05, - "loss": 5.8373, - "step": 8916 - }, - { - "epoch": 4.6503259452412, - "grad_norm": 1.371355414390564, - "learning_rate": 9.15427135678392e-05, - "loss": 5.7728, - "step": 8917 - }, - { - "epoch": 4.650847457627119, - "grad_norm": 1.4012854099273682, - "learning_rate": 9.154170854271357e-05, - "loss": 5.7148, - "step": 8918 - }, - { - "epoch": 4.651368970013038, - "grad_norm": 1.3569447994232178, - "learning_rate": 9.154070351758794e-05, - "loss": 4.7361, - "step": 8919 - }, - { - "epoch": 4.651890482398957, - "grad_norm": 1.3645433187484741, - "learning_rate": 9.153969849246231e-05, - "loss": 5.7414, - "step": 8920 - }, - { - "epoch": 4.652411994784876, - "grad_norm": 1.3499059677124023, - "learning_rate": 9.153869346733669e-05, - "loss": 5.7975, - "step": 8921 - }, - { - "epoch": 4.652933507170795, - "grad_norm": 1.4371075630187988, - "learning_rate": 9.153768844221105e-05, - "loss": 5.6486, - "step": 8922 - }, - { - "epoch": 4.653455019556715, - "grad_norm": 1.3871843814849854, - "learning_rate": 9.153668341708543e-05, - "loss": 5.5311, - "step": 8923 - }, - { - "epoch": 4.653976531942634, - "grad_norm": 1.6170580387115479, - "learning_rate": 9.15356783919598e-05, - "loss": 5.63, - "step": 8924 - }, - { - "epoch": 4.654498044328553, - "grad_norm": 1.6660054922103882, - "learning_rate": 9.153467336683418e-05, - "loss": 5.4734, - "step": 8925 - }, - { - "epoch": 4.655019556714472, - "grad_norm": 1.3537355661392212, - "learning_rate": 9.153366834170855e-05, - "loss": 5.8816, - "step": 8926 - }, - { - "epoch": 4.655541069100391, - "grad_norm": 1.499375581741333, - "learning_rate": 9.153266331658293e-05, - "loss": 5.3875, - "step": 8927 - }, - { - "epoch": 4.65606258148631, - "grad_norm": 1.3887629508972168, - "learning_rate": 9.153165829145729e-05, - "loss": 5.9481, - "step": 8928 - }, - { - "epoch": 4.65658409387223, - "grad_norm": 1.4425872564315796, - "learning_rate": 9.153065326633167e-05, - "loss": 5.814, - "step": 8929 - }, - { - "epoch": 4.657105606258149, - "grad_norm": 1.4644118547439575, - "learning_rate": 9.152964824120603e-05, - "loss": 5.7178, - "step": 8930 - }, - { - "epoch": 4.657627118644068, - "grad_norm": 1.4849790334701538, - "learning_rate": 9.152864321608041e-05, - "loss": 5.439, - "step": 8931 - }, - { - "epoch": 4.658148631029987, - "grad_norm": 1.6091728210449219, - "learning_rate": 9.152763819095477e-05, - "loss": 5.6283, - "step": 8932 - }, - { - "epoch": 4.658670143415906, - "grad_norm": 1.3358161449432373, - "learning_rate": 9.152663316582915e-05, - "loss": 5.3685, - "step": 8933 - }, - { - "epoch": 4.659191655801825, - "grad_norm": 1.5269153118133545, - "learning_rate": 9.152562814070353e-05, - "loss": 5.755, - "step": 8934 - }, - { - "epoch": 4.659713168187745, - "grad_norm": 1.4879975318908691, - "learning_rate": 9.15246231155779e-05, - "loss": 5.5222, - "step": 8935 - }, - { - "epoch": 4.660234680573664, - "grad_norm": 1.4070513248443604, - "learning_rate": 9.152361809045227e-05, - "loss": 5.5516, - "step": 8936 - }, - { - "epoch": 4.660756192959583, - "grad_norm": 1.6090805530548096, - "learning_rate": 9.152261306532664e-05, - "loss": 5.4754, - "step": 8937 - }, - { - "epoch": 4.661277705345502, - "grad_norm": 1.5029935836791992, - "learning_rate": 9.152160804020101e-05, - "loss": 5.9625, - "step": 8938 - }, - { - "epoch": 4.661799217731421, - "grad_norm": 1.3279627561569214, - "learning_rate": 9.152060301507538e-05, - "loss": 5.874, - "step": 8939 - }, - { - "epoch": 4.66232073011734, - "grad_norm": 1.3563531637191772, - "learning_rate": 9.151959798994976e-05, - "loss": 5.6903, - "step": 8940 - }, - { - "epoch": 4.66284224250326, - "grad_norm": 1.4384465217590332, - "learning_rate": 9.151859296482412e-05, - "loss": 5.433, - "step": 8941 - }, - { - "epoch": 4.663363754889179, - "grad_norm": 1.5777437686920166, - "learning_rate": 9.15175879396985e-05, - "loss": 4.9233, - "step": 8942 - }, - { - "epoch": 4.663885267275098, - "grad_norm": 1.427125334739685, - "learning_rate": 9.151658291457286e-05, - "loss": 5.771, - "step": 8943 - }, - { - "epoch": 4.664406779661017, - "grad_norm": 1.5229756832122803, - "learning_rate": 9.151557788944724e-05, - "loss": 5.6888, - "step": 8944 - }, - { - "epoch": 4.664928292046936, - "grad_norm": 1.436295986175537, - "learning_rate": 9.151457286432162e-05, - "loss": 5.5929, - "step": 8945 - }, - { - "epoch": 4.665449804432855, - "grad_norm": 1.527327060699463, - "learning_rate": 9.151356783919598e-05, - "loss": 5.4407, - "step": 8946 - }, - { - "epoch": 4.665971316818775, - "grad_norm": 1.5620461702346802, - "learning_rate": 9.151256281407036e-05, - "loss": 5.3203, - "step": 8947 - }, - { - "epoch": 4.666492829204694, - "grad_norm": 1.583417534828186, - "learning_rate": 9.151155778894472e-05, - "loss": 5.4058, - "step": 8948 - }, - { - "epoch": 4.667014341590613, - "grad_norm": 1.3752965927124023, - "learning_rate": 9.15105527638191e-05, - "loss": 5.8548, - "step": 8949 - }, - { - "epoch": 4.667535853976532, - "grad_norm": 1.4853248596191406, - "learning_rate": 9.150954773869347e-05, - "loss": 5.1245, - "step": 8950 - }, - { - "epoch": 4.668057366362451, - "grad_norm": 3.1839616298675537, - "learning_rate": 9.150854271356784e-05, - "loss": 5.4135, - "step": 8951 - }, - { - "epoch": 4.66857887874837, - "grad_norm": 1.5835926532745361, - "learning_rate": 9.150753768844221e-05, - "loss": 5.5596, - "step": 8952 - }, - { - "epoch": 4.66910039113429, - "grad_norm": 1.4251145124435425, - "learning_rate": 9.150653266331659e-05, - "loss": 5.7411, - "step": 8953 - }, - { - "epoch": 4.669621903520208, - "grad_norm": 1.4747676849365234, - "learning_rate": 9.150552763819096e-05, - "loss": 5.7814, - "step": 8954 - }, - { - "epoch": 4.670143415906128, - "grad_norm": 1.3662261962890625, - "learning_rate": 9.150452261306534e-05, - "loss": 5.9397, - "step": 8955 - }, - { - "epoch": 4.670664928292047, - "grad_norm": 1.4662872552871704, - "learning_rate": 9.15035175879397e-05, - "loss": 5.4231, - "step": 8956 - }, - { - "epoch": 4.671186440677966, - "grad_norm": 1.602906346321106, - "learning_rate": 9.150251256281408e-05, - "loss": 5.4193, - "step": 8957 - }, - { - "epoch": 4.671707953063885, - "grad_norm": 1.4468090534210205, - "learning_rate": 9.150150753768845e-05, - "loss": 5.835, - "step": 8958 - }, - { - "epoch": 4.672229465449805, - "grad_norm": 1.4304887056350708, - "learning_rate": 9.150050251256281e-05, - "loss": 5.8972, - "step": 8959 - }, - { - "epoch": 4.672750977835723, - "grad_norm": 1.3986389636993408, - "learning_rate": 9.149949748743719e-05, - "loss": 5.7505, - "step": 8960 - }, - { - "epoch": 4.673272490221643, - "grad_norm": 1.431880235671997, - "learning_rate": 9.149849246231155e-05, - "loss": 5.8482, - "step": 8961 - }, - { - "epoch": 4.673794002607562, - "grad_norm": 1.5084764957427979, - "learning_rate": 9.149748743718593e-05, - "loss": 5.967, - "step": 8962 - }, - { - "epoch": 4.674315514993481, - "grad_norm": 1.564209222793579, - "learning_rate": 9.14964824120603e-05, - "loss": 5.4923, - "step": 8963 - }, - { - "epoch": 4.6748370273794, - "grad_norm": 1.5230516195297241, - "learning_rate": 9.149547738693467e-05, - "loss": 5.8102, - "step": 8964 - }, - { - "epoch": 4.67535853976532, - "grad_norm": 1.5132273435592651, - "learning_rate": 9.149447236180905e-05, - "loss": 5.8582, - "step": 8965 - }, - { - "epoch": 4.675880052151238, - "grad_norm": 1.4988868236541748, - "learning_rate": 9.149346733668343e-05, - "loss": 5.5347, - "step": 8966 - }, - { - "epoch": 4.676401564537158, - "grad_norm": 1.3807342052459717, - "learning_rate": 9.14924623115578e-05, - "loss": 5.5683, - "step": 8967 - }, - { - "epoch": 4.676923076923077, - "grad_norm": 1.4003046751022339, - "learning_rate": 9.149145728643217e-05, - "loss": 5.6404, - "step": 8968 - }, - { - "epoch": 4.677444589308996, - "grad_norm": 1.3544319868087769, - "learning_rate": 9.149045226130654e-05, - "loss": 5.7221, - "step": 8969 - }, - { - "epoch": 4.677966101694915, - "grad_norm": 1.4137836694717407, - "learning_rate": 9.148944723618091e-05, - "loss": 5.3125, - "step": 8970 - }, - { - "epoch": 4.678487614080835, - "grad_norm": 1.5621602535247803, - "learning_rate": 9.148844221105528e-05, - "loss": 4.9161, - "step": 8971 - }, - { - "epoch": 4.679009126466753, - "grad_norm": 1.4041999578475952, - "learning_rate": 9.148743718592964e-05, - "loss": 5.8509, - "step": 8972 - }, - { - "epoch": 4.679530638852673, - "grad_norm": 1.4235516786575317, - "learning_rate": 9.148643216080402e-05, - "loss": 5.8531, - "step": 8973 - }, - { - "epoch": 4.680052151238592, - "grad_norm": 1.737185001373291, - "learning_rate": 9.14854271356784e-05, - "loss": 5.2012, - "step": 8974 - }, - { - "epoch": 4.680573663624511, - "grad_norm": 1.4435920715332031, - "learning_rate": 9.148442211055278e-05, - "loss": 5.5608, - "step": 8975 - }, - { - "epoch": 4.68109517601043, - "grad_norm": 1.4815598726272583, - "learning_rate": 9.148341708542714e-05, - "loss": 5.8218, - "step": 8976 - }, - { - "epoch": 4.68161668839635, - "grad_norm": 1.5097143650054932, - "learning_rate": 9.148241206030152e-05, - "loss": 5.7905, - "step": 8977 - }, - { - "epoch": 4.682138200782268, - "grad_norm": 1.5182691812515259, - "learning_rate": 9.148140703517588e-05, - "loss": 5.3923, - "step": 8978 - }, - { - "epoch": 4.682659713168188, - "grad_norm": 1.4539783000946045, - "learning_rate": 9.148040201005026e-05, - "loss": 5.9627, - "step": 8979 - }, - { - "epoch": 4.683181225554107, - "grad_norm": 1.6538606882095337, - "learning_rate": 9.147939698492462e-05, - "loss": 5.5894, - "step": 8980 - }, - { - "epoch": 4.683702737940026, - "grad_norm": 1.4775079488754272, - "learning_rate": 9.1478391959799e-05, - "loss": 5.809, - "step": 8981 - }, - { - "epoch": 4.684224250325945, - "grad_norm": 1.5564122200012207, - "learning_rate": 9.147738693467337e-05, - "loss": 5.5094, - "step": 8982 - }, - { - "epoch": 4.684745762711865, - "grad_norm": 1.4807054996490479, - "learning_rate": 9.147638190954774e-05, - "loss": 5.699, - "step": 8983 - }, - { - "epoch": 4.685267275097783, - "grad_norm": 1.3894282579421997, - "learning_rate": 9.147537688442211e-05, - "loss": 5.5643, - "step": 8984 - }, - { - "epoch": 4.685788787483703, - "grad_norm": 1.5078575611114502, - "learning_rate": 9.147437185929649e-05, - "loss": 5.6954, - "step": 8985 - }, - { - "epoch": 4.686310299869622, - "grad_norm": 1.5001779794692993, - "learning_rate": 9.147336683417086e-05, - "loss": 5.7835, - "step": 8986 - }, - { - "epoch": 4.686831812255541, - "grad_norm": 1.5259603261947632, - "learning_rate": 9.147236180904523e-05, - "loss": 5.5554, - "step": 8987 - }, - { - "epoch": 4.68735332464146, - "grad_norm": 1.6071741580963135, - "learning_rate": 9.147135678391961e-05, - "loss": 5.2273, - "step": 8988 - }, - { - "epoch": 4.68787483702738, - "grad_norm": 1.359210729598999, - "learning_rate": 9.147035175879397e-05, - "loss": 5.1423, - "step": 8989 - }, - { - "epoch": 4.688396349413298, - "grad_norm": 1.4546308517456055, - "learning_rate": 9.146934673366835e-05, - "loss": 5.9823, - "step": 8990 - }, - { - "epoch": 4.688917861799218, - "grad_norm": 1.4491236209869385, - "learning_rate": 9.146834170854271e-05, - "loss": 6.1331, - "step": 8991 - }, - { - "epoch": 4.689439374185137, - "grad_norm": 1.6614105701446533, - "learning_rate": 9.146733668341709e-05, - "loss": 5.7276, - "step": 8992 - }, - { - "epoch": 4.689960886571056, - "grad_norm": 1.532353401184082, - "learning_rate": 9.146633165829146e-05, - "loss": 5.66, - "step": 8993 - }, - { - "epoch": 4.690482398956975, - "grad_norm": 1.4076015949249268, - "learning_rate": 9.146532663316583e-05, - "loss": 5.6651, - "step": 8994 - }, - { - "epoch": 4.691003911342895, - "grad_norm": 1.4339251518249512, - "learning_rate": 9.146432160804021e-05, - "loss": 6.0726, - "step": 8995 - }, - { - "epoch": 4.691525423728813, - "grad_norm": 1.4163329601287842, - "learning_rate": 9.146331658291459e-05, - "loss": 5.8693, - "step": 8996 - }, - { - "epoch": 4.692046936114733, - "grad_norm": 1.3567012548446655, - "learning_rate": 9.146231155778895e-05, - "loss": 6.0816, - "step": 8997 - }, - { - "epoch": 4.692568448500652, - "grad_norm": 1.3494099378585815, - "learning_rate": 9.146130653266332e-05, - "loss": 5.7715, - "step": 8998 - }, - { - "epoch": 4.693089960886571, - "grad_norm": 1.460433840751648, - "learning_rate": 9.14603015075377e-05, - "loss": 5.6225, - "step": 8999 - }, - { - "epoch": 4.69361147327249, - "grad_norm": 1.6116814613342285, - "learning_rate": 9.145929648241206e-05, - "loss": 5.0279, - "step": 9000 - }, - { - "epoch": 4.69413298565841, - "grad_norm": 1.3569228649139404, - "learning_rate": 9.145829145728644e-05, - "loss": 5.827, - "step": 9001 - }, - { - "epoch": 4.694654498044328, - "grad_norm": 1.4466495513916016, - "learning_rate": 9.14572864321608e-05, - "loss": 5.5039, - "step": 9002 - }, - { - "epoch": 4.695176010430248, - "grad_norm": 1.4493533372879028, - "learning_rate": 9.145628140703518e-05, - "loss": 5.5406, - "step": 9003 - }, - { - "epoch": 4.695697522816167, - "grad_norm": 1.4221937656402588, - "learning_rate": 9.145527638190954e-05, - "loss": 6.0543, - "step": 9004 - }, - { - "epoch": 4.696219035202086, - "grad_norm": 1.4373024702072144, - "learning_rate": 9.145427135678392e-05, - "loss": 5.785, - "step": 9005 - }, - { - "epoch": 4.696740547588005, - "grad_norm": 1.7649263143539429, - "learning_rate": 9.14532663316583e-05, - "loss": 5.2041, - "step": 9006 - }, - { - "epoch": 4.697262059973925, - "grad_norm": 1.4852936267852783, - "learning_rate": 9.145226130653268e-05, - "loss": 5.6601, - "step": 9007 - }, - { - "epoch": 4.697783572359843, - "grad_norm": 1.399396538734436, - "learning_rate": 9.145125628140704e-05, - "loss": 5.5815, - "step": 9008 - }, - { - "epoch": 4.698305084745763, - "grad_norm": 1.3456224203109741, - "learning_rate": 9.145025125628142e-05, - "loss": 6.0985, - "step": 9009 - }, - { - "epoch": 4.698826597131681, - "grad_norm": 1.3736693859100342, - "learning_rate": 9.144924623115578e-05, - "loss": 5.9269, - "step": 9010 - }, - { - "epoch": 4.699348109517601, - "grad_norm": 1.3954074382781982, - "learning_rate": 9.144824120603015e-05, - "loss": 5.5458, - "step": 9011 - }, - { - "epoch": 4.69986962190352, - "grad_norm": 1.4896233081817627, - "learning_rate": 9.144723618090453e-05, - "loss": 5.7335, - "step": 9012 - }, - { - "epoch": 4.70039113428944, - "grad_norm": 1.7238097190856934, - "learning_rate": 9.144623115577889e-05, - "loss": 4.7773, - "step": 9013 - }, - { - "epoch": 4.700912646675358, - "grad_norm": 1.5320903062820435, - "learning_rate": 9.144522613065327e-05, - "loss": 5.4113, - "step": 9014 - }, - { - "epoch": 4.701434159061278, - "grad_norm": 1.4311516284942627, - "learning_rate": 9.144422110552763e-05, - "loss": 5.8728, - "step": 9015 - }, - { - "epoch": 4.701955671447196, - "grad_norm": 1.440740704536438, - "learning_rate": 9.144321608040201e-05, - "loss": 5.4243, - "step": 9016 - }, - { - "epoch": 4.702477183833116, - "grad_norm": 1.3550705909729004, - "learning_rate": 9.144221105527639e-05, - "loss": 5.9962, - "step": 9017 - }, - { - "epoch": 4.702998696219035, - "grad_norm": 1.640198826789856, - "learning_rate": 9.144120603015077e-05, - "loss": 5.8194, - "step": 9018 - }, - { - "epoch": 4.703520208604955, - "grad_norm": 1.4457640647888184, - "learning_rate": 9.144020100502513e-05, - "loss": 5.6678, - "step": 9019 - }, - { - "epoch": 4.704041720990873, - "grad_norm": 1.4302650690078735, - "learning_rate": 9.143919597989951e-05, - "loss": 5.6335, - "step": 9020 - }, - { - "epoch": 4.704563233376793, - "grad_norm": 1.5368174314498901, - "learning_rate": 9.143819095477387e-05, - "loss": 5.6934, - "step": 9021 - }, - { - "epoch": 4.705084745762711, - "grad_norm": 1.495213270187378, - "learning_rate": 9.143718592964825e-05, - "loss": 5.6112, - "step": 9022 - }, - { - "epoch": 4.705606258148631, - "grad_norm": 1.5291413068771362, - "learning_rate": 9.143618090452261e-05, - "loss": 5.541, - "step": 9023 - }, - { - "epoch": 4.70612777053455, - "grad_norm": 1.4981071949005127, - "learning_rate": 9.143517587939699e-05, - "loss": 5.6375, - "step": 9024 - }, - { - "epoch": 4.70664928292047, - "grad_norm": 1.413459062576294, - "learning_rate": 9.143417085427136e-05, - "loss": 5.6203, - "step": 9025 - }, - { - "epoch": 4.707170795306388, - "grad_norm": 1.4102391004562378, - "learning_rate": 9.143316582914573e-05, - "loss": 6.1645, - "step": 9026 - }, - { - "epoch": 4.707692307692308, - "grad_norm": 1.6332954168319702, - "learning_rate": 9.143216080402011e-05, - "loss": 5.2036, - "step": 9027 - }, - { - "epoch": 4.708213820078226, - "grad_norm": 1.3279831409454346, - "learning_rate": 9.143115577889448e-05, - "loss": 5.8759, - "step": 9028 - }, - { - "epoch": 4.708735332464146, - "grad_norm": 1.4790695905685425, - "learning_rate": 9.143015075376885e-05, - "loss": 5.8154, - "step": 9029 - }, - { - "epoch": 4.709256844850065, - "grad_norm": 1.5416569709777832, - "learning_rate": 9.142914572864322e-05, - "loss": 5.9505, - "step": 9030 - }, - { - "epoch": 4.709778357235985, - "grad_norm": 1.4662423133850098, - "learning_rate": 9.14281407035176e-05, - "loss": 5.7634, - "step": 9031 - }, - { - "epoch": 4.710299869621903, - "grad_norm": 1.4871242046356201, - "learning_rate": 9.142713567839196e-05, - "loss": 5.627, - "step": 9032 - }, - { - "epoch": 4.710821382007823, - "grad_norm": 2.0842692852020264, - "learning_rate": 9.142613065326634e-05, - "loss": 4.6802, - "step": 9033 - }, - { - "epoch": 4.711342894393741, - "grad_norm": 1.5658385753631592, - "learning_rate": 9.14251256281407e-05, - "loss": 5.7306, - "step": 9034 - }, - { - "epoch": 4.711864406779661, - "grad_norm": 1.3943983316421509, - "learning_rate": 9.142412060301508e-05, - "loss": 5.9888, - "step": 9035 - }, - { - "epoch": 4.71238591916558, - "grad_norm": 1.3971564769744873, - "learning_rate": 9.142311557788944e-05, - "loss": 5.5462, - "step": 9036 - }, - { - "epoch": 4.7129074315515, - "grad_norm": 1.4627493619918823, - "learning_rate": 9.142211055276382e-05, - "loss": 4.9882, - "step": 9037 - }, - { - "epoch": 4.713428943937418, - "grad_norm": 1.451422095298767, - "learning_rate": 9.14211055276382e-05, - "loss": 5.7891, - "step": 9038 - }, - { - "epoch": 4.713950456323338, - "grad_norm": 1.4828685522079468, - "learning_rate": 9.142010050251256e-05, - "loss": 6.0228, - "step": 9039 - }, - { - "epoch": 4.7144719687092564, - "grad_norm": 1.482391595840454, - "learning_rate": 9.141909547738694e-05, - "loss": 5.9859, - "step": 9040 - }, - { - "epoch": 4.714993481095176, - "grad_norm": 1.4323064088821411, - "learning_rate": 9.14180904522613e-05, - "loss": 6.0346, - "step": 9041 - }, - { - "epoch": 4.715514993481095, - "grad_norm": 1.3666317462921143, - "learning_rate": 9.141708542713568e-05, - "loss": 6.1018, - "step": 9042 - }, - { - "epoch": 4.716036505867015, - "grad_norm": 1.6140416860580444, - "learning_rate": 9.141608040201005e-05, - "loss": 5.1783, - "step": 9043 - }, - { - "epoch": 4.716558018252933, - "grad_norm": 1.3982648849487305, - "learning_rate": 9.141507537688443e-05, - "loss": 5.5526, - "step": 9044 - }, - { - "epoch": 4.717079530638853, - "grad_norm": 1.3734230995178223, - "learning_rate": 9.141407035175879e-05, - "loss": 5.9702, - "step": 9045 - }, - { - "epoch": 4.7176010430247715, - "grad_norm": 1.3606815338134766, - "learning_rate": 9.141306532663317e-05, - "loss": 5.7344, - "step": 9046 - }, - { - "epoch": 4.718122555410691, - "grad_norm": 1.4362610578536987, - "learning_rate": 9.141206030150755e-05, - "loss": 5.844, - "step": 9047 - }, - { - "epoch": 4.71864406779661, - "grad_norm": 1.369992971420288, - "learning_rate": 9.141105527638192e-05, - "loss": 6.0398, - "step": 9048 - }, - { - "epoch": 4.719165580182529, - "grad_norm": 1.417832374572754, - "learning_rate": 9.141005025125629e-05, - "loss": 5.8162, - "step": 9049 - }, - { - "epoch": 4.719687092568448, - "grad_norm": 1.4204243421554565, - "learning_rate": 9.140904522613067e-05, - "loss": 5.6441, - "step": 9050 - }, - { - "epoch": 4.720208604954368, - "grad_norm": 1.4764131307601929, - "learning_rate": 9.140804020100503e-05, - "loss": 5.8753, - "step": 9051 - }, - { - "epoch": 4.7207301173402865, - "grad_norm": 1.651686429977417, - "learning_rate": 9.14070351758794e-05, - "loss": 5.5999, - "step": 9052 - }, - { - "epoch": 4.721251629726206, - "grad_norm": 1.3869261741638184, - "learning_rate": 9.140603015075377e-05, - "loss": 5.748, - "step": 9053 - }, - { - "epoch": 4.721773142112125, - "grad_norm": 1.6321806907653809, - "learning_rate": 9.140502512562814e-05, - "loss": 5.4402, - "step": 9054 - }, - { - "epoch": 4.722294654498044, - "grad_norm": 1.4111485481262207, - "learning_rate": 9.140402010050251e-05, - "loss": 5.9797, - "step": 9055 - }, - { - "epoch": 4.722816166883963, - "grad_norm": 1.4663439989089966, - "learning_rate": 9.140301507537688e-05, - "loss": 5.4553, - "step": 9056 - }, - { - "epoch": 4.723337679269883, - "grad_norm": 1.4906105995178223, - "learning_rate": 9.140201005025126e-05, - "loss": 5.7151, - "step": 9057 - }, - { - "epoch": 4.7238591916558015, - "grad_norm": 1.602861762046814, - "learning_rate": 9.140100502512563e-05, - "loss": 5.2296, - "step": 9058 - }, - { - "epoch": 4.724380704041721, - "grad_norm": 1.5276763439178467, - "learning_rate": 9.140000000000001e-05, - "loss": 5.2685, - "step": 9059 - }, - { - "epoch": 4.72490221642764, - "grad_norm": 1.4650245904922485, - "learning_rate": 9.139899497487438e-05, - "loss": 5.307, - "step": 9060 - }, - { - "epoch": 4.725423728813559, - "grad_norm": 1.5386643409729004, - "learning_rate": 9.139798994974875e-05, - "loss": 5.2643, - "step": 9061 - }, - { - "epoch": 4.7259452411994785, - "grad_norm": 1.345570683479309, - "learning_rate": 9.139698492462312e-05, - "loss": 5.6409, - "step": 9062 - }, - { - "epoch": 4.726466753585398, - "grad_norm": 1.512941837310791, - "learning_rate": 9.13959798994975e-05, - "loss": 5.6289, - "step": 9063 - }, - { - "epoch": 4.7269882659713165, - "grad_norm": 1.60698664188385, - "learning_rate": 9.139497487437186e-05, - "loss": 5.1632, - "step": 9064 - }, - { - "epoch": 4.727509778357236, - "grad_norm": 1.3922162055969238, - "learning_rate": 9.139396984924623e-05, - "loss": 5.5216, - "step": 9065 - }, - { - "epoch": 4.728031290743155, - "grad_norm": 1.6042455434799194, - "learning_rate": 9.13929648241206e-05, - "loss": 5.5157, - "step": 9066 - }, - { - "epoch": 4.728552803129074, - "grad_norm": 1.4712001085281372, - "learning_rate": 9.139195979899498e-05, - "loss": 5.5428, - "step": 9067 - }, - { - "epoch": 4.7290743155149935, - "grad_norm": 1.8451874256134033, - "learning_rate": 9.139095477386936e-05, - "loss": 5.4963, - "step": 9068 - }, - { - "epoch": 4.729595827900913, - "grad_norm": 1.72721266746521, - "learning_rate": 9.138994974874372e-05, - "loss": 5.032, - "step": 9069 - }, - { - "epoch": 4.7301173402868315, - "grad_norm": 1.446954369544983, - "learning_rate": 9.13889447236181e-05, - "loss": 5.8181, - "step": 9070 - }, - { - "epoch": 4.730638852672751, - "grad_norm": 1.4077601432800293, - "learning_rate": 9.138793969849247e-05, - "loss": 5.7875, - "step": 9071 - }, - { - "epoch": 4.73116036505867, - "grad_norm": 1.346549153327942, - "learning_rate": 9.138693467336684e-05, - "loss": 5.9567, - "step": 9072 - }, - { - "epoch": 4.731681877444589, - "grad_norm": 1.4019945859909058, - "learning_rate": 9.138592964824121e-05, - "loss": 5.8842, - "step": 9073 - }, - { - "epoch": 4.7322033898305085, - "grad_norm": 1.4408315420150757, - "learning_rate": 9.138492462311559e-05, - "loss": 5.8469, - "step": 9074 - }, - { - "epoch": 4.732724902216428, - "grad_norm": 1.4403223991394043, - "learning_rate": 9.138391959798995e-05, - "loss": 5.9012, - "step": 9075 - }, - { - "epoch": 4.7332464146023465, - "grad_norm": 1.887406826019287, - "learning_rate": 9.138291457286433e-05, - "loss": 5.5895, - "step": 9076 - }, - { - "epoch": 4.733767926988266, - "grad_norm": 1.5437041521072388, - "learning_rate": 9.138190954773869e-05, - "loss": 5.5019, - "step": 9077 - }, - { - "epoch": 4.7342894393741854, - "grad_norm": 1.9618173837661743, - "learning_rate": 9.138090452261307e-05, - "loss": 5.4572, - "step": 9078 - }, - { - "epoch": 4.734810951760104, - "grad_norm": 1.5235689878463745, - "learning_rate": 9.137989949748745e-05, - "loss": 5.6323, - "step": 9079 - }, - { - "epoch": 4.7353324641460235, - "grad_norm": 1.4628865718841553, - "learning_rate": 9.137889447236181e-05, - "loss": 5.8498, - "step": 9080 - }, - { - "epoch": 4.735853976531943, - "grad_norm": 1.586213231086731, - "learning_rate": 9.137788944723619e-05, - "loss": 5.4729, - "step": 9081 - }, - { - "epoch": 4.7363754889178615, - "grad_norm": 1.5116747617721558, - "learning_rate": 9.137688442211055e-05, - "loss": 5.3241, - "step": 9082 - }, - { - "epoch": 4.736897001303781, - "grad_norm": 1.4192577600479126, - "learning_rate": 9.137587939698493e-05, - "loss": 5.716, - "step": 9083 - }, - { - "epoch": 4.7374185136897005, - "grad_norm": 1.6047323942184448, - "learning_rate": 9.13748743718593e-05, - "loss": 5.0733, - "step": 9084 - }, - { - "epoch": 4.737940026075619, - "grad_norm": 1.4939124584197998, - "learning_rate": 9.137386934673367e-05, - "loss": 5.2074, - "step": 9085 - }, - { - "epoch": 4.7384615384615385, - "grad_norm": 1.5527422428131104, - "learning_rate": 9.137286432160804e-05, - "loss": 5.4307, - "step": 9086 - }, - { - "epoch": 4.738983050847458, - "grad_norm": 1.457626461982727, - "learning_rate": 9.137185929648242e-05, - "loss": 5.5822, - "step": 9087 - }, - { - "epoch": 4.7395045632333765, - "grad_norm": 1.3833612203598022, - "learning_rate": 9.13708542713568e-05, - "loss": 5.5735, - "step": 9088 - }, - { - "epoch": 4.740026075619296, - "grad_norm": 1.473638653755188, - "learning_rate": 9.136984924623117e-05, - "loss": 5.451, - "step": 9089 - }, - { - "epoch": 4.7405475880052155, - "grad_norm": 1.5704928636550903, - "learning_rate": 9.136884422110554e-05, - "loss": 5.6413, - "step": 9090 - }, - { - "epoch": 4.741069100391134, - "grad_norm": 1.5840632915496826, - "learning_rate": 9.13678391959799e-05, - "loss": 5.9394, - "step": 9091 - }, - { - "epoch": 4.7415906127770535, - "grad_norm": 1.3526211977005005, - "learning_rate": 9.136683417085428e-05, - "loss": 5.6774, - "step": 9092 - }, - { - "epoch": 4.742112125162973, - "grad_norm": 1.466305136680603, - "learning_rate": 9.136582914572864e-05, - "loss": 5.4094, - "step": 9093 - }, - { - "epoch": 4.742633637548892, - "grad_norm": 1.5761760473251343, - "learning_rate": 9.136482412060302e-05, - "loss": 5.7431, - "step": 9094 - }, - { - "epoch": 4.743155149934811, - "grad_norm": 1.6857292652130127, - "learning_rate": 9.136381909547738e-05, - "loss": 5.1394, - "step": 9095 - }, - { - "epoch": 4.7436766623207305, - "grad_norm": 1.3879951238632202, - "learning_rate": 9.136281407035176e-05, - "loss": 5.8933, - "step": 9096 - }, - { - "epoch": 4.744198174706649, - "grad_norm": 1.4904520511627197, - "learning_rate": 9.136180904522613e-05, - "loss": 5.3952, - "step": 9097 - }, - { - "epoch": 4.7447196870925685, - "grad_norm": 1.4259960651397705, - "learning_rate": 9.13608040201005e-05, - "loss": 5.5144, - "step": 9098 - }, - { - "epoch": 4.745241199478488, - "grad_norm": 1.4510350227355957, - "learning_rate": 9.135979899497488e-05, - "loss": 5.2441, - "step": 9099 - }, - { - "epoch": 4.745762711864407, - "grad_norm": 1.461808443069458, - "learning_rate": 9.135879396984926e-05, - "loss": 5.5163, - "step": 9100 - }, - { - "epoch": 4.746284224250326, - "grad_norm": 1.4561563730239868, - "learning_rate": 9.135778894472362e-05, - "loss": 5.8927, - "step": 9101 - }, - { - "epoch": 4.7468057366362455, - "grad_norm": 1.5193779468536377, - "learning_rate": 9.1356783919598e-05, - "loss": 5.8097, - "step": 9102 - }, - { - "epoch": 4.747327249022164, - "grad_norm": 1.5146710872650146, - "learning_rate": 9.135577889447237e-05, - "loss": 5.5806, - "step": 9103 - }, - { - "epoch": 4.7478487614080835, - "grad_norm": 1.6041311025619507, - "learning_rate": 9.135477386934673e-05, - "loss": 5.0854, - "step": 9104 - }, - { - "epoch": 4.748370273794002, - "grad_norm": 1.437453269958496, - "learning_rate": 9.135376884422111e-05, - "loss": 5.7418, - "step": 9105 - }, - { - "epoch": 4.748891786179922, - "grad_norm": 1.6351330280303955, - "learning_rate": 9.135276381909547e-05, - "loss": 4.8252, - "step": 9106 - }, - { - "epoch": 4.749413298565841, - "grad_norm": 1.6741427183151245, - "learning_rate": 9.135175879396985e-05, - "loss": 5.8025, - "step": 9107 - }, - { - "epoch": 4.7499348109517605, - "grad_norm": 1.5103778839111328, - "learning_rate": 9.135075376884423e-05, - "loss": 5.4298, - "step": 9108 - }, - { - "epoch": 4.750456323337679, - "grad_norm": 1.3816335201263428, - "learning_rate": 9.13497487437186e-05, - "loss": 5.9762, - "step": 9109 - }, - { - "epoch": 4.7509778357235986, - "grad_norm": 1.5969825983047485, - "learning_rate": 9.134874371859297e-05, - "loss": 5.8125, - "step": 9110 - }, - { - "epoch": 4.751499348109517, - "grad_norm": 1.3938772678375244, - "learning_rate": 9.134773869346735e-05, - "loss": 5.7698, - "step": 9111 - }, - { - "epoch": 4.752020860495437, - "grad_norm": 1.5466505289077759, - "learning_rate": 9.134673366834171e-05, - "loss": 5.3483, - "step": 9112 - }, - { - "epoch": 4.752542372881356, - "grad_norm": 1.631811499595642, - "learning_rate": 9.134572864321609e-05, - "loss": 5.4662, - "step": 9113 - }, - { - "epoch": 4.7530638852672755, - "grad_norm": 1.4240127801895142, - "learning_rate": 9.134472361809045e-05, - "loss": 5.8259, - "step": 9114 - }, - { - "epoch": 4.753585397653194, - "grad_norm": 1.4549988508224487, - "learning_rate": 9.134371859296483e-05, - "loss": 5.8694, - "step": 9115 - }, - { - "epoch": 4.754106910039114, - "grad_norm": 1.4164403676986694, - "learning_rate": 9.13427135678392e-05, - "loss": 5.3762, - "step": 9116 - }, - { - "epoch": 4.754628422425032, - "grad_norm": 1.2941752672195435, - "learning_rate": 9.134170854271357e-05, - "loss": 6.1331, - "step": 9117 - }, - { - "epoch": 4.755149934810952, - "grad_norm": 1.4046121835708618, - "learning_rate": 9.134070351758794e-05, - "loss": 6.0018, - "step": 9118 - }, - { - "epoch": 4.755671447196871, - "grad_norm": 1.362645149230957, - "learning_rate": 9.133969849246232e-05, - "loss": 5.8689, - "step": 9119 - }, - { - "epoch": 4.7561929595827905, - "grad_norm": 1.3691561222076416, - "learning_rate": 9.13386934673367e-05, - "loss": 5.9532, - "step": 9120 - }, - { - "epoch": 4.756714471968709, - "grad_norm": 1.5510014295578003, - "learning_rate": 9.133768844221106e-05, - "loss": 5.2709, - "step": 9121 - }, - { - "epoch": 4.757235984354629, - "grad_norm": 1.4453661441802979, - "learning_rate": 9.133668341708544e-05, - "loss": 5.9252, - "step": 9122 - }, - { - "epoch": 4.757757496740547, - "grad_norm": 1.5274810791015625, - "learning_rate": 9.13356783919598e-05, - "loss": 5.6972, - "step": 9123 - }, - { - "epoch": 4.758279009126467, - "grad_norm": 1.6068421602249146, - "learning_rate": 9.133467336683418e-05, - "loss": 5.6297, - "step": 9124 - }, - { - "epoch": 4.758800521512386, - "grad_norm": 1.4487580060958862, - "learning_rate": 9.133366834170854e-05, - "loss": 5.8205, - "step": 9125 - }, - { - "epoch": 4.7593220338983055, - "grad_norm": 1.5535725355148315, - "learning_rate": 9.133266331658292e-05, - "loss": 5.4052, - "step": 9126 - }, - { - "epoch": 4.759843546284224, - "grad_norm": 1.5647845268249512, - "learning_rate": 9.133165829145728e-05, - "loss": 5.7759, - "step": 9127 - }, - { - "epoch": 4.760365058670144, - "grad_norm": 1.560448169708252, - "learning_rate": 9.133065326633166e-05, - "loss": 5.6621, - "step": 9128 - }, - { - "epoch": 4.760886571056062, - "grad_norm": 1.506197214126587, - "learning_rate": 9.132964824120604e-05, - "loss": 5.7506, - "step": 9129 - }, - { - "epoch": 4.761408083441982, - "grad_norm": 1.4386165142059326, - "learning_rate": 9.132864321608042e-05, - "loss": 5.5044, - "step": 9130 - }, - { - "epoch": 4.761929595827901, - "grad_norm": 1.301072359085083, - "learning_rate": 9.132763819095478e-05, - "loss": 6.0413, - "step": 9131 - }, - { - "epoch": 4.762451108213821, - "grad_norm": 1.5125846862792969, - "learning_rate": 9.132663316582915e-05, - "loss": 5.596, - "step": 9132 - }, - { - "epoch": 4.762972620599739, - "grad_norm": 1.4974647760391235, - "learning_rate": 9.132562814070352e-05, - "loss": 5.6211, - "step": 9133 - }, - { - "epoch": 4.763494132985659, - "grad_norm": 1.3536889553070068, - "learning_rate": 9.132462311557789e-05, - "loss": 5.8093, - "step": 9134 - }, - { - "epoch": 4.764015645371577, - "grad_norm": 1.540463924407959, - "learning_rate": 9.132361809045227e-05, - "loss": 5.6906, - "step": 9135 - }, - { - "epoch": 4.764537157757497, - "grad_norm": 1.395141839981079, - "learning_rate": 9.132261306532663e-05, - "loss": 5.6269, - "step": 9136 - }, - { - "epoch": 4.765058670143416, - "grad_norm": 1.4709663391113281, - "learning_rate": 9.132160804020101e-05, - "loss": 5.7312, - "step": 9137 - }, - { - "epoch": 4.765580182529335, - "grad_norm": 1.4944813251495361, - "learning_rate": 9.132060301507537e-05, - "loss": 5.2909, - "step": 9138 - }, - { - "epoch": 4.766101694915254, - "grad_norm": 1.3867850303649902, - "learning_rate": 9.131959798994975e-05, - "loss": 5.8935, - "step": 9139 - }, - { - "epoch": 4.766623207301174, - "grad_norm": 1.5259608030319214, - "learning_rate": 9.131859296482413e-05, - "loss": 5.8963, - "step": 9140 - }, - { - "epoch": 4.767144719687092, - "grad_norm": 1.3946045637130737, - "learning_rate": 9.13175879396985e-05, - "loss": 5.9652, - "step": 9141 - }, - { - "epoch": 4.767666232073012, - "grad_norm": 1.526745319366455, - "learning_rate": 9.131658291457287e-05, - "loss": 5.7687, - "step": 9142 - }, - { - "epoch": 4.768187744458931, - "grad_norm": 1.4344249963760376, - "learning_rate": 9.131557788944725e-05, - "loss": 5.5673, - "step": 9143 - }, - { - "epoch": 4.76870925684485, - "grad_norm": 1.6382249593734741, - "learning_rate": 9.131457286432161e-05, - "loss": 4.9605, - "step": 9144 - }, - { - "epoch": 4.769230769230769, - "grad_norm": 1.556731939315796, - "learning_rate": 9.131356783919598e-05, - "loss": 5.2825, - "step": 9145 - }, - { - "epoch": 4.769752281616689, - "grad_norm": 1.575476050376892, - "learning_rate": 9.131256281407036e-05, - "loss": 4.8136, - "step": 9146 - }, - { - "epoch": 4.770273794002607, - "grad_norm": 1.874936580657959, - "learning_rate": 9.131155778894472e-05, - "loss": 5.3173, - "step": 9147 - }, - { - "epoch": 4.770795306388527, - "grad_norm": 1.447564959526062, - "learning_rate": 9.13105527638191e-05, - "loss": 5.4181, - "step": 9148 - }, - { - "epoch": 4.771316818774446, - "grad_norm": 1.5838466882705688, - "learning_rate": 9.130954773869348e-05, - "loss": 5.7059, - "step": 9149 - }, - { - "epoch": 4.771838331160365, - "grad_norm": 1.4435768127441406, - "learning_rate": 9.130854271356785e-05, - "loss": 5.6171, - "step": 9150 - }, - { - "epoch": 4.772359843546284, - "grad_norm": 1.3939099311828613, - "learning_rate": 9.130753768844222e-05, - "loss": 5.7058, - "step": 9151 - }, - { - "epoch": 4.772881355932204, - "grad_norm": 1.5848551988601685, - "learning_rate": 9.13065326633166e-05, - "loss": 5.6698, - "step": 9152 - }, - { - "epoch": 4.773402868318122, - "grad_norm": 1.4105387926101685, - "learning_rate": 9.130552763819096e-05, - "loss": 5.2239, - "step": 9153 - }, - { - "epoch": 4.773924380704042, - "grad_norm": 1.502700924873352, - "learning_rate": 9.130452261306534e-05, - "loss": 5.6653, - "step": 9154 - }, - { - "epoch": 4.774445893089961, - "grad_norm": 1.4572415351867676, - "learning_rate": 9.13035175879397e-05, - "loss": 5.1227, - "step": 9155 - }, - { - "epoch": 4.77496740547588, - "grad_norm": 1.493349552154541, - "learning_rate": 9.130251256281408e-05, - "loss": 5.5554, - "step": 9156 - }, - { - "epoch": 4.775488917861799, - "grad_norm": 1.4434453248977661, - "learning_rate": 9.130150753768844e-05, - "loss": 5.5354, - "step": 9157 - }, - { - "epoch": 4.776010430247719, - "grad_norm": 1.3700617551803589, - "learning_rate": 9.130050251256281e-05, - "loss": 5.7642, - "step": 9158 - }, - { - "epoch": 4.776531942633637, - "grad_norm": 1.3702868223190308, - "learning_rate": 9.129949748743719e-05, - "loss": 5.802, - "step": 9159 - }, - { - "epoch": 4.777053455019557, - "grad_norm": 1.4306272268295288, - "learning_rate": 9.129849246231156e-05, - "loss": 5.5392, - "step": 9160 - }, - { - "epoch": 4.777574967405476, - "grad_norm": 1.3451359272003174, - "learning_rate": 9.129748743718594e-05, - "loss": 5.2147, - "step": 9161 - }, - { - "epoch": 4.778096479791395, - "grad_norm": 1.317811131477356, - "learning_rate": 9.12964824120603e-05, - "loss": 6.1418, - "step": 9162 - }, - { - "epoch": 4.778617992177314, - "grad_norm": 1.4160702228546143, - "learning_rate": 9.129547738693468e-05, - "loss": 5.3784, - "step": 9163 - }, - { - "epoch": 4.779139504563234, - "grad_norm": 1.4078781604766846, - "learning_rate": 9.129447236180905e-05, - "loss": 5.8647, - "step": 9164 - }, - { - "epoch": 4.779661016949152, - "grad_norm": 1.3747776746749878, - "learning_rate": 9.129346733668343e-05, - "loss": 5.6215, - "step": 9165 - }, - { - "epoch": 4.780182529335072, - "grad_norm": 1.5012469291687012, - "learning_rate": 9.129246231155779e-05, - "loss": 5.5002, - "step": 9166 - }, - { - "epoch": 4.780704041720991, - "grad_norm": 1.4056150913238525, - "learning_rate": 9.129145728643217e-05, - "loss": 5.6941, - "step": 9167 - }, - { - "epoch": 4.78122555410691, - "grad_norm": 1.4950807094573975, - "learning_rate": 9.129045226130653e-05, - "loss": 5.2233, - "step": 9168 - }, - { - "epoch": 4.781747066492829, - "grad_norm": 1.4580814838409424, - "learning_rate": 9.128944723618091e-05, - "loss": 5.281, - "step": 9169 - }, - { - "epoch": 4.782268578878749, - "grad_norm": 1.3682948350906372, - "learning_rate": 9.128844221105527e-05, - "loss": 6.0541, - "step": 9170 - }, - { - "epoch": 4.782790091264667, - "grad_norm": 1.407299280166626, - "learning_rate": 9.128743718592965e-05, - "loss": 5.7547, - "step": 9171 - }, - { - "epoch": 4.783311603650587, - "grad_norm": 1.4731935262680054, - "learning_rate": 9.128643216080403e-05, - "loss": 5.2749, - "step": 9172 - }, - { - "epoch": 4.783833116036506, - "grad_norm": 1.5174001455307007, - "learning_rate": 9.12854271356784e-05, - "loss": 5.5558, - "step": 9173 - }, - { - "epoch": 4.784354628422425, - "grad_norm": 1.4395952224731445, - "learning_rate": 9.128442211055277e-05, - "loss": 5.251, - "step": 9174 - }, - { - "epoch": 4.784876140808344, - "grad_norm": 1.7391411066055298, - "learning_rate": 9.128341708542714e-05, - "loss": 5.6005, - "step": 9175 - }, - { - "epoch": 4.785397653194264, - "grad_norm": 1.5495951175689697, - "learning_rate": 9.128241206030151e-05, - "loss": 5.3632, - "step": 9176 - }, - { - "epoch": 4.785919165580182, - "grad_norm": 1.6831517219543457, - "learning_rate": 9.128140703517588e-05, - "loss": 5.9957, - "step": 9177 - }, - { - "epoch": 4.786440677966102, - "grad_norm": 1.4336317777633667, - "learning_rate": 9.128040201005026e-05, - "loss": 5.7585, - "step": 9178 - }, - { - "epoch": 4.786962190352021, - "grad_norm": 1.5007797479629517, - "learning_rate": 9.127939698492462e-05, - "loss": 5.7118, - "step": 9179 - }, - { - "epoch": 4.78748370273794, - "grad_norm": 1.5858527421951294, - "learning_rate": 9.1278391959799e-05, - "loss": 5.7444, - "step": 9180 - }, - { - "epoch": 4.788005215123859, - "grad_norm": 1.4052417278289795, - "learning_rate": 9.127738693467338e-05, - "loss": 5.357, - "step": 9181 - }, - { - "epoch": 4.788526727509779, - "grad_norm": 1.5136873722076416, - "learning_rate": 9.127638190954775e-05, - "loss": 5.3587, - "step": 9182 - }, - { - "epoch": 4.789048239895697, - "grad_norm": 1.3230443000793457, - "learning_rate": 9.127537688442212e-05, - "loss": 6.1488, - "step": 9183 - }, - { - "epoch": 4.789569752281617, - "grad_norm": 1.5392048358917236, - "learning_rate": 9.127437185929648e-05, - "loss": 5.3886, - "step": 9184 - }, - { - "epoch": 4.790091264667536, - "grad_norm": 1.5167235136032104, - "learning_rate": 9.127336683417086e-05, - "loss": 5.8055, - "step": 9185 - }, - { - "epoch": 4.790612777053455, - "grad_norm": 1.4563608169555664, - "learning_rate": 9.127236180904522e-05, - "loss": 5.3622, - "step": 9186 - }, - { - "epoch": 4.791134289439374, - "grad_norm": 1.440362811088562, - "learning_rate": 9.12713567839196e-05, - "loss": 5.9062, - "step": 9187 - }, - { - "epoch": 4.791655801825294, - "grad_norm": 1.4581103324890137, - "learning_rate": 9.127035175879397e-05, - "loss": 5.3159, - "step": 9188 - }, - { - "epoch": 4.792177314211212, - "grad_norm": 1.2955031394958496, - "learning_rate": 9.126934673366834e-05, - "loss": 6.0017, - "step": 9189 - }, - { - "epoch": 4.792698826597132, - "grad_norm": 1.4146647453308105, - "learning_rate": 9.126834170854271e-05, - "loss": 5.8795, - "step": 9190 - }, - { - "epoch": 4.793220338983051, - "grad_norm": 1.3717771768569946, - "learning_rate": 9.126733668341709e-05, - "loss": 5.7957, - "step": 9191 - }, - { - "epoch": 4.79374185136897, - "grad_norm": 1.622470498085022, - "learning_rate": 9.126633165829146e-05, - "loss": 5.2551, - "step": 9192 - }, - { - "epoch": 4.794263363754889, - "grad_norm": 1.5048526525497437, - "learning_rate": 9.126532663316584e-05, - "loss": 5.5866, - "step": 9193 - }, - { - "epoch": 4.794784876140809, - "grad_norm": 1.4802837371826172, - "learning_rate": 9.12643216080402e-05, - "loss": 5.693, - "step": 9194 - }, - { - "epoch": 4.795306388526727, - "grad_norm": 1.7912784814834595, - "learning_rate": 9.126331658291458e-05, - "loss": 5.358, - "step": 9195 - }, - { - "epoch": 4.795827900912647, - "grad_norm": 1.4090813398361206, - "learning_rate": 9.126231155778895e-05, - "loss": 5.7942, - "step": 9196 - }, - { - "epoch": 4.796349413298566, - "grad_norm": 1.382284164428711, - "learning_rate": 9.126130653266331e-05, - "loss": 5.7545, - "step": 9197 - }, - { - "epoch": 4.796870925684485, - "grad_norm": 1.5322506427764893, - "learning_rate": 9.126030150753769e-05, - "loss": 5.8971, - "step": 9198 - }, - { - "epoch": 4.797392438070404, - "grad_norm": 1.4131627082824707, - "learning_rate": 9.125929648241205e-05, - "loss": 5.6542, - "step": 9199 - }, - { - "epoch": 4.797913950456323, - "grad_norm": 1.5863016843795776, - "learning_rate": 9.125829145728643e-05, - "loss": 5.6196, - "step": 9200 - }, - { - "epoch": 4.798435462842242, - "grad_norm": 1.4931647777557373, - "learning_rate": 9.125728643216081e-05, - "loss": 6.0294, - "step": 9201 - }, - { - "epoch": 4.798956975228162, - "grad_norm": 1.4778358936309814, - "learning_rate": 9.125628140703519e-05, - "loss": 5.7163, - "step": 9202 - }, - { - "epoch": 4.799478487614081, - "grad_norm": 1.4178093671798706, - "learning_rate": 9.125527638190955e-05, - "loss": 5.3801, - "step": 9203 - }, - { - "epoch": 4.8, - "grad_norm": 1.8072727918624878, - "learning_rate": 9.125427135678393e-05, - "loss": 5.4713, - "step": 9204 - }, - { - "epoch": 4.800521512385919, - "grad_norm": 1.5860620737075806, - "learning_rate": 9.12532663316583e-05, - "loss": 5.6139, - "step": 9205 - }, - { - "epoch": 4.801043024771838, - "grad_norm": 1.501033067703247, - "learning_rate": 9.125226130653267e-05, - "loss": 4.7683, - "step": 9206 - }, - { - "epoch": 4.801564537157757, - "grad_norm": 1.4287185668945312, - "learning_rate": 9.125125628140704e-05, - "loss": 5.8138, - "step": 9207 - }, - { - "epoch": 4.802086049543677, - "grad_norm": 1.7251499891281128, - "learning_rate": 9.125025125628141e-05, - "loss": 5.6245, - "step": 9208 - }, - { - "epoch": 4.802607561929596, - "grad_norm": 1.5832972526550293, - "learning_rate": 9.124924623115578e-05, - "loss": 5.4532, - "step": 9209 - }, - { - "epoch": 4.803129074315515, - "grad_norm": 1.5163676738739014, - "learning_rate": 9.124824120603016e-05, - "loss": 5.6448, - "step": 9210 - }, - { - "epoch": 4.803650586701434, - "grad_norm": 1.4867360591888428, - "learning_rate": 9.124723618090452e-05, - "loss": 5.5087, - "step": 9211 - }, - { - "epoch": 4.804172099087353, - "grad_norm": 1.4080156087875366, - "learning_rate": 9.12462311557789e-05, - "loss": 6.0164, - "step": 9212 - }, - { - "epoch": 4.804693611473272, - "grad_norm": 1.5526832342147827, - "learning_rate": 9.124522613065328e-05, - "loss": 5.2943, - "step": 9213 - }, - { - "epoch": 4.805215123859192, - "grad_norm": 1.5148519277572632, - "learning_rate": 9.124422110552764e-05, - "loss": 5.6564, - "step": 9214 - }, - { - "epoch": 4.805736636245111, - "grad_norm": 1.4474546909332275, - "learning_rate": 9.124321608040202e-05, - "loss": 5.8948, - "step": 9215 - }, - { - "epoch": 4.80625814863103, - "grad_norm": 1.3023163080215454, - "learning_rate": 9.124221105527638e-05, - "loss": 5.7305, - "step": 9216 - }, - { - "epoch": 4.806779661016949, - "grad_norm": 1.5272940397262573, - "learning_rate": 9.124120603015076e-05, - "loss": 5.7529, - "step": 9217 - }, - { - "epoch": 4.807301173402868, - "grad_norm": 1.4714819192886353, - "learning_rate": 9.124020100502513e-05, - "loss": 5.346, - "step": 9218 - }, - { - "epoch": 4.807822685788787, - "grad_norm": 1.4704169034957886, - "learning_rate": 9.12391959798995e-05, - "loss": 5.4741, - "step": 9219 - }, - { - "epoch": 4.808344198174707, - "grad_norm": 1.4541088342666626, - "learning_rate": 9.123819095477387e-05, - "loss": 5.8416, - "step": 9220 - }, - { - "epoch": 4.808865710560626, - "grad_norm": 1.5072027444839478, - "learning_rate": 9.123718592964824e-05, - "loss": 5.5403, - "step": 9221 - }, - { - "epoch": 4.809387222946545, - "grad_norm": 1.4763387441635132, - "learning_rate": 9.123618090452262e-05, - "loss": 5.4736, - "step": 9222 - }, - { - "epoch": 4.809908735332464, - "grad_norm": 1.4348726272583008, - "learning_rate": 9.1235175879397e-05, - "loss": 5.9259, - "step": 9223 - }, - { - "epoch": 4.810430247718383, - "grad_norm": 1.6023937463760376, - "learning_rate": 9.123417085427136e-05, - "loss": 4.8505, - "step": 9224 - }, - { - "epoch": 4.810951760104302, - "grad_norm": 1.8095829486846924, - "learning_rate": 9.123316582914573e-05, - "loss": 5.9335, - "step": 9225 - }, - { - "epoch": 4.811473272490222, - "grad_norm": 1.3951047658920288, - "learning_rate": 9.123216080402011e-05, - "loss": 5.8536, - "step": 9226 - }, - { - "epoch": 4.811994784876141, - "grad_norm": 1.4968910217285156, - "learning_rate": 9.123115577889447e-05, - "loss": 5.4357, - "step": 9227 - }, - { - "epoch": 4.81251629726206, - "grad_norm": 1.387775182723999, - "learning_rate": 9.123015075376885e-05, - "loss": 5.4818, - "step": 9228 - }, - { - "epoch": 4.813037809647979, - "grad_norm": 1.4894225597381592, - "learning_rate": 9.122914572864321e-05, - "loss": 5.8268, - "step": 9229 - }, - { - "epoch": 4.813559322033898, - "grad_norm": 1.4992468357086182, - "learning_rate": 9.122814070351759e-05, - "loss": 5.8371, - "step": 9230 - }, - { - "epoch": 4.814080834419817, - "grad_norm": 1.5869593620300293, - "learning_rate": 9.122713567839196e-05, - "loss": 4.9458, - "step": 9231 - }, - { - "epoch": 4.814602346805737, - "grad_norm": 1.4982163906097412, - "learning_rate": 9.122613065326633e-05, - "loss": 5.723, - "step": 9232 - }, - { - "epoch": 4.815123859191655, - "grad_norm": 1.4194111824035645, - "learning_rate": 9.122512562814071e-05, - "loss": 5.6377, - "step": 9233 - }, - { - "epoch": 4.815645371577575, - "grad_norm": 1.5596648454666138, - "learning_rate": 9.122412060301509e-05, - "loss": 5.6573, - "step": 9234 - }, - { - "epoch": 4.816166883963494, - "grad_norm": 1.322155475616455, - "learning_rate": 9.122311557788945e-05, - "loss": 5.9455, - "step": 9235 - }, - { - "epoch": 4.816688396349413, - "grad_norm": 1.3279646635055542, - "learning_rate": 9.122211055276383e-05, - "loss": 6.0966, - "step": 9236 - }, - { - "epoch": 4.817209908735332, - "grad_norm": 1.362428069114685, - "learning_rate": 9.12211055276382e-05, - "loss": 5.7814, - "step": 9237 - }, - { - "epoch": 4.817731421121252, - "grad_norm": 1.569911003112793, - "learning_rate": 9.122010050251256e-05, - "loss": 5.4444, - "step": 9238 - }, - { - "epoch": 4.81825293350717, - "grad_norm": 1.5388773679733276, - "learning_rate": 9.121909547738694e-05, - "loss": 5.5494, - "step": 9239 - }, - { - "epoch": 4.81877444589309, - "grad_norm": 1.5827172994613647, - "learning_rate": 9.12180904522613e-05, - "loss": 5.864, - "step": 9240 - }, - { - "epoch": 4.819295958279009, - "grad_norm": 1.4343013763427734, - "learning_rate": 9.121708542713568e-05, - "loss": 5.7891, - "step": 9241 - }, - { - "epoch": 4.819817470664928, - "grad_norm": 1.4862569570541382, - "learning_rate": 9.121608040201006e-05, - "loss": 5.3462, - "step": 9242 - }, - { - "epoch": 4.820338983050847, - "grad_norm": 1.3925949335098267, - "learning_rate": 9.121507537688444e-05, - "loss": 6.0967, - "step": 9243 - }, - { - "epoch": 4.820860495436767, - "grad_norm": 1.4368908405303955, - "learning_rate": 9.12140703517588e-05, - "loss": 5.8037, - "step": 9244 - }, - { - "epoch": 4.8213820078226854, - "grad_norm": 1.5739686489105225, - "learning_rate": 9.121306532663318e-05, - "loss": 5.8798, - "step": 9245 - }, - { - "epoch": 4.821903520208605, - "grad_norm": 1.6256200075149536, - "learning_rate": 9.121206030150754e-05, - "loss": 5.1229, - "step": 9246 - }, - { - "epoch": 4.822425032594524, - "grad_norm": 1.5344382524490356, - "learning_rate": 9.121105527638192e-05, - "loss": 5.7012, - "step": 9247 - }, - { - "epoch": 4.822946544980443, - "grad_norm": 1.5843276977539062, - "learning_rate": 9.121005025125628e-05, - "loss": 5.7181, - "step": 9248 - }, - { - "epoch": 4.823468057366362, - "grad_norm": 1.548142910003662, - "learning_rate": 9.120904522613066e-05, - "loss": 5.4549, - "step": 9249 - }, - { - "epoch": 4.823989569752282, - "grad_norm": 1.7478749752044678, - "learning_rate": 9.120804020100503e-05, - "loss": 5.6083, - "step": 9250 - }, - { - "epoch": 4.8245110821382005, - "grad_norm": 1.361341118812561, - "learning_rate": 9.120703517587939e-05, - "loss": 5.7902, - "step": 9251 - }, - { - "epoch": 4.82503259452412, - "grad_norm": 1.5743964910507202, - "learning_rate": 9.120603015075377e-05, - "loss": 5.2731, - "step": 9252 - }, - { - "epoch": 4.825554106910039, - "grad_norm": 1.4183433055877686, - "learning_rate": 9.120502512562815e-05, - "loss": 5.6402, - "step": 9253 - }, - { - "epoch": 4.826075619295958, - "grad_norm": 1.4950519800186157, - "learning_rate": 9.120402010050252e-05, - "loss": 5.582, - "step": 9254 - }, - { - "epoch": 4.826597131681877, - "grad_norm": 1.4537906646728516, - "learning_rate": 9.120301507537689e-05, - "loss": 5.5134, - "step": 9255 - }, - { - "epoch": 4.827118644067797, - "grad_norm": 1.5480533838272095, - "learning_rate": 9.120201005025127e-05, - "loss": 5.4403, - "step": 9256 - }, - { - "epoch": 4.8276401564537155, - "grad_norm": 1.5023926496505737, - "learning_rate": 9.120100502512563e-05, - "loss": 5.7625, - "step": 9257 - }, - { - "epoch": 4.828161668839635, - "grad_norm": 1.6865278482437134, - "learning_rate": 9.120000000000001e-05, - "loss": 5.5276, - "step": 9258 - }, - { - "epoch": 4.828683181225554, - "grad_norm": 1.6022428274154663, - "learning_rate": 9.119899497487437e-05, - "loss": 5.5368, - "step": 9259 - }, - { - "epoch": 4.829204693611473, - "grad_norm": 1.91963529586792, - "learning_rate": 9.119798994974875e-05, - "loss": 5.4684, - "step": 9260 - }, - { - "epoch": 4.829726205997392, - "grad_norm": 1.5416866540908813, - "learning_rate": 9.119698492462311e-05, - "loss": 5.3312, - "step": 9261 - }, - { - "epoch": 4.830247718383312, - "grad_norm": 1.6617872714996338, - "learning_rate": 9.119597989949749e-05, - "loss": 5.896, - "step": 9262 - }, - { - "epoch": 4.8307692307692305, - "grad_norm": 1.4679453372955322, - "learning_rate": 9.119497487437187e-05, - "loss": 5.7744, - "step": 9263 - }, - { - "epoch": 4.83129074315515, - "grad_norm": 1.3962866067886353, - "learning_rate": 9.119396984924623e-05, - "loss": 5.3435, - "step": 9264 - }, - { - "epoch": 4.831812255541069, - "grad_norm": 1.4566397666931152, - "learning_rate": 9.119296482412061e-05, - "loss": 5.7771, - "step": 9265 - }, - { - "epoch": 4.832333767926988, - "grad_norm": 1.4833298921585083, - "learning_rate": 9.119195979899498e-05, - "loss": 5.5102, - "step": 9266 - }, - { - "epoch": 4.8328552803129075, - "grad_norm": 1.5184600353240967, - "learning_rate": 9.119095477386935e-05, - "loss": 5.7137, - "step": 9267 - }, - { - "epoch": 4.833376792698827, - "grad_norm": 1.5187389850616455, - "learning_rate": 9.118994974874372e-05, - "loss": 5.3414, - "step": 9268 - }, - { - "epoch": 4.8338983050847455, - "grad_norm": 1.3666802644729614, - "learning_rate": 9.11889447236181e-05, - "loss": 5.8923, - "step": 9269 - }, - { - "epoch": 4.834419817470665, - "grad_norm": 1.3573850393295288, - "learning_rate": 9.118793969849246e-05, - "loss": 5.6331, - "step": 9270 - }, - { - "epoch": 4.834941329856584, - "grad_norm": 1.4911201000213623, - "learning_rate": 9.118693467336684e-05, - "loss": 5.5722, - "step": 9271 - }, - { - "epoch": 4.835462842242503, - "grad_norm": 1.3657877445220947, - "learning_rate": 9.11859296482412e-05, - "loss": 5.4478, - "step": 9272 - }, - { - "epoch": 4.8359843546284225, - "grad_norm": 1.3521032333374023, - "learning_rate": 9.118492462311558e-05, - "loss": 5.6861, - "step": 9273 - }, - { - "epoch": 4.836505867014342, - "grad_norm": 1.304948329925537, - "learning_rate": 9.118391959798996e-05, - "loss": 5.6432, - "step": 9274 - }, - { - "epoch": 4.8370273794002605, - "grad_norm": 1.4321249723434448, - "learning_rate": 9.118291457286434e-05, - "loss": 5.6818, - "step": 9275 - }, - { - "epoch": 4.83754889178618, - "grad_norm": 1.5321964025497437, - "learning_rate": 9.11819095477387e-05, - "loss": 5.8486, - "step": 9276 - }, - { - "epoch": 4.838070404172099, - "grad_norm": 1.4128707647323608, - "learning_rate": 9.118090452261306e-05, - "loss": 5.5959, - "step": 9277 - }, - { - "epoch": 4.838591916558018, - "grad_norm": 2.048426866531372, - "learning_rate": 9.117989949748744e-05, - "loss": 5.5242, - "step": 9278 - }, - { - "epoch": 4.8391134289439375, - "grad_norm": 1.550309658050537, - "learning_rate": 9.11788944723618e-05, - "loss": 5.7066, - "step": 9279 - }, - { - "epoch": 4.839634941329857, - "grad_norm": 1.4226003885269165, - "learning_rate": 9.117788944723618e-05, - "loss": 5.7614, - "step": 9280 - }, - { - "epoch": 4.8401564537157755, - "grad_norm": 1.741803765296936, - "learning_rate": 9.117688442211055e-05, - "loss": 5.2972, - "step": 9281 - }, - { - "epoch": 4.840677966101695, - "grad_norm": 1.4007216691970825, - "learning_rate": 9.117587939698493e-05, - "loss": 5.9674, - "step": 9282 - }, - { - "epoch": 4.8411994784876144, - "grad_norm": 1.3628923892974854, - "learning_rate": 9.11748743718593e-05, - "loss": 6.1243, - "step": 9283 - }, - { - "epoch": 4.841720990873533, - "grad_norm": 1.3534587621688843, - "learning_rate": 9.117386934673368e-05, - "loss": 5.9117, - "step": 9284 - }, - { - "epoch": 4.8422425032594525, - "grad_norm": 1.483657717704773, - "learning_rate": 9.117286432160805e-05, - "loss": 5.7305, - "step": 9285 - }, - { - "epoch": 4.842764015645372, - "grad_norm": 1.5553796291351318, - "learning_rate": 9.117185929648242e-05, - "loss": 5.4712, - "step": 9286 - }, - { - "epoch": 4.8432855280312905, - "grad_norm": 1.3317629098892212, - "learning_rate": 9.117085427135679e-05, - "loss": 5.9922, - "step": 9287 - }, - { - "epoch": 4.84380704041721, - "grad_norm": 1.3248112201690674, - "learning_rate": 9.116984924623117e-05, - "loss": 6.1447, - "step": 9288 - }, - { - "epoch": 4.8443285528031295, - "grad_norm": 1.4562768936157227, - "learning_rate": 9.116884422110553e-05, - "loss": 5.5182, - "step": 9289 - }, - { - "epoch": 4.844850065189048, - "grad_norm": 1.4521560668945312, - "learning_rate": 9.116783919597991e-05, - "loss": 5.3681, - "step": 9290 - }, - { - "epoch": 4.8453715775749675, - "grad_norm": 1.3736523389816284, - "learning_rate": 9.116683417085427e-05, - "loss": 5.6564, - "step": 9291 - }, - { - "epoch": 4.845893089960887, - "grad_norm": 1.459989070892334, - "learning_rate": 9.116582914572864e-05, - "loss": 5.9534, - "step": 9292 - }, - { - "epoch": 4.8464146023468055, - "grad_norm": 1.6139153242111206, - "learning_rate": 9.116482412060301e-05, - "loss": 5.7068, - "step": 9293 - }, - { - "epoch": 4.846936114732725, - "grad_norm": 1.4541608095169067, - "learning_rate": 9.116381909547739e-05, - "loss": 5.6516, - "step": 9294 - }, - { - "epoch": 4.847457627118644, - "grad_norm": 1.2638553380966187, - "learning_rate": 9.116281407035177e-05, - "loss": 6.0052, - "step": 9295 - }, - { - "epoch": 4.847979139504563, - "grad_norm": 1.4266360998153687, - "learning_rate": 9.116180904522613e-05, - "loss": 5.789, - "step": 9296 - }, - { - "epoch": 4.8485006518904825, - "grad_norm": 1.350271224975586, - "learning_rate": 9.116080402010051e-05, - "loss": 5.9609, - "step": 9297 - }, - { - "epoch": 4.849022164276402, - "grad_norm": 1.438230276107788, - "learning_rate": 9.115979899497488e-05, - "loss": 5.6213, - "step": 9298 - }, - { - "epoch": 4.849543676662321, - "grad_norm": 1.4094445705413818, - "learning_rate": 9.115879396984925e-05, - "loss": 5.4107, - "step": 9299 - }, - { - "epoch": 4.85006518904824, - "grad_norm": 1.4485507011413574, - "learning_rate": 9.115778894472362e-05, - "loss": 5.5516, - "step": 9300 - }, - { - "epoch": 4.850586701434159, - "grad_norm": 1.4261964559555054, - "learning_rate": 9.1156783919598e-05, - "loss": 6.0503, - "step": 9301 - }, - { - "epoch": 4.851108213820078, - "grad_norm": 1.504470944404602, - "learning_rate": 9.115577889447236e-05, - "loss": 5.4479, - "step": 9302 - }, - { - "epoch": 4.8516297262059975, - "grad_norm": 1.4866969585418701, - "learning_rate": 9.115477386934674e-05, - "loss": 5.2855, - "step": 9303 - }, - { - "epoch": 4.852151238591917, - "grad_norm": 1.4826127290725708, - "learning_rate": 9.115376884422112e-05, - "loss": 5.4432, - "step": 9304 - }, - { - "epoch": 4.852672750977836, - "grad_norm": 1.52995765209198, - "learning_rate": 9.115276381909548e-05, - "loss": 5.7464, - "step": 9305 - }, - { - "epoch": 4.853194263363755, - "grad_norm": 1.4683867692947388, - "learning_rate": 9.115175879396986e-05, - "loss": 5.8766, - "step": 9306 - }, - { - "epoch": 4.853715775749674, - "grad_norm": 1.3300960063934326, - "learning_rate": 9.115075376884422e-05, - "loss": 5.5447, - "step": 9307 - }, - { - "epoch": 4.854237288135593, - "grad_norm": 1.4404042959213257, - "learning_rate": 9.11497487437186e-05, - "loss": 5.1195, - "step": 9308 - }, - { - "epoch": 4.8547588005215125, - "grad_norm": 1.4807119369506836, - "learning_rate": 9.114874371859297e-05, - "loss": 5.5349, - "step": 9309 - }, - { - "epoch": 4.855280312907432, - "grad_norm": 1.3432385921478271, - "learning_rate": 9.114773869346734e-05, - "loss": 5.6125, - "step": 9310 - }, - { - "epoch": 4.855801825293351, - "grad_norm": 1.5868613719940186, - "learning_rate": 9.114673366834171e-05, - "loss": 5.7365, - "step": 9311 - }, - { - "epoch": 4.85632333767927, - "grad_norm": 1.4125149250030518, - "learning_rate": 9.114572864321609e-05, - "loss": 6.0113, - "step": 9312 - }, - { - "epoch": 4.856844850065189, - "grad_norm": 1.4138057231903076, - "learning_rate": 9.114472361809045e-05, - "loss": 5.6419, - "step": 9313 - }, - { - "epoch": 4.857366362451108, - "grad_norm": 1.447916865348816, - "learning_rate": 9.114371859296483e-05, - "loss": 5.9331, - "step": 9314 - }, - { - "epoch": 4.8578878748370276, - "grad_norm": 1.5571261644363403, - "learning_rate": 9.11427135678392e-05, - "loss": 5.2726, - "step": 9315 - }, - { - "epoch": 4.858409387222947, - "grad_norm": 1.4460302591323853, - "learning_rate": 9.114170854271358e-05, - "loss": 5.947, - "step": 9316 - }, - { - "epoch": 4.858930899608866, - "grad_norm": 1.3093674182891846, - "learning_rate": 9.114070351758795e-05, - "loss": 6.1393, - "step": 9317 - }, - { - "epoch": 4.859452411994785, - "grad_norm": 1.6715153455734253, - "learning_rate": 9.113969849246231e-05, - "loss": 5.3495, - "step": 9318 - }, - { - "epoch": 4.859973924380704, - "grad_norm": 1.5444271564483643, - "learning_rate": 9.113869346733669e-05, - "loss": 5.5903, - "step": 9319 - }, - { - "epoch": 4.860495436766623, - "grad_norm": 1.4306477308273315, - "learning_rate": 9.113768844221105e-05, - "loss": 5.575, - "step": 9320 - }, - { - "epoch": 4.861016949152543, - "grad_norm": 1.4778169393539429, - "learning_rate": 9.113668341708543e-05, - "loss": 5.5836, - "step": 9321 - }, - { - "epoch": 4.861538461538462, - "grad_norm": 1.461381435394287, - "learning_rate": 9.11356783919598e-05, - "loss": 5.5211, - "step": 9322 - }, - { - "epoch": 4.862059973924381, - "grad_norm": 1.4051614999771118, - "learning_rate": 9.113467336683417e-05, - "loss": 5.655, - "step": 9323 - }, - { - "epoch": 4.8625814863103, - "grad_norm": 1.4780243635177612, - "learning_rate": 9.113366834170855e-05, - "loss": 5.7677, - "step": 9324 - }, - { - "epoch": 4.863102998696219, - "grad_norm": 1.3828657865524292, - "learning_rate": 9.113266331658293e-05, - "loss": 5.7591, - "step": 9325 - }, - { - "epoch": 4.863624511082138, - "grad_norm": 1.5801982879638672, - "learning_rate": 9.11316582914573e-05, - "loss": 5.6065, - "step": 9326 - }, - { - "epoch": 4.864146023468058, - "grad_norm": 1.4674339294433594, - "learning_rate": 9.113065326633167e-05, - "loss": 5.8943, - "step": 9327 - }, - { - "epoch": 4.864667535853976, - "grad_norm": 1.3581868410110474, - "learning_rate": 9.112964824120604e-05, - "loss": 5.888, - "step": 9328 - }, - { - "epoch": 4.865189048239896, - "grad_norm": 1.3303418159484863, - "learning_rate": 9.112864321608041e-05, - "loss": 5.863, - "step": 9329 - }, - { - "epoch": 4.865710560625815, - "grad_norm": 1.5292084217071533, - "learning_rate": 9.112763819095478e-05, - "loss": 5.8049, - "step": 9330 - }, - { - "epoch": 4.866232073011734, - "grad_norm": 1.300917387008667, - "learning_rate": 9.112663316582914e-05, - "loss": 6.0319, - "step": 9331 - }, - { - "epoch": 4.866753585397653, - "grad_norm": 1.3341732025146484, - "learning_rate": 9.112562814070352e-05, - "loss": 6.0431, - "step": 9332 - }, - { - "epoch": 4.867275097783573, - "grad_norm": 1.3768621683120728, - "learning_rate": 9.112462311557788e-05, - "loss": 5.5139, - "step": 9333 - }, - { - "epoch": 4.867796610169491, - "grad_norm": 1.4998787641525269, - "learning_rate": 9.112361809045226e-05, - "loss": 6.0973, - "step": 9334 - }, - { - "epoch": 4.868318122555411, - "grad_norm": 1.4189060926437378, - "learning_rate": 9.112261306532664e-05, - "loss": 4.8085, - "step": 9335 - }, - { - "epoch": 4.86883963494133, - "grad_norm": 1.3150265216827393, - "learning_rate": 9.112160804020102e-05, - "loss": 5.7908, - "step": 9336 - }, - { - "epoch": 4.869361147327249, - "grad_norm": 1.3877546787261963, - "learning_rate": 9.112060301507538e-05, - "loss": 5.6236, - "step": 9337 - }, - { - "epoch": 4.869882659713168, - "grad_norm": 1.3660801649093628, - "learning_rate": 9.111959798994976e-05, - "loss": 5.6533, - "step": 9338 - }, - { - "epoch": 4.870404172099088, - "grad_norm": 1.298844337463379, - "learning_rate": 9.111859296482412e-05, - "loss": 5.686, - "step": 9339 - }, - { - "epoch": 4.870925684485006, - "grad_norm": 1.3915958404541016, - "learning_rate": 9.11175879396985e-05, - "loss": 5.6653, - "step": 9340 - }, - { - "epoch": 4.871447196870926, - "grad_norm": 1.4473007917404175, - "learning_rate": 9.111658291457287e-05, - "loss": 5.9343, - "step": 9341 - }, - { - "epoch": 4.871968709256845, - "grad_norm": 1.57075834274292, - "learning_rate": 9.111557788944724e-05, - "loss": 5.1206, - "step": 9342 - }, - { - "epoch": 4.872490221642764, - "grad_norm": 1.387607216835022, - "learning_rate": 9.111457286432161e-05, - "loss": 5.8418, - "step": 9343 - }, - { - "epoch": 4.873011734028683, - "grad_norm": 1.4739086627960205, - "learning_rate": 9.111356783919597e-05, - "loss": 5.7814, - "step": 9344 - }, - { - "epoch": 4.873533246414603, - "grad_norm": 1.4562593698501587, - "learning_rate": 9.111256281407035e-05, - "loss": 5.3762, - "step": 9345 - }, - { - "epoch": 4.874054758800521, - "grad_norm": 1.6553137302398682, - "learning_rate": 9.111155778894473e-05, - "loss": 4.976, - "step": 9346 - }, - { - "epoch": 4.874576271186441, - "grad_norm": 1.7254507541656494, - "learning_rate": 9.11105527638191e-05, - "loss": 5.4907, - "step": 9347 - }, - { - "epoch": 4.87509778357236, - "grad_norm": 1.476669192314148, - "learning_rate": 9.110954773869347e-05, - "loss": 5.6826, - "step": 9348 - }, - { - "epoch": 4.875619295958279, - "grad_norm": 1.497087001800537, - "learning_rate": 9.110854271356785e-05, - "loss": 5.2181, - "step": 9349 - }, - { - "epoch": 4.876140808344198, - "grad_norm": 1.39057457447052, - "learning_rate": 9.110753768844221e-05, - "loss": 5.6455, - "step": 9350 - }, - { - "epoch": 4.876662320730118, - "grad_norm": 1.5611858367919922, - "learning_rate": 9.110653266331659e-05, - "loss": 5.521, - "step": 9351 - }, - { - "epoch": 4.877183833116036, - "grad_norm": 1.4806443452835083, - "learning_rate": 9.110552763819095e-05, - "loss": 5.9352, - "step": 9352 - }, - { - "epoch": 4.877705345501956, - "grad_norm": 1.568584680557251, - "learning_rate": 9.110452261306533e-05, - "loss": 5.4091, - "step": 9353 - }, - { - "epoch": 4.878226857887875, - "grad_norm": 1.4509494304656982, - "learning_rate": 9.11035175879397e-05, - "loss": 6.0158, - "step": 9354 - }, - { - "epoch": 4.878748370273794, - "grad_norm": 1.5202879905700684, - "learning_rate": 9.110251256281407e-05, - "loss": 5.7209, - "step": 9355 - }, - { - "epoch": 4.879269882659713, - "grad_norm": 1.5131406784057617, - "learning_rate": 9.110150753768845e-05, - "loss": 5.6023, - "step": 9356 - }, - { - "epoch": 4.879791395045633, - "grad_norm": 1.4570741653442383, - "learning_rate": 9.110050251256282e-05, - "loss": 5.7224, - "step": 9357 - }, - { - "epoch": 4.880312907431551, - "grad_norm": 1.499408483505249, - "learning_rate": 9.10994974874372e-05, - "loss": 5.8401, - "step": 9358 - }, - { - "epoch": 4.880834419817471, - "grad_norm": 1.520756483078003, - "learning_rate": 9.109849246231156e-05, - "loss": 5.5546, - "step": 9359 - }, - { - "epoch": 4.88135593220339, - "grad_norm": 1.494465947151184, - "learning_rate": 9.109748743718594e-05, - "loss": 5.7074, - "step": 9360 - }, - { - "epoch": 4.881877444589309, - "grad_norm": 1.5333304405212402, - "learning_rate": 9.10964824120603e-05, - "loss": 4.9261, - "step": 9361 - }, - { - "epoch": 4.882398956975228, - "grad_norm": 1.4473456144332886, - "learning_rate": 9.109547738693468e-05, - "loss": 5.7631, - "step": 9362 - }, - { - "epoch": 4.882920469361148, - "grad_norm": 1.4831773042678833, - "learning_rate": 9.109447236180904e-05, - "loss": 5.7929, - "step": 9363 - }, - { - "epoch": 4.883441981747066, - "grad_norm": 1.5186314582824707, - "learning_rate": 9.109346733668342e-05, - "loss": 5.4476, - "step": 9364 - }, - { - "epoch": 4.883963494132986, - "grad_norm": 1.380068063735962, - "learning_rate": 9.109246231155778e-05, - "loss": 5.8617, - "step": 9365 - }, - { - "epoch": 4.884485006518905, - "grad_norm": 1.4257497787475586, - "learning_rate": 9.109145728643216e-05, - "loss": 5.7656, - "step": 9366 - }, - { - "epoch": 4.885006518904824, - "grad_norm": 1.354154348373413, - "learning_rate": 9.109045226130654e-05, - "loss": 6.0908, - "step": 9367 - }, - { - "epoch": 4.885528031290743, - "grad_norm": 1.3278284072875977, - "learning_rate": 9.108944723618092e-05, - "loss": 5.8533, - "step": 9368 - }, - { - "epoch": 4.886049543676663, - "grad_norm": 1.4890047311782837, - "learning_rate": 9.108844221105528e-05, - "loss": 5.4997, - "step": 9369 - }, - { - "epoch": 4.886571056062581, - "grad_norm": 1.5654428005218506, - "learning_rate": 9.108743718592965e-05, - "loss": 5.7629, - "step": 9370 - }, - { - "epoch": 4.887092568448501, - "grad_norm": 1.6418689489364624, - "learning_rate": 9.108643216080402e-05, - "loss": 5.1014, - "step": 9371 - }, - { - "epoch": 4.88761408083442, - "grad_norm": 1.7225359678268433, - "learning_rate": 9.108542713567839e-05, - "loss": 5.3718, - "step": 9372 - }, - { - "epoch": 4.888135593220339, - "grad_norm": 1.4604270458221436, - "learning_rate": 9.108442211055277e-05, - "loss": 5.8459, - "step": 9373 - }, - { - "epoch": 4.888657105606258, - "grad_norm": 1.4135218858718872, - "learning_rate": 9.108341708542713e-05, - "loss": 5.4729, - "step": 9374 - }, - { - "epoch": 4.889178617992178, - "grad_norm": 1.49873948097229, - "learning_rate": 9.108241206030151e-05, - "loss": 5.4259, - "step": 9375 - }, - { - "epoch": 4.889700130378096, - "grad_norm": 1.4721213579177856, - "learning_rate": 9.108140703517589e-05, - "loss": 5.2406, - "step": 9376 - }, - { - "epoch": 4.890221642764016, - "grad_norm": 1.4997867345809937, - "learning_rate": 9.108040201005026e-05, - "loss": 5.791, - "step": 9377 - }, - { - "epoch": 4.890743155149935, - "grad_norm": 1.54023277759552, - "learning_rate": 9.107939698492463e-05, - "loss": 5.5352, - "step": 9378 - }, - { - "epoch": 4.891264667535854, - "grad_norm": 1.4271931648254395, - "learning_rate": 9.1078391959799e-05, - "loss": 5.9129, - "step": 9379 - }, - { - "epoch": 4.891786179921773, - "grad_norm": 1.5283840894699097, - "learning_rate": 9.107738693467337e-05, - "loss": 5.6623, - "step": 9380 - }, - { - "epoch": 4.892307692307693, - "grad_norm": 1.4088325500488281, - "learning_rate": 9.107638190954775e-05, - "loss": 5.7835, - "step": 9381 - }, - { - "epoch": 4.892829204693611, - "grad_norm": 1.3421629667282104, - "learning_rate": 9.107537688442211e-05, - "loss": 5.8212, - "step": 9382 - }, - { - "epoch": 4.893350717079531, - "grad_norm": 1.4153391122817993, - "learning_rate": 9.107437185929649e-05, - "loss": 5.8393, - "step": 9383 - }, - { - "epoch": 4.893872229465449, - "grad_norm": 1.699793815612793, - "learning_rate": 9.107336683417086e-05, - "loss": 5.1491, - "step": 9384 - }, - { - "epoch": 4.894393741851369, - "grad_norm": 1.4392613172531128, - "learning_rate": 9.107236180904522e-05, - "loss": 5.8171, - "step": 9385 - }, - { - "epoch": 4.894915254237288, - "grad_norm": 1.4814019203186035, - "learning_rate": 9.10713567839196e-05, - "loss": 5.8058, - "step": 9386 - }, - { - "epoch": 4.895436766623208, - "grad_norm": 1.554839849472046, - "learning_rate": 9.107035175879398e-05, - "loss": 5.5459, - "step": 9387 - }, - { - "epoch": 4.895958279009126, - "grad_norm": 1.4767512083053589, - "learning_rate": 9.106934673366835e-05, - "loss": 5.5019, - "step": 9388 - }, - { - "epoch": 4.896479791395046, - "grad_norm": 1.5557408332824707, - "learning_rate": 9.106834170854272e-05, - "loss": 5.2855, - "step": 9389 - }, - { - "epoch": 4.897001303780964, - "grad_norm": 1.4546269178390503, - "learning_rate": 9.10673366834171e-05, - "loss": 5.6894, - "step": 9390 - }, - { - "epoch": 4.897522816166884, - "grad_norm": 1.7202287912368774, - "learning_rate": 9.106633165829146e-05, - "loss": 5.219, - "step": 9391 - }, - { - "epoch": 4.898044328552803, - "grad_norm": 1.3854893445968628, - "learning_rate": 9.106532663316584e-05, - "loss": 5.8198, - "step": 9392 - }, - { - "epoch": 4.898565840938723, - "grad_norm": 1.8713735342025757, - "learning_rate": 9.10643216080402e-05, - "loss": 5.4578, - "step": 9393 - }, - { - "epoch": 4.899087353324641, - "grad_norm": 1.438748836517334, - "learning_rate": 9.106331658291458e-05, - "loss": 5.9129, - "step": 9394 - }, - { - "epoch": 4.899608865710561, - "grad_norm": 1.5552420616149902, - "learning_rate": 9.106231155778894e-05, - "loss": 5.2891, - "step": 9395 - }, - { - "epoch": 4.900130378096479, - "grad_norm": 1.8511615991592407, - "learning_rate": 9.106130653266332e-05, - "loss": 5.3372, - "step": 9396 - }, - { - "epoch": 4.900651890482399, - "grad_norm": 1.5864999294281006, - "learning_rate": 9.10603015075377e-05, - "loss": 5.9007, - "step": 9397 - }, - { - "epoch": 4.901173402868318, - "grad_norm": 1.516449213027954, - "learning_rate": 9.105929648241206e-05, - "loss": 5.3411, - "step": 9398 - }, - { - "epoch": 4.901694915254238, - "grad_norm": 1.5337451696395874, - "learning_rate": 9.105829145728644e-05, - "loss": 5.6074, - "step": 9399 - }, - { - "epoch": 4.902216427640156, - "grad_norm": 1.4010370969772339, - "learning_rate": 9.10572864321608e-05, - "loss": 6.1563, - "step": 9400 - }, - { - "epoch": 4.902737940026076, - "grad_norm": 1.5485152006149292, - "learning_rate": 9.105628140703518e-05, - "loss": 6.0266, - "step": 9401 - }, - { - "epoch": 4.903259452411994, - "grad_norm": 1.4584873914718628, - "learning_rate": 9.105527638190955e-05, - "loss": 5.6044, - "step": 9402 - }, - { - "epoch": 4.903780964797914, - "grad_norm": 1.8672250509262085, - "learning_rate": 9.105427135678393e-05, - "loss": 5.9229, - "step": 9403 - }, - { - "epoch": 4.904302477183833, - "grad_norm": 1.9208084344863892, - "learning_rate": 9.105326633165829e-05, - "loss": 5.0152, - "step": 9404 - }, - { - "epoch": 4.904823989569753, - "grad_norm": 1.4721378087997437, - "learning_rate": 9.105226130653267e-05, - "loss": 6.0808, - "step": 9405 - }, - { - "epoch": 4.905345501955671, - "grad_norm": 1.4138704538345337, - "learning_rate": 9.105125628140703e-05, - "loss": 5.836, - "step": 9406 - }, - { - "epoch": 4.905867014341591, - "grad_norm": 1.4073163270950317, - "learning_rate": 9.105025125628141e-05, - "loss": 5.8614, - "step": 9407 - }, - { - "epoch": 4.906388526727509, - "grad_norm": 1.4413052797317505, - "learning_rate": 9.104924623115579e-05, - "loss": 5.9037, - "step": 9408 - }, - { - "epoch": 4.906910039113429, - "grad_norm": 1.6425275802612305, - "learning_rate": 9.104824120603017e-05, - "loss": 5.5354, - "step": 9409 - }, - { - "epoch": 4.907431551499348, - "grad_norm": 1.6388368606567383, - "learning_rate": 9.104723618090453e-05, - "loss": 5.7865, - "step": 9410 - }, - { - "epoch": 4.907953063885268, - "grad_norm": 1.4211171865463257, - "learning_rate": 9.10462311557789e-05, - "loss": 5.8618, - "step": 9411 - }, - { - "epoch": 4.908474576271186, - "grad_norm": 1.3258837461471558, - "learning_rate": 9.104522613065327e-05, - "loss": 5.8399, - "step": 9412 - }, - { - "epoch": 4.908996088657106, - "grad_norm": 1.31852388381958, - "learning_rate": 9.104422110552764e-05, - "loss": 6.004, - "step": 9413 - }, - { - "epoch": 4.909517601043024, - "grad_norm": 1.3715661764144897, - "learning_rate": 9.104321608040201e-05, - "loss": 5.4625, - "step": 9414 - }, - { - "epoch": 4.910039113428944, - "grad_norm": 1.4105144739151, - "learning_rate": 9.104221105527638e-05, - "loss": 6.1604, - "step": 9415 - }, - { - "epoch": 4.910560625814863, - "grad_norm": 1.50928795337677, - "learning_rate": 9.104120603015076e-05, - "loss": 5.9241, - "step": 9416 - }, - { - "epoch": 4.911082138200783, - "grad_norm": 1.6421071290969849, - "learning_rate": 9.104020100502513e-05, - "loss": 5.4649, - "step": 9417 - }, - { - "epoch": 4.911603650586701, - "grad_norm": 1.4532488584518433, - "learning_rate": 9.103919597989951e-05, - "loss": 5.4243, - "step": 9418 - }, - { - "epoch": 4.912125162972621, - "grad_norm": 1.6198644638061523, - "learning_rate": 9.103819095477388e-05, - "loss": 5.3546, - "step": 9419 - }, - { - "epoch": 4.912646675358539, - "grad_norm": 1.4275953769683838, - "learning_rate": 9.103718592964825e-05, - "loss": 5.7496, - "step": 9420 - }, - { - "epoch": 4.913168187744459, - "grad_norm": 1.4423511028289795, - "learning_rate": 9.103618090452262e-05, - "loss": 5.3867, - "step": 9421 - }, - { - "epoch": 4.913689700130378, - "grad_norm": 1.4166964292526245, - "learning_rate": 9.1035175879397e-05, - "loss": 5.6507, - "step": 9422 - }, - { - "epoch": 4.914211212516297, - "grad_norm": 1.3496978282928467, - "learning_rate": 9.103417085427136e-05, - "loss": 5.8646, - "step": 9423 - }, - { - "epoch": 4.914732724902216, - "grad_norm": 1.400513768196106, - "learning_rate": 9.103316582914572e-05, - "loss": 5.8356, - "step": 9424 - }, - { - "epoch": 4.915254237288136, - "grad_norm": 1.442557692527771, - "learning_rate": 9.10321608040201e-05, - "loss": 5.7954, - "step": 9425 - }, - { - "epoch": 4.915775749674054, - "grad_norm": 1.7103263139724731, - "learning_rate": 9.103115577889447e-05, - "loss": 5.3536, - "step": 9426 - }, - { - "epoch": 4.916297262059974, - "grad_norm": 1.374725341796875, - "learning_rate": 9.103015075376884e-05, - "loss": 5.7906, - "step": 9427 - }, - { - "epoch": 4.916818774445893, - "grad_norm": 1.5698847770690918, - "learning_rate": 9.102914572864322e-05, - "loss": 5.1926, - "step": 9428 - }, - { - "epoch": 4.917340286831812, - "grad_norm": 1.6266674995422363, - "learning_rate": 9.10281407035176e-05, - "loss": 5.6622, - "step": 9429 - }, - { - "epoch": 4.917861799217731, - "grad_norm": 1.3146287202835083, - "learning_rate": 9.102713567839196e-05, - "loss": 5.9735, - "step": 9430 - }, - { - "epoch": 4.918383311603651, - "grad_norm": 1.3534080982208252, - "learning_rate": 9.102613065326634e-05, - "loss": 5.8186, - "step": 9431 - }, - { - "epoch": 4.918904823989569, - "grad_norm": 1.4958523511886597, - "learning_rate": 9.10251256281407e-05, - "loss": 5.376, - "step": 9432 - }, - { - "epoch": 4.919426336375489, - "grad_norm": 1.7073709964752197, - "learning_rate": 9.102412060301508e-05, - "loss": 5.4069, - "step": 9433 - }, - { - "epoch": 4.919947848761408, - "grad_norm": 1.3544573783874512, - "learning_rate": 9.102311557788945e-05, - "loss": 5.695, - "step": 9434 - }, - { - "epoch": 4.920469361147327, - "grad_norm": 1.4455342292785645, - "learning_rate": 9.102211055276383e-05, - "loss": 5.833, - "step": 9435 - }, - { - "epoch": 4.920990873533246, - "grad_norm": 1.451461911201477, - "learning_rate": 9.102110552763819e-05, - "loss": 5.4009, - "step": 9436 - }, - { - "epoch": 4.921512385919166, - "grad_norm": 1.3886125087738037, - "learning_rate": 9.102010050251257e-05, - "loss": 5.5272, - "step": 9437 - }, - { - "epoch": 4.922033898305084, - "grad_norm": 1.506926417350769, - "learning_rate": 9.101909547738695e-05, - "loss": 5.6582, - "step": 9438 - }, - { - "epoch": 4.922555410691004, - "grad_norm": 1.4085625410079956, - "learning_rate": 9.101809045226131e-05, - "loss": 5.8507, - "step": 9439 - }, - { - "epoch": 4.923076923076923, - "grad_norm": 1.4849376678466797, - "learning_rate": 9.101708542713569e-05, - "loss": 5.5532, - "step": 9440 - }, - { - "epoch": 4.923598435462842, - "grad_norm": 1.3830053806304932, - "learning_rate": 9.101608040201005e-05, - "loss": 5.6157, - "step": 9441 - }, - { - "epoch": 4.924119947848761, - "grad_norm": 1.4043306112289429, - "learning_rate": 9.101507537688443e-05, - "loss": 5.5406, - "step": 9442 - }, - { - "epoch": 4.924641460234681, - "grad_norm": 1.4923397302627563, - "learning_rate": 9.10140703517588e-05, - "loss": 5.5923, - "step": 9443 - }, - { - "epoch": 4.925162972620599, - "grad_norm": 1.3725039958953857, - "learning_rate": 9.101306532663317e-05, - "loss": 5.2186, - "step": 9444 - }, - { - "epoch": 4.925684485006519, - "grad_norm": 1.4008874893188477, - "learning_rate": 9.101206030150754e-05, - "loss": 5.8808, - "step": 9445 - }, - { - "epoch": 4.926205997392438, - "grad_norm": 1.316645622253418, - "learning_rate": 9.101105527638191e-05, - "loss": 5.9233, - "step": 9446 - }, - { - "epoch": 4.926727509778357, - "grad_norm": 1.5887460708618164, - "learning_rate": 9.101005025125628e-05, - "loss": 4.9252, - "step": 9447 - }, - { - "epoch": 4.927249022164276, - "grad_norm": 1.5180329084396362, - "learning_rate": 9.100904522613066e-05, - "loss": 5.4201, - "step": 9448 - }, - { - "epoch": 4.927770534550196, - "grad_norm": 1.5920976400375366, - "learning_rate": 9.100804020100503e-05, - "loss": 5.3298, - "step": 9449 - }, - { - "epoch": 4.9282920469361144, - "grad_norm": 1.3984103202819824, - "learning_rate": 9.10070351758794e-05, - "loss": 5.848, - "step": 9450 - }, - { - "epoch": 4.928813559322034, - "grad_norm": 1.3771047592163086, - "learning_rate": 9.100603015075378e-05, - "loss": 5.9414, - "step": 9451 - }, - { - "epoch": 4.929335071707953, - "grad_norm": 1.4695042371749878, - "learning_rate": 9.100502512562814e-05, - "loss": 5.8318, - "step": 9452 - }, - { - "epoch": 4.929856584093872, - "grad_norm": 1.4828572273254395, - "learning_rate": 9.100402010050252e-05, - "loss": 4.7916, - "step": 9453 - }, - { - "epoch": 4.930378096479791, - "grad_norm": 1.3119230270385742, - "learning_rate": 9.100301507537688e-05, - "loss": 5.6552, - "step": 9454 - }, - { - "epoch": 4.930899608865711, - "grad_norm": 1.4734947681427002, - "learning_rate": 9.100201005025126e-05, - "loss": 4.8748, - "step": 9455 - }, - { - "epoch": 4.9314211212516295, - "grad_norm": 1.7135305404663086, - "learning_rate": 9.100100502512563e-05, - "loss": 5.272, - "step": 9456 - }, - { - "epoch": 4.931942633637549, - "grad_norm": 1.5139179229736328, - "learning_rate": 9.1e-05, - "loss": 5.4931, - "step": 9457 - }, - { - "epoch": 4.932464146023468, - "grad_norm": 1.7476977109909058, - "learning_rate": 9.099899497487438e-05, - "loss": 5.7799, - "step": 9458 - }, - { - "epoch": 4.932985658409387, - "grad_norm": 1.6605749130249023, - "learning_rate": 9.099798994974876e-05, - "loss": 5.8088, - "step": 9459 - }, - { - "epoch": 4.933507170795306, - "grad_norm": 1.5193214416503906, - "learning_rate": 9.099698492462312e-05, - "loss": 5.8406, - "step": 9460 - }, - { - "epoch": 4.934028683181226, - "grad_norm": 1.6792309284210205, - "learning_rate": 9.09959798994975e-05, - "loss": 5.7296, - "step": 9461 - }, - { - "epoch": 4.9345501955671445, - "grad_norm": 1.4395103454589844, - "learning_rate": 9.099497487437186e-05, - "loss": 5.9302, - "step": 9462 - }, - { - "epoch": 4.935071707953064, - "grad_norm": 1.6730462312698364, - "learning_rate": 9.099396984924623e-05, - "loss": 5.6989, - "step": 9463 - }, - { - "epoch": 4.935593220338983, - "grad_norm": 1.375931739807129, - "learning_rate": 9.099296482412061e-05, - "loss": 5.9066, - "step": 9464 - }, - { - "epoch": 4.936114732724902, - "grad_norm": 1.4723083972930908, - "learning_rate": 9.099195979899497e-05, - "loss": 5.8884, - "step": 9465 - }, - { - "epoch": 4.936636245110821, - "grad_norm": 1.536982774734497, - "learning_rate": 9.099095477386935e-05, - "loss": 5.4847, - "step": 9466 - }, - { - "epoch": 4.937157757496741, - "grad_norm": 1.4303357601165771, - "learning_rate": 9.098994974874371e-05, - "loss": 5.9326, - "step": 9467 - }, - { - "epoch": 4.9376792698826595, - "grad_norm": 1.5456264019012451, - "learning_rate": 9.098894472361809e-05, - "loss": 5.8387, - "step": 9468 - }, - { - "epoch": 4.938200782268579, - "grad_norm": 1.4804474115371704, - "learning_rate": 9.098793969849247e-05, - "loss": 5.7955, - "step": 9469 - }, - { - "epoch": 4.938722294654498, - "grad_norm": 1.4159609079360962, - "learning_rate": 9.098693467336685e-05, - "loss": 5.5897, - "step": 9470 - }, - { - "epoch": 4.939243807040417, - "grad_norm": 1.4278018474578857, - "learning_rate": 9.098592964824121e-05, - "loss": 5.6639, - "step": 9471 - }, - { - "epoch": 4.9397653194263365, - "grad_norm": 1.4861332178115845, - "learning_rate": 9.098492462311559e-05, - "loss": 5.6074, - "step": 9472 - }, - { - "epoch": 4.940286831812256, - "grad_norm": 1.5369443893432617, - "learning_rate": 9.098391959798995e-05, - "loss": 5.5954, - "step": 9473 - }, - { - "epoch": 4.9408083441981745, - "grad_norm": 1.4548251628875732, - "learning_rate": 9.098291457286433e-05, - "loss": 5.4274, - "step": 9474 - }, - { - "epoch": 4.941329856584094, - "grad_norm": 1.3125852346420288, - "learning_rate": 9.09819095477387e-05, - "loss": 6.105, - "step": 9475 - }, - { - "epoch": 4.941851368970013, - "grad_norm": 1.385012149810791, - "learning_rate": 9.098090452261307e-05, - "loss": 6.0171, - "step": 9476 - }, - { - "epoch": 4.942372881355932, - "grad_norm": 1.7182080745697021, - "learning_rate": 9.097989949748744e-05, - "loss": 5.7029, - "step": 9477 - }, - { - "epoch": 4.9428943937418515, - "grad_norm": 1.6209876537322998, - "learning_rate": 9.097889447236182e-05, - "loss": 5.6085, - "step": 9478 - }, - { - "epoch": 4.94341590612777, - "grad_norm": 1.587426781654358, - "learning_rate": 9.097788944723619e-05, - "loss": 5.5791, - "step": 9479 - }, - { - "epoch": 4.9439374185136895, - "grad_norm": 1.4025925397872925, - "learning_rate": 9.097688442211056e-05, - "loss": 5.6024, - "step": 9480 - }, - { - "epoch": 4.944458930899609, - "grad_norm": 1.5513529777526855, - "learning_rate": 9.097587939698494e-05, - "loss": 5.7844, - "step": 9481 - }, - { - "epoch": 4.944980443285528, - "grad_norm": 1.682541012763977, - "learning_rate": 9.09748743718593e-05, - "loss": 5.1822, - "step": 9482 - }, - { - "epoch": 4.945501955671447, - "grad_norm": 1.6363345384597778, - "learning_rate": 9.097386934673368e-05, - "loss": 5.6856, - "step": 9483 - }, - { - "epoch": 4.9460234680573665, - "grad_norm": 1.5173075199127197, - "learning_rate": 9.097286432160804e-05, - "loss": 5.8913, - "step": 9484 - }, - { - "epoch": 4.946544980443285, - "grad_norm": 1.7930773496627808, - "learning_rate": 9.097185929648242e-05, - "loss": 5.9791, - "step": 9485 - }, - { - "epoch": 4.9470664928292045, - "grad_norm": 1.4281975030899048, - "learning_rate": 9.097085427135678e-05, - "loss": 5.7972, - "step": 9486 - }, - { - "epoch": 4.947588005215124, - "grad_norm": 1.4980669021606445, - "learning_rate": 9.096984924623116e-05, - "loss": 5.8511, - "step": 9487 - }, - { - "epoch": 4.9481095176010434, - "grad_norm": 1.465827465057373, - "learning_rate": 9.096884422110553e-05, - "loss": 5.7684, - "step": 9488 - }, - { - "epoch": 4.948631029986962, - "grad_norm": 1.5295451879501343, - "learning_rate": 9.09678391959799e-05, - "loss": 5.3219, - "step": 9489 - }, - { - "epoch": 4.9491525423728815, - "grad_norm": 1.5520877838134766, - "learning_rate": 9.096683417085428e-05, - "loss": 6.1132, - "step": 9490 - }, - { - "epoch": 4.9496740547588, - "grad_norm": 1.6648917198181152, - "learning_rate": 9.096582914572865e-05, - "loss": 5.5262, - "step": 9491 - }, - { - "epoch": 4.9501955671447195, - "grad_norm": 1.3541868925094604, - "learning_rate": 9.096482412060302e-05, - "loss": 5.2264, - "step": 9492 - }, - { - "epoch": 4.950717079530639, - "grad_norm": 1.5014731884002686, - "learning_rate": 9.096381909547739e-05, - "loss": 6.012, - "step": 9493 - }, - { - "epoch": 4.9512385919165585, - "grad_norm": 1.4473848342895508, - "learning_rate": 9.096281407035177e-05, - "loss": 5.8941, - "step": 9494 - }, - { - "epoch": 4.951760104302477, - "grad_norm": 1.4296332597732544, - "learning_rate": 9.096180904522613e-05, - "loss": 6.1712, - "step": 9495 - }, - { - "epoch": 4.9522816166883965, - "grad_norm": 1.4028007984161377, - "learning_rate": 9.096080402010051e-05, - "loss": 5.7572, - "step": 9496 - }, - { - "epoch": 4.952803129074315, - "grad_norm": 1.364015817642212, - "learning_rate": 9.095979899497487e-05, - "loss": 4.98, - "step": 9497 - }, - { - "epoch": 4.9533246414602345, - "grad_norm": 1.5670381784439087, - "learning_rate": 9.095879396984925e-05, - "loss": 5.1124, - "step": 9498 - }, - { - "epoch": 4.953846153846154, - "grad_norm": 1.597085952758789, - "learning_rate": 9.095778894472361e-05, - "loss": 5.6751, - "step": 9499 - }, - { - "epoch": 4.9543676662320735, - "grad_norm": 1.5139598846435547, - "learning_rate": 9.095678391959799e-05, - "loss": 5.8707, - "step": 9500 - }, - { - "epoch": 4.954889178617992, - "grad_norm": 1.4488534927368164, - "learning_rate": 9.095577889447237e-05, - "loss": 5.6008, - "step": 9501 - }, - { - "epoch": 4.9554106910039115, - "grad_norm": 1.5516542196273804, - "learning_rate": 9.095477386934675e-05, - "loss": 5.5964, - "step": 9502 - }, - { - "epoch": 4.95593220338983, - "grad_norm": 1.4768667221069336, - "learning_rate": 9.095376884422111e-05, - "loss": 5.769, - "step": 9503 - }, - { - "epoch": 4.95645371577575, - "grad_norm": 1.4210253953933716, - "learning_rate": 9.095276381909548e-05, - "loss": 5.5513, - "step": 9504 - }, - { - "epoch": 4.956975228161669, - "grad_norm": 1.351924180984497, - "learning_rate": 9.095175879396985e-05, - "loss": 5.9911, - "step": 9505 - }, - { - "epoch": 4.9574967405475885, - "grad_norm": 1.5593440532684326, - "learning_rate": 9.095075376884422e-05, - "loss": 5.5771, - "step": 9506 - }, - { - "epoch": 4.958018252933507, - "grad_norm": 1.5490654706954956, - "learning_rate": 9.09497487437186e-05, - "loss": 5.3494, - "step": 9507 - }, - { - "epoch": 4.9585397653194265, - "grad_norm": 2.072201728820801, - "learning_rate": 9.094874371859296e-05, - "loss": 5.5346, - "step": 9508 - }, - { - "epoch": 4.959061277705345, - "grad_norm": 1.4470704793930054, - "learning_rate": 9.094773869346734e-05, - "loss": 5.3475, - "step": 9509 - }, - { - "epoch": 4.959582790091265, - "grad_norm": 1.5778701305389404, - "learning_rate": 9.094673366834172e-05, - "loss": 5.352, - "step": 9510 - }, - { - "epoch": 4.960104302477184, - "grad_norm": 1.4674078226089478, - "learning_rate": 9.09457286432161e-05, - "loss": 5.6995, - "step": 9511 - }, - { - "epoch": 4.960625814863103, - "grad_norm": 1.5061572790145874, - "learning_rate": 9.094472361809046e-05, - "loss": 5.7614, - "step": 9512 - }, - { - "epoch": 4.961147327249022, - "grad_norm": 1.729617953300476, - "learning_rate": 9.094371859296484e-05, - "loss": 5.4714, - "step": 9513 - }, - { - "epoch": 4.9616688396349415, - "grad_norm": 1.5308289527893066, - "learning_rate": 9.09427135678392e-05, - "loss": 5.9243, - "step": 9514 - }, - { - "epoch": 4.96219035202086, - "grad_norm": 1.4754334688186646, - "learning_rate": 9.094170854271358e-05, - "loss": 5.7547, - "step": 9515 - }, - { - "epoch": 4.96271186440678, - "grad_norm": 1.5298941135406494, - "learning_rate": 9.094070351758794e-05, - "loss": 5.5908, - "step": 9516 - }, - { - "epoch": 4.963233376792699, - "grad_norm": 1.6703627109527588, - "learning_rate": 9.09396984924623e-05, - "loss": 5.6519, - "step": 9517 - }, - { - "epoch": 4.963754889178618, - "grad_norm": 1.3146605491638184, - "learning_rate": 9.093869346733668e-05, - "loss": 6.0507, - "step": 9518 - }, - { - "epoch": 4.964276401564537, - "grad_norm": 1.3258230686187744, - "learning_rate": 9.093768844221105e-05, - "loss": 5.3924, - "step": 9519 - }, - { - "epoch": 4.9647979139504566, - "grad_norm": 1.32620370388031, - "learning_rate": 9.093668341708543e-05, - "loss": 6.0788, - "step": 9520 - }, - { - "epoch": 4.965319426336375, - "grad_norm": 1.4107927083969116, - "learning_rate": 9.09356783919598e-05, - "loss": 5.7069, - "step": 9521 - }, - { - "epoch": 4.965840938722295, - "grad_norm": 1.6384838819503784, - "learning_rate": 9.093467336683418e-05, - "loss": 5.2223, - "step": 9522 - }, - { - "epoch": 4.966362451108214, - "grad_norm": 1.527696132659912, - "learning_rate": 9.093366834170855e-05, - "loss": 5.45, - "step": 9523 - }, - { - "epoch": 4.966883963494133, - "grad_norm": 1.5236390829086304, - "learning_rate": 9.093266331658292e-05, - "loss": 5.4657, - "step": 9524 - }, - { - "epoch": 4.967405475880052, - "grad_norm": 1.3854538202285767, - "learning_rate": 9.093165829145729e-05, - "loss": 5.8235, - "step": 9525 - }, - { - "epoch": 4.967926988265972, - "grad_norm": 1.3617008924484253, - "learning_rate": 9.093065326633167e-05, - "loss": 5.95, - "step": 9526 - }, - { - "epoch": 4.96844850065189, - "grad_norm": 1.4061341285705566, - "learning_rate": 9.092964824120603e-05, - "loss": 5.7468, - "step": 9527 - }, - { - "epoch": 4.96897001303781, - "grad_norm": 1.4063454866409302, - "learning_rate": 9.092864321608041e-05, - "loss": 5.8414, - "step": 9528 - }, - { - "epoch": 4.969491525423729, - "grad_norm": 1.5409337282180786, - "learning_rate": 9.092763819095477e-05, - "loss": 5.7194, - "step": 9529 - }, - { - "epoch": 4.970013037809648, - "grad_norm": 1.5460985898971558, - "learning_rate": 9.092663316582915e-05, - "loss": 5.5166, - "step": 9530 - }, - { - "epoch": 4.970534550195567, - "grad_norm": 1.4051940441131592, - "learning_rate": 9.092562814070353e-05, - "loss": 5.8389, - "step": 9531 - }, - { - "epoch": 4.971056062581487, - "grad_norm": 1.391068458557129, - "learning_rate": 9.092462311557789e-05, - "loss": 6.0721, - "step": 9532 - }, - { - "epoch": 4.971577574967405, - "grad_norm": 1.4668761491775513, - "learning_rate": 9.092361809045227e-05, - "loss": 5.2495, - "step": 9533 - }, - { - "epoch": 4.972099087353325, - "grad_norm": 1.5273724794387817, - "learning_rate": 9.092261306532663e-05, - "loss": 5.2345, - "step": 9534 - }, - { - "epoch": 4.972620599739244, - "grad_norm": 1.4453911781311035, - "learning_rate": 9.092160804020101e-05, - "loss": 5.4535, - "step": 9535 - }, - { - "epoch": 4.973142112125163, - "grad_norm": 1.6361924409866333, - "learning_rate": 9.092060301507538e-05, - "loss": 5.562, - "step": 9536 - }, - { - "epoch": 4.973663624511082, - "grad_norm": 1.5112252235412598, - "learning_rate": 9.091959798994975e-05, - "loss": 5.5583, - "step": 9537 - }, - { - "epoch": 4.974185136897002, - "grad_norm": 1.4776006937026978, - "learning_rate": 9.091859296482412e-05, - "loss": 5.4313, - "step": 9538 - }, - { - "epoch": 4.97470664928292, - "grad_norm": 1.448299527168274, - "learning_rate": 9.09175879396985e-05, - "loss": 5.729, - "step": 9539 - }, - { - "epoch": 4.97522816166884, - "grad_norm": 1.4582806825637817, - "learning_rate": 9.091658291457286e-05, - "loss": 5.5718, - "step": 9540 - }, - { - "epoch": 4.975749674054759, - "grad_norm": 1.483436107635498, - "learning_rate": 9.091557788944724e-05, - "loss": 5.6077, - "step": 9541 - }, - { - "epoch": 4.976271186440678, - "grad_norm": 1.4505349397659302, - "learning_rate": 9.091457286432162e-05, - "loss": 5.2934, - "step": 9542 - }, - { - "epoch": 4.976792698826597, - "grad_norm": 1.4045169353485107, - "learning_rate": 9.091356783919598e-05, - "loss": 5.7949, - "step": 9543 - }, - { - "epoch": 4.977314211212517, - "grad_norm": 1.5387983322143555, - "learning_rate": 9.091256281407036e-05, - "loss": 5.6566, - "step": 9544 - }, - { - "epoch": 4.977835723598435, - "grad_norm": 1.427734613418579, - "learning_rate": 9.091155778894472e-05, - "loss": 5.7165, - "step": 9545 - }, - { - "epoch": 4.978357235984355, - "grad_norm": 1.4534152746200562, - "learning_rate": 9.09105527638191e-05, - "loss": 5.5994, - "step": 9546 - }, - { - "epoch": 4.978878748370274, - "grad_norm": 1.2882143259048462, - "learning_rate": 9.090954773869347e-05, - "loss": 6.0772, - "step": 9547 - }, - { - "epoch": 4.979400260756193, - "grad_norm": 1.518412470817566, - "learning_rate": 9.090854271356784e-05, - "loss": 5.5623, - "step": 9548 - }, - { - "epoch": 4.979921773142112, - "grad_norm": 1.4349064826965332, - "learning_rate": 9.090753768844221e-05, - "loss": 5.6247, - "step": 9549 - }, - { - "epoch": 4.980443285528032, - "grad_norm": 1.4152820110321045, - "learning_rate": 9.090653266331659e-05, - "loss": 5.7244, - "step": 9550 - }, - { - "epoch": 4.98096479791395, - "grad_norm": 1.3649566173553467, - "learning_rate": 9.090552763819096e-05, - "loss": 5.9428, - "step": 9551 - }, - { - "epoch": 4.98148631029987, - "grad_norm": 1.4020839929580688, - "learning_rate": 9.090452261306534e-05, - "loss": 6.0766, - "step": 9552 - }, - { - "epoch": 4.982007822685789, - "grad_norm": 1.5051881074905396, - "learning_rate": 9.09035175879397e-05, - "loss": 5.7072, - "step": 9553 - }, - { - "epoch": 4.982529335071708, - "grad_norm": 1.3744229078292847, - "learning_rate": 9.090251256281408e-05, - "loss": 5.6609, - "step": 9554 - }, - { - "epoch": 4.983050847457627, - "grad_norm": 1.5600794553756714, - "learning_rate": 9.090150753768845e-05, - "loss": 5.2753, - "step": 9555 - }, - { - "epoch": 4.983572359843547, - "grad_norm": 1.4500224590301514, - "learning_rate": 9.090050251256281e-05, - "loss": 5.8845, - "step": 9556 - }, - { - "epoch": 4.984093872229465, - "grad_norm": 1.5056763887405396, - "learning_rate": 9.089949748743719e-05, - "loss": 5.5953, - "step": 9557 - }, - { - "epoch": 4.984615384615385, - "grad_norm": 1.458542823791504, - "learning_rate": 9.089849246231155e-05, - "loss": 5.743, - "step": 9558 - }, - { - "epoch": 4.985136897001304, - "grad_norm": 1.4352842569351196, - "learning_rate": 9.089748743718593e-05, - "loss": 5.8879, - "step": 9559 - }, - { - "epoch": 4.985658409387223, - "grad_norm": 1.509617805480957, - "learning_rate": 9.08964824120603e-05, - "loss": 5.4957, - "step": 9560 - }, - { - "epoch": 4.986179921773142, - "grad_norm": 1.4734725952148438, - "learning_rate": 9.089547738693467e-05, - "loss": 5.909, - "step": 9561 - }, - { - "epoch": 4.986701434159062, - "grad_norm": 1.4185115098953247, - "learning_rate": 9.089447236180905e-05, - "loss": 5.3034, - "step": 9562 - }, - { - "epoch": 4.98722294654498, - "grad_norm": 1.3917583227157593, - "learning_rate": 9.089346733668343e-05, - "loss": 5.7737, - "step": 9563 - }, - { - "epoch": 4.9877444589309, - "grad_norm": 1.4229999780654907, - "learning_rate": 9.08924623115578e-05, - "loss": 5.5737, - "step": 9564 - }, - { - "epoch": 4.988265971316819, - "grad_norm": 1.3993674516677856, - "learning_rate": 9.089145728643217e-05, - "loss": 5.8591, - "step": 9565 - }, - { - "epoch": 4.988787483702738, - "grad_norm": 1.3302586078643799, - "learning_rate": 9.089045226130654e-05, - "loss": 5.4385, - "step": 9566 - }, - { - "epoch": 4.989308996088657, - "grad_norm": 1.453735113143921, - "learning_rate": 9.088944723618091e-05, - "loss": 5.4714, - "step": 9567 - }, - { - "epoch": 4.989830508474577, - "grad_norm": 1.37211275100708, - "learning_rate": 9.088844221105528e-05, - "loss": 5.0002, - "step": 9568 - }, - { - "epoch": 4.990352020860495, - "grad_norm": 1.3916655778884888, - "learning_rate": 9.088743718592966e-05, - "loss": 5.6153, - "step": 9569 - }, - { - "epoch": 4.990873533246415, - "grad_norm": 1.422653317451477, - "learning_rate": 9.088643216080402e-05, - "loss": 5.6719, - "step": 9570 - }, - { - "epoch": 4.991395045632334, - "grad_norm": 1.440453052520752, - "learning_rate": 9.08854271356784e-05, - "loss": 5.6978, - "step": 9571 - }, - { - "epoch": 4.991916558018253, - "grad_norm": 1.4746758937835693, - "learning_rate": 9.088442211055278e-05, - "loss": 5.7122, - "step": 9572 - }, - { - "epoch": 4.992438070404172, - "grad_norm": 1.535989761352539, - "learning_rate": 9.088341708542714e-05, - "loss": 5.5598, - "step": 9573 - }, - { - "epoch": 4.992959582790091, - "grad_norm": 1.5031657218933105, - "learning_rate": 9.088241206030152e-05, - "loss": 5.2815, - "step": 9574 - }, - { - "epoch": 4.99348109517601, - "grad_norm": 1.5244801044464111, - "learning_rate": 9.088140703517588e-05, - "loss": 5.7473, - "step": 9575 - }, - { - "epoch": 4.99400260756193, - "grad_norm": 1.3193918466567993, - "learning_rate": 9.088040201005026e-05, - "loss": 5.222, - "step": 9576 - }, - { - "epoch": 4.994524119947849, - "grad_norm": 1.6308892965316772, - "learning_rate": 9.087939698492462e-05, - "loss": 5.2045, - "step": 9577 - }, - { - "epoch": 4.995045632333768, - "grad_norm": 1.5379629135131836, - "learning_rate": 9.0878391959799e-05, - "loss": 5.3605, - "step": 9578 - }, - { - "epoch": 4.995567144719687, - "grad_norm": 1.3673256635665894, - "learning_rate": 9.087738693467337e-05, - "loss": 5.8343, - "step": 9579 - }, - { - "epoch": 4.996088657105606, - "grad_norm": 1.4191076755523682, - "learning_rate": 9.087638190954774e-05, - "loss": 5.5442, - "step": 9580 - }, - { - "epoch": 4.996610169491525, - "grad_norm": 1.3783507347106934, - "learning_rate": 9.087537688442211e-05, - "loss": 5.3831, - "step": 9581 - }, - { - "epoch": 4.997131681877445, - "grad_norm": 1.510187029838562, - "learning_rate": 9.087437185929649e-05, - "loss": 5.483, - "step": 9582 - }, - { - "epoch": 4.997653194263364, - "grad_norm": 1.507614016532898, - "learning_rate": 9.087336683417086e-05, - "loss": 5.5779, - "step": 9583 - }, - { - "epoch": 4.998174706649283, - "grad_norm": 1.4115822315216064, - "learning_rate": 9.087236180904523e-05, - "loss": 5.7898, - "step": 9584 - }, - { - "epoch": 4.998696219035202, - "grad_norm": 1.4444499015808105, - "learning_rate": 9.08713567839196e-05, - "loss": 5.7272, - "step": 9585 - }, - { - "epoch": 4.999217731421121, - "grad_norm": 1.549178123474121, - "learning_rate": 9.087035175879397e-05, - "loss": 5.4709, - "step": 9586 - }, - { - "epoch": 4.99973924380704, - "grad_norm": 1.4479438066482544, - "learning_rate": 9.086934673366835e-05, - "loss": 5.6143, - "step": 9587 - }, - { - "epoch": 5.00026075619296, - "grad_norm": 1.625806212425232, - "learning_rate": 9.086834170854271e-05, - "loss": 5.5754, - "step": 9588 - }, - { - "epoch": 5.000782268578878, - "grad_norm": 1.6331462860107422, - "learning_rate": 9.086733668341709e-05, - "loss": 4.9812, - "step": 9589 - }, - { - "epoch": 5.001303780964798, - "grad_norm": 1.6176091432571411, - "learning_rate": 9.086633165829145e-05, - "loss": 5.6923, - "step": 9590 - }, - { - "epoch": 5.001825293350717, - "grad_norm": 1.4320005178451538, - "learning_rate": 9.086532663316583e-05, - "loss": 6.015, - "step": 9591 - }, - { - "epoch": 5.002346805736636, - "grad_norm": 1.3827357292175293, - "learning_rate": 9.086432160804021e-05, - "loss": 5.6476, - "step": 9592 - }, - { - "epoch": 5.002868318122555, - "grad_norm": 1.474095106124878, - "learning_rate": 9.086331658291459e-05, - "loss": 5.8443, - "step": 9593 - }, - { - "epoch": 5.003389830508475, - "grad_norm": 1.5613101720809937, - "learning_rate": 9.086231155778895e-05, - "loss": 5.2433, - "step": 9594 - }, - { - "epoch": 5.003911342894393, - "grad_norm": 1.460621953010559, - "learning_rate": 9.086130653266333e-05, - "loss": 5.9803, - "step": 9595 - }, - { - "epoch": 5.004432855280313, - "grad_norm": 1.5051751136779785, - "learning_rate": 9.08603015075377e-05, - "loss": 5.7192, - "step": 9596 - }, - { - "epoch": 5.004954367666232, - "grad_norm": 1.4604213237762451, - "learning_rate": 9.085929648241206e-05, - "loss": 5.3661, - "step": 9597 - }, - { - "epoch": 5.005475880052151, - "grad_norm": 1.5485050678253174, - "learning_rate": 9.085829145728644e-05, - "loss": 5.7578, - "step": 9598 - }, - { - "epoch": 5.00599739243807, - "grad_norm": 1.643455147743225, - "learning_rate": 9.08572864321608e-05, - "loss": 5.9708, - "step": 9599 - }, - { - "epoch": 5.00651890482399, - "grad_norm": 1.5654058456420898, - "learning_rate": 9.085628140703518e-05, - "loss": 5.4986, - "step": 9600 - }, - { - "epoch": 5.007040417209908, - "grad_norm": 1.4612113237380981, - "learning_rate": 9.085527638190954e-05, - "loss": 5.5646, - "step": 9601 - }, - { - "epoch": 5.007561929595828, - "grad_norm": 1.461822509765625, - "learning_rate": 9.085427135678392e-05, - "loss": 5.4933, - "step": 9602 - }, - { - "epoch": 5.008083441981747, - "grad_norm": 1.7780969142913818, - "learning_rate": 9.08532663316583e-05, - "loss": 5.0113, - "step": 9603 - }, - { - "epoch": 5.008604954367666, - "grad_norm": 1.5177302360534668, - "learning_rate": 9.085226130653268e-05, - "loss": 5.2657, - "step": 9604 - }, - { - "epoch": 5.009126466753585, - "grad_norm": 1.5507392883300781, - "learning_rate": 9.085125628140704e-05, - "loss": 5.2775, - "step": 9605 - }, - { - "epoch": 5.009647979139505, - "grad_norm": 1.4160226583480835, - "learning_rate": 9.085025125628142e-05, - "loss": 5.8781, - "step": 9606 - }, - { - "epoch": 5.010169491525423, - "grad_norm": 1.4104321002960205, - "learning_rate": 9.084924623115578e-05, - "loss": 5.7939, - "step": 9607 - }, - { - "epoch": 5.010691003911343, - "grad_norm": 1.4358967542648315, - "learning_rate": 9.084824120603016e-05, - "loss": 5.9341, - "step": 9608 - }, - { - "epoch": 5.011212516297262, - "grad_norm": 1.4435081481933594, - "learning_rate": 9.084723618090452e-05, - "loss": 5.8186, - "step": 9609 - }, - { - "epoch": 5.011734028683181, - "grad_norm": 1.4456539154052734, - "learning_rate": 9.084623115577889e-05, - "loss": 5.2494, - "step": 9610 - }, - { - "epoch": 5.0122555410691, - "grad_norm": 1.7066278457641602, - "learning_rate": 9.084522613065327e-05, - "loss": 5.3539, - "step": 9611 - }, - { - "epoch": 5.01277705345502, - "grad_norm": 1.6565117835998535, - "learning_rate": 9.084422110552764e-05, - "loss": 5.1258, - "step": 9612 - }, - { - "epoch": 5.013298565840938, - "grad_norm": 1.626996397972107, - "learning_rate": 9.084321608040202e-05, - "loss": 5.3644, - "step": 9613 - }, - { - "epoch": 5.013820078226858, - "grad_norm": 1.5126397609710693, - "learning_rate": 9.084221105527639e-05, - "loss": 5.862, - "step": 9614 - }, - { - "epoch": 5.014341590612777, - "grad_norm": 1.359494924545288, - "learning_rate": 9.084120603015076e-05, - "loss": 5.7691, - "step": 9615 - }, - { - "epoch": 5.014863102998696, - "grad_norm": 1.4283158779144287, - "learning_rate": 9.084020100502513e-05, - "loss": 5.6098, - "step": 9616 - }, - { - "epoch": 5.015384615384615, - "grad_norm": 1.448838710784912, - "learning_rate": 9.08391959798995e-05, - "loss": 5.556, - "step": 9617 - }, - { - "epoch": 5.015906127770535, - "grad_norm": 1.3984805345535278, - "learning_rate": 9.083819095477387e-05, - "loss": 5.9086, - "step": 9618 - }, - { - "epoch": 5.016427640156453, - "grad_norm": 1.3539408445358276, - "learning_rate": 9.083718592964825e-05, - "loss": 5.9561, - "step": 9619 - }, - { - "epoch": 5.016949152542373, - "grad_norm": 1.601210117340088, - "learning_rate": 9.083618090452261e-05, - "loss": 5.0634, - "step": 9620 - }, - { - "epoch": 5.017470664928292, - "grad_norm": 1.4492470026016235, - "learning_rate": 9.083517587939699e-05, - "loss": 5.5131, - "step": 9621 - }, - { - "epoch": 5.017992177314211, - "grad_norm": 1.3575685024261475, - "learning_rate": 9.083417085427136e-05, - "loss": 6.0419, - "step": 9622 - }, - { - "epoch": 5.01851368970013, - "grad_norm": 1.416258454322815, - "learning_rate": 9.083316582914573e-05, - "loss": 5.6356, - "step": 9623 - }, - { - "epoch": 5.01903520208605, - "grad_norm": 1.453626275062561, - "learning_rate": 9.083216080402011e-05, - "loss": 5.7638, - "step": 9624 - }, - { - "epoch": 5.019556714471968, - "grad_norm": 1.5545746088027954, - "learning_rate": 9.083115577889448e-05, - "loss": 5.3387, - "step": 9625 - }, - { - "epoch": 5.020078226857888, - "grad_norm": 1.53034245967865, - "learning_rate": 9.083015075376885e-05, - "loss": 5.7037, - "step": 9626 - }, - { - "epoch": 5.020599739243807, - "grad_norm": 1.3486019372940063, - "learning_rate": 9.082914572864322e-05, - "loss": 6.0162, - "step": 9627 - }, - { - "epoch": 5.021121251629726, - "grad_norm": 1.3466379642486572, - "learning_rate": 9.08281407035176e-05, - "loss": 6.1135, - "step": 9628 - }, - { - "epoch": 5.021642764015645, - "grad_norm": 1.2867568731307983, - "learning_rate": 9.082713567839196e-05, - "loss": 6.1289, - "step": 9629 - }, - { - "epoch": 5.022164276401565, - "grad_norm": 1.6420936584472656, - "learning_rate": 9.082613065326634e-05, - "loss": 5.0427, - "step": 9630 - }, - { - "epoch": 5.022685788787483, - "grad_norm": 1.4582635164260864, - "learning_rate": 9.08251256281407e-05, - "loss": 5.682, - "step": 9631 - }, - { - "epoch": 5.023207301173403, - "grad_norm": 1.5015995502471924, - "learning_rate": 9.082412060301508e-05, - "loss": 5.6702, - "step": 9632 - }, - { - "epoch": 5.023728813559322, - "grad_norm": 1.4561374187469482, - "learning_rate": 9.082311557788946e-05, - "loss": 5.7117, - "step": 9633 - }, - { - "epoch": 5.024250325945241, - "grad_norm": 1.5734344720840454, - "learning_rate": 9.082211055276383e-05, - "loss": 5.655, - "step": 9634 - }, - { - "epoch": 5.02477183833116, - "grad_norm": 1.427914023399353, - "learning_rate": 9.08211055276382e-05, - "loss": 5.9042, - "step": 9635 - }, - { - "epoch": 5.02529335071708, - "grad_norm": 1.3230676651000977, - "learning_rate": 9.082010050251256e-05, - "loss": 5.9891, - "step": 9636 - }, - { - "epoch": 5.025814863102998, - "grad_norm": 1.329751968383789, - "learning_rate": 9.081909547738694e-05, - "loss": 5.9401, - "step": 9637 - }, - { - "epoch": 5.026336375488918, - "grad_norm": 1.3566443920135498, - "learning_rate": 9.08180904522613e-05, - "loss": 5.7345, - "step": 9638 - }, - { - "epoch": 5.026857887874837, - "grad_norm": 1.5137128829956055, - "learning_rate": 9.081708542713568e-05, - "loss": 5.4097, - "step": 9639 - }, - { - "epoch": 5.027379400260756, - "grad_norm": 1.303293228149414, - "learning_rate": 9.081608040201005e-05, - "loss": 5.763, - "step": 9640 - }, - { - "epoch": 5.027900912646675, - "grad_norm": 1.4488989114761353, - "learning_rate": 9.081507537688443e-05, - "loss": 5.6348, - "step": 9641 - }, - { - "epoch": 5.028422425032595, - "grad_norm": 1.337268590927124, - "learning_rate": 9.081407035175879e-05, - "loss": 5.702, - "step": 9642 - }, - { - "epoch": 5.028943937418513, - "grad_norm": 1.5853475332260132, - "learning_rate": 9.081306532663317e-05, - "loss": 5.4197, - "step": 9643 - }, - { - "epoch": 5.029465449804433, - "grad_norm": 1.4773191213607788, - "learning_rate": 9.081206030150755e-05, - "loss": 5.7977, - "step": 9644 - }, - { - "epoch": 5.029986962190352, - "grad_norm": 1.3736628293991089, - "learning_rate": 9.081105527638192e-05, - "loss": 5.4086, - "step": 9645 - }, - { - "epoch": 5.030508474576271, - "grad_norm": 1.5422024726867676, - "learning_rate": 9.081005025125629e-05, - "loss": 5.0358, - "step": 9646 - }, - { - "epoch": 5.03102998696219, - "grad_norm": 1.5655113458633423, - "learning_rate": 9.080904522613067e-05, - "loss": 5.2046, - "step": 9647 - }, - { - "epoch": 5.03155149934811, - "grad_norm": 1.4077508449554443, - "learning_rate": 9.080804020100503e-05, - "loss": 5.9684, - "step": 9648 - }, - { - "epoch": 5.032073011734028, - "grad_norm": 1.2865736484527588, - "learning_rate": 9.08070351758794e-05, - "loss": 6.0274, - "step": 9649 - }, - { - "epoch": 5.032594524119948, - "grad_norm": 1.5737452507019043, - "learning_rate": 9.080603015075377e-05, - "loss": 5.1977, - "step": 9650 - }, - { - "epoch": 5.033116036505867, - "grad_norm": 1.5545756816864014, - "learning_rate": 9.080502512562814e-05, - "loss": 5.6668, - "step": 9651 - }, - { - "epoch": 5.033637548891786, - "grad_norm": 1.4996898174285889, - "learning_rate": 9.080402010050251e-05, - "loss": 5.5639, - "step": 9652 - }, - { - "epoch": 5.034159061277705, - "grad_norm": 1.4492385387420654, - "learning_rate": 9.080301507537689e-05, - "loss": 5.7191, - "step": 9653 - }, - { - "epoch": 5.034680573663625, - "grad_norm": 1.407344937324524, - "learning_rate": 9.080201005025127e-05, - "loss": 5.937, - "step": 9654 - }, - { - "epoch": 5.0352020860495434, - "grad_norm": 1.4051048755645752, - "learning_rate": 9.080100502512563e-05, - "loss": 4.997, - "step": 9655 - }, - { - "epoch": 5.035723598435463, - "grad_norm": 1.5161521434783936, - "learning_rate": 9.080000000000001e-05, - "loss": 5.3697, - "step": 9656 - }, - { - "epoch": 5.036245110821382, - "grad_norm": 1.473875641822815, - "learning_rate": 9.079899497487438e-05, - "loss": 5.7287, - "step": 9657 - }, - { - "epoch": 5.036766623207301, - "grad_norm": 1.3484997749328613, - "learning_rate": 9.079798994974875e-05, - "loss": 5.7778, - "step": 9658 - }, - { - "epoch": 5.03728813559322, - "grad_norm": 1.5568821430206299, - "learning_rate": 9.079698492462312e-05, - "loss": 5.7683, - "step": 9659 - }, - { - "epoch": 5.03780964797914, - "grad_norm": 1.4554647207260132, - "learning_rate": 9.07959798994975e-05, - "loss": 5.3819, - "step": 9660 - }, - { - "epoch": 5.0383311603650585, - "grad_norm": 1.455867886543274, - "learning_rate": 9.079497487437186e-05, - "loss": 5.9223, - "step": 9661 - }, - { - "epoch": 5.038852672750978, - "grad_norm": 1.4219870567321777, - "learning_rate": 9.079396984924624e-05, - "loss": 5.7912, - "step": 9662 - }, - { - "epoch": 5.039374185136897, - "grad_norm": 1.6555315256118774, - "learning_rate": 9.07929648241206e-05, - "loss": 5.3327, - "step": 9663 - }, - { - "epoch": 5.039895697522816, - "grad_norm": 1.4760990142822266, - "learning_rate": 9.079195979899498e-05, - "loss": 5.9177, - "step": 9664 - }, - { - "epoch": 5.040417209908735, - "grad_norm": 1.7738091945648193, - "learning_rate": 9.079095477386936e-05, - "loss": 5.3492, - "step": 9665 - }, - { - "epoch": 5.040938722294655, - "grad_norm": 1.5841596126556396, - "learning_rate": 9.078994974874372e-05, - "loss": 5.8397, - "step": 9666 - }, - { - "epoch": 5.0414602346805735, - "grad_norm": 1.4651416540145874, - "learning_rate": 9.07889447236181e-05, - "loss": 5.5619, - "step": 9667 - }, - { - "epoch": 5.041981747066493, - "grad_norm": 1.428672194480896, - "learning_rate": 9.078793969849246e-05, - "loss": 5.8147, - "step": 9668 - }, - { - "epoch": 5.042503259452412, - "grad_norm": 1.4610378742218018, - "learning_rate": 9.078693467336684e-05, - "loss": 5.9564, - "step": 9669 - }, - { - "epoch": 5.043024771838331, - "grad_norm": 1.5630525350570679, - "learning_rate": 9.07859296482412e-05, - "loss": 5.7911, - "step": 9670 - }, - { - "epoch": 5.04354628422425, - "grad_norm": 1.3922629356384277, - "learning_rate": 9.078492462311558e-05, - "loss": 6.1054, - "step": 9671 - }, - { - "epoch": 5.04406779661017, - "grad_norm": 1.5127930641174316, - "learning_rate": 9.078391959798995e-05, - "loss": 5.782, - "step": 9672 - }, - { - "epoch": 5.0445893089960885, - "grad_norm": 1.3915724754333496, - "learning_rate": 9.078291457286433e-05, - "loss": 5.9943, - "step": 9673 - }, - { - "epoch": 5.045110821382008, - "grad_norm": 1.3761885166168213, - "learning_rate": 9.078190954773869e-05, - "loss": 5.9184, - "step": 9674 - }, - { - "epoch": 5.045632333767927, - "grad_norm": 1.6044615507125854, - "learning_rate": 9.078090452261307e-05, - "loss": 5.4293, - "step": 9675 - }, - { - "epoch": 5.046153846153846, - "grad_norm": 1.3893239498138428, - "learning_rate": 9.077989949748745e-05, - "loss": 5.4735, - "step": 9676 - }, - { - "epoch": 5.0466753585397655, - "grad_norm": 1.4942357540130615, - "learning_rate": 9.077889447236181e-05, - "loss": 5.646, - "step": 9677 - }, - { - "epoch": 5.047196870925685, - "grad_norm": 1.6065033674240112, - "learning_rate": 9.077788944723619e-05, - "loss": 5.101, - "step": 9678 - }, - { - "epoch": 5.0477183833116035, - "grad_norm": 1.5114279985427856, - "learning_rate": 9.077688442211055e-05, - "loss": 5.7689, - "step": 9679 - }, - { - "epoch": 5.048239895697523, - "grad_norm": 1.7003482580184937, - "learning_rate": 9.077587939698493e-05, - "loss": 4.7868, - "step": 9680 - }, - { - "epoch": 5.0487614080834415, - "grad_norm": 1.5474112033843994, - "learning_rate": 9.07748743718593e-05, - "loss": 5.74, - "step": 9681 - }, - { - "epoch": 5.049282920469361, - "grad_norm": 1.630046010017395, - "learning_rate": 9.077386934673367e-05, - "loss": 5.4271, - "step": 9682 - }, - { - "epoch": 5.0498044328552805, - "grad_norm": 1.3415035009384155, - "learning_rate": 9.077286432160804e-05, - "loss": 5.7669, - "step": 9683 - }, - { - "epoch": 5.050325945241199, - "grad_norm": 2.007796049118042, - "learning_rate": 9.077185929648241e-05, - "loss": 5.1961, - "step": 9684 - }, - { - "epoch": 5.0508474576271185, - "grad_norm": 1.484814167022705, - "learning_rate": 9.077085427135679e-05, - "loss": 5.5597, - "step": 9685 - }, - { - "epoch": 5.051368970013038, - "grad_norm": 1.4897054433822632, - "learning_rate": 9.076984924623117e-05, - "loss": 5.5614, - "step": 9686 - }, - { - "epoch": 5.0518904823989566, - "grad_norm": 1.5001835823059082, - "learning_rate": 9.076884422110553e-05, - "loss": 5.451, - "step": 9687 - }, - { - "epoch": 5.052411994784876, - "grad_norm": 1.3624945878982544, - "learning_rate": 9.076783919597991e-05, - "loss": 5.7762, - "step": 9688 - }, - { - "epoch": 5.0529335071707955, - "grad_norm": 1.6024837493896484, - "learning_rate": 9.076683417085428e-05, - "loss": 5.2915, - "step": 9689 - }, - { - "epoch": 5.053455019556714, - "grad_norm": 1.3378024101257324, - "learning_rate": 9.076582914572864e-05, - "loss": 5.8693, - "step": 9690 - }, - { - "epoch": 5.0539765319426335, - "grad_norm": 1.4738073348999023, - "learning_rate": 9.076482412060302e-05, - "loss": 5.6446, - "step": 9691 - }, - { - "epoch": 5.054498044328553, - "grad_norm": 1.3401093482971191, - "learning_rate": 9.076381909547738e-05, - "loss": 5.6551, - "step": 9692 - }, - { - "epoch": 5.055019556714472, - "grad_norm": 1.4309895038604736, - "learning_rate": 9.076281407035176e-05, - "loss": 6.0019, - "step": 9693 - }, - { - "epoch": 5.055541069100391, - "grad_norm": 1.6626337766647339, - "learning_rate": 9.076180904522613e-05, - "loss": 5.2261, - "step": 9694 - }, - { - "epoch": 5.0560625814863105, - "grad_norm": 1.3755441904067993, - "learning_rate": 9.07608040201005e-05, - "loss": 5.6063, - "step": 9695 - }, - { - "epoch": 5.056584093872229, - "grad_norm": 1.4033710956573486, - "learning_rate": 9.075979899497488e-05, - "loss": 5.8309, - "step": 9696 - }, - { - "epoch": 5.0571056062581485, - "grad_norm": 1.540905475616455, - "learning_rate": 9.075879396984926e-05, - "loss": 4.8308, - "step": 9697 - }, - { - "epoch": 5.057627118644068, - "grad_norm": 1.4502434730529785, - "learning_rate": 9.075778894472362e-05, - "loss": 5.1935, - "step": 9698 - }, - { - "epoch": 5.058148631029987, - "grad_norm": 1.5704296827316284, - "learning_rate": 9.0756783919598e-05, - "loss": 5.7042, - "step": 9699 - }, - { - "epoch": 5.058670143415906, - "grad_norm": 1.5186989307403564, - "learning_rate": 9.075577889447236e-05, - "loss": 5.3473, - "step": 9700 - }, - { - "epoch": 5.0591916558018255, - "grad_norm": 1.3873562812805176, - "learning_rate": 9.075477386934674e-05, - "loss": 5.969, - "step": 9701 - }, - { - "epoch": 5.059713168187744, - "grad_norm": 1.4044115543365479, - "learning_rate": 9.075376884422111e-05, - "loss": 6.0869, - "step": 9702 - }, - { - "epoch": 5.0602346805736635, - "grad_norm": 1.7077969312667847, - "learning_rate": 9.075276381909547e-05, - "loss": 5.6628, - "step": 9703 - }, - { - "epoch": 5.060756192959583, - "grad_norm": 1.5040206909179688, - "learning_rate": 9.075175879396985e-05, - "loss": 5.7937, - "step": 9704 - }, - { - "epoch": 5.061277705345502, - "grad_norm": 1.373559594154358, - "learning_rate": 9.075075376884423e-05, - "loss": 5.6147, - "step": 9705 - }, - { - "epoch": 5.061799217731421, - "grad_norm": 1.6548230648040771, - "learning_rate": 9.07497487437186e-05, - "loss": 4.9918, - "step": 9706 - }, - { - "epoch": 5.0623207301173405, - "grad_norm": 1.4347732067108154, - "learning_rate": 9.074874371859297e-05, - "loss": 5.6706, - "step": 9707 - }, - { - "epoch": 5.062842242503259, - "grad_norm": 1.3914793729782104, - "learning_rate": 9.074773869346735e-05, - "loss": 5.8049, - "step": 9708 - }, - { - "epoch": 5.063363754889179, - "grad_norm": 1.471257209777832, - "learning_rate": 9.074673366834171e-05, - "loss": 5.2838, - "step": 9709 - }, - { - "epoch": 5.063885267275098, - "grad_norm": 1.4206657409667969, - "learning_rate": 9.074572864321609e-05, - "loss": 5.6992, - "step": 9710 - }, - { - "epoch": 5.064406779661017, - "grad_norm": 1.539571762084961, - "learning_rate": 9.074472361809045e-05, - "loss": 5.1258, - "step": 9711 - }, - { - "epoch": 5.064928292046936, - "grad_norm": 1.5601158142089844, - "learning_rate": 9.074371859296483e-05, - "loss": 5.8603, - "step": 9712 - }, - { - "epoch": 5.0654498044328555, - "grad_norm": 1.6400681734085083, - "learning_rate": 9.07427135678392e-05, - "loss": 5.3704, - "step": 9713 - }, - { - "epoch": 5.065971316818774, - "grad_norm": 1.572854995727539, - "learning_rate": 9.074170854271357e-05, - "loss": 5.7176, - "step": 9714 - }, - { - "epoch": 5.066492829204694, - "grad_norm": 1.5074845552444458, - "learning_rate": 9.074070351758794e-05, - "loss": 4.9472, - "step": 9715 - }, - { - "epoch": 5.067014341590613, - "grad_norm": 1.4631696939468384, - "learning_rate": 9.073969849246232e-05, - "loss": 5.2816, - "step": 9716 - }, - { - "epoch": 5.067535853976532, - "grad_norm": 1.378720998764038, - "learning_rate": 9.073869346733669e-05, - "loss": 5.6105, - "step": 9717 - }, - { - "epoch": 5.068057366362451, - "grad_norm": 1.5094642639160156, - "learning_rate": 9.073768844221106e-05, - "loss": 5.4617, - "step": 9718 - }, - { - "epoch": 5.0685788787483705, - "grad_norm": 1.3430471420288086, - "learning_rate": 9.073668341708544e-05, - "loss": 4.9771, - "step": 9719 - }, - { - "epoch": 5.069100391134289, - "grad_norm": 1.631402611732483, - "learning_rate": 9.07356783919598e-05, - "loss": 5.3833, - "step": 9720 - }, - { - "epoch": 5.069621903520209, - "grad_norm": 1.3520491123199463, - "learning_rate": 9.073467336683418e-05, - "loss": 5.8105, - "step": 9721 - }, - { - "epoch": 5.070143415906128, - "grad_norm": 1.4429627656936646, - "learning_rate": 9.073366834170854e-05, - "loss": 5.8603, - "step": 9722 - }, - { - "epoch": 5.070664928292047, - "grad_norm": 1.3772015571594238, - "learning_rate": 9.073266331658292e-05, - "loss": 5.8779, - "step": 9723 - }, - { - "epoch": 5.071186440677966, - "grad_norm": 1.4569953680038452, - "learning_rate": 9.073165829145728e-05, - "loss": 5.7043, - "step": 9724 - }, - { - "epoch": 5.0717079530638856, - "grad_norm": 1.454325556755066, - "learning_rate": 9.073065326633166e-05, - "loss": 5.9363, - "step": 9725 - }, - { - "epoch": 5.072229465449804, - "grad_norm": 1.496927261352539, - "learning_rate": 9.072964824120604e-05, - "loss": 5.69, - "step": 9726 - }, - { - "epoch": 5.072750977835724, - "grad_norm": 1.3535856008529663, - "learning_rate": 9.072864321608042e-05, - "loss": 5.5874, - "step": 9727 - }, - { - "epoch": 5.073272490221643, - "grad_norm": 1.4858235120773315, - "learning_rate": 9.072763819095478e-05, - "loss": 5.4547, - "step": 9728 - }, - { - "epoch": 5.073794002607562, - "grad_norm": 1.5924283266067505, - "learning_rate": 9.072663316582915e-05, - "loss": 4.9759, - "step": 9729 - }, - { - "epoch": 5.074315514993481, - "grad_norm": 1.4746376276016235, - "learning_rate": 9.072562814070352e-05, - "loss": 5.9143, - "step": 9730 - }, - { - "epoch": 5.074837027379401, - "grad_norm": 1.6171820163726807, - "learning_rate": 9.072462311557789e-05, - "loss": 5.4089, - "step": 9731 - }, - { - "epoch": 5.075358539765319, - "grad_norm": 1.360903024673462, - "learning_rate": 9.072361809045227e-05, - "loss": 5.8715, - "step": 9732 - }, - { - "epoch": 5.075880052151239, - "grad_norm": 1.4499837160110474, - "learning_rate": 9.072261306532663e-05, - "loss": 5.8175, - "step": 9733 - }, - { - "epoch": 5.076401564537158, - "grad_norm": 1.4684085845947266, - "learning_rate": 9.072160804020101e-05, - "loss": 5.8982, - "step": 9734 - }, - { - "epoch": 5.076923076923077, - "grad_norm": 1.41493558883667, - "learning_rate": 9.072060301507537e-05, - "loss": 5.872, - "step": 9735 - }, - { - "epoch": 5.077444589308996, - "grad_norm": 1.5447040796279907, - "learning_rate": 9.071959798994975e-05, - "loss": 5.7936, - "step": 9736 - }, - { - "epoch": 5.077966101694916, - "grad_norm": 1.4709868431091309, - "learning_rate": 9.071859296482413e-05, - "loss": 5.6794, - "step": 9737 - }, - { - "epoch": 5.078487614080834, - "grad_norm": 1.5439459085464478, - "learning_rate": 9.07175879396985e-05, - "loss": 5.6377, - "step": 9738 - }, - { - "epoch": 5.079009126466754, - "grad_norm": 1.4479284286499023, - "learning_rate": 9.071658291457287e-05, - "loss": 5.931, - "step": 9739 - }, - { - "epoch": 5.079530638852673, - "grad_norm": 1.6863757371902466, - "learning_rate": 9.071557788944725e-05, - "loss": 4.592, - "step": 9740 - }, - { - "epoch": 5.080052151238592, - "grad_norm": 1.4134440422058105, - "learning_rate": 9.071457286432161e-05, - "loss": 5.6923, - "step": 9741 - }, - { - "epoch": 5.080573663624511, - "grad_norm": 1.5288927555084229, - "learning_rate": 9.071356783919599e-05, - "loss": 5.9839, - "step": 9742 - }, - { - "epoch": 5.081095176010431, - "grad_norm": 1.362660527229309, - "learning_rate": 9.071256281407035e-05, - "loss": 5.6391, - "step": 9743 - }, - { - "epoch": 5.081616688396349, - "grad_norm": 1.488044261932373, - "learning_rate": 9.071155778894472e-05, - "loss": 5.5249, - "step": 9744 - }, - { - "epoch": 5.082138200782269, - "grad_norm": 1.5648248195648193, - "learning_rate": 9.07105527638191e-05, - "loss": 5.6881, - "step": 9745 - }, - { - "epoch": 5.082659713168188, - "grad_norm": 1.4697654247283936, - "learning_rate": 9.070954773869347e-05, - "loss": 5.6773, - "step": 9746 - }, - { - "epoch": 5.083181225554107, - "grad_norm": 1.4515063762664795, - "learning_rate": 9.070854271356785e-05, - "loss": 5.7245, - "step": 9747 - }, - { - "epoch": 5.083702737940026, - "grad_norm": 1.3670313358306885, - "learning_rate": 9.070753768844222e-05, - "loss": 4.9729, - "step": 9748 - }, - { - "epoch": 5.084224250325946, - "grad_norm": 1.3423715829849243, - "learning_rate": 9.07065326633166e-05, - "loss": 5.6593, - "step": 9749 - }, - { - "epoch": 5.084745762711864, - "grad_norm": 1.4635305404663086, - "learning_rate": 9.070552763819096e-05, - "loss": 5.7544, - "step": 9750 - }, - { - "epoch": 5.085267275097784, - "grad_norm": 1.501207947731018, - "learning_rate": 9.070452261306534e-05, - "loss": 5.6287, - "step": 9751 - }, - { - "epoch": 5.085788787483703, - "grad_norm": 1.4701714515686035, - "learning_rate": 9.07035175879397e-05, - "loss": 5.6029, - "step": 9752 - }, - { - "epoch": 5.086310299869622, - "grad_norm": 1.4711005687713623, - "learning_rate": 9.070251256281408e-05, - "loss": 5.4826, - "step": 9753 - }, - { - "epoch": 5.086831812255541, - "grad_norm": 1.6107921600341797, - "learning_rate": 9.070150753768844e-05, - "loss": 5.3437, - "step": 9754 - }, - { - "epoch": 5.087353324641461, - "grad_norm": 1.395771861076355, - "learning_rate": 9.070050251256282e-05, - "loss": 4.7987, - "step": 9755 - }, - { - "epoch": 5.087874837027379, - "grad_norm": 1.4143706560134888, - "learning_rate": 9.069949748743718e-05, - "loss": 5.4825, - "step": 9756 - }, - { - "epoch": 5.088396349413299, - "grad_norm": 1.3993958234786987, - "learning_rate": 9.069849246231156e-05, - "loss": 6.0312, - "step": 9757 - }, - { - "epoch": 5.088917861799218, - "grad_norm": 1.4078961610794067, - "learning_rate": 9.069748743718594e-05, - "loss": 5.5264, - "step": 9758 - }, - { - "epoch": 5.089439374185137, - "grad_norm": 1.431488275527954, - "learning_rate": 9.06964824120603e-05, - "loss": 5.1128, - "step": 9759 - }, - { - "epoch": 5.089960886571056, - "grad_norm": 1.3280187845230103, - "learning_rate": 9.069547738693468e-05, - "loss": 6.0015, - "step": 9760 - }, - { - "epoch": 5.090482398956976, - "grad_norm": 1.5541555881500244, - "learning_rate": 9.069447236180905e-05, - "loss": 5.1856, - "step": 9761 - }, - { - "epoch": 5.091003911342894, - "grad_norm": 1.4489773511886597, - "learning_rate": 9.069346733668342e-05, - "loss": 6.1718, - "step": 9762 - }, - { - "epoch": 5.091525423728814, - "grad_norm": 1.4081870317459106, - "learning_rate": 9.069246231155779e-05, - "loss": 5.868, - "step": 9763 - }, - { - "epoch": 5.092046936114733, - "grad_norm": 1.4468175172805786, - "learning_rate": 9.069145728643217e-05, - "loss": 5.421, - "step": 9764 - }, - { - "epoch": 5.092568448500652, - "grad_norm": 1.471042513847351, - "learning_rate": 9.069045226130653e-05, - "loss": 5.3036, - "step": 9765 - }, - { - "epoch": 5.093089960886571, - "grad_norm": 1.7339308261871338, - "learning_rate": 9.068944723618091e-05, - "loss": 5.3725, - "step": 9766 - }, - { - "epoch": 5.093611473272491, - "grad_norm": 1.4780833721160889, - "learning_rate": 9.068844221105529e-05, - "loss": 5.8853, - "step": 9767 - }, - { - "epoch": 5.094132985658409, - "grad_norm": 1.4603228569030762, - "learning_rate": 9.068743718592966e-05, - "loss": 5.767, - "step": 9768 - }, - { - "epoch": 5.094654498044329, - "grad_norm": 1.62308669090271, - "learning_rate": 9.068643216080403e-05, - "loss": 5.4449, - "step": 9769 - }, - { - "epoch": 5.095176010430248, - "grad_norm": 1.5132569074630737, - "learning_rate": 9.068542713567839e-05, - "loss": 5.761, - "step": 9770 - }, - { - "epoch": 5.095697522816167, - "grad_norm": 1.355875849723816, - "learning_rate": 9.068442211055277e-05, - "loss": 6.0694, - "step": 9771 - }, - { - "epoch": 5.096219035202086, - "grad_norm": 1.4979161024093628, - "learning_rate": 9.068341708542713e-05, - "loss": 5.7338, - "step": 9772 - }, - { - "epoch": 5.096740547588006, - "grad_norm": 1.5663098096847534, - "learning_rate": 9.068241206030151e-05, - "loss": 4.9289, - "step": 9773 - }, - { - "epoch": 5.097262059973924, - "grad_norm": 1.4345723390579224, - "learning_rate": 9.068140703517588e-05, - "loss": 5.3931, - "step": 9774 - }, - { - "epoch": 5.097783572359844, - "grad_norm": 1.4108630418777466, - "learning_rate": 9.068040201005025e-05, - "loss": 5.8292, - "step": 9775 - }, - { - "epoch": 5.098305084745762, - "grad_norm": 1.6083283424377441, - "learning_rate": 9.067939698492462e-05, - "loss": 5.3571, - "step": 9776 - }, - { - "epoch": 5.098826597131682, - "grad_norm": 1.486512303352356, - "learning_rate": 9.0678391959799e-05, - "loss": 5.8291, - "step": 9777 - }, - { - "epoch": 5.099348109517601, - "grad_norm": 1.3981120586395264, - "learning_rate": 9.067738693467337e-05, - "loss": 5.107, - "step": 9778 - }, - { - "epoch": 5.09986962190352, - "grad_norm": 1.4322900772094727, - "learning_rate": 9.067638190954775e-05, - "loss": 5.511, - "step": 9779 - }, - { - "epoch": 5.100391134289439, - "grad_norm": 1.3672746419906616, - "learning_rate": 9.067537688442212e-05, - "loss": 5.6516, - "step": 9780 - }, - { - "epoch": 5.100912646675359, - "grad_norm": 1.4941037893295288, - "learning_rate": 9.06743718592965e-05, - "loss": 6.0601, - "step": 9781 - }, - { - "epoch": 5.101434159061277, - "grad_norm": 1.463607907295227, - "learning_rate": 9.067336683417086e-05, - "loss": 5.7807, - "step": 9782 - }, - { - "epoch": 5.101955671447197, - "grad_norm": 1.5489896535873413, - "learning_rate": 9.067236180904522e-05, - "loss": 5.5802, - "step": 9783 - }, - { - "epoch": 5.102477183833116, - "grad_norm": 1.5914478302001953, - "learning_rate": 9.06713567839196e-05, - "loss": 5.0058, - "step": 9784 - }, - { - "epoch": 5.102998696219035, - "grad_norm": 1.4818017482757568, - "learning_rate": 9.067035175879397e-05, - "loss": 5.8193, - "step": 9785 - }, - { - "epoch": 5.103520208604954, - "grad_norm": 1.3809045553207397, - "learning_rate": 9.066934673366834e-05, - "loss": 5.6353, - "step": 9786 - }, - { - "epoch": 5.104041720990874, - "grad_norm": 1.4551916122436523, - "learning_rate": 9.066834170854272e-05, - "loss": 5.9308, - "step": 9787 - }, - { - "epoch": 5.104563233376792, - "grad_norm": 1.4862685203552246, - "learning_rate": 9.06673366834171e-05, - "loss": 5.4249, - "step": 9788 - }, - { - "epoch": 5.105084745762712, - "grad_norm": 1.3899887800216675, - "learning_rate": 9.066633165829146e-05, - "loss": 5.8669, - "step": 9789 - }, - { - "epoch": 5.105606258148631, - "grad_norm": 1.5009490251541138, - "learning_rate": 9.066532663316584e-05, - "loss": 5.7767, - "step": 9790 - }, - { - "epoch": 5.10612777053455, - "grad_norm": 1.5084518194198608, - "learning_rate": 9.06643216080402e-05, - "loss": 5.665, - "step": 9791 - }, - { - "epoch": 5.106649282920469, - "grad_norm": 1.4969829320907593, - "learning_rate": 9.066331658291458e-05, - "loss": 5.2416, - "step": 9792 - }, - { - "epoch": 5.107170795306389, - "grad_norm": 1.8450356721878052, - "learning_rate": 9.066231155778895e-05, - "loss": 5.2188, - "step": 9793 - }, - { - "epoch": 5.107692307692307, - "grad_norm": 1.4782931804656982, - "learning_rate": 9.066130653266333e-05, - "loss": 5.8377, - "step": 9794 - }, - { - "epoch": 5.108213820078227, - "grad_norm": 1.5405880212783813, - "learning_rate": 9.066030150753769e-05, - "loss": 5.2095, - "step": 9795 - }, - { - "epoch": 5.108735332464146, - "grad_norm": 1.4334425926208496, - "learning_rate": 9.065929648241205e-05, - "loss": 5.8627, - "step": 9796 - }, - { - "epoch": 5.109256844850065, - "grad_norm": 1.375910758972168, - "learning_rate": 9.065829145728643e-05, - "loss": 5.6561, - "step": 9797 - }, - { - "epoch": 5.109778357235984, - "grad_norm": 1.5919585227966309, - "learning_rate": 9.065728643216081e-05, - "loss": 5.1581, - "step": 9798 - }, - { - "epoch": 5.110299869621904, - "grad_norm": 1.446583867073059, - "learning_rate": 9.065628140703519e-05, - "loss": 5.5144, - "step": 9799 - }, - { - "epoch": 5.110821382007822, - "grad_norm": 1.5610660314559937, - "learning_rate": 9.065527638190955e-05, - "loss": 5.0388, - "step": 9800 - }, - { - "epoch": 5.111342894393742, - "grad_norm": 1.4241214990615845, - "learning_rate": 9.065427135678393e-05, - "loss": 5.1163, - "step": 9801 - }, - { - "epoch": 5.111864406779661, - "grad_norm": 1.4626935720443726, - "learning_rate": 9.06532663316583e-05, - "loss": 5.9674, - "step": 9802 - }, - { - "epoch": 5.11238591916558, - "grad_norm": 1.5444343090057373, - "learning_rate": 9.065226130653267e-05, - "loss": 5.8328, - "step": 9803 - }, - { - "epoch": 5.112907431551499, - "grad_norm": 1.5496864318847656, - "learning_rate": 9.065125628140704e-05, - "loss": 5.9242, - "step": 9804 - }, - { - "epoch": 5.113428943937419, - "grad_norm": 1.4796339273452759, - "learning_rate": 9.065025125628141e-05, - "loss": 5.872, - "step": 9805 - }, - { - "epoch": 5.113950456323337, - "grad_norm": 1.5841716527938843, - "learning_rate": 9.064924623115578e-05, - "loss": 4.9304, - "step": 9806 - }, - { - "epoch": 5.114471968709257, - "grad_norm": 1.5000571012496948, - "learning_rate": 9.064824120603016e-05, - "loss": 5.5772, - "step": 9807 - }, - { - "epoch": 5.114993481095176, - "grad_norm": 1.4948301315307617, - "learning_rate": 9.064723618090453e-05, - "loss": 5.7241, - "step": 9808 - }, - { - "epoch": 5.115514993481095, - "grad_norm": 1.3835678100585938, - "learning_rate": 9.06462311557789e-05, - "loss": 6.0186, - "step": 9809 - }, - { - "epoch": 5.116036505867014, - "grad_norm": 1.462010145187378, - "learning_rate": 9.064522613065328e-05, - "loss": 5.5947, - "step": 9810 - }, - { - "epoch": 5.116558018252934, - "grad_norm": 1.33342707157135, - "learning_rate": 9.064422110552764e-05, - "loss": 5.9445, - "step": 9811 - }, - { - "epoch": 5.117079530638852, - "grad_norm": 1.3512498140335083, - "learning_rate": 9.064321608040202e-05, - "loss": 5.8685, - "step": 9812 - }, - { - "epoch": 5.117601043024772, - "grad_norm": 1.4778683185577393, - "learning_rate": 9.064221105527638e-05, - "loss": 5.7171, - "step": 9813 - }, - { - "epoch": 5.118122555410691, - "grad_norm": 1.4596741199493408, - "learning_rate": 9.064120603015076e-05, - "loss": 5.8663, - "step": 9814 - }, - { - "epoch": 5.11864406779661, - "grad_norm": 1.5302433967590332, - "learning_rate": 9.064020100502512e-05, - "loss": 5.544, - "step": 9815 - }, - { - "epoch": 5.119165580182529, - "grad_norm": 1.4922175407409668, - "learning_rate": 9.06391959798995e-05, - "loss": 5.5738, - "step": 9816 - }, - { - "epoch": 5.119687092568449, - "grad_norm": 1.572393536567688, - "learning_rate": 9.063819095477387e-05, - "loss": 5.6146, - "step": 9817 - }, - { - "epoch": 5.120208604954367, - "grad_norm": 1.3849321603775024, - "learning_rate": 9.063718592964824e-05, - "loss": 5.56, - "step": 9818 - }, - { - "epoch": 5.120730117340287, - "grad_norm": 1.4509801864624023, - "learning_rate": 9.063618090452262e-05, - "loss": 5.5305, - "step": 9819 - }, - { - "epoch": 5.121251629726206, - "grad_norm": 1.4161043167114258, - "learning_rate": 9.0635175879397e-05, - "loss": 5.737, - "step": 9820 - }, - { - "epoch": 5.121773142112125, - "grad_norm": 1.4041106700897217, - "learning_rate": 9.063417085427136e-05, - "loss": 5.5359, - "step": 9821 - }, - { - "epoch": 5.122294654498044, - "grad_norm": 1.411484718322754, - "learning_rate": 9.063316582914573e-05, - "loss": 5.5164, - "step": 9822 - }, - { - "epoch": 5.122816166883964, - "grad_norm": 1.4705920219421387, - "learning_rate": 9.06321608040201e-05, - "loss": 5.7782, - "step": 9823 - }, - { - "epoch": 5.123337679269882, - "grad_norm": 1.74918794631958, - "learning_rate": 9.063115577889447e-05, - "loss": 4.8537, - "step": 9824 - }, - { - "epoch": 5.123859191655802, - "grad_norm": 1.4819347858428955, - "learning_rate": 9.063015075376885e-05, - "loss": 5.8837, - "step": 9825 - }, - { - "epoch": 5.124380704041721, - "grad_norm": 1.3932563066482544, - "learning_rate": 9.062914572864321e-05, - "loss": 5.4595, - "step": 9826 - }, - { - "epoch": 5.12490221642764, - "grad_norm": 1.4674808979034424, - "learning_rate": 9.062814070351759e-05, - "loss": 5.9844, - "step": 9827 - }, - { - "epoch": 5.125423728813559, - "grad_norm": 1.5516093969345093, - "learning_rate": 9.062713567839195e-05, - "loss": 5.268, - "step": 9828 - }, - { - "epoch": 5.125945241199479, - "grad_norm": 1.5034351348876953, - "learning_rate": 9.062613065326633e-05, - "loss": 5.4152, - "step": 9829 - }, - { - "epoch": 5.126466753585397, - "grad_norm": 1.4039462804794312, - "learning_rate": 9.062512562814071e-05, - "loss": 5.773, - "step": 9830 - }, - { - "epoch": 5.126988265971317, - "grad_norm": 1.5145410299301147, - "learning_rate": 9.062412060301509e-05, - "loss": 5.9328, - "step": 9831 - }, - { - "epoch": 5.127509778357236, - "grad_norm": 1.4692150354385376, - "learning_rate": 9.062311557788945e-05, - "loss": 5.7311, - "step": 9832 - }, - { - "epoch": 5.128031290743155, - "grad_norm": 1.4548693895339966, - "learning_rate": 9.062211055276383e-05, - "loss": 5.6759, - "step": 9833 - }, - { - "epoch": 5.128552803129074, - "grad_norm": 1.3963068723678589, - "learning_rate": 9.06211055276382e-05, - "loss": 5.56, - "step": 9834 - }, - { - "epoch": 5.129074315514994, - "grad_norm": 1.5062861442565918, - "learning_rate": 9.062010050251257e-05, - "loss": 5.3152, - "step": 9835 - }, - { - "epoch": 5.129595827900912, - "grad_norm": 1.4415667057037354, - "learning_rate": 9.061909547738694e-05, - "loss": 5.9564, - "step": 9836 - }, - { - "epoch": 5.130117340286832, - "grad_norm": 1.4332231283187866, - "learning_rate": 9.06180904522613e-05, - "loss": 6.0575, - "step": 9837 - }, - { - "epoch": 5.130638852672751, - "grad_norm": 1.7090414762496948, - "learning_rate": 9.061708542713568e-05, - "loss": 4.7546, - "step": 9838 - }, - { - "epoch": 5.13116036505867, - "grad_norm": 1.496900200843811, - "learning_rate": 9.061608040201006e-05, - "loss": 5.7875, - "step": 9839 - }, - { - "epoch": 5.131681877444589, - "grad_norm": 1.3233397006988525, - "learning_rate": 9.061507537688443e-05, - "loss": 5.5907, - "step": 9840 - }, - { - "epoch": 5.132203389830509, - "grad_norm": 1.2502939701080322, - "learning_rate": 9.06140703517588e-05, - "loss": 6.0124, - "step": 9841 - }, - { - "epoch": 5.132724902216427, - "grad_norm": 1.5362752676010132, - "learning_rate": 9.061306532663318e-05, - "loss": 4.901, - "step": 9842 - }, - { - "epoch": 5.133246414602347, - "grad_norm": 1.4712611436843872, - "learning_rate": 9.061206030150754e-05, - "loss": 5.9767, - "step": 9843 - }, - { - "epoch": 5.133767926988266, - "grad_norm": 1.4584966897964478, - "learning_rate": 9.061105527638192e-05, - "loss": 6.128, - "step": 9844 - }, - { - "epoch": 5.134289439374185, - "grad_norm": 1.527664065361023, - "learning_rate": 9.061005025125628e-05, - "loss": 5.3689, - "step": 9845 - }, - { - "epoch": 5.134810951760104, - "grad_norm": 1.400612711906433, - "learning_rate": 9.060904522613066e-05, - "loss": 5.9846, - "step": 9846 - }, - { - "epoch": 5.135332464146024, - "grad_norm": 1.4903448820114136, - "learning_rate": 9.060804020100502e-05, - "loss": 5.9402, - "step": 9847 - }, - { - "epoch": 5.135853976531942, - "grad_norm": 1.4752806425094604, - "learning_rate": 9.06070351758794e-05, - "loss": 4.8726, - "step": 9848 - }, - { - "epoch": 5.136375488917862, - "grad_norm": 1.3786475658416748, - "learning_rate": 9.060603015075377e-05, - "loss": 5.896, - "step": 9849 - }, - { - "epoch": 5.136897001303781, - "grad_norm": 1.4106947183609009, - "learning_rate": 9.060502512562814e-05, - "loss": 6.1492, - "step": 9850 - }, - { - "epoch": 5.1374185136897, - "grad_norm": 1.6404285430908203, - "learning_rate": 9.060402010050252e-05, - "loss": 4.8631, - "step": 9851 - }, - { - "epoch": 5.137940026075619, - "grad_norm": 1.4154287576675415, - "learning_rate": 9.060301507537689e-05, - "loss": 6.0008, - "step": 9852 - }, - { - "epoch": 5.138461538461539, - "grad_norm": 1.669723391532898, - "learning_rate": 9.060201005025126e-05, - "loss": 5.3895, - "step": 9853 - }, - { - "epoch": 5.138983050847457, - "grad_norm": 1.3670129776000977, - "learning_rate": 9.060100502512563e-05, - "loss": 5.7036, - "step": 9854 - }, - { - "epoch": 5.139504563233377, - "grad_norm": 1.483460783958435, - "learning_rate": 9.06e-05, - "loss": 5.6754, - "step": 9855 - }, - { - "epoch": 5.140026075619296, - "grad_norm": 1.4509013891220093, - "learning_rate": 9.059899497487437e-05, - "loss": 5.7762, - "step": 9856 - }, - { - "epoch": 5.140547588005215, - "grad_norm": 1.6064743995666504, - "learning_rate": 9.059798994974875e-05, - "loss": 5.2685, - "step": 9857 - }, - { - "epoch": 5.141069100391134, - "grad_norm": 1.64907705783844, - "learning_rate": 9.059698492462311e-05, - "loss": 5.6139, - "step": 9858 - }, - { - "epoch": 5.141590612777054, - "grad_norm": 1.5877978801727295, - "learning_rate": 9.059597989949749e-05, - "loss": 5.6623, - "step": 9859 - }, - { - "epoch": 5.1421121251629724, - "grad_norm": 1.5105050802230835, - "learning_rate": 9.059497487437187e-05, - "loss": 5.3956, - "step": 9860 - }, - { - "epoch": 5.142633637548892, - "grad_norm": 1.4230408668518066, - "learning_rate": 9.059396984924625e-05, - "loss": 5.6681, - "step": 9861 - }, - { - "epoch": 5.143155149934811, - "grad_norm": 1.427879810333252, - "learning_rate": 9.059296482412061e-05, - "loss": 5.8953, - "step": 9862 - }, - { - "epoch": 5.14367666232073, - "grad_norm": 1.5457682609558105, - "learning_rate": 9.059195979899498e-05, - "loss": 5.3973, - "step": 9863 - }, - { - "epoch": 5.144198174706649, - "grad_norm": 1.5197532176971436, - "learning_rate": 9.059095477386935e-05, - "loss": 5.2124, - "step": 9864 - }, - { - "epoch": 5.144719687092568, - "grad_norm": 1.5479347705841064, - "learning_rate": 9.058994974874372e-05, - "loss": 5.8841, - "step": 9865 - }, - { - "epoch": 5.1452411994784875, - "grad_norm": 1.4521557092666626, - "learning_rate": 9.05889447236181e-05, - "loss": 5.8088, - "step": 9866 - }, - { - "epoch": 5.145762711864407, - "grad_norm": 1.4345005750656128, - "learning_rate": 9.058793969849246e-05, - "loss": 6.0466, - "step": 9867 - }, - { - "epoch": 5.146284224250326, - "grad_norm": 1.5639299154281616, - "learning_rate": 9.058693467336684e-05, - "loss": 4.9669, - "step": 9868 - }, - { - "epoch": 5.146805736636245, - "grad_norm": 1.5659247636795044, - "learning_rate": 9.05859296482412e-05, - "loss": 5.9409, - "step": 9869 - }, - { - "epoch": 5.147327249022164, - "grad_norm": 1.532589316368103, - "learning_rate": 9.058492462311558e-05, - "loss": 5.6503, - "step": 9870 - }, - { - "epoch": 5.147848761408083, - "grad_norm": 1.4843394756317139, - "learning_rate": 9.058391959798996e-05, - "loss": 5.1897, - "step": 9871 - }, - { - "epoch": 5.1483702737940025, - "grad_norm": 1.5393644571304321, - "learning_rate": 9.058291457286433e-05, - "loss": 5.5436, - "step": 9872 - }, - { - "epoch": 5.148891786179922, - "grad_norm": 1.4026321172714233, - "learning_rate": 9.05819095477387e-05, - "loss": 5.84, - "step": 9873 - }, - { - "epoch": 5.1494132985658405, - "grad_norm": 1.415183186531067, - "learning_rate": 9.058090452261308e-05, - "loss": 6.0467, - "step": 9874 - }, - { - "epoch": 5.14993481095176, - "grad_norm": 1.534190058708191, - "learning_rate": 9.057989949748744e-05, - "loss": 5.3389, - "step": 9875 - }, - { - "epoch": 5.150456323337679, - "grad_norm": 1.4060906171798706, - "learning_rate": 9.05788944723618e-05, - "loss": 5.6426, - "step": 9876 - }, - { - "epoch": 5.150977835723598, - "grad_norm": 1.5965155363082886, - "learning_rate": 9.057788944723618e-05, - "loss": 5.6661, - "step": 9877 - }, - { - "epoch": 5.1514993481095175, - "grad_norm": 1.67709481716156, - "learning_rate": 9.057688442211055e-05, - "loss": 5.5397, - "step": 9878 - }, - { - "epoch": 5.152020860495437, - "grad_norm": 1.57057785987854, - "learning_rate": 9.057587939698493e-05, - "loss": 5.6929, - "step": 9879 - }, - { - "epoch": 5.1525423728813555, - "grad_norm": 1.5763025283813477, - "learning_rate": 9.05748743718593e-05, - "loss": 5.6596, - "step": 9880 - }, - { - "epoch": 5.153063885267275, - "grad_norm": 1.4687844514846802, - "learning_rate": 9.057386934673368e-05, - "loss": 5.9008, - "step": 9881 - }, - { - "epoch": 5.1535853976531945, - "grad_norm": 1.6843392848968506, - "learning_rate": 9.057286432160805e-05, - "loss": 5.3926, - "step": 9882 - }, - { - "epoch": 5.154106910039113, - "grad_norm": 1.5013320446014404, - "learning_rate": 9.057185929648242e-05, - "loss": 5.5651, - "step": 9883 - }, - { - "epoch": 5.1546284224250325, - "grad_norm": 1.4606292247772217, - "learning_rate": 9.057085427135679e-05, - "loss": 5.5041, - "step": 9884 - }, - { - "epoch": 5.155149934810952, - "grad_norm": 1.3849786520004272, - "learning_rate": 9.056984924623117e-05, - "loss": 6.0217, - "step": 9885 - }, - { - "epoch": 5.1556714471968705, - "grad_norm": 1.420807123184204, - "learning_rate": 9.056884422110553e-05, - "loss": 5.3309, - "step": 9886 - }, - { - "epoch": 5.15619295958279, - "grad_norm": 1.4809677600860596, - "learning_rate": 9.056783919597991e-05, - "loss": 5.4858, - "step": 9887 - }, - { - "epoch": 5.1567144719687095, - "grad_norm": 1.3820477724075317, - "learning_rate": 9.056683417085427e-05, - "loss": 5.9631, - "step": 9888 - }, - { - "epoch": 5.157235984354628, - "grad_norm": 1.4236769676208496, - "learning_rate": 9.056582914572864e-05, - "loss": 5.6739, - "step": 9889 - }, - { - "epoch": 5.1577574967405475, - "grad_norm": 1.5265676975250244, - "learning_rate": 9.056482412060301e-05, - "loss": 5.0209, - "step": 9890 - }, - { - "epoch": 5.158279009126467, - "grad_norm": 1.4566926956176758, - "learning_rate": 9.056381909547739e-05, - "loss": 5.1486, - "step": 9891 - }, - { - "epoch": 5.1588005215123856, - "grad_norm": 1.5168931484222412, - "learning_rate": 9.056281407035177e-05, - "loss": 5.4519, - "step": 9892 - }, - { - "epoch": 5.159322033898305, - "grad_norm": 1.403723120689392, - "learning_rate": 9.056180904522613e-05, - "loss": 5.9349, - "step": 9893 - }, - { - "epoch": 5.1598435462842245, - "grad_norm": 1.4123072624206543, - "learning_rate": 9.056080402010051e-05, - "loss": 5.8165, - "step": 9894 - }, - { - "epoch": 5.160365058670143, - "grad_norm": 1.542697548866272, - "learning_rate": 9.055979899497488e-05, - "loss": 5.0845, - "step": 9895 - }, - { - "epoch": 5.1608865710560625, - "grad_norm": 1.4831340312957764, - "learning_rate": 9.055879396984925e-05, - "loss": 5.823, - "step": 9896 - }, - { - "epoch": 5.161408083441982, - "grad_norm": 1.4721499681472778, - "learning_rate": 9.055778894472362e-05, - "loss": 5.5354, - "step": 9897 - }, - { - "epoch": 5.161929595827901, - "grad_norm": 1.696143627166748, - "learning_rate": 9.0556783919598e-05, - "loss": 5.8719, - "step": 9898 - }, - { - "epoch": 5.16245110821382, - "grad_norm": 1.5072728395462036, - "learning_rate": 9.055577889447236e-05, - "loss": 5.6589, - "step": 9899 - }, - { - "epoch": 5.1629726205997395, - "grad_norm": 1.4187848567962646, - "learning_rate": 9.055477386934674e-05, - "loss": 5.8588, - "step": 9900 - }, - { - "epoch": 5.163494132985658, - "grad_norm": 1.558765172958374, - "learning_rate": 9.055376884422112e-05, - "loss": 5.344, - "step": 9901 - }, - { - "epoch": 5.1640156453715775, - "grad_norm": 1.5710194110870361, - "learning_rate": 9.055276381909548e-05, - "loss": 5.7589, - "step": 9902 - }, - { - "epoch": 5.164537157757497, - "grad_norm": 1.685858130455017, - "learning_rate": 9.055175879396986e-05, - "loss": 5.3133, - "step": 9903 - }, - { - "epoch": 5.165058670143416, - "grad_norm": 1.626330018043518, - "learning_rate": 9.055075376884422e-05, - "loss": 5.1475, - "step": 9904 - }, - { - "epoch": 5.165580182529335, - "grad_norm": 1.401828646659851, - "learning_rate": 9.05497487437186e-05, - "loss": 5.6071, - "step": 9905 - }, - { - "epoch": 5.1661016949152545, - "grad_norm": 1.4693831205368042, - "learning_rate": 9.054874371859296e-05, - "loss": 5.7467, - "step": 9906 - }, - { - "epoch": 5.166623207301173, - "grad_norm": 1.4842503070831299, - "learning_rate": 9.054773869346734e-05, - "loss": 5.8732, - "step": 9907 - }, - { - "epoch": 5.1671447196870925, - "grad_norm": 1.4684351682662964, - "learning_rate": 9.05467336683417e-05, - "loss": 5.9743, - "step": 9908 - }, - { - "epoch": 5.167666232073012, - "grad_norm": 1.3606348037719727, - "learning_rate": 9.054572864321608e-05, - "loss": 5.8519, - "step": 9909 - }, - { - "epoch": 5.168187744458931, - "grad_norm": 1.523033618927002, - "learning_rate": 9.054472361809045e-05, - "loss": 5.7302, - "step": 9910 - }, - { - "epoch": 5.16870925684485, - "grad_norm": 1.637416958808899, - "learning_rate": 9.054371859296483e-05, - "loss": 5.4082, - "step": 9911 - }, - { - "epoch": 5.1692307692307695, - "grad_norm": 1.3497196435928345, - "learning_rate": 9.05427135678392e-05, - "loss": 5.9323, - "step": 9912 - }, - { - "epoch": 5.169752281616688, - "grad_norm": 1.6495516300201416, - "learning_rate": 9.054170854271358e-05, - "loss": 5.5983, - "step": 9913 - }, - { - "epoch": 5.170273794002608, - "grad_norm": 1.4929183721542358, - "learning_rate": 9.054070351758795e-05, - "loss": 5.4216, - "step": 9914 - }, - { - "epoch": 5.170795306388527, - "grad_norm": 1.592025637626648, - "learning_rate": 9.053969849246231e-05, - "loss": 5.7795, - "step": 9915 - }, - { - "epoch": 5.171316818774446, - "grad_norm": 1.4331640005111694, - "learning_rate": 9.053869346733669e-05, - "loss": 5.8521, - "step": 9916 - }, - { - "epoch": 5.171838331160365, - "grad_norm": 1.3735859394073486, - "learning_rate": 9.053768844221105e-05, - "loss": 6.1483, - "step": 9917 - }, - { - "epoch": 5.1723598435462845, - "grad_norm": 1.6215449571609497, - "learning_rate": 9.053668341708543e-05, - "loss": 5.2722, - "step": 9918 - }, - { - "epoch": 5.172881355932203, - "grad_norm": 1.5381478071212769, - "learning_rate": 9.05356783919598e-05, - "loss": 5.4203, - "step": 9919 - }, - { - "epoch": 5.173402868318123, - "grad_norm": 1.3846725225448608, - "learning_rate": 9.053467336683417e-05, - "loss": 5.8898, - "step": 9920 - }, - { - "epoch": 5.173924380704042, - "grad_norm": 1.4400655031204224, - "learning_rate": 9.053366834170855e-05, - "loss": 5.7759, - "step": 9921 - }, - { - "epoch": 5.174445893089961, - "grad_norm": 1.5162898302078247, - "learning_rate": 9.053266331658293e-05, - "loss": 5.8293, - "step": 9922 - }, - { - "epoch": 5.17496740547588, - "grad_norm": 1.3530471324920654, - "learning_rate": 9.053165829145729e-05, - "loss": 6.0285, - "step": 9923 - }, - { - "epoch": 5.1754889178617995, - "grad_norm": 1.497591257095337, - "learning_rate": 9.053065326633167e-05, - "loss": 5.6772, - "step": 9924 - }, - { - "epoch": 5.176010430247718, - "grad_norm": 1.293992042541504, - "learning_rate": 9.052964824120603e-05, - "loss": 5.6945, - "step": 9925 - }, - { - "epoch": 5.176531942633638, - "grad_norm": 1.3455917835235596, - "learning_rate": 9.052864321608041e-05, - "loss": 5.9888, - "step": 9926 - }, - { - "epoch": 5.177053455019557, - "grad_norm": 1.4572997093200684, - "learning_rate": 9.052763819095478e-05, - "loss": 5.8011, - "step": 9927 - }, - { - "epoch": 5.177574967405476, - "grad_norm": 1.5736212730407715, - "learning_rate": 9.052663316582915e-05, - "loss": 5.276, - "step": 9928 - }, - { - "epoch": 5.178096479791395, - "grad_norm": 1.4594172239303589, - "learning_rate": 9.052562814070352e-05, - "loss": 5.5647, - "step": 9929 - }, - { - "epoch": 5.1786179921773146, - "grad_norm": 1.3668320178985596, - "learning_rate": 9.052462311557788e-05, - "loss": 5.0599, - "step": 9930 - }, - { - "epoch": 5.179139504563233, - "grad_norm": 1.4575880765914917, - "learning_rate": 9.052361809045226e-05, - "loss": 5.5572, - "step": 9931 - }, - { - "epoch": 5.179661016949153, - "grad_norm": 1.5186054706573486, - "learning_rate": 9.052261306532664e-05, - "loss": 4.8642, - "step": 9932 - }, - { - "epoch": 5.180182529335072, - "grad_norm": 1.4596121311187744, - "learning_rate": 9.052160804020102e-05, - "loss": 5.7415, - "step": 9933 - }, - { - "epoch": 5.180704041720991, - "grad_norm": 1.5136321783065796, - "learning_rate": 9.052060301507538e-05, - "loss": 5.6622, - "step": 9934 - }, - { - "epoch": 5.18122555410691, - "grad_norm": 1.470464825630188, - "learning_rate": 9.051959798994976e-05, - "loss": 6.0204, - "step": 9935 - }, - { - "epoch": 5.18174706649283, - "grad_norm": 1.466240406036377, - "learning_rate": 9.051859296482412e-05, - "loss": 5.9422, - "step": 9936 - }, - { - "epoch": 5.182268578878748, - "grad_norm": 1.3966820240020752, - "learning_rate": 9.05175879396985e-05, - "loss": 5.5859, - "step": 9937 - }, - { - "epoch": 5.182790091264668, - "grad_norm": 1.437476396560669, - "learning_rate": 9.051658291457286e-05, - "loss": 5.8368, - "step": 9938 - }, - { - "epoch": 5.183311603650587, - "grad_norm": 1.4285919666290283, - "learning_rate": 9.051557788944724e-05, - "loss": 5.9559, - "step": 9939 - }, - { - "epoch": 5.183833116036506, - "grad_norm": 1.519158124923706, - "learning_rate": 9.051457286432161e-05, - "loss": 5.531, - "step": 9940 - }, - { - "epoch": 5.184354628422425, - "grad_norm": 1.4807437658309937, - "learning_rate": 9.051356783919598e-05, - "loss": 5.4824, - "step": 9941 - }, - { - "epoch": 5.184876140808345, - "grad_norm": 1.4306085109710693, - "learning_rate": 9.051256281407036e-05, - "loss": 5.6454, - "step": 9942 - }, - { - "epoch": 5.185397653194263, - "grad_norm": 1.3726962804794312, - "learning_rate": 9.051155778894473e-05, - "loss": 5.8071, - "step": 9943 - }, - { - "epoch": 5.185919165580183, - "grad_norm": 1.9009023904800415, - "learning_rate": 9.05105527638191e-05, - "loss": 5.1974, - "step": 9944 - }, - { - "epoch": 5.186440677966102, - "grad_norm": 1.511934518814087, - "learning_rate": 9.050954773869347e-05, - "loss": 5.6521, - "step": 9945 - }, - { - "epoch": 5.186962190352021, - "grad_norm": 1.5876998901367188, - "learning_rate": 9.050854271356785e-05, - "loss": 5.5606, - "step": 9946 - }, - { - "epoch": 5.18748370273794, - "grad_norm": 1.417900800704956, - "learning_rate": 9.050753768844221e-05, - "loss": 5.8679, - "step": 9947 - }, - { - "epoch": 5.18800521512386, - "grad_norm": 1.4556668996810913, - "learning_rate": 9.050653266331659e-05, - "loss": 5.6389, - "step": 9948 - }, - { - "epoch": 5.188526727509778, - "grad_norm": 1.4087109565734863, - "learning_rate": 9.050552763819095e-05, - "loss": 5.5398, - "step": 9949 - }, - { - "epoch": 5.189048239895698, - "grad_norm": 1.4323667287826538, - "learning_rate": 9.050452261306533e-05, - "loss": 5.6941, - "step": 9950 - }, - { - "epoch": 5.189569752281617, - "grad_norm": 1.505454421043396, - "learning_rate": 9.05035175879397e-05, - "loss": 5.9104, - "step": 9951 - }, - { - "epoch": 5.190091264667536, - "grad_norm": 1.33050537109375, - "learning_rate": 9.050251256281407e-05, - "loss": 6.2153, - "step": 9952 - }, - { - "epoch": 5.190612777053455, - "grad_norm": 1.4077614545822144, - "learning_rate": 9.050150753768845e-05, - "loss": 5.3064, - "step": 9953 - }, - { - "epoch": 5.191134289439375, - "grad_norm": 1.4086315631866455, - "learning_rate": 9.050050251256283e-05, - "loss": 5.5545, - "step": 9954 - }, - { - "epoch": 5.191655801825293, - "grad_norm": 1.5410484075546265, - "learning_rate": 9.049949748743719e-05, - "loss": 5.6678, - "step": 9955 - }, - { - "epoch": 5.192177314211213, - "grad_norm": 1.4053854942321777, - "learning_rate": 9.049849246231156e-05, - "loss": 5.9258, - "step": 9956 - }, - { - "epoch": 5.192698826597132, - "grad_norm": 1.503434658050537, - "learning_rate": 9.049748743718594e-05, - "loss": 5.7043, - "step": 9957 - }, - { - "epoch": 5.193220338983051, - "grad_norm": 1.5614920854568481, - "learning_rate": 9.04964824120603e-05, - "loss": 5.3317, - "step": 9958 - }, - { - "epoch": 5.19374185136897, - "grad_norm": 1.7876731157302856, - "learning_rate": 9.049547738693468e-05, - "loss": 5.058, - "step": 9959 - }, - { - "epoch": 5.194263363754889, - "grad_norm": 1.5546025037765503, - "learning_rate": 9.049447236180904e-05, - "loss": 5.4056, - "step": 9960 - }, - { - "epoch": 5.194784876140808, - "grad_norm": 1.4160833358764648, - "learning_rate": 9.049346733668342e-05, - "loss": 5.8904, - "step": 9961 - }, - { - "epoch": 5.195306388526728, - "grad_norm": 1.4793853759765625, - "learning_rate": 9.04924623115578e-05, - "loss": 5.1416, - "step": 9962 - }, - { - "epoch": 5.195827900912647, - "grad_norm": 1.484444499015808, - "learning_rate": 9.049145728643218e-05, - "loss": 5.9224, - "step": 9963 - }, - { - "epoch": 5.196349413298566, - "grad_norm": 1.4560766220092773, - "learning_rate": 9.049045226130654e-05, - "loss": 5.6225, - "step": 9964 - }, - { - "epoch": 5.196870925684485, - "grad_norm": 1.7138055562973022, - "learning_rate": 9.048944723618092e-05, - "loss": 5.0761, - "step": 9965 - }, - { - "epoch": 5.197392438070404, - "grad_norm": 1.4021923542022705, - "learning_rate": 9.048844221105528e-05, - "loss": 6.0894, - "step": 9966 - }, - { - "epoch": 5.197913950456323, - "grad_norm": 1.4645551443099976, - "learning_rate": 9.048743718592966e-05, - "loss": 5.8846, - "step": 9967 - }, - { - "epoch": 5.198435462842243, - "grad_norm": 1.411424160003662, - "learning_rate": 9.048643216080402e-05, - "loss": 5.6751, - "step": 9968 - }, - { - "epoch": 5.198956975228161, - "grad_norm": 1.5094726085662842, - "learning_rate": 9.048542713567839e-05, - "loss": 5.7017, - "step": 9969 - }, - { - "epoch": 5.199478487614081, - "grad_norm": 1.5539816617965698, - "learning_rate": 9.048442211055277e-05, - "loss": 5.035, - "step": 9970 - }, - { - "epoch": 5.2, - "grad_norm": 1.4314384460449219, - "learning_rate": 9.048341708542713e-05, - "loss": 5.4059, - "step": 9971 - }, - { - "epoch": 5.200521512385919, - "grad_norm": 2.4178171157836914, - "learning_rate": 9.048241206030151e-05, - "loss": 5.4255, - "step": 9972 - }, - { - "epoch": 5.201043024771838, - "grad_norm": 1.4391679763793945, - "learning_rate": 9.048140703517589e-05, - "loss": 5.6224, - "step": 9973 - }, - { - "epoch": 5.201564537157758, - "grad_norm": 1.3902226686477661, - "learning_rate": 9.048040201005026e-05, - "loss": 5.8473, - "step": 9974 - }, - { - "epoch": 5.202086049543676, - "grad_norm": 1.543908715248108, - "learning_rate": 9.047939698492463e-05, - "loss": 5.5054, - "step": 9975 - }, - { - "epoch": 5.202607561929596, - "grad_norm": 1.5774601697921753, - "learning_rate": 9.0478391959799e-05, - "loss": 5.632, - "step": 9976 - }, - { - "epoch": 5.203129074315515, - "grad_norm": 1.4888261556625366, - "learning_rate": 9.047738693467337e-05, - "loss": 5.4613, - "step": 9977 - }, - { - "epoch": 5.203650586701434, - "grad_norm": 1.4399272203445435, - "learning_rate": 9.047638190954775e-05, - "loss": 5.4807, - "step": 9978 - }, - { - "epoch": 5.204172099087353, - "grad_norm": 1.4446275234222412, - "learning_rate": 9.047537688442211e-05, - "loss": 5.5286, - "step": 9979 - }, - { - "epoch": 5.204693611473273, - "grad_norm": 1.481410264968872, - "learning_rate": 9.047437185929649e-05, - "loss": 5.6388, - "step": 9980 - }, - { - "epoch": 5.205215123859191, - "grad_norm": 1.4537277221679688, - "learning_rate": 9.047336683417085e-05, - "loss": 5.7902, - "step": 9981 - }, - { - "epoch": 5.205736636245111, - "grad_norm": 1.4115307331085205, - "learning_rate": 9.047236180904523e-05, - "loss": 5.7082, - "step": 9982 - }, - { - "epoch": 5.20625814863103, - "grad_norm": 1.5536311864852905, - "learning_rate": 9.047135678391961e-05, - "loss": 4.8692, - "step": 9983 - }, - { - "epoch": 5.206779661016949, - "grad_norm": 1.576254963874817, - "learning_rate": 9.047035175879397e-05, - "loss": 5.6896, - "step": 9984 - }, - { - "epoch": 5.207301173402868, - "grad_norm": 1.5688122510910034, - "learning_rate": 9.046934673366835e-05, - "loss": 5.6857, - "step": 9985 - }, - { - "epoch": 5.207822685788788, - "grad_norm": 1.4543150663375854, - "learning_rate": 9.046834170854272e-05, - "loss": 5.583, - "step": 9986 - }, - { - "epoch": 5.208344198174706, - "grad_norm": 1.409266710281372, - "learning_rate": 9.04673366834171e-05, - "loss": 5.7489, - "step": 9987 - }, - { - "epoch": 5.208865710560626, - "grad_norm": 1.4648630619049072, - "learning_rate": 9.046633165829146e-05, - "loss": 5.6368, - "step": 9988 - }, - { - "epoch": 5.209387222946545, - "grad_norm": 1.4539110660552979, - "learning_rate": 9.046532663316584e-05, - "loss": 5.7356, - "step": 9989 - }, - { - "epoch": 5.209908735332464, - "grad_norm": 1.6872522830963135, - "learning_rate": 9.04643216080402e-05, - "loss": 5.0475, - "step": 9990 - }, - { - "epoch": 5.210430247718383, - "grad_norm": 1.5245758295059204, - "learning_rate": 9.046331658291458e-05, - "loss": 5.6195, - "step": 9991 - }, - { - "epoch": 5.210951760104303, - "grad_norm": 1.443320393562317, - "learning_rate": 9.046231155778894e-05, - "loss": 5.3965, - "step": 9992 - }, - { - "epoch": 5.211473272490221, - "grad_norm": 1.4111230373382568, - "learning_rate": 9.046130653266332e-05, - "loss": 5.5074, - "step": 9993 - }, - { - "epoch": 5.211994784876141, - "grad_norm": 1.4921717643737793, - "learning_rate": 9.04603015075377e-05, - "loss": 5.2305, - "step": 9994 - }, - { - "epoch": 5.21251629726206, - "grad_norm": 1.4672561883926392, - "learning_rate": 9.045929648241206e-05, - "loss": 5.6841, - "step": 9995 - }, - { - "epoch": 5.213037809647979, - "grad_norm": 1.5562853813171387, - "learning_rate": 9.045829145728644e-05, - "loss": 5.1881, - "step": 9996 - }, - { - "epoch": 5.213559322033898, - "grad_norm": 1.4447107315063477, - "learning_rate": 9.04572864321608e-05, - "loss": 5.8668, - "step": 9997 - }, - { - "epoch": 5.214080834419818, - "grad_norm": 1.523319125175476, - "learning_rate": 9.045628140703518e-05, - "loss": 5.4494, - "step": 9998 - }, - { - "epoch": 5.214602346805736, - "grad_norm": 1.4796721935272217, - "learning_rate": 9.045527638190955e-05, - "loss": 5.9487, - "step": 9999 - }, - { - "epoch": 5.215123859191656, - "grad_norm": 1.4420745372772217, - "learning_rate": 9.045427135678392e-05, - "loss": 5.3726, - "step": 10000 - }, - { - "epoch": 5.215123859191656, - "eval_loss": 5.693417549133301, - "eval_runtime": 42.6818, - "eval_samples_per_second": 28.724, - "eval_steps_per_second": 3.608, - "step": 10000 - }, - { - "epoch": 5.215645371577575, - "grad_norm": 1.4717859029769897, - "learning_rate": 9.045326633165829e-05, - "loss": 5.9161, - "step": 10001 - }, - { - "epoch": 5.216166883963494, - "grad_norm": 1.5446093082427979, - "learning_rate": 9.045226130653267e-05, - "loss": 5.8622, - "step": 10002 - }, - { - "epoch": 5.216688396349413, - "grad_norm": 1.3811769485473633, - "learning_rate": 9.045125628140703e-05, - "loss": 5.8583, - "step": 10003 - }, - { - "epoch": 5.217209908735333, - "grad_norm": 1.4074715375900269, - "learning_rate": 9.045025125628141e-05, - "loss": 5.6315, - "step": 10004 - }, - { - "epoch": 5.217731421121251, - "grad_norm": 1.37552011013031, - "learning_rate": 9.044924623115579e-05, - "loss": 5.5401, - "step": 10005 - }, - { - "epoch": 5.218252933507171, - "grad_norm": 1.5397658348083496, - "learning_rate": 9.044824120603016e-05, - "loss": 5.4292, - "step": 10006 - }, - { - "epoch": 5.21877444589309, - "grad_norm": 1.4655550718307495, - "learning_rate": 9.044723618090453e-05, - "loss": 5.3384, - "step": 10007 - }, - { - "epoch": 5.219295958279009, - "grad_norm": 1.4058303833007812, - "learning_rate": 9.044623115577889e-05, - "loss": 5.9096, - "step": 10008 - }, - { - "epoch": 5.219817470664928, - "grad_norm": 1.471301555633545, - "learning_rate": 9.044522613065327e-05, - "loss": 5.9044, - "step": 10009 - }, - { - "epoch": 5.220338983050848, - "grad_norm": 1.3151054382324219, - "learning_rate": 9.044422110552763e-05, - "loss": 5.7608, - "step": 10010 - }, - { - "epoch": 5.220860495436766, - "grad_norm": 1.332016944885254, - "learning_rate": 9.044321608040201e-05, - "loss": 5.9322, - "step": 10011 - }, - { - "epoch": 5.221382007822686, - "grad_norm": 1.55919349193573, - "learning_rate": 9.044221105527638e-05, - "loss": 5.4266, - "step": 10012 - }, - { - "epoch": 5.221903520208605, - "grad_norm": 1.512215256690979, - "learning_rate": 9.044120603015075e-05, - "loss": 5.553, - "step": 10013 - }, - { - "epoch": 5.222425032594524, - "grad_norm": 1.4980473518371582, - "learning_rate": 9.044020100502513e-05, - "loss": 5.3979, - "step": 10014 - }, - { - "epoch": 5.222946544980443, - "grad_norm": 1.3802341222763062, - "learning_rate": 9.043919597989951e-05, - "loss": 6.1245, - "step": 10015 - }, - { - "epoch": 5.223468057366363, - "grad_norm": 1.3954429626464844, - "learning_rate": 9.043819095477387e-05, - "loss": 5.479, - "step": 10016 - }, - { - "epoch": 5.223989569752281, - "grad_norm": 1.4645367860794067, - "learning_rate": 9.043718592964825e-05, - "loss": 5.3299, - "step": 10017 - }, - { - "epoch": 5.224511082138201, - "grad_norm": 1.4221478700637817, - "learning_rate": 9.043618090452262e-05, - "loss": 5.9462, - "step": 10018 - }, - { - "epoch": 5.22503259452412, - "grad_norm": 1.364375352859497, - "learning_rate": 9.0435175879397e-05, - "loss": 6.1325, - "step": 10019 - }, - { - "epoch": 5.225554106910039, - "grad_norm": 1.4787803888320923, - "learning_rate": 9.043417085427136e-05, - "loss": 5.7425, - "step": 10020 - }, - { - "epoch": 5.226075619295958, - "grad_norm": 1.407372236251831, - "learning_rate": 9.043316582914574e-05, - "loss": 5.6933, - "step": 10021 - }, - { - "epoch": 5.226597131681878, - "grad_norm": 1.4925684928894043, - "learning_rate": 9.04321608040201e-05, - "loss": 5.7532, - "step": 10022 - }, - { - "epoch": 5.227118644067796, - "grad_norm": 1.4356073141098022, - "learning_rate": 9.043115577889447e-05, - "loss": 5.5757, - "step": 10023 - }, - { - "epoch": 5.227640156453716, - "grad_norm": 1.456728458404541, - "learning_rate": 9.043015075376884e-05, - "loss": 5.7391, - "step": 10024 - }, - { - "epoch": 5.228161668839635, - "grad_norm": 1.4190521240234375, - "learning_rate": 9.042914572864322e-05, - "loss": 5.8676, - "step": 10025 - }, - { - "epoch": 5.228683181225554, - "grad_norm": 1.445654034614563, - "learning_rate": 9.04281407035176e-05, - "loss": 5.5109, - "step": 10026 - }, - { - "epoch": 5.229204693611473, - "grad_norm": 1.4841558933258057, - "learning_rate": 9.042713567839196e-05, - "loss": 5.6778, - "step": 10027 - }, - { - "epoch": 5.229726205997393, - "grad_norm": 1.6084147691726685, - "learning_rate": 9.042613065326634e-05, - "loss": 5.6225, - "step": 10028 - }, - { - "epoch": 5.230247718383311, - "grad_norm": 1.5230908393859863, - "learning_rate": 9.04251256281407e-05, - "loss": 5.7634, - "step": 10029 - }, - { - "epoch": 5.230769230769231, - "grad_norm": 1.4932098388671875, - "learning_rate": 9.042412060301508e-05, - "loss": 5.7, - "step": 10030 - }, - { - "epoch": 5.23129074315515, - "grad_norm": 1.4683929681777954, - "learning_rate": 9.042311557788945e-05, - "loss": 5.8991, - "step": 10031 - }, - { - "epoch": 5.231812255541069, - "grad_norm": 1.5104502439498901, - "learning_rate": 9.042211055276383e-05, - "loss": 5.7744, - "step": 10032 - }, - { - "epoch": 5.232333767926988, - "grad_norm": 1.6425775289535522, - "learning_rate": 9.042110552763819e-05, - "loss": 5.6253, - "step": 10033 - }, - { - "epoch": 5.232855280312908, - "grad_norm": 1.5350946187973022, - "learning_rate": 9.042010050251257e-05, - "loss": 5.2043, - "step": 10034 - }, - { - "epoch": 5.233376792698826, - "grad_norm": 1.3478316068649292, - "learning_rate": 9.041909547738695e-05, - "loss": 5.9213, - "step": 10035 - }, - { - "epoch": 5.233898305084746, - "grad_norm": 1.4705829620361328, - "learning_rate": 9.041809045226131e-05, - "loss": 5.7951, - "step": 10036 - }, - { - "epoch": 5.234419817470665, - "grad_norm": 1.5671786069869995, - "learning_rate": 9.041708542713569e-05, - "loss": 4.932, - "step": 10037 - }, - { - "epoch": 5.234941329856584, - "grad_norm": 1.5181962251663208, - "learning_rate": 9.041608040201005e-05, - "loss": 6.1354, - "step": 10038 - }, - { - "epoch": 5.235462842242503, - "grad_norm": 1.3497366905212402, - "learning_rate": 9.041507537688443e-05, - "loss": 5.4015, - "step": 10039 - }, - { - "epoch": 5.235984354628423, - "grad_norm": 1.4979101419448853, - "learning_rate": 9.04140703517588e-05, - "loss": 5.6389, - "step": 10040 - }, - { - "epoch": 5.236505867014341, - "grad_norm": 1.6562597751617432, - "learning_rate": 9.041306532663317e-05, - "loss": 5.3035, - "step": 10041 - }, - { - "epoch": 5.237027379400261, - "grad_norm": 1.4748013019561768, - "learning_rate": 9.041206030150754e-05, - "loss": 6.0393, - "step": 10042 - }, - { - "epoch": 5.23754889178618, - "grad_norm": 1.5297287702560425, - "learning_rate": 9.041105527638191e-05, - "loss": 5.8313, - "step": 10043 - }, - { - "epoch": 5.238070404172099, - "grad_norm": 1.5088549852371216, - "learning_rate": 9.041005025125628e-05, - "loss": 5.8193, - "step": 10044 - }, - { - "epoch": 5.238591916558018, - "grad_norm": 1.5079742670059204, - "learning_rate": 9.040904522613066e-05, - "loss": 5.5471, - "step": 10045 - }, - { - "epoch": 5.239113428943938, - "grad_norm": 1.3613719940185547, - "learning_rate": 9.040804020100503e-05, - "loss": 5.8865, - "step": 10046 - }, - { - "epoch": 5.239634941329856, - "grad_norm": 1.499681830406189, - "learning_rate": 9.040703517587941e-05, - "loss": 5.0419, - "step": 10047 - }, - { - "epoch": 5.240156453715776, - "grad_norm": 1.4805010557174683, - "learning_rate": 9.040603015075378e-05, - "loss": 5.449, - "step": 10048 - }, - { - "epoch": 5.240677966101695, - "grad_norm": 1.4263910055160522, - "learning_rate": 9.040502512562814e-05, - "loss": 5.6308, - "step": 10049 - }, - { - "epoch": 5.241199478487614, - "grad_norm": 1.4595465660095215, - "learning_rate": 9.040402010050252e-05, - "loss": 5.6999, - "step": 10050 - }, - { - "epoch": 5.241720990873533, - "grad_norm": 1.6155757904052734, - "learning_rate": 9.040301507537688e-05, - "loss": 4.6291, - "step": 10051 - }, - { - "epoch": 5.242242503259453, - "grad_norm": 1.448711633682251, - "learning_rate": 9.040201005025126e-05, - "loss": 5.5144, - "step": 10052 - }, - { - "epoch": 5.242764015645371, - "grad_norm": 1.4708187580108643, - "learning_rate": 9.040100502512562e-05, - "loss": 5.4994, - "step": 10053 - }, - { - "epoch": 5.243285528031291, - "grad_norm": 1.3723044395446777, - "learning_rate": 9.04e-05, - "loss": 5.5186, - "step": 10054 - }, - { - "epoch": 5.2438070404172095, - "grad_norm": 1.4838917255401611, - "learning_rate": 9.039899497487438e-05, - "loss": 5.3591, - "step": 10055 - }, - { - "epoch": 5.244328552803129, - "grad_norm": 1.454161524772644, - "learning_rate": 9.039798994974876e-05, - "loss": 5.7842, - "step": 10056 - }, - { - "epoch": 5.244850065189048, - "grad_norm": 1.4295337200164795, - "learning_rate": 9.039698492462312e-05, - "loss": 5.7393, - "step": 10057 - }, - { - "epoch": 5.245371577574968, - "grad_norm": 1.4544376134872437, - "learning_rate": 9.03959798994975e-05, - "loss": 5.292, - "step": 10058 - }, - { - "epoch": 5.245893089960886, - "grad_norm": 1.4494431018829346, - "learning_rate": 9.039497487437186e-05, - "loss": 5.5997, - "step": 10059 - }, - { - "epoch": 5.246414602346806, - "grad_norm": 1.491150140762329, - "learning_rate": 9.039396984924624e-05, - "loss": 5.858, - "step": 10060 - }, - { - "epoch": 5.2469361147327245, - "grad_norm": 1.5064276456832886, - "learning_rate": 9.03929648241206e-05, - "loss": 5.6776, - "step": 10061 - }, - { - "epoch": 5.247457627118644, - "grad_norm": 1.3450371026992798, - "learning_rate": 9.039195979899497e-05, - "loss": 5.7663, - "step": 10062 - }, - { - "epoch": 5.247979139504563, - "grad_norm": 1.3917235136032104, - "learning_rate": 9.039095477386935e-05, - "loss": 5.4919, - "step": 10063 - }, - { - "epoch": 5.248500651890482, - "grad_norm": 1.5457264184951782, - "learning_rate": 9.038994974874371e-05, - "loss": 5.8379, - "step": 10064 - }, - { - "epoch": 5.2490221642764014, - "grad_norm": 1.4693011045455933, - "learning_rate": 9.038894472361809e-05, - "loss": 5.6482, - "step": 10065 - }, - { - "epoch": 5.249543676662321, - "grad_norm": 1.490593671798706, - "learning_rate": 9.038793969849247e-05, - "loss": 5.3512, - "step": 10066 - }, - { - "epoch": 5.2500651890482395, - "grad_norm": 1.5154200792312622, - "learning_rate": 9.038693467336685e-05, - "loss": 5.6765, - "step": 10067 - }, - { - "epoch": 5.250586701434159, - "grad_norm": 1.402030348777771, - "learning_rate": 9.038592964824121e-05, - "loss": 5.8638, - "step": 10068 - }, - { - "epoch": 5.251108213820078, - "grad_norm": 1.4765713214874268, - "learning_rate": 9.038492462311559e-05, - "loss": 5.713, - "step": 10069 - }, - { - "epoch": 5.251629726205997, - "grad_norm": 1.348554015159607, - "learning_rate": 9.038391959798995e-05, - "loss": 6.0765, - "step": 10070 - }, - { - "epoch": 5.2521512385919165, - "grad_norm": 1.3900163173675537, - "learning_rate": 9.038291457286433e-05, - "loss": 5.7635, - "step": 10071 - }, - { - "epoch": 5.252672750977836, - "grad_norm": 1.3848965167999268, - "learning_rate": 9.03819095477387e-05, - "loss": 5.8575, - "step": 10072 - }, - { - "epoch": 5.2531942633637545, - "grad_norm": 1.469152569770813, - "learning_rate": 9.038090452261307e-05, - "loss": 5.4415, - "step": 10073 - }, - { - "epoch": 5.253715775749674, - "grad_norm": 1.501121997833252, - "learning_rate": 9.037989949748744e-05, - "loss": 5.668, - "step": 10074 - }, - { - "epoch": 5.254237288135593, - "grad_norm": 1.628551721572876, - "learning_rate": 9.037889447236181e-05, - "loss": 5.3139, - "step": 10075 - }, - { - "epoch": 5.254758800521512, - "grad_norm": 1.576874017715454, - "learning_rate": 9.037788944723619e-05, - "loss": 5.5314, - "step": 10076 - }, - { - "epoch": 5.2552803129074315, - "grad_norm": 1.5907871723175049, - "learning_rate": 9.037688442211056e-05, - "loss": 5.0692, - "step": 10077 - }, - { - "epoch": 5.255801825293351, - "grad_norm": 1.501671552658081, - "learning_rate": 9.037587939698493e-05, - "loss": 5.8724, - "step": 10078 - }, - { - "epoch": 5.2563233376792695, - "grad_norm": 1.4172048568725586, - "learning_rate": 9.03748743718593e-05, - "loss": 6.048, - "step": 10079 - }, - { - "epoch": 5.256844850065189, - "grad_norm": 1.3762807846069336, - "learning_rate": 9.037386934673368e-05, - "loss": 5.4745, - "step": 10080 - }, - { - "epoch": 5.257366362451108, - "grad_norm": 1.3691222667694092, - "learning_rate": 9.037286432160804e-05, - "loss": 5.6501, - "step": 10081 - }, - { - "epoch": 5.257887874837027, - "grad_norm": 1.4266730546951294, - "learning_rate": 9.037185929648242e-05, - "loss": 5.8197, - "step": 10082 - }, - { - "epoch": 5.2584093872229465, - "grad_norm": 1.4243927001953125, - "learning_rate": 9.037085427135678e-05, - "loss": 5.7378, - "step": 10083 - }, - { - "epoch": 5.258930899608866, - "grad_norm": 1.440061330795288, - "learning_rate": 9.036984924623116e-05, - "loss": 5.7501, - "step": 10084 - }, - { - "epoch": 5.2594524119947845, - "grad_norm": 1.4981987476348877, - "learning_rate": 9.036884422110552e-05, - "loss": 5.5786, - "step": 10085 - }, - { - "epoch": 5.259973924380704, - "grad_norm": 1.4598643779754639, - "learning_rate": 9.03678391959799e-05, - "loss": 4.6012, - "step": 10086 - }, - { - "epoch": 5.2604954367666235, - "grad_norm": 1.5161511898040771, - "learning_rate": 9.036683417085428e-05, - "loss": 5.312, - "step": 10087 - }, - { - "epoch": 5.261016949152542, - "grad_norm": 1.7268807888031006, - "learning_rate": 9.036582914572864e-05, - "loss": 5.2498, - "step": 10088 - }, - { - "epoch": 5.2615384615384615, - "grad_norm": 1.5388851165771484, - "learning_rate": 9.036482412060302e-05, - "loss": 5.3284, - "step": 10089 - }, - { - "epoch": 5.262059973924381, - "grad_norm": 1.4702500104904175, - "learning_rate": 9.036381909547739e-05, - "loss": 5.5491, - "step": 10090 - }, - { - "epoch": 5.2625814863102995, - "grad_norm": 1.4577462673187256, - "learning_rate": 9.036281407035176e-05, - "loss": 5.192, - "step": 10091 - }, - { - "epoch": 5.263102998696219, - "grad_norm": 1.6016701459884644, - "learning_rate": 9.036180904522613e-05, - "loss": 5.8432, - "step": 10092 - }, - { - "epoch": 5.2636245110821385, - "grad_norm": 1.4500590562820435, - "learning_rate": 9.03608040201005e-05, - "loss": 5.5604, - "step": 10093 - }, - { - "epoch": 5.264146023468057, - "grad_norm": 1.4224238395690918, - "learning_rate": 9.035979899497487e-05, - "loss": 5.474, - "step": 10094 - }, - { - "epoch": 5.2646675358539765, - "grad_norm": 1.4048842191696167, - "learning_rate": 9.035879396984925e-05, - "loss": 5.4519, - "step": 10095 - }, - { - "epoch": 5.265189048239896, - "grad_norm": 1.4362159967422485, - "learning_rate": 9.035778894472363e-05, - "loss": 5.798, - "step": 10096 - }, - { - "epoch": 5.2657105606258146, - "grad_norm": 1.4492383003234863, - "learning_rate": 9.0356783919598e-05, - "loss": 4.7177, - "step": 10097 - }, - { - "epoch": 5.266232073011734, - "grad_norm": 1.4999266862869263, - "learning_rate": 9.035577889447237e-05, - "loss": 5.8647, - "step": 10098 - }, - { - "epoch": 5.2667535853976535, - "grad_norm": 1.5726128816604614, - "learning_rate": 9.035477386934675e-05, - "loss": 5.254, - "step": 10099 - }, - { - "epoch": 5.267275097783572, - "grad_norm": 1.466484785079956, - "learning_rate": 9.035376884422111e-05, - "loss": 5.8386, - "step": 10100 - }, - { - "epoch": 5.2677966101694915, - "grad_norm": 1.5123391151428223, - "learning_rate": 9.035276381909549e-05, - "loss": 5.3832, - "step": 10101 - }, - { - "epoch": 5.268318122555411, - "grad_norm": 1.5916695594787598, - "learning_rate": 9.035175879396985e-05, - "loss": 5.2203, - "step": 10102 - }, - { - "epoch": 5.26883963494133, - "grad_norm": 1.3694965839385986, - "learning_rate": 9.035075376884422e-05, - "loss": 5.1611, - "step": 10103 - }, - { - "epoch": 5.269361147327249, - "grad_norm": 1.5151638984680176, - "learning_rate": 9.03497487437186e-05, - "loss": 5.9515, - "step": 10104 - }, - { - "epoch": 5.2698826597131685, - "grad_norm": 1.4841432571411133, - "learning_rate": 9.034874371859296e-05, - "loss": 5.7103, - "step": 10105 - }, - { - "epoch": 5.270404172099087, - "grad_norm": 1.4815245866775513, - "learning_rate": 9.034773869346734e-05, - "loss": 5.9809, - "step": 10106 - }, - { - "epoch": 5.2709256844850065, - "grad_norm": 1.4271259307861328, - "learning_rate": 9.034673366834172e-05, - "loss": 5.6026, - "step": 10107 - }, - { - "epoch": 5.271447196870926, - "grad_norm": 1.3901634216308594, - "learning_rate": 9.034572864321609e-05, - "loss": 5.9341, - "step": 10108 - }, - { - "epoch": 5.271968709256845, - "grad_norm": 1.4707971811294556, - "learning_rate": 9.034472361809046e-05, - "loss": 5.6568, - "step": 10109 - }, - { - "epoch": 5.272490221642764, - "grad_norm": 1.428215503692627, - "learning_rate": 9.034371859296483e-05, - "loss": 5.9458, - "step": 10110 - }, - { - "epoch": 5.2730117340286835, - "grad_norm": 1.3498142957687378, - "learning_rate": 9.03427135678392e-05, - "loss": 5.9059, - "step": 10111 - }, - { - "epoch": 5.273533246414602, - "grad_norm": 1.3889614343643188, - "learning_rate": 9.034170854271358e-05, - "loss": 5.501, - "step": 10112 - }, - { - "epoch": 5.2740547588005215, - "grad_norm": 1.4207524061203003, - "learning_rate": 9.034070351758794e-05, - "loss": 5.5392, - "step": 10113 - }, - { - "epoch": 5.274576271186441, - "grad_norm": 1.3346121311187744, - "learning_rate": 9.033969849246232e-05, - "loss": 5.434, - "step": 10114 - }, - { - "epoch": 5.27509778357236, - "grad_norm": 1.5477559566497803, - "learning_rate": 9.033869346733668e-05, - "loss": 5.4977, - "step": 10115 - }, - { - "epoch": 5.275619295958279, - "grad_norm": 1.4411602020263672, - "learning_rate": 9.033768844221106e-05, - "loss": 5.6205, - "step": 10116 - }, - { - "epoch": 5.2761408083441985, - "grad_norm": 1.404096245765686, - "learning_rate": 9.033668341708544e-05, - "loss": 5.7789, - "step": 10117 - }, - { - "epoch": 5.276662320730117, - "grad_norm": 1.3902307748794556, - "learning_rate": 9.03356783919598e-05, - "loss": 5.6599, - "step": 10118 - }, - { - "epoch": 5.277183833116037, - "grad_norm": 1.4467017650604248, - "learning_rate": 9.033467336683418e-05, - "loss": 5.8722, - "step": 10119 - }, - { - "epoch": 5.277705345501956, - "grad_norm": 1.5538296699523926, - "learning_rate": 9.033366834170855e-05, - "loss": 5.5758, - "step": 10120 - }, - { - "epoch": 5.278226857887875, - "grad_norm": 1.6112912893295288, - "learning_rate": 9.033266331658292e-05, - "loss": 5.4212, - "step": 10121 - }, - { - "epoch": 5.278748370273794, - "grad_norm": 1.451098084449768, - "learning_rate": 9.033165829145729e-05, - "loss": 5.9175, - "step": 10122 - }, - { - "epoch": 5.2792698826597135, - "grad_norm": 1.2418203353881836, - "learning_rate": 9.033065326633167e-05, - "loss": 5.2708, - "step": 10123 - }, - { - "epoch": 5.279791395045632, - "grad_norm": 1.3613461256027222, - "learning_rate": 9.032964824120603e-05, - "loss": 5.648, - "step": 10124 - }, - { - "epoch": 5.280312907431552, - "grad_norm": 1.3433095216751099, - "learning_rate": 9.032864321608041e-05, - "loss": 6.0698, - "step": 10125 - }, - { - "epoch": 5.280834419817471, - "grad_norm": 1.5903409719467163, - "learning_rate": 9.032763819095477e-05, - "loss": 5.3012, - "step": 10126 - }, - { - "epoch": 5.28135593220339, - "grad_norm": 1.3229511976242065, - "learning_rate": 9.032663316582915e-05, - "loss": 5.7769, - "step": 10127 - }, - { - "epoch": 5.281877444589309, - "grad_norm": 1.435637354850769, - "learning_rate": 9.032562814070353e-05, - "loss": 5.5244, - "step": 10128 - }, - { - "epoch": 5.2823989569752285, - "grad_norm": 1.488949179649353, - "learning_rate": 9.032462311557789e-05, - "loss": 5.8349, - "step": 10129 - }, - { - "epoch": 5.282920469361147, - "grad_norm": 1.4263296127319336, - "learning_rate": 9.032361809045227e-05, - "loss": 5.5644, - "step": 10130 - }, - { - "epoch": 5.283441981747067, - "grad_norm": 1.4931843280792236, - "learning_rate": 9.032261306532663e-05, - "loss": 5.1947, - "step": 10131 - }, - { - "epoch": 5.283963494132986, - "grad_norm": 1.652774691581726, - "learning_rate": 9.032160804020101e-05, - "loss": 5.7006, - "step": 10132 - }, - { - "epoch": 5.284485006518905, - "grad_norm": 1.3690481185913086, - "learning_rate": 9.032060301507538e-05, - "loss": 5.689, - "step": 10133 - }, - { - "epoch": 5.285006518904824, - "grad_norm": 1.6037957668304443, - "learning_rate": 9.031959798994975e-05, - "loss": 5.7437, - "step": 10134 - }, - { - "epoch": 5.2855280312907436, - "grad_norm": 1.5038952827453613, - "learning_rate": 9.031859296482412e-05, - "loss": 5.6974, - "step": 10135 - }, - { - "epoch": 5.286049543676662, - "grad_norm": 1.4818822145462036, - "learning_rate": 9.03175879396985e-05, - "loss": 5.8746, - "step": 10136 - }, - { - "epoch": 5.286571056062582, - "grad_norm": 1.4067444801330566, - "learning_rate": 9.031658291457287e-05, - "loss": 5.7685, - "step": 10137 - }, - { - "epoch": 5.287092568448501, - "grad_norm": 1.4086865186691284, - "learning_rate": 9.031557788944725e-05, - "loss": 5.7503, - "step": 10138 - }, - { - "epoch": 5.28761408083442, - "grad_norm": 1.4859592914581299, - "learning_rate": 9.031457286432162e-05, - "loss": 6.0192, - "step": 10139 - }, - { - "epoch": 5.288135593220339, - "grad_norm": 1.4527862071990967, - "learning_rate": 9.0313567839196e-05, - "loss": 5.6315, - "step": 10140 - }, - { - "epoch": 5.288657105606259, - "grad_norm": 1.4255590438842773, - "learning_rate": 9.031256281407036e-05, - "loss": 5.8929, - "step": 10141 - }, - { - "epoch": 5.289178617992177, - "grad_norm": 1.403523325920105, - "learning_rate": 9.031155778894472e-05, - "loss": 5.5291, - "step": 10142 - }, - { - "epoch": 5.289700130378097, - "grad_norm": 1.3358978033065796, - "learning_rate": 9.03105527638191e-05, - "loss": 5.4483, - "step": 10143 - }, - { - "epoch": 5.290221642764015, - "grad_norm": 1.4017674922943115, - "learning_rate": 9.030954773869346e-05, - "loss": 5.7032, - "step": 10144 - }, - { - "epoch": 5.290743155149935, - "grad_norm": 1.4424452781677246, - "learning_rate": 9.030854271356784e-05, - "loss": 5.6551, - "step": 10145 - }, - { - "epoch": 5.291264667535854, - "grad_norm": 1.4213212728500366, - "learning_rate": 9.03075376884422e-05, - "loss": 6.1358, - "step": 10146 - }, - { - "epoch": 5.291786179921774, - "grad_norm": 1.9626235961914062, - "learning_rate": 9.030653266331658e-05, - "loss": 4.9429, - "step": 10147 - }, - { - "epoch": 5.292307692307692, - "grad_norm": 1.463068962097168, - "learning_rate": 9.030552763819096e-05, - "loss": 5.5942, - "step": 10148 - }, - { - "epoch": 5.292829204693612, - "grad_norm": 1.4770915508270264, - "learning_rate": 9.030452261306534e-05, - "loss": 5.6926, - "step": 10149 - }, - { - "epoch": 5.29335071707953, - "grad_norm": 1.3663451671600342, - "learning_rate": 9.03035175879397e-05, - "loss": 5.9934, - "step": 10150 - }, - { - "epoch": 5.29387222946545, - "grad_norm": 1.4516716003417969, - "learning_rate": 9.030251256281408e-05, - "loss": 5.9346, - "step": 10151 - }, - { - "epoch": 5.294393741851369, - "grad_norm": 1.3995884656906128, - "learning_rate": 9.030150753768845e-05, - "loss": 5.9001, - "step": 10152 - }, - { - "epoch": 5.294915254237289, - "grad_norm": 1.6173863410949707, - "learning_rate": 9.030050251256282e-05, - "loss": 5.3547, - "step": 10153 - }, - { - "epoch": 5.295436766623207, - "grad_norm": 1.5221269130706787, - "learning_rate": 9.029949748743719e-05, - "loss": 5.6693, - "step": 10154 - }, - { - "epoch": 5.295958279009127, - "grad_norm": 1.488456130027771, - "learning_rate": 9.029849246231155e-05, - "loss": 5.5436, - "step": 10155 - }, - { - "epoch": 5.296479791395045, - "grad_norm": 1.3692303895950317, - "learning_rate": 9.029748743718593e-05, - "loss": 5.8905, - "step": 10156 - }, - { - "epoch": 5.297001303780965, - "grad_norm": 1.6565728187561035, - "learning_rate": 9.02964824120603e-05, - "loss": 5.4402, - "step": 10157 - }, - { - "epoch": 5.297522816166884, - "grad_norm": 1.3411543369293213, - "learning_rate": 9.029547738693467e-05, - "loss": 5.7118, - "step": 10158 - }, - { - "epoch": 5.298044328552803, - "grad_norm": 1.4342840909957886, - "learning_rate": 9.029447236180905e-05, - "loss": 6.162, - "step": 10159 - }, - { - "epoch": 5.298565840938722, - "grad_norm": 1.4498540163040161, - "learning_rate": 9.029346733668343e-05, - "loss": 5.5208, - "step": 10160 - }, - { - "epoch": 5.299087353324642, - "grad_norm": 1.4588541984558105, - "learning_rate": 9.029246231155779e-05, - "loss": 5.5778, - "step": 10161 - }, - { - "epoch": 5.29960886571056, - "grad_norm": 1.4278212785720825, - "learning_rate": 9.029145728643217e-05, - "loss": 5.8526, - "step": 10162 - }, - { - "epoch": 5.30013037809648, - "grad_norm": 1.4877732992172241, - "learning_rate": 9.029045226130653e-05, - "loss": 5.6603, - "step": 10163 - }, - { - "epoch": 5.300651890482399, - "grad_norm": 1.5012656450271606, - "learning_rate": 9.028944723618091e-05, - "loss": 5.5734, - "step": 10164 - }, - { - "epoch": 5.301173402868318, - "grad_norm": 1.3371511697769165, - "learning_rate": 9.028844221105528e-05, - "loss": 5.3745, - "step": 10165 - }, - { - "epoch": 5.301694915254237, - "grad_norm": 1.5119991302490234, - "learning_rate": 9.028743718592965e-05, - "loss": 5.4259, - "step": 10166 - }, - { - "epoch": 5.302216427640157, - "grad_norm": 1.4748679399490356, - "learning_rate": 9.028643216080402e-05, - "loss": 4.708, - "step": 10167 - }, - { - "epoch": 5.302737940026075, - "grad_norm": 1.6002538204193115, - "learning_rate": 9.02854271356784e-05, - "loss": 5.6048, - "step": 10168 - }, - { - "epoch": 5.303259452411995, - "grad_norm": 1.7044093608856201, - "learning_rate": 9.028442211055277e-05, - "loss": 4.8962, - "step": 10169 - }, - { - "epoch": 5.303780964797914, - "grad_norm": 1.4794741868972778, - "learning_rate": 9.028341708542714e-05, - "loss": 5.8414, - "step": 10170 - }, - { - "epoch": 5.304302477183833, - "grad_norm": 1.54389226436615, - "learning_rate": 9.028241206030152e-05, - "loss": 5.5981, - "step": 10171 - }, - { - "epoch": 5.304823989569752, - "grad_norm": 1.5560040473937988, - "learning_rate": 9.028140703517588e-05, - "loss": 5.6244, - "step": 10172 - }, - { - "epoch": 5.305345501955672, - "grad_norm": 1.4358760118484497, - "learning_rate": 9.028040201005026e-05, - "loss": 5.9609, - "step": 10173 - }, - { - "epoch": 5.30586701434159, - "grad_norm": 1.410981297492981, - "learning_rate": 9.027939698492462e-05, - "loss": 5.8108, - "step": 10174 - }, - { - "epoch": 5.30638852672751, - "grad_norm": 1.4781606197357178, - "learning_rate": 9.0278391959799e-05, - "loss": 5.7265, - "step": 10175 - }, - { - "epoch": 5.306910039113429, - "grad_norm": 1.4008976221084595, - "learning_rate": 9.027738693467337e-05, - "loss": 5.4647, - "step": 10176 - }, - { - "epoch": 5.307431551499348, - "grad_norm": 1.357210397720337, - "learning_rate": 9.027638190954774e-05, - "loss": 5.7917, - "step": 10177 - }, - { - "epoch": 5.307953063885267, - "grad_norm": 1.4074064493179321, - "learning_rate": 9.027537688442211e-05, - "loss": 5.697, - "step": 10178 - }, - { - "epoch": 5.308474576271187, - "grad_norm": 1.404657244682312, - "learning_rate": 9.027437185929648e-05, - "loss": 5.8148, - "step": 10179 - }, - { - "epoch": 5.308996088657105, - "grad_norm": 1.389084815979004, - "learning_rate": 9.027336683417086e-05, - "loss": 5.7696, - "step": 10180 - }, - { - "epoch": 5.309517601043025, - "grad_norm": 1.4845212697982788, - "learning_rate": 9.027236180904523e-05, - "loss": 5.6774, - "step": 10181 - }, - { - "epoch": 5.310039113428944, - "grad_norm": 1.489313006401062, - "learning_rate": 9.02713567839196e-05, - "loss": 5.7094, - "step": 10182 - }, - { - "epoch": 5.310560625814863, - "grad_norm": 1.4481770992279053, - "learning_rate": 9.027035175879397e-05, - "loss": 5.4335, - "step": 10183 - }, - { - "epoch": 5.311082138200782, - "grad_norm": 1.3650178909301758, - "learning_rate": 9.026934673366835e-05, - "loss": 5.6826, - "step": 10184 - }, - { - "epoch": 5.311603650586702, - "grad_norm": 1.5408092737197876, - "learning_rate": 9.026834170854271e-05, - "loss": 5.9372, - "step": 10185 - }, - { - "epoch": 5.31212516297262, - "grad_norm": 1.6812245845794678, - "learning_rate": 9.026733668341709e-05, - "loss": 5.8724, - "step": 10186 - }, - { - "epoch": 5.31264667535854, - "grad_norm": 1.5636911392211914, - "learning_rate": 9.026633165829145e-05, - "loss": 5.2088, - "step": 10187 - }, - { - "epoch": 5.313168187744459, - "grad_norm": 1.507303237915039, - "learning_rate": 9.026532663316583e-05, - "loss": 5.7872, - "step": 10188 - }, - { - "epoch": 5.313689700130378, - "grad_norm": 1.3101260662078857, - "learning_rate": 9.026432160804021e-05, - "loss": 5.7012, - "step": 10189 - }, - { - "epoch": 5.314211212516297, - "grad_norm": 1.4885419607162476, - "learning_rate": 9.026331658291459e-05, - "loss": 5.466, - "step": 10190 - }, - { - "epoch": 5.314732724902217, - "grad_norm": 1.385655164718628, - "learning_rate": 9.026231155778895e-05, - "loss": 5.8726, - "step": 10191 - }, - { - "epoch": 5.315254237288135, - "grad_norm": 1.4238570928573608, - "learning_rate": 9.026130653266333e-05, - "loss": 5.8543, - "step": 10192 - }, - { - "epoch": 5.315775749674055, - "grad_norm": 1.2176191806793213, - "learning_rate": 9.026030150753769e-05, - "loss": 5.9873, - "step": 10193 - }, - { - "epoch": 5.316297262059974, - "grad_norm": 1.4420068264007568, - "learning_rate": 9.025929648241207e-05, - "loss": 5.5586, - "step": 10194 - }, - { - "epoch": 5.316818774445893, - "grad_norm": 1.4391130208969116, - "learning_rate": 9.025829145728644e-05, - "loss": 5.8113, - "step": 10195 - }, - { - "epoch": 5.317340286831812, - "grad_norm": 1.4397854804992676, - "learning_rate": 9.02572864321608e-05, - "loss": 5.9375, - "step": 10196 - }, - { - "epoch": 5.317861799217732, - "grad_norm": 1.4512836933135986, - "learning_rate": 9.025628140703518e-05, - "loss": 5.548, - "step": 10197 - }, - { - "epoch": 5.31838331160365, - "grad_norm": 1.4951709508895874, - "learning_rate": 9.025527638190954e-05, - "loss": 5.8345, - "step": 10198 - }, - { - "epoch": 5.31890482398957, - "grad_norm": 1.5930612087249756, - "learning_rate": 9.025427135678392e-05, - "loss": 5.5683, - "step": 10199 - }, - { - "epoch": 5.319426336375489, - "grad_norm": 1.3639918565750122, - "learning_rate": 9.02532663316583e-05, - "loss": 5.9459, - "step": 10200 - }, - { - "epoch": 5.319947848761408, - "grad_norm": 1.4860025644302368, - "learning_rate": 9.025226130653268e-05, - "loss": 5.4401, - "step": 10201 - }, - { - "epoch": 5.320469361147327, - "grad_norm": 1.5039597749710083, - "learning_rate": 9.025125628140704e-05, - "loss": 5.9156, - "step": 10202 - }, - { - "epoch": 5.320990873533247, - "grad_norm": 1.4666608572006226, - "learning_rate": 9.025025125628142e-05, - "loss": 5.5488, - "step": 10203 - }, - { - "epoch": 5.321512385919165, - "grad_norm": 1.3759040832519531, - "learning_rate": 9.024924623115578e-05, - "loss": 4.2594, - "step": 10204 - }, - { - "epoch": 5.322033898305085, - "grad_norm": 1.4700740575790405, - "learning_rate": 9.024824120603016e-05, - "loss": 5.9183, - "step": 10205 - }, - { - "epoch": 5.322555410691004, - "grad_norm": 1.5964179039001465, - "learning_rate": 9.024723618090452e-05, - "loss": 5.6895, - "step": 10206 - }, - { - "epoch": 5.323076923076923, - "grad_norm": 1.4286209344863892, - "learning_rate": 9.02462311557789e-05, - "loss": 5.9161, - "step": 10207 - }, - { - "epoch": 5.323598435462842, - "grad_norm": 1.3978111743927002, - "learning_rate": 9.024522613065327e-05, - "loss": 6.1078, - "step": 10208 - }, - { - "epoch": 5.324119947848762, - "grad_norm": 1.5734169483184814, - "learning_rate": 9.024422110552764e-05, - "loss": 5.2924, - "step": 10209 - }, - { - "epoch": 5.32464146023468, - "grad_norm": 1.6161092519760132, - "learning_rate": 9.024321608040202e-05, - "loss": 5.6936, - "step": 10210 - }, - { - "epoch": 5.3251629726206, - "grad_norm": 1.5728449821472168, - "learning_rate": 9.024221105527639e-05, - "loss": 5.4333, - "step": 10211 - }, - { - "epoch": 5.325684485006519, - "grad_norm": 1.773559331893921, - "learning_rate": 9.024120603015076e-05, - "loss": 5.4845, - "step": 10212 - }, - { - "epoch": 5.326205997392438, - "grad_norm": 1.5202322006225586, - "learning_rate": 9.024020100502513e-05, - "loss": 5.5811, - "step": 10213 - }, - { - "epoch": 5.326727509778357, - "grad_norm": 1.4308488368988037, - "learning_rate": 9.02391959798995e-05, - "loss": 5.2266, - "step": 10214 - }, - { - "epoch": 5.327249022164277, - "grad_norm": 1.4669445753097534, - "learning_rate": 9.023819095477387e-05, - "loss": 5.6467, - "step": 10215 - }, - { - "epoch": 5.327770534550195, - "grad_norm": 1.5192683935165405, - "learning_rate": 9.023718592964825e-05, - "loss": 5.4324, - "step": 10216 - }, - { - "epoch": 5.328292046936115, - "grad_norm": 1.468769907951355, - "learning_rate": 9.023618090452261e-05, - "loss": 5.3246, - "step": 10217 - }, - { - "epoch": 5.328813559322034, - "grad_norm": 1.3620758056640625, - "learning_rate": 9.023517587939699e-05, - "loss": 5.8385, - "step": 10218 - }, - { - "epoch": 5.329335071707953, - "grad_norm": 1.3868898153305054, - "learning_rate": 9.023417085427135e-05, - "loss": 5.5634, - "step": 10219 - }, - { - "epoch": 5.329856584093872, - "grad_norm": 1.8883161544799805, - "learning_rate": 9.023316582914573e-05, - "loss": 5.0034, - "step": 10220 - }, - { - "epoch": 5.330378096479792, - "grad_norm": 1.5229982137680054, - "learning_rate": 9.023216080402011e-05, - "loss": 4.8879, - "step": 10221 - }, - { - "epoch": 5.33089960886571, - "grad_norm": 1.6382133960723877, - "learning_rate": 9.023115577889447e-05, - "loss": 5.4922, - "step": 10222 - }, - { - "epoch": 5.33142112125163, - "grad_norm": 1.6447757482528687, - "learning_rate": 9.023015075376885e-05, - "loss": 5.8495, - "step": 10223 - }, - { - "epoch": 5.331942633637549, - "grad_norm": 1.4234298467636108, - "learning_rate": 9.022914572864322e-05, - "loss": 5.5402, - "step": 10224 - }, - { - "epoch": 5.332464146023468, - "grad_norm": 1.7102653980255127, - "learning_rate": 9.02281407035176e-05, - "loss": 5.6542, - "step": 10225 - }, - { - "epoch": 5.332985658409387, - "grad_norm": 1.505831241607666, - "learning_rate": 9.022713567839196e-05, - "loss": 5.2729, - "step": 10226 - }, - { - "epoch": 5.333507170795307, - "grad_norm": 1.5086112022399902, - "learning_rate": 9.022613065326634e-05, - "loss": 5.6718, - "step": 10227 - }, - { - "epoch": 5.334028683181225, - "grad_norm": 1.5301624536514282, - "learning_rate": 9.02251256281407e-05, - "loss": 5.4514, - "step": 10228 - }, - { - "epoch": 5.334550195567145, - "grad_norm": 1.4157813787460327, - "learning_rate": 9.022412060301508e-05, - "loss": 5.673, - "step": 10229 - }, - { - "epoch": 5.335071707953064, - "grad_norm": 1.4857059717178345, - "learning_rate": 9.022311557788946e-05, - "loss": 5.7814, - "step": 10230 - }, - { - "epoch": 5.335593220338983, - "grad_norm": 1.4703820943832397, - "learning_rate": 9.022211055276383e-05, - "loss": 5.9424, - "step": 10231 - }, - { - "epoch": 5.336114732724902, - "grad_norm": 1.5231715440750122, - "learning_rate": 9.02211055276382e-05, - "loss": 5.835, - "step": 10232 - }, - { - "epoch": 5.336636245110822, - "grad_norm": 1.3984918594360352, - "learning_rate": 9.022010050251258e-05, - "loss": 5.8445, - "step": 10233 - }, - { - "epoch": 5.33715775749674, - "grad_norm": 1.4330426454544067, - "learning_rate": 9.021909547738694e-05, - "loss": 5.6257, - "step": 10234 - }, - { - "epoch": 5.33767926988266, - "grad_norm": 1.496041178703308, - "learning_rate": 9.02180904522613e-05, - "loss": 5.6275, - "step": 10235 - }, - { - "epoch": 5.338200782268579, - "grad_norm": 1.6019511222839355, - "learning_rate": 9.021708542713568e-05, - "loss": 4.8149, - "step": 10236 - }, - { - "epoch": 5.338722294654498, - "grad_norm": 1.4021120071411133, - "learning_rate": 9.021608040201005e-05, - "loss": 5.6017, - "step": 10237 - }, - { - "epoch": 5.339243807040417, - "grad_norm": 1.4280763864517212, - "learning_rate": 9.021507537688442e-05, - "loss": 5.6507, - "step": 10238 - }, - { - "epoch": 5.339765319426336, - "grad_norm": 1.4885696172714233, - "learning_rate": 9.021407035175879e-05, - "loss": 5.3205, - "step": 10239 - }, - { - "epoch": 5.340286831812255, - "grad_norm": 1.4745653867721558, - "learning_rate": 9.021306532663317e-05, - "loss": 5.5354, - "step": 10240 - }, - { - "epoch": 5.340808344198175, - "grad_norm": 1.3587496280670166, - "learning_rate": 9.021206030150754e-05, - "loss": 5.6468, - "step": 10241 - }, - { - "epoch": 5.341329856584094, - "grad_norm": 1.3264514207839966, - "learning_rate": 9.021105527638192e-05, - "loss": 5.8478, - "step": 10242 - }, - { - "epoch": 5.341851368970013, - "grad_norm": 1.437589168548584, - "learning_rate": 9.021005025125629e-05, - "loss": 5.8311, - "step": 10243 - }, - { - "epoch": 5.342372881355932, - "grad_norm": 1.343753457069397, - "learning_rate": 9.020904522613066e-05, - "loss": 5.2333, - "step": 10244 - }, - { - "epoch": 5.342894393741851, - "grad_norm": 1.4322502613067627, - "learning_rate": 9.020804020100503e-05, - "loss": 5.5893, - "step": 10245 - }, - { - "epoch": 5.34341590612777, - "grad_norm": 1.6897600889205933, - "learning_rate": 9.02070351758794e-05, - "loss": 5.3949, - "step": 10246 - }, - { - "epoch": 5.34393741851369, - "grad_norm": 1.4204440116882324, - "learning_rate": 9.020603015075377e-05, - "loss": 5.4185, - "step": 10247 - }, - { - "epoch": 5.344458930899609, - "grad_norm": 1.366647481918335, - "learning_rate": 9.020502512562813e-05, - "loss": 5.6635, - "step": 10248 - }, - { - "epoch": 5.344980443285528, - "grad_norm": 1.4737663269042969, - "learning_rate": 9.020402010050251e-05, - "loss": 5.6163, - "step": 10249 - }, - { - "epoch": 5.345501955671447, - "grad_norm": 1.4024121761322021, - "learning_rate": 9.020301507537689e-05, - "loss": 5.4959, - "step": 10250 - }, - { - "epoch": 5.346023468057366, - "grad_norm": 1.5999839305877686, - "learning_rate": 9.020201005025127e-05, - "loss": 5.4925, - "step": 10251 - }, - { - "epoch": 5.346544980443285, - "grad_norm": 1.316460132598877, - "learning_rate": 9.020100502512563e-05, - "loss": 5.7873, - "step": 10252 - }, - { - "epoch": 5.347066492829205, - "grad_norm": 1.504062533378601, - "learning_rate": 9.020000000000001e-05, - "loss": 5.1032, - "step": 10253 - }, - { - "epoch": 5.3475880052151235, - "grad_norm": 1.4356709718704224, - "learning_rate": 9.019899497487437e-05, - "loss": 5.7118, - "step": 10254 - }, - { - "epoch": 5.348109517601043, - "grad_norm": 1.5183720588684082, - "learning_rate": 9.019798994974875e-05, - "loss": 5.2328, - "step": 10255 - }, - { - "epoch": 5.348631029986962, - "grad_norm": 1.6235761642456055, - "learning_rate": 9.019698492462312e-05, - "loss": 5.1812, - "step": 10256 - }, - { - "epoch": 5.349152542372881, - "grad_norm": 1.476263165473938, - "learning_rate": 9.01959798994975e-05, - "loss": 5.8137, - "step": 10257 - }, - { - "epoch": 5.3496740547588, - "grad_norm": 1.47592031955719, - "learning_rate": 9.019497487437186e-05, - "loss": 4.9739, - "step": 10258 - }, - { - "epoch": 5.35019556714472, - "grad_norm": 1.382989525794983, - "learning_rate": 9.019396984924624e-05, - "loss": 5.8285, - "step": 10259 - }, - { - "epoch": 5.3507170795306385, - "grad_norm": 1.4239100217819214, - "learning_rate": 9.01929648241206e-05, - "loss": 5.3573, - "step": 10260 - }, - { - "epoch": 5.351238591916558, - "grad_norm": 1.7319763898849487, - "learning_rate": 9.019195979899498e-05, - "loss": 5.3675, - "step": 10261 - }, - { - "epoch": 5.351760104302477, - "grad_norm": 1.3405685424804688, - "learning_rate": 9.019095477386936e-05, - "loss": 5.9522, - "step": 10262 - }, - { - "epoch": 5.352281616688396, - "grad_norm": 1.3371466398239136, - "learning_rate": 9.018994974874372e-05, - "loss": 5.5198, - "step": 10263 - }, - { - "epoch": 5.352803129074315, - "grad_norm": 1.3950738906860352, - "learning_rate": 9.01889447236181e-05, - "loss": 6.0362, - "step": 10264 - }, - { - "epoch": 5.353324641460235, - "grad_norm": 1.435194730758667, - "learning_rate": 9.018793969849246e-05, - "loss": 5.6534, - "step": 10265 - }, - { - "epoch": 5.3538461538461535, - "grad_norm": 1.60768723487854, - "learning_rate": 9.018693467336684e-05, - "loss": 5.7581, - "step": 10266 - }, - { - "epoch": 5.354367666232073, - "grad_norm": 1.4624313116073608, - "learning_rate": 9.01859296482412e-05, - "loss": 5.6737, - "step": 10267 - }, - { - "epoch": 5.354889178617992, - "grad_norm": 1.5445939302444458, - "learning_rate": 9.018492462311558e-05, - "loss": 5.6502, - "step": 10268 - }, - { - "epoch": 5.355410691003911, - "grad_norm": 1.3557714223861694, - "learning_rate": 9.018391959798995e-05, - "loss": 5.9079, - "step": 10269 - }, - { - "epoch": 5.3559322033898304, - "grad_norm": 1.7879862785339355, - "learning_rate": 9.018291457286433e-05, - "loss": 5.1269, - "step": 10270 - }, - { - "epoch": 5.35645371577575, - "grad_norm": 1.4320110082626343, - "learning_rate": 9.01819095477387e-05, - "loss": 5.7046, - "step": 10271 - }, - { - "epoch": 5.3569752281616685, - "grad_norm": 1.4466145038604736, - "learning_rate": 9.018090452261308e-05, - "loss": 5.7731, - "step": 10272 - }, - { - "epoch": 5.357496740547588, - "grad_norm": 1.3664839267730713, - "learning_rate": 9.017989949748745e-05, - "loss": 5.5415, - "step": 10273 - }, - { - "epoch": 5.358018252933507, - "grad_norm": 1.432259202003479, - "learning_rate": 9.017889447236181e-05, - "loss": 6.1035, - "step": 10274 - }, - { - "epoch": 5.358539765319426, - "grad_norm": 1.4063067436218262, - "learning_rate": 9.017788944723619e-05, - "loss": 5.8523, - "step": 10275 - }, - { - "epoch": 5.3590612777053455, - "grad_norm": 1.6309561729431152, - "learning_rate": 9.017688442211055e-05, - "loss": 4.2884, - "step": 10276 - }, - { - "epoch": 5.359582790091265, - "grad_norm": 1.4899494647979736, - "learning_rate": 9.017587939698493e-05, - "loss": 5.7082, - "step": 10277 - }, - { - "epoch": 5.3601043024771835, - "grad_norm": 1.4235202074050903, - "learning_rate": 9.01748743718593e-05, - "loss": 5.8704, - "step": 10278 - }, - { - "epoch": 5.360625814863103, - "grad_norm": 1.5385639667510986, - "learning_rate": 9.017386934673367e-05, - "loss": 5.3477, - "step": 10279 - }, - { - "epoch": 5.361147327249022, - "grad_norm": 1.5799310207366943, - "learning_rate": 9.017286432160804e-05, - "loss": 5.9103, - "step": 10280 - }, - { - "epoch": 5.361668839634941, - "grad_norm": 1.677376389503479, - "learning_rate": 9.017185929648241e-05, - "loss": 5.6047, - "step": 10281 - }, - { - "epoch": 5.3621903520208605, - "grad_norm": 1.6116411685943604, - "learning_rate": 9.017085427135679e-05, - "loss": 4.9714, - "step": 10282 - }, - { - "epoch": 5.36271186440678, - "grad_norm": 1.559818148612976, - "learning_rate": 9.016984924623117e-05, - "loss": 5.093, - "step": 10283 - }, - { - "epoch": 5.3632333767926985, - "grad_norm": 1.4089144468307495, - "learning_rate": 9.016884422110553e-05, - "loss": 5.9716, - "step": 10284 - }, - { - "epoch": 5.363754889178618, - "grad_norm": 1.4709949493408203, - "learning_rate": 9.016783919597991e-05, - "loss": 5.6195, - "step": 10285 - }, - { - "epoch": 5.3642764015645374, - "grad_norm": 1.384634256362915, - "learning_rate": 9.016683417085428e-05, - "loss": 5.6891, - "step": 10286 - }, - { - "epoch": 5.364797913950456, - "grad_norm": 1.4020015001296997, - "learning_rate": 9.016582914572865e-05, - "loss": 4.4311, - "step": 10287 - }, - { - "epoch": 5.3653194263363755, - "grad_norm": 1.389190912246704, - "learning_rate": 9.016482412060302e-05, - "loss": 5.963, - "step": 10288 - }, - { - "epoch": 5.365840938722295, - "grad_norm": 1.4642819166183472, - "learning_rate": 9.016381909547738e-05, - "loss": 5.7096, - "step": 10289 - }, - { - "epoch": 5.3663624511082135, - "grad_norm": 1.5249472856521606, - "learning_rate": 9.016281407035176e-05, - "loss": 5.733, - "step": 10290 - }, - { - "epoch": 5.366883963494133, - "grad_norm": 1.4018474817276, - "learning_rate": 9.016180904522614e-05, - "loss": 5.8569, - "step": 10291 - }, - { - "epoch": 5.3674054758800525, - "grad_norm": 1.5382846593856812, - "learning_rate": 9.016080402010052e-05, - "loss": 5.5011, - "step": 10292 - }, - { - "epoch": 5.367926988265971, - "grad_norm": 1.5073137283325195, - "learning_rate": 9.015979899497488e-05, - "loss": 5.6861, - "step": 10293 - }, - { - "epoch": 5.3684485006518905, - "grad_norm": 1.3321858644485474, - "learning_rate": 9.015879396984926e-05, - "loss": 5.8215, - "step": 10294 - }, - { - "epoch": 5.36897001303781, - "grad_norm": 1.55703604221344, - "learning_rate": 9.015778894472362e-05, - "loss": 5.2027, - "step": 10295 - }, - { - "epoch": 5.3694915254237285, - "grad_norm": 1.644277572631836, - "learning_rate": 9.0156783919598e-05, - "loss": 5.6514, - "step": 10296 - }, - { - "epoch": 5.370013037809648, - "grad_norm": 1.5605595111846924, - "learning_rate": 9.015577889447236e-05, - "loss": 5.7261, - "step": 10297 - }, - { - "epoch": 5.3705345501955675, - "grad_norm": 1.5061653852462769, - "learning_rate": 9.015477386934674e-05, - "loss": 5.7205, - "step": 10298 - }, - { - "epoch": 5.371056062581486, - "grad_norm": 1.313819169998169, - "learning_rate": 9.01537688442211e-05, - "loss": 5.9347, - "step": 10299 - }, - { - "epoch": 5.3715775749674055, - "grad_norm": 1.448825716972351, - "learning_rate": 9.015276381909548e-05, - "loss": 5.5963, - "step": 10300 - }, - { - "epoch": 5.372099087353325, - "grad_norm": 1.6235548257827759, - "learning_rate": 9.015175879396985e-05, - "loss": 5.4979, - "step": 10301 - }, - { - "epoch": 5.3726205997392436, - "grad_norm": 1.4101418256759644, - "learning_rate": 9.015075376884423e-05, - "loss": 5.6974, - "step": 10302 - }, - { - "epoch": 5.373142112125163, - "grad_norm": 1.4671521186828613, - "learning_rate": 9.01497487437186e-05, - "loss": 5.1368, - "step": 10303 - }, - { - "epoch": 5.3736636245110825, - "grad_norm": 1.4604076147079468, - "learning_rate": 9.014874371859297e-05, - "loss": 5.3258, - "step": 10304 - }, - { - "epoch": 5.374185136897001, - "grad_norm": 1.5924981832504272, - "learning_rate": 9.014773869346735e-05, - "loss": 5.6054, - "step": 10305 - }, - { - "epoch": 5.3747066492829205, - "grad_norm": 1.3894344568252563, - "learning_rate": 9.014673366834171e-05, - "loss": 5.7446, - "step": 10306 - }, - { - "epoch": 5.37522816166884, - "grad_norm": 1.40617835521698, - "learning_rate": 9.014572864321609e-05, - "loss": 5.9261, - "step": 10307 - }, - { - "epoch": 5.375749674054759, - "grad_norm": 1.6797858476638794, - "learning_rate": 9.014472361809045e-05, - "loss": 5.0447, - "step": 10308 - }, - { - "epoch": 5.376271186440678, - "grad_norm": 1.363195538520813, - "learning_rate": 9.014371859296483e-05, - "loss": 5.5461, - "step": 10309 - }, - { - "epoch": 5.3767926988265975, - "grad_norm": 1.4109382629394531, - "learning_rate": 9.01427135678392e-05, - "loss": 5.6599, - "step": 10310 - }, - { - "epoch": 5.377314211212516, - "grad_norm": 1.3722935914993286, - "learning_rate": 9.014170854271357e-05, - "loss": 5.95, - "step": 10311 - }, - { - "epoch": 5.3778357235984355, - "grad_norm": 1.544797658920288, - "learning_rate": 9.014070351758795e-05, - "loss": 5.4314, - "step": 10312 - }, - { - "epoch": 5.378357235984355, - "grad_norm": 1.5354318618774414, - "learning_rate": 9.013969849246233e-05, - "loss": 5.6137, - "step": 10313 - }, - { - "epoch": 5.378878748370274, - "grad_norm": 1.3543375730514526, - "learning_rate": 9.013869346733669e-05, - "loss": 5.7397, - "step": 10314 - }, - { - "epoch": 5.379400260756193, - "grad_norm": 1.5135337114334106, - "learning_rate": 9.013768844221106e-05, - "loss": 5.6471, - "step": 10315 - }, - { - "epoch": 5.3799217731421125, - "grad_norm": 1.3590775728225708, - "learning_rate": 9.013668341708543e-05, - "loss": 5.8951, - "step": 10316 - }, - { - "epoch": 5.380443285528031, - "grad_norm": 1.4523017406463623, - "learning_rate": 9.01356783919598e-05, - "loss": 5.0874, - "step": 10317 - }, - { - "epoch": 5.3809647979139505, - "grad_norm": 1.4886469841003418, - "learning_rate": 9.013467336683418e-05, - "loss": 5.597, - "step": 10318 - }, - { - "epoch": 5.38148631029987, - "grad_norm": 1.4635052680969238, - "learning_rate": 9.013366834170854e-05, - "loss": 5.7498, - "step": 10319 - }, - { - "epoch": 5.382007822685789, - "grad_norm": 1.4065181016921997, - "learning_rate": 9.013266331658292e-05, - "loss": 5.8623, - "step": 10320 - }, - { - "epoch": 5.382529335071708, - "grad_norm": 1.755972981452942, - "learning_rate": 9.013165829145728e-05, - "loss": 5.6345, - "step": 10321 - }, - { - "epoch": 5.3830508474576275, - "grad_norm": 1.532627820968628, - "learning_rate": 9.013065326633166e-05, - "loss": 5.9674, - "step": 10322 - }, - { - "epoch": 5.383572359843546, - "grad_norm": 1.3523974418640137, - "learning_rate": 9.012964824120604e-05, - "loss": 5.5732, - "step": 10323 - }, - { - "epoch": 5.384093872229466, - "grad_norm": 1.3074290752410889, - "learning_rate": 9.012864321608042e-05, - "loss": 5.7384, - "step": 10324 - }, - { - "epoch": 5.384615384615385, - "grad_norm": 1.3894941806793213, - "learning_rate": 9.012763819095478e-05, - "loss": 6.211, - "step": 10325 - }, - { - "epoch": 5.385136897001304, - "grad_norm": 1.5131900310516357, - "learning_rate": 9.012663316582916e-05, - "loss": 5.2463, - "step": 10326 - }, - { - "epoch": 5.385658409387223, - "grad_norm": 1.3227185010910034, - "learning_rate": 9.012562814070352e-05, - "loss": 5.946, - "step": 10327 - }, - { - "epoch": 5.386179921773142, - "grad_norm": 1.3603779077529907, - "learning_rate": 9.012462311557789e-05, - "loss": 5.5044, - "step": 10328 - }, - { - "epoch": 5.386701434159061, - "grad_norm": 1.5274333953857422, - "learning_rate": 9.012361809045226e-05, - "loss": 5.7537, - "step": 10329 - }, - { - "epoch": 5.387222946544981, - "grad_norm": 1.4906944036483765, - "learning_rate": 9.012261306532663e-05, - "loss": 5.6782, - "step": 10330 - }, - { - "epoch": 5.3877444589309, - "grad_norm": 1.473856806755066, - "learning_rate": 9.0121608040201e-05, - "loss": 5.7622, - "step": 10331 - }, - { - "epoch": 5.388265971316819, - "grad_norm": 1.5477938652038574, - "learning_rate": 9.012060301507537e-05, - "loss": 5.281, - "step": 10332 - }, - { - "epoch": 5.388787483702738, - "grad_norm": 1.6846359968185425, - "learning_rate": 9.011959798994975e-05, - "loss": 4.9587, - "step": 10333 - }, - { - "epoch": 5.389308996088657, - "grad_norm": 1.5388920307159424, - "learning_rate": 9.011859296482413e-05, - "loss": 6.0003, - "step": 10334 - }, - { - "epoch": 5.389830508474576, - "grad_norm": 1.4506977796554565, - "learning_rate": 9.01175879396985e-05, - "loss": 5.8095, - "step": 10335 - }, - { - "epoch": 5.390352020860496, - "grad_norm": 1.391053557395935, - "learning_rate": 9.011658291457287e-05, - "loss": 6.0712, - "step": 10336 - }, - { - "epoch": 5.390873533246415, - "grad_norm": 1.425901174545288, - "learning_rate": 9.011557788944725e-05, - "loss": 5.5349, - "step": 10337 - }, - { - "epoch": 5.391395045632334, - "grad_norm": 1.3986331224441528, - "learning_rate": 9.011457286432161e-05, - "loss": 5.7131, - "step": 10338 - }, - { - "epoch": 5.391916558018253, - "grad_norm": 1.4293824434280396, - "learning_rate": 9.011356783919599e-05, - "loss": 5.9808, - "step": 10339 - }, - { - "epoch": 5.392438070404172, - "grad_norm": 1.7722313404083252, - "learning_rate": 9.011256281407035e-05, - "loss": 5.4427, - "step": 10340 - }, - { - "epoch": 5.392959582790091, - "grad_norm": 1.4018216133117676, - "learning_rate": 9.011155778894472e-05, - "loss": 5.7794, - "step": 10341 - }, - { - "epoch": 5.393481095176011, - "grad_norm": 1.395029067993164, - "learning_rate": 9.01105527638191e-05, - "loss": 5.6949, - "step": 10342 - }, - { - "epoch": 5.39400260756193, - "grad_norm": 1.4609378576278687, - "learning_rate": 9.010954773869347e-05, - "loss": 5.4976, - "step": 10343 - }, - { - "epoch": 5.394524119947849, - "grad_norm": 1.47409188747406, - "learning_rate": 9.010854271356785e-05, - "loss": 5.5711, - "step": 10344 - }, - { - "epoch": 5.395045632333768, - "grad_norm": 1.441180944442749, - "learning_rate": 9.010753768844222e-05, - "loss": 5.9536, - "step": 10345 - }, - { - "epoch": 5.395567144719687, - "grad_norm": 1.5181047916412354, - "learning_rate": 9.010653266331659e-05, - "loss": 5.8325, - "step": 10346 - }, - { - "epoch": 5.396088657105606, - "grad_norm": 1.6373478174209595, - "learning_rate": 9.010552763819096e-05, - "loss": 5.5028, - "step": 10347 - }, - { - "epoch": 5.396610169491526, - "grad_norm": 1.4294440746307373, - "learning_rate": 9.010452261306533e-05, - "loss": 5.6257, - "step": 10348 - }, - { - "epoch": 5.397131681877444, - "grad_norm": 1.5571463108062744, - "learning_rate": 9.01035175879397e-05, - "loss": 5.0681, - "step": 10349 - }, - { - "epoch": 5.397653194263364, - "grad_norm": 1.546956181526184, - "learning_rate": 9.010251256281408e-05, - "loss": 5.8283, - "step": 10350 - }, - { - "epoch": 5.398174706649283, - "grad_norm": 1.402212142944336, - "learning_rate": 9.010150753768844e-05, - "loss": 5.3116, - "step": 10351 - }, - { - "epoch": 5.398696219035202, - "grad_norm": 1.5935919284820557, - "learning_rate": 9.010050251256282e-05, - "loss": 5.2964, - "step": 10352 - }, - { - "epoch": 5.399217731421121, - "grad_norm": 1.4337387084960938, - "learning_rate": 9.009949748743718e-05, - "loss": 5.4802, - "step": 10353 - }, - { - "epoch": 5.399739243807041, - "grad_norm": 1.3245999813079834, - "learning_rate": 9.009849246231156e-05, - "loss": 5.709, - "step": 10354 - }, - { - "epoch": 5.400260756192959, - "grad_norm": 1.9014265537261963, - "learning_rate": 9.009748743718594e-05, - "loss": 5.4192, - "step": 10355 - }, - { - "epoch": 5.400782268578879, - "grad_norm": 1.7964805364608765, - "learning_rate": 9.00964824120603e-05, - "loss": 5.5363, - "step": 10356 - }, - { - "epoch": 5.401303780964798, - "grad_norm": 1.516506314277649, - "learning_rate": 9.009547738693468e-05, - "loss": 4.9884, - "step": 10357 - }, - { - "epoch": 5.401825293350717, - "grad_norm": 1.4831973314285278, - "learning_rate": 9.009447236180905e-05, - "loss": 5.979, - "step": 10358 - }, - { - "epoch": 5.402346805736636, - "grad_norm": 1.4365599155426025, - "learning_rate": 9.009346733668342e-05, - "loss": 5.5484, - "step": 10359 - }, - { - "epoch": 5.402868318122556, - "grad_norm": 1.5162920951843262, - "learning_rate": 9.009246231155779e-05, - "loss": 5.5688, - "step": 10360 - }, - { - "epoch": 5.403389830508474, - "grad_norm": 1.4060022830963135, - "learning_rate": 9.009145728643217e-05, - "loss": 5.4415, - "step": 10361 - }, - { - "epoch": 5.403911342894394, - "grad_norm": 1.5157737731933594, - "learning_rate": 9.009045226130653e-05, - "loss": 5.4192, - "step": 10362 - }, - { - "epoch": 5.404432855280313, - "grad_norm": 1.3115202188491821, - "learning_rate": 9.008944723618091e-05, - "loss": 6.0647, - "step": 10363 - }, - { - "epoch": 5.404954367666232, - "grad_norm": 1.4188824892044067, - "learning_rate": 9.008844221105529e-05, - "loss": 5.6286, - "step": 10364 - }, - { - "epoch": 5.405475880052151, - "grad_norm": 1.4039252996444702, - "learning_rate": 9.008743718592966e-05, - "loss": 5.7515, - "step": 10365 - }, - { - "epoch": 5.405997392438071, - "grad_norm": 1.4414304494857788, - "learning_rate": 9.008643216080403e-05, - "loss": 6.0306, - "step": 10366 - }, - { - "epoch": 5.406518904823989, - "grad_norm": 1.345015287399292, - "learning_rate": 9.008542713567839e-05, - "loss": 6.0037, - "step": 10367 - }, - { - "epoch": 5.407040417209909, - "grad_norm": 1.5031777620315552, - "learning_rate": 9.008442211055277e-05, - "loss": 5.6039, - "step": 10368 - }, - { - "epoch": 5.407561929595828, - "grad_norm": 1.443379521369934, - "learning_rate": 9.008341708542713e-05, - "loss": 5.6645, - "step": 10369 - }, - { - "epoch": 5.408083441981747, - "grad_norm": 1.408402442932129, - "learning_rate": 9.008241206030151e-05, - "loss": 5.8753, - "step": 10370 - }, - { - "epoch": 5.408604954367666, - "grad_norm": 1.420959711074829, - "learning_rate": 9.008140703517588e-05, - "loss": 5.7371, - "step": 10371 - }, - { - "epoch": 5.409126466753586, - "grad_norm": 1.4725791215896606, - "learning_rate": 9.008040201005025e-05, - "loss": 5.6141, - "step": 10372 - }, - { - "epoch": 5.409647979139504, - "grad_norm": 1.3845386505126953, - "learning_rate": 9.007939698492462e-05, - "loss": 5.6863, - "step": 10373 - }, - { - "epoch": 5.410169491525424, - "grad_norm": 1.3944177627563477, - "learning_rate": 9.0078391959799e-05, - "loss": 5.6025, - "step": 10374 - }, - { - "epoch": 5.410691003911343, - "grad_norm": 1.349969744682312, - "learning_rate": 9.007738693467337e-05, - "loss": 5.8363, - "step": 10375 - }, - { - "epoch": 5.411212516297262, - "grad_norm": 1.4599504470825195, - "learning_rate": 9.007638190954775e-05, - "loss": 5.7438, - "step": 10376 - }, - { - "epoch": 5.411734028683181, - "grad_norm": 1.8842430114746094, - "learning_rate": 9.007537688442212e-05, - "loss": 5.3469, - "step": 10377 - }, - { - "epoch": 5.412255541069101, - "grad_norm": 1.8325340747833252, - "learning_rate": 9.00743718592965e-05, - "loss": 5.3805, - "step": 10378 - }, - { - "epoch": 5.412777053455019, - "grad_norm": 1.5064162015914917, - "learning_rate": 9.007336683417086e-05, - "loss": 5.5533, - "step": 10379 - }, - { - "epoch": 5.413298565840939, - "grad_norm": 1.4795126914978027, - "learning_rate": 9.007236180904524e-05, - "loss": 5.8021, - "step": 10380 - }, - { - "epoch": 5.413820078226858, - "grad_norm": 1.48528254032135, - "learning_rate": 9.00713567839196e-05, - "loss": 5.5695, - "step": 10381 - }, - { - "epoch": 5.414341590612777, - "grad_norm": 1.524933099746704, - "learning_rate": 9.007035175879396e-05, - "loss": 5.5983, - "step": 10382 - }, - { - "epoch": 5.414863102998696, - "grad_norm": 1.5218279361724854, - "learning_rate": 9.006934673366834e-05, - "loss": 5.5254, - "step": 10383 - }, - { - "epoch": 5.415384615384616, - "grad_norm": 1.6243934631347656, - "learning_rate": 9.006834170854272e-05, - "loss": 5.4019, - "step": 10384 - }, - { - "epoch": 5.415906127770534, - "grad_norm": 1.4459525346755981, - "learning_rate": 9.00673366834171e-05, - "loss": 5.9141, - "step": 10385 - }, - { - "epoch": 5.416427640156454, - "grad_norm": 1.442171573638916, - "learning_rate": 9.006633165829146e-05, - "loss": 5.6479, - "step": 10386 - }, - { - "epoch": 5.416949152542373, - "grad_norm": 1.3921960592269897, - "learning_rate": 9.006532663316584e-05, - "loss": 6.0575, - "step": 10387 - }, - { - "epoch": 5.417470664928292, - "grad_norm": 1.4171091318130493, - "learning_rate": 9.00643216080402e-05, - "loss": 5.7419, - "step": 10388 - }, - { - "epoch": 5.417992177314211, - "grad_norm": 1.5791563987731934, - "learning_rate": 9.006331658291458e-05, - "loss": 4.8853, - "step": 10389 - }, - { - "epoch": 5.418513689700131, - "grad_norm": 1.4508466720581055, - "learning_rate": 9.006231155778895e-05, - "loss": 5.8386, - "step": 10390 - }, - { - "epoch": 5.419035202086049, - "grad_norm": 1.5315054655075073, - "learning_rate": 9.006130653266332e-05, - "loss": 5.0505, - "step": 10391 - }, - { - "epoch": 5.419556714471969, - "grad_norm": 1.4277361631393433, - "learning_rate": 9.006030150753769e-05, - "loss": 5.2858, - "step": 10392 - }, - { - "epoch": 5.420078226857888, - "grad_norm": 1.3090296983718872, - "learning_rate": 9.005929648241207e-05, - "loss": 5.8854, - "step": 10393 - }, - { - "epoch": 5.420599739243807, - "grad_norm": 1.3864718675613403, - "learning_rate": 9.005829145728643e-05, - "loss": 5.7432, - "step": 10394 - }, - { - "epoch": 5.421121251629726, - "grad_norm": 1.472598671913147, - "learning_rate": 9.005728643216081e-05, - "loss": 5.8862, - "step": 10395 - }, - { - "epoch": 5.421642764015646, - "grad_norm": 1.3983243703842163, - "learning_rate": 9.005628140703519e-05, - "loss": 5.0842, - "step": 10396 - }, - { - "epoch": 5.422164276401564, - "grad_norm": 1.3727399110794067, - "learning_rate": 9.005527638190955e-05, - "loss": 5.7735, - "step": 10397 - }, - { - "epoch": 5.422685788787484, - "grad_norm": 1.494642972946167, - "learning_rate": 9.005427135678393e-05, - "loss": 5.3708, - "step": 10398 - }, - { - "epoch": 5.423207301173403, - "grad_norm": 1.4355601072311401, - "learning_rate": 9.005326633165829e-05, - "loss": 5.5744, - "step": 10399 - }, - { - "epoch": 5.423728813559322, - "grad_norm": 1.460964560508728, - "learning_rate": 9.005226130653267e-05, - "loss": 5.8205, - "step": 10400 - }, - { - "epoch": 5.424250325945241, - "grad_norm": 1.5543617010116577, - "learning_rate": 9.005125628140703e-05, - "loss": 5.3298, - "step": 10401 - }, - { - "epoch": 5.424771838331161, - "grad_norm": 1.6537758111953735, - "learning_rate": 9.005025125628141e-05, - "loss": 5.3461, - "step": 10402 - }, - { - "epoch": 5.425293350717079, - "grad_norm": 1.6786447763442993, - "learning_rate": 9.004924623115578e-05, - "loss": 5.7416, - "step": 10403 - }, - { - "epoch": 5.425814863102999, - "grad_norm": 1.506950855255127, - "learning_rate": 9.004824120603015e-05, - "loss": 5.6922, - "step": 10404 - }, - { - "epoch": 5.426336375488918, - "grad_norm": 1.486511468887329, - "learning_rate": 9.004723618090453e-05, - "loss": 5.9835, - "step": 10405 - }, - { - "epoch": 5.426857887874837, - "grad_norm": 1.4177052974700928, - "learning_rate": 9.004623115577891e-05, - "loss": 5.6917, - "step": 10406 - }, - { - "epoch": 5.427379400260756, - "grad_norm": 1.8048750162124634, - "learning_rate": 9.004522613065327e-05, - "loss": 5.605, - "step": 10407 - }, - { - "epoch": 5.427900912646676, - "grad_norm": 1.6026074886322021, - "learning_rate": 9.004422110552764e-05, - "loss": 5.6529, - "step": 10408 - }, - { - "epoch": 5.428422425032594, - "grad_norm": 1.5728422403335571, - "learning_rate": 9.004321608040202e-05, - "loss": 5.3737, - "step": 10409 - }, - { - "epoch": 5.428943937418514, - "grad_norm": 1.5810431241989136, - "learning_rate": 9.004221105527638e-05, - "loss": 5.3264, - "step": 10410 - }, - { - "epoch": 5.429465449804433, - "grad_norm": 1.4311530590057373, - "learning_rate": 9.004120603015076e-05, - "loss": 5.6839, - "step": 10411 - }, - { - "epoch": 5.429986962190352, - "grad_norm": 1.4722007513046265, - "learning_rate": 9.004020100502512e-05, - "loss": 5.7458, - "step": 10412 - }, - { - "epoch": 5.430508474576271, - "grad_norm": 1.6334959268569946, - "learning_rate": 9.00391959798995e-05, - "loss": 5.4403, - "step": 10413 - }, - { - "epoch": 5.431029986962191, - "grad_norm": 1.2720927000045776, - "learning_rate": 9.003819095477387e-05, - "loss": 5.9236, - "step": 10414 - }, - { - "epoch": 5.431551499348109, - "grad_norm": 1.6285679340362549, - "learning_rate": 9.003718592964824e-05, - "loss": 5.1026, - "step": 10415 - }, - { - "epoch": 5.432073011734029, - "grad_norm": 1.603682041168213, - "learning_rate": 9.003618090452262e-05, - "loss": 5.4022, - "step": 10416 - }, - { - "epoch": 5.432594524119948, - "grad_norm": 1.5664619207382202, - "learning_rate": 9.0035175879397e-05, - "loss": 5.3534, - "step": 10417 - }, - { - "epoch": 5.433116036505867, - "grad_norm": 1.4744374752044678, - "learning_rate": 9.003417085427136e-05, - "loss": 5.4306, - "step": 10418 - }, - { - "epoch": 5.433637548891786, - "grad_norm": 1.4028116464614868, - "learning_rate": 9.003316582914574e-05, - "loss": 6.0245, - "step": 10419 - }, - { - "epoch": 5.434159061277706, - "grad_norm": 1.508144497871399, - "learning_rate": 9.00321608040201e-05, - "loss": 5.9922, - "step": 10420 - }, - { - "epoch": 5.434680573663624, - "grad_norm": 1.356785535812378, - "learning_rate": 9.003115577889447e-05, - "loss": 5.9281, - "step": 10421 - }, - { - "epoch": 5.435202086049544, - "grad_norm": 1.443791389465332, - "learning_rate": 9.003015075376885e-05, - "loss": 5.7952, - "step": 10422 - }, - { - "epoch": 5.435723598435462, - "grad_norm": 1.430184006690979, - "learning_rate": 9.002914572864321e-05, - "loss": 5.5113, - "step": 10423 - }, - { - "epoch": 5.436245110821382, - "grad_norm": 1.3964896202087402, - "learning_rate": 9.002814070351759e-05, - "loss": 5.8624, - "step": 10424 - }, - { - "epoch": 5.436766623207301, - "grad_norm": 1.4694124460220337, - "learning_rate": 9.002713567839197e-05, - "loss": 5.7595, - "step": 10425 - }, - { - "epoch": 5.437288135593221, - "grad_norm": 1.5289971828460693, - "learning_rate": 9.002613065326634e-05, - "loss": 5.6549, - "step": 10426 - }, - { - "epoch": 5.437809647979139, - "grad_norm": 1.3552371263504028, - "learning_rate": 9.002512562814071e-05, - "loss": 5.6389, - "step": 10427 - }, - { - "epoch": 5.438331160365059, - "grad_norm": 1.3424198627471924, - "learning_rate": 9.002412060301509e-05, - "loss": 5.9506, - "step": 10428 - }, - { - "epoch": 5.438852672750977, - "grad_norm": 1.3919686079025269, - "learning_rate": 9.002311557788945e-05, - "loss": 5.1115, - "step": 10429 - }, - { - "epoch": 5.439374185136897, - "grad_norm": 1.4892040491104126, - "learning_rate": 9.002211055276383e-05, - "loss": 5.6385, - "step": 10430 - }, - { - "epoch": 5.439895697522816, - "grad_norm": 1.4934167861938477, - "learning_rate": 9.00211055276382e-05, - "loss": 5.581, - "step": 10431 - }, - { - "epoch": 5.440417209908736, - "grad_norm": 1.3194286823272705, - "learning_rate": 9.002010050251257e-05, - "loss": 5.5712, - "step": 10432 - }, - { - "epoch": 5.440938722294654, - "grad_norm": 1.5284663438796997, - "learning_rate": 9.001909547738694e-05, - "loss": 4.9961, - "step": 10433 - }, - { - "epoch": 5.441460234680574, - "grad_norm": 1.4860661029815674, - "learning_rate": 9.00180904522613e-05, - "loss": 5.5736, - "step": 10434 - }, - { - "epoch": 5.441981747066492, - "grad_norm": 1.4548406600952148, - "learning_rate": 9.001708542713568e-05, - "loss": 5.8606, - "step": 10435 - }, - { - "epoch": 5.442503259452412, - "grad_norm": 1.420127272605896, - "learning_rate": 9.001608040201006e-05, - "loss": 6.01, - "step": 10436 - }, - { - "epoch": 5.443024771838331, - "grad_norm": 1.5450996160507202, - "learning_rate": 9.001507537688443e-05, - "loss": 5.6367, - "step": 10437 - }, - { - "epoch": 5.443546284224251, - "grad_norm": 1.3860366344451904, - "learning_rate": 9.00140703517588e-05, - "loss": 5.9346, - "step": 10438 - }, - { - "epoch": 5.444067796610169, - "grad_norm": 1.525604486465454, - "learning_rate": 9.001306532663318e-05, - "loss": 5.5312, - "step": 10439 - }, - { - "epoch": 5.444589308996089, - "grad_norm": 1.4250444173812866, - "learning_rate": 9.001206030150754e-05, - "loss": 5.3246, - "step": 10440 - }, - { - "epoch": 5.445110821382007, - "grad_norm": 1.4571789503097534, - "learning_rate": 9.001105527638192e-05, - "loss": 5.5194, - "step": 10441 - }, - { - "epoch": 5.445632333767927, - "grad_norm": 1.4137465953826904, - "learning_rate": 9.001005025125628e-05, - "loss": 5.726, - "step": 10442 - }, - { - "epoch": 5.446153846153846, - "grad_norm": 1.3715574741363525, - "learning_rate": 9.000904522613066e-05, - "loss": 5.3377, - "step": 10443 - }, - { - "epoch": 5.446675358539765, - "grad_norm": 1.4113187789916992, - "learning_rate": 9.000804020100502e-05, - "loss": 5.5306, - "step": 10444 - }, - { - "epoch": 5.447196870925684, - "grad_norm": 1.4104368686676025, - "learning_rate": 9.00070351758794e-05, - "loss": 5.8549, - "step": 10445 - }, - { - "epoch": 5.447718383311604, - "grad_norm": 1.5098892450332642, - "learning_rate": 9.000603015075378e-05, - "loss": 5.3358, - "step": 10446 - }, - { - "epoch": 5.448239895697522, - "grad_norm": 1.4788087606430054, - "learning_rate": 9.000502512562814e-05, - "loss": 5.8211, - "step": 10447 - }, - { - "epoch": 5.448761408083442, - "grad_norm": 1.470460057258606, - "learning_rate": 9.000402010050252e-05, - "loss": 5.5852, - "step": 10448 - }, - { - "epoch": 5.449282920469361, - "grad_norm": 1.3504749536514282, - "learning_rate": 9.000301507537689e-05, - "loss": 5.8415, - "step": 10449 - }, - { - "epoch": 5.44980443285528, - "grad_norm": 1.4444488286972046, - "learning_rate": 9.000201005025126e-05, - "loss": 5.629, - "step": 10450 - }, - { - "epoch": 5.450325945241199, - "grad_norm": 1.4733641147613525, - "learning_rate": 9.000100502512563e-05, - "loss": 5.8316, - "step": 10451 - }, - { - "epoch": 5.450847457627119, - "grad_norm": 1.4013129472732544, - "learning_rate": 9e-05, - "loss": 5.2798, - "step": 10452 - }, - { - "epoch": 5.451368970013037, - "grad_norm": 1.5350227355957031, - "learning_rate": 8.999899497487437e-05, - "loss": 5.8352, - "step": 10453 - }, - { - "epoch": 5.451890482398957, - "grad_norm": 1.4888927936553955, - "learning_rate": 8.999798994974875e-05, - "loss": 5.8298, - "step": 10454 - }, - { - "epoch": 5.452411994784876, - "grad_norm": 1.5187705755233765, - "learning_rate": 8.999698492462311e-05, - "loss": 5.5905, - "step": 10455 - }, - { - "epoch": 5.452933507170795, - "grad_norm": 1.486060380935669, - "learning_rate": 8.999597989949749e-05, - "loss": 5.7646, - "step": 10456 - }, - { - "epoch": 5.453455019556714, - "grad_norm": 1.5799533128738403, - "learning_rate": 8.999497487437187e-05, - "loss": 5.3956, - "step": 10457 - }, - { - "epoch": 5.453976531942634, - "grad_norm": 1.5062335729599, - "learning_rate": 8.999396984924625e-05, - "loss": 5.7285, - "step": 10458 - }, - { - "epoch": 5.4544980443285525, - "grad_norm": 1.3899563550949097, - "learning_rate": 8.999296482412061e-05, - "loss": 5.6554, - "step": 10459 - }, - { - "epoch": 5.455019556714472, - "grad_norm": 1.5313035249710083, - "learning_rate": 8.999195979899497e-05, - "loss": 5.0297, - "step": 10460 - }, - { - "epoch": 5.455541069100391, - "grad_norm": 1.498185157775879, - "learning_rate": 8.999095477386935e-05, - "loss": 5.9457, - "step": 10461 - }, - { - "epoch": 5.45606258148631, - "grad_norm": 1.371760606765747, - "learning_rate": 8.998994974874372e-05, - "loss": 5.8825, - "step": 10462 - }, - { - "epoch": 5.456584093872229, - "grad_norm": 1.4524531364440918, - "learning_rate": 8.99889447236181e-05, - "loss": 5.3893, - "step": 10463 - }, - { - "epoch": 5.457105606258149, - "grad_norm": 1.5481804609298706, - "learning_rate": 8.998793969849246e-05, - "loss": 5.2363, - "step": 10464 - }, - { - "epoch": 5.4576271186440675, - "grad_norm": 1.421221375465393, - "learning_rate": 8.998693467336684e-05, - "loss": 5.6129, - "step": 10465 - }, - { - "epoch": 5.458148631029987, - "grad_norm": 1.669452428817749, - "learning_rate": 8.998592964824121e-05, - "loss": 4.9568, - "step": 10466 - }, - { - "epoch": 5.458670143415906, - "grad_norm": 1.4031566381454468, - "learning_rate": 8.998492462311559e-05, - "loss": 5.6301, - "step": 10467 - }, - { - "epoch": 5.459191655801825, - "grad_norm": 1.5026051998138428, - "learning_rate": 8.998391959798996e-05, - "loss": 5.4623, - "step": 10468 - }, - { - "epoch": 5.459713168187744, - "grad_norm": 1.549522876739502, - "learning_rate": 8.998291457286433e-05, - "loss": 5.6162, - "step": 10469 - }, - { - "epoch": 5.460234680573664, - "grad_norm": 1.5481055974960327, - "learning_rate": 8.99819095477387e-05, - "loss": 5.5638, - "step": 10470 - }, - { - "epoch": 5.4607561929595825, - "grad_norm": 1.3635905981063843, - "learning_rate": 8.998090452261308e-05, - "loss": 5.6781, - "step": 10471 - }, - { - "epoch": 5.461277705345502, - "grad_norm": 1.4630109071731567, - "learning_rate": 8.997989949748744e-05, - "loss": 5.7109, - "step": 10472 - }, - { - "epoch": 5.461799217731421, - "grad_norm": 1.8126877546310425, - "learning_rate": 8.997889447236182e-05, - "loss": 5.443, - "step": 10473 - }, - { - "epoch": 5.46232073011734, - "grad_norm": 1.3666918277740479, - "learning_rate": 8.997788944723618e-05, - "loss": 5.8661, - "step": 10474 - }, - { - "epoch": 5.4628422425032594, - "grad_norm": 1.5255168676376343, - "learning_rate": 8.997688442211055e-05, - "loss": 5.4004, - "step": 10475 - }, - { - "epoch": 5.463363754889179, - "grad_norm": 1.488571286201477, - "learning_rate": 8.997587939698492e-05, - "loss": 5.7884, - "step": 10476 - }, - { - "epoch": 5.4638852672750975, - "grad_norm": 1.5981428623199463, - "learning_rate": 8.99748743718593e-05, - "loss": 5.505, - "step": 10477 - }, - { - "epoch": 5.464406779661017, - "grad_norm": 2.0508005619049072, - "learning_rate": 8.997386934673368e-05, - "loss": 5.2415, - "step": 10478 - }, - { - "epoch": 5.464928292046936, - "grad_norm": 1.5541373491287231, - "learning_rate": 8.997286432160804e-05, - "loss": 5.6101, - "step": 10479 - }, - { - "epoch": 5.465449804432855, - "grad_norm": 1.4701709747314453, - "learning_rate": 8.997185929648242e-05, - "loss": 6.086, - "step": 10480 - }, - { - "epoch": 5.4659713168187745, - "grad_norm": 1.4314124584197998, - "learning_rate": 8.997085427135679e-05, - "loss": 5.6981, - "step": 10481 - }, - { - "epoch": 5.466492829204694, - "grad_norm": 1.411684513092041, - "learning_rate": 8.996984924623116e-05, - "loss": 5.5636, - "step": 10482 - }, - { - "epoch": 5.4670143415906125, - "grad_norm": 1.3733701705932617, - "learning_rate": 8.996884422110553e-05, - "loss": 5.7607, - "step": 10483 - }, - { - "epoch": 5.467535853976532, - "grad_norm": 1.463255763053894, - "learning_rate": 8.99678391959799e-05, - "loss": 5.7746, - "step": 10484 - }, - { - "epoch": 5.468057366362451, - "grad_norm": 2.0161988735198975, - "learning_rate": 8.996683417085427e-05, - "loss": 5.6159, - "step": 10485 - }, - { - "epoch": 5.46857887874837, - "grad_norm": 1.43341863155365, - "learning_rate": 8.996582914572865e-05, - "loss": 5.5757, - "step": 10486 - }, - { - "epoch": 5.4691003911342895, - "grad_norm": 1.4315468072891235, - "learning_rate": 8.996482412060301e-05, - "loss": 5.6547, - "step": 10487 - }, - { - "epoch": 5.469621903520209, - "grad_norm": 1.3538097143173218, - "learning_rate": 8.996381909547739e-05, - "loss": 5.5146, - "step": 10488 - }, - { - "epoch": 5.4701434159061275, - "grad_norm": 1.4585342407226562, - "learning_rate": 8.996281407035177e-05, - "loss": 5.5846, - "step": 10489 - }, - { - "epoch": 5.470664928292047, - "grad_norm": 1.448188304901123, - "learning_rate": 8.996180904522613e-05, - "loss": 5.5696, - "step": 10490 - }, - { - "epoch": 5.4711864406779664, - "grad_norm": 1.3081797361373901, - "learning_rate": 8.996080402010051e-05, - "loss": 5.7939, - "step": 10491 - }, - { - "epoch": 5.471707953063885, - "grad_norm": 1.4029816389083862, - "learning_rate": 8.995979899497487e-05, - "loss": 5.978, - "step": 10492 - }, - { - "epoch": 5.4722294654498045, - "grad_norm": 1.3842742443084717, - "learning_rate": 8.995879396984925e-05, - "loss": 5.7605, - "step": 10493 - }, - { - "epoch": 5.472750977835724, - "grad_norm": 1.3958038091659546, - "learning_rate": 8.995778894472362e-05, - "loss": 5.9076, - "step": 10494 - }, - { - "epoch": 5.4732724902216425, - "grad_norm": 1.5873847007751465, - "learning_rate": 8.9956783919598e-05, - "loss": 5.4344, - "step": 10495 - }, - { - "epoch": 5.473794002607562, - "grad_norm": 1.4464925527572632, - "learning_rate": 8.995577889447236e-05, - "loss": 5.7357, - "step": 10496 - }, - { - "epoch": 5.4743155149934815, - "grad_norm": 1.6261836290359497, - "learning_rate": 8.995477386934674e-05, - "loss": 5.1764, - "step": 10497 - }, - { - "epoch": 5.4748370273794, - "grad_norm": 1.523697018623352, - "learning_rate": 8.995376884422111e-05, - "loss": 5.1895, - "step": 10498 - }, - { - "epoch": 5.4753585397653195, - "grad_norm": 1.4699403047561646, - "learning_rate": 8.995276381909549e-05, - "loss": 5.2165, - "step": 10499 - }, - { - "epoch": 5.475880052151239, - "grad_norm": 1.6217836141586304, - "learning_rate": 8.995175879396986e-05, - "loss": 5.5246, - "step": 10500 - }, - { - "epoch": 5.4764015645371575, - "grad_norm": 1.571385145187378, - "learning_rate": 8.995075376884422e-05, - "loss": 5.4143, - "step": 10501 - }, - { - "epoch": 5.476923076923077, - "grad_norm": 1.463789939880371, - "learning_rate": 8.99497487437186e-05, - "loss": 5.6644, - "step": 10502 - }, - { - "epoch": 5.4774445893089965, - "grad_norm": 1.4198336601257324, - "learning_rate": 8.994874371859296e-05, - "loss": 5.5575, - "step": 10503 - }, - { - "epoch": 5.477966101694915, - "grad_norm": 1.4517531394958496, - "learning_rate": 8.994773869346734e-05, - "loss": 5.8132, - "step": 10504 - }, - { - "epoch": 5.4784876140808345, - "grad_norm": 1.3597767353057861, - "learning_rate": 8.99467336683417e-05, - "loss": 6.0039, - "step": 10505 - }, - { - "epoch": 5.479009126466754, - "grad_norm": 1.5586787462234497, - "learning_rate": 8.994572864321608e-05, - "loss": 5.6402, - "step": 10506 - }, - { - "epoch": 5.4795306388526726, - "grad_norm": 1.3150954246520996, - "learning_rate": 8.994472361809045e-05, - "loss": 5.6866, - "step": 10507 - }, - { - "epoch": 5.480052151238592, - "grad_norm": 1.4818559885025024, - "learning_rate": 8.994371859296483e-05, - "loss": 5.6192, - "step": 10508 - }, - { - "epoch": 5.4805736636245115, - "grad_norm": 1.442344307899475, - "learning_rate": 8.99427135678392e-05, - "loss": 6.0299, - "step": 10509 - }, - { - "epoch": 5.48109517601043, - "grad_norm": 1.401706576347351, - "learning_rate": 8.994170854271358e-05, - "loss": 5.6443, - "step": 10510 - }, - { - "epoch": 5.4816166883963495, - "grad_norm": 1.3801261186599731, - "learning_rate": 8.994070351758795e-05, - "loss": 5.7512, - "step": 10511 - }, - { - "epoch": 5.482138200782269, - "grad_norm": 1.5764576196670532, - "learning_rate": 8.993969849246232e-05, - "loss": 5.3058, - "step": 10512 - }, - { - "epoch": 5.482659713168188, - "grad_norm": 1.63459312915802, - "learning_rate": 8.993869346733669e-05, - "loss": 5.0983, - "step": 10513 - }, - { - "epoch": 5.483181225554107, - "grad_norm": 1.4830843210220337, - "learning_rate": 8.993768844221105e-05, - "loss": 5.9483, - "step": 10514 - }, - { - "epoch": 5.4837027379400265, - "grad_norm": 1.5304527282714844, - "learning_rate": 8.993668341708543e-05, - "loss": 5.6194, - "step": 10515 - }, - { - "epoch": 5.484224250325945, - "grad_norm": 1.499582290649414, - "learning_rate": 8.99356783919598e-05, - "loss": 5.5716, - "step": 10516 - }, - { - "epoch": 5.4847457627118645, - "grad_norm": 1.3592082262039185, - "learning_rate": 8.993467336683417e-05, - "loss": 6.1083, - "step": 10517 - }, - { - "epoch": 5.485267275097783, - "grad_norm": 1.5519745349884033, - "learning_rate": 8.993366834170855e-05, - "loss": 5.4412, - "step": 10518 - }, - { - "epoch": 5.485788787483703, - "grad_norm": 1.4407771825790405, - "learning_rate": 8.993266331658293e-05, - "loss": 5.7656, - "step": 10519 - }, - { - "epoch": 5.486310299869622, - "grad_norm": 1.480281114578247, - "learning_rate": 8.993165829145729e-05, - "loss": 5.4931, - "step": 10520 - }, - { - "epoch": 5.4868318122555415, - "grad_norm": 1.3419010639190674, - "learning_rate": 8.993065326633167e-05, - "loss": 5.6337, - "step": 10521 - }, - { - "epoch": 5.48735332464146, - "grad_norm": 1.4074193239212036, - "learning_rate": 8.992964824120603e-05, - "loss": 5.8925, - "step": 10522 - }, - { - "epoch": 5.4878748370273795, - "grad_norm": 1.3500640392303467, - "learning_rate": 8.992864321608041e-05, - "loss": 5.5958, - "step": 10523 - }, - { - "epoch": 5.488396349413298, - "grad_norm": 1.3122191429138184, - "learning_rate": 8.992763819095478e-05, - "loss": 6.0418, - "step": 10524 - }, - { - "epoch": 5.488917861799218, - "grad_norm": 1.4566152095794678, - "learning_rate": 8.992663316582915e-05, - "loss": 5.188, - "step": 10525 - }, - { - "epoch": 5.489439374185137, - "grad_norm": Infinity, - "learning_rate": 8.992663316582915e-05, - "loss": 5.1295, - "step": 10526 - }, - { - "epoch": 5.4899608865710565, - "grad_norm": 1.4198960065841675, - "learning_rate": 8.992562814070352e-05, - "loss": 5.7096, - "step": 10527 - }, - { - "epoch": 5.490482398956975, - "grad_norm": 1.4923793077468872, - "learning_rate": 8.992462311557788e-05, - "loss": 5.4687, - "step": 10528 - }, - { - "epoch": 5.491003911342895, - "grad_norm": 1.4669264554977417, - "learning_rate": 8.992361809045226e-05, - "loss": 5.2823, - "step": 10529 - }, - { - "epoch": 5.491525423728813, - "grad_norm": 1.3114299774169922, - "learning_rate": 8.992261306532664e-05, - "loss": 5.67, - "step": 10530 - }, - { - "epoch": 5.492046936114733, - "grad_norm": 1.3704609870910645, - "learning_rate": 8.992160804020102e-05, - "loss": 5.568, - "step": 10531 - }, - { - "epoch": 5.492568448500652, - "grad_norm": 1.3184897899627686, - "learning_rate": 8.992060301507538e-05, - "loss": 5.702, - "step": 10532 - }, - { - "epoch": 5.493089960886571, - "grad_norm": 1.3617497682571411, - "learning_rate": 8.991959798994976e-05, - "loss": 5.3451, - "step": 10533 - }, - { - "epoch": 5.49361147327249, - "grad_norm": 1.4746029376983643, - "learning_rate": 8.991859296482412e-05, - "loss": 5.5987, - "step": 10534 - }, - { - "epoch": 5.49413298565841, - "grad_norm": 1.4093337059020996, - "learning_rate": 8.99175879396985e-05, - "loss": 5.7173, - "step": 10535 - }, - { - "epoch": 5.494654498044328, - "grad_norm": 1.4069545269012451, - "learning_rate": 8.991658291457286e-05, - "loss": 5.6955, - "step": 10536 - }, - { - "epoch": 5.495176010430248, - "grad_norm": 1.3906537294387817, - "learning_rate": 8.991557788944724e-05, - "loss": 5.9466, - "step": 10537 - }, - { - "epoch": 5.495697522816167, - "grad_norm": 1.407332181930542, - "learning_rate": 8.99145728643216e-05, - "loss": 5.4268, - "step": 10538 - }, - { - "epoch": 5.496219035202086, - "grad_norm": 1.3363192081451416, - "learning_rate": 8.991356783919598e-05, - "loss": 5.9909, - "step": 10539 - }, - { - "epoch": 5.496740547588005, - "grad_norm": 1.3480929136276245, - "learning_rate": 8.991256281407036e-05, - "loss": 5.886, - "step": 10540 - }, - { - "epoch": 5.497262059973925, - "grad_norm": 1.5294699668884277, - "learning_rate": 8.991155778894473e-05, - "loss": 5.1719, - "step": 10541 - }, - { - "epoch": 5.497783572359843, - "grad_norm": 1.4702763557434082, - "learning_rate": 8.99105527638191e-05, - "loss": 5.5723, - "step": 10542 - }, - { - "epoch": 5.498305084745763, - "grad_norm": 1.4049068689346313, - "learning_rate": 8.990954773869347e-05, - "loss": 5.7262, - "step": 10543 - }, - { - "epoch": 5.498826597131682, - "grad_norm": 1.4722094535827637, - "learning_rate": 8.990854271356785e-05, - "loss": 5.5266, - "step": 10544 - }, - { - "epoch": 5.499348109517601, - "grad_norm": 1.4187508821487427, - "learning_rate": 8.990753768844221e-05, - "loss": 5.7603, - "step": 10545 - }, - { - "epoch": 5.49986962190352, - "grad_norm": 1.4626790285110474, - "learning_rate": 8.990653266331659e-05, - "loss": 5.6251, - "step": 10546 - }, - { - "epoch": 5.50039113428944, - "grad_norm": 1.557761549949646, - "learning_rate": 8.990552763819095e-05, - "loss": 5.1311, - "step": 10547 - }, - { - "epoch": 5.500912646675358, - "grad_norm": 1.5824556350708008, - "learning_rate": 8.990452261306533e-05, - "loss": 5.7965, - "step": 10548 - }, - { - "epoch": 5.501434159061278, - "grad_norm": 1.4628400802612305, - "learning_rate": 8.99035175879397e-05, - "loss": 5.5349, - "step": 10549 - }, - { - "epoch": 5.501955671447197, - "grad_norm": 1.5432307720184326, - "learning_rate": 8.990251256281407e-05, - "loss": 5.6609, - "step": 10550 - }, - { - "epoch": 5.502477183833116, - "grad_norm": 1.4701995849609375, - "learning_rate": 8.990150753768845e-05, - "loss": 5.682, - "step": 10551 - }, - { - "epoch": 5.502998696219035, - "grad_norm": 1.5971382856369019, - "learning_rate": 8.990050251256283e-05, - "loss": 5.4788, - "step": 10552 - }, - { - "epoch": 5.503520208604955, - "grad_norm": 1.3728458881378174, - "learning_rate": 8.989949748743719e-05, - "loss": 5.7085, - "step": 10553 - }, - { - "epoch": 5.504041720990873, - "grad_norm": 1.4562474489212036, - "learning_rate": 8.989849246231157e-05, - "loss": 5.9446, - "step": 10554 - }, - { - "epoch": 5.504563233376793, - "grad_norm": 1.5724762678146362, - "learning_rate": 8.989748743718593e-05, - "loss": 5.259, - "step": 10555 - }, - { - "epoch": 5.505084745762712, - "grad_norm": 1.3382744789123535, - "learning_rate": 8.98964824120603e-05, - "loss": 5.6605, - "step": 10556 - }, - { - "epoch": 5.505606258148631, - "grad_norm": 1.3708651065826416, - "learning_rate": 8.989547738693468e-05, - "loss": 6.0056, - "step": 10557 - }, - { - "epoch": 5.50612777053455, - "grad_norm": 1.4889352321624756, - "learning_rate": 8.989447236180904e-05, - "loss": 5.1108, - "step": 10558 - }, - { - "epoch": 5.50664928292047, - "grad_norm": 1.4954710006713867, - "learning_rate": 8.989346733668342e-05, - "loss": 5.4475, - "step": 10559 - }, - { - "epoch": 5.507170795306388, - "grad_norm": 1.6496992111206055, - "learning_rate": 8.98924623115578e-05, - "loss": 4.9671, - "step": 10560 - }, - { - "epoch": 5.507692307692308, - "grad_norm": 1.398339867591858, - "learning_rate": 8.989145728643217e-05, - "loss": 5.4961, - "step": 10561 - }, - { - "epoch": 5.508213820078227, - "grad_norm": 1.358109474182129, - "learning_rate": 8.989045226130654e-05, - "loss": 5.5646, - "step": 10562 - }, - { - "epoch": 5.508735332464146, - "grad_norm": 1.439739465713501, - "learning_rate": 8.988944723618092e-05, - "loss": 5.8499, - "step": 10563 - }, - { - "epoch": 5.509256844850065, - "grad_norm": 1.5602704286575317, - "learning_rate": 8.988844221105528e-05, - "loss": 5.8156, - "step": 10564 - }, - { - "epoch": 5.509778357235985, - "grad_norm": 1.5778114795684814, - "learning_rate": 8.988743718592966e-05, - "loss": 5.7366, - "step": 10565 - }, - { - "epoch": 5.510299869621903, - "grad_norm": 1.4247666597366333, - "learning_rate": 8.988643216080402e-05, - "loss": 5.5688, - "step": 10566 - }, - { - "epoch": 5.510821382007823, - "grad_norm": 1.6040902137756348, - "learning_rate": 8.98854271356784e-05, - "loss": 5.7247, - "step": 10567 - }, - { - "epoch": 5.511342894393742, - "grad_norm": 1.5974342823028564, - "learning_rate": 8.988442211055276e-05, - "loss": 5.6316, - "step": 10568 - }, - { - "epoch": 5.511864406779661, - "grad_norm": 1.574756145477295, - "learning_rate": 8.988341708542713e-05, - "loss": 5.0064, - "step": 10569 - }, - { - "epoch": 5.51238591916558, - "grad_norm": 1.4884158372879028, - "learning_rate": 8.988241206030151e-05, - "loss": 5.8027, - "step": 10570 - }, - { - "epoch": 5.5129074315515, - "grad_norm": 1.619646430015564, - "learning_rate": 8.988140703517588e-05, - "loss": 5.8179, - "step": 10571 - }, - { - "epoch": 5.513428943937418, - "grad_norm": 1.6148234605789185, - "learning_rate": 8.988040201005026e-05, - "loss": 5.3252, - "step": 10572 - }, - { - "epoch": 5.513950456323338, - "grad_norm": 1.5819690227508545, - "learning_rate": 8.987939698492463e-05, - "loss": 5.5503, - "step": 10573 - }, - { - "epoch": 5.514471968709257, - "grad_norm": 1.5805248022079468, - "learning_rate": 8.9878391959799e-05, - "loss": 5.9462, - "step": 10574 - }, - { - "epoch": 5.514993481095176, - "grad_norm": 1.406213641166687, - "learning_rate": 8.987738693467337e-05, - "loss": 5.845, - "step": 10575 - }, - { - "epoch": 5.515514993481095, - "grad_norm": 1.6024516820907593, - "learning_rate": 8.987638190954775e-05, - "loss": 5.6243, - "step": 10576 - }, - { - "epoch": 5.516036505867015, - "grad_norm": 1.4450137615203857, - "learning_rate": 8.987537688442211e-05, - "loss": 5.6585, - "step": 10577 - }, - { - "epoch": 5.516558018252933, - "grad_norm": 1.4574956893920898, - "learning_rate": 8.987437185929649e-05, - "loss": 5.6695, - "step": 10578 - }, - { - "epoch": 5.517079530638853, - "grad_norm": 1.340336561203003, - "learning_rate": 8.987336683417085e-05, - "loss": 5.7985, - "step": 10579 - }, - { - "epoch": 5.517601043024772, - "grad_norm": 1.2795006036758423, - "learning_rate": 8.987236180904523e-05, - "loss": 5.8606, - "step": 10580 - }, - { - "epoch": 5.518122555410691, - "grad_norm": 1.5590077638626099, - "learning_rate": 8.987135678391961e-05, - "loss": 5.5806, - "step": 10581 - }, - { - "epoch": 5.51864406779661, - "grad_norm": 1.617225170135498, - "learning_rate": 8.987035175879397e-05, - "loss": 4.6873, - "step": 10582 - }, - { - "epoch": 5.51916558018253, - "grad_norm": 1.4698587656021118, - "learning_rate": 8.986934673366835e-05, - "loss": 6.0167, - "step": 10583 - }, - { - "epoch": 5.519687092568448, - "grad_norm": 1.4182524681091309, - "learning_rate": 8.986834170854272e-05, - "loss": 5.7454, - "step": 10584 - }, - { - "epoch": 5.520208604954368, - "grad_norm": 1.429352879524231, - "learning_rate": 8.986733668341709e-05, - "loss": 5.3285, - "step": 10585 - }, - { - "epoch": 5.520730117340287, - "grad_norm": 1.5638583898544312, - "learning_rate": 8.986633165829146e-05, - "loss": 5.0868, - "step": 10586 - }, - { - "epoch": 5.521251629726206, - "grad_norm": 1.6468461751937866, - "learning_rate": 8.986532663316584e-05, - "loss": 5.3661, - "step": 10587 - }, - { - "epoch": 5.521773142112125, - "grad_norm": 1.465616226196289, - "learning_rate": 8.98643216080402e-05, - "loss": 5.5858, - "step": 10588 - }, - { - "epoch": 5.522294654498045, - "grad_norm": 1.4571688175201416, - "learning_rate": 8.986331658291458e-05, - "loss": 5.5763, - "step": 10589 - }, - { - "epoch": 5.522816166883963, - "grad_norm": 1.4036407470703125, - "learning_rate": 8.986231155778894e-05, - "loss": 5.7032, - "step": 10590 - }, - { - "epoch": 5.523337679269883, - "grad_norm": 1.4435793161392212, - "learning_rate": 8.986130653266332e-05, - "loss": 5.3011, - "step": 10591 - }, - { - "epoch": 5.523859191655802, - "grad_norm": 1.4620122909545898, - "learning_rate": 8.98603015075377e-05, - "loss": 5.6311, - "step": 10592 - }, - { - "epoch": 5.524380704041721, - "grad_norm": 1.5172603130340576, - "learning_rate": 8.985929648241207e-05, - "loss": 5.853, - "step": 10593 - }, - { - "epoch": 5.52490221642764, - "grad_norm": 1.4170520305633545, - "learning_rate": 8.985829145728644e-05, - "loss": 5.4437, - "step": 10594 - }, - { - "epoch": 5.52542372881356, - "grad_norm": 1.4288333654403687, - "learning_rate": 8.98572864321608e-05, - "loss": 5.6101, - "step": 10595 - }, - { - "epoch": 5.525945241199478, - "grad_norm": 1.5294800996780396, - "learning_rate": 8.985628140703518e-05, - "loss": 5.4093, - "step": 10596 - }, - { - "epoch": 5.526466753585398, - "grad_norm": 1.3892121315002441, - "learning_rate": 8.985527638190955e-05, - "loss": 5.1796, - "step": 10597 - }, - { - "epoch": 5.526988265971317, - "grad_norm": 1.5848643779754639, - "learning_rate": 8.985427135678392e-05, - "loss": 5.3476, - "step": 10598 - }, - { - "epoch": 5.527509778357236, - "grad_norm": 1.5734790563583374, - "learning_rate": 8.985326633165829e-05, - "loss": 5.5475, - "step": 10599 - }, - { - "epoch": 5.528031290743155, - "grad_norm": 1.8406802415847778, - "learning_rate": 8.985226130653267e-05, - "loss": 5.2144, - "step": 10600 - }, - { - "epoch": 5.528552803129074, - "grad_norm": 1.4999756813049316, - "learning_rate": 8.985125628140704e-05, - "loss": 5.2671, - "step": 10601 - }, - { - "epoch": 5.529074315514993, - "grad_norm": 1.4691054821014404, - "learning_rate": 8.985025125628142e-05, - "loss": 5.7284, - "step": 10602 - }, - { - "epoch": 5.529595827900913, - "grad_norm": 1.4603995084762573, - "learning_rate": 8.984924623115579e-05, - "loss": 5.2496, - "step": 10603 - }, - { - "epoch": 5.530117340286832, - "grad_norm": 1.4993500709533691, - "learning_rate": 8.984824120603016e-05, - "loss": 5.475, - "step": 10604 - }, - { - "epoch": 5.530638852672751, - "grad_norm": 1.489986777305603, - "learning_rate": 8.984723618090453e-05, - "loss": 5.6777, - "step": 10605 - }, - { - "epoch": 5.53116036505867, - "grad_norm": 1.5664591789245605, - "learning_rate": 8.98462311557789e-05, - "loss": 5.1724, - "step": 10606 - }, - { - "epoch": 5.531681877444589, - "grad_norm": 1.8480204343795776, - "learning_rate": 8.984522613065327e-05, - "loss": 4.8966, - "step": 10607 - }, - { - "epoch": 5.532203389830508, - "grad_norm": 1.4696300029754639, - "learning_rate": 8.984422110552763e-05, - "loss": 5.4115, - "step": 10608 - }, - { - "epoch": 5.532724902216428, - "grad_norm": 1.3801639080047607, - "learning_rate": 8.984321608040201e-05, - "loss": 5.6578, - "step": 10609 - }, - { - "epoch": 5.533246414602347, - "grad_norm": 1.5254082679748535, - "learning_rate": 8.984221105527638e-05, - "loss": 5.3079, - "step": 10610 - }, - { - "epoch": 5.533767926988266, - "grad_norm": 1.5073308944702148, - "learning_rate": 8.984120603015075e-05, - "loss": 5.5272, - "step": 10611 - }, - { - "epoch": 5.534289439374185, - "grad_norm": 1.3704605102539062, - "learning_rate": 8.984020100502513e-05, - "loss": 5.6318, - "step": 10612 - }, - { - "epoch": 5.534810951760104, - "grad_norm": 1.4746230840682983, - "learning_rate": 8.983919597989951e-05, - "loss": 5.5075, - "step": 10613 - }, - { - "epoch": 5.535332464146023, - "grad_norm": 1.4431073665618896, - "learning_rate": 8.983819095477387e-05, - "loss": 5.389, - "step": 10614 - }, - { - "epoch": 5.535853976531943, - "grad_norm": 1.351820945739746, - "learning_rate": 8.983718592964825e-05, - "loss": 5.7438, - "step": 10615 - }, - { - "epoch": 5.536375488917862, - "grad_norm": 1.5364855527877808, - "learning_rate": 8.983618090452262e-05, - "loss": 5.4546, - "step": 10616 - }, - { - "epoch": 5.536897001303781, - "grad_norm": 1.5313864946365356, - "learning_rate": 8.9835175879397e-05, - "loss": 5.5136, - "step": 10617 - }, - { - "epoch": 5.5374185136897, - "grad_norm": 1.2924866676330566, - "learning_rate": 8.983417085427136e-05, - "loss": 6.0312, - "step": 10618 - }, - { - "epoch": 5.537940026075619, - "grad_norm": 1.4201401472091675, - "learning_rate": 8.983316582914574e-05, - "loss": 5.7921, - "step": 10619 - }, - { - "epoch": 5.538461538461538, - "grad_norm": 1.2767994403839111, - "learning_rate": 8.98321608040201e-05, - "loss": 6.0358, - "step": 10620 - }, - { - "epoch": 5.538983050847458, - "grad_norm": 1.6226348876953125, - "learning_rate": 8.983115577889448e-05, - "loss": 5.2615, - "step": 10621 - }, - { - "epoch": 5.539504563233377, - "grad_norm": 1.3227152824401855, - "learning_rate": 8.983015075376886e-05, - "loss": 5.7721, - "step": 10622 - }, - { - "epoch": 5.540026075619296, - "grad_norm": 1.4500418901443481, - "learning_rate": 8.982914572864322e-05, - "loss": 5.5227, - "step": 10623 - }, - { - "epoch": 5.540547588005215, - "grad_norm": 1.32511568069458, - "learning_rate": 8.98281407035176e-05, - "loss": 5.6303, - "step": 10624 - }, - { - "epoch": 5.541069100391134, - "grad_norm": 1.4139864444732666, - "learning_rate": 8.982713567839196e-05, - "loss": 5.779, - "step": 10625 - }, - { - "epoch": 5.541590612777053, - "grad_norm": 1.6047974824905396, - "learning_rate": 8.982613065326634e-05, - "loss": 5.8524, - "step": 10626 - }, - { - "epoch": 5.542112125162973, - "grad_norm": 1.4181280136108398, - "learning_rate": 8.98251256281407e-05, - "loss": 5.75, - "step": 10627 - }, - { - "epoch": 5.542633637548892, - "grad_norm": 1.4477341175079346, - "learning_rate": 8.982412060301508e-05, - "loss": 5.327, - "step": 10628 - }, - { - "epoch": 5.543155149934811, - "grad_norm": 1.451240062713623, - "learning_rate": 8.982311557788945e-05, - "loss": 5.4207, - "step": 10629 - }, - { - "epoch": 5.54367666232073, - "grad_norm": 1.4802944660186768, - "learning_rate": 8.982211055276382e-05, - "loss": 5.4428, - "step": 10630 - }, - { - "epoch": 5.544198174706649, - "grad_norm": 1.5272756814956665, - "learning_rate": 8.982110552763819e-05, - "loss": 5.185, - "step": 10631 - }, - { - "epoch": 5.544719687092568, - "grad_norm": 1.4353892803192139, - "learning_rate": 8.982010050251257e-05, - "loss": 5.6617, - "step": 10632 - }, - { - "epoch": 5.545241199478488, - "grad_norm": 1.3849009275436401, - "learning_rate": 8.981909547738694e-05, - "loss": 5.8204, - "step": 10633 - }, - { - "epoch": 5.545762711864406, - "grad_norm": 1.441770315170288, - "learning_rate": 8.981809045226131e-05, - "loss": 5.4206, - "step": 10634 - }, - { - "epoch": 5.546284224250326, - "grad_norm": 1.3511791229248047, - "learning_rate": 8.981708542713569e-05, - "loss": 6.1352, - "step": 10635 - }, - { - "epoch": 5.546805736636245, - "grad_norm": 1.5235979557037354, - "learning_rate": 8.981608040201005e-05, - "loss": 5.3545, - "step": 10636 - }, - { - "epoch": 5.547327249022164, - "grad_norm": 1.431562900543213, - "learning_rate": 8.981507537688443e-05, - "loss": 5.7072, - "step": 10637 - }, - { - "epoch": 5.547848761408083, - "grad_norm": 1.5068142414093018, - "learning_rate": 8.981407035175879e-05, - "loss": 5.2638, - "step": 10638 - }, - { - "epoch": 5.548370273794003, - "grad_norm": 1.5173410177230835, - "learning_rate": 8.981306532663317e-05, - "loss": 5.5781, - "step": 10639 - }, - { - "epoch": 5.548891786179921, - "grad_norm": 1.4697211980819702, - "learning_rate": 8.981206030150753e-05, - "loss": 5.7283, - "step": 10640 - }, - { - "epoch": 5.549413298565841, - "grad_norm": 1.4464901685714722, - "learning_rate": 8.981105527638191e-05, - "loss": 5.6286, - "step": 10641 - }, - { - "epoch": 5.54993481095176, - "grad_norm": 1.5917302370071411, - "learning_rate": 8.981005025125629e-05, - "loss": 5.4144, - "step": 10642 - }, - { - "epoch": 5.550456323337679, - "grad_norm": 1.7045180797576904, - "learning_rate": 8.980904522613067e-05, - "loss": 5.1914, - "step": 10643 - }, - { - "epoch": 5.550977835723598, - "grad_norm": 1.456911325454712, - "learning_rate": 8.980804020100503e-05, - "loss": 5.5286, - "step": 10644 - }, - { - "epoch": 5.551499348109518, - "grad_norm": 1.3162492513656616, - "learning_rate": 8.980703517587941e-05, - "loss": 5.6386, - "step": 10645 - }, - { - "epoch": 5.552020860495436, - "grad_norm": 1.572289228439331, - "learning_rate": 8.980603015075377e-05, - "loss": 5.259, - "step": 10646 - }, - { - "epoch": 5.552542372881356, - "grad_norm": 1.5026469230651855, - "learning_rate": 8.980502512562815e-05, - "loss": 5.6571, - "step": 10647 - }, - { - "epoch": 5.553063885267275, - "grad_norm": 1.4612823724746704, - "learning_rate": 8.980402010050252e-05, - "loss": 5.9021, - "step": 10648 - }, - { - "epoch": 5.553585397653194, - "grad_norm": 1.4366511106491089, - "learning_rate": 8.980301507537688e-05, - "loss": 5.5088, - "step": 10649 - }, - { - "epoch": 5.554106910039113, - "grad_norm": 1.3544087409973145, - "learning_rate": 8.980201005025126e-05, - "loss": 6.0035, - "step": 10650 - }, - { - "epoch": 5.554628422425033, - "grad_norm": 1.4471513032913208, - "learning_rate": 8.980100502512562e-05, - "loss": 5.6498, - "step": 10651 - }, - { - "epoch": 5.555149934810951, - "grad_norm": 1.4358118772506714, - "learning_rate": 8.98e-05, - "loss": 5.2415, - "step": 10652 - }, - { - "epoch": 5.555671447196871, - "grad_norm": 1.5437648296356201, - "learning_rate": 8.979899497487438e-05, - "loss": 5.0231, - "step": 10653 - }, - { - "epoch": 5.55619295958279, - "grad_norm": 1.5040374994277954, - "learning_rate": 8.979798994974876e-05, - "loss": 5.4041, - "step": 10654 - }, - { - "epoch": 5.556714471968709, - "grad_norm": 1.440911054611206, - "learning_rate": 8.979698492462312e-05, - "loss": 5.7352, - "step": 10655 - }, - { - "epoch": 5.557235984354628, - "grad_norm": 1.463666558265686, - "learning_rate": 8.97959798994975e-05, - "loss": 5.5876, - "step": 10656 - }, - { - "epoch": 5.557757496740548, - "grad_norm": 1.5259830951690674, - "learning_rate": 8.979497487437186e-05, - "loss": 5.2268, - "step": 10657 - }, - { - "epoch": 5.558279009126466, - "grad_norm": 1.4595916271209717, - "learning_rate": 8.979396984924624e-05, - "loss": 5.8421, - "step": 10658 - }, - { - "epoch": 5.558800521512386, - "grad_norm": 1.4905551671981812, - "learning_rate": 8.97929648241206e-05, - "loss": 5.5621, - "step": 10659 - }, - { - "epoch": 5.559322033898305, - "grad_norm": 1.4744524955749512, - "learning_rate": 8.979195979899498e-05, - "loss": 5.705, - "step": 10660 - }, - { - "epoch": 5.559843546284224, - "grad_norm": 1.5570566654205322, - "learning_rate": 8.979095477386935e-05, - "loss": 5.7084, - "step": 10661 - }, - { - "epoch": 5.560365058670143, - "grad_norm": 1.5039702653884888, - "learning_rate": 8.978994974874371e-05, - "loss": 5.3165, - "step": 10662 - }, - { - "epoch": 5.560886571056063, - "grad_norm": 1.373837947845459, - "learning_rate": 8.978894472361809e-05, - "loss": 5.6137, - "step": 10663 - }, - { - "epoch": 5.5614080834419815, - "grad_norm": 1.5403666496276855, - "learning_rate": 8.978793969849247e-05, - "loss": 5.9779, - "step": 10664 - }, - { - "epoch": 5.561929595827901, - "grad_norm": 1.3370065689086914, - "learning_rate": 8.978693467336684e-05, - "loss": 5.9706, - "step": 10665 - }, - { - "epoch": 5.56245110821382, - "grad_norm": 1.425170660018921, - "learning_rate": 8.978592964824121e-05, - "loss": 5.4408, - "step": 10666 - }, - { - "epoch": 5.562972620599739, - "grad_norm": 1.4637962579727173, - "learning_rate": 8.978492462311559e-05, - "loss": 5.1684, - "step": 10667 - }, - { - "epoch": 5.563494132985658, - "grad_norm": 1.6192643642425537, - "learning_rate": 8.978391959798995e-05, - "loss": 5.5964, - "step": 10668 - }, - { - "epoch": 5.564015645371578, - "grad_norm": 1.5467052459716797, - "learning_rate": 8.978291457286433e-05, - "loss": 4.8645, - "step": 10669 - }, - { - "epoch": 5.5645371577574965, - "grad_norm": 1.4735559225082397, - "learning_rate": 8.97819095477387e-05, - "loss": 5.4924, - "step": 10670 - }, - { - "epoch": 5.565058670143416, - "grad_norm": 1.4907965660095215, - "learning_rate": 8.978090452261307e-05, - "loss": 5.2758, - "step": 10671 - }, - { - "epoch": 5.565580182529335, - "grad_norm": 1.443861484527588, - "learning_rate": 8.977989949748744e-05, - "loss": 5.9669, - "step": 10672 - }, - { - "epoch": 5.566101694915254, - "grad_norm": 1.4585715532302856, - "learning_rate": 8.977889447236181e-05, - "loss": 5.257, - "step": 10673 - }, - { - "epoch": 5.566623207301173, - "grad_norm": 1.4580118656158447, - "learning_rate": 8.977788944723619e-05, - "loss": 5.4391, - "step": 10674 - }, - { - "epoch": 5.567144719687093, - "grad_norm": 1.3736151456832886, - "learning_rate": 8.977688442211056e-05, - "loss": 5.4986, - "step": 10675 - }, - { - "epoch": 5.5676662320730115, - "grad_norm": 1.4873274564743042, - "learning_rate": 8.977587939698493e-05, - "loss": 5.3497, - "step": 10676 - }, - { - "epoch": 5.568187744458931, - "grad_norm": 1.4092762470245361, - "learning_rate": 8.97748743718593e-05, - "loss": 5.8026, - "step": 10677 - }, - { - "epoch": 5.56870925684485, - "grad_norm": 1.4899189472198486, - "learning_rate": 8.977386934673368e-05, - "loss": 5.0795, - "step": 10678 - }, - { - "epoch": 5.569230769230769, - "grad_norm": 1.4775065183639526, - "learning_rate": 8.977286432160804e-05, - "loss": 5.588, - "step": 10679 - }, - { - "epoch": 5.5697522816166884, - "grad_norm": 1.4677801132202148, - "learning_rate": 8.977185929648242e-05, - "loss": 5.5959, - "step": 10680 - }, - { - "epoch": 5.570273794002608, - "grad_norm": 1.4771485328674316, - "learning_rate": 8.977085427135678e-05, - "loss": 5.6068, - "step": 10681 - }, - { - "epoch": 5.5707953063885265, - "grad_norm": 1.5151886940002441, - "learning_rate": 8.976984924623116e-05, - "loss": 5.7509, - "step": 10682 - }, - { - "epoch": 5.571316818774446, - "grad_norm": 1.349482774734497, - "learning_rate": 8.976884422110552e-05, - "loss": 5.5763, - "step": 10683 - }, - { - "epoch": 5.571838331160365, - "grad_norm": 1.3438308238983154, - "learning_rate": 8.97678391959799e-05, - "loss": 5.8156, - "step": 10684 - }, - { - "epoch": 5.572359843546284, - "grad_norm": 1.5698109865188599, - "learning_rate": 8.976683417085428e-05, - "loss": 4.9264, - "step": 10685 - }, - { - "epoch": 5.5728813559322035, - "grad_norm": 1.441450595855713, - "learning_rate": 8.976582914572866e-05, - "loss": 5.7915, - "step": 10686 - }, - { - "epoch": 5.573402868318123, - "grad_norm": 1.3790888786315918, - "learning_rate": 8.976482412060302e-05, - "loss": 5.5852, - "step": 10687 - }, - { - "epoch": 5.5739243807040415, - "grad_norm": 1.468972086906433, - "learning_rate": 8.976381909547739e-05, - "loss": 5.6414, - "step": 10688 - }, - { - "epoch": 5.574445893089961, - "grad_norm": 1.307621717453003, - "learning_rate": 8.976281407035176e-05, - "loss": 5.2198, - "step": 10689 - }, - { - "epoch": 5.57496740547588, - "grad_norm": 1.4904778003692627, - "learning_rate": 8.976180904522613e-05, - "loss": 5.4697, - "step": 10690 - }, - { - "epoch": 5.575488917861799, - "grad_norm": 1.4442285299301147, - "learning_rate": 8.97608040201005e-05, - "loss": 6.0406, - "step": 10691 - }, - { - "epoch": 5.5760104302477185, - "grad_norm": 1.4455108642578125, - "learning_rate": 8.975979899497487e-05, - "loss": 5.5153, - "step": 10692 - }, - { - "epoch": 5.576531942633638, - "grad_norm": 1.4157465696334839, - "learning_rate": 8.975879396984925e-05, - "loss": 5.4922, - "step": 10693 - }, - { - "epoch": 5.5770534550195565, - "grad_norm": 1.4414554834365845, - "learning_rate": 8.975778894472363e-05, - "loss": 5.6488, - "step": 10694 - }, - { - "epoch": 5.577574967405476, - "grad_norm": 1.3763607740402222, - "learning_rate": 8.9756783919598e-05, - "loss": 5.5192, - "step": 10695 - }, - { - "epoch": 5.578096479791395, - "grad_norm": 1.4902335405349731, - "learning_rate": 8.975577889447237e-05, - "loss": 5.6269, - "step": 10696 - }, - { - "epoch": 5.578617992177314, - "grad_norm": 1.3486636877059937, - "learning_rate": 8.975477386934675e-05, - "loss": 5.8181, - "step": 10697 - }, - { - "epoch": 5.5791395045632335, - "grad_norm": 1.4632179737091064, - "learning_rate": 8.975376884422111e-05, - "loss": 5.1638, - "step": 10698 - }, - { - "epoch": 5.579661016949153, - "grad_norm": 1.8298215866088867, - "learning_rate": 8.975276381909549e-05, - "loss": 5.6288, - "step": 10699 - }, - { - "epoch": 5.5801825293350715, - "grad_norm": 1.473177194595337, - "learning_rate": 8.975175879396985e-05, - "loss": 5.7395, - "step": 10700 - }, - { - "epoch": 5.580704041720991, - "grad_norm": 1.4965496063232422, - "learning_rate": 8.975075376884422e-05, - "loss": 5.8979, - "step": 10701 - }, - { - "epoch": 5.58122555410691, - "grad_norm": 1.496484637260437, - "learning_rate": 8.97497487437186e-05, - "loss": 5.6839, - "step": 10702 - }, - { - "epoch": 5.581747066492829, - "grad_norm": 1.6331276893615723, - "learning_rate": 8.974874371859296e-05, - "loss": 5.6053, - "step": 10703 - }, - { - "epoch": 5.5822685788787485, - "grad_norm": 1.4352926015853882, - "learning_rate": 8.974773869346734e-05, - "loss": 5.7001, - "step": 10704 - }, - { - "epoch": 5.582790091264668, - "grad_norm": 1.5482256412506104, - "learning_rate": 8.974673366834171e-05, - "loss": 5.3971, - "step": 10705 - }, - { - "epoch": 5.5833116036505865, - "grad_norm": 1.4308723211288452, - "learning_rate": 8.974572864321609e-05, - "loss": 5.8068, - "step": 10706 - }, - { - "epoch": 5.583833116036506, - "grad_norm": 1.468033790588379, - "learning_rate": 8.974472361809046e-05, - "loss": 5.4917, - "step": 10707 - }, - { - "epoch": 5.584354628422425, - "grad_norm": 1.4369900226593018, - "learning_rate": 8.974371859296483e-05, - "loss": 5.5902, - "step": 10708 - }, - { - "epoch": 5.584876140808344, - "grad_norm": 1.4233335256576538, - "learning_rate": 8.97427135678392e-05, - "loss": 5.9019, - "step": 10709 - }, - { - "epoch": 5.5853976531942635, - "grad_norm": 1.5117610692977905, - "learning_rate": 8.974170854271358e-05, - "loss": 5.5663, - "step": 10710 - }, - { - "epoch": 5.585919165580183, - "grad_norm": 1.6657543182373047, - "learning_rate": 8.974070351758794e-05, - "loss": 5.517, - "step": 10711 - }, - { - "epoch": 5.5864406779661016, - "grad_norm": 1.5158084630966187, - "learning_rate": 8.973969849246232e-05, - "loss": 5.2128, - "step": 10712 - }, - { - "epoch": 5.586962190352021, - "grad_norm": 1.4552608728408813, - "learning_rate": 8.973869346733668e-05, - "loss": 5.2406, - "step": 10713 - }, - { - "epoch": 5.58748370273794, - "grad_norm": 1.4881843328475952, - "learning_rate": 8.973768844221106e-05, - "loss": 5.299, - "step": 10714 - }, - { - "epoch": 5.588005215123859, - "grad_norm": 1.497010588645935, - "learning_rate": 8.973668341708544e-05, - "loss": 5.6019, - "step": 10715 - }, - { - "epoch": 5.5885267275097785, - "grad_norm": 1.6426019668579102, - "learning_rate": 8.97356783919598e-05, - "loss": 5.6731, - "step": 10716 - }, - { - "epoch": 5.589048239895698, - "grad_norm": 1.3704931735992432, - "learning_rate": 8.973467336683418e-05, - "loss": 5.5186, - "step": 10717 - }, - { - "epoch": 5.589569752281617, - "grad_norm": 1.3988368511199951, - "learning_rate": 8.973366834170854e-05, - "loss": 5.4417, - "step": 10718 - }, - { - "epoch": 5.590091264667536, - "grad_norm": 1.3900200128555298, - "learning_rate": 8.973266331658292e-05, - "loss": 5.7314, - "step": 10719 - }, - { - "epoch": 5.590612777053455, - "grad_norm": 1.4626874923706055, - "learning_rate": 8.973165829145729e-05, - "loss": 5.5044, - "step": 10720 - }, - { - "epoch": 5.591134289439374, - "grad_norm": 1.4698872566223145, - "learning_rate": 8.973065326633166e-05, - "loss": 5.5728, - "step": 10721 - }, - { - "epoch": 5.5916558018252935, - "grad_norm": 1.391894817352295, - "learning_rate": 8.972964824120603e-05, - "loss": 5.8829, - "step": 10722 - }, - { - "epoch": 5.592177314211213, - "grad_norm": 1.3896067142486572, - "learning_rate": 8.97286432160804e-05, - "loss": 5.8358, - "step": 10723 - }, - { - "epoch": 5.592698826597132, - "grad_norm": 1.4350017309188843, - "learning_rate": 8.972763819095477e-05, - "loss": 5.7083, - "step": 10724 - }, - { - "epoch": 5.593220338983051, - "grad_norm": 1.4307115077972412, - "learning_rate": 8.972663316582915e-05, - "loss": 5.4359, - "step": 10725 - }, - { - "epoch": 5.59374185136897, - "grad_norm": 2.6177988052368164, - "learning_rate": 8.972562814070353e-05, - "loss": 5.0889, - "step": 10726 - }, - { - "epoch": 5.594263363754889, - "grad_norm": 1.5480645895004272, - "learning_rate": 8.972462311557789e-05, - "loss": 5.5142, - "step": 10727 - }, - { - "epoch": 5.5947848761408085, - "grad_norm": 1.384631872177124, - "learning_rate": 8.972361809045227e-05, - "loss": 5.7605, - "step": 10728 - }, - { - "epoch": 5.595306388526727, - "grad_norm": 1.7982373237609863, - "learning_rate": 8.972261306532663e-05, - "loss": 4.7923, - "step": 10729 - }, - { - "epoch": 5.595827900912647, - "grad_norm": 1.5866572856903076, - "learning_rate": 8.972160804020101e-05, - "loss": 5.2396, - "step": 10730 - }, - { - "epoch": 5.596349413298566, - "grad_norm": 1.5016323328018188, - "learning_rate": 8.972060301507537e-05, - "loss": 5.9116, - "step": 10731 - }, - { - "epoch": 5.596870925684485, - "grad_norm": 1.4689751863479614, - "learning_rate": 8.971959798994975e-05, - "loss": 5.7532, - "step": 10732 - }, - { - "epoch": 5.597392438070404, - "grad_norm": 1.4844993352890015, - "learning_rate": 8.971859296482412e-05, - "loss": 5.8688, - "step": 10733 - }, - { - "epoch": 5.597913950456324, - "grad_norm": 1.5646308660507202, - "learning_rate": 8.97175879396985e-05, - "loss": 5.5851, - "step": 10734 - }, - { - "epoch": 5.598435462842242, - "grad_norm": 1.4569100141525269, - "learning_rate": 8.971658291457287e-05, - "loss": 5.8676, - "step": 10735 - }, - { - "epoch": 5.598956975228162, - "grad_norm": 1.5433564186096191, - "learning_rate": 8.971557788944725e-05, - "loss": 4.9837, - "step": 10736 - }, - { - "epoch": 5.599478487614081, - "grad_norm": 1.4119356870651245, - "learning_rate": 8.971457286432161e-05, - "loss": 5.6885, - "step": 10737 - }, - { - "epoch": 5.6, - "grad_norm": 1.3119248151779175, - "learning_rate": 8.971356783919599e-05, - "loss": 5.923, - "step": 10738 - }, - { - "epoch": 5.600521512385919, - "grad_norm": 1.4582245349884033, - "learning_rate": 8.971256281407036e-05, - "loss": 5.2739, - "step": 10739 - }, - { - "epoch": 5.601043024771839, - "grad_norm": 1.5176348686218262, - "learning_rate": 8.971155778894473e-05, - "loss": 5.6583, - "step": 10740 - }, - { - "epoch": 5.601564537157757, - "grad_norm": 1.5285310745239258, - "learning_rate": 8.97105527638191e-05, - "loss": 5.5315, - "step": 10741 - }, - { - "epoch": 5.602086049543677, - "grad_norm": 1.5655077695846558, - "learning_rate": 8.970954773869346e-05, - "loss": 5.3607, - "step": 10742 - }, - { - "epoch": 5.602607561929596, - "grad_norm": 1.5796623229980469, - "learning_rate": 8.970854271356784e-05, - "loss": 5.2698, - "step": 10743 - }, - { - "epoch": 5.603129074315515, - "grad_norm": 1.4663238525390625, - "learning_rate": 8.97075376884422e-05, - "loss": 5.6631, - "step": 10744 - }, - { - "epoch": 5.603650586701434, - "grad_norm": 1.4652966260910034, - "learning_rate": 8.970653266331658e-05, - "loss": 5.6178, - "step": 10745 - }, - { - "epoch": 5.604172099087354, - "grad_norm": 1.7010948657989502, - "learning_rate": 8.970552763819096e-05, - "loss": 4.4571, - "step": 10746 - }, - { - "epoch": 5.604693611473272, - "grad_norm": 1.422619104385376, - "learning_rate": 8.970452261306534e-05, - "loss": 5.6791, - "step": 10747 - }, - { - "epoch": 5.605215123859192, - "grad_norm": 1.5135239362716675, - "learning_rate": 8.97035175879397e-05, - "loss": 5.024, - "step": 10748 - }, - { - "epoch": 5.605736636245111, - "grad_norm": 1.374961495399475, - "learning_rate": 8.970251256281408e-05, - "loss": 5.9613, - "step": 10749 - }, - { - "epoch": 5.60625814863103, - "grad_norm": 1.4520517587661743, - "learning_rate": 8.970150753768845e-05, - "loss": 5.6929, - "step": 10750 - }, - { - "epoch": 5.606779661016949, - "grad_norm": 1.4950183629989624, - "learning_rate": 8.970050251256282e-05, - "loss": 5.6589, - "step": 10751 - }, - { - "epoch": 5.607301173402869, - "grad_norm": 1.620504379272461, - "learning_rate": 8.969949748743719e-05, - "loss": 5.3102, - "step": 10752 - }, - { - "epoch": 5.607822685788787, - "grad_norm": 1.5027135610580444, - "learning_rate": 8.969849246231157e-05, - "loss": 5.3994, - "step": 10753 - }, - { - "epoch": 5.608344198174707, - "grad_norm": 1.611693024635315, - "learning_rate": 8.969748743718593e-05, - "loss": 5.3491, - "step": 10754 - }, - { - "epoch": 5.608865710560626, - "grad_norm": 1.4383951425552368, - "learning_rate": 8.969648241206031e-05, - "loss": 5.6933, - "step": 10755 - }, - { - "epoch": 5.609387222946545, - "grad_norm": 1.414306402206421, - "learning_rate": 8.969547738693469e-05, - "loss": 5.8188, - "step": 10756 - }, - { - "epoch": 5.609908735332464, - "grad_norm": 1.493955135345459, - "learning_rate": 8.969447236180905e-05, - "loss": 5.1613, - "step": 10757 - }, - { - "epoch": 5.610430247718384, - "grad_norm": 1.6525912284851074, - "learning_rate": 8.969346733668343e-05, - "loss": 5.8144, - "step": 10758 - }, - { - "epoch": 5.610951760104302, - "grad_norm": 1.40420401096344, - "learning_rate": 8.969246231155779e-05, - "loss": 5.5869, - "step": 10759 - }, - { - "epoch": 5.611473272490222, - "grad_norm": 1.5402380228042603, - "learning_rate": 8.969145728643217e-05, - "loss": 5.3246, - "step": 10760 - }, - { - "epoch": 5.611994784876141, - "grad_norm": 1.4089709520339966, - "learning_rate": 8.969045226130653e-05, - "loss": 5.8276, - "step": 10761 - }, - { - "epoch": 5.61251629726206, - "grad_norm": 1.4307835102081299, - "learning_rate": 8.968944723618091e-05, - "loss": 5.7339, - "step": 10762 - }, - { - "epoch": 5.613037809647979, - "grad_norm": 1.4012186527252197, - "learning_rate": 8.968844221105528e-05, - "loss": 5.5585, - "step": 10763 - }, - { - "epoch": 5.613559322033899, - "grad_norm": 1.3981788158416748, - "learning_rate": 8.968743718592965e-05, - "loss": 6.0062, - "step": 10764 - }, - { - "epoch": 5.614080834419817, - "grad_norm": 1.535621166229248, - "learning_rate": 8.968643216080402e-05, - "loss": 5.3075, - "step": 10765 - }, - { - "epoch": 5.614602346805737, - "grad_norm": 1.3954375982284546, - "learning_rate": 8.96854271356784e-05, - "loss": 5.8018, - "step": 10766 - }, - { - "epoch": 5.615123859191656, - "grad_norm": 1.4822372198104858, - "learning_rate": 8.968442211055277e-05, - "loss": 5.6684, - "step": 10767 - }, - { - "epoch": 5.615645371577575, - "grad_norm": 1.35394287109375, - "learning_rate": 8.968341708542714e-05, - "loss": 5.7385, - "step": 10768 - }, - { - "epoch": 5.616166883963494, - "grad_norm": 1.413093090057373, - "learning_rate": 8.968241206030152e-05, - "loss": 5.8517, - "step": 10769 - }, - { - "epoch": 5.616688396349414, - "grad_norm": 1.531387209892273, - "learning_rate": 8.968140703517588e-05, - "loss": 5.2694, - "step": 10770 - }, - { - "epoch": 5.617209908735332, - "grad_norm": 1.4426822662353516, - "learning_rate": 8.968040201005026e-05, - "loss": 5.9047, - "step": 10771 - }, - { - "epoch": 5.617731421121252, - "grad_norm": 1.2957112789154053, - "learning_rate": 8.967939698492462e-05, - "loss": 5.0518, - "step": 10772 - }, - { - "epoch": 5.618252933507171, - "grad_norm": 1.6042314767837524, - "learning_rate": 8.9678391959799e-05, - "loss": 5.7134, - "step": 10773 - }, - { - "epoch": 5.61877444589309, - "grad_norm": 1.4272891283035278, - "learning_rate": 8.967738693467336e-05, - "loss": 6.0769, - "step": 10774 - }, - { - "epoch": 5.619295958279009, - "grad_norm": 1.5628610849380493, - "learning_rate": 8.967638190954774e-05, - "loss": 5.6745, - "step": 10775 - }, - { - "epoch": 5.619817470664929, - "grad_norm": 1.3754807710647583, - "learning_rate": 8.967537688442212e-05, - "loss": 5.8278, - "step": 10776 - }, - { - "epoch": 5.620338983050847, - "grad_norm": 1.4431676864624023, - "learning_rate": 8.96743718592965e-05, - "loss": 5.7195, - "step": 10777 - }, - { - "epoch": 5.620860495436767, - "grad_norm": 1.5327861309051514, - "learning_rate": 8.967336683417086e-05, - "loss": 5.4871, - "step": 10778 - }, - { - "epoch": 5.621382007822686, - "grad_norm": 1.5918577909469604, - "learning_rate": 8.967236180904524e-05, - "loss": 5.5217, - "step": 10779 - }, - { - "epoch": 5.621903520208605, - "grad_norm": 1.4155633449554443, - "learning_rate": 8.96713567839196e-05, - "loss": 5.6386, - "step": 10780 - }, - { - "epoch": 5.622425032594524, - "grad_norm": 1.5881943702697754, - "learning_rate": 8.967035175879397e-05, - "loss": 5.9649, - "step": 10781 - }, - { - "epoch": 5.622946544980444, - "grad_norm": 1.3059442043304443, - "learning_rate": 8.966934673366835e-05, - "loss": 5.9676, - "step": 10782 - }, - { - "epoch": 5.623468057366362, - "grad_norm": 1.6797270774841309, - "learning_rate": 8.966834170854271e-05, - "loss": 5.5308, - "step": 10783 - }, - { - "epoch": 5.623989569752282, - "grad_norm": 1.5364915132522583, - "learning_rate": 8.966733668341709e-05, - "loss": 5.4937, - "step": 10784 - }, - { - "epoch": 5.624511082138201, - "grad_norm": 1.6600849628448486, - "learning_rate": 8.966633165829145e-05, - "loss": 5.5003, - "step": 10785 - }, - { - "epoch": 5.62503259452412, - "grad_norm": 1.4681226015090942, - "learning_rate": 8.966532663316583e-05, - "loss": 5.872, - "step": 10786 - }, - { - "epoch": 5.625554106910039, - "grad_norm": 1.4834232330322266, - "learning_rate": 8.966432160804021e-05, - "loss": 5.0439, - "step": 10787 - }, - { - "epoch": 5.626075619295959, - "grad_norm": 1.5923528671264648, - "learning_rate": 8.966331658291459e-05, - "loss": 5.4792, - "step": 10788 - }, - { - "epoch": 5.626597131681877, - "grad_norm": 1.408738613128662, - "learning_rate": 8.966231155778895e-05, - "loss": 5.4845, - "step": 10789 - }, - { - "epoch": 5.627118644067797, - "grad_norm": 1.4079418182373047, - "learning_rate": 8.966130653266333e-05, - "loss": 5.9044, - "step": 10790 - }, - { - "epoch": 5.627640156453715, - "grad_norm": 1.492214322090149, - "learning_rate": 8.966030150753769e-05, - "loss": 5.3748, - "step": 10791 - }, - { - "epoch": 5.628161668839635, - "grad_norm": 1.4931862354278564, - "learning_rate": 8.965929648241207e-05, - "loss": 5.4204, - "step": 10792 - }, - { - "epoch": 5.628683181225554, - "grad_norm": 1.6052953004837036, - "learning_rate": 8.965829145728643e-05, - "loss": 4.7524, - "step": 10793 - }, - { - "epoch": 5.629204693611474, - "grad_norm": 1.421484112739563, - "learning_rate": 8.96572864321608e-05, - "loss": 5.9184, - "step": 10794 - }, - { - "epoch": 5.629726205997392, - "grad_norm": 1.387969970703125, - "learning_rate": 8.965628140703518e-05, - "loss": 5.8504, - "step": 10795 - }, - { - "epoch": 5.630247718383312, - "grad_norm": 1.38021981716156, - "learning_rate": 8.965527638190955e-05, - "loss": 5.9066, - "step": 10796 - }, - { - "epoch": 5.63076923076923, - "grad_norm": 1.6067055463790894, - "learning_rate": 8.965427135678393e-05, - "loss": 5.2257, - "step": 10797 - }, - { - "epoch": 5.63129074315515, - "grad_norm": 1.5525658130645752, - "learning_rate": 8.96532663316583e-05, - "loss": 5.0937, - "step": 10798 - }, - { - "epoch": 5.631812255541069, - "grad_norm": 1.4380319118499756, - "learning_rate": 8.965226130653267e-05, - "loss": 5.514, - "step": 10799 - }, - { - "epoch": 5.632333767926989, - "grad_norm": 1.5950044393539429, - "learning_rate": 8.965125628140704e-05, - "loss": 5.2153, - "step": 10800 - }, - { - "epoch": 5.632855280312907, - "grad_norm": 1.7496587038040161, - "learning_rate": 8.965025125628142e-05, - "loss": 5.4496, - "step": 10801 - }, - { - "epoch": 5.633376792698827, - "grad_norm": 1.4401425123214722, - "learning_rate": 8.964924623115578e-05, - "loss": 5.7522, - "step": 10802 - }, - { - "epoch": 5.633898305084745, - "grad_norm": 1.6351449489593506, - "learning_rate": 8.964824120603016e-05, - "loss": 5.2958, - "step": 10803 - }, - { - "epoch": 5.634419817470665, - "grad_norm": 1.4320244789123535, - "learning_rate": 8.964723618090452e-05, - "loss": 5.8712, - "step": 10804 - }, - { - "epoch": 5.634941329856584, - "grad_norm": 1.5549267530441284, - "learning_rate": 8.96462311557789e-05, - "loss": 5.6277, - "step": 10805 - }, - { - "epoch": 5.635462842242504, - "grad_norm": 1.5231528282165527, - "learning_rate": 8.964522613065326e-05, - "loss": 5.6973, - "step": 10806 - }, - { - "epoch": 5.635984354628422, - "grad_norm": 1.475305438041687, - "learning_rate": 8.964422110552764e-05, - "loss": 5.8017, - "step": 10807 - }, - { - "epoch": 5.636505867014342, - "grad_norm": 1.4046663045883179, - "learning_rate": 8.964321608040202e-05, - "loss": 5.5645, - "step": 10808 - }, - { - "epoch": 5.63702737940026, - "grad_norm": 1.4886283874511719, - "learning_rate": 8.964221105527638e-05, - "loss": 4.701, - "step": 10809 - }, - { - "epoch": 5.63754889178618, - "grad_norm": 1.4730623960494995, - "learning_rate": 8.964120603015076e-05, - "loss": 5.4826, - "step": 10810 - }, - { - "epoch": 5.638070404172099, - "grad_norm": 1.5099440813064575, - "learning_rate": 8.964020100502513e-05, - "loss": 5.2469, - "step": 10811 - }, - { - "epoch": 5.638591916558019, - "grad_norm": 1.4545295238494873, - "learning_rate": 8.96391959798995e-05, - "loss": 5.9159, - "step": 10812 - }, - { - "epoch": 5.639113428943937, - "grad_norm": 1.5190812349319458, - "learning_rate": 8.963819095477387e-05, - "loss": 5.6385, - "step": 10813 - }, - { - "epoch": 5.639634941329857, - "grad_norm": 1.3853734731674194, - "learning_rate": 8.963718592964825e-05, - "loss": 5.4425, - "step": 10814 - }, - { - "epoch": 5.640156453715775, - "grad_norm": 1.6198279857635498, - "learning_rate": 8.963618090452261e-05, - "loss": 5.4611, - "step": 10815 - }, - { - "epoch": 5.640677966101695, - "grad_norm": 1.6030395030975342, - "learning_rate": 8.963517587939699e-05, - "loss": 5.0929, - "step": 10816 - }, - { - "epoch": 5.641199478487614, - "grad_norm": 1.7084485292434692, - "learning_rate": 8.963417085427135e-05, - "loss": 4.9479, - "step": 10817 - }, - { - "epoch": 5.641720990873534, - "grad_norm": 1.5259308815002441, - "learning_rate": 8.963316582914573e-05, - "loss": 5.9477, - "step": 10818 - }, - { - "epoch": 5.642242503259452, - "grad_norm": 1.4764087200164795, - "learning_rate": 8.963216080402011e-05, - "loss": 5.5445, - "step": 10819 - }, - { - "epoch": 5.642764015645372, - "grad_norm": 1.4806472063064575, - "learning_rate": 8.963115577889447e-05, - "loss": 5.6853, - "step": 10820 - }, - { - "epoch": 5.64328552803129, - "grad_norm": 1.3320348262786865, - "learning_rate": 8.963015075376885e-05, - "loss": 5.7958, - "step": 10821 - }, - { - "epoch": 5.64380704041721, - "grad_norm": 1.4778175354003906, - "learning_rate": 8.962914572864322e-05, - "loss": 5.4333, - "step": 10822 - }, - { - "epoch": 5.644328552803129, - "grad_norm": 1.459382176399231, - "learning_rate": 8.962814070351759e-05, - "loss": 5.5178, - "step": 10823 - }, - { - "epoch": 5.644850065189048, - "grad_norm": 1.4744855165481567, - "learning_rate": 8.962713567839196e-05, - "loss": 5.7683, - "step": 10824 - }, - { - "epoch": 5.645371577574967, - "grad_norm": 1.4650914669036865, - "learning_rate": 8.962613065326634e-05, - "loss": 5.8183, - "step": 10825 - }, - { - "epoch": 5.645893089960887, - "grad_norm": 1.6759175062179565, - "learning_rate": 8.96251256281407e-05, - "loss": 5.5627, - "step": 10826 - }, - { - "epoch": 5.646414602346805, - "grad_norm": 1.444706916809082, - "learning_rate": 8.962412060301508e-05, - "loss": 5.8642, - "step": 10827 - }, - { - "epoch": 5.646936114732725, - "grad_norm": 1.4047213792800903, - "learning_rate": 8.962311557788945e-05, - "loss": 5.4089, - "step": 10828 - }, - { - "epoch": 5.647457627118644, - "grad_norm": 1.5012317895889282, - "learning_rate": 8.962211055276383e-05, - "loss": 5.6848, - "step": 10829 - }, - { - "epoch": 5.647979139504563, - "grad_norm": 1.4350608587265015, - "learning_rate": 8.96211055276382e-05, - "loss": 5.6241, - "step": 10830 - }, - { - "epoch": 5.648500651890482, - "grad_norm": 1.3215248584747314, - "learning_rate": 8.962010050251257e-05, - "loss": 5.7633, - "step": 10831 - }, - { - "epoch": 5.649022164276402, - "grad_norm": 1.6051334142684937, - "learning_rate": 8.961909547738694e-05, - "loss": 5.2729, - "step": 10832 - }, - { - "epoch": 5.64954367666232, - "grad_norm": 1.459229826927185, - "learning_rate": 8.961809045226132e-05, - "loss": 5.4403, - "step": 10833 - }, - { - "epoch": 5.65006518904824, - "grad_norm": 1.3123656511306763, - "learning_rate": 8.961708542713568e-05, - "loss": 5.1454, - "step": 10834 - }, - { - "epoch": 5.650586701434159, - "grad_norm": 1.4791381359100342, - "learning_rate": 8.961608040201005e-05, - "loss": 5.4484, - "step": 10835 - }, - { - "epoch": 5.651108213820078, - "grad_norm": 1.4722166061401367, - "learning_rate": 8.961507537688442e-05, - "loss": 5.4197, - "step": 10836 - }, - { - "epoch": 5.651629726205997, - "grad_norm": 1.3411507606506348, - "learning_rate": 8.961407035175879e-05, - "loss": 5.8679, - "step": 10837 - }, - { - "epoch": 5.652151238591917, - "grad_norm": 1.3844192028045654, - "learning_rate": 8.961306532663317e-05, - "loss": 5.5692, - "step": 10838 - }, - { - "epoch": 5.652672750977835, - "grad_norm": 1.4378823041915894, - "learning_rate": 8.961206030150754e-05, - "loss": 5.5874, - "step": 10839 - }, - { - "epoch": 5.653194263363755, - "grad_norm": 1.4036285877227783, - "learning_rate": 8.961105527638192e-05, - "loss": 5.6101, - "step": 10840 - }, - { - "epoch": 5.653715775749674, - "grad_norm": 1.4930002689361572, - "learning_rate": 8.961005025125629e-05, - "loss": 5.6957, - "step": 10841 - }, - { - "epoch": 5.654237288135593, - "grad_norm": 1.4854040145874023, - "learning_rate": 8.960904522613066e-05, - "loss": 5.7187, - "step": 10842 - }, - { - "epoch": 5.654758800521512, - "grad_norm": 1.4667001962661743, - "learning_rate": 8.960804020100503e-05, - "loss": 5.8265, - "step": 10843 - }, - { - "epoch": 5.655280312907432, - "grad_norm": 1.4470330476760864, - "learning_rate": 8.96070351758794e-05, - "loss": 5.7775, - "step": 10844 - }, - { - "epoch": 5.65580182529335, - "grad_norm": 1.4831807613372803, - "learning_rate": 8.960603015075377e-05, - "loss": 5.8434, - "step": 10845 - }, - { - "epoch": 5.65632333767927, - "grad_norm": 1.838244915008545, - "learning_rate": 8.960502512562815e-05, - "loss": 5.8163, - "step": 10846 - }, - { - "epoch": 5.656844850065189, - "grad_norm": 1.3642475605010986, - "learning_rate": 8.960402010050251e-05, - "loss": 5.8054, - "step": 10847 - }, - { - "epoch": 5.657366362451108, - "grad_norm": 1.3965799808502197, - "learning_rate": 8.960301507537689e-05, - "loss": 5.5258, - "step": 10848 - }, - { - "epoch": 5.657887874837027, - "grad_norm": 1.4310121536254883, - "learning_rate": 8.960201005025127e-05, - "loss": 5.7902, - "step": 10849 - }, - { - "epoch": 5.658409387222947, - "grad_norm": 1.4178639650344849, - "learning_rate": 8.960100502512563e-05, - "loss": 5.6708, - "step": 10850 - }, - { - "epoch": 5.658930899608865, - "grad_norm": 1.403517484664917, - "learning_rate": 8.960000000000001e-05, - "loss": 6.0024, - "step": 10851 - }, - { - "epoch": 5.659452411994785, - "grad_norm": 1.3901526927947998, - "learning_rate": 8.959899497487437e-05, - "loss": 5.7589, - "step": 10852 - }, - { - "epoch": 5.659973924380704, - "grad_norm": 1.5637240409851074, - "learning_rate": 8.959798994974875e-05, - "loss": 5.8144, - "step": 10853 - }, - { - "epoch": 5.660495436766623, - "grad_norm": 1.81907057762146, - "learning_rate": 8.959698492462312e-05, - "loss": 4.6165, - "step": 10854 - }, - { - "epoch": 5.661016949152542, - "grad_norm": 1.438905119895935, - "learning_rate": 8.95959798994975e-05, - "loss": 5.8098, - "step": 10855 - }, - { - "epoch": 5.661538461538462, - "grad_norm": 1.4679975509643555, - "learning_rate": 8.959497487437186e-05, - "loss": 5.6438, - "step": 10856 - }, - { - "epoch": 5.66205997392438, - "grad_norm": 1.4496254920959473, - "learning_rate": 8.959396984924624e-05, - "loss": 5.8134, - "step": 10857 - }, - { - "epoch": 5.6625814863103, - "grad_norm": 1.461247205734253, - "learning_rate": 8.95929648241206e-05, - "loss": 5.7992, - "step": 10858 - }, - { - "epoch": 5.663102998696219, - "grad_norm": 1.4455519914627075, - "learning_rate": 8.959195979899498e-05, - "loss": 5.777, - "step": 10859 - }, - { - "epoch": 5.663624511082138, - "grad_norm": 1.4913793802261353, - "learning_rate": 8.959095477386936e-05, - "loss": 5.6603, - "step": 10860 - }, - { - "epoch": 5.664146023468057, - "grad_norm": 1.4361423254013062, - "learning_rate": 8.958994974874372e-05, - "loss": 5.5089, - "step": 10861 - }, - { - "epoch": 5.664667535853977, - "grad_norm": 1.4435937404632568, - "learning_rate": 8.95889447236181e-05, - "loss": 5.3958, - "step": 10862 - }, - { - "epoch": 5.665189048239895, - "grad_norm": 1.5203258991241455, - "learning_rate": 8.958793969849246e-05, - "loss": 4.9937, - "step": 10863 - }, - { - "epoch": 5.665710560625815, - "grad_norm": 1.4278147220611572, - "learning_rate": 8.958693467336684e-05, - "loss": 5.3512, - "step": 10864 - }, - { - "epoch": 5.666232073011734, - "grad_norm": 1.4207921028137207, - "learning_rate": 8.95859296482412e-05, - "loss": 5.3578, - "step": 10865 - }, - { - "epoch": 5.666753585397653, - "grad_norm": 1.4400840997695923, - "learning_rate": 8.958492462311558e-05, - "loss": 5.4376, - "step": 10866 - }, - { - "epoch": 5.667275097783572, - "grad_norm": 1.3464784622192383, - "learning_rate": 8.958391959798995e-05, - "loss": 5.9813, - "step": 10867 - }, - { - "epoch": 5.667796610169492, - "grad_norm": 1.3826415538787842, - "learning_rate": 8.958291457286432e-05, - "loss": 5.6781, - "step": 10868 - }, - { - "epoch": 5.6683181225554105, - "grad_norm": 1.3927260637283325, - "learning_rate": 8.95819095477387e-05, - "loss": 5.8858, - "step": 10869 - }, - { - "epoch": 5.66883963494133, - "grad_norm": 1.358038306236267, - "learning_rate": 8.958090452261308e-05, - "loss": 5.9488, - "step": 10870 - }, - { - "epoch": 5.669361147327249, - "grad_norm": 1.544036865234375, - "learning_rate": 8.957989949748744e-05, - "loss": 5.5982, - "step": 10871 - }, - { - "epoch": 5.669882659713168, - "grad_norm": 1.4062224626541138, - "learning_rate": 8.957889447236182e-05, - "loss": 5.4303, - "step": 10872 - }, - { - "epoch": 5.670404172099087, - "grad_norm": 1.447485327720642, - "learning_rate": 8.957788944723619e-05, - "loss": 5.4872, - "step": 10873 - }, - { - "epoch": 5.670925684485007, - "grad_norm": 1.4075819253921509, - "learning_rate": 8.957688442211055e-05, - "loss": 5.7405, - "step": 10874 - }, - { - "epoch": 5.6714471968709255, - "grad_norm": 1.404503345489502, - "learning_rate": 8.957587939698493e-05, - "loss": 5.9383, - "step": 10875 - }, - { - "epoch": 5.671968709256845, - "grad_norm": 1.4012337923049927, - "learning_rate": 8.957487437185929e-05, - "loss": 5.4766, - "step": 10876 - }, - { - "epoch": 5.672490221642764, - "grad_norm": 1.7492698431015015, - "learning_rate": 8.957386934673367e-05, - "loss": 5.4612, - "step": 10877 - }, - { - "epoch": 5.673011734028683, - "grad_norm": 1.3542150259017944, - "learning_rate": 8.957286432160803e-05, - "loss": 5.8676, - "step": 10878 - }, - { - "epoch": 5.673533246414602, - "grad_norm": 1.6544008255004883, - "learning_rate": 8.957185929648241e-05, - "loss": 5.1669, - "step": 10879 - }, - { - "epoch": 5.674054758800521, - "grad_norm": 1.485646367073059, - "learning_rate": 8.957085427135679e-05, - "loss": 5.9936, - "step": 10880 - }, - { - "epoch": 5.6745762711864405, - "grad_norm": 1.6065763235092163, - "learning_rate": 8.956984924623117e-05, - "loss": 5.2191, - "step": 10881 - }, - { - "epoch": 5.67509778357236, - "grad_norm": 1.42881441116333, - "learning_rate": 8.956884422110553e-05, - "loss": 5.903, - "step": 10882 - }, - { - "epoch": 5.675619295958279, - "grad_norm": 1.604276418685913, - "learning_rate": 8.956783919597991e-05, - "loss": 5.742, - "step": 10883 - }, - { - "epoch": 5.676140808344198, - "grad_norm": 1.4211664199829102, - "learning_rate": 8.956683417085427e-05, - "loss": 5.7315, - "step": 10884 - }, - { - "epoch": 5.6766623207301175, - "grad_norm": 1.5545965433120728, - "learning_rate": 8.956582914572865e-05, - "loss": 5.7242, - "step": 10885 - }, - { - "epoch": 5.677183833116036, - "grad_norm": 1.5674493312835693, - "learning_rate": 8.956482412060302e-05, - "loss": 5.8122, - "step": 10886 - }, - { - "epoch": 5.6777053455019555, - "grad_norm": 1.4700905084609985, - "learning_rate": 8.956381909547738e-05, - "loss": 5.8799, - "step": 10887 - }, - { - "epoch": 5.678226857887875, - "grad_norm": 1.5269750356674194, - "learning_rate": 8.956281407035176e-05, - "loss": 5.5959, - "step": 10888 - }, - { - "epoch": 5.678748370273794, - "grad_norm": 1.5720492601394653, - "learning_rate": 8.956180904522614e-05, - "loss": 5.4541, - "step": 10889 - }, - { - "epoch": 5.679269882659713, - "grad_norm": 1.4046553373336792, - "learning_rate": 8.956080402010051e-05, - "loss": 5.6894, - "step": 10890 - }, - { - "epoch": 5.6797913950456325, - "grad_norm": 1.5193123817443848, - "learning_rate": 8.955979899497488e-05, - "loss": 5.3373, - "step": 10891 - }, - { - "epoch": 5.680312907431551, - "grad_norm": 1.3828458786010742, - "learning_rate": 8.955879396984926e-05, - "loss": 5.8327, - "step": 10892 - }, - { - "epoch": 5.6808344198174705, - "grad_norm": 1.5247118473052979, - "learning_rate": 8.955778894472362e-05, - "loss": 5.4157, - "step": 10893 - }, - { - "epoch": 5.68135593220339, - "grad_norm": 1.5297023057937622, - "learning_rate": 8.9556783919598e-05, - "loss": 5.6032, - "step": 10894 - }, - { - "epoch": 5.681877444589309, - "grad_norm": 1.4664528369903564, - "learning_rate": 8.955577889447236e-05, - "loss": 5.4147, - "step": 10895 - }, - { - "epoch": 5.682398956975228, - "grad_norm": 1.573445439338684, - "learning_rate": 8.955477386934674e-05, - "loss": 5.0414, - "step": 10896 - }, - { - "epoch": 5.6829204693611475, - "grad_norm": 1.6101837158203125, - "learning_rate": 8.95537688442211e-05, - "loss": 5.188, - "step": 10897 - }, - { - "epoch": 5.683441981747066, - "grad_norm": 1.4774879217147827, - "learning_rate": 8.955276381909548e-05, - "loss": 5.5439, - "step": 10898 - }, - { - "epoch": 5.6839634941329855, - "grad_norm": 1.341994285583496, - "learning_rate": 8.955175879396985e-05, - "loss": 6.0121, - "step": 10899 - }, - { - "epoch": 5.684485006518905, - "grad_norm": 1.5684735774993896, - "learning_rate": 8.955075376884422e-05, - "loss": 5.1122, - "step": 10900 - }, - { - "epoch": 5.6850065189048244, - "grad_norm": 1.5068870782852173, - "learning_rate": 8.95497487437186e-05, - "loss": 5.4564, - "step": 10901 - }, - { - "epoch": 5.685528031290743, - "grad_norm": 1.398180365562439, - "learning_rate": 8.954874371859297e-05, - "loss": 5.2319, - "step": 10902 - }, - { - "epoch": 5.6860495436766625, - "grad_norm": 1.425860047340393, - "learning_rate": 8.954773869346734e-05, - "loss": 5.4307, - "step": 10903 - }, - { - "epoch": 5.686571056062581, - "grad_norm": 1.578149676322937, - "learning_rate": 8.954673366834171e-05, - "loss": 5.4038, - "step": 10904 - }, - { - "epoch": 5.6870925684485005, - "grad_norm": 1.5110795497894287, - "learning_rate": 8.954572864321609e-05, - "loss": 5.1053, - "step": 10905 - }, - { - "epoch": 5.68761408083442, - "grad_norm": 1.4004648923873901, - "learning_rate": 8.954472361809045e-05, - "loss": 5.8621, - "step": 10906 - }, - { - "epoch": 5.6881355932203395, - "grad_norm": 1.4452509880065918, - "learning_rate": 8.954371859296483e-05, - "loss": 5.6713, - "step": 10907 - }, - { - "epoch": 5.688657105606258, - "grad_norm": 1.3804337978363037, - "learning_rate": 8.95427135678392e-05, - "loss": 5.801, - "step": 10908 - }, - { - "epoch": 5.6891786179921775, - "grad_norm": 1.4068528413772583, - "learning_rate": 8.954170854271357e-05, - "loss": 5.6451, - "step": 10909 - }, - { - "epoch": 5.689700130378096, - "grad_norm": 1.4152898788452148, - "learning_rate": 8.954070351758795e-05, - "loss": 5.5171, - "step": 10910 - }, - { - "epoch": 5.6902216427640155, - "grad_norm": 1.4434278011322021, - "learning_rate": 8.953969849246233e-05, - "loss": 5.7044, - "step": 10911 - }, - { - "epoch": 5.690743155149935, - "grad_norm": 1.3902643918991089, - "learning_rate": 8.953869346733669e-05, - "loss": 5.7079, - "step": 10912 - }, - { - "epoch": 5.6912646675358545, - "grad_norm": 1.4508655071258545, - "learning_rate": 8.953768844221107e-05, - "loss": 5.7864, - "step": 10913 - }, - { - "epoch": 5.691786179921773, - "grad_norm": 1.452014446258545, - "learning_rate": 8.953668341708543e-05, - "loss": 5.5031, - "step": 10914 - }, - { - "epoch": 5.6923076923076925, - "grad_norm": 1.376511812210083, - "learning_rate": 8.95356783919598e-05, - "loss": 5.7767, - "step": 10915 - }, - { - "epoch": 5.692829204693611, - "grad_norm": 1.4678125381469727, - "learning_rate": 8.953467336683418e-05, - "loss": 5.5009, - "step": 10916 - }, - { - "epoch": 5.6933507170795306, - "grad_norm": 1.4048317670822144, - "learning_rate": 8.953366834170854e-05, - "loss": 5.8665, - "step": 10917 - }, - { - "epoch": 5.69387222946545, - "grad_norm": 1.605446219444275, - "learning_rate": 8.953266331658292e-05, - "loss": 5.5289, - "step": 10918 - }, - { - "epoch": 5.694393741851369, - "grad_norm": 1.4213716983795166, - "learning_rate": 8.953165829145728e-05, - "loss": 5.7146, - "step": 10919 - }, - { - "epoch": 5.694915254237288, - "grad_norm": 1.523597240447998, - "learning_rate": 8.953065326633166e-05, - "loss": 5.691, - "step": 10920 - }, - { - "epoch": 5.6954367666232075, - "grad_norm": 1.4171901941299438, - "learning_rate": 8.952964824120604e-05, - "loss": 5.7422, - "step": 10921 - }, - { - "epoch": 5.695958279009126, - "grad_norm": 1.5542359352111816, - "learning_rate": 8.952864321608042e-05, - "loss": 5.2608, - "step": 10922 - }, - { - "epoch": 5.696479791395046, - "grad_norm": 1.3167171478271484, - "learning_rate": 8.952763819095478e-05, - "loss": 5.9786, - "step": 10923 - }, - { - "epoch": 5.697001303780965, - "grad_norm": 1.7536647319793701, - "learning_rate": 8.952663316582916e-05, - "loss": 5.6015, - "step": 10924 - }, - { - "epoch": 5.697522816166884, - "grad_norm": 1.4066106081008911, - "learning_rate": 8.952562814070352e-05, - "loss": 5.1558, - "step": 10925 - }, - { - "epoch": 5.698044328552803, - "grad_norm": 1.5758917331695557, - "learning_rate": 8.95246231155779e-05, - "loss": 5.6382, - "step": 10926 - }, - { - "epoch": 5.6985658409387225, - "grad_norm": 1.3920670747756958, - "learning_rate": 8.952361809045226e-05, - "loss": 5.924, - "step": 10927 - }, - { - "epoch": 5.699087353324641, - "grad_norm": 1.3939228057861328, - "learning_rate": 8.952261306532663e-05, - "loss": 5.4202, - "step": 10928 - }, - { - "epoch": 5.699608865710561, - "grad_norm": 1.463529348373413, - "learning_rate": 8.9521608040201e-05, - "loss": 5.4931, - "step": 10929 - }, - { - "epoch": 5.70013037809648, - "grad_norm": 1.5447313785552979, - "learning_rate": 8.952060301507538e-05, - "loss": 5.8251, - "step": 10930 - }, - { - "epoch": 5.700651890482399, - "grad_norm": 1.4554716348648071, - "learning_rate": 8.951959798994976e-05, - "loss": 5.4079, - "step": 10931 - }, - { - "epoch": 5.701173402868318, - "grad_norm": 1.4944186210632324, - "learning_rate": 8.951859296482413e-05, - "loss": 5.5237, - "step": 10932 - }, - { - "epoch": 5.7016949152542376, - "grad_norm": 1.6450525522232056, - "learning_rate": 8.95175879396985e-05, - "loss": 5.4721, - "step": 10933 - }, - { - "epoch": 5.702216427640156, - "grad_norm": 1.318076729774475, - "learning_rate": 8.951658291457287e-05, - "loss": 5.9095, - "step": 10934 - }, - { - "epoch": 5.702737940026076, - "grad_norm": 1.4043376445770264, - "learning_rate": 8.951557788944725e-05, - "loss": 5.7314, - "step": 10935 - }, - { - "epoch": 5.703259452411995, - "grad_norm": 1.3855525255203247, - "learning_rate": 8.951457286432161e-05, - "loss": 5.9091, - "step": 10936 - }, - { - "epoch": 5.703780964797914, - "grad_norm": 1.3910993337631226, - "learning_rate": 8.951356783919599e-05, - "loss": 5.6053, - "step": 10937 - }, - { - "epoch": 5.704302477183833, - "grad_norm": 1.4748504161834717, - "learning_rate": 8.951256281407035e-05, - "loss": 5.7609, - "step": 10938 - }, - { - "epoch": 5.704823989569753, - "grad_norm": 1.404198169708252, - "learning_rate": 8.951155778894473e-05, - "loss": 5.9651, - "step": 10939 - }, - { - "epoch": 5.705345501955671, - "grad_norm": 1.5238829851150513, - "learning_rate": 8.95105527638191e-05, - "loss": 5.2186, - "step": 10940 - }, - { - "epoch": 5.705867014341591, - "grad_norm": 1.435915470123291, - "learning_rate": 8.950954773869347e-05, - "loss": 5.5549, - "step": 10941 - }, - { - "epoch": 5.70638852672751, - "grad_norm": 1.5584204196929932, - "learning_rate": 8.950854271356785e-05, - "loss": 5.1931, - "step": 10942 - }, - { - "epoch": 5.706910039113429, - "grad_norm": 1.3807698488235474, - "learning_rate": 8.950753768844221e-05, - "loss": 5.2595, - "step": 10943 - }, - { - "epoch": 5.707431551499348, - "grad_norm": 1.4175143241882324, - "learning_rate": 8.950653266331659e-05, - "loss": 5.7175, - "step": 10944 - }, - { - "epoch": 5.707953063885268, - "grad_norm": 1.3361456394195557, - "learning_rate": 8.950552763819096e-05, - "loss": 5.734, - "step": 10945 - }, - { - "epoch": 5.708474576271186, - "grad_norm": 1.260603666305542, - "learning_rate": 8.950452261306533e-05, - "loss": 5.6654, - "step": 10946 - }, - { - "epoch": 5.708996088657106, - "grad_norm": 2.0194332599639893, - "learning_rate": 8.95035175879397e-05, - "loss": 5.0596, - "step": 10947 - }, - { - "epoch": 5.709517601043025, - "grad_norm": 1.5251694917678833, - "learning_rate": 8.950251256281408e-05, - "loss": 5.2217, - "step": 10948 - }, - { - "epoch": 5.710039113428944, - "grad_norm": 1.3567287921905518, - "learning_rate": 8.950150753768844e-05, - "loss": 5.9272, - "step": 10949 - }, - { - "epoch": 5.710560625814863, - "grad_norm": 1.4431544542312622, - "learning_rate": 8.950050251256282e-05, - "loss": 5.4764, - "step": 10950 - }, - { - "epoch": 5.711082138200783, - "grad_norm": 1.408400058746338, - "learning_rate": 8.94994974874372e-05, - "loss": 6.2005, - "step": 10951 - }, - { - "epoch": 5.711603650586701, - "grad_norm": 1.502579927444458, - "learning_rate": 8.949849246231157e-05, - "loss": 5.8148, - "step": 10952 - }, - { - "epoch": 5.712125162972621, - "grad_norm": 1.5935227870941162, - "learning_rate": 8.949748743718594e-05, - "loss": 5.3022, - "step": 10953 - }, - { - "epoch": 5.71264667535854, - "grad_norm": 1.394395112991333, - "learning_rate": 8.94964824120603e-05, - "loss": 5.3106, - "step": 10954 - }, - { - "epoch": 5.713168187744459, - "grad_norm": 1.4722621440887451, - "learning_rate": 8.949547738693468e-05, - "loss": 5.7117, - "step": 10955 - }, - { - "epoch": 5.713689700130378, - "grad_norm": 1.5680347681045532, - "learning_rate": 8.949447236180904e-05, - "loss": 5.2212, - "step": 10956 - }, - { - "epoch": 5.714211212516298, - "grad_norm": 1.603228211402893, - "learning_rate": 8.949346733668342e-05, - "loss": 5.3674, - "step": 10957 - }, - { - "epoch": 5.714732724902216, - "grad_norm": 1.4112343788146973, - "learning_rate": 8.949246231155779e-05, - "loss": 5.8224, - "step": 10958 - }, - { - "epoch": 5.715254237288136, - "grad_norm": 1.4585505723953247, - "learning_rate": 8.949145728643216e-05, - "loss": 5.7207, - "step": 10959 - }, - { - "epoch": 5.715775749674055, - "grad_norm": 1.5137776136398315, - "learning_rate": 8.949045226130653e-05, - "loss": 5.4966, - "step": 10960 - }, - { - "epoch": 5.716297262059974, - "grad_norm": 1.5856624841690063, - "learning_rate": 8.94894472361809e-05, - "loss": 5.5685, - "step": 10961 - }, - { - "epoch": 5.716818774445893, - "grad_norm": 1.4281328916549683, - "learning_rate": 8.948844221105528e-05, - "loss": 5.3968, - "step": 10962 - }, - { - "epoch": 5.717340286831813, - "grad_norm": 1.4963778257369995, - "learning_rate": 8.948743718592966e-05, - "loss": 5.6856, - "step": 10963 - }, - { - "epoch": 5.717861799217731, - "grad_norm": 1.5307120084762573, - "learning_rate": 8.948643216080403e-05, - "loss": 5.8347, - "step": 10964 - }, - { - "epoch": 5.718383311603651, - "grad_norm": 1.4833736419677734, - "learning_rate": 8.94854271356784e-05, - "loss": 5.399, - "step": 10965 - }, - { - "epoch": 5.71890482398957, - "grad_norm": 1.4166479110717773, - "learning_rate": 8.948442211055277e-05, - "loss": 5.6579, - "step": 10966 - }, - { - "epoch": 5.719426336375489, - "grad_norm": 1.4053109884262085, - "learning_rate": 8.948341708542713e-05, - "loss": 5.7199, - "step": 10967 - }, - { - "epoch": 5.719947848761408, - "grad_norm": 1.5429428815841675, - "learning_rate": 8.948241206030151e-05, - "loss": 5.4244, - "step": 10968 - }, - { - "epoch": 5.720469361147328, - "grad_norm": 1.529392957687378, - "learning_rate": 8.948140703517587e-05, - "loss": 5.0858, - "step": 10969 - }, - { - "epoch": 5.720990873533246, - "grad_norm": 1.5008496046066284, - "learning_rate": 8.948040201005025e-05, - "loss": 5.657, - "step": 10970 - }, - { - "epoch": 5.721512385919166, - "grad_norm": 1.4729105234146118, - "learning_rate": 8.947939698492463e-05, - "loss": 5.9612, - "step": 10971 - }, - { - "epoch": 5.722033898305085, - "grad_norm": 1.409668207168579, - "learning_rate": 8.947839195979901e-05, - "loss": 5.8277, - "step": 10972 - }, - { - "epoch": 5.722555410691004, - "grad_norm": 1.4998316764831543, - "learning_rate": 8.947738693467337e-05, - "loss": 5.8199, - "step": 10973 - }, - { - "epoch": 5.723076923076923, - "grad_norm": 1.370540976524353, - "learning_rate": 8.947638190954775e-05, - "loss": 5.5689, - "step": 10974 - }, - { - "epoch": 5.723598435462842, - "grad_norm": 1.4309996366500854, - "learning_rate": 8.947537688442211e-05, - "loss": 5.6935, - "step": 10975 - }, - { - "epoch": 5.724119947848761, - "grad_norm": 1.3925691843032837, - "learning_rate": 8.947437185929649e-05, - "loss": 5.8133, - "step": 10976 - }, - { - "epoch": 5.724641460234681, - "grad_norm": 1.4683897495269775, - "learning_rate": 8.947336683417086e-05, - "loss": 5.8919, - "step": 10977 - }, - { - "epoch": 5.7251629726206, - "grad_norm": 1.4365804195404053, - "learning_rate": 8.947236180904523e-05, - "loss": 5.5713, - "step": 10978 - }, - { - "epoch": 5.725684485006519, - "grad_norm": 1.537802815437317, - "learning_rate": 8.94713567839196e-05, - "loss": 5.5111, - "step": 10979 - }, - { - "epoch": 5.726205997392438, - "grad_norm": 1.5508480072021484, - "learning_rate": 8.947035175879396e-05, - "loss": 5.7635, - "step": 10980 - }, - { - "epoch": 5.726727509778357, - "grad_norm": 1.4963473081588745, - "learning_rate": 8.946934673366834e-05, - "loss": 5.3067, - "step": 10981 - }, - { - "epoch": 5.727249022164276, - "grad_norm": 1.3477096557617188, - "learning_rate": 8.946834170854272e-05, - "loss": 5.6427, - "step": 10982 - }, - { - "epoch": 5.727770534550196, - "grad_norm": 1.5321629047393799, - "learning_rate": 8.94673366834171e-05, - "loss": 5.6251, - "step": 10983 - }, - { - "epoch": 5.728292046936115, - "grad_norm": 1.311721920967102, - "learning_rate": 8.946633165829146e-05, - "loss": 5.7563, - "step": 10984 - }, - { - "epoch": 5.728813559322034, - "grad_norm": 1.5104773044586182, - "learning_rate": 8.946532663316584e-05, - "loss": 5.5172, - "step": 10985 - }, - { - "epoch": 5.729335071707953, - "grad_norm": 1.5570684671401978, - "learning_rate": 8.94643216080402e-05, - "loss": 5.4071, - "step": 10986 - }, - { - "epoch": 5.729856584093872, - "grad_norm": 1.3081616163253784, - "learning_rate": 8.946331658291458e-05, - "loss": 5.8312, - "step": 10987 - }, - { - "epoch": 5.730378096479791, - "grad_norm": 1.4622939825057983, - "learning_rate": 8.946231155778895e-05, - "loss": 5.4964, - "step": 10988 - }, - { - "epoch": 5.730899608865711, - "grad_norm": 1.5187374353408813, - "learning_rate": 8.946130653266332e-05, - "loss": 5.6395, - "step": 10989 - }, - { - "epoch": 5.73142112125163, - "grad_norm": 1.5808544158935547, - "learning_rate": 8.946030150753769e-05, - "loss": 5.5626, - "step": 10990 - }, - { - "epoch": 5.731942633637549, - "grad_norm": 1.4094609022140503, - "learning_rate": 8.945929648241207e-05, - "loss": 5.7477, - "step": 10991 - }, - { - "epoch": 5.732464146023468, - "grad_norm": 1.3323421478271484, - "learning_rate": 8.945829145728643e-05, - "loss": 5.7479, - "step": 10992 - }, - { - "epoch": 5.732985658409387, - "grad_norm": 1.4520392417907715, - "learning_rate": 8.945728643216081e-05, - "loss": 5.5093, - "step": 10993 - }, - { - "epoch": 5.733507170795306, - "grad_norm": 1.4037079811096191, - "learning_rate": 8.945628140703519e-05, - "loss": 5.6726, - "step": 10994 - }, - { - "epoch": 5.734028683181226, - "grad_norm": 1.3451672792434692, - "learning_rate": 8.945527638190955e-05, - "loss": 5.8028, - "step": 10995 - }, - { - "epoch": 5.734550195567145, - "grad_norm": 1.447629690170288, - "learning_rate": 8.945427135678393e-05, - "loss": 5.7794, - "step": 10996 - }, - { - "epoch": 5.735071707953064, - "grad_norm": 1.350425362586975, - "learning_rate": 8.945326633165829e-05, - "loss": 5.8462, - "step": 10997 - }, - { - "epoch": 5.735593220338983, - "grad_norm": 1.5589371919631958, - "learning_rate": 8.945226130653267e-05, - "loss": 5.1152, - "step": 10998 - }, - { - "epoch": 5.736114732724902, - "grad_norm": 1.5589927434921265, - "learning_rate": 8.945125628140703e-05, - "loss": 5.5062, - "step": 10999 - }, - { - "epoch": 5.736636245110821, - "grad_norm": 1.450381875038147, - "learning_rate": 8.945025125628141e-05, - "loss": 5.7672, - "step": 11000 - }, - { - "epoch": 5.737157757496741, - "grad_norm": 1.3628703355789185, - "learning_rate": 8.944924623115578e-05, - "loss": 5.6845, - "step": 11001 - }, - { - "epoch": 5.73767926988266, - "grad_norm": 1.3711543083190918, - "learning_rate": 8.944824120603015e-05, - "loss": 5.6646, - "step": 11002 - }, - { - "epoch": 5.738200782268579, - "grad_norm": 1.4496585130691528, - "learning_rate": 8.944723618090453e-05, - "loss": 5.2687, - "step": 11003 - }, - { - "epoch": 5.738722294654498, - "grad_norm": 1.3922224044799805, - "learning_rate": 8.944623115577891e-05, - "loss": 5.9829, - "step": 11004 - }, - { - "epoch": 5.739243807040417, - "grad_norm": 1.3956485986709595, - "learning_rate": 8.944522613065327e-05, - "loss": 6.0203, - "step": 11005 - }, - { - "epoch": 5.739765319426336, - "grad_norm": 1.4096879959106445, - "learning_rate": 8.944422110552765e-05, - "loss": 6.0832, - "step": 11006 - }, - { - "epoch": 5.740286831812256, - "grad_norm": 1.3465909957885742, - "learning_rate": 8.944321608040202e-05, - "loss": 5.6256, - "step": 11007 - }, - { - "epoch": 5.740808344198175, - "grad_norm": 1.356597900390625, - "learning_rate": 8.944221105527638e-05, - "loss": 5.4909, - "step": 11008 - }, - { - "epoch": 5.741329856584094, - "grad_norm": 1.4740556478500366, - "learning_rate": 8.944120603015076e-05, - "loss": 5.6433, - "step": 11009 - }, - { - "epoch": 5.741851368970013, - "grad_norm": 1.419491171836853, - "learning_rate": 8.944020100502512e-05, - "loss": 5.7459, - "step": 11010 - }, - { - "epoch": 5.742372881355932, - "grad_norm": 1.5163482427597046, - "learning_rate": 8.94391959798995e-05, - "loss": 5.7211, - "step": 11011 - }, - { - "epoch": 5.742894393741851, - "grad_norm": 1.496117353439331, - "learning_rate": 8.943819095477386e-05, - "loss": 5.5877, - "step": 11012 - }, - { - "epoch": 5.743415906127771, - "grad_norm": 1.4984523057937622, - "learning_rate": 8.943718592964824e-05, - "loss": 5.6667, - "step": 11013 - }, - { - "epoch": 5.743937418513689, - "grad_norm": 1.4717193841934204, - "learning_rate": 8.943618090452262e-05, - "loss": 5.4994, - "step": 11014 - }, - { - "epoch": 5.744458930899609, - "grad_norm": 1.6193602085113525, - "learning_rate": 8.9435175879397e-05, - "loss": 5.395, - "step": 11015 - }, - { - "epoch": 5.744980443285528, - "grad_norm": 1.4333332777023315, - "learning_rate": 8.943417085427136e-05, - "loss": 5.4213, - "step": 11016 - }, - { - "epoch": 5.745501955671447, - "grad_norm": 1.5097194910049438, - "learning_rate": 8.943316582914574e-05, - "loss": 5.9078, - "step": 11017 - }, - { - "epoch": 5.746023468057366, - "grad_norm": 1.432334542274475, - "learning_rate": 8.94321608040201e-05, - "loss": 5.1842, - "step": 11018 - }, - { - "epoch": 5.746544980443286, - "grad_norm": 1.4199100732803345, - "learning_rate": 8.943115577889448e-05, - "loss": 5.8599, - "step": 11019 - }, - { - "epoch": 5.747066492829204, - "grad_norm": 1.3694907426834106, - "learning_rate": 8.943015075376885e-05, - "loss": 5.927, - "step": 11020 - }, - { - "epoch": 5.747588005215124, - "grad_norm": 1.545912504196167, - "learning_rate": 8.942914572864321e-05, - "loss": 5.4135, - "step": 11021 - }, - { - "epoch": 5.748109517601043, - "grad_norm": 1.4554182291030884, - "learning_rate": 8.942814070351759e-05, - "loss": 5.826, - "step": 11022 - }, - { - "epoch": 5.748631029986962, - "grad_norm": 1.4468672275543213, - "learning_rate": 8.942713567839197e-05, - "loss": 5.5431, - "step": 11023 - }, - { - "epoch": 5.749152542372881, - "grad_norm": 1.3289719820022583, - "learning_rate": 8.942613065326634e-05, - "loss": 5.7963, - "step": 11024 - }, - { - "epoch": 5.749674054758801, - "grad_norm": 1.5375677347183228, - "learning_rate": 8.942512562814071e-05, - "loss": 5.4564, - "step": 11025 - }, - { - "epoch": 5.750195567144719, - "grad_norm": 1.456183910369873, - "learning_rate": 8.942412060301509e-05, - "loss": 5.8883, - "step": 11026 - }, - { - "epoch": 5.750717079530639, - "grad_norm": 1.474006175994873, - "learning_rate": 8.942311557788945e-05, - "loss": 5.1966, - "step": 11027 - }, - { - "epoch": 5.751238591916558, - "grad_norm": 1.4617528915405273, - "learning_rate": 8.942211055276383e-05, - "loss": 5.3073, - "step": 11028 - }, - { - "epoch": 5.751760104302477, - "grad_norm": 1.2781676054000854, - "learning_rate": 8.942110552763819e-05, - "loss": 5.6155, - "step": 11029 - }, - { - "epoch": 5.752281616688396, - "grad_norm": 1.4530922174453735, - "learning_rate": 8.942010050251257e-05, - "loss": 5.7863, - "step": 11030 - }, - { - "epoch": 5.752803129074316, - "grad_norm": 1.499869704246521, - "learning_rate": 8.941909547738693e-05, - "loss": 6.0168, - "step": 11031 - }, - { - "epoch": 5.753324641460234, - "grad_norm": 1.4238908290863037, - "learning_rate": 8.941809045226131e-05, - "loss": 5.6372, - "step": 11032 - }, - { - "epoch": 5.753846153846154, - "grad_norm": 1.2409641742706299, - "learning_rate": 8.941708542713568e-05, - "loss": 6.0574, - "step": 11033 - }, - { - "epoch": 5.754367666232073, - "grad_norm": 1.3865808248519897, - "learning_rate": 8.941608040201005e-05, - "loss": 5.89, - "step": 11034 - }, - { - "epoch": 5.754889178617992, - "grad_norm": 1.4461098909378052, - "learning_rate": 8.941507537688443e-05, - "loss": 5.7926, - "step": 11035 - }, - { - "epoch": 5.755410691003911, - "grad_norm": 1.344017744064331, - "learning_rate": 8.94140703517588e-05, - "loss": 5.6779, - "step": 11036 - }, - { - "epoch": 5.755932203389831, - "grad_norm": 1.4051610231399536, - "learning_rate": 8.941306532663317e-05, - "loss": 5.5074, - "step": 11037 - }, - { - "epoch": 5.756453715775749, - "grad_norm": 1.697458267211914, - "learning_rate": 8.941206030150754e-05, - "loss": 5.7422, - "step": 11038 - }, - { - "epoch": 5.756975228161669, - "grad_norm": 1.4263421297073364, - "learning_rate": 8.941105527638192e-05, - "loss": 5.6597, - "step": 11039 - }, - { - "epoch": 5.757496740547588, - "grad_norm": 1.430764079093933, - "learning_rate": 8.941005025125628e-05, - "loss": 5.9198, - "step": 11040 - }, - { - "epoch": 5.758018252933507, - "grad_norm": 1.4301328659057617, - "learning_rate": 8.940904522613066e-05, - "loss": 5.6439, - "step": 11041 - }, - { - "epoch": 5.758539765319426, - "grad_norm": 1.4273492097854614, - "learning_rate": 8.940804020100502e-05, - "loss": 5.2004, - "step": 11042 - }, - { - "epoch": 5.759061277705346, - "grad_norm": 1.3749626874923706, - "learning_rate": 8.94070351758794e-05, - "loss": 5.2141, - "step": 11043 - }, - { - "epoch": 5.759582790091264, - "grad_norm": 1.4186491966247559, - "learning_rate": 8.940603015075378e-05, - "loss": 5.7131, - "step": 11044 - }, - { - "epoch": 5.760104302477184, - "grad_norm": 1.457743763923645, - "learning_rate": 8.940502512562816e-05, - "loss": 5.35, - "step": 11045 - }, - { - "epoch": 5.760625814863103, - "grad_norm": 1.3756777048110962, - "learning_rate": 8.940402010050252e-05, - "loss": 5.6292, - "step": 11046 - }, - { - "epoch": 5.761147327249022, - "grad_norm": 1.5122383832931519, - "learning_rate": 8.940301507537688e-05, - "loss": 5.3864, - "step": 11047 - }, - { - "epoch": 5.761668839634941, - "grad_norm": 1.7569669485092163, - "learning_rate": 8.940201005025126e-05, - "loss": 5.3712, - "step": 11048 - }, - { - "epoch": 5.762190352020861, - "grad_norm": 1.3629153966903687, - "learning_rate": 8.940100502512563e-05, - "loss": 5.37, - "step": 11049 - }, - { - "epoch": 5.762711864406779, - "grad_norm": 1.326552152633667, - "learning_rate": 8.94e-05, - "loss": 5.9947, - "step": 11050 - }, - { - "epoch": 5.763233376792699, - "grad_norm": 1.3558276891708374, - "learning_rate": 8.939899497487437e-05, - "loss": 5.6667, - "step": 11051 - }, - { - "epoch": 5.763754889178618, - "grad_norm": 1.3653569221496582, - "learning_rate": 8.939798994974875e-05, - "loss": 5.8591, - "step": 11052 - }, - { - "epoch": 5.764276401564537, - "grad_norm": 1.493650197982788, - "learning_rate": 8.939698492462311e-05, - "loss": 5.5972, - "step": 11053 - }, - { - "epoch": 5.764797913950456, - "grad_norm": 1.481748342514038, - "learning_rate": 8.939597989949749e-05, - "loss": 5.1749, - "step": 11054 - }, - { - "epoch": 5.765319426336376, - "grad_norm": 1.3626928329467773, - "learning_rate": 8.939497487437187e-05, - "loss": 5.9639, - "step": 11055 - }, - { - "epoch": 5.765840938722294, - "grad_norm": 1.3350484371185303, - "learning_rate": 8.939396984924624e-05, - "loss": 5.6259, - "step": 11056 - }, - { - "epoch": 5.766362451108214, - "grad_norm": 1.4143821001052856, - "learning_rate": 8.939296482412061e-05, - "loss": 5.7037, - "step": 11057 - }, - { - "epoch": 5.766883963494133, - "grad_norm": 1.4553712606430054, - "learning_rate": 8.939195979899499e-05, - "loss": 6.1459, - "step": 11058 - }, - { - "epoch": 5.767405475880052, - "grad_norm": 1.3865184783935547, - "learning_rate": 8.939095477386935e-05, - "loss": 5.31, - "step": 11059 - }, - { - "epoch": 5.767926988265971, - "grad_norm": 1.4803879261016846, - "learning_rate": 8.938994974874372e-05, - "loss": 5.8505, - "step": 11060 - }, - { - "epoch": 5.768448500651891, - "grad_norm": 1.5032525062561035, - "learning_rate": 8.938894472361809e-05, - "loss": 5.665, - "step": 11061 - }, - { - "epoch": 5.768970013037809, - "grad_norm": 1.4906001091003418, - "learning_rate": 8.938793969849246e-05, - "loss": 5.6805, - "step": 11062 - }, - { - "epoch": 5.769491525423729, - "grad_norm": 1.5074368715286255, - "learning_rate": 8.938693467336684e-05, - "loss": 5.082, - "step": 11063 - }, - { - "epoch": 5.770013037809648, - "grad_norm": 1.3212162256240845, - "learning_rate": 8.938592964824121e-05, - "loss": 4.9176, - "step": 11064 - }, - { - "epoch": 5.770534550195567, - "grad_norm": 1.5805473327636719, - "learning_rate": 8.938492462311559e-05, - "loss": 5.4522, - "step": 11065 - }, - { - "epoch": 5.771056062581486, - "grad_norm": 1.3666032552719116, - "learning_rate": 8.938391959798996e-05, - "loss": 5.1997, - "step": 11066 - }, - { - "epoch": 5.771577574967406, - "grad_norm": 1.3874982595443726, - "learning_rate": 8.938291457286433e-05, - "loss": 5.6514, - "step": 11067 - }, - { - "epoch": 5.772099087353324, - "grad_norm": 1.5265312194824219, - "learning_rate": 8.93819095477387e-05, - "loss": 4.5437, - "step": 11068 - }, - { - "epoch": 5.772620599739244, - "grad_norm": 1.3165547847747803, - "learning_rate": 8.938090452261307e-05, - "loss": 5.8756, - "step": 11069 - }, - { - "epoch": 5.7731421121251625, - "grad_norm": 1.5182777643203735, - "learning_rate": 8.937989949748744e-05, - "loss": 5.2466, - "step": 11070 - }, - { - "epoch": 5.773663624511082, - "grad_norm": 1.555492639541626, - "learning_rate": 8.937889447236182e-05, - "loss": 5.0518, - "step": 11071 - }, - { - "epoch": 5.774185136897001, - "grad_norm": 1.4603099822998047, - "learning_rate": 8.937788944723618e-05, - "loss": 5.8957, - "step": 11072 - }, - { - "epoch": 5.774706649282921, - "grad_norm": 1.4643217325210571, - "learning_rate": 8.937688442211055e-05, - "loss": 5.5346, - "step": 11073 - }, - { - "epoch": 5.7752281616688395, - "grad_norm": 1.7149631977081299, - "learning_rate": 8.937587939698492e-05, - "loss": 5.227, - "step": 11074 - }, - { - "epoch": 5.775749674054759, - "grad_norm": 1.679935097694397, - "learning_rate": 8.93748743718593e-05, - "loss": 5.737, - "step": 11075 - }, - { - "epoch": 5.7762711864406775, - "grad_norm": 1.650301456451416, - "learning_rate": 8.937386934673368e-05, - "loss": 5.7239, - "step": 11076 - }, - { - "epoch": 5.776792698826597, - "grad_norm": 1.6086139678955078, - "learning_rate": 8.937286432160804e-05, - "loss": 5.4067, - "step": 11077 - }, - { - "epoch": 5.777314211212516, - "grad_norm": 1.38319730758667, - "learning_rate": 8.937185929648242e-05, - "loss": 5.8881, - "step": 11078 - }, - { - "epoch": 5.777835723598436, - "grad_norm": 1.432379961013794, - "learning_rate": 8.937085427135679e-05, - "loss": 5.9094, - "step": 11079 - }, - { - "epoch": 5.7783572359843545, - "grad_norm": 1.3226871490478516, - "learning_rate": 8.936984924623116e-05, - "loss": 5.8952, - "step": 11080 - }, - { - "epoch": 5.778878748370274, - "grad_norm": 1.5094563961029053, - "learning_rate": 8.936884422110553e-05, - "loss": 5.8823, - "step": 11081 - }, - { - "epoch": 5.7794002607561925, - "grad_norm": 1.4125616550445557, - "learning_rate": 8.93678391959799e-05, - "loss": 5.7093, - "step": 11082 - }, - { - "epoch": 5.779921773142112, - "grad_norm": 1.3918821811676025, - "learning_rate": 8.936683417085427e-05, - "loss": 5.8885, - "step": 11083 - }, - { - "epoch": 5.780443285528031, - "grad_norm": 1.442017912864685, - "learning_rate": 8.936582914572865e-05, - "loss": 5.5438, - "step": 11084 - }, - { - "epoch": 5.780964797913951, - "grad_norm": 1.5405405759811401, - "learning_rate": 8.936482412060303e-05, - "loss": 5.6053, - "step": 11085 - }, - { - "epoch": 5.7814863102998695, - "grad_norm": 1.3721171617507935, - "learning_rate": 8.936381909547739e-05, - "loss": 5.0246, - "step": 11086 - }, - { - "epoch": 5.782007822685789, - "grad_norm": 1.5957597494125366, - "learning_rate": 8.936281407035177e-05, - "loss": 5.8572, - "step": 11087 - }, - { - "epoch": 5.7825293350717075, - "grad_norm": 1.4496006965637207, - "learning_rate": 8.936180904522613e-05, - "loss": 5.6295, - "step": 11088 - }, - { - "epoch": 5.783050847457627, - "grad_norm": 1.6491602659225464, - "learning_rate": 8.936080402010051e-05, - "loss": 5.4069, - "step": 11089 - }, - { - "epoch": 5.7835723598435465, - "grad_norm": 1.4083802700042725, - "learning_rate": 8.935979899497487e-05, - "loss": 5.6554, - "step": 11090 - }, - { - "epoch": 5.784093872229466, - "grad_norm": 1.447011947631836, - "learning_rate": 8.935879396984925e-05, - "loss": 5.7874, - "step": 11091 - }, - { - "epoch": 5.7846153846153845, - "grad_norm": 1.5056390762329102, - "learning_rate": 8.935778894472362e-05, - "loss": 5.3976, - "step": 11092 - }, - { - "epoch": 5.785136897001304, - "grad_norm": 1.5651241540908813, - "learning_rate": 8.9356783919598e-05, - "loss": 4.9541, - "step": 11093 - }, - { - "epoch": 5.7856584093872225, - "grad_norm": 2.1426172256469727, - "learning_rate": 8.935577889447236e-05, - "loss": 5.3628, - "step": 11094 - }, - { - "epoch": 5.786179921773142, - "grad_norm": 1.5441268682479858, - "learning_rate": 8.935477386934674e-05, - "loss": 5.7623, - "step": 11095 - }, - { - "epoch": 5.7867014341590615, - "grad_norm": 1.530523419380188, - "learning_rate": 8.935376884422111e-05, - "loss": 6.1924, - "step": 11096 - }, - { - "epoch": 5.787222946544981, - "grad_norm": 1.4667168855667114, - "learning_rate": 8.935276381909549e-05, - "loss": 5.5349, - "step": 11097 - }, - { - "epoch": 5.7877444589308995, - "grad_norm": 1.4252454042434692, - "learning_rate": 8.935175879396986e-05, - "loss": 5.8541, - "step": 11098 - }, - { - "epoch": 5.788265971316819, - "grad_norm": 1.4585285186767578, - "learning_rate": 8.935075376884423e-05, - "loss": 5.3541, - "step": 11099 - }, - { - "epoch": 5.7887874837027375, - "grad_norm": 1.462559700012207, - "learning_rate": 8.93497487437186e-05, - "loss": 6.0104, - "step": 11100 - }, - { - "epoch": 5.789308996088657, - "grad_norm": 1.467221736907959, - "learning_rate": 8.934874371859296e-05, - "loss": 5.7553, - "step": 11101 - }, - { - "epoch": 5.7898305084745765, - "grad_norm": 1.3957103490829468, - "learning_rate": 8.934773869346734e-05, - "loss": 5.8107, - "step": 11102 - }, - { - "epoch": 5.790352020860495, - "grad_norm": 1.5709515810012817, - "learning_rate": 8.93467336683417e-05, - "loss": 5.9105, - "step": 11103 - }, - { - "epoch": 5.7908735332464145, - "grad_norm": 1.4631412029266357, - "learning_rate": 8.934572864321608e-05, - "loss": 5.7856, - "step": 11104 - }, - { - "epoch": 5.791395045632334, - "grad_norm": 1.3820090293884277, - "learning_rate": 8.934472361809046e-05, - "loss": 5.6592, - "step": 11105 - }, - { - "epoch": 5.791916558018253, - "grad_norm": 1.5266880989074707, - "learning_rate": 8.934371859296484e-05, - "loss": 5.8181, - "step": 11106 - }, - { - "epoch": 5.792438070404172, - "grad_norm": 1.4228094816207886, - "learning_rate": 8.93427135678392e-05, - "loss": 5.9663, - "step": 11107 - }, - { - "epoch": 5.7929595827900915, - "grad_norm": 1.4558594226837158, - "learning_rate": 8.934170854271358e-05, - "loss": 5.5827, - "step": 11108 - }, - { - "epoch": 5.79348109517601, - "grad_norm": 1.6535422801971436, - "learning_rate": 8.934070351758794e-05, - "loss": 5.1751, - "step": 11109 - }, - { - "epoch": 5.7940026075619295, - "grad_norm": 1.3984551429748535, - "learning_rate": 8.933969849246232e-05, - "loss": 6.0277, - "step": 11110 - }, - { - "epoch": 5.794524119947849, - "grad_norm": 1.4308351278305054, - "learning_rate": 8.933869346733669e-05, - "loss": 5.4417, - "step": 11111 - }, - { - "epoch": 5.795045632333768, - "grad_norm": 1.5795142650604248, - "learning_rate": 8.933768844221106e-05, - "loss": 5.1329, - "step": 11112 - }, - { - "epoch": 5.795567144719687, - "grad_norm": 1.3238779306411743, - "learning_rate": 8.933668341708543e-05, - "loss": 5.746, - "step": 11113 - }, - { - "epoch": 5.7960886571056065, - "grad_norm": 1.5570451021194458, - "learning_rate": 8.933567839195979e-05, - "loss": 5.6325, - "step": 11114 - }, - { - "epoch": 5.796610169491525, - "grad_norm": 1.3931790590286255, - "learning_rate": 8.933467336683417e-05, - "loss": 5.566, - "step": 11115 - }, - { - "epoch": 5.7971316818774445, - "grad_norm": 1.429088830947876, - "learning_rate": 8.933366834170855e-05, - "loss": 5.5243, - "step": 11116 - }, - { - "epoch": 5.797653194263364, - "grad_norm": 1.45830237865448, - "learning_rate": 8.933266331658293e-05, - "loss": 5.3583, - "step": 11117 - }, - { - "epoch": 5.798174706649283, - "grad_norm": 1.5274248123168945, - "learning_rate": 8.933165829145729e-05, - "loss": 5.3013, - "step": 11118 - }, - { - "epoch": 5.798696219035202, - "grad_norm": 1.4783509969711304, - "learning_rate": 8.933065326633167e-05, - "loss": 5.4999, - "step": 11119 - }, - { - "epoch": 5.7992177314211215, - "grad_norm": 1.3838443756103516, - "learning_rate": 8.932964824120603e-05, - "loss": 5.6652, - "step": 11120 - }, - { - "epoch": 5.79973924380704, - "grad_norm": 1.520408034324646, - "learning_rate": 8.932864321608041e-05, - "loss": 5.6449, - "step": 11121 - }, - { - "epoch": 5.8002607561929596, - "grad_norm": 1.590740442276001, - "learning_rate": 8.932763819095477e-05, - "loss": 5.2637, - "step": 11122 - }, - { - "epoch": 5.800782268578879, - "grad_norm": 1.6381679773330688, - "learning_rate": 8.932663316582915e-05, - "loss": 5.4653, - "step": 11123 - }, - { - "epoch": 5.801303780964798, - "grad_norm": 1.4744840860366821, - "learning_rate": 8.932562814070352e-05, - "loss": 5.5353, - "step": 11124 - }, - { - "epoch": 5.801825293350717, - "grad_norm": 1.555695652961731, - "learning_rate": 8.93246231155779e-05, - "loss": 5.2291, - "step": 11125 - }, - { - "epoch": 5.8023468057366365, - "grad_norm": 1.5720689296722412, - "learning_rate": 8.932361809045227e-05, - "loss": 5.4784, - "step": 11126 - }, - { - "epoch": 5.802868318122555, - "grad_norm": 1.4789228439331055, - "learning_rate": 8.932261306532664e-05, - "loss": 5.7007, - "step": 11127 - }, - { - "epoch": 5.803389830508475, - "grad_norm": 1.482513666152954, - "learning_rate": 8.932160804020101e-05, - "loss": 5.1978, - "step": 11128 - }, - { - "epoch": 5.803911342894394, - "grad_norm": 1.5388909578323364, - "learning_rate": 8.932060301507538e-05, - "loss": 5.8298, - "step": 11129 - }, - { - "epoch": 5.804432855280313, - "grad_norm": 1.469780445098877, - "learning_rate": 8.931959798994976e-05, - "loss": 5.3417, - "step": 11130 - }, - { - "epoch": 5.804954367666232, - "grad_norm": 1.4363586902618408, - "learning_rate": 8.931859296482412e-05, - "loss": 5.2953, - "step": 11131 - }, - { - "epoch": 5.8054758800521515, - "grad_norm": 1.5173383951187134, - "learning_rate": 8.93175879396985e-05, - "loss": 5.8719, - "step": 11132 - }, - { - "epoch": 5.80599739243807, - "grad_norm": 1.7427622079849243, - "learning_rate": 8.931658291457286e-05, - "loss": 5.9253, - "step": 11133 - }, - { - "epoch": 5.80651890482399, - "grad_norm": 1.2970123291015625, - "learning_rate": 8.931557788944724e-05, - "loss": 5.6237, - "step": 11134 - }, - { - "epoch": 5.807040417209909, - "grad_norm": 1.3498351573944092, - "learning_rate": 8.93145728643216e-05, - "loss": 5.8077, - "step": 11135 - }, - { - "epoch": 5.807561929595828, - "grad_norm": 1.4313232898712158, - "learning_rate": 8.931356783919598e-05, - "loss": 5.7617, - "step": 11136 - }, - { - "epoch": 5.808083441981747, - "grad_norm": 1.3648717403411865, - "learning_rate": 8.931256281407036e-05, - "loss": 5.7821, - "step": 11137 - }, - { - "epoch": 5.8086049543676666, - "grad_norm": 1.4350194931030273, - "learning_rate": 8.931155778894474e-05, - "loss": 5.5779, - "step": 11138 - }, - { - "epoch": 5.809126466753585, - "grad_norm": 1.3823164701461792, - "learning_rate": 8.93105527638191e-05, - "loss": 5.7715, - "step": 11139 - }, - { - "epoch": 5.809647979139505, - "grad_norm": 1.3920141458511353, - "learning_rate": 8.930954773869347e-05, - "loss": 5.8081, - "step": 11140 - }, - { - "epoch": 5.810169491525424, - "grad_norm": 1.4346535205841064, - "learning_rate": 8.930854271356784e-05, - "loss": 5.7096, - "step": 11141 - }, - { - "epoch": 5.810691003911343, - "grad_norm": 1.5402590036392212, - "learning_rate": 8.930753768844221e-05, - "loss": 5.6574, - "step": 11142 - }, - { - "epoch": 5.811212516297262, - "grad_norm": 1.358138084411621, - "learning_rate": 8.930653266331659e-05, - "loss": 5.6125, - "step": 11143 - }, - { - "epoch": 5.811734028683182, - "grad_norm": 1.6958190202713013, - "learning_rate": 8.930552763819095e-05, - "loss": 5.9064, - "step": 11144 - }, - { - "epoch": 5.8122555410691, - "grad_norm": 1.3471524715423584, - "learning_rate": 8.930452261306533e-05, - "loss": 5.8068, - "step": 11145 - }, - { - "epoch": 5.81277705345502, - "grad_norm": 1.4115896224975586, - "learning_rate": 8.93035175879397e-05, - "loss": 5.2993, - "step": 11146 - }, - { - "epoch": 5.813298565840939, - "grad_norm": 1.3721095323562622, - "learning_rate": 8.930251256281407e-05, - "loss": 5.7738, - "step": 11147 - }, - { - "epoch": 5.813820078226858, - "grad_norm": 1.3466501235961914, - "learning_rate": 8.930150753768845e-05, - "loss": 5.0943, - "step": 11148 - }, - { - "epoch": 5.814341590612777, - "grad_norm": 1.4687105417251587, - "learning_rate": 8.930050251256283e-05, - "loss": 5.6988, - "step": 11149 - }, - { - "epoch": 5.814863102998697, - "grad_norm": 1.5961259603500366, - "learning_rate": 8.929949748743719e-05, - "loss": 5.5994, - "step": 11150 - }, - { - "epoch": 5.815384615384615, - "grad_norm": 1.5093573331832886, - "learning_rate": 8.929849246231157e-05, - "loss": 5.7422, - "step": 11151 - }, - { - "epoch": 5.815906127770535, - "grad_norm": 1.3976436853408813, - "learning_rate": 8.929748743718593e-05, - "loss": 5.8555, - "step": 11152 - }, - { - "epoch": 5.816427640156454, - "grad_norm": 1.4196374416351318, - "learning_rate": 8.92964824120603e-05, - "loss": 5.7201, - "step": 11153 - }, - { - "epoch": 5.816949152542373, - "grad_norm": 1.4761029481887817, - "learning_rate": 8.929547738693468e-05, - "loss": 5.2788, - "step": 11154 - }, - { - "epoch": 5.817470664928292, - "grad_norm": 1.3595227003097534, - "learning_rate": 8.929447236180904e-05, - "loss": 5.6304, - "step": 11155 - }, - { - "epoch": 5.817992177314212, - "grad_norm": 1.4221817255020142, - "learning_rate": 8.929346733668342e-05, - "loss": 5.7275, - "step": 11156 - }, - { - "epoch": 5.81851368970013, - "grad_norm": 1.335982084274292, - "learning_rate": 8.92924623115578e-05, - "loss": 5.9086, - "step": 11157 - }, - { - "epoch": 5.81903520208605, - "grad_norm": 1.7649877071380615, - "learning_rate": 8.929145728643217e-05, - "loss": 5.435, - "step": 11158 - }, - { - "epoch": 5.819556714471969, - "grad_norm": 1.4133331775665283, - "learning_rate": 8.929045226130654e-05, - "loss": 5.5371, - "step": 11159 - }, - { - "epoch": 5.820078226857888, - "grad_norm": 1.4357035160064697, - "learning_rate": 8.928944723618092e-05, - "loss": 5.2902, - "step": 11160 - }, - { - "epoch": 5.820599739243807, - "grad_norm": 1.399936318397522, - "learning_rate": 8.928844221105528e-05, - "loss": 5.9164, - "step": 11161 - }, - { - "epoch": 5.821121251629727, - "grad_norm": 1.4258466958999634, - "learning_rate": 8.928743718592966e-05, - "loss": 5.5577, - "step": 11162 - }, - { - "epoch": 5.821642764015645, - "grad_norm": 1.5698837041854858, - "learning_rate": 8.928643216080402e-05, - "loss": 5.2503, - "step": 11163 - }, - { - "epoch": 5.822164276401565, - "grad_norm": 1.6133439540863037, - "learning_rate": 8.92854271356784e-05, - "loss": 5.5462, - "step": 11164 - }, - { - "epoch": 5.822685788787483, - "grad_norm": 1.7021983861923218, - "learning_rate": 8.928442211055276e-05, - "loss": 5.6466, - "step": 11165 - }, - { - "epoch": 5.823207301173403, - "grad_norm": 1.749825358390808, - "learning_rate": 8.928341708542713e-05, - "loss": 5.7529, - "step": 11166 - }, - { - "epoch": 5.823728813559322, - "grad_norm": 1.6092408895492554, - "learning_rate": 8.92824120603015e-05, - "loss": 5.4618, - "step": 11167 - }, - { - "epoch": 5.824250325945242, - "grad_norm": 1.5879006385803223, - "learning_rate": 8.928140703517588e-05, - "loss": 5.5362, - "step": 11168 - }, - { - "epoch": 5.82477183833116, - "grad_norm": 1.5245084762573242, - "learning_rate": 8.928040201005026e-05, - "loss": 5.7537, - "step": 11169 - }, - { - "epoch": 5.82529335071708, - "grad_norm": 1.5205795764923096, - "learning_rate": 8.927939698492463e-05, - "loss": 5.6498, - "step": 11170 - }, - { - "epoch": 5.825814863102998, - "grad_norm": 1.4795408248901367, - "learning_rate": 8.9278391959799e-05, - "loss": 5.6866, - "step": 11171 - }, - { - "epoch": 5.826336375488918, - "grad_norm": 1.6438924074172974, - "learning_rate": 8.927738693467337e-05, - "loss": 5.6655, - "step": 11172 - }, - { - "epoch": 5.826857887874837, - "grad_norm": 1.7088818550109863, - "learning_rate": 8.927638190954775e-05, - "loss": 5.7583, - "step": 11173 - }, - { - "epoch": 5.827379400260757, - "grad_norm": 1.390309453010559, - "learning_rate": 8.927537688442211e-05, - "loss": 5.8022, - "step": 11174 - }, - { - "epoch": 5.827900912646675, - "grad_norm": 1.4313936233520508, - "learning_rate": 8.927437185929649e-05, - "loss": 6.1077, - "step": 11175 - }, - { - "epoch": 5.828422425032595, - "grad_norm": 1.5214931964874268, - "learning_rate": 8.927336683417085e-05, - "loss": 5.5847, - "step": 11176 - }, - { - "epoch": 5.828943937418513, - "grad_norm": 1.4822449684143066, - "learning_rate": 8.927236180904523e-05, - "loss": 5.8708, - "step": 11177 - }, - { - "epoch": 5.829465449804433, - "grad_norm": 1.5202186107635498, - "learning_rate": 8.927135678391961e-05, - "loss": 5.4048, - "step": 11178 - }, - { - "epoch": 5.829986962190352, - "grad_norm": 1.358853816986084, - "learning_rate": 8.927035175879397e-05, - "loss": 5.9877, - "step": 11179 - }, - { - "epoch": 5.830508474576272, - "grad_norm": 1.3562949895858765, - "learning_rate": 8.926934673366835e-05, - "loss": 5.3845, - "step": 11180 - }, - { - "epoch": 5.83102998696219, - "grad_norm": 1.502917766571045, - "learning_rate": 8.926834170854271e-05, - "loss": 5.8421, - "step": 11181 - }, - { - "epoch": 5.83155149934811, - "grad_norm": 1.4197522401809692, - "learning_rate": 8.926733668341709e-05, - "loss": 5.5628, - "step": 11182 - }, - { - "epoch": 5.832073011734028, - "grad_norm": 1.4737796783447266, - "learning_rate": 8.926633165829146e-05, - "loss": 5.9039, - "step": 11183 - }, - { - "epoch": 5.832594524119948, - "grad_norm": 1.4002681970596313, - "learning_rate": 8.926532663316583e-05, - "loss": 5.8194, - "step": 11184 - }, - { - "epoch": 5.833116036505867, - "grad_norm": 1.3653066158294678, - "learning_rate": 8.92643216080402e-05, - "loss": 5.7555, - "step": 11185 - }, - { - "epoch": 5.833637548891787, - "grad_norm": 1.4676254987716675, - "learning_rate": 8.926331658291458e-05, - "loss": 5.9091, - "step": 11186 - }, - { - "epoch": 5.834159061277705, - "grad_norm": 1.4901639223098755, - "learning_rate": 8.926231155778894e-05, - "loss": 5.4499, - "step": 11187 - }, - { - "epoch": 5.834680573663625, - "grad_norm": 1.497543215751648, - "learning_rate": 8.926130653266332e-05, - "loss": 5.8084, - "step": 11188 - }, - { - "epoch": 5.835202086049543, - "grad_norm": 1.5010689496994019, - "learning_rate": 8.92603015075377e-05, - "loss": 5.7141, - "step": 11189 - }, - { - "epoch": 5.835723598435463, - "grad_norm": 1.4037083387374878, - "learning_rate": 8.925929648241207e-05, - "loss": 5.5224, - "step": 11190 - }, - { - "epoch": 5.836245110821382, - "grad_norm": 1.6191229820251465, - "learning_rate": 8.925829145728644e-05, - "loss": 5.7469, - "step": 11191 - }, - { - "epoch": 5.836766623207302, - "grad_norm": 1.4571068286895752, - "learning_rate": 8.925728643216082e-05, - "loss": 5.4179, - "step": 11192 - }, - { - "epoch": 5.83728813559322, - "grad_norm": 1.4861764907836914, - "learning_rate": 8.925628140703518e-05, - "loss": 5.7857, - "step": 11193 - }, - { - "epoch": 5.83780964797914, - "grad_norm": 1.3416666984558105, - "learning_rate": 8.925527638190954e-05, - "loss": 5.6452, - "step": 11194 - }, - { - "epoch": 5.838331160365058, - "grad_norm": 1.254943609237671, - "learning_rate": 8.925427135678392e-05, - "loss": 5.9883, - "step": 11195 - }, - { - "epoch": 5.838852672750978, - "grad_norm": 1.4150797128677368, - "learning_rate": 8.925326633165829e-05, - "loss": 5.7723, - "step": 11196 - }, - { - "epoch": 5.839374185136897, - "grad_norm": 1.4482008218765259, - "learning_rate": 8.925226130653266e-05, - "loss": 5.5461, - "step": 11197 - }, - { - "epoch": 5.839895697522816, - "grad_norm": 1.4165109395980835, - "learning_rate": 8.925125628140704e-05, - "loss": 5.7423, - "step": 11198 - }, - { - "epoch": 5.840417209908735, - "grad_norm": 1.3813997507095337, - "learning_rate": 8.925025125628142e-05, - "loss": 5.6766, - "step": 11199 - }, - { - "epoch": 5.840938722294655, - "grad_norm": 1.3078476190567017, - "learning_rate": 8.924924623115578e-05, - "loss": 5.8741, - "step": 11200 - }, - { - "epoch": 5.841460234680573, - "grad_norm": 1.6934423446655273, - "learning_rate": 8.924824120603016e-05, - "loss": 5.1995, - "step": 11201 - }, - { - "epoch": 5.841981747066493, - "grad_norm": 1.3250765800476074, - "learning_rate": 8.924723618090453e-05, - "loss": 6.0364, - "step": 11202 - }, - { - "epoch": 5.842503259452412, - "grad_norm": 1.3266997337341309, - "learning_rate": 8.92462311557789e-05, - "loss": 5.3317, - "step": 11203 - }, - { - "epoch": 5.843024771838331, - "grad_norm": 1.5322840213775635, - "learning_rate": 8.924522613065327e-05, - "loss": 5.8234, - "step": 11204 - }, - { - "epoch": 5.84354628422425, - "grad_norm": 1.4361432790756226, - "learning_rate": 8.924422110552765e-05, - "loss": 5.3023, - "step": 11205 - }, - { - "epoch": 5.84406779661017, - "grad_norm": 1.4494373798370361, - "learning_rate": 8.924321608040201e-05, - "loss": 5.2297, - "step": 11206 - }, - { - "epoch": 5.844589308996088, - "grad_norm": 1.3717869520187378, - "learning_rate": 8.924221105527637e-05, - "loss": 5.3078, - "step": 11207 - }, - { - "epoch": 5.845110821382008, - "grad_norm": 1.4638352394104004, - "learning_rate": 8.924120603015075e-05, - "loss": 5.7285, - "step": 11208 - }, - { - "epoch": 5.845632333767927, - "grad_norm": 1.4879920482635498, - "learning_rate": 8.924020100502513e-05, - "loss": 5.5045, - "step": 11209 - }, - { - "epoch": 5.846153846153846, - "grad_norm": 1.4486687183380127, - "learning_rate": 8.923919597989951e-05, - "loss": 5.7852, - "step": 11210 - }, - { - "epoch": 5.846675358539765, - "grad_norm": 1.638842225074768, - "learning_rate": 8.923819095477387e-05, - "loss": 5.5267, - "step": 11211 - }, - { - "epoch": 5.847196870925685, - "grad_norm": 1.6264042854309082, - "learning_rate": 8.923718592964825e-05, - "loss": 5.643, - "step": 11212 - }, - { - "epoch": 5.847718383311603, - "grad_norm": 1.5895634889602661, - "learning_rate": 8.923618090452261e-05, - "loss": 5.665, - "step": 11213 - }, - { - "epoch": 5.848239895697523, - "grad_norm": 1.5642768144607544, - "learning_rate": 8.923517587939699e-05, - "loss": 5.2479, - "step": 11214 - }, - { - "epoch": 5.848761408083442, - "grad_norm": 1.454853892326355, - "learning_rate": 8.923417085427136e-05, - "loss": 5.3818, - "step": 11215 - }, - { - "epoch": 5.849282920469361, - "grad_norm": 1.4737509489059448, - "learning_rate": 8.923316582914573e-05, - "loss": 5.5742, - "step": 11216 - }, - { - "epoch": 5.84980443285528, - "grad_norm": 1.7577749490737915, - "learning_rate": 8.92321608040201e-05, - "loss": 5.4552, - "step": 11217 - }, - { - "epoch": 5.8503259452412, - "grad_norm": 1.448093056678772, - "learning_rate": 8.923115577889448e-05, - "loss": 5.3674, - "step": 11218 - }, - { - "epoch": 5.850847457627118, - "grad_norm": 1.6210650205612183, - "learning_rate": 8.923015075376885e-05, - "loss": 5.4763, - "step": 11219 - }, - { - "epoch": 5.851368970013038, - "grad_norm": 1.369286298751831, - "learning_rate": 8.922914572864322e-05, - "loss": 5.8898, - "step": 11220 - }, - { - "epoch": 5.851890482398957, - "grad_norm": 1.4452582597732544, - "learning_rate": 8.92281407035176e-05, - "loss": 5.7388, - "step": 11221 - }, - { - "epoch": 5.852411994784876, - "grad_norm": 1.384647250175476, - "learning_rate": 8.922713567839196e-05, - "loss": 5.7958, - "step": 11222 - }, - { - "epoch": 5.852933507170795, - "grad_norm": 1.3613686561584473, - "learning_rate": 8.922613065326634e-05, - "loss": 5.9357, - "step": 11223 - }, - { - "epoch": 5.853455019556715, - "grad_norm": 1.3587231636047363, - "learning_rate": 8.92251256281407e-05, - "loss": 6.0704, - "step": 11224 - }, - { - "epoch": 5.853976531942633, - "grad_norm": 1.5404865741729736, - "learning_rate": 8.922412060301508e-05, - "loss": 5.6551, - "step": 11225 - }, - { - "epoch": 5.854498044328553, - "grad_norm": 1.4239871501922607, - "learning_rate": 8.922311557788945e-05, - "loss": 5.2827, - "step": 11226 - }, - { - "epoch": 5.855019556714472, - "grad_norm": 1.525867223739624, - "learning_rate": 8.922211055276382e-05, - "loss": 5.7399, - "step": 11227 - }, - { - "epoch": 5.855541069100391, - "grad_norm": 1.5702109336853027, - "learning_rate": 8.922110552763819e-05, - "loss": 5.0989, - "step": 11228 - }, - { - "epoch": 5.85606258148631, - "grad_norm": 1.4979186058044434, - "learning_rate": 8.922010050251257e-05, - "loss": 5.8947, - "step": 11229 - }, - { - "epoch": 5.85658409387223, - "grad_norm": 1.5616304874420166, - "learning_rate": 8.921909547738694e-05, - "loss": 5.3075, - "step": 11230 - }, - { - "epoch": 5.857105606258148, - "grad_norm": 1.3542653322219849, - "learning_rate": 8.921809045226132e-05, - "loss": 5.8263, - "step": 11231 - }, - { - "epoch": 5.857627118644068, - "grad_norm": 1.4963207244873047, - "learning_rate": 8.921708542713569e-05, - "loss": 5.6241, - "step": 11232 - }, - { - "epoch": 5.858148631029987, - "grad_norm": 1.391697883605957, - "learning_rate": 8.921608040201005e-05, - "loss": 5.8542, - "step": 11233 - }, - { - "epoch": 5.858670143415906, - "grad_norm": 1.4556691646575928, - "learning_rate": 8.921507537688443e-05, - "loss": 5.5906, - "step": 11234 - }, - { - "epoch": 5.859191655801825, - "grad_norm": 1.3006312847137451, - "learning_rate": 8.921407035175879e-05, - "loss": 5.8226, - "step": 11235 - }, - { - "epoch": 5.859713168187745, - "grad_norm": 1.2992162704467773, - "learning_rate": 8.921306532663317e-05, - "loss": 4.2548, - "step": 11236 - }, - { - "epoch": 5.860234680573663, - "grad_norm": 1.440169095993042, - "learning_rate": 8.921206030150753e-05, - "loss": 5.7552, - "step": 11237 - }, - { - "epoch": 5.860756192959583, - "grad_norm": 1.4144957065582275, - "learning_rate": 8.921105527638191e-05, - "loss": 5.8141, - "step": 11238 - }, - { - "epoch": 5.861277705345502, - "grad_norm": 1.377417802810669, - "learning_rate": 8.921005025125629e-05, - "loss": 5.5762, - "step": 11239 - }, - { - "epoch": 5.861799217731421, - "grad_norm": 1.4270271062850952, - "learning_rate": 8.920904522613067e-05, - "loss": 5.6306, - "step": 11240 - }, - { - "epoch": 5.86232073011734, - "grad_norm": 1.423186182975769, - "learning_rate": 8.920804020100503e-05, - "loss": 5.8154, - "step": 11241 - }, - { - "epoch": 5.86284224250326, - "grad_norm": 1.482991337776184, - "learning_rate": 8.920703517587941e-05, - "loss": 5.5338, - "step": 11242 - }, - { - "epoch": 5.863363754889178, - "grad_norm": 1.467436671257019, - "learning_rate": 8.920603015075377e-05, - "loss": 5.561, - "step": 11243 - }, - { - "epoch": 5.863885267275098, - "grad_norm": 1.4468964338302612, - "learning_rate": 8.920502512562815e-05, - "loss": 5.1277, - "step": 11244 - }, - { - "epoch": 5.864406779661017, - "grad_norm": 1.5056766271591187, - "learning_rate": 8.920402010050252e-05, - "loss": 5.5974, - "step": 11245 - }, - { - "epoch": 5.864928292046936, - "grad_norm": 1.5367873907089233, - "learning_rate": 8.920301507537688e-05, - "loss": 5.5052, - "step": 11246 - }, - { - "epoch": 5.865449804432855, - "grad_norm": 1.5522658824920654, - "learning_rate": 8.920201005025126e-05, - "loss": 5.4104, - "step": 11247 - }, - { - "epoch": 5.865971316818775, - "grad_norm": 1.349218487739563, - "learning_rate": 8.920100502512562e-05, - "loss": 5.9719, - "step": 11248 - }, - { - "epoch": 5.866492829204693, - "grad_norm": 1.5161595344543457, - "learning_rate": 8.92e-05, - "loss": 5.4518, - "step": 11249 - }, - { - "epoch": 5.867014341590613, - "grad_norm": 1.3313595056533813, - "learning_rate": 8.919899497487438e-05, - "loss": 5.4305, - "step": 11250 - }, - { - "epoch": 5.867535853976532, - "grad_norm": 1.5281251668930054, - "learning_rate": 8.919798994974876e-05, - "loss": 4.5744, - "step": 11251 - }, - { - "epoch": 5.868057366362451, - "grad_norm": 1.6386549472808838, - "learning_rate": 8.919698492462312e-05, - "loss": 5.1994, - "step": 11252 - }, - { - "epoch": 5.86857887874837, - "grad_norm": 1.4716168642044067, - "learning_rate": 8.91959798994975e-05, - "loss": 5.5595, - "step": 11253 - }, - { - "epoch": 5.869100391134289, - "grad_norm": 1.388664722442627, - "learning_rate": 8.919497487437186e-05, - "loss": 5.6521, - "step": 11254 - }, - { - "epoch": 5.869621903520208, - "grad_norm": 1.4519238471984863, - "learning_rate": 8.919396984924624e-05, - "loss": 5.3899, - "step": 11255 - }, - { - "epoch": 5.870143415906128, - "grad_norm": 1.4538769721984863, - "learning_rate": 8.91929648241206e-05, - "loss": 5.6714, - "step": 11256 - }, - { - "epoch": 5.870664928292047, - "grad_norm": 1.553675889968872, - "learning_rate": 8.919195979899498e-05, - "loss": 5.2295, - "step": 11257 - }, - { - "epoch": 5.871186440677966, - "grad_norm": 1.4229531288146973, - "learning_rate": 8.919095477386935e-05, - "loss": 5.37, - "step": 11258 - }, - { - "epoch": 5.871707953063885, - "grad_norm": 1.385155200958252, - "learning_rate": 8.918994974874372e-05, - "loss": 5.898, - "step": 11259 - }, - { - "epoch": 5.872229465449804, - "grad_norm": 1.3691452741622925, - "learning_rate": 8.91889447236181e-05, - "loss": 5.5142, - "step": 11260 - }, - { - "epoch": 5.872750977835723, - "grad_norm": 1.5023103952407837, - "learning_rate": 8.918793969849247e-05, - "loss": 5.5344, - "step": 11261 - }, - { - "epoch": 5.873272490221643, - "grad_norm": 1.3645367622375488, - "learning_rate": 8.918693467336684e-05, - "loss": 5.6034, - "step": 11262 - }, - { - "epoch": 5.873794002607562, - "grad_norm": 1.3820292949676514, - "learning_rate": 8.918592964824121e-05, - "loss": 5.9078, - "step": 11263 - }, - { - "epoch": 5.874315514993481, - "grad_norm": 1.4005110263824463, - "learning_rate": 8.918492462311559e-05, - "loss": 5.4086, - "step": 11264 - }, - { - "epoch": 5.8748370273794, - "grad_norm": 1.4512995481491089, - "learning_rate": 8.918391959798995e-05, - "loss": 5.5766, - "step": 11265 - }, - { - "epoch": 5.875358539765319, - "grad_norm": 1.5693503618240356, - "learning_rate": 8.918291457286433e-05, - "loss": 5.857, - "step": 11266 - }, - { - "epoch": 5.875880052151238, - "grad_norm": 1.3535808324813843, - "learning_rate": 8.918190954773869e-05, - "loss": 5.8553, - "step": 11267 - }, - { - "epoch": 5.876401564537158, - "grad_norm": 1.4316158294677734, - "learning_rate": 8.918090452261307e-05, - "loss": 5.763, - "step": 11268 - }, - { - "epoch": 5.876923076923077, - "grad_norm": 1.4741361141204834, - "learning_rate": 8.917989949748743e-05, - "loss": 5.3904, - "step": 11269 - }, - { - "epoch": 5.877444589308996, - "grad_norm": 1.6620171070098877, - "learning_rate": 8.917889447236181e-05, - "loss": 5.3642, - "step": 11270 - }, - { - "epoch": 5.877966101694915, - "grad_norm": 1.3702480792999268, - "learning_rate": 8.917788944723619e-05, - "loss": 5.7901, - "step": 11271 - }, - { - "epoch": 5.878487614080834, - "grad_norm": 1.4051774740219116, - "learning_rate": 8.917688442211055e-05, - "loss": 5.7447, - "step": 11272 - }, - { - "epoch": 5.879009126466753, - "grad_norm": 1.4597913026809692, - "learning_rate": 8.917587939698493e-05, - "loss": 5.5276, - "step": 11273 - }, - { - "epoch": 5.879530638852673, - "grad_norm": 1.403415322303772, - "learning_rate": 8.91748743718593e-05, - "loss": 5.4252, - "step": 11274 - }, - { - "epoch": 5.880052151238592, - "grad_norm": 1.4147672653198242, - "learning_rate": 8.917386934673367e-05, - "loss": 5.6639, - "step": 11275 - }, - { - "epoch": 5.880573663624511, - "grad_norm": 1.2771334648132324, - "learning_rate": 8.917286432160804e-05, - "loss": 6.0458, - "step": 11276 - }, - { - "epoch": 5.88109517601043, - "grad_norm": 1.4177157878875732, - "learning_rate": 8.917185929648242e-05, - "loss": 6.0046, - "step": 11277 - }, - { - "epoch": 5.881616688396349, - "grad_norm": 1.548216700553894, - "learning_rate": 8.917085427135678e-05, - "loss": 5.2175, - "step": 11278 - }, - { - "epoch": 5.8821382007822685, - "grad_norm": 1.3531478643417358, - "learning_rate": 8.916984924623116e-05, - "loss": 5.7483, - "step": 11279 - }, - { - "epoch": 5.882659713168188, - "grad_norm": 1.3845802545547485, - "learning_rate": 8.916884422110554e-05, - "loss": 5.6746, - "step": 11280 - }, - { - "epoch": 5.883181225554107, - "grad_norm": 1.51714026927948, - "learning_rate": 8.916783919597991e-05, - "loss": 5.4076, - "step": 11281 - }, - { - "epoch": 5.883702737940026, - "grad_norm": 1.4058209657669067, - "learning_rate": 8.916683417085428e-05, - "loss": 5.9292, - "step": 11282 - }, - { - "epoch": 5.884224250325945, - "grad_norm": 1.4360978603363037, - "learning_rate": 8.916582914572866e-05, - "loss": 5.7357, - "step": 11283 - }, - { - "epoch": 5.884745762711864, - "grad_norm": 1.4907541275024414, - "learning_rate": 8.916482412060302e-05, - "loss": 5.8058, - "step": 11284 - }, - { - "epoch": 5.8852672750977835, - "grad_norm": 1.4319883584976196, - "learning_rate": 8.91638190954774e-05, - "loss": 5.9572, - "step": 11285 - }, - { - "epoch": 5.885788787483703, - "grad_norm": 1.5984867811203003, - "learning_rate": 8.916281407035176e-05, - "loss": 5.3557, - "step": 11286 - }, - { - "epoch": 5.886310299869622, - "grad_norm": 1.3156123161315918, - "learning_rate": 8.916180904522613e-05, - "loss": 5.4029, - "step": 11287 - }, - { - "epoch": 5.886831812255541, - "grad_norm": 1.5979077816009521, - "learning_rate": 8.91608040201005e-05, - "loss": 5.324, - "step": 11288 - }, - { - "epoch": 5.88735332464146, - "grad_norm": 1.466156005859375, - "learning_rate": 8.915979899497487e-05, - "loss": 5.6915, - "step": 11289 - }, - { - "epoch": 5.887874837027379, - "grad_norm": 1.3098756074905396, - "learning_rate": 8.915879396984925e-05, - "loss": 4.8949, - "step": 11290 - }, - { - "epoch": 5.8883963494132985, - "grad_norm": 1.3836251497268677, - "learning_rate": 8.915778894472362e-05, - "loss": 5.6736, - "step": 11291 - }, - { - "epoch": 5.888917861799218, - "grad_norm": 1.4004181623458862, - "learning_rate": 8.9156783919598e-05, - "loss": 5.2314, - "step": 11292 - }, - { - "epoch": 5.8894393741851365, - "grad_norm": 1.5102849006652832, - "learning_rate": 8.915577889447237e-05, - "loss": 5.551, - "step": 11293 - }, - { - "epoch": 5.889960886571056, - "grad_norm": 1.3285257816314697, - "learning_rate": 8.915477386934674e-05, - "loss": 5.7873, - "step": 11294 - }, - { - "epoch": 5.8904823989569755, - "grad_norm": 1.5043864250183105, - "learning_rate": 8.915376884422111e-05, - "loss": 5.5391, - "step": 11295 - }, - { - "epoch": 5.891003911342894, - "grad_norm": 1.4117249250411987, - "learning_rate": 8.915276381909549e-05, - "loss": 5.5583, - "step": 11296 - }, - { - "epoch": 5.8915254237288135, - "grad_norm": 1.5790517330169678, - "learning_rate": 8.915175879396985e-05, - "loss": 5.3185, - "step": 11297 - }, - { - "epoch": 5.892046936114733, - "grad_norm": 1.515542984008789, - "learning_rate": 8.915075376884423e-05, - "loss": 5.4032, - "step": 11298 - }, - { - "epoch": 5.8925684485006515, - "grad_norm": 1.3816111087799072, - "learning_rate": 8.914974874371859e-05, - "loss": 5.5197, - "step": 11299 - }, - { - "epoch": 5.893089960886571, - "grad_norm": 1.368752360343933, - "learning_rate": 8.914874371859297e-05, - "loss": 5.797, - "step": 11300 - }, - { - "epoch": 5.8936114732724905, - "grad_norm": 1.4097065925598145, - "learning_rate": 8.914773869346735e-05, - "loss": 5.2533, - "step": 11301 - }, - { - "epoch": 5.894132985658409, - "grad_norm": 1.377744197845459, - "learning_rate": 8.914673366834171e-05, - "loss": 5.6341, - "step": 11302 - }, - { - "epoch": 5.8946544980443285, - "grad_norm": 1.3712315559387207, - "learning_rate": 8.914572864321609e-05, - "loss": 5.629, - "step": 11303 - }, - { - "epoch": 5.895176010430248, - "grad_norm": 1.3958592414855957, - "learning_rate": 8.914472361809046e-05, - "loss": 5.5864, - "step": 11304 - }, - { - "epoch": 5.8956975228161665, - "grad_norm": 1.4758470058441162, - "learning_rate": 8.914371859296483e-05, - "loss": 5.3796, - "step": 11305 - }, - { - "epoch": 5.896219035202086, - "grad_norm": 1.36129891872406, - "learning_rate": 8.91427135678392e-05, - "loss": 5.9166, - "step": 11306 - }, - { - "epoch": 5.8967405475880055, - "grad_norm": 1.490878939628601, - "learning_rate": 8.914170854271357e-05, - "loss": 5.418, - "step": 11307 - }, - { - "epoch": 5.897262059973924, - "grad_norm": 1.5105335712432861, - "learning_rate": 8.914070351758794e-05, - "loss": 5.5746, - "step": 11308 - }, - { - "epoch": 5.8977835723598435, - "grad_norm": 1.7786872386932373, - "learning_rate": 8.913969849246232e-05, - "loss": 5.7376, - "step": 11309 - }, - { - "epoch": 5.898305084745763, - "grad_norm": 1.5075725317001343, - "learning_rate": 8.913869346733668e-05, - "loss": 5.5121, - "step": 11310 - }, - { - "epoch": 5.898826597131682, - "grad_norm": 1.4755496978759766, - "learning_rate": 8.913768844221106e-05, - "loss": 5.7465, - "step": 11311 - }, - { - "epoch": 5.899348109517601, - "grad_norm": 1.4680331945419312, - "learning_rate": 8.913668341708544e-05, - "loss": 5.707, - "step": 11312 - }, - { - "epoch": 5.8998696219035205, - "grad_norm": 1.4212799072265625, - "learning_rate": 8.91356783919598e-05, - "loss": 5.7232, - "step": 11313 - }, - { - "epoch": 5.900391134289439, - "grad_norm": 1.486430048942566, - "learning_rate": 8.913467336683418e-05, - "loss": 5.2711, - "step": 11314 - }, - { - "epoch": 5.9009126466753585, - "grad_norm": 1.5364068746566772, - "learning_rate": 8.913366834170854e-05, - "loss": 5.7329, - "step": 11315 - }, - { - "epoch": 5.901434159061278, - "grad_norm": 1.506272554397583, - "learning_rate": 8.913266331658292e-05, - "loss": 5.5568, - "step": 11316 - }, - { - "epoch": 5.901955671447197, - "grad_norm": 1.6023550033569336, - "learning_rate": 8.913165829145729e-05, - "loss": 5.5899, - "step": 11317 - }, - { - "epoch": 5.902477183833116, - "grad_norm": 1.4245072603225708, - "learning_rate": 8.913065326633166e-05, - "loss": 5.377, - "step": 11318 - }, - { - "epoch": 5.9029986962190355, - "grad_norm": 1.4291298389434814, - "learning_rate": 8.912964824120603e-05, - "loss": 5.7934, - "step": 11319 - }, - { - "epoch": 5.903520208604954, - "grad_norm": 1.518874168395996, - "learning_rate": 8.91286432160804e-05, - "loss": 5.4947, - "step": 11320 - }, - { - "epoch": 5.9040417209908735, - "grad_norm": 1.404563546180725, - "learning_rate": 8.912763819095477e-05, - "loss": 5.5883, - "step": 11321 - }, - { - "epoch": 5.904563233376793, - "grad_norm": 1.5758540630340576, - "learning_rate": 8.912663316582915e-05, - "loss": 5.316, - "step": 11322 - }, - { - "epoch": 5.905084745762712, - "grad_norm": 1.3868333101272583, - "learning_rate": 8.912562814070353e-05, - "loss": 5.6828, - "step": 11323 - }, - { - "epoch": 5.905606258148631, - "grad_norm": 1.342061996459961, - "learning_rate": 8.91246231155779e-05, - "loss": 5.4567, - "step": 11324 - }, - { - "epoch": 5.9061277705345505, - "grad_norm": 1.5016229152679443, - "learning_rate": 8.912361809045227e-05, - "loss": 5.226, - "step": 11325 - }, - { - "epoch": 5.906649282920469, - "grad_norm": 1.6057603359222412, - "learning_rate": 8.912261306532663e-05, - "loss": 5.4608, - "step": 11326 - }, - { - "epoch": 5.9071707953063886, - "grad_norm": 1.3618230819702148, - "learning_rate": 8.912160804020101e-05, - "loss": 5.5455, - "step": 11327 - }, - { - "epoch": 5.907692307692308, - "grad_norm": 1.423213005065918, - "learning_rate": 8.912060301507537e-05, - "loss": 5.6373, - "step": 11328 - }, - { - "epoch": 5.908213820078227, - "grad_norm": 1.6753309965133667, - "learning_rate": 8.911959798994975e-05, - "loss": 5.2058, - "step": 11329 - }, - { - "epoch": 5.908735332464146, - "grad_norm": 1.3807997703552246, - "learning_rate": 8.911859296482412e-05, - "loss": 5.8074, - "step": 11330 - }, - { - "epoch": 5.9092568448500655, - "grad_norm": 1.4062408208847046, - "learning_rate": 8.91175879396985e-05, - "loss": 5.9126, - "step": 11331 - }, - { - "epoch": 5.909778357235984, - "grad_norm": 1.4261136054992676, - "learning_rate": 8.911658291457287e-05, - "loss": 5.7805, - "step": 11332 - }, - { - "epoch": 5.910299869621904, - "grad_norm": 1.5612930059432983, - "learning_rate": 8.911557788944725e-05, - "loss": 5.5183, - "step": 11333 - }, - { - "epoch": 5.910821382007823, - "grad_norm": 1.4853062629699707, - "learning_rate": 8.911457286432161e-05, - "loss": 5.7412, - "step": 11334 - }, - { - "epoch": 5.911342894393742, - "grad_norm": 1.520404577255249, - "learning_rate": 8.911356783919599e-05, - "loss": 5.9055, - "step": 11335 - }, - { - "epoch": 5.911864406779661, - "grad_norm": 1.4446961879730225, - "learning_rate": 8.911256281407036e-05, - "loss": 5.5019, - "step": 11336 - }, - { - "epoch": 5.9123859191655805, - "grad_norm": 1.4221844673156738, - "learning_rate": 8.911155778894473e-05, - "loss": 5.4092, - "step": 11337 - }, - { - "epoch": 5.912907431551499, - "grad_norm": 1.3140445947647095, - "learning_rate": 8.91105527638191e-05, - "loss": 5.9498, - "step": 11338 - }, - { - "epoch": 5.913428943937419, - "grad_norm": 1.4865093231201172, - "learning_rate": 8.910954773869346e-05, - "loss": 5.9765, - "step": 11339 - }, - { - "epoch": 5.913950456323338, - "grad_norm": 1.4483637809753418, - "learning_rate": 8.910854271356784e-05, - "loss": 5.9316, - "step": 11340 - }, - { - "epoch": 5.914471968709257, - "grad_norm": 1.4083068370819092, - "learning_rate": 8.91075376884422e-05, - "loss": 5.9515, - "step": 11341 - }, - { - "epoch": 5.914993481095176, - "grad_norm": 1.5300514698028564, - "learning_rate": 8.910653266331658e-05, - "loss": 5.5368, - "step": 11342 - }, - { - "epoch": 5.9155149934810956, - "grad_norm": 1.472047209739685, - "learning_rate": 8.910552763819096e-05, - "loss": 5.6447, - "step": 11343 - }, - { - "epoch": 5.916036505867014, - "grad_norm": 1.4264626502990723, - "learning_rate": 8.910452261306534e-05, - "loss": 5.7227, - "step": 11344 - }, - { - "epoch": 5.916558018252934, - "grad_norm": 1.372085690498352, - "learning_rate": 8.91035175879397e-05, - "loss": 5.4932, - "step": 11345 - }, - { - "epoch": 5.917079530638853, - "grad_norm": 1.514417052268982, - "learning_rate": 8.910251256281408e-05, - "loss": 5.5076, - "step": 11346 - }, - { - "epoch": 5.917601043024772, - "grad_norm": 1.5213464498519897, - "learning_rate": 8.910150753768844e-05, - "loss": 5.4914, - "step": 11347 - }, - { - "epoch": 5.918122555410691, - "grad_norm": 1.5110071897506714, - "learning_rate": 8.910050251256282e-05, - "loss": 5.8044, - "step": 11348 - }, - { - "epoch": 5.91864406779661, - "grad_norm": 1.4245574474334717, - "learning_rate": 8.909949748743719e-05, - "loss": 5.507, - "step": 11349 - }, - { - "epoch": 5.919165580182529, - "grad_norm": 1.375240445137024, - "learning_rate": 8.909849246231156e-05, - "loss": 5.8026, - "step": 11350 - }, - { - "epoch": 5.919687092568449, - "grad_norm": 1.4586305618286133, - "learning_rate": 8.909748743718593e-05, - "loss": 5.9376, - "step": 11351 - }, - { - "epoch": 5.920208604954368, - "grad_norm": 1.4838114976882935, - "learning_rate": 8.90964824120603e-05, - "loss": 5.8598, - "step": 11352 - }, - { - "epoch": 5.920730117340287, - "grad_norm": 1.3929152488708496, - "learning_rate": 8.909547738693468e-05, - "loss": 5.7718, - "step": 11353 - }, - { - "epoch": 5.921251629726206, - "grad_norm": 1.4700287580490112, - "learning_rate": 8.909447236180905e-05, - "loss": 5.7076, - "step": 11354 - }, - { - "epoch": 5.921773142112125, - "grad_norm": 1.4567495584487915, - "learning_rate": 8.909346733668343e-05, - "loss": 5.7887, - "step": 11355 - }, - { - "epoch": 5.922294654498044, - "grad_norm": 1.4027135372161865, - "learning_rate": 8.909246231155779e-05, - "loss": 5.7896, - "step": 11356 - }, - { - "epoch": 5.922816166883964, - "grad_norm": 1.3650051355361938, - "learning_rate": 8.909145728643217e-05, - "loss": 5.7979, - "step": 11357 - }, - { - "epoch": 5.923337679269883, - "grad_norm": 1.4105373620986938, - "learning_rate": 8.909045226130653e-05, - "loss": 5.5241, - "step": 11358 - }, - { - "epoch": 5.923859191655802, - "grad_norm": 1.3619812726974487, - "learning_rate": 8.908944723618091e-05, - "loss": 5.6616, - "step": 11359 - }, - { - "epoch": 5.924380704041721, - "grad_norm": 1.4172947406768799, - "learning_rate": 8.908844221105527e-05, - "loss": 5.9015, - "step": 11360 - }, - { - "epoch": 5.92490221642764, - "grad_norm": 1.6320081949234009, - "learning_rate": 8.908743718592965e-05, - "loss": 4.2789, - "step": 11361 - }, - { - "epoch": 5.925423728813559, - "grad_norm": 1.3815693855285645, - "learning_rate": 8.908643216080402e-05, - "loss": 5.9405, - "step": 11362 - }, - { - "epoch": 5.925945241199479, - "grad_norm": 1.435364007949829, - "learning_rate": 8.90854271356784e-05, - "loss": 5.6102, - "step": 11363 - }, - { - "epoch": 5.926466753585398, - "grad_norm": 1.3903636932373047, - "learning_rate": 8.908442211055277e-05, - "loss": 5.8981, - "step": 11364 - }, - { - "epoch": 5.926988265971317, - "grad_norm": 1.427901029586792, - "learning_rate": 8.908341708542715e-05, - "loss": 5.6857, - "step": 11365 - }, - { - "epoch": 5.927509778357236, - "grad_norm": 1.4351145029067993, - "learning_rate": 8.908241206030151e-05, - "loss": 5.8725, - "step": 11366 - }, - { - "epoch": 5.928031290743155, - "grad_norm": 1.4889357089996338, - "learning_rate": 8.908140703517588e-05, - "loss": 5.5546, - "step": 11367 - }, - { - "epoch": 5.928552803129074, - "grad_norm": 1.5373586416244507, - "learning_rate": 8.908040201005026e-05, - "loss": 5.6925, - "step": 11368 - }, - { - "epoch": 5.929074315514994, - "grad_norm": 1.474624752998352, - "learning_rate": 8.907939698492462e-05, - "loss": 5.5231, - "step": 11369 - }, - { - "epoch": 5.929595827900913, - "grad_norm": 1.4162204265594482, - "learning_rate": 8.9078391959799e-05, - "loss": 5.8624, - "step": 11370 - }, - { - "epoch": 5.930117340286832, - "grad_norm": 1.4377985000610352, - "learning_rate": 8.907738693467336e-05, - "loss": 5.496, - "step": 11371 - }, - { - "epoch": 5.930638852672751, - "grad_norm": 1.349130630493164, - "learning_rate": 8.907638190954774e-05, - "loss": 6.0411, - "step": 11372 - }, - { - "epoch": 5.93116036505867, - "grad_norm": 1.4302442073822021, - "learning_rate": 8.907537688442212e-05, - "loss": 5.5336, - "step": 11373 - }, - { - "epoch": 5.931681877444589, - "grad_norm": 1.4982818365097046, - "learning_rate": 8.90743718592965e-05, - "loss": 5.6895, - "step": 11374 - }, - { - "epoch": 5.932203389830509, - "grad_norm": 1.4246960878372192, - "learning_rate": 8.907336683417086e-05, - "loss": 5.567, - "step": 11375 - }, - { - "epoch": 5.932724902216428, - "grad_norm": 1.3444892168045044, - "learning_rate": 8.907236180904524e-05, - "loss": 5.4424, - "step": 11376 - }, - { - "epoch": 5.933246414602347, - "grad_norm": 1.3398813009262085, - "learning_rate": 8.90713567839196e-05, - "loss": 6.1753, - "step": 11377 - }, - { - "epoch": 5.933767926988266, - "grad_norm": 1.5791714191436768, - "learning_rate": 8.907035175879398e-05, - "loss": 5.5122, - "step": 11378 - }, - { - "epoch": 5.934289439374185, - "grad_norm": 1.46021568775177, - "learning_rate": 8.906934673366834e-05, - "loss": 5.779, - "step": 11379 - }, - { - "epoch": 5.934810951760104, - "grad_norm": 1.4690427780151367, - "learning_rate": 8.906834170854271e-05, - "loss": 5.5667, - "step": 11380 - }, - { - "epoch": 5.935332464146024, - "grad_norm": 1.5943454504013062, - "learning_rate": 8.906733668341709e-05, - "loss": 5.5241, - "step": 11381 - }, - { - "epoch": 5.935853976531943, - "grad_norm": 1.4608924388885498, - "learning_rate": 8.906633165829145e-05, - "loss": 6.0709, - "step": 11382 - }, - { - "epoch": 5.936375488917862, - "grad_norm": 1.455690860748291, - "learning_rate": 8.906532663316583e-05, - "loss": 5.777, - "step": 11383 - }, - { - "epoch": 5.936897001303781, - "grad_norm": 1.5414828062057495, - "learning_rate": 8.906432160804021e-05, - "loss": 5.2531, - "step": 11384 - }, - { - "epoch": 5.9374185136897, - "grad_norm": 1.4874972105026245, - "learning_rate": 8.906331658291458e-05, - "loss": 5.3322, - "step": 11385 - }, - { - "epoch": 5.937940026075619, - "grad_norm": 1.8665611743927002, - "learning_rate": 8.906231155778895e-05, - "loss": 5.5093, - "step": 11386 - }, - { - "epoch": 5.938461538461539, - "grad_norm": 1.4702199697494507, - "learning_rate": 8.906130653266333e-05, - "loss": 5.055, - "step": 11387 - }, - { - "epoch": 5.938983050847457, - "grad_norm": 1.4513535499572754, - "learning_rate": 8.906030150753769e-05, - "loss": 5.7584, - "step": 11388 - }, - { - "epoch": 5.939504563233377, - "grad_norm": 1.4360400438308716, - "learning_rate": 8.905929648241207e-05, - "loss": 5.6066, - "step": 11389 - }, - { - "epoch": 5.940026075619296, - "grad_norm": 1.4236586093902588, - "learning_rate": 8.905829145728643e-05, - "loss": 5.6627, - "step": 11390 - }, - { - "epoch": 5.940547588005215, - "grad_norm": 1.449127435684204, - "learning_rate": 8.905728643216081e-05, - "loss": 5.5126, - "step": 11391 - }, - { - "epoch": 5.941069100391134, - "grad_norm": 1.6454875469207764, - "learning_rate": 8.905628140703518e-05, - "loss": 5.342, - "step": 11392 - }, - { - "epoch": 5.941590612777054, - "grad_norm": 1.440747618675232, - "learning_rate": 8.905527638190955e-05, - "loss": 6.0258, - "step": 11393 - }, - { - "epoch": 5.942112125162972, - "grad_norm": 1.4467829465866089, - "learning_rate": 8.905427135678393e-05, - "loss": 5.8607, - "step": 11394 - }, - { - "epoch": 5.942633637548892, - "grad_norm": 1.3585546016693115, - "learning_rate": 8.90532663316583e-05, - "loss": 5.9992, - "step": 11395 - }, - { - "epoch": 5.943155149934811, - "grad_norm": 1.42280912399292, - "learning_rate": 8.905226130653267e-05, - "loss": 5.7084, - "step": 11396 - }, - { - "epoch": 5.94367666232073, - "grad_norm": 1.3828580379486084, - "learning_rate": 8.905125628140704e-05, - "loss": 5.6464, - "step": 11397 - }, - { - "epoch": 5.944198174706649, - "grad_norm": 1.433448076248169, - "learning_rate": 8.905025125628142e-05, - "loss": 5.6937, - "step": 11398 - }, - { - "epoch": 5.944719687092569, - "grad_norm": 1.3970472812652588, - "learning_rate": 8.904924623115578e-05, - "loss": 5.6277, - "step": 11399 - }, - { - "epoch": 5.945241199478487, - "grad_norm": 1.5787118673324585, - "learning_rate": 8.904824120603016e-05, - "loss": 5.1386, - "step": 11400 - }, - { - "epoch": 5.945762711864407, - "grad_norm": 1.4131290912628174, - "learning_rate": 8.904723618090452e-05, - "loss": 5.4191, - "step": 11401 - }, - { - "epoch": 5.946284224250326, - "grad_norm": 1.4641848802566528, - "learning_rate": 8.90462311557789e-05, - "loss": 5.3758, - "step": 11402 - }, - { - "epoch": 5.946805736636245, - "grad_norm": 1.3387724161148071, - "learning_rate": 8.904522613065326e-05, - "loss": 5.0605, - "step": 11403 - }, - { - "epoch": 5.947327249022164, - "grad_norm": 1.5031414031982422, - "learning_rate": 8.904422110552764e-05, - "loss": 5.5768, - "step": 11404 - }, - { - "epoch": 5.947848761408084, - "grad_norm": 1.4257253408432007, - "learning_rate": 8.904321608040202e-05, - "loss": 5.3292, - "step": 11405 - }, - { - "epoch": 5.948370273794002, - "grad_norm": 1.5816388130187988, - "learning_rate": 8.904221105527638e-05, - "loss": 5.7983, - "step": 11406 - }, - { - "epoch": 5.948891786179922, - "grad_norm": 1.448049783706665, - "learning_rate": 8.904120603015076e-05, - "loss": 5.1479, - "step": 11407 - }, - { - "epoch": 5.949413298565841, - "grad_norm": 1.49215567111969, - "learning_rate": 8.904020100502513e-05, - "loss": 5.5868, - "step": 11408 - }, - { - "epoch": 5.94993481095176, - "grad_norm": 1.53244948387146, - "learning_rate": 8.90391959798995e-05, - "loss": 6.0492, - "step": 11409 - }, - { - "epoch": 5.950456323337679, - "grad_norm": 1.6240018606185913, - "learning_rate": 8.903819095477387e-05, - "loss": 5.2907, - "step": 11410 - }, - { - "epoch": 5.950977835723599, - "grad_norm": 1.397289752960205, - "learning_rate": 8.903718592964825e-05, - "loss": 5.5432, - "step": 11411 - }, - { - "epoch": 5.951499348109517, - "grad_norm": 1.4843603372573853, - "learning_rate": 8.903618090452261e-05, - "loss": 5.1018, - "step": 11412 - }, - { - "epoch": 5.952020860495437, - "grad_norm": 1.651544213294983, - "learning_rate": 8.903517587939699e-05, - "loss": 5.0194, - "step": 11413 - }, - { - "epoch": 5.952542372881356, - "grad_norm": 1.4784749746322632, - "learning_rate": 8.903417085427137e-05, - "loss": 5.8767, - "step": 11414 - }, - { - "epoch": 5.953063885267275, - "grad_norm": 1.4332085847854614, - "learning_rate": 8.903316582914574e-05, - "loss": 5.7246, - "step": 11415 - }, - { - "epoch": 5.953585397653194, - "grad_norm": 1.6198838949203491, - "learning_rate": 8.903216080402011e-05, - "loss": 5.7252, - "step": 11416 - }, - { - "epoch": 5.954106910039114, - "grad_norm": 1.6024205684661865, - "learning_rate": 8.903115577889449e-05, - "loss": 5.3997, - "step": 11417 - }, - { - "epoch": 5.954628422425032, - "grad_norm": 1.45270574092865, - "learning_rate": 8.903015075376885e-05, - "loss": 5.579, - "step": 11418 - }, - { - "epoch": 5.955149934810952, - "grad_norm": 1.5584391355514526, - "learning_rate": 8.902914572864321e-05, - "loss": 4.9728, - "step": 11419 - }, - { - "epoch": 5.955671447196871, - "grad_norm": 1.6731576919555664, - "learning_rate": 8.902814070351759e-05, - "loss": 4.9593, - "step": 11420 - }, - { - "epoch": 5.95619295958279, - "grad_norm": 1.4699910879135132, - "learning_rate": 8.902713567839196e-05, - "loss": 5.5176, - "step": 11421 - }, - { - "epoch": 5.956714471968709, - "grad_norm": 1.4992434978485107, - "learning_rate": 8.902613065326633e-05, - "loss": 5.9015, - "step": 11422 - }, - { - "epoch": 5.957235984354629, - "grad_norm": 1.4685287475585938, - "learning_rate": 8.90251256281407e-05, - "loss": 5.6006, - "step": 11423 - }, - { - "epoch": 5.957757496740547, - "grad_norm": 1.4554989337921143, - "learning_rate": 8.902412060301508e-05, - "loss": 5.6344, - "step": 11424 - }, - { - "epoch": 5.958279009126467, - "grad_norm": 1.4735748767852783, - "learning_rate": 8.902311557788945e-05, - "loss": 5.0677, - "step": 11425 - }, - { - "epoch": 5.958800521512386, - "grad_norm": 1.4445641040802002, - "learning_rate": 8.902211055276383e-05, - "loss": 5.2927, - "step": 11426 - }, - { - "epoch": 5.959322033898305, - "grad_norm": 1.5215175151824951, - "learning_rate": 8.90211055276382e-05, - "loss": 5.6959, - "step": 11427 - }, - { - "epoch": 5.959843546284224, - "grad_norm": 1.54198157787323, - "learning_rate": 8.902010050251257e-05, - "loss": 5.5956, - "step": 11428 - }, - { - "epoch": 5.960365058670144, - "grad_norm": 1.460075855255127, - "learning_rate": 8.901909547738694e-05, - "loss": 5.5793, - "step": 11429 - }, - { - "epoch": 5.960886571056062, - "grad_norm": 1.3017932176589966, - "learning_rate": 8.901809045226132e-05, - "loss": 5.6999, - "step": 11430 - }, - { - "epoch": 5.961408083441982, - "grad_norm": 1.467511773109436, - "learning_rate": 8.901708542713568e-05, - "loss": 5.544, - "step": 11431 - }, - { - "epoch": 5.961929595827901, - "grad_norm": 1.4471137523651123, - "learning_rate": 8.901608040201004e-05, - "loss": 5.5961, - "step": 11432 - }, - { - "epoch": 5.96245110821382, - "grad_norm": 1.6525609493255615, - "learning_rate": 8.901507537688442e-05, - "loss": 5.1083, - "step": 11433 - }, - { - "epoch": 5.962972620599739, - "grad_norm": 1.4385781288146973, - "learning_rate": 8.90140703517588e-05, - "loss": 5.6165, - "step": 11434 - }, - { - "epoch": 5.963494132985659, - "grad_norm": 1.2901585102081299, - "learning_rate": 8.901306532663318e-05, - "loss": 6.1053, - "step": 11435 - }, - { - "epoch": 5.964015645371577, - "grad_norm": 1.438503623008728, - "learning_rate": 8.901206030150754e-05, - "loss": 5.821, - "step": 11436 - }, - { - "epoch": 5.964537157757497, - "grad_norm": 1.5674656629562378, - "learning_rate": 8.901105527638192e-05, - "loss": 5.1993, - "step": 11437 - }, - { - "epoch": 5.965058670143416, - "grad_norm": 1.355259656906128, - "learning_rate": 8.901005025125628e-05, - "loss": 5.4785, - "step": 11438 - }, - { - "epoch": 5.965580182529335, - "grad_norm": 1.4853551387786865, - "learning_rate": 8.900904522613066e-05, - "loss": 5.683, - "step": 11439 - }, - { - "epoch": 5.966101694915254, - "grad_norm": 1.393019437789917, - "learning_rate": 8.900804020100503e-05, - "loss": 5.7418, - "step": 11440 - }, - { - "epoch": 5.966623207301174, - "grad_norm": 1.4571832418441772, - "learning_rate": 8.90070351758794e-05, - "loss": 5.6825, - "step": 11441 - }, - { - "epoch": 5.967144719687092, - "grad_norm": 1.360360860824585, - "learning_rate": 8.900603015075377e-05, - "loss": 6.1209, - "step": 11442 - }, - { - "epoch": 5.967666232073012, - "grad_norm": 1.3024564981460571, - "learning_rate": 8.900502512562815e-05, - "loss": 5.8015, - "step": 11443 - }, - { - "epoch": 5.96818774445893, - "grad_norm": 1.4167948961257935, - "learning_rate": 8.900402010050251e-05, - "loss": 5.7435, - "step": 11444 - }, - { - "epoch": 5.96870925684485, - "grad_norm": 1.4393635988235474, - "learning_rate": 8.900301507537689e-05, - "loss": 5.5192, - "step": 11445 - }, - { - "epoch": 5.969230769230769, - "grad_norm": 1.4602512121200562, - "learning_rate": 8.900201005025127e-05, - "loss": 5.7962, - "step": 11446 - }, - { - "epoch": 5.969752281616689, - "grad_norm": 1.5254912376403809, - "learning_rate": 8.900100502512563e-05, - "loss": 5.4532, - "step": 11447 - }, - { - "epoch": 5.970273794002607, - "grad_norm": 1.371071696281433, - "learning_rate": 8.900000000000001e-05, - "loss": 5.9296, - "step": 11448 - }, - { - "epoch": 5.970795306388527, - "grad_norm": 1.363547444343567, - "learning_rate": 8.899899497487437e-05, - "loss": 5.6911, - "step": 11449 - }, - { - "epoch": 5.971316818774445, - "grad_norm": 1.5369521379470825, - "learning_rate": 8.899798994974875e-05, - "loss": 5.6967, - "step": 11450 - }, - { - "epoch": 5.971838331160365, - "grad_norm": 1.3703279495239258, - "learning_rate": 8.899698492462311e-05, - "loss": 5.8964, - "step": 11451 - }, - { - "epoch": 5.972359843546284, - "grad_norm": 1.4732915163040161, - "learning_rate": 8.899597989949749e-05, - "loss": 5.3285, - "step": 11452 - }, - { - "epoch": 5.972881355932204, - "grad_norm": 1.410583734512329, - "learning_rate": 8.899497487437186e-05, - "loss": 5.8994, - "step": 11453 - }, - { - "epoch": 5.973402868318122, - "grad_norm": 1.433791995048523, - "learning_rate": 8.899396984924623e-05, - "loss": 5.7509, - "step": 11454 - }, - { - "epoch": 5.973924380704042, - "grad_norm": 1.524071216583252, - "learning_rate": 8.899296482412061e-05, - "loss": 5.1739, - "step": 11455 - }, - { - "epoch": 5.97444589308996, - "grad_norm": 1.5249229669570923, - "learning_rate": 8.899195979899499e-05, - "loss": 5.2025, - "step": 11456 - }, - { - "epoch": 5.97496740547588, - "grad_norm": 1.3883343935012817, - "learning_rate": 8.899095477386935e-05, - "loss": 5.7042, - "step": 11457 - }, - { - "epoch": 5.975488917861799, - "grad_norm": 1.433491587638855, - "learning_rate": 8.898994974874373e-05, - "loss": 5.7486, - "step": 11458 - }, - { - "epoch": 5.976010430247719, - "grad_norm": 1.4000357389450073, - "learning_rate": 8.89889447236181e-05, - "loss": 6.129, - "step": 11459 - }, - { - "epoch": 5.976531942633637, - "grad_norm": 1.4899263381958008, - "learning_rate": 8.898793969849246e-05, - "loss": 5.64, - "step": 11460 - }, - { - "epoch": 5.977053455019557, - "grad_norm": 1.5438705682754517, - "learning_rate": 8.898693467336684e-05, - "loss": 5.4661, - "step": 11461 - }, - { - "epoch": 5.9775749674054754, - "grad_norm": 1.496922492980957, - "learning_rate": 8.89859296482412e-05, - "loss": 5.4025, - "step": 11462 - }, - { - "epoch": 5.978096479791395, - "grad_norm": 1.4808335304260254, - "learning_rate": 8.898492462311558e-05, - "loss": 5.5306, - "step": 11463 - }, - { - "epoch": 5.978617992177314, - "grad_norm": 1.411497950553894, - "learning_rate": 8.898391959798995e-05, - "loss": 5.7863, - "step": 11464 - }, - { - "epoch": 5.979139504563234, - "grad_norm": 1.6915209293365479, - "learning_rate": 8.898291457286432e-05, - "loss": 5.0461, - "step": 11465 - }, - { - "epoch": 5.979661016949152, - "grad_norm": 1.4937763214111328, - "learning_rate": 8.89819095477387e-05, - "loss": 5.9495, - "step": 11466 - }, - { - "epoch": 5.980182529335072, - "grad_norm": 1.571192741394043, - "learning_rate": 8.898090452261308e-05, - "loss": 5.2026, - "step": 11467 - }, - { - "epoch": 5.9807040417209905, - "grad_norm": 2.140212297439575, - "learning_rate": 8.897989949748744e-05, - "loss": 5.0994, - "step": 11468 - }, - { - "epoch": 5.98122555410691, - "grad_norm": 1.9276847839355469, - "learning_rate": 8.897889447236182e-05, - "loss": 5.8296, - "step": 11469 - }, - { - "epoch": 5.981747066492829, - "grad_norm": 1.4022644758224487, - "learning_rate": 8.897788944723619e-05, - "loss": 5.9593, - "step": 11470 - }, - { - "epoch": 5.982268578878749, - "grad_norm": 1.5062175989151, - "learning_rate": 8.897688442211056e-05, - "loss": 6.0316, - "step": 11471 - }, - { - "epoch": 5.982790091264667, - "grad_norm": 1.536367654800415, - "learning_rate": 8.897587939698493e-05, - "loss": 5.8859, - "step": 11472 - }, - { - "epoch": 5.983311603650587, - "grad_norm": 1.39445161819458, - "learning_rate": 8.897487437185929e-05, - "loss": 5.7954, - "step": 11473 - }, - { - "epoch": 5.9838331160365055, - "grad_norm": 1.6776553392410278, - "learning_rate": 8.897386934673367e-05, - "loss": 5.3543, - "step": 11474 - }, - { - "epoch": 5.984354628422425, - "grad_norm": 1.392533540725708, - "learning_rate": 8.897286432160805e-05, - "loss": 5.9663, - "step": 11475 - }, - { - "epoch": 5.984876140808344, - "grad_norm": 1.7007397413253784, - "learning_rate": 8.897185929648243e-05, - "loss": 5.1544, - "step": 11476 - }, - { - "epoch": 5.985397653194263, - "grad_norm": 1.4021518230438232, - "learning_rate": 8.897085427135679e-05, - "loss": 6.0448, - "step": 11477 - }, - { - "epoch": 5.985919165580182, - "grad_norm": 1.3803470134735107, - "learning_rate": 8.896984924623117e-05, - "loss": 5.9063, - "step": 11478 - }, - { - "epoch": 5.986440677966102, - "grad_norm": 1.388715147972107, - "learning_rate": 8.896884422110553e-05, - "loss": 5.6731, - "step": 11479 - }, - { - "epoch": 5.9869621903520205, - "grad_norm": 1.28661048412323, - "learning_rate": 8.896783919597991e-05, - "loss": 6.0246, - "step": 11480 - }, - { - "epoch": 5.98748370273794, - "grad_norm": 1.5021650791168213, - "learning_rate": 8.896683417085427e-05, - "loss": 5.6801, - "step": 11481 - }, - { - "epoch": 5.988005215123859, - "grad_norm": 1.6011439561843872, - "learning_rate": 8.896582914572865e-05, - "loss": 5.4163, - "step": 11482 - }, - { - "epoch": 5.988526727509778, - "grad_norm": 1.4522206783294678, - "learning_rate": 8.896482412060302e-05, - "loss": 6.0047, - "step": 11483 - }, - { - "epoch": 5.9890482398956975, - "grad_norm": 1.3615741729736328, - "learning_rate": 8.89638190954774e-05, - "loss": 5.4999, - "step": 11484 - }, - { - "epoch": 5.989569752281617, - "grad_norm": 1.5473908185958862, - "learning_rate": 8.896281407035176e-05, - "loss": 5.5526, - "step": 11485 - }, - { - "epoch": 5.9900912646675355, - "grad_norm": 1.435376763343811, - "learning_rate": 8.896180904522614e-05, - "loss": 5.7747, - "step": 11486 - }, - { - "epoch": 5.990612777053455, - "grad_norm": 1.4560998678207397, - "learning_rate": 8.896080402010051e-05, - "loss": 6.1515, - "step": 11487 - }, - { - "epoch": 5.991134289439374, - "grad_norm": 1.4988056421279907, - "learning_rate": 8.895979899497488e-05, - "loss": 5.1282, - "step": 11488 - }, - { - "epoch": 5.991655801825293, - "grad_norm": 1.594580054283142, - "learning_rate": 8.895879396984926e-05, - "loss": 5.9263, - "step": 11489 - }, - { - "epoch": 5.9921773142112125, - "grad_norm": 1.5053037405014038, - "learning_rate": 8.895778894472362e-05, - "loss": 5.8692, - "step": 11490 - }, - { - "epoch": 5.992698826597132, - "grad_norm": 1.5817185640335083, - "learning_rate": 8.8956783919598e-05, - "loss": 5.4713, - "step": 11491 - }, - { - "epoch": 5.9932203389830505, - "grad_norm": 1.4370204210281372, - "learning_rate": 8.895577889447236e-05, - "loss": 5.6285, - "step": 11492 - }, - { - "epoch": 5.99374185136897, - "grad_norm": 1.382635474205017, - "learning_rate": 8.895477386934674e-05, - "loss": 4.9485, - "step": 11493 - }, - { - "epoch": 5.994263363754889, - "grad_norm": 1.752809762954712, - "learning_rate": 8.89537688442211e-05, - "loss": 4.9758, - "step": 11494 - }, - { - "epoch": 5.994784876140808, - "grad_norm": 1.543208360671997, - "learning_rate": 8.895276381909548e-05, - "loss": 5.3748, - "step": 11495 - }, - { - "epoch": 5.9953063885267275, - "grad_norm": 1.6799103021621704, - "learning_rate": 8.895175879396985e-05, - "loss": 5.4698, - "step": 11496 - }, - { - "epoch": 5.995827900912647, - "grad_norm": 1.495679497718811, - "learning_rate": 8.895075376884422e-05, - "loss": 5.6099, - "step": 11497 - }, - { - "epoch": 5.9963494132985655, - "grad_norm": 1.4469408988952637, - "learning_rate": 8.89497487437186e-05, - "loss": 5.7036, - "step": 11498 - }, - { - "epoch": 5.996870925684485, - "grad_norm": 1.3990968465805054, - "learning_rate": 8.894874371859297e-05, - "loss": 5.4651, - "step": 11499 - }, - { - "epoch": 5.9973924380704045, - "grad_norm": 1.550546646118164, - "learning_rate": 8.894773869346734e-05, - "loss": 5.9033, - "step": 11500 - }, - { - "epoch": 5.997913950456323, - "grad_norm": 1.5084245204925537, - "learning_rate": 8.894673366834171e-05, - "loss": 5.7971, - "step": 11501 - }, - { - "epoch": 5.9984354628422425, - "grad_norm": 1.3780282735824585, - "learning_rate": 8.894572864321609e-05, - "loss": 5.4991, - "step": 11502 - }, - { - "epoch": 5.998956975228162, - "grad_norm": 1.5459003448486328, - "learning_rate": 8.894472361809045e-05, - "loss": 5.4689, - "step": 11503 - }, - { - "epoch": 5.9994784876140805, - "grad_norm": 1.7015087604522705, - "learning_rate": 8.894371859296483e-05, - "loss": 4.799, - "step": 11504 - }, - { - "epoch": 6.0, - "grad_norm": 1.493294358253479, - "learning_rate": 8.894271356783919e-05, - "loss": 6.2573, - "step": 11505 - }, - { - "epoch": 6.0005215123859195, - "grad_norm": 1.506612777709961, - "learning_rate": 8.894170854271357e-05, - "loss": 5.8965, - "step": 11506 - }, - { - "epoch": 6.001043024771838, - "grad_norm": 1.6368435621261597, - "learning_rate": 8.894070351758795e-05, - "loss": 5.431, - "step": 11507 - }, - { - "epoch": 6.0015645371577575, - "grad_norm": 1.4499064683914185, - "learning_rate": 8.893969849246233e-05, - "loss": 6.0299, - "step": 11508 - }, - { - "epoch": 6.002086049543677, - "grad_norm": 1.5409725904464722, - "learning_rate": 8.893869346733669e-05, - "loss": 5.775, - "step": 11509 - }, - { - "epoch": 6.0026075619295955, - "grad_norm": 1.525002360343933, - "learning_rate": 8.893768844221107e-05, - "loss": 5.3477, - "step": 11510 - }, - { - "epoch": 6.003129074315515, - "grad_norm": 1.3902626037597656, - "learning_rate": 8.893668341708543e-05, - "loss": 5.3153, - "step": 11511 - }, - { - "epoch": 6.0036505867014345, - "grad_norm": 1.407558798789978, - "learning_rate": 8.89356783919598e-05, - "loss": 5.848, - "step": 11512 - }, - { - "epoch": 6.004172099087353, - "grad_norm": 1.495614767074585, - "learning_rate": 8.893467336683417e-05, - "loss": 5.1514, - "step": 11513 - }, - { - "epoch": 6.0046936114732725, - "grad_norm": 1.3853685855865479, - "learning_rate": 8.893366834170854e-05, - "loss": 5.4911, - "step": 11514 - }, - { - "epoch": 6.005215123859192, - "grad_norm": 1.4366756677627563, - "learning_rate": 8.893266331658292e-05, - "loss": 5.7491, - "step": 11515 - }, - { - "epoch": 6.005736636245111, - "grad_norm": 1.4816884994506836, - "learning_rate": 8.893165829145728e-05, - "loss": 5.359, - "step": 11516 - }, - { - "epoch": 6.00625814863103, - "grad_norm": 1.650976300239563, - "learning_rate": 8.893065326633166e-05, - "loss": 5.2963, - "step": 11517 - }, - { - "epoch": 6.0067796610169495, - "grad_norm": 1.4794443845748901, - "learning_rate": 8.892964824120604e-05, - "loss": 5.352, - "step": 11518 - }, - { - "epoch": 6.007301173402868, - "grad_norm": 1.4016081094741821, - "learning_rate": 8.892864321608041e-05, - "loss": 5.5628, - "step": 11519 - }, - { - "epoch": 6.0078226857887875, - "grad_norm": 1.489331603050232, - "learning_rate": 8.892763819095478e-05, - "loss": 5.6822, - "step": 11520 - }, - { - "epoch": 6.008344198174707, - "grad_norm": 1.5036084651947021, - "learning_rate": 8.892663316582916e-05, - "loss": 5.6775, - "step": 11521 - }, - { - "epoch": 6.008865710560626, - "grad_norm": 1.5228267908096313, - "learning_rate": 8.892562814070352e-05, - "loss": 5.0648, - "step": 11522 - }, - { - "epoch": 6.009387222946545, - "grad_norm": 1.438232183456421, - "learning_rate": 8.89246231155779e-05, - "loss": 5.7455, - "step": 11523 - }, - { - "epoch": 6.0099087353324645, - "grad_norm": 1.4587231874465942, - "learning_rate": 8.892361809045226e-05, - "loss": 5.3596, - "step": 11524 - }, - { - "epoch": 6.010430247718383, - "grad_norm": 1.5024542808532715, - "learning_rate": 8.892261306532663e-05, - "loss": 5.8227, - "step": 11525 - }, - { - "epoch": 6.0109517601043025, - "grad_norm": 1.577108383178711, - "learning_rate": 8.8921608040201e-05, - "loss": 5.2668, - "step": 11526 - }, - { - "epoch": 6.011473272490222, - "grad_norm": 1.4503118991851807, - "learning_rate": 8.892060301507538e-05, - "loss": 5.5014, - "step": 11527 - }, - { - "epoch": 6.011994784876141, - "grad_norm": 1.5820109844207764, - "learning_rate": 8.891959798994976e-05, - "loss": 5.3072, - "step": 11528 - }, - { - "epoch": 6.01251629726206, - "grad_norm": 1.4806160926818848, - "learning_rate": 8.891859296482412e-05, - "loss": 5.783, - "step": 11529 - }, - { - "epoch": 6.0130378096479795, - "grad_norm": 1.5678306818008423, - "learning_rate": 8.89175879396985e-05, - "loss": 5.4599, - "step": 11530 - }, - { - "epoch": 6.013559322033898, - "grad_norm": 1.46477210521698, - "learning_rate": 8.891658291457287e-05, - "loss": 5.6101, - "step": 11531 - }, - { - "epoch": 6.014080834419818, - "grad_norm": 1.3827650547027588, - "learning_rate": 8.891557788944724e-05, - "loss": 5.6189, - "step": 11532 - }, - { - "epoch": 6.014602346805737, - "grad_norm": 1.3744256496429443, - "learning_rate": 8.891457286432161e-05, - "loss": 5.6499, - "step": 11533 - }, - { - "epoch": 6.015123859191656, - "grad_norm": 1.4532227516174316, - "learning_rate": 8.891356783919599e-05, - "loss": 5.3989, - "step": 11534 - }, - { - "epoch": 6.015645371577575, - "grad_norm": 1.350378394126892, - "learning_rate": 8.891256281407035e-05, - "loss": 5.5783, - "step": 11535 - }, - { - "epoch": 6.0161668839634945, - "grad_norm": 1.338955044746399, - "learning_rate": 8.891155778894473e-05, - "loss": 6.0902, - "step": 11536 - }, - { - "epoch": 6.016688396349413, - "grad_norm": 1.33506178855896, - "learning_rate": 8.891055276381909e-05, - "loss": 6.1081, - "step": 11537 - }, - { - "epoch": 6.017209908735333, - "grad_norm": 1.4431123733520508, - "learning_rate": 8.890954773869347e-05, - "loss": 5.3238, - "step": 11538 - }, - { - "epoch": 6.017731421121252, - "grad_norm": 1.391509771347046, - "learning_rate": 8.890854271356785e-05, - "loss": 5.4759, - "step": 11539 - }, - { - "epoch": 6.018252933507171, - "grad_norm": 1.3812015056610107, - "learning_rate": 8.890753768844221e-05, - "loss": 5.3988, - "step": 11540 - }, - { - "epoch": 6.01877444589309, - "grad_norm": 1.4787200689315796, - "learning_rate": 8.890653266331659e-05, - "loss": 5.7927, - "step": 11541 - }, - { - "epoch": 6.0192959582790095, - "grad_norm": 1.3573094606399536, - "learning_rate": 8.890552763819096e-05, - "loss": 5.8857, - "step": 11542 - }, - { - "epoch": 6.019817470664928, - "grad_norm": 1.5243029594421387, - "learning_rate": 8.890452261306533e-05, - "loss": 5.6116, - "step": 11543 - }, - { - "epoch": 6.020338983050848, - "grad_norm": 1.4293748140335083, - "learning_rate": 8.89035175879397e-05, - "loss": 5.7861, - "step": 11544 - }, - { - "epoch": 6.020860495436767, - "grad_norm": 1.40329110622406, - "learning_rate": 8.890251256281408e-05, - "loss": 5.5211, - "step": 11545 - }, - { - "epoch": 6.021382007822686, - "grad_norm": 1.320410132408142, - "learning_rate": 8.890150753768844e-05, - "loss": 5.8615, - "step": 11546 - }, - { - "epoch": 6.021903520208605, - "grad_norm": 1.4855666160583496, - "learning_rate": 8.890050251256282e-05, - "loss": 5.6527, - "step": 11547 - }, - { - "epoch": 6.0224250325945246, - "grad_norm": 1.480035424232483, - "learning_rate": 8.88994974874372e-05, - "loss": 5.154, - "step": 11548 - }, - { - "epoch": 6.022946544980443, - "grad_norm": 1.5009076595306396, - "learning_rate": 8.889849246231157e-05, - "loss": 5.6175, - "step": 11549 - }, - { - "epoch": 6.023468057366363, - "grad_norm": 1.49691903591156, - "learning_rate": 8.889748743718594e-05, - "loss": 5.1467, - "step": 11550 - }, - { - "epoch": 6.023989569752282, - "grad_norm": 1.355566143989563, - "learning_rate": 8.889648241206031e-05, - "loss": 5.5139, - "step": 11551 - }, - { - "epoch": 6.024511082138201, - "grad_norm": 1.4316003322601318, - "learning_rate": 8.889547738693468e-05, - "loss": 5.8601, - "step": 11552 - }, - { - "epoch": 6.02503259452412, - "grad_norm": 1.645242691040039, - "learning_rate": 8.889447236180904e-05, - "loss": 5.0952, - "step": 11553 - }, - { - "epoch": 6.025554106910039, - "grad_norm": 1.5290937423706055, - "learning_rate": 8.889346733668342e-05, - "loss": 5.2091, - "step": 11554 - }, - { - "epoch": 6.026075619295958, - "grad_norm": 1.5301264524459839, - "learning_rate": 8.889246231155779e-05, - "loss": 5.3883, - "step": 11555 - }, - { - "epoch": 6.026597131681878, - "grad_norm": 1.3776392936706543, - "learning_rate": 8.889145728643216e-05, - "loss": 5.8995, - "step": 11556 - }, - { - "epoch": 6.027118644067796, - "grad_norm": 1.407408356666565, - "learning_rate": 8.889045226130653e-05, - "loss": 5.5612, - "step": 11557 - }, - { - "epoch": 6.027640156453716, - "grad_norm": 1.337822675704956, - "learning_rate": 8.88894472361809e-05, - "loss": 5.5711, - "step": 11558 - }, - { - "epoch": 6.028161668839635, - "grad_norm": 1.384225606918335, - "learning_rate": 8.888844221105528e-05, - "loss": 5.6158, - "step": 11559 - }, - { - "epoch": 6.028683181225554, - "grad_norm": 1.5943576097488403, - "learning_rate": 8.888743718592966e-05, - "loss": 5.0925, - "step": 11560 - }, - { - "epoch": 6.029204693611473, - "grad_norm": 1.378939151763916, - "learning_rate": 8.888643216080403e-05, - "loss": 5.4412, - "step": 11561 - }, - { - "epoch": 6.029726205997393, - "grad_norm": 1.3964170217514038, - "learning_rate": 8.88854271356784e-05, - "loss": 5.84, - "step": 11562 - }, - { - "epoch": 6.030247718383311, - "grad_norm": 1.4324042797088623, - "learning_rate": 8.888442211055277e-05, - "loss": 5.5921, - "step": 11563 - }, - { - "epoch": 6.030769230769231, - "grad_norm": 1.347200870513916, - "learning_rate": 8.888341708542715e-05, - "loss": 5.8994, - "step": 11564 - }, - { - "epoch": 6.03129074315515, - "grad_norm": 1.4284348487854004, - "learning_rate": 8.888241206030151e-05, - "loss": 5.663, - "step": 11565 - }, - { - "epoch": 6.031812255541069, - "grad_norm": 1.361517071723938, - "learning_rate": 8.888140703517587e-05, - "loss": 5.8241, - "step": 11566 - }, - { - "epoch": 6.032333767926988, - "grad_norm": 1.410835862159729, - "learning_rate": 8.888040201005025e-05, - "loss": 5.5417, - "step": 11567 - }, - { - "epoch": 6.032855280312908, - "grad_norm": 1.5175306797027588, - "learning_rate": 8.887939698492463e-05, - "loss": 5.6678, - "step": 11568 - }, - { - "epoch": 6.033376792698826, - "grad_norm": 1.3770477771759033, - "learning_rate": 8.887839195979901e-05, - "loss": 5.9723, - "step": 11569 - }, - { - "epoch": 6.033898305084746, - "grad_norm": 1.539601445198059, - "learning_rate": 8.887738693467337e-05, - "loss": 5.4048, - "step": 11570 - }, - { - "epoch": 6.034419817470665, - "grad_norm": 1.4945060014724731, - "learning_rate": 8.887638190954775e-05, - "loss": 5.6874, - "step": 11571 - }, - { - "epoch": 6.034941329856584, - "grad_norm": 1.3982456922531128, - "learning_rate": 8.887537688442211e-05, - "loss": 5.5145, - "step": 11572 - }, - { - "epoch": 6.035462842242503, - "grad_norm": 1.5270003080368042, - "learning_rate": 8.887437185929649e-05, - "loss": 5.542, - "step": 11573 - }, - { - "epoch": 6.035984354628423, - "grad_norm": 1.3933851718902588, - "learning_rate": 8.887336683417086e-05, - "loss": 5.5848, - "step": 11574 - }, - { - "epoch": 6.036505867014341, - "grad_norm": 1.4296610355377197, - "learning_rate": 8.887236180904523e-05, - "loss": 5.7676, - "step": 11575 - }, - { - "epoch": 6.037027379400261, - "grad_norm": 1.3824865818023682, - "learning_rate": 8.88713567839196e-05, - "loss": 5.5698, - "step": 11576 - }, - { - "epoch": 6.03754889178618, - "grad_norm": 1.4425302743911743, - "learning_rate": 8.887035175879398e-05, - "loss": 5.4209, - "step": 11577 - }, - { - "epoch": 6.038070404172099, - "grad_norm": 1.3651498556137085, - "learning_rate": 8.886934673366834e-05, - "loss": 5.6307, - "step": 11578 - }, - { - "epoch": 6.038591916558018, - "grad_norm": 1.4273524284362793, - "learning_rate": 8.886834170854272e-05, - "loss": 5.3766, - "step": 11579 - }, - { - "epoch": 6.039113428943938, - "grad_norm": 1.528870701789856, - "learning_rate": 8.88673366834171e-05, - "loss": 5.6867, - "step": 11580 - }, - { - "epoch": 6.039634941329856, - "grad_norm": 1.3965225219726562, - "learning_rate": 8.886633165829146e-05, - "loss": 6.0731, - "step": 11581 - }, - { - "epoch": 6.040156453715776, - "grad_norm": 1.47834050655365, - "learning_rate": 8.886532663316584e-05, - "loss": 5.2698, - "step": 11582 - }, - { - "epoch": 6.040677966101695, - "grad_norm": 1.5256093740463257, - "learning_rate": 8.88643216080402e-05, - "loss": 5.1856, - "step": 11583 - }, - { - "epoch": 6.041199478487614, - "grad_norm": 1.538853645324707, - "learning_rate": 8.886331658291458e-05, - "loss": 5.4396, - "step": 11584 - }, - { - "epoch": 6.041720990873533, - "grad_norm": 1.454728126525879, - "learning_rate": 8.886231155778894e-05, - "loss": 5.1644, - "step": 11585 - }, - { - "epoch": 6.042242503259453, - "grad_norm": 1.583857536315918, - "learning_rate": 8.886130653266332e-05, - "loss": 5.5083, - "step": 11586 - }, - { - "epoch": 6.042764015645371, - "grad_norm": 1.436181902885437, - "learning_rate": 8.886030150753769e-05, - "loss": 5.4298, - "step": 11587 - }, - { - "epoch": 6.043285528031291, - "grad_norm": 1.5486282110214233, - "learning_rate": 8.885929648241206e-05, - "loss": 5.5792, - "step": 11588 - }, - { - "epoch": 6.04380704041721, - "grad_norm": 1.3282279968261719, - "learning_rate": 8.885829145728644e-05, - "loss": 5.9995, - "step": 11589 - }, - { - "epoch": 6.044328552803129, - "grad_norm": 1.665515661239624, - "learning_rate": 8.885728643216082e-05, - "loss": 4.9865, - "step": 11590 - }, - { - "epoch": 6.044850065189048, - "grad_norm": 1.7850240468978882, - "learning_rate": 8.885628140703518e-05, - "loss": 4.6307, - "step": 11591 - }, - { - "epoch": 6.045371577574968, - "grad_norm": 1.5753324031829834, - "learning_rate": 8.885527638190955e-05, - "loss": 5.8452, - "step": 11592 - }, - { - "epoch": 6.045893089960886, - "grad_norm": 1.6387650966644287, - "learning_rate": 8.885427135678393e-05, - "loss": 5.4738, - "step": 11593 - }, - { - "epoch": 6.046414602346806, - "grad_norm": 1.410549521446228, - "learning_rate": 8.885326633165829e-05, - "loss": 5.871, - "step": 11594 - }, - { - "epoch": 6.046936114732725, - "grad_norm": 1.5383152961730957, - "learning_rate": 8.885226130653267e-05, - "loss": 5.5865, - "step": 11595 - }, - { - "epoch": 6.047457627118644, - "grad_norm": 1.536430835723877, - "learning_rate": 8.885125628140703e-05, - "loss": 5.5154, - "step": 11596 - }, - { - "epoch": 6.047979139504563, - "grad_norm": 1.5007210969924927, - "learning_rate": 8.885025125628141e-05, - "loss": 5.6281, - "step": 11597 - }, - { - "epoch": 6.048500651890483, - "grad_norm": 1.5569000244140625, - "learning_rate": 8.884924623115577e-05, - "loss": 5.6222, - "step": 11598 - }, - { - "epoch": 6.049022164276401, - "grad_norm": 1.3333216905593872, - "learning_rate": 8.884824120603015e-05, - "loss": 5.4981, - "step": 11599 - }, - { - "epoch": 6.049543676662321, - "grad_norm": 1.4326790571212769, - "learning_rate": 8.884723618090453e-05, - "loss": 5.7576, - "step": 11600 - }, - { - "epoch": 6.05006518904824, - "grad_norm": 1.5845881700515747, - "learning_rate": 8.884623115577891e-05, - "loss": 4.4274, - "step": 11601 - }, - { - "epoch": 6.050586701434159, - "grad_norm": 1.3588321208953857, - "learning_rate": 8.884522613065327e-05, - "loss": 5.8819, - "step": 11602 - }, - { - "epoch": 6.051108213820078, - "grad_norm": 1.3394415378570557, - "learning_rate": 8.884422110552765e-05, - "loss": 5.8239, - "step": 11603 - }, - { - "epoch": 6.051629726205998, - "grad_norm": 1.5041611194610596, - "learning_rate": 8.884321608040201e-05, - "loss": 5.5807, - "step": 11604 - }, - { - "epoch": 6.052151238591916, - "grad_norm": 1.4032191038131714, - "learning_rate": 8.884221105527638e-05, - "loss": 5.518, - "step": 11605 - }, - { - "epoch": 6.052672750977836, - "grad_norm": 1.507315754890442, - "learning_rate": 8.884120603015076e-05, - "loss": 5.6709, - "step": 11606 - }, - { - "epoch": 6.053194263363755, - "grad_norm": 1.6724522113800049, - "learning_rate": 8.884020100502512e-05, - "loss": 5.7313, - "step": 11607 - }, - { - "epoch": 6.053715775749674, - "grad_norm": 1.3834474086761475, - "learning_rate": 8.88391959798995e-05, - "loss": 5.7011, - "step": 11608 - }, - { - "epoch": 6.054237288135593, - "grad_norm": 1.3845744132995605, - "learning_rate": 8.883819095477388e-05, - "loss": 5.8044, - "step": 11609 - }, - { - "epoch": 6.054758800521513, - "grad_norm": 1.4126548767089844, - "learning_rate": 8.883718592964825e-05, - "loss": 5.5829, - "step": 11610 - }, - { - "epoch": 6.055280312907431, - "grad_norm": 1.4210928678512573, - "learning_rate": 8.883618090452262e-05, - "loss": 6.0685, - "step": 11611 - }, - { - "epoch": 6.055801825293351, - "grad_norm": 1.4478729963302612, - "learning_rate": 8.8835175879397e-05, - "loss": 5.4942, - "step": 11612 - }, - { - "epoch": 6.05632333767927, - "grad_norm": 1.4040195941925049, - "learning_rate": 8.883417085427136e-05, - "loss": 5.154, - "step": 11613 - }, - { - "epoch": 6.056844850065189, - "grad_norm": 1.458060622215271, - "learning_rate": 8.883316582914574e-05, - "loss": 5.5009, - "step": 11614 - }, - { - "epoch": 6.057366362451108, - "grad_norm": 1.3442856073379517, - "learning_rate": 8.88321608040201e-05, - "loss": 5.4299, - "step": 11615 - }, - { - "epoch": 6.057887874837028, - "grad_norm": 1.3361666202545166, - "learning_rate": 8.883115577889448e-05, - "loss": 5.4157, - "step": 11616 - }, - { - "epoch": 6.058409387222946, - "grad_norm": 1.5110055208206177, - "learning_rate": 8.883015075376884e-05, - "loss": 5.8257, - "step": 11617 - }, - { - "epoch": 6.058930899608866, - "grad_norm": 1.4149893522262573, - "learning_rate": 8.882914572864321e-05, - "loss": 5.4033, - "step": 11618 - }, - { - "epoch": 6.059452411994785, - "grad_norm": 1.8572697639465332, - "learning_rate": 8.882814070351759e-05, - "loss": 4.8073, - "step": 11619 - }, - { - "epoch": 6.059973924380704, - "grad_norm": 1.3739112615585327, - "learning_rate": 8.882713567839196e-05, - "loss": 5.7173, - "step": 11620 - }, - { - "epoch": 6.060495436766623, - "grad_norm": 1.4313302040100098, - "learning_rate": 8.882613065326634e-05, - "loss": 5.768, - "step": 11621 - }, - { - "epoch": 6.061016949152543, - "grad_norm": 1.4166511297225952, - "learning_rate": 8.882512562814071e-05, - "loss": 5.6159, - "step": 11622 - }, - { - "epoch": 6.061538461538461, - "grad_norm": 1.4441481828689575, - "learning_rate": 8.882412060301508e-05, - "loss": 5.5485, - "step": 11623 - }, - { - "epoch": 6.062059973924381, - "grad_norm": 1.3893630504608154, - "learning_rate": 8.882311557788945e-05, - "loss": 5.7115, - "step": 11624 - }, - { - "epoch": 6.0625814863103, - "grad_norm": 1.4057269096374512, - "learning_rate": 8.882211055276383e-05, - "loss": 5.7058, - "step": 11625 - }, - { - "epoch": 6.063102998696219, - "grad_norm": 1.5195238590240479, - "learning_rate": 8.882110552763819e-05, - "loss": 5.7299, - "step": 11626 - }, - { - "epoch": 6.063624511082138, - "grad_norm": 1.432850956916809, - "learning_rate": 8.882010050251257e-05, - "loss": 5.8528, - "step": 11627 - }, - { - "epoch": 6.064146023468058, - "grad_norm": 1.605488657951355, - "learning_rate": 8.881909547738693e-05, - "loss": 5.7096, - "step": 11628 - }, - { - "epoch": 6.064667535853976, - "grad_norm": 1.761354923248291, - "learning_rate": 8.881809045226131e-05, - "loss": 4.8571, - "step": 11629 - }, - { - "epoch": 6.065189048239896, - "grad_norm": 1.3983947038650513, - "learning_rate": 8.881708542713569e-05, - "loss": 6.0137, - "step": 11630 - }, - { - "epoch": 6.065710560625815, - "grad_norm": 1.4955530166625977, - "learning_rate": 8.881608040201005e-05, - "loss": 5.5121, - "step": 11631 - }, - { - "epoch": 6.066232073011734, - "grad_norm": 1.3789114952087402, - "learning_rate": 8.881507537688443e-05, - "loss": 5.6556, - "step": 11632 - }, - { - "epoch": 6.066753585397653, - "grad_norm": 1.4820038080215454, - "learning_rate": 8.88140703517588e-05, - "loss": 5.6404, - "step": 11633 - }, - { - "epoch": 6.067275097783573, - "grad_norm": 1.62134850025177, - "learning_rate": 8.881306532663317e-05, - "loss": 5.1017, - "step": 11634 - }, - { - "epoch": 6.067796610169491, - "grad_norm": 1.5597553253173828, - "learning_rate": 8.881206030150754e-05, - "loss": 5.4375, - "step": 11635 - }, - { - "epoch": 6.068318122555411, - "grad_norm": 1.4159907102584839, - "learning_rate": 8.881105527638192e-05, - "loss": 5.8136, - "step": 11636 - }, - { - "epoch": 6.06883963494133, - "grad_norm": 1.380881905555725, - "learning_rate": 8.881005025125628e-05, - "loss": 5.7713, - "step": 11637 - }, - { - "epoch": 6.069361147327249, - "grad_norm": 1.4326351881027222, - "learning_rate": 8.880904522613066e-05, - "loss": 5.2108, - "step": 11638 - }, - { - "epoch": 6.069882659713168, - "grad_norm": 1.4535613059997559, - "learning_rate": 8.880804020100502e-05, - "loss": 5.5082, - "step": 11639 - }, - { - "epoch": 6.070404172099088, - "grad_norm": 1.4238014221191406, - "learning_rate": 8.88070351758794e-05, - "loss": 5.6508, - "step": 11640 - }, - { - "epoch": 6.070925684485006, - "grad_norm": 1.4178804159164429, - "learning_rate": 8.880603015075378e-05, - "loss": 5.8139, - "step": 11641 - }, - { - "epoch": 6.071447196870926, - "grad_norm": 1.4395684003829956, - "learning_rate": 8.880502512562816e-05, - "loss": 5.7993, - "step": 11642 - }, - { - "epoch": 6.071968709256845, - "grad_norm": 1.4325587749481201, - "learning_rate": 8.880402010050252e-05, - "loss": 5.7201, - "step": 11643 - }, - { - "epoch": 6.072490221642764, - "grad_norm": 1.4966754913330078, - "learning_rate": 8.88030150753769e-05, - "loss": 5.7462, - "step": 11644 - }, - { - "epoch": 6.073011734028683, - "grad_norm": 1.3985131978988647, - "learning_rate": 8.880201005025126e-05, - "loss": 5.8019, - "step": 11645 - }, - { - "epoch": 6.073533246414602, - "grad_norm": 1.5723531246185303, - "learning_rate": 8.880100502512563e-05, - "loss": 5.7066, - "step": 11646 - }, - { - "epoch": 6.074054758800521, - "grad_norm": 1.439120888710022, - "learning_rate": 8.88e-05, - "loss": 5.4052, - "step": 11647 - }, - { - "epoch": 6.074576271186441, - "grad_norm": 1.5430370569229126, - "learning_rate": 8.879899497487437e-05, - "loss": 5.6761, - "step": 11648 - }, - { - "epoch": 6.075097783572359, - "grad_norm": 1.5189270973205566, - "learning_rate": 8.879798994974875e-05, - "loss": 5.4173, - "step": 11649 - }, - { - "epoch": 6.075619295958279, - "grad_norm": 1.4400956630706787, - "learning_rate": 8.879698492462311e-05, - "loss": 5.532, - "step": 11650 - }, - { - "epoch": 6.076140808344198, - "grad_norm": 1.4955275058746338, - "learning_rate": 8.879597989949749e-05, - "loss": 5.3771, - "step": 11651 - }, - { - "epoch": 6.076662320730117, - "grad_norm": 1.4612884521484375, - "learning_rate": 8.879497487437187e-05, - "loss": 5.4253, - "step": 11652 - }, - { - "epoch": 6.077183833116036, - "grad_norm": 1.4572975635528564, - "learning_rate": 8.879396984924624e-05, - "loss": 5.7556, - "step": 11653 - }, - { - "epoch": 6.077705345501956, - "grad_norm": 1.5068236589431763, - "learning_rate": 8.879296482412061e-05, - "loss": 5.5518, - "step": 11654 - }, - { - "epoch": 6.078226857887874, - "grad_norm": 1.5407376289367676, - "learning_rate": 8.879195979899499e-05, - "loss": 5.5085, - "step": 11655 - }, - { - "epoch": 6.078748370273794, - "grad_norm": 1.4388582706451416, - "learning_rate": 8.879095477386935e-05, - "loss": 5.378, - "step": 11656 - }, - { - "epoch": 6.079269882659713, - "grad_norm": 1.4095635414123535, - "learning_rate": 8.878994974874373e-05, - "loss": 5.8043, - "step": 11657 - }, - { - "epoch": 6.079791395045632, - "grad_norm": 1.4487318992614746, - "learning_rate": 8.878894472361809e-05, - "loss": 5.4226, - "step": 11658 - }, - { - "epoch": 6.080312907431551, - "grad_norm": 1.4810349941253662, - "learning_rate": 8.878793969849246e-05, - "loss": 5.7132, - "step": 11659 - }, - { - "epoch": 6.080834419817471, - "grad_norm": 1.5533474683761597, - "learning_rate": 8.878693467336683e-05, - "loss": 5.4808, - "step": 11660 - }, - { - "epoch": 6.081355932203389, - "grad_norm": 1.6641393899917603, - "learning_rate": 8.878592964824121e-05, - "loss": 5.4347, - "step": 11661 - }, - { - "epoch": 6.081877444589309, - "grad_norm": 1.3789678812026978, - "learning_rate": 8.878492462311559e-05, - "loss": 5.5371, - "step": 11662 - }, - { - "epoch": 6.082398956975228, - "grad_norm": 1.6892882585525513, - "learning_rate": 8.878391959798995e-05, - "loss": 5.4211, - "step": 11663 - }, - { - "epoch": 6.082920469361147, - "grad_norm": 1.6255137920379639, - "learning_rate": 8.878291457286433e-05, - "loss": 5.4877, - "step": 11664 - }, - { - "epoch": 6.083441981747066, - "grad_norm": 1.39809250831604, - "learning_rate": 8.87819095477387e-05, - "loss": 5.9126, - "step": 11665 - }, - { - "epoch": 6.083963494132986, - "grad_norm": 1.4208098649978638, - "learning_rate": 8.878090452261307e-05, - "loss": 5.9038, - "step": 11666 - }, - { - "epoch": 6.0844850065189044, - "grad_norm": 1.321784257888794, - "learning_rate": 8.877989949748744e-05, - "loss": 5.7531, - "step": 11667 - }, - { - "epoch": 6.085006518904824, - "grad_norm": 1.4681508541107178, - "learning_rate": 8.877889447236182e-05, - "loss": 5.7306, - "step": 11668 - }, - { - "epoch": 6.085528031290743, - "grad_norm": 1.4576343297958374, - "learning_rate": 8.877788944723618e-05, - "loss": 5.2853, - "step": 11669 - }, - { - "epoch": 6.086049543676662, - "grad_norm": 1.4131999015808105, - "learning_rate": 8.877688442211056e-05, - "loss": 5.7182, - "step": 11670 - }, - { - "epoch": 6.086571056062581, - "grad_norm": 1.393664836883545, - "learning_rate": 8.877587939698492e-05, - "loss": 5.7543, - "step": 11671 - }, - { - "epoch": 6.087092568448501, - "grad_norm": 1.4018250703811646, - "learning_rate": 8.87748743718593e-05, - "loss": 6.014, - "step": 11672 - }, - { - "epoch": 6.0876140808344195, - "grad_norm": 1.5123642683029175, - "learning_rate": 8.877386934673368e-05, - "loss": 5.2173, - "step": 11673 - }, - { - "epoch": 6.088135593220339, - "grad_norm": 1.3852519989013672, - "learning_rate": 8.877286432160804e-05, - "loss": 5.5522, - "step": 11674 - }, - { - "epoch": 6.088657105606258, - "grad_norm": 1.513590931892395, - "learning_rate": 8.877185929648242e-05, - "loss": 5.5405, - "step": 11675 - }, - { - "epoch": 6.089178617992177, - "grad_norm": 1.454188585281372, - "learning_rate": 8.877085427135678e-05, - "loss": 5.6613, - "step": 11676 - }, - { - "epoch": 6.089700130378096, - "grad_norm": 1.3352017402648926, - "learning_rate": 8.876984924623116e-05, - "loss": 5.6963, - "step": 11677 - }, - { - "epoch": 6.090221642764016, - "grad_norm": 1.4689871072769165, - "learning_rate": 8.876884422110553e-05, - "loss": 5.7368, - "step": 11678 - }, - { - "epoch": 6.0907431551499345, - "grad_norm": 1.439414143562317, - "learning_rate": 8.87678391959799e-05, - "loss": 5.7839, - "step": 11679 - }, - { - "epoch": 6.091264667535854, - "grad_norm": 1.6185612678527832, - "learning_rate": 8.876683417085427e-05, - "loss": 4.8691, - "step": 11680 - }, - { - "epoch": 6.091786179921773, - "grad_norm": 1.4994142055511475, - "learning_rate": 8.876582914572865e-05, - "loss": 5.6558, - "step": 11681 - }, - { - "epoch": 6.092307692307692, - "grad_norm": 1.5859827995300293, - "learning_rate": 8.876482412060302e-05, - "loss": 5.3107, - "step": 11682 - }, - { - "epoch": 6.0928292046936114, - "grad_norm": 1.45134437084198, - "learning_rate": 8.87638190954774e-05, - "loss": 5.4972, - "step": 11683 - }, - { - "epoch": 6.093350717079531, - "grad_norm": 1.494283676147461, - "learning_rate": 8.876281407035177e-05, - "loss": 5.6476, - "step": 11684 - }, - { - "epoch": 6.0938722294654495, - "grad_norm": 1.471144676208496, - "learning_rate": 8.876180904522613e-05, - "loss": 5.474, - "step": 11685 - }, - { - "epoch": 6.094393741851369, - "grad_norm": 1.5183571577072144, - "learning_rate": 8.876080402010051e-05, - "loss": 5.7644, - "step": 11686 - }, - { - "epoch": 6.094915254237288, - "grad_norm": 1.5407963991165161, - "learning_rate": 8.875979899497487e-05, - "loss": 5.7056, - "step": 11687 - }, - { - "epoch": 6.095436766623207, - "grad_norm": 1.3944048881530762, - "learning_rate": 8.875879396984925e-05, - "loss": 5.7754, - "step": 11688 - }, - { - "epoch": 6.0959582790091265, - "grad_norm": 1.3787580728530884, - "learning_rate": 8.875778894472361e-05, - "loss": 5.5175, - "step": 11689 - }, - { - "epoch": 6.096479791395046, - "grad_norm": 1.3635141849517822, - "learning_rate": 8.875678391959799e-05, - "loss": 5.3298, - "step": 11690 - }, - { - "epoch": 6.0970013037809645, - "grad_norm": 1.5315465927124023, - "learning_rate": 8.875577889447236e-05, - "loss": 5.7511, - "step": 11691 - }, - { - "epoch": 6.097522816166884, - "grad_norm": 1.5471428632736206, - "learning_rate": 8.875477386934673e-05, - "loss": 5.6441, - "step": 11692 - }, - { - "epoch": 6.098044328552803, - "grad_norm": 1.3699969053268433, - "learning_rate": 8.875376884422111e-05, - "loss": 5.7577, - "step": 11693 - }, - { - "epoch": 6.098565840938722, - "grad_norm": 1.3851144313812256, - "learning_rate": 8.875276381909549e-05, - "loss": 5.9529, - "step": 11694 - }, - { - "epoch": 6.0990873533246415, - "grad_norm": 1.634214997291565, - "learning_rate": 8.875175879396985e-05, - "loss": 5.8345, - "step": 11695 - }, - { - "epoch": 6.099608865710561, - "grad_norm": 1.4744995832443237, - "learning_rate": 8.875075376884423e-05, - "loss": 5.5098, - "step": 11696 - }, - { - "epoch": 6.1001303780964795, - "grad_norm": 1.4078611135482788, - "learning_rate": 8.87497487437186e-05, - "loss": 5.6124, - "step": 11697 - }, - { - "epoch": 6.100651890482399, - "grad_norm": 1.3949488401412964, - "learning_rate": 8.874874371859296e-05, - "loss": 5.4422, - "step": 11698 - }, - { - "epoch": 6.101173402868318, - "grad_norm": 1.47588050365448, - "learning_rate": 8.874773869346734e-05, - "loss": 5.7768, - "step": 11699 - }, - { - "epoch": 6.101694915254237, - "grad_norm": 1.3734062910079956, - "learning_rate": 8.87467336683417e-05, - "loss": 5.3536, - "step": 11700 - }, - { - "epoch": 6.1022164276401565, - "grad_norm": 2.007230281829834, - "learning_rate": 8.874572864321608e-05, - "loss": 5.1544, - "step": 11701 - }, - { - "epoch": 6.102737940026076, - "grad_norm": 1.427955985069275, - "learning_rate": 8.874472361809046e-05, - "loss": 5.5794, - "step": 11702 - }, - { - "epoch": 6.1032594524119945, - "grad_norm": 1.4774253368377686, - "learning_rate": 8.874371859296484e-05, - "loss": 5.9474, - "step": 11703 - }, - { - "epoch": 6.103780964797914, - "grad_norm": 1.4604557752609253, - "learning_rate": 8.87427135678392e-05, - "loss": 5.4862, - "step": 11704 - }, - { - "epoch": 6.1043024771838335, - "grad_norm": 1.3595337867736816, - "learning_rate": 8.874170854271358e-05, - "loss": 5.8414, - "step": 11705 - }, - { - "epoch": 6.104823989569752, - "grad_norm": 1.4700387716293335, - "learning_rate": 8.874070351758794e-05, - "loss": 5.8489, - "step": 11706 - }, - { - "epoch": 6.1053455019556715, - "grad_norm": 1.3889039754867554, - "learning_rate": 8.873969849246232e-05, - "loss": 5.7875, - "step": 11707 - }, - { - "epoch": 6.105867014341591, - "grad_norm": 1.4499835968017578, - "learning_rate": 8.873869346733669e-05, - "loss": 4.8339, - "step": 11708 - }, - { - "epoch": 6.1063885267275095, - "grad_norm": 1.4780635833740234, - "learning_rate": 8.873768844221106e-05, - "loss": 5.6033, - "step": 11709 - }, - { - "epoch": 6.106910039113429, - "grad_norm": 1.5425723791122437, - "learning_rate": 8.873668341708543e-05, - "loss": 5.537, - "step": 11710 - }, - { - "epoch": 6.1074315514993485, - "grad_norm": 1.3761768341064453, - "learning_rate": 8.873567839195979e-05, - "loss": 5.8357, - "step": 11711 - }, - { - "epoch": 6.107953063885267, - "grad_norm": 1.3207741975784302, - "learning_rate": 8.873467336683417e-05, - "loss": 5.8168, - "step": 11712 - }, - { - "epoch": 6.1084745762711865, - "grad_norm": 1.4480620622634888, - "learning_rate": 8.873366834170855e-05, - "loss": 5.8813, - "step": 11713 - }, - { - "epoch": 6.108996088657106, - "grad_norm": 1.4407176971435547, - "learning_rate": 8.873266331658293e-05, - "loss": 4.728, - "step": 11714 - }, - { - "epoch": 6.1095176010430245, - "grad_norm": 1.5314844846725464, - "learning_rate": 8.873165829145729e-05, - "loss": 6.0628, - "step": 11715 - }, - { - "epoch": 6.110039113428944, - "grad_norm": 1.3178815841674805, - "learning_rate": 8.873065326633167e-05, - "loss": 6.0351, - "step": 11716 - }, - { - "epoch": 6.1105606258148635, - "grad_norm": 1.5768862962722778, - "learning_rate": 8.872964824120603e-05, - "loss": 5.7221, - "step": 11717 - }, - { - "epoch": 6.111082138200782, - "grad_norm": 1.5467371940612793, - "learning_rate": 8.872864321608041e-05, - "loss": 5.3905, - "step": 11718 - }, - { - "epoch": 6.1116036505867015, - "grad_norm": 1.3839572668075562, - "learning_rate": 8.872763819095477e-05, - "loss": 5.9048, - "step": 11719 - }, - { - "epoch": 6.112125162972621, - "grad_norm": 1.478502869606018, - "learning_rate": 8.872663316582915e-05, - "loss": 5.3227, - "step": 11720 - }, - { - "epoch": 6.11264667535854, - "grad_norm": 1.5848363637924194, - "learning_rate": 8.872562814070352e-05, - "loss": 5.2032, - "step": 11721 - }, - { - "epoch": 6.113168187744459, - "grad_norm": 1.4280774593353271, - "learning_rate": 8.87246231155779e-05, - "loss": 5.5671, - "step": 11722 - }, - { - "epoch": 6.1136897001303785, - "grad_norm": 1.6996437311172485, - "learning_rate": 8.872361809045227e-05, - "loss": 5.2829, - "step": 11723 - }, - { - "epoch": 6.114211212516297, - "grad_norm": 1.5391329526901245, - "learning_rate": 8.872261306532665e-05, - "loss": 5.6899, - "step": 11724 - }, - { - "epoch": 6.1147327249022165, - "grad_norm": 1.3922899961471558, - "learning_rate": 8.872160804020101e-05, - "loss": 5.7069, - "step": 11725 - }, - { - "epoch": 6.115254237288136, - "grad_norm": 1.5605159997940063, - "learning_rate": 8.872060301507538e-05, - "loss": 5.7086, - "step": 11726 - }, - { - "epoch": 6.115775749674055, - "grad_norm": 1.3838860988616943, - "learning_rate": 8.871959798994976e-05, - "loss": 5.4775, - "step": 11727 - }, - { - "epoch": 6.116297262059974, - "grad_norm": 1.8620003461837769, - "learning_rate": 8.871859296482412e-05, - "loss": 4.9248, - "step": 11728 - }, - { - "epoch": 6.1168187744458935, - "grad_norm": 1.4944069385528564, - "learning_rate": 8.87175879396985e-05, - "loss": 4.9139, - "step": 11729 - }, - { - "epoch": 6.117340286831812, - "grad_norm": 1.3432064056396484, - "learning_rate": 8.871658291457286e-05, - "loss": 5.7852, - "step": 11730 - }, - { - "epoch": 6.1178617992177315, - "grad_norm": 1.5580792427062988, - "learning_rate": 8.871557788944724e-05, - "loss": 5.5494, - "step": 11731 - }, - { - "epoch": 6.118383311603651, - "grad_norm": 1.5761548280715942, - "learning_rate": 8.87145728643216e-05, - "loss": 5.6777, - "step": 11732 - }, - { - "epoch": 6.11890482398957, - "grad_norm": 1.4622244834899902, - "learning_rate": 8.871356783919598e-05, - "loss": 5.6417, - "step": 11733 - }, - { - "epoch": 6.119426336375489, - "grad_norm": 1.561968445777893, - "learning_rate": 8.871256281407036e-05, - "loss": 5.5437, - "step": 11734 - }, - { - "epoch": 6.1199478487614085, - "grad_norm": 1.5147289037704468, - "learning_rate": 8.871155778894474e-05, - "loss": 5.5692, - "step": 11735 - }, - { - "epoch": 6.120469361147327, - "grad_norm": 1.3611663579940796, - "learning_rate": 8.87105527638191e-05, - "loss": 5.554, - "step": 11736 - }, - { - "epoch": 6.120990873533247, - "grad_norm": 1.492305874824524, - "learning_rate": 8.870954773869348e-05, - "loss": 5.5693, - "step": 11737 - }, - { - "epoch": 6.121512385919166, - "grad_norm": 1.4689035415649414, - "learning_rate": 8.870854271356784e-05, - "loss": 5.6215, - "step": 11738 - }, - { - "epoch": 6.122033898305085, - "grad_norm": 1.4971598386764526, - "learning_rate": 8.870753768844221e-05, - "loss": 5.7214, - "step": 11739 - }, - { - "epoch": 6.122555410691004, - "grad_norm": 1.374586582183838, - "learning_rate": 8.870653266331659e-05, - "loss": 5.991, - "step": 11740 - }, - { - "epoch": 6.123076923076923, - "grad_norm": 1.5213510990142822, - "learning_rate": 8.870552763819095e-05, - "loss": 5.5045, - "step": 11741 - }, - { - "epoch": 6.123598435462842, - "grad_norm": 1.480839729309082, - "learning_rate": 8.870452261306533e-05, - "loss": 5.6927, - "step": 11742 - }, - { - "epoch": 6.124119947848762, - "grad_norm": 1.4107108116149902, - "learning_rate": 8.87035175879397e-05, - "loss": 5.7727, - "step": 11743 - }, - { - "epoch": 6.12464146023468, - "grad_norm": 1.3872690200805664, - "learning_rate": 8.870251256281408e-05, - "loss": 5.9504, - "step": 11744 - }, - { - "epoch": 6.1251629726206, - "grad_norm": 1.3829232454299927, - "learning_rate": 8.870150753768845e-05, - "loss": 5.5219, - "step": 11745 - }, - { - "epoch": 6.125684485006519, - "grad_norm": 1.56228768825531, - "learning_rate": 8.870050251256283e-05, - "loss": 5.8073, - "step": 11746 - }, - { - "epoch": 6.126205997392438, - "grad_norm": 1.5938564538955688, - "learning_rate": 8.869949748743719e-05, - "loss": 5.4584, - "step": 11747 - }, - { - "epoch": 6.126727509778357, - "grad_norm": 1.4453153610229492, - "learning_rate": 8.869849246231157e-05, - "loss": 5.9468, - "step": 11748 - }, - { - "epoch": 6.127249022164277, - "grad_norm": 1.4430454969406128, - "learning_rate": 8.869748743718593e-05, - "loss": 5.6423, - "step": 11749 - }, - { - "epoch": 6.127770534550195, - "grad_norm": 1.6684865951538086, - "learning_rate": 8.869648241206031e-05, - "loss": 5.5481, - "step": 11750 - }, - { - "epoch": 6.128292046936115, - "grad_norm": 1.4122414588928223, - "learning_rate": 8.869547738693467e-05, - "loss": 5.6588, - "step": 11751 - }, - { - "epoch": 6.128813559322034, - "grad_norm": 1.363555669784546, - "learning_rate": 8.869447236180904e-05, - "loss": 6.0239, - "step": 11752 - }, - { - "epoch": 6.129335071707953, - "grad_norm": 1.432004690170288, - "learning_rate": 8.869346733668342e-05, - "loss": 5.7494, - "step": 11753 - }, - { - "epoch": 6.129856584093872, - "grad_norm": 1.430413007736206, - "learning_rate": 8.86924623115578e-05, - "loss": 5.6699, - "step": 11754 - }, - { - "epoch": 6.130378096479792, - "grad_norm": 1.4717974662780762, - "learning_rate": 8.869145728643217e-05, - "loss": 4.6029, - "step": 11755 - }, - { - "epoch": 6.13089960886571, - "grad_norm": 1.481825351715088, - "learning_rate": 8.869045226130654e-05, - "loss": 5.5618, - "step": 11756 - }, - { - "epoch": 6.13142112125163, - "grad_norm": 1.4975093603134155, - "learning_rate": 8.868944723618091e-05, - "loss": 5.3753, - "step": 11757 - }, - { - "epoch": 6.131942633637549, - "grad_norm": 1.5655969381332397, - "learning_rate": 8.868844221105528e-05, - "loss": 5.6315, - "step": 11758 - }, - { - "epoch": 6.132464146023468, - "grad_norm": 1.5698857307434082, - "learning_rate": 8.868743718592966e-05, - "loss": 5.2135, - "step": 11759 - }, - { - "epoch": 6.132985658409387, - "grad_norm": 1.5084257125854492, - "learning_rate": 8.868643216080402e-05, - "loss": 5.3691, - "step": 11760 - }, - { - "epoch": 6.133507170795307, - "grad_norm": 1.6932837963104248, - "learning_rate": 8.86854271356784e-05, - "loss": 5.7103, - "step": 11761 - }, - { - "epoch": 6.134028683181225, - "grad_norm": 1.4110647439956665, - "learning_rate": 8.868442211055276e-05, - "loss": 5.4994, - "step": 11762 - }, - { - "epoch": 6.134550195567145, - "grad_norm": 1.3253979682922363, - "learning_rate": 8.868341708542714e-05, - "loss": 5.7329, - "step": 11763 - }, - { - "epoch": 6.135071707953064, - "grad_norm": 1.3837213516235352, - "learning_rate": 8.868241206030152e-05, - "loss": 5.5544, - "step": 11764 - }, - { - "epoch": 6.135593220338983, - "grad_norm": 1.414543628692627, - "learning_rate": 8.868140703517588e-05, - "loss": 5.869, - "step": 11765 - }, - { - "epoch": 6.136114732724902, - "grad_norm": 1.4943103790283203, - "learning_rate": 8.868040201005026e-05, - "loss": 5.2877, - "step": 11766 - }, - { - "epoch": 6.136636245110822, - "grad_norm": 1.4421781301498413, - "learning_rate": 8.867939698492462e-05, - "loss": 5.5923, - "step": 11767 - }, - { - "epoch": 6.13715775749674, - "grad_norm": 1.3864333629608154, - "learning_rate": 8.8678391959799e-05, - "loss": 5.8791, - "step": 11768 - }, - { - "epoch": 6.13767926988266, - "grad_norm": 1.6303201913833618, - "learning_rate": 8.867738693467337e-05, - "loss": 5.3575, - "step": 11769 - }, - { - "epoch": 6.138200782268579, - "grad_norm": 1.5077857971191406, - "learning_rate": 8.867638190954774e-05, - "loss": 5.0241, - "step": 11770 - }, - { - "epoch": 6.138722294654498, - "grad_norm": 1.5140588283538818, - "learning_rate": 8.867537688442211e-05, - "loss": 5.9976, - "step": 11771 - }, - { - "epoch": 6.139243807040417, - "grad_norm": 1.4903478622436523, - "learning_rate": 8.867437185929649e-05, - "loss": 5.6388, - "step": 11772 - }, - { - "epoch": 6.139765319426337, - "grad_norm": 1.5027995109558105, - "learning_rate": 8.867336683417085e-05, - "loss": 5.7344, - "step": 11773 - }, - { - "epoch": 6.140286831812255, - "grad_norm": 1.402720332145691, - "learning_rate": 8.867236180904523e-05, - "loss": 5.7508, - "step": 11774 - }, - { - "epoch": 6.140808344198175, - "grad_norm": 1.4316145181655884, - "learning_rate": 8.86713567839196e-05, - "loss": 5.9082, - "step": 11775 - }, - { - "epoch": 6.141329856584094, - "grad_norm": 1.3722779750823975, - "learning_rate": 8.867035175879398e-05, - "loss": 5.8194, - "step": 11776 - }, - { - "epoch": 6.141851368970013, - "grad_norm": 1.4731295108795166, - "learning_rate": 8.866934673366835e-05, - "loss": 5.3685, - "step": 11777 - }, - { - "epoch": 6.142372881355932, - "grad_norm": 1.3905938863754272, - "learning_rate": 8.866834170854271e-05, - "loss": 5.8951, - "step": 11778 - }, - { - "epoch": 6.142894393741852, - "grad_norm": 1.3644099235534668, - "learning_rate": 8.866733668341709e-05, - "loss": 5.9503, - "step": 11779 - }, - { - "epoch": 6.14341590612777, - "grad_norm": 1.3189258575439453, - "learning_rate": 8.866633165829146e-05, - "loss": 5.8498, - "step": 11780 - }, - { - "epoch": 6.14393741851369, - "grad_norm": 1.4673799276351929, - "learning_rate": 8.866532663316583e-05, - "loss": 5.0007, - "step": 11781 - }, - { - "epoch": 6.144458930899609, - "grad_norm": 1.4968609809875488, - "learning_rate": 8.86643216080402e-05, - "loss": 5.3277, - "step": 11782 - }, - { - "epoch": 6.144980443285528, - "grad_norm": 1.353542685508728, - "learning_rate": 8.866331658291458e-05, - "loss": 5.6635, - "step": 11783 - }, - { - "epoch": 6.145501955671447, - "grad_norm": 1.3106651306152344, - "learning_rate": 8.866231155778895e-05, - "loss": 5.7497, - "step": 11784 - }, - { - "epoch": 6.146023468057367, - "grad_norm": 1.517261028289795, - "learning_rate": 8.866130653266333e-05, - "loss": 5.5916, - "step": 11785 - }, - { - "epoch": 6.146544980443285, - "grad_norm": 1.5019961595535278, - "learning_rate": 8.86603015075377e-05, - "loss": 5.528, - "step": 11786 - }, - { - "epoch": 6.147066492829205, - "grad_norm": 1.5186127424240112, - "learning_rate": 8.865929648241207e-05, - "loss": 5.1882, - "step": 11787 - }, - { - "epoch": 6.147588005215124, - "grad_norm": 1.4122878313064575, - "learning_rate": 8.865829145728644e-05, - "loss": 5.9559, - "step": 11788 - }, - { - "epoch": 6.148109517601043, - "grad_norm": 1.4793450832366943, - "learning_rate": 8.865728643216081e-05, - "loss": 5.4195, - "step": 11789 - }, - { - "epoch": 6.148631029986962, - "grad_norm": 1.4662984609603882, - "learning_rate": 8.865628140703518e-05, - "loss": 5.7764, - "step": 11790 - }, - { - "epoch": 6.149152542372882, - "grad_norm": 1.3539190292358398, - "learning_rate": 8.865527638190954e-05, - "loss": 5.8997, - "step": 11791 - }, - { - "epoch": 6.1496740547588, - "grad_norm": 1.5010665655136108, - "learning_rate": 8.865427135678392e-05, - "loss": 4.4386, - "step": 11792 - }, - { - "epoch": 6.15019556714472, - "grad_norm": 1.5357328653335571, - "learning_rate": 8.865326633165829e-05, - "loss": 5.6625, - "step": 11793 - }, - { - "epoch": 6.150717079530639, - "grad_norm": 1.4749938249588013, - "learning_rate": 8.865226130653266e-05, - "loss": 5.5029, - "step": 11794 - }, - { - "epoch": 6.151238591916558, - "grad_norm": 1.4935063123703003, - "learning_rate": 8.865125628140704e-05, - "loss": 5.788, - "step": 11795 - }, - { - "epoch": 6.151760104302477, - "grad_norm": 1.3627625703811646, - "learning_rate": 8.865025125628142e-05, - "loss": 5.7943, - "step": 11796 - }, - { - "epoch": 6.152281616688397, - "grad_norm": 1.435412049293518, - "learning_rate": 8.864924623115578e-05, - "loss": 5.6105, - "step": 11797 - }, - { - "epoch": 6.152803129074315, - "grad_norm": 1.6423413753509521, - "learning_rate": 8.864824120603016e-05, - "loss": 5.3461, - "step": 11798 - }, - { - "epoch": 6.153324641460235, - "grad_norm": 1.4718775749206543, - "learning_rate": 8.864723618090453e-05, - "loss": 5.5383, - "step": 11799 - }, - { - "epoch": 6.153846153846154, - "grad_norm": 1.345640778541565, - "learning_rate": 8.86462311557789e-05, - "loss": 6.0342, - "step": 11800 - }, - { - "epoch": 6.154367666232073, - "grad_norm": 1.5049800872802734, - "learning_rate": 8.864522613065327e-05, - "loss": 5.4265, - "step": 11801 - }, - { - "epoch": 6.154889178617992, - "grad_norm": 1.4488152265548706, - "learning_rate": 8.864422110552765e-05, - "loss": 5.9624, - "step": 11802 - }, - { - "epoch": 6.155410691003912, - "grad_norm": 1.4978324174880981, - "learning_rate": 8.864321608040201e-05, - "loss": 5.4616, - "step": 11803 - }, - { - "epoch": 6.15593220338983, - "grad_norm": 1.4560325145721436, - "learning_rate": 8.864221105527639e-05, - "loss": 5.7261, - "step": 11804 - }, - { - "epoch": 6.15645371577575, - "grad_norm": 1.474463939666748, - "learning_rate": 8.864120603015077e-05, - "loss": 4.8193, - "step": 11805 - }, - { - "epoch": 6.156975228161669, - "grad_norm": 1.3275582790374756, - "learning_rate": 8.864020100502513e-05, - "loss": 5.7143, - "step": 11806 - }, - { - "epoch": 6.157496740547588, - "grad_norm": 1.4722857475280762, - "learning_rate": 8.863919597989951e-05, - "loss": 5.299, - "step": 11807 - }, - { - "epoch": 6.158018252933507, - "grad_norm": 1.4630850553512573, - "learning_rate": 8.863819095477387e-05, - "loss": 5.797, - "step": 11808 - }, - { - "epoch": 6.158539765319427, - "grad_norm": 1.4254553318023682, - "learning_rate": 8.863718592964825e-05, - "loss": 5.3101, - "step": 11809 - }, - { - "epoch": 6.159061277705345, - "grad_norm": 1.5943248271942139, - "learning_rate": 8.863618090452261e-05, - "loss": 5.6027, - "step": 11810 - }, - { - "epoch": 6.159582790091265, - "grad_norm": 1.494563102722168, - "learning_rate": 8.863517587939699e-05, - "loss": 5.7263, - "step": 11811 - }, - { - "epoch": 6.160104302477184, - "grad_norm": 1.5194485187530518, - "learning_rate": 8.863417085427136e-05, - "loss": 5.4292, - "step": 11812 - }, - { - "epoch": 6.160625814863103, - "grad_norm": 1.4063482284545898, - "learning_rate": 8.863316582914573e-05, - "loss": 5.7952, - "step": 11813 - }, - { - "epoch": 6.161147327249022, - "grad_norm": 1.5100476741790771, - "learning_rate": 8.86321608040201e-05, - "loss": 5.689, - "step": 11814 - }, - { - "epoch": 6.161668839634942, - "grad_norm": 1.6573777198791504, - "learning_rate": 8.863115577889448e-05, - "loss": 5.7039, - "step": 11815 - }, - { - "epoch": 6.16219035202086, - "grad_norm": 1.5657254457473755, - "learning_rate": 8.863015075376885e-05, - "loss": 5.1507, - "step": 11816 - }, - { - "epoch": 6.16271186440678, - "grad_norm": 1.4246325492858887, - "learning_rate": 8.862914572864323e-05, - "loss": 5.6766, - "step": 11817 - }, - { - "epoch": 6.163233376792699, - "grad_norm": 1.5750113725662231, - "learning_rate": 8.86281407035176e-05, - "loss": 5.4888, - "step": 11818 - }, - { - "epoch": 6.163754889178618, - "grad_norm": 1.3645308017730713, - "learning_rate": 8.862713567839196e-05, - "loss": 5.6753, - "step": 11819 - }, - { - "epoch": 6.164276401564537, - "grad_norm": 1.4580389261245728, - "learning_rate": 8.862613065326634e-05, - "loss": 4.9128, - "step": 11820 - }, - { - "epoch": 6.164797913950457, - "grad_norm": 1.41416335105896, - "learning_rate": 8.86251256281407e-05, - "loss": 5.712, - "step": 11821 - }, - { - "epoch": 6.165319426336375, - "grad_norm": 1.420069694519043, - "learning_rate": 8.862412060301508e-05, - "loss": 5.6865, - "step": 11822 - }, - { - "epoch": 6.165840938722295, - "grad_norm": 1.4848796129226685, - "learning_rate": 8.862311557788944e-05, - "loss": 5.6346, - "step": 11823 - }, - { - "epoch": 6.166362451108214, - "grad_norm": 1.57956063747406, - "learning_rate": 8.862211055276382e-05, - "loss": 5.6165, - "step": 11824 - }, - { - "epoch": 6.166883963494133, - "grad_norm": 1.5811635255813599, - "learning_rate": 8.862110552763819e-05, - "loss": 5.146, - "step": 11825 - }, - { - "epoch": 6.167405475880052, - "grad_norm": 1.6357394456863403, - "learning_rate": 8.862010050251256e-05, - "loss": 4.7783, - "step": 11826 - }, - { - "epoch": 6.167926988265972, - "grad_norm": 1.397835612297058, - "learning_rate": 8.861909547738694e-05, - "loss": 5.3284, - "step": 11827 - }, - { - "epoch": 6.16844850065189, - "grad_norm": 1.4105814695358276, - "learning_rate": 8.861809045226132e-05, - "loss": 5.5837, - "step": 11828 - }, - { - "epoch": 6.16897001303781, - "grad_norm": 1.4551613330841064, - "learning_rate": 8.861708542713568e-05, - "loss": 5.5932, - "step": 11829 - }, - { - "epoch": 6.169491525423728, - "grad_norm": 1.4965585470199585, - "learning_rate": 8.861608040201006e-05, - "loss": 5.3643, - "step": 11830 - }, - { - "epoch": 6.170013037809648, - "grad_norm": 1.4951233863830566, - "learning_rate": 8.861507537688443e-05, - "loss": 5.3626, - "step": 11831 - }, - { - "epoch": 6.170534550195567, - "grad_norm": 1.5110570192337036, - "learning_rate": 8.861407035175879e-05, - "loss": 5.3436, - "step": 11832 - }, - { - "epoch": 6.171056062581487, - "grad_norm": 1.5097216367721558, - "learning_rate": 8.861306532663317e-05, - "loss": 5.0431, - "step": 11833 - }, - { - "epoch": 6.171577574967405, - "grad_norm": 1.5769481658935547, - "learning_rate": 8.861206030150753e-05, - "loss": 5.7407, - "step": 11834 - }, - { - "epoch": 6.172099087353325, - "grad_norm": 1.3957126140594482, - "learning_rate": 8.861105527638191e-05, - "loss": 5.7101, - "step": 11835 - }, - { - "epoch": 6.172620599739243, - "grad_norm": 1.4627699851989746, - "learning_rate": 8.861005025125629e-05, - "loss": 5.2413, - "step": 11836 - }, - { - "epoch": 6.173142112125163, - "grad_norm": 1.359870433807373, - "learning_rate": 8.860904522613067e-05, - "loss": 5.5997, - "step": 11837 - }, - { - "epoch": 6.173663624511082, - "grad_norm": 1.4907082319259644, - "learning_rate": 8.860804020100503e-05, - "loss": 4.8432, - "step": 11838 - }, - { - "epoch": 6.174185136897001, - "grad_norm": 1.6459912061691284, - "learning_rate": 8.860703517587941e-05, - "loss": 5.7904, - "step": 11839 - }, - { - "epoch": 6.17470664928292, - "grad_norm": 1.3764584064483643, - "learning_rate": 8.860603015075377e-05, - "loss": 5.7687, - "step": 11840 - }, - { - "epoch": 6.17522816166884, - "grad_norm": 1.4441922903060913, - "learning_rate": 8.860502512562815e-05, - "loss": 5.7576, - "step": 11841 - }, - { - "epoch": 6.175749674054758, - "grad_norm": 1.5991111993789673, - "learning_rate": 8.860402010050251e-05, - "loss": 4.8069, - "step": 11842 - }, - { - "epoch": 6.176271186440678, - "grad_norm": 1.6642111539840698, - "learning_rate": 8.860301507537689e-05, - "loss": 5.8093, - "step": 11843 - }, - { - "epoch": 6.176792698826597, - "grad_norm": 1.634757399559021, - "learning_rate": 8.860201005025126e-05, - "loss": 5.7146, - "step": 11844 - }, - { - "epoch": 6.177314211212516, - "grad_norm": 1.3511865139007568, - "learning_rate": 8.860100502512562e-05, - "loss": 5.4757, - "step": 11845 - }, - { - "epoch": 6.177835723598435, - "grad_norm": 1.4301635026931763, - "learning_rate": 8.86e-05, - "loss": 6.091, - "step": 11846 - }, - { - "epoch": 6.178357235984355, - "grad_norm": 1.4631774425506592, - "learning_rate": 8.859899497487438e-05, - "loss": 5.891, - "step": 11847 - }, - { - "epoch": 6.178878748370273, - "grad_norm": 1.7007461786270142, - "learning_rate": 8.859798994974875e-05, - "loss": 5.2152, - "step": 11848 - }, - { - "epoch": 6.179400260756193, - "grad_norm": 1.348065972328186, - "learning_rate": 8.859698492462312e-05, - "loss": 5.8798, - "step": 11849 - }, - { - "epoch": 6.179921773142112, - "grad_norm": 1.3159105777740479, - "learning_rate": 8.85959798994975e-05, - "loss": 5.523, - "step": 11850 - }, - { - "epoch": 6.180443285528031, - "grad_norm": 1.3755289316177368, - "learning_rate": 8.859497487437186e-05, - "loss": 6.0156, - "step": 11851 - }, - { - "epoch": 6.18096479791395, - "grad_norm": 1.5139750242233276, - "learning_rate": 8.859396984924624e-05, - "loss": 5.2929, - "step": 11852 - }, - { - "epoch": 6.18148631029987, - "grad_norm": 1.4493751525878906, - "learning_rate": 8.85929648241206e-05, - "loss": 5.5841, - "step": 11853 - }, - { - "epoch": 6.182007822685788, - "grad_norm": 1.423294186592102, - "learning_rate": 8.859195979899498e-05, - "loss": 5.7616, - "step": 11854 - }, - { - "epoch": 6.182529335071708, - "grad_norm": 1.3874956369400024, - "learning_rate": 8.859095477386934e-05, - "loss": 5.885, - "step": 11855 - }, - { - "epoch": 6.183050847457627, - "grad_norm": 1.2573902606964111, - "learning_rate": 8.858994974874372e-05, - "loss": 5.3385, - "step": 11856 - }, - { - "epoch": 6.183572359843546, - "grad_norm": 1.3708124160766602, - "learning_rate": 8.85889447236181e-05, - "loss": 5.8966, - "step": 11857 - }, - { - "epoch": 6.184093872229465, - "grad_norm": 1.6050593852996826, - "learning_rate": 8.858793969849246e-05, - "loss": 5.4642, - "step": 11858 - }, - { - "epoch": 6.184615384615385, - "grad_norm": 1.5259462594985962, - "learning_rate": 8.858693467336684e-05, - "loss": 5.7711, - "step": 11859 - }, - { - "epoch": 6.185136897001303, - "grad_norm": 1.584122896194458, - "learning_rate": 8.858592964824121e-05, - "loss": 4.8023, - "step": 11860 - }, - { - "epoch": 6.185658409387223, - "grad_norm": 1.5089665651321411, - "learning_rate": 8.858492462311558e-05, - "loss": 4.4374, - "step": 11861 - }, - { - "epoch": 6.186179921773142, - "grad_norm": 1.384785532951355, - "learning_rate": 8.858391959798995e-05, - "loss": 5.8398, - "step": 11862 - }, - { - "epoch": 6.186701434159061, - "grad_norm": 1.4850534200668335, - "learning_rate": 8.858291457286433e-05, - "loss": 5.6826, - "step": 11863 - }, - { - "epoch": 6.18722294654498, - "grad_norm": 1.4849025011062622, - "learning_rate": 8.858190954773869e-05, - "loss": 5.4371, - "step": 11864 - }, - { - "epoch": 6.1877444589309, - "grad_norm": 1.5544782876968384, - "learning_rate": 8.858090452261307e-05, - "loss": 5.7144, - "step": 11865 - }, - { - "epoch": 6.188265971316818, - "grad_norm": 1.4645835161209106, - "learning_rate": 8.857989949748743e-05, - "loss": 5.7876, - "step": 11866 - }, - { - "epoch": 6.188787483702738, - "grad_norm": 1.4199129343032837, - "learning_rate": 8.857889447236181e-05, - "loss": 5.3352, - "step": 11867 - }, - { - "epoch": 6.189308996088657, - "grad_norm": 1.4587538242340088, - "learning_rate": 8.857788944723619e-05, - "loss": 5.5971, - "step": 11868 - }, - { - "epoch": 6.189830508474576, - "grad_norm": 1.3427462577819824, - "learning_rate": 8.857688442211057e-05, - "loss": 5.7332, - "step": 11869 - }, - { - "epoch": 6.190352020860495, - "grad_norm": 1.4890226125717163, - "learning_rate": 8.857587939698493e-05, - "loss": 5.5312, - "step": 11870 - }, - { - "epoch": 6.190873533246415, - "grad_norm": 1.4173305034637451, - "learning_rate": 8.85748743718593e-05, - "loss": 5.7983, - "step": 11871 - }, - { - "epoch": 6.1913950456323334, - "grad_norm": 1.6279641389846802, - "learning_rate": 8.857386934673367e-05, - "loss": 5.5996, - "step": 11872 - }, - { - "epoch": 6.191916558018253, - "grad_norm": 1.6986795663833618, - "learning_rate": 8.857286432160804e-05, - "loss": 5.1919, - "step": 11873 - }, - { - "epoch": 6.192438070404172, - "grad_norm": 1.5237427949905396, - "learning_rate": 8.857185929648242e-05, - "loss": 5.4886, - "step": 11874 - }, - { - "epoch": 6.192959582790091, - "grad_norm": 1.4921013116836548, - "learning_rate": 8.857085427135678e-05, - "loss": 5.5943, - "step": 11875 - }, - { - "epoch": 6.19348109517601, - "grad_norm": 1.5208439826965332, - "learning_rate": 8.856984924623116e-05, - "loss": 5.2234, - "step": 11876 - }, - { - "epoch": 6.19400260756193, - "grad_norm": 1.498189091682434, - "learning_rate": 8.856884422110554e-05, - "loss": 5.6834, - "step": 11877 - }, - { - "epoch": 6.1945241199478485, - "grad_norm": 1.3653897047042847, - "learning_rate": 8.856783919597991e-05, - "loss": 5.8321, - "step": 11878 - }, - { - "epoch": 6.195045632333768, - "grad_norm": 1.4282323122024536, - "learning_rate": 8.856683417085428e-05, - "loss": 5.5926, - "step": 11879 - }, - { - "epoch": 6.195567144719687, - "grad_norm": 1.5079749822616577, - "learning_rate": 8.856582914572866e-05, - "loss": 5.3989, - "step": 11880 - }, - { - "epoch": 6.196088657105606, - "grad_norm": 1.409413456916809, - "learning_rate": 8.856482412060302e-05, - "loss": 5.4374, - "step": 11881 - }, - { - "epoch": 6.196610169491525, - "grad_norm": 1.499351978302002, - "learning_rate": 8.85638190954774e-05, - "loss": 5.7453, - "step": 11882 - }, - { - "epoch": 6.197131681877445, - "grad_norm": 1.4912595748901367, - "learning_rate": 8.856281407035176e-05, - "loss": 4.936, - "step": 11883 - }, - { - "epoch": 6.1976531942633635, - "grad_norm": 1.4058117866516113, - "learning_rate": 8.856180904522613e-05, - "loss": 5.8313, - "step": 11884 - }, - { - "epoch": 6.198174706649283, - "grad_norm": 1.445358395576477, - "learning_rate": 8.85608040201005e-05, - "loss": 5.4986, - "step": 11885 - }, - { - "epoch": 6.198696219035202, - "grad_norm": 1.545884132385254, - "learning_rate": 8.855979899497487e-05, - "loss": 5.1402, - "step": 11886 - }, - { - "epoch": 6.199217731421121, - "grad_norm": 1.3578044176101685, - "learning_rate": 8.855879396984925e-05, - "loss": 5.5937, - "step": 11887 - }, - { - "epoch": 6.1997392438070404, - "grad_norm": 1.389089822769165, - "learning_rate": 8.855778894472362e-05, - "loss": 5.8169, - "step": 11888 - }, - { - "epoch": 6.20026075619296, - "grad_norm": 1.643487811088562, - "learning_rate": 8.8556783919598e-05, - "loss": 5.136, - "step": 11889 - }, - { - "epoch": 6.2007822685788785, - "grad_norm": 1.4691789150238037, - "learning_rate": 8.855577889447237e-05, - "loss": 5.5834, - "step": 11890 - }, - { - "epoch": 6.201303780964798, - "grad_norm": 1.4292867183685303, - "learning_rate": 8.855477386934674e-05, - "loss": 5.6219, - "step": 11891 - }, - { - "epoch": 6.201825293350717, - "grad_norm": 1.4417500495910645, - "learning_rate": 8.855376884422111e-05, - "loss": 5.3366, - "step": 11892 - }, - { - "epoch": 6.202346805736636, - "grad_norm": 1.3738152980804443, - "learning_rate": 8.855276381909549e-05, - "loss": 5.7958, - "step": 11893 - }, - { - "epoch": 6.2028683181225555, - "grad_norm": 1.3699332475662231, - "learning_rate": 8.855175879396985e-05, - "loss": 5.6761, - "step": 11894 - }, - { - "epoch": 6.203389830508475, - "grad_norm": 1.5214205980300903, - "learning_rate": 8.855075376884423e-05, - "loss": 5.4801, - "step": 11895 - }, - { - "epoch": 6.2039113428943935, - "grad_norm": 1.4743198156356812, - "learning_rate": 8.854974874371859e-05, - "loss": 5.6001, - "step": 11896 - }, - { - "epoch": 6.204432855280313, - "grad_norm": 1.4038487672805786, - "learning_rate": 8.854874371859297e-05, - "loss": 5.467, - "step": 11897 - }, - { - "epoch": 6.204954367666232, - "grad_norm": 1.4264708757400513, - "learning_rate": 8.854773869346735e-05, - "loss": 5.2505, - "step": 11898 - }, - { - "epoch": 6.205475880052151, - "grad_norm": 1.4308420419692993, - "learning_rate": 8.854673366834171e-05, - "loss": 5.7043, - "step": 11899 - }, - { - "epoch": 6.2059973924380705, - "grad_norm": 1.6168981790542603, - "learning_rate": 8.854572864321609e-05, - "loss": 5.3636, - "step": 11900 - }, - { - "epoch": 6.20651890482399, - "grad_norm": 1.3622676134109497, - "learning_rate": 8.854472361809045e-05, - "loss": 5.8862, - "step": 11901 - }, - { - "epoch": 6.2070404172099085, - "grad_norm": 1.6386746168136597, - "learning_rate": 8.854371859296483e-05, - "loss": 4.7339, - "step": 11902 - }, - { - "epoch": 6.207561929595828, - "grad_norm": 1.375769853591919, - "learning_rate": 8.85427135678392e-05, - "loss": 5.9584, - "step": 11903 - }, - { - "epoch": 6.208083441981747, - "grad_norm": 1.7631757259368896, - "learning_rate": 8.854170854271357e-05, - "loss": 5.306, - "step": 11904 - }, - { - "epoch": 6.208604954367666, - "grad_norm": 1.7043920755386353, - "learning_rate": 8.854070351758794e-05, - "loss": 4.8314, - "step": 11905 - }, - { - "epoch": 6.2091264667535855, - "grad_norm": 1.4538995027542114, - "learning_rate": 8.853969849246232e-05, - "loss": 5.8703, - "step": 11906 - }, - { - "epoch": 6.209647979139505, - "grad_norm": 1.557161569595337, - "learning_rate": 8.853869346733668e-05, - "loss": 5.1377, - "step": 11907 - }, - { - "epoch": 6.2101694915254235, - "grad_norm": 1.4238965511322021, - "learning_rate": 8.853768844221106e-05, - "loss": 5.8377, - "step": 11908 - }, - { - "epoch": 6.210691003911343, - "grad_norm": 1.4127355813980103, - "learning_rate": 8.853668341708544e-05, - "loss": 5.6875, - "step": 11909 - }, - { - "epoch": 6.2112125162972625, - "grad_norm": 1.4903483390808105, - "learning_rate": 8.853567839195981e-05, - "loss": 5.7097, - "step": 11910 - }, - { - "epoch": 6.211734028683181, - "grad_norm": 1.4389986991882324, - "learning_rate": 8.853467336683418e-05, - "loss": 5.8747, - "step": 11911 - }, - { - "epoch": 6.2122555410691005, - "grad_norm": 1.4929208755493164, - "learning_rate": 8.853366834170854e-05, - "loss": 6.0806, - "step": 11912 - }, - { - "epoch": 6.21277705345502, - "grad_norm": 1.4764546155929565, - "learning_rate": 8.853266331658292e-05, - "loss": 5.7014, - "step": 11913 - }, - { - "epoch": 6.2132985658409385, - "grad_norm": 1.4485608339309692, - "learning_rate": 8.853165829145728e-05, - "loss": 5.7795, - "step": 11914 - }, - { - "epoch": 6.213820078226858, - "grad_norm": 1.5148439407348633, - "learning_rate": 8.853065326633166e-05, - "loss": 5.2809, - "step": 11915 - }, - { - "epoch": 6.2143415906127775, - "grad_norm": 1.5536158084869385, - "learning_rate": 8.852964824120603e-05, - "loss": 5.7626, - "step": 11916 - }, - { - "epoch": 6.214863102998696, - "grad_norm": 1.44167959690094, - "learning_rate": 8.85286432160804e-05, - "loss": 5.5751, - "step": 11917 - }, - { - "epoch": 6.2153846153846155, - "grad_norm": 1.5543429851531982, - "learning_rate": 8.852763819095478e-05, - "loss": 5.0301, - "step": 11918 - }, - { - "epoch": 6.215906127770535, - "grad_norm": 1.4895976781845093, - "learning_rate": 8.852663316582916e-05, - "loss": 5.496, - "step": 11919 - }, - { - "epoch": 6.2164276401564535, - "grad_norm": 1.4518386125564575, - "learning_rate": 8.852562814070352e-05, - "loss": 5.7203, - "step": 11920 - }, - { - "epoch": 6.216949152542373, - "grad_norm": 1.4725435972213745, - "learning_rate": 8.85246231155779e-05, - "loss": 5.6008, - "step": 11921 - }, - { - "epoch": 6.2174706649282925, - "grad_norm": 1.5274981260299683, - "learning_rate": 8.852361809045227e-05, - "loss": 5.6513, - "step": 11922 - }, - { - "epoch": 6.217992177314211, - "grad_norm": 1.5392793416976929, - "learning_rate": 8.852261306532664e-05, - "loss": 5.5344, - "step": 11923 - }, - { - "epoch": 6.2185136897001305, - "grad_norm": 1.430160403251648, - "learning_rate": 8.852160804020101e-05, - "loss": 5.6396, - "step": 11924 - }, - { - "epoch": 6.219035202086049, - "grad_norm": 1.3830451965332031, - "learning_rate": 8.852060301507537e-05, - "loss": 5.44, - "step": 11925 - }, - { - "epoch": 6.219556714471969, - "grad_norm": 1.6933056116104126, - "learning_rate": 8.851959798994975e-05, - "loss": 5.482, - "step": 11926 - }, - { - "epoch": 6.220078226857888, - "grad_norm": 1.5601814985275269, - "learning_rate": 8.851859296482411e-05, - "loss": 5.4514, - "step": 11927 - }, - { - "epoch": 6.2205997392438075, - "grad_norm": 1.3834760189056396, - "learning_rate": 8.851758793969849e-05, - "loss": 5.8367, - "step": 11928 - }, - { - "epoch": 6.221121251629726, - "grad_norm": 1.5393314361572266, - "learning_rate": 8.851658291457287e-05, - "loss": 5.793, - "step": 11929 - }, - { - "epoch": 6.2216427640156455, - "grad_norm": 1.4756754636764526, - "learning_rate": 8.851557788944725e-05, - "loss": 5.6314, - "step": 11930 - }, - { - "epoch": 6.222164276401564, - "grad_norm": 1.3858733177185059, - "learning_rate": 8.851457286432161e-05, - "loss": 5.2934, - "step": 11931 - }, - { - "epoch": 6.222685788787484, - "grad_norm": 1.4901131391525269, - "learning_rate": 8.851356783919599e-05, - "loss": 5.443, - "step": 11932 - }, - { - "epoch": 6.223207301173403, - "grad_norm": 1.4807761907577515, - "learning_rate": 8.851256281407035e-05, - "loss": 5.7936, - "step": 11933 - }, - { - "epoch": 6.223728813559322, - "grad_norm": 1.4142582416534424, - "learning_rate": 8.851155778894473e-05, - "loss": 5.6842, - "step": 11934 - }, - { - "epoch": 6.224250325945241, - "grad_norm": 1.4730491638183594, - "learning_rate": 8.85105527638191e-05, - "loss": 5.7457, - "step": 11935 - }, - { - "epoch": 6.2247718383311605, - "grad_norm": 1.6209534406661987, - "learning_rate": 8.850954773869347e-05, - "loss": 5.2467, - "step": 11936 - }, - { - "epoch": 6.225293350717079, - "grad_norm": 1.5777654647827148, - "learning_rate": 8.850854271356784e-05, - "loss": 5.9202, - "step": 11937 - }, - { - "epoch": 6.225814863102999, - "grad_norm": 1.443739414215088, - "learning_rate": 8.850753768844222e-05, - "loss": 5.5999, - "step": 11938 - }, - { - "epoch": 6.226336375488918, - "grad_norm": 1.3688842058181763, - "learning_rate": 8.85065326633166e-05, - "loss": 5.3899, - "step": 11939 - }, - { - "epoch": 6.226857887874837, - "grad_norm": 1.470777988433838, - "learning_rate": 8.850552763819096e-05, - "loss": 5.783, - "step": 11940 - }, - { - "epoch": 6.227379400260756, - "grad_norm": 1.5731507539749146, - "learning_rate": 8.850452261306534e-05, - "loss": 5.4962, - "step": 11941 - }, - { - "epoch": 6.227900912646676, - "grad_norm": 1.4565469026565552, - "learning_rate": 8.85035175879397e-05, - "loss": 5.7536, - "step": 11942 - }, - { - "epoch": 6.228422425032594, - "grad_norm": 1.4491099119186401, - "learning_rate": 8.850251256281408e-05, - "loss": 5.5301, - "step": 11943 - }, - { - "epoch": 6.228943937418514, - "grad_norm": 1.3789137601852417, - "learning_rate": 8.850150753768844e-05, - "loss": 5.6872, - "step": 11944 - }, - { - "epoch": 6.229465449804433, - "grad_norm": 1.3986762762069702, - "learning_rate": 8.850050251256282e-05, - "loss": 5.5919, - "step": 11945 - }, - { - "epoch": 6.229986962190352, - "grad_norm": 1.7459979057312012, - "learning_rate": 8.849949748743719e-05, - "loss": 4.7146, - "step": 11946 - }, - { - "epoch": 6.230508474576271, - "grad_norm": 1.395611047744751, - "learning_rate": 8.849849246231156e-05, - "loss": 5.5137, - "step": 11947 - }, - { - "epoch": 6.231029986962191, - "grad_norm": 1.446032166481018, - "learning_rate": 8.849748743718593e-05, - "loss": 5.4701, - "step": 11948 - }, - { - "epoch": 6.231551499348109, - "grad_norm": 1.48972487449646, - "learning_rate": 8.84964824120603e-05, - "loss": 5.8392, - "step": 11949 - }, - { - "epoch": 6.232073011734029, - "grad_norm": 1.4500093460083008, - "learning_rate": 8.849547738693468e-05, - "loss": 5.7625, - "step": 11950 - }, - { - "epoch": 6.232594524119948, - "grad_norm": 1.540531039237976, - "learning_rate": 8.849447236180905e-05, - "loss": 5.2494, - "step": 11951 - }, - { - "epoch": 6.233116036505867, - "grad_norm": 1.5164334774017334, - "learning_rate": 8.849346733668343e-05, - "loss": 5.6017, - "step": 11952 - }, - { - "epoch": 6.233637548891786, - "grad_norm": 1.4520556926727295, - "learning_rate": 8.849246231155779e-05, - "loss": 5.3142, - "step": 11953 - }, - { - "epoch": 6.234159061277706, - "grad_norm": 1.5704611539840698, - "learning_rate": 8.849145728643217e-05, - "loss": 4.9755, - "step": 11954 - }, - { - "epoch": 6.234680573663624, - "grad_norm": 1.717331051826477, - "learning_rate": 8.849045226130653e-05, - "loss": 5.8494, - "step": 11955 - }, - { - "epoch": 6.235202086049544, - "grad_norm": 1.488868236541748, - "learning_rate": 8.848944723618091e-05, - "loss": 5.3888, - "step": 11956 - }, - { - "epoch": 6.235723598435463, - "grad_norm": 1.455293893814087, - "learning_rate": 8.848844221105527e-05, - "loss": 5.1665, - "step": 11957 - }, - { - "epoch": 6.236245110821382, - "grad_norm": 1.4924079179763794, - "learning_rate": 8.848743718592965e-05, - "loss": 5.6594, - "step": 11958 - }, - { - "epoch": 6.236766623207301, - "grad_norm": 1.3830736875534058, - "learning_rate": 8.848643216080403e-05, - "loss": 5.6482, - "step": 11959 - }, - { - "epoch": 6.237288135593221, - "grad_norm": 1.3520116806030273, - "learning_rate": 8.848542713567841e-05, - "loss": 5.6981, - "step": 11960 - }, - { - "epoch": 6.237809647979139, - "grad_norm": 1.3648649454116821, - "learning_rate": 8.848442211055277e-05, - "loss": 5.7346, - "step": 11961 - }, - { - "epoch": 6.238331160365059, - "grad_norm": 1.3294916152954102, - "learning_rate": 8.848341708542715e-05, - "loss": 5.7448, - "step": 11962 - }, - { - "epoch": 6.238852672750978, - "grad_norm": 1.4094016551971436, - "learning_rate": 8.848241206030151e-05, - "loss": 5.7646, - "step": 11963 - }, - { - "epoch": 6.239374185136897, - "grad_norm": 1.5919846296310425, - "learning_rate": 8.848140703517588e-05, - "loss": 5.3905, - "step": 11964 - }, - { - "epoch": 6.239895697522816, - "grad_norm": 1.7241411209106445, - "learning_rate": 8.848040201005026e-05, - "loss": 5.4075, - "step": 11965 - }, - { - "epoch": 6.240417209908736, - "grad_norm": 1.454823613166809, - "learning_rate": 8.847939698492462e-05, - "loss": 5.2356, - "step": 11966 - }, - { - "epoch": 6.240938722294654, - "grad_norm": 1.4595212936401367, - "learning_rate": 8.8478391959799e-05, - "loss": 5.3216, - "step": 11967 - }, - { - "epoch": 6.241460234680574, - "grad_norm": 1.384568214416504, - "learning_rate": 8.847738693467336e-05, - "loss": 5.8206, - "step": 11968 - }, - { - "epoch": 6.241981747066493, - "grad_norm": 1.3129727840423584, - "learning_rate": 8.847638190954774e-05, - "loss": 5.7102, - "step": 11969 - }, - { - "epoch": 6.242503259452412, - "grad_norm": 1.3979953527450562, - "learning_rate": 8.847537688442212e-05, - "loss": 4.3481, - "step": 11970 - }, - { - "epoch": 6.243024771838331, - "grad_norm": 1.3557648658752441, - "learning_rate": 8.84743718592965e-05, - "loss": 5.6939, - "step": 11971 - }, - { - "epoch": 6.243546284224251, - "grad_norm": 1.3782685995101929, - "learning_rate": 8.847336683417086e-05, - "loss": 5.8809, - "step": 11972 - }, - { - "epoch": 6.244067796610169, - "grad_norm": 1.5902217626571655, - "learning_rate": 8.847236180904524e-05, - "loss": 5.787, - "step": 11973 - }, - { - "epoch": 6.244589308996089, - "grad_norm": 1.5943399667739868, - "learning_rate": 8.84713567839196e-05, - "loss": 5.1174, - "step": 11974 - }, - { - "epoch": 6.245110821382008, - "grad_norm": 1.4708154201507568, - "learning_rate": 8.847035175879398e-05, - "loss": 5.0109, - "step": 11975 - }, - { - "epoch": 6.245632333767927, - "grad_norm": 1.4456918239593506, - "learning_rate": 8.846934673366834e-05, - "loss": 5.7668, - "step": 11976 - }, - { - "epoch": 6.246153846153846, - "grad_norm": 1.4126795530319214, - "learning_rate": 8.846834170854271e-05, - "loss": 5.6281, - "step": 11977 - }, - { - "epoch": 6.246675358539766, - "grad_norm": 1.5016158819198608, - "learning_rate": 8.846733668341709e-05, - "loss": 5.8234, - "step": 11978 - }, - { - "epoch": 6.247196870925684, - "grad_norm": 1.4188992977142334, - "learning_rate": 8.846633165829145e-05, - "loss": 5.421, - "step": 11979 - }, - { - "epoch": 6.247718383311604, - "grad_norm": 1.401224136352539, - "learning_rate": 8.846532663316583e-05, - "loss": 5.8244, - "step": 11980 - }, - { - "epoch": 6.248239895697523, - "grad_norm": 1.3241792917251587, - "learning_rate": 8.84643216080402e-05, - "loss": 5.7332, - "step": 11981 - }, - { - "epoch": 6.248761408083442, - "grad_norm": 1.426016926765442, - "learning_rate": 8.846331658291458e-05, - "loss": 5.8855, - "step": 11982 - }, - { - "epoch": 6.249282920469361, - "grad_norm": 1.3285021781921387, - "learning_rate": 8.846231155778895e-05, - "loss": 5.4545, - "step": 11983 - }, - { - "epoch": 6.249804432855281, - "grad_norm": 1.4279423952102661, - "learning_rate": 8.846130653266333e-05, - "loss": 5.6828, - "step": 11984 - }, - { - "epoch": 6.250325945241199, - "grad_norm": 1.418649435043335, - "learning_rate": 8.846030150753769e-05, - "loss": 5.9416, - "step": 11985 - }, - { - "epoch": 6.250847457627119, - "grad_norm": 1.4831743240356445, - "learning_rate": 8.845929648241207e-05, - "loss": 5.5728, - "step": 11986 - }, - { - "epoch": 6.251368970013038, - "grad_norm": 1.3504599332809448, - "learning_rate": 8.845829145728643e-05, - "loss": 5.7973, - "step": 11987 - }, - { - "epoch": 6.251890482398957, - "grad_norm": 1.589269995689392, - "learning_rate": 8.845728643216081e-05, - "loss": 4.9596, - "step": 11988 - }, - { - "epoch": 6.252411994784876, - "grad_norm": 1.4082505702972412, - "learning_rate": 8.845628140703517e-05, - "loss": 5.6044, - "step": 11989 - }, - { - "epoch": 6.252933507170796, - "grad_norm": 1.5215415954589844, - "learning_rate": 8.845527638190955e-05, - "loss": 5.4683, - "step": 11990 - }, - { - "epoch": 6.253455019556714, - "grad_norm": 1.410811185836792, - "learning_rate": 8.845427135678393e-05, - "loss": 5.2327, - "step": 11991 - }, - { - "epoch": 6.253976531942634, - "grad_norm": 1.560035228729248, - "learning_rate": 8.84532663316583e-05, - "loss": 5.6108, - "step": 11992 - }, - { - "epoch": 6.254498044328553, - "grad_norm": 1.3670291900634766, - "learning_rate": 8.845226130653267e-05, - "loss": 5.8912, - "step": 11993 - }, - { - "epoch": 6.255019556714472, - "grad_norm": 1.5167073011398315, - "learning_rate": 8.845125628140704e-05, - "loss": 5.4399, - "step": 11994 - }, - { - "epoch": 6.255541069100391, - "grad_norm": 1.4532827138900757, - "learning_rate": 8.845025125628141e-05, - "loss": 5.3466, - "step": 11995 - }, - { - "epoch": 6.256062581486311, - "grad_norm": 1.3444067239761353, - "learning_rate": 8.844924623115578e-05, - "loss": 6.0429, - "step": 11996 - }, - { - "epoch": 6.256584093872229, - "grad_norm": 1.4738945960998535, - "learning_rate": 8.844824120603016e-05, - "loss": 5.4488, - "step": 11997 - }, - { - "epoch": 6.257105606258149, - "grad_norm": 1.4280755519866943, - "learning_rate": 8.844723618090452e-05, - "loss": 5.5526, - "step": 11998 - }, - { - "epoch": 6.257627118644068, - "grad_norm": 1.3474198579788208, - "learning_rate": 8.84462311557789e-05, - "loss": 5.5838, - "step": 11999 - }, - { - "epoch": 6.258148631029987, - "grad_norm": 1.6222712993621826, - "learning_rate": 8.844522613065326e-05, - "loss": 5.5771, - "step": 12000 - }, - { - "epoch": 6.258670143415906, - "grad_norm": 1.4202916622161865, - "learning_rate": 8.844422110552764e-05, - "loss": 5.3481, - "step": 12001 - }, - { - "epoch": 6.259191655801826, - "grad_norm": 1.4208847284317017, - "learning_rate": 8.844321608040202e-05, - "loss": 5.7071, - "step": 12002 - }, - { - "epoch": 6.259713168187744, - "grad_norm": 1.4509614706039429, - "learning_rate": 8.84422110552764e-05, - "loss": 5.801, - "step": 12003 - }, - { - "epoch": 6.260234680573664, - "grad_norm": 1.450060486793518, - "learning_rate": 8.844120603015076e-05, - "loss": 5.3519, - "step": 12004 - }, - { - "epoch": 6.260756192959583, - "grad_norm": 1.5493963956832886, - "learning_rate": 8.844020100502512e-05, - "loss": 5.0173, - "step": 12005 - }, - { - "epoch": 6.261277705345502, - "grad_norm": 1.6297320127487183, - "learning_rate": 8.84391959798995e-05, - "loss": 5.4839, - "step": 12006 - }, - { - "epoch": 6.261799217731421, - "grad_norm": 1.5515103340148926, - "learning_rate": 8.843819095477387e-05, - "loss": 5.6686, - "step": 12007 - }, - { - "epoch": 6.262320730117341, - "grad_norm": 1.5418565273284912, - "learning_rate": 8.843718592964824e-05, - "loss": 5.6546, - "step": 12008 - }, - { - "epoch": 6.262842242503259, - "grad_norm": 1.4638097286224365, - "learning_rate": 8.843618090452261e-05, - "loss": 5.5552, - "step": 12009 - }, - { - "epoch": 6.263363754889179, - "grad_norm": 1.5352773666381836, - "learning_rate": 8.843517587939699e-05, - "loss": 5.4533, - "step": 12010 - }, - { - "epoch": 6.263885267275098, - "grad_norm": 1.5890592336654663, - "learning_rate": 8.843417085427136e-05, - "loss": 5.6646, - "step": 12011 - }, - { - "epoch": 6.264406779661017, - "grad_norm": 1.627415657043457, - "learning_rate": 8.843316582914574e-05, - "loss": 5.4101, - "step": 12012 - }, - { - "epoch": 6.264928292046936, - "grad_norm": 1.501802682876587, - "learning_rate": 8.84321608040201e-05, - "loss": 5.7791, - "step": 12013 - }, - { - "epoch": 6.265449804432855, - "grad_norm": 1.5207936763763428, - "learning_rate": 8.843115577889448e-05, - "loss": 5.2369, - "step": 12014 - }, - { - "epoch": 6.265971316818774, - "grad_norm": 1.3493332862854004, - "learning_rate": 8.843015075376885e-05, - "loss": 5.9268, - "step": 12015 - }, - { - "epoch": 6.266492829204694, - "grad_norm": 1.3398523330688477, - "learning_rate": 8.842914572864323e-05, - "loss": 5.7881, - "step": 12016 - }, - { - "epoch": 6.267014341590613, - "grad_norm": 1.3919600248336792, - "learning_rate": 8.842814070351759e-05, - "loss": 5.8231, - "step": 12017 - }, - { - "epoch": 6.267535853976532, - "grad_norm": 1.4427608251571655, - "learning_rate": 8.842713567839196e-05, - "loss": 5.6764, - "step": 12018 - }, - { - "epoch": 6.268057366362451, - "grad_norm": 1.4395346641540527, - "learning_rate": 8.842613065326633e-05, - "loss": 5.8923, - "step": 12019 - }, - { - "epoch": 6.26857887874837, - "grad_norm": 1.5666147470474243, - "learning_rate": 8.84251256281407e-05, - "loss": 5.5648, - "step": 12020 - }, - { - "epoch": 6.269100391134289, - "grad_norm": 1.4631036520004272, - "learning_rate": 8.842412060301508e-05, - "loss": 5.3906, - "step": 12021 - }, - { - "epoch": 6.269621903520209, - "grad_norm": 1.498826503753662, - "learning_rate": 8.842311557788945e-05, - "loss": 6.0368, - "step": 12022 - }, - { - "epoch": 6.270143415906128, - "grad_norm": 1.4078971147537231, - "learning_rate": 8.842211055276383e-05, - "loss": 5.443, - "step": 12023 - }, - { - "epoch": 6.270664928292047, - "grad_norm": 1.3704824447631836, - "learning_rate": 8.84211055276382e-05, - "loss": 5.9619, - "step": 12024 - }, - { - "epoch": 6.271186440677966, - "grad_norm": 1.4154819250106812, - "learning_rate": 8.842010050251257e-05, - "loss": 5.7941, - "step": 12025 - }, - { - "epoch": 6.271707953063885, - "grad_norm": 1.5781630277633667, - "learning_rate": 8.841909547738694e-05, - "loss": 5.4478, - "step": 12026 - }, - { - "epoch": 6.272229465449804, - "grad_norm": 1.4118572473526, - "learning_rate": 8.841809045226131e-05, - "loss": 5.4527, - "step": 12027 - }, - { - "epoch": 6.272750977835724, - "grad_norm": 1.3601629734039307, - "learning_rate": 8.841708542713568e-05, - "loss": 5.2582, - "step": 12028 - }, - { - "epoch": 6.273272490221642, - "grad_norm": 1.5208673477172852, - "learning_rate": 8.841608040201006e-05, - "loss": 5.4067, - "step": 12029 - }, - { - "epoch": 6.273794002607562, - "grad_norm": 1.55955171585083, - "learning_rate": 8.841507537688442e-05, - "loss": 5.2712, - "step": 12030 - }, - { - "epoch": 6.274315514993481, - "grad_norm": 1.4853382110595703, - "learning_rate": 8.84140703517588e-05, - "loss": 5.2547, - "step": 12031 - }, - { - "epoch": 6.2748370273794, - "grad_norm": 1.4155550003051758, - "learning_rate": 8.841306532663318e-05, - "loss": 5.7208, - "step": 12032 - }, - { - "epoch": 6.275358539765319, - "grad_norm": 1.453655481338501, - "learning_rate": 8.841206030150754e-05, - "loss": 5.2229, - "step": 12033 - }, - { - "epoch": 6.275880052151239, - "grad_norm": 1.3679389953613281, - "learning_rate": 8.841105527638192e-05, - "loss": 5.8292, - "step": 12034 - }, - { - "epoch": 6.276401564537157, - "grad_norm": 1.4981225728988647, - "learning_rate": 8.841005025125628e-05, - "loss": 5.6897, - "step": 12035 - }, - { - "epoch": 6.276923076923077, - "grad_norm": 1.3583247661590576, - "learning_rate": 8.840904522613066e-05, - "loss": 5.469, - "step": 12036 - }, - { - "epoch": 6.277444589308996, - "grad_norm": 1.4579613208770752, - "learning_rate": 8.840804020100503e-05, - "loss": 5.0794, - "step": 12037 - }, - { - "epoch": 6.277966101694915, - "grad_norm": 1.503066062927246, - "learning_rate": 8.84070351758794e-05, - "loss": 5.804, - "step": 12038 - }, - { - "epoch": 6.278487614080834, - "grad_norm": 1.3480857610702515, - "learning_rate": 8.840603015075377e-05, - "loss": 5.6839, - "step": 12039 - }, - { - "epoch": 6.279009126466754, - "grad_norm": 1.3260538578033447, - "learning_rate": 8.840502512562815e-05, - "loss": 5.8841, - "step": 12040 - }, - { - "epoch": 6.279530638852672, - "grad_norm": 1.2928334474563599, - "learning_rate": 8.840402010050251e-05, - "loss": 5.7839, - "step": 12041 - }, - { - "epoch": 6.280052151238592, - "grad_norm": 1.4295276403427124, - "learning_rate": 8.840301507537689e-05, - "loss": 5.8267, - "step": 12042 - }, - { - "epoch": 6.280573663624511, - "grad_norm": 1.4535051584243774, - "learning_rate": 8.840201005025127e-05, - "loss": 5.0047, - "step": 12043 - }, - { - "epoch": 6.28109517601043, - "grad_norm": 1.391901969909668, - "learning_rate": 8.840100502512563e-05, - "loss": 5.6807, - "step": 12044 - }, - { - "epoch": 6.281616688396349, - "grad_norm": 1.4072856903076172, - "learning_rate": 8.840000000000001e-05, - "loss": 5.7186, - "step": 12045 - }, - { - "epoch": 6.282138200782269, - "grad_norm": 1.4760102033615112, - "learning_rate": 8.839899497487437e-05, - "loss": 5.6033, - "step": 12046 - }, - { - "epoch": 6.282659713168187, - "grad_norm": 1.4833502769470215, - "learning_rate": 8.839798994974875e-05, - "loss": 5.8992, - "step": 12047 - }, - { - "epoch": 6.283181225554107, - "grad_norm": 1.4219847917556763, - "learning_rate": 8.839698492462311e-05, - "loss": 5.8911, - "step": 12048 - }, - { - "epoch": 6.283702737940026, - "grad_norm": 1.5908845663070679, - "learning_rate": 8.839597989949749e-05, - "loss": 5.6855, - "step": 12049 - }, - { - "epoch": 6.284224250325945, - "grad_norm": 1.4310568571090698, - "learning_rate": 8.839497487437186e-05, - "loss": 5.4337, - "step": 12050 - }, - { - "epoch": 6.284745762711864, - "grad_norm": 1.5826183557510376, - "learning_rate": 8.839396984924623e-05, - "loss": 5.6392, - "step": 12051 - }, - { - "epoch": 6.285267275097784, - "grad_norm": 1.4675840139389038, - "learning_rate": 8.839296482412061e-05, - "loss": 5.4086, - "step": 12052 - }, - { - "epoch": 6.285788787483702, - "grad_norm": 1.5819295644760132, - "learning_rate": 8.839195979899499e-05, - "loss": 5.3551, - "step": 12053 - }, - { - "epoch": 6.286310299869622, - "grad_norm": 1.3784035444259644, - "learning_rate": 8.839095477386935e-05, - "loss": 5.4538, - "step": 12054 - }, - { - "epoch": 6.286831812255541, - "grad_norm": 1.453951120376587, - "learning_rate": 8.838994974874373e-05, - "loss": 5.1917, - "step": 12055 - }, - { - "epoch": 6.28735332464146, - "grad_norm": 1.4149829149246216, - "learning_rate": 8.83889447236181e-05, - "loss": 5.8249, - "step": 12056 - }, - { - "epoch": 6.287874837027379, - "grad_norm": 1.307054042816162, - "learning_rate": 8.838793969849246e-05, - "loss": 5.9435, - "step": 12057 - }, - { - "epoch": 6.288396349413299, - "grad_norm": 1.3327168226242065, - "learning_rate": 8.838693467336684e-05, - "loss": 5.3628, - "step": 12058 - }, - { - "epoch": 6.288917861799217, - "grad_norm": 1.4397270679473877, - "learning_rate": 8.83859296482412e-05, - "loss": 5.5649, - "step": 12059 - }, - { - "epoch": 6.289439374185137, - "grad_norm": 1.378709316253662, - "learning_rate": 8.838492462311558e-05, - "loss": 5.8172, - "step": 12060 - }, - { - "epoch": 6.289960886571056, - "grad_norm": 1.4211546182632446, - "learning_rate": 8.838391959798994e-05, - "loss": 5.4336, - "step": 12061 - }, - { - "epoch": 6.290482398956975, - "grad_norm": 1.3965972661972046, - "learning_rate": 8.838291457286432e-05, - "loss": 5.4897, - "step": 12062 - }, - { - "epoch": 6.291003911342894, - "grad_norm": 1.4729199409484863, - "learning_rate": 8.83819095477387e-05, - "loss": 5.2197, - "step": 12063 - }, - { - "epoch": 6.291525423728814, - "grad_norm": 1.5061179399490356, - "learning_rate": 8.838090452261308e-05, - "loss": 5.4361, - "step": 12064 - }, - { - "epoch": 6.292046936114732, - "grad_norm": 1.4339280128479004, - "learning_rate": 8.837989949748744e-05, - "loss": 5.6993, - "step": 12065 - }, - { - "epoch": 6.292568448500652, - "grad_norm": 1.4180926084518433, - "learning_rate": 8.837889447236182e-05, - "loss": 5.4446, - "step": 12066 - }, - { - "epoch": 6.293089960886571, - "grad_norm": 1.3643224239349365, - "learning_rate": 8.837788944723618e-05, - "loss": 4.8061, - "step": 12067 - }, - { - "epoch": 6.29361147327249, - "grad_norm": 1.5898480415344238, - "learning_rate": 8.837688442211056e-05, - "loss": 5.2628, - "step": 12068 - }, - { - "epoch": 6.294132985658409, - "grad_norm": 1.377640962600708, - "learning_rate": 8.837587939698493e-05, - "loss": 5.5901, - "step": 12069 - }, - { - "epoch": 6.294654498044329, - "grad_norm": 1.373678207397461, - "learning_rate": 8.837487437185929e-05, - "loss": 5.984, - "step": 12070 - }, - { - "epoch": 6.295176010430247, - "grad_norm": 1.59443199634552, - "learning_rate": 8.837386934673367e-05, - "loss": 4.9514, - "step": 12071 - }, - { - "epoch": 6.295697522816167, - "grad_norm": 1.4677097797393799, - "learning_rate": 8.837286432160805e-05, - "loss": 5.7602, - "step": 12072 - }, - { - "epoch": 6.296219035202086, - "grad_norm": 1.4832299947738647, - "learning_rate": 8.837185929648242e-05, - "loss": 4.8366, - "step": 12073 - }, - { - "epoch": 6.296740547588005, - "grad_norm": 1.3001480102539062, - "learning_rate": 8.837085427135679e-05, - "loss": 5.9922, - "step": 12074 - }, - { - "epoch": 6.297262059973924, - "grad_norm": 1.5553359985351562, - "learning_rate": 8.836984924623117e-05, - "loss": 5.2948, - "step": 12075 - }, - { - "epoch": 6.297783572359844, - "grad_norm": 1.4530389308929443, - "learning_rate": 8.836884422110553e-05, - "loss": 5.8257, - "step": 12076 - }, - { - "epoch": 6.2983050847457624, - "grad_norm": 1.4759005308151245, - "learning_rate": 8.836783919597991e-05, - "loss": 5.6376, - "step": 12077 - }, - { - "epoch": 6.298826597131682, - "grad_norm": 1.3651950359344482, - "learning_rate": 8.836683417085427e-05, - "loss": 5.5992, - "step": 12078 - }, - { - "epoch": 6.299348109517601, - "grad_norm": 1.5011557340621948, - "learning_rate": 8.836582914572865e-05, - "loss": 5.7003, - "step": 12079 - }, - { - "epoch": 6.29986962190352, - "grad_norm": 1.3884644508361816, - "learning_rate": 8.836482412060301e-05, - "loss": 5.9019, - "step": 12080 - }, - { - "epoch": 6.300391134289439, - "grad_norm": 1.4578614234924316, - "learning_rate": 8.836381909547739e-05, - "loss": 5.9988, - "step": 12081 - }, - { - "epoch": 6.300912646675359, - "grad_norm": 1.5090234279632568, - "learning_rate": 8.836281407035176e-05, - "loss": 5.5877, - "step": 12082 - }, - { - "epoch": 6.3014341590612775, - "grad_norm": 1.4326368570327759, - "learning_rate": 8.836180904522613e-05, - "loss": 5.8152, - "step": 12083 - }, - { - "epoch": 6.301955671447197, - "grad_norm": 1.7344613075256348, - "learning_rate": 8.836080402010051e-05, - "loss": 5.0787, - "step": 12084 - }, - { - "epoch": 6.302477183833116, - "grad_norm": 2.0626323223114014, - "learning_rate": 8.835979899497488e-05, - "loss": 4.6779, - "step": 12085 - }, - { - "epoch": 6.302998696219035, - "grad_norm": 1.5382282733917236, - "learning_rate": 8.835879396984925e-05, - "loss": 5.6686, - "step": 12086 - }, - { - "epoch": 6.303520208604954, - "grad_norm": 1.5743306875228882, - "learning_rate": 8.835778894472362e-05, - "loss": 5.3387, - "step": 12087 - }, - { - "epoch": 6.304041720990874, - "grad_norm": 1.3784205913543701, - "learning_rate": 8.8356783919598e-05, - "loss": 5.8669, - "step": 12088 - }, - { - "epoch": 6.3045632333767925, - "grad_norm": 1.3935288190841675, - "learning_rate": 8.835577889447236e-05, - "loss": 5.8338, - "step": 12089 - }, - { - "epoch": 6.305084745762712, - "grad_norm": 1.533415675163269, - "learning_rate": 8.835477386934674e-05, - "loss": 5.5201, - "step": 12090 - }, - { - "epoch": 6.305606258148631, - "grad_norm": 1.4763392210006714, - "learning_rate": 8.83537688442211e-05, - "loss": 5.4968, - "step": 12091 - }, - { - "epoch": 6.30612777053455, - "grad_norm": 1.3943382501602173, - "learning_rate": 8.835276381909548e-05, - "loss": 5.8321, - "step": 12092 - }, - { - "epoch": 6.3066492829204694, - "grad_norm": 1.5169154405593872, - "learning_rate": 8.835175879396986e-05, - "loss": 5.7063, - "step": 12093 - }, - { - "epoch": 6.307170795306389, - "grad_norm": 1.3771291971206665, - "learning_rate": 8.835075376884424e-05, - "loss": 5.9224, - "step": 12094 - }, - { - "epoch": 6.3076923076923075, - "grad_norm": 1.5625802278518677, - "learning_rate": 8.83497487437186e-05, - "loss": 5.6444, - "step": 12095 - }, - { - "epoch": 6.308213820078227, - "grad_norm": 1.4044710397720337, - "learning_rate": 8.834874371859298e-05, - "loss": 5.628, - "step": 12096 - }, - { - "epoch": 6.308735332464146, - "grad_norm": 1.3922559022903442, - "learning_rate": 8.834773869346734e-05, - "loss": 5.8079, - "step": 12097 - }, - { - "epoch": 6.309256844850065, - "grad_norm": 1.3689286708831787, - "learning_rate": 8.834673366834171e-05, - "loss": 5.9705, - "step": 12098 - }, - { - "epoch": 6.3097783572359845, - "grad_norm": 1.4325518608093262, - "learning_rate": 8.834572864321608e-05, - "loss": 5.7138, - "step": 12099 - }, - { - "epoch": 6.310299869621904, - "grad_norm": 1.475907325744629, - "learning_rate": 8.834472361809045e-05, - "loss": 5.4447, - "step": 12100 - }, - { - "epoch": 6.3108213820078225, - "grad_norm": 1.5030018091201782, - "learning_rate": 8.834371859296483e-05, - "loss": 5.1858, - "step": 12101 - }, - { - "epoch": 6.311342894393742, - "grad_norm": 1.2887983322143555, - "learning_rate": 8.834271356783919e-05, - "loss": 5.8435, - "step": 12102 - }, - { - "epoch": 6.311864406779661, - "grad_norm": 1.4396398067474365, - "learning_rate": 8.834170854271357e-05, - "loss": 5.5629, - "step": 12103 - }, - { - "epoch": 6.31238591916558, - "grad_norm": 1.5048037767410278, - "learning_rate": 8.834070351758795e-05, - "loss": 5.7272, - "step": 12104 - }, - { - "epoch": 6.3129074315514995, - "grad_norm": 1.51114821434021, - "learning_rate": 8.833969849246232e-05, - "loss": 5.825, - "step": 12105 - }, - { - "epoch": 6.313428943937419, - "grad_norm": 1.3946329355239868, - "learning_rate": 8.833869346733669e-05, - "loss": 5.8396, - "step": 12106 - }, - { - "epoch": 6.3139504563233375, - "grad_norm": 1.3331453800201416, - "learning_rate": 8.833768844221107e-05, - "loss": 5.8089, - "step": 12107 - }, - { - "epoch": 6.314471968709257, - "grad_norm": 1.3997777700424194, - "learning_rate": 8.833668341708543e-05, - "loss": 5.8215, - "step": 12108 - }, - { - "epoch": 6.3149934810951756, - "grad_norm": 1.4033890962600708, - "learning_rate": 8.833567839195981e-05, - "loss": 5.5975, - "step": 12109 - }, - { - "epoch": 6.315514993481095, - "grad_norm": 1.5121886730194092, - "learning_rate": 8.833467336683417e-05, - "loss": 5.6423, - "step": 12110 - }, - { - "epoch": 6.3160365058670145, - "grad_norm": 1.6517807245254517, - "learning_rate": 8.833366834170854e-05, - "loss": 5.2433, - "step": 12111 - }, - { - "epoch": 6.316558018252934, - "grad_norm": 1.4465112686157227, - "learning_rate": 8.833266331658292e-05, - "loss": 5.7615, - "step": 12112 - }, - { - "epoch": 6.3170795306388525, - "grad_norm": 1.4385017156600952, - "learning_rate": 8.833165829145729e-05, - "loss": 5.5185, - "step": 12113 - }, - { - "epoch": 6.317601043024772, - "grad_norm": 1.3362377882003784, - "learning_rate": 8.833065326633167e-05, - "loss": 5.5966, - "step": 12114 - }, - { - "epoch": 6.318122555410691, - "grad_norm": 1.467989444732666, - "learning_rate": 8.832964824120604e-05, - "loss": 5.769, - "step": 12115 - }, - { - "epoch": 6.31864406779661, - "grad_norm": 1.2873549461364746, - "learning_rate": 8.832864321608041e-05, - "loss": 5.8695, - "step": 12116 - }, - { - "epoch": 6.3191655801825295, - "grad_norm": 1.3832297325134277, - "learning_rate": 8.832763819095478e-05, - "loss": 5.4459, - "step": 12117 - }, - { - "epoch": 6.319687092568449, - "grad_norm": 1.3490833044052124, - "learning_rate": 8.832663316582916e-05, - "loss": 5.9918, - "step": 12118 - }, - { - "epoch": 6.3202086049543675, - "grad_norm": 1.6328246593475342, - "learning_rate": 8.832562814070352e-05, - "loss": 5.508, - "step": 12119 - }, - { - "epoch": 6.320730117340287, - "grad_norm": 1.3724193572998047, - "learning_rate": 8.83246231155779e-05, - "loss": 5.6168, - "step": 12120 - }, - { - "epoch": 6.321251629726206, - "grad_norm": 1.4536322355270386, - "learning_rate": 8.832361809045226e-05, - "loss": 5.013, - "step": 12121 - }, - { - "epoch": 6.321773142112125, - "grad_norm": 1.5350182056427002, - "learning_rate": 8.832261306532664e-05, - "loss": 5.1548, - "step": 12122 - }, - { - "epoch": 6.3222946544980445, - "grad_norm": 1.5707682371139526, - "learning_rate": 8.8321608040201e-05, - "loss": 5.1853, - "step": 12123 - }, - { - "epoch": 6.322816166883963, - "grad_norm": 1.5580750703811646, - "learning_rate": 8.832060301507538e-05, - "loss": 5.2579, - "step": 12124 - }, - { - "epoch": 6.3233376792698825, - "grad_norm": 1.3891470432281494, - "learning_rate": 8.831959798994976e-05, - "loss": 5.8914, - "step": 12125 - }, - { - "epoch": 6.323859191655802, - "grad_norm": 1.4920660257339478, - "learning_rate": 8.831859296482412e-05, - "loss": 5.503, - "step": 12126 - }, - { - "epoch": 6.324380704041721, - "grad_norm": 1.4493019580841064, - "learning_rate": 8.83175879396985e-05, - "loss": 5.5421, - "step": 12127 - }, - { - "epoch": 6.32490221642764, - "grad_norm": 1.3207662105560303, - "learning_rate": 8.831658291457287e-05, - "loss": 6.0284, - "step": 12128 - }, - { - "epoch": 6.3254237288135595, - "grad_norm": 1.3509985208511353, - "learning_rate": 8.831557788944724e-05, - "loss": 5.6847, - "step": 12129 - }, - { - "epoch": 6.325945241199478, - "grad_norm": 1.4327774047851562, - "learning_rate": 8.831457286432161e-05, - "loss": 5.5769, - "step": 12130 - }, - { - "epoch": 6.326466753585398, - "grad_norm": 1.3275338411331177, - "learning_rate": 8.831356783919599e-05, - "loss": 5.6197, - "step": 12131 - }, - { - "epoch": 6.326988265971317, - "grad_norm": 1.440687656402588, - "learning_rate": 8.831256281407035e-05, - "loss": 5.3316, - "step": 12132 - }, - { - "epoch": 6.327509778357236, - "grad_norm": 1.3027369976043701, - "learning_rate": 8.831155778894473e-05, - "loss": 6.0799, - "step": 12133 - }, - { - "epoch": 6.328031290743155, - "grad_norm": 1.628475546836853, - "learning_rate": 8.83105527638191e-05, - "loss": 5.1924, - "step": 12134 - }, - { - "epoch": 6.3285528031290745, - "grad_norm": 1.344660758972168, - "learning_rate": 8.830954773869348e-05, - "loss": 5.9238, - "step": 12135 - }, - { - "epoch": 6.329074315514993, - "grad_norm": 1.2707737684249878, - "learning_rate": 8.830854271356785e-05, - "loss": 5.9772, - "step": 12136 - }, - { - "epoch": 6.329595827900913, - "grad_norm": 1.6080198287963867, - "learning_rate": 8.830753768844221e-05, - "loss": 5.4598, - "step": 12137 - }, - { - "epoch": 6.330117340286832, - "grad_norm": 1.6154594421386719, - "learning_rate": 8.830653266331659e-05, - "loss": 5.2443, - "step": 12138 - }, - { - "epoch": 6.330638852672751, - "grad_norm": 1.4336411952972412, - "learning_rate": 8.830552763819095e-05, - "loss": 5.8761, - "step": 12139 - }, - { - "epoch": 6.33116036505867, - "grad_norm": 1.6947381496429443, - "learning_rate": 8.830452261306533e-05, - "loss": 5.4769, - "step": 12140 - }, - { - "epoch": 6.3316818774445895, - "grad_norm": 1.4000518321990967, - "learning_rate": 8.83035175879397e-05, - "loss": 5.8468, - "step": 12141 - }, - { - "epoch": 6.332203389830508, - "grad_norm": 1.4407309293746948, - "learning_rate": 8.830251256281407e-05, - "loss": 5.6561, - "step": 12142 - }, - { - "epoch": 6.332724902216428, - "grad_norm": 1.4863505363464355, - "learning_rate": 8.830150753768844e-05, - "loss": 5.4698, - "step": 12143 - }, - { - "epoch": 6.333246414602347, - "grad_norm": 1.5159229040145874, - "learning_rate": 8.830050251256282e-05, - "loss": 5.6421, - "step": 12144 - }, - { - "epoch": 6.333767926988266, - "grad_norm": 1.378055214881897, - "learning_rate": 8.82994974874372e-05, - "loss": 5.9447, - "step": 12145 - }, - { - "epoch": 6.334289439374185, - "grad_norm": 1.3307565450668335, - "learning_rate": 8.829849246231157e-05, - "loss": 5.6431, - "step": 12146 - }, - { - "epoch": 6.334810951760105, - "grad_norm": 1.526389718055725, - "learning_rate": 8.829748743718594e-05, - "loss": 5.5554, - "step": 12147 - }, - { - "epoch": 6.335332464146023, - "grad_norm": 1.4343065023422241, - "learning_rate": 8.829648241206031e-05, - "loss": 5.4554, - "step": 12148 - }, - { - "epoch": 6.335853976531943, - "grad_norm": 1.3296794891357422, - "learning_rate": 8.829547738693468e-05, - "loss": 5.9344, - "step": 12149 - }, - { - "epoch": 6.336375488917862, - "grad_norm": 1.435777187347412, - "learning_rate": 8.829447236180904e-05, - "loss": 5.6421, - "step": 12150 - }, - { - "epoch": 6.336897001303781, - "grad_norm": 1.3877465724945068, - "learning_rate": 8.829346733668342e-05, - "loss": 5.5561, - "step": 12151 - }, - { - "epoch": 6.3374185136897, - "grad_norm": 1.5047132968902588, - "learning_rate": 8.829246231155778e-05, - "loss": 5.3564, - "step": 12152 - }, - { - "epoch": 6.33794002607562, - "grad_norm": 1.3505843877792358, - "learning_rate": 8.829145728643216e-05, - "loss": 5.6948, - "step": 12153 - }, - { - "epoch": 6.338461538461538, - "grad_norm": 1.330729603767395, - "learning_rate": 8.829045226130653e-05, - "loss": 5.994, - "step": 12154 - }, - { - "epoch": 6.338983050847458, - "grad_norm": 1.5584123134613037, - "learning_rate": 8.82894472361809e-05, - "loss": 5.4732, - "step": 12155 - }, - { - "epoch": 6.339504563233377, - "grad_norm": 1.5374057292938232, - "learning_rate": 8.828844221105528e-05, - "loss": 5.5678, - "step": 12156 - }, - { - "epoch": 6.340026075619296, - "grad_norm": 1.4868539571762085, - "learning_rate": 8.828743718592966e-05, - "loss": 5.2561, - "step": 12157 - }, - { - "epoch": 6.340547588005215, - "grad_norm": 1.3896902799606323, - "learning_rate": 8.828643216080402e-05, - "loss": 5.3905, - "step": 12158 - }, - { - "epoch": 6.341069100391135, - "grad_norm": 1.3424299955368042, - "learning_rate": 8.82854271356784e-05, - "loss": 5.7307, - "step": 12159 - }, - { - "epoch": 6.341590612777053, - "grad_norm": 1.4135481119155884, - "learning_rate": 8.828442211055277e-05, - "loss": 5.763, - "step": 12160 - }, - { - "epoch": 6.342112125162973, - "grad_norm": 1.558407187461853, - "learning_rate": 8.828341708542714e-05, - "loss": 5.3588, - "step": 12161 - }, - { - "epoch": 6.342633637548892, - "grad_norm": 1.5697002410888672, - "learning_rate": 8.828241206030151e-05, - "loss": 5.3563, - "step": 12162 - }, - { - "epoch": 6.343155149934811, - "grad_norm": 1.4083223342895508, - "learning_rate": 8.828140703517587e-05, - "loss": 5.9154, - "step": 12163 - }, - { - "epoch": 6.34367666232073, - "grad_norm": 1.7371326684951782, - "learning_rate": 8.828040201005025e-05, - "loss": 5.8864, - "step": 12164 - }, - { - "epoch": 6.34419817470665, - "grad_norm": 1.6310065984725952, - "learning_rate": 8.827939698492463e-05, - "loss": 5.8073, - "step": 12165 - }, - { - "epoch": 6.344719687092568, - "grad_norm": 1.528121829032898, - "learning_rate": 8.8278391959799e-05, - "loss": 5.359, - "step": 12166 - }, - { - "epoch": 6.345241199478488, - "grad_norm": 1.533721923828125, - "learning_rate": 8.827738693467337e-05, - "loss": 5.5962, - "step": 12167 - }, - { - "epoch": 6.345762711864407, - "grad_norm": 1.4835429191589355, - "learning_rate": 8.827638190954775e-05, - "loss": 5.6752, - "step": 12168 - }, - { - "epoch": 6.346284224250326, - "grad_norm": 1.4723503589630127, - "learning_rate": 8.827537688442211e-05, - "loss": 5.6876, - "step": 12169 - }, - { - "epoch": 6.346805736636245, - "grad_norm": 1.506423830986023, - "learning_rate": 8.827437185929649e-05, - "loss": 5.0883, - "step": 12170 - }, - { - "epoch": 6.347327249022165, - "grad_norm": 1.251632809638977, - "learning_rate": 8.827336683417085e-05, - "loss": 5.9783, - "step": 12171 - }, - { - "epoch": 6.347848761408083, - "grad_norm": 1.4227330684661865, - "learning_rate": 8.827236180904523e-05, - "loss": 6.0599, - "step": 12172 - }, - { - "epoch": 6.348370273794003, - "grad_norm": 1.3502322435379028, - "learning_rate": 8.82713567839196e-05, - "loss": 5.4769, - "step": 12173 - }, - { - "epoch": 6.348891786179922, - "grad_norm": 1.4116591215133667, - "learning_rate": 8.827035175879397e-05, - "loss": 5.4481, - "step": 12174 - }, - { - "epoch": 6.349413298565841, - "grad_norm": 1.4646697044372559, - "learning_rate": 8.826934673366834e-05, - "loss": 5.6691, - "step": 12175 - }, - { - "epoch": 6.34993481095176, - "grad_norm": 1.4485068321228027, - "learning_rate": 8.826834170854272e-05, - "loss": 5.4802, - "step": 12176 - }, - { - "epoch": 6.35045632333768, - "grad_norm": 1.4672069549560547, - "learning_rate": 8.82673366834171e-05, - "loss": 5.5141, - "step": 12177 - }, - { - "epoch": 6.350977835723598, - "grad_norm": 1.296041488647461, - "learning_rate": 8.826633165829146e-05, - "loss": 5.8917, - "step": 12178 - }, - { - "epoch": 6.351499348109518, - "grad_norm": 1.480319857597351, - "learning_rate": 8.826532663316584e-05, - "loss": 5.8102, - "step": 12179 - }, - { - "epoch": 6.352020860495437, - "grad_norm": 1.5142804384231567, - "learning_rate": 8.82643216080402e-05, - "loss": 5.5116, - "step": 12180 - }, - { - "epoch": 6.352542372881356, - "grad_norm": 1.4915592670440674, - "learning_rate": 8.826331658291458e-05, - "loss": 5.2372, - "step": 12181 - }, - { - "epoch": 6.353063885267275, - "grad_norm": 1.5487685203552246, - "learning_rate": 8.826231155778894e-05, - "loss": 4.7799, - "step": 12182 - }, - { - "epoch": 6.353585397653195, - "grad_norm": 1.3946934938430786, - "learning_rate": 8.826130653266332e-05, - "loss": 5.6234, - "step": 12183 - }, - { - "epoch": 6.354106910039113, - "grad_norm": 1.5801653861999512, - "learning_rate": 8.826030150753769e-05, - "loss": 5.3972, - "step": 12184 - }, - { - "epoch": 6.354628422425033, - "grad_norm": 1.4909510612487793, - "learning_rate": 8.825929648241206e-05, - "loss": 5.3546, - "step": 12185 - }, - { - "epoch": 6.355149934810952, - "grad_norm": 1.4385992288589478, - "learning_rate": 8.825829145728644e-05, - "loss": 5.29, - "step": 12186 - }, - { - "epoch": 6.355671447196871, - "grad_norm": 1.4090741872787476, - "learning_rate": 8.825728643216082e-05, - "loss": 5.737, - "step": 12187 - }, - { - "epoch": 6.35619295958279, - "grad_norm": 1.4292742013931274, - "learning_rate": 8.825628140703518e-05, - "loss": 4.9245, - "step": 12188 - }, - { - "epoch": 6.35671447196871, - "grad_norm": 1.4844692945480347, - "learning_rate": 8.825527638190956e-05, - "loss": 5.7668, - "step": 12189 - }, - { - "epoch": 6.357235984354628, - "grad_norm": 1.4666094779968262, - "learning_rate": 8.825427135678393e-05, - "loss": 5.5606, - "step": 12190 - }, - { - "epoch": 6.357757496740548, - "grad_norm": 1.4430983066558838, - "learning_rate": 8.825326633165829e-05, - "loss": 5.5265, - "step": 12191 - }, - { - "epoch": 6.358279009126467, - "grad_norm": 1.4285188913345337, - "learning_rate": 8.825226130653267e-05, - "loss": 5.7811, - "step": 12192 - }, - { - "epoch": 6.358800521512386, - "grad_norm": 1.4459666013717651, - "learning_rate": 8.825125628140703e-05, - "loss": 5.7705, - "step": 12193 - }, - { - "epoch": 6.359322033898305, - "grad_norm": 1.3884646892547607, - "learning_rate": 8.825025125628141e-05, - "loss": 6.0039, - "step": 12194 - }, - { - "epoch": 6.359843546284225, - "grad_norm": 1.3178671598434448, - "learning_rate": 8.824924623115577e-05, - "loss": 5.9169, - "step": 12195 - }, - { - "epoch": 6.360365058670143, - "grad_norm": 1.510746717453003, - "learning_rate": 8.824824120603015e-05, - "loss": 5.4208, - "step": 12196 - }, - { - "epoch": 6.360886571056063, - "grad_norm": 1.4888139963150024, - "learning_rate": 8.824723618090453e-05, - "loss": 5.6368, - "step": 12197 - }, - { - "epoch": 6.361408083441981, - "grad_norm": 1.3968088626861572, - "learning_rate": 8.824623115577891e-05, - "loss": 5.4919, - "step": 12198 - }, - { - "epoch": 6.361929595827901, - "grad_norm": 1.405064344406128, - "learning_rate": 8.824522613065327e-05, - "loss": 5.4922, - "step": 12199 - }, - { - "epoch": 6.36245110821382, - "grad_norm": 2.084150791168213, - "learning_rate": 8.824422110552765e-05, - "loss": 4.2513, - "step": 12200 - }, - { - "epoch": 6.36297262059974, - "grad_norm": 1.5728200674057007, - "learning_rate": 8.824321608040201e-05, - "loss": 5.9125, - "step": 12201 - }, - { - "epoch": 6.363494132985658, - "grad_norm": 1.5503734350204468, - "learning_rate": 8.824221105527639e-05, - "loss": 5.9045, - "step": 12202 - }, - { - "epoch": 6.364015645371578, - "grad_norm": 1.5716289281845093, - "learning_rate": 8.824120603015076e-05, - "loss": 4.7721, - "step": 12203 - }, - { - "epoch": 6.364537157757496, - "grad_norm": 1.4194059371948242, - "learning_rate": 8.824020100502512e-05, - "loss": 5.519, - "step": 12204 - }, - { - "epoch": 6.365058670143416, - "grad_norm": 1.3935153484344482, - "learning_rate": 8.82391959798995e-05, - "loss": 5.4924, - "step": 12205 - }, - { - "epoch": 6.365580182529335, - "grad_norm": 1.4296725988388062, - "learning_rate": 8.823819095477388e-05, - "loss": 5.7943, - "step": 12206 - }, - { - "epoch": 6.366101694915255, - "grad_norm": 1.4515386819839478, - "learning_rate": 8.823718592964825e-05, - "loss": 5.4516, - "step": 12207 - }, - { - "epoch": 6.366623207301173, - "grad_norm": 1.3468106985092163, - "learning_rate": 8.823618090452262e-05, - "loss": 5.6462, - "step": 12208 - }, - { - "epoch": 6.367144719687093, - "grad_norm": 1.4124257564544678, - "learning_rate": 8.8235175879397e-05, - "loss": 5.7585, - "step": 12209 - }, - { - "epoch": 6.367666232073011, - "grad_norm": 1.381523609161377, - "learning_rate": 8.823417085427136e-05, - "loss": 5.6055, - "step": 12210 - }, - { - "epoch": 6.368187744458931, - "grad_norm": 1.4750747680664062, - "learning_rate": 8.823316582914574e-05, - "loss": 5.5155, - "step": 12211 - }, - { - "epoch": 6.36870925684485, - "grad_norm": 1.4453010559082031, - "learning_rate": 8.82321608040201e-05, - "loss": 5.5929, - "step": 12212 - }, - { - "epoch": 6.36923076923077, - "grad_norm": 1.5138916969299316, - "learning_rate": 8.823115577889448e-05, - "loss": 5.3795, - "step": 12213 - }, - { - "epoch": 6.369752281616688, - "grad_norm": 1.699020504951477, - "learning_rate": 8.823015075376884e-05, - "loss": 5.3188, - "step": 12214 - }, - { - "epoch": 6.370273794002608, - "grad_norm": 1.786414623260498, - "learning_rate": 8.822914572864322e-05, - "loss": 5.3821, - "step": 12215 - }, - { - "epoch": 6.370795306388526, - "grad_norm": 1.3971307277679443, - "learning_rate": 8.822814070351759e-05, - "loss": 5.52, - "step": 12216 - }, - { - "epoch": 6.371316818774446, - "grad_norm": 1.4634970426559448, - "learning_rate": 8.822713567839196e-05, - "loss": 5.8883, - "step": 12217 - }, - { - "epoch": 6.371838331160365, - "grad_norm": 1.4427783489227295, - "learning_rate": 8.822613065326634e-05, - "loss": 5.4954, - "step": 12218 - }, - { - "epoch": 6.372359843546284, - "grad_norm": 1.4870893955230713, - "learning_rate": 8.82251256281407e-05, - "loss": 5.7131, - "step": 12219 - }, - { - "epoch": 6.372881355932203, - "grad_norm": 1.4399052858352661, - "learning_rate": 8.822412060301508e-05, - "loss": 5.2521, - "step": 12220 - }, - { - "epoch": 6.373402868318123, - "grad_norm": 1.3952280282974243, - "learning_rate": 8.822311557788945e-05, - "loss": 5.3951, - "step": 12221 - }, - { - "epoch": 6.373924380704041, - "grad_norm": 1.397067666053772, - "learning_rate": 8.822211055276383e-05, - "loss": 5.618, - "step": 12222 - }, - { - "epoch": 6.374445893089961, - "grad_norm": 1.4171732664108276, - "learning_rate": 8.822110552763819e-05, - "loss": 5.548, - "step": 12223 - }, - { - "epoch": 6.37496740547588, - "grad_norm": 1.2858281135559082, - "learning_rate": 8.822010050251257e-05, - "loss": 6.0247, - "step": 12224 - }, - { - "epoch": 6.375488917861799, - "grad_norm": 1.3728171586990356, - "learning_rate": 8.821909547738693e-05, - "loss": 5.8506, - "step": 12225 - }, - { - "epoch": 6.376010430247718, - "grad_norm": 1.4187681674957275, - "learning_rate": 8.821809045226131e-05, - "loss": 5.4468, - "step": 12226 - }, - { - "epoch": 6.376531942633638, - "grad_norm": 1.3169276714324951, - "learning_rate": 8.821708542713569e-05, - "loss": 5.783, - "step": 12227 - }, - { - "epoch": 6.377053455019556, - "grad_norm": 1.49747633934021, - "learning_rate": 8.821608040201007e-05, - "loss": 5.4559, - "step": 12228 - }, - { - "epoch": 6.377574967405476, - "grad_norm": 1.409165859222412, - "learning_rate": 8.821507537688443e-05, - "loss": 5.7901, - "step": 12229 - }, - { - "epoch": 6.378096479791395, - "grad_norm": 1.4317535161972046, - "learning_rate": 8.82140703517588e-05, - "loss": 5.369, - "step": 12230 - }, - { - "epoch": 6.378617992177314, - "grad_norm": 1.6110883951187134, - "learning_rate": 8.821306532663317e-05, - "loss": 4.9418, - "step": 12231 - }, - { - "epoch": 6.379139504563233, - "grad_norm": 1.5186666250228882, - "learning_rate": 8.821206030150754e-05, - "loss": 5.358, - "step": 12232 - }, - { - "epoch": 6.379661016949153, - "grad_norm": 1.4872722625732422, - "learning_rate": 8.821105527638191e-05, - "loss": 5.7829, - "step": 12233 - }, - { - "epoch": 6.380182529335071, - "grad_norm": 1.4706742763519287, - "learning_rate": 8.821005025125628e-05, - "loss": 5.4025, - "step": 12234 - }, - { - "epoch": 6.380704041720991, - "grad_norm": 1.4497088193893433, - "learning_rate": 8.820904522613066e-05, - "loss": 5.3688, - "step": 12235 - }, - { - "epoch": 6.38122555410691, - "grad_norm": 1.4196417331695557, - "learning_rate": 8.820804020100502e-05, - "loss": 5.5335, - "step": 12236 - }, - { - "epoch": 6.381747066492829, - "grad_norm": 1.3944603204727173, - "learning_rate": 8.82070351758794e-05, - "loss": 5.7376, - "step": 12237 - }, - { - "epoch": 6.382268578878748, - "grad_norm": 1.5761350393295288, - "learning_rate": 8.820603015075378e-05, - "loss": 5.2931, - "step": 12238 - }, - { - "epoch": 6.382790091264668, - "grad_norm": 1.5221244096755981, - "learning_rate": 8.820502512562815e-05, - "loss": 5.3599, - "step": 12239 - }, - { - "epoch": 6.383311603650586, - "grad_norm": 1.382493019104004, - "learning_rate": 8.820402010050252e-05, - "loss": 5.9875, - "step": 12240 - }, - { - "epoch": 6.383833116036506, - "grad_norm": 1.335075855255127, - "learning_rate": 8.82030150753769e-05, - "loss": 5.873, - "step": 12241 - }, - { - "epoch": 6.384354628422425, - "grad_norm": 1.2675015926361084, - "learning_rate": 8.820201005025126e-05, - "loss": 5.8856, - "step": 12242 - }, - { - "epoch": 6.384876140808344, - "grad_norm": 1.3323925733566284, - "learning_rate": 8.820100502512562e-05, - "loss": 6.0389, - "step": 12243 - }, - { - "epoch": 6.385397653194263, - "grad_norm": 1.5182682275772095, - "learning_rate": 8.82e-05, - "loss": 5.3876, - "step": 12244 - }, - { - "epoch": 6.385919165580183, - "grad_norm": 1.4591470956802368, - "learning_rate": 8.819899497487437e-05, - "loss": 5.2304, - "step": 12245 - }, - { - "epoch": 6.386440677966101, - "grad_norm": 1.4802595376968384, - "learning_rate": 8.819798994974874e-05, - "loss": 5.5211, - "step": 12246 - }, - { - "epoch": 6.386962190352021, - "grad_norm": 1.4425345659255981, - "learning_rate": 8.819698492462312e-05, - "loss": 5.7371, - "step": 12247 - }, - { - "epoch": 6.38748370273794, - "grad_norm": 1.3161156177520752, - "learning_rate": 8.81959798994975e-05, - "loss": 5.8716, - "step": 12248 - }, - { - "epoch": 6.388005215123859, - "grad_norm": 1.360721230506897, - "learning_rate": 8.819497487437186e-05, - "loss": 5.9604, - "step": 12249 - }, - { - "epoch": 6.388526727509778, - "grad_norm": 1.4703439474105835, - "learning_rate": 8.819396984924624e-05, - "loss": 5.4409, - "step": 12250 - }, - { - "epoch": 6.389048239895698, - "grad_norm": 1.4632480144500732, - "learning_rate": 8.81929648241206e-05, - "loss": 5.305, - "step": 12251 - }, - { - "epoch": 6.389569752281616, - "grad_norm": 1.3937593698501587, - "learning_rate": 8.819195979899498e-05, - "loss": 5.7847, - "step": 12252 - }, - { - "epoch": 6.390091264667536, - "grad_norm": 1.3734287023544312, - "learning_rate": 8.819095477386935e-05, - "loss": 5.8971, - "step": 12253 - }, - { - "epoch": 6.390612777053455, - "grad_norm": 1.4256542921066284, - "learning_rate": 8.818994974874373e-05, - "loss": 5.3316, - "step": 12254 - }, - { - "epoch": 6.391134289439374, - "grad_norm": 1.2939655780792236, - "learning_rate": 8.818894472361809e-05, - "loss": 5.89, - "step": 12255 - }, - { - "epoch": 6.391655801825293, - "grad_norm": 1.4324458837509155, - "learning_rate": 8.818793969849246e-05, - "loss": 5.7079, - "step": 12256 - }, - { - "epoch": 6.392177314211213, - "grad_norm": 1.5167888402938843, - "learning_rate": 8.818693467336683e-05, - "loss": 5.7998, - "step": 12257 - }, - { - "epoch": 6.392698826597131, - "grad_norm": 1.5175211429595947, - "learning_rate": 8.818592964824121e-05, - "loss": 5.435, - "step": 12258 - }, - { - "epoch": 6.393220338983051, - "grad_norm": 1.528090238571167, - "learning_rate": 8.818492462311559e-05, - "loss": 5.5824, - "step": 12259 - }, - { - "epoch": 6.39374185136897, - "grad_norm": 1.4604538679122925, - "learning_rate": 8.818391959798995e-05, - "loss": 5.5871, - "step": 12260 - }, - { - "epoch": 6.394263363754889, - "grad_norm": 1.47371244430542, - "learning_rate": 8.818291457286433e-05, - "loss": 5.1875, - "step": 12261 - }, - { - "epoch": 6.394784876140808, - "grad_norm": 1.3886140584945679, - "learning_rate": 8.81819095477387e-05, - "loss": 5.491, - "step": 12262 - }, - { - "epoch": 6.395306388526728, - "grad_norm": 1.4838526248931885, - "learning_rate": 8.818090452261307e-05, - "loss": 5.4091, - "step": 12263 - }, - { - "epoch": 6.395827900912646, - "grad_norm": 1.3425787687301636, - "learning_rate": 8.817989949748744e-05, - "loss": 5.6645, - "step": 12264 - }, - { - "epoch": 6.396349413298566, - "grad_norm": 1.4481582641601562, - "learning_rate": 8.817889447236181e-05, - "loss": 5.8373, - "step": 12265 - }, - { - "epoch": 6.396870925684485, - "grad_norm": 1.518369436264038, - "learning_rate": 8.817788944723618e-05, - "loss": 5.3999, - "step": 12266 - }, - { - "epoch": 6.397392438070404, - "grad_norm": 1.3389809131622314, - "learning_rate": 8.817688442211056e-05, - "loss": 5.8671, - "step": 12267 - }, - { - "epoch": 6.397913950456323, - "grad_norm": 1.4639378786087036, - "learning_rate": 8.817587939698493e-05, - "loss": 5.7784, - "step": 12268 - }, - { - "epoch": 6.398435462842243, - "grad_norm": 1.5024561882019043, - "learning_rate": 8.817487437185931e-05, - "loss": 5.9637, - "step": 12269 - }, - { - "epoch": 6.398956975228161, - "grad_norm": 1.560990810394287, - "learning_rate": 8.817386934673368e-05, - "loss": 5.5154, - "step": 12270 - }, - { - "epoch": 6.399478487614081, - "grad_norm": 1.2266737222671509, - "learning_rate": 8.817286432160804e-05, - "loss": 4.8219, - "step": 12271 - }, - { - "epoch": 6.4, - "grad_norm": 1.4626342058181763, - "learning_rate": 8.817185929648242e-05, - "loss": 5.2933, - "step": 12272 - }, - { - "epoch": 6.400521512385919, - "grad_norm": 1.427392601966858, - "learning_rate": 8.817085427135678e-05, - "loss": 5.8878, - "step": 12273 - }, - { - "epoch": 6.401043024771838, - "grad_norm": 1.4779373407363892, - "learning_rate": 8.816984924623116e-05, - "loss": 5.6789, - "step": 12274 - }, - { - "epoch": 6.401564537157758, - "grad_norm": 1.3206887245178223, - "learning_rate": 8.816884422110553e-05, - "loss": 5.0854, - "step": 12275 - }, - { - "epoch": 6.402086049543676, - "grad_norm": 1.4355905055999756, - "learning_rate": 8.81678391959799e-05, - "loss": 5.2877, - "step": 12276 - }, - { - "epoch": 6.402607561929596, - "grad_norm": 1.578148603439331, - "learning_rate": 8.816683417085427e-05, - "loss": 5.3097, - "step": 12277 - }, - { - "epoch": 6.403129074315515, - "grad_norm": 1.5697033405303955, - "learning_rate": 8.816582914572865e-05, - "loss": 5.8357, - "step": 12278 - }, - { - "epoch": 6.403650586701434, - "grad_norm": 1.6093648672103882, - "learning_rate": 8.816482412060302e-05, - "loss": 5.2604, - "step": 12279 - }, - { - "epoch": 6.404172099087353, - "grad_norm": 1.4446955919265747, - "learning_rate": 8.81638190954774e-05, - "loss": 5.4395, - "step": 12280 - }, - { - "epoch": 6.404693611473273, - "grad_norm": 1.6522186994552612, - "learning_rate": 8.816281407035177e-05, - "loss": 5.6057, - "step": 12281 - }, - { - "epoch": 6.4052151238591915, - "grad_norm": 1.612200379371643, - "learning_rate": 8.816180904522614e-05, - "loss": 5.1585, - "step": 12282 - }, - { - "epoch": 6.405736636245111, - "grad_norm": 1.4451087713241577, - "learning_rate": 8.816080402010051e-05, - "loss": 5.6742, - "step": 12283 - }, - { - "epoch": 6.40625814863103, - "grad_norm": 1.4551714658737183, - "learning_rate": 8.815979899497487e-05, - "loss": 5.5968, - "step": 12284 - }, - { - "epoch": 6.406779661016949, - "grad_norm": 1.5619081258773804, - "learning_rate": 8.815879396984925e-05, - "loss": 5.5912, - "step": 12285 - }, - { - "epoch": 6.407301173402868, - "grad_norm": 1.412952184677124, - "learning_rate": 8.815778894472361e-05, - "loss": 5.7991, - "step": 12286 - }, - { - "epoch": 6.407822685788788, - "grad_norm": 1.4653518199920654, - "learning_rate": 8.815678391959799e-05, - "loss": 5.5906, - "step": 12287 - }, - { - "epoch": 6.4083441981747065, - "grad_norm": 1.572576880455017, - "learning_rate": 8.815577889447237e-05, - "loss": 5.319, - "step": 12288 - }, - { - "epoch": 6.408865710560626, - "grad_norm": 1.4404702186584473, - "learning_rate": 8.815477386934675e-05, - "loss": 5.3296, - "step": 12289 - }, - { - "epoch": 6.409387222946545, - "grad_norm": 1.482928991317749, - "learning_rate": 8.815376884422111e-05, - "loss": 5.2498, - "step": 12290 - }, - { - "epoch": 6.409908735332464, - "grad_norm": 1.4822885990142822, - "learning_rate": 8.815276381909549e-05, - "loss": 5.6699, - "step": 12291 - }, - { - "epoch": 6.410430247718383, - "grad_norm": 1.5829187631607056, - "learning_rate": 8.815175879396985e-05, - "loss": 5.616, - "step": 12292 - }, - { - "epoch": 6.410951760104302, - "grad_norm": 1.4730305671691895, - "learning_rate": 8.815075376884423e-05, - "loss": 5.4947, - "step": 12293 - }, - { - "epoch": 6.4114732724902215, - "grad_norm": 1.4638900756835938, - "learning_rate": 8.81497487437186e-05, - "loss": 5.2828, - "step": 12294 - }, - { - "epoch": 6.411994784876141, - "grad_norm": 1.6327484846115112, - "learning_rate": 8.814874371859297e-05, - "loss": 5.4319, - "step": 12295 - }, - { - "epoch": 6.41251629726206, - "grad_norm": 1.5855402946472168, - "learning_rate": 8.814773869346734e-05, - "loss": 5.4827, - "step": 12296 - }, - { - "epoch": 6.413037809647979, - "grad_norm": 1.5764530897140503, - "learning_rate": 8.81467336683417e-05, - "loss": 5.6907, - "step": 12297 - }, - { - "epoch": 6.4135593220338984, - "grad_norm": 1.3923953771591187, - "learning_rate": 8.814572864321608e-05, - "loss": 5.6651, - "step": 12298 - }, - { - "epoch": 6.414080834419817, - "grad_norm": 1.4534045457839966, - "learning_rate": 8.814472361809046e-05, - "loss": 5.7933, - "step": 12299 - }, - { - "epoch": 6.4146023468057365, - "grad_norm": 1.592774748802185, - "learning_rate": 8.814371859296484e-05, - "loss": 4.5249, - "step": 12300 - }, - { - "epoch": 6.415123859191656, - "grad_norm": 1.413705825805664, - "learning_rate": 8.81427135678392e-05, - "loss": 5.7643, - "step": 12301 - }, - { - "epoch": 6.415645371577575, - "grad_norm": 1.6202391386032104, - "learning_rate": 8.814170854271358e-05, - "loss": 5.2539, - "step": 12302 - }, - { - "epoch": 6.416166883963494, - "grad_norm": 1.7524176836013794, - "learning_rate": 8.814070351758794e-05, - "loss": 5.1757, - "step": 12303 - }, - { - "epoch": 6.4166883963494135, - "grad_norm": 1.5299781560897827, - "learning_rate": 8.813969849246232e-05, - "loss": 5.3008, - "step": 12304 - }, - { - "epoch": 6.417209908735332, - "grad_norm": 1.4312807321548462, - "learning_rate": 8.813869346733668e-05, - "loss": 5.6172, - "step": 12305 - }, - { - "epoch": 6.4177314211212515, - "grad_norm": 1.5321975946426392, - "learning_rate": 8.813768844221106e-05, - "loss": 5.5221, - "step": 12306 - }, - { - "epoch": 6.418252933507171, - "grad_norm": 1.4345386028289795, - "learning_rate": 8.813668341708543e-05, - "loss": 5.927, - "step": 12307 - }, - { - "epoch": 6.41877444589309, - "grad_norm": 1.618627905845642, - "learning_rate": 8.81356783919598e-05, - "loss": 4.8395, - "step": 12308 - }, - { - "epoch": 6.419295958279009, - "grad_norm": 1.5540077686309814, - "learning_rate": 8.813467336683417e-05, - "loss": 5.36, - "step": 12309 - }, - { - "epoch": 6.4198174706649285, - "grad_norm": 1.444062352180481, - "learning_rate": 8.813366834170855e-05, - "loss": 5.4994, - "step": 12310 - }, - { - "epoch": 6.420338983050847, - "grad_norm": 1.415596604347229, - "learning_rate": 8.813266331658292e-05, - "loss": 5.7162, - "step": 12311 - }, - { - "epoch": 6.4208604954367665, - "grad_norm": 1.401892066001892, - "learning_rate": 8.813165829145729e-05, - "loss": 5.5236, - "step": 12312 - }, - { - "epoch": 6.421382007822686, - "grad_norm": 1.5646836757659912, - "learning_rate": 8.813065326633167e-05, - "loss": 5.8446, - "step": 12313 - }, - { - "epoch": 6.4219035202086046, - "grad_norm": 1.491349458694458, - "learning_rate": 8.812964824120603e-05, - "loss": 5.74, - "step": 12314 - }, - { - "epoch": 6.422425032594524, - "grad_norm": 1.5022974014282227, - "learning_rate": 8.812864321608041e-05, - "loss": 5.5079, - "step": 12315 - }, - { - "epoch": 6.4229465449804435, - "grad_norm": 1.4012523889541626, - "learning_rate": 8.812763819095477e-05, - "loss": 5.8197, - "step": 12316 - }, - { - "epoch": 6.423468057366362, - "grad_norm": 1.3767344951629639, - "learning_rate": 8.812663316582915e-05, - "loss": 5.7991, - "step": 12317 - }, - { - "epoch": 6.4239895697522815, - "grad_norm": 1.9282090663909912, - "learning_rate": 8.812562814070351e-05, - "loss": 5.4469, - "step": 12318 - }, - { - "epoch": 6.424511082138201, - "grad_norm": 1.481793761253357, - "learning_rate": 8.812462311557789e-05, - "loss": 5.6434, - "step": 12319 - }, - { - "epoch": 6.42503259452412, - "grad_norm": 1.4800115823745728, - "learning_rate": 8.812361809045227e-05, - "loss": 5.6715, - "step": 12320 - }, - { - "epoch": 6.425554106910039, - "grad_norm": 1.646868348121643, - "learning_rate": 8.812261306532665e-05, - "loss": 5.3738, - "step": 12321 - }, - { - "epoch": 6.4260756192959585, - "grad_norm": 1.3726975917816162, - "learning_rate": 8.812160804020101e-05, - "loss": 5.983, - "step": 12322 - }, - { - "epoch": 6.426597131681877, - "grad_norm": 1.5390900373458862, - "learning_rate": 8.812060301507538e-05, - "loss": 5.3409, - "step": 12323 - }, - { - "epoch": 6.4271186440677965, - "grad_norm": 1.726402997970581, - "learning_rate": 8.811959798994975e-05, - "loss": 4.9655, - "step": 12324 - }, - { - "epoch": 6.427640156453716, - "grad_norm": 1.6351051330566406, - "learning_rate": 8.811859296482412e-05, - "loss": 5.1929, - "step": 12325 - }, - { - "epoch": 6.428161668839635, - "grad_norm": 1.4899537563323975, - "learning_rate": 8.81175879396985e-05, - "loss": 5.4563, - "step": 12326 - }, - { - "epoch": 6.428683181225554, - "grad_norm": 1.4254094362258911, - "learning_rate": 8.811658291457286e-05, - "loss": 5.7713, - "step": 12327 - }, - { - "epoch": 6.4292046936114735, - "grad_norm": 1.563011646270752, - "learning_rate": 8.811557788944724e-05, - "loss": 4.9107, - "step": 12328 - }, - { - "epoch": 6.429726205997392, - "grad_norm": 1.5598279237747192, - "learning_rate": 8.81145728643216e-05, - "loss": 5.8376, - "step": 12329 - }, - { - "epoch": 6.4302477183833116, - "grad_norm": 1.4684690237045288, - "learning_rate": 8.811356783919598e-05, - "loss": 5.6438, - "step": 12330 - }, - { - "epoch": 6.430769230769231, - "grad_norm": 1.5901228189468384, - "learning_rate": 8.811256281407036e-05, - "loss": 5.4757, - "step": 12331 - }, - { - "epoch": 6.43129074315515, - "grad_norm": 1.4514451026916504, - "learning_rate": 8.811155778894474e-05, - "loss": 5.7955, - "step": 12332 - }, - { - "epoch": 6.431812255541069, - "grad_norm": 1.461692214012146, - "learning_rate": 8.81105527638191e-05, - "loss": 5.4813, - "step": 12333 - }, - { - "epoch": 6.4323337679269885, - "grad_norm": 1.3234068155288696, - "learning_rate": 8.810954773869348e-05, - "loss": 5.5391, - "step": 12334 - }, - { - "epoch": 6.432855280312907, - "grad_norm": 1.4290522336959839, - "learning_rate": 8.810854271356784e-05, - "loss": 5.6446, - "step": 12335 - }, - { - "epoch": 6.433376792698827, - "grad_norm": 1.4482927322387695, - "learning_rate": 8.810753768844221e-05, - "loss": 5.3787, - "step": 12336 - }, - { - "epoch": 6.433898305084746, - "grad_norm": 1.3852169513702393, - "learning_rate": 8.810653266331658e-05, - "loss": 6.0821, - "step": 12337 - }, - { - "epoch": 6.434419817470665, - "grad_norm": 1.2715764045715332, - "learning_rate": 8.810552763819095e-05, - "loss": 6.0843, - "step": 12338 - }, - { - "epoch": 6.434941329856584, - "grad_norm": 2.1181516647338867, - "learning_rate": 8.810452261306533e-05, - "loss": 5.3792, - "step": 12339 - }, - { - "epoch": 6.4354628422425035, - "grad_norm": 1.448868989944458, - "learning_rate": 8.81035175879397e-05, - "loss": 5.6746, - "step": 12340 - }, - { - "epoch": 6.435984354628422, - "grad_norm": 1.4440921545028687, - "learning_rate": 8.810251256281408e-05, - "loss": 5.8219, - "step": 12341 - }, - { - "epoch": 6.436505867014342, - "grad_norm": 1.3736803531646729, - "learning_rate": 8.810150753768845e-05, - "loss": 5.7113, - "step": 12342 - }, - { - "epoch": 6.437027379400261, - "grad_norm": 1.3876579999923706, - "learning_rate": 8.810050251256282e-05, - "loss": 5.7087, - "step": 12343 - }, - { - "epoch": 6.43754889178618, - "grad_norm": 1.359338402748108, - "learning_rate": 8.809949748743719e-05, - "loss": 5.6221, - "step": 12344 - }, - { - "epoch": 6.438070404172099, - "grad_norm": 1.551284909248352, - "learning_rate": 8.809849246231157e-05, - "loss": 5.3263, - "step": 12345 - }, - { - "epoch": 6.4385919165580185, - "grad_norm": 1.3460392951965332, - "learning_rate": 8.809748743718593e-05, - "loss": 4.76, - "step": 12346 - }, - { - "epoch": 6.439113428943937, - "grad_norm": 1.465205192565918, - "learning_rate": 8.809648241206031e-05, - "loss": 5.9279, - "step": 12347 - }, - { - "epoch": 6.439634941329857, - "grad_norm": 1.500546932220459, - "learning_rate": 8.809547738693467e-05, - "loss": 5.6204, - "step": 12348 - }, - { - "epoch": 6.440156453715776, - "grad_norm": 1.4785327911376953, - "learning_rate": 8.809447236180904e-05, - "loss": 5.8211, - "step": 12349 - }, - { - "epoch": 6.440677966101695, - "grad_norm": 1.405735731124878, - "learning_rate": 8.809346733668342e-05, - "loss": 6.0353, - "step": 12350 - }, - { - "epoch": 6.441199478487614, - "grad_norm": 1.4556152820587158, - "learning_rate": 8.809246231155779e-05, - "loss": 5.7157, - "step": 12351 - }, - { - "epoch": 6.441720990873534, - "grad_norm": 1.434609055519104, - "learning_rate": 8.809145728643217e-05, - "loss": 4.8225, - "step": 12352 - }, - { - "epoch": 6.442242503259452, - "grad_norm": 1.7365261316299438, - "learning_rate": 8.809045226130654e-05, - "loss": 4.8069, - "step": 12353 - }, - { - "epoch": 6.442764015645372, - "grad_norm": 1.5130285024642944, - "learning_rate": 8.808944723618091e-05, - "loss": 5.7442, - "step": 12354 - }, - { - "epoch": 6.443285528031291, - "grad_norm": 1.674997329711914, - "learning_rate": 8.808844221105528e-05, - "loss": 4.9937, - "step": 12355 - }, - { - "epoch": 6.44380704041721, - "grad_norm": 1.4525433778762817, - "learning_rate": 8.808743718592966e-05, - "loss": 5.7793, - "step": 12356 - }, - { - "epoch": 6.444328552803129, - "grad_norm": 1.4050140380859375, - "learning_rate": 8.808643216080402e-05, - "loss": 5.5348, - "step": 12357 - }, - { - "epoch": 6.444850065189049, - "grad_norm": 1.4503260850906372, - "learning_rate": 8.80854271356784e-05, - "loss": 5.7861, - "step": 12358 - }, - { - "epoch": 6.445371577574967, - "grad_norm": 1.4740691184997559, - "learning_rate": 8.808442211055276e-05, - "loss": 5.8656, - "step": 12359 - }, - { - "epoch": 6.445893089960887, - "grad_norm": 1.3675254583358765, - "learning_rate": 8.808341708542714e-05, - "loss": 5.7501, - "step": 12360 - }, - { - "epoch": 6.446414602346806, - "grad_norm": 1.4367542266845703, - "learning_rate": 8.808241206030152e-05, - "loss": 5.7249, - "step": 12361 - }, - { - "epoch": 6.446936114732725, - "grad_norm": 1.5021365880966187, - "learning_rate": 8.80814070351759e-05, - "loss": 5.5976, - "step": 12362 - }, - { - "epoch": 6.447457627118644, - "grad_norm": 1.484460473060608, - "learning_rate": 8.808040201005026e-05, - "loss": 5.6577, - "step": 12363 - }, - { - "epoch": 6.447979139504564, - "grad_norm": 1.5684963464736938, - "learning_rate": 8.807939698492462e-05, - "loss": 5.2501, - "step": 12364 - }, - { - "epoch": 6.448500651890482, - "grad_norm": 1.54085373878479, - "learning_rate": 8.8078391959799e-05, - "loss": 5.1348, - "step": 12365 - }, - { - "epoch": 6.449022164276402, - "grad_norm": 1.7493807077407837, - "learning_rate": 8.807738693467337e-05, - "loss": 5.6407, - "step": 12366 - }, - { - "epoch": 6.449543676662321, - "grad_norm": 1.405956745147705, - "learning_rate": 8.807638190954774e-05, - "loss": 5.596, - "step": 12367 - }, - { - "epoch": 6.45006518904824, - "grad_norm": 1.3265783786773682, - "learning_rate": 8.807537688442211e-05, - "loss": 5.534, - "step": 12368 - }, - { - "epoch": 6.450586701434159, - "grad_norm": 1.5398539304733276, - "learning_rate": 8.807437185929649e-05, - "loss": 4.9131, - "step": 12369 - }, - { - "epoch": 6.451108213820079, - "grad_norm": 1.3642629384994507, - "learning_rate": 8.807336683417085e-05, - "loss": 5.8148, - "step": 12370 - }, - { - "epoch": 6.451629726205997, - "grad_norm": 1.3724558353424072, - "learning_rate": 8.807236180904523e-05, - "loss": 5.5896, - "step": 12371 - }, - { - "epoch": 6.452151238591917, - "grad_norm": 1.436241626739502, - "learning_rate": 8.80713567839196e-05, - "loss": 5.4805, - "step": 12372 - }, - { - "epoch": 6.452672750977836, - "grad_norm": 1.3802975416183472, - "learning_rate": 8.807035175879398e-05, - "loss": 5.6184, - "step": 12373 - }, - { - "epoch": 6.453194263363755, - "grad_norm": 1.4066349267959595, - "learning_rate": 8.806934673366835e-05, - "loss": 5.7961, - "step": 12374 - }, - { - "epoch": 6.453715775749674, - "grad_norm": 1.438141942024231, - "learning_rate": 8.806834170854273e-05, - "loss": 5.4654, - "step": 12375 - }, - { - "epoch": 6.454237288135594, - "grad_norm": 1.4204347133636475, - "learning_rate": 8.806733668341709e-05, - "loss": 5.5599, - "step": 12376 - }, - { - "epoch": 6.454758800521512, - "grad_norm": 1.5285028219223022, - "learning_rate": 8.806633165829145e-05, - "loss": 5.6744, - "step": 12377 - }, - { - "epoch": 6.455280312907432, - "grad_norm": 1.6296660900115967, - "learning_rate": 8.806532663316583e-05, - "loss": 5.083, - "step": 12378 - }, - { - "epoch": 6.455801825293351, - "grad_norm": 1.4418717622756958, - "learning_rate": 8.80643216080402e-05, - "loss": 5.4439, - "step": 12379 - }, - { - "epoch": 6.45632333767927, - "grad_norm": 1.4093581438064575, - "learning_rate": 8.806331658291457e-05, - "loss": 5.345, - "step": 12380 - }, - { - "epoch": 6.456844850065189, - "grad_norm": 1.611208438873291, - "learning_rate": 8.806231155778895e-05, - "loss": 5.7432, - "step": 12381 - }, - { - "epoch": 6.457366362451109, - "grad_norm": 1.590815782546997, - "learning_rate": 8.806130653266333e-05, - "loss": 5.3405, - "step": 12382 - }, - { - "epoch": 6.457887874837027, - "grad_norm": 1.3767541646957397, - "learning_rate": 8.80603015075377e-05, - "loss": 5.7199, - "step": 12383 - }, - { - "epoch": 6.458409387222947, - "grad_norm": 1.3843947649002075, - "learning_rate": 8.805929648241207e-05, - "loss": 5.7899, - "step": 12384 - }, - { - "epoch": 6.458930899608866, - "grad_norm": 1.4581185579299927, - "learning_rate": 8.805829145728644e-05, - "loss": 4.8054, - "step": 12385 - }, - { - "epoch": 6.459452411994785, - "grad_norm": 1.5644441843032837, - "learning_rate": 8.805728643216081e-05, - "loss": 5.0286, - "step": 12386 - }, - { - "epoch": 6.459973924380704, - "grad_norm": 1.5694894790649414, - "learning_rate": 8.805628140703518e-05, - "loss": 5.5962, - "step": 12387 - }, - { - "epoch": 6.460495436766623, - "grad_norm": 1.5887560844421387, - "learning_rate": 8.805527638190956e-05, - "loss": 5.2852, - "step": 12388 - }, - { - "epoch": 6.461016949152542, - "grad_norm": 1.4364598989486694, - "learning_rate": 8.805427135678392e-05, - "loss": 5.2558, - "step": 12389 - }, - { - "epoch": 6.461538461538462, - "grad_norm": 1.437892198562622, - "learning_rate": 8.805326633165828e-05, - "loss": 5.7539, - "step": 12390 - }, - { - "epoch": 6.462059973924381, - "grad_norm": 1.5709222555160522, - "learning_rate": 8.805226130653266e-05, - "loss": 5.1556, - "step": 12391 - }, - { - "epoch": 6.4625814863103, - "grad_norm": 1.368564486503601, - "learning_rate": 8.805125628140704e-05, - "loss": 5.7982, - "step": 12392 - }, - { - "epoch": 6.463102998696219, - "grad_norm": 1.7009506225585938, - "learning_rate": 8.805025125628142e-05, - "loss": 5.3254, - "step": 12393 - }, - { - "epoch": 6.463624511082138, - "grad_norm": 1.4114599227905273, - "learning_rate": 8.804924623115578e-05, - "loss": 5.3063, - "step": 12394 - }, - { - "epoch": 6.464146023468057, - "grad_norm": 1.4827611446380615, - "learning_rate": 8.804824120603016e-05, - "loss": 5.4065, - "step": 12395 - }, - { - "epoch": 6.464667535853977, - "grad_norm": 1.415856957435608, - "learning_rate": 8.804723618090452e-05, - "loss": 5.5719, - "step": 12396 - }, - { - "epoch": 6.465189048239896, - "grad_norm": 1.4004614353179932, - "learning_rate": 8.80462311557789e-05, - "loss": 5.4921, - "step": 12397 - }, - { - "epoch": 6.465710560625815, - "grad_norm": 1.4583027362823486, - "learning_rate": 8.804522613065327e-05, - "loss": 5.6516, - "step": 12398 - }, - { - "epoch": 6.466232073011734, - "grad_norm": 1.8655674457550049, - "learning_rate": 8.804422110552764e-05, - "loss": 5.4548, - "step": 12399 - }, - { - "epoch": 6.466753585397653, - "grad_norm": 1.4628874063491821, - "learning_rate": 8.804321608040201e-05, - "loss": 6.0187, - "step": 12400 - }, - { - "epoch": 6.467275097783572, - "grad_norm": 1.3126760721206665, - "learning_rate": 8.804221105527639e-05, - "loss": 5.7048, - "step": 12401 - }, - { - "epoch": 6.467796610169492, - "grad_norm": 1.3370603322982788, - "learning_rate": 8.804120603015076e-05, - "loss": 5.616, - "step": 12402 - }, - { - "epoch": 6.46831812255541, - "grad_norm": 1.4881099462509155, - "learning_rate": 8.804020100502513e-05, - "loss": 5.7212, - "step": 12403 - }, - { - "epoch": 6.46883963494133, - "grad_norm": 1.3161354064941406, - "learning_rate": 8.80391959798995e-05, - "loss": 5.9275, - "step": 12404 - }, - { - "epoch": 6.469361147327249, - "grad_norm": 1.446272611618042, - "learning_rate": 8.803819095477387e-05, - "loss": 4.7986, - "step": 12405 - }, - { - "epoch": 6.469882659713168, - "grad_norm": 1.4071003198623657, - "learning_rate": 8.803718592964825e-05, - "loss": 5.9418, - "step": 12406 - }, - { - "epoch": 6.470404172099087, - "grad_norm": 1.5117669105529785, - "learning_rate": 8.803618090452261e-05, - "loss": 5.6083, - "step": 12407 - }, - { - "epoch": 6.470925684485007, - "grad_norm": 1.4707705974578857, - "learning_rate": 8.803517587939699e-05, - "loss": 5.3514, - "step": 12408 - }, - { - "epoch": 6.471447196870925, - "grad_norm": 1.7434885501861572, - "learning_rate": 8.803417085427135e-05, - "loss": 5.252, - "step": 12409 - }, - { - "epoch": 6.471968709256845, - "grad_norm": 1.5283344984054565, - "learning_rate": 8.803316582914573e-05, - "loss": 5.7439, - "step": 12410 - }, - { - "epoch": 6.472490221642764, - "grad_norm": 1.5107430219650269, - "learning_rate": 8.80321608040201e-05, - "loss": 5.8647, - "step": 12411 - }, - { - "epoch": 6.473011734028683, - "grad_norm": 1.4917230606079102, - "learning_rate": 8.803115577889447e-05, - "loss": 5.3346, - "step": 12412 - }, - { - "epoch": 6.473533246414602, - "grad_norm": 1.560365080833435, - "learning_rate": 8.803015075376885e-05, - "loss": 5.6884, - "step": 12413 - }, - { - "epoch": 6.474054758800522, - "grad_norm": 1.4106907844543457, - "learning_rate": 8.802914572864323e-05, - "loss": 5.7369, - "step": 12414 - }, - { - "epoch": 6.47457627118644, - "grad_norm": 1.7385326623916626, - "learning_rate": 8.80281407035176e-05, - "loss": 5.4126, - "step": 12415 - }, - { - "epoch": 6.47509778357236, - "grad_norm": 1.4627634286880493, - "learning_rate": 8.802713567839196e-05, - "loss": 5.4544, - "step": 12416 - }, - { - "epoch": 6.475619295958279, - "grad_norm": 1.4005321264266968, - "learning_rate": 8.802613065326634e-05, - "loss": 5.687, - "step": 12417 - }, - { - "epoch": 6.476140808344198, - "grad_norm": 1.6482642889022827, - "learning_rate": 8.80251256281407e-05, - "loss": 5.6323, - "step": 12418 - }, - { - "epoch": 6.476662320730117, - "grad_norm": 1.5998446941375732, - "learning_rate": 8.802412060301508e-05, - "loss": 5.0683, - "step": 12419 - }, - { - "epoch": 6.477183833116037, - "grad_norm": 1.469269037246704, - "learning_rate": 8.802311557788944e-05, - "loss": 5.9523, - "step": 12420 - }, - { - "epoch": 6.477705345501955, - "grad_norm": 1.5380696058273315, - "learning_rate": 8.802211055276382e-05, - "loss": 5.6246, - "step": 12421 - }, - { - "epoch": 6.478226857887875, - "grad_norm": 1.4603004455566406, - "learning_rate": 8.80211055276382e-05, - "loss": 5.7065, - "step": 12422 - }, - { - "epoch": 6.478748370273794, - "grad_norm": 1.5581802129745483, - "learning_rate": 8.802010050251258e-05, - "loss": 5.0572, - "step": 12423 - }, - { - "epoch": 6.479269882659713, - "grad_norm": 1.5039197206497192, - "learning_rate": 8.801909547738694e-05, - "loss": 5.6126, - "step": 12424 - }, - { - "epoch": 6.479791395045632, - "grad_norm": 1.5614122152328491, - "learning_rate": 8.801809045226132e-05, - "loss": 5.5312, - "step": 12425 - }, - { - "epoch": 6.480312907431552, - "grad_norm": 1.3942195177078247, - "learning_rate": 8.801708542713568e-05, - "loss": 5.7958, - "step": 12426 - }, - { - "epoch": 6.48083441981747, - "grad_norm": 1.468167781829834, - "learning_rate": 8.801608040201006e-05, - "loss": 5.6224, - "step": 12427 - }, - { - "epoch": 6.48135593220339, - "grad_norm": 1.4568753242492676, - "learning_rate": 8.801507537688443e-05, - "loss": 5.6129, - "step": 12428 - }, - { - "epoch": 6.481877444589309, - "grad_norm": 1.447894811630249, - "learning_rate": 8.801407035175879e-05, - "loss": 5.1642, - "step": 12429 - }, - { - "epoch": 6.482398956975228, - "grad_norm": 1.4283291101455688, - "learning_rate": 8.801306532663317e-05, - "loss": 5.7924, - "step": 12430 - }, - { - "epoch": 6.482920469361147, - "grad_norm": 1.4039815664291382, - "learning_rate": 8.801206030150753e-05, - "loss": 5.8967, - "step": 12431 - }, - { - "epoch": 6.483441981747067, - "grad_norm": 1.5332776308059692, - "learning_rate": 8.801105527638191e-05, - "loss": 5.5242, - "step": 12432 - }, - { - "epoch": 6.483963494132985, - "grad_norm": 1.5946636199951172, - "learning_rate": 8.801005025125629e-05, - "loss": 5.2754, - "step": 12433 - }, - { - "epoch": 6.484485006518905, - "grad_norm": 1.5294524431228638, - "learning_rate": 8.800904522613067e-05, - "loss": 5.3223, - "step": 12434 - }, - { - "epoch": 6.485006518904824, - "grad_norm": 1.486592173576355, - "learning_rate": 8.800804020100503e-05, - "loss": 5.2983, - "step": 12435 - }, - { - "epoch": 6.485528031290743, - "grad_norm": 1.672872543334961, - "learning_rate": 8.800703517587941e-05, - "loss": 5.2014, - "step": 12436 - }, - { - "epoch": 6.486049543676662, - "grad_norm": 1.4293420314788818, - "learning_rate": 8.800603015075377e-05, - "loss": 5.8217, - "step": 12437 - }, - { - "epoch": 6.486571056062582, - "grad_norm": 1.465050220489502, - "learning_rate": 8.800502512562815e-05, - "loss": 5.6198, - "step": 12438 - }, - { - "epoch": 6.4870925684485, - "grad_norm": 1.4624377489089966, - "learning_rate": 8.800402010050251e-05, - "loss": 5.525, - "step": 12439 - }, - { - "epoch": 6.48761408083442, - "grad_norm": 1.5186265707015991, - "learning_rate": 8.800301507537689e-05, - "loss": 5.5175, - "step": 12440 - }, - { - "epoch": 6.488135593220339, - "grad_norm": 1.3876105546951294, - "learning_rate": 8.800201005025126e-05, - "loss": 5.6541, - "step": 12441 - }, - { - "epoch": 6.488657105606258, - "grad_norm": 1.6032865047454834, - "learning_rate": 8.800100502512563e-05, - "loss": 5.2484, - "step": 12442 - }, - { - "epoch": 6.489178617992177, - "grad_norm": 1.4521745443344116, - "learning_rate": 8.800000000000001e-05, - "loss": 5.792, - "step": 12443 - }, - { - "epoch": 6.489700130378097, - "grad_norm": 1.413445234298706, - "learning_rate": 8.799899497487438e-05, - "loss": 5.7826, - "step": 12444 - }, - { - "epoch": 6.490221642764015, - "grad_norm": 1.2929264307022095, - "learning_rate": 8.799798994974875e-05, - "loss": 5.7395, - "step": 12445 - }, - { - "epoch": 6.490743155149935, - "grad_norm": 1.49071204662323, - "learning_rate": 8.799698492462312e-05, - "loss": 5.5639, - "step": 12446 - }, - { - "epoch": 6.491264667535854, - "grad_norm": 1.4352720975875854, - "learning_rate": 8.79959798994975e-05, - "loss": 5.5656, - "step": 12447 - }, - { - "epoch": 6.491786179921773, - "grad_norm": 1.4231700897216797, - "learning_rate": 8.799497487437186e-05, - "loss": 5.4795, - "step": 12448 - }, - { - "epoch": 6.492307692307692, - "grad_norm": 1.3763982057571411, - "learning_rate": 8.799396984924624e-05, - "loss": 6.0767, - "step": 12449 - }, - { - "epoch": 6.492829204693612, - "grad_norm": 1.5284106731414795, - "learning_rate": 8.79929648241206e-05, - "loss": 5.6098, - "step": 12450 - }, - { - "epoch": 6.49335071707953, - "grad_norm": 1.3937326669692993, - "learning_rate": 8.799195979899498e-05, - "loss": 4.7173, - "step": 12451 - }, - { - "epoch": 6.49387222946545, - "grad_norm": 1.4928662776947021, - "learning_rate": 8.799095477386934e-05, - "loss": 5.4585, - "step": 12452 - }, - { - "epoch": 6.494393741851369, - "grad_norm": 1.3493150472640991, - "learning_rate": 8.798994974874372e-05, - "loss": 5.815, - "step": 12453 - }, - { - "epoch": 6.494915254237288, - "grad_norm": 1.5055917501449585, - "learning_rate": 8.79889447236181e-05, - "loss": 4.9744, - "step": 12454 - }, - { - "epoch": 6.495436766623207, - "grad_norm": 1.4732328653335571, - "learning_rate": 8.798793969849248e-05, - "loss": 5.3897, - "step": 12455 - }, - { - "epoch": 6.495958279009127, - "grad_norm": 1.3425999879837036, - "learning_rate": 8.798693467336684e-05, - "loss": 4.9154, - "step": 12456 - }, - { - "epoch": 6.496479791395045, - "grad_norm": 1.4671053886413574, - "learning_rate": 8.79859296482412e-05, - "loss": 5.6627, - "step": 12457 - }, - { - "epoch": 6.497001303780965, - "grad_norm": 1.441741943359375, - "learning_rate": 8.798492462311558e-05, - "loss": 5.1425, - "step": 12458 - }, - { - "epoch": 6.497522816166884, - "grad_norm": 1.4229652881622314, - "learning_rate": 8.798391959798995e-05, - "loss": 5.7178, - "step": 12459 - }, - { - "epoch": 6.498044328552803, - "grad_norm": 1.3382782936096191, - "learning_rate": 8.798291457286433e-05, - "loss": 5.4547, - "step": 12460 - }, - { - "epoch": 6.498565840938722, - "grad_norm": 1.5382015705108643, - "learning_rate": 8.798190954773869e-05, - "loss": 5.7715, - "step": 12461 - }, - { - "epoch": 6.499087353324642, - "grad_norm": 1.4722826480865479, - "learning_rate": 8.798090452261307e-05, - "loss": 5.7657, - "step": 12462 - }, - { - "epoch": 6.49960886571056, - "grad_norm": 1.4082194566726685, - "learning_rate": 8.797989949748745e-05, - "loss": 5.3909, - "step": 12463 - }, - { - "epoch": 6.50013037809648, - "grad_norm": 1.464550256729126, - "learning_rate": 8.797889447236182e-05, - "loss": 5.6014, - "step": 12464 - }, - { - "epoch": 6.500651890482399, - "grad_norm": 1.4342536926269531, - "learning_rate": 8.797788944723619e-05, - "loss": 5.5927, - "step": 12465 - }, - { - "epoch": 6.501173402868318, - "grad_norm": 1.4378613233566284, - "learning_rate": 8.797688442211057e-05, - "loss": 5.7741, - "step": 12466 - }, - { - "epoch": 6.501694915254237, - "grad_norm": 1.3294721841812134, - "learning_rate": 8.797587939698493e-05, - "loss": 5.7286, - "step": 12467 - }, - { - "epoch": 6.502216427640157, - "grad_norm": 1.5109871625900269, - "learning_rate": 8.797487437185931e-05, - "loss": 5.64, - "step": 12468 - }, - { - "epoch": 6.502737940026075, - "grad_norm": 1.5205719470977783, - "learning_rate": 8.797386934673367e-05, - "loss": 5.915, - "step": 12469 - }, - { - "epoch": 6.503259452411995, - "grad_norm": 1.4309018850326538, - "learning_rate": 8.797286432160804e-05, - "loss": 5.5132, - "step": 12470 - }, - { - "epoch": 6.5037809647979135, - "grad_norm": 1.332653522491455, - "learning_rate": 8.797185929648241e-05, - "loss": 5.7183, - "step": 12471 - }, - { - "epoch": 6.504302477183833, - "grad_norm": 1.5436627864837646, - "learning_rate": 8.797085427135678e-05, - "loss": 5.4086, - "step": 12472 - }, - { - "epoch": 6.504823989569752, - "grad_norm": 1.407082200050354, - "learning_rate": 8.796984924623116e-05, - "loss": 5.8656, - "step": 12473 - }, - { - "epoch": 6.505345501955672, - "grad_norm": 1.5265003442764282, - "learning_rate": 8.796884422110553e-05, - "loss": 5.5808, - "step": 12474 - }, - { - "epoch": 6.50586701434159, - "grad_norm": 1.3611451387405396, - "learning_rate": 8.796783919597991e-05, - "loss": 5.4537, - "step": 12475 - }, - { - "epoch": 6.50638852672751, - "grad_norm": 1.3606764078140259, - "learning_rate": 8.796683417085428e-05, - "loss": 5.798, - "step": 12476 - }, - { - "epoch": 6.5069100391134285, - "grad_norm": 1.628183364868164, - "learning_rate": 8.796582914572865e-05, - "loss": 5.0206, - "step": 12477 - }, - { - "epoch": 6.507431551499348, - "grad_norm": 1.4681569337844849, - "learning_rate": 8.796482412060302e-05, - "loss": 5.4871, - "step": 12478 - }, - { - "epoch": 6.507953063885267, - "grad_norm": 1.3806575536727905, - "learning_rate": 8.79638190954774e-05, - "loss": 5.4177, - "step": 12479 - }, - { - "epoch": 6.508474576271187, - "grad_norm": 1.4010251760482788, - "learning_rate": 8.796281407035176e-05, - "loss": 5.9606, - "step": 12480 - }, - { - "epoch": 6.508996088657105, - "grad_norm": 1.4544035196304321, - "learning_rate": 8.796180904522614e-05, - "loss": 5.6542, - "step": 12481 - }, - { - "epoch": 6.509517601043025, - "grad_norm": 1.456703543663025, - "learning_rate": 8.79608040201005e-05, - "loss": 5.5143, - "step": 12482 - }, - { - "epoch": 6.5100391134289435, - "grad_norm": 1.3905287981033325, - "learning_rate": 8.795979899497487e-05, - "loss": 5.9592, - "step": 12483 - }, - { - "epoch": 6.510560625814863, - "grad_norm": 1.462240219116211, - "learning_rate": 8.795879396984924e-05, - "loss": 5.8446, - "step": 12484 - }, - { - "epoch": 6.511082138200782, - "grad_norm": 1.3944820165634155, - "learning_rate": 8.795778894472362e-05, - "loss": 5.6792, - "step": 12485 - }, - { - "epoch": 6.511603650586702, - "grad_norm": 1.3809010982513428, - "learning_rate": 8.7956783919598e-05, - "loss": 5.407, - "step": 12486 - }, - { - "epoch": 6.5121251629726205, - "grad_norm": 1.3088277578353882, - "learning_rate": 8.795577889447236e-05, - "loss": 5.9453, - "step": 12487 - }, - { - "epoch": 6.51264667535854, - "grad_norm": 1.4226888418197632, - "learning_rate": 8.795477386934674e-05, - "loss": 5.186, - "step": 12488 - }, - { - "epoch": 6.5131681877444585, - "grad_norm": 1.3380358219146729, - "learning_rate": 8.79537688442211e-05, - "loss": 6.0949, - "step": 12489 - }, - { - "epoch": 6.513689700130378, - "grad_norm": 1.5355838537216187, - "learning_rate": 8.795276381909548e-05, - "loss": 5.2893, - "step": 12490 - }, - { - "epoch": 6.514211212516297, - "grad_norm": 1.3450136184692383, - "learning_rate": 8.795175879396985e-05, - "loss": 5.9804, - "step": 12491 - }, - { - "epoch": 6.514732724902217, - "grad_norm": 1.4839774370193481, - "learning_rate": 8.795075376884423e-05, - "loss": 5.6333, - "step": 12492 - }, - { - "epoch": 6.5152542372881355, - "grad_norm": 1.47777259349823, - "learning_rate": 8.794974874371859e-05, - "loss": 5.4413, - "step": 12493 - }, - { - "epoch": 6.515775749674055, - "grad_norm": 1.423406958580017, - "learning_rate": 8.794874371859297e-05, - "loss": 5.4427, - "step": 12494 - }, - { - "epoch": 6.5162972620599735, - "grad_norm": 1.4609050750732422, - "learning_rate": 8.794773869346735e-05, - "loss": 5.3153, - "step": 12495 - }, - { - "epoch": 6.516818774445893, - "grad_norm": 1.584741473197937, - "learning_rate": 8.794673366834171e-05, - "loss": 5.1374, - "step": 12496 - }, - { - "epoch": 6.517340286831812, - "grad_norm": 1.5071356296539307, - "learning_rate": 8.794572864321609e-05, - "loss": 5.4185, - "step": 12497 - }, - { - "epoch": 6.517861799217732, - "grad_norm": 1.4606349468231201, - "learning_rate": 8.794472361809045e-05, - "loss": 5.3414, - "step": 12498 - }, - { - "epoch": 6.5183833116036505, - "grad_norm": 1.4920042753219604, - "learning_rate": 8.794371859296483e-05, - "loss": 5.5956, - "step": 12499 - }, - { - "epoch": 6.51890482398957, - "grad_norm": 1.4514347314834595, - "learning_rate": 8.79427135678392e-05, - "loss": 5.4224, - "step": 12500 - }, - { - "epoch": 6.5194263363754885, - "grad_norm": 1.418641448020935, - "learning_rate": 8.794170854271357e-05, - "loss": 5.7454, - "step": 12501 - }, - { - "epoch": 6.519947848761408, - "grad_norm": 1.430914282798767, - "learning_rate": 8.794070351758794e-05, - "loss": 5.4475, - "step": 12502 - }, - { - "epoch": 6.5204693611473274, - "grad_norm": 1.6658300161361694, - "learning_rate": 8.793969849246232e-05, - "loss": 5.6307, - "step": 12503 - }, - { - "epoch": 6.520990873533247, - "grad_norm": 1.5116956233978271, - "learning_rate": 8.793869346733668e-05, - "loss": 4.508, - "step": 12504 - }, - { - "epoch": 6.5215123859191655, - "grad_norm": 1.659289002418518, - "learning_rate": 8.793768844221106e-05, - "loss": 4.9623, - "step": 12505 - }, - { - "epoch": 6.522033898305085, - "grad_norm": 1.4499802589416504, - "learning_rate": 8.793668341708543e-05, - "loss": 5.3696, - "step": 12506 - }, - { - "epoch": 6.5225554106910035, - "grad_norm": 1.4461456537246704, - "learning_rate": 8.793567839195981e-05, - "loss": 5.6694, - "step": 12507 - }, - { - "epoch": 6.523076923076923, - "grad_norm": 1.4375935792922974, - "learning_rate": 8.793467336683418e-05, - "loss": 5.476, - "step": 12508 - }, - { - "epoch": 6.5235984354628425, - "grad_norm": 1.4778969287872314, - "learning_rate": 8.793366834170854e-05, - "loss": 5.8047, - "step": 12509 - }, - { - "epoch": 6.524119947848761, - "grad_norm": 1.355550765991211, - "learning_rate": 8.793266331658292e-05, - "loss": 6.1013, - "step": 12510 - }, - { - "epoch": 6.5246414602346805, - "grad_norm": 1.4744062423706055, - "learning_rate": 8.793165829145728e-05, - "loss": 5.1813, - "step": 12511 - }, - { - "epoch": 6.5251629726206, - "grad_norm": 1.3988584280014038, - "learning_rate": 8.793065326633166e-05, - "loss": 5.7425, - "step": 12512 - }, - { - "epoch": 6.5256844850065185, - "grad_norm": 1.4191807508468628, - "learning_rate": 8.792964824120603e-05, - "loss": 5.6828, - "step": 12513 - }, - { - "epoch": 6.526205997392438, - "grad_norm": 1.398184061050415, - "learning_rate": 8.79286432160804e-05, - "loss": 5.3636, - "step": 12514 - }, - { - "epoch": 6.5267275097783575, - "grad_norm": 1.3990029096603394, - "learning_rate": 8.792763819095478e-05, - "loss": 5.2309, - "step": 12515 - }, - { - "epoch": 6.527249022164276, - "grad_norm": 1.3994126319885254, - "learning_rate": 8.792663316582916e-05, - "loss": 6.0042, - "step": 12516 - }, - { - "epoch": 6.5277705345501955, - "grad_norm": 1.4808038473129272, - "learning_rate": 8.792562814070352e-05, - "loss": 5.5677, - "step": 12517 - }, - { - "epoch": 6.528292046936115, - "grad_norm": 1.4258002042770386, - "learning_rate": 8.79246231155779e-05, - "loss": 5.7161, - "step": 12518 - }, - { - "epoch": 6.5288135593220336, - "grad_norm": 1.622502088546753, - "learning_rate": 8.792361809045227e-05, - "loss": 5.5452, - "step": 12519 - }, - { - "epoch": 6.529335071707953, - "grad_norm": 1.3774826526641846, - "learning_rate": 8.792261306532664e-05, - "loss": 5.5119, - "step": 12520 - }, - { - "epoch": 6.5298565840938725, - "grad_norm": 1.5008872747421265, - "learning_rate": 8.792160804020101e-05, - "loss": 5.44, - "step": 12521 - }, - { - "epoch": 6.530378096479791, - "grad_norm": 1.4999505281448364, - "learning_rate": 8.792060301507537e-05, - "loss": 5.6408, - "step": 12522 - }, - { - "epoch": 6.5308996088657105, - "grad_norm": 1.329841136932373, - "learning_rate": 8.791959798994975e-05, - "loss": 5.9098, - "step": 12523 - }, - { - "epoch": 6.53142112125163, - "grad_norm": 1.4376024007797241, - "learning_rate": 8.791859296482411e-05, - "loss": 5.5938, - "step": 12524 - }, - { - "epoch": 6.531942633637549, - "grad_norm": 1.4345446825027466, - "learning_rate": 8.791758793969849e-05, - "loss": 5.197, - "step": 12525 - }, - { - "epoch": 6.532464146023468, - "grad_norm": 1.3469228744506836, - "learning_rate": 8.791658291457287e-05, - "loss": 5.9648, - "step": 12526 - }, - { - "epoch": 6.5329856584093875, - "grad_norm": 1.4169988632202148, - "learning_rate": 8.791557788944725e-05, - "loss": 5.8552, - "step": 12527 - }, - { - "epoch": 6.533507170795306, - "grad_norm": 1.3211077451705933, - "learning_rate": 8.791457286432161e-05, - "loss": 5.7412, - "step": 12528 - }, - { - "epoch": 6.5340286831812255, - "grad_norm": 1.3898082971572876, - "learning_rate": 8.791356783919599e-05, - "loss": 5.2091, - "step": 12529 - }, - { - "epoch": 6.534550195567145, - "grad_norm": 1.45436429977417, - "learning_rate": 8.791256281407035e-05, - "loss": 5.5723, - "step": 12530 - }, - { - "epoch": 6.535071707953064, - "grad_norm": 1.3811384439468384, - "learning_rate": 8.791155778894473e-05, - "loss": 5.8656, - "step": 12531 - }, - { - "epoch": 6.535593220338983, - "grad_norm": 1.4799333810806274, - "learning_rate": 8.79105527638191e-05, - "loss": 5.5559, - "step": 12532 - }, - { - "epoch": 6.5361147327249025, - "grad_norm": 1.3495765924453735, - "learning_rate": 8.790954773869347e-05, - "loss": 5.614, - "step": 12533 - }, - { - "epoch": 6.536636245110821, - "grad_norm": 1.653756856918335, - "learning_rate": 8.790854271356784e-05, - "loss": 5.2663, - "step": 12534 - }, - { - "epoch": 6.5371577574967406, - "grad_norm": 1.469201683998108, - "learning_rate": 8.790753768844222e-05, - "loss": 5.5156, - "step": 12535 - }, - { - "epoch": 6.53767926988266, - "grad_norm": 1.4156748056411743, - "learning_rate": 8.79065326633166e-05, - "loss": 5.8851, - "step": 12536 - }, - { - "epoch": 6.538200782268579, - "grad_norm": 1.4300384521484375, - "learning_rate": 8.790552763819096e-05, - "loss": 5.5242, - "step": 12537 - }, - { - "epoch": 6.538722294654498, - "grad_norm": 1.4932236671447754, - "learning_rate": 8.790452261306534e-05, - "loss": 4.9289, - "step": 12538 - }, - { - "epoch": 6.5392438070404175, - "grad_norm": 1.4619979858398438, - "learning_rate": 8.79035175879397e-05, - "loss": 5.6292, - "step": 12539 - }, - { - "epoch": 6.539765319426336, - "grad_norm": 1.422136902809143, - "learning_rate": 8.790251256281408e-05, - "loss": 5.8012, - "step": 12540 - }, - { - "epoch": 6.540286831812256, - "grad_norm": 1.4003074169158936, - "learning_rate": 8.790150753768844e-05, - "loss": 5.213, - "step": 12541 - }, - { - "epoch": 6.540808344198175, - "grad_norm": 1.4106879234313965, - "learning_rate": 8.790050251256282e-05, - "loss": 5.5448, - "step": 12542 - }, - { - "epoch": 6.541329856584094, - "grad_norm": 1.482836127281189, - "learning_rate": 8.789949748743718e-05, - "loss": 5.465, - "step": 12543 - }, - { - "epoch": 6.541851368970013, - "grad_norm": 1.408078908920288, - "learning_rate": 8.789849246231156e-05, - "loss": 5.7231, - "step": 12544 - }, - { - "epoch": 6.5423728813559325, - "grad_norm": 1.3140257596969604, - "learning_rate": 8.789748743718593e-05, - "loss": 5.6402, - "step": 12545 - }, - { - "epoch": 6.542894393741851, - "grad_norm": 1.4012560844421387, - "learning_rate": 8.78964824120603e-05, - "loss": 5.7401, - "step": 12546 - }, - { - "epoch": 6.543415906127771, - "grad_norm": 1.5670433044433594, - "learning_rate": 8.789547738693468e-05, - "loss": 5.1313, - "step": 12547 - }, - { - "epoch": 6.54393741851369, - "grad_norm": 1.430067539215088, - "learning_rate": 8.789447236180906e-05, - "loss": 5.6382, - "step": 12548 - }, - { - "epoch": 6.544458930899609, - "grad_norm": 1.3594905138015747, - "learning_rate": 8.789346733668342e-05, - "loss": 5.7503, - "step": 12549 - }, - { - "epoch": 6.544980443285528, - "grad_norm": 1.4518684148788452, - "learning_rate": 8.789246231155779e-05, - "loss": 5.649, - "step": 12550 - }, - { - "epoch": 6.5455019556714475, - "grad_norm": 1.4472609758377075, - "learning_rate": 8.789145728643217e-05, - "loss": 5.1447, - "step": 12551 - }, - { - "epoch": 6.546023468057366, - "grad_norm": 1.4639800786972046, - "learning_rate": 8.789045226130653e-05, - "loss": 5.756, - "step": 12552 - }, - { - "epoch": 6.546544980443286, - "grad_norm": 1.3664597272872925, - "learning_rate": 8.788944723618091e-05, - "loss": 5.9137, - "step": 12553 - }, - { - "epoch": 6.547066492829205, - "grad_norm": 1.4685629606246948, - "learning_rate": 8.788844221105527e-05, - "loss": 5.2428, - "step": 12554 - }, - { - "epoch": 6.547588005215124, - "grad_norm": 1.3590062856674194, - "learning_rate": 8.788743718592965e-05, - "loss": 5.8129, - "step": 12555 - }, - { - "epoch": 6.548109517601043, - "grad_norm": 1.6284739971160889, - "learning_rate": 8.788643216080403e-05, - "loss": 4.9765, - "step": 12556 - }, - { - "epoch": 6.548631029986963, - "grad_norm": 1.492897391319275, - "learning_rate": 8.78854271356784e-05, - "loss": 5.2583, - "step": 12557 - }, - { - "epoch": 6.549152542372881, - "grad_norm": 1.4101232290267944, - "learning_rate": 8.788442211055277e-05, - "loss": 5.6307, - "step": 12558 - }, - { - "epoch": 6.549674054758801, - "grad_norm": 1.3859524726867676, - "learning_rate": 8.788341708542715e-05, - "loss": 6.0056, - "step": 12559 - }, - { - "epoch": 6.55019556714472, - "grad_norm": 1.4319469928741455, - "learning_rate": 8.788241206030151e-05, - "loss": 5.6361, - "step": 12560 - }, - { - "epoch": 6.550717079530639, - "grad_norm": 1.3612784147262573, - "learning_rate": 8.788140703517589e-05, - "loss": 5.847, - "step": 12561 - }, - { - "epoch": 6.551238591916558, - "grad_norm": 1.5109992027282715, - "learning_rate": 8.788040201005025e-05, - "loss": 5.6937, - "step": 12562 - }, - { - "epoch": 6.551760104302478, - "grad_norm": 1.5105756521224976, - "learning_rate": 8.787939698492462e-05, - "loss": 5.1252, - "step": 12563 - }, - { - "epoch": 6.552281616688396, - "grad_norm": 1.5078566074371338, - "learning_rate": 8.7878391959799e-05, - "loss": 5.7021, - "step": 12564 - }, - { - "epoch": 6.552803129074316, - "grad_norm": 1.373811960220337, - "learning_rate": 8.787738693467336e-05, - "loss": 5.5237, - "step": 12565 - }, - { - "epoch": 6.553324641460234, - "grad_norm": 1.5315057039260864, - "learning_rate": 8.787638190954774e-05, - "loss": 4.8549, - "step": 12566 - }, - { - "epoch": 6.553846153846154, - "grad_norm": 1.3391033411026, - "learning_rate": 8.787537688442212e-05, - "loss": 5.328, - "step": 12567 - }, - { - "epoch": 6.554367666232073, - "grad_norm": 1.4832059144973755, - "learning_rate": 8.78743718592965e-05, - "loss": 5.5747, - "step": 12568 - }, - { - "epoch": 6.554889178617993, - "grad_norm": 1.3858699798583984, - "learning_rate": 8.787336683417086e-05, - "loss": 5.9456, - "step": 12569 - }, - { - "epoch": 6.555410691003911, - "grad_norm": 1.2609801292419434, - "learning_rate": 8.787236180904524e-05, - "loss": 5.4284, - "step": 12570 - }, - { - "epoch": 6.555932203389831, - "grad_norm": 1.4727749824523926, - "learning_rate": 8.78713567839196e-05, - "loss": 5.7881, - "step": 12571 - }, - { - "epoch": 6.556453715775749, - "grad_norm": 1.4419755935668945, - "learning_rate": 8.787035175879398e-05, - "loss": 5.5763, - "step": 12572 - }, - { - "epoch": 6.556975228161669, - "grad_norm": 1.359143614768982, - "learning_rate": 8.786934673366834e-05, - "loss": 5.753, - "step": 12573 - }, - { - "epoch": 6.557496740547588, - "grad_norm": 1.4163727760314941, - "learning_rate": 8.786834170854272e-05, - "loss": 6.0008, - "step": 12574 - }, - { - "epoch": 6.558018252933508, - "grad_norm": 1.5440621376037598, - "learning_rate": 8.786733668341708e-05, - "loss": 5.464, - "step": 12575 - }, - { - "epoch": 6.558539765319426, - "grad_norm": 1.50942862033844, - "learning_rate": 8.786633165829146e-05, - "loss": 5.3041, - "step": 12576 - }, - { - "epoch": 6.559061277705346, - "grad_norm": 1.4390968084335327, - "learning_rate": 8.786532663316584e-05, - "loss": 5.7568, - "step": 12577 - }, - { - "epoch": 6.559582790091264, - "grad_norm": 1.6266926527023315, - "learning_rate": 8.78643216080402e-05, - "loss": 5.3408, - "step": 12578 - }, - { - "epoch": 6.560104302477184, - "grad_norm": 1.3825894594192505, - "learning_rate": 8.786331658291458e-05, - "loss": 5.6441, - "step": 12579 - }, - { - "epoch": 6.560625814863103, - "grad_norm": 1.4531939029693604, - "learning_rate": 8.786231155778895e-05, - "loss": 5.1761, - "step": 12580 - }, - { - "epoch": 6.561147327249023, - "grad_norm": 1.4138857126235962, - "learning_rate": 8.786130653266332e-05, - "loss": 5.2873, - "step": 12581 - }, - { - "epoch": 6.561668839634941, - "grad_norm": 1.2894039154052734, - "learning_rate": 8.786030150753769e-05, - "loss": 5.8891, - "step": 12582 - }, - { - "epoch": 6.562190352020861, - "grad_norm": 1.5632343292236328, - "learning_rate": 8.785929648241207e-05, - "loss": 5.3088, - "step": 12583 - }, - { - "epoch": 6.562711864406779, - "grad_norm": 1.4690945148468018, - "learning_rate": 8.785829145728643e-05, - "loss": 5.4137, - "step": 12584 - }, - { - "epoch": 6.563233376792699, - "grad_norm": 1.4702199697494507, - "learning_rate": 8.785728643216081e-05, - "loss": 5.2498, - "step": 12585 - }, - { - "epoch": 6.563754889178618, - "grad_norm": 1.4037480354309082, - "learning_rate": 8.785628140703517e-05, - "loss": 5.5399, - "step": 12586 - }, - { - "epoch": 6.564276401564538, - "grad_norm": 1.5832880735397339, - "learning_rate": 8.785527638190955e-05, - "loss": 5.3659, - "step": 12587 - }, - { - "epoch": 6.564797913950456, - "grad_norm": 1.5006401538848877, - "learning_rate": 8.785427135678393e-05, - "loss": 5.6755, - "step": 12588 - }, - { - "epoch": 6.565319426336376, - "grad_norm": 1.4322131872177124, - "learning_rate": 8.78532663316583e-05, - "loss": 5.7679, - "step": 12589 - }, - { - "epoch": 6.565840938722294, - "grad_norm": 1.420424222946167, - "learning_rate": 8.785226130653267e-05, - "loss": 5.5745, - "step": 12590 - }, - { - "epoch": 6.566362451108214, - "grad_norm": 1.459281086921692, - "learning_rate": 8.785125628140704e-05, - "loss": 5.7776, - "step": 12591 - }, - { - "epoch": 6.566883963494133, - "grad_norm": 1.4524139165878296, - "learning_rate": 8.785025125628141e-05, - "loss": 5.732, - "step": 12592 - }, - { - "epoch": 6.567405475880053, - "grad_norm": 1.4340914487838745, - "learning_rate": 8.784924623115578e-05, - "loss": 5.5924, - "step": 12593 - }, - { - "epoch": 6.567926988265971, - "grad_norm": 1.532705545425415, - "learning_rate": 8.784824120603016e-05, - "loss": 5.1333, - "step": 12594 - }, - { - "epoch": 6.568448500651891, - "grad_norm": 1.683125615119934, - "learning_rate": 8.784723618090452e-05, - "loss": 5.4298, - "step": 12595 - }, - { - "epoch": 6.568970013037809, - "grad_norm": 1.6345151662826538, - "learning_rate": 8.78462311557789e-05, - "loss": 5.5559, - "step": 12596 - }, - { - "epoch": 6.569491525423729, - "grad_norm": 1.471537709236145, - "learning_rate": 8.784522613065328e-05, - "loss": 5.5485, - "step": 12597 - }, - { - "epoch": 6.570013037809648, - "grad_norm": 1.5202357769012451, - "learning_rate": 8.784422110552765e-05, - "loss": 5.4631, - "step": 12598 - }, - { - "epoch": 6.570534550195567, - "grad_norm": 1.4303057193756104, - "learning_rate": 8.784321608040202e-05, - "loss": 5.65, - "step": 12599 - }, - { - "epoch": 6.571056062581486, - "grad_norm": 1.4635146856307983, - "learning_rate": 8.78422110552764e-05, - "loss": 5.6473, - "step": 12600 - }, - { - "epoch": 6.571577574967406, - "grad_norm": 1.347988486289978, - "learning_rate": 8.784120603015076e-05, - "loss": 5.7522, - "step": 12601 - }, - { - "epoch": 6.572099087353324, - "grad_norm": 1.3689754009246826, - "learning_rate": 8.784020100502512e-05, - "loss": 5.4076, - "step": 12602 - }, - { - "epoch": 6.572620599739244, - "grad_norm": 1.4595489501953125, - "learning_rate": 8.78391959798995e-05, - "loss": 4.8959, - "step": 12603 - }, - { - "epoch": 6.573142112125163, - "grad_norm": 1.5766183137893677, - "learning_rate": 8.783819095477387e-05, - "loss": 5.0713, - "step": 12604 - }, - { - "epoch": 6.573663624511082, - "grad_norm": 1.424189805984497, - "learning_rate": 8.783718592964824e-05, - "loss": 5.6537, - "step": 12605 - }, - { - "epoch": 6.574185136897001, - "grad_norm": 1.4660588502883911, - "learning_rate": 8.783618090452261e-05, - "loss": 5.6814, - "step": 12606 - }, - { - "epoch": 6.574706649282921, - "grad_norm": 1.4175626039505005, - "learning_rate": 8.783517587939699e-05, - "loss": 5.4575, - "step": 12607 - }, - { - "epoch": 6.575228161668839, - "grad_norm": 1.4180727005004883, - "learning_rate": 8.783417085427136e-05, - "loss": 5.694, - "step": 12608 - }, - { - "epoch": 6.575749674054759, - "grad_norm": 1.3509196043014526, - "learning_rate": 8.783316582914574e-05, - "loss": 5.9463, - "step": 12609 - }, - { - "epoch": 6.576271186440678, - "grad_norm": 1.5047180652618408, - "learning_rate": 8.78321608040201e-05, - "loss": 5.7021, - "step": 12610 - }, - { - "epoch": 6.576792698826597, - "grad_norm": 1.4767088890075684, - "learning_rate": 8.783115577889448e-05, - "loss": 5.4066, - "step": 12611 - }, - { - "epoch": 6.577314211212516, - "grad_norm": 1.562290906906128, - "learning_rate": 8.783015075376885e-05, - "loss": 5.211, - "step": 12612 - }, - { - "epoch": 6.577835723598436, - "grad_norm": 1.3040703535079956, - "learning_rate": 8.782914572864323e-05, - "loss": 5.796, - "step": 12613 - }, - { - "epoch": 6.578357235984354, - "grad_norm": 1.5241940021514893, - "learning_rate": 8.782814070351759e-05, - "loss": 5.2323, - "step": 12614 - }, - { - "epoch": 6.578878748370274, - "grad_norm": 1.440870761871338, - "learning_rate": 8.782713567839195e-05, - "loss": 5.4726, - "step": 12615 - }, - { - "epoch": 6.579400260756193, - "grad_norm": 1.5338983535766602, - "learning_rate": 8.782613065326633e-05, - "loss": 5.529, - "step": 12616 - }, - { - "epoch": 6.579921773142112, - "grad_norm": 1.4736131429672241, - "learning_rate": 8.782512562814071e-05, - "loss": 4.5135, - "step": 12617 - }, - { - "epoch": 6.580443285528031, - "grad_norm": 1.5682207345962524, - "learning_rate": 8.782412060301509e-05, - "loss": 5.0513, - "step": 12618 - }, - { - "epoch": 6.580964797913951, - "grad_norm": 1.7090058326721191, - "learning_rate": 8.782311557788945e-05, - "loss": 5.0464, - "step": 12619 - }, - { - "epoch": 6.581486310299869, - "grad_norm": 1.4685333967208862, - "learning_rate": 8.782211055276383e-05, - "loss": 5.3914, - "step": 12620 - }, - { - "epoch": 6.582007822685789, - "grad_norm": 1.3985859155654907, - "learning_rate": 8.78211055276382e-05, - "loss": 5.8129, - "step": 12621 - }, - { - "epoch": 6.582529335071708, - "grad_norm": 1.6404573917388916, - "learning_rate": 8.782010050251257e-05, - "loss": 5.424, - "step": 12622 - }, - { - "epoch": 6.583050847457627, - "grad_norm": 1.3094804286956787, - "learning_rate": 8.781909547738694e-05, - "loss": 5.7867, - "step": 12623 - }, - { - "epoch": 6.583572359843546, - "grad_norm": 1.485745906829834, - "learning_rate": 8.781809045226131e-05, - "loss": 5.875, - "step": 12624 - }, - { - "epoch": 6.584093872229466, - "grad_norm": 1.517445683479309, - "learning_rate": 8.781708542713568e-05, - "loss": 5.7562, - "step": 12625 - }, - { - "epoch": 6.584615384615384, - "grad_norm": 1.4216309785842896, - "learning_rate": 8.781608040201006e-05, - "loss": 5.4867, - "step": 12626 - }, - { - "epoch": 6.585136897001304, - "grad_norm": 1.3893240690231323, - "learning_rate": 8.781507537688442e-05, - "loss": 5.606, - "step": 12627 - }, - { - "epoch": 6.585658409387223, - "grad_norm": 1.4484920501708984, - "learning_rate": 8.78140703517588e-05, - "loss": 5.6204, - "step": 12628 - }, - { - "epoch": 6.586179921773142, - "grad_norm": 1.5166114568710327, - "learning_rate": 8.781306532663318e-05, - "loss": 5.7967, - "step": 12629 - }, - { - "epoch": 6.586701434159061, - "grad_norm": 1.3741916418075562, - "learning_rate": 8.781206030150754e-05, - "loss": 5.8036, - "step": 12630 - }, - { - "epoch": 6.587222946544981, - "grad_norm": 1.2892519235610962, - "learning_rate": 8.781105527638192e-05, - "loss": 6.0055, - "step": 12631 - }, - { - "epoch": 6.587744458930899, - "grad_norm": 1.5957785844802856, - "learning_rate": 8.781005025125628e-05, - "loss": 5.3736, - "step": 12632 - }, - { - "epoch": 6.588265971316819, - "grad_norm": 1.5308053493499756, - "learning_rate": 8.780904522613066e-05, - "loss": 5.2508, - "step": 12633 - }, - { - "epoch": 6.588787483702738, - "grad_norm": 1.4613311290740967, - "learning_rate": 8.780804020100502e-05, - "loss": 5.8208, - "step": 12634 - }, - { - "epoch": 6.589308996088657, - "grad_norm": 1.397038221359253, - "learning_rate": 8.78070351758794e-05, - "loss": 5.4577, - "step": 12635 - }, - { - "epoch": 6.589830508474576, - "grad_norm": 1.3209458589553833, - "learning_rate": 8.780603015075377e-05, - "loss": 5.9181, - "step": 12636 - }, - { - "epoch": 6.590352020860496, - "grad_norm": 1.50836181640625, - "learning_rate": 8.780502512562814e-05, - "loss": 5.425, - "step": 12637 - }, - { - "epoch": 6.590873533246414, - "grad_norm": 1.4640510082244873, - "learning_rate": 8.780402010050251e-05, - "loss": 5.7029, - "step": 12638 - }, - { - "epoch": 6.591395045632334, - "grad_norm": 1.4712388515472412, - "learning_rate": 8.780301507537689e-05, - "loss": 5.7552, - "step": 12639 - }, - { - "epoch": 6.591916558018253, - "grad_norm": 1.5009455680847168, - "learning_rate": 8.780201005025126e-05, - "loss": 5.4579, - "step": 12640 - }, - { - "epoch": 6.592438070404172, - "grad_norm": 1.5606987476348877, - "learning_rate": 8.780100502512564e-05, - "loss": 5.7232, - "step": 12641 - }, - { - "epoch": 6.592959582790091, - "grad_norm": 1.4215489625930786, - "learning_rate": 8.78e-05, - "loss": 5.6592, - "step": 12642 - }, - { - "epoch": 6.593481095176011, - "grad_norm": 1.6158361434936523, - "learning_rate": 8.779899497487437e-05, - "loss": 5.1185, - "step": 12643 - }, - { - "epoch": 6.594002607561929, - "grad_norm": 1.4108761548995972, - "learning_rate": 8.779798994974875e-05, - "loss": 5.3929, - "step": 12644 - }, - { - "epoch": 6.594524119947849, - "grad_norm": 1.5046839714050293, - "learning_rate": 8.779698492462311e-05, - "loss": 5.5334, - "step": 12645 - }, - { - "epoch": 6.595045632333768, - "grad_norm": 1.4471229314804077, - "learning_rate": 8.779597989949749e-05, - "loss": 5.7661, - "step": 12646 - }, - { - "epoch": 6.595567144719687, - "grad_norm": 1.4066576957702637, - "learning_rate": 8.779497487437185e-05, - "loss": 5.5804, - "step": 12647 - }, - { - "epoch": 6.596088657105606, - "grad_norm": 1.4027494192123413, - "learning_rate": 8.779396984924623e-05, - "loss": 4.9957, - "step": 12648 - }, - { - "epoch": 6.596610169491526, - "grad_norm": 1.4992258548736572, - "learning_rate": 8.779296482412061e-05, - "loss": 5.2928, - "step": 12649 - }, - { - "epoch": 6.597131681877444, - "grad_norm": 1.5492808818817139, - "learning_rate": 8.779195979899499e-05, - "loss": 5.2913, - "step": 12650 - }, - { - "epoch": 6.597653194263364, - "grad_norm": 1.4365154504776, - "learning_rate": 8.779095477386935e-05, - "loss": 5.6598, - "step": 12651 - }, - { - "epoch": 6.598174706649283, - "grad_norm": 1.3569459915161133, - "learning_rate": 8.778994974874373e-05, - "loss": 5.8414, - "step": 12652 - }, - { - "epoch": 6.598696219035202, - "grad_norm": 1.3475803136825562, - "learning_rate": 8.77889447236181e-05, - "loss": 5.5124, - "step": 12653 - }, - { - "epoch": 6.599217731421121, - "grad_norm": 1.4482766389846802, - "learning_rate": 8.778793969849247e-05, - "loss": 5.0951, - "step": 12654 - }, - { - "epoch": 6.599739243807041, - "grad_norm": 1.4267802238464355, - "learning_rate": 8.778693467336684e-05, - "loss": 5.8302, - "step": 12655 - }, - { - "epoch": 6.600260756192959, - "grad_norm": 1.3828619718551636, - "learning_rate": 8.77859296482412e-05, - "loss": 5.7241, - "step": 12656 - }, - { - "epoch": 6.600782268578879, - "grad_norm": 1.3185272216796875, - "learning_rate": 8.778492462311558e-05, - "loss": 5.7261, - "step": 12657 - }, - { - "epoch": 6.601303780964798, - "grad_norm": 1.3820860385894775, - "learning_rate": 8.778391959798994e-05, - "loss": 5.8836, - "step": 12658 - }, - { - "epoch": 6.601825293350717, - "grad_norm": 1.5078847408294678, - "learning_rate": 8.778291457286432e-05, - "loss": 5.8358, - "step": 12659 - }, - { - "epoch": 6.602346805736636, - "grad_norm": 1.4330273866653442, - "learning_rate": 8.77819095477387e-05, - "loss": 5.6504, - "step": 12660 - }, - { - "epoch": 6.602868318122555, - "grad_norm": 1.4467077255249023, - "learning_rate": 8.778090452261308e-05, - "loss": 5.7501, - "step": 12661 - }, - { - "epoch": 6.603389830508474, - "grad_norm": 1.5018937587738037, - "learning_rate": 8.777989949748744e-05, - "loss": 5.6609, - "step": 12662 - }, - { - "epoch": 6.603911342894394, - "grad_norm": 2.646347999572754, - "learning_rate": 8.777889447236182e-05, - "loss": 5.5618, - "step": 12663 - }, - { - "epoch": 6.604432855280313, - "grad_norm": 1.4570772647857666, - "learning_rate": 8.777788944723618e-05, - "loss": 5.7391, - "step": 12664 - }, - { - "epoch": 6.604954367666232, - "grad_norm": 1.4348684549331665, - "learning_rate": 8.777688442211056e-05, - "loss": 5.9916, - "step": 12665 - }, - { - "epoch": 6.605475880052151, - "grad_norm": 2.033144235610962, - "learning_rate": 8.777587939698493e-05, - "loss": 4.8194, - "step": 12666 - }, - { - "epoch": 6.60599739243807, - "grad_norm": 1.4769818782806396, - "learning_rate": 8.77748743718593e-05, - "loss": 5.572, - "step": 12667 - }, - { - "epoch": 6.606518904823989, - "grad_norm": 1.595062017440796, - "learning_rate": 8.777386934673367e-05, - "loss": 5.3813, - "step": 12668 - }, - { - "epoch": 6.607040417209909, - "grad_norm": 1.3825657367706299, - "learning_rate": 8.777286432160805e-05, - "loss": 5.9431, - "step": 12669 - }, - { - "epoch": 6.607561929595828, - "grad_norm": 1.4021836519241333, - "learning_rate": 8.777185929648242e-05, - "loss": 5.6312, - "step": 12670 - }, - { - "epoch": 6.608083441981747, - "grad_norm": 1.646450161933899, - "learning_rate": 8.777085427135679e-05, - "loss": 5.071, - "step": 12671 - }, - { - "epoch": 6.608604954367666, - "grad_norm": 1.6757258176803589, - "learning_rate": 8.776984924623117e-05, - "loss": 5.3128, - "step": 12672 - }, - { - "epoch": 6.609126466753585, - "grad_norm": 1.4847173690795898, - "learning_rate": 8.776884422110553e-05, - "loss": 5.8981, - "step": 12673 - }, - { - "epoch": 6.609647979139504, - "grad_norm": 1.5196614265441895, - "learning_rate": 8.776783919597991e-05, - "loss": 5.683, - "step": 12674 - }, - { - "epoch": 6.610169491525424, - "grad_norm": 1.5948742628097534, - "learning_rate": 8.776683417085427e-05, - "loss": 5.4589, - "step": 12675 - }, - { - "epoch": 6.610691003911343, - "grad_norm": 1.5615875720977783, - "learning_rate": 8.776582914572865e-05, - "loss": 5.8068, - "step": 12676 - }, - { - "epoch": 6.611212516297262, - "grad_norm": 1.4867985248565674, - "learning_rate": 8.776482412060301e-05, - "loss": 5.5718, - "step": 12677 - }, - { - "epoch": 6.611734028683181, - "grad_norm": 1.6656172275543213, - "learning_rate": 8.776381909547739e-05, - "loss": 5.5089, - "step": 12678 - }, - { - "epoch": 6.6122555410691, - "grad_norm": 1.5028811693191528, - "learning_rate": 8.776281407035176e-05, - "loss": 5.55, - "step": 12679 - }, - { - "epoch": 6.612777053455019, - "grad_norm": 1.4071787595748901, - "learning_rate": 8.776180904522613e-05, - "loss": 5.9175, - "step": 12680 - }, - { - "epoch": 6.613298565840939, - "grad_norm": 1.4265327453613281, - "learning_rate": 8.776080402010051e-05, - "loss": 5.2983, - "step": 12681 - }, - { - "epoch": 6.613820078226858, - "grad_norm": 1.4804184436798096, - "learning_rate": 8.775979899497488e-05, - "loss": 5.441, - "step": 12682 - }, - { - "epoch": 6.614341590612777, - "grad_norm": 1.4344085454940796, - "learning_rate": 8.775879396984925e-05, - "loss": 5.4587, - "step": 12683 - }, - { - "epoch": 6.614863102998696, - "grad_norm": 1.426405906677246, - "learning_rate": 8.775778894472362e-05, - "loss": 5.029, - "step": 12684 - }, - { - "epoch": 6.615384615384615, - "grad_norm": 1.4278090000152588, - "learning_rate": 8.7756783919598e-05, - "loss": 5.7661, - "step": 12685 - }, - { - "epoch": 6.615906127770534, - "grad_norm": 1.3954023122787476, - "learning_rate": 8.775577889447236e-05, - "loss": 5.6818, - "step": 12686 - }, - { - "epoch": 6.616427640156454, - "grad_norm": 1.458397388458252, - "learning_rate": 8.775477386934674e-05, - "loss": 5.6246, - "step": 12687 - }, - { - "epoch": 6.616949152542373, - "grad_norm": 1.4321602582931519, - "learning_rate": 8.77537688442211e-05, - "loss": 5.6714, - "step": 12688 - }, - { - "epoch": 6.617470664928292, - "grad_norm": 1.4184343814849854, - "learning_rate": 8.775276381909548e-05, - "loss": 5.7405, - "step": 12689 - }, - { - "epoch": 6.617992177314211, - "grad_norm": 1.5464221239089966, - "learning_rate": 8.775175879396986e-05, - "loss": 4.666, - "step": 12690 - }, - { - "epoch": 6.61851368970013, - "grad_norm": 1.6467781066894531, - "learning_rate": 8.775075376884424e-05, - "loss": 5.4469, - "step": 12691 - }, - { - "epoch": 6.6190352020860495, - "grad_norm": 1.391797423362732, - "learning_rate": 8.77497487437186e-05, - "loss": 5.8458, - "step": 12692 - }, - { - "epoch": 6.619556714471969, - "grad_norm": 1.3713209629058838, - "learning_rate": 8.774874371859298e-05, - "loss": 5.9564, - "step": 12693 - }, - { - "epoch": 6.6200782268578875, - "grad_norm": 1.374988079071045, - "learning_rate": 8.774773869346734e-05, - "loss": 5.0061, - "step": 12694 - }, - { - "epoch": 6.620599739243807, - "grad_norm": 1.3422836065292358, - "learning_rate": 8.77467336683417e-05, - "loss": 5.7642, - "step": 12695 - }, - { - "epoch": 6.621121251629726, - "grad_norm": 1.5050263404846191, - "learning_rate": 8.774572864321608e-05, - "loss": 5.3079, - "step": 12696 - }, - { - "epoch": 6.621642764015645, - "grad_norm": 1.3768254518508911, - "learning_rate": 8.774472361809045e-05, - "loss": 5.3917, - "step": 12697 - }, - { - "epoch": 6.6221642764015645, - "grad_norm": 1.417422890663147, - "learning_rate": 8.774371859296483e-05, - "loss": 5.8489, - "step": 12698 - }, - { - "epoch": 6.622685788787484, - "grad_norm": 1.5776814222335815, - "learning_rate": 8.774271356783919e-05, - "loss": 5.159, - "step": 12699 - }, - { - "epoch": 6.6232073011734025, - "grad_norm": 1.6243717670440674, - "learning_rate": 8.774170854271357e-05, - "loss": 5.5926, - "step": 12700 - }, - { - "epoch": 6.623728813559322, - "grad_norm": 1.4073995351791382, - "learning_rate": 8.774070351758795e-05, - "loss": 5.734, - "step": 12701 - }, - { - "epoch": 6.624250325945241, - "grad_norm": 1.3392020463943481, - "learning_rate": 8.773969849246232e-05, - "loss": 5.5972, - "step": 12702 - }, - { - "epoch": 6.62477183833116, - "grad_norm": 1.5713303089141846, - "learning_rate": 8.773869346733669e-05, - "loss": 5.7704, - "step": 12703 - }, - { - "epoch": 6.6252933507170795, - "grad_norm": 1.379541039466858, - "learning_rate": 8.773768844221107e-05, - "loss": 5.631, - "step": 12704 - }, - { - "epoch": 6.625814863102999, - "grad_norm": 1.4228026866912842, - "learning_rate": 8.773668341708543e-05, - "loss": 5.6018, - "step": 12705 - }, - { - "epoch": 6.6263363754889175, - "grad_norm": 1.4532055854797363, - "learning_rate": 8.773567839195981e-05, - "loss": 5.7198, - "step": 12706 - }, - { - "epoch": 6.626857887874837, - "grad_norm": 1.4348878860473633, - "learning_rate": 8.773467336683417e-05, - "loss": 5.4645, - "step": 12707 - }, - { - "epoch": 6.6273794002607564, - "grad_norm": 1.5477967262268066, - "learning_rate": 8.773366834170854e-05, - "loss": 5.3382, - "step": 12708 - }, - { - "epoch": 6.627900912646675, - "grad_norm": 1.6413317918777466, - "learning_rate": 8.773266331658291e-05, - "loss": 5.231, - "step": 12709 - }, - { - "epoch": 6.6284224250325945, - "grad_norm": 1.4687913656234741, - "learning_rate": 8.773165829145729e-05, - "loss": 5.2688, - "step": 12710 - }, - { - "epoch": 6.628943937418514, - "grad_norm": 1.382320761680603, - "learning_rate": 8.773065326633167e-05, - "loss": 5.2741, - "step": 12711 - }, - { - "epoch": 6.6294654498044325, - "grad_norm": 1.3559846878051758, - "learning_rate": 8.772964824120603e-05, - "loss": 5.904, - "step": 12712 - }, - { - "epoch": 6.629986962190352, - "grad_norm": 1.300837516784668, - "learning_rate": 8.772864321608041e-05, - "loss": 6.0428, - "step": 12713 - }, - { - "epoch": 6.6305084745762715, - "grad_norm": 1.3721174001693726, - "learning_rate": 8.772763819095478e-05, - "loss": 5.5082, - "step": 12714 - }, - { - "epoch": 6.63102998696219, - "grad_norm": 1.4383790493011475, - "learning_rate": 8.772663316582915e-05, - "loss": 5.3882, - "step": 12715 - }, - { - "epoch": 6.6315514993481095, - "grad_norm": 1.5193337202072144, - "learning_rate": 8.772562814070352e-05, - "loss": 5.4044, - "step": 12716 - }, - { - "epoch": 6.632073011734029, - "grad_norm": 1.5063661336898804, - "learning_rate": 8.77246231155779e-05, - "loss": 5.5102, - "step": 12717 - }, - { - "epoch": 6.6325945241199475, - "grad_norm": 1.6173903942108154, - "learning_rate": 8.772361809045226e-05, - "loss": 5.1089, - "step": 12718 - }, - { - "epoch": 6.633116036505867, - "grad_norm": 1.4732931852340698, - "learning_rate": 8.772261306532664e-05, - "loss": 5.6493, - "step": 12719 - }, - { - "epoch": 6.6336375488917865, - "grad_norm": 1.3904318809509277, - "learning_rate": 8.7721608040201e-05, - "loss": 5.7361, - "step": 12720 - }, - { - "epoch": 6.634159061277705, - "grad_norm": 1.4752448797225952, - "learning_rate": 8.772060301507538e-05, - "loss": 5.3505, - "step": 12721 - }, - { - "epoch": 6.6346805736636245, - "grad_norm": 1.310181736946106, - "learning_rate": 8.771959798994976e-05, - "loss": 5.1371, - "step": 12722 - }, - { - "epoch": 6.635202086049544, - "grad_norm": 1.5187984704971313, - "learning_rate": 8.771859296482412e-05, - "loss": 5.3394, - "step": 12723 - }, - { - "epoch": 6.6357235984354626, - "grad_norm": 1.3388502597808838, - "learning_rate": 8.77175879396985e-05, - "loss": 5.5252, - "step": 12724 - }, - { - "epoch": 6.636245110821382, - "grad_norm": 1.4206173419952393, - "learning_rate": 8.771658291457286e-05, - "loss": 5.7729, - "step": 12725 - }, - { - "epoch": 6.6367666232073015, - "grad_norm": 1.3759329319000244, - "learning_rate": 8.771557788944724e-05, - "loss": 5.842, - "step": 12726 - }, - { - "epoch": 6.63728813559322, - "grad_norm": 1.243151068687439, - "learning_rate": 8.771457286432161e-05, - "loss": 6.1274, - "step": 12727 - }, - { - "epoch": 6.6378096479791395, - "grad_norm": 1.3851327896118164, - "learning_rate": 8.771356783919598e-05, - "loss": 5.8062, - "step": 12728 - }, - { - "epoch": 6.638331160365059, - "grad_norm": 1.492940068244934, - "learning_rate": 8.771256281407035e-05, - "loss": 5.3939, - "step": 12729 - }, - { - "epoch": 6.638852672750978, - "grad_norm": 1.4239271879196167, - "learning_rate": 8.771155778894473e-05, - "loss": 5.9161, - "step": 12730 - }, - { - "epoch": 6.639374185136897, - "grad_norm": 1.495632529258728, - "learning_rate": 8.77105527638191e-05, - "loss": 5.8951, - "step": 12731 - }, - { - "epoch": 6.6398956975228165, - "grad_norm": 1.5401967763900757, - "learning_rate": 8.770954773869348e-05, - "loss": 5.4034, - "step": 12732 - }, - { - "epoch": 6.640417209908735, - "grad_norm": 1.3943099975585938, - "learning_rate": 8.770854271356785e-05, - "loss": 5.4726, - "step": 12733 - }, - { - "epoch": 6.6409387222946545, - "grad_norm": 1.4532575607299805, - "learning_rate": 8.770753768844222e-05, - "loss": 5.7614, - "step": 12734 - }, - { - "epoch": 6.641460234680574, - "grad_norm": 1.4067597389221191, - "learning_rate": 8.770653266331659e-05, - "loss": 5.3414, - "step": 12735 - }, - { - "epoch": 6.641981747066493, - "grad_norm": 1.455438256263733, - "learning_rate": 8.770552763819095e-05, - "loss": 5.7926, - "step": 12736 - }, - { - "epoch": 6.642503259452412, - "grad_norm": 1.365914225578308, - "learning_rate": 8.770452261306533e-05, - "loss": 5.5659, - "step": 12737 - }, - { - "epoch": 6.6430247718383315, - "grad_norm": 1.4311046600341797, - "learning_rate": 8.77035175879397e-05, - "loss": 5.617, - "step": 12738 - }, - { - "epoch": 6.64354628422425, - "grad_norm": 1.3220605850219727, - "learning_rate": 8.770251256281407e-05, - "loss": 5.7599, - "step": 12739 - }, - { - "epoch": 6.6440677966101696, - "grad_norm": 1.304575800895691, - "learning_rate": 8.770150753768844e-05, - "loss": 5.8248, - "step": 12740 - }, - { - "epoch": 6.644589308996089, - "grad_norm": 1.4549000263214111, - "learning_rate": 8.770050251256282e-05, - "loss": 5.7301, - "step": 12741 - }, - { - "epoch": 6.645110821382008, - "grad_norm": 1.3772786855697632, - "learning_rate": 8.769949748743719e-05, - "loss": 5.3651, - "step": 12742 - }, - { - "epoch": 6.645632333767927, - "grad_norm": 1.3597321510314941, - "learning_rate": 8.769849246231157e-05, - "loss": 5.7362, - "step": 12743 - }, - { - "epoch": 6.6461538461538465, - "grad_norm": 1.366639256477356, - "learning_rate": 8.769748743718593e-05, - "loss": 5.8753, - "step": 12744 - }, - { - "epoch": 6.646675358539765, - "grad_norm": 1.414522409439087, - "learning_rate": 8.769648241206031e-05, - "loss": 4.8106, - "step": 12745 - }, - { - "epoch": 6.647196870925685, - "grad_norm": 1.3914718627929688, - "learning_rate": 8.769547738693468e-05, - "loss": 5.7686, - "step": 12746 - }, - { - "epoch": 6.647718383311604, - "grad_norm": 1.4633933305740356, - "learning_rate": 8.769447236180905e-05, - "loss": 5.9685, - "step": 12747 - }, - { - "epoch": 6.648239895697523, - "grad_norm": 1.4253698587417603, - "learning_rate": 8.769346733668342e-05, - "loss": 5.2858, - "step": 12748 - }, - { - "epoch": 6.648761408083442, - "grad_norm": 1.4143096208572388, - "learning_rate": 8.769246231155778e-05, - "loss": 5.5635, - "step": 12749 - }, - { - "epoch": 6.6492829204693615, - "grad_norm": 1.2959723472595215, - "learning_rate": 8.769145728643216e-05, - "loss": 6.0348, - "step": 12750 - }, - { - "epoch": 6.64980443285528, - "grad_norm": 1.3806322813034058, - "learning_rate": 8.769045226130654e-05, - "loss": 5.7625, - "step": 12751 - }, - { - "epoch": 6.6503259452412, - "grad_norm": 1.4848254919052124, - "learning_rate": 8.768944723618092e-05, - "loss": 5.3936, - "step": 12752 - }, - { - "epoch": 6.650847457627119, - "grad_norm": 1.328060507774353, - "learning_rate": 8.768844221105528e-05, - "loss": 5.7045, - "step": 12753 - }, - { - "epoch": 6.651368970013038, - "grad_norm": 1.4735866785049438, - "learning_rate": 8.768743718592966e-05, - "loss": 5.3884, - "step": 12754 - }, - { - "epoch": 6.651890482398957, - "grad_norm": 1.5322743654251099, - "learning_rate": 8.768643216080402e-05, - "loss": 5.3671, - "step": 12755 - }, - { - "epoch": 6.652411994784876, - "grad_norm": 1.2741072177886963, - "learning_rate": 8.76854271356784e-05, - "loss": 5.7149, - "step": 12756 - }, - { - "epoch": 6.652933507170795, - "grad_norm": 1.458165168762207, - "learning_rate": 8.768442211055277e-05, - "loss": 5.5331, - "step": 12757 - }, - { - "epoch": 6.653455019556715, - "grad_norm": 1.3718332052230835, - "learning_rate": 8.768341708542714e-05, - "loss": 5.9309, - "step": 12758 - }, - { - "epoch": 6.653976531942634, - "grad_norm": 1.5553209781646729, - "learning_rate": 8.768241206030151e-05, - "loss": 4.8039, - "step": 12759 - }, - { - "epoch": 6.654498044328553, - "grad_norm": 1.4828732013702393, - "learning_rate": 8.768140703517589e-05, - "loss": 5.8454, - "step": 12760 - }, - { - "epoch": 6.655019556714472, - "grad_norm": 1.466138243675232, - "learning_rate": 8.768040201005025e-05, - "loss": 5.7301, - "step": 12761 - }, - { - "epoch": 6.655541069100391, - "grad_norm": 1.4061579704284668, - "learning_rate": 8.767939698492463e-05, - "loss": 5.5757, - "step": 12762 - }, - { - "epoch": 6.65606258148631, - "grad_norm": 1.4469388723373413, - "learning_rate": 8.7678391959799e-05, - "loss": 5.4785, - "step": 12763 - }, - { - "epoch": 6.65658409387223, - "grad_norm": 1.3981482982635498, - "learning_rate": 8.767738693467337e-05, - "loss": 5.6365, - "step": 12764 - }, - { - "epoch": 6.657105606258149, - "grad_norm": 1.6990699768066406, - "learning_rate": 8.767638190954775e-05, - "loss": 5.5737, - "step": 12765 - }, - { - "epoch": 6.657627118644068, - "grad_norm": 1.548140287399292, - "learning_rate": 8.767537688442211e-05, - "loss": 5.7171, - "step": 12766 - }, - { - "epoch": 6.658148631029987, - "grad_norm": 1.5899220705032349, - "learning_rate": 8.767437185929649e-05, - "loss": 5.3846, - "step": 12767 - }, - { - "epoch": 6.658670143415906, - "grad_norm": 1.3387292623519897, - "learning_rate": 8.767336683417085e-05, - "loss": 6.0568, - "step": 12768 - }, - { - "epoch": 6.659191655801825, - "grad_norm": 1.3478761911392212, - "learning_rate": 8.767236180904523e-05, - "loss": 5.7805, - "step": 12769 - }, - { - "epoch": 6.659713168187745, - "grad_norm": 1.42526113986969, - "learning_rate": 8.76713567839196e-05, - "loss": 5.8357, - "step": 12770 - }, - { - "epoch": 6.660234680573664, - "grad_norm": 1.4818952083587646, - "learning_rate": 8.767035175879397e-05, - "loss": 5.8157, - "step": 12771 - }, - { - "epoch": 6.660756192959583, - "grad_norm": 1.4578192234039307, - "learning_rate": 8.766934673366835e-05, - "loss": 5.2855, - "step": 12772 - }, - { - "epoch": 6.661277705345502, - "grad_norm": 1.4734182357788086, - "learning_rate": 8.766834170854273e-05, - "loss": 5.6158, - "step": 12773 - }, - { - "epoch": 6.661799217731421, - "grad_norm": 1.6257375478744507, - "learning_rate": 8.76673366834171e-05, - "loss": 5.6439, - "step": 12774 - }, - { - "epoch": 6.66232073011734, - "grad_norm": 1.4205780029296875, - "learning_rate": 8.766633165829146e-05, - "loss": 5.2789, - "step": 12775 - }, - { - "epoch": 6.66284224250326, - "grad_norm": 1.5704612731933594, - "learning_rate": 8.766532663316584e-05, - "loss": 5.4904, - "step": 12776 - }, - { - "epoch": 6.663363754889179, - "grad_norm": 1.4090694189071655, - "learning_rate": 8.76643216080402e-05, - "loss": 5.3308, - "step": 12777 - }, - { - "epoch": 6.663885267275098, - "grad_norm": 1.536839485168457, - "learning_rate": 8.766331658291458e-05, - "loss": 5.5016, - "step": 12778 - }, - { - "epoch": 6.664406779661017, - "grad_norm": 1.3951611518859863, - "learning_rate": 8.766231155778894e-05, - "loss": 5.5258, - "step": 12779 - }, - { - "epoch": 6.664928292046936, - "grad_norm": 1.523594617843628, - "learning_rate": 8.766130653266332e-05, - "loss": 5.5442, - "step": 12780 - }, - { - "epoch": 6.665449804432855, - "grad_norm": 1.4146018028259277, - "learning_rate": 8.766030150753768e-05, - "loss": 5.62, - "step": 12781 - }, - { - "epoch": 6.665971316818775, - "grad_norm": 1.394034743309021, - "learning_rate": 8.765929648241206e-05, - "loss": 5.6149, - "step": 12782 - }, - { - "epoch": 6.666492829204694, - "grad_norm": 1.3811874389648438, - "learning_rate": 8.765829145728644e-05, - "loss": 5.7538, - "step": 12783 - }, - { - "epoch": 6.667014341590613, - "grad_norm": 1.4417401552200317, - "learning_rate": 8.765728643216082e-05, - "loss": 5.6746, - "step": 12784 - }, - { - "epoch": 6.667535853976532, - "grad_norm": 1.4531605243682861, - "learning_rate": 8.765628140703518e-05, - "loss": 5.2057, - "step": 12785 - }, - { - "epoch": 6.668057366362451, - "grad_norm": 1.3937252759933472, - "learning_rate": 8.765527638190956e-05, - "loss": 5.8601, - "step": 12786 - }, - { - "epoch": 6.66857887874837, - "grad_norm": 1.3796398639678955, - "learning_rate": 8.765427135678392e-05, - "loss": 5.9233, - "step": 12787 - }, - { - "epoch": 6.66910039113429, - "grad_norm": 1.3045505285263062, - "learning_rate": 8.765326633165829e-05, - "loss": 5.0307, - "step": 12788 - }, - { - "epoch": 6.669621903520208, - "grad_norm": 1.3593705892562866, - "learning_rate": 8.765226130653267e-05, - "loss": 5.9502, - "step": 12789 - }, - { - "epoch": 6.670143415906128, - "grad_norm": 1.4448844194412231, - "learning_rate": 8.765125628140703e-05, - "loss": 5.7594, - "step": 12790 - }, - { - "epoch": 6.670664928292047, - "grad_norm": 1.425901174545288, - "learning_rate": 8.765025125628141e-05, - "loss": 6.0024, - "step": 12791 - }, - { - "epoch": 6.671186440677966, - "grad_norm": 1.4235531091690063, - "learning_rate": 8.764924623115579e-05, - "loss": 5.7386, - "step": 12792 - }, - { - "epoch": 6.671707953063885, - "grad_norm": 1.4574190378189087, - "learning_rate": 8.764824120603016e-05, - "loss": 4.8158, - "step": 12793 - }, - { - "epoch": 6.672229465449805, - "grad_norm": 1.4605151414871216, - "learning_rate": 8.764723618090453e-05, - "loss": 5.8209, - "step": 12794 - }, - { - "epoch": 6.672750977835723, - "grad_norm": 1.4213238954544067, - "learning_rate": 8.76462311557789e-05, - "loss": 5.5796, - "step": 12795 - }, - { - "epoch": 6.673272490221643, - "grad_norm": 1.410429835319519, - "learning_rate": 8.764522613065327e-05, - "loss": 5.4916, - "step": 12796 - }, - { - "epoch": 6.673794002607562, - "grad_norm": 1.3719998598098755, - "learning_rate": 8.764422110552765e-05, - "loss": 5.4208, - "step": 12797 - }, - { - "epoch": 6.674315514993481, - "grad_norm": 1.3995013236999512, - "learning_rate": 8.764321608040201e-05, - "loss": 5.2556, - "step": 12798 - }, - { - "epoch": 6.6748370273794, - "grad_norm": 1.488429307937622, - "learning_rate": 8.764221105527639e-05, - "loss": 5.428, - "step": 12799 - }, - { - "epoch": 6.67535853976532, - "grad_norm": 1.4296579360961914, - "learning_rate": 8.764120603015075e-05, - "loss": 6.0151, - "step": 12800 - }, - { - "epoch": 6.675880052151238, - "grad_norm": 1.5000637769699097, - "learning_rate": 8.764020100502512e-05, - "loss": 5.6728, - "step": 12801 - }, - { - "epoch": 6.676401564537158, - "grad_norm": 1.3603116273880005, - "learning_rate": 8.76391959798995e-05, - "loss": 5.7496, - "step": 12802 - }, - { - "epoch": 6.676923076923077, - "grad_norm": 1.472214698791504, - "learning_rate": 8.763819095477387e-05, - "loss": 5.4024, - "step": 12803 - }, - { - "epoch": 6.677444589308996, - "grad_norm": 1.5616724491119385, - "learning_rate": 8.763718592964825e-05, - "loss": 5.3116, - "step": 12804 - }, - { - "epoch": 6.677966101694915, - "grad_norm": 1.351335048675537, - "learning_rate": 8.763618090452262e-05, - "loss": 5.6412, - "step": 12805 - }, - { - "epoch": 6.678487614080835, - "grad_norm": 1.3945475816726685, - "learning_rate": 8.7635175879397e-05, - "loss": 5.1248, - "step": 12806 - }, - { - "epoch": 6.679009126466753, - "grad_norm": 1.5649261474609375, - "learning_rate": 8.763417085427136e-05, - "loss": 4.9444, - "step": 12807 - }, - { - "epoch": 6.679530638852673, - "grad_norm": 1.782228946685791, - "learning_rate": 8.763316582914574e-05, - "loss": 4.9237, - "step": 12808 - }, - { - "epoch": 6.680052151238592, - "grad_norm": 1.4529575109481812, - "learning_rate": 8.76321608040201e-05, - "loss": 5.2834, - "step": 12809 - }, - { - "epoch": 6.680573663624511, - "grad_norm": 1.6766923666000366, - "learning_rate": 8.763115577889448e-05, - "loss": 5.5437, - "step": 12810 - }, - { - "epoch": 6.68109517601043, - "grad_norm": 1.367929458618164, - "learning_rate": 8.763015075376884e-05, - "loss": 5.4899, - "step": 12811 - }, - { - "epoch": 6.68161668839635, - "grad_norm": 1.438033103942871, - "learning_rate": 8.762914572864322e-05, - "loss": 5.3234, - "step": 12812 - }, - { - "epoch": 6.682138200782268, - "grad_norm": 1.445300579071045, - "learning_rate": 8.762814070351758e-05, - "loss": 5.319, - "step": 12813 - }, - { - "epoch": 6.682659713168188, - "grad_norm": 1.4838016033172607, - "learning_rate": 8.762713567839196e-05, - "loss": 5.4681, - "step": 12814 - }, - { - "epoch": 6.683181225554107, - "grad_norm": 1.3934568166732788, - "learning_rate": 8.762613065326634e-05, - "loss": 6.2128, - "step": 12815 - }, - { - "epoch": 6.683702737940026, - "grad_norm": 1.3830921649932861, - "learning_rate": 8.76251256281407e-05, - "loss": 5.6773, - "step": 12816 - }, - { - "epoch": 6.684224250325945, - "grad_norm": 1.567723035812378, - "learning_rate": 8.762412060301508e-05, - "loss": 5.415, - "step": 12817 - }, - { - "epoch": 6.684745762711865, - "grad_norm": 1.479431390762329, - "learning_rate": 8.762311557788945e-05, - "loss": 5.8143, - "step": 12818 - }, - { - "epoch": 6.685267275097783, - "grad_norm": 1.4677802324295044, - "learning_rate": 8.762211055276382e-05, - "loss": 5.733, - "step": 12819 - }, - { - "epoch": 6.685788787483703, - "grad_norm": 1.3229190111160278, - "learning_rate": 8.762110552763819e-05, - "loss": 5.4112, - "step": 12820 - }, - { - "epoch": 6.686310299869622, - "grad_norm": 1.5249003171920776, - "learning_rate": 8.762010050251257e-05, - "loss": 5.551, - "step": 12821 - }, - { - "epoch": 6.686831812255541, - "grad_norm": 1.4870644807815552, - "learning_rate": 8.761909547738693e-05, - "loss": 5.5876, - "step": 12822 - }, - { - "epoch": 6.68735332464146, - "grad_norm": 1.4516531229019165, - "learning_rate": 8.761809045226131e-05, - "loss": 5.3875, - "step": 12823 - }, - { - "epoch": 6.68787483702738, - "grad_norm": 1.5230038166046143, - "learning_rate": 8.761708542713569e-05, - "loss": 5.5145, - "step": 12824 - }, - { - "epoch": 6.688396349413298, - "grad_norm": 1.3259910345077515, - "learning_rate": 8.761608040201006e-05, - "loss": 6.002, - "step": 12825 - }, - { - "epoch": 6.688917861799218, - "grad_norm": 1.4712027311325073, - "learning_rate": 8.761507537688443e-05, - "loss": 5.5068, - "step": 12826 - }, - { - "epoch": 6.689439374185137, - "grad_norm": 1.4919196367263794, - "learning_rate": 8.761407035175881e-05, - "loss": 5.0339, - "step": 12827 - }, - { - "epoch": 6.689960886571056, - "grad_norm": 1.5420600175857544, - "learning_rate": 8.761306532663317e-05, - "loss": 5.6854, - "step": 12828 - }, - { - "epoch": 6.690482398956975, - "grad_norm": 1.422290325164795, - "learning_rate": 8.761206030150754e-05, - "loss": 5.6722, - "step": 12829 - }, - { - "epoch": 6.691003911342895, - "grad_norm": 1.423728346824646, - "learning_rate": 8.761105527638191e-05, - "loss": 5.6957, - "step": 12830 - }, - { - "epoch": 6.691525423728813, - "grad_norm": 1.457627296447754, - "learning_rate": 8.761005025125628e-05, - "loss": 5.9858, - "step": 12831 - }, - { - "epoch": 6.692046936114733, - "grad_norm": 1.4462013244628906, - "learning_rate": 8.760904522613066e-05, - "loss": 5.7718, - "step": 12832 - }, - { - "epoch": 6.692568448500652, - "grad_norm": 1.5031582117080688, - "learning_rate": 8.760804020100502e-05, - "loss": 5.622, - "step": 12833 - }, - { - "epoch": 6.693089960886571, - "grad_norm": 1.4214770793914795, - "learning_rate": 8.76070351758794e-05, - "loss": 5.9267, - "step": 12834 - }, - { - "epoch": 6.69361147327249, - "grad_norm": 1.6774251461029053, - "learning_rate": 8.760603015075378e-05, - "loss": 5.2405, - "step": 12835 - }, - { - "epoch": 6.69413298565841, - "grad_norm": 1.5648654699325562, - "learning_rate": 8.760502512562815e-05, - "loss": 4.3913, - "step": 12836 - }, - { - "epoch": 6.694654498044328, - "grad_norm": 1.455964207649231, - "learning_rate": 8.760402010050252e-05, - "loss": 5.5786, - "step": 12837 - }, - { - "epoch": 6.695176010430248, - "grad_norm": 1.5922733545303345, - "learning_rate": 8.76030150753769e-05, - "loss": 5.6251, - "step": 12838 - }, - { - "epoch": 6.695697522816167, - "grad_norm": 1.4571183919906616, - "learning_rate": 8.760201005025126e-05, - "loss": 5.5927, - "step": 12839 - }, - { - "epoch": 6.696219035202086, - "grad_norm": 1.420799970626831, - "learning_rate": 8.760100502512564e-05, - "loss": 5.5727, - "step": 12840 - }, - { - "epoch": 6.696740547588005, - "grad_norm": 1.4972995519638062, - "learning_rate": 8.76e-05, - "loss": 5.7641, - "step": 12841 - }, - { - "epoch": 6.697262059973925, - "grad_norm": 1.6494814157485962, - "learning_rate": 8.759899497487437e-05, - "loss": 5.2854, - "step": 12842 - }, - { - "epoch": 6.697783572359843, - "grad_norm": 1.5611070394515991, - "learning_rate": 8.759798994974874e-05, - "loss": 5.6837, - "step": 12843 - }, - { - "epoch": 6.698305084745763, - "grad_norm": 1.448598861694336, - "learning_rate": 8.759698492462312e-05, - "loss": 5.5205, - "step": 12844 - }, - { - "epoch": 6.698826597131681, - "grad_norm": 1.506805658340454, - "learning_rate": 8.75959798994975e-05, - "loss": 5.5927, - "step": 12845 - }, - { - "epoch": 6.699348109517601, - "grad_norm": 1.4841135740280151, - "learning_rate": 8.759497487437186e-05, - "loss": 5.7315, - "step": 12846 - }, - { - "epoch": 6.69986962190352, - "grad_norm": 1.4664496183395386, - "learning_rate": 8.759396984924624e-05, - "loss": 5.8964, - "step": 12847 - }, - { - "epoch": 6.70039113428944, - "grad_norm": 1.5546787977218628, - "learning_rate": 8.75929648241206e-05, - "loss": 5.2254, - "step": 12848 - }, - { - "epoch": 6.700912646675358, - "grad_norm": 1.4552676677703857, - "learning_rate": 8.759195979899498e-05, - "loss": 5.1671, - "step": 12849 - }, - { - "epoch": 6.701434159061278, - "grad_norm": 1.4057780504226685, - "learning_rate": 8.759095477386935e-05, - "loss": 5.464, - "step": 12850 - }, - { - "epoch": 6.701955671447196, - "grad_norm": 1.4960026741027832, - "learning_rate": 8.758994974874373e-05, - "loss": 5.165, - "step": 12851 - }, - { - "epoch": 6.702477183833116, - "grad_norm": 1.4670170545578003, - "learning_rate": 8.758894472361809e-05, - "loss": 5.5209, - "step": 12852 - }, - { - "epoch": 6.702998696219035, - "grad_norm": 1.3772598505020142, - "learning_rate": 8.758793969849247e-05, - "loss": 5.8247, - "step": 12853 - }, - { - "epoch": 6.703520208604955, - "grad_norm": 1.6594270467758179, - "learning_rate": 8.758693467336683e-05, - "loss": 5.1074, - "step": 12854 - }, - { - "epoch": 6.704041720990873, - "grad_norm": 1.5321290493011475, - "learning_rate": 8.758592964824121e-05, - "loss": 5.3988, - "step": 12855 - }, - { - "epoch": 6.704563233376793, - "grad_norm": 1.4614876508712769, - "learning_rate": 8.758492462311559e-05, - "loss": 5.3676, - "step": 12856 - }, - { - "epoch": 6.705084745762711, - "grad_norm": 1.4088186025619507, - "learning_rate": 8.758391959798995e-05, - "loss": 5.5491, - "step": 12857 - }, - { - "epoch": 6.705606258148631, - "grad_norm": 1.354395866394043, - "learning_rate": 8.758291457286433e-05, - "loss": 5.9257, - "step": 12858 - }, - { - "epoch": 6.70612777053455, - "grad_norm": 1.4872767925262451, - "learning_rate": 8.75819095477387e-05, - "loss": 5.7384, - "step": 12859 - }, - { - "epoch": 6.70664928292047, - "grad_norm": 1.5058050155639648, - "learning_rate": 8.758090452261307e-05, - "loss": 5.701, - "step": 12860 - }, - { - "epoch": 6.707170795306388, - "grad_norm": 1.4931447505950928, - "learning_rate": 8.757989949748744e-05, - "loss": 5.314, - "step": 12861 - }, - { - "epoch": 6.707692307692308, - "grad_norm": 1.3797168731689453, - "learning_rate": 8.757889447236181e-05, - "loss": 5.4726, - "step": 12862 - }, - { - "epoch": 6.708213820078226, - "grad_norm": 1.3089187145233154, - "learning_rate": 8.757788944723618e-05, - "loss": 5.0096, - "step": 12863 - }, - { - "epoch": 6.708735332464146, - "grad_norm": 1.4904160499572754, - "learning_rate": 8.757688442211056e-05, - "loss": 5.8512, - "step": 12864 - }, - { - "epoch": 6.709256844850065, - "grad_norm": 1.6877117156982422, - "learning_rate": 8.757587939698493e-05, - "loss": 5.2088, - "step": 12865 - }, - { - "epoch": 6.709778357235985, - "grad_norm": 1.397830843925476, - "learning_rate": 8.757487437185931e-05, - "loss": 5.4284, - "step": 12866 - }, - { - "epoch": 6.710299869621903, - "grad_norm": 1.425405502319336, - "learning_rate": 8.757386934673368e-05, - "loss": 5.8197, - "step": 12867 - }, - { - "epoch": 6.710821382007823, - "grad_norm": 1.35599684715271, - "learning_rate": 8.757286432160804e-05, - "loss": 5.4481, - "step": 12868 - }, - { - "epoch": 6.711342894393741, - "grad_norm": 1.4064160585403442, - "learning_rate": 8.757185929648242e-05, - "loss": 5.3225, - "step": 12869 - }, - { - "epoch": 6.711864406779661, - "grad_norm": 1.5886445045471191, - "learning_rate": 8.757085427135678e-05, - "loss": 5.5014, - "step": 12870 - }, - { - "epoch": 6.71238591916558, - "grad_norm": 1.5648869276046753, - "learning_rate": 8.756984924623116e-05, - "loss": 5.312, - "step": 12871 - }, - { - "epoch": 6.7129074315515, - "grad_norm": 1.6132185459136963, - "learning_rate": 8.756884422110552e-05, - "loss": 5.8742, - "step": 12872 - }, - { - "epoch": 6.713428943937418, - "grad_norm": 1.5035642385482788, - "learning_rate": 8.75678391959799e-05, - "loss": 5.4092, - "step": 12873 - }, - { - "epoch": 6.713950456323338, - "grad_norm": 1.4022771120071411, - "learning_rate": 8.756683417085427e-05, - "loss": 5.8416, - "step": 12874 - }, - { - "epoch": 6.7144719687092564, - "grad_norm": 1.4623056650161743, - "learning_rate": 8.756582914572864e-05, - "loss": 5.7253, - "step": 12875 - }, - { - "epoch": 6.714993481095176, - "grad_norm": 1.438920021057129, - "learning_rate": 8.756482412060302e-05, - "loss": 5.4507, - "step": 12876 - }, - { - "epoch": 6.715514993481095, - "grad_norm": 1.503581166267395, - "learning_rate": 8.75638190954774e-05, - "loss": 5.2992, - "step": 12877 - }, - { - "epoch": 6.716036505867015, - "grad_norm": 1.7550193071365356, - "learning_rate": 8.756281407035176e-05, - "loss": 4.1992, - "step": 12878 - }, - { - "epoch": 6.716558018252933, - "grad_norm": 1.4273511171340942, - "learning_rate": 8.756180904522614e-05, - "loss": 5.6976, - "step": 12879 - }, - { - "epoch": 6.717079530638853, - "grad_norm": 1.417890191078186, - "learning_rate": 8.75608040201005e-05, - "loss": 6.0029, - "step": 12880 - }, - { - "epoch": 6.7176010430247715, - "grad_norm": 1.38090980052948, - "learning_rate": 8.755979899497487e-05, - "loss": 5.5322, - "step": 12881 - }, - { - "epoch": 6.718122555410691, - "grad_norm": 1.5042381286621094, - "learning_rate": 8.755879396984925e-05, - "loss": 5.5812, - "step": 12882 - }, - { - "epoch": 6.71864406779661, - "grad_norm": 1.400622010231018, - "learning_rate": 8.755778894472361e-05, - "loss": 5.8757, - "step": 12883 - }, - { - "epoch": 6.719165580182529, - "grad_norm": 1.306994080543518, - "learning_rate": 8.755678391959799e-05, - "loss": 5.9408, - "step": 12884 - }, - { - "epoch": 6.719687092568448, - "grad_norm": 1.3501349687576294, - "learning_rate": 8.755577889447237e-05, - "loss": 5.9628, - "step": 12885 - }, - { - "epoch": 6.720208604954368, - "grad_norm": 1.4997843503952026, - "learning_rate": 8.755477386934675e-05, - "loss": 5.3667, - "step": 12886 - }, - { - "epoch": 6.7207301173402865, - "grad_norm": 1.3222925662994385, - "learning_rate": 8.755376884422111e-05, - "loss": 5.8042, - "step": 12887 - }, - { - "epoch": 6.721251629726206, - "grad_norm": 1.421418309211731, - "learning_rate": 8.755276381909549e-05, - "loss": 5.6018, - "step": 12888 - }, - { - "epoch": 6.721773142112125, - "grad_norm": 1.5302369594573975, - "learning_rate": 8.755175879396985e-05, - "loss": 5.455, - "step": 12889 - }, - { - "epoch": 6.722294654498044, - "grad_norm": 1.4184293746948242, - "learning_rate": 8.755075376884423e-05, - "loss": 5.8157, - "step": 12890 - }, - { - "epoch": 6.722816166883963, - "grad_norm": 1.4871785640716553, - "learning_rate": 8.75497487437186e-05, - "loss": 5.3941, - "step": 12891 - }, - { - "epoch": 6.723337679269883, - "grad_norm": 1.353327751159668, - "learning_rate": 8.754874371859297e-05, - "loss": 5.8094, - "step": 12892 - }, - { - "epoch": 6.7238591916558015, - "grad_norm": 1.4081494808197021, - "learning_rate": 8.754773869346734e-05, - "loss": 5.6403, - "step": 12893 - }, - { - "epoch": 6.724380704041721, - "grad_norm": 1.4502794742584229, - "learning_rate": 8.75467336683417e-05, - "loss": 5.5797, - "step": 12894 - }, - { - "epoch": 6.72490221642764, - "grad_norm": 1.3688503503799438, - "learning_rate": 8.754572864321608e-05, - "loss": 5.8725, - "step": 12895 - }, - { - "epoch": 6.725423728813559, - "grad_norm": 1.5035464763641357, - "learning_rate": 8.754472361809046e-05, - "loss": 5.3924, - "step": 12896 - }, - { - "epoch": 6.7259452411994785, - "grad_norm": 1.286468267440796, - "learning_rate": 8.754371859296483e-05, - "loss": 6.0677, - "step": 12897 - }, - { - "epoch": 6.726466753585398, - "grad_norm": 1.5716204643249512, - "learning_rate": 8.75427135678392e-05, - "loss": 4.645, - "step": 12898 - }, - { - "epoch": 6.7269882659713165, - "grad_norm": 1.5926568508148193, - "learning_rate": 8.754170854271358e-05, - "loss": 5.5912, - "step": 12899 - }, - { - "epoch": 6.727509778357236, - "grad_norm": 1.3775511980056763, - "learning_rate": 8.754070351758794e-05, - "loss": 5.3362, - "step": 12900 - }, - { - "epoch": 6.728031290743155, - "grad_norm": 1.4979419708251953, - "learning_rate": 8.753969849246232e-05, - "loss": 5.7803, - "step": 12901 - }, - { - "epoch": 6.728552803129074, - "grad_norm": 1.529505968093872, - "learning_rate": 8.753869346733668e-05, - "loss": 5.2302, - "step": 12902 - }, - { - "epoch": 6.7290743155149935, - "grad_norm": 1.518815279006958, - "learning_rate": 8.753768844221106e-05, - "loss": 5.6444, - "step": 12903 - }, - { - "epoch": 6.729595827900913, - "grad_norm": 1.6006195545196533, - "learning_rate": 8.753668341708543e-05, - "loss": 5.3328, - "step": 12904 - }, - { - "epoch": 6.7301173402868315, - "grad_norm": 1.432692050933838, - "learning_rate": 8.75356783919598e-05, - "loss": 5.5458, - "step": 12905 - }, - { - "epoch": 6.730638852672751, - "grad_norm": 1.5494927167892456, - "learning_rate": 8.753467336683418e-05, - "loss": 5.3325, - "step": 12906 - }, - { - "epoch": 6.73116036505867, - "grad_norm": 1.7235878705978394, - "learning_rate": 8.753366834170856e-05, - "loss": 4.7747, - "step": 12907 - }, - { - "epoch": 6.731681877444589, - "grad_norm": 1.5326013565063477, - "learning_rate": 8.753266331658292e-05, - "loss": 5.5954, - "step": 12908 - }, - { - "epoch": 6.7322033898305085, - "grad_norm": 1.4726730585098267, - "learning_rate": 8.753165829145729e-05, - "loss": 5.4155, - "step": 12909 - }, - { - "epoch": 6.732724902216428, - "grad_norm": 1.5503407716751099, - "learning_rate": 8.753065326633167e-05, - "loss": 5.3932, - "step": 12910 - }, - { - "epoch": 6.7332464146023465, - "grad_norm": 1.3695964813232422, - "learning_rate": 8.752964824120603e-05, - "loss": 5.6673, - "step": 12911 - }, - { - "epoch": 6.733767926988266, - "grad_norm": 1.41269052028656, - "learning_rate": 8.752864321608041e-05, - "loss": 6.0206, - "step": 12912 - }, - { - "epoch": 6.7342894393741854, - "grad_norm": 1.4139107465744019, - "learning_rate": 8.752763819095477e-05, - "loss": 5.3379, - "step": 12913 - }, - { - "epoch": 6.734810951760104, - "grad_norm": 1.3406486511230469, - "learning_rate": 8.752663316582915e-05, - "loss": 5.4926, - "step": 12914 - }, - { - "epoch": 6.7353324641460235, - "grad_norm": 1.3313837051391602, - "learning_rate": 8.752562814070351e-05, - "loss": 5.3679, - "step": 12915 - }, - { - "epoch": 6.735853976531943, - "grad_norm": 1.3869768381118774, - "learning_rate": 8.752462311557789e-05, - "loss": 5.4576, - "step": 12916 - }, - { - "epoch": 6.7363754889178615, - "grad_norm": 1.4364079236984253, - "learning_rate": 8.752361809045227e-05, - "loss": 5.5214, - "step": 12917 - }, - { - "epoch": 6.736897001303781, - "grad_norm": 1.432847023010254, - "learning_rate": 8.752261306532665e-05, - "loss": 5.6061, - "step": 12918 - }, - { - "epoch": 6.7374185136897005, - "grad_norm": 1.4812605381011963, - "learning_rate": 8.752160804020101e-05, - "loss": 5.1621, - "step": 12919 - }, - { - "epoch": 6.737940026075619, - "grad_norm": 1.572106122970581, - "learning_rate": 8.752060301507539e-05, - "loss": 5.7396, - "step": 12920 - }, - { - "epoch": 6.7384615384615385, - "grad_norm": 1.3806670904159546, - "learning_rate": 8.751959798994975e-05, - "loss": 5.6612, - "step": 12921 - }, - { - "epoch": 6.738983050847458, - "grad_norm": 1.4857136011123657, - "learning_rate": 8.751859296482412e-05, - "loss": 5.2353, - "step": 12922 - }, - { - "epoch": 6.7395045632333765, - "grad_norm": 1.5276367664337158, - "learning_rate": 8.75175879396985e-05, - "loss": 5.2185, - "step": 12923 - }, - { - "epoch": 6.740026075619296, - "grad_norm": 1.446948766708374, - "learning_rate": 8.751658291457286e-05, - "loss": 5.4536, - "step": 12924 - }, - { - "epoch": 6.7405475880052155, - "grad_norm": 1.9266706705093384, - "learning_rate": 8.751557788944724e-05, - "loss": 5.5783, - "step": 12925 - }, - { - "epoch": 6.741069100391134, - "grad_norm": 1.3439608812332153, - "learning_rate": 8.751457286432162e-05, - "loss": 5.6052, - "step": 12926 - }, - { - "epoch": 6.7415906127770535, - "grad_norm": 1.3757380247116089, - "learning_rate": 8.7513567839196e-05, - "loss": 5.6875, - "step": 12927 - }, - { - "epoch": 6.742112125162973, - "grad_norm": 1.377844214439392, - "learning_rate": 8.751256281407036e-05, - "loss": 5.9908, - "step": 12928 - }, - { - "epoch": 6.742633637548892, - "grad_norm": 1.3863346576690674, - "learning_rate": 8.751155778894474e-05, - "loss": 5.7368, - "step": 12929 - }, - { - "epoch": 6.743155149934811, - "grad_norm": 1.2930681705474854, - "learning_rate": 8.75105527638191e-05, - "loss": 5.8552, - "step": 12930 - }, - { - "epoch": 6.7436766623207305, - "grad_norm": 1.4953399896621704, - "learning_rate": 8.750954773869348e-05, - "loss": 5.6349, - "step": 12931 - }, - { - "epoch": 6.744198174706649, - "grad_norm": 1.5454089641571045, - "learning_rate": 8.750854271356784e-05, - "loss": 5.5474, - "step": 12932 - }, - { - "epoch": 6.7447196870925685, - "grad_norm": 1.4068899154663086, - "learning_rate": 8.750753768844222e-05, - "loss": 5.7806, - "step": 12933 - }, - { - "epoch": 6.745241199478488, - "grad_norm": 1.4993643760681152, - "learning_rate": 8.750653266331658e-05, - "loss": 4.7752, - "step": 12934 - }, - { - "epoch": 6.745762711864407, - "grad_norm": 1.5210257768630981, - "learning_rate": 8.750552763819095e-05, - "loss": 5.4497, - "step": 12935 - }, - { - "epoch": 6.746284224250326, - "grad_norm": 1.435760736465454, - "learning_rate": 8.750452261306533e-05, - "loss": 5.6749, - "step": 12936 - }, - { - "epoch": 6.7468057366362455, - "grad_norm": 1.5299052000045776, - "learning_rate": 8.75035175879397e-05, - "loss": 5.8583, - "step": 12937 - }, - { - "epoch": 6.747327249022164, - "grad_norm": 1.3997024297714233, - "learning_rate": 8.750251256281408e-05, - "loss": 5.4734, - "step": 12938 - }, - { - "epoch": 6.7478487614080835, - "grad_norm": 1.3807650804519653, - "learning_rate": 8.750150753768845e-05, - "loss": 5.8145, - "step": 12939 - }, - { - "epoch": 6.748370273794002, - "grad_norm": 1.3790351152420044, - "learning_rate": 8.750050251256282e-05, - "loss": 5.7283, - "step": 12940 - }, - { - "epoch": 6.748891786179922, - "grad_norm": 1.414139986038208, - "learning_rate": 8.749949748743719e-05, - "loss": 5.8767, - "step": 12941 - }, - { - "epoch": 6.749413298565841, - "grad_norm": 1.413427710533142, - "learning_rate": 8.749849246231157e-05, - "loss": 5.6973, - "step": 12942 - }, - { - "epoch": 6.7499348109517605, - "grad_norm": 1.5015395879745483, - "learning_rate": 8.749748743718593e-05, - "loss": 5.5305, - "step": 12943 - }, - { - "epoch": 6.750456323337679, - "grad_norm": 1.5135955810546875, - "learning_rate": 8.749648241206031e-05, - "loss": 5.6143, - "step": 12944 - }, - { - "epoch": 6.7509778357235986, - "grad_norm": 1.4148367643356323, - "learning_rate": 8.749547738693467e-05, - "loss": 5.4748, - "step": 12945 - }, - { - "epoch": 6.751499348109517, - "grad_norm": 1.4204000234603882, - "learning_rate": 8.749447236180905e-05, - "loss": 5.8322, - "step": 12946 - }, - { - "epoch": 6.752020860495437, - "grad_norm": 1.5825470685958862, - "learning_rate": 8.749346733668343e-05, - "loss": 5.3464, - "step": 12947 - }, - { - "epoch": 6.752542372881356, - "grad_norm": 1.4701452255249023, - "learning_rate": 8.749246231155779e-05, - "loss": 5.5515, - "step": 12948 - }, - { - "epoch": 6.7530638852672755, - "grad_norm": 1.4197903871536255, - "learning_rate": 8.749145728643217e-05, - "loss": 5.6421, - "step": 12949 - }, - { - "epoch": 6.753585397653194, - "grad_norm": 1.5351446866989136, - "learning_rate": 8.749045226130653e-05, - "loss": 5.246, - "step": 12950 - }, - { - "epoch": 6.754106910039114, - "grad_norm": 1.487221598625183, - "learning_rate": 8.748944723618091e-05, - "loss": 5.5169, - "step": 12951 - }, - { - "epoch": 6.754628422425032, - "grad_norm": 1.5500482320785522, - "learning_rate": 8.748844221105528e-05, - "loss": 4.8799, - "step": 12952 - }, - { - "epoch": 6.755149934810952, - "grad_norm": 1.5753501653671265, - "learning_rate": 8.748743718592965e-05, - "loss": 5.6033, - "step": 12953 - }, - { - "epoch": 6.755671447196871, - "grad_norm": 1.5642931461334229, - "learning_rate": 8.748643216080402e-05, - "loss": 5.4088, - "step": 12954 - }, - { - "epoch": 6.7561929595827905, - "grad_norm": 1.4574861526489258, - "learning_rate": 8.74854271356784e-05, - "loss": 5.2381, - "step": 12955 - }, - { - "epoch": 6.756714471968709, - "grad_norm": 1.3989100456237793, - "learning_rate": 8.748442211055276e-05, - "loss": 5.6792, - "step": 12956 - }, - { - "epoch": 6.757235984354629, - "grad_norm": 1.4543070793151855, - "learning_rate": 8.748341708542714e-05, - "loss": 5.8931, - "step": 12957 - }, - { - "epoch": 6.757757496740547, - "grad_norm": 1.556498646736145, - "learning_rate": 8.748241206030152e-05, - "loss": 5.2696, - "step": 12958 - }, - { - "epoch": 6.758279009126467, - "grad_norm": 1.520209550857544, - "learning_rate": 8.74814070351759e-05, - "loss": 5.7003, - "step": 12959 - }, - { - "epoch": 6.758800521512386, - "grad_norm": 1.4835176467895508, - "learning_rate": 8.748040201005026e-05, - "loss": 5.4375, - "step": 12960 - }, - { - "epoch": 6.7593220338983055, - "grad_norm": 1.5834152698516846, - "learning_rate": 8.747939698492462e-05, - "loss": 5.167, - "step": 12961 - }, - { - "epoch": 6.759843546284224, - "grad_norm": 1.3461530208587646, - "learning_rate": 8.7478391959799e-05, - "loss": 5.6189, - "step": 12962 - }, - { - "epoch": 6.760365058670144, - "grad_norm": 1.4783391952514648, - "learning_rate": 8.747738693467336e-05, - "loss": 5.4832, - "step": 12963 - }, - { - "epoch": 6.760886571056062, - "grad_norm": 1.547647476196289, - "learning_rate": 8.747638190954774e-05, - "loss": 5.3697, - "step": 12964 - }, - { - "epoch": 6.761408083441982, - "grad_norm": 1.3840641975402832, - "learning_rate": 8.747537688442211e-05, - "loss": 5.8469, - "step": 12965 - }, - { - "epoch": 6.761929595827901, - "grad_norm": 1.3153142929077148, - "learning_rate": 8.747437185929648e-05, - "loss": 6.0385, - "step": 12966 - }, - { - "epoch": 6.762451108213821, - "grad_norm": 1.418047308921814, - "learning_rate": 8.747336683417085e-05, - "loss": 5.4114, - "step": 12967 - }, - { - "epoch": 6.762972620599739, - "grad_norm": 1.3821126222610474, - "learning_rate": 8.747236180904523e-05, - "loss": 5.7989, - "step": 12968 - }, - { - "epoch": 6.763494132985659, - "grad_norm": 1.5547492504119873, - "learning_rate": 8.74713567839196e-05, - "loss": 5.2473, - "step": 12969 - }, - { - "epoch": 6.764015645371577, - "grad_norm": 1.3935418128967285, - "learning_rate": 8.747035175879398e-05, - "loss": 5.7549, - "step": 12970 - }, - { - "epoch": 6.764537157757497, - "grad_norm": 1.4017019271850586, - "learning_rate": 8.746934673366835e-05, - "loss": 5.5046, - "step": 12971 - }, - { - "epoch": 6.765058670143416, - "grad_norm": 1.466787576675415, - "learning_rate": 8.746834170854272e-05, - "loss": 5.6034, - "step": 12972 - }, - { - "epoch": 6.765580182529335, - "grad_norm": 1.405820608139038, - "learning_rate": 8.746733668341709e-05, - "loss": 5.7176, - "step": 12973 - }, - { - "epoch": 6.766101694915254, - "grad_norm": 1.4241139888763428, - "learning_rate": 8.746633165829145e-05, - "loss": 5.7412, - "step": 12974 - }, - { - "epoch": 6.766623207301174, - "grad_norm": 1.5310122966766357, - "learning_rate": 8.746532663316583e-05, - "loss": 5.049, - "step": 12975 - }, - { - "epoch": 6.767144719687092, - "grad_norm": 1.5013501644134521, - "learning_rate": 8.74643216080402e-05, - "loss": 5.6641, - "step": 12976 - }, - { - "epoch": 6.767666232073012, - "grad_norm": 1.8976328372955322, - "learning_rate": 8.746331658291457e-05, - "loss": 5.3523, - "step": 12977 - }, - { - "epoch": 6.768187744458931, - "grad_norm": 1.2967411279678345, - "learning_rate": 8.746231155778895e-05, - "loss": 5.9684, - "step": 12978 - }, - { - "epoch": 6.76870925684485, - "grad_norm": 1.5587066411972046, - "learning_rate": 8.746130653266333e-05, - "loss": 5.2376, - "step": 12979 - }, - { - "epoch": 6.769230769230769, - "grad_norm": 1.3736681938171387, - "learning_rate": 8.746030150753769e-05, - "loss": 5.9197, - "step": 12980 - }, - { - "epoch": 6.769752281616689, - "grad_norm": 1.3633453845977783, - "learning_rate": 8.745929648241207e-05, - "loss": 5.603, - "step": 12981 - }, - { - "epoch": 6.770273794002607, - "grad_norm": 1.431043267250061, - "learning_rate": 8.745829145728644e-05, - "loss": 5.597, - "step": 12982 - }, - { - "epoch": 6.770795306388527, - "grad_norm": 1.4546245336532593, - "learning_rate": 8.745728643216081e-05, - "loss": 5.6864, - "step": 12983 - }, - { - "epoch": 6.771316818774446, - "grad_norm": 1.4225929975509644, - "learning_rate": 8.745628140703518e-05, - "loss": 5.556, - "step": 12984 - }, - { - "epoch": 6.771838331160365, - "grad_norm": 1.413989782333374, - "learning_rate": 8.745527638190955e-05, - "loss": 5.9142, - "step": 12985 - }, - { - "epoch": 6.772359843546284, - "grad_norm": 1.3322391510009766, - "learning_rate": 8.745427135678392e-05, - "loss": 5.9289, - "step": 12986 - }, - { - "epoch": 6.772881355932204, - "grad_norm": 1.3640295267105103, - "learning_rate": 8.74532663316583e-05, - "loss": 5.7926, - "step": 12987 - }, - { - "epoch": 6.773402868318122, - "grad_norm": 1.5264508724212646, - "learning_rate": 8.745226130653266e-05, - "loss": 5.3824, - "step": 12988 - }, - { - "epoch": 6.773924380704042, - "grad_norm": 1.8715994358062744, - "learning_rate": 8.745125628140704e-05, - "loss": 5.2562, - "step": 12989 - }, - { - "epoch": 6.774445893089961, - "grad_norm": 1.4258023500442505, - "learning_rate": 8.745025125628142e-05, - "loss": 5.6618, - "step": 12990 - }, - { - "epoch": 6.77496740547588, - "grad_norm": 1.7969495058059692, - "learning_rate": 8.744924623115578e-05, - "loss": 5.6377, - "step": 12991 - }, - { - "epoch": 6.775488917861799, - "grad_norm": 1.3364239931106567, - "learning_rate": 8.744824120603016e-05, - "loss": 5.5466, - "step": 12992 - }, - { - "epoch": 6.776010430247719, - "grad_norm": 1.4687128067016602, - "learning_rate": 8.744723618090452e-05, - "loss": 5.7711, - "step": 12993 - }, - { - "epoch": 6.776531942633637, - "grad_norm": 1.484465479850769, - "learning_rate": 8.74462311557789e-05, - "loss": 5.7482, - "step": 12994 - }, - { - "epoch": 6.777053455019557, - "grad_norm": 1.5390015840530396, - "learning_rate": 8.744522613065327e-05, - "loss": 5.0742, - "step": 12995 - }, - { - "epoch": 6.777574967405476, - "grad_norm": 1.407578945159912, - "learning_rate": 8.744422110552764e-05, - "loss": 5.764, - "step": 12996 - }, - { - "epoch": 6.778096479791395, - "grad_norm": 1.3707042932510376, - "learning_rate": 8.744321608040201e-05, - "loss": 5.9204, - "step": 12997 - }, - { - "epoch": 6.778617992177314, - "grad_norm": 1.3791706562042236, - "learning_rate": 8.744221105527639e-05, - "loss": 5.2279, - "step": 12998 - }, - { - "epoch": 6.779139504563234, - "grad_norm": 1.417040467262268, - "learning_rate": 8.744120603015076e-05, - "loss": 5.4195, - "step": 12999 - }, - { - "epoch": 6.779661016949152, - "grad_norm": 1.5586284399032593, - "learning_rate": 8.744020100502514e-05, - "loss": 5.4982, - "step": 13000 - }, - { - "epoch": 6.780182529335072, - "grad_norm": 1.5421696901321411, - "learning_rate": 8.74391959798995e-05, - "loss": 5.245, - "step": 13001 - }, - { - "epoch": 6.780704041720991, - "grad_norm": 1.4511457681655884, - "learning_rate": 8.743819095477387e-05, - "loss": 5.4794, - "step": 13002 - }, - { - "epoch": 6.78122555410691, - "grad_norm": 1.533961296081543, - "learning_rate": 8.743718592964825e-05, - "loss": 5.7046, - "step": 13003 - }, - { - "epoch": 6.781747066492829, - "grad_norm": 2.3832619190216064, - "learning_rate": 8.743618090452261e-05, - "loss": 5.0649, - "step": 13004 - }, - { - "epoch": 6.782268578878749, - "grad_norm": 1.5229096412658691, - "learning_rate": 8.743517587939699e-05, - "loss": 5.4284, - "step": 13005 - }, - { - "epoch": 6.782790091264667, - "grad_norm": 1.560542106628418, - "learning_rate": 8.743417085427135e-05, - "loss": 5.2422, - "step": 13006 - }, - { - "epoch": 6.783311603650587, - "grad_norm": 1.4768189191818237, - "learning_rate": 8.743316582914573e-05, - "loss": 5.6699, - "step": 13007 - }, - { - "epoch": 6.783833116036506, - "grad_norm": 1.5079182386398315, - "learning_rate": 8.74321608040201e-05, - "loss": 5.6358, - "step": 13008 - }, - { - "epoch": 6.784354628422425, - "grad_norm": 1.4047681093215942, - "learning_rate": 8.743115577889447e-05, - "loss": 5.3879, - "step": 13009 - }, - { - "epoch": 6.784876140808344, - "grad_norm": 1.4878028631210327, - "learning_rate": 8.743015075376885e-05, - "loss": 5.4708, - "step": 13010 - }, - { - "epoch": 6.785397653194264, - "grad_norm": 1.3969591856002808, - "learning_rate": 8.742914572864323e-05, - "loss": 5.5117, - "step": 13011 - }, - { - "epoch": 6.785919165580182, - "grad_norm": 1.5361905097961426, - "learning_rate": 8.74281407035176e-05, - "loss": 5.6062, - "step": 13012 - }, - { - "epoch": 6.786440677966102, - "grad_norm": 1.4069421291351318, - "learning_rate": 8.742713567839197e-05, - "loss": 6.0374, - "step": 13013 - }, - { - "epoch": 6.786962190352021, - "grad_norm": 1.290174961090088, - "learning_rate": 8.742613065326634e-05, - "loss": 5.8874, - "step": 13014 - }, - { - "epoch": 6.78748370273794, - "grad_norm": 1.3905510902404785, - "learning_rate": 8.74251256281407e-05, - "loss": 5.6522, - "step": 13015 - }, - { - "epoch": 6.788005215123859, - "grad_norm": 1.504899501800537, - "learning_rate": 8.742412060301508e-05, - "loss": 5.6411, - "step": 13016 - }, - { - "epoch": 6.788526727509779, - "grad_norm": 1.5551254749298096, - "learning_rate": 8.742311557788944e-05, - "loss": 5.0454, - "step": 13017 - }, - { - "epoch": 6.789048239895697, - "grad_norm": 1.402003526687622, - "learning_rate": 8.742211055276382e-05, - "loss": 5.6868, - "step": 13018 - }, - { - "epoch": 6.789569752281617, - "grad_norm": 1.5429739952087402, - "learning_rate": 8.74211055276382e-05, - "loss": 5.7435, - "step": 13019 - }, - { - "epoch": 6.790091264667536, - "grad_norm": 1.4808398485183716, - "learning_rate": 8.742010050251258e-05, - "loss": 5.3817, - "step": 13020 - }, - { - "epoch": 6.790612777053455, - "grad_norm": 1.4027894735336304, - "learning_rate": 8.741909547738694e-05, - "loss": 5.768, - "step": 13021 - }, - { - "epoch": 6.791134289439374, - "grad_norm": 1.3579949140548706, - "learning_rate": 8.741809045226132e-05, - "loss": 5.853, - "step": 13022 - }, - { - "epoch": 6.791655801825294, - "grad_norm": 1.3913379907608032, - "learning_rate": 8.741708542713568e-05, - "loss": 5.5749, - "step": 13023 - }, - { - "epoch": 6.792177314211212, - "grad_norm": 1.5369387865066528, - "learning_rate": 8.741608040201006e-05, - "loss": 5.3369, - "step": 13024 - }, - { - "epoch": 6.792698826597132, - "grad_norm": 1.3365228176116943, - "learning_rate": 8.741507537688442e-05, - "loss": 4.9083, - "step": 13025 - }, - { - "epoch": 6.793220338983051, - "grad_norm": 1.3750261068344116, - "learning_rate": 8.74140703517588e-05, - "loss": 5.8712, - "step": 13026 - }, - { - "epoch": 6.79374185136897, - "grad_norm": 1.5418564081192017, - "learning_rate": 8.741306532663317e-05, - "loss": 5.6122, - "step": 13027 - }, - { - "epoch": 6.794263363754889, - "grad_norm": 1.4683575630187988, - "learning_rate": 8.741206030150753e-05, - "loss": 5.1398, - "step": 13028 - }, - { - "epoch": 6.794784876140809, - "grad_norm": 1.4433772563934326, - "learning_rate": 8.741105527638191e-05, - "loss": 5.2609, - "step": 13029 - }, - { - "epoch": 6.795306388526727, - "grad_norm": 1.3538075685501099, - "learning_rate": 8.741005025125629e-05, - "loss": 5.8461, - "step": 13030 - }, - { - "epoch": 6.795827900912647, - "grad_norm": 1.7590866088867188, - "learning_rate": 8.740904522613066e-05, - "loss": 4.9492, - "step": 13031 - }, - { - "epoch": 6.796349413298566, - "grad_norm": 1.3921585083007812, - "learning_rate": 8.740804020100503e-05, - "loss": 5.4751, - "step": 13032 - }, - { - "epoch": 6.796870925684485, - "grad_norm": 1.394156575202942, - "learning_rate": 8.74070351758794e-05, - "loss": 6.0373, - "step": 13033 - }, - { - "epoch": 6.797392438070404, - "grad_norm": 1.454831838607788, - "learning_rate": 8.740603015075377e-05, - "loss": 5.6624, - "step": 13034 - }, - { - "epoch": 6.797913950456323, - "grad_norm": 1.3722530603408813, - "learning_rate": 8.740502512562815e-05, - "loss": 5.1511, - "step": 13035 - }, - { - "epoch": 6.798435462842242, - "grad_norm": 1.4915679693222046, - "learning_rate": 8.740402010050251e-05, - "loss": 5.3913, - "step": 13036 - }, - { - "epoch": 6.798956975228162, - "grad_norm": 1.481310248374939, - "learning_rate": 8.740301507537689e-05, - "loss": 5.7484, - "step": 13037 - }, - { - "epoch": 6.799478487614081, - "grad_norm": 1.4843041896820068, - "learning_rate": 8.740201005025125e-05, - "loss": 5.5584, - "step": 13038 - }, - { - "epoch": 6.8, - "grad_norm": 1.4647318124771118, - "learning_rate": 8.740100502512563e-05, - "loss": 5.5884, - "step": 13039 - }, - { - "epoch": 6.800521512385919, - "grad_norm": 1.444783329963684, - "learning_rate": 8.740000000000001e-05, - "loss": 5.9094, - "step": 13040 - }, - { - "epoch": 6.801043024771838, - "grad_norm": 1.4303377866744995, - "learning_rate": 8.739899497487437e-05, - "loss": 5.5005, - "step": 13041 - }, - { - "epoch": 6.801564537157757, - "grad_norm": 1.4127198457717896, - "learning_rate": 8.739798994974875e-05, - "loss": 5.465, - "step": 13042 - }, - { - "epoch": 6.802086049543677, - "grad_norm": 1.520289421081543, - "learning_rate": 8.739698492462312e-05, - "loss": 5.6286, - "step": 13043 - }, - { - "epoch": 6.802607561929596, - "grad_norm": 1.4306286573410034, - "learning_rate": 8.73959798994975e-05, - "loss": 5.452, - "step": 13044 - }, - { - "epoch": 6.803129074315515, - "grad_norm": 1.5512374639511108, - "learning_rate": 8.739497487437186e-05, - "loss": 5.7288, - "step": 13045 - }, - { - "epoch": 6.803650586701434, - "grad_norm": 1.3893169164657593, - "learning_rate": 8.739396984924624e-05, - "loss": 5.7595, - "step": 13046 - }, - { - "epoch": 6.804172099087353, - "grad_norm": 1.4788779020309448, - "learning_rate": 8.73929648241206e-05, - "loss": 5.7912, - "step": 13047 - }, - { - "epoch": 6.804693611473272, - "grad_norm": 1.4758238792419434, - "learning_rate": 8.739195979899498e-05, - "loss": 5.6745, - "step": 13048 - }, - { - "epoch": 6.805215123859192, - "grad_norm": 1.554914951324463, - "learning_rate": 8.739095477386934e-05, - "loss": 5.2051, - "step": 13049 - }, - { - "epoch": 6.805736636245111, - "grad_norm": 1.3898274898529053, - "learning_rate": 8.738994974874372e-05, - "loss": 5.7279, - "step": 13050 - }, - { - "epoch": 6.80625814863103, - "grad_norm": 1.4800206422805786, - "learning_rate": 8.73889447236181e-05, - "loss": 5.6009, - "step": 13051 - }, - { - "epoch": 6.806779661016949, - "grad_norm": 1.4042654037475586, - "learning_rate": 8.738793969849248e-05, - "loss": 5.8474, - "step": 13052 - }, - { - "epoch": 6.807301173402868, - "grad_norm": 1.4371329545974731, - "learning_rate": 8.738693467336684e-05, - "loss": 5.4417, - "step": 13053 - }, - { - "epoch": 6.807822685788787, - "grad_norm": 1.5045546293258667, - "learning_rate": 8.73859296482412e-05, - "loss": 5.4124, - "step": 13054 - }, - { - "epoch": 6.808344198174707, - "grad_norm": 1.6229749917984009, - "learning_rate": 8.738492462311558e-05, - "loss": 4.8445, - "step": 13055 - }, - { - "epoch": 6.808865710560626, - "grad_norm": 1.2861509323120117, - "learning_rate": 8.738391959798995e-05, - "loss": 5.0374, - "step": 13056 - }, - { - "epoch": 6.809387222946545, - "grad_norm": 1.3762145042419434, - "learning_rate": 8.738291457286432e-05, - "loss": 5.8783, - "step": 13057 - }, - { - "epoch": 6.809908735332464, - "grad_norm": 1.5025694370269775, - "learning_rate": 8.738190954773869e-05, - "loss": 5.5861, - "step": 13058 - }, - { - "epoch": 6.810430247718383, - "grad_norm": 1.3696616888046265, - "learning_rate": 8.738090452261307e-05, - "loss": 5.7225, - "step": 13059 - }, - { - "epoch": 6.810951760104302, - "grad_norm": 1.5139281749725342, - "learning_rate": 8.737989949748744e-05, - "loss": 5.4498, - "step": 13060 - }, - { - "epoch": 6.811473272490222, - "grad_norm": 1.5652334690093994, - "learning_rate": 8.737889447236182e-05, - "loss": 5.3399, - "step": 13061 - }, - { - "epoch": 6.811994784876141, - "grad_norm": 1.4104660749435425, - "learning_rate": 8.737788944723619e-05, - "loss": 5.6597, - "step": 13062 - }, - { - "epoch": 6.81251629726206, - "grad_norm": 1.5945795774459839, - "learning_rate": 8.737688442211056e-05, - "loss": 5.4533, - "step": 13063 - }, - { - "epoch": 6.813037809647979, - "grad_norm": 1.4883118867874146, - "learning_rate": 8.737587939698493e-05, - "loss": 5.7554, - "step": 13064 - }, - { - "epoch": 6.813559322033898, - "grad_norm": 1.5934516191482544, - "learning_rate": 8.737487437185931e-05, - "loss": 5.2979, - "step": 13065 - }, - { - "epoch": 6.814080834419817, - "grad_norm": 1.4103431701660156, - "learning_rate": 8.737386934673367e-05, - "loss": 5.7426, - "step": 13066 - }, - { - "epoch": 6.814602346805737, - "grad_norm": 1.4790726900100708, - "learning_rate": 8.737286432160804e-05, - "loss": 5.495, - "step": 13067 - }, - { - "epoch": 6.815123859191655, - "grad_norm": 1.421108365058899, - "learning_rate": 8.737185929648241e-05, - "loss": 5.6065, - "step": 13068 - }, - { - "epoch": 6.815645371577575, - "grad_norm": 1.347221851348877, - "learning_rate": 8.737085427135678e-05, - "loss": 5.8322, - "step": 13069 - }, - { - "epoch": 6.816166883963494, - "grad_norm": 1.4749139547348022, - "learning_rate": 8.736984924623116e-05, - "loss": 5.6167, - "step": 13070 - }, - { - "epoch": 6.816688396349413, - "grad_norm": 1.4353291988372803, - "learning_rate": 8.736884422110553e-05, - "loss": 5.4377, - "step": 13071 - }, - { - "epoch": 6.817209908735332, - "grad_norm": 1.2957545518875122, - "learning_rate": 8.736783919597991e-05, - "loss": 5.8858, - "step": 13072 - }, - { - "epoch": 6.817731421121252, - "grad_norm": 1.396743655204773, - "learning_rate": 8.736683417085428e-05, - "loss": 5.3925, - "step": 13073 - }, - { - "epoch": 6.81825293350717, - "grad_norm": 1.5138754844665527, - "learning_rate": 8.736582914572865e-05, - "loss": 5.572, - "step": 13074 - }, - { - "epoch": 6.81877444589309, - "grad_norm": 1.4415092468261719, - "learning_rate": 8.736482412060302e-05, - "loss": 5.6987, - "step": 13075 - }, - { - "epoch": 6.819295958279009, - "grad_norm": 1.4668790102005005, - "learning_rate": 8.73638190954774e-05, - "loss": 5.3156, - "step": 13076 - }, - { - "epoch": 6.819817470664928, - "grad_norm": 1.4260199069976807, - "learning_rate": 8.736281407035176e-05, - "loss": 5.6959, - "step": 13077 - }, - { - "epoch": 6.820338983050847, - "grad_norm": 1.6572165489196777, - "learning_rate": 8.736180904522614e-05, - "loss": 5.6058, - "step": 13078 - }, - { - "epoch": 6.820860495436767, - "grad_norm": 1.5033843517303467, - "learning_rate": 8.73608040201005e-05, - "loss": 5.5271, - "step": 13079 - }, - { - "epoch": 6.8213820078226854, - "grad_norm": 1.427453637123108, - "learning_rate": 8.735979899497488e-05, - "loss": 5.3157, - "step": 13080 - }, - { - "epoch": 6.821903520208605, - "grad_norm": 1.3666183948516846, - "learning_rate": 8.735879396984926e-05, - "loss": 5.8858, - "step": 13081 - }, - { - "epoch": 6.822425032594524, - "grad_norm": 1.4239518642425537, - "learning_rate": 8.735778894472362e-05, - "loss": 5.3818, - "step": 13082 - }, - { - "epoch": 6.822946544980443, - "grad_norm": 1.3181743621826172, - "learning_rate": 8.7356783919598e-05, - "loss": 5.7137, - "step": 13083 - }, - { - "epoch": 6.823468057366362, - "grad_norm": 1.5173778533935547, - "learning_rate": 8.735577889447236e-05, - "loss": 5.3016, - "step": 13084 - }, - { - "epoch": 6.823989569752282, - "grad_norm": 1.4261589050292969, - "learning_rate": 8.735477386934674e-05, - "loss": 5.5066, - "step": 13085 - }, - { - "epoch": 6.8245110821382005, - "grad_norm": 1.3882603645324707, - "learning_rate": 8.73537688442211e-05, - "loss": 5.5096, - "step": 13086 - }, - { - "epoch": 6.82503259452412, - "grad_norm": 1.5649189949035645, - "learning_rate": 8.735276381909548e-05, - "loss": 5.1197, - "step": 13087 - }, - { - "epoch": 6.825554106910039, - "grad_norm": 1.3453960418701172, - "learning_rate": 8.735175879396985e-05, - "loss": 5.7102, - "step": 13088 - }, - { - "epoch": 6.826075619295958, - "grad_norm": 1.4414796829223633, - "learning_rate": 8.735075376884423e-05, - "loss": 5.4909, - "step": 13089 - }, - { - "epoch": 6.826597131681877, - "grad_norm": 1.4232909679412842, - "learning_rate": 8.734974874371859e-05, - "loss": 5.5597, - "step": 13090 - }, - { - "epoch": 6.827118644067797, - "grad_norm": 1.464868426322937, - "learning_rate": 8.734874371859297e-05, - "loss": 5.0611, - "step": 13091 - }, - { - "epoch": 6.8276401564537155, - "grad_norm": 1.4110345840454102, - "learning_rate": 8.734773869346735e-05, - "loss": 5.8023, - "step": 13092 - }, - { - "epoch": 6.828161668839635, - "grad_norm": 1.4148502349853516, - "learning_rate": 8.734673366834172e-05, - "loss": 5.6084, - "step": 13093 - }, - { - "epoch": 6.828683181225554, - "grad_norm": 1.4783415794372559, - "learning_rate": 8.734572864321609e-05, - "loss": 5.6349, - "step": 13094 - }, - { - "epoch": 6.829204693611473, - "grad_norm": 1.4886547327041626, - "learning_rate": 8.734472361809045e-05, - "loss": 5.4447, - "step": 13095 - }, - { - "epoch": 6.829726205997392, - "grad_norm": 1.335707187652588, - "learning_rate": 8.734371859296483e-05, - "loss": 5.9977, - "step": 13096 - }, - { - "epoch": 6.830247718383312, - "grad_norm": 1.4298427104949951, - "learning_rate": 8.73427135678392e-05, - "loss": 5.8318, - "step": 13097 - }, - { - "epoch": 6.8307692307692305, - "grad_norm": 1.414836049079895, - "learning_rate": 8.734170854271357e-05, - "loss": 5.7182, - "step": 13098 - }, - { - "epoch": 6.83129074315515, - "grad_norm": 1.4735349416732788, - "learning_rate": 8.734070351758794e-05, - "loss": 5.626, - "step": 13099 - }, - { - "epoch": 6.831812255541069, - "grad_norm": 1.4156290292739868, - "learning_rate": 8.733969849246231e-05, - "loss": 5.6918, - "step": 13100 - }, - { - "epoch": 6.832333767926988, - "grad_norm": 1.4047309160232544, - "learning_rate": 8.733869346733669e-05, - "loss": 5.5215, - "step": 13101 - }, - { - "epoch": 6.8328552803129075, - "grad_norm": 1.494475245475769, - "learning_rate": 8.733768844221107e-05, - "loss": 5.6677, - "step": 13102 - }, - { - "epoch": 6.833376792698827, - "grad_norm": 1.7289236783981323, - "learning_rate": 8.733668341708543e-05, - "loss": 4.7159, - "step": 13103 - }, - { - "epoch": 6.8338983050847455, - "grad_norm": 1.4428136348724365, - "learning_rate": 8.733567839195981e-05, - "loss": 5.8102, - "step": 13104 - }, - { - "epoch": 6.834419817470665, - "grad_norm": 1.3666611909866333, - "learning_rate": 8.733467336683418e-05, - "loss": 5.7284, - "step": 13105 - }, - { - "epoch": 6.834941329856584, - "grad_norm": 1.3787580728530884, - "learning_rate": 8.733366834170855e-05, - "loss": 5.5316, - "step": 13106 - }, - { - "epoch": 6.835462842242503, - "grad_norm": 1.4141874313354492, - "learning_rate": 8.733266331658292e-05, - "loss": 5.7226, - "step": 13107 - }, - { - "epoch": 6.8359843546284225, - "grad_norm": 1.5294097661972046, - "learning_rate": 8.733165829145728e-05, - "loss": 4.9287, - "step": 13108 - }, - { - "epoch": 6.836505867014342, - "grad_norm": 1.4488030672073364, - "learning_rate": 8.733065326633166e-05, - "loss": 5.3022, - "step": 13109 - }, - { - "epoch": 6.8370273794002605, - "grad_norm": 1.5019017457962036, - "learning_rate": 8.732964824120602e-05, - "loss": 5.5886, - "step": 13110 - }, - { - "epoch": 6.83754889178618, - "grad_norm": 1.4181054830551147, - "learning_rate": 8.73286432160804e-05, - "loss": 5.9906, - "step": 13111 - }, - { - "epoch": 6.838070404172099, - "grad_norm": 1.4252159595489502, - "learning_rate": 8.732763819095478e-05, - "loss": 5.7324, - "step": 13112 - }, - { - "epoch": 6.838591916558018, - "grad_norm": 1.4503065347671509, - "learning_rate": 8.732663316582916e-05, - "loss": 5.6754, - "step": 13113 - }, - { - "epoch": 6.8391134289439375, - "grad_norm": 1.3199219703674316, - "learning_rate": 8.732562814070352e-05, - "loss": 5.9995, - "step": 13114 - }, - { - "epoch": 6.839634941329857, - "grad_norm": 1.496468186378479, - "learning_rate": 8.73246231155779e-05, - "loss": 5.1386, - "step": 13115 - }, - { - "epoch": 6.8401564537157755, - "grad_norm": 1.4446656703948975, - "learning_rate": 8.732361809045226e-05, - "loss": 5.0613, - "step": 13116 - }, - { - "epoch": 6.840677966101695, - "grad_norm": 1.3997827768325806, - "learning_rate": 8.732261306532664e-05, - "loss": 5.2641, - "step": 13117 - }, - { - "epoch": 6.8411994784876144, - "grad_norm": Infinity, - "learning_rate": 8.732261306532664e-05, - "loss": 5.361, - "step": 13118 - }, - { - "epoch": 6.841720990873533, - "grad_norm": 1.3405433893203735, - "learning_rate": 8.7321608040201e-05, - "loss": 5.6263, - "step": 13119 - }, - { - "epoch": 6.8422425032594525, - "grad_norm": 1.3896105289459229, - "learning_rate": 8.732060301507538e-05, - "loss": 5.8165, - "step": 13120 - }, - { - "epoch": 6.842764015645372, - "grad_norm": 1.392599105834961, - "learning_rate": 8.731959798994975e-05, - "loss": 5.7815, - "step": 13121 - }, - { - "epoch": 6.8432855280312905, - "grad_norm": 1.6999871730804443, - "learning_rate": 8.731859296482413e-05, - "loss": 5.5647, - "step": 13122 - }, - { - "epoch": 6.84380704041721, - "grad_norm": 1.4028860330581665, - "learning_rate": 8.73175879396985e-05, - "loss": 5.2894, - "step": 13123 - }, - { - "epoch": 6.8443285528031295, - "grad_norm": 1.4749451875686646, - "learning_rate": 8.731658291457287e-05, - "loss": 6.0361, - "step": 13124 - }, - { - "epoch": 6.844850065189048, - "grad_norm": 1.3465672731399536, - "learning_rate": 8.731557788944725e-05, - "loss": 6.0044, - "step": 13125 - }, - { - "epoch": 6.8453715775749675, - "grad_norm": 1.495519757270813, - "learning_rate": 8.731457286432161e-05, - "loss": 5.6031, - "step": 13126 - }, - { - "epoch": 6.845893089960887, - "grad_norm": 1.4587502479553223, - "learning_rate": 8.731356783919599e-05, - "loss": 5.5831, - "step": 13127 - }, - { - "epoch": 6.8464146023468055, - "grad_norm": 1.5295110940933228, - "learning_rate": 8.731256281407035e-05, - "loss": 5.8104, - "step": 13128 - }, - { - "epoch": 6.846936114732725, - "grad_norm": 1.461858868598938, - "learning_rate": 8.731155778894473e-05, - "loss": 4.9841, - "step": 13129 - }, - { - "epoch": 6.847457627118644, - "grad_norm": 1.4394958019256592, - "learning_rate": 8.73105527638191e-05, - "loss": 5.5966, - "step": 13130 - }, - { - "epoch": 6.847979139504563, - "grad_norm": 1.6307897567749023, - "learning_rate": 8.730954773869347e-05, - "loss": 5.3414, - "step": 13131 - }, - { - "epoch": 6.8485006518904825, - "grad_norm": 1.3828166723251343, - "learning_rate": 8.730854271356784e-05, - "loss": 5.4863, - "step": 13132 - }, - { - "epoch": 6.849022164276402, - "grad_norm": 1.5379228591918945, - "learning_rate": 8.730753768844221e-05, - "loss": 5.478, - "step": 13133 - }, - { - "epoch": 6.849543676662321, - "grad_norm": 1.4205949306488037, - "learning_rate": 8.730653266331659e-05, - "loss": 5.4469, - "step": 13134 - }, - { - "epoch": 6.85006518904824, - "grad_norm": 1.390470266342163, - "learning_rate": 8.730552763819096e-05, - "loss": 5.7008, - "step": 13135 - }, - { - "epoch": 6.850586701434159, - "grad_norm": 1.335784673690796, - "learning_rate": 8.730452261306533e-05, - "loss": 5.9986, - "step": 13136 - }, - { - "epoch": 6.851108213820078, - "grad_norm": 1.372817039489746, - "learning_rate": 8.73035175879397e-05, - "loss": 5.7393, - "step": 13137 - }, - { - "epoch": 6.8516297262059975, - "grad_norm": 1.3501542806625366, - "learning_rate": 8.730251256281408e-05, - "loss": 5.6741, - "step": 13138 - }, - { - "epoch": 6.852151238591917, - "grad_norm": 1.5561789274215698, - "learning_rate": 8.730150753768844e-05, - "loss": 5.5, - "step": 13139 - }, - { - "epoch": 6.852672750977836, - "grad_norm": 1.3708488941192627, - "learning_rate": 8.730050251256282e-05, - "loss": 5.6536, - "step": 13140 - }, - { - "epoch": 6.853194263363755, - "grad_norm": 1.3966197967529297, - "learning_rate": 8.729949748743718e-05, - "loss": 5.3566, - "step": 13141 - }, - { - "epoch": 6.853715775749674, - "grad_norm": 1.4969977140426636, - "learning_rate": 8.729849246231156e-05, - "loss": 5.0373, - "step": 13142 - }, - { - "epoch": 6.854237288135593, - "grad_norm": 1.532609462738037, - "learning_rate": 8.729748743718593e-05, - "loss": 5.8968, - "step": 13143 - }, - { - "epoch": 6.8547588005215125, - "grad_norm": 1.4303885698318481, - "learning_rate": 8.72964824120603e-05, - "loss": 4.8713, - "step": 13144 - }, - { - "epoch": 6.855280312907432, - "grad_norm": 1.4966503381729126, - "learning_rate": 8.729547738693468e-05, - "loss": 5.9034, - "step": 13145 - }, - { - "epoch": 6.855801825293351, - "grad_norm": 1.4323278665542603, - "learning_rate": 8.729447236180906e-05, - "loss": 5.3319, - "step": 13146 - }, - { - "epoch": 6.85632333767927, - "grad_norm": 1.4096976518630981, - "learning_rate": 8.729346733668342e-05, - "loss": 5.8221, - "step": 13147 - }, - { - "epoch": 6.856844850065189, - "grad_norm": 1.512694001197815, - "learning_rate": 8.729246231155779e-05, - "loss": 5.2648, - "step": 13148 - }, - { - "epoch": 6.857366362451108, - "grad_norm": 1.4097522497177124, - "learning_rate": 8.729145728643217e-05, - "loss": 5.6509, - "step": 13149 - }, - { - "epoch": 6.8578878748370276, - "grad_norm": 1.4021409749984741, - "learning_rate": 8.729045226130653e-05, - "loss": 5.8579, - "step": 13150 - }, - { - "epoch": 6.858409387222947, - "grad_norm": 1.6009441614151, - "learning_rate": 8.728944723618091e-05, - "loss": 5.2366, - "step": 13151 - }, - { - "epoch": 6.858930899608866, - "grad_norm": 1.5279228687286377, - "learning_rate": 8.728844221105527e-05, - "loss": 4.8398, - "step": 13152 - }, - { - "epoch": 6.859452411994785, - "grad_norm": 1.3921234607696533, - "learning_rate": 8.728743718592965e-05, - "loss": 5.796, - "step": 13153 - }, - { - "epoch": 6.859973924380704, - "grad_norm": 1.5485622882843018, - "learning_rate": 8.728643216080403e-05, - "loss": 5.3697, - "step": 13154 - }, - { - "epoch": 6.860495436766623, - "grad_norm": 1.4151583909988403, - "learning_rate": 8.72854271356784e-05, - "loss": 5.8837, - "step": 13155 - }, - { - "epoch": 6.861016949152543, - "grad_norm": 1.3956092596054077, - "learning_rate": 8.728442211055277e-05, - "loss": 5.8138, - "step": 13156 - }, - { - "epoch": 6.861538461538462, - "grad_norm": 1.4662173986434937, - "learning_rate": 8.728341708542715e-05, - "loss": 5.3054, - "step": 13157 - }, - { - "epoch": 6.862059973924381, - "grad_norm": 1.5415301322937012, - "learning_rate": 8.728241206030151e-05, - "loss": 4.9936, - "step": 13158 - }, - { - "epoch": 6.8625814863103, - "grad_norm": 1.335253119468689, - "learning_rate": 8.728140703517589e-05, - "loss": 5.7895, - "step": 13159 - }, - { - "epoch": 6.863102998696219, - "grad_norm": 1.4056620597839355, - "learning_rate": 8.728040201005025e-05, - "loss": 5.115, - "step": 13160 - }, - { - "epoch": 6.863624511082138, - "grad_norm": 1.553186058998108, - "learning_rate": 8.727939698492462e-05, - "loss": 4.7464, - "step": 13161 - }, - { - "epoch": 6.864146023468058, - "grad_norm": 1.364774465560913, - "learning_rate": 8.7278391959799e-05, - "loss": 5.6091, - "step": 13162 - }, - { - "epoch": 6.864667535853976, - "grad_norm": 1.4432374238967896, - "learning_rate": 8.727738693467336e-05, - "loss": 5.1616, - "step": 13163 - }, - { - "epoch": 6.865189048239896, - "grad_norm": 1.4214255809783936, - "learning_rate": 8.727638190954774e-05, - "loss": 5.3747, - "step": 13164 - }, - { - "epoch": 6.865710560625815, - "grad_norm": 1.4623116254806519, - "learning_rate": 8.727537688442212e-05, - "loss": 5.699, - "step": 13165 - }, - { - "epoch": 6.866232073011734, - "grad_norm": 1.4343132972717285, - "learning_rate": 8.72743718592965e-05, - "loss": 5.6034, - "step": 13166 - }, - { - "epoch": 6.866753585397653, - "grad_norm": 1.4713164567947388, - "learning_rate": 8.727336683417086e-05, - "loss": 5.3098, - "step": 13167 - }, - { - "epoch": 6.867275097783573, - "grad_norm": 1.3502472639083862, - "learning_rate": 8.727236180904524e-05, - "loss": 5.0736, - "step": 13168 - }, - { - "epoch": 6.867796610169491, - "grad_norm": 1.4484864473342896, - "learning_rate": 8.72713567839196e-05, - "loss": 5.2323, - "step": 13169 - }, - { - "epoch": 6.868318122555411, - "grad_norm": 1.3253417015075684, - "learning_rate": 8.727035175879398e-05, - "loss": 5.9456, - "step": 13170 - }, - { - "epoch": 6.86883963494133, - "grad_norm": 1.4791173934936523, - "learning_rate": 8.726934673366834e-05, - "loss": 5.4112, - "step": 13171 - }, - { - "epoch": 6.869361147327249, - "grad_norm": 1.4555723667144775, - "learning_rate": 8.726834170854272e-05, - "loss": 5.6912, - "step": 13172 - }, - { - "epoch": 6.869882659713168, - "grad_norm": 1.3762667179107666, - "learning_rate": 8.726733668341708e-05, - "loss": 5.7866, - "step": 13173 - }, - { - "epoch": 6.870404172099088, - "grad_norm": 1.5583804845809937, - "learning_rate": 8.726633165829146e-05, - "loss": 5.5142, - "step": 13174 - }, - { - "epoch": 6.870925684485006, - "grad_norm": 1.2752480506896973, - "learning_rate": 8.726532663316584e-05, - "loss": 5.5938, - "step": 13175 - }, - { - "epoch": 6.871447196870926, - "grad_norm": 1.5201846361160278, - "learning_rate": 8.72643216080402e-05, - "loss": 5.1749, - "step": 13176 - }, - { - "epoch": 6.871968709256845, - "grad_norm": 1.4283307790756226, - "learning_rate": 8.726331658291458e-05, - "loss": 5.5305, - "step": 13177 - }, - { - "epoch": 6.872490221642764, - "grad_norm": 1.3894824981689453, - "learning_rate": 8.726231155778895e-05, - "loss": 5.2629, - "step": 13178 - }, - { - "epoch": 6.873011734028683, - "grad_norm": 1.442294716835022, - "learning_rate": 8.726130653266332e-05, - "loss": 5.4619, - "step": 13179 - }, - { - "epoch": 6.873533246414603, - "grad_norm": 1.4326878786087036, - "learning_rate": 8.726030150753769e-05, - "loss": 5.865, - "step": 13180 - }, - { - "epoch": 6.874054758800521, - "grad_norm": 1.3078805208206177, - "learning_rate": 8.725929648241207e-05, - "loss": 6.0233, - "step": 13181 - }, - { - "epoch": 6.874576271186441, - "grad_norm": 1.4145395755767822, - "learning_rate": 8.725829145728643e-05, - "loss": 5.3984, - "step": 13182 - }, - { - "epoch": 6.87509778357236, - "grad_norm": 1.3141976594924927, - "learning_rate": 8.725728643216081e-05, - "loss": 5.8508, - "step": 13183 - }, - { - "epoch": 6.875619295958279, - "grad_norm": 1.3464046716690063, - "learning_rate": 8.725628140703517e-05, - "loss": 5.7132, - "step": 13184 - }, - { - "epoch": 6.876140808344198, - "grad_norm": 1.4892749786376953, - "learning_rate": 8.725527638190955e-05, - "loss": 5.4607, - "step": 13185 - }, - { - "epoch": 6.876662320730118, - "grad_norm": 1.3456188440322876, - "learning_rate": 8.725427135678393e-05, - "loss": 5.9198, - "step": 13186 - }, - { - "epoch": 6.877183833116036, - "grad_norm": 1.6661677360534668, - "learning_rate": 8.72532663316583e-05, - "loss": 5.585, - "step": 13187 - }, - { - "epoch": 6.877705345501956, - "grad_norm": 1.4901773929595947, - "learning_rate": 8.725226130653267e-05, - "loss": 5.5624, - "step": 13188 - }, - { - "epoch": 6.878226857887875, - "grad_norm": 1.4497349262237549, - "learning_rate": 8.725125628140703e-05, - "loss": 5.5006, - "step": 13189 - }, - { - "epoch": 6.878748370273794, - "grad_norm": 1.4228477478027344, - "learning_rate": 8.725025125628141e-05, - "loss": 5.9949, - "step": 13190 - }, - { - "epoch": 6.879269882659713, - "grad_norm": 1.5094822645187378, - "learning_rate": 8.724924623115578e-05, - "loss": 5.7937, - "step": 13191 - }, - { - "epoch": 6.879791395045633, - "grad_norm": 1.6918152570724487, - "learning_rate": 8.724824120603015e-05, - "loss": 5.4494, - "step": 13192 - }, - { - "epoch": 6.880312907431551, - "grad_norm": 1.3921840190887451, - "learning_rate": 8.724723618090452e-05, - "loss": 5.6562, - "step": 13193 - }, - { - "epoch": 6.880834419817471, - "grad_norm": 1.324840784072876, - "learning_rate": 8.72462311557789e-05, - "loss": 5.8676, - "step": 13194 - }, - { - "epoch": 6.88135593220339, - "grad_norm": 1.4437055587768555, - "learning_rate": 8.724522613065327e-05, - "loss": 5.2625, - "step": 13195 - }, - { - "epoch": 6.881877444589309, - "grad_norm": 1.4696484804153442, - "learning_rate": 8.724422110552765e-05, - "loss": 5.8266, - "step": 13196 - }, - { - "epoch": 6.882398956975228, - "grad_norm": 1.4741723537445068, - "learning_rate": 8.724321608040202e-05, - "loss": 5.7074, - "step": 13197 - }, - { - "epoch": 6.882920469361148, - "grad_norm": 1.3416105508804321, - "learning_rate": 8.72422110552764e-05, - "loss": 5.9414, - "step": 13198 - }, - { - "epoch": 6.883441981747066, - "grad_norm": 1.3794000148773193, - "learning_rate": 8.724120603015076e-05, - "loss": 5.491, - "step": 13199 - }, - { - "epoch": 6.883963494132986, - "grad_norm": 1.409883737564087, - "learning_rate": 8.724020100502514e-05, - "loss": 5.8139, - "step": 13200 - }, - { - "epoch": 6.884485006518905, - "grad_norm": 1.5332021713256836, - "learning_rate": 8.72391959798995e-05, - "loss": 5.5962, - "step": 13201 - }, - { - "epoch": 6.885006518904824, - "grad_norm": 1.505989670753479, - "learning_rate": 8.723819095477386e-05, - "loss": 5.6001, - "step": 13202 - }, - { - "epoch": 6.885528031290743, - "grad_norm": 1.3647571802139282, - "learning_rate": 8.723718592964824e-05, - "loss": 5.6893, - "step": 13203 - }, - { - "epoch": 6.886049543676663, - "grad_norm": 1.4559099674224854, - "learning_rate": 8.723618090452261e-05, - "loss": 5.4508, - "step": 13204 - }, - { - "epoch": 6.886571056062581, - "grad_norm": 1.4732754230499268, - "learning_rate": 8.723517587939698e-05, - "loss": 5.5885, - "step": 13205 - }, - { - "epoch": 6.887092568448501, - "grad_norm": 1.3421133756637573, - "learning_rate": 8.723417085427136e-05, - "loss": 5.44, - "step": 13206 - }, - { - "epoch": 6.88761408083442, - "grad_norm": 1.4015789031982422, - "learning_rate": 8.723316582914574e-05, - "loss": 5.8009, - "step": 13207 - }, - { - "epoch": 6.888135593220339, - "grad_norm": 1.5020679235458374, - "learning_rate": 8.72321608040201e-05, - "loss": 5.4555, - "step": 13208 - }, - { - "epoch": 6.888657105606258, - "grad_norm": 1.4048621654510498, - "learning_rate": 8.723115577889448e-05, - "loss": 6.0182, - "step": 13209 - }, - { - "epoch": 6.889178617992178, - "grad_norm": 1.480368733406067, - "learning_rate": 8.723015075376885e-05, - "loss": 5.6949, - "step": 13210 - }, - { - "epoch": 6.889700130378096, - "grad_norm": 1.453173279762268, - "learning_rate": 8.722914572864322e-05, - "loss": 5.549, - "step": 13211 - }, - { - "epoch": 6.890221642764016, - "grad_norm": 1.3605462312698364, - "learning_rate": 8.722814070351759e-05, - "loss": 5.7951, - "step": 13212 - }, - { - "epoch": 6.890743155149935, - "grad_norm": 1.3382529020309448, - "learning_rate": 8.722713567839197e-05, - "loss": 6.1252, - "step": 13213 - }, - { - "epoch": 6.891264667535854, - "grad_norm": 1.4121787548065186, - "learning_rate": 8.722613065326633e-05, - "loss": 5.169, - "step": 13214 - }, - { - "epoch": 6.891786179921773, - "grad_norm": 1.4558900594711304, - "learning_rate": 8.722512562814071e-05, - "loss": 5.5668, - "step": 13215 - }, - { - "epoch": 6.892307692307693, - "grad_norm": 1.3969424962997437, - "learning_rate": 8.722412060301509e-05, - "loss": 5.9214, - "step": 13216 - }, - { - "epoch": 6.892829204693611, - "grad_norm": 1.4808926582336426, - "learning_rate": 8.722311557788945e-05, - "loss": 5.6017, - "step": 13217 - }, - { - "epoch": 6.893350717079531, - "grad_norm": 1.392142415046692, - "learning_rate": 8.722211055276383e-05, - "loss": 5.5681, - "step": 13218 - }, - { - "epoch": 6.893872229465449, - "grad_norm": 1.3810856342315674, - "learning_rate": 8.722110552763819e-05, - "loss": 5.2584, - "step": 13219 - }, - { - "epoch": 6.894393741851369, - "grad_norm": 1.4694420099258423, - "learning_rate": 8.722010050251257e-05, - "loss": 5.6668, - "step": 13220 - }, - { - "epoch": 6.894915254237288, - "grad_norm": 1.3267583847045898, - "learning_rate": 8.721909547738694e-05, - "loss": 5.6424, - "step": 13221 - }, - { - "epoch": 6.895436766623208, - "grad_norm": 1.3700863122940063, - "learning_rate": 8.721809045226131e-05, - "loss": 6.0408, - "step": 13222 - }, - { - "epoch": 6.895958279009126, - "grad_norm": 1.3975127935409546, - "learning_rate": 8.721708542713568e-05, - "loss": 5.7675, - "step": 13223 - }, - { - "epoch": 6.896479791395046, - "grad_norm": 1.8679968118667603, - "learning_rate": 8.721608040201005e-05, - "loss": 4.4971, - "step": 13224 - }, - { - "epoch": 6.897001303780964, - "grad_norm": 1.4413628578186035, - "learning_rate": 8.721507537688442e-05, - "loss": 5.3969, - "step": 13225 - }, - { - "epoch": 6.897522816166884, - "grad_norm": 1.5381438732147217, - "learning_rate": 8.72140703517588e-05, - "loss": 5.8236, - "step": 13226 - }, - { - "epoch": 6.898044328552803, - "grad_norm": 1.4670847654342651, - "learning_rate": 8.721306532663317e-05, - "loss": 5.6581, - "step": 13227 - }, - { - "epoch": 6.898565840938723, - "grad_norm": 1.5865319967269897, - "learning_rate": 8.721206030150754e-05, - "loss": 5.2943, - "step": 13228 - }, - { - "epoch": 6.899087353324641, - "grad_norm": 1.394509196281433, - "learning_rate": 8.721105527638192e-05, - "loss": 5.9158, - "step": 13229 - }, - { - "epoch": 6.899608865710561, - "grad_norm": 1.3617596626281738, - "learning_rate": 8.721005025125628e-05, - "loss": 5.9961, - "step": 13230 - }, - { - "epoch": 6.900130378096479, - "grad_norm": 1.4174559116363525, - "learning_rate": 8.720904522613066e-05, - "loss": 5.5684, - "step": 13231 - }, - { - "epoch": 6.900651890482399, - "grad_norm": 1.3633148670196533, - "learning_rate": 8.720804020100502e-05, - "loss": 5.644, - "step": 13232 - }, - { - "epoch": 6.901173402868318, - "grad_norm": 1.4199458360671997, - "learning_rate": 8.72070351758794e-05, - "loss": 5.7105, - "step": 13233 - }, - { - "epoch": 6.901694915254238, - "grad_norm": 1.4530327320098877, - "learning_rate": 8.720603015075377e-05, - "loss": 5.4772, - "step": 13234 - }, - { - "epoch": 6.902216427640156, - "grad_norm": 1.3954952955245972, - "learning_rate": 8.720502512562814e-05, - "loss": 5.5851, - "step": 13235 - }, - { - "epoch": 6.902737940026076, - "grad_norm": 1.3727400302886963, - "learning_rate": 8.720402010050252e-05, - "loss": 5.9219, - "step": 13236 - }, - { - "epoch": 6.903259452411994, - "grad_norm": 1.3902932405471802, - "learning_rate": 8.72030150753769e-05, - "loss": 5.7362, - "step": 13237 - }, - { - "epoch": 6.903780964797914, - "grad_norm": 1.447379231452942, - "learning_rate": 8.720201005025126e-05, - "loss": 5.4472, - "step": 13238 - }, - { - "epoch": 6.904302477183833, - "grad_norm": 1.3186500072479248, - "learning_rate": 8.720100502512564e-05, - "loss": 5.7276, - "step": 13239 - }, - { - "epoch": 6.904823989569753, - "grad_norm": 1.385095477104187, - "learning_rate": 8.72e-05, - "loss": 5.3252, - "step": 13240 - }, - { - "epoch": 6.905345501955671, - "grad_norm": 1.3445531129837036, - "learning_rate": 8.719899497487437e-05, - "loss": 5.443, - "step": 13241 - }, - { - "epoch": 6.905867014341591, - "grad_norm": 1.3237533569335938, - "learning_rate": 8.719798994974875e-05, - "loss": 5.8137, - "step": 13242 - }, - { - "epoch": 6.906388526727509, - "grad_norm": 1.2612031698226929, - "learning_rate": 8.719698492462311e-05, - "loss": 5.8713, - "step": 13243 - }, - { - "epoch": 6.906910039113429, - "grad_norm": 1.385340690612793, - "learning_rate": 8.719597989949749e-05, - "loss": 5.498, - "step": 13244 - }, - { - "epoch": 6.907431551499348, - "grad_norm": 1.701211929321289, - "learning_rate": 8.719497487437185e-05, - "loss": 5.5625, - "step": 13245 - }, - { - "epoch": 6.907953063885268, - "grad_norm": 1.4198564291000366, - "learning_rate": 8.719396984924623e-05, - "loss": 5.8423, - "step": 13246 - }, - { - "epoch": 6.908474576271186, - "grad_norm": 1.3155579566955566, - "learning_rate": 8.719296482412061e-05, - "loss": 5.8663, - "step": 13247 - }, - { - "epoch": 6.908996088657106, - "grad_norm": 1.3304059505462646, - "learning_rate": 8.719195979899499e-05, - "loss": 5.7987, - "step": 13248 - }, - { - "epoch": 6.909517601043024, - "grad_norm": 1.4307845830917358, - "learning_rate": 8.719095477386935e-05, - "loss": 5.7693, - "step": 13249 - }, - { - "epoch": 6.910039113428944, - "grad_norm": 1.343855619430542, - "learning_rate": 8.718994974874373e-05, - "loss": 5.8502, - "step": 13250 - }, - { - "epoch": 6.910560625814863, - "grad_norm": 1.706536889076233, - "learning_rate": 8.71889447236181e-05, - "loss": 5.3318, - "step": 13251 - }, - { - "epoch": 6.911082138200783, - "grad_norm": 1.364210844039917, - "learning_rate": 8.718793969849247e-05, - "loss": 5.7844, - "step": 13252 - }, - { - "epoch": 6.911603650586701, - "grad_norm": 1.4805934429168701, - "learning_rate": 8.718693467336684e-05, - "loss": 5.7988, - "step": 13253 - }, - { - "epoch": 6.912125162972621, - "grad_norm": 1.3838690519332886, - "learning_rate": 8.71859296482412e-05, - "loss": 5.6777, - "step": 13254 - }, - { - "epoch": 6.912646675358539, - "grad_norm": 1.459098219871521, - "learning_rate": 8.718492462311558e-05, - "loss": 5.3691, - "step": 13255 - }, - { - "epoch": 6.913168187744459, - "grad_norm": 1.3118094205856323, - "learning_rate": 8.718391959798996e-05, - "loss": 5.8146, - "step": 13256 - }, - { - "epoch": 6.913689700130378, - "grad_norm": 1.4691168069839478, - "learning_rate": 8.718291457286433e-05, - "loss": 5.5871, - "step": 13257 - }, - { - "epoch": 6.914211212516297, - "grad_norm": 1.2898691892623901, - "learning_rate": 8.71819095477387e-05, - "loss": 5.6116, - "step": 13258 - }, - { - "epoch": 6.914732724902216, - "grad_norm": 1.5554901361465454, - "learning_rate": 8.718090452261308e-05, - "loss": 5.5725, - "step": 13259 - }, - { - "epoch": 6.915254237288136, - "grad_norm": 1.4737842082977295, - "learning_rate": 8.717989949748744e-05, - "loss": 5.7095, - "step": 13260 - }, - { - "epoch": 6.915775749674054, - "grad_norm": 1.4652020931243896, - "learning_rate": 8.717889447236182e-05, - "loss": 5.3408, - "step": 13261 - }, - { - "epoch": 6.916297262059974, - "grad_norm": 1.3736578226089478, - "learning_rate": 8.717788944723618e-05, - "loss": 5.7088, - "step": 13262 - }, - { - "epoch": 6.916818774445893, - "grad_norm": 1.4180052280426025, - "learning_rate": 8.717688442211056e-05, - "loss": 5.5649, - "step": 13263 - }, - { - "epoch": 6.917340286831812, - "grad_norm": 1.4963427782058716, - "learning_rate": 8.717587939698492e-05, - "loss": 5.4054, - "step": 13264 - }, - { - "epoch": 6.917861799217731, - "grad_norm": 1.4722579717636108, - "learning_rate": 8.71748743718593e-05, - "loss": 5.5226, - "step": 13265 - }, - { - "epoch": 6.918383311603651, - "grad_norm": 1.46590256690979, - "learning_rate": 8.717386934673367e-05, - "loss": 5.2344, - "step": 13266 - }, - { - "epoch": 6.918904823989569, - "grad_norm": 1.4531309604644775, - "learning_rate": 8.717286432160804e-05, - "loss": 5.4399, - "step": 13267 - }, - { - "epoch": 6.919426336375489, - "grad_norm": 1.354246735572815, - "learning_rate": 8.717185929648242e-05, - "loss": 5.66, - "step": 13268 - }, - { - "epoch": 6.919947848761408, - "grad_norm": 1.443540334701538, - "learning_rate": 8.717085427135679e-05, - "loss": 5.4641, - "step": 13269 - }, - { - "epoch": 6.920469361147327, - "grad_norm": 1.4912827014923096, - "learning_rate": 8.716984924623116e-05, - "loss": 5.8973, - "step": 13270 - }, - { - "epoch": 6.920990873533246, - "grad_norm": 1.3750600814819336, - "learning_rate": 8.716884422110553e-05, - "loss": 5.5577, - "step": 13271 - }, - { - "epoch": 6.921512385919166, - "grad_norm": 1.5366754531860352, - "learning_rate": 8.71678391959799e-05, - "loss": 5.0216, - "step": 13272 - }, - { - "epoch": 6.922033898305084, - "grad_norm": 1.6127934455871582, - "learning_rate": 8.716683417085427e-05, - "loss": 5.4429, - "step": 13273 - }, - { - "epoch": 6.922555410691004, - "grad_norm": 1.4167646169662476, - "learning_rate": 8.716582914572865e-05, - "loss": 5.8597, - "step": 13274 - }, - { - "epoch": 6.923076923076923, - "grad_norm": 1.4411351680755615, - "learning_rate": 8.716482412060301e-05, - "loss": 5.5687, - "step": 13275 - }, - { - "epoch": 6.923598435462842, - "grad_norm": 1.4399065971374512, - "learning_rate": 8.716381909547739e-05, - "loss": 5.8471, - "step": 13276 - }, - { - "epoch": 6.924119947848761, - "grad_norm": 1.3144664764404297, - "learning_rate": 8.716281407035177e-05, - "loss": 5.6316, - "step": 13277 - }, - { - "epoch": 6.924641460234681, - "grad_norm": 1.5150513648986816, - "learning_rate": 8.716180904522615e-05, - "loss": 5.3082, - "step": 13278 - }, - { - "epoch": 6.925162972620599, - "grad_norm": 1.4315844774246216, - "learning_rate": 8.716080402010051e-05, - "loss": 5.8042, - "step": 13279 - }, - { - "epoch": 6.925684485006519, - "grad_norm": 1.4487781524658203, - "learning_rate": 8.715979899497489e-05, - "loss": 5.6111, - "step": 13280 - }, - { - "epoch": 6.926205997392438, - "grad_norm": 1.4278978109359741, - "learning_rate": 8.715879396984925e-05, - "loss": 5.785, - "step": 13281 - }, - { - "epoch": 6.926727509778357, - "grad_norm": 1.4267510175704956, - "learning_rate": 8.715778894472362e-05, - "loss": 5.5838, - "step": 13282 - }, - { - "epoch": 6.927249022164276, - "grad_norm": 1.3004896640777588, - "learning_rate": 8.7156783919598e-05, - "loss": 5.8541, - "step": 13283 - }, - { - "epoch": 6.927770534550196, - "grad_norm": 1.5250394344329834, - "learning_rate": 8.715577889447236e-05, - "loss": 5.7226, - "step": 13284 - }, - { - "epoch": 6.9282920469361144, - "grad_norm": 1.5303797721862793, - "learning_rate": 8.715477386934674e-05, - "loss": 5.9732, - "step": 13285 - }, - { - "epoch": 6.928813559322034, - "grad_norm": 1.4950717687606812, - "learning_rate": 8.71537688442211e-05, - "loss": 4.8119, - "step": 13286 - }, - { - "epoch": 6.929335071707953, - "grad_norm": 1.4639976024627686, - "learning_rate": 8.715276381909548e-05, - "loss": 5.6793, - "step": 13287 - }, - { - "epoch": 6.929856584093872, - "grad_norm": 1.38613760471344, - "learning_rate": 8.715175879396986e-05, - "loss": 5.5166, - "step": 13288 - }, - { - "epoch": 6.930378096479791, - "grad_norm": 1.4668622016906738, - "learning_rate": 8.715075376884423e-05, - "loss": 5.8096, - "step": 13289 - }, - { - "epoch": 6.930899608865711, - "grad_norm": 1.3955070972442627, - "learning_rate": 8.71497487437186e-05, - "loss": 5.7188, - "step": 13290 - }, - { - "epoch": 6.9314211212516295, - "grad_norm": 1.3983622789382935, - "learning_rate": 8.714874371859298e-05, - "loss": 5.4946, - "step": 13291 - }, - { - "epoch": 6.931942633637549, - "grad_norm": 1.6636418104171753, - "learning_rate": 8.714773869346734e-05, - "loss": 5.3901, - "step": 13292 - }, - { - "epoch": 6.932464146023468, - "grad_norm": 1.5902067422866821, - "learning_rate": 8.714673366834172e-05, - "loss": 5.5319, - "step": 13293 - }, - { - "epoch": 6.932985658409387, - "grad_norm": 1.3940727710723877, - "learning_rate": 8.714572864321608e-05, - "loss": 5.6151, - "step": 13294 - }, - { - "epoch": 6.933507170795306, - "grad_norm": 1.5068354606628418, - "learning_rate": 8.714472361809045e-05, - "loss": 5.3728, - "step": 13295 - }, - { - "epoch": 6.934028683181226, - "grad_norm": 1.6044501066207886, - "learning_rate": 8.714371859296482e-05, - "loss": 5.5412, - "step": 13296 - }, - { - "epoch": 6.9345501955671445, - "grad_norm": 1.5169472694396973, - "learning_rate": 8.714271356783919e-05, - "loss": 5.1521, - "step": 13297 - }, - { - "epoch": 6.935071707953064, - "grad_norm": 1.4585224390029907, - "learning_rate": 8.714170854271357e-05, - "loss": 5.432, - "step": 13298 - }, - { - "epoch": 6.935593220338983, - "grad_norm": 1.4531594514846802, - "learning_rate": 8.714070351758794e-05, - "loss": 5.5702, - "step": 13299 - }, - { - "epoch": 6.936114732724902, - "grad_norm": 1.396121621131897, - "learning_rate": 8.713969849246232e-05, - "loss": 5.7547, - "step": 13300 - }, - { - "epoch": 6.936636245110821, - "grad_norm": 1.4604768753051758, - "learning_rate": 8.713869346733669e-05, - "loss": 5.4952, - "step": 13301 - }, - { - "epoch": 6.937157757496741, - "grad_norm": 1.4175890684127808, - "learning_rate": 8.713768844221106e-05, - "loss": 5.6259, - "step": 13302 - }, - { - "epoch": 6.9376792698826595, - "grad_norm": 1.452567219734192, - "learning_rate": 8.713668341708543e-05, - "loss": 5.5131, - "step": 13303 - }, - { - "epoch": 6.938200782268579, - "grad_norm": 1.3835870027542114, - "learning_rate": 8.713567839195981e-05, - "loss": 5.7012, - "step": 13304 - }, - { - "epoch": 6.938722294654498, - "grad_norm": 1.4527009725570679, - "learning_rate": 8.713467336683417e-05, - "loss": 5.474, - "step": 13305 - }, - { - "epoch": 6.939243807040417, - "grad_norm": 1.482246994972229, - "learning_rate": 8.713366834170855e-05, - "loss": 5.391, - "step": 13306 - }, - { - "epoch": 6.9397653194263365, - "grad_norm": 1.6015952825546265, - "learning_rate": 8.713266331658291e-05, - "loss": 4.9857, - "step": 13307 - }, - { - "epoch": 6.940286831812256, - "grad_norm": 1.4695680141448975, - "learning_rate": 8.713165829145729e-05, - "loss": 5.6018, - "step": 13308 - }, - { - "epoch": 6.9408083441981745, - "grad_norm": 1.4726529121398926, - "learning_rate": 8.713065326633167e-05, - "loss": 5.0986, - "step": 13309 - }, - { - "epoch": 6.941329856584094, - "grad_norm": 1.4324073791503906, - "learning_rate": 8.712964824120603e-05, - "loss": 5.3927, - "step": 13310 - }, - { - "epoch": 6.941851368970013, - "grad_norm": 1.4207093715667725, - "learning_rate": 8.712864321608041e-05, - "loss": 5.8956, - "step": 13311 - }, - { - "epoch": 6.942372881355932, - "grad_norm": 1.4475767612457275, - "learning_rate": 8.712763819095478e-05, - "loss": 5.5813, - "step": 13312 - }, - { - "epoch": 6.9428943937418515, - "grad_norm": 1.5410809516906738, - "learning_rate": 8.712663316582915e-05, - "loss": 5.635, - "step": 13313 - }, - { - "epoch": 6.94341590612777, - "grad_norm": 1.4451422691345215, - "learning_rate": 8.712562814070352e-05, - "loss": 5.8924, - "step": 13314 - }, - { - "epoch": 6.9439374185136895, - "grad_norm": 1.508739948272705, - "learning_rate": 8.71246231155779e-05, - "loss": 5.7591, - "step": 13315 - }, - { - "epoch": 6.944458930899609, - "grad_norm": 1.3732361793518066, - "learning_rate": 8.712361809045226e-05, - "loss": 5.6796, - "step": 13316 - }, - { - "epoch": 6.944980443285528, - "grad_norm": 1.5606842041015625, - "learning_rate": 8.712261306532664e-05, - "loss": 5.3723, - "step": 13317 - }, - { - "epoch": 6.945501955671447, - "grad_norm": 1.4442291259765625, - "learning_rate": 8.7121608040201e-05, - "loss": 5.4069, - "step": 13318 - }, - { - "epoch": 6.9460234680573665, - "grad_norm": 1.581460952758789, - "learning_rate": 8.712060301507538e-05, - "loss": 5.2218, - "step": 13319 - }, - { - "epoch": 6.946544980443285, - "grad_norm": 1.3619707822799683, - "learning_rate": 8.711959798994976e-05, - "loss": 5.7508, - "step": 13320 - }, - { - "epoch": 6.9470664928292045, - "grad_norm": 1.5278903245925903, - "learning_rate": 8.711859296482412e-05, - "loss": 5.8552, - "step": 13321 - }, - { - "epoch": 6.947588005215124, - "grad_norm": 1.3526830673217773, - "learning_rate": 8.71175879396985e-05, - "loss": 5.7088, - "step": 13322 - }, - { - "epoch": 6.9481095176010434, - "grad_norm": 1.4586069583892822, - "learning_rate": 8.711658291457286e-05, - "loss": 5.0016, - "step": 13323 - }, - { - "epoch": 6.948631029986962, - "grad_norm": 1.407771110534668, - "learning_rate": 8.711557788944724e-05, - "loss": 5.5982, - "step": 13324 - }, - { - "epoch": 6.9491525423728815, - "grad_norm": 1.3836181163787842, - "learning_rate": 8.71145728643216e-05, - "loss": 5.618, - "step": 13325 - }, - { - "epoch": 6.9496740547588, - "grad_norm": 1.3673350811004639, - "learning_rate": 8.711356783919598e-05, - "loss": 5.7113, - "step": 13326 - }, - { - "epoch": 6.9501955671447195, - "grad_norm": 1.4720780849456787, - "learning_rate": 8.711256281407035e-05, - "loss": 5.5075, - "step": 13327 - }, - { - "epoch": 6.950717079530639, - "grad_norm": 1.395456075668335, - "learning_rate": 8.711155778894473e-05, - "loss": 5.7791, - "step": 13328 - }, - { - "epoch": 6.9512385919165585, - "grad_norm": 1.4478695392608643, - "learning_rate": 8.71105527638191e-05, - "loss": 5.8729, - "step": 13329 - }, - { - "epoch": 6.951760104302477, - "grad_norm": 1.3494130373001099, - "learning_rate": 8.710954773869348e-05, - "loss": 5.9408, - "step": 13330 - }, - { - "epoch": 6.9522816166883965, - "grad_norm": 1.450488567352295, - "learning_rate": 8.710854271356785e-05, - "loss": 5.2868, - "step": 13331 - }, - { - "epoch": 6.952803129074315, - "grad_norm": 1.5069586038589478, - "learning_rate": 8.710753768844222e-05, - "loss": 5.4187, - "step": 13332 - }, - { - "epoch": 6.9533246414602345, - "grad_norm": 1.3240537643432617, - "learning_rate": 8.710653266331659e-05, - "loss": 5.4792, - "step": 13333 - }, - { - "epoch": 6.953846153846154, - "grad_norm": 1.6023038625717163, - "learning_rate": 8.710552763819095e-05, - "loss": 5.6603, - "step": 13334 - }, - { - "epoch": 6.9543676662320735, - "grad_norm": 1.555156946182251, - "learning_rate": 8.710452261306533e-05, - "loss": 5.9118, - "step": 13335 - }, - { - "epoch": 6.954889178617992, - "grad_norm": 1.3293298482894897, - "learning_rate": 8.71035175879397e-05, - "loss": 5.9274, - "step": 13336 - }, - { - "epoch": 6.9554106910039115, - "grad_norm": 1.4671381711959839, - "learning_rate": 8.710251256281407e-05, - "loss": 5.5823, - "step": 13337 - }, - { - "epoch": 6.95593220338983, - "grad_norm": 1.4281806945800781, - "learning_rate": 8.710150753768844e-05, - "loss": 5.6768, - "step": 13338 - }, - { - "epoch": 6.95645371577575, - "grad_norm": 1.4684056043624878, - "learning_rate": 8.710050251256281e-05, - "loss": 5.787, - "step": 13339 - }, - { - "epoch": 6.956975228161669, - "grad_norm": 1.3198171854019165, - "learning_rate": 8.709949748743719e-05, - "loss": 5.4559, - "step": 13340 - }, - { - "epoch": 6.9574967405475885, - "grad_norm": 1.5199273824691772, - "learning_rate": 8.709849246231157e-05, - "loss": 5.5438, - "step": 13341 - }, - { - "epoch": 6.958018252933507, - "grad_norm": 1.3330984115600586, - "learning_rate": 8.709748743718593e-05, - "loss": 6.1115, - "step": 13342 - }, - { - "epoch": 6.9585397653194265, - "grad_norm": 1.3391082286834717, - "learning_rate": 8.709648241206031e-05, - "loss": 5.5991, - "step": 13343 - }, - { - "epoch": 6.959061277705345, - "grad_norm": 1.4201982021331787, - "learning_rate": 8.709547738693468e-05, - "loss": 5.8293, - "step": 13344 - }, - { - "epoch": 6.959582790091265, - "grad_norm": 1.4060670137405396, - "learning_rate": 8.709447236180905e-05, - "loss": 5.2834, - "step": 13345 - }, - { - "epoch": 6.960104302477184, - "grad_norm": 1.4142917394638062, - "learning_rate": 8.709346733668342e-05, - "loss": 5.8882, - "step": 13346 - }, - { - "epoch": 6.960625814863103, - "grad_norm": 1.42680025100708, - "learning_rate": 8.709246231155778e-05, - "loss": 5.602, - "step": 13347 - }, - { - "epoch": 6.961147327249022, - "grad_norm": 1.4544587135314941, - "learning_rate": 8.709145728643216e-05, - "loss": 5.63, - "step": 13348 - }, - { - "epoch": 6.9616688396349415, - "grad_norm": 1.4432710409164429, - "learning_rate": 8.709045226130654e-05, - "loss": 5.746, - "step": 13349 - }, - { - "epoch": 6.96219035202086, - "grad_norm": 1.4538097381591797, - "learning_rate": 8.708944723618092e-05, - "loss": 5.5986, - "step": 13350 - }, - { - "epoch": 6.96271186440678, - "grad_norm": 1.4957667589187622, - "learning_rate": 8.708844221105528e-05, - "loss": 5.4376, - "step": 13351 - }, - { - "epoch": 6.963233376792699, - "grad_norm": 1.4634274244308472, - "learning_rate": 8.708743718592966e-05, - "loss": 5.6969, - "step": 13352 - }, - { - "epoch": 6.963754889178618, - "grad_norm": 1.4709599018096924, - "learning_rate": 8.708643216080402e-05, - "loss": 5.7251, - "step": 13353 - }, - { - "epoch": 6.964276401564537, - "grad_norm": 1.4052854776382446, - "learning_rate": 8.70854271356784e-05, - "loss": 5.4901, - "step": 13354 - }, - { - "epoch": 6.9647979139504566, - "grad_norm": 1.3707952499389648, - "learning_rate": 8.708442211055276e-05, - "loss": 5.7167, - "step": 13355 - }, - { - "epoch": 6.965319426336375, - "grad_norm": 1.4914451837539673, - "learning_rate": 8.708341708542714e-05, - "loss": 5.1317, - "step": 13356 - }, - { - "epoch": 6.965840938722295, - "grad_norm": 1.4216047525405884, - "learning_rate": 8.70824120603015e-05, - "loss": 5.161, - "step": 13357 - }, - { - "epoch": 6.966362451108214, - "grad_norm": 1.4134098291397095, - "learning_rate": 8.708140703517588e-05, - "loss": 5.1967, - "step": 13358 - }, - { - "epoch": 6.966883963494133, - "grad_norm": 1.4451937675476074, - "learning_rate": 8.708040201005025e-05, - "loss": 5.5869, - "step": 13359 - }, - { - "epoch": 6.967405475880052, - "grad_norm": 1.3835577964782715, - "learning_rate": 8.707939698492463e-05, - "loss": 5.7478, - "step": 13360 - }, - { - "epoch": 6.967926988265972, - "grad_norm": 1.659940481185913, - "learning_rate": 8.7078391959799e-05, - "loss": 5.3998, - "step": 13361 - }, - { - "epoch": 6.96844850065189, - "grad_norm": 1.451550006866455, - "learning_rate": 8.707738693467337e-05, - "loss": 5.1264, - "step": 13362 - }, - { - "epoch": 6.96897001303781, - "grad_norm": 1.3033826351165771, - "learning_rate": 8.707638190954775e-05, - "loss": 5.9982, - "step": 13363 - }, - { - "epoch": 6.969491525423729, - "grad_norm": 1.4470417499542236, - "learning_rate": 8.707537688442211e-05, - "loss": 5.9449, - "step": 13364 - }, - { - "epoch": 6.970013037809648, - "grad_norm": 1.505178689956665, - "learning_rate": 8.707437185929649e-05, - "loss": 5.3828, - "step": 13365 - }, - { - "epoch": 6.970534550195567, - "grad_norm": 1.4007105827331543, - "learning_rate": 8.707336683417085e-05, - "loss": 5.0486, - "step": 13366 - }, - { - "epoch": 6.971056062581487, - "grad_norm": 1.3721015453338623, - "learning_rate": 8.707236180904523e-05, - "loss": 5.843, - "step": 13367 - }, - { - "epoch": 6.971577574967405, - "grad_norm": 1.439216136932373, - "learning_rate": 8.70713567839196e-05, - "loss": 5.4816, - "step": 13368 - }, - { - "epoch": 6.972099087353325, - "grad_norm": 1.418509840965271, - "learning_rate": 8.707035175879397e-05, - "loss": 5.6509, - "step": 13369 - }, - { - "epoch": 6.972620599739244, - "grad_norm": 1.4475274085998535, - "learning_rate": 8.706934673366835e-05, - "loss": 5.8797, - "step": 13370 - }, - { - "epoch": 6.973142112125163, - "grad_norm": 1.4043692350387573, - "learning_rate": 8.706834170854273e-05, - "loss": 5.8034, - "step": 13371 - }, - { - "epoch": 6.973663624511082, - "grad_norm": 1.3142307996749878, - "learning_rate": 8.706733668341709e-05, - "loss": 5.5609, - "step": 13372 - }, - { - "epoch": 6.974185136897002, - "grad_norm": 1.4457515478134155, - "learning_rate": 8.706633165829147e-05, - "loss": 5.459, - "step": 13373 - }, - { - "epoch": 6.97470664928292, - "grad_norm": 1.3880631923675537, - "learning_rate": 8.706532663316583e-05, - "loss": 5.5104, - "step": 13374 - }, - { - "epoch": 6.97522816166884, - "grad_norm": 1.508703589439392, - "learning_rate": 8.70643216080402e-05, - "loss": 5.5321, - "step": 13375 - }, - { - "epoch": 6.975749674054759, - "grad_norm": 1.346994161605835, - "learning_rate": 8.706331658291458e-05, - "loss": 5.8971, - "step": 13376 - }, - { - "epoch": 6.976271186440678, - "grad_norm": 1.3925755023956299, - "learning_rate": 8.706231155778894e-05, - "loss": 5.4844, - "step": 13377 - }, - { - "epoch": 6.976792698826597, - "grad_norm": 1.409274697303772, - "learning_rate": 8.706130653266332e-05, - "loss": 5.3255, - "step": 13378 - }, - { - "epoch": 6.977314211212517, - "grad_norm": 1.4572656154632568, - "learning_rate": 8.706030150753768e-05, - "loss": 5.5552, - "step": 13379 - }, - { - "epoch": 6.977835723598435, - "grad_norm": 1.3959450721740723, - "learning_rate": 8.705929648241206e-05, - "loss": 5.5771, - "step": 13380 - }, - { - "epoch": 6.978357235984355, - "grad_norm": 1.3494313955307007, - "learning_rate": 8.705829145728644e-05, - "loss": 5.6901, - "step": 13381 - }, - { - "epoch": 6.978878748370274, - "grad_norm": 1.3455893993377686, - "learning_rate": 8.705728643216082e-05, - "loss": 5.424, - "step": 13382 - }, - { - "epoch": 6.979400260756193, - "grad_norm": 1.5395665168762207, - "learning_rate": 8.705628140703518e-05, - "loss": 5.8227, - "step": 13383 - }, - { - "epoch": 6.979921773142112, - "grad_norm": 1.3639477491378784, - "learning_rate": 8.705527638190956e-05, - "loss": 5.9073, - "step": 13384 - }, - { - "epoch": 6.980443285528032, - "grad_norm": 1.3254749774932861, - "learning_rate": 8.705427135678392e-05, - "loss": 5.5652, - "step": 13385 - }, - { - "epoch": 6.98096479791395, - "grad_norm": 1.4098268747329712, - "learning_rate": 8.70532663316583e-05, - "loss": 5.5309, - "step": 13386 - }, - { - "epoch": 6.98148631029987, - "grad_norm": 1.4973217248916626, - "learning_rate": 8.705226130653267e-05, - "loss": 5.689, - "step": 13387 - }, - { - "epoch": 6.982007822685789, - "grad_norm": 1.577728271484375, - "learning_rate": 8.705125628140703e-05, - "loss": 5.507, - "step": 13388 - }, - { - "epoch": 6.982529335071708, - "grad_norm": 1.4927242994308472, - "learning_rate": 8.705025125628141e-05, - "loss": 5.5177, - "step": 13389 - }, - { - "epoch": 6.983050847457627, - "grad_norm": 1.638493537902832, - "learning_rate": 8.704924623115579e-05, - "loss": 5.5891, - "step": 13390 - }, - { - "epoch": 6.983572359843547, - "grad_norm": 1.4359697103500366, - "learning_rate": 8.704824120603016e-05, - "loss": 5.4611, - "step": 13391 - }, - { - "epoch": 6.984093872229465, - "grad_norm": 1.3334813117980957, - "learning_rate": 8.704723618090453e-05, - "loss": 5.8773, - "step": 13392 - }, - { - "epoch": 6.984615384615385, - "grad_norm": 1.4892915487289429, - "learning_rate": 8.70462311557789e-05, - "loss": 5.6921, - "step": 13393 - }, - { - "epoch": 6.985136897001304, - "grad_norm": 1.5081355571746826, - "learning_rate": 8.704522613065327e-05, - "loss": 5.2545, - "step": 13394 - }, - { - "epoch": 6.985658409387223, - "grad_norm": 1.355001449584961, - "learning_rate": 8.704422110552765e-05, - "loss": 5.555, - "step": 13395 - }, - { - "epoch": 6.986179921773142, - "grad_norm": 1.3713586330413818, - "learning_rate": 8.704321608040201e-05, - "loss": 5.6285, - "step": 13396 - }, - { - "epoch": 6.986701434159062, - "grad_norm": 1.4931268692016602, - "learning_rate": 8.704221105527639e-05, - "loss": 5.6208, - "step": 13397 - }, - { - "epoch": 6.98722294654498, - "grad_norm": 1.436623215675354, - "learning_rate": 8.704120603015075e-05, - "loss": 5.6248, - "step": 13398 - }, - { - "epoch": 6.9877444589309, - "grad_norm": 1.3965542316436768, - "learning_rate": 8.704020100502513e-05, - "loss": 5.8099, - "step": 13399 - }, - { - "epoch": 6.988265971316819, - "grad_norm": 1.3200595378875732, - "learning_rate": 8.70391959798995e-05, - "loss": 5.9473, - "step": 13400 - }, - { - "epoch": 6.988787483702738, - "grad_norm": 1.465320110321045, - "learning_rate": 8.703819095477387e-05, - "loss": 5.6862, - "step": 13401 - }, - { - "epoch": 6.989308996088657, - "grad_norm": 1.3251965045928955, - "learning_rate": 8.703718592964825e-05, - "loss": 6.0016, - "step": 13402 - }, - { - "epoch": 6.989830508474577, - "grad_norm": 1.3845266103744507, - "learning_rate": 8.703618090452262e-05, - "loss": 5.4742, - "step": 13403 - }, - { - "epoch": 6.990352020860495, - "grad_norm": 1.3564001321792603, - "learning_rate": 8.7035175879397e-05, - "loss": 5.9768, - "step": 13404 - }, - { - "epoch": 6.990873533246415, - "grad_norm": 1.4856059551239014, - "learning_rate": 8.703417085427136e-05, - "loss": 5.3674, - "step": 13405 - }, - { - "epoch": 6.991395045632334, - "grad_norm": 1.3881319761276245, - "learning_rate": 8.703316582914574e-05, - "loss": 5.6545, - "step": 13406 - }, - { - "epoch": 6.991916558018253, - "grad_norm": 1.5049464702606201, - "learning_rate": 8.70321608040201e-05, - "loss": 5.6046, - "step": 13407 - }, - { - "epoch": 6.992438070404172, - "grad_norm": 1.3670753240585327, - "learning_rate": 8.703115577889448e-05, - "loss": 5.6352, - "step": 13408 - }, - { - "epoch": 6.992959582790091, - "grad_norm": 1.3510066270828247, - "learning_rate": 8.703015075376884e-05, - "loss": 5.8483, - "step": 13409 - }, - { - "epoch": 6.99348109517601, - "grad_norm": 1.4152847528457642, - "learning_rate": 8.702914572864322e-05, - "loss": 5.5295, - "step": 13410 - }, - { - "epoch": 6.99400260756193, - "grad_norm": 1.3526033163070679, - "learning_rate": 8.70281407035176e-05, - "loss": 5.8497, - "step": 13411 - }, - { - "epoch": 6.994524119947849, - "grad_norm": 1.4955945014953613, - "learning_rate": 8.702713567839198e-05, - "loss": 5.5484, - "step": 13412 - }, - { - "epoch": 6.995045632333768, - "grad_norm": 1.4018276929855347, - "learning_rate": 8.702613065326634e-05, - "loss": 5.9126, - "step": 13413 - }, - { - "epoch": 6.995567144719687, - "grad_norm": 1.5255998373031616, - "learning_rate": 8.70251256281407e-05, - "loss": 5.7088, - "step": 13414 - }, - { - "epoch": 6.996088657105606, - "grad_norm": 1.4157768487930298, - "learning_rate": 8.702412060301508e-05, - "loss": 5.5033, - "step": 13415 - }, - { - "epoch": 6.996610169491525, - "grad_norm": 1.5596543550491333, - "learning_rate": 8.702311557788945e-05, - "loss": 5.1652, - "step": 13416 - }, - { - "epoch": 6.997131681877445, - "grad_norm": 1.5296235084533691, - "learning_rate": 8.702211055276382e-05, - "loss": 4.7977, - "step": 13417 - }, - { - "epoch": 6.997653194263364, - "grad_norm": 1.4895062446594238, - "learning_rate": 8.702110552763819e-05, - "loss": 5.6861, - "step": 13418 - }, - { - "epoch": 6.998174706649283, - "grad_norm": 1.4128345251083374, - "learning_rate": 8.702010050251257e-05, - "loss": 5.6152, - "step": 13419 - }, - { - "epoch": 6.998696219035202, - "grad_norm": 1.6972578763961792, - "learning_rate": 8.701909547738693e-05, - "loss": 5.0183, - "step": 13420 - }, - { - "epoch": 6.999217731421121, - "grad_norm": 1.426389217376709, - "learning_rate": 8.701809045226131e-05, - "loss": 5.5888, - "step": 13421 - }, - { - "epoch": 6.99973924380704, - "grad_norm": 1.4051347970962524, - "learning_rate": 8.701708542713569e-05, - "loss": 5.4924, - "step": 13422 - }, - { - "epoch": 7.00026075619296, - "grad_norm": 1.8485748767852783, - "learning_rate": 8.701608040201006e-05, - "loss": 5.6172, - "step": 13423 - }, - { - "epoch": 7.000782268578878, - "grad_norm": 1.4821194410324097, - "learning_rate": 8.701507537688443e-05, - "loss": 5.2798, - "step": 13424 - }, - { - "epoch": 7.001303780964798, - "grad_norm": 1.610002040863037, - "learning_rate": 8.70140703517588e-05, - "loss": 5.4295, - "step": 13425 - }, - { - "epoch": 7.001825293350717, - "grad_norm": 1.4609360694885254, - "learning_rate": 8.701306532663317e-05, - "loss": 5.311, - "step": 13426 - }, - { - "epoch": 7.002346805736636, - "grad_norm": 1.4490759372711182, - "learning_rate": 8.701206030150753e-05, - "loss": 5.0553, - "step": 13427 - }, - { - "epoch": 7.002868318122555, - "grad_norm": 1.7876455783843994, - "learning_rate": 8.701105527638191e-05, - "loss": 5.6138, - "step": 13428 - }, - { - "epoch": 7.003389830508475, - "grad_norm": 1.4610207080841064, - "learning_rate": 8.701005025125628e-05, - "loss": 5.5573, - "step": 13429 - }, - { - "epoch": 7.003911342894393, - "grad_norm": 1.4007673263549805, - "learning_rate": 8.700904522613065e-05, - "loss": 5.4963, - "step": 13430 - }, - { - "epoch": 7.004432855280313, - "grad_norm": 1.5872492790222168, - "learning_rate": 8.700804020100503e-05, - "loss": 5.518, - "step": 13431 - }, - { - "epoch": 7.004954367666232, - "grad_norm": 1.4385663270950317, - "learning_rate": 8.700703517587941e-05, - "loss": 5.2833, - "step": 13432 - }, - { - "epoch": 7.005475880052151, - "grad_norm": 1.425424575805664, - "learning_rate": 8.700603015075377e-05, - "loss": 5.4996, - "step": 13433 - }, - { - "epoch": 7.00599739243807, - "grad_norm": 1.5454621315002441, - "learning_rate": 8.700502512562815e-05, - "loss": 5.3359, - "step": 13434 - }, - { - "epoch": 7.00651890482399, - "grad_norm": 1.3315880298614502, - "learning_rate": 8.700402010050252e-05, - "loss": 5.838, - "step": 13435 - }, - { - "epoch": 7.007040417209908, - "grad_norm": 1.322046160697937, - "learning_rate": 8.70030150753769e-05, - "loss": 5.8882, - "step": 13436 - }, - { - "epoch": 7.007561929595828, - "grad_norm": 1.3955097198486328, - "learning_rate": 8.700201005025126e-05, - "loss": 5.7727, - "step": 13437 - }, - { - "epoch": 7.008083441981747, - "grad_norm": 1.434022068977356, - "learning_rate": 8.700100502512564e-05, - "loss": 5.4132, - "step": 13438 - }, - { - "epoch": 7.008604954367666, - "grad_norm": 1.4591773748397827, - "learning_rate": 8.7e-05, - "loss": 5.4758, - "step": 13439 - }, - { - "epoch": 7.009126466753585, - "grad_norm": 1.4431689977645874, - "learning_rate": 8.699899497487438e-05, - "loss": 5.9166, - "step": 13440 - }, - { - "epoch": 7.009647979139505, - "grad_norm": 1.3714346885681152, - "learning_rate": 8.699798994974874e-05, - "loss": 5.9118, - "step": 13441 - }, - { - "epoch": 7.010169491525423, - "grad_norm": 1.4963582754135132, - "learning_rate": 8.699698492462312e-05, - "loss": 5.6545, - "step": 13442 - }, - { - "epoch": 7.010691003911343, - "grad_norm": 1.4886062145233154, - "learning_rate": 8.69959798994975e-05, - "loss": 5.4472, - "step": 13443 - }, - { - "epoch": 7.011212516297262, - "grad_norm": 1.5133861303329468, - "learning_rate": 8.699497487437186e-05, - "loss": 5.708, - "step": 13444 - }, - { - "epoch": 7.011734028683181, - "grad_norm": 1.6591498851776123, - "learning_rate": 8.699396984924624e-05, - "loss": 5.3799, - "step": 13445 - }, - { - "epoch": 7.0122555410691, - "grad_norm": 1.5151041746139526, - "learning_rate": 8.69929648241206e-05, - "loss": 5.5876, - "step": 13446 - }, - { - "epoch": 7.01277705345502, - "grad_norm": 1.502970576286316, - "learning_rate": 8.699195979899498e-05, - "loss": 5.3159, - "step": 13447 - }, - { - "epoch": 7.013298565840938, - "grad_norm": 1.4799509048461914, - "learning_rate": 8.699095477386935e-05, - "loss": 5.8265, - "step": 13448 - }, - { - "epoch": 7.013820078226858, - "grad_norm": 1.3858861923217773, - "learning_rate": 8.698994974874372e-05, - "loss": 5.8121, - "step": 13449 - }, - { - "epoch": 7.014341590612777, - "grad_norm": 1.400848388671875, - "learning_rate": 8.698894472361809e-05, - "loss": 5.3686, - "step": 13450 - }, - { - "epoch": 7.014863102998696, - "grad_norm": 1.4534039497375488, - "learning_rate": 8.698793969849247e-05, - "loss": 5.6712, - "step": 13451 - }, - { - "epoch": 7.015384615384615, - "grad_norm": 1.4710307121276855, - "learning_rate": 8.698693467336684e-05, - "loss": 5.8047, - "step": 13452 - }, - { - "epoch": 7.015906127770535, - "grad_norm": 1.4461052417755127, - "learning_rate": 8.698592964824122e-05, - "loss": 5.6924, - "step": 13453 - }, - { - "epoch": 7.016427640156453, - "grad_norm": 1.4467244148254395, - "learning_rate": 8.698492462311559e-05, - "loss": 5.7575, - "step": 13454 - }, - { - "epoch": 7.016949152542373, - "grad_norm": 1.2938076257705688, - "learning_rate": 8.698391959798995e-05, - "loss": 5.3051, - "step": 13455 - }, - { - "epoch": 7.017470664928292, - "grad_norm": 1.4672337770462036, - "learning_rate": 8.698291457286433e-05, - "loss": 5.6897, - "step": 13456 - }, - { - "epoch": 7.017992177314211, - "grad_norm": 1.4397361278533936, - "learning_rate": 8.698190954773869e-05, - "loss": 5.7627, - "step": 13457 - }, - { - "epoch": 7.01851368970013, - "grad_norm": 1.5544052124023438, - "learning_rate": 8.698090452261307e-05, - "loss": 5.0664, - "step": 13458 - }, - { - "epoch": 7.01903520208605, - "grad_norm": 1.4260209798812866, - "learning_rate": 8.697989949748744e-05, - "loss": 5.7323, - "step": 13459 - }, - { - "epoch": 7.019556714471968, - "grad_norm": 1.4896206855773926, - "learning_rate": 8.697889447236181e-05, - "loss": 5.6959, - "step": 13460 - }, - { - "epoch": 7.020078226857888, - "grad_norm": 1.3778749704360962, - "learning_rate": 8.697788944723618e-05, - "loss": 5.0909, - "step": 13461 - }, - { - "epoch": 7.020599739243807, - "grad_norm": 1.3835723400115967, - "learning_rate": 8.697688442211056e-05, - "loss": 5.4147, - "step": 13462 - }, - { - "epoch": 7.021121251629726, - "grad_norm": 1.4496538639068604, - "learning_rate": 8.697587939698493e-05, - "loss": 5.148, - "step": 13463 - }, - { - "epoch": 7.021642764015645, - "grad_norm": 1.460734486579895, - "learning_rate": 8.697487437185931e-05, - "loss": 5.3684, - "step": 13464 - }, - { - "epoch": 7.022164276401565, - "grad_norm": 1.3311572074890137, - "learning_rate": 8.697386934673367e-05, - "loss": 5.7251, - "step": 13465 - }, - { - "epoch": 7.022685788787483, - "grad_norm": 1.3198803663253784, - "learning_rate": 8.697286432160805e-05, - "loss": 5.4769, - "step": 13466 - }, - { - "epoch": 7.023207301173403, - "grad_norm": 1.4641104936599731, - "learning_rate": 8.697185929648242e-05, - "loss": 5.5742, - "step": 13467 - }, - { - "epoch": 7.023728813559322, - "grad_norm": 1.3770196437835693, - "learning_rate": 8.697085427135678e-05, - "loss": 5.8993, - "step": 13468 - }, - { - "epoch": 7.024250325945241, - "grad_norm": 1.4775896072387695, - "learning_rate": 8.696984924623116e-05, - "loss": 5.5317, - "step": 13469 - }, - { - "epoch": 7.02477183833116, - "grad_norm": 1.4371564388275146, - "learning_rate": 8.696884422110552e-05, - "loss": 5.4453, - "step": 13470 - }, - { - "epoch": 7.02529335071708, - "grad_norm": 1.4484115839004517, - "learning_rate": 8.69678391959799e-05, - "loss": 5.3682, - "step": 13471 - }, - { - "epoch": 7.025814863102998, - "grad_norm": 1.4084595441818237, - "learning_rate": 8.696683417085427e-05, - "loss": 5.4254, - "step": 13472 - }, - { - "epoch": 7.026336375488918, - "grad_norm": 1.4904628992080688, - "learning_rate": 8.696582914572864e-05, - "loss": 5.5454, - "step": 13473 - }, - { - "epoch": 7.026857887874837, - "grad_norm": 1.6426067352294922, - "learning_rate": 8.696482412060302e-05, - "loss": 5.2355, - "step": 13474 - }, - { - "epoch": 7.027379400260756, - "grad_norm": 1.4853923320770264, - "learning_rate": 8.69638190954774e-05, - "loss": 5.5352, - "step": 13475 - }, - { - "epoch": 7.027900912646675, - "grad_norm": 1.371253252029419, - "learning_rate": 8.696281407035176e-05, - "loss": 5.7212, - "step": 13476 - }, - { - "epoch": 7.028422425032595, - "grad_norm": 1.5297445058822632, - "learning_rate": 8.696180904522614e-05, - "loss": 5.3758, - "step": 13477 - }, - { - "epoch": 7.028943937418513, - "grad_norm": 1.5484615564346313, - "learning_rate": 8.69608040201005e-05, - "loss": 5.1708, - "step": 13478 - }, - { - "epoch": 7.029465449804433, - "grad_norm": 1.3931173086166382, - "learning_rate": 8.695979899497488e-05, - "loss": 5.7852, - "step": 13479 - }, - { - "epoch": 7.029986962190352, - "grad_norm": 1.4016035795211792, - "learning_rate": 8.695879396984925e-05, - "loss": 5.7639, - "step": 13480 - }, - { - "epoch": 7.030508474576271, - "grad_norm": 1.443292260169983, - "learning_rate": 8.695778894472361e-05, - "loss": 5.4668, - "step": 13481 - }, - { - "epoch": 7.03102998696219, - "grad_norm": 1.3710988759994507, - "learning_rate": 8.695678391959799e-05, - "loss": 5.8426, - "step": 13482 - }, - { - "epoch": 7.03155149934811, - "grad_norm": 1.5292260646820068, - "learning_rate": 8.695577889447237e-05, - "loss": 5.4736, - "step": 13483 - }, - { - "epoch": 7.032073011734028, - "grad_norm": 1.471585750579834, - "learning_rate": 8.695477386934675e-05, - "loss": 5.0398, - "step": 13484 - }, - { - "epoch": 7.032594524119948, - "grad_norm": 1.4033136367797852, - "learning_rate": 8.695376884422111e-05, - "loss": 5.7082, - "step": 13485 - }, - { - "epoch": 7.033116036505867, - "grad_norm": 1.3297590017318726, - "learning_rate": 8.695276381909549e-05, - "loss": 5.1385, - "step": 13486 - }, - { - "epoch": 7.033637548891786, - "grad_norm": 1.3944824934005737, - "learning_rate": 8.695175879396985e-05, - "loss": 5.7619, - "step": 13487 - }, - { - "epoch": 7.034159061277705, - "grad_norm": 1.3541886806488037, - "learning_rate": 8.695075376884423e-05, - "loss": 5.3419, - "step": 13488 - }, - { - "epoch": 7.034680573663625, - "grad_norm": 1.4420626163482666, - "learning_rate": 8.69497487437186e-05, - "loss": 5.7486, - "step": 13489 - }, - { - "epoch": 7.0352020860495434, - "grad_norm": 1.4421377182006836, - "learning_rate": 8.694874371859297e-05, - "loss": 5.756, - "step": 13490 - }, - { - "epoch": 7.035723598435463, - "grad_norm": 1.3632266521453857, - "learning_rate": 8.694773869346734e-05, - "loss": 5.7661, - "step": 13491 - }, - { - "epoch": 7.036245110821382, - "grad_norm": 1.4393789768218994, - "learning_rate": 8.694673366834171e-05, - "loss": 5.3793, - "step": 13492 - }, - { - "epoch": 7.036766623207301, - "grad_norm": 1.511124849319458, - "learning_rate": 8.694572864321608e-05, - "loss": 5.7117, - "step": 13493 - }, - { - "epoch": 7.03728813559322, - "grad_norm": 1.382969617843628, - "learning_rate": 8.694472361809046e-05, - "loss": 5.7368, - "step": 13494 - }, - { - "epoch": 7.03780964797914, - "grad_norm": 1.4124302864074707, - "learning_rate": 8.694371859296483e-05, - "loss": 5.9784, - "step": 13495 - }, - { - "epoch": 7.0383311603650585, - "grad_norm": 1.3480901718139648, - "learning_rate": 8.69427135678392e-05, - "loss": 5.4335, - "step": 13496 - }, - { - "epoch": 7.038852672750978, - "grad_norm": 1.4954389333724976, - "learning_rate": 8.694170854271358e-05, - "loss": 5.4678, - "step": 13497 - }, - { - "epoch": 7.039374185136897, - "grad_norm": 1.579001784324646, - "learning_rate": 8.694070351758794e-05, - "loss": 5.4604, - "step": 13498 - }, - { - "epoch": 7.039895697522816, - "grad_norm": 1.4330686330795288, - "learning_rate": 8.693969849246232e-05, - "loss": 5.6495, - "step": 13499 - }, - { - "epoch": 7.040417209908735, - "grad_norm": 1.358410120010376, - "learning_rate": 8.693869346733668e-05, - "loss": 5.8587, - "step": 13500 - }, - { - "epoch": 7.040938722294655, - "grad_norm": 1.4798851013183594, - "learning_rate": 8.693768844221106e-05, - "loss": 5.0451, - "step": 13501 - }, - { - "epoch": 7.0414602346805735, - "grad_norm": 1.5255372524261475, - "learning_rate": 8.693668341708542e-05, - "loss": 5.1084, - "step": 13502 - }, - { - "epoch": 7.041981747066493, - "grad_norm": 1.4601895809173584, - "learning_rate": 8.69356783919598e-05, - "loss": 5.433, - "step": 13503 - }, - { - "epoch": 7.042503259452412, - "grad_norm": 1.4498512744903564, - "learning_rate": 8.693467336683418e-05, - "loss": 5.7736, - "step": 13504 - }, - { - "epoch": 7.043024771838331, - "grad_norm": 1.4497058391571045, - "learning_rate": 8.693366834170856e-05, - "loss": 5.009, - "step": 13505 - }, - { - "epoch": 7.04354628422425, - "grad_norm": 1.3038369417190552, - "learning_rate": 8.693266331658292e-05, - "loss": 5.8339, - "step": 13506 - }, - { - "epoch": 7.04406779661017, - "grad_norm": 1.395769715309143, - "learning_rate": 8.693165829145729e-05, - "loss": 5.7608, - "step": 13507 - }, - { - "epoch": 7.0445893089960885, - "grad_norm": 1.4769400358200073, - "learning_rate": 8.693065326633166e-05, - "loss": 5.5145, - "step": 13508 - }, - { - "epoch": 7.045110821382008, - "grad_norm": 1.4044547080993652, - "learning_rate": 8.692964824120603e-05, - "loss": 5.7547, - "step": 13509 - }, - { - "epoch": 7.045632333767927, - "grad_norm": 1.4802017211914062, - "learning_rate": 8.69286432160804e-05, - "loss": 5.116, - "step": 13510 - }, - { - "epoch": 7.046153846153846, - "grad_norm": 1.4355244636535645, - "learning_rate": 8.692763819095477e-05, - "loss": 5.7684, - "step": 13511 - }, - { - "epoch": 7.0466753585397655, - "grad_norm": 1.5294073820114136, - "learning_rate": 8.692663316582915e-05, - "loss": 5.6104, - "step": 13512 - }, - { - "epoch": 7.047196870925685, - "grad_norm": 1.4788920879364014, - "learning_rate": 8.692562814070351e-05, - "loss": 5.6739, - "step": 13513 - }, - { - "epoch": 7.0477183833116035, - "grad_norm": 1.4485869407653809, - "learning_rate": 8.692462311557789e-05, - "loss": 5.8912, - "step": 13514 - }, - { - "epoch": 7.048239895697523, - "grad_norm": 1.4866095781326294, - "learning_rate": 8.692361809045227e-05, - "loss": 5.3231, - "step": 13515 - }, - { - "epoch": 7.0487614080834415, - "grad_norm": 1.4245738983154297, - "learning_rate": 8.692261306532665e-05, - "loss": 5.608, - "step": 13516 - }, - { - "epoch": 7.049282920469361, - "grad_norm": 1.5063649415969849, - "learning_rate": 8.692160804020101e-05, - "loss": 5.2061, - "step": 13517 - }, - { - "epoch": 7.0498044328552805, - "grad_norm": 1.6474918127059937, - "learning_rate": 8.692060301507539e-05, - "loss": 5.2798, - "step": 13518 - }, - { - "epoch": 7.050325945241199, - "grad_norm": 1.2920011281967163, - "learning_rate": 8.691959798994975e-05, - "loss": 5.1269, - "step": 13519 - }, - { - "epoch": 7.0508474576271185, - "grad_norm": 1.4522672891616821, - "learning_rate": 8.691859296482412e-05, - "loss": 5.5168, - "step": 13520 - }, - { - "epoch": 7.051368970013038, - "grad_norm": 1.417240858078003, - "learning_rate": 8.69175879396985e-05, - "loss": 5.5638, - "step": 13521 - }, - { - "epoch": 7.0518904823989566, - "grad_norm": 1.3392002582550049, - "learning_rate": 8.691658291457286e-05, - "loss": 5.0803, - "step": 13522 - }, - { - "epoch": 7.052411994784876, - "grad_norm": 1.4109644889831543, - "learning_rate": 8.691557788944724e-05, - "loss": 5.7814, - "step": 13523 - }, - { - "epoch": 7.0529335071707955, - "grad_norm": 1.3801614046096802, - "learning_rate": 8.691457286432161e-05, - "loss": 5.2116, - "step": 13524 - }, - { - "epoch": 7.053455019556714, - "grad_norm": 1.4768915176391602, - "learning_rate": 8.691356783919599e-05, - "loss": 5.0764, - "step": 13525 - }, - { - "epoch": 7.0539765319426335, - "grad_norm": 1.4160035848617554, - "learning_rate": 8.691256281407036e-05, - "loss": 5.5037, - "step": 13526 - }, - { - "epoch": 7.054498044328553, - "grad_norm": 1.4442335367202759, - "learning_rate": 8.691155778894473e-05, - "loss": 5.4608, - "step": 13527 - }, - { - "epoch": 7.055019556714472, - "grad_norm": 1.4763801097869873, - "learning_rate": 8.69105527638191e-05, - "loss": 5.323, - "step": 13528 - }, - { - "epoch": 7.055541069100391, - "grad_norm": 1.3409476280212402, - "learning_rate": 8.690954773869348e-05, - "loss": 5.5344, - "step": 13529 - }, - { - "epoch": 7.0560625814863105, - "grad_norm": 1.46872878074646, - "learning_rate": 8.690854271356784e-05, - "loss": 5.0455, - "step": 13530 - }, - { - "epoch": 7.056584093872229, - "grad_norm": 1.4129635095596313, - "learning_rate": 8.690753768844222e-05, - "loss": 5.5784, - "step": 13531 - }, - { - "epoch": 7.0571056062581485, - "grad_norm": 1.2868163585662842, - "learning_rate": 8.690653266331658e-05, - "loss": 4.9813, - "step": 13532 - }, - { - "epoch": 7.057627118644068, - "grad_norm": 1.399598240852356, - "learning_rate": 8.690552763819096e-05, - "loss": 5.0879, - "step": 13533 - }, - { - "epoch": 7.058148631029987, - "grad_norm": 1.3907204866409302, - "learning_rate": 8.690452261306532e-05, - "loss": 5.6694, - "step": 13534 - }, - { - "epoch": 7.058670143415906, - "grad_norm": 1.6101486682891846, - "learning_rate": 8.69035175879397e-05, - "loss": 5.1895, - "step": 13535 - }, - { - "epoch": 7.0591916558018255, - "grad_norm": 1.5695613622665405, - "learning_rate": 8.690251256281408e-05, - "loss": 5.6528, - "step": 13536 - }, - { - "epoch": 7.059713168187744, - "grad_norm": 1.4829063415527344, - "learning_rate": 8.690150753768844e-05, - "loss": 5.6244, - "step": 13537 - }, - { - "epoch": 7.0602346805736635, - "grad_norm": 1.4469066858291626, - "learning_rate": 8.690050251256282e-05, - "loss": 5.6093, - "step": 13538 - }, - { - "epoch": 7.060756192959583, - "grad_norm": 1.427062749862671, - "learning_rate": 8.689949748743719e-05, - "loss": 5.6679, - "step": 13539 - }, - { - "epoch": 7.061277705345502, - "grad_norm": 1.4258912801742554, - "learning_rate": 8.689849246231156e-05, - "loss": 5.6927, - "step": 13540 - }, - { - "epoch": 7.061799217731421, - "grad_norm": 1.3217735290527344, - "learning_rate": 8.689748743718593e-05, - "loss": 5.9511, - "step": 13541 - }, - { - "epoch": 7.0623207301173405, - "grad_norm": 1.3539990186691284, - "learning_rate": 8.689648241206031e-05, - "loss": 5.8172, - "step": 13542 - }, - { - "epoch": 7.062842242503259, - "grad_norm": 1.369462251663208, - "learning_rate": 8.689547738693467e-05, - "loss": 5.5092, - "step": 13543 - }, - { - "epoch": 7.063363754889179, - "grad_norm": 1.4070191383361816, - "learning_rate": 8.689447236180905e-05, - "loss": 5.423, - "step": 13544 - }, - { - "epoch": 7.063885267275098, - "grad_norm": 1.3957527875900269, - "learning_rate": 8.689346733668343e-05, - "loss": 5.6322, - "step": 13545 - }, - { - "epoch": 7.064406779661017, - "grad_norm": 1.4492450952529907, - "learning_rate": 8.68924623115578e-05, - "loss": 5.5996, - "step": 13546 - }, - { - "epoch": 7.064928292046936, - "grad_norm": 1.5721734762191772, - "learning_rate": 8.689145728643217e-05, - "loss": 5.325, - "step": 13547 - }, - { - "epoch": 7.0654498044328555, - "grad_norm": 1.4552375078201294, - "learning_rate": 8.689045226130653e-05, - "loss": 5.5795, - "step": 13548 - }, - { - "epoch": 7.065971316818774, - "grad_norm": 1.4627131223678589, - "learning_rate": 8.688944723618091e-05, - "loss": 5.6862, - "step": 13549 - }, - { - "epoch": 7.066492829204694, - "grad_norm": 1.556059718132019, - "learning_rate": 8.688844221105528e-05, - "loss": 5.3182, - "step": 13550 - }, - { - "epoch": 7.067014341590613, - "grad_norm": 1.5156433582305908, - "learning_rate": 8.688743718592965e-05, - "loss": 5.4054, - "step": 13551 - }, - { - "epoch": 7.067535853976532, - "grad_norm": 1.3692740201950073, - "learning_rate": 8.688643216080402e-05, - "loss": 5.9718, - "step": 13552 - }, - { - "epoch": 7.068057366362451, - "grad_norm": 1.3856106996536255, - "learning_rate": 8.68854271356784e-05, - "loss": 5.4612, - "step": 13553 - }, - { - "epoch": 7.0685788787483705, - "grad_norm": 1.4792388677597046, - "learning_rate": 8.688442211055276e-05, - "loss": 5.2844, - "step": 13554 - }, - { - "epoch": 7.069100391134289, - "grad_norm": 1.3618789911270142, - "learning_rate": 8.688341708542714e-05, - "loss": 5.5473, - "step": 13555 - }, - { - "epoch": 7.069621903520209, - "grad_norm": 1.6274526119232178, - "learning_rate": 8.688241206030152e-05, - "loss": 4.9174, - "step": 13556 - }, - { - "epoch": 7.070143415906128, - "grad_norm": 1.492628574371338, - "learning_rate": 8.688140703517589e-05, - "loss": 5.5222, - "step": 13557 - }, - { - "epoch": 7.070664928292047, - "grad_norm": 1.421893835067749, - "learning_rate": 8.688040201005026e-05, - "loss": 5.7627, - "step": 13558 - }, - { - "epoch": 7.071186440677966, - "grad_norm": 1.4931410551071167, - "learning_rate": 8.687939698492464e-05, - "loss": 5.8527, - "step": 13559 - }, - { - "epoch": 7.0717079530638856, - "grad_norm": 1.4539486169815063, - "learning_rate": 8.6878391959799e-05, - "loss": 5.8022, - "step": 13560 - }, - { - "epoch": 7.072229465449804, - "grad_norm": 1.4285629987716675, - "learning_rate": 8.687738693467336e-05, - "loss": 5.3471, - "step": 13561 - }, - { - "epoch": 7.072750977835724, - "grad_norm": 1.4815881252288818, - "learning_rate": 8.687638190954774e-05, - "loss": 5.7462, - "step": 13562 - }, - { - "epoch": 7.073272490221643, - "grad_norm": 1.5508919954299927, - "learning_rate": 8.68753768844221e-05, - "loss": 5.1894, - "step": 13563 - }, - { - "epoch": 7.073794002607562, - "grad_norm": 1.4936853647232056, - "learning_rate": 8.687437185929648e-05, - "loss": 5.6241, - "step": 13564 - }, - { - "epoch": 7.074315514993481, - "grad_norm": 1.5734385251998901, - "learning_rate": 8.687336683417086e-05, - "loss": 5.684, - "step": 13565 - }, - { - "epoch": 7.074837027379401, - "grad_norm": 1.3043161630630493, - "learning_rate": 8.687236180904524e-05, - "loss": 5.8608, - "step": 13566 - }, - { - "epoch": 7.075358539765319, - "grad_norm": 1.4227317571640015, - "learning_rate": 8.68713567839196e-05, - "loss": 5.0418, - "step": 13567 - }, - { - "epoch": 7.075880052151239, - "grad_norm": 1.2740521430969238, - "learning_rate": 8.687035175879398e-05, - "loss": 5.6072, - "step": 13568 - }, - { - "epoch": 7.076401564537158, - "grad_norm": 1.4511280059814453, - "learning_rate": 8.686934673366835e-05, - "loss": 5.6268, - "step": 13569 - }, - { - "epoch": 7.076923076923077, - "grad_norm": 1.455051302909851, - "learning_rate": 8.686834170854272e-05, - "loss": 5.6008, - "step": 13570 - }, - { - "epoch": 7.077444589308996, - "grad_norm": 1.5562998056411743, - "learning_rate": 8.686733668341709e-05, - "loss": 5.4041, - "step": 13571 - }, - { - "epoch": 7.077966101694916, - "grad_norm": 1.5050525665283203, - "learning_rate": 8.686633165829147e-05, - "loss": 5.6107, - "step": 13572 - }, - { - "epoch": 7.078487614080834, - "grad_norm": 1.4060953855514526, - "learning_rate": 8.686532663316583e-05, - "loss": 5.7125, - "step": 13573 - }, - { - "epoch": 7.079009126466754, - "grad_norm": 1.5273085832595825, - "learning_rate": 8.68643216080402e-05, - "loss": 4.9446, - "step": 13574 - }, - { - "epoch": 7.079530638852673, - "grad_norm": 1.5765935182571411, - "learning_rate": 8.686331658291457e-05, - "loss": 5.2055, - "step": 13575 - }, - { - "epoch": 7.080052151238592, - "grad_norm": 1.5833678245544434, - "learning_rate": 8.686231155778895e-05, - "loss": 5.3419, - "step": 13576 - }, - { - "epoch": 7.080573663624511, - "grad_norm": 1.6957190036773682, - "learning_rate": 8.686130653266333e-05, - "loss": 4.5964, - "step": 13577 - }, - { - "epoch": 7.081095176010431, - "grad_norm": 1.4957491159439087, - "learning_rate": 8.686030150753769e-05, - "loss": 5.6978, - "step": 13578 - }, - { - "epoch": 7.081616688396349, - "grad_norm": 1.4473893642425537, - "learning_rate": 8.685929648241207e-05, - "loss": 5.9766, - "step": 13579 - }, - { - "epoch": 7.082138200782269, - "grad_norm": 1.5708186626434326, - "learning_rate": 8.685829145728643e-05, - "loss": 5.2648, - "step": 13580 - }, - { - "epoch": 7.082659713168188, - "grad_norm": 1.546856164932251, - "learning_rate": 8.685728643216081e-05, - "loss": 5.5832, - "step": 13581 - }, - { - "epoch": 7.083181225554107, - "grad_norm": 1.6524760723114014, - "learning_rate": 8.685628140703518e-05, - "loss": 5.1632, - "step": 13582 - }, - { - "epoch": 7.083702737940026, - "grad_norm": 1.5229398012161255, - "learning_rate": 8.685527638190955e-05, - "loss": 5.5758, - "step": 13583 - }, - { - "epoch": 7.084224250325946, - "grad_norm": 1.4050939083099365, - "learning_rate": 8.685427135678392e-05, - "loss": 5.4897, - "step": 13584 - }, - { - "epoch": 7.084745762711864, - "grad_norm": 1.4276978969573975, - "learning_rate": 8.68532663316583e-05, - "loss": 5.7754, - "step": 13585 - }, - { - "epoch": 7.085267275097784, - "grad_norm": 1.5231478214263916, - "learning_rate": 8.685226130653267e-05, - "loss": 5.4932, - "step": 13586 - }, - { - "epoch": 7.085788787483703, - "grad_norm": 1.5633713006973267, - "learning_rate": 8.685125628140704e-05, - "loss": 5.1065, - "step": 13587 - }, - { - "epoch": 7.086310299869622, - "grad_norm": 1.30764639377594, - "learning_rate": 8.685025125628142e-05, - "loss": 5.5082, - "step": 13588 - }, - { - "epoch": 7.086831812255541, - "grad_norm": 1.5375173091888428, - "learning_rate": 8.684924623115578e-05, - "loss": 5.4468, - "step": 13589 - }, - { - "epoch": 7.087353324641461, - "grad_norm": 1.376344919204712, - "learning_rate": 8.684824120603016e-05, - "loss": 5.6734, - "step": 13590 - }, - { - "epoch": 7.087874837027379, - "grad_norm": 1.3691282272338867, - "learning_rate": 8.684723618090452e-05, - "loss": 5.6659, - "step": 13591 - }, - { - "epoch": 7.088396349413299, - "grad_norm": 1.4925811290740967, - "learning_rate": 8.68462311557789e-05, - "loss": 5.6942, - "step": 13592 - }, - { - "epoch": 7.088917861799218, - "grad_norm": 1.5294550657272339, - "learning_rate": 8.684522613065326e-05, - "loss": 5.2502, - "step": 13593 - }, - { - "epoch": 7.089439374185137, - "grad_norm": 1.4407625198364258, - "learning_rate": 8.684422110552764e-05, - "loss": 5.5743, - "step": 13594 - }, - { - "epoch": 7.089960886571056, - "grad_norm": 1.5639410018920898, - "learning_rate": 8.6843216080402e-05, - "loss": 5.3387, - "step": 13595 - }, - { - "epoch": 7.090482398956976, - "grad_norm": 1.6965670585632324, - "learning_rate": 8.684221105527638e-05, - "loss": 4.8863, - "step": 13596 - }, - { - "epoch": 7.091003911342894, - "grad_norm": 1.4019525051116943, - "learning_rate": 8.684120603015076e-05, - "loss": 5.8264, - "step": 13597 - }, - { - "epoch": 7.091525423728814, - "grad_norm": 1.390803337097168, - "learning_rate": 8.684020100502514e-05, - "loss": 5.7193, - "step": 13598 - }, - { - "epoch": 7.092046936114733, - "grad_norm": 1.3046754598617554, - "learning_rate": 8.68391959798995e-05, - "loss": 4.8235, - "step": 13599 - }, - { - "epoch": 7.092568448500652, - "grad_norm": 1.4319632053375244, - "learning_rate": 8.683819095477387e-05, - "loss": 5.7886, - "step": 13600 - }, - { - "epoch": 7.093089960886571, - "grad_norm": 1.4180150032043457, - "learning_rate": 8.683718592964825e-05, - "loss": 5.58, - "step": 13601 - }, - { - "epoch": 7.093611473272491, - "grad_norm": 1.4659533500671387, - "learning_rate": 8.683618090452261e-05, - "loss": 5.5936, - "step": 13602 - }, - { - "epoch": 7.094132985658409, - "grad_norm": 1.4651235342025757, - "learning_rate": 8.683517587939699e-05, - "loss": 5.6928, - "step": 13603 - }, - { - "epoch": 7.094654498044329, - "grad_norm": 1.5278723239898682, - "learning_rate": 8.683417085427135e-05, - "loss": 5.3064, - "step": 13604 - }, - { - "epoch": 7.095176010430248, - "grad_norm": 1.3299847841262817, - "learning_rate": 8.683316582914573e-05, - "loss": 5.0405, - "step": 13605 - }, - { - "epoch": 7.095697522816167, - "grad_norm": 1.595296025276184, - "learning_rate": 8.683216080402011e-05, - "loss": 5.4664, - "step": 13606 - }, - { - "epoch": 7.096219035202086, - "grad_norm": 1.4436908960342407, - "learning_rate": 8.683115577889449e-05, - "loss": 5.3747, - "step": 13607 - }, - { - "epoch": 7.096740547588006, - "grad_norm": 1.4779636859893799, - "learning_rate": 8.683015075376885e-05, - "loss": 5.4351, - "step": 13608 - }, - { - "epoch": 7.097262059973924, - "grad_norm": 1.3523204326629639, - "learning_rate": 8.682914572864323e-05, - "loss": 5.87, - "step": 13609 - }, - { - "epoch": 7.097783572359844, - "grad_norm": 1.7195734977722168, - "learning_rate": 8.682814070351759e-05, - "loss": 5.1955, - "step": 13610 - }, - { - "epoch": 7.098305084745762, - "grad_norm": 1.4393012523651123, - "learning_rate": 8.682713567839197e-05, - "loss": 5.7195, - "step": 13611 - }, - { - "epoch": 7.098826597131682, - "grad_norm": 1.3339797258377075, - "learning_rate": 8.682613065326633e-05, - "loss": 5.1458, - "step": 13612 - }, - { - "epoch": 7.099348109517601, - "grad_norm": 1.31460440158844, - "learning_rate": 8.68251256281407e-05, - "loss": 5.0231, - "step": 13613 - }, - { - "epoch": 7.09986962190352, - "grad_norm": 1.3422274589538574, - "learning_rate": 8.682412060301508e-05, - "loss": 5.889, - "step": 13614 - }, - { - "epoch": 7.100391134289439, - "grad_norm": 1.5090866088867188, - "learning_rate": 8.682311557788944e-05, - "loss": 5.6441, - "step": 13615 - }, - { - "epoch": 7.100912646675359, - "grad_norm": 1.4507813453674316, - "learning_rate": 8.682211055276382e-05, - "loss": 5.3904, - "step": 13616 - }, - { - "epoch": 7.101434159061277, - "grad_norm": 1.4274462461471558, - "learning_rate": 8.68211055276382e-05, - "loss": 5.4279, - "step": 13617 - }, - { - "epoch": 7.101955671447197, - "grad_norm": 1.4477678537368774, - "learning_rate": 8.682010050251257e-05, - "loss": 5.7736, - "step": 13618 - }, - { - "epoch": 7.102477183833116, - "grad_norm": 1.4626460075378418, - "learning_rate": 8.681909547738694e-05, - "loss": 5.6208, - "step": 13619 - }, - { - "epoch": 7.102998696219035, - "grad_norm": 1.4424768686294556, - "learning_rate": 8.681809045226132e-05, - "loss": 5.5077, - "step": 13620 - }, - { - "epoch": 7.103520208604954, - "grad_norm": 1.4640727043151855, - "learning_rate": 8.681708542713568e-05, - "loss": 5.5983, - "step": 13621 - }, - { - "epoch": 7.104041720990874, - "grad_norm": 1.9582855701446533, - "learning_rate": 8.681608040201006e-05, - "loss": 4.5508, - "step": 13622 - }, - { - "epoch": 7.104563233376792, - "grad_norm": 1.5002244710922241, - "learning_rate": 8.681507537688442e-05, - "loss": 5.7418, - "step": 13623 - }, - { - "epoch": 7.105084745762712, - "grad_norm": 1.4540371894836426, - "learning_rate": 8.68140703517588e-05, - "loss": 5.8811, - "step": 13624 - }, - { - "epoch": 7.105606258148631, - "grad_norm": 1.4526832103729248, - "learning_rate": 8.681306532663317e-05, - "loss": 5.5539, - "step": 13625 - }, - { - "epoch": 7.10612777053455, - "grad_norm": 1.5479185581207275, - "learning_rate": 8.681206030150754e-05, - "loss": 5.2525, - "step": 13626 - }, - { - "epoch": 7.106649282920469, - "grad_norm": 1.3460255861282349, - "learning_rate": 8.681105527638191e-05, - "loss": 5.6054, - "step": 13627 - }, - { - "epoch": 7.107170795306389, - "grad_norm": 1.4631646871566772, - "learning_rate": 8.681005025125629e-05, - "loss": 5.7908, - "step": 13628 - }, - { - "epoch": 7.107692307692307, - "grad_norm": 1.329628586769104, - "learning_rate": 8.680904522613066e-05, - "loss": 5.5694, - "step": 13629 - }, - { - "epoch": 7.108213820078227, - "grad_norm": 1.5061373710632324, - "learning_rate": 8.680804020100503e-05, - "loss": 5.6819, - "step": 13630 - }, - { - "epoch": 7.108735332464146, - "grad_norm": 1.5007331371307373, - "learning_rate": 8.68070351758794e-05, - "loss": 5.2998, - "step": 13631 - }, - { - "epoch": 7.109256844850065, - "grad_norm": 1.515008568763733, - "learning_rate": 8.680603015075377e-05, - "loss": 5.6694, - "step": 13632 - }, - { - "epoch": 7.109778357235984, - "grad_norm": 1.3977910280227661, - "learning_rate": 8.680502512562815e-05, - "loss": 5.5662, - "step": 13633 - }, - { - "epoch": 7.110299869621904, - "grad_norm": 1.730996012687683, - "learning_rate": 8.680402010050251e-05, - "loss": 4.9271, - "step": 13634 - }, - { - "epoch": 7.110821382007822, - "grad_norm": 1.5520387887954712, - "learning_rate": 8.680301507537689e-05, - "loss": 5.24, - "step": 13635 - }, - { - "epoch": 7.111342894393742, - "grad_norm": 1.572482943534851, - "learning_rate": 8.680201005025125e-05, - "loss": 5.6464, - "step": 13636 - }, - { - "epoch": 7.111864406779661, - "grad_norm": 1.4917972087860107, - "learning_rate": 8.680100502512563e-05, - "loss": 5.1299, - "step": 13637 - }, - { - "epoch": 7.11238591916558, - "grad_norm": 1.5377639532089233, - "learning_rate": 8.680000000000001e-05, - "loss": 5.7582, - "step": 13638 - }, - { - "epoch": 7.112907431551499, - "grad_norm": 1.372374415397644, - "learning_rate": 8.679899497487439e-05, - "loss": 5.8432, - "step": 13639 - }, - { - "epoch": 7.113428943937419, - "grad_norm": 1.4222642183303833, - "learning_rate": 8.679798994974875e-05, - "loss": 5.5761, - "step": 13640 - }, - { - "epoch": 7.113950456323337, - "grad_norm": 1.6515171527862549, - "learning_rate": 8.679698492462312e-05, - "loss": 5.0948, - "step": 13641 - }, - { - "epoch": 7.114471968709257, - "grad_norm": 1.5630109310150146, - "learning_rate": 8.67959798994975e-05, - "loss": 5.3326, - "step": 13642 - }, - { - "epoch": 7.114993481095176, - "grad_norm": 1.3394486904144287, - "learning_rate": 8.679497487437186e-05, - "loss": 5.3268, - "step": 13643 - }, - { - "epoch": 7.115514993481095, - "grad_norm": 1.7022830247879028, - "learning_rate": 8.679396984924624e-05, - "loss": 4.6412, - "step": 13644 - }, - { - "epoch": 7.116036505867014, - "grad_norm": 1.7195699214935303, - "learning_rate": 8.67929648241206e-05, - "loss": 5.3261, - "step": 13645 - }, - { - "epoch": 7.116558018252934, - "grad_norm": 1.6442387104034424, - "learning_rate": 8.679195979899498e-05, - "loss": 5.5225, - "step": 13646 - }, - { - "epoch": 7.117079530638852, - "grad_norm": 1.6683534383773804, - "learning_rate": 8.679095477386934e-05, - "loss": 5.1136, - "step": 13647 - }, - { - "epoch": 7.117601043024772, - "grad_norm": 1.6192593574523926, - "learning_rate": 8.678994974874372e-05, - "loss": 5.1716, - "step": 13648 - }, - { - "epoch": 7.118122555410691, - "grad_norm": 1.4473369121551514, - "learning_rate": 8.67889447236181e-05, - "loss": 5.4246, - "step": 13649 - }, - { - "epoch": 7.11864406779661, - "grad_norm": 1.6402381658554077, - "learning_rate": 8.678793969849248e-05, - "loss": 5.5722, - "step": 13650 - }, - { - "epoch": 7.119165580182529, - "grad_norm": 1.5254406929016113, - "learning_rate": 8.678693467336684e-05, - "loss": 5.4803, - "step": 13651 - }, - { - "epoch": 7.119687092568449, - "grad_norm": 1.4192821979522705, - "learning_rate": 8.678592964824122e-05, - "loss": 5.5274, - "step": 13652 - }, - { - "epoch": 7.120208604954367, - "grad_norm": 1.4525632858276367, - "learning_rate": 8.678492462311558e-05, - "loss": 5.7034, - "step": 13653 - }, - { - "epoch": 7.120730117340287, - "grad_norm": 1.5107717514038086, - "learning_rate": 8.678391959798995e-05, - "loss": 4.9533, - "step": 13654 - }, - { - "epoch": 7.121251629726206, - "grad_norm": 1.3504157066345215, - "learning_rate": 8.678291457286432e-05, - "loss": 5.9177, - "step": 13655 - }, - { - "epoch": 7.121773142112125, - "grad_norm": 1.297222375869751, - "learning_rate": 8.678190954773869e-05, - "loss": 5.702, - "step": 13656 - }, - { - "epoch": 7.122294654498044, - "grad_norm": 1.4336378574371338, - "learning_rate": 8.678090452261307e-05, - "loss": 5.8329, - "step": 13657 - }, - { - "epoch": 7.122816166883964, - "grad_norm": 1.4444360733032227, - "learning_rate": 8.677989949748744e-05, - "loss": 5.476, - "step": 13658 - }, - { - "epoch": 7.123337679269882, - "grad_norm": 1.497109055519104, - "learning_rate": 8.677889447236182e-05, - "loss": 5.5574, - "step": 13659 - }, - { - "epoch": 7.123859191655802, - "grad_norm": 1.5590578317642212, - "learning_rate": 8.677788944723619e-05, - "loss": 4.7393, - "step": 13660 - }, - { - "epoch": 7.124380704041721, - "grad_norm": 1.4764740467071533, - "learning_rate": 8.677688442211056e-05, - "loss": 5.3534, - "step": 13661 - }, - { - "epoch": 7.12490221642764, - "grad_norm": 1.4598698616027832, - "learning_rate": 8.677587939698493e-05, - "loss": 5.6324, - "step": 13662 - }, - { - "epoch": 7.125423728813559, - "grad_norm": 1.5051214694976807, - "learning_rate": 8.67748743718593e-05, - "loss": 5.8152, - "step": 13663 - }, - { - "epoch": 7.125945241199479, - "grad_norm": 1.4194072484970093, - "learning_rate": 8.677386934673367e-05, - "loss": 5.062, - "step": 13664 - }, - { - "epoch": 7.126466753585397, - "grad_norm": 1.5226991176605225, - "learning_rate": 8.677286432160805e-05, - "loss": 5.6502, - "step": 13665 - }, - { - "epoch": 7.126988265971317, - "grad_norm": 1.4377437829971313, - "learning_rate": 8.677185929648241e-05, - "loss": 5.6032, - "step": 13666 - }, - { - "epoch": 7.127509778357236, - "grad_norm": 1.485182285308838, - "learning_rate": 8.677085427135678e-05, - "loss": 5.4856, - "step": 13667 - }, - { - "epoch": 7.128031290743155, - "grad_norm": 1.5292298793792725, - "learning_rate": 8.676984924623115e-05, - "loss": 5.2058, - "step": 13668 - }, - { - "epoch": 7.128552803129074, - "grad_norm": 1.403167486190796, - "learning_rate": 8.676884422110553e-05, - "loss": 5.3197, - "step": 13669 - }, - { - "epoch": 7.129074315514994, - "grad_norm": 1.5798016786575317, - "learning_rate": 8.676783919597991e-05, - "loss": 5.3361, - "step": 13670 - }, - { - "epoch": 7.129595827900912, - "grad_norm": 1.5143396854400635, - "learning_rate": 8.676683417085427e-05, - "loss": 5.3996, - "step": 13671 - }, - { - "epoch": 7.130117340286832, - "grad_norm": 1.3972196578979492, - "learning_rate": 8.676582914572865e-05, - "loss": 5.7792, - "step": 13672 - }, - { - "epoch": 7.130638852672751, - "grad_norm": 1.501516342163086, - "learning_rate": 8.676482412060302e-05, - "loss": 5.2751, - "step": 13673 - }, - { - "epoch": 7.13116036505867, - "grad_norm": 1.4885339736938477, - "learning_rate": 8.67638190954774e-05, - "loss": 5.8677, - "step": 13674 - }, - { - "epoch": 7.131681877444589, - "grad_norm": 1.5981541872024536, - "learning_rate": 8.676281407035176e-05, - "loss": 5.1525, - "step": 13675 - }, - { - "epoch": 7.132203389830509, - "grad_norm": 1.4468238353729248, - "learning_rate": 8.676180904522614e-05, - "loss": 5.119, - "step": 13676 - }, - { - "epoch": 7.132724902216427, - "grad_norm": 1.4865953922271729, - "learning_rate": 8.67608040201005e-05, - "loss": 5.6191, - "step": 13677 - }, - { - "epoch": 7.133246414602347, - "grad_norm": 1.5246223211288452, - "learning_rate": 8.675979899497488e-05, - "loss": 5.2911, - "step": 13678 - }, - { - "epoch": 7.133767926988266, - "grad_norm": 1.4922221899032593, - "learning_rate": 8.675879396984926e-05, - "loss": 5.5074, - "step": 13679 - }, - { - "epoch": 7.134289439374185, - "grad_norm": 1.3871506452560425, - "learning_rate": 8.675778894472362e-05, - "loss": 5.4585, - "step": 13680 - }, - { - "epoch": 7.134810951760104, - "grad_norm": 1.4123525619506836, - "learning_rate": 8.6756783919598e-05, - "loss": 5.6936, - "step": 13681 - }, - { - "epoch": 7.135332464146024, - "grad_norm": 1.4054672718048096, - "learning_rate": 8.675577889447236e-05, - "loss": 5.7809, - "step": 13682 - }, - { - "epoch": 7.135853976531942, - "grad_norm": 1.6185574531555176, - "learning_rate": 8.675477386934674e-05, - "loss": 5.424, - "step": 13683 - }, - { - "epoch": 7.136375488917862, - "grad_norm": 1.5688014030456543, - "learning_rate": 8.67537688442211e-05, - "loss": 5.4405, - "step": 13684 - }, - { - "epoch": 7.136897001303781, - "grad_norm": 1.365963101387024, - "learning_rate": 8.675276381909548e-05, - "loss": 5.9682, - "step": 13685 - }, - { - "epoch": 7.1374185136897, - "grad_norm": 1.4275968074798584, - "learning_rate": 8.675175879396985e-05, - "loss": 5.4065, - "step": 13686 - }, - { - "epoch": 7.137940026075619, - "grad_norm": 1.3992513418197632, - "learning_rate": 8.675075376884422e-05, - "loss": 5.6803, - "step": 13687 - }, - { - "epoch": 7.138461538461539, - "grad_norm": 1.3723068237304688, - "learning_rate": 8.674974874371859e-05, - "loss": 5.7803, - "step": 13688 - }, - { - "epoch": 7.138983050847457, - "grad_norm": 1.4092477560043335, - "learning_rate": 8.674874371859297e-05, - "loss": 5.5841, - "step": 13689 - }, - { - "epoch": 7.139504563233377, - "grad_norm": 1.4170984029769897, - "learning_rate": 8.674773869346734e-05, - "loss": 5.5125, - "step": 13690 - }, - { - "epoch": 7.140026075619296, - "grad_norm": 1.399552345275879, - "learning_rate": 8.674673366834172e-05, - "loss": 5.8054, - "step": 13691 - }, - { - "epoch": 7.140547588005215, - "grad_norm": 1.4512799978256226, - "learning_rate": 8.674572864321609e-05, - "loss": 5.1677, - "step": 13692 - }, - { - "epoch": 7.141069100391134, - "grad_norm": 1.4772684574127197, - "learning_rate": 8.674472361809045e-05, - "loss": 5.7397, - "step": 13693 - }, - { - "epoch": 7.141590612777054, - "grad_norm": 1.4428117275238037, - "learning_rate": 8.674371859296483e-05, - "loss": 5.5534, - "step": 13694 - }, - { - "epoch": 7.1421121251629724, - "grad_norm": 1.3874369859695435, - "learning_rate": 8.674271356783919e-05, - "loss": 5.7227, - "step": 13695 - }, - { - "epoch": 7.142633637548892, - "grad_norm": 1.4359149932861328, - "learning_rate": 8.674170854271357e-05, - "loss": 5.8466, - "step": 13696 - }, - { - "epoch": 7.143155149934811, - "grad_norm": 1.3171013593673706, - "learning_rate": 8.674070351758794e-05, - "loss": 5.7037, - "step": 13697 - }, - { - "epoch": 7.14367666232073, - "grad_norm": 1.3525609970092773, - "learning_rate": 8.673969849246231e-05, - "loss": 5.5682, - "step": 13698 - }, - { - "epoch": 7.144198174706649, - "grad_norm": 1.4392998218536377, - "learning_rate": 8.673869346733669e-05, - "loss": 5.6141, - "step": 13699 - }, - { - "epoch": 7.144719687092568, - "grad_norm": 1.593989372253418, - "learning_rate": 8.673768844221107e-05, - "loss": 5.436, - "step": 13700 - }, - { - "epoch": 7.1452411994784875, - "grad_norm": 1.3460668325424194, - "learning_rate": 8.673668341708543e-05, - "loss": 5.6313, - "step": 13701 - }, - { - "epoch": 7.145762711864407, - "grad_norm": 1.3352442979812622, - "learning_rate": 8.673567839195981e-05, - "loss": 5.1501, - "step": 13702 - }, - { - "epoch": 7.146284224250326, - "grad_norm": 1.4672331809997559, - "learning_rate": 8.673467336683417e-05, - "loss": 5.7272, - "step": 13703 - }, - { - "epoch": 7.146805736636245, - "grad_norm": 1.6010291576385498, - "learning_rate": 8.673366834170855e-05, - "loss": 5.091, - "step": 13704 - }, - { - "epoch": 7.147327249022164, - "grad_norm": 1.3806794881820679, - "learning_rate": 8.673266331658292e-05, - "loss": 5.709, - "step": 13705 - }, - { - "epoch": 7.147848761408083, - "grad_norm": 1.4654204845428467, - "learning_rate": 8.673165829145728e-05, - "loss": 5.0132, - "step": 13706 - }, - { - "epoch": 7.1483702737940025, - "grad_norm": 1.416214942932129, - "learning_rate": 8.673065326633166e-05, - "loss": 5.799, - "step": 13707 - }, - { - "epoch": 7.148891786179922, - "grad_norm": 1.348501205444336, - "learning_rate": 8.672964824120602e-05, - "loss": 5.6978, - "step": 13708 - }, - { - "epoch": 7.1494132985658405, - "grad_norm": 1.3492417335510254, - "learning_rate": 8.67286432160804e-05, - "loss": 5.6261, - "step": 13709 - }, - { - "epoch": 7.14993481095176, - "grad_norm": 1.4385771751403809, - "learning_rate": 8.672763819095478e-05, - "loss": 4.9238, - "step": 13710 - }, - { - "epoch": 7.150456323337679, - "grad_norm": 1.4635928869247437, - "learning_rate": 8.672663316582916e-05, - "loss": 5.9071, - "step": 13711 - }, - { - "epoch": 7.150977835723598, - "grad_norm": 1.5019280910491943, - "learning_rate": 8.672562814070352e-05, - "loss": 5.6247, - "step": 13712 - }, - { - "epoch": 7.1514993481095175, - "grad_norm": 1.4594782590866089, - "learning_rate": 8.67246231155779e-05, - "loss": 5.3033, - "step": 13713 - }, - { - "epoch": 7.152020860495437, - "grad_norm": 1.454990267753601, - "learning_rate": 8.672361809045226e-05, - "loss": 5.7657, - "step": 13714 - }, - { - "epoch": 7.1525423728813555, - "grad_norm": 1.6095958948135376, - "learning_rate": 8.672261306532664e-05, - "loss": 5.5021, - "step": 13715 - }, - { - "epoch": 7.153063885267275, - "grad_norm": 1.3762080669403076, - "learning_rate": 8.6721608040201e-05, - "loss": 5.8533, - "step": 13716 - }, - { - "epoch": 7.1535853976531945, - "grad_norm": 2.1343610286712646, - "learning_rate": 8.672060301507538e-05, - "loss": 5.0848, - "step": 13717 - }, - { - "epoch": 7.154106910039113, - "grad_norm": 1.5194721221923828, - "learning_rate": 8.671959798994975e-05, - "loss": 5.6722, - "step": 13718 - }, - { - "epoch": 7.1546284224250325, - "grad_norm": 1.5119245052337646, - "learning_rate": 8.671859296482413e-05, - "loss": 5.0735, - "step": 13719 - }, - { - "epoch": 7.155149934810952, - "grad_norm": 1.4835466146469116, - "learning_rate": 8.67175879396985e-05, - "loss": 5.9239, - "step": 13720 - }, - { - "epoch": 7.1556714471968705, - "grad_norm": 1.8728574514389038, - "learning_rate": 8.671658291457287e-05, - "loss": 5.1611, - "step": 13721 - }, - { - "epoch": 7.15619295958279, - "grad_norm": 1.4794845581054688, - "learning_rate": 8.671557788944725e-05, - "loss": 5.9314, - "step": 13722 - }, - { - "epoch": 7.1567144719687095, - "grad_norm": 1.4493411779403687, - "learning_rate": 8.671457286432161e-05, - "loss": 5.8483, - "step": 13723 - }, - { - "epoch": 7.157235984354628, - "grad_norm": 1.4273525476455688, - "learning_rate": 8.671356783919599e-05, - "loss": 5.0698, - "step": 13724 - }, - { - "epoch": 7.1577574967405475, - "grad_norm": 1.4652984142303467, - "learning_rate": 8.671256281407035e-05, - "loss": 5.4427, - "step": 13725 - }, - { - "epoch": 7.158279009126467, - "grad_norm": 1.5137547254562378, - "learning_rate": 8.671155778894473e-05, - "loss": 5.6676, - "step": 13726 - }, - { - "epoch": 7.1588005215123856, - "grad_norm": 1.380881428718567, - "learning_rate": 8.67105527638191e-05, - "loss": 5.6083, - "step": 13727 - }, - { - "epoch": 7.159322033898305, - "grad_norm": 1.395025372505188, - "learning_rate": 8.670954773869347e-05, - "loss": 5.2951, - "step": 13728 - }, - { - "epoch": 7.1598435462842245, - "grad_norm": 1.5917667150497437, - "learning_rate": 8.670854271356784e-05, - "loss": 5.392, - "step": 13729 - }, - { - "epoch": 7.160365058670143, - "grad_norm": 1.5410608053207397, - "learning_rate": 8.670753768844221e-05, - "loss": 5.3055, - "step": 13730 - }, - { - "epoch": 7.1608865710560625, - "grad_norm": 1.3579561710357666, - "learning_rate": 8.670653266331659e-05, - "loss": 6.0048, - "step": 13731 - }, - { - "epoch": 7.161408083441982, - "grad_norm": 1.3876606225967407, - "learning_rate": 8.670552763819097e-05, - "loss": 5.8546, - "step": 13732 - }, - { - "epoch": 7.161929595827901, - "grad_norm": 1.3969610929489136, - "learning_rate": 8.670452261306533e-05, - "loss": 5.6641, - "step": 13733 - }, - { - "epoch": 7.16245110821382, - "grad_norm": 1.5410056114196777, - "learning_rate": 8.67035175879397e-05, - "loss": 5.3438, - "step": 13734 - }, - { - "epoch": 7.1629726205997395, - "grad_norm": 1.4928194284439087, - "learning_rate": 8.670251256281408e-05, - "loss": 5.5356, - "step": 13735 - }, - { - "epoch": 7.163494132985658, - "grad_norm": 1.6006301641464233, - "learning_rate": 8.670150753768844e-05, - "loss": 5.6162, - "step": 13736 - }, - { - "epoch": 7.1640156453715775, - "grad_norm": 1.477206826210022, - "learning_rate": 8.670050251256282e-05, - "loss": 5.0868, - "step": 13737 - }, - { - "epoch": 7.164537157757497, - "grad_norm": 1.3133422136306763, - "learning_rate": 8.669949748743718e-05, - "loss": 5.854, - "step": 13738 - }, - { - "epoch": 7.165058670143416, - "grad_norm": 1.5052441358566284, - "learning_rate": 8.669849246231156e-05, - "loss": 5.6181, - "step": 13739 - }, - { - "epoch": 7.165580182529335, - "grad_norm": 1.4646841287612915, - "learning_rate": 8.669748743718594e-05, - "loss": 5.8571, - "step": 13740 - }, - { - "epoch": 7.1661016949152545, - "grad_norm": 1.4477499723434448, - "learning_rate": 8.669648241206032e-05, - "loss": 5.6897, - "step": 13741 - }, - { - "epoch": 7.166623207301173, - "grad_norm": 1.5311987400054932, - "learning_rate": 8.669547738693468e-05, - "loss": 5.8968, - "step": 13742 - }, - { - "epoch": 7.1671447196870925, - "grad_norm": 1.4337255954742432, - "learning_rate": 8.669447236180906e-05, - "loss": 5.6948, - "step": 13743 - }, - { - "epoch": 7.167666232073012, - "grad_norm": 1.4166909456253052, - "learning_rate": 8.669346733668342e-05, - "loss": 5.6831, - "step": 13744 - }, - { - "epoch": 7.168187744458931, - "grad_norm": 1.4642064571380615, - "learning_rate": 8.66924623115578e-05, - "loss": 5.7748, - "step": 13745 - }, - { - "epoch": 7.16870925684485, - "grad_norm": 1.3687443733215332, - "learning_rate": 8.669145728643216e-05, - "loss": 5.8872, - "step": 13746 - }, - { - "epoch": 7.1692307692307695, - "grad_norm": 1.5333423614501953, - "learning_rate": 8.669045226130653e-05, - "loss": 5.0032, - "step": 13747 - }, - { - "epoch": 7.169752281616688, - "grad_norm": 1.3645167350769043, - "learning_rate": 8.66894472361809e-05, - "loss": 5.7995, - "step": 13748 - }, - { - "epoch": 7.170273794002608, - "grad_norm": 1.5342808961868286, - "learning_rate": 8.668844221105527e-05, - "loss": 5.8671, - "step": 13749 - }, - { - "epoch": 7.170795306388527, - "grad_norm": 1.4890203475952148, - "learning_rate": 8.668743718592965e-05, - "loss": 5.395, - "step": 13750 - }, - { - "epoch": 7.171316818774446, - "grad_norm": 1.5375837087631226, - "learning_rate": 8.668643216080403e-05, - "loss": 5.7229, - "step": 13751 - }, - { - "epoch": 7.171838331160365, - "grad_norm": 1.4149529933929443, - "learning_rate": 8.66854271356784e-05, - "loss": 5.729, - "step": 13752 - }, - { - "epoch": 7.1723598435462845, - "grad_norm": 1.4262226819992065, - "learning_rate": 8.668442211055277e-05, - "loss": 5.4925, - "step": 13753 - }, - { - "epoch": 7.172881355932203, - "grad_norm": 1.3137096166610718, - "learning_rate": 8.668341708542715e-05, - "loss": 5.9522, - "step": 13754 - }, - { - "epoch": 7.173402868318123, - "grad_norm": 1.6734628677368164, - "learning_rate": 8.668241206030151e-05, - "loss": 5.5751, - "step": 13755 - }, - { - "epoch": 7.173924380704042, - "grad_norm": 1.39096200466156, - "learning_rate": 8.668140703517589e-05, - "loss": 6.1222, - "step": 13756 - }, - { - "epoch": 7.174445893089961, - "grad_norm": 1.5020592212677002, - "learning_rate": 8.668040201005025e-05, - "loss": 5.4956, - "step": 13757 - }, - { - "epoch": 7.17496740547588, - "grad_norm": 1.5459926128387451, - "learning_rate": 8.667939698492463e-05, - "loss": 5.4374, - "step": 13758 - }, - { - "epoch": 7.1754889178617995, - "grad_norm": 1.4007116556167603, - "learning_rate": 8.6678391959799e-05, - "loss": 5.7001, - "step": 13759 - }, - { - "epoch": 7.176010430247718, - "grad_norm": 1.4595884084701538, - "learning_rate": 8.667738693467337e-05, - "loss": 5.6649, - "step": 13760 - }, - { - "epoch": 7.176531942633638, - "grad_norm": 1.3825914859771729, - "learning_rate": 8.667638190954775e-05, - "loss": 5.7271, - "step": 13761 - }, - { - "epoch": 7.177053455019557, - "grad_norm": 1.50836980342865, - "learning_rate": 8.667537688442211e-05, - "loss": 5.2057, - "step": 13762 - }, - { - "epoch": 7.177574967405476, - "grad_norm": 1.475838303565979, - "learning_rate": 8.667437185929649e-05, - "loss": 5.4637, - "step": 13763 - }, - { - "epoch": 7.178096479791395, - "grad_norm": 1.5173999071121216, - "learning_rate": 8.667336683417086e-05, - "loss": 5.1034, - "step": 13764 - }, - { - "epoch": 7.1786179921773146, - "grad_norm": 1.4801719188690186, - "learning_rate": 8.667236180904523e-05, - "loss": 5.474, - "step": 13765 - }, - { - "epoch": 7.179139504563233, - "grad_norm": 1.4594944715499878, - "learning_rate": 8.66713567839196e-05, - "loss": 5.5546, - "step": 13766 - }, - { - "epoch": 7.179661016949153, - "grad_norm": 1.3547470569610596, - "learning_rate": 8.667035175879398e-05, - "loss": 5.476, - "step": 13767 - }, - { - "epoch": 7.180182529335072, - "grad_norm": 1.4691829681396484, - "learning_rate": 8.666934673366834e-05, - "loss": 5.2176, - "step": 13768 - }, - { - "epoch": 7.180704041720991, - "grad_norm": 1.3840066194534302, - "learning_rate": 8.666834170854272e-05, - "loss": 5.6359, - "step": 13769 - }, - { - "epoch": 7.18122555410691, - "grad_norm": 1.4959218502044678, - "learning_rate": 8.666733668341708e-05, - "loss": 5.5634, - "step": 13770 - }, - { - "epoch": 7.18174706649283, - "grad_norm": 1.4436759948730469, - "learning_rate": 8.666633165829146e-05, - "loss": 5.666, - "step": 13771 - }, - { - "epoch": 7.182268578878748, - "grad_norm": 1.4756942987442017, - "learning_rate": 8.666532663316584e-05, - "loss": 5.5328, - "step": 13772 - }, - { - "epoch": 7.182790091264668, - "grad_norm": 1.3193011283874512, - "learning_rate": 8.66643216080402e-05, - "loss": 5.6252, - "step": 13773 - }, - { - "epoch": 7.183311603650587, - "grad_norm": 1.611114263534546, - "learning_rate": 8.666331658291458e-05, - "loss": 5.2047, - "step": 13774 - }, - { - "epoch": 7.183833116036506, - "grad_norm": 1.4112190008163452, - "learning_rate": 8.666231155778894e-05, - "loss": 5.3845, - "step": 13775 - }, - { - "epoch": 7.184354628422425, - "grad_norm": 1.5705552101135254, - "learning_rate": 8.666130653266332e-05, - "loss": 5.151, - "step": 13776 - }, - { - "epoch": 7.184876140808345, - "grad_norm": 1.465720295906067, - "learning_rate": 8.666030150753769e-05, - "loss": 5.5399, - "step": 13777 - }, - { - "epoch": 7.185397653194263, - "grad_norm": 1.4347004890441895, - "learning_rate": 8.665929648241206e-05, - "loss": 5.187, - "step": 13778 - }, - { - "epoch": 7.185919165580183, - "grad_norm": 1.3448377847671509, - "learning_rate": 8.665829145728643e-05, - "loss": 5.8386, - "step": 13779 - }, - { - "epoch": 7.186440677966102, - "grad_norm": 1.4741203784942627, - "learning_rate": 8.665728643216081e-05, - "loss": 5.4708, - "step": 13780 - }, - { - "epoch": 7.186962190352021, - "grad_norm": 1.4139752388000488, - "learning_rate": 8.665628140703518e-05, - "loss": 5.7875, - "step": 13781 - }, - { - "epoch": 7.18748370273794, - "grad_norm": 1.4072082042694092, - "learning_rate": 8.665527638190956e-05, - "loss": 5.7757, - "step": 13782 - }, - { - "epoch": 7.18800521512386, - "grad_norm": 1.4167219400405884, - "learning_rate": 8.665427135678393e-05, - "loss": 5.3975, - "step": 13783 - }, - { - "epoch": 7.188526727509778, - "grad_norm": 1.4888793230056763, - "learning_rate": 8.66532663316583e-05, - "loss": 5.0454, - "step": 13784 - }, - { - "epoch": 7.189048239895698, - "grad_norm": 1.6166670322418213, - "learning_rate": 8.665226130653267e-05, - "loss": 5.3865, - "step": 13785 - }, - { - "epoch": 7.189569752281617, - "grad_norm": 1.3914917707443237, - "learning_rate": 8.665125628140703e-05, - "loss": 5.6687, - "step": 13786 - }, - { - "epoch": 7.190091264667536, - "grad_norm": 1.485008716583252, - "learning_rate": 8.665025125628141e-05, - "loss": 5.0224, - "step": 13787 - }, - { - "epoch": 7.190612777053455, - "grad_norm": 1.5275533199310303, - "learning_rate": 8.664924623115578e-05, - "loss": 5.325, - "step": 13788 - }, - { - "epoch": 7.191134289439375, - "grad_norm": 1.5702720880508423, - "learning_rate": 8.664824120603015e-05, - "loss": 5.3958, - "step": 13789 - }, - { - "epoch": 7.191655801825293, - "grad_norm": 1.8940235376358032, - "learning_rate": 8.664723618090452e-05, - "loss": 5.1567, - "step": 13790 - }, - { - "epoch": 7.192177314211213, - "grad_norm": 1.4270268678665161, - "learning_rate": 8.66462311557789e-05, - "loss": 5.489, - "step": 13791 - }, - { - "epoch": 7.192698826597132, - "grad_norm": 1.486464023590088, - "learning_rate": 8.664522613065327e-05, - "loss": 5.6908, - "step": 13792 - }, - { - "epoch": 7.193220338983051, - "grad_norm": 1.3854243755340576, - "learning_rate": 8.664422110552765e-05, - "loss": 5.4293, - "step": 13793 - }, - { - "epoch": 7.19374185136897, - "grad_norm": 1.4980734586715698, - "learning_rate": 8.664321608040202e-05, - "loss": 5.3788, - "step": 13794 - }, - { - "epoch": 7.194263363754889, - "grad_norm": 1.5902016162872314, - "learning_rate": 8.664221105527639e-05, - "loss": 5.3741, - "step": 13795 - }, - { - "epoch": 7.194784876140808, - "grad_norm": 1.5114284753799438, - "learning_rate": 8.664120603015076e-05, - "loss": 5.7927, - "step": 13796 - }, - { - "epoch": 7.195306388526728, - "grad_norm": 1.5146446228027344, - "learning_rate": 8.664020100502514e-05, - "loss": 5.5852, - "step": 13797 - }, - { - "epoch": 7.195827900912647, - "grad_norm": 1.4084800481796265, - "learning_rate": 8.66391959798995e-05, - "loss": 5.8451, - "step": 13798 - }, - { - "epoch": 7.196349413298566, - "grad_norm": 1.4775413274765015, - "learning_rate": 8.663819095477388e-05, - "loss": 5.6098, - "step": 13799 - }, - { - "epoch": 7.196870925684485, - "grad_norm": 1.4530738592147827, - "learning_rate": 8.663718592964824e-05, - "loss": 5.9596, - "step": 13800 - }, - { - "epoch": 7.197392438070404, - "grad_norm": 1.28622305393219, - "learning_rate": 8.66361809045226e-05, - "loss": 5.9268, - "step": 13801 - }, - { - "epoch": 7.197913950456323, - "grad_norm": 1.4587386846542358, - "learning_rate": 8.663517587939698e-05, - "loss": 5.8263, - "step": 13802 - }, - { - "epoch": 7.198435462842243, - "grad_norm": 1.3289525508880615, - "learning_rate": 8.663417085427136e-05, - "loss": 6.0443, - "step": 13803 - }, - { - "epoch": 7.198956975228161, - "grad_norm": 1.5427478551864624, - "learning_rate": 8.663316582914574e-05, - "loss": 5.3255, - "step": 13804 - }, - { - "epoch": 7.199478487614081, - "grad_norm": 1.4756428003311157, - "learning_rate": 8.66321608040201e-05, - "loss": 5.877, - "step": 13805 - }, - { - "epoch": 7.2, - "grad_norm": 1.5827785730361938, - "learning_rate": 8.663115577889448e-05, - "loss": 5.9185, - "step": 13806 - }, - { - "epoch": 7.200521512385919, - "grad_norm": 1.5714043378829956, - "learning_rate": 8.663015075376885e-05, - "loss": 5.3746, - "step": 13807 - }, - { - "epoch": 7.201043024771838, - "grad_norm": 1.602576732635498, - "learning_rate": 8.662914572864322e-05, - "loss": 5.4826, - "step": 13808 - }, - { - "epoch": 7.201564537157758, - "grad_norm": 1.5858824253082275, - "learning_rate": 8.662814070351759e-05, - "loss": 5.4808, - "step": 13809 - }, - { - "epoch": 7.202086049543676, - "grad_norm": 1.4796278476715088, - "learning_rate": 8.662713567839197e-05, - "loss": 5.7076, - "step": 13810 - }, - { - "epoch": 7.202607561929596, - "grad_norm": 1.4641693830490112, - "learning_rate": 8.662613065326633e-05, - "loss": 5.4828, - "step": 13811 - }, - { - "epoch": 7.203129074315515, - "grad_norm": 1.3879826068878174, - "learning_rate": 8.662512562814071e-05, - "loss": 5.4879, - "step": 13812 - }, - { - "epoch": 7.203650586701434, - "grad_norm": 1.446555733680725, - "learning_rate": 8.662412060301509e-05, - "loss": 5.9207, - "step": 13813 - }, - { - "epoch": 7.204172099087353, - "grad_norm": 1.3338100910186768, - "learning_rate": 8.662311557788945e-05, - "loss": 5.8018, - "step": 13814 - }, - { - "epoch": 7.204693611473273, - "grad_norm": 1.3744752407073975, - "learning_rate": 8.662211055276383e-05, - "loss": 5.5258, - "step": 13815 - }, - { - "epoch": 7.205215123859191, - "grad_norm": 1.5686001777648926, - "learning_rate": 8.662110552763819e-05, - "loss": 5.4953, - "step": 13816 - }, - { - "epoch": 7.205736636245111, - "grad_norm": 1.3690036535263062, - "learning_rate": 8.662010050251257e-05, - "loss": 5.6988, - "step": 13817 - }, - { - "epoch": 7.20625814863103, - "grad_norm": 1.290788173675537, - "learning_rate": 8.661909547738693e-05, - "loss": 6.1169, - "step": 13818 - }, - { - "epoch": 7.206779661016949, - "grad_norm": 1.6079181432724, - "learning_rate": 8.661809045226131e-05, - "loss": 5.3602, - "step": 13819 - }, - { - "epoch": 7.207301173402868, - "grad_norm": 1.5802100896835327, - "learning_rate": 8.661708542713568e-05, - "loss": 5.8747, - "step": 13820 - }, - { - "epoch": 7.207822685788788, - "grad_norm": 1.5026919841766357, - "learning_rate": 8.661608040201005e-05, - "loss": 5.598, - "step": 13821 - }, - { - "epoch": 7.208344198174706, - "grad_norm": 1.445643424987793, - "learning_rate": 8.661507537688442e-05, - "loss": 5.8844, - "step": 13822 - }, - { - "epoch": 7.208865710560626, - "grad_norm": 1.4263205528259277, - "learning_rate": 8.66140703517588e-05, - "loss": 5.6695, - "step": 13823 - }, - { - "epoch": 7.209387222946545, - "grad_norm": 1.4534839391708374, - "learning_rate": 8.661306532663317e-05, - "loss": 5.8565, - "step": 13824 - }, - { - "epoch": 7.209908735332464, - "grad_norm": 1.4577950239181519, - "learning_rate": 8.661206030150755e-05, - "loss": 5.4555, - "step": 13825 - }, - { - "epoch": 7.210430247718383, - "grad_norm": 1.5838875770568848, - "learning_rate": 8.661105527638192e-05, - "loss": 5.6712, - "step": 13826 - }, - { - "epoch": 7.210951760104303, - "grad_norm": 1.571539044380188, - "learning_rate": 8.661005025125628e-05, - "loss": 5.5337, - "step": 13827 - }, - { - "epoch": 7.211473272490221, - "grad_norm": 1.3274136781692505, - "learning_rate": 8.660904522613066e-05, - "loss": 5.7376, - "step": 13828 - }, - { - "epoch": 7.211994784876141, - "grad_norm": 1.4326677322387695, - "learning_rate": 8.660804020100502e-05, - "loss": 5.3089, - "step": 13829 - }, - { - "epoch": 7.21251629726206, - "grad_norm": 1.5162547826766968, - "learning_rate": 8.66070351758794e-05, - "loss": 5.0942, - "step": 13830 - }, - { - "epoch": 7.213037809647979, - "grad_norm": 1.4269556999206543, - "learning_rate": 8.660603015075376e-05, - "loss": 5.9023, - "step": 13831 - }, - { - "epoch": 7.213559322033898, - "grad_norm": 1.380919337272644, - "learning_rate": 8.660502512562814e-05, - "loss": 6.0221, - "step": 13832 - }, - { - "epoch": 7.214080834419818, - "grad_norm": 1.4417619705200195, - "learning_rate": 8.660402010050252e-05, - "loss": 5.618, - "step": 13833 - }, - { - "epoch": 7.214602346805736, - "grad_norm": 1.4398239850997925, - "learning_rate": 8.66030150753769e-05, - "loss": 5.7128, - "step": 13834 - }, - { - "epoch": 7.215123859191656, - "grad_norm": 1.526016354560852, - "learning_rate": 8.660201005025126e-05, - "loss": 5.7215, - "step": 13835 - }, - { - "epoch": 7.215645371577575, - "grad_norm": 1.7095297574996948, - "learning_rate": 8.660100502512564e-05, - "loss": 5.1353, - "step": 13836 - }, - { - "epoch": 7.216166883963494, - "grad_norm": 1.6299515962600708, - "learning_rate": 8.66e-05, - "loss": 5.4414, - "step": 13837 - }, - { - "epoch": 7.216688396349413, - "grad_norm": 1.4907594919204712, - "learning_rate": 8.659899497487438e-05, - "loss": 5.8544, - "step": 13838 - }, - { - "epoch": 7.217209908735333, - "grad_norm": 1.2853449583053589, - "learning_rate": 8.659798994974875e-05, - "loss": 5.1076, - "step": 13839 - }, - { - "epoch": 7.217731421121251, - "grad_norm": 1.388832449913025, - "learning_rate": 8.659698492462311e-05, - "loss": 5.5784, - "step": 13840 - }, - { - "epoch": 7.218252933507171, - "grad_norm": 1.4117907285690308, - "learning_rate": 8.659597989949749e-05, - "loss": 5.0705, - "step": 13841 - }, - { - "epoch": 7.21877444589309, - "grad_norm": 1.7573840618133545, - "learning_rate": 8.659497487437185e-05, - "loss": 5.339, - "step": 13842 - }, - { - "epoch": 7.219295958279009, - "grad_norm": 1.5462278127670288, - "learning_rate": 8.659396984924623e-05, - "loss": 5.2077, - "step": 13843 - }, - { - "epoch": 7.219817470664928, - "grad_norm": 1.4318897724151611, - "learning_rate": 8.659296482412061e-05, - "loss": 5.7215, - "step": 13844 - }, - { - "epoch": 7.220338983050848, - "grad_norm": 1.4130226373672485, - "learning_rate": 8.659195979899499e-05, - "loss": 5.7834, - "step": 13845 - }, - { - "epoch": 7.220860495436766, - "grad_norm": 1.4549789428710938, - "learning_rate": 8.659095477386935e-05, - "loss": 5.5173, - "step": 13846 - }, - { - "epoch": 7.221382007822686, - "grad_norm": 1.41904616355896, - "learning_rate": 8.658994974874373e-05, - "loss": 5.7609, - "step": 13847 - }, - { - "epoch": 7.221903520208605, - "grad_norm": 1.4960914850234985, - "learning_rate": 8.658894472361809e-05, - "loss": 5.6916, - "step": 13848 - }, - { - "epoch": 7.222425032594524, - "grad_norm": 1.4038286209106445, - "learning_rate": 8.658793969849247e-05, - "loss": 5.5725, - "step": 13849 - }, - { - "epoch": 7.222946544980443, - "grad_norm": 1.471781611442566, - "learning_rate": 8.658693467336683e-05, - "loss": 5.7024, - "step": 13850 - }, - { - "epoch": 7.223468057366363, - "grad_norm": 1.4794539213180542, - "learning_rate": 8.658592964824121e-05, - "loss": 4.8575, - "step": 13851 - }, - { - "epoch": 7.223989569752281, - "grad_norm": 1.3971178531646729, - "learning_rate": 8.658492462311558e-05, - "loss": 5.8211, - "step": 13852 - }, - { - "epoch": 7.224511082138201, - "grad_norm": 1.606467843055725, - "learning_rate": 8.658391959798995e-05, - "loss": 5.3796, - "step": 13853 - }, - { - "epoch": 7.22503259452412, - "grad_norm": 1.379504919052124, - "learning_rate": 8.658291457286433e-05, - "loss": 5.7754, - "step": 13854 - }, - { - "epoch": 7.225554106910039, - "grad_norm": 1.4252222776412964, - "learning_rate": 8.65819095477387e-05, - "loss": 5.7975, - "step": 13855 - }, - { - "epoch": 7.226075619295958, - "grad_norm": 1.3371676206588745, - "learning_rate": 8.658090452261307e-05, - "loss": 5.5498, - "step": 13856 - }, - { - "epoch": 7.226597131681878, - "grad_norm": 1.43900465965271, - "learning_rate": 8.657989949748744e-05, - "loss": 5.3648, - "step": 13857 - }, - { - "epoch": 7.227118644067796, - "grad_norm": 1.4741994142532349, - "learning_rate": 8.657889447236182e-05, - "loss": 5.4846, - "step": 13858 - }, - { - "epoch": 7.227640156453716, - "grad_norm": 1.5168529748916626, - "learning_rate": 8.657788944723618e-05, - "loss": 5.5055, - "step": 13859 - }, - { - "epoch": 7.228161668839635, - "grad_norm": 1.3755425214767456, - "learning_rate": 8.657688442211056e-05, - "loss": 5.5566, - "step": 13860 - }, - { - "epoch": 7.228683181225554, - "grad_norm": 1.393425464630127, - "learning_rate": 8.657587939698492e-05, - "loss": 5.5367, - "step": 13861 - }, - { - "epoch": 7.229204693611473, - "grad_norm": 1.5195808410644531, - "learning_rate": 8.65748743718593e-05, - "loss": 5.0902, - "step": 13862 - }, - { - "epoch": 7.229726205997393, - "grad_norm": 1.3544471263885498, - "learning_rate": 8.657386934673367e-05, - "loss": 5.1259, - "step": 13863 - }, - { - "epoch": 7.230247718383311, - "grad_norm": 1.2949868440628052, - "learning_rate": 8.657286432160804e-05, - "loss": 5.8949, - "step": 13864 - }, - { - "epoch": 7.230769230769231, - "grad_norm": 1.3344718217849731, - "learning_rate": 8.657185929648242e-05, - "loss": 5.939, - "step": 13865 - }, - { - "epoch": 7.23129074315515, - "grad_norm": 1.58356511592865, - "learning_rate": 8.657085427135679e-05, - "loss": 5.7261, - "step": 13866 - }, - { - "epoch": 7.231812255541069, - "grad_norm": 1.352306842803955, - "learning_rate": 8.656984924623116e-05, - "loss": 5.7658, - "step": 13867 - }, - { - "epoch": 7.232333767926988, - "grad_norm": 1.534409999847412, - "learning_rate": 8.656884422110553e-05, - "loss": 5.5348, - "step": 13868 - }, - { - "epoch": 7.232855280312908, - "grad_norm": 1.5511760711669922, - "learning_rate": 8.65678391959799e-05, - "loss": 5.496, - "step": 13869 - }, - { - "epoch": 7.233376792698826, - "grad_norm": 1.5484240055084229, - "learning_rate": 8.656683417085427e-05, - "loss": 5.3226, - "step": 13870 - }, - { - "epoch": 7.233898305084746, - "grad_norm": 1.3928508758544922, - "learning_rate": 8.656582914572865e-05, - "loss": 5.5875, - "step": 13871 - }, - { - "epoch": 7.234419817470665, - "grad_norm": 1.4767929315567017, - "learning_rate": 8.656482412060301e-05, - "loss": 5.4868, - "step": 13872 - }, - { - "epoch": 7.234941329856584, - "grad_norm": 1.4403691291809082, - "learning_rate": 8.656381909547739e-05, - "loss": 5.3799, - "step": 13873 - }, - { - "epoch": 7.235462842242503, - "grad_norm": 1.6841845512390137, - "learning_rate": 8.656281407035177e-05, - "loss": 5.1363, - "step": 13874 - }, - { - "epoch": 7.235984354628423, - "grad_norm": 1.3859190940856934, - "learning_rate": 8.656180904522614e-05, - "loss": 5.8355, - "step": 13875 - }, - { - "epoch": 7.236505867014341, - "grad_norm": 1.4919509887695312, - "learning_rate": 8.656080402010051e-05, - "loss": 5.4711, - "step": 13876 - }, - { - "epoch": 7.237027379400261, - "grad_norm": 1.3247184753417969, - "learning_rate": 8.655979899497489e-05, - "loss": 5.4693, - "step": 13877 - }, - { - "epoch": 7.23754889178618, - "grad_norm": 1.345331072807312, - "learning_rate": 8.655879396984925e-05, - "loss": 5.7911, - "step": 13878 - }, - { - "epoch": 7.238070404172099, - "grad_norm": 1.5611610412597656, - "learning_rate": 8.655778894472362e-05, - "loss": 5.0772, - "step": 13879 - }, - { - "epoch": 7.238591916558018, - "grad_norm": 1.384142279624939, - "learning_rate": 8.6556783919598e-05, - "loss": 5.3248, - "step": 13880 - }, - { - "epoch": 7.239113428943938, - "grad_norm": 1.534067988395691, - "learning_rate": 8.655577889447236e-05, - "loss": 5.6008, - "step": 13881 - }, - { - "epoch": 7.239634941329856, - "grad_norm": 1.6124788522720337, - "learning_rate": 8.655477386934674e-05, - "loss": 5.29, - "step": 13882 - }, - { - "epoch": 7.240156453715776, - "grad_norm": 1.5609769821166992, - "learning_rate": 8.65537688442211e-05, - "loss": 5.7622, - "step": 13883 - }, - { - "epoch": 7.240677966101695, - "grad_norm": 1.373262882232666, - "learning_rate": 8.655276381909548e-05, - "loss": 5.6946, - "step": 13884 - }, - { - "epoch": 7.241199478487614, - "grad_norm": 1.4647949934005737, - "learning_rate": 8.655175879396986e-05, - "loss": 5.1776, - "step": 13885 - }, - { - "epoch": 7.241720990873533, - "grad_norm": 1.4784338474273682, - "learning_rate": 8.655075376884423e-05, - "loss": 5.6479, - "step": 13886 - }, - { - "epoch": 7.242242503259453, - "grad_norm": 1.5353575944900513, - "learning_rate": 8.65497487437186e-05, - "loss": 5.4822, - "step": 13887 - }, - { - "epoch": 7.242764015645371, - "grad_norm": 1.4073857069015503, - "learning_rate": 8.654874371859298e-05, - "loss": 5.7631, - "step": 13888 - }, - { - "epoch": 7.243285528031291, - "grad_norm": 1.4472273588180542, - "learning_rate": 8.654773869346734e-05, - "loss": 5.5994, - "step": 13889 - }, - { - "epoch": 7.2438070404172095, - "grad_norm": 1.391561508178711, - "learning_rate": 8.654673366834172e-05, - "loss": 5.8855, - "step": 13890 - }, - { - "epoch": 7.244328552803129, - "grad_norm": 1.4057506322860718, - "learning_rate": 8.654572864321608e-05, - "loss": 5.8038, - "step": 13891 - }, - { - "epoch": 7.244850065189048, - "grad_norm": 1.361465573310852, - "learning_rate": 8.654472361809046e-05, - "loss": 5.8476, - "step": 13892 - }, - { - "epoch": 7.245371577574968, - "grad_norm": 1.4027957916259766, - "learning_rate": 8.654371859296482e-05, - "loss": 5.9451, - "step": 13893 - }, - { - "epoch": 7.245893089960886, - "grad_norm": 1.3662689924240112, - "learning_rate": 8.65427135678392e-05, - "loss": 5.9848, - "step": 13894 - }, - { - "epoch": 7.246414602346806, - "grad_norm": 1.7352598905563354, - "learning_rate": 8.654170854271358e-05, - "loss": 4.7504, - "step": 13895 - }, - { - "epoch": 7.2469361147327245, - "grad_norm": 1.3858022689819336, - "learning_rate": 8.654070351758794e-05, - "loss": 5.8145, - "step": 13896 - }, - { - "epoch": 7.247457627118644, - "grad_norm": 1.515507459640503, - "learning_rate": 8.653969849246232e-05, - "loss": 5.3422, - "step": 13897 - }, - { - "epoch": 7.247979139504563, - "grad_norm": 1.4698268175125122, - "learning_rate": 8.653869346733669e-05, - "loss": 5.3281, - "step": 13898 - }, - { - "epoch": 7.248500651890482, - "grad_norm": 1.384896993637085, - "learning_rate": 8.653768844221106e-05, - "loss": 5.9505, - "step": 13899 - }, - { - "epoch": 7.2490221642764014, - "grad_norm": 1.4043617248535156, - "learning_rate": 8.653668341708543e-05, - "loss": 5.6624, - "step": 13900 - }, - { - "epoch": 7.249543676662321, - "grad_norm": 1.4650789499282837, - "learning_rate": 8.65356783919598e-05, - "loss": 5.2496, - "step": 13901 - }, - { - "epoch": 7.2500651890482395, - "grad_norm": 1.5133768320083618, - "learning_rate": 8.653467336683417e-05, - "loss": 5.5005, - "step": 13902 - }, - { - "epoch": 7.250586701434159, - "grad_norm": 1.4040523767471313, - "learning_rate": 8.653366834170855e-05, - "loss": 5.7626, - "step": 13903 - }, - { - "epoch": 7.251108213820078, - "grad_norm": 1.4309961795806885, - "learning_rate": 8.653266331658291e-05, - "loss": 5.6769, - "step": 13904 - }, - { - "epoch": 7.251629726205997, - "grad_norm": 1.3854128122329712, - "learning_rate": 8.653165829145729e-05, - "loss": 5.9449, - "step": 13905 - }, - { - "epoch": 7.2521512385919165, - "grad_norm": 1.3234103918075562, - "learning_rate": 8.653065326633167e-05, - "loss": 5.6511, - "step": 13906 - }, - { - "epoch": 7.252672750977836, - "grad_norm": 1.3531053066253662, - "learning_rate": 8.652964824120603e-05, - "loss": 5.9423, - "step": 13907 - }, - { - "epoch": 7.2531942633637545, - "grad_norm": 1.396986961364746, - "learning_rate": 8.652864321608041e-05, - "loss": 5.2624, - "step": 13908 - }, - { - "epoch": 7.253715775749674, - "grad_norm": 1.416448950767517, - "learning_rate": 8.652763819095477e-05, - "loss": 5.5106, - "step": 13909 - }, - { - "epoch": 7.254237288135593, - "grad_norm": 1.3871735334396362, - "learning_rate": 8.652663316582915e-05, - "loss": 5.6092, - "step": 13910 - }, - { - "epoch": 7.254758800521512, - "grad_norm": 1.4899522066116333, - "learning_rate": 8.652562814070352e-05, - "loss": 5.6682, - "step": 13911 - }, - { - "epoch": 7.2552803129074315, - "grad_norm": 1.3787933588027954, - "learning_rate": 8.65246231155779e-05, - "loss": 5.2395, - "step": 13912 - }, - { - "epoch": 7.255801825293351, - "grad_norm": 1.376882791519165, - "learning_rate": 8.652361809045226e-05, - "loss": 5.8448, - "step": 13913 - }, - { - "epoch": 7.2563233376792695, - "grad_norm": 1.4317625761032104, - "learning_rate": 8.652261306532664e-05, - "loss": 5.9074, - "step": 13914 - }, - { - "epoch": 7.256844850065189, - "grad_norm": 1.4317023754119873, - "learning_rate": 8.652160804020101e-05, - "loss": 5.4882, - "step": 13915 - }, - { - "epoch": 7.257366362451108, - "grad_norm": 1.519873023033142, - "learning_rate": 8.652060301507539e-05, - "loss": 5.3983, - "step": 13916 - }, - { - "epoch": 7.257887874837027, - "grad_norm": 1.4214595556259155, - "learning_rate": 8.651959798994976e-05, - "loss": 5.4822, - "step": 13917 - }, - { - "epoch": 7.2584093872229465, - "grad_norm": 1.426478385925293, - "learning_rate": 8.651859296482413e-05, - "loss": 5.2393, - "step": 13918 - }, - { - "epoch": 7.258930899608866, - "grad_norm": 1.3471466302871704, - "learning_rate": 8.65175879396985e-05, - "loss": 5.5461, - "step": 13919 - }, - { - "epoch": 7.2594524119947845, - "grad_norm": 1.3822931051254272, - "learning_rate": 8.651658291457286e-05, - "loss": 5.8928, - "step": 13920 - }, - { - "epoch": 7.259973924380704, - "grad_norm": 1.4765878915786743, - "learning_rate": 8.651557788944724e-05, - "loss": 5.5311, - "step": 13921 - }, - { - "epoch": 7.2604954367666235, - "grad_norm": 1.3334802389144897, - "learning_rate": 8.65145728643216e-05, - "loss": 5.2608, - "step": 13922 - }, - { - "epoch": 7.261016949152542, - "grad_norm": 1.4536545276641846, - "learning_rate": 8.651356783919598e-05, - "loss": 5.5716, - "step": 13923 - }, - { - "epoch": 7.2615384615384615, - "grad_norm": 1.436621069908142, - "learning_rate": 8.651256281407035e-05, - "loss": 5.4864, - "step": 13924 - }, - { - "epoch": 7.262059973924381, - "grad_norm": 1.6455384492874146, - "learning_rate": 8.651155778894472e-05, - "loss": 5.2419, - "step": 13925 - }, - { - "epoch": 7.2625814863102995, - "grad_norm": 1.4164351224899292, - "learning_rate": 8.65105527638191e-05, - "loss": 5.7885, - "step": 13926 - }, - { - "epoch": 7.263102998696219, - "grad_norm": 1.4594738483428955, - "learning_rate": 8.650954773869348e-05, - "loss": 5.7336, - "step": 13927 - }, - { - "epoch": 7.2636245110821385, - "grad_norm": 1.5486878156661987, - "learning_rate": 8.650854271356784e-05, - "loss": 5.3165, - "step": 13928 - }, - { - "epoch": 7.264146023468057, - "grad_norm": 1.3693488836288452, - "learning_rate": 8.650753768844222e-05, - "loss": 5.9375, - "step": 13929 - }, - { - "epoch": 7.2646675358539765, - "grad_norm": 1.5233376026153564, - "learning_rate": 8.650653266331659e-05, - "loss": 5.4604, - "step": 13930 - }, - { - "epoch": 7.265189048239896, - "grad_norm": 1.3979462385177612, - "learning_rate": 8.650552763819096e-05, - "loss": 5.8789, - "step": 13931 - }, - { - "epoch": 7.2657105606258146, - "grad_norm": 1.4136751890182495, - "learning_rate": 8.650452261306533e-05, - "loss": 5.5115, - "step": 13932 - }, - { - "epoch": 7.266232073011734, - "grad_norm": 1.3632471561431885, - "learning_rate": 8.650351758793969e-05, - "loss": 5.7081, - "step": 13933 - }, - { - "epoch": 7.2667535853976535, - "grad_norm": 1.3664360046386719, - "learning_rate": 8.650251256281407e-05, - "loss": 5.8476, - "step": 13934 - }, - { - "epoch": 7.267275097783572, - "grad_norm": 1.2854349613189697, - "learning_rate": 8.650150753768845e-05, - "loss": 5.9687, - "step": 13935 - }, - { - "epoch": 7.2677966101694915, - "grad_norm": 1.4060533046722412, - "learning_rate": 8.650050251256283e-05, - "loss": 5.5127, - "step": 13936 - }, - { - "epoch": 7.268318122555411, - "grad_norm": 1.4356714487075806, - "learning_rate": 8.649949748743719e-05, - "loss": 5.4107, - "step": 13937 - }, - { - "epoch": 7.26883963494133, - "grad_norm": 1.4627318382263184, - "learning_rate": 8.649849246231157e-05, - "loss": 5.6968, - "step": 13938 - }, - { - "epoch": 7.269361147327249, - "grad_norm": 1.5060369968414307, - "learning_rate": 8.649748743718593e-05, - "loss": 5.4215, - "step": 13939 - }, - { - "epoch": 7.2698826597131685, - "grad_norm": 1.5632301568984985, - "learning_rate": 8.649648241206031e-05, - "loss": 5.4403, - "step": 13940 - }, - { - "epoch": 7.270404172099087, - "grad_norm": 1.3948538303375244, - "learning_rate": 8.649547738693468e-05, - "loss": 5.4469, - "step": 13941 - }, - { - "epoch": 7.2709256844850065, - "grad_norm": 1.414812445640564, - "learning_rate": 8.649447236180905e-05, - "loss": 5.3449, - "step": 13942 - }, - { - "epoch": 7.271447196870926, - "grad_norm": 1.3936457633972168, - "learning_rate": 8.649346733668342e-05, - "loss": 5.6481, - "step": 13943 - }, - { - "epoch": 7.271968709256845, - "grad_norm": 1.3703370094299316, - "learning_rate": 8.64924623115578e-05, - "loss": 5.6422, - "step": 13944 - }, - { - "epoch": 7.272490221642764, - "grad_norm": 1.4949051141738892, - "learning_rate": 8.649145728643216e-05, - "loss": 5.4538, - "step": 13945 - }, - { - "epoch": 7.2730117340286835, - "grad_norm": 1.5425009727478027, - "learning_rate": 8.649045226130654e-05, - "loss": 5.3842, - "step": 13946 - }, - { - "epoch": 7.273533246414602, - "grad_norm": 1.4803504943847656, - "learning_rate": 8.648944723618091e-05, - "loss": 5.7159, - "step": 13947 - }, - { - "epoch": 7.2740547588005215, - "grad_norm": 1.4635493755340576, - "learning_rate": 8.648844221105528e-05, - "loss": 5.4063, - "step": 13948 - }, - { - "epoch": 7.274576271186441, - "grad_norm": 1.4413903951644897, - "learning_rate": 8.648743718592966e-05, - "loss": 5.4398, - "step": 13949 - }, - { - "epoch": 7.27509778357236, - "grad_norm": 1.7470332384109497, - "learning_rate": 8.648643216080402e-05, - "loss": 5.207, - "step": 13950 - }, - { - "epoch": 7.275619295958279, - "grad_norm": 1.4892231225967407, - "learning_rate": 8.64854271356784e-05, - "loss": 5.7095, - "step": 13951 - }, - { - "epoch": 7.2761408083441985, - "grad_norm": 1.4328135251998901, - "learning_rate": 8.648442211055276e-05, - "loss": 5.8331, - "step": 13952 - }, - { - "epoch": 7.276662320730117, - "grad_norm": 1.4619526863098145, - "learning_rate": 8.648341708542714e-05, - "loss": 5.6052, - "step": 13953 - }, - { - "epoch": 7.277183833116037, - "grad_norm": 1.5187896490097046, - "learning_rate": 8.64824120603015e-05, - "loss": 5.1837, - "step": 13954 - }, - { - "epoch": 7.277705345501956, - "grad_norm": 1.334855318069458, - "learning_rate": 8.648140703517588e-05, - "loss": 5.5522, - "step": 13955 - }, - { - "epoch": 7.278226857887875, - "grad_norm": 1.3558253049850464, - "learning_rate": 8.648040201005025e-05, - "loss": 5.5673, - "step": 13956 - }, - { - "epoch": 7.278748370273794, - "grad_norm": 1.40957510471344, - "learning_rate": 8.647939698492463e-05, - "loss": 5.6261, - "step": 13957 - }, - { - "epoch": 7.2792698826597135, - "grad_norm": 1.423427700996399, - "learning_rate": 8.6478391959799e-05, - "loss": 4.9521, - "step": 13958 - }, - { - "epoch": 7.279791395045632, - "grad_norm": 1.3916763067245483, - "learning_rate": 8.647738693467337e-05, - "loss": 5.9292, - "step": 13959 - }, - { - "epoch": 7.280312907431552, - "grad_norm": 1.3993924856185913, - "learning_rate": 8.647638190954775e-05, - "loss": 5.631, - "step": 13960 - }, - { - "epoch": 7.280834419817471, - "grad_norm": 1.441568374633789, - "learning_rate": 8.647537688442211e-05, - "loss": 4.9591, - "step": 13961 - }, - { - "epoch": 7.28135593220339, - "grad_norm": 1.2964200973510742, - "learning_rate": 8.647437185929649e-05, - "loss": 5.8975, - "step": 13962 - }, - { - "epoch": 7.281877444589309, - "grad_norm": 1.4882776737213135, - "learning_rate": 8.647336683417085e-05, - "loss": 5.5268, - "step": 13963 - }, - { - "epoch": 7.2823989569752285, - "grad_norm": 1.4841235876083374, - "learning_rate": 8.647236180904523e-05, - "loss": 5.8094, - "step": 13964 - }, - { - "epoch": 7.282920469361147, - "grad_norm": 1.41879141330719, - "learning_rate": 8.64713567839196e-05, - "loss": 5.435, - "step": 13965 - }, - { - "epoch": 7.283441981747067, - "grad_norm": 1.3464205265045166, - "learning_rate": 8.647035175879397e-05, - "loss": 5.8486, - "step": 13966 - }, - { - "epoch": 7.283963494132986, - "grad_norm": 1.497770071029663, - "learning_rate": 8.646934673366835e-05, - "loss": 5.617, - "step": 13967 - }, - { - "epoch": 7.284485006518905, - "grad_norm": 1.4082905054092407, - "learning_rate": 8.646834170854273e-05, - "loss": 5.5819, - "step": 13968 - }, - { - "epoch": 7.285006518904824, - "grad_norm": 1.4159069061279297, - "learning_rate": 8.646733668341709e-05, - "loss": 5.5347, - "step": 13969 - }, - { - "epoch": 7.2855280312907436, - "grad_norm": 1.3514378070831299, - "learning_rate": 8.646633165829147e-05, - "loss": 5.0306, - "step": 13970 - }, - { - "epoch": 7.286049543676662, - "grad_norm": 1.4812904596328735, - "learning_rate": 8.646532663316583e-05, - "loss": 5.6348, - "step": 13971 - }, - { - "epoch": 7.286571056062582, - "grad_norm": 1.380391001701355, - "learning_rate": 8.64643216080402e-05, - "loss": 6.0167, - "step": 13972 - }, - { - "epoch": 7.287092568448501, - "grad_norm": 1.4003443717956543, - "learning_rate": 8.646331658291458e-05, - "loss": 5.5624, - "step": 13973 - }, - { - "epoch": 7.28761408083442, - "grad_norm": 1.4305994510650635, - "learning_rate": 8.646231155778894e-05, - "loss": 5.9424, - "step": 13974 - }, - { - "epoch": 7.288135593220339, - "grad_norm": 1.419061541557312, - "learning_rate": 8.646130653266332e-05, - "loss": 5.9904, - "step": 13975 - }, - { - "epoch": 7.288657105606259, - "grad_norm": 1.5180294513702393, - "learning_rate": 8.646030150753768e-05, - "loss": 5.2705, - "step": 13976 - }, - { - "epoch": 7.289178617992177, - "grad_norm": 1.5428037643432617, - "learning_rate": 8.645929648241206e-05, - "loss": 5.787, - "step": 13977 - }, - { - "epoch": 7.289700130378097, - "grad_norm": 1.4225468635559082, - "learning_rate": 8.645829145728644e-05, - "loss": 5.4238, - "step": 13978 - }, - { - "epoch": 7.290221642764015, - "grad_norm": 1.3030236959457397, - "learning_rate": 8.645728643216082e-05, - "loss": 5.4661, - "step": 13979 - }, - { - "epoch": 7.290743155149935, - "grad_norm": 1.369858980178833, - "learning_rate": 8.645628140703518e-05, - "loss": 5.5244, - "step": 13980 - }, - { - "epoch": 7.291264667535854, - "grad_norm": 1.536669135093689, - "learning_rate": 8.645527638190956e-05, - "loss": 5.4945, - "step": 13981 - }, - { - "epoch": 7.291786179921774, - "grad_norm": 1.4402844905853271, - "learning_rate": 8.645427135678392e-05, - "loss": 5.644, - "step": 13982 - }, - { - "epoch": 7.292307692307692, - "grad_norm": 1.3958327770233154, - "learning_rate": 8.64532663316583e-05, - "loss": 5.6003, - "step": 13983 - }, - { - "epoch": 7.292829204693612, - "grad_norm": 1.4838604927062988, - "learning_rate": 8.645226130653266e-05, - "loss": 5.3198, - "step": 13984 - }, - { - "epoch": 7.29335071707953, - "grad_norm": 1.375259518623352, - "learning_rate": 8.645125628140704e-05, - "loss": 5.855, - "step": 13985 - }, - { - "epoch": 7.29387222946545, - "grad_norm": 1.4475239515304565, - "learning_rate": 8.64502512562814e-05, - "loss": 5.4398, - "step": 13986 - }, - { - "epoch": 7.294393741851369, - "grad_norm": 1.4006599187850952, - "learning_rate": 8.644924623115578e-05, - "loss": 5.9076, - "step": 13987 - }, - { - "epoch": 7.294915254237289, - "grad_norm": 1.486405849456787, - "learning_rate": 8.644824120603016e-05, - "loss": 5.5671, - "step": 13988 - }, - { - "epoch": 7.295436766623207, - "grad_norm": 1.4042093753814697, - "learning_rate": 8.644723618090453e-05, - "loss": 5.7671, - "step": 13989 - }, - { - "epoch": 7.295958279009127, - "grad_norm": 1.550836443901062, - "learning_rate": 8.64462311557789e-05, - "loss": 5.1654, - "step": 13990 - }, - { - "epoch": 7.296479791395045, - "grad_norm": 1.4479562044143677, - "learning_rate": 8.644522613065327e-05, - "loss": 5.5512, - "step": 13991 - }, - { - "epoch": 7.297001303780965, - "grad_norm": 1.47080659866333, - "learning_rate": 8.644422110552765e-05, - "loss": 5.7425, - "step": 13992 - }, - { - "epoch": 7.297522816166884, - "grad_norm": 1.6019738912582397, - "learning_rate": 8.644321608040201e-05, - "loss": 5.4217, - "step": 13993 - }, - { - "epoch": 7.298044328552803, - "grad_norm": 1.4677263498306274, - "learning_rate": 8.644221105527639e-05, - "loss": 5.3774, - "step": 13994 - }, - { - "epoch": 7.298565840938722, - "grad_norm": 1.6464918851852417, - "learning_rate": 8.644120603015075e-05, - "loss": 5.3443, - "step": 13995 - }, - { - "epoch": 7.299087353324642, - "grad_norm": 1.5800527334213257, - "learning_rate": 8.644020100502513e-05, - "loss": 5.2024, - "step": 13996 - }, - { - "epoch": 7.29960886571056, - "grad_norm": 1.4476515054702759, - "learning_rate": 8.64391959798995e-05, - "loss": 5.7206, - "step": 13997 - }, - { - "epoch": 7.30013037809648, - "grad_norm": 1.4453465938568115, - "learning_rate": 8.643819095477387e-05, - "loss": 5.7064, - "step": 13998 - }, - { - "epoch": 7.300651890482399, - "grad_norm": 1.5834791660308838, - "learning_rate": 8.643718592964825e-05, - "loss": 5.1279, - "step": 13999 - }, - { - "epoch": 7.301173402868318, - "grad_norm": 1.8103598356246948, - "learning_rate": 8.643618090452261e-05, - "loss": 5.2943, - "step": 14000 - }, - { - "epoch": 7.301694915254237, - "grad_norm": 1.6738951206207275, - "learning_rate": 8.643517587939699e-05, - "loss": 5.5403, - "step": 14001 - }, - { - "epoch": 7.302216427640157, - "grad_norm": 1.5249449014663696, - "learning_rate": 8.643417085427136e-05, - "loss": 5.3345, - "step": 14002 - }, - { - "epoch": 7.302737940026075, - "grad_norm": 1.4163775444030762, - "learning_rate": 8.643316582914573e-05, - "loss": 5.8385, - "step": 14003 - }, - { - "epoch": 7.303259452411995, - "grad_norm": 1.4143809080123901, - "learning_rate": 8.64321608040201e-05, - "loss": 5.4805, - "step": 14004 - }, - { - "epoch": 7.303780964797914, - "grad_norm": 1.3905314207077026, - "learning_rate": 8.643115577889448e-05, - "loss": 5.5365, - "step": 14005 - }, - { - "epoch": 7.304302477183833, - "grad_norm": 1.403387427330017, - "learning_rate": 8.643015075376884e-05, - "loss": 5.9316, - "step": 14006 - }, - { - "epoch": 7.304823989569752, - "grad_norm": 1.5198328495025635, - "learning_rate": 8.642914572864322e-05, - "loss": 5.6235, - "step": 14007 - }, - { - "epoch": 7.305345501955672, - "grad_norm": 1.6171300411224365, - "learning_rate": 8.64281407035176e-05, - "loss": 5.3242, - "step": 14008 - }, - { - "epoch": 7.30586701434159, - "grad_norm": 1.378237009048462, - "learning_rate": 8.642713567839197e-05, - "loss": 5.6539, - "step": 14009 - }, - { - "epoch": 7.30638852672751, - "grad_norm": 1.3996731042861938, - "learning_rate": 8.642613065326634e-05, - "loss": 5.6495, - "step": 14010 - }, - { - "epoch": 7.306910039113429, - "grad_norm": 1.5628018379211426, - "learning_rate": 8.642512562814072e-05, - "loss": 5.5354, - "step": 14011 - }, - { - "epoch": 7.307431551499348, - "grad_norm": 1.536037564277649, - "learning_rate": 8.642412060301508e-05, - "loss": 5.3698, - "step": 14012 - }, - { - "epoch": 7.307953063885267, - "grad_norm": 1.4203882217407227, - "learning_rate": 8.642311557788944e-05, - "loss": 5.5838, - "step": 14013 - }, - { - "epoch": 7.308474576271187, - "grad_norm": 1.611579179763794, - "learning_rate": 8.642211055276382e-05, - "loss": 5.2257, - "step": 14014 - }, - { - "epoch": 7.308996088657105, - "grad_norm": 1.7825345993041992, - "learning_rate": 8.642110552763819e-05, - "loss": 5.3274, - "step": 14015 - }, - { - "epoch": 7.309517601043025, - "grad_norm": 1.5274690389633179, - "learning_rate": 8.642010050251256e-05, - "loss": 5.5968, - "step": 14016 - }, - { - "epoch": 7.310039113428944, - "grad_norm": 1.407712697982788, - "learning_rate": 8.641909547738693e-05, - "loss": 5.7945, - "step": 14017 - }, - { - "epoch": 7.310560625814863, - "grad_norm": 1.454009771347046, - "learning_rate": 8.641809045226131e-05, - "loss": 5.3691, - "step": 14018 - }, - { - "epoch": 7.311082138200782, - "grad_norm": 1.4856528043746948, - "learning_rate": 8.641708542713568e-05, - "loss": 5.0777, - "step": 14019 - }, - { - "epoch": 7.311603650586702, - "grad_norm": 1.525679111480713, - "learning_rate": 8.641608040201006e-05, - "loss": 4.8728, - "step": 14020 - }, - { - "epoch": 7.31212516297262, - "grad_norm": 1.3854422569274902, - "learning_rate": 8.641507537688443e-05, - "loss": 5.5315, - "step": 14021 - }, - { - "epoch": 7.31264667535854, - "grad_norm": 1.365897536277771, - "learning_rate": 8.64140703517588e-05, - "loss": 5.3863, - "step": 14022 - }, - { - "epoch": 7.313168187744459, - "grad_norm": 1.3100353479385376, - "learning_rate": 8.641306532663317e-05, - "loss": 5.5204, - "step": 14023 - }, - { - "epoch": 7.313689700130378, - "grad_norm": 1.429561972618103, - "learning_rate": 8.641206030150755e-05, - "loss": 5.7937, - "step": 14024 - }, - { - "epoch": 7.314211212516297, - "grad_norm": 1.5019487142562866, - "learning_rate": 8.641105527638191e-05, - "loss": 5.6593, - "step": 14025 - }, - { - "epoch": 7.314732724902217, - "grad_norm": 1.2921549081802368, - "learning_rate": 8.641005025125628e-05, - "loss": 5.7422, - "step": 14026 - }, - { - "epoch": 7.315254237288135, - "grad_norm": 1.4780654907226562, - "learning_rate": 8.640904522613065e-05, - "loss": 5.6734, - "step": 14027 - }, - { - "epoch": 7.315775749674055, - "grad_norm": 1.5460532903671265, - "learning_rate": 8.640804020100503e-05, - "loss": 5.18, - "step": 14028 - }, - { - "epoch": 7.316297262059974, - "grad_norm": 1.673261284828186, - "learning_rate": 8.640703517587941e-05, - "loss": 5.6863, - "step": 14029 - }, - { - "epoch": 7.316818774445893, - "grad_norm": 1.5857906341552734, - "learning_rate": 8.640603015075377e-05, - "loss": 5.2165, - "step": 14030 - }, - { - "epoch": 7.317340286831812, - "grad_norm": 1.2880761623382568, - "learning_rate": 8.640502512562815e-05, - "loss": 6.1114, - "step": 14031 - }, - { - "epoch": 7.317861799217732, - "grad_norm": 1.600763201713562, - "learning_rate": 8.640402010050252e-05, - "loss": 4.9352, - "step": 14032 - }, - { - "epoch": 7.31838331160365, - "grad_norm": 1.4753471612930298, - "learning_rate": 8.640301507537689e-05, - "loss": 5.224, - "step": 14033 - }, - { - "epoch": 7.31890482398957, - "grad_norm": 1.3306764364242554, - "learning_rate": 8.640201005025126e-05, - "loss": 5.7428, - "step": 14034 - }, - { - "epoch": 7.319426336375489, - "grad_norm": 1.3086230754852295, - "learning_rate": 8.640100502512564e-05, - "loss": 5.7517, - "step": 14035 - }, - { - "epoch": 7.319947848761408, - "grad_norm": 1.421722650527954, - "learning_rate": 8.64e-05, - "loss": 5.7174, - "step": 14036 - }, - { - "epoch": 7.320469361147327, - "grad_norm": 1.365938663482666, - "learning_rate": 8.639899497487438e-05, - "loss": 5.7607, - "step": 14037 - }, - { - "epoch": 7.320990873533247, - "grad_norm": 1.5328855514526367, - "learning_rate": 8.639798994974874e-05, - "loss": 5.2778, - "step": 14038 - }, - { - "epoch": 7.321512385919165, - "grad_norm": 1.4467675685882568, - "learning_rate": 8.639698492462312e-05, - "loss": 5.6991, - "step": 14039 - }, - { - "epoch": 7.322033898305085, - "grad_norm": 1.4114993810653687, - "learning_rate": 8.63959798994975e-05, - "loss": 5.5077, - "step": 14040 - }, - { - "epoch": 7.322555410691004, - "grad_norm": 1.5762125253677368, - "learning_rate": 8.639497487437186e-05, - "loss": 5.3227, - "step": 14041 - }, - { - "epoch": 7.323076923076923, - "grad_norm": 1.4283225536346436, - "learning_rate": 8.639396984924624e-05, - "loss": 5.7728, - "step": 14042 - }, - { - "epoch": 7.323598435462842, - "grad_norm": 1.5265038013458252, - "learning_rate": 8.63929648241206e-05, - "loss": 5.4598, - "step": 14043 - }, - { - "epoch": 7.324119947848762, - "grad_norm": 1.6257599592208862, - "learning_rate": 8.639195979899498e-05, - "loss": 5.2035, - "step": 14044 - }, - { - "epoch": 7.32464146023468, - "grad_norm": 1.5463647842407227, - "learning_rate": 8.639095477386935e-05, - "loss": 5.4449, - "step": 14045 - }, - { - "epoch": 7.3251629726206, - "grad_norm": 1.5769624710083008, - "learning_rate": 8.638994974874372e-05, - "loss": 4.8303, - "step": 14046 - }, - { - "epoch": 7.325684485006519, - "grad_norm": 1.517297387123108, - "learning_rate": 8.638894472361809e-05, - "loss": 5.5122, - "step": 14047 - }, - { - "epoch": 7.326205997392438, - "grad_norm": 1.4957104921340942, - "learning_rate": 8.638793969849247e-05, - "loss": 5.418, - "step": 14048 - }, - { - "epoch": 7.326727509778357, - "grad_norm": 1.4642125368118286, - "learning_rate": 8.638693467336684e-05, - "loss": 5.5927, - "step": 14049 - }, - { - "epoch": 7.327249022164277, - "grad_norm": 1.4118324518203735, - "learning_rate": 8.638592964824122e-05, - "loss": 5.8966, - "step": 14050 - }, - { - "epoch": 7.327770534550195, - "grad_norm": 1.5057858228683472, - "learning_rate": 8.638492462311559e-05, - "loss": 4.9099, - "step": 14051 - }, - { - "epoch": 7.328292046936115, - "grad_norm": 1.4634974002838135, - "learning_rate": 8.638391959798995e-05, - "loss": 5.7176, - "step": 14052 - }, - { - "epoch": 7.328813559322034, - "grad_norm": 1.3176823854446411, - "learning_rate": 8.638291457286433e-05, - "loss": 5.8532, - "step": 14053 - }, - { - "epoch": 7.329335071707953, - "grad_norm": 1.4588236808776855, - "learning_rate": 8.638190954773869e-05, - "loss": 4.7157, - "step": 14054 - }, - { - "epoch": 7.329856584093872, - "grad_norm": 1.3887196779251099, - "learning_rate": 8.638090452261307e-05, - "loss": 5.862, - "step": 14055 - }, - { - "epoch": 7.330378096479792, - "grad_norm": 1.4252700805664062, - "learning_rate": 8.637989949748743e-05, - "loss": 5.6735, - "step": 14056 - }, - { - "epoch": 7.33089960886571, - "grad_norm": 1.5182180404663086, - "learning_rate": 8.637889447236181e-05, - "loss": 5.7014, - "step": 14057 - }, - { - "epoch": 7.33142112125163, - "grad_norm": 1.3444551229476929, - "learning_rate": 8.637788944723618e-05, - "loss": 5.4334, - "step": 14058 - }, - { - "epoch": 7.331942633637549, - "grad_norm": 1.405275583267212, - "learning_rate": 8.637688442211055e-05, - "loss": 5.6922, - "step": 14059 - }, - { - "epoch": 7.332464146023468, - "grad_norm": 1.2852128744125366, - "learning_rate": 8.637587939698493e-05, - "loss": 6.1625, - "step": 14060 - }, - { - "epoch": 7.332985658409387, - "grad_norm": 1.7548332214355469, - "learning_rate": 8.637487437185931e-05, - "loss": 4.9837, - "step": 14061 - }, - { - "epoch": 7.333507170795307, - "grad_norm": 1.4273828268051147, - "learning_rate": 8.637386934673367e-05, - "loss": 5.5702, - "step": 14062 - }, - { - "epoch": 7.334028683181225, - "grad_norm": 1.4239511489868164, - "learning_rate": 8.637286432160805e-05, - "loss": 5.7775, - "step": 14063 - }, - { - "epoch": 7.334550195567145, - "grad_norm": 1.5540846586227417, - "learning_rate": 8.637185929648242e-05, - "loss": 5.0651, - "step": 14064 - }, - { - "epoch": 7.335071707953064, - "grad_norm": 1.4383010864257812, - "learning_rate": 8.637085427135678e-05, - "loss": 4.9367, - "step": 14065 - }, - { - "epoch": 7.335593220338983, - "grad_norm": 1.4802595376968384, - "learning_rate": 8.636984924623116e-05, - "loss": 5.4442, - "step": 14066 - }, - { - "epoch": 7.336114732724902, - "grad_norm": 1.548940658569336, - "learning_rate": 8.636884422110552e-05, - "loss": 5.2126, - "step": 14067 - }, - { - "epoch": 7.336636245110822, - "grad_norm": 1.4102771282196045, - "learning_rate": 8.63678391959799e-05, - "loss": 5.2937, - "step": 14068 - }, - { - "epoch": 7.33715775749674, - "grad_norm": 1.4836252927780151, - "learning_rate": 8.636683417085428e-05, - "loss": 5.4667, - "step": 14069 - }, - { - "epoch": 7.33767926988266, - "grad_norm": 1.7689772844314575, - "learning_rate": 8.636582914572866e-05, - "loss": 4.8048, - "step": 14070 - }, - { - "epoch": 7.338200782268579, - "grad_norm": 1.3359594345092773, - "learning_rate": 8.636482412060302e-05, - "loss": 5.8243, - "step": 14071 - }, - { - "epoch": 7.338722294654498, - "grad_norm": 1.3384689092636108, - "learning_rate": 8.63638190954774e-05, - "loss": 5.7357, - "step": 14072 - }, - { - "epoch": 7.339243807040417, - "grad_norm": 1.5541619062423706, - "learning_rate": 8.636281407035176e-05, - "loss": 5.0173, - "step": 14073 - }, - { - "epoch": 7.339765319426336, - "grad_norm": 1.5428541898727417, - "learning_rate": 8.636180904522614e-05, - "loss": 5.0967, - "step": 14074 - }, - { - "epoch": 7.340286831812255, - "grad_norm": 1.3656858205795288, - "learning_rate": 8.63608040201005e-05, - "loss": 5.8534, - "step": 14075 - }, - { - "epoch": 7.340808344198175, - "grad_norm": 1.4813761711120605, - "learning_rate": 8.635979899497488e-05, - "loss": 5.2809, - "step": 14076 - }, - { - "epoch": 7.341329856584094, - "grad_norm": 1.468874454498291, - "learning_rate": 8.635879396984925e-05, - "loss": 5.1922, - "step": 14077 - }, - { - "epoch": 7.341851368970013, - "grad_norm": 1.5164567232131958, - "learning_rate": 8.635778894472362e-05, - "loss": 5.3245, - "step": 14078 - }, - { - "epoch": 7.342372881355932, - "grad_norm": 1.3253073692321777, - "learning_rate": 8.635678391959799e-05, - "loss": 5.5998, - "step": 14079 - }, - { - "epoch": 7.342894393741851, - "grad_norm": 1.5097453594207764, - "learning_rate": 8.635577889447237e-05, - "loss": 4.9828, - "step": 14080 - }, - { - "epoch": 7.34341590612777, - "grad_norm": 1.4521331787109375, - "learning_rate": 8.635477386934674e-05, - "loss": 5.0385, - "step": 14081 - }, - { - "epoch": 7.34393741851369, - "grad_norm": 1.426523208618164, - "learning_rate": 8.635376884422111e-05, - "loss": 5.4786, - "step": 14082 - }, - { - "epoch": 7.344458930899609, - "grad_norm": 1.3585010766983032, - "learning_rate": 8.635276381909549e-05, - "loss": 5.3661, - "step": 14083 - }, - { - "epoch": 7.344980443285528, - "grad_norm": 1.3712060451507568, - "learning_rate": 8.635175879396985e-05, - "loss": 5.5181, - "step": 14084 - }, - { - "epoch": 7.345501955671447, - "grad_norm": 1.387413501739502, - "learning_rate": 8.635075376884423e-05, - "loss": 5.4512, - "step": 14085 - }, - { - "epoch": 7.346023468057366, - "grad_norm": 1.4190168380737305, - "learning_rate": 8.634974874371859e-05, - "loss": 5.65, - "step": 14086 - }, - { - "epoch": 7.346544980443285, - "grad_norm": 1.4257169961929321, - "learning_rate": 8.634874371859297e-05, - "loss": 5.2987, - "step": 14087 - }, - { - "epoch": 7.347066492829205, - "grad_norm": 1.4336555004119873, - "learning_rate": 8.634773869346733e-05, - "loss": 5.6053, - "step": 14088 - }, - { - "epoch": 7.3475880052151235, - "grad_norm": 1.525833010673523, - "learning_rate": 8.634673366834171e-05, - "loss": 5.0805, - "step": 14089 - }, - { - "epoch": 7.348109517601043, - "grad_norm": 1.6121132373809814, - "learning_rate": 8.634572864321609e-05, - "loss": 5.6931, - "step": 14090 - }, - { - "epoch": 7.348631029986962, - "grad_norm": 1.5063132047653198, - "learning_rate": 8.634472361809047e-05, - "loss": 5.856, - "step": 14091 - }, - { - "epoch": 7.349152542372881, - "grad_norm": 1.3940805196762085, - "learning_rate": 8.634371859296483e-05, - "loss": 5.6085, - "step": 14092 - }, - { - "epoch": 7.3496740547588, - "grad_norm": 1.4855287075042725, - "learning_rate": 8.63427135678392e-05, - "loss": 5.5625, - "step": 14093 - }, - { - "epoch": 7.35019556714472, - "grad_norm": 1.4418648481369019, - "learning_rate": 8.634170854271357e-05, - "loss": 6.0882, - "step": 14094 - }, - { - "epoch": 7.3507170795306385, - "grad_norm": 1.4847739934921265, - "learning_rate": 8.634070351758794e-05, - "loss": 5.6298, - "step": 14095 - }, - { - "epoch": 7.351238591916558, - "grad_norm": 1.4054909944534302, - "learning_rate": 8.633969849246232e-05, - "loss": 5.494, - "step": 14096 - }, - { - "epoch": 7.351760104302477, - "grad_norm": 1.4102665185928345, - "learning_rate": 8.633869346733668e-05, - "loss": 5.842, - "step": 14097 - }, - { - "epoch": 7.352281616688396, - "grad_norm": 1.5243723392486572, - "learning_rate": 8.633768844221106e-05, - "loss": 5.2948, - "step": 14098 - }, - { - "epoch": 7.352803129074315, - "grad_norm": 1.3316948413848877, - "learning_rate": 8.633668341708542e-05, - "loss": 5.5262, - "step": 14099 - }, - { - "epoch": 7.353324641460235, - "grad_norm": 1.4595082998275757, - "learning_rate": 8.63356783919598e-05, - "loss": 5.8955, - "step": 14100 - }, - { - "epoch": 7.3538461538461535, - "grad_norm": 1.4924750328063965, - "learning_rate": 8.633467336683418e-05, - "loss": 5.5194, - "step": 14101 - }, - { - "epoch": 7.354367666232073, - "grad_norm": 1.5112718343734741, - "learning_rate": 8.633366834170856e-05, - "loss": 5.3683, - "step": 14102 - }, - { - "epoch": 7.354889178617992, - "grad_norm": 1.3881654739379883, - "learning_rate": 8.633266331658292e-05, - "loss": 5.3937, - "step": 14103 - }, - { - "epoch": 7.355410691003911, - "grad_norm": 1.3978618383407593, - "learning_rate": 8.63316582914573e-05, - "loss": 5.617, - "step": 14104 - }, - { - "epoch": 7.3559322033898304, - "grad_norm": 1.425763726234436, - "learning_rate": 8.633065326633166e-05, - "loss": 5.7066, - "step": 14105 - }, - { - "epoch": 7.35645371577575, - "grad_norm": 1.5361095666885376, - "learning_rate": 8.632964824120603e-05, - "loss": 5.6143, - "step": 14106 - }, - { - "epoch": 7.3569752281616685, - "grad_norm": 1.532286286354065, - "learning_rate": 8.63286432160804e-05, - "loss": 5.5893, - "step": 14107 - }, - { - "epoch": 7.357496740547588, - "grad_norm": 1.708723545074463, - "learning_rate": 8.632763819095477e-05, - "loss": 4.7609, - "step": 14108 - }, - { - "epoch": 7.358018252933507, - "grad_norm": 1.4268895387649536, - "learning_rate": 8.632663316582915e-05, - "loss": 5.6622, - "step": 14109 - }, - { - "epoch": 7.358539765319426, - "grad_norm": 1.3774293661117554, - "learning_rate": 8.632562814070353e-05, - "loss": 5.8767, - "step": 14110 - }, - { - "epoch": 7.3590612777053455, - "grad_norm": 1.5874992609024048, - "learning_rate": 8.63246231155779e-05, - "loss": 5.2593, - "step": 14111 - }, - { - "epoch": 7.359582790091265, - "grad_norm": 1.436953067779541, - "learning_rate": 8.632361809045227e-05, - "loss": 5.6697, - "step": 14112 - }, - { - "epoch": 7.3601043024771835, - "grad_norm": 1.5072623491287231, - "learning_rate": 8.632261306532664e-05, - "loss": 5.527, - "step": 14113 - }, - { - "epoch": 7.360625814863103, - "grad_norm": 1.3641146421432495, - "learning_rate": 8.632160804020101e-05, - "loss": 5.9778, - "step": 14114 - }, - { - "epoch": 7.361147327249022, - "grad_norm": 1.4975948333740234, - "learning_rate": 8.632060301507539e-05, - "loss": 5.4439, - "step": 14115 - }, - { - "epoch": 7.361668839634941, - "grad_norm": 1.3952375650405884, - "learning_rate": 8.631959798994975e-05, - "loss": 5.9967, - "step": 14116 - }, - { - "epoch": 7.3621903520208605, - "grad_norm": 1.4663046598434448, - "learning_rate": 8.631859296482413e-05, - "loss": 5.404, - "step": 14117 - }, - { - "epoch": 7.36271186440678, - "grad_norm": 1.351913332939148, - "learning_rate": 8.63175879396985e-05, - "loss": 6.0953, - "step": 14118 - }, - { - "epoch": 7.3632333767926985, - "grad_norm": 1.3791661262512207, - "learning_rate": 8.631658291457286e-05, - "loss": 5.8144, - "step": 14119 - }, - { - "epoch": 7.363754889178618, - "grad_norm": 1.4670295715332031, - "learning_rate": 8.631557788944724e-05, - "loss": 5.8817, - "step": 14120 - }, - { - "epoch": 7.3642764015645374, - "grad_norm": 1.4128338098526, - "learning_rate": 8.631457286432161e-05, - "loss": 5.7477, - "step": 14121 - }, - { - "epoch": 7.364797913950456, - "grad_norm": 1.478087306022644, - "learning_rate": 8.631356783919599e-05, - "loss": 5.4937, - "step": 14122 - }, - { - "epoch": 7.3653194263363755, - "grad_norm": 1.5136597156524658, - "learning_rate": 8.631256281407036e-05, - "loss": 5.5861, - "step": 14123 - }, - { - "epoch": 7.365840938722295, - "grad_norm": 1.5503915548324585, - "learning_rate": 8.631155778894473e-05, - "loss": 5.8446, - "step": 14124 - }, - { - "epoch": 7.3663624511082135, - "grad_norm": 1.4703823328018188, - "learning_rate": 8.63105527638191e-05, - "loss": 5.9279, - "step": 14125 - }, - { - "epoch": 7.366883963494133, - "grad_norm": 1.454862117767334, - "learning_rate": 8.630954773869348e-05, - "loss": 5.1826, - "step": 14126 - }, - { - "epoch": 7.3674054758800525, - "grad_norm": 1.3330039978027344, - "learning_rate": 8.630854271356784e-05, - "loss": 5.2879, - "step": 14127 - }, - { - "epoch": 7.367926988265971, - "grad_norm": 1.427688479423523, - "learning_rate": 8.630753768844222e-05, - "loss": 5.3737, - "step": 14128 - }, - { - "epoch": 7.3684485006518905, - "grad_norm": 1.67949640750885, - "learning_rate": 8.630653266331658e-05, - "loss": 4.4529, - "step": 14129 - }, - { - "epoch": 7.36897001303781, - "grad_norm": 1.4954167604446411, - "learning_rate": 8.630552763819096e-05, - "loss": 5.6532, - "step": 14130 - }, - { - "epoch": 7.3694915254237285, - "grad_norm": 1.5530565977096558, - "learning_rate": 8.630452261306532e-05, - "loss": 5.8876, - "step": 14131 - }, - { - "epoch": 7.370013037809648, - "grad_norm": 1.4944267272949219, - "learning_rate": 8.63035175879397e-05, - "loss": 5.806, - "step": 14132 - }, - { - "epoch": 7.3705345501955675, - "grad_norm": 1.4697961807250977, - "learning_rate": 8.630251256281408e-05, - "loss": 5.4654, - "step": 14133 - }, - { - "epoch": 7.371056062581486, - "grad_norm": 1.4897717237472534, - "learning_rate": 8.630150753768844e-05, - "loss": 5.5045, - "step": 14134 - }, - { - "epoch": 7.3715775749674055, - "grad_norm": 1.4835759401321411, - "learning_rate": 8.630050251256282e-05, - "loss": 5.4403, - "step": 14135 - }, - { - "epoch": 7.372099087353325, - "grad_norm": 1.3518813848495483, - "learning_rate": 8.629949748743719e-05, - "loss": 5.8997, - "step": 14136 - }, - { - "epoch": 7.3726205997392436, - "grad_norm": 2.0226664543151855, - "learning_rate": 8.629849246231156e-05, - "loss": 4.6146, - "step": 14137 - }, - { - "epoch": 7.373142112125163, - "grad_norm": 1.4120374917984009, - "learning_rate": 8.629748743718593e-05, - "loss": 5.9279, - "step": 14138 - }, - { - "epoch": 7.3736636245110825, - "grad_norm": 1.7325031757354736, - "learning_rate": 8.62964824120603e-05, - "loss": 5.1797, - "step": 14139 - }, - { - "epoch": 7.374185136897001, - "grad_norm": 1.496110200881958, - "learning_rate": 8.629547738693467e-05, - "loss": 5.7291, - "step": 14140 - }, - { - "epoch": 7.3747066492829205, - "grad_norm": 1.2764310836791992, - "learning_rate": 8.629447236180905e-05, - "loss": 6.0163, - "step": 14141 - }, - { - "epoch": 7.37522816166884, - "grad_norm": 1.3636783361434937, - "learning_rate": 8.629346733668343e-05, - "loss": 5.7685, - "step": 14142 - }, - { - "epoch": 7.375749674054759, - "grad_norm": 1.4744255542755127, - "learning_rate": 8.62924623115578e-05, - "loss": 5.4804, - "step": 14143 - }, - { - "epoch": 7.376271186440678, - "grad_norm": 1.4802439212799072, - "learning_rate": 8.629145728643217e-05, - "loss": 5.6106, - "step": 14144 - }, - { - "epoch": 7.3767926988265975, - "grad_norm": 1.6065635681152344, - "learning_rate": 8.629045226130653e-05, - "loss": 5.3015, - "step": 14145 - }, - { - "epoch": 7.377314211212516, - "grad_norm": 1.4100929498672485, - "learning_rate": 8.628944723618091e-05, - "loss": 5.8161, - "step": 14146 - }, - { - "epoch": 7.3778357235984355, - "grad_norm": 1.4408624172210693, - "learning_rate": 8.628844221105527e-05, - "loss": 5.5822, - "step": 14147 - }, - { - "epoch": 7.378357235984355, - "grad_norm": 1.3833177089691162, - "learning_rate": 8.628743718592965e-05, - "loss": 5.9259, - "step": 14148 - }, - { - "epoch": 7.378878748370274, - "grad_norm": 1.8256405591964722, - "learning_rate": 8.628643216080402e-05, - "loss": 4.9419, - "step": 14149 - }, - { - "epoch": 7.379400260756193, - "grad_norm": 1.5105611085891724, - "learning_rate": 8.62854271356784e-05, - "loss": 5.689, - "step": 14150 - }, - { - "epoch": 7.3799217731421125, - "grad_norm": 1.714981198310852, - "learning_rate": 8.628442211055276e-05, - "loss": 4.9384, - "step": 14151 - }, - { - "epoch": 7.380443285528031, - "grad_norm": 1.4923653602600098, - "learning_rate": 8.628341708542714e-05, - "loss": 5.4446, - "step": 14152 - }, - { - "epoch": 7.3809647979139505, - "grad_norm": 1.4777165651321411, - "learning_rate": 8.628241206030151e-05, - "loss": 5.914, - "step": 14153 - }, - { - "epoch": 7.38148631029987, - "grad_norm": 1.5271995067596436, - "learning_rate": 8.628140703517589e-05, - "loss": 5.3885, - "step": 14154 - }, - { - "epoch": 7.382007822685789, - "grad_norm": 1.8909293413162231, - "learning_rate": 8.628040201005026e-05, - "loss": 4.7539, - "step": 14155 - }, - { - "epoch": 7.382529335071708, - "grad_norm": 1.5430923700332642, - "learning_rate": 8.627939698492463e-05, - "loss": 4.9579, - "step": 14156 - }, - { - "epoch": 7.3830508474576275, - "grad_norm": 1.4381492137908936, - "learning_rate": 8.6278391959799e-05, - "loss": 5.648, - "step": 14157 - }, - { - "epoch": 7.383572359843546, - "grad_norm": 1.5892207622528076, - "learning_rate": 8.627738693467336e-05, - "loss": 5.3703, - "step": 14158 - }, - { - "epoch": 7.384093872229466, - "grad_norm": 1.285823106765747, - "learning_rate": 8.627638190954774e-05, - "loss": 5.3019, - "step": 14159 - }, - { - "epoch": 7.384615384615385, - "grad_norm": 1.4033992290496826, - "learning_rate": 8.62753768844221e-05, - "loss": 5.8053, - "step": 14160 - }, - { - "epoch": 7.385136897001304, - "grad_norm": 1.4279005527496338, - "learning_rate": 8.627437185929648e-05, - "loss": 5.8314, - "step": 14161 - }, - { - "epoch": 7.385658409387223, - "grad_norm": 1.3596521615982056, - "learning_rate": 8.627336683417086e-05, - "loss": 5.8954, - "step": 14162 - }, - { - "epoch": 7.386179921773142, - "grad_norm": 1.4434218406677246, - "learning_rate": 8.627236180904524e-05, - "loss": 5.2857, - "step": 14163 - }, - { - "epoch": 7.386701434159061, - "grad_norm": 1.4004137516021729, - "learning_rate": 8.62713567839196e-05, - "loss": 5.6606, - "step": 14164 - }, - { - "epoch": 7.387222946544981, - "grad_norm": 1.4420850276947021, - "learning_rate": 8.627035175879398e-05, - "loss": 5.6736, - "step": 14165 - }, - { - "epoch": 7.3877444589309, - "grad_norm": 1.382262110710144, - "learning_rate": 8.626934673366834e-05, - "loss": 5.5453, - "step": 14166 - }, - { - "epoch": 7.388265971316819, - "grad_norm": 1.3904736042022705, - "learning_rate": 8.626834170854272e-05, - "loss": 5.9901, - "step": 14167 - }, - { - "epoch": 7.388787483702738, - "grad_norm": 1.4048441648483276, - "learning_rate": 8.626733668341709e-05, - "loss": 5.4459, - "step": 14168 - }, - { - "epoch": 7.389308996088657, - "grad_norm": 1.4792418479919434, - "learning_rate": 8.626633165829146e-05, - "loss": 5.8025, - "step": 14169 - }, - { - "epoch": 7.389830508474576, - "grad_norm": 1.386809229850769, - "learning_rate": 8.626532663316583e-05, - "loss": 5.4546, - "step": 14170 - }, - { - "epoch": 7.390352020860496, - "grad_norm": 1.4145687818527222, - "learning_rate": 8.62643216080402e-05, - "loss": 5.4652, - "step": 14171 - }, - { - "epoch": 7.390873533246415, - "grad_norm": 1.4539874792099, - "learning_rate": 8.626331658291457e-05, - "loss": 5.6454, - "step": 14172 - }, - { - "epoch": 7.391395045632334, - "grad_norm": 1.400189995765686, - "learning_rate": 8.626231155778895e-05, - "loss": 5.6029, - "step": 14173 - }, - { - "epoch": 7.391916558018253, - "grad_norm": 1.529093861579895, - "learning_rate": 8.626130653266333e-05, - "loss": 5.8987, - "step": 14174 - }, - { - "epoch": 7.392438070404172, - "grad_norm": 1.6764836311340332, - "learning_rate": 8.626030150753769e-05, - "loss": 5.0425, - "step": 14175 - }, - { - "epoch": 7.392959582790091, - "grad_norm": 1.3173460960388184, - "learning_rate": 8.625929648241207e-05, - "loss": 5.6325, - "step": 14176 - }, - { - "epoch": 7.393481095176011, - "grad_norm": 1.4846165180206299, - "learning_rate": 8.625829145728643e-05, - "loss": 5.4129, - "step": 14177 - }, - { - "epoch": 7.39400260756193, - "grad_norm": 1.4071072340011597, - "learning_rate": 8.625728643216081e-05, - "loss": 5.5546, - "step": 14178 - }, - { - "epoch": 7.394524119947849, - "grad_norm": 1.435632348060608, - "learning_rate": 8.625628140703518e-05, - "loss": 5.7503, - "step": 14179 - }, - { - "epoch": 7.395045632333768, - "grad_norm": 1.408699631690979, - "learning_rate": 8.625527638190955e-05, - "loss": 5.6445, - "step": 14180 - }, - { - "epoch": 7.395567144719687, - "grad_norm": 1.5238648653030396, - "learning_rate": 8.625427135678392e-05, - "loss": 4.9994, - "step": 14181 - }, - { - "epoch": 7.396088657105606, - "grad_norm": 1.5956357717514038, - "learning_rate": 8.62532663316583e-05, - "loss": 5.4867, - "step": 14182 - }, - { - "epoch": 7.396610169491526, - "grad_norm": 1.3702948093414307, - "learning_rate": 8.625226130653267e-05, - "loss": 5.6506, - "step": 14183 - }, - { - "epoch": 7.397131681877444, - "grad_norm": 1.5504330396652222, - "learning_rate": 8.625125628140705e-05, - "loss": 5.5675, - "step": 14184 - }, - { - "epoch": 7.397653194263364, - "grad_norm": 1.352691888809204, - "learning_rate": 8.625025125628141e-05, - "loss": 5.9027, - "step": 14185 - }, - { - "epoch": 7.398174706649283, - "grad_norm": 1.3896793127059937, - "learning_rate": 8.624924623115578e-05, - "loss": 5.4436, - "step": 14186 - }, - { - "epoch": 7.398696219035202, - "grad_norm": 1.3449150323867798, - "learning_rate": 8.624824120603016e-05, - "loss": 5.4449, - "step": 14187 - }, - { - "epoch": 7.399217731421121, - "grad_norm": 1.524226188659668, - "learning_rate": 8.624723618090452e-05, - "loss": 4.974, - "step": 14188 - }, - { - "epoch": 7.399739243807041, - "grad_norm": 1.511344313621521, - "learning_rate": 8.62462311557789e-05, - "loss": 5.4156, - "step": 14189 - }, - { - "epoch": 7.400260756192959, - "grad_norm": 1.4743949174880981, - "learning_rate": 8.624522613065326e-05, - "loss": 5.1156, - "step": 14190 - }, - { - "epoch": 7.400782268578879, - "grad_norm": 1.6428529024124146, - "learning_rate": 8.624422110552764e-05, - "loss": 5.4457, - "step": 14191 - }, - { - "epoch": 7.401303780964798, - "grad_norm": 1.577919840812683, - "learning_rate": 8.6243216080402e-05, - "loss": 5.1141, - "step": 14192 - }, - { - "epoch": 7.401825293350717, - "grad_norm": 1.4119775295257568, - "learning_rate": 8.624221105527638e-05, - "loss": 5.5281, - "step": 14193 - }, - { - "epoch": 7.402346805736636, - "grad_norm": 1.4805152416229248, - "learning_rate": 8.624120603015076e-05, - "loss": 5.753, - "step": 14194 - }, - { - "epoch": 7.402868318122556, - "grad_norm": 1.2799268960952759, - "learning_rate": 8.624020100502514e-05, - "loss": 5.2896, - "step": 14195 - }, - { - "epoch": 7.403389830508474, - "grad_norm": 1.3589462041854858, - "learning_rate": 8.62391959798995e-05, - "loss": 5.8541, - "step": 14196 - }, - { - "epoch": 7.403911342894394, - "grad_norm": 1.2868988513946533, - "learning_rate": 8.623819095477388e-05, - "loss": 5.6222, - "step": 14197 - }, - { - "epoch": 7.404432855280313, - "grad_norm": 1.3450533151626587, - "learning_rate": 8.623718592964825e-05, - "loss": 5.5451, - "step": 14198 - }, - { - "epoch": 7.404954367666232, - "grad_norm": 1.5555152893066406, - "learning_rate": 8.623618090452261e-05, - "loss": 5.0558, - "step": 14199 - }, - { - "epoch": 7.405475880052151, - "grad_norm": 1.4277386665344238, - "learning_rate": 8.623517587939699e-05, - "loss": 5.4719, - "step": 14200 - }, - { - "epoch": 7.405997392438071, - "grad_norm": 1.464926838874817, - "learning_rate": 8.623417085427135e-05, - "loss": 5.4429, - "step": 14201 - }, - { - "epoch": 7.406518904823989, - "grad_norm": 1.5777472257614136, - "learning_rate": 8.623316582914573e-05, - "loss": 5.0734, - "step": 14202 - }, - { - "epoch": 7.407040417209909, - "grad_norm": 1.4172298908233643, - "learning_rate": 8.623216080402011e-05, - "loss": 5.7, - "step": 14203 - }, - { - "epoch": 7.407561929595828, - "grad_norm": 1.48367178440094, - "learning_rate": 8.623115577889449e-05, - "loss": 5.3557, - "step": 14204 - }, - { - "epoch": 7.408083441981747, - "grad_norm": 1.4576481580734253, - "learning_rate": 8.623015075376885e-05, - "loss": 5.5282, - "step": 14205 - }, - { - "epoch": 7.408604954367666, - "grad_norm": 1.548510193824768, - "learning_rate": 8.622914572864323e-05, - "loss": 5.2592, - "step": 14206 - }, - { - "epoch": 7.409126466753586, - "grad_norm": 1.6012077331542969, - "learning_rate": 8.622814070351759e-05, - "loss": 5.7055, - "step": 14207 - }, - { - "epoch": 7.409647979139504, - "grad_norm": 1.425754189491272, - "learning_rate": 8.622713567839197e-05, - "loss": 6.0353, - "step": 14208 - }, - { - "epoch": 7.410169491525424, - "grad_norm": 1.463584303855896, - "learning_rate": 8.622613065326633e-05, - "loss": 4.9069, - "step": 14209 - }, - { - "epoch": 7.410691003911343, - "grad_norm": 1.405301570892334, - "learning_rate": 8.622512562814071e-05, - "loss": 5.4763, - "step": 14210 - }, - { - "epoch": 7.411212516297262, - "grad_norm": 1.4896106719970703, - "learning_rate": 8.622412060301508e-05, - "loss": 5.2585, - "step": 14211 - }, - { - "epoch": 7.411734028683181, - "grad_norm": 1.3785629272460938, - "learning_rate": 8.622311557788944e-05, - "loss": 5.569, - "step": 14212 - }, - { - "epoch": 7.412255541069101, - "grad_norm": 1.3498270511627197, - "learning_rate": 8.622211055276382e-05, - "loss": 5.922, - "step": 14213 - }, - { - "epoch": 7.412777053455019, - "grad_norm": 1.3219002485275269, - "learning_rate": 8.62211055276382e-05, - "loss": 5.6531, - "step": 14214 - }, - { - "epoch": 7.413298565840939, - "grad_norm": 1.4623290300369263, - "learning_rate": 8.622010050251257e-05, - "loss": 5.4577, - "step": 14215 - }, - { - "epoch": 7.413820078226858, - "grad_norm": 1.515224814414978, - "learning_rate": 8.621909547738694e-05, - "loss": 5.167, - "step": 14216 - }, - { - "epoch": 7.414341590612777, - "grad_norm": 1.4089800119400024, - "learning_rate": 8.621809045226132e-05, - "loss": 5.3811, - "step": 14217 - }, - { - "epoch": 7.414863102998696, - "grad_norm": 1.4516761302947998, - "learning_rate": 8.621708542713568e-05, - "loss": 5.5102, - "step": 14218 - }, - { - "epoch": 7.415384615384616, - "grad_norm": 1.4150274991989136, - "learning_rate": 8.621608040201006e-05, - "loss": 5.2768, - "step": 14219 - }, - { - "epoch": 7.415906127770534, - "grad_norm": 1.3778207302093506, - "learning_rate": 8.621507537688442e-05, - "loss": 5.6867, - "step": 14220 - }, - { - "epoch": 7.416427640156454, - "grad_norm": 1.5403506755828857, - "learning_rate": 8.62140703517588e-05, - "loss": 5.6693, - "step": 14221 - }, - { - "epoch": 7.416949152542373, - "grad_norm": 1.5226300954818726, - "learning_rate": 8.621306532663316e-05, - "loss": 5.4529, - "step": 14222 - }, - { - "epoch": 7.417470664928292, - "grad_norm": 1.405263900756836, - "learning_rate": 8.621206030150754e-05, - "loss": 5.4865, - "step": 14223 - }, - { - "epoch": 7.417992177314211, - "grad_norm": 1.376161813735962, - "learning_rate": 8.621105527638192e-05, - "loss": 5.8112, - "step": 14224 - }, - { - "epoch": 7.418513689700131, - "grad_norm": 1.4538315534591675, - "learning_rate": 8.621005025125628e-05, - "loss": 5.9042, - "step": 14225 - }, - { - "epoch": 7.419035202086049, - "grad_norm": 1.4149014949798584, - "learning_rate": 8.620904522613066e-05, - "loss": 5.8692, - "step": 14226 - }, - { - "epoch": 7.419556714471969, - "grad_norm": 1.4087064266204834, - "learning_rate": 8.620804020100503e-05, - "loss": 5.8994, - "step": 14227 - }, - { - "epoch": 7.420078226857888, - "grad_norm": 1.4016448259353638, - "learning_rate": 8.62070351758794e-05, - "loss": 5.622, - "step": 14228 - }, - { - "epoch": 7.420599739243807, - "grad_norm": 1.2972010374069214, - "learning_rate": 8.620603015075377e-05, - "loss": 5.7947, - "step": 14229 - }, - { - "epoch": 7.421121251629726, - "grad_norm": 1.474238634109497, - "learning_rate": 8.620502512562815e-05, - "loss": 5.0965, - "step": 14230 - }, - { - "epoch": 7.421642764015646, - "grad_norm": 1.372192144393921, - "learning_rate": 8.620402010050251e-05, - "loss": 5.6062, - "step": 14231 - }, - { - "epoch": 7.422164276401564, - "grad_norm": 1.4769084453582764, - "learning_rate": 8.620301507537689e-05, - "loss": 5.4419, - "step": 14232 - }, - { - "epoch": 7.422685788787484, - "grad_norm": 1.513552188873291, - "learning_rate": 8.620201005025125e-05, - "loss": 5.0112, - "step": 14233 - }, - { - "epoch": 7.423207301173403, - "grad_norm": 1.4308239221572876, - "learning_rate": 8.620100502512563e-05, - "loss": 5.4621, - "step": 14234 - }, - { - "epoch": 7.423728813559322, - "grad_norm": 1.5503261089324951, - "learning_rate": 8.620000000000001e-05, - "loss": 5.3774, - "step": 14235 - }, - { - "epoch": 7.424250325945241, - "grad_norm": 1.4259485006332397, - "learning_rate": 8.619899497487439e-05, - "loss": 5.779, - "step": 14236 - }, - { - "epoch": 7.424771838331161, - "grad_norm": 1.3928489685058594, - "learning_rate": 8.619798994974875e-05, - "loss": 5.4128, - "step": 14237 - }, - { - "epoch": 7.425293350717079, - "grad_norm": 1.4417411088943481, - "learning_rate": 8.619698492462311e-05, - "loss": 5.6561, - "step": 14238 - }, - { - "epoch": 7.425814863102999, - "grad_norm": 1.4377459287643433, - "learning_rate": 8.619597989949749e-05, - "loss": 5.7665, - "step": 14239 - }, - { - "epoch": 7.426336375488918, - "grad_norm": 1.379299521446228, - "learning_rate": 8.619497487437186e-05, - "loss": 5.7633, - "step": 14240 - }, - { - "epoch": 7.426857887874837, - "grad_norm": 1.4549612998962402, - "learning_rate": 8.619396984924623e-05, - "loss": 5.7221, - "step": 14241 - }, - { - "epoch": 7.427379400260756, - "grad_norm": 1.4235965013504028, - "learning_rate": 8.61929648241206e-05, - "loss": 5.6509, - "step": 14242 - }, - { - "epoch": 7.427900912646676, - "grad_norm": 1.4879059791564941, - "learning_rate": 8.619195979899498e-05, - "loss": 5.3719, - "step": 14243 - }, - { - "epoch": 7.428422425032594, - "grad_norm": 1.3479435443878174, - "learning_rate": 8.619095477386935e-05, - "loss": 5.8037, - "step": 14244 - }, - { - "epoch": 7.428943937418514, - "grad_norm": 1.3629789352416992, - "learning_rate": 8.618994974874373e-05, - "loss": 5.2061, - "step": 14245 - }, - { - "epoch": 7.429465449804433, - "grad_norm": 1.4932838678359985, - "learning_rate": 8.61889447236181e-05, - "loss": 5.6273, - "step": 14246 - }, - { - "epoch": 7.429986962190352, - "grad_norm": 1.4447717666625977, - "learning_rate": 8.618793969849247e-05, - "loss": 5.1216, - "step": 14247 - }, - { - "epoch": 7.430508474576271, - "grad_norm": 1.4133471250534058, - "learning_rate": 8.618693467336684e-05, - "loss": 5.4463, - "step": 14248 - }, - { - "epoch": 7.431029986962191, - "grad_norm": 1.4122138023376465, - "learning_rate": 8.618592964824122e-05, - "loss": 5.7575, - "step": 14249 - }, - { - "epoch": 7.431551499348109, - "grad_norm": 1.5188621282577515, - "learning_rate": 8.618492462311558e-05, - "loss": 5.3654, - "step": 14250 - }, - { - "epoch": 7.432073011734029, - "grad_norm": 1.442542314529419, - "learning_rate": 8.618391959798996e-05, - "loss": 5.8125, - "step": 14251 - }, - { - "epoch": 7.432594524119948, - "grad_norm": 1.6895517110824585, - "learning_rate": 8.618291457286432e-05, - "loss": 4.7752, - "step": 14252 - }, - { - "epoch": 7.433116036505867, - "grad_norm": 1.4602105617523193, - "learning_rate": 8.618190954773869e-05, - "loss": 5.6572, - "step": 14253 - }, - { - "epoch": 7.433637548891786, - "grad_norm": 1.510026216506958, - "learning_rate": 8.618090452261306e-05, - "loss": 5.8225, - "step": 14254 - }, - { - "epoch": 7.434159061277706, - "grad_norm": 1.5581762790679932, - "learning_rate": 8.617989949748744e-05, - "loss": 4.9862, - "step": 14255 - }, - { - "epoch": 7.434680573663624, - "grad_norm": 1.412340760231018, - "learning_rate": 8.617889447236182e-05, - "loss": 5.5487, - "step": 14256 - }, - { - "epoch": 7.435202086049544, - "grad_norm": 1.6891721487045288, - "learning_rate": 8.617788944723618e-05, - "loss": 5.444, - "step": 14257 - }, - { - "epoch": 7.435723598435462, - "grad_norm": 1.4973411560058594, - "learning_rate": 8.617688442211056e-05, - "loss": 5.5478, - "step": 14258 - }, - { - "epoch": 7.436245110821382, - "grad_norm": 1.429060697555542, - "learning_rate": 8.617587939698493e-05, - "loss": 5.7634, - "step": 14259 - }, - { - "epoch": 7.436766623207301, - "grad_norm": 1.5158412456512451, - "learning_rate": 8.61748743718593e-05, - "loss": 5.6012, - "step": 14260 - }, - { - "epoch": 7.437288135593221, - "grad_norm": 1.5324784517288208, - "learning_rate": 8.617386934673367e-05, - "loss": 5.4205, - "step": 14261 - }, - { - "epoch": 7.437809647979139, - "grad_norm": 1.5659664869308472, - "learning_rate": 8.617286432160805e-05, - "loss": 5.3359, - "step": 14262 - }, - { - "epoch": 7.438331160365059, - "grad_norm": 1.3550336360931396, - "learning_rate": 8.617185929648241e-05, - "loss": 5.5797, - "step": 14263 - }, - { - "epoch": 7.438852672750977, - "grad_norm": 1.6186634302139282, - "learning_rate": 8.617085427135679e-05, - "loss": 5.8263, - "step": 14264 - }, - { - "epoch": 7.439374185136897, - "grad_norm": 1.4324424266815186, - "learning_rate": 8.616984924623117e-05, - "loss": 5.6028, - "step": 14265 - }, - { - "epoch": 7.439895697522816, - "grad_norm": 1.4691213369369507, - "learning_rate": 8.616884422110553e-05, - "loss": 5.6158, - "step": 14266 - }, - { - "epoch": 7.440417209908736, - "grad_norm": 1.4039791822433472, - "learning_rate": 8.616783919597991e-05, - "loss": 5.4533, - "step": 14267 - }, - { - "epoch": 7.440938722294654, - "grad_norm": 1.518585443496704, - "learning_rate": 8.616683417085427e-05, - "loss": 5.4842, - "step": 14268 - }, - { - "epoch": 7.441460234680574, - "grad_norm": 1.4168916940689087, - "learning_rate": 8.616582914572865e-05, - "loss": 5.9592, - "step": 14269 - }, - { - "epoch": 7.441981747066492, - "grad_norm": 1.4451208114624023, - "learning_rate": 8.616482412060302e-05, - "loss": 5.6947, - "step": 14270 - }, - { - "epoch": 7.442503259452412, - "grad_norm": 1.381415843963623, - "learning_rate": 8.616381909547739e-05, - "loss": 5.4075, - "step": 14271 - }, - { - "epoch": 7.443024771838331, - "grad_norm": 1.3678151369094849, - "learning_rate": 8.616281407035176e-05, - "loss": 5.5872, - "step": 14272 - }, - { - "epoch": 7.443546284224251, - "grad_norm": 1.3448539972305298, - "learning_rate": 8.616180904522614e-05, - "loss": 5.5019, - "step": 14273 - }, - { - "epoch": 7.444067796610169, - "grad_norm": 1.3953989744186401, - "learning_rate": 8.61608040201005e-05, - "loss": 5.6416, - "step": 14274 - }, - { - "epoch": 7.444589308996089, - "grad_norm": 1.479372262954712, - "learning_rate": 8.615979899497488e-05, - "loss": 5.5388, - "step": 14275 - }, - { - "epoch": 7.445110821382007, - "grad_norm": 1.4568132162094116, - "learning_rate": 8.615879396984926e-05, - "loss": 4.832, - "step": 14276 - }, - { - "epoch": 7.445632333767927, - "grad_norm": 1.3524776697158813, - "learning_rate": 8.615778894472363e-05, - "loss": 5.7165, - "step": 14277 - }, - { - "epoch": 7.446153846153846, - "grad_norm": 1.3663841485977173, - "learning_rate": 8.6156783919598e-05, - "loss": 5.6367, - "step": 14278 - }, - { - "epoch": 7.446675358539765, - "grad_norm": 1.4485597610473633, - "learning_rate": 8.615577889447236e-05, - "loss": 5.753, - "step": 14279 - }, - { - "epoch": 7.447196870925684, - "grad_norm": 1.4402482509613037, - "learning_rate": 8.615477386934674e-05, - "loss": 5.7825, - "step": 14280 - }, - { - "epoch": 7.447718383311604, - "grad_norm": 1.3657389879226685, - "learning_rate": 8.61537688442211e-05, - "loss": 5.356, - "step": 14281 - }, - { - "epoch": 7.448239895697522, - "grad_norm": 1.363399863243103, - "learning_rate": 8.615276381909548e-05, - "loss": 5.6831, - "step": 14282 - }, - { - "epoch": 7.448761408083442, - "grad_norm": 1.4248377084732056, - "learning_rate": 8.615175879396985e-05, - "loss": 5.8573, - "step": 14283 - }, - { - "epoch": 7.449282920469361, - "grad_norm": 1.4792426824569702, - "learning_rate": 8.615075376884422e-05, - "loss": 5.5372, - "step": 14284 - }, - { - "epoch": 7.44980443285528, - "grad_norm": 1.4645453691482544, - "learning_rate": 8.614974874371859e-05, - "loss": 5.0142, - "step": 14285 - }, - { - "epoch": 7.450325945241199, - "grad_norm": 1.5381807088851929, - "learning_rate": 8.614874371859297e-05, - "loss": 4.9206, - "step": 14286 - }, - { - "epoch": 7.450847457627119, - "grad_norm": 1.4675301313400269, - "learning_rate": 8.614773869346734e-05, - "loss": 4.806, - "step": 14287 - }, - { - "epoch": 7.451368970013037, - "grad_norm": 1.3951455354690552, - "learning_rate": 8.614673366834172e-05, - "loss": 5.1818, - "step": 14288 - }, - { - "epoch": 7.451890482398957, - "grad_norm": 1.4525424242019653, - "learning_rate": 8.614572864321609e-05, - "loss": 5.5625, - "step": 14289 - }, - { - "epoch": 7.452411994784876, - "grad_norm": 1.3821938037872314, - "learning_rate": 8.614472361809046e-05, - "loss": 5.8409, - "step": 14290 - }, - { - "epoch": 7.452933507170795, - "grad_norm": 1.5408557653427124, - "learning_rate": 8.614371859296483e-05, - "loss": 5.2333, - "step": 14291 - }, - { - "epoch": 7.453455019556714, - "grad_norm": 1.34048330783844, - "learning_rate": 8.614271356783919e-05, - "loss": 5.6896, - "step": 14292 - }, - { - "epoch": 7.453976531942634, - "grad_norm": 1.7631473541259766, - "learning_rate": 8.614170854271357e-05, - "loss": 5.2045, - "step": 14293 - }, - { - "epoch": 7.4544980443285525, - "grad_norm": 1.4007327556610107, - "learning_rate": 8.614070351758793e-05, - "loss": 5.7203, - "step": 14294 - }, - { - "epoch": 7.455019556714472, - "grad_norm": 1.5579476356506348, - "learning_rate": 8.613969849246231e-05, - "loss": 5.6567, - "step": 14295 - }, - { - "epoch": 7.455541069100391, - "grad_norm": 1.3906112909317017, - "learning_rate": 8.613869346733669e-05, - "loss": 5.431, - "step": 14296 - }, - { - "epoch": 7.45606258148631, - "grad_norm": 1.4342093467712402, - "learning_rate": 8.613768844221107e-05, - "loss": 5.6358, - "step": 14297 - }, - { - "epoch": 7.456584093872229, - "grad_norm": 1.5345861911773682, - "learning_rate": 8.613668341708543e-05, - "loss": 5.4142, - "step": 14298 - }, - { - "epoch": 7.457105606258149, - "grad_norm": 1.5842835903167725, - "learning_rate": 8.613567839195981e-05, - "loss": 5.3757, - "step": 14299 - }, - { - "epoch": 7.4576271186440675, - "grad_norm": 1.519406795501709, - "learning_rate": 8.613467336683417e-05, - "loss": 5.73, - "step": 14300 - }, - { - "epoch": 7.458148631029987, - "grad_norm": 1.5290672779083252, - "learning_rate": 8.613366834170855e-05, - "loss": 5.6529, - "step": 14301 - }, - { - "epoch": 7.458670143415906, - "grad_norm": 1.4627504348754883, - "learning_rate": 8.613266331658292e-05, - "loss": 5.4367, - "step": 14302 - }, - { - "epoch": 7.459191655801825, - "grad_norm": 1.431751012802124, - "learning_rate": 8.61316582914573e-05, - "loss": 5.3066, - "step": 14303 - }, - { - "epoch": 7.459713168187744, - "grad_norm": 1.4224778413772583, - "learning_rate": 8.613065326633166e-05, - "loss": 5.5867, - "step": 14304 - }, - { - "epoch": 7.460234680573664, - "grad_norm": 1.473611831665039, - "learning_rate": 8.612964824120602e-05, - "loss": 5.2475, - "step": 14305 - }, - { - "epoch": 7.4607561929595825, - "grad_norm": 1.4644843339920044, - "learning_rate": 8.61286432160804e-05, - "loss": 5.4721, - "step": 14306 - }, - { - "epoch": 7.461277705345502, - "grad_norm": 1.4035083055496216, - "learning_rate": 8.612763819095478e-05, - "loss": 5.5835, - "step": 14307 - }, - { - "epoch": 7.461799217731421, - "grad_norm": 1.5075477361679077, - "learning_rate": 8.612663316582916e-05, - "loss": 5.3304, - "step": 14308 - }, - { - "epoch": 7.46232073011734, - "grad_norm": 1.4983278512954712, - "learning_rate": 8.612562814070352e-05, - "loss": 5.432, - "step": 14309 - }, - { - "epoch": 7.4628422425032594, - "grad_norm": 1.5182899236679077, - "learning_rate": 8.61246231155779e-05, - "loss": 5.524, - "step": 14310 - }, - { - "epoch": 7.463363754889179, - "grad_norm": 1.2855689525604248, - "learning_rate": 8.612361809045226e-05, - "loss": 5.911, - "step": 14311 - }, - { - "epoch": 7.4638852672750975, - "grad_norm": 1.466791033744812, - "learning_rate": 8.612261306532664e-05, - "loss": 5.7725, - "step": 14312 - }, - { - "epoch": 7.464406779661017, - "grad_norm": 1.6274499893188477, - "learning_rate": 8.6121608040201e-05, - "loss": 5.0167, - "step": 14313 - }, - { - "epoch": 7.464928292046936, - "grad_norm": 1.4677599668502808, - "learning_rate": 8.612060301507538e-05, - "loss": 5.6113, - "step": 14314 - }, - { - "epoch": 7.465449804432855, - "grad_norm": 1.3876099586486816, - "learning_rate": 8.611959798994975e-05, - "loss": 5.6266, - "step": 14315 - }, - { - "epoch": 7.4659713168187745, - "grad_norm": 1.4407918453216553, - "learning_rate": 8.611859296482412e-05, - "loss": 5.6032, - "step": 14316 - }, - { - "epoch": 7.466492829204694, - "grad_norm": 1.4900299310684204, - "learning_rate": 8.61175879396985e-05, - "loss": 5.7813, - "step": 14317 - }, - { - "epoch": 7.4670143415906125, - "grad_norm": 1.4530835151672363, - "learning_rate": 8.611658291457287e-05, - "loss": 5.5158, - "step": 14318 - }, - { - "epoch": 7.467535853976532, - "grad_norm": 1.6213852167129517, - "learning_rate": 8.611557788944724e-05, - "loss": 5.4528, - "step": 14319 - }, - { - "epoch": 7.468057366362451, - "grad_norm": 1.4295357465744019, - "learning_rate": 8.611457286432161e-05, - "loss": 5.5588, - "step": 14320 - }, - { - "epoch": 7.46857887874837, - "grad_norm": 1.5892099142074585, - "learning_rate": 8.611356783919599e-05, - "loss": 5.2406, - "step": 14321 - }, - { - "epoch": 7.4691003911342895, - "grad_norm": 1.3827917575836182, - "learning_rate": 8.611256281407035e-05, - "loss": 5.6161, - "step": 14322 - }, - { - "epoch": 7.469621903520209, - "grad_norm": 1.5175367593765259, - "learning_rate": 8.611155778894473e-05, - "loss": 5.4232, - "step": 14323 - }, - { - "epoch": 7.4701434159061275, - "grad_norm": 1.5817011594772339, - "learning_rate": 8.611055276381909e-05, - "loss": 5.4281, - "step": 14324 - }, - { - "epoch": 7.470664928292047, - "grad_norm": 1.4407696723937988, - "learning_rate": 8.610954773869347e-05, - "loss": 5.9789, - "step": 14325 - }, - { - "epoch": 7.4711864406779664, - "grad_norm": 1.514825701713562, - "learning_rate": 8.610854271356783e-05, - "loss": 5.6888, - "step": 14326 - }, - { - "epoch": 7.471707953063885, - "grad_norm": 1.4803707599639893, - "learning_rate": 8.610753768844221e-05, - "loss": 5.1423, - "step": 14327 - }, - { - "epoch": 7.4722294654498045, - "grad_norm": 1.4762588739395142, - "learning_rate": 8.610653266331659e-05, - "loss": 5.6034, - "step": 14328 - }, - { - "epoch": 7.472750977835724, - "grad_norm": 1.3832391500473022, - "learning_rate": 8.610552763819097e-05, - "loss": 5.4574, - "step": 14329 - }, - { - "epoch": 7.4732724902216425, - "grad_norm": 1.4260191917419434, - "learning_rate": 8.610452261306533e-05, - "loss": 5.464, - "step": 14330 - }, - { - "epoch": 7.473794002607562, - "grad_norm": 1.5728232860565186, - "learning_rate": 8.61035175879397e-05, - "loss": 5.539, - "step": 14331 - }, - { - "epoch": 7.4743155149934815, - "grad_norm": 1.6049944162368774, - "learning_rate": 8.610251256281407e-05, - "loss": 5.23, - "step": 14332 - }, - { - "epoch": 7.4748370273794, - "grad_norm": 1.5035383701324463, - "learning_rate": 8.610150753768844e-05, - "loss": 5.4694, - "step": 14333 - }, - { - "epoch": 7.4753585397653195, - "grad_norm": 1.4754914045333862, - "learning_rate": 8.610050251256282e-05, - "loss": 5.7656, - "step": 14334 - }, - { - "epoch": 7.475880052151239, - "grad_norm": 1.4320921897888184, - "learning_rate": 8.609949748743718e-05, - "loss": 5.5553, - "step": 14335 - }, - { - "epoch": 7.4764015645371575, - "grad_norm": 1.3926585912704468, - "learning_rate": 8.609849246231156e-05, - "loss": 5.4793, - "step": 14336 - }, - { - "epoch": 7.476923076923077, - "grad_norm": 1.3676341772079468, - "learning_rate": 8.609748743718594e-05, - "loss": 5.7603, - "step": 14337 - }, - { - "epoch": 7.4774445893089965, - "grad_norm": 1.3652431964874268, - "learning_rate": 8.609648241206031e-05, - "loss": 5.918, - "step": 14338 - }, - { - "epoch": 7.477966101694915, - "grad_norm": 1.4791314601898193, - "learning_rate": 8.609547738693468e-05, - "loss": 5.3456, - "step": 14339 - }, - { - "epoch": 7.4784876140808345, - "grad_norm": 1.4573028087615967, - "learning_rate": 8.609447236180906e-05, - "loss": 5.94, - "step": 14340 - }, - { - "epoch": 7.479009126466754, - "grad_norm": 1.5230895280838013, - "learning_rate": 8.609346733668342e-05, - "loss": 5.3826, - "step": 14341 - }, - { - "epoch": 7.4795306388526726, - "grad_norm": 1.504063606262207, - "learning_rate": 8.60924623115578e-05, - "loss": 5.5283, - "step": 14342 - }, - { - "epoch": 7.480052151238592, - "grad_norm": 2.577378273010254, - "learning_rate": 8.609145728643216e-05, - "loss": 4.5913, - "step": 14343 - }, - { - "epoch": 7.4805736636245115, - "grad_norm": 1.601867914199829, - "learning_rate": 8.609045226130654e-05, - "loss": 5.655, - "step": 14344 - }, - { - "epoch": 7.48109517601043, - "grad_norm": 1.509210228919983, - "learning_rate": 8.60894472361809e-05, - "loss": 5.6375, - "step": 14345 - }, - { - "epoch": 7.4816166883963495, - "grad_norm": 1.4378777742385864, - "learning_rate": 8.608844221105527e-05, - "loss": 5.5022, - "step": 14346 - }, - { - "epoch": 7.482138200782269, - "grad_norm": 1.5622271299362183, - "learning_rate": 8.608743718592965e-05, - "loss": 5.4502, - "step": 14347 - }, - { - "epoch": 7.482659713168188, - "grad_norm": 1.3820501565933228, - "learning_rate": 8.608643216080403e-05, - "loss": 5.7039, - "step": 14348 - }, - { - "epoch": 7.483181225554107, - "grad_norm": 1.594468355178833, - "learning_rate": 8.60854271356784e-05, - "loss": 4.5349, - "step": 14349 - }, - { - "epoch": 7.4837027379400265, - "grad_norm": 1.524670124053955, - "learning_rate": 8.608442211055277e-05, - "loss": 4.9033, - "step": 14350 - }, - { - "epoch": 7.484224250325945, - "grad_norm": 1.510387897491455, - "learning_rate": 8.608341708542715e-05, - "loss": 5.3089, - "step": 14351 - }, - { - "epoch": 7.4847457627118645, - "grad_norm": 1.419113278388977, - "learning_rate": 8.608241206030151e-05, - "loss": 5.8636, - "step": 14352 - }, - { - "epoch": 7.485267275097783, - "grad_norm": 1.3459213972091675, - "learning_rate": 8.608140703517589e-05, - "loss": 5.9803, - "step": 14353 - }, - { - "epoch": 7.485788787483703, - "grad_norm": 1.4623483419418335, - "learning_rate": 8.608040201005025e-05, - "loss": 5.2705, - "step": 14354 - }, - { - "epoch": 7.486310299869622, - "grad_norm": 1.3900104761123657, - "learning_rate": 8.607939698492463e-05, - "loss": 5.5013, - "step": 14355 - }, - { - "epoch": 7.4868318122555415, - "grad_norm": 1.4796334505081177, - "learning_rate": 8.6078391959799e-05, - "loss": 5.4071, - "step": 14356 - }, - { - "epoch": 7.48735332464146, - "grad_norm": 1.5062527656555176, - "learning_rate": 8.607738693467337e-05, - "loss": 5.412, - "step": 14357 - }, - { - "epoch": 7.4878748370273795, - "grad_norm": 1.3945561647415161, - "learning_rate": 8.607638190954775e-05, - "loss": 5.5428, - "step": 14358 - }, - { - "epoch": 7.488396349413298, - "grad_norm": 1.3692220449447632, - "learning_rate": 8.607537688442211e-05, - "loss": 5.7688, - "step": 14359 - }, - { - "epoch": 7.488917861799218, - "grad_norm": 1.6224329471588135, - "learning_rate": 8.607437185929649e-05, - "loss": 5.6577, - "step": 14360 - }, - { - "epoch": 7.489439374185137, - "grad_norm": 1.385735034942627, - "learning_rate": 8.607336683417086e-05, - "loss": 5.6236, - "step": 14361 - }, - { - "epoch": 7.4899608865710565, - "grad_norm": 1.4128612279891968, - "learning_rate": 8.607236180904523e-05, - "loss": 5.6155, - "step": 14362 - }, - { - "epoch": 7.490482398956975, - "grad_norm": 1.532691478729248, - "learning_rate": 8.60713567839196e-05, - "loss": 5.1946, - "step": 14363 - }, - { - "epoch": 7.491003911342895, - "grad_norm": 1.4949405193328857, - "learning_rate": 8.607035175879398e-05, - "loss": 5.4635, - "step": 14364 - }, - { - "epoch": 7.491525423728813, - "grad_norm": 1.409379243850708, - "learning_rate": 8.606934673366834e-05, - "loss": 5.5668, - "step": 14365 - }, - { - "epoch": 7.492046936114733, - "grad_norm": 1.4646391868591309, - "learning_rate": 8.606834170854272e-05, - "loss": 5.8451, - "step": 14366 - }, - { - "epoch": 7.492568448500652, - "grad_norm": 1.3866653442382812, - "learning_rate": 8.606733668341708e-05, - "loss": 5.6677, - "step": 14367 - }, - { - "epoch": 7.493089960886571, - "grad_norm": 1.3936150074005127, - "learning_rate": 8.606633165829146e-05, - "loss": 5.5154, - "step": 14368 - }, - { - "epoch": 7.49361147327249, - "grad_norm": 1.4900511503219604, - "learning_rate": 8.606532663316584e-05, - "loss": 5.5131, - "step": 14369 - }, - { - "epoch": 7.49413298565841, - "grad_norm": 1.5182266235351562, - "learning_rate": 8.606432160804022e-05, - "loss": 5.3355, - "step": 14370 - }, - { - "epoch": 7.494654498044328, - "grad_norm": 1.418129324913025, - "learning_rate": 8.606331658291458e-05, - "loss": 5.5601, - "step": 14371 - }, - { - "epoch": 7.495176010430248, - "grad_norm": 1.3847951889038086, - "learning_rate": 8.606231155778894e-05, - "loss": 5.6372, - "step": 14372 - }, - { - "epoch": 7.495697522816167, - "grad_norm": 1.40888512134552, - "learning_rate": 8.606130653266332e-05, - "loss": 5.418, - "step": 14373 - }, - { - "epoch": 7.496219035202086, - "grad_norm": 1.4367265701293945, - "learning_rate": 8.606030150753769e-05, - "loss": 5.5986, - "step": 14374 - }, - { - "epoch": 7.496740547588005, - "grad_norm": 1.5357054471969604, - "learning_rate": 8.605929648241206e-05, - "loss": 5.6421, - "step": 14375 - }, - { - "epoch": 7.497262059973925, - "grad_norm": 1.4539191722869873, - "learning_rate": 8.605829145728643e-05, - "loss": 5.4075, - "step": 14376 - }, - { - "epoch": 7.497783572359843, - "grad_norm": 1.3913840055465698, - "learning_rate": 8.60572864321608e-05, - "loss": 5.6927, - "step": 14377 - }, - { - "epoch": 7.498305084745763, - "grad_norm": 1.5202126502990723, - "learning_rate": 8.605628140703518e-05, - "loss": 4.9966, - "step": 14378 - }, - { - "epoch": 7.498826597131682, - "grad_norm": 1.3729082345962524, - "learning_rate": 8.605527638190956e-05, - "loss": 5.2882, - "step": 14379 - }, - { - "epoch": 7.499348109517601, - "grad_norm": 1.469238519668579, - "learning_rate": 8.605427135678393e-05, - "loss": 5.3945, - "step": 14380 - }, - { - "epoch": 7.49986962190352, - "grad_norm": 1.5600395202636719, - "learning_rate": 8.60532663316583e-05, - "loss": 5.7606, - "step": 14381 - }, - { - "epoch": 7.50039113428944, - "grad_norm": 1.3746979236602783, - "learning_rate": 8.605226130653267e-05, - "loss": 5.7023, - "step": 14382 - }, - { - "epoch": 7.500912646675358, - "grad_norm": 1.4826563596725464, - "learning_rate": 8.605125628140705e-05, - "loss": 5.389, - "step": 14383 - }, - { - "epoch": 7.501434159061278, - "grad_norm": 1.403035044670105, - "learning_rate": 8.605025125628141e-05, - "loss": 5.3065, - "step": 14384 - }, - { - "epoch": 7.501955671447197, - "grad_norm": 1.4151207208633423, - "learning_rate": 8.604924623115577e-05, - "loss": 5.562, - "step": 14385 - }, - { - "epoch": 7.502477183833116, - "grad_norm": 1.4127271175384521, - "learning_rate": 8.604824120603015e-05, - "loss": 5.6924, - "step": 14386 - }, - { - "epoch": 7.502998696219035, - "grad_norm": 1.569226861000061, - "learning_rate": 8.604723618090452e-05, - "loss": 5.9015, - "step": 14387 - }, - { - "epoch": 7.503520208604955, - "grad_norm": 1.4697234630584717, - "learning_rate": 8.60462311557789e-05, - "loss": 5.6387, - "step": 14388 - }, - { - "epoch": 7.504041720990873, - "grad_norm": 1.353944182395935, - "learning_rate": 8.604522613065327e-05, - "loss": 5.8782, - "step": 14389 - }, - { - "epoch": 7.504563233376793, - "grad_norm": 1.2924259901046753, - "learning_rate": 8.604422110552765e-05, - "loss": 5.8684, - "step": 14390 - }, - { - "epoch": 7.505084745762712, - "grad_norm": 1.4000943899154663, - "learning_rate": 8.604321608040201e-05, - "loss": 4.7434, - "step": 14391 - }, - { - "epoch": 7.505606258148631, - "grad_norm": 1.4217276573181152, - "learning_rate": 8.604221105527639e-05, - "loss": 5.3836, - "step": 14392 - }, - { - "epoch": 7.50612777053455, - "grad_norm": 1.4223589897155762, - "learning_rate": 8.604120603015076e-05, - "loss": 5.45, - "step": 14393 - }, - { - "epoch": 7.50664928292047, - "grad_norm": 1.4876317977905273, - "learning_rate": 8.604020100502513e-05, - "loss": 5.0462, - "step": 14394 - }, - { - "epoch": 7.507170795306388, - "grad_norm": 1.4112242460250854, - "learning_rate": 8.60391959798995e-05, - "loss": 5.4269, - "step": 14395 - }, - { - "epoch": 7.507692307692308, - "grad_norm": 1.35599946975708, - "learning_rate": 8.603819095477388e-05, - "loss": 5.1399, - "step": 14396 - }, - { - "epoch": 7.508213820078227, - "grad_norm": 1.4037548303604126, - "learning_rate": 8.603718592964824e-05, - "loss": 5.7485, - "step": 14397 - }, - { - "epoch": 7.508735332464146, - "grad_norm": 1.39333975315094, - "learning_rate": 8.603618090452262e-05, - "loss": 5.6734, - "step": 14398 - }, - { - "epoch": 7.509256844850065, - "grad_norm": 1.3511556386947632, - "learning_rate": 8.6035175879397e-05, - "loss": 5.9106, - "step": 14399 - }, - { - "epoch": 7.509778357235985, - "grad_norm": 1.4763686656951904, - "learning_rate": 8.603417085427136e-05, - "loss": 5.1948, - "step": 14400 - }, - { - "epoch": 7.510299869621903, - "grad_norm": 1.3850815296173096, - "learning_rate": 8.603316582914574e-05, - "loss": 5.9674, - "step": 14401 - }, - { - "epoch": 7.510821382007823, - "grad_norm": 1.3484561443328857, - "learning_rate": 8.60321608040201e-05, - "loss": 5.6702, - "step": 14402 - }, - { - "epoch": 7.511342894393742, - "grad_norm": 1.4632606506347656, - "learning_rate": 8.603115577889448e-05, - "loss": 5.7945, - "step": 14403 - }, - { - "epoch": 7.511864406779661, - "grad_norm": 1.4342886209487915, - "learning_rate": 8.603015075376884e-05, - "loss": 5.8189, - "step": 14404 - }, - { - "epoch": 7.51238591916558, - "grad_norm": 1.3690242767333984, - "learning_rate": 8.602914572864322e-05, - "loss": 5.6657, - "step": 14405 - }, - { - "epoch": 7.5129074315515, - "grad_norm": 1.4263441562652588, - "learning_rate": 8.602814070351759e-05, - "loss": 5.3018, - "step": 14406 - }, - { - "epoch": 7.513428943937418, - "grad_norm": 1.4232409000396729, - "learning_rate": 8.602713567839196e-05, - "loss": 5.5999, - "step": 14407 - }, - { - "epoch": 7.513950456323338, - "grad_norm": 1.4232099056243896, - "learning_rate": 8.602613065326633e-05, - "loss": 5.4424, - "step": 14408 - }, - { - "epoch": 7.514471968709257, - "grad_norm": 1.4716486930847168, - "learning_rate": 8.60251256281407e-05, - "loss": 5.3518, - "step": 14409 - }, - { - "epoch": 7.514993481095176, - "grad_norm": 1.5529555082321167, - "learning_rate": 8.602412060301508e-05, - "loss": 4.8271, - "step": 14410 - }, - { - "epoch": 7.515514993481095, - "grad_norm": 1.3759700059890747, - "learning_rate": 8.602311557788945e-05, - "loss": 5.6752, - "step": 14411 - }, - { - "epoch": 7.516036505867015, - "grad_norm": 1.3353825807571411, - "learning_rate": 8.602211055276383e-05, - "loss": 5.7744, - "step": 14412 - }, - { - "epoch": 7.516558018252933, - "grad_norm": 1.339860439300537, - "learning_rate": 8.602110552763819e-05, - "loss": 5.5908, - "step": 14413 - }, - { - "epoch": 7.517079530638853, - "grad_norm": 1.3372331857681274, - "learning_rate": 8.602010050251257e-05, - "loss": 5.8045, - "step": 14414 - }, - { - "epoch": 7.517601043024772, - "grad_norm": 1.5018301010131836, - "learning_rate": 8.601909547738693e-05, - "loss": 5.5836, - "step": 14415 - }, - { - "epoch": 7.518122555410691, - "grad_norm": 1.3613617420196533, - "learning_rate": 8.601809045226131e-05, - "loss": 5.7909, - "step": 14416 - }, - { - "epoch": 7.51864406779661, - "grad_norm": 1.4318809509277344, - "learning_rate": 8.601708542713568e-05, - "loss": 5.427, - "step": 14417 - }, - { - "epoch": 7.51916558018253, - "grad_norm": 1.3824576139450073, - "learning_rate": 8.601608040201005e-05, - "loss": 5.3923, - "step": 14418 - }, - { - "epoch": 7.519687092568448, - "grad_norm": 1.4758055210113525, - "learning_rate": 8.601507537688443e-05, - "loss": 4.8623, - "step": 14419 - }, - { - "epoch": 7.520208604954368, - "grad_norm": 1.4531676769256592, - "learning_rate": 8.601407035175881e-05, - "loss": 5.6739, - "step": 14420 - }, - { - "epoch": 7.520730117340287, - "grad_norm": 1.496105670928955, - "learning_rate": 8.601306532663317e-05, - "loss": 5.1408, - "step": 14421 - }, - { - "epoch": 7.521251629726206, - "grad_norm": 1.534218192100525, - "learning_rate": 8.601206030150755e-05, - "loss": 5.451, - "step": 14422 - }, - { - "epoch": 7.521773142112125, - "grad_norm": 1.4804588556289673, - "learning_rate": 8.601105527638191e-05, - "loss": 5.54, - "step": 14423 - }, - { - "epoch": 7.522294654498045, - "grad_norm": 1.330087423324585, - "learning_rate": 8.601005025125628e-05, - "loss": 5.4902, - "step": 14424 - }, - { - "epoch": 7.522816166883963, - "grad_norm": 1.3668454885482788, - "learning_rate": 8.600904522613066e-05, - "loss": 5.2132, - "step": 14425 - }, - { - "epoch": 7.523337679269883, - "grad_norm": 1.491457462310791, - "learning_rate": 8.600804020100502e-05, - "loss": 5.6804, - "step": 14426 - }, - { - "epoch": 7.523859191655802, - "grad_norm": 1.4169975519180298, - "learning_rate": 8.60070351758794e-05, - "loss": 5.6025, - "step": 14427 - }, - { - "epoch": 7.524380704041721, - "grad_norm": 1.469190239906311, - "learning_rate": 8.600603015075376e-05, - "loss": 4.8548, - "step": 14428 - }, - { - "epoch": 7.52490221642764, - "grad_norm": 1.3302276134490967, - "learning_rate": 8.600502512562814e-05, - "loss": 5.7888, - "step": 14429 - }, - { - "epoch": 7.52542372881356, - "grad_norm": 1.4936333894729614, - "learning_rate": 8.600402010050252e-05, - "loss": 5.2033, - "step": 14430 - }, - { - "epoch": 7.525945241199478, - "grad_norm": 1.512298822402954, - "learning_rate": 8.60030150753769e-05, - "loss": 5.5871, - "step": 14431 - }, - { - "epoch": 7.526466753585398, - "grad_norm": 1.3258998394012451, - "learning_rate": 8.600201005025126e-05, - "loss": 5.8521, - "step": 14432 - }, - { - "epoch": 7.526988265971317, - "grad_norm": 1.3351576328277588, - "learning_rate": 8.600100502512564e-05, - "loss": 5.7803, - "step": 14433 - }, - { - "epoch": 7.527509778357236, - "grad_norm": 1.3888005018234253, - "learning_rate": 8.6e-05, - "loss": 5.5329, - "step": 14434 - }, - { - "epoch": 7.528031290743155, - "grad_norm": 1.3629043102264404, - "learning_rate": 8.599899497487438e-05, - "loss": 5.6424, - "step": 14435 - }, - { - "epoch": 7.528552803129074, - "grad_norm": 1.4096856117248535, - "learning_rate": 8.599798994974875e-05, - "loss": 5.2571, - "step": 14436 - }, - { - "epoch": 7.529074315514993, - "grad_norm": 1.2979843616485596, - "learning_rate": 8.599698492462312e-05, - "loss": 5.6272, - "step": 14437 - }, - { - "epoch": 7.529595827900913, - "grad_norm": 1.3374485969543457, - "learning_rate": 8.599597989949749e-05, - "loss": 5.9124, - "step": 14438 - }, - { - "epoch": 7.530117340286832, - "grad_norm": 1.3806493282318115, - "learning_rate": 8.599497487437187e-05, - "loss": 5.3851, - "step": 14439 - }, - { - "epoch": 7.530638852672751, - "grad_norm": 1.3650271892547607, - "learning_rate": 8.599396984924624e-05, - "loss": 5.5857, - "step": 14440 - }, - { - "epoch": 7.53116036505867, - "grad_norm": 1.3986033201217651, - "learning_rate": 8.599296482412061e-05, - "loss": 6.0091, - "step": 14441 - }, - { - "epoch": 7.531681877444589, - "grad_norm": 1.366439938545227, - "learning_rate": 8.599195979899499e-05, - "loss": 5.5646, - "step": 14442 - }, - { - "epoch": 7.532203389830508, - "grad_norm": 1.4063334465026855, - "learning_rate": 8.599095477386935e-05, - "loss": 5.4478, - "step": 14443 - }, - { - "epoch": 7.532724902216428, - "grad_norm": 1.5154016017913818, - "learning_rate": 8.598994974874373e-05, - "loss": 5.284, - "step": 14444 - }, - { - "epoch": 7.533246414602347, - "grad_norm": 1.3358287811279297, - "learning_rate": 8.598894472361809e-05, - "loss": 5.3807, - "step": 14445 - }, - { - "epoch": 7.533767926988266, - "grad_norm": 1.4691824913024902, - "learning_rate": 8.598793969849247e-05, - "loss": 5.7575, - "step": 14446 - }, - { - "epoch": 7.534289439374185, - "grad_norm": 1.4193345308303833, - "learning_rate": 8.598693467336683e-05, - "loss": 5.4015, - "step": 14447 - }, - { - "epoch": 7.534810951760104, - "grad_norm": 1.415890097618103, - "learning_rate": 8.598592964824121e-05, - "loss": 5.8251, - "step": 14448 - }, - { - "epoch": 7.535332464146023, - "grad_norm": 1.4307185411453247, - "learning_rate": 8.598492462311558e-05, - "loss": 4.9567, - "step": 14449 - }, - { - "epoch": 7.535853976531943, - "grad_norm": 1.449263334274292, - "learning_rate": 8.598391959798995e-05, - "loss": 5.6431, - "step": 14450 - }, - { - "epoch": 7.536375488917862, - "grad_norm": 1.53867506980896, - "learning_rate": 8.598291457286433e-05, - "loss": 5.398, - "step": 14451 - }, - { - "epoch": 7.536897001303781, - "grad_norm": 1.434108018875122, - "learning_rate": 8.59819095477387e-05, - "loss": 5.5036, - "step": 14452 - }, - { - "epoch": 7.5374185136897, - "grad_norm": 1.3901947736740112, - "learning_rate": 8.598090452261307e-05, - "loss": 5.622, - "step": 14453 - }, - { - "epoch": 7.537940026075619, - "grad_norm": 1.5232458114624023, - "learning_rate": 8.597989949748744e-05, - "loss": 5.5522, - "step": 14454 - }, - { - "epoch": 7.538461538461538, - "grad_norm": 1.437117338180542, - "learning_rate": 8.597889447236182e-05, - "loss": 5.8877, - "step": 14455 - }, - { - "epoch": 7.538983050847458, - "grad_norm": 1.5587292909622192, - "learning_rate": 8.597788944723618e-05, - "loss": 5.2392, - "step": 14456 - }, - { - "epoch": 7.539504563233377, - "grad_norm": 1.5550967454910278, - "learning_rate": 8.597688442211056e-05, - "loss": 5.7528, - "step": 14457 - }, - { - "epoch": 7.540026075619296, - "grad_norm": 1.2937589883804321, - "learning_rate": 8.597587939698492e-05, - "loss": 6.0642, - "step": 14458 - }, - { - "epoch": 7.540547588005215, - "grad_norm": 1.3744772672653198, - "learning_rate": 8.59748743718593e-05, - "loss": 5.6839, - "step": 14459 - }, - { - "epoch": 7.541069100391134, - "grad_norm": 1.498262643814087, - "learning_rate": 8.597386934673366e-05, - "loss": 5.6816, - "step": 14460 - }, - { - "epoch": 7.541590612777053, - "grad_norm": 1.398675799369812, - "learning_rate": 8.597286432160804e-05, - "loss": 5.2682, - "step": 14461 - }, - { - "epoch": 7.542112125162973, - "grad_norm": 1.4919991493225098, - "learning_rate": 8.597185929648242e-05, - "loss": 5.569, - "step": 14462 - }, - { - "epoch": 7.542633637548892, - "grad_norm": 1.3810702562332153, - "learning_rate": 8.59708542713568e-05, - "loss": 5.3449, - "step": 14463 - }, - { - "epoch": 7.543155149934811, - "grad_norm": 1.480148434638977, - "learning_rate": 8.596984924623116e-05, - "loss": 5.5025, - "step": 14464 - }, - { - "epoch": 7.54367666232073, - "grad_norm": 1.4620416164398193, - "learning_rate": 8.596884422110553e-05, - "loss": 5.5, - "step": 14465 - }, - { - "epoch": 7.544198174706649, - "grad_norm": 1.3785277605056763, - "learning_rate": 8.59678391959799e-05, - "loss": 5.8036, - "step": 14466 - }, - { - "epoch": 7.544719687092568, - "grad_norm": 1.348463535308838, - "learning_rate": 8.596683417085427e-05, - "loss": 5.9949, - "step": 14467 - }, - { - "epoch": 7.545241199478488, - "grad_norm": 1.608410120010376, - "learning_rate": 8.596582914572865e-05, - "loss": 4.9123, - "step": 14468 - }, - { - "epoch": 7.545762711864406, - "grad_norm": 1.5562827587127686, - "learning_rate": 8.596482412060301e-05, - "loss": 5.4268, - "step": 14469 - }, - { - "epoch": 7.546284224250326, - "grad_norm": 1.3261356353759766, - "learning_rate": 8.596381909547739e-05, - "loss": 5.9104, - "step": 14470 - }, - { - "epoch": 7.546805736636245, - "grad_norm": 1.419384479522705, - "learning_rate": 8.596281407035177e-05, - "loss": 5.4102, - "step": 14471 - }, - { - "epoch": 7.547327249022164, - "grad_norm": 1.487569808959961, - "learning_rate": 8.596180904522614e-05, - "loss": 5.7281, - "step": 14472 - }, - { - "epoch": 7.547848761408083, - "grad_norm": 1.5440367460250854, - "learning_rate": 8.596080402010051e-05, - "loss": 5.6354, - "step": 14473 - }, - { - "epoch": 7.548370273794003, - "grad_norm": 1.8302373886108398, - "learning_rate": 8.595979899497489e-05, - "loss": 5.5104, - "step": 14474 - }, - { - "epoch": 7.548891786179921, - "grad_norm": 1.380648136138916, - "learning_rate": 8.595879396984925e-05, - "loss": 5.6028, - "step": 14475 - }, - { - "epoch": 7.549413298565841, - "grad_norm": 1.5158854722976685, - "learning_rate": 8.595778894472363e-05, - "loss": 5.3443, - "step": 14476 - }, - { - "epoch": 7.54993481095176, - "grad_norm": 1.4212790727615356, - "learning_rate": 8.595678391959799e-05, - "loss": 5.7541, - "step": 14477 - }, - { - "epoch": 7.550456323337679, - "grad_norm": 1.4399834871292114, - "learning_rate": 8.595577889447236e-05, - "loss": 5.4171, - "step": 14478 - }, - { - "epoch": 7.550977835723598, - "grad_norm": 1.3858368396759033, - "learning_rate": 8.595477386934673e-05, - "loss": 5.6575, - "step": 14479 - }, - { - "epoch": 7.551499348109518, - "grad_norm": 1.3507689237594604, - "learning_rate": 8.59537688442211e-05, - "loss": 5.5356, - "step": 14480 - }, - { - "epoch": 7.552020860495436, - "grad_norm": 1.3642578125, - "learning_rate": 8.595276381909548e-05, - "loss": 5.8055, - "step": 14481 - }, - { - "epoch": 7.552542372881356, - "grad_norm": 1.4757659435272217, - "learning_rate": 8.595175879396985e-05, - "loss": 5.4589, - "step": 14482 - }, - { - "epoch": 7.553063885267275, - "grad_norm": 1.4366340637207031, - "learning_rate": 8.595075376884423e-05, - "loss": 5.456, - "step": 14483 - }, - { - "epoch": 7.553585397653194, - "grad_norm": 1.3678559064865112, - "learning_rate": 8.59497487437186e-05, - "loss": 5.866, - "step": 14484 - }, - { - "epoch": 7.554106910039113, - "grad_norm": 1.377436876296997, - "learning_rate": 8.594874371859297e-05, - "loss": 5.7199, - "step": 14485 - }, - { - "epoch": 7.554628422425033, - "grad_norm": 1.4054288864135742, - "learning_rate": 8.594773869346734e-05, - "loss": 5.829, - "step": 14486 - }, - { - "epoch": 7.555149934810951, - "grad_norm": 1.4703978300094604, - "learning_rate": 8.594673366834172e-05, - "loss": 5.4015, - "step": 14487 - }, - { - "epoch": 7.555671447196871, - "grad_norm": 1.502322793006897, - "learning_rate": 8.594572864321608e-05, - "loss": 5.4738, - "step": 14488 - }, - { - "epoch": 7.55619295958279, - "grad_norm": 1.4493664503097534, - "learning_rate": 8.594472361809046e-05, - "loss": 5.5929, - "step": 14489 - }, - { - "epoch": 7.556714471968709, - "grad_norm": 1.4948803186416626, - "learning_rate": 8.594371859296482e-05, - "loss": 5.6785, - "step": 14490 - }, - { - "epoch": 7.557235984354628, - "grad_norm": 1.4655007123947144, - "learning_rate": 8.59427135678392e-05, - "loss": 6.0633, - "step": 14491 - }, - { - "epoch": 7.557757496740548, - "grad_norm": 1.351957082748413, - "learning_rate": 8.594170854271358e-05, - "loss": 5.2559, - "step": 14492 - }, - { - "epoch": 7.558279009126466, - "grad_norm": 1.675897479057312, - "learning_rate": 8.594070351758794e-05, - "loss": 5.0084, - "step": 14493 - }, - { - "epoch": 7.558800521512386, - "grad_norm": 1.486124038696289, - "learning_rate": 8.593969849246232e-05, - "loss": 5.3124, - "step": 14494 - }, - { - "epoch": 7.559322033898305, - "grad_norm": 1.5843571424484253, - "learning_rate": 8.593869346733668e-05, - "loss": 4.7213, - "step": 14495 - }, - { - "epoch": 7.559843546284224, - "grad_norm": 1.4767789840698242, - "learning_rate": 8.593768844221106e-05, - "loss": 5.6542, - "step": 14496 - }, - { - "epoch": 7.560365058670143, - "grad_norm": 1.5027638673782349, - "learning_rate": 8.593668341708543e-05, - "loss": 5.6343, - "step": 14497 - }, - { - "epoch": 7.560886571056063, - "grad_norm": 1.38805091381073, - "learning_rate": 8.59356783919598e-05, - "loss": 5.9068, - "step": 14498 - }, - { - "epoch": 7.5614080834419815, - "grad_norm": 1.3647178411483765, - "learning_rate": 8.593467336683417e-05, - "loss": 5.8438, - "step": 14499 - }, - { - "epoch": 7.561929595827901, - "grad_norm": 1.3861160278320312, - "learning_rate": 8.593366834170855e-05, - "loss": 5.5691, - "step": 14500 - }, - { - "epoch": 7.56245110821382, - "grad_norm": 1.3433409929275513, - "learning_rate": 8.593266331658291e-05, - "loss": 5.8111, - "step": 14501 - }, - { - "epoch": 7.562972620599739, - "grad_norm": 1.4534703493118286, - "learning_rate": 8.593165829145729e-05, - "loss": 5.737, - "step": 14502 - }, - { - "epoch": 7.563494132985658, - "grad_norm": 1.4970481395721436, - "learning_rate": 8.593065326633167e-05, - "loss": 5.6365, - "step": 14503 - }, - { - "epoch": 7.564015645371578, - "grad_norm": 1.42659592628479, - "learning_rate": 8.592964824120603e-05, - "loss": 5.668, - "step": 14504 - }, - { - "epoch": 7.5645371577574965, - "grad_norm": 1.5125224590301514, - "learning_rate": 8.592864321608041e-05, - "loss": 5.5271, - "step": 14505 - }, - { - "epoch": 7.565058670143416, - "grad_norm": 1.475508689880371, - "learning_rate": 8.592763819095477e-05, - "loss": 5.8637, - "step": 14506 - }, - { - "epoch": 7.565580182529335, - "grad_norm": 1.5756394863128662, - "learning_rate": 8.592663316582915e-05, - "loss": 5.1239, - "step": 14507 - }, - { - "epoch": 7.566101694915254, - "grad_norm": 1.4717713594436646, - "learning_rate": 8.592562814070352e-05, - "loss": 5.0009, - "step": 14508 - }, - { - "epoch": 7.566623207301173, - "grad_norm": 1.3063156604766846, - "learning_rate": 8.592462311557789e-05, - "loss": 6.163, - "step": 14509 - }, - { - "epoch": 7.567144719687093, - "grad_norm": 1.4517159461975098, - "learning_rate": 8.592361809045226e-05, - "loss": 5.211, - "step": 14510 - }, - { - "epoch": 7.5676662320730115, - "grad_norm": 1.3912851810455322, - "learning_rate": 8.592261306532664e-05, - "loss": 5.778, - "step": 14511 - }, - { - "epoch": 7.568187744458931, - "grad_norm": 1.343937873840332, - "learning_rate": 8.592160804020101e-05, - "loss": 5.5559, - "step": 14512 - }, - { - "epoch": 7.56870925684485, - "grad_norm": 1.5128029584884644, - "learning_rate": 8.592060301507539e-05, - "loss": 5.522, - "step": 14513 - }, - { - "epoch": 7.569230769230769, - "grad_norm": 1.5881611108779907, - "learning_rate": 8.591959798994976e-05, - "loss": 5.4716, - "step": 14514 - }, - { - "epoch": 7.5697522816166884, - "grad_norm": 1.3552659749984741, - "learning_rate": 8.591859296482413e-05, - "loss": 5.87, - "step": 14515 - }, - { - "epoch": 7.570273794002608, - "grad_norm": 1.4371100664138794, - "learning_rate": 8.59175879396985e-05, - "loss": 5.4748, - "step": 14516 - }, - { - "epoch": 7.5707953063885265, - "grad_norm": 1.406935453414917, - "learning_rate": 8.591658291457286e-05, - "loss": 5.4257, - "step": 14517 - }, - { - "epoch": 7.571316818774446, - "grad_norm": 1.3984942436218262, - "learning_rate": 8.591557788944724e-05, - "loss": 5.3912, - "step": 14518 - }, - { - "epoch": 7.571838331160365, - "grad_norm": 1.581470251083374, - "learning_rate": 8.59145728643216e-05, - "loss": 5.1202, - "step": 14519 - }, - { - "epoch": 7.572359843546284, - "grad_norm": 1.4650976657867432, - "learning_rate": 8.591356783919598e-05, - "loss": 5.7334, - "step": 14520 - }, - { - "epoch": 7.5728813559322035, - "grad_norm": 1.5230457782745361, - "learning_rate": 8.591256281407035e-05, - "loss": 5.4284, - "step": 14521 - }, - { - "epoch": 7.573402868318123, - "grad_norm": 1.5607821941375732, - "learning_rate": 8.591155778894472e-05, - "loss": 5.5479, - "step": 14522 - }, - { - "epoch": 7.5739243807040415, - "grad_norm": 1.4428925514221191, - "learning_rate": 8.59105527638191e-05, - "loss": 5.8909, - "step": 14523 - }, - { - "epoch": 7.574445893089961, - "grad_norm": 1.5931625366210938, - "learning_rate": 8.590954773869348e-05, - "loss": 5.0729, - "step": 14524 - }, - { - "epoch": 7.57496740547588, - "grad_norm": 1.4365637302398682, - "learning_rate": 8.590854271356784e-05, - "loss": 5.4241, - "step": 14525 - }, - { - "epoch": 7.575488917861799, - "grad_norm": 1.6107031106948853, - "learning_rate": 8.590753768844222e-05, - "loss": 4.9528, - "step": 14526 - }, - { - "epoch": 7.5760104302477185, - "grad_norm": 1.5184694528579712, - "learning_rate": 8.590653266331659e-05, - "loss": 5.1976, - "step": 14527 - }, - { - "epoch": 7.576531942633638, - "grad_norm": 1.4071118831634521, - "learning_rate": 8.590552763819096e-05, - "loss": 5.8603, - "step": 14528 - }, - { - "epoch": 7.5770534550195565, - "grad_norm": 1.459815263748169, - "learning_rate": 8.590452261306533e-05, - "loss": 5.7651, - "step": 14529 - }, - { - "epoch": 7.577574967405476, - "grad_norm": 1.5541770458221436, - "learning_rate": 8.59035175879397e-05, - "loss": 5.2337, - "step": 14530 - }, - { - "epoch": 7.578096479791395, - "grad_norm": 1.3894541263580322, - "learning_rate": 8.590251256281407e-05, - "loss": 5.6794, - "step": 14531 - }, - { - "epoch": 7.578617992177314, - "grad_norm": 1.4117916822433472, - "learning_rate": 8.590150753768845e-05, - "loss": 5.4685, - "step": 14532 - }, - { - "epoch": 7.5791395045632335, - "grad_norm": 1.4792473316192627, - "learning_rate": 8.590050251256283e-05, - "loss": 5.9351, - "step": 14533 - }, - { - "epoch": 7.579661016949153, - "grad_norm": 1.4343838691711426, - "learning_rate": 8.589949748743719e-05, - "loss": 5.4192, - "step": 14534 - }, - { - "epoch": 7.5801825293350715, - "grad_norm": 1.3310928344726562, - "learning_rate": 8.589849246231157e-05, - "loss": 5.6716, - "step": 14535 - }, - { - "epoch": 7.580704041720991, - "grad_norm": 1.4210768938064575, - "learning_rate": 8.589748743718593e-05, - "loss": 5.8952, - "step": 14536 - }, - { - "epoch": 7.58122555410691, - "grad_norm": 1.4781638383865356, - "learning_rate": 8.589648241206031e-05, - "loss": 5.6032, - "step": 14537 - }, - { - "epoch": 7.581747066492829, - "grad_norm": 1.4891130924224854, - "learning_rate": 8.589547738693467e-05, - "loss": 5.3457, - "step": 14538 - }, - { - "epoch": 7.5822685788787485, - "grad_norm": 1.453339695930481, - "learning_rate": 8.589447236180905e-05, - "loss": 5.626, - "step": 14539 - }, - { - "epoch": 7.582790091264668, - "grad_norm": 1.4110653400421143, - "learning_rate": 8.589346733668342e-05, - "loss": 5.4056, - "step": 14540 - }, - { - "epoch": 7.5833116036505865, - "grad_norm": 1.3479136228561401, - "learning_rate": 8.58924623115578e-05, - "loss": 5.2099, - "step": 14541 - }, - { - "epoch": 7.583833116036506, - "grad_norm": 1.3129987716674805, - "learning_rate": 8.589145728643216e-05, - "loss": 5.8359, - "step": 14542 - }, - { - "epoch": 7.584354628422425, - "grad_norm": 1.4158906936645508, - "learning_rate": 8.589045226130654e-05, - "loss": 4.9837, - "step": 14543 - }, - { - "epoch": 7.584876140808344, - "grad_norm": 1.5240343809127808, - "learning_rate": 8.588944723618091e-05, - "loss": 5.398, - "step": 14544 - }, - { - "epoch": 7.5853976531942635, - "grad_norm": 1.4125995635986328, - "learning_rate": 8.588844221105528e-05, - "loss": 5.6063, - "step": 14545 - }, - { - "epoch": 7.585919165580183, - "grad_norm": 1.7432910203933716, - "learning_rate": 8.588743718592966e-05, - "loss": 5.3051, - "step": 14546 - }, - { - "epoch": 7.5864406779661016, - "grad_norm": 1.4536397457122803, - "learning_rate": 8.588643216080402e-05, - "loss": 5.6085, - "step": 14547 - }, - { - "epoch": 7.586962190352021, - "grad_norm": 1.5073397159576416, - "learning_rate": 8.58854271356784e-05, - "loss": 5.3252, - "step": 14548 - }, - { - "epoch": 7.58748370273794, - "grad_norm": 1.4076570272445679, - "learning_rate": 8.588442211055276e-05, - "loss": 6.083, - "step": 14549 - }, - { - "epoch": 7.588005215123859, - "grad_norm": 1.5253801345825195, - "learning_rate": 8.588341708542714e-05, - "loss": 5.5869, - "step": 14550 - }, - { - "epoch": 7.5885267275097785, - "grad_norm": 1.4401565790176392, - "learning_rate": 8.58824120603015e-05, - "loss": 5.3567, - "step": 14551 - }, - { - "epoch": 7.589048239895698, - "grad_norm": 1.4530861377716064, - "learning_rate": 8.588140703517588e-05, - "loss": 5.7219, - "step": 14552 - }, - { - "epoch": 7.589569752281617, - "grad_norm": 1.6317775249481201, - "learning_rate": 8.588040201005026e-05, - "loss": 5.271, - "step": 14553 - }, - { - "epoch": 7.590091264667536, - "grad_norm": 1.4088332653045654, - "learning_rate": 8.587939698492464e-05, - "loss": 5.887, - "step": 14554 - }, - { - "epoch": 7.590612777053455, - "grad_norm": 1.4815906286239624, - "learning_rate": 8.5878391959799e-05, - "loss": 5.3528, - "step": 14555 - }, - { - "epoch": 7.591134289439374, - "grad_norm": 1.4869357347488403, - "learning_rate": 8.587738693467338e-05, - "loss": 5.5335, - "step": 14556 - }, - { - "epoch": 7.5916558018252935, - "grad_norm": 1.6332803964614868, - "learning_rate": 8.587638190954774e-05, - "loss": 4.9895, - "step": 14557 - }, - { - "epoch": 7.592177314211213, - "grad_norm": 1.4781339168548584, - "learning_rate": 8.587537688442211e-05, - "loss": 5.1508, - "step": 14558 - }, - { - "epoch": 7.592698826597132, - "grad_norm": 1.6180503368377686, - "learning_rate": 8.587437185929649e-05, - "loss": 5.1106, - "step": 14559 - }, - { - "epoch": 7.593220338983051, - "grad_norm": 1.3682177066802979, - "learning_rate": 8.587336683417085e-05, - "loss": 5.3729, - "step": 14560 - }, - { - "epoch": 7.59374185136897, - "grad_norm": 1.426377296447754, - "learning_rate": 8.587236180904523e-05, - "loss": 5.187, - "step": 14561 - }, - { - "epoch": 7.594263363754889, - "grad_norm": 1.3945800065994263, - "learning_rate": 8.587135678391959e-05, - "loss": 5.6364, - "step": 14562 - }, - { - "epoch": 7.5947848761408085, - "grad_norm": 1.5481476783752441, - "learning_rate": 8.587035175879397e-05, - "loss": 5.8999, - "step": 14563 - }, - { - "epoch": 7.595306388526727, - "grad_norm": 1.465919852256775, - "learning_rate": 8.586934673366835e-05, - "loss": 5.9348, - "step": 14564 - }, - { - "epoch": 7.595827900912647, - "grad_norm": 1.4737963676452637, - "learning_rate": 8.586834170854273e-05, - "loss": 5.6371, - "step": 14565 - }, - { - "epoch": 7.596349413298566, - "grad_norm": 1.4509172439575195, - "learning_rate": 8.586733668341709e-05, - "loss": 5.5305, - "step": 14566 - }, - { - "epoch": 7.596870925684485, - "grad_norm": 1.4933651685714722, - "learning_rate": 8.586633165829147e-05, - "loss": 5.4979, - "step": 14567 - }, - { - "epoch": 7.597392438070404, - "grad_norm": 1.4171520471572876, - "learning_rate": 8.586532663316583e-05, - "loss": 5.9455, - "step": 14568 - }, - { - "epoch": 7.597913950456324, - "grad_norm": 1.4872018098831177, - "learning_rate": 8.586432160804021e-05, - "loss": 5.6514, - "step": 14569 - }, - { - "epoch": 7.598435462842242, - "grad_norm": 1.4347187280654907, - "learning_rate": 8.586331658291457e-05, - "loss": 5.5563, - "step": 14570 - }, - { - "epoch": 7.598956975228162, - "grad_norm": 1.3686541318893433, - "learning_rate": 8.586231155778894e-05, - "loss": 5.4448, - "step": 14571 - }, - { - "epoch": 7.599478487614081, - "grad_norm": 1.5874465703964233, - "learning_rate": 8.586130653266332e-05, - "loss": 5.4713, - "step": 14572 - }, - { - "epoch": 7.6, - "grad_norm": 1.3978043794631958, - "learning_rate": 8.58603015075377e-05, - "loss": 5.678, - "step": 14573 - }, - { - "epoch": 7.600521512385919, - "grad_norm": 1.8229976892471313, - "learning_rate": 8.585929648241207e-05, - "loss": 5.5195, - "step": 14574 - }, - { - "epoch": 7.601043024771839, - "grad_norm": 1.413501262664795, - "learning_rate": 8.585829145728644e-05, - "loss": 5.7271, - "step": 14575 - }, - { - "epoch": 7.601564537157757, - "grad_norm": 1.420708179473877, - "learning_rate": 8.585728643216081e-05, - "loss": 5.3309, - "step": 14576 - }, - { - "epoch": 7.602086049543677, - "grad_norm": 1.5734912157058716, - "learning_rate": 8.585628140703518e-05, - "loss": 5.5062, - "step": 14577 - }, - { - "epoch": 7.602607561929596, - "grad_norm": 1.4779011011123657, - "learning_rate": 8.585527638190956e-05, - "loss": 5.5413, - "step": 14578 - }, - { - "epoch": 7.603129074315515, - "grad_norm": 1.727867603302002, - "learning_rate": 8.585427135678392e-05, - "loss": 5.3585, - "step": 14579 - }, - { - "epoch": 7.603650586701434, - "grad_norm": 1.494776964187622, - "learning_rate": 8.58532663316583e-05, - "loss": 5.5079, - "step": 14580 - }, - { - "epoch": 7.604172099087354, - "grad_norm": 1.4733668565750122, - "learning_rate": 8.585226130653266e-05, - "loss": 5.7606, - "step": 14581 - }, - { - "epoch": 7.604693611473272, - "grad_norm": 1.4775092601776123, - "learning_rate": 8.585125628140704e-05, - "loss": 5.0424, - "step": 14582 - }, - { - "epoch": 7.605215123859192, - "grad_norm": 1.3640263080596924, - "learning_rate": 8.58502512562814e-05, - "loss": 5.8514, - "step": 14583 - }, - { - "epoch": 7.605736636245111, - "grad_norm": 1.4299324750900269, - "learning_rate": 8.584924623115578e-05, - "loss": 5.5092, - "step": 14584 - }, - { - "epoch": 7.60625814863103, - "grad_norm": 1.3825961351394653, - "learning_rate": 8.584824120603016e-05, - "loss": 5.7907, - "step": 14585 - }, - { - "epoch": 7.606779661016949, - "grad_norm": 1.4047893285751343, - "learning_rate": 8.584723618090453e-05, - "loss": 5.266, - "step": 14586 - }, - { - "epoch": 7.607301173402869, - "grad_norm": 1.6981569528579712, - "learning_rate": 8.58462311557789e-05, - "loss": 5.5537, - "step": 14587 - }, - { - "epoch": 7.607822685788787, - "grad_norm": 1.4846986532211304, - "learning_rate": 8.584522613065327e-05, - "loss": 5.2103, - "step": 14588 - }, - { - "epoch": 7.608344198174707, - "grad_norm": 1.4243905544281006, - "learning_rate": 8.584422110552765e-05, - "loss": 5.3094, - "step": 14589 - }, - { - "epoch": 7.608865710560626, - "grad_norm": 1.459595799446106, - "learning_rate": 8.584321608040201e-05, - "loss": 5.7184, - "step": 14590 - }, - { - "epoch": 7.609387222946545, - "grad_norm": 1.4177128076553345, - "learning_rate": 8.584221105527639e-05, - "loss": 5.876, - "step": 14591 - }, - { - "epoch": 7.609908735332464, - "grad_norm": 1.4679104089736938, - "learning_rate": 8.584120603015075e-05, - "loss": 5.6221, - "step": 14592 - }, - { - "epoch": 7.610430247718384, - "grad_norm": 1.353390097618103, - "learning_rate": 8.584020100502513e-05, - "loss": 5.8467, - "step": 14593 - }, - { - "epoch": 7.610951760104302, - "grad_norm": 1.4778640270233154, - "learning_rate": 8.583919597989951e-05, - "loss": 5.1659, - "step": 14594 - }, - { - "epoch": 7.611473272490222, - "grad_norm": 1.3148934841156006, - "learning_rate": 8.583819095477388e-05, - "loss": 5.8654, - "step": 14595 - }, - { - "epoch": 7.611994784876141, - "grad_norm": 1.4188932180404663, - "learning_rate": 8.583718592964825e-05, - "loss": 5.2034, - "step": 14596 - }, - { - "epoch": 7.61251629726206, - "grad_norm": 1.5931352376937866, - "learning_rate": 8.583618090452261e-05, - "loss": 4.7301, - "step": 14597 - }, - { - "epoch": 7.613037809647979, - "grad_norm": 1.4532877206802368, - "learning_rate": 8.583517587939699e-05, - "loss": 5.9231, - "step": 14598 - }, - { - "epoch": 7.613559322033899, - "grad_norm": 1.4350519180297852, - "learning_rate": 8.583417085427136e-05, - "loss": 5.5897, - "step": 14599 - }, - { - "epoch": 7.614080834419817, - "grad_norm": 1.4797754287719727, - "learning_rate": 8.583316582914573e-05, - "loss": 5.016, - "step": 14600 - }, - { - "epoch": 7.614602346805737, - "grad_norm": 1.3961119651794434, - "learning_rate": 8.58321608040201e-05, - "loss": 5.5914, - "step": 14601 - }, - { - "epoch": 7.615123859191656, - "grad_norm": 1.3779646158218384, - "learning_rate": 8.583115577889448e-05, - "loss": 5.4804, - "step": 14602 - }, - { - "epoch": 7.615645371577575, - "grad_norm": 1.3455175161361694, - "learning_rate": 8.583015075376884e-05, - "loss": 5.4765, - "step": 14603 - }, - { - "epoch": 7.616166883963494, - "grad_norm": 1.3976656198501587, - "learning_rate": 8.582914572864322e-05, - "loss": 5.1494, - "step": 14604 - }, - { - "epoch": 7.616688396349414, - "grad_norm": 1.3625767230987549, - "learning_rate": 8.58281407035176e-05, - "loss": 5.6284, - "step": 14605 - }, - { - "epoch": 7.617209908735332, - "grad_norm": 1.4791330099105835, - "learning_rate": 8.582713567839197e-05, - "loss": 5.4715, - "step": 14606 - }, - { - "epoch": 7.617731421121252, - "grad_norm": 1.4508873224258423, - "learning_rate": 8.582613065326634e-05, - "loss": 5.4205, - "step": 14607 - }, - { - "epoch": 7.618252933507171, - "grad_norm": 1.375443935394287, - "learning_rate": 8.582512562814072e-05, - "loss": 5.6043, - "step": 14608 - }, - { - "epoch": 7.61877444589309, - "grad_norm": 1.3604334592819214, - "learning_rate": 8.582412060301508e-05, - "loss": 5.3671, - "step": 14609 - }, - { - "epoch": 7.619295958279009, - "grad_norm": 1.6541023254394531, - "learning_rate": 8.582311557788946e-05, - "loss": 5.0596, - "step": 14610 - }, - { - "epoch": 7.619817470664929, - "grad_norm": 1.3720474243164062, - "learning_rate": 8.582211055276382e-05, - "loss": 5.2987, - "step": 14611 - }, - { - "epoch": 7.620338983050847, - "grad_norm": 1.4350706338882446, - "learning_rate": 8.582110552763819e-05, - "loss": 5.6824, - "step": 14612 - }, - { - "epoch": 7.620860495436767, - "grad_norm": 1.3727245330810547, - "learning_rate": 8.582010050251256e-05, - "loss": 5.4298, - "step": 14613 - }, - { - "epoch": 7.621382007822686, - "grad_norm": 1.4559354782104492, - "learning_rate": 8.581909547738693e-05, - "loss": 5.5784, - "step": 14614 - }, - { - "epoch": 7.621903520208605, - "grad_norm": 1.4342036247253418, - "learning_rate": 8.58180904522613e-05, - "loss": 5.5907, - "step": 14615 - }, - { - "epoch": 7.622425032594524, - "grad_norm": 1.3768900632858276, - "learning_rate": 8.581708542713568e-05, - "loss": 5.6485, - "step": 14616 - }, - { - "epoch": 7.622946544980444, - "grad_norm": 1.4285547733306885, - "learning_rate": 8.581608040201006e-05, - "loss": 5.7578, - "step": 14617 - }, - { - "epoch": 7.623468057366362, - "grad_norm": 1.675321340560913, - "learning_rate": 8.581507537688443e-05, - "loss": 5.3942, - "step": 14618 - }, - { - "epoch": 7.623989569752282, - "grad_norm": 1.5953563451766968, - "learning_rate": 8.58140703517588e-05, - "loss": 4.6846, - "step": 14619 - }, - { - "epoch": 7.624511082138201, - "grad_norm": 1.4773523807525635, - "learning_rate": 8.581306532663317e-05, - "loss": 5.4585, - "step": 14620 - }, - { - "epoch": 7.62503259452412, - "grad_norm": 1.5057294368743896, - "learning_rate": 8.581206030150755e-05, - "loss": 5.4498, - "step": 14621 - }, - { - "epoch": 7.625554106910039, - "grad_norm": 1.4820457696914673, - "learning_rate": 8.581105527638191e-05, - "loss": 5.6534, - "step": 14622 - }, - { - "epoch": 7.626075619295959, - "grad_norm": 1.4526172876358032, - "learning_rate": 8.581005025125629e-05, - "loss": 5.552, - "step": 14623 - }, - { - "epoch": 7.626597131681877, - "grad_norm": 1.4059566259384155, - "learning_rate": 8.580904522613065e-05, - "loss": 5.421, - "step": 14624 - }, - { - "epoch": 7.627118644067797, - "grad_norm": 1.3826589584350586, - "learning_rate": 8.580804020100503e-05, - "loss": 5.772, - "step": 14625 - }, - { - "epoch": 7.627640156453715, - "grad_norm": 1.397700548171997, - "learning_rate": 8.580703517587941e-05, - "loss": 5.7374, - "step": 14626 - }, - { - "epoch": 7.628161668839635, - "grad_norm": 1.5908472537994385, - "learning_rate": 8.580603015075377e-05, - "loss": 5.1126, - "step": 14627 - }, - { - "epoch": 7.628683181225554, - "grad_norm": 1.555870771408081, - "learning_rate": 8.580502512562815e-05, - "loss": 5.3093, - "step": 14628 - }, - { - "epoch": 7.629204693611474, - "grad_norm": 1.5971022844314575, - "learning_rate": 8.580402010050251e-05, - "loss": 5.5678, - "step": 14629 - }, - { - "epoch": 7.629726205997392, - "grad_norm": 1.4287526607513428, - "learning_rate": 8.580301507537689e-05, - "loss": 5.5924, - "step": 14630 - }, - { - "epoch": 7.630247718383312, - "grad_norm": 1.3843683004379272, - "learning_rate": 8.580201005025126e-05, - "loss": 5.6856, - "step": 14631 - }, - { - "epoch": 7.63076923076923, - "grad_norm": 1.3418704271316528, - "learning_rate": 8.580100502512563e-05, - "loss": 5.2955, - "step": 14632 - }, - { - "epoch": 7.63129074315515, - "grad_norm": 1.382317066192627, - "learning_rate": 8.58e-05, - "loss": 5.4985, - "step": 14633 - }, - { - "epoch": 7.631812255541069, - "grad_norm": 1.432120680809021, - "learning_rate": 8.579899497487438e-05, - "loss": 5.3769, - "step": 14634 - }, - { - "epoch": 7.632333767926989, - "grad_norm": 1.3869717121124268, - "learning_rate": 8.579798994974874e-05, - "loss": 5.8852, - "step": 14635 - }, - { - "epoch": 7.632855280312907, - "grad_norm": 1.2733527421951294, - "learning_rate": 8.579698492462312e-05, - "loss": 5.9152, - "step": 14636 - }, - { - "epoch": 7.633376792698827, - "grad_norm": 1.4165894985198975, - "learning_rate": 8.57959798994975e-05, - "loss": 5.8174, - "step": 14637 - }, - { - "epoch": 7.633898305084745, - "grad_norm": 1.4351458549499512, - "learning_rate": 8.579497487437186e-05, - "loss": 5.8018, - "step": 14638 - }, - { - "epoch": 7.634419817470665, - "grad_norm": 1.3981125354766846, - "learning_rate": 8.579396984924624e-05, - "loss": 5.7204, - "step": 14639 - }, - { - "epoch": 7.634941329856584, - "grad_norm": 1.5012327432632446, - "learning_rate": 8.57929648241206e-05, - "loss": 5.7069, - "step": 14640 - }, - { - "epoch": 7.635462842242504, - "grad_norm": 1.5171332359313965, - "learning_rate": 8.579195979899498e-05, - "loss": 5.3609, - "step": 14641 - }, - { - "epoch": 7.635984354628422, - "grad_norm": 1.4870562553405762, - "learning_rate": 8.579095477386934e-05, - "loss": 5.685, - "step": 14642 - }, - { - "epoch": 7.636505867014342, - "grad_norm": 1.4352293014526367, - "learning_rate": 8.578994974874372e-05, - "loss": 5.6263, - "step": 14643 - }, - { - "epoch": 7.63702737940026, - "grad_norm": 1.4950265884399414, - "learning_rate": 8.578894472361809e-05, - "loss": 5.5199, - "step": 14644 - }, - { - "epoch": 7.63754889178618, - "grad_norm": 1.4033253192901611, - "learning_rate": 8.578793969849246e-05, - "loss": 5.3981, - "step": 14645 - }, - { - "epoch": 7.638070404172099, - "grad_norm": 1.4909754991531372, - "learning_rate": 8.578693467336684e-05, - "loss": 5.5713, - "step": 14646 - }, - { - "epoch": 7.638591916558019, - "grad_norm": 1.42000150680542, - "learning_rate": 8.578592964824122e-05, - "loss": 5.6759, - "step": 14647 - }, - { - "epoch": 7.639113428943937, - "grad_norm": 1.503796100616455, - "learning_rate": 8.578492462311558e-05, - "loss": 5.5221, - "step": 14648 - }, - { - "epoch": 7.639634941329857, - "grad_norm": 1.4921307563781738, - "learning_rate": 8.578391959798996e-05, - "loss": 5.2454, - "step": 14649 - }, - { - "epoch": 7.640156453715775, - "grad_norm": 1.5497536659240723, - "learning_rate": 8.578291457286433e-05, - "loss": 5.7581, - "step": 14650 - }, - { - "epoch": 7.640677966101695, - "grad_norm": 1.3489325046539307, - "learning_rate": 8.578190954773869e-05, - "loss": 5.5266, - "step": 14651 - }, - { - "epoch": 7.641199478487614, - "grad_norm": 1.3150960206985474, - "learning_rate": 8.578090452261307e-05, - "loss": 5.7612, - "step": 14652 - }, - { - "epoch": 7.641720990873534, - "grad_norm": 1.3955278396606445, - "learning_rate": 8.577989949748743e-05, - "loss": 5.5386, - "step": 14653 - }, - { - "epoch": 7.642242503259452, - "grad_norm": 1.4535070657730103, - "learning_rate": 8.577889447236181e-05, - "loss": 5.7066, - "step": 14654 - }, - { - "epoch": 7.642764015645372, - "grad_norm": 1.4087462425231934, - "learning_rate": 8.577788944723618e-05, - "loss": 5.8026, - "step": 14655 - }, - { - "epoch": 7.64328552803129, - "grad_norm": 1.3564918041229248, - "learning_rate": 8.577688442211055e-05, - "loss": 5.2437, - "step": 14656 - }, - { - "epoch": 7.64380704041721, - "grad_norm": 1.5357944965362549, - "learning_rate": 8.577587939698493e-05, - "loss": 5.0253, - "step": 14657 - }, - { - "epoch": 7.644328552803129, - "grad_norm": 1.5106899738311768, - "learning_rate": 8.577487437185931e-05, - "loss": 5.565, - "step": 14658 - }, - { - "epoch": 7.644850065189048, - "grad_norm": 1.4094544649124146, - "learning_rate": 8.577386934673367e-05, - "loss": 5.2372, - "step": 14659 - }, - { - "epoch": 7.645371577574967, - "grad_norm": 1.3909111022949219, - "learning_rate": 8.577286432160805e-05, - "loss": 5.9288, - "step": 14660 - }, - { - "epoch": 7.645893089960887, - "grad_norm": 1.3765878677368164, - "learning_rate": 8.577185929648241e-05, - "loss": 5.8483, - "step": 14661 - }, - { - "epoch": 7.646414602346805, - "grad_norm": 1.544650912284851, - "learning_rate": 8.577085427135679e-05, - "loss": 5.2543, - "step": 14662 - }, - { - "epoch": 7.646936114732725, - "grad_norm": 1.6233195066452026, - "learning_rate": 8.576984924623116e-05, - "loss": 5.7661, - "step": 14663 - }, - { - "epoch": 7.647457627118644, - "grad_norm": 1.4827147722244263, - "learning_rate": 8.576884422110552e-05, - "loss": 5.5571, - "step": 14664 - }, - { - "epoch": 7.647979139504563, - "grad_norm": 1.4367491006851196, - "learning_rate": 8.57678391959799e-05, - "loss": 5.6647, - "step": 14665 - }, - { - "epoch": 7.648500651890482, - "grad_norm": 1.475691318511963, - "learning_rate": 8.576683417085428e-05, - "loss": 5.2522, - "step": 14666 - }, - { - "epoch": 7.649022164276402, - "grad_norm": 1.486930012702942, - "learning_rate": 8.576582914572865e-05, - "loss": 5.5879, - "step": 14667 - }, - { - "epoch": 7.64954367666232, - "grad_norm": 1.3296513557434082, - "learning_rate": 8.576482412060302e-05, - "loss": 5.947, - "step": 14668 - }, - { - "epoch": 7.65006518904824, - "grad_norm": 1.3885064125061035, - "learning_rate": 8.57638190954774e-05, - "loss": 5.8143, - "step": 14669 - }, - { - "epoch": 7.650586701434159, - "grad_norm": 1.4947150945663452, - "learning_rate": 8.576281407035176e-05, - "loss": 5.7642, - "step": 14670 - }, - { - "epoch": 7.651108213820078, - "grad_norm": 1.5866930484771729, - "learning_rate": 8.576180904522614e-05, - "loss": 5.2412, - "step": 14671 - }, - { - "epoch": 7.651629726205997, - "grad_norm": 1.5581918954849243, - "learning_rate": 8.57608040201005e-05, - "loss": 4.9765, - "step": 14672 - }, - { - "epoch": 7.652151238591917, - "grad_norm": 1.4363638162612915, - "learning_rate": 8.575979899497488e-05, - "loss": 5.7409, - "step": 14673 - }, - { - "epoch": 7.652672750977835, - "grad_norm": 1.4252647161483765, - "learning_rate": 8.575879396984925e-05, - "loss": 5.6977, - "step": 14674 - }, - { - "epoch": 7.653194263363755, - "grad_norm": 1.4991096258163452, - "learning_rate": 8.575778894472362e-05, - "loss": 6.0273, - "step": 14675 - }, - { - "epoch": 7.653715775749674, - "grad_norm": 1.421834111213684, - "learning_rate": 8.575678391959799e-05, - "loss": 5.0179, - "step": 14676 - }, - { - "epoch": 7.654237288135593, - "grad_norm": 1.4294553995132446, - "learning_rate": 8.575577889447237e-05, - "loss": 5.4072, - "step": 14677 - }, - { - "epoch": 7.654758800521512, - "grad_norm": 1.4207764863967896, - "learning_rate": 8.575477386934674e-05, - "loss": 5.2973, - "step": 14678 - }, - { - "epoch": 7.655280312907432, - "grad_norm": 1.3687636852264404, - "learning_rate": 8.575376884422111e-05, - "loss": 5.4252, - "step": 14679 - }, - { - "epoch": 7.65580182529335, - "grad_norm": 1.4813637733459473, - "learning_rate": 8.575276381909549e-05, - "loss": 5.8017, - "step": 14680 - }, - { - "epoch": 7.65632333767927, - "grad_norm": 1.5380475521087646, - "learning_rate": 8.575175879396985e-05, - "loss": 5.5064, - "step": 14681 - }, - { - "epoch": 7.656844850065189, - "grad_norm": 1.491517424583435, - "learning_rate": 8.575075376884423e-05, - "loss": 5.3233, - "step": 14682 - }, - { - "epoch": 7.657366362451108, - "grad_norm": 1.4311333894729614, - "learning_rate": 8.574974874371859e-05, - "loss": 5.3413, - "step": 14683 - }, - { - "epoch": 7.657887874837027, - "grad_norm": 1.3998380899429321, - "learning_rate": 8.574874371859297e-05, - "loss": 5.5189, - "step": 14684 - }, - { - "epoch": 7.658409387222947, - "grad_norm": 1.292834997177124, - "learning_rate": 8.574773869346733e-05, - "loss": 5.8297, - "step": 14685 - }, - { - "epoch": 7.658930899608865, - "grad_norm": 1.4793167114257812, - "learning_rate": 8.574673366834171e-05, - "loss": 6.0114, - "step": 14686 - }, - { - "epoch": 7.659452411994785, - "grad_norm": 1.5299097299575806, - "learning_rate": 8.574572864321609e-05, - "loss": 5.881, - "step": 14687 - }, - { - "epoch": 7.659973924380704, - "grad_norm": 1.321669340133667, - "learning_rate": 8.574472361809047e-05, - "loss": 5.3794, - "step": 14688 - }, - { - "epoch": 7.660495436766623, - "grad_norm": 1.3716124296188354, - "learning_rate": 8.574371859296483e-05, - "loss": 5.6012, - "step": 14689 - }, - { - "epoch": 7.661016949152542, - "grad_norm": 1.4647706747055054, - "learning_rate": 8.57427135678392e-05, - "loss": 5.5811, - "step": 14690 - }, - { - "epoch": 7.661538461538462, - "grad_norm": 1.3908463716506958, - "learning_rate": 8.574170854271357e-05, - "loss": 5.8113, - "step": 14691 - }, - { - "epoch": 7.66205997392438, - "grad_norm": 1.5413529872894287, - "learning_rate": 8.574070351758794e-05, - "loss": 5.652, - "step": 14692 - }, - { - "epoch": 7.6625814863103, - "grad_norm": 1.4689393043518066, - "learning_rate": 8.573969849246232e-05, - "loss": 5.8838, - "step": 14693 - }, - { - "epoch": 7.663102998696219, - "grad_norm": 1.5362683534622192, - "learning_rate": 8.573869346733668e-05, - "loss": 4.8633, - "step": 14694 - }, - { - "epoch": 7.663624511082138, - "grad_norm": 1.3247417211532593, - "learning_rate": 8.573768844221106e-05, - "loss": 5.7853, - "step": 14695 - }, - { - "epoch": 7.664146023468057, - "grad_norm": 1.548058271408081, - "learning_rate": 8.573668341708542e-05, - "loss": 4.7817, - "step": 14696 - }, - { - "epoch": 7.664667535853977, - "grad_norm": 1.520589828491211, - "learning_rate": 8.57356783919598e-05, - "loss": 5.7826, - "step": 14697 - }, - { - "epoch": 7.665189048239895, - "grad_norm": 1.7015143632888794, - "learning_rate": 8.573467336683418e-05, - "loss": 5.5699, - "step": 14698 - }, - { - "epoch": 7.665710560625815, - "grad_norm": 1.4458372592926025, - "learning_rate": 8.573366834170856e-05, - "loss": 5.3857, - "step": 14699 - }, - { - "epoch": 7.666232073011734, - "grad_norm": 1.6074435710906982, - "learning_rate": 8.573266331658292e-05, - "loss": 5.8484, - "step": 14700 - }, - { - "epoch": 7.666753585397653, - "grad_norm": 1.624144434928894, - "learning_rate": 8.57316582914573e-05, - "loss": 5.8928, - "step": 14701 - }, - { - "epoch": 7.667275097783572, - "grad_norm": 1.5539202690124512, - "learning_rate": 8.573065326633166e-05, - "loss": 5.3345, - "step": 14702 - }, - { - "epoch": 7.667796610169492, - "grad_norm": 1.6379830837249756, - "learning_rate": 8.572964824120604e-05, - "loss": 5.0391, - "step": 14703 - }, - { - "epoch": 7.6683181225554105, - "grad_norm": 1.4335107803344727, - "learning_rate": 8.57286432160804e-05, - "loss": 5.6575, - "step": 14704 - }, - { - "epoch": 7.66883963494133, - "grad_norm": 1.4010604619979858, - "learning_rate": 8.572763819095477e-05, - "loss": 5.3618, - "step": 14705 - }, - { - "epoch": 7.669361147327249, - "grad_norm": 1.437680721282959, - "learning_rate": 8.572663316582915e-05, - "loss": 5.0952, - "step": 14706 - }, - { - "epoch": 7.669882659713168, - "grad_norm": 1.45354163646698, - "learning_rate": 8.572562814070352e-05, - "loss": 5.221, - "step": 14707 - }, - { - "epoch": 7.670404172099087, - "grad_norm": 1.4506324529647827, - "learning_rate": 8.57246231155779e-05, - "loss": 5.2993, - "step": 14708 - }, - { - "epoch": 7.670925684485007, - "grad_norm": 1.4666541814804077, - "learning_rate": 8.572361809045227e-05, - "loss": 5.5502, - "step": 14709 - }, - { - "epoch": 7.6714471968709255, - "grad_norm": 1.4368038177490234, - "learning_rate": 8.572261306532664e-05, - "loss": 5.347, - "step": 14710 - }, - { - "epoch": 7.671968709256845, - "grad_norm": 1.4859592914581299, - "learning_rate": 8.572160804020101e-05, - "loss": 5.4601, - "step": 14711 - }, - { - "epoch": 7.672490221642764, - "grad_norm": 1.5199806690216064, - "learning_rate": 8.572060301507539e-05, - "loss": 5.7809, - "step": 14712 - }, - { - "epoch": 7.673011734028683, - "grad_norm": 1.3522474765777588, - "learning_rate": 8.571959798994975e-05, - "loss": 5.8287, - "step": 14713 - }, - { - "epoch": 7.673533246414602, - "grad_norm": 1.5956192016601562, - "learning_rate": 8.571859296482413e-05, - "loss": 5.4111, - "step": 14714 - }, - { - "epoch": 7.674054758800521, - "grad_norm": 1.495283603668213, - "learning_rate": 8.571758793969849e-05, - "loss": 5.7645, - "step": 14715 - }, - { - "epoch": 7.6745762711864405, - "grad_norm": 1.3905409574508667, - "learning_rate": 8.571658291457287e-05, - "loss": 5.7638, - "step": 14716 - }, - { - "epoch": 7.67509778357236, - "grad_norm": 1.4557733535766602, - "learning_rate": 8.571557788944723e-05, - "loss": 5.4745, - "step": 14717 - }, - { - "epoch": 7.675619295958279, - "grad_norm": 1.392626404762268, - "learning_rate": 8.571457286432161e-05, - "loss": 5.4911, - "step": 14718 - }, - { - "epoch": 7.676140808344198, - "grad_norm": 1.3730162382125854, - "learning_rate": 8.571356783919599e-05, - "loss": 5.8842, - "step": 14719 - }, - { - "epoch": 7.6766623207301175, - "grad_norm": 1.467215895652771, - "learning_rate": 8.571256281407035e-05, - "loss": 5.5235, - "step": 14720 - }, - { - "epoch": 7.677183833116036, - "grad_norm": 1.4114973545074463, - "learning_rate": 8.571155778894473e-05, - "loss": 5.7143, - "step": 14721 - }, - { - "epoch": 7.6777053455019555, - "grad_norm": 1.3557264804840088, - "learning_rate": 8.57105527638191e-05, - "loss": 5.5797, - "step": 14722 - }, - { - "epoch": 7.678226857887875, - "grad_norm": 1.39503812789917, - "learning_rate": 8.570954773869347e-05, - "loss": 5.5436, - "step": 14723 - }, - { - "epoch": 7.678748370273794, - "grad_norm": 1.5187641382217407, - "learning_rate": 8.570854271356784e-05, - "loss": 5.6563, - "step": 14724 - }, - { - "epoch": 7.679269882659713, - "grad_norm": 1.4089620113372803, - "learning_rate": 8.570753768844222e-05, - "loss": 5.5209, - "step": 14725 - }, - { - "epoch": 7.6797913950456325, - "grad_norm": 1.508353352546692, - "learning_rate": 8.570653266331658e-05, - "loss": 5.2482, - "step": 14726 - }, - { - "epoch": 7.680312907431551, - "grad_norm": 1.4408546686172485, - "learning_rate": 8.570552763819096e-05, - "loss": 6.01, - "step": 14727 - }, - { - "epoch": 7.6808344198174705, - "grad_norm": 1.4495387077331543, - "learning_rate": 8.570452261306534e-05, - "loss": 5.5215, - "step": 14728 - }, - { - "epoch": 7.68135593220339, - "grad_norm": 1.3931432962417603, - "learning_rate": 8.570351758793971e-05, - "loss": 5.3699, - "step": 14729 - }, - { - "epoch": 7.681877444589309, - "grad_norm": 1.4001067876815796, - "learning_rate": 8.570251256281408e-05, - "loss": 5.0847, - "step": 14730 - }, - { - "epoch": 7.682398956975228, - "grad_norm": 1.3747936487197876, - "learning_rate": 8.570150753768844e-05, - "loss": 5.6811, - "step": 14731 - }, - { - "epoch": 7.6829204693611475, - "grad_norm": 1.3512450456619263, - "learning_rate": 8.570050251256282e-05, - "loss": 5.6691, - "step": 14732 - }, - { - "epoch": 7.683441981747066, - "grad_norm": 1.45374596118927, - "learning_rate": 8.569949748743718e-05, - "loss": 5.0967, - "step": 14733 - }, - { - "epoch": 7.6839634941329855, - "grad_norm": 1.5682382583618164, - "learning_rate": 8.569849246231156e-05, - "loss": 5.071, - "step": 14734 - }, - { - "epoch": 7.684485006518905, - "grad_norm": 1.4758872985839844, - "learning_rate": 8.569748743718593e-05, - "loss": 5.8897, - "step": 14735 - }, - { - "epoch": 7.6850065189048244, - "grad_norm": 1.4261268377304077, - "learning_rate": 8.56964824120603e-05, - "loss": 5.4662, - "step": 14736 - }, - { - "epoch": 7.685528031290743, - "grad_norm": 1.362537145614624, - "learning_rate": 8.569547738693467e-05, - "loss": 5.7164, - "step": 14737 - }, - { - "epoch": 7.6860495436766625, - "grad_norm": 1.335270643234253, - "learning_rate": 8.569447236180905e-05, - "loss": 5.755, - "step": 14738 - }, - { - "epoch": 7.686571056062581, - "grad_norm": 1.4678642749786377, - "learning_rate": 8.569346733668342e-05, - "loss": 5.8006, - "step": 14739 - }, - { - "epoch": 7.6870925684485005, - "grad_norm": 1.2940086126327515, - "learning_rate": 8.56924623115578e-05, - "loss": 6.0066, - "step": 14740 - }, - { - "epoch": 7.68761408083442, - "grad_norm": 1.4905263185501099, - "learning_rate": 8.569145728643217e-05, - "loss": 5.6827, - "step": 14741 - }, - { - "epoch": 7.6881355932203395, - "grad_norm": 1.500954031944275, - "learning_rate": 8.569045226130654e-05, - "loss": 5.2072, - "step": 14742 - }, - { - "epoch": 7.688657105606258, - "grad_norm": 1.471570372581482, - "learning_rate": 8.568944723618091e-05, - "loss": 5.8793, - "step": 14743 - }, - { - "epoch": 7.6891786179921775, - "grad_norm": 1.6480016708374023, - "learning_rate": 8.568844221105527e-05, - "loss": 5.3504, - "step": 14744 - }, - { - "epoch": 7.689700130378096, - "grad_norm": 1.4533756971359253, - "learning_rate": 8.568743718592965e-05, - "loss": 5.3577, - "step": 14745 - }, - { - "epoch": 7.6902216427640155, - "grad_norm": 1.4899380207061768, - "learning_rate": 8.568643216080402e-05, - "loss": 5.2142, - "step": 14746 - }, - { - "epoch": 7.690743155149935, - "grad_norm": 1.3690046072006226, - "learning_rate": 8.568542713567839e-05, - "loss": 5.5417, - "step": 14747 - }, - { - "epoch": 7.6912646675358545, - "grad_norm": 1.3653653860092163, - "learning_rate": 8.568442211055277e-05, - "loss": 5.3554, - "step": 14748 - }, - { - "epoch": 7.691786179921773, - "grad_norm": 1.3265948295593262, - "learning_rate": 8.568341708542715e-05, - "loss": 5.7507, - "step": 14749 - }, - { - "epoch": 7.6923076923076925, - "grad_norm": 1.4186019897460938, - "learning_rate": 8.568241206030151e-05, - "loss": 5.7338, - "step": 14750 - }, - { - "epoch": 7.692829204693611, - "grad_norm": 1.8793933391571045, - "learning_rate": 8.568140703517589e-05, - "loss": 4.9948, - "step": 14751 - }, - { - "epoch": 7.6933507170795306, - "grad_norm": 1.34813392162323, - "learning_rate": 8.568040201005026e-05, - "loss": 5.4168, - "step": 14752 - }, - { - "epoch": 7.69387222946545, - "grad_norm": 1.4172838926315308, - "learning_rate": 8.567939698492463e-05, - "loss": 5.6237, - "step": 14753 - }, - { - "epoch": 7.694393741851369, - "grad_norm": 1.3378936052322388, - "learning_rate": 8.5678391959799e-05, - "loss": 5.5808, - "step": 14754 - }, - { - "epoch": 7.694915254237288, - "grad_norm": 1.472091794013977, - "learning_rate": 8.567738693467338e-05, - "loss": 5.2642, - "step": 14755 - }, - { - "epoch": 7.6954367666232075, - "grad_norm": 1.5203274488449097, - "learning_rate": 8.567638190954774e-05, - "loss": 5.643, - "step": 14756 - }, - { - "epoch": 7.695958279009126, - "grad_norm": 1.6018379926681519, - "learning_rate": 8.56753768844221e-05, - "loss": 4.9829, - "step": 14757 - }, - { - "epoch": 7.696479791395046, - "grad_norm": 1.3405405282974243, - "learning_rate": 8.567437185929648e-05, - "loss": 5.711, - "step": 14758 - }, - { - "epoch": 7.697001303780965, - "grad_norm": 1.4114539623260498, - "learning_rate": 8.567336683417086e-05, - "loss": 5.7309, - "step": 14759 - }, - { - "epoch": 7.697522816166884, - "grad_norm": 1.4446299076080322, - "learning_rate": 8.567236180904524e-05, - "loss": 5.6742, - "step": 14760 - }, - { - "epoch": 7.698044328552803, - "grad_norm": 1.5071051120758057, - "learning_rate": 8.56713567839196e-05, - "loss": 5.8511, - "step": 14761 - }, - { - "epoch": 7.6985658409387225, - "grad_norm": 1.4111144542694092, - "learning_rate": 8.567035175879398e-05, - "loss": 5.9173, - "step": 14762 - }, - { - "epoch": 7.699087353324641, - "grad_norm": 1.6788824796676636, - "learning_rate": 8.566934673366834e-05, - "loss": 5.2343, - "step": 14763 - }, - { - "epoch": 7.699608865710561, - "grad_norm": 1.4079809188842773, - "learning_rate": 8.566834170854272e-05, - "loss": 5.7582, - "step": 14764 - }, - { - "epoch": 7.70013037809648, - "grad_norm": 1.4450013637542725, - "learning_rate": 8.566733668341709e-05, - "loss": 5.3325, - "step": 14765 - }, - { - "epoch": 7.700651890482399, - "grad_norm": 1.4873801469802856, - "learning_rate": 8.566633165829146e-05, - "loss": 5.4086, - "step": 14766 - }, - { - "epoch": 7.701173402868318, - "grad_norm": 1.3587675094604492, - "learning_rate": 8.566532663316583e-05, - "loss": 5.6883, - "step": 14767 - }, - { - "epoch": 7.7016949152542376, - "grad_norm": 1.3025877475738525, - "learning_rate": 8.56643216080402e-05, - "loss": 5.8786, - "step": 14768 - }, - { - "epoch": 7.702216427640156, - "grad_norm": 1.4709336757659912, - "learning_rate": 8.566331658291458e-05, - "loss": 5.6206, - "step": 14769 - }, - { - "epoch": 7.702737940026076, - "grad_norm": 1.413230299949646, - "learning_rate": 8.566231155778895e-05, - "loss": 5.6284, - "step": 14770 - }, - { - "epoch": 7.703259452411995, - "grad_norm": 1.6142833232879639, - "learning_rate": 8.566130653266333e-05, - "loss": 5.1408, - "step": 14771 - }, - { - "epoch": 7.703780964797914, - "grad_norm": 1.6423684358596802, - "learning_rate": 8.566030150753769e-05, - "loss": 5.6306, - "step": 14772 - }, - { - "epoch": 7.704302477183833, - "grad_norm": 1.535393476486206, - "learning_rate": 8.565929648241207e-05, - "loss": 5.4607, - "step": 14773 - }, - { - "epoch": 7.704823989569753, - "grad_norm": 1.5291744470596313, - "learning_rate": 8.565829145728643e-05, - "loss": 5.6793, - "step": 14774 - }, - { - "epoch": 7.705345501955671, - "grad_norm": 1.39779794216156, - "learning_rate": 8.565728643216081e-05, - "loss": 5.811, - "step": 14775 - }, - { - "epoch": 7.705867014341591, - "grad_norm": 1.5096255540847778, - "learning_rate": 8.565628140703517e-05, - "loss": 5.6343, - "step": 14776 - }, - { - "epoch": 7.70638852672751, - "grad_norm": 1.4525707960128784, - "learning_rate": 8.565527638190955e-05, - "loss": 5.6238, - "step": 14777 - }, - { - "epoch": 7.706910039113429, - "grad_norm": 1.4152907133102417, - "learning_rate": 8.565427135678392e-05, - "loss": 5.1513, - "step": 14778 - }, - { - "epoch": 7.707431551499348, - "grad_norm": 1.3965309858322144, - "learning_rate": 8.56532663316583e-05, - "loss": 5.8588, - "step": 14779 - }, - { - "epoch": 7.707953063885268, - "grad_norm": 1.5874367952346802, - "learning_rate": 8.565226130653267e-05, - "loss": 5.3228, - "step": 14780 - }, - { - "epoch": 7.708474576271186, - "grad_norm": 1.5047407150268555, - "learning_rate": 8.565125628140705e-05, - "loss": 4.8841, - "step": 14781 - }, - { - "epoch": 7.708996088657106, - "grad_norm": 1.4959686994552612, - "learning_rate": 8.565025125628141e-05, - "loss": 5.604, - "step": 14782 - }, - { - "epoch": 7.709517601043025, - "grad_norm": 1.529354453086853, - "learning_rate": 8.564924623115578e-05, - "loss": 5.3697, - "step": 14783 - }, - { - "epoch": 7.710039113428944, - "grad_norm": 1.5478074550628662, - "learning_rate": 8.564824120603016e-05, - "loss": 5.3552, - "step": 14784 - }, - { - "epoch": 7.710560625814863, - "grad_norm": 1.5828460454940796, - "learning_rate": 8.564723618090452e-05, - "loss": 5.9121, - "step": 14785 - }, - { - "epoch": 7.711082138200783, - "grad_norm": 1.5950356721878052, - "learning_rate": 8.56462311557789e-05, - "loss": 5.6399, - "step": 14786 - }, - { - "epoch": 7.711603650586701, - "grad_norm": 1.4942759275436401, - "learning_rate": 8.564522613065326e-05, - "loss": 5.7578, - "step": 14787 - }, - { - "epoch": 7.712125162972621, - "grad_norm": 1.3950265645980835, - "learning_rate": 8.564422110552764e-05, - "loss": 5.5435, - "step": 14788 - }, - { - "epoch": 7.71264667535854, - "grad_norm": 1.3896054029464722, - "learning_rate": 8.5643216080402e-05, - "loss": 5.8613, - "step": 14789 - }, - { - "epoch": 7.713168187744459, - "grad_norm": 1.562682032585144, - "learning_rate": 8.564221105527638e-05, - "loss": 5.1252, - "step": 14790 - }, - { - "epoch": 7.713689700130378, - "grad_norm": 1.450722575187683, - "learning_rate": 8.564120603015076e-05, - "loss": 5.37, - "step": 14791 - }, - { - "epoch": 7.714211212516298, - "grad_norm": 1.90537691116333, - "learning_rate": 8.564020100502514e-05, - "loss": 5.0483, - "step": 14792 - }, - { - "epoch": 7.714732724902216, - "grad_norm": 1.4214389324188232, - "learning_rate": 8.56391959798995e-05, - "loss": 4.7918, - "step": 14793 - }, - { - "epoch": 7.715254237288136, - "grad_norm": 1.469675898551941, - "learning_rate": 8.563819095477388e-05, - "loss": 5.4333, - "step": 14794 - }, - { - "epoch": 7.715775749674055, - "grad_norm": 1.411699891090393, - "learning_rate": 8.563718592964824e-05, - "loss": 5.4798, - "step": 14795 - }, - { - "epoch": 7.716297262059974, - "grad_norm": 1.6021950244903564, - "learning_rate": 8.563618090452262e-05, - "loss": 5.5775, - "step": 14796 - }, - { - "epoch": 7.716818774445893, - "grad_norm": 1.5753060579299927, - "learning_rate": 8.563517587939699e-05, - "loss": 4.8378, - "step": 14797 - }, - { - "epoch": 7.717340286831813, - "grad_norm": 1.5072102546691895, - "learning_rate": 8.563417085427135e-05, - "loss": 5.5738, - "step": 14798 - }, - { - "epoch": 7.717861799217731, - "grad_norm": 1.3707101345062256, - "learning_rate": 8.563316582914573e-05, - "loss": 5.7431, - "step": 14799 - }, - { - "epoch": 7.718383311603651, - "grad_norm": 1.4262690544128418, - "learning_rate": 8.56321608040201e-05, - "loss": 5.663, - "step": 14800 - }, - { - "epoch": 7.71890482398957, - "grad_norm": 1.348139762878418, - "learning_rate": 8.563115577889448e-05, - "loss": 5.1651, - "step": 14801 - }, - { - "epoch": 7.719426336375489, - "grad_norm": 1.3446165323257446, - "learning_rate": 8.563015075376885e-05, - "loss": 5.5771, - "step": 14802 - }, - { - "epoch": 7.719947848761408, - "grad_norm": 1.4711182117462158, - "learning_rate": 8.562914572864323e-05, - "loss": 5.6746, - "step": 14803 - }, - { - "epoch": 7.720469361147328, - "grad_norm": 1.3799740076065063, - "learning_rate": 8.562814070351759e-05, - "loss": 5.8473, - "step": 14804 - }, - { - "epoch": 7.720990873533246, - "grad_norm": 1.3516179323196411, - "learning_rate": 8.562713567839197e-05, - "loss": 5.6321, - "step": 14805 - }, - { - "epoch": 7.721512385919166, - "grad_norm": 1.6261240243911743, - "learning_rate": 8.562613065326633e-05, - "loss": 5.7353, - "step": 14806 - }, - { - "epoch": 7.722033898305085, - "grad_norm": 1.3962204456329346, - "learning_rate": 8.562512562814071e-05, - "loss": 5.5102, - "step": 14807 - }, - { - "epoch": 7.722555410691004, - "grad_norm": 1.4644442796707153, - "learning_rate": 8.562412060301507e-05, - "loss": 5.2088, - "step": 14808 - }, - { - "epoch": 7.723076923076923, - "grad_norm": 1.487269401550293, - "learning_rate": 8.562311557788945e-05, - "loss": 5.4267, - "step": 14809 - }, - { - "epoch": 7.723598435462842, - "grad_norm": 1.3908530473709106, - "learning_rate": 8.562211055276382e-05, - "loss": 5.3987, - "step": 14810 - }, - { - "epoch": 7.724119947848761, - "grad_norm": 1.5016013383865356, - "learning_rate": 8.56211055276382e-05, - "loss": 5.7055, - "step": 14811 - }, - { - "epoch": 7.724641460234681, - "grad_norm": 1.3737385272979736, - "learning_rate": 8.562010050251257e-05, - "loss": 5.939, - "step": 14812 - }, - { - "epoch": 7.7251629726206, - "grad_norm": 1.5205028057098389, - "learning_rate": 8.561909547738694e-05, - "loss": 5.5123, - "step": 14813 - }, - { - "epoch": 7.725684485006519, - "grad_norm": 1.3895668983459473, - "learning_rate": 8.561809045226131e-05, - "loss": 5.6332, - "step": 14814 - }, - { - "epoch": 7.726205997392438, - "grad_norm": 1.416590690612793, - "learning_rate": 8.561708542713568e-05, - "loss": 5.4861, - "step": 14815 - }, - { - "epoch": 7.726727509778357, - "grad_norm": 1.5170279741287231, - "learning_rate": 8.561608040201006e-05, - "loss": 4.6731, - "step": 14816 - }, - { - "epoch": 7.727249022164276, - "grad_norm": 1.4652515649795532, - "learning_rate": 8.561507537688442e-05, - "loss": 5.6669, - "step": 14817 - }, - { - "epoch": 7.727770534550196, - "grad_norm": 1.4400982856750488, - "learning_rate": 8.56140703517588e-05, - "loss": 5.8589, - "step": 14818 - }, - { - "epoch": 7.728292046936115, - "grad_norm": 1.37783682346344, - "learning_rate": 8.561306532663316e-05, - "loss": 5.492, - "step": 14819 - }, - { - "epoch": 7.728813559322034, - "grad_norm": 1.6035935878753662, - "learning_rate": 8.561206030150754e-05, - "loss": 4.804, - "step": 14820 - }, - { - "epoch": 7.729335071707953, - "grad_norm": 1.47847580909729, - "learning_rate": 8.561105527638192e-05, - "loss": 5.328, - "step": 14821 - }, - { - "epoch": 7.729856584093872, - "grad_norm": 1.4305375814437866, - "learning_rate": 8.56100502512563e-05, - "loss": 5.5318, - "step": 14822 - }, - { - "epoch": 7.730378096479791, - "grad_norm": 1.6848556995391846, - "learning_rate": 8.560904522613066e-05, - "loss": 5.5313, - "step": 14823 - }, - { - "epoch": 7.730899608865711, - "grad_norm": 1.4849793910980225, - "learning_rate": 8.560804020100503e-05, - "loss": 5.3012, - "step": 14824 - }, - { - "epoch": 7.73142112125163, - "grad_norm": 1.3191434144973755, - "learning_rate": 8.56070351758794e-05, - "loss": 5.7006, - "step": 14825 - }, - { - "epoch": 7.731942633637549, - "grad_norm": 1.4468410015106201, - "learning_rate": 8.560603015075377e-05, - "loss": 5.6186, - "step": 14826 - }, - { - "epoch": 7.732464146023468, - "grad_norm": 1.4431235790252686, - "learning_rate": 8.560502512562815e-05, - "loss": 5.4975, - "step": 14827 - }, - { - "epoch": 7.732985658409387, - "grad_norm": 1.5484137535095215, - "learning_rate": 8.560402010050251e-05, - "loss": 5.7887, - "step": 14828 - }, - { - "epoch": 7.733507170795306, - "grad_norm": 1.529132604598999, - "learning_rate": 8.560301507537689e-05, - "loss": 5.6447, - "step": 14829 - }, - { - "epoch": 7.734028683181226, - "grad_norm": 1.2961342334747314, - "learning_rate": 8.560201005025125e-05, - "loss": 5.9338, - "step": 14830 - }, - { - "epoch": 7.734550195567145, - "grad_norm": 1.599540114402771, - "learning_rate": 8.560100502512563e-05, - "loss": 4.8755, - "step": 14831 - }, - { - "epoch": 7.735071707953064, - "grad_norm": 1.6193578243255615, - "learning_rate": 8.560000000000001e-05, - "loss": 5.1817, - "step": 14832 - }, - { - "epoch": 7.735593220338983, - "grad_norm": 1.616989016532898, - "learning_rate": 8.559899497487438e-05, - "loss": 4.9347, - "step": 14833 - }, - { - "epoch": 7.736114732724902, - "grad_norm": 1.3978490829467773, - "learning_rate": 8.559798994974875e-05, - "loss": 5.5828, - "step": 14834 - }, - { - "epoch": 7.736636245110821, - "grad_norm": 1.7077770233154297, - "learning_rate": 8.559698492462313e-05, - "loss": 5.2395, - "step": 14835 - }, - { - "epoch": 7.737157757496741, - "grad_norm": 1.3986114263534546, - "learning_rate": 8.559597989949749e-05, - "loss": 5.6592, - "step": 14836 - }, - { - "epoch": 7.73767926988266, - "grad_norm": 1.5664916038513184, - "learning_rate": 8.559497487437186e-05, - "loss": 4.9861, - "step": 14837 - }, - { - "epoch": 7.738200782268579, - "grad_norm": 1.341298222541809, - "learning_rate": 8.559396984924623e-05, - "loss": 5.8691, - "step": 14838 - }, - { - "epoch": 7.738722294654498, - "grad_norm": 1.3619561195373535, - "learning_rate": 8.55929648241206e-05, - "loss": 5.5233, - "step": 14839 - }, - { - "epoch": 7.739243807040417, - "grad_norm": 1.5581225156784058, - "learning_rate": 8.559195979899498e-05, - "loss": 5.4068, - "step": 14840 - }, - { - "epoch": 7.739765319426336, - "grad_norm": 1.3480321168899536, - "learning_rate": 8.559095477386935e-05, - "loss": 5.5538, - "step": 14841 - }, - { - "epoch": 7.740286831812256, - "grad_norm": 1.4259237051010132, - "learning_rate": 8.558994974874373e-05, - "loss": 5.7791, - "step": 14842 - }, - { - "epoch": 7.740808344198175, - "grad_norm": 1.422835111618042, - "learning_rate": 8.55889447236181e-05, - "loss": 5.1687, - "step": 14843 - }, - { - "epoch": 7.741329856584094, - "grad_norm": 1.4099276065826416, - "learning_rate": 8.558793969849247e-05, - "loss": 5.367, - "step": 14844 - }, - { - "epoch": 7.741851368970013, - "grad_norm": 1.6038451194763184, - "learning_rate": 8.558693467336684e-05, - "loss": 5.4124, - "step": 14845 - }, - { - "epoch": 7.742372881355932, - "grad_norm": 1.406217336654663, - "learning_rate": 8.558592964824122e-05, - "loss": 5.7604, - "step": 14846 - }, - { - "epoch": 7.742894393741851, - "grad_norm": 1.3630237579345703, - "learning_rate": 8.558492462311558e-05, - "loss": 5.667, - "step": 14847 - }, - { - "epoch": 7.743415906127771, - "grad_norm": 1.2810211181640625, - "learning_rate": 8.558391959798996e-05, - "loss": 5.9569, - "step": 14848 - }, - { - "epoch": 7.743937418513689, - "grad_norm": 1.3420000076293945, - "learning_rate": 8.558291457286432e-05, - "loss": 5.8974, - "step": 14849 - }, - { - "epoch": 7.744458930899609, - "grad_norm": 1.4180821180343628, - "learning_rate": 8.558190954773869e-05, - "loss": 5.804, - "step": 14850 - }, - { - "epoch": 7.744980443285528, - "grad_norm": 1.4010448455810547, - "learning_rate": 8.558090452261306e-05, - "loss": 5.5227, - "step": 14851 - }, - { - "epoch": 7.745501955671447, - "grad_norm": 1.4597938060760498, - "learning_rate": 8.557989949748744e-05, - "loss": 5.2178, - "step": 14852 - }, - { - "epoch": 7.746023468057366, - "grad_norm": 1.3471705913543701, - "learning_rate": 8.557889447236182e-05, - "loss": 5.6795, - "step": 14853 - }, - { - "epoch": 7.746544980443286, - "grad_norm": 1.351319670677185, - "learning_rate": 8.557788944723618e-05, - "loss": 5.7393, - "step": 14854 - }, - { - "epoch": 7.747066492829204, - "grad_norm": 1.4462369680404663, - "learning_rate": 8.557688442211056e-05, - "loss": 5.6822, - "step": 14855 - }, - { - "epoch": 7.747588005215124, - "grad_norm": 1.5280177593231201, - "learning_rate": 8.557587939698493e-05, - "loss": 5.321, - "step": 14856 - }, - { - "epoch": 7.748109517601043, - "grad_norm": 1.3730849027633667, - "learning_rate": 8.55748743718593e-05, - "loss": 5.8742, - "step": 14857 - }, - { - "epoch": 7.748631029986962, - "grad_norm": 1.318661093711853, - "learning_rate": 8.557386934673367e-05, - "loss": 5.4389, - "step": 14858 - }, - { - "epoch": 7.749152542372881, - "grad_norm": 1.4118945598602295, - "learning_rate": 8.557286432160805e-05, - "loss": 5.2063, - "step": 14859 - }, - { - "epoch": 7.749674054758801, - "grad_norm": 1.4344338178634644, - "learning_rate": 8.557185929648241e-05, - "loss": 5.5654, - "step": 14860 - }, - { - "epoch": 7.750195567144719, - "grad_norm": 1.392613172531128, - "learning_rate": 8.557085427135679e-05, - "loss": 5.5008, - "step": 14861 - }, - { - "epoch": 7.750717079530639, - "grad_norm": 1.3882275819778442, - "learning_rate": 8.556984924623117e-05, - "loss": 5.6829, - "step": 14862 - }, - { - "epoch": 7.751238591916558, - "grad_norm": 1.5139144659042358, - "learning_rate": 8.556884422110553e-05, - "loss": 5.4817, - "step": 14863 - }, - { - "epoch": 7.751760104302477, - "grad_norm": 1.4410115480422974, - "learning_rate": 8.556783919597991e-05, - "loss": 5.467, - "step": 14864 - }, - { - "epoch": 7.752281616688396, - "grad_norm": 1.3939090967178345, - "learning_rate": 8.556683417085427e-05, - "loss": 5.6303, - "step": 14865 - }, - { - "epoch": 7.752803129074316, - "grad_norm": 1.6484264135360718, - "learning_rate": 8.556582914572865e-05, - "loss": 4.5525, - "step": 14866 - }, - { - "epoch": 7.753324641460234, - "grad_norm": 1.347132682800293, - "learning_rate": 8.556482412060301e-05, - "loss": 5.404, - "step": 14867 - }, - { - "epoch": 7.753846153846154, - "grad_norm": 1.3919563293457031, - "learning_rate": 8.556381909547739e-05, - "loss": 5.5045, - "step": 14868 - }, - { - "epoch": 7.754367666232073, - "grad_norm": 1.3859913349151611, - "learning_rate": 8.556281407035176e-05, - "loss": 5.5335, - "step": 14869 - }, - { - "epoch": 7.754889178617992, - "grad_norm": 1.341484546661377, - "learning_rate": 8.556180904522613e-05, - "loss": 5.4631, - "step": 14870 - }, - { - "epoch": 7.755410691003911, - "grad_norm": 1.4081883430480957, - "learning_rate": 8.55608040201005e-05, - "loss": 5.6345, - "step": 14871 - }, - { - "epoch": 7.755932203389831, - "grad_norm": 1.4294639825820923, - "learning_rate": 8.555979899497488e-05, - "loss": 5.4696, - "step": 14872 - }, - { - "epoch": 7.756453715775749, - "grad_norm": 1.3975739479064941, - "learning_rate": 8.555879396984925e-05, - "loss": 5.8295, - "step": 14873 - }, - { - "epoch": 7.756975228161669, - "grad_norm": 1.3718607425689697, - "learning_rate": 8.555778894472363e-05, - "loss": 5.7622, - "step": 14874 - }, - { - "epoch": 7.757496740547588, - "grad_norm": 1.4000059366226196, - "learning_rate": 8.5556783919598e-05, - "loss": 5.8113, - "step": 14875 - }, - { - "epoch": 7.758018252933507, - "grad_norm": 1.6417006254196167, - "learning_rate": 8.555577889447236e-05, - "loss": 4.9166, - "step": 14876 - }, - { - "epoch": 7.758539765319426, - "grad_norm": 1.366329550743103, - "learning_rate": 8.555477386934674e-05, - "loss": 5.8791, - "step": 14877 - }, - { - "epoch": 7.759061277705346, - "grad_norm": 1.49155855178833, - "learning_rate": 8.55537688442211e-05, - "loss": 5.6821, - "step": 14878 - }, - { - "epoch": 7.759582790091264, - "grad_norm": 1.4981639385223389, - "learning_rate": 8.555276381909548e-05, - "loss": 5.2831, - "step": 14879 - }, - { - "epoch": 7.760104302477184, - "grad_norm": 1.4202502965927124, - "learning_rate": 8.555175879396984e-05, - "loss": 5.4306, - "step": 14880 - }, - { - "epoch": 7.760625814863103, - "grad_norm": 1.4384582042694092, - "learning_rate": 8.555075376884422e-05, - "loss": 5.6187, - "step": 14881 - }, - { - "epoch": 7.761147327249022, - "grad_norm": 1.5004130601882935, - "learning_rate": 8.55497487437186e-05, - "loss": 5.5152, - "step": 14882 - }, - { - "epoch": 7.761668839634941, - "grad_norm": 1.4274336099624634, - "learning_rate": 8.554874371859298e-05, - "loss": 5.6489, - "step": 14883 - }, - { - "epoch": 7.762190352020861, - "grad_norm": 1.4857982397079468, - "learning_rate": 8.554773869346734e-05, - "loss": 5.6123, - "step": 14884 - }, - { - "epoch": 7.762711864406779, - "grad_norm": 1.3764257431030273, - "learning_rate": 8.554673366834172e-05, - "loss": 5.6241, - "step": 14885 - }, - { - "epoch": 7.763233376792699, - "grad_norm": 1.520395278930664, - "learning_rate": 8.554572864321608e-05, - "loss": 5.1258, - "step": 14886 - }, - { - "epoch": 7.763754889178618, - "grad_norm": 1.4276330471038818, - "learning_rate": 8.554472361809046e-05, - "loss": 5.3689, - "step": 14887 - }, - { - "epoch": 7.764276401564537, - "grad_norm": 1.5538287162780762, - "learning_rate": 8.554371859296483e-05, - "loss": 5.4306, - "step": 14888 - }, - { - "epoch": 7.764797913950456, - "grad_norm": 1.5398759841918945, - "learning_rate": 8.55427135678392e-05, - "loss": 5.3133, - "step": 14889 - }, - { - "epoch": 7.765319426336376, - "grad_norm": 1.3996349573135376, - "learning_rate": 8.554170854271357e-05, - "loss": 5.58, - "step": 14890 - }, - { - "epoch": 7.765840938722294, - "grad_norm": 1.4135024547576904, - "learning_rate": 8.554070351758793e-05, - "loss": 5.8082, - "step": 14891 - }, - { - "epoch": 7.766362451108214, - "grad_norm": 1.3489164113998413, - "learning_rate": 8.553969849246231e-05, - "loss": 5.8045, - "step": 14892 - }, - { - "epoch": 7.766883963494133, - "grad_norm": 1.4408648014068604, - "learning_rate": 8.553869346733669e-05, - "loss": 5.5447, - "step": 14893 - }, - { - "epoch": 7.767405475880052, - "grad_norm": 1.426633358001709, - "learning_rate": 8.553768844221107e-05, - "loss": 5.7946, - "step": 14894 - }, - { - "epoch": 7.767926988265971, - "grad_norm": 1.4904190301895142, - "learning_rate": 8.553668341708543e-05, - "loss": 5.7233, - "step": 14895 - }, - { - "epoch": 7.768448500651891, - "grad_norm": 1.3592597246170044, - "learning_rate": 8.553567839195981e-05, - "loss": 5.8029, - "step": 14896 - }, - { - "epoch": 7.768970013037809, - "grad_norm": 1.4270555973052979, - "learning_rate": 8.553467336683417e-05, - "loss": 5.3394, - "step": 14897 - }, - { - "epoch": 7.769491525423729, - "grad_norm": 1.3691420555114746, - "learning_rate": 8.553366834170855e-05, - "loss": 5.7616, - "step": 14898 - }, - { - "epoch": 7.770013037809648, - "grad_norm": 1.4500921964645386, - "learning_rate": 8.553266331658292e-05, - "loss": 5.4399, - "step": 14899 - }, - { - "epoch": 7.770534550195567, - "grad_norm": 1.4810245037078857, - "learning_rate": 8.553165829145729e-05, - "loss": 5.5693, - "step": 14900 - }, - { - "epoch": 7.771056062581486, - "grad_norm": 1.4829703569412231, - "learning_rate": 8.553065326633166e-05, - "loss": 5.2089, - "step": 14901 - }, - { - "epoch": 7.771577574967406, - "grad_norm": 1.5846468210220337, - "learning_rate": 8.552964824120603e-05, - "loss": 5.3582, - "step": 14902 - }, - { - "epoch": 7.772099087353324, - "grad_norm": 1.5076220035552979, - "learning_rate": 8.552864321608041e-05, - "loss": 4.9961, - "step": 14903 - }, - { - "epoch": 7.772620599739244, - "grad_norm": 1.4856815338134766, - "learning_rate": 8.552763819095478e-05, - "loss": 5.5356, - "step": 14904 - }, - { - "epoch": 7.7731421121251625, - "grad_norm": 1.3288263082504272, - "learning_rate": 8.552663316582915e-05, - "loss": 5.6145, - "step": 14905 - }, - { - "epoch": 7.773663624511082, - "grad_norm": 1.5263499021530151, - "learning_rate": 8.552562814070352e-05, - "loss": 5.3872, - "step": 14906 - }, - { - "epoch": 7.774185136897001, - "grad_norm": 1.4119091033935547, - "learning_rate": 8.55246231155779e-05, - "loss": 5.485, - "step": 14907 - }, - { - "epoch": 7.774706649282921, - "grad_norm": 1.3823961019515991, - "learning_rate": 8.552361809045226e-05, - "loss": 5.7401, - "step": 14908 - }, - { - "epoch": 7.7752281616688395, - "grad_norm": 1.4780751466751099, - "learning_rate": 8.552261306532664e-05, - "loss": 5.5376, - "step": 14909 - }, - { - "epoch": 7.775749674054759, - "grad_norm": 1.429235577583313, - "learning_rate": 8.5521608040201e-05, - "loss": 5.4443, - "step": 14910 - }, - { - "epoch": 7.7762711864406775, - "grad_norm": 1.5963925123214722, - "learning_rate": 8.552060301507538e-05, - "loss": 5.2469, - "step": 14911 - }, - { - "epoch": 7.776792698826597, - "grad_norm": 1.5040754079818726, - "learning_rate": 8.551959798994975e-05, - "loss": 5.5508, - "step": 14912 - }, - { - "epoch": 7.777314211212516, - "grad_norm": 1.386915683746338, - "learning_rate": 8.551859296482412e-05, - "loss": 5.8192, - "step": 14913 - }, - { - "epoch": 7.777835723598436, - "grad_norm": 1.442958950996399, - "learning_rate": 8.55175879396985e-05, - "loss": 5.5002, - "step": 14914 - }, - { - "epoch": 7.7783572359843545, - "grad_norm": 1.4908467531204224, - "learning_rate": 8.551658291457288e-05, - "loss": 4.9773, - "step": 14915 - }, - { - "epoch": 7.778878748370274, - "grad_norm": 1.4285255670547485, - "learning_rate": 8.551557788944724e-05, - "loss": 5.6449, - "step": 14916 - }, - { - "epoch": 7.7794002607561925, - "grad_norm": 1.462139368057251, - "learning_rate": 8.551457286432161e-05, - "loss": 5.1195, - "step": 14917 - }, - { - "epoch": 7.779921773142112, - "grad_norm": 1.5461052656173706, - "learning_rate": 8.551356783919599e-05, - "loss": 5.4099, - "step": 14918 - }, - { - "epoch": 7.780443285528031, - "grad_norm": 1.354026436805725, - "learning_rate": 8.551256281407035e-05, - "loss": 5.6542, - "step": 14919 - }, - { - "epoch": 7.780964797913951, - "grad_norm": 1.4298937320709229, - "learning_rate": 8.551155778894473e-05, - "loss": 5.5751, - "step": 14920 - }, - { - "epoch": 7.7814863102998695, - "grad_norm": 1.3176008462905884, - "learning_rate": 8.551055276381909e-05, - "loss": 5.684, - "step": 14921 - }, - { - "epoch": 7.782007822685789, - "grad_norm": 1.3620240688323975, - "learning_rate": 8.550954773869347e-05, - "loss": 5.8003, - "step": 14922 - }, - { - "epoch": 7.7825293350717075, - "grad_norm": 1.5781197547912598, - "learning_rate": 8.550854271356785e-05, - "loss": 5.6626, - "step": 14923 - }, - { - "epoch": 7.783050847457627, - "grad_norm": 1.5242420434951782, - "learning_rate": 8.550753768844223e-05, - "loss": 5.2804, - "step": 14924 - }, - { - "epoch": 7.7835723598435465, - "grad_norm": 1.3122990131378174, - "learning_rate": 8.550653266331659e-05, - "loss": 5.811, - "step": 14925 - }, - { - "epoch": 7.784093872229466, - "grad_norm": 1.4122151136398315, - "learning_rate": 8.550552763819097e-05, - "loss": 5.5895, - "step": 14926 - }, - { - "epoch": 7.7846153846153845, - "grad_norm": 1.452530860900879, - "learning_rate": 8.550452261306533e-05, - "loss": 5.5226, - "step": 14927 - }, - { - "epoch": 7.785136897001304, - "grad_norm": 1.4963266849517822, - "learning_rate": 8.550351758793971e-05, - "loss": 5.7235, - "step": 14928 - }, - { - "epoch": 7.7856584093872225, - "grad_norm": 1.647230863571167, - "learning_rate": 8.550251256281407e-05, - "loss": 5.2832, - "step": 14929 - }, - { - "epoch": 7.786179921773142, - "grad_norm": 1.3008484840393066, - "learning_rate": 8.550150753768844e-05, - "loss": 5.8745, - "step": 14930 - }, - { - "epoch": 7.7867014341590615, - "grad_norm": 1.3708586692810059, - "learning_rate": 8.550050251256282e-05, - "loss": 5.6008, - "step": 14931 - }, - { - "epoch": 7.787222946544981, - "grad_norm": 1.56390380859375, - "learning_rate": 8.549949748743718e-05, - "loss": 5.6742, - "step": 14932 - }, - { - "epoch": 7.7877444589308995, - "grad_norm": 1.610580563545227, - "learning_rate": 8.549849246231156e-05, - "loss": 5.3062, - "step": 14933 - }, - { - "epoch": 7.788265971316819, - "grad_norm": 1.531510353088379, - "learning_rate": 8.549748743718594e-05, - "loss": 5.1024, - "step": 14934 - }, - { - "epoch": 7.7887874837027375, - "grad_norm": 1.4622738361358643, - "learning_rate": 8.549648241206031e-05, - "loss": 5.4442, - "step": 14935 - }, - { - "epoch": 7.789308996088657, - "grad_norm": 1.540500283241272, - "learning_rate": 8.549547738693468e-05, - "loss": 5.9065, - "step": 14936 - }, - { - "epoch": 7.7898305084745765, - "grad_norm": 1.4684021472930908, - "learning_rate": 8.549447236180906e-05, - "loss": 5.6382, - "step": 14937 - }, - { - "epoch": 7.790352020860495, - "grad_norm": 1.5925712585449219, - "learning_rate": 8.549346733668342e-05, - "loss": 5.2667, - "step": 14938 - }, - { - "epoch": 7.7908735332464145, - "grad_norm": 1.4803155660629272, - "learning_rate": 8.54924623115578e-05, - "loss": 5.7217, - "step": 14939 - }, - { - "epoch": 7.791395045632334, - "grad_norm": 1.3818414211273193, - "learning_rate": 8.549145728643216e-05, - "loss": 5.4047, - "step": 14940 - }, - { - "epoch": 7.791916558018253, - "grad_norm": 1.4123834371566772, - "learning_rate": 8.549045226130654e-05, - "loss": 5.1711, - "step": 14941 - }, - { - "epoch": 7.792438070404172, - "grad_norm": 1.3774057626724243, - "learning_rate": 8.54894472361809e-05, - "loss": 5.7568, - "step": 14942 - }, - { - "epoch": 7.7929595827900915, - "grad_norm": 1.3357653617858887, - "learning_rate": 8.548844221105527e-05, - "loss": 5.8832, - "step": 14943 - }, - { - "epoch": 7.79348109517601, - "grad_norm": 1.3212697505950928, - "learning_rate": 8.548743718592965e-05, - "loss": 5.5552, - "step": 14944 - }, - { - "epoch": 7.7940026075619295, - "grad_norm": 1.335204839706421, - "learning_rate": 8.548643216080402e-05, - "loss": 6.042, - "step": 14945 - }, - { - "epoch": 7.794524119947849, - "grad_norm": 1.3908599615097046, - "learning_rate": 8.54854271356784e-05, - "loss": 5.3363, - "step": 14946 - }, - { - "epoch": 7.795045632333768, - "grad_norm": 1.30963134765625, - "learning_rate": 8.548442211055277e-05, - "loss": 5.7074, - "step": 14947 - }, - { - "epoch": 7.795567144719687, - "grad_norm": 1.3709102869033813, - "learning_rate": 8.548341708542714e-05, - "loss": 5.5427, - "step": 14948 - }, - { - "epoch": 7.7960886571056065, - "grad_norm": 1.6157333850860596, - "learning_rate": 8.548241206030151e-05, - "loss": 5.2273, - "step": 14949 - }, - { - "epoch": 7.796610169491525, - "grad_norm": 1.4239939451217651, - "learning_rate": 8.548140703517589e-05, - "loss": 5.1344, - "step": 14950 - }, - { - "epoch": 7.7971316818774445, - "grad_norm": 1.435671091079712, - "learning_rate": 8.548040201005025e-05, - "loss": 5.9393, - "step": 14951 - }, - { - "epoch": 7.797653194263364, - "grad_norm": 1.5465866327285767, - "learning_rate": 8.547939698492463e-05, - "loss": 5.3754, - "step": 14952 - }, - { - "epoch": 7.798174706649283, - "grad_norm": 1.487158179283142, - "learning_rate": 8.547839195979899e-05, - "loss": 5.3784, - "step": 14953 - }, - { - "epoch": 7.798696219035202, - "grad_norm": 1.4286308288574219, - "learning_rate": 8.547738693467337e-05, - "loss": 5.4272, - "step": 14954 - }, - { - "epoch": 7.7992177314211215, - "grad_norm": 1.3836826086044312, - "learning_rate": 8.547638190954775e-05, - "loss": 5.7135, - "step": 14955 - }, - { - "epoch": 7.79973924380704, - "grad_norm": 1.4383779764175415, - "learning_rate": 8.547537688442211e-05, - "loss": 5.5651, - "step": 14956 - }, - { - "epoch": 7.8002607561929596, - "grad_norm": 1.3960251808166504, - "learning_rate": 8.547437185929649e-05, - "loss": 5.6825, - "step": 14957 - }, - { - "epoch": 7.800782268578879, - "grad_norm": 1.378402590751648, - "learning_rate": 8.547336683417085e-05, - "loss": 5.7609, - "step": 14958 - }, - { - "epoch": 7.801303780964798, - "grad_norm": 1.3570935726165771, - "learning_rate": 8.547236180904523e-05, - "loss": 5.8092, - "step": 14959 - }, - { - "epoch": 7.801825293350717, - "grad_norm": 1.3703584671020508, - "learning_rate": 8.54713567839196e-05, - "loss": 5.7603, - "step": 14960 - }, - { - "epoch": 7.8023468057366365, - "grad_norm": 1.4241948127746582, - "learning_rate": 8.547035175879397e-05, - "loss": 5.7046, - "step": 14961 - }, - { - "epoch": 7.802868318122555, - "grad_norm": 1.4227250814437866, - "learning_rate": 8.546934673366834e-05, - "loss": 5.5059, - "step": 14962 - }, - { - "epoch": 7.803389830508475, - "grad_norm": 1.313709020614624, - "learning_rate": 8.546834170854272e-05, - "loss": 5.6134, - "step": 14963 - }, - { - "epoch": 7.803911342894394, - "grad_norm": 1.4118183851242065, - "learning_rate": 8.546733668341708e-05, - "loss": 5.5656, - "step": 14964 - }, - { - "epoch": 7.804432855280313, - "grad_norm": 1.5375540256500244, - "learning_rate": 8.546633165829146e-05, - "loss": 5.5011, - "step": 14965 - }, - { - "epoch": 7.804954367666232, - "grad_norm": 1.3843860626220703, - "learning_rate": 8.546532663316584e-05, - "loss": 5.5353, - "step": 14966 - }, - { - "epoch": 7.8054758800521515, - "grad_norm": 1.4926122426986694, - "learning_rate": 8.546432160804021e-05, - "loss": 5.6153, - "step": 14967 - }, - { - "epoch": 7.80599739243807, - "grad_norm": 1.3765252828598022, - "learning_rate": 8.546331658291458e-05, - "loss": 5.6661, - "step": 14968 - }, - { - "epoch": 7.80651890482399, - "grad_norm": 1.4020739793777466, - "learning_rate": 8.546231155778894e-05, - "loss": 5.671, - "step": 14969 - }, - { - "epoch": 7.807040417209909, - "grad_norm": 1.4202426671981812, - "learning_rate": 8.546130653266332e-05, - "loss": 6.0366, - "step": 14970 - }, - { - "epoch": 7.807561929595828, - "grad_norm": 1.4802809953689575, - "learning_rate": 8.546030150753768e-05, - "loss": 5.2948, - "step": 14971 - }, - { - "epoch": 7.808083441981747, - "grad_norm": 1.3885397911071777, - "learning_rate": 8.545929648241206e-05, - "loss": 5.7996, - "step": 14972 - }, - { - "epoch": 7.8086049543676666, - "grad_norm": 1.4254744052886963, - "learning_rate": 8.545829145728643e-05, - "loss": 5.5984, - "step": 14973 - }, - { - "epoch": 7.809126466753585, - "grad_norm": 1.3783246278762817, - "learning_rate": 8.54572864321608e-05, - "loss": 5.9201, - "step": 14974 - }, - { - "epoch": 7.809647979139505, - "grad_norm": 1.5453559160232544, - "learning_rate": 8.545628140703518e-05, - "loss": 4.978, - "step": 14975 - }, - { - "epoch": 7.810169491525424, - "grad_norm": 1.3881481885910034, - "learning_rate": 8.545527638190956e-05, - "loss": 5.7656, - "step": 14976 - }, - { - "epoch": 7.810691003911343, - "grad_norm": 1.4385395050048828, - "learning_rate": 8.545427135678392e-05, - "loss": 5.6781, - "step": 14977 - }, - { - "epoch": 7.811212516297262, - "grad_norm": 1.3628040552139282, - "learning_rate": 8.54532663316583e-05, - "loss": 5.8769, - "step": 14978 - }, - { - "epoch": 7.811734028683182, - "grad_norm": 1.5564589500427246, - "learning_rate": 8.545226130653267e-05, - "loss": 4.9415, - "step": 14979 - }, - { - "epoch": 7.8122555410691, - "grad_norm": 1.4005335569381714, - "learning_rate": 8.545125628140704e-05, - "loss": 5.4438, - "step": 14980 - }, - { - "epoch": 7.81277705345502, - "grad_norm": 1.3782708644866943, - "learning_rate": 8.545025125628141e-05, - "loss": 5.674, - "step": 14981 - }, - { - "epoch": 7.813298565840939, - "grad_norm": 1.6356866359710693, - "learning_rate": 8.544924623115579e-05, - "loss": 5.0197, - "step": 14982 - }, - { - "epoch": 7.813820078226858, - "grad_norm": 1.4557808637619019, - "learning_rate": 8.544824120603015e-05, - "loss": 5.6339, - "step": 14983 - }, - { - "epoch": 7.814341590612777, - "grad_norm": 1.5113551616668701, - "learning_rate": 8.544723618090452e-05, - "loss": 4.9117, - "step": 14984 - }, - { - "epoch": 7.814863102998697, - "grad_norm": 1.6178703308105469, - "learning_rate": 8.54462311557789e-05, - "loss": 5.3937, - "step": 14985 - }, - { - "epoch": 7.815384615384615, - "grad_norm": 1.593268632888794, - "learning_rate": 8.544522613065327e-05, - "loss": 5.494, - "step": 14986 - }, - { - "epoch": 7.815906127770535, - "grad_norm": 1.622837781906128, - "learning_rate": 8.544422110552765e-05, - "loss": 5.3244, - "step": 14987 - }, - { - "epoch": 7.816427640156454, - "grad_norm": 1.4447187185287476, - "learning_rate": 8.544321608040201e-05, - "loss": 5.758, - "step": 14988 - }, - { - "epoch": 7.816949152542373, - "grad_norm": 1.3926409482955933, - "learning_rate": 8.544221105527639e-05, - "loss": 5.6213, - "step": 14989 - }, - { - "epoch": 7.817470664928292, - "grad_norm": 1.4714878797531128, - "learning_rate": 8.544120603015076e-05, - "loss": 5.3579, - "step": 14990 - }, - { - "epoch": 7.817992177314212, - "grad_norm": 1.4858633279800415, - "learning_rate": 8.544020100502513e-05, - "loss": 5.6764, - "step": 14991 - }, - { - "epoch": 7.81851368970013, - "grad_norm": 1.4992055892944336, - "learning_rate": 8.54391959798995e-05, - "loss": 5.7646, - "step": 14992 - }, - { - "epoch": 7.81903520208605, - "grad_norm": 1.3898687362670898, - "learning_rate": 8.543819095477388e-05, - "loss": 5.3982, - "step": 14993 - }, - { - "epoch": 7.819556714471969, - "grad_norm": 1.3627413511276245, - "learning_rate": 8.543718592964824e-05, - "loss": 5.9543, - "step": 14994 - }, - { - "epoch": 7.820078226857888, - "grad_norm": 1.4318257570266724, - "learning_rate": 8.543618090452262e-05, - "loss": 5.6955, - "step": 14995 - }, - { - "epoch": 7.820599739243807, - "grad_norm": 1.4412955045700073, - "learning_rate": 8.5435175879397e-05, - "loss": 5.6002, - "step": 14996 - }, - { - "epoch": 7.821121251629727, - "grad_norm": 1.37083899974823, - "learning_rate": 8.543417085427136e-05, - "loss": 5.9952, - "step": 14997 - }, - { - "epoch": 7.821642764015645, - "grad_norm": 1.443403959274292, - "learning_rate": 8.543316582914574e-05, - "loss": 5.3132, - "step": 14998 - }, - { - "epoch": 7.822164276401565, - "grad_norm": 1.4042354822158813, - "learning_rate": 8.54321608040201e-05, - "loss": 5.7752, - "step": 14999 - }, - { - "epoch": 7.822685788787483, - "grad_norm": 1.4015518426895142, - "learning_rate": 8.543115577889448e-05, - "loss": 5.4686, - "step": 15000 - }, - { - "epoch": 7.822685788787483, - "eval_loss": 5.586970806121826, - "eval_runtime": 42.6841, - "eval_samples_per_second": 28.723, - "eval_steps_per_second": 3.608, - "step": 15000 - }, - { - "epoch": 7.823207301173403, - "grad_norm": 1.6287904977798462, - "learning_rate": 8.543015075376884e-05, - "loss": 4.6893, - "step": 15001 - }, - { - "epoch": 7.823728813559322, - "grad_norm": 1.4765905141830444, - "learning_rate": 8.542914572864322e-05, - "loss": 5.3284, - "step": 15002 - }, - { - "epoch": 7.824250325945242, - "grad_norm": 1.3667665719985962, - "learning_rate": 8.542814070351759e-05, - "loss": 5.5729, - "step": 15003 - }, - { - "epoch": 7.82477183833116, - "grad_norm": 1.5398184061050415, - "learning_rate": 8.542713567839196e-05, - "loss": 5.0842, - "step": 15004 - }, - { - "epoch": 7.82529335071708, - "grad_norm": 1.7367740869522095, - "learning_rate": 8.542613065326633e-05, - "loss": 5.0605, - "step": 15005 - }, - { - "epoch": 7.825814863102998, - "grad_norm": 1.4108339548110962, - "learning_rate": 8.54251256281407e-05, - "loss": 5.1719, - "step": 15006 - }, - { - "epoch": 7.826336375488918, - "grad_norm": 1.4791722297668457, - "learning_rate": 8.542412060301508e-05, - "loss": 5.2301, - "step": 15007 - }, - { - "epoch": 7.826857887874837, - "grad_norm": 1.4308544397354126, - "learning_rate": 8.542311557788946e-05, - "loss": 5.3753, - "step": 15008 - }, - { - "epoch": 7.827379400260757, - "grad_norm": 1.4527614116668701, - "learning_rate": 8.542211055276383e-05, - "loss": 5.9292, - "step": 15009 - }, - { - "epoch": 7.827900912646675, - "grad_norm": 1.535609245300293, - "learning_rate": 8.542110552763819e-05, - "loss": 5.3159, - "step": 15010 - }, - { - "epoch": 7.828422425032595, - "grad_norm": 1.4543719291687012, - "learning_rate": 8.542010050251257e-05, - "loss": 5.8706, - "step": 15011 - }, - { - "epoch": 7.828943937418513, - "grad_norm": 1.4280061721801758, - "learning_rate": 8.541909547738693e-05, - "loss": 5.5142, - "step": 15012 - }, - { - "epoch": 7.829465449804433, - "grad_norm": 1.5681068897247314, - "learning_rate": 8.541809045226131e-05, - "loss": 4.9883, - "step": 15013 - }, - { - "epoch": 7.829986962190352, - "grad_norm": 1.482130765914917, - "learning_rate": 8.541708542713567e-05, - "loss": 5.3713, - "step": 15014 - }, - { - "epoch": 7.830508474576272, - "grad_norm": 1.4447333812713623, - "learning_rate": 8.541608040201005e-05, - "loss": 5.7443, - "step": 15015 - }, - { - "epoch": 7.83102998696219, - "grad_norm": 1.3203773498535156, - "learning_rate": 8.541507537688443e-05, - "loss": 5.7947, - "step": 15016 - }, - { - "epoch": 7.83155149934811, - "grad_norm": 1.375363826751709, - "learning_rate": 8.541407035175881e-05, - "loss": 5.5196, - "step": 15017 - }, - { - "epoch": 7.832073011734028, - "grad_norm": 1.3012640476226807, - "learning_rate": 8.541306532663317e-05, - "loss": 6.0431, - "step": 15018 - }, - { - "epoch": 7.832594524119948, - "grad_norm": 1.3550634384155273, - "learning_rate": 8.541206030150755e-05, - "loss": 5.7213, - "step": 15019 - }, - { - "epoch": 7.833116036505867, - "grad_norm": 1.5413992404937744, - "learning_rate": 8.541105527638191e-05, - "loss": 5.1833, - "step": 15020 - }, - { - "epoch": 7.833637548891787, - "grad_norm": 1.3638540506362915, - "learning_rate": 8.541005025125629e-05, - "loss": 5.5447, - "step": 15021 - }, - { - "epoch": 7.834159061277705, - "grad_norm": 1.4122185707092285, - "learning_rate": 8.540904522613066e-05, - "loss": 5.8244, - "step": 15022 - }, - { - "epoch": 7.834680573663625, - "grad_norm": 1.6213678121566772, - "learning_rate": 8.540804020100502e-05, - "loss": 5.4667, - "step": 15023 - }, - { - "epoch": 7.835202086049543, - "grad_norm": 1.5544401407241821, - "learning_rate": 8.54070351758794e-05, - "loss": 5.622, - "step": 15024 - }, - { - "epoch": 7.835723598435463, - "grad_norm": 1.3995805978775024, - "learning_rate": 8.540603015075376e-05, - "loss": 5.679, - "step": 15025 - }, - { - "epoch": 7.836245110821382, - "grad_norm": 1.2829991579055786, - "learning_rate": 8.540502512562814e-05, - "loss": 5.8957, - "step": 15026 - }, - { - "epoch": 7.836766623207302, - "grad_norm": 1.494748592376709, - "learning_rate": 8.540402010050252e-05, - "loss": 5.2327, - "step": 15027 - }, - { - "epoch": 7.83728813559322, - "grad_norm": 1.6106172800064087, - "learning_rate": 8.54030150753769e-05, - "loss": 5.2702, - "step": 15028 - }, - { - "epoch": 7.83780964797914, - "grad_norm": 1.4900389909744263, - "learning_rate": 8.540201005025126e-05, - "loss": 5.4684, - "step": 15029 - }, - { - "epoch": 7.838331160365058, - "grad_norm": 1.3579721450805664, - "learning_rate": 8.540100502512564e-05, - "loss": 5.5901, - "step": 15030 - }, - { - "epoch": 7.838852672750978, - "grad_norm": 1.5330694913864136, - "learning_rate": 8.54e-05, - "loss": 5.103, - "step": 15031 - }, - { - "epoch": 7.839374185136897, - "grad_norm": 1.4363458156585693, - "learning_rate": 8.539899497487438e-05, - "loss": 5.6268, - "step": 15032 - }, - { - "epoch": 7.839895697522816, - "grad_norm": 1.6081088781356812, - "learning_rate": 8.539798994974874e-05, - "loss": 5.439, - "step": 15033 - }, - { - "epoch": 7.840417209908735, - "grad_norm": 1.3614498376846313, - "learning_rate": 8.539698492462312e-05, - "loss": 5.1323, - "step": 15034 - }, - { - "epoch": 7.840938722294655, - "grad_norm": 1.5176000595092773, - "learning_rate": 8.539597989949749e-05, - "loss": 5.6593, - "step": 15035 - }, - { - "epoch": 7.841460234680573, - "grad_norm": 1.502511978149414, - "learning_rate": 8.539497487437186e-05, - "loss": 5.8224, - "step": 15036 - }, - { - "epoch": 7.841981747066493, - "grad_norm": 1.3678221702575684, - "learning_rate": 8.539396984924624e-05, - "loss": 5.6818, - "step": 15037 - }, - { - "epoch": 7.842503259452412, - "grad_norm": 1.548999547958374, - "learning_rate": 8.53929648241206e-05, - "loss": 5.5301, - "step": 15038 - }, - { - "epoch": 7.843024771838331, - "grad_norm": 1.56722891330719, - "learning_rate": 8.539195979899498e-05, - "loss": 5.5254, - "step": 15039 - }, - { - "epoch": 7.84354628422425, - "grad_norm": 1.4623483419418335, - "learning_rate": 8.539095477386935e-05, - "loss": 5.6195, - "step": 15040 - }, - { - "epoch": 7.84406779661017, - "grad_norm": 1.4051355123519897, - "learning_rate": 8.538994974874373e-05, - "loss": 5.6393, - "step": 15041 - }, - { - "epoch": 7.844589308996088, - "grad_norm": 1.5261871814727783, - "learning_rate": 8.538894472361809e-05, - "loss": 4.897, - "step": 15042 - }, - { - "epoch": 7.845110821382008, - "grad_norm": 1.5428394079208374, - "learning_rate": 8.538793969849247e-05, - "loss": 5.4516, - "step": 15043 - }, - { - "epoch": 7.845632333767927, - "grad_norm": 1.5369852781295776, - "learning_rate": 8.538693467336683e-05, - "loss": 5.4793, - "step": 15044 - }, - { - "epoch": 7.846153846153846, - "grad_norm": 1.4259412288665771, - "learning_rate": 8.538592964824121e-05, - "loss": 5.7292, - "step": 15045 - }, - { - "epoch": 7.846675358539765, - "grad_norm": 1.391097903251648, - "learning_rate": 8.538492462311557e-05, - "loss": 6.0522, - "step": 15046 - }, - { - "epoch": 7.847196870925685, - "grad_norm": 1.5544253587722778, - "learning_rate": 8.538391959798995e-05, - "loss": 5.4924, - "step": 15047 - }, - { - "epoch": 7.847718383311603, - "grad_norm": 1.4836848974227905, - "learning_rate": 8.538291457286433e-05, - "loss": 5.4672, - "step": 15048 - }, - { - "epoch": 7.848239895697523, - "grad_norm": 1.4115604162216187, - "learning_rate": 8.53819095477387e-05, - "loss": 5.5548, - "step": 15049 - }, - { - "epoch": 7.848761408083442, - "grad_norm": 1.4422063827514648, - "learning_rate": 8.538090452261307e-05, - "loss": 5.5241, - "step": 15050 - }, - { - "epoch": 7.849282920469361, - "grad_norm": 1.9059169292449951, - "learning_rate": 8.537989949748744e-05, - "loss": 5.2593, - "step": 15051 - }, - { - "epoch": 7.84980443285528, - "grad_norm": 1.6694374084472656, - "learning_rate": 8.537889447236181e-05, - "loss": 5.5679, - "step": 15052 - }, - { - "epoch": 7.8503259452412, - "grad_norm": 1.4957221746444702, - "learning_rate": 8.537788944723618e-05, - "loss": 5.1374, - "step": 15053 - }, - { - "epoch": 7.850847457627118, - "grad_norm": 1.4089535474777222, - "learning_rate": 8.537688442211056e-05, - "loss": 5.709, - "step": 15054 - }, - { - "epoch": 7.851368970013038, - "grad_norm": 1.2785613536834717, - "learning_rate": 8.537587939698492e-05, - "loss": 5.8871, - "step": 15055 - }, - { - "epoch": 7.851890482398957, - "grad_norm": 1.4616209268569946, - "learning_rate": 8.53748743718593e-05, - "loss": 5.6016, - "step": 15056 - }, - { - "epoch": 7.852411994784876, - "grad_norm": 1.4319452047348022, - "learning_rate": 8.537386934673368e-05, - "loss": 5.6519, - "step": 15057 - }, - { - "epoch": 7.852933507170795, - "grad_norm": 1.5094350576400757, - "learning_rate": 8.537286432160805e-05, - "loss": 5.0269, - "step": 15058 - }, - { - "epoch": 7.853455019556715, - "grad_norm": 1.488564372062683, - "learning_rate": 8.537185929648242e-05, - "loss": 5.4592, - "step": 15059 - }, - { - "epoch": 7.853976531942633, - "grad_norm": 1.3258891105651855, - "learning_rate": 8.53708542713568e-05, - "loss": 5.2956, - "step": 15060 - }, - { - "epoch": 7.854498044328553, - "grad_norm": 1.536194086074829, - "learning_rate": 8.536984924623116e-05, - "loss": 5.4889, - "step": 15061 - }, - { - "epoch": 7.855019556714472, - "grad_norm": 1.4162681102752686, - "learning_rate": 8.536884422110554e-05, - "loss": 5.9623, - "step": 15062 - }, - { - "epoch": 7.855541069100391, - "grad_norm": 1.4789527654647827, - "learning_rate": 8.53678391959799e-05, - "loss": 5.5669, - "step": 15063 - }, - { - "epoch": 7.85606258148631, - "grad_norm": 1.4228535890579224, - "learning_rate": 8.536683417085427e-05, - "loss": 5.0022, - "step": 15064 - }, - { - "epoch": 7.85658409387223, - "grad_norm": 1.528040885925293, - "learning_rate": 8.536582914572865e-05, - "loss": 5.5102, - "step": 15065 - }, - { - "epoch": 7.857105606258148, - "grad_norm": 1.3114972114562988, - "learning_rate": 8.536482412060301e-05, - "loss": 6.0327, - "step": 15066 - }, - { - "epoch": 7.857627118644068, - "grad_norm": 1.3635048866271973, - "learning_rate": 8.536381909547739e-05, - "loss": 5.7595, - "step": 15067 - }, - { - "epoch": 7.858148631029987, - "grad_norm": 1.3831486701965332, - "learning_rate": 8.536281407035177e-05, - "loss": 5.3657, - "step": 15068 - }, - { - "epoch": 7.858670143415906, - "grad_norm": 1.4145101308822632, - "learning_rate": 8.536180904522614e-05, - "loss": 5.8606, - "step": 15069 - }, - { - "epoch": 7.859191655801825, - "grad_norm": 1.4955402612686157, - "learning_rate": 8.536080402010051e-05, - "loss": 5.5973, - "step": 15070 - }, - { - "epoch": 7.859713168187745, - "grad_norm": 1.379913091659546, - "learning_rate": 8.535979899497489e-05, - "loss": 5.827, - "step": 15071 - }, - { - "epoch": 7.860234680573663, - "grad_norm": 1.4982719421386719, - "learning_rate": 8.535879396984925e-05, - "loss": 5.3341, - "step": 15072 - }, - { - "epoch": 7.860756192959583, - "grad_norm": 1.4902962446212769, - "learning_rate": 8.535778894472363e-05, - "loss": 5.4331, - "step": 15073 - }, - { - "epoch": 7.861277705345502, - "grad_norm": 1.463169813156128, - "learning_rate": 8.535678391959799e-05, - "loss": 5.4924, - "step": 15074 - }, - { - "epoch": 7.861799217731421, - "grad_norm": 1.4087530374526978, - "learning_rate": 8.535577889447237e-05, - "loss": 5.5269, - "step": 15075 - }, - { - "epoch": 7.86232073011734, - "grad_norm": 1.3833088874816895, - "learning_rate": 8.535477386934673e-05, - "loss": 5.557, - "step": 15076 - }, - { - "epoch": 7.86284224250326, - "grad_norm": 1.4652096033096313, - "learning_rate": 8.535376884422111e-05, - "loss": 5.6565, - "step": 15077 - }, - { - "epoch": 7.863363754889178, - "grad_norm": 1.461596965789795, - "learning_rate": 8.535276381909549e-05, - "loss": 5.86, - "step": 15078 - }, - { - "epoch": 7.863885267275098, - "grad_norm": 1.4087460041046143, - "learning_rate": 8.535175879396985e-05, - "loss": 5.4279, - "step": 15079 - }, - { - "epoch": 7.864406779661017, - "grad_norm": 1.4727836847305298, - "learning_rate": 8.535075376884423e-05, - "loss": 5.4585, - "step": 15080 - }, - { - "epoch": 7.864928292046936, - "grad_norm": 1.4426206350326538, - "learning_rate": 8.53497487437186e-05, - "loss": 5.5214, - "step": 15081 - }, - { - "epoch": 7.865449804432855, - "grad_norm": 1.5194305181503296, - "learning_rate": 8.534874371859297e-05, - "loss": 5.4417, - "step": 15082 - }, - { - "epoch": 7.865971316818775, - "grad_norm": 1.4611155986785889, - "learning_rate": 8.534773869346734e-05, - "loss": 5.8036, - "step": 15083 - }, - { - "epoch": 7.866492829204693, - "grad_norm": 1.2946679592132568, - "learning_rate": 8.534673366834172e-05, - "loss": 5.8575, - "step": 15084 - }, - { - "epoch": 7.867014341590613, - "grad_norm": 1.3799494504928589, - "learning_rate": 8.534572864321608e-05, - "loss": 5.5527, - "step": 15085 - }, - { - "epoch": 7.867535853976532, - "grad_norm": 1.3568342924118042, - "learning_rate": 8.534472361809046e-05, - "loss": 5.3962, - "step": 15086 - }, - { - "epoch": 7.868057366362451, - "grad_norm": 1.3647531270980835, - "learning_rate": 8.534371859296482e-05, - "loss": 5.8666, - "step": 15087 - }, - { - "epoch": 7.86857887874837, - "grad_norm": 1.4006540775299072, - "learning_rate": 8.53427135678392e-05, - "loss": 4.536, - "step": 15088 - }, - { - "epoch": 7.869100391134289, - "grad_norm": 1.3616218566894531, - "learning_rate": 8.534170854271358e-05, - "loss": 4.4246, - "step": 15089 - }, - { - "epoch": 7.869621903520208, - "grad_norm": 1.4096369743347168, - "learning_rate": 8.534070351758794e-05, - "loss": 5.2451, - "step": 15090 - }, - { - "epoch": 7.870143415906128, - "grad_norm": 1.5202655792236328, - "learning_rate": 8.533969849246232e-05, - "loss": 5.8854, - "step": 15091 - }, - { - "epoch": 7.870664928292047, - "grad_norm": 1.396000862121582, - "learning_rate": 8.533869346733668e-05, - "loss": 5.5469, - "step": 15092 - }, - { - "epoch": 7.871186440677966, - "grad_norm": 1.4148433208465576, - "learning_rate": 8.533768844221106e-05, - "loss": 5.707, - "step": 15093 - }, - { - "epoch": 7.871707953063885, - "grad_norm": 1.358188509941101, - "learning_rate": 8.533668341708543e-05, - "loss": 5.6775, - "step": 15094 - }, - { - "epoch": 7.872229465449804, - "grad_norm": 1.3573997020721436, - "learning_rate": 8.53356783919598e-05, - "loss": 5.9248, - "step": 15095 - }, - { - "epoch": 7.872750977835723, - "grad_norm": 1.4630261659622192, - "learning_rate": 8.533467336683417e-05, - "loss": 5.3223, - "step": 15096 - }, - { - "epoch": 7.873272490221643, - "grad_norm": 1.6047700643539429, - "learning_rate": 8.533366834170855e-05, - "loss": 5.0501, - "step": 15097 - }, - { - "epoch": 7.873794002607562, - "grad_norm": 1.3787583112716675, - "learning_rate": 8.533266331658292e-05, - "loss": 5.7106, - "step": 15098 - }, - { - "epoch": 7.874315514993481, - "grad_norm": 1.4319475889205933, - "learning_rate": 8.53316582914573e-05, - "loss": 5.7008, - "step": 15099 - }, - { - "epoch": 7.8748370273794, - "grad_norm": 1.501732587814331, - "learning_rate": 8.533065326633167e-05, - "loss": 5.1752, - "step": 15100 - }, - { - "epoch": 7.875358539765319, - "grad_norm": 1.4385813474655151, - "learning_rate": 8.532964824120604e-05, - "loss": 5.9613, - "step": 15101 - }, - { - "epoch": 7.875880052151238, - "grad_norm": 1.4393730163574219, - "learning_rate": 8.532864321608041e-05, - "loss": 5.453, - "step": 15102 - }, - { - "epoch": 7.876401564537158, - "grad_norm": 1.3807787895202637, - "learning_rate": 8.532763819095477e-05, - "loss": 5.4495, - "step": 15103 - }, - { - "epoch": 7.876923076923077, - "grad_norm": 1.4870423078536987, - "learning_rate": 8.532663316582915e-05, - "loss": 5.1602, - "step": 15104 - }, - { - "epoch": 7.877444589308996, - "grad_norm": 1.4968969821929932, - "learning_rate": 8.532562814070351e-05, - "loss": 5.196, - "step": 15105 - }, - { - "epoch": 7.877966101694915, - "grad_norm": 1.4364585876464844, - "learning_rate": 8.532462311557789e-05, - "loss": 5.4205, - "step": 15106 - }, - { - "epoch": 7.878487614080834, - "grad_norm": 1.5904603004455566, - "learning_rate": 8.532361809045226e-05, - "loss": 5.3567, - "step": 15107 - }, - { - "epoch": 7.879009126466753, - "grad_norm": 1.4066187143325806, - "learning_rate": 8.532261306532663e-05, - "loss": 5.4079, - "step": 15108 - }, - { - "epoch": 7.879530638852673, - "grad_norm": 1.4495388269424438, - "learning_rate": 8.532160804020101e-05, - "loss": 5.956, - "step": 15109 - }, - { - "epoch": 7.880052151238592, - "grad_norm": 1.6409300565719604, - "learning_rate": 8.532060301507539e-05, - "loss": 5.1347, - "step": 15110 - }, - { - "epoch": 7.880573663624511, - "grad_norm": 1.520734190940857, - "learning_rate": 8.531959798994975e-05, - "loss": 5.0023, - "step": 15111 - }, - { - "epoch": 7.88109517601043, - "grad_norm": 1.4600203037261963, - "learning_rate": 8.531859296482413e-05, - "loss": 5.5362, - "step": 15112 - }, - { - "epoch": 7.881616688396349, - "grad_norm": 1.3994910717010498, - "learning_rate": 8.53175879396985e-05, - "loss": 5.5924, - "step": 15113 - }, - { - "epoch": 7.8821382007822685, - "grad_norm": 1.2789793014526367, - "learning_rate": 8.531658291457287e-05, - "loss": 5.384, - "step": 15114 - }, - { - "epoch": 7.882659713168188, - "grad_norm": 1.4117274284362793, - "learning_rate": 8.531557788944724e-05, - "loss": 5.6372, - "step": 15115 - }, - { - "epoch": 7.883181225554107, - "grad_norm": 1.39469575881958, - "learning_rate": 8.53145728643216e-05, - "loss": 5.8044, - "step": 15116 - }, - { - "epoch": 7.883702737940026, - "grad_norm": 1.4595617055892944, - "learning_rate": 8.531356783919598e-05, - "loss": 5.55, - "step": 15117 - }, - { - "epoch": 7.884224250325945, - "grad_norm": 1.6286848783493042, - "learning_rate": 8.531256281407034e-05, - "loss": 4.5939, - "step": 15118 - }, - { - "epoch": 7.884745762711864, - "grad_norm": 1.3540353775024414, - "learning_rate": 8.531155778894472e-05, - "loss": 5.837, - "step": 15119 - }, - { - "epoch": 7.8852672750977835, - "grad_norm": 1.4304085969924927, - "learning_rate": 8.53105527638191e-05, - "loss": 5.3405, - "step": 15120 - }, - { - "epoch": 7.885788787483703, - "grad_norm": 1.350635051727295, - "learning_rate": 8.530954773869348e-05, - "loss": 5.5468, - "step": 15121 - }, - { - "epoch": 7.886310299869622, - "grad_norm": 1.5186350345611572, - "learning_rate": 8.530854271356784e-05, - "loss": 5.1778, - "step": 15122 - }, - { - "epoch": 7.886831812255541, - "grad_norm": 1.4620275497436523, - "learning_rate": 8.530753768844222e-05, - "loss": 5.5753, - "step": 15123 - }, - { - "epoch": 7.88735332464146, - "grad_norm": 1.4612905979156494, - "learning_rate": 8.530653266331658e-05, - "loss": 5.1761, - "step": 15124 - }, - { - "epoch": 7.887874837027379, - "grad_norm": 1.4512892961502075, - "learning_rate": 8.530552763819096e-05, - "loss": 5.5808, - "step": 15125 - }, - { - "epoch": 7.8883963494132985, - "grad_norm": 1.3227237462997437, - "learning_rate": 8.530452261306533e-05, - "loss": 4.9647, - "step": 15126 - }, - { - "epoch": 7.888917861799218, - "grad_norm": 1.318312406539917, - "learning_rate": 8.53035175879397e-05, - "loss": 5.7042, - "step": 15127 - }, - { - "epoch": 7.8894393741851365, - "grad_norm": 1.3888776302337646, - "learning_rate": 8.530251256281407e-05, - "loss": 5.8986, - "step": 15128 - }, - { - "epoch": 7.889960886571056, - "grad_norm": 1.4443271160125732, - "learning_rate": 8.530150753768845e-05, - "loss": 5.4043, - "step": 15129 - }, - { - "epoch": 7.8904823989569755, - "grad_norm": 1.4924161434173584, - "learning_rate": 8.530050251256282e-05, - "loss": 5.5869, - "step": 15130 - }, - { - "epoch": 7.891003911342894, - "grad_norm": 1.273324728012085, - "learning_rate": 8.529949748743719e-05, - "loss": 5.7011, - "step": 15131 - }, - { - "epoch": 7.8915254237288135, - "grad_norm": 1.4935715198516846, - "learning_rate": 8.529849246231157e-05, - "loss": 5.3492, - "step": 15132 - }, - { - "epoch": 7.892046936114733, - "grad_norm": 1.3872042894363403, - "learning_rate": 8.529748743718593e-05, - "loss": 5.7037, - "step": 15133 - }, - { - "epoch": 7.8925684485006515, - "grad_norm": 1.4551434516906738, - "learning_rate": 8.529648241206031e-05, - "loss": 5.3256, - "step": 15134 - }, - { - "epoch": 7.893089960886571, - "grad_norm": 1.3485249280929565, - "learning_rate": 8.529547738693467e-05, - "loss": 5.6967, - "step": 15135 - }, - { - "epoch": 7.8936114732724905, - "grad_norm": 1.5651288032531738, - "learning_rate": 8.529447236180905e-05, - "loss": 5.3123, - "step": 15136 - }, - { - "epoch": 7.894132985658409, - "grad_norm": 1.4877749681472778, - "learning_rate": 8.529346733668342e-05, - "loss": 5.5541, - "step": 15137 - }, - { - "epoch": 7.8946544980443285, - "grad_norm": 1.3793089389801025, - "learning_rate": 8.529246231155779e-05, - "loss": 5.8982, - "step": 15138 - }, - { - "epoch": 7.895176010430248, - "grad_norm": 1.5156059265136719, - "learning_rate": 8.529145728643216e-05, - "loss": 5.4214, - "step": 15139 - }, - { - "epoch": 7.8956975228161665, - "grad_norm": 1.454900860786438, - "learning_rate": 8.529045226130654e-05, - "loss": 5.7453, - "step": 15140 - }, - { - "epoch": 7.896219035202086, - "grad_norm": 1.4198014736175537, - "learning_rate": 8.528944723618091e-05, - "loss": 5.6704, - "step": 15141 - }, - { - "epoch": 7.8967405475880055, - "grad_norm": 1.3729057312011719, - "learning_rate": 8.528844221105528e-05, - "loss": 5.6246, - "step": 15142 - }, - { - "epoch": 7.897262059973924, - "grad_norm": 1.478641152381897, - "learning_rate": 8.528743718592965e-05, - "loss": 5.467, - "step": 15143 - }, - { - "epoch": 7.8977835723598435, - "grad_norm": 1.4752105474472046, - "learning_rate": 8.528643216080402e-05, - "loss": 5.2984, - "step": 15144 - }, - { - "epoch": 7.898305084745763, - "grad_norm": 1.5561600923538208, - "learning_rate": 8.52854271356784e-05, - "loss": 5.2162, - "step": 15145 - }, - { - "epoch": 7.898826597131682, - "grad_norm": 1.4026646614074707, - "learning_rate": 8.528442211055276e-05, - "loss": 5.1414, - "step": 15146 - }, - { - "epoch": 7.899348109517601, - "grad_norm": 1.4712929725646973, - "learning_rate": 8.528341708542714e-05, - "loss": 5.6309, - "step": 15147 - }, - { - "epoch": 7.8998696219035205, - "grad_norm": 1.3349515199661255, - "learning_rate": 8.52824120603015e-05, - "loss": 5.6453, - "step": 15148 - }, - { - "epoch": 7.900391134289439, - "grad_norm": 1.5216326713562012, - "learning_rate": 8.528140703517588e-05, - "loss": 5.1009, - "step": 15149 - }, - { - "epoch": 7.9009126466753585, - "grad_norm": 1.511669635772705, - "learning_rate": 8.528040201005026e-05, - "loss": 5.0473, - "step": 15150 - }, - { - "epoch": 7.901434159061278, - "grad_norm": 1.4194422960281372, - "learning_rate": 8.527939698492464e-05, - "loss": 5.5475, - "step": 15151 - }, - { - "epoch": 7.901955671447197, - "grad_norm": 1.4626944065093994, - "learning_rate": 8.5278391959799e-05, - "loss": 5.9754, - "step": 15152 - }, - { - "epoch": 7.902477183833116, - "grad_norm": 1.4818096160888672, - "learning_rate": 8.527738693467338e-05, - "loss": 5.3183, - "step": 15153 - }, - { - "epoch": 7.9029986962190355, - "grad_norm": 1.3330494165420532, - "learning_rate": 8.527638190954774e-05, - "loss": 5.7167, - "step": 15154 - }, - { - "epoch": 7.903520208604954, - "grad_norm": 1.3322207927703857, - "learning_rate": 8.527537688442212e-05, - "loss": 5.8472, - "step": 15155 - }, - { - "epoch": 7.9040417209908735, - "grad_norm": 1.426382303237915, - "learning_rate": 8.527437185929649e-05, - "loss": 5.7864, - "step": 15156 - }, - { - "epoch": 7.904563233376793, - "grad_norm": 1.382879376411438, - "learning_rate": 8.527336683417085e-05, - "loss": 5.6453, - "step": 15157 - }, - { - "epoch": 7.905084745762712, - "grad_norm": 1.369033932685852, - "learning_rate": 8.527236180904523e-05, - "loss": 5.5535, - "step": 15158 - }, - { - "epoch": 7.905606258148631, - "grad_norm": 1.4164831638336182, - "learning_rate": 8.527135678391959e-05, - "loss": 5.7967, - "step": 15159 - }, - { - "epoch": 7.9061277705345505, - "grad_norm": 1.4145272970199585, - "learning_rate": 8.527035175879397e-05, - "loss": 5.5071, - "step": 15160 - }, - { - "epoch": 7.906649282920469, - "grad_norm": 1.4025137424468994, - "learning_rate": 8.526934673366835e-05, - "loss": 5.9705, - "step": 15161 - }, - { - "epoch": 7.9071707953063886, - "grad_norm": 1.6208710670471191, - "learning_rate": 8.526834170854273e-05, - "loss": 4.9091, - "step": 15162 - }, - { - "epoch": 7.907692307692308, - "grad_norm": 1.4814720153808594, - "learning_rate": 8.526733668341709e-05, - "loss": 5.6513, - "step": 15163 - }, - { - "epoch": 7.908213820078227, - "grad_norm": 1.49767005443573, - "learning_rate": 8.526633165829147e-05, - "loss": 5.6786, - "step": 15164 - }, - { - "epoch": 7.908735332464146, - "grad_norm": 1.4896337985992432, - "learning_rate": 8.526532663316583e-05, - "loss": 5.8827, - "step": 15165 - }, - { - "epoch": 7.9092568448500655, - "grad_norm": 1.4572288990020752, - "learning_rate": 8.526432160804021e-05, - "loss": 5.241, - "step": 15166 - }, - { - "epoch": 7.909778357235984, - "grad_norm": 1.257376790046692, - "learning_rate": 8.526331658291457e-05, - "loss": 5.3694, - "step": 15167 - }, - { - "epoch": 7.910299869621904, - "grad_norm": 1.6026183366775513, - "learning_rate": 8.526231155778895e-05, - "loss": 5.2162, - "step": 15168 - }, - { - "epoch": 7.910821382007823, - "grad_norm": 1.5042181015014648, - "learning_rate": 8.526130653266332e-05, - "loss": 5.3845, - "step": 15169 - }, - { - "epoch": 7.911342894393742, - "grad_norm": 1.6723257303237915, - "learning_rate": 8.52603015075377e-05, - "loss": 5.4323, - "step": 15170 - }, - { - "epoch": 7.911864406779661, - "grad_norm": 1.412706732749939, - "learning_rate": 8.525929648241207e-05, - "loss": 5.3419, - "step": 15171 - }, - { - "epoch": 7.9123859191655805, - "grad_norm": 1.4601646661758423, - "learning_rate": 8.525829145728644e-05, - "loss": 5.8551, - "step": 15172 - }, - { - "epoch": 7.912907431551499, - "grad_norm": 1.5339144468307495, - "learning_rate": 8.525728643216081e-05, - "loss": 4.904, - "step": 15173 - }, - { - "epoch": 7.913428943937419, - "grad_norm": 1.5483357906341553, - "learning_rate": 8.525628140703518e-05, - "loss": 5.4289, - "step": 15174 - }, - { - "epoch": 7.913950456323338, - "grad_norm": 1.4401049613952637, - "learning_rate": 8.525527638190956e-05, - "loss": 5.8031, - "step": 15175 - }, - { - "epoch": 7.914471968709257, - "grad_norm": 1.4061312675476074, - "learning_rate": 8.525427135678392e-05, - "loss": 5.1282, - "step": 15176 - }, - { - "epoch": 7.914993481095176, - "grad_norm": 1.3567041158676147, - "learning_rate": 8.52532663316583e-05, - "loss": 5.6051, - "step": 15177 - }, - { - "epoch": 7.9155149934810956, - "grad_norm": 1.4950910806655884, - "learning_rate": 8.525226130653266e-05, - "loss": 5.5834, - "step": 15178 - }, - { - "epoch": 7.916036505867014, - "grad_norm": 1.4400360584259033, - "learning_rate": 8.525125628140704e-05, - "loss": 5.6582, - "step": 15179 - }, - { - "epoch": 7.916558018252934, - "grad_norm": 1.5387053489685059, - "learning_rate": 8.52502512562814e-05, - "loss": 5.2724, - "step": 15180 - }, - { - "epoch": 7.917079530638853, - "grad_norm": 1.4269241094589233, - "learning_rate": 8.524924623115578e-05, - "loss": 5.3669, - "step": 15181 - }, - { - "epoch": 7.917601043024772, - "grad_norm": 1.4306923151016235, - "learning_rate": 8.524824120603016e-05, - "loss": 5.446, - "step": 15182 - }, - { - "epoch": 7.918122555410691, - "grad_norm": 1.3901735544204712, - "learning_rate": 8.524723618090452e-05, - "loss": 5.4479, - "step": 15183 - }, - { - "epoch": 7.91864406779661, - "grad_norm": 1.539788007736206, - "learning_rate": 8.52462311557789e-05, - "loss": 5.4663, - "step": 15184 - }, - { - "epoch": 7.919165580182529, - "grad_norm": 1.4476512670516968, - "learning_rate": 8.524522613065327e-05, - "loss": 5.6078, - "step": 15185 - }, - { - "epoch": 7.919687092568449, - "grad_norm": 1.5279911756515503, - "learning_rate": 8.524422110552764e-05, - "loss": 5.214, - "step": 15186 - }, - { - "epoch": 7.920208604954368, - "grad_norm": 1.4672143459320068, - "learning_rate": 8.524321608040201e-05, - "loss": 5.6883, - "step": 15187 - }, - { - "epoch": 7.920730117340287, - "grad_norm": 1.6178632974624634, - "learning_rate": 8.524221105527639e-05, - "loss": 5.2662, - "step": 15188 - }, - { - "epoch": 7.921251629726206, - "grad_norm": 1.4577678442001343, - "learning_rate": 8.524120603015075e-05, - "loss": 5.2795, - "step": 15189 - }, - { - "epoch": 7.921773142112125, - "grad_norm": 1.5147814750671387, - "learning_rate": 8.524020100502513e-05, - "loss": 5.3702, - "step": 15190 - }, - { - "epoch": 7.922294654498044, - "grad_norm": 1.6658542156219482, - "learning_rate": 8.52391959798995e-05, - "loss": 4.8294, - "step": 15191 - }, - { - "epoch": 7.922816166883964, - "grad_norm": 1.4964237213134766, - "learning_rate": 8.523819095477388e-05, - "loss": 5.3909, - "step": 15192 - }, - { - "epoch": 7.923337679269883, - "grad_norm": 1.4004024267196655, - "learning_rate": 8.523718592964825e-05, - "loss": 5.0614, - "step": 15193 - }, - { - "epoch": 7.923859191655802, - "grad_norm": 1.4363291263580322, - "learning_rate": 8.523618090452263e-05, - "loss": 5.6777, - "step": 15194 - }, - { - "epoch": 7.924380704041721, - "grad_norm": 1.3250662088394165, - "learning_rate": 8.523517587939699e-05, - "loss": 5.894, - "step": 15195 - }, - { - "epoch": 7.92490221642764, - "grad_norm": 1.3632279634475708, - "learning_rate": 8.523417085427135e-05, - "loss": 5.8331, - "step": 15196 - }, - { - "epoch": 7.925423728813559, - "grad_norm": 1.5371427536010742, - "learning_rate": 8.523316582914573e-05, - "loss": 5.1806, - "step": 15197 - }, - { - "epoch": 7.925945241199479, - "grad_norm": 1.4039794206619263, - "learning_rate": 8.52321608040201e-05, - "loss": 5.5982, - "step": 15198 - }, - { - "epoch": 7.926466753585398, - "grad_norm": 1.425973892211914, - "learning_rate": 8.523115577889447e-05, - "loss": 5.7676, - "step": 15199 - }, - { - "epoch": 7.926988265971317, - "grad_norm": 1.4697774648666382, - "learning_rate": 8.523015075376884e-05, - "loss": 5.4149, - "step": 15200 - }, - { - "epoch": 7.927509778357236, - "grad_norm": 1.3331897258758545, - "learning_rate": 8.522914572864322e-05, - "loss": 5.9802, - "step": 15201 - }, - { - "epoch": 7.928031290743155, - "grad_norm": 1.3886746168136597, - "learning_rate": 8.52281407035176e-05, - "loss": 5.3149, - "step": 15202 - }, - { - "epoch": 7.928552803129074, - "grad_norm": 1.3997042179107666, - "learning_rate": 8.522713567839197e-05, - "loss": 5.9363, - "step": 15203 - }, - { - "epoch": 7.929074315514994, - "grad_norm": 1.5172057151794434, - "learning_rate": 8.522613065326634e-05, - "loss": 5.2101, - "step": 15204 - }, - { - "epoch": 7.929595827900913, - "grad_norm": 1.4293462038040161, - "learning_rate": 8.522512562814071e-05, - "loss": 5.5653, - "step": 15205 - }, - { - "epoch": 7.930117340286832, - "grad_norm": 1.517549753189087, - "learning_rate": 8.522412060301508e-05, - "loss": 5.2287, - "step": 15206 - }, - { - "epoch": 7.930638852672751, - "grad_norm": 1.5149362087249756, - "learning_rate": 8.522311557788946e-05, - "loss": 4.5762, - "step": 15207 - }, - { - "epoch": 7.93116036505867, - "grad_norm": 1.5136775970458984, - "learning_rate": 8.522211055276382e-05, - "loss": 5.4389, - "step": 15208 - }, - { - "epoch": 7.931681877444589, - "grad_norm": 1.54416823387146, - "learning_rate": 8.522110552763819e-05, - "loss": 5.1876, - "step": 15209 - }, - { - "epoch": 7.932203389830509, - "grad_norm": 1.5124694108963013, - "learning_rate": 8.522010050251256e-05, - "loss": 4.9563, - "step": 15210 - }, - { - "epoch": 7.932724902216428, - "grad_norm": 1.5802602767944336, - "learning_rate": 8.521909547738694e-05, - "loss": 4.8955, - "step": 15211 - }, - { - "epoch": 7.933246414602347, - "grad_norm": 1.409733533859253, - "learning_rate": 8.521809045226132e-05, - "loss": 5.9149, - "step": 15212 - }, - { - "epoch": 7.933767926988266, - "grad_norm": 1.3385579586029053, - "learning_rate": 8.521708542713568e-05, - "loss": 5.2984, - "step": 15213 - }, - { - "epoch": 7.934289439374185, - "grad_norm": 1.5719866752624512, - "learning_rate": 8.521608040201006e-05, - "loss": 4.4866, - "step": 15214 - }, - { - "epoch": 7.934810951760104, - "grad_norm": 1.3830783367156982, - "learning_rate": 8.521507537688442e-05, - "loss": 5.5335, - "step": 15215 - }, - { - "epoch": 7.935332464146024, - "grad_norm": 1.3493869304656982, - "learning_rate": 8.52140703517588e-05, - "loss": 5.1096, - "step": 15216 - }, - { - "epoch": 7.935853976531943, - "grad_norm": 1.394181728363037, - "learning_rate": 8.521306532663317e-05, - "loss": 5.7092, - "step": 15217 - }, - { - "epoch": 7.936375488917862, - "grad_norm": 1.3165771961212158, - "learning_rate": 8.521206030150754e-05, - "loss": 4.3173, - "step": 15218 - }, - { - "epoch": 7.936897001303781, - "grad_norm": 1.4170275926589966, - "learning_rate": 8.521105527638191e-05, - "loss": 5.4556, - "step": 15219 - }, - { - "epoch": 7.9374185136897, - "grad_norm": 1.3206665515899658, - "learning_rate": 8.521005025125629e-05, - "loss": 5.699, - "step": 15220 - }, - { - "epoch": 7.937940026075619, - "grad_norm": 1.4654545783996582, - "learning_rate": 8.520904522613065e-05, - "loss": 5.3956, - "step": 15221 - }, - { - "epoch": 7.938461538461539, - "grad_norm": 1.3279049396514893, - "learning_rate": 8.520804020100503e-05, - "loss": 5.9053, - "step": 15222 - }, - { - "epoch": 7.938983050847457, - "grad_norm": 1.6796492338180542, - "learning_rate": 8.520703517587941e-05, - "loss": 5.3831, - "step": 15223 - }, - { - "epoch": 7.939504563233377, - "grad_norm": 1.3637524843215942, - "learning_rate": 8.520603015075377e-05, - "loss": 5.6448, - "step": 15224 - }, - { - "epoch": 7.940026075619296, - "grad_norm": 1.4378116130828857, - "learning_rate": 8.520502512562815e-05, - "loss": 5.6229, - "step": 15225 - }, - { - "epoch": 7.940547588005215, - "grad_norm": 1.4071587324142456, - "learning_rate": 8.520402010050251e-05, - "loss": 5.7901, - "step": 15226 - }, - { - "epoch": 7.941069100391134, - "grad_norm": 1.399428367614746, - "learning_rate": 8.520301507537689e-05, - "loss": 5.3863, - "step": 15227 - }, - { - "epoch": 7.941590612777054, - "grad_norm": 1.6730403900146484, - "learning_rate": 8.520201005025126e-05, - "loss": 5.0204, - "step": 15228 - }, - { - "epoch": 7.942112125162972, - "grad_norm": 1.6284486055374146, - "learning_rate": 8.520100502512563e-05, - "loss": 4.9125, - "step": 15229 - }, - { - "epoch": 7.942633637548892, - "grad_norm": 1.3896194696426392, - "learning_rate": 8.52e-05, - "loss": 5.8081, - "step": 15230 - }, - { - "epoch": 7.943155149934811, - "grad_norm": 1.5234479904174805, - "learning_rate": 8.519899497487438e-05, - "loss": 5.317, - "step": 15231 - }, - { - "epoch": 7.94367666232073, - "grad_norm": 1.3589720726013184, - "learning_rate": 8.519798994974875e-05, - "loss": 5.8638, - "step": 15232 - }, - { - "epoch": 7.944198174706649, - "grad_norm": 1.5559979677200317, - "learning_rate": 8.519698492462313e-05, - "loss": 5.0755, - "step": 15233 - }, - { - "epoch": 7.944719687092569, - "grad_norm": 1.623522400856018, - "learning_rate": 8.51959798994975e-05, - "loss": 5.3817, - "step": 15234 - }, - { - "epoch": 7.945241199478487, - "grad_norm": 1.4860962629318237, - "learning_rate": 8.519497487437186e-05, - "loss": 5.4415, - "step": 15235 - }, - { - "epoch": 7.945762711864407, - "grad_norm": 1.375150442123413, - "learning_rate": 8.519396984924624e-05, - "loss": 5.8364, - "step": 15236 - }, - { - "epoch": 7.946284224250326, - "grad_norm": 1.5239845514297485, - "learning_rate": 8.51929648241206e-05, - "loss": 5.1835, - "step": 15237 - }, - { - "epoch": 7.946805736636245, - "grad_norm": 1.393642544746399, - "learning_rate": 8.519195979899498e-05, - "loss": 6.0036, - "step": 15238 - }, - { - "epoch": 7.947327249022164, - "grad_norm": 1.6617746353149414, - "learning_rate": 8.519095477386934e-05, - "loss": 5.0826, - "step": 15239 - }, - { - "epoch": 7.947848761408084, - "grad_norm": 1.4281734228134155, - "learning_rate": 8.518994974874372e-05, - "loss": 5.6755, - "step": 15240 - }, - { - "epoch": 7.948370273794002, - "grad_norm": 1.451132893562317, - "learning_rate": 8.518894472361809e-05, - "loss": 5.5549, - "step": 15241 - }, - { - "epoch": 7.948891786179922, - "grad_norm": 1.3334482908248901, - "learning_rate": 8.518793969849246e-05, - "loss": 5.3643, - "step": 15242 - }, - { - "epoch": 7.949413298565841, - "grad_norm": 1.4052555561065674, - "learning_rate": 8.518693467336684e-05, - "loss": 5.5226, - "step": 15243 - }, - { - "epoch": 7.94993481095176, - "grad_norm": 1.3117587566375732, - "learning_rate": 8.518592964824122e-05, - "loss": 5.2404, - "step": 15244 - }, - { - "epoch": 7.950456323337679, - "grad_norm": 1.4296751022338867, - "learning_rate": 8.518492462311558e-05, - "loss": 5.3118, - "step": 15245 - }, - { - "epoch": 7.950977835723599, - "grad_norm": 1.4126226902008057, - "learning_rate": 8.518391959798996e-05, - "loss": 5.6899, - "step": 15246 - }, - { - "epoch": 7.951499348109517, - "grad_norm": 1.4016408920288086, - "learning_rate": 8.518291457286433e-05, - "loss": 5.4898, - "step": 15247 - }, - { - "epoch": 7.952020860495437, - "grad_norm": 1.5280274152755737, - "learning_rate": 8.51819095477387e-05, - "loss": 5.7939, - "step": 15248 - }, - { - "epoch": 7.952542372881356, - "grad_norm": 1.5310653448104858, - "learning_rate": 8.518090452261307e-05, - "loss": 5.4623, - "step": 15249 - }, - { - "epoch": 7.953063885267275, - "grad_norm": 1.4558191299438477, - "learning_rate": 8.517989949748743e-05, - "loss": 5.7817, - "step": 15250 - }, - { - "epoch": 7.953585397653194, - "grad_norm": 1.4668272733688354, - "learning_rate": 8.517889447236181e-05, - "loss": 5.7941, - "step": 15251 - }, - { - "epoch": 7.954106910039114, - "grad_norm": 1.4545966386795044, - "learning_rate": 8.517788944723619e-05, - "loss": 5.5657, - "step": 15252 - }, - { - "epoch": 7.954628422425032, - "grad_norm": 1.4763559103012085, - "learning_rate": 8.517688442211057e-05, - "loss": 5.7165, - "step": 15253 - }, - { - "epoch": 7.955149934810952, - "grad_norm": 1.331691026687622, - "learning_rate": 8.517587939698493e-05, - "loss": 5.7758, - "step": 15254 - }, - { - "epoch": 7.955671447196871, - "grad_norm": 1.5719778537750244, - "learning_rate": 8.517487437185931e-05, - "loss": 5.1272, - "step": 15255 - }, - { - "epoch": 7.95619295958279, - "grad_norm": 1.61163330078125, - "learning_rate": 8.517386934673367e-05, - "loss": 5.4316, - "step": 15256 - }, - { - "epoch": 7.956714471968709, - "grad_norm": 1.4955809116363525, - "learning_rate": 8.517286432160805e-05, - "loss": 5.5667, - "step": 15257 - }, - { - "epoch": 7.957235984354629, - "grad_norm": 1.492393970489502, - "learning_rate": 8.517185929648241e-05, - "loss": 5.0672, - "step": 15258 - }, - { - "epoch": 7.957757496740547, - "grad_norm": 1.394473671913147, - "learning_rate": 8.517085427135679e-05, - "loss": 5.2745, - "step": 15259 - }, - { - "epoch": 7.958279009126467, - "grad_norm": 1.5231703519821167, - "learning_rate": 8.516984924623116e-05, - "loss": 5.2811, - "step": 15260 - }, - { - "epoch": 7.958800521512386, - "grad_norm": 1.4779448509216309, - "learning_rate": 8.516884422110553e-05, - "loss": 5.0882, - "step": 15261 - }, - { - "epoch": 7.959322033898305, - "grad_norm": 1.3228049278259277, - "learning_rate": 8.51678391959799e-05, - "loss": 5.9125, - "step": 15262 - }, - { - "epoch": 7.959843546284224, - "grad_norm": 1.433498740196228, - "learning_rate": 8.516683417085428e-05, - "loss": 5.6853, - "step": 15263 - }, - { - "epoch": 7.960365058670144, - "grad_norm": 1.2500802278518677, - "learning_rate": 8.516582914572865e-05, - "loss": 5.8688, - "step": 15264 - }, - { - "epoch": 7.960886571056062, - "grad_norm": 1.4490346908569336, - "learning_rate": 8.516482412060302e-05, - "loss": 5.6436, - "step": 15265 - }, - { - "epoch": 7.961408083441982, - "grad_norm": 1.535935878753662, - "learning_rate": 8.51638190954774e-05, - "loss": 5.4192, - "step": 15266 - }, - { - "epoch": 7.961929595827901, - "grad_norm": 1.5511960983276367, - "learning_rate": 8.516281407035176e-05, - "loss": 5.2033, - "step": 15267 - }, - { - "epoch": 7.96245110821382, - "grad_norm": 1.6857048273086548, - "learning_rate": 8.516180904522614e-05, - "loss": 4.7125, - "step": 15268 - }, - { - "epoch": 7.962972620599739, - "grad_norm": 1.3394516706466675, - "learning_rate": 8.51608040201005e-05, - "loss": 5.7048, - "step": 15269 - }, - { - "epoch": 7.963494132985659, - "grad_norm": 1.423370122909546, - "learning_rate": 8.515979899497488e-05, - "loss": 5.6357, - "step": 15270 - }, - { - "epoch": 7.964015645371577, - "grad_norm": 1.4120911359786987, - "learning_rate": 8.515879396984924e-05, - "loss": 5.558, - "step": 15271 - }, - { - "epoch": 7.964537157757497, - "grad_norm": 1.4099324941635132, - "learning_rate": 8.515778894472362e-05, - "loss": 5.4853, - "step": 15272 - }, - { - "epoch": 7.965058670143416, - "grad_norm": 1.3890247344970703, - "learning_rate": 8.515678391959799e-05, - "loss": 5.8892, - "step": 15273 - }, - { - "epoch": 7.965580182529335, - "grad_norm": 1.496289610862732, - "learning_rate": 8.515577889447236e-05, - "loss": 5.577, - "step": 15274 - }, - { - "epoch": 7.966101694915254, - "grad_norm": 1.3353215456008911, - "learning_rate": 8.515477386934674e-05, - "loss": 5.7682, - "step": 15275 - }, - { - "epoch": 7.966623207301174, - "grad_norm": 1.3960866928100586, - "learning_rate": 8.51537688442211e-05, - "loss": 5.8876, - "step": 15276 - }, - { - "epoch": 7.967144719687092, - "grad_norm": 1.4779362678527832, - "learning_rate": 8.515276381909548e-05, - "loss": 5.8568, - "step": 15277 - }, - { - "epoch": 7.967666232073012, - "grad_norm": 1.4771121740341187, - "learning_rate": 8.515175879396985e-05, - "loss": 5.7406, - "step": 15278 - }, - { - "epoch": 7.96818774445893, - "grad_norm": 1.441556453704834, - "learning_rate": 8.515075376884423e-05, - "loss": 5.584, - "step": 15279 - }, - { - "epoch": 7.96870925684485, - "grad_norm": 1.5398660898208618, - "learning_rate": 8.514974874371859e-05, - "loss": 5.6864, - "step": 15280 - }, - { - "epoch": 7.969230769230769, - "grad_norm": 1.3906174898147583, - "learning_rate": 8.514874371859297e-05, - "loss": 4.9637, - "step": 15281 - }, - { - "epoch": 7.969752281616689, - "grad_norm": 1.4767365455627441, - "learning_rate": 8.514773869346733e-05, - "loss": 5.6841, - "step": 15282 - }, - { - "epoch": 7.970273794002607, - "grad_norm": 1.4871493577957153, - "learning_rate": 8.514673366834171e-05, - "loss": 5.6768, - "step": 15283 - }, - { - "epoch": 7.970795306388527, - "grad_norm": 1.4496350288391113, - "learning_rate": 8.514572864321609e-05, - "loss": 5.7647, - "step": 15284 - }, - { - "epoch": 7.971316818774445, - "grad_norm": 1.571225643157959, - "learning_rate": 8.514472361809047e-05, - "loss": 5.3357, - "step": 15285 - }, - { - "epoch": 7.971838331160365, - "grad_norm": 1.4347412586212158, - "learning_rate": 8.514371859296483e-05, - "loss": 5.4819, - "step": 15286 - }, - { - "epoch": 7.972359843546284, - "grad_norm": 1.4508397579193115, - "learning_rate": 8.514271356783921e-05, - "loss": 5.5535, - "step": 15287 - }, - { - "epoch": 7.972881355932204, - "grad_norm": 1.8567384481430054, - "learning_rate": 8.514170854271357e-05, - "loss": 5.2025, - "step": 15288 - }, - { - "epoch": 7.973402868318122, - "grad_norm": 1.5012658834457397, - "learning_rate": 8.514070351758794e-05, - "loss": 5.6018, - "step": 15289 - }, - { - "epoch": 7.973924380704042, - "grad_norm": 1.3899730443954468, - "learning_rate": 8.513969849246231e-05, - "loss": 5.4807, - "step": 15290 - }, - { - "epoch": 7.97444589308996, - "grad_norm": 1.4612985849380493, - "learning_rate": 8.513869346733668e-05, - "loss": 5.4457, - "step": 15291 - }, - { - "epoch": 7.97496740547588, - "grad_norm": 1.4013341665267944, - "learning_rate": 8.513768844221106e-05, - "loss": 5.5916, - "step": 15292 - }, - { - "epoch": 7.975488917861799, - "grad_norm": 1.5835975408554077, - "learning_rate": 8.513668341708542e-05, - "loss": 5.4713, - "step": 15293 - }, - { - "epoch": 7.976010430247719, - "grad_norm": 1.3606103658676147, - "learning_rate": 8.51356783919598e-05, - "loss": 5.7688, - "step": 15294 - }, - { - "epoch": 7.976531942633637, - "grad_norm": 1.4576640129089355, - "learning_rate": 8.513467336683418e-05, - "loss": 5.743, - "step": 15295 - }, - { - "epoch": 7.977053455019557, - "grad_norm": 1.417160987854004, - "learning_rate": 8.513366834170855e-05, - "loss": 5.8171, - "step": 15296 - }, - { - "epoch": 7.9775749674054754, - "grad_norm": 1.2952017784118652, - "learning_rate": 8.513266331658292e-05, - "loss": 5.8054, - "step": 15297 - }, - { - "epoch": 7.978096479791395, - "grad_norm": 1.3735994100570679, - "learning_rate": 8.51316582914573e-05, - "loss": 5.3766, - "step": 15298 - }, - { - "epoch": 7.978617992177314, - "grad_norm": 1.5863746404647827, - "learning_rate": 8.513065326633166e-05, - "loss": 5.2855, - "step": 15299 - }, - { - "epoch": 7.979139504563234, - "grad_norm": 1.5093586444854736, - "learning_rate": 8.512964824120604e-05, - "loss": 5.7473, - "step": 15300 - }, - { - "epoch": 7.979661016949152, - "grad_norm": 1.3143222332000732, - "learning_rate": 8.51286432160804e-05, - "loss": 5.46, - "step": 15301 - }, - { - "epoch": 7.980182529335072, - "grad_norm": 1.551090121269226, - "learning_rate": 8.512763819095477e-05, - "loss": 5.1317, - "step": 15302 - }, - { - "epoch": 7.9807040417209905, - "grad_norm": 1.493478536605835, - "learning_rate": 8.512663316582915e-05, - "loss": 5.3051, - "step": 15303 - }, - { - "epoch": 7.98122555410691, - "grad_norm": 1.488033413887024, - "learning_rate": 8.512562814070352e-05, - "loss": 5.6546, - "step": 15304 - }, - { - "epoch": 7.981747066492829, - "grad_norm": 1.5889873504638672, - "learning_rate": 8.51246231155779e-05, - "loss": 4.5752, - "step": 15305 - }, - { - "epoch": 7.982268578878749, - "grad_norm": 1.4546301364898682, - "learning_rate": 8.512361809045227e-05, - "loss": 5.2777, - "step": 15306 - }, - { - "epoch": 7.982790091264667, - "grad_norm": 1.295559048652649, - "learning_rate": 8.512261306532664e-05, - "loss": 5.8592, - "step": 15307 - }, - { - "epoch": 7.983311603650587, - "grad_norm": 1.630520224571228, - "learning_rate": 8.512160804020101e-05, - "loss": 5.3112, - "step": 15308 - }, - { - "epoch": 7.9838331160365055, - "grad_norm": 1.4489284753799438, - "learning_rate": 8.512060301507539e-05, - "loss": 5.6933, - "step": 15309 - }, - { - "epoch": 7.984354628422425, - "grad_norm": 1.3093754053115845, - "learning_rate": 8.511959798994975e-05, - "loss": 5.8363, - "step": 15310 - }, - { - "epoch": 7.984876140808344, - "grad_norm": 1.3743376731872559, - "learning_rate": 8.511859296482413e-05, - "loss": 5.2697, - "step": 15311 - }, - { - "epoch": 7.985397653194263, - "grad_norm": 1.4283168315887451, - "learning_rate": 8.511758793969849e-05, - "loss": 5.7218, - "step": 15312 - }, - { - "epoch": 7.985919165580182, - "grad_norm": 1.4341809749603271, - "learning_rate": 8.511658291457287e-05, - "loss": 5.497, - "step": 15313 - }, - { - "epoch": 7.986440677966102, - "grad_norm": 1.3436769247055054, - "learning_rate": 8.511557788944723e-05, - "loss": 5.5615, - "step": 15314 - }, - { - "epoch": 7.9869621903520205, - "grad_norm": 1.420452356338501, - "learning_rate": 8.511457286432161e-05, - "loss": 5.3189, - "step": 15315 - }, - { - "epoch": 7.98748370273794, - "grad_norm": 1.559373140335083, - "learning_rate": 8.511356783919599e-05, - "loss": 5.6767, - "step": 15316 - }, - { - "epoch": 7.988005215123859, - "grad_norm": 1.3048617839813232, - "learning_rate": 8.511256281407035e-05, - "loss": 6.0294, - "step": 15317 - }, - { - "epoch": 7.988526727509778, - "grad_norm": 1.6018431186676025, - "learning_rate": 8.511155778894473e-05, - "loss": 5.421, - "step": 15318 - }, - { - "epoch": 7.9890482398956975, - "grad_norm": 1.4375603199005127, - "learning_rate": 8.51105527638191e-05, - "loss": 5.8983, - "step": 15319 - }, - { - "epoch": 7.989569752281617, - "grad_norm": 1.5743921995162964, - "learning_rate": 8.510954773869347e-05, - "loss": 5.5944, - "step": 15320 - }, - { - "epoch": 7.9900912646675355, - "grad_norm": 1.795737385749817, - "learning_rate": 8.510854271356784e-05, - "loss": 5.2687, - "step": 15321 - }, - { - "epoch": 7.990612777053455, - "grad_norm": 1.385797381401062, - "learning_rate": 8.510753768844222e-05, - "loss": 5.6302, - "step": 15322 - }, - { - "epoch": 7.991134289439374, - "grad_norm": 1.385337233543396, - "learning_rate": 8.510653266331658e-05, - "loss": 5.7713, - "step": 15323 - }, - { - "epoch": 7.991655801825293, - "grad_norm": 1.3908530473709106, - "learning_rate": 8.510552763819096e-05, - "loss": 5.4136, - "step": 15324 - }, - { - "epoch": 7.9921773142112125, - "grad_norm": 1.4376829862594604, - "learning_rate": 8.510452261306534e-05, - "loss": 5.673, - "step": 15325 - }, - { - "epoch": 7.992698826597132, - "grad_norm": 1.315650463104248, - "learning_rate": 8.510351758793971e-05, - "loss": 6.0592, - "step": 15326 - }, - { - "epoch": 7.9932203389830505, - "grad_norm": 1.445202112197876, - "learning_rate": 8.510251256281408e-05, - "loss": 5.3184, - "step": 15327 - }, - { - "epoch": 7.99374185136897, - "grad_norm": 1.4960308074951172, - "learning_rate": 8.510150753768844e-05, - "loss": 5.4449, - "step": 15328 - }, - { - "epoch": 7.994263363754889, - "grad_norm": 1.4911420345306396, - "learning_rate": 8.510050251256282e-05, - "loss": 5.1792, - "step": 15329 - }, - { - "epoch": 7.994784876140808, - "grad_norm": 1.3966928720474243, - "learning_rate": 8.509949748743718e-05, - "loss": 5.6183, - "step": 15330 - }, - { - "epoch": 7.9953063885267275, - "grad_norm": 1.4473296403884888, - "learning_rate": 8.509849246231156e-05, - "loss": 5.5198, - "step": 15331 - }, - { - "epoch": 7.995827900912647, - "grad_norm": 1.639607310295105, - "learning_rate": 8.509748743718593e-05, - "loss": 5.4604, - "step": 15332 - }, - { - "epoch": 7.9963494132985655, - "grad_norm": 1.513789415359497, - "learning_rate": 8.50964824120603e-05, - "loss": 5.764, - "step": 15333 - }, - { - "epoch": 7.996870925684485, - "grad_norm": 1.4958255290985107, - "learning_rate": 8.509547738693467e-05, - "loss": 5.0624, - "step": 15334 - }, - { - "epoch": 7.9973924380704045, - "grad_norm": 1.4431947469711304, - "learning_rate": 8.509447236180905e-05, - "loss": 5.7611, - "step": 15335 - }, - { - "epoch": 7.997913950456323, - "grad_norm": 1.5927302837371826, - "learning_rate": 8.509346733668342e-05, - "loss": 5.2051, - "step": 15336 - }, - { - "epoch": 7.9984354628422425, - "grad_norm": 1.6910638809204102, - "learning_rate": 8.50924623115578e-05, - "loss": 5.5017, - "step": 15337 - }, - { - "epoch": 7.998956975228162, - "grad_norm": 1.3044342994689941, - "learning_rate": 8.509145728643217e-05, - "loss": 5.9232, - "step": 15338 - }, - { - "epoch": 7.9994784876140805, - "grad_norm": 1.3677867650985718, - "learning_rate": 8.509045226130654e-05, - "loss": 5.5916, - "step": 15339 - }, - { - "epoch": 8.0, - "grad_norm": 1.5175011157989502, - "learning_rate": 8.508944723618091e-05, - "loss": 5.7852, - "step": 15340 - }, - { - "epoch": 8.00052151238592, - "grad_norm": 1.390304684638977, - "learning_rate": 8.508844221105529e-05, - "loss": 5.5628, - "step": 15341 - }, - { - "epoch": 8.001043024771839, - "grad_norm": 1.446747064590454, - "learning_rate": 8.508743718592965e-05, - "loss": 5.1767, - "step": 15342 - }, - { - "epoch": 8.001564537157757, - "grad_norm": 1.4341310262680054, - "learning_rate": 8.508643216080401e-05, - "loss": 5.7307, - "step": 15343 - }, - { - "epoch": 8.002086049543676, - "grad_norm": 1.3088773488998413, - "learning_rate": 8.508542713567839e-05, - "loss": 5.832, - "step": 15344 - }, - { - "epoch": 8.002607561929596, - "grad_norm": 1.376611351966858, - "learning_rate": 8.508442211055277e-05, - "loss": 5.71, - "step": 15345 - }, - { - "epoch": 8.003129074315515, - "grad_norm": 1.4603737592697144, - "learning_rate": 8.508341708542715e-05, - "loss": 5.3734, - "step": 15346 - }, - { - "epoch": 8.003650586701434, - "grad_norm": 1.4773428440093994, - "learning_rate": 8.508241206030151e-05, - "loss": 5.7034, - "step": 15347 - }, - { - "epoch": 8.004172099087354, - "grad_norm": 1.3950238227844238, - "learning_rate": 8.508140703517589e-05, - "loss": 5.489, - "step": 15348 - }, - { - "epoch": 8.004693611473272, - "grad_norm": 1.3618615865707397, - "learning_rate": 8.508040201005025e-05, - "loss": 5.4327, - "step": 15349 - }, - { - "epoch": 8.005215123859191, - "grad_norm": 1.5947985649108887, - "learning_rate": 8.507939698492463e-05, - "loss": 5.4367, - "step": 15350 - }, - { - "epoch": 8.00573663624511, - "grad_norm": 1.4083995819091797, - "learning_rate": 8.5078391959799e-05, - "loss": 5.853, - "step": 15351 - }, - { - "epoch": 8.00625814863103, - "grad_norm": 1.5465725660324097, - "learning_rate": 8.507738693467337e-05, - "loss": 5.5842, - "step": 15352 - }, - { - "epoch": 8.00677966101695, - "grad_norm": 1.5305765867233276, - "learning_rate": 8.507638190954774e-05, - "loss": 5.7323, - "step": 15353 - }, - { - "epoch": 8.007301173402869, - "grad_norm": 1.5905872583389282, - "learning_rate": 8.507537688442212e-05, - "loss": 5.4104, - "step": 15354 - }, - { - "epoch": 8.007822685788787, - "grad_norm": 1.66057550907135, - "learning_rate": 8.507437185929648e-05, - "loss": 5.3539, - "step": 15355 - }, - { - "epoch": 8.008344198174706, - "grad_norm": 1.5634052753448486, - "learning_rate": 8.507336683417086e-05, - "loss": 5.3184, - "step": 15356 - }, - { - "epoch": 8.008865710560626, - "grad_norm": 1.5202713012695312, - "learning_rate": 8.507236180904524e-05, - "loss": 5.5682, - "step": 15357 - }, - { - "epoch": 8.009387222946545, - "grad_norm": 1.3338435888290405, - "learning_rate": 8.50713567839196e-05, - "loss": 5.784, - "step": 15358 - }, - { - "epoch": 8.009908735332465, - "grad_norm": 1.3778048753738403, - "learning_rate": 8.507035175879398e-05, - "loss": 5.3451, - "step": 15359 - }, - { - "epoch": 8.010430247718384, - "grad_norm": 1.5769524574279785, - "learning_rate": 8.506934673366834e-05, - "loss": 5.4301, - "step": 15360 - }, - { - "epoch": 8.010951760104302, - "grad_norm": 1.6457512378692627, - "learning_rate": 8.506834170854272e-05, - "loss": 5.336, - "step": 15361 - }, - { - "epoch": 8.011473272490221, - "grad_norm": 1.4528099298477173, - "learning_rate": 8.506733668341708e-05, - "loss": 5.4424, - "step": 15362 - }, - { - "epoch": 8.01199478487614, - "grad_norm": 1.3624287843704224, - "learning_rate": 8.506633165829146e-05, - "loss": 5.1465, - "step": 15363 - }, - { - "epoch": 8.01251629726206, - "grad_norm": 1.390187382698059, - "learning_rate": 8.506532663316583e-05, - "loss": 5.8776, - "step": 15364 - }, - { - "epoch": 8.01303780964798, - "grad_norm": 1.5628948211669922, - "learning_rate": 8.50643216080402e-05, - "loss": 5.3956, - "step": 15365 - }, - { - "epoch": 8.013559322033899, - "grad_norm": 1.4415775537490845, - "learning_rate": 8.506331658291458e-05, - "loss": 5.8267, - "step": 15366 - }, - { - "epoch": 8.014080834419817, - "grad_norm": 1.4305692911148071, - "learning_rate": 8.506231155778896e-05, - "loss": 5.6353, - "step": 15367 - }, - { - "epoch": 8.014602346805736, - "grad_norm": 1.8618881702423096, - "learning_rate": 8.506130653266332e-05, - "loss": 4.9697, - "step": 15368 - }, - { - "epoch": 8.015123859191656, - "grad_norm": 1.418805480003357, - "learning_rate": 8.506030150753769e-05, - "loss": 5.7107, - "step": 15369 - }, - { - "epoch": 8.015645371577575, - "grad_norm": 1.3926386833190918, - "learning_rate": 8.505929648241207e-05, - "loss": 5.7812, - "step": 15370 - }, - { - "epoch": 8.016166883963495, - "grad_norm": 1.564953327178955, - "learning_rate": 8.505829145728643e-05, - "loss": 5.0464, - "step": 15371 - }, - { - "epoch": 8.016688396349414, - "grad_norm": 1.4493554830551147, - "learning_rate": 8.505728643216081e-05, - "loss": 5.635, - "step": 15372 - }, - { - "epoch": 8.017209908735332, - "grad_norm": 1.5577951669692993, - "learning_rate": 8.505628140703517e-05, - "loss": 4.5504, - "step": 15373 - }, - { - "epoch": 8.017731421121251, - "grad_norm": 1.4878005981445312, - "learning_rate": 8.505527638190955e-05, - "loss": 5.5053, - "step": 15374 - }, - { - "epoch": 8.01825293350717, - "grad_norm": 1.4111437797546387, - "learning_rate": 8.505427135678392e-05, - "loss": 5.8786, - "step": 15375 - }, - { - "epoch": 8.01877444589309, - "grad_norm": 1.3699852228164673, - "learning_rate": 8.505326633165829e-05, - "loss": 5.6801, - "step": 15376 - }, - { - "epoch": 8.01929595827901, - "grad_norm": 1.3855986595153809, - "learning_rate": 8.505226130653267e-05, - "loss": 5.524, - "step": 15377 - }, - { - "epoch": 8.019817470664929, - "grad_norm": 1.4664092063903809, - "learning_rate": 8.505125628140705e-05, - "loss": 5.5515, - "step": 15378 - }, - { - "epoch": 8.020338983050847, - "grad_norm": 1.5992985963821411, - "learning_rate": 8.505025125628141e-05, - "loss": 5.0621, - "step": 15379 - }, - { - "epoch": 8.020860495436766, - "grad_norm": 1.3935317993164062, - "learning_rate": 8.504924623115579e-05, - "loss": 5.9358, - "step": 15380 - }, - { - "epoch": 8.021382007822686, - "grad_norm": 1.2516634464263916, - "learning_rate": 8.504824120603015e-05, - "loss": 5.0581, - "step": 15381 - }, - { - "epoch": 8.021903520208605, - "grad_norm": 1.3793138265609741, - "learning_rate": 8.504723618090452e-05, - "loss": 5.7427, - "step": 15382 - }, - { - "epoch": 8.022425032594525, - "grad_norm": 1.437124490737915, - "learning_rate": 8.50462311557789e-05, - "loss": 5.6964, - "step": 15383 - }, - { - "epoch": 8.022946544980444, - "grad_norm": 1.5285338163375854, - "learning_rate": 8.504522613065326e-05, - "loss": 5.3843, - "step": 15384 - }, - { - "epoch": 8.023468057366362, - "grad_norm": 1.4900380373001099, - "learning_rate": 8.504422110552764e-05, - "loss": 5.5768, - "step": 15385 - }, - { - "epoch": 8.023989569752281, - "grad_norm": 1.3816604614257812, - "learning_rate": 8.504321608040202e-05, - "loss": 5.6575, - "step": 15386 - }, - { - "epoch": 8.0245110821382, - "grad_norm": 1.5043259859085083, - "learning_rate": 8.50422110552764e-05, - "loss": 4.5592, - "step": 15387 - }, - { - "epoch": 8.02503259452412, - "grad_norm": 1.421183705329895, - "learning_rate": 8.504120603015076e-05, - "loss": 5.6438, - "step": 15388 - }, - { - "epoch": 8.02555410691004, - "grad_norm": 1.2909908294677734, - "learning_rate": 8.504020100502514e-05, - "loss": 5.9081, - "step": 15389 - }, - { - "epoch": 8.026075619295959, - "grad_norm": 1.41033935546875, - "learning_rate": 8.50391959798995e-05, - "loss": 5.3814, - "step": 15390 - }, - { - "epoch": 8.026597131681877, - "grad_norm": 1.4824128150939941, - "learning_rate": 8.503819095477388e-05, - "loss": 5.3274, - "step": 15391 - }, - { - "epoch": 8.027118644067796, - "grad_norm": 1.3800396919250488, - "learning_rate": 8.503718592964824e-05, - "loss": 5.5972, - "step": 15392 - }, - { - "epoch": 8.027640156453716, - "grad_norm": 1.3465046882629395, - "learning_rate": 8.503618090452262e-05, - "loss": 5.9861, - "step": 15393 - }, - { - "epoch": 8.028161668839635, - "grad_norm": 1.3072959184646606, - "learning_rate": 8.503517587939699e-05, - "loss": 5.6239, - "step": 15394 - }, - { - "epoch": 8.028683181225555, - "grad_norm": 1.6013771295547485, - "learning_rate": 8.503417085427135e-05, - "loss": 4.7791, - "step": 15395 - }, - { - "epoch": 8.029204693611474, - "grad_norm": 1.400961995124817, - "learning_rate": 8.503316582914573e-05, - "loss": 5.5055, - "step": 15396 - }, - { - "epoch": 8.029726205997392, - "grad_norm": 1.4158071279525757, - "learning_rate": 8.50321608040201e-05, - "loss": 5.3556, - "step": 15397 - }, - { - "epoch": 8.030247718383311, - "grad_norm": 1.4595032930374146, - "learning_rate": 8.503115577889448e-05, - "loss": 5.0704, - "step": 15398 - }, - { - "epoch": 8.03076923076923, - "grad_norm": 1.455165982246399, - "learning_rate": 8.503015075376885e-05, - "loss": 5.6415, - "step": 15399 - }, - { - "epoch": 8.03129074315515, - "grad_norm": 1.4311209917068481, - "learning_rate": 8.502914572864323e-05, - "loss": 5.5771, - "step": 15400 - }, - { - "epoch": 8.03181225554107, - "grad_norm": 1.413723349571228, - "learning_rate": 8.502814070351759e-05, - "loss": 5.4104, - "step": 15401 - }, - { - "epoch": 8.032333767926989, - "grad_norm": 1.4406311511993408, - "learning_rate": 8.502713567839197e-05, - "loss": 5.5958, - "step": 15402 - }, - { - "epoch": 8.032855280312907, - "grad_norm": 1.378806471824646, - "learning_rate": 8.502613065326633e-05, - "loss": 5.4055, - "step": 15403 - }, - { - "epoch": 8.033376792698826, - "grad_norm": 1.4509977102279663, - "learning_rate": 8.502512562814071e-05, - "loss": 5.4141, - "step": 15404 - }, - { - "epoch": 8.033898305084746, - "grad_norm": 1.394758701324463, - "learning_rate": 8.502412060301507e-05, - "loss": 5.4701, - "step": 15405 - }, - { - "epoch": 8.034419817470665, - "grad_norm": 1.471449613571167, - "learning_rate": 8.502311557788945e-05, - "loss": 5.3859, - "step": 15406 - }, - { - "epoch": 8.034941329856585, - "grad_norm": 1.5800950527191162, - "learning_rate": 8.502211055276383e-05, - "loss": 5.5598, - "step": 15407 - }, - { - "epoch": 8.035462842242504, - "grad_norm": 1.5112214088439941, - "learning_rate": 8.50211055276382e-05, - "loss": 5.134, - "step": 15408 - }, - { - "epoch": 8.035984354628422, - "grad_norm": 1.603190541267395, - "learning_rate": 8.502010050251257e-05, - "loss": 4.6321, - "step": 15409 - }, - { - "epoch": 8.036505867014341, - "grad_norm": 1.8280936479568481, - "learning_rate": 8.501909547738694e-05, - "loss": 5.029, - "step": 15410 - }, - { - "epoch": 8.03702737940026, - "grad_norm": 1.4177597761154175, - "learning_rate": 8.501809045226131e-05, - "loss": 5.2904, - "step": 15411 - }, - { - "epoch": 8.03754889178618, - "grad_norm": 1.3215174674987793, - "learning_rate": 8.501708542713568e-05, - "loss": 5.94, - "step": 15412 - }, - { - "epoch": 8.0380704041721, - "grad_norm": 1.4715266227722168, - "learning_rate": 8.501608040201006e-05, - "loss": 5.3254, - "step": 15413 - }, - { - "epoch": 8.038591916558019, - "grad_norm": 1.523896336555481, - "learning_rate": 8.501507537688442e-05, - "loss": 4.7039, - "step": 15414 - }, - { - "epoch": 8.039113428943937, - "grad_norm": 1.4576537609100342, - "learning_rate": 8.50140703517588e-05, - "loss": 5.1779, - "step": 15415 - }, - { - "epoch": 8.039634941329856, - "grad_norm": 1.5641121864318848, - "learning_rate": 8.501306532663316e-05, - "loss": 5.3889, - "step": 15416 - }, - { - "epoch": 8.040156453715776, - "grad_norm": 1.4114723205566406, - "learning_rate": 8.501206030150754e-05, - "loss": 5.658, - "step": 15417 - }, - { - "epoch": 8.040677966101695, - "grad_norm": 1.432917833328247, - "learning_rate": 8.501105527638192e-05, - "loss": 5.6712, - "step": 15418 - }, - { - "epoch": 8.041199478487615, - "grad_norm": 1.4624650478363037, - "learning_rate": 8.50100502512563e-05, - "loss": 5.5913, - "step": 15419 - }, - { - "epoch": 8.041720990873534, - "grad_norm": 1.6010112762451172, - "learning_rate": 8.500904522613066e-05, - "loss": 5.4416, - "step": 15420 - }, - { - "epoch": 8.042242503259452, - "grad_norm": 1.4051388502120972, - "learning_rate": 8.500804020100504e-05, - "loss": 5.8383, - "step": 15421 - }, - { - "epoch": 8.042764015645371, - "grad_norm": 1.410883903503418, - "learning_rate": 8.50070351758794e-05, - "loss": 5.663, - "step": 15422 - }, - { - "epoch": 8.04328552803129, - "grad_norm": 1.731884241104126, - "learning_rate": 8.500603015075377e-05, - "loss": 4.7957, - "step": 15423 - }, - { - "epoch": 8.04380704041721, - "grad_norm": 2.5581672191619873, - "learning_rate": 8.500502512562814e-05, - "loss": 5.4425, - "step": 15424 - }, - { - "epoch": 8.04432855280313, - "grad_norm": 1.8473352193832397, - "learning_rate": 8.500402010050251e-05, - "loss": 5.0052, - "step": 15425 - }, - { - "epoch": 8.044850065189049, - "grad_norm": 1.5868275165557861, - "learning_rate": 8.500301507537689e-05, - "loss": 5.5932, - "step": 15426 - }, - { - "epoch": 8.045371577574967, - "grad_norm": 1.4267325401306152, - "learning_rate": 8.500201005025126e-05, - "loss": 5.4899, - "step": 15427 - }, - { - "epoch": 8.045893089960886, - "grad_norm": 1.4029301404953003, - "learning_rate": 8.500100502512564e-05, - "loss": 5.5891, - "step": 15428 - }, - { - "epoch": 8.046414602346806, - "grad_norm": 1.5177831649780273, - "learning_rate": 8.5e-05, - "loss": 5.2651, - "step": 15429 - }, - { - "epoch": 8.046936114732725, - "grad_norm": 1.4123040437698364, - "learning_rate": 8.499899497487438e-05, - "loss": 5.718, - "step": 15430 - }, - { - "epoch": 8.047457627118645, - "grad_norm": 1.53366219997406, - "learning_rate": 8.499798994974875e-05, - "loss": 5.3367, - "step": 15431 - }, - { - "epoch": 8.047979139504564, - "grad_norm": 1.4641448259353638, - "learning_rate": 8.499698492462313e-05, - "loss": 5.4563, - "step": 15432 - }, - { - "epoch": 8.048500651890482, - "grad_norm": 1.5163789987564087, - "learning_rate": 8.499597989949749e-05, - "loss": 5.3743, - "step": 15433 - }, - { - "epoch": 8.049022164276401, - "grad_norm": 1.433473825454712, - "learning_rate": 8.499497487437187e-05, - "loss": 5.7143, - "step": 15434 - }, - { - "epoch": 8.04954367666232, - "grad_norm": 1.4664863348007202, - "learning_rate": 8.499396984924623e-05, - "loss": 5.6089, - "step": 15435 - }, - { - "epoch": 8.05006518904824, - "grad_norm": 1.5481555461883545, - "learning_rate": 8.49929648241206e-05, - "loss": 5.7872, - "step": 15436 - }, - { - "epoch": 8.05058670143416, - "grad_norm": 1.5192092657089233, - "learning_rate": 8.499195979899497e-05, - "loss": 5.8185, - "step": 15437 - }, - { - "epoch": 8.051108213820077, - "grad_norm": 1.4179058074951172, - "learning_rate": 8.499095477386935e-05, - "loss": 5.7085, - "step": 15438 - }, - { - "epoch": 8.051629726205997, - "grad_norm": 1.5664829015731812, - "learning_rate": 8.498994974874373e-05, - "loss": 5.1257, - "step": 15439 - }, - { - "epoch": 8.052151238591916, - "grad_norm": 1.6102935075759888, - "learning_rate": 8.49889447236181e-05, - "loss": 5.3093, - "step": 15440 - }, - { - "epoch": 8.052672750977836, - "grad_norm": 1.4769339561462402, - "learning_rate": 8.498793969849247e-05, - "loss": 5.2589, - "step": 15441 - }, - { - "epoch": 8.053194263363755, - "grad_norm": 1.4127150774002075, - "learning_rate": 8.498693467336684e-05, - "loss": 5.6652, - "step": 15442 - }, - { - "epoch": 8.053715775749675, - "grad_norm": 1.338387131690979, - "learning_rate": 8.498592964824121e-05, - "loss": 5.3556, - "step": 15443 - }, - { - "epoch": 8.054237288135592, - "grad_norm": 1.3104321956634521, - "learning_rate": 8.498492462311558e-05, - "loss": 5.7493, - "step": 15444 - }, - { - "epoch": 8.054758800521512, - "grad_norm": 1.9962481260299683, - "learning_rate": 8.498391959798996e-05, - "loss": 5.2251, - "step": 15445 - }, - { - "epoch": 8.055280312907431, - "grad_norm": 1.5775083303451538, - "learning_rate": 8.498291457286432e-05, - "loss": 5.1545, - "step": 15446 - }, - { - "epoch": 8.05580182529335, - "grad_norm": 1.5940611362457275, - "learning_rate": 8.49819095477387e-05, - "loss": 5.5182, - "step": 15447 - }, - { - "epoch": 8.05632333767927, - "grad_norm": 1.4766106605529785, - "learning_rate": 8.498090452261306e-05, - "loss": 5.6882, - "step": 15448 - }, - { - "epoch": 8.05684485006519, - "grad_norm": 1.7149903774261475, - "learning_rate": 8.497989949748744e-05, - "loss": 4.9646, - "step": 15449 - }, - { - "epoch": 8.057366362451107, - "grad_norm": 1.4124594926834106, - "learning_rate": 8.497889447236182e-05, - "loss": 5.1177, - "step": 15450 - }, - { - "epoch": 8.057887874837027, - "grad_norm": 1.469590663909912, - "learning_rate": 8.497788944723618e-05, - "loss": 4.7279, - "step": 15451 - }, - { - "epoch": 8.058409387222946, - "grad_norm": 1.5134634971618652, - "learning_rate": 8.497688442211056e-05, - "loss": 5.6592, - "step": 15452 - }, - { - "epoch": 8.058930899608866, - "grad_norm": 1.416188359260559, - "learning_rate": 8.497587939698492e-05, - "loss": 5.6155, - "step": 15453 - }, - { - "epoch": 8.059452411994785, - "grad_norm": 1.485105037689209, - "learning_rate": 8.49748743718593e-05, - "loss": 5.8069, - "step": 15454 - }, - { - "epoch": 8.059973924380705, - "grad_norm": 1.4645200967788696, - "learning_rate": 8.497386934673367e-05, - "loss": 5.0796, - "step": 15455 - }, - { - "epoch": 8.060495436766622, - "grad_norm": 1.4247392416000366, - "learning_rate": 8.497286432160804e-05, - "loss": 5.8166, - "step": 15456 - }, - { - "epoch": 8.061016949152542, - "grad_norm": 1.487082839012146, - "learning_rate": 8.497185929648241e-05, - "loss": 5.5928, - "step": 15457 - }, - { - "epoch": 8.061538461538461, - "grad_norm": 1.3990180492401123, - "learning_rate": 8.497085427135679e-05, - "loss": 5.6357, - "step": 15458 - }, - { - "epoch": 8.06205997392438, - "grad_norm": 1.3720003366470337, - "learning_rate": 8.496984924623116e-05, - "loss": 5.6588, - "step": 15459 - }, - { - "epoch": 8.0625814863103, - "grad_norm": 1.586195707321167, - "learning_rate": 8.496884422110554e-05, - "loss": 5.1869, - "step": 15460 - }, - { - "epoch": 8.06310299869622, - "grad_norm": 1.4899824857711792, - "learning_rate": 8.496783919597991e-05, - "loss": 5.6204, - "step": 15461 - }, - { - "epoch": 8.063624511082137, - "grad_norm": 1.422252893447876, - "learning_rate": 8.496683417085427e-05, - "loss": 5.5023, - "step": 15462 - }, - { - "epoch": 8.064146023468057, - "grad_norm": 1.4452879428863525, - "learning_rate": 8.496582914572865e-05, - "loss": 5.6389, - "step": 15463 - }, - { - "epoch": 8.064667535853976, - "grad_norm": 1.4618357419967651, - "learning_rate": 8.496482412060301e-05, - "loss": 5.5253, - "step": 15464 - }, - { - "epoch": 8.065189048239896, - "grad_norm": 1.4778043031692505, - "learning_rate": 8.496381909547739e-05, - "loss": 5.1973, - "step": 15465 - }, - { - "epoch": 8.065710560625815, - "grad_norm": 1.5097606182098389, - "learning_rate": 8.496281407035176e-05, - "loss": 5.6891, - "step": 15466 - }, - { - "epoch": 8.066232073011735, - "grad_norm": 1.5006811618804932, - "learning_rate": 8.496180904522613e-05, - "loss": 5.3355, - "step": 15467 - }, - { - "epoch": 8.066753585397652, - "grad_norm": 1.4825302362442017, - "learning_rate": 8.49608040201005e-05, - "loss": 5.3646, - "step": 15468 - }, - { - "epoch": 8.067275097783572, - "grad_norm": 1.3978360891342163, - "learning_rate": 8.495979899497488e-05, - "loss": 5.7727, - "step": 15469 - }, - { - "epoch": 8.067796610169491, - "grad_norm": 1.712721347808838, - "learning_rate": 8.495879396984925e-05, - "loss": 4.803, - "step": 15470 - }, - { - "epoch": 8.06831812255541, - "grad_norm": 1.4304463863372803, - "learning_rate": 8.495778894472363e-05, - "loss": 5.1077, - "step": 15471 - }, - { - "epoch": 8.06883963494133, - "grad_norm": 1.5150009393692017, - "learning_rate": 8.4956783919598e-05, - "loss": 5.1705, - "step": 15472 - }, - { - "epoch": 8.06936114732725, - "grad_norm": 1.4580217599868774, - "learning_rate": 8.495577889447237e-05, - "loss": 5.9172, - "step": 15473 - }, - { - "epoch": 8.069882659713167, - "grad_norm": 1.4772493839263916, - "learning_rate": 8.495477386934674e-05, - "loss": 5.6573, - "step": 15474 - }, - { - "epoch": 8.070404172099087, - "grad_norm": 1.4542133808135986, - "learning_rate": 8.49537688442211e-05, - "loss": 5.5825, - "step": 15475 - }, - { - "epoch": 8.070925684485006, - "grad_norm": 1.392167091369629, - "learning_rate": 8.495276381909548e-05, - "loss": 5.7194, - "step": 15476 - }, - { - "epoch": 8.071447196870926, - "grad_norm": 1.408933401107788, - "learning_rate": 8.495175879396984e-05, - "loss": 5.626, - "step": 15477 - }, - { - "epoch": 8.071968709256845, - "grad_norm": Infinity, - "learning_rate": 8.495175879396984e-05, - "loss": 5.1782, - "step": 15478 - }, - { - "epoch": 8.072490221642765, - "grad_norm": 1.4997482299804688, - "learning_rate": 8.495075376884422e-05, - "loss": 5.391, - "step": 15479 - }, - { - "epoch": 8.073011734028682, - "grad_norm": 1.5334839820861816, - "learning_rate": 8.49497487437186e-05, - "loss": 5.957, - "step": 15480 - }, - { - "epoch": 8.073533246414602, - "grad_norm": 1.5397247076034546, - "learning_rate": 8.494874371859298e-05, - "loss": 5.3534, - "step": 15481 - }, - { - "epoch": 8.074054758800521, - "grad_norm": 1.359954833984375, - "learning_rate": 8.494773869346734e-05, - "loss": 5.9303, - "step": 15482 - }, - { - "epoch": 8.07457627118644, - "grad_norm": 1.5373835563659668, - "learning_rate": 8.494673366834172e-05, - "loss": 5.5577, - "step": 15483 - }, - { - "epoch": 8.07509778357236, - "grad_norm": 1.4575697183609009, - "learning_rate": 8.494572864321608e-05, - "loss": 5.537, - "step": 15484 - }, - { - "epoch": 8.07561929595828, - "grad_norm": 1.4422193765640259, - "learning_rate": 8.494472361809046e-05, - "loss": 5.9706, - "step": 15485 - }, - { - "epoch": 8.076140808344197, - "grad_norm": 1.4497785568237305, - "learning_rate": 8.494371859296483e-05, - "loss": 5.5231, - "step": 15486 - }, - { - "epoch": 8.076662320730117, - "grad_norm": 1.369357705116272, - "learning_rate": 8.49427135678392e-05, - "loss": 5.6282, - "step": 15487 - }, - { - "epoch": 8.077183833116036, - "grad_norm": 1.682360053062439, - "learning_rate": 8.494170854271357e-05, - "loss": 5.0933, - "step": 15488 - }, - { - "epoch": 8.077705345501956, - "grad_norm": 1.4189612865447998, - "learning_rate": 8.494070351758793e-05, - "loss": 5.6542, - "step": 15489 - }, - { - "epoch": 8.078226857887875, - "grad_norm": 1.475553274154663, - "learning_rate": 8.493969849246231e-05, - "loss": 5.1978, - "step": 15490 - }, - { - "epoch": 8.078748370273795, - "grad_norm": 1.4298346042633057, - "learning_rate": 8.493869346733669e-05, - "loss": 5.5747, - "step": 15491 - }, - { - "epoch": 8.079269882659712, - "grad_norm": 1.511306881904602, - "learning_rate": 8.493768844221107e-05, - "loss": 5.3114, - "step": 15492 - }, - { - "epoch": 8.079791395045632, - "grad_norm": 1.3894777297973633, - "learning_rate": 8.493668341708543e-05, - "loss": 5.6377, - "step": 15493 - }, - { - "epoch": 8.080312907431551, - "grad_norm": 1.4410673379898071, - "learning_rate": 8.493567839195981e-05, - "loss": 5.3914, - "step": 15494 - }, - { - "epoch": 8.08083441981747, - "grad_norm": 1.3325062990188599, - "learning_rate": 8.493467336683417e-05, - "loss": 5.6608, - "step": 15495 - }, - { - "epoch": 8.08135593220339, - "grad_norm": 1.4935847520828247, - "learning_rate": 8.493366834170855e-05, - "loss": 5.7059, - "step": 15496 - }, - { - "epoch": 8.08187744458931, - "grad_norm": 1.532165765762329, - "learning_rate": 8.493266331658291e-05, - "loss": 5.569, - "step": 15497 - }, - { - "epoch": 8.082398956975227, - "grad_norm": 1.5525789260864258, - "learning_rate": 8.493165829145729e-05, - "loss": 4.8551, - "step": 15498 - }, - { - "epoch": 8.082920469361147, - "grad_norm": 1.4461520910263062, - "learning_rate": 8.493065326633166e-05, - "loss": 5.5708, - "step": 15499 - }, - { - "epoch": 8.083441981747066, - "grad_norm": 1.6441563367843628, - "learning_rate": 8.492964824120603e-05, - "loss": 5.2709, - "step": 15500 - }, - { - "epoch": 8.083963494132986, - "grad_norm": 1.3830727338790894, - "learning_rate": 8.492864321608041e-05, - "loss": 5.6674, - "step": 15501 - }, - { - "epoch": 8.084485006518905, - "grad_norm": 1.392408847808838, - "learning_rate": 8.492763819095478e-05, - "loss": 5.8682, - "step": 15502 - }, - { - "epoch": 8.085006518904825, - "grad_norm": 1.5038927793502808, - "learning_rate": 8.492663316582915e-05, - "loss": 5.5057, - "step": 15503 - }, - { - "epoch": 8.085528031290742, - "grad_norm": 1.3829306364059448, - "learning_rate": 8.492562814070352e-05, - "loss": 5.7114, - "step": 15504 - }, - { - "epoch": 8.086049543676662, - "grad_norm": 1.4830255508422852, - "learning_rate": 8.49246231155779e-05, - "loss": 5.0291, - "step": 15505 - }, - { - "epoch": 8.086571056062581, - "grad_norm": 1.4965134859085083, - "learning_rate": 8.492361809045226e-05, - "loss": 5.616, - "step": 15506 - }, - { - "epoch": 8.0870925684485, - "grad_norm": 1.433705449104309, - "learning_rate": 8.492261306532664e-05, - "loss": 5.5876, - "step": 15507 - }, - { - "epoch": 8.08761408083442, - "grad_norm": 1.637774109840393, - "learning_rate": 8.4921608040201e-05, - "loss": 5.3905, - "step": 15508 - }, - { - "epoch": 8.08813559322034, - "grad_norm": 1.4559588432312012, - "learning_rate": 8.492060301507538e-05, - "loss": 5.2745, - "step": 15509 - }, - { - "epoch": 8.088657105606258, - "grad_norm": 1.4403352737426758, - "learning_rate": 8.491959798994974e-05, - "loss": 5.7179, - "step": 15510 - }, - { - "epoch": 8.089178617992177, - "grad_norm": 1.5928643941879272, - "learning_rate": 8.491859296482412e-05, - "loss": 4.9602, - "step": 15511 - }, - { - "epoch": 8.089700130378096, - "grad_norm": 1.4875457286834717, - "learning_rate": 8.49175879396985e-05, - "loss": 5.7121, - "step": 15512 - }, - { - "epoch": 8.090221642764016, - "grad_norm": 1.3799540996551514, - "learning_rate": 8.491658291457288e-05, - "loss": 5.7536, - "step": 15513 - }, - { - "epoch": 8.090743155149935, - "grad_norm": 1.3551236391067505, - "learning_rate": 8.491557788944724e-05, - "loss": 5.799, - "step": 15514 - }, - { - "epoch": 8.091264667535855, - "grad_norm": 1.4892292022705078, - "learning_rate": 8.491457286432162e-05, - "loss": 5.7994, - "step": 15515 - }, - { - "epoch": 8.091786179921773, - "grad_norm": 1.520025372505188, - "learning_rate": 8.491356783919598e-05, - "loss": 5.7286, - "step": 15516 - }, - { - "epoch": 8.092307692307692, - "grad_norm": 1.4459669589996338, - "learning_rate": 8.491256281407035e-05, - "loss": 5.3076, - "step": 15517 - }, - { - "epoch": 8.092829204693611, - "grad_norm": 1.4724669456481934, - "learning_rate": 8.491155778894473e-05, - "loss": 5.4054, - "step": 15518 - }, - { - "epoch": 8.093350717079531, - "grad_norm": 1.446352481842041, - "learning_rate": 8.491055276381909e-05, - "loss": 5.3088, - "step": 15519 - }, - { - "epoch": 8.09387222946545, - "grad_norm": 1.6142348051071167, - "learning_rate": 8.490954773869347e-05, - "loss": 5.4955, - "step": 15520 - }, - { - "epoch": 8.09439374185137, - "grad_norm": 1.6913491487503052, - "learning_rate": 8.490854271356785e-05, - "loss": 5.611, - "step": 15521 - }, - { - "epoch": 8.094915254237288, - "grad_norm": 1.3881362676620483, - "learning_rate": 8.490753768844222e-05, - "loss": 5.3703, - "step": 15522 - }, - { - "epoch": 8.095436766623207, - "grad_norm": 1.3482580184936523, - "learning_rate": 8.490653266331659e-05, - "loss": 5.8333, - "step": 15523 - }, - { - "epoch": 8.095958279009126, - "grad_norm": 1.392115592956543, - "learning_rate": 8.490552763819097e-05, - "loss": 5.0755, - "step": 15524 - }, - { - "epoch": 8.096479791395046, - "grad_norm": 1.3963944911956787, - "learning_rate": 8.490452261306533e-05, - "loss": 5.6712, - "step": 15525 - }, - { - "epoch": 8.097001303780965, - "grad_norm": 1.5378104448318481, - "learning_rate": 8.490351758793971e-05, - "loss": 5.0091, - "step": 15526 - }, - { - "epoch": 8.097522816166883, - "grad_norm": 1.3617117404937744, - "learning_rate": 8.490251256281407e-05, - "loss": 5.5242, - "step": 15527 - }, - { - "epoch": 8.098044328552803, - "grad_norm": 1.394676685333252, - "learning_rate": 8.490150753768845e-05, - "loss": 5.5016, - "step": 15528 - }, - { - "epoch": 8.098565840938722, - "grad_norm": 1.4392294883728027, - "learning_rate": 8.490050251256281e-05, - "loss": 5.6538, - "step": 15529 - }, - { - "epoch": 8.099087353324641, - "grad_norm": 1.353211522102356, - "learning_rate": 8.489949748743718e-05, - "loss": 5.6043, - "step": 15530 - }, - { - "epoch": 8.099608865710561, - "grad_norm": 1.3959040641784668, - "learning_rate": 8.489849246231156e-05, - "loss": 5.8259, - "step": 15531 - }, - { - "epoch": 8.10013037809648, - "grad_norm": 1.4671725034713745, - "learning_rate": 8.489748743718593e-05, - "loss": 4.787, - "step": 15532 - }, - { - "epoch": 8.100651890482398, - "grad_norm": 1.4205200672149658, - "learning_rate": 8.489648241206031e-05, - "loss": 5.4802, - "step": 15533 - }, - { - "epoch": 8.101173402868318, - "grad_norm": 1.4414615631103516, - "learning_rate": 8.489547738693468e-05, - "loss": 5.4476, - "step": 15534 - }, - { - "epoch": 8.101694915254237, - "grad_norm": 1.4431724548339844, - "learning_rate": 8.489447236180905e-05, - "loss": 5.6447, - "step": 15535 - }, - { - "epoch": 8.102216427640156, - "grad_norm": 1.4930686950683594, - "learning_rate": 8.489346733668342e-05, - "loss": 5.6196, - "step": 15536 - }, - { - "epoch": 8.102737940026076, - "grad_norm": 1.3843939304351807, - "learning_rate": 8.48924623115578e-05, - "loss": 5.3746, - "step": 15537 - }, - { - "epoch": 8.103259452411995, - "grad_norm": 1.3679341077804565, - "learning_rate": 8.489145728643216e-05, - "loss": 5.6248, - "step": 15538 - }, - { - "epoch": 8.103780964797913, - "grad_norm": 1.3605953454971313, - "learning_rate": 8.489045226130654e-05, - "loss": 5.6407, - "step": 15539 - }, - { - "epoch": 8.104302477183833, - "grad_norm": 1.3607312440872192, - "learning_rate": 8.48894472361809e-05, - "loss": 6.0562, - "step": 15540 - }, - { - "epoch": 8.104823989569752, - "grad_norm": 1.4508575201034546, - "learning_rate": 8.488844221105528e-05, - "loss": 5.4414, - "step": 15541 - }, - { - "epoch": 8.105345501955671, - "grad_norm": 1.4736027717590332, - "learning_rate": 8.488743718592966e-05, - "loss": 5.7324, - "step": 15542 - }, - { - "epoch": 8.105867014341591, - "grad_norm": 1.7025319337844849, - "learning_rate": 8.488643216080402e-05, - "loss": 5.1381, - "step": 15543 - }, - { - "epoch": 8.10638852672751, - "grad_norm": 1.571522831916809, - "learning_rate": 8.48854271356784e-05, - "loss": 4.866, - "step": 15544 - }, - { - "epoch": 8.106910039113428, - "grad_norm": 1.3378335237503052, - "learning_rate": 8.488442211055277e-05, - "loss": 5.4035, - "step": 15545 - }, - { - "epoch": 8.107431551499348, - "grad_norm": 1.3754727840423584, - "learning_rate": 8.488341708542714e-05, - "loss": 5.224, - "step": 15546 - }, - { - "epoch": 8.107953063885267, - "grad_norm": 1.5679142475128174, - "learning_rate": 8.488241206030151e-05, - "loss": 4.6494, - "step": 15547 - }, - { - "epoch": 8.108474576271187, - "grad_norm": 1.496989130973816, - "learning_rate": 8.488140703517589e-05, - "loss": 5.7979, - "step": 15548 - }, - { - "epoch": 8.108996088657106, - "grad_norm": 1.4647834300994873, - "learning_rate": 8.488040201005025e-05, - "loss": 5.2785, - "step": 15549 - }, - { - "epoch": 8.109517601043025, - "grad_norm": 1.3811612129211426, - "learning_rate": 8.487939698492463e-05, - "loss": 5.7119, - "step": 15550 - }, - { - "epoch": 8.110039113428943, - "grad_norm": 1.5355618000030518, - "learning_rate": 8.487839195979899e-05, - "loss": 5.2248, - "step": 15551 - }, - { - "epoch": 8.110560625814863, - "grad_norm": 1.4873042106628418, - "learning_rate": 8.487738693467337e-05, - "loss": 5.4367, - "step": 15552 - }, - { - "epoch": 8.111082138200782, - "grad_norm": 1.3422654867172241, - "learning_rate": 8.487638190954775e-05, - "loss": 5.9475, - "step": 15553 - }, - { - "epoch": 8.111603650586702, - "grad_norm": 1.499496340751648, - "learning_rate": 8.487537688442212e-05, - "loss": 5.5472, - "step": 15554 - }, - { - "epoch": 8.112125162972621, - "grad_norm": 1.4672340154647827, - "learning_rate": 8.487437185929649e-05, - "loss": 5.5653, - "step": 15555 - }, - { - "epoch": 8.11264667535854, - "grad_norm": 1.346651315689087, - "learning_rate": 8.487336683417085e-05, - "loss": 5.1771, - "step": 15556 - }, - { - "epoch": 8.113168187744458, - "grad_norm": 1.8967205286026, - "learning_rate": 8.487236180904523e-05, - "loss": 4.9222, - "step": 15557 - }, - { - "epoch": 8.113689700130378, - "grad_norm": 1.4375241994857788, - "learning_rate": 8.48713567839196e-05, - "loss": 5.379, - "step": 15558 - }, - { - "epoch": 8.114211212516297, - "grad_norm": 1.6120744943618774, - "learning_rate": 8.487035175879397e-05, - "loss": 5.1128, - "step": 15559 - }, - { - "epoch": 8.114732724902217, - "grad_norm": 1.300262451171875, - "learning_rate": 8.486934673366834e-05, - "loss": 6.1117, - "step": 15560 - }, - { - "epoch": 8.115254237288136, - "grad_norm": 1.4084118604660034, - "learning_rate": 8.486834170854272e-05, - "loss": 5.8308, - "step": 15561 - }, - { - "epoch": 8.115775749674055, - "grad_norm": 1.3975722789764404, - "learning_rate": 8.48673366834171e-05, - "loss": 5.889, - "step": 15562 - }, - { - "epoch": 8.116297262059973, - "grad_norm": 1.4709371328353882, - "learning_rate": 8.486633165829147e-05, - "loss": 5.5223, - "step": 15563 - }, - { - "epoch": 8.116818774445893, - "grad_norm": 1.381808876991272, - "learning_rate": 8.486532663316584e-05, - "loss": 5.9209, - "step": 15564 - }, - { - "epoch": 8.117340286831812, - "grad_norm": 1.5552624464035034, - "learning_rate": 8.486432160804021e-05, - "loss": 5.3573, - "step": 15565 - }, - { - "epoch": 8.117861799217732, - "grad_norm": 1.4626195430755615, - "learning_rate": 8.486331658291458e-05, - "loss": 4.7643, - "step": 15566 - }, - { - "epoch": 8.118383311603651, - "grad_norm": 1.4831053018569946, - "learning_rate": 8.486231155778896e-05, - "loss": 5.2142, - "step": 15567 - }, - { - "epoch": 8.11890482398957, - "grad_norm": 1.4958866834640503, - "learning_rate": 8.486130653266332e-05, - "loss": 5.1064, - "step": 15568 - }, - { - "epoch": 8.119426336375488, - "grad_norm": 1.3202338218688965, - "learning_rate": 8.486030150753768e-05, - "loss": 5.9428, - "step": 15569 - }, - { - "epoch": 8.119947848761408, - "grad_norm": 1.3805500268936157, - "learning_rate": 8.485929648241206e-05, - "loss": 5.6702, - "step": 15570 - }, - { - "epoch": 8.120469361147327, - "grad_norm": 1.4750313758850098, - "learning_rate": 8.485829145728643e-05, - "loss": 5.3515, - "step": 15571 - }, - { - "epoch": 8.120990873533247, - "grad_norm": 1.4101132154464722, - "learning_rate": 8.48572864321608e-05, - "loss": 5.0853, - "step": 15572 - }, - { - "epoch": 8.121512385919166, - "grad_norm": 1.5301276445388794, - "learning_rate": 8.485628140703518e-05, - "loss": 5.3527, - "step": 15573 - }, - { - "epoch": 8.122033898305085, - "grad_norm": 1.4019984006881714, - "learning_rate": 8.485527638190956e-05, - "loss": 5.2153, - "step": 15574 - }, - { - "epoch": 8.122555410691003, - "grad_norm": 1.3314241170883179, - "learning_rate": 8.485427135678392e-05, - "loss": 5.6257, - "step": 15575 - }, - { - "epoch": 8.123076923076923, - "grad_norm": 1.4106816053390503, - "learning_rate": 8.48532663316583e-05, - "loss": 5.5899, - "step": 15576 - }, - { - "epoch": 8.123598435462842, - "grad_norm": 1.7705309391021729, - "learning_rate": 8.485226130653267e-05, - "loss": 5.594, - "step": 15577 - }, - { - "epoch": 8.124119947848762, - "grad_norm": 1.337105631828308, - "learning_rate": 8.485125628140704e-05, - "loss": 5.5343, - "step": 15578 - }, - { - "epoch": 8.124641460234681, - "grad_norm": 1.3498470783233643, - "learning_rate": 8.485025125628141e-05, - "loss": 5.9471, - "step": 15579 - }, - { - "epoch": 8.1251629726206, - "grad_norm": 1.4374278783798218, - "learning_rate": 8.484924623115579e-05, - "loss": 5.5415, - "step": 15580 - }, - { - "epoch": 8.125684485006518, - "grad_norm": 1.3575382232666016, - "learning_rate": 8.484824120603015e-05, - "loss": 5.5804, - "step": 15581 - }, - { - "epoch": 8.126205997392438, - "grad_norm": 1.4507306814193726, - "learning_rate": 8.484723618090453e-05, - "loss": 5.4324, - "step": 15582 - }, - { - "epoch": 8.126727509778357, - "grad_norm": 1.3927377462387085, - "learning_rate": 8.48462311557789e-05, - "loss": 5.7295, - "step": 15583 - }, - { - "epoch": 8.127249022164277, - "grad_norm": 1.5501129627227783, - "learning_rate": 8.484522613065327e-05, - "loss": 5.1406, - "step": 15584 - }, - { - "epoch": 8.127770534550196, - "grad_norm": 1.673203468322754, - "learning_rate": 8.484422110552765e-05, - "loss": 5.1729, - "step": 15585 - }, - { - "epoch": 8.128292046936116, - "grad_norm": 1.396837592124939, - "learning_rate": 8.484321608040201e-05, - "loss": 5.6673, - "step": 15586 - }, - { - "epoch": 8.128813559322033, - "grad_norm": 1.453805685043335, - "learning_rate": 8.484221105527639e-05, - "loss": 5.8017, - "step": 15587 - }, - { - "epoch": 8.129335071707953, - "grad_norm": 1.4538160562515259, - "learning_rate": 8.484120603015075e-05, - "loss": 5.2467, - "step": 15588 - }, - { - "epoch": 8.129856584093872, - "grad_norm": 1.3509242534637451, - "learning_rate": 8.484020100502513e-05, - "loss": 5.9653, - "step": 15589 - }, - { - "epoch": 8.130378096479792, - "grad_norm": 1.3285996913909912, - "learning_rate": 8.48391959798995e-05, - "loss": 5.6723, - "step": 15590 - }, - { - "epoch": 8.130899608865711, - "grad_norm": 1.4589732885360718, - "learning_rate": 8.483819095477387e-05, - "loss": 5.2919, - "step": 15591 - }, - { - "epoch": 8.13142112125163, - "grad_norm": 1.4421581029891968, - "learning_rate": 8.483718592964824e-05, - "loss": 5.4921, - "step": 15592 - }, - { - "epoch": 8.131942633637548, - "grad_norm": 1.5017290115356445, - "learning_rate": 8.483618090452262e-05, - "loss": 5.576, - "step": 15593 - }, - { - "epoch": 8.132464146023468, - "grad_norm": 1.409816861152649, - "learning_rate": 8.4835175879397e-05, - "loss": 5.3614, - "step": 15594 - }, - { - "epoch": 8.132985658409387, - "grad_norm": 1.415540099143982, - "learning_rate": 8.483417085427136e-05, - "loss": 5.4161, - "step": 15595 - }, - { - "epoch": 8.133507170795307, - "grad_norm": 1.5131416320800781, - "learning_rate": 8.483316582914574e-05, - "loss": 5.5096, - "step": 15596 - }, - { - "epoch": 8.134028683181226, - "grad_norm": 1.2805544137954712, - "learning_rate": 8.48321608040201e-05, - "loss": 5.7007, - "step": 15597 - }, - { - "epoch": 8.134550195567146, - "grad_norm": 1.4210224151611328, - "learning_rate": 8.483115577889448e-05, - "loss": 5.5568, - "step": 15598 - }, - { - "epoch": 8.135071707953063, - "grad_norm": 1.4034459590911865, - "learning_rate": 8.483015075376884e-05, - "loss": 5.6493, - "step": 15599 - }, - { - "epoch": 8.135593220338983, - "grad_norm": 1.5517778396606445, - "learning_rate": 8.482914572864322e-05, - "loss": 5.6293, - "step": 15600 - }, - { - "epoch": 8.136114732724902, - "grad_norm": 1.581545352935791, - "learning_rate": 8.482814070351758e-05, - "loss": 5.3953, - "step": 15601 - }, - { - "epoch": 8.136636245110822, - "grad_norm": 1.5390088558197021, - "learning_rate": 8.482713567839196e-05, - "loss": 5.1723, - "step": 15602 - }, - { - "epoch": 8.137157757496741, - "grad_norm": 1.5111488103866577, - "learning_rate": 8.482613065326633e-05, - "loss": 5.2074, - "step": 15603 - }, - { - "epoch": 8.13767926988266, - "grad_norm": 1.5426474809646606, - "learning_rate": 8.48251256281407e-05, - "loss": 5.6368, - "step": 15604 - }, - { - "epoch": 8.138200782268578, - "grad_norm": 1.4915671348571777, - "learning_rate": 8.482412060301508e-05, - "loss": 5.311, - "step": 15605 - }, - { - "epoch": 8.138722294654498, - "grad_norm": 1.4542789459228516, - "learning_rate": 8.482311557788946e-05, - "loss": 5.599, - "step": 15606 - }, - { - "epoch": 8.139243807040417, - "grad_norm": 1.4096118211746216, - "learning_rate": 8.482211055276382e-05, - "loss": 5.9645, - "step": 15607 - }, - { - "epoch": 8.139765319426337, - "grad_norm": 1.5096935033798218, - "learning_rate": 8.48211055276382e-05, - "loss": 5.6701, - "step": 15608 - }, - { - "epoch": 8.140286831812256, - "grad_norm": 1.592988133430481, - "learning_rate": 8.482010050251257e-05, - "loss": 5.3796, - "step": 15609 - }, - { - "epoch": 8.140808344198176, - "grad_norm": 1.453254222869873, - "learning_rate": 8.481909547738693e-05, - "loss": 5.5625, - "step": 15610 - }, - { - "epoch": 8.141329856584093, - "grad_norm": 1.572135090827942, - "learning_rate": 8.481809045226131e-05, - "loss": 5.5304, - "step": 15611 - }, - { - "epoch": 8.141851368970013, - "grad_norm": 1.3350560665130615, - "learning_rate": 8.481708542713567e-05, - "loss": 5.5486, - "step": 15612 - }, - { - "epoch": 8.142372881355932, - "grad_norm": 1.4541770219802856, - "learning_rate": 8.481608040201005e-05, - "loss": 5.7234, - "step": 15613 - }, - { - "epoch": 8.142894393741852, - "grad_norm": 1.4264473915100098, - "learning_rate": 8.481507537688443e-05, - "loss": 5.6934, - "step": 15614 - }, - { - "epoch": 8.143415906127771, - "grad_norm": 1.3341819047927856, - "learning_rate": 8.48140703517588e-05, - "loss": 5.4834, - "step": 15615 - }, - { - "epoch": 8.14393741851369, - "grad_norm": 1.3868813514709473, - "learning_rate": 8.481306532663317e-05, - "loss": 5.2922, - "step": 15616 - }, - { - "epoch": 8.144458930899608, - "grad_norm": 1.4651952981948853, - "learning_rate": 8.481206030150755e-05, - "loss": 5.6416, - "step": 15617 - }, - { - "epoch": 8.144980443285528, - "grad_norm": 1.4945381879806519, - "learning_rate": 8.481105527638191e-05, - "loss": 5.453, - "step": 15618 - }, - { - "epoch": 8.145501955671447, - "grad_norm": 1.509943962097168, - "learning_rate": 8.481005025125629e-05, - "loss": 5.3416, - "step": 15619 - }, - { - "epoch": 8.146023468057367, - "grad_norm": 1.369494915008545, - "learning_rate": 8.480904522613066e-05, - "loss": 5.5599, - "step": 15620 - }, - { - "epoch": 8.146544980443286, - "grad_norm": 1.5115758180618286, - "learning_rate": 8.480804020100503e-05, - "loss": 5.541, - "step": 15621 - }, - { - "epoch": 8.147066492829204, - "grad_norm": 1.4213604927062988, - "learning_rate": 8.48070351758794e-05, - "loss": 5.4217, - "step": 15622 - }, - { - "epoch": 8.147588005215123, - "grad_norm": 1.5920116901397705, - "learning_rate": 8.480603015075376e-05, - "loss": 5.4849, - "step": 15623 - }, - { - "epoch": 8.148109517601043, - "grad_norm": 1.3764736652374268, - "learning_rate": 8.480502512562814e-05, - "loss": 5.7304, - "step": 15624 - }, - { - "epoch": 8.148631029986962, - "grad_norm": 1.476110816001892, - "learning_rate": 8.480402010050252e-05, - "loss": 5.7648, - "step": 15625 - }, - { - "epoch": 8.149152542372882, - "grad_norm": 1.3749607801437378, - "learning_rate": 8.48030150753769e-05, - "loss": 5.7916, - "step": 15626 - }, - { - "epoch": 8.149674054758801, - "grad_norm": 1.4094314575195312, - "learning_rate": 8.480201005025126e-05, - "loss": 5.5941, - "step": 15627 - }, - { - "epoch": 8.150195567144719, - "grad_norm": 1.385056495666504, - "learning_rate": 8.480100502512564e-05, - "loss": 5.2933, - "step": 15628 - }, - { - "epoch": 8.150717079530638, - "grad_norm": 1.6045433282852173, - "learning_rate": 8.48e-05, - "loss": 5.4459, - "step": 15629 - }, - { - "epoch": 8.151238591916558, - "grad_norm": 1.5553525686264038, - "learning_rate": 8.479899497487438e-05, - "loss": 5.4154, - "step": 15630 - }, - { - "epoch": 8.151760104302477, - "grad_norm": 1.4157049655914307, - "learning_rate": 8.479798994974874e-05, - "loss": 5.9612, - "step": 15631 - }, - { - "epoch": 8.152281616688397, - "grad_norm": 1.3862642049789429, - "learning_rate": 8.479698492462312e-05, - "loss": 5.811, - "step": 15632 - }, - { - "epoch": 8.152803129074316, - "grad_norm": 1.3505843877792358, - "learning_rate": 8.479597989949749e-05, - "loss": 6.0447, - "step": 15633 - }, - { - "epoch": 8.153324641460234, - "grad_norm": 1.4560343027114868, - "learning_rate": 8.479497487437186e-05, - "loss": 5.2017, - "step": 15634 - }, - { - "epoch": 8.153846153846153, - "grad_norm": 1.427320122718811, - "learning_rate": 8.479396984924624e-05, - "loss": 5.519, - "step": 15635 - }, - { - "epoch": 8.154367666232073, - "grad_norm": 1.3723108768463135, - "learning_rate": 8.47929648241206e-05, - "loss": 5.3713, - "step": 15636 - }, - { - "epoch": 8.154889178617992, - "grad_norm": 1.4132267236709595, - "learning_rate": 8.479195979899498e-05, - "loss": 5.1862, - "step": 15637 - }, - { - "epoch": 8.155410691003912, - "grad_norm": 1.5348517894744873, - "learning_rate": 8.479095477386935e-05, - "loss": 5.3815, - "step": 15638 - }, - { - "epoch": 8.155932203389831, - "grad_norm": 1.4944921731948853, - "learning_rate": 8.478994974874373e-05, - "loss": 5.2171, - "step": 15639 - }, - { - "epoch": 8.156453715775749, - "grad_norm": 1.5292121171951294, - "learning_rate": 8.478894472361809e-05, - "loss": 5.6398, - "step": 15640 - }, - { - "epoch": 8.156975228161668, - "grad_norm": 1.3823909759521484, - "learning_rate": 8.478793969849247e-05, - "loss": 5.3769, - "step": 15641 - }, - { - "epoch": 8.157496740547588, - "grad_norm": 1.4275274276733398, - "learning_rate": 8.478693467336683e-05, - "loss": 5.5798, - "step": 15642 - }, - { - "epoch": 8.158018252933507, - "grad_norm": 1.7292616367340088, - "learning_rate": 8.478592964824121e-05, - "loss": 5.3565, - "step": 15643 - }, - { - "epoch": 8.158539765319427, - "grad_norm": 1.463364601135254, - "learning_rate": 8.478492462311557e-05, - "loss": 5.7937, - "step": 15644 - }, - { - "epoch": 8.159061277705346, - "grad_norm": 1.5435712337493896, - "learning_rate": 8.478391959798995e-05, - "loss": 5.3912, - "step": 15645 - }, - { - "epoch": 8.159582790091264, - "grad_norm": 1.4186781644821167, - "learning_rate": 8.478291457286433e-05, - "loss": 5.1981, - "step": 15646 - }, - { - "epoch": 8.160104302477183, - "grad_norm": 1.4178798198699951, - "learning_rate": 8.478190954773871e-05, - "loss": 5.4085, - "step": 15647 - }, - { - "epoch": 8.160625814863103, - "grad_norm": 1.4930795431137085, - "learning_rate": 8.478090452261307e-05, - "loss": 5.1776, - "step": 15648 - }, - { - "epoch": 8.161147327249022, - "grad_norm": 1.518615484237671, - "learning_rate": 8.477989949748744e-05, - "loss": 5.5114, - "step": 15649 - }, - { - "epoch": 8.161668839634942, - "grad_norm": 1.4567126035690308, - "learning_rate": 8.477889447236181e-05, - "loss": 5.4336, - "step": 15650 - }, - { - "epoch": 8.162190352020861, - "grad_norm": 1.5706982612609863, - "learning_rate": 8.477788944723618e-05, - "loss": 5.475, - "step": 15651 - }, - { - "epoch": 8.162711864406779, - "grad_norm": 1.4382402896881104, - "learning_rate": 8.477688442211056e-05, - "loss": 5.3548, - "step": 15652 - }, - { - "epoch": 8.163233376792698, - "grad_norm": 1.4422733783721924, - "learning_rate": 8.477587939698492e-05, - "loss": 5.5328, - "step": 15653 - }, - { - "epoch": 8.163754889178618, - "grad_norm": 1.310920238494873, - "learning_rate": 8.47748743718593e-05, - "loss": 5.9082, - "step": 15654 - }, - { - "epoch": 8.164276401564537, - "grad_norm": 1.4188412427902222, - "learning_rate": 8.477386934673368e-05, - "loss": 5.6939, - "step": 15655 - }, - { - "epoch": 8.164797913950457, - "grad_norm": 1.429704189300537, - "learning_rate": 8.477286432160805e-05, - "loss": 5.8276, - "step": 15656 - }, - { - "epoch": 8.165319426336376, - "grad_norm": 1.486853837966919, - "learning_rate": 8.477185929648242e-05, - "loss": 5.1127, - "step": 15657 - }, - { - "epoch": 8.165840938722294, - "grad_norm": 1.6279070377349854, - "learning_rate": 8.47708542713568e-05, - "loss": 4.5894, - "step": 15658 - }, - { - "epoch": 8.166362451108213, - "grad_norm": 1.485946536064148, - "learning_rate": 8.476984924623116e-05, - "loss": 5.0548, - "step": 15659 - }, - { - "epoch": 8.166883963494133, - "grad_norm": 1.537684679031372, - "learning_rate": 8.476884422110554e-05, - "loss": 5.1216, - "step": 15660 - }, - { - "epoch": 8.167405475880052, - "grad_norm": 1.4714460372924805, - "learning_rate": 8.47678391959799e-05, - "loss": 5.6528, - "step": 15661 - }, - { - "epoch": 8.167926988265972, - "grad_norm": 1.4220597743988037, - "learning_rate": 8.476683417085427e-05, - "loss": 5.2816, - "step": 15662 - }, - { - "epoch": 8.168448500651891, - "grad_norm": 1.3877047300338745, - "learning_rate": 8.476582914572864e-05, - "loss": 5.6596, - "step": 15663 - }, - { - "epoch": 8.168970013037809, - "grad_norm": 1.4273946285247803, - "learning_rate": 8.476482412060301e-05, - "loss": 5.6472, - "step": 15664 - }, - { - "epoch": 8.169491525423728, - "grad_norm": 1.5067089796066284, - "learning_rate": 8.476381909547739e-05, - "loss": 5.1788, - "step": 15665 - }, - { - "epoch": 8.170013037809648, - "grad_norm": 1.725943684577942, - "learning_rate": 8.476281407035176e-05, - "loss": 4.6123, - "step": 15666 - }, - { - "epoch": 8.170534550195567, - "grad_norm": 1.4579700231552124, - "learning_rate": 8.476180904522614e-05, - "loss": 5.5621, - "step": 15667 - }, - { - "epoch": 8.171056062581487, - "grad_norm": 1.3820748329162598, - "learning_rate": 8.47608040201005e-05, - "loss": 5.663, - "step": 15668 - }, - { - "epoch": 8.171577574967406, - "grad_norm": 1.5525779724121094, - "learning_rate": 8.475979899497488e-05, - "loss": 5.1679, - "step": 15669 - }, - { - "epoch": 8.172099087353324, - "grad_norm": 1.4974778890609741, - "learning_rate": 8.475879396984925e-05, - "loss": 5.2157, - "step": 15670 - }, - { - "epoch": 8.172620599739243, - "grad_norm": 1.4455845355987549, - "learning_rate": 8.475778894472363e-05, - "loss": 5.5693, - "step": 15671 - }, - { - "epoch": 8.173142112125163, - "grad_norm": 1.3637913465499878, - "learning_rate": 8.475678391959799e-05, - "loss": 5.9652, - "step": 15672 - }, - { - "epoch": 8.173663624511082, - "grad_norm": 1.5420129299163818, - "learning_rate": 8.475577889447237e-05, - "loss": 5.4046, - "step": 15673 - }, - { - "epoch": 8.174185136897002, - "grad_norm": 1.6305127143859863, - "learning_rate": 8.475477386934673e-05, - "loss": 5.4738, - "step": 15674 - }, - { - "epoch": 8.174706649282921, - "grad_norm": 1.4297443628311157, - "learning_rate": 8.475376884422111e-05, - "loss": 4.7748, - "step": 15675 - }, - { - "epoch": 8.175228161668839, - "grad_norm": 1.500221610069275, - "learning_rate": 8.475276381909549e-05, - "loss": 4.8599, - "step": 15676 - }, - { - "epoch": 8.175749674054758, - "grad_norm": 1.3496217727661133, - "learning_rate": 8.475175879396985e-05, - "loss": 5.5598, - "step": 15677 - }, - { - "epoch": 8.176271186440678, - "grad_norm": 1.4575495719909668, - "learning_rate": 8.475075376884423e-05, - "loss": 5.3015, - "step": 15678 - }, - { - "epoch": 8.176792698826597, - "grad_norm": 1.43996262550354, - "learning_rate": 8.47497487437186e-05, - "loss": 5.7395, - "step": 15679 - }, - { - "epoch": 8.177314211212517, - "grad_norm": 1.385510802268982, - "learning_rate": 8.474874371859297e-05, - "loss": 5.6473, - "step": 15680 - }, - { - "epoch": 8.177835723598436, - "grad_norm": 1.2863478660583496, - "learning_rate": 8.474773869346734e-05, - "loss": 5.811, - "step": 15681 - }, - { - "epoch": 8.178357235984354, - "grad_norm": 1.4524357318878174, - "learning_rate": 8.474673366834171e-05, - "loss": 5.8097, - "step": 15682 - }, - { - "epoch": 8.178878748370273, - "grad_norm": 1.5861839056015015, - "learning_rate": 8.474572864321608e-05, - "loss": 5.3148, - "step": 15683 - }, - { - "epoch": 8.179400260756193, - "grad_norm": 1.3613831996917725, - "learning_rate": 8.474472361809046e-05, - "loss": 5.7718, - "step": 15684 - }, - { - "epoch": 8.179921773142112, - "grad_norm": 1.4768444299697876, - "learning_rate": 8.474371859296482e-05, - "loss": 5.4666, - "step": 15685 - }, - { - "epoch": 8.180443285528032, - "grad_norm": 1.5379939079284668, - "learning_rate": 8.47427135678392e-05, - "loss": 5.0985, - "step": 15686 - }, - { - "epoch": 8.180964797913951, - "grad_norm": 1.4930436611175537, - "learning_rate": 8.474170854271358e-05, - "loss": 5.9038, - "step": 15687 - }, - { - "epoch": 8.181486310299869, - "grad_norm": 1.481609582901001, - "learning_rate": 8.474070351758794e-05, - "loss": 5.2875, - "step": 15688 - }, - { - "epoch": 8.182007822685788, - "grad_norm": 1.5259296894073486, - "learning_rate": 8.473969849246232e-05, - "loss": 5.2092, - "step": 15689 - }, - { - "epoch": 8.182529335071708, - "grad_norm": 1.4880670309066772, - "learning_rate": 8.473869346733668e-05, - "loss": 5.6307, - "step": 15690 - }, - { - "epoch": 8.183050847457627, - "grad_norm": 1.515535593032837, - "learning_rate": 8.473768844221106e-05, - "loss": 4.9057, - "step": 15691 - }, - { - "epoch": 8.183572359843547, - "grad_norm": 1.462506890296936, - "learning_rate": 8.473668341708542e-05, - "loss": 5.9277, - "step": 15692 - }, - { - "epoch": 8.184093872229466, - "grad_norm": 1.5277743339538574, - "learning_rate": 8.47356783919598e-05, - "loss": 5.8953, - "step": 15693 - }, - { - "epoch": 8.184615384615384, - "grad_norm": 1.4780571460723877, - "learning_rate": 8.473467336683417e-05, - "loss": 5.6029, - "step": 15694 - }, - { - "epoch": 8.185136897001303, - "grad_norm": 1.3528010845184326, - "learning_rate": 8.473366834170854e-05, - "loss": 5.7132, - "step": 15695 - }, - { - "epoch": 8.185658409387223, - "grad_norm": 1.5806350708007812, - "learning_rate": 8.473266331658292e-05, - "loss": 5.4253, - "step": 15696 - }, - { - "epoch": 8.186179921773142, - "grad_norm": 1.4897887706756592, - "learning_rate": 8.47316582914573e-05, - "loss": 5.1961, - "step": 15697 - }, - { - "epoch": 8.186701434159062, - "grad_norm": 1.4663089513778687, - "learning_rate": 8.473065326633166e-05, - "loss": 5.5321, - "step": 15698 - }, - { - "epoch": 8.187222946544981, - "grad_norm": 1.5161285400390625, - "learning_rate": 8.472964824120604e-05, - "loss": 5.3861, - "step": 15699 - }, - { - "epoch": 8.187744458930899, - "grad_norm": 1.588722825050354, - "learning_rate": 8.472864321608041e-05, - "loss": 5.1899, - "step": 15700 - }, - { - "epoch": 8.188265971316818, - "grad_norm": 1.4283664226531982, - "learning_rate": 8.472763819095478e-05, - "loss": 5.7274, - "step": 15701 - }, - { - "epoch": 8.188787483702738, - "grad_norm": 1.3967450857162476, - "learning_rate": 8.472663316582915e-05, - "loss": 5.8239, - "step": 15702 - }, - { - "epoch": 8.189308996088657, - "grad_norm": 1.4912315607070923, - "learning_rate": 8.472562814070351e-05, - "loss": 5.3167, - "step": 15703 - }, - { - "epoch": 8.189830508474577, - "grad_norm": 1.4418059587478638, - "learning_rate": 8.472462311557789e-05, - "loss": 5.2679, - "step": 15704 - }, - { - "epoch": 8.190352020860496, - "grad_norm": 1.4699418544769287, - "learning_rate": 8.472361809045226e-05, - "loss": 5.6366, - "step": 15705 - }, - { - "epoch": 8.190873533246414, - "grad_norm": 1.4006768465042114, - "learning_rate": 8.472261306532663e-05, - "loss": 6.0613, - "step": 15706 - }, - { - "epoch": 8.191395045632333, - "grad_norm": 1.5386254787445068, - "learning_rate": 8.472160804020101e-05, - "loss": 5.5841, - "step": 15707 - }, - { - "epoch": 8.191916558018253, - "grad_norm": 1.5806063413619995, - "learning_rate": 8.472060301507539e-05, - "loss": 5.6865, - "step": 15708 - }, - { - "epoch": 8.192438070404172, - "grad_norm": 1.5957324504852295, - "learning_rate": 8.471959798994975e-05, - "loss": 5.4481, - "step": 15709 - }, - { - "epoch": 8.192959582790092, - "grad_norm": 1.435654878616333, - "learning_rate": 8.471859296482413e-05, - "loss": 5.8497, - "step": 15710 - }, - { - "epoch": 8.193481095176011, - "grad_norm": 1.4764268398284912, - "learning_rate": 8.47175879396985e-05, - "loss": 5.2377, - "step": 15711 - }, - { - "epoch": 8.194002607561929, - "grad_norm": 1.3605573177337646, - "learning_rate": 8.471658291457287e-05, - "loss": 5.8971, - "step": 15712 - }, - { - "epoch": 8.194524119947848, - "grad_norm": 1.3622684478759766, - "learning_rate": 8.471557788944724e-05, - "loss": 5.7264, - "step": 15713 - }, - { - "epoch": 8.195045632333768, - "grad_norm": 1.4610645771026611, - "learning_rate": 8.471457286432162e-05, - "loss": 5.8218, - "step": 15714 - }, - { - "epoch": 8.195567144719687, - "grad_norm": 1.4439469575881958, - "learning_rate": 8.471356783919598e-05, - "loss": 5.4333, - "step": 15715 - }, - { - "epoch": 8.196088657105607, - "grad_norm": 1.4013832807540894, - "learning_rate": 8.471256281407036e-05, - "loss": 5.3492, - "step": 15716 - }, - { - "epoch": 8.196610169491525, - "grad_norm": 1.4151861667633057, - "learning_rate": 8.471155778894474e-05, - "loss": 5.3737, - "step": 15717 - }, - { - "epoch": 8.197131681877444, - "grad_norm": 1.4271551370620728, - "learning_rate": 8.47105527638191e-05, - "loss": 5.9077, - "step": 15718 - }, - { - "epoch": 8.197653194263363, - "grad_norm": 1.4510663747787476, - "learning_rate": 8.470954773869348e-05, - "loss": 5.6372, - "step": 15719 - }, - { - "epoch": 8.198174706649283, - "grad_norm": 1.3848557472229004, - "learning_rate": 8.470854271356784e-05, - "loss": 5.5285, - "step": 15720 - }, - { - "epoch": 8.198696219035202, - "grad_norm": 1.4996144771575928, - "learning_rate": 8.470753768844222e-05, - "loss": 5.1585, - "step": 15721 - }, - { - "epoch": 8.199217731421122, - "grad_norm": 1.5980603694915771, - "learning_rate": 8.470653266331658e-05, - "loss": 5.3544, - "step": 15722 - }, - { - "epoch": 8.19973924380704, - "grad_norm": 1.5464061498641968, - "learning_rate": 8.470552763819096e-05, - "loss": 5.0331, - "step": 15723 - }, - { - "epoch": 8.200260756192959, - "grad_norm": 1.4435473680496216, - "learning_rate": 8.470452261306533e-05, - "loss": 5.0285, - "step": 15724 - }, - { - "epoch": 8.200782268578878, - "grad_norm": 1.419960856437683, - "learning_rate": 8.47035175879397e-05, - "loss": 5.7502, - "step": 15725 - }, - { - "epoch": 8.201303780964798, - "grad_norm": 1.5281155109405518, - "learning_rate": 8.470251256281407e-05, - "loss": 5.628, - "step": 15726 - }, - { - "epoch": 8.201825293350717, - "grad_norm": 1.7027063369750977, - "learning_rate": 8.470150753768845e-05, - "loss": 5.2764, - "step": 15727 - }, - { - "epoch": 8.202346805736637, - "grad_norm": 1.434435248374939, - "learning_rate": 8.470050251256282e-05, - "loss": 5.0294, - "step": 15728 - }, - { - "epoch": 8.202868318122555, - "grad_norm": 1.4521324634552002, - "learning_rate": 8.469949748743719e-05, - "loss": 5.4083, - "step": 15729 - }, - { - "epoch": 8.203389830508474, - "grad_norm": 1.4004123210906982, - "learning_rate": 8.469849246231157e-05, - "loss": 5.6841, - "step": 15730 - }, - { - "epoch": 8.203911342894393, - "grad_norm": 1.5111312866210938, - "learning_rate": 8.469748743718593e-05, - "loss": 5.6646, - "step": 15731 - }, - { - "epoch": 8.204432855280313, - "grad_norm": 1.3249565362930298, - "learning_rate": 8.469648241206031e-05, - "loss": 5.8177, - "step": 15732 - }, - { - "epoch": 8.204954367666232, - "grad_norm": 1.3625563383102417, - "learning_rate": 8.469547738693467e-05, - "loss": 5.6257, - "step": 15733 - }, - { - "epoch": 8.205475880052152, - "grad_norm": 1.4290738105773926, - "learning_rate": 8.469447236180905e-05, - "loss": 5.7234, - "step": 15734 - }, - { - "epoch": 8.20599739243807, - "grad_norm": 1.4249438047409058, - "learning_rate": 8.469346733668341e-05, - "loss": 5.6813, - "step": 15735 - }, - { - "epoch": 8.206518904823989, - "grad_norm": 1.4201111793518066, - "learning_rate": 8.469246231155779e-05, - "loss": 5.4823, - "step": 15736 - }, - { - "epoch": 8.207040417209909, - "grad_norm": 1.4363564252853394, - "learning_rate": 8.469145728643217e-05, - "loss": 5.5656, - "step": 15737 - }, - { - "epoch": 8.207561929595828, - "grad_norm": 1.4932661056518555, - "learning_rate": 8.469045226130655e-05, - "loss": 5.5949, - "step": 15738 - }, - { - "epoch": 8.208083441981747, - "grad_norm": 1.4237985610961914, - "learning_rate": 8.468944723618091e-05, - "loss": 5.6583, - "step": 15739 - }, - { - "epoch": 8.208604954367667, - "grad_norm": 1.387248158454895, - "learning_rate": 8.468844221105529e-05, - "loss": 5.6805, - "step": 15740 - }, - { - "epoch": 8.209126466753585, - "grad_norm": 1.3265591859817505, - "learning_rate": 8.468743718592965e-05, - "loss": 5.662, - "step": 15741 - }, - { - "epoch": 8.209647979139504, - "grad_norm": 1.3854267597198486, - "learning_rate": 8.468643216080402e-05, - "loss": 5.8296, - "step": 15742 - }, - { - "epoch": 8.210169491525424, - "grad_norm": 1.3817492723464966, - "learning_rate": 8.46854271356784e-05, - "loss": 5.4144, - "step": 15743 - }, - { - "epoch": 8.210691003911343, - "grad_norm": 1.4484914541244507, - "learning_rate": 8.468442211055276e-05, - "loss": 5.2767, - "step": 15744 - }, - { - "epoch": 8.211212516297262, - "grad_norm": 1.366478443145752, - "learning_rate": 8.468341708542714e-05, - "loss": 5.7542, - "step": 15745 - }, - { - "epoch": 8.211734028683182, - "grad_norm": 1.3785945177078247, - "learning_rate": 8.46824120603015e-05, - "loss": 5.7536, - "step": 15746 - }, - { - "epoch": 8.2122555410691, - "grad_norm": 1.382473349571228, - "learning_rate": 8.468140703517588e-05, - "loss": 5.6491, - "step": 15747 - }, - { - "epoch": 8.212777053455019, - "grad_norm": 1.3798565864562988, - "learning_rate": 8.468040201005026e-05, - "loss": 5.6605, - "step": 15748 - }, - { - "epoch": 8.213298565840939, - "grad_norm": 1.4109631776809692, - "learning_rate": 8.467939698492464e-05, - "loss": 5.428, - "step": 15749 - }, - { - "epoch": 8.213820078226858, - "grad_norm": 1.3077722787857056, - "learning_rate": 8.4678391959799e-05, - "loss": 5.6788, - "step": 15750 - }, - { - "epoch": 8.214341590612777, - "grad_norm": 1.3707021474838257, - "learning_rate": 8.467738693467338e-05, - "loss": 5.7515, - "step": 15751 - }, - { - "epoch": 8.214863102998697, - "grad_norm": 1.4192339181900024, - "learning_rate": 8.467638190954774e-05, - "loss": 5.3697, - "step": 15752 - }, - { - "epoch": 8.215384615384615, - "grad_norm": 1.508329153060913, - "learning_rate": 8.467537688442212e-05, - "loss": 5.3475, - "step": 15753 - }, - { - "epoch": 8.215906127770534, - "grad_norm": 1.535009503364563, - "learning_rate": 8.467437185929648e-05, - "loss": 5.8883, - "step": 15754 - }, - { - "epoch": 8.216427640156454, - "grad_norm": 1.476557731628418, - "learning_rate": 8.467336683417085e-05, - "loss": 5.2315, - "step": 15755 - }, - { - "epoch": 8.216949152542373, - "grad_norm": 1.41514253616333, - "learning_rate": 8.467236180904523e-05, - "loss": 5.3246, - "step": 15756 - }, - { - "epoch": 8.217470664928292, - "grad_norm": 1.5276771783828735, - "learning_rate": 8.46713567839196e-05, - "loss": 5.1752, - "step": 15757 - }, - { - "epoch": 8.217992177314212, - "grad_norm": 1.4018306732177734, - "learning_rate": 8.467035175879398e-05, - "loss": 5.8253, - "step": 15758 - }, - { - "epoch": 8.21851368970013, - "grad_norm": 1.5412282943725586, - "learning_rate": 8.466934673366835e-05, - "loss": 5.1982, - "step": 15759 - }, - { - "epoch": 8.219035202086049, - "grad_norm": 1.49917471408844, - "learning_rate": 8.466834170854272e-05, - "loss": 5.4657, - "step": 15760 - }, - { - "epoch": 8.219556714471969, - "grad_norm": 1.4708592891693115, - "learning_rate": 8.466733668341709e-05, - "loss": 4.9168, - "step": 15761 - }, - { - "epoch": 8.220078226857888, - "grad_norm": 1.4547802209854126, - "learning_rate": 8.466633165829147e-05, - "loss": 5.5119, - "step": 15762 - }, - { - "epoch": 8.220599739243807, - "grad_norm": 1.4105051755905151, - "learning_rate": 8.466532663316583e-05, - "loss": 5.4833, - "step": 15763 - }, - { - "epoch": 8.221121251629727, - "grad_norm": 1.4455944299697876, - "learning_rate": 8.466432160804021e-05, - "loss": 5.6255, - "step": 15764 - }, - { - "epoch": 8.221642764015645, - "grad_norm": 1.3468468189239502, - "learning_rate": 8.466331658291457e-05, - "loss": 5.506, - "step": 15765 - }, - { - "epoch": 8.222164276401564, - "grad_norm": 1.447231650352478, - "learning_rate": 8.466231155778895e-05, - "loss": 5.7529, - "step": 15766 - }, - { - "epoch": 8.222685788787484, - "grad_norm": 1.423258662223816, - "learning_rate": 8.466130653266331e-05, - "loss": 5.5575, - "step": 15767 - }, - { - "epoch": 8.223207301173403, - "grad_norm": 1.4167636632919312, - "learning_rate": 8.466030150753769e-05, - "loss": 5.7239, - "step": 15768 - }, - { - "epoch": 8.223728813559323, - "grad_norm": 1.4478312730789185, - "learning_rate": 8.465929648241207e-05, - "loss": 5.295, - "step": 15769 - }, - { - "epoch": 8.224250325945242, - "grad_norm": 1.256091833114624, - "learning_rate": 8.465829145728643e-05, - "loss": 5.8064, - "step": 15770 - }, - { - "epoch": 8.22477183833116, - "grad_norm": 1.4325037002563477, - "learning_rate": 8.465728643216081e-05, - "loss": 5.8573, - "step": 15771 - }, - { - "epoch": 8.22529335071708, - "grad_norm": 1.4392352104187012, - "learning_rate": 8.465628140703518e-05, - "loss": 5.7699, - "step": 15772 - }, - { - "epoch": 8.225814863102999, - "grad_norm": 1.2974876165390015, - "learning_rate": 8.465527638190955e-05, - "loss": 5.901, - "step": 15773 - }, - { - "epoch": 8.226336375488918, - "grad_norm": 1.5390645265579224, - "learning_rate": 8.465427135678392e-05, - "loss": 5.0797, - "step": 15774 - }, - { - "epoch": 8.226857887874838, - "grad_norm": 1.4045251607894897, - "learning_rate": 8.46532663316583e-05, - "loss": 5.3441, - "step": 15775 - }, - { - "epoch": 8.227379400260757, - "grad_norm": 1.418887734413147, - "learning_rate": 8.465226130653266e-05, - "loss": 5.4019, - "step": 15776 - }, - { - "epoch": 8.227900912646675, - "grad_norm": 1.4092835187911987, - "learning_rate": 8.465125628140704e-05, - "loss": 5.5853, - "step": 15777 - }, - { - "epoch": 8.228422425032594, - "grad_norm": 1.4801493883132935, - "learning_rate": 8.46502512562814e-05, - "loss": 5.7136, - "step": 15778 - }, - { - "epoch": 8.228943937418514, - "grad_norm": 1.5004867315292358, - "learning_rate": 8.464924623115578e-05, - "loss": 5.0606, - "step": 15779 - }, - { - "epoch": 8.229465449804433, - "grad_norm": 1.4344185590744019, - "learning_rate": 8.464824120603016e-05, - "loss": 5.6834, - "step": 15780 - }, - { - "epoch": 8.229986962190353, - "grad_norm": 1.3830920457839966, - "learning_rate": 8.464723618090452e-05, - "loss": 5.6339, - "step": 15781 - }, - { - "epoch": 8.230508474576272, - "grad_norm": 1.3700313568115234, - "learning_rate": 8.46462311557789e-05, - "loss": 5.7486, - "step": 15782 - }, - { - "epoch": 8.23102998696219, - "grad_norm": 1.3572949171066284, - "learning_rate": 8.464522613065327e-05, - "loss": 5.2489, - "step": 15783 - }, - { - "epoch": 8.23155149934811, - "grad_norm": 1.505470633506775, - "learning_rate": 8.464422110552764e-05, - "loss": 5.3648, - "step": 15784 - }, - { - "epoch": 8.232073011734029, - "grad_norm": 1.5689599514007568, - "learning_rate": 8.464321608040201e-05, - "loss": 5.3144, - "step": 15785 - }, - { - "epoch": 8.232594524119948, - "grad_norm": 1.3889964818954468, - "learning_rate": 8.464221105527639e-05, - "loss": 5.7106, - "step": 15786 - }, - { - "epoch": 8.233116036505868, - "grad_norm": 1.4004110097885132, - "learning_rate": 8.464120603015075e-05, - "loss": 5.8092, - "step": 15787 - }, - { - "epoch": 8.233637548891787, - "grad_norm": 1.3814398050308228, - "learning_rate": 8.464020100502513e-05, - "loss": 5.2431, - "step": 15788 - }, - { - "epoch": 8.234159061277705, - "grad_norm": 1.454531192779541, - "learning_rate": 8.46391959798995e-05, - "loss": 5.4928, - "step": 15789 - }, - { - "epoch": 8.234680573663624, - "grad_norm": 1.3786059617996216, - "learning_rate": 8.463819095477388e-05, - "loss": 5.8994, - "step": 15790 - }, - { - "epoch": 8.235202086049544, - "grad_norm": 1.338403344154358, - "learning_rate": 8.463718592964825e-05, - "loss": 5.917, - "step": 15791 - }, - { - "epoch": 8.235723598435463, - "grad_norm": 1.7408145666122437, - "learning_rate": 8.463618090452262e-05, - "loss": 5.3605, - "step": 15792 - }, - { - "epoch": 8.236245110821383, - "grad_norm": 1.424072027206421, - "learning_rate": 8.463517587939699e-05, - "loss": 5.9482, - "step": 15793 - }, - { - "epoch": 8.236766623207302, - "grad_norm": 1.5715824365615845, - "learning_rate": 8.463417085427137e-05, - "loss": 4.7835, - "step": 15794 - }, - { - "epoch": 8.23728813559322, - "grad_norm": 1.445021152496338, - "learning_rate": 8.463316582914573e-05, - "loss": 5.8397, - "step": 15795 - }, - { - "epoch": 8.23780964797914, - "grad_norm": 1.4627408981323242, - "learning_rate": 8.46321608040201e-05, - "loss": 5.5774, - "step": 15796 - }, - { - "epoch": 8.238331160365059, - "grad_norm": 1.5527275800704956, - "learning_rate": 8.463115577889447e-05, - "loss": 5.2931, - "step": 15797 - }, - { - "epoch": 8.238852672750978, - "grad_norm": 1.5406156778335571, - "learning_rate": 8.463015075376884e-05, - "loss": 5.6067, - "step": 15798 - }, - { - "epoch": 8.239374185136898, - "grad_norm": 1.417326807975769, - "learning_rate": 8.462914572864322e-05, - "loss": 5.9641, - "step": 15799 - }, - { - "epoch": 8.239895697522817, - "grad_norm": 1.5441179275512695, - "learning_rate": 8.46281407035176e-05, - "loss": 5.4097, - "step": 15800 - }, - { - "epoch": 8.240417209908735, - "grad_norm": 1.6008589267730713, - "learning_rate": 8.462713567839197e-05, - "loss": 5.6699, - "step": 15801 - }, - { - "epoch": 8.240938722294654, - "grad_norm": 1.3133982419967651, - "learning_rate": 8.462613065326634e-05, - "loss": 5.8193, - "step": 15802 - }, - { - "epoch": 8.241460234680574, - "grad_norm": 1.5236480236053467, - "learning_rate": 8.462512562814071e-05, - "loss": 5.548, - "step": 15803 - }, - { - "epoch": 8.241981747066493, - "grad_norm": 1.5558503866195679, - "learning_rate": 8.462412060301508e-05, - "loss": 5.6978, - "step": 15804 - }, - { - "epoch": 8.242503259452413, - "grad_norm": 1.5416392087936401, - "learning_rate": 8.462311557788946e-05, - "loss": 5.7067, - "step": 15805 - }, - { - "epoch": 8.243024771838332, - "grad_norm": 1.562567114830017, - "learning_rate": 8.462211055276382e-05, - "loss": 5.1771, - "step": 15806 - }, - { - "epoch": 8.24354628422425, - "grad_norm": 1.5003142356872559, - "learning_rate": 8.46211055276382e-05, - "loss": 5.2657, - "step": 15807 - }, - { - "epoch": 8.24406779661017, - "grad_norm": 1.6471900939941406, - "learning_rate": 8.462010050251256e-05, - "loss": 5.5614, - "step": 15808 - }, - { - "epoch": 8.244589308996089, - "grad_norm": 1.534129023551941, - "learning_rate": 8.461909547738694e-05, - "loss": 5.5332, - "step": 15809 - }, - { - "epoch": 8.245110821382008, - "grad_norm": 1.5146350860595703, - "learning_rate": 8.461809045226132e-05, - "loss": 5.2022, - "step": 15810 - }, - { - "epoch": 8.245632333767928, - "grad_norm": 1.4150269031524658, - "learning_rate": 8.461708542713568e-05, - "loss": 5.7878, - "step": 15811 - }, - { - "epoch": 8.246153846153845, - "grad_norm": 1.5084728002548218, - "learning_rate": 8.461608040201006e-05, - "loss": 5.2717, - "step": 15812 - }, - { - "epoch": 8.246675358539765, - "grad_norm": 1.3794447183609009, - "learning_rate": 8.461507537688442e-05, - "loss": 5.801, - "step": 15813 - }, - { - "epoch": 8.247196870925684, - "grad_norm": 1.4749020338058472, - "learning_rate": 8.46140703517588e-05, - "loss": 5.8158, - "step": 15814 - }, - { - "epoch": 8.247718383311604, - "grad_norm": 1.3189536333084106, - "learning_rate": 8.461306532663317e-05, - "loss": 5.7886, - "step": 15815 - }, - { - "epoch": 8.248239895697523, - "grad_norm": 1.4136351346969604, - "learning_rate": 8.461206030150754e-05, - "loss": 5.5895, - "step": 15816 - }, - { - "epoch": 8.248761408083443, - "grad_norm": 1.570310354232788, - "learning_rate": 8.461105527638191e-05, - "loss": 5.8626, - "step": 15817 - }, - { - "epoch": 8.24928292046936, - "grad_norm": 1.4577895402908325, - "learning_rate": 8.461005025125629e-05, - "loss": 5.5529, - "step": 15818 - }, - { - "epoch": 8.24980443285528, - "grad_norm": 1.8627870082855225, - "learning_rate": 8.460904522613065e-05, - "loss": 4.4952, - "step": 15819 - }, - { - "epoch": 8.2503259452412, - "grad_norm": 1.4534896612167358, - "learning_rate": 8.460804020100503e-05, - "loss": 5.6476, - "step": 15820 - }, - { - "epoch": 8.250847457627119, - "grad_norm": 1.475164532661438, - "learning_rate": 8.46070351758794e-05, - "loss": 5.5647, - "step": 15821 - }, - { - "epoch": 8.251368970013038, - "grad_norm": 1.484103798866272, - "learning_rate": 8.460603015075377e-05, - "loss": 5.5263, - "step": 15822 - }, - { - "epoch": 8.251890482398958, - "grad_norm": 1.5220357179641724, - "learning_rate": 8.460502512562815e-05, - "loss": 5.4031, - "step": 15823 - }, - { - "epoch": 8.252411994784875, - "grad_norm": 1.455743432044983, - "learning_rate": 8.460402010050251e-05, - "loss": 5.2914, - "step": 15824 - }, - { - "epoch": 8.252933507170795, - "grad_norm": 1.4494571685791016, - "learning_rate": 8.460301507537689e-05, - "loss": 5.6511, - "step": 15825 - }, - { - "epoch": 8.253455019556714, - "grad_norm": 1.6204783916473389, - "learning_rate": 8.460201005025125e-05, - "loss": 5.8298, - "step": 15826 - }, - { - "epoch": 8.253976531942634, - "grad_norm": 1.4394683837890625, - "learning_rate": 8.460100502512563e-05, - "loss": 5.2912, - "step": 15827 - }, - { - "epoch": 8.254498044328553, - "grad_norm": 1.5136851072311401, - "learning_rate": 8.46e-05, - "loss": 5.2703, - "step": 15828 - }, - { - "epoch": 8.255019556714473, - "grad_norm": 1.4477945566177368, - "learning_rate": 8.459899497487437e-05, - "loss": 5.4448, - "step": 15829 - }, - { - "epoch": 8.25554106910039, - "grad_norm": 1.4141771793365479, - "learning_rate": 8.459798994974875e-05, - "loss": 5.7948, - "step": 15830 - }, - { - "epoch": 8.25606258148631, - "grad_norm": 1.4768807888031006, - "learning_rate": 8.459698492462313e-05, - "loss": 5.8481, - "step": 15831 - }, - { - "epoch": 8.25658409387223, - "grad_norm": 1.4375606775283813, - "learning_rate": 8.45959798994975e-05, - "loss": 6.0027, - "step": 15832 - }, - { - "epoch": 8.257105606258149, - "grad_norm": 1.406207799911499, - "learning_rate": 8.459497487437187e-05, - "loss": 5.3202, - "step": 15833 - }, - { - "epoch": 8.257627118644068, - "grad_norm": 1.482801914215088, - "learning_rate": 8.459396984924624e-05, - "loss": 5.4423, - "step": 15834 - }, - { - "epoch": 8.258148631029988, - "grad_norm": 1.5668821334838867, - "learning_rate": 8.45929648241206e-05, - "loss": 5.4518, - "step": 15835 - }, - { - "epoch": 8.258670143415905, - "grad_norm": 1.4035617113113403, - "learning_rate": 8.459195979899498e-05, - "loss": 5.6356, - "step": 15836 - }, - { - "epoch": 8.259191655801825, - "grad_norm": 1.5411652326583862, - "learning_rate": 8.459095477386934e-05, - "loss": 5.2687, - "step": 15837 - }, - { - "epoch": 8.259713168187744, - "grad_norm": 1.4445432424545288, - "learning_rate": 8.458994974874372e-05, - "loss": 5.6304, - "step": 15838 - }, - { - "epoch": 8.260234680573664, - "grad_norm": 1.5861488580703735, - "learning_rate": 8.458894472361808e-05, - "loss": 5.1478, - "step": 15839 - }, - { - "epoch": 8.260756192959583, - "grad_norm": 1.5702685117721558, - "learning_rate": 8.458793969849246e-05, - "loss": 5.166, - "step": 15840 - }, - { - "epoch": 8.261277705345503, - "grad_norm": 1.558767318725586, - "learning_rate": 8.458693467336684e-05, - "loss": 5.2816, - "step": 15841 - }, - { - "epoch": 8.26179921773142, - "grad_norm": 1.56748366355896, - "learning_rate": 8.458592964824122e-05, - "loss": 5.3942, - "step": 15842 - }, - { - "epoch": 8.26232073011734, - "grad_norm": 1.5700925588607788, - "learning_rate": 8.458492462311558e-05, - "loss": 4.9963, - "step": 15843 - }, - { - "epoch": 8.26284224250326, - "grad_norm": 1.4957177639007568, - "learning_rate": 8.458391959798996e-05, - "loss": 5.4783, - "step": 15844 - }, - { - "epoch": 8.263363754889179, - "grad_norm": 1.546433925628662, - "learning_rate": 8.458291457286432e-05, - "loss": 5.7368, - "step": 15845 - }, - { - "epoch": 8.263885267275098, - "grad_norm": 1.5821900367736816, - "learning_rate": 8.45819095477387e-05, - "loss": 5.5289, - "step": 15846 - }, - { - "epoch": 8.264406779661018, - "grad_norm": 1.3721505403518677, - "learning_rate": 8.458090452261307e-05, - "loss": 5.6886, - "step": 15847 - }, - { - "epoch": 8.264928292046935, - "grad_norm": 1.3780944347381592, - "learning_rate": 8.457989949748743e-05, - "loss": 5.6399, - "step": 15848 - }, - { - "epoch": 8.265449804432855, - "grad_norm": 1.4012144804000854, - "learning_rate": 8.457889447236181e-05, - "loss": 5.4458, - "step": 15849 - }, - { - "epoch": 8.265971316818774, - "grad_norm": 1.578778624534607, - "learning_rate": 8.457788944723619e-05, - "loss": 5.3058, - "step": 15850 - }, - { - "epoch": 8.266492829204694, - "grad_norm": 1.4275809526443481, - "learning_rate": 8.457688442211056e-05, - "loss": 5.5585, - "step": 15851 - }, - { - "epoch": 8.267014341590613, - "grad_norm": 1.37882399559021, - "learning_rate": 8.457587939698493e-05, - "loss": 5.5629, - "step": 15852 - }, - { - "epoch": 8.267535853976533, - "grad_norm": 1.4180371761322021, - "learning_rate": 8.45748743718593e-05, - "loss": 5.6888, - "step": 15853 - }, - { - "epoch": 8.26805736636245, - "grad_norm": 1.3796091079711914, - "learning_rate": 8.457386934673367e-05, - "loss": 5.9032, - "step": 15854 - }, - { - "epoch": 8.26857887874837, - "grad_norm": 1.4430314302444458, - "learning_rate": 8.457286432160805e-05, - "loss": 5.0611, - "step": 15855 - }, - { - "epoch": 8.26910039113429, - "grad_norm": 1.3288707733154297, - "learning_rate": 8.457185929648241e-05, - "loss": 5.9358, - "step": 15856 - }, - { - "epoch": 8.269621903520209, - "grad_norm": 1.5671818256378174, - "learning_rate": 8.457085427135679e-05, - "loss": 5.3142, - "step": 15857 - }, - { - "epoch": 8.270143415906128, - "grad_norm": 1.3701246976852417, - "learning_rate": 8.456984924623116e-05, - "loss": 5.8527, - "step": 15858 - }, - { - "epoch": 8.270664928292048, - "grad_norm": 1.4828314781188965, - "learning_rate": 8.456884422110553e-05, - "loss": 5.4784, - "step": 15859 - }, - { - "epoch": 8.271186440677965, - "grad_norm": 1.5456962585449219, - "learning_rate": 8.45678391959799e-05, - "loss": 5.5513, - "step": 15860 - }, - { - "epoch": 8.271707953063885, - "grad_norm": 1.474442720413208, - "learning_rate": 8.456683417085427e-05, - "loss": 5.451, - "step": 15861 - }, - { - "epoch": 8.272229465449804, - "grad_norm": 1.7194995880126953, - "learning_rate": 8.456582914572865e-05, - "loss": 5.4999, - "step": 15862 - }, - { - "epoch": 8.272750977835724, - "grad_norm": 1.4589446783065796, - "learning_rate": 8.456482412060302e-05, - "loss": 5.6123, - "step": 15863 - }, - { - "epoch": 8.273272490221643, - "grad_norm": 1.4888437986373901, - "learning_rate": 8.45638190954774e-05, - "loss": 5.4977, - "step": 15864 - }, - { - "epoch": 8.273794002607563, - "grad_norm": 1.4184558391571045, - "learning_rate": 8.456281407035176e-05, - "loss": 5.2206, - "step": 15865 - }, - { - "epoch": 8.27431551499348, - "grad_norm": 1.6213154792785645, - "learning_rate": 8.456180904522614e-05, - "loss": 5.3435, - "step": 15866 - }, - { - "epoch": 8.2748370273794, - "grad_norm": 1.397697925567627, - "learning_rate": 8.45608040201005e-05, - "loss": 5.872, - "step": 15867 - }, - { - "epoch": 8.27535853976532, - "grad_norm": 1.3756823539733887, - "learning_rate": 8.455979899497488e-05, - "loss": 4.6247, - "step": 15868 - }, - { - "epoch": 8.275880052151239, - "grad_norm": 1.330529808998108, - "learning_rate": 8.455879396984924e-05, - "loss": 5.8374, - "step": 15869 - }, - { - "epoch": 8.276401564537158, - "grad_norm": 1.3128541707992554, - "learning_rate": 8.455778894472362e-05, - "loss": 5.8602, - "step": 15870 - }, - { - "epoch": 8.276923076923078, - "grad_norm": 1.377893090248108, - "learning_rate": 8.4556783919598e-05, - "loss": 5.7595, - "step": 15871 - }, - { - "epoch": 8.277444589308995, - "grad_norm": 1.273043155670166, - "learning_rate": 8.455577889447238e-05, - "loss": 5.8662, - "step": 15872 - }, - { - "epoch": 8.277966101694915, - "grad_norm": 1.4187883138656616, - "learning_rate": 8.455477386934674e-05, - "loss": 5.7781, - "step": 15873 - }, - { - "epoch": 8.278487614080834, - "grad_norm": 1.4822087287902832, - "learning_rate": 8.455376884422112e-05, - "loss": 4.9331, - "step": 15874 - }, - { - "epoch": 8.279009126466754, - "grad_norm": 1.4608142375946045, - "learning_rate": 8.455276381909548e-05, - "loss": 5.7062, - "step": 15875 - }, - { - "epoch": 8.279530638852673, - "grad_norm": 1.3936388492584229, - "learning_rate": 8.455175879396985e-05, - "loss": 5.7084, - "step": 15876 - }, - { - "epoch": 8.280052151238593, - "grad_norm": 1.477307915687561, - "learning_rate": 8.455075376884423e-05, - "loss": 5.5546, - "step": 15877 - }, - { - "epoch": 8.28057366362451, - "grad_norm": 1.495760202407837, - "learning_rate": 8.454974874371859e-05, - "loss": 5.5461, - "step": 15878 - }, - { - "epoch": 8.28109517601043, - "grad_norm": 1.4030472040176392, - "learning_rate": 8.454874371859297e-05, - "loss": 5.6119, - "step": 15879 - }, - { - "epoch": 8.28161668839635, - "grad_norm": 1.4525583982467651, - "learning_rate": 8.454773869346733e-05, - "loss": 5.8582, - "step": 15880 - }, - { - "epoch": 8.282138200782269, - "grad_norm": 1.4331893920898438, - "learning_rate": 8.454673366834171e-05, - "loss": 5.0769, - "step": 15881 - }, - { - "epoch": 8.282659713168188, - "grad_norm": 1.4314820766448975, - "learning_rate": 8.454572864321609e-05, - "loss": 5.405, - "step": 15882 - }, - { - "epoch": 8.283181225554108, - "grad_norm": 1.67923104763031, - "learning_rate": 8.454472361809047e-05, - "loss": 4.7148, - "step": 15883 - }, - { - "epoch": 8.283702737940025, - "grad_norm": 1.4646562337875366, - "learning_rate": 8.454371859296483e-05, - "loss": 5.2748, - "step": 15884 - }, - { - "epoch": 8.284224250325945, - "grad_norm": 1.3863946199417114, - "learning_rate": 8.454271356783921e-05, - "loss": 5.5994, - "step": 15885 - }, - { - "epoch": 8.284745762711864, - "grad_norm": 1.3674498796463013, - "learning_rate": 8.454170854271357e-05, - "loss": 5.9057, - "step": 15886 - }, - { - "epoch": 8.285267275097784, - "grad_norm": 1.4301966428756714, - "learning_rate": 8.454070351758795e-05, - "loss": 5.3135, - "step": 15887 - }, - { - "epoch": 8.285788787483703, - "grad_norm": 1.4029064178466797, - "learning_rate": 8.453969849246231e-05, - "loss": 5.8095, - "step": 15888 - }, - { - "epoch": 8.286310299869623, - "grad_norm": 1.363068699836731, - "learning_rate": 8.453869346733668e-05, - "loss": 5.5714, - "step": 15889 - }, - { - "epoch": 8.28683181225554, - "grad_norm": 1.5678194761276245, - "learning_rate": 8.453768844221106e-05, - "loss": 5.3005, - "step": 15890 - }, - { - "epoch": 8.28735332464146, - "grad_norm": 1.3899160623550415, - "learning_rate": 8.453668341708543e-05, - "loss": 5.6153, - "step": 15891 - }, - { - "epoch": 8.28787483702738, - "grad_norm": 1.6290831565856934, - "learning_rate": 8.453567839195981e-05, - "loss": 5.3544, - "step": 15892 - }, - { - "epoch": 8.288396349413299, - "grad_norm": 1.3395203351974487, - "learning_rate": 8.453467336683418e-05, - "loss": 5.8449, - "step": 15893 - }, - { - "epoch": 8.288917861799218, - "grad_norm": 1.426565170288086, - "learning_rate": 8.453366834170855e-05, - "loss": 5.7371, - "step": 15894 - }, - { - "epoch": 8.289439374185136, - "grad_norm": 1.3848758935928345, - "learning_rate": 8.453266331658292e-05, - "loss": 5.7198, - "step": 15895 - }, - { - "epoch": 8.289960886571055, - "grad_norm": 1.4147223234176636, - "learning_rate": 8.45316582914573e-05, - "loss": 5.6899, - "step": 15896 - }, - { - "epoch": 8.290482398956975, - "grad_norm": 1.3713607788085938, - "learning_rate": 8.453065326633166e-05, - "loss": 5.3943, - "step": 15897 - }, - { - "epoch": 8.291003911342894, - "grad_norm": 1.4115911722183228, - "learning_rate": 8.452964824120604e-05, - "loss": 5.4378, - "step": 15898 - }, - { - "epoch": 8.291525423728814, - "grad_norm": 1.4592313766479492, - "learning_rate": 8.45286432160804e-05, - "loss": 5.9101, - "step": 15899 - }, - { - "epoch": 8.292046936114733, - "grad_norm": 1.4952404499053955, - "learning_rate": 8.452763819095478e-05, - "loss": 5.4142, - "step": 15900 - }, - { - "epoch": 8.292568448500653, - "grad_norm": 1.5101289749145508, - "learning_rate": 8.452663316582914e-05, - "loss": 5.2558, - "step": 15901 - }, - { - "epoch": 8.29308996088657, - "grad_norm": 1.3972727060317993, - "learning_rate": 8.452562814070352e-05, - "loss": 5.6953, - "step": 15902 - }, - { - "epoch": 8.29361147327249, - "grad_norm": 1.4211363792419434, - "learning_rate": 8.45246231155779e-05, - "loss": 4.922, - "step": 15903 - }, - { - "epoch": 8.29413298565841, - "grad_norm": 1.3111618757247925, - "learning_rate": 8.452361809045226e-05, - "loss": 5.9986, - "step": 15904 - }, - { - "epoch": 8.294654498044329, - "grad_norm": 1.5290096998214722, - "learning_rate": 8.452261306532664e-05, - "loss": 5.6036, - "step": 15905 - }, - { - "epoch": 8.295176010430248, - "grad_norm": 1.477309226989746, - "learning_rate": 8.4521608040201e-05, - "loss": 5.8305, - "step": 15906 - }, - { - "epoch": 8.295697522816166, - "grad_norm": 1.2797796726226807, - "learning_rate": 8.452060301507538e-05, - "loss": 5.626, - "step": 15907 - }, - { - "epoch": 8.296219035202085, - "grad_norm": 1.4107009172439575, - "learning_rate": 8.451959798994975e-05, - "loss": 5.4285, - "step": 15908 - }, - { - "epoch": 8.296740547588005, - "grad_norm": 1.4235445261001587, - "learning_rate": 8.451859296482413e-05, - "loss": 5.6693, - "step": 15909 - }, - { - "epoch": 8.297262059973924, - "grad_norm": 1.4250483512878418, - "learning_rate": 8.451758793969849e-05, - "loss": 5.7822, - "step": 15910 - }, - { - "epoch": 8.297783572359844, - "grad_norm": 1.4717257022857666, - "learning_rate": 8.451658291457287e-05, - "loss": 5.4109, - "step": 15911 - }, - { - "epoch": 8.298305084745763, - "grad_norm": 1.503605604171753, - "learning_rate": 8.451557788944725e-05, - "loss": 5.27, - "step": 15912 - }, - { - "epoch": 8.298826597131681, - "grad_norm": 1.4258735179901123, - "learning_rate": 8.451457286432162e-05, - "loss": 5.3671, - "step": 15913 - }, - { - "epoch": 8.2993481095176, - "grad_norm": 1.4839059114456177, - "learning_rate": 8.451356783919599e-05, - "loss": 5.6198, - "step": 15914 - }, - { - "epoch": 8.29986962190352, - "grad_norm": 1.4041774272918701, - "learning_rate": 8.451256281407035e-05, - "loss": 5.685, - "step": 15915 - }, - { - "epoch": 8.30039113428944, - "grad_norm": 1.649997591972351, - "learning_rate": 8.451155778894473e-05, - "loss": 5.733, - "step": 15916 - }, - { - "epoch": 8.300912646675359, - "grad_norm": 1.3233736753463745, - "learning_rate": 8.45105527638191e-05, - "loss": 5.9413, - "step": 15917 - }, - { - "epoch": 8.301434159061278, - "grad_norm": 1.3886852264404297, - "learning_rate": 8.450954773869347e-05, - "loss": 5.6069, - "step": 15918 - }, - { - "epoch": 8.301955671447196, - "grad_norm": 1.3759963512420654, - "learning_rate": 8.450854271356784e-05, - "loss": 5.446, - "step": 15919 - }, - { - "epoch": 8.302477183833116, - "grad_norm": 1.6840280294418335, - "learning_rate": 8.450753768844221e-05, - "loss": 4.9565, - "step": 15920 - }, - { - "epoch": 8.302998696219035, - "grad_norm": 1.5398145914077759, - "learning_rate": 8.450653266331658e-05, - "loss": 5.3624, - "step": 15921 - }, - { - "epoch": 8.303520208604954, - "grad_norm": 1.6161919832229614, - "learning_rate": 8.450552763819096e-05, - "loss": 5.122, - "step": 15922 - }, - { - "epoch": 8.304041720990874, - "grad_norm": 1.5116100311279297, - "learning_rate": 8.450452261306533e-05, - "loss": 5.1173, - "step": 15923 - }, - { - "epoch": 8.304563233376793, - "grad_norm": 1.438167929649353, - "learning_rate": 8.450351758793971e-05, - "loss": 5.6139, - "step": 15924 - }, - { - "epoch": 8.305084745762711, - "grad_norm": 1.711519718170166, - "learning_rate": 8.450251256281408e-05, - "loss": 4.8603, - "step": 15925 - }, - { - "epoch": 8.30560625814863, - "grad_norm": 1.4468477964401245, - "learning_rate": 8.450150753768845e-05, - "loss": 5.3458, - "step": 15926 - }, - { - "epoch": 8.30612777053455, - "grad_norm": 1.5403406620025635, - "learning_rate": 8.450050251256282e-05, - "loss": 5.753, - "step": 15927 - }, - { - "epoch": 8.30664928292047, - "grad_norm": 1.4824362993240356, - "learning_rate": 8.449949748743718e-05, - "loss": 5.8273, - "step": 15928 - }, - { - "epoch": 8.307170795306389, - "grad_norm": 1.5934383869171143, - "learning_rate": 8.449849246231156e-05, - "loss": 5.1556, - "step": 15929 - }, - { - "epoch": 8.307692307692308, - "grad_norm": 1.4519424438476562, - "learning_rate": 8.449748743718592e-05, - "loss": 5.8314, - "step": 15930 - }, - { - "epoch": 8.308213820078226, - "grad_norm": 1.5523474216461182, - "learning_rate": 8.44964824120603e-05, - "loss": 5.358, - "step": 15931 - }, - { - "epoch": 8.308735332464146, - "grad_norm": 1.3591452836990356, - "learning_rate": 8.449547738693468e-05, - "loss": 5.9337, - "step": 15932 - }, - { - "epoch": 8.309256844850065, - "grad_norm": 1.624999761581421, - "learning_rate": 8.449447236180906e-05, - "loss": 4.902, - "step": 15933 - }, - { - "epoch": 8.309778357235984, - "grad_norm": 1.460902452468872, - "learning_rate": 8.449346733668342e-05, - "loss": 5.3314, - "step": 15934 - }, - { - "epoch": 8.310299869621904, - "grad_norm": 1.4620457887649536, - "learning_rate": 8.44924623115578e-05, - "loss": 5.4444, - "step": 15935 - }, - { - "epoch": 8.310821382007823, - "grad_norm": 1.4059715270996094, - "learning_rate": 8.449145728643216e-05, - "loss": 5.6099, - "step": 15936 - }, - { - "epoch": 8.311342894393741, - "grad_norm": 1.5995643138885498, - "learning_rate": 8.449045226130654e-05, - "loss": 5.1441, - "step": 15937 - }, - { - "epoch": 8.31186440677966, - "grad_norm": 1.4582644701004028, - "learning_rate": 8.448944723618091e-05, - "loss": 5.4897, - "step": 15938 - }, - { - "epoch": 8.31238591916558, - "grad_norm": 1.4212092161178589, - "learning_rate": 8.448844221105528e-05, - "loss": 5.642, - "step": 15939 - }, - { - "epoch": 8.3129074315515, - "grad_norm": 1.4048030376434326, - "learning_rate": 8.448743718592965e-05, - "loss": 5.5626, - "step": 15940 - }, - { - "epoch": 8.313428943937419, - "grad_norm": 1.4095392227172852, - "learning_rate": 8.448643216080401e-05, - "loss": 5.595, - "step": 15941 - }, - { - "epoch": 8.313950456323338, - "grad_norm": 1.3824983835220337, - "learning_rate": 8.448542713567839e-05, - "loss": 5.5757, - "step": 15942 - }, - { - "epoch": 8.314471968709256, - "grad_norm": 1.466179609298706, - "learning_rate": 8.448442211055277e-05, - "loss": 5.5241, - "step": 15943 - }, - { - "epoch": 8.314993481095176, - "grad_norm": 1.5262266397476196, - "learning_rate": 8.448341708542715e-05, - "loss": 5.3025, - "step": 15944 - }, - { - "epoch": 8.315514993481095, - "grad_norm": 1.3975412845611572, - "learning_rate": 8.448241206030151e-05, - "loss": 5.5968, - "step": 15945 - }, - { - "epoch": 8.316036505867014, - "grad_norm": 1.4115992784500122, - "learning_rate": 8.448140703517589e-05, - "loss": 5.5584, - "step": 15946 - }, - { - "epoch": 8.316558018252934, - "grad_norm": 1.307379961013794, - "learning_rate": 8.448040201005025e-05, - "loss": 5.7984, - "step": 15947 - }, - { - "epoch": 8.317079530638853, - "grad_norm": 1.4425749778747559, - "learning_rate": 8.447939698492463e-05, - "loss": 5.6679, - "step": 15948 - }, - { - "epoch": 8.317601043024771, - "grad_norm": 1.3776566982269287, - "learning_rate": 8.4478391959799e-05, - "loss": 5.1993, - "step": 15949 - }, - { - "epoch": 8.31812255541069, - "grad_norm": 1.3335384130477905, - "learning_rate": 8.447738693467337e-05, - "loss": 5.7974, - "step": 15950 - }, - { - "epoch": 8.31864406779661, - "grad_norm": 1.291144609451294, - "learning_rate": 8.447638190954774e-05, - "loss": 5.7945, - "step": 15951 - }, - { - "epoch": 8.31916558018253, - "grad_norm": 1.39948570728302, - "learning_rate": 8.447537688442212e-05, - "loss": 5.1874, - "step": 15952 - }, - { - "epoch": 8.319687092568449, - "grad_norm": 1.533009648323059, - "learning_rate": 8.447437185929648e-05, - "loss": 5.2387, - "step": 15953 - }, - { - "epoch": 8.320208604954368, - "grad_norm": 1.42972993850708, - "learning_rate": 8.447336683417086e-05, - "loss": 5.8858, - "step": 15954 - }, - { - "epoch": 8.320730117340286, - "grad_norm": 1.391144871711731, - "learning_rate": 8.447236180904524e-05, - "loss": 5.6155, - "step": 15955 - }, - { - "epoch": 8.321251629726206, - "grad_norm": 1.4496901035308838, - "learning_rate": 8.44713567839196e-05, - "loss": 5.8481, - "step": 15956 - }, - { - "epoch": 8.321773142112125, - "grad_norm": 1.4705294370651245, - "learning_rate": 8.447035175879398e-05, - "loss": 4.9626, - "step": 15957 - }, - { - "epoch": 8.322294654498045, - "grad_norm": 1.395022988319397, - "learning_rate": 8.446934673366834e-05, - "loss": 5.3669, - "step": 15958 - }, - { - "epoch": 8.322816166883964, - "grad_norm": 1.4462367296218872, - "learning_rate": 8.446834170854272e-05, - "loss": 5.6842, - "step": 15959 - }, - { - "epoch": 8.323337679269883, - "grad_norm": 1.3980281352996826, - "learning_rate": 8.446733668341708e-05, - "loss": 5.3362, - "step": 15960 - }, - { - "epoch": 8.323859191655801, - "grad_norm": 1.4465581178665161, - "learning_rate": 8.446633165829146e-05, - "loss": 5.3237, - "step": 15961 - }, - { - "epoch": 8.32438070404172, - "grad_norm": 1.3908741474151611, - "learning_rate": 8.446532663316583e-05, - "loss": 5.3435, - "step": 15962 - }, - { - "epoch": 8.32490221642764, - "grad_norm": 1.3185327053070068, - "learning_rate": 8.44643216080402e-05, - "loss": 5.6351, - "step": 15963 - }, - { - "epoch": 8.32542372881356, - "grad_norm": 1.4448957443237305, - "learning_rate": 8.446331658291458e-05, - "loss": 5.7112, - "step": 15964 - }, - { - "epoch": 8.325945241199479, - "grad_norm": 1.4519954919815063, - "learning_rate": 8.446231155778896e-05, - "loss": 5.7076, - "step": 15965 - }, - { - "epoch": 8.326466753585398, - "grad_norm": 1.2674859762191772, - "learning_rate": 8.446130653266332e-05, - "loss": 5.7608, - "step": 15966 - }, - { - "epoch": 8.326988265971316, - "grad_norm": 1.4240796566009521, - "learning_rate": 8.44603015075377e-05, - "loss": 5.8488, - "step": 15967 - }, - { - "epoch": 8.327509778357236, - "grad_norm": 1.5564864873886108, - "learning_rate": 8.445929648241207e-05, - "loss": 5.568, - "step": 15968 - }, - { - "epoch": 8.328031290743155, - "grad_norm": 1.602820634841919, - "learning_rate": 8.445829145728643e-05, - "loss": 5.7399, - "step": 15969 - }, - { - "epoch": 8.328552803129075, - "grad_norm": 1.5959762334823608, - "learning_rate": 8.445728643216081e-05, - "loss": 5.5063, - "step": 15970 - }, - { - "epoch": 8.329074315514994, - "grad_norm": 1.5072026252746582, - "learning_rate": 8.445628140703517e-05, - "loss": 5.5989, - "step": 15971 - }, - { - "epoch": 8.329595827900913, - "grad_norm": 1.5525572299957275, - "learning_rate": 8.445527638190955e-05, - "loss": 5.297, - "step": 15972 - }, - { - "epoch": 8.330117340286831, - "grad_norm": 1.4932000637054443, - "learning_rate": 8.445427135678391e-05, - "loss": 5.6033, - "step": 15973 - }, - { - "epoch": 8.33063885267275, - "grad_norm": 1.399903655052185, - "learning_rate": 8.445326633165829e-05, - "loss": 5.6169, - "step": 15974 - }, - { - "epoch": 8.33116036505867, - "grad_norm": 1.3966963291168213, - "learning_rate": 8.445226130653267e-05, - "loss": 5.1716, - "step": 15975 - }, - { - "epoch": 8.33168187744459, - "grad_norm": 1.529606819152832, - "learning_rate": 8.445125628140705e-05, - "loss": 5.4551, - "step": 15976 - }, - { - "epoch": 8.332203389830509, - "grad_norm": 1.478464126586914, - "learning_rate": 8.445025125628141e-05, - "loss": 5.5018, - "step": 15977 - }, - { - "epoch": 8.332724902216428, - "grad_norm": 1.4331082105636597, - "learning_rate": 8.444924623115579e-05, - "loss": 5.383, - "step": 15978 - }, - { - "epoch": 8.333246414602346, - "grad_norm": 1.5272414684295654, - "learning_rate": 8.444824120603015e-05, - "loss": 5.4435, - "step": 15979 - }, - { - "epoch": 8.333767926988266, - "grad_norm": 1.4849690198898315, - "learning_rate": 8.444723618090453e-05, - "loss": 5.4343, - "step": 15980 - }, - { - "epoch": 8.334289439374185, - "grad_norm": 1.4924222230911255, - "learning_rate": 8.44462311557789e-05, - "loss": 5.3274, - "step": 15981 - }, - { - "epoch": 8.334810951760105, - "grad_norm": 1.4785796403884888, - "learning_rate": 8.444522613065326e-05, - "loss": 5.2807, - "step": 15982 - }, - { - "epoch": 8.335332464146024, - "grad_norm": 1.3742555379867554, - "learning_rate": 8.444422110552764e-05, - "loss": 5.4324, - "step": 15983 - }, - { - "epoch": 8.335853976531943, - "grad_norm": 1.3757057189941406, - "learning_rate": 8.444321608040202e-05, - "loss": 5.782, - "step": 15984 - }, - { - "epoch": 8.336375488917861, - "grad_norm": 1.4534735679626465, - "learning_rate": 8.44422110552764e-05, - "loss": 5.1405, - "step": 15985 - }, - { - "epoch": 8.33689700130378, - "grad_norm": 1.5605055093765259, - "learning_rate": 8.444120603015076e-05, - "loss": 5.4312, - "step": 15986 - }, - { - "epoch": 8.3374185136897, - "grad_norm": 1.5666334629058838, - "learning_rate": 8.444020100502514e-05, - "loss": 5.3037, - "step": 15987 - }, - { - "epoch": 8.33794002607562, - "grad_norm": 1.497889518737793, - "learning_rate": 8.44391959798995e-05, - "loss": 5.276, - "step": 15988 - }, - { - "epoch": 8.338461538461539, - "grad_norm": 1.3910679817199707, - "learning_rate": 8.443819095477388e-05, - "loss": 5.5844, - "step": 15989 - }, - { - "epoch": 8.338983050847457, - "grad_norm": 1.362622857093811, - "learning_rate": 8.443718592964824e-05, - "loss": 5.6741, - "step": 15990 - }, - { - "epoch": 8.339504563233376, - "grad_norm": 1.4490845203399658, - "learning_rate": 8.443618090452262e-05, - "loss": 5.5092, - "step": 15991 - }, - { - "epoch": 8.340026075619296, - "grad_norm": 1.3583098649978638, - "learning_rate": 8.443517587939698e-05, - "loss": 5.4207, - "step": 15992 - }, - { - "epoch": 8.340547588005215, - "grad_norm": 1.3696868419647217, - "learning_rate": 8.443417085427136e-05, - "loss": 5.7722, - "step": 15993 - }, - { - "epoch": 8.341069100391135, - "grad_norm": 1.4488942623138428, - "learning_rate": 8.443316582914573e-05, - "loss": 5.8139, - "step": 15994 - }, - { - "epoch": 8.341590612777054, - "grad_norm": 1.3814903497695923, - "learning_rate": 8.44321608040201e-05, - "loss": 5.6847, - "step": 15995 - }, - { - "epoch": 8.342112125162974, - "grad_norm": 1.458188533782959, - "learning_rate": 8.443115577889448e-05, - "loss": 5.3698, - "step": 15996 - }, - { - "epoch": 8.342633637548891, - "grad_norm": 1.3734210729599, - "learning_rate": 8.443015075376885e-05, - "loss": 5.4613, - "step": 15997 - }, - { - "epoch": 8.34315514993481, - "grad_norm": 1.4795769453048706, - "learning_rate": 8.442914572864322e-05, - "loss": 5.232, - "step": 15998 - }, - { - "epoch": 8.34367666232073, - "grad_norm": 1.5418455600738525, - "learning_rate": 8.442814070351759e-05, - "loss": 5.4994, - "step": 15999 - }, - { - "epoch": 8.34419817470665, - "grad_norm": 1.504374623298645, - "learning_rate": 8.442713567839197e-05, - "loss": 5.55, - "step": 16000 - }, - { - "epoch": 8.344719687092569, - "grad_norm": 1.6758397817611694, - "learning_rate": 8.442613065326633e-05, - "loss": 4.9375, - "step": 16001 - }, - { - "epoch": 8.345241199478487, - "grad_norm": 1.4920449256896973, - "learning_rate": 8.442512562814071e-05, - "loss": 5.2795, - "step": 16002 - }, - { - "epoch": 8.345762711864406, - "grad_norm": 1.4816075563430786, - "learning_rate": 8.442412060301507e-05, - "loss": 5.3093, - "step": 16003 - }, - { - "epoch": 8.346284224250326, - "grad_norm": 1.4014040231704712, - "learning_rate": 8.442311557788945e-05, - "loss": 5.0151, - "step": 16004 - }, - { - "epoch": 8.346805736636245, - "grad_norm": 1.4864224195480347, - "learning_rate": 8.442211055276383e-05, - "loss": 5.097, - "step": 16005 - }, - { - "epoch": 8.347327249022165, - "grad_norm": 1.414717435836792, - "learning_rate": 8.44211055276382e-05, - "loss": 5.7251, - "step": 16006 - }, - { - "epoch": 8.347848761408084, - "grad_norm": 1.385803461074829, - "learning_rate": 8.442010050251257e-05, - "loss": 5.6724, - "step": 16007 - }, - { - "epoch": 8.348370273794002, - "grad_norm": 1.4936527013778687, - "learning_rate": 8.441909547738693e-05, - "loss": 5.3157, - "step": 16008 - }, - { - "epoch": 8.348891786179921, - "grad_norm": 1.3844199180603027, - "learning_rate": 8.441809045226131e-05, - "loss": 5.7103, - "step": 16009 - }, - { - "epoch": 8.34941329856584, - "grad_norm": 1.628217339515686, - "learning_rate": 8.441708542713568e-05, - "loss": 5.5027, - "step": 16010 - }, - { - "epoch": 8.34993481095176, - "grad_norm": 1.4541045427322388, - "learning_rate": 8.441608040201005e-05, - "loss": 5.554, - "step": 16011 - }, - { - "epoch": 8.35045632333768, - "grad_norm": 1.4511072635650635, - "learning_rate": 8.441507537688442e-05, - "loss": 5.6127, - "step": 16012 - }, - { - "epoch": 8.350977835723599, - "grad_norm": 1.5201529264450073, - "learning_rate": 8.44140703517588e-05, - "loss": 5.8629, - "step": 16013 - }, - { - "epoch": 8.351499348109517, - "grad_norm": 1.3730664253234863, - "learning_rate": 8.441306532663316e-05, - "loss": 5.7893, - "step": 16014 - }, - { - "epoch": 8.352020860495436, - "grad_norm": 1.4325379133224487, - "learning_rate": 8.441206030150754e-05, - "loss": 5.8405, - "step": 16015 - }, - { - "epoch": 8.352542372881356, - "grad_norm": 1.4827306270599365, - "learning_rate": 8.441105527638192e-05, - "loss": 5.6013, - "step": 16016 - }, - { - "epoch": 8.353063885267275, - "grad_norm": 1.427308201789856, - "learning_rate": 8.44100502512563e-05, - "loss": 5.5002, - "step": 16017 - }, - { - "epoch": 8.353585397653195, - "grad_norm": 1.4410616159439087, - "learning_rate": 8.440904522613066e-05, - "loss": 5.5908, - "step": 16018 - }, - { - "epoch": 8.354106910039114, - "grad_norm": 1.468735933303833, - "learning_rate": 8.440804020100504e-05, - "loss": 5.7491, - "step": 16019 - }, - { - "epoch": 8.354628422425032, - "grad_norm": 1.4429700374603271, - "learning_rate": 8.44070351758794e-05, - "loss": 5.6745, - "step": 16020 - }, - { - "epoch": 8.355149934810951, - "grad_norm": 1.3804229497909546, - "learning_rate": 8.440603015075377e-05, - "loss": 5.6305, - "step": 16021 - }, - { - "epoch": 8.35567144719687, - "grad_norm": 1.4659391641616821, - "learning_rate": 8.440502512562814e-05, - "loss": 5.838, - "step": 16022 - }, - { - "epoch": 8.35619295958279, - "grad_norm": 1.3757655620574951, - "learning_rate": 8.440402010050251e-05, - "loss": 5.6665, - "step": 16023 - }, - { - "epoch": 8.35671447196871, - "grad_norm": 1.3690297603607178, - "learning_rate": 8.440301507537689e-05, - "loss": 5.6853, - "step": 16024 - }, - { - "epoch": 8.357235984354629, - "grad_norm": 1.6084791421890259, - "learning_rate": 8.440201005025126e-05, - "loss": 5.5372, - "step": 16025 - }, - { - "epoch": 8.357757496740547, - "grad_norm": 1.384351372718811, - "learning_rate": 8.440100502512564e-05, - "loss": 5.8988, - "step": 16026 - }, - { - "epoch": 8.358279009126466, - "grad_norm": 1.387410044670105, - "learning_rate": 8.44e-05, - "loss": 5.8066, - "step": 16027 - }, - { - "epoch": 8.358800521512386, - "grad_norm": 1.3674942255020142, - "learning_rate": 8.439899497487438e-05, - "loss": 5.6325, - "step": 16028 - }, - { - "epoch": 8.359322033898305, - "grad_norm": 1.5085363388061523, - "learning_rate": 8.439798994974875e-05, - "loss": 5.2262, - "step": 16029 - }, - { - "epoch": 8.359843546284225, - "grad_norm": 1.5006816387176514, - "learning_rate": 8.439698492462313e-05, - "loss": 5.5047, - "step": 16030 - }, - { - "epoch": 8.360365058670144, - "grad_norm": 1.5318262577056885, - "learning_rate": 8.439597989949749e-05, - "loss": 5.1853, - "step": 16031 - }, - { - "epoch": 8.360886571056062, - "grad_norm": 1.7897796630859375, - "learning_rate": 8.439497487437187e-05, - "loss": 5.2258, - "step": 16032 - }, - { - "epoch": 8.361408083441981, - "grad_norm": 1.5215789079666138, - "learning_rate": 8.439396984924623e-05, - "loss": 5.7157, - "step": 16033 - }, - { - "epoch": 8.3619295958279, - "grad_norm": 1.3863027095794678, - "learning_rate": 8.43929648241206e-05, - "loss": 5.4673, - "step": 16034 - }, - { - "epoch": 8.36245110821382, - "grad_norm": 1.4120126962661743, - "learning_rate": 8.439195979899497e-05, - "loss": 5.5129, - "step": 16035 - }, - { - "epoch": 8.36297262059974, - "grad_norm": 1.4492841958999634, - "learning_rate": 8.439095477386935e-05, - "loss": 5.3652, - "step": 16036 - }, - { - "epoch": 8.36349413298566, - "grad_norm": 1.4481147527694702, - "learning_rate": 8.438994974874373e-05, - "loss": 4.968, - "step": 16037 - }, - { - "epoch": 8.364015645371577, - "grad_norm": 1.4658496379852295, - "learning_rate": 8.43889447236181e-05, - "loss": 5.1149, - "step": 16038 - }, - { - "epoch": 8.364537157757496, - "grad_norm": 1.4367713928222656, - "learning_rate": 8.438793969849247e-05, - "loss": 5.533, - "step": 16039 - }, - { - "epoch": 8.365058670143416, - "grad_norm": 1.3960587978363037, - "learning_rate": 8.438693467336684e-05, - "loss": 5.5039, - "step": 16040 - }, - { - "epoch": 8.365580182529335, - "grad_norm": 1.346131682395935, - "learning_rate": 8.438592964824121e-05, - "loss": 5.1948, - "step": 16041 - }, - { - "epoch": 8.366101694915255, - "grad_norm": 1.4895384311676025, - "learning_rate": 8.438492462311558e-05, - "loss": 4.7733, - "step": 16042 - }, - { - "epoch": 8.366623207301174, - "grad_norm": 1.53669011592865, - "learning_rate": 8.438391959798996e-05, - "loss": 5.6217, - "step": 16043 - }, - { - "epoch": 8.367144719687092, - "grad_norm": 1.3956218957901, - "learning_rate": 8.438291457286432e-05, - "loss": 5.825, - "step": 16044 - }, - { - "epoch": 8.367666232073011, - "grad_norm": 1.4116652011871338, - "learning_rate": 8.43819095477387e-05, - "loss": 5.4489, - "step": 16045 - }, - { - "epoch": 8.36818774445893, - "grad_norm": 1.431934118270874, - "learning_rate": 8.438090452261308e-05, - "loss": 5.8106, - "step": 16046 - }, - { - "epoch": 8.36870925684485, - "grad_norm": 1.4541420936584473, - "learning_rate": 8.437989949748744e-05, - "loss": 5.8838, - "step": 16047 - }, - { - "epoch": 8.36923076923077, - "grad_norm": 1.4531906843185425, - "learning_rate": 8.437889447236182e-05, - "loss": 5.3466, - "step": 16048 - }, - { - "epoch": 8.36975228161669, - "grad_norm": 1.338457465171814, - "learning_rate": 8.437788944723618e-05, - "loss": 5.8195, - "step": 16049 - }, - { - "epoch": 8.370273794002607, - "grad_norm": 1.4263468980789185, - "learning_rate": 8.437688442211056e-05, - "loss": 5.5701, - "step": 16050 - }, - { - "epoch": 8.370795306388526, - "grad_norm": 1.570862889289856, - "learning_rate": 8.437587939698492e-05, - "loss": 5.4856, - "step": 16051 - }, - { - "epoch": 8.371316818774446, - "grad_norm": 1.418013095855713, - "learning_rate": 8.43748743718593e-05, - "loss": 5.1883, - "step": 16052 - }, - { - "epoch": 8.371838331160365, - "grad_norm": 1.3834482431411743, - "learning_rate": 8.437386934673367e-05, - "loss": 5.2519, - "step": 16053 - }, - { - "epoch": 8.372359843546285, - "grad_norm": 1.5436902046203613, - "learning_rate": 8.437286432160804e-05, - "loss": 5.3318, - "step": 16054 - }, - { - "epoch": 8.372881355932204, - "grad_norm": 1.510059118270874, - "learning_rate": 8.437185929648241e-05, - "loss": 5.1861, - "step": 16055 - }, - { - "epoch": 8.373402868318122, - "grad_norm": 1.556776762008667, - "learning_rate": 8.437085427135679e-05, - "loss": 4.9405, - "step": 16056 - }, - { - "epoch": 8.373924380704041, - "grad_norm": 1.4405328035354614, - "learning_rate": 8.436984924623116e-05, - "loss": 5.5123, - "step": 16057 - }, - { - "epoch": 8.37444589308996, - "grad_norm": 1.4164496660232544, - "learning_rate": 8.436884422110554e-05, - "loss": 5.2874, - "step": 16058 - }, - { - "epoch": 8.37496740547588, - "grad_norm": 1.3285411596298218, - "learning_rate": 8.43678391959799e-05, - "loss": 5.517, - "step": 16059 - }, - { - "epoch": 8.3754889178618, - "grad_norm": 1.5317302942276, - "learning_rate": 8.436683417085428e-05, - "loss": 4.6043, - "step": 16060 - }, - { - "epoch": 8.37601043024772, - "grad_norm": 1.3769731521606445, - "learning_rate": 8.436582914572865e-05, - "loss": 5.9274, - "step": 16061 - }, - { - "epoch": 8.376531942633637, - "grad_norm": 1.3655836582183838, - "learning_rate": 8.436482412060301e-05, - "loss": 5.7668, - "step": 16062 - }, - { - "epoch": 8.377053455019556, - "grad_norm": 1.5482333898544312, - "learning_rate": 8.436381909547739e-05, - "loss": 5.5191, - "step": 16063 - }, - { - "epoch": 8.377574967405476, - "grad_norm": 1.497146487236023, - "learning_rate": 8.436281407035175e-05, - "loss": 5.0571, - "step": 16064 - }, - { - "epoch": 8.378096479791395, - "grad_norm": 1.4152060747146606, - "learning_rate": 8.436180904522613e-05, - "loss": 5.487, - "step": 16065 - }, - { - "epoch": 8.378617992177315, - "grad_norm": 1.423811912536621, - "learning_rate": 8.436080402010051e-05, - "loss": 5.7586, - "step": 16066 - }, - { - "epoch": 8.379139504563234, - "grad_norm": 1.376279592514038, - "learning_rate": 8.435979899497489e-05, - "loss": 5.256, - "step": 16067 - }, - { - "epoch": 8.379661016949152, - "grad_norm": 1.3610820770263672, - "learning_rate": 8.435879396984925e-05, - "loss": 5.8684, - "step": 16068 - }, - { - "epoch": 8.380182529335071, - "grad_norm": 1.4266982078552246, - "learning_rate": 8.435778894472363e-05, - "loss": 5.6716, - "step": 16069 - }, - { - "epoch": 8.38070404172099, - "grad_norm": 1.4553724527359009, - "learning_rate": 8.4356783919598e-05, - "loss": 5.653, - "step": 16070 - }, - { - "epoch": 8.38122555410691, - "grad_norm": 1.3894379138946533, - "learning_rate": 8.435577889447237e-05, - "loss": 5.6149, - "step": 16071 - }, - { - "epoch": 8.38174706649283, - "grad_norm": 1.5245280265808105, - "learning_rate": 8.435477386934674e-05, - "loss": 5.2165, - "step": 16072 - }, - { - "epoch": 8.38226857887875, - "grad_norm": 1.4296437501907349, - "learning_rate": 8.435376884422111e-05, - "loss": 5.8494, - "step": 16073 - }, - { - "epoch": 8.382790091264667, - "grad_norm": 1.4475390911102295, - "learning_rate": 8.435276381909548e-05, - "loss": 5.3021, - "step": 16074 - }, - { - "epoch": 8.383311603650586, - "grad_norm": 1.459594964981079, - "learning_rate": 8.435175879396984e-05, - "loss": 5.342, - "step": 16075 - }, - { - "epoch": 8.383833116036506, - "grad_norm": 1.513715147972107, - "learning_rate": 8.435075376884422e-05, - "loss": 5.1607, - "step": 16076 - }, - { - "epoch": 8.384354628422425, - "grad_norm": 1.404284954071045, - "learning_rate": 8.43497487437186e-05, - "loss": 5.635, - "step": 16077 - }, - { - "epoch": 8.384876140808345, - "grad_norm": 1.4496219158172607, - "learning_rate": 8.434874371859298e-05, - "loss": 5.5622, - "step": 16078 - }, - { - "epoch": 8.385397653194264, - "grad_norm": 1.9338045120239258, - "learning_rate": 8.434773869346734e-05, - "loss": 4.9661, - "step": 16079 - }, - { - "epoch": 8.385919165580182, - "grad_norm": 1.490051031112671, - "learning_rate": 8.434673366834172e-05, - "loss": 5.299, - "step": 16080 - }, - { - "epoch": 8.386440677966101, - "grad_norm": 1.4671480655670166, - "learning_rate": 8.434572864321608e-05, - "loss": 5.63, - "step": 16081 - }, - { - "epoch": 8.38696219035202, - "grad_norm": 1.4292240142822266, - "learning_rate": 8.434472361809046e-05, - "loss": 5.6074, - "step": 16082 - }, - { - "epoch": 8.38748370273794, - "grad_norm": 1.665950059890747, - "learning_rate": 8.434371859296482e-05, - "loss": 5.3331, - "step": 16083 - }, - { - "epoch": 8.38800521512386, - "grad_norm": 1.5039023160934448, - "learning_rate": 8.43427135678392e-05, - "loss": 5.8048, - "step": 16084 - }, - { - "epoch": 8.388526727509777, - "grad_norm": 1.3658021688461304, - "learning_rate": 8.434170854271357e-05, - "loss": 5.3961, - "step": 16085 - }, - { - "epoch": 8.389048239895697, - "grad_norm": 1.390698790550232, - "learning_rate": 8.434070351758794e-05, - "loss": 5.4883, - "step": 16086 - }, - { - "epoch": 8.389569752281616, - "grad_norm": 1.3121788501739502, - "learning_rate": 8.433969849246232e-05, - "loss": 5.8658, - "step": 16087 - }, - { - "epoch": 8.390091264667536, - "grad_norm": 1.4939700365066528, - "learning_rate": 8.433869346733669e-05, - "loss": 5.405, - "step": 16088 - }, - { - "epoch": 8.390612777053455, - "grad_norm": 1.4339754581451416, - "learning_rate": 8.433768844221106e-05, - "loss": 5.6689, - "step": 16089 - }, - { - "epoch": 8.391134289439375, - "grad_norm": 1.4499372243881226, - "learning_rate": 8.433668341708543e-05, - "loss": 5.6579, - "step": 16090 - }, - { - "epoch": 8.391655801825294, - "grad_norm": 1.4279826879501343, - "learning_rate": 8.43356783919598e-05, - "loss": 5.5256, - "step": 16091 - }, - { - "epoch": 8.392177314211212, - "grad_norm": 1.3224105834960938, - "learning_rate": 8.433467336683417e-05, - "loss": 5.6988, - "step": 16092 - }, - { - "epoch": 8.392698826597131, - "grad_norm": 1.379210114479065, - "learning_rate": 8.433366834170855e-05, - "loss": 5.9081, - "step": 16093 - }, - { - "epoch": 8.39322033898305, - "grad_norm": 1.3657768964767456, - "learning_rate": 8.433266331658291e-05, - "loss": 5.2152, - "step": 16094 - }, - { - "epoch": 8.39374185136897, - "grad_norm": 1.5339962244033813, - "learning_rate": 8.433165829145729e-05, - "loss": 5.5086, - "step": 16095 - }, - { - "epoch": 8.39426336375489, - "grad_norm": 1.5190377235412598, - "learning_rate": 8.433065326633166e-05, - "loss": 5.6024, - "step": 16096 - }, - { - "epoch": 8.394784876140807, - "grad_norm": 1.3834750652313232, - "learning_rate": 8.432964824120603e-05, - "loss": 5.8988, - "step": 16097 - }, - { - "epoch": 8.395306388526727, - "grad_norm": 1.5147089958190918, - "learning_rate": 8.432864321608041e-05, - "loss": 5.4647, - "step": 16098 - }, - { - "epoch": 8.395827900912646, - "grad_norm": 1.4412109851837158, - "learning_rate": 8.432763819095479e-05, - "loss": 5.4041, - "step": 16099 - }, - { - "epoch": 8.396349413298566, - "grad_norm": 1.5783789157867432, - "learning_rate": 8.432663316582915e-05, - "loss": 4.9433, - "step": 16100 - }, - { - "epoch": 8.396870925684485, - "grad_norm": 1.4107398986816406, - "learning_rate": 8.432562814070352e-05, - "loss": 5.6723, - "step": 16101 - }, - { - "epoch": 8.397392438070405, - "grad_norm": 1.4339709281921387, - "learning_rate": 8.43246231155779e-05, - "loss": 5.1507, - "step": 16102 - }, - { - "epoch": 8.397913950456322, - "grad_norm": 1.4619797468185425, - "learning_rate": 8.432361809045226e-05, - "loss": 5.2728, - "step": 16103 - }, - { - "epoch": 8.398435462842242, - "grad_norm": 1.5045579671859741, - "learning_rate": 8.432261306532664e-05, - "loss": 5.6034, - "step": 16104 - }, - { - "epoch": 8.398956975228161, - "grad_norm": 1.406790852546692, - "learning_rate": 8.4321608040201e-05, - "loss": 5.1439, - "step": 16105 - }, - { - "epoch": 8.399478487614081, - "grad_norm": 1.4507883787155151, - "learning_rate": 8.432060301507538e-05, - "loss": 5.7224, - "step": 16106 - }, - { - "epoch": 8.4, - "grad_norm": 1.595935344696045, - "learning_rate": 8.431959798994974e-05, - "loss": 5.1339, - "step": 16107 - }, - { - "epoch": 8.40052151238592, - "grad_norm": 1.4726231098175049, - "learning_rate": 8.431859296482412e-05, - "loss": 5.3194, - "step": 16108 - }, - { - "epoch": 8.401043024771838, - "grad_norm": 1.4438586235046387, - "learning_rate": 8.43175879396985e-05, - "loss": 5.6857, - "step": 16109 - }, - { - "epoch": 8.401564537157757, - "grad_norm": 1.3812757730484009, - "learning_rate": 8.431658291457288e-05, - "loss": 5.5029, - "step": 16110 - }, - { - "epoch": 8.402086049543676, - "grad_norm": 1.4957808256149292, - "learning_rate": 8.431557788944724e-05, - "loss": 4.9263, - "step": 16111 - }, - { - "epoch": 8.402607561929596, - "grad_norm": 1.3335469961166382, - "learning_rate": 8.431457286432162e-05, - "loss": 5.6713, - "step": 16112 - }, - { - "epoch": 8.403129074315515, - "grad_norm": 1.3659363985061646, - "learning_rate": 8.431356783919598e-05, - "loss": 5.4633, - "step": 16113 - }, - { - "epoch": 8.403650586701435, - "grad_norm": 1.3799513578414917, - "learning_rate": 8.431256281407035e-05, - "loss": 4.9012, - "step": 16114 - }, - { - "epoch": 8.404172099087353, - "grad_norm": 1.5219062566757202, - "learning_rate": 8.431155778894473e-05, - "loss": 5.3564, - "step": 16115 - }, - { - "epoch": 8.404693611473272, - "grad_norm": 1.5627562999725342, - "learning_rate": 8.431055276381909e-05, - "loss": 5.3155, - "step": 16116 - }, - { - "epoch": 8.405215123859191, - "grad_norm": 1.5942436456680298, - "learning_rate": 8.430954773869347e-05, - "loss": 5.1577, - "step": 16117 - }, - { - "epoch": 8.405736636245111, - "grad_norm": 1.4543038606643677, - "learning_rate": 8.430854271356785e-05, - "loss": 5.566, - "step": 16118 - }, - { - "epoch": 8.40625814863103, - "grad_norm": 1.3444411754608154, - "learning_rate": 8.430753768844222e-05, - "loss": 5.7669, - "step": 16119 - }, - { - "epoch": 8.40677966101695, - "grad_norm": 1.3484081029891968, - "learning_rate": 8.430653266331659e-05, - "loss": 5.895, - "step": 16120 - }, - { - "epoch": 8.407301173402868, - "grad_norm": 1.4837757349014282, - "learning_rate": 8.430552763819097e-05, - "loss": 5.7888, - "step": 16121 - }, - { - "epoch": 8.407822685788787, - "grad_norm": 1.4480828046798706, - "learning_rate": 8.430452261306533e-05, - "loss": 5.5082, - "step": 16122 - }, - { - "epoch": 8.408344198174706, - "grad_norm": 1.4532753229141235, - "learning_rate": 8.430351758793971e-05, - "loss": 5.7895, - "step": 16123 - }, - { - "epoch": 8.408865710560626, - "grad_norm": 1.4868870973587036, - "learning_rate": 8.430251256281407e-05, - "loss": 5.4205, - "step": 16124 - }, - { - "epoch": 8.409387222946545, - "grad_norm": 1.5160789489746094, - "learning_rate": 8.430150753768845e-05, - "loss": 5.5973, - "step": 16125 - }, - { - "epoch": 8.409908735332465, - "grad_norm": 1.4569697380065918, - "learning_rate": 8.430050251256281e-05, - "loss": 5.3046, - "step": 16126 - }, - { - "epoch": 8.410430247718383, - "grad_norm": 1.642928957939148, - "learning_rate": 8.429949748743718e-05, - "loss": 5.179, - "step": 16127 - }, - { - "epoch": 8.410951760104302, - "grad_norm": 1.5835330486297607, - "learning_rate": 8.429849246231156e-05, - "loss": 5.3991, - "step": 16128 - }, - { - "epoch": 8.411473272490221, - "grad_norm": 1.4960864782333374, - "learning_rate": 8.429748743718593e-05, - "loss": 5.5054, - "step": 16129 - }, - { - "epoch": 8.411994784876141, - "grad_norm": 1.587125539779663, - "learning_rate": 8.429648241206031e-05, - "loss": 5.3242, - "step": 16130 - }, - { - "epoch": 8.41251629726206, - "grad_norm": 1.3519264459609985, - "learning_rate": 8.429547738693468e-05, - "loss": 5.7859, - "step": 16131 - }, - { - "epoch": 8.41303780964798, - "grad_norm": 1.5771528482437134, - "learning_rate": 8.429447236180905e-05, - "loss": 4.9605, - "step": 16132 - }, - { - "epoch": 8.413559322033898, - "grad_norm": 1.4783871173858643, - "learning_rate": 8.429346733668342e-05, - "loss": 5.6059, - "step": 16133 - }, - { - "epoch": 8.414080834419817, - "grad_norm": 1.2771652936935425, - "learning_rate": 8.42924623115578e-05, - "loss": 5.7807, - "step": 16134 - }, - { - "epoch": 8.414602346805736, - "grad_norm": 1.3679823875427246, - "learning_rate": 8.429145728643216e-05, - "loss": 5.7281, - "step": 16135 - }, - { - "epoch": 8.415123859191656, - "grad_norm": 1.4651376008987427, - "learning_rate": 8.429045226130654e-05, - "loss": 5.8054, - "step": 16136 - }, - { - "epoch": 8.415645371577575, - "grad_norm": 1.471177339553833, - "learning_rate": 8.42894472361809e-05, - "loss": 5.4151, - "step": 16137 - }, - { - "epoch": 8.416166883963495, - "grad_norm": 1.5430511236190796, - "learning_rate": 8.428844221105528e-05, - "loss": 4.9125, - "step": 16138 - }, - { - "epoch": 8.416688396349413, - "grad_norm": 1.4281829595565796, - "learning_rate": 8.428743718592966e-05, - "loss": 5.6038, - "step": 16139 - }, - { - "epoch": 8.417209908735332, - "grad_norm": 1.3441716432571411, - "learning_rate": 8.428643216080402e-05, - "loss": 5.7535, - "step": 16140 - }, - { - "epoch": 8.417731421121252, - "grad_norm": 1.3890423774719238, - "learning_rate": 8.42854271356784e-05, - "loss": 5.8084, - "step": 16141 - }, - { - "epoch": 8.418252933507171, - "grad_norm": 1.4765045642852783, - "learning_rate": 8.428442211055276e-05, - "loss": 5.2657, - "step": 16142 - }, - { - "epoch": 8.41877444589309, - "grad_norm": 1.5323928594589233, - "learning_rate": 8.428341708542714e-05, - "loss": 5.7274, - "step": 16143 - }, - { - "epoch": 8.41929595827901, - "grad_norm": 1.3907129764556885, - "learning_rate": 8.42824120603015e-05, - "loss": 5.4728, - "step": 16144 - }, - { - "epoch": 8.419817470664928, - "grad_norm": 1.3522963523864746, - "learning_rate": 8.428140703517588e-05, - "loss": 5.2692, - "step": 16145 - }, - { - "epoch": 8.420338983050847, - "grad_norm": 1.3624845743179321, - "learning_rate": 8.428040201005025e-05, - "loss": 5.5769, - "step": 16146 - }, - { - "epoch": 8.420860495436767, - "grad_norm": 1.4033408164978027, - "learning_rate": 8.427939698492463e-05, - "loss": 5.7202, - "step": 16147 - }, - { - "epoch": 8.421382007822686, - "grad_norm": 1.36985182762146, - "learning_rate": 8.427839195979899e-05, - "loss": 5.7425, - "step": 16148 - }, - { - "epoch": 8.421903520208605, - "grad_norm": 1.5910874605178833, - "learning_rate": 8.427738693467337e-05, - "loss": 5.5441, - "step": 16149 - }, - { - "epoch": 8.422425032594525, - "grad_norm": 1.4702595472335815, - "learning_rate": 8.427638190954775e-05, - "loss": 5.6634, - "step": 16150 - }, - { - "epoch": 8.422946544980443, - "grad_norm": 1.5648807287216187, - "learning_rate": 8.427537688442212e-05, - "loss": 5.3563, - "step": 16151 - }, - { - "epoch": 8.423468057366362, - "grad_norm": 1.4909021854400635, - "learning_rate": 8.427437185929649e-05, - "loss": 5.4283, - "step": 16152 - }, - { - "epoch": 8.423989569752282, - "grad_norm": 1.4872814416885376, - "learning_rate": 8.427336683417087e-05, - "loss": 5.2605, - "step": 16153 - }, - { - "epoch": 8.424511082138201, - "grad_norm": 1.478987455368042, - "learning_rate": 8.427236180904523e-05, - "loss": 5.3484, - "step": 16154 - }, - { - "epoch": 8.42503259452412, - "grad_norm": 1.35179603099823, - "learning_rate": 8.42713567839196e-05, - "loss": 5.4565, - "step": 16155 - }, - { - "epoch": 8.42555410691004, - "grad_norm": 1.5015243291854858, - "learning_rate": 8.427035175879397e-05, - "loss": 5.4256, - "step": 16156 - }, - { - "epoch": 8.426075619295958, - "grad_norm": 1.5357385873794556, - "learning_rate": 8.426934673366834e-05, - "loss": 4.8303, - "step": 16157 - }, - { - "epoch": 8.426597131681877, - "grad_norm": 1.368300199508667, - "learning_rate": 8.426834170854271e-05, - "loss": 5.5543, - "step": 16158 - }, - { - "epoch": 8.427118644067797, - "grad_norm": 1.3007712364196777, - "learning_rate": 8.426733668341709e-05, - "loss": 5.9314, - "step": 16159 - }, - { - "epoch": 8.427640156453716, - "grad_norm": 1.4460233449935913, - "learning_rate": 8.426633165829147e-05, - "loss": 5.7066, - "step": 16160 - }, - { - "epoch": 8.428161668839635, - "grad_norm": 1.3938862085342407, - "learning_rate": 8.426532663316583e-05, - "loss": 5.7754, - "step": 16161 - }, - { - "epoch": 8.428683181225555, - "grad_norm": 1.7233937978744507, - "learning_rate": 8.426432160804021e-05, - "loss": 5.1365, - "step": 16162 - }, - { - "epoch": 8.429204693611473, - "grad_norm": 1.2903954982757568, - "learning_rate": 8.426331658291458e-05, - "loss": 6.0814, - "step": 16163 - }, - { - "epoch": 8.429726205997392, - "grad_norm": 1.4944130182266235, - "learning_rate": 8.426231155778895e-05, - "loss": 5.193, - "step": 16164 - }, - { - "epoch": 8.430247718383312, - "grad_norm": 1.4338456392288208, - "learning_rate": 8.426130653266332e-05, - "loss": 5.0643, - "step": 16165 - }, - { - "epoch": 8.430769230769231, - "grad_norm": 1.4309262037277222, - "learning_rate": 8.42603015075377e-05, - "loss": 4.9456, - "step": 16166 - }, - { - "epoch": 8.43129074315515, - "grad_norm": 1.6960926055908203, - "learning_rate": 8.425929648241206e-05, - "loss": 5.3937, - "step": 16167 - }, - { - "epoch": 8.43181225554107, - "grad_norm": 1.5381755828857422, - "learning_rate": 8.425829145728643e-05, - "loss": 5.6298, - "step": 16168 - }, - { - "epoch": 8.432333767926988, - "grad_norm": 1.3458833694458008, - "learning_rate": 8.42572864321608e-05, - "loss": 5.3015, - "step": 16169 - }, - { - "epoch": 8.432855280312907, - "grad_norm": 1.5250509977340698, - "learning_rate": 8.425628140703518e-05, - "loss": 5.1764, - "step": 16170 - }, - { - "epoch": 8.433376792698827, - "grad_norm": 1.4611537456512451, - "learning_rate": 8.425527638190956e-05, - "loss": 5.1801, - "step": 16171 - }, - { - "epoch": 8.433898305084746, - "grad_norm": 1.4016579389572144, - "learning_rate": 8.425427135678392e-05, - "loss": 5.7022, - "step": 16172 - }, - { - "epoch": 8.434419817470665, - "grad_norm": 1.4487282037734985, - "learning_rate": 8.42532663316583e-05, - "loss": 5.3909, - "step": 16173 - }, - { - "epoch": 8.434941329856585, - "grad_norm": 1.4919952154159546, - "learning_rate": 8.425226130653266e-05, - "loss": 5.0875, - "step": 16174 - }, - { - "epoch": 8.435462842242503, - "grad_norm": 1.4578818082809448, - "learning_rate": 8.425125628140704e-05, - "loss": 5.5867, - "step": 16175 - }, - { - "epoch": 8.435984354628422, - "grad_norm": 1.4479092359542847, - "learning_rate": 8.425025125628141e-05, - "loss": 5.0202, - "step": 16176 - }, - { - "epoch": 8.436505867014342, - "grad_norm": 1.6090083122253418, - "learning_rate": 8.424924623115578e-05, - "loss": 5.3188, - "step": 16177 - }, - { - "epoch": 8.437027379400261, - "grad_norm": 1.4183322191238403, - "learning_rate": 8.424824120603015e-05, - "loss": 5.5488, - "step": 16178 - }, - { - "epoch": 8.43754889178618, - "grad_norm": 1.3837671279907227, - "learning_rate": 8.424723618090453e-05, - "loss": 5.3759, - "step": 16179 - }, - { - "epoch": 8.438070404172098, - "grad_norm": 1.4647283554077148, - "learning_rate": 8.42462311557789e-05, - "loss": 5.5902, - "step": 16180 - }, - { - "epoch": 8.438591916558018, - "grad_norm": 1.438813328742981, - "learning_rate": 8.424522613065327e-05, - "loss": 5.696, - "step": 16181 - }, - { - "epoch": 8.439113428943937, - "grad_norm": 1.6649941205978394, - "learning_rate": 8.424422110552765e-05, - "loss": 5.5085, - "step": 16182 - }, - { - "epoch": 8.439634941329857, - "grad_norm": 1.462525486946106, - "learning_rate": 8.424321608040201e-05, - "loss": 5.1229, - "step": 16183 - }, - { - "epoch": 8.440156453715776, - "grad_norm": 1.3341935873031616, - "learning_rate": 8.424221105527639e-05, - "loss": 5.9746, - "step": 16184 - }, - { - "epoch": 8.440677966101696, - "grad_norm": 1.5025049448013306, - "learning_rate": 8.424120603015075e-05, - "loss": 5.0136, - "step": 16185 - }, - { - "epoch": 8.441199478487615, - "grad_norm": 1.4485257863998413, - "learning_rate": 8.424020100502513e-05, - "loss": 5.3904, - "step": 16186 - }, - { - "epoch": 8.441720990873533, - "grad_norm": 1.3277555704116821, - "learning_rate": 8.42391959798995e-05, - "loss": 5.6961, - "step": 16187 - }, - { - "epoch": 8.442242503259452, - "grad_norm": 1.4968492984771729, - "learning_rate": 8.423819095477387e-05, - "loss": 5.1768, - "step": 16188 - }, - { - "epoch": 8.442764015645372, - "grad_norm": 1.546457290649414, - "learning_rate": 8.423718592964824e-05, - "loss": 5.2617, - "step": 16189 - }, - { - "epoch": 8.443285528031291, - "grad_norm": 1.3422008752822876, - "learning_rate": 8.423618090452262e-05, - "loss": 5.0841, - "step": 16190 - }, - { - "epoch": 8.44380704041721, - "grad_norm": 1.4390556812286377, - "learning_rate": 8.423517587939699e-05, - "loss": 5.5186, - "step": 16191 - }, - { - "epoch": 8.444328552803128, - "grad_norm": 1.4618686437606812, - "learning_rate": 8.423417085427137e-05, - "loss": 5.8814, - "step": 16192 - }, - { - "epoch": 8.444850065189048, - "grad_norm": 1.3392894268035889, - "learning_rate": 8.423316582914574e-05, - "loss": 5.7298, - "step": 16193 - }, - { - "epoch": 8.445371577574967, - "grad_norm": 1.4093984365463257, - "learning_rate": 8.42321608040201e-05, - "loss": 5.082, - "step": 16194 - }, - { - "epoch": 8.445893089960887, - "grad_norm": 1.8295124769210815, - "learning_rate": 8.423115577889448e-05, - "loss": 5.1186, - "step": 16195 - }, - { - "epoch": 8.446414602346806, - "grad_norm": 1.4293227195739746, - "learning_rate": 8.423015075376884e-05, - "loss": 5.4479, - "step": 16196 - }, - { - "epoch": 8.446936114732726, - "grad_norm": 1.389785885810852, - "learning_rate": 8.422914572864322e-05, - "loss": 4.9152, - "step": 16197 - }, - { - "epoch": 8.447457627118643, - "grad_norm": 1.395507574081421, - "learning_rate": 8.422814070351758e-05, - "loss": 5.639, - "step": 16198 - }, - { - "epoch": 8.447979139504563, - "grad_norm": 1.279334306716919, - "learning_rate": 8.422713567839196e-05, - "loss": 5.9153, - "step": 16199 - }, - { - "epoch": 8.448500651890482, - "grad_norm": 1.463991641998291, - "learning_rate": 8.422613065326634e-05, - "loss": 5.404, - "step": 16200 - }, - { - "epoch": 8.449022164276402, - "grad_norm": 1.4850136041641235, - "learning_rate": 8.422512562814072e-05, - "loss": 5.6013, - "step": 16201 - }, - { - "epoch": 8.449543676662321, - "grad_norm": 1.3926880359649658, - "learning_rate": 8.422412060301508e-05, - "loss": 5.2944, - "step": 16202 - }, - { - "epoch": 8.45006518904824, - "grad_norm": 1.3785253763198853, - "learning_rate": 8.422311557788946e-05, - "loss": 5.6757, - "step": 16203 - }, - { - "epoch": 8.450586701434158, - "grad_norm": 1.3904322385787964, - "learning_rate": 8.422211055276382e-05, - "loss": 5.4191, - "step": 16204 - }, - { - "epoch": 8.451108213820078, - "grad_norm": 1.4383984804153442, - "learning_rate": 8.42211055276382e-05, - "loss": 5.5075, - "step": 16205 - }, - { - "epoch": 8.451629726205997, - "grad_norm": 1.3795032501220703, - "learning_rate": 8.422010050251257e-05, - "loss": 5.379, - "step": 16206 - }, - { - "epoch": 8.452151238591917, - "grad_norm": 1.3220949172973633, - "learning_rate": 8.421909547738693e-05, - "loss": 5.7296, - "step": 16207 - }, - { - "epoch": 8.452672750977836, - "grad_norm": 1.5404318571090698, - "learning_rate": 8.421809045226131e-05, - "loss": 5.5361, - "step": 16208 - }, - { - "epoch": 8.453194263363756, - "grad_norm": 1.310841679573059, - "learning_rate": 8.421708542713567e-05, - "loss": 5.8339, - "step": 16209 - }, - { - "epoch": 8.453715775749673, - "grad_norm": 1.4948856830596924, - "learning_rate": 8.421608040201005e-05, - "loss": 5.2088, - "step": 16210 - }, - { - "epoch": 8.454237288135593, - "grad_norm": 1.5076905488967896, - "learning_rate": 8.421507537688443e-05, - "loss": 5.2338, - "step": 16211 - }, - { - "epoch": 8.454758800521512, - "grad_norm": 1.3501328229904175, - "learning_rate": 8.42140703517588e-05, - "loss": 5.9439, - "step": 16212 - }, - { - "epoch": 8.455280312907432, - "grad_norm": 1.415837049484253, - "learning_rate": 8.421306532663317e-05, - "loss": 5.5858, - "step": 16213 - }, - { - "epoch": 8.455801825293351, - "grad_norm": 1.4384658336639404, - "learning_rate": 8.421206030150755e-05, - "loss": 5.6015, - "step": 16214 - }, - { - "epoch": 8.45632333767927, - "grad_norm": 1.4486894607543945, - "learning_rate": 8.421105527638191e-05, - "loss": 5.6896, - "step": 16215 - }, - { - "epoch": 8.456844850065188, - "grad_norm": 1.3354312181472778, - "learning_rate": 8.421005025125629e-05, - "loss": 5.5815, - "step": 16216 - }, - { - "epoch": 8.457366362451108, - "grad_norm": 1.366239309310913, - "learning_rate": 8.420904522613065e-05, - "loss": 5.9688, - "step": 16217 - }, - { - "epoch": 8.457887874837027, - "grad_norm": 1.5704858303070068, - "learning_rate": 8.420804020100503e-05, - "loss": 4.7469, - "step": 16218 - }, - { - "epoch": 8.458409387222947, - "grad_norm": 1.3572789430618286, - "learning_rate": 8.42070351758794e-05, - "loss": 5.712, - "step": 16219 - }, - { - "epoch": 8.458930899608866, - "grad_norm": 1.339353322982788, - "learning_rate": 8.420603015075377e-05, - "loss": 5.3306, - "step": 16220 - }, - { - "epoch": 8.459452411994786, - "grad_norm": 1.3810932636260986, - "learning_rate": 8.420502512562815e-05, - "loss": 5.2048, - "step": 16221 - }, - { - "epoch": 8.459973924380703, - "grad_norm": 1.4041317701339722, - "learning_rate": 8.420402010050252e-05, - "loss": 5.6239, - "step": 16222 - }, - { - "epoch": 8.460495436766623, - "grad_norm": 1.4304488897323608, - "learning_rate": 8.42030150753769e-05, - "loss": 5.6196, - "step": 16223 - }, - { - "epoch": 8.461016949152542, - "grad_norm": 1.405139446258545, - "learning_rate": 8.420201005025126e-05, - "loss": 5.538, - "step": 16224 - }, - { - "epoch": 8.461538461538462, - "grad_norm": 1.3897625207901, - "learning_rate": 8.420100502512564e-05, - "loss": 5.7567, - "step": 16225 - }, - { - "epoch": 8.462059973924381, - "grad_norm": 1.551687479019165, - "learning_rate": 8.42e-05, - "loss": 5.621, - "step": 16226 - }, - { - "epoch": 8.4625814863103, - "grad_norm": 1.7834742069244385, - "learning_rate": 8.419899497487438e-05, - "loss": 4.929, - "step": 16227 - }, - { - "epoch": 8.463102998696218, - "grad_norm": 1.3782892227172852, - "learning_rate": 8.419798994974874e-05, - "loss": 5.2548, - "step": 16228 - }, - { - "epoch": 8.463624511082138, - "grad_norm": 1.401948094367981, - "learning_rate": 8.419698492462312e-05, - "loss": 4.6291, - "step": 16229 - }, - { - "epoch": 8.464146023468057, - "grad_norm": 1.503957986831665, - "learning_rate": 8.419597989949748e-05, - "loss": 5.6403, - "step": 16230 - }, - { - "epoch": 8.464667535853977, - "grad_norm": 1.5762001276016235, - "learning_rate": 8.419497487437186e-05, - "loss": 5.4079, - "step": 16231 - }, - { - "epoch": 8.465189048239896, - "grad_norm": 1.459452509880066, - "learning_rate": 8.419396984924624e-05, - "loss": 5.5275, - "step": 16232 - }, - { - "epoch": 8.465710560625816, - "grad_norm": 1.5235280990600586, - "learning_rate": 8.419296482412062e-05, - "loss": 5.3013, - "step": 16233 - }, - { - "epoch": 8.466232073011733, - "grad_norm": 1.5874536037445068, - "learning_rate": 8.419195979899498e-05, - "loss": 4.9698, - "step": 16234 - }, - { - "epoch": 8.466753585397653, - "grad_norm": 1.3900612592697144, - "learning_rate": 8.419095477386935e-05, - "loss": 5.3742, - "step": 16235 - }, - { - "epoch": 8.467275097783572, - "grad_norm": 1.3569087982177734, - "learning_rate": 8.418994974874372e-05, - "loss": 5.1134, - "step": 16236 - }, - { - "epoch": 8.467796610169492, - "grad_norm": 1.4767959117889404, - "learning_rate": 8.418894472361809e-05, - "loss": 5.8616, - "step": 16237 - }, - { - "epoch": 8.468318122555411, - "grad_norm": 1.5518466234207153, - "learning_rate": 8.418793969849247e-05, - "loss": 5.3588, - "step": 16238 - }, - { - "epoch": 8.46883963494133, - "grad_norm": 1.4954818487167358, - "learning_rate": 8.418693467336683e-05, - "loss": 5.1225, - "step": 16239 - }, - { - "epoch": 8.469361147327248, - "grad_norm": 1.4030823707580566, - "learning_rate": 8.418592964824121e-05, - "loss": 5.2526, - "step": 16240 - }, - { - "epoch": 8.469882659713168, - "grad_norm": 1.41080904006958, - "learning_rate": 8.418492462311559e-05, - "loss": 5.6533, - "step": 16241 - }, - { - "epoch": 8.470404172099087, - "grad_norm": 1.4401237964630127, - "learning_rate": 8.418391959798996e-05, - "loss": 5.6491, - "step": 16242 - }, - { - "epoch": 8.470925684485007, - "grad_norm": 1.4135055541992188, - "learning_rate": 8.418291457286433e-05, - "loss": 5.2678, - "step": 16243 - }, - { - "epoch": 8.471447196870926, - "grad_norm": 1.4171780347824097, - "learning_rate": 8.41819095477387e-05, - "loss": 5.8043, - "step": 16244 - }, - { - "epoch": 8.471968709256846, - "grad_norm": 1.4009233713150024, - "learning_rate": 8.418090452261307e-05, - "loss": 5.7416, - "step": 16245 - }, - { - "epoch": 8.472490221642763, - "grad_norm": 1.4349644184112549, - "learning_rate": 8.417989949748745e-05, - "loss": 5.5646, - "step": 16246 - }, - { - "epoch": 8.473011734028683, - "grad_norm": 1.3982341289520264, - "learning_rate": 8.417889447236181e-05, - "loss": 5.4435, - "step": 16247 - }, - { - "epoch": 8.473533246414602, - "grad_norm": 1.3714706897735596, - "learning_rate": 8.417788944723618e-05, - "loss": 5.7795, - "step": 16248 - }, - { - "epoch": 8.474054758800522, - "grad_norm": 1.396523118019104, - "learning_rate": 8.417688442211055e-05, - "loss": 5.2386, - "step": 16249 - }, - { - "epoch": 8.474576271186441, - "grad_norm": 1.6258031129837036, - "learning_rate": 8.417587939698492e-05, - "loss": 5.3586, - "step": 16250 - }, - { - "epoch": 8.47509778357236, - "grad_norm": 1.5053828954696655, - "learning_rate": 8.41748743718593e-05, - "loss": 5.6597, - "step": 16251 - }, - { - "epoch": 8.475619295958278, - "grad_norm": 1.5445096492767334, - "learning_rate": 8.417386934673367e-05, - "loss": 5.8149, - "step": 16252 - }, - { - "epoch": 8.476140808344198, - "grad_norm": 1.4885343313217163, - "learning_rate": 8.417286432160805e-05, - "loss": 5.7406, - "step": 16253 - }, - { - "epoch": 8.476662320730117, - "grad_norm": 1.4981075525283813, - "learning_rate": 8.417185929648242e-05, - "loss": 5.4396, - "step": 16254 - }, - { - "epoch": 8.477183833116037, - "grad_norm": 1.3831754922866821, - "learning_rate": 8.41708542713568e-05, - "loss": 5.9081, - "step": 16255 - }, - { - "epoch": 8.477705345501956, - "grad_norm": 1.5725641250610352, - "learning_rate": 8.416984924623116e-05, - "loss": 5.1548, - "step": 16256 - }, - { - "epoch": 8.478226857887876, - "grad_norm": 1.40445077419281, - "learning_rate": 8.416884422110554e-05, - "loss": 5.2288, - "step": 16257 - }, - { - "epoch": 8.478748370273793, - "grad_norm": 1.4143438339233398, - "learning_rate": 8.41678391959799e-05, - "loss": 5.4474, - "step": 16258 - }, - { - "epoch": 8.479269882659713, - "grad_norm": 1.4662890434265137, - "learning_rate": 8.416683417085428e-05, - "loss": 5.7607, - "step": 16259 - }, - { - "epoch": 8.479791395045632, - "grad_norm": 1.3890568017959595, - "learning_rate": 8.416582914572864e-05, - "loss": 5.5942, - "step": 16260 - }, - { - "epoch": 8.480312907431552, - "grad_norm": 1.4208444356918335, - "learning_rate": 8.416482412060302e-05, - "loss": 5.4662, - "step": 16261 - }, - { - "epoch": 8.480834419817471, - "grad_norm": 1.682942509651184, - "learning_rate": 8.41638190954774e-05, - "loss": 5.2358, - "step": 16262 - }, - { - "epoch": 8.48135593220339, - "grad_norm": 1.386042594909668, - "learning_rate": 8.416281407035176e-05, - "loss": 5.2574, - "step": 16263 - }, - { - "epoch": 8.481877444589308, - "grad_norm": 1.3789124488830566, - "learning_rate": 8.416180904522614e-05, - "loss": 5.5634, - "step": 16264 - }, - { - "epoch": 8.482398956975228, - "grad_norm": 1.3166377544403076, - "learning_rate": 8.41608040201005e-05, - "loss": 5.8143, - "step": 16265 - }, - { - "epoch": 8.482920469361147, - "grad_norm": 1.4488633871078491, - "learning_rate": 8.415979899497488e-05, - "loss": 5.2373, - "step": 16266 - }, - { - "epoch": 8.483441981747067, - "grad_norm": 1.5035679340362549, - "learning_rate": 8.415879396984925e-05, - "loss": 5.2976, - "step": 16267 - }, - { - "epoch": 8.483963494132986, - "grad_norm": 1.4377559423446655, - "learning_rate": 8.415778894472363e-05, - "loss": 5.0286, - "step": 16268 - }, - { - "epoch": 8.484485006518906, - "grad_norm": 1.4902321100234985, - "learning_rate": 8.415678391959799e-05, - "loss": 5.8942, - "step": 16269 - }, - { - "epoch": 8.485006518904823, - "grad_norm": 1.3802274465560913, - "learning_rate": 8.415577889447237e-05, - "loss": 5.7178, - "step": 16270 - }, - { - "epoch": 8.485528031290743, - "grad_norm": 1.4650157690048218, - "learning_rate": 8.415477386934673e-05, - "loss": 5.6931, - "step": 16271 - }, - { - "epoch": 8.486049543676662, - "grad_norm": 1.3341090679168701, - "learning_rate": 8.415376884422111e-05, - "loss": 5.8694, - "step": 16272 - }, - { - "epoch": 8.486571056062582, - "grad_norm": 1.4870949983596802, - "learning_rate": 8.415276381909549e-05, - "loss": 5.702, - "step": 16273 - }, - { - "epoch": 8.487092568448501, - "grad_norm": 1.424792766571045, - "learning_rate": 8.415175879396985e-05, - "loss": 5.6508, - "step": 16274 - }, - { - "epoch": 8.487614080834419, - "grad_norm": 1.6119937896728516, - "learning_rate": 8.415075376884423e-05, - "loss": 5.2143, - "step": 16275 - }, - { - "epoch": 8.488135593220338, - "grad_norm": 1.3839800357818604, - "learning_rate": 8.41497487437186e-05, - "loss": 5.6363, - "step": 16276 - }, - { - "epoch": 8.488657105606258, - "grad_norm": 1.3895314931869507, - "learning_rate": 8.414874371859297e-05, - "loss": 5.5482, - "step": 16277 - }, - { - "epoch": 8.489178617992177, - "grad_norm": 1.4103795289993286, - "learning_rate": 8.414773869346734e-05, - "loss": 5.7319, - "step": 16278 - }, - { - "epoch": 8.489700130378097, - "grad_norm": 1.6477429866790771, - "learning_rate": 8.414673366834171e-05, - "loss": 5.0811, - "step": 16279 - }, - { - "epoch": 8.490221642764016, - "grad_norm": 1.4250315427780151, - "learning_rate": 8.414572864321608e-05, - "loss": 5.6652, - "step": 16280 - }, - { - "epoch": 8.490743155149936, - "grad_norm": 1.3829952478408813, - "learning_rate": 8.414472361809046e-05, - "loss": 5.5521, - "step": 16281 - }, - { - "epoch": 8.491264667535853, - "grad_norm": 1.5686968564987183, - "learning_rate": 8.414371859296482e-05, - "loss": 5.7742, - "step": 16282 - }, - { - "epoch": 8.491786179921773, - "grad_norm": 1.3568753004074097, - "learning_rate": 8.41427135678392e-05, - "loss": 5.5451, - "step": 16283 - }, - { - "epoch": 8.492307692307692, - "grad_norm": 1.4286757707595825, - "learning_rate": 8.414170854271358e-05, - "loss": 5.4459, - "step": 16284 - }, - { - "epoch": 8.492829204693612, - "grad_norm": 1.4185134172439575, - "learning_rate": 8.414070351758795e-05, - "loss": 5.771, - "step": 16285 - }, - { - "epoch": 8.493350717079531, - "grad_norm": 1.4366488456726074, - "learning_rate": 8.413969849246232e-05, - "loss": 5.5368, - "step": 16286 - }, - { - "epoch": 8.493872229465449, - "grad_norm": 1.4074499607086182, - "learning_rate": 8.413869346733668e-05, - "loss": 5.6121, - "step": 16287 - }, - { - "epoch": 8.494393741851368, - "grad_norm": 1.5088961124420166, - "learning_rate": 8.413768844221106e-05, - "loss": 5.3589, - "step": 16288 - }, - { - "epoch": 8.494915254237288, - "grad_norm": 1.461081624031067, - "learning_rate": 8.413668341708542e-05, - "loss": 4.7038, - "step": 16289 - }, - { - "epoch": 8.495436766623207, - "grad_norm": 1.372451901435852, - "learning_rate": 8.41356783919598e-05, - "loss": 5.9244, - "step": 16290 - }, - { - "epoch": 8.495958279009127, - "grad_norm": 1.7016204595565796, - "learning_rate": 8.413467336683417e-05, - "loss": 5.0494, - "step": 16291 - }, - { - "epoch": 8.496479791395046, - "grad_norm": 1.4344096183776855, - "learning_rate": 8.413366834170854e-05, - "loss": 5.7458, - "step": 16292 - }, - { - "epoch": 8.497001303780964, - "grad_norm": 1.51285982131958, - "learning_rate": 8.413266331658292e-05, - "loss": 5.2118, - "step": 16293 - }, - { - "epoch": 8.497522816166883, - "grad_norm": 1.4368088245391846, - "learning_rate": 8.41316582914573e-05, - "loss": 5.1611, - "step": 16294 - }, - { - "epoch": 8.498044328552803, - "grad_norm": 1.4745235443115234, - "learning_rate": 8.413065326633166e-05, - "loss": 5.7714, - "step": 16295 - }, - { - "epoch": 8.498565840938722, - "grad_norm": 1.392412543296814, - "learning_rate": 8.412964824120604e-05, - "loss": 5.7046, - "step": 16296 - }, - { - "epoch": 8.499087353324642, - "grad_norm": 1.2590153217315674, - "learning_rate": 8.41286432160804e-05, - "loss": 5.0089, - "step": 16297 - }, - { - "epoch": 8.499608865710561, - "grad_norm": 1.3884339332580566, - "learning_rate": 8.412763819095478e-05, - "loss": 5.0125, - "step": 16298 - }, - { - "epoch": 8.500130378096479, - "grad_norm": 1.4788516759872437, - "learning_rate": 8.412663316582915e-05, - "loss": 5.3835, - "step": 16299 - }, - { - "epoch": 8.500651890482398, - "grad_norm": 1.5755811929702759, - "learning_rate": 8.412562814070351e-05, - "loss": 5.1858, - "step": 16300 - }, - { - "epoch": 8.501173402868318, - "grad_norm": 1.4121792316436768, - "learning_rate": 8.412462311557789e-05, - "loss": 5.7234, - "step": 16301 - }, - { - "epoch": 8.501694915254237, - "grad_norm": 1.5003864765167236, - "learning_rate": 8.412361809045225e-05, - "loss": 5.6416, - "step": 16302 - }, - { - "epoch": 8.502216427640157, - "grad_norm": 1.3301002979278564, - "learning_rate": 8.412261306532663e-05, - "loss": 6.0105, - "step": 16303 - }, - { - "epoch": 8.502737940026076, - "grad_norm": 1.5484566688537598, - "learning_rate": 8.412160804020101e-05, - "loss": 5.2331, - "step": 16304 - }, - { - "epoch": 8.503259452411994, - "grad_norm": 1.3522982597351074, - "learning_rate": 8.412060301507539e-05, - "loss": 5.9683, - "step": 16305 - }, - { - "epoch": 8.503780964797913, - "grad_norm": 1.4320651292800903, - "learning_rate": 8.411959798994975e-05, - "loss": 5.6104, - "step": 16306 - }, - { - "epoch": 8.504302477183833, - "grad_norm": 1.5427515506744385, - "learning_rate": 8.411859296482413e-05, - "loss": 5.0654, - "step": 16307 - }, - { - "epoch": 8.504823989569752, - "grad_norm": 1.4770077466964722, - "learning_rate": 8.41175879396985e-05, - "loss": 5.6556, - "step": 16308 - }, - { - "epoch": 8.505345501955672, - "grad_norm": 1.4306730031967163, - "learning_rate": 8.411658291457287e-05, - "loss": 5.423, - "step": 16309 - }, - { - "epoch": 8.505867014341591, - "grad_norm": 1.4186207056045532, - "learning_rate": 8.411557788944724e-05, - "loss": 5.6707, - "step": 16310 - }, - { - "epoch": 8.506388526727509, - "grad_norm": 1.4631394147872925, - "learning_rate": 8.411457286432161e-05, - "loss": 5.4871, - "step": 16311 - }, - { - "epoch": 8.506910039113428, - "grad_norm": 1.4884411096572876, - "learning_rate": 8.411356783919598e-05, - "loss": 5.4567, - "step": 16312 - }, - { - "epoch": 8.507431551499348, - "grad_norm": 1.4493452310562134, - "learning_rate": 8.411256281407036e-05, - "loss": 5.5605, - "step": 16313 - }, - { - "epoch": 8.507953063885267, - "grad_norm": 1.3339916467666626, - "learning_rate": 8.411155778894473e-05, - "loss": 5.9729, - "step": 16314 - }, - { - "epoch": 8.508474576271187, - "grad_norm": 1.523606300354004, - "learning_rate": 8.41105527638191e-05, - "loss": 5.0904, - "step": 16315 - }, - { - "epoch": 8.508996088657106, - "grad_norm": 1.533424973487854, - "learning_rate": 8.410954773869348e-05, - "loss": 5.1596, - "step": 16316 - }, - { - "epoch": 8.509517601043024, - "grad_norm": 1.4198166131973267, - "learning_rate": 8.410854271356784e-05, - "loss": 5.532, - "step": 16317 - }, - { - "epoch": 8.510039113428943, - "grad_norm": 1.3499614000320435, - "learning_rate": 8.410753768844222e-05, - "loss": 5.5899, - "step": 16318 - }, - { - "epoch": 8.510560625814863, - "grad_norm": 1.3293159008026123, - "learning_rate": 8.410653266331658e-05, - "loss": 5.8062, - "step": 16319 - }, - { - "epoch": 8.511082138200782, - "grad_norm": 1.3016242980957031, - "learning_rate": 8.410552763819096e-05, - "loss": 5.8828, - "step": 16320 - }, - { - "epoch": 8.511603650586702, - "grad_norm": 1.4299463033676147, - "learning_rate": 8.410452261306532e-05, - "loss": 4.7974, - "step": 16321 - }, - { - "epoch": 8.512125162972621, - "grad_norm": 1.4880151748657227, - "learning_rate": 8.41035175879397e-05, - "loss": 5.4461, - "step": 16322 - }, - { - "epoch": 8.512646675358539, - "grad_norm": 1.4608279466629028, - "learning_rate": 8.410251256281407e-05, - "loss": 5.7073, - "step": 16323 - }, - { - "epoch": 8.513168187744458, - "grad_norm": 1.4951132535934448, - "learning_rate": 8.410150753768844e-05, - "loss": 5.318, - "step": 16324 - }, - { - "epoch": 8.513689700130378, - "grad_norm": 1.4994184970855713, - "learning_rate": 8.410050251256282e-05, - "loss": 5.2159, - "step": 16325 - }, - { - "epoch": 8.514211212516297, - "grad_norm": 1.4930249452590942, - "learning_rate": 8.40994974874372e-05, - "loss": 5.4586, - "step": 16326 - }, - { - "epoch": 8.514732724902217, - "grad_norm": 1.363211989402771, - "learning_rate": 8.409849246231156e-05, - "loss": 5.5099, - "step": 16327 - }, - { - "epoch": 8.515254237288136, - "grad_norm": 1.3574644327163696, - "learning_rate": 8.409748743718593e-05, - "loss": 5.8946, - "step": 16328 - }, - { - "epoch": 8.515775749674054, - "grad_norm": 1.417824625968933, - "learning_rate": 8.40964824120603e-05, - "loss": 5.7409, - "step": 16329 - }, - { - "epoch": 8.516297262059974, - "grad_norm": 1.4192311763763428, - "learning_rate": 8.409547738693467e-05, - "loss": 5.4264, - "step": 16330 - }, - { - "epoch": 8.516818774445893, - "grad_norm": 1.434242844581604, - "learning_rate": 8.409447236180905e-05, - "loss": 5.1939, - "step": 16331 - }, - { - "epoch": 8.517340286831812, - "grad_norm": 1.310712218284607, - "learning_rate": 8.409346733668341e-05, - "loss": 5.9505, - "step": 16332 - }, - { - "epoch": 8.517861799217732, - "grad_norm": 1.4450511932373047, - "learning_rate": 8.409246231155779e-05, - "loss": 5.2603, - "step": 16333 - }, - { - "epoch": 8.518383311603651, - "grad_norm": 1.393729329109192, - "learning_rate": 8.409145728643217e-05, - "loss": 5.5123, - "step": 16334 - }, - { - "epoch": 8.518904823989569, - "grad_norm": 1.3721262216567993, - "learning_rate": 8.409045226130655e-05, - "loss": 5.5054, - "step": 16335 - }, - { - "epoch": 8.519426336375489, - "grad_norm": 1.4702799320220947, - "learning_rate": 8.408944723618091e-05, - "loss": 5.5488, - "step": 16336 - }, - { - "epoch": 8.519947848761408, - "grad_norm": 1.3862028121948242, - "learning_rate": 8.408844221105529e-05, - "loss": 5.4412, - "step": 16337 - }, - { - "epoch": 8.520469361147327, - "grad_norm": 1.440938115119934, - "learning_rate": 8.408743718592965e-05, - "loss": 5.7279, - "step": 16338 - }, - { - "epoch": 8.520990873533247, - "grad_norm": 1.5683718919754028, - "learning_rate": 8.408643216080403e-05, - "loss": 5.4503, - "step": 16339 - }, - { - "epoch": 8.521512385919166, - "grad_norm": 1.348432183265686, - "learning_rate": 8.40854271356784e-05, - "loss": 5.7932, - "step": 16340 - }, - { - "epoch": 8.522033898305084, - "grad_norm": 1.519518494606018, - "learning_rate": 8.408442211055276e-05, - "loss": 4.9353, - "step": 16341 - }, - { - "epoch": 8.522555410691004, - "grad_norm": 1.4163525104522705, - "learning_rate": 8.408341708542714e-05, - "loss": 5.6686, - "step": 16342 - }, - { - "epoch": 8.523076923076923, - "grad_norm": 1.3802011013031006, - "learning_rate": 8.40824120603015e-05, - "loss": 5.9269, - "step": 16343 - }, - { - "epoch": 8.523598435462842, - "grad_norm": 1.5114336013793945, - "learning_rate": 8.408140703517588e-05, - "loss": 4.9831, - "step": 16344 - }, - { - "epoch": 8.524119947848762, - "grad_norm": 1.3513715267181396, - "learning_rate": 8.408040201005026e-05, - "loss": 5.5953, - "step": 16345 - }, - { - "epoch": 8.524641460234681, - "grad_norm": 1.4421672821044922, - "learning_rate": 8.407939698492463e-05, - "loss": 5.3267, - "step": 16346 - }, - { - "epoch": 8.525162972620599, - "grad_norm": 1.476525902748108, - "learning_rate": 8.4078391959799e-05, - "loss": 5.7631, - "step": 16347 - }, - { - "epoch": 8.525684485006519, - "grad_norm": 1.523127555847168, - "learning_rate": 8.407738693467338e-05, - "loss": 5.4638, - "step": 16348 - }, - { - "epoch": 8.526205997392438, - "grad_norm": 1.402724027633667, - "learning_rate": 8.407638190954774e-05, - "loss": 5.699, - "step": 16349 - }, - { - "epoch": 8.526727509778357, - "grad_norm": 1.3704191446304321, - "learning_rate": 8.407537688442212e-05, - "loss": 5.7752, - "step": 16350 - }, - { - "epoch": 8.527249022164277, - "grad_norm": 1.3813397884368896, - "learning_rate": 8.407437185929648e-05, - "loss": 5.7721, - "step": 16351 - }, - { - "epoch": 8.527770534550196, - "grad_norm": 1.4107345342636108, - "learning_rate": 8.407336683417086e-05, - "loss": 5.2107, - "step": 16352 - }, - { - "epoch": 8.528292046936114, - "grad_norm": 1.4279944896697998, - "learning_rate": 8.407236180904523e-05, - "loss": 5.4633, - "step": 16353 - }, - { - "epoch": 8.528813559322034, - "grad_norm": 1.469723105430603, - "learning_rate": 8.40713567839196e-05, - "loss": 5.5019, - "step": 16354 - }, - { - "epoch": 8.529335071707953, - "grad_norm": 1.328547477722168, - "learning_rate": 8.407035175879398e-05, - "loss": 5.6764, - "step": 16355 - }, - { - "epoch": 8.529856584093872, - "grad_norm": 1.3986341953277588, - "learning_rate": 8.406934673366835e-05, - "loss": 5.6489, - "step": 16356 - }, - { - "epoch": 8.530378096479792, - "grad_norm": 1.4318324327468872, - "learning_rate": 8.406834170854272e-05, - "loss": 4.8899, - "step": 16357 - }, - { - "epoch": 8.53089960886571, - "grad_norm": 1.440152883529663, - "learning_rate": 8.406733668341709e-05, - "loss": 5.478, - "step": 16358 - }, - { - "epoch": 8.531421121251629, - "grad_norm": 1.4767831563949585, - "learning_rate": 8.406633165829147e-05, - "loss": 5.5326, - "step": 16359 - }, - { - "epoch": 8.531942633637549, - "grad_norm": 1.2952995300292969, - "learning_rate": 8.406532663316583e-05, - "loss": 5.9321, - "step": 16360 - }, - { - "epoch": 8.532464146023468, - "grad_norm": 1.4433907270431519, - "learning_rate": 8.406432160804021e-05, - "loss": 5.2721, - "step": 16361 - }, - { - "epoch": 8.532985658409387, - "grad_norm": 1.444433569908142, - "learning_rate": 8.406331658291457e-05, - "loss": 5.6761, - "step": 16362 - }, - { - "epoch": 8.533507170795307, - "grad_norm": 1.4009400606155396, - "learning_rate": 8.406231155778895e-05, - "loss": 5.1576, - "step": 16363 - }, - { - "epoch": 8.534028683181226, - "grad_norm": 1.5646318197250366, - "learning_rate": 8.406130653266331e-05, - "loss": 5.7697, - "step": 16364 - }, - { - "epoch": 8.534550195567144, - "grad_norm": 1.6418472528457642, - "learning_rate": 8.406030150753769e-05, - "loss": 5.3607, - "step": 16365 - }, - { - "epoch": 8.535071707953064, - "grad_norm": 1.4822769165039062, - "learning_rate": 8.405929648241207e-05, - "loss": 5.6217, - "step": 16366 - }, - { - "epoch": 8.535593220338983, - "grad_norm": 1.5673905611038208, - "learning_rate": 8.405829145728643e-05, - "loss": 4.9887, - "step": 16367 - }, - { - "epoch": 8.536114732724903, - "grad_norm": 1.3535528182983398, - "learning_rate": 8.405728643216081e-05, - "loss": 5.4071, - "step": 16368 - }, - { - "epoch": 8.536636245110822, - "grad_norm": 1.4707698822021484, - "learning_rate": 8.405628140703518e-05, - "loss": 5.6665, - "step": 16369 - }, - { - "epoch": 8.53715775749674, - "grad_norm": 1.6002399921417236, - "learning_rate": 8.405527638190955e-05, - "loss": 5.7226, - "step": 16370 - }, - { - "epoch": 8.53767926988266, - "grad_norm": 1.4739612340927124, - "learning_rate": 8.405427135678392e-05, - "loss": 5.3693, - "step": 16371 - }, - { - "epoch": 8.538200782268579, - "grad_norm": 1.4879001379013062, - "learning_rate": 8.40532663316583e-05, - "loss": 5.4926, - "step": 16372 - }, - { - "epoch": 8.538722294654498, - "grad_norm": 1.5028101205825806, - "learning_rate": 8.405226130653266e-05, - "loss": 5.3624, - "step": 16373 - }, - { - "epoch": 8.539243807040418, - "grad_norm": 1.3963958024978638, - "learning_rate": 8.405125628140704e-05, - "loss": 5.7082, - "step": 16374 - }, - { - "epoch": 8.539765319426337, - "grad_norm": 1.4851833581924438, - "learning_rate": 8.405025125628142e-05, - "loss": 5.1404, - "step": 16375 - }, - { - "epoch": 8.540286831812256, - "grad_norm": 1.4073140621185303, - "learning_rate": 8.40492462311558e-05, - "loss": 5.7296, - "step": 16376 - }, - { - "epoch": 8.540808344198174, - "grad_norm": 1.3304623365402222, - "learning_rate": 8.404824120603016e-05, - "loss": 6.0101, - "step": 16377 - }, - { - "epoch": 8.541329856584094, - "grad_norm": 1.4013386964797974, - "learning_rate": 8.404723618090454e-05, - "loss": 5.6639, - "step": 16378 - }, - { - "epoch": 8.541851368970013, - "grad_norm": 1.3646821975708008, - "learning_rate": 8.40462311557789e-05, - "loss": 5.5512, - "step": 16379 - }, - { - "epoch": 8.542372881355933, - "grad_norm": 1.4587630033493042, - "learning_rate": 8.404522613065326e-05, - "loss": 5.4475, - "step": 16380 - }, - { - "epoch": 8.542894393741852, - "grad_norm": 1.4335111379623413, - "learning_rate": 8.404422110552764e-05, - "loss": 5.875, - "step": 16381 - }, - { - "epoch": 8.54341590612777, - "grad_norm": 1.3422613143920898, - "learning_rate": 8.4043216080402e-05, - "loss": 5.8181, - "step": 16382 - }, - { - "epoch": 8.54393741851369, - "grad_norm": 1.4235994815826416, - "learning_rate": 8.404221105527638e-05, - "loss": 5.6201, - "step": 16383 - }, - { - "epoch": 8.544458930899609, - "grad_norm": 1.339044213294983, - "learning_rate": 8.404120603015075e-05, - "loss": 5.6226, - "step": 16384 - }, - { - "epoch": 8.544980443285528, - "grad_norm": 1.3962174654006958, - "learning_rate": 8.404020100502513e-05, - "loss": 5.8274, - "step": 16385 - }, - { - "epoch": 8.545501955671448, - "grad_norm": 1.3991062641143799, - "learning_rate": 8.40391959798995e-05, - "loss": 5.8344, - "step": 16386 - }, - { - "epoch": 8.546023468057367, - "grad_norm": 1.4849735498428345, - "learning_rate": 8.403819095477388e-05, - "loss": 5.4119, - "step": 16387 - }, - { - "epoch": 8.546544980443285, - "grad_norm": 1.3055260181427002, - "learning_rate": 8.403718592964825e-05, - "loss": 5.7153, - "step": 16388 - }, - { - "epoch": 8.547066492829204, - "grad_norm": 1.406346082687378, - "learning_rate": 8.403618090452262e-05, - "loss": 5.215, - "step": 16389 - }, - { - "epoch": 8.547588005215124, - "grad_norm": 1.384652853012085, - "learning_rate": 8.403517587939699e-05, - "loss": 5.7013, - "step": 16390 - }, - { - "epoch": 8.548109517601043, - "grad_norm": 1.4237455129623413, - "learning_rate": 8.403417085427137e-05, - "loss": 5.6436, - "step": 16391 - }, - { - "epoch": 8.548631029986963, - "grad_norm": 1.447649359703064, - "learning_rate": 8.403316582914573e-05, - "loss": 4.9642, - "step": 16392 - }, - { - "epoch": 8.549152542372882, - "grad_norm": 1.4991704225540161, - "learning_rate": 8.40321608040201e-05, - "loss": 5.1046, - "step": 16393 - }, - { - "epoch": 8.5496740547588, - "grad_norm": 1.4957998991012573, - "learning_rate": 8.403115577889447e-05, - "loss": 5.1178, - "step": 16394 - }, - { - "epoch": 8.55019556714472, - "grad_norm": 1.3411298990249634, - "learning_rate": 8.403015075376885e-05, - "loss": 5.8324, - "step": 16395 - }, - { - "epoch": 8.550717079530639, - "grad_norm": 1.338019609451294, - "learning_rate": 8.402914572864323e-05, - "loss": 5.5814, - "step": 16396 - }, - { - "epoch": 8.551238591916558, - "grad_norm": 1.4990488290786743, - "learning_rate": 8.402814070351759e-05, - "loss": 5.1198, - "step": 16397 - }, - { - "epoch": 8.551760104302478, - "grad_norm": 1.44180428981781, - "learning_rate": 8.402713567839197e-05, - "loss": 5.8572, - "step": 16398 - }, - { - "epoch": 8.552281616688397, - "grad_norm": 1.4442390203475952, - "learning_rate": 8.402613065326633e-05, - "loss": 5.3527, - "step": 16399 - }, - { - "epoch": 8.552803129074315, - "grad_norm": 1.495782494544983, - "learning_rate": 8.402512562814071e-05, - "loss": 5.7837, - "step": 16400 - }, - { - "epoch": 8.553324641460234, - "grad_norm": 1.3417143821716309, - "learning_rate": 8.402412060301508e-05, - "loss": 5.8992, - "step": 16401 - }, - { - "epoch": 8.553846153846154, - "grad_norm": 1.4816838502883911, - "learning_rate": 8.402311557788945e-05, - "loss": 5.7034, - "step": 16402 - }, - { - "epoch": 8.554367666232073, - "grad_norm": 1.5080004930496216, - "learning_rate": 8.402211055276382e-05, - "loss": 5.4601, - "step": 16403 - }, - { - "epoch": 8.554889178617993, - "grad_norm": 1.4068087339401245, - "learning_rate": 8.40211055276382e-05, - "loss": 5.8374, - "step": 16404 - }, - { - "epoch": 8.555410691003912, - "grad_norm": 1.4894593954086304, - "learning_rate": 8.402010050251256e-05, - "loss": 5.6616, - "step": 16405 - }, - { - "epoch": 8.55593220338983, - "grad_norm": 1.3798271417617798, - "learning_rate": 8.401909547738694e-05, - "loss": 5.8787, - "step": 16406 - }, - { - "epoch": 8.55645371577575, - "grad_norm": 1.3827537298202515, - "learning_rate": 8.401809045226132e-05, - "loss": 5.6701, - "step": 16407 - }, - { - "epoch": 8.556975228161669, - "grad_norm": 1.3991239070892334, - "learning_rate": 8.401708542713568e-05, - "loss": 5.4336, - "step": 16408 - }, - { - "epoch": 8.557496740547588, - "grad_norm": 1.4078681468963623, - "learning_rate": 8.401608040201006e-05, - "loss": 5.8665, - "step": 16409 - }, - { - "epoch": 8.558018252933508, - "grad_norm": 1.5952829122543335, - "learning_rate": 8.401507537688442e-05, - "loss": 5.5405, - "step": 16410 - }, - { - "epoch": 8.558539765319427, - "grad_norm": 1.4282077550888062, - "learning_rate": 8.40140703517588e-05, - "loss": 5.9755, - "step": 16411 - }, - { - "epoch": 8.559061277705345, - "grad_norm": 1.3922971487045288, - "learning_rate": 8.401306532663316e-05, - "loss": 5.4017, - "step": 16412 - }, - { - "epoch": 8.559582790091264, - "grad_norm": 1.5971148014068604, - "learning_rate": 8.401206030150754e-05, - "loss": 4.9622, - "step": 16413 - }, - { - "epoch": 8.560104302477184, - "grad_norm": 1.4166117906570435, - "learning_rate": 8.401105527638191e-05, - "loss": 5.2486, - "step": 16414 - }, - { - "epoch": 8.560625814863103, - "grad_norm": 1.4766515493392944, - "learning_rate": 8.401005025125628e-05, - "loss": 5.443, - "step": 16415 - }, - { - "epoch": 8.561147327249023, - "grad_norm": 1.3745697736740112, - "learning_rate": 8.400904522613066e-05, - "loss": 4.9973, - "step": 16416 - }, - { - "epoch": 8.561668839634942, - "grad_norm": 1.4124486446380615, - "learning_rate": 8.400804020100504e-05, - "loss": 5.4626, - "step": 16417 - }, - { - "epoch": 8.56219035202086, - "grad_norm": 1.377261996269226, - "learning_rate": 8.40070351758794e-05, - "loss": 5.6449, - "step": 16418 - }, - { - "epoch": 8.56271186440678, - "grad_norm": 1.4113552570343018, - "learning_rate": 8.400603015075378e-05, - "loss": 5.5016, - "step": 16419 - }, - { - "epoch": 8.563233376792699, - "grad_norm": 1.4962824583053589, - "learning_rate": 8.400502512562815e-05, - "loss": 5.4951, - "step": 16420 - }, - { - "epoch": 8.563754889178618, - "grad_norm": 1.4838231801986694, - "learning_rate": 8.400402010050251e-05, - "loss": 5.5482, - "step": 16421 - }, - { - "epoch": 8.564276401564538, - "grad_norm": 1.3402056694030762, - "learning_rate": 8.400301507537689e-05, - "loss": 5.8117, - "step": 16422 - }, - { - "epoch": 8.564797913950457, - "grad_norm": 1.440248727798462, - "learning_rate": 8.400201005025125e-05, - "loss": 5.5855, - "step": 16423 - }, - { - "epoch": 8.565319426336375, - "grad_norm": 1.4750702381134033, - "learning_rate": 8.400100502512563e-05, - "loss": 5.4166, - "step": 16424 - }, - { - "epoch": 8.565840938722294, - "grad_norm": 1.5666407346725464, - "learning_rate": 8.4e-05, - "loss": 5.2097, - "step": 16425 - }, - { - "epoch": 8.566362451108214, - "grad_norm": 1.4203320741653442, - "learning_rate": 8.399899497487437e-05, - "loss": 5.6418, - "step": 16426 - }, - { - "epoch": 8.566883963494133, - "grad_norm": 1.3683960437774658, - "learning_rate": 8.399798994974875e-05, - "loss": 5.5084, - "step": 16427 - }, - { - "epoch": 8.567405475880053, - "grad_norm": 1.5285090208053589, - "learning_rate": 8.399698492462313e-05, - "loss": 5.5311, - "step": 16428 - }, - { - "epoch": 8.567926988265972, - "grad_norm": 1.4183475971221924, - "learning_rate": 8.399597989949749e-05, - "loss": 5.4719, - "step": 16429 - }, - { - "epoch": 8.56844850065189, - "grad_norm": 1.4531680345535278, - "learning_rate": 8.399497487437187e-05, - "loss": 5.5417, - "step": 16430 - }, - { - "epoch": 8.56897001303781, - "grad_norm": 1.42386794090271, - "learning_rate": 8.399396984924624e-05, - "loss": 5.4014, - "step": 16431 - }, - { - "epoch": 8.569491525423729, - "grad_norm": 1.4993036985397339, - "learning_rate": 8.399296482412061e-05, - "loss": 5.2145, - "step": 16432 - }, - { - "epoch": 8.570013037809648, - "grad_norm": 1.4096256494522095, - "learning_rate": 8.399195979899498e-05, - "loss": 5.9852, - "step": 16433 - }, - { - "epoch": 8.570534550195568, - "grad_norm": 1.475264549255371, - "learning_rate": 8.399095477386934e-05, - "loss": 5.5759, - "step": 16434 - }, - { - "epoch": 8.571056062581487, - "grad_norm": 1.562327265739441, - "learning_rate": 8.398994974874372e-05, - "loss": 5.5271, - "step": 16435 - }, - { - "epoch": 8.571577574967405, - "grad_norm": 1.4583144187927246, - "learning_rate": 8.398894472361808e-05, - "loss": 5.5762, - "step": 16436 - }, - { - "epoch": 8.572099087353324, - "grad_norm": 1.3762739896774292, - "learning_rate": 8.398793969849246e-05, - "loss": 5.1247, - "step": 16437 - }, - { - "epoch": 8.572620599739244, - "grad_norm": 1.5386998653411865, - "learning_rate": 8.398693467336684e-05, - "loss": 5.5611, - "step": 16438 - }, - { - "epoch": 8.573142112125163, - "grad_norm": 1.4216035604476929, - "learning_rate": 8.398592964824122e-05, - "loss": 6.0088, - "step": 16439 - }, - { - "epoch": 8.573663624511083, - "grad_norm": 1.327891230583191, - "learning_rate": 8.398492462311558e-05, - "loss": 5.7278, - "step": 16440 - }, - { - "epoch": 8.574185136897002, - "grad_norm": 1.3435205221176147, - "learning_rate": 8.398391959798996e-05, - "loss": 5.5298, - "step": 16441 - }, - { - "epoch": 8.57470664928292, - "grad_norm": 1.4175007343292236, - "learning_rate": 8.398291457286432e-05, - "loss": 5.8152, - "step": 16442 - }, - { - "epoch": 8.57522816166884, - "grad_norm": 1.4266506433486938, - "learning_rate": 8.39819095477387e-05, - "loss": 5.549, - "step": 16443 - }, - { - "epoch": 8.575749674054759, - "grad_norm": 1.3712613582611084, - "learning_rate": 8.398090452261307e-05, - "loss": 5.8054, - "step": 16444 - }, - { - "epoch": 8.576271186440678, - "grad_norm": 1.294525384902954, - "learning_rate": 8.397989949748744e-05, - "loss": 5.6886, - "step": 16445 - }, - { - "epoch": 8.576792698826598, - "grad_norm": 1.3315651416778564, - "learning_rate": 8.397889447236181e-05, - "loss": 5.6663, - "step": 16446 - }, - { - "epoch": 8.577314211212517, - "grad_norm": 1.4869273900985718, - "learning_rate": 8.397788944723619e-05, - "loss": 4.5788, - "step": 16447 - }, - { - "epoch": 8.577835723598435, - "grad_norm": 1.3594143390655518, - "learning_rate": 8.397688442211056e-05, - "loss": 5.6146, - "step": 16448 - }, - { - "epoch": 8.578357235984354, - "grad_norm": 1.3804850578308105, - "learning_rate": 8.397587939698493e-05, - "loss": 4.865, - "step": 16449 - }, - { - "epoch": 8.578878748370274, - "grad_norm": 1.5370855331420898, - "learning_rate": 8.39748743718593e-05, - "loss": 5.6898, - "step": 16450 - }, - { - "epoch": 8.579400260756193, - "grad_norm": 1.639402151107788, - "learning_rate": 8.397386934673367e-05, - "loss": 5.1599, - "step": 16451 - }, - { - "epoch": 8.579921773142113, - "grad_norm": 1.4942504167556763, - "learning_rate": 8.397286432160805e-05, - "loss": 5.1904, - "step": 16452 - }, - { - "epoch": 8.58044328552803, - "grad_norm": 1.413206934928894, - "learning_rate": 8.397185929648241e-05, - "loss": 5.4894, - "step": 16453 - }, - { - "epoch": 8.58096479791395, - "grad_norm": 1.6142665147781372, - "learning_rate": 8.397085427135679e-05, - "loss": 5.7246, - "step": 16454 - }, - { - "epoch": 8.58148631029987, - "grad_norm": 1.4179819822311401, - "learning_rate": 8.396984924623115e-05, - "loss": 5.7597, - "step": 16455 - }, - { - "epoch": 8.582007822685789, - "grad_norm": 1.4921118021011353, - "learning_rate": 8.396884422110553e-05, - "loss": 5.2921, - "step": 16456 - }, - { - "epoch": 8.582529335071708, - "grad_norm": 1.3822928667068481, - "learning_rate": 8.39678391959799e-05, - "loss": 5.851, - "step": 16457 - }, - { - "epoch": 8.583050847457628, - "grad_norm": 1.4476432800292969, - "learning_rate": 8.396683417085427e-05, - "loss": 5.8784, - "step": 16458 - }, - { - "epoch": 8.583572359843547, - "grad_norm": 1.359006643295288, - "learning_rate": 8.396582914572865e-05, - "loss": 5.6763, - "step": 16459 - }, - { - "epoch": 8.584093872229465, - "grad_norm": 1.4410808086395264, - "learning_rate": 8.396482412060302e-05, - "loss": 5.5889, - "step": 16460 - }, - { - "epoch": 8.584615384615384, - "grad_norm": 1.4647161960601807, - "learning_rate": 8.39638190954774e-05, - "loss": 5.2294, - "step": 16461 - }, - { - "epoch": 8.585136897001304, - "grad_norm": 1.5320937633514404, - "learning_rate": 8.396281407035176e-05, - "loss": 5.4201, - "step": 16462 - }, - { - "epoch": 8.585658409387223, - "grad_norm": 1.5162944793701172, - "learning_rate": 8.396180904522614e-05, - "loss": 5.6362, - "step": 16463 - }, - { - "epoch": 8.586179921773143, - "grad_norm": 1.4587829113006592, - "learning_rate": 8.39608040201005e-05, - "loss": 5.4805, - "step": 16464 - }, - { - "epoch": 8.58670143415906, - "grad_norm": 1.5023447275161743, - "learning_rate": 8.395979899497488e-05, - "loss": 5.7375, - "step": 16465 - }, - { - "epoch": 8.58722294654498, - "grad_norm": 1.3555914163589478, - "learning_rate": 8.395879396984924e-05, - "loss": 5.5202, - "step": 16466 - }, - { - "epoch": 8.5877444589309, - "grad_norm": 1.4481830596923828, - "learning_rate": 8.395778894472362e-05, - "loss": 5.7434, - "step": 16467 - }, - { - "epoch": 8.588265971316819, - "grad_norm": 1.4838504791259766, - "learning_rate": 8.3956783919598e-05, - "loss": 5.3673, - "step": 16468 - }, - { - "epoch": 8.588787483702738, - "grad_norm": 1.47458815574646, - "learning_rate": 8.395577889447238e-05, - "loss": 5.1476, - "step": 16469 - }, - { - "epoch": 8.589308996088658, - "grad_norm": 1.4146173000335693, - "learning_rate": 8.395477386934674e-05, - "loss": 5.7709, - "step": 16470 - }, - { - "epoch": 8.589830508474577, - "grad_norm": 1.5466421842575073, - "learning_rate": 8.395376884422112e-05, - "loss": 5.3961, - "step": 16471 - }, - { - "epoch": 8.590352020860495, - "grad_norm": 1.449985146522522, - "learning_rate": 8.395276381909548e-05, - "loss": 5.76, - "step": 16472 - }, - { - "epoch": 8.590873533246414, - "grad_norm": 1.4856599569320679, - "learning_rate": 8.395175879396985e-05, - "loss": 5.5067, - "step": 16473 - }, - { - "epoch": 8.591395045632334, - "grad_norm": 1.3422532081604004, - "learning_rate": 8.395075376884422e-05, - "loss": 5.8052, - "step": 16474 - }, - { - "epoch": 8.591916558018253, - "grad_norm": 1.4634809494018555, - "learning_rate": 8.394974874371859e-05, - "loss": 5.4581, - "step": 16475 - }, - { - "epoch": 8.592438070404173, - "grad_norm": 1.4853835105895996, - "learning_rate": 8.394874371859297e-05, - "loss": 5.3119, - "step": 16476 - }, - { - "epoch": 8.59295958279009, - "grad_norm": 1.4685561656951904, - "learning_rate": 8.394773869346733e-05, - "loss": 5.7229, - "step": 16477 - }, - { - "epoch": 8.59348109517601, - "grad_norm": 1.4492119550704956, - "learning_rate": 8.394673366834171e-05, - "loss": 5.7958, - "step": 16478 - }, - { - "epoch": 8.59400260756193, - "grad_norm": 1.34714937210083, - "learning_rate": 8.394572864321609e-05, - "loss": 6.0179, - "step": 16479 - }, - { - "epoch": 8.594524119947849, - "grad_norm": 1.346030592918396, - "learning_rate": 8.394472361809046e-05, - "loss": 5.5636, - "step": 16480 - }, - { - "epoch": 8.595045632333768, - "grad_norm": 1.476633071899414, - "learning_rate": 8.394371859296483e-05, - "loss": 4.9938, - "step": 16481 - }, - { - "epoch": 8.595567144719688, - "grad_norm": 1.4267587661743164, - "learning_rate": 8.39427135678392e-05, - "loss": 5.5708, - "step": 16482 - }, - { - "epoch": 8.596088657105605, - "grad_norm": 1.5029383897781372, - "learning_rate": 8.394170854271357e-05, - "loss": 5.4351, - "step": 16483 - }, - { - "epoch": 8.596610169491525, - "grad_norm": 1.6294695138931274, - "learning_rate": 8.394070351758795e-05, - "loss": 5.5123, - "step": 16484 - }, - { - "epoch": 8.597131681877444, - "grad_norm": 1.4931389093399048, - "learning_rate": 8.393969849246231e-05, - "loss": 5.8067, - "step": 16485 - }, - { - "epoch": 8.597653194263364, - "grad_norm": 1.5537701845169067, - "learning_rate": 8.393869346733668e-05, - "loss": 5.6398, - "step": 16486 - }, - { - "epoch": 8.598174706649283, - "grad_norm": 1.4526457786560059, - "learning_rate": 8.393768844221105e-05, - "loss": 5.6443, - "step": 16487 - }, - { - "epoch": 8.598696219035203, - "grad_norm": 1.6119247674942017, - "learning_rate": 8.393668341708543e-05, - "loss": 5.1907, - "step": 16488 - }, - { - "epoch": 8.59921773142112, - "grad_norm": 1.4856294393539429, - "learning_rate": 8.393567839195981e-05, - "loss": 5.8804, - "step": 16489 - }, - { - "epoch": 8.59973924380704, - "grad_norm": 1.636724829673767, - "learning_rate": 8.393467336683417e-05, - "loss": 4.9076, - "step": 16490 - }, - { - "epoch": 8.60026075619296, - "grad_norm": 1.4764469861984253, - "learning_rate": 8.393366834170855e-05, - "loss": 5.7675, - "step": 16491 - }, - { - "epoch": 8.600782268578879, - "grad_norm": 1.4495655298233032, - "learning_rate": 8.393266331658292e-05, - "loss": 5.1568, - "step": 16492 - }, - { - "epoch": 8.601303780964798, - "grad_norm": 1.519147515296936, - "learning_rate": 8.39316582914573e-05, - "loss": 5.6042, - "step": 16493 - }, - { - "epoch": 8.601825293350718, - "grad_norm": 1.677255392074585, - "learning_rate": 8.393065326633166e-05, - "loss": 5.8338, - "step": 16494 - }, - { - "epoch": 8.602346805736635, - "grad_norm": 1.4560002088546753, - "learning_rate": 8.392964824120604e-05, - "loss": 5.6589, - "step": 16495 - }, - { - "epoch": 8.602868318122555, - "grad_norm": 1.3913286924362183, - "learning_rate": 8.39286432160804e-05, - "loss": 5.6062, - "step": 16496 - }, - { - "epoch": 8.603389830508474, - "grad_norm": 1.362370491027832, - "learning_rate": 8.392763819095478e-05, - "loss": 5.8623, - "step": 16497 - }, - { - "epoch": 8.603911342894394, - "grad_norm": 1.344109058380127, - "learning_rate": 8.392663316582914e-05, - "loss": 5.4936, - "step": 16498 - }, - { - "epoch": 8.604432855280313, - "grad_norm": 1.5962419509887695, - "learning_rate": 8.392562814070352e-05, - "loss": 5.4933, - "step": 16499 - }, - { - "epoch": 8.604954367666233, - "grad_norm": 1.5037920475006104, - "learning_rate": 8.39246231155779e-05, - "loss": 5.3841, - "step": 16500 - }, - { - "epoch": 8.60547588005215, - "grad_norm": 1.4102195501327515, - "learning_rate": 8.392361809045226e-05, - "loss": 5.809, - "step": 16501 - }, - { - "epoch": 8.60599739243807, - "grad_norm": 1.4363417625427246, - "learning_rate": 8.392261306532664e-05, - "loss": 5.4381, - "step": 16502 - }, - { - "epoch": 8.60651890482399, - "grad_norm": 1.4897860288619995, - "learning_rate": 8.3921608040201e-05, - "loss": 5.2644, - "step": 16503 - }, - { - "epoch": 8.607040417209909, - "grad_norm": 1.5019948482513428, - "learning_rate": 8.392060301507538e-05, - "loss": 5.1836, - "step": 16504 - }, - { - "epoch": 8.607561929595828, - "grad_norm": 1.5264772176742554, - "learning_rate": 8.391959798994975e-05, - "loss": 5.235, - "step": 16505 - }, - { - "epoch": 8.608083441981748, - "grad_norm": 1.3974666595458984, - "learning_rate": 8.391859296482413e-05, - "loss": 5.6258, - "step": 16506 - }, - { - "epoch": 8.608604954367665, - "grad_norm": 1.443668007850647, - "learning_rate": 8.391758793969849e-05, - "loss": 5.6057, - "step": 16507 - }, - { - "epoch": 8.609126466753585, - "grad_norm": 1.3504314422607422, - "learning_rate": 8.391658291457287e-05, - "loss": 5.7339, - "step": 16508 - }, - { - "epoch": 8.609647979139504, - "grad_norm": 1.3409943580627441, - "learning_rate": 8.391557788944725e-05, - "loss": 5.7428, - "step": 16509 - }, - { - "epoch": 8.610169491525424, - "grad_norm": 1.45543372631073, - "learning_rate": 8.391457286432162e-05, - "loss": 5.6455, - "step": 16510 - }, - { - "epoch": 8.610691003911343, - "grad_norm": 1.395363211631775, - "learning_rate": 8.391356783919599e-05, - "loss": 5.8471, - "step": 16511 - }, - { - "epoch": 8.611212516297263, - "grad_norm": 1.2858837842941284, - "learning_rate": 8.391256281407036e-05, - "loss": 5.4495, - "step": 16512 - }, - { - "epoch": 8.61173402868318, - "grad_norm": 1.366391658782959, - "learning_rate": 8.391155778894473e-05, - "loss": 5.7874, - "step": 16513 - }, - { - "epoch": 8.6122555410691, - "grad_norm": 1.6932330131530762, - "learning_rate": 8.39105527638191e-05, - "loss": 4.9325, - "step": 16514 - }, - { - "epoch": 8.61277705345502, - "grad_norm": 1.3596315383911133, - "learning_rate": 8.390954773869347e-05, - "loss": 5.225, - "step": 16515 - }, - { - "epoch": 8.613298565840939, - "grad_norm": 1.458364486694336, - "learning_rate": 8.390854271356784e-05, - "loss": 5.3605, - "step": 16516 - }, - { - "epoch": 8.613820078226858, - "grad_norm": 1.4696861505508423, - "learning_rate": 8.390753768844221e-05, - "loss": 5.5098, - "step": 16517 - }, - { - "epoch": 8.614341590612778, - "grad_norm": 1.6012495756149292, - "learning_rate": 8.390653266331658e-05, - "loss": 5.4593, - "step": 16518 - }, - { - "epoch": 8.614863102998696, - "grad_norm": 1.3780916929244995, - "learning_rate": 8.390552763819096e-05, - "loss": 5.1492, - "step": 16519 - }, - { - "epoch": 8.615384615384615, - "grad_norm": 1.4615017175674438, - "learning_rate": 8.390452261306533e-05, - "loss": 5.5484, - "step": 16520 - }, - { - "epoch": 8.615906127770534, - "grad_norm": 1.3668862581253052, - "learning_rate": 8.390351758793971e-05, - "loss": 5.5416, - "step": 16521 - }, - { - "epoch": 8.616427640156454, - "grad_norm": 1.4412810802459717, - "learning_rate": 8.390251256281408e-05, - "loss": 5.7227, - "step": 16522 - }, - { - "epoch": 8.616949152542373, - "grad_norm": 1.4808135032653809, - "learning_rate": 8.390150753768845e-05, - "loss": 5.1615, - "step": 16523 - }, - { - "epoch": 8.617470664928293, - "grad_norm": 1.5771206617355347, - "learning_rate": 8.390050251256282e-05, - "loss": 5.713, - "step": 16524 - }, - { - "epoch": 8.61799217731421, - "grad_norm": 1.3992239236831665, - "learning_rate": 8.38994974874372e-05, - "loss": 5.503, - "step": 16525 - }, - { - "epoch": 8.61851368970013, - "grad_norm": 1.4297034740447998, - "learning_rate": 8.389849246231156e-05, - "loss": 5.7046, - "step": 16526 - }, - { - "epoch": 8.61903520208605, - "grad_norm": 1.4618194103240967, - "learning_rate": 8.389748743718592e-05, - "loss": 5.2111, - "step": 16527 - }, - { - "epoch": 8.619556714471969, - "grad_norm": 1.409436821937561, - "learning_rate": 8.38964824120603e-05, - "loss": 5.26, - "step": 16528 - }, - { - "epoch": 8.620078226857888, - "grad_norm": 1.481938123703003, - "learning_rate": 8.389547738693468e-05, - "loss": 5.5337, - "step": 16529 - }, - { - "epoch": 8.620599739243808, - "grad_norm": 1.4171274900436401, - "learning_rate": 8.389447236180906e-05, - "loss": 5.8327, - "step": 16530 - }, - { - "epoch": 8.621121251629726, - "grad_norm": 1.403080940246582, - "learning_rate": 8.389346733668342e-05, - "loss": 5.6899, - "step": 16531 - }, - { - "epoch": 8.621642764015645, - "grad_norm": 1.2920950651168823, - "learning_rate": 8.38924623115578e-05, - "loss": 5.9852, - "step": 16532 - }, - { - "epoch": 8.622164276401564, - "grad_norm": 1.4430574178695679, - "learning_rate": 8.389145728643216e-05, - "loss": 5.5321, - "step": 16533 - }, - { - "epoch": 8.622685788787484, - "grad_norm": 1.2973804473876953, - "learning_rate": 8.389045226130654e-05, - "loss": 5.921, - "step": 16534 - }, - { - "epoch": 8.623207301173403, - "grad_norm": 1.6506587266921997, - "learning_rate": 8.38894472361809e-05, - "loss": 5.7581, - "step": 16535 - }, - { - "epoch": 8.623728813559323, - "grad_norm": 1.521450161933899, - "learning_rate": 8.388844221105528e-05, - "loss": 5.4354, - "step": 16536 - }, - { - "epoch": 8.62425032594524, - "grad_norm": 1.596073865890503, - "learning_rate": 8.388743718592965e-05, - "loss": 5.5277, - "step": 16537 - }, - { - "epoch": 8.62477183833116, - "grad_norm": 1.5043069124221802, - "learning_rate": 8.388643216080403e-05, - "loss": 5.2578, - "step": 16538 - }, - { - "epoch": 8.62529335071708, - "grad_norm": 1.3466498851776123, - "learning_rate": 8.388542713567839e-05, - "loss": 5.6852, - "step": 16539 - }, - { - "epoch": 8.625814863102999, - "grad_norm": 1.5276778936386108, - "learning_rate": 8.388442211055277e-05, - "loss": 5.5039, - "step": 16540 - }, - { - "epoch": 8.626336375488918, - "grad_norm": 1.50129234790802, - "learning_rate": 8.388341708542715e-05, - "loss": 5.4158, - "step": 16541 - }, - { - "epoch": 8.626857887874838, - "grad_norm": 1.472386121749878, - "learning_rate": 8.388241206030151e-05, - "loss": 5.3504, - "step": 16542 - }, - { - "epoch": 8.627379400260756, - "grad_norm": 1.4357982873916626, - "learning_rate": 8.388140703517589e-05, - "loss": 5.705, - "step": 16543 - }, - { - "epoch": 8.627900912646675, - "grad_norm": 1.3610848188400269, - "learning_rate": 8.388040201005025e-05, - "loss": 5.5192, - "step": 16544 - }, - { - "epoch": 8.628422425032594, - "grad_norm": 1.3610913753509521, - "learning_rate": 8.387939698492463e-05, - "loss": 5.8306, - "step": 16545 - }, - { - "epoch": 8.628943937418514, - "grad_norm": 1.36136794090271, - "learning_rate": 8.3878391959799e-05, - "loss": 5.6766, - "step": 16546 - }, - { - "epoch": 8.629465449804433, - "grad_norm": 1.440032958984375, - "learning_rate": 8.387738693467337e-05, - "loss": 5.4307, - "step": 16547 - }, - { - "epoch": 8.629986962190351, - "grad_norm": 1.3191920518875122, - "learning_rate": 8.387638190954774e-05, - "loss": 5.6834, - "step": 16548 - }, - { - "epoch": 8.63050847457627, - "grad_norm": 1.4927971363067627, - "learning_rate": 8.387537688442211e-05, - "loss": 5.5975, - "step": 16549 - }, - { - "epoch": 8.63102998696219, - "grad_norm": 1.393389105796814, - "learning_rate": 8.387437185929649e-05, - "loss": 5.3358, - "step": 16550 - }, - { - "epoch": 8.63155149934811, - "grad_norm": 1.4889214038848877, - "learning_rate": 8.387336683417087e-05, - "loss": 5.0777, - "step": 16551 - }, - { - "epoch": 8.632073011734029, - "grad_norm": 1.4875729084014893, - "learning_rate": 8.387236180904523e-05, - "loss": 5.3896, - "step": 16552 - }, - { - "epoch": 8.632594524119948, - "grad_norm": 1.5702426433563232, - "learning_rate": 8.38713567839196e-05, - "loss": 5.4024, - "step": 16553 - }, - { - "epoch": 8.633116036505868, - "grad_norm": 1.4722286462783813, - "learning_rate": 8.387035175879398e-05, - "loss": 5.4165, - "step": 16554 - }, - { - "epoch": 8.633637548891786, - "grad_norm": 1.5269958972930908, - "learning_rate": 8.386934673366834e-05, - "loss": 5.2223, - "step": 16555 - }, - { - "epoch": 8.634159061277705, - "grad_norm": 1.428118109703064, - "learning_rate": 8.386834170854272e-05, - "loss": 5.4126, - "step": 16556 - }, - { - "epoch": 8.634680573663625, - "grad_norm": 1.4544731378555298, - "learning_rate": 8.386733668341708e-05, - "loss": 5.2815, - "step": 16557 - }, - { - "epoch": 8.635202086049544, - "grad_norm": 1.3946664333343506, - "learning_rate": 8.386633165829146e-05, - "loss": 5.5954, - "step": 16558 - }, - { - "epoch": 8.635723598435463, - "grad_norm": 1.3931223154067993, - "learning_rate": 8.386532663316582e-05, - "loss": 5.5907, - "step": 16559 - }, - { - "epoch": 8.636245110821381, - "grad_norm": 1.459550142288208, - "learning_rate": 8.38643216080402e-05, - "loss": 5.5241, - "step": 16560 - }, - { - "epoch": 8.6367666232073, - "grad_norm": 1.4300659894943237, - "learning_rate": 8.386331658291458e-05, - "loss": 5.8512, - "step": 16561 - }, - { - "epoch": 8.63728813559322, - "grad_norm": 1.5030372142791748, - "learning_rate": 8.386231155778896e-05, - "loss": 5.6323, - "step": 16562 - }, - { - "epoch": 8.63780964797914, - "grad_norm": 1.405116319656372, - "learning_rate": 8.386130653266332e-05, - "loss": 5.3041, - "step": 16563 - }, - { - "epoch": 8.638331160365059, - "grad_norm": 1.4750925302505493, - "learning_rate": 8.38603015075377e-05, - "loss": 4.863, - "step": 16564 - }, - { - "epoch": 8.638852672750978, - "grad_norm": 1.569077968597412, - "learning_rate": 8.385929648241206e-05, - "loss": 4.8725, - "step": 16565 - }, - { - "epoch": 8.639374185136898, - "grad_norm": 1.5400176048278809, - "learning_rate": 8.385829145728643e-05, - "loss": 4.9517, - "step": 16566 - }, - { - "epoch": 8.639895697522816, - "grad_norm": 1.4053049087524414, - "learning_rate": 8.38572864321608e-05, - "loss": 5.6578, - "step": 16567 - }, - { - "epoch": 8.640417209908735, - "grad_norm": 1.3419307470321655, - "learning_rate": 8.385628140703517e-05, - "loss": 5.7574, - "step": 16568 - }, - { - "epoch": 8.640938722294655, - "grad_norm": 1.4492665529251099, - "learning_rate": 8.385527638190955e-05, - "loss": 5.1601, - "step": 16569 - }, - { - "epoch": 8.641460234680574, - "grad_norm": 1.487099051475525, - "learning_rate": 8.385427135678393e-05, - "loss": 5.2153, - "step": 16570 - }, - { - "epoch": 8.641981747066493, - "grad_norm": 1.4220266342163086, - "learning_rate": 8.38532663316583e-05, - "loss": 5.7231, - "step": 16571 - }, - { - "epoch": 8.642503259452411, - "grad_norm": 1.453260898590088, - "learning_rate": 8.385226130653267e-05, - "loss": 5.7866, - "step": 16572 - }, - { - "epoch": 8.64302477183833, - "grad_norm": 1.3707654476165771, - "learning_rate": 8.385125628140705e-05, - "loss": 5.7274, - "step": 16573 - }, - { - "epoch": 8.64354628422425, - "grad_norm": 1.487196445465088, - "learning_rate": 8.385025125628141e-05, - "loss": 5.173, - "step": 16574 - }, - { - "epoch": 8.64406779661017, - "grad_norm": 1.6495578289031982, - "learning_rate": 8.384924623115579e-05, - "loss": 5.1331, - "step": 16575 - }, - { - "epoch": 8.644589308996089, - "grad_norm": 1.3457738161087036, - "learning_rate": 8.384824120603015e-05, - "loss": 5.737, - "step": 16576 - }, - { - "epoch": 8.645110821382008, - "grad_norm": 1.4844590425491333, - "learning_rate": 8.384723618090453e-05, - "loss": 5.5775, - "step": 16577 - }, - { - "epoch": 8.645632333767926, - "grad_norm": 1.3897250890731812, - "learning_rate": 8.38462311557789e-05, - "loss": 5.5134, - "step": 16578 - }, - { - "epoch": 8.646153846153846, - "grad_norm": 1.4173873662948608, - "learning_rate": 8.384522613065326e-05, - "loss": 5.6111, - "step": 16579 - }, - { - "epoch": 8.646675358539765, - "grad_norm": 1.5056480169296265, - "learning_rate": 8.384422110552764e-05, - "loss": 5.6111, - "step": 16580 - }, - { - "epoch": 8.647196870925685, - "grad_norm": 1.397499442100525, - "learning_rate": 8.384321608040201e-05, - "loss": 5.6046, - "step": 16581 - }, - { - "epoch": 8.647718383311604, - "grad_norm": 1.5398305654525757, - "learning_rate": 8.384221105527639e-05, - "loss": 5.4465, - "step": 16582 - }, - { - "epoch": 8.648239895697523, - "grad_norm": 1.4561458826065063, - "learning_rate": 8.384120603015076e-05, - "loss": 5.5711, - "step": 16583 - }, - { - "epoch": 8.648761408083441, - "grad_norm": 1.5548501014709473, - "learning_rate": 8.384020100502513e-05, - "loss": 5.0746, - "step": 16584 - }, - { - "epoch": 8.64928292046936, - "grad_norm": 1.4613229036331177, - "learning_rate": 8.38391959798995e-05, - "loss": 5.7759, - "step": 16585 - }, - { - "epoch": 8.64980443285528, - "grad_norm": 1.5699530839920044, - "learning_rate": 8.383819095477388e-05, - "loss": 5.2883, - "step": 16586 - }, - { - "epoch": 8.6503259452412, - "grad_norm": 1.3970142602920532, - "learning_rate": 8.383718592964824e-05, - "loss": 5.6803, - "step": 16587 - }, - { - "epoch": 8.650847457627119, - "grad_norm": 1.49769127368927, - "learning_rate": 8.383618090452262e-05, - "loss": 5.1077, - "step": 16588 - }, - { - "epoch": 8.651368970013039, - "grad_norm": 1.5174593925476074, - "learning_rate": 8.383517587939698e-05, - "loss": 5.6081, - "step": 16589 - }, - { - "epoch": 8.651890482398956, - "grad_norm": 1.519070029258728, - "learning_rate": 8.383417085427136e-05, - "loss": 5.4573, - "step": 16590 - }, - { - "epoch": 8.652411994784876, - "grad_norm": 1.391111969947815, - "learning_rate": 8.383316582914574e-05, - "loss": 5.6005, - "step": 16591 - }, - { - "epoch": 8.652933507170795, - "grad_norm": 1.52463960647583, - "learning_rate": 8.38321608040201e-05, - "loss": 5.5859, - "step": 16592 - }, - { - "epoch": 8.653455019556715, - "grad_norm": 1.5849878787994385, - "learning_rate": 8.383115577889448e-05, - "loss": 5.3983, - "step": 16593 - }, - { - "epoch": 8.653976531942634, - "grad_norm": 1.430938959121704, - "learning_rate": 8.383015075376885e-05, - "loss": 5.4646, - "step": 16594 - }, - { - "epoch": 8.654498044328554, - "grad_norm": 1.525486946105957, - "learning_rate": 8.382914572864322e-05, - "loss": 5.3287, - "step": 16595 - }, - { - "epoch": 8.655019556714471, - "grad_norm": 1.5679303407669067, - "learning_rate": 8.382814070351759e-05, - "loss": 5.2893, - "step": 16596 - }, - { - "epoch": 8.65554106910039, - "grad_norm": 1.312322735786438, - "learning_rate": 8.382713567839197e-05, - "loss": 5.9517, - "step": 16597 - }, - { - "epoch": 8.65606258148631, - "grad_norm": 1.4227416515350342, - "learning_rate": 8.382613065326633e-05, - "loss": 5.1929, - "step": 16598 - }, - { - "epoch": 8.65658409387223, - "grad_norm": 1.429144263267517, - "learning_rate": 8.382512562814071e-05, - "loss": 5.5503, - "step": 16599 - }, - { - "epoch": 8.657105606258149, - "grad_norm": 1.4796130657196045, - "learning_rate": 8.382412060301507e-05, - "loss": 5.2019, - "step": 16600 - }, - { - "epoch": 8.657627118644069, - "grad_norm": 1.555585503578186, - "learning_rate": 8.382311557788945e-05, - "loss": 5.1949, - "step": 16601 - }, - { - "epoch": 8.658148631029986, - "grad_norm": 1.5434623956680298, - "learning_rate": 8.382211055276383e-05, - "loss": 5.7691, - "step": 16602 - }, - { - "epoch": 8.658670143415906, - "grad_norm": 1.4552699327468872, - "learning_rate": 8.38211055276382e-05, - "loss": 5.5276, - "step": 16603 - }, - { - "epoch": 8.659191655801825, - "grad_norm": 1.494680643081665, - "learning_rate": 8.382010050251257e-05, - "loss": 5.3751, - "step": 16604 - }, - { - "epoch": 8.659713168187745, - "grad_norm": 1.4369823932647705, - "learning_rate": 8.381909547738695e-05, - "loss": 5.6482, - "step": 16605 - }, - { - "epoch": 8.660234680573664, - "grad_norm": 1.360063076019287, - "learning_rate": 8.381809045226131e-05, - "loss": 5.3941, - "step": 16606 - }, - { - "epoch": 8.660756192959584, - "grad_norm": 1.672528862953186, - "learning_rate": 8.381708542713568e-05, - "loss": 4.9607, - "step": 16607 - }, - { - "epoch": 8.661277705345501, - "grad_norm": 1.5254144668579102, - "learning_rate": 8.381608040201005e-05, - "loss": 5.7766, - "step": 16608 - }, - { - "epoch": 8.66179921773142, - "grad_norm": 1.5074207782745361, - "learning_rate": 8.381507537688442e-05, - "loss": 5.329, - "step": 16609 - }, - { - "epoch": 8.66232073011734, - "grad_norm": 1.401104211807251, - "learning_rate": 8.38140703517588e-05, - "loss": 5.6074, - "step": 16610 - }, - { - "epoch": 8.66284224250326, - "grad_norm": 1.4910780191421509, - "learning_rate": 8.381306532663316e-05, - "loss": 5.5213, - "step": 16611 - }, - { - "epoch": 8.663363754889179, - "grad_norm": 1.4465361833572388, - "learning_rate": 8.381206030150754e-05, - "loss": 5.2407, - "step": 16612 - }, - { - "epoch": 8.663885267275099, - "grad_norm": 1.5335185527801514, - "learning_rate": 8.381105527638192e-05, - "loss": 5.018, - "step": 16613 - }, - { - "epoch": 8.664406779661016, - "grad_norm": 1.4419161081314087, - "learning_rate": 8.38100502512563e-05, - "loss": 5.4165, - "step": 16614 - }, - { - "epoch": 8.664928292046936, - "grad_norm": 1.4816707372665405, - "learning_rate": 8.380904522613066e-05, - "loss": 5.7111, - "step": 16615 - }, - { - "epoch": 8.665449804432855, - "grad_norm": 1.4090548753738403, - "learning_rate": 8.380804020100504e-05, - "loss": 5.0344, - "step": 16616 - }, - { - "epoch": 8.665971316818775, - "grad_norm": 1.5145695209503174, - "learning_rate": 8.38070351758794e-05, - "loss": 5.4544, - "step": 16617 - }, - { - "epoch": 8.666492829204694, - "grad_norm": 1.4700844287872314, - "learning_rate": 8.380603015075378e-05, - "loss": 5.3024, - "step": 16618 - }, - { - "epoch": 8.667014341590614, - "grad_norm": 1.4595328569412231, - "learning_rate": 8.380502512562814e-05, - "loss": 5.4788, - "step": 16619 - }, - { - "epoch": 8.667535853976531, - "grad_norm": 1.3690799474716187, - "learning_rate": 8.38040201005025e-05, - "loss": 5.5844, - "step": 16620 - }, - { - "epoch": 8.66805736636245, - "grad_norm": 1.3423594236373901, - "learning_rate": 8.380301507537688e-05, - "loss": 5.4903, - "step": 16621 - }, - { - "epoch": 8.66857887874837, - "grad_norm": 1.449497103691101, - "learning_rate": 8.380201005025126e-05, - "loss": 5.3351, - "step": 16622 - }, - { - "epoch": 8.66910039113429, - "grad_norm": 1.5126217603683472, - "learning_rate": 8.380100502512564e-05, - "loss": 5.219, - "step": 16623 - }, - { - "epoch": 8.66962190352021, - "grad_norm": 1.4892776012420654, - "learning_rate": 8.38e-05, - "loss": 5.1101, - "step": 16624 - }, - { - "epoch": 8.670143415906129, - "grad_norm": 1.4260246753692627, - "learning_rate": 8.379899497487438e-05, - "loss": 5.3634, - "step": 16625 - }, - { - "epoch": 8.670664928292046, - "grad_norm": 1.4476381540298462, - "learning_rate": 8.379798994974875e-05, - "loss": 5.5392, - "step": 16626 - }, - { - "epoch": 8.671186440677966, - "grad_norm": 1.4339954853057861, - "learning_rate": 8.379698492462312e-05, - "loss": 5.4832, - "step": 16627 - }, - { - "epoch": 8.671707953063885, - "grad_norm": 1.371097207069397, - "learning_rate": 8.379597989949749e-05, - "loss": 5.9241, - "step": 16628 - }, - { - "epoch": 8.672229465449805, - "grad_norm": 1.6015836000442505, - "learning_rate": 8.379497487437187e-05, - "loss": 4.8434, - "step": 16629 - }, - { - "epoch": 8.672750977835724, - "grad_norm": 1.5464744567871094, - "learning_rate": 8.379396984924623e-05, - "loss": 5.4228, - "step": 16630 - }, - { - "epoch": 8.673272490221644, - "grad_norm": 1.3944470882415771, - "learning_rate": 8.379296482412061e-05, - "loss": 5.4343, - "step": 16631 - }, - { - "epoch": 8.673794002607561, - "grad_norm": 1.5173838138580322, - "learning_rate": 8.379195979899497e-05, - "loss": 5.1319, - "step": 16632 - }, - { - "epoch": 8.67431551499348, - "grad_norm": 1.4492597579956055, - "learning_rate": 8.379095477386935e-05, - "loss": 5.2887, - "step": 16633 - }, - { - "epoch": 8.6748370273794, - "grad_norm": 1.365016222000122, - "learning_rate": 8.378994974874373e-05, - "loss": 5.6963, - "step": 16634 - }, - { - "epoch": 8.67535853976532, - "grad_norm": 1.3998199701309204, - "learning_rate": 8.378894472361809e-05, - "loss": 5.6116, - "step": 16635 - }, - { - "epoch": 8.67588005215124, - "grad_norm": 1.4436501264572144, - "learning_rate": 8.378793969849247e-05, - "loss": 5.2586, - "step": 16636 - }, - { - "epoch": 8.676401564537159, - "grad_norm": 1.3133180141448975, - "learning_rate": 8.378693467336683e-05, - "loss": 5.705, - "step": 16637 - }, - { - "epoch": 8.676923076923076, - "grad_norm": 1.3691469430923462, - "learning_rate": 8.378592964824121e-05, - "loss": 5.7805, - "step": 16638 - }, - { - "epoch": 8.677444589308996, - "grad_norm": 1.362877368927002, - "learning_rate": 8.378492462311558e-05, - "loss": 5.6339, - "step": 16639 - }, - { - "epoch": 8.677966101694915, - "grad_norm": 1.380416989326477, - "learning_rate": 8.378391959798995e-05, - "loss": 5.6602, - "step": 16640 - }, - { - "epoch": 8.678487614080835, - "grad_norm": 1.470716953277588, - "learning_rate": 8.378291457286432e-05, - "loss": 5.5072, - "step": 16641 - }, - { - "epoch": 8.679009126466754, - "grad_norm": 1.3453530073165894, - "learning_rate": 8.37819095477387e-05, - "loss": 5.8824, - "step": 16642 - }, - { - "epoch": 8.679530638852672, - "grad_norm": 1.406777024269104, - "learning_rate": 8.378090452261307e-05, - "loss": 5.8271, - "step": 16643 - }, - { - "epoch": 8.680052151238591, - "grad_norm": 1.4406081438064575, - "learning_rate": 8.377989949748745e-05, - "loss": 5.559, - "step": 16644 - }, - { - "epoch": 8.68057366362451, - "grad_norm": 1.3449913263320923, - "learning_rate": 8.377889447236182e-05, - "loss": 5.802, - "step": 16645 - }, - { - "epoch": 8.68109517601043, - "grad_norm": 1.348537802696228, - "learning_rate": 8.377788944723618e-05, - "loss": 5.8322, - "step": 16646 - }, - { - "epoch": 8.68161668839635, - "grad_norm": 1.546535611152649, - "learning_rate": 8.377688442211056e-05, - "loss": 5.4605, - "step": 16647 - }, - { - "epoch": 8.68213820078227, - "grad_norm": 1.3823496103286743, - "learning_rate": 8.377587939698492e-05, - "loss": 5.834, - "step": 16648 - }, - { - "epoch": 8.682659713168189, - "grad_norm": 1.4969735145568848, - "learning_rate": 8.37748743718593e-05, - "loss": 5.7348, - "step": 16649 - }, - { - "epoch": 8.683181225554106, - "grad_norm": 1.5523357391357422, - "learning_rate": 8.377386934673366e-05, - "loss": 5.3828, - "step": 16650 - }, - { - "epoch": 8.683702737940026, - "grad_norm": 1.404862403869629, - "learning_rate": 8.377286432160804e-05, - "loss": 5.4521, - "step": 16651 - }, - { - "epoch": 8.684224250325945, - "grad_norm": 1.3687931299209595, - "learning_rate": 8.377185929648241e-05, - "loss": 5.8628, - "step": 16652 - }, - { - "epoch": 8.684745762711865, - "grad_norm": 1.393282413482666, - "learning_rate": 8.377085427135678e-05, - "loss": 4.9944, - "step": 16653 - }, - { - "epoch": 8.685267275097784, - "grad_norm": 1.4880640506744385, - "learning_rate": 8.376984924623116e-05, - "loss": 5.2984, - "step": 16654 - }, - { - "epoch": 8.685788787483702, - "grad_norm": 1.565285563468933, - "learning_rate": 8.376884422110554e-05, - "loss": 4.7236, - "step": 16655 - }, - { - "epoch": 8.686310299869621, - "grad_norm": 1.3942184448242188, - "learning_rate": 8.37678391959799e-05, - "loss": 5.6128, - "step": 16656 - }, - { - "epoch": 8.68683181225554, - "grad_norm": 1.3721123933792114, - "learning_rate": 8.376683417085428e-05, - "loss": 5.9228, - "step": 16657 - }, - { - "epoch": 8.68735332464146, - "grad_norm": 1.5190467834472656, - "learning_rate": 8.376582914572865e-05, - "loss": 5.3673, - "step": 16658 - }, - { - "epoch": 8.68787483702738, - "grad_norm": 1.3845018148422241, - "learning_rate": 8.376482412060301e-05, - "loss": 5.6038, - "step": 16659 - }, - { - "epoch": 8.6883963494133, - "grad_norm": 1.3710434436798096, - "learning_rate": 8.376381909547739e-05, - "loss": 4.7887, - "step": 16660 - }, - { - "epoch": 8.688917861799219, - "grad_norm": 1.3422789573669434, - "learning_rate": 8.376281407035175e-05, - "loss": 5.8356, - "step": 16661 - }, - { - "epoch": 8.689439374185136, - "grad_norm": 1.3422796726226807, - "learning_rate": 8.376180904522613e-05, - "loss": 5.8807, - "step": 16662 - }, - { - "epoch": 8.689960886571056, - "grad_norm": 1.3242710828781128, - "learning_rate": 8.376080402010051e-05, - "loss": 5.7034, - "step": 16663 - }, - { - "epoch": 8.690482398956975, - "grad_norm": 1.6194626092910767, - "learning_rate": 8.375979899497489e-05, - "loss": 5.4305, - "step": 16664 - }, - { - "epoch": 8.691003911342895, - "grad_norm": 1.565456509590149, - "learning_rate": 8.375879396984925e-05, - "loss": 5.7412, - "step": 16665 - }, - { - "epoch": 8.691525423728814, - "grad_norm": 1.3932210206985474, - "learning_rate": 8.375778894472363e-05, - "loss": 5.587, - "step": 16666 - }, - { - "epoch": 8.692046936114732, - "grad_norm": 1.6675975322723389, - "learning_rate": 8.375678391959799e-05, - "loss": 5.4927, - "step": 16667 - }, - { - "epoch": 8.692568448500651, - "grad_norm": 1.529707431793213, - "learning_rate": 8.375577889447237e-05, - "loss": 4.7833, - "step": 16668 - }, - { - "epoch": 8.69308996088657, - "grad_norm": 1.6564233303070068, - "learning_rate": 8.375477386934674e-05, - "loss": 5.2622, - "step": 16669 - }, - { - "epoch": 8.69361147327249, - "grad_norm": 1.4605923891067505, - "learning_rate": 8.375376884422111e-05, - "loss": 5.4845, - "step": 16670 - }, - { - "epoch": 8.69413298565841, - "grad_norm": 1.3680305480957031, - "learning_rate": 8.375276381909548e-05, - "loss": 5.8573, - "step": 16671 - }, - { - "epoch": 8.69465449804433, - "grad_norm": 1.4333151578903198, - "learning_rate": 8.375175879396984e-05, - "loss": 6.0871, - "step": 16672 - }, - { - "epoch": 8.695176010430247, - "grad_norm": 1.4516117572784424, - "learning_rate": 8.375075376884422e-05, - "loss": 5.1747, - "step": 16673 - }, - { - "epoch": 8.695697522816166, - "grad_norm": 1.4465407133102417, - "learning_rate": 8.37497487437186e-05, - "loss": 5.7295, - "step": 16674 - }, - { - "epoch": 8.696219035202086, - "grad_norm": 1.3650143146514893, - "learning_rate": 8.374874371859298e-05, - "loss": 5.5376, - "step": 16675 - }, - { - "epoch": 8.696740547588005, - "grad_norm": 1.3408390283584595, - "learning_rate": 8.374773869346734e-05, - "loss": 5.858, - "step": 16676 - }, - { - "epoch": 8.697262059973925, - "grad_norm": 1.3400743007659912, - "learning_rate": 8.374673366834172e-05, - "loss": 5.7518, - "step": 16677 - }, - { - "epoch": 8.697783572359844, - "grad_norm": 1.3094632625579834, - "learning_rate": 8.374572864321608e-05, - "loss": 5.697, - "step": 16678 - }, - { - "epoch": 8.698305084745762, - "grad_norm": 1.4265309572219849, - "learning_rate": 8.374472361809046e-05, - "loss": 5.5808, - "step": 16679 - }, - { - "epoch": 8.698826597131681, - "grad_norm": 1.5020571947097778, - "learning_rate": 8.374371859296482e-05, - "loss": 5.305, - "step": 16680 - }, - { - "epoch": 8.6993481095176, - "grad_norm": 1.4864927530288696, - "learning_rate": 8.37427135678392e-05, - "loss": 5.3129, - "step": 16681 - }, - { - "epoch": 8.69986962190352, - "grad_norm": 1.3416062593460083, - "learning_rate": 8.374170854271357e-05, - "loss": 5.7326, - "step": 16682 - }, - { - "epoch": 8.70039113428944, - "grad_norm": 1.530544400215149, - "learning_rate": 8.374070351758794e-05, - "loss": 5.1403, - "step": 16683 - }, - { - "epoch": 8.70091264667536, - "grad_norm": 1.2932556867599487, - "learning_rate": 8.373969849246232e-05, - "loss": 5.6282, - "step": 16684 - }, - { - "epoch": 8.701434159061277, - "grad_norm": 1.4915558099746704, - "learning_rate": 8.37386934673367e-05, - "loss": 5.6674, - "step": 16685 - }, - { - "epoch": 8.701955671447196, - "grad_norm": 1.4061027765274048, - "learning_rate": 8.373768844221106e-05, - "loss": 5.6555, - "step": 16686 - }, - { - "epoch": 8.702477183833116, - "grad_norm": 1.2282124757766724, - "learning_rate": 8.373668341708543e-05, - "loss": 5.1559, - "step": 16687 - }, - { - "epoch": 8.702998696219035, - "grad_norm": 1.5652388334274292, - "learning_rate": 8.37356783919598e-05, - "loss": 5.4897, - "step": 16688 - }, - { - "epoch": 8.703520208604955, - "grad_norm": 1.5356403589248657, - "learning_rate": 8.373467336683417e-05, - "loss": 5.7543, - "step": 16689 - }, - { - "epoch": 8.704041720990874, - "grad_norm": 1.4982415437698364, - "learning_rate": 8.373366834170855e-05, - "loss": 5.8602, - "step": 16690 - }, - { - "epoch": 8.704563233376792, - "grad_norm": 1.5782814025878906, - "learning_rate": 8.373266331658291e-05, - "loss": 5.4121, - "step": 16691 - }, - { - "epoch": 8.705084745762711, - "grad_norm": 1.6122368574142456, - "learning_rate": 8.373165829145729e-05, - "loss": 5.1218, - "step": 16692 - }, - { - "epoch": 8.70560625814863, - "grad_norm": 1.4120765924453735, - "learning_rate": 8.373065326633165e-05, - "loss": 5.5466, - "step": 16693 - }, - { - "epoch": 8.70612777053455, - "grad_norm": 1.5860767364501953, - "learning_rate": 8.372964824120603e-05, - "loss": 5.7217, - "step": 16694 - }, - { - "epoch": 8.70664928292047, - "grad_norm": 1.471635341644287, - "learning_rate": 8.372864321608041e-05, - "loss": 5.7475, - "step": 16695 - }, - { - "epoch": 8.70717079530639, - "grad_norm": 1.5463684797286987, - "learning_rate": 8.372763819095479e-05, - "loss": 5.4705, - "step": 16696 - }, - { - "epoch": 8.707692307692307, - "grad_norm": 1.5364494323730469, - "learning_rate": 8.372663316582915e-05, - "loss": 5.5806, - "step": 16697 - }, - { - "epoch": 8.708213820078226, - "grad_norm": 1.5216912031173706, - "learning_rate": 8.372562814070353e-05, - "loss": 5.293, - "step": 16698 - }, - { - "epoch": 8.708735332464146, - "grad_norm": 1.46867835521698, - "learning_rate": 8.37246231155779e-05, - "loss": 5.7486, - "step": 16699 - }, - { - "epoch": 8.709256844850065, - "grad_norm": 1.4986095428466797, - "learning_rate": 8.372361809045226e-05, - "loss": 5.6637, - "step": 16700 - }, - { - "epoch": 8.709778357235985, - "grad_norm": 1.4553935527801514, - "learning_rate": 8.372261306532664e-05, - "loss": 5.3883, - "step": 16701 - }, - { - "epoch": 8.710299869621904, - "grad_norm": 1.3768388032913208, - "learning_rate": 8.3721608040201e-05, - "loss": 5.6687, - "step": 16702 - }, - { - "epoch": 8.710821382007822, - "grad_norm": 1.4736624956130981, - "learning_rate": 8.372060301507538e-05, - "loss": 5.7795, - "step": 16703 - }, - { - "epoch": 8.711342894393741, - "grad_norm": 1.6664537191390991, - "learning_rate": 8.371959798994976e-05, - "loss": 5.4556, - "step": 16704 - }, - { - "epoch": 8.711864406779661, - "grad_norm": 1.3951318264007568, - "learning_rate": 8.371859296482413e-05, - "loss": 5.6392, - "step": 16705 - }, - { - "epoch": 8.71238591916558, - "grad_norm": 1.4170111417770386, - "learning_rate": 8.37175879396985e-05, - "loss": 5.444, - "step": 16706 - }, - { - "epoch": 8.7129074315515, - "grad_norm": 1.4265649318695068, - "learning_rate": 8.371658291457288e-05, - "loss": 5.3132, - "step": 16707 - }, - { - "epoch": 8.71342894393742, - "grad_norm": 1.3890067338943481, - "learning_rate": 8.371557788944724e-05, - "loss": 5.1139, - "step": 16708 - }, - { - "epoch": 8.713950456323337, - "grad_norm": 1.4727482795715332, - "learning_rate": 8.371457286432162e-05, - "loss": 5.129, - "step": 16709 - }, - { - "epoch": 8.714471968709256, - "grad_norm": 1.387488603591919, - "learning_rate": 8.371356783919598e-05, - "loss": 5.2465, - "step": 16710 - }, - { - "epoch": 8.714993481095176, - "grad_norm": 1.515058994293213, - "learning_rate": 8.371256281407036e-05, - "loss": 5.4956, - "step": 16711 - }, - { - "epoch": 8.715514993481095, - "grad_norm": 1.3480738401412964, - "learning_rate": 8.371155778894472e-05, - "loss": 5.707, - "step": 16712 - }, - { - "epoch": 8.716036505867015, - "grad_norm": 1.4705146551132202, - "learning_rate": 8.371055276381909e-05, - "loss": 5.0665, - "step": 16713 - }, - { - "epoch": 8.716558018252934, - "grad_norm": 1.3832088708877563, - "learning_rate": 8.370954773869347e-05, - "loss": 5.4319, - "step": 16714 - }, - { - "epoch": 8.717079530638852, - "grad_norm": 1.373336911201477, - "learning_rate": 8.370854271356784e-05, - "loss": 5.9443, - "step": 16715 - }, - { - "epoch": 8.717601043024771, - "grad_norm": 1.3842707872390747, - "learning_rate": 8.370753768844222e-05, - "loss": 5.8435, - "step": 16716 - }, - { - "epoch": 8.718122555410691, - "grad_norm": 1.469234824180603, - "learning_rate": 8.370653266331659e-05, - "loss": 5.3407, - "step": 16717 - }, - { - "epoch": 8.71864406779661, - "grad_norm": 1.4248286485671997, - "learning_rate": 8.370552763819096e-05, - "loss": 5.547, - "step": 16718 - }, - { - "epoch": 8.71916558018253, - "grad_norm": 1.7512394189834595, - "learning_rate": 8.370452261306533e-05, - "loss": 5.2079, - "step": 16719 - }, - { - "epoch": 8.71968709256845, - "grad_norm": 1.4802579879760742, - "learning_rate": 8.37035175879397e-05, - "loss": 5.4379, - "step": 16720 - }, - { - "epoch": 8.720208604954367, - "grad_norm": 1.4202677011489868, - "learning_rate": 8.370251256281407e-05, - "loss": 5.8019, - "step": 16721 - }, - { - "epoch": 8.720730117340286, - "grad_norm": 1.4714112281799316, - "learning_rate": 8.370150753768845e-05, - "loss": 5.6391, - "step": 16722 - }, - { - "epoch": 8.721251629726206, - "grad_norm": 1.4480019807815552, - "learning_rate": 8.370050251256281e-05, - "loss": 5.2298, - "step": 16723 - }, - { - "epoch": 8.721773142112125, - "grad_norm": 1.3937593698501587, - "learning_rate": 8.369949748743719e-05, - "loss": 5.4182, - "step": 16724 - }, - { - "epoch": 8.722294654498045, - "grad_norm": 1.4685941934585571, - "learning_rate": 8.369849246231157e-05, - "loss": 5.4653, - "step": 16725 - }, - { - "epoch": 8.722816166883963, - "grad_norm": 1.3177833557128906, - "learning_rate": 8.369748743718593e-05, - "loss": 5.7544, - "step": 16726 - }, - { - "epoch": 8.723337679269882, - "grad_norm": 1.4430752992630005, - "learning_rate": 8.369648241206031e-05, - "loss": 5.6831, - "step": 16727 - }, - { - "epoch": 8.723859191655801, - "grad_norm": 1.4679450988769531, - "learning_rate": 8.369547738693467e-05, - "loss": 5.1022, - "step": 16728 - }, - { - "epoch": 8.724380704041721, - "grad_norm": 1.4259769916534424, - "learning_rate": 8.369447236180905e-05, - "loss": 5.7719, - "step": 16729 - }, - { - "epoch": 8.72490221642764, - "grad_norm": 1.3920800685882568, - "learning_rate": 8.369346733668342e-05, - "loss": 5.8762, - "step": 16730 - }, - { - "epoch": 8.72542372881356, - "grad_norm": 1.4668123722076416, - "learning_rate": 8.36924623115578e-05, - "loss": 5.2653, - "step": 16731 - }, - { - "epoch": 8.72594524119948, - "grad_norm": 1.3466932773590088, - "learning_rate": 8.369145728643216e-05, - "loss": 5.6474, - "step": 16732 - }, - { - "epoch": 8.726466753585397, - "grad_norm": 1.4934674501419067, - "learning_rate": 8.369045226130654e-05, - "loss": 5.0215, - "step": 16733 - }, - { - "epoch": 8.726988265971316, - "grad_norm": 1.4377673864364624, - "learning_rate": 8.36894472361809e-05, - "loss": 5.0752, - "step": 16734 - }, - { - "epoch": 8.727509778357236, - "grad_norm": 1.3366882801055908, - "learning_rate": 8.368844221105528e-05, - "loss": 5.8394, - "step": 16735 - }, - { - "epoch": 8.728031290743155, - "grad_norm": 1.4576045274734497, - "learning_rate": 8.368743718592966e-05, - "loss": 5.136, - "step": 16736 - }, - { - "epoch": 8.728552803129075, - "grad_norm": 1.4368394613265991, - "learning_rate": 8.368643216080403e-05, - "loss": 5.8123, - "step": 16737 - }, - { - "epoch": 8.729074315514993, - "grad_norm": 1.4711084365844727, - "learning_rate": 8.36854271356784e-05, - "loss": 5.5184, - "step": 16738 - }, - { - "epoch": 8.729595827900912, - "grad_norm": 1.5634417533874512, - "learning_rate": 8.368442211055276e-05, - "loss": 5.5109, - "step": 16739 - }, - { - "epoch": 8.730117340286832, - "grad_norm": 1.3339828252792358, - "learning_rate": 8.368341708542714e-05, - "loss": 5.5781, - "step": 16740 - }, - { - "epoch": 8.730638852672751, - "grad_norm": 1.352977991104126, - "learning_rate": 8.36824120603015e-05, - "loss": 5.7356, - "step": 16741 - }, - { - "epoch": 8.73116036505867, - "grad_norm": 1.5020301342010498, - "learning_rate": 8.368140703517588e-05, - "loss": 5.5498, - "step": 16742 - }, - { - "epoch": 8.73168187744459, - "grad_norm": 1.383081078529358, - "learning_rate": 8.368040201005025e-05, - "loss": 5.9311, - "step": 16743 - }, - { - "epoch": 8.73220338983051, - "grad_norm": 1.4595805406570435, - "learning_rate": 8.367939698492463e-05, - "loss": 5.4031, - "step": 16744 - }, - { - "epoch": 8.732724902216427, - "grad_norm": 1.3481124639511108, - "learning_rate": 8.3678391959799e-05, - "loss": 5.8381, - "step": 16745 - }, - { - "epoch": 8.733246414602347, - "grad_norm": 1.5753884315490723, - "learning_rate": 8.367738693467338e-05, - "loss": 5.5978, - "step": 16746 - }, - { - "epoch": 8.733767926988266, - "grad_norm": 1.4132298231124878, - "learning_rate": 8.367638190954775e-05, - "loss": 5.4811, - "step": 16747 - }, - { - "epoch": 8.734289439374185, - "grad_norm": 1.4920029640197754, - "learning_rate": 8.367537688442212e-05, - "loss": 5.7596, - "step": 16748 - }, - { - "epoch": 8.734810951760105, - "grad_norm": 1.5488255023956299, - "learning_rate": 8.367437185929649e-05, - "loss": 5.353, - "step": 16749 - }, - { - "epoch": 8.735332464146023, - "grad_norm": 1.4874367713928223, - "learning_rate": 8.367336683417086e-05, - "loss": 5.3531, - "step": 16750 - }, - { - "epoch": 8.735853976531942, - "grad_norm": 1.3697317838668823, - "learning_rate": 8.367236180904523e-05, - "loss": 5.5202, - "step": 16751 - }, - { - "epoch": 8.736375488917862, - "grad_norm": 1.5995389223098755, - "learning_rate": 8.36713567839196e-05, - "loss": 5.307, - "step": 16752 - }, - { - "epoch": 8.736897001303781, - "grad_norm": 1.3747880458831787, - "learning_rate": 8.367035175879397e-05, - "loss": 5.2946, - "step": 16753 - }, - { - "epoch": 8.7374185136897, - "grad_norm": 1.4809263944625854, - "learning_rate": 8.366934673366834e-05, - "loss": 5.6079, - "step": 16754 - }, - { - "epoch": 8.73794002607562, - "grad_norm": 1.4195374250411987, - "learning_rate": 8.366834170854271e-05, - "loss": 5.3515, - "step": 16755 - }, - { - "epoch": 8.73846153846154, - "grad_norm": 1.4479291439056396, - "learning_rate": 8.366733668341709e-05, - "loss": 5.2242, - "step": 16756 - }, - { - "epoch": 8.738983050847457, - "grad_norm": 1.4068193435668945, - "learning_rate": 8.366633165829147e-05, - "loss": 5.4452, - "step": 16757 - }, - { - "epoch": 8.739504563233377, - "grad_norm": 1.4583686590194702, - "learning_rate": 8.366532663316583e-05, - "loss": 5.5487, - "step": 16758 - }, - { - "epoch": 8.740026075619296, - "grad_norm": 1.3693798780441284, - "learning_rate": 8.366432160804021e-05, - "loss": 5.1096, - "step": 16759 - }, - { - "epoch": 8.740547588005215, - "grad_norm": 1.4219781160354614, - "learning_rate": 8.366331658291458e-05, - "loss": 5.6573, - "step": 16760 - }, - { - "epoch": 8.741069100391135, - "grad_norm": 1.3781505823135376, - "learning_rate": 8.366231155778895e-05, - "loss": 5.5478, - "step": 16761 - }, - { - "epoch": 8.741590612777053, - "grad_norm": 1.506433367729187, - "learning_rate": 8.366130653266332e-05, - "loss": 5.4081, - "step": 16762 - }, - { - "epoch": 8.742112125162972, - "grad_norm": 1.355911374092102, - "learning_rate": 8.36603015075377e-05, - "loss": 5.1731, - "step": 16763 - }, - { - "epoch": 8.742633637548892, - "grad_norm": 1.3465626239776611, - "learning_rate": 8.365929648241206e-05, - "loss": 5.8523, - "step": 16764 - }, - { - "epoch": 8.743155149934811, - "grad_norm": 1.2796212434768677, - "learning_rate": 8.365829145728642e-05, - "loss": 5.3959, - "step": 16765 - }, - { - "epoch": 8.74367666232073, - "grad_norm": 1.3779938220977783, - "learning_rate": 8.36572864321608e-05, - "loss": 5.4638, - "step": 16766 - }, - { - "epoch": 8.74419817470665, - "grad_norm": 1.3357949256896973, - "learning_rate": 8.365628140703518e-05, - "loss": 5.7988, - "step": 16767 - }, - { - "epoch": 8.744719687092568, - "grad_norm": 1.4129763841629028, - "learning_rate": 8.365527638190956e-05, - "loss": 5.6914, - "step": 16768 - }, - { - "epoch": 8.745241199478487, - "grad_norm": 1.4481457471847534, - "learning_rate": 8.365427135678392e-05, - "loss": 4.9778, - "step": 16769 - }, - { - "epoch": 8.745762711864407, - "grad_norm": 1.4216219186782837, - "learning_rate": 8.36532663316583e-05, - "loss": 5.6847, - "step": 16770 - }, - { - "epoch": 8.746284224250326, - "grad_norm": 1.3897520303726196, - "learning_rate": 8.365226130653266e-05, - "loss": 5.8577, - "step": 16771 - }, - { - "epoch": 8.746805736636245, - "grad_norm": 1.412482738494873, - "learning_rate": 8.365125628140704e-05, - "loss": 5.5371, - "step": 16772 - }, - { - "epoch": 8.747327249022165, - "grad_norm": 1.2854068279266357, - "learning_rate": 8.36502512562814e-05, - "loss": 5.7943, - "step": 16773 - }, - { - "epoch": 8.747848761408083, - "grad_norm": 1.350219488143921, - "learning_rate": 8.364924623115578e-05, - "loss": 5.9674, - "step": 16774 - }, - { - "epoch": 8.748370273794002, - "grad_norm": 1.3814570903778076, - "learning_rate": 8.364824120603015e-05, - "loss": 5.7001, - "step": 16775 - }, - { - "epoch": 8.748891786179922, - "grad_norm": 1.4835392236709595, - "learning_rate": 8.364723618090453e-05, - "loss": 5.1368, - "step": 16776 - }, - { - "epoch": 8.749413298565841, - "grad_norm": 1.3639581203460693, - "learning_rate": 8.36462311557789e-05, - "loss": 5.3531, - "step": 16777 - }, - { - "epoch": 8.74993481095176, - "grad_norm": 1.427730679512024, - "learning_rate": 8.364522613065328e-05, - "loss": 5.6159, - "step": 16778 - }, - { - "epoch": 8.75045632333768, - "grad_norm": 1.382274866104126, - "learning_rate": 8.364422110552765e-05, - "loss": 5.496, - "step": 16779 - }, - { - "epoch": 8.750977835723598, - "grad_norm": 1.4300868511199951, - "learning_rate": 8.364321608040201e-05, - "loss": 5.7387, - "step": 16780 - }, - { - "epoch": 8.751499348109517, - "grad_norm": 1.4523026943206787, - "learning_rate": 8.364221105527639e-05, - "loss": 5.6092, - "step": 16781 - }, - { - "epoch": 8.752020860495437, - "grad_norm": 1.456408143043518, - "learning_rate": 8.364120603015075e-05, - "loss": 5.318, - "step": 16782 - }, - { - "epoch": 8.752542372881356, - "grad_norm": 1.4793192148208618, - "learning_rate": 8.364020100502513e-05, - "loss": 5.8044, - "step": 16783 - }, - { - "epoch": 8.753063885267276, - "grad_norm": 1.406471848487854, - "learning_rate": 8.36391959798995e-05, - "loss": 5.6701, - "step": 16784 - }, - { - "epoch": 8.753585397653195, - "grad_norm": 1.3408664464950562, - "learning_rate": 8.363819095477387e-05, - "loss": 6.0207, - "step": 16785 - }, - { - "epoch": 8.754106910039113, - "grad_norm": 1.3876365423202515, - "learning_rate": 8.363718592964824e-05, - "loss": 5.67, - "step": 16786 - }, - { - "epoch": 8.754628422425032, - "grad_norm": 1.465591549873352, - "learning_rate": 8.363618090452261e-05, - "loss": 5.6, - "step": 16787 - }, - { - "epoch": 8.755149934810952, - "grad_norm": 1.5773639678955078, - "learning_rate": 8.363517587939699e-05, - "loss": 5.6216, - "step": 16788 - }, - { - "epoch": 8.755671447196871, - "grad_norm": 1.3617335557937622, - "learning_rate": 8.363417085427137e-05, - "loss": 5.6771, - "step": 16789 - }, - { - "epoch": 8.75619295958279, - "grad_norm": 1.5147019624710083, - "learning_rate": 8.363316582914573e-05, - "loss": 5.5599, - "step": 16790 - }, - { - "epoch": 8.75671447196871, - "grad_norm": 1.590320110321045, - "learning_rate": 8.363216080402011e-05, - "loss": 5.4066, - "step": 16791 - }, - { - "epoch": 8.757235984354628, - "grad_norm": 1.530150294303894, - "learning_rate": 8.363115577889448e-05, - "loss": 4.8645, - "step": 16792 - }, - { - "epoch": 8.757757496740547, - "grad_norm": 1.52835214138031, - "learning_rate": 8.363015075376884e-05, - "loss": 5.376, - "step": 16793 - }, - { - "epoch": 8.758279009126467, - "grad_norm": 1.4662717580795288, - "learning_rate": 8.362914572864322e-05, - "loss": 5.2965, - "step": 16794 - }, - { - "epoch": 8.758800521512386, - "grad_norm": 1.5479800701141357, - "learning_rate": 8.362814070351758e-05, - "loss": 5.2467, - "step": 16795 - }, - { - "epoch": 8.759322033898306, - "grad_norm": 1.5133029222488403, - "learning_rate": 8.362713567839196e-05, - "loss": 5.3545, - "step": 16796 - }, - { - "epoch": 8.759843546284225, - "grad_norm": 1.3063569068908691, - "learning_rate": 8.362613065326634e-05, - "loss": 5.7658, - "step": 16797 - }, - { - "epoch": 8.760365058670143, - "grad_norm": 1.4074227809906006, - "learning_rate": 8.362512562814072e-05, - "loss": 5.8603, - "step": 16798 - }, - { - "epoch": 8.760886571056062, - "grad_norm": 1.5554444789886475, - "learning_rate": 8.362412060301508e-05, - "loss": 5.6723, - "step": 16799 - }, - { - "epoch": 8.761408083441982, - "grad_norm": 1.5207096338272095, - "learning_rate": 8.362311557788946e-05, - "loss": 5.6313, - "step": 16800 - }, - { - "epoch": 8.761929595827901, - "grad_norm": 1.3871796131134033, - "learning_rate": 8.362211055276382e-05, - "loss": 5.7474, - "step": 16801 - }, - { - "epoch": 8.76245110821382, - "grad_norm": 1.2831425666809082, - "learning_rate": 8.36211055276382e-05, - "loss": 6.0772, - "step": 16802 - }, - { - "epoch": 8.76297262059974, - "grad_norm": 1.3616855144500732, - "learning_rate": 8.362010050251256e-05, - "loss": 5.5427, - "step": 16803 - }, - { - "epoch": 8.763494132985658, - "grad_norm": 1.3339133262634277, - "learning_rate": 8.361909547738694e-05, - "loss": 6.0597, - "step": 16804 - }, - { - "epoch": 8.764015645371577, - "grad_norm": 1.4548790454864502, - "learning_rate": 8.36180904522613e-05, - "loss": 5.634, - "step": 16805 - }, - { - "epoch": 8.764537157757497, - "grad_norm": 1.4748520851135254, - "learning_rate": 8.361708542713567e-05, - "loss": 5.3476, - "step": 16806 - }, - { - "epoch": 8.765058670143416, - "grad_norm": 1.413817286491394, - "learning_rate": 8.361608040201005e-05, - "loss": 5.3578, - "step": 16807 - }, - { - "epoch": 8.765580182529336, - "grad_norm": 1.5144597291946411, - "learning_rate": 8.361507537688443e-05, - "loss": 5.2415, - "step": 16808 - }, - { - "epoch": 8.766101694915255, - "grad_norm": 1.406569242477417, - "learning_rate": 8.36140703517588e-05, - "loss": 5.2083, - "step": 16809 - }, - { - "epoch": 8.766623207301173, - "grad_norm": 1.4216920137405396, - "learning_rate": 8.361306532663317e-05, - "loss": 5.2735, - "step": 16810 - }, - { - "epoch": 8.767144719687092, - "grad_norm": 1.4975683689117432, - "learning_rate": 8.361206030150755e-05, - "loss": 5.4491, - "step": 16811 - }, - { - "epoch": 8.767666232073012, - "grad_norm": 1.5107976198196411, - "learning_rate": 8.361105527638191e-05, - "loss": 5.0824, - "step": 16812 - }, - { - "epoch": 8.768187744458931, - "grad_norm": 1.3963868618011475, - "learning_rate": 8.361005025125629e-05, - "loss": 5.7164, - "step": 16813 - }, - { - "epoch": 8.76870925684485, - "grad_norm": 1.3925755023956299, - "learning_rate": 8.360904522613065e-05, - "loss": 5.8144, - "step": 16814 - }, - { - "epoch": 8.76923076923077, - "grad_norm": 1.4478256702423096, - "learning_rate": 8.360804020100503e-05, - "loss": 5.3605, - "step": 16815 - }, - { - "epoch": 8.769752281616688, - "grad_norm": 1.50880765914917, - "learning_rate": 8.36070351758794e-05, - "loss": 5.544, - "step": 16816 - }, - { - "epoch": 8.770273794002607, - "grad_norm": 1.4451451301574707, - "learning_rate": 8.360603015075377e-05, - "loss": 5.3065, - "step": 16817 - }, - { - "epoch": 8.770795306388527, - "grad_norm": 1.4471367597579956, - "learning_rate": 8.360502512562815e-05, - "loss": 5.7304, - "step": 16818 - }, - { - "epoch": 8.771316818774446, - "grad_norm": 1.5849237442016602, - "learning_rate": 8.360402010050251e-05, - "loss": 5.2789, - "step": 16819 - }, - { - "epoch": 8.771838331160366, - "grad_norm": 1.4914264678955078, - "learning_rate": 8.360301507537689e-05, - "loss": 5.2538, - "step": 16820 - }, - { - "epoch": 8.772359843546283, - "grad_norm": 1.4861900806427002, - "learning_rate": 8.360201005025126e-05, - "loss": 5.4958, - "step": 16821 - }, - { - "epoch": 8.772881355932203, - "grad_norm": 1.508380651473999, - "learning_rate": 8.360100502512563e-05, - "loss": 5.2876, - "step": 16822 - }, - { - "epoch": 8.773402868318122, - "grad_norm": 1.3423089981079102, - "learning_rate": 8.36e-05, - "loss": 5.6757, - "step": 16823 - }, - { - "epoch": 8.773924380704042, - "grad_norm": 1.510724663734436, - "learning_rate": 8.359899497487438e-05, - "loss": 4.9152, - "step": 16824 - }, - { - "epoch": 8.774445893089961, - "grad_norm": 1.474795937538147, - "learning_rate": 8.359798994974874e-05, - "loss": 5.0595, - "step": 16825 - }, - { - "epoch": 8.77496740547588, - "grad_norm": 1.3546725511550903, - "learning_rate": 8.359698492462312e-05, - "loss": 5.6638, - "step": 16826 - }, - { - "epoch": 8.7754889178618, - "grad_norm": 1.5240637063980103, - "learning_rate": 8.359597989949748e-05, - "loss": 5.3462, - "step": 16827 - }, - { - "epoch": 8.776010430247718, - "grad_norm": 1.5037168264389038, - "learning_rate": 8.359497487437186e-05, - "loss": 4.6578, - "step": 16828 - }, - { - "epoch": 8.776531942633637, - "grad_norm": 1.3781753778457642, - "learning_rate": 8.359396984924624e-05, - "loss": 5.6199, - "step": 16829 - }, - { - "epoch": 8.777053455019557, - "grad_norm": 1.2693275213241577, - "learning_rate": 8.359296482412062e-05, - "loss": 5.761, - "step": 16830 - }, - { - "epoch": 8.777574967405476, - "grad_norm": 1.3607226610183716, - "learning_rate": 8.359195979899498e-05, - "loss": 5.0678, - "step": 16831 - }, - { - "epoch": 8.778096479791396, - "grad_norm": 1.5265661478042603, - "learning_rate": 8.359095477386935e-05, - "loss": 5.0558, - "step": 16832 - }, - { - "epoch": 8.778617992177313, - "grad_norm": 1.358443021774292, - "learning_rate": 8.358994974874372e-05, - "loss": 5.824, - "step": 16833 - }, - { - "epoch": 8.779139504563233, - "grad_norm": 1.433332920074463, - "learning_rate": 8.358894472361809e-05, - "loss": 5.1886, - "step": 16834 - }, - { - "epoch": 8.779661016949152, - "grad_norm": 1.7115193605422974, - "learning_rate": 8.358793969849247e-05, - "loss": 5.2058, - "step": 16835 - }, - { - "epoch": 8.780182529335072, - "grad_norm": 1.6949700117111206, - "learning_rate": 8.358693467336683e-05, - "loss": 5.3628, - "step": 16836 - }, - { - "epoch": 8.780704041720991, - "grad_norm": 1.4137567281723022, - "learning_rate": 8.358592964824121e-05, - "loss": 5.8545, - "step": 16837 - }, - { - "epoch": 8.78122555410691, - "grad_norm": 1.4295179843902588, - "learning_rate": 8.358492462311559e-05, - "loss": 5.478, - "step": 16838 - }, - { - "epoch": 8.78174706649283, - "grad_norm": 1.4594531059265137, - "learning_rate": 8.358391959798996e-05, - "loss": 5.3657, - "step": 16839 - }, - { - "epoch": 8.782268578878748, - "grad_norm": 1.3532254695892334, - "learning_rate": 8.358291457286433e-05, - "loss": 6.0747, - "step": 16840 - }, - { - "epoch": 8.782790091264667, - "grad_norm": 1.3369441032409668, - "learning_rate": 8.35819095477387e-05, - "loss": 5.6178, - "step": 16841 - }, - { - "epoch": 8.783311603650587, - "grad_norm": 1.5422906875610352, - "learning_rate": 8.358090452261307e-05, - "loss": 5.7111, - "step": 16842 - }, - { - "epoch": 8.783833116036506, - "grad_norm": 1.5672085285186768, - "learning_rate": 8.357989949748745e-05, - "loss": 5.0497, - "step": 16843 - }, - { - "epoch": 8.784354628422426, - "grad_norm": 1.7414251565933228, - "learning_rate": 8.357889447236181e-05, - "loss": 5.3783, - "step": 16844 - }, - { - "epoch": 8.784876140808343, - "grad_norm": 1.4242902994155884, - "learning_rate": 8.357788944723618e-05, - "loss": 5.2149, - "step": 16845 - }, - { - "epoch": 8.785397653194263, - "grad_norm": 1.6199777126312256, - "learning_rate": 8.357688442211055e-05, - "loss": 5.0149, - "step": 16846 - }, - { - "epoch": 8.785919165580182, - "grad_norm": 1.433197021484375, - "learning_rate": 8.357587939698492e-05, - "loss": 5.1928, - "step": 16847 - }, - { - "epoch": 8.786440677966102, - "grad_norm": 1.3965153694152832, - "learning_rate": 8.35748743718593e-05, - "loss": 5.7487, - "step": 16848 - }, - { - "epoch": 8.786962190352021, - "grad_norm": 1.5306296348571777, - "learning_rate": 8.357386934673367e-05, - "loss": 5.5879, - "step": 16849 - }, - { - "epoch": 8.78748370273794, - "grad_norm": 1.4932234287261963, - "learning_rate": 8.357286432160805e-05, - "loss": 5.7642, - "step": 16850 - }, - { - "epoch": 8.78800521512386, - "grad_norm": 1.5806595087051392, - "learning_rate": 8.357185929648242e-05, - "loss": 5.5309, - "step": 16851 - }, - { - "epoch": 8.788526727509778, - "grad_norm": 1.6302812099456787, - "learning_rate": 8.35708542713568e-05, - "loss": 5.1454, - "step": 16852 - }, - { - "epoch": 8.789048239895697, - "grad_norm": 1.476861834526062, - "learning_rate": 8.356984924623116e-05, - "loss": 5.4957, - "step": 16853 - }, - { - "epoch": 8.789569752281617, - "grad_norm": 1.4508271217346191, - "learning_rate": 8.356884422110554e-05, - "loss": 5.7729, - "step": 16854 - }, - { - "epoch": 8.790091264667536, - "grad_norm": 1.4186556339263916, - "learning_rate": 8.35678391959799e-05, - "loss": 5.7472, - "step": 16855 - }, - { - "epoch": 8.790612777053456, - "grad_norm": 1.5407264232635498, - "learning_rate": 8.356683417085428e-05, - "loss": 5.3061, - "step": 16856 - }, - { - "epoch": 8.791134289439373, - "grad_norm": 1.4556219577789307, - "learning_rate": 8.356582914572864e-05, - "loss": 5.6796, - "step": 16857 - }, - { - "epoch": 8.791655801825293, - "grad_norm": 1.5846318006515503, - "learning_rate": 8.356482412060302e-05, - "loss": 5.3805, - "step": 16858 - }, - { - "epoch": 8.792177314211212, - "grad_norm": 1.413019061088562, - "learning_rate": 8.35638190954774e-05, - "loss": 5.4452, - "step": 16859 - }, - { - "epoch": 8.792698826597132, - "grad_norm": 1.3643913269042969, - "learning_rate": 8.356281407035176e-05, - "loss": 5.4753, - "step": 16860 - }, - { - "epoch": 8.793220338983051, - "grad_norm": 1.8906751871109009, - "learning_rate": 8.356180904522614e-05, - "loss": 5.295, - "step": 16861 - }, - { - "epoch": 8.79374185136897, - "grad_norm": 1.4246219396591187, - "learning_rate": 8.35608040201005e-05, - "loss": 5.3317, - "step": 16862 - }, - { - "epoch": 8.794263363754888, - "grad_norm": 1.7063485383987427, - "learning_rate": 8.355979899497488e-05, - "loss": 5.1487, - "step": 16863 - }, - { - "epoch": 8.794784876140808, - "grad_norm": 1.4483554363250732, - "learning_rate": 8.355879396984925e-05, - "loss": 5.5261, - "step": 16864 - }, - { - "epoch": 8.795306388526727, - "grad_norm": 1.5707721710205078, - "learning_rate": 8.355778894472362e-05, - "loss": 5.55, - "step": 16865 - }, - { - "epoch": 8.795827900912647, - "grad_norm": 1.3515585660934448, - "learning_rate": 8.355678391959799e-05, - "loss": 5.9651, - "step": 16866 - }, - { - "epoch": 8.796349413298566, - "grad_norm": 1.337493658065796, - "learning_rate": 8.355577889447237e-05, - "loss": 5.5937, - "step": 16867 - }, - { - "epoch": 8.796870925684486, - "grad_norm": 1.433868408203125, - "learning_rate": 8.355477386934673e-05, - "loss": 5.1979, - "step": 16868 - }, - { - "epoch": 8.797392438070403, - "grad_norm": 1.3700644969940186, - "learning_rate": 8.355376884422111e-05, - "loss": 5.7416, - "step": 16869 - }, - { - "epoch": 8.797913950456323, - "grad_norm": 1.6479696035385132, - "learning_rate": 8.355276381909549e-05, - "loss": 5.0886, - "step": 16870 - }, - { - "epoch": 8.798435462842242, - "grad_norm": 1.396196961402893, - "learning_rate": 8.355175879396986e-05, - "loss": 5.4423, - "step": 16871 - }, - { - "epoch": 8.798956975228162, - "grad_norm": 1.4713166952133179, - "learning_rate": 8.355075376884423e-05, - "loss": 5.6392, - "step": 16872 - }, - { - "epoch": 8.799478487614081, - "grad_norm": 1.455247402191162, - "learning_rate": 8.354974874371859e-05, - "loss": 5.1469, - "step": 16873 - }, - { - "epoch": 8.8, - "grad_norm": 1.3316524028778076, - "learning_rate": 8.354874371859297e-05, - "loss": 5.6778, - "step": 16874 - }, - { - "epoch": 8.800521512385918, - "grad_norm": 1.4325711727142334, - "learning_rate": 8.354773869346733e-05, - "loss": 5.1406, - "step": 16875 - }, - { - "epoch": 8.801043024771838, - "grad_norm": 1.462689757347107, - "learning_rate": 8.354673366834171e-05, - "loss": 5.2024, - "step": 16876 - }, - { - "epoch": 8.801564537157757, - "grad_norm": 1.358154296875, - "learning_rate": 8.354572864321608e-05, - "loss": 5.2823, - "step": 16877 - }, - { - "epoch": 8.802086049543677, - "grad_norm": 1.4956189393997192, - "learning_rate": 8.354472361809045e-05, - "loss": 5.3321, - "step": 16878 - }, - { - "epoch": 8.802607561929596, - "grad_norm": 1.3645659685134888, - "learning_rate": 8.354371859296483e-05, - "loss": 5.6986, - "step": 16879 - }, - { - "epoch": 8.803129074315516, - "grad_norm": 1.381750226020813, - "learning_rate": 8.354271356783921e-05, - "loss": 5.5485, - "step": 16880 - }, - { - "epoch": 8.803650586701433, - "grad_norm": 1.297747254371643, - "learning_rate": 8.354170854271357e-05, - "loss": 5.5325, - "step": 16881 - }, - { - "epoch": 8.804172099087353, - "grad_norm": 1.3522764444351196, - "learning_rate": 8.354070351758795e-05, - "loss": 5.539, - "step": 16882 - }, - { - "epoch": 8.804693611473272, - "grad_norm": 1.368298053741455, - "learning_rate": 8.353969849246232e-05, - "loss": 5.9373, - "step": 16883 - }, - { - "epoch": 8.805215123859192, - "grad_norm": 1.4929381608963013, - "learning_rate": 8.35386934673367e-05, - "loss": 5.2503, - "step": 16884 - }, - { - "epoch": 8.805736636245111, - "grad_norm": 1.6607733964920044, - "learning_rate": 8.353768844221106e-05, - "loss": 5.2451, - "step": 16885 - }, - { - "epoch": 8.80625814863103, - "grad_norm": 1.3980218172073364, - "learning_rate": 8.353668341708542e-05, - "loss": 5.6394, - "step": 16886 - }, - { - "epoch": 8.806779661016948, - "grad_norm": 2.247128963470459, - "learning_rate": 8.35356783919598e-05, - "loss": 4.8425, - "step": 16887 - }, - { - "epoch": 8.807301173402868, - "grad_norm": 1.5632272958755493, - "learning_rate": 8.353467336683416e-05, - "loss": 5.2634, - "step": 16888 - }, - { - "epoch": 8.807822685788787, - "grad_norm": 1.433674931526184, - "learning_rate": 8.353366834170854e-05, - "loss": 5.3955, - "step": 16889 - }, - { - "epoch": 8.808344198174707, - "grad_norm": 1.3963311910629272, - "learning_rate": 8.353266331658292e-05, - "loss": 5.6262, - "step": 16890 - }, - { - "epoch": 8.808865710560626, - "grad_norm": 1.4912768602371216, - "learning_rate": 8.35316582914573e-05, - "loss": 5.2931, - "step": 16891 - }, - { - "epoch": 8.809387222946546, - "grad_norm": 1.5207792520523071, - "learning_rate": 8.353065326633166e-05, - "loss": 5.2045, - "step": 16892 - }, - { - "epoch": 8.809908735332463, - "grad_norm": 1.3671205043792725, - "learning_rate": 8.352964824120604e-05, - "loss": 5.5073, - "step": 16893 - }, - { - "epoch": 8.810430247718383, - "grad_norm": 1.3994511365890503, - "learning_rate": 8.35286432160804e-05, - "loss": 5.8148, - "step": 16894 - }, - { - "epoch": 8.810951760104302, - "grad_norm": 1.49443519115448, - "learning_rate": 8.352763819095478e-05, - "loss": 5.4196, - "step": 16895 - }, - { - "epoch": 8.811473272490222, - "grad_norm": 1.3969900608062744, - "learning_rate": 8.352663316582915e-05, - "loss": 5.453, - "step": 16896 - }, - { - "epoch": 8.811994784876141, - "grad_norm": 1.392042636871338, - "learning_rate": 8.352562814070352e-05, - "loss": 5.8502, - "step": 16897 - }, - { - "epoch": 8.81251629726206, - "grad_norm": 1.4709107875823975, - "learning_rate": 8.352462311557789e-05, - "loss": 5.8584, - "step": 16898 - }, - { - "epoch": 8.813037809647978, - "grad_norm": 1.3911820650100708, - "learning_rate": 8.352361809045227e-05, - "loss": 5.337, - "step": 16899 - }, - { - "epoch": 8.813559322033898, - "grad_norm": 1.4821738004684448, - "learning_rate": 8.352261306532664e-05, - "loss": 5.603, - "step": 16900 - }, - { - "epoch": 8.814080834419817, - "grad_norm": 1.4141621589660645, - "learning_rate": 8.352160804020101e-05, - "loss": 5.4516, - "step": 16901 - }, - { - "epoch": 8.814602346805737, - "grad_norm": 1.3985174894332886, - "learning_rate": 8.352060301507539e-05, - "loss": 5.5283, - "step": 16902 - }, - { - "epoch": 8.815123859191656, - "grad_norm": 1.4878042936325073, - "learning_rate": 8.351959798994975e-05, - "loss": 5.084, - "step": 16903 - }, - { - "epoch": 8.815645371577576, - "grad_norm": 1.3323835134506226, - "learning_rate": 8.351859296482413e-05, - "loss": 5.6111, - "step": 16904 - }, - { - "epoch": 8.816166883963493, - "grad_norm": 1.53725266456604, - "learning_rate": 8.351758793969849e-05, - "loss": 5.4393, - "step": 16905 - }, - { - "epoch": 8.816688396349413, - "grad_norm": 1.4048932790756226, - "learning_rate": 8.351658291457287e-05, - "loss": 4.9404, - "step": 16906 - }, - { - "epoch": 8.817209908735332, - "grad_norm": 1.3381874561309814, - "learning_rate": 8.351557788944724e-05, - "loss": 5.4357, - "step": 16907 - }, - { - "epoch": 8.817731421121252, - "grad_norm": 1.2154691219329834, - "learning_rate": 8.351457286432161e-05, - "loss": 4.7563, - "step": 16908 - }, - { - "epoch": 8.818252933507171, - "grad_norm": 1.4678230285644531, - "learning_rate": 8.351356783919598e-05, - "loss": 5.6308, - "step": 16909 - }, - { - "epoch": 8.81877444589309, - "grad_norm": 1.4678895473480225, - "learning_rate": 8.351256281407036e-05, - "loss": 5.117, - "step": 16910 - }, - { - "epoch": 8.819295958279008, - "grad_norm": 1.517960786819458, - "learning_rate": 8.351155778894473e-05, - "loss": 4.9413, - "step": 16911 - }, - { - "epoch": 8.819817470664928, - "grad_norm": 1.364450454711914, - "learning_rate": 8.35105527638191e-05, - "loss": 5.7033, - "step": 16912 - }, - { - "epoch": 8.820338983050847, - "grad_norm": 1.479051113128662, - "learning_rate": 8.350954773869348e-05, - "loss": 5.8627, - "step": 16913 - }, - { - "epoch": 8.820860495436767, - "grad_norm": 1.5600918531417847, - "learning_rate": 8.350854271356784e-05, - "loss": 5.1991, - "step": 16914 - }, - { - "epoch": 8.821382007822686, - "grad_norm": 1.2732475996017456, - "learning_rate": 8.350753768844222e-05, - "loss": 5.9649, - "step": 16915 - }, - { - "epoch": 8.821903520208604, - "grad_norm": 1.341502070426941, - "learning_rate": 8.350653266331658e-05, - "loss": 5.5747, - "step": 16916 - }, - { - "epoch": 8.822425032594523, - "grad_norm": 1.460534691810608, - "learning_rate": 8.350552763819096e-05, - "loss": 5.4994, - "step": 16917 - }, - { - "epoch": 8.822946544980443, - "grad_norm": 1.393713116645813, - "learning_rate": 8.350452261306532e-05, - "loss": 5.6668, - "step": 16918 - }, - { - "epoch": 8.823468057366362, - "grad_norm": 1.3329353332519531, - "learning_rate": 8.35035175879397e-05, - "loss": 5.8865, - "step": 16919 - }, - { - "epoch": 8.823989569752282, - "grad_norm": 1.5972837209701538, - "learning_rate": 8.350251256281408e-05, - "loss": 4.7382, - "step": 16920 - }, - { - "epoch": 8.824511082138201, - "grad_norm": 1.5040769577026367, - "learning_rate": 8.350150753768846e-05, - "loss": 5.2618, - "step": 16921 - }, - { - "epoch": 8.82503259452412, - "grad_norm": 1.3760379552841187, - "learning_rate": 8.350050251256282e-05, - "loss": 5.7294, - "step": 16922 - }, - { - "epoch": 8.825554106910038, - "grad_norm": 1.50537109375, - "learning_rate": 8.34994974874372e-05, - "loss": 5.5036, - "step": 16923 - }, - { - "epoch": 8.826075619295958, - "grad_norm": 1.4735703468322754, - "learning_rate": 8.349849246231156e-05, - "loss": 5.5226, - "step": 16924 - }, - { - "epoch": 8.826597131681877, - "grad_norm": 1.377119779586792, - "learning_rate": 8.349748743718593e-05, - "loss": 5.6651, - "step": 16925 - }, - { - "epoch": 8.827118644067797, - "grad_norm": 1.453910231590271, - "learning_rate": 8.34964824120603e-05, - "loss": 5.3666, - "step": 16926 - }, - { - "epoch": 8.827640156453716, - "grad_norm": 1.5694024562835693, - "learning_rate": 8.349547738693467e-05, - "loss": 5.5927, - "step": 16927 - }, - { - "epoch": 8.828161668839634, - "grad_norm": 1.4811666011810303, - "learning_rate": 8.349447236180905e-05, - "loss": 5.4048, - "step": 16928 - }, - { - "epoch": 8.828683181225554, - "grad_norm": 1.4881889820098877, - "learning_rate": 8.349346733668341e-05, - "loss": 5.1142, - "step": 16929 - }, - { - "epoch": 8.829204693611473, - "grad_norm": 1.6575106382369995, - "learning_rate": 8.349246231155779e-05, - "loss": 4.9174, - "step": 16930 - }, - { - "epoch": 8.829726205997392, - "grad_norm": 1.5757826566696167, - "learning_rate": 8.349145728643217e-05, - "loss": 4.8897, - "step": 16931 - }, - { - "epoch": 8.830247718383312, - "grad_norm": 1.513580083847046, - "learning_rate": 8.349045226130655e-05, - "loss": 5.2723, - "step": 16932 - }, - { - "epoch": 8.830769230769231, - "grad_norm": 1.440398931503296, - "learning_rate": 8.348944723618091e-05, - "loss": 4.9353, - "step": 16933 - }, - { - "epoch": 8.83129074315515, - "grad_norm": 1.471454381942749, - "learning_rate": 8.348844221105529e-05, - "loss": 4.4621, - "step": 16934 - }, - { - "epoch": 8.831812255541069, - "grad_norm": 1.472762107849121, - "learning_rate": 8.348743718592965e-05, - "loss": 5.4558, - "step": 16935 - }, - { - "epoch": 8.832333767926988, - "grad_norm": 1.4964758157730103, - "learning_rate": 8.348643216080403e-05, - "loss": 5.6354, - "step": 16936 - }, - { - "epoch": 8.832855280312907, - "grad_norm": 1.433240294456482, - "learning_rate": 8.34854271356784e-05, - "loss": 5.107, - "step": 16937 - }, - { - "epoch": 8.833376792698827, - "grad_norm": 1.4073988199234009, - "learning_rate": 8.348442211055276e-05, - "loss": 5.387, - "step": 16938 - }, - { - "epoch": 8.833898305084746, - "grad_norm": 1.5243284702301025, - "learning_rate": 8.348341708542714e-05, - "loss": 5.4604, - "step": 16939 - }, - { - "epoch": 8.834419817470664, - "grad_norm": 1.4080723524093628, - "learning_rate": 8.34824120603015e-05, - "loss": 5.4903, - "step": 16940 - }, - { - "epoch": 8.834941329856584, - "grad_norm": 1.5761204957962036, - "learning_rate": 8.348140703517588e-05, - "loss": 5.2512, - "step": 16941 - }, - { - "epoch": 8.835462842242503, - "grad_norm": 1.3451896905899048, - "learning_rate": 8.348040201005026e-05, - "loss": 5.8983, - "step": 16942 - }, - { - "epoch": 8.835984354628422, - "grad_norm": 1.377536654472351, - "learning_rate": 8.347939698492463e-05, - "loss": 5.704, - "step": 16943 - }, - { - "epoch": 8.836505867014342, - "grad_norm": 1.3376017808914185, - "learning_rate": 8.3478391959799e-05, - "loss": 5.5209, - "step": 16944 - }, - { - "epoch": 8.837027379400261, - "grad_norm": 1.3826223611831665, - "learning_rate": 8.347738693467338e-05, - "loss": 5.4096, - "step": 16945 - }, - { - "epoch": 8.83754889178618, - "grad_norm": 1.4316983222961426, - "learning_rate": 8.347638190954774e-05, - "loss": 5.5008, - "step": 16946 - }, - { - "epoch": 8.838070404172099, - "grad_norm": 1.2980077266693115, - "learning_rate": 8.347537688442212e-05, - "loss": 5.8769, - "step": 16947 - }, - { - "epoch": 8.838591916558018, - "grad_norm": 1.366762638092041, - "learning_rate": 8.347437185929648e-05, - "loss": 5.544, - "step": 16948 - }, - { - "epoch": 8.839113428943937, - "grad_norm": 1.3625450134277344, - "learning_rate": 8.347336683417086e-05, - "loss": 5.6355, - "step": 16949 - }, - { - "epoch": 8.839634941329857, - "grad_norm": 1.3293616771697998, - "learning_rate": 8.347236180904522e-05, - "loss": 5.6614, - "step": 16950 - }, - { - "epoch": 8.840156453715776, - "grad_norm": 1.3836934566497803, - "learning_rate": 8.34713567839196e-05, - "loss": 5.6594, - "step": 16951 - }, - { - "epoch": 8.840677966101694, - "grad_norm": 1.4554105997085571, - "learning_rate": 8.347035175879398e-05, - "loss": 5.6642, - "step": 16952 - }, - { - "epoch": 8.841199478487614, - "grad_norm": 1.4288793802261353, - "learning_rate": 8.346934673366834e-05, - "loss": 5.9669, - "step": 16953 - }, - { - "epoch": 8.841720990873533, - "grad_norm": 1.3535306453704834, - "learning_rate": 8.346834170854272e-05, - "loss": 5.6109, - "step": 16954 - }, - { - "epoch": 8.842242503259452, - "grad_norm": 1.4145543575286865, - "learning_rate": 8.346733668341709e-05, - "loss": 5.5129, - "step": 16955 - }, - { - "epoch": 8.842764015645372, - "grad_norm": 2.244659900665283, - "learning_rate": 8.346633165829146e-05, - "loss": 5.3592, - "step": 16956 - }, - { - "epoch": 8.843285528031291, - "grad_norm": 1.3620402812957764, - "learning_rate": 8.346532663316583e-05, - "loss": 5.1688, - "step": 16957 - }, - { - "epoch": 8.843807040417209, - "grad_norm": 1.4569514989852905, - "learning_rate": 8.34643216080402e-05, - "loss": 5.6433, - "step": 16958 - }, - { - "epoch": 8.844328552803129, - "grad_norm": 1.5284662246704102, - "learning_rate": 8.346331658291457e-05, - "loss": 5.8126, - "step": 16959 - }, - { - "epoch": 8.844850065189048, - "grad_norm": 1.5196880102157593, - "learning_rate": 8.346231155778895e-05, - "loss": 5.1741, - "step": 16960 - }, - { - "epoch": 8.845371577574968, - "grad_norm": 1.3637324571609497, - "learning_rate": 8.346130653266331e-05, - "loss": 5.7165, - "step": 16961 - }, - { - "epoch": 8.845893089960887, - "grad_norm": 1.616620659828186, - "learning_rate": 8.346030150753769e-05, - "loss": 4.8574, - "step": 16962 - }, - { - "epoch": 8.846414602346806, - "grad_norm": 1.5080894231796265, - "learning_rate": 8.345929648241207e-05, - "loss": 5.1315, - "step": 16963 - }, - { - "epoch": 8.846936114732724, - "grad_norm": 1.3144590854644775, - "learning_rate": 8.345829145728645e-05, - "loss": 5.703, - "step": 16964 - }, - { - "epoch": 8.847457627118644, - "grad_norm": 1.377013921737671, - "learning_rate": 8.345728643216081e-05, - "loss": 5.9419, - "step": 16965 - }, - { - "epoch": 8.847979139504563, - "grad_norm": 1.4240809679031372, - "learning_rate": 8.345628140703517e-05, - "loss": 5.2925, - "step": 16966 - }, - { - "epoch": 8.848500651890483, - "grad_norm": 1.557939887046814, - "learning_rate": 8.345527638190955e-05, - "loss": 5.0437, - "step": 16967 - }, - { - "epoch": 8.849022164276402, - "grad_norm": 1.432033896446228, - "learning_rate": 8.345427135678392e-05, - "loss": 5.1258, - "step": 16968 - }, - { - "epoch": 8.849543676662321, - "grad_norm": 1.6672288179397583, - "learning_rate": 8.34532663316583e-05, - "loss": 5.9851, - "step": 16969 - }, - { - "epoch": 8.85006518904824, - "grad_norm": 1.5628514289855957, - "learning_rate": 8.345226130653266e-05, - "loss": 5.653, - "step": 16970 - }, - { - "epoch": 8.850586701434159, - "grad_norm": 1.4421374797821045, - "learning_rate": 8.345125628140704e-05, - "loss": 5.4535, - "step": 16971 - }, - { - "epoch": 8.851108213820078, - "grad_norm": 1.4180487394332886, - "learning_rate": 8.345025125628141e-05, - "loss": 5.7102, - "step": 16972 - }, - { - "epoch": 8.851629726205998, - "grad_norm": 1.468701720237732, - "learning_rate": 8.344924623115579e-05, - "loss": 5.6521, - "step": 16973 - }, - { - "epoch": 8.852151238591917, - "grad_norm": 1.4608471393585205, - "learning_rate": 8.344824120603016e-05, - "loss": 5.3677, - "step": 16974 - }, - { - "epoch": 8.852672750977836, - "grad_norm": 1.4559251070022583, - "learning_rate": 8.344723618090453e-05, - "loss": 5.1728, - "step": 16975 - }, - { - "epoch": 8.853194263363754, - "grad_norm": 1.4544708728790283, - "learning_rate": 8.34462311557789e-05, - "loss": 5.401, - "step": 16976 - }, - { - "epoch": 8.853715775749674, - "grad_norm": 1.5114163160324097, - "learning_rate": 8.344522613065328e-05, - "loss": 5.1762, - "step": 16977 - }, - { - "epoch": 8.854237288135593, - "grad_norm": 1.4597363471984863, - "learning_rate": 8.344422110552764e-05, - "loss": 5.3442, - "step": 16978 - }, - { - "epoch": 8.854758800521513, - "grad_norm": 1.66215181350708, - "learning_rate": 8.3443216080402e-05, - "loss": 5.8399, - "step": 16979 - }, - { - "epoch": 8.855280312907432, - "grad_norm": 1.31671142578125, - "learning_rate": 8.344221105527638e-05, - "loss": 5.3776, - "step": 16980 - }, - { - "epoch": 8.855801825293351, - "grad_norm": 1.4573251008987427, - "learning_rate": 8.344120603015075e-05, - "loss": 5.4253, - "step": 16981 - }, - { - "epoch": 8.85632333767927, - "grad_norm": 1.4840813875198364, - "learning_rate": 8.344020100502513e-05, - "loss": 5.8279, - "step": 16982 - }, - { - "epoch": 8.856844850065189, - "grad_norm": 1.892006754875183, - "learning_rate": 8.34391959798995e-05, - "loss": 4.9287, - "step": 16983 - }, - { - "epoch": 8.857366362451108, - "grad_norm": 1.381309151649475, - "learning_rate": 8.343819095477388e-05, - "loss": 5.4582, - "step": 16984 - }, - { - "epoch": 8.857887874837028, - "grad_norm": 1.5987880229949951, - "learning_rate": 8.343718592964825e-05, - "loss": 5.0217, - "step": 16985 - }, - { - "epoch": 8.858409387222947, - "grad_norm": 1.5874238014221191, - "learning_rate": 8.343618090452262e-05, - "loss": 5.425, - "step": 16986 - }, - { - "epoch": 8.858930899608866, - "grad_norm": 1.377091407775879, - "learning_rate": 8.343517587939699e-05, - "loss": 5.3538, - "step": 16987 - }, - { - "epoch": 8.859452411994784, - "grad_norm": 1.6570526361465454, - "learning_rate": 8.343417085427137e-05, - "loss": 5.2637, - "step": 16988 - }, - { - "epoch": 8.859973924380704, - "grad_norm": 1.389289379119873, - "learning_rate": 8.343316582914573e-05, - "loss": 5.5364, - "step": 16989 - }, - { - "epoch": 8.860495436766623, - "grad_norm": 1.4337373971939087, - "learning_rate": 8.343216080402011e-05, - "loss": 5.5896, - "step": 16990 - }, - { - "epoch": 8.861016949152543, - "grad_norm": 1.3667356967926025, - "learning_rate": 8.343115577889447e-05, - "loss": 5.5545, - "step": 16991 - }, - { - "epoch": 8.861538461538462, - "grad_norm": 1.3697766065597534, - "learning_rate": 8.343015075376885e-05, - "loss": 5.8303, - "step": 16992 - }, - { - "epoch": 8.862059973924381, - "grad_norm": 1.3990833759307861, - "learning_rate": 8.342914572864323e-05, - "loss": 5.7169, - "step": 16993 - }, - { - "epoch": 8.8625814863103, - "grad_norm": 1.2958526611328125, - "learning_rate": 8.342814070351759e-05, - "loss": 5.8773, - "step": 16994 - }, - { - "epoch": 8.863102998696219, - "grad_norm": 1.4506174325942993, - "learning_rate": 8.342713567839197e-05, - "loss": 5.3704, - "step": 16995 - }, - { - "epoch": 8.863624511082138, - "grad_norm": 1.6168773174285889, - "learning_rate": 8.342613065326633e-05, - "loss": 4.7147, - "step": 16996 - }, - { - "epoch": 8.864146023468058, - "grad_norm": 1.447791576385498, - "learning_rate": 8.342512562814071e-05, - "loss": 5.6523, - "step": 16997 - }, - { - "epoch": 8.864667535853977, - "grad_norm": 1.4244554042816162, - "learning_rate": 8.342412060301508e-05, - "loss": 5.4265, - "step": 16998 - }, - { - "epoch": 8.865189048239897, - "grad_norm": 1.4577950239181519, - "learning_rate": 8.342311557788945e-05, - "loss": 5.5983, - "step": 16999 - }, - { - "epoch": 8.865710560625814, - "grad_norm": 1.4404419660568237, - "learning_rate": 8.342211055276382e-05, - "loss": 5.7697, - "step": 17000 - }, - { - "epoch": 8.866232073011734, - "grad_norm": 1.5066076517105103, - "learning_rate": 8.34211055276382e-05, - "loss": 5.7429, - "step": 17001 - }, - { - "epoch": 8.866753585397653, - "grad_norm": 1.450811743736267, - "learning_rate": 8.342010050251256e-05, - "loss": 5.3872, - "step": 17002 - }, - { - "epoch": 8.867275097783573, - "grad_norm": 1.483816146850586, - "learning_rate": 8.341909547738694e-05, - "loss": 5.7021, - "step": 17003 - }, - { - "epoch": 8.867796610169492, - "grad_norm": 1.6130540370941162, - "learning_rate": 8.341809045226132e-05, - "loss": 5.2377, - "step": 17004 - }, - { - "epoch": 8.868318122555412, - "grad_norm": 1.522417664527893, - "learning_rate": 8.341708542713568e-05, - "loss": 5.0206, - "step": 17005 - }, - { - "epoch": 8.86883963494133, - "grad_norm": 1.358878254890442, - "learning_rate": 8.341608040201006e-05, - "loss": 5.3277, - "step": 17006 - }, - { - "epoch": 8.869361147327249, - "grad_norm": 1.41798734664917, - "learning_rate": 8.341507537688442e-05, - "loss": 5.6458, - "step": 17007 - }, - { - "epoch": 8.869882659713168, - "grad_norm": 1.4129235744476318, - "learning_rate": 8.34140703517588e-05, - "loss": 5.5138, - "step": 17008 - }, - { - "epoch": 8.870404172099088, - "grad_norm": 1.3933632373809814, - "learning_rate": 8.341306532663316e-05, - "loss": 5.6212, - "step": 17009 - }, - { - "epoch": 8.870925684485007, - "grad_norm": 1.2871266603469849, - "learning_rate": 8.341206030150754e-05, - "loss": 5.8201, - "step": 17010 - }, - { - "epoch": 8.871447196870925, - "grad_norm": 1.3837333917617798, - "learning_rate": 8.34110552763819e-05, - "loss": 5.7127, - "step": 17011 - }, - { - "epoch": 8.871968709256844, - "grad_norm": 1.3612205982208252, - "learning_rate": 8.341005025125628e-05, - "loss": 5.4922, - "step": 17012 - }, - { - "epoch": 8.872490221642764, - "grad_norm": 1.4796544313430786, - "learning_rate": 8.340904522613066e-05, - "loss": 5.4851, - "step": 17013 - }, - { - "epoch": 8.873011734028683, - "grad_norm": 1.4177509546279907, - "learning_rate": 8.340804020100504e-05, - "loss": 5.2942, - "step": 17014 - }, - { - "epoch": 8.873533246414603, - "grad_norm": 1.3008191585540771, - "learning_rate": 8.34070351758794e-05, - "loss": 5.7318, - "step": 17015 - }, - { - "epoch": 8.874054758800522, - "grad_norm": 1.3889836072921753, - "learning_rate": 8.340603015075378e-05, - "loss": 5.2947, - "step": 17016 - }, - { - "epoch": 8.874576271186442, - "grad_norm": 1.5064547061920166, - "learning_rate": 8.340502512562815e-05, - "loss": 5.4118, - "step": 17017 - }, - { - "epoch": 8.87509778357236, - "grad_norm": 1.4889479875564575, - "learning_rate": 8.340402010050251e-05, - "loss": 5.5919, - "step": 17018 - }, - { - "epoch": 8.875619295958279, - "grad_norm": 1.3423919677734375, - "learning_rate": 8.340301507537689e-05, - "loss": 5.7147, - "step": 17019 - }, - { - "epoch": 8.876140808344198, - "grad_norm": 1.381288766860962, - "learning_rate": 8.340201005025125e-05, - "loss": 5.3398, - "step": 17020 - }, - { - "epoch": 8.876662320730118, - "grad_norm": 1.491529941558838, - "learning_rate": 8.340100502512563e-05, - "loss": 5.7879, - "step": 17021 - }, - { - "epoch": 8.877183833116037, - "grad_norm": 1.4192219972610474, - "learning_rate": 8.34e-05, - "loss": 5.5968, - "step": 17022 - }, - { - "epoch": 8.877705345501955, - "grad_norm": 1.439959168434143, - "learning_rate": 8.339899497487437e-05, - "loss": 5.4637, - "step": 17023 - }, - { - "epoch": 8.878226857887874, - "grad_norm": 1.5505597591400146, - "learning_rate": 8.339798994974875e-05, - "loss": 5.1219, - "step": 17024 - }, - { - "epoch": 8.878748370273794, - "grad_norm": 1.4464688301086426, - "learning_rate": 8.339698492462313e-05, - "loss": 5.3009, - "step": 17025 - }, - { - "epoch": 8.879269882659713, - "grad_norm": 1.4401503801345825, - "learning_rate": 8.339597989949749e-05, - "loss": 5.6133, - "step": 17026 - }, - { - "epoch": 8.879791395045633, - "grad_norm": 1.3498351573944092, - "learning_rate": 8.339497487437187e-05, - "loss": 5.6691, - "step": 17027 - }, - { - "epoch": 8.880312907431552, - "grad_norm": 1.4354692697525024, - "learning_rate": 8.339396984924623e-05, - "loss": 5.7865, - "step": 17028 - }, - { - "epoch": 8.880834419817472, - "grad_norm": 1.3337249755859375, - "learning_rate": 8.339296482412061e-05, - "loss": 5.9128, - "step": 17029 - }, - { - "epoch": 8.88135593220339, - "grad_norm": 1.3404130935668945, - "learning_rate": 8.339195979899498e-05, - "loss": 5.3865, - "step": 17030 - }, - { - "epoch": 8.881877444589309, - "grad_norm": 1.3697166442871094, - "learning_rate": 8.339095477386934e-05, - "loss": 5.7218, - "step": 17031 - }, - { - "epoch": 8.882398956975228, - "grad_norm": 1.3653641939163208, - "learning_rate": 8.338994974874372e-05, - "loss": 5.6249, - "step": 17032 - }, - { - "epoch": 8.882920469361148, - "grad_norm": 1.5404266119003296, - "learning_rate": 8.33889447236181e-05, - "loss": 5.7161, - "step": 17033 - }, - { - "epoch": 8.883441981747067, - "grad_norm": 1.2855253219604492, - "learning_rate": 8.338793969849247e-05, - "loss": 4.8932, - "step": 17034 - }, - { - "epoch": 8.883963494132985, - "grad_norm": 1.4245060682296753, - "learning_rate": 8.338693467336684e-05, - "loss": 5.2884, - "step": 17035 - }, - { - "epoch": 8.884485006518904, - "grad_norm": 1.217742681503296, - "learning_rate": 8.338592964824122e-05, - "loss": 5.6901, - "step": 17036 - }, - { - "epoch": 8.885006518904824, - "grad_norm": 1.3787429332733154, - "learning_rate": 8.338492462311558e-05, - "loss": 5.0566, - "step": 17037 - }, - { - "epoch": 8.885528031290743, - "grad_norm": 1.331830620765686, - "learning_rate": 8.338391959798996e-05, - "loss": 5.7823, - "step": 17038 - }, - { - "epoch": 8.886049543676663, - "grad_norm": 1.393920660018921, - "learning_rate": 8.338291457286432e-05, - "loss": 5.4975, - "step": 17039 - }, - { - "epoch": 8.886571056062582, - "grad_norm": 1.3951112031936646, - "learning_rate": 8.33819095477387e-05, - "loss": 5.7929, - "step": 17040 - }, - { - "epoch": 8.887092568448502, - "grad_norm": 1.4013617038726807, - "learning_rate": 8.338090452261306e-05, - "loss": 5.6447, - "step": 17041 - }, - { - "epoch": 8.88761408083442, - "grad_norm": 1.4405478239059448, - "learning_rate": 8.337989949748744e-05, - "loss": 4.9556, - "step": 17042 - }, - { - "epoch": 8.888135593220339, - "grad_norm": 1.5156874656677246, - "learning_rate": 8.33788944723618e-05, - "loss": 5.0333, - "step": 17043 - }, - { - "epoch": 8.888657105606258, - "grad_norm": 1.5013337135314941, - "learning_rate": 8.337788944723618e-05, - "loss": 5.6318, - "step": 17044 - }, - { - "epoch": 8.889178617992178, - "grad_norm": 1.3334108591079712, - "learning_rate": 8.337688442211056e-05, - "loss": 5.8097, - "step": 17045 - }, - { - "epoch": 8.889700130378097, - "grad_norm": 1.4015549421310425, - "learning_rate": 8.337587939698493e-05, - "loss": 5.6382, - "step": 17046 - }, - { - "epoch": 8.890221642764015, - "grad_norm": 1.3645899295806885, - "learning_rate": 8.33748743718593e-05, - "loss": 4.9561, - "step": 17047 - }, - { - "epoch": 8.890743155149934, - "grad_norm": 1.3264247179031372, - "learning_rate": 8.337386934673367e-05, - "loss": 5.4444, - "step": 17048 - }, - { - "epoch": 8.891264667535854, - "grad_norm": 1.3543262481689453, - "learning_rate": 8.337286432160805e-05, - "loss": 5.6786, - "step": 17049 - }, - { - "epoch": 8.891786179921773, - "grad_norm": 1.3420767784118652, - "learning_rate": 8.337185929648241e-05, - "loss": 5.7155, - "step": 17050 - }, - { - "epoch": 8.892307692307693, - "grad_norm": 1.298834204673767, - "learning_rate": 8.337085427135679e-05, - "loss": 6.0569, - "step": 17051 - }, - { - "epoch": 8.892829204693612, - "grad_norm": 1.3356413841247559, - "learning_rate": 8.336984924623115e-05, - "loss": 5.9322, - "step": 17052 - }, - { - "epoch": 8.89335071707953, - "grad_norm": 1.4659143686294556, - "learning_rate": 8.336884422110553e-05, - "loss": 5.6301, - "step": 17053 - }, - { - "epoch": 8.89387222946545, - "grad_norm": 1.458458423614502, - "learning_rate": 8.336783919597991e-05, - "loss": 5.5794, - "step": 17054 - }, - { - "epoch": 8.894393741851369, - "grad_norm": 1.4793978929519653, - "learning_rate": 8.336683417085429e-05, - "loss": 5.2095, - "step": 17055 - }, - { - "epoch": 8.894915254237288, - "grad_norm": 1.438133955001831, - "learning_rate": 8.336582914572865e-05, - "loss": 5.4103, - "step": 17056 - }, - { - "epoch": 8.895436766623208, - "grad_norm": 1.3856194019317627, - "learning_rate": 8.336482412060303e-05, - "loss": 5.7321, - "step": 17057 - }, - { - "epoch": 8.895958279009127, - "grad_norm": 1.6212068796157837, - "learning_rate": 8.336381909547739e-05, - "loss": 5.4017, - "step": 17058 - }, - { - "epoch": 8.896479791395045, - "grad_norm": 1.5043728351593018, - "learning_rate": 8.336281407035176e-05, - "loss": 5.3008, - "step": 17059 - }, - { - "epoch": 8.897001303780964, - "grad_norm": 1.4352198839187622, - "learning_rate": 8.336180904522613e-05, - "loss": 5.4944, - "step": 17060 - }, - { - "epoch": 8.897522816166884, - "grad_norm": 1.417343258857727, - "learning_rate": 8.33608040201005e-05, - "loss": 5.656, - "step": 17061 - }, - { - "epoch": 8.898044328552803, - "grad_norm": 1.6876219511032104, - "learning_rate": 8.335979899497488e-05, - "loss": 5.3495, - "step": 17062 - }, - { - "epoch": 8.898565840938723, - "grad_norm": 1.3979650735855103, - "learning_rate": 8.335879396984924e-05, - "loss": 5.4753, - "step": 17063 - }, - { - "epoch": 8.899087353324642, - "grad_norm": 1.3552018404006958, - "learning_rate": 8.335778894472362e-05, - "loss": 5.235, - "step": 17064 - }, - { - "epoch": 8.89960886571056, - "grad_norm": 1.5721842050552368, - "learning_rate": 8.3356783919598e-05, - "loss": 5.2712, - "step": 17065 - }, - { - "epoch": 8.90013037809648, - "grad_norm": 1.8373327255249023, - "learning_rate": 8.335577889447237e-05, - "loss": 5.2961, - "step": 17066 - }, - { - "epoch": 8.900651890482399, - "grad_norm": 1.507252812385559, - "learning_rate": 8.335477386934674e-05, - "loss": 5.9541, - "step": 17067 - }, - { - "epoch": 8.901173402868318, - "grad_norm": 1.4344172477722168, - "learning_rate": 8.335376884422112e-05, - "loss": 5.3591, - "step": 17068 - }, - { - "epoch": 8.901694915254238, - "grad_norm": 1.2831131219863892, - "learning_rate": 8.335276381909548e-05, - "loss": 5.7469, - "step": 17069 - }, - { - "epoch": 8.902216427640157, - "grad_norm": 1.346082329750061, - "learning_rate": 8.335175879396986e-05, - "loss": 5.4401, - "step": 17070 - }, - { - "epoch": 8.902737940026075, - "grad_norm": 1.490307092666626, - "learning_rate": 8.335075376884422e-05, - "loss": 5.2968, - "step": 17071 - }, - { - "epoch": 8.903259452411994, - "grad_norm": 1.3915188312530518, - "learning_rate": 8.334974874371859e-05, - "loss": 5.7367, - "step": 17072 - }, - { - "epoch": 8.903780964797914, - "grad_norm": 1.520064353942871, - "learning_rate": 8.334874371859297e-05, - "loss": 5.4297, - "step": 17073 - }, - { - "epoch": 8.904302477183833, - "grad_norm": 1.380345344543457, - "learning_rate": 8.334773869346734e-05, - "loss": 5.5236, - "step": 17074 - }, - { - "epoch": 8.904823989569753, - "grad_norm": 1.365453839302063, - "learning_rate": 8.334673366834172e-05, - "loss": 5.3083, - "step": 17075 - }, - { - "epoch": 8.905345501955672, - "grad_norm": 1.5276308059692383, - "learning_rate": 8.334572864321609e-05, - "loss": 4.6554, - "step": 17076 - }, - { - "epoch": 8.90586701434159, - "grad_norm": 1.5000911951065063, - "learning_rate": 8.334472361809046e-05, - "loss": 5.7593, - "step": 17077 - }, - { - "epoch": 8.90638852672751, - "grad_norm": 1.5297530889511108, - "learning_rate": 8.334371859296483e-05, - "loss": 5.373, - "step": 17078 - }, - { - "epoch": 8.906910039113429, - "grad_norm": 1.4709762334823608, - "learning_rate": 8.33427135678392e-05, - "loss": 5.7279, - "step": 17079 - }, - { - "epoch": 8.907431551499348, - "grad_norm": 1.4669662714004517, - "learning_rate": 8.334170854271357e-05, - "loss": 5.0233, - "step": 17080 - }, - { - "epoch": 8.907953063885268, - "grad_norm": 1.3567931652069092, - "learning_rate": 8.334070351758795e-05, - "loss": 5.818, - "step": 17081 - }, - { - "epoch": 8.908474576271187, - "grad_norm": 1.4955748319625854, - "learning_rate": 8.333969849246231e-05, - "loss": 5.5165, - "step": 17082 - }, - { - "epoch": 8.908996088657105, - "grad_norm": 1.8687615394592285, - "learning_rate": 8.333869346733669e-05, - "loss": 5.3448, - "step": 17083 - }, - { - "epoch": 8.909517601043024, - "grad_norm": 1.4678385257720947, - "learning_rate": 8.333768844221105e-05, - "loss": 4.9365, - "step": 17084 - }, - { - "epoch": 8.910039113428944, - "grad_norm": 1.36420476436615, - "learning_rate": 8.333668341708543e-05, - "loss": 5.7715, - "step": 17085 - }, - { - "epoch": 8.910560625814863, - "grad_norm": 1.4271156787872314, - "learning_rate": 8.333567839195981e-05, - "loss": 5.3347, - "step": 17086 - }, - { - "epoch": 8.911082138200783, - "grad_norm": 1.390344262123108, - "learning_rate": 8.333467336683417e-05, - "loss": 5.6018, - "step": 17087 - }, - { - "epoch": 8.911603650586702, - "grad_norm": 1.4372127056121826, - "learning_rate": 8.333366834170855e-05, - "loss": 5.3205, - "step": 17088 - }, - { - "epoch": 8.91212516297262, - "grad_norm": 1.480141043663025, - "learning_rate": 8.333266331658292e-05, - "loss": 5.5567, - "step": 17089 - }, - { - "epoch": 8.91264667535854, - "grad_norm": 1.500901222229004, - "learning_rate": 8.33316582914573e-05, - "loss": 5.497, - "step": 17090 - }, - { - "epoch": 8.913168187744459, - "grad_norm": 1.415892243385315, - "learning_rate": 8.333065326633166e-05, - "loss": 5.5679, - "step": 17091 - }, - { - "epoch": 8.913689700130378, - "grad_norm": 1.5265836715698242, - "learning_rate": 8.332964824120604e-05, - "loss": 5.1616, - "step": 17092 - }, - { - "epoch": 8.914211212516298, - "grad_norm": 1.3410894870758057, - "learning_rate": 8.33286432160804e-05, - "loss": 5.7341, - "step": 17093 - }, - { - "epoch": 8.914732724902217, - "grad_norm": 1.4134284257888794, - "learning_rate": 8.332763819095478e-05, - "loss": 5.2393, - "step": 17094 - }, - { - "epoch": 8.915254237288135, - "grad_norm": 1.4884827136993408, - "learning_rate": 8.332663316582914e-05, - "loss": 5.4105, - "step": 17095 - }, - { - "epoch": 8.915775749674054, - "grad_norm": 1.4100183248519897, - "learning_rate": 8.332562814070352e-05, - "loss": 5.5505, - "step": 17096 - }, - { - "epoch": 8.916297262059974, - "grad_norm": 1.3131128549575806, - "learning_rate": 8.33246231155779e-05, - "loss": 5.3032, - "step": 17097 - }, - { - "epoch": 8.916818774445893, - "grad_norm": 1.5243116617202759, - "learning_rate": 8.332361809045226e-05, - "loss": 5.8592, - "step": 17098 - }, - { - "epoch": 8.917340286831813, - "grad_norm": 1.4357651472091675, - "learning_rate": 8.332261306532664e-05, - "loss": 5.3969, - "step": 17099 - }, - { - "epoch": 8.917861799217732, - "grad_norm": 1.4844194650650024, - "learning_rate": 8.3321608040201e-05, - "loss": 5.5607, - "step": 17100 - }, - { - "epoch": 8.91838331160365, - "grad_norm": 1.4648679494857788, - "learning_rate": 8.332060301507538e-05, - "loss": 5.7571, - "step": 17101 - }, - { - "epoch": 8.91890482398957, - "grad_norm": 1.299300193786621, - "learning_rate": 8.331959798994975e-05, - "loss": 5.4771, - "step": 17102 - }, - { - "epoch": 8.919426336375489, - "grad_norm": 1.5151951313018799, - "learning_rate": 8.331859296482412e-05, - "loss": 5.4314, - "step": 17103 - }, - { - "epoch": 8.919947848761408, - "grad_norm": 1.5636613368988037, - "learning_rate": 8.331758793969849e-05, - "loss": 4.9766, - "step": 17104 - }, - { - "epoch": 8.920469361147328, - "grad_norm": 1.5324138402938843, - "learning_rate": 8.331658291457287e-05, - "loss": 5.1772, - "step": 17105 - }, - { - "epoch": 8.920990873533245, - "grad_norm": 1.3634483814239502, - "learning_rate": 8.331557788944724e-05, - "loss": 5.55, - "step": 17106 - }, - { - "epoch": 8.921512385919165, - "grad_norm": 1.3414641618728638, - "learning_rate": 8.331457286432162e-05, - "loss": 5.5048, - "step": 17107 - }, - { - "epoch": 8.922033898305084, - "grad_norm": 1.5255392789840698, - "learning_rate": 8.331356783919599e-05, - "loss": 5.3767, - "step": 17108 - }, - { - "epoch": 8.922555410691004, - "grad_norm": 1.3961032629013062, - "learning_rate": 8.331256281407036e-05, - "loss": 5.1272, - "step": 17109 - }, - { - "epoch": 8.923076923076923, - "grad_norm": 1.5240453481674194, - "learning_rate": 8.331155778894473e-05, - "loss": 5.6604, - "step": 17110 - }, - { - "epoch": 8.923598435462843, - "grad_norm": 1.4089477062225342, - "learning_rate": 8.331055276381909e-05, - "loss": 5.3614, - "step": 17111 - }, - { - "epoch": 8.924119947848762, - "grad_norm": 1.4352165460586548, - "learning_rate": 8.330954773869347e-05, - "loss": 5.1746, - "step": 17112 - }, - { - "epoch": 8.92464146023468, - "grad_norm": 1.444793939590454, - "learning_rate": 8.330854271356783e-05, - "loss": 5.4358, - "step": 17113 - }, - { - "epoch": 8.9251629726206, - "grad_norm": 1.3942513465881348, - "learning_rate": 8.330753768844221e-05, - "loss": 5.9376, - "step": 17114 - }, - { - "epoch": 8.925684485006519, - "grad_norm": 1.451870322227478, - "learning_rate": 8.330653266331658e-05, - "loss": 5.8044, - "step": 17115 - }, - { - "epoch": 8.926205997392438, - "grad_norm": 1.3676447868347168, - "learning_rate": 8.330552763819095e-05, - "loss": 5.8257, - "step": 17116 - }, - { - "epoch": 8.926727509778358, - "grad_norm": 1.4487427473068237, - "learning_rate": 8.330452261306533e-05, - "loss": 5.64, - "step": 17117 - }, - { - "epoch": 8.927249022164276, - "grad_norm": 1.390188455581665, - "learning_rate": 8.330351758793971e-05, - "loss": 5.5017, - "step": 17118 - }, - { - "epoch": 8.927770534550195, - "grad_norm": 1.4026213884353638, - "learning_rate": 8.330251256281407e-05, - "loss": 5.6648, - "step": 17119 - }, - { - "epoch": 8.928292046936114, - "grad_norm": 1.46074640750885, - "learning_rate": 8.330150753768845e-05, - "loss": 4.9842, - "step": 17120 - }, - { - "epoch": 8.928813559322034, - "grad_norm": 1.4290302991867065, - "learning_rate": 8.330050251256282e-05, - "loss": 5.3563, - "step": 17121 - }, - { - "epoch": 8.929335071707953, - "grad_norm": 1.4640207290649414, - "learning_rate": 8.32994974874372e-05, - "loss": 5.4237, - "step": 17122 - }, - { - "epoch": 8.929856584093873, - "grad_norm": 1.5441086292266846, - "learning_rate": 8.329849246231156e-05, - "loss": 5.4458, - "step": 17123 - }, - { - "epoch": 8.930378096479792, - "grad_norm": 1.6134448051452637, - "learning_rate": 8.329748743718592e-05, - "loss": 5.5827, - "step": 17124 - }, - { - "epoch": 8.93089960886571, - "grad_norm": 1.3906546831130981, - "learning_rate": 8.32964824120603e-05, - "loss": 5.8959, - "step": 17125 - }, - { - "epoch": 8.93142112125163, - "grad_norm": 1.3615833520889282, - "learning_rate": 8.329547738693468e-05, - "loss": 5.5994, - "step": 17126 - }, - { - "epoch": 8.931942633637549, - "grad_norm": 1.5156500339508057, - "learning_rate": 8.329447236180906e-05, - "loss": 4.4772, - "step": 17127 - }, - { - "epoch": 8.932464146023468, - "grad_norm": 1.4692529439926147, - "learning_rate": 8.329346733668342e-05, - "loss": 5.3978, - "step": 17128 - }, - { - "epoch": 8.932985658409388, - "grad_norm": 1.4153026342391968, - "learning_rate": 8.32924623115578e-05, - "loss": 5.4921, - "step": 17129 - }, - { - "epoch": 8.933507170795306, - "grad_norm": 1.3382090330123901, - "learning_rate": 8.329145728643216e-05, - "loss": 5.7855, - "step": 17130 - }, - { - "epoch": 8.934028683181225, - "grad_norm": 1.4358071088790894, - "learning_rate": 8.329045226130654e-05, - "loss": 5.7591, - "step": 17131 - }, - { - "epoch": 8.934550195567144, - "grad_norm": 1.3905256986618042, - "learning_rate": 8.32894472361809e-05, - "loss": 5.4849, - "step": 17132 - }, - { - "epoch": 8.935071707953064, - "grad_norm": 1.313779354095459, - "learning_rate": 8.328844221105528e-05, - "loss": 5.8745, - "step": 17133 - }, - { - "epoch": 8.935593220338983, - "grad_norm": 1.3895442485809326, - "learning_rate": 8.328743718592965e-05, - "loss": 5.4586, - "step": 17134 - }, - { - "epoch": 8.936114732724903, - "grad_norm": 1.3851535320281982, - "learning_rate": 8.328643216080402e-05, - "loss": 5.3865, - "step": 17135 - }, - { - "epoch": 8.93663624511082, - "grad_norm": 1.594976544380188, - "learning_rate": 8.328542713567839e-05, - "loss": 4.9754, - "step": 17136 - }, - { - "epoch": 8.93715775749674, - "grad_norm": 1.4534224271774292, - "learning_rate": 8.328442211055277e-05, - "loss": 5.239, - "step": 17137 - }, - { - "epoch": 8.93767926988266, - "grad_norm": 1.4070262908935547, - "learning_rate": 8.328341708542714e-05, - "loss": 4.8981, - "step": 17138 - }, - { - "epoch": 8.938200782268579, - "grad_norm": 1.4809869527816772, - "learning_rate": 8.328241206030151e-05, - "loss": 5.8516, - "step": 17139 - }, - { - "epoch": 8.938722294654498, - "grad_norm": 1.370180368423462, - "learning_rate": 8.328140703517589e-05, - "loss": 5.5463, - "step": 17140 - }, - { - "epoch": 8.939243807040418, - "grad_norm": 1.2913397550582886, - "learning_rate": 8.328040201005025e-05, - "loss": 5.7666, - "step": 17141 - }, - { - "epoch": 8.939765319426336, - "grad_norm": 1.4915170669555664, - "learning_rate": 8.327939698492463e-05, - "loss": 5.5432, - "step": 17142 - }, - { - "epoch": 8.940286831812255, - "grad_norm": 1.4622982740402222, - "learning_rate": 8.327839195979899e-05, - "loss": 5.4715, - "step": 17143 - }, - { - "epoch": 8.940808344198174, - "grad_norm": 1.4105147123336792, - "learning_rate": 8.327738693467337e-05, - "loss": 5.797, - "step": 17144 - }, - { - "epoch": 8.941329856584094, - "grad_norm": 1.4037774801254272, - "learning_rate": 8.327638190954774e-05, - "loss": 5.0832, - "step": 17145 - }, - { - "epoch": 8.941851368970013, - "grad_norm": 1.387237310409546, - "learning_rate": 8.327537688442211e-05, - "loss": 5.2959, - "step": 17146 - }, - { - "epoch": 8.942372881355933, - "grad_norm": 1.4382861852645874, - "learning_rate": 8.327437185929649e-05, - "loss": 5.4297, - "step": 17147 - }, - { - "epoch": 8.94289439374185, - "grad_norm": 1.4246585369110107, - "learning_rate": 8.327336683417087e-05, - "loss": 5.8101, - "step": 17148 - }, - { - "epoch": 8.94341590612777, - "grad_norm": 1.3523129224777222, - "learning_rate": 8.327236180904523e-05, - "loss": 5.7402, - "step": 17149 - }, - { - "epoch": 8.94393741851369, - "grad_norm": 1.4960265159606934, - "learning_rate": 8.327135678391961e-05, - "loss": 5.2109, - "step": 17150 - }, - { - "epoch": 8.944458930899609, - "grad_norm": 1.402561068534851, - "learning_rate": 8.327035175879398e-05, - "loss": 5.6836, - "step": 17151 - }, - { - "epoch": 8.944980443285528, - "grad_norm": 1.3994759321212769, - "learning_rate": 8.326934673366834e-05, - "loss": 5.6714, - "step": 17152 - }, - { - "epoch": 8.945501955671448, - "grad_norm": 1.4177947044372559, - "learning_rate": 8.326834170854272e-05, - "loss": 5.4891, - "step": 17153 - }, - { - "epoch": 8.946023468057366, - "grad_norm": 1.376839518547058, - "learning_rate": 8.326733668341708e-05, - "loss": 5.7043, - "step": 17154 - }, - { - "epoch": 8.946544980443285, - "grad_norm": 1.4017196893692017, - "learning_rate": 8.326633165829146e-05, - "loss": 5.3827, - "step": 17155 - }, - { - "epoch": 8.947066492829205, - "grad_norm": 1.3988215923309326, - "learning_rate": 8.326532663316582e-05, - "loss": 5.4928, - "step": 17156 - }, - { - "epoch": 8.947588005215124, - "grad_norm": 1.4062068462371826, - "learning_rate": 8.32643216080402e-05, - "loss": 5.5322, - "step": 17157 - }, - { - "epoch": 8.948109517601043, - "grad_norm": 1.5087904930114746, - "learning_rate": 8.326331658291458e-05, - "loss": 5.2961, - "step": 17158 - }, - { - "epoch": 8.948631029986963, - "grad_norm": 1.4344910383224487, - "learning_rate": 8.326231155778896e-05, - "loss": 5.5631, - "step": 17159 - }, - { - "epoch": 8.94915254237288, - "grad_norm": 1.397058367729187, - "learning_rate": 8.326130653266332e-05, - "loss": 5.1979, - "step": 17160 - }, - { - "epoch": 8.9496740547588, - "grad_norm": 1.4380309581756592, - "learning_rate": 8.32603015075377e-05, - "loss": 5.1578, - "step": 17161 - }, - { - "epoch": 8.95019556714472, - "grad_norm": 1.4572501182556152, - "learning_rate": 8.325929648241206e-05, - "loss": 5.9931, - "step": 17162 - }, - { - "epoch": 8.950717079530639, - "grad_norm": 1.3975975513458252, - "learning_rate": 8.325829145728644e-05, - "loss": 5.3183, - "step": 17163 - }, - { - "epoch": 8.951238591916558, - "grad_norm": 1.431423306465149, - "learning_rate": 8.32572864321608e-05, - "loss": 5.472, - "step": 17164 - }, - { - "epoch": 8.951760104302478, - "grad_norm": 1.4654908180236816, - "learning_rate": 8.325628140703517e-05, - "loss": 5.6797, - "step": 17165 - }, - { - "epoch": 8.952281616688396, - "grad_norm": 1.4292670488357544, - "learning_rate": 8.325527638190955e-05, - "loss": 5.6124, - "step": 17166 - }, - { - "epoch": 8.952803129074315, - "grad_norm": 1.3400132656097412, - "learning_rate": 8.325427135678393e-05, - "loss": 5.7285, - "step": 17167 - }, - { - "epoch": 8.953324641460235, - "grad_norm": 1.3689686059951782, - "learning_rate": 8.32532663316583e-05, - "loss": 5.5468, - "step": 17168 - }, - { - "epoch": 8.953846153846154, - "grad_norm": 1.452040195465088, - "learning_rate": 8.325226130653267e-05, - "loss": 5.4681, - "step": 17169 - }, - { - "epoch": 8.954367666232073, - "grad_norm": 1.5705493688583374, - "learning_rate": 8.325125628140705e-05, - "loss": 5.0562, - "step": 17170 - }, - { - "epoch": 8.954889178617993, - "grad_norm": 1.4372966289520264, - "learning_rate": 8.325025125628141e-05, - "loss": 5.3552, - "step": 17171 - }, - { - "epoch": 8.95541069100391, - "grad_norm": 1.3601624965667725, - "learning_rate": 8.324924623115579e-05, - "loss": 5.5806, - "step": 17172 - }, - { - "epoch": 8.95593220338983, - "grad_norm": 1.4037408828735352, - "learning_rate": 8.324824120603015e-05, - "loss": 5.25, - "step": 17173 - }, - { - "epoch": 8.95645371577575, - "grad_norm": 1.609368085861206, - "learning_rate": 8.324723618090453e-05, - "loss": 5.4643, - "step": 17174 - }, - { - "epoch": 8.956975228161669, - "grad_norm": 1.3728997707366943, - "learning_rate": 8.32462311557789e-05, - "loss": 5.878, - "step": 17175 - }, - { - "epoch": 8.957496740547588, - "grad_norm": 1.404362678527832, - "learning_rate": 8.324522613065327e-05, - "loss": 5.4491, - "step": 17176 - }, - { - "epoch": 8.958018252933508, - "grad_norm": 1.4871209859848022, - "learning_rate": 8.324422110552764e-05, - "loss": 5.4258, - "step": 17177 - }, - { - "epoch": 8.958539765319426, - "grad_norm": 1.4671587944030762, - "learning_rate": 8.324321608040201e-05, - "loss": 5.1731, - "step": 17178 - }, - { - "epoch": 8.959061277705345, - "grad_norm": 1.4353214502334595, - "learning_rate": 8.324221105527639e-05, - "loss": 5.6285, - "step": 17179 - }, - { - "epoch": 8.959582790091265, - "grad_norm": 1.5662236213684082, - "learning_rate": 8.324120603015076e-05, - "loss": 5.4203, - "step": 17180 - }, - { - "epoch": 8.960104302477184, - "grad_norm": 1.4547046422958374, - "learning_rate": 8.324020100502513e-05, - "loss": 5.2743, - "step": 17181 - }, - { - "epoch": 8.960625814863103, - "grad_norm": 1.475999355316162, - "learning_rate": 8.32391959798995e-05, - "loss": 5.459, - "step": 17182 - }, - { - "epoch": 8.961147327249023, - "grad_norm": 1.5241751670837402, - "learning_rate": 8.323819095477388e-05, - "loss": 5.6888, - "step": 17183 - }, - { - "epoch": 8.96166883963494, - "grad_norm": 1.439688801765442, - "learning_rate": 8.323718592964824e-05, - "loss": 5.8426, - "step": 17184 - }, - { - "epoch": 8.96219035202086, - "grad_norm": 1.4469650983810425, - "learning_rate": 8.323618090452262e-05, - "loss": 5.7653, - "step": 17185 - }, - { - "epoch": 8.96271186440678, - "grad_norm": 1.5088584423065186, - "learning_rate": 8.323517587939698e-05, - "loss": 5.8094, - "step": 17186 - }, - { - "epoch": 8.963233376792699, - "grad_norm": 1.3987655639648438, - "learning_rate": 8.323417085427136e-05, - "loss": 5.6686, - "step": 17187 - }, - { - "epoch": 8.963754889178619, - "grad_norm": 1.471657633781433, - "learning_rate": 8.323316582914574e-05, - "loss": 5.4917, - "step": 17188 - }, - { - "epoch": 8.964276401564538, - "grad_norm": 1.5371453762054443, - "learning_rate": 8.323216080402012e-05, - "loss": 5.5069, - "step": 17189 - }, - { - "epoch": 8.964797913950456, - "grad_norm": 1.4816153049468994, - "learning_rate": 8.323115577889448e-05, - "loss": 5.6106, - "step": 17190 - }, - { - "epoch": 8.965319426336375, - "grad_norm": 1.3056435585021973, - "learning_rate": 8.323015075376884e-05, - "loss": 5.6304, - "step": 17191 - }, - { - "epoch": 8.965840938722295, - "grad_norm": 1.4310269355773926, - "learning_rate": 8.322914572864322e-05, - "loss": 5.7725, - "step": 17192 - }, - { - "epoch": 8.966362451108214, - "grad_norm": 1.3863972425460815, - "learning_rate": 8.322814070351759e-05, - "loss": 5.4714, - "step": 17193 - }, - { - "epoch": 8.966883963494134, - "grad_norm": 1.6251235008239746, - "learning_rate": 8.322713567839196e-05, - "loss": 5.3596, - "step": 17194 - }, - { - "epoch": 8.967405475880053, - "grad_norm": 1.4058809280395508, - "learning_rate": 8.322613065326633e-05, - "loss": 5.3678, - "step": 17195 - }, - { - "epoch": 8.96792698826597, - "grad_norm": 1.4945447444915771, - "learning_rate": 8.32251256281407e-05, - "loss": 5.4943, - "step": 17196 - }, - { - "epoch": 8.96844850065189, - "grad_norm": 1.4155802726745605, - "learning_rate": 8.322412060301507e-05, - "loss": 5.3265, - "step": 17197 - }, - { - "epoch": 8.96897001303781, - "grad_norm": 1.5083956718444824, - "learning_rate": 8.322311557788945e-05, - "loss": 5.5104, - "step": 17198 - }, - { - "epoch": 8.969491525423729, - "grad_norm": 1.5089318752288818, - "learning_rate": 8.322211055276383e-05, - "loss": 5.0366, - "step": 17199 - }, - { - "epoch": 8.970013037809649, - "grad_norm": 1.4194135665893555, - "learning_rate": 8.32211055276382e-05, - "loss": 5.6467, - "step": 17200 - }, - { - "epoch": 8.970534550195566, - "grad_norm": 1.4024479389190674, - "learning_rate": 8.322010050251257e-05, - "loss": 5.5535, - "step": 17201 - }, - { - "epoch": 8.971056062581486, - "grad_norm": 1.4617587327957153, - "learning_rate": 8.321909547738695e-05, - "loss": 5.131, - "step": 17202 - }, - { - "epoch": 8.971577574967405, - "grad_norm": 1.3386296033859253, - "learning_rate": 8.321809045226131e-05, - "loss": 5.2928, - "step": 17203 - }, - { - "epoch": 8.972099087353325, - "grad_norm": 1.3522703647613525, - "learning_rate": 8.321708542713567e-05, - "loss": 4.5587, - "step": 17204 - }, - { - "epoch": 8.972620599739244, - "grad_norm": 1.4241321086883545, - "learning_rate": 8.321608040201005e-05, - "loss": 5.3886, - "step": 17205 - }, - { - "epoch": 8.973142112125164, - "grad_norm": 1.6165413856506348, - "learning_rate": 8.321507537688442e-05, - "loss": 5.2495, - "step": 17206 - }, - { - "epoch": 8.973663624511083, - "grad_norm": 1.4865392446517944, - "learning_rate": 8.32140703517588e-05, - "loss": 5.6458, - "step": 17207 - }, - { - "epoch": 8.974185136897, - "grad_norm": 1.3910518884658813, - "learning_rate": 8.321306532663317e-05, - "loss": 5.6272, - "step": 17208 - }, - { - "epoch": 8.97470664928292, - "grad_norm": 1.538120150566101, - "learning_rate": 8.321206030150755e-05, - "loss": 4.7425, - "step": 17209 - }, - { - "epoch": 8.97522816166884, - "grad_norm": 1.379615306854248, - "learning_rate": 8.321105527638191e-05, - "loss": 5.5212, - "step": 17210 - }, - { - "epoch": 8.975749674054759, - "grad_norm": 1.3391060829162598, - "learning_rate": 8.321005025125629e-05, - "loss": 5.874, - "step": 17211 - }, - { - "epoch": 8.976271186440679, - "grad_norm": 1.5537376403808594, - "learning_rate": 8.320904522613066e-05, - "loss": 5.3268, - "step": 17212 - }, - { - "epoch": 8.976792698826596, - "grad_norm": 1.719254732131958, - "learning_rate": 8.320804020100503e-05, - "loss": 4.8647, - "step": 17213 - }, - { - "epoch": 8.977314211212516, - "grad_norm": 1.3998106718063354, - "learning_rate": 8.32070351758794e-05, - "loss": 5.1449, - "step": 17214 - }, - { - "epoch": 8.977835723598435, - "grad_norm": 1.490836501121521, - "learning_rate": 8.320603015075378e-05, - "loss": 5.778, - "step": 17215 - }, - { - "epoch": 8.978357235984355, - "grad_norm": 1.3912994861602783, - "learning_rate": 8.320502512562814e-05, - "loss": 5.5397, - "step": 17216 - }, - { - "epoch": 8.978878748370274, - "grad_norm": 1.4528592824935913, - "learning_rate": 8.32040201005025e-05, - "loss": 5.7737, - "step": 17217 - }, - { - "epoch": 8.979400260756194, - "grad_norm": 1.454712986946106, - "learning_rate": 8.320301507537688e-05, - "loss": 5.1977, - "step": 17218 - }, - { - "epoch": 8.979921773142113, - "grad_norm": 1.427297830581665, - "learning_rate": 8.320201005025126e-05, - "loss": 5.6389, - "step": 17219 - }, - { - "epoch": 8.98044328552803, - "grad_norm": 1.6764613389968872, - "learning_rate": 8.320100502512564e-05, - "loss": 5.2489, - "step": 17220 - }, - { - "epoch": 8.98096479791395, - "grad_norm": 1.4285928010940552, - "learning_rate": 8.32e-05, - "loss": 5.6645, - "step": 17221 - }, - { - "epoch": 8.98148631029987, - "grad_norm": 1.5021356344223022, - "learning_rate": 8.319899497487438e-05, - "loss": 5.5771, - "step": 17222 - }, - { - "epoch": 8.98200782268579, - "grad_norm": 1.5852656364440918, - "learning_rate": 8.319798994974875e-05, - "loss": 5.3452, - "step": 17223 - }, - { - "epoch": 8.982529335071709, - "grad_norm": 1.424446940422058, - "learning_rate": 8.319698492462312e-05, - "loss": 5.664, - "step": 17224 - }, - { - "epoch": 8.983050847457626, - "grad_norm": 1.4138952493667603, - "learning_rate": 8.319597989949749e-05, - "loss": 5.0664, - "step": 17225 - }, - { - "epoch": 8.983572359843546, - "grad_norm": 1.50666344165802, - "learning_rate": 8.319497487437187e-05, - "loss": 5.565, - "step": 17226 - }, - { - "epoch": 8.984093872229465, - "grad_norm": 1.3904057741165161, - "learning_rate": 8.319396984924623e-05, - "loss": 5.6808, - "step": 17227 - }, - { - "epoch": 8.984615384615385, - "grad_norm": 1.38749098777771, - "learning_rate": 8.319296482412061e-05, - "loss": 5.6827, - "step": 17228 - }, - { - "epoch": 8.985136897001304, - "grad_norm": 1.340425729751587, - "learning_rate": 8.319195979899498e-05, - "loss": 5.8358, - "step": 17229 - }, - { - "epoch": 8.985658409387224, - "grad_norm": 1.5228389501571655, - "learning_rate": 8.319095477386936e-05, - "loss": 5.3213, - "step": 17230 - }, - { - "epoch": 8.986179921773141, - "grad_norm": 1.5093159675598145, - "learning_rate": 8.318994974874373e-05, - "loss": 5.0826, - "step": 17231 - }, - { - "epoch": 8.98670143415906, - "grad_norm": 1.4177839756011963, - "learning_rate": 8.318894472361809e-05, - "loss": 4.9367, - "step": 17232 - }, - { - "epoch": 8.98722294654498, - "grad_norm": 1.586360216140747, - "learning_rate": 8.318793969849247e-05, - "loss": 5.6794, - "step": 17233 - }, - { - "epoch": 8.9877444589309, - "grad_norm": 1.5562132596969604, - "learning_rate": 8.318693467336683e-05, - "loss": 5.641, - "step": 17234 - }, - { - "epoch": 8.98826597131682, - "grad_norm": 1.4377762079238892, - "learning_rate": 8.318592964824121e-05, - "loss": 5.395, - "step": 17235 - }, - { - "epoch": 8.988787483702739, - "grad_norm": 1.4195791482925415, - "learning_rate": 8.318492462311558e-05, - "loss": 5.5753, - "step": 17236 - }, - { - "epoch": 8.989308996088656, - "grad_norm": 1.4816346168518066, - "learning_rate": 8.318391959798995e-05, - "loss": 5.6385, - "step": 17237 - }, - { - "epoch": 8.989830508474576, - "grad_norm": 1.4216594696044922, - "learning_rate": 8.318291457286432e-05, - "loss": 5.3342, - "step": 17238 - }, - { - "epoch": 8.990352020860495, - "grad_norm": 1.2920938730239868, - "learning_rate": 8.31819095477387e-05, - "loss": 4.6397, - "step": 17239 - }, - { - "epoch": 8.990873533246415, - "grad_norm": 1.636558175086975, - "learning_rate": 8.318090452261307e-05, - "loss": 5.5482, - "step": 17240 - }, - { - "epoch": 8.991395045632334, - "grad_norm": 1.4978289604187012, - "learning_rate": 8.317989949748745e-05, - "loss": 5.1398, - "step": 17241 - }, - { - "epoch": 8.991916558018254, - "grad_norm": 1.4544918537139893, - "learning_rate": 8.317889447236182e-05, - "loss": 5.8264, - "step": 17242 - }, - { - "epoch": 8.992438070404171, - "grad_norm": 1.4580154418945312, - "learning_rate": 8.31778894472362e-05, - "loss": 5.5585, - "step": 17243 - }, - { - "epoch": 8.99295958279009, - "grad_norm": 1.3980594873428345, - "learning_rate": 8.317688442211056e-05, - "loss": 5.4949, - "step": 17244 - }, - { - "epoch": 8.99348109517601, - "grad_norm": 1.5808533430099487, - "learning_rate": 8.317587939698492e-05, - "loss": 5.4413, - "step": 17245 - }, - { - "epoch": 8.99400260756193, - "grad_norm": 1.5741877555847168, - "learning_rate": 8.31748743718593e-05, - "loss": 5.6705, - "step": 17246 - }, - { - "epoch": 8.99452411994785, - "grad_norm": 1.430740237236023, - "learning_rate": 8.317386934673366e-05, - "loss": 6.04, - "step": 17247 - }, - { - "epoch": 8.995045632333769, - "grad_norm": 1.486871361732483, - "learning_rate": 8.317286432160804e-05, - "loss": 5.6867, - "step": 17248 - }, - { - "epoch": 8.995567144719686, - "grad_norm": 1.4932712316513062, - "learning_rate": 8.317185929648242e-05, - "loss": 5.8603, - "step": 17249 - }, - { - "epoch": 8.996088657105606, - "grad_norm": 1.428758144378662, - "learning_rate": 8.31708542713568e-05, - "loss": 5.8367, - "step": 17250 - }, - { - "epoch": 8.996610169491525, - "grad_norm": 1.4661887884140015, - "learning_rate": 8.316984924623116e-05, - "loss": 5.2528, - "step": 17251 - }, - { - "epoch": 8.997131681877445, - "grad_norm": 1.3328206539154053, - "learning_rate": 8.316884422110554e-05, - "loss": 5.4592, - "step": 17252 - }, - { - "epoch": 8.997653194263364, - "grad_norm": 1.4775805473327637, - "learning_rate": 8.31678391959799e-05, - "loss": 5.8215, - "step": 17253 - }, - { - "epoch": 8.998174706649284, - "grad_norm": 1.4234619140625, - "learning_rate": 8.316683417085428e-05, - "loss": 5.8893, - "step": 17254 - }, - { - "epoch": 8.998696219035201, - "grad_norm": 1.4394845962524414, - "learning_rate": 8.316582914572865e-05, - "loss": 5.51, - "step": 17255 - }, - { - "epoch": 8.99921773142112, - "grad_norm": 1.3854269981384277, - "learning_rate": 8.316482412060302e-05, - "loss": 5.4478, - "step": 17256 - }, - { - "epoch": 8.99973924380704, - "grad_norm": 1.4971160888671875, - "learning_rate": 8.316381909547739e-05, - "loss": 5.1449, - "step": 17257 - }, - { - "epoch": 9.00026075619296, - "grad_norm": 1.5786570310592651, - "learning_rate": 8.316281407035175e-05, - "loss": 5.5181, - "step": 17258 - }, - { - "epoch": 9.00078226857888, - "grad_norm": 1.384230136871338, - "learning_rate": 8.316180904522613e-05, - "loss": 5.6279, - "step": 17259 - }, - { - "epoch": 9.001303780964799, - "grad_norm": 1.6187585592269897, - "learning_rate": 8.316080402010051e-05, - "loss": 5.567, - "step": 17260 - }, - { - "epoch": 9.001825293350716, - "grad_norm": 1.3567012548446655, - "learning_rate": 8.315979899497489e-05, - "loss": 5.4745, - "step": 17261 - }, - { - "epoch": 9.002346805736636, - "grad_norm": 1.606665849685669, - "learning_rate": 8.315879396984925e-05, - "loss": 5.2077, - "step": 17262 - }, - { - "epoch": 9.002868318122555, - "grad_norm": 1.4134471416473389, - "learning_rate": 8.315778894472363e-05, - "loss": 5.0948, - "step": 17263 - }, - { - "epoch": 9.003389830508475, - "grad_norm": 1.4917067289352417, - "learning_rate": 8.315678391959799e-05, - "loss": 5.6457, - "step": 17264 - }, - { - "epoch": 9.003911342894394, - "grad_norm": 1.3551150560379028, - "learning_rate": 8.315577889447237e-05, - "loss": 5.7812, - "step": 17265 - }, - { - "epoch": 9.004432855280314, - "grad_norm": 1.3671259880065918, - "learning_rate": 8.315477386934673e-05, - "loss": 5.7403, - "step": 17266 - }, - { - "epoch": 9.004954367666231, - "grad_norm": 1.444583773612976, - "learning_rate": 8.315376884422111e-05, - "loss": 5.6212, - "step": 17267 - }, - { - "epoch": 9.00547588005215, - "grad_norm": 1.4186958074569702, - "learning_rate": 8.315276381909548e-05, - "loss": 5.7717, - "step": 17268 - }, - { - "epoch": 9.00599739243807, - "grad_norm": 1.2992233037948608, - "learning_rate": 8.315175879396985e-05, - "loss": 5.7591, - "step": 17269 - }, - { - "epoch": 9.00651890482399, - "grad_norm": 1.2047125101089478, - "learning_rate": 8.315075376884422e-05, - "loss": 4.8728, - "step": 17270 - }, - { - "epoch": 9.00704041720991, - "grad_norm": 1.3439866304397583, - "learning_rate": 8.31497487437186e-05, - "loss": 4.9014, - "step": 17271 - }, - { - "epoch": 9.007561929595829, - "grad_norm": 1.5586837530136108, - "learning_rate": 8.314874371859297e-05, - "loss": 5.1273, - "step": 17272 - }, - { - "epoch": 9.008083441981746, - "grad_norm": 1.4726762771606445, - "learning_rate": 8.314773869346734e-05, - "loss": 5.3694, - "step": 17273 - }, - { - "epoch": 9.008604954367666, - "grad_norm": 1.4158573150634766, - "learning_rate": 8.314673366834172e-05, - "loss": 5.9375, - "step": 17274 - }, - { - "epoch": 9.009126466753585, - "grad_norm": 1.310899019241333, - "learning_rate": 8.314572864321608e-05, - "loss": 5.6242, - "step": 17275 - }, - { - "epoch": 9.009647979139505, - "grad_norm": 1.2785108089447021, - "learning_rate": 8.314472361809046e-05, - "loss": 5.4763, - "step": 17276 - }, - { - "epoch": 9.010169491525424, - "grad_norm": 1.3256282806396484, - "learning_rate": 8.314371859296482e-05, - "loss": 5.9303, - "step": 17277 - }, - { - "epoch": 9.010691003911344, - "grad_norm": 1.2801897525787354, - "learning_rate": 8.31427135678392e-05, - "loss": 5.8318, - "step": 17278 - }, - { - "epoch": 9.011212516297261, - "grad_norm": 1.3779349327087402, - "learning_rate": 8.314170854271356e-05, - "loss": 5.8494, - "step": 17279 - }, - { - "epoch": 9.01173402868318, - "grad_norm": 1.4310824871063232, - "learning_rate": 8.314070351758794e-05, - "loss": 5.7356, - "step": 17280 - }, - { - "epoch": 9.0122555410691, - "grad_norm": 1.418982744216919, - "learning_rate": 8.313969849246232e-05, - "loss": 5.8391, - "step": 17281 - }, - { - "epoch": 9.01277705345502, - "grad_norm": 1.5339874029159546, - "learning_rate": 8.31386934673367e-05, - "loss": 5.0038, - "step": 17282 - }, - { - "epoch": 9.01329856584094, - "grad_norm": 1.429993987083435, - "learning_rate": 8.313768844221106e-05, - "loss": 5.7216, - "step": 17283 - }, - { - "epoch": 9.013820078226859, - "grad_norm": 1.5197460651397705, - "learning_rate": 8.313668341708543e-05, - "loss": 4.9276, - "step": 17284 - }, - { - "epoch": 9.014341590612776, - "grad_norm": 1.3862816095352173, - "learning_rate": 8.31356783919598e-05, - "loss": 5.5973, - "step": 17285 - }, - { - "epoch": 9.014863102998696, - "grad_norm": 1.3136045932769775, - "learning_rate": 8.313467336683417e-05, - "loss": 6.0376, - "step": 17286 - }, - { - "epoch": 9.015384615384615, - "grad_norm": 1.3364133834838867, - "learning_rate": 8.313366834170855e-05, - "loss": 5.9168, - "step": 17287 - }, - { - "epoch": 9.015906127770535, - "grad_norm": 1.4384169578552246, - "learning_rate": 8.313266331658291e-05, - "loss": 5.5916, - "step": 17288 - }, - { - "epoch": 9.016427640156454, - "grad_norm": 1.4226306676864624, - "learning_rate": 8.313165829145729e-05, - "loss": 5.1922, - "step": 17289 - }, - { - "epoch": 9.016949152542374, - "grad_norm": 1.2863487005233765, - "learning_rate": 8.313065326633165e-05, - "loss": 5.9147, - "step": 17290 - }, - { - "epoch": 9.017470664928291, - "grad_norm": 1.5965043306350708, - "learning_rate": 8.312964824120603e-05, - "loss": 5.3932, - "step": 17291 - }, - { - "epoch": 9.01799217731421, - "grad_norm": 1.622576117515564, - "learning_rate": 8.312864321608041e-05, - "loss": 4.6088, - "step": 17292 - }, - { - "epoch": 9.01851368970013, - "grad_norm": 1.4195871353149414, - "learning_rate": 8.312763819095479e-05, - "loss": 5.5237, - "step": 17293 - }, - { - "epoch": 9.01903520208605, - "grad_norm": 1.4993692636489868, - "learning_rate": 8.312663316582915e-05, - "loss": 5.5925, - "step": 17294 - }, - { - "epoch": 9.01955671447197, - "grad_norm": 1.4799340963363647, - "learning_rate": 8.312562814070353e-05, - "loss": 5.3647, - "step": 17295 - }, - { - "epoch": 9.020078226857889, - "grad_norm": 1.5004442930221558, - "learning_rate": 8.312462311557789e-05, - "loss": 5.459, - "step": 17296 - }, - { - "epoch": 9.020599739243806, - "grad_norm": 1.4664863348007202, - "learning_rate": 8.312361809045226e-05, - "loss": 5.3069, - "step": 17297 - }, - { - "epoch": 9.021121251629726, - "grad_norm": 1.2745188474655151, - "learning_rate": 8.312261306532663e-05, - "loss": 5.5265, - "step": 17298 - }, - { - "epoch": 9.021642764015645, - "grad_norm": 1.3095803260803223, - "learning_rate": 8.3121608040201e-05, - "loss": 5.3468, - "step": 17299 - }, - { - "epoch": 9.022164276401565, - "grad_norm": 1.4460837841033936, - "learning_rate": 8.312060301507538e-05, - "loss": 5.4027, - "step": 17300 - }, - { - "epoch": 9.022685788787484, - "grad_norm": 1.3879367113113403, - "learning_rate": 8.311959798994975e-05, - "loss": 5.5874, - "step": 17301 - }, - { - "epoch": 9.023207301173404, - "grad_norm": 1.3812978267669678, - "learning_rate": 8.311859296482413e-05, - "loss": 5.5985, - "step": 17302 - }, - { - "epoch": 9.023728813559321, - "grad_norm": 1.3877918720245361, - "learning_rate": 8.31175879396985e-05, - "loss": 5.5825, - "step": 17303 - }, - { - "epoch": 9.024250325945241, - "grad_norm": 1.4428832530975342, - "learning_rate": 8.311658291457287e-05, - "loss": 5.5169, - "step": 17304 - }, - { - "epoch": 9.02477183833116, - "grad_norm": 1.5336729288101196, - "learning_rate": 8.311557788944724e-05, - "loss": 5.0039, - "step": 17305 - }, - { - "epoch": 9.02529335071708, - "grad_norm": 1.446858525276184, - "learning_rate": 8.311457286432162e-05, - "loss": 5.1676, - "step": 17306 - }, - { - "epoch": 9.025814863103, - "grad_norm": 1.446319818496704, - "learning_rate": 8.311356783919598e-05, - "loss": 5.5209, - "step": 17307 - }, - { - "epoch": 9.026336375488917, - "grad_norm": 1.50642991065979, - "learning_rate": 8.311256281407036e-05, - "loss": 5.2162, - "step": 17308 - }, - { - "epoch": 9.026857887874836, - "grad_norm": 1.4276093244552612, - "learning_rate": 8.311155778894472e-05, - "loss": 4.945, - "step": 17309 - }, - { - "epoch": 9.027379400260756, - "grad_norm": 1.4663605690002441, - "learning_rate": 8.311055276381909e-05, - "loss": 5.4768, - "step": 17310 - }, - { - "epoch": 9.027900912646675, - "grad_norm": 1.2230736017227173, - "learning_rate": 8.310954773869347e-05, - "loss": 5.9437, - "step": 17311 - }, - { - "epoch": 9.028422425032595, - "grad_norm": 1.4952106475830078, - "learning_rate": 8.310854271356784e-05, - "loss": 5.3185, - "step": 17312 - }, - { - "epoch": 9.028943937418514, - "grad_norm": 1.389928936958313, - "learning_rate": 8.310753768844222e-05, - "loss": 5.5904, - "step": 17313 - }, - { - "epoch": 9.029465449804432, - "grad_norm": 1.3805091381072998, - "learning_rate": 8.310653266331659e-05, - "loss": 5.4408, - "step": 17314 - }, - { - "epoch": 9.029986962190351, - "grad_norm": 1.4228839874267578, - "learning_rate": 8.310552763819096e-05, - "loss": 5.9412, - "step": 17315 - }, - { - "epoch": 9.030508474576271, - "grad_norm": 1.4413777589797974, - "learning_rate": 8.310452261306533e-05, - "loss": 5.201, - "step": 17316 - }, - { - "epoch": 9.03102998696219, - "grad_norm": 1.4316178560256958, - "learning_rate": 8.31035175879397e-05, - "loss": 5.7298, - "step": 17317 - }, - { - "epoch": 9.03155149934811, - "grad_norm": 1.4059079885482788, - "learning_rate": 8.310251256281407e-05, - "loss": 5.9132, - "step": 17318 - }, - { - "epoch": 9.03207301173403, - "grad_norm": 1.5063568353652954, - "learning_rate": 8.310150753768845e-05, - "loss": 5.6178, - "step": 17319 - }, - { - "epoch": 9.032594524119947, - "grad_norm": 1.3380324840545654, - "learning_rate": 8.310050251256281e-05, - "loss": 5.7459, - "step": 17320 - }, - { - "epoch": 9.033116036505866, - "grad_norm": 1.4272377490997314, - "learning_rate": 8.309949748743719e-05, - "loss": 5.2959, - "step": 17321 - }, - { - "epoch": 9.033637548891786, - "grad_norm": 1.2505565881729126, - "learning_rate": 8.309849246231157e-05, - "loss": 5.6842, - "step": 17322 - }, - { - "epoch": 9.034159061277705, - "grad_norm": 1.3441941738128662, - "learning_rate": 8.309748743718595e-05, - "loss": 5.7448, - "step": 17323 - }, - { - "epoch": 9.034680573663625, - "grad_norm": 1.358316421508789, - "learning_rate": 8.309648241206031e-05, - "loss": 5.6825, - "step": 17324 - }, - { - "epoch": 9.035202086049544, - "grad_norm": 1.3717707395553589, - "learning_rate": 8.309547738693467e-05, - "loss": 5.4427, - "step": 17325 - }, - { - "epoch": 9.035723598435462, - "grad_norm": 1.417128324508667, - "learning_rate": 8.309447236180905e-05, - "loss": 5.6936, - "step": 17326 - }, - { - "epoch": 9.036245110821381, - "grad_norm": 1.3807530403137207, - "learning_rate": 8.309346733668342e-05, - "loss": 5.5494, - "step": 17327 - }, - { - "epoch": 9.036766623207301, - "grad_norm": 1.49567449092865, - "learning_rate": 8.30924623115578e-05, - "loss": 5.1268, - "step": 17328 - }, - { - "epoch": 9.03728813559322, - "grad_norm": 1.4016844034194946, - "learning_rate": 8.309145728643216e-05, - "loss": 5.6063, - "step": 17329 - }, - { - "epoch": 9.03780964797914, - "grad_norm": 1.5244462490081787, - "learning_rate": 8.309045226130654e-05, - "loss": 5.1669, - "step": 17330 - }, - { - "epoch": 9.03833116036506, - "grad_norm": 1.5828564167022705, - "learning_rate": 8.30894472361809e-05, - "loss": 5.0767, - "step": 17331 - }, - { - "epoch": 9.038852672750977, - "grad_norm": 1.3195710182189941, - "learning_rate": 8.308844221105528e-05, - "loss": 5.6305, - "step": 17332 - }, - { - "epoch": 9.039374185136897, - "grad_norm": 1.3506046533584595, - "learning_rate": 8.308743718592966e-05, - "loss": 5.714, - "step": 17333 - }, - { - "epoch": 9.039895697522816, - "grad_norm": 1.5116578340530396, - "learning_rate": 8.308643216080403e-05, - "loss": 5.0102, - "step": 17334 - }, - { - "epoch": 9.040417209908735, - "grad_norm": 1.4128819704055786, - "learning_rate": 8.30854271356784e-05, - "loss": 5.0252, - "step": 17335 - }, - { - "epoch": 9.040938722294655, - "grad_norm": 1.451920509338379, - "learning_rate": 8.308442211055278e-05, - "loss": 4.8412, - "step": 17336 - }, - { - "epoch": 9.041460234680574, - "grad_norm": 1.5551118850708008, - "learning_rate": 8.308341708542714e-05, - "loss": 5.1303, - "step": 17337 - }, - { - "epoch": 9.041981747066492, - "grad_norm": 1.519775152206421, - "learning_rate": 8.30824120603015e-05, - "loss": 5.2272, - "step": 17338 - }, - { - "epoch": 9.042503259452412, - "grad_norm": 1.5881339311599731, - "learning_rate": 8.308140703517588e-05, - "loss": 5.0547, - "step": 17339 - }, - { - "epoch": 9.043024771838331, - "grad_norm": 1.3865727186203003, - "learning_rate": 8.308040201005025e-05, - "loss": 5.3989, - "step": 17340 - }, - { - "epoch": 9.04354628422425, - "grad_norm": 1.478986144065857, - "learning_rate": 8.307939698492462e-05, - "loss": 5.2753, - "step": 17341 - }, - { - "epoch": 9.04406779661017, - "grad_norm": 1.4215189218521118, - "learning_rate": 8.3078391959799e-05, - "loss": 5.7989, - "step": 17342 - }, - { - "epoch": 9.04458930899609, - "grad_norm": 1.4156526327133179, - "learning_rate": 8.307738693467338e-05, - "loss": 5.6127, - "step": 17343 - }, - { - "epoch": 9.045110821382007, - "grad_norm": 1.3703389167785645, - "learning_rate": 8.307638190954774e-05, - "loss": 5.5435, - "step": 17344 - }, - { - "epoch": 9.045632333767927, - "grad_norm": 1.310840129852295, - "learning_rate": 8.307537688442212e-05, - "loss": 5.9149, - "step": 17345 - }, - { - "epoch": 9.046153846153846, - "grad_norm": 1.5163453817367554, - "learning_rate": 8.307437185929649e-05, - "loss": 5.2806, - "step": 17346 - }, - { - "epoch": 9.046675358539765, - "grad_norm": 1.6051056385040283, - "learning_rate": 8.307336683417086e-05, - "loss": 5.1681, - "step": 17347 - }, - { - "epoch": 9.047196870925685, - "grad_norm": 1.460837721824646, - "learning_rate": 8.307236180904523e-05, - "loss": 5.86, - "step": 17348 - }, - { - "epoch": 9.047718383311604, - "grad_norm": 1.6169523000717163, - "learning_rate": 8.30713567839196e-05, - "loss": 5.2894, - "step": 17349 - }, - { - "epoch": 9.048239895697522, - "grad_norm": 1.3713034391403198, - "learning_rate": 8.307035175879397e-05, - "loss": 5.6633, - "step": 17350 - }, - { - "epoch": 9.048761408083442, - "grad_norm": 1.4699265956878662, - "learning_rate": 8.306934673366833e-05, - "loss": 5.1366, - "step": 17351 - }, - { - "epoch": 9.049282920469361, - "grad_norm": 1.3074005842208862, - "learning_rate": 8.306834170854271e-05, - "loss": 5.9986, - "step": 17352 - }, - { - "epoch": 9.04980443285528, - "grad_norm": 1.4643789529800415, - "learning_rate": 8.306733668341709e-05, - "loss": 5.5953, - "step": 17353 - }, - { - "epoch": 9.0503259452412, - "grad_norm": 1.423485517501831, - "learning_rate": 8.306633165829147e-05, - "loss": 5.7128, - "step": 17354 - }, - { - "epoch": 9.05084745762712, - "grad_norm": 1.4116674661636353, - "learning_rate": 8.306532663316583e-05, - "loss": 5.7514, - "step": 17355 - }, - { - "epoch": 9.051368970013037, - "grad_norm": 1.5203075408935547, - "learning_rate": 8.306432160804021e-05, - "loss": 4.9239, - "step": 17356 - }, - { - "epoch": 9.051890482398957, - "grad_norm": 1.6594421863555908, - "learning_rate": 8.306331658291457e-05, - "loss": 5.5951, - "step": 17357 - }, - { - "epoch": 9.052411994784876, - "grad_norm": 1.4546432495117188, - "learning_rate": 8.306231155778895e-05, - "loss": 5.6273, - "step": 17358 - }, - { - "epoch": 9.052933507170795, - "grad_norm": 1.5356796979904175, - "learning_rate": 8.306130653266332e-05, - "loss": 5.7414, - "step": 17359 - }, - { - "epoch": 9.053455019556715, - "grad_norm": 1.3447059392929077, - "learning_rate": 8.30603015075377e-05, - "loss": 5.8532, - "step": 17360 - }, - { - "epoch": 9.053976531942634, - "grad_norm": 1.4360140562057495, - "learning_rate": 8.305929648241206e-05, - "loss": 5.8874, - "step": 17361 - }, - { - "epoch": 9.054498044328552, - "grad_norm": 1.5309480428695679, - "learning_rate": 8.305829145728644e-05, - "loss": 5.3664, - "step": 17362 - }, - { - "epoch": 9.055019556714472, - "grad_norm": 1.4308847188949585, - "learning_rate": 8.305728643216081e-05, - "loss": 5.4792, - "step": 17363 - }, - { - "epoch": 9.055541069100391, - "grad_norm": 1.4932876825332642, - "learning_rate": 8.305628140703518e-05, - "loss": 5.3515, - "step": 17364 - }, - { - "epoch": 9.05606258148631, - "grad_norm": 1.457777500152588, - "learning_rate": 8.305527638190956e-05, - "loss": 5.5464, - "step": 17365 - }, - { - "epoch": 9.05658409387223, - "grad_norm": 1.412912368774414, - "learning_rate": 8.305427135678392e-05, - "loss": 5.4111, - "step": 17366 - }, - { - "epoch": 9.05710560625815, - "grad_norm": 1.4156126976013184, - "learning_rate": 8.30532663316583e-05, - "loss": 5.7497, - "step": 17367 - }, - { - "epoch": 9.057627118644067, - "grad_norm": 1.5218912363052368, - "learning_rate": 8.305226130653266e-05, - "loss": 5.3347, - "step": 17368 - }, - { - "epoch": 9.058148631029987, - "grad_norm": 1.3184617757797241, - "learning_rate": 8.305125628140704e-05, - "loss": 5.5071, - "step": 17369 - }, - { - "epoch": 9.058670143415906, - "grad_norm": 1.438066005706787, - "learning_rate": 8.30502512562814e-05, - "loss": 5.4316, - "step": 17370 - }, - { - "epoch": 9.059191655801826, - "grad_norm": 1.3778554201126099, - "learning_rate": 8.304924623115578e-05, - "loss": 5.7027, - "step": 17371 - }, - { - "epoch": 9.059713168187745, - "grad_norm": 1.3240668773651123, - "learning_rate": 8.304824120603015e-05, - "loss": 5.2936, - "step": 17372 - }, - { - "epoch": 9.060234680573664, - "grad_norm": 1.397803783416748, - "learning_rate": 8.304723618090452e-05, - "loss": 5.7948, - "step": 17373 - }, - { - "epoch": 9.060756192959582, - "grad_norm": 1.392176628112793, - "learning_rate": 8.30462311557789e-05, - "loss": 5.3707, - "step": 17374 - }, - { - "epoch": 9.061277705345502, - "grad_norm": 1.3226953744888306, - "learning_rate": 8.304522613065328e-05, - "loss": 5.8663, - "step": 17375 - }, - { - "epoch": 9.061799217731421, - "grad_norm": 1.4812403917312622, - "learning_rate": 8.304422110552764e-05, - "loss": 5.3976, - "step": 17376 - }, - { - "epoch": 9.06232073011734, - "grad_norm": 1.3814200162887573, - "learning_rate": 8.304321608040201e-05, - "loss": 5.5654, - "step": 17377 - }, - { - "epoch": 9.06284224250326, - "grad_norm": 1.6280336380004883, - "learning_rate": 8.304221105527639e-05, - "loss": 5.4208, - "step": 17378 - }, - { - "epoch": 9.06336375488918, - "grad_norm": 1.4174515008926392, - "learning_rate": 8.304120603015075e-05, - "loss": 5.8252, - "step": 17379 - }, - { - "epoch": 9.063885267275097, - "grad_norm": 1.4717650413513184, - "learning_rate": 8.304020100502513e-05, - "loss": 5.554, - "step": 17380 - }, - { - "epoch": 9.064406779661017, - "grad_norm": 1.3201528787612915, - "learning_rate": 8.30391959798995e-05, - "loss": 5.8354, - "step": 17381 - }, - { - "epoch": 9.064928292046936, - "grad_norm": 1.5411276817321777, - "learning_rate": 8.303819095477387e-05, - "loss": 5.8163, - "step": 17382 - }, - { - "epoch": 9.065449804432856, - "grad_norm": 1.4123568534851074, - "learning_rate": 8.303718592964825e-05, - "loss": 5.7554, - "step": 17383 - }, - { - "epoch": 9.065971316818775, - "grad_norm": 1.412353754043579, - "learning_rate": 8.303618090452263e-05, - "loss": 5.7539, - "step": 17384 - }, - { - "epoch": 9.066492829204694, - "grad_norm": 1.4115382432937622, - "learning_rate": 8.303517587939699e-05, - "loss": 5.3396, - "step": 17385 - }, - { - "epoch": 9.067014341590612, - "grad_norm": 1.37235426902771, - "learning_rate": 8.303417085427137e-05, - "loss": 5.5629, - "step": 17386 - }, - { - "epoch": 9.067535853976532, - "grad_norm": 1.6963540315628052, - "learning_rate": 8.303316582914573e-05, - "loss": 5.3108, - "step": 17387 - }, - { - "epoch": 9.068057366362451, - "grad_norm": 1.3113943338394165, - "learning_rate": 8.303216080402011e-05, - "loss": 5.3469, - "step": 17388 - }, - { - "epoch": 9.06857887874837, - "grad_norm": 1.4021053314208984, - "learning_rate": 8.303115577889448e-05, - "loss": 5.7066, - "step": 17389 - }, - { - "epoch": 9.06910039113429, - "grad_norm": 2.1672706604003906, - "learning_rate": 8.303015075376884e-05, - "loss": 5.0522, - "step": 17390 - }, - { - "epoch": 9.06962190352021, - "grad_norm": 1.5698199272155762, - "learning_rate": 8.302914572864322e-05, - "loss": 5.3889, - "step": 17391 - }, - { - "epoch": 9.070143415906127, - "grad_norm": 1.4200937747955322, - "learning_rate": 8.302814070351758e-05, - "loss": 5.7938, - "step": 17392 - }, - { - "epoch": 9.070664928292047, - "grad_norm": 1.5251377820968628, - "learning_rate": 8.302713567839196e-05, - "loss": 5.3933, - "step": 17393 - }, - { - "epoch": 9.071186440677966, - "grad_norm": 1.4254982471466064, - "learning_rate": 8.302613065326634e-05, - "loss": 5.2304, - "step": 17394 - }, - { - "epoch": 9.071707953063886, - "grad_norm": 1.4910202026367188, - "learning_rate": 8.302512562814072e-05, - "loss": 5.8362, - "step": 17395 - }, - { - "epoch": 9.072229465449805, - "grad_norm": 1.446841835975647, - "learning_rate": 8.302412060301508e-05, - "loss": 5.736, - "step": 17396 - }, - { - "epoch": 9.072750977835724, - "grad_norm": 1.2596241235733032, - "learning_rate": 8.302311557788946e-05, - "loss": 4.8413, - "step": 17397 - }, - { - "epoch": 9.073272490221642, - "grad_norm": 1.5027480125427246, - "learning_rate": 8.302211055276382e-05, - "loss": 5.2697, - "step": 17398 - }, - { - "epoch": 9.073794002607562, - "grad_norm": 1.414673089981079, - "learning_rate": 8.30211055276382e-05, - "loss": 5.6959, - "step": 17399 - }, - { - "epoch": 9.074315514993481, - "grad_norm": 1.580088496208191, - "learning_rate": 8.302010050251256e-05, - "loss": 5.6353, - "step": 17400 - }, - { - "epoch": 9.0748370273794, - "grad_norm": 1.3119784593582153, - "learning_rate": 8.301909547738694e-05, - "loss": 5.9653, - "step": 17401 - }, - { - "epoch": 9.07535853976532, - "grad_norm": 1.441422939300537, - "learning_rate": 8.30180904522613e-05, - "loss": 5.4862, - "step": 17402 - }, - { - "epoch": 9.075880052151238, - "grad_norm": 1.4223519563674927, - "learning_rate": 8.301708542713568e-05, - "loss": 5.8194, - "step": 17403 - }, - { - "epoch": 9.076401564537157, - "grad_norm": 1.5598829984664917, - "learning_rate": 8.301608040201006e-05, - "loss": 5.5011, - "step": 17404 - }, - { - "epoch": 9.076923076923077, - "grad_norm": 1.478488564491272, - "learning_rate": 8.301507537688443e-05, - "loss": 5.3334, - "step": 17405 - }, - { - "epoch": 9.077444589308996, - "grad_norm": 1.3402127027511597, - "learning_rate": 8.30140703517588e-05, - "loss": 5.8466, - "step": 17406 - }, - { - "epoch": 9.077966101694916, - "grad_norm": 1.4737426042556763, - "learning_rate": 8.301306532663317e-05, - "loss": 5.646, - "step": 17407 - }, - { - "epoch": 9.078487614080835, - "grad_norm": 1.4537936449050903, - "learning_rate": 8.301206030150755e-05, - "loss": 5.9012, - "step": 17408 - }, - { - "epoch": 9.079009126466753, - "grad_norm": 1.4110138416290283, - "learning_rate": 8.301105527638191e-05, - "loss": 5.7321, - "step": 17409 - }, - { - "epoch": 9.079530638852672, - "grad_norm": 1.7334249019622803, - "learning_rate": 8.301005025125629e-05, - "loss": 5.2519, - "step": 17410 - }, - { - "epoch": 9.080052151238592, - "grad_norm": 1.6736717224121094, - "learning_rate": 8.300904522613065e-05, - "loss": 4.9881, - "step": 17411 - }, - { - "epoch": 9.080573663624511, - "grad_norm": 1.726289987564087, - "learning_rate": 8.300804020100503e-05, - "loss": 5.2542, - "step": 17412 - }, - { - "epoch": 9.08109517601043, - "grad_norm": 1.5112303495407104, - "learning_rate": 8.30070351758794e-05, - "loss": 5.8662, - "step": 17413 - }, - { - "epoch": 9.08161668839635, - "grad_norm": 1.8039460182189941, - "learning_rate": 8.300603015075377e-05, - "loss": 5.3206, - "step": 17414 - }, - { - "epoch": 9.082138200782268, - "grad_norm": 1.5248981714248657, - "learning_rate": 8.300502512562815e-05, - "loss": 5.3502, - "step": 17415 - }, - { - "epoch": 9.082659713168187, - "grad_norm": 1.3873485326766968, - "learning_rate": 8.300402010050253e-05, - "loss": 5.241, - "step": 17416 - }, - { - "epoch": 9.083181225554107, - "grad_norm": 1.4890539646148682, - "learning_rate": 8.300301507537689e-05, - "loss": 5.126, - "step": 17417 - }, - { - "epoch": 9.083702737940026, - "grad_norm": 1.336972951889038, - "learning_rate": 8.300201005025126e-05, - "loss": 5.9818, - "step": 17418 - }, - { - "epoch": 9.084224250325946, - "grad_norm": 1.4566400051116943, - "learning_rate": 8.300100502512563e-05, - "loss": 5.3876, - "step": 17419 - }, - { - "epoch": 9.084745762711865, - "grad_norm": 1.376192569732666, - "learning_rate": 8.3e-05, - "loss": 5.4696, - "step": 17420 - }, - { - "epoch": 9.085267275097783, - "grad_norm": 1.5269981622695923, - "learning_rate": 8.299899497487438e-05, - "loss": 5.351, - "step": 17421 - }, - { - "epoch": 9.085788787483702, - "grad_norm": 1.4576040506362915, - "learning_rate": 8.299798994974874e-05, - "loss": 5.1772, - "step": 17422 - }, - { - "epoch": 9.086310299869622, - "grad_norm": 1.4987351894378662, - "learning_rate": 8.299698492462312e-05, - "loss": 5.3615, - "step": 17423 - }, - { - "epoch": 9.086831812255541, - "grad_norm": 1.460443377494812, - "learning_rate": 8.299597989949748e-05, - "loss": 5.9458, - "step": 17424 - }, - { - "epoch": 9.08735332464146, - "grad_norm": 1.3801147937774658, - "learning_rate": 8.299497487437186e-05, - "loss": 5.702, - "step": 17425 - }, - { - "epoch": 9.08787483702738, - "grad_norm": 1.62882399559021, - "learning_rate": 8.299396984924624e-05, - "loss": 5.7623, - "step": 17426 - }, - { - "epoch": 9.088396349413298, - "grad_norm": 1.3255188465118408, - "learning_rate": 8.299296482412062e-05, - "loss": 5.7723, - "step": 17427 - }, - { - "epoch": 9.088917861799217, - "grad_norm": 1.4351930618286133, - "learning_rate": 8.299195979899498e-05, - "loss": 5.6463, - "step": 17428 - }, - { - "epoch": 9.089439374185137, - "grad_norm": 1.2872576713562012, - "learning_rate": 8.299095477386936e-05, - "loss": 5.8058, - "step": 17429 - }, - { - "epoch": 9.089960886571056, - "grad_norm": 1.4364506006240845, - "learning_rate": 8.298994974874372e-05, - "loss": 5.4134, - "step": 17430 - }, - { - "epoch": 9.090482398956976, - "grad_norm": 1.5186632871627808, - "learning_rate": 8.298894472361809e-05, - "loss": 5.4517, - "step": 17431 - }, - { - "epoch": 9.091003911342895, - "grad_norm": 1.4624884128570557, - "learning_rate": 8.298793969849246e-05, - "loss": 5.4913, - "step": 17432 - }, - { - "epoch": 9.091525423728813, - "grad_norm": 1.5325835943222046, - "learning_rate": 8.298693467336683e-05, - "loss": 5.2369, - "step": 17433 - }, - { - "epoch": 9.092046936114732, - "grad_norm": 1.5087807178497314, - "learning_rate": 8.29859296482412e-05, - "loss": 5.385, - "step": 17434 - }, - { - "epoch": 9.092568448500652, - "grad_norm": 1.4986335039138794, - "learning_rate": 8.298492462311558e-05, - "loss": 5.421, - "step": 17435 - }, - { - "epoch": 9.093089960886571, - "grad_norm": 1.4067739248275757, - "learning_rate": 8.298391959798996e-05, - "loss": 5.5257, - "step": 17436 - }, - { - "epoch": 9.09361147327249, - "grad_norm": 1.4479731321334839, - "learning_rate": 8.298291457286433e-05, - "loss": 5.2703, - "step": 17437 - }, - { - "epoch": 9.09413298565841, - "grad_norm": 1.4827561378479004, - "learning_rate": 8.29819095477387e-05, - "loss": 5.3402, - "step": 17438 - }, - { - "epoch": 9.094654498044328, - "grad_norm": 1.557031273841858, - "learning_rate": 8.298090452261307e-05, - "loss": 4.7915, - "step": 17439 - }, - { - "epoch": 9.095176010430247, - "grad_norm": 1.4283612966537476, - "learning_rate": 8.297989949748745e-05, - "loss": 5.7033, - "step": 17440 - }, - { - "epoch": 9.095697522816167, - "grad_norm": 1.421241283416748, - "learning_rate": 8.297889447236181e-05, - "loss": 5.7455, - "step": 17441 - }, - { - "epoch": 9.096219035202086, - "grad_norm": 1.439592957496643, - "learning_rate": 8.297788944723619e-05, - "loss": 5.7228, - "step": 17442 - }, - { - "epoch": 9.096740547588006, - "grad_norm": 1.3568246364593506, - "learning_rate": 8.297688442211055e-05, - "loss": 5.612, - "step": 17443 - }, - { - "epoch": 9.097262059973925, - "grad_norm": 1.4010051488876343, - "learning_rate": 8.297587939698492e-05, - "loss": 5.2714, - "step": 17444 - }, - { - "epoch": 9.097783572359843, - "grad_norm": 1.5350604057312012, - "learning_rate": 8.29748743718593e-05, - "loss": 4.9473, - "step": 17445 - }, - { - "epoch": 9.098305084745762, - "grad_norm": 1.5072736740112305, - "learning_rate": 8.297386934673367e-05, - "loss": 5.3916, - "step": 17446 - }, - { - "epoch": 9.098826597131682, - "grad_norm": 1.3866479396820068, - "learning_rate": 8.297286432160805e-05, - "loss": 5.5689, - "step": 17447 - }, - { - "epoch": 9.099348109517601, - "grad_norm": 1.3451699018478394, - "learning_rate": 8.297185929648241e-05, - "loss": 4.9109, - "step": 17448 - }, - { - "epoch": 9.09986962190352, - "grad_norm": 1.4360448122024536, - "learning_rate": 8.297085427135679e-05, - "loss": 5.2072, - "step": 17449 - }, - { - "epoch": 9.10039113428944, - "grad_norm": 1.5435600280761719, - "learning_rate": 8.296984924623116e-05, - "loss": 5.2725, - "step": 17450 - }, - { - "epoch": 9.100912646675358, - "grad_norm": 1.5193167924880981, - "learning_rate": 8.296884422110553e-05, - "loss": 5.9036, - "step": 17451 - }, - { - "epoch": 9.101434159061277, - "grad_norm": 1.440637230873108, - "learning_rate": 8.29678391959799e-05, - "loss": 5.225, - "step": 17452 - }, - { - "epoch": 9.101955671447197, - "grad_norm": 1.3297873735427856, - "learning_rate": 8.296683417085428e-05, - "loss": 5.7108, - "step": 17453 - }, - { - "epoch": 9.102477183833116, - "grad_norm": 1.4542567729949951, - "learning_rate": 8.296582914572864e-05, - "loss": 5.5267, - "step": 17454 - }, - { - "epoch": 9.102998696219036, - "grad_norm": 1.4624172449111938, - "learning_rate": 8.296482412060302e-05, - "loss": 5.6338, - "step": 17455 - }, - { - "epoch": 9.103520208604955, - "grad_norm": 1.3213274478912354, - "learning_rate": 8.29638190954774e-05, - "loss": 6.0327, - "step": 17456 - }, - { - "epoch": 9.104041720990873, - "grad_norm": 1.4577445983886719, - "learning_rate": 8.296281407035176e-05, - "loss": 5.2787, - "step": 17457 - }, - { - "epoch": 9.104563233376792, - "grad_norm": 1.3276745080947876, - "learning_rate": 8.296180904522614e-05, - "loss": 5.4953, - "step": 17458 - }, - { - "epoch": 9.105084745762712, - "grad_norm": 1.381100058555603, - "learning_rate": 8.29608040201005e-05, - "loss": 5.7648, - "step": 17459 - }, - { - "epoch": 9.105606258148631, - "grad_norm": 1.4709526300430298, - "learning_rate": 8.295979899497488e-05, - "loss": 5.0387, - "step": 17460 - }, - { - "epoch": 9.10612777053455, - "grad_norm": 1.349472999572754, - "learning_rate": 8.295879396984925e-05, - "loss": 5.4266, - "step": 17461 - }, - { - "epoch": 9.10664928292047, - "grad_norm": 1.4498356580734253, - "learning_rate": 8.295778894472362e-05, - "loss": 5.3874, - "step": 17462 - }, - { - "epoch": 9.107170795306388, - "grad_norm": 1.4817912578582764, - "learning_rate": 8.295678391959799e-05, - "loss": 5.569, - "step": 17463 - }, - { - "epoch": 9.107692307692307, - "grad_norm": 1.416117787361145, - "learning_rate": 8.295577889447237e-05, - "loss": 5.3167, - "step": 17464 - }, - { - "epoch": 9.108213820078227, - "grad_norm": 1.3756608963012695, - "learning_rate": 8.295477386934673e-05, - "loss": 5.6054, - "step": 17465 - }, - { - "epoch": 9.108735332464146, - "grad_norm": 1.3864023685455322, - "learning_rate": 8.295376884422111e-05, - "loss": 5.2917, - "step": 17466 - }, - { - "epoch": 9.109256844850066, - "grad_norm": 1.3921328783035278, - "learning_rate": 8.295276381909549e-05, - "loss": 5.4259, - "step": 17467 - }, - { - "epoch": 9.109778357235985, - "grad_norm": 1.4250743389129639, - "learning_rate": 8.295175879396986e-05, - "loss": 5.6739, - "step": 17468 - }, - { - "epoch": 9.110299869621903, - "grad_norm": 1.424360990524292, - "learning_rate": 8.295075376884423e-05, - "loss": 5.418, - "step": 17469 - }, - { - "epoch": 9.110821382007822, - "grad_norm": 1.4408375024795532, - "learning_rate": 8.294974874371859e-05, - "loss": 5.5368, - "step": 17470 - }, - { - "epoch": 9.111342894393742, - "grad_norm": 1.364951491355896, - "learning_rate": 8.294874371859297e-05, - "loss": 5.2961, - "step": 17471 - }, - { - "epoch": 9.111864406779661, - "grad_norm": 1.513312816619873, - "learning_rate": 8.294773869346733e-05, - "loss": 5.7127, - "step": 17472 - }, - { - "epoch": 9.11238591916558, - "grad_norm": 1.5675184726715088, - "learning_rate": 8.294673366834171e-05, - "loss": 5.3082, - "step": 17473 - }, - { - "epoch": 9.1129074315515, - "grad_norm": 1.508382797241211, - "learning_rate": 8.294572864321608e-05, - "loss": 5.6338, - "step": 17474 - }, - { - "epoch": 9.113428943937418, - "grad_norm": 1.4550862312316895, - "learning_rate": 8.294472361809045e-05, - "loss": 5.321, - "step": 17475 - }, - { - "epoch": 9.113950456323337, - "grad_norm": 1.6515109539031982, - "learning_rate": 8.294371859296483e-05, - "loss": 4.763, - "step": 17476 - }, - { - "epoch": 9.114471968709257, - "grad_norm": 1.5508466958999634, - "learning_rate": 8.294271356783921e-05, - "loss": 5.1989, - "step": 17477 - }, - { - "epoch": 9.114993481095176, - "grad_norm": 1.4244736433029175, - "learning_rate": 8.294170854271357e-05, - "loss": 5.5589, - "step": 17478 - }, - { - "epoch": 9.115514993481096, - "grad_norm": 1.4555209875106812, - "learning_rate": 8.294070351758795e-05, - "loss": 5.7463, - "step": 17479 - }, - { - "epoch": 9.116036505867015, - "grad_norm": 1.474456548690796, - "learning_rate": 8.293969849246232e-05, - "loss": 5.581, - "step": 17480 - }, - { - "epoch": 9.116558018252933, - "grad_norm": 1.2895766496658325, - "learning_rate": 8.29386934673367e-05, - "loss": 6.0398, - "step": 17481 - }, - { - "epoch": 9.117079530638852, - "grad_norm": 1.5545542240142822, - "learning_rate": 8.293768844221106e-05, - "loss": 5.29, - "step": 17482 - }, - { - "epoch": 9.117601043024772, - "grad_norm": 1.3431050777435303, - "learning_rate": 8.293668341708542e-05, - "loss": 5.2334, - "step": 17483 - }, - { - "epoch": 9.118122555410691, - "grad_norm": 1.355223298072815, - "learning_rate": 8.29356783919598e-05, - "loss": 5.2276, - "step": 17484 - }, - { - "epoch": 9.11864406779661, - "grad_norm": 1.3577569723129272, - "learning_rate": 8.293467336683416e-05, - "loss": 5.655, - "step": 17485 - }, - { - "epoch": 9.11916558018253, - "grad_norm": 1.4809763431549072, - "learning_rate": 8.293366834170854e-05, - "loss": 5.503, - "step": 17486 - }, - { - "epoch": 9.119687092568448, - "grad_norm": 1.509393334388733, - "learning_rate": 8.293266331658292e-05, - "loss": 5.3449, - "step": 17487 - }, - { - "epoch": 9.120208604954367, - "grad_norm": 1.4681872129440308, - "learning_rate": 8.29316582914573e-05, - "loss": 5.5672, - "step": 17488 - }, - { - "epoch": 9.120730117340287, - "grad_norm": 1.5185085535049438, - "learning_rate": 8.293065326633166e-05, - "loss": 5.6478, - "step": 17489 - }, - { - "epoch": 9.121251629726206, - "grad_norm": 1.5685802698135376, - "learning_rate": 8.292964824120604e-05, - "loss": 4.8326, - "step": 17490 - }, - { - "epoch": 9.121773142112126, - "grad_norm": 1.4697155952453613, - "learning_rate": 8.29286432160804e-05, - "loss": 5.7857, - "step": 17491 - }, - { - "epoch": 9.122294654498045, - "grad_norm": 1.4286540746688843, - "learning_rate": 8.292763819095478e-05, - "loss": 5.3961, - "step": 17492 - }, - { - "epoch": 9.122816166883963, - "grad_norm": 1.531938910484314, - "learning_rate": 8.292663316582915e-05, - "loss": 5.2973, - "step": 17493 - }, - { - "epoch": 9.123337679269882, - "grad_norm": 1.317456841468811, - "learning_rate": 8.292562814070352e-05, - "loss": 5.5813, - "step": 17494 - }, - { - "epoch": 9.123859191655802, - "grad_norm": 1.4007821083068848, - "learning_rate": 8.292462311557789e-05, - "loss": 5.5097, - "step": 17495 - }, - { - "epoch": 9.124380704041721, - "grad_norm": 1.4721264839172363, - "learning_rate": 8.292361809045227e-05, - "loss": 4.7658, - "step": 17496 - }, - { - "epoch": 9.12490221642764, - "grad_norm": 1.4705638885498047, - "learning_rate": 8.292261306532664e-05, - "loss": 5.6728, - "step": 17497 - }, - { - "epoch": 9.125423728813558, - "grad_norm": 1.3995777368545532, - "learning_rate": 8.292160804020101e-05, - "loss": 5.6023, - "step": 17498 - }, - { - "epoch": 9.125945241199478, - "grad_norm": 1.3988995552062988, - "learning_rate": 8.292060301507539e-05, - "loss": 5.5977, - "step": 17499 - }, - { - "epoch": 9.126466753585397, - "grad_norm": 1.3367202281951904, - "learning_rate": 8.291959798994975e-05, - "loss": 5.9081, - "step": 17500 - }, - { - "epoch": 9.126988265971317, - "grad_norm": 1.4216432571411133, - "learning_rate": 8.291859296482413e-05, - "loss": 5.3788, - "step": 17501 - }, - { - "epoch": 9.127509778357236, - "grad_norm": 1.3712421655654907, - "learning_rate": 8.291758793969849e-05, - "loss": 5.4006, - "step": 17502 - }, - { - "epoch": 9.128031290743156, - "grad_norm": 1.5073949098587036, - "learning_rate": 8.291658291457287e-05, - "loss": 5.3384, - "step": 17503 - }, - { - "epoch": 9.128552803129073, - "grad_norm": 1.4523956775665283, - "learning_rate": 8.291557788944723e-05, - "loss": 5.2102, - "step": 17504 - }, - { - "epoch": 9.129074315514993, - "grad_norm": 1.5132452249526978, - "learning_rate": 8.291457286432161e-05, - "loss": 5.1829, - "step": 17505 - }, - { - "epoch": 9.129595827900912, - "grad_norm": 1.4598431587219238, - "learning_rate": 8.291356783919598e-05, - "loss": 5.5705, - "step": 17506 - }, - { - "epoch": 9.130117340286832, - "grad_norm": 1.4349600076675415, - "learning_rate": 8.291256281407035e-05, - "loss": 5.5997, - "step": 17507 - }, - { - "epoch": 9.130638852672751, - "grad_norm": 1.4027385711669922, - "learning_rate": 8.291155778894473e-05, - "loss": 5.9634, - "step": 17508 - }, - { - "epoch": 9.13116036505867, - "grad_norm": 1.5839043855667114, - "learning_rate": 8.291055276381911e-05, - "loss": 5.7256, - "step": 17509 - }, - { - "epoch": 9.131681877444588, - "grad_norm": 1.6967203617095947, - "learning_rate": 8.290954773869347e-05, - "loss": 5.18, - "step": 17510 - }, - { - "epoch": 9.132203389830508, - "grad_norm": 1.4456632137298584, - "learning_rate": 8.290854271356784e-05, - "loss": 5.4095, - "step": 17511 - }, - { - "epoch": 9.132724902216427, - "grad_norm": 1.3805888891220093, - "learning_rate": 8.290753768844222e-05, - "loss": 5.6674, - "step": 17512 - }, - { - "epoch": 9.133246414602347, - "grad_norm": 1.3847852945327759, - "learning_rate": 8.290653266331658e-05, - "loss": 5.7867, - "step": 17513 - }, - { - "epoch": 9.133767926988266, - "grad_norm": 1.4477274417877197, - "learning_rate": 8.290552763819096e-05, - "loss": 5.0835, - "step": 17514 - }, - { - "epoch": 9.134289439374186, - "grad_norm": 1.3245892524719238, - "learning_rate": 8.290452261306532e-05, - "loss": 5.2861, - "step": 17515 - }, - { - "epoch": 9.134810951760103, - "grad_norm": 1.3178510665893555, - "learning_rate": 8.29035175879397e-05, - "loss": 5.1574, - "step": 17516 - }, - { - "epoch": 9.135332464146023, - "grad_norm": 1.418753743171692, - "learning_rate": 8.290251256281408e-05, - "loss": 5.545, - "step": 17517 - }, - { - "epoch": 9.135853976531942, - "grad_norm": 1.3742074966430664, - "learning_rate": 8.290150753768846e-05, - "loss": 5.7524, - "step": 17518 - }, - { - "epoch": 9.136375488917862, - "grad_norm": 1.5047922134399414, - "learning_rate": 8.290050251256282e-05, - "loss": 5.403, - "step": 17519 - }, - { - "epoch": 9.136897001303781, - "grad_norm": 1.4077062606811523, - "learning_rate": 8.28994974874372e-05, - "loss": 5.6404, - "step": 17520 - }, - { - "epoch": 9.1374185136897, - "grad_norm": 1.5135817527770996, - "learning_rate": 8.289849246231156e-05, - "loss": 5.0699, - "step": 17521 - }, - { - "epoch": 9.137940026075619, - "grad_norm": 1.4100223779678345, - "learning_rate": 8.289748743718594e-05, - "loss": 5.4393, - "step": 17522 - }, - { - "epoch": 9.138461538461538, - "grad_norm": 1.3604862689971924, - "learning_rate": 8.28964824120603e-05, - "loss": 5.8098, - "step": 17523 - }, - { - "epoch": 9.138983050847457, - "grad_norm": 1.3384654521942139, - "learning_rate": 8.289547738693467e-05, - "loss": 4.9263, - "step": 17524 - }, - { - "epoch": 9.139504563233377, - "grad_norm": 1.4652363061904907, - "learning_rate": 8.289447236180905e-05, - "loss": 5.4594, - "step": 17525 - }, - { - "epoch": 9.140026075619296, - "grad_norm": 1.56032133102417, - "learning_rate": 8.289346733668341e-05, - "loss": 5.3084, - "step": 17526 - }, - { - "epoch": 9.140547588005216, - "grad_norm": 1.4480454921722412, - "learning_rate": 8.289246231155779e-05, - "loss": 5.663, - "step": 17527 - }, - { - "epoch": 9.141069100391134, - "grad_norm": 1.4233883619308472, - "learning_rate": 8.289145728643217e-05, - "loss": 5.6823, - "step": 17528 - }, - { - "epoch": 9.141590612777053, - "grad_norm": 1.4505610466003418, - "learning_rate": 8.289045226130654e-05, - "loss": 5.2032, - "step": 17529 - }, - { - "epoch": 9.142112125162972, - "grad_norm": 1.4688643217086792, - "learning_rate": 8.288944723618091e-05, - "loss": 5.4064, - "step": 17530 - }, - { - "epoch": 9.142633637548892, - "grad_norm": 1.4583970308303833, - "learning_rate": 8.288844221105529e-05, - "loss": 5.5059, - "step": 17531 - }, - { - "epoch": 9.143155149934811, - "grad_norm": 1.4218381643295288, - "learning_rate": 8.288743718592965e-05, - "loss": 5.2915, - "step": 17532 - }, - { - "epoch": 9.14367666232073, - "grad_norm": 1.5849950313568115, - "learning_rate": 8.288643216080403e-05, - "loss": 5.0037, - "step": 17533 - }, - { - "epoch": 9.144198174706649, - "grad_norm": 1.4498271942138672, - "learning_rate": 8.288542713567839e-05, - "loss": 5.5432, - "step": 17534 - }, - { - "epoch": 9.144719687092568, - "grad_norm": 1.4505702257156372, - "learning_rate": 8.288442211055277e-05, - "loss": 5.26, - "step": 17535 - }, - { - "epoch": 9.145241199478487, - "grad_norm": 1.4658080339431763, - "learning_rate": 8.288341708542714e-05, - "loss": 5.5097, - "step": 17536 - }, - { - "epoch": 9.145762711864407, - "grad_norm": 1.3829625844955444, - "learning_rate": 8.288241206030151e-05, - "loss": 5.7773, - "step": 17537 - }, - { - "epoch": 9.146284224250326, - "grad_norm": 1.4326821565628052, - "learning_rate": 8.288140703517589e-05, - "loss": 5.5707, - "step": 17538 - }, - { - "epoch": 9.146805736636246, - "grad_norm": 1.4130661487579346, - "learning_rate": 8.288040201005025e-05, - "loss": 5.5991, - "step": 17539 - }, - { - "epoch": 9.147327249022164, - "grad_norm": 1.3829634189605713, - "learning_rate": 8.287939698492463e-05, - "loss": 5.5918, - "step": 17540 - }, - { - "epoch": 9.147848761408083, - "grad_norm": 1.3999228477478027, - "learning_rate": 8.2878391959799e-05, - "loss": 5.5576, - "step": 17541 - }, - { - "epoch": 9.148370273794002, - "grad_norm": 1.4937453269958496, - "learning_rate": 8.287738693467337e-05, - "loss": 5.6063, - "step": 17542 - }, - { - "epoch": 9.148891786179922, - "grad_norm": 1.5668882131576538, - "learning_rate": 8.287638190954774e-05, - "loss": 4.865, - "step": 17543 - }, - { - "epoch": 9.149413298565841, - "grad_norm": 1.6915048360824585, - "learning_rate": 8.287537688442212e-05, - "loss": 5.0527, - "step": 17544 - }, - { - "epoch": 9.14993481095176, - "grad_norm": 1.525158166885376, - "learning_rate": 8.287437185929648e-05, - "loss": 5.3446, - "step": 17545 - }, - { - "epoch": 9.150456323337679, - "grad_norm": 1.420320749282837, - "learning_rate": 8.287336683417086e-05, - "loss": 5.8751, - "step": 17546 - }, - { - "epoch": 9.150977835723598, - "grad_norm": 1.4821721315383911, - "learning_rate": 8.287236180904522e-05, - "loss": 5.5025, - "step": 17547 - }, - { - "epoch": 9.151499348109517, - "grad_norm": 1.4563132524490356, - "learning_rate": 8.28713567839196e-05, - "loss": 5.3199, - "step": 17548 - }, - { - "epoch": 9.152020860495437, - "grad_norm": 1.3764809370040894, - "learning_rate": 8.287035175879398e-05, - "loss": 5.6727, - "step": 17549 - }, - { - "epoch": 9.152542372881356, - "grad_norm": 1.3452060222625732, - "learning_rate": 8.286934673366834e-05, - "loss": 5.5378, - "step": 17550 - }, - { - "epoch": 9.153063885267276, - "grad_norm": 1.4641693830490112, - "learning_rate": 8.286834170854272e-05, - "loss": 5.5167, - "step": 17551 - }, - { - "epoch": 9.153585397653194, - "grad_norm": 1.349853515625, - "learning_rate": 8.286733668341709e-05, - "loss": 5.7151, - "step": 17552 - }, - { - "epoch": 9.154106910039113, - "grad_norm": 1.6118121147155762, - "learning_rate": 8.286633165829146e-05, - "loss": 5.4501, - "step": 17553 - }, - { - "epoch": 9.154628422425032, - "grad_norm": 1.591229796409607, - "learning_rate": 8.286532663316583e-05, - "loss": 5.3909, - "step": 17554 - }, - { - "epoch": 9.155149934810952, - "grad_norm": 1.4568344354629517, - "learning_rate": 8.28643216080402e-05, - "loss": 5.4359, - "step": 17555 - }, - { - "epoch": 9.155671447196871, - "grad_norm": 1.4254376888275146, - "learning_rate": 8.286331658291457e-05, - "loss": 5.698, - "step": 17556 - }, - { - "epoch": 9.156192959582791, - "grad_norm": 1.3674976825714111, - "learning_rate": 8.286231155778895e-05, - "loss": 5.515, - "step": 17557 - }, - { - "epoch": 9.156714471968709, - "grad_norm": 1.417150616645813, - "learning_rate": 8.286130653266333e-05, - "loss": 5.5988, - "step": 17558 - }, - { - "epoch": 9.157235984354628, - "grad_norm": 1.5065217018127441, - "learning_rate": 8.28603015075377e-05, - "loss": 4.7749, - "step": 17559 - }, - { - "epoch": 9.157757496740548, - "grad_norm": 1.4005374908447266, - "learning_rate": 8.285929648241207e-05, - "loss": 5.6332, - "step": 17560 - }, - { - "epoch": 9.158279009126467, - "grad_norm": 1.4230185747146606, - "learning_rate": 8.285829145728645e-05, - "loss": 5.9786, - "step": 17561 - }, - { - "epoch": 9.158800521512386, - "grad_norm": 1.3802645206451416, - "learning_rate": 8.285728643216081e-05, - "loss": 5.6485, - "step": 17562 - }, - { - "epoch": 9.159322033898306, - "grad_norm": 1.4748741388320923, - "learning_rate": 8.285628140703517e-05, - "loss": 5.4553, - "step": 17563 - }, - { - "epoch": 9.159843546284224, - "grad_norm": 1.4272775650024414, - "learning_rate": 8.285527638190955e-05, - "loss": 5.9084, - "step": 17564 - }, - { - "epoch": 9.160365058670143, - "grad_norm": 1.4472540616989136, - "learning_rate": 8.285427135678392e-05, - "loss": 5.4636, - "step": 17565 - }, - { - "epoch": 9.160886571056063, - "grad_norm": 1.4482941627502441, - "learning_rate": 8.28532663316583e-05, - "loss": 5.3368, - "step": 17566 - }, - { - "epoch": 9.161408083441982, - "grad_norm": 1.3999342918395996, - "learning_rate": 8.285226130653266e-05, - "loss": 5.6753, - "step": 17567 - }, - { - "epoch": 9.161929595827901, - "grad_norm": 1.4229156970977783, - "learning_rate": 8.285125628140704e-05, - "loss": 5.4792, - "step": 17568 - }, - { - "epoch": 9.162451108213821, - "grad_norm": 1.405462622642517, - "learning_rate": 8.285025125628141e-05, - "loss": 5.5454, - "step": 17569 - }, - { - "epoch": 9.162972620599739, - "grad_norm": 1.282599925994873, - "learning_rate": 8.284924623115579e-05, - "loss": 5.6419, - "step": 17570 - }, - { - "epoch": 9.163494132985658, - "grad_norm": 1.30425226688385, - "learning_rate": 8.284824120603016e-05, - "loss": 5.8811, - "step": 17571 - }, - { - "epoch": 9.164015645371578, - "grad_norm": 1.3920621871948242, - "learning_rate": 8.284723618090453e-05, - "loss": 5.601, - "step": 17572 - }, - { - "epoch": 9.164537157757497, - "grad_norm": 1.48684823513031, - "learning_rate": 8.28462311557789e-05, - "loss": 5.3812, - "step": 17573 - }, - { - "epoch": 9.165058670143416, - "grad_norm": 1.463987112045288, - "learning_rate": 8.284522613065328e-05, - "loss": 5.4253, - "step": 17574 - }, - { - "epoch": 9.165580182529336, - "grad_norm": 1.3474451303482056, - "learning_rate": 8.284422110552764e-05, - "loss": 5.3419, - "step": 17575 - }, - { - "epoch": 9.166101694915254, - "grad_norm": 1.5164052248001099, - "learning_rate": 8.2843216080402e-05, - "loss": 5.5013, - "step": 17576 - }, - { - "epoch": 9.166623207301173, - "grad_norm": 1.4014441967010498, - "learning_rate": 8.284221105527638e-05, - "loss": 5.656, - "step": 17577 - }, - { - "epoch": 9.167144719687093, - "grad_norm": 1.3937108516693115, - "learning_rate": 8.284120603015076e-05, - "loss": 5.4745, - "step": 17578 - }, - { - "epoch": 9.167666232073012, - "grad_norm": 1.3265430927276611, - "learning_rate": 8.284020100502514e-05, - "loss": 5.7653, - "step": 17579 - }, - { - "epoch": 9.168187744458931, - "grad_norm": 1.3760735988616943, - "learning_rate": 8.28391959798995e-05, - "loss": 5.7452, - "step": 17580 - }, - { - "epoch": 9.16870925684485, - "grad_norm": 1.3954495191574097, - "learning_rate": 8.283819095477388e-05, - "loss": 5.6599, - "step": 17581 - }, - { - "epoch": 9.169230769230769, - "grad_norm": 1.5597609281539917, - "learning_rate": 8.283718592964824e-05, - "loss": 5.8035, - "step": 17582 - }, - { - "epoch": 9.169752281616688, - "grad_norm": 1.377284049987793, - "learning_rate": 8.283618090452262e-05, - "loss": 5.7174, - "step": 17583 - }, - { - "epoch": 9.170273794002608, - "grad_norm": 1.4259051084518433, - "learning_rate": 8.283517587939699e-05, - "loss": 5.7643, - "step": 17584 - }, - { - "epoch": 9.170795306388527, - "grad_norm": 1.56025230884552, - "learning_rate": 8.283417085427136e-05, - "loss": 4.7893, - "step": 17585 - }, - { - "epoch": 9.171316818774446, - "grad_norm": 1.4201688766479492, - "learning_rate": 8.283316582914573e-05, - "loss": 5.2393, - "step": 17586 - }, - { - "epoch": 9.171838331160366, - "grad_norm": 1.4882559776306152, - "learning_rate": 8.28321608040201e-05, - "loss": 5.1553, - "step": 17587 - }, - { - "epoch": 9.172359843546284, - "grad_norm": 1.48074471950531, - "learning_rate": 8.283115577889447e-05, - "loss": 5.1222, - "step": 17588 - }, - { - "epoch": 9.172881355932203, - "grad_norm": 1.3785502910614014, - "learning_rate": 8.283015075376885e-05, - "loss": 5.5778, - "step": 17589 - }, - { - "epoch": 9.173402868318123, - "grad_norm": 1.4323077201843262, - "learning_rate": 8.282914572864323e-05, - "loss": 5.7063, - "step": 17590 - }, - { - "epoch": 9.173924380704042, - "grad_norm": 1.4603569507598877, - "learning_rate": 8.282814070351759e-05, - "loss": 5.5582, - "step": 17591 - }, - { - "epoch": 9.174445893089962, - "grad_norm": 1.5300102233886719, - "learning_rate": 8.282713567839197e-05, - "loss": 5.5465, - "step": 17592 - }, - { - "epoch": 9.17496740547588, - "grad_norm": 1.458500623703003, - "learning_rate": 8.282613065326633e-05, - "loss": 5.1887, - "step": 17593 - }, - { - "epoch": 9.175488917861799, - "grad_norm": 1.482334017753601, - "learning_rate": 8.282512562814071e-05, - "loss": 5.401, - "step": 17594 - }, - { - "epoch": 9.176010430247718, - "grad_norm": 1.440600037574768, - "learning_rate": 8.282412060301507e-05, - "loss": 5.6056, - "step": 17595 - }, - { - "epoch": 9.176531942633638, - "grad_norm": 1.4747061729431152, - "learning_rate": 8.282311557788945e-05, - "loss": 5.5679, - "step": 17596 - }, - { - "epoch": 9.177053455019557, - "grad_norm": 1.4951287508010864, - "learning_rate": 8.282211055276382e-05, - "loss": 5.5671, - "step": 17597 - }, - { - "epoch": 9.177574967405477, - "grad_norm": 1.364728569984436, - "learning_rate": 8.28211055276382e-05, - "loss": 5.63, - "step": 17598 - }, - { - "epoch": 9.178096479791394, - "grad_norm": 1.4617865085601807, - "learning_rate": 8.282010050251256e-05, - "loss": 5.2295, - "step": 17599 - }, - { - "epoch": 9.178617992177314, - "grad_norm": 1.9245483875274658, - "learning_rate": 8.281909547738694e-05, - "loss": 5.4074, - "step": 17600 - }, - { - "epoch": 9.179139504563233, - "grad_norm": 1.4554659128189087, - "learning_rate": 8.281809045226131e-05, - "loss": 5.7127, - "step": 17601 - }, - { - "epoch": 9.179661016949153, - "grad_norm": 1.355150818824768, - "learning_rate": 8.281708542713569e-05, - "loss": 5.7414, - "step": 17602 - }, - { - "epoch": 9.180182529335072, - "grad_norm": 1.4895449876785278, - "learning_rate": 8.281608040201006e-05, - "loss": 5.5627, - "step": 17603 - }, - { - "epoch": 9.180704041720992, - "grad_norm": 1.3890935182571411, - "learning_rate": 8.281507537688442e-05, - "loss": 5.4939, - "step": 17604 - }, - { - "epoch": 9.18122555410691, - "grad_norm": 1.5726248025894165, - "learning_rate": 8.28140703517588e-05, - "loss": 5.2488, - "step": 17605 - }, - { - "epoch": 9.181747066492829, - "grad_norm": 1.4720786809921265, - "learning_rate": 8.281306532663316e-05, - "loss": 5.9398, - "step": 17606 - }, - { - "epoch": 9.182268578878748, - "grad_norm": 1.4492038488388062, - "learning_rate": 8.281206030150754e-05, - "loss": 5.5169, - "step": 17607 - }, - { - "epoch": 9.182790091264668, - "grad_norm": 1.5100561380386353, - "learning_rate": 8.28110552763819e-05, - "loss": 5.426, - "step": 17608 - }, - { - "epoch": 9.183311603650587, - "grad_norm": 1.3984851837158203, - "learning_rate": 8.281005025125628e-05, - "loss": 5.514, - "step": 17609 - }, - { - "epoch": 9.183833116036507, - "grad_norm": 1.3936129808425903, - "learning_rate": 8.280904522613066e-05, - "loss": 5.6657, - "step": 17610 - }, - { - "epoch": 9.184354628422424, - "grad_norm": 1.3618017435073853, - "learning_rate": 8.280804020100504e-05, - "loss": 5.5397, - "step": 17611 - }, - { - "epoch": 9.184876140808344, - "grad_norm": 1.537179946899414, - "learning_rate": 8.28070351758794e-05, - "loss": 5.4335, - "step": 17612 - }, - { - "epoch": 9.185397653194263, - "grad_norm": 1.502042293548584, - "learning_rate": 8.280603015075378e-05, - "loss": 5.4586, - "step": 17613 - }, - { - "epoch": 9.185919165580183, - "grad_norm": 1.4345784187316895, - "learning_rate": 8.280502512562814e-05, - "loss": 5.5487, - "step": 17614 - }, - { - "epoch": 9.186440677966102, - "grad_norm": 1.3357983827590942, - "learning_rate": 8.280402010050252e-05, - "loss": 4.9328, - "step": 17615 - }, - { - "epoch": 9.186962190352022, - "grad_norm": 1.6318801641464233, - "learning_rate": 8.280301507537689e-05, - "loss": 5.2136, - "step": 17616 - }, - { - "epoch": 9.18748370273794, - "grad_norm": 1.5728073120117188, - "learning_rate": 8.280201005025125e-05, - "loss": 5.1954, - "step": 17617 - }, - { - "epoch": 9.188005215123859, - "grad_norm": 1.7080765962600708, - "learning_rate": 8.280100502512563e-05, - "loss": 5.4728, - "step": 17618 - }, - { - "epoch": 9.188526727509778, - "grad_norm": 1.4476102590560913, - "learning_rate": 8.28e-05, - "loss": 4.7064, - "step": 17619 - }, - { - "epoch": 9.189048239895698, - "grad_norm": 1.7387069463729858, - "learning_rate": 8.279899497487437e-05, - "loss": 5.3665, - "step": 17620 - }, - { - "epoch": 9.189569752281617, - "grad_norm": 1.3859039545059204, - "learning_rate": 8.279798994974875e-05, - "loss": 5.635, - "step": 17621 - }, - { - "epoch": 9.190091264667537, - "grad_norm": 1.3331228494644165, - "learning_rate": 8.279698492462313e-05, - "loss": 5.6012, - "step": 17622 - }, - { - "epoch": 9.190612777053454, - "grad_norm": 1.3810651302337646, - "learning_rate": 8.279597989949749e-05, - "loss": 5.4908, - "step": 17623 - }, - { - "epoch": 9.191134289439374, - "grad_norm": 1.423636555671692, - "learning_rate": 8.279497487437187e-05, - "loss": 5.4396, - "step": 17624 - }, - { - "epoch": 9.191655801825293, - "grad_norm": 1.4310568571090698, - "learning_rate": 8.279396984924623e-05, - "loss": 5.5062, - "step": 17625 - }, - { - "epoch": 9.192177314211213, - "grad_norm": 1.2940860986709595, - "learning_rate": 8.279296482412061e-05, - "loss": 5.7708, - "step": 17626 - }, - { - "epoch": 9.192698826597132, - "grad_norm": 1.371362328529358, - "learning_rate": 8.279195979899498e-05, - "loss": 5.4643, - "step": 17627 - }, - { - "epoch": 9.193220338983052, - "grad_norm": 1.3716537952423096, - "learning_rate": 8.279095477386935e-05, - "loss": 5.6986, - "step": 17628 - }, - { - "epoch": 9.19374185136897, - "grad_norm": 1.4497904777526855, - "learning_rate": 8.278994974874372e-05, - "loss": 5.6092, - "step": 17629 - }, - { - "epoch": 9.194263363754889, - "grad_norm": 1.3885223865509033, - "learning_rate": 8.27889447236181e-05, - "loss": 5.7769, - "step": 17630 - }, - { - "epoch": 9.194784876140808, - "grad_norm": 1.4163585901260376, - "learning_rate": 8.278793969849247e-05, - "loss": 5.6964, - "step": 17631 - }, - { - "epoch": 9.195306388526728, - "grad_norm": 1.423647165298462, - "learning_rate": 8.278693467336684e-05, - "loss": 5.7558, - "step": 17632 - }, - { - "epoch": 9.195827900912647, - "grad_norm": 1.3835922479629517, - "learning_rate": 8.278592964824122e-05, - "loss": 5.5271, - "step": 17633 - }, - { - "epoch": 9.196349413298567, - "grad_norm": 1.454591155052185, - "learning_rate": 8.278492462311558e-05, - "loss": 5.5688, - "step": 17634 - }, - { - "epoch": 9.196870925684484, - "grad_norm": 1.486068844795227, - "learning_rate": 8.278391959798996e-05, - "loss": 5.4146, - "step": 17635 - }, - { - "epoch": 9.197392438070404, - "grad_norm": 1.4386987686157227, - "learning_rate": 8.278291457286432e-05, - "loss": 5.4193, - "step": 17636 - }, - { - "epoch": 9.197913950456323, - "grad_norm": 1.450072169303894, - "learning_rate": 8.27819095477387e-05, - "loss": 5.8757, - "step": 17637 - }, - { - "epoch": 9.198435462842243, - "grad_norm": 1.4008394479751587, - "learning_rate": 8.278090452261306e-05, - "loss": 5.4398, - "step": 17638 - }, - { - "epoch": 9.198956975228162, - "grad_norm": 1.447314739227295, - "learning_rate": 8.277989949748744e-05, - "loss": 5.1488, - "step": 17639 - }, - { - "epoch": 9.199478487614082, - "grad_norm": 1.4682559967041016, - "learning_rate": 8.27788944723618e-05, - "loss": 4.8958, - "step": 17640 - }, - { - "epoch": 9.2, - "grad_norm": 1.4638503789901733, - "learning_rate": 8.277788944723618e-05, - "loss": 5.4602, - "step": 17641 - }, - { - "epoch": 9.200521512385919, - "grad_norm": 1.723026156425476, - "learning_rate": 8.277688442211056e-05, - "loss": 4.9565, - "step": 17642 - }, - { - "epoch": 9.201043024771838, - "grad_norm": 1.330110788345337, - "learning_rate": 8.277587939698493e-05, - "loss": 5.6894, - "step": 17643 - }, - { - "epoch": 9.201564537157758, - "grad_norm": 1.5313704013824463, - "learning_rate": 8.27748743718593e-05, - "loss": 5.2646, - "step": 17644 - }, - { - "epoch": 9.202086049543677, - "grad_norm": 1.430442214012146, - "learning_rate": 8.277386934673367e-05, - "loss": 5.5454, - "step": 17645 - }, - { - "epoch": 9.202607561929597, - "grad_norm": 1.3994237184524536, - "learning_rate": 8.277286432160805e-05, - "loss": 5.5042, - "step": 17646 - }, - { - "epoch": 9.203129074315514, - "grad_norm": 1.3784518241882324, - "learning_rate": 8.277185929648241e-05, - "loss": 5.2501, - "step": 17647 - }, - { - "epoch": 9.203650586701434, - "grad_norm": 1.4315900802612305, - "learning_rate": 8.277085427135679e-05, - "loss": 5.4179, - "step": 17648 - }, - { - "epoch": 9.204172099087353, - "grad_norm": 1.4309481382369995, - "learning_rate": 8.276984924623115e-05, - "loss": 5.5097, - "step": 17649 - }, - { - "epoch": 9.204693611473273, - "grad_norm": 1.5385570526123047, - "learning_rate": 8.276884422110553e-05, - "loss": 5.3369, - "step": 17650 - }, - { - "epoch": 9.205215123859192, - "grad_norm": 1.3369423151016235, - "learning_rate": 8.276783919597991e-05, - "loss": 5.7796, - "step": 17651 - }, - { - "epoch": 9.205736636245112, - "grad_norm": 3.2356762886047363, - "learning_rate": 8.276683417085429e-05, - "loss": 4.7624, - "step": 17652 - }, - { - "epoch": 9.20625814863103, - "grad_norm": 1.574173927307129, - "learning_rate": 8.276582914572865e-05, - "loss": 5.7549, - "step": 17653 - }, - { - "epoch": 9.206779661016949, - "grad_norm": 1.439113736152649, - "learning_rate": 8.276482412060303e-05, - "loss": 5.7702, - "step": 17654 - }, - { - "epoch": 9.207301173402868, - "grad_norm": 1.8748517036437988, - "learning_rate": 8.276381909547739e-05, - "loss": 5.1836, - "step": 17655 - }, - { - "epoch": 9.207822685788788, - "grad_norm": 1.4921178817749023, - "learning_rate": 8.276281407035176e-05, - "loss": 5.6261, - "step": 17656 - }, - { - "epoch": 9.208344198174707, - "grad_norm": 1.6219825744628906, - "learning_rate": 8.276180904522613e-05, - "loss": 5.3317, - "step": 17657 - }, - { - "epoch": 9.208865710560627, - "grad_norm": 1.4128663539886475, - "learning_rate": 8.27608040201005e-05, - "loss": 5.7334, - "step": 17658 - }, - { - "epoch": 9.209387222946544, - "grad_norm": 1.4538252353668213, - "learning_rate": 8.275979899497488e-05, - "loss": 5.2933, - "step": 17659 - }, - { - "epoch": 9.209908735332464, - "grad_norm": 1.4339607954025269, - "learning_rate": 8.275879396984924e-05, - "loss": 4.9229, - "step": 17660 - }, - { - "epoch": 9.210430247718383, - "grad_norm": 1.4889628887176514, - "learning_rate": 8.275778894472362e-05, - "loss": 5.8008, - "step": 17661 - }, - { - "epoch": 9.210951760104303, - "grad_norm": 1.4561161994934082, - "learning_rate": 8.2756783919598e-05, - "loss": 5.6648, - "step": 17662 - }, - { - "epoch": 9.211473272490222, - "grad_norm": 1.4771101474761963, - "learning_rate": 8.275577889447237e-05, - "loss": 5.5873, - "step": 17663 - }, - { - "epoch": 9.211994784876142, - "grad_norm": 1.453764796257019, - "learning_rate": 8.275477386934674e-05, - "loss": 5.1223, - "step": 17664 - }, - { - "epoch": 9.21251629726206, - "grad_norm": 1.5894732475280762, - "learning_rate": 8.275376884422112e-05, - "loss": 5.1655, - "step": 17665 - }, - { - "epoch": 9.213037809647979, - "grad_norm": 1.532426118850708, - "learning_rate": 8.275276381909548e-05, - "loss": 5.0926, - "step": 17666 - }, - { - "epoch": 9.213559322033898, - "grad_norm": 1.4770989418029785, - "learning_rate": 8.275175879396986e-05, - "loss": 5.681, - "step": 17667 - }, - { - "epoch": 9.214080834419818, - "grad_norm": 1.4984917640686035, - "learning_rate": 8.275075376884422e-05, - "loss": 5.4786, - "step": 17668 - }, - { - "epoch": 9.214602346805737, - "grad_norm": 1.602663278579712, - "learning_rate": 8.274974874371859e-05, - "loss": 5.2213, - "step": 17669 - }, - { - "epoch": 9.215123859191657, - "grad_norm": 1.6268552541732788, - "learning_rate": 8.274874371859296e-05, - "loss": 5.5139, - "step": 17670 - }, - { - "epoch": 9.215645371577574, - "grad_norm": 1.418068528175354, - "learning_rate": 8.274773869346734e-05, - "loss": 5.5173, - "step": 17671 - }, - { - "epoch": 9.216166883963494, - "grad_norm": 1.482157826423645, - "learning_rate": 8.274673366834172e-05, - "loss": 5.327, - "step": 17672 - }, - { - "epoch": 9.216688396349413, - "grad_norm": 1.4569991827011108, - "learning_rate": 8.274572864321608e-05, - "loss": 5.6689, - "step": 17673 - }, - { - "epoch": 9.217209908735333, - "grad_norm": 1.5634193420410156, - "learning_rate": 8.274472361809046e-05, - "loss": 5.5763, - "step": 17674 - }, - { - "epoch": 9.217731421121252, - "grad_norm": 1.4919040203094482, - "learning_rate": 8.274371859296483e-05, - "loss": 5.6473, - "step": 17675 - }, - { - "epoch": 9.21825293350717, - "grad_norm": 1.368727684020996, - "learning_rate": 8.27427135678392e-05, - "loss": 5.3042, - "step": 17676 - }, - { - "epoch": 9.21877444589309, - "grad_norm": 1.451511025428772, - "learning_rate": 8.274170854271357e-05, - "loss": 5.0175, - "step": 17677 - }, - { - "epoch": 9.219295958279009, - "grad_norm": 1.4704653024673462, - "learning_rate": 8.274070351758795e-05, - "loss": 5.8526, - "step": 17678 - }, - { - "epoch": 9.219817470664928, - "grad_norm": 1.3986951112747192, - "learning_rate": 8.273969849246231e-05, - "loss": 5.101, - "step": 17679 - }, - { - "epoch": 9.220338983050848, - "grad_norm": 1.4220006465911865, - "learning_rate": 8.273869346733669e-05, - "loss": 6.0345, - "step": 17680 - }, - { - "epoch": 9.220860495436767, - "grad_norm": 1.3828160762786865, - "learning_rate": 8.273768844221105e-05, - "loss": 5.365, - "step": 17681 - }, - { - "epoch": 9.221382007822687, - "grad_norm": 1.3801802396774292, - "learning_rate": 8.273668341708543e-05, - "loss": 5.4237, - "step": 17682 - }, - { - "epoch": 9.221903520208604, - "grad_norm": 1.5068162679672241, - "learning_rate": 8.273567839195981e-05, - "loss": 5.5745, - "step": 17683 - }, - { - "epoch": 9.222425032594524, - "grad_norm": 1.4924283027648926, - "learning_rate": 8.273467336683417e-05, - "loss": 5.5884, - "step": 17684 - }, - { - "epoch": 9.222946544980443, - "grad_norm": 1.5309468507766724, - "learning_rate": 8.273366834170855e-05, - "loss": 5.0803, - "step": 17685 - }, - { - "epoch": 9.223468057366363, - "grad_norm": 1.3742297887802124, - "learning_rate": 8.273266331658291e-05, - "loss": 5.3688, - "step": 17686 - }, - { - "epoch": 9.223989569752282, - "grad_norm": 1.4030663967132568, - "learning_rate": 8.273165829145729e-05, - "loss": 5.4677, - "step": 17687 - }, - { - "epoch": 9.2245110821382, - "grad_norm": 1.4440110921859741, - "learning_rate": 8.273065326633166e-05, - "loss": 5.3977, - "step": 17688 - }, - { - "epoch": 9.22503259452412, - "grad_norm": 1.4401971101760864, - "learning_rate": 8.272964824120603e-05, - "loss": 5.4229, - "step": 17689 - }, - { - "epoch": 9.225554106910039, - "grad_norm": 1.55340576171875, - "learning_rate": 8.27286432160804e-05, - "loss": 4.9109, - "step": 17690 - }, - { - "epoch": 9.226075619295958, - "grad_norm": 1.7044262886047363, - "learning_rate": 8.272763819095478e-05, - "loss": 5.547, - "step": 17691 - }, - { - "epoch": 9.226597131681878, - "grad_norm": 1.5394755601882935, - "learning_rate": 8.272663316582915e-05, - "loss": 5.6826, - "step": 17692 - }, - { - "epoch": 9.227118644067797, - "grad_norm": 1.85234534740448, - "learning_rate": 8.272562814070353e-05, - "loss": 5.19, - "step": 17693 - }, - { - "epoch": 9.227640156453715, - "grad_norm": 1.628873348236084, - "learning_rate": 8.27246231155779e-05, - "loss": 5.614, - "step": 17694 - }, - { - "epoch": 9.228161668839634, - "grad_norm": 1.4411709308624268, - "learning_rate": 8.272361809045227e-05, - "loss": 5.6469, - "step": 17695 - }, - { - "epoch": 9.228683181225554, - "grad_norm": 1.5104748010635376, - "learning_rate": 8.272261306532664e-05, - "loss": 5.5845, - "step": 17696 - }, - { - "epoch": 9.229204693611473, - "grad_norm": 1.5508170127868652, - "learning_rate": 8.2721608040201e-05, - "loss": 5.5121, - "step": 17697 - }, - { - "epoch": 9.229726205997393, - "grad_norm": 1.5393257141113281, - "learning_rate": 8.272060301507538e-05, - "loss": 5.7822, - "step": 17698 - }, - { - "epoch": 9.230247718383312, - "grad_norm": 1.4890458583831787, - "learning_rate": 8.271959798994975e-05, - "loss": 5.1785, - "step": 17699 - }, - { - "epoch": 9.23076923076923, - "grad_norm": 1.4417632818222046, - "learning_rate": 8.271859296482412e-05, - "loss": 5.4486, - "step": 17700 - }, - { - "epoch": 9.23129074315515, - "grad_norm": 1.496584415435791, - "learning_rate": 8.271758793969849e-05, - "loss": 5.3299, - "step": 17701 - }, - { - "epoch": 9.231812255541069, - "grad_norm": 1.4363856315612793, - "learning_rate": 8.271658291457287e-05, - "loss": 5.5088, - "step": 17702 - }, - { - "epoch": 9.232333767926988, - "grad_norm": 1.4310808181762695, - "learning_rate": 8.271557788944724e-05, - "loss": 5.2238, - "step": 17703 - }, - { - "epoch": 9.232855280312908, - "grad_norm": 1.5458145141601562, - "learning_rate": 8.271457286432162e-05, - "loss": 5.2272, - "step": 17704 - }, - { - "epoch": 9.233376792698827, - "grad_norm": 1.4364433288574219, - "learning_rate": 8.271356783919599e-05, - "loss": 5.7206, - "step": 17705 - }, - { - "epoch": 9.233898305084745, - "grad_norm": 1.3885798454284668, - "learning_rate": 8.271256281407036e-05, - "loss": 5.6752, - "step": 17706 - }, - { - "epoch": 9.234419817470664, - "grad_norm": 1.825056791305542, - "learning_rate": 8.271155778894473e-05, - "loss": 4.8135, - "step": 17707 - }, - { - "epoch": 9.234941329856584, - "grad_norm": 1.501104712486267, - "learning_rate": 8.27105527638191e-05, - "loss": 5.6924, - "step": 17708 - }, - { - "epoch": 9.235462842242503, - "grad_norm": 1.4547303915023804, - "learning_rate": 8.270954773869347e-05, - "loss": 5.4298, - "step": 17709 - }, - { - "epoch": 9.235984354628423, - "grad_norm": 1.5359396934509277, - "learning_rate": 8.270854271356783e-05, - "loss": 5.3711, - "step": 17710 - }, - { - "epoch": 9.236505867014342, - "grad_norm": 1.6270534992218018, - "learning_rate": 8.270753768844221e-05, - "loss": 5.1488, - "step": 17711 - }, - { - "epoch": 9.23702737940026, - "grad_norm": 1.5204668045043945, - "learning_rate": 8.270653266331659e-05, - "loss": 5.4652, - "step": 17712 - }, - { - "epoch": 9.23754889178618, - "grad_norm": 1.4600553512573242, - "learning_rate": 8.270552763819097e-05, - "loss": 5.8536, - "step": 17713 - }, - { - "epoch": 9.238070404172099, - "grad_norm": 1.570419192314148, - "learning_rate": 8.270452261306533e-05, - "loss": 5.7535, - "step": 17714 - }, - { - "epoch": 9.238591916558018, - "grad_norm": 1.5596985816955566, - "learning_rate": 8.270351758793971e-05, - "loss": 4.7723, - "step": 17715 - }, - { - "epoch": 9.239113428943938, - "grad_norm": 1.4300976991653442, - "learning_rate": 8.270251256281407e-05, - "loss": 5.869, - "step": 17716 - }, - { - "epoch": 9.239634941329857, - "grad_norm": 1.390319585800171, - "learning_rate": 8.270150753768845e-05, - "loss": 5.884, - "step": 17717 - }, - { - "epoch": 9.240156453715775, - "grad_norm": 1.4808224439620972, - "learning_rate": 8.270050251256282e-05, - "loss": 5.4536, - "step": 17718 - }, - { - "epoch": 9.240677966101694, - "grad_norm": 1.3515801429748535, - "learning_rate": 8.26994974874372e-05, - "loss": 5.4511, - "step": 17719 - }, - { - "epoch": 9.241199478487614, - "grad_norm": 1.4516122341156006, - "learning_rate": 8.269849246231156e-05, - "loss": 5.5167, - "step": 17720 - }, - { - "epoch": 9.241720990873533, - "grad_norm": 1.4122556447982788, - "learning_rate": 8.269748743718594e-05, - "loss": 5.7024, - "step": 17721 - }, - { - "epoch": 9.242242503259453, - "grad_norm": 1.501846194267273, - "learning_rate": 8.26964824120603e-05, - "loss": 5.4392, - "step": 17722 - }, - { - "epoch": 9.242764015645372, - "grad_norm": 1.4527744054794312, - "learning_rate": 8.269547738693468e-05, - "loss": 5.5754, - "step": 17723 - }, - { - "epoch": 9.24328552803129, - "grad_norm": 1.4426966905593872, - "learning_rate": 8.269447236180906e-05, - "loss": 4.868, - "step": 17724 - }, - { - "epoch": 9.24380704041721, - "grad_norm": 1.5528970956802368, - "learning_rate": 8.269346733668342e-05, - "loss": 5.261, - "step": 17725 - }, - { - "epoch": 9.244328552803129, - "grad_norm": 1.436089277267456, - "learning_rate": 8.26924623115578e-05, - "loss": 5.2065, - "step": 17726 - }, - { - "epoch": 9.244850065189048, - "grad_norm": 1.4731885194778442, - "learning_rate": 8.269145728643216e-05, - "loss": 5.2934, - "step": 17727 - }, - { - "epoch": 9.245371577574968, - "grad_norm": 1.4337959289550781, - "learning_rate": 8.269045226130654e-05, - "loss": 5.8395, - "step": 17728 - }, - { - "epoch": 9.245893089960887, - "grad_norm": 1.3323200941085815, - "learning_rate": 8.26894472361809e-05, - "loss": 5.722, - "step": 17729 - }, - { - "epoch": 9.246414602346805, - "grad_norm": 1.4669828414916992, - "learning_rate": 8.268844221105528e-05, - "loss": 5.3796, - "step": 17730 - }, - { - "epoch": 9.246936114732724, - "grad_norm": 1.4089971780776978, - "learning_rate": 8.268743718592965e-05, - "loss": 5.6769, - "step": 17731 - }, - { - "epoch": 9.247457627118644, - "grad_norm": 1.4375344514846802, - "learning_rate": 8.268643216080402e-05, - "loss": 5.766, - "step": 17732 - }, - { - "epoch": 9.247979139504563, - "grad_norm": 1.368233561515808, - "learning_rate": 8.26854271356784e-05, - "loss": 5.6777, - "step": 17733 - }, - { - "epoch": 9.248500651890483, - "grad_norm": 1.322202205657959, - "learning_rate": 8.268442211055278e-05, - "loss": 5.9624, - "step": 17734 - }, - { - "epoch": 9.249022164276402, - "grad_norm": 1.480490803718567, - "learning_rate": 8.268341708542714e-05, - "loss": 5.7003, - "step": 17735 - }, - { - "epoch": 9.24954367666232, - "grad_norm": 1.52757728099823, - "learning_rate": 8.268241206030151e-05, - "loss": 5.4412, - "step": 17736 - }, - { - "epoch": 9.25006518904824, - "grad_norm": 1.489312767982483, - "learning_rate": 8.268140703517589e-05, - "loss": 5.2207, - "step": 17737 - }, - { - "epoch": 9.250586701434159, - "grad_norm": 1.373445749282837, - "learning_rate": 8.268040201005025e-05, - "loss": 5.4687, - "step": 17738 - }, - { - "epoch": 9.251108213820078, - "grad_norm": 1.4333490133285522, - "learning_rate": 8.267939698492463e-05, - "loss": 5.3688, - "step": 17739 - }, - { - "epoch": 9.251629726205998, - "grad_norm": 1.6089802980422974, - "learning_rate": 8.267839195979899e-05, - "loss": 5.4163, - "step": 17740 - }, - { - "epoch": 9.252151238591917, - "grad_norm": 1.6360340118408203, - "learning_rate": 8.267738693467337e-05, - "loss": 5.4082, - "step": 17741 - }, - { - "epoch": 9.252672750977835, - "grad_norm": 1.397901177406311, - "learning_rate": 8.267638190954773e-05, - "loss": 5.6299, - "step": 17742 - }, - { - "epoch": 9.253194263363755, - "grad_norm": 1.5862754583358765, - "learning_rate": 8.267537688442211e-05, - "loss": 5.4751, - "step": 17743 - }, - { - "epoch": 9.253715775749674, - "grad_norm": 1.5196529626846313, - "learning_rate": 8.267437185929649e-05, - "loss": 5.1015, - "step": 17744 - }, - { - "epoch": 9.254237288135593, - "grad_norm": 1.425155758857727, - "learning_rate": 8.267336683417087e-05, - "loss": 5.6747, - "step": 17745 - }, - { - "epoch": 9.254758800521513, - "grad_norm": 1.3501734733581543, - "learning_rate": 8.267236180904523e-05, - "loss": 5.7347, - "step": 17746 - }, - { - "epoch": 9.255280312907432, - "grad_norm": 1.3600839376449585, - "learning_rate": 8.267135678391961e-05, - "loss": 5.1343, - "step": 17747 - }, - { - "epoch": 9.25580182529335, - "grad_norm": 1.443397045135498, - "learning_rate": 8.267035175879397e-05, - "loss": 5.4496, - "step": 17748 - }, - { - "epoch": 9.25632333767927, - "grad_norm": 1.40093195438385, - "learning_rate": 8.266934673366834e-05, - "loss": 5.8704, - "step": 17749 - }, - { - "epoch": 9.256844850065189, - "grad_norm": 1.5421353578567505, - "learning_rate": 8.266834170854272e-05, - "loss": 5.6515, - "step": 17750 - }, - { - "epoch": 9.257366362451108, - "grad_norm": 1.4101674556732178, - "learning_rate": 8.266733668341708e-05, - "loss": 5.2107, - "step": 17751 - }, - { - "epoch": 9.257887874837028, - "grad_norm": 1.5319768190383911, - "learning_rate": 8.266633165829146e-05, - "loss": 5.5386, - "step": 17752 - }, - { - "epoch": 9.258409387222947, - "grad_norm": 1.455776333808899, - "learning_rate": 8.266532663316582e-05, - "loss": 5.768, - "step": 17753 - }, - { - "epoch": 9.258930899608865, - "grad_norm": 1.3888945579528809, - "learning_rate": 8.26643216080402e-05, - "loss": 5.6254, - "step": 17754 - }, - { - "epoch": 9.259452411994785, - "grad_norm": 1.4808905124664307, - "learning_rate": 8.266331658291458e-05, - "loss": 5.6364, - "step": 17755 - }, - { - "epoch": 9.259973924380704, - "grad_norm": 1.31795072555542, - "learning_rate": 8.266231155778896e-05, - "loss": 4.6417, - "step": 17756 - }, - { - "epoch": 9.260495436766623, - "grad_norm": 1.4797512292861938, - "learning_rate": 8.266130653266332e-05, - "loss": 5.6042, - "step": 17757 - }, - { - "epoch": 9.261016949152543, - "grad_norm": 1.4107348918914795, - "learning_rate": 8.26603015075377e-05, - "loss": 5.6475, - "step": 17758 - }, - { - "epoch": 9.261538461538462, - "grad_norm": 1.6351252794265747, - "learning_rate": 8.265929648241206e-05, - "loss": 4.9061, - "step": 17759 - }, - { - "epoch": 9.26205997392438, - "grad_norm": 1.4848254919052124, - "learning_rate": 8.265829145728644e-05, - "loss": 5.3394, - "step": 17760 - }, - { - "epoch": 9.2625814863103, - "grad_norm": 1.4435182809829712, - "learning_rate": 8.26572864321608e-05, - "loss": 5.5685, - "step": 17761 - }, - { - "epoch": 9.263102998696219, - "grad_norm": 1.4421178102493286, - "learning_rate": 8.265628140703517e-05, - "loss": 5.5667, - "step": 17762 - }, - { - "epoch": 9.263624511082138, - "grad_norm": 1.3644686937332153, - "learning_rate": 8.265527638190955e-05, - "loss": 5.8948, - "step": 17763 - }, - { - "epoch": 9.264146023468058, - "grad_norm": 1.7325308322906494, - "learning_rate": 8.265427135678392e-05, - "loss": 5.0516, - "step": 17764 - }, - { - "epoch": 9.264667535853977, - "grad_norm": 1.352449655532837, - "learning_rate": 8.26532663316583e-05, - "loss": 5.4747, - "step": 17765 - }, - { - "epoch": 9.265189048239895, - "grad_norm": 1.326794981956482, - "learning_rate": 8.265226130653267e-05, - "loss": 5.8045, - "step": 17766 - }, - { - "epoch": 9.265710560625815, - "grad_norm": 1.4304171800613403, - "learning_rate": 8.265125628140704e-05, - "loss": 5.5455, - "step": 17767 - }, - { - "epoch": 9.266232073011734, - "grad_norm": 1.515570044517517, - "learning_rate": 8.265025125628141e-05, - "loss": 5.6154, - "step": 17768 - }, - { - "epoch": 9.266753585397653, - "grad_norm": 1.5441948175430298, - "learning_rate": 8.264924623115579e-05, - "loss": 5.3523, - "step": 17769 - }, - { - "epoch": 9.267275097783573, - "grad_norm": 1.4001528024673462, - "learning_rate": 8.264824120603015e-05, - "loss": 5.9866, - "step": 17770 - }, - { - "epoch": 9.26779661016949, - "grad_norm": 1.4114130735397339, - "learning_rate": 8.264723618090453e-05, - "loss": 5.2829, - "step": 17771 - }, - { - "epoch": 9.26831812255541, - "grad_norm": 1.2364174127578735, - "learning_rate": 8.264623115577889e-05, - "loss": 5.9014, - "step": 17772 - }, - { - "epoch": 9.26883963494133, - "grad_norm": 1.2977726459503174, - "learning_rate": 8.264522613065327e-05, - "loss": 5.8592, - "step": 17773 - }, - { - "epoch": 9.269361147327249, - "grad_norm": 1.4391499757766724, - "learning_rate": 8.264422110552764e-05, - "loss": 5.338, - "step": 17774 - }, - { - "epoch": 9.269882659713168, - "grad_norm": 1.4657695293426514, - "learning_rate": 8.264321608040201e-05, - "loss": 5.2253, - "step": 17775 - }, - { - "epoch": 9.270404172099088, - "grad_norm": 1.4366235733032227, - "learning_rate": 8.264221105527639e-05, - "loss": 5.3919, - "step": 17776 - }, - { - "epoch": 9.270925684485007, - "grad_norm": 1.4689220190048218, - "learning_rate": 8.264120603015075e-05, - "loss": 5.1949, - "step": 17777 - }, - { - "epoch": 9.271447196870925, - "grad_norm": 1.3017221689224243, - "learning_rate": 8.264020100502513e-05, - "loss": 5.8293, - "step": 17778 - }, - { - "epoch": 9.271968709256845, - "grad_norm": 1.3868951797485352, - "learning_rate": 8.26391959798995e-05, - "loss": 5.8295, - "step": 17779 - }, - { - "epoch": 9.272490221642764, - "grad_norm": 1.4491794109344482, - "learning_rate": 8.263819095477387e-05, - "loss": 5.615, - "step": 17780 - }, - { - "epoch": 9.273011734028684, - "grad_norm": 1.4298210144042969, - "learning_rate": 8.263718592964824e-05, - "loss": 5.5277, - "step": 17781 - }, - { - "epoch": 9.273533246414603, - "grad_norm": 1.3873612880706787, - "learning_rate": 8.263618090452262e-05, - "loss": 5.9259, - "step": 17782 - }, - { - "epoch": 9.27405475880052, - "grad_norm": 1.481906771659851, - "learning_rate": 8.263517587939698e-05, - "loss": 5.0981, - "step": 17783 - }, - { - "epoch": 9.27457627118644, - "grad_norm": 1.4937111139297485, - "learning_rate": 8.263417085427136e-05, - "loss": 5.7052, - "step": 17784 - }, - { - "epoch": 9.27509778357236, - "grad_norm": 1.462133765220642, - "learning_rate": 8.263316582914574e-05, - "loss": 5.5949, - "step": 17785 - }, - { - "epoch": 9.275619295958279, - "grad_norm": 1.4933279752731323, - "learning_rate": 8.263216080402011e-05, - "loss": 5.1822, - "step": 17786 - }, - { - "epoch": 9.276140808344199, - "grad_norm": 1.3937264680862427, - "learning_rate": 8.263115577889448e-05, - "loss": 5.1483, - "step": 17787 - }, - { - "epoch": 9.276662320730118, - "grad_norm": 1.3768932819366455, - "learning_rate": 8.263015075376886e-05, - "loss": 5.2787, - "step": 17788 - }, - { - "epoch": 9.277183833116036, - "grad_norm": 1.4572068452835083, - "learning_rate": 8.262914572864322e-05, - "loss": 5.3111, - "step": 17789 - }, - { - "epoch": 9.277705345501955, - "grad_norm": 1.4442051649093628, - "learning_rate": 8.262814070351759e-05, - "loss": 5.2887, - "step": 17790 - }, - { - "epoch": 9.278226857887875, - "grad_norm": 1.4284992218017578, - "learning_rate": 8.262713567839196e-05, - "loss": 5.5789, - "step": 17791 - }, - { - "epoch": 9.278748370273794, - "grad_norm": 1.4486550092697144, - "learning_rate": 8.262613065326633e-05, - "loss": 5.6187, - "step": 17792 - }, - { - "epoch": 9.279269882659714, - "grad_norm": 1.5003979206085205, - "learning_rate": 8.26251256281407e-05, - "loss": 5.6175, - "step": 17793 - }, - { - "epoch": 9.279791395045633, - "grad_norm": 1.545966625213623, - "learning_rate": 8.262412060301507e-05, - "loss": 5.4569, - "step": 17794 - }, - { - "epoch": 9.28031290743155, - "grad_norm": 1.5083329677581787, - "learning_rate": 8.262311557788945e-05, - "loss": 5.6087, - "step": 17795 - }, - { - "epoch": 9.28083441981747, - "grad_norm": 1.4852930307388306, - "learning_rate": 8.262211055276383e-05, - "loss": 5.855, - "step": 17796 - }, - { - "epoch": 9.28135593220339, - "grad_norm": 1.4113904237747192, - "learning_rate": 8.26211055276382e-05, - "loss": 5.4847, - "step": 17797 - }, - { - "epoch": 9.281877444589309, - "grad_norm": 1.3508943319320679, - "learning_rate": 8.262010050251257e-05, - "loss": 5.6492, - "step": 17798 - }, - { - "epoch": 9.282398956975229, - "grad_norm": 1.3639181852340698, - "learning_rate": 8.261909547738695e-05, - "loss": 5.8892, - "step": 17799 - }, - { - "epoch": 9.282920469361148, - "grad_norm": 1.4258133172988892, - "learning_rate": 8.261809045226131e-05, - "loss": 5.8121, - "step": 17800 - }, - { - "epoch": 9.283441981747066, - "grad_norm": 1.454408884048462, - "learning_rate": 8.261708542713569e-05, - "loss": 5.39, - "step": 17801 - }, - { - "epoch": 9.283963494132985, - "grad_norm": 1.47089684009552, - "learning_rate": 8.261608040201005e-05, - "loss": 5.2088, - "step": 17802 - }, - { - "epoch": 9.284485006518905, - "grad_norm": 1.5482689142227173, - "learning_rate": 8.261507537688442e-05, - "loss": 5.3165, - "step": 17803 - }, - { - "epoch": 9.285006518904824, - "grad_norm": 1.501946210861206, - "learning_rate": 8.26140703517588e-05, - "loss": 5.8013, - "step": 17804 - }, - { - "epoch": 9.285528031290744, - "grad_norm": 1.4821916818618774, - "learning_rate": 8.261306532663317e-05, - "loss": 5.4193, - "step": 17805 - }, - { - "epoch": 9.286049543676663, - "grad_norm": 1.419727087020874, - "learning_rate": 8.261206030150755e-05, - "loss": 5.7411, - "step": 17806 - }, - { - "epoch": 9.28657105606258, - "grad_norm": 1.5094538927078247, - "learning_rate": 8.261105527638191e-05, - "loss": 5.1663, - "step": 17807 - }, - { - "epoch": 9.2870925684485, - "grad_norm": 1.438965082168579, - "learning_rate": 8.261005025125629e-05, - "loss": 5.6341, - "step": 17808 - }, - { - "epoch": 9.28761408083442, - "grad_norm": 1.3845906257629395, - "learning_rate": 8.260904522613066e-05, - "loss": 5.5458, - "step": 17809 - }, - { - "epoch": 9.288135593220339, - "grad_norm": 1.5995172262191772, - "learning_rate": 8.260804020100503e-05, - "loss": 5.6844, - "step": 17810 - }, - { - "epoch": 9.288657105606259, - "grad_norm": 1.4947564601898193, - "learning_rate": 8.26070351758794e-05, - "loss": 5.4382, - "step": 17811 - }, - { - "epoch": 9.289178617992178, - "grad_norm": 1.4404096603393555, - "learning_rate": 8.260603015075378e-05, - "loss": 5.6746, - "step": 17812 - }, - { - "epoch": 9.289700130378096, - "grad_norm": 1.4321869611740112, - "learning_rate": 8.260502512562814e-05, - "loss": 5.6774, - "step": 17813 - }, - { - "epoch": 9.290221642764015, - "grad_norm": 1.4165780544281006, - "learning_rate": 8.260402010050252e-05, - "loss": 5.7601, - "step": 17814 - }, - { - "epoch": 9.290743155149935, - "grad_norm": 1.5595192909240723, - "learning_rate": 8.260301507537688e-05, - "loss": 5.256, - "step": 17815 - }, - { - "epoch": 9.291264667535854, - "grad_norm": 1.3722832202911377, - "learning_rate": 8.260201005025126e-05, - "loss": 5.4997, - "step": 17816 - }, - { - "epoch": 9.291786179921774, - "grad_norm": 1.419309377670288, - "learning_rate": 8.260100502512564e-05, - "loss": 5.5381, - "step": 17817 - }, - { - "epoch": 9.292307692307693, - "grad_norm": 1.4021756649017334, - "learning_rate": 8.26e-05, - "loss": 5.5132, - "step": 17818 - }, - { - "epoch": 9.29282920469361, - "grad_norm": 1.3934431076049805, - "learning_rate": 8.259899497487438e-05, - "loss": 5.653, - "step": 17819 - }, - { - "epoch": 9.29335071707953, - "grad_norm": 1.3754043579101562, - "learning_rate": 8.259798994974874e-05, - "loss": 5.8093, - "step": 17820 - }, - { - "epoch": 9.29387222946545, - "grad_norm": 1.460658073425293, - "learning_rate": 8.259698492462312e-05, - "loss": 5.8559, - "step": 17821 - }, - { - "epoch": 9.29439374185137, - "grad_norm": 1.4343043565750122, - "learning_rate": 8.259597989949749e-05, - "loss": 5.7651, - "step": 17822 - }, - { - "epoch": 9.294915254237289, - "grad_norm": 1.6085783243179321, - "learning_rate": 8.259497487437186e-05, - "loss": 5.2225, - "step": 17823 - }, - { - "epoch": 9.295436766623208, - "grad_norm": 1.387020468711853, - "learning_rate": 8.259396984924623e-05, - "loss": 4.83, - "step": 17824 - }, - { - "epoch": 9.295958279009126, - "grad_norm": 1.401745319366455, - "learning_rate": 8.25929648241206e-05, - "loss": 5.6094, - "step": 17825 - }, - { - "epoch": 9.296479791395045, - "grad_norm": 1.4833757877349854, - "learning_rate": 8.259195979899498e-05, - "loss": 5.6106, - "step": 17826 - }, - { - "epoch": 9.297001303780965, - "grad_norm": 1.5132322311401367, - "learning_rate": 8.259095477386936e-05, - "loss": 5.8721, - "step": 17827 - }, - { - "epoch": 9.297522816166884, - "grad_norm": 1.4045954942703247, - "learning_rate": 8.258994974874373e-05, - "loss": 5.4682, - "step": 17828 - }, - { - "epoch": 9.298044328552804, - "grad_norm": 1.4058681726455688, - "learning_rate": 8.258894472361809e-05, - "loss": 5.6622, - "step": 17829 - }, - { - "epoch": 9.298565840938723, - "grad_norm": 1.424513339996338, - "learning_rate": 8.258793969849247e-05, - "loss": 5.7598, - "step": 17830 - }, - { - "epoch": 9.29908735332464, - "grad_norm": 1.4308576583862305, - "learning_rate": 8.258693467336683e-05, - "loss": 5.4005, - "step": 17831 - }, - { - "epoch": 9.29960886571056, - "grad_norm": 1.5109597444534302, - "learning_rate": 8.258592964824121e-05, - "loss": 4.7401, - "step": 17832 - }, - { - "epoch": 9.30013037809648, - "grad_norm": 1.4433708190917969, - "learning_rate": 8.258492462311557e-05, - "loss": 5.126, - "step": 17833 - }, - { - "epoch": 9.3006518904824, - "grad_norm": 1.3627104759216309, - "learning_rate": 8.258391959798995e-05, - "loss": 5.9153, - "step": 17834 - }, - { - "epoch": 9.301173402868319, - "grad_norm": 1.5091513395309448, - "learning_rate": 8.258291457286432e-05, - "loss": 5.4223, - "step": 17835 - }, - { - "epoch": 9.301694915254238, - "grad_norm": 1.5741223096847534, - "learning_rate": 8.25819095477387e-05, - "loss": 5.2912, - "step": 17836 - }, - { - "epoch": 9.302216427640156, - "grad_norm": 1.3514258861541748, - "learning_rate": 8.258090452261307e-05, - "loss": 5.8114, - "step": 17837 - }, - { - "epoch": 9.302737940026075, - "grad_norm": 1.470290184020996, - "learning_rate": 8.257989949748745e-05, - "loss": 5.3753, - "step": 17838 - }, - { - "epoch": 9.303259452411995, - "grad_norm": 1.4314285516738892, - "learning_rate": 8.257889447236181e-05, - "loss": 5.6803, - "step": 17839 - }, - { - "epoch": 9.303780964797914, - "grad_norm": 1.3847752809524536, - "learning_rate": 8.257788944723619e-05, - "loss": 5.8366, - "step": 17840 - }, - { - "epoch": 9.304302477183834, - "grad_norm": 1.4238336086273193, - "learning_rate": 8.257688442211056e-05, - "loss": 5.624, - "step": 17841 - }, - { - "epoch": 9.304823989569753, - "grad_norm": 1.4281706809997559, - "learning_rate": 8.257587939698492e-05, - "loss": 6.0283, - "step": 17842 - }, - { - "epoch": 9.30534550195567, - "grad_norm": 1.339417576789856, - "learning_rate": 8.25748743718593e-05, - "loss": 5.9674, - "step": 17843 - }, - { - "epoch": 9.30586701434159, - "grad_norm": 1.3529119491577148, - "learning_rate": 8.257386934673366e-05, - "loss": 5.801, - "step": 17844 - }, - { - "epoch": 9.30638852672751, - "grad_norm": 1.4565587043762207, - "learning_rate": 8.257286432160804e-05, - "loss": 5.1853, - "step": 17845 - }, - { - "epoch": 9.30691003911343, - "grad_norm": 1.4607961177825928, - "learning_rate": 8.257185929648242e-05, - "loss": 5.5579, - "step": 17846 - }, - { - "epoch": 9.307431551499349, - "grad_norm": 1.53420889377594, - "learning_rate": 8.25708542713568e-05, - "loss": 5.8213, - "step": 17847 - }, - { - "epoch": 9.307953063885268, - "grad_norm": 1.3008708953857422, - "learning_rate": 8.256984924623116e-05, - "loss": 5.6503, - "step": 17848 - }, - { - "epoch": 9.308474576271186, - "grad_norm": 1.4964479207992554, - "learning_rate": 8.256884422110554e-05, - "loss": 5.3106, - "step": 17849 - }, - { - "epoch": 9.308996088657105, - "grad_norm": 1.3184634447097778, - "learning_rate": 8.25678391959799e-05, - "loss": 5.7327, - "step": 17850 - }, - { - "epoch": 9.309517601043025, - "grad_norm": 1.414868950843811, - "learning_rate": 8.256683417085428e-05, - "loss": 5.2075, - "step": 17851 - }, - { - "epoch": 9.310039113428944, - "grad_norm": 1.4079985618591309, - "learning_rate": 8.256582914572864e-05, - "loss": 5.424, - "step": 17852 - }, - { - "epoch": 9.310560625814864, - "grad_norm": 1.4072798490524292, - "learning_rate": 8.256482412060302e-05, - "loss": 5.4649, - "step": 17853 - }, - { - "epoch": 9.311082138200783, - "grad_norm": 1.432481050491333, - "learning_rate": 8.256381909547739e-05, - "loss": 5.4919, - "step": 17854 - }, - { - "epoch": 9.3116036505867, - "grad_norm": 1.766871452331543, - "learning_rate": 8.256281407035176e-05, - "loss": 4.6631, - "step": 17855 - }, - { - "epoch": 9.31212516297262, - "grad_norm": 1.3734943866729736, - "learning_rate": 8.256180904522613e-05, - "loss": 5.9, - "step": 17856 - }, - { - "epoch": 9.31264667535854, - "grad_norm": 1.4107590913772583, - "learning_rate": 8.256080402010051e-05, - "loss": 5.4374, - "step": 17857 - }, - { - "epoch": 9.31316818774446, - "grad_norm": 1.4185676574707031, - "learning_rate": 8.255979899497488e-05, - "loss": 5.6754, - "step": 17858 - }, - { - "epoch": 9.313689700130379, - "grad_norm": 1.3607779741287231, - "learning_rate": 8.255879396984925e-05, - "loss": 5.8673, - "step": 17859 - }, - { - "epoch": 9.314211212516298, - "grad_norm": 1.4063372611999512, - "learning_rate": 8.255778894472363e-05, - "loss": 5.8023, - "step": 17860 - }, - { - "epoch": 9.314732724902216, - "grad_norm": 1.4814351797103882, - "learning_rate": 8.255678391959799e-05, - "loss": 5.2821, - "step": 17861 - }, - { - "epoch": 9.315254237288135, - "grad_norm": 1.386693000793457, - "learning_rate": 8.255577889447237e-05, - "loss": 5.421, - "step": 17862 - }, - { - "epoch": 9.315775749674055, - "grad_norm": 1.2949022054672241, - "learning_rate": 8.255477386934673e-05, - "loss": 5.7929, - "step": 17863 - }, - { - "epoch": 9.316297262059974, - "grad_norm": 1.4064475297927856, - "learning_rate": 8.255376884422111e-05, - "loss": 5.6332, - "step": 17864 - }, - { - "epoch": 9.316818774445894, - "grad_norm": 1.3515608310699463, - "learning_rate": 8.255276381909548e-05, - "loss": 5.9011, - "step": 17865 - }, - { - "epoch": 9.317340286831811, - "grad_norm": 1.490886926651001, - "learning_rate": 8.255175879396985e-05, - "loss": 5.5776, - "step": 17866 - }, - { - "epoch": 9.31786179921773, - "grad_norm": 1.3623943328857422, - "learning_rate": 8.255075376884423e-05, - "loss": 5.6724, - "step": 17867 - }, - { - "epoch": 9.31838331160365, - "grad_norm": 1.4380289316177368, - "learning_rate": 8.254974874371861e-05, - "loss": 5.4623, - "step": 17868 - }, - { - "epoch": 9.31890482398957, - "grad_norm": 1.5070984363555908, - "learning_rate": 8.254874371859297e-05, - "loss": 5.3932, - "step": 17869 - }, - { - "epoch": 9.31942633637549, - "grad_norm": 1.626399278640747, - "learning_rate": 8.254773869346734e-05, - "loss": 5.1842, - "step": 17870 - }, - { - "epoch": 9.319947848761409, - "grad_norm": 1.7615388631820679, - "learning_rate": 8.254673366834172e-05, - "loss": 5.3426, - "step": 17871 - }, - { - "epoch": 9.320469361147328, - "grad_norm": 1.5111043453216553, - "learning_rate": 8.254572864321608e-05, - "loss": 5.6556, - "step": 17872 - }, - { - "epoch": 9.320990873533246, - "grad_norm": 1.4626452922821045, - "learning_rate": 8.254472361809046e-05, - "loss": 5.4478, - "step": 17873 - }, - { - "epoch": 9.321512385919165, - "grad_norm": 1.5261832475662231, - "learning_rate": 8.254371859296482e-05, - "loss": 5.2033, - "step": 17874 - }, - { - "epoch": 9.322033898305085, - "grad_norm": 1.4829351902008057, - "learning_rate": 8.25427135678392e-05, - "loss": 5.2263, - "step": 17875 - }, - { - "epoch": 9.322555410691004, - "grad_norm": 1.4065306186676025, - "learning_rate": 8.254170854271356e-05, - "loss": 5.5689, - "step": 17876 - }, - { - "epoch": 9.323076923076924, - "grad_norm": 1.4220730066299438, - "learning_rate": 8.254070351758794e-05, - "loss": 5.6552, - "step": 17877 - }, - { - "epoch": 9.323598435462841, - "grad_norm": 1.432881474494934, - "learning_rate": 8.253969849246232e-05, - "loss": 5.7778, - "step": 17878 - }, - { - "epoch": 9.32411994784876, - "grad_norm": 1.5804471969604492, - "learning_rate": 8.25386934673367e-05, - "loss": 5.5164, - "step": 17879 - }, - { - "epoch": 9.32464146023468, - "grad_norm": 1.488150954246521, - "learning_rate": 8.253768844221106e-05, - "loss": 5.472, - "step": 17880 - }, - { - "epoch": 9.3251629726206, - "grad_norm": 1.4708503484725952, - "learning_rate": 8.253668341708544e-05, - "loss": 5.412, - "step": 17881 - }, - { - "epoch": 9.32568448500652, - "grad_norm": 1.4484410285949707, - "learning_rate": 8.25356783919598e-05, - "loss": 5.7416, - "step": 17882 - }, - { - "epoch": 9.326205997392439, - "grad_norm": 1.460995078086853, - "learning_rate": 8.253467336683417e-05, - "loss": 5.6304, - "step": 17883 - }, - { - "epoch": 9.326727509778356, - "grad_norm": 1.4780486822128296, - "learning_rate": 8.253366834170855e-05, - "loss": 5.4301, - "step": 17884 - }, - { - "epoch": 9.327249022164276, - "grad_norm": 1.4285001754760742, - "learning_rate": 8.253266331658291e-05, - "loss": 5.7071, - "step": 17885 - }, - { - "epoch": 9.327770534550195, - "grad_norm": 1.4567639827728271, - "learning_rate": 8.253165829145729e-05, - "loss": 5.7185, - "step": 17886 - }, - { - "epoch": 9.328292046936115, - "grad_norm": 1.35545015335083, - "learning_rate": 8.253065326633167e-05, - "loss": 5.6046, - "step": 17887 - }, - { - "epoch": 9.328813559322034, - "grad_norm": 1.4215872287750244, - "learning_rate": 8.252964824120604e-05, - "loss": 5.4419, - "step": 17888 - }, - { - "epoch": 9.329335071707954, - "grad_norm": 1.3905999660491943, - "learning_rate": 8.252864321608041e-05, - "loss": 5.5414, - "step": 17889 - }, - { - "epoch": 9.329856584093871, - "grad_norm": 1.4365301132202148, - "learning_rate": 8.252763819095479e-05, - "loss": 5.5871, - "step": 17890 - }, - { - "epoch": 9.33037809647979, - "grad_norm": 1.3647773265838623, - "learning_rate": 8.252663316582915e-05, - "loss": 5.9381, - "step": 17891 - }, - { - "epoch": 9.33089960886571, - "grad_norm": 1.3923392295837402, - "learning_rate": 8.252562814070353e-05, - "loss": 5.7685, - "step": 17892 - }, - { - "epoch": 9.33142112125163, - "grad_norm": 1.3768190145492554, - "learning_rate": 8.252462311557789e-05, - "loss": 5.1359, - "step": 17893 - }, - { - "epoch": 9.33194263363755, - "grad_norm": 1.530752182006836, - "learning_rate": 8.252361809045227e-05, - "loss": 5.4826, - "step": 17894 - }, - { - "epoch": 9.332464146023469, - "grad_norm": 1.388155221939087, - "learning_rate": 8.252261306532663e-05, - "loss": 5.9109, - "step": 17895 - }, - { - "epoch": 9.332985658409386, - "grad_norm": 1.4167159795761108, - "learning_rate": 8.2521608040201e-05, - "loss": 5.3899, - "step": 17896 - }, - { - "epoch": 9.333507170795306, - "grad_norm": 1.4357798099517822, - "learning_rate": 8.252060301507538e-05, - "loss": 5.3507, - "step": 17897 - }, - { - "epoch": 9.334028683181225, - "grad_norm": 1.3018168210983276, - "learning_rate": 8.251959798994975e-05, - "loss": 5.9912, - "step": 17898 - }, - { - "epoch": 9.334550195567145, - "grad_norm": 1.4591072797775269, - "learning_rate": 8.251859296482413e-05, - "loss": 5.5688, - "step": 17899 - }, - { - "epoch": 9.335071707953064, - "grad_norm": 1.5286787748336792, - "learning_rate": 8.25175879396985e-05, - "loss": 5.5641, - "step": 17900 - }, - { - "epoch": 9.335593220338984, - "grad_norm": 1.4416577816009521, - "learning_rate": 8.251658291457287e-05, - "loss": 5.2482, - "step": 17901 - }, - { - "epoch": 9.336114732724901, - "grad_norm": 1.3891352415084839, - "learning_rate": 8.251557788944724e-05, - "loss": 5.7154, - "step": 17902 - }, - { - "epoch": 9.336636245110821, - "grad_norm": 1.4562835693359375, - "learning_rate": 8.251457286432162e-05, - "loss": 5.1169, - "step": 17903 - }, - { - "epoch": 9.33715775749674, - "grad_norm": 1.4311975240707397, - "learning_rate": 8.251356783919598e-05, - "loss": 5.3427, - "step": 17904 - }, - { - "epoch": 9.33767926988266, - "grad_norm": 1.2712786197662354, - "learning_rate": 8.251256281407036e-05, - "loss": 5.716, - "step": 17905 - }, - { - "epoch": 9.33820078226858, - "grad_norm": 1.3678568601608276, - "learning_rate": 8.251155778894472e-05, - "loss": 5.8946, - "step": 17906 - }, - { - "epoch": 9.338722294654499, - "grad_norm": 1.40695321559906, - "learning_rate": 8.25105527638191e-05, - "loss": 5.5554, - "step": 17907 - }, - { - "epoch": 9.339243807040416, - "grad_norm": 1.5746636390686035, - "learning_rate": 8.250954773869348e-05, - "loss": 5.1641, - "step": 17908 - }, - { - "epoch": 9.339765319426336, - "grad_norm": 1.4663691520690918, - "learning_rate": 8.250854271356784e-05, - "loss": 5.1414, - "step": 17909 - }, - { - "epoch": 9.340286831812255, - "grad_norm": 1.4852806329727173, - "learning_rate": 8.250753768844222e-05, - "loss": 5.6245, - "step": 17910 - }, - { - "epoch": 9.340808344198175, - "grad_norm": 1.5104782581329346, - "learning_rate": 8.250653266331658e-05, - "loss": 5.3716, - "step": 17911 - }, - { - "epoch": 9.341329856584094, - "grad_norm": 1.5047508478164673, - "learning_rate": 8.250552763819096e-05, - "loss": 5.7315, - "step": 17912 - }, - { - "epoch": 9.341851368970014, - "grad_norm": 1.4777884483337402, - "learning_rate": 8.250452261306533e-05, - "loss": 5.4365, - "step": 17913 - }, - { - "epoch": 9.342372881355931, - "grad_norm": 1.5596725940704346, - "learning_rate": 8.25035175879397e-05, - "loss": 4.9822, - "step": 17914 - }, - { - "epoch": 9.342894393741851, - "grad_norm": 1.4197951555252075, - "learning_rate": 8.250251256281407e-05, - "loss": 5.7182, - "step": 17915 - }, - { - "epoch": 9.34341590612777, - "grad_norm": 1.5719045400619507, - "learning_rate": 8.250150753768845e-05, - "loss": 5.0499, - "step": 17916 - }, - { - "epoch": 9.34393741851369, - "grad_norm": 1.4012089967727661, - "learning_rate": 8.250050251256281e-05, - "loss": 5.2021, - "step": 17917 - }, - { - "epoch": 9.34445893089961, - "grad_norm": 1.6200636625289917, - "learning_rate": 8.249949748743719e-05, - "loss": 4.6423, - "step": 17918 - }, - { - "epoch": 9.344980443285529, - "grad_norm": 1.5283403396606445, - "learning_rate": 8.249849246231157e-05, - "loss": 5.2189, - "step": 17919 - }, - { - "epoch": 9.345501955671446, - "grad_norm": 1.517523169517517, - "learning_rate": 8.249748743718594e-05, - "loss": 4.9356, - "step": 17920 - }, - { - "epoch": 9.346023468057366, - "grad_norm": 1.5763232707977295, - "learning_rate": 8.249648241206031e-05, - "loss": 4.7929, - "step": 17921 - }, - { - "epoch": 9.346544980443285, - "grad_norm": 1.542974829673767, - "learning_rate": 8.249547738693467e-05, - "loss": 5.6636, - "step": 17922 - }, - { - "epoch": 9.347066492829205, - "grad_norm": 1.4555479288101196, - "learning_rate": 8.249447236180905e-05, - "loss": 5.3015, - "step": 17923 - }, - { - "epoch": 9.347588005215124, - "grad_norm": 1.4468481540679932, - "learning_rate": 8.249346733668341e-05, - "loss": 5.2464, - "step": 17924 - }, - { - "epoch": 9.348109517601044, - "grad_norm": 1.4097073078155518, - "learning_rate": 8.249246231155779e-05, - "loss": 5.0684, - "step": 17925 - }, - { - "epoch": 9.348631029986961, - "grad_norm": 1.3771286010742188, - "learning_rate": 8.249145728643216e-05, - "loss": 5.4908, - "step": 17926 - }, - { - "epoch": 9.349152542372881, - "grad_norm": 1.3886290788650513, - "learning_rate": 8.249045226130653e-05, - "loss": 5.5659, - "step": 17927 - }, - { - "epoch": 9.3496740547588, - "grad_norm": 1.3563481569290161, - "learning_rate": 8.24894472361809e-05, - "loss": 5.9208, - "step": 17928 - }, - { - "epoch": 9.35019556714472, - "grad_norm": 1.4084186553955078, - "learning_rate": 8.248844221105528e-05, - "loss": 5.2676, - "step": 17929 - }, - { - "epoch": 9.35071707953064, - "grad_norm": 1.3822731971740723, - "learning_rate": 8.248743718592965e-05, - "loss": 5.6401, - "step": 17930 - }, - { - "epoch": 9.351238591916559, - "grad_norm": 1.3452178239822388, - "learning_rate": 8.248643216080403e-05, - "loss": 5.644, - "step": 17931 - }, - { - "epoch": 9.351760104302477, - "grad_norm": 1.5102779865264893, - "learning_rate": 8.24854271356784e-05, - "loss": 5.435, - "step": 17932 - }, - { - "epoch": 9.352281616688396, - "grad_norm": 1.5017430782318115, - "learning_rate": 8.248442211055277e-05, - "loss": 4.8151, - "step": 17933 - }, - { - "epoch": 9.352803129074315, - "grad_norm": 1.4006116390228271, - "learning_rate": 8.248341708542714e-05, - "loss": 5.3501, - "step": 17934 - }, - { - "epoch": 9.353324641460235, - "grad_norm": 1.424072027206421, - "learning_rate": 8.24824120603015e-05, - "loss": 5.4814, - "step": 17935 - }, - { - "epoch": 9.353846153846154, - "grad_norm": 1.2976264953613281, - "learning_rate": 8.248140703517588e-05, - "loss": 5.8588, - "step": 17936 - }, - { - "epoch": 9.354367666232074, - "grad_norm": 1.42351233959198, - "learning_rate": 8.248040201005025e-05, - "loss": 5.2779, - "step": 17937 - }, - { - "epoch": 9.354889178617992, - "grad_norm": 1.3425321578979492, - "learning_rate": 8.247939698492462e-05, - "loss": 5.8503, - "step": 17938 - }, - { - "epoch": 9.355410691003911, - "grad_norm": 1.427116870880127, - "learning_rate": 8.2478391959799e-05, - "loss": 5.6053, - "step": 17939 - }, - { - "epoch": 9.35593220338983, - "grad_norm": 1.4962180852890015, - "learning_rate": 8.247738693467338e-05, - "loss": 5.5289, - "step": 17940 - }, - { - "epoch": 9.35645371577575, - "grad_norm": 1.5405765771865845, - "learning_rate": 8.247638190954774e-05, - "loss": 5.1594, - "step": 17941 - }, - { - "epoch": 9.35697522816167, - "grad_norm": 1.4033749103546143, - "learning_rate": 8.247537688442212e-05, - "loss": 4.8032, - "step": 17942 - }, - { - "epoch": 9.357496740547589, - "grad_norm": 1.322434902191162, - "learning_rate": 8.247437185929649e-05, - "loss": 5.8958, - "step": 17943 - }, - { - "epoch": 9.358018252933507, - "grad_norm": 1.5230109691619873, - "learning_rate": 8.247336683417086e-05, - "loss": 5.624, - "step": 17944 - }, - { - "epoch": 9.358539765319426, - "grad_norm": 1.7094820737838745, - "learning_rate": 8.247236180904523e-05, - "loss": 4.8566, - "step": 17945 - }, - { - "epoch": 9.359061277705345, - "grad_norm": 1.3719080686569214, - "learning_rate": 8.24713567839196e-05, - "loss": 5.4225, - "step": 17946 - }, - { - "epoch": 9.359582790091265, - "grad_norm": 1.4816004037857056, - "learning_rate": 8.247035175879397e-05, - "loss": 5.4571, - "step": 17947 - }, - { - "epoch": 9.360104302477184, - "grad_norm": 1.7452983856201172, - "learning_rate": 8.246934673366835e-05, - "loss": 5.2402, - "step": 17948 - }, - { - "epoch": 9.360625814863104, - "grad_norm": 2.221337080001831, - "learning_rate": 8.246834170854271e-05, - "loss": 5.0285, - "step": 17949 - }, - { - "epoch": 9.361147327249022, - "grad_norm": 1.3470616340637207, - "learning_rate": 8.246733668341709e-05, - "loss": 5.8865, - "step": 17950 - }, - { - "epoch": 9.361668839634941, - "grad_norm": 1.5290498733520508, - "learning_rate": 8.246633165829147e-05, - "loss": 5.5062, - "step": 17951 - }, - { - "epoch": 9.36219035202086, - "grad_norm": 1.3727655410766602, - "learning_rate": 8.246532663316583e-05, - "loss": 5.5302, - "step": 17952 - }, - { - "epoch": 9.36271186440678, - "grad_norm": 1.3910139799118042, - "learning_rate": 8.246432160804021e-05, - "loss": 5.4259, - "step": 17953 - }, - { - "epoch": 9.3632333767927, - "grad_norm": 1.377794861793518, - "learning_rate": 8.246331658291457e-05, - "loss": 5.5224, - "step": 17954 - }, - { - "epoch": 9.363754889178619, - "grad_norm": 1.4284231662750244, - "learning_rate": 8.246231155778895e-05, - "loss": 5.0708, - "step": 17955 - }, - { - "epoch": 9.364276401564537, - "grad_norm": 1.4817920923233032, - "learning_rate": 8.246130653266332e-05, - "loss": 5.4352, - "step": 17956 - }, - { - "epoch": 9.364797913950456, - "grad_norm": 1.4663662910461426, - "learning_rate": 8.24603015075377e-05, - "loss": 5.4967, - "step": 17957 - }, - { - "epoch": 9.365319426336375, - "grad_norm": 1.4427212476730347, - "learning_rate": 8.245929648241206e-05, - "loss": 5.6383, - "step": 17958 - }, - { - "epoch": 9.365840938722295, - "grad_norm": 1.5546072721481323, - "learning_rate": 8.245829145728644e-05, - "loss": 5.5745, - "step": 17959 - }, - { - "epoch": 9.366362451108214, - "grad_norm": 1.3749091625213623, - "learning_rate": 8.245728643216081e-05, - "loss": 5.6164, - "step": 17960 - }, - { - "epoch": 9.366883963494132, - "grad_norm": 1.3923178911209106, - "learning_rate": 8.245628140703519e-05, - "loss": 5.5829, - "step": 17961 - }, - { - "epoch": 9.367405475880052, - "grad_norm": 1.3997128009796143, - "learning_rate": 8.245527638190956e-05, - "loss": 5.9493, - "step": 17962 - }, - { - "epoch": 9.367926988265971, - "grad_norm": 1.4413673877716064, - "learning_rate": 8.245427135678392e-05, - "loss": 5.5563, - "step": 17963 - }, - { - "epoch": 9.36844850065189, - "grad_norm": 1.4447077512741089, - "learning_rate": 8.24532663316583e-05, - "loss": 5.7543, - "step": 17964 - }, - { - "epoch": 9.36897001303781, - "grad_norm": 1.4972916841506958, - "learning_rate": 8.245226130653266e-05, - "loss": 5.3976, - "step": 17965 - }, - { - "epoch": 9.36949152542373, - "grad_norm": 1.383237361907959, - "learning_rate": 8.245125628140704e-05, - "loss": 5.7381, - "step": 17966 - }, - { - "epoch": 9.370013037809649, - "grad_norm": 1.5408557653427124, - "learning_rate": 8.24502512562814e-05, - "loss": 5.6729, - "step": 17967 - }, - { - "epoch": 9.370534550195567, - "grad_norm": 1.3171483278274536, - "learning_rate": 8.244924623115578e-05, - "loss": 5.8904, - "step": 17968 - }, - { - "epoch": 9.371056062581486, - "grad_norm": 1.3889222145080566, - "learning_rate": 8.244824120603015e-05, - "loss": 5.5424, - "step": 17969 - }, - { - "epoch": 9.371577574967406, - "grad_norm": 1.5315359830856323, - "learning_rate": 8.244723618090452e-05, - "loss": 5.5191, - "step": 17970 - }, - { - "epoch": 9.372099087353325, - "grad_norm": 1.6181244850158691, - "learning_rate": 8.24462311557789e-05, - "loss": 5.5743, - "step": 17971 - }, - { - "epoch": 9.372620599739244, - "grad_norm": 1.56166410446167, - "learning_rate": 8.244522613065328e-05, - "loss": 4.9858, - "step": 17972 - }, - { - "epoch": 9.373142112125162, - "grad_norm": 1.3547282218933105, - "learning_rate": 8.244422110552764e-05, - "loss": 5.8043, - "step": 17973 - }, - { - "epoch": 9.373663624511082, - "grad_norm": 1.454676628112793, - "learning_rate": 8.244321608040202e-05, - "loss": 5.4957, - "step": 17974 - }, - { - "epoch": 9.374185136897001, - "grad_norm": 1.628665566444397, - "learning_rate": 8.244221105527639e-05, - "loss": 5.1826, - "step": 17975 - }, - { - "epoch": 9.37470664928292, - "grad_norm": 1.4482622146606445, - "learning_rate": 8.244120603015075e-05, - "loss": 5.906, - "step": 17976 - }, - { - "epoch": 9.37522816166884, - "grad_norm": 1.3745375871658325, - "learning_rate": 8.244020100502513e-05, - "loss": 5.5776, - "step": 17977 - }, - { - "epoch": 9.37574967405476, - "grad_norm": 1.471303105354309, - "learning_rate": 8.243919597989949e-05, - "loss": 5.7686, - "step": 17978 - }, - { - "epoch": 9.376271186440677, - "grad_norm": 1.3374967575073242, - "learning_rate": 8.243819095477387e-05, - "loss": 5.665, - "step": 17979 - }, - { - "epoch": 9.376792698826597, - "grad_norm": 1.32734215259552, - "learning_rate": 8.243718592964825e-05, - "loss": 5.81, - "step": 17980 - }, - { - "epoch": 9.377314211212516, - "grad_norm": 1.4587576389312744, - "learning_rate": 8.243618090452263e-05, - "loss": 5.5801, - "step": 17981 - }, - { - "epoch": 9.377835723598436, - "grad_norm": 1.3039119243621826, - "learning_rate": 8.243517587939699e-05, - "loss": 5.9042, - "step": 17982 - }, - { - "epoch": 9.378357235984355, - "grad_norm": 1.4757232666015625, - "learning_rate": 8.243417085427137e-05, - "loss": 5.1175, - "step": 17983 - }, - { - "epoch": 9.378878748370274, - "grad_norm": 1.684645175933838, - "learning_rate": 8.243316582914573e-05, - "loss": 5.6087, - "step": 17984 - }, - { - "epoch": 9.379400260756192, - "grad_norm": 1.4444822072982788, - "learning_rate": 8.243216080402011e-05, - "loss": 5.3423, - "step": 17985 - }, - { - "epoch": 9.379921773142112, - "grad_norm": 1.4454703330993652, - "learning_rate": 8.243115577889447e-05, - "loss": 5.353, - "step": 17986 - }, - { - "epoch": 9.380443285528031, - "grad_norm": 1.5121877193450928, - "learning_rate": 8.243015075376885e-05, - "loss": 5.4985, - "step": 17987 - }, - { - "epoch": 9.38096479791395, - "grad_norm": 1.5544407367706299, - "learning_rate": 8.242914572864322e-05, - "loss": 5.3647, - "step": 17988 - }, - { - "epoch": 9.38148631029987, - "grad_norm": 1.5461288690567017, - "learning_rate": 8.242814070351758e-05, - "loss": 5.2543, - "step": 17989 - }, - { - "epoch": 9.38200782268579, - "grad_norm": 1.6166555881500244, - "learning_rate": 8.242713567839196e-05, - "loss": 5.6636, - "step": 17990 - }, - { - "epoch": 9.382529335071707, - "grad_norm": 1.5054370164871216, - "learning_rate": 8.242613065326634e-05, - "loss": 5.5862, - "step": 17991 - }, - { - "epoch": 9.383050847457627, - "grad_norm": 1.4394770860671997, - "learning_rate": 8.242512562814071e-05, - "loss": 5.2118, - "step": 17992 - }, - { - "epoch": 9.383572359843546, - "grad_norm": 1.3795188665390015, - "learning_rate": 8.242412060301508e-05, - "loss": 5.3331, - "step": 17993 - }, - { - "epoch": 9.384093872229466, - "grad_norm": 1.5623884201049805, - "learning_rate": 8.242311557788946e-05, - "loss": 5.2352, - "step": 17994 - }, - { - "epoch": 9.384615384615385, - "grad_norm": 1.4661865234375, - "learning_rate": 8.242211055276382e-05, - "loss": 5.8629, - "step": 17995 - }, - { - "epoch": 9.385136897001304, - "grad_norm": 1.5410175323486328, - "learning_rate": 8.24211055276382e-05, - "loss": 5.7062, - "step": 17996 - }, - { - "epoch": 9.385658409387222, - "grad_norm": 1.4762966632843018, - "learning_rate": 8.242010050251256e-05, - "loss": 5.5354, - "step": 17997 - }, - { - "epoch": 9.386179921773142, - "grad_norm": 1.3615158796310425, - "learning_rate": 8.241909547738694e-05, - "loss": 5.0909, - "step": 17998 - }, - { - "epoch": 9.386701434159061, - "grad_norm": 1.6491873264312744, - "learning_rate": 8.24180904522613e-05, - "loss": 5.3157, - "step": 17999 - }, - { - "epoch": 9.38722294654498, - "grad_norm": 1.5471603870391846, - "learning_rate": 8.241708542713568e-05, - "loss": 5.1254, - "step": 18000 - }, - { - "epoch": 9.3877444589309, - "grad_norm": 1.5019398927688599, - "learning_rate": 8.241608040201006e-05, - "loss": 5.7954, - "step": 18001 - }, - { - "epoch": 9.38826597131682, - "grad_norm": 1.4378749132156372, - "learning_rate": 8.241507537688442e-05, - "loss": 5.6587, - "step": 18002 - }, - { - "epoch": 9.388787483702737, - "grad_norm": 1.4048480987548828, - "learning_rate": 8.24140703517588e-05, - "loss": 5.3177, - "step": 18003 - }, - { - "epoch": 9.389308996088657, - "grad_norm": 1.4480230808258057, - "learning_rate": 8.241306532663317e-05, - "loss": 5.4942, - "step": 18004 - }, - { - "epoch": 9.389830508474576, - "grad_norm": 1.3602466583251953, - "learning_rate": 8.241206030150754e-05, - "loss": 5.8556, - "step": 18005 - }, - { - "epoch": 9.390352020860496, - "grad_norm": 1.4130569696426392, - "learning_rate": 8.241105527638191e-05, - "loss": 5.1122, - "step": 18006 - }, - { - "epoch": 9.390873533246415, - "grad_norm": 1.3938939571380615, - "learning_rate": 8.241005025125629e-05, - "loss": 5.179, - "step": 18007 - }, - { - "epoch": 9.391395045632335, - "grad_norm": 1.584280252456665, - "learning_rate": 8.240904522613065e-05, - "loss": 5.1791, - "step": 18008 - }, - { - "epoch": 9.391916558018252, - "grad_norm": 1.3995248079299927, - "learning_rate": 8.240804020100503e-05, - "loss": 5.5704, - "step": 18009 - }, - { - "epoch": 9.392438070404172, - "grad_norm": 1.5258076190948486, - "learning_rate": 8.240703517587939e-05, - "loss": 5.2678, - "step": 18010 - }, - { - "epoch": 9.392959582790091, - "grad_norm": 1.4317625761032104, - "learning_rate": 8.240603015075377e-05, - "loss": 5.6957, - "step": 18011 - }, - { - "epoch": 9.39348109517601, - "grad_norm": 1.4740370512008667, - "learning_rate": 8.240502512562815e-05, - "loss": 5.564, - "step": 18012 - }, - { - "epoch": 9.39400260756193, - "grad_norm": 1.4794515371322632, - "learning_rate": 8.240402010050253e-05, - "loss": 5.6956, - "step": 18013 - }, - { - "epoch": 9.39452411994785, - "grad_norm": 1.445555329322815, - "learning_rate": 8.240301507537689e-05, - "loss": 5.3945, - "step": 18014 - }, - { - "epoch": 9.395045632333767, - "grad_norm": 1.5151803493499756, - "learning_rate": 8.240201005025126e-05, - "loss": 5.5269, - "step": 18015 - }, - { - "epoch": 9.395567144719687, - "grad_norm": 1.547906756401062, - "learning_rate": 8.240100502512563e-05, - "loss": 5.3196, - "step": 18016 - }, - { - "epoch": 9.396088657105606, - "grad_norm": 1.3441587686538696, - "learning_rate": 8.24e-05, - "loss": 5.1135, - "step": 18017 - }, - { - "epoch": 9.396610169491526, - "grad_norm": 1.4341620206832886, - "learning_rate": 8.239899497487437e-05, - "loss": 5.0826, - "step": 18018 - }, - { - "epoch": 9.397131681877445, - "grad_norm": 1.4498653411865234, - "learning_rate": 8.239798994974874e-05, - "loss": 5.1991, - "step": 18019 - }, - { - "epoch": 9.397653194263365, - "grad_norm": 1.4594868421554565, - "learning_rate": 8.239698492462312e-05, - "loss": 5.5379, - "step": 18020 - }, - { - "epoch": 9.398174706649282, - "grad_norm": 1.5081995725631714, - "learning_rate": 8.23959798994975e-05, - "loss": 5.3452, - "step": 18021 - }, - { - "epoch": 9.398696219035202, - "grad_norm": 1.4254034757614136, - "learning_rate": 8.239497487437187e-05, - "loss": 5.73, - "step": 18022 - }, - { - "epoch": 9.399217731421121, - "grad_norm": 1.512642502784729, - "learning_rate": 8.239396984924624e-05, - "loss": 5.008, - "step": 18023 - }, - { - "epoch": 9.39973924380704, - "grad_norm": 1.362318992614746, - "learning_rate": 8.239296482412061e-05, - "loss": 5.6289, - "step": 18024 - }, - { - "epoch": 9.40026075619296, - "grad_norm": 1.4266718626022339, - "learning_rate": 8.239195979899498e-05, - "loss": 5.6463, - "step": 18025 - }, - { - "epoch": 9.40078226857888, - "grad_norm": 1.5579407215118408, - "learning_rate": 8.239095477386936e-05, - "loss": 5.0737, - "step": 18026 - }, - { - "epoch": 9.401303780964797, - "grad_norm": 1.8256499767303467, - "learning_rate": 8.238994974874372e-05, - "loss": 5.0753, - "step": 18027 - }, - { - "epoch": 9.401825293350717, - "grad_norm": 1.4612624645233154, - "learning_rate": 8.238894472361809e-05, - "loss": 5.5984, - "step": 18028 - }, - { - "epoch": 9.402346805736636, - "grad_norm": 1.4165050983428955, - "learning_rate": 8.238793969849246e-05, - "loss": 5.4601, - "step": 18029 - }, - { - "epoch": 9.402868318122556, - "grad_norm": 1.3717306852340698, - "learning_rate": 8.238693467336683e-05, - "loss": 5.6591, - "step": 18030 - }, - { - "epoch": 9.403389830508475, - "grad_norm": 1.4065914154052734, - "learning_rate": 8.23859296482412e-05, - "loss": 5.6561, - "step": 18031 - }, - { - "epoch": 9.403911342894395, - "grad_norm": 1.488102674484253, - "learning_rate": 8.238492462311558e-05, - "loss": 5.4781, - "step": 18032 - }, - { - "epoch": 9.404432855280312, - "grad_norm": 1.8169970512390137, - "learning_rate": 8.238391959798996e-05, - "loss": 4.3866, - "step": 18033 - }, - { - "epoch": 9.404954367666232, - "grad_norm": 1.3515421152114868, - "learning_rate": 8.238291457286433e-05, - "loss": 5.9125, - "step": 18034 - }, - { - "epoch": 9.405475880052151, - "grad_norm": 1.518065094947815, - "learning_rate": 8.23819095477387e-05, - "loss": 5.7127, - "step": 18035 - }, - { - "epoch": 9.40599739243807, - "grad_norm": 1.429169774055481, - "learning_rate": 8.238090452261307e-05, - "loss": 5.5387, - "step": 18036 - }, - { - "epoch": 9.40651890482399, - "grad_norm": 1.4230960607528687, - "learning_rate": 8.237989949748745e-05, - "loss": 5.8775, - "step": 18037 - }, - { - "epoch": 9.40704041720991, - "grad_norm": 1.5465497970581055, - "learning_rate": 8.237889447236181e-05, - "loss": 5.559, - "step": 18038 - }, - { - "epoch": 9.407561929595827, - "grad_norm": 1.6945523023605347, - "learning_rate": 8.237788944723619e-05, - "loss": 4.756, - "step": 18039 - }, - { - "epoch": 9.408083441981747, - "grad_norm": 1.4151487350463867, - "learning_rate": 8.237688442211055e-05, - "loss": 5.817, - "step": 18040 - }, - { - "epoch": 9.408604954367666, - "grad_norm": 1.3039671182632446, - "learning_rate": 8.237587939698493e-05, - "loss": 5.8605, - "step": 18041 - }, - { - "epoch": 9.409126466753586, - "grad_norm": 1.3569629192352295, - "learning_rate": 8.237487437185931e-05, - "loss": 5.6501, - "step": 18042 - }, - { - "epoch": 9.409647979139505, - "grad_norm": 1.5257023572921753, - "learning_rate": 8.237386934673367e-05, - "loss": 5.4587, - "step": 18043 - }, - { - "epoch": 9.410169491525423, - "grad_norm": 1.5034717321395874, - "learning_rate": 8.237286432160805e-05, - "loss": 5.3345, - "step": 18044 - }, - { - "epoch": 9.410691003911342, - "grad_norm": 1.5466091632843018, - "learning_rate": 8.237185929648241e-05, - "loss": 4.8443, - "step": 18045 - }, - { - "epoch": 9.411212516297262, - "grad_norm": 1.4566874504089355, - "learning_rate": 8.237085427135679e-05, - "loss": 5.5678, - "step": 18046 - }, - { - "epoch": 9.411734028683181, - "grad_norm": 1.510926604270935, - "learning_rate": 8.236984924623116e-05, - "loss": 5.5622, - "step": 18047 - }, - { - "epoch": 9.4122555410691, - "grad_norm": 1.66961669921875, - "learning_rate": 8.236884422110553e-05, - "loss": 5.0882, - "step": 18048 - }, - { - "epoch": 9.41277705345502, - "grad_norm": 1.4918900728225708, - "learning_rate": 8.23678391959799e-05, - "loss": 5.3934, - "step": 18049 - }, - { - "epoch": 9.41329856584094, - "grad_norm": 1.328500747680664, - "learning_rate": 8.236683417085428e-05, - "loss": 5.118, - "step": 18050 - }, - { - "epoch": 9.413820078226857, - "grad_norm": 1.504177212715149, - "learning_rate": 8.236582914572864e-05, - "loss": 5.1081, - "step": 18051 - }, - { - "epoch": 9.414341590612777, - "grad_norm": 1.4242361783981323, - "learning_rate": 8.236482412060302e-05, - "loss": 5.5008, - "step": 18052 - }, - { - "epoch": 9.414863102998696, - "grad_norm": 1.5699717998504639, - "learning_rate": 8.23638190954774e-05, - "loss": 5.3011, - "step": 18053 - }, - { - "epoch": 9.415384615384616, - "grad_norm": 1.5572208166122437, - "learning_rate": 8.236281407035177e-05, - "loss": 5.1298, - "step": 18054 - }, - { - "epoch": 9.415906127770535, - "grad_norm": 1.5199990272521973, - "learning_rate": 8.236180904522614e-05, - "loss": 5.2991, - "step": 18055 - }, - { - "epoch": 9.416427640156453, - "grad_norm": 1.422109842300415, - "learning_rate": 8.23608040201005e-05, - "loss": 5.1622, - "step": 18056 - }, - { - "epoch": 9.416949152542372, - "grad_norm": 1.4713125228881836, - "learning_rate": 8.235979899497488e-05, - "loss": 5.1302, - "step": 18057 - }, - { - "epoch": 9.417470664928292, - "grad_norm": 1.607741355895996, - "learning_rate": 8.235879396984924e-05, - "loss": 5.0307, - "step": 18058 - }, - { - "epoch": 9.417992177314211, - "grad_norm": 1.513523817062378, - "learning_rate": 8.235778894472362e-05, - "loss": 5.3707, - "step": 18059 - }, - { - "epoch": 9.41851368970013, - "grad_norm": 1.473988652229309, - "learning_rate": 8.235678391959799e-05, - "loss": 5.5948, - "step": 18060 - }, - { - "epoch": 9.41903520208605, - "grad_norm": 1.4688174724578857, - "learning_rate": 8.235577889447236e-05, - "loss": 5.3526, - "step": 18061 - }, - { - "epoch": 9.419556714471968, - "grad_norm": 1.426521897315979, - "learning_rate": 8.235477386934674e-05, - "loss": 5.926, - "step": 18062 - }, - { - "epoch": 9.420078226857887, - "grad_norm": 1.4833139181137085, - "learning_rate": 8.235376884422112e-05, - "loss": 5.5656, - "step": 18063 - }, - { - "epoch": 9.420599739243807, - "grad_norm": 1.46029531955719, - "learning_rate": 8.235276381909548e-05, - "loss": 5.7143, - "step": 18064 - }, - { - "epoch": 9.421121251629726, - "grad_norm": 1.4743659496307373, - "learning_rate": 8.235175879396986e-05, - "loss": 5.2686, - "step": 18065 - }, - { - "epoch": 9.421642764015646, - "grad_norm": 1.349268913269043, - "learning_rate": 8.235075376884423e-05, - "loss": 5.7953, - "step": 18066 - }, - { - "epoch": 9.422164276401565, - "grad_norm": 1.3131568431854248, - "learning_rate": 8.23497487437186e-05, - "loss": 5.6447, - "step": 18067 - }, - { - "epoch": 9.422685788787483, - "grad_norm": 1.400826334953308, - "learning_rate": 8.234874371859297e-05, - "loss": 5.8676, - "step": 18068 - }, - { - "epoch": 9.423207301173402, - "grad_norm": 1.463335394859314, - "learning_rate": 8.234773869346733e-05, - "loss": 5.2802, - "step": 18069 - }, - { - "epoch": 9.423728813559322, - "grad_norm": 1.4013656377792358, - "learning_rate": 8.234673366834171e-05, - "loss": 5.7373, - "step": 18070 - }, - { - "epoch": 9.424250325945241, - "grad_norm": 1.4669426679611206, - "learning_rate": 8.234572864321607e-05, - "loss": 5.3381, - "step": 18071 - }, - { - "epoch": 9.42477183833116, - "grad_norm": 1.4457329511642456, - "learning_rate": 8.234472361809045e-05, - "loss": 5.6334, - "step": 18072 - }, - { - "epoch": 9.42529335071708, - "grad_norm": 1.395687222480774, - "learning_rate": 8.234371859296483e-05, - "loss": 5.3716, - "step": 18073 - }, - { - "epoch": 9.425814863102998, - "grad_norm": 1.4185758829116821, - "learning_rate": 8.234271356783921e-05, - "loss": 5.4904, - "step": 18074 - }, - { - "epoch": 9.426336375488917, - "grad_norm": Infinity, - "learning_rate": 8.234271356783921e-05, - "loss": 5.029, - "step": 18075 - }, - { - "epoch": 9.426857887874837, - "grad_norm": 1.4754424095153809, - "learning_rate": 8.234170854271357e-05, - "loss": 5.425, - "step": 18076 - }, - { - "epoch": 9.427379400260756, - "grad_norm": 1.492533564567566, - "learning_rate": 8.234070351758795e-05, - "loss": 5.3713, - "step": 18077 - }, - { - "epoch": 9.427900912646676, - "grad_norm": 1.4642823934555054, - "learning_rate": 8.233969849246231e-05, - "loss": 5.7868, - "step": 18078 - }, - { - "epoch": 9.428422425032595, - "grad_norm": 1.4120131731033325, - "learning_rate": 8.233869346733669e-05, - "loss": 5.6027, - "step": 18079 - }, - { - "epoch": 9.428943937418513, - "grad_norm": 2.0073161125183105, - "learning_rate": 8.233768844221106e-05, - "loss": 5.2987, - "step": 18080 - }, - { - "epoch": 9.429465449804432, - "grad_norm": 1.4899280071258545, - "learning_rate": 8.233668341708543e-05, - "loss": 5.5589, - "step": 18081 - }, - { - "epoch": 9.429986962190352, - "grad_norm": 1.5576510429382324, - "learning_rate": 8.23356783919598e-05, - "loss": 5.7059, - "step": 18082 - }, - { - "epoch": 9.430508474576271, - "grad_norm": 1.4661530256271362, - "learning_rate": 8.233467336683416e-05, - "loss": 5.511, - "step": 18083 - }, - { - "epoch": 9.43102998696219, - "grad_norm": 1.5719908475875854, - "learning_rate": 8.233366834170854e-05, - "loss": 5.4821, - "step": 18084 - }, - { - "epoch": 9.43155149934811, - "grad_norm": 1.341475486755371, - "learning_rate": 8.233266331658292e-05, - "loss": 6.0162, - "step": 18085 - }, - { - "epoch": 9.432073011734028, - "grad_norm": 1.5909852981567383, - "learning_rate": 8.23316582914573e-05, - "loss": 5.4696, - "step": 18086 - }, - { - "epoch": 9.432594524119947, - "grad_norm": 1.4381407499313354, - "learning_rate": 8.233065326633166e-05, - "loss": 5.5216, - "step": 18087 - }, - { - "epoch": 9.433116036505867, - "grad_norm": 1.468895673751831, - "learning_rate": 8.232964824120604e-05, - "loss": 5.5397, - "step": 18088 - }, - { - "epoch": 9.433637548891786, - "grad_norm": 1.5393102169036865, - "learning_rate": 8.23286432160804e-05, - "loss": 5.2813, - "step": 18089 - }, - { - "epoch": 9.434159061277706, - "grad_norm": 1.4526560306549072, - "learning_rate": 8.232763819095478e-05, - "loss": 5.3149, - "step": 18090 - }, - { - "epoch": 9.434680573663625, - "grad_norm": 1.6928762197494507, - "learning_rate": 8.232663316582914e-05, - "loss": 5.3869, - "step": 18091 - }, - { - "epoch": 9.435202086049543, - "grad_norm": 1.4105455875396729, - "learning_rate": 8.232562814070352e-05, - "loss": 5.1559, - "step": 18092 - }, - { - "epoch": 9.435723598435462, - "grad_norm": 1.6322052478790283, - "learning_rate": 8.232462311557789e-05, - "loss": 5.3534, - "step": 18093 - }, - { - "epoch": 9.436245110821382, - "grad_norm": 1.3640223741531372, - "learning_rate": 8.232361809045226e-05, - "loss": 5.3899, - "step": 18094 - }, - { - "epoch": 9.436766623207301, - "grad_norm": 1.4348257780075073, - "learning_rate": 8.232261306532664e-05, - "loss": 5.2832, - "step": 18095 - }, - { - "epoch": 9.43728813559322, - "grad_norm": 1.4504704475402832, - "learning_rate": 8.232160804020101e-05, - "loss": 5.5772, - "step": 18096 - }, - { - "epoch": 9.43780964797914, - "grad_norm": 1.4425619840621948, - "learning_rate": 8.232060301507538e-05, - "loss": 5.1431, - "step": 18097 - }, - { - "epoch": 9.438331160365058, - "grad_norm": 1.4713711738586426, - "learning_rate": 8.231959798994975e-05, - "loss": 4.998, - "step": 18098 - }, - { - "epoch": 9.438852672750977, - "grad_norm": 1.4756457805633545, - "learning_rate": 8.231859296482413e-05, - "loss": 5.8032, - "step": 18099 - }, - { - "epoch": 9.439374185136897, - "grad_norm": 1.6445220708847046, - "learning_rate": 8.231758793969849e-05, - "loss": 5.4773, - "step": 18100 - }, - { - "epoch": 9.439895697522816, - "grad_norm": 1.5312669277191162, - "learning_rate": 8.231658291457287e-05, - "loss": 4.8865, - "step": 18101 - }, - { - "epoch": 9.440417209908736, - "grad_norm": 1.4233381748199463, - "learning_rate": 8.231557788944723e-05, - "loss": 5.7649, - "step": 18102 - }, - { - "epoch": 9.440938722294655, - "grad_norm": 1.312266230583191, - "learning_rate": 8.231457286432161e-05, - "loss": 5.7553, - "step": 18103 - }, - { - "epoch": 9.441460234680573, - "grad_norm": 1.4358023405075073, - "learning_rate": 8.231356783919598e-05, - "loss": 5.4793, - "step": 18104 - }, - { - "epoch": 9.441981747066492, - "grad_norm": 1.5911058187484741, - "learning_rate": 8.231256281407035e-05, - "loss": 5.3449, - "step": 18105 - }, - { - "epoch": 9.442503259452412, - "grad_norm": 1.562393307685852, - "learning_rate": 8.231155778894473e-05, - "loss": 5.6163, - "step": 18106 - }, - { - "epoch": 9.443024771838331, - "grad_norm": 1.4893900156021118, - "learning_rate": 8.231055276381911e-05, - "loss": 5.6934, - "step": 18107 - }, - { - "epoch": 9.44354628422425, - "grad_norm": 1.5460155010223389, - "learning_rate": 8.230954773869347e-05, - "loss": 5.2206, - "step": 18108 - }, - { - "epoch": 9.44406779661017, - "grad_norm": 1.542029857635498, - "learning_rate": 8.230854271356784e-05, - "loss": 5.3362, - "step": 18109 - }, - { - "epoch": 9.444589308996088, - "grad_norm": 1.4582087993621826, - "learning_rate": 8.230753768844222e-05, - "loss": 5.6703, - "step": 18110 - }, - { - "epoch": 9.445110821382007, - "grad_norm": 1.481508731842041, - "learning_rate": 8.230653266331658e-05, - "loss": 5.005, - "step": 18111 - }, - { - "epoch": 9.445632333767927, - "grad_norm": 1.489197850227356, - "learning_rate": 8.230552763819096e-05, - "loss": 5.6683, - "step": 18112 - }, - { - "epoch": 9.446153846153846, - "grad_norm": 1.4981640577316284, - "learning_rate": 8.230452261306532e-05, - "loss": 5.6698, - "step": 18113 - }, - { - "epoch": 9.446675358539766, - "grad_norm": 1.465809941291809, - "learning_rate": 8.23035175879397e-05, - "loss": 5.5604, - "step": 18114 - }, - { - "epoch": 9.447196870925685, - "grad_norm": 1.3653984069824219, - "learning_rate": 8.230251256281408e-05, - "loss": 5.6712, - "step": 18115 - }, - { - "epoch": 9.447718383311603, - "grad_norm": 1.3996353149414062, - "learning_rate": 8.230150753768846e-05, - "loss": 5.6924, - "step": 18116 - }, - { - "epoch": 9.448239895697522, - "grad_norm": 1.3451921939849854, - "learning_rate": 8.230050251256282e-05, - "loss": 5.6786, - "step": 18117 - }, - { - "epoch": 9.448761408083442, - "grad_norm": 1.451648235321045, - "learning_rate": 8.22994974874372e-05, - "loss": 5.4813, - "step": 18118 - }, - { - "epoch": 9.449282920469361, - "grad_norm": 1.7541193962097168, - "learning_rate": 8.229849246231156e-05, - "loss": 5.4532, - "step": 18119 - }, - { - "epoch": 9.44980443285528, - "grad_norm": 1.4935994148254395, - "learning_rate": 8.229748743718594e-05, - "loss": 5.3215, - "step": 18120 - }, - { - "epoch": 9.4503259452412, - "grad_norm": 1.5612107515335083, - "learning_rate": 8.22964824120603e-05, - "loss": 5.7442, - "step": 18121 - }, - { - "epoch": 9.450847457627118, - "grad_norm": 1.2248151302337646, - "learning_rate": 8.229547738693467e-05, - "loss": 5.3356, - "step": 18122 - }, - { - "epoch": 9.451368970013037, - "grad_norm": 1.3826615810394287, - "learning_rate": 8.229447236180905e-05, - "loss": 5.704, - "step": 18123 - }, - { - "epoch": 9.451890482398957, - "grad_norm": 1.496376395225525, - "learning_rate": 8.229346733668341e-05, - "loss": 5.4235, - "step": 18124 - }, - { - "epoch": 9.452411994784876, - "grad_norm": 1.413495421409607, - "learning_rate": 8.229246231155779e-05, - "loss": 5.2761, - "step": 18125 - }, - { - "epoch": 9.452933507170796, - "grad_norm": 1.5687239170074463, - "learning_rate": 8.229145728643217e-05, - "loss": 5.7749, - "step": 18126 - }, - { - "epoch": 9.453455019556715, - "grad_norm": 1.41693913936615, - "learning_rate": 8.229045226130654e-05, - "loss": 5.6602, - "step": 18127 - }, - { - "epoch": 9.453976531942633, - "grad_norm": 1.4881340265274048, - "learning_rate": 8.228944723618091e-05, - "loss": 5.209, - "step": 18128 - }, - { - "epoch": 9.454498044328552, - "grad_norm": 1.4121490716934204, - "learning_rate": 8.228844221105529e-05, - "loss": 5.9324, - "step": 18129 - }, - { - "epoch": 9.455019556714472, - "grad_norm": 1.4613633155822754, - "learning_rate": 8.228743718592965e-05, - "loss": 5.3094, - "step": 18130 - }, - { - "epoch": 9.455541069100391, - "grad_norm": 1.3947190046310425, - "learning_rate": 8.228643216080403e-05, - "loss": 4.7986, - "step": 18131 - }, - { - "epoch": 9.45606258148631, - "grad_norm": 1.319873571395874, - "learning_rate": 8.228542713567839e-05, - "loss": 5.7077, - "step": 18132 - }, - { - "epoch": 9.45658409387223, - "grad_norm": 1.3327840566635132, - "learning_rate": 8.228442211055277e-05, - "loss": 5.8916, - "step": 18133 - }, - { - "epoch": 9.457105606258148, - "grad_norm": 1.391348958015442, - "learning_rate": 8.228341708542713e-05, - "loss": 5.8923, - "step": 18134 - }, - { - "epoch": 9.457627118644067, - "grad_norm": 1.5446748733520508, - "learning_rate": 8.228241206030151e-05, - "loss": 5.4669, - "step": 18135 - }, - { - "epoch": 9.458148631029987, - "grad_norm": 1.49965238571167, - "learning_rate": 8.228140703517589e-05, - "loss": 4.9423, - "step": 18136 - }, - { - "epoch": 9.458670143415906, - "grad_norm": 1.4364827871322632, - "learning_rate": 8.228040201005025e-05, - "loss": 5.3427, - "step": 18137 - }, - { - "epoch": 9.459191655801826, - "grad_norm": 1.3767116069793701, - "learning_rate": 8.227939698492463e-05, - "loss": 5.4819, - "step": 18138 - }, - { - "epoch": 9.459713168187744, - "grad_norm": 1.4229307174682617, - "learning_rate": 8.2278391959799e-05, - "loss": 5.3037, - "step": 18139 - }, - { - "epoch": 9.460234680573663, - "grad_norm": 1.3355556726455688, - "learning_rate": 8.227738693467337e-05, - "loss": 5.5886, - "step": 18140 - }, - { - "epoch": 9.460756192959582, - "grad_norm": 1.5224268436431885, - "learning_rate": 8.227638190954774e-05, - "loss": 5.3713, - "step": 18141 - }, - { - "epoch": 9.461277705345502, - "grad_norm": 1.420087218284607, - "learning_rate": 8.227537688442212e-05, - "loss": 5.5908, - "step": 18142 - }, - { - "epoch": 9.461799217731421, - "grad_norm": 1.5322381258010864, - "learning_rate": 8.227437185929648e-05, - "loss": 5.3671, - "step": 18143 - }, - { - "epoch": 9.46232073011734, - "grad_norm": 1.3624498844146729, - "learning_rate": 8.227336683417086e-05, - "loss": 5.8578, - "step": 18144 - }, - { - "epoch": 9.46284224250326, - "grad_norm": 1.4236645698547363, - "learning_rate": 8.227236180904522e-05, - "loss": 5.3029, - "step": 18145 - }, - { - "epoch": 9.463363754889178, - "grad_norm": 1.392197608947754, - "learning_rate": 8.22713567839196e-05, - "loss": 5.6463, - "step": 18146 - }, - { - "epoch": 9.463885267275097, - "grad_norm": 1.433524250984192, - "learning_rate": 8.227035175879398e-05, - "loss": 5.3958, - "step": 18147 - }, - { - "epoch": 9.464406779661017, - "grad_norm": 1.4635636806488037, - "learning_rate": 8.226934673366836e-05, - "loss": 5.5263, - "step": 18148 - }, - { - "epoch": 9.464928292046936, - "grad_norm": 1.4210084676742554, - "learning_rate": 8.226834170854272e-05, - "loss": 5.6245, - "step": 18149 - }, - { - "epoch": 9.465449804432856, - "grad_norm": 1.423142671585083, - "learning_rate": 8.226733668341708e-05, - "loss": 5.4601, - "step": 18150 - }, - { - "epoch": 9.465971316818774, - "grad_norm": 1.4893046617507935, - "learning_rate": 8.226633165829146e-05, - "loss": 5.0598, - "step": 18151 - }, - { - "epoch": 9.466492829204693, - "grad_norm": 1.4561524391174316, - "learning_rate": 8.226532663316583e-05, - "loss": 5.2508, - "step": 18152 - }, - { - "epoch": 9.467014341590613, - "grad_norm": 1.452970027923584, - "learning_rate": 8.22643216080402e-05, - "loss": 5.3921, - "step": 18153 - }, - { - "epoch": 9.467535853976532, - "grad_norm": 1.411190390586853, - "learning_rate": 8.226331658291457e-05, - "loss": 5.2647, - "step": 18154 - }, - { - "epoch": 9.468057366362451, - "grad_norm": 1.4163627624511719, - "learning_rate": 8.226231155778895e-05, - "loss": 5.3229, - "step": 18155 - }, - { - "epoch": 9.468578878748371, - "grad_norm": 1.3621416091918945, - "learning_rate": 8.226130653266332e-05, - "loss": 5.8127, - "step": 18156 - }, - { - "epoch": 9.469100391134289, - "grad_norm": 1.3748116493225098, - "learning_rate": 8.22603015075377e-05, - "loss": 5.7932, - "step": 18157 - }, - { - "epoch": 9.469621903520208, - "grad_norm": 1.4435878992080688, - "learning_rate": 8.225929648241207e-05, - "loss": 5.4144, - "step": 18158 - }, - { - "epoch": 9.470143415906128, - "grad_norm": 1.4483124017715454, - "learning_rate": 8.225829145728644e-05, - "loss": 5.357, - "step": 18159 - }, - { - "epoch": 9.470664928292047, - "grad_norm": 1.4436863660812378, - "learning_rate": 8.225728643216081e-05, - "loss": 5.3453, - "step": 18160 - }, - { - "epoch": 9.471186440677966, - "grad_norm": 1.498558521270752, - "learning_rate": 8.225628140703519e-05, - "loss": 5.4217, - "step": 18161 - }, - { - "epoch": 9.471707953063886, - "grad_norm": 1.395704984664917, - "learning_rate": 8.225527638190955e-05, - "loss": 5.2846, - "step": 18162 - }, - { - "epoch": 9.472229465449804, - "grad_norm": 1.4260677099227905, - "learning_rate": 8.225427135678391e-05, - "loss": 5.5014, - "step": 18163 - }, - { - "epoch": 9.472750977835723, - "grad_norm": 1.3684805631637573, - "learning_rate": 8.225326633165829e-05, - "loss": 5.7329, - "step": 18164 - }, - { - "epoch": 9.473272490221643, - "grad_norm": 1.3593389987945557, - "learning_rate": 8.225226130653266e-05, - "loss": 5.5549, - "step": 18165 - }, - { - "epoch": 9.473794002607562, - "grad_norm": 1.4801114797592163, - "learning_rate": 8.225125628140703e-05, - "loss": 5.2773, - "step": 18166 - }, - { - "epoch": 9.474315514993481, - "grad_norm": 1.5836405754089355, - "learning_rate": 8.225025125628141e-05, - "loss": 5.4989, - "step": 18167 - }, - { - "epoch": 9.474837027379401, - "grad_norm": 1.4267648458480835, - "learning_rate": 8.224924623115579e-05, - "loss": 5.5055, - "step": 18168 - }, - { - "epoch": 9.475358539765319, - "grad_norm": 1.4449280500411987, - "learning_rate": 8.224824120603015e-05, - "loss": 5.4869, - "step": 18169 - }, - { - "epoch": 9.475880052151238, - "grad_norm": 1.437248945236206, - "learning_rate": 8.224723618090453e-05, - "loss": 5.5926, - "step": 18170 - }, - { - "epoch": 9.476401564537158, - "grad_norm": 1.452779769897461, - "learning_rate": 8.22462311557789e-05, - "loss": 5.7291, - "step": 18171 - }, - { - "epoch": 9.476923076923077, - "grad_norm": 1.5104244947433472, - "learning_rate": 8.224522613065327e-05, - "loss": 5.5095, - "step": 18172 - }, - { - "epoch": 9.477444589308996, - "grad_norm": 1.511814832687378, - "learning_rate": 8.224422110552764e-05, - "loss": 5.0351, - "step": 18173 - }, - { - "epoch": 9.477966101694916, - "grad_norm": 1.4124157428741455, - "learning_rate": 8.224321608040202e-05, - "loss": 5.8251, - "step": 18174 - }, - { - "epoch": 9.478487614080834, - "grad_norm": 1.4250255823135376, - "learning_rate": 8.224221105527638e-05, - "loss": 5.5644, - "step": 18175 - }, - { - "epoch": 9.479009126466753, - "grad_norm": 1.3649638891220093, - "learning_rate": 8.224120603015076e-05, - "loss": 5.7612, - "step": 18176 - }, - { - "epoch": 9.479530638852673, - "grad_norm": 1.401392936706543, - "learning_rate": 8.224020100502514e-05, - "loss": 5.4735, - "step": 18177 - }, - { - "epoch": 9.480052151238592, - "grad_norm": 1.4920768737792969, - "learning_rate": 8.22391959798995e-05, - "loss": 5.5475, - "step": 18178 - }, - { - "epoch": 9.480573663624511, - "grad_norm": 1.4040324687957764, - "learning_rate": 8.223819095477388e-05, - "loss": 5.4189, - "step": 18179 - }, - { - "epoch": 9.481095176010431, - "grad_norm": 1.4657247066497803, - "learning_rate": 8.223718592964824e-05, - "loss": 5.3389, - "step": 18180 - }, - { - "epoch": 9.481616688396349, - "grad_norm": 1.4899739027023315, - "learning_rate": 8.223618090452262e-05, - "loss": 5.6506, - "step": 18181 - }, - { - "epoch": 9.482138200782268, - "grad_norm": 1.3842153549194336, - "learning_rate": 8.223517587939699e-05, - "loss": 5.6072, - "step": 18182 - }, - { - "epoch": 9.482659713168188, - "grad_norm": 1.4851194620132446, - "learning_rate": 8.223417085427136e-05, - "loss": 5.7379, - "step": 18183 - }, - { - "epoch": 9.483181225554107, - "grad_norm": 1.5168735980987549, - "learning_rate": 8.223316582914573e-05, - "loss": 4.9531, - "step": 18184 - }, - { - "epoch": 9.483702737940026, - "grad_norm": 1.2675786018371582, - "learning_rate": 8.22321608040201e-05, - "loss": 5.6221, - "step": 18185 - }, - { - "epoch": 9.484224250325946, - "grad_norm": 1.530930519104004, - "learning_rate": 8.223115577889447e-05, - "loss": 5.0403, - "step": 18186 - }, - { - "epoch": 9.484745762711864, - "grad_norm": 1.5466848611831665, - "learning_rate": 8.223015075376885e-05, - "loss": 5.4197, - "step": 18187 - }, - { - "epoch": 9.485267275097783, - "grad_norm": 1.3801974058151245, - "learning_rate": 8.222914572864322e-05, - "loss": 4.8385, - "step": 18188 - }, - { - "epoch": 9.485788787483703, - "grad_norm": 1.4981963634490967, - "learning_rate": 8.222814070351759e-05, - "loss": 5.5528, - "step": 18189 - }, - { - "epoch": 9.486310299869622, - "grad_norm": 1.5197471380233765, - "learning_rate": 8.222713567839197e-05, - "loss": 5.5813, - "step": 18190 - }, - { - "epoch": 9.486831812255542, - "grad_norm": 1.3611218929290771, - "learning_rate": 8.222613065326633e-05, - "loss": 5.3733, - "step": 18191 - }, - { - "epoch": 9.487353324641461, - "grad_norm": 1.5808380842208862, - "learning_rate": 8.222512562814071e-05, - "loss": 4.7444, - "step": 18192 - }, - { - "epoch": 9.487874837027379, - "grad_norm": 1.9542973041534424, - "learning_rate": 8.222412060301507e-05, - "loss": 4.9066, - "step": 18193 - }, - { - "epoch": 9.488396349413298, - "grad_norm": 1.3597484827041626, - "learning_rate": 8.222311557788945e-05, - "loss": 5.6176, - "step": 18194 - }, - { - "epoch": 9.488917861799218, - "grad_norm": 1.4931716918945312, - "learning_rate": 8.222211055276382e-05, - "loss": 5.9245, - "step": 18195 - }, - { - "epoch": 9.489439374185137, - "grad_norm": 1.4505963325500488, - "learning_rate": 8.22211055276382e-05, - "loss": 5.478, - "step": 18196 - }, - { - "epoch": 9.489960886571057, - "grad_norm": 1.3911272287368774, - "learning_rate": 8.222010050251257e-05, - "loss": 5.8677, - "step": 18197 - }, - { - "epoch": 9.490482398956976, - "grad_norm": 1.4939067363739014, - "learning_rate": 8.221909547738695e-05, - "loss": 5.9113, - "step": 18198 - }, - { - "epoch": 9.491003911342894, - "grad_norm": 1.3979356288909912, - "learning_rate": 8.221809045226131e-05, - "loss": 5.5785, - "step": 18199 - }, - { - "epoch": 9.491525423728813, - "grad_norm": 1.600269079208374, - "learning_rate": 8.221708542713569e-05, - "loss": 4.367, - "step": 18200 - }, - { - "epoch": 9.492046936114733, - "grad_norm": 1.6095668077468872, - "learning_rate": 8.221608040201006e-05, - "loss": 5.1741, - "step": 18201 - }, - { - "epoch": 9.492568448500652, - "grad_norm": 1.4539459943771362, - "learning_rate": 8.221507537688442e-05, - "loss": 5.8506, - "step": 18202 - }, - { - "epoch": 9.493089960886572, - "grad_norm": 1.4142072200775146, - "learning_rate": 8.22140703517588e-05, - "loss": 5.6662, - "step": 18203 - }, - { - "epoch": 9.493611473272491, - "grad_norm": 1.4713304042816162, - "learning_rate": 8.221306532663316e-05, - "loss": 5.6001, - "step": 18204 - }, - { - "epoch": 9.494132985658409, - "grad_norm": 1.4410302639007568, - "learning_rate": 8.221206030150754e-05, - "loss": 5.3059, - "step": 18205 - }, - { - "epoch": 9.494654498044328, - "grad_norm": 1.490399956703186, - "learning_rate": 8.22110552763819e-05, - "loss": 5.2455, - "step": 18206 - }, - { - "epoch": 9.495176010430248, - "grad_norm": 1.3780299425125122, - "learning_rate": 8.221005025125628e-05, - "loss": 5.8952, - "step": 18207 - }, - { - "epoch": 9.495697522816167, - "grad_norm": 1.4944435358047485, - "learning_rate": 8.220904522613066e-05, - "loss": 5.4811, - "step": 18208 - }, - { - "epoch": 9.496219035202087, - "grad_norm": 1.5864391326904297, - "learning_rate": 8.220804020100504e-05, - "loss": 5.3721, - "step": 18209 - }, - { - "epoch": 9.496740547588006, - "grad_norm": 1.5121114253997803, - "learning_rate": 8.22070351758794e-05, - "loss": 5.9032, - "step": 18210 - }, - { - "epoch": 9.497262059973924, - "grad_norm": 1.5650163888931274, - "learning_rate": 8.220603015075378e-05, - "loss": 4.9563, - "step": 18211 - }, - { - "epoch": 9.497783572359843, - "grad_norm": 1.367896556854248, - "learning_rate": 8.220502512562814e-05, - "loss": 5.0594, - "step": 18212 - }, - { - "epoch": 9.498305084745763, - "grad_norm": 1.4310622215270996, - "learning_rate": 8.220402010050252e-05, - "loss": 5.6884, - "step": 18213 - }, - { - "epoch": 9.498826597131682, - "grad_norm": 1.469844102859497, - "learning_rate": 8.220301507537689e-05, - "loss": 5.4649, - "step": 18214 - }, - { - "epoch": 9.499348109517602, - "grad_norm": 1.5925878286361694, - "learning_rate": 8.220201005025125e-05, - "loss": 5.3178, - "step": 18215 - }, - { - "epoch": 9.499869621903521, - "grad_norm": 1.462751865386963, - "learning_rate": 8.220100502512563e-05, - "loss": 5.6707, - "step": 18216 - }, - { - "epoch": 9.500391134289439, - "grad_norm": 1.4164036512374878, - "learning_rate": 8.22e-05, - "loss": 5.4118, - "step": 18217 - }, - { - "epoch": 9.500912646675358, - "grad_norm": 1.429270625114441, - "learning_rate": 8.219899497487438e-05, - "loss": 5.5524, - "step": 18218 - }, - { - "epoch": 9.501434159061278, - "grad_norm": 1.4899104833602905, - "learning_rate": 8.219798994974875e-05, - "loss": 5.5119, - "step": 18219 - }, - { - "epoch": 9.501955671447197, - "grad_norm": 1.4726436138153076, - "learning_rate": 8.219698492462313e-05, - "loss": 5.4973, - "step": 18220 - }, - { - "epoch": 9.502477183833117, - "grad_norm": 1.416284203529358, - "learning_rate": 8.219597989949749e-05, - "loss": 5.5501, - "step": 18221 - }, - { - "epoch": 9.502998696219036, - "grad_norm": 1.3790576457977295, - "learning_rate": 8.219497487437187e-05, - "loss": 5.5611, - "step": 18222 - }, - { - "epoch": 9.503520208604954, - "grad_norm": 1.3395150899887085, - "learning_rate": 8.219396984924623e-05, - "loss": 5.7675, - "step": 18223 - }, - { - "epoch": 9.504041720990873, - "grad_norm": 1.4494237899780273, - "learning_rate": 8.219296482412061e-05, - "loss": 4.9527, - "step": 18224 - }, - { - "epoch": 9.504563233376793, - "grad_norm": 1.4830390214920044, - "learning_rate": 8.219195979899497e-05, - "loss": 5.2787, - "step": 18225 - }, - { - "epoch": 9.505084745762712, - "grad_norm": 1.434812068939209, - "learning_rate": 8.219095477386935e-05, - "loss": 5.1871, - "step": 18226 - }, - { - "epoch": 9.505606258148632, - "grad_norm": 1.4104773998260498, - "learning_rate": 8.218994974874372e-05, - "loss": 5.9006, - "step": 18227 - }, - { - "epoch": 9.506127770534551, - "grad_norm": 1.461693525314331, - "learning_rate": 8.21889447236181e-05, - "loss": 5.0644, - "step": 18228 - }, - { - "epoch": 9.506649282920469, - "grad_norm": 1.6193628311157227, - "learning_rate": 8.218793969849247e-05, - "loss": 5.5503, - "step": 18229 - }, - { - "epoch": 9.507170795306388, - "grad_norm": 1.5623362064361572, - "learning_rate": 8.218693467336684e-05, - "loss": 5.4542, - "step": 18230 - }, - { - "epoch": 9.507692307692308, - "grad_norm": 1.3683351278305054, - "learning_rate": 8.218592964824121e-05, - "loss": 4.9658, - "step": 18231 - }, - { - "epoch": 9.508213820078227, - "grad_norm": 1.4455311298370361, - "learning_rate": 8.218492462311558e-05, - "loss": 5.3637, - "step": 18232 - }, - { - "epoch": 9.508735332464147, - "grad_norm": 1.406025767326355, - "learning_rate": 8.218391959798996e-05, - "loss": 5.6437, - "step": 18233 - }, - { - "epoch": 9.509256844850064, - "grad_norm": 1.3505845069885254, - "learning_rate": 8.218291457286432e-05, - "loss": 5.3173, - "step": 18234 - }, - { - "epoch": 9.509778357235984, - "grad_norm": 1.3794913291931152, - "learning_rate": 8.21819095477387e-05, - "loss": 5.2234, - "step": 18235 - }, - { - "epoch": 9.510299869621903, - "grad_norm": 1.3628244400024414, - "learning_rate": 8.218090452261306e-05, - "loss": 5.7041, - "step": 18236 - }, - { - "epoch": 9.510821382007823, - "grad_norm": 1.593664526939392, - "learning_rate": 8.217989949748744e-05, - "loss": 5.4752, - "step": 18237 - }, - { - "epoch": 9.511342894393742, - "grad_norm": 1.3716124296188354, - "learning_rate": 8.217889447236182e-05, - "loss": 5.7151, - "step": 18238 - }, - { - "epoch": 9.511864406779662, - "grad_norm": 1.363182544708252, - "learning_rate": 8.21778894472362e-05, - "loss": 5.602, - "step": 18239 - }, - { - "epoch": 9.512385919165581, - "grad_norm": 1.3740439414978027, - "learning_rate": 8.217688442211056e-05, - "loss": 5.5541, - "step": 18240 - }, - { - "epoch": 9.512907431551499, - "grad_norm": 1.4734939336776733, - "learning_rate": 8.217587939698494e-05, - "loss": 5.5309, - "step": 18241 - }, - { - "epoch": 9.513428943937418, - "grad_norm": 1.4590164422988892, - "learning_rate": 8.21748743718593e-05, - "loss": 5.7523, - "step": 18242 - }, - { - "epoch": 9.513950456323338, - "grad_norm": 1.4605140686035156, - "learning_rate": 8.217386934673367e-05, - "loss": 5.3006, - "step": 18243 - }, - { - "epoch": 9.514471968709257, - "grad_norm": 1.74355149269104, - "learning_rate": 8.217286432160804e-05, - "loss": 5.4567, - "step": 18244 - }, - { - "epoch": 9.514993481095177, - "grad_norm": 1.3160961866378784, - "learning_rate": 8.217185929648241e-05, - "loss": 5.5054, - "step": 18245 - }, - { - "epoch": 9.515514993481094, - "grad_norm": 1.3497449159622192, - "learning_rate": 8.217085427135679e-05, - "loss": 5.7948, - "step": 18246 - }, - { - "epoch": 9.516036505867014, - "grad_norm": 1.6617319583892822, - "learning_rate": 8.216984924623115e-05, - "loss": 5.4085, - "step": 18247 - }, - { - "epoch": 9.516558018252933, - "grad_norm": 1.53920316696167, - "learning_rate": 8.216884422110553e-05, - "loss": 5.8398, - "step": 18248 - }, - { - "epoch": 9.517079530638853, - "grad_norm": 1.5589096546173096, - "learning_rate": 8.21678391959799e-05, - "loss": 5.7034, - "step": 18249 - }, - { - "epoch": 9.517601043024772, - "grad_norm": 1.4164501428604126, - "learning_rate": 8.216683417085428e-05, - "loss": 5.0216, - "step": 18250 - }, - { - "epoch": 9.518122555410692, - "grad_norm": 1.4184236526489258, - "learning_rate": 8.216582914572865e-05, - "loss": 5.4354, - "step": 18251 - }, - { - "epoch": 9.518644067796611, - "grad_norm": 1.385456919670105, - "learning_rate": 8.216482412060303e-05, - "loss": 5.6413, - "step": 18252 - }, - { - "epoch": 9.519165580182529, - "grad_norm": 1.3513023853302002, - "learning_rate": 8.216381909547739e-05, - "loss": 5.3303, - "step": 18253 - }, - { - "epoch": 9.519687092568448, - "grad_norm": 1.4512683153152466, - "learning_rate": 8.216281407035177e-05, - "loss": 5.6187, - "step": 18254 - }, - { - "epoch": 9.520208604954368, - "grad_norm": 1.455775260925293, - "learning_rate": 8.216180904522613e-05, - "loss": 5.3244, - "step": 18255 - }, - { - "epoch": 9.520730117340287, - "grad_norm": 1.304795265197754, - "learning_rate": 8.21608040201005e-05, - "loss": 5.2349, - "step": 18256 - }, - { - "epoch": 9.521251629726207, - "grad_norm": 1.3285906314849854, - "learning_rate": 8.215979899497487e-05, - "loss": 5.759, - "step": 18257 - }, - { - "epoch": 9.521773142112124, - "grad_norm": 1.4240984916687012, - "learning_rate": 8.215879396984924e-05, - "loss": 5.79, - "step": 18258 - }, - { - "epoch": 9.522294654498044, - "grad_norm": 1.3226473331451416, - "learning_rate": 8.215778894472362e-05, - "loss": 5.3782, - "step": 18259 - }, - { - "epoch": 9.522816166883963, - "grad_norm": 1.4028522968292236, - "learning_rate": 8.2156783919598e-05, - "loss": 5.5869, - "step": 18260 - }, - { - "epoch": 9.523337679269883, - "grad_norm": 1.5244452953338623, - "learning_rate": 8.215577889447237e-05, - "loss": 5.0069, - "step": 18261 - }, - { - "epoch": 9.523859191655802, - "grad_norm": 1.4071040153503418, - "learning_rate": 8.215477386934674e-05, - "loss": 5.7212, - "step": 18262 - }, - { - "epoch": 9.524380704041722, - "grad_norm": 1.6735200881958008, - "learning_rate": 8.215376884422111e-05, - "loss": 5.3954, - "step": 18263 - }, - { - "epoch": 9.52490221642764, - "grad_norm": 1.404481291770935, - "learning_rate": 8.215276381909548e-05, - "loss": 5.7385, - "step": 18264 - }, - { - "epoch": 9.525423728813559, - "grad_norm": 1.5389699935913086, - "learning_rate": 8.215175879396986e-05, - "loss": 5.0943, - "step": 18265 - }, - { - "epoch": 9.525945241199478, - "grad_norm": 1.458540439605713, - "learning_rate": 8.215075376884422e-05, - "loss": 5.1922, - "step": 18266 - }, - { - "epoch": 9.526466753585398, - "grad_norm": 1.4522292613983154, - "learning_rate": 8.21497487437186e-05, - "loss": 5.5324, - "step": 18267 - }, - { - "epoch": 9.526988265971317, - "grad_norm": 1.4940396547317505, - "learning_rate": 8.214874371859296e-05, - "loss": 5.105, - "step": 18268 - }, - { - "epoch": 9.527509778357237, - "grad_norm": 1.4925856590270996, - "learning_rate": 8.214773869346734e-05, - "loss": 5.0233, - "step": 18269 - }, - { - "epoch": 9.528031290743154, - "grad_norm": 1.4090768098831177, - "learning_rate": 8.214673366834172e-05, - "loss": 5.7091, - "step": 18270 - }, - { - "epoch": 9.528552803129074, - "grad_norm": 1.516034722328186, - "learning_rate": 8.214572864321608e-05, - "loss": 5.5249, - "step": 18271 - }, - { - "epoch": 9.529074315514993, - "grad_norm": 1.5842247009277344, - "learning_rate": 8.214472361809046e-05, - "loss": 5.4615, - "step": 18272 - }, - { - "epoch": 9.529595827900913, - "grad_norm": 1.6436148881912231, - "learning_rate": 8.214371859296483e-05, - "loss": 4.6069, - "step": 18273 - }, - { - "epoch": 9.530117340286832, - "grad_norm": 1.4806472063064575, - "learning_rate": 8.21427135678392e-05, - "loss": 5.4777, - "step": 18274 - }, - { - "epoch": 9.530638852672752, - "grad_norm": 1.428080677986145, - "learning_rate": 8.214170854271357e-05, - "loss": 5.0148, - "step": 18275 - }, - { - "epoch": 9.53116036505867, - "grad_norm": 1.465238332748413, - "learning_rate": 8.214070351758795e-05, - "loss": 5.5715, - "step": 18276 - }, - { - "epoch": 9.531681877444589, - "grad_norm": 1.4791090488433838, - "learning_rate": 8.213969849246231e-05, - "loss": 5.5603, - "step": 18277 - }, - { - "epoch": 9.532203389830508, - "grad_norm": 1.5235860347747803, - "learning_rate": 8.213869346733669e-05, - "loss": 4.9259, - "step": 18278 - }, - { - "epoch": 9.532724902216428, - "grad_norm": 1.3658149242401123, - "learning_rate": 8.213768844221105e-05, - "loss": 5.5824, - "step": 18279 - }, - { - "epoch": 9.533246414602347, - "grad_norm": 1.3070549964904785, - "learning_rate": 8.213668341708543e-05, - "loss": 5.8367, - "step": 18280 - }, - { - "epoch": 9.533767926988267, - "grad_norm": 1.3196557760238647, - "learning_rate": 8.213567839195981e-05, - "loss": 5.6928, - "step": 18281 - }, - { - "epoch": 9.534289439374184, - "grad_norm": 1.3971120119094849, - "learning_rate": 8.213467336683417e-05, - "loss": 5.5049, - "step": 18282 - }, - { - "epoch": 9.534810951760104, - "grad_norm": 1.4376627206802368, - "learning_rate": 8.213366834170855e-05, - "loss": 5.729, - "step": 18283 - }, - { - "epoch": 9.535332464146023, - "grad_norm": 1.4513133764266968, - "learning_rate": 8.213266331658291e-05, - "loss": 5.7432, - "step": 18284 - }, - { - "epoch": 9.535853976531943, - "grad_norm": 1.367724061012268, - "learning_rate": 8.213165829145729e-05, - "loss": 5.4314, - "step": 18285 - }, - { - "epoch": 9.536375488917862, - "grad_norm": 1.4014049768447876, - "learning_rate": 8.213065326633166e-05, - "loss": 4.9823, - "step": 18286 - }, - { - "epoch": 9.536897001303782, - "grad_norm": 1.3572008609771729, - "learning_rate": 8.212964824120603e-05, - "loss": 5.5571, - "step": 18287 - }, - { - "epoch": 9.5374185136897, - "grad_norm": 1.4626684188842773, - "learning_rate": 8.21286432160804e-05, - "loss": 5.1801, - "step": 18288 - }, - { - "epoch": 9.537940026075619, - "grad_norm": 1.4876142740249634, - "learning_rate": 8.212763819095478e-05, - "loss": 5.4644, - "step": 18289 - }, - { - "epoch": 9.538461538461538, - "grad_norm": 1.4779225587844849, - "learning_rate": 8.212663316582915e-05, - "loss": 5.5038, - "step": 18290 - }, - { - "epoch": 9.538983050847458, - "grad_norm": 1.3988007307052612, - "learning_rate": 8.212562814070353e-05, - "loss": 5.3775, - "step": 18291 - }, - { - "epoch": 9.539504563233377, - "grad_norm": 1.3744595050811768, - "learning_rate": 8.21246231155779e-05, - "loss": 5.7703, - "step": 18292 - }, - { - "epoch": 9.540026075619297, - "grad_norm": 1.4616116285324097, - "learning_rate": 8.212361809045227e-05, - "loss": 5.6302, - "step": 18293 - }, - { - "epoch": 9.540547588005214, - "grad_norm": 1.4305635690689087, - "learning_rate": 8.212261306532664e-05, - "loss": 5.4902, - "step": 18294 - }, - { - "epoch": 9.541069100391134, - "grad_norm": 1.4078748226165771, - "learning_rate": 8.2121608040201e-05, - "loss": 5.8846, - "step": 18295 - }, - { - "epoch": 9.541590612777053, - "grad_norm": 1.3034636974334717, - "learning_rate": 8.212060301507538e-05, - "loss": 5.7178, - "step": 18296 - }, - { - "epoch": 9.542112125162973, - "grad_norm": 1.4172894954681396, - "learning_rate": 8.211959798994974e-05, - "loss": 5.5372, - "step": 18297 - }, - { - "epoch": 9.542633637548892, - "grad_norm": 1.48302161693573, - "learning_rate": 8.211859296482412e-05, - "loss": 5.5398, - "step": 18298 - }, - { - "epoch": 9.543155149934812, - "grad_norm": 1.2536650896072388, - "learning_rate": 8.211758793969849e-05, - "loss": 5.8565, - "step": 18299 - }, - { - "epoch": 9.54367666232073, - "grad_norm": 1.4930989742279053, - "learning_rate": 8.211658291457286e-05, - "loss": 5.2333, - "step": 18300 - }, - { - "epoch": 9.544198174706649, - "grad_norm": 1.387807011604309, - "learning_rate": 8.211557788944724e-05, - "loss": 5.5534, - "step": 18301 - }, - { - "epoch": 9.544719687092568, - "grad_norm": 1.3794152736663818, - "learning_rate": 8.211457286432162e-05, - "loss": 5.6356, - "step": 18302 - }, - { - "epoch": 9.545241199478488, - "grad_norm": 1.4397284984588623, - "learning_rate": 8.211356783919598e-05, - "loss": 5.1983, - "step": 18303 - }, - { - "epoch": 9.545762711864407, - "grad_norm": 1.4625598192214966, - "learning_rate": 8.211256281407036e-05, - "loss": 5.0812, - "step": 18304 - }, - { - "epoch": 9.546284224250327, - "grad_norm": 1.481059193611145, - "learning_rate": 8.211155778894473e-05, - "loss": 5.3373, - "step": 18305 - }, - { - "epoch": 9.546805736636244, - "grad_norm": 1.3754284381866455, - "learning_rate": 8.21105527638191e-05, - "loss": 5.0372, - "step": 18306 - }, - { - "epoch": 9.547327249022164, - "grad_norm": 1.4203444719314575, - "learning_rate": 8.210954773869347e-05, - "loss": 5.2696, - "step": 18307 - }, - { - "epoch": 9.547848761408083, - "grad_norm": 1.4433382749557495, - "learning_rate": 8.210854271356785e-05, - "loss": 5.7037, - "step": 18308 - }, - { - "epoch": 9.548370273794003, - "grad_norm": 1.4698634147644043, - "learning_rate": 8.210753768844221e-05, - "loss": 5.5036, - "step": 18309 - }, - { - "epoch": 9.548891786179922, - "grad_norm": 1.3343249559402466, - "learning_rate": 8.210653266331659e-05, - "loss": 5.714, - "step": 18310 - }, - { - "epoch": 9.549413298565842, - "grad_norm": 1.594611644744873, - "learning_rate": 8.210552763819097e-05, - "loss": 5.099, - "step": 18311 - }, - { - "epoch": 9.54993481095176, - "grad_norm": 1.299289345741272, - "learning_rate": 8.210452261306533e-05, - "loss": 5.9767, - "step": 18312 - }, - { - "epoch": 9.550456323337679, - "grad_norm": 1.4694918394088745, - "learning_rate": 8.210351758793971e-05, - "loss": 5.443, - "step": 18313 - }, - { - "epoch": 9.550977835723598, - "grad_norm": 1.3582379817962646, - "learning_rate": 8.210251256281407e-05, - "loss": 5.7773, - "step": 18314 - }, - { - "epoch": 9.551499348109518, - "grad_norm": 1.4757329225540161, - "learning_rate": 8.210150753768845e-05, - "loss": 5.2156, - "step": 18315 - }, - { - "epoch": 9.552020860495437, - "grad_norm": 1.4559952020645142, - "learning_rate": 8.210050251256281e-05, - "loss": 5.7311, - "step": 18316 - }, - { - "epoch": 9.552542372881355, - "grad_norm": 1.4397358894348145, - "learning_rate": 8.209949748743719e-05, - "loss": 4.953, - "step": 18317 - }, - { - "epoch": 9.553063885267274, - "grad_norm": 1.4345252513885498, - "learning_rate": 8.209849246231156e-05, - "loss": 5.4396, - "step": 18318 - }, - { - "epoch": 9.553585397653194, - "grad_norm": 1.4411087036132812, - "learning_rate": 8.209748743718593e-05, - "loss": 5.318, - "step": 18319 - }, - { - "epoch": 9.554106910039113, - "grad_norm": 1.359351396560669, - "learning_rate": 8.20964824120603e-05, - "loss": 5.4927, - "step": 18320 - }, - { - "epoch": 9.554628422425033, - "grad_norm": 1.3427972793579102, - "learning_rate": 8.209547738693468e-05, - "loss": 5.685, - "step": 18321 - }, - { - "epoch": 9.555149934810952, - "grad_norm": 1.4132918119430542, - "learning_rate": 8.209447236180905e-05, - "loss": 5.6539, - "step": 18322 - }, - { - "epoch": 9.555671447196872, - "grad_norm": 1.3377482891082764, - "learning_rate": 8.209346733668342e-05, - "loss": 5.8177, - "step": 18323 - }, - { - "epoch": 9.55619295958279, - "grad_norm": 1.4829702377319336, - "learning_rate": 8.20924623115578e-05, - "loss": 5.1974, - "step": 18324 - }, - { - "epoch": 9.556714471968709, - "grad_norm": 1.5048482418060303, - "learning_rate": 8.209145728643216e-05, - "loss": 5.0607, - "step": 18325 - }, - { - "epoch": 9.557235984354628, - "grad_norm": 1.2498520612716675, - "learning_rate": 8.209045226130654e-05, - "loss": 4.6177, - "step": 18326 - }, - { - "epoch": 9.557757496740548, - "grad_norm": 1.4731005430221558, - "learning_rate": 8.20894472361809e-05, - "loss": 5.2686, - "step": 18327 - }, - { - "epoch": 9.558279009126467, - "grad_norm": 1.4990254640579224, - "learning_rate": 8.208844221105528e-05, - "loss": 5.9133, - "step": 18328 - }, - { - "epoch": 9.558800521512385, - "grad_norm": 1.5770225524902344, - "learning_rate": 8.208743718592964e-05, - "loss": 5.4638, - "step": 18329 - }, - { - "epoch": 9.559322033898304, - "grad_norm": 1.3742477893829346, - "learning_rate": 8.208643216080402e-05, - "loss": 5.5491, - "step": 18330 - }, - { - "epoch": 9.559843546284224, - "grad_norm": 1.3710339069366455, - "learning_rate": 8.20854271356784e-05, - "loss": 5.5765, - "step": 18331 - }, - { - "epoch": 9.560365058670143, - "grad_norm": 1.4051966667175293, - "learning_rate": 8.208442211055278e-05, - "loss": 5.5219, - "step": 18332 - }, - { - "epoch": 9.560886571056063, - "grad_norm": 1.5180063247680664, - "learning_rate": 8.208341708542714e-05, - "loss": 5.4054, - "step": 18333 - }, - { - "epoch": 9.561408083441982, - "grad_norm": 1.5427783727645874, - "learning_rate": 8.208241206030152e-05, - "loss": 5.1961, - "step": 18334 - }, - { - "epoch": 9.561929595827902, - "grad_norm": 1.583938479423523, - "learning_rate": 8.208140703517588e-05, - "loss": 5.4539, - "step": 18335 - }, - { - "epoch": 9.56245110821382, - "grad_norm": 1.5169938802719116, - "learning_rate": 8.208040201005025e-05, - "loss": 5.1016, - "step": 18336 - }, - { - "epoch": 9.562972620599739, - "grad_norm": 1.4597307443618774, - "learning_rate": 8.207939698492463e-05, - "loss": 4.7695, - "step": 18337 - }, - { - "epoch": 9.563494132985658, - "grad_norm": 1.4156146049499512, - "learning_rate": 8.207839195979899e-05, - "loss": 5.5606, - "step": 18338 - }, - { - "epoch": 9.564015645371578, - "grad_norm": 1.4167028665542603, - "learning_rate": 8.207738693467337e-05, - "loss": 5.579, - "step": 18339 - }, - { - "epoch": 9.564537157757497, - "grad_norm": 1.5436700582504272, - "learning_rate": 8.207638190954773e-05, - "loss": 5.5429, - "step": 18340 - }, - { - "epoch": 9.565058670143415, - "grad_norm": 1.4117560386657715, - "learning_rate": 8.207537688442211e-05, - "loss": 5.4547, - "step": 18341 - }, - { - "epoch": 9.565580182529335, - "grad_norm": 1.4248008728027344, - "learning_rate": 8.207437185929649e-05, - "loss": 5.8892, - "step": 18342 - }, - { - "epoch": 9.566101694915254, - "grad_norm": 1.4370615482330322, - "learning_rate": 8.207336683417087e-05, - "loss": 5.3731, - "step": 18343 - }, - { - "epoch": 9.566623207301173, - "grad_norm": 1.5552817583084106, - "learning_rate": 8.207236180904523e-05, - "loss": 4.9971, - "step": 18344 - }, - { - "epoch": 9.567144719687093, - "grad_norm": 1.5064730644226074, - "learning_rate": 8.207135678391961e-05, - "loss": 5.564, - "step": 18345 - }, - { - "epoch": 9.567666232073012, - "grad_norm": 1.4385499954223633, - "learning_rate": 8.207035175879397e-05, - "loss": 5.788, - "step": 18346 - }, - { - "epoch": 9.568187744458932, - "grad_norm": 1.4837226867675781, - "learning_rate": 8.206934673366835e-05, - "loss": 5.4653, - "step": 18347 - }, - { - "epoch": 9.56870925684485, - "grad_norm": 1.3416292667388916, - "learning_rate": 8.206834170854272e-05, - "loss": 5.6925, - "step": 18348 - }, - { - "epoch": 9.569230769230769, - "grad_norm": 1.4626269340515137, - "learning_rate": 8.206733668341708e-05, - "loss": 5.5569, - "step": 18349 - }, - { - "epoch": 9.569752281616688, - "grad_norm": 1.382272720336914, - "learning_rate": 8.206633165829146e-05, - "loss": 5.7215, - "step": 18350 - }, - { - "epoch": 9.570273794002608, - "grad_norm": 1.402427077293396, - "learning_rate": 8.206532663316584e-05, - "loss": 5.6271, - "step": 18351 - }, - { - "epoch": 9.570795306388527, - "grad_norm": 1.5238440036773682, - "learning_rate": 8.206432160804021e-05, - "loss": 5.3069, - "step": 18352 - }, - { - "epoch": 9.571316818774445, - "grad_norm": 1.5014384984970093, - "learning_rate": 8.206331658291458e-05, - "loss": 5.9431, - "step": 18353 - }, - { - "epoch": 9.571838331160365, - "grad_norm": 1.4079301357269287, - "learning_rate": 8.206231155778896e-05, - "loss": 5.4642, - "step": 18354 - }, - { - "epoch": 9.572359843546284, - "grad_norm": 1.3998842239379883, - "learning_rate": 8.206130653266332e-05, - "loss": 5.6276, - "step": 18355 - }, - { - "epoch": 9.572881355932203, - "grad_norm": 1.4364532232284546, - "learning_rate": 8.20603015075377e-05, - "loss": 5.1197, - "step": 18356 - }, - { - "epoch": 9.573402868318123, - "grad_norm": 1.3005691766738892, - "learning_rate": 8.205929648241206e-05, - "loss": 5.8906, - "step": 18357 - }, - { - "epoch": 9.573924380704042, - "grad_norm": 1.3533811569213867, - "learning_rate": 8.205829145728644e-05, - "loss": 5.2059, - "step": 18358 - }, - { - "epoch": 9.57444589308996, - "grad_norm": 1.4331055879592896, - "learning_rate": 8.20572864321608e-05, - "loss": 5.7286, - "step": 18359 - }, - { - "epoch": 9.57496740547588, - "grad_norm": 1.4530460834503174, - "learning_rate": 8.205628140703518e-05, - "loss": 5.5951, - "step": 18360 - }, - { - "epoch": 9.575488917861799, - "grad_norm": 1.50508451461792, - "learning_rate": 8.205527638190955e-05, - "loss": 5.048, - "step": 18361 - }, - { - "epoch": 9.576010430247718, - "grad_norm": 1.3186484575271606, - "learning_rate": 8.205427135678392e-05, - "loss": 5.3076, - "step": 18362 - }, - { - "epoch": 9.576531942633638, - "grad_norm": 1.5734187364578247, - "learning_rate": 8.20532663316583e-05, - "loss": 5.5519, - "step": 18363 - }, - { - "epoch": 9.577053455019557, - "grad_norm": 1.4869722127914429, - "learning_rate": 8.205226130653267e-05, - "loss": 5.2079, - "step": 18364 - }, - { - "epoch": 9.577574967405475, - "grad_norm": 1.3278858661651611, - "learning_rate": 8.205125628140704e-05, - "loss": 5.9114, - "step": 18365 - }, - { - "epoch": 9.578096479791395, - "grad_norm": 1.5703504085540771, - "learning_rate": 8.205025125628141e-05, - "loss": 4.6126, - "step": 18366 - }, - { - "epoch": 9.578617992177314, - "grad_norm": 1.4676986932754517, - "learning_rate": 8.204924623115579e-05, - "loss": 5.323, - "step": 18367 - }, - { - "epoch": 9.579139504563233, - "grad_norm": 1.5338784456253052, - "learning_rate": 8.204824120603015e-05, - "loss": 5.6064, - "step": 18368 - }, - { - "epoch": 9.579661016949153, - "grad_norm": 1.3678576946258545, - "learning_rate": 8.204723618090453e-05, - "loss": 5.7686, - "step": 18369 - }, - { - "epoch": 9.580182529335072, - "grad_norm": 1.5025380849838257, - "learning_rate": 8.204623115577889e-05, - "loss": 5.5066, - "step": 18370 - }, - { - "epoch": 9.58070404172099, - "grad_norm": 1.4196665287017822, - "learning_rate": 8.204522613065327e-05, - "loss": 5.4945, - "step": 18371 - }, - { - "epoch": 9.58122555410691, - "grad_norm": 1.513031244277954, - "learning_rate": 8.204422110552765e-05, - "loss": 4.8683, - "step": 18372 - }, - { - "epoch": 9.581747066492829, - "grad_norm": 1.3793237209320068, - "learning_rate": 8.204321608040203e-05, - "loss": 4.9467, - "step": 18373 - }, - { - "epoch": 9.582268578878748, - "grad_norm": 1.5272548198699951, - "learning_rate": 8.204221105527639e-05, - "loss": 5.6703, - "step": 18374 - }, - { - "epoch": 9.582790091264668, - "grad_norm": 1.5520931482315063, - "learning_rate": 8.204120603015075e-05, - "loss": 5.2037, - "step": 18375 - }, - { - "epoch": 9.583311603650587, - "grad_norm": 1.3727627992630005, - "learning_rate": 8.204020100502513e-05, - "loss": 5.9334, - "step": 18376 - }, - { - "epoch": 9.583833116036505, - "grad_norm": 1.329674482345581, - "learning_rate": 8.20391959798995e-05, - "loss": 5.6482, - "step": 18377 - }, - { - "epoch": 9.584354628422425, - "grad_norm": 1.6507521867752075, - "learning_rate": 8.203819095477387e-05, - "loss": 4.8525, - "step": 18378 - }, - { - "epoch": 9.584876140808344, - "grad_norm": 1.470011591911316, - "learning_rate": 8.203718592964824e-05, - "loss": 5.2684, - "step": 18379 - }, - { - "epoch": 9.585397653194264, - "grad_norm": 1.4173240661621094, - "learning_rate": 8.203618090452262e-05, - "loss": 5.4601, - "step": 18380 - }, - { - "epoch": 9.585919165580183, - "grad_norm": 1.3744840621948242, - "learning_rate": 8.203517587939698e-05, - "loss": 5.2331, - "step": 18381 - }, - { - "epoch": 9.586440677966102, - "grad_norm": 1.4921026229858398, - "learning_rate": 8.203417085427136e-05, - "loss": 4.9676, - "step": 18382 - }, - { - "epoch": 9.58696219035202, - "grad_norm": 1.3921645879745483, - "learning_rate": 8.203316582914574e-05, - "loss": 5.6855, - "step": 18383 - }, - { - "epoch": 9.58748370273794, - "grad_norm": 1.4761162996292114, - "learning_rate": 8.203216080402011e-05, - "loss": 5.2246, - "step": 18384 - }, - { - "epoch": 9.588005215123859, - "grad_norm": 1.4205816984176636, - "learning_rate": 8.203115577889448e-05, - "loss": 5.2386, - "step": 18385 - }, - { - "epoch": 9.588526727509779, - "grad_norm": 1.3870853185653687, - "learning_rate": 8.203015075376886e-05, - "loss": 5.5675, - "step": 18386 - }, - { - "epoch": 9.589048239895698, - "grad_norm": 1.4582679271697998, - "learning_rate": 8.202914572864322e-05, - "loss": 5.3366, - "step": 18387 - }, - { - "epoch": 9.589569752281617, - "grad_norm": 1.533099889755249, - "learning_rate": 8.202814070351758e-05, - "loss": 5.1712, - "step": 18388 - }, - { - "epoch": 9.590091264667535, - "grad_norm": 1.7438627481460571, - "learning_rate": 8.202713567839196e-05, - "loss": 5.0833, - "step": 18389 - }, - { - "epoch": 9.590612777053455, - "grad_norm": 1.4177792072296143, - "learning_rate": 8.202613065326633e-05, - "loss": 5.6486, - "step": 18390 - }, - { - "epoch": 9.591134289439374, - "grad_norm": 1.4243345260620117, - "learning_rate": 8.20251256281407e-05, - "loss": 5.6234, - "step": 18391 - }, - { - "epoch": 9.591655801825294, - "grad_norm": 1.468032717704773, - "learning_rate": 8.202412060301508e-05, - "loss": 5.56, - "step": 18392 - }, - { - "epoch": 9.592177314211213, - "grad_norm": 1.4212567806243896, - "learning_rate": 8.202311557788946e-05, - "loss": 5.8212, - "step": 18393 - }, - { - "epoch": 9.592698826597132, - "grad_norm": 1.52122962474823, - "learning_rate": 8.202211055276382e-05, - "loss": 5.385, - "step": 18394 - }, - { - "epoch": 9.59322033898305, - "grad_norm": 1.281960129737854, - "learning_rate": 8.20211055276382e-05, - "loss": 5.1863, - "step": 18395 - }, - { - "epoch": 9.59374185136897, - "grad_norm": 1.6613415479660034, - "learning_rate": 8.202010050251257e-05, - "loss": 5.2463, - "step": 18396 - }, - { - "epoch": 9.594263363754889, - "grad_norm": 1.4577258825302124, - "learning_rate": 8.201909547738694e-05, - "loss": 5.2644, - "step": 18397 - }, - { - "epoch": 9.594784876140809, - "grad_norm": 1.4321560859680176, - "learning_rate": 8.201809045226131e-05, - "loss": 5.7986, - "step": 18398 - }, - { - "epoch": 9.595306388526728, - "grad_norm": 1.3093544244766235, - "learning_rate": 8.201708542713569e-05, - "loss": 5.6196, - "step": 18399 - }, - { - "epoch": 9.595827900912647, - "grad_norm": 1.4659130573272705, - "learning_rate": 8.201608040201005e-05, - "loss": 5.5407, - "step": 18400 - }, - { - "epoch": 9.596349413298565, - "grad_norm": 1.3577601909637451, - "learning_rate": 8.201507537688443e-05, - "loss": 5.3856, - "step": 18401 - }, - { - "epoch": 9.596870925684485, - "grad_norm": 1.4544841051101685, - "learning_rate": 8.201407035175879e-05, - "loss": 5.2095, - "step": 18402 - }, - { - "epoch": 9.597392438070404, - "grad_norm": 1.5011377334594727, - "learning_rate": 8.201306532663317e-05, - "loss": 5.3268, - "step": 18403 - }, - { - "epoch": 9.597913950456324, - "grad_norm": 1.519648790359497, - "learning_rate": 8.201206030150755e-05, - "loss": 5.0936, - "step": 18404 - }, - { - "epoch": 9.598435462842243, - "grad_norm": 1.5134702920913696, - "learning_rate": 8.201105527638191e-05, - "loss": 5.5277, - "step": 18405 - }, - { - "epoch": 9.598956975228162, - "grad_norm": 1.504813551902771, - "learning_rate": 8.201005025125629e-05, - "loss": 5.6111, - "step": 18406 - }, - { - "epoch": 9.59947848761408, - "grad_norm": 1.4620130062103271, - "learning_rate": 8.200904522613065e-05, - "loss": 4.9746, - "step": 18407 - }, - { - "epoch": 9.6, - "grad_norm": 1.426472544670105, - "learning_rate": 8.200804020100503e-05, - "loss": 5.7596, - "step": 18408 - }, - { - "epoch": 9.600521512385919, - "grad_norm": 1.5054049491882324, - "learning_rate": 8.20070351758794e-05, - "loss": 5.6354, - "step": 18409 - }, - { - "epoch": 9.601043024771839, - "grad_norm": 1.5484403371810913, - "learning_rate": 8.200603015075377e-05, - "loss": 5.0328, - "step": 18410 - }, - { - "epoch": 9.601564537157758, - "grad_norm": 1.5226997137069702, - "learning_rate": 8.200502512562814e-05, - "loss": 4.9849, - "step": 18411 - }, - { - "epoch": 9.602086049543676, - "grad_norm": 1.4151394367218018, - "learning_rate": 8.200402010050252e-05, - "loss": 5.4376, - "step": 18412 - }, - { - "epoch": 9.602607561929595, - "grad_norm": 1.5649597644805908, - "learning_rate": 8.200301507537688e-05, - "loss": 5.4881, - "step": 18413 - }, - { - "epoch": 9.603129074315515, - "grad_norm": 1.3760944604873657, - "learning_rate": 8.200201005025126e-05, - "loss": 5.416, - "step": 18414 - }, - { - "epoch": 9.603650586701434, - "grad_norm": 1.530717134475708, - "learning_rate": 8.200100502512564e-05, - "loss": 5.3066, - "step": 18415 - }, - { - "epoch": 9.604172099087354, - "grad_norm": 1.4824564456939697, - "learning_rate": 8.2e-05, - "loss": 5.533, - "step": 18416 - }, - { - "epoch": 9.604693611473273, - "grad_norm": 1.4879517555236816, - "learning_rate": 8.199899497487438e-05, - "loss": 5.554, - "step": 18417 - }, - { - "epoch": 9.605215123859193, - "grad_norm": 1.4429326057434082, - "learning_rate": 8.199798994974874e-05, - "loss": 5.6076, - "step": 18418 - }, - { - "epoch": 9.60573663624511, - "grad_norm": 1.4914982318878174, - "learning_rate": 8.199698492462312e-05, - "loss": 5.4015, - "step": 18419 - }, - { - "epoch": 9.60625814863103, - "grad_norm": 1.3539097309112549, - "learning_rate": 8.199597989949749e-05, - "loss": 5.6999, - "step": 18420 - }, - { - "epoch": 9.60677966101695, - "grad_norm": 1.3722338676452637, - "learning_rate": 8.199497487437186e-05, - "loss": 5.8473, - "step": 18421 - }, - { - "epoch": 9.607301173402869, - "grad_norm": 1.3885574340820312, - "learning_rate": 8.199396984924623e-05, - "loss": 5.604, - "step": 18422 - }, - { - "epoch": 9.607822685788788, - "grad_norm": 1.6428301334381104, - "learning_rate": 8.19929648241206e-05, - "loss": 5.1716, - "step": 18423 - }, - { - "epoch": 9.608344198174706, - "grad_norm": 1.5168668031692505, - "learning_rate": 8.199195979899498e-05, - "loss": 5.3154, - "step": 18424 - }, - { - "epoch": 9.608865710560625, - "grad_norm": 1.4617899656295776, - "learning_rate": 8.199095477386936e-05, - "loss": 5.5677, - "step": 18425 - }, - { - "epoch": 9.609387222946545, - "grad_norm": 1.3344500064849854, - "learning_rate": 8.198994974874373e-05, - "loss": 5.6638, - "step": 18426 - }, - { - "epoch": 9.609908735332464, - "grad_norm": 1.3702986240386963, - "learning_rate": 8.19889447236181e-05, - "loss": 5.6939, - "step": 18427 - }, - { - "epoch": 9.610430247718384, - "grad_norm": 1.5982046127319336, - "learning_rate": 8.198793969849247e-05, - "loss": 5.3848, - "step": 18428 - }, - { - "epoch": 9.610951760104303, - "grad_norm": 1.4961978197097778, - "learning_rate": 8.198693467336683e-05, - "loss": 5.5576, - "step": 18429 - }, - { - "epoch": 9.611473272490223, - "grad_norm": 1.3434443473815918, - "learning_rate": 8.198592964824121e-05, - "loss": 5.6093, - "step": 18430 - }, - { - "epoch": 9.61199478487614, - "grad_norm": 1.3862298727035522, - "learning_rate": 8.198492462311557e-05, - "loss": 5.7293, - "step": 18431 - }, - { - "epoch": 9.61251629726206, - "grad_norm": 1.4626322984695435, - "learning_rate": 8.198391959798995e-05, - "loss": 5.4167, - "step": 18432 - }, - { - "epoch": 9.61303780964798, - "grad_norm": 1.5036150217056274, - "learning_rate": 8.198291457286432e-05, - "loss": 5.5649, - "step": 18433 - }, - { - "epoch": 9.613559322033899, - "grad_norm": 1.624660849571228, - "learning_rate": 8.19819095477387e-05, - "loss": 4.9833, - "step": 18434 - }, - { - "epoch": 9.614080834419818, - "grad_norm": 1.5296603441238403, - "learning_rate": 8.198090452261307e-05, - "loss": 5.1221, - "step": 18435 - }, - { - "epoch": 9.614602346805736, - "grad_norm": 1.4884387254714966, - "learning_rate": 8.197989949748745e-05, - "loss": 5.6654, - "step": 18436 - }, - { - "epoch": 9.615123859191655, - "grad_norm": 1.4879038333892822, - "learning_rate": 8.197889447236181e-05, - "loss": 5.8457, - "step": 18437 - }, - { - "epoch": 9.615645371577575, - "grad_norm": 1.6049697399139404, - "learning_rate": 8.197788944723619e-05, - "loss": 5.0425, - "step": 18438 - }, - { - "epoch": 9.616166883963494, - "grad_norm": 1.4730912446975708, - "learning_rate": 8.197688442211056e-05, - "loss": 5.6596, - "step": 18439 - }, - { - "epoch": 9.616688396349414, - "grad_norm": 1.6819064617156982, - "learning_rate": 8.197587939698493e-05, - "loss": 5.4894, - "step": 18440 - }, - { - "epoch": 9.617209908735333, - "grad_norm": 1.522387146949768, - "learning_rate": 8.19748743718593e-05, - "loss": 5.6538, - "step": 18441 - }, - { - "epoch": 9.617731421121253, - "grad_norm": 1.6732240915298462, - "learning_rate": 8.197386934673366e-05, - "loss": 5.3023, - "step": 18442 - }, - { - "epoch": 9.61825293350717, - "grad_norm": 1.3353803157806396, - "learning_rate": 8.197286432160804e-05, - "loss": 5.7024, - "step": 18443 - }, - { - "epoch": 9.61877444589309, - "grad_norm": 1.3304393291473389, - "learning_rate": 8.197185929648242e-05, - "loss": 5.4267, - "step": 18444 - }, - { - "epoch": 9.61929595827901, - "grad_norm": 1.5635309219360352, - "learning_rate": 8.19708542713568e-05, - "loss": 4.9325, - "step": 18445 - }, - { - "epoch": 9.619817470664929, - "grad_norm": 1.3761059045791626, - "learning_rate": 8.196984924623116e-05, - "loss": 5.5599, - "step": 18446 - }, - { - "epoch": 9.620338983050848, - "grad_norm": 1.47213613986969, - "learning_rate": 8.196884422110554e-05, - "loss": 4.7837, - "step": 18447 - }, - { - "epoch": 9.620860495436766, - "grad_norm": 1.3744595050811768, - "learning_rate": 8.19678391959799e-05, - "loss": 5.0223, - "step": 18448 - }, - { - "epoch": 9.621382007822685, - "grad_norm": 1.358615756034851, - "learning_rate": 8.196683417085428e-05, - "loss": 5.7578, - "step": 18449 - }, - { - "epoch": 9.621903520208605, - "grad_norm": 1.438184142112732, - "learning_rate": 8.196582914572864e-05, - "loss": 5.2886, - "step": 18450 - }, - { - "epoch": 9.622425032594524, - "grad_norm": 1.5022472143173218, - "learning_rate": 8.196482412060302e-05, - "loss": 5.7655, - "step": 18451 - }, - { - "epoch": 9.622946544980444, - "grad_norm": 1.4404679536819458, - "learning_rate": 8.196381909547739e-05, - "loss": 5.3263, - "step": 18452 - }, - { - "epoch": 9.623468057366363, - "grad_norm": 1.4668915271759033, - "learning_rate": 8.196281407035176e-05, - "loss": 5.3864, - "step": 18453 - }, - { - "epoch": 9.62398956975228, - "grad_norm": 1.4519809484481812, - "learning_rate": 8.196180904522613e-05, - "loss": 5.3109, - "step": 18454 - }, - { - "epoch": 9.6245110821382, - "grad_norm": 1.523984670639038, - "learning_rate": 8.19608040201005e-05, - "loss": 5.4361, - "step": 18455 - }, - { - "epoch": 9.62503259452412, - "grad_norm": 1.437371015548706, - "learning_rate": 8.195979899497488e-05, - "loss": 5.2901, - "step": 18456 - }, - { - "epoch": 9.62555410691004, - "grad_norm": 1.426194429397583, - "learning_rate": 8.195879396984925e-05, - "loss": 5.8952, - "step": 18457 - }, - { - "epoch": 9.626075619295959, - "grad_norm": 1.4064124822616577, - "learning_rate": 8.195778894472363e-05, - "loss": 5.749, - "step": 18458 - }, - { - "epoch": 9.626597131681878, - "grad_norm": 1.3884235620498657, - "learning_rate": 8.195678391959799e-05, - "loss": 5.3946, - "step": 18459 - }, - { - "epoch": 9.627118644067796, - "grad_norm": 1.3600341081619263, - "learning_rate": 8.195577889447237e-05, - "loss": 5.5987, - "step": 18460 - }, - { - "epoch": 9.627640156453715, - "grad_norm": 1.3391847610473633, - "learning_rate": 8.195477386934673e-05, - "loss": 5.5961, - "step": 18461 - }, - { - "epoch": 9.628161668839635, - "grad_norm": 1.533144474029541, - "learning_rate": 8.195376884422111e-05, - "loss": 5.2199, - "step": 18462 - }, - { - "epoch": 9.628683181225554, - "grad_norm": 1.5005134344100952, - "learning_rate": 8.195276381909547e-05, - "loss": 5.7109, - "step": 18463 - }, - { - "epoch": 9.629204693611474, - "grad_norm": 1.3814374208450317, - "learning_rate": 8.195175879396985e-05, - "loss": 5.4567, - "step": 18464 - }, - { - "epoch": 9.629726205997393, - "grad_norm": 1.4182343482971191, - "learning_rate": 8.195075376884423e-05, - "loss": 5.6996, - "step": 18465 - }, - { - "epoch": 9.63024771838331, - "grad_norm": 1.408696174621582, - "learning_rate": 8.194974874371861e-05, - "loss": 5.4403, - "step": 18466 - }, - { - "epoch": 9.63076923076923, - "grad_norm": 1.5200049877166748, - "learning_rate": 8.194874371859297e-05, - "loss": 5.5859, - "step": 18467 - }, - { - "epoch": 9.63129074315515, - "grad_norm": 1.5586684942245483, - "learning_rate": 8.194773869346734e-05, - "loss": 5.139, - "step": 18468 - }, - { - "epoch": 9.63181225554107, - "grad_norm": 1.422659158706665, - "learning_rate": 8.194673366834171e-05, - "loss": 5.3886, - "step": 18469 - }, - { - "epoch": 9.632333767926989, - "grad_norm": 1.4464585781097412, - "learning_rate": 8.194572864321608e-05, - "loss": 5.2581, - "step": 18470 - }, - { - "epoch": 9.632855280312908, - "grad_norm": 1.5193934440612793, - "learning_rate": 8.194472361809046e-05, - "loss": 5.7944, - "step": 18471 - }, - { - "epoch": 9.633376792698826, - "grad_norm": 1.4978387355804443, - "learning_rate": 8.194371859296482e-05, - "loss": 5.314, - "step": 18472 - }, - { - "epoch": 9.633898305084745, - "grad_norm": 1.4781062602996826, - "learning_rate": 8.19427135678392e-05, - "loss": 5.3965, - "step": 18473 - }, - { - "epoch": 9.634419817470665, - "grad_norm": 1.39667809009552, - "learning_rate": 8.194170854271356e-05, - "loss": 5.3114, - "step": 18474 - }, - { - "epoch": 9.634941329856584, - "grad_norm": 1.4217413663864136, - "learning_rate": 8.194070351758794e-05, - "loss": 5.2763, - "step": 18475 - }, - { - "epoch": 9.635462842242504, - "grad_norm": 1.406404972076416, - "learning_rate": 8.193969849246232e-05, - "loss": 5.5626, - "step": 18476 - }, - { - "epoch": 9.635984354628423, - "grad_norm": 1.4393328428268433, - "learning_rate": 8.19386934673367e-05, - "loss": 5.1243, - "step": 18477 - }, - { - "epoch": 9.63650586701434, - "grad_norm": 1.3487833738327026, - "learning_rate": 8.193768844221106e-05, - "loss": 5.4863, - "step": 18478 - }, - { - "epoch": 9.63702737940026, - "grad_norm": 1.4196925163269043, - "learning_rate": 8.193668341708544e-05, - "loss": 5.3122, - "step": 18479 - }, - { - "epoch": 9.63754889178618, - "grad_norm": 1.5375620126724243, - "learning_rate": 8.19356783919598e-05, - "loss": 5.024, - "step": 18480 - }, - { - "epoch": 9.6380704041721, - "grad_norm": 1.49607253074646, - "learning_rate": 8.193467336683417e-05, - "loss": 5.5294, - "step": 18481 - }, - { - "epoch": 9.638591916558019, - "grad_norm": 1.4931012392044067, - "learning_rate": 8.193366834170854e-05, - "loss": 5.086, - "step": 18482 - }, - { - "epoch": 9.639113428943938, - "grad_norm": 1.450100302696228, - "learning_rate": 8.193266331658291e-05, - "loss": 5.4947, - "step": 18483 - }, - { - "epoch": 9.639634941329856, - "grad_norm": 1.6052347421646118, - "learning_rate": 8.193165829145729e-05, - "loss": 5.2095, - "step": 18484 - }, - { - "epoch": 9.640156453715775, - "grad_norm": 1.4805704355239868, - "learning_rate": 8.193065326633166e-05, - "loss": 4.869, - "step": 18485 - }, - { - "epoch": 9.640677966101695, - "grad_norm": 1.335795521736145, - "learning_rate": 8.192964824120604e-05, - "loss": 5.673, - "step": 18486 - }, - { - "epoch": 9.641199478487614, - "grad_norm": 1.5425224304199219, - "learning_rate": 8.19286432160804e-05, - "loss": 5.2155, - "step": 18487 - }, - { - "epoch": 9.641720990873534, - "grad_norm": 1.4234529733657837, - "learning_rate": 8.192763819095478e-05, - "loss": 5.5917, - "step": 18488 - }, - { - "epoch": 9.642242503259453, - "grad_norm": 1.4052904844284058, - "learning_rate": 8.192663316582915e-05, - "loss": 5.7944, - "step": 18489 - }, - { - "epoch": 9.642764015645371, - "grad_norm": 1.516837239265442, - "learning_rate": 8.192562814070353e-05, - "loss": 5.4385, - "step": 18490 - }, - { - "epoch": 9.64328552803129, - "grad_norm": 1.4365055561065674, - "learning_rate": 8.192462311557789e-05, - "loss": 5.0849, - "step": 18491 - }, - { - "epoch": 9.64380704041721, - "grad_norm": 1.4088938236236572, - "learning_rate": 8.192361809045227e-05, - "loss": 5.4662, - "step": 18492 - }, - { - "epoch": 9.64432855280313, - "grad_norm": 1.4429852962493896, - "learning_rate": 8.192261306532663e-05, - "loss": 5.7832, - "step": 18493 - }, - { - "epoch": 9.644850065189049, - "grad_norm": 1.3857098817825317, - "learning_rate": 8.192160804020101e-05, - "loss": 5.206, - "step": 18494 - }, - { - "epoch": 9.645371577574968, - "grad_norm": 1.403621792793274, - "learning_rate": 8.192060301507538e-05, - "loss": 5.3351, - "step": 18495 - }, - { - "epoch": 9.645893089960886, - "grad_norm": 1.488184928894043, - "learning_rate": 8.191959798994975e-05, - "loss": 5.5051, - "step": 18496 - }, - { - "epoch": 9.646414602346805, - "grad_norm": 1.3625980615615845, - "learning_rate": 8.191859296482413e-05, - "loss": 5.4946, - "step": 18497 - }, - { - "epoch": 9.646936114732725, - "grad_norm": 1.4375823736190796, - "learning_rate": 8.19175879396985e-05, - "loss": 5.4029, - "step": 18498 - }, - { - "epoch": 9.647457627118644, - "grad_norm": 1.4194555282592773, - "learning_rate": 8.191658291457287e-05, - "loss": 5.2528, - "step": 18499 - }, - { - "epoch": 9.647979139504564, - "grad_norm": 1.4066858291625977, - "learning_rate": 8.191557788944724e-05, - "loss": 5.333, - "step": 18500 - }, - { - "epoch": 9.648500651890483, - "grad_norm": 1.5877779722213745, - "learning_rate": 8.191457286432161e-05, - "loss": 5.4645, - "step": 18501 - }, - { - "epoch": 9.649022164276401, - "grad_norm": 1.4791117906570435, - "learning_rate": 8.191356783919598e-05, - "loss": 5.476, - "step": 18502 - }, - { - "epoch": 9.64954367666232, - "grad_norm": 1.6221725940704346, - "learning_rate": 8.191256281407036e-05, - "loss": 5.2129, - "step": 18503 - }, - { - "epoch": 9.65006518904824, - "grad_norm": 1.4762927293777466, - "learning_rate": 8.191155778894472e-05, - "loss": 5.4215, - "step": 18504 - }, - { - "epoch": 9.65058670143416, - "grad_norm": 1.5427014827728271, - "learning_rate": 8.19105527638191e-05, - "loss": 5.3017, - "step": 18505 - }, - { - "epoch": 9.651108213820079, - "grad_norm": 1.366348147392273, - "learning_rate": 8.190954773869348e-05, - "loss": 5.4331, - "step": 18506 - }, - { - "epoch": 9.651629726205996, - "grad_norm": 1.5387725830078125, - "learning_rate": 8.190854271356785e-05, - "loss": 5.424, - "step": 18507 - }, - { - "epoch": 9.652151238591916, - "grad_norm": 1.43003249168396, - "learning_rate": 8.190753768844222e-05, - "loss": 5.4255, - "step": 18508 - }, - { - "epoch": 9.652672750977835, - "grad_norm": 1.439244270324707, - "learning_rate": 8.190653266331658e-05, - "loss": 5.5497, - "step": 18509 - }, - { - "epoch": 9.653194263363755, - "grad_norm": 1.3890118598937988, - "learning_rate": 8.190552763819096e-05, - "loss": 5.626, - "step": 18510 - }, - { - "epoch": 9.653715775749674, - "grad_norm": 1.3543609380722046, - "learning_rate": 8.190452261306533e-05, - "loss": 5.5644, - "step": 18511 - }, - { - "epoch": 9.654237288135594, - "grad_norm": 1.467510461807251, - "learning_rate": 8.19035175879397e-05, - "loss": 5.5853, - "step": 18512 - }, - { - "epoch": 9.654758800521513, - "grad_norm": 1.4935667514801025, - "learning_rate": 8.190251256281407e-05, - "loss": 5.0923, - "step": 18513 - }, - { - "epoch": 9.655280312907431, - "grad_norm": 1.4377920627593994, - "learning_rate": 8.190150753768845e-05, - "loss": 5.791, - "step": 18514 - }, - { - "epoch": 9.65580182529335, - "grad_norm": 1.4241738319396973, - "learning_rate": 8.190050251256281e-05, - "loss": 5.5682, - "step": 18515 - }, - { - "epoch": 9.65632333767927, - "grad_norm": 1.4348348379135132, - "learning_rate": 8.189949748743719e-05, - "loss": 5.6983, - "step": 18516 - }, - { - "epoch": 9.65684485006519, - "grad_norm": 1.3447948694229126, - "learning_rate": 8.189849246231157e-05, - "loss": 5.5444, - "step": 18517 - }, - { - "epoch": 9.657366362451109, - "grad_norm": 1.3739675283432007, - "learning_rate": 8.189748743718594e-05, - "loss": 5.695, - "step": 18518 - }, - { - "epoch": 9.657887874837026, - "grad_norm": 1.5312697887420654, - "learning_rate": 8.189648241206031e-05, - "loss": 5.1959, - "step": 18519 - }, - { - "epoch": 9.658409387222946, - "grad_norm": 1.495697259902954, - "learning_rate": 8.189547738693469e-05, - "loss": 5.1828, - "step": 18520 - }, - { - "epoch": 9.658930899608865, - "grad_norm": 1.5004196166992188, - "learning_rate": 8.189447236180905e-05, - "loss": 4.9661, - "step": 18521 - }, - { - "epoch": 9.659452411994785, - "grad_norm": 1.4237473011016846, - "learning_rate": 8.189346733668341e-05, - "loss": 5.5479, - "step": 18522 - }, - { - "epoch": 9.659973924380704, - "grad_norm": 1.601123332977295, - "learning_rate": 8.189246231155779e-05, - "loss": 5.3587, - "step": 18523 - }, - { - "epoch": 9.660495436766624, - "grad_norm": 1.5177417993545532, - "learning_rate": 8.189145728643216e-05, - "loss": 5.503, - "step": 18524 - }, - { - "epoch": 9.661016949152543, - "grad_norm": 1.3770344257354736, - "learning_rate": 8.189045226130653e-05, - "loss": 5.7034, - "step": 18525 - }, - { - "epoch": 9.661538461538461, - "grad_norm": 1.377465844154358, - "learning_rate": 8.188944723618091e-05, - "loss": 4.9787, - "step": 18526 - }, - { - "epoch": 9.66205997392438, - "grad_norm": 1.5656682252883911, - "learning_rate": 8.188844221105529e-05, - "loss": 5.4231, - "step": 18527 - }, - { - "epoch": 9.6625814863103, - "grad_norm": 1.5359641313552856, - "learning_rate": 8.188743718592965e-05, - "loss": 5.3672, - "step": 18528 - }, - { - "epoch": 9.66310299869622, - "grad_norm": 1.5246117115020752, - "learning_rate": 8.188643216080403e-05, - "loss": 5.4485, - "step": 18529 - }, - { - "epoch": 9.663624511082139, - "grad_norm": 1.4060789346694946, - "learning_rate": 8.18854271356784e-05, - "loss": 5.4822, - "step": 18530 - }, - { - "epoch": 9.664146023468057, - "grad_norm": 1.41658616065979, - "learning_rate": 8.188442211055277e-05, - "loss": 5.7391, - "step": 18531 - }, - { - "epoch": 9.664667535853976, - "grad_norm": 1.3729561567306519, - "learning_rate": 8.188341708542714e-05, - "loss": 5.6623, - "step": 18532 - }, - { - "epoch": 9.665189048239895, - "grad_norm": 1.4294042587280273, - "learning_rate": 8.188241206030152e-05, - "loss": 5.6419, - "step": 18533 - }, - { - "epoch": 9.665710560625815, - "grad_norm": 1.4671632051467896, - "learning_rate": 8.188140703517588e-05, - "loss": 5.3906, - "step": 18534 - }, - { - "epoch": 9.666232073011734, - "grad_norm": 1.4637659788131714, - "learning_rate": 8.188040201005024e-05, - "loss": 5.6943, - "step": 18535 - }, - { - "epoch": 9.666753585397654, - "grad_norm": 1.3566315174102783, - "learning_rate": 8.187939698492462e-05, - "loss": 5.6497, - "step": 18536 - }, - { - "epoch": 9.667275097783573, - "grad_norm": 1.4262971878051758, - "learning_rate": 8.1878391959799e-05, - "loss": 5.3377, - "step": 18537 - }, - { - "epoch": 9.667796610169491, - "grad_norm": 1.4709491729736328, - "learning_rate": 8.187738693467338e-05, - "loss": 5.2975, - "step": 18538 - }, - { - "epoch": 9.66831812255541, - "grad_norm": 1.3951104879379272, - "learning_rate": 8.187638190954774e-05, - "loss": 5.3456, - "step": 18539 - }, - { - "epoch": 9.66883963494133, - "grad_norm": 1.448448657989502, - "learning_rate": 8.187537688442212e-05, - "loss": 5.2095, - "step": 18540 - }, - { - "epoch": 9.66936114732725, - "grad_norm": 1.4019685983657837, - "learning_rate": 8.187437185929648e-05, - "loss": 5.2884, - "step": 18541 - }, - { - "epoch": 9.669882659713169, - "grad_norm": 1.421449065208435, - "learning_rate": 8.187336683417086e-05, - "loss": 5.7136, - "step": 18542 - }, - { - "epoch": 9.670404172099087, - "grad_norm": 1.3908705711364746, - "learning_rate": 8.187236180904523e-05, - "loss": 5.4797, - "step": 18543 - }, - { - "epoch": 9.670925684485006, - "grad_norm": 1.4989664554595947, - "learning_rate": 8.18713567839196e-05, - "loss": 5.4516, - "step": 18544 - }, - { - "epoch": 9.671447196870925, - "grad_norm": 1.4984453916549683, - "learning_rate": 8.187035175879397e-05, - "loss": 5.0046, - "step": 18545 - }, - { - "epoch": 9.671968709256845, - "grad_norm": 1.4291915893554688, - "learning_rate": 8.186934673366835e-05, - "loss": 5.673, - "step": 18546 - }, - { - "epoch": 9.672490221642764, - "grad_norm": 1.303781270980835, - "learning_rate": 8.186834170854272e-05, - "loss": 5.6747, - "step": 18547 - }, - { - "epoch": 9.673011734028684, - "grad_norm": 1.503077745437622, - "learning_rate": 8.186733668341709e-05, - "loss": 5.4237, - "step": 18548 - }, - { - "epoch": 9.673533246414602, - "grad_norm": 1.4869318008422852, - "learning_rate": 8.186633165829147e-05, - "loss": 5.1963, - "step": 18549 - }, - { - "epoch": 9.674054758800521, - "grad_norm": 1.4966760873794556, - "learning_rate": 8.186532663316583e-05, - "loss": 5.0418, - "step": 18550 - }, - { - "epoch": 9.67457627118644, - "grad_norm": 1.285454273223877, - "learning_rate": 8.186432160804021e-05, - "loss": 5.3238, - "step": 18551 - }, - { - "epoch": 9.67509778357236, - "grad_norm": 1.3157427310943604, - "learning_rate": 8.186331658291457e-05, - "loss": 5.6321, - "step": 18552 - }, - { - "epoch": 9.67561929595828, - "grad_norm": 1.5899189710617065, - "learning_rate": 8.186231155778895e-05, - "loss": 4.7137, - "step": 18553 - }, - { - "epoch": 9.676140808344199, - "grad_norm": 1.3239777088165283, - "learning_rate": 8.186130653266331e-05, - "loss": 4.8424, - "step": 18554 - }, - { - "epoch": 9.676662320730117, - "grad_norm": 1.2444803714752197, - "learning_rate": 8.186030150753769e-05, - "loss": 4.6692, - "step": 18555 - }, - { - "epoch": 9.677183833116036, - "grad_norm": 1.4459562301635742, - "learning_rate": 8.185929648241206e-05, - "loss": 5.3119, - "step": 18556 - }, - { - "epoch": 9.677705345501955, - "grad_norm": 1.5792566537857056, - "learning_rate": 8.185829145728643e-05, - "loss": 4.3991, - "step": 18557 - }, - { - "epoch": 9.678226857887875, - "grad_norm": 1.550134301185608, - "learning_rate": 8.185728643216081e-05, - "loss": 5.1472, - "step": 18558 - }, - { - "epoch": 9.678748370273794, - "grad_norm": 1.4792834520339966, - "learning_rate": 8.185628140703519e-05, - "loss": 5.2587, - "step": 18559 - }, - { - "epoch": 9.679269882659714, - "grad_norm": 1.254046082496643, - "learning_rate": 8.185527638190955e-05, - "loss": 5.5426, - "step": 18560 - }, - { - "epoch": 9.679791395045632, - "grad_norm": 1.423142671585083, - "learning_rate": 8.185427135678392e-05, - "loss": 5.6034, - "step": 18561 - }, - { - "epoch": 9.680312907431551, - "grad_norm": 1.693392276763916, - "learning_rate": 8.18532663316583e-05, - "loss": 5.4676, - "step": 18562 - }, - { - "epoch": 9.68083441981747, - "grad_norm": 1.3050262928009033, - "learning_rate": 8.185226130653266e-05, - "loss": 5.8323, - "step": 18563 - }, - { - "epoch": 9.68135593220339, - "grad_norm": 1.4795236587524414, - "learning_rate": 8.185125628140704e-05, - "loss": 5.2812, - "step": 18564 - }, - { - "epoch": 9.68187744458931, - "grad_norm": 1.5197834968566895, - "learning_rate": 8.18502512562814e-05, - "loss": 5.2166, - "step": 18565 - }, - { - "epoch": 9.682398956975229, - "grad_norm": 1.443449854850769, - "learning_rate": 8.184924623115578e-05, - "loss": 4.9839, - "step": 18566 - }, - { - "epoch": 9.682920469361147, - "grad_norm": 1.4738835096359253, - "learning_rate": 8.184824120603016e-05, - "loss": 5.5138, - "step": 18567 - }, - { - "epoch": 9.683441981747066, - "grad_norm": 1.6017212867736816, - "learning_rate": 8.184723618090454e-05, - "loss": 5.5475, - "step": 18568 - }, - { - "epoch": 9.683963494132986, - "grad_norm": 1.610708236694336, - "learning_rate": 8.18462311557789e-05, - "loss": 4.7084, - "step": 18569 - }, - { - "epoch": 9.684485006518905, - "grad_norm": 1.3483750820159912, - "learning_rate": 8.184522613065328e-05, - "loss": 5.642, - "step": 18570 - }, - { - "epoch": 9.685006518904824, - "grad_norm": 1.3887420892715454, - "learning_rate": 8.184422110552764e-05, - "loss": 5.6433, - "step": 18571 - }, - { - "epoch": 9.685528031290744, - "grad_norm": 1.4449222087860107, - "learning_rate": 8.184321608040202e-05, - "loss": 5.1799, - "step": 18572 - }, - { - "epoch": 9.686049543676662, - "grad_norm": 1.4323101043701172, - "learning_rate": 8.184221105527638e-05, - "loss": 5.4026, - "step": 18573 - }, - { - "epoch": 9.686571056062581, - "grad_norm": 1.399656057357788, - "learning_rate": 8.184120603015075e-05, - "loss": 5.8414, - "step": 18574 - }, - { - "epoch": 9.6870925684485, - "grad_norm": 1.3919442892074585, - "learning_rate": 8.184020100502513e-05, - "loss": 5.7636, - "step": 18575 - }, - { - "epoch": 9.68761408083442, - "grad_norm": 1.544398307800293, - "learning_rate": 8.183919597989949e-05, - "loss": 5.3126, - "step": 18576 - }, - { - "epoch": 9.68813559322034, - "grad_norm": 1.567663550376892, - "learning_rate": 8.183819095477387e-05, - "loss": 5.1066, - "step": 18577 - }, - { - "epoch": 9.688657105606259, - "grad_norm": 1.5866788625717163, - "learning_rate": 8.183718592964825e-05, - "loss": 5.6647, - "step": 18578 - }, - { - "epoch": 9.689178617992177, - "grad_norm": 1.4563877582550049, - "learning_rate": 8.183618090452262e-05, - "loss": 5.2073, - "step": 18579 - }, - { - "epoch": 9.689700130378096, - "grad_norm": 1.367419719696045, - "learning_rate": 8.183517587939699e-05, - "loss": 5.8106, - "step": 18580 - }, - { - "epoch": 9.690221642764016, - "grad_norm": 1.4419505596160889, - "learning_rate": 8.183417085427137e-05, - "loss": 5.3653, - "step": 18581 - }, - { - "epoch": 9.690743155149935, - "grad_norm": 1.481321096420288, - "learning_rate": 8.183316582914573e-05, - "loss": 5.2723, - "step": 18582 - }, - { - "epoch": 9.691264667535854, - "grad_norm": 1.4356770515441895, - "learning_rate": 8.183216080402011e-05, - "loss": 5.468, - "step": 18583 - }, - { - "epoch": 9.691786179921774, - "grad_norm": 1.494615912437439, - "learning_rate": 8.183115577889447e-05, - "loss": 5.5991, - "step": 18584 - }, - { - "epoch": 9.692307692307692, - "grad_norm": 1.3518173694610596, - "learning_rate": 8.183015075376885e-05, - "loss": 5.7269, - "step": 18585 - }, - { - "epoch": 9.692829204693611, - "grad_norm": 1.5440707206726074, - "learning_rate": 8.182914572864322e-05, - "loss": 5.1808, - "step": 18586 - }, - { - "epoch": 9.69335071707953, - "grad_norm": 1.5151623487472534, - "learning_rate": 8.182814070351759e-05, - "loss": 5.1726, - "step": 18587 - }, - { - "epoch": 9.69387222946545, - "grad_norm": 1.4492238759994507, - "learning_rate": 8.182713567839196e-05, - "loss": 5.5534, - "step": 18588 - }, - { - "epoch": 9.69439374185137, - "grad_norm": 1.3935397863388062, - "learning_rate": 8.182613065326634e-05, - "loss": 5.4794, - "step": 18589 - }, - { - "epoch": 9.694915254237289, - "grad_norm": 1.5803651809692383, - "learning_rate": 8.182512562814071e-05, - "loss": 5.0965, - "step": 18590 - }, - { - "epoch": 9.695436766623207, - "grad_norm": 1.482117772102356, - "learning_rate": 8.182412060301508e-05, - "loss": 5.7144, - "step": 18591 - }, - { - "epoch": 9.695958279009126, - "grad_norm": 1.5487957000732422, - "learning_rate": 8.182311557788946e-05, - "loss": 5.0869, - "step": 18592 - }, - { - "epoch": 9.696479791395046, - "grad_norm": 1.4621587991714478, - "learning_rate": 8.182211055276382e-05, - "loss": 5.4922, - "step": 18593 - }, - { - "epoch": 9.697001303780965, - "grad_norm": 1.4516290426254272, - "learning_rate": 8.18211055276382e-05, - "loss": 5.8441, - "step": 18594 - }, - { - "epoch": 9.697522816166884, - "grad_norm": 1.3454692363739014, - "learning_rate": 8.182010050251256e-05, - "loss": 5.6367, - "step": 18595 - }, - { - "epoch": 9.698044328552804, - "grad_norm": 1.4739863872528076, - "learning_rate": 8.181909547738694e-05, - "loss": 5.4223, - "step": 18596 - }, - { - "epoch": 9.698565840938722, - "grad_norm": 1.389207124710083, - "learning_rate": 8.18180904522613e-05, - "loss": 5.8802, - "step": 18597 - }, - { - "epoch": 9.699087353324641, - "grad_norm": 1.4222999811172485, - "learning_rate": 8.181708542713568e-05, - "loss": 5.4466, - "step": 18598 - }, - { - "epoch": 9.69960886571056, - "grad_norm": 1.351042628288269, - "learning_rate": 8.181608040201006e-05, - "loss": 5.7687, - "step": 18599 - }, - { - "epoch": 9.70013037809648, - "grad_norm": 1.5414776802062988, - "learning_rate": 8.181507537688444e-05, - "loss": 5.0474, - "step": 18600 - }, - { - "epoch": 9.7006518904824, - "grad_norm": 1.4343734979629517, - "learning_rate": 8.18140703517588e-05, - "loss": 5.4148, - "step": 18601 - }, - { - "epoch": 9.701173402868317, - "grad_norm": 1.4445431232452393, - "learning_rate": 8.181306532663317e-05, - "loss": 5.771, - "step": 18602 - }, - { - "epoch": 9.701694915254237, - "grad_norm": 1.6484830379486084, - "learning_rate": 8.181206030150754e-05, - "loss": 4.5569, - "step": 18603 - }, - { - "epoch": 9.702216427640156, - "grad_norm": 1.4708679914474487, - "learning_rate": 8.181105527638191e-05, - "loss": 5.6621, - "step": 18604 - }, - { - "epoch": 9.702737940026076, - "grad_norm": 1.5130834579467773, - "learning_rate": 8.181005025125629e-05, - "loss": 5.1753, - "step": 18605 - }, - { - "epoch": 9.703259452411995, - "grad_norm": 1.4642672538757324, - "learning_rate": 8.180904522613065e-05, - "loss": 5.7014, - "step": 18606 - }, - { - "epoch": 9.703780964797915, - "grad_norm": 1.4952397346496582, - "learning_rate": 8.180804020100503e-05, - "loss": 5.2089, - "step": 18607 - }, - { - "epoch": 9.704302477183834, - "grad_norm": 1.5272061824798584, - "learning_rate": 8.180703517587939e-05, - "loss": 5.4861, - "step": 18608 - }, - { - "epoch": 9.704823989569752, - "grad_norm": 1.4700024127960205, - "learning_rate": 8.180603015075377e-05, - "loss": 5.7819, - "step": 18609 - }, - { - "epoch": 9.705345501955671, - "grad_norm": 1.366637110710144, - "learning_rate": 8.180502512562815e-05, - "loss": 5.4501, - "step": 18610 - }, - { - "epoch": 9.70586701434159, - "grad_norm": 1.4005943536758423, - "learning_rate": 8.180402010050253e-05, - "loss": 5.7976, - "step": 18611 - }, - { - "epoch": 9.70638852672751, - "grad_norm": 1.403409481048584, - "learning_rate": 8.180301507537689e-05, - "loss": 4.8107, - "step": 18612 - }, - { - "epoch": 9.70691003911343, - "grad_norm": 1.4385838508605957, - "learning_rate": 8.180201005025127e-05, - "loss": 4.8594, - "step": 18613 - }, - { - "epoch": 9.707431551499347, - "grad_norm": 1.5059614181518555, - "learning_rate": 8.180100502512563e-05, - "loss": 5.3527, - "step": 18614 - }, - { - "epoch": 9.707953063885267, - "grad_norm": 1.3317420482635498, - "learning_rate": 8.18e-05, - "loss": 5.899, - "step": 18615 - }, - { - "epoch": 9.708474576271186, - "grad_norm": 1.4835461378097534, - "learning_rate": 8.179899497487437e-05, - "loss": 5.8567, - "step": 18616 - }, - { - "epoch": 9.708996088657106, - "grad_norm": 1.4382926225662231, - "learning_rate": 8.179798994974874e-05, - "loss": 5.486, - "step": 18617 - }, - { - "epoch": 9.709517601043025, - "grad_norm": 1.438188076019287, - "learning_rate": 8.179698492462312e-05, - "loss": 4.6588, - "step": 18618 - }, - { - "epoch": 9.710039113428945, - "grad_norm": 1.7021583318710327, - "learning_rate": 8.17959798994975e-05, - "loss": 5.2937, - "step": 18619 - }, - { - "epoch": 9.710560625814864, - "grad_norm": 1.44141685962677, - "learning_rate": 8.179497487437187e-05, - "loss": 5.4536, - "step": 18620 - }, - { - "epoch": 9.711082138200782, - "grad_norm": 1.5291990041732788, - "learning_rate": 8.179396984924624e-05, - "loss": 5.2909, - "step": 18621 - }, - { - "epoch": 9.711603650586701, - "grad_norm": 1.3769620656967163, - "learning_rate": 8.179296482412061e-05, - "loss": 5.5902, - "step": 18622 - }, - { - "epoch": 9.71212516297262, - "grad_norm": 1.3233537673950195, - "learning_rate": 8.179195979899498e-05, - "loss": 5.7766, - "step": 18623 - }, - { - "epoch": 9.71264667535854, - "grad_norm": 1.6375271081924438, - "learning_rate": 8.179095477386936e-05, - "loss": 5.3433, - "step": 18624 - }, - { - "epoch": 9.71316818774446, - "grad_norm": 1.5172581672668457, - "learning_rate": 8.178994974874372e-05, - "loss": 5.039, - "step": 18625 - }, - { - "epoch": 9.713689700130377, - "grad_norm": 1.3950598239898682, - "learning_rate": 8.17889447236181e-05, - "loss": 5.5522, - "step": 18626 - }, - { - "epoch": 9.714211212516297, - "grad_norm": 1.4036316871643066, - "learning_rate": 8.178793969849246e-05, - "loss": 5.3894, - "step": 18627 - }, - { - "epoch": 9.714732724902216, - "grad_norm": 1.308119773864746, - "learning_rate": 8.178693467336683e-05, - "loss": 5.75, - "step": 18628 - }, - { - "epoch": 9.715254237288136, - "grad_norm": 1.3728142976760864, - "learning_rate": 8.17859296482412e-05, - "loss": 5.5433, - "step": 18629 - }, - { - "epoch": 9.715775749674055, - "grad_norm": 1.3861063718795776, - "learning_rate": 8.178492462311558e-05, - "loss": 5.6978, - "step": 18630 - }, - { - "epoch": 9.716297262059975, - "grad_norm": 1.3265024423599243, - "learning_rate": 8.178391959798996e-05, - "loss": 5.6679, - "step": 18631 - }, - { - "epoch": 9.716818774445892, - "grad_norm": 1.6424033641815186, - "learning_rate": 8.178291457286432e-05, - "loss": 5.1796, - "step": 18632 - }, - { - "epoch": 9.717340286831812, - "grad_norm": 1.4027554988861084, - "learning_rate": 8.17819095477387e-05, - "loss": 5.9367, - "step": 18633 - }, - { - "epoch": 9.717861799217731, - "grad_norm": 1.5086455345153809, - "learning_rate": 8.178090452261307e-05, - "loss": 5.4798, - "step": 18634 - }, - { - "epoch": 9.71838331160365, - "grad_norm": 1.455838680267334, - "learning_rate": 8.177989949748744e-05, - "loss": 5.5253, - "step": 18635 - }, - { - "epoch": 9.71890482398957, - "grad_norm": 1.4250065088272095, - "learning_rate": 8.177889447236181e-05, - "loss": 5.3475, - "step": 18636 - }, - { - "epoch": 9.71942633637549, - "grad_norm": 1.4521886110305786, - "learning_rate": 8.177788944723619e-05, - "loss": 5.3148, - "step": 18637 - }, - { - "epoch": 9.719947848761407, - "grad_norm": 1.432077407836914, - "learning_rate": 8.177688442211055e-05, - "loss": 5.6794, - "step": 18638 - }, - { - "epoch": 9.720469361147327, - "grad_norm": 1.4427014589309692, - "learning_rate": 8.177587939698493e-05, - "loss": 5.6651, - "step": 18639 - }, - { - "epoch": 9.720990873533246, - "grad_norm": 1.474668025970459, - "learning_rate": 8.17748743718593e-05, - "loss": 5.6559, - "step": 18640 - }, - { - "epoch": 9.721512385919166, - "grad_norm": 1.5356340408325195, - "learning_rate": 8.177386934673367e-05, - "loss": 5.1265, - "step": 18641 - }, - { - "epoch": 9.722033898305085, - "grad_norm": 1.472482681274414, - "learning_rate": 8.177286432160805e-05, - "loss": 5.8024, - "step": 18642 - }, - { - "epoch": 9.722555410691005, - "grad_norm": 1.4466021060943604, - "learning_rate": 8.177185929648241e-05, - "loss": 5.2977, - "step": 18643 - }, - { - "epoch": 9.723076923076922, - "grad_norm": 1.3532615900039673, - "learning_rate": 8.177085427135679e-05, - "loss": 5.6708, - "step": 18644 - }, - { - "epoch": 9.723598435462842, - "grad_norm": 1.4758414030075073, - "learning_rate": 8.176984924623115e-05, - "loss": 5.4848, - "step": 18645 - }, - { - "epoch": 9.724119947848761, - "grad_norm": 1.3391772508621216, - "learning_rate": 8.176884422110553e-05, - "loss": 5.5246, - "step": 18646 - }, - { - "epoch": 9.72464146023468, - "grad_norm": 1.4163191318511963, - "learning_rate": 8.17678391959799e-05, - "loss": 5.3795, - "step": 18647 - }, - { - "epoch": 9.7251629726206, - "grad_norm": 1.4137624502182007, - "learning_rate": 8.176683417085427e-05, - "loss": 5.4173, - "step": 18648 - }, - { - "epoch": 9.72568448500652, - "grad_norm": 1.5122489929199219, - "learning_rate": 8.176582914572864e-05, - "loss": 5.2407, - "step": 18649 - }, - { - "epoch": 9.726205997392437, - "grad_norm": 1.5166511535644531, - "learning_rate": 8.176482412060302e-05, - "loss": 5.7698, - "step": 18650 - }, - { - "epoch": 9.726727509778357, - "grad_norm": 1.531610131263733, - "learning_rate": 8.17638190954774e-05, - "loss": 5.683, - "step": 18651 - }, - { - "epoch": 9.727249022164276, - "grad_norm": 1.3876957893371582, - "learning_rate": 8.176281407035177e-05, - "loss": 5.3044, - "step": 18652 - }, - { - "epoch": 9.727770534550196, - "grad_norm": 1.6202011108398438, - "learning_rate": 8.176180904522614e-05, - "loss": 5.0108, - "step": 18653 - }, - { - "epoch": 9.728292046936115, - "grad_norm": 1.3470455408096313, - "learning_rate": 8.17608040201005e-05, - "loss": 5.4393, - "step": 18654 - }, - { - "epoch": 9.728813559322035, - "grad_norm": 1.4794467687606812, - "learning_rate": 8.175979899497488e-05, - "loss": 5.0472, - "step": 18655 - }, - { - "epoch": 9.729335071707952, - "grad_norm": 1.507926344871521, - "learning_rate": 8.175879396984924e-05, - "loss": 5.1952, - "step": 18656 - }, - { - "epoch": 9.729856584093872, - "grad_norm": 1.3959763050079346, - "learning_rate": 8.175778894472362e-05, - "loss": 5.9126, - "step": 18657 - }, - { - "epoch": 9.730378096479791, - "grad_norm": 1.419403314590454, - "learning_rate": 8.175678391959799e-05, - "loss": 5.8554, - "step": 18658 - }, - { - "epoch": 9.73089960886571, - "grad_norm": 1.4629031419754028, - "learning_rate": 8.175577889447236e-05, - "loss": 5.2055, - "step": 18659 - }, - { - "epoch": 9.73142112125163, - "grad_norm": 1.4522790908813477, - "learning_rate": 8.175477386934674e-05, - "loss": 4.9952, - "step": 18660 - }, - { - "epoch": 9.73194263363755, - "grad_norm": 1.4359452724456787, - "learning_rate": 8.175376884422112e-05, - "loss": 4.9995, - "step": 18661 - }, - { - "epoch": 9.732464146023467, - "grad_norm": 1.4343265295028687, - "learning_rate": 8.175276381909548e-05, - "loss": 5.6549, - "step": 18662 - }, - { - "epoch": 9.732985658409387, - "grad_norm": 1.41818106174469, - "learning_rate": 8.175175879396986e-05, - "loss": 5.5114, - "step": 18663 - }, - { - "epoch": 9.733507170795306, - "grad_norm": 1.5784001350402832, - "learning_rate": 8.175075376884423e-05, - "loss": 5.1904, - "step": 18664 - }, - { - "epoch": 9.734028683181226, - "grad_norm": 1.4167181253433228, - "learning_rate": 8.17497487437186e-05, - "loss": 5.7491, - "step": 18665 - }, - { - "epoch": 9.734550195567145, - "grad_norm": 1.412499189376831, - "learning_rate": 8.174874371859297e-05, - "loss": 5.0275, - "step": 18666 - }, - { - "epoch": 9.735071707953065, - "grad_norm": 1.4488942623138428, - "learning_rate": 8.174773869346734e-05, - "loss": 5.4696, - "step": 18667 - }, - { - "epoch": 9.735593220338982, - "grad_norm": 1.3687667846679688, - "learning_rate": 8.174673366834171e-05, - "loss": 4.674, - "step": 18668 - }, - { - "epoch": 9.736114732724902, - "grad_norm": 1.4834998846054077, - "learning_rate": 8.174572864321607e-05, - "loss": 5.3708, - "step": 18669 - }, - { - "epoch": 9.736636245110821, - "grad_norm": 1.5331450700759888, - "learning_rate": 8.174472361809045e-05, - "loss": 4.8616, - "step": 18670 - }, - { - "epoch": 9.73715775749674, - "grad_norm": 1.5534929037094116, - "learning_rate": 8.174371859296483e-05, - "loss": 5.228, - "step": 18671 - }, - { - "epoch": 9.73767926988266, - "grad_norm": 1.3710709810256958, - "learning_rate": 8.174271356783921e-05, - "loss": 6.1562, - "step": 18672 - }, - { - "epoch": 9.73820078226858, - "grad_norm": 1.4513472318649292, - "learning_rate": 8.174170854271357e-05, - "loss": 5.6704, - "step": 18673 - }, - { - "epoch": 9.738722294654497, - "grad_norm": 1.4319325685501099, - "learning_rate": 8.174070351758795e-05, - "loss": 5.7313, - "step": 18674 - }, - { - "epoch": 9.739243807040417, - "grad_norm": 1.5158483982086182, - "learning_rate": 8.173969849246231e-05, - "loss": 5.4816, - "step": 18675 - }, - { - "epoch": 9.739765319426336, - "grad_norm": 1.444833755493164, - "learning_rate": 8.173869346733669e-05, - "loss": 5.3023, - "step": 18676 - }, - { - "epoch": 9.740286831812256, - "grad_norm": 1.5773440599441528, - "learning_rate": 8.173768844221106e-05, - "loss": 5.803, - "step": 18677 - }, - { - "epoch": 9.740808344198175, - "grad_norm": 1.4418303966522217, - "learning_rate": 8.173668341708543e-05, - "loss": 5.7438, - "step": 18678 - }, - { - "epoch": 9.741329856584095, - "grad_norm": 1.4371198415756226, - "learning_rate": 8.17356783919598e-05, - "loss": 5.644, - "step": 18679 - }, - { - "epoch": 9.741851368970012, - "grad_norm": 1.6133707761764526, - "learning_rate": 8.173467336683418e-05, - "loss": 5.1333, - "step": 18680 - }, - { - "epoch": 9.742372881355932, - "grad_norm": 1.56196129322052, - "learning_rate": 8.173366834170855e-05, - "loss": 5.6199, - "step": 18681 - }, - { - "epoch": 9.742894393741851, - "grad_norm": 1.460421085357666, - "learning_rate": 8.173266331658292e-05, - "loss": 5.7842, - "step": 18682 - }, - { - "epoch": 9.74341590612777, - "grad_norm": 1.53962242603302, - "learning_rate": 8.17316582914573e-05, - "loss": 5.7664, - "step": 18683 - }, - { - "epoch": 9.74393741851369, - "grad_norm": 1.4369391202926636, - "learning_rate": 8.173065326633166e-05, - "loss": 5.3777, - "step": 18684 - }, - { - "epoch": 9.74445893089961, - "grad_norm": 1.4716638326644897, - "learning_rate": 8.172964824120604e-05, - "loss": 5.6756, - "step": 18685 - }, - { - "epoch": 9.744980443285527, - "grad_norm": 1.4272010326385498, - "learning_rate": 8.17286432160804e-05, - "loss": 5.5638, - "step": 18686 - }, - { - "epoch": 9.745501955671447, - "grad_norm": 1.4544620513916016, - "learning_rate": 8.172763819095478e-05, - "loss": 5.4273, - "step": 18687 - }, - { - "epoch": 9.746023468057366, - "grad_norm": 1.345214605331421, - "learning_rate": 8.172663316582914e-05, - "loss": 5.7922, - "step": 18688 - }, - { - "epoch": 9.746544980443286, - "grad_norm": 1.5575881004333496, - "learning_rate": 8.172562814070352e-05, - "loss": 4.8657, - "step": 18689 - }, - { - "epoch": 9.747066492829205, - "grad_norm": 1.45172917842865, - "learning_rate": 8.172462311557789e-05, - "loss": 5.379, - "step": 18690 - }, - { - "epoch": 9.747588005215125, - "grad_norm": 1.420206069946289, - "learning_rate": 8.172361809045226e-05, - "loss": 5.6552, - "step": 18691 - }, - { - "epoch": 9.748109517601042, - "grad_norm": 1.3357425928115845, - "learning_rate": 8.172261306532664e-05, - "loss": 5.5288, - "step": 18692 - }, - { - "epoch": 9.748631029986962, - "grad_norm": 1.6237003803253174, - "learning_rate": 8.172160804020102e-05, - "loss": 5.0656, - "step": 18693 - }, - { - "epoch": 9.749152542372881, - "grad_norm": 1.4047448635101318, - "learning_rate": 8.172060301507538e-05, - "loss": 5.4136, - "step": 18694 - }, - { - "epoch": 9.7496740547588, - "grad_norm": 1.459893822669983, - "learning_rate": 8.171959798994975e-05, - "loss": 5.2996, - "step": 18695 - }, - { - "epoch": 9.75019556714472, - "grad_norm": 1.427721381187439, - "learning_rate": 8.171859296482413e-05, - "loss": 5.6767, - "step": 18696 - }, - { - "epoch": 9.750717079530638, - "grad_norm": 1.4373003244400024, - "learning_rate": 8.171758793969849e-05, - "loss": 5.4612, - "step": 18697 - }, - { - "epoch": 9.751238591916557, - "grad_norm": 1.4318232536315918, - "learning_rate": 8.171658291457287e-05, - "loss": 5.4256, - "step": 18698 - }, - { - "epoch": 9.751760104302477, - "grad_norm": 1.4988802671432495, - "learning_rate": 8.171557788944723e-05, - "loss": 5.6443, - "step": 18699 - }, - { - "epoch": 9.752281616688396, - "grad_norm": 1.4381216764450073, - "learning_rate": 8.171457286432161e-05, - "loss": 5.6104, - "step": 18700 - }, - { - "epoch": 9.752803129074316, - "grad_norm": 1.4902812242507935, - "learning_rate": 8.171356783919599e-05, - "loss": 5.2999, - "step": 18701 - }, - { - "epoch": 9.753324641460235, - "grad_norm": 1.4792976379394531, - "learning_rate": 8.171256281407037e-05, - "loss": 5.7409, - "step": 18702 - }, - { - "epoch": 9.753846153846155, - "grad_norm": 1.424703598022461, - "learning_rate": 8.171155778894473e-05, - "loss": 4.8409, - "step": 18703 - }, - { - "epoch": 9.754367666232072, - "grad_norm": 1.395787239074707, - "learning_rate": 8.171055276381911e-05, - "loss": 5.4305, - "step": 18704 - }, - { - "epoch": 9.754889178617992, - "grad_norm": 1.4718749523162842, - "learning_rate": 8.170954773869347e-05, - "loss": 5.609, - "step": 18705 - }, - { - "epoch": 9.755410691003911, - "grad_norm": 1.542810082435608, - "learning_rate": 8.170854271356785e-05, - "loss": 5.1836, - "step": 18706 - }, - { - "epoch": 9.75593220338983, - "grad_norm": 1.3462250232696533, - "learning_rate": 8.170753768844221e-05, - "loss": 5.1766, - "step": 18707 - }, - { - "epoch": 9.75645371577575, - "grad_norm": 1.4435293674468994, - "learning_rate": 8.170653266331658e-05, - "loss": 5.6317, - "step": 18708 - }, - { - "epoch": 9.756975228161668, - "grad_norm": 1.445448875427246, - "learning_rate": 8.170552763819096e-05, - "loss": 5.2633, - "step": 18709 - }, - { - "epoch": 9.757496740547587, - "grad_norm": 1.5255001783370972, - "learning_rate": 8.170452261306532e-05, - "loss": 5.0939, - "step": 18710 - }, - { - "epoch": 9.758018252933507, - "grad_norm": 1.5364750623703003, - "learning_rate": 8.17035175879397e-05, - "loss": 5.3467, - "step": 18711 - }, - { - "epoch": 9.758539765319426, - "grad_norm": 1.48344886302948, - "learning_rate": 8.170251256281408e-05, - "loss": 5.7316, - "step": 18712 - }, - { - "epoch": 9.759061277705346, - "grad_norm": 1.4243154525756836, - "learning_rate": 8.170150753768845e-05, - "loss": 5.3532, - "step": 18713 - }, - { - "epoch": 9.759582790091265, - "grad_norm": 1.5385390520095825, - "learning_rate": 8.170050251256282e-05, - "loss": 5.6724, - "step": 18714 - }, - { - "epoch": 9.760104302477185, - "grad_norm": 1.5950798988342285, - "learning_rate": 8.16994974874372e-05, - "loss": 5.3799, - "step": 18715 - }, - { - "epoch": 9.760625814863102, - "grad_norm": 1.4778952598571777, - "learning_rate": 8.169849246231156e-05, - "loss": 5.1686, - "step": 18716 - }, - { - "epoch": 9.761147327249022, - "grad_norm": 1.3493316173553467, - "learning_rate": 8.169748743718594e-05, - "loss": 5.8916, - "step": 18717 - }, - { - "epoch": 9.761668839634941, - "grad_norm": 1.4093279838562012, - "learning_rate": 8.16964824120603e-05, - "loss": 5.5277, - "step": 18718 - }, - { - "epoch": 9.76219035202086, - "grad_norm": 1.5444834232330322, - "learning_rate": 8.169547738693468e-05, - "loss": 5.654, - "step": 18719 - }, - { - "epoch": 9.76271186440678, - "grad_norm": 1.4080978631973267, - "learning_rate": 8.169447236180904e-05, - "loss": 5.4278, - "step": 18720 - }, - { - "epoch": 9.763233376792698, - "grad_norm": 1.491479516029358, - "learning_rate": 8.169346733668342e-05, - "loss": 5.5259, - "step": 18721 - }, - { - "epoch": 9.763754889178617, - "grad_norm": 1.353216528892517, - "learning_rate": 8.16924623115578e-05, - "loss": 5.6442, - "step": 18722 - }, - { - "epoch": 9.764276401564537, - "grad_norm": 1.4683020114898682, - "learning_rate": 8.169145728643216e-05, - "loss": 5.6103, - "step": 18723 - }, - { - "epoch": 9.764797913950456, - "grad_norm": 1.49290931224823, - "learning_rate": 8.169045226130654e-05, - "loss": 4.9945, - "step": 18724 - }, - { - "epoch": 9.765319426336376, - "grad_norm": 1.505211353302002, - "learning_rate": 8.16894472361809e-05, - "loss": 5.2664, - "step": 18725 - }, - { - "epoch": 9.765840938722295, - "grad_norm": 1.4182904958724976, - "learning_rate": 8.168844221105528e-05, - "loss": 5.5644, - "step": 18726 - }, - { - "epoch": 9.766362451108213, - "grad_norm": 1.3857450485229492, - "learning_rate": 8.168743718592965e-05, - "loss": 5.532, - "step": 18727 - }, - { - "epoch": 9.766883963494132, - "grad_norm": 1.423683762550354, - "learning_rate": 8.168643216080403e-05, - "loss": 5.4862, - "step": 18728 - }, - { - "epoch": 9.767405475880052, - "grad_norm": 1.4100990295410156, - "learning_rate": 8.168542713567839e-05, - "loss": 5.5865, - "step": 18729 - }, - { - "epoch": 9.767926988265971, - "grad_norm": 1.3633017539978027, - "learning_rate": 8.168442211055277e-05, - "loss": 5.6582, - "step": 18730 - }, - { - "epoch": 9.76844850065189, - "grad_norm": 1.432947039604187, - "learning_rate": 8.168341708542713e-05, - "loss": 5.3155, - "step": 18731 - }, - { - "epoch": 9.76897001303781, - "grad_norm": 1.4410195350646973, - "learning_rate": 8.168241206030151e-05, - "loss": 4.972, - "step": 18732 - }, - { - "epoch": 9.769491525423728, - "grad_norm": 1.383204698562622, - "learning_rate": 8.168140703517589e-05, - "loss": 5.7137, - "step": 18733 - }, - { - "epoch": 9.770013037809647, - "grad_norm": 1.5261754989624023, - "learning_rate": 8.168040201005025e-05, - "loss": 5.5841, - "step": 18734 - }, - { - "epoch": 9.770534550195567, - "grad_norm": 1.4425770044326782, - "learning_rate": 8.167939698492463e-05, - "loss": 5.2751, - "step": 18735 - }, - { - "epoch": 9.771056062581486, - "grad_norm": 1.4786243438720703, - "learning_rate": 8.1678391959799e-05, - "loss": 5.4452, - "step": 18736 - }, - { - "epoch": 9.771577574967406, - "grad_norm": 1.4356626272201538, - "learning_rate": 8.167738693467337e-05, - "loss": 5.629, - "step": 18737 - }, - { - "epoch": 9.772099087353325, - "grad_norm": 1.450703740119934, - "learning_rate": 8.167638190954774e-05, - "loss": 4.8787, - "step": 18738 - }, - { - "epoch": 9.772620599739243, - "grad_norm": 1.58195161819458, - "learning_rate": 8.167537688442211e-05, - "loss": 5.18, - "step": 18739 - }, - { - "epoch": 9.773142112125162, - "grad_norm": 1.5900564193725586, - "learning_rate": 8.167437185929648e-05, - "loss": 5.1515, - "step": 18740 - }, - { - "epoch": 9.773663624511082, - "grad_norm": 1.4741532802581787, - "learning_rate": 8.167336683417086e-05, - "loss": 5.4508, - "step": 18741 - }, - { - "epoch": 9.774185136897001, - "grad_norm": 1.6422709226608276, - "learning_rate": 8.167236180904522e-05, - "loss": 5.4192, - "step": 18742 - }, - { - "epoch": 9.77470664928292, - "grad_norm": 1.3778619766235352, - "learning_rate": 8.16713567839196e-05, - "loss": 5.9432, - "step": 18743 - }, - { - "epoch": 9.77522816166884, - "grad_norm": 1.5278359651565552, - "learning_rate": 8.167035175879398e-05, - "loss": 5.6264, - "step": 18744 - }, - { - "epoch": 9.775749674054758, - "grad_norm": 1.4320108890533447, - "learning_rate": 8.166934673366835e-05, - "loss": 5.5473, - "step": 18745 - }, - { - "epoch": 9.776271186440677, - "grad_norm": 1.3931071758270264, - "learning_rate": 8.166834170854272e-05, - "loss": 5.6958, - "step": 18746 - }, - { - "epoch": 9.776792698826597, - "grad_norm": 1.4775317907333374, - "learning_rate": 8.166733668341708e-05, - "loss": 5.446, - "step": 18747 - }, - { - "epoch": 9.777314211212516, - "grad_norm": 1.413123607635498, - "learning_rate": 8.166633165829146e-05, - "loss": 5.1131, - "step": 18748 - }, - { - "epoch": 9.777835723598436, - "grad_norm": 1.368774652481079, - "learning_rate": 8.166532663316583e-05, - "loss": 5.3037, - "step": 18749 - }, - { - "epoch": 9.778357235984355, - "grad_norm": 1.403531551361084, - "learning_rate": 8.16643216080402e-05, - "loss": 5.3152, - "step": 18750 - }, - { - "epoch": 9.778878748370273, - "grad_norm": 1.403852105140686, - "learning_rate": 8.166331658291457e-05, - "loss": 5.2872, - "step": 18751 - }, - { - "epoch": 9.779400260756193, - "grad_norm": 1.3769657611846924, - "learning_rate": 8.166231155778895e-05, - "loss": 5.687, - "step": 18752 - }, - { - "epoch": 9.779921773142112, - "grad_norm": 1.4277992248535156, - "learning_rate": 8.166130653266332e-05, - "loss": 5.7698, - "step": 18753 - }, - { - "epoch": 9.780443285528031, - "grad_norm": 1.3920516967773438, - "learning_rate": 8.16603015075377e-05, - "loss": 5.4083, - "step": 18754 - }, - { - "epoch": 9.780964797913951, - "grad_norm": 1.63652765750885, - "learning_rate": 8.165929648241207e-05, - "loss": 5.1056, - "step": 18755 - }, - { - "epoch": 9.78148631029987, - "grad_norm": 1.4491411447525024, - "learning_rate": 8.165829145728644e-05, - "loss": 5.2202, - "step": 18756 - }, - { - "epoch": 9.782007822685788, - "grad_norm": 1.5373526811599731, - "learning_rate": 8.165728643216081e-05, - "loss": 5.3665, - "step": 18757 - }, - { - "epoch": 9.782529335071708, - "grad_norm": 1.4949219226837158, - "learning_rate": 8.165628140703519e-05, - "loss": 5.1171, - "step": 18758 - }, - { - "epoch": 9.783050847457627, - "grad_norm": 1.4364678859710693, - "learning_rate": 8.165527638190955e-05, - "loss": 4.9946, - "step": 18759 - }, - { - "epoch": 9.783572359843546, - "grad_norm": 1.4808757305145264, - "learning_rate": 8.165427135678393e-05, - "loss": 5.5155, - "step": 18760 - }, - { - "epoch": 9.784093872229466, - "grad_norm": 1.4968109130859375, - "learning_rate": 8.165326633165829e-05, - "loss": 5.0605, - "step": 18761 - }, - { - "epoch": 9.784615384615385, - "grad_norm": 1.4909348487854004, - "learning_rate": 8.165226130653266e-05, - "loss": 5.6026, - "step": 18762 - }, - { - "epoch": 9.785136897001303, - "grad_norm": 1.5875134468078613, - "learning_rate": 8.165125628140703e-05, - "loss": 5.2109, - "step": 18763 - }, - { - "epoch": 9.785658409387223, - "grad_norm": 1.4120519161224365, - "learning_rate": 8.165025125628141e-05, - "loss": 5.6189, - "step": 18764 - }, - { - "epoch": 9.786179921773142, - "grad_norm": 1.4031940698623657, - "learning_rate": 8.164924623115579e-05, - "loss": 5.5703, - "step": 18765 - }, - { - "epoch": 9.786701434159061, - "grad_norm": 1.4226372241973877, - "learning_rate": 8.164824120603015e-05, - "loss": 5.3847, - "step": 18766 - }, - { - "epoch": 9.787222946544981, - "grad_norm": 1.4345930814743042, - "learning_rate": 8.164723618090453e-05, - "loss": 5.3802, - "step": 18767 - }, - { - "epoch": 9.7877444589309, - "grad_norm": 1.6860346794128418, - "learning_rate": 8.16462311557789e-05, - "loss": 4.2616, - "step": 18768 - }, - { - "epoch": 9.788265971316818, - "grad_norm": 1.481545090675354, - "learning_rate": 8.164522613065327e-05, - "loss": 5.7262, - "step": 18769 - }, - { - "epoch": 9.788787483702738, - "grad_norm": 1.4126156568527222, - "learning_rate": 8.164422110552764e-05, - "loss": 5.1905, - "step": 18770 - }, - { - "epoch": 9.789308996088657, - "grad_norm": 1.4206933975219727, - "learning_rate": 8.164321608040202e-05, - "loss": 5.4232, - "step": 18771 - }, - { - "epoch": 9.789830508474576, - "grad_norm": 1.4722241163253784, - "learning_rate": 8.164221105527638e-05, - "loss": 5.5743, - "step": 18772 - }, - { - "epoch": 9.790352020860496, - "grad_norm": 1.3364452123641968, - "learning_rate": 8.164120603015076e-05, - "loss": 5.8062, - "step": 18773 - }, - { - "epoch": 9.790873533246415, - "grad_norm": 1.3238264322280884, - "learning_rate": 8.164020100502514e-05, - "loss": 5.7054, - "step": 18774 - }, - { - "epoch": 9.791395045632333, - "grad_norm": 1.4345972537994385, - "learning_rate": 8.16391959798995e-05, - "loss": 5.4621, - "step": 18775 - }, - { - "epoch": 9.791916558018253, - "grad_norm": 1.4506257772445679, - "learning_rate": 8.163819095477388e-05, - "loss": 5.482, - "step": 18776 - }, - { - "epoch": 9.792438070404172, - "grad_norm": 1.4590561389923096, - "learning_rate": 8.163718592964824e-05, - "loss": 5.709, - "step": 18777 - }, - { - "epoch": 9.792959582790091, - "grad_norm": 1.4055347442626953, - "learning_rate": 8.163618090452262e-05, - "loss": 5.7277, - "step": 18778 - }, - { - "epoch": 9.793481095176011, - "grad_norm": 1.4934043884277344, - "learning_rate": 8.163517587939698e-05, - "loss": 5.5902, - "step": 18779 - }, - { - "epoch": 9.79400260756193, - "grad_norm": 1.4280189275741577, - "learning_rate": 8.163417085427136e-05, - "loss": 5.5951, - "step": 18780 - }, - { - "epoch": 9.794524119947848, - "grad_norm": 1.4233018159866333, - "learning_rate": 8.163316582914573e-05, - "loss": 5.5975, - "step": 18781 - }, - { - "epoch": 9.795045632333768, - "grad_norm": 1.420574426651001, - "learning_rate": 8.16321608040201e-05, - "loss": 4.9335, - "step": 18782 - }, - { - "epoch": 9.795567144719687, - "grad_norm": 1.4069085121154785, - "learning_rate": 8.163115577889447e-05, - "loss": 5.7459, - "step": 18783 - }, - { - "epoch": 9.796088657105607, - "grad_norm": 1.7237991094589233, - "learning_rate": 8.163015075376885e-05, - "loss": 5.232, - "step": 18784 - }, - { - "epoch": 9.796610169491526, - "grad_norm": 1.396418571472168, - "learning_rate": 8.162914572864322e-05, - "loss": 5.2956, - "step": 18785 - }, - { - "epoch": 9.797131681877445, - "grad_norm": 1.4072816371917725, - "learning_rate": 8.16281407035176e-05, - "loss": 5.6309, - "step": 18786 - }, - { - "epoch": 9.797653194263363, - "grad_norm": 1.359890341758728, - "learning_rate": 8.162713567839197e-05, - "loss": 5.7561, - "step": 18787 - }, - { - "epoch": 9.798174706649283, - "grad_norm": 1.4151477813720703, - "learning_rate": 8.162613065326633e-05, - "loss": 5.4661, - "step": 18788 - }, - { - "epoch": 9.798696219035202, - "grad_norm": 1.3870978355407715, - "learning_rate": 8.162512562814071e-05, - "loss": 5.382, - "step": 18789 - }, - { - "epoch": 9.799217731421122, - "grad_norm": 1.5548274517059326, - "learning_rate": 8.162412060301507e-05, - "loss": 5.0256, - "step": 18790 - }, - { - "epoch": 9.799739243807041, - "grad_norm": 1.6256885528564453, - "learning_rate": 8.162311557788945e-05, - "loss": 5.3333, - "step": 18791 - }, - { - "epoch": 9.800260756192959, - "grad_norm": 1.363315224647522, - "learning_rate": 8.162211055276381e-05, - "loss": 5.9492, - "step": 18792 - }, - { - "epoch": 9.800782268578878, - "grad_norm": 1.3605036735534668, - "learning_rate": 8.162110552763819e-05, - "loss": 5.7858, - "step": 18793 - }, - { - "epoch": 9.801303780964798, - "grad_norm": 1.4177042245864868, - "learning_rate": 8.162010050251257e-05, - "loss": 5.5254, - "step": 18794 - }, - { - "epoch": 9.801825293350717, - "grad_norm": 1.4957058429718018, - "learning_rate": 8.161909547738695e-05, - "loss": 5.7703, - "step": 18795 - }, - { - "epoch": 9.802346805736637, - "grad_norm": 1.3762632608413696, - "learning_rate": 8.161809045226131e-05, - "loss": 5.2932, - "step": 18796 - }, - { - "epoch": 9.802868318122556, - "grad_norm": 1.277329444885254, - "learning_rate": 8.161708542713569e-05, - "loss": 5.7655, - "step": 18797 - }, - { - "epoch": 9.803389830508475, - "grad_norm": 1.4631900787353516, - "learning_rate": 8.161608040201005e-05, - "loss": 5.2617, - "step": 18798 - }, - { - "epoch": 9.803911342894393, - "grad_norm": 1.6917686462402344, - "learning_rate": 8.161507537688443e-05, - "loss": 5.1344, - "step": 18799 - }, - { - "epoch": 9.804432855280313, - "grad_norm": 1.4270349740982056, - "learning_rate": 8.16140703517588e-05, - "loss": 5.7663, - "step": 18800 - }, - { - "epoch": 9.804954367666232, - "grad_norm": 1.4527724981307983, - "learning_rate": 8.161306532663316e-05, - "loss": 5.6209, - "step": 18801 - }, - { - "epoch": 9.805475880052152, - "grad_norm": 1.4133434295654297, - "learning_rate": 8.161206030150754e-05, - "loss": 5.923, - "step": 18802 - }, - { - "epoch": 9.805997392438071, - "grad_norm": 1.418936014175415, - "learning_rate": 8.16110552763819e-05, - "loss": 5.8359, - "step": 18803 - }, - { - "epoch": 9.806518904823989, - "grad_norm": 1.3846182823181152, - "learning_rate": 8.161005025125628e-05, - "loss": 5.585, - "step": 18804 - }, - { - "epoch": 9.807040417209908, - "grad_norm": 1.406559944152832, - "learning_rate": 8.160904522613066e-05, - "loss": 5.3978, - "step": 18805 - }, - { - "epoch": 9.807561929595828, - "grad_norm": 1.467599630355835, - "learning_rate": 8.160804020100504e-05, - "loss": 5.545, - "step": 18806 - }, - { - "epoch": 9.808083441981747, - "grad_norm": 1.3431358337402344, - "learning_rate": 8.16070351758794e-05, - "loss": 5.0636, - "step": 18807 - }, - { - "epoch": 9.808604954367667, - "grad_norm": 1.398579478263855, - "learning_rate": 8.160603015075378e-05, - "loss": 5.5567, - "step": 18808 - }, - { - "epoch": 9.809126466753586, - "grad_norm": 1.3804455995559692, - "learning_rate": 8.160502512562814e-05, - "loss": 5.449, - "step": 18809 - }, - { - "epoch": 9.809647979139505, - "grad_norm": 1.5802336931228638, - "learning_rate": 8.160402010050252e-05, - "loss": 5.2804, - "step": 18810 - }, - { - "epoch": 9.810169491525423, - "grad_norm": 1.5036910772323608, - "learning_rate": 8.160301507537688e-05, - "loss": 5.104, - "step": 18811 - }, - { - "epoch": 9.810691003911343, - "grad_norm": 1.335680365562439, - "learning_rate": 8.160201005025126e-05, - "loss": 5.1279, - "step": 18812 - }, - { - "epoch": 9.811212516297262, - "grad_norm": 1.5513349771499634, - "learning_rate": 8.160100502512563e-05, - "loss": 5.0275, - "step": 18813 - }, - { - "epoch": 9.811734028683182, - "grad_norm": 1.557739019393921, - "learning_rate": 8.16e-05, - "loss": 5.3305, - "step": 18814 - }, - { - "epoch": 9.812255541069101, - "grad_norm": 1.3543133735656738, - "learning_rate": 8.159899497487438e-05, - "loss": 5.3197, - "step": 18815 - }, - { - "epoch": 9.812777053455019, - "grad_norm": 1.380679726600647, - "learning_rate": 8.159798994974875e-05, - "loss": 5.5661, - "step": 18816 - }, - { - "epoch": 9.813298565840938, - "grad_norm": 1.3249982595443726, - "learning_rate": 8.159698492462312e-05, - "loss": 5.8286, - "step": 18817 - }, - { - "epoch": 9.813820078226858, - "grad_norm": 1.5055056810379028, - "learning_rate": 8.159597989949749e-05, - "loss": 4.5437, - "step": 18818 - }, - { - "epoch": 9.814341590612777, - "grad_norm": 1.5723572969436646, - "learning_rate": 8.159497487437187e-05, - "loss": 5.2732, - "step": 18819 - }, - { - "epoch": 9.814863102998697, - "grad_norm": 1.386507511138916, - "learning_rate": 8.159396984924623e-05, - "loss": 5.8666, - "step": 18820 - }, - { - "epoch": 9.815384615384616, - "grad_norm": 1.4611873626708984, - "learning_rate": 8.159296482412061e-05, - "loss": 5.8214, - "step": 18821 - }, - { - "epoch": 9.815906127770534, - "grad_norm": 1.3396035432815552, - "learning_rate": 8.159195979899497e-05, - "loss": 5.9309, - "step": 18822 - }, - { - "epoch": 9.816427640156453, - "grad_norm": 1.283207654953003, - "learning_rate": 8.159095477386935e-05, - "loss": 5.9526, - "step": 18823 - }, - { - "epoch": 9.816949152542373, - "grad_norm": 1.5138282775878906, - "learning_rate": 8.158994974874372e-05, - "loss": 5.157, - "step": 18824 - }, - { - "epoch": 9.817470664928292, - "grad_norm": 1.6958012580871582, - "learning_rate": 8.158894472361809e-05, - "loss": 5.3487, - "step": 18825 - }, - { - "epoch": 9.817992177314212, - "grad_norm": 1.5657508373260498, - "learning_rate": 8.158793969849247e-05, - "loss": 4.8755, - "step": 18826 - }, - { - "epoch": 9.818513689700131, - "grad_norm": 1.4833314418792725, - "learning_rate": 8.158693467336684e-05, - "loss": 5.0079, - "step": 18827 - }, - { - "epoch": 9.819035202086049, - "grad_norm": 1.5305812358856201, - "learning_rate": 8.158592964824121e-05, - "loss": 5.4943, - "step": 18828 - }, - { - "epoch": 9.819556714471968, - "grad_norm": 1.434797763824463, - "learning_rate": 8.158492462311558e-05, - "loss": 5.5853, - "step": 18829 - }, - { - "epoch": 9.820078226857888, - "grad_norm": 1.4151307344436646, - "learning_rate": 8.158391959798996e-05, - "loss": 5.7899, - "step": 18830 - }, - { - "epoch": 9.820599739243807, - "grad_norm": 1.575729250907898, - "learning_rate": 8.158291457286432e-05, - "loss": 5.0509, - "step": 18831 - }, - { - "epoch": 9.821121251629727, - "grad_norm": 1.4846992492675781, - "learning_rate": 8.15819095477387e-05, - "loss": 5.2576, - "step": 18832 - }, - { - "epoch": 9.821642764015646, - "grad_norm": 1.4463894367218018, - "learning_rate": 8.158090452261306e-05, - "loss": 5.6715, - "step": 18833 - }, - { - "epoch": 9.822164276401564, - "grad_norm": 1.5585966110229492, - "learning_rate": 8.157989949748744e-05, - "loss": 5.6754, - "step": 18834 - }, - { - "epoch": 9.822685788787483, - "grad_norm": 1.617010235786438, - "learning_rate": 8.157889447236182e-05, - "loss": 4.9578, - "step": 18835 - }, - { - "epoch": 9.823207301173403, - "grad_norm": 1.4858665466308594, - "learning_rate": 8.15778894472362e-05, - "loss": 5.4435, - "step": 18836 - }, - { - "epoch": 9.823728813559322, - "grad_norm": 1.4326707124710083, - "learning_rate": 8.157688442211056e-05, - "loss": 5.2799, - "step": 18837 - }, - { - "epoch": 9.824250325945242, - "grad_norm": 1.4918309450149536, - "learning_rate": 8.157587939698494e-05, - "loss": 5.579, - "step": 18838 - }, - { - "epoch": 9.824771838331161, - "grad_norm": 1.600682258605957, - "learning_rate": 8.15748743718593e-05, - "loss": 5.1021, - "step": 18839 - }, - { - "epoch": 9.825293350717079, - "grad_norm": 1.4583885669708252, - "learning_rate": 8.157386934673367e-05, - "loss": 5.341, - "step": 18840 - }, - { - "epoch": 9.825814863102998, - "grad_norm": 1.5668346881866455, - "learning_rate": 8.157286432160804e-05, - "loss": 5.6872, - "step": 18841 - }, - { - "epoch": 9.826336375488918, - "grad_norm": 1.375795602798462, - "learning_rate": 8.157185929648241e-05, - "loss": 5.7434, - "step": 18842 - }, - { - "epoch": 9.826857887874837, - "grad_norm": 1.4205663204193115, - "learning_rate": 8.157085427135679e-05, - "loss": 5.9063, - "step": 18843 - }, - { - "epoch": 9.827379400260757, - "grad_norm": 1.510650634765625, - "learning_rate": 8.156984924623115e-05, - "loss": 5.5451, - "step": 18844 - }, - { - "epoch": 9.827900912646676, - "grad_norm": 1.3845927715301514, - "learning_rate": 8.156884422110553e-05, - "loss": 5.7148, - "step": 18845 - }, - { - "epoch": 9.828422425032594, - "grad_norm": 1.3882251977920532, - "learning_rate": 8.15678391959799e-05, - "loss": 5.2, - "step": 18846 - }, - { - "epoch": 9.828943937418513, - "grad_norm": 1.4715758562088013, - "learning_rate": 8.156683417085428e-05, - "loss": 5.4933, - "step": 18847 - }, - { - "epoch": 9.829465449804433, - "grad_norm": 1.342267394065857, - "learning_rate": 8.156582914572865e-05, - "loss": 5.6232, - "step": 18848 - }, - { - "epoch": 9.829986962190352, - "grad_norm": 1.542335033416748, - "learning_rate": 8.156482412060303e-05, - "loss": 5.7102, - "step": 18849 - }, - { - "epoch": 9.830508474576272, - "grad_norm": 1.3038665056228638, - "learning_rate": 8.156381909547739e-05, - "loss": 5.8304, - "step": 18850 - }, - { - "epoch": 9.831029986962191, - "grad_norm": 1.475143313407898, - "learning_rate": 8.156281407035177e-05, - "loss": 5.1172, - "step": 18851 - }, - { - "epoch": 9.831551499348109, - "grad_norm": 1.4131441116333008, - "learning_rate": 8.156180904522613e-05, - "loss": 5.2947, - "step": 18852 - }, - { - "epoch": 9.832073011734028, - "grad_norm": 1.5086700916290283, - "learning_rate": 8.156080402010051e-05, - "loss": 5.6492, - "step": 18853 - }, - { - "epoch": 9.832594524119948, - "grad_norm": 1.3493973016738892, - "learning_rate": 8.155979899497487e-05, - "loss": 5.7619, - "step": 18854 - }, - { - "epoch": 9.833116036505867, - "grad_norm": 1.3690553903579712, - "learning_rate": 8.155879396984925e-05, - "loss": 5.8245, - "step": 18855 - }, - { - "epoch": 9.833637548891787, - "grad_norm": 1.4208611249923706, - "learning_rate": 8.155778894472363e-05, - "loss": 5.079, - "step": 18856 - }, - { - "epoch": 9.834159061277706, - "grad_norm": 1.2651432752609253, - "learning_rate": 8.1556783919598e-05, - "loss": 5.6992, - "step": 18857 - }, - { - "epoch": 9.834680573663624, - "grad_norm": 1.4521236419677734, - "learning_rate": 8.155577889447237e-05, - "loss": 5.1632, - "step": 18858 - }, - { - "epoch": 9.835202086049543, - "grad_norm": 1.4869657754898071, - "learning_rate": 8.155477386934674e-05, - "loss": 5.1051, - "step": 18859 - }, - { - "epoch": 9.835723598435463, - "grad_norm": 1.4910728931427002, - "learning_rate": 8.155376884422111e-05, - "loss": 5.356, - "step": 18860 - }, - { - "epoch": 9.836245110821382, - "grad_norm": 1.47030770778656, - "learning_rate": 8.155276381909548e-05, - "loss": 5.5372, - "step": 18861 - }, - { - "epoch": 9.836766623207302, - "grad_norm": 1.3523238897323608, - "learning_rate": 8.155175879396986e-05, - "loss": 4.7673, - "step": 18862 - }, - { - "epoch": 9.837288135593221, - "grad_norm": 1.3635660409927368, - "learning_rate": 8.155075376884422e-05, - "loss": 4.6646, - "step": 18863 - }, - { - "epoch": 9.837809647979139, - "grad_norm": 1.3605055809020996, - "learning_rate": 8.15497487437186e-05, - "loss": 5.6889, - "step": 18864 - }, - { - "epoch": 9.838331160365058, - "grad_norm": 1.4220993518829346, - "learning_rate": 8.154874371859296e-05, - "loss": 5.2597, - "step": 18865 - }, - { - "epoch": 9.838852672750978, - "grad_norm": 1.4281303882598877, - "learning_rate": 8.154773869346734e-05, - "loss": 5.7396, - "step": 18866 - }, - { - "epoch": 9.839374185136897, - "grad_norm": 1.4710369110107422, - "learning_rate": 8.154673366834172e-05, - "loss": 5.6739, - "step": 18867 - }, - { - "epoch": 9.839895697522817, - "grad_norm": 1.4634591341018677, - "learning_rate": 8.154572864321608e-05, - "loss": 5.2925, - "step": 18868 - }, - { - "epoch": 9.840417209908736, - "grad_norm": 1.4451037645339966, - "learning_rate": 8.154472361809046e-05, - "loss": 5.0543, - "step": 18869 - }, - { - "epoch": 9.840938722294654, - "grad_norm": 1.5098178386688232, - "learning_rate": 8.154371859296482e-05, - "loss": 5.3023, - "step": 18870 - }, - { - "epoch": 9.841460234680573, - "grad_norm": 1.4066853523254395, - "learning_rate": 8.15427135678392e-05, - "loss": 5.8847, - "step": 18871 - }, - { - "epoch": 9.841981747066493, - "grad_norm": 1.3362759351730347, - "learning_rate": 8.154170854271357e-05, - "loss": 5.8221, - "step": 18872 - }, - { - "epoch": 9.842503259452412, - "grad_norm": 1.4905142784118652, - "learning_rate": 8.154070351758794e-05, - "loss": 4.8676, - "step": 18873 - }, - { - "epoch": 9.843024771838332, - "grad_norm": 1.4173389673233032, - "learning_rate": 8.153969849246231e-05, - "loss": 5.4939, - "step": 18874 - }, - { - "epoch": 9.843546284224251, - "grad_norm": 1.3508365154266357, - "learning_rate": 8.153869346733669e-05, - "loss": 5.5821, - "step": 18875 - }, - { - "epoch": 9.844067796610169, - "grad_norm": 1.3303414583206177, - "learning_rate": 8.153768844221106e-05, - "loss": 5.6829, - "step": 18876 - }, - { - "epoch": 9.844589308996088, - "grad_norm": 1.514719843864441, - "learning_rate": 8.153668341708544e-05, - "loss": 4.7906, - "step": 18877 - }, - { - "epoch": 9.845110821382008, - "grad_norm": 1.423122763633728, - "learning_rate": 8.15356783919598e-05, - "loss": 5.5742, - "step": 18878 - }, - { - "epoch": 9.845632333767927, - "grad_norm": 1.4707762002944946, - "learning_rate": 8.153467336683418e-05, - "loss": 5.3542, - "step": 18879 - }, - { - "epoch": 9.846153846153847, - "grad_norm": 1.637756109237671, - "learning_rate": 8.153366834170855e-05, - "loss": 5.2408, - "step": 18880 - }, - { - "epoch": 9.846675358539766, - "grad_norm": 1.491081714630127, - "learning_rate": 8.153266331658291e-05, - "loss": 5.5207, - "step": 18881 - }, - { - "epoch": 9.847196870925684, - "grad_norm": 1.529356598854065, - "learning_rate": 8.153165829145729e-05, - "loss": 5.5198, - "step": 18882 - }, - { - "epoch": 9.847718383311603, - "grad_norm": 1.4644250869750977, - "learning_rate": 8.153065326633165e-05, - "loss": 5.4591, - "step": 18883 - }, - { - "epoch": 9.848239895697523, - "grad_norm": 1.4229185581207275, - "learning_rate": 8.152964824120603e-05, - "loss": 4.9404, - "step": 18884 - }, - { - "epoch": 9.848761408083442, - "grad_norm": 1.3820608854293823, - "learning_rate": 8.15286432160804e-05, - "loss": 5.6197, - "step": 18885 - }, - { - "epoch": 9.849282920469362, - "grad_norm": 1.3401633501052856, - "learning_rate": 8.152763819095477e-05, - "loss": 5.9148, - "step": 18886 - }, - { - "epoch": 9.84980443285528, - "grad_norm": 1.5599145889282227, - "learning_rate": 8.152663316582915e-05, - "loss": 4.9957, - "step": 18887 - }, - { - "epoch": 9.850325945241199, - "grad_norm": 1.4089999198913574, - "learning_rate": 8.152562814070353e-05, - "loss": 5.125, - "step": 18888 - }, - { - "epoch": 9.850847457627118, - "grad_norm": 1.3984298706054688, - "learning_rate": 8.15246231155779e-05, - "loss": 5.8352, - "step": 18889 - }, - { - "epoch": 9.851368970013038, - "grad_norm": 1.3625659942626953, - "learning_rate": 8.152361809045227e-05, - "loss": 5.4781, - "step": 18890 - }, - { - "epoch": 9.851890482398957, - "grad_norm": 1.3269946575164795, - "learning_rate": 8.152261306532664e-05, - "loss": 5.5337, - "step": 18891 - }, - { - "epoch": 9.852411994784877, - "grad_norm": 1.382431983947754, - "learning_rate": 8.152160804020101e-05, - "loss": 5.6886, - "step": 18892 - }, - { - "epoch": 9.852933507170796, - "grad_norm": 1.3401826620101929, - "learning_rate": 8.152060301507538e-05, - "loss": 5.7318, - "step": 18893 - }, - { - "epoch": 9.853455019556714, - "grad_norm": 1.5526294708251953, - "learning_rate": 8.151959798994974e-05, - "loss": 5.3919, - "step": 18894 - }, - { - "epoch": 9.853976531942633, - "grad_norm": 1.318988561630249, - "learning_rate": 8.151859296482412e-05, - "loss": 5.6052, - "step": 18895 - }, - { - "epoch": 9.854498044328553, - "grad_norm": 1.5080275535583496, - "learning_rate": 8.15175879396985e-05, - "loss": 5.1096, - "step": 18896 - }, - { - "epoch": 9.855019556714472, - "grad_norm": 1.4578192234039307, - "learning_rate": 8.151658291457288e-05, - "loss": 5.2731, - "step": 18897 - }, - { - "epoch": 9.855541069100392, - "grad_norm": 1.4876171350479126, - "learning_rate": 8.151557788944724e-05, - "loss": 4.9786, - "step": 18898 - }, - { - "epoch": 9.85606258148631, - "grad_norm": 1.474180817604065, - "learning_rate": 8.151457286432162e-05, - "loss": 5.4343, - "step": 18899 - }, - { - "epoch": 9.856584093872229, - "grad_norm": 1.388884425163269, - "learning_rate": 8.151356783919598e-05, - "loss": 5.5142, - "step": 18900 - }, - { - "epoch": 9.857105606258148, - "grad_norm": 1.4965856075286865, - "learning_rate": 8.151256281407036e-05, - "loss": 5.6792, - "step": 18901 - }, - { - "epoch": 9.857627118644068, - "grad_norm": 1.4323372840881348, - "learning_rate": 8.151155778894473e-05, - "loss": 5.3885, - "step": 18902 - }, - { - "epoch": 9.858148631029987, - "grad_norm": 1.359602689743042, - "learning_rate": 8.15105527638191e-05, - "loss": 5.6085, - "step": 18903 - }, - { - "epoch": 9.858670143415907, - "grad_norm": 1.3607302904129028, - "learning_rate": 8.150954773869347e-05, - "loss": 5.4653, - "step": 18904 - }, - { - "epoch": 9.859191655801826, - "grad_norm": 1.4618481397628784, - "learning_rate": 8.150854271356785e-05, - "loss": 5.7132, - "step": 18905 - }, - { - "epoch": 9.859713168187744, - "grad_norm": 1.419859528541565, - "learning_rate": 8.150753768844221e-05, - "loss": 5.5395, - "step": 18906 - }, - { - "epoch": 9.860234680573663, - "grad_norm": 1.3764078617095947, - "learning_rate": 8.150653266331659e-05, - "loss": 5.7023, - "step": 18907 - }, - { - "epoch": 9.860756192959583, - "grad_norm": 1.3198775053024292, - "learning_rate": 8.150552763819096e-05, - "loss": 5.4857, - "step": 18908 - }, - { - "epoch": 9.861277705345502, - "grad_norm": 1.4312407970428467, - "learning_rate": 8.150452261306533e-05, - "loss": 5.0223, - "step": 18909 - }, - { - "epoch": 9.861799217731422, - "grad_norm": 1.5155631303787231, - "learning_rate": 8.150351758793971e-05, - "loss": 5.2398, - "step": 18910 - }, - { - "epoch": 9.86232073011734, - "grad_norm": 1.5549267530441284, - "learning_rate": 8.150251256281407e-05, - "loss": 5.0545, - "step": 18911 - }, - { - "epoch": 9.862842242503259, - "grad_norm": 1.3913756608963013, - "learning_rate": 8.150150753768845e-05, - "loss": 5.7832, - "step": 18912 - }, - { - "epoch": 9.863363754889178, - "grad_norm": 1.5730245113372803, - "learning_rate": 8.150050251256281e-05, - "loss": 5.1918, - "step": 18913 - }, - { - "epoch": 9.863885267275098, - "grad_norm": 1.479029893875122, - "learning_rate": 8.149949748743719e-05, - "loss": 5.3568, - "step": 18914 - }, - { - "epoch": 9.864406779661017, - "grad_norm": 1.4946562051773071, - "learning_rate": 8.149849246231156e-05, - "loss": 5.5501, - "step": 18915 - }, - { - "epoch": 9.864928292046937, - "grad_norm": 1.527305245399475, - "learning_rate": 8.149748743718593e-05, - "loss": 5.3042, - "step": 18916 - }, - { - "epoch": 9.865449804432854, - "grad_norm": 1.4032378196716309, - "learning_rate": 8.14964824120603e-05, - "loss": 5.3333, - "step": 18917 - }, - { - "epoch": 9.865971316818774, - "grad_norm": 1.4341539144515991, - "learning_rate": 8.149547738693468e-05, - "loss": 5.2739, - "step": 18918 - }, - { - "epoch": 9.866492829204693, - "grad_norm": 1.556546926498413, - "learning_rate": 8.149447236180905e-05, - "loss": 5.2747, - "step": 18919 - }, - { - "epoch": 9.867014341590613, - "grad_norm": 1.3808815479278564, - "learning_rate": 8.149346733668342e-05, - "loss": 5.2508, - "step": 18920 - }, - { - "epoch": 9.867535853976532, - "grad_norm": 1.4103188514709473, - "learning_rate": 8.14924623115578e-05, - "loss": 5.6544, - "step": 18921 - }, - { - "epoch": 9.868057366362452, - "grad_norm": 1.5518792867660522, - "learning_rate": 8.149145728643216e-05, - "loss": 5.2806, - "step": 18922 - }, - { - "epoch": 9.86857887874837, - "grad_norm": 1.5141112804412842, - "learning_rate": 8.149045226130654e-05, - "loss": 5.3195, - "step": 18923 - }, - { - "epoch": 9.869100391134289, - "grad_norm": 1.3024266958236694, - "learning_rate": 8.14894472361809e-05, - "loss": 5.4753, - "step": 18924 - }, - { - "epoch": 9.869621903520208, - "grad_norm": 1.3423711061477661, - "learning_rate": 8.148844221105528e-05, - "loss": 5.4957, - "step": 18925 - }, - { - "epoch": 9.870143415906128, - "grad_norm": 1.5627321004867554, - "learning_rate": 8.148743718592964e-05, - "loss": 5.2467, - "step": 18926 - }, - { - "epoch": 9.870664928292047, - "grad_norm": 1.3703978061676025, - "learning_rate": 8.148643216080402e-05, - "loss": 5.6933, - "step": 18927 - }, - { - "epoch": 9.871186440677967, - "grad_norm": 1.450538992881775, - "learning_rate": 8.14854271356784e-05, - "loss": 5.6107, - "step": 18928 - }, - { - "epoch": 9.871707953063884, - "grad_norm": 1.4881075620651245, - "learning_rate": 8.148442211055278e-05, - "loss": 5.3103, - "step": 18929 - }, - { - "epoch": 9.872229465449804, - "grad_norm": 1.4314340353012085, - "learning_rate": 8.148341708542714e-05, - "loss": 5.5758, - "step": 18930 - }, - { - "epoch": 9.872750977835723, - "grad_norm": 1.535010576248169, - "learning_rate": 8.148241206030152e-05, - "loss": 5.5448, - "step": 18931 - }, - { - "epoch": 9.873272490221643, - "grad_norm": 1.4442405700683594, - "learning_rate": 8.148140703517588e-05, - "loss": 5.6248, - "step": 18932 - }, - { - "epoch": 9.873794002607562, - "grad_norm": 1.33511483669281, - "learning_rate": 8.148040201005025e-05, - "loss": 5.9814, - "step": 18933 - }, - { - "epoch": 9.874315514993482, - "grad_norm": 1.346980094909668, - "learning_rate": 8.147939698492463e-05, - "loss": 5.7066, - "step": 18934 - }, - { - "epoch": 9.8748370273794, - "grad_norm": 1.3210828304290771, - "learning_rate": 8.147839195979899e-05, - "loss": 5.9721, - "step": 18935 - }, - { - "epoch": 9.875358539765319, - "grad_norm": 1.4853821992874146, - "learning_rate": 8.147738693467337e-05, - "loss": 5.5041, - "step": 18936 - }, - { - "epoch": 9.875880052151238, - "grad_norm": 1.4116325378417969, - "learning_rate": 8.147638190954773e-05, - "loss": 5.3517, - "step": 18937 - }, - { - "epoch": 9.876401564537158, - "grad_norm": 1.4929753541946411, - "learning_rate": 8.147537688442211e-05, - "loss": 5.6231, - "step": 18938 - }, - { - "epoch": 9.876923076923077, - "grad_norm": 1.5282824039459229, - "learning_rate": 8.147437185929649e-05, - "loss": 5.4738, - "step": 18939 - }, - { - "epoch": 9.877444589308997, - "grad_norm": 1.4355461597442627, - "learning_rate": 8.147336683417087e-05, - "loss": 5.4087, - "step": 18940 - }, - { - "epoch": 9.877966101694915, - "grad_norm": 1.4045108556747437, - "learning_rate": 8.147236180904523e-05, - "loss": 5.753, - "step": 18941 - }, - { - "epoch": 9.878487614080834, - "grad_norm": 1.3695921897888184, - "learning_rate": 8.147135678391961e-05, - "loss": 5.7157, - "step": 18942 - }, - { - "epoch": 9.879009126466753, - "grad_norm": 1.511897325515747, - "learning_rate": 8.147035175879397e-05, - "loss": 5.5872, - "step": 18943 - }, - { - "epoch": 9.879530638852673, - "grad_norm": 1.4101191759109497, - "learning_rate": 8.146934673366835e-05, - "loss": 5.7312, - "step": 18944 - }, - { - "epoch": 9.880052151238592, - "grad_norm": 1.4767974615097046, - "learning_rate": 8.146834170854271e-05, - "loss": 5.1691, - "step": 18945 - }, - { - "epoch": 9.880573663624512, - "grad_norm": 1.4591628313064575, - "learning_rate": 8.146733668341709e-05, - "loss": 5.4313, - "step": 18946 - }, - { - "epoch": 9.88109517601043, - "grad_norm": 1.3642665147781372, - "learning_rate": 8.146633165829146e-05, - "loss": 5.5792, - "step": 18947 - }, - { - "epoch": 9.881616688396349, - "grad_norm": 1.4090399742126465, - "learning_rate": 8.146532663316583e-05, - "loss": 5.9452, - "step": 18948 - }, - { - "epoch": 9.882138200782268, - "grad_norm": 1.458201289176941, - "learning_rate": 8.146432160804021e-05, - "loss": 5.1397, - "step": 18949 - }, - { - "epoch": 9.882659713168188, - "grad_norm": 1.4682083129882812, - "learning_rate": 8.146331658291458e-05, - "loss": 4.8965, - "step": 18950 - }, - { - "epoch": 9.883181225554107, - "grad_norm": 1.3434338569641113, - "learning_rate": 8.146231155778895e-05, - "loss": 5.7222, - "step": 18951 - }, - { - "epoch": 9.883702737940027, - "grad_norm": 1.4034472703933716, - "learning_rate": 8.146130653266332e-05, - "loss": 5.2877, - "step": 18952 - }, - { - "epoch": 9.884224250325945, - "grad_norm": 1.4746540784835815, - "learning_rate": 8.14603015075377e-05, - "loss": 5.7083, - "step": 18953 - }, - { - "epoch": 9.884745762711864, - "grad_norm": 1.6290390491485596, - "learning_rate": 8.145929648241206e-05, - "loss": 5.4689, - "step": 18954 - }, - { - "epoch": 9.885267275097783, - "grad_norm": 1.4388952255249023, - "learning_rate": 8.145829145728644e-05, - "loss": 5.5161, - "step": 18955 - }, - { - "epoch": 9.885788787483703, - "grad_norm": 1.4674385786056519, - "learning_rate": 8.14572864321608e-05, - "loss": 5.5271, - "step": 18956 - }, - { - "epoch": 9.886310299869622, - "grad_norm": 1.400253415107727, - "learning_rate": 8.145628140703518e-05, - "loss": 5.4612, - "step": 18957 - }, - { - "epoch": 9.886831812255542, - "grad_norm": 1.420029878616333, - "learning_rate": 8.145527638190954e-05, - "loss": 5.4385, - "step": 18958 - }, - { - "epoch": 9.88735332464146, - "grad_norm": 1.3299713134765625, - "learning_rate": 8.145427135678392e-05, - "loss": 5.6023, - "step": 18959 - }, - { - "epoch": 9.887874837027379, - "grad_norm": 1.4464234113693237, - "learning_rate": 8.14532663316583e-05, - "loss": 5.0186, - "step": 18960 - }, - { - "epoch": 9.888396349413298, - "grad_norm": 1.492723822593689, - "learning_rate": 8.145226130653266e-05, - "loss": 5.6454, - "step": 18961 - }, - { - "epoch": 9.888917861799218, - "grad_norm": 1.315596580505371, - "learning_rate": 8.145125628140704e-05, - "loss": 5.5022, - "step": 18962 - }, - { - "epoch": 9.889439374185137, - "grad_norm": 1.3878294229507446, - "learning_rate": 8.14502512562814e-05, - "loss": 5.5521, - "step": 18963 - }, - { - "epoch": 9.889960886571057, - "grad_norm": 1.4243993759155273, - "learning_rate": 8.144924623115578e-05, - "loss": 5.8142, - "step": 18964 - }, - { - "epoch": 9.890482398956975, - "grad_norm": 1.3934370279312134, - "learning_rate": 8.144824120603015e-05, - "loss": 5.8063, - "step": 18965 - }, - { - "epoch": 9.891003911342894, - "grad_norm": 1.5747432708740234, - "learning_rate": 8.144723618090453e-05, - "loss": 5.1517, - "step": 18966 - }, - { - "epoch": 9.891525423728813, - "grad_norm": 1.6129409074783325, - "learning_rate": 8.144623115577889e-05, - "loss": 5.2656, - "step": 18967 - }, - { - "epoch": 9.892046936114733, - "grad_norm": 1.6949797868728638, - "learning_rate": 8.144522613065327e-05, - "loss": 5.5283, - "step": 18968 - }, - { - "epoch": 9.892568448500652, - "grad_norm": 1.5663963556289673, - "learning_rate": 8.144422110552765e-05, - "loss": 5.3111, - "step": 18969 - }, - { - "epoch": 9.893089960886572, - "grad_norm": 1.4378150701522827, - "learning_rate": 8.144321608040202e-05, - "loss": 5.4715, - "step": 18970 - }, - { - "epoch": 9.89361147327249, - "grad_norm": 1.432955265045166, - "learning_rate": 8.144221105527639e-05, - "loss": 5.4892, - "step": 18971 - }, - { - "epoch": 9.894132985658409, - "grad_norm": 1.3503034114837646, - "learning_rate": 8.144120603015077e-05, - "loss": 5.669, - "step": 18972 - }, - { - "epoch": 9.894654498044329, - "grad_norm": 1.3985849618911743, - "learning_rate": 8.144020100502513e-05, - "loss": 5.7261, - "step": 18973 - }, - { - "epoch": 9.895176010430248, - "grad_norm": 1.440733551979065, - "learning_rate": 8.14391959798995e-05, - "loss": 4.8851, - "step": 18974 - }, - { - "epoch": 9.895697522816167, - "grad_norm": 1.4635276794433594, - "learning_rate": 8.143819095477387e-05, - "loss": 5.7972, - "step": 18975 - }, - { - "epoch": 9.896219035202087, - "grad_norm": 1.3380005359649658, - "learning_rate": 8.143718592964824e-05, - "loss": 5.2589, - "step": 18976 - }, - { - "epoch": 9.896740547588005, - "grad_norm": 1.4112027883529663, - "learning_rate": 8.143618090452261e-05, - "loss": 5.7545, - "step": 18977 - }, - { - "epoch": 9.897262059973924, - "grad_norm": 1.5215460062026978, - "learning_rate": 8.143517587939698e-05, - "loss": 4.6125, - "step": 18978 - }, - { - "epoch": 9.897783572359844, - "grad_norm": 1.330257534980774, - "learning_rate": 8.143417085427136e-05, - "loss": 5.9892, - "step": 18979 - }, - { - "epoch": 9.898305084745763, - "grad_norm": 1.8168784379959106, - "learning_rate": 8.143316582914573e-05, - "loss": 5.2948, - "step": 18980 - }, - { - "epoch": 9.898826597131682, - "grad_norm": 1.2780791521072388, - "learning_rate": 8.143216080402011e-05, - "loss": 5.8721, - "step": 18981 - }, - { - "epoch": 9.8993481095176, - "grad_norm": 1.421470046043396, - "learning_rate": 8.143115577889448e-05, - "loss": 5.2903, - "step": 18982 - }, - { - "epoch": 9.89986962190352, - "grad_norm": 1.4439671039581299, - "learning_rate": 8.143015075376885e-05, - "loss": 5.499, - "step": 18983 - }, - { - "epoch": 9.900391134289439, - "grad_norm": 1.71378755569458, - "learning_rate": 8.142914572864322e-05, - "loss": 5.0259, - "step": 18984 - }, - { - "epoch": 9.900912646675359, - "grad_norm": 1.4430296421051025, - "learning_rate": 8.14281407035176e-05, - "loss": 5.4741, - "step": 18985 - }, - { - "epoch": 9.901434159061278, - "grad_norm": 1.4124850034713745, - "learning_rate": 8.142713567839196e-05, - "loss": 5.6969, - "step": 18986 - }, - { - "epoch": 9.901955671447197, - "grad_norm": 1.395836353302002, - "learning_rate": 8.142613065326633e-05, - "loss": 5.4566, - "step": 18987 - }, - { - "epoch": 9.902477183833117, - "grad_norm": 1.2927149534225464, - "learning_rate": 8.14251256281407e-05, - "loss": 5.8466, - "step": 18988 - }, - { - "epoch": 9.902998696219035, - "grad_norm": 1.9621624946594238, - "learning_rate": 8.142412060301508e-05, - "loss": 5.177, - "step": 18989 - }, - { - "epoch": 9.903520208604954, - "grad_norm": 1.398086667060852, - "learning_rate": 8.142311557788946e-05, - "loss": 5.4037, - "step": 18990 - }, - { - "epoch": 9.904041720990874, - "grad_norm": 1.6141467094421387, - "learning_rate": 8.142211055276382e-05, - "loss": 5.251, - "step": 18991 - }, - { - "epoch": 9.904563233376793, - "grad_norm": 1.3788906335830688, - "learning_rate": 8.14211055276382e-05, - "loss": 5.407, - "step": 18992 - }, - { - "epoch": 9.905084745762712, - "grad_norm": 1.5052052736282349, - "learning_rate": 8.142010050251257e-05, - "loss": 5.4253, - "step": 18993 - }, - { - "epoch": 9.90560625814863, - "grad_norm": 1.4209084510803223, - "learning_rate": 8.141909547738694e-05, - "loss": 5.3344, - "step": 18994 - }, - { - "epoch": 9.90612777053455, - "grad_norm": 1.3242473602294922, - "learning_rate": 8.141809045226131e-05, - "loss": 5.9352, - "step": 18995 - }, - { - "epoch": 9.906649282920469, - "grad_norm": 1.426491141319275, - "learning_rate": 8.141708542713569e-05, - "loss": 5.5666, - "step": 18996 - }, - { - "epoch": 9.907170795306389, - "grad_norm": 1.3522502183914185, - "learning_rate": 8.141608040201005e-05, - "loss": 5.601, - "step": 18997 - }, - { - "epoch": 9.907692307692308, - "grad_norm": 1.4910187721252441, - "learning_rate": 8.141507537688443e-05, - "loss": 5.2339, - "step": 18998 - }, - { - "epoch": 9.908213820078227, - "grad_norm": 1.5837610960006714, - "learning_rate": 8.141407035175879e-05, - "loss": 4.8941, - "step": 18999 - }, - { - "epoch": 9.908735332464147, - "grad_norm": 1.4559060335159302, - "learning_rate": 8.141306532663317e-05, - "loss": 5.6368, - "step": 19000 - }, - { - "epoch": 9.909256844850065, - "grad_norm": 1.4467158317565918, - "learning_rate": 8.141206030150755e-05, - "loss": 5.6214, - "step": 19001 - }, - { - "epoch": 9.909778357235984, - "grad_norm": 1.4220423698425293, - "learning_rate": 8.141105527638191e-05, - "loss": 5.2892, - "step": 19002 - }, - { - "epoch": 9.910299869621904, - "grad_norm": 1.4295992851257324, - "learning_rate": 8.141005025125629e-05, - "loss": 5.2701, - "step": 19003 - }, - { - "epoch": 9.910821382007823, - "grad_norm": 1.4433839321136475, - "learning_rate": 8.140904522613065e-05, - "loss": 5.5522, - "step": 19004 - }, - { - "epoch": 9.911342894393742, - "grad_norm": 1.5417948961257935, - "learning_rate": 8.140804020100503e-05, - "loss": 5.5151, - "step": 19005 - }, - { - "epoch": 9.91186440677966, - "grad_norm": 1.5292103290557861, - "learning_rate": 8.14070351758794e-05, - "loss": 5.0369, - "step": 19006 - }, - { - "epoch": 9.91238591916558, - "grad_norm": 1.4945679903030396, - "learning_rate": 8.140603015075377e-05, - "loss": 5.1697, - "step": 19007 - }, - { - "epoch": 9.9129074315515, - "grad_norm": 1.4211195707321167, - "learning_rate": 8.140502512562814e-05, - "loss": 5.594, - "step": 19008 - }, - { - "epoch": 9.913428943937419, - "grad_norm": 1.366811752319336, - "learning_rate": 8.140402010050252e-05, - "loss": 5.4322, - "step": 19009 - }, - { - "epoch": 9.913950456323338, - "grad_norm": 1.3830369710922241, - "learning_rate": 8.14030150753769e-05, - "loss": 5.9532, - "step": 19010 - }, - { - "epoch": 9.914471968709258, - "grad_norm": 1.4110532999038696, - "learning_rate": 8.140201005025127e-05, - "loss": 5.7933, - "step": 19011 - }, - { - "epoch": 9.914993481095175, - "grad_norm": 1.4042266607284546, - "learning_rate": 8.140100502512564e-05, - "loss": 5.5369, - "step": 19012 - }, - { - "epoch": 9.915514993481095, - "grad_norm": 1.3875893354415894, - "learning_rate": 8.14e-05, - "loss": 5.4546, - "step": 19013 - }, - { - "epoch": 9.916036505867014, - "grad_norm": 1.5292747020721436, - "learning_rate": 8.139899497487438e-05, - "loss": 4.7787, - "step": 19014 - }, - { - "epoch": 9.916558018252934, - "grad_norm": 1.4562453031539917, - "learning_rate": 8.139798994974874e-05, - "loss": 5.776, - "step": 19015 - }, - { - "epoch": 9.917079530638853, - "grad_norm": 1.4821418523788452, - "learning_rate": 8.139698492462312e-05, - "loss": 4.6333, - "step": 19016 - }, - { - "epoch": 9.917601043024773, - "grad_norm": 1.5525473356246948, - "learning_rate": 8.139597989949748e-05, - "loss": 5.5162, - "step": 19017 - }, - { - "epoch": 9.91812255541069, - "grad_norm": 1.346979022026062, - "learning_rate": 8.139497487437186e-05, - "loss": 5.6803, - "step": 19018 - }, - { - "epoch": 9.91864406779661, - "grad_norm": 1.3840863704681396, - "learning_rate": 8.139396984924623e-05, - "loss": 5.4334, - "step": 19019 - }, - { - "epoch": 9.91916558018253, - "grad_norm": 1.428637981414795, - "learning_rate": 8.13929648241206e-05, - "loss": 5.841, - "step": 19020 - }, - { - "epoch": 9.919687092568449, - "grad_norm": 1.471103310585022, - "learning_rate": 8.139195979899498e-05, - "loss": 5.5944, - "step": 19021 - }, - { - "epoch": 9.920208604954368, - "grad_norm": 1.3140798807144165, - "learning_rate": 8.139095477386936e-05, - "loss": 5.7029, - "step": 19022 - }, - { - "epoch": 9.920730117340288, - "grad_norm": 1.289729118347168, - "learning_rate": 8.138994974874372e-05, - "loss": 5.7942, - "step": 19023 - }, - { - "epoch": 9.921251629726205, - "grad_norm": 1.3748635053634644, - "learning_rate": 8.13889447236181e-05, - "loss": 5.6691, - "step": 19024 - }, - { - "epoch": 9.921773142112125, - "grad_norm": 1.4134958982467651, - "learning_rate": 8.138793969849247e-05, - "loss": 5.6204, - "step": 19025 - }, - { - "epoch": 9.922294654498044, - "grad_norm": 1.3892827033996582, - "learning_rate": 8.138693467336683e-05, - "loss": 5.6974, - "step": 19026 - }, - { - "epoch": 9.922816166883964, - "grad_norm": 1.4403830766677856, - "learning_rate": 8.138592964824121e-05, - "loss": 5.5608, - "step": 19027 - }, - { - "epoch": 9.923337679269883, - "grad_norm": 1.4532029628753662, - "learning_rate": 8.138492462311557e-05, - "loss": 5.6023, - "step": 19028 - }, - { - "epoch": 9.923859191655803, - "grad_norm": 1.4322328567504883, - "learning_rate": 8.138391959798995e-05, - "loss": 5.3622, - "step": 19029 - }, - { - "epoch": 9.92438070404172, - "grad_norm": 1.4148895740509033, - "learning_rate": 8.138291457286433e-05, - "loss": 5.5552, - "step": 19030 - }, - { - "epoch": 9.92490221642764, - "grad_norm": 1.4331471920013428, - "learning_rate": 8.13819095477387e-05, - "loss": 5.4715, - "step": 19031 - }, - { - "epoch": 9.92542372881356, - "grad_norm": 1.46225905418396, - "learning_rate": 8.138090452261307e-05, - "loss": 5.6782, - "step": 19032 - }, - { - "epoch": 9.925945241199479, - "grad_norm": 1.5385360717773438, - "learning_rate": 8.137989949748745e-05, - "loss": 5.6772, - "step": 19033 - }, - { - "epoch": 9.926466753585398, - "grad_norm": 1.5795198678970337, - "learning_rate": 8.137889447236181e-05, - "loss": 5.539, - "step": 19034 - }, - { - "epoch": 9.926988265971318, - "grad_norm": 1.3662545680999756, - "learning_rate": 8.137788944723619e-05, - "loss": 5.8319, - "step": 19035 - }, - { - "epoch": 9.927509778357235, - "grad_norm": 1.510064959526062, - "learning_rate": 8.137688442211055e-05, - "loss": 5.4328, - "step": 19036 - }, - { - "epoch": 9.928031290743155, - "grad_norm": 1.371141791343689, - "learning_rate": 8.137587939698493e-05, - "loss": 5.6232, - "step": 19037 - }, - { - "epoch": 9.928552803129074, - "grad_norm": 1.355199933052063, - "learning_rate": 8.13748743718593e-05, - "loss": 5.6193, - "step": 19038 - }, - { - "epoch": 9.929074315514994, - "grad_norm": 1.5269793272018433, - "learning_rate": 8.137386934673367e-05, - "loss": 5.0624, - "step": 19039 - }, - { - "epoch": 9.929595827900913, - "grad_norm": 1.4727648496627808, - "learning_rate": 8.137286432160804e-05, - "loss": 5.772, - "step": 19040 - }, - { - "epoch": 9.930117340286833, - "grad_norm": 1.4341942071914673, - "learning_rate": 8.137185929648242e-05, - "loss": 4.9072, - "step": 19041 - }, - { - "epoch": 9.93063885267275, - "grad_norm": 1.525680661201477, - "learning_rate": 8.13708542713568e-05, - "loss": 5.2146, - "step": 19042 - }, - { - "epoch": 9.93116036505867, - "grad_norm": 1.4286155700683594, - "learning_rate": 8.136984924623116e-05, - "loss": 5.2272, - "step": 19043 - }, - { - "epoch": 9.93168187744459, - "grad_norm": 1.4472020864486694, - "learning_rate": 8.136884422110554e-05, - "loss": 5.0355, - "step": 19044 - }, - { - "epoch": 9.932203389830509, - "grad_norm": 1.394162654876709, - "learning_rate": 8.13678391959799e-05, - "loss": 5.3629, - "step": 19045 - }, - { - "epoch": 9.932724902216428, - "grad_norm": 1.4716174602508545, - "learning_rate": 8.136683417085428e-05, - "loss": 5.4329, - "step": 19046 - }, - { - "epoch": 9.933246414602348, - "grad_norm": 1.4157112836837769, - "learning_rate": 8.136582914572864e-05, - "loss": 5.531, - "step": 19047 - }, - { - "epoch": 9.933767926988265, - "grad_norm": 1.4641704559326172, - "learning_rate": 8.136482412060302e-05, - "loss": 5.4517, - "step": 19048 - }, - { - "epoch": 9.934289439374185, - "grad_norm": 1.509461760520935, - "learning_rate": 8.136381909547738e-05, - "loss": 5.3161, - "step": 19049 - }, - { - "epoch": 9.934810951760104, - "grad_norm": 1.3616735935211182, - "learning_rate": 8.136281407035176e-05, - "loss": 5.8504, - "step": 19050 - }, - { - "epoch": 9.935332464146024, - "grad_norm": 1.3911982774734497, - "learning_rate": 8.136180904522614e-05, - "loss": 5.4846, - "step": 19051 - }, - { - "epoch": 9.935853976531943, - "grad_norm": 1.4084901809692383, - "learning_rate": 8.136080402010052e-05, - "loss": 5.7379, - "step": 19052 - }, - { - "epoch": 9.936375488917863, - "grad_norm": 1.4128212928771973, - "learning_rate": 8.135979899497488e-05, - "loss": 5.6189, - "step": 19053 - }, - { - "epoch": 9.93689700130378, - "grad_norm": 1.5238960981369019, - "learning_rate": 8.135879396984925e-05, - "loss": 5.1966, - "step": 19054 - }, - { - "epoch": 9.9374185136897, - "grad_norm": 1.4262605905532837, - "learning_rate": 8.135778894472362e-05, - "loss": 5.2772, - "step": 19055 - }, - { - "epoch": 9.93794002607562, - "grad_norm": 1.5504429340362549, - "learning_rate": 8.135678391959799e-05, - "loss": 5.4162, - "step": 19056 - }, - { - "epoch": 9.938461538461539, - "grad_norm": 1.5035852193832397, - "learning_rate": 8.135577889447237e-05, - "loss": 5.5948, - "step": 19057 - }, - { - "epoch": 9.938983050847458, - "grad_norm": 1.4751832485198975, - "learning_rate": 8.135477386934673e-05, - "loss": 5.3762, - "step": 19058 - }, - { - "epoch": 9.939504563233378, - "grad_norm": 1.5473871231079102, - "learning_rate": 8.135376884422111e-05, - "loss": 5.3947, - "step": 19059 - }, - { - "epoch": 9.940026075619295, - "grad_norm": 1.7073365449905396, - "learning_rate": 8.135276381909547e-05, - "loss": 5.3955, - "step": 19060 - }, - { - "epoch": 9.940547588005215, - "grad_norm": 1.4953656196594238, - "learning_rate": 8.135175879396985e-05, - "loss": 5.5613, - "step": 19061 - }, - { - "epoch": 9.941069100391134, - "grad_norm": 1.52162766456604, - "learning_rate": 8.135075376884423e-05, - "loss": 5.4654, - "step": 19062 - }, - { - "epoch": 9.941590612777054, - "grad_norm": 1.4691964387893677, - "learning_rate": 8.13497487437186e-05, - "loss": 5.523, - "step": 19063 - }, - { - "epoch": 9.942112125162973, - "grad_norm": 1.3093374967575073, - "learning_rate": 8.134874371859297e-05, - "loss": 5.8557, - "step": 19064 - }, - { - "epoch": 9.94263363754889, - "grad_norm": 1.3412202596664429, - "learning_rate": 8.134773869346735e-05, - "loss": 5.6495, - "step": 19065 - }, - { - "epoch": 9.94315514993481, - "grad_norm": 1.3825339078903198, - "learning_rate": 8.134673366834171e-05, - "loss": 5.512, - "step": 19066 - }, - { - "epoch": 9.94367666232073, - "grad_norm": 1.4201782941818237, - "learning_rate": 8.134572864321608e-05, - "loss": 4.9418, - "step": 19067 - }, - { - "epoch": 9.94419817470665, - "grad_norm": 1.3730721473693848, - "learning_rate": 8.134472361809046e-05, - "loss": 5.4788, - "step": 19068 - }, - { - "epoch": 9.944719687092569, - "grad_norm": 1.4039807319641113, - "learning_rate": 8.134371859296482e-05, - "loss": 5.7617, - "step": 19069 - }, - { - "epoch": 9.945241199478488, - "grad_norm": 1.56761634349823, - "learning_rate": 8.13427135678392e-05, - "loss": 5.2011, - "step": 19070 - }, - { - "epoch": 9.945762711864408, - "grad_norm": 1.5166455507278442, - "learning_rate": 8.134170854271356e-05, - "loss": 5.8872, - "step": 19071 - }, - { - "epoch": 9.946284224250325, - "grad_norm": 1.5359549522399902, - "learning_rate": 8.134070351758794e-05, - "loss": 5.1756, - "step": 19072 - }, - { - "epoch": 9.946805736636245, - "grad_norm": 1.5010124444961548, - "learning_rate": 8.133969849246232e-05, - "loss": 5.6014, - "step": 19073 - }, - { - "epoch": 9.947327249022164, - "grad_norm": 1.5179216861724854, - "learning_rate": 8.13386934673367e-05, - "loss": 4.9029, - "step": 19074 - }, - { - "epoch": 9.947848761408084, - "grad_norm": 1.416573166847229, - "learning_rate": 8.133768844221106e-05, - "loss": 5.483, - "step": 19075 - }, - { - "epoch": 9.948370273794003, - "grad_norm": 1.463661789894104, - "learning_rate": 8.133668341708544e-05, - "loss": 5.3683, - "step": 19076 - }, - { - "epoch": 9.94889178617992, - "grad_norm": 1.4426389932632446, - "learning_rate": 8.13356783919598e-05, - "loss": 5.0996, - "step": 19077 - }, - { - "epoch": 9.94941329856584, - "grad_norm": 1.3805052042007446, - "learning_rate": 8.133467336683418e-05, - "loss": 5.6084, - "step": 19078 - }, - { - "epoch": 9.94993481095176, - "grad_norm": 1.3703936338424683, - "learning_rate": 8.133366834170854e-05, - "loss": 5.4708, - "step": 19079 - }, - { - "epoch": 9.95045632333768, - "grad_norm": 1.4453959465026855, - "learning_rate": 8.133266331658291e-05, - "loss": 5.5801, - "step": 19080 - }, - { - "epoch": 9.950977835723599, - "grad_norm": 1.4377617835998535, - "learning_rate": 8.133165829145729e-05, - "loss": 5.4107, - "step": 19081 - }, - { - "epoch": 9.951499348109518, - "grad_norm": 1.5398176908493042, - "learning_rate": 8.133065326633166e-05, - "loss": 5.36, - "step": 19082 - }, - { - "epoch": 9.952020860495438, - "grad_norm": 1.3322324752807617, - "learning_rate": 8.132964824120604e-05, - "loss": 5.8526, - "step": 19083 - }, - { - "epoch": 9.952542372881355, - "grad_norm": 1.4438626766204834, - "learning_rate": 8.13286432160804e-05, - "loss": 5.248, - "step": 19084 - }, - { - "epoch": 9.953063885267275, - "grad_norm": 1.5246882438659668, - "learning_rate": 8.132763819095478e-05, - "loss": 5.4507, - "step": 19085 - }, - { - "epoch": 9.953585397653194, - "grad_norm": 1.5473477840423584, - "learning_rate": 8.132663316582915e-05, - "loss": 5.057, - "step": 19086 - }, - { - "epoch": 9.954106910039114, - "grad_norm": 1.3806309700012207, - "learning_rate": 8.132562814070353e-05, - "loss": 5.9723, - "step": 19087 - }, - { - "epoch": 9.954628422425033, - "grad_norm": 1.5173453092575073, - "learning_rate": 8.132462311557789e-05, - "loss": 5.0633, - "step": 19088 - }, - { - "epoch": 9.955149934810951, - "grad_norm": 1.3269238471984863, - "learning_rate": 8.132361809045227e-05, - "loss": 5.3715, - "step": 19089 - }, - { - "epoch": 9.95567144719687, - "grad_norm": 1.367398977279663, - "learning_rate": 8.132261306532663e-05, - "loss": 5.333, - "step": 19090 - }, - { - "epoch": 9.95619295958279, - "grad_norm": 1.4719958305358887, - "learning_rate": 8.132160804020101e-05, - "loss": 5.5071, - "step": 19091 - }, - { - "epoch": 9.95671447196871, - "grad_norm": 1.3012548685073853, - "learning_rate": 8.132060301507537e-05, - "loss": 5.599, - "step": 19092 - }, - { - "epoch": 9.957235984354629, - "grad_norm": 1.501981258392334, - "learning_rate": 8.131959798994975e-05, - "loss": 5.3871, - "step": 19093 - }, - { - "epoch": 9.957757496740548, - "grad_norm": 1.3773146867752075, - "learning_rate": 8.131859296482413e-05, - "loss": 5.3722, - "step": 19094 - }, - { - "epoch": 9.958279009126468, - "grad_norm": 1.4484068155288696, - "learning_rate": 8.13175879396985e-05, - "loss": 5.5825, - "step": 19095 - }, - { - "epoch": 9.958800521512385, - "grad_norm": 1.4462921619415283, - "learning_rate": 8.131658291457287e-05, - "loss": 5.8939, - "step": 19096 - }, - { - "epoch": 9.959322033898305, - "grad_norm": 1.4065076112747192, - "learning_rate": 8.131557788944724e-05, - "loss": 5.4214, - "step": 19097 - }, - { - "epoch": 9.959843546284224, - "grad_norm": 1.4755072593688965, - "learning_rate": 8.131457286432161e-05, - "loss": 5.4905, - "step": 19098 - }, - { - "epoch": 9.960365058670144, - "grad_norm": 1.3575702905654907, - "learning_rate": 8.131356783919598e-05, - "loss": 5.7864, - "step": 19099 - }, - { - "epoch": 9.960886571056063, - "grad_norm": 1.2982832193374634, - "learning_rate": 8.131256281407036e-05, - "loss": 5.281, - "step": 19100 - }, - { - "epoch": 9.961408083441981, - "grad_norm": 1.420318603515625, - "learning_rate": 8.131155778894472e-05, - "loss": 5.4519, - "step": 19101 - }, - { - "epoch": 9.9619295958279, - "grad_norm": 1.3651236295700073, - "learning_rate": 8.13105527638191e-05, - "loss": 5.6365, - "step": 19102 - }, - { - "epoch": 9.96245110821382, - "grad_norm": 1.3081884384155273, - "learning_rate": 8.130954773869348e-05, - "loss": 5.8474, - "step": 19103 - }, - { - "epoch": 9.96297262059974, - "grad_norm": 1.3793697357177734, - "learning_rate": 8.130854271356785e-05, - "loss": 5.4978, - "step": 19104 - }, - { - "epoch": 9.963494132985659, - "grad_norm": 1.3946070671081543, - "learning_rate": 8.130753768844222e-05, - "loss": 5.3388, - "step": 19105 - }, - { - "epoch": 9.964015645371578, - "grad_norm": 1.403092861175537, - "learning_rate": 8.130653266331658e-05, - "loss": 5.5323, - "step": 19106 - }, - { - "epoch": 9.964537157757496, - "grad_norm": 1.5760133266448975, - "learning_rate": 8.130552763819096e-05, - "loss": 5.6037, - "step": 19107 - }, - { - "epoch": 9.965058670143415, - "grad_norm": 1.443266749382019, - "learning_rate": 8.130452261306532e-05, - "loss": 5.1146, - "step": 19108 - }, - { - "epoch": 9.965580182529335, - "grad_norm": 1.3929591178894043, - "learning_rate": 8.13035175879397e-05, - "loss": 5.5923, - "step": 19109 - }, - { - "epoch": 9.966101694915254, - "grad_norm": 1.5283291339874268, - "learning_rate": 8.130251256281407e-05, - "loss": 5.2434, - "step": 19110 - }, - { - "epoch": 9.966623207301174, - "grad_norm": 1.3588038682937622, - "learning_rate": 8.130150753768844e-05, - "loss": 5.7242, - "step": 19111 - }, - { - "epoch": 9.967144719687093, - "grad_norm": 1.527885913848877, - "learning_rate": 8.130050251256281e-05, - "loss": 5.2507, - "step": 19112 - }, - { - "epoch": 9.967666232073011, - "grad_norm": 1.4081473350524902, - "learning_rate": 8.129949748743719e-05, - "loss": 5.7167, - "step": 19113 - }, - { - "epoch": 9.96818774445893, - "grad_norm": 1.514689326286316, - "learning_rate": 8.129849246231156e-05, - "loss": 5.3345, - "step": 19114 - }, - { - "epoch": 9.96870925684485, - "grad_norm": 1.3903493881225586, - "learning_rate": 8.129748743718594e-05, - "loss": 5.7088, - "step": 19115 - }, - { - "epoch": 9.96923076923077, - "grad_norm": 1.497959852218628, - "learning_rate": 8.12964824120603e-05, - "loss": 5.4127, - "step": 19116 - }, - { - "epoch": 9.969752281616689, - "grad_norm": 1.5525034666061401, - "learning_rate": 8.129547738693468e-05, - "loss": 5.6004, - "step": 19117 - }, - { - "epoch": 9.970273794002608, - "grad_norm": 1.4513038396835327, - "learning_rate": 8.129447236180905e-05, - "loss": 5.7249, - "step": 19118 - }, - { - "epoch": 9.970795306388526, - "grad_norm": 1.483587622642517, - "learning_rate": 8.129346733668343e-05, - "loss": 5.2588, - "step": 19119 - }, - { - "epoch": 9.971316818774445, - "grad_norm": 1.284005045890808, - "learning_rate": 8.129246231155779e-05, - "loss": 5.8445, - "step": 19120 - }, - { - "epoch": 9.971838331160365, - "grad_norm": 1.5742162466049194, - "learning_rate": 8.129145728643215e-05, - "loss": 4.8139, - "step": 19121 - }, - { - "epoch": 9.972359843546284, - "grad_norm": 1.6167311668395996, - "learning_rate": 8.129045226130653e-05, - "loss": 5.5813, - "step": 19122 - }, - { - "epoch": 9.972881355932204, - "grad_norm": 1.394535779953003, - "learning_rate": 8.128944723618091e-05, - "loss": 5.3446, - "step": 19123 - }, - { - "epoch": 9.973402868318123, - "grad_norm": 1.4823797941207886, - "learning_rate": 8.128844221105529e-05, - "loss": 4.4942, - "step": 19124 - }, - { - "epoch": 9.973924380704041, - "grad_norm": 1.344460368156433, - "learning_rate": 8.128743718592965e-05, - "loss": 5.5226, - "step": 19125 - }, - { - "epoch": 9.97444589308996, - "grad_norm": 1.4834285974502563, - "learning_rate": 8.128643216080403e-05, - "loss": 5.4989, - "step": 19126 - }, - { - "epoch": 9.97496740547588, - "grad_norm": 1.3655022382736206, - "learning_rate": 8.12854271356784e-05, - "loss": 5.1271, - "step": 19127 - }, - { - "epoch": 9.9754889178618, - "grad_norm": 1.428627610206604, - "learning_rate": 8.128442211055277e-05, - "loss": 5.2347, - "step": 19128 - }, - { - "epoch": 9.976010430247719, - "grad_norm": 1.4022146463394165, - "learning_rate": 8.128341708542714e-05, - "loss": 5.8174, - "step": 19129 - }, - { - "epoch": 9.976531942633638, - "grad_norm": 1.3465352058410645, - "learning_rate": 8.128241206030151e-05, - "loss": 5.7361, - "step": 19130 - }, - { - "epoch": 9.977053455019556, - "grad_norm": 1.331575870513916, - "learning_rate": 8.128140703517588e-05, - "loss": 5.7451, - "step": 19131 - }, - { - "epoch": 9.977574967405475, - "grad_norm": 1.3546241521835327, - "learning_rate": 8.128040201005026e-05, - "loss": 5.5175, - "step": 19132 - }, - { - "epoch": 9.978096479791395, - "grad_norm": 1.4345918893814087, - "learning_rate": 8.127939698492462e-05, - "loss": 5.3407, - "step": 19133 - }, - { - "epoch": 9.978617992177314, - "grad_norm": 1.356874704360962, - "learning_rate": 8.1278391959799e-05, - "loss": 5.389, - "step": 19134 - }, - { - "epoch": 9.979139504563234, - "grad_norm": 1.4534032344818115, - "learning_rate": 8.127738693467338e-05, - "loss": 5.4185, - "step": 19135 - }, - { - "epoch": 9.979661016949153, - "grad_norm": 1.3718070983886719, - "learning_rate": 8.127638190954774e-05, - "loss": 5.5759, - "step": 19136 - }, - { - "epoch": 9.980182529335071, - "grad_norm": 1.3757226467132568, - "learning_rate": 8.127537688442212e-05, - "loss": 5.3197, - "step": 19137 - }, - { - "epoch": 9.98070404172099, - "grad_norm": 1.4161653518676758, - "learning_rate": 8.127437185929648e-05, - "loss": 5.5569, - "step": 19138 - }, - { - "epoch": 9.98122555410691, - "grad_norm": 1.432983636856079, - "learning_rate": 8.127336683417086e-05, - "loss": 5.288, - "step": 19139 - }, - { - "epoch": 9.98174706649283, - "grad_norm": 1.389546275138855, - "learning_rate": 8.127236180904523e-05, - "loss": 5.4848, - "step": 19140 - }, - { - "epoch": 9.982268578878749, - "grad_norm": 1.4189705848693848, - "learning_rate": 8.12713567839196e-05, - "loss": 5.5295, - "step": 19141 - }, - { - "epoch": 9.982790091264668, - "grad_norm": 1.4098879098892212, - "learning_rate": 8.127035175879397e-05, - "loss": 5.7444, - "step": 19142 - }, - { - "epoch": 9.983311603650586, - "grad_norm": 1.406623363494873, - "learning_rate": 8.126934673366835e-05, - "loss": 5.4983, - "step": 19143 - }, - { - "epoch": 9.983833116036505, - "grad_norm": 1.4388296604156494, - "learning_rate": 8.126834170854272e-05, - "loss": 5.5782, - "step": 19144 - }, - { - "epoch": 9.984354628422425, - "grad_norm": 1.3847720623016357, - "learning_rate": 8.12673366834171e-05, - "loss": 5.5961, - "step": 19145 - }, - { - "epoch": 9.984876140808344, - "grad_norm": 1.3790737390518188, - "learning_rate": 8.126633165829146e-05, - "loss": 5.3601, - "step": 19146 - }, - { - "epoch": 9.985397653194264, - "grad_norm": 1.4118373394012451, - "learning_rate": 8.126532663316583e-05, - "loss": 5.473, - "step": 19147 - }, - { - "epoch": 9.985919165580183, - "grad_norm": 1.3063966035842896, - "learning_rate": 8.126432160804021e-05, - "loss": 5.806, - "step": 19148 - }, - { - "epoch": 9.986440677966101, - "grad_norm": 1.3166313171386719, - "learning_rate": 8.126331658291457e-05, - "loss": 5.4967, - "step": 19149 - }, - { - "epoch": 9.98696219035202, - "grad_norm": 1.5583546161651611, - "learning_rate": 8.126231155778895e-05, - "loss": 5.1759, - "step": 19150 - }, - { - "epoch": 9.98748370273794, - "grad_norm": 1.504077672958374, - "learning_rate": 8.126130653266331e-05, - "loss": 5.6572, - "step": 19151 - }, - { - "epoch": 9.98800521512386, - "grad_norm": 1.3517287969589233, - "learning_rate": 8.126030150753769e-05, - "loss": 5.6065, - "step": 19152 - }, - { - "epoch": 9.988526727509779, - "grad_norm": 1.460100769996643, - "learning_rate": 8.125929648241206e-05, - "loss": 5.6533, - "step": 19153 - }, - { - "epoch": 9.989048239895698, - "grad_norm": 1.320932388305664, - "learning_rate": 8.125829145728643e-05, - "loss": 5.8114, - "step": 19154 - }, - { - "epoch": 9.989569752281616, - "grad_norm": 1.3253446817398071, - "learning_rate": 8.125728643216081e-05, - "loss": 5.4911, - "step": 19155 - }, - { - "epoch": 9.990091264667535, - "grad_norm": 1.4313970804214478, - "learning_rate": 8.125628140703519e-05, - "loss": 5.6992, - "step": 19156 - }, - { - "epoch": 9.990612777053455, - "grad_norm": 1.4552379846572876, - "learning_rate": 8.125527638190955e-05, - "loss": 5.3911, - "step": 19157 - }, - { - "epoch": 9.991134289439374, - "grad_norm": 1.6307376623153687, - "learning_rate": 8.125427135678393e-05, - "loss": 5.1911, - "step": 19158 - }, - { - "epoch": 9.991655801825294, - "grad_norm": 1.400229573249817, - "learning_rate": 8.12532663316583e-05, - "loss": 5.6716, - "step": 19159 - }, - { - "epoch": 9.992177314211212, - "grad_norm": 1.3206816911697388, - "learning_rate": 8.125226130653266e-05, - "loss": 5.5433, - "step": 19160 - }, - { - "epoch": 9.992698826597131, - "grad_norm": 1.6872576475143433, - "learning_rate": 8.125125628140704e-05, - "loss": 5.1651, - "step": 19161 - }, - { - "epoch": 9.99322033898305, - "grad_norm": 1.4041895866394043, - "learning_rate": 8.12502512562814e-05, - "loss": 5.4849, - "step": 19162 - }, - { - "epoch": 9.99374185136897, - "grad_norm": 1.4195914268493652, - "learning_rate": 8.124924623115578e-05, - "loss": 5.4219, - "step": 19163 - }, - { - "epoch": 9.99426336375489, - "grad_norm": 1.5154247283935547, - "learning_rate": 8.124824120603016e-05, - "loss": 5.6025, - "step": 19164 - }, - { - "epoch": 9.994784876140809, - "grad_norm": 1.4071340560913086, - "learning_rate": 8.124723618090454e-05, - "loss": 4.9277, - "step": 19165 - }, - { - "epoch": 9.995306388526728, - "grad_norm": 1.4853787422180176, - "learning_rate": 8.12462311557789e-05, - "loss": 5.3997, - "step": 19166 - }, - { - "epoch": 9.995827900912646, - "grad_norm": 1.794089674949646, - "learning_rate": 8.124522613065328e-05, - "loss": 4.8875, - "step": 19167 - }, - { - "epoch": 9.996349413298566, - "grad_norm": 1.427100658416748, - "learning_rate": 8.124422110552764e-05, - "loss": 5.7298, - "step": 19168 - }, - { - "epoch": 9.996870925684485, - "grad_norm": 1.4734169244766235, - "learning_rate": 8.124321608040202e-05, - "loss": 5.3671, - "step": 19169 - }, - { - "epoch": 9.997392438070404, - "grad_norm": 1.436055302619934, - "learning_rate": 8.124221105527638e-05, - "loss": 5.5048, - "step": 19170 - }, - { - "epoch": 9.997913950456324, - "grad_norm": 1.3756427764892578, - "learning_rate": 8.124120603015076e-05, - "loss": 5.7793, - "step": 19171 - }, - { - "epoch": 9.998435462842242, - "grad_norm": 1.5057133436203003, - "learning_rate": 8.124020100502513e-05, - "loss": 5.406, - "step": 19172 - }, - { - "epoch": 9.998956975228161, - "grad_norm": 1.444242000579834, - "learning_rate": 8.123919597989949e-05, - "loss": 5.2039, - "step": 19173 - }, - { - "epoch": 9.99947848761408, - "grad_norm": 1.3665419816970825, - "learning_rate": 8.123819095477387e-05, - "loss": 5.6518, - "step": 19174 - }, - { - "epoch": 10.0, - "grad_norm": 1.4895446300506592, - "learning_rate": 8.123718592964825e-05, - "loss": 5.7852, - "step": 19175 - }, - { - "epoch": 10.00052151238592, - "grad_norm": 1.3535881042480469, - "learning_rate": 8.123618090452262e-05, - "loss": 5.4831, - "step": 19176 - }, - { - "epoch": 10.001043024771839, - "grad_norm": 1.5101163387298584, - "learning_rate": 8.123517587939699e-05, - "loss": 4.9482, - "step": 19177 - }, - { - "epoch": 10.001564537157757, - "grad_norm": 1.3609122037887573, - "learning_rate": 8.123417085427137e-05, - "loss": 5.4377, - "step": 19178 - }, - { - "epoch": 10.002086049543676, - "grad_norm": 1.4885094165802002, - "learning_rate": 8.123316582914573e-05, - "loss": 4.9194, - "step": 19179 - }, - { - "epoch": 10.002607561929596, - "grad_norm": 1.359678864479065, - "learning_rate": 8.123216080402011e-05, - "loss": 5.2817, - "step": 19180 - }, - { - "epoch": 10.003129074315515, - "grad_norm": 1.4171149730682373, - "learning_rate": 8.123115577889447e-05, - "loss": 5.2793, - "step": 19181 - }, - { - "epoch": 10.003650586701434, - "grad_norm": 1.406046748161316, - "learning_rate": 8.123015075376885e-05, - "loss": 5.5008, - "step": 19182 - }, - { - "epoch": 10.004172099087354, - "grad_norm": 1.5066537857055664, - "learning_rate": 8.122914572864321e-05, - "loss": 5.5782, - "step": 19183 - }, - { - "epoch": 10.004693611473272, - "grad_norm": 1.3261677026748657, - "learning_rate": 8.122814070351759e-05, - "loss": 5.7969, - "step": 19184 - }, - { - "epoch": 10.005215123859191, - "grad_norm": 1.5266515016555786, - "learning_rate": 8.122713567839197e-05, - "loss": 5.0743, - "step": 19185 - }, - { - "epoch": 10.00573663624511, - "grad_norm": 1.331714391708374, - "learning_rate": 8.122613065326633e-05, - "loss": 5.8745, - "step": 19186 - }, - { - "epoch": 10.00625814863103, - "grad_norm": 1.3899743556976318, - "learning_rate": 8.122512562814071e-05, - "loss": 5.7709, - "step": 19187 - }, - { - "epoch": 10.00677966101695, - "grad_norm": 1.5344220399856567, - "learning_rate": 8.122412060301508e-05, - "loss": 5.0891, - "step": 19188 - }, - { - "epoch": 10.007301173402869, - "grad_norm": 1.4051916599273682, - "learning_rate": 8.122311557788945e-05, - "loss": 5.7676, - "step": 19189 - }, - { - "epoch": 10.007822685788787, - "grad_norm": 1.4859658479690552, - "learning_rate": 8.122211055276382e-05, - "loss": 5.1602, - "step": 19190 - }, - { - "epoch": 10.008344198174706, - "grad_norm": 1.427297592163086, - "learning_rate": 8.12211055276382e-05, - "loss": 5.5231, - "step": 19191 - }, - { - "epoch": 10.008865710560626, - "grad_norm": 1.5073884725570679, - "learning_rate": 8.122010050251256e-05, - "loss": 5.5955, - "step": 19192 - }, - { - "epoch": 10.009387222946545, - "grad_norm": 1.3651376962661743, - "learning_rate": 8.121909547738694e-05, - "loss": 5.4368, - "step": 19193 - }, - { - "epoch": 10.009908735332465, - "grad_norm": 1.4432231187820435, - "learning_rate": 8.12180904522613e-05, - "loss": 5.4201, - "step": 19194 - }, - { - "epoch": 10.010430247718384, - "grad_norm": 1.496381163597107, - "learning_rate": 8.121708542713568e-05, - "loss": 5.1609, - "step": 19195 - }, - { - "epoch": 10.010951760104302, - "grad_norm": 1.4189872741699219, - "learning_rate": 8.121608040201006e-05, - "loss": 5.6764, - "step": 19196 - }, - { - "epoch": 10.011473272490221, - "grad_norm": 1.487499713897705, - "learning_rate": 8.121507537688444e-05, - "loss": 5.4268, - "step": 19197 - }, - { - "epoch": 10.01199478487614, - "grad_norm": 1.4959497451782227, - "learning_rate": 8.12140703517588e-05, - "loss": 5.6745, - "step": 19198 - }, - { - "epoch": 10.01251629726206, - "grad_norm": 1.5172396898269653, - "learning_rate": 8.121306532663316e-05, - "loss": 5.4773, - "step": 19199 - }, - { - "epoch": 10.01303780964798, - "grad_norm": 1.5348068475723267, - "learning_rate": 8.121206030150754e-05, - "loss": 5.5291, - "step": 19200 - }, - { - "epoch": 10.013559322033899, - "grad_norm": 1.4773908853530884, - "learning_rate": 8.12110552763819e-05, - "loss": 5.3614, - "step": 19201 - }, - { - "epoch": 10.014080834419817, - "grad_norm": 1.494300365447998, - "learning_rate": 8.121005025125628e-05, - "loss": 5.3198, - "step": 19202 - }, - { - "epoch": 10.014602346805736, - "grad_norm": 1.4780830144882202, - "learning_rate": 8.120904522613065e-05, - "loss": 5.5639, - "step": 19203 - }, - { - "epoch": 10.015123859191656, - "grad_norm": 1.4712142944335938, - "learning_rate": 8.120804020100503e-05, - "loss": 5.4654, - "step": 19204 - }, - { - "epoch": 10.015645371577575, - "grad_norm": 1.4856996536254883, - "learning_rate": 8.12070351758794e-05, - "loss": 4.9498, - "step": 19205 - }, - { - "epoch": 10.016166883963495, - "grad_norm": 1.514612078666687, - "learning_rate": 8.120603015075378e-05, - "loss": 5.2722, - "step": 19206 - }, - { - "epoch": 10.016688396349414, - "grad_norm": 1.4236782789230347, - "learning_rate": 8.120502512562815e-05, - "loss": 5.5055, - "step": 19207 - }, - { - "epoch": 10.017209908735332, - "grad_norm": 1.4451059103012085, - "learning_rate": 8.120402010050252e-05, - "loss": 5.618, - "step": 19208 - }, - { - "epoch": 10.017731421121251, - "grad_norm": 1.487575650215149, - "learning_rate": 8.120301507537689e-05, - "loss": 5.589, - "step": 19209 - }, - { - "epoch": 10.01825293350717, - "grad_norm": 1.370160460472107, - "learning_rate": 8.120201005025127e-05, - "loss": 5.2558, - "step": 19210 - }, - { - "epoch": 10.01877444589309, - "grad_norm": 1.4894109964370728, - "learning_rate": 8.120100502512563e-05, - "loss": 4.8025, - "step": 19211 - }, - { - "epoch": 10.01929595827901, - "grad_norm": 1.4270145893096924, - "learning_rate": 8.120000000000001e-05, - "loss": 5.447, - "step": 19212 - }, - { - "epoch": 10.019817470664929, - "grad_norm": 1.5027819871902466, - "learning_rate": 8.119899497487437e-05, - "loss": 5.3873, - "step": 19213 - }, - { - "epoch": 10.020338983050847, - "grad_norm": 1.4780253171920776, - "learning_rate": 8.119798994974874e-05, - "loss": 5.4928, - "step": 19214 - }, - { - "epoch": 10.020860495436766, - "grad_norm": 1.502392292022705, - "learning_rate": 8.119698492462311e-05, - "loss": 5.5449, - "step": 19215 - }, - { - "epoch": 10.021382007822686, - "grad_norm": 1.3431240320205688, - "learning_rate": 8.119597989949749e-05, - "loss": 5.7271, - "step": 19216 - }, - { - "epoch": 10.021903520208605, - "grad_norm": 1.4712812900543213, - "learning_rate": 8.119497487437187e-05, - "loss": 5.5106, - "step": 19217 - }, - { - "epoch": 10.022425032594525, - "grad_norm": 1.4397385120391846, - "learning_rate": 8.119396984924623e-05, - "loss": 5.4591, - "step": 19218 - }, - { - "epoch": 10.022946544980444, - "grad_norm": 1.3612157106399536, - "learning_rate": 8.119296482412061e-05, - "loss": 5.6743, - "step": 19219 - }, - { - "epoch": 10.023468057366362, - "grad_norm": 1.4182382822036743, - "learning_rate": 8.119195979899498e-05, - "loss": 5.3529, - "step": 19220 - }, - { - "epoch": 10.023989569752281, - "grad_norm": 1.4806327819824219, - "learning_rate": 8.119095477386935e-05, - "loss": 5.5076, - "step": 19221 - }, - { - "epoch": 10.0245110821382, - "grad_norm": 1.3532460927963257, - "learning_rate": 8.118994974874372e-05, - "loss": 5.7604, - "step": 19222 - }, - { - "epoch": 10.02503259452412, - "grad_norm": 1.7349433898925781, - "learning_rate": 8.11889447236181e-05, - "loss": 5.2048, - "step": 19223 - }, - { - "epoch": 10.02555410691004, - "grad_norm": 1.383598804473877, - "learning_rate": 8.118793969849246e-05, - "loss": 5.8589, - "step": 19224 - }, - { - "epoch": 10.026075619295959, - "grad_norm": 1.5332878828048706, - "learning_rate": 8.118693467336684e-05, - "loss": 5.2024, - "step": 19225 - }, - { - "epoch": 10.026597131681877, - "grad_norm": 1.41984224319458, - "learning_rate": 8.118592964824122e-05, - "loss": 5.4899, - "step": 19226 - }, - { - "epoch": 10.027118644067796, - "grad_norm": 1.4189543724060059, - "learning_rate": 8.118492462311558e-05, - "loss": 5.6564, - "step": 19227 - }, - { - "epoch": 10.027640156453716, - "grad_norm": 1.540278434753418, - "learning_rate": 8.118391959798996e-05, - "loss": 5.0673, - "step": 19228 - }, - { - "epoch": 10.028161668839635, - "grad_norm": 1.5051237344741821, - "learning_rate": 8.118291457286432e-05, - "loss": 5.47, - "step": 19229 - }, - { - "epoch": 10.028683181225555, - "grad_norm": 1.3818399906158447, - "learning_rate": 8.11819095477387e-05, - "loss": 5.5001, - "step": 19230 - }, - { - "epoch": 10.029204693611474, - "grad_norm": 1.543975591659546, - "learning_rate": 8.118090452261307e-05, - "loss": 5.258, - "step": 19231 - }, - { - "epoch": 10.029726205997392, - "grad_norm": 1.5582503080368042, - "learning_rate": 8.117989949748744e-05, - "loss": 5.428, - "step": 19232 - }, - { - "epoch": 10.030247718383311, - "grad_norm": 1.5041838884353638, - "learning_rate": 8.117889447236181e-05, - "loss": 4.9071, - "step": 19233 - }, - { - "epoch": 10.03076923076923, - "grad_norm": 1.5186221599578857, - "learning_rate": 8.117788944723619e-05, - "loss": 5.512, - "step": 19234 - }, - { - "epoch": 10.03129074315515, - "grad_norm": 1.4308427572250366, - "learning_rate": 8.117688442211055e-05, - "loss": 5.3441, - "step": 19235 - }, - { - "epoch": 10.03181225554107, - "grad_norm": 1.4917926788330078, - "learning_rate": 8.117587939698493e-05, - "loss": 4.9616, - "step": 19236 - }, - { - "epoch": 10.032333767926989, - "grad_norm": 1.531262993812561, - "learning_rate": 8.11748743718593e-05, - "loss": 4.8622, - "step": 19237 - }, - { - "epoch": 10.032855280312907, - "grad_norm": 1.365200400352478, - "learning_rate": 8.117386934673368e-05, - "loss": 5.8046, - "step": 19238 - }, - { - "epoch": 10.033376792698826, - "grad_norm": 1.3833461999893188, - "learning_rate": 8.117286432160805e-05, - "loss": 5.3911, - "step": 19239 - }, - { - "epoch": 10.033898305084746, - "grad_norm": 1.380707025527954, - "learning_rate": 8.117185929648241e-05, - "loss": 5.5584, - "step": 19240 - }, - { - "epoch": 10.034419817470665, - "grad_norm": 1.441726565361023, - "learning_rate": 8.117085427135679e-05, - "loss": 5.5312, - "step": 19241 - }, - { - "epoch": 10.034941329856585, - "grad_norm": 1.6169283390045166, - "learning_rate": 8.116984924623115e-05, - "loss": 5.0093, - "step": 19242 - }, - { - "epoch": 10.035462842242504, - "grad_norm": 1.451399803161621, - "learning_rate": 8.116884422110553e-05, - "loss": 5.3974, - "step": 19243 - }, - { - "epoch": 10.035984354628422, - "grad_norm": 1.3837239742279053, - "learning_rate": 8.11678391959799e-05, - "loss": 5.7725, - "step": 19244 - }, - { - "epoch": 10.036505867014341, - "grad_norm": 1.3740222454071045, - "learning_rate": 8.116683417085427e-05, - "loss": 5.2841, - "step": 19245 - }, - { - "epoch": 10.03702737940026, - "grad_norm": 1.4473029375076294, - "learning_rate": 8.116582914572864e-05, - "loss": 5.4854, - "step": 19246 - }, - { - "epoch": 10.03754889178618, - "grad_norm": 1.4862998723983765, - "learning_rate": 8.116482412060302e-05, - "loss": 5.6429, - "step": 19247 - }, - { - "epoch": 10.0380704041721, - "grad_norm": 1.5405683517456055, - "learning_rate": 8.11638190954774e-05, - "loss": 5.6094, - "step": 19248 - }, - { - "epoch": 10.038591916558019, - "grad_norm": 1.3177255392074585, - "learning_rate": 8.116281407035177e-05, - "loss": 5.5173, - "step": 19249 - }, - { - "epoch": 10.039113428943937, - "grad_norm": 1.5374847650527954, - "learning_rate": 8.116180904522614e-05, - "loss": 5.5432, - "step": 19250 - }, - { - "epoch": 10.039634941329856, - "grad_norm": 1.396622657775879, - "learning_rate": 8.116080402010051e-05, - "loss": 5.4305, - "step": 19251 - }, - { - "epoch": 10.040156453715776, - "grad_norm": 1.354366421699524, - "learning_rate": 8.115979899497488e-05, - "loss": 5.8078, - "step": 19252 - }, - { - "epoch": 10.040677966101695, - "grad_norm": 1.49080228805542, - "learning_rate": 8.115879396984924e-05, - "loss": 5.6135, - "step": 19253 - }, - { - "epoch": 10.041199478487615, - "grad_norm": 1.6180803775787354, - "learning_rate": 8.115778894472362e-05, - "loss": 5.2574, - "step": 19254 - }, - { - "epoch": 10.041720990873534, - "grad_norm": 1.431665301322937, - "learning_rate": 8.115678391959798e-05, - "loss": 5.5684, - "step": 19255 - }, - { - "epoch": 10.042242503259452, - "grad_norm": 1.4182755947113037, - "learning_rate": 8.115577889447236e-05, - "loss": 5.2074, - "step": 19256 - }, - { - "epoch": 10.042764015645371, - "grad_norm": 1.4100284576416016, - "learning_rate": 8.115477386934674e-05, - "loss": 5.4743, - "step": 19257 - }, - { - "epoch": 10.04328552803129, - "grad_norm": 1.3494096994400024, - "learning_rate": 8.115376884422112e-05, - "loss": 5.6684, - "step": 19258 - }, - { - "epoch": 10.04380704041721, - "grad_norm": 1.4632843732833862, - "learning_rate": 8.115276381909548e-05, - "loss": 5.6635, - "step": 19259 - }, - { - "epoch": 10.04432855280313, - "grad_norm": 1.3621827363967896, - "learning_rate": 8.115175879396986e-05, - "loss": 5.721, - "step": 19260 - }, - { - "epoch": 10.044850065189049, - "grad_norm": 1.4241751432418823, - "learning_rate": 8.115075376884422e-05, - "loss": 5.2756, - "step": 19261 - }, - { - "epoch": 10.045371577574967, - "grad_norm": 1.68697190284729, - "learning_rate": 8.11497487437186e-05, - "loss": 5.0448, - "step": 19262 - }, - { - "epoch": 10.045893089960886, - "grad_norm": 1.4379736185073853, - "learning_rate": 8.114874371859297e-05, - "loss": 5.7328, - "step": 19263 - }, - { - "epoch": 10.046414602346806, - "grad_norm": 1.338120698928833, - "learning_rate": 8.114773869346734e-05, - "loss": 5.0565, - "step": 19264 - }, - { - "epoch": 10.046936114732725, - "grad_norm": 1.503679633140564, - "learning_rate": 8.114673366834171e-05, - "loss": 5.6368, - "step": 19265 - }, - { - "epoch": 10.047457627118645, - "grad_norm": 1.379663348197937, - "learning_rate": 8.114572864321607e-05, - "loss": 5.5629, - "step": 19266 - }, - { - "epoch": 10.047979139504564, - "grad_norm": 1.4055668115615845, - "learning_rate": 8.114472361809045e-05, - "loss": 5.401, - "step": 19267 - }, - { - "epoch": 10.048500651890482, - "grad_norm": 1.4922829866409302, - "learning_rate": 8.114371859296483e-05, - "loss": 5.3688, - "step": 19268 - }, - { - "epoch": 10.049022164276401, - "grad_norm": 1.3867160081863403, - "learning_rate": 8.11427135678392e-05, - "loss": 5.8862, - "step": 19269 - }, - { - "epoch": 10.04954367666232, - "grad_norm": 2.515937089920044, - "learning_rate": 8.114170854271357e-05, - "loss": 4.9629, - "step": 19270 - }, - { - "epoch": 10.05006518904824, - "grad_norm": 1.5366891622543335, - "learning_rate": 8.114070351758795e-05, - "loss": 5.4601, - "step": 19271 - }, - { - "epoch": 10.05058670143416, - "grad_norm": 1.474718689918518, - "learning_rate": 8.113969849246231e-05, - "loss": 5.4887, - "step": 19272 - }, - { - "epoch": 10.051108213820077, - "grad_norm": 1.3567965030670166, - "learning_rate": 8.113869346733669e-05, - "loss": 5.8179, - "step": 19273 - }, - { - "epoch": 10.051629726205997, - "grad_norm": 1.3927943706512451, - "learning_rate": 8.113768844221105e-05, - "loss": 5.2114, - "step": 19274 - }, - { - "epoch": 10.052151238591916, - "grad_norm": 1.4238077402114868, - "learning_rate": 8.113668341708543e-05, - "loss": 5.1244, - "step": 19275 - }, - { - "epoch": 10.052672750977836, - "grad_norm": 1.471169352531433, - "learning_rate": 8.11356783919598e-05, - "loss": 5.4444, - "step": 19276 - }, - { - "epoch": 10.053194263363755, - "grad_norm": 1.5888460874557495, - "learning_rate": 8.113467336683417e-05, - "loss": 5.4136, - "step": 19277 - }, - { - "epoch": 10.053715775749675, - "grad_norm": 1.392857313156128, - "learning_rate": 8.113366834170855e-05, - "loss": 5.5142, - "step": 19278 - }, - { - "epoch": 10.054237288135592, - "grad_norm": 1.7594866752624512, - "learning_rate": 8.113266331658292e-05, - "loss": 5.1871, - "step": 19279 - }, - { - "epoch": 10.054758800521512, - "grad_norm": 1.4361591339111328, - "learning_rate": 8.11316582914573e-05, - "loss": 5.482, - "step": 19280 - }, - { - "epoch": 10.055280312907431, - "grad_norm": 1.4749622344970703, - "learning_rate": 8.113065326633166e-05, - "loss": 5.6214, - "step": 19281 - }, - { - "epoch": 10.05580182529335, - "grad_norm": 1.5904312133789062, - "learning_rate": 8.112964824120604e-05, - "loss": 5.2324, - "step": 19282 - }, - { - "epoch": 10.05632333767927, - "grad_norm": 1.427329659461975, - "learning_rate": 8.11286432160804e-05, - "loss": 5.6467, - "step": 19283 - }, - { - "epoch": 10.05684485006519, - "grad_norm": 1.5107073783874512, - "learning_rate": 8.112763819095478e-05, - "loss": 5.6049, - "step": 19284 - }, - { - "epoch": 10.057366362451107, - "grad_norm": 1.4855140447616577, - "learning_rate": 8.112663316582914e-05, - "loss": 5.5145, - "step": 19285 - }, - { - "epoch": 10.057887874837027, - "grad_norm": 1.4284321069717407, - "learning_rate": 8.112562814070352e-05, - "loss": 5.6905, - "step": 19286 - }, - { - "epoch": 10.058409387222946, - "grad_norm": 1.4676274061203003, - "learning_rate": 8.112462311557788e-05, - "loss": 5.6387, - "step": 19287 - }, - { - "epoch": 10.058930899608866, - "grad_norm": 1.5244499444961548, - "learning_rate": 8.112361809045226e-05, - "loss": 4.9867, - "step": 19288 - }, - { - "epoch": 10.059452411994785, - "grad_norm": 1.493977665901184, - "learning_rate": 8.112261306532664e-05, - "loss": 5.4306, - "step": 19289 - }, - { - "epoch": 10.059973924380705, - "grad_norm": 1.535022258758545, - "learning_rate": 8.112160804020102e-05, - "loss": 5.3021, - "step": 19290 - }, - { - "epoch": 10.060495436766622, - "grad_norm": 1.5175269842147827, - "learning_rate": 8.112060301507538e-05, - "loss": 5.4837, - "step": 19291 - }, - { - "epoch": 10.061016949152542, - "grad_norm": 1.3836230039596558, - "learning_rate": 8.111959798994975e-05, - "loss": 5.3813, - "step": 19292 - }, - { - "epoch": 10.061538461538461, - "grad_norm": 1.4451349973678589, - "learning_rate": 8.111859296482412e-05, - "loss": 5.7162, - "step": 19293 - }, - { - "epoch": 10.06205997392438, - "grad_norm": 1.4583630561828613, - "learning_rate": 8.111758793969849e-05, - "loss": 5.83, - "step": 19294 - }, - { - "epoch": 10.0625814863103, - "grad_norm": 1.5019235610961914, - "learning_rate": 8.111658291457287e-05, - "loss": 5.0436, - "step": 19295 - }, - { - "epoch": 10.06310299869622, - "grad_norm": 1.531646490097046, - "learning_rate": 8.111557788944723e-05, - "loss": 4.8945, - "step": 19296 - }, - { - "epoch": 10.063624511082137, - "grad_norm": 1.3866416215896606, - "learning_rate": 8.111457286432161e-05, - "loss": 4.9915, - "step": 19297 - }, - { - "epoch": 10.064146023468057, - "grad_norm": 1.4777435064315796, - "learning_rate": 8.111356783919599e-05, - "loss": 5.4067, - "step": 19298 - }, - { - "epoch": 10.064667535853976, - "grad_norm": 1.4132122993469238, - "learning_rate": 8.111256281407036e-05, - "loss": 5.4919, - "step": 19299 - }, - { - "epoch": 10.065189048239896, - "grad_norm": 1.4551937580108643, - "learning_rate": 8.111155778894473e-05, - "loss": 5.8075, - "step": 19300 - }, - { - "epoch": 10.065710560625815, - "grad_norm": 1.3863393068313599, - "learning_rate": 8.11105527638191e-05, - "loss": 5.5971, - "step": 19301 - }, - { - "epoch": 10.066232073011735, - "grad_norm": 1.4928009510040283, - "learning_rate": 8.110954773869347e-05, - "loss": 5.0422, - "step": 19302 - }, - { - "epoch": 10.066753585397652, - "grad_norm": 1.4350641965866089, - "learning_rate": 8.110854271356785e-05, - "loss": 5.8135, - "step": 19303 - }, - { - "epoch": 10.067275097783572, - "grad_norm": 1.7269724607467651, - "learning_rate": 8.110753768844221e-05, - "loss": 5.4302, - "step": 19304 - }, - { - "epoch": 10.067796610169491, - "grad_norm": 1.3695640563964844, - "learning_rate": 8.110653266331659e-05, - "loss": 5.6059, - "step": 19305 - }, - { - "epoch": 10.06831812255541, - "grad_norm": 1.4098560810089111, - "learning_rate": 8.110552763819096e-05, - "loss": 5.7357, - "step": 19306 - }, - { - "epoch": 10.06883963494133, - "grad_norm": 1.4084358215332031, - "learning_rate": 8.110452261306532e-05, - "loss": 5.7217, - "step": 19307 - }, - { - "epoch": 10.06936114732725, - "grad_norm": 1.4089970588684082, - "learning_rate": 8.11035175879397e-05, - "loss": 5.3665, - "step": 19308 - }, - { - "epoch": 10.069882659713167, - "grad_norm": 1.421798586845398, - "learning_rate": 8.110251256281408e-05, - "loss": 5.3983, - "step": 19309 - }, - { - "epoch": 10.070404172099087, - "grad_norm": 1.4100911617279053, - "learning_rate": 8.110150753768845e-05, - "loss": 5.4405, - "step": 19310 - }, - { - "epoch": 10.070925684485006, - "grad_norm": 1.4434186220169067, - "learning_rate": 8.110050251256282e-05, - "loss": 5.7355, - "step": 19311 - }, - { - "epoch": 10.071447196870926, - "grad_norm": 1.3049589395523071, - "learning_rate": 8.10994974874372e-05, - "loss": 5.4985, - "step": 19312 - }, - { - "epoch": 10.071968709256845, - "grad_norm": 1.4290828704833984, - "learning_rate": 8.109849246231156e-05, - "loss": 5.4818, - "step": 19313 - }, - { - "epoch": 10.072490221642765, - "grad_norm": 1.4059878587722778, - "learning_rate": 8.109748743718594e-05, - "loss": 5.6919, - "step": 19314 - }, - { - "epoch": 10.073011734028682, - "grad_norm": 1.525236964225769, - "learning_rate": 8.10964824120603e-05, - "loss": 5.7167, - "step": 19315 - }, - { - "epoch": 10.073533246414602, - "grad_norm": 1.4142699241638184, - "learning_rate": 8.109547738693468e-05, - "loss": 5.5478, - "step": 19316 - }, - { - "epoch": 10.074054758800521, - "grad_norm": 1.2994166612625122, - "learning_rate": 8.109447236180904e-05, - "loss": 5.8627, - "step": 19317 - }, - { - "epoch": 10.07457627118644, - "grad_norm": 1.4129185676574707, - "learning_rate": 8.109346733668342e-05, - "loss": 5.3876, - "step": 19318 - }, - { - "epoch": 10.07509778357236, - "grad_norm": 1.5683854818344116, - "learning_rate": 8.10924623115578e-05, - "loss": 5.1457, - "step": 19319 - }, - { - "epoch": 10.07561929595828, - "grad_norm": 1.378893494606018, - "learning_rate": 8.109145728643216e-05, - "loss": 5.6973, - "step": 19320 - }, - { - "epoch": 10.076140808344197, - "grad_norm": 1.4195994138717651, - "learning_rate": 8.109045226130654e-05, - "loss": 5.7066, - "step": 19321 - }, - { - "epoch": 10.076662320730117, - "grad_norm": 1.4260025024414062, - "learning_rate": 8.10894472361809e-05, - "loss": 5.6352, - "step": 19322 - }, - { - "epoch": 10.077183833116036, - "grad_norm": 1.4632985591888428, - "learning_rate": 8.108844221105528e-05, - "loss": 5.5168, - "step": 19323 - }, - { - "epoch": 10.077705345501956, - "grad_norm": 1.6575618982315063, - "learning_rate": 8.108743718592965e-05, - "loss": 5.1193, - "step": 19324 - }, - { - "epoch": 10.078226857887875, - "grad_norm": 1.5155746936798096, - "learning_rate": 8.108643216080403e-05, - "loss": 5.014, - "step": 19325 - }, - { - "epoch": 10.078748370273795, - "grad_norm": 1.4355226755142212, - "learning_rate": 8.108542713567839e-05, - "loss": 5.4741, - "step": 19326 - }, - { - "epoch": 10.079269882659712, - "grad_norm": 1.430546522140503, - "learning_rate": 8.108442211055277e-05, - "loss": 5.6278, - "step": 19327 - }, - { - "epoch": 10.079791395045632, - "grad_norm": 1.4982028007507324, - "learning_rate": 8.108341708542713e-05, - "loss": 5.4793, - "step": 19328 - }, - { - "epoch": 10.080312907431551, - "grad_norm": 1.305800437927246, - "learning_rate": 8.108241206030151e-05, - "loss": 5.6507, - "step": 19329 - }, - { - "epoch": 10.08083441981747, - "grad_norm": 1.5027934312820435, - "learning_rate": 8.108140703517589e-05, - "loss": 5.4888, - "step": 19330 - }, - { - "epoch": 10.08135593220339, - "grad_norm": 1.4169176816940308, - "learning_rate": 8.108040201005027e-05, - "loss": 5.8532, - "step": 19331 - }, - { - "epoch": 10.08187744458931, - "grad_norm": 1.4002432823181152, - "learning_rate": 8.107939698492463e-05, - "loss": 5.2923, - "step": 19332 - }, - { - "epoch": 10.082398956975227, - "grad_norm": 1.5200055837631226, - "learning_rate": 8.1078391959799e-05, - "loss": 5.4707, - "step": 19333 - }, - { - "epoch": 10.082920469361147, - "grad_norm": 1.373326301574707, - "learning_rate": 8.107738693467337e-05, - "loss": 5.6683, - "step": 19334 - }, - { - "epoch": 10.083441981747066, - "grad_norm": 1.4947618246078491, - "learning_rate": 8.107638190954774e-05, - "loss": 5.5216, - "step": 19335 - }, - { - "epoch": 10.083963494132986, - "grad_norm": 1.5954937934875488, - "learning_rate": 8.107537688442211e-05, - "loss": 5.4502, - "step": 19336 - }, - { - "epoch": 10.084485006518905, - "grad_norm": 1.3913668394088745, - "learning_rate": 8.107437185929648e-05, - "loss": 5.6349, - "step": 19337 - }, - { - "epoch": 10.085006518904825, - "grad_norm": 1.3716650009155273, - "learning_rate": 8.107336683417086e-05, - "loss": 5.5101, - "step": 19338 - }, - { - "epoch": 10.085528031290742, - "grad_norm": 1.420180320739746, - "learning_rate": 8.107236180904523e-05, - "loss": 5.2732, - "step": 19339 - }, - { - "epoch": 10.086049543676662, - "grad_norm": 1.3234541416168213, - "learning_rate": 8.107135678391961e-05, - "loss": 5.8117, - "step": 19340 - }, - { - "epoch": 10.086571056062581, - "grad_norm": 1.9023507833480835, - "learning_rate": 8.107035175879398e-05, - "loss": 5.3586, - "step": 19341 - }, - { - "epoch": 10.0870925684485, - "grad_norm": 1.6827796697616577, - "learning_rate": 8.106934673366835e-05, - "loss": 5.2181, - "step": 19342 - }, - { - "epoch": 10.08761408083442, - "grad_norm": 1.3675897121429443, - "learning_rate": 8.106834170854272e-05, - "loss": 5.5892, - "step": 19343 - }, - { - "epoch": 10.08813559322034, - "grad_norm": 1.315682053565979, - "learning_rate": 8.10673366834171e-05, - "loss": 4.6557, - "step": 19344 - }, - { - "epoch": 10.088657105606258, - "grad_norm": 1.3968167304992676, - "learning_rate": 8.106633165829146e-05, - "loss": 5.5062, - "step": 19345 - }, - { - "epoch": 10.089178617992177, - "grad_norm": 1.4380054473876953, - "learning_rate": 8.106532663316582e-05, - "loss": 5.3305, - "step": 19346 - }, - { - "epoch": 10.089700130378096, - "grad_norm": 1.3944514989852905, - "learning_rate": 8.10643216080402e-05, - "loss": 5.6389, - "step": 19347 - }, - { - "epoch": 10.090221642764016, - "grad_norm": 1.4481744766235352, - "learning_rate": 8.106331658291457e-05, - "loss": 5.0974, - "step": 19348 - }, - { - "epoch": 10.090743155149935, - "grad_norm": 1.4218538999557495, - "learning_rate": 8.106231155778894e-05, - "loss": 5.3883, - "step": 19349 - }, - { - "epoch": 10.091264667535855, - "grad_norm": 1.4861276149749756, - "learning_rate": 8.106130653266332e-05, - "loss": 5.6051, - "step": 19350 - }, - { - "epoch": 10.091786179921773, - "grad_norm": 1.5284123420715332, - "learning_rate": 8.10603015075377e-05, - "loss": 5.2251, - "step": 19351 - }, - { - "epoch": 10.092307692307692, - "grad_norm": 1.428809404373169, - "learning_rate": 8.105929648241206e-05, - "loss": 5.4539, - "step": 19352 - }, - { - "epoch": 10.092829204693611, - "grad_norm": 1.4252064228057861, - "learning_rate": 8.105829145728644e-05, - "loss": 4.7584, - "step": 19353 - }, - { - "epoch": 10.093350717079531, - "grad_norm": 1.4737119674682617, - "learning_rate": 8.10572864321608e-05, - "loss": 5.9467, - "step": 19354 - }, - { - "epoch": 10.09387222946545, - "grad_norm": 1.4642868041992188, - "learning_rate": 8.105628140703518e-05, - "loss": 4.7951, - "step": 19355 - }, - { - "epoch": 10.09439374185137, - "grad_norm": 1.4414517879486084, - "learning_rate": 8.105527638190955e-05, - "loss": 5.6432, - "step": 19356 - }, - { - "epoch": 10.094915254237288, - "grad_norm": 1.6741973161697388, - "learning_rate": 8.105427135678393e-05, - "loss": 4.8376, - "step": 19357 - }, - { - "epoch": 10.095436766623207, - "grad_norm": 1.4339593648910522, - "learning_rate": 8.105326633165829e-05, - "loss": 5.7529, - "step": 19358 - }, - { - "epoch": 10.095958279009126, - "grad_norm": 1.4616647958755493, - "learning_rate": 8.105226130653267e-05, - "loss": 5.6174, - "step": 19359 - }, - { - "epoch": 10.096479791395046, - "grad_norm": 1.4643762111663818, - "learning_rate": 8.105125628140705e-05, - "loss": 5.4295, - "step": 19360 - }, - { - "epoch": 10.097001303780965, - "grad_norm": 1.4800091981887817, - "learning_rate": 8.105025125628141e-05, - "loss": 5.4135, - "step": 19361 - }, - { - "epoch": 10.097522816166883, - "grad_norm": 1.598447322845459, - "learning_rate": 8.104924623115579e-05, - "loss": 4.9903, - "step": 19362 - }, - { - "epoch": 10.098044328552803, - "grad_norm": 1.6117134094238281, - "learning_rate": 8.104824120603015e-05, - "loss": 5.7504, - "step": 19363 - }, - { - "epoch": 10.098565840938722, - "grad_norm": 1.470806360244751, - "learning_rate": 8.104723618090453e-05, - "loss": 6.0269, - "step": 19364 - }, - { - "epoch": 10.099087353324641, - "grad_norm": 1.3830100297927856, - "learning_rate": 8.10462311557789e-05, - "loss": 5.5934, - "step": 19365 - }, - { - "epoch": 10.099608865710561, - "grad_norm": 1.4174232482910156, - "learning_rate": 8.104522613065327e-05, - "loss": 5.7619, - "step": 19366 - }, - { - "epoch": 10.10013037809648, - "grad_norm": 1.622283697128296, - "learning_rate": 8.104422110552764e-05, - "loss": 5.3888, - "step": 19367 - }, - { - "epoch": 10.100651890482398, - "grad_norm": 1.6269261837005615, - "learning_rate": 8.104321608040201e-05, - "loss": 5.6361, - "step": 19368 - }, - { - "epoch": 10.101173402868318, - "grad_norm": 1.5204291343688965, - "learning_rate": 8.104221105527638e-05, - "loss": 5.4566, - "step": 19369 - }, - { - "epoch": 10.101694915254237, - "grad_norm": 1.4245600700378418, - "learning_rate": 8.104120603015076e-05, - "loss": 5.4761, - "step": 19370 - }, - { - "epoch": 10.102216427640156, - "grad_norm": 1.4549909830093384, - "learning_rate": 8.104020100502513e-05, - "loss": 5.2651, - "step": 19371 - }, - { - "epoch": 10.102737940026076, - "grad_norm": 1.4305329322814941, - "learning_rate": 8.10391959798995e-05, - "loss": 5.5125, - "step": 19372 - }, - { - "epoch": 10.103259452411995, - "grad_norm": 1.3953747749328613, - "learning_rate": 8.103819095477388e-05, - "loss": 5.8032, - "step": 19373 - }, - { - "epoch": 10.103780964797913, - "grad_norm": 1.3545929193496704, - "learning_rate": 8.103718592964824e-05, - "loss": 5.8731, - "step": 19374 - }, - { - "epoch": 10.104302477183833, - "grad_norm": 1.460200548171997, - "learning_rate": 8.103618090452262e-05, - "loss": 4.9004, - "step": 19375 - }, - { - "epoch": 10.104823989569752, - "grad_norm": 1.399328589439392, - "learning_rate": 8.103517587939698e-05, - "loss": 5.4886, - "step": 19376 - }, - { - "epoch": 10.105345501955671, - "grad_norm": 1.5347579717636108, - "learning_rate": 8.103417085427136e-05, - "loss": 5.4536, - "step": 19377 - }, - { - "epoch": 10.105867014341591, - "grad_norm": 1.4564110040664673, - "learning_rate": 8.103316582914573e-05, - "loss": 5.2865, - "step": 19378 - }, - { - "epoch": 10.10638852672751, - "grad_norm": 1.391337275505066, - "learning_rate": 8.10321608040201e-05, - "loss": 5.0897, - "step": 19379 - }, - { - "epoch": 10.106910039113428, - "grad_norm": 1.4346710443496704, - "learning_rate": 8.103115577889448e-05, - "loss": 5.3711, - "step": 19380 - }, - { - "epoch": 10.107431551499348, - "grad_norm": 1.4485634565353394, - "learning_rate": 8.103015075376886e-05, - "loss": 5.6235, - "step": 19381 - }, - { - "epoch": 10.107953063885267, - "grad_norm": 1.4469274282455444, - "learning_rate": 8.102914572864322e-05, - "loss": 5.5899, - "step": 19382 - }, - { - "epoch": 10.108474576271187, - "grad_norm": 1.2840619087219238, - "learning_rate": 8.10281407035176e-05, - "loss": 5.5122, - "step": 19383 - }, - { - "epoch": 10.108996088657106, - "grad_norm": 1.2605955600738525, - "learning_rate": 8.102713567839197e-05, - "loss": 5.6477, - "step": 19384 - }, - { - "epoch": 10.109517601043025, - "grad_norm": 1.327576756477356, - "learning_rate": 8.102613065326633e-05, - "loss": 5.4141, - "step": 19385 - }, - { - "epoch": 10.110039113428943, - "grad_norm": 1.3796167373657227, - "learning_rate": 8.102512562814071e-05, - "loss": 5.9073, - "step": 19386 - }, - { - "epoch": 10.110560625814863, - "grad_norm": 1.468694806098938, - "learning_rate": 8.102412060301507e-05, - "loss": 5.129, - "step": 19387 - }, - { - "epoch": 10.111082138200782, - "grad_norm": 1.4745725393295288, - "learning_rate": 8.102311557788945e-05, - "loss": 5.6439, - "step": 19388 - }, - { - "epoch": 10.111603650586702, - "grad_norm": 1.5124186277389526, - "learning_rate": 8.102211055276381e-05, - "loss": 5.0527, - "step": 19389 - }, - { - "epoch": 10.112125162972621, - "grad_norm": 1.5504379272460938, - "learning_rate": 8.102110552763819e-05, - "loss": 5.3652, - "step": 19390 - }, - { - "epoch": 10.11264667535854, - "grad_norm": 1.3777881860733032, - "learning_rate": 8.102010050251257e-05, - "loss": 5.3954, - "step": 19391 - }, - { - "epoch": 10.113168187744458, - "grad_norm": 1.3130061626434326, - "learning_rate": 8.101909547738695e-05, - "loss": 5.512, - "step": 19392 - }, - { - "epoch": 10.113689700130378, - "grad_norm": 1.4320799112319946, - "learning_rate": 8.101809045226131e-05, - "loss": 5.425, - "step": 19393 - }, - { - "epoch": 10.114211212516297, - "grad_norm": 1.5394262075424194, - "learning_rate": 8.101708542713569e-05, - "loss": 5.1248, - "step": 19394 - }, - { - "epoch": 10.114732724902217, - "grad_norm": 1.4459471702575684, - "learning_rate": 8.101608040201005e-05, - "loss": 5.4195, - "step": 19395 - }, - { - "epoch": 10.115254237288136, - "grad_norm": 1.4085073471069336, - "learning_rate": 8.101507537688443e-05, - "loss": 5.5481, - "step": 19396 - }, - { - "epoch": 10.115775749674055, - "grad_norm": 1.4302815198898315, - "learning_rate": 8.10140703517588e-05, - "loss": 5.7015, - "step": 19397 - }, - { - "epoch": 10.116297262059973, - "grad_norm": 1.343001365661621, - "learning_rate": 8.101306532663317e-05, - "loss": 5.6206, - "step": 19398 - }, - { - "epoch": 10.116818774445893, - "grad_norm": 1.4386265277862549, - "learning_rate": 8.101206030150754e-05, - "loss": 5.0602, - "step": 19399 - }, - { - "epoch": 10.117340286831812, - "grad_norm": 1.368187665939331, - "learning_rate": 8.10110552763819e-05, - "loss": 5.7946, - "step": 19400 - }, - { - "epoch": 10.117861799217732, - "grad_norm": 1.4549378156661987, - "learning_rate": 8.101005025125628e-05, - "loss": 5.6113, - "step": 19401 - }, - { - "epoch": 10.118383311603651, - "grad_norm": 1.3259813785552979, - "learning_rate": 8.100904522613066e-05, - "loss": 5.206, - "step": 19402 - }, - { - "epoch": 10.11890482398957, - "grad_norm": 1.48300039768219, - "learning_rate": 8.100804020100504e-05, - "loss": 5.4663, - "step": 19403 - }, - { - "epoch": 10.119426336375488, - "grad_norm": 1.3611689805984497, - "learning_rate": 8.10070351758794e-05, - "loss": 5.3548, - "step": 19404 - }, - { - "epoch": 10.119947848761408, - "grad_norm": 1.3667930364608765, - "learning_rate": 8.100603015075378e-05, - "loss": 5.7014, - "step": 19405 - }, - { - "epoch": 10.120469361147327, - "grad_norm": 1.438826560974121, - "learning_rate": 8.100502512562814e-05, - "loss": 5.3092, - "step": 19406 - }, - { - "epoch": 10.120990873533247, - "grad_norm": 1.365665078163147, - "learning_rate": 8.100402010050252e-05, - "loss": 5.135, - "step": 19407 - }, - { - "epoch": 10.121512385919166, - "grad_norm": 1.95602548122406, - "learning_rate": 8.100301507537688e-05, - "loss": 4.5689, - "step": 19408 - }, - { - "epoch": 10.122033898305085, - "grad_norm": 1.7213242053985596, - "learning_rate": 8.100201005025126e-05, - "loss": 5.4611, - "step": 19409 - }, - { - "epoch": 10.122555410691003, - "grad_norm": 1.490323543548584, - "learning_rate": 8.100100502512563e-05, - "loss": 5.6172, - "step": 19410 - }, - { - "epoch": 10.123076923076923, - "grad_norm": 1.3966107368469238, - "learning_rate": 8.1e-05, - "loss": 5.7484, - "step": 19411 - }, - { - "epoch": 10.123598435462842, - "grad_norm": 1.4256106615066528, - "learning_rate": 8.099899497487438e-05, - "loss": 5.3941, - "step": 19412 - }, - { - "epoch": 10.124119947848762, - "grad_norm": 1.477371096611023, - "learning_rate": 8.099798994974875e-05, - "loss": 5.326, - "step": 19413 - }, - { - "epoch": 10.124641460234681, - "grad_norm": 1.534819483757019, - "learning_rate": 8.099698492462312e-05, - "loss": 5.324, - "step": 19414 - }, - { - "epoch": 10.1251629726206, - "grad_norm": 1.40630042552948, - "learning_rate": 8.099597989949749e-05, - "loss": 5.3949, - "step": 19415 - }, - { - "epoch": 10.125684485006518, - "grad_norm": 1.6721776723861694, - "learning_rate": 8.099497487437187e-05, - "loss": 5.3887, - "step": 19416 - }, - { - "epoch": 10.126205997392438, - "grad_norm": 1.4102164506912231, - "learning_rate": 8.099396984924623e-05, - "loss": 5.5115, - "step": 19417 - }, - { - "epoch": 10.126727509778357, - "grad_norm": 1.416919469833374, - "learning_rate": 8.099296482412061e-05, - "loss": 5.7177, - "step": 19418 - }, - { - "epoch": 10.127249022164277, - "grad_norm": 1.4293354749679565, - "learning_rate": 8.099195979899497e-05, - "loss": 5.6671, - "step": 19419 - }, - { - "epoch": 10.127770534550196, - "grad_norm": 1.5438790321350098, - "learning_rate": 8.099095477386935e-05, - "loss": 5.5132, - "step": 19420 - }, - { - "epoch": 10.128292046936116, - "grad_norm": 1.5145270824432373, - "learning_rate": 8.098994974874371e-05, - "loss": 5.7938, - "step": 19421 - }, - { - "epoch": 10.128813559322033, - "grad_norm": 1.338782787322998, - "learning_rate": 8.098894472361809e-05, - "loss": 5.1055, - "step": 19422 - }, - { - "epoch": 10.129335071707953, - "grad_norm": 1.3905164003372192, - "learning_rate": 8.098793969849247e-05, - "loss": 5.2523, - "step": 19423 - }, - { - "epoch": 10.129856584093872, - "grad_norm": 1.5859357118606567, - "learning_rate": 8.098693467336685e-05, - "loss": 4.6029, - "step": 19424 - }, - { - "epoch": 10.130378096479792, - "grad_norm": 1.3598369359970093, - "learning_rate": 8.098592964824121e-05, - "loss": 5.5411, - "step": 19425 - }, - { - "epoch": 10.130899608865711, - "grad_norm": 1.4330435991287231, - "learning_rate": 8.098492462311558e-05, - "loss": 5.594, - "step": 19426 - }, - { - "epoch": 10.13142112125163, - "grad_norm": 1.339848518371582, - "learning_rate": 8.098391959798995e-05, - "loss": 5.3849, - "step": 19427 - }, - { - "epoch": 10.131942633637548, - "grad_norm": 1.4098496437072754, - "learning_rate": 8.098291457286432e-05, - "loss": 5.018, - "step": 19428 - }, - { - "epoch": 10.132464146023468, - "grad_norm": 1.4616820812225342, - "learning_rate": 8.09819095477387e-05, - "loss": 5.2786, - "step": 19429 - }, - { - "epoch": 10.132985658409387, - "grad_norm": 1.5147500038146973, - "learning_rate": 8.098090452261306e-05, - "loss": 5.4018, - "step": 19430 - }, - { - "epoch": 10.133507170795307, - "grad_norm": 1.4410611391067505, - "learning_rate": 8.097989949748744e-05, - "loss": 5.2911, - "step": 19431 - }, - { - "epoch": 10.134028683181226, - "grad_norm": 1.4800357818603516, - "learning_rate": 8.097889447236182e-05, - "loss": 5.5125, - "step": 19432 - }, - { - "epoch": 10.134550195567146, - "grad_norm": 1.4444271326065063, - "learning_rate": 8.09778894472362e-05, - "loss": 5.2846, - "step": 19433 - }, - { - "epoch": 10.135071707953063, - "grad_norm": 1.4188146591186523, - "learning_rate": 8.097688442211056e-05, - "loss": 5.4669, - "step": 19434 - }, - { - "epoch": 10.135593220338983, - "grad_norm": 1.3974123001098633, - "learning_rate": 8.097587939698494e-05, - "loss": 5.0747, - "step": 19435 - }, - { - "epoch": 10.136114732724902, - "grad_norm": 1.412791132926941, - "learning_rate": 8.09748743718593e-05, - "loss": 5.1656, - "step": 19436 - }, - { - "epoch": 10.136636245110822, - "grad_norm": 1.3574947118759155, - "learning_rate": 8.097386934673368e-05, - "loss": 5.7706, - "step": 19437 - }, - { - "epoch": 10.137157757496741, - "grad_norm": 1.3819210529327393, - "learning_rate": 8.097286432160804e-05, - "loss": 5.5968, - "step": 19438 - }, - { - "epoch": 10.13767926988266, - "grad_norm": 1.4285236597061157, - "learning_rate": 8.09718592964824e-05, - "loss": 5.5119, - "step": 19439 - }, - { - "epoch": 10.138200782268578, - "grad_norm": 1.436313509941101, - "learning_rate": 8.097085427135678e-05, - "loss": 5.4203, - "step": 19440 - }, - { - "epoch": 10.138722294654498, - "grad_norm": 1.4157030582427979, - "learning_rate": 8.096984924623115e-05, - "loss": 5.5211, - "step": 19441 - }, - { - "epoch": 10.139243807040417, - "grad_norm": 1.4061200618743896, - "learning_rate": 8.096884422110553e-05, - "loss": 5.4446, - "step": 19442 - }, - { - "epoch": 10.139765319426337, - "grad_norm": 1.3598960638046265, - "learning_rate": 8.09678391959799e-05, - "loss": 4.9979, - "step": 19443 - }, - { - "epoch": 10.140286831812256, - "grad_norm": 1.563914179801941, - "learning_rate": 8.096683417085428e-05, - "loss": 5.3924, - "step": 19444 - }, - { - "epoch": 10.140808344198176, - "grad_norm": 1.3593469858169556, - "learning_rate": 8.096582914572865e-05, - "loss": 5.6077, - "step": 19445 - }, - { - "epoch": 10.141329856584093, - "grad_norm": 1.568639874458313, - "learning_rate": 8.096482412060302e-05, - "loss": 5.43, - "step": 19446 - }, - { - "epoch": 10.141851368970013, - "grad_norm": 1.5309666395187378, - "learning_rate": 8.096381909547739e-05, - "loss": 5.4006, - "step": 19447 - }, - { - "epoch": 10.142372881355932, - "grad_norm": 1.432808756828308, - "learning_rate": 8.096281407035177e-05, - "loss": 5.5184, - "step": 19448 - }, - { - "epoch": 10.142894393741852, - "grad_norm": 1.391912579536438, - "learning_rate": 8.096180904522613e-05, - "loss": 5.7298, - "step": 19449 - }, - { - "epoch": 10.143415906127771, - "grad_norm": 1.4470077753067017, - "learning_rate": 8.096080402010051e-05, - "loss": 5.5497, - "step": 19450 - }, - { - "epoch": 10.14393741851369, - "grad_norm": 1.379650354385376, - "learning_rate": 8.095979899497487e-05, - "loss": 5.4782, - "step": 19451 - }, - { - "epoch": 10.144458930899608, - "grad_norm": 1.406801462173462, - "learning_rate": 8.095879396984925e-05, - "loss": 5.4487, - "step": 19452 - }, - { - "epoch": 10.144980443285528, - "grad_norm": 1.4356049299240112, - "learning_rate": 8.095778894472363e-05, - "loss": 5.4606, - "step": 19453 - }, - { - "epoch": 10.145501955671447, - "grad_norm": 1.5053104162216187, - "learning_rate": 8.095678391959799e-05, - "loss": 4.8403, - "step": 19454 - }, - { - "epoch": 10.146023468057367, - "grad_norm": 1.5434212684631348, - "learning_rate": 8.095577889447237e-05, - "loss": 5.6106, - "step": 19455 - }, - { - "epoch": 10.146544980443286, - "grad_norm": 1.4464401006698608, - "learning_rate": 8.095477386934673e-05, - "loss": 5.6273, - "step": 19456 - }, - { - "epoch": 10.147066492829204, - "grad_norm": 1.5642483234405518, - "learning_rate": 8.095376884422111e-05, - "loss": 5.4732, - "step": 19457 - }, - { - "epoch": 10.147588005215123, - "grad_norm": 1.44374680519104, - "learning_rate": 8.095276381909548e-05, - "loss": 5.4537, - "step": 19458 - }, - { - "epoch": 10.148109517601043, - "grad_norm": 1.4491840600967407, - "learning_rate": 8.095175879396985e-05, - "loss": 5.5975, - "step": 19459 - }, - { - "epoch": 10.148631029986962, - "grad_norm": 1.3132178783416748, - "learning_rate": 8.095075376884422e-05, - "loss": 5.9017, - "step": 19460 - }, - { - "epoch": 10.149152542372882, - "grad_norm": 1.4303269386291504, - "learning_rate": 8.09497487437186e-05, - "loss": 5.6484, - "step": 19461 - }, - { - "epoch": 10.149674054758801, - "grad_norm": 1.4474009275436401, - "learning_rate": 8.094874371859296e-05, - "loss": 5.2511, - "step": 19462 - }, - { - "epoch": 10.150195567144719, - "grad_norm": 1.4012030363082886, - "learning_rate": 8.094773869346734e-05, - "loss": 5.486, - "step": 19463 - }, - { - "epoch": 10.150717079530638, - "grad_norm": 1.488549828529358, - "learning_rate": 8.094673366834172e-05, - "loss": 5.5351, - "step": 19464 - }, - { - "epoch": 10.151238591916558, - "grad_norm": 1.4877772331237793, - "learning_rate": 8.094572864321608e-05, - "loss": 5.0137, - "step": 19465 - }, - { - "epoch": 10.151760104302477, - "grad_norm": 1.348861575126648, - "learning_rate": 8.094472361809046e-05, - "loss": 5.4438, - "step": 19466 - }, - { - "epoch": 10.152281616688397, - "grad_norm": 1.3827354907989502, - "learning_rate": 8.094371859296482e-05, - "loss": 5.6863, - "step": 19467 - }, - { - "epoch": 10.152803129074316, - "grad_norm": 1.4139838218688965, - "learning_rate": 8.09427135678392e-05, - "loss": 4.9972, - "step": 19468 - }, - { - "epoch": 10.153324641460234, - "grad_norm": 1.3846946954727173, - "learning_rate": 8.094170854271357e-05, - "loss": 5.5418, - "step": 19469 - }, - { - "epoch": 10.153846153846153, - "grad_norm": 1.3526767492294312, - "learning_rate": 8.094070351758794e-05, - "loss": 5.8296, - "step": 19470 - }, - { - "epoch": 10.154367666232073, - "grad_norm": 1.424808382987976, - "learning_rate": 8.093969849246231e-05, - "loss": 4.9865, - "step": 19471 - }, - { - "epoch": 10.154889178617992, - "grad_norm": 1.451377034187317, - "learning_rate": 8.093869346733669e-05, - "loss": 5.3889, - "step": 19472 - }, - { - "epoch": 10.155410691003912, - "grad_norm": 1.4603956937789917, - "learning_rate": 8.093768844221106e-05, - "loss": 5.0889, - "step": 19473 - }, - { - "epoch": 10.155932203389831, - "grad_norm": 1.4246495962142944, - "learning_rate": 8.093668341708544e-05, - "loss": 5.5314, - "step": 19474 - }, - { - "epoch": 10.156453715775749, - "grad_norm": 1.5225346088409424, - "learning_rate": 8.09356783919598e-05, - "loss": 5.4891, - "step": 19475 - }, - { - "epoch": 10.156975228161668, - "grad_norm": 1.4308207035064697, - "learning_rate": 8.093467336683418e-05, - "loss": 5.6255, - "step": 19476 - }, - { - "epoch": 10.157496740547588, - "grad_norm": 1.4333186149597168, - "learning_rate": 8.093366834170855e-05, - "loss": 5.5403, - "step": 19477 - }, - { - "epoch": 10.158018252933507, - "grad_norm": 1.4095463752746582, - "learning_rate": 8.093266331658293e-05, - "loss": 5.7589, - "step": 19478 - }, - { - "epoch": 10.158539765319427, - "grad_norm": 1.499704122543335, - "learning_rate": 8.093165829145729e-05, - "loss": 5.198, - "step": 19479 - }, - { - "epoch": 10.159061277705346, - "grad_norm": 1.3963059186935425, - "learning_rate": 8.093065326633165e-05, - "loss": 4.9333, - "step": 19480 - }, - { - "epoch": 10.159582790091264, - "grad_norm": 1.4165339469909668, - "learning_rate": 8.092964824120603e-05, - "loss": 5.8574, - "step": 19481 - }, - { - "epoch": 10.160104302477183, - "grad_norm": 1.505659818649292, - "learning_rate": 8.09286432160804e-05, - "loss": 5.2383, - "step": 19482 - }, - { - "epoch": 10.160625814863103, - "grad_norm": 1.4203904867172241, - "learning_rate": 8.092763819095477e-05, - "loss": 5.4004, - "step": 19483 - }, - { - "epoch": 10.161147327249022, - "grad_norm": 1.4006047248840332, - "learning_rate": 8.092663316582915e-05, - "loss": 5.3541, - "step": 19484 - }, - { - "epoch": 10.161668839634942, - "grad_norm": 1.5044543743133545, - "learning_rate": 8.092562814070353e-05, - "loss": 5.5395, - "step": 19485 - }, - { - "epoch": 10.162190352020861, - "grad_norm": 1.5390909910202026, - "learning_rate": 8.09246231155779e-05, - "loss": 5.3361, - "step": 19486 - }, - { - "epoch": 10.162711864406779, - "grad_norm": 1.608689546585083, - "learning_rate": 8.092361809045227e-05, - "loss": 5.282, - "step": 19487 - }, - { - "epoch": 10.163233376792698, - "grad_norm": 1.4848641157150269, - "learning_rate": 8.092261306532664e-05, - "loss": 5.425, - "step": 19488 - }, - { - "epoch": 10.163754889178618, - "grad_norm": 1.4469681978225708, - "learning_rate": 8.092160804020101e-05, - "loss": 5.5673, - "step": 19489 - }, - { - "epoch": 10.164276401564537, - "grad_norm": 1.508902668952942, - "learning_rate": 8.092060301507538e-05, - "loss": 5.3573, - "step": 19490 - }, - { - "epoch": 10.164797913950457, - "grad_norm": 1.4977362155914307, - "learning_rate": 8.091959798994976e-05, - "loss": 5.1354, - "step": 19491 - }, - { - "epoch": 10.165319426336376, - "grad_norm": 1.5958330631256104, - "learning_rate": 8.091859296482412e-05, - "loss": 5.0081, - "step": 19492 - }, - { - "epoch": 10.165840938722294, - "grad_norm": 1.3918375968933105, - "learning_rate": 8.09175879396985e-05, - "loss": 5.3736, - "step": 19493 - }, - { - "epoch": 10.166362451108213, - "grad_norm": 1.5944808721542358, - "learning_rate": 8.091658291457288e-05, - "loss": 5.3807, - "step": 19494 - }, - { - "epoch": 10.166883963494133, - "grad_norm": 1.4084001779556274, - "learning_rate": 8.091557788944724e-05, - "loss": 5.3604, - "step": 19495 - }, - { - "epoch": 10.167405475880052, - "grad_norm": 1.475034236907959, - "learning_rate": 8.091457286432162e-05, - "loss": 5.0697, - "step": 19496 - }, - { - "epoch": 10.167926988265972, - "grad_norm": 1.4723076820373535, - "learning_rate": 8.091356783919598e-05, - "loss": 5.5588, - "step": 19497 - }, - { - "epoch": 10.168448500651891, - "grad_norm": 1.3276011943817139, - "learning_rate": 8.091256281407036e-05, - "loss": 4.8597, - "step": 19498 - }, - { - "epoch": 10.168970013037809, - "grad_norm": 1.5811959505081177, - "learning_rate": 8.091155778894472e-05, - "loss": 5.5177, - "step": 19499 - }, - { - "epoch": 10.169491525423728, - "grad_norm": 1.60812509059906, - "learning_rate": 8.09105527638191e-05, - "loss": 5.284, - "step": 19500 - }, - { - "epoch": 10.170013037809648, - "grad_norm": 1.5519214868545532, - "learning_rate": 8.090954773869347e-05, - "loss": 5.6353, - "step": 19501 - }, - { - "epoch": 10.170534550195567, - "grad_norm": 1.5051097869873047, - "learning_rate": 8.090854271356784e-05, - "loss": 5.5308, - "step": 19502 - }, - { - "epoch": 10.171056062581487, - "grad_norm": 1.5351275205612183, - "learning_rate": 8.090753768844221e-05, - "loss": 5.0022, - "step": 19503 - }, - { - "epoch": 10.171577574967406, - "grad_norm": 1.3498361110687256, - "learning_rate": 8.090653266331659e-05, - "loss": 5.7266, - "step": 19504 - }, - { - "epoch": 10.172099087353324, - "grad_norm": 1.5015009641647339, - "learning_rate": 8.090552763819096e-05, - "loss": 5.3658, - "step": 19505 - }, - { - "epoch": 10.172620599739243, - "grad_norm": 1.4775804281234741, - "learning_rate": 8.090452261306533e-05, - "loss": 5.3991, - "step": 19506 - }, - { - "epoch": 10.173142112125163, - "grad_norm": 1.4987527132034302, - "learning_rate": 8.09035175879397e-05, - "loss": 5.1844, - "step": 19507 - }, - { - "epoch": 10.173663624511082, - "grad_norm": 1.495638132095337, - "learning_rate": 8.090251256281407e-05, - "loss": 5.3837, - "step": 19508 - }, - { - "epoch": 10.174185136897002, - "grad_norm": 1.4567981958389282, - "learning_rate": 8.090150753768845e-05, - "loss": 5.7706, - "step": 19509 - }, - { - "epoch": 10.174706649282921, - "grad_norm": 1.5451104640960693, - "learning_rate": 8.090050251256281e-05, - "loss": 5.198, - "step": 19510 - }, - { - "epoch": 10.175228161668839, - "grad_norm": 1.3409355878829956, - "learning_rate": 8.089949748743719e-05, - "loss": 5.4722, - "step": 19511 - }, - { - "epoch": 10.175749674054758, - "grad_norm": 1.4394769668579102, - "learning_rate": 8.089849246231155e-05, - "loss": 5.4397, - "step": 19512 - }, - { - "epoch": 10.176271186440678, - "grad_norm": 1.394413948059082, - "learning_rate": 8.089748743718593e-05, - "loss": 5.6753, - "step": 19513 - }, - { - "epoch": 10.176792698826597, - "grad_norm": 1.8432376384735107, - "learning_rate": 8.089648241206031e-05, - "loss": 5.5957, - "step": 19514 - }, - { - "epoch": 10.177314211212517, - "grad_norm": 1.456478238105774, - "learning_rate": 8.089547738693469e-05, - "loss": 5.6535, - "step": 19515 - }, - { - "epoch": 10.177835723598436, - "grad_norm": 1.4209572076797485, - "learning_rate": 8.089447236180905e-05, - "loss": 5.8665, - "step": 19516 - }, - { - "epoch": 10.178357235984354, - "grad_norm": 1.4971332550048828, - "learning_rate": 8.089346733668343e-05, - "loss": 4.9922, - "step": 19517 - }, - { - "epoch": 10.178878748370273, - "grad_norm": 1.5081357955932617, - "learning_rate": 8.08924623115578e-05, - "loss": 5.5818, - "step": 19518 - }, - { - "epoch": 10.179400260756193, - "grad_norm": 1.4449812173843384, - "learning_rate": 8.089145728643216e-05, - "loss": 5.5052, - "step": 19519 - }, - { - "epoch": 10.179921773142112, - "grad_norm": 1.45856773853302, - "learning_rate": 8.089045226130654e-05, - "loss": 5.8847, - "step": 19520 - }, - { - "epoch": 10.180443285528032, - "grad_norm": 1.4284471273422241, - "learning_rate": 8.08894472361809e-05, - "loss": 5.4045, - "step": 19521 - }, - { - "epoch": 10.180964797913951, - "grad_norm": 1.4807220697402954, - "learning_rate": 8.088844221105528e-05, - "loss": 5.1943, - "step": 19522 - }, - { - "epoch": 10.181486310299869, - "grad_norm": 1.4431860446929932, - "learning_rate": 8.088743718592964e-05, - "loss": 4.7862, - "step": 19523 - }, - { - "epoch": 10.182007822685788, - "grad_norm": 1.328526496887207, - "learning_rate": 8.088643216080402e-05, - "loss": 5.6054, - "step": 19524 - }, - { - "epoch": 10.182529335071708, - "grad_norm": 1.472015380859375, - "learning_rate": 8.08854271356784e-05, - "loss": 5.3308, - "step": 19525 - }, - { - "epoch": 10.183050847457627, - "grad_norm": 1.3769311904907227, - "learning_rate": 8.088442211055278e-05, - "loss": 5.9147, - "step": 19526 - }, - { - "epoch": 10.183572359843547, - "grad_norm": 1.3648805618286133, - "learning_rate": 8.088341708542714e-05, - "loss": 5.5299, - "step": 19527 - }, - { - "epoch": 10.184093872229466, - "grad_norm": 1.3849849700927734, - "learning_rate": 8.088241206030152e-05, - "loss": 5.5687, - "step": 19528 - }, - { - "epoch": 10.184615384615384, - "grad_norm": 1.339496374130249, - "learning_rate": 8.088140703517588e-05, - "loss": 5.8218, - "step": 19529 - }, - { - "epoch": 10.185136897001303, - "grad_norm": 1.3970128297805786, - "learning_rate": 8.088040201005026e-05, - "loss": 5.3434, - "step": 19530 - }, - { - "epoch": 10.185658409387223, - "grad_norm": 1.6431965827941895, - "learning_rate": 8.087939698492462e-05, - "loss": 4.6975, - "step": 19531 - }, - { - "epoch": 10.186179921773142, - "grad_norm": 1.4530564546585083, - "learning_rate": 8.087839195979899e-05, - "loss": 5.6143, - "step": 19532 - }, - { - "epoch": 10.186701434159062, - "grad_norm": 1.364723563194275, - "learning_rate": 8.087738693467337e-05, - "loss": 5.9728, - "step": 19533 - }, - { - "epoch": 10.187222946544981, - "grad_norm": 1.397769570350647, - "learning_rate": 8.087638190954774e-05, - "loss": 5.2697, - "step": 19534 - }, - { - "epoch": 10.187744458930899, - "grad_norm": 1.472971796989441, - "learning_rate": 8.087537688442212e-05, - "loss": 5.5195, - "step": 19535 - }, - { - "epoch": 10.188265971316818, - "grad_norm": 1.5955216884613037, - "learning_rate": 8.087437185929649e-05, - "loss": 5.4901, - "step": 19536 - }, - { - "epoch": 10.188787483702738, - "grad_norm": 1.387121558189392, - "learning_rate": 8.087336683417086e-05, - "loss": 5.573, - "step": 19537 - }, - { - "epoch": 10.189308996088657, - "grad_norm": 1.525375247001648, - "learning_rate": 8.087236180904523e-05, - "loss": 5.535, - "step": 19538 - }, - { - "epoch": 10.189830508474577, - "grad_norm": 1.4134432077407837, - "learning_rate": 8.087135678391961e-05, - "loss": 5.5791, - "step": 19539 - }, - { - "epoch": 10.190352020860496, - "grad_norm": 1.3946542739868164, - "learning_rate": 8.087035175879397e-05, - "loss": 5.5472, - "step": 19540 - }, - { - "epoch": 10.190873533246414, - "grad_norm": 1.3503446578979492, - "learning_rate": 8.086934673366835e-05, - "loss": 5.6657, - "step": 19541 - }, - { - "epoch": 10.191395045632333, - "grad_norm": 1.617401361465454, - "learning_rate": 8.086834170854271e-05, - "loss": 5.303, - "step": 19542 - }, - { - "epoch": 10.191916558018253, - "grad_norm": 1.4470224380493164, - "learning_rate": 8.086733668341709e-05, - "loss": 5.5871, - "step": 19543 - }, - { - "epoch": 10.192438070404172, - "grad_norm": 1.5155029296875, - "learning_rate": 8.086633165829146e-05, - "loss": 5.3056, - "step": 19544 - }, - { - "epoch": 10.192959582790092, - "grad_norm": 1.3856877088546753, - "learning_rate": 8.086532663316583e-05, - "loss": 5.3653, - "step": 19545 - }, - { - "epoch": 10.193481095176011, - "grad_norm": 1.6283833980560303, - "learning_rate": 8.086432160804021e-05, - "loss": 5.465, - "step": 19546 - }, - { - "epoch": 10.194002607561929, - "grad_norm": 1.4039561748504639, - "learning_rate": 8.086331658291458e-05, - "loss": 5.3723, - "step": 19547 - }, - { - "epoch": 10.194524119947848, - "grad_norm": 1.4427791833877563, - "learning_rate": 8.086231155778895e-05, - "loss": 5.4508, - "step": 19548 - }, - { - "epoch": 10.195045632333768, - "grad_norm": 1.3689892292022705, - "learning_rate": 8.086130653266332e-05, - "loss": 5.1913, - "step": 19549 - }, - { - "epoch": 10.195567144719687, - "grad_norm": 1.553533673286438, - "learning_rate": 8.08603015075377e-05, - "loss": 5.1189, - "step": 19550 - }, - { - "epoch": 10.196088657105607, - "grad_norm": 1.5530108213424683, - "learning_rate": 8.085929648241206e-05, - "loss": 5.4885, - "step": 19551 - }, - { - "epoch": 10.196610169491525, - "grad_norm": 1.5213204622268677, - "learning_rate": 8.085829145728644e-05, - "loss": 5.1214, - "step": 19552 - }, - { - "epoch": 10.197131681877444, - "grad_norm": 1.4061203002929688, - "learning_rate": 8.08572864321608e-05, - "loss": 5.5267, - "step": 19553 - }, - { - "epoch": 10.197653194263363, - "grad_norm": 1.5255638360977173, - "learning_rate": 8.085628140703518e-05, - "loss": 5.5033, - "step": 19554 - }, - { - "epoch": 10.198174706649283, - "grad_norm": 1.5357197523117065, - "learning_rate": 8.085527638190956e-05, - "loss": 5.2114, - "step": 19555 - }, - { - "epoch": 10.198696219035202, - "grad_norm": 1.5390784740447998, - "learning_rate": 8.085427135678393e-05, - "loss": 5.2242, - "step": 19556 - }, - { - "epoch": 10.199217731421122, - "grad_norm": 1.3924392461776733, - "learning_rate": 8.08532663316583e-05, - "loss": 5.6394, - "step": 19557 - }, - { - "epoch": 10.19973924380704, - "grad_norm": 1.5759093761444092, - "learning_rate": 8.085226130653266e-05, - "loss": 4.9743, - "step": 19558 - }, - { - "epoch": 10.200260756192959, - "grad_norm": 1.426018476486206, - "learning_rate": 8.085125628140704e-05, - "loss": 5.377, - "step": 19559 - }, - { - "epoch": 10.200782268578878, - "grad_norm": 1.4727998971939087, - "learning_rate": 8.08502512562814e-05, - "loss": 5.7096, - "step": 19560 - }, - { - "epoch": 10.201303780964798, - "grad_norm": 1.4071377515792847, - "learning_rate": 8.084924623115578e-05, - "loss": 5.3999, - "step": 19561 - }, - { - "epoch": 10.201825293350717, - "grad_norm": 1.4726241827011108, - "learning_rate": 8.084824120603015e-05, - "loss": 5.36, - "step": 19562 - }, - { - "epoch": 10.202346805736637, - "grad_norm": 1.7984583377838135, - "learning_rate": 8.084723618090453e-05, - "loss": 5.3368, - "step": 19563 - }, - { - "epoch": 10.202868318122555, - "grad_norm": 1.4509174823760986, - "learning_rate": 8.084623115577889e-05, - "loss": 5.5806, - "step": 19564 - }, - { - "epoch": 10.203389830508474, - "grad_norm": 1.4595590829849243, - "learning_rate": 8.084522613065327e-05, - "loss": 5.1923, - "step": 19565 - }, - { - "epoch": 10.203911342894393, - "grad_norm": 1.4845354557037354, - "learning_rate": 8.084422110552765e-05, - "loss": 5.4969, - "step": 19566 - }, - { - "epoch": 10.204432855280313, - "grad_norm": 1.356342077255249, - "learning_rate": 8.084321608040202e-05, - "loss": 5.8975, - "step": 19567 - }, - { - "epoch": 10.204954367666232, - "grad_norm": 1.5728614330291748, - "learning_rate": 8.084221105527639e-05, - "loss": 5.5541, - "step": 19568 - }, - { - "epoch": 10.205475880052152, - "grad_norm": 1.5314064025878906, - "learning_rate": 8.084120603015077e-05, - "loss": 5.3693, - "step": 19569 - }, - { - "epoch": 10.20599739243807, - "grad_norm": 1.5257186889648438, - "learning_rate": 8.084020100502513e-05, - "loss": 5.6453, - "step": 19570 - }, - { - "epoch": 10.206518904823989, - "grad_norm": 1.4200880527496338, - "learning_rate": 8.083919597989951e-05, - "loss": 5.2554, - "step": 19571 - }, - { - "epoch": 10.207040417209909, - "grad_norm": 1.5496748685836792, - "learning_rate": 8.083819095477387e-05, - "loss": 5.5453, - "step": 19572 - }, - { - "epoch": 10.207561929595828, - "grad_norm": 1.4285515546798706, - "learning_rate": 8.083718592964824e-05, - "loss": 5.4685, - "step": 19573 - }, - { - "epoch": 10.208083441981747, - "grad_norm": 1.3518513441085815, - "learning_rate": 8.083618090452261e-05, - "loss": 5.6231, - "step": 19574 - }, - { - "epoch": 10.208604954367667, - "grad_norm": 1.4289746284484863, - "learning_rate": 8.083517587939698e-05, - "loss": 5.6491, - "step": 19575 - }, - { - "epoch": 10.209126466753585, - "grad_norm": 1.4411437511444092, - "learning_rate": 8.083417085427136e-05, - "loss": 5.303, - "step": 19576 - }, - { - "epoch": 10.209647979139504, - "grad_norm": 1.4915893077850342, - "learning_rate": 8.083316582914573e-05, - "loss": 5.5105, - "step": 19577 - }, - { - "epoch": 10.210169491525424, - "grad_norm": 1.4788860082626343, - "learning_rate": 8.083216080402011e-05, - "loss": 5.5654, - "step": 19578 - }, - { - "epoch": 10.210691003911343, - "grad_norm": 1.4436284303665161, - "learning_rate": 8.083115577889448e-05, - "loss": 5.8927, - "step": 19579 - }, - { - "epoch": 10.211212516297262, - "grad_norm": 1.4151679277420044, - "learning_rate": 8.083015075376885e-05, - "loss": 5.5406, - "step": 19580 - }, - { - "epoch": 10.211734028683182, - "grad_norm": 1.408040165901184, - "learning_rate": 8.082914572864322e-05, - "loss": 5.7614, - "step": 19581 - }, - { - "epoch": 10.2122555410691, - "grad_norm": 1.4444472789764404, - "learning_rate": 8.08281407035176e-05, - "loss": 5.5461, - "step": 19582 - }, - { - "epoch": 10.212777053455019, - "grad_norm": 1.4350438117980957, - "learning_rate": 8.082713567839196e-05, - "loss": 5.509, - "step": 19583 - }, - { - "epoch": 10.213298565840939, - "grad_norm": 1.4145528078079224, - "learning_rate": 8.082613065326634e-05, - "loss": 5.254, - "step": 19584 - }, - { - "epoch": 10.213820078226858, - "grad_norm": 1.481654167175293, - "learning_rate": 8.08251256281407e-05, - "loss": 5.8675, - "step": 19585 - }, - { - "epoch": 10.214341590612777, - "grad_norm": 1.5872541666030884, - "learning_rate": 8.082412060301508e-05, - "loss": 4.7835, - "step": 19586 - }, - { - "epoch": 10.214863102998697, - "grad_norm": 1.3086472749710083, - "learning_rate": 8.082311557788946e-05, - "loss": 5.5659, - "step": 19587 - }, - { - "epoch": 10.215384615384615, - "grad_norm": 1.4820972681045532, - "learning_rate": 8.082211055276382e-05, - "loss": 5.4076, - "step": 19588 - }, - { - "epoch": 10.215906127770534, - "grad_norm": 1.45146906375885, - "learning_rate": 8.08211055276382e-05, - "loss": 5.822, - "step": 19589 - }, - { - "epoch": 10.216427640156454, - "grad_norm": 1.4474022388458252, - "learning_rate": 8.082010050251256e-05, - "loss": 5.3611, - "step": 19590 - }, - { - "epoch": 10.216949152542373, - "grad_norm": 1.4980000257492065, - "learning_rate": 8.081909547738694e-05, - "loss": 5.281, - "step": 19591 - }, - { - "epoch": 10.217470664928292, - "grad_norm": 1.3466154336929321, - "learning_rate": 8.08180904522613e-05, - "loss": 5.7644, - "step": 19592 - }, - { - "epoch": 10.217992177314212, - "grad_norm": 1.531679630279541, - "learning_rate": 8.081708542713568e-05, - "loss": 5.5486, - "step": 19593 - }, - { - "epoch": 10.21851368970013, - "grad_norm": 1.5522480010986328, - "learning_rate": 8.081608040201005e-05, - "loss": 5.0496, - "step": 19594 - }, - { - "epoch": 10.219035202086049, - "grad_norm": 1.503346562385559, - "learning_rate": 8.081507537688443e-05, - "loss": 5.2293, - "step": 19595 - }, - { - "epoch": 10.219556714471969, - "grad_norm": 1.3459651470184326, - "learning_rate": 8.081407035175879e-05, - "loss": 5.4918, - "step": 19596 - }, - { - "epoch": 10.220078226857888, - "grad_norm": 1.4573163986206055, - "learning_rate": 8.081306532663317e-05, - "loss": 5.2729, - "step": 19597 - }, - { - "epoch": 10.220599739243807, - "grad_norm": 1.498792052268982, - "learning_rate": 8.081206030150755e-05, - "loss": 5.2739, - "step": 19598 - }, - { - "epoch": 10.221121251629727, - "grad_norm": 1.4183645248413086, - "learning_rate": 8.081105527638191e-05, - "loss": 5.1902, - "step": 19599 - }, - { - "epoch": 10.221642764015645, - "grad_norm": 1.5358985662460327, - "learning_rate": 8.081005025125629e-05, - "loss": 5.3685, - "step": 19600 - }, - { - "epoch": 10.222164276401564, - "grad_norm": 1.5211600065231323, - "learning_rate": 8.080904522613065e-05, - "loss": 4.8663, - "step": 19601 - }, - { - "epoch": 10.222685788787484, - "grad_norm": 1.4565904140472412, - "learning_rate": 8.080804020100503e-05, - "loss": 5.9083, - "step": 19602 - }, - { - "epoch": 10.223207301173403, - "grad_norm": 1.4751838445663452, - "learning_rate": 8.08070351758794e-05, - "loss": 5.2475, - "step": 19603 - }, - { - "epoch": 10.223728813559323, - "grad_norm": 1.4287371635437012, - "learning_rate": 8.080603015075377e-05, - "loss": 5.6182, - "step": 19604 - }, - { - "epoch": 10.224250325945242, - "grad_norm": 1.3804352283477783, - "learning_rate": 8.080502512562814e-05, - "loss": 5.5716, - "step": 19605 - }, - { - "epoch": 10.22477183833116, - "grad_norm": 1.7012817859649658, - "learning_rate": 8.080402010050251e-05, - "loss": 5.5798, - "step": 19606 - }, - { - "epoch": 10.22529335071708, - "grad_norm": 1.3865857124328613, - "learning_rate": 8.080301507537689e-05, - "loss": 5.8727, - "step": 19607 - }, - { - "epoch": 10.225814863102999, - "grad_norm": 1.4692447185516357, - "learning_rate": 8.080201005025127e-05, - "loss": 5.4642, - "step": 19608 - }, - { - "epoch": 10.226336375488918, - "grad_norm": 1.4985402822494507, - "learning_rate": 8.080100502512563e-05, - "loss": 5.3091, - "step": 19609 - }, - { - "epoch": 10.226857887874838, - "grad_norm": 1.4226957559585571, - "learning_rate": 8.080000000000001e-05, - "loss": 5.4742, - "step": 19610 - }, - { - "epoch": 10.227379400260757, - "grad_norm": 1.4808582067489624, - "learning_rate": 8.079899497487438e-05, - "loss": 5.7134, - "step": 19611 - }, - { - "epoch": 10.227900912646675, - "grad_norm": 1.5480968952178955, - "learning_rate": 8.079798994974874e-05, - "loss": 5.4516, - "step": 19612 - }, - { - "epoch": 10.228422425032594, - "grad_norm": 1.4227374792099, - "learning_rate": 8.079698492462312e-05, - "loss": 5.5684, - "step": 19613 - }, - { - "epoch": 10.228943937418514, - "grad_norm": 1.4119012355804443, - "learning_rate": 8.079597989949748e-05, - "loss": 5.222, - "step": 19614 - }, - { - "epoch": 10.229465449804433, - "grad_norm": 1.2883234024047852, - "learning_rate": 8.079497487437186e-05, - "loss": 5.7549, - "step": 19615 - }, - { - "epoch": 10.229986962190353, - "grad_norm": 1.3884540796279907, - "learning_rate": 8.079396984924623e-05, - "loss": 5.5718, - "step": 19616 - }, - { - "epoch": 10.230508474576272, - "grad_norm": 1.4120254516601562, - "learning_rate": 8.07929648241206e-05, - "loss": 5.2385, - "step": 19617 - }, - { - "epoch": 10.23102998696219, - "grad_norm": 1.3745555877685547, - "learning_rate": 8.079195979899498e-05, - "loss": 5.4634, - "step": 19618 - }, - { - "epoch": 10.23155149934811, - "grad_norm": 1.2764778137207031, - "learning_rate": 8.079095477386936e-05, - "loss": 5.1321, - "step": 19619 - }, - { - "epoch": 10.232073011734029, - "grad_norm": 1.6837291717529297, - "learning_rate": 8.078994974874372e-05, - "loss": 5.1021, - "step": 19620 - }, - { - "epoch": 10.232594524119948, - "grad_norm": 1.3928585052490234, - "learning_rate": 8.07889447236181e-05, - "loss": 5.28, - "step": 19621 - }, - { - "epoch": 10.233116036505868, - "grad_norm": 1.4383965730667114, - "learning_rate": 8.078793969849247e-05, - "loss": 5.3772, - "step": 19622 - }, - { - "epoch": 10.233637548891787, - "grad_norm": 1.4495186805725098, - "learning_rate": 8.078693467336684e-05, - "loss": 5.4689, - "step": 19623 - }, - { - "epoch": 10.234159061277705, - "grad_norm": 1.293430209159851, - "learning_rate": 8.078592964824121e-05, - "loss": 5.4266, - "step": 19624 - }, - { - "epoch": 10.234680573663624, - "grad_norm": 1.3780547380447388, - "learning_rate": 8.078492462311557e-05, - "loss": 5.5844, - "step": 19625 - }, - { - "epoch": 10.235202086049544, - "grad_norm": 1.3688702583312988, - "learning_rate": 8.078391959798995e-05, - "loss": 5.9246, - "step": 19626 - }, - { - "epoch": 10.235723598435463, - "grad_norm": 1.4142534732818604, - "learning_rate": 8.078291457286433e-05, - "loss": 5.5482, - "step": 19627 - }, - { - "epoch": 10.236245110821383, - "grad_norm": 1.346163034439087, - "learning_rate": 8.07819095477387e-05, - "loss": 5.816, - "step": 19628 - }, - { - "epoch": 10.236766623207302, - "grad_norm": 1.3786574602127075, - "learning_rate": 8.078090452261307e-05, - "loss": 5.3948, - "step": 19629 - }, - { - "epoch": 10.23728813559322, - "grad_norm": 1.3628560304641724, - "learning_rate": 8.077989949748745e-05, - "loss": 5.6967, - "step": 19630 - }, - { - "epoch": 10.23780964797914, - "grad_norm": 1.3814842700958252, - "learning_rate": 8.077889447236181e-05, - "loss": 5.7126, - "step": 19631 - }, - { - "epoch": 10.238331160365059, - "grad_norm": 1.5733493566513062, - "learning_rate": 8.077788944723619e-05, - "loss": 4.9158, - "step": 19632 - }, - { - "epoch": 10.238852672750978, - "grad_norm": 1.5785692930221558, - "learning_rate": 8.077688442211055e-05, - "loss": 5.4759, - "step": 19633 - }, - { - "epoch": 10.239374185136898, - "grad_norm": 1.3717598915100098, - "learning_rate": 8.077587939698493e-05, - "loss": 5.5135, - "step": 19634 - }, - { - "epoch": 10.239895697522817, - "grad_norm": 1.4811967611312866, - "learning_rate": 8.07748743718593e-05, - "loss": 5.6997, - "step": 19635 - }, - { - "epoch": 10.240417209908735, - "grad_norm": 1.4611668586730957, - "learning_rate": 8.077386934673367e-05, - "loss": 5.7389, - "step": 19636 - }, - { - "epoch": 10.240938722294654, - "grad_norm": 1.3842359781265259, - "learning_rate": 8.077286432160804e-05, - "loss": 5.4109, - "step": 19637 - }, - { - "epoch": 10.241460234680574, - "grad_norm": 1.5225578546524048, - "learning_rate": 8.077185929648242e-05, - "loss": 5.3212, - "step": 19638 - }, - { - "epoch": 10.241981747066493, - "grad_norm": 1.5818392038345337, - "learning_rate": 8.07708542713568e-05, - "loss": 5.1879, - "step": 19639 - }, - { - "epoch": 10.242503259452413, - "grad_norm": 1.403860092163086, - "learning_rate": 8.076984924623116e-05, - "loss": 5.3438, - "step": 19640 - }, - { - "epoch": 10.243024771838332, - "grad_norm": 1.3938579559326172, - "learning_rate": 8.076884422110554e-05, - "loss": 5.7699, - "step": 19641 - }, - { - "epoch": 10.24354628422425, - "grad_norm": 1.543271780014038, - "learning_rate": 8.07678391959799e-05, - "loss": 4.9279, - "step": 19642 - }, - { - "epoch": 10.24406779661017, - "grad_norm": 1.581017017364502, - "learning_rate": 8.076683417085428e-05, - "loss": 5.259, - "step": 19643 - }, - { - "epoch": 10.244589308996089, - "grad_norm": 1.5491999387741089, - "learning_rate": 8.076582914572864e-05, - "loss": 5.1597, - "step": 19644 - }, - { - "epoch": 10.245110821382008, - "grad_norm": 1.6269747018814087, - "learning_rate": 8.076482412060302e-05, - "loss": 5.7479, - "step": 19645 - }, - { - "epoch": 10.245632333767928, - "grad_norm": 1.579291820526123, - "learning_rate": 8.076381909547738e-05, - "loss": 5.2597, - "step": 19646 - }, - { - "epoch": 10.246153846153845, - "grad_norm": 1.5358558893203735, - "learning_rate": 8.076281407035176e-05, - "loss": 5.698, - "step": 19647 - }, - { - "epoch": 10.246675358539765, - "grad_norm": 1.5543749332427979, - "learning_rate": 8.076180904522614e-05, - "loss": 5.4434, - "step": 19648 - }, - { - "epoch": 10.247196870925684, - "grad_norm": 1.3405026197433472, - "learning_rate": 8.076080402010052e-05, - "loss": 5.6659, - "step": 19649 - }, - { - "epoch": 10.247718383311604, - "grad_norm": 1.593376874923706, - "learning_rate": 8.075979899497488e-05, - "loss": 5.5829, - "step": 19650 - }, - { - "epoch": 10.248239895697523, - "grad_norm": 1.425152063369751, - "learning_rate": 8.075879396984925e-05, - "loss": 5.5907, - "step": 19651 - }, - { - "epoch": 10.248761408083443, - "grad_norm": 1.331728219985962, - "learning_rate": 8.075778894472362e-05, - "loss": 5.167, - "step": 19652 - }, - { - "epoch": 10.24928292046936, - "grad_norm": 1.5044746398925781, - "learning_rate": 8.075678391959799e-05, - "loss": 5.6241, - "step": 19653 - }, - { - "epoch": 10.24980443285528, - "grad_norm": 1.4949020147323608, - "learning_rate": 8.075577889447237e-05, - "loss": 5.5695, - "step": 19654 - }, - { - "epoch": 10.2503259452412, - "grad_norm": 1.456380844116211, - "learning_rate": 8.075477386934673e-05, - "loss": 5.7567, - "step": 19655 - }, - { - "epoch": 10.250847457627119, - "grad_norm": 1.4906812906265259, - "learning_rate": 8.075376884422111e-05, - "loss": 5.7877, - "step": 19656 - }, - { - "epoch": 10.251368970013038, - "grad_norm": 1.444018840789795, - "learning_rate": 8.075276381909547e-05, - "loss": 5.0632, - "step": 19657 - }, - { - "epoch": 10.251890482398958, - "grad_norm": 1.4124714136123657, - "learning_rate": 8.075175879396985e-05, - "loss": 5.1224, - "step": 19658 - }, - { - "epoch": 10.252411994784875, - "grad_norm": 1.5122853517532349, - "learning_rate": 8.075075376884423e-05, - "loss": 5.0807, - "step": 19659 - }, - { - "epoch": 10.252933507170795, - "grad_norm": 1.5395748615264893, - "learning_rate": 8.07497487437186e-05, - "loss": 5.3744, - "step": 19660 - }, - { - "epoch": 10.253455019556714, - "grad_norm": 1.4178379774093628, - "learning_rate": 8.074874371859297e-05, - "loss": 5.6678, - "step": 19661 - }, - { - "epoch": 10.253976531942634, - "grad_norm": 1.5267361402511597, - "learning_rate": 8.074773869346735e-05, - "loss": 5.2803, - "step": 19662 - }, - { - "epoch": 10.254498044328553, - "grad_norm": 1.484771966934204, - "learning_rate": 8.074673366834171e-05, - "loss": 5.4648, - "step": 19663 - }, - { - "epoch": 10.255019556714473, - "grad_norm": 1.386078119277954, - "learning_rate": 8.074572864321609e-05, - "loss": 5.4271, - "step": 19664 - }, - { - "epoch": 10.25554106910039, - "grad_norm": 1.5352014303207397, - "learning_rate": 8.074472361809045e-05, - "loss": 5.249, - "step": 19665 - }, - { - "epoch": 10.25606258148631, - "grad_norm": 1.4068042039871216, - "learning_rate": 8.074371859296482e-05, - "loss": 4.8913, - "step": 19666 - }, - { - "epoch": 10.25658409387223, - "grad_norm": 1.3765565156936646, - "learning_rate": 8.07427135678392e-05, - "loss": 5.7824, - "step": 19667 - }, - { - "epoch": 10.257105606258149, - "grad_norm": 1.435583233833313, - "learning_rate": 8.074170854271357e-05, - "loss": 5.6348, - "step": 19668 - }, - { - "epoch": 10.257627118644068, - "grad_norm": 1.4217019081115723, - "learning_rate": 8.074070351758795e-05, - "loss": 5.8315, - "step": 19669 - }, - { - "epoch": 10.258148631029988, - "grad_norm": 1.4041348695755005, - "learning_rate": 8.073969849246232e-05, - "loss": 5.1795, - "step": 19670 - }, - { - "epoch": 10.258670143415905, - "grad_norm": 1.4029031991958618, - "learning_rate": 8.07386934673367e-05, - "loss": 5.3346, - "step": 19671 - }, - { - "epoch": 10.259191655801825, - "grad_norm": 1.457323431968689, - "learning_rate": 8.073768844221106e-05, - "loss": 5.5564, - "step": 19672 - }, - { - "epoch": 10.259713168187744, - "grad_norm": 1.3805011510849, - "learning_rate": 8.073668341708544e-05, - "loss": 5.6959, - "step": 19673 - }, - { - "epoch": 10.260234680573664, - "grad_norm": 1.586300253868103, - "learning_rate": 8.07356783919598e-05, - "loss": 5.1742, - "step": 19674 - }, - { - "epoch": 10.260756192959583, - "grad_norm": 1.426257610321045, - "learning_rate": 8.073467336683418e-05, - "loss": 5.4104, - "step": 19675 - }, - { - "epoch": 10.261277705345503, - "grad_norm": 1.5237494707107544, - "learning_rate": 8.073366834170854e-05, - "loss": 4.9027, - "step": 19676 - }, - { - "epoch": 10.26179921773142, - "grad_norm": 1.5269652605056763, - "learning_rate": 8.073266331658292e-05, - "loss": 5.621, - "step": 19677 - }, - { - "epoch": 10.26232073011734, - "grad_norm": 1.3534530401229858, - "learning_rate": 8.073165829145728e-05, - "loss": 5.7794, - "step": 19678 - }, - { - "epoch": 10.26284224250326, - "grad_norm": 1.4539451599121094, - "learning_rate": 8.073065326633166e-05, - "loss": 5.3409, - "step": 19679 - }, - { - "epoch": 10.263363754889179, - "grad_norm": 1.4593442678451538, - "learning_rate": 8.072964824120604e-05, - "loss": 5.7777, - "step": 19680 - }, - { - "epoch": 10.263885267275098, - "grad_norm": 1.5943520069122314, - "learning_rate": 8.07286432160804e-05, - "loss": 5.5489, - "step": 19681 - }, - { - "epoch": 10.264406779661018, - "grad_norm": 1.513055443763733, - "learning_rate": 8.072763819095478e-05, - "loss": 5.1005, - "step": 19682 - }, - { - "epoch": 10.264928292046935, - "grad_norm": 1.454781174659729, - "learning_rate": 8.072663316582915e-05, - "loss": 5.7266, - "step": 19683 - }, - { - "epoch": 10.265449804432855, - "grad_norm": 1.4382703304290771, - "learning_rate": 8.072562814070352e-05, - "loss": 5.5601, - "step": 19684 - }, - { - "epoch": 10.265971316818774, - "grad_norm": 1.3640079498291016, - "learning_rate": 8.072462311557789e-05, - "loss": 5.1543, - "step": 19685 - }, - { - "epoch": 10.266492829204694, - "grad_norm": 1.3830897808074951, - "learning_rate": 8.072361809045227e-05, - "loss": 5.7764, - "step": 19686 - }, - { - "epoch": 10.267014341590613, - "grad_norm": 1.5197783708572388, - "learning_rate": 8.072261306532663e-05, - "loss": 5.1717, - "step": 19687 - }, - { - "epoch": 10.267535853976533, - "grad_norm": 1.3675800561904907, - "learning_rate": 8.072160804020101e-05, - "loss": 5.6146, - "step": 19688 - }, - { - "epoch": 10.26805736636245, - "grad_norm": 1.3948725461959839, - "learning_rate": 8.072060301507539e-05, - "loss": 5.4829, - "step": 19689 - }, - { - "epoch": 10.26857887874837, - "grad_norm": 1.5496026277542114, - "learning_rate": 8.071959798994976e-05, - "loss": 5.4633, - "step": 19690 - }, - { - "epoch": 10.26910039113429, - "grad_norm": 1.3389567136764526, - "learning_rate": 8.071859296482413e-05, - "loss": 5.7157, - "step": 19691 - }, - { - "epoch": 10.269621903520209, - "grad_norm": 1.4519349336624146, - "learning_rate": 8.071758793969849e-05, - "loss": 5.744, - "step": 19692 - }, - { - "epoch": 10.270143415906128, - "grad_norm": 1.413030982017517, - "learning_rate": 8.071658291457287e-05, - "loss": 5.866, - "step": 19693 - }, - { - "epoch": 10.270664928292048, - "grad_norm": 1.3603932857513428, - "learning_rate": 8.071557788944723e-05, - "loss": 5.4254, - "step": 19694 - }, - { - "epoch": 10.271186440677965, - "grad_norm": 1.5265876054763794, - "learning_rate": 8.071457286432161e-05, - "loss": 5.2407, - "step": 19695 - }, - { - "epoch": 10.271707953063885, - "grad_norm": 1.535760760307312, - "learning_rate": 8.071356783919598e-05, - "loss": 5.5262, - "step": 19696 - }, - { - "epoch": 10.272229465449804, - "grad_norm": 1.4889934062957764, - "learning_rate": 8.071256281407035e-05, - "loss": 5.5721, - "step": 19697 - }, - { - "epoch": 10.272750977835724, - "grad_norm": 1.3927905559539795, - "learning_rate": 8.071155778894472e-05, - "loss": 5.624, - "step": 19698 - }, - { - "epoch": 10.273272490221643, - "grad_norm": 1.4411745071411133, - "learning_rate": 8.07105527638191e-05, - "loss": 5.1418, - "step": 19699 - }, - { - "epoch": 10.273794002607563, - "grad_norm": 1.4094096422195435, - "learning_rate": 8.070954773869347e-05, - "loss": 5.6996, - "step": 19700 - }, - { - "epoch": 10.27431551499348, - "grad_norm": 1.331878423690796, - "learning_rate": 8.070854271356785e-05, - "loss": 5.4242, - "step": 19701 - }, - { - "epoch": 10.2748370273794, - "grad_norm": 1.5839717388153076, - "learning_rate": 8.070753768844222e-05, - "loss": 4.9366, - "step": 19702 - }, - { - "epoch": 10.27535853976532, - "grad_norm": 1.5349148511886597, - "learning_rate": 8.07065326633166e-05, - "loss": 5.1897, - "step": 19703 - }, - { - "epoch": 10.275880052151239, - "grad_norm": 1.4989714622497559, - "learning_rate": 8.070552763819096e-05, - "loss": 5.6447, - "step": 19704 - }, - { - "epoch": 10.276401564537158, - "grad_norm": 1.45926034450531, - "learning_rate": 8.070452261306532e-05, - "loss": 5.0081, - "step": 19705 - }, - { - "epoch": 10.276923076923078, - "grad_norm": 1.3755757808685303, - "learning_rate": 8.07035175879397e-05, - "loss": 5.262, - "step": 19706 - }, - { - "epoch": 10.277444589308995, - "grad_norm": 1.4238673448562622, - "learning_rate": 8.070251256281407e-05, - "loss": 5.7853, - "step": 19707 - }, - { - "epoch": 10.277966101694915, - "grad_norm": 1.5322812795639038, - "learning_rate": 8.070150753768844e-05, - "loss": 5.0723, - "step": 19708 - }, - { - "epoch": 10.278487614080834, - "grad_norm": 1.4861791133880615, - "learning_rate": 8.070050251256282e-05, - "loss": 5.3556, - "step": 19709 - }, - { - "epoch": 10.279009126466754, - "grad_norm": 1.3552556037902832, - "learning_rate": 8.06994974874372e-05, - "loss": 5.6164, - "step": 19710 - }, - { - "epoch": 10.279530638852673, - "grad_norm": 1.383139967918396, - "learning_rate": 8.069849246231156e-05, - "loss": 5.4888, - "step": 19711 - }, - { - "epoch": 10.280052151238593, - "grad_norm": 1.3704948425292969, - "learning_rate": 8.069748743718594e-05, - "loss": 5.7088, - "step": 19712 - }, - { - "epoch": 10.28057366362451, - "grad_norm": 1.482918381690979, - "learning_rate": 8.06964824120603e-05, - "loss": 5.237, - "step": 19713 - }, - { - "epoch": 10.28109517601043, - "grad_norm": 1.3751720190048218, - "learning_rate": 8.069547738693468e-05, - "loss": 5.6332, - "step": 19714 - }, - { - "epoch": 10.28161668839635, - "grad_norm": 1.4367578029632568, - "learning_rate": 8.069447236180905e-05, - "loss": 5.5835, - "step": 19715 - }, - { - "epoch": 10.282138200782269, - "grad_norm": 1.4691919088363647, - "learning_rate": 8.069346733668343e-05, - "loss": 5.3707, - "step": 19716 - }, - { - "epoch": 10.282659713168188, - "grad_norm": 1.379629373550415, - "learning_rate": 8.069246231155779e-05, - "loss": 5.4364, - "step": 19717 - }, - { - "epoch": 10.283181225554108, - "grad_norm": 1.3491318225860596, - "learning_rate": 8.069145728643215e-05, - "loss": 5.564, - "step": 19718 - }, - { - "epoch": 10.283702737940025, - "grad_norm": 1.4381773471832275, - "learning_rate": 8.069045226130653e-05, - "loss": 5.5097, - "step": 19719 - }, - { - "epoch": 10.284224250325945, - "grad_norm": 1.3275089263916016, - "learning_rate": 8.068944723618091e-05, - "loss": 5.864, - "step": 19720 - }, - { - "epoch": 10.284745762711864, - "grad_norm": 1.4705091714859009, - "learning_rate": 8.068844221105529e-05, - "loss": 5.7192, - "step": 19721 - }, - { - "epoch": 10.285267275097784, - "grad_norm": 1.4034209251403809, - "learning_rate": 8.068743718592965e-05, - "loss": 5.4819, - "step": 19722 - }, - { - "epoch": 10.285788787483703, - "grad_norm": 1.5316036939620972, - "learning_rate": 8.068643216080403e-05, - "loss": 5.3658, - "step": 19723 - }, - { - "epoch": 10.286310299869623, - "grad_norm": 1.272879958152771, - "learning_rate": 8.06854271356784e-05, - "loss": 5.6798, - "step": 19724 - }, - { - "epoch": 10.28683181225554, - "grad_norm": 1.457205891609192, - "learning_rate": 8.068442211055277e-05, - "loss": 5.424, - "step": 19725 - }, - { - "epoch": 10.28735332464146, - "grad_norm": 1.5827877521514893, - "learning_rate": 8.068341708542714e-05, - "loss": 5.038, - "step": 19726 - }, - { - "epoch": 10.28787483702738, - "grad_norm": 1.5501630306243896, - "learning_rate": 8.068241206030151e-05, - "loss": 5.0506, - "step": 19727 - }, - { - "epoch": 10.288396349413299, - "grad_norm": 1.5606615543365479, - "learning_rate": 8.068140703517588e-05, - "loss": 5.526, - "step": 19728 - }, - { - "epoch": 10.288917861799218, - "grad_norm": 1.5909795761108398, - "learning_rate": 8.068040201005026e-05, - "loss": 5.444, - "step": 19729 - }, - { - "epoch": 10.289439374185136, - "grad_norm": 1.5725868940353394, - "learning_rate": 8.067939698492462e-05, - "loss": 5.1615, - "step": 19730 - }, - { - "epoch": 10.289960886571055, - "grad_norm": 1.5448347330093384, - "learning_rate": 8.0678391959799e-05, - "loss": 5.5534, - "step": 19731 - }, - { - "epoch": 10.290482398956975, - "grad_norm": 1.4106744527816772, - "learning_rate": 8.067738693467338e-05, - "loss": 5.2316, - "step": 19732 - }, - { - "epoch": 10.291003911342894, - "grad_norm": 1.3785110712051392, - "learning_rate": 8.067638190954774e-05, - "loss": 5.3182, - "step": 19733 - }, - { - "epoch": 10.291525423728814, - "grad_norm": 1.3887274265289307, - "learning_rate": 8.067537688442212e-05, - "loss": 5.7057, - "step": 19734 - }, - { - "epoch": 10.292046936114733, - "grad_norm": 1.396034598350525, - "learning_rate": 8.067437185929648e-05, - "loss": 5.6827, - "step": 19735 - }, - { - "epoch": 10.292568448500653, - "grad_norm": 1.5648188591003418, - "learning_rate": 8.067336683417086e-05, - "loss": 5.3744, - "step": 19736 - }, - { - "epoch": 10.29308996088657, - "grad_norm": 1.3619029521942139, - "learning_rate": 8.067236180904522e-05, - "loss": 5.7955, - "step": 19737 - }, - { - "epoch": 10.29361147327249, - "grad_norm": 1.4577748775482178, - "learning_rate": 8.06713567839196e-05, - "loss": 5.452, - "step": 19738 - }, - { - "epoch": 10.29413298565841, - "grad_norm": 1.3790501356124878, - "learning_rate": 8.067035175879397e-05, - "loss": 5.284, - "step": 19739 - }, - { - "epoch": 10.294654498044329, - "grad_norm": 1.47101891040802, - "learning_rate": 8.066934673366834e-05, - "loss": 5.5862, - "step": 19740 - }, - { - "epoch": 10.295176010430248, - "grad_norm": 1.4468876123428345, - "learning_rate": 8.066834170854272e-05, - "loss": 5.2988, - "step": 19741 - }, - { - "epoch": 10.295697522816166, - "grad_norm": 1.4375320672988892, - "learning_rate": 8.06673366834171e-05, - "loss": 5.6101, - "step": 19742 - }, - { - "epoch": 10.296219035202085, - "grad_norm": 1.4924684762954712, - "learning_rate": 8.066633165829146e-05, - "loss": 5.1953, - "step": 19743 - }, - { - "epoch": 10.296740547588005, - "grad_norm": 1.500489592552185, - "learning_rate": 8.066532663316583e-05, - "loss": 5.477, - "step": 19744 - }, - { - "epoch": 10.297262059973924, - "grad_norm": 1.3847743272781372, - "learning_rate": 8.06643216080402e-05, - "loss": 5.4902, - "step": 19745 - }, - { - "epoch": 10.297783572359844, - "grad_norm": 1.635524034500122, - "learning_rate": 8.066331658291457e-05, - "loss": 5.2532, - "step": 19746 - }, - { - "epoch": 10.298305084745763, - "grad_norm": 1.353298306465149, - "learning_rate": 8.066231155778895e-05, - "loss": 5.7615, - "step": 19747 - }, - { - "epoch": 10.298826597131681, - "grad_norm": 1.3988207578659058, - "learning_rate": 8.066130653266331e-05, - "loss": 5.6587, - "step": 19748 - }, - { - "epoch": 10.2993481095176, - "grad_norm": 1.42936110496521, - "learning_rate": 8.066030150753769e-05, - "loss": 5.5101, - "step": 19749 - }, - { - "epoch": 10.29986962190352, - "grad_norm": 1.389043927192688, - "learning_rate": 8.065929648241205e-05, - "loss": 5.2493, - "step": 19750 - }, - { - "epoch": 10.30039113428944, - "grad_norm": 1.5476787090301514, - "learning_rate": 8.065829145728643e-05, - "loss": 4.9675, - "step": 19751 - }, - { - "epoch": 10.300912646675359, - "grad_norm": 1.5318892002105713, - "learning_rate": 8.065728643216081e-05, - "loss": 5.127, - "step": 19752 - }, - { - "epoch": 10.301434159061278, - "grad_norm": 1.492361307144165, - "learning_rate": 8.065628140703519e-05, - "loss": 5.099, - "step": 19753 - }, - { - "epoch": 10.301955671447196, - "grad_norm": 1.3157211542129517, - "learning_rate": 8.065527638190955e-05, - "loss": 5.7055, - "step": 19754 - }, - { - "epoch": 10.302477183833116, - "grad_norm": 1.41178560256958, - "learning_rate": 8.065427135678393e-05, - "loss": 5.7273, - "step": 19755 - }, - { - "epoch": 10.302998696219035, - "grad_norm": 1.404778003692627, - "learning_rate": 8.06532663316583e-05, - "loss": 5.7915, - "step": 19756 - }, - { - "epoch": 10.303520208604954, - "grad_norm": 1.3424372673034668, - "learning_rate": 8.065226130653267e-05, - "loss": 5.8097, - "step": 19757 - }, - { - "epoch": 10.304041720990874, - "grad_norm": 1.3517769575119019, - "learning_rate": 8.065125628140704e-05, - "loss": 5.9101, - "step": 19758 - }, - { - "epoch": 10.304563233376793, - "grad_norm": 1.4897257089614868, - "learning_rate": 8.06502512562814e-05, - "loss": 5.5216, - "step": 19759 - }, - { - "epoch": 10.305084745762711, - "grad_norm": 1.4176007509231567, - "learning_rate": 8.064924623115578e-05, - "loss": 5.2794, - "step": 19760 - }, - { - "epoch": 10.30560625814863, - "grad_norm": 1.3954837322235107, - "learning_rate": 8.064824120603016e-05, - "loss": 5.6428, - "step": 19761 - }, - { - "epoch": 10.30612777053455, - "grad_norm": 1.5730704069137573, - "learning_rate": 8.064723618090453e-05, - "loss": 5.3944, - "step": 19762 - }, - { - "epoch": 10.30664928292047, - "grad_norm": 1.5818344354629517, - "learning_rate": 8.06462311557789e-05, - "loss": 5.1065, - "step": 19763 - }, - { - "epoch": 10.307170795306389, - "grad_norm": 1.5362111330032349, - "learning_rate": 8.064522613065328e-05, - "loss": 5.6834, - "step": 19764 - }, - { - "epoch": 10.307692307692308, - "grad_norm": 1.4292616844177246, - "learning_rate": 8.064422110552764e-05, - "loss": 5.3976, - "step": 19765 - }, - { - "epoch": 10.308213820078226, - "grad_norm": 1.4488484859466553, - "learning_rate": 8.064321608040202e-05, - "loss": 5.5285, - "step": 19766 - }, - { - "epoch": 10.308735332464146, - "grad_norm": 1.3237862586975098, - "learning_rate": 8.064221105527638e-05, - "loss": 5.5267, - "step": 19767 - }, - { - "epoch": 10.309256844850065, - "grad_norm": 1.552161693572998, - "learning_rate": 8.064120603015076e-05, - "loss": 5.1688, - "step": 19768 - }, - { - "epoch": 10.309778357235984, - "grad_norm": 1.4585472345352173, - "learning_rate": 8.064020100502512e-05, - "loss": 5.6491, - "step": 19769 - }, - { - "epoch": 10.310299869621904, - "grad_norm": 1.4584161043167114, - "learning_rate": 8.06391959798995e-05, - "loss": 5.373, - "step": 19770 - }, - { - "epoch": 10.310821382007823, - "grad_norm": 1.5045256614685059, - "learning_rate": 8.063819095477387e-05, - "loss": 5.3348, - "step": 19771 - }, - { - "epoch": 10.311342894393741, - "grad_norm": 1.413736343383789, - "learning_rate": 8.063718592964824e-05, - "loss": 5.6683, - "step": 19772 - }, - { - "epoch": 10.31186440677966, - "grad_norm": 1.437482476234436, - "learning_rate": 8.063618090452262e-05, - "loss": 5.4111, - "step": 19773 - }, - { - "epoch": 10.31238591916558, - "grad_norm": 1.4075196981430054, - "learning_rate": 8.063517587939699e-05, - "loss": 5.7813, - "step": 19774 - }, - { - "epoch": 10.3129074315515, - "grad_norm": 1.5158138275146484, - "learning_rate": 8.063417085427136e-05, - "loss": 4.9191, - "step": 19775 - }, - { - "epoch": 10.313428943937419, - "grad_norm": 1.4858710765838623, - "learning_rate": 8.063316582914573e-05, - "loss": 5.2243, - "step": 19776 - }, - { - "epoch": 10.313950456323338, - "grad_norm": 1.4248180389404297, - "learning_rate": 8.063216080402011e-05, - "loss": 5.3247, - "step": 19777 - }, - { - "epoch": 10.314471968709256, - "grad_norm": 1.5085856914520264, - "learning_rate": 8.063115577889447e-05, - "loss": 5.5014, - "step": 19778 - }, - { - "epoch": 10.314993481095176, - "grad_norm": 1.4496021270751953, - "learning_rate": 8.063015075376885e-05, - "loss": 5.6399, - "step": 19779 - }, - { - "epoch": 10.315514993481095, - "grad_norm": 1.4402644634246826, - "learning_rate": 8.062914572864321e-05, - "loss": 5.4492, - "step": 19780 - }, - { - "epoch": 10.316036505867014, - "grad_norm": 1.4391881227493286, - "learning_rate": 8.062814070351759e-05, - "loss": 5.8239, - "step": 19781 - }, - { - "epoch": 10.316558018252934, - "grad_norm": 1.4308634996414185, - "learning_rate": 8.062713567839197e-05, - "loss": 5.5093, - "step": 19782 - }, - { - "epoch": 10.317079530638853, - "grad_norm": 1.3870587348937988, - "learning_rate": 8.062613065326635e-05, - "loss": 5.8527, - "step": 19783 - }, - { - "epoch": 10.317601043024771, - "grad_norm": 1.5040020942687988, - "learning_rate": 8.062512562814071e-05, - "loss": 5.495, - "step": 19784 - }, - { - "epoch": 10.31812255541069, - "grad_norm": 1.3588260412216187, - "learning_rate": 8.062412060301508e-05, - "loss": 5.4612, - "step": 19785 - }, - { - "epoch": 10.31864406779661, - "grad_norm": 1.3641928434371948, - "learning_rate": 8.062311557788945e-05, - "loss": 5.5744, - "step": 19786 - }, - { - "epoch": 10.31916558018253, - "grad_norm": 1.4436606168746948, - "learning_rate": 8.062211055276382e-05, - "loss": 5.0211, - "step": 19787 - }, - { - "epoch": 10.319687092568449, - "grad_norm": 1.4276338815689087, - "learning_rate": 8.06211055276382e-05, - "loss": 5.6513, - "step": 19788 - }, - { - "epoch": 10.320208604954368, - "grad_norm": 1.4876201152801514, - "learning_rate": 8.062010050251256e-05, - "loss": 5.6305, - "step": 19789 - }, - { - "epoch": 10.320730117340286, - "grad_norm": 1.5833113193511963, - "learning_rate": 8.061909547738694e-05, - "loss": 4.6314, - "step": 19790 - }, - { - "epoch": 10.321251629726206, - "grad_norm": 1.4114474058151245, - "learning_rate": 8.06180904522613e-05, - "loss": 5.5418, - "step": 19791 - }, - { - "epoch": 10.321773142112125, - "grad_norm": 1.4560524225234985, - "learning_rate": 8.061708542713568e-05, - "loss": 5.241, - "step": 19792 - }, - { - "epoch": 10.322294654498045, - "grad_norm": 1.444079875946045, - "learning_rate": 8.061608040201006e-05, - "loss": 5.5885, - "step": 19793 - }, - { - "epoch": 10.322816166883964, - "grad_norm": 1.3734147548675537, - "learning_rate": 8.061507537688444e-05, - "loss": 5.5322, - "step": 19794 - }, - { - "epoch": 10.323337679269883, - "grad_norm": 1.4911632537841797, - "learning_rate": 8.06140703517588e-05, - "loss": 5.3889, - "step": 19795 - }, - { - "epoch": 10.323859191655801, - "grad_norm": 1.4427682161331177, - "learning_rate": 8.061306532663318e-05, - "loss": 5.8106, - "step": 19796 - }, - { - "epoch": 10.32438070404172, - "grad_norm": 1.4021507501602173, - "learning_rate": 8.061206030150754e-05, - "loss": 5.5727, - "step": 19797 - }, - { - "epoch": 10.32490221642764, - "grad_norm": 1.3106871843338013, - "learning_rate": 8.06110552763819e-05, - "loss": 5.74, - "step": 19798 - }, - { - "epoch": 10.32542372881356, - "grad_norm": 1.3510991334915161, - "learning_rate": 8.061005025125628e-05, - "loss": 5.5231, - "step": 19799 - }, - { - "epoch": 10.325945241199479, - "grad_norm": 1.386620283126831, - "learning_rate": 8.060904522613065e-05, - "loss": 5.3057, - "step": 19800 - }, - { - "epoch": 10.326466753585398, - "grad_norm": 1.3749103546142578, - "learning_rate": 8.060804020100503e-05, - "loss": 5.7692, - "step": 19801 - }, - { - "epoch": 10.326988265971316, - "grad_norm": 1.348341941833496, - "learning_rate": 8.06070351758794e-05, - "loss": 5.8455, - "step": 19802 - }, - { - "epoch": 10.327509778357236, - "grad_norm": 1.565545678138733, - "learning_rate": 8.060603015075378e-05, - "loss": 5.2218, - "step": 19803 - }, - { - "epoch": 10.328031290743155, - "grad_norm": 1.565821647644043, - "learning_rate": 8.060502512562815e-05, - "loss": 5.4205, - "step": 19804 - }, - { - "epoch": 10.328552803129075, - "grad_norm": 1.4983456134796143, - "learning_rate": 8.060402010050252e-05, - "loss": 5.3835, - "step": 19805 - }, - { - "epoch": 10.329074315514994, - "grad_norm": 1.3550137281417847, - "learning_rate": 8.060301507537689e-05, - "loss": 5.4738, - "step": 19806 - }, - { - "epoch": 10.329595827900913, - "grad_norm": 1.4826171398162842, - "learning_rate": 8.060201005025127e-05, - "loss": 5.1315, - "step": 19807 - }, - { - "epoch": 10.330117340286831, - "grad_norm": 1.479589581489563, - "learning_rate": 8.060100502512563e-05, - "loss": 5.4416, - "step": 19808 - }, - { - "epoch": 10.33063885267275, - "grad_norm": 1.4084969758987427, - "learning_rate": 8.060000000000001e-05, - "loss": 5.3883, - "step": 19809 - }, - { - "epoch": 10.33116036505867, - "grad_norm": 1.3796697854995728, - "learning_rate": 8.059899497487437e-05, - "loss": 5.5951, - "step": 19810 - }, - { - "epoch": 10.33168187744459, - "grad_norm": 1.350969910621643, - "learning_rate": 8.059798994974874e-05, - "loss": 5.5594, - "step": 19811 - }, - { - "epoch": 10.332203389830509, - "grad_norm": 1.4563184976577759, - "learning_rate": 8.059698492462311e-05, - "loss": 5.5697, - "step": 19812 - }, - { - "epoch": 10.332724902216428, - "grad_norm": 1.5332075357437134, - "learning_rate": 8.059597989949749e-05, - "loss": 5.3495, - "step": 19813 - }, - { - "epoch": 10.333246414602346, - "grad_norm": 1.4552077054977417, - "learning_rate": 8.059497487437187e-05, - "loss": 5.3637, - "step": 19814 - }, - { - "epoch": 10.333767926988266, - "grad_norm": 1.5139317512512207, - "learning_rate": 8.059396984924623e-05, - "loss": 5.182, - "step": 19815 - }, - { - "epoch": 10.334289439374185, - "grad_norm": 1.5004678964614868, - "learning_rate": 8.059296482412061e-05, - "loss": 5.203, - "step": 19816 - }, - { - "epoch": 10.334810951760105, - "grad_norm": 1.357333779335022, - "learning_rate": 8.059195979899498e-05, - "loss": 5.6212, - "step": 19817 - }, - { - "epoch": 10.335332464146024, - "grad_norm": 1.552425742149353, - "learning_rate": 8.059095477386935e-05, - "loss": 5.3441, - "step": 19818 - }, - { - "epoch": 10.335853976531943, - "grad_norm": 1.4816426038742065, - "learning_rate": 8.058994974874372e-05, - "loss": 5.4108, - "step": 19819 - }, - { - "epoch": 10.336375488917861, - "grad_norm": 1.409996747970581, - "learning_rate": 8.05889447236181e-05, - "loss": 5.776, - "step": 19820 - }, - { - "epoch": 10.33689700130378, - "grad_norm": 1.372625708580017, - "learning_rate": 8.058793969849246e-05, - "loss": 5.5522, - "step": 19821 - }, - { - "epoch": 10.3374185136897, - "grad_norm": 1.458839774131775, - "learning_rate": 8.058693467336684e-05, - "loss": 5.2303, - "step": 19822 - }, - { - "epoch": 10.33794002607562, - "grad_norm": 1.305536150932312, - "learning_rate": 8.058592964824122e-05, - "loss": 5.5047, - "step": 19823 - }, - { - "epoch": 10.338461538461539, - "grad_norm": 1.4422553777694702, - "learning_rate": 8.058492462311558e-05, - "loss": 5.3836, - "step": 19824 - }, - { - "epoch": 10.338983050847457, - "grad_norm": 1.3653644323349, - "learning_rate": 8.058391959798996e-05, - "loss": 5.9654, - "step": 19825 - }, - { - "epoch": 10.339504563233376, - "grad_norm": 1.4360255002975464, - "learning_rate": 8.058291457286432e-05, - "loss": 5.1319, - "step": 19826 - }, - { - "epoch": 10.340026075619296, - "grad_norm": 1.8096814155578613, - "learning_rate": 8.05819095477387e-05, - "loss": 4.4558, - "step": 19827 - }, - { - "epoch": 10.340547588005215, - "grad_norm": 1.394923448562622, - "learning_rate": 8.058090452261306e-05, - "loss": 5.6642, - "step": 19828 - }, - { - "epoch": 10.341069100391135, - "grad_norm": 1.3629857301712036, - "learning_rate": 8.057989949748744e-05, - "loss": 5.2881, - "step": 19829 - }, - { - "epoch": 10.341590612777054, - "grad_norm": 1.4019643068313599, - "learning_rate": 8.05788944723618e-05, - "loss": 5.653, - "step": 19830 - }, - { - "epoch": 10.342112125162974, - "grad_norm": 1.414834976196289, - "learning_rate": 8.057788944723618e-05, - "loss": 5.3295, - "step": 19831 - }, - { - "epoch": 10.342633637548891, - "grad_norm": 1.4326661825180054, - "learning_rate": 8.057688442211055e-05, - "loss": 5.1506, - "step": 19832 - }, - { - "epoch": 10.34315514993481, - "grad_norm": 1.4020349979400635, - "learning_rate": 8.057587939698493e-05, - "loss": 5.7316, - "step": 19833 - }, - { - "epoch": 10.34367666232073, - "grad_norm": 1.2964818477630615, - "learning_rate": 8.05748743718593e-05, - "loss": 5.1177, - "step": 19834 - }, - { - "epoch": 10.34419817470665, - "grad_norm": 1.3350223302841187, - "learning_rate": 8.057386934673368e-05, - "loss": 5.6078, - "step": 19835 - }, - { - "epoch": 10.344719687092569, - "grad_norm": 1.181787133216858, - "learning_rate": 8.057286432160805e-05, - "loss": 5.2182, - "step": 19836 - }, - { - "epoch": 10.345241199478487, - "grad_norm": 1.3475735187530518, - "learning_rate": 8.057185929648241e-05, - "loss": 5.9375, - "step": 19837 - }, - { - "epoch": 10.345762711864406, - "grad_norm": 1.4052367210388184, - "learning_rate": 8.057085427135679e-05, - "loss": 5.336, - "step": 19838 - }, - { - "epoch": 10.346284224250326, - "grad_norm": 1.3503929376602173, - "learning_rate": 8.056984924623115e-05, - "loss": 5.8518, - "step": 19839 - }, - { - "epoch": 10.346805736636245, - "grad_norm": 1.4892840385437012, - "learning_rate": 8.056884422110553e-05, - "loss": 5.5372, - "step": 19840 - }, - { - "epoch": 10.347327249022165, - "grad_norm": 1.33498215675354, - "learning_rate": 8.05678391959799e-05, - "loss": 4.739, - "step": 19841 - }, - { - "epoch": 10.347848761408084, - "grad_norm": 1.4579837322235107, - "learning_rate": 8.056683417085427e-05, - "loss": 5.2181, - "step": 19842 - }, - { - "epoch": 10.348370273794002, - "grad_norm": 1.3395124673843384, - "learning_rate": 8.056582914572865e-05, - "loss": 5.6773, - "step": 19843 - }, - { - "epoch": 10.348891786179921, - "grad_norm": 1.3693982362747192, - "learning_rate": 8.056482412060303e-05, - "loss": 5.6594, - "step": 19844 - }, - { - "epoch": 10.34941329856584, - "grad_norm": 1.4906103610992432, - "learning_rate": 8.056381909547739e-05, - "loss": 4.9869, - "step": 19845 - }, - { - "epoch": 10.34993481095176, - "grad_norm": 1.3108893632888794, - "learning_rate": 8.056281407035177e-05, - "loss": 5.6334, - "step": 19846 - }, - { - "epoch": 10.35045632333768, - "grad_norm": 1.4764999151229858, - "learning_rate": 8.056180904522613e-05, - "loss": 5.6453, - "step": 19847 - }, - { - "epoch": 10.350977835723599, - "grad_norm": 1.5518531799316406, - "learning_rate": 8.056080402010051e-05, - "loss": 5.1401, - "step": 19848 - }, - { - "epoch": 10.351499348109517, - "grad_norm": 1.3917856216430664, - "learning_rate": 8.055979899497488e-05, - "loss": 5.0446, - "step": 19849 - }, - { - "epoch": 10.352020860495436, - "grad_norm": 1.3659136295318604, - "learning_rate": 8.055879396984925e-05, - "loss": 5.7786, - "step": 19850 - }, - { - "epoch": 10.352542372881356, - "grad_norm": 1.457262396812439, - "learning_rate": 8.055778894472362e-05, - "loss": 5.6548, - "step": 19851 - }, - { - "epoch": 10.353063885267275, - "grad_norm": 1.4075727462768555, - "learning_rate": 8.055678391959798e-05, - "loss": 5.816, - "step": 19852 - }, - { - "epoch": 10.353585397653195, - "grad_norm": 1.5356796979904175, - "learning_rate": 8.055577889447236e-05, - "loss": 5.1764, - "step": 19853 - }, - { - "epoch": 10.354106910039114, - "grad_norm": 1.461155891418457, - "learning_rate": 8.055477386934674e-05, - "loss": 5.6068, - "step": 19854 - }, - { - "epoch": 10.354628422425032, - "grad_norm": 1.5037068128585815, - "learning_rate": 8.055376884422112e-05, - "loss": 5.4061, - "step": 19855 - }, - { - "epoch": 10.355149934810951, - "grad_norm": 1.517964243888855, - "learning_rate": 8.055276381909548e-05, - "loss": 5.6929, - "step": 19856 - }, - { - "epoch": 10.35567144719687, - "grad_norm": 1.4980500936508179, - "learning_rate": 8.055175879396986e-05, - "loss": 5.2745, - "step": 19857 - }, - { - "epoch": 10.35619295958279, - "grad_norm": 1.4591717720031738, - "learning_rate": 8.055075376884422e-05, - "loss": 5.2639, - "step": 19858 - }, - { - "epoch": 10.35671447196871, - "grad_norm": 1.3534847497940063, - "learning_rate": 8.05497487437186e-05, - "loss": 5.6992, - "step": 19859 - }, - { - "epoch": 10.357235984354629, - "grad_norm": 1.4396982192993164, - "learning_rate": 8.054874371859297e-05, - "loss": 5.3898, - "step": 19860 - }, - { - "epoch": 10.357757496740547, - "grad_norm": 1.4613100290298462, - "learning_rate": 8.054773869346734e-05, - "loss": 5.6587, - "step": 19861 - }, - { - "epoch": 10.358279009126466, - "grad_norm": 1.4455432891845703, - "learning_rate": 8.054673366834171e-05, - "loss": 5.317, - "step": 19862 - }, - { - "epoch": 10.358800521512386, - "grad_norm": 1.44996976852417, - "learning_rate": 8.054572864321609e-05, - "loss": 5.5909, - "step": 19863 - }, - { - "epoch": 10.359322033898305, - "grad_norm": 1.4759631156921387, - "learning_rate": 8.054472361809046e-05, - "loss": 5.4615, - "step": 19864 - }, - { - "epoch": 10.359843546284225, - "grad_norm": 1.38669753074646, - "learning_rate": 8.054371859296483e-05, - "loss": 5.6315, - "step": 19865 - }, - { - "epoch": 10.360365058670144, - "grad_norm": 1.324921727180481, - "learning_rate": 8.05427135678392e-05, - "loss": 5.7339, - "step": 19866 - }, - { - "epoch": 10.360886571056062, - "grad_norm": 1.3872483968734741, - "learning_rate": 8.054170854271357e-05, - "loss": 5.4624, - "step": 19867 - }, - { - "epoch": 10.361408083441981, - "grad_norm": 1.4025781154632568, - "learning_rate": 8.054070351758795e-05, - "loss": 5.2241, - "step": 19868 - }, - { - "epoch": 10.3619295958279, - "grad_norm": 1.3966808319091797, - "learning_rate": 8.053969849246231e-05, - "loss": 5.8647, - "step": 19869 - }, - { - "epoch": 10.36245110821382, - "grad_norm": 1.3957785367965698, - "learning_rate": 8.053869346733669e-05, - "loss": 5.5616, - "step": 19870 - }, - { - "epoch": 10.36297262059974, - "grad_norm": 1.4674654006958008, - "learning_rate": 8.053768844221105e-05, - "loss": 5.5028, - "step": 19871 - }, - { - "epoch": 10.36349413298566, - "grad_norm": 1.3123106956481934, - "learning_rate": 8.053668341708543e-05, - "loss": 4.8583, - "step": 19872 - }, - { - "epoch": 10.364015645371577, - "grad_norm": 1.3349554538726807, - "learning_rate": 8.05356783919598e-05, - "loss": 5.5804, - "step": 19873 - }, - { - "epoch": 10.364537157757496, - "grad_norm": 1.4158401489257812, - "learning_rate": 8.053467336683417e-05, - "loss": 5.6288, - "step": 19874 - }, - { - "epoch": 10.365058670143416, - "grad_norm": 1.4898449182510376, - "learning_rate": 8.053366834170855e-05, - "loss": 5.5112, - "step": 19875 - }, - { - "epoch": 10.365580182529335, - "grad_norm": 1.4258404970169067, - "learning_rate": 8.053266331658293e-05, - "loss": 5.4408, - "step": 19876 - }, - { - "epoch": 10.366101694915255, - "grad_norm": 1.4582278728485107, - "learning_rate": 8.05316582914573e-05, - "loss": 5.6328, - "step": 19877 - }, - { - "epoch": 10.366623207301174, - "grad_norm": 1.5079933404922485, - "learning_rate": 8.053065326633166e-05, - "loss": 5.2247, - "step": 19878 - }, - { - "epoch": 10.367144719687092, - "grad_norm": 1.3985559940338135, - "learning_rate": 8.052964824120604e-05, - "loss": 5.364, - "step": 19879 - }, - { - "epoch": 10.367666232073011, - "grad_norm": 1.3819801807403564, - "learning_rate": 8.05286432160804e-05, - "loss": 5.4357, - "step": 19880 - }, - { - "epoch": 10.36818774445893, - "grad_norm": 1.5296952724456787, - "learning_rate": 8.052763819095478e-05, - "loss": 5.3386, - "step": 19881 - }, - { - "epoch": 10.36870925684485, - "grad_norm": 1.4211132526397705, - "learning_rate": 8.052663316582914e-05, - "loss": 5.4523, - "step": 19882 - }, - { - "epoch": 10.36923076923077, - "grad_norm": 1.3990495204925537, - "learning_rate": 8.052562814070352e-05, - "loss": 5.7629, - "step": 19883 - }, - { - "epoch": 10.36975228161669, - "grad_norm": 1.321939468383789, - "learning_rate": 8.05246231155779e-05, - "loss": 5.4636, - "step": 19884 - }, - { - "epoch": 10.370273794002607, - "grad_norm": 1.5131045579910278, - "learning_rate": 8.052361809045228e-05, - "loss": 5.6481, - "step": 19885 - }, - { - "epoch": 10.370795306388526, - "grad_norm": 1.4053682088851929, - "learning_rate": 8.052261306532664e-05, - "loss": 5.6659, - "step": 19886 - }, - { - "epoch": 10.371316818774446, - "grad_norm": 1.3573309183120728, - "learning_rate": 8.052160804020102e-05, - "loss": 5.775, - "step": 19887 - }, - { - "epoch": 10.371838331160365, - "grad_norm": 1.5085012912750244, - "learning_rate": 8.052060301507538e-05, - "loss": 5.328, - "step": 19888 - }, - { - "epoch": 10.372359843546285, - "grad_norm": 1.4049532413482666, - "learning_rate": 8.051959798994976e-05, - "loss": 5.433, - "step": 19889 - }, - { - "epoch": 10.372881355932204, - "grad_norm": 1.5856317281723022, - "learning_rate": 8.051859296482412e-05, - "loss": 5.2516, - "step": 19890 - }, - { - "epoch": 10.373402868318122, - "grad_norm": 1.3369239568710327, - "learning_rate": 8.051758793969849e-05, - "loss": 5.7125, - "step": 19891 - }, - { - "epoch": 10.373924380704041, - "grad_norm": 1.4399809837341309, - "learning_rate": 8.051658291457287e-05, - "loss": 5.509, - "step": 19892 - }, - { - "epoch": 10.37444589308996, - "grad_norm": 1.462722897529602, - "learning_rate": 8.051557788944723e-05, - "loss": 5.5633, - "step": 19893 - }, - { - "epoch": 10.37496740547588, - "grad_norm": 1.394222617149353, - "learning_rate": 8.051457286432161e-05, - "loss": 5.5082, - "step": 19894 - }, - { - "epoch": 10.3754889178618, - "grad_norm": 1.3280123472213745, - "learning_rate": 8.051356783919599e-05, - "loss": 5.6744, - "step": 19895 - }, - { - "epoch": 10.37601043024772, - "grad_norm": 1.4408824443817139, - "learning_rate": 8.051256281407036e-05, - "loss": 5.6258, - "step": 19896 - }, - { - "epoch": 10.376531942633637, - "grad_norm": 1.478295922279358, - "learning_rate": 8.051155778894473e-05, - "loss": 5.0166, - "step": 19897 - }, - { - "epoch": 10.377053455019556, - "grad_norm": 1.2992810010910034, - "learning_rate": 8.05105527638191e-05, - "loss": 5.9811, - "step": 19898 - }, - { - "epoch": 10.377574967405476, - "grad_norm": 1.4433223009109497, - "learning_rate": 8.050954773869347e-05, - "loss": 5.5634, - "step": 19899 - }, - { - "epoch": 10.378096479791395, - "grad_norm": 1.4470492601394653, - "learning_rate": 8.050854271356785e-05, - "loss": 5.4205, - "step": 19900 - }, - { - "epoch": 10.378617992177315, - "grad_norm": 1.5078661441802979, - "learning_rate": 8.050753768844221e-05, - "loss": 5.1658, - "step": 19901 - }, - { - "epoch": 10.379139504563234, - "grad_norm": 1.4682739973068237, - "learning_rate": 8.050653266331659e-05, - "loss": 5.8103, - "step": 19902 - }, - { - "epoch": 10.379661016949152, - "grad_norm": 1.437899112701416, - "learning_rate": 8.050552763819095e-05, - "loss": 5.5797, - "step": 19903 - }, - { - "epoch": 10.380182529335071, - "grad_norm": 1.4933797121047974, - "learning_rate": 8.050452261306532e-05, - "loss": 5.599, - "step": 19904 - }, - { - "epoch": 10.38070404172099, - "grad_norm": 1.548477292060852, - "learning_rate": 8.05035175879397e-05, - "loss": 5.4559, - "step": 19905 - }, - { - "epoch": 10.38122555410691, - "grad_norm": 1.4017431735992432, - "learning_rate": 8.050251256281407e-05, - "loss": 5.5069, - "step": 19906 - }, - { - "epoch": 10.38174706649283, - "grad_norm": 1.4197993278503418, - "learning_rate": 8.050150753768845e-05, - "loss": 5.5367, - "step": 19907 - }, - { - "epoch": 10.38226857887875, - "grad_norm": 1.3796544075012207, - "learning_rate": 8.050050251256282e-05, - "loss": 4.8312, - "step": 19908 - }, - { - "epoch": 10.382790091264667, - "grad_norm": 1.4183613061904907, - "learning_rate": 8.04994974874372e-05, - "loss": 5.6411, - "step": 19909 - }, - { - "epoch": 10.383311603650586, - "grad_norm": 1.5794012546539307, - "learning_rate": 8.049849246231156e-05, - "loss": 5.301, - "step": 19910 - }, - { - "epoch": 10.383833116036506, - "grad_norm": 1.3517440557479858, - "learning_rate": 8.049748743718594e-05, - "loss": 5.8297, - "step": 19911 - }, - { - "epoch": 10.384354628422425, - "grad_norm": 1.2583736181259155, - "learning_rate": 8.04964824120603e-05, - "loss": 6.0487, - "step": 19912 - }, - { - "epoch": 10.384876140808345, - "grad_norm": 1.450395107269287, - "learning_rate": 8.049547738693468e-05, - "loss": 5.3735, - "step": 19913 - }, - { - "epoch": 10.385397653194264, - "grad_norm": 1.29973304271698, - "learning_rate": 8.049447236180904e-05, - "loss": 5.641, - "step": 19914 - }, - { - "epoch": 10.385919165580182, - "grad_norm": 1.5069270133972168, - "learning_rate": 8.049346733668342e-05, - "loss": 5.2526, - "step": 19915 - }, - { - "epoch": 10.386440677966101, - "grad_norm": 1.3516182899475098, - "learning_rate": 8.04924623115578e-05, - "loss": 5.8404, - "step": 19916 - }, - { - "epoch": 10.38696219035202, - "grad_norm": 1.3427523374557495, - "learning_rate": 8.049145728643216e-05, - "loss": 5.8426, - "step": 19917 - }, - { - "epoch": 10.38748370273794, - "grad_norm": 1.4065072536468506, - "learning_rate": 8.049045226130654e-05, - "loss": 5.5529, - "step": 19918 - }, - { - "epoch": 10.38800521512386, - "grad_norm": 1.5368915796279907, - "learning_rate": 8.04894472361809e-05, - "loss": 5.0762, - "step": 19919 - }, - { - "epoch": 10.388526727509777, - "grad_norm": 1.504387378692627, - "learning_rate": 8.048844221105528e-05, - "loss": 5.273, - "step": 19920 - }, - { - "epoch": 10.389048239895697, - "grad_norm": 1.5388939380645752, - "learning_rate": 8.048743718592965e-05, - "loss": 5.2839, - "step": 19921 - }, - { - "epoch": 10.389569752281616, - "grad_norm": 1.4012963771820068, - "learning_rate": 8.048643216080402e-05, - "loss": 5.4168, - "step": 19922 - }, - { - "epoch": 10.390091264667536, - "grad_norm": 1.428266167640686, - "learning_rate": 8.048542713567839e-05, - "loss": 5.4528, - "step": 19923 - }, - { - "epoch": 10.390612777053455, - "grad_norm": 1.3810570240020752, - "learning_rate": 8.048442211055277e-05, - "loss": 5.6033, - "step": 19924 - }, - { - "epoch": 10.391134289439375, - "grad_norm": 1.554585337638855, - "learning_rate": 8.048341708542713e-05, - "loss": 5.5115, - "step": 19925 - }, - { - "epoch": 10.391655801825294, - "grad_norm": 1.4755282402038574, - "learning_rate": 8.048241206030151e-05, - "loss": 5.136, - "step": 19926 - }, - { - "epoch": 10.392177314211212, - "grad_norm": 1.4590977430343628, - "learning_rate": 8.048140703517589e-05, - "loss": 5.5444, - "step": 19927 - }, - { - "epoch": 10.392698826597131, - "grad_norm": 1.4041529893875122, - "learning_rate": 8.048040201005026e-05, - "loss": 5.6541, - "step": 19928 - }, - { - "epoch": 10.39322033898305, - "grad_norm": 1.38002347946167, - "learning_rate": 8.047939698492463e-05, - "loss": 5.7059, - "step": 19929 - }, - { - "epoch": 10.39374185136897, - "grad_norm": 1.372519612312317, - "learning_rate": 8.0478391959799e-05, - "loss": 5.8126, - "step": 19930 - }, - { - "epoch": 10.39426336375489, - "grad_norm": 1.43014657497406, - "learning_rate": 8.047738693467337e-05, - "loss": 5.6474, - "step": 19931 - }, - { - "epoch": 10.394784876140807, - "grad_norm": 1.477543830871582, - "learning_rate": 8.047638190954774e-05, - "loss": 5.3197, - "step": 19932 - }, - { - "epoch": 10.395306388526727, - "grad_norm": 1.488138198852539, - "learning_rate": 8.047537688442211e-05, - "loss": 5.0518, - "step": 19933 - }, - { - "epoch": 10.395827900912646, - "grad_norm": 1.35440993309021, - "learning_rate": 8.047437185929648e-05, - "loss": 5.6389, - "step": 19934 - }, - { - "epoch": 10.396349413298566, - "grad_norm": 1.3866780996322632, - "learning_rate": 8.047336683417085e-05, - "loss": 5.3888, - "step": 19935 - }, - { - "epoch": 10.396870925684485, - "grad_norm": 1.4034926891326904, - "learning_rate": 8.047236180904523e-05, - "loss": 5.6027, - "step": 19936 - }, - { - "epoch": 10.397392438070405, - "grad_norm": 1.374549388885498, - "learning_rate": 8.047135678391961e-05, - "loss": 5.6272, - "step": 19937 - }, - { - "epoch": 10.397913950456322, - "grad_norm": 1.4031211137771606, - "learning_rate": 8.047035175879397e-05, - "loss": 5.3778, - "step": 19938 - }, - { - "epoch": 10.398435462842242, - "grad_norm": 2.1654834747314453, - "learning_rate": 8.046934673366835e-05, - "loss": 5.0983, - "step": 19939 - }, - { - "epoch": 10.398956975228161, - "grad_norm": 1.4986932277679443, - "learning_rate": 8.046834170854272e-05, - "loss": 5.4858, - "step": 19940 - }, - { - "epoch": 10.399478487614081, - "grad_norm": 1.480557918548584, - "learning_rate": 8.04673366834171e-05, - "loss": 5.5685, - "step": 19941 - }, - { - "epoch": 10.4, - "grad_norm": 1.4317172765731812, - "learning_rate": 8.046633165829146e-05, - "loss": 5.2303, - "step": 19942 - }, - { - "epoch": 10.40052151238592, - "grad_norm": 1.5317397117614746, - "learning_rate": 8.046532663316584e-05, - "loss": 4.61, - "step": 19943 - }, - { - "epoch": 10.401043024771838, - "grad_norm": 1.4820213317871094, - "learning_rate": 8.04643216080402e-05, - "loss": 5.1816, - "step": 19944 - }, - { - "epoch": 10.401564537157757, - "grad_norm": 1.555635929107666, - "learning_rate": 8.046331658291457e-05, - "loss": 5.3381, - "step": 19945 - }, - { - "epoch": 10.402086049543676, - "grad_norm": 1.419935941696167, - "learning_rate": 8.046231155778894e-05, - "loss": 5.802, - "step": 19946 - }, - { - "epoch": 10.402607561929596, - "grad_norm": 1.4380462169647217, - "learning_rate": 8.046130653266332e-05, - "loss": 5.3628, - "step": 19947 - }, - { - "epoch": 10.403129074315515, - "grad_norm": 1.5624200105667114, - "learning_rate": 8.04603015075377e-05, - "loss": 5.2221, - "step": 19948 - }, - { - "epoch": 10.403650586701435, - "grad_norm": 1.4189268350601196, - "learning_rate": 8.045929648241206e-05, - "loss": 4.976, - "step": 19949 - }, - { - "epoch": 10.404172099087353, - "grad_norm": 1.3355695009231567, - "learning_rate": 8.045829145728644e-05, - "loss": 5.7669, - "step": 19950 - }, - { - "epoch": 10.404693611473272, - "grad_norm": 1.316667079925537, - "learning_rate": 8.04572864321608e-05, - "loss": 5.7616, - "step": 19951 - }, - { - "epoch": 10.405215123859191, - "grad_norm": 1.545291543006897, - "learning_rate": 8.045628140703518e-05, - "loss": 5.3583, - "step": 19952 - }, - { - "epoch": 10.405736636245111, - "grad_norm": 1.4335500001907349, - "learning_rate": 8.045527638190955e-05, - "loss": 5.5262, - "step": 19953 - }, - { - "epoch": 10.40625814863103, - "grad_norm": 1.4660751819610596, - "learning_rate": 8.045427135678393e-05, - "loss": 5.4173, - "step": 19954 - }, - { - "epoch": 10.40677966101695, - "grad_norm": 1.6030346155166626, - "learning_rate": 8.045326633165829e-05, - "loss": 4.7999, - "step": 19955 - }, - { - "epoch": 10.407301173402868, - "grad_norm": 1.493813157081604, - "learning_rate": 8.045226130653267e-05, - "loss": 5.0958, - "step": 19956 - }, - { - "epoch": 10.407822685788787, - "grad_norm": 1.4698222875595093, - "learning_rate": 8.045125628140705e-05, - "loss": 5.4604, - "step": 19957 - }, - { - "epoch": 10.408344198174706, - "grad_norm": 1.450211763381958, - "learning_rate": 8.045025125628141e-05, - "loss": 5.4088, - "step": 19958 - }, - { - "epoch": 10.408865710560626, - "grad_norm": 1.4533228874206543, - "learning_rate": 8.044924623115579e-05, - "loss": 5.2014, - "step": 19959 - }, - { - "epoch": 10.409387222946545, - "grad_norm": 1.3268141746520996, - "learning_rate": 8.044824120603015e-05, - "loss": 5.735, - "step": 19960 - }, - { - "epoch": 10.409908735332465, - "grad_norm": 1.3400477170944214, - "learning_rate": 8.044723618090453e-05, - "loss": 5.6407, - "step": 19961 - }, - { - "epoch": 10.410430247718383, - "grad_norm": 1.3470672369003296, - "learning_rate": 8.04462311557789e-05, - "loss": 5.8412, - "step": 19962 - }, - { - "epoch": 10.410951760104302, - "grad_norm": 1.3763611316680908, - "learning_rate": 8.044522613065327e-05, - "loss": 5.3828, - "step": 19963 - }, - { - "epoch": 10.411473272490221, - "grad_norm": 1.4590768814086914, - "learning_rate": 8.044422110552764e-05, - "loss": 5.4262, - "step": 19964 - }, - { - "epoch": 10.411994784876141, - "grad_norm": 1.4496899843215942, - "learning_rate": 8.044321608040201e-05, - "loss": 5.334, - "step": 19965 - }, - { - "epoch": 10.41251629726206, - "grad_norm": 1.3286716938018799, - "learning_rate": 8.044221105527638e-05, - "loss": 5.3623, - "step": 19966 - }, - { - "epoch": 10.41303780964798, - "grad_norm": 1.3421027660369873, - "learning_rate": 8.044120603015076e-05, - "loss": 5.7623, - "step": 19967 - }, - { - "epoch": 10.413559322033898, - "grad_norm": 1.4689563512802124, - "learning_rate": 8.044020100502513e-05, - "loss": 5.6136, - "step": 19968 - }, - { - "epoch": 10.414080834419817, - "grad_norm": 1.4162120819091797, - "learning_rate": 8.043919597989951e-05, - "loss": 5.5996, - "step": 19969 - }, - { - "epoch": 10.414602346805736, - "grad_norm": 1.5002307891845703, - "learning_rate": 8.043819095477388e-05, - "loss": 5.2115, - "step": 19970 - }, - { - "epoch": 10.415123859191656, - "grad_norm": 1.377344012260437, - "learning_rate": 8.043718592964824e-05, - "loss": 5.8095, - "step": 19971 - }, - { - "epoch": 10.415645371577575, - "grad_norm": 1.451331615447998, - "learning_rate": 8.043618090452262e-05, - "loss": 5.5103, - "step": 19972 - }, - { - "epoch": 10.416166883963495, - "grad_norm": 1.4327746629714966, - "learning_rate": 8.043517587939698e-05, - "loss": 5.7684, - "step": 19973 - }, - { - "epoch": 10.416688396349413, - "grad_norm": 1.4448026418685913, - "learning_rate": 8.043417085427136e-05, - "loss": 5.6955, - "step": 19974 - }, - { - "epoch": 10.417209908735332, - "grad_norm": 1.39994215965271, - "learning_rate": 8.043316582914572e-05, - "loss": 5.33, - "step": 19975 - }, - { - "epoch": 10.417731421121252, - "grad_norm": 1.6093553304672241, - "learning_rate": 8.04321608040201e-05, - "loss": 5.2564, - "step": 19976 - }, - { - "epoch": 10.418252933507171, - "grad_norm": 1.683631420135498, - "learning_rate": 8.043115577889448e-05, - "loss": 5.3948, - "step": 19977 - }, - { - "epoch": 10.41877444589309, - "grad_norm": 1.3730961084365845, - "learning_rate": 8.043015075376886e-05, - "loss": 5.709, - "step": 19978 - }, - { - "epoch": 10.41929595827901, - "grad_norm": 1.523729681968689, - "learning_rate": 8.042914572864322e-05, - "loss": 5.4784, - "step": 19979 - }, - { - "epoch": 10.419817470664928, - "grad_norm": 1.4242973327636719, - "learning_rate": 8.04281407035176e-05, - "loss": 5.2078, - "step": 19980 - }, - { - "epoch": 10.420338983050847, - "grad_norm": 1.529475450515747, - "learning_rate": 8.042713567839196e-05, - "loss": 5.0303, - "step": 19981 - }, - { - "epoch": 10.420860495436767, - "grad_norm": 1.6017335653305054, - "learning_rate": 8.042613065326634e-05, - "loss": 4.8492, - "step": 19982 - }, - { - "epoch": 10.421382007822686, - "grad_norm": 1.5167200565338135, - "learning_rate": 8.04251256281407e-05, - "loss": 4.7967, - "step": 19983 - }, - { - "epoch": 10.421903520208605, - "grad_norm": 1.3874661922454834, - "learning_rate": 8.042412060301507e-05, - "loss": 5.4967, - "step": 19984 - }, - { - "epoch": 10.422425032594525, - "grad_norm": 1.3371829986572266, - "learning_rate": 8.042311557788945e-05, - "loss": 5.6029, - "step": 19985 - }, - { - "epoch": 10.422946544980443, - "grad_norm": 1.4296631813049316, - "learning_rate": 8.042211055276381e-05, - "loss": 5.321, - "step": 19986 - }, - { - "epoch": 10.423468057366362, - "grad_norm": 1.345910668373108, - "learning_rate": 8.042110552763819e-05, - "loss": 5.575, - "step": 19987 - }, - { - "epoch": 10.423989569752282, - "grad_norm": 1.4190165996551514, - "learning_rate": 8.042010050251257e-05, - "loss": 4.9707, - "step": 19988 - }, - { - "epoch": 10.424511082138201, - "grad_norm": 1.3458001613616943, - "learning_rate": 8.041909547738695e-05, - "loss": 4.5988, - "step": 19989 - }, - { - "epoch": 10.42503259452412, - "grad_norm": 1.4076688289642334, - "learning_rate": 8.041809045226131e-05, - "loss": 5.7325, - "step": 19990 - }, - { - "epoch": 10.42555410691004, - "grad_norm": 1.3628008365631104, - "learning_rate": 8.041708542713569e-05, - "loss": 5.5956, - "step": 19991 - }, - { - "epoch": 10.426075619295958, - "grad_norm": 1.5019603967666626, - "learning_rate": 8.041608040201005e-05, - "loss": 4.9267, - "step": 19992 - }, - { - "epoch": 10.426597131681877, - "grad_norm": 1.5173205137252808, - "learning_rate": 8.041507537688443e-05, - "loss": 5.5556, - "step": 19993 - }, - { - "epoch": 10.427118644067797, - "grad_norm": 1.376816987991333, - "learning_rate": 8.04140703517588e-05, - "loss": 5.8866, - "step": 19994 - }, - { - "epoch": 10.427640156453716, - "grad_norm": 1.3881354331970215, - "learning_rate": 8.041306532663317e-05, - "loss": 5.5236, - "step": 19995 - }, - { - "epoch": 10.428161668839635, - "grad_norm": 1.3512701988220215, - "learning_rate": 8.041206030150754e-05, - "loss": 5.603, - "step": 19996 - }, - { - "epoch": 10.428683181225555, - "grad_norm": 1.4895601272583008, - "learning_rate": 8.041105527638191e-05, - "loss": 5.7758, - "step": 19997 - }, - { - "epoch": 10.429204693611473, - "grad_norm": 1.5127331018447876, - "learning_rate": 8.041005025125629e-05, - "loss": 5.6008, - "step": 19998 - }, - { - "epoch": 10.429726205997392, - "grad_norm": 1.3891011476516724, - "learning_rate": 8.040904522613066e-05, - "loss": 5.5819, - "step": 19999 - }, - { - "epoch": 10.430247718383312, - "grad_norm": 1.3282994031906128, - "learning_rate": 8.040804020100503e-05, - "loss": 5.8975, - "step": 20000 - }, - { - "epoch": 10.430247718383312, - "eval_loss": 5.543665885925293, - "eval_runtime": 42.6755, - "eval_samples_per_second": 28.728, - "eval_steps_per_second": 3.609, - "step": 20000 - }, - { - "epoch": 10.430769230769231, - "grad_norm": 1.529467225074768, - "learning_rate": 8.04070351758794e-05, - "loss": 5.1916, - "step": 20001 - }, - { - "epoch": 10.43129074315515, - "grad_norm": 1.4803346395492554, - "learning_rate": 8.040603015075378e-05, - "loss": 5.8947, - "step": 20002 - }, - { - "epoch": 10.43181225554107, - "grad_norm": 1.5069351196289062, - "learning_rate": 8.040502512562814e-05, - "loss": 5.4197, - "step": 20003 - }, - { - "epoch": 10.432333767926988, - "grad_norm": 1.2735505104064941, - "learning_rate": 8.040402010050252e-05, - "loss": 4.9513, - "step": 20004 - }, - { - "epoch": 10.432855280312907, - "grad_norm": 1.4791656732559204, - "learning_rate": 8.040301507537688e-05, - "loss": 5.4412, - "step": 20005 - }, - { - "epoch": 10.433376792698827, - "grad_norm": 1.4405359029769897, - "learning_rate": 8.040201005025126e-05, - "loss": 5.3529, - "step": 20006 - }, - { - "epoch": 10.433898305084746, - "grad_norm": 1.4260791540145874, - "learning_rate": 8.040100502512562e-05, - "loss": 5.6774, - "step": 20007 - }, - { - "epoch": 10.434419817470665, - "grad_norm": 1.3430771827697754, - "learning_rate": 8.04e-05, - "loss": 5.3872, - "step": 20008 - }, - { - "epoch": 10.434941329856585, - "grad_norm": 1.5009551048278809, - "learning_rate": 8.039899497487438e-05, - "loss": 5.656, - "step": 20009 - }, - { - "epoch": 10.435462842242503, - "grad_norm": 1.4954805374145508, - "learning_rate": 8.039798994974874e-05, - "loss": 5.5423, - "step": 20010 - }, - { - "epoch": 10.435984354628422, - "grad_norm": 1.4897936582565308, - "learning_rate": 8.039698492462312e-05, - "loss": 4.7234, - "step": 20011 - }, - { - "epoch": 10.436505867014342, - "grad_norm": 1.4408941268920898, - "learning_rate": 8.039597989949749e-05, - "loss": 5.6228, - "step": 20012 - }, - { - "epoch": 10.437027379400261, - "grad_norm": 1.435788631439209, - "learning_rate": 8.039497487437186e-05, - "loss": 4.7287, - "step": 20013 - }, - { - "epoch": 10.43754889178618, - "grad_norm": 1.4675769805908203, - "learning_rate": 8.039396984924623e-05, - "loss": 5.1364, - "step": 20014 - }, - { - "epoch": 10.438070404172098, - "grad_norm": 1.4676421880722046, - "learning_rate": 8.039296482412061e-05, - "loss": 5.652, - "step": 20015 - }, - { - "epoch": 10.438591916558018, - "grad_norm": 1.4743765592575073, - "learning_rate": 8.039195979899497e-05, - "loss": 5.5859, - "step": 20016 - }, - { - "epoch": 10.439113428943937, - "grad_norm": 1.6436294317245483, - "learning_rate": 8.039095477386935e-05, - "loss": 5.5575, - "step": 20017 - }, - { - "epoch": 10.439634941329857, - "grad_norm": 1.536979079246521, - "learning_rate": 8.038994974874373e-05, - "loss": 5.1156, - "step": 20018 - }, - { - "epoch": 10.440156453715776, - "grad_norm": 1.4624965190887451, - "learning_rate": 8.03889447236181e-05, - "loss": 5.3718, - "step": 20019 - }, - { - "epoch": 10.440677966101696, - "grad_norm": 1.5442885160446167, - "learning_rate": 8.038793969849247e-05, - "loss": 5.5724, - "step": 20020 - }, - { - "epoch": 10.441199478487615, - "grad_norm": 1.4140805006027222, - "learning_rate": 8.038693467336685e-05, - "loss": 5.3503, - "step": 20021 - }, - { - "epoch": 10.441720990873533, - "grad_norm": 1.4362179040908813, - "learning_rate": 8.038592964824121e-05, - "loss": 5.5993, - "step": 20022 - }, - { - "epoch": 10.442242503259452, - "grad_norm": 1.4394136667251587, - "learning_rate": 8.038492462311559e-05, - "loss": 5.1675, - "step": 20023 - }, - { - "epoch": 10.442764015645372, - "grad_norm": 1.3118693828582764, - "learning_rate": 8.038391959798995e-05, - "loss": 5.3249, - "step": 20024 - }, - { - "epoch": 10.443285528031291, - "grad_norm": 1.4695910215377808, - "learning_rate": 8.038291457286432e-05, - "loss": 5.2091, - "step": 20025 - }, - { - "epoch": 10.44380704041721, - "grad_norm": 1.607361078262329, - "learning_rate": 8.03819095477387e-05, - "loss": 4.823, - "step": 20026 - }, - { - "epoch": 10.444328552803128, - "grad_norm": 1.382102608680725, - "learning_rate": 8.038090452261306e-05, - "loss": 5.5601, - "step": 20027 - }, - { - "epoch": 10.444850065189048, - "grad_norm": 1.4521409273147583, - "learning_rate": 8.037989949748744e-05, - "loss": 5.7681, - "step": 20028 - }, - { - "epoch": 10.445371577574967, - "grad_norm": 1.4423408508300781, - "learning_rate": 8.037889447236182e-05, - "loss": 5.6067, - "step": 20029 - }, - { - "epoch": 10.445893089960887, - "grad_norm": 1.4133226871490479, - "learning_rate": 8.037788944723619e-05, - "loss": 5.8418, - "step": 20030 - }, - { - "epoch": 10.446414602346806, - "grad_norm": 1.4076740741729736, - "learning_rate": 8.037688442211056e-05, - "loss": 5.4703, - "step": 20031 - }, - { - "epoch": 10.446936114732726, - "grad_norm": 1.3780018091201782, - "learning_rate": 8.037587939698494e-05, - "loss": 5.4979, - "step": 20032 - }, - { - "epoch": 10.447457627118643, - "grad_norm": 1.4381235837936401, - "learning_rate": 8.03748743718593e-05, - "loss": 5.9097, - "step": 20033 - }, - { - "epoch": 10.447979139504563, - "grad_norm": 1.3625723123550415, - "learning_rate": 8.037386934673368e-05, - "loss": 5.2353, - "step": 20034 - }, - { - "epoch": 10.448500651890482, - "grad_norm": 1.3680737018585205, - "learning_rate": 8.037286432160804e-05, - "loss": 5.9323, - "step": 20035 - }, - { - "epoch": 10.449022164276402, - "grad_norm": 1.349200963973999, - "learning_rate": 8.037185929648242e-05, - "loss": 5.5433, - "step": 20036 - }, - { - "epoch": 10.449543676662321, - "grad_norm": 1.517087697982788, - "learning_rate": 8.037085427135678e-05, - "loss": 5.1883, - "step": 20037 - }, - { - "epoch": 10.45006518904824, - "grad_norm": 1.662245750427246, - "learning_rate": 8.036984924623116e-05, - "loss": 5.1363, - "step": 20038 - }, - { - "epoch": 10.450586701434158, - "grad_norm": 1.4189982414245605, - "learning_rate": 8.036884422110554e-05, - "loss": 5.1179, - "step": 20039 - }, - { - "epoch": 10.451108213820078, - "grad_norm": 1.4751296043395996, - "learning_rate": 8.03678391959799e-05, - "loss": 5.5195, - "step": 20040 - }, - { - "epoch": 10.451629726205997, - "grad_norm": 1.4925450086593628, - "learning_rate": 8.036683417085428e-05, - "loss": 5.5955, - "step": 20041 - }, - { - "epoch": 10.452151238591917, - "grad_norm": 1.6336311101913452, - "learning_rate": 8.036582914572865e-05, - "loss": 5.1711, - "step": 20042 - }, - { - "epoch": 10.452672750977836, - "grad_norm": 1.4046108722686768, - "learning_rate": 8.036482412060302e-05, - "loss": 5.0925, - "step": 20043 - }, - { - "epoch": 10.453194263363756, - "grad_norm": 1.3398125171661377, - "learning_rate": 8.036381909547739e-05, - "loss": 5.7482, - "step": 20044 - }, - { - "epoch": 10.453715775749673, - "grad_norm": 1.4135229587554932, - "learning_rate": 8.036281407035177e-05, - "loss": 5.4997, - "step": 20045 - }, - { - "epoch": 10.454237288135593, - "grad_norm": 1.3738007545471191, - "learning_rate": 8.036180904522613e-05, - "loss": 5.6229, - "step": 20046 - }, - { - "epoch": 10.454758800521512, - "grad_norm": 1.3398128747940063, - "learning_rate": 8.036080402010051e-05, - "loss": 5.8303, - "step": 20047 - }, - { - "epoch": 10.455280312907432, - "grad_norm": 1.3650574684143066, - "learning_rate": 8.035979899497487e-05, - "loss": 5.6673, - "step": 20048 - }, - { - "epoch": 10.455801825293351, - "grad_norm": 1.3992582559585571, - "learning_rate": 8.035879396984925e-05, - "loss": 5.7969, - "step": 20049 - }, - { - "epoch": 10.45632333767927, - "grad_norm": 1.5242825746536255, - "learning_rate": 8.035778894472363e-05, - "loss": 5.5548, - "step": 20050 - }, - { - "epoch": 10.456844850065188, - "grad_norm": 1.4391740560531616, - "learning_rate": 8.035678391959799e-05, - "loss": 5.346, - "step": 20051 - }, - { - "epoch": 10.457366362451108, - "grad_norm": 1.3462814092636108, - "learning_rate": 8.035577889447237e-05, - "loss": 5.3232, - "step": 20052 - }, - { - "epoch": 10.457887874837027, - "grad_norm": 1.4395314455032349, - "learning_rate": 8.035477386934673e-05, - "loss": 5.6247, - "step": 20053 - }, - { - "epoch": 10.458409387222947, - "grad_norm": 1.5495858192443848, - "learning_rate": 8.035376884422111e-05, - "loss": 5.2577, - "step": 20054 - }, - { - "epoch": 10.458930899608866, - "grad_norm": 1.5547749996185303, - "learning_rate": 8.035276381909548e-05, - "loss": 5.3037, - "step": 20055 - }, - { - "epoch": 10.459452411994786, - "grad_norm": 1.4934298992156982, - "learning_rate": 8.035175879396985e-05, - "loss": 5.4626, - "step": 20056 - }, - { - "epoch": 10.459973924380703, - "grad_norm": 1.421324372291565, - "learning_rate": 8.035075376884422e-05, - "loss": 5.7333, - "step": 20057 - }, - { - "epoch": 10.460495436766623, - "grad_norm": 1.4620543718338013, - "learning_rate": 8.03497487437186e-05, - "loss": 5.8741, - "step": 20058 - }, - { - "epoch": 10.461016949152542, - "grad_norm": 1.35321044921875, - "learning_rate": 8.034874371859297e-05, - "loss": 5.4649, - "step": 20059 - }, - { - "epoch": 10.461538461538462, - "grad_norm": 1.3660801649093628, - "learning_rate": 8.034773869346735e-05, - "loss": 5.7613, - "step": 20060 - }, - { - "epoch": 10.462059973924381, - "grad_norm": 1.3329601287841797, - "learning_rate": 8.034673366834172e-05, - "loss": 5.7614, - "step": 20061 - }, - { - "epoch": 10.4625814863103, - "grad_norm": 1.40496826171875, - "learning_rate": 8.03457286432161e-05, - "loss": 5.0708, - "step": 20062 - }, - { - "epoch": 10.463102998696218, - "grad_norm": 1.4888346195220947, - "learning_rate": 8.034472361809046e-05, - "loss": 5.2961, - "step": 20063 - }, - { - "epoch": 10.463624511082138, - "grad_norm": 1.4310290813446045, - "learning_rate": 8.034371859296482e-05, - "loss": 5.5376, - "step": 20064 - }, - { - "epoch": 10.464146023468057, - "grad_norm": 1.488019585609436, - "learning_rate": 8.03427135678392e-05, - "loss": 5.4115, - "step": 20065 - }, - { - "epoch": 10.464667535853977, - "grad_norm": 1.447433590888977, - "learning_rate": 8.034170854271356e-05, - "loss": 5.5074, - "step": 20066 - }, - { - "epoch": 10.465189048239896, - "grad_norm": 1.3938887119293213, - "learning_rate": 8.034070351758794e-05, - "loss": 5.3147, - "step": 20067 - }, - { - "epoch": 10.465710560625816, - "grad_norm": 1.459694504737854, - "learning_rate": 8.03396984924623e-05, - "loss": 4.9825, - "step": 20068 - }, - { - "epoch": 10.466232073011733, - "grad_norm": 1.3934952020645142, - "learning_rate": 8.033869346733668e-05, - "loss": 5.8073, - "step": 20069 - }, - { - "epoch": 10.466753585397653, - "grad_norm": 1.4346258640289307, - "learning_rate": 8.033768844221106e-05, - "loss": 4.9881, - "step": 20070 - }, - { - "epoch": 10.467275097783572, - "grad_norm": 1.7041927576065063, - "learning_rate": 8.033668341708544e-05, - "loss": 4.8441, - "step": 20071 - }, - { - "epoch": 10.467796610169492, - "grad_norm": 1.3612425327301025, - "learning_rate": 8.03356783919598e-05, - "loss": 5.5077, - "step": 20072 - }, - { - "epoch": 10.468318122555411, - "grad_norm": 1.420318603515625, - "learning_rate": 8.033467336683418e-05, - "loss": 5.7527, - "step": 20073 - }, - { - "epoch": 10.46883963494133, - "grad_norm": 1.5034620761871338, - "learning_rate": 8.033366834170855e-05, - "loss": 5.6687, - "step": 20074 - }, - { - "epoch": 10.469361147327248, - "grad_norm": 1.3948981761932373, - "learning_rate": 8.033266331658292e-05, - "loss": 5.4495, - "step": 20075 - }, - { - "epoch": 10.469882659713168, - "grad_norm": 1.4296536445617676, - "learning_rate": 8.033165829145729e-05, - "loss": 5.3257, - "step": 20076 - }, - { - "epoch": 10.470404172099087, - "grad_norm": 1.371720552444458, - "learning_rate": 8.033065326633165e-05, - "loss": 5.5087, - "step": 20077 - }, - { - "epoch": 10.470925684485007, - "grad_norm": 1.3871865272521973, - "learning_rate": 8.032964824120603e-05, - "loss": 5.8193, - "step": 20078 - }, - { - "epoch": 10.471447196870926, - "grad_norm": 1.447293758392334, - "learning_rate": 8.03286432160804e-05, - "loss": 5.1732, - "step": 20079 - }, - { - "epoch": 10.471968709256846, - "grad_norm": 1.4432779550552368, - "learning_rate": 8.032763819095477e-05, - "loss": 5.0305, - "step": 20080 - }, - { - "epoch": 10.472490221642763, - "grad_norm": 1.597410798072815, - "learning_rate": 8.032663316582915e-05, - "loss": 5.6692, - "step": 20081 - }, - { - "epoch": 10.473011734028683, - "grad_norm": 1.4556232690811157, - "learning_rate": 8.032562814070353e-05, - "loss": 5.549, - "step": 20082 - }, - { - "epoch": 10.473533246414602, - "grad_norm": 1.5208501815795898, - "learning_rate": 8.032462311557789e-05, - "loss": 5.3184, - "step": 20083 - }, - { - "epoch": 10.474054758800522, - "grad_norm": 1.4683654308319092, - "learning_rate": 8.032361809045227e-05, - "loss": 5.207, - "step": 20084 - }, - { - "epoch": 10.474576271186441, - "grad_norm": 1.6169641017913818, - "learning_rate": 8.032261306532663e-05, - "loss": 5.5924, - "step": 20085 - }, - { - "epoch": 10.47509778357236, - "grad_norm": 1.363630771636963, - "learning_rate": 8.032160804020101e-05, - "loss": 5.502, - "step": 20086 - }, - { - "epoch": 10.475619295958278, - "grad_norm": 1.439030408859253, - "learning_rate": 8.032060301507538e-05, - "loss": 5.4995, - "step": 20087 - }, - { - "epoch": 10.476140808344198, - "grad_norm": 1.3769181966781616, - "learning_rate": 8.031959798994975e-05, - "loss": 5.8242, - "step": 20088 - }, - { - "epoch": 10.476662320730117, - "grad_norm": 1.316177248954773, - "learning_rate": 8.031859296482412e-05, - "loss": 5.7361, - "step": 20089 - }, - { - "epoch": 10.477183833116037, - "grad_norm": 1.4474636316299438, - "learning_rate": 8.03175879396985e-05, - "loss": 5.3925, - "step": 20090 - }, - { - "epoch": 10.477705345501956, - "grad_norm": 1.381136178970337, - "learning_rate": 8.031658291457287e-05, - "loss": 5.4628, - "step": 20091 - }, - { - "epoch": 10.478226857887876, - "grad_norm": 1.589564323425293, - "learning_rate": 8.031557788944724e-05, - "loss": 5.0696, - "step": 20092 - }, - { - "epoch": 10.478748370273793, - "grad_norm": 1.5227071046829224, - "learning_rate": 8.031457286432162e-05, - "loss": 5.511, - "step": 20093 - }, - { - "epoch": 10.479269882659713, - "grad_norm": 1.4458715915679932, - "learning_rate": 8.031356783919598e-05, - "loss": 4.9456, - "step": 20094 - }, - { - "epoch": 10.479791395045632, - "grad_norm": 1.332492470741272, - "learning_rate": 8.031256281407036e-05, - "loss": 5.4605, - "step": 20095 - }, - { - "epoch": 10.480312907431552, - "grad_norm": 1.4579753875732422, - "learning_rate": 8.031155778894472e-05, - "loss": 5.3445, - "step": 20096 - }, - { - "epoch": 10.480834419817471, - "grad_norm": 1.5495436191558838, - "learning_rate": 8.03105527638191e-05, - "loss": 4.9402, - "step": 20097 - }, - { - "epoch": 10.48135593220339, - "grad_norm": 1.3392939567565918, - "learning_rate": 8.030954773869347e-05, - "loss": 5.7875, - "step": 20098 - }, - { - "epoch": 10.481877444589308, - "grad_norm": 1.437435269355774, - "learning_rate": 8.030854271356784e-05, - "loss": 5.5514, - "step": 20099 - }, - { - "epoch": 10.482398956975228, - "grad_norm": 1.5421310663223267, - "learning_rate": 8.030753768844221e-05, - "loss": 5.3931, - "step": 20100 - }, - { - "epoch": 10.482920469361147, - "grad_norm": 1.4569154977798462, - "learning_rate": 8.030653266331659e-05, - "loss": 5.7439, - "step": 20101 - }, - { - "epoch": 10.483441981747067, - "grad_norm": 1.448169231414795, - "learning_rate": 8.030552763819096e-05, - "loss": 5.4705, - "step": 20102 - }, - { - "epoch": 10.483963494132986, - "grad_norm": 1.540806531906128, - "learning_rate": 8.030452261306533e-05, - "loss": 5.2795, - "step": 20103 - }, - { - "epoch": 10.484485006518906, - "grad_norm": 1.6125191450119019, - "learning_rate": 8.03035175879397e-05, - "loss": 5.1977, - "step": 20104 - }, - { - "epoch": 10.485006518904823, - "grad_norm": 1.4899132251739502, - "learning_rate": 8.030251256281407e-05, - "loss": 5.5326, - "step": 20105 - }, - { - "epoch": 10.485528031290743, - "grad_norm": 1.6312202215194702, - "learning_rate": 8.030150753768845e-05, - "loss": 5.033, - "step": 20106 - }, - { - "epoch": 10.486049543676662, - "grad_norm": 1.4905378818511963, - "learning_rate": 8.030050251256281e-05, - "loss": 5.4171, - "step": 20107 - }, - { - "epoch": 10.486571056062582, - "grad_norm": 1.5115697383880615, - "learning_rate": 8.029949748743719e-05, - "loss": 4.9864, - "step": 20108 - }, - { - "epoch": 10.487092568448501, - "grad_norm": 1.4244314432144165, - "learning_rate": 8.029849246231155e-05, - "loss": 5.1203, - "step": 20109 - }, - { - "epoch": 10.487614080834419, - "grad_norm": 1.4433428049087524, - "learning_rate": 8.029748743718593e-05, - "loss": 4.7104, - "step": 20110 - }, - { - "epoch": 10.488135593220338, - "grad_norm": 1.5289520025253296, - "learning_rate": 8.029648241206031e-05, - "loss": 5.3702, - "step": 20111 - }, - { - "epoch": 10.488657105606258, - "grad_norm": 1.4814295768737793, - "learning_rate": 8.029547738693469e-05, - "loss": 5.4001, - "step": 20112 - }, - { - "epoch": 10.489178617992177, - "grad_norm": 1.4356199502944946, - "learning_rate": 8.029447236180905e-05, - "loss": 5.5284, - "step": 20113 - }, - { - "epoch": 10.489700130378097, - "grad_norm": 1.4398802518844604, - "learning_rate": 8.029346733668343e-05, - "loss": 5.4107, - "step": 20114 - }, - { - "epoch": 10.490221642764016, - "grad_norm": 1.5544341802597046, - "learning_rate": 8.02924623115578e-05, - "loss": 5.1624, - "step": 20115 - }, - { - "epoch": 10.490743155149936, - "grad_norm": 1.3757280111312866, - "learning_rate": 8.029145728643217e-05, - "loss": 5.3625, - "step": 20116 - }, - { - "epoch": 10.491264667535853, - "grad_norm": 1.5161798000335693, - "learning_rate": 8.029045226130654e-05, - "loss": 5.4733, - "step": 20117 - }, - { - "epoch": 10.491786179921773, - "grad_norm": 1.4094661474227905, - "learning_rate": 8.02894472361809e-05, - "loss": 5.8116, - "step": 20118 - }, - { - "epoch": 10.492307692307692, - "grad_norm": 1.4476978778839111, - "learning_rate": 8.028844221105528e-05, - "loss": 5.4741, - "step": 20119 - }, - { - "epoch": 10.492829204693612, - "grad_norm": 1.312962532043457, - "learning_rate": 8.028743718592964e-05, - "loss": 5.5957, - "step": 20120 - }, - { - "epoch": 10.493350717079531, - "grad_norm": 1.5023175477981567, - "learning_rate": 8.028643216080402e-05, - "loss": 5.3582, - "step": 20121 - }, - { - "epoch": 10.493872229465449, - "grad_norm": 1.5131431818008423, - "learning_rate": 8.02854271356784e-05, - "loss": 5.2185, - "step": 20122 - }, - { - "epoch": 10.494393741851368, - "grad_norm": 1.4092590808868408, - "learning_rate": 8.028442211055278e-05, - "loss": 5.5067, - "step": 20123 - }, - { - "epoch": 10.494915254237288, - "grad_norm": 1.3934842348098755, - "learning_rate": 8.028341708542714e-05, - "loss": 5.6525, - "step": 20124 - }, - { - "epoch": 10.495436766623207, - "grad_norm": 1.29954993724823, - "learning_rate": 8.028241206030152e-05, - "loss": 5.8051, - "step": 20125 - }, - { - "epoch": 10.495958279009127, - "grad_norm": 1.6116315126419067, - "learning_rate": 8.028140703517588e-05, - "loss": 4.9881, - "step": 20126 - }, - { - "epoch": 10.496479791395046, - "grad_norm": 1.3698092699050903, - "learning_rate": 8.028040201005026e-05, - "loss": 5.2496, - "step": 20127 - }, - { - "epoch": 10.497001303780964, - "grad_norm": 1.3783009052276611, - "learning_rate": 8.027939698492462e-05, - "loss": 5.7843, - "step": 20128 - }, - { - "epoch": 10.497522816166883, - "grad_norm": 1.4377788305282593, - "learning_rate": 8.0278391959799e-05, - "loss": 5.8222, - "step": 20129 - }, - { - "epoch": 10.498044328552803, - "grad_norm": 1.45806884765625, - "learning_rate": 8.027738693467337e-05, - "loss": 5.6618, - "step": 20130 - }, - { - "epoch": 10.498565840938722, - "grad_norm": 1.3856240510940552, - "learning_rate": 8.027638190954774e-05, - "loss": 5.8062, - "step": 20131 - }, - { - "epoch": 10.499087353324642, - "grad_norm": 1.4183002710342407, - "learning_rate": 8.027537688442212e-05, - "loss": 5.5042, - "step": 20132 - }, - { - "epoch": 10.499608865710561, - "grad_norm": 1.3297879695892334, - "learning_rate": 8.027437185929649e-05, - "loss": 5.0398, - "step": 20133 - }, - { - "epoch": 10.500130378096479, - "grad_norm": 1.342708945274353, - "learning_rate": 8.027336683417086e-05, - "loss": 5.6797, - "step": 20134 - }, - { - "epoch": 10.500651890482398, - "grad_norm": 1.3475439548492432, - "learning_rate": 8.027236180904523e-05, - "loss": 5.9379, - "step": 20135 - }, - { - "epoch": 10.501173402868318, - "grad_norm": 1.365881085395813, - "learning_rate": 8.02713567839196e-05, - "loss": 5.8206, - "step": 20136 - }, - { - "epoch": 10.501694915254237, - "grad_norm": 1.3986566066741943, - "learning_rate": 8.027035175879397e-05, - "loss": 5.1699, - "step": 20137 - }, - { - "epoch": 10.502216427640157, - "grad_norm": 1.4930223226547241, - "learning_rate": 8.026934673366835e-05, - "loss": 5.1956, - "step": 20138 - }, - { - "epoch": 10.502737940026076, - "grad_norm": 1.4285295009613037, - "learning_rate": 8.026834170854271e-05, - "loss": 5.5209, - "step": 20139 - }, - { - "epoch": 10.503259452411994, - "grad_norm": 1.431732177734375, - "learning_rate": 8.026733668341709e-05, - "loss": 5.3187, - "step": 20140 - }, - { - "epoch": 10.503780964797913, - "grad_norm": 1.3656694889068604, - "learning_rate": 8.026633165829145e-05, - "loss": 5.8665, - "step": 20141 - }, - { - "epoch": 10.504302477183833, - "grad_norm": 1.4408859014511108, - "learning_rate": 8.026532663316583e-05, - "loss": 5.8001, - "step": 20142 - }, - { - "epoch": 10.504823989569752, - "grad_norm": 1.3333592414855957, - "learning_rate": 8.026432160804021e-05, - "loss": 5.7296, - "step": 20143 - }, - { - "epoch": 10.505345501955672, - "grad_norm": 1.326034426689148, - "learning_rate": 8.026331658291457e-05, - "loss": 5.3533, - "step": 20144 - }, - { - "epoch": 10.505867014341591, - "grad_norm": 1.6271909475326538, - "learning_rate": 8.026231155778895e-05, - "loss": 5.2683, - "step": 20145 - }, - { - "epoch": 10.506388526727509, - "grad_norm": 1.423873782157898, - "learning_rate": 8.026130653266332e-05, - "loss": 5.1823, - "step": 20146 - }, - { - "epoch": 10.506910039113428, - "grad_norm": 1.4594800472259521, - "learning_rate": 8.02603015075377e-05, - "loss": 5.0716, - "step": 20147 - }, - { - "epoch": 10.507431551499348, - "grad_norm": 1.4479162693023682, - "learning_rate": 8.025929648241206e-05, - "loss": 5.438, - "step": 20148 - }, - { - "epoch": 10.507953063885267, - "grad_norm": 1.7412333488464355, - "learning_rate": 8.025829145728644e-05, - "loss": 4.7908, - "step": 20149 - }, - { - "epoch": 10.508474576271187, - "grad_norm": 1.4621638059616089, - "learning_rate": 8.02572864321608e-05, - "loss": 5.6346, - "step": 20150 - }, - { - "epoch": 10.508996088657106, - "grad_norm": 1.4613630771636963, - "learning_rate": 8.025628140703518e-05, - "loss": 5.6434, - "step": 20151 - }, - { - "epoch": 10.509517601043024, - "grad_norm": 1.458966612815857, - "learning_rate": 8.025527638190956e-05, - "loss": 5.5293, - "step": 20152 - }, - { - "epoch": 10.510039113428943, - "grad_norm": 1.5054205656051636, - "learning_rate": 8.025427135678393e-05, - "loss": 4.3591, - "step": 20153 - }, - { - "epoch": 10.510560625814863, - "grad_norm": 1.5300554037094116, - "learning_rate": 8.02532663316583e-05, - "loss": 5.0629, - "step": 20154 - }, - { - "epoch": 10.511082138200782, - "grad_norm": 1.4448838233947754, - "learning_rate": 8.025226130653268e-05, - "loss": 5.4664, - "step": 20155 - }, - { - "epoch": 10.511603650586702, - "grad_norm": 1.4856313467025757, - "learning_rate": 8.025125628140704e-05, - "loss": 5.104, - "step": 20156 - }, - { - "epoch": 10.512125162972621, - "grad_norm": 1.2974441051483154, - "learning_rate": 8.02502512562814e-05, - "loss": 5.5306, - "step": 20157 - }, - { - "epoch": 10.512646675358539, - "grad_norm": 1.3960912227630615, - "learning_rate": 8.024924623115578e-05, - "loss": 5.4729, - "step": 20158 - }, - { - "epoch": 10.513168187744458, - "grad_norm": 1.4792180061340332, - "learning_rate": 8.024824120603015e-05, - "loss": 5.1307, - "step": 20159 - }, - { - "epoch": 10.513689700130378, - "grad_norm": 1.4676841497421265, - "learning_rate": 8.024723618090452e-05, - "loss": 5.1799, - "step": 20160 - }, - { - "epoch": 10.514211212516297, - "grad_norm": 1.419899344444275, - "learning_rate": 8.024623115577889e-05, - "loss": 4.7953, - "step": 20161 - }, - { - "epoch": 10.514732724902217, - "grad_norm": 1.4318413734436035, - "learning_rate": 8.024522613065327e-05, - "loss": 5.1573, - "step": 20162 - }, - { - "epoch": 10.515254237288136, - "grad_norm": 1.450117588043213, - "learning_rate": 8.024422110552764e-05, - "loss": 5.2572, - "step": 20163 - }, - { - "epoch": 10.515775749674054, - "grad_norm": 1.4222686290740967, - "learning_rate": 8.024321608040202e-05, - "loss": 5.9119, - "step": 20164 - }, - { - "epoch": 10.516297262059974, - "grad_norm": 1.4318339824676514, - "learning_rate": 8.024221105527639e-05, - "loss": 5.7185, - "step": 20165 - }, - { - "epoch": 10.516818774445893, - "grad_norm": 1.3940114974975586, - "learning_rate": 8.024120603015076e-05, - "loss": 5.7025, - "step": 20166 - }, - { - "epoch": 10.517340286831812, - "grad_norm": 1.4331717491149902, - "learning_rate": 8.024020100502513e-05, - "loss": 5.4105, - "step": 20167 - }, - { - "epoch": 10.517861799217732, - "grad_norm": 1.5807502269744873, - "learning_rate": 8.02391959798995e-05, - "loss": 5.3105, - "step": 20168 - }, - { - "epoch": 10.518383311603651, - "grad_norm": 1.4683704376220703, - "learning_rate": 8.023819095477387e-05, - "loss": 5.586, - "step": 20169 - }, - { - "epoch": 10.518904823989569, - "grad_norm": 1.5349172353744507, - "learning_rate": 8.023718592964824e-05, - "loss": 5.5554, - "step": 20170 - }, - { - "epoch": 10.519426336375489, - "grad_norm": 1.45823073387146, - "learning_rate": 8.023618090452261e-05, - "loss": 5.4825, - "step": 20171 - }, - { - "epoch": 10.519947848761408, - "grad_norm": 1.3578556776046753, - "learning_rate": 8.023517587939699e-05, - "loss": 5.7894, - "step": 20172 - }, - { - "epoch": 10.520469361147327, - "grad_norm": 1.4050040245056152, - "learning_rate": 8.023417085427137e-05, - "loss": 5.2001, - "step": 20173 - }, - { - "epoch": 10.520990873533247, - "grad_norm": 1.42388117313385, - "learning_rate": 8.023316582914573e-05, - "loss": 5.6446, - "step": 20174 - }, - { - "epoch": 10.521512385919166, - "grad_norm": 1.5353286266326904, - "learning_rate": 8.023216080402011e-05, - "loss": 5.343, - "step": 20175 - }, - { - "epoch": 10.522033898305084, - "grad_norm": 1.4062973260879517, - "learning_rate": 8.023115577889447e-05, - "loss": 5.5191, - "step": 20176 - }, - { - "epoch": 10.522555410691004, - "grad_norm": 1.4024502038955688, - "learning_rate": 8.023015075376885e-05, - "loss": 5.2884, - "step": 20177 - }, - { - "epoch": 10.523076923076923, - "grad_norm": 1.353084683418274, - "learning_rate": 8.022914572864322e-05, - "loss": 5.7559, - "step": 20178 - }, - { - "epoch": 10.523598435462842, - "grad_norm": 1.3515088558197021, - "learning_rate": 8.02281407035176e-05, - "loss": 4.7229, - "step": 20179 - }, - { - "epoch": 10.524119947848762, - "grad_norm": 1.410803198814392, - "learning_rate": 8.022713567839196e-05, - "loss": 5.7901, - "step": 20180 - }, - { - "epoch": 10.524641460234681, - "grad_norm": 1.5132052898406982, - "learning_rate": 8.022613065326634e-05, - "loss": 5.6676, - "step": 20181 - }, - { - "epoch": 10.525162972620599, - "grad_norm": 1.5468604564666748, - "learning_rate": 8.02251256281407e-05, - "loss": 5.3625, - "step": 20182 - }, - { - "epoch": 10.525684485006519, - "grad_norm": 1.5415101051330566, - "learning_rate": 8.022412060301508e-05, - "loss": 5.4442, - "step": 20183 - }, - { - "epoch": 10.526205997392438, - "grad_norm": 1.4193804264068604, - "learning_rate": 8.022311557788946e-05, - "loss": 5.1048, - "step": 20184 - }, - { - "epoch": 10.526727509778357, - "grad_norm": 1.4701184034347534, - "learning_rate": 8.022211055276382e-05, - "loss": 5.423, - "step": 20185 - }, - { - "epoch": 10.527249022164277, - "grad_norm": 1.298774003982544, - "learning_rate": 8.02211055276382e-05, - "loss": 4.9924, - "step": 20186 - }, - { - "epoch": 10.527770534550196, - "grad_norm": 1.4836339950561523, - "learning_rate": 8.022010050251256e-05, - "loss": 5.3841, - "step": 20187 - }, - { - "epoch": 10.528292046936114, - "grad_norm": 1.6865088939666748, - "learning_rate": 8.021909547738694e-05, - "loss": 5.0764, - "step": 20188 - }, - { - "epoch": 10.528813559322034, - "grad_norm": 1.4704179763793945, - "learning_rate": 8.02180904522613e-05, - "loss": 5.6315, - "step": 20189 - }, - { - "epoch": 10.529335071707953, - "grad_norm": 1.4819533824920654, - "learning_rate": 8.021708542713568e-05, - "loss": 5.3103, - "step": 20190 - }, - { - "epoch": 10.529856584093872, - "grad_norm": 1.4180898666381836, - "learning_rate": 8.021608040201005e-05, - "loss": 5.6733, - "step": 20191 - }, - { - "epoch": 10.530378096479792, - "grad_norm": 1.5153095722198486, - "learning_rate": 8.021507537688443e-05, - "loss": 5.3496, - "step": 20192 - }, - { - "epoch": 10.53089960886571, - "grad_norm": 1.4520914554595947, - "learning_rate": 8.02140703517588e-05, - "loss": 5.5713, - "step": 20193 - }, - { - "epoch": 10.531421121251629, - "grad_norm": 1.4028310775756836, - "learning_rate": 8.021306532663318e-05, - "loss": 5.6173, - "step": 20194 - }, - { - "epoch": 10.531942633637549, - "grad_norm": 1.38916015625, - "learning_rate": 8.021206030150755e-05, - "loss": 5.8968, - "step": 20195 - }, - { - "epoch": 10.532464146023468, - "grad_norm": 1.4884041547775269, - "learning_rate": 8.021105527638191e-05, - "loss": 5.2367, - "step": 20196 - }, - { - "epoch": 10.532985658409387, - "grad_norm": 1.3871575593948364, - "learning_rate": 8.021005025125629e-05, - "loss": 5.6366, - "step": 20197 - }, - { - "epoch": 10.533507170795307, - "grad_norm": 1.4404693841934204, - "learning_rate": 8.020904522613065e-05, - "loss": 5.7333, - "step": 20198 - }, - { - "epoch": 10.534028683181226, - "grad_norm": 1.3881118297576904, - "learning_rate": 8.020804020100503e-05, - "loss": 5.3177, - "step": 20199 - }, - { - "epoch": 10.534550195567144, - "grad_norm": 1.4080289602279663, - "learning_rate": 8.02070351758794e-05, - "loss": 5.7817, - "step": 20200 - }, - { - "epoch": 10.535071707953064, - "grad_norm": 1.384378433227539, - "learning_rate": 8.020603015075377e-05, - "loss": 5.1451, - "step": 20201 - }, - { - "epoch": 10.535593220338983, - "grad_norm": 1.496822476387024, - "learning_rate": 8.020502512562814e-05, - "loss": 5.2427, - "step": 20202 - }, - { - "epoch": 10.536114732724903, - "grad_norm": 1.2980034351348877, - "learning_rate": 8.020402010050251e-05, - "loss": 5.7594, - "step": 20203 - }, - { - "epoch": 10.536636245110822, - "grad_norm": 1.4821263551712036, - "learning_rate": 8.020301507537689e-05, - "loss": 5.7245, - "step": 20204 - }, - { - "epoch": 10.53715775749674, - "grad_norm": 1.3586602210998535, - "learning_rate": 8.020201005025127e-05, - "loss": 5.7562, - "step": 20205 - }, - { - "epoch": 10.53767926988266, - "grad_norm": 1.527610421180725, - "learning_rate": 8.020100502512563e-05, - "loss": 4.9659, - "step": 20206 - }, - { - "epoch": 10.538200782268579, - "grad_norm": 1.4380759000778198, - "learning_rate": 8.020000000000001e-05, - "loss": 5.3976, - "step": 20207 - }, - { - "epoch": 10.538722294654498, - "grad_norm": 1.419843316078186, - "learning_rate": 8.019899497487438e-05, - "loss": 5.2719, - "step": 20208 - }, - { - "epoch": 10.539243807040418, - "grad_norm": 1.4401570558547974, - "learning_rate": 8.019798994974875e-05, - "loss": 5.3882, - "step": 20209 - }, - { - "epoch": 10.539765319426337, - "grad_norm": 1.50314462184906, - "learning_rate": 8.019698492462312e-05, - "loss": 5.0976, - "step": 20210 - }, - { - "epoch": 10.540286831812256, - "grad_norm": 1.4578981399536133, - "learning_rate": 8.019597989949748e-05, - "loss": 4.837, - "step": 20211 - }, - { - "epoch": 10.540808344198174, - "grad_norm": 1.5523169040679932, - "learning_rate": 8.019497487437186e-05, - "loss": 5.4135, - "step": 20212 - }, - { - "epoch": 10.541329856584094, - "grad_norm": 1.46126389503479, - "learning_rate": 8.019396984924624e-05, - "loss": 5.6705, - "step": 20213 - }, - { - "epoch": 10.541851368970013, - "grad_norm": 1.4168742895126343, - "learning_rate": 8.019296482412062e-05, - "loss": 5.1659, - "step": 20214 - }, - { - "epoch": 10.542372881355933, - "grad_norm": 1.4067457914352417, - "learning_rate": 8.019195979899498e-05, - "loss": 5.8521, - "step": 20215 - }, - { - "epoch": 10.542894393741852, - "grad_norm": 1.427014708518982, - "learning_rate": 8.019095477386936e-05, - "loss": 5.6971, - "step": 20216 - }, - { - "epoch": 10.54341590612777, - "grad_norm": 1.4897271394729614, - "learning_rate": 8.018994974874372e-05, - "loss": 4.9842, - "step": 20217 - }, - { - "epoch": 10.54393741851369, - "grad_norm": 1.357631802558899, - "learning_rate": 8.01889447236181e-05, - "loss": 5.5134, - "step": 20218 - }, - { - "epoch": 10.544458930899609, - "grad_norm": 1.5416765213012695, - "learning_rate": 8.018793969849246e-05, - "loss": 5.276, - "step": 20219 - }, - { - "epoch": 10.544980443285528, - "grad_norm": 1.373114824295044, - "learning_rate": 8.018693467336684e-05, - "loss": 5.4876, - "step": 20220 - }, - { - "epoch": 10.545501955671448, - "grad_norm": 1.3414688110351562, - "learning_rate": 8.01859296482412e-05, - "loss": 5.418, - "step": 20221 - }, - { - "epoch": 10.546023468057367, - "grad_norm": 1.5266329050064087, - "learning_rate": 8.018492462311558e-05, - "loss": 4.6372, - "step": 20222 - }, - { - "epoch": 10.546544980443285, - "grad_norm": 1.537272572517395, - "learning_rate": 8.018391959798995e-05, - "loss": 5.7233, - "step": 20223 - }, - { - "epoch": 10.547066492829204, - "grad_norm": 1.4588062763214111, - "learning_rate": 8.018291457286433e-05, - "loss": 5.2741, - "step": 20224 - }, - { - "epoch": 10.547588005215124, - "grad_norm": 1.384966254234314, - "learning_rate": 8.01819095477387e-05, - "loss": 5.5459, - "step": 20225 - }, - { - "epoch": 10.548109517601043, - "grad_norm": 1.5126590728759766, - "learning_rate": 8.018090452261307e-05, - "loss": 5.5557, - "step": 20226 - }, - { - "epoch": 10.548631029986963, - "grad_norm": 1.4294350147247314, - "learning_rate": 8.017989949748745e-05, - "loss": 4.9969, - "step": 20227 - }, - { - "epoch": 10.549152542372882, - "grad_norm": 1.415366530418396, - "learning_rate": 8.017889447236181e-05, - "loss": 5.0759, - "step": 20228 - }, - { - "epoch": 10.5496740547588, - "grad_norm": 1.3845386505126953, - "learning_rate": 8.017788944723619e-05, - "loss": 5.6658, - "step": 20229 - }, - { - "epoch": 10.55019556714472, - "grad_norm": 1.3841091394424438, - "learning_rate": 8.017688442211055e-05, - "loss": 5.5155, - "step": 20230 - }, - { - "epoch": 10.550717079530639, - "grad_norm": 1.3541297912597656, - "learning_rate": 8.017587939698493e-05, - "loss": 5.8013, - "step": 20231 - }, - { - "epoch": 10.551238591916558, - "grad_norm": 1.588049292564392, - "learning_rate": 8.01748743718593e-05, - "loss": 5.4246, - "step": 20232 - }, - { - "epoch": 10.551760104302478, - "grad_norm": 1.4129724502563477, - "learning_rate": 8.017386934673367e-05, - "loss": 5.8969, - "step": 20233 - }, - { - "epoch": 10.552281616688397, - "grad_norm": 1.5299749374389648, - "learning_rate": 8.017286432160804e-05, - "loss": 5.6204, - "step": 20234 - }, - { - "epoch": 10.552803129074315, - "grad_norm": 1.466318964958191, - "learning_rate": 8.017185929648241e-05, - "loss": 5.3806, - "step": 20235 - }, - { - "epoch": 10.553324641460234, - "grad_norm": 1.5021966695785522, - "learning_rate": 8.017085427135679e-05, - "loss": 5.4497, - "step": 20236 - }, - { - "epoch": 10.553846153846154, - "grad_norm": 1.3643347024917603, - "learning_rate": 8.016984924623116e-05, - "loss": 5.5697, - "step": 20237 - }, - { - "epoch": 10.554367666232073, - "grad_norm": 1.4493151903152466, - "learning_rate": 8.016884422110553e-05, - "loss": 5.7334, - "step": 20238 - }, - { - "epoch": 10.554889178617993, - "grad_norm": 1.393432855606079, - "learning_rate": 8.01678391959799e-05, - "loss": 5.58, - "step": 20239 - }, - { - "epoch": 10.555410691003912, - "grad_norm": 1.3610363006591797, - "learning_rate": 8.016683417085428e-05, - "loss": 5.4687, - "step": 20240 - }, - { - "epoch": 10.55593220338983, - "grad_norm": 1.7768837213516235, - "learning_rate": 8.016582914572864e-05, - "loss": 4.8861, - "step": 20241 - }, - { - "epoch": 10.55645371577575, - "grad_norm": 1.4048658609390259, - "learning_rate": 8.016482412060302e-05, - "loss": 4.794, - "step": 20242 - }, - { - "epoch": 10.556975228161669, - "grad_norm": 1.4334431886672974, - "learning_rate": 8.016381909547738e-05, - "loss": 5.6109, - "step": 20243 - }, - { - "epoch": 10.557496740547588, - "grad_norm": 1.431370496749878, - "learning_rate": 8.016281407035176e-05, - "loss": 5.8116, - "step": 20244 - }, - { - "epoch": 10.558018252933508, - "grad_norm": 1.3407193422317505, - "learning_rate": 8.016180904522614e-05, - "loss": 5.9563, - "step": 20245 - }, - { - "epoch": 10.558539765319427, - "grad_norm": 1.5004607439041138, - "learning_rate": 8.016080402010052e-05, - "loss": 5.3835, - "step": 20246 - }, - { - "epoch": 10.559061277705345, - "grad_norm": 1.4581056833267212, - "learning_rate": 8.015979899497488e-05, - "loss": 5.542, - "step": 20247 - }, - { - "epoch": 10.559582790091264, - "grad_norm": 1.4441161155700684, - "learning_rate": 8.015879396984926e-05, - "loss": 5.4108, - "step": 20248 - }, - { - "epoch": 10.560104302477184, - "grad_norm": 1.4695091247558594, - "learning_rate": 8.015778894472362e-05, - "loss": 5.6434, - "step": 20249 - }, - { - "epoch": 10.560625814863103, - "grad_norm": 1.4314546585083008, - "learning_rate": 8.015678391959799e-05, - "loss": 5.2793, - "step": 20250 - }, - { - "epoch": 10.561147327249023, - "grad_norm": 1.4139429330825806, - "learning_rate": 8.015577889447236e-05, - "loss": 5.5812, - "step": 20251 - }, - { - "epoch": 10.561668839634942, - "grad_norm": 1.3729063272476196, - "learning_rate": 8.015477386934673e-05, - "loss": 5.4747, - "step": 20252 - }, - { - "epoch": 10.56219035202086, - "grad_norm": 1.408679723739624, - "learning_rate": 8.015376884422111e-05, - "loss": 5.4106, - "step": 20253 - }, - { - "epoch": 10.56271186440678, - "grad_norm": 1.473600149154663, - "learning_rate": 8.015276381909547e-05, - "loss": 5.7142, - "step": 20254 - }, - { - "epoch": 10.563233376792699, - "grad_norm": 1.3662124872207642, - "learning_rate": 8.015175879396985e-05, - "loss": 5.2257, - "step": 20255 - }, - { - "epoch": 10.563754889178618, - "grad_norm": 1.4238380193710327, - "learning_rate": 8.015075376884423e-05, - "loss": 4.999, - "step": 20256 - }, - { - "epoch": 10.564276401564538, - "grad_norm": 1.5282313823699951, - "learning_rate": 8.01497487437186e-05, - "loss": 5.6665, - "step": 20257 - }, - { - "epoch": 10.564797913950457, - "grad_norm": 1.3787691593170166, - "learning_rate": 8.014874371859297e-05, - "loss": 5.5735, - "step": 20258 - }, - { - "epoch": 10.565319426336375, - "grad_norm": 1.398950219154358, - "learning_rate": 8.014773869346735e-05, - "loss": 5.6961, - "step": 20259 - }, - { - "epoch": 10.565840938722294, - "grad_norm": 1.3992815017700195, - "learning_rate": 8.014673366834171e-05, - "loss": 5.2653, - "step": 20260 - }, - { - "epoch": 10.566362451108214, - "grad_norm": 1.4563251733779907, - "learning_rate": 8.014572864321609e-05, - "loss": 5.2659, - "step": 20261 - }, - { - "epoch": 10.566883963494133, - "grad_norm": 1.556904673576355, - "learning_rate": 8.014472361809045e-05, - "loss": 5.3668, - "step": 20262 - }, - { - "epoch": 10.567405475880053, - "grad_norm": 1.3301118612289429, - "learning_rate": 8.014371859296482e-05, - "loss": 5.6001, - "step": 20263 - }, - { - "epoch": 10.567926988265972, - "grad_norm": 1.4295158386230469, - "learning_rate": 8.01427135678392e-05, - "loss": 5.728, - "step": 20264 - }, - { - "epoch": 10.56844850065189, - "grad_norm": 1.6140402555465698, - "learning_rate": 8.014170854271357e-05, - "loss": 5.2846, - "step": 20265 - }, - { - "epoch": 10.56897001303781, - "grad_norm": 1.5145275592803955, - "learning_rate": 8.014070351758795e-05, - "loss": 5.4039, - "step": 20266 - }, - { - "epoch": 10.569491525423729, - "grad_norm": 1.6052517890930176, - "learning_rate": 8.013969849246232e-05, - "loss": 4.8935, - "step": 20267 - }, - { - "epoch": 10.570013037809648, - "grad_norm": 1.4529507160186768, - "learning_rate": 8.013869346733669e-05, - "loss": 5.91, - "step": 20268 - }, - { - "epoch": 10.570534550195568, - "grad_norm": 1.5304895639419556, - "learning_rate": 8.013768844221106e-05, - "loss": 5.4269, - "step": 20269 - }, - { - "epoch": 10.571056062581487, - "grad_norm": 1.3281275033950806, - "learning_rate": 8.013668341708544e-05, - "loss": 5.5004, - "step": 20270 - }, - { - "epoch": 10.571577574967405, - "grad_norm": 1.3979135751724243, - "learning_rate": 8.01356783919598e-05, - "loss": 5.8135, - "step": 20271 - }, - { - "epoch": 10.572099087353324, - "grad_norm": 1.4603631496429443, - "learning_rate": 8.013467336683418e-05, - "loss": 5.5909, - "step": 20272 - }, - { - "epoch": 10.572620599739244, - "grad_norm": 1.3499153852462769, - "learning_rate": 8.013366834170854e-05, - "loss": 5.9828, - "step": 20273 - }, - { - "epoch": 10.573142112125163, - "grad_norm": 1.381571650505066, - "learning_rate": 8.013266331658292e-05, - "loss": 5.6914, - "step": 20274 - }, - { - "epoch": 10.573663624511083, - "grad_norm": 1.3627787828445435, - "learning_rate": 8.013165829145728e-05, - "loss": 5.8989, - "step": 20275 - }, - { - "epoch": 10.574185136897002, - "grad_norm": 1.478264331817627, - "learning_rate": 8.013065326633166e-05, - "loss": 5.0448, - "step": 20276 - }, - { - "epoch": 10.57470664928292, - "grad_norm": 1.4360945224761963, - "learning_rate": 8.012964824120604e-05, - "loss": 5.2642, - "step": 20277 - }, - { - "epoch": 10.57522816166884, - "grad_norm": 1.4869340658187866, - "learning_rate": 8.01286432160804e-05, - "loss": 5.664, - "step": 20278 - }, - { - "epoch": 10.575749674054759, - "grad_norm": 1.4918346405029297, - "learning_rate": 8.012763819095478e-05, - "loss": 5.3719, - "step": 20279 - }, - { - "epoch": 10.576271186440678, - "grad_norm": 1.4212660789489746, - "learning_rate": 8.012663316582915e-05, - "loss": 5.7056, - "step": 20280 - }, - { - "epoch": 10.576792698826598, - "grad_norm": 1.4206234216690063, - "learning_rate": 8.012562814070352e-05, - "loss": 5.8042, - "step": 20281 - }, - { - "epoch": 10.577314211212517, - "grad_norm": 1.4932959079742432, - "learning_rate": 8.012462311557789e-05, - "loss": 5.1194, - "step": 20282 - }, - { - "epoch": 10.577835723598435, - "grad_norm": 1.5154012441635132, - "learning_rate": 8.012361809045227e-05, - "loss": 5.0379, - "step": 20283 - }, - { - "epoch": 10.578357235984354, - "grad_norm": 1.3067644834518433, - "learning_rate": 8.012261306532663e-05, - "loss": 6.043, - "step": 20284 - }, - { - "epoch": 10.578878748370274, - "grad_norm": 1.515801191329956, - "learning_rate": 8.012160804020101e-05, - "loss": 5.4639, - "step": 20285 - }, - { - "epoch": 10.579400260756193, - "grad_norm": 1.4406015872955322, - "learning_rate": 8.012060301507539e-05, - "loss": 5.692, - "step": 20286 - }, - { - "epoch": 10.579921773142113, - "grad_norm": 1.473102331161499, - "learning_rate": 8.011959798994976e-05, - "loss": 5.1343, - "step": 20287 - }, - { - "epoch": 10.58044328552803, - "grad_norm": 1.525311827659607, - "learning_rate": 8.011859296482413e-05, - "loss": 5.5333, - "step": 20288 - }, - { - "epoch": 10.58096479791395, - "grad_norm": 1.479345679283142, - "learning_rate": 8.01175879396985e-05, - "loss": 5.6151, - "step": 20289 - }, - { - "epoch": 10.58148631029987, - "grad_norm": 1.300231695175171, - "learning_rate": 8.011658291457287e-05, - "loss": 5.9811, - "step": 20290 - }, - { - "epoch": 10.582007822685789, - "grad_norm": 1.4784913063049316, - "learning_rate": 8.011557788944723e-05, - "loss": 5.6193, - "step": 20291 - }, - { - "epoch": 10.582529335071708, - "grad_norm": 1.5426132678985596, - "learning_rate": 8.011457286432161e-05, - "loss": 4.7521, - "step": 20292 - }, - { - "epoch": 10.583050847457628, - "grad_norm": 1.479264497756958, - "learning_rate": 8.011356783919598e-05, - "loss": 5.321, - "step": 20293 - }, - { - "epoch": 10.583572359843547, - "grad_norm": 1.3865805864334106, - "learning_rate": 8.011256281407035e-05, - "loss": 5.8094, - "step": 20294 - }, - { - "epoch": 10.584093872229465, - "grad_norm": 1.3486926555633545, - "learning_rate": 8.011155778894472e-05, - "loss": 5.4826, - "step": 20295 - }, - { - "epoch": 10.584615384615384, - "grad_norm": 1.3089896440505981, - "learning_rate": 8.01105527638191e-05, - "loss": 5.4196, - "step": 20296 - }, - { - "epoch": 10.585136897001304, - "grad_norm": 1.4309192895889282, - "learning_rate": 8.010954773869347e-05, - "loss": 5.2769, - "step": 20297 - }, - { - "epoch": 10.585658409387223, - "grad_norm": 1.4082967042922974, - "learning_rate": 8.010854271356785e-05, - "loss": 5.745, - "step": 20298 - }, - { - "epoch": 10.586179921773143, - "grad_norm": 1.472242832183838, - "learning_rate": 8.010753768844222e-05, - "loss": 5.7669, - "step": 20299 - }, - { - "epoch": 10.58670143415906, - "grad_norm": 1.7023063898086548, - "learning_rate": 8.01065326633166e-05, - "loss": 5.1477, - "step": 20300 - }, - { - "epoch": 10.58722294654498, - "grad_norm": 1.4095755815505981, - "learning_rate": 8.010552763819096e-05, - "loss": 5.7885, - "step": 20301 - }, - { - "epoch": 10.5877444589309, - "grad_norm": 1.4386128187179565, - "learning_rate": 8.010452261306534e-05, - "loss": 5.8386, - "step": 20302 - }, - { - "epoch": 10.588265971316819, - "grad_norm": 1.592929720878601, - "learning_rate": 8.01035175879397e-05, - "loss": 5.5571, - "step": 20303 - }, - { - "epoch": 10.588787483702738, - "grad_norm": 1.345913290977478, - "learning_rate": 8.010251256281406e-05, - "loss": 5.7579, - "step": 20304 - }, - { - "epoch": 10.589308996088658, - "grad_norm": 1.4026896953582764, - "learning_rate": 8.010150753768844e-05, - "loss": 5.6908, - "step": 20305 - }, - { - "epoch": 10.589830508474577, - "grad_norm": 1.8465924263000488, - "learning_rate": 8.010050251256282e-05, - "loss": 5.4552, - "step": 20306 - }, - { - "epoch": 10.590352020860495, - "grad_norm": 1.515138864517212, - "learning_rate": 8.00994974874372e-05, - "loss": 5.738, - "step": 20307 - }, - { - "epoch": 10.590873533246414, - "grad_norm": 1.5409685373306274, - "learning_rate": 8.009849246231156e-05, - "loss": 5.4639, - "step": 20308 - }, - { - "epoch": 10.591395045632334, - "grad_norm": 1.3915365934371948, - "learning_rate": 8.009748743718594e-05, - "loss": 5.2295, - "step": 20309 - }, - { - "epoch": 10.591916558018253, - "grad_norm": 1.4868148565292358, - "learning_rate": 8.00964824120603e-05, - "loss": 6.0164, - "step": 20310 - }, - { - "epoch": 10.592438070404173, - "grad_norm": 1.3760885000228882, - "learning_rate": 8.009547738693468e-05, - "loss": 5.8632, - "step": 20311 - }, - { - "epoch": 10.59295958279009, - "grad_norm": 1.7551581859588623, - "learning_rate": 8.009447236180905e-05, - "loss": 5.1178, - "step": 20312 - }, - { - "epoch": 10.59348109517601, - "grad_norm": 1.4474648237228394, - "learning_rate": 8.009346733668342e-05, - "loss": 5.7155, - "step": 20313 - }, - { - "epoch": 10.59400260756193, - "grad_norm": 1.425426721572876, - "learning_rate": 8.009246231155779e-05, - "loss": 5.5291, - "step": 20314 - }, - { - "epoch": 10.594524119947849, - "grad_norm": 1.6412614583969116, - "learning_rate": 8.009145728643217e-05, - "loss": 5.6707, - "step": 20315 - }, - { - "epoch": 10.595045632333768, - "grad_norm": 1.635238528251648, - "learning_rate": 8.009045226130653e-05, - "loss": 5.2968, - "step": 20316 - }, - { - "epoch": 10.595567144719688, - "grad_norm": 1.551027774810791, - "learning_rate": 8.008944723618091e-05, - "loss": 5.313, - "step": 20317 - }, - { - "epoch": 10.596088657105605, - "grad_norm": 1.6399531364440918, - "learning_rate": 8.008844221105529e-05, - "loss": 5.6269, - "step": 20318 - }, - { - "epoch": 10.596610169491525, - "grad_norm": 1.5596975088119507, - "learning_rate": 8.008743718592965e-05, - "loss": 5.4558, - "step": 20319 - }, - { - "epoch": 10.597131681877444, - "grad_norm": 1.5884283781051636, - "learning_rate": 8.008643216080403e-05, - "loss": 5.5783, - "step": 20320 - }, - { - "epoch": 10.597653194263364, - "grad_norm": 1.512988567352295, - "learning_rate": 8.008542713567839e-05, - "loss": 5.7011, - "step": 20321 - }, - { - "epoch": 10.598174706649283, - "grad_norm": 1.5800082683563232, - "learning_rate": 8.008442211055277e-05, - "loss": 4.8432, - "step": 20322 - }, - { - "epoch": 10.598696219035203, - "grad_norm": 1.593319058418274, - "learning_rate": 8.008341708542713e-05, - "loss": 5.4296, - "step": 20323 - }, - { - "epoch": 10.59921773142112, - "grad_norm": 1.4557939767837524, - "learning_rate": 8.008241206030151e-05, - "loss": 5.1171, - "step": 20324 - }, - { - "epoch": 10.59973924380704, - "grad_norm": 1.6261017322540283, - "learning_rate": 8.008140703517588e-05, - "loss": 5.6028, - "step": 20325 - }, - { - "epoch": 10.60026075619296, - "grad_norm": 1.5650697946548462, - "learning_rate": 8.008040201005025e-05, - "loss": 5.4331, - "step": 20326 - }, - { - "epoch": 10.600782268578879, - "grad_norm": 1.5699433088302612, - "learning_rate": 8.007939698492463e-05, - "loss": 5.6079, - "step": 20327 - }, - { - "epoch": 10.601303780964798, - "grad_norm": 1.4536148309707642, - "learning_rate": 8.007839195979901e-05, - "loss": 5.7236, - "step": 20328 - }, - { - "epoch": 10.601825293350718, - "grad_norm": 1.4225445985794067, - "learning_rate": 8.007738693467337e-05, - "loss": 5.5124, - "step": 20329 - }, - { - "epoch": 10.602346805736635, - "grad_norm": 1.5412858724594116, - "learning_rate": 8.007638190954774e-05, - "loss": 5.7813, - "step": 20330 - }, - { - "epoch": 10.602868318122555, - "grad_norm": 1.4570627212524414, - "learning_rate": 8.007537688442212e-05, - "loss": 5.2537, - "step": 20331 - }, - { - "epoch": 10.603389830508474, - "grad_norm": 1.4667084217071533, - "learning_rate": 8.007437185929648e-05, - "loss": 5.2296, - "step": 20332 - }, - { - "epoch": 10.603911342894394, - "grad_norm": 1.458038330078125, - "learning_rate": 8.007336683417086e-05, - "loss": 5.7835, - "step": 20333 - }, - { - "epoch": 10.604432855280313, - "grad_norm": 1.5938169956207275, - "learning_rate": 8.007236180904522e-05, - "loss": 4.8911, - "step": 20334 - }, - { - "epoch": 10.604954367666233, - "grad_norm": 1.4067832231521606, - "learning_rate": 8.00713567839196e-05, - "loss": 5.0328, - "step": 20335 - }, - { - "epoch": 10.60547588005215, - "grad_norm": 1.3974424600601196, - "learning_rate": 8.007035175879397e-05, - "loss": 5.7257, - "step": 20336 - }, - { - "epoch": 10.60599739243807, - "grad_norm": 1.4746146202087402, - "learning_rate": 8.006934673366834e-05, - "loss": 5.3342, - "step": 20337 - }, - { - "epoch": 10.60651890482399, - "grad_norm": 1.385873556137085, - "learning_rate": 8.006834170854272e-05, - "loss": 5.7591, - "step": 20338 - }, - { - "epoch": 10.607040417209909, - "grad_norm": 1.4458677768707275, - "learning_rate": 8.00673366834171e-05, - "loss": 4.9866, - "step": 20339 - }, - { - "epoch": 10.607561929595828, - "grad_norm": 1.53517746925354, - "learning_rate": 8.006633165829146e-05, - "loss": 5.3816, - "step": 20340 - }, - { - "epoch": 10.608083441981748, - "grad_norm": 1.3803837299346924, - "learning_rate": 8.006532663316584e-05, - "loss": 5.6776, - "step": 20341 - }, - { - "epoch": 10.608604954367665, - "grad_norm": 1.5671350955963135, - "learning_rate": 8.00643216080402e-05, - "loss": 5.5076, - "step": 20342 - }, - { - "epoch": 10.609126466753585, - "grad_norm": 1.5558252334594727, - "learning_rate": 8.006331658291457e-05, - "loss": 5.3208, - "step": 20343 - }, - { - "epoch": 10.609647979139504, - "grad_norm": 1.434950828552246, - "learning_rate": 8.006231155778895e-05, - "loss": 5.0781, - "step": 20344 - }, - { - "epoch": 10.610169491525424, - "grad_norm": 1.372941255569458, - "learning_rate": 8.006130653266331e-05, - "loss": 5.3012, - "step": 20345 - }, - { - "epoch": 10.610691003911343, - "grad_norm": 1.4889776706695557, - "learning_rate": 8.006030150753769e-05, - "loss": 5.4904, - "step": 20346 - }, - { - "epoch": 10.611212516297263, - "grad_norm": 1.5329807996749878, - "learning_rate": 8.005929648241207e-05, - "loss": 5.6627, - "step": 20347 - }, - { - "epoch": 10.61173402868318, - "grad_norm": 1.4532989263534546, - "learning_rate": 8.005829145728644e-05, - "loss": 5.5543, - "step": 20348 - }, - { - "epoch": 10.6122555410691, - "grad_norm": 1.5099384784698486, - "learning_rate": 8.005728643216081e-05, - "loss": 5.351, - "step": 20349 - }, - { - "epoch": 10.61277705345502, - "grad_norm": 1.4249534606933594, - "learning_rate": 8.005628140703519e-05, - "loss": 5.9103, - "step": 20350 - }, - { - "epoch": 10.613298565840939, - "grad_norm": 1.4880112409591675, - "learning_rate": 8.005527638190955e-05, - "loss": 5.5792, - "step": 20351 - }, - { - "epoch": 10.613820078226858, - "grad_norm": 1.420192003250122, - "learning_rate": 8.005427135678393e-05, - "loss": 5.6302, - "step": 20352 - }, - { - "epoch": 10.614341590612778, - "grad_norm": 1.6055805683135986, - "learning_rate": 8.00532663316583e-05, - "loss": 4.8824, - "step": 20353 - }, - { - "epoch": 10.614863102998696, - "grad_norm": 1.4716219902038574, - "learning_rate": 8.005226130653267e-05, - "loss": 5.4559, - "step": 20354 - }, - { - "epoch": 10.615384615384615, - "grad_norm": 1.471611738204956, - "learning_rate": 8.005125628140704e-05, - "loss": 5.7122, - "step": 20355 - }, - { - "epoch": 10.615906127770534, - "grad_norm": 1.3853797912597656, - "learning_rate": 8.00502512562814e-05, - "loss": 5.6336, - "step": 20356 - }, - { - "epoch": 10.616427640156454, - "grad_norm": 1.4593647718429565, - "learning_rate": 8.004924623115578e-05, - "loss": 4.9666, - "step": 20357 - }, - { - "epoch": 10.616949152542373, - "grad_norm": 1.5172309875488281, - "learning_rate": 8.004824120603016e-05, - "loss": 5.2017, - "step": 20358 - }, - { - "epoch": 10.617470664928293, - "grad_norm": 1.574142575263977, - "learning_rate": 8.004723618090453e-05, - "loss": 5.484, - "step": 20359 - }, - { - "epoch": 10.61799217731421, - "grad_norm": 1.3219945430755615, - "learning_rate": 8.00462311557789e-05, - "loss": 5.8777, - "step": 20360 - }, - { - "epoch": 10.61851368970013, - "grad_norm": 1.3347351551055908, - "learning_rate": 8.004522613065328e-05, - "loss": 5.9346, - "step": 20361 - }, - { - "epoch": 10.61903520208605, - "grad_norm": 1.3545855283737183, - "learning_rate": 8.004422110552764e-05, - "loss": 5.5842, - "step": 20362 - }, - { - "epoch": 10.619556714471969, - "grad_norm": 1.546891689300537, - "learning_rate": 8.004321608040202e-05, - "loss": 5.0978, - "step": 20363 - }, - { - "epoch": 10.620078226857888, - "grad_norm": 1.5986939668655396, - "learning_rate": 8.004221105527638e-05, - "loss": 5.3689, - "step": 20364 - }, - { - "epoch": 10.620599739243808, - "grad_norm": 1.3462872505187988, - "learning_rate": 8.004120603015076e-05, - "loss": 5.5303, - "step": 20365 - }, - { - "epoch": 10.621121251629726, - "grad_norm": 1.6108083724975586, - "learning_rate": 8.004020100502512e-05, - "loss": 5.0074, - "step": 20366 - }, - { - "epoch": 10.621642764015645, - "grad_norm": 1.5181044340133667, - "learning_rate": 8.00391959798995e-05, - "loss": 5.3537, - "step": 20367 - }, - { - "epoch": 10.622164276401564, - "grad_norm": 1.4695930480957031, - "learning_rate": 8.003819095477388e-05, - "loss": 5.2942, - "step": 20368 - }, - { - "epoch": 10.622685788787484, - "grad_norm": 1.4911702871322632, - "learning_rate": 8.003718592964824e-05, - "loss": 4.6752, - "step": 20369 - }, - { - "epoch": 10.623207301173403, - "grad_norm": 1.3689013719558716, - "learning_rate": 8.003618090452262e-05, - "loss": 5.7836, - "step": 20370 - }, - { - "epoch": 10.623728813559323, - "grad_norm": 1.504179835319519, - "learning_rate": 8.003517587939699e-05, - "loss": 5.5564, - "step": 20371 - }, - { - "epoch": 10.62425032594524, - "grad_norm": 1.4184364080429077, - "learning_rate": 8.003417085427136e-05, - "loss": 5.202, - "step": 20372 - }, - { - "epoch": 10.62477183833116, - "grad_norm": 1.446561336517334, - "learning_rate": 8.003316582914573e-05, - "loss": 5.2597, - "step": 20373 - }, - { - "epoch": 10.62529335071708, - "grad_norm": 1.3740795850753784, - "learning_rate": 8.00321608040201e-05, - "loss": 5.6263, - "step": 20374 - }, - { - "epoch": 10.625814863102999, - "grad_norm": 1.290969967842102, - "learning_rate": 8.003115577889447e-05, - "loss": 5.7536, - "step": 20375 - }, - { - "epoch": 10.626336375488918, - "grad_norm": 1.483827829360962, - "learning_rate": 8.003015075376885e-05, - "loss": 5.464, - "step": 20376 - }, - { - "epoch": 10.626857887874838, - "grad_norm": 1.381015658378601, - "learning_rate": 8.002914572864321e-05, - "loss": 5.4788, - "step": 20377 - }, - { - "epoch": 10.627379400260756, - "grad_norm": 1.4579358100891113, - "learning_rate": 8.002814070351759e-05, - "loss": 5.3345, - "step": 20378 - }, - { - "epoch": 10.627900912646675, - "grad_norm": 1.4005515575408936, - "learning_rate": 8.002713567839197e-05, - "loss": 5.6301, - "step": 20379 - }, - { - "epoch": 10.628422425032594, - "grad_norm": 1.446740984916687, - "learning_rate": 8.002613065326635e-05, - "loss": 5.5977, - "step": 20380 - }, - { - "epoch": 10.628943937418514, - "grad_norm": 1.4647579193115234, - "learning_rate": 8.002512562814071e-05, - "loss": 4.9954, - "step": 20381 - }, - { - "epoch": 10.629465449804433, - "grad_norm": 1.5701813697814941, - "learning_rate": 8.002412060301509e-05, - "loss": 5.4452, - "step": 20382 - }, - { - "epoch": 10.629986962190351, - "grad_norm": 1.4504609107971191, - "learning_rate": 8.002311557788945e-05, - "loss": 5.7179, - "step": 20383 - }, - { - "epoch": 10.63050847457627, - "grad_norm": 1.4959039688110352, - "learning_rate": 8.002211055276382e-05, - "loss": 5.6526, - "step": 20384 - }, - { - "epoch": 10.63102998696219, - "grad_norm": 1.4168250560760498, - "learning_rate": 8.00211055276382e-05, - "loss": 5.249, - "step": 20385 - }, - { - "epoch": 10.63155149934811, - "grad_norm": 1.5743677616119385, - "learning_rate": 8.002010050251256e-05, - "loss": 5.1759, - "step": 20386 - }, - { - "epoch": 10.632073011734029, - "grad_norm": 1.5216364860534668, - "learning_rate": 8.001909547738694e-05, - "loss": 5.5421, - "step": 20387 - }, - { - "epoch": 10.632594524119948, - "grad_norm": 1.4515125751495361, - "learning_rate": 8.001809045226131e-05, - "loss": 5.226, - "step": 20388 - }, - { - "epoch": 10.633116036505868, - "grad_norm": 1.5928610563278198, - "learning_rate": 8.001708542713569e-05, - "loss": 5.4009, - "step": 20389 - }, - { - "epoch": 10.633637548891786, - "grad_norm": 1.4723674058914185, - "learning_rate": 8.001608040201006e-05, - "loss": 5.5598, - "step": 20390 - }, - { - "epoch": 10.634159061277705, - "grad_norm": 1.547851800918579, - "learning_rate": 8.001507537688443e-05, - "loss": 4.8628, - "step": 20391 - }, - { - "epoch": 10.634680573663625, - "grad_norm": 1.625963807106018, - "learning_rate": 8.00140703517588e-05, - "loss": 5.4782, - "step": 20392 - }, - { - "epoch": 10.635202086049544, - "grad_norm": 1.6613879203796387, - "learning_rate": 8.001306532663318e-05, - "loss": 4.6291, - "step": 20393 - }, - { - "epoch": 10.635723598435463, - "grad_norm": 1.5430057048797607, - "learning_rate": 8.001206030150754e-05, - "loss": 5.7443, - "step": 20394 - }, - { - "epoch": 10.636245110821381, - "grad_norm": 1.5002100467681885, - "learning_rate": 8.001105527638192e-05, - "loss": 5.4551, - "step": 20395 - }, - { - "epoch": 10.6367666232073, - "grad_norm": 1.382204294204712, - "learning_rate": 8.001005025125628e-05, - "loss": 5.7206, - "step": 20396 - }, - { - "epoch": 10.63728813559322, - "grad_norm": 1.436213731765747, - "learning_rate": 8.000904522613065e-05, - "loss": 5.5281, - "step": 20397 - }, - { - "epoch": 10.63780964797914, - "grad_norm": 1.3904532194137573, - "learning_rate": 8.000804020100502e-05, - "loss": 5.7955, - "step": 20398 - }, - { - "epoch": 10.638331160365059, - "grad_norm": 1.4920960664749146, - "learning_rate": 8.00070351758794e-05, - "loss": 5.3288, - "step": 20399 - }, - { - "epoch": 10.638852672750978, - "grad_norm": 1.4184107780456543, - "learning_rate": 8.000603015075378e-05, - "loss": 5.0959, - "step": 20400 - }, - { - "epoch": 10.639374185136898, - "grad_norm": 1.3702064752578735, - "learning_rate": 8.000502512562814e-05, - "loss": 5.4189, - "step": 20401 - }, - { - "epoch": 10.639895697522816, - "grad_norm": 1.3656468391418457, - "learning_rate": 8.000402010050252e-05, - "loss": 5.8103, - "step": 20402 - }, - { - "epoch": 10.640417209908735, - "grad_norm": 1.3742328882217407, - "learning_rate": 8.000301507537689e-05, - "loss": 5.5756, - "step": 20403 - }, - { - "epoch": 10.640938722294655, - "grad_norm": 1.394443154335022, - "learning_rate": 8.000201005025126e-05, - "loss": 5.6556, - "step": 20404 - }, - { - "epoch": 10.641460234680574, - "grad_norm": 1.369411826133728, - "learning_rate": 8.000100502512563e-05, - "loss": 5.3979, - "step": 20405 - }, - { - "epoch": 10.641981747066493, - "grad_norm": 1.434195876121521, - "learning_rate": 8e-05, - "loss": 5.5978, - "step": 20406 - }, - { - "epoch": 10.642503259452411, - "grad_norm": 1.2996437549591064, - "learning_rate": 7.999899497487437e-05, - "loss": 5.6086, - "step": 20407 - }, - { - "epoch": 10.64302477183833, - "grad_norm": 1.6014324426651, - "learning_rate": 7.999798994974875e-05, - "loss": 4.9028, - "step": 20408 - }, - { - "epoch": 10.64354628422425, - "grad_norm": 1.6334145069122314, - "learning_rate": 7.999698492462311e-05, - "loss": 5.1804, - "step": 20409 - }, - { - "epoch": 10.64406779661017, - "grad_norm": 1.5978317260742188, - "learning_rate": 7.999597989949749e-05, - "loss": 5.5301, - "step": 20410 - }, - { - "epoch": 10.644589308996089, - "grad_norm": 1.4751754999160767, - "learning_rate": 7.999497487437187e-05, - "loss": 5.3707, - "step": 20411 - }, - { - "epoch": 10.645110821382008, - "grad_norm": 1.4821021556854248, - "learning_rate": 7.999396984924623e-05, - "loss": 5.1339, - "step": 20412 - }, - { - "epoch": 10.645632333767926, - "grad_norm": 1.3982552289962769, - "learning_rate": 7.999296482412061e-05, - "loss": 5.4253, - "step": 20413 - }, - { - "epoch": 10.646153846153846, - "grad_norm": 1.4856362342834473, - "learning_rate": 7.999195979899497e-05, - "loss": 5.1998, - "step": 20414 - }, - { - "epoch": 10.646675358539765, - "grad_norm": 1.4273440837860107, - "learning_rate": 7.999095477386935e-05, - "loss": 5.3857, - "step": 20415 - }, - { - "epoch": 10.647196870925685, - "grad_norm": 1.4154889583587646, - "learning_rate": 7.998994974874372e-05, - "loss": 5.5954, - "step": 20416 - }, - { - "epoch": 10.647718383311604, - "grad_norm": 1.5018383264541626, - "learning_rate": 7.99889447236181e-05, - "loss": 5.383, - "step": 20417 - }, - { - "epoch": 10.648239895697523, - "grad_norm": 1.3882123231887817, - "learning_rate": 7.998793969849246e-05, - "loss": 5.8197, - "step": 20418 - }, - { - "epoch": 10.648761408083441, - "grad_norm": 1.5093927383422852, - "learning_rate": 7.998693467336684e-05, - "loss": 5.3145, - "step": 20419 - }, - { - "epoch": 10.64928292046936, - "grad_norm": 1.5344239473342896, - "learning_rate": 7.998592964824121e-05, - "loss": 5.7671, - "step": 20420 - }, - { - "epoch": 10.64980443285528, - "grad_norm": 1.427409052848816, - "learning_rate": 7.998492462311559e-05, - "loss": 5.3355, - "step": 20421 - }, - { - "epoch": 10.6503259452412, - "grad_norm": 1.4106247425079346, - "learning_rate": 7.998391959798996e-05, - "loss": 5.0892, - "step": 20422 - }, - { - "epoch": 10.650847457627119, - "grad_norm": 1.4413973093032837, - "learning_rate": 7.998291457286432e-05, - "loss": 5.7152, - "step": 20423 - }, - { - "epoch": 10.651368970013039, - "grad_norm": 1.3835229873657227, - "learning_rate": 7.99819095477387e-05, - "loss": 5.616, - "step": 20424 - }, - { - "epoch": 10.651890482398956, - "grad_norm": 1.3275269269943237, - "learning_rate": 7.998090452261306e-05, - "loss": 5.3928, - "step": 20425 - }, - { - "epoch": 10.652411994784876, - "grad_norm": 1.3810938596725464, - "learning_rate": 7.997989949748744e-05, - "loss": 5.4861, - "step": 20426 - }, - { - "epoch": 10.652933507170795, - "grad_norm": 1.594072937965393, - "learning_rate": 7.99788944723618e-05, - "loss": 5.1142, - "step": 20427 - }, - { - "epoch": 10.653455019556715, - "grad_norm": 1.4089927673339844, - "learning_rate": 7.997788944723618e-05, - "loss": 5.2154, - "step": 20428 - }, - { - "epoch": 10.653976531942634, - "grad_norm": 1.4329657554626465, - "learning_rate": 7.997688442211055e-05, - "loss": 5.8838, - "step": 20429 - }, - { - "epoch": 10.654498044328554, - "grad_norm": 1.4430019855499268, - "learning_rate": 7.997587939698493e-05, - "loss": 5.6904, - "step": 20430 - }, - { - "epoch": 10.655019556714471, - "grad_norm": 1.3546912670135498, - "learning_rate": 7.99748743718593e-05, - "loss": 5.6376, - "step": 20431 - }, - { - "epoch": 10.65554106910039, - "grad_norm": 1.4717532396316528, - "learning_rate": 7.997386934673368e-05, - "loss": 4.9669, - "step": 20432 - }, - { - "epoch": 10.65606258148631, - "grad_norm": 1.44791579246521, - "learning_rate": 7.997286432160805e-05, - "loss": 5.7617, - "step": 20433 - }, - { - "epoch": 10.65658409387223, - "grad_norm": 1.539704442024231, - "learning_rate": 7.997185929648242e-05, - "loss": 5.4685, - "step": 20434 - }, - { - "epoch": 10.657105606258149, - "grad_norm": 1.4861712455749512, - "learning_rate": 7.997085427135679e-05, - "loss": 5.6139, - "step": 20435 - }, - { - "epoch": 10.657627118644069, - "grad_norm": 1.431490421295166, - "learning_rate": 7.996984924623115e-05, - "loss": 5.5992, - "step": 20436 - }, - { - "epoch": 10.658148631029986, - "grad_norm": 1.5139541625976562, - "learning_rate": 7.996884422110553e-05, - "loss": 5.1198, - "step": 20437 - }, - { - "epoch": 10.658670143415906, - "grad_norm": 1.5528419017791748, - "learning_rate": 7.99678391959799e-05, - "loss": 5.2089, - "step": 20438 - }, - { - "epoch": 10.659191655801825, - "grad_norm": 1.4837076663970947, - "learning_rate": 7.996683417085427e-05, - "loss": 5.4449, - "step": 20439 - }, - { - "epoch": 10.659713168187745, - "grad_norm": 1.456648588180542, - "learning_rate": 7.996582914572865e-05, - "loss": 5.4871, - "step": 20440 - }, - { - "epoch": 10.660234680573664, - "grad_norm": 1.4770307540893555, - "learning_rate": 7.996482412060303e-05, - "loss": 4.8401, - "step": 20441 - }, - { - "epoch": 10.660756192959584, - "grad_norm": 1.6018253564834595, - "learning_rate": 7.996381909547739e-05, - "loss": 5.8436, - "step": 20442 - }, - { - "epoch": 10.661277705345501, - "grad_norm": 1.4170273542404175, - "learning_rate": 7.996281407035177e-05, - "loss": 5.5579, - "step": 20443 - }, - { - "epoch": 10.66179921773142, - "grad_norm": 1.4857269525527954, - "learning_rate": 7.996180904522613e-05, - "loss": 5.0157, - "step": 20444 - }, - { - "epoch": 10.66232073011734, - "grad_norm": 1.3211464881896973, - "learning_rate": 7.996080402010051e-05, - "loss": 5.8236, - "step": 20445 - }, - { - "epoch": 10.66284224250326, - "grad_norm": 1.3719292879104614, - "learning_rate": 7.995979899497488e-05, - "loss": 5.5774, - "step": 20446 - }, - { - "epoch": 10.663363754889179, - "grad_norm": 1.319704532623291, - "learning_rate": 7.995879396984925e-05, - "loss": 5.6609, - "step": 20447 - }, - { - "epoch": 10.663885267275099, - "grad_norm": 1.4762024879455566, - "learning_rate": 7.995778894472362e-05, - "loss": 5.5468, - "step": 20448 - }, - { - "epoch": 10.664406779661016, - "grad_norm": 1.414969801902771, - "learning_rate": 7.995678391959798e-05, - "loss": 4.8806, - "step": 20449 - }, - { - "epoch": 10.664928292046936, - "grad_norm": 1.430688738822937, - "learning_rate": 7.995577889447236e-05, - "loss": 5.6722, - "step": 20450 - }, - { - "epoch": 10.665449804432855, - "grad_norm": 1.4975852966308594, - "learning_rate": 7.995477386934674e-05, - "loss": 5.1847, - "step": 20451 - }, - { - "epoch": 10.665971316818775, - "grad_norm": 1.4785892963409424, - "learning_rate": 7.995376884422112e-05, - "loss": 5.4119, - "step": 20452 - }, - { - "epoch": 10.666492829204694, - "grad_norm": 1.5793944597244263, - "learning_rate": 7.995276381909548e-05, - "loss": 5.3995, - "step": 20453 - }, - { - "epoch": 10.667014341590614, - "grad_norm": 1.783992886543274, - "learning_rate": 7.995175879396986e-05, - "loss": 4.6559, - "step": 20454 - }, - { - "epoch": 10.667535853976531, - "grad_norm": 1.4308851957321167, - "learning_rate": 7.995075376884422e-05, - "loss": 5.8921, - "step": 20455 - }, - { - "epoch": 10.66805736636245, - "grad_norm": 1.4882891178131104, - "learning_rate": 7.99497487437186e-05, - "loss": 4.7408, - "step": 20456 - }, - { - "epoch": 10.66857887874837, - "grad_norm": 1.6278200149536133, - "learning_rate": 7.994874371859296e-05, - "loss": 5.2355, - "step": 20457 - }, - { - "epoch": 10.66910039113429, - "grad_norm": 1.4345632791519165, - "learning_rate": 7.994773869346734e-05, - "loss": 4.9048, - "step": 20458 - }, - { - "epoch": 10.66962190352021, - "grad_norm": 1.4995640516281128, - "learning_rate": 7.99467336683417e-05, - "loss": 5.453, - "step": 20459 - }, - { - "epoch": 10.670143415906129, - "grad_norm": 1.3975576162338257, - "learning_rate": 7.994572864321608e-05, - "loss": 5.6245, - "step": 20460 - }, - { - "epoch": 10.670664928292046, - "grad_norm": 1.36367928981781, - "learning_rate": 7.994472361809046e-05, - "loss": 5.4647, - "step": 20461 - }, - { - "epoch": 10.671186440677966, - "grad_norm": 1.3793821334838867, - "learning_rate": 7.994371859296483e-05, - "loss": 5.9298, - "step": 20462 - }, - { - "epoch": 10.671707953063885, - "grad_norm": 1.3245853185653687, - "learning_rate": 7.99427135678392e-05, - "loss": 5.6329, - "step": 20463 - }, - { - "epoch": 10.672229465449805, - "grad_norm": 1.4752954244613647, - "learning_rate": 7.994170854271357e-05, - "loss": 5.422, - "step": 20464 - }, - { - "epoch": 10.672750977835724, - "grad_norm": 1.2769120931625366, - "learning_rate": 7.994070351758795e-05, - "loss": 5.3849, - "step": 20465 - }, - { - "epoch": 10.673272490221644, - "grad_norm": 1.4323991537094116, - "learning_rate": 7.993969849246231e-05, - "loss": 5.4029, - "step": 20466 - }, - { - "epoch": 10.673794002607561, - "grad_norm": 1.462304949760437, - "learning_rate": 7.993869346733669e-05, - "loss": 5.1681, - "step": 20467 - }, - { - "epoch": 10.67431551499348, - "grad_norm": 1.4048998355865479, - "learning_rate": 7.993768844221105e-05, - "loss": 5.5977, - "step": 20468 - }, - { - "epoch": 10.6748370273794, - "grad_norm": 1.4725191593170166, - "learning_rate": 7.993668341708543e-05, - "loss": 5.4992, - "step": 20469 - }, - { - "epoch": 10.67535853976532, - "grad_norm": 1.4131604433059692, - "learning_rate": 7.99356783919598e-05, - "loss": 4.9901, - "step": 20470 - }, - { - "epoch": 10.67588005215124, - "grad_norm": 1.3895456790924072, - "learning_rate": 7.993467336683417e-05, - "loss": 5.1311, - "step": 20471 - }, - { - "epoch": 10.676401564537159, - "grad_norm": 1.4617865085601807, - "learning_rate": 7.993366834170855e-05, - "loss": 5.5898, - "step": 20472 - }, - { - "epoch": 10.676923076923076, - "grad_norm": 1.4720485210418701, - "learning_rate": 7.993266331658293e-05, - "loss": 5.0233, - "step": 20473 - }, - { - "epoch": 10.677444589308996, - "grad_norm": 1.3894908428192139, - "learning_rate": 7.993165829145729e-05, - "loss": 5.8299, - "step": 20474 - }, - { - "epoch": 10.677966101694915, - "grad_norm": 1.42914879322052, - "learning_rate": 7.993065326633167e-05, - "loss": 5.3665, - "step": 20475 - }, - { - "epoch": 10.678487614080835, - "grad_norm": 1.4619572162628174, - "learning_rate": 7.992964824120603e-05, - "loss": 5.764, - "step": 20476 - }, - { - "epoch": 10.679009126466754, - "grad_norm": 1.4516571760177612, - "learning_rate": 7.99286432160804e-05, - "loss": 5.1505, - "step": 20477 - }, - { - "epoch": 10.679530638852672, - "grad_norm": 1.561389684677124, - "learning_rate": 7.992763819095478e-05, - "loss": 5.2136, - "step": 20478 - }, - { - "epoch": 10.680052151238591, - "grad_norm": 1.3896256685256958, - "learning_rate": 7.992663316582914e-05, - "loss": 5.3542, - "step": 20479 - }, - { - "epoch": 10.68057366362451, - "grad_norm": 1.445923924446106, - "learning_rate": 7.992562814070352e-05, - "loss": 5.2904, - "step": 20480 - }, - { - "epoch": 10.68109517601043, - "grad_norm": 1.473493218421936, - "learning_rate": 7.99246231155779e-05, - "loss": 5.6773, - "step": 20481 - }, - { - "epoch": 10.68161668839635, - "grad_norm": 1.4576836824417114, - "learning_rate": 7.992361809045227e-05, - "loss": 5.597, - "step": 20482 - }, - { - "epoch": 10.68213820078227, - "grad_norm": 1.3500571250915527, - "learning_rate": 7.992261306532664e-05, - "loss": 5.8563, - "step": 20483 - }, - { - "epoch": 10.682659713168189, - "grad_norm": 1.4064286947250366, - "learning_rate": 7.992160804020102e-05, - "loss": 5.6306, - "step": 20484 - }, - { - "epoch": 10.683181225554106, - "grad_norm": 1.4754688739776611, - "learning_rate": 7.992060301507538e-05, - "loss": 5.6553, - "step": 20485 - }, - { - "epoch": 10.683702737940026, - "grad_norm": 1.4821614027023315, - "learning_rate": 7.991959798994976e-05, - "loss": 4.665, - "step": 20486 - }, - { - "epoch": 10.684224250325945, - "grad_norm": 1.4698485136032104, - "learning_rate": 7.991859296482412e-05, - "loss": 5.148, - "step": 20487 - }, - { - "epoch": 10.684745762711865, - "grad_norm": 1.473509669303894, - "learning_rate": 7.99175879396985e-05, - "loss": 5.7363, - "step": 20488 - }, - { - "epoch": 10.685267275097784, - "grad_norm": 1.3971935510635376, - "learning_rate": 7.991658291457286e-05, - "loss": 5.5441, - "step": 20489 - }, - { - "epoch": 10.685788787483702, - "grad_norm": 1.3447508811950684, - "learning_rate": 7.991557788944723e-05, - "loss": 5.4796, - "step": 20490 - }, - { - "epoch": 10.686310299869621, - "grad_norm": 1.4347686767578125, - "learning_rate": 7.991457286432161e-05, - "loss": 5.2088, - "step": 20491 - }, - { - "epoch": 10.68683181225554, - "grad_norm": 1.4749305248260498, - "learning_rate": 7.991356783919598e-05, - "loss": 5.5624, - "step": 20492 - }, - { - "epoch": 10.68735332464146, - "grad_norm": 1.520880103111267, - "learning_rate": 7.991256281407036e-05, - "loss": 5.2881, - "step": 20493 - }, - { - "epoch": 10.68787483702738, - "grad_norm": 1.3556042909622192, - "learning_rate": 7.991155778894473e-05, - "loss": 5.4571, - "step": 20494 - }, - { - "epoch": 10.6883963494133, - "grad_norm": 1.3905621767044067, - "learning_rate": 7.99105527638191e-05, - "loss": 5.4627, - "step": 20495 - }, - { - "epoch": 10.688917861799219, - "grad_norm": 1.4204280376434326, - "learning_rate": 7.990954773869347e-05, - "loss": 5.1988, - "step": 20496 - }, - { - "epoch": 10.689439374185136, - "grad_norm": 1.3841419219970703, - "learning_rate": 7.990854271356785e-05, - "loss": 5.7257, - "step": 20497 - }, - { - "epoch": 10.689960886571056, - "grad_norm": 1.5838942527770996, - "learning_rate": 7.990753768844221e-05, - "loss": 4.9205, - "step": 20498 - }, - { - "epoch": 10.690482398956975, - "grad_norm": 1.4827369451522827, - "learning_rate": 7.990653266331659e-05, - "loss": 5.4745, - "step": 20499 - }, - { - "epoch": 10.691003911342895, - "grad_norm": 1.6517480611801147, - "learning_rate": 7.990552763819095e-05, - "loss": 4.6473, - "step": 20500 - }, - { - "epoch": 10.691525423728814, - "grad_norm": 1.4296152591705322, - "learning_rate": 7.990452261306533e-05, - "loss": 5.5354, - "step": 20501 - }, - { - "epoch": 10.692046936114732, - "grad_norm": 1.4654333591461182, - "learning_rate": 7.990351758793971e-05, - "loss": 5.4749, - "step": 20502 - }, - { - "epoch": 10.692568448500651, - "grad_norm": 1.370771884918213, - "learning_rate": 7.990251256281407e-05, - "loss": 5.3795, - "step": 20503 - }, - { - "epoch": 10.69308996088657, - "grad_norm": 1.4126360416412354, - "learning_rate": 7.990150753768845e-05, - "loss": 5.5419, - "step": 20504 - }, - { - "epoch": 10.69361147327249, - "grad_norm": 1.541509747505188, - "learning_rate": 7.990050251256282e-05, - "loss": 4.792, - "step": 20505 - }, - { - "epoch": 10.69413298565841, - "grad_norm": 1.4434982538223267, - "learning_rate": 7.989949748743719e-05, - "loss": 5.5915, - "step": 20506 - }, - { - "epoch": 10.69465449804433, - "grad_norm": 1.429824709892273, - "learning_rate": 7.989849246231156e-05, - "loss": 5.1691, - "step": 20507 - }, - { - "epoch": 10.695176010430247, - "grad_norm": 1.3567109107971191, - "learning_rate": 7.989748743718594e-05, - "loss": 5.686, - "step": 20508 - }, - { - "epoch": 10.695697522816166, - "grad_norm": 1.2894232273101807, - "learning_rate": 7.98964824120603e-05, - "loss": 5.9141, - "step": 20509 - }, - { - "epoch": 10.696219035202086, - "grad_norm": 1.4606778621673584, - "learning_rate": 7.989547738693468e-05, - "loss": 5.4178, - "step": 20510 - }, - { - "epoch": 10.696740547588005, - "grad_norm": 1.345837950706482, - "learning_rate": 7.989447236180904e-05, - "loss": 5.3253, - "step": 20511 - }, - { - "epoch": 10.697262059973925, - "grad_norm": 1.3714817762374878, - "learning_rate": 7.989346733668342e-05, - "loss": 5.3431, - "step": 20512 - }, - { - "epoch": 10.697783572359844, - "grad_norm": 1.4228413105010986, - "learning_rate": 7.98924623115578e-05, - "loss": 5.1586, - "step": 20513 - }, - { - "epoch": 10.698305084745762, - "grad_norm": 1.387399435043335, - "learning_rate": 7.989145728643217e-05, - "loss": 5.5915, - "step": 20514 - }, - { - "epoch": 10.698826597131681, - "grad_norm": 1.4857217073440552, - "learning_rate": 7.989045226130654e-05, - "loss": 5.0827, - "step": 20515 - }, - { - "epoch": 10.6993481095176, - "grad_norm": 1.3520209789276123, - "learning_rate": 7.98894472361809e-05, - "loss": 5.4773, - "step": 20516 - }, - { - "epoch": 10.69986962190352, - "grad_norm": 1.5135937929153442, - "learning_rate": 7.988844221105528e-05, - "loss": 5.2222, - "step": 20517 - }, - { - "epoch": 10.70039113428944, - "grad_norm": 1.541866660118103, - "learning_rate": 7.988743718592965e-05, - "loss": 5.5711, - "step": 20518 - }, - { - "epoch": 10.70091264667536, - "grad_norm": 1.5338534116744995, - "learning_rate": 7.988643216080402e-05, - "loss": 5.0605, - "step": 20519 - }, - { - "epoch": 10.701434159061277, - "grad_norm": 1.3591800928115845, - "learning_rate": 7.988542713567839e-05, - "loss": 5.537, - "step": 20520 - }, - { - "epoch": 10.701955671447196, - "grad_norm": 1.4054569005966187, - "learning_rate": 7.988442211055277e-05, - "loss": 5.7144, - "step": 20521 - }, - { - "epoch": 10.702477183833116, - "grad_norm": 1.3315844535827637, - "learning_rate": 7.988341708542714e-05, - "loss": 5.5304, - "step": 20522 - }, - { - "epoch": 10.702998696219035, - "grad_norm": 1.3895169496536255, - "learning_rate": 7.988241206030152e-05, - "loss": 5.6002, - "step": 20523 - }, - { - "epoch": 10.703520208604955, - "grad_norm": 1.370125412940979, - "learning_rate": 7.988140703517589e-05, - "loss": 5.6228, - "step": 20524 - }, - { - "epoch": 10.704041720990874, - "grad_norm": 1.4668270349502563, - "learning_rate": 7.988040201005026e-05, - "loss": 5.4316, - "step": 20525 - }, - { - "epoch": 10.704563233376792, - "grad_norm": 1.458225965499878, - "learning_rate": 7.987939698492463e-05, - "loss": 5.6003, - "step": 20526 - }, - { - "epoch": 10.705084745762711, - "grad_norm": 1.398246169090271, - "learning_rate": 7.9878391959799e-05, - "loss": 5.6214, - "step": 20527 - }, - { - "epoch": 10.70560625814863, - "grad_norm": 1.4032458066940308, - "learning_rate": 7.987738693467337e-05, - "loss": 5.3571, - "step": 20528 - }, - { - "epoch": 10.70612777053455, - "grad_norm": 1.4126532077789307, - "learning_rate": 7.987638190954773e-05, - "loss": 5.7295, - "step": 20529 - }, - { - "epoch": 10.70664928292047, - "grad_norm": 1.3040403127670288, - "learning_rate": 7.987537688442211e-05, - "loss": 5.6338, - "step": 20530 - }, - { - "epoch": 10.70717079530639, - "grad_norm": 1.4223980903625488, - "learning_rate": 7.987437185929648e-05, - "loss": 5.7874, - "step": 20531 - }, - { - "epoch": 10.707692307692307, - "grad_norm": 1.4649335145950317, - "learning_rate": 7.987336683417085e-05, - "loss": 5.3489, - "step": 20532 - }, - { - "epoch": 10.708213820078226, - "grad_norm": 1.415537714958191, - "learning_rate": 7.987236180904523e-05, - "loss": 5.1988, - "step": 20533 - }, - { - "epoch": 10.708735332464146, - "grad_norm": 1.4973971843719482, - "learning_rate": 7.987135678391961e-05, - "loss": 5.4386, - "step": 20534 - }, - { - "epoch": 10.709256844850065, - "grad_norm": 1.326097846031189, - "learning_rate": 7.987035175879397e-05, - "loss": 5.3591, - "step": 20535 - }, - { - "epoch": 10.709778357235985, - "grad_norm": 1.4599062204360962, - "learning_rate": 7.986934673366835e-05, - "loss": 5.3282, - "step": 20536 - }, - { - "epoch": 10.710299869621904, - "grad_norm": 1.5606449842453003, - "learning_rate": 7.986834170854272e-05, - "loss": 5.7351, - "step": 20537 - }, - { - "epoch": 10.710821382007822, - "grad_norm": 1.2671868801116943, - "learning_rate": 7.98673366834171e-05, - "loss": 4.8856, - "step": 20538 - }, - { - "epoch": 10.711342894393741, - "grad_norm": 1.4558982849121094, - "learning_rate": 7.986633165829146e-05, - "loss": 5.8186, - "step": 20539 - }, - { - "epoch": 10.711864406779661, - "grad_norm": 1.5179119110107422, - "learning_rate": 7.986532663316584e-05, - "loss": 5.5546, - "step": 20540 - }, - { - "epoch": 10.71238591916558, - "grad_norm": 1.5825707912445068, - "learning_rate": 7.98643216080402e-05, - "loss": 5.453, - "step": 20541 - }, - { - "epoch": 10.7129074315515, - "grad_norm": 1.4324754476547241, - "learning_rate": 7.986331658291458e-05, - "loss": 5.0837, - "step": 20542 - }, - { - "epoch": 10.71342894393742, - "grad_norm": 1.4329947233200073, - "learning_rate": 7.986231155778896e-05, - "loss": 5.5977, - "step": 20543 - }, - { - "epoch": 10.713950456323337, - "grad_norm": 1.4459701776504517, - "learning_rate": 7.986130653266332e-05, - "loss": 5.6189, - "step": 20544 - }, - { - "epoch": 10.714471968709256, - "grad_norm": 1.4875367879867554, - "learning_rate": 7.98603015075377e-05, - "loss": 5.2919, - "step": 20545 - }, - { - "epoch": 10.714993481095176, - "grad_norm": 1.6733160018920898, - "learning_rate": 7.985929648241206e-05, - "loss": 5.6982, - "step": 20546 - }, - { - "epoch": 10.715514993481095, - "grad_norm": 1.4029656648635864, - "learning_rate": 7.985829145728644e-05, - "loss": 5.5928, - "step": 20547 - }, - { - "epoch": 10.716036505867015, - "grad_norm": 1.440141201019287, - "learning_rate": 7.98572864321608e-05, - "loss": 5.7757, - "step": 20548 - }, - { - "epoch": 10.716558018252934, - "grad_norm": 1.5664933919906616, - "learning_rate": 7.985628140703518e-05, - "loss": 5.5303, - "step": 20549 - }, - { - "epoch": 10.717079530638852, - "grad_norm": 1.6042330265045166, - "learning_rate": 7.985527638190955e-05, - "loss": 4.6371, - "step": 20550 - }, - { - "epoch": 10.717601043024771, - "grad_norm": 1.6687901020050049, - "learning_rate": 7.985427135678392e-05, - "loss": 5.2901, - "step": 20551 - }, - { - "epoch": 10.718122555410691, - "grad_norm": 1.4783134460449219, - "learning_rate": 7.985326633165829e-05, - "loss": 5.3499, - "step": 20552 - }, - { - "epoch": 10.71864406779661, - "grad_norm": 1.5388374328613281, - "learning_rate": 7.985226130653267e-05, - "loss": 5.5597, - "step": 20553 - }, - { - "epoch": 10.71916558018253, - "grad_norm": 1.464840292930603, - "learning_rate": 7.985125628140704e-05, - "loss": 5.3158, - "step": 20554 - }, - { - "epoch": 10.71968709256845, - "grad_norm": 1.5695183277130127, - "learning_rate": 7.985025125628141e-05, - "loss": 5.1365, - "step": 20555 - }, - { - "epoch": 10.720208604954367, - "grad_norm": 1.4815515279769897, - "learning_rate": 7.984924623115579e-05, - "loss": 5.3227, - "step": 20556 - }, - { - "epoch": 10.720730117340286, - "grad_norm": 1.4057945013046265, - "learning_rate": 7.984824120603015e-05, - "loss": 5.7882, - "step": 20557 - }, - { - "epoch": 10.721251629726206, - "grad_norm": 1.5721955299377441, - "learning_rate": 7.984723618090453e-05, - "loss": 4.9397, - "step": 20558 - }, - { - "epoch": 10.721773142112125, - "grad_norm": 1.500988483428955, - "learning_rate": 7.984623115577889e-05, - "loss": 5.2082, - "step": 20559 - }, - { - "epoch": 10.722294654498045, - "grad_norm": 1.5223915576934814, - "learning_rate": 7.984522613065327e-05, - "loss": 5.356, - "step": 20560 - }, - { - "epoch": 10.722816166883963, - "grad_norm": 1.5964893102645874, - "learning_rate": 7.984422110552763e-05, - "loss": 5.2922, - "step": 20561 - }, - { - "epoch": 10.723337679269882, - "grad_norm": 1.3739266395568848, - "learning_rate": 7.984321608040201e-05, - "loss": 5.8048, - "step": 20562 - }, - { - "epoch": 10.723859191655801, - "grad_norm": 1.546673059463501, - "learning_rate": 7.984221105527638e-05, - "loss": 5.0196, - "step": 20563 - }, - { - "epoch": 10.724380704041721, - "grad_norm": 1.407586693763733, - "learning_rate": 7.984120603015075e-05, - "loss": 5.2983, - "step": 20564 - }, - { - "epoch": 10.72490221642764, - "grad_norm": 1.351186752319336, - "learning_rate": 7.984020100502513e-05, - "loss": 5.1317, - "step": 20565 - }, - { - "epoch": 10.72542372881356, - "grad_norm": 1.4932016134262085, - "learning_rate": 7.983919597989951e-05, - "loss": 5.7853, - "step": 20566 - }, - { - "epoch": 10.72594524119948, - "grad_norm": 1.4835835695266724, - "learning_rate": 7.983819095477387e-05, - "loss": 5.4415, - "step": 20567 - }, - { - "epoch": 10.726466753585397, - "grad_norm": 1.4884053468704224, - "learning_rate": 7.983718592964825e-05, - "loss": 5.5918, - "step": 20568 - }, - { - "epoch": 10.726988265971316, - "grad_norm": 1.3119715452194214, - "learning_rate": 7.983618090452262e-05, - "loss": 5.7149, - "step": 20569 - }, - { - "epoch": 10.727509778357236, - "grad_norm": 1.5007261037826538, - "learning_rate": 7.983517587939698e-05, - "loss": 5.1437, - "step": 20570 - }, - { - "epoch": 10.728031290743155, - "grad_norm": 1.5473265647888184, - "learning_rate": 7.983417085427136e-05, - "loss": 5.7658, - "step": 20571 - }, - { - "epoch": 10.728552803129075, - "grad_norm": 1.4065437316894531, - "learning_rate": 7.983316582914572e-05, - "loss": 5.7831, - "step": 20572 - }, - { - "epoch": 10.729074315514993, - "grad_norm": 1.370995283126831, - "learning_rate": 7.98321608040201e-05, - "loss": 5.4698, - "step": 20573 - }, - { - "epoch": 10.729595827900912, - "grad_norm": 1.4958773851394653, - "learning_rate": 7.983115577889448e-05, - "loss": 5.456, - "step": 20574 - }, - { - "epoch": 10.730117340286832, - "grad_norm": 1.531615972518921, - "learning_rate": 7.983015075376886e-05, - "loss": 5.1943, - "step": 20575 - }, - { - "epoch": 10.730638852672751, - "grad_norm": 1.5144647359848022, - "learning_rate": 7.982914572864322e-05, - "loss": 5.6949, - "step": 20576 - }, - { - "epoch": 10.73116036505867, - "grad_norm": 1.727154016494751, - "learning_rate": 7.98281407035176e-05, - "loss": 4.4463, - "step": 20577 - }, - { - "epoch": 10.73168187744459, - "grad_norm": 1.5031750202178955, - "learning_rate": 7.982713567839196e-05, - "loss": 4.8425, - "step": 20578 - }, - { - "epoch": 10.73220338983051, - "grad_norm": 1.4502664804458618, - "learning_rate": 7.982613065326634e-05, - "loss": 5.8831, - "step": 20579 - }, - { - "epoch": 10.732724902216427, - "grad_norm": 1.4416425228118896, - "learning_rate": 7.98251256281407e-05, - "loss": 5.2087, - "step": 20580 - }, - { - "epoch": 10.733246414602347, - "grad_norm": 1.4262460470199585, - "learning_rate": 7.982412060301508e-05, - "loss": 5.4214, - "step": 20581 - }, - { - "epoch": 10.733767926988266, - "grad_norm": 1.4188470840454102, - "learning_rate": 7.982311557788945e-05, - "loss": 5.267, - "step": 20582 - }, - { - "epoch": 10.734289439374185, - "grad_norm": 1.4282373189926147, - "learning_rate": 7.982211055276381e-05, - "loss": 5.2587, - "step": 20583 - }, - { - "epoch": 10.734810951760105, - "grad_norm": 1.474522352218628, - "learning_rate": 7.982110552763819e-05, - "loss": 5.5145, - "step": 20584 - }, - { - "epoch": 10.735332464146023, - "grad_norm": 1.9726178646087646, - "learning_rate": 7.982010050251257e-05, - "loss": 5.3637, - "step": 20585 - }, - { - "epoch": 10.735853976531942, - "grad_norm": 1.3897017240524292, - "learning_rate": 7.981909547738694e-05, - "loss": 5.5929, - "step": 20586 - }, - { - "epoch": 10.736375488917862, - "grad_norm": 1.440308690071106, - "learning_rate": 7.981809045226131e-05, - "loss": 5.7166, - "step": 20587 - }, - { - "epoch": 10.736897001303781, - "grad_norm": 1.404708743095398, - "learning_rate": 7.981708542713569e-05, - "loss": 5.6116, - "step": 20588 - }, - { - "epoch": 10.7374185136897, - "grad_norm": 1.6150121688842773, - "learning_rate": 7.981608040201005e-05, - "loss": 4.985, - "step": 20589 - }, - { - "epoch": 10.73794002607562, - "grad_norm": 1.4066436290740967, - "learning_rate": 7.981507537688443e-05, - "loss": 5.3529, - "step": 20590 - }, - { - "epoch": 10.73846153846154, - "grad_norm": 1.3918448686599731, - "learning_rate": 7.98140703517588e-05, - "loss": 5.6902, - "step": 20591 - }, - { - "epoch": 10.738983050847457, - "grad_norm": 1.4605953693389893, - "learning_rate": 7.981306532663317e-05, - "loss": 5.3604, - "step": 20592 - }, - { - "epoch": 10.739504563233377, - "grad_norm": 1.4968782663345337, - "learning_rate": 7.981206030150754e-05, - "loss": 5.4065, - "step": 20593 - }, - { - "epoch": 10.740026075619296, - "grad_norm": 1.468072772026062, - "learning_rate": 7.981105527638191e-05, - "loss": 5.3069, - "step": 20594 - }, - { - "epoch": 10.740547588005215, - "grad_norm": 1.4255447387695312, - "learning_rate": 7.981005025125629e-05, - "loss": 5.6418, - "step": 20595 - }, - { - "epoch": 10.741069100391135, - "grad_norm": 1.4637668132781982, - "learning_rate": 7.980904522613066e-05, - "loss": 5.3355, - "step": 20596 - }, - { - "epoch": 10.741590612777053, - "grad_norm": 1.397374153137207, - "learning_rate": 7.980804020100503e-05, - "loss": 5.5698, - "step": 20597 - }, - { - "epoch": 10.742112125162972, - "grad_norm": 1.549070119857788, - "learning_rate": 7.98070351758794e-05, - "loss": 5.0289, - "step": 20598 - }, - { - "epoch": 10.742633637548892, - "grad_norm": 1.3298895359039307, - "learning_rate": 7.980603015075378e-05, - "loss": 5.7493, - "step": 20599 - }, - { - "epoch": 10.743155149934811, - "grad_norm": 1.4399086236953735, - "learning_rate": 7.980502512562814e-05, - "loss": 5.7734, - "step": 20600 - }, - { - "epoch": 10.74367666232073, - "grad_norm": 1.46286141872406, - "learning_rate": 7.980402010050252e-05, - "loss": 5.1524, - "step": 20601 - }, - { - "epoch": 10.74419817470665, - "grad_norm": 1.3924506902694702, - "learning_rate": 7.980301507537688e-05, - "loss": 5.0101, - "step": 20602 - }, - { - "epoch": 10.744719687092568, - "grad_norm": 1.382190227508545, - "learning_rate": 7.980201005025126e-05, - "loss": 5.4776, - "step": 20603 - }, - { - "epoch": 10.745241199478487, - "grad_norm": 1.4183335304260254, - "learning_rate": 7.980100502512562e-05, - "loss": 5.5494, - "step": 20604 - }, - { - "epoch": 10.745762711864407, - "grad_norm": 1.3883990049362183, - "learning_rate": 7.98e-05, - "loss": 5.7059, - "step": 20605 - }, - { - "epoch": 10.746284224250326, - "grad_norm": 1.4127143621444702, - "learning_rate": 7.979899497487438e-05, - "loss": 5.5519, - "step": 20606 - }, - { - "epoch": 10.746805736636245, - "grad_norm": 1.3930257558822632, - "learning_rate": 7.979798994974876e-05, - "loss": 5.7016, - "step": 20607 - }, - { - "epoch": 10.747327249022165, - "grad_norm": 1.547387719154358, - "learning_rate": 7.979698492462312e-05, - "loss": 5.2773, - "step": 20608 - }, - { - "epoch": 10.747848761408083, - "grad_norm": 1.4675372838974, - "learning_rate": 7.979597989949749e-05, - "loss": 5.5175, - "step": 20609 - }, - { - "epoch": 10.748370273794002, - "grad_norm": 1.5473538637161255, - "learning_rate": 7.979497487437186e-05, - "loss": 5.4943, - "step": 20610 - }, - { - "epoch": 10.748891786179922, - "grad_norm": 1.530320644378662, - "learning_rate": 7.979396984924623e-05, - "loss": 5.6468, - "step": 20611 - }, - { - "epoch": 10.749413298565841, - "grad_norm": 1.407534122467041, - "learning_rate": 7.97929648241206e-05, - "loss": 5.4529, - "step": 20612 - }, - { - "epoch": 10.74993481095176, - "grad_norm": 1.476136326789856, - "learning_rate": 7.979195979899497e-05, - "loss": 5.7752, - "step": 20613 - }, - { - "epoch": 10.75045632333768, - "grad_norm": 1.4766126871109009, - "learning_rate": 7.979095477386935e-05, - "loss": 5.2872, - "step": 20614 - }, - { - "epoch": 10.750977835723598, - "grad_norm": 1.5243223905563354, - "learning_rate": 7.978994974874373e-05, - "loss": 5.7856, - "step": 20615 - }, - { - "epoch": 10.751499348109517, - "grad_norm": 1.3945000171661377, - "learning_rate": 7.97889447236181e-05, - "loss": 5.1904, - "step": 20616 - }, - { - "epoch": 10.752020860495437, - "grad_norm": 1.5416702032089233, - "learning_rate": 7.978793969849247e-05, - "loss": 4.7181, - "step": 20617 - }, - { - "epoch": 10.752542372881356, - "grad_norm": 1.6678142547607422, - "learning_rate": 7.978693467336685e-05, - "loss": 4.8833, - "step": 20618 - }, - { - "epoch": 10.753063885267276, - "grad_norm": 1.4212430715560913, - "learning_rate": 7.978592964824121e-05, - "loss": 5.7327, - "step": 20619 - }, - { - "epoch": 10.753585397653195, - "grad_norm": 1.544442057609558, - "learning_rate": 7.978492462311559e-05, - "loss": 5.0351, - "step": 20620 - }, - { - "epoch": 10.754106910039113, - "grad_norm": 1.4248130321502686, - "learning_rate": 7.978391959798995e-05, - "loss": 5.8795, - "step": 20621 - }, - { - "epoch": 10.754628422425032, - "grad_norm": 1.496909737586975, - "learning_rate": 7.978291457286432e-05, - "loss": 5.3171, - "step": 20622 - }, - { - "epoch": 10.755149934810952, - "grad_norm": 1.4453941583633423, - "learning_rate": 7.97819095477387e-05, - "loss": 5.1945, - "step": 20623 - }, - { - "epoch": 10.755671447196871, - "grad_norm": 1.563942313194275, - "learning_rate": 7.978090452261306e-05, - "loss": 5.6247, - "step": 20624 - }, - { - "epoch": 10.75619295958279, - "grad_norm": 1.424263596534729, - "learning_rate": 7.977989949748744e-05, - "loss": 5.4349, - "step": 20625 - }, - { - "epoch": 10.75671447196871, - "grad_norm": 1.327027678489685, - "learning_rate": 7.977889447236181e-05, - "loss": 5.8415, - "step": 20626 - }, - { - "epoch": 10.757235984354628, - "grad_norm": 1.448517084121704, - "learning_rate": 7.977788944723619e-05, - "loss": 5.0308, - "step": 20627 - }, - { - "epoch": 10.757757496740547, - "grad_norm": 1.4885210990905762, - "learning_rate": 7.977688442211056e-05, - "loss": 5.7001, - "step": 20628 - }, - { - "epoch": 10.758279009126467, - "grad_norm": 1.4293689727783203, - "learning_rate": 7.977587939698493e-05, - "loss": 5.3782, - "step": 20629 - }, - { - "epoch": 10.758800521512386, - "grad_norm": 1.4227476119995117, - "learning_rate": 7.97748743718593e-05, - "loss": 5.6843, - "step": 20630 - }, - { - "epoch": 10.759322033898306, - "grad_norm": 1.49143648147583, - "learning_rate": 7.977386934673368e-05, - "loss": 5.318, - "step": 20631 - }, - { - "epoch": 10.759843546284225, - "grad_norm": 1.470916986465454, - "learning_rate": 7.977286432160804e-05, - "loss": 5.8685, - "step": 20632 - }, - { - "epoch": 10.760365058670143, - "grad_norm": 1.5921236276626587, - "learning_rate": 7.977185929648242e-05, - "loss": 4.787, - "step": 20633 - }, - { - "epoch": 10.760886571056062, - "grad_norm": 1.4297915697097778, - "learning_rate": 7.977085427135678e-05, - "loss": 5.5969, - "step": 20634 - }, - { - "epoch": 10.761408083441982, - "grad_norm": 1.410170316696167, - "learning_rate": 7.976984924623116e-05, - "loss": 4.9429, - "step": 20635 - }, - { - "epoch": 10.761929595827901, - "grad_norm": 1.3154422044754028, - "learning_rate": 7.976884422110554e-05, - "loss": 5.8411, - "step": 20636 - }, - { - "epoch": 10.76245110821382, - "grad_norm": 1.5135153532028198, - "learning_rate": 7.97678391959799e-05, - "loss": 5.659, - "step": 20637 - }, - { - "epoch": 10.76297262059974, - "grad_norm": 1.5385178327560425, - "learning_rate": 7.976683417085428e-05, - "loss": 5.3286, - "step": 20638 - }, - { - "epoch": 10.763494132985658, - "grad_norm": 1.3901264667510986, - "learning_rate": 7.976582914572864e-05, - "loss": 5.6606, - "step": 20639 - }, - { - "epoch": 10.764015645371577, - "grad_norm": 1.3723020553588867, - "learning_rate": 7.976482412060302e-05, - "loss": 5.709, - "step": 20640 - }, - { - "epoch": 10.764537157757497, - "grad_norm": 1.375578761100769, - "learning_rate": 7.976381909547739e-05, - "loss": 5.7049, - "step": 20641 - }, - { - "epoch": 10.765058670143416, - "grad_norm": 1.4342303276062012, - "learning_rate": 7.976281407035176e-05, - "loss": 5.3004, - "step": 20642 - }, - { - "epoch": 10.765580182529336, - "grad_norm": 1.5188686847686768, - "learning_rate": 7.976180904522613e-05, - "loss": 5.4535, - "step": 20643 - }, - { - "epoch": 10.766101694915255, - "grad_norm": 1.5327662229537964, - "learning_rate": 7.97608040201005e-05, - "loss": 5.2864, - "step": 20644 - }, - { - "epoch": 10.766623207301173, - "grad_norm": 1.3032110929489136, - "learning_rate": 7.975979899497487e-05, - "loss": 5.5598, - "step": 20645 - }, - { - "epoch": 10.767144719687092, - "grad_norm": 1.5523598194122314, - "learning_rate": 7.975879396984925e-05, - "loss": 5.1685, - "step": 20646 - }, - { - "epoch": 10.767666232073012, - "grad_norm": 1.4198572635650635, - "learning_rate": 7.975778894472363e-05, - "loss": 5.61, - "step": 20647 - }, - { - "epoch": 10.768187744458931, - "grad_norm": 1.4331961870193481, - "learning_rate": 7.975678391959799e-05, - "loss": 5.4318, - "step": 20648 - }, - { - "epoch": 10.76870925684485, - "grad_norm": 1.4220598936080933, - "learning_rate": 7.975577889447237e-05, - "loss": 5.6553, - "step": 20649 - }, - { - "epoch": 10.76923076923077, - "grad_norm": 1.413525938987732, - "learning_rate": 7.975477386934673e-05, - "loss": 5.6348, - "step": 20650 - }, - { - "epoch": 10.769752281616688, - "grad_norm": 1.3968886137008667, - "learning_rate": 7.975376884422111e-05, - "loss": 5.5248, - "step": 20651 - }, - { - "epoch": 10.770273794002607, - "grad_norm": 1.478899359703064, - "learning_rate": 7.975276381909547e-05, - "loss": 5.501, - "step": 20652 - }, - { - "epoch": 10.770795306388527, - "grad_norm": 1.445420265197754, - "learning_rate": 7.975175879396985e-05, - "loss": 5.437, - "step": 20653 - }, - { - "epoch": 10.771316818774446, - "grad_norm": 1.3095381259918213, - "learning_rate": 7.975075376884422e-05, - "loss": 5.6446, - "step": 20654 - }, - { - "epoch": 10.771838331160366, - "grad_norm": 1.424641728401184, - "learning_rate": 7.97497487437186e-05, - "loss": 5.8524, - "step": 20655 - }, - { - "epoch": 10.772359843546283, - "grad_norm": 1.4018781185150146, - "learning_rate": 7.974874371859297e-05, - "loss": 5.8825, - "step": 20656 - }, - { - "epoch": 10.772881355932203, - "grad_norm": 1.3746228218078613, - "learning_rate": 7.974773869346735e-05, - "loss": 5.5888, - "step": 20657 - }, - { - "epoch": 10.773402868318122, - "grad_norm": 1.446773648262024, - "learning_rate": 7.974673366834171e-05, - "loss": 5.3737, - "step": 20658 - }, - { - "epoch": 10.773924380704042, - "grad_norm": 1.446924090385437, - "learning_rate": 7.974572864321609e-05, - "loss": 5.5067, - "step": 20659 - }, - { - "epoch": 10.774445893089961, - "grad_norm": 1.6077567338943481, - "learning_rate": 7.974472361809046e-05, - "loss": 5.5169, - "step": 20660 - }, - { - "epoch": 10.77496740547588, - "grad_norm": 1.3975844383239746, - "learning_rate": 7.974371859296483e-05, - "loss": 5.6503, - "step": 20661 - }, - { - "epoch": 10.7754889178618, - "grad_norm": 1.3694440126419067, - "learning_rate": 7.97427135678392e-05, - "loss": 5.7835, - "step": 20662 - }, - { - "epoch": 10.776010430247718, - "grad_norm": 1.4807727336883545, - "learning_rate": 7.974170854271356e-05, - "loss": 5.1575, - "step": 20663 - }, - { - "epoch": 10.776531942633637, - "grad_norm": 1.4640098810195923, - "learning_rate": 7.974070351758794e-05, - "loss": 5.5631, - "step": 20664 - }, - { - "epoch": 10.777053455019557, - "grad_norm": 1.4074827432632446, - "learning_rate": 7.97396984924623e-05, - "loss": 5.2005, - "step": 20665 - }, - { - "epoch": 10.777574967405476, - "grad_norm": 1.6540461778640747, - "learning_rate": 7.973869346733668e-05, - "loss": 5.5338, - "step": 20666 - }, - { - "epoch": 10.778096479791396, - "grad_norm": 1.5071027278900146, - "learning_rate": 7.973768844221106e-05, - "loss": 5.5616, - "step": 20667 - }, - { - "epoch": 10.778617992177313, - "grad_norm": 1.4506456851959229, - "learning_rate": 7.973668341708544e-05, - "loss": 5.393, - "step": 20668 - }, - { - "epoch": 10.779139504563233, - "grad_norm": 1.3780595064163208, - "learning_rate": 7.97356783919598e-05, - "loss": 5.8274, - "step": 20669 - }, - { - "epoch": 10.779661016949152, - "grad_norm": 1.5183708667755127, - "learning_rate": 7.973467336683418e-05, - "loss": 4.5431, - "step": 20670 - }, - { - "epoch": 10.780182529335072, - "grad_norm": 1.3814094066619873, - "learning_rate": 7.973366834170855e-05, - "loss": 5.7101, - "step": 20671 - }, - { - "epoch": 10.780704041720991, - "grad_norm": 1.4854841232299805, - "learning_rate": 7.973266331658292e-05, - "loss": 5.2639, - "step": 20672 - }, - { - "epoch": 10.78122555410691, - "grad_norm": 1.4210858345031738, - "learning_rate": 7.973165829145729e-05, - "loss": 5.532, - "step": 20673 - }, - { - "epoch": 10.78174706649283, - "grad_norm": 1.5298528671264648, - "learning_rate": 7.973065326633167e-05, - "loss": 5.3229, - "step": 20674 - }, - { - "epoch": 10.782268578878748, - "grad_norm": 1.4310117959976196, - "learning_rate": 7.972964824120603e-05, - "loss": 5.4179, - "step": 20675 - }, - { - "epoch": 10.782790091264667, - "grad_norm": 1.3451337814331055, - "learning_rate": 7.972864321608041e-05, - "loss": 6.0487, - "step": 20676 - }, - { - "epoch": 10.783311603650587, - "grad_norm": 1.5247447490692139, - "learning_rate": 7.972763819095479e-05, - "loss": 5.4684, - "step": 20677 - }, - { - "epoch": 10.783833116036506, - "grad_norm": 1.3892323970794678, - "learning_rate": 7.972663316582915e-05, - "loss": 5.6121, - "step": 20678 - }, - { - "epoch": 10.784354628422426, - "grad_norm": 1.4021673202514648, - "learning_rate": 7.972562814070353e-05, - "loss": 5.6363, - "step": 20679 - }, - { - "epoch": 10.784876140808343, - "grad_norm": 1.4437789916992188, - "learning_rate": 7.972462311557789e-05, - "loss": 5.5443, - "step": 20680 - }, - { - "epoch": 10.785397653194263, - "grad_norm": 1.4582810401916504, - "learning_rate": 7.972361809045227e-05, - "loss": 5.8015, - "step": 20681 - }, - { - "epoch": 10.785919165580182, - "grad_norm": 1.4905661344528198, - "learning_rate": 7.972261306532663e-05, - "loss": 5.371, - "step": 20682 - }, - { - "epoch": 10.786440677966102, - "grad_norm": 1.3369591236114502, - "learning_rate": 7.972160804020101e-05, - "loss": 5.9088, - "step": 20683 - }, - { - "epoch": 10.786962190352021, - "grad_norm": 1.423959732055664, - "learning_rate": 7.972060301507538e-05, - "loss": 4.9422, - "step": 20684 - }, - { - "epoch": 10.78748370273794, - "grad_norm": 1.4387743473052979, - "learning_rate": 7.971959798994975e-05, - "loss": 5.4876, - "step": 20685 - }, - { - "epoch": 10.78800521512386, - "grad_norm": 1.4731855392456055, - "learning_rate": 7.971859296482412e-05, - "loss": 5.5573, - "step": 20686 - }, - { - "epoch": 10.788526727509778, - "grad_norm": 1.4022163152694702, - "learning_rate": 7.97175879396985e-05, - "loss": 5.2713, - "step": 20687 - }, - { - "epoch": 10.789048239895697, - "grad_norm": 1.4151439666748047, - "learning_rate": 7.971658291457287e-05, - "loss": 4.9932, - "step": 20688 - }, - { - "epoch": 10.789569752281617, - "grad_norm": 1.446157455444336, - "learning_rate": 7.971557788944724e-05, - "loss": 5.4095, - "step": 20689 - }, - { - "epoch": 10.790091264667536, - "grad_norm": 1.43240487575531, - "learning_rate": 7.971457286432162e-05, - "loss": 5.2029, - "step": 20690 - }, - { - "epoch": 10.790612777053456, - "grad_norm": 1.520775318145752, - "learning_rate": 7.971356783919598e-05, - "loss": 4.9632, - "step": 20691 - }, - { - "epoch": 10.791134289439373, - "grad_norm": 1.4017713069915771, - "learning_rate": 7.971256281407036e-05, - "loss": 5.5734, - "step": 20692 - }, - { - "epoch": 10.791655801825293, - "grad_norm": 1.3306727409362793, - "learning_rate": 7.971155778894472e-05, - "loss": 6.0564, - "step": 20693 - }, - { - "epoch": 10.792177314211212, - "grad_norm": 1.4964497089385986, - "learning_rate": 7.97105527638191e-05, - "loss": 5.5376, - "step": 20694 - }, - { - "epoch": 10.792698826597132, - "grad_norm": 1.4500621557235718, - "learning_rate": 7.970954773869346e-05, - "loss": 5.8292, - "step": 20695 - }, - { - "epoch": 10.793220338983051, - "grad_norm": 1.338826060295105, - "learning_rate": 7.970854271356784e-05, - "loss": 4.9867, - "step": 20696 - }, - { - "epoch": 10.79374185136897, - "grad_norm": 1.378324270248413, - "learning_rate": 7.970753768844222e-05, - "loss": 5.2013, - "step": 20697 - }, - { - "epoch": 10.794263363754888, - "grad_norm": 1.4890415668487549, - "learning_rate": 7.97065326633166e-05, - "loss": 5.342, - "step": 20698 - }, - { - "epoch": 10.794784876140808, - "grad_norm": 1.5941314697265625, - "learning_rate": 7.970552763819096e-05, - "loss": 5.5614, - "step": 20699 - }, - { - "epoch": 10.795306388526727, - "grad_norm": 1.6168421506881714, - "learning_rate": 7.970452261306534e-05, - "loss": 5.3583, - "step": 20700 - }, - { - "epoch": 10.795827900912647, - "grad_norm": 1.4827972650527954, - "learning_rate": 7.97035175879397e-05, - "loss": 5.3185, - "step": 20701 - }, - { - "epoch": 10.796349413298566, - "grad_norm": 1.5068382024765015, - "learning_rate": 7.970251256281407e-05, - "loss": 5.4768, - "step": 20702 - }, - { - "epoch": 10.796870925684486, - "grad_norm": 1.9235774278640747, - "learning_rate": 7.970150753768845e-05, - "loss": 5.1556, - "step": 20703 - }, - { - "epoch": 10.797392438070403, - "grad_norm": 1.4397050142288208, - "learning_rate": 7.970050251256281e-05, - "loss": 5.4701, - "step": 20704 - }, - { - "epoch": 10.797913950456323, - "grad_norm": 1.4578948020935059, - "learning_rate": 7.969949748743719e-05, - "loss": 4.9752, - "step": 20705 - }, - { - "epoch": 10.798435462842242, - "grad_norm": 1.441693663597107, - "learning_rate": 7.969849246231155e-05, - "loss": 5.2692, - "step": 20706 - }, - { - "epoch": 10.798956975228162, - "grad_norm": 1.6203075647354126, - "learning_rate": 7.969748743718593e-05, - "loss": 5.2555, - "step": 20707 - }, - { - "epoch": 10.799478487614081, - "grad_norm": 1.597507119178772, - "learning_rate": 7.969648241206031e-05, - "loss": 5.3899, - "step": 20708 - }, - { - "epoch": 10.8, - "grad_norm": 1.623583197593689, - "learning_rate": 7.969547738693469e-05, - "loss": 5.0377, - "step": 20709 - }, - { - "epoch": 10.800521512385918, - "grad_norm": 1.3858660459518433, - "learning_rate": 7.969447236180905e-05, - "loss": 5.8976, - "step": 20710 - }, - { - "epoch": 10.801043024771838, - "grad_norm": 1.525005578994751, - "learning_rate": 7.969346733668343e-05, - "loss": 5.0873, - "step": 20711 - }, - { - "epoch": 10.801564537157757, - "grad_norm": 1.5472322702407837, - "learning_rate": 7.969246231155779e-05, - "loss": 5.5273, - "step": 20712 - }, - { - "epoch": 10.802086049543677, - "grad_norm": 1.3793222904205322, - "learning_rate": 7.969145728643217e-05, - "loss": 5.8114, - "step": 20713 - }, - { - "epoch": 10.802607561929596, - "grad_norm": 1.4530913829803467, - "learning_rate": 7.969045226130653e-05, - "loss": 5.3409, - "step": 20714 - }, - { - "epoch": 10.803129074315516, - "grad_norm": 1.414528489112854, - "learning_rate": 7.96894472361809e-05, - "loss": 5.6146, - "step": 20715 - }, - { - "epoch": 10.803650586701433, - "grad_norm": 1.4556586742401123, - "learning_rate": 7.968844221105528e-05, - "loss": 5.4034, - "step": 20716 - }, - { - "epoch": 10.804172099087353, - "grad_norm": 1.5128448009490967, - "learning_rate": 7.968743718592965e-05, - "loss": 5.2992, - "step": 20717 - }, - { - "epoch": 10.804693611473272, - "grad_norm": 1.4829140901565552, - "learning_rate": 7.968643216080403e-05, - "loss": 5.7485, - "step": 20718 - }, - { - "epoch": 10.805215123859192, - "grad_norm": 1.4861263036727905, - "learning_rate": 7.96854271356784e-05, - "loss": 5.6274, - "step": 20719 - }, - { - "epoch": 10.805736636245111, - "grad_norm": 1.4723312854766846, - "learning_rate": 7.968442211055277e-05, - "loss": 5.4937, - "step": 20720 - }, - { - "epoch": 10.80625814863103, - "grad_norm": 1.3949435949325562, - "learning_rate": 7.968341708542714e-05, - "loss": 5.9183, - "step": 20721 - }, - { - "epoch": 10.806779661016948, - "grad_norm": 1.441552996635437, - "learning_rate": 7.968241206030152e-05, - "loss": 5.6363, - "step": 20722 - }, - { - "epoch": 10.807301173402868, - "grad_norm": 1.3486603498458862, - "learning_rate": 7.968140703517588e-05, - "loss": 5.8713, - "step": 20723 - }, - { - "epoch": 10.807822685788787, - "grad_norm": 1.3587275743484497, - "learning_rate": 7.968040201005026e-05, - "loss": 5.4983, - "step": 20724 - }, - { - "epoch": 10.808344198174707, - "grad_norm": 1.4620894193649292, - "learning_rate": 7.967939698492462e-05, - "loss": 5.4494, - "step": 20725 - }, - { - "epoch": 10.808865710560626, - "grad_norm": 1.5368926525115967, - "learning_rate": 7.9678391959799e-05, - "loss": 4.9162, - "step": 20726 - }, - { - "epoch": 10.809387222946546, - "grad_norm": 1.5064212083816528, - "learning_rate": 7.967738693467336e-05, - "loss": 5.261, - "step": 20727 - }, - { - "epoch": 10.809908735332463, - "grad_norm": 1.4238957166671753, - "learning_rate": 7.967638190954774e-05, - "loss": 5.5199, - "step": 20728 - }, - { - "epoch": 10.810430247718383, - "grad_norm": 1.4107391834259033, - "learning_rate": 7.967537688442212e-05, - "loss": 5.738, - "step": 20729 - }, - { - "epoch": 10.810951760104302, - "grad_norm": 1.377893328666687, - "learning_rate": 7.967437185929648e-05, - "loss": 5.7457, - "step": 20730 - }, - { - "epoch": 10.811473272490222, - "grad_norm": 1.3903776407241821, - "learning_rate": 7.967336683417086e-05, - "loss": 5.0453, - "step": 20731 - }, - { - "epoch": 10.811994784876141, - "grad_norm": 1.5792397260665894, - "learning_rate": 7.967236180904523e-05, - "loss": 5.298, - "step": 20732 - }, - { - "epoch": 10.81251629726206, - "grad_norm": 1.4612237215042114, - "learning_rate": 7.96713567839196e-05, - "loss": 5.688, - "step": 20733 - }, - { - "epoch": 10.813037809647978, - "grad_norm": 1.410043478012085, - "learning_rate": 7.967035175879397e-05, - "loss": 5.1718, - "step": 20734 - }, - { - "epoch": 10.813559322033898, - "grad_norm": 1.4629569053649902, - "learning_rate": 7.966934673366835e-05, - "loss": 5.4274, - "step": 20735 - }, - { - "epoch": 10.814080834419817, - "grad_norm": 1.3004539012908936, - "learning_rate": 7.966834170854271e-05, - "loss": 5.0218, - "step": 20736 - }, - { - "epoch": 10.814602346805737, - "grad_norm": 1.3722233772277832, - "learning_rate": 7.966733668341709e-05, - "loss": 5.6449, - "step": 20737 - }, - { - "epoch": 10.815123859191656, - "grad_norm": 1.5427888631820679, - "learning_rate": 7.966633165829145e-05, - "loss": 5.6112, - "step": 20738 - }, - { - "epoch": 10.815645371577576, - "grad_norm": 1.6418004035949707, - "learning_rate": 7.966532663316583e-05, - "loss": 5.1956, - "step": 20739 - }, - { - "epoch": 10.816166883963493, - "grad_norm": 1.573679804801941, - "learning_rate": 7.966432160804021e-05, - "loss": 5.5393, - "step": 20740 - }, - { - "epoch": 10.816688396349413, - "grad_norm": 1.592287540435791, - "learning_rate": 7.966331658291459e-05, - "loss": 5.2725, - "step": 20741 - }, - { - "epoch": 10.817209908735332, - "grad_norm": 1.4096673727035522, - "learning_rate": 7.966231155778895e-05, - "loss": 5.4112, - "step": 20742 - }, - { - "epoch": 10.817731421121252, - "grad_norm": 1.4068632125854492, - "learning_rate": 7.966130653266332e-05, - "loss": 5.0282, - "step": 20743 - }, - { - "epoch": 10.818252933507171, - "grad_norm": 1.6458516120910645, - "learning_rate": 7.966030150753769e-05, - "loss": 5.4022, - "step": 20744 - }, - { - "epoch": 10.81877444589309, - "grad_norm": 1.4579191207885742, - "learning_rate": 7.965929648241206e-05, - "loss": 5.4319, - "step": 20745 - }, - { - "epoch": 10.819295958279008, - "grad_norm": 1.497306227684021, - "learning_rate": 7.965829145728644e-05, - "loss": 5.6255, - "step": 20746 - }, - { - "epoch": 10.819817470664928, - "grad_norm": 1.4214626550674438, - "learning_rate": 7.96572864321608e-05, - "loss": 5.5334, - "step": 20747 - }, - { - "epoch": 10.820338983050847, - "grad_norm": 1.4386236667633057, - "learning_rate": 7.965628140703518e-05, - "loss": 5.381, - "step": 20748 - }, - { - "epoch": 10.820860495436767, - "grad_norm": 1.4974688291549683, - "learning_rate": 7.965527638190956e-05, - "loss": 5.6032, - "step": 20749 - }, - { - "epoch": 10.821382007822686, - "grad_norm": 1.3928426504135132, - "learning_rate": 7.965427135678393e-05, - "loss": 5.5421, - "step": 20750 - }, - { - "epoch": 10.821903520208604, - "grad_norm": 1.4738370180130005, - "learning_rate": 7.96532663316583e-05, - "loss": 5.4332, - "step": 20751 - }, - { - "epoch": 10.822425032594523, - "grad_norm": 1.4198094606399536, - "learning_rate": 7.965226130653268e-05, - "loss": 5.8073, - "step": 20752 - }, - { - "epoch": 10.822946544980443, - "grad_norm": 1.3754414319992065, - "learning_rate": 7.965125628140704e-05, - "loss": 5.6626, - "step": 20753 - }, - { - "epoch": 10.823468057366362, - "grad_norm": 1.4757311344146729, - "learning_rate": 7.965025125628142e-05, - "loss": 5.6423, - "step": 20754 - }, - { - "epoch": 10.823989569752282, - "grad_norm": 1.5737947225570679, - "learning_rate": 7.964924623115578e-05, - "loss": 5.3111, - "step": 20755 - }, - { - "epoch": 10.824511082138201, - "grad_norm": 1.4191819429397583, - "learning_rate": 7.964824120603015e-05, - "loss": 5.6418, - "step": 20756 - }, - { - "epoch": 10.82503259452412, - "grad_norm": 1.4644770622253418, - "learning_rate": 7.964723618090452e-05, - "loss": 4.8861, - "step": 20757 - }, - { - "epoch": 10.825554106910038, - "grad_norm": 1.5920915603637695, - "learning_rate": 7.964623115577889e-05, - "loss": 5.0429, - "step": 20758 - }, - { - "epoch": 10.826075619295958, - "grad_norm": 1.499885082244873, - "learning_rate": 7.964522613065327e-05, - "loss": 5.4004, - "step": 20759 - }, - { - "epoch": 10.826597131681877, - "grad_norm": 1.513565182685852, - "learning_rate": 7.964422110552764e-05, - "loss": 5.1507, - "step": 20760 - }, - { - "epoch": 10.827118644067797, - "grad_norm": 1.448095440864563, - "learning_rate": 7.964321608040202e-05, - "loss": 5.607, - "step": 20761 - }, - { - "epoch": 10.827640156453716, - "grad_norm": 1.3437647819519043, - "learning_rate": 7.964221105527639e-05, - "loss": 5.7006, - "step": 20762 - }, - { - "epoch": 10.828161668839634, - "grad_norm": 1.5024175643920898, - "learning_rate": 7.964120603015076e-05, - "loss": 4.8149, - "step": 20763 - }, - { - "epoch": 10.828683181225554, - "grad_norm": 1.465254783630371, - "learning_rate": 7.964020100502513e-05, - "loss": 5.4909, - "step": 20764 - }, - { - "epoch": 10.829204693611473, - "grad_norm": 1.5923737287521362, - "learning_rate": 7.96391959798995e-05, - "loss": 5.5883, - "step": 20765 - }, - { - "epoch": 10.829726205997392, - "grad_norm": 1.5382260084152222, - "learning_rate": 7.963819095477387e-05, - "loss": 5.0806, - "step": 20766 - }, - { - "epoch": 10.830247718383312, - "grad_norm": 1.4349660873413086, - "learning_rate": 7.963718592964825e-05, - "loss": 5.4018, - "step": 20767 - }, - { - "epoch": 10.830769230769231, - "grad_norm": 1.3795368671417236, - "learning_rate": 7.963618090452261e-05, - "loss": 5.2949, - "step": 20768 - }, - { - "epoch": 10.83129074315515, - "grad_norm": 1.4719438552856445, - "learning_rate": 7.963517587939699e-05, - "loss": 5.475, - "step": 20769 - }, - { - "epoch": 10.831812255541069, - "grad_norm": 1.5309430360794067, - "learning_rate": 7.963417085427137e-05, - "loss": 5.4106, - "step": 20770 - }, - { - "epoch": 10.832333767926988, - "grad_norm": 1.3946200609207153, - "learning_rate": 7.963316582914573e-05, - "loss": 5.3135, - "step": 20771 - }, - { - "epoch": 10.832855280312907, - "grad_norm": 1.336147665977478, - "learning_rate": 7.963216080402011e-05, - "loss": 5.5459, - "step": 20772 - }, - { - "epoch": 10.833376792698827, - "grad_norm": 1.5295556783676147, - "learning_rate": 7.963115577889447e-05, - "loss": 5.3811, - "step": 20773 - }, - { - "epoch": 10.833898305084746, - "grad_norm": 1.5538806915283203, - "learning_rate": 7.963015075376885e-05, - "loss": 5.1544, - "step": 20774 - }, - { - "epoch": 10.834419817470664, - "grad_norm": 1.3942598104476929, - "learning_rate": 7.962914572864322e-05, - "loss": 5.3993, - "step": 20775 - }, - { - "epoch": 10.834941329856584, - "grad_norm": 1.4581702947616577, - "learning_rate": 7.96281407035176e-05, - "loss": 5.6406, - "step": 20776 - }, - { - "epoch": 10.835462842242503, - "grad_norm": 1.3644657135009766, - "learning_rate": 7.962713567839196e-05, - "loss": 4.7246, - "step": 20777 - }, - { - "epoch": 10.835984354628422, - "grad_norm": 1.478860855102539, - "learning_rate": 7.962613065326634e-05, - "loss": 5.5707, - "step": 20778 - }, - { - "epoch": 10.836505867014342, - "grad_norm": 1.4410970211029053, - "learning_rate": 7.96251256281407e-05, - "loss": 5.5544, - "step": 20779 - }, - { - "epoch": 10.837027379400261, - "grad_norm": 1.428435206413269, - "learning_rate": 7.962412060301508e-05, - "loss": 5.5196, - "step": 20780 - }, - { - "epoch": 10.83754889178618, - "grad_norm": 1.4673107862472534, - "learning_rate": 7.962311557788946e-05, - "loss": 4.6384, - "step": 20781 - }, - { - "epoch": 10.838070404172099, - "grad_norm": 1.4291393756866455, - "learning_rate": 7.962211055276382e-05, - "loss": 5.497, - "step": 20782 - }, - { - "epoch": 10.838591916558018, - "grad_norm": 1.427751898765564, - "learning_rate": 7.96211055276382e-05, - "loss": 5.493, - "step": 20783 - }, - { - "epoch": 10.839113428943937, - "grad_norm": 1.5893616676330566, - "learning_rate": 7.962010050251256e-05, - "loss": 5.5176, - "step": 20784 - }, - { - "epoch": 10.839634941329857, - "grad_norm": 1.437824010848999, - "learning_rate": 7.961909547738694e-05, - "loss": 5.5151, - "step": 20785 - }, - { - "epoch": 10.840156453715776, - "grad_norm": 1.4060401916503906, - "learning_rate": 7.96180904522613e-05, - "loss": 5.4245, - "step": 20786 - }, - { - "epoch": 10.840677966101694, - "grad_norm": 1.41290283203125, - "learning_rate": 7.961708542713568e-05, - "loss": 5.6157, - "step": 20787 - }, - { - "epoch": 10.841199478487614, - "grad_norm": 1.4320114850997925, - "learning_rate": 7.961608040201005e-05, - "loss": 5.4521, - "step": 20788 - }, - { - "epoch": 10.841720990873533, - "grad_norm": 1.5476760864257812, - "learning_rate": 7.961507537688442e-05, - "loss": 5.5295, - "step": 20789 - }, - { - "epoch": 10.842242503259452, - "grad_norm": 1.3641599416732788, - "learning_rate": 7.96140703517588e-05, - "loss": 5.1513, - "step": 20790 - }, - { - "epoch": 10.842764015645372, - "grad_norm": 1.462260365486145, - "learning_rate": 7.961306532663318e-05, - "loss": 5.6811, - "step": 20791 - }, - { - "epoch": 10.843285528031291, - "grad_norm": 1.4889155626296997, - "learning_rate": 7.961206030150754e-05, - "loss": 5.2913, - "step": 20792 - }, - { - "epoch": 10.843807040417209, - "grad_norm": 1.5695089101791382, - "learning_rate": 7.961105527638192e-05, - "loss": 5.5161, - "step": 20793 - }, - { - "epoch": 10.844328552803129, - "grad_norm": 1.630618691444397, - "learning_rate": 7.961005025125629e-05, - "loss": 5.3449, - "step": 20794 - }, - { - "epoch": 10.844850065189048, - "grad_norm": 1.3879542350769043, - "learning_rate": 7.960904522613065e-05, - "loss": 5.6203, - "step": 20795 - }, - { - "epoch": 10.845371577574968, - "grad_norm": 1.3964046239852905, - "learning_rate": 7.960804020100503e-05, - "loss": 5.5846, - "step": 20796 - }, - { - "epoch": 10.845893089960887, - "grad_norm": 1.3749357461929321, - "learning_rate": 7.960703517587939e-05, - "loss": 5.6297, - "step": 20797 - }, - { - "epoch": 10.846414602346806, - "grad_norm": 1.353702187538147, - "learning_rate": 7.960603015075377e-05, - "loss": 5.567, - "step": 20798 - }, - { - "epoch": 10.846936114732724, - "grad_norm": 1.4541682004928589, - "learning_rate": 7.960502512562813e-05, - "loss": 5.1598, - "step": 20799 - }, - { - "epoch": 10.847457627118644, - "grad_norm": 1.4829739332199097, - "learning_rate": 7.960402010050251e-05, - "loss": 5.4598, - "step": 20800 - }, - { - "epoch": 10.847979139504563, - "grad_norm": 1.4891096353530884, - "learning_rate": 7.960301507537689e-05, - "loss": 5.5299, - "step": 20801 - }, - { - "epoch": 10.848500651890483, - "grad_norm": 1.4753087759017944, - "learning_rate": 7.960201005025127e-05, - "loss": 5.6889, - "step": 20802 - }, - { - "epoch": 10.849022164276402, - "grad_norm": 1.4102638959884644, - "learning_rate": 7.960100502512563e-05, - "loss": 5.2995, - "step": 20803 - }, - { - "epoch": 10.849543676662321, - "grad_norm": 1.611689805984497, - "learning_rate": 7.960000000000001e-05, - "loss": 4.9359, - "step": 20804 - }, - { - "epoch": 10.85006518904824, - "grad_norm": 1.5377171039581299, - "learning_rate": 7.959899497487437e-05, - "loss": 5.6585, - "step": 20805 - }, - { - "epoch": 10.850586701434159, - "grad_norm": 1.4895716905593872, - "learning_rate": 7.959798994974875e-05, - "loss": 5.0642, - "step": 20806 - }, - { - "epoch": 10.851108213820078, - "grad_norm": 1.3922721147537231, - "learning_rate": 7.959698492462312e-05, - "loss": 5.1579, - "step": 20807 - }, - { - "epoch": 10.851629726205998, - "grad_norm": 1.2907953262329102, - "learning_rate": 7.959597989949748e-05, - "loss": 5.7076, - "step": 20808 - }, - { - "epoch": 10.852151238591917, - "grad_norm": 1.5816020965576172, - "learning_rate": 7.959497487437186e-05, - "loss": 4.9784, - "step": 20809 - }, - { - "epoch": 10.852672750977836, - "grad_norm": 1.4898992776870728, - "learning_rate": 7.959396984924624e-05, - "loss": 5.2614, - "step": 20810 - }, - { - "epoch": 10.853194263363754, - "grad_norm": 1.554205298423767, - "learning_rate": 7.959296482412061e-05, - "loss": 4.8855, - "step": 20811 - }, - { - "epoch": 10.853715775749674, - "grad_norm": 1.45045006275177, - "learning_rate": 7.959195979899498e-05, - "loss": 4.6911, - "step": 20812 - }, - { - "epoch": 10.854237288135593, - "grad_norm": 1.5101748704910278, - "learning_rate": 7.959095477386936e-05, - "loss": 5.2293, - "step": 20813 - }, - { - "epoch": 10.854758800521513, - "grad_norm": 1.5312845706939697, - "learning_rate": 7.958994974874372e-05, - "loss": 5.3077, - "step": 20814 - }, - { - "epoch": 10.855280312907432, - "grad_norm": 1.645363211631775, - "learning_rate": 7.95889447236181e-05, - "loss": 5.3089, - "step": 20815 - }, - { - "epoch": 10.855801825293351, - "grad_norm": 1.4280797243118286, - "learning_rate": 7.958793969849246e-05, - "loss": 5.4614, - "step": 20816 - }, - { - "epoch": 10.85632333767927, - "grad_norm": 1.599623441696167, - "learning_rate": 7.958693467336684e-05, - "loss": 5.4334, - "step": 20817 - }, - { - "epoch": 10.856844850065189, - "grad_norm": 1.5686933994293213, - "learning_rate": 7.95859296482412e-05, - "loss": 4.984, - "step": 20818 - }, - { - "epoch": 10.857366362451108, - "grad_norm": 1.4481548070907593, - "learning_rate": 7.958492462311558e-05, - "loss": 5.3231, - "step": 20819 - }, - { - "epoch": 10.857887874837028, - "grad_norm": 1.463856816291809, - "learning_rate": 7.958391959798995e-05, - "loss": 5.7162, - "step": 20820 - }, - { - "epoch": 10.858409387222947, - "grad_norm": 1.5069621801376343, - "learning_rate": 7.958291457286433e-05, - "loss": 5.3596, - "step": 20821 - }, - { - "epoch": 10.858930899608866, - "grad_norm": 1.4765156507492065, - "learning_rate": 7.95819095477387e-05, - "loss": 5.4304, - "step": 20822 - }, - { - "epoch": 10.859452411994784, - "grad_norm": 1.5320043563842773, - "learning_rate": 7.958090452261307e-05, - "loss": 5.4837, - "step": 20823 - }, - { - "epoch": 10.859973924380704, - "grad_norm": 1.481360673904419, - "learning_rate": 7.957989949748744e-05, - "loss": 5.2685, - "step": 20824 - }, - { - "epoch": 10.860495436766623, - "grad_norm": 1.4351886510849, - "learning_rate": 7.957889447236181e-05, - "loss": 5.6573, - "step": 20825 - }, - { - "epoch": 10.861016949152543, - "grad_norm": 1.5245764255523682, - "learning_rate": 7.957788944723619e-05, - "loss": 5.7187, - "step": 20826 - }, - { - "epoch": 10.861538461538462, - "grad_norm": 1.4680283069610596, - "learning_rate": 7.957688442211055e-05, - "loss": 4.985, - "step": 20827 - }, - { - "epoch": 10.862059973924381, - "grad_norm": 1.5380178689956665, - "learning_rate": 7.957587939698493e-05, - "loss": 5.5582, - "step": 20828 - }, - { - "epoch": 10.8625814863103, - "grad_norm": 1.5265454053878784, - "learning_rate": 7.95748743718593e-05, - "loss": 5.2843, - "step": 20829 - }, - { - "epoch": 10.863102998696219, - "grad_norm": 1.479627251625061, - "learning_rate": 7.957386934673367e-05, - "loss": 5.5416, - "step": 20830 - }, - { - "epoch": 10.863624511082138, - "grad_norm": 1.5117517709732056, - "learning_rate": 7.957286432160805e-05, - "loss": 5.3156, - "step": 20831 - }, - { - "epoch": 10.864146023468058, - "grad_norm": 1.4894530773162842, - "learning_rate": 7.957185929648243e-05, - "loss": 5.5364, - "step": 20832 - }, - { - "epoch": 10.864667535853977, - "grad_norm": 1.4878729581832886, - "learning_rate": 7.957085427135679e-05, - "loss": 5.5564, - "step": 20833 - }, - { - "epoch": 10.865189048239897, - "grad_norm": 1.410269021987915, - "learning_rate": 7.956984924623117e-05, - "loss": 5.6029, - "step": 20834 - }, - { - "epoch": 10.865710560625814, - "grad_norm": 1.471883773803711, - "learning_rate": 7.956884422110553e-05, - "loss": 5.3606, - "step": 20835 - }, - { - "epoch": 10.866232073011734, - "grad_norm": 1.5169390439987183, - "learning_rate": 7.95678391959799e-05, - "loss": 5.4083, - "step": 20836 - }, - { - "epoch": 10.866753585397653, - "grad_norm": 1.5776190757751465, - "learning_rate": 7.956683417085428e-05, - "loss": 4.652, - "step": 20837 - }, - { - "epoch": 10.867275097783573, - "grad_norm": 1.5750374794006348, - "learning_rate": 7.956582914572864e-05, - "loss": 5.1033, - "step": 20838 - }, - { - "epoch": 10.867796610169492, - "grad_norm": 1.4520243406295776, - "learning_rate": 7.956482412060302e-05, - "loss": 5.4374, - "step": 20839 - }, - { - "epoch": 10.868318122555412, - "grad_norm": 1.58138108253479, - "learning_rate": 7.956381909547738e-05, - "loss": 5.4905, - "step": 20840 - }, - { - "epoch": 10.86883963494133, - "grad_norm": 1.485060691833496, - "learning_rate": 7.956281407035176e-05, - "loss": 5.0284, - "step": 20841 - }, - { - "epoch": 10.869361147327249, - "grad_norm": 1.5503602027893066, - "learning_rate": 7.956180904522614e-05, - "loss": 5.6503, - "step": 20842 - }, - { - "epoch": 10.869882659713168, - "grad_norm": 1.5430030822753906, - "learning_rate": 7.956080402010052e-05, - "loss": 5.7022, - "step": 20843 - }, - { - "epoch": 10.870404172099088, - "grad_norm": 1.3324098587036133, - "learning_rate": 7.955979899497488e-05, - "loss": 5.6439, - "step": 20844 - }, - { - "epoch": 10.870925684485007, - "grad_norm": 1.431255578994751, - "learning_rate": 7.955879396984926e-05, - "loss": 5.677, - "step": 20845 - }, - { - "epoch": 10.871447196870925, - "grad_norm": 1.4600403308868408, - "learning_rate": 7.955778894472362e-05, - "loss": 5.4426, - "step": 20846 - }, - { - "epoch": 10.871968709256844, - "grad_norm": 1.5493687391281128, - "learning_rate": 7.9556783919598e-05, - "loss": 5.1807, - "step": 20847 - }, - { - "epoch": 10.872490221642764, - "grad_norm": 1.4435315132141113, - "learning_rate": 7.955577889447236e-05, - "loss": 5.6907, - "step": 20848 - }, - { - "epoch": 10.873011734028683, - "grad_norm": 1.457694411277771, - "learning_rate": 7.955477386934673e-05, - "loss": 5.3472, - "step": 20849 - }, - { - "epoch": 10.873533246414603, - "grad_norm": 1.5176588296890259, - "learning_rate": 7.95537688442211e-05, - "loss": 5.7104, - "step": 20850 - }, - { - "epoch": 10.874054758800522, - "grad_norm": 1.5433452129364014, - "learning_rate": 7.955276381909548e-05, - "loss": 5.6314, - "step": 20851 - }, - { - "epoch": 10.874576271186442, - "grad_norm": 1.4065159559249878, - "learning_rate": 7.955175879396986e-05, - "loss": 4.8043, - "step": 20852 - }, - { - "epoch": 10.87509778357236, - "grad_norm": 1.4746954441070557, - "learning_rate": 7.955075376884423e-05, - "loss": 4.7756, - "step": 20853 - }, - { - "epoch": 10.875619295958279, - "grad_norm": 1.5058845281600952, - "learning_rate": 7.95497487437186e-05, - "loss": 5.355, - "step": 20854 - }, - { - "epoch": 10.876140808344198, - "grad_norm": 1.4877228736877441, - "learning_rate": 7.954874371859297e-05, - "loss": 5.1562, - "step": 20855 - }, - { - "epoch": 10.876662320730118, - "grad_norm": 1.4097403287887573, - "learning_rate": 7.954773869346735e-05, - "loss": 5.624, - "step": 20856 - }, - { - "epoch": 10.877183833116037, - "grad_norm": 1.5058139562606812, - "learning_rate": 7.954673366834171e-05, - "loss": 5.3751, - "step": 20857 - }, - { - "epoch": 10.877705345501955, - "grad_norm": 1.3550471067428589, - "learning_rate": 7.954572864321609e-05, - "loss": 5.7129, - "step": 20858 - }, - { - "epoch": 10.878226857887874, - "grad_norm": 1.3287475109100342, - "learning_rate": 7.954472361809045e-05, - "loss": 5.6979, - "step": 20859 - }, - { - "epoch": 10.878748370273794, - "grad_norm": 1.5471543073654175, - "learning_rate": 7.954371859296483e-05, - "loss": 5.3526, - "step": 20860 - }, - { - "epoch": 10.879269882659713, - "grad_norm": 1.6042343378067017, - "learning_rate": 7.95427135678392e-05, - "loss": 5.1967, - "step": 20861 - }, - { - "epoch": 10.879791395045633, - "grad_norm": 1.4433425664901733, - "learning_rate": 7.954170854271357e-05, - "loss": 5.5429, - "step": 20862 - }, - { - "epoch": 10.880312907431552, - "grad_norm": 1.4447836875915527, - "learning_rate": 7.954070351758795e-05, - "loss": 5.3748, - "step": 20863 - }, - { - "epoch": 10.880834419817472, - "grad_norm": 1.4456778764724731, - "learning_rate": 7.953969849246231e-05, - "loss": 5.4958, - "step": 20864 - }, - { - "epoch": 10.88135593220339, - "grad_norm": 1.4391014575958252, - "learning_rate": 7.953869346733669e-05, - "loss": 5.2561, - "step": 20865 - }, - { - "epoch": 10.881877444589309, - "grad_norm": 1.4296648502349854, - "learning_rate": 7.953768844221106e-05, - "loss": 5.7641, - "step": 20866 - }, - { - "epoch": 10.882398956975228, - "grad_norm": 1.4551854133605957, - "learning_rate": 7.953668341708543e-05, - "loss": 5.6885, - "step": 20867 - }, - { - "epoch": 10.882920469361148, - "grad_norm": 1.3159570693969727, - "learning_rate": 7.95356783919598e-05, - "loss": 5.3096, - "step": 20868 - }, - { - "epoch": 10.883441981747067, - "grad_norm": 1.5035128593444824, - "learning_rate": 7.953467336683418e-05, - "loss": 5.4712, - "step": 20869 - }, - { - "epoch": 10.883963494132985, - "grad_norm": 1.434493064880371, - "learning_rate": 7.953366834170854e-05, - "loss": 5.5397, - "step": 20870 - }, - { - "epoch": 10.884485006518904, - "grad_norm": 1.407245397567749, - "learning_rate": 7.953266331658292e-05, - "loss": 5.2874, - "step": 20871 - }, - { - "epoch": 10.885006518904824, - "grad_norm": 1.615294098854065, - "learning_rate": 7.95316582914573e-05, - "loss": 4.9579, - "step": 20872 - }, - { - "epoch": 10.885528031290743, - "grad_norm": 1.3720768690109253, - "learning_rate": 7.953065326633167e-05, - "loss": 5.9315, - "step": 20873 - }, - { - "epoch": 10.886049543676663, - "grad_norm": 1.5831645727157593, - "learning_rate": 7.952964824120604e-05, - "loss": 5.576, - "step": 20874 - }, - { - "epoch": 10.886571056062582, - "grad_norm": 1.3927009105682373, - "learning_rate": 7.95286432160804e-05, - "loss": 5.8921, - "step": 20875 - }, - { - "epoch": 10.887092568448502, - "grad_norm": 1.322348713874817, - "learning_rate": 7.952763819095478e-05, - "loss": 5.7025, - "step": 20876 - }, - { - "epoch": 10.88761408083442, - "grad_norm": 1.3513325452804565, - "learning_rate": 7.952663316582914e-05, - "loss": 5.6977, - "step": 20877 - }, - { - "epoch": 10.888135593220339, - "grad_norm": 1.3339743614196777, - "learning_rate": 7.952562814070352e-05, - "loss": 5.5695, - "step": 20878 - }, - { - "epoch": 10.888657105606258, - "grad_norm": 1.3675743341445923, - "learning_rate": 7.952462311557789e-05, - "loss": 5.7272, - "step": 20879 - }, - { - "epoch": 10.889178617992178, - "grad_norm": 1.5403809547424316, - "learning_rate": 7.952361809045226e-05, - "loss": 5.5248, - "step": 20880 - }, - { - "epoch": 10.889700130378097, - "grad_norm": 1.492254614830017, - "learning_rate": 7.952261306532663e-05, - "loss": 5.5498, - "step": 20881 - }, - { - "epoch": 10.890221642764015, - "grad_norm": 1.4785937070846558, - "learning_rate": 7.9521608040201e-05, - "loss": 5.3354, - "step": 20882 - }, - { - "epoch": 10.890743155149934, - "grad_norm": 1.3871748447418213, - "learning_rate": 7.952060301507538e-05, - "loss": 4.6877, - "step": 20883 - }, - { - "epoch": 10.891264667535854, - "grad_norm": 1.498595118522644, - "learning_rate": 7.951959798994976e-05, - "loss": 5.508, - "step": 20884 - }, - { - "epoch": 10.891786179921773, - "grad_norm": 1.4056581258773804, - "learning_rate": 7.951859296482413e-05, - "loss": 5.9151, - "step": 20885 - }, - { - "epoch": 10.892307692307693, - "grad_norm": 1.5041513442993164, - "learning_rate": 7.95175879396985e-05, - "loss": 5.442, - "step": 20886 - }, - { - "epoch": 10.892829204693612, - "grad_norm": 1.4165537357330322, - "learning_rate": 7.951658291457287e-05, - "loss": 5.3976, - "step": 20887 - }, - { - "epoch": 10.89335071707953, - "grad_norm": 1.4203828573226929, - "learning_rate": 7.951557788944723e-05, - "loss": 5.6919, - "step": 20888 - }, - { - "epoch": 10.89387222946545, - "grad_norm": 1.4517167806625366, - "learning_rate": 7.951457286432161e-05, - "loss": 5.6548, - "step": 20889 - }, - { - "epoch": 10.894393741851369, - "grad_norm": 1.4476011991500854, - "learning_rate": 7.951356783919598e-05, - "loss": 4.6524, - "step": 20890 - }, - { - "epoch": 10.894915254237288, - "grad_norm": 1.422726035118103, - "learning_rate": 7.951256281407035e-05, - "loss": 5.8377, - "step": 20891 - }, - { - "epoch": 10.895436766623208, - "grad_norm": 1.3993656635284424, - "learning_rate": 7.951155778894472e-05, - "loss": 5.8051, - "step": 20892 - }, - { - "epoch": 10.895958279009127, - "grad_norm": 1.452288031578064, - "learning_rate": 7.95105527638191e-05, - "loss": 5.11, - "step": 20893 - }, - { - "epoch": 10.896479791395045, - "grad_norm": 1.5393177270889282, - "learning_rate": 7.950954773869347e-05, - "loss": 5.5751, - "step": 20894 - }, - { - "epoch": 10.897001303780964, - "grad_norm": 1.4200940132141113, - "learning_rate": 7.950854271356785e-05, - "loss": 5.9156, - "step": 20895 - }, - { - "epoch": 10.897522816166884, - "grad_norm": 1.4003561735153198, - "learning_rate": 7.950753768844221e-05, - "loss": 5.3178, - "step": 20896 - }, - { - "epoch": 10.898044328552803, - "grad_norm": 1.364443063735962, - "learning_rate": 7.950653266331659e-05, - "loss": 5.7249, - "step": 20897 - }, - { - "epoch": 10.898565840938723, - "grad_norm": 1.5462408065795898, - "learning_rate": 7.950552763819096e-05, - "loss": 5.4026, - "step": 20898 - }, - { - "epoch": 10.899087353324642, - "grad_norm": 1.6502461433410645, - "learning_rate": 7.950452261306533e-05, - "loss": 5.2963, - "step": 20899 - }, - { - "epoch": 10.89960886571056, - "grad_norm": 1.6449497938156128, - "learning_rate": 7.95035175879397e-05, - "loss": 4.8947, - "step": 20900 - }, - { - "epoch": 10.90013037809648, - "grad_norm": 1.5031843185424805, - "learning_rate": 7.950251256281406e-05, - "loss": 4.8982, - "step": 20901 - }, - { - "epoch": 10.900651890482399, - "grad_norm": 1.5747390985488892, - "learning_rate": 7.950150753768844e-05, - "loss": 5.1242, - "step": 20902 - }, - { - "epoch": 10.901173402868318, - "grad_norm": 1.3169786930084229, - "learning_rate": 7.950050251256282e-05, - "loss": 5.9117, - "step": 20903 - }, - { - "epoch": 10.901694915254238, - "grad_norm": 1.370862364768982, - "learning_rate": 7.94994974874372e-05, - "loss": 5.6626, - "step": 20904 - }, - { - "epoch": 10.902216427640157, - "grad_norm": 1.5698710680007935, - "learning_rate": 7.949849246231156e-05, - "loss": 5.2537, - "step": 20905 - }, - { - "epoch": 10.902737940026075, - "grad_norm": 1.4392200708389282, - "learning_rate": 7.949748743718594e-05, - "loss": 4.9474, - "step": 20906 - }, - { - "epoch": 10.903259452411994, - "grad_norm": 1.4793663024902344, - "learning_rate": 7.94964824120603e-05, - "loss": 5.1674, - "step": 20907 - }, - { - "epoch": 10.903780964797914, - "grad_norm": 1.4574165344238281, - "learning_rate": 7.949547738693468e-05, - "loss": 5.4334, - "step": 20908 - }, - { - "epoch": 10.904302477183833, - "grad_norm": 1.6038566827774048, - "learning_rate": 7.949447236180905e-05, - "loss": 4.7896, - "step": 20909 - }, - { - "epoch": 10.904823989569753, - "grad_norm": 1.488199234008789, - "learning_rate": 7.949346733668342e-05, - "loss": 5.9032, - "step": 20910 - }, - { - "epoch": 10.905345501955672, - "grad_norm": 1.4139366149902344, - "learning_rate": 7.949246231155779e-05, - "loss": 5.6636, - "step": 20911 - }, - { - "epoch": 10.90586701434159, - "grad_norm": 1.4479649066925049, - "learning_rate": 7.949145728643217e-05, - "loss": 5.0333, - "step": 20912 - }, - { - "epoch": 10.90638852672751, - "grad_norm": 1.4658572673797607, - "learning_rate": 7.949045226130653e-05, - "loss": 5.4264, - "step": 20913 - }, - { - "epoch": 10.906910039113429, - "grad_norm": 1.392055630683899, - "learning_rate": 7.948944723618091e-05, - "loss": 5.6167, - "step": 20914 - }, - { - "epoch": 10.907431551499348, - "grad_norm": 1.4329018592834473, - "learning_rate": 7.948844221105529e-05, - "loss": 5.7922, - "step": 20915 - }, - { - "epoch": 10.907953063885268, - "grad_norm": 1.544110894203186, - "learning_rate": 7.948743718592965e-05, - "loss": 5.4387, - "step": 20916 - }, - { - "epoch": 10.908474576271187, - "grad_norm": 1.6874760389328003, - "learning_rate": 7.948643216080403e-05, - "loss": 5.0029, - "step": 20917 - }, - { - "epoch": 10.908996088657105, - "grad_norm": 1.4808834791183472, - "learning_rate": 7.948542713567839e-05, - "loss": 5.2479, - "step": 20918 - }, - { - "epoch": 10.909517601043024, - "grad_norm": 1.557495355606079, - "learning_rate": 7.948442211055277e-05, - "loss": 5.0523, - "step": 20919 - }, - { - "epoch": 10.910039113428944, - "grad_norm": 1.4027904272079468, - "learning_rate": 7.948341708542713e-05, - "loss": 5.4982, - "step": 20920 - }, - { - "epoch": 10.910560625814863, - "grad_norm": 1.418133020401001, - "learning_rate": 7.948241206030151e-05, - "loss": 5.3878, - "step": 20921 - }, - { - "epoch": 10.911082138200783, - "grad_norm": 1.389695644378662, - "learning_rate": 7.948140703517588e-05, - "loss": 5.5336, - "step": 20922 - }, - { - "epoch": 10.911603650586702, - "grad_norm": 1.4180316925048828, - "learning_rate": 7.948040201005025e-05, - "loss": 5.6098, - "step": 20923 - }, - { - "epoch": 10.91212516297262, - "grad_norm": 1.5257996320724487, - "learning_rate": 7.947939698492463e-05, - "loss": 5.4964, - "step": 20924 - }, - { - "epoch": 10.91264667535854, - "grad_norm": 1.4572893381118774, - "learning_rate": 7.947839195979901e-05, - "loss": 5.2873, - "step": 20925 - }, - { - "epoch": 10.913168187744459, - "grad_norm": 1.4000223875045776, - "learning_rate": 7.947738693467337e-05, - "loss": 5.5371, - "step": 20926 - }, - { - "epoch": 10.913689700130378, - "grad_norm": 1.5175187587738037, - "learning_rate": 7.947638190954775e-05, - "loss": 5.7718, - "step": 20927 - }, - { - "epoch": 10.914211212516298, - "grad_norm": 1.417612075805664, - "learning_rate": 7.947537688442212e-05, - "loss": 5.4433, - "step": 20928 - }, - { - "epoch": 10.914732724902217, - "grad_norm": 1.425512433052063, - "learning_rate": 7.947437185929648e-05, - "loss": 5.0224, - "step": 20929 - }, - { - "epoch": 10.915254237288135, - "grad_norm": 1.4505774974822998, - "learning_rate": 7.947336683417086e-05, - "loss": 5.4547, - "step": 20930 - }, - { - "epoch": 10.915775749674054, - "grad_norm": 1.5449849367141724, - "learning_rate": 7.947236180904522e-05, - "loss": 5.2426, - "step": 20931 - }, - { - "epoch": 10.916297262059974, - "grad_norm": 1.5006132125854492, - "learning_rate": 7.94713567839196e-05, - "loss": 5.3141, - "step": 20932 - }, - { - "epoch": 10.916818774445893, - "grad_norm": 1.278379201889038, - "learning_rate": 7.947035175879396e-05, - "loss": 5.9952, - "step": 20933 - }, - { - "epoch": 10.917340286831813, - "grad_norm": 1.4494935274124146, - "learning_rate": 7.946934673366834e-05, - "loss": 5.2735, - "step": 20934 - }, - { - "epoch": 10.917861799217732, - "grad_norm": 1.44352126121521, - "learning_rate": 7.946834170854272e-05, - "loss": 5.4757, - "step": 20935 - }, - { - "epoch": 10.91838331160365, - "grad_norm": 1.499895691871643, - "learning_rate": 7.94673366834171e-05, - "loss": 5.5513, - "step": 20936 - }, - { - "epoch": 10.91890482398957, - "grad_norm": 1.463358998298645, - "learning_rate": 7.946633165829146e-05, - "loss": 5.6771, - "step": 20937 - }, - { - "epoch": 10.919426336375489, - "grad_norm": 1.6340488195419312, - "learning_rate": 7.946532663316584e-05, - "loss": 5.2406, - "step": 20938 - }, - { - "epoch": 10.919947848761408, - "grad_norm": 1.5814470052719116, - "learning_rate": 7.94643216080402e-05, - "loss": 5.4777, - "step": 20939 - }, - { - "epoch": 10.920469361147328, - "grad_norm": 1.531904935836792, - "learning_rate": 7.946331658291458e-05, - "loss": 5.5558, - "step": 20940 - }, - { - "epoch": 10.920990873533245, - "grad_norm": 1.4873367547988892, - "learning_rate": 7.946231155778895e-05, - "loss": 5.706, - "step": 20941 - }, - { - "epoch": 10.921512385919165, - "grad_norm": 1.4803050756454468, - "learning_rate": 7.946130653266331e-05, - "loss": 4.9068, - "step": 20942 - }, - { - "epoch": 10.922033898305084, - "grad_norm": 1.4614310264587402, - "learning_rate": 7.946030150753769e-05, - "loss": 5.6866, - "step": 20943 - }, - { - "epoch": 10.922555410691004, - "grad_norm": 1.5398675203323364, - "learning_rate": 7.945929648241207e-05, - "loss": 5.3529, - "step": 20944 - }, - { - "epoch": 10.923076923076923, - "grad_norm": 1.3800475597381592, - "learning_rate": 7.945829145728644e-05, - "loss": 5.8213, - "step": 20945 - }, - { - "epoch": 10.923598435462843, - "grad_norm": 1.5008275508880615, - "learning_rate": 7.945728643216081e-05, - "loss": 5.3283, - "step": 20946 - }, - { - "epoch": 10.924119947848762, - "grad_norm": 1.5373327732086182, - "learning_rate": 7.945628140703519e-05, - "loss": 5.1157, - "step": 20947 - }, - { - "epoch": 10.92464146023468, - "grad_norm": 1.390774130821228, - "learning_rate": 7.945527638190955e-05, - "loss": 5.5616, - "step": 20948 - }, - { - "epoch": 10.9251629726206, - "grad_norm": 1.4165246486663818, - "learning_rate": 7.945427135678393e-05, - "loss": 5.5931, - "step": 20949 - }, - { - "epoch": 10.925684485006519, - "grad_norm": 1.4504565000534058, - "learning_rate": 7.945326633165829e-05, - "loss": 5.6173, - "step": 20950 - }, - { - "epoch": 10.926205997392438, - "grad_norm": 1.3563454151153564, - "learning_rate": 7.945226130653267e-05, - "loss": 5.6162, - "step": 20951 - }, - { - "epoch": 10.926727509778358, - "grad_norm": 1.5011118650436401, - "learning_rate": 7.945125628140703e-05, - "loss": 4.8219, - "step": 20952 - }, - { - "epoch": 10.927249022164276, - "grad_norm": 1.4267215728759766, - "learning_rate": 7.945025125628141e-05, - "loss": 5.5383, - "step": 20953 - }, - { - "epoch": 10.927770534550195, - "grad_norm": 1.4159268140792847, - "learning_rate": 7.944924623115578e-05, - "loss": 5.2531, - "step": 20954 - }, - { - "epoch": 10.928292046936114, - "grad_norm": 1.3717079162597656, - "learning_rate": 7.944824120603015e-05, - "loss": 4.8347, - "step": 20955 - }, - { - "epoch": 10.928813559322034, - "grad_norm": 1.497125267982483, - "learning_rate": 7.944723618090453e-05, - "loss": 5.2158, - "step": 20956 - }, - { - "epoch": 10.929335071707953, - "grad_norm": 1.5739015340805054, - "learning_rate": 7.94462311557789e-05, - "loss": 5.1905, - "step": 20957 - }, - { - "epoch": 10.929856584093873, - "grad_norm": 1.4662703275680542, - "learning_rate": 7.944522613065327e-05, - "loss": 5.461, - "step": 20958 - }, - { - "epoch": 10.930378096479792, - "grad_norm": 1.454738736152649, - "learning_rate": 7.944422110552764e-05, - "loss": 5.5569, - "step": 20959 - }, - { - "epoch": 10.93089960886571, - "grad_norm": 1.4483228921890259, - "learning_rate": 7.944321608040202e-05, - "loss": 5.5923, - "step": 20960 - }, - { - "epoch": 10.93142112125163, - "grad_norm": 1.5428428649902344, - "learning_rate": 7.944221105527638e-05, - "loss": 4.9053, - "step": 20961 - }, - { - "epoch": 10.931942633637549, - "grad_norm": 1.9260282516479492, - "learning_rate": 7.944120603015076e-05, - "loss": 4.7062, - "step": 20962 - }, - { - "epoch": 10.932464146023468, - "grad_norm": 1.4864997863769531, - "learning_rate": 7.944020100502512e-05, - "loss": 5.4357, - "step": 20963 - }, - { - "epoch": 10.932985658409388, - "grad_norm": 1.4165695905685425, - "learning_rate": 7.94391959798995e-05, - "loss": 5.0916, - "step": 20964 - }, - { - "epoch": 10.933507170795306, - "grad_norm": 1.4304850101470947, - "learning_rate": 7.943819095477388e-05, - "loss": 5.6316, - "step": 20965 - }, - { - "epoch": 10.934028683181225, - "grad_norm": 1.425891637802124, - "learning_rate": 7.943718592964826e-05, - "loss": 5.6791, - "step": 20966 - }, - { - "epoch": 10.934550195567144, - "grad_norm": 1.4568068981170654, - "learning_rate": 7.943618090452262e-05, - "loss": 5.1236, - "step": 20967 - }, - { - "epoch": 10.935071707953064, - "grad_norm": 1.5454902648925781, - "learning_rate": 7.943517587939698e-05, - "loss": 5.291, - "step": 20968 - }, - { - "epoch": 10.935593220338983, - "grad_norm": 1.5041025876998901, - "learning_rate": 7.943417085427136e-05, - "loss": 5.4392, - "step": 20969 - }, - { - "epoch": 10.936114732724903, - "grad_norm": 1.5437592267990112, - "learning_rate": 7.943316582914573e-05, - "loss": 5.0549, - "step": 20970 - }, - { - "epoch": 10.93663624511082, - "grad_norm": 1.3061270713806152, - "learning_rate": 7.94321608040201e-05, - "loss": 5.7704, - "step": 20971 - }, - { - "epoch": 10.93715775749674, - "grad_norm": 1.4510531425476074, - "learning_rate": 7.943115577889447e-05, - "loss": 5.2776, - "step": 20972 - }, - { - "epoch": 10.93767926988266, - "grad_norm": 1.5836305618286133, - "learning_rate": 7.943015075376885e-05, - "loss": 4.92, - "step": 20973 - }, - { - "epoch": 10.938200782268579, - "grad_norm": 1.3863914012908936, - "learning_rate": 7.942914572864321e-05, - "loss": 5.9593, - "step": 20974 - }, - { - "epoch": 10.938722294654498, - "grad_norm": 1.3610464334487915, - "learning_rate": 7.942814070351759e-05, - "loss": 5.1472, - "step": 20975 - }, - { - "epoch": 10.939243807040418, - "grad_norm": 1.3911867141723633, - "learning_rate": 7.942713567839197e-05, - "loss": 4.6204, - "step": 20976 - }, - { - "epoch": 10.939765319426336, - "grad_norm": 1.3874753713607788, - "learning_rate": 7.942613065326634e-05, - "loss": 5.3447, - "step": 20977 - }, - { - "epoch": 10.940286831812255, - "grad_norm": 1.3914989233016968, - "learning_rate": 7.942512562814071e-05, - "loss": 5.4827, - "step": 20978 - }, - { - "epoch": 10.940808344198174, - "grad_norm": 1.5483626127243042, - "learning_rate": 7.942412060301509e-05, - "loss": 5.237, - "step": 20979 - }, - { - "epoch": 10.941329856584094, - "grad_norm": 1.450799584388733, - "learning_rate": 7.942311557788945e-05, - "loss": 5.4829, - "step": 20980 - }, - { - "epoch": 10.941851368970013, - "grad_norm": 1.453369140625, - "learning_rate": 7.942211055276382e-05, - "loss": 5.4678, - "step": 20981 - }, - { - "epoch": 10.942372881355933, - "grad_norm": 1.4180506467819214, - "learning_rate": 7.942110552763819e-05, - "loss": 5.3243, - "step": 20982 - }, - { - "epoch": 10.94289439374185, - "grad_norm": 1.4748303890228271, - "learning_rate": 7.942010050251256e-05, - "loss": 5.924, - "step": 20983 - }, - { - "epoch": 10.94341590612777, - "grad_norm": 1.5197535753250122, - "learning_rate": 7.941909547738694e-05, - "loss": 5.5981, - "step": 20984 - }, - { - "epoch": 10.94393741851369, - "grad_norm": 1.5293771028518677, - "learning_rate": 7.941809045226131e-05, - "loss": 5.5658, - "step": 20985 - }, - { - "epoch": 10.944458930899609, - "grad_norm": 1.4990962743759155, - "learning_rate": 7.941708542713569e-05, - "loss": 5.4771, - "step": 20986 - }, - { - "epoch": 10.944980443285528, - "grad_norm": 1.3228641748428345, - "learning_rate": 7.941608040201006e-05, - "loss": 5.6902, - "step": 20987 - }, - { - "epoch": 10.945501955671448, - "grad_norm": 1.2924830913543701, - "learning_rate": 7.941507537688443e-05, - "loss": 5.7745, - "step": 20988 - }, - { - "epoch": 10.946023468057366, - "grad_norm": 1.3878355026245117, - "learning_rate": 7.94140703517588e-05, - "loss": 5.1028, - "step": 20989 - }, - { - "epoch": 10.946544980443285, - "grad_norm": 1.5296218395233154, - "learning_rate": 7.941306532663318e-05, - "loss": 4.9931, - "step": 20990 - }, - { - "epoch": 10.947066492829205, - "grad_norm": 1.515978217124939, - "learning_rate": 7.941206030150754e-05, - "loss": 5.6961, - "step": 20991 - }, - { - "epoch": 10.947588005215124, - "grad_norm": 1.5519098043441772, - "learning_rate": 7.941105527638192e-05, - "loss": 5.2795, - "step": 20992 - }, - { - "epoch": 10.948109517601043, - "grad_norm": 1.4744489192962646, - "learning_rate": 7.941005025125628e-05, - "loss": 5.1416, - "step": 20993 - }, - { - "epoch": 10.948631029986963, - "grad_norm": 1.4355180263519287, - "learning_rate": 7.940904522613065e-05, - "loss": 5.584, - "step": 20994 - }, - { - "epoch": 10.94915254237288, - "grad_norm": 1.4336366653442383, - "learning_rate": 7.940804020100502e-05, - "loss": 5.462, - "step": 20995 - }, - { - "epoch": 10.9496740547588, - "grad_norm": 1.4444173574447632, - "learning_rate": 7.94070351758794e-05, - "loss": 4.9244, - "step": 20996 - }, - { - "epoch": 10.95019556714472, - "grad_norm": 1.5954136848449707, - "learning_rate": 7.940603015075378e-05, - "loss": 5.3825, - "step": 20997 - }, - { - "epoch": 10.950717079530639, - "grad_norm": 1.4889582395553589, - "learning_rate": 7.940502512562814e-05, - "loss": 5.1979, - "step": 20998 - }, - { - "epoch": 10.951238591916558, - "grad_norm": 1.3851956129074097, - "learning_rate": 7.940402010050252e-05, - "loss": 5.6736, - "step": 20999 - }, - { - "epoch": 10.951760104302478, - "grad_norm": 1.3554573059082031, - "learning_rate": 7.940301507537689e-05, - "loss": 5.8085, - "step": 21000 - }, - { - "epoch": 10.952281616688396, - "grad_norm": 1.399256706237793, - "learning_rate": 7.940201005025126e-05, - "loss": 5.441, - "step": 21001 - }, - { - "epoch": 10.952803129074315, - "grad_norm": 1.4235142469406128, - "learning_rate": 7.940100502512563e-05, - "loss": 5.0651, - "step": 21002 - }, - { - "epoch": 10.953324641460235, - "grad_norm": 1.4815534353256226, - "learning_rate": 7.94e-05, - "loss": 5.1548, - "step": 21003 - }, - { - "epoch": 10.953846153846154, - "grad_norm": 1.3943684101104736, - "learning_rate": 7.939899497487437e-05, - "loss": 5.4679, - "step": 21004 - }, - { - "epoch": 10.954367666232073, - "grad_norm": 1.4294825792312622, - "learning_rate": 7.939798994974875e-05, - "loss": 5.1398, - "step": 21005 - }, - { - "epoch": 10.954889178617993, - "grad_norm": 1.4298815727233887, - "learning_rate": 7.939698492462313e-05, - "loss": 5.4605, - "step": 21006 - }, - { - "epoch": 10.95541069100391, - "grad_norm": 1.507045030593872, - "learning_rate": 7.939597989949749e-05, - "loss": 5.2197, - "step": 21007 - }, - { - "epoch": 10.95593220338983, - "grad_norm": 1.3973795175552368, - "learning_rate": 7.939497487437187e-05, - "loss": 5.7279, - "step": 21008 - }, - { - "epoch": 10.95645371577575, - "grad_norm": 1.367243766784668, - "learning_rate": 7.939396984924623e-05, - "loss": 5.5702, - "step": 21009 - }, - { - "epoch": 10.956975228161669, - "grad_norm": 1.3824946880340576, - "learning_rate": 7.939296482412061e-05, - "loss": 5.4399, - "step": 21010 - }, - { - "epoch": 10.957496740547588, - "grad_norm": 1.3645747900009155, - "learning_rate": 7.939195979899497e-05, - "loss": 5.673, - "step": 21011 - }, - { - "epoch": 10.958018252933508, - "grad_norm": 1.4059242010116577, - "learning_rate": 7.939095477386935e-05, - "loss": 5.4967, - "step": 21012 - }, - { - "epoch": 10.958539765319426, - "grad_norm": 1.4147907495498657, - "learning_rate": 7.938994974874372e-05, - "loss": 5.2812, - "step": 21013 - }, - { - "epoch": 10.959061277705345, - "grad_norm": 1.4081979990005493, - "learning_rate": 7.93889447236181e-05, - "loss": 5.7041, - "step": 21014 - }, - { - "epoch": 10.959582790091265, - "grad_norm": 1.5151044130325317, - "learning_rate": 7.938793969849246e-05, - "loss": 5.2227, - "step": 21015 - }, - { - "epoch": 10.960104302477184, - "grad_norm": 1.4338107109069824, - "learning_rate": 7.938693467336684e-05, - "loss": 4.716, - "step": 21016 - }, - { - "epoch": 10.960625814863103, - "grad_norm": 1.4353386163711548, - "learning_rate": 7.938592964824121e-05, - "loss": 5.8658, - "step": 21017 - }, - { - "epoch": 10.961147327249023, - "grad_norm": 1.5235562324523926, - "learning_rate": 7.938492462311559e-05, - "loss": 5.2962, - "step": 21018 - }, - { - "epoch": 10.96166883963494, - "grad_norm": 1.4557902812957764, - "learning_rate": 7.938391959798996e-05, - "loss": 5.1995, - "step": 21019 - }, - { - "epoch": 10.96219035202086, - "grad_norm": 1.4175407886505127, - "learning_rate": 7.938291457286433e-05, - "loss": 5.8074, - "step": 21020 - }, - { - "epoch": 10.96271186440678, - "grad_norm": 1.4482581615447998, - "learning_rate": 7.93819095477387e-05, - "loss": 5.7119, - "step": 21021 - }, - { - "epoch": 10.963233376792699, - "grad_norm": 1.4694383144378662, - "learning_rate": 7.938090452261306e-05, - "loss": 5.5255, - "step": 21022 - }, - { - "epoch": 10.963754889178619, - "grad_norm": 1.3603373765945435, - "learning_rate": 7.937989949748744e-05, - "loss": 5.6891, - "step": 21023 - }, - { - "epoch": 10.964276401564538, - "grad_norm": 1.5070805549621582, - "learning_rate": 7.93788944723618e-05, - "loss": 5.3614, - "step": 21024 - }, - { - "epoch": 10.964797913950456, - "grad_norm": 1.6231030225753784, - "learning_rate": 7.937788944723618e-05, - "loss": 5.1995, - "step": 21025 - }, - { - "epoch": 10.965319426336375, - "grad_norm": 1.461775302886963, - "learning_rate": 7.937688442211056e-05, - "loss": 5.6276, - "step": 21026 - }, - { - "epoch": 10.965840938722295, - "grad_norm": 1.4600508213043213, - "learning_rate": 7.937587939698494e-05, - "loss": 5.4838, - "step": 21027 - }, - { - "epoch": 10.966362451108214, - "grad_norm": 1.3968610763549805, - "learning_rate": 7.93748743718593e-05, - "loss": 5.6328, - "step": 21028 - }, - { - "epoch": 10.966883963494134, - "grad_norm": 1.4032931327819824, - "learning_rate": 7.937386934673368e-05, - "loss": 5.7985, - "step": 21029 - }, - { - "epoch": 10.967405475880053, - "grad_norm": 1.3797416687011719, - "learning_rate": 7.937286432160804e-05, - "loss": 5.6328, - "step": 21030 - }, - { - "epoch": 10.96792698826597, - "grad_norm": 1.5559303760528564, - "learning_rate": 7.937185929648242e-05, - "loss": 4.9025, - "step": 21031 - }, - { - "epoch": 10.96844850065189, - "grad_norm": 1.435437560081482, - "learning_rate": 7.937085427135679e-05, - "loss": 5.6093, - "step": 21032 - }, - { - "epoch": 10.96897001303781, - "grad_norm": 1.4638452529907227, - "learning_rate": 7.936984924623116e-05, - "loss": 5.5519, - "step": 21033 - }, - { - "epoch": 10.969491525423729, - "grad_norm": 1.4011547565460205, - "learning_rate": 7.936884422110553e-05, - "loss": 5.3242, - "step": 21034 - }, - { - "epoch": 10.970013037809649, - "grad_norm": 1.4581902027130127, - "learning_rate": 7.936783919597989e-05, - "loss": 5.5338, - "step": 21035 - }, - { - "epoch": 10.970534550195566, - "grad_norm": 1.5235960483551025, - "learning_rate": 7.936683417085427e-05, - "loss": 5.732, - "step": 21036 - }, - { - "epoch": 10.971056062581486, - "grad_norm": 1.5757670402526855, - "learning_rate": 7.936582914572865e-05, - "loss": 4.8605, - "step": 21037 - }, - { - "epoch": 10.971577574967405, - "grad_norm": 1.5269334316253662, - "learning_rate": 7.936482412060303e-05, - "loss": 5.7846, - "step": 21038 - }, - { - "epoch": 10.972099087353325, - "grad_norm": 1.397024154663086, - "learning_rate": 7.936381909547739e-05, - "loss": 5.5091, - "step": 21039 - }, - { - "epoch": 10.972620599739244, - "grad_norm": 1.4778767824172974, - "learning_rate": 7.936281407035177e-05, - "loss": 5.8512, - "step": 21040 - }, - { - "epoch": 10.973142112125164, - "grad_norm": 1.529292106628418, - "learning_rate": 7.936180904522613e-05, - "loss": 5.4362, - "step": 21041 - }, - { - "epoch": 10.973663624511083, - "grad_norm": 1.7048566341400146, - "learning_rate": 7.936080402010051e-05, - "loss": 4.4969, - "step": 21042 - }, - { - "epoch": 10.974185136897, - "grad_norm": 1.5877914428710938, - "learning_rate": 7.935979899497487e-05, - "loss": 5.3893, - "step": 21043 - }, - { - "epoch": 10.97470664928292, - "grad_norm": 1.4953404664993286, - "learning_rate": 7.935879396984925e-05, - "loss": 5.1923, - "step": 21044 - }, - { - "epoch": 10.97522816166884, - "grad_norm": 1.6752102375030518, - "learning_rate": 7.935778894472362e-05, - "loss": 4.6655, - "step": 21045 - }, - { - "epoch": 10.975749674054759, - "grad_norm": 1.4276094436645508, - "learning_rate": 7.9356783919598e-05, - "loss": 5.4718, - "step": 21046 - }, - { - "epoch": 10.976271186440679, - "grad_norm": 1.4129613637924194, - "learning_rate": 7.935577889447237e-05, - "loss": 5.7898, - "step": 21047 - }, - { - "epoch": 10.976792698826596, - "grad_norm": 1.5442242622375488, - "learning_rate": 7.935477386934674e-05, - "loss": 5.1174, - "step": 21048 - }, - { - "epoch": 10.977314211212516, - "grad_norm": 1.4921859502792358, - "learning_rate": 7.935376884422111e-05, - "loss": 5.1287, - "step": 21049 - }, - { - "epoch": 10.977835723598435, - "grad_norm": 1.4914977550506592, - "learning_rate": 7.935276381909548e-05, - "loss": 5.4843, - "step": 21050 - }, - { - "epoch": 10.978357235984355, - "grad_norm": 1.5180046558380127, - "learning_rate": 7.935175879396986e-05, - "loss": 5.4365, - "step": 21051 - }, - { - "epoch": 10.978878748370274, - "grad_norm": 1.4861451387405396, - "learning_rate": 7.935075376884422e-05, - "loss": 5.2494, - "step": 21052 - }, - { - "epoch": 10.979400260756194, - "grad_norm": 1.3985316753387451, - "learning_rate": 7.93497487437186e-05, - "loss": 5.7148, - "step": 21053 - }, - { - "epoch": 10.979921773142113, - "grad_norm": 1.4998106956481934, - "learning_rate": 7.934874371859296e-05, - "loss": 5.1073, - "step": 21054 - }, - { - "epoch": 10.98044328552803, - "grad_norm": 1.4142663478851318, - "learning_rate": 7.934773869346734e-05, - "loss": 5.8018, - "step": 21055 - }, - { - "epoch": 10.98096479791395, - "grad_norm": 1.4890509843826294, - "learning_rate": 7.93467336683417e-05, - "loss": 5.4453, - "step": 21056 - }, - { - "epoch": 10.98148631029987, - "grad_norm": 1.448545217514038, - "learning_rate": 7.934572864321608e-05, - "loss": 5.57, - "step": 21057 - }, - { - "epoch": 10.98200782268579, - "grad_norm": 1.3900812864303589, - "learning_rate": 7.934472361809046e-05, - "loss": 5.2422, - "step": 21058 - }, - { - "epoch": 10.982529335071709, - "grad_norm": 1.734815001487732, - "learning_rate": 7.934371859296484e-05, - "loss": 5.6319, - "step": 21059 - }, - { - "epoch": 10.983050847457626, - "grad_norm": 1.5178097486495972, - "learning_rate": 7.93427135678392e-05, - "loss": 5.6084, - "step": 21060 - }, - { - "epoch": 10.983572359843546, - "grad_norm": 1.6060247421264648, - "learning_rate": 7.934170854271357e-05, - "loss": 5.059, - "step": 21061 - }, - { - "epoch": 10.984093872229465, - "grad_norm": 1.590624213218689, - "learning_rate": 7.934070351758794e-05, - "loss": 5.2193, - "step": 21062 - }, - { - "epoch": 10.984615384615385, - "grad_norm": 1.4666028022766113, - "learning_rate": 7.933969849246231e-05, - "loss": 5.7239, - "step": 21063 - }, - { - "epoch": 10.985136897001304, - "grad_norm": 1.6972624063491821, - "learning_rate": 7.933869346733669e-05, - "loss": 5.0521, - "step": 21064 - }, - { - "epoch": 10.985658409387224, - "grad_norm": 1.4879679679870605, - "learning_rate": 7.933768844221105e-05, - "loss": 5.4594, - "step": 21065 - }, - { - "epoch": 10.986179921773141, - "grad_norm": 1.3531349897384644, - "learning_rate": 7.933668341708543e-05, - "loss": 5.748, - "step": 21066 - }, - { - "epoch": 10.98670143415906, - "grad_norm": 1.5164768695831299, - "learning_rate": 7.93356783919598e-05, - "loss": 5.2773, - "step": 21067 - }, - { - "epoch": 10.98722294654498, - "grad_norm": 1.4546769857406616, - "learning_rate": 7.933467336683417e-05, - "loss": 5.3853, - "step": 21068 - }, - { - "epoch": 10.9877444589309, - "grad_norm": 1.4752851724624634, - "learning_rate": 7.933366834170855e-05, - "loss": 5.2252, - "step": 21069 - }, - { - "epoch": 10.98826597131682, - "grad_norm": 1.440137267112732, - "learning_rate": 7.933266331658293e-05, - "loss": 5.1276, - "step": 21070 - }, - { - "epoch": 10.988787483702739, - "grad_norm": 1.489648699760437, - "learning_rate": 7.933165829145729e-05, - "loss": 5.4491, - "step": 21071 - }, - { - "epoch": 10.989308996088656, - "grad_norm": 1.430466890335083, - "learning_rate": 7.933065326633167e-05, - "loss": 5.4771, - "step": 21072 - }, - { - "epoch": 10.989830508474576, - "grad_norm": 1.4354274272918701, - "learning_rate": 7.932964824120603e-05, - "loss": 5.6375, - "step": 21073 - }, - { - "epoch": 10.990352020860495, - "grad_norm": 1.333548665046692, - "learning_rate": 7.93286432160804e-05, - "loss": 5.5565, - "step": 21074 - }, - { - "epoch": 10.990873533246415, - "grad_norm": 1.406813383102417, - "learning_rate": 7.932763819095478e-05, - "loss": 5.389, - "step": 21075 - }, - { - "epoch": 10.991395045632334, - "grad_norm": 1.3964906930923462, - "learning_rate": 7.932663316582914e-05, - "loss": 5.4897, - "step": 21076 - }, - { - "epoch": 10.991916558018254, - "grad_norm": 1.6340597867965698, - "learning_rate": 7.932562814070352e-05, - "loss": 4.908, - "step": 21077 - }, - { - "epoch": 10.992438070404171, - "grad_norm": 1.451432466506958, - "learning_rate": 7.93246231155779e-05, - "loss": 5.5549, - "step": 21078 - }, - { - "epoch": 10.99295958279009, - "grad_norm": 1.4636764526367188, - "learning_rate": 7.932361809045227e-05, - "loss": 5.729, - "step": 21079 - }, - { - "epoch": 10.99348109517601, - "grad_norm": 1.3572015762329102, - "learning_rate": 7.932261306532664e-05, - "loss": 5.641, - "step": 21080 - }, - { - "epoch": 10.99400260756193, - "grad_norm": 1.3986600637435913, - "learning_rate": 7.932160804020102e-05, - "loss": 5.5881, - "step": 21081 - }, - { - "epoch": 10.99452411994785, - "grad_norm": 1.4535712003707886, - "learning_rate": 7.932060301507538e-05, - "loss": 5.7041, - "step": 21082 - }, - { - "epoch": 10.995045632333769, - "grad_norm": 1.4579298496246338, - "learning_rate": 7.931959798994976e-05, - "loss": 5.5399, - "step": 21083 - }, - { - "epoch": 10.995567144719686, - "grad_norm": 1.3273316621780396, - "learning_rate": 7.931859296482412e-05, - "loss": 5.7706, - "step": 21084 - }, - { - "epoch": 10.996088657105606, - "grad_norm": 1.5663350820541382, - "learning_rate": 7.93175879396985e-05, - "loss": 4.9125, - "step": 21085 - }, - { - "epoch": 10.996610169491525, - "grad_norm": 1.3956539630889893, - "learning_rate": 7.931658291457286e-05, - "loss": 5.7102, - "step": 21086 - }, - { - "epoch": 10.997131681877445, - "grad_norm": 1.4856353998184204, - "learning_rate": 7.931557788944723e-05, - "loss": 5.191, - "step": 21087 - }, - { - "epoch": 10.997653194263364, - "grad_norm": 1.5751895904541016, - "learning_rate": 7.93145728643216e-05, - "loss": 4.8252, - "step": 21088 - }, - { - "epoch": 10.998174706649284, - "grad_norm": 1.5057225227355957, - "learning_rate": 7.931356783919598e-05, - "loss": 5.6352, - "step": 21089 - }, - { - "epoch": 10.998696219035201, - "grad_norm": 1.3950471878051758, - "learning_rate": 7.931256281407036e-05, - "loss": 5.9779, - "step": 21090 - }, - { - "epoch": 10.99921773142112, - "grad_norm": 1.4227081537246704, - "learning_rate": 7.931155778894473e-05, - "loss": 5.5034, - "step": 21091 - }, - { - "epoch": 10.99973924380704, - "grad_norm": 1.4018871784210205, - "learning_rate": 7.93105527638191e-05, - "loss": 4.8098, - "step": 21092 - }, - { - "epoch": 11.00026075619296, - "grad_norm": 1.5166515111923218, - "learning_rate": 7.930954773869347e-05, - "loss": 5.1578, - "step": 21093 - }, - { - "epoch": 11.00078226857888, - "grad_norm": 1.328994631767273, - "learning_rate": 7.930854271356785e-05, - "loss": 5.7272, - "step": 21094 - }, - { - "epoch": 11.001303780964799, - "grad_norm": 1.3658723831176758, - "learning_rate": 7.930753768844221e-05, - "loss": 5.4617, - "step": 21095 - }, - { - "epoch": 11.001825293350716, - "grad_norm": 1.4680907726287842, - "learning_rate": 7.930653266331659e-05, - "loss": 5.0846, - "step": 21096 - }, - { - "epoch": 11.002346805736636, - "grad_norm": 1.5040714740753174, - "learning_rate": 7.930552763819095e-05, - "loss": 5.4929, - "step": 21097 - }, - { - "epoch": 11.002868318122555, - "grad_norm": 1.4263101816177368, - "learning_rate": 7.930452261306533e-05, - "loss": 5.1554, - "step": 21098 - }, - { - "epoch": 11.003389830508475, - "grad_norm": 1.4052366018295288, - "learning_rate": 7.930351758793971e-05, - "loss": 5.7564, - "step": 21099 - }, - { - "epoch": 11.003911342894394, - "grad_norm": 1.5011886358261108, - "learning_rate": 7.930251256281409e-05, - "loss": 5.4001, - "step": 21100 - }, - { - "epoch": 11.004432855280314, - "grad_norm": 1.4798582792282104, - "learning_rate": 7.930150753768845e-05, - "loss": 5.6641, - "step": 21101 - }, - { - "epoch": 11.004954367666231, - "grad_norm": 1.4804071187973022, - "learning_rate": 7.930050251256281e-05, - "loss": 5.5523, - "step": 21102 - }, - { - "epoch": 11.00547588005215, - "grad_norm": 1.586796522140503, - "learning_rate": 7.929949748743719e-05, - "loss": 5.5702, - "step": 21103 - }, - { - "epoch": 11.00599739243807, - "grad_norm": 1.487642526626587, - "learning_rate": 7.929849246231156e-05, - "loss": 5.3448, - "step": 21104 - }, - { - "epoch": 11.00651890482399, - "grad_norm": 1.5953071117401123, - "learning_rate": 7.929748743718593e-05, - "loss": 5.1455, - "step": 21105 - }, - { - "epoch": 11.00704041720991, - "grad_norm": 1.4451327323913574, - "learning_rate": 7.92964824120603e-05, - "loss": 5.666, - "step": 21106 - }, - { - "epoch": 11.007561929595829, - "grad_norm": 1.4291576147079468, - "learning_rate": 7.929547738693468e-05, - "loss": 5.567, - "step": 21107 - }, - { - "epoch": 11.008083441981746, - "grad_norm": 1.5038859844207764, - "learning_rate": 7.929447236180904e-05, - "loss": 5.6229, - "step": 21108 - }, - { - "epoch": 11.008604954367666, - "grad_norm": 1.5580543279647827, - "learning_rate": 7.929346733668342e-05, - "loss": 5.5601, - "step": 21109 - }, - { - "epoch": 11.009126466753585, - "grad_norm": 1.417738676071167, - "learning_rate": 7.92924623115578e-05, - "loss": 5.7592, - "step": 21110 - }, - { - "epoch": 11.009647979139505, - "grad_norm": 1.447232723236084, - "learning_rate": 7.929145728643217e-05, - "loss": 5.2207, - "step": 21111 - }, - { - "epoch": 11.010169491525424, - "grad_norm": 1.44540536403656, - "learning_rate": 7.929045226130654e-05, - "loss": 5.6559, - "step": 21112 - }, - { - "epoch": 11.010691003911344, - "grad_norm": 1.4783271551132202, - "learning_rate": 7.928944723618092e-05, - "loss": 5.428, - "step": 21113 - }, - { - "epoch": 11.011212516297261, - "grad_norm": 1.4941426515579224, - "learning_rate": 7.928844221105528e-05, - "loss": 5.7347, - "step": 21114 - }, - { - "epoch": 11.01173402868318, - "grad_norm": 1.5299079418182373, - "learning_rate": 7.928743718592964e-05, - "loss": 5.4626, - "step": 21115 - }, - { - "epoch": 11.0122555410691, - "grad_norm": 1.3269082307815552, - "learning_rate": 7.928643216080402e-05, - "loss": 5.4847, - "step": 21116 - }, - { - "epoch": 11.01277705345502, - "grad_norm": 1.4174437522888184, - "learning_rate": 7.928542713567839e-05, - "loss": 5.3985, - "step": 21117 - }, - { - "epoch": 11.01329856584094, - "grad_norm": 1.3714170455932617, - "learning_rate": 7.928442211055276e-05, - "loss": 5.7612, - "step": 21118 - }, - { - "epoch": 11.013820078226859, - "grad_norm": 1.3990304470062256, - "learning_rate": 7.928341708542714e-05, - "loss": 5.2894, - "step": 21119 - }, - { - "epoch": 11.014341590612776, - "grad_norm": 1.405818223953247, - "learning_rate": 7.928241206030152e-05, - "loss": 5.7779, - "step": 21120 - }, - { - "epoch": 11.014863102998696, - "grad_norm": 1.4300235509872437, - "learning_rate": 7.928140703517588e-05, - "loss": 5.6564, - "step": 21121 - }, - { - "epoch": 11.015384615384615, - "grad_norm": 1.3794262409210205, - "learning_rate": 7.928040201005026e-05, - "loss": 5.8594, - "step": 21122 - }, - { - "epoch": 11.015906127770535, - "grad_norm": 1.4532383680343628, - "learning_rate": 7.927939698492463e-05, - "loss": 5.6946, - "step": 21123 - }, - { - "epoch": 11.016427640156454, - "grad_norm": 1.3015906810760498, - "learning_rate": 7.9278391959799e-05, - "loss": 5.7648, - "step": 21124 - }, - { - "epoch": 11.016949152542374, - "grad_norm": 1.415192723274231, - "learning_rate": 7.927738693467337e-05, - "loss": 5.459, - "step": 21125 - }, - { - "epoch": 11.017470664928291, - "grad_norm": 1.470921277999878, - "learning_rate": 7.927638190954775e-05, - "loss": 4.8687, - "step": 21126 - }, - { - "epoch": 11.01799217731421, - "grad_norm": 1.551245927810669, - "learning_rate": 7.927537688442211e-05, - "loss": 5.5919, - "step": 21127 - }, - { - "epoch": 11.01851368970013, - "grad_norm": 1.8100107908248901, - "learning_rate": 7.927437185929648e-05, - "loss": 4.523, - "step": 21128 - }, - { - "epoch": 11.01903520208605, - "grad_norm": 1.4497419595718384, - "learning_rate": 7.927336683417085e-05, - "loss": 4.7015, - "step": 21129 - }, - { - "epoch": 11.01955671447197, - "grad_norm": 1.5328397750854492, - "learning_rate": 7.927236180904523e-05, - "loss": 5.3442, - "step": 21130 - }, - { - "epoch": 11.020078226857889, - "grad_norm": 1.5297495126724243, - "learning_rate": 7.927135678391961e-05, - "loss": 5.3453, - "step": 21131 - }, - { - "epoch": 11.020599739243806, - "grad_norm": 1.538192629814148, - "learning_rate": 7.927035175879397e-05, - "loss": 5.1751, - "step": 21132 - }, - { - "epoch": 11.021121251629726, - "grad_norm": 1.4744600057601929, - "learning_rate": 7.926934673366835e-05, - "loss": 5.5567, - "step": 21133 - }, - { - "epoch": 11.021642764015645, - "grad_norm": 1.418416976928711, - "learning_rate": 7.926834170854271e-05, - "loss": 5.5466, - "step": 21134 - }, - { - "epoch": 11.022164276401565, - "grad_norm": 1.598259449005127, - "learning_rate": 7.926733668341709e-05, - "loss": 5.2062, - "step": 21135 - }, - { - "epoch": 11.022685788787484, - "grad_norm": 1.3259409666061401, - "learning_rate": 7.926633165829146e-05, - "loss": 5.8078, - "step": 21136 - }, - { - "epoch": 11.023207301173404, - "grad_norm": 1.3556679487228394, - "learning_rate": 7.926532663316583e-05, - "loss": 5.3739, - "step": 21137 - }, - { - "epoch": 11.023728813559321, - "grad_norm": 1.4929907321929932, - "learning_rate": 7.92643216080402e-05, - "loss": 5.6128, - "step": 21138 - }, - { - "epoch": 11.024250325945241, - "grad_norm": 1.5007375478744507, - "learning_rate": 7.926331658291458e-05, - "loss": 5.2952, - "step": 21139 - }, - { - "epoch": 11.02477183833116, - "grad_norm": 1.5095196962356567, - "learning_rate": 7.926231155778895e-05, - "loss": 5.3569, - "step": 21140 - }, - { - "epoch": 11.02529335071708, - "grad_norm": 1.3737893104553223, - "learning_rate": 7.926130653266332e-05, - "loss": 5.691, - "step": 21141 - }, - { - "epoch": 11.025814863103, - "grad_norm": 1.4987726211547852, - "learning_rate": 7.92603015075377e-05, - "loss": 5.2381, - "step": 21142 - }, - { - "epoch": 11.026336375488917, - "grad_norm": 1.4082118272781372, - "learning_rate": 7.925929648241206e-05, - "loss": 5.5554, - "step": 21143 - }, - { - "epoch": 11.026857887874836, - "grad_norm": 1.521992802619934, - "learning_rate": 7.925829145728644e-05, - "loss": 5.118, - "step": 21144 - }, - { - "epoch": 11.027379400260756, - "grad_norm": 1.3802266120910645, - "learning_rate": 7.92572864321608e-05, - "loss": 5.832, - "step": 21145 - }, - { - "epoch": 11.027900912646675, - "grad_norm": 1.4444068670272827, - "learning_rate": 7.925628140703518e-05, - "loss": 5.5427, - "step": 21146 - }, - { - "epoch": 11.028422425032595, - "grad_norm": 1.423485517501831, - "learning_rate": 7.925527638190955e-05, - "loss": 5.219, - "step": 21147 - }, - { - "epoch": 11.028943937418514, - "grad_norm": 1.3630317449569702, - "learning_rate": 7.925427135678392e-05, - "loss": 5.9402, - "step": 21148 - }, - { - "epoch": 11.029465449804432, - "grad_norm": 1.399409532546997, - "learning_rate": 7.925326633165829e-05, - "loss": 5.5909, - "step": 21149 - }, - { - "epoch": 11.029986962190351, - "grad_norm": 1.4210819005966187, - "learning_rate": 7.925226130653267e-05, - "loss": 5.4409, - "step": 21150 - }, - { - "epoch": 11.030508474576271, - "grad_norm": 1.4736242294311523, - "learning_rate": 7.925125628140704e-05, - "loss": 5.529, - "step": 21151 - }, - { - "epoch": 11.03102998696219, - "grad_norm": 1.4509730339050293, - "learning_rate": 7.925025125628142e-05, - "loss": 5.18, - "step": 21152 - }, - { - "epoch": 11.03155149934811, - "grad_norm": 1.457413911819458, - "learning_rate": 7.924924623115579e-05, - "loss": 5.6129, - "step": 21153 - }, - { - "epoch": 11.03207301173403, - "grad_norm": 1.4118088483810425, - "learning_rate": 7.924824120603015e-05, - "loss": 5.4667, - "step": 21154 - }, - { - "epoch": 11.032594524119947, - "grad_norm": 1.4206299781799316, - "learning_rate": 7.924723618090453e-05, - "loss": 5.4768, - "step": 21155 - }, - { - "epoch": 11.033116036505866, - "grad_norm": 1.4658806324005127, - "learning_rate": 7.924623115577889e-05, - "loss": 5.2773, - "step": 21156 - }, - { - "epoch": 11.033637548891786, - "grad_norm": 1.4071263074874878, - "learning_rate": 7.924522613065327e-05, - "loss": 5.3763, - "step": 21157 - }, - { - "epoch": 11.034159061277705, - "grad_norm": 1.4719984531402588, - "learning_rate": 7.924422110552763e-05, - "loss": 5.766, - "step": 21158 - }, - { - "epoch": 11.034680573663625, - "grad_norm": 1.4947247505187988, - "learning_rate": 7.924321608040201e-05, - "loss": 5.366, - "step": 21159 - }, - { - "epoch": 11.035202086049544, - "grad_norm": 1.3157250881195068, - "learning_rate": 7.924221105527639e-05, - "loss": 5.2314, - "step": 21160 - }, - { - "epoch": 11.035723598435462, - "grad_norm": 1.423624873161316, - "learning_rate": 7.924120603015077e-05, - "loss": 5.4141, - "step": 21161 - }, - { - "epoch": 11.036245110821381, - "grad_norm": 1.4969482421875, - "learning_rate": 7.924020100502513e-05, - "loss": 5.1185, - "step": 21162 - }, - { - "epoch": 11.036766623207301, - "grad_norm": 1.5351263284683228, - "learning_rate": 7.923919597989951e-05, - "loss": 5.0627, - "step": 21163 - }, - { - "epoch": 11.03728813559322, - "grad_norm": 1.4909899234771729, - "learning_rate": 7.923819095477387e-05, - "loss": 5.0985, - "step": 21164 - }, - { - "epoch": 11.03780964797914, - "grad_norm": 1.5653775930404663, - "learning_rate": 7.923718592964825e-05, - "loss": 5.1166, - "step": 21165 - }, - { - "epoch": 11.03833116036506, - "grad_norm": 1.464033603668213, - "learning_rate": 7.923618090452262e-05, - "loss": 5.4518, - "step": 21166 - }, - { - "epoch": 11.038852672750977, - "grad_norm": 1.57407808303833, - "learning_rate": 7.923517587939698e-05, - "loss": 5.3287, - "step": 21167 - }, - { - "epoch": 11.039374185136897, - "grad_norm": 1.991546630859375, - "learning_rate": 7.923417085427136e-05, - "loss": 4.8421, - "step": 21168 - }, - { - "epoch": 11.039895697522816, - "grad_norm": 1.4552110433578491, - "learning_rate": 7.923316582914572e-05, - "loss": 4.7238, - "step": 21169 - }, - { - "epoch": 11.040417209908735, - "grad_norm": 1.4475016593933105, - "learning_rate": 7.92321608040201e-05, - "loss": 5.2892, - "step": 21170 - }, - { - "epoch": 11.040938722294655, - "grad_norm": 1.3825756311416626, - "learning_rate": 7.923115577889448e-05, - "loss": 5.6258, - "step": 21171 - }, - { - "epoch": 11.041460234680574, - "grad_norm": 1.5588867664337158, - "learning_rate": 7.923015075376886e-05, - "loss": 5.5649, - "step": 21172 - }, - { - "epoch": 11.041981747066492, - "grad_norm": 1.4414454698562622, - "learning_rate": 7.922914572864322e-05, - "loss": 5.5498, - "step": 21173 - }, - { - "epoch": 11.042503259452412, - "grad_norm": 1.3351343870162964, - "learning_rate": 7.92281407035176e-05, - "loss": 4.9711, - "step": 21174 - }, - { - "epoch": 11.043024771838331, - "grad_norm": 1.4149149656295776, - "learning_rate": 7.922713567839196e-05, - "loss": 5.406, - "step": 21175 - }, - { - "epoch": 11.04354628422425, - "grad_norm": 1.468466877937317, - "learning_rate": 7.922613065326634e-05, - "loss": 5.0924, - "step": 21176 - }, - { - "epoch": 11.04406779661017, - "grad_norm": 1.4128668308258057, - "learning_rate": 7.92251256281407e-05, - "loss": 5.6233, - "step": 21177 - }, - { - "epoch": 11.04458930899609, - "grad_norm": 1.3970285654067993, - "learning_rate": 7.922412060301508e-05, - "loss": 5.7635, - "step": 21178 - }, - { - "epoch": 11.045110821382007, - "grad_norm": 1.3432559967041016, - "learning_rate": 7.922311557788945e-05, - "loss": 5.7469, - "step": 21179 - }, - { - "epoch": 11.045632333767927, - "grad_norm": 1.3531700372695923, - "learning_rate": 7.922211055276382e-05, - "loss": 5.8838, - "step": 21180 - }, - { - "epoch": 11.046153846153846, - "grad_norm": 1.4331624507904053, - "learning_rate": 7.92211055276382e-05, - "loss": 5.5378, - "step": 21181 - }, - { - "epoch": 11.046675358539765, - "grad_norm": 1.4573742151260376, - "learning_rate": 7.922010050251257e-05, - "loss": 4.933, - "step": 21182 - }, - { - "epoch": 11.047196870925685, - "grad_norm": 1.6009808778762817, - "learning_rate": 7.921909547738694e-05, - "loss": 5.2543, - "step": 21183 - }, - { - "epoch": 11.047718383311604, - "grad_norm": 1.501879334449768, - "learning_rate": 7.921809045226131e-05, - "loss": 5.1047, - "step": 21184 - }, - { - "epoch": 11.048239895697522, - "grad_norm": 1.4740437269210815, - "learning_rate": 7.921708542713569e-05, - "loss": 5.4539, - "step": 21185 - }, - { - "epoch": 11.048761408083442, - "grad_norm": 1.4400067329406738, - "learning_rate": 7.921608040201005e-05, - "loss": 5.2321, - "step": 21186 - }, - { - "epoch": 11.049282920469361, - "grad_norm": 1.480890154838562, - "learning_rate": 7.921507537688443e-05, - "loss": 5.4579, - "step": 21187 - }, - { - "epoch": 11.04980443285528, - "grad_norm": 1.4505083560943604, - "learning_rate": 7.921407035175879e-05, - "loss": 5.7324, - "step": 21188 - }, - { - "epoch": 11.0503259452412, - "grad_norm": 1.4105050563812256, - "learning_rate": 7.921306532663317e-05, - "loss": 5.5168, - "step": 21189 - }, - { - "epoch": 11.05084745762712, - "grad_norm": 1.380867600440979, - "learning_rate": 7.921206030150753e-05, - "loss": 5.6185, - "step": 21190 - }, - { - "epoch": 11.051368970013037, - "grad_norm": 1.4515094757080078, - "learning_rate": 7.921105527638191e-05, - "loss": 4.6706, - "step": 21191 - }, - { - "epoch": 11.051890482398957, - "grad_norm": 1.6026372909545898, - "learning_rate": 7.921005025125629e-05, - "loss": 5.161, - "step": 21192 - }, - { - "epoch": 11.052411994784876, - "grad_norm": 1.2773542404174805, - "learning_rate": 7.920904522613067e-05, - "loss": 5.496, - "step": 21193 - }, - { - "epoch": 11.052933507170795, - "grad_norm": 1.406302809715271, - "learning_rate": 7.920804020100503e-05, - "loss": 5.3697, - "step": 21194 - }, - { - "epoch": 11.053455019556715, - "grad_norm": 1.4720832109451294, - "learning_rate": 7.92070351758794e-05, - "loss": 5.3428, - "step": 21195 - }, - { - "epoch": 11.053976531942634, - "grad_norm": 1.4226127862930298, - "learning_rate": 7.920603015075377e-05, - "loss": 5.5224, - "step": 21196 - }, - { - "epoch": 11.054498044328552, - "grad_norm": 1.5327823162078857, - "learning_rate": 7.920502512562814e-05, - "loss": 5.4728, - "step": 21197 - }, - { - "epoch": 11.055019556714472, - "grad_norm": 1.4475775957107544, - "learning_rate": 7.920402010050252e-05, - "loss": 5.5179, - "step": 21198 - }, - { - "epoch": 11.055541069100391, - "grad_norm": 1.3338887691497803, - "learning_rate": 7.920301507537688e-05, - "loss": 5.6187, - "step": 21199 - }, - { - "epoch": 11.05606258148631, - "grad_norm": 1.509534239768982, - "learning_rate": 7.920201005025126e-05, - "loss": 4.5805, - "step": 21200 - }, - { - "epoch": 11.05658409387223, - "grad_norm": 1.4591636657714844, - "learning_rate": 7.920100502512564e-05, - "loss": 5.7511, - "step": 21201 - }, - { - "epoch": 11.05710560625815, - "grad_norm": 1.8478814363479614, - "learning_rate": 7.920000000000001e-05, - "loss": 4.8851, - "step": 21202 - }, - { - "epoch": 11.057627118644067, - "grad_norm": 1.426405668258667, - "learning_rate": 7.919899497487438e-05, - "loss": 5.5841, - "step": 21203 - }, - { - "epoch": 11.058148631029987, - "grad_norm": 1.3223819732666016, - "learning_rate": 7.919798994974876e-05, - "loss": 5.8779, - "step": 21204 - }, - { - "epoch": 11.058670143415906, - "grad_norm": 1.4365882873535156, - "learning_rate": 7.919698492462312e-05, - "loss": 5.2282, - "step": 21205 - }, - { - "epoch": 11.059191655801826, - "grad_norm": 1.5225387811660767, - "learning_rate": 7.91959798994975e-05, - "loss": 5.3887, - "step": 21206 - }, - { - "epoch": 11.059713168187745, - "grad_norm": 1.3905385732650757, - "learning_rate": 7.919497487437186e-05, - "loss": 5.586, - "step": 21207 - }, - { - "epoch": 11.060234680573664, - "grad_norm": 1.4842307567596436, - "learning_rate": 7.919396984924623e-05, - "loss": 5.4017, - "step": 21208 - }, - { - "epoch": 11.060756192959582, - "grad_norm": 1.4051399230957031, - "learning_rate": 7.91929648241206e-05, - "loss": 5.6332, - "step": 21209 - }, - { - "epoch": 11.061277705345502, - "grad_norm": 1.8363021612167358, - "learning_rate": 7.919195979899497e-05, - "loss": 5.0921, - "step": 21210 - }, - { - "epoch": 11.061799217731421, - "grad_norm": 1.4234111309051514, - "learning_rate": 7.919095477386935e-05, - "loss": 5.8824, - "step": 21211 - }, - { - "epoch": 11.06232073011734, - "grad_norm": 1.4245593547821045, - "learning_rate": 7.918994974874372e-05, - "loss": 5.4575, - "step": 21212 - }, - { - "epoch": 11.06284224250326, - "grad_norm": 1.4988527297973633, - "learning_rate": 7.91889447236181e-05, - "loss": 5.806, - "step": 21213 - }, - { - "epoch": 11.06336375488918, - "grad_norm": 1.5243198871612549, - "learning_rate": 7.918793969849247e-05, - "loss": 5.4911, - "step": 21214 - }, - { - "epoch": 11.063885267275097, - "grad_norm": 1.5343937873840332, - "learning_rate": 7.918693467336684e-05, - "loss": 5.4117, - "step": 21215 - }, - { - "epoch": 11.064406779661017, - "grad_norm": 1.3255337476730347, - "learning_rate": 7.918592964824121e-05, - "loss": 5.7498, - "step": 21216 - }, - { - "epoch": 11.064928292046936, - "grad_norm": 1.4247322082519531, - "learning_rate": 7.918492462311559e-05, - "loss": 5.6449, - "step": 21217 - }, - { - "epoch": 11.065449804432856, - "grad_norm": 1.4279701709747314, - "learning_rate": 7.918391959798995e-05, - "loss": 5.4121, - "step": 21218 - }, - { - "epoch": 11.065971316818775, - "grad_norm": 1.4766770601272583, - "learning_rate": 7.918291457286433e-05, - "loss": 5.0202, - "step": 21219 - }, - { - "epoch": 11.066492829204694, - "grad_norm": 1.39676034450531, - "learning_rate": 7.918190954773869e-05, - "loss": 5.6075, - "step": 21220 - }, - { - "epoch": 11.067014341590612, - "grad_norm": 1.4796735048294067, - "learning_rate": 7.918090452261306e-05, - "loss": 5.5205, - "step": 21221 - }, - { - "epoch": 11.067535853976532, - "grad_norm": 1.3716721534729004, - "learning_rate": 7.917989949748744e-05, - "loss": 5.6054, - "step": 21222 - }, - { - "epoch": 11.068057366362451, - "grad_norm": 1.4955058097839355, - "learning_rate": 7.917889447236181e-05, - "loss": 5.146, - "step": 21223 - }, - { - "epoch": 11.06857887874837, - "grad_norm": 1.4187158346176147, - "learning_rate": 7.917788944723619e-05, - "loss": 5.6838, - "step": 21224 - }, - { - "epoch": 11.06910039113429, - "grad_norm": 1.4310904741287231, - "learning_rate": 7.917688442211056e-05, - "loss": 5.0359, - "step": 21225 - }, - { - "epoch": 11.06962190352021, - "grad_norm": 1.4314510822296143, - "learning_rate": 7.917587939698493e-05, - "loss": 5.334, - "step": 21226 - }, - { - "epoch": 11.070143415906127, - "grad_norm": 1.4251179695129395, - "learning_rate": 7.91748743718593e-05, - "loss": 5.3107, - "step": 21227 - }, - { - "epoch": 11.070664928292047, - "grad_norm": 1.4709123373031616, - "learning_rate": 7.917386934673368e-05, - "loss": 5.4979, - "step": 21228 - }, - { - "epoch": 11.071186440677966, - "grad_norm": 1.3267229795455933, - "learning_rate": 7.917286432160804e-05, - "loss": 5.6282, - "step": 21229 - }, - { - "epoch": 11.071707953063886, - "grad_norm": 1.5685628652572632, - "learning_rate": 7.917185929648242e-05, - "loss": 5.1599, - "step": 21230 - }, - { - "epoch": 11.072229465449805, - "grad_norm": 1.4914478063583374, - "learning_rate": 7.917085427135678e-05, - "loss": 5.2843, - "step": 21231 - }, - { - "epoch": 11.072750977835724, - "grad_norm": 1.3930107355117798, - "learning_rate": 7.916984924623116e-05, - "loss": 5.4282, - "step": 21232 - }, - { - "epoch": 11.073272490221642, - "grad_norm": 1.4599641561508179, - "learning_rate": 7.916884422110554e-05, - "loss": 5.5887, - "step": 21233 - }, - { - "epoch": 11.073794002607562, - "grad_norm": 1.371597409248352, - "learning_rate": 7.91678391959799e-05, - "loss": 5.7628, - "step": 21234 - }, - { - "epoch": 11.074315514993481, - "grad_norm": 1.5411545038223267, - "learning_rate": 7.916683417085428e-05, - "loss": 5.4043, - "step": 21235 - }, - { - "epoch": 11.0748370273794, - "grad_norm": 1.5288842916488647, - "learning_rate": 7.916582914572864e-05, - "loss": 5.2011, - "step": 21236 - }, - { - "epoch": 11.07535853976532, - "grad_norm": 1.4874614477157593, - "learning_rate": 7.916482412060302e-05, - "loss": 5.4894, - "step": 21237 - }, - { - "epoch": 11.075880052151238, - "grad_norm": 1.490326166152954, - "learning_rate": 7.916381909547739e-05, - "loss": 5.4642, - "step": 21238 - }, - { - "epoch": 11.076401564537157, - "grad_norm": 1.5152308940887451, - "learning_rate": 7.916281407035176e-05, - "loss": 5.7777, - "step": 21239 - }, - { - "epoch": 11.076923076923077, - "grad_norm": 1.615773320198059, - "learning_rate": 7.916180904522613e-05, - "loss": 5.1504, - "step": 21240 - }, - { - "epoch": 11.077444589308996, - "grad_norm": 1.4543851613998413, - "learning_rate": 7.91608040201005e-05, - "loss": 5.8302, - "step": 21241 - }, - { - "epoch": 11.077966101694916, - "grad_norm": 1.4628264904022217, - "learning_rate": 7.915979899497487e-05, - "loss": 5.2132, - "step": 21242 - }, - { - "epoch": 11.078487614080835, - "grad_norm": 1.3789745569229126, - "learning_rate": 7.915879396984925e-05, - "loss": 5.7544, - "step": 21243 - }, - { - "epoch": 11.079009126466753, - "grad_norm": 1.4564965963363647, - "learning_rate": 7.915778894472363e-05, - "loss": 5.69, - "step": 21244 - }, - { - "epoch": 11.079530638852672, - "grad_norm": 1.3901844024658203, - "learning_rate": 7.9156783919598e-05, - "loss": 5.565, - "step": 21245 - }, - { - "epoch": 11.080052151238592, - "grad_norm": 1.4455740451812744, - "learning_rate": 7.915577889447237e-05, - "loss": 5.2531, - "step": 21246 - }, - { - "epoch": 11.080573663624511, - "grad_norm": 1.4660471677780151, - "learning_rate": 7.915477386934673e-05, - "loss": 5.6869, - "step": 21247 - }, - { - "epoch": 11.08109517601043, - "grad_norm": 1.5206879377365112, - "learning_rate": 7.915376884422111e-05, - "loss": 5.5161, - "step": 21248 - }, - { - "epoch": 11.08161668839635, - "grad_norm": 1.5010071992874146, - "learning_rate": 7.915276381909547e-05, - "loss": 5.0025, - "step": 21249 - }, - { - "epoch": 11.082138200782268, - "grad_norm": 1.6146413087844849, - "learning_rate": 7.915175879396985e-05, - "loss": 4.8091, - "step": 21250 - }, - { - "epoch": 11.082659713168187, - "grad_norm": 1.3685529232025146, - "learning_rate": 7.915075376884422e-05, - "loss": 5.6754, - "step": 21251 - }, - { - "epoch": 11.083181225554107, - "grad_norm": 1.4532442092895508, - "learning_rate": 7.91497487437186e-05, - "loss": 5.7915, - "step": 21252 - }, - { - "epoch": 11.083702737940026, - "grad_norm": 1.593357801437378, - "learning_rate": 7.914874371859297e-05, - "loss": 5.396, - "step": 21253 - }, - { - "epoch": 11.084224250325946, - "grad_norm": 1.6555933952331543, - "learning_rate": 7.914773869346735e-05, - "loss": 5.0878, - "step": 21254 - }, - { - "epoch": 11.084745762711865, - "grad_norm": 1.5196130275726318, - "learning_rate": 7.914673366834171e-05, - "loss": 5.2046, - "step": 21255 - }, - { - "epoch": 11.085267275097783, - "grad_norm": 1.4507348537445068, - "learning_rate": 7.914572864321609e-05, - "loss": 5.4953, - "step": 21256 - }, - { - "epoch": 11.085788787483702, - "grad_norm": 1.4180045127868652, - "learning_rate": 7.914472361809046e-05, - "loss": 5.8469, - "step": 21257 - }, - { - "epoch": 11.086310299869622, - "grad_norm": 1.43502938747406, - "learning_rate": 7.914371859296483e-05, - "loss": 5.6067, - "step": 21258 - }, - { - "epoch": 11.086831812255541, - "grad_norm": 1.4701634645462036, - "learning_rate": 7.91427135678392e-05, - "loss": 5.6349, - "step": 21259 - }, - { - "epoch": 11.08735332464146, - "grad_norm": 1.4130322933197021, - "learning_rate": 7.914170854271356e-05, - "loss": 5.4914, - "step": 21260 - }, - { - "epoch": 11.08787483702738, - "grad_norm": 1.395987868309021, - "learning_rate": 7.914070351758794e-05, - "loss": 5.2964, - "step": 21261 - }, - { - "epoch": 11.088396349413298, - "grad_norm": Infinity, - "learning_rate": 7.914070351758794e-05, - "loss": 5.7257, - "step": 21262 - }, - { - "epoch": 11.088917861799217, - "grad_norm": 1.4860707521438599, - "learning_rate": 7.91396984924623e-05, - "loss": 5.3845, - "step": 21263 - }, - { - "epoch": 11.089439374185137, - "grad_norm": 1.5197169780731201, - "learning_rate": 7.913869346733668e-05, - "loss": 5.0208, - "step": 21264 - }, - { - "epoch": 11.089960886571056, - "grad_norm": 1.5625925064086914, - "learning_rate": 7.913768844221106e-05, - "loss": 5.0949, - "step": 21265 - }, - { - "epoch": 11.090482398956976, - "grad_norm": 1.3764362335205078, - "learning_rate": 7.913668341708544e-05, - "loss": 5.2299, - "step": 21266 - }, - { - "epoch": 11.091003911342895, - "grad_norm": 1.4350699186325073, - "learning_rate": 7.91356783919598e-05, - "loss": 5.6264, - "step": 21267 - }, - { - "epoch": 11.091525423728813, - "grad_norm": 1.4505139589309692, - "learning_rate": 7.913467336683418e-05, - "loss": 5.5128, - "step": 21268 - }, - { - "epoch": 11.092046936114732, - "grad_norm": 1.4239321947097778, - "learning_rate": 7.913366834170854e-05, - "loss": 5.2717, - "step": 21269 - }, - { - "epoch": 11.092568448500652, - "grad_norm": 1.415115475654602, - "learning_rate": 7.913266331658292e-05, - "loss": 5.6871, - "step": 21270 - }, - { - "epoch": 11.093089960886571, - "grad_norm": 1.498635172843933, - "learning_rate": 7.913165829145729e-05, - "loss": 5.6594, - "step": 21271 - }, - { - "epoch": 11.09361147327249, - "grad_norm": 1.5114023685455322, - "learning_rate": 7.913065326633166e-05, - "loss": 5.5052, - "step": 21272 - }, - { - "epoch": 11.09413298565841, - "grad_norm": 1.498643159866333, - "learning_rate": 7.912964824120603e-05, - "loss": 5.2302, - "step": 21273 - }, - { - "epoch": 11.094654498044328, - "grad_norm": 1.4168274402618408, - "learning_rate": 7.91286432160804e-05, - "loss": 6.0205, - "step": 21274 - }, - { - "epoch": 11.095176010430247, - "grad_norm": 1.630904197692871, - "learning_rate": 7.912763819095478e-05, - "loss": 5.2637, - "step": 21275 - }, - { - "epoch": 11.095697522816167, - "grad_norm": 1.3958061933517456, - "learning_rate": 7.912663316582915e-05, - "loss": 5.6839, - "step": 21276 - }, - { - "epoch": 11.096219035202086, - "grad_norm": 1.3279367685317993, - "learning_rate": 7.912562814070353e-05, - "loss": 5.8795, - "step": 21277 - }, - { - "epoch": 11.096740547588006, - "grad_norm": 1.402670979499817, - "learning_rate": 7.912462311557789e-05, - "loss": 5.4999, - "step": 21278 - }, - { - "epoch": 11.097262059973925, - "grad_norm": 1.435163974761963, - "learning_rate": 7.912361809045227e-05, - "loss": 5.592, - "step": 21279 - }, - { - "epoch": 11.097783572359843, - "grad_norm": 1.4810028076171875, - "learning_rate": 7.912261306532663e-05, - "loss": 5.2114, - "step": 21280 - }, - { - "epoch": 11.098305084745762, - "grad_norm": 1.4576942920684814, - "learning_rate": 7.912160804020101e-05, - "loss": 5.1535, - "step": 21281 - }, - { - "epoch": 11.098826597131682, - "grad_norm": 1.4434548616409302, - "learning_rate": 7.912060301507537e-05, - "loss": 5.3347, - "step": 21282 - }, - { - "epoch": 11.099348109517601, - "grad_norm": 1.5359630584716797, - "learning_rate": 7.911959798994975e-05, - "loss": 5.112, - "step": 21283 - }, - { - "epoch": 11.09986962190352, - "grad_norm": 1.468384027481079, - "learning_rate": 7.911859296482412e-05, - "loss": 5.5106, - "step": 21284 - }, - { - "epoch": 11.10039113428944, - "grad_norm": 1.4887703657150269, - "learning_rate": 7.91175879396985e-05, - "loss": 5.2821, - "step": 21285 - }, - { - "epoch": 11.100912646675358, - "grad_norm": 1.3913615942001343, - "learning_rate": 7.911658291457287e-05, - "loss": 5.1712, - "step": 21286 - }, - { - "epoch": 11.101434159061277, - "grad_norm": 1.420173168182373, - "learning_rate": 7.911557788944725e-05, - "loss": 5.4368, - "step": 21287 - }, - { - "epoch": 11.101955671447197, - "grad_norm": 1.4927680492401123, - "learning_rate": 7.911457286432161e-05, - "loss": 5.4484, - "step": 21288 - }, - { - "epoch": 11.102477183833116, - "grad_norm": 1.4720066785812378, - "learning_rate": 7.911356783919598e-05, - "loss": 5.7663, - "step": 21289 - }, - { - "epoch": 11.102998696219036, - "grad_norm": 1.4533277750015259, - "learning_rate": 7.911256281407036e-05, - "loss": 5.7769, - "step": 21290 - }, - { - "epoch": 11.103520208604955, - "grad_norm": 1.5876013040542603, - "learning_rate": 7.911155778894472e-05, - "loss": 5.1546, - "step": 21291 - }, - { - "epoch": 11.104041720990873, - "grad_norm": 1.37773859500885, - "learning_rate": 7.91105527638191e-05, - "loss": 5.5711, - "step": 21292 - }, - { - "epoch": 11.104563233376792, - "grad_norm": 1.3498377799987793, - "learning_rate": 7.910954773869346e-05, - "loss": 5.5363, - "step": 21293 - }, - { - "epoch": 11.105084745762712, - "grad_norm": 1.4859713315963745, - "learning_rate": 7.910854271356784e-05, - "loss": 5.3093, - "step": 21294 - }, - { - "epoch": 11.105606258148631, - "grad_norm": 1.4717473983764648, - "learning_rate": 7.910753768844222e-05, - "loss": 5.471, - "step": 21295 - }, - { - "epoch": 11.10612777053455, - "grad_norm": 1.4986631870269775, - "learning_rate": 7.91065326633166e-05, - "loss": 5.6772, - "step": 21296 - }, - { - "epoch": 11.10664928292047, - "grad_norm": 1.4816921949386597, - "learning_rate": 7.910552763819096e-05, - "loss": 5.316, - "step": 21297 - }, - { - "epoch": 11.107170795306388, - "grad_norm": 1.5816766023635864, - "learning_rate": 7.910452261306534e-05, - "loss": 5.5382, - "step": 21298 - }, - { - "epoch": 11.107692307692307, - "grad_norm": 1.542664885520935, - "learning_rate": 7.91035175879397e-05, - "loss": 5.1647, - "step": 21299 - }, - { - "epoch": 11.108213820078227, - "grad_norm": 1.304136037826538, - "learning_rate": 7.910251256281408e-05, - "loss": 5.3134, - "step": 21300 - }, - { - "epoch": 11.108735332464146, - "grad_norm": 1.442695140838623, - "learning_rate": 7.910150753768845e-05, - "loss": 5.3785, - "step": 21301 - }, - { - "epoch": 11.109256844850066, - "grad_norm": 1.4418717622756958, - "learning_rate": 7.910050251256281e-05, - "loss": 5.2745, - "step": 21302 - }, - { - "epoch": 11.109778357235985, - "grad_norm": 1.3686498403549194, - "learning_rate": 7.909949748743719e-05, - "loss": 5.9698, - "step": 21303 - }, - { - "epoch": 11.110299869621903, - "grad_norm": 1.3885929584503174, - "learning_rate": 7.909849246231155e-05, - "loss": 5.8157, - "step": 21304 - }, - { - "epoch": 11.110821382007822, - "grad_norm": 1.5441056489944458, - "learning_rate": 7.909748743718593e-05, - "loss": 5.0413, - "step": 21305 - }, - { - "epoch": 11.111342894393742, - "grad_norm": 1.4308533668518066, - "learning_rate": 7.909648241206031e-05, - "loss": 5.5623, - "step": 21306 - }, - { - "epoch": 11.111864406779661, - "grad_norm": 1.555012822151184, - "learning_rate": 7.909547738693468e-05, - "loss": 5.2997, - "step": 21307 - }, - { - "epoch": 11.11238591916558, - "grad_norm": 1.4833509922027588, - "learning_rate": 7.909447236180905e-05, - "loss": 5.687, - "step": 21308 - }, - { - "epoch": 11.1129074315515, - "grad_norm": 1.4388275146484375, - "learning_rate": 7.909346733668343e-05, - "loss": 5.6397, - "step": 21309 - }, - { - "epoch": 11.113428943937418, - "grad_norm": 1.5124293565750122, - "learning_rate": 7.909246231155779e-05, - "loss": 5.5139, - "step": 21310 - }, - { - "epoch": 11.113950456323337, - "grad_norm": 1.4840890169143677, - "learning_rate": 7.909145728643217e-05, - "loss": 5.2564, - "step": 21311 - }, - { - "epoch": 11.114471968709257, - "grad_norm": 1.3972749710083008, - "learning_rate": 7.909045226130653e-05, - "loss": 5.557, - "step": 21312 - }, - { - "epoch": 11.114993481095176, - "grad_norm": 1.4226508140563965, - "learning_rate": 7.908944723618091e-05, - "loss": 5.4972, - "step": 21313 - }, - { - "epoch": 11.115514993481096, - "grad_norm": 1.5256398916244507, - "learning_rate": 7.908844221105528e-05, - "loss": 5.5025, - "step": 21314 - }, - { - "epoch": 11.116036505867015, - "grad_norm": 1.3962414264678955, - "learning_rate": 7.908743718592965e-05, - "loss": 5.6824, - "step": 21315 - }, - { - "epoch": 11.116558018252933, - "grad_norm": 1.4412841796875, - "learning_rate": 7.908643216080403e-05, - "loss": 5.4293, - "step": 21316 - }, - { - "epoch": 11.117079530638852, - "grad_norm": 1.4183921813964844, - "learning_rate": 7.90854271356784e-05, - "loss": 5.1309, - "step": 21317 - }, - { - "epoch": 11.117601043024772, - "grad_norm": 1.4119958877563477, - "learning_rate": 7.908442211055277e-05, - "loss": 5.4706, - "step": 21318 - }, - { - "epoch": 11.118122555410691, - "grad_norm": 1.4091073274612427, - "learning_rate": 7.908341708542714e-05, - "loss": 5.0468, - "step": 21319 - }, - { - "epoch": 11.11864406779661, - "grad_norm": 1.403618574142456, - "learning_rate": 7.908241206030152e-05, - "loss": 5.1639, - "step": 21320 - }, - { - "epoch": 11.11916558018253, - "grad_norm": 1.4982681274414062, - "learning_rate": 7.908140703517588e-05, - "loss": 5.0411, - "step": 21321 - }, - { - "epoch": 11.119687092568448, - "grad_norm": 1.6579148769378662, - "learning_rate": 7.908040201005026e-05, - "loss": 5.2419, - "step": 21322 - }, - { - "epoch": 11.120208604954367, - "grad_norm": 1.4114866256713867, - "learning_rate": 7.907939698492462e-05, - "loss": 5.1317, - "step": 21323 - }, - { - "epoch": 11.120730117340287, - "grad_norm": 1.4738303422927856, - "learning_rate": 7.9078391959799e-05, - "loss": 5.2142, - "step": 21324 - }, - { - "epoch": 11.121251629726206, - "grad_norm": 1.3809763193130493, - "learning_rate": 7.907738693467336e-05, - "loss": 5.7776, - "step": 21325 - }, - { - "epoch": 11.121773142112126, - "grad_norm": 1.45130455493927, - "learning_rate": 7.907638190954774e-05, - "loss": 5.1694, - "step": 21326 - }, - { - "epoch": 11.122294654498045, - "grad_norm": 1.4348806142807007, - "learning_rate": 7.907537688442212e-05, - "loss": 5.5582, - "step": 21327 - }, - { - "epoch": 11.122816166883963, - "grad_norm": 1.4996081590652466, - "learning_rate": 7.907437185929648e-05, - "loss": 5.4694, - "step": 21328 - }, - { - "epoch": 11.123337679269882, - "grad_norm": 1.5494446754455566, - "learning_rate": 7.907336683417086e-05, - "loss": 5.4363, - "step": 21329 - }, - { - "epoch": 11.123859191655802, - "grad_norm": 1.4614343643188477, - "learning_rate": 7.907236180904523e-05, - "loss": 5.332, - "step": 21330 - }, - { - "epoch": 11.124380704041721, - "grad_norm": 1.3817330598831177, - "learning_rate": 7.90713567839196e-05, - "loss": 5.505, - "step": 21331 - }, - { - "epoch": 11.12490221642764, - "grad_norm": 1.4325079917907715, - "learning_rate": 7.907035175879397e-05, - "loss": 5.1667, - "step": 21332 - }, - { - "epoch": 11.125423728813558, - "grad_norm": 1.3807177543640137, - "learning_rate": 7.906934673366835e-05, - "loss": 5.6405, - "step": 21333 - }, - { - "epoch": 11.125945241199478, - "grad_norm": 1.494067668914795, - "learning_rate": 7.906834170854271e-05, - "loss": 5.4016, - "step": 21334 - }, - { - "epoch": 11.126466753585397, - "grad_norm": 1.4297558069229126, - "learning_rate": 7.906733668341709e-05, - "loss": 5.0914, - "step": 21335 - }, - { - "epoch": 11.126988265971317, - "grad_norm": 1.7369492053985596, - "learning_rate": 7.906633165829147e-05, - "loss": 4.714, - "step": 21336 - }, - { - "epoch": 11.127509778357236, - "grad_norm": 1.5385626554489136, - "learning_rate": 7.906532663316584e-05, - "loss": 5.6693, - "step": 21337 - }, - { - "epoch": 11.128031290743156, - "grad_norm": 1.4284844398498535, - "learning_rate": 7.906432160804021e-05, - "loss": 5.6562, - "step": 21338 - }, - { - "epoch": 11.128552803129073, - "grad_norm": 1.6436957120895386, - "learning_rate": 7.906331658291459e-05, - "loss": 5.5379, - "step": 21339 - }, - { - "epoch": 11.129074315514993, - "grad_norm": 1.436186671257019, - "learning_rate": 7.906231155778895e-05, - "loss": 5.345, - "step": 21340 - }, - { - "epoch": 11.129595827900912, - "grad_norm": 1.4999115467071533, - "learning_rate": 7.906130653266331e-05, - "loss": 4.9306, - "step": 21341 - }, - { - "epoch": 11.130117340286832, - "grad_norm": 1.4505506753921509, - "learning_rate": 7.906030150753769e-05, - "loss": 5.1937, - "step": 21342 - }, - { - "epoch": 11.130638852672751, - "grad_norm": 1.4610061645507812, - "learning_rate": 7.905929648241206e-05, - "loss": 5.533, - "step": 21343 - }, - { - "epoch": 11.13116036505867, - "grad_norm": 1.4570389986038208, - "learning_rate": 7.905829145728643e-05, - "loss": 5.2592, - "step": 21344 - }, - { - "epoch": 11.131681877444588, - "grad_norm": 1.613128900527954, - "learning_rate": 7.90572864321608e-05, - "loss": 5.2162, - "step": 21345 - }, - { - "epoch": 11.132203389830508, - "grad_norm": 1.397904872894287, - "learning_rate": 7.905628140703518e-05, - "loss": 5.2699, - "step": 21346 - }, - { - "epoch": 11.132724902216427, - "grad_norm": 1.6108289957046509, - "learning_rate": 7.905527638190955e-05, - "loss": 5.2732, - "step": 21347 - }, - { - "epoch": 11.133246414602347, - "grad_norm": 1.4717718362808228, - "learning_rate": 7.905427135678393e-05, - "loss": 5.6659, - "step": 21348 - }, - { - "epoch": 11.133767926988266, - "grad_norm": 1.4224653244018555, - "learning_rate": 7.90532663316583e-05, - "loss": 5.3043, - "step": 21349 - }, - { - "epoch": 11.134289439374186, - "grad_norm": 1.394222378730774, - "learning_rate": 7.905226130653267e-05, - "loss": 5.5759, - "step": 21350 - }, - { - "epoch": 11.134810951760103, - "grad_norm": 1.4094802141189575, - "learning_rate": 7.905125628140704e-05, - "loss": 5.3345, - "step": 21351 - }, - { - "epoch": 11.135332464146023, - "grad_norm": 1.4624834060668945, - "learning_rate": 7.905025125628142e-05, - "loss": 5.3954, - "step": 21352 - }, - { - "epoch": 11.135853976531942, - "grad_norm": 1.5574744939804077, - "learning_rate": 7.904924623115578e-05, - "loss": 5.4129, - "step": 21353 - }, - { - "epoch": 11.136375488917862, - "grad_norm": 1.4083311557769775, - "learning_rate": 7.904824120603014e-05, - "loss": 5.6245, - "step": 21354 - }, - { - "epoch": 11.136897001303781, - "grad_norm": 1.486976981163025, - "learning_rate": 7.904723618090452e-05, - "loss": 5.0884, - "step": 21355 - }, - { - "epoch": 11.1374185136897, - "grad_norm": 1.5600109100341797, - "learning_rate": 7.90462311557789e-05, - "loss": 5.6982, - "step": 21356 - }, - { - "epoch": 11.137940026075619, - "grad_norm": 1.4768006801605225, - "learning_rate": 7.904522613065328e-05, - "loss": 5.5715, - "step": 21357 - }, - { - "epoch": 11.138461538461538, - "grad_norm": 1.51589035987854, - "learning_rate": 7.904422110552764e-05, - "loss": 5.3454, - "step": 21358 - }, - { - "epoch": 11.138983050847457, - "grad_norm": 1.4575436115264893, - "learning_rate": 7.904321608040202e-05, - "loss": 5.1589, - "step": 21359 - }, - { - "epoch": 11.139504563233377, - "grad_norm": 1.401412010192871, - "learning_rate": 7.904221105527638e-05, - "loss": 5.5201, - "step": 21360 - }, - { - "epoch": 11.140026075619296, - "grad_norm": 1.3746368885040283, - "learning_rate": 7.904120603015076e-05, - "loss": 5.3697, - "step": 21361 - }, - { - "epoch": 11.140547588005216, - "grad_norm": 1.4726382493972778, - "learning_rate": 7.904020100502513e-05, - "loss": 4.7123, - "step": 21362 - }, - { - "epoch": 11.141069100391134, - "grad_norm": 1.4307986497879028, - "learning_rate": 7.90391959798995e-05, - "loss": 5.5249, - "step": 21363 - }, - { - "epoch": 11.141590612777053, - "grad_norm": 1.5728458166122437, - "learning_rate": 7.903819095477387e-05, - "loss": 5.3412, - "step": 21364 - }, - { - "epoch": 11.142112125162972, - "grad_norm": 1.357520341873169, - "learning_rate": 7.903718592964825e-05, - "loss": 5.6215, - "step": 21365 - }, - { - "epoch": 11.142633637548892, - "grad_norm": 1.4037086963653564, - "learning_rate": 7.903618090452261e-05, - "loss": 5.9303, - "step": 21366 - }, - { - "epoch": 11.143155149934811, - "grad_norm": 1.530768871307373, - "learning_rate": 7.903517587939699e-05, - "loss": 5.4398, - "step": 21367 - }, - { - "epoch": 11.14367666232073, - "grad_norm": 1.3421982526779175, - "learning_rate": 7.903417085427137e-05, - "loss": 5.7736, - "step": 21368 - }, - { - "epoch": 11.144198174706649, - "grad_norm": 1.3881607055664062, - "learning_rate": 7.903316582914573e-05, - "loss": 5.3231, - "step": 21369 - }, - { - "epoch": 11.144719687092568, - "grad_norm": 1.5353821516036987, - "learning_rate": 7.903216080402011e-05, - "loss": 5.1406, - "step": 21370 - }, - { - "epoch": 11.145241199478487, - "grad_norm": 1.4521106481552124, - "learning_rate": 7.903115577889447e-05, - "loss": 5.681, - "step": 21371 - }, - { - "epoch": 11.145762711864407, - "grad_norm": 1.4113845825195312, - "learning_rate": 7.903015075376885e-05, - "loss": 5.6468, - "step": 21372 - }, - { - "epoch": 11.146284224250326, - "grad_norm": 1.3554868698120117, - "learning_rate": 7.902914572864321e-05, - "loss": 5.5326, - "step": 21373 - }, - { - "epoch": 11.146805736636246, - "grad_norm": 1.4909281730651855, - "learning_rate": 7.902814070351759e-05, - "loss": 5.0995, - "step": 21374 - }, - { - "epoch": 11.147327249022164, - "grad_norm": 1.44448721408844, - "learning_rate": 7.902713567839196e-05, - "loss": 5.7707, - "step": 21375 - }, - { - "epoch": 11.147848761408083, - "grad_norm": 1.5857930183410645, - "learning_rate": 7.902613065326633e-05, - "loss": 5.0657, - "step": 21376 - }, - { - "epoch": 11.148370273794002, - "grad_norm": 1.6383559703826904, - "learning_rate": 7.902512562814071e-05, - "loss": 5.6202, - "step": 21377 - }, - { - "epoch": 11.148891786179922, - "grad_norm": 1.4449540376663208, - "learning_rate": 7.902412060301509e-05, - "loss": 5.4997, - "step": 21378 - }, - { - "epoch": 11.149413298565841, - "grad_norm": 1.4250297546386719, - "learning_rate": 7.902311557788945e-05, - "loss": 5.4367, - "step": 21379 - }, - { - "epoch": 11.14993481095176, - "grad_norm": 1.3871220350265503, - "learning_rate": 7.902211055276383e-05, - "loss": 5.5282, - "step": 21380 - }, - { - "epoch": 11.150456323337679, - "grad_norm": 1.36214280128479, - "learning_rate": 7.90211055276382e-05, - "loss": 5.8248, - "step": 21381 - }, - { - "epoch": 11.150977835723598, - "grad_norm": 1.403998851776123, - "learning_rate": 7.902010050251256e-05, - "loss": 5.2967, - "step": 21382 - }, - { - "epoch": 11.151499348109517, - "grad_norm": 1.639087438583374, - "learning_rate": 7.901909547738694e-05, - "loss": 5.7568, - "step": 21383 - }, - { - "epoch": 11.152020860495437, - "grad_norm": 1.3040581941604614, - "learning_rate": 7.90180904522613e-05, - "loss": 4.9459, - "step": 21384 - }, - { - "epoch": 11.152542372881356, - "grad_norm": 1.278948426246643, - "learning_rate": 7.901708542713568e-05, - "loss": 5.5898, - "step": 21385 - }, - { - "epoch": 11.153063885267276, - "grad_norm": 1.3205883502960205, - "learning_rate": 7.901608040201005e-05, - "loss": 4.7975, - "step": 21386 - }, - { - "epoch": 11.153585397653194, - "grad_norm": 1.4380989074707031, - "learning_rate": 7.901507537688442e-05, - "loss": 5.5905, - "step": 21387 - }, - { - "epoch": 11.154106910039113, - "grad_norm": 1.3458704948425293, - "learning_rate": 7.90140703517588e-05, - "loss": 4.9709, - "step": 21388 - }, - { - "epoch": 11.154628422425032, - "grad_norm": 1.361082911491394, - "learning_rate": 7.901306532663318e-05, - "loss": 5.1245, - "step": 21389 - }, - { - "epoch": 11.155149934810952, - "grad_norm": Infinity, - "learning_rate": 7.901306532663318e-05, - "loss": 5.8181, - "step": 21390 - }, - { - "epoch": 11.155671447196871, - "grad_norm": 1.5814590454101562, - "learning_rate": 7.901206030150754e-05, - "loss": 5.4896, - "step": 21391 - }, - { - "epoch": 11.156192959582791, - "grad_norm": 1.4058235883712769, - "learning_rate": 7.901105527638192e-05, - "loss": 5.4626, - "step": 21392 - }, - { - "epoch": 11.156714471968709, - "grad_norm": 1.3964712619781494, - "learning_rate": 7.901005025125629e-05, - "loss": 5.7314, - "step": 21393 - }, - { - "epoch": 11.157235984354628, - "grad_norm": 1.4859364032745361, - "learning_rate": 7.900904522613066e-05, - "loss": 5.5918, - "step": 21394 - }, - { - "epoch": 11.157757496740548, - "grad_norm": 1.5116446018218994, - "learning_rate": 7.900804020100503e-05, - "loss": 5.2633, - "step": 21395 - }, - { - "epoch": 11.158279009126467, - "grad_norm": 1.4421643018722534, - "learning_rate": 7.900703517587939e-05, - "loss": 5.8496, - "step": 21396 - }, - { - "epoch": 11.158800521512386, - "grad_norm": 1.382397174835205, - "learning_rate": 7.900603015075377e-05, - "loss": 5.7319, - "step": 21397 - }, - { - "epoch": 11.159322033898306, - "grad_norm": 1.5418555736541748, - "learning_rate": 7.900502512562813e-05, - "loss": 5.4556, - "step": 21398 - }, - { - "epoch": 11.159843546284224, - "grad_norm": 1.386739730834961, - "learning_rate": 7.900402010050251e-05, - "loss": 5.3852, - "step": 21399 - }, - { - "epoch": 11.160365058670143, - "grad_norm": 1.4956578016281128, - "learning_rate": 7.900301507537689e-05, - "loss": 4.8532, - "step": 21400 - }, - { - "epoch": 11.160886571056063, - "grad_norm": 1.5478298664093018, - "learning_rate": 7.900201005025127e-05, - "loss": 5.3722, - "step": 21401 - }, - { - "epoch": 11.161408083441982, - "grad_norm": 1.5793441534042358, - "learning_rate": 7.900100502512563e-05, - "loss": 5.3935, - "step": 21402 - }, - { - "epoch": 11.161929595827901, - "grad_norm": 1.5033138990402222, - "learning_rate": 7.900000000000001e-05, - "loss": 5.5907, - "step": 21403 - }, - { - "epoch": 11.162451108213821, - "grad_norm": 1.4276869297027588, - "learning_rate": 7.899899497487437e-05, - "loss": 4.8596, - "step": 21404 - }, - { - "epoch": 11.162972620599739, - "grad_norm": 1.4800950288772583, - "learning_rate": 7.899798994974875e-05, - "loss": 5.347, - "step": 21405 - }, - { - "epoch": 11.163494132985658, - "grad_norm": 1.3886867761611938, - "learning_rate": 7.899698492462312e-05, - "loss": 5.5428, - "step": 21406 - }, - { - "epoch": 11.164015645371578, - "grad_norm": 1.3459917306900024, - "learning_rate": 7.89959798994975e-05, - "loss": 5.7811, - "step": 21407 - }, - { - "epoch": 11.164537157757497, - "grad_norm": 1.4136309623718262, - "learning_rate": 7.899497487437186e-05, - "loss": 5.7765, - "step": 21408 - }, - { - "epoch": 11.165058670143416, - "grad_norm": 1.4757213592529297, - "learning_rate": 7.899396984924624e-05, - "loss": 5.5431, - "step": 21409 - }, - { - "epoch": 11.165580182529336, - "grad_norm": 1.416334867477417, - "learning_rate": 7.899296482412061e-05, - "loss": 5.4526, - "step": 21410 - }, - { - "epoch": 11.166101694915254, - "grad_norm": 1.4864475727081299, - "learning_rate": 7.899195979899498e-05, - "loss": 5.3419, - "step": 21411 - }, - { - "epoch": 11.166623207301173, - "grad_norm": 1.5004969835281372, - "learning_rate": 7.899095477386936e-05, - "loss": 5.3685, - "step": 21412 - }, - { - "epoch": 11.167144719687093, - "grad_norm": 1.4711356163024902, - "learning_rate": 7.898994974874372e-05, - "loss": 5.3128, - "step": 21413 - }, - { - "epoch": 11.167666232073012, - "grad_norm": 1.4084699153900146, - "learning_rate": 7.89889447236181e-05, - "loss": 5.5163, - "step": 21414 - }, - { - "epoch": 11.168187744458931, - "grad_norm": 1.5308912992477417, - "learning_rate": 7.898793969849246e-05, - "loss": 5.2395, - "step": 21415 - }, - { - "epoch": 11.16870925684485, - "grad_norm": 1.468528389930725, - "learning_rate": 7.898693467336684e-05, - "loss": 5.1627, - "step": 21416 - }, - { - "epoch": 11.169230769230769, - "grad_norm": 1.4206342697143555, - "learning_rate": 7.89859296482412e-05, - "loss": 5.3648, - "step": 21417 - }, - { - "epoch": 11.169752281616688, - "grad_norm": 1.3944761753082275, - "learning_rate": 7.898492462311558e-05, - "loss": 6.0184, - "step": 21418 - }, - { - "epoch": 11.170273794002608, - "grad_norm": 1.4147226810455322, - "learning_rate": 7.898391959798995e-05, - "loss": 5.47, - "step": 21419 - }, - { - "epoch": 11.170795306388527, - "grad_norm": 3.2456023693084717, - "learning_rate": 7.898291457286432e-05, - "loss": 5.7947, - "step": 21420 - }, - { - "epoch": 11.171316818774446, - "grad_norm": 1.4755452871322632, - "learning_rate": 7.89819095477387e-05, - "loss": 5.4626, - "step": 21421 - }, - { - "epoch": 11.171838331160366, - "grad_norm": 1.5307981967926025, - "learning_rate": 7.898090452261307e-05, - "loss": 5.3972, - "step": 21422 - }, - { - "epoch": 11.172359843546284, - "grad_norm": 2.9035685062408447, - "learning_rate": 7.897989949748744e-05, - "loss": 5.531, - "step": 21423 - }, - { - "epoch": 11.172881355932203, - "grad_norm": 1.494965672492981, - "learning_rate": 7.897889447236181e-05, - "loss": 5.274, - "step": 21424 - }, - { - "epoch": 11.173402868318123, - "grad_norm": 1.4702191352844238, - "learning_rate": 7.897788944723619e-05, - "loss": 4.7638, - "step": 21425 - }, - { - "epoch": 11.173924380704042, - "grad_norm": 1.4538065195083618, - "learning_rate": 7.897688442211055e-05, - "loss": 5.9042, - "step": 21426 - }, - { - "epoch": 11.174445893089962, - "grad_norm": 1.4500160217285156, - "learning_rate": 7.897587939698493e-05, - "loss": 5.7525, - "step": 21427 - }, - { - "epoch": 11.17496740547588, - "grad_norm": 1.401063084602356, - "learning_rate": 7.897487437185929e-05, - "loss": 5.2538, - "step": 21428 - }, - { - "epoch": 11.175488917861799, - "grad_norm": 1.4053161144256592, - "learning_rate": 7.897386934673367e-05, - "loss": 5.3865, - "step": 21429 - }, - { - "epoch": 11.176010430247718, - "grad_norm": 1.527596116065979, - "learning_rate": 7.897286432160805e-05, - "loss": 5.289, - "step": 21430 - }, - { - "epoch": 11.176531942633638, - "grad_norm": 1.3261667490005493, - "learning_rate": 7.897185929648243e-05, - "loss": 5.9546, - "step": 21431 - }, - { - "epoch": 11.177053455019557, - "grad_norm": 1.4866340160369873, - "learning_rate": 7.897085427135679e-05, - "loss": 5.8646, - "step": 21432 - }, - { - "epoch": 11.177574967405477, - "grad_norm": 1.4691447019577026, - "learning_rate": 7.896984924623117e-05, - "loss": 5.6522, - "step": 21433 - }, - { - "epoch": 11.178096479791394, - "grad_norm": 1.543697714805603, - "learning_rate": 7.896884422110553e-05, - "loss": 5.3257, - "step": 21434 - }, - { - "epoch": 11.178617992177314, - "grad_norm": 1.3420677185058594, - "learning_rate": 7.89678391959799e-05, - "loss": 5.9075, - "step": 21435 - }, - { - "epoch": 11.179139504563233, - "grad_norm": 1.4195939302444458, - "learning_rate": 7.896683417085427e-05, - "loss": 5.4083, - "step": 21436 - }, - { - "epoch": 11.179661016949153, - "grad_norm": 1.4105525016784668, - "learning_rate": 7.896582914572864e-05, - "loss": 5.5105, - "step": 21437 - }, - { - "epoch": 11.180182529335072, - "grad_norm": 1.4549907445907593, - "learning_rate": 7.896482412060302e-05, - "loss": 5.7765, - "step": 21438 - }, - { - "epoch": 11.180704041720992, - "grad_norm": 1.4540163278579712, - "learning_rate": 7.896381909547738e-05, - "loss": 5.6389, - "step": 21439 - }, - { - "epoch": 11.18122555410691, - "grad_norm": 1.4632179737091064, - "learning_rate": 7.896281407035176e-05, - "loss": 5.4398, - "step": 21440 - }, - { - "epoch": 11.181747066492829, - "grad_norm": 1.51848566532135, - "learning_rate": 7.896180904522614e-05, - "loss": 5.5185, - "step": 21441 - }, - { - "epoch": 11.182268578878748, - "grad_norm": 1.4623430967330933, - "learning_rate": 7.896080402010051e-05, - "loss": 4.9591, - "step": 21442 - }, - { - "epoch": 11.182790091264668, - "grad_norm": 1.3230340480804443, - "learning_rate": 7.895979899497488e-05, - "loss": 4.9826, - "step": 21443 - }, - { - "epoch": 11.183311603650587, - "grad_norm": 1.5166327953338623, - "learning_rate": 7.895879396984926e-05, - "loss": 5.5543, - "step": 21444 - }, - { - "epoch": 11.183833116036507, - "grad_norm": 1.4975099563598633, - "learning_rate": 7.895778894472362e-05, - "loss": 5.4934, - "step": 21445 - }, - { - "epoch": 11.184354628422424, - "grad_norm": 1.6669707298278809, - "learning_rate": 7.8956783919598e-05, - "loss": 5.2082, - "step": 21446 - }, - { - "epoch": 11.184876140808344, - "grad_norm": 1.4566094875335693, - "learning_rate": 7.895577889447236e-05, - "loss": 5.6995, - "step": 21447 - }, - { - "epoch": 11.185397653194263, - "grad_norm": 1.5891447067260742, - "learning_rate": 7.895477386934673e-05, - "loss": 4.9566, - "step": 21448 - }, - { - "epoch": 11.185919165580183, - "grad_norm": 1.4323794841766357, - "learning_rate": 7.89537688442211e-05, - "loss": 5.5042, - "step": 21449 - }, - { - "epoch": 11.186440677966102, - "grad_norm": 1.467942714691162, - "learning_rate": 7.895276381909548e-05, - "loss": 5.5617, - "step": 21450 - }, - { - "epoch": 11.186962190352022, - "grad_norm": 1.426840901374817, - "learning_rate": 7.895175879396986e-05, - "loss": 5.4699, - "step": 21451 - }, - { - "epoch": 11.18748370273794, - "grad_norm": 1.440025806427002, - "learning_rate": 7.895075376884422e-05, - "loss": 5.5095, - "step": 21452 - }, - { - "epoch": 11.188005215123859, - "grad_norm": 1.4048981666564941, - "learning_rate": 7.89497487437186e-05, - "loss": 5.5936, - "step": 21453 - }, - { - "epoch": 11.188526727509778, - "grad_norm": 1.489065408706665, - "learning_rate": 7.894874371859297e-05, - "loss": 5.026, - "step": 21454 - }, - { - "epoch": 11.189048239895698, - "grad_norm": 1.4639655351638794, - "learning_rate": 7.894773869346734e-05, - "loss": 5.6209, - "step": 21455 - }, - { - "epoch": 11.189569752281617, - "grad_norm": 1.33316969871521, - "learning_rate": 7.894673366834171e-05, - "loss": 5.4306, - "step": 21456 - }, - { - "epoch": 11.190091264667537, - "grad_norm": 1.4886330366134644, - "learning_rate": 7.894572864321609e-05, - "loss": 5.479, - "step": 21457 - }, - { - "epoch": 11.190612777053454, - "grad_norm": 1.4789910316467285, - "learning_rate": 7.894472361809045e-05, - "loss": 5.1823, - "step": 21458 - }, - { - "epoch": 11.191134289439374, - "grad_norm": 1.5499558448791504, - "learning_rate": 7.894371859296483e-05, - "loss": 5.1691, - "step": 21459 - }, - { - "epoch": 11.191655801825293, - "grad_norm": 1.8193312883377075, - "learning_rate": 7.894271356783919e-05, - "loss": 5.1624, - "step": 21460 - }, - { - "epoch": 11.192177314211213, - "grad_norm": 1.4061253070831299, - "learning_rate": 7.894170854271357e-05, - "loss": 5.6306, - "step": 21461 - }, - { - "epoch": 11.192698826597132, - "grad_norm": 1.492948293685913, - "learning_rate": 7.894070351758795e-05, - "loss": 5.3694, - "step": 21462 - }, - { - "epoch": 11.193220338983052, - "grad_norm": 1.3546621799468994, - "learning_rate": 7.893969849246231e-05, - "loss": 5.7486, - "step": 21463 - }, - { - "epoch": 11.19374185136897, - "grad_norm": 1.536510705947876, - "learning_rate": 7.893869346733669e-05, - "loss": 5.4113, - "step": 21464 - }, - { - "epoch": 11.194263363754889, - "grad_norm": 1.447645902633667, - "learning_rate": 7.893768844221106e-05, - "loss": 5.5825, - "step": 21465 - }, - { - "epoch": 11.194784876140808, - "grad_norm": 1.5211478471755981, - "learning_rate": 7.893668341708543e-05, - "loss": 5.4131, - "step": 21466 - }, - { - "epoch": 11.195306388526728, - "grad_norm": 1.4291818141937256, - "learning_rate": 7.89356783919598e-05, - "loss": 5.5831, - "step": 21467 - }, - { - "epoch": 11.195827900912647, - "grad_norm": 1.3633430004119873, - "learning_rate": 7.893467336683418e-05, - "loss": 5.8668, - "step": 21468 - }, - { - "epoch": 11.196349413298567, - "grad_norm": 1.5488660335540771, - "learning_rate": 7.893366834170854e-05, - "loss": 5.2447, - "step": 21469 - }, - { - "epoch": 11.196870925684484, - "grad_norm": 1.4044426679611206, - "learning_rate": 7.893266331658292e-05, - "loss": 5.3241, - "step": 21470 - }, - { - "epoch": 11.197392438070404, - "grad_norm": 1.3618015050888062, - "learning_rate": 7.89316582914573e-05, - "loss": 5.6438, - "step": 21471 - }, - { - "epoch": 11.197913950456323, - "grad_norm": 1.4643442630767822, - "learning_rate": 7.893065326633167e-05, - "loss": 5.4334, - "step": 21472 - }, - { - "epoch": 11.198435462842243, - "grad_norm": 1.4577127695083618, - "learning_rate": 7.892964824120604e-05, - "loss": 5.5301, - "step": 21473 - }, - { - "epoch": 11.198956975228162, - "grad_norm": 1.4671868085861206, - "learning_rate": 7.892864321608041e-05, - "loss": 5.4253, - "step": 21474 - }, - { - "epoch": 11.199478487614082, - "grad_norm": 1.3992878198623657, - "learning_rate": 7.892763819095478e-05, - "loss": 5.0573, - "step": 21475 - }, - { - "epoch": 11.2, - "grad_norm": 2.1125471591949463, - "learning_rate": 7.892663316582914e-05, - "loss": 4.505, - "step": 21476 - }, - { - "epoch": 11.200521512385919, - "grad_norm": 1.4772303104400635, - "learning_rate": 7.892562814070352e-05, - "loss": 5.6599, - "step": 21477 - }, - { - "epoch": 11.201043024771838, - "grad_norm": 1.5771074295043945, - "learning_rate": 7.892462311557789e-05, - "loss": 5.3751, - "step": 21478 - }, - { - "epoch": 11.201564537157758, - "grad_norm": 1.535473108291626, - "learning_rate": 7.892361809045226e-05, - "loss": 5.4789, - "step": 21479 - }, - { - "epoch": 11.202086049543677, - "grad_norm": 1.5222721099853516, - "learning_rate": 7.892261306532663e-05, - "loss": 5.4958, - "step": 21480 - }, - { - "epoch": 11.202607561929597, - "grad_norm": 1.3667140007019043, - "learning_rate": 7.8921608040201e-05, - "loss": 5.8466, - "step": 21481 - }, - { - "epoch": 11.203129074315514, - "grad_norm": 1.3408379554748535, - "learning_rate": 7.892060301507538e-05, - "loss": 5.286, - "step": 21482 - }, - { - "epoch": 11.203650586701434, - "grad_norm": 1.5271731615066528, - "learning_rate": 7.891959798994976e-05, - "loss": 5.2534, - "step": 21483 - }, - { - "epoch": 11.204172099087353, - "grad_norm": 1.3228726387023926, - "learning_rate": 7.891859296482413e-05, - "loss": 5.4423, - "step": 21484 - }, - { - "epoch": 11.204693611473273, - "grad_norm": 1.3987928628921509, - "learning_rate": 7.89175879396985e-05, - "loss": 5.3836, - "step": 21485 - }, - { - "epoch": 11.205215123859192, - "grad_norm": 1.3483314514160156, - "learning_rate": 7.891658291457287e-05, - "loss": 5.541, - "step": 21486 - }, - { - "epoch": 11.205736636245112, - "grad_norm": 1.4330178499221802, - "learning_rate": 7.891557788944725e-05, - "loss": 5.9208, - "step": 21487 - }, - { - "epoch": 11.20625814863103, - "grad_norm": 1.3872272968292236, - "learning_rate": 7.891457286432161e-05, - "loss": 5.4811, - "step": 21488 - }, - { - "epoch": 11.206779661016949, - "grad_norm": 1.4325693845748901, - "learning_rate": 7.891356783919597e-05, - "loss": 5.3847, - "step": 21489 - }, - { - "epoch": 11.207301173402868, - "grad_norm": 1.4498069286346436, - "learning_rate": 7.891256281407035e-05, - "loss": 4.9195, - "step": 21490 - }, - { - "epoch": 11.207822685788788, - "grad_norm": 1.5049388408660889, - "learning_rate": 7.891155778894473e-05, - "loss": 5.5336, - "step": 21491 - }, - { - "epoch": 11.208344198174707, - "grad_norm": 1.4558861255645752, - "learning_rate": 7.891055276381911e-05, - "loss": 5.3999, - "step": 21492 - }, - { - "epoch": 11.208865710560627, - "grad_norm": 1.4831517934799194, - "learning_rate": 7.890954773869347e-05, - "loss": 5.5013, - "step": 21493 - }, - { - "epoch": 11.209387222946544, - "grad_norm": 2.994799852371216, - "learning_rate": 7.890854271356785e-05, - "loss": 5.5826, - "step": 21494 - }, - { - "epoch": 11.209908735332464, - "grad_norm": 1.4364490509033203, - "learning_rate": 7.890753768844221e-05, - "loss": 5.5178, - "step": 21495 - }, - { - "epoch": 11.210430247718383, - "grad_norm": 1.4685165882110596, - "learning_rate": 7.890653266331659e-05, - "loss": 5.4407, - "step": 21496 - }, - { - "epoch": 11.210951760104303, - "grad_norm": 1.5045666694641113, - "learning_rate": 7.890552763819096e-05, - "loss": 5.4671, - "step": 21497 - }, - { - "epoch": 11.211473272490222, - "grad_norm": 1.4626744985580444, - "learning_rate": 7.890452261306533e-05, - "loss": 5.6034, - "step": 21498 - }, - { - "epoch": 11.211994784876142, - "grad_norm": 1.568367600440979, - "learning_rate": 7.89035175879397e-05, - "loss": 5.5173, - "step": 21499 - }, - { - "epoch": 11.21251629726206, - "grad_norm": 1.5381991863250732, - "learning_rate": 7.890251256281408e-05, - "loss": 5.233, - "step": 21500 - }, - { - "epoch": 11.213037809647979, - "grad_norm": 1.5123845338821411, - "learning_rate": 7.890150753768844e-05, - "loss": 5.6484, - "step": 21501 - }, - { - "epoch": 11.213559322033898, - "grad_norm": 1.4461729526519775, - "learning_rate": 7.890050251256282e-05, - "loss": 4.9405, - "step": 21502 - }, - { - "epoch": 11.214080834419818, - "grad_norm": 1.5219615697860718, - "learning_rate": 7.88994974874372e-05, - "loss": 5.0119, - "step": 21503 - }, - { - "epoch": 11.214602346805737, - "grad_norm": 1.4035308361053467, - "learning_rate": 7.889849246231156e-05, - "loss": 5.2054, - "step": 21504 - }, - { - "epoch": 11.215123859191657, - "grad_norm": 1.4346859455108643, - "learning_rate": 7.889748743718594e-05, - "loss": 5.1466, - "step": 21505 - }, - { - "epoch": 11.215645371577574, - "grad_norm": 1.3500901460647583, - "learning_rate": 7.88964824120603e-05, - "loss": 5.7903, - "step": 21506 - }, - { - "epoch": 11.216166883963494, - "grad_norm": 1.507265567779541, - "learning_rate": 7.889547738693468e-05, - "loss": 5.3845, - "step": 21507 - }, - { - "epoch": 11.216688396349413, - "grad_norm": 1.367097020149231, - "learning_rate": 7.889447236180904e-05, - "loss": 5.664, - "step": 21508 - }, - { - "epoch": 11.217209908735333, - "grad_norm": 1.3703149557113647, - "learning_rate": 7.889346733668342e-05, - "loss": 5.2935, - "step": 21509 - }, - { - "epoch": 11.217731421121252, - "grad_norm": 1.3428248167037964, - "learning_rate": 7.889246231155779e-05, - "loss": 5.6581, - "step": 21510 - }, - { - "epoch": 11.21825293350717, - "grad_norm": 1.4297832250595093, - "learning_rate": 7.889145728643216e-05, - "loss": 5.6598, - "step": 21511 - }, - { - "epoch": 11.21877444589309, - "grad_norm": 1.5338757038116455, - "learning_rate": 7.889045226130654e-05, - "loss": 5.3501, - "step": 21512 - }, - { - "epoch": 11.219295958279009, - "grad_norm": 1.6730594635009766, - "learning_rate": 7.888944723618092e-05, - "loss": 4.9975, - "step": 21513 - }, - { - "epoch": 11.219817470664928, - "grad_norm": 1.3663370609283447, - "learning_rate": 7.888844221105528e-05, - "loss": 5.6281, - "step": 21514 - }, - { - "epoch": 11.220338983050848, - "grad_norm": 1.5572341680526733, - "learning_rate": 7.888743718592965e-05, - "loss": 5.0866, - "step": 21515 - }, - { - "epoch": 11.220860495436767, - "grad_norm": 1.3738493919372559, - "learning_rate": 7.888643216080403e-05, - "loss": 5.4958, - "step": 21516 - }, - { - "epoch": 11.221382007822687, - "grad_norm": 1.4467744827270508, - "learning_rate": 7.888542713567839e-05, - "loss": 5.3352, - "step": 21517 - }, - { - "epoch": 11.221903520208604, - "grad_norm": 1.4745378494262695, - "learning_rate": 7.888442211055277e-05, - "loss": 5.568, - "step": 21518 - }, - { - "epoch": 11.222425032594524, - "grad_norm": 1.412121295928955, - "learning_rate": 7.888341708542713e-05, - "loss": 5.6383, - "step": 21519 - }, - { - "epoch": 11.222946544980443, - "grad_norm": 1.3945833444595337, - "learning_rate": 7.888241206030151e-05, - "loss": 5.5991, - "step": 21520 - }, - { - "epoch": 11.223468057366363, - "grad_norm": 1.450310230255127, - "learning_rate": 7.888140703517587e-05, - "loss": 5.6653, - "step": 21521 - }, - { - "epoch": 11.223989569752282, - "grad_norm": 1.5411611795425415, - "learning_rate": 7.888040201005025e-05, - "loss": 5.6859, - "step": 21522 - }, - { - "epoch": 11.2245110821382, - "grad_norm": 1.3612892627716064, - "learning_rate": 7.887939698492463e-05, - "loss": 5.5687, - "step": 21523 - }, - { - "epoch": 11.22503259452412, - "grad_norm": 1.3785853385925293, - "learning_rate": 7.887839195979901e-05, - "loss": 5.5774, - "step": 21524 - }, - { - "epoch": 11.225554106910039, - "grad_norm": 1.4974998235702515, - "learning_rate": 7.887738693467337e-05, - "loss": 5.3532, - "step": 21525 - }, - { - "epoch": 11.226075619295958, - "grad_norm": 1.465301513671875, - "learning_rate": 7.887638190954775e-05, - "loss": 5.0989, - "step": 21526 - }, - { - "epoch": 11.226597131681878, - "grad_norm": 1.4580435752868652, - "learning_rate": 7.887537688442211e-05, - "loss": 4.9424, - "step": 21527 - }, - { - "epoch": 11.227118644067797, - "grad_norm": 1.5323933362960815, - "learning_rate": 7.887437185929648e-05, - "loss": 5.7789, - "step": 21528 - }, - { - "epoch": 11.227640156453715, - "grad_norm": 1.4701049327850342, - "learning_rate": 7.887336683417086e-05, - "loss": 5.3661, - "step": 21529 - }, - { - "epoch": 11.228161668839634, - "grad_norm": 1.5602619647979736, - "learning_rate": 7.887236180904522e-05, - "loss": 5.1222, - "step": 21530 - }, - { - "epoch": 11.228683181225554, - "grad_norm": 1.447422981262207, - "learning_rate": 7.88713567839196e-05, - "loss": 5.3228, - "step": 21531 - }, - { - "epoch": 11.229204693611473, - "grad_norm": 1.4521474838256836, - "learning_rate": 7.887035175879398e-05, - "loss": 5.6748, - "step": 21532 - }, - { - "epoch": 11.229726205997393, - "grad_norm": 1.4486596584320068, - "learning_rate": 7.886934673366835e-05, - "loss": 5.1863, - "step": 21533 - }, - { - "epoch": 11.230247718383312, - "grad_norm": 1.4275764226913452, - "learning_rate": 7.886834170854272e-05, - "loss": 5.4241, - "step": 21534 - }, - { - "epoch": 11.23076923076923, - "grad_norm": 1.5261961221694946, - "learning_rate": 7.88673366834171e-05, - "loss": 5.446, - "step": 21535 - }, - { - "epoch": 11.23129074315515, - "grad_norm": 1.4586701393127441, - "learning_rate": 7.886633165829146e-05, - "loss": 5.6475, - "step": 21536 - }, - { - "epoch": 11.231812255541069, - "grad_norm": 1.4515610933303833, - "learning_rate": 7.886532663316584e-05, - "loss": 5.5842, - "step": 21537 - }, - { - "epoch": 11.232333767926988, - "grad_norm": 1.6099001169204712, - "learning_rate": 7.88643216080402e-05, - "loss": 5.3886, - "step": 21538 - }, - { - "epoch": 11.232855280312908, - "grad_norm": 1.477060079574585, - "learning_rate": 7.886331658291458e-05, - "loss": 5.6424, - "step": 21539 - }, - { - "epoch": 11.233376792698827, - "grad_norm": 1.465377688407898, - "learning_rate": 7.886231155778895e-05, - "loss": 5.4489, - "step": 21540 - }, - { - "epoch": 11.233898305084745, - "grad_norm": 1.496896505355835, - "learning_rate": 7.886130653266331e-05, - "loss": 5.2585, - "step": 21541 - }, - { - "epoch": 11.234419817470664, - "grad_norm": 1.3949434757232666, - "learning_rate": 7.886030150753769e-05, - "loss": 4.9599, - "step": 21542 - }, - { - "epoch": 11.234941329856584, - "grad_norm": 1.6489702463150024, - "learning_rate": 7.885929648241206e-05, - "loss": 4.4796, - "step": 21543 - }, - { - "epoch": 11.235462842242503, - "grad_norm": 1.6040754318237305, - "learning_rate": 7.885829145728644e-05, - "loss": 5.4701, - "step": 21544 - }, - { - "epoch": 11.235984354628423, - "grad_norm": 1.47360098361969, - "learning_rate": 7.885728643216081e-05, - "loss": 5.7293, - "step": 21545 - }, - { - "epoch": 11.236505867014342, - "grad_norm": 1.5060369968414307, - "learning_rate": 7.885628140703518e-05, - "loss": 5.4387, - "step": 21546 - }, - { - "epoch": 11.23702737940026, - "grad_norm": 1.4256051778793335, - "learning_rate": 7.885527638190955e-05, - "loss": 5.7548, - "step": 21547 - }, - { - "epoch": 11.23754889178618, - "grad_norm": 1.4275476932525635, - "learning_rate": 7.885427135678393e-05, - "loss": 5.2658, - "step": 21548 - }, - { - "epoch": 11.238070404172099, - "grad_norm": 1.5032707452774048, - "learning_rate": 7.885326633165829e-05, - "loss": 5.4251, - "step": 21549 - }, - { - "epoch": 11.238591916558018, - "grad_norm": 1.4898746013641357, - "learning_rate": 7.885226130653267e-05, - "loss": 5.3745, - "step": 21550 - }, - { - "epoch": 11.239113428943938, - "grad_norm": 1.4509886503219604, - "learning_rate": 7.885125628140703e-05, - "loss": 5.5634, - "step": 21551 - }, - { - "epoch": 11.239634941329857, - "grad_norm": 1.5046229362487793, - "learning_rate": 7.885025125628141e-05, - "loss": 5.1409, - "step": 21552 - }, - { - "epoch": 11.240156453715775, - "grad_norm": 1.4850744009017944, - "learning_rate": 7.884924623115578e-05, - "loss": 5.6722, - "step": 21553 - }, - { - "epoch": 11.240677966101694, - "grad_norm": 1.3835476636886597, - "learning_rate": 7.884824120603015e-05, - "loss": 5.3723, - "step": 21554 - }, - { - "epoch": 11.241199478487614, - "grad_norm": 1.496640920639038, - "learning_rate": 7.884723618090453e-05, - "loss": 5.3677, - "step": 21555 - }, - { - "epoch": 11.241720990873533, - "grad_norm": 1.4247251749038696, - "learning_rate": 7.88462311557789e-05, - "loss": 5.3982, - "step": 21556 - }, - { - "epoch": 11.242242503259453, - "grad_norm": 1.4319449663162231, - "learning_rate": 7.884522613065327e-05, - "loss": 5.5826, - "step": 21557 - }, - { - "epoch": 11.242764015645372, - "grad_norm": 1.4314829111099243, - "learning_rate": 7.884422110552764e-05, - "loss": 5.6213, - "step": 21558 - }, - { - "epoch": 11.24328552803129, - "grad_norm": 1.4000111818313599, - "learning_rate": 7.884321608040202e-05, - "loss": 5.8262, - "step": 21559 - }, - { - "epoch": 11.24380704041721, - "grad_norm": 1.375978946685791, - "learning_rate": 7.884221105527638e-05, - "loss": 5.3902, - "step": 21560 - }, - { - "epoch": 11.244328552803129, - "grad_norm": 1.440431833267212, - "learning_rate": 7.884120603015076e-05, - "loss": 5.0666, - "step": 21561 - }, - { - "epoch": 11.244850065189048, - "grad_norm": 1.3646066188812256, - "learning_rate": 7.884020100502512e-05, - "loss": 5.8275, - "step": 21562 - }, - { - "epoch": 11.245371577574968, - "grad_norm": 1.4053401947021484, - "learning_rate": 7.88391959798995e-05, - "loss": 5.4296, - "step": 21563 - }, - { - "epoch": 11.245893089960887, - "grad_norm": 1.365931749343872, - "learning_rate": 7.883819095477388e-05, - "loss": 5.4847, - "step": 21564 - }, - { - "epoch": 11.246414602346805, - "grad_norm": 1.4233733415603638, - "learning_rate": 7.883718592964826e-05, - "loss": 5.7139, - "step": 21565 - }, - { - "epoch": 11.246936114732724, - "grad_norm": 1.493709683418274, - "learning_rate": 7.883618090452262e-05, - "loss": 5.0806, - "step": 21566 - }, - { - "epoch": 11.247457627118644, - "grad_norm": 1.457600474357605, - "learning_rate": 7.8835175879397e-05, - "loss": 5.5715, - "step": 21567 - }, - { - "epoch": 11.247979139504563, - "grad_norm": 1.4536068439483643, - "learning_rate": 7.883417085427136e-05, - "loss": 5.4161, - "step": 21568 - }, - { - "epoch": 11.248500651890483, - "grad_norm": 1.4486724138259888, - "learning_rate": 7.883316582914573e-05, - "loss": 5.6544, - "step": 21569 - }, - { - "epoch": 11.249022164276402, - "grad_norm": 1.4196668863296509, - "learning_rate": 7.88321608040201e-05, - "loss": 5.4837, - "step": 21570 - }, - { - "epoch": 11.24954367666232, - "grad_norm": 1.4879069328308105, - "learning_rate": 7.883115577889447e-05, - "loss": 5.483, - "step": 21571 - }, - { - "epoch": 11.25006518904824, - "grad_norm": 1.5800832509994507, - "learning_rate": 7.883015075376885e-05, - "loss": 5.6275, - "step": 21572 - }, - { - "epoch": 11.250586701434159, - "grad_norm": 1.4933505058288574, - "learning_rate": 7.882914572864321e-05, - "loss": 4.9533, - "step": 21573 - }, - { - "epoch": 11.251108213820078, - "grad_norm": 1.5035470724105835, - "learning_rate": 7.882814070351759e-05, - "loss": 5.6989, - "step": 21574 - }, - { - "epoch": 11.251629726205998, - "grad_norm": 1.3317551612854004, - "learning_rate": 7.882713567839197e-05, - "loss": 5.2577, - "step": 21575 - }, - { - "epoch": 11.252151238591917, - "grad_norm": 1.3526691198349, - "learning_rate": 7.882613065326634e-05, - "loss": 5.6486, - "step": 21576 - }, - { - "epoch": 11.252672750977835, - "grad_norm": 1.5067870616912842, - "learning_rate": 7.882512562814071e-05, - "loss": 5.0809, - "step": 21577 - }, - { - "epoch": 11.253194263363755, - "grad_norm": 1.4263241291046143, - "learning_rate": 7.882412060301509e-05, - "loss": 5.0907, - "step": 21578 - }, - { - "epoch": 11.253715775749674, - "grad_norm": 1.4343351125717163, - "learning_rate": 7.882311557788945e-05, - "loss": 5.6543, - "step": 21579 - }, - { - "epoch": 11.254237288135593, - "grad_norm": 1.4877642393112183, - "learning_rate": 7.882211055276383e-05, - "loss": 5.0292, - "step": 21580 - }, - { - "epoch": 11.254758800521513, - "grad_norm": 1.376044750213623, - "learning_rate": 7.882110552763819e-05, - "loss": 4.6048, - "step": 21581 - }, - { - "epoch": 11.255280312907432, - "grad_norm": 1.3962892293930054, - "learning_rate": 7.882010050251256e-05, - "loss": 4.9571, - "step": 21582 - }, - { - "epoch": 11.25580182529335, - "grad_norm": 1.5281423330307007, - "learning_rate": 7.881909547738693e-05, - "loss": 5.2469, - "step": 21583 - }, - { - "epoch": 11.25632333767927, - "grad_norm": 1.4197388887405396, - "learning_rate": 7.881809045226131e-05, - "loss": 4.9759, - "step": 21584 - }, - { - "epoch": 11.256844850065189, - "grad_norm": 1.4619269371032715, - "learning_rate": 7.881708542713569e-05, - "loss": 5.3881, - "step": 21585 - }, - { - "epoch": 11.257366362451108, - "grad_norm": 1.4233261346817017, - "learning_rate": 7.881608040201005e-05, - "loss": 5.611, - "step": 21586 - }, - { - "epoch": 11.257887874837028, - "grad_norm": 1.5144531726837158, - "learning_rate": 7.881507537688443e-05, - "loss": 5.1416, - "step": 21587 - }, - { - "epoch": 11.258409387222947, - "grad_norm": 1.504586935043335, - "learning_rate": 7.88140703517588e-05, - "loss": 5.164, - "step": 21588 - }, - { - "epoch": 11.258930899608865, - "grad_norm": 1.5594593286514282, - "learning_rate": 7.881306532663317e-05, - "loss": 5.0528, - "step": 21589 - }, - { - "epoch": 11.259452411994785, - "grad_norm": 1.5064724683761597, - "learning_rate": 7.881206030150754e-05, - "loss": 4.9534, - "step": 21590 - }, - { - "epoch": 11.259973924380704, - "grad_norm": 1.5834522247314453, - "learning_rate": 7.881105527638192e-05, - "loss": 5.3783, - "step": 21591 - }, - { - "epoch": 11.260495436766623, - "grad_norm": 1.478816270828247, - "learning_rate": 7.881005025125628e-05, - "loss": 5.3543, - "step": 21592 - }, - { - "epoch": 11.261016949152543, - "grad_norm": 1.4150121212005615, - "learning_rate": 7.880904522613066e-05, - "loss": 5.6807, - "step": 21593 - }, - { - "epoch": 11.261538461538462, - "grad_norm": 1.5920788049697876, - "learning_rate": 7.880804020100502e-05, - "loss": 5.1905, - "step": 21594 - }, - { - "epoch": 11.26205997392438, - "grad_norm": 1.4119662046432495, - "learning_rate": 7.88070351758794e-05, - "loss": 5.477, - "step": 21595 - }, - { - "epoch": 11.2625814863103, - "grad_norm": 1.4847437143325806, - "learning_rate": 7.880603015075378e-05, - "loss": 5.5603, - "step": 21596 - }, - { - "epoch": 11.263102998696219, - "grad_norm": 1.5683306455612183, - "learning_rate": 7.880502512562814e-05, - "loss": 5.3248, - "step": 21597 - }, - { - "epoch": 11.263624511082138, - "grad_norm": 1.4918023347854614, - "learning_rate": 7.880402010050252e-05, - "loss": 5.7277, - "step": 21598 - }, - { - "epoch": 11.264146023468058, - "grad_norm": 1.5125819444656372, - "learning_rate": 7.880301507537688e-05, - "loss": 5.5612, - "step": 21599 - }, - { - "epoch": 11.264667535853977, - "grad_norm": 1.5569976568222046, - "learning_rate": 7.880201005025126e-05, - "loss": 4.9604, - "step": 21600 - }, - { - "epoch": 11.265189048239895, - "grad_norm": 1.4182040691375732, - "learning_rate": 7.880100502512563e-05, - "loss": 5.6887, - "step": 21601 - }, - { - "epoch": 11.265710560625815, - "grad_norm": 1.4610908031463623, - "learning_rate": 7.88e-05, - "loss": 5.6013, - "step": 21602 - }, - { - "epoch": 11.266232073011734, - "grad_norm": 1.5677963495254517, - "learning_rate": 7.879899497487437e-05, - "loss": 5.2086, - "step": 21603 - }, - { - "epoch": 11.266753585397653, - "grad_norm": 1.359065294265747, - "learning_rate": 7.879798994974875e-05, - "loss": 5.3232, - "step": 21604 - }, - { - "epoch": 11.267275097783573, - "grad_norm": 1.460388422012329, - "learning_rate": 7.879698492462312e-05, - "loss": 5.7164, - "step": 21605 - }, - { - "epoch": 11.26779661016949, - "grad_norm": 1.4160175323486328, - "learning_rate": 7.87959798994975e-05, - "loss": 5.4959, - "step": 21606 - }, - { - "epoch": 11.26831812255541, - "grad_norm": 1.4602148532867432, - "learning_rate": 7.879497487437187e-05, - "loss": 5.616, - "step": 21607 - }, - { - "epoch": 11.26883963494133, - "grad_norm": 1.5534533262252808, - "learning_rate": 7.879396984924623e-05, - "loss": 5.0207, - "step": 21608 - }, - { - "epoch": 11.269361147327249, - "grad_norm": 1.473680019378662, - "learning_rate": 7.879296482412061e-05, - "loss": 5.6233, - "step": 21609 - }, - { - "epoch": 11.269882659713168, - "grad_norm": 1.470304250717163, - "learning_rate": 7.879195979899497e-05, - "loss": 5.7335, - "step": 21610 - }, - { - "epoch": 11.270404172099088, - "grad_norm": 1.4981063604354858, - "learning_rate": 7.879095477386935e-05, - "loss": 5.5175, - "step": 21611 - }, - { - "epoch": 11.270925684485007, - "grad_norm": 1.4647741317749023, - "learning_rate": 7.878994974874371e-05, - "loss": 5.6077, - "step": 21612 - }, - { - "epoch": 11.271447196870925, - "grad_norm": 1.43826162815094, - "learning_rate": 7.878894472361809e-05, - "loss": 5.5871, - "step": 21613 - }, - { - "epoch": 11.271968709256845, - "grad_norm": 1.4858157634735107, - "learning_rate": 7.878793969849246e-05, - "loss": 5.4493, - "step": 21614 - }, - { - "epoch": 11.272490221642764, - "grad_norm": 1.437557339668274, - "learning_rate": 7.878693467336683e-05, - "loss": 5.2311, - "step": 21615 - }, - { - "epoch": 11.273011734028684, - "grad_norm": 1.4938292503356934, - "learning_rate": 7.878592964824121e-05, - "loss": 5.3305, - "step": 21616 - }, - { - "epoch": 11.273533246414603, - "grad_norm": 1.4398385286331177, - "learning_rate": 7.878492462311559e-05, - "loss": 5.1291, - "step": 21617 - }, - { - "epoch": 11.27405475880052, - "grad_norm": 1.414345383644104, - "learning_rate": 7.878391959798995e-05, - "loss": 5.0577, - "step": 21618 - }, - { - "epoch": 11.27457627118644, - "grad_norm": 1.390173077583313, - "learning_rate": 7.878291457286433e-05, - "loss": 4.8144, - "step": 21619 - }, - { - "epoch": 11.27509778357236, - "grad_norm": 1.4529165029525757, - "learning_rate": 7.87819095477387e-05, - "loss": 5.4237, - "step": 21620 - }, - { - "epoch": 11.275619295958279, - "grad_norm": 1.5168319940567017, - "learning_rate": 7.878090452261306e-05, - "loss": 5.2594, - "step": 21621 - }, - { - "epoch": 11.276140808344199, - "grad_norm": 1.5305049419403076, - "learning_rate": 7.877989949748744e-05, - "loss": 5.1227, - "step": 21622 - }, - { - "epoch": 11.276662320730118, - "grad_norm": 1.3875600099563599, - "learning_rate": 7.87788944723618e-05, - "loss": 5.5648, - "step": 21623 - }, - { - "epoch": 11.277183833116036, - "grad_norm": 1.3123477697372437, - "learning_rate": 7.877788944723618e-05, - "loss": 5.6254, - "step": 21624 - }, - { - "epoch": 11.277705345501955, - "grad_norm": 1.45758056640625, - "learning_rate": 7.877688442211056e-05, - "loss": 5.8644, - "step": 21625 - }, - { - "epoch": 11.278226857887875, - "grad_norm": 1.4205445051193237, - "learning_rate": 7.877587939698494e-05, - "loss": 5.7443, - "step": 21626 - }, - { - "epoch": 11.278748370273794, - "grad_norm": 1.4454681873321533, - "learning_rate": 7.87748743718593e-05, - "loss": 5.6216, - "step": 21627 - }, - { - "epoch": 11.279269882659714, - "grad_norm": 1.3755699396133423, - "learning_rate": 7.877386934673368e-05, - "loss": 5.7872, - "step": 21628 - }, - { - "epoch": 11.279791395045633, - "grad_norm": 1.4007513523101807, - "learning_rate": 7.877286432160804e-05, - "loss": 5.535, - "step": 21629 - }, - { - "epoch": 11.28031290743155, - "grad_norm": 1.4445738792419434, - "learning_rate": 7.877185929648242e-05, - "loss": 5.3599, - "step": 21630 - }, - { - "epoch": 11.28083441981747, - "grad_norm": 1.4437800645828247, - "learning_rate": 7.877085427135679e-05, - "loss": 5.6709, - "step": 21631 - }, - { - "epoch": 11.28135593220339, - "grad_norm": 1.5837682485580444, - "learning_rate": 7.876984924623116e-05, - "loss": 5.7918, - "step": 21632 - }, - { - "epoch": 11.281877444589309, - "grad_norm": 1.518086552619934, - "learning_rate": 7.876884422110553e-05, - "loss": 5.2927, - "step": 21633 - }, - { - "epoch": 11.282398956975229, - "grad_norm": 1.5282893180847168, - "learning_rate": 7.876783919597989e-05, - "loss": 5.4521, - "step": 21634 - }, - { - "epoch": 11.282920469361148, - "grad_norm": 1.5289322137832642, - "learning_rate": 7.876683417085427e-05, - "loss": 5.3482, - "step": 21635 - }, - { - "epoch": 11.283441981747066, - "grad_norm": 1.4867479801177979, - "learning_rate": 7.876582914572865e-05, - "loss": 5.4155, - "step": 21636 - }, - { - "epoch": 11.283963494132985, - "grad_norm": 1.5066344738006592, - "learning_rate": 7.876482412060303e-05, - "loss": 5.5038, - "step": 21637 - }, - { - "epoch": 11.284485006518905, - "grad_norm": 1.4927717447280884, - "learning_rate": 7.876381909547739e-05, - "loss": 5.5159, - "step": 21638 - }, - { - "epoch": 11.285006518904824, - "grad_norm": 1.7075626850128174, - "learning_rate": 7.876281407035177e-05, - "loss": 5.051, - "step": 21639 - }, - { - "epoch": 11.285528031290744, - "grad_norm": 1.4367772340774536, - "learning_rate": 7.876180904522613e-05, - "loss": 5.5218, - "step": 21640 - }, - { - "epoch": 11.286049543676663, - "grad_norm": 1.5889720916748047, - "learning_rate": 7.876080402010051e-05, - "loss": 5.2232, - "step": 21641 - }, - { - "epoch": 11.28657105606258, - "grad_norm": 1.4641749858856201, - "learning_rate": 7.875979899497487e-05, - "loss": 5.4305, - "step": 21642 - }, - { - "epoch": 11.2870925684485, - "grad_norm": 1.431882381439209, - "learning_rate": 7.875879396984925e-05, - "loss": 5.5397, - "step": 21643 - }, - { - "epoch": 11.28761408083442, - "grad_norm": 1.5783023834228516, - "learning_rate": 7.875778894472362e-05, - "loss": 5.461, - "step": 21644 - }, - { - "epoch": 11.288135593220339, - "grad_norm": 1.3988182544708252, - "learning_rate": 7.8756783919598e-05, - "loss": 5.7646, - "step": 21645 - }, - { - "epoch": 11.288657105606259, - "grad_norm": 1.4139059782028198, - "learning_rate": 7.875577889447237e-05, - "loss": 4.8788, - "step": 21646 - }, - { - "epoch": 11.289178617992178, - "grad_norm": 1.42238450050354, - "learning_rate": 7.875477386934675e-05, - "loss": 5.659, - "step": 21647 - }, - { - "epoch": 11.289700130378096, - "grad_norm": 1.467110276222229, - "learning_rate": 7.875376884422111e-05, - "loss": 5.1249, - "step": 21648 - }, - { - "epoch": 11.290221642764015, - "grad_norm": 1.509706974029541, - "learning_rate": 7.875276381909548e-05, - "loss": 5.1249, - "step": 21649 - }, - { - "epoch": 11.290743155149935, - "grad_norm": 1.587476134300232, - "learning_rate": 7.875175879396986e-05, - "loss": 5.6022, - "step": 21650 - }, - { - "epoch": 11.291264667535854, - "grad_norm": 1.4178283214569092, - "learning_rate": 7.875075376884422e-05, - "loss": 5.2942, - "step": 21651 - }, - { - "epoch": 11.291786179921774, - "grad_norm": 1.5194664001464844, - "learning_rate": 7.87497487437186e-05, - "loss": 5.5068, - "step": 21652 - }, - { - "epoch": 11.292307692307693, - "grad_norm": 1.3547942638397217, - "learning_rate": 7.874874371859296e-05, - "loss": 5.6458, - "step": 21653 - }, - { - "epoch": 11.29282920469361, - "grad_norm": 1.3816196918487549, - "learning_rate": 7.874773869346734e-05, - "loss": 5.6521, - "step": 21654 - }, - { - "epoch": 11.29335071707953, - "grad_norm": 1.677965521812439, - "learning_rate": 7.87467336683417e-05, - "loss": 5.103, - "step": 21655 - }, - { - "epoch": 11.29387222946545, - "grad_norm": 1.3823999166488647, - "learning_rate": 7.874572864321608e-05, - "loss": 5.6454, - "step": 21656 - }, - { - "epoch": 11.29439374185137, - "grad_norm": 1.4373409748077393, - "learning_rate": 7.874472361809046e-05, - "loss": 5.2443, - "step": 21657 - }, - { - "epoch": 11.294915254237289, - "grad_norm": 1.4840341806411743, - "learning_rate": 7.874371859296484e-05, - "loss": 5.4994, - "step": 21658 - }, - { - "epoch": 11.295436766623208, - "grad_norm": 1.4489637613296509, - "learning_rate": 7.87427135678392e-05, - "loss": 5.5414, - "step": 21659 - }, - { - "epoch": 11.295958279009126, - "grad_norm": 1.5394620895385742, - "learning_rate": 7.874170854271358e-05, - "loss": 5.126, - "step": 21660 - }, - { - "epoch": 11.296479791395045, - "grad_norm": 1.5719976425170898, - "learning_rate": 7.874070351758794e-05, - "loss": 5.0383, - "step": 21661 - }, - { - "epoch": 11.297001303780965, - "grad_norm": 1.4003826379776, - "learning_rate": 7.873969849246231e-05, - "loss": 5.1448, - "step": 21662 - }, - { - "epoch": 11.297522816166884, - "grad_norm": 1.5871381759643555, - "learning_rate": 7.873869346733669e-05, - "loss": 5.7664, - "step": 21663 - }, - { - "epoch": 11.298044328552804, - "grad_norm": 1.405902624130249, - "learning_rate": 7.873768844221105e-05, - "loss": 5.5197, - "step": 21664 - }, - { - "epoch": 11.298565840938723, - "grad_norm": 1.4409465789794922, - "learning_rate": 7.873668341708543e-05, - "loss": 5.3211, - "step": 21665 - }, - { - "epoch": 11.29908735332464, - "grad_norm": 1.3689464330673218, - "learning_rate": 7.87356783919598e-05, - "loss": 5.903, - "step": 21666 - }, - { - "epoch": 11.29960886571056, - "grad_norm": 1.3606454133987427, - "learning_rate": 7.873467336683418e-05, - "loss": 5.9306, - "step": 21667 - }, - { - "epoch": 11.30013037809648, - "grad_norm": 1.3385142087936401, - "learning_rate": 7.873366834170855e-05, - "loss": 5.7855, - "step": 21668 - }, - { - "epoch": 11.3006518904824, - "grad_norm": 1.346885085105896, - "learning_rate": 7.873266331658293e-05, - "loss": 5.6832, - "step": 21669 - }, - { - "epoch": 11.301173402868319, - "grad_norm": 1.5122830867767334, - "learning_rate": 7.873165829145729e-05, - "loss": 5.5178, - "step": 21670 - }, - { - "epoch": 11.301694915254238, - "grad_norm": 1.4186575412750244, - "learning_rate": 7.873065326633167e-05, - "loss": 5.7872, - "step": 21671 - }, - { - "epoch": 11.302216427640156, - "grad_norm": 1.4320359230041504, - "learning_rate": 7.872964824120603e-05, - "loss": 5.4787, - "step": 21672 - }, - { - "epoch": 11.302737940026075, - "grad_norm": 1.4661873579025269, - "learning_rate": 7.872864321608041e-05, - "loss": 5.3163, - "step": 21673 - }, - { - "epoch": 11.303259452411995, - "grad_norm": 1.3950302600860596, - "learning_rate": 7.872763819095477e-05, - "loss": 5.6558, - "step": 21674 - }, - { - "epoch": 11.303780964797914, - "grad_norm": 1.5166090726852417, - "learning_rate": 7.872663316582914e-05, - "loss": 5.4117, - "step": 21675 - }, - { - "epoch": 11.304302477183834, - "grad_norm": 1.5395445823669434, - "learning_rate": 7.872562814070352e-05, - "loss": 5.6975, - "step": 21676 - }, - { - "epoch": 11.304823989569753, - "grad_norm": 1.496320128440857, - "learning_rate": 7.87246231155779e-05, - "loss": 4.9412, - "step": 21677 - }, - { - "epoch": 11.30534550195567, - "grad_norm": 1.449505090713501, - "learning_rate": 7.872361809045227e-05, - "loss": 5.6699, - "step": 21678 - }, - { - "epoch": 11.30586701434159, - "grad_norm": 1.4392473697662354, - "learning_rate": 7.872261306532664e-05, - "loss": 5.4199, - "step": 21679 - }, - { - "epoch": 11.30638852672751, - "grad_norm": 1.3558123111724854, - "learning_rate": 7.872160804020101e-05, - "loss": 5.0571, - "step": 21680 - }, - { - "epoch": 11.30691003911343, - "grad_norm": 1.467471957206726, - "learning_rate": 7.872060301507538e-05, - "loss": 5.3993, - "step": 21681 - }, - { - "epoch": 11.307431551499349, - "grad_norm": 1.424646258354187, - "learning_rate": 7.871959798994976e-05, - "loss": 5.4294, - "step": 21682 - }, - { - "epoch": 11.307953063885268, - "grad_norm": 1.536030650138855, - "learning_rate": 7.871859296482412e-05, - "loss": 4.9608, - "step": 21683 - }, - { - "epoch": 11.308474576271186, - "grad_norm": 1.376602292060852, - "learning_rate": 7.87175879396985e-05, - "loss": 4.9608, - "step": 21684 - }, - { - "epoch": 11.308996088657105, - "grad_norm": 1.4355603456497192, - "learning_rate": 7.871658291457286e-05, - "loss": 5.4988, - "step": 21685 - }, - { - "epoch": 11.309517601043025, - "grad_norm": 1.4147093296051025, - "learning_rate": 7.871557788944724e-05, - "loss": 5.3931, - "step": 21686 - }, - { - "epoch": 11.310039113428944, - "grad_norm": 1.4983640909194946, - "learning_rate": 7.871457286432162e-05, - "loss": 5.3016, - "step": 21687 - }, - { - "epoch": 11.310560625814864, - "grad_norm": 1.3394490480422974, - "learning_rate": 7.871356783919598e-05, - "loss": 5.6722, - "step": 21688 - }, - { - "epoch": 11.311082138200783, - "grad_norm": 1.4607887268066406, - "learning_rate": 7.871256281407036e-05, - "loss": 5.5796, - "step": 21689 - }, - { - "epoch": 11.3116036505867, - "grad_norm": 1.5087865591049194, - "learning_rate": 7.871155778894472e-05, - "loss": 5.0257, - "step": 21690 - }, - { - "epoch": 11.31212516297262, - "grad_norm": 1.3865034580230713, - "learning_rate": 7.87105527638191e-05, - "loss": 5.2286, - "step": 21691 - }, - { - "epoch": 11.31264667535854, - "grad_norm": 1.4927884340286255, - "learning_rate": 7.870954773869347e-05, - "loss": 4.9123, - "step": 21692 - }, - { - "epoch": 11.31316818774446, - "grad_norm": 1.4468927383422852, - "learning_rate": 7.870854271356784e-05, - "loss": 5.5098, - "step": 21693 - }, - { - "epoch": 11.313689700130379, - "grad_norm": 1.4657273292541504, - "learning_rate": 7.870753768844221e-05, - "loss": 5.3475, - "step": 21694 - }, - { - "epoch": 11.314211212516298, - "grad_norm": 1.3471827507019043, - "learning_rate": 7.870653266331659e-05, - "loss": 5.3179, - "step": 21695 - }, - { - "epoch": 11.314732724902216, - "grad_norm": 1.4233967065811157, - "learning_rate": 7.870552763819095e-05, - "loss": 5.7189, - "step": 21696 - }, - { - "epoch": 11.315254237288135, - "grad_norm": 1.4787977933883667, - "learning_rate": 7.870452261306533e-05, - "loss": 5.2952, - "step": 21697 - }, - { - "epoch": 11.315775749674055, - "grad_norm": 1.5005193948745728, - "learning_rate": 7.87035175879397e-05, - "loss": 4.559, - "step": 21698 - }, - { - "epoch": 11.316297262059974, - "grad_norm": 1.4352010488510132, - "learning_rate": 7.870251256281408e-05, - "loss": 5.5067, - "step": 21699 - }, - { - "epoch": 11.316818774445894, - "grad_norm": 1.413405179977417, - "learning_rate": 7.870150753768845e-05, - "loss": 5.224, - "step": 21700 - }, - { - "epoch": 11.317340286831811, - "grad_norm": 1.4513120651245117, - "learning_rate": 7.870050251256281e-05, - "loss": 5.513, - "step": 21701 - }, - { - "epoch": 11.31786179921773, - "grad_norm": 1.5047459602355957, - "learning_rate": 7.869949748743719e-05, - "loss": 4.74, - "step": 21702 - }, - { - "epoch": 11.31838331160365, - "grad_norm": 1.3665696382522583, - "learning_rate": 7.869849246231156e-05, - "loss": 5.8602, - "step": 21703 - }, - { - "epoch": 11.31890482398957, - "grad_norm": 1.4468640089035034, - "learning_rate": 7.869748743718593e-05, - "loss": 5.6038, - "step": 21704 - }, - { - "epoch": 11.31942633637549, - "grad_norm": 1.374899983406067, - "learning_rate": 7.86964824120603e-05, - "loss": 5.7025, - "step": 21705 - }, - { - "epoch": 11.319947848761409, - "grad_norm": 1.351311206817627, - "learning_rate": 7.869547738693468e-05, - "loss": 5.7848, - "step": 21706 - }, - { - "epoch": 11.320469361147328, - "grad_norm": 1.4649688005447388, - "learning_rate": 7.869447236180905e-05, - "loss": 5.7967, - "step": 21707 - }, - { - "epoch": 11.320990873533246, - "grad_norm": 1.5314698219299316, - "learning_rate": 7.869346733668343e-05, - "loss": 5.2417, - "step": 21708 - }, - { - "epoch": 11.321512385919165, - "grad_norm": 1.2981817722320557, - "learning_rate": 7.86924623115578e-05, - "loss": 5.8456, - "step": 21709 - }, - { - "epoch": 11.322033898305085, - "grad_norm": 1.3368864059448242, - "learning_rate": 7.869145728643217e-05, - "loss": 5.6746, - "step": 21710 - }, - { - "epoch": 11.322555410691004, - "grad_norm": 1.3727753162384033, - "learning_rate": 7.869045226130654e-05, - "loss": 5.5538, - "step": 21711 - }, - { - "epoch": 11.323076923076924, - "grad_norm": 1.3732481002807617, - "learning_rate": 7.868944723618092e-05, - "loss": 5.4391, - "step": 21712 - }, - { - "epoch": 11.323598435462841, - "grad_norm": 1.5128899812698364, - "learning_rate": 7.868844221105528e-05, - "loss": 5.4505, - "step": 21713 - }, - { - "epoch": 11.32411994784876, - "grad_norm": 1.3202365636825562, - "learning_rate": 7.868743718592964e-05, - "loss": 5.7668, - "step": 21714 - }, - { - "epoch": 11.32464146023468, - "grad_norm": 1.5275702476501465, - "learning_rate": 7.868643216080402e-05, - "loss": 5.284, - "step": 21715 - }, - { - "epoch": 11.3251629726206, - "grad_norm": 1.443802833557129, - "learning_rate": 7.868542713567839e-05, - "loss": 5.4951, - "step": 21716 - }, - { - "epoch": 11.32568448500652, - "grad_norm": 1.5177580118179321, - "learning_rate": 7.868442211055276e-05, - "loss": 5.4356, - "step": 21717 - }, - { - "epoch": 11.326205997392439, - "grad_norm": 1.4694074392318726, - "learning_rate": 7.868341708542714e-05, - "loss": 5.3902, - "step": 21718 - }, - { - "epoch": 11.326727509778356, - "grad_norm": 1.3986836671829224, - "learning_rate": 7.868241206030152e-05, - "loss": 5.7161, - "step": 21719 - }, - { - "epoch": 11.327249022164276, - "grad_norm": 1.4091523885726929, - "learning_rate": 7.868140703517588e-05, - "loss": 5.0891, - "step": 21720 - }, - { - "epoch": 11.327770534550195, - "grad_norm": 1.4325882196426392, - "learning_rate": 7.868040201005026e-05, - "loss": 5.5224, - "step": 21721 - }, - { - "epoch": 11.328292046936115, - "grad_norm": 1.4647177457809448, - "learning_rate": 7.867939698492463e-05, - "loss": 5.4542, - "step": 21722 - }, - { - "epoch": 11.328813559322034, - "grad_norm": 1.5012428760528564, - "learning_rate": 7.8678391959799e-05, - "loss": 5.6321, - "step": 21723 - }, - { - "epoch": 11.329335071707954, - "grad_norm": 1.4447579383850098, - "learning_rate": 7.867738693467337e-05, - "loss": 5.6305, - "step": 21724 - }, - { - "epoch": 11.329856584093871, - "grad_norm": 1.4803937673568726, - "learning_rate": 7.867638190954775e-05, - "loss": 5.6418, - "step": 21725 - }, - { - "epoch": 11.33037809647979, - "grad_norm": 1.354620099067688, - "learning_rate": 7.867537688442211e-05, - "loss": 5.663, - "step": 21726 - }, - { - "epoch": 11.33089960886571, - "grad_norm": 1.5506181716918945, - "learning_rate": 7.867437185929647e-05, - "loss": 5.4349, - "step": 21727 - }, - { - "epoch": 11.33142112125163, - "grad_norm": 1.598701000213623, - "learning_rate": 7.867336683417085e-05, - "loss": 4.8045, - "step": 21728 - }, - { - "epoch": 11.33194263363755, - "grad_norm": 1.4568543434143066, - "learning_rate": 7.867236180904523e-05, - "loss": 5.1624, - "step": 21729 - }, - { - "epoch": 11.332464146023469, - "grad_norm": 1.4934104681015015, - "learning_rate": 7.867135678391961e-05, - "loss": 5.2705, - "step": 21730 - }, - { - "epoch": 11.332985658409386, - "grad_norm": 1.474877119064331, - "learning_rate": 7.867035175879397e-05, - "loss": 5.958, - "step": 21731 - }, - { - "epoch": 11.333507170795306, - "grad_norm": 1.546889066696167, - "learning_rate": 7.866934673366835e-05, - "loss": 5.0403, - "step": 21732 - }, - { - "epoch": 11.334028683181225, - "grad_norm": 1.4867258071899414, - "learning_rate": 7.866834170854271e-05, - "loss": 5.7254, - "step": 21733 - }, - { - "epoch": 11.334550195567145, - "grad_norm": 1.4174695014953613, - "learning_rate": 7.866733668341709e-05, - "loss": 5.8021, - "step": 21734 - }, - { - "epoch": 11.335071707953064, - "grad_norm": 1.4627094268798828, - "learning_rate": 7.866633165829146e-05, - "loss": 5.4129, - "step": 21735 - }, - { - "epoch": 11.335593220338984, - "grad_norm": 1.388047695159912, - "learning_rate": 7.866532663316583e-05, - "loss": 5.3995, - "step": 21736 - }, - { - "epoch": 11.336114732724901, - "grad_norm": 1.5661519765853882, - "learning_rate": 7.86643216080402e-05, - "loss": 5.5956, - "step": 21737 - }, - { - "epoch": 11.336636245110821, - "grad_norm": 1.5054123401641846, - "learning_rate": 7.866331658291458e-05, - "loss": 5.6972, - "step": 21738 - }, - { - "epoch": 11.33715775749674, - "grad_norm": 1.4591230154037476, - "learning_rate": 7.866231155778895e-05, - "loss": 5.5517, - "step": 21739 - }, - { - "epoch": 11.33767926988266, - "grad_norm": 1.5391422510147095, - "learning_rate": 7.866130653266333e-05, - "loss": 5.5024, - "step": 21740 - }, - { - "epoch": 11.33820078226858, - "grad_norm": 1.4838882684707642, - "learning_rate": 7.86603015075377e-05, - "loss": 5.5934, - "step": 21741 - }, - { - "epoch": 11.338722294654499, - "grad_norm": 1.5804290771484375, - "learning_rate": 7.865929648241206e-05, - "loss": 5.5129, - "step": 21742 - }, - { - "epoch": 11.339243807040416, - "grad_norm": 1.4329185485839844, - "learning_rate": 7.865829145728644e-05, - "loss": 5.4622, - "step": 21743 - }, - { - "epoch": 11.339765319426336, - "grad_norm": 1.607994556427002, - "learning_rate": 7.86572864321608e-05, - "loss": 5.1216, - "step": 21744 - }, - { - "epoch": 11.340286831812255, - "grad_norm": 1.56110417842865, - "learning_rate": 7.865628140703518e-05, - "loss": 5.0403, - "step": 21745 - }, - { - "epoch": 11.340808344198175, - "grad_norm": 1.485825777053833, - "learning_rate": 7.865527638190954e-05, - "loss": 5.1737, - "step": 21746 - }, - { - "epoch": 11.341329856584094, - "grad_norm": 1.5019627809524536, - "learning_rate": 7.865427135678392e-05, - "loss": 5.2306, - "step": 21747 - }, - { - "epoch": 11.341851368970014, - "grad_norm": 1.4020031690597534, - "learning_rate": 7.865326633165829e-05, - "loss": 5.5457, - "step": 21748 - }, - { - "epoch": 11.342372881355931, - "grad_norm": 1.5080572366714478, - "learning_rate": 7.865226130653266e-05, - "loss": 5.1665, - "step": 21749 - }, - { - "epoch": 11.342894393741851, - "grad_norm": 1.5478860139846802, - "learning_rate": 7.865125628140704e-05, - "loss": 5.1837, - "step": 21750 - }, - { - "epoch": 11.34341590612777, - "grad_norm": 1.5807262659072876, - "learning_rate": 7.865025125628142e-05, - "loss": 5.1167, - "step": 21751 - }, - { - "epoch": 11.34393741851369, - "grad_norm": 1.3690661191940308, - "learning_rate": 7.864924623115578e-05, - "loss": 5.7527, - "step": 21752 - }, - { - "epoch": 11.34445893089961, - "grad_norm": 1.3948677778244019, - "learning_rate": 7.864824120603016e-05, - "loss": 5.2899, - "step": 21753 - }, - { - "epoch": 11.344980443285529, - "grad_norm": 1.479723572731018, - "learning_rate": 7.864723618090453e-05, - "loss": 5.2025, - "step": 21754 - }, - { - "epoch": 11.345501955671446, - "grad_norm": 1.4443081617355347, - "learning_rate": 7.864623115577889e-05, - "loss": 5.3808, - "step": 21755 - }, - { - "epoch": 11.346023468057366, - "grad_norm": 1.3138716220855713, - "learning_rate": 7.864522613065327e-05, - "loss": 5.5581, - "step": 21756 - }, - { - "epoch": 11.346544980443285, - "grad_norm": 1.4755359888076782, - "learning_rate": 7.864422110552763e-05, - "loss": 5.3475, - "step": 21757 - }, - { - "epoch": 11.347066492829205, - "grad_norm": 1.474910855293274, - "learning_rate": 7.864321608040201e-05, - "loss": 5.6706, - "step": 21758 - }, - { - "epoch": 11.347588005215124, - "grad_norm": 1.5763154029846191, - "learning_rate": 7.864221105527639e-05, - "loss": 5.3254, - "step": 21759 - }, - { - "epoch": 11.348109517601044, - "grad_norm": 1.4158711433410645, - "learning_rate": 7.864120603015077e-05, - "loss": 5.7263, - "step": 21760 - }, - { - "epoch": 11.348631029986961, - "grad_norm": 1.505584716796875, - "learning_rate": 7.864020100502513e-05, - "loss": 5.2386, - "step": 21761 - }, - { - "epoch": 11.349152542372881, - "grad_norm": 1.570313811302185, - "learning_rate": 7.863919597989951e-05, - "loss": 4.9412, - "step": 21762 - }, - { - "epoch": 11.3496740547588, - "grad_norm": 1.4248522520065308, - "learning_rate": 7.863819095477387e-05, - "loss": 5.5037, - "step": 21763 - }, - { - "epoch": 11.35019556714472, - "grad_norm": 1.5762652158737183, - "learning_rate": 7.863718592964825e-05, - "loss": 5.6628, - "step": 21764 - }, - { - "epoch": 11.35071707953064, - "grad_norm": 1.420334815979004, - "learning_rate": 7.863618090452261e-05, - "loss": 5.3716, - "step": 21765 - }, - { - "epoch": 11.351238591916559, - "grad_norm": 1.5772536993026733, - "learning_rate": 7.863517587939699e-05, - "loss": 4.8715, - "step": 21766 - }, - { - "epoch": 11.351760104302477, - "grad_norm": 1.4493656158447266, - "learning_rate": 7.863417085427136e-05, - "loss": 5.4999, - "step": 21767 - }, - { - "epoch": 11.352281616688396, - "grad_norm": 1.503708839416504, - "learning_rate": 7.863316582914572e-05, - "loss": 5.5539, - "step": 21768 - }, - { - "epoch": 11.352803129074315, - "grad_norm": 1.3251762390136719, - "learning_rate": 7.86321608040201e-05, - "loss": 5.5219, - "step": 21769 - }, - { - "epoch": 11.353324641460235, - "grad_norm": 1.4438284635543823, - "learning_rate": 7.863115577889448e-05, - "loss": 5.6832, - "step": 21770 - }, - { - "epoch": 11.353846153846154, - "grad_norm": 1.3287664651870728, - "learning_rate": 7.863015075376885e-05, - "loss": 5.532, - "step": 21771 - }, - { - "epoch": 11.354367666232074, - "grad_norm": 1.435477614402771, - "learning_rate": 7.862914572864322e-05, - "loss": 5.456, - "step": 21772 - }, - { - "epoch": 11.354889178617992, - "grad_norm": 1.4021674394607544, - "learning_rate": 7.86281407035176e-05, - "loss": 5.4887, - "step": 21773 - }, - { - "epoch": 11.355410691003911, - "grad_norm": 1.4358917474746704, - "learning_rate": 7.862713567839196e-05, - "loss": 5.0422, - "step": 21774 - }, - { - "epoch": 11.35593220338983, - "grad_norm": 1.5566003322601318, - "learning_rate": 7.862613065326634e-05, - "loss": 5.6133, - "step": 21775 - }, - { - "epoch": 11.35645371577575, - "grad_norm": 1.541052222251892, - "learning_rate": 7.86251256281407e-05, - "loss": 5.7871, - "step": 21776 - }, - { - "epoch": 11.35697522816167, - "grad_norm": 1.4621762037277222, - "learning_rate": 7.862412060301508e-05, - "loss": 5.3893, - "step": 21777 - }, - { - "epoch": 11.357496740547589, - "grad_norm": 1.3973109722137451, - "learning_rate": 7.862311557788945e-05, - "loss": 5.664, - "step": 21778 - }, - { - "epoch": 11.358018252933507, - "grad_norm": 1.524572491645813, - "learning_rate": 7.862211055276382e-05, - "loss": 5.1073, - "step": 21779 - }, - { - "epoch": 11.358539765319426, - "grad_norm": 1.3713734149932861, - "learning_rate": 7.86211055276382e-05, - "loss": 5.4258, - "step": 21780 - }, - { - "epoch": 11.359061277705345, - "grad_norm": 1.4195119142532349, - "learning_rate": 7.862010050251257e-05, - "loss": 5.439, - "step": 21781 - }, - { - "epoch": 11.359582790091265, - "grad_norm": 1.53103506565094, - "learning_rate": 7.861909547738694e-05, - "loss": 5.4862, - "step": 21782 - }, - { - "epoch": 11.360104302477184, - "grad_norm": 1.4309606552124023, - "learning_rate": 7.861809045226131e-05, - "loss": 5.3696, - "step": 21783 - }, - { - "epoch": 11.360625814863104, - "grad_norm": 1.4213894605636597, - "learning_rate": 7.861708542713568e-05, - "loss": 5.4576, - "step": 21784 - }, - { - "epoch": 11.361147327249022, - "grad_norm": 1.374656081199646, - "learning_rate": 7.861608040201005e-05, - "loss": 5.7647, - "step": 21785 - }, - { - "epoch": 11.361668839634941, - "grad_norm": 1.4988603591918945, - "learning_rate": 7.861507537688443e-05, - "loss": 5.3268, - "step": 21786 - }, - { - "epoch": 11.36219035202086, - "grad_norm": 1.4622688293457031, - "learning_rate": 7.861407035175879e-05, - "loss": 5.49, - "step": 21787 - }, - { - "epoch": 11.36271186440678, - "grad_norm": 1.4135977029800415, - "learning_rate": 7.861306532663317e-05, - "loss": 5.6199, - "step": 21788 - }, - { - "epoch": 11.3632333767927, - "grad_norm": 1.5629712343215942, - "learning_rate": 7.861206030150753e-05, - "loss": 5.1384, - "step": 21789 - }, - { - "epoch": 11.363754889178619, - "grad_norm": 1.5867526531219482, - "learning_rate": 7.861105527638191e-05, - "loss": 5.4405, - "step": 21790 - }, - { - "epoch": 11.364276401564537, - "grad_norm": 1.436922550201416, - "learning_rate": 7.861005025125629e-05, - "loss": 5.7425, - "step": 21791 - }, - { - "epoch": 11.364797913950456, - "grad_norm": 1.553611159324646, - "learning_rate": 7.860904522613067e-05, - "loss": 5.6427, - "step": 21792 - }, - { - "epoch": 11.365319426336375, - "grad_norm": 1.5044598579406738, - "learning_rate": 7.860804020100503e-05, - "loss": 5.5306, - "step": 21793 - }, - { - "epoch": 11.365840938722295, - "grad_norm": 1.4946401119232178, - "learning_rate": 7.86070351758794e-05, - "loss": 5.1743, - "step": 21794 - }, - { - "epoch": 11.366362451108214, - "grad_norm": 1.4074593782424927, - "learning_rate": 7.860603015075377e-05, - "loss": 5.6987, - "step": 21795 - }, - { - "epoch": 11.366883963494132, - "grad_norm": 1.3871705532073975, - "learning_rate": 7.860502512562814e-05, - "loss": 5.4657, - "step": 21796 - }, - { - "epoch": 11.367405475880052, - "grad_norm": 1.6498055458068848, - "learning_rate": 7.860402010050252e-05, - "loss": 4.6426, - "step": 21797 - }, - { - "epoch": 11.367926988265971, - "grad_norm": 1.48219633102417, - "learning_rate": 7.860301507537688e-05, - "loss": 5.5364, - "step": 21798 - }, - { - "epoch": 11.36844850065189, - "grad_norm": 1.5038427114486694, - "learning_rate": 7.860201005025126e-05, - "loss": 5.696, - "step": 21799 - }, - { - "epoch": 11.36897001303781, - "grad_norm": 1.67229163646698, - "learning_rate": 7.860100502512564e-05, - "loss": 4.5538, - "step": 21800 - }, - { - "epoch": 11.36949152542373, - "grad_norm": 1.3851772546768188, - "learning_rate": 7.860000000000001e-05, - "loss": 5.6611, - "step": 21801 - }, - { - "epoch": 11.370013037809649, - "grad_norm": 1.435460090637207, - "learning_rate": 7.859899497487438e-05, - "loss": 5.5806, - "step": 21802 - }, - { - "epoch": 11.370534550195567, - "grad_norm": 1.468352198600769, - "learning_rate": 7.859798994974876e-05, - "loss": 5.782, - "step": 21803 - }, - { - "epoch": 11.371056062581486, - "grad_norm": 1.3749054670333862, - "learning_rate": 7.859698492462312e-05, - "loss": 5.3778, - "step": 21804 - }, - { - "epoch": 11.371577574967406, - "grad_norm": 1.5206177234649658, - "learning_rate": 7.85959798994975e-05, - "loss": 4.9551, - "step": 21805 - }, - { - "epoch": 11.372099087353325, - "grad_norm": 1.7995771169662476, - "learning_rate": 7.859497487437186e-05, - "loss": 5.3813, - "step": 21806 - }, - { - "epoch": 11.372620599739244, - "grad_norm": 1.4341408014297485, - "learning_rate": 7.859396984924623e-05, - "loss": 5.5137, - "step": 21807 - }, - { - "epoch": 11.373142112125162, - "grad_norm": 1.5919218063354492, - "learning_rate": 7.85929648241206e-05, - "loss": 5.2871, - "step": 21808 - }, - { - "epoch": 11.373663624511082, - "grad_norm": 1.3649134635925293, - "learning_rate": 7.859195979899497e-05, - "loss": 5.7053, - "step": 21809 - }, - { - "epoch": 11.374185136897001, - "grad_norm": 1.4382117986679077, - "learning_rate": 7.859095477386935e-05, - "loss": 5.5924, - "step": 21810 - }, - { - "epoch": 11.37470664928292, - "grad_norm": 1.4861161708831787, - "learning_rate": 7.858994974874372e-05, - "loss": 5.6273, - "step": 21811 - }, - { - "epoch": 11.37522816166884, - "grad_norm": 1.454245686531067, - "learning_rate": 7.85889447236181e-05, - "loss": 5.3579, - "step": 21812 - }, - { - "epoch": 11.37574967405476, - "grad_norm": 1.3954452276229858, - "learning_rate": 7.858793969849247e-05, - "loss": 5.6758, - "step": 21813 - }, - { - "epoch": 11.376271186440677, - "grad_norm": 1.3056309223175049, - "learning_rate": 7.858693467336684e-05, - "loss": 4.8902, - "step": 21814 - }, - { - "epoch": 11.376792698826597, - "grad_norm": 1.7619348764419556, - "learning_rate": 7.858592964824121e-05, - "loss": 4.7097, - "step": 21815 - }, - { - "epoch": 11.377314211212516, - "grad_norm": 1.486729383468628, - "learning_rate": 7.858492462311559e-05, - "loss": 5.2309, - "step": 21816 - }, - { - "epoch": 11.377835723598436, - "grad_norm": 1.4645698070526123, - "learning_rate": 7.858391959798995e-05, - "loss": 5.0711, - "step": 21817 - }, - { - "epoch": 11.378357235984355, - "grad_norm": 1.4453046321868896, - "learning_rate": 7.858291457286433e-05, - "loss": 5.8522, - "step": 21818 - }, - { - "epoch": 11.378878748370274, - "grad_norm": 1.4162838459014893, - "learning_rate": 7.858190954773869e-05, - "loss": 5.2711, - "step": 21819 - }, - { - "epoch": 11.379400260756192, - "grad_norm": 1.485290288925171, - "learning_rate": 7.858090452261307e-05, - "loss": 5.4127, - "step": 21820 - }, - { - "epoch": 11.379921773142112, - "grad_norm": 1.431525468826294, - "learning_rate": 7.857989949748745e-05, - "loss": 5.538, - "step": 21821 - }, - { - "epoch": 11.380443285528031, - "grad_norm": 1.4927219152450562, - "learning_rate": 7.857889447236181e-05, - "loss": 5.3559, - "step": 21822 - }, - { - "epoch": 11.38096479791395, - "grad_norm": 1.4848524332046509, - "learning_rate": 7.857788944723619e-05, - "loss": 5.1884, - "step": 21823 - }, - { - "epoch": 11.38148631029987, - "grad_norm": 1.477676510810852, - "learning_rate": 7.857688442211055e-05, - "loss": 5.468, - "step": 21824 - }, - { - "epoch": 11.38200782268579, - "grad_norm": 1.532486915588379, - "learning_rate": 7.857587939698493e-05, - "loss": 5.3404, - "step": 21825 - }, - { - "epoch": 11.382529335071707, - "grad_norm": 1.521554708480835, - "learning_rate": 7.85748743718593e-05, - "loss": 5.4773, - "step": 21826 - }, - { - "epoch": 11.383050847457627, - "grad_norm": 1.3908954858779907, - "learning_rate": 7.857386934673367e-05, - "loss": 5.3834, - "step": 21827 - }, - { - "epoch": 11.383572359843546, - "grad_norm": 1.4814939498901367, - "learning_rate": 7.857286432160804e-05, - "loss": 5.4129, - "step": 21828 - }, - { - "epoch": 11.384093872229466, - "grad_norm": 1.4466344118118286, - "learning_rate": 7.857185929648242e-05, - "loss": 5.5475, - "step": 21829 - }, - { - "epoch": 11.384615384615385, - "grad_norm": 1.3979688882827759, - "learning_rate": 7.857085427135678e-05, - "loss": 5.4978, - "step": 21830 - }, - { - "epoch": 11.385136897001304, - "grad_norm": 1.3965809345245361, - "learning_rate": 7.856984924623116e-05, - "loss": 5.3038, - "step": 21831 - }, - { - "epoch": 11.385658409387222, - "grad_norm": 1.5357191562652588, - "learning_rate": 7.856884422110554e-05, - "loss": 5.3165, - "step": 21832 - }, - { - "epoch": 11.386179921773142, - "grad_norm": 1.5139240026474, - "learning_rate": 7.856783919597991e-05, - "loss": 5.3878, - "step": 21833 - }, - { - "epoch": 11.386701434159061, - "grad_norm": 1.419657588005066, - "learning_rate": 7.856683417085428e-05, - "loss": 5.2234, - "step": 21834 - }, - { - "epoch": 11.38722294654498, - "grad_norm": 1.4303820133209229, - "learning_rate": 7.856582914572864e-05, - "loss": 5.4119, - "step": 21835 - }, - { - "epoch": 11.3877444589309, - "grad_norm": 1.442871332168579, - "learning_rate": 7.856482412060302e-05, - "loss": 5.762, - "step": 21836 - }, - { - "epoch": 11.38826597131682, - "grad_norm": 1.3156871795654297, - "learning_rate": 7.856381909547738e-05, - "loss": 5.8422, - "step": 21837 - }, - { - "epoch": 11.388787483702737, - "grad_norm": 1.4140875339508057, - "learning_rate": 7.856281407035176e-05, - "loss": 5.3877, - "step": 21838 - }, - { - "epoch": 11.389308996088657, - "grad_norm": 1.5652005672454834, - "learning_rate": 7.856180904522613e-05, - "loss": 5.6054, - "step": 21839 - }, - { - "epoch": 11.389830508474576, - "grad_norm": 1.6796746253967285, - "learning_rate": 7.85608040201005e-05, - "loss": 4.9087, - "step": 21840 - }, - { - "epoch": 11.390352020860496, - "grad_norm": 1.5600570440292358, - "learning_rate": 7.855979899497488e-05, - "loss": 5.1941, - "step": 21841 - }, - { - "epoch": 11.390873533246415, - "grad_norm": 1.4330295324325562, - "learning_rate": 7.855879396984926e-05, - "loss": 5.2039, - "step": 21842 - }, - { - "epoch": 11.391395045632335, - "grad_norm": 1.459499716758728, - "learning_rate": 7.855778894472362e-05, - "loss": 5.3959, - "step": 21843 - }, - { - "epoch": 11.391916558018252, - "grad_norm": 1.445096731185913, - "learning_rate": 7.8556783919598e-05, - "loss": 5.6705, - "step": 21844 - }, - { - "epoch": 11.392438070404172, - "grad_norm": 1.4754167795181274, - "learning_rate": 7.855577889447237e-05, - "loss": 5.7487, - "step": 21845 - }, - { - "epoch": 11.392959582790091, - "grad_norm": 1.4088784456253052, - "learning_rate": 7.855477386934674e-05, - "loss": 5.6059, - "step": 21846 - }, - { - "epoch": 11.39348109517601, - "grad_norm": 1.451524019241333, - "learning_rate": 7.855376884422111e-05, - "loss": 5.5125, - "step": 21847 - }, - { - "epoch": 11.39400260756193, - "grad_norm": 1.381650447845459, - "learning_rate": 7.855276381909547e-05, - "loss": 5.8576, - "step": 21848 - }, - { - "epoch": 11.39452411994785, - "grad_norm": 1.431604027748108, - "learning_rate": 7.855175879396985e-05, - "loss": 5.5235, - "step": 21849 - }, - { - "epoch": 11.395045632333767, - "grad_norm": 1.8606265783309937, - "learning_rate": 7.855075376884422e-05, - "loss": 4.645, - "step": 21850 - }, - { - "epoch": 11.395567144719687, - "grad_norm": 1.3166486024856567, - "learning_rate": 7.854974874371859e-05, - "loss": 5.3334, - "step": 21851 - }, - { - "epoch": 11.396088657105606, - "grad_norm": 1.5102750062942505, - "learning_rate": 7.854874371859297e-05, - "loss": 5.2965, - "step": 21852 - }, - { - "epoch": 11.396610169491526, - "grad_norm": 1.6603626012802124, - "learning_rate": 7.854773869346735e-05, - "loss": 5.3292, - "step": 21853 - }, - { - "epoch": 11.397131681877445, - "grad_norm": 1.5495054721832275, - "learning_rate": 7.854673366834171e-05, - "loss": 4.6729, - "step": 21854 - }, - { - "epoch": 11.397653194263365, - "grad_norm": 1.3960696458816528, - "learning_rate": 7.854572864321609e-05, - "loss": 5.5745, - "step": 21855 - }, - { - "epoch": 11.398174706649282, - "grad_norm": 1.5293467044830322, - "learning_rate": 7.854472361809045e-05, - "loss": 5.0175, - "step": 21856 - }, - { - "epoch": 11.398696219035202, - "grad_norm": 1.3938359022140503, - "learning_rate": 7.854371859296483e-05, - "loss": 5.7647, - "step": 21857 - }, - { - "epoch": 11.399217731421121, - "grad_norm": 1.3842154741287231, - "learning_rate": 7.85427135678392e-05, - "loss": 5.5669, - "step": 21858 - }, - { - "epoch": 11.39973924380704, - "grad_norm": 1.4969512224197388, - "learning_rate": 7.854170854271357e-05, - "loss": 5.4558, - "step": 21859 - }, - { - "epoch": 11.40026075619296, - "grad_norm": 1.4498741626739502, - "learning_rate": 7.854070351758794e-05, - "loss": 5.1766, - "step": 21860 - }, - { - "epoch": 11.40078226857888, - "grad_norm": 1.4000742435455322, - "learning_rate": 7.853969849246232e-05, - "loss": 5.7034, - "step": 21861 - }, - { - "epoch": 11.401303780964797, - "grad_norm": 1.4385626316070557, - "learning_rate": 7.85386934673367e-05, - "loss": 5.0622, - "step": 21862 - }, - { - "epoch": 11.401825293350717, - "grad_norm": 1.429851770401001, - "learning_rate": 7.853768844221106e-05, - "loss": 5.2725, - "step": 21863 - }, - { - "epoch": 11.402346805736636, - "grad_norm": 1.3781784772872925, - "learning_rate": 7.853668341708544e-05, - "loss": 4.6698, - "step": 21864 - }, - { - "epoch": 11.402868318122556, - "grad_norm": 1.3589377403259277, - "learning_rate": 7.85356783919598e-05, - "loss": 5.7674, - "step": 21865 - }, - { - "epoch": 11.403389830508475, - "grad_norm": 1.401558518409729, - "learning_rate": 7.853467336683418e-05, - "loss": 5.6803, - "step": 21866 - }, - { - "epoch": 11.403911342894395, - "grad_norm": 1.6059449911117554, - "learning_rate": 7.853366834170854e-05, - "loss": 5.2274, - "step": 21867 - }, - { - "epoch": 11.404432855280312, - "grad_norm": 1.5354362726211548, - "learning_rate": 7.853266331658292e-05, - "loss": 5.1094, - "step": 21868 - }, - { - "epoch": 11.404954367666232, - "grad_norm": 1.4055652618408203, - "learning_rate": 7.853165829145729e-05, - "loss": 5.5948, - "step": 21869 - }, - { - "epoch": 11.405475880052151, - "grad_norm": 1.3795655965805054, - "learning_rate": 7.853065326633166e-05, - "loss": 5.7219, - "step": 21870 - }, - { - "epoch": 11.40599739243807, - "grad_norm": 1.529893398284912, - "learning_rate": 7.852964824120603e-05, - "loss": 5.1536, - "step": 21871 - }, - { - "epoch": 11.40651890482399, - "grad_norm": 1.4744333028793335, - "learning_rate": 7.85286432160804e-05, - "loss": 5.484, - "step": 21872 - }, - { - "epoch": 11.40704041720991, - "grad_norm": 1.5490103960037231, - "learning_rate": 7.852763819095478e-05, - "loss": 5.4271, - "step": 21873 - }, - { - "epoch": 11.407561929595827, - "grad_norm": 1.3909502029418945, - "learning_rate": 7.852663316582915e-05, - "loss": 5.4237, - "step": 21874 - }, - { - "epoch": 11.408083441981747, - "grad_norm": 1.4503576755523682, - "learning_rate": 7.852562814070353e-05, - "loss": 6.0101, - "step": 21875 - }, - { - "epoch": 11.408604954367666, - "grad_norm": 1.3577269315719604, - "learning_rate": 7.852462311557789e-05, - "loss": 5.8518, - "step": 21876 - }, - { - "epoch": 11.409126466753586, - "grad_norm": 1.4512319564819336, - "learning_rate": 7.852361809045227e-05, - "loss": 5.3647, - "step": 21877 - }, - { - "epoch": 11.409647979139505, - "grad_norm": 1.5142520666122437, - "learning_rate": 7.852261306532663e-05, - "loss": 5.222, - "step": 21878 - }, - { - "epoch": 11.410169491525423, - "grad_norm": 1.4484385251998901, - "learning_rate": 7.852160804020101e-05, - "loss": 5.2293, - "step": 21879 - }, - { - "epoch": 11.410691003911342, - "grad_norm": 1.5830174684524536, - "learning_rate": 7.852060301507537e-05, - "loss": 5.0627, - "step": 21880 - }, - { - "epoch": 11.411212516297262, - "grad_norm": 1.4563528299331665, - "learning_rate": 7.851959798994975e-05, - "loss": 5.2313, - "step": 21881 - }, - { - "epoch": 11.411734028683181, - "grad_norm": 1.4964964389801025, - "learning_rate": 7.851859296482412e-05, - "loss": 5.5, - "step": 21882 - }, - { - "epoch": 11.4122555410691, - "grad_norm": 1.5114717483520508, - "learning_rate": 7.85175879396985e-05, - "loss": 5.3744, - "step": 21883 - }, - { - "epoch": 11.41277705345502, - "grad_norm": 1.4424583911895752, - "learning_rate": 7.851658291457287e-05, - "loss": 5.4012, - "step": 21884 - }, - { - "epoch": 11.41329856584094, - "grad_norm": 1.5589587688446045, - "learning_rate": 7.851557788944725e-05, - "loss": 5.0574, - "step": 21885 - }, - { - "epoch": 11.413820078226857, - "grad_norm": 1.456688404083252, - "learning_rate": 7.851457286432161e-05, - "loss": 5.8742, - "step": 21886 - }, - { - "epoch": 11.414341590612777, - "grad_norm": 1.5405702590942383, - "learning_rate": 7.851356783919598e-05, - "loss": 5.1262, - "step": 21887 - }, - { - "epoch": 11.414863102998696, - "grad_norm": 1.4275109767913818, - "learning_rate": 7.851256281407036e-05, - "loss": 5.5369, - "step": 21888 - }, - { - "epoch": 11.415384615384616, - "grad_norm": 1.5039089918136597, - "learning_rate": 7.851155778894472e-05, - "loss": 5.7804, - "step": 21889 - }, - { - "epoch": 11.415906127770535, - "grad_norm": 1.86427640914917, - "learning_rate": 7.85105527638191e-05, - "loss": 5.408, - "step": 21890 - }, - { - "epoch": 11.416427640156453, - "grad_norm": 1.422865390777588, - "learning_rate": 7.850954773869346e-05, - "loss": 4.9773, - "step": 21891 - }, - { - "epoch": 11.416949152542372, - "grad_norm": 1.4872196912765503, - "learning_rate": 7.850854271356784e-05, - "loss": 5.5224, - "step": 21892 - }, - { - "epoch": 11.417470664928292, - "grad_norm": 1.4279751777648926, - "learning_rate": 7.850753768844222e-05, - "loss": 5.5532, - "step": 21893 - }, - { - "epoch": 11.417992177314211, - "grad_norm": 1.4344799518585205, - "learning_rate": 7.85065326633166e-05, - "loss": 5.7818, - "step": 21894 - }, - { - "epoch": 11.41851368970013, - "grad_norm": 1.5251939296722412, - "learning_rate": 7.850552763819096e-05, - "loss": 5.1791, - "step": 21895 - }, - { - "epoch": 11.41903520208605, - "grad_norm": 1.4395220279693604, - "learning_rate": 7.850452261306534e-05, - "loss": 5.4745, - "step": 21896 - }, - { - "epoch": 11.419556714471968, - "grad_norm": 1.3906700611114502, - "learning_rate": 7.85035175879397e-05, - "loss": 5.7328, - "step": 21897 - }, - { - "epoch": 11.420078226857887, - "grad_norm": 1.5754047632217407, - "learning_rate": 7.850251256281408e-05, - "loss": 4.9641, - "step": 21898 - }, - { - "epoch": 11.420599739243807, - "grad_norm": 1.495476245880127, - "learning_rate": 7.850150753768844e-05, - "loss": 5.65, - "step": 21899 - }, - { - "epoch": 11.421121251629726, - "grad_norm": 1.574713110923767, - "learning_rate": 7.850050251256281e-05, - "loss": 5.4676, - "step": 21900 - }, - { - "epoch": 11.421642764015646, - "grad_norm": 1.4176820516586304, - "learning_rate": 7.849949748743719e-05, - "loss": 5.3262, - "step": 21901 - }, - { - "epoch": 11.422164276401565, - "grad_norm": 1.3127020597457886, - "learning_rate": 7.849849246231155e-05, - "loss": 5.4166, - "step": 21902 - }, - { - "epoch": 11.422685788787483, - "grad_norm": 1.3340725898742676, - "learning_rate": 7.849748743718593e-05, - "loss": 5.672, - "step": 21903 - }, - { - "epoch": 11.423207301173402, - "grad_norm": 1.473162293434143, - "learning_rate": 7.84964824120603e-05, - "loss": 5.5737, - "step": 21904 - }, - { - "epoch": 11.423728813559322, - "grad_norm": 1.3909664154052734, - "learning_rate": 7.849547738693468e-05, - "loss": 5.5645, - "step": 21905 - }, - { - "epoch": 11.424250325945241, - "grad_norm": 1.3671730756759644, - "learning_rate": 7.849447236180905e-05, - "loss": 5.635, - "step": 21906 - }, - { - "epoch": 11.42477183833116, - "grad_norm": 1.401138186454773, - "learning_rate": 7.849346733668343e-05, - "loss": 4.7777, - "step": 21907 - }, - { - "epoch": 11.42529335071708, - "grad_norm": 1.3124910593032837, - "learning_rate": 7.849246231155779e-05, - "loss": 5.6879, - "step": 21908 - }, - { - "epoch": 11.425814863102998, - "grad_norm": 1.3680022954940796, - "learning_rate": 7.849145728643217e-05, - "loss": 5.0888, - "step": 21909 - }, - { - "epoch": 11.426336375488917, - "grad_norm": 1.4863232374191284, - "learning_rate": 7.849045226130653e-05, - "loss": 5.4867, - "step": 21910 - }, - { - "epoch": 11.426857887874837, - "grad_norm": 1.4337248802185059, - "learning_rate": 7.848944723618091e-05, - "loss": 5.6014, - "step": 21911 - }, - { - "epoch": 11.427379400260756, - "grad_norm": 1.519982099533081, - "learning_rate": 7.848844221105527e-05, - "loss": 5.2844, - "step": 21912 - }, - { - "epoch": 11.427900912646676, - "grad_norm": 1.5238186120986938, - "learning_rate": 7.848743718592965e-05, - "loss": 5.4097, - "step": 21913 - }, - { - "epoch": 11.428422425032595, - "grad_norm": 1.7384514808654785, - "learning_rate": 7.848643216080403e-05, - "loss": 4.866, - "step": 21914 - }, - { - "epoch": 11.428943937418513, - "grad_norm": 1.3947458267211914, - "learning_rate": 7.84854271356784e-05, - "loss": 5.7437, - "step": 21915 - }, - { - "epoch": 11.429465449804432, - "grad_norm": 1.486433506011963, - "learning_rate": 7.848442211055277e-05, - "loss": 4.9929, - "step": 21916 - }, - { - "epoch": 11.429986962190352, - "grad_norm": 1.4421298503875732, - "learning_rate": 7.848341708542714e-05, - "loss": 5.6647, - "step": 21917 - }, - { - "epoch": 11.430508474576271, - "grad_norm": 1.50239098072052, - "learning_rate": 7.848241206030151e-05, - "loss": 5.3928, - "step": 21918 - }, - { - "epoch": 11.43102998696219, - "grad_norm": 1.48284912109375, - "learning_rate": 7.848140703517588e-05, - "loss": 5.6424, - "step": 21919 - }, - { - "epoch": 11.43155149934811, - "grad_norm": 1.4868388175964355, - "learning_rate": 7.848040201005026e-05, - "loss": 5.7844, - "step": 21920 - }, - { - "epoch": 11.432073011734028, - "grad_norm": 1.4461861848831177, - "learning_rate": 7.847939698492462e-05, - "loss": 5.4306, - "step": 21921 - }, - { - "epoch": 11.432594524119947, - "grad_norm": 1.44652259349823, - "learning_rate": 7.8478391959799e-05, - "loss": 5.4135, - "step": 21922 - }, - { - "epoch": 11.433116036505867, - "grad_norm": 1.4737640619277954, - "learning_rate": 7.847738693467336e-05, - "loss": 5.2075, - "step": 21923 - }, - { - "epoch": 11.433637548891786, - "grad_norm": 1.3885012865066528, - "learning_rate": 7.847638190954774e-05, - "loss": 5.4222, - "step": 21924 - }, - { - "epoch": 11.434159061277706, - "grad_norm": 1.3937768936157227, - "learning_rate": 7.847537688442212e-05, - "loss": 5.1805, - "step": 21925 - }, - { - "epoch": 11.434680573663625, - "grad_norm": 1.5210520029067993, - "learning_rate": 7.84743718592965e-05, - "loss": 5.5378, - "step": 21926 - }, - { - "epoch": 11.435202086049543, - "grad_norm": 1.407549262046814, - "learning_rate": 7.847336683417086e-05, - "loss": 5.6436, - "step": 21927 - }, - { - "epoch": 11.435723598435462, - "grad_norm": 1.499626636505127, - "learning_rate": 7.847236180904522e-05, - "loss": 4.9547, - "step": 21928 - }, - { - "epoch": 11.436245110821382, - "grad_norm": 1.4379938840866089, - "learning_rate": 7.84713567839196e-05, - "loss": 5.7285, - "step": 21929 - }, - { - "epoch": 11.436766623207301, - "grad_norm": 1.5242055654525757, - "learning_rate": 7.847035175879397e-05, - "loss": 5.1968, - "step": 21930 - }, - { - "epoch": 11.43728813559322, - "grad_norm": 1.397607445716858, - "learning_rate": 7.846934673366834e-05, - "loss": 5.575, - "step": 21931 - }, - { - "epoch": 11.43780964797914, - "grad_norm": 1.4581818580627441, - "learning_rate": 7.846834170854271e-05, - "loss": 5.6431, - "step": 21932 - }, - { - "epoch": 11.438331160365058, - "grad_norm": 1.420741081237793, - "learning_rate": 7.846733668341709e-05, - "loss": 5.6763, - "step": 21933 - }, - { - "epoch": 11.438852672750977, - "grad_norm": 1.4887621402740479, - "learning_rate": 7.846633165829146e-05, - "loss": 5.1734, - "step": 21934 - }, - { - "epoch": 11.439374185136897, - "grad_norm": 1.5526282787322998, - "learning_rate": 7.846532663316584e-05, - "loss": 5.5498, - "step": 21935 - }, - { - "epoch": 11.439895697522816, - "grad_norm": 1.5617296695709229, - "learning_rate": 7.846432160804021e-05, - "loss": 5.272, - "step": 21936 - }, - { - "epoch": 11.440417209908736, - "grad_norm": 1.4087352752685547, - "learning_rate": 7.846331658291458e-05, - "loss": 4.9606, - "step": 21937 - }, - { - "epoch": 11.440938722294655, - "grad_norm": 1.5483160018920898, - "learning_rate": 7.846231155778895e-05, - "loss": 5.1717, - "step": 21938 - }, - { - "epoch": 11.441460234680573, - "grad_norm": 1.4929463863372803, - "learning_rate": 7.846130653266333e-05, - "loss": 5.5545, - "step": 21939 - }, - { - "epoch": 11.441981747066492, - "grad_norm": 1.5375516414642334, - "learning_rate": 7.846030150753769e-05, - "loss": 5.4777, - "step": 21940 - }, - { - "epoch": 11.442503259452412, - "grad_norm": 1.5398472547531128, - "learning_rate": 7.845929648241206e-05, - "loss": 5.268, - "step": 21941 - }, - { - "epoch": 11.443024771838331, - "grad_norm": 1.5212663412094116, - "learning_rate": 7.845829145728643e-05, - "loss": 5.3657, - "step": 21942 - }, - { - "epoch": 11.44354628422425, - "grad_norm": 1.4740883111953735, - "learning_rate": 7.84572864321608e-05, - "loss": 5.2739, - "step": 21943 - }, - { - "epoch": 11.44406779661017, - "grad_norm": 1.486876130104065, - "learning_rate": 7.845628140703518e-05, - "loss": 5.0599, - "step": 21944 - }, - { - "epoch": 11.444589308996088, - "grad_norm": 1.3826375007629395, - "learning_rate": 7.845527638190955e-05, - "loss": 5.5315, - "step": 21945 - }, - { - "epoch": 11.445110821382007, - "grad_norm": 1.4849064350128174, - "learning_rate": 7.845427135678393e-05, - "loss": 5.6089, - "step": 21946 - }, - { - "epoch": 11.445632333767927, - "grad_norm": 1.4315204620361328, - "learning_rate": 7.84532663316583e-05, - "loss": 5.4314, - "step": 21947 - }, - { - "epoch": 11.446153846153846, - "grad_norm": 1.5106080770492554, - "learning_rate": 7.845226130653267e-05, - "loss": 5.0167, - "step": 21948 - }, - { - "epoch": 11.446675358539766, - "grad_norm": 1.351318359375, - "learning_rate": 7.845125628140704e-05, - "loss": 5.6514, - "step": 21949 - }, - { - "epoch": 11.447196870925685, - "grad_norm": 1.4929858446121216, - "learning_rate": 7.845025125628142e-05, - "loss": 5.6561, - "step": 21950 - }, - { - "epoch": 11.447718383311603, - "grad_norm": 1.4310219287872314, - "learning_rate": 7.844924623115578e-05, - "loss": 5.1796, - "step": 21951 - }, - { - "epoch": 11.448239895697522, - "grad_norm": 1.431115984916687, - "learning_rate": 7.844824120603016e-05, - "loss": 5.4759, - "step": 21952 - }, - { - "epoch": 11.448761408083442, - "grad_norm": 1.5305322408676147, - "learning_rate": 7.844723618090452e-05, - "loss": 5.3751, - "step": 21953 - }, - { - "epoch": 11.449282920469361, - "grad_norm": 1.6017018556594849, - "learning_rate": 7.84462311557789e-05, - "loss": 5.2726, - "step": 21954 - }, - { - "epoch": 11.44980443285528, - "grad_norm": 1.3883600234985352, - "learning_rate": 7.844522613065328e-05, - "loss": 5.8804, - "step": 21955 - }, - { - "epoch": 11.4503259452412, - "grad_norm": 1.4482166767120361, - "learning_rate": 7.844422110552764e-05, - "loss": 5.6237, - "step": 21956 - }, - { - "epoch": 11.450847457627118, - "grad_norm": 1.577818751335144, - "learning_rate": 7.844321608040202e-05, - "loss": 5.4012, - "step": 21957 - }, - { - "epoch": 11.451368970013037, - "grad_norm": 1.3832038640975952, - "learning_rate": 7.844221105527638e-05, - "loss": 5.957, - "step": 21958 - }, - { - "epoch": 11.451890482398957, - "grad_norm": 1.57763671875, - "learning_rate": 7.844120603015076e-05, - "loss": 5.1847, - "step": 21959 - }, - { - "epoch": 11.452411994784876, - "grad_norm": 1.4948711395263672, - "learning_rate": 7.844020100502513e-05, - "loss": 5.1108, - "step": 21960 - }, - { - "epoch": 11.452933507170796, - "grad_norm": 1.3991780281066895, - "learning_rate": 7.84391959798995e-05, - "loss": 5.735, - "step": 21961 - }, - { - "epoch": 11.453455019556715, - "grad_norm": 1.4345166683197021, - "learning_rate": 7.843819095477387e-05, - "loss": 5.6279, - "step": 21962 - }, - { - "epoch": 11.453976531942633, - "grad_norm": 1.4509143829345703, - "learning_rate": 7.843718592964825e-05, - "loss": 5.5996, - "step": 21963 - }, - { - "epoch": 11.454498044328552, - "grad_norm": 1.4207608699798584, - "learning_rate": 7.843618090452261e-05, - "loss": 5.5298, - "step": 21964 - }, - { - "epoch": 11.455019556714472, - "grad_norm": 1.372391700744629, - "learning_rate": 7.843517587939699e-05, - "loss": 5.8364, - "step": 21965 - }, - { - "epoch": 11.455541069100391, - "grad_norm": 1.3441556692123413, - "learning_rate": 7.843417085427137e-05, - "loss": 5.7855, - "step": 21966 - }, - { - "epoch": 11.45606258148631, - "grad_norm": 1.3344390392303467, - "learning_rate": 7.843316582914573e-05, - "loss": 5.7719, - "step": 21967 - }, - { - "epoch": 11.45658409387223, - "grad_norm": 1.4809998273849487, - "learning_rate": 7.843216080402011e-05, - "loss": 5.5408, - "step": 21968 - }, - { - "epoch": 11.457105606258148, - "grad_norm": 1.4344843626022339, - "learning_rate": 7.843115577889447e-05, - "loss": 5.1593, - "step": 21969 - }, - { - "epoch": 11.457627118644067, - "grad_norm": 1.5232303142547607, - "learning_rate": 7.843015075376885e-05, - "loss": 5.534, - "step": 21970 - }, - { - "epoch": 11.458148631029987, - "grad_norm": 1.638791799545288, - "learning_rate": 7.842914572864321e-05, - "loss": 5.609, - "step": 21971 - }, - { - "epoch": 11.458670143415906, - "grad_norm": 1.4142677783966064, - "learning_rate": 7.842814070351759e-05, - "loss": 5.4502, - "step": 21972 - }, - { - "epoch": 11.459191655801826, - "grad_norm": 1.415374994277954, - "learning_rate": 7.842713567839196e-05, - "loss": 5.2744, - "step": 21973 - }, - { - "epoch": 11.459713168187744, - "grad_norm": 1.3498796224594116, - "learning_rate": 7.842613065326633e-05, - "loss": 5.5606, - "step": 21974 - }, - { - "epoch": 11.460234680573663, - "grad_norm": 1.375058650970459, - "learning_rate": 7.842512562814071e-05, - "loss": 5.6306, - "step": 21975 - }, - { - "epoch": 11.460756192959582, - "grad_norm": 1.4073662757873535, - "learning_rate": 7.842412060301509e-05, - "loss": 5.9595, - "step": 21976 - }, - { - "epoch": 11.461277705345502, - "grad_norm": 1.4929685592651367, - "learning_rate": 7.842311557788945e-05, - "loss": 5.3015, - "step": 21977 - }, - { - "epoch": 11.461799217731421, - "grad_norm": 1.4289606809616089, - "learning_rate": 7.842211055276383e-05, - "loss": 5.7435, - "step": 21978 - }, - { - "epoch": 11.46232073011734, - "grad_norm": 1.362449049949646, - "learning_rate": 7.84211055276382e-05, - "loss": 5.4133, - "step": 21979 - }, - { - "epoch": 11.46284224250326, - "grad_norm": 1.3966174125671387, - "learning_rate": 7.842010050251256e-05, - "loss": 5.3964, - "step": 21980 - }, - { - "epoch": 11.463363754889178, - "grad_norm": 1.4034438133239746, - "learning_rate": 7.841909547738694e-05, - "loss": 5.5037, - "step": 21981 - }, - { - "epoch": 11.463885267275097, - "grad_norm": 1.4697024822235107, - "learning_rate": 7.84180904522613e-05, - "loss": 5.2215, - "step": 21982 - }, - { - "epoch": 11.464406779661017, - "grad_norm": 1.5010675191879272, - "learning_rate": 7.841708542713568e-05, - "loss": 5.6836, - "step": 21983 - }, - { - "epoch": 11.464928292046936, - "grad_norm": 1.4661678075790405, - "learning_rate": 7.841608040201004e-05, - "loss": 5.1944, - "step": 21984 - }, - { - "epoch": 11.465449804432856, - "grad_norm": 1.4996755123138428, - "learning_rate": 7.841507537688442e-05, - "loss": 5.5073, - "step": 21985 - }, - { - "epoch": 11.465971316818774, - "grad_norm": 1.4562228918075562, - "learning_rate": 7.84140703517588e-05, - "loss": 5.6547, - "step": 21986 - }, - { - "epoch": 11.466492829204693, - "grad_norm": 1.537667989730835, - "learning_rate": 7.841306532663318e-05, - "loss": 5.3058, - "step": 21987 - }, - { - "epoch": 11.467014341590613, - "grad_norm": 1.4160059690475464, - "learning_rate": 7.841206030150754e-05, - "loss": 5.271, - "step": 21988 - }, - { - "epoch": 11.467535853976532, - "grad_norm": 1.657823085784912, - "learning_rate": 7.841105527638192e-05, - "loss": 4.6542, - "step": 21989 - }, - { - "epoch": 11.468057366362451, - "grad_norm": 1.599209189414978, - "learning_rate": 7.841005025125628e-05, - "loss": 5.1911, - "step": 21990 - }, - { - "epoch": 11.468578878748371, - "grad_norm": 1.5031826496124268, - "learning_rate": 7.840904522613066e-05, - "loss": 5.5607, - "step": 21991 - }, - { - "epoch": 11.469100391134289, - "grad_norm": 1.3519002199172974, - "learning_rate": 7.840804020100503e-05, - "loss": 5.4351, - "step": 21992 - }, - { - "epoch": 11.469621903520208, - "grad_norm": 1.4788060188293457, - "learning_rate": 7.840703517587939e-05, - "loss": 4.8278, - "step": 21993 - }, - { - "epoch": 11.470143415906128, - "grad_norm": 1.407860517501831, - "learning_rate": 7.840603015075377e-05, - "loss": 5.2593, - "step": 21994 - }, - { - "epoch": 11.470664928292047, - "grad_norm": 1.671431303024292, - "learning_rate": 7.840502512562815e-05, - "loss": 5.0801, - "step": 21995 - }, - { - "epoch": 11.471186440677966, - "grad_norm": 1.5901298522949219, - "learning_rate": 7.840402010050252e-05, - "loss": 5.3414, - "step": 21996 - }, - { - "epoch": 11.471707953063886, - "grad_norm": 1.4328685998916626, - "learning_rate": 7.840301507537689e-05, - "loss": 5.477, - "step": 21997 - }, - { - "epoch": 11.472229465449804, - "grad_norm": 1.444889783859253, - "learning_rate": 7.840201005025127e-05, - "loss": 5.3084, - "step": 21998 - }, - { - "epoch": 11.472750977835723, - "grad_norm": 1.6165482997894287, - "learning_rate": 7.840100502512563e-05, - "loss": 5.0452, - "step": 21999 - }, - { - "epoch": 11.473272490221643, - "grad_norm": 1.3856250047683716, - "learning_rate": 7.840000000000001e-05, - "loss": 5.9452, - "step": 22000 - }, - { - "epoch": 11.473794002607562, - "grad_norm": 1.4822074174880981, - "learning_rate": 7.839899497487437e-05, - "loss": 5.3831, - "step": 22001 - }, - { - "epoch": 11.474315514993481, - "grad_norm": 1.4833163022994995, - "learning_rate": 7.839798994974875e-05, - "loss": 5.095, - "step": 22002 - }, - { - "epoch": 11.474837027379401, - "grad_norm": 1.4961899518966675, - "learning_rate": 7.839698492462311e-05, - "loss": 5.5564, - "step": 22003 - }, - { - "epoch": 11.475358539765319, - "grad_norm": 1.4784750938415527, - "learning_rate": 7.839597989949749e-05, - "loss": 5.6416, - "step": 22004 - }, - { - "epoch": 11.475880052151238, - "grad_norm": 1.4673463106155396, - "learning_rate": 7.839497487437186e-05, - "loss": 5.7318, - "step": 22005 - }, - { - "epoch": 11.476401564537158, - "grad_norm": 1.5036605596542358, - "learning_rate": 7.839396984924623e-05, - "loss": 5.0609, - "step": 22006 - }, - { - "epoch": 11.476923076923077, - "grad_norm": 1.5010902881622314, - "learning_rate": 7.839296482412061e-05, - "loss": 5.3502, - "step": 22007 - }, - { - "epoch": 11.477444589308996, - "grad_norm": 1.4496947526931763, - "learning_rate": 7.839195979899498e-05, - "loss": 5.0693, - "step": 22008 - }, - { - "epoch": 11.477966101694916, - "grad_norm": 1.450539469718933, - "learning_rate": 7.839095477386935e-05, - "loss": 5.4105, - "step": 22009 - }, - { - "epoch": 11.478487614080834, - "grad_norm": 1.397951602935791, - "learning_rate": 7.838994974874372e-05, - "loss": 5.3455, - "step": 22010 - }, - { - "epoch": 11.479009126466753, - "grad_norm": 1.4314615726470947, - "learning_rate": 7.83889447236181e-05, - "loss": 5.4323, - "step": 22011 - }, - { - "epoch": 11.479530638852673, - "grad_norm": 1.4441031217575073, - "learning_rate": 7.838793969849246e-05, - "loss": 5.6638, - "step": 22012 - }, - { - "epoch": 11.480052151238592, - "grad_norm": 1.5298746824264526, - "learning_rate": 7.838693467336684e-05, - "loss": 5.018, - "step": 22013 - }, - { - "epoch": 11.480573663624511, - "grad_norm": 1.5738763809204102, - "learning_rate": 7.83859296482412e-05, - "loss": 5.2867, - "step": 22014 - }, - { - "epoch": 11.481095176010431, - "grad_norm": 1.4805608987808228, - "learning_rate": 7.838492462311558e-05, - "loss": 5.364, - "step": 22015 - }, - { - "epoch": 11.481616688396349, - "grad_norm": 1.334243655204773, - "learning_rate": 7.838391959798996e-05, - "loss": 5.742, - "step": 22016 - }, - { - "epoch": 11.482138200782268, - "grad_norm": 1.6181221008300781, - "learning_rate": 7.838291457286434e-05, - "loss": 5.2396, - "step": 22017 - }, - { - "epoch": 11.482659713168188, - "grad_norm": 1.45015549659729, - "learning_rate": 7.83819095477387e-05, - "loss": 5.3695, - "step": 22018 - }, - { - "epoch": 11.483181225554107, - "grad_norm": 1.5038368701934814, - "learning_rate": 7.838090452261308e-05, - "loss": 5.6693, - "step": 22019 - }, - { - "epoch": 11.483702737940026, - "grad_norm": 1.5261831283569336, - "learning_rate": 7.837989949748744e-05, - "loss": 5.2663, - "step": 22020 - }, - { - "epoch": 11.484224250325946, - "grad_norm": 1.39738130569458, - "learning_rate": 7.837889447236181e-05, - "loss": 5.7737, - "step": 22021 - }, - { - "epoch": 11.484745762711864, - "grad_norm": 1.4783954620361328, - "learning_rate": 7.837788944723619e-05, - "loss": 5.5251, - "step": 22022 - }, - { - "epoch": 11.485267275097783, - "grad_norm": 1.4941200017929077, - "learning_rate": 7.837688442211055e-05, - "loss": 4.6932, - "step": 22023 - }, - { - "epoch": 11.485788787483703, - "grad_norm": 1.3873728513717651, - "learning_rate": 7.837587939698493e-05, - "loss": 5.6064, - "step": 22024 - }, - { - "epoch": 11.486310299869622, - "grad_norm": 1.3849279880523682, - "learning_rate": 7.837487437185929e-05, - "loss": 5.4478, - "step": 22025 - }, - { - "epoch": 11.486831812255542, - "grad_norm": 1.4963833093643188, - "learning_rate": 7.837386934673367e-05, - "loss": 5.1525, - "step": 22026 - }, - { - "epoch": 11.487353324641461, - "grad_norm": 1.3520461320877075, - "learning_rate": 7.837286432160805e-05, - "loss": 5.4748, - "step": 22027 - }, - { - "epoch": 11.487874837027379, - "grad_norm": 1.5138440132141113, - "learning_rate": 7.837185929648242e-05, - "loss": 5.0788, - "step": 22028 - }, - { - "epoch": 11.488396349413298, - "grad_norm": 1.4641120433807373, - "learning_rate": 7.837085427135679e-05, - "loss": 5.2901, - "step": 22029 - }, - { - "epoch": 11.488917861799218, - "grad_norm": 1.4419300556182861, - "learning_rate": 7.836984924623117e-05, - "loss": 5.2436, - "step": 22030 - }, - { - "epoch": 11.489439374185137, - "grad_norm": 1.356374740600586, - "learning_rate": 7.836884422110553e-05, - "loss": 5.3817, - "step": 22031 - }, - { - "epoch": 11.489960886571057, - "grad_norm": 1.3761968612670898, - "learning_rate": 7.836783919597991e-05, - "loss": 5.5451, - "step": 22032 - }, - { - "epoch": 11.490482398956976, - "grad_norm": 1.341571569442749, - "learning_rate": 7.836683417085427e-05, - "loss": 5.8131, - "step": 22033 - }, - { - "epoch": 11.491003911342894, - "grad_norm": 1.4230791330337524, - "learning_rate": 7.836582914572864e-05, - "loss": 5.4598, - "step": 22034 - }, - { - "epoch": 11.491525423728813, - "grad_norm": 1.3997801542282104, - "learning_rate": 7.836482412060302e-05, - "loss": 5.7011, - "step": 22035 - }, - { - "epoch": 11.492046936114733, - "grad_norm": 1.440280556678772, - "learning_rate": 7.83638190954774e-05, - "loss": 5.5776, - "step": 22036 - }, - { - "epoch": 11.492568448500652, - "grad_norm": 1.4204421043395996, - "learning_rate": 7.836281407035177e-05, - "loss": 5.5262, - "step": 22037 - }, - { - "epoch": 11.493089960886572, - "grad_norm": 1.4615310430526733, - "learning_rate": 7.836180904522614e-05, - "loss": 5.4183, - "step": 22038 - }, - { - "epoch": 11.493611473272491, - "grad_norm": 1.4440776109695435, - "learning_rate": 7.836080402010051e-05, - "loss": 4.8154, - "step": 22039 - }, - { - "epoch": 11.494132985658409, - "grad_norm": 1.6582533121109009, - "learning_rate": 7.835979899497488e-05, - "loss": 5.2021, - "step": 22040 - }, - { - "epoch": 11.494654498044328, - "grad_norm": 1.3975924253463745, - "learning_rate": 7.835879396984926e-05, - "loss": 5.39, - "step": 22041 - }, - { - "epoch": 11.495176010430248, - "grad_norm": 1.3990367650985718, - "learning_rate": 7.835778894472362e-05, - "loss": 5.4398, - "step": 22042 - }, - { - "epoch": 11.495697522816167, - "grad_norm": 1.4350630044937134, - "learning_rate": 7.8356783919598e-05, - "loss": 5.0901, - "step": 22043 - }, - { - "epoch": 11.496219035202087, - "grad_norm": 1.456649899482727, - "learning_rate": 7.835577889447236e-05, - "loss": 5.8809, - "step": 22044 - }, - { - "epoch": 11.496740547588006, - "grad_norm": 1.3437873125076294, - "learning_rate": 7.835477386934674e-05, - "loss": 5.6007, - "step": 22045 - }, - { - "epoch": 11.497262059973924, - "grad_norm": 1.3147121667861938, - "learning_rate": 7.83537688442211e-05, - "loss": 5.6977, - "step": 22046 - }, - { - "epoch": 11.497783572359843, - "grad_norm": 1.4249787330627441, - "learning_rate": 7.835276381909548e-05, - "loss": 5.3814, - "step": 22047 - }, - { - "epoch": 11.498305084745763, - "grad_norm": 1.614884614944458, - "learning_rate": 7.835175879396986e-05, - "loss": 5.1382, - "step": 22048 - }, - { - "epoch": 11.498826597131682, - "grad_norm": 1.2952823638916016, - "learning_rate": 7.835075376884422e-05, - "loss": 5.8547, - "step": 22049 - }, - { - "epoch": 11.499348109517602, - "grad_norm": 1.486340045928955, - "learning_rate": 7.83497487437186e-05, - "loss": 5.383, - "step": 22050 - }, - { - "epoch": 11.499869621903521, - "grad_norm": 1.4171990156173706, - "learning_rate": 7.834874371859297e-05, - "loss": 5.1657, - "step": 22051 - }, - { - "epoch": 11.500391134289439, - "grad_norm": 1.6413767337799072, - "learning_rate": 7.834773869346734e-05, - "loss": 5.0643, - "step": 22052 - }, - { - "epoch": 11.500912646675358, - "grad_norm": 1.402390718460083, - "learning_rate": 7.834673366834171e-05, - "loss": 5.6415, - "step": 22053 - }, - { - "epoch": 11.501434159061278, - "grad_norm": 1.4967855215072632, - "learning_rate": 7.834572864321609e-05, - "loss": 5.6466, - "step": 22054 - }, - { - "epoch": 11.501955671447197, - "grad_norm": 1.392391562461853, - "learning_rate": 7.834472361809045e-05, - "loss": 5.547, - "step": 22055 - }, - { - "epoch": 11.502477183833117, - "grad_norm": 1.4362050294876099, - "learning_rate": 7.834371859296483e-05, - "loss": 5.5457, - "step": 22056 - }, - { - "epoch": 11.502998696219036, - "grad_norm": 1.577022671699524, - "learning_rate": 7.834271356783919e-05, - "loss": 5.3913, - "step": 22057 - }, - { - "epoch": 11.503520208604954, - "grad_norm": 1.5109155178070068, - "learning_rate": 7.834170854271357e-05, - "loss": 5.2979, - "step": 22058 - }, - { - "epoch": 11.504041720990873, - "grad_norm": 1.4602346420288086, - "learning_rate": 7.834070351758795e-05, - "loss": 5.6269, - "step": 22059 - }, - { - "epoch": 11.504563233376793, - "grad_norm": 1.7328978776931763, - "learning_rate": 7.833969849246231e-05, - "loss": 5.1215, - "step": 22060 - }, - { - "epoch": 11.505084745762712, - "grad_norm": 1.4055620431900024, - "learning_rate": 7.833869346733669e-05, - "loss": 4.9465, - "step": 22061 - }, - { - "epoch": 11.505606258148632, - "grad_norm": 1.4749118089675903, - "learning_rate": 7.833768844221105e-05, - "loss": 5.6937, - "step": 22062 - }, - { - "epoch": 11.506127770534551, - "grad_norm": 1.3829560279846191, - "learning_rate": 7.833668341708543e-05, - "loss": 5.6575, - "step": 22063 - }, - { - "epoch": 11.506649282920469, - "grad_norm": 1.3687540292739868, - "learning_rate": 7.83356783919598e-05, - "loss": 4.9315, - "step": 22064 - }, - { - "epoch": 11.507170795306388, - "grad_norm": 1.3750133514404297, - "learning_rate": 7.833467336683417e-05, - "loss": 5.6117, - "step": 22065 - }, - { - "epoch": 11.507692307692308, - "grad_norm": 1.4864928722381592, - "learning_rate": 7.833366834170854e-05, - "loss": 5.7189, - "step": 22066 - }, - { - "epoch": 11.508213820078227, - "grad_norm": 1.6073940992355347, - "learning_rate": 7.833266331658292e-05, - "loss": 5.0151, - "step": 22067 - }, - { - "epoch": 11.508735332464147, - "grad_norm": 1.466808557510376, - "learning_rate": 7.83316582914573e-05, - "loss": 5.3441, - "step": 22068 - }, - { - "epoch": 11.509256844850064, - "grad_norm": 1.396406650543213, - "learning_rate": 7.833065326633167e-05, - "loss": 5.1395, - "step": 22069 - }, - { - "epoch": 11.509778357235984, - "grad_norm": 1.4206547737121582, - "learning_rate": 7.832964824120604e-05, - "loss": 5.7811, - "step": 22070 - }, - { - "epoch": 11.510299869621903, - "grad_norm": 1.5092201232910156, - "learning_rate": 7.832864321608041e-05, - "loss": 5.3575, - "step": 22071 - }, - { - "epoch": 11.510821382007823, - "grad_norm": 1.4595863819122314, - "learning_rate": 7.832763819095478e-05, - "loss": 5.354, - "step": 22072 - }, - { - "epoch": 11.511342894393742, - "grad_norm": 1.3813353776931763, - "learning_rate": 7.832663316582914e-05, - "loss": 5.5364, - "step": 22073 - }, - { - "epoch": 11.511864406779662, - "grad_norm": 1.326134204864502, - "learning_rate": 7.832562814070352e-05, - "loss": 6.0597, - "step": 22074 - }, - { - "epoch": 11.512385919165581, - "grad_norm": 1.4541294574737549, - "learning_rate": 7.832462311557788e-05, - "loss": 5.5939, - "step": 22075 - }, - { - "epoch": 11.512907431551499, - "grad_norm": 1.3797980546951294, - "learning_rate": 7.832361809045226e-05, - "loss": 5.5935, - "step": 22076 - }, - { - "epoch": 11.513428943937418, - "grad_norm": 1.4359245300292969, - "learning_rate": 7.832261306532663e-05, - "loss": 5.4824, - "step": 22077 - }, - { - "epoch": 11.513950456323338, - "grad_norm": 1.337112545967102, - "learning_rate": 7.8321608040201e-05, - "loss": 5.8417, - "step": 22078 - }, - { - "epoch": 11.514471968709257, - "grad_norm": 1.4842630624771118, - "learning_rate": 7.832060301507538e-05, - "loss": 5.5827, - "step": 22079 - }, - { - "epoch": 11.514993481095177, - "grad_norm": 1.377435564994812, - "learning_rate": 7.831959798994976e-05, - "loss": 5.5528, - "step": 22080 - }, - { - "epoch": 11.515514993481094, - "grad_norm": 1.3752537965774536, - "learning_rate": 7.831859296482412e-05, - "loss": 5.5483, - "step": 22081 - }, - { - "epoch": 11.516036505867014, - "grad_norm": 1.3932490348815918, - "learning_rate": 7.83175879396985e-05, - "loss": 5.8189, - "step": 22082 - }, - { - "epoch": 11.516558018252933, - "grad_norm": 1.5265852212905884, - "learning_rate": 7.831658291457287e-05, - "loss": 5.1998, - "step": 22083 - }, - { - "epoch": 11.517079530638853, - "grad_norm": 1.4365898370742798, - "learning_rate": 7.831557788944724e-05, - "loss": 5.2392, - "step": 22084 - }, - { - "epoch": 11.517601043024772, - "grad_norm": 1.5252139568328857, - "learning_rate": 7.831457286432161e-05, - "loss": 5.4448, - "step": 22085 - }, - { - "epoch": 11.518122555410692, - "grad_norm": 1.4665093421936035, - "learning_rate": 7.831356783919597e-05, - "loss": 5.1608, - "step": 22086 - }, - { - "epoch": 11.518644067796611, - "grad_norm": 1.5303748846054077, - "learning_rate": 7.831256281407035e-05, - "loss": 5.4259, - "step": 22087 - }, - { - "epoch": 11.519165580182529, - "grad_norm": 1.3533899784088135, - "learning_rate": 7.831155778894473e-05, - "loss": 5.8171, - "step": 22088 - }, - { - "epoch": 11.519687092568448, - "grad_norm": 1.5275877714157104, - "learning_rate": 7.83105527638191e-05, - "loss": 4.7506, - "step": 22089 - }, - { - "epoch": 11.520208604954368, - "grad_norm": 1.4735097885131836, - "learning_rate": 7.830954773869347e-05, - "loss": 5.7322, - "step": 22090 - }, - { - "epoch": 11.520730117340287, - "grad_norm": 1.4973050355911255, - "learning_rate": 7.830854271356785e-05, - "loss": 5.439, - "step": 22091 - }, - { - "epoch": 11.521251629726207, - "grad_norm": 1.3999415636062622, - "learning_rate": 7.830753768844221e-05, - "loss": 5.1723, - "step": 22092 - }, - { - "epoch": 11.521773142112124, - "grad_norm": 1.4076952934265137, - "learning_rate": 7.830653266331659e-05, - "loss": 5.4478, - "step": 22093 - }, - { - "epoch": 11.522294654498044, - "grad_norm": 1.4733366966247559, - "learning_rate": 7.830552763819095e-05, - "loss": 5.4797, - "step": 22094 - }, - { - "epoch": 11.522816166883963, - "grad_norm": 1.4806276559829712, - "learning_rate": 7.830452261306533e-05, - "loss": 4.9645, - "step": 22095 - }, - { - "epoch": 11.523337679269883, - "grad_norm": 1.4671084880828857, - "learning_rate": 7.83035175879397e-05, - "loss": 5.5842, - "step": 22096 - }, - { - "epoch": 11.523859191655802, - "grad_norm": 1.4196441173553467, - "learning_rate": 7.830251256281407e-05, - "loss": 5.2962, - "step": 22097 - }, - { - "epoch": 11.524380704041722, - "grad_norm": 1.4241409301757812, - "learning_rate": 7.830150753768844e-05, - "loss": 5.617, - "step": 22098 - }, - { - "epoch": 11.52490221642764, - "grad_norm": 1.4878774881362915, - "learning_rate": 7.830050251256282e-05, - "loss": 5.5182, - "step": 22099 - }, - { - "epoch": 11.525423728813559, - "grad_norm": 1.537034511566162, - "learning_rate": 7.82994974874372e-05, - "loss": 5.3022, - "step": 22100 - }, - { - "epoch": 11.525945241199478, - "grad_norm": 1.4222759008407593, - "learning_rate": 7.829849246231156e-05, - "loss": 5.4814, - "step": 22101 - }, - { - "epoch": 11.526466753585398, - "grad_norm": 1.509555697441101, - "learning_rate": 7.829748743718594e-05, - "loss": 5.2602, - "step": 22102 - }, - { - "epoch": 11.526988265971317, - "grad_norm": 1.5052516460418701, - "learning_rate": 7.82964824120603e-05, - "loss": 5.2613, - "step": 22103 - }, - { - "epoch": 11.527509778357237, - "grad_norm": 1.4853413105010986, - "learning_rate": 7.829547738693468e-05, - "loss": 4.7721, - "step": 22104 - }, - { - "epoch": 11.528031290743154, - "grad_norm": 1.4062718152999878, - "learning_rate": 7.829447236180904e-05, - "loss": 5.4749, - "step": 22105 - }, - { - "epoch": 11.528552803129074, - "grad_norm": 1.360733985900879, - "learning_rate": 7.829346733668342e-05, - "loss": 5.6124, - "step": 22106 - }, - { - "epoch": 11.529074315514993, - "grad_norm": 1.33681321144104, - "learning_rate": 7.829246231155779e-05, - "loss": 5.3781, - "step": 22107 - }, - { - "epoch": 11.529595827900913, - "grad_norm": 1.626869797706604, - "learning_rate": 7.829145728643216e-05, - "loss": 4.5528, - "step": 22108 - }, - { - "epoch": 11.530117340286832, - "grad_norm": 1.4051096439361572, - "learning_rate": 7.829045226130654e-05, - "loss": 5.6486, - "step": 22109 - }, - { - "epoch": 11.530638852672752, - "grad_norm": 1.4996206760406494, - "learning_rate": 7.828944723618092e-05, - "loss": 5.049, - "step": 22110 - }, - { - "epoch": 11.53116036505867, - "grad_norm": 1.3945491313934326, - "learning_rate": 7.828844221105528e-05, - "loss": 5.4094, - "step": 22111 - }, - { - "epoch": 11.531681877444589, - "grad_norm": 1.5715994834899902, - "learning_rate": 7.828743718592966e-05, - "loss": 5.2852, - "step": 22112 - }, - { - "epoch": 11.532203389830508, - "grad_norm": 1.4541016817092896, - "learning_rate": 7.828643216080403e-05, - "loss": 5.2281, - "step": 22113 - }, - { - "epoch": 11.532724902216428, - "grad_norm": 1.3652236461639404, - "learning_rate": 7.828542713567839e-05, - "loss": 5.684, - "step": 22114 - }, - { - "epoch": 11.533246414602347, - "grad_norm": 1.347825050354004, - "learning_rate": 7.828442211055277e-05, - "loss": 5.6621, - "step": 22115 - }, - { - "epoch": 11.533767926988267, - "grad_norm": 1.3632346391677856, - "learning_rate": 7.828341708542713e-05, - "loss": 5.6357, - "step": 22116 - }, - { - "epoch": 11.534289439374184, - "grad_norm": 1.5317305326461792, - "learning_rate": 7.828241206030151e-05, - "loss": 5.2768, - "step": 22117 - }, - { - "epoch": 11.534810951760104, - "grad_norm": 1.414209008216858, - "learning_rate": 7.828140703517587e-05, - "loss": 5.2025, - "step": 22118 - }, - { - "epoch": 11.535332464146023, - "grad_norm": 1.3407833576202393, - "learning_rate": 7.828040201005025e-05, - "loss": 5.8454, - "step": 22119 - }, - { - "epoch": 11.535853976531943, - "grad_norm": 1.468333125114441, - "learning_rate": 7.827939698492463e-05, - "loss": 5.6834, - "step": 22120 - }, - { - "epoch": 11.536375488917862, - "grad_norm": 1.4168390035629272, - "learning_rate": 7.827839195979901e-05, - "loss": 5.5851, - "step": 22121 - }, - { - "epoch": 11.536897001303782, - "grad_norm": 1.4518572092056274, - "learning_rate": 7.827738693467337e-05, - "loss": 5.6148, - "step": 22122 - }, - { - "epoch": 11.5374185136897, - "grad_norm": 1.420469045639038, - "learning_rate": 7.827638190954775e-05, - "loss": 4.9092, - "step": 22123 - }, - { - "epoch": 11.537940026075619, - "grad_norm": 1.3616431951522827, - "learning_rate": 7.827537688442211e-05, - "loss": 5.1447, - "step": 22124 - }, - { - "epoch": 11.538461538461538, - "grad_norm": 1.452860713005066, - "learning_rate": 7.827437185929649e-05, - "loss": 5.2975, - "step": 22125 - }, - { - "epoch": 11.538983050847458, - "grad_norm": 1.3899126052856445, - "learning_rate": 7.827336683417086e-05, - "loss": 5.4758, - "step": 22126 - }, - { - "epoch": 11.539504563233377, - "grad_norm": 1.5371448993682861, - "learning_rate": 7.827236180904522e-05, - "loss": 5.528, - "step": 22127 - }, - { - "epoch": 11.540026075619297, - "grad_norm": 1.4121392965316772, - "learning_rate": 7.82713567839196e-05, - "loss": 5.2918, - "step": 22128 - }, - { - "epoch": 11.540547588005214, - "grad_norm": 1.4460489749908447, - "learning_rate": 7.827035175879398e-05, - "loss": 5.6393, - "step": 22129 - }, - { - "epoch": 11.541069100391134, - "grad_norm": 1.51017427444458, - "learning_rate": 7.826934673366835e-05, - "loss": 5.7783, - "step": 22130 - }, - { - "epoch": 11.541590612777053, - "grad_norm": 1.5760149955749512, - "learning_rate": 7.826834170854272e-05, - "loss": 5.3629, - "step": 22131 - }, - { - "epoch": 11.542112125162973, - "grad_norm": 1.7335904836654663, - "learning_rate": 7.82673366834171e-05, - "loss": 4.6991, - "step": 22132 - }, - { - "epoch": 11.542633637548892, - "grad_norm": 1.3692766427993774, - "learning_rate": 7.826633165829146e-05, - "loss": 5.542, - "step": 22133 - }, - { - "epoch": 11.543155149934812, - "grad_norm": 1.3693904876708984, - "learning_rate": 7.826532663316584e-05, - "loss": 5.6318, - "step": 22134 - }, - { - "epoch": 11.54367666232073, - "grad_norm": 1.4866416454315186, - "learning_rate": 7.82643216080402e-05, - "loss": 5.3599, - "step": 22135 - }, - { - "epoch": 11.544198174706649, - "grad_norm": 1.3466613292694092, - "learning_rate": 7.826331658291458e-05, - "loss": 5.7759, - "step": 22136 - }, - { - "epoch": 11.544719687092568, - "grad_norm": 1.395326018333435, - "learning_rate": 7.826231155778894e-05, - "loss": 5.6837, - "step": 22137 - }, - { - "epoch": 11.545241199478488, - "grad_norm": 1.4039946794509888, - "learning_rate": 7.826130653266332e-05, - "loss": 5.1831, - "step": 22138 - }, - { - "epoch": 11.545762711864407, - "grad_norm": 1.340196967124939, - "learning_rate": 7.826030150753769e-05, - "loss": 5.4611, - "step": 22139 - }, - { - "epoch": 11.546284224250327, - "grad_norm": 1.4165114164352417, - "learning_rate": 7.825929648241206e-05, - "loss": 5.5677, - "step": 22140 - }, - { - "epoch": 11.546805736636244, - "grad_norm": 1.4705196619033813, - "learning_rate": 7.825829145728644e-05, - "loss": 5.5672, - "step": 22141 - }, - { - "epoch": 11.547327249022164, - "grad_norm": 1.3600423336029053, - "learning_rate": 7.82572864321608e-05, - "loss": 5.7881, - "step": 22142 - }, - { - "epoch": 11.547848761408083, - "grad_norm": 1.3753876686096191, - "learning_rate": 7.825628140703518e-05, - "loss": 5.4428, - "step": 22143 - }, - { - "epoch": 11.548370273794003, - "grad_norm": 1.4449810981750488, - "learning_rate": 7.825527638190955e-05, - "loss": 5.4687, - "step": 22144 - }, - { - "epoch": 11.548891786179922, - "grad_norm": 1.4047844409942627, - "learning_rate": 7.825427135678393e-05, - "loss": 5.5265, - "step": 22145 - }, - { - "epoch": 11.549413298565842, - "grad_norm": 1.4163531064987183, - "learning_rate": 7.825326633165829e-05, - "loss": 5.4158, - "step": 22146 - }, - { - "epoch": 11.54993481095176, - "grad_norm": 1.4283349514007568, - "learning_rate": 7.825226130653267e-05, - "loss": 5.6068, - "step": 22147 - }, - { - "epoch": 11.550456323337679, - "grad_norm": 1.5690315961837769, - "learning_rate": 7.825125628140703e-05, - "loss": 5.3684, - "step": 22148 - }, - { - "epoch": 11.550977835723598, - "grad_norm": 1.3714631795883179, - "learning_rate": 7.825025125628141e-05, - "loss": 5.8475, - "step": 22149 - }, - { - "epoch": 11.551499348109518, - "grad_norm": 1.3561311960220337, - "learning_rate": 7.824924623115579e-05, - "loss": 5.3824, - "step": 22150 - }, - { - "epoch": 11.552020860495437, - "grad_norm": 1.3751566410064697, - "learning_rate": 7.824824120603017e-05, - "loss": 5.7811, - "step": 22151 - }, - { - "epoch": 11.552542372881355, - "grad_norm": 1.5321232080459595, - "learning_rate": 7.824723618090453e-05, - "loss": 5.2429, - "step": 22152 - }, - { - "epoch": 11.553063885267274, - "grad_norm": 1.5304880142211914, - "learning_rate": 7.82462311557789e-05, - "loss": 5.7509, - "step": 22153 - }, - { - "epoch": 11.553585397653194, - "grad_norm": 1.4711540937423706, - "learning_rate": 7.824522613065327e-05, - "loss": 5.4732, - "step": 22154 - }, - { - "epoch": 11.554106910039113, - "grad_norm": 1.559682011604309, - "learning_rate": 7.824422110552764e-05, - "loss": 5.0288, - "step": 22155 - }, - { - "epoch": 11.554628422425033, - "grad_norm": 1.4623527526855469, - "learning_rate": 7.824321608040201e-05, - "loss": 5.533, - "step": 22156 - }, - { - "epoch": 11.555149934810952, - "grad_norm": 1.4227215051651, - "learning_rate": 7.824221105527638e-05, - "loss": 5.4402, - "step": 22157 - }, - { - "epoch": 11.555671447196872, - "grad_norm": 1.4006030559539795, - "learning_rate": 7.824120603015076e-05, - "loss": 5.6528, - "step": 22158 - }, - { - "epoch": 11.55619295958279, - "grad_norm": 1.4222112894058228, - "learning_rate": 7.824020100502512e-05, - "loss": 5.5376, - "step": 22159 - }, - { - "epoch": 11.556714471968709, - "grad_norm": 1.5910199880599976, - "learning_rate": 7.82391959798995e-05, - "loss": 5.2232, - "step": 22160 - }, - { - "epoch": 11.557235984354628, - "grad_norm": 1.489809513092041, - "learning_rate": 7.823819095477388e-05, - "loss": 5.2796, - "step": 22161 - }, - { - "epoch": 11.557757496740548, - "grad_norm": 1.4213695526123047, - "learning_rate": 7.823718592964825e-05, - "loss": 5.7613, - "step": 22162 - }, - { - "epoch": 11.558279009126467, - "grad_norm": 1.4982469081878662, - "learning_rate": 7.823618090452262e-05, - "loss": 5.256, - "step": 22163 - }, - { - "epoch": 11.558800521512385, - "grad_norm": 1.4811235666275024, - "learning_rate": 7.8235175879397e-05, - "loss": 5.565, - "step": 22164 - }, - { - "epoch": 11.559322033898304, - "grad_norm": 1.2767549753189087, - "learning_rate": 7.823417085427136e-05, - "loss": 5.8773, - "step": 22165 - }, - { - "epoch": 11.559843546284224, - "grad_norm": 1.4317289590835571, - "learning_rate": 7.823316582914572e-05, - "loss": 5.1842, - "step": 22166 - }, - { - "epoch": 11.560365058670143, - "grad_norm": 1.5952823162078857, - "learning_rate": 7.82321608040201e-05, - "loss": 4.9602, - "step": 22167 - }, - { - "epoch": 11.560886571056063, - "grad_norm": 1.469472050666809, - "learning_rate": 7.823115577889447e-05, - "loss": 5.705, - "step": 22168 - }, - { - "epoch": 11.561408083441982, - "grad_norm": 1.5273867845535278, - "learning_rate": 7.823015075376884e-05, - "loss": 5.5779, - "step": 22169 - }, - { - "epoch": 11.561929595827902, - "grad_norm": 1.5344551801681519, - "learning_rate": 7.822914572864322e-05, - "loss": 5.6934, - "step": 22170 - }, - { - "epoch": 11.56245110821382, - "grad_norm": 1.4314265251159668, - "learning_rate": 7.82281407035176e-05, - "loss": 5.2901, - "step": 22171 - }, - { - "epoch": 11.562972620599739, - "grad_norm": 1.3685470819473267, - "learning_rate": 7.822713567839196e-05, - "loss": 5.5199, - "step": 22172 - }, - { - "epoch": 11.563494132985658, - "grad_norm": 1.5146431922912598, - "learning_rate": 7.822613065326634e-05, - "loss": 5.3411, - "step": 22173 - }, - { - "epoch": 11.564015645371578, - "grad_norm": 1.3759765625, - "learning_rate": 7.822512562814071e-05, - "loss": 5.7251, - "step": 22174 - }, - { - "epoch": 11.564537157757497, - "grad_norm": 1.4317816495895386, - "learning_rate": 7.822412060301508e-05, - "loss": 5.6166, - "step": 22175 - }, - { - "epoch": 11.565058670143415, - "grad_norm": 1.3486006259918213, - "learning_rate": 7.822311557788945e-05, - "loss": 5.7524, - "step": 22176 - }, - { - "epoch": 11.565580182529335, - "grad_norm": 1.3786684274673462, - "learning_rate": 7.822211055276383e-05, - "loss": 5.7553, - "step": 22177 - }, - { - "epoch": 11.566101694915254, - "grad_norm": 1.4051475524902344, - "learning_rate": 7.822110552763819e-05, - "loss": 5.6098, - "step": 22178 - }, - { - "epoch": 11.566623207301173, - "grad_norm": 1.435900092124939, - "learning_rate": 7.822010050251256e-05, - "loss": 5.5072, - "step": 22179 - }, - { - "epoch": 11.567144719687093, - "grad_norm": 1.5103262662887573, - "learning_rate": 7.821909547738693e-05, - "loss": 5.0764, - "step": 22180 - }, - { - "epoch": 11.567666232073012, - "grad_norm": 1.4245679378509521, - "learning_rate": 7.821809045226131e-05, - "loss": 5.5014, - "step": 22181 - }, - { - "epoch": 11.568187744458932, - "grad_norm": 1.4157688617706299, - "learning_rate": 7.821708542713569e-05, - "loss": 5.7409, - "step": 22182 - }, - { - "epoch": 11.56870925684485, - "grad_norm": 1.5170047283172607, - "learning_rate": 7.821608040201005e-05, - "loss": 5.0676, - "step": 22183 - }, - { - "epoch": 11.569230769230769, - "grad_norm": 1.492152452468872, - "learning_rate": 7.821507537688443e-05, - "loss": 5.5366, - "step": 22184 - }, - { - "epoch": 11.569752281616688, - "grad_norm": 1.394702434539795, - "learning_rate": 7.82140703517588e-05, - "loss": 5.5894, - "step": 22185 - }, - { - "epoch": 11.570273794002608, - "grad_norm": 1.4771320819854736, - "learning_rate": 7.821306532663317e-05, - "loss": 4.2992, - "step": 22186 - }, - { - "epoch": 11.570795306388527, - "grad_norm": 1.5010353326797485, - "learning_rate": 7.821206030150754e-05, - "loss": 5.343, - "step": 22187 - }, - { - "epoch": 11.571316818774445, - "grad_norm": 1.4606380462646484, - "learning_rate": 7.821105527638192e-05, - "loss": 5.5141, - "step": 22188 - }, - { - "epoch": 11.571838331160365, - "grad_norm": 1.5737509727478027, - "learning_rate": 7.821005025125628e-05, - "loss": 5.0212, - "step": 22189 - }, - { - "epoch": 11.572359843546284, - "grad_norm": 1.3477305173873901, - "learning_rate": 7.820904522613066e-05, - "loss": 5.7367, - "step": 22190 - }, - { - "epoch": 11.572881355932203, - "grad_norm": 1.336254358291626, - "learning_rate": 7.820804020100504e-05, - "loss": 5.5176, - "step": 22191 - }, - { - "epoch": 11.573402868318123, - "grad_norm": 1.5757997035980225, - "learning_rate": 7.820703517587941e-05, - "loss": 5.3041, - "step": 22192 - }, - { - "epoch": 11.573924380704042, - "grad_norm": 1.4285372495651245, - "learning_rate": 7.820603015075378e-05, - "loss": 5.7026, - "step": 22193 - }, - { - "epoch": 11.57444589308996, - "grad_norm": 1.510820984840393, - "learning_rate": 7.820502512562814e-05, - "loss": 5.2068, - "step": 22194 - }, - { - "epoch": 11.57496740547588, - "grad_norm": 1.5256520509719849, - "learning_rate": 7.820402010050252e-05, - "loss": 5.0884, - "step": 22195 - }, - { - "epoch": 11.575488917861799, - "grad_norm": 1.5937970876693726, - "learning_rate": 7.820301507537688e-05, - "loss": 4.8922, - "step": 22196 - }, - { - "epoch": 11.576010430247718, - "grad_norm": 1.4253637790679932, - "learning_rate": 7.820201005025126e-05, - "loss": 5.2783, - "step": 22197 - }, - { - "epoch": 11.576531942633638, - "grad_norm": 1.513887643814087, - "learning_rate": 7.820100502512563e-05, - "loss": 4.9653, - "step": 22198 - }, - { - "epoch": 11.577053455019557, - "grad_norm": 1.5422521829605103, - "learning_rate": 7.82e-05, - "loss": 5.2293, - "step": 22199 - }, - { - "epoch": 11.577574967405475, - "grad_norm": 1.387839674949646, - "learning_rate": 7.819899497487437e-05, - "loss": 4.7921, - "step": 22200 - }, - { - "epoch": 11.578096479791395, - "grad_norm": 1.3379532098770142, - "learning_rate": 7.819798994974875e-05, - "loss": 5.6958, - "step": 22201 - }, - { - "epoch": 11.578617992177314, - "grad_norm": 1.357678771018982, - "learning_rate": 7.819698492462312e-05, - "loss": 5.5383, - "step": 22202 - }, - { - "epoch": 11.579139504563233, - "grad_norm": 1.4132375717163086, - "learning_rate": 7.81959798994975e-05, - "loss": 5.4977, - "step": 22203 - }, - { - "epoch": 11.579661016949153, - "grad_norm": 1.4325311183929443, - "learning_rate": 7.819497487437187e-05, - "loss": 5.6126, - "step": 22204 - }, - { - "epoch": 11.580182529335072, - "grad_norm": 1.4130209684371948, - "learning_rate": 7.819396984924624e-05, - "loss": 5.6472, - "step": 22205 - }, - { - "epoch": 11.58070404172099, - "grad_norm": 1.614508867263794, - "learning_rate": 7.819296482412061e-05, - "loss": 4.8391, - "step": 22206 - }, - { - "epoch": 11.58122555410691, - "grad_norm": 1.4867111444473267, - "learning_rate": 7.819195979899497e-05, - "loss": 5.2927, - "step": 22207 - }, - { - "epoch": 11.581747066492829, - "grad_norm": 1.3977354764938354, - "learning_rate": 7.819095477386935e-05, - "loss": 5.2194, - "step": 22208 - }, - { - "epoch": 11.582268578878748, - "grad_norm": 1.4261736869812012, - "learning_rate": 7.818994974874371e-05, - "loss": 5.316, - "step": 22209 - }, - { - "epoch": 11.582790091264668, - "grad_norm": 1.4234219789505005, - "learning_rate": 7.818894472361809e-05, - "loss": 5.0744, - "step": 22210 - }, - { - "epoch": 11.583311603650587, - "grad_norm": 1.4661136865615845, - "learning_rate": 7.818793969849246e-05, - "loss": 5.8166, - "step": 22211 - }, - { - "epoch": 11.583833116036505, - "grad_norm": 1.4551454782485962, - "learning_rate": 7.818693467336683e-05, - "loss": 5.2902, - "step": 22212 - }, - { - "epoch": 11.584354628422425, - "grad_norm": 1.406449317932129, - "learning_rate": 7.818592964824121e-05, - "loss": 5.8406, - "step": 22213 - }, - { - "epoch": 11.584876140808344, - "grad_norm": 1.5082216262817383, - "learning_rate": 7.818492462311559e-05, - "loss": 5.2295, - "step": 22214 - }, - { - "epoch": 11.585397653194264, - "grad_norm": 1.495180368423462, - "learning_rate": 7.818391959798995e-05, - "loss": 4.3825, - "step": 22215 - }, - { - "epoch": 11.585919165580183, - "grad_norm": 1.4745031595230103, - "learning_rate": 7.818291457286433e-05, - "loss": 5.7416, - "step": 22216 - }, - { - "epoch": 11.586440677966102, - "grad_norm": 1.6791819334030151, - "learning_rate": 7.81819095477387e-05, - "loss": 5.1544, - "step": 22217 - }, - { - "epoch": 11.58696219035202, - "grad_norm": 1.478054404258728, - "learning_rate": 7.818090452261307e-05, - "loss": 5.5913, - "step": 22218 - }, - { - "epoch": 11.58748370273794, - "grad_norm": 1.4979058504104614, - "learning_rate": 7.817989949748744e-05, - "loss": 5.3803, - "step": 22219 - }, - { - "epoch": 11.588005215123859, - "grad_norm": 1.4696394205093384, - "learning_rate": 7.81788944723618e-05, - "loss": 5.3836, - "step": 22220 - }, - { - "epoch": 11.588526727509779, - "grad_norm": 1.5257624387741089, - "learning_rate": 7.817788944723618e-05, - "loss": 5.3322, - "step": 22221 - }, - { - "epoch": 11.589048239895698, - "grad_norm": 1.6462492942810059, - "learning_rate": 7.817688442211056e-05, - "loss": 5.0342, - "step": 22222 - }, - { - "epoch": 11.589569752281617, - "grad_norm": 1.5049477815628052, - "learning_rate": 7.817587939698494e-05, - "loss": 5.5866, - "step": 22223 - }, - { - "epoch": 11.590091264667535, - "grad_norm": 1.5852024555206299, - "learning_rate": 7.81748743718593e-05, - "loss": 5.4811, - "step": 22224 - }, - { - "epoch": 11.590612777053455, - "grad_norm": 1.533625841140747, - "learning_rate": 7.817386934673368e-05, - "loss": 5.5069, - "step": 22225 - }, - { - "epoch": 11.591134289439374, - "grad_norm": 1.6040693521499634, - "learning_rate": 7.817286432160804e-05, - "loss": 5.3857, - "step": 22226 - }, - { - "epoch": 11.591655801825294, - "grad_norm": 1.485075831413269, - "learning_rate": 7.817185929648242e-05, - "loss": 5.1815, - "step": 22227 - }, - { - "epoch": 11.592177314211213, - "grad_norm": 1.6008497476577759, - "learning_rate": 7.817085427135678e-05, - "loss": 5.3485, - "step": 22228 - }, - { - "epoch": 11.592698826597132, - "grad_norm": 1.4957135915756226, - "learning_rate": 7.816984924623116e-05, - "loss": 5.5023, - "step": 22229 - }, - { - "epoch": 11.59322033898305, - "grad_norm": 1.3759287595748901, - "learning_rate": 7.816884422110553e-05, - "loss": 5.6067, - "step": 22230 - }, - { - "epoch": 11.59374185136897, - "grad_norm": 1.5668936967849731, - "learning_rate": 7.81678391959799e-05, - "loss": 5.0459, - "step": 22231 - }, - { - "epoch": 11.594263363754889, - "grad_norm": 1.4681673049926758, - "learning_rate": 7.816683417085427e-05, - "loss": 5.3947, - "step": 22232 - }, - { - "epoch": 11.594784876140809, - "grad_norm": 1.4406111240386963, - "learning_rate": 7.816582914572865e-05, - "loss": 5.4428, - "step": 22233 - }, - { - "epoch": 11.595306388526728, - "grad_norm": 1.4699944257736206, - "learning_rate": 7.816482412060302e-05, - "loss": 5.175, - "step": 22234 - }, - { - "epoch": 11.595827900912647, - "grad_norm": 1.393046498298645, - "learning_rate": 7.816381909547739e-05, - "loss": 5.4501, - "step": 22235 - }, - { - "epoch": 11.596349413298565, - "grad_norm": 1.457277774810791, - "learning_rate": 7.816281407035177e-05, - "loss": 5.3715, - "step": 22236 - }, - { - "epoch": 11.596870925684485, - "grad_norm": 1.4374773502349854, - "learning_rate": 7.816180904522613e-05, - "loss": 5.7428, - "step": 22237 - }, - { - "epoch": 11.597392438070404, - "grad_norm": 1.4734430313110352, - "learning_rate": 7.816080402010051e-05, - "loss": 5.4347, - "step": 22238 - }, - { - "epoch": 11.597913950456324, - "grad_norm": 1.4397342205047607, - "learning_rate": 7.815979899497487e-05, - "loss": 5.4129, - "step": 22239 - }, - { - "epoch": 11.598435462842243, - "grad_norm": 1.5833195447921753, - "learning_rate": 7.815879396984925e-05, - "loss": 5.1467, - "step": 22240 - }, - { - "epoch": 11.598956975228162, - "grad_norm": 1.5682786703109741, - "learning_rate": 7.815778894472361e-05, - "loss": 5.7073, - "step": 22241 - }, - { - "epoch": 11.59947848761408, - "grad_norm": 1.5889302492141724, - "learning_rate": 7.815678391959799e-05, - "loss": 5.4516, - "step": 22242 - }, - { - "epoch": 11.6, - "grad_norm": 1.4085415601730347, - "learning_rate": 7.815577889447237e-05, - "loss": 5.7257, - "step": 22243 - }, - { - "epoch": 11.600521512385919, - "grad_norm": 1.5042115449905396, - "learning_rate": 7.815477386934675e-05, - "loss": 5.2843, - "step": 22244 - }, - { - "epoch": 11.601043024771839, - "grad_norm": 1.4303078651428223, - "learning_rate": 7.815376884422111e-05, - "loss": 5.4366, - "step": 22245 - }, - { - "epoch": 11.601564537157758, - "grad_norm": 1.5163918733596802, - "learning_rate": 7.815276381909548e-05, - "loss": 5.2764, - "step": 22246 - }, - { - "epoch": 11.602086049543676, - "grad_norm": 1.5033605098724365, - "learning_rate": 7.815175879396985e-05, - "loss": 5.7736, - "step": 22247 - }, - { - "epoch": 11.602607561929595, - "grad_norm": 1.3807663917541504, - "learning_rate": 7.815075376884422e-05, - "loss": 4.773, - "step": 22248 - }, - { - "epoch": 11.603129074315515, - "grad_norm": 1.5445321798324585, - "learning_rate": 7.81497487437186e-05, - "loss": 5.6875, - "step": 22249 - }, - { - "epoch": 11.603650586701434, - "grad_norm": 1.6280958652496338, - "learning_rate": 7.814874371859296e-05, - "loss": 4.6392, - "step": 22250 - }, - { - "epoch": 11.604172099087354, - "grad_norm": 1.5137485265731812, - "learning_rate": 7.814773869346734e-05, - "loss": 5.1257, - "step": 22251 - }, - { - "epoch": 11.604693611473273, - "grad_norm": 1.5387120246887207, - "learning_rate": 7.81467336683417e-05, - "loss": 4.966, - "step": 22252 - }, - { - "epoch": 11.605215123859193, - "grad_norm": 1.3929563760757446, - "learning_rate": 7.814572864321608e-05, - "loss": 5.5264, - "step": 22253 - }, - { - "epoch": 11.60573663624511, - "grad_norm": 1.372735619544983, - "learning_rate": 7.814472361809046e-05, - "loss": 5.4844, - "step": 22254 - }, - { - "epoch": 11.60625814863103, - "grad_norm": 1.4124233722686768, - "learning_rate": 7.814371859296484e-05, - "loss": 5.2074, - "step": 22255 - }, - { - "epoch": 11.60677966101695, - "grad_norm": 1.4083153009414673, - "learning_rate": 7.81427135678392e-05, - "loss": 5.0566, - "step": 22256 - }, - { - "epoch": 11.607301173402869, - "grad_norm": 1.4077048301696777, - "learning_rate": 7.814170854271358e-05, - "loss": 5.2411, - "step": 22257 - }, - { - "epoch": 11.607822685788788, - "grad_norm": 1.5373262166976929, - "learning_rate": 7.814070351758794e-05, - "loss": 5.5414, - "step": 22258 - }, - { - "epoch": 11.608344198174706, - "grad_norm": 1.491277813911438, - "learning_rate": 7.813969849246231e-05, - "loss": 5.3129, - "step": 22259 - }, - { - "epoch": 11.608865710560625, - "grad_norm": 1.3822565078735352, - "learning_rate": 7.813869346733669e-05, - "loss": 4.7661, - "step": 22260 - }, - { - "epoch": 11.609387222946545, - "grad_norm": 1.4708571434020996, - "learning_rate": 7.813768844221105e-05, - "loss": 5.5881, - "step": 22261 - }, - { - "epoch": 11.609908735332464, - "grad_norm": 1.4857215881347656, - "learning_rate": 7.813668341708543e-05, - "loss": 5.4841, - "step": 22262 - }, - { - "epoch": 11.610430247718384, - "grad_norm": 1.5040340423583984, - "learning_rate": 7.81356783919598e-05, - "loss": 5.1947, - "step": 22263 - }, - { - "epoch": 11.610951760104303, - "grad_norm": 1.5267579555511475, - "learning_rate": 7.813467336683418e-05, - "loss": 5.4333, - "step": 22264 - }, - { - "epoch": 11.611473272490223, - "grad_norm": 1.4858763217926025, - "learning_rate": 7.813366834170855e-05, - "loss": 5.1045, - "step": 22265 - }, - { - "epoch": 11.61199478487614, - "grad_norm": 1.4751107692718506, - "learning_rate": 7.813266331658292e-05, - "loss": 5.1586, - "step": 22266 - }, - { - "epoch": 11.61251629726206, - "grad_norm": 1.4431971311569214, - "learning_rate": 7.813165829145729e-05, - "loss": 5.7895, - "step": 22267 - }, - { - "epoch": 11.61303780964798, - "grad_norm": 1.53630793094635, - "learning_rate": 7.813065326633167e-05, - "loss": 4.5753, - "step": 22268 - }, - { - "epoch": 11.613559322033899, - "grad_norm": 1.3435505628585815, - "learning_rate": 7.812964824120603e-05, - "loss": 5.6129, - "step": 22269 - }, - { - "epoch": 11.614080834419818, - "grad_norm": 1.3443918228149414, - "learning_rate": 7.812864321608041e-05, - "loss": 5.3278, - "step": 22270 - }, - { - "epoch": 11.614602346805736, - "grad_norm": 1.3864680528640747, - "learning_rate": 7.812763819095477e-05, - "loss": 5.2531, - "step": 22271 - }, - { - "epoch": 11.615123859191655, - "grad_norm": 1.5269242525100708, - "learning_rate": 7.812663316582914e-05, - "loss": 5.2768, - "step": 22272 - }, - { - "epoch": 11.615645371577575, - "grad_norm": 1.4812111854553223, - "learning_rate": 7.812562814070352e-05, - "loss": 5.2434, - "step": 22273 - }, - { - "epoch": 11.616166883963494, - "grad_norm": 1.4119127988815308, - "learning_rate": 7.81246231155779e-05, - "loss": 5.4174, - "step": 22274 - }, - { - "epoch": 11.616688396349414, - "grad_norm": 1.3582298755645752, - "learning_rate": 7.812361809045227e-05, - "loss": 4.9632, - "step": 22275 - }, - { - "epoch": 11.617209908735333, - "grad_norm": 1.399513602256775, - "learning_rate": 7.812261306532664e-05, - "loss": 5.5916, - "step": 22276 - }, - { - "epoch": 11.617731421121253, - "grad_norm": 1.398338794708252, - "learning_rate": 7.812160804020101e-05, - "loss": 5.5129, - "step": 22277 - }, - { - "epoch": 11.61825293350717, - "grad_norm": 1.4259601831436157, - "learning_rate": 7.812060301507538e-05, - "loss": 5.3897, - "step": 22278 - }, - { - "epoch": 11.61877444589309, - "grad_norm": 1.4211174249649048, - "learning_rate": 7.811959798994976e-05, - "loss": 5.264, - "step": 22279 - }, - { - "epoch": 11.61929595827901, - "grad_norm": 1.3958979845046997, - "learning_rate": 7.811859296482412e-05, - "loss": 5.358, - "step": 22280 - }, - { - "epoch": 11.619817470664929, - "grad_norm": 1.357175588607788, - "learning_rate": 7.81175879396985e-05, - "loss": 5.6092, - "step": 22281 - }, - { - "epoch": 11.620338983050848, - "grad_norm": 1.41832435131073, - "learning_rate": 7.811658291457286e-05, - "loss": 5.0918, - "step": 22282 - }, - { - "epoch": 11.620860495436766, - "grad_norm": 1.4910151958465576, - "learning_rate": 7.811557788944724e-05, - "loss": 5.6387, - "step": 22283 - }, - { - "epoch": 11.621382007822685, - "grad_norm": 1.4622622728347778, - "learning_rate": 7.811457286432162e-05, - "loss": 5.549, - "step": 22284 - }, - { - "epoch": 11.621903520208605, - "grad_norm": 1.6053091287612915, - "learning_rate": 7.8113567839196e-05, - "loss": 4.9446, - "step": 22285 - }, - { - "epoch": 11.622425032594524, - "grad_norm": 1.412048578262329, - "learning_rate": 7.811256281407036e-05, - "loss": 5.577, - "step": 22286 - }, - { - "epoch": 11.622946544980444, - "grad_norm": 1.48428475856781, - "learning_rate": 7.811155778894472e-05, - "loss": 5.4005, - "step": 22287 - }, - { - "epoch": 11.623468057366363, - "grad_norm": 1.4788563251495361, - "learning_rate": 7.81105527638191e-05, - "loss": 5.4403, - "step": 22288 - }, - { - "epoch": 11.62398956975228, - "grad_norm": 1.4281319379806519, - "learning_rate": 7.810954773869347e-05, - "loss": 5.5595, - "step": 22289 - }, - { - "epoch": 11.6245110821382, - "grad_norm": 1.5275001525878906, - "learning_rate": 7.810854271356784e-05, - "loss": 5.5628, - "step": 22290 - }, - { - "epoch": 11.62503259452412, - "grad_norm": 1.4414547681808472, - "learning_rate": 7.810753768844221e-05, - "loss": 5.5918, - "step": 22291 - }, - { - "epoch": 11.62555410691004, - "grad_norm": 1.4442588090896606, - "learning_rate": 7.810653266331659e-05, - "loss": 5.333, - "step": 22292 - }, - { - "epoch": 11.626075619295959, - "grad_norm": 1.4458249807357788, - "learning_rate": 7.810552763819095e-05, - "loss": 5.1259, - "step": 22293 - }, - { - "epoch": 11.626597131681878, - "grad_norm": 1.5442019701004028, - "learning_rate": 7.810452261306533e-05, - "loss": 5.5105, - "step": 22294 - }, - { - "epoch": 11.627118644067796, - "grad_norm": 1.4487329721450806, - "learning_rate": 7.81035175879397e-05, - "loss": 5.5016, - "step": 22295 - }, - { - "epoch": 11.627640156453715, - "grad_norm": 1.382688045501709, - "learning_rate": 7.810251256281408e-05, - "loss": 5.9889, - "step": 22296 - }, - { - "epoch": 11.628161668839635, - "grad_norm": 1.4224717617034912, - "learning_rate": 7.810150753768845e-05, - "loss": 5.5586, - "step": 22297 - }, - { - "epoch": 11.628683181225554, - "grad_norm": 1.4151055812835693, - "learning_rate": 7.810050251256283e-05, - "loss": 5.7615, - "step": 22298 - }, - { - "epoch": 11.629204693611474, - "grad_norm": 1.626007080078125, - "learning_rate": 7.809949748743719e-05, - "loss": 4.6048, - "step": 22299 - }, - { - "epoch": 11.629726205997393, - "grad_norm": 1.429244875907898, - "learning_rate": 7.809849246231155e-05, - "loss": 5.3918, - "step": 22300 - }, - { - "epoch": 11.63024771838331, - "grad_norm": 1.5034164190292358, - "learning_rate": 7.809748743718593e-05, - "loss": 5.7503, - "step": 22301 - }, - { - "epoch": 11.63076923076923, - "grad_norm": 1.5023574829101562, - "learning_rate": 7.80964824120603e-05, - "loss": 4.8049, - "step": 22302 - }, - { - "epoch": 11.63129074315515, - "grad_norm": 1.521430492401123, - "learning_rate": 7.809547738693467e-05, - "loss": 5.3649, - "step": 22303 - }, - { - "epoch": 11.63181225554107, - "grad_norm": 1.4477745294570923, - "learning_rate": 7.809447236180905e-05, - "loss": 4.9434, - "step": 22304 - }, - { - "epoch": 11.632333767926989, - "grad_norm": 1.36488676071167, - "learning_rate": 7.809346733668343e-05, - "loss": 5.58, - "step": 22305 - }, - { - "epoch": 11.632855280312908, - "grad_norm": 1.4158498048782349, - "learning_rate": 7.80924623115578e-05, - "loss": 5.7401, - "step": 22306 - }, - { - "epoch": 11.633376792698826, - "grad_norm": 1.5280967950820923, - "learning_rate": 7.809145728643217e-05, - "loss": 5.7082, - "step": 22307 - }, - { - "epoch": 11.633898305084745, - "grad_norm": 1.3891371488571167, - "learning_rate": 7.809045226130654e-05, - "loss": 5.1931, - "step": 22308 - }, - { - "epoch": 11.634419817470665, - "grad_norm": 1.3955918550491333, - "learning_rate": 7.808944723618091e-05, - "loss": 5.7935, - "step": 22309 - }, - { - "epoch": 11.634941329856584, - "grad_norm": 1.3654305934906006, - "learning_rate": 7.808844221105528e-05, - "loss": 5.5312, - "step": 22310 - }, - { - "epoch": 11.635462842242504, - "grad_norm": 1.464335560798645, - "learning_rate": 7.808743718592966e-05, - "loss": 5.5765, - "step": 22311 - }, - { - "epoch": 11.635984354628423, - "grad_norm": 1.3539581298828125, - "learning_rate": 7.808643216080402e-05, - "loss": 5.574, - "step": 22312 - }, - { - "epoch": 11.63650586701434, - "grad_norm": 1.4545167684555054, - "learning_rate": 7.808542713567838e-05, - "loss": 4.9999, - "step": 22313 - }, - { - "epoch": 11.63702737940026, - "grad_norm": 1.513115644454956, - "learning_rate": 7.808442211055276e-05, - "loss": 5.397, - "step": 22314 - }, - { - "epoch": 11.63754889178618, - "grad_norm": 1.4858161211013794, - "learning_rate": 7.808341708542714e-05, - "loss": 5.0545, - "step": 22315 - }, - { - "epoch": 11.6380704041721, - "grad_norm": 1.377718210220337, - "learning_rate": 7.808241206030152e-05, - "loss": 5.8143, - "step": 22316 - }, - { - "epoch": 11.638591916558019, - "grad_norm": 1.3916711807250977, - "learning_rate": 7.808140703517588e-05, - "loss": 5.4029, - "step": 22317 - }, - { - "epoch": 11.639113428943938, - "grad_norm": 1.3970084190368652, - "learning_rate": 7.808040201005026e-05, - "loss": 5.9787, - "step": 22318 - }, - { - "epoch": 11.639634941329856, - "grad_norm": 1.6625604629516602, - "learning_rate": 7.807939698492462e-05, - "loss": 5.1852, - "step": 22319 - }, - { - "epoch": 11.640156453715775, - "grad_norm": 1.5090150833129883, - "learning_rate": 7.8078391959799e-05, - "loss": 5.1911, - "step": 22320 - }, - { - "epoch": 11.640677966101695, - "grad_norm": 1.5273280143737793, - "learning_rate": 7.807738693467337e-05, - "loss": 5.0253, - "step": 22321 - }, - { - "epoch": 11.641199478487614, - "grad_norm": 1.638242483139038, - "learning_rate": 7.807638190954774e-05, - "loss": 5.3564, - "step": 22322 - }, - { - "epoch": 11.641720990873534, - "grad_norm": 1.428669810295105, - "learning_rate": 7.807537688442211e-05, - "loss": 5.8484, - "step": 22323 - }, - { - "epoch": 11.642242503259453, - "grad_norm": 1.559396505355835, - "learning_rate": 7.807437185929649e-05, - "loss": 5.59, - "step": 22324 - }, - { - "epoch": 11.642764015645371, - "grad_norm": 1.5145939588546753, - "learning_rate": 7.807336683417086e-05, - "loss": 5.8569, - "step": 22325 - }, - { - "epoch": 11.64328552803129, - "grad_norm": 1.4094789028167725, - "learning_rate": 7.807236180904523e-05, - "loss": 5.3865, - "step": 22326 - }, - { - "epoch": 11.64380704041721, - "grad_norm": 1.4380170106887817, - "learning_rate": 7.80713567839196e-05, - "loss": 5.6407, - "step": 22327 - }, - { - "epoch": 11.64432855280313, - "grad_norm": 1.42546546459198, - "learning_rate": 7.807035175879397e-05, - "loss": 5.1998, - "step": 22328 - }, - { - "epoch": 11.644850065189049, - "grad_norm": 1.4278517961502075, - "learning_rate": 7.806934673366835e-05, - "loss": 5.5158, - "step": 22329 - }, - { - "epoch": 11.645371577574968, - "grad_norm": 1.4560823440551758, - "learning_rate": 7.806834170854271e-05, - "loss": 5.1295, - "step": 22330 - }, - { - "epoch": 11.645893089960886, - "grad_norm": 1.6115690469741821, - "learning_rate": 7.806733668341709e-05, - "loss": 5.4047, - "step": 22331 - }, - { - "epoch": 11.646414602346805, - "grad_norm": 1.4294345378875732, - "learning_rate": 7.806633165829145e-05, - "loss": 5.6737, - "step": 22332 - }, - { - "epoch": 11.646936114732725, - "grad_norm": 1.3632735013961792, - "learning_rate": 7.806532663316583e-05, - "loss": 5.5422, - "step": 22333 - }, - { - "epoch": 11.647457627118644, - "grad_norm": 1.4169162511825562, - "learning_rate": 7.80643216080402e-05, - "loss": 5.4302, - "step": 22334 - }, - { - "epoch": 11.647979139504564, - "grad_norm": 1.417777419090271, - "learning_rate": 7.806331658291457e-05, - "loss": 5.5383, - "step": 22335 - }, - { - "epoch": 11.648500651890483, - "grad_norm": 1.4470220804214478, - "learning_rate": 7.806231155778895e-05, - "loss": 5.4729, - "step": 22336 - }, - { - "epoch": 11.649022164276401, - "grad_norm": 1.533598780632019, - "learning_rate": 7.806130653266333e-05, - "loss": 5.2597, - "step": 22337 - }, - { - "epoch": 11.64954367666232, - "grad_norm": 1.4031347036361694, - "learning_rate": 7.80603015075377e-05, - "loss": 5.3718, - "step": 22338 - }, - { - "epoch": 11.65006518904824, - "grad_norm": 1.5345033407211304, - "learning_rate": 7.805929648241206e-05, - "loss": 5.5711, - "step": 22339 - }, - { - "epoch": 11.65058670143416, - "grad_norm": 1.347015142440796, - "learning_rate": 7.805829145728644e-05, - "loss": 5.3265, - "step": 22340 - }, - { - "epoch": 11.651108213820079, - "grad_norm": 1.3459367752075195, - "learning_rate": 7.80572864321608e-05, - "loss": 5.0758, - "step": 22341 - }, - { - "epoch": 11.651629726205996, - "grad_norm": 1.4291714429855347, - "learning_rate": 7.805628140703518e-05, - "loss": 5.8195, - "step": 22342 - }, - { - "epoch": 11.652151238591916, - "grad_norm": 1.3891022205352783, - "learning_rate": 7.805527638190954e-05, - "loss": 5.5874, - "step": 22343 - }, - { - "epoch": 11.652672750977835, - "grad_norm": 1.4743692874908447, - "learning_rate": 7.805427135678392e-05, - "loss": 5.3898, - "step": 22344 - }, - { - "epoch": 11.653194263363755, - "grad_norm": 1.4610532522201538, - "learning_rate": 7.80532663316583e-05, - "loss": 5.4898, - "step": 22345 - }, - { - "epoch": 11.653715775749674, - "grad_norm": 1.4509382247924805, - "learning_rate": 7.805226130653268e-05, - "loss": 5.6498, - "step": 22346 - }, - { - "epoch": 11.654237288135594, - "grad_norm": 1.513219952583313, - "learning_rate": 7.805125628140704e-05, - "loss": 5.202, - "step": 22347 - }, - { - "epoch": 11.654758800521513, - "grad_norm": 1.4862080812454224, - "learning_rate": 7.805025125628142e-05, - "loss": 5.6566, - "step": 22348 - }, - { - "epoch": 11.655280312907431, - "grad_norm": 1.4414032697677612, - "learning_rate": 7.804924623115578e-05, - "loss": 5.3113, - "step": 22349 - }, - { - "epoch": 11.65580182529335, - "grad_norm": 1.4618940353393555, - "learning_rate": 7.804824120603016e-05, - "loss": 5.2711, - "step": 22350 - }, - { - "epoch": 11.65632333767927, - "grad_norm": 1.3114490509033203, - "learning_rate": 7.804723618090453e-05, - "loss": 5.1517, - "step": 22351 - }, - { - "epoch": 11.65684485006519, - "grad_norm": 1.6304571628570557, - "learning_rate": 7.804623115577889e-05, - "loss": 5.1509, - "step": 22352 - }, - { - "epoch": 11.657366362451109, - "grad_norm": 1.4391857385635376, - "learning_rate": 7.804522613065327e-05, - "loss": 5.8491, - "step": 22353 - }, - { - "epoch": 11.657887874837026, - "grad_norm": 1.4381139278411865, - "learning_rate": 7.804422110552763e-05, - "loss": 5.0546, - "step": 22354 - }, - { - "epoch": 11.658409387222946, - "grad_norm": 1.4368985891342163, - "learning_rate": 7.804321608040201e-05, - "loss": 5.3484, - "step": 22355 - }, - { - "epoch": 11.658930899608865, - "grad_norm": 1.496816873550415, - "learning_rate": 7.804221105527639e-05, - "loss": 5.3823, - "step": 22356 - }, - { - "epoch": 11.659452411994785, - "grad_norm": 1.509801983833313, - "learning_rate": 7.804120603015077e-05, - "loss": 5.4997, - "step": 22357 - }, - { - "epoch": 11.659973924380704, - "grad_norm": 1.3923392295837402, - "learning_rate": 7.804020100502513e-05, - "loss": 5.3031, - "step": 22358 - }, - { - "epoch": 11.660495436766624, - "grad_norm": 1.7121953964233398, - "learning_rate": 7.803919597989951e-05, - "loss": 4.3741, - "step": 22359 - }, - { - "epoch": 11.661016949152543, - "grad_norm": 1.5564768314361572, - "learning_rate": 7.803819095477387e-05, - "loss": 5.1836, - "step": 22360 - }, - { - "epoch": 11.661538461538461, - "grad_norm": 1.4686307907104492, - "learning_rate": 7.803718592964825e-05, - "loss": 5.4277, - "step": 22361 - }, - { - "epoch": 11.66205997392438, - "grad_norm": 1.5234954357147217, - "learning_rate": 7.803618090452261e-05, - "loss": 5.5649, - "step": 22362 - }, - { - "epoch": 11.6625814863103, - "grad_norm": 1.5818192958831787, - "learning_rate": 7.803517587939699e-05, - "loss": 5.4999, - "step": 22363 - }, - { - "epoch": 11.66310299869622, - "grad_norm": 1.4053621292114258, - "learning_rate": 7.803417085427136e-05, - "loss": 5.4529, - "step": 22364 - }, - { - "epoch": 11.663624511082139, - "grad_norm": 1.5030752420425415, - "learning_rate": 7.803316582914573e-05, - "loss": 5.6779, - "step": 22365 - }, - { - "epoch": 11.664146023468057, - "grad_norm": 1.4251352548599243, - "learning_rate": 7.803216080402011e-05, - "loss": 5.725, - "step": 22366 - }, - { - "epoch": 11.664667535853976, - "grad_norm": 1.7698302268981934, - "learning_rate": 7.803115577889448e-05, - "loss": 4.8946, - "step": 22367 - }, - { - "epoch": 11.665189048239895, - "grad_norm": 1.459350347518921, - "learning_rate": 7.803015075376885e-05, - "loss": 5.1424, - "step": 22368 - }, - { - "epoch": 11.665710560625815, - "grad_norm": 1.470205307006836, - "learning_rate": 7.802914572864322e-05, - "loss": 5.5461, - "step": 22369 - }, - { - "epoch": 11.666232073011734, - "grad_norm": 1.5984972715377808, - "learning_rate": 7.80281407035176e-05, - "loss": 5.2973, - "step": 22370 - }, - { - "epoch": 11.666753585397654, - "grad_norm": 1.4555083513259888, - "learning_rate": 7.802713567839196e-05, - "loss": 5.4886, - "step": 22371 - }, - { - "epoch": 11.667275097783573, - "grad_norm": 1.4290682077407837, - "learning_rate": 7.802613065326634e-05, - "loss": 5.6123, - "step": 22372 - }, - { - "epoch": 11.667796610169491, - "grad_norm": 1.393673300743103, - "learning_rate": 7.80251256281407e-05, - "loss": 5.6438, - "step": 22373 - }, - { - "epoch": 11.66831812255541, - "grad_norm": 1.4559621810913086, - "learning_rate": 7.802412060301508e-05, - "loss": 5.6223, - "step": 22374 - }, - { - "epoch": 11.66883963494133, - "grad_norm": 1.534934163093567, - "learning_rate": 7.802311557788944e-05, - "loss": 5.2654, - "step": 22375 - }, - { - "epoch": 11.66936114732725, - "grad_norm": 1.7711553573608398, - "learning_rate": 7.802211055276382e-05, - "loss": 4.6471, - "step": 22376 - }, - { - "epoch": 11.669882659713169, - "grad_norm": 1.4310418367385864, - "learning_rate": 7.80211055276382e-05, - "loss": 5.6589, - "step": 22377 - }, - { - "epoch": 11.670404172099087, - "grad_norm": 1.3637804985046387, - "learning_rate": 7.802010050251258e-05, - "loss": 5.4824, - "step": 22378 - }, - { - "epoch": 11.670925684485006, - "grad_norm": 1.4700206518173218, - "learning_rate": 7.801909547738694e-05, - "loss": 5.1121, - "step": 22379 - }, - { - "epoch": 11.671447196870925, - "grad_norm": 1.4324235916137695, - "learning_rate": 7.80180904522613e-05, - "loss": 5.3511, - "step": 22380 - }, - { - "epoch": 11.671968709256845, - "grad_norm": 1.4991589784622192, - "learning_rate": 7.801708542713568e-05, - "loss": 5.4234, - "step": 22381 - }, - { - "epoch": 11.672490221642764, - "grad_norm": 1.4901080131530762, - "learning_rate": 7.801608040201005e-05, - "loss": 5.325, - "step": 22382 - }, - { - "epoch": 11.673011734028684, - "grad_norm": 1.3198072910308838, - "learning_rate": 7.801507537688443e-05, - "loss": 5.919, - "step": 22383 - }, - { - "epoch": 11.673533246414602, - "grad_norm": 1.3627763986587524, - "learning_rate": 7.801407035175879e-05, - "loss": 5.8345, - "step": 22384 - }, - { - "epoch": 11.674054758800521, - "grad_norm": 1.4137372970581055, - "learning_rate": 7.801306532663317e-05, - "loss": 5.8474, - "step": 22385 - }, - { - "epoch": 11.67457627118644, - "grad_norm": 1.5607151985168457, - "learning_rate": 7.801206030150753e-05, - "loss": 4.9886, - "step": 22386 - }, - { - "epoch": 11.67509778357236, - "grad_norm": 1.4341670274734497, - "learning_rate": 7.801105527638191e-05, - "loss": 5.6952, - "step": 22387 - }, - { - "epoch": 11.67561929595828, - "grad_norm": 1.473534107208252, - "learning_rate": 7.801005025125629e-05, - "loss": 4.9042, - "step": 22388 - }, - { - "epoch": 11.676140808344199, - "grad_norm": 1.366288185119629, - "learning_rate": 7.800904522613067e-05, - "loss": 5.4932, - "step": 22389 - }, - { - "epoch": 11.676662320730117, - "grad_norm": 1.3821322917938232, - "learning_rate": 7.800804020100503e-05, - "loss": 5.7056, - "step": 22390 - }, - { - "epoch": 11.677183833116036, - "grad_norm": 1.4480736255645752, - "learning_rate": 7.800703517587941e-05, - "loss": 5.5137, - "step": 22391 - }, - { - "epoch": 11.677705345501955, - "grad_norm": 1.4653959274291992, - "learning_rate": 7.800603015075377e-05, - "loss": 5.5651, - "step": 22392 - }, - { - "epoch": 11.678226857887875, - "grad_norm": 1.4862428903579712, - "learning_rate": 7.800502512562814e-05, - "loss": 5.5951, - "step": 22393 - }, - { - "epoch": 11.678748370273794, - "grad_norm": 1.5052144527435303, - "learning_rate": 7.800402010050251e-05, - "loss": 5.3508, - "step": 22394 - }, - { - "epoch": 11.679269882659714, - "grad_norm": 1.4957154989242554, - "learning_rate": 7.800301507537688e-05, - "loss": 5.3551, - "step": 22395 - }, - { - "epoch": 11.679791395045632, - "grad_norm": 1.5022441148757935, - "learning_rate": 7.800201005025126e-05, - "loss": 5.2716, - "step": 22396 - }, - { - "epoch": 11.680312907431551, - "grad_norm": 1.4118150472640991, - "learning_rate": 7.800100502512563e-05, - "loss": 5.4522, - "step": 22397 - }, - { - "epoch": 11.68083441981747, - "grad_norm": 1.3767317533493042, - "learning_rate": 7.800000000000001e-05, - "loss": 5.5111, - "step": 22398 - }, - { - "epoch": 11.68135593220339, - "grad_norm": 1.3343117237091064, - "learning_rate": 7.799899497487438e-05, - "loss": 5.8141, - "step": 22399 - }, - { - "epoch": 11.68187744458931, - "grad_norm": 1.414777159690857, - "learning_rate": 7.799798994974875e-05, - "loss": 5.8817, - "step": 22400 - }, - { - "epoch": 11.682398956975229, - "grad_norm": 1.3941991329193115, - "learning_rate": 7.799698492462312e-05, - "loss": 5.7587, - "step": 22401 - }, - { - "epoch": 11.682920469361147, - "grad_norm": 1.441398024559021, - "learning_rate": 7.79959798994975e-05, - "loss": 5.5157, - "step": 22402 - }, - { - "epoch": 11.683441981747066, - "grad_norm": 1.407328486442566, - "learning_rate": 7.799497487437186e-05, - "loss": 5.4552, - "step": 22403 - }, - { - "epoch": 11.683963494132986, - "grad_norm": 1.4890426397323608, - "learning_rate": 7.799396984924624e-05, - "loss": 5.0828, - "step": 22404 - }, - { - "epoch": 11.684485006518905, - "grad_norm": 1.5332647562026978, - "learning_rate": 7.79929648241206e-05, - "loss": 4.6572, - "step": 22405 - }, - { - "epoch": 11.685006518904824, - "grad_norm": 1.6417287588119507, - "learning_rate": 7.799195979899497e-05, - "loss": 5.4807, - "step": 22406 - }, - { - "epoch": 11.685528031290744, - "grad_norm": 1.4415868520736694, - "learning_rate": 7.799095477386934e-05, - "loss": 5.3368, - "step": 22407 - }, - { - "epoch": 11.686049543676662, - "grad_norm": 1.5408474206924438, - "learning_rate": 7.798994974874372e-05, - "loss": 5.4405, - "step": 22408 - }, - { - "epoch": 11.686571056062581, - "grad_norm": 1.4350969791412354, - "learning_rate": 7.79889447236181e-05, - "loss": 5.4645, - "step": 22409 - }, - { - "epoch": 11.6870925684485, - "grad_norm": 1.370524525642395, - "learning_rate": 7.798793969849246e-05, - "loss": 5.702, - "step": 22410 - }, - { - "epoch": 11.68761408083442, - "grad_norm": 1.488935947418213, - "learning_rate": 7.798693467336684e-05, - "loss": 5.6628, - "step": 22411 - }, - { - "epoch": 11.68813559322034, - "grad_norm": 1.4086413383483887, - "learning_rate": 7.798592964824121e-05, - "loss": 5.748, - "step": 22412 - }, - { - "epoch": 11.688657105606259, - "grad_norm": 1.4073898792266846, - "learning_rate": 7.798492462311558e-05, - "loss": 5.3414, - "step": 22413 - }, - { - "epoch": 11.689178617992177, - "grad_norm": 1.4010579586029053, - "learning_rate": 7.798391959798995e-05, - "loss": 5.0693, - "step": 22414 - }, - { - "epoch": 11.689700130378096, - "grad_norm": 1.4029541015625, - "learning_rate": 7.798291457286433e-05, - "loss": 5.6084, - "step": 22415 - }, - { - "epoch": 11.690221642764016, - "grad_norm": 1.422835350036621, - "learning_rate": 7.798190954773869e-05, - "loss": 5.6635, - "step": 22416 - }, - { - "epoch": 11.690743155149935, - "grad_norm": 1.427840232849121, - "learning_rate": 7.798090452261307e-05, - "loss": 5.663, - "step": 22417 - }, - { - "epoch": 11.691264667535854, - "grad_norm": 1.4030089378356934, - "learning_rate": 7.797989949748745e-05, - "loss": 5.5428, - "step": 22418 - }, - { - "epoch": 11.691786179921774, - "grad_norm": 1.4339790344238281, - "learning_rate": 7.797889447236181e-05, - "loss": 5.1488, - "step": 22419 - }, - { - "epoch": 11.692307692307692, - "grad_norm": 1.3914114236831665, - "learning_rate": 7.797788944723619e-05, - "loss": 5.135, - "step": 22420 - }, - { - "epoch": 11.692829204693611, - "grad_norm": 1.3858121633529663, - "learning_rate": 7.797688442211055e-05, - "loss": 5.6975, - "step": 22421 - }, - { - "epoch": 11.69335071707953, - "grad_norm": 1.4575973749160767, - "learning_rate": 7.797587939698493e-05, - "loss": 5.5233, - "step": 22422 - }, - { - "epoch": 11.69387222946545, - "grad_norm": 1.5779324769973755, - "learning_rate": 7.79748743718593e-05, - "loss": 5.3345, - "step": 22423 - }, - { - "epoch": 11.69439374185137, - "grad_norm": 1.538330316543579, - "learning_rate": 7.797386934673367e-05, - "loss": 4.8423, - "step": 22424 - }, - { - "epoch": 11.694915254237289, - "grad_norm": 1.517224907875061, - "learning_rate": 7.797286432160804e-05, - "loss": 4.5682, - "step": 22425 - }, - { - "epoch": 11.695436766623207, - "grad_norm": 1.5725706815719604, - "learning_rate": 7.797185929648242e-05, - "loss": 5.2264, - "step": 22426 - }, - { - "epoch": 11.695958279009126, - "grad_norm": 1.4596368074417114, - "learning_rate": 7.797085427135678e-05, - "loss": 5.5034, - "step": 22427 - }, - { - "epoch": 11.696479791395046, - "grad_norm": 1.4667460918426514, - "learning_rate": 7.796984924623116e-05, - "loss": 5.4027, - "step": 22428 - }, - { - "epoch": 11.697001303780965, - "grad_norm": 1.4085818529129028, - "learning_rate": 7.796884422110554e-05, - "loss": 5.4322, - "step": 22429 - }, - { - "epoch": 11.697522816166884, - "grad_norm": 1.4669299125671387, - "learning_rate": 7.796783919597991e-05, - "loss": 5.6352, - "step": 22430 - }, - { - "epoch": 11.698044328552804, - "grad_norm": 1.3443292379379272, - "learning_rate": 7.796683417085428e-05, - "loss": 5.7931, - "step": 22431 - }, - { - "epoch": 11.698565840938722, - "grad_norm": 1.5046756267547607, - "learning_rate": 7.796582914572864e-05, - "loss": 5.2159, - "step": 22432 - }, - { - "epoch": 11.699087353324641, - "grad_norm": 1.4159889221191406, - "learning_rate": 7.796482412060302e-05, - "loss": 5.6469, - "step": 22433 - }, - { - "epoch": 11.69960886571056, - "grad_norm": 1.429555058479309, - "learning_rate": 7.796381909547738e-05, - "loss": 5.4423, - "step": 22434 - }, - { - "epoch": 11.70013037809648, - "grad_norm": 1.4651635885238647, - "learning_rate": 7.796281407035176e-05, - "loss": 5.4051, - "step": 22435 - }, - { - "epoch": 11.7006518904824, - "grad_norm": 1.6165627241134644, - "learning_rate": 7.796180904522613e-05, - "loss": 5.1825, - "step": 22436 - }, - { - "epoch": 11.701173402868317, - "grad_norm": 1.4805530309677124, - "learning_rate": 7.79608040201005e-05, - "loss": 5.2124, - "step": 22437 - }, - { - "epoch": 11.701694915254237, - "grad_norm": 1.4260494709014893, - "learning_rate": 7.795979899497488e-05, - "loss": 5.8221, - "step": 22438 - }, - { - "epoch": 11.702216427640156, - "grad_norm": 1.4330998659133911, - "learning_rate": 7.795879396984926e-05, - "loss": 5.2056, - "step": 22439 - }, - { - "epoch": 11.702737940026076, - "grad_norm": 1.409659743309021, - "learning_rate": 7.795778894472362e-05, - "loss": 5.5548, - "step": 22440 - }, - { - "epoch": 11.703259452411995, - "grad_norm": 1.48440420627594, - "learning_rate": 7.7956783919598e-05, - "loss": 5.4245, - "step": 22441 - }, - { - "epoch": 11.703780964797915, - "grad_norm": 1.4510475397109985, - "learning_rate": 7.795577889447237e-05, - "loss": 5.4432, - "step": 22442 - }, - { - "epoch": 11.704302477183834, - "grad_norm": 1.7049930095672607, - "learning_rate": 7.795477386934674e-05, - "loss": 5.0972, - "step": 22443 - }, - { - "epoch": 11.704823989569752, - "grad_norm": 1.4148764610290527, - "learning_rate": 7.795376884422111e-05, - "loss": 5.2654, - "step": 22444 - }, - { - "epoch": 11.705345501955671, - "grad_norm": 1.4843679666519165, - "learning_rate": 7.795276381909547e-05, - "loss": 5.0513, - "step": 22445 - }, - { - "epoch": 11.70586701434159, - "grad_norm": 1.4580845832824707, - "learning_rate": 7.795175879396985e-05, - "loss": 5.5574, - "step": 22446 - }, - { - "epoch": 11.70638852672751, - "grad_norm": 1.5612263679504395, - "learning_rate": 7.795075376884421e-05, - "loss": 5.5625, - "step": 22447 - }, - { - "epoch": 11.70691003911343, - "grad_norm": 1.5042870044708252, - "learning_rate": 7.794974874371859e-05, - "loss": 5.3421, - "step": 22448 - }, - { - "epoch": 11.707431551499347, - "grad_norm": 1.4396135807037354, - "learning_rate": 7.794874371859297e-05, - "loss": 5.4092, - "step": 22449 - }, - { - "epoch": 11.707953063885267, - "grad_norm": 1.4393309354782104, - "learning_rate": 7.794773869346735e-05, - "loss": 5.6972, - "step": 22450 - }, - { - "epoch": 11.708474576271186, - "grad_norm": 1.6035959720611572, - "learning_rate": 7.794673366834171e-05, - "loss": 5.4299, - "step": 22451 - }, - { - "epoch": 11.708996088657106, - "grad_norm": 1.3688639402389526, - "learning_rate": 7.794572864321609e-05, - "loss": 5.4815, - "step": 22452 - }, - { - "epoch": 11.709517601043025, - "grad_norm": 1.412873387336731, - "learning_rate": 7.794472361809045e-05, - "loss": 4.9337, - "step": 22453 - }, - { - "epoch": 11.710039113428945, - "grad_norm": 1.4145488739013672, - "learning_rate": 7.794371859296483e-05, - "loss": 5.3946, - "step": 22454 - }, - { - "epoch": 11.710560625814864, - "grad_norm": 1.4828784465789795, - "learning_rate": 7.79427135678392e-05, - "loss": 5.8173, - "step": 22455 - }, - { - "epoch": 11.711082138200782, - "grad_norm": 1.4704004526138306, - "learning_rate": 7.794170854271357e-05, - "loss": 5.125, - "step": 22456 - }, - { - "epoch": 11.711603650586701, - "grad_norm": 1.3961068391799927, - "learning_rate": 7.794070351758794e-05, - "loss": 5.5263, - "step": 22457 - }, - { - "epoch": 11.71212516297262, - "grad_norm": 1.439123511314392, - "learning_rate": 7.793969849246232e-05, - "loss": 5.4156, - "step": 22458 - }, - { - "epoch": 11.71264667535854, - "grad_norm": 1.4569675922393799, - "learning_rate": 7.79386934673367e-05, - "loss": 5.3458, - "step": 22459 - }, - { - "epoch": 11.71316818774446, - "grad_norm": 1.437272071838379, - "learning_rate": 7.793768844221106e-05, - "loss": 5.4905, - "step": 22460 - }, - { - "epoch": 11.713689700130377, - "grad_norm": 1.395545482635498, - "learning_rate": 7.793668341708544e-05, - "loss": 5.5455, - "step": 22461 - }, - { - "epoch": 11.714211212516297, - "grad_norm": 1.509758472442627, - "learning_rate": 7.79356783919598e-05, - "loss": 5.5208, - "step": 22462 - }, - { - "epoch": 11.714732724902216, - "grad_norm": 1.578309178352356, - "learning_rate": 7.793467336683418e-05, - "loss": 4.9136, - "step": 22463 - }, - { - "epoch": 11.715254237288136, - "grad_norm": 1.5560270547866821, - "learning_rate": 7.793366834170854e-05, - "loss": 5.5725, - "step": 22464 - }, - { - "epoch": 11.715775749674055, - "grad_norm": 1.4218329191207886, - "learning_rate": 7.793266331658292e-05, - "loss": 5.3323, - "step": 22465 - }, - { - "epoch": 11.716297262059975, - "grad_norm": 1.5480468273162842, - "learning_rate": 7.793165829145728e-05, - "loss": 5.2148, - "step": 22466 - }, - { - "epoch": 11.716818774445892, - "grad_norm": 1.4353183507919312, - "learning_rate": 7.793065326633166e-05, - "loss": 5.6021, - "step": 22467 - }, - { - "epoch": 11.717340286831812, - "grad_norm": 1.5578943490982056, - "learning_rate": 7.792964824120603e-05, - "loss": 5.0772, - "step": 22468 - }, - { - "epoch": 11.717861799217731, - "grad_norm": 1.5849608182907104, - "learning_rate": 7.79286432160804e-05, - "loss": 5.0252, - "step": 22469 - }, - { - "epoch": 11.71838331160365, - "grad_norm": 1.38527512550354, - "learning_rate": 7.792763819095478e-05, - "loss": 5.6332, - "step": 22470 - }, - { - "epoch": 11.71890482398957, - "grad_norm": 1.4296066761016846, - "learning_rate": 7.792663316582916e-05, - "loss": 5.4885, - "step": 22471 - }, - { - "epoch": 11.71942633637549, - "grad_norm": 1.3703811168670654, - "learning_rate": 7.792562814070352e-05, - "loss": 5.7659, - "step": 22472 - }, - { - "epoch": 11.719947848761407, - "grad_norm": 1.4113913774490356, - "learning_rate": 7.792462311557789e-05, - "loss": 4.8689, - "step": 22473 - }, - { - "epoch": 11.720469361147327, - "grad_norm": 1.4096713066101074, - "learning_rate": 7.792361809045227e-05, - "loss": 5.6609, - "step": 22474 - }, - { - "epoch": 11.720990873533246, - "grad_norm": 1.4305946826934814, - "learning_rate": 7.792261306532663e-05, - "loss": 5.5221, - "step": 22475 - }, - { - "epoch": 11.721512385919166, - "grad_norm": 1.5416173934936523, - "learning_rate": 7.792160804020101e-05, - "loss": 4.7127, - "step": 22476 - }, - { - "epoch": 11.722033898305085, - "grad_norm": 1.4359829425811768, - "learning_rate": 7.792060301507537e-05, - "loss": 5.266, - "step": 22477 - }, - { - "epoch": 11.722555410691005, - "grad_norm": 1.7345619201660156, - "learning_rate": 7.791959798994975e-05, - "loss": 5.4537, - "step": 22478 - }, - { - "epoch": 11.723076923076922, - "grad_norm": 1.3928526639938354, - "learning_rate": 7.791859296482413e-05, - "loss": 5.4544, - "step": 22479 - }, - { - "epoch": 11.723598435462842, - "grad_norm": 1.4959369897842407, - "learning_rate": 7.79175879396985e-05, - "loss": 5.3098, - "step": 22480 - }, - { - "epoch": 11.724119947848761, - "grad_norm": 1.3802310228347778, - "learning_rate": 7.791658291457287e-05, - "loss": 5.7844, - "step": 22481 - }, - { - "epoch": 11.72464146023468, - "grad_norm": 1.4135864973068237, - "learning_rate": 7.791557788944725e-05, - "loss": 5.6923, - "step": 22482 - }, - { - "epoch": 11.7251629726206, - "grad_norm": 1.400306224822998, - "learning_rate": 7.791457286432161e-05, - "loss": 5.4089, - "step": 22483 - }, - { - "epoch": 11.72568448500652, - "grad_norm": 1.374598503112793, - "learning_rate": 7.791356783919599e-05, - "loss": 5.6734, - "step": 22484 - }, - { - "epoch": 11.726205997392437, - "grad_norm": 1.4260706901550293, - "learning_rate": 7.791256281407035e-05, - "loss": 5.0811, - "step": 22485 - }, - { - "epoch": 11.726727509778357, - "grad_norm": 1.4543240070343018, - "learning_rate": 7.791155778894472e-05, - "loss": 5.4197, - "step": 22486 - }, - { - "epoch": 11.727249022164276, - "grad_norm": 1.6673510074615479, - "learning_rate": 7.79105527638191e-05, - "loss": 5.1242, - "step": 22487 - }, - { - "epoch": 11.727770534550196, - "grad_norm": 1.410360336303711, - "learning_rate": 7.790954773869346e-05, - "loss": 5.5391, - "step": 22488 - }, - { - "epoch": 11.728292046936115, - "grad_norm": 1.3380177021026611, - "learning_rate": 7.790854271356784e-05, - "loss": 5.8526, - "step": 22489 - }, - { - "epoch": 11.728813559322035, - "grad_norm": 1.486576795578003, - "learning_rate": 7.790753768844222e-05, - "loss": 5.1498, - "step": 22490 - }, - { - "epoch": 11.729335071707952, - "grad_norm": 1.4250015020370483, - "learning_rate": 7.79065326633166e-05, - "loss": 4.8451, - "step": 22491 - }, - { - "epoch": 11.729856584093872, - "grad_norm": 1.571189045906067, - "learning_rate": 7.790552763819096e-05, - "loss": 5.1853, - "step": 22492 - }, - { - "epoch": 11.730378096479791, - "grad_norm": 1.517397403717041, - "learning_rate": 7.790452261306534e-05, - "loss": 5.5334, - "step": 22493 - }, - { - "epoch": 11.73089960886571, - "grad_norm": 1.4223803281784058, - "learning_rate": 7.79035175879397e-05, - "loss": 4.8781, - "step": 22494 - }, - { - "epoch": 11.73142112125163, - "grad_norm": 1.4819656610488892, - "learning_rate": 7.790251256281408e-05, - "loss": 5.3218, - "step": 22495 - }, - { - "epoch": 11.73194263363755, - "grad_norm": 1.4287105798721313, - "learning_rate": 7.790150753768844e-05, - "loss": 5.6135, - "step": 22496 - }, - { - "epoch": 11.732464146023467, - "grad_norm": 1.4194462299346924, - "learning_rate": 7.790050251256282e-05, - "loss": 5.6368, - "step": 22497 - }, - { - "epoch": 11.732985658409387, - "grad_norm": 1.3945426940917969, - "learning_rate": 7.789949748743719e-05, - "loss": 5.797, - "step": 22498 - }, - { - "epoch": 11.733507170795306, - "grad_norm": 1.5675188302993774, - "learning_rate": 7.789849246231156e-05, - "loss": 5.5485, - "step": 22499 - }, - { - "epoch": 11.734028683181226, - "grad_norm": 1.505635142326355, - "learning_rate": 7.789748743718594e-05, - "loss": 5.2848, - "step": 22500 - }, - { - "epoch": 11.734550195567145, - "grad_norm": 1.4929746389389038, - "learning_rate": 7.78964824120603e-05, - "loss": 4.7798, - "step": 22501 - }, - { - "epoch": 11.735071707953065, - "grad_norm": 1.5554380416870117, - "learning_rate": 7.789547738693468e-05, - "loss": 4.8184, - "step": 22502 - }, - { - "epoch": 11.735593220338982, - "grad_norm": 1.3248733282089233, - "learning_rate": 7.789447236180905e-05, - "loss": 5.4825, - "step": 22503 - }, - { - "epoch": 11.736114732724902, - "grad_norm": 1.4561240673065186, - "learning_rate": 7.789346733668342e-05, - "loss": 5.7085, - "step": 22504 - }, - { - "epoch": 11.736636245110821, - "grad_norm": 1.432570219039917, - "learning_rate": 7.789246231155779e-05, - "loss": 5.2966, - "step": 22505 - }, - { - "epoch": 11.73715775749674, - "grad_norm": 1.481252908706665, - "learning_rate": 7.789145728643217e-05, - "loss": 5.3442, - "step": 22506 - }, - { - "epoch": 11.73767926988266, - "grad_norm": 1.445008397102356, - "learning_rate": 7.789045226130653e-05, - "loss": 5.3971, - "step": 22507 - }, - { - "epoch": 11.73820078226858, - "grad_norm": 1.5979551076889038, - "learning_rate": 7.788944723618091e-05, - "loss": 5.3204, - "step": 22508 - }, - { - "epoch": 11.738722294654497, - "grad_norm": 1.336997389793396, - "learning_rate": 7.788844221105527e-05, - "loss": 5.366, - "step": 22509 - }, - { - "epoch": 11.739243807040417, - "grad_norm": 1.482154130935669, - "learning_rate": 7.788743718592965e-05, - "loss": 5.454, - "step": 22510 - }, - { - "epoch": 11.739765319426336, - "grad_norm": 1.436630368232727, - "learning_rate": 7.788643216080403e-05, - "loss": 5.4852, - "step": 22511 - }, - { - "epoch": 11.740286831812256, - "grad_norm": 1.4218045473098755, - "learning_rate": 7.78854271356784e-05, - "loss": 5.3147, - "step": 22512 - }, - { - "epoch": 11.740808344198175, - "grad_norm": 1.3961760997772217, - "learning_rate": 7.788442211055277e-05, - "loss": 5.6236, - "step": 22513 - }, - { - "epoch": 11.741329856584095, - "grad_norm": 1.4075239896774292, - "learning_rate": 7.788341708542714e-05, - "loss": 5.5331, - "step": 22514 - }, - { - "epoch": 11.741851368970012, - "grad_norm": 1.399818778038025, - "learning_rate": 7.788241206030151e-05, - "loss": 5.5106, - "step": 22515 - }, - { - "epoch": 11.742372881355932, - "grad_norm": 1.4347901344299316, - "learning_rate": 7.788140703517588e-05, - "loss": 5.2248, - "step": 22516 - }, - { - "epoch": 11.742894393741851, - "grad_norm": 1.54386568069458, - "learning_rate": 7.788040201005026e-05, - "loss": 4.4802, - "step": 22517 - }, - { - "epoch": 11.74341590612777, - "grad_norm": 1.548140048980713, - "learning_rate": 7.787939698492462e-05, - "loss": 5.574, - "step": 22518 - }, - { - "epoch": 11.74393741851369, - "grad_norm": 1.3945552110671997, - "learning_rate": 7.7878391959799e-05, - "loss": 5.8887, - "step": 22519 - }, - { - "epoch": 11.74445893089961, - "grad_norm": 1.4446918964385986, - "learning_rate": 7.787738693467338e-05, - "loss": 5.0808, - "step": 22520 - }, - { - "epoch": 11.744980443285527, - "grad_norm": 1.5292549133300781, - "learning_rate": 7.787638190954775e-05, - "loss": 5.4542, - "step": 22521 - }, - { - "epoch": 11.745501955671447, - "grad_norm": 1.4869401454925537, - "learning_rate": 7.787537688442212e-05, - "loss": 5.051, - "step": 22522 - }, - { - "epoch": 11.746023468057366, - "grad_norm": 1.4456251859664917, - "learning_rate": 7.78743718592965e-05, - "loss": 5.7349, - "step": 22523 - }, - { - "epoch": 11.746544980443286, - "grad_norm": 1.3408106565475464, - "learning_rate": 7.787336683417086e-05, - "loss": 5.5598, - "step": 22524 - }, - { - "epoch": 11.747066492829205, - "grad_norm": 1.4064748287200928, - "learning_rate": 7.787236180904522e-05, - "loss": 5.7426, - "step": 22525 - }, - { - "epoch": 11.747588005215125, - "grad_norm": 1.6299433708190918, - "learning_rate": 7.78713567839196e-05, - "loss": 4.4662, - "step": 22526 - }, - { - "epoch": 11.748109517601042, - "grad_norm": 1.538394570350647, - "learning_rate": 7.787035175879397e-05, - "loss": 5.3228, - "step": 22527 - }, - { - "epoch": 11.748631029986962, - "grad_norm": 1.4869658946990967, - "learning_rate": 7.786934673366834e-05, - "loss": 5.3851, - "step": 22528 - }, - { - "epoch": 11.749152542372881, - "grad_norm": 1.3739008903503418, - "learning_rate": 7.786834170854271e-05, - "loss": 5.6402, - "step": 22529 - }, - { - "epoch": 11.7496740547588, - "grad_norm": 1.641646146774292, - "learning_rate": 7.786733668341709e-05, - "loss": 5.4642, - "step": 22530 - }, - { - "epoch": 11.75019556714472, - "grad_norm": 1.5026894807815552, - "learning_rate": 7.786633165829146e-05, - "loss": 5.6192, - "step": 22531 - }, - { - "epoch": 11.750717079530638, - "grad_norm": 1.4282575845718384, - "learning_rate": 7.786532663316584e-05, - "loss": 5.3258, - "step": 22532 - }, - { - "epoch": 11.751238591916557, - "grad_norm": 1.452181100845337, - "learning_rate": 7.78643216080402e-05, - "loss": 5.6193, - "step": 22533 - }, - { - "epoch": 11.751760104302477, - "grad_norm": 1.4432228803634644, - "learning_rate": 7.786331658291458e-05, - "loss": 4.9001, - "step": 22534 - }, - { - "epoch": 11.752281616688396, - "grad_norm": 1.423448085784912, - "learning_rate": 7.786231155778895e-05, - "loss": 5.641, - "step": 22535 - }, - { - "epoch": 11.752803129074316, - "grad_norm": 1.5182878971099854, - "learning_rate": 7.786130653266333e-05, - "loss": 5.4369, - "step": 22536 - }, - { - "epoch": 11.753324641460235, - "grad_norm": 1.6211521625518799, - "learning_rate": 7.786030150753769e-05, - "loss": 4.8658, - "step": 22537 - }, - { - "epoch": 11.753846153846155, - "grad_norm": 1.378760576248169, - "learning_rate": 7.785929648241205e-05, - "loss": 5.1783, - "step": 22538 - }, - { - "epoch": 11.754367666232072, - "grad_norm": 1.516028881072998, - "learning_rate": 7.785829145728643e-05, - "loss": 5.518, - "step": 22539 - }, - { - "epoch": 11.754889178617992, - "grad_norm": 1.4516773223876953, - "learning_rate": 7.78572864321608e-05, - "loss": 5.5256, - "step": 22540 - }, - { - "epoch": 11.755410691003911, - "grad_norm": 1.433862328529358, - "learning_rate": 7.785628140703517e-05, - "loss": 5.5394, - "step": 22541 - }, - { - "epoch": 11.75593220338983, - "grad_norm": 1.5036695003509521, - "learning_rate": 7.785527638190955e-05, - "loss": 5.2842, - "step": 22542 - }, - { - "epoch": 11.75645371577575, - "grad_norm": 1.5545848608016968, - "learning_rate": 7.785427135678393e-05, - "loss": 5.2328, - "step": 22543 - }, - { - "epoch": 11.756975228161668, - "grad_norm": 1.4566547870635986, - "learning_rate": 7.78532663316583e-05, - "loss": 4.9184, - "step": 22544 - }, - { - "epoch": 11.757496740547587, - "grad_norm": 1.3913334608078003, - "learning_rate": 7.785226130653267e-05, - "loss": 5.497, - "step": 22545 - }, - { - "epoch": 11.758018252933507, - "grad_norm": 1.3711093664169312, - "learning_rate": 7.785125628140704e-05, - "loss": 5.7265, - "step": 22546 - }, - { - "epoch": 11.758539765319426, - "grad_norm": 1.3727680444717407, - "learning_rate": 7.785025125628141e-05, - "loss": 5.5559, - "step": 22547 - }, - { - "epoch": 11.759061277705346, - "grad_norm": 1.389897108078003, - "learning_rate": 7.784924623115578e-05, - "loss": 5.6088, - "step": 22548 - }, - { - "epoch": 11.759582790091265, - "grad_norm": 1.3964351415634155, - "learning_rate": 7.784824120603016e-05, - "loss": 5.6793, - "step": 22549 - }, - { - "epoch": 11.760104302477185, - "grad_norm": 1.5080510377883911, - "learning_rate": 7.784723618090452e-05, - "loss": 4.9174, - "step": 22550 - }, - { - "epoch": 11.760625814863102, - "grad_norm": 1.5277127027511597, - "learning_rate": 7.78462311557789e-05, - "loss": 5.5011, - "step": 22551 - }, - { - "epoch": 11.761147327249022, - "grad_norm": 1.4179033041000366, - "learning_rate": 7.784522613065328e-05, - "loss": 5.314, - "step": 22552 - }, - { - "epoch": 11.761668839634941, - "grad_norm": 1.507889986038208, - "learning_rate": 7.784422110552764e-05, - "loss": 5.474, - "step": 22553 - }, - { - "epoch": 11.76219035202086, - "grad_norm": 1.451195478439331, - "learning_rate": 7.784321608040202e-05, - "loss": 5.6296, - "step": 22554 - }, - { - "epoch": 11.76271186440678, - "grad_norm": 1.5349527597427368, - "learning_rate": 7.784221105527638e-05, - "loss": 5.1075, - "step": 22555 - }, - { - "epoch": 11.763233376792698, - "grad_norm": 1.4114254713058472, - "learning_rate": 7.784120603015076e-05, - "loss": 5.5489, - "step": 22556 - }, - { - "epoch": 11.763754889178617, - "grad_norm": 1.39677894115448, - "learning_rate": 7.784020100502512e-05, - "loss": 5.3971, - "step": 22557 - }, - { - "epoch": 11.764276401564537, - "grad_norm": 1.459970235824585, - "learning_rate": 7.78391959798995e-05, - "loss": 5.2786, - "step": 22558 - }, - { - "epoch": 11.764797913950456, - "grad_norm": 1.4387083053588867, - "learning_rate": 7.783819095477387e-05, - "loss": 5.6503, - "step": 22559 - }, - { - "epoch": 11.765319426336376, - "grad_norm": 1.375139832496643, - "learning_rate": 7.783718592964824e-05, - "loss": 5.4207, - "step": 22560 - }, - { - "epoch": 11.765840938722295, - "grad_norm": 1.4097038507461548, - "learning_rate": 7.783618090452261e-05, - "loss": 5.5737, - "step": 22561 - }, - { - "epoch": 11.766362451108213, - "grad_norm": 1.4475419521331787, - "learning_rate": 7.783517587939699e-05, - "loss": 5.6283, - "step": 22562 - }, - { - "epoch": 11.766883963494132, - "grad_norm": 1.5301154851913452, - "learning_rate": 7.783417085427136e-05, - "loss": 5.0862, - "step": 22563 - }, - { - "epoch": 11.767405475880052, - "grad_norm": 1.3411800861358643, - "learning_rate": 7.783316582914574e-05, - "loss": 5.8724, - "step": 22564 - }, - { - "epoch": 11.767926988265971, - "grad_norm": 1.3830626010894775, - "learning_rate": 7.78321608040201e-05, - "loss": 5.2935, - "step": 22565 - }, - { - "epoch": 11.76844850065189, - "grad_norm": 1.4088339805603027, - "learning_rate": 7.783115577889447e-05, - "loss": 5.6065, - "step": 22566 - }, - { - "epoch": 11.76897001303781, - "grad_norm": 1.435139536857605, - "learning_rate": 7.783015075376885e-05, - "loss": 5.2776, - "step": 22567 - }, - { - "epoch": 11.769491525423728, - "grad_norm": 1.633093237876892, - "learning_rate": 7.782914572864321e-05, - "loss": 4.761, - "step": 22568 - }, - { - "epoch": 11.770013037809647, - "grad_norm": 1.4369118213653564, - "learning_rate": 7.782814070351759e-05, - "loss": 5.6728, - "step": 22569 - }, - { - "epoch": 11.770534550195567, - "grad_norm": 1.614274024963379, - "learning_rate": 7.782713567839196e-05, - "loss": 5.506, - "step": 22570 - }, - { - "epoch": 11.771056062581486, - "grad_norm": 1.4434432983398438, - "learning_rate": 7.782613065326633e-05, - "loss": 5.3088, - "step": 22571 - }, - { - "epoch": 11.771577574967406, - "grad_norm": 1.3761581182479858, - "learning_rate": 7.782512562814071e-05, - "loss": 5.1766, - "step": 22572 - }, - { - "epoch": 11.772099087353325, - "grad_norm": 1.6434475183486938, - "learning_rate": 7.782412060301509e-05, - "loss": 5.3583, - "step": 22573 - }, - { - "epoch": 11.772620599739243, - "grad_norm": 1.4031544923782349, - "learning_rate": 7.782311557788945e-05, - "loss": 5.3243, - "step": 22574 - }, - { - "epoch": 11.773142112125162, - "grad_norm": 1.5299957990646362, - "learning_rate": 7.782211055276383e-05, - "loss": 5.5318, - "step": 22575 - }, - { - "epoch": 11.773663624511082, - "grad_norm": 1.4312094449996948, - "learning_rate": 7.78211055276382e-05, - "loss": 5.3413, - "step": 22576 - }, - { - "epoch": 11.774185136897001, - "grad_norm": 1.6112605333328247, - "learning_rate": 7.782010050251257e-05, - "loss": 4.8223, - "step": 22577 - }, - { - "epoch": 11.77470664928292, - "grad_norm": 1.524142861366272, - "learning_rate": 7.781909547738694e-05, - "loss": 5.452, - "step": 22578 - }, - { - "epoch": 11.77522816166884, - "grad_norm": 1.4954161643981934, - "learning_rate": 7.78180904522613e-05, - "loss": 5.1696, - "step": 22579 - }, - { - "epoch": 11.775749674054758, - "grad_norm": 1.5065370798110962, - "learning_rate": 7.781708542713568e-05, - "loss": 4.8782, - "step": 22580 - }, - { - "epoch": 11.776271186440677, - "grad_norm": 1.3939415216445923, - "learning_rate": 7.781608040201004e-05, - "loss": 5.6884, - "step": 22581 - }, - { - "epoch": 11.776792698826597, - "grad_norm": 1.419543981552124, - "learning_rate": 7.781507537688442e-05, - "loss": 5.3058, - "step": 22582 - }, - { - "epoch": 11.777314211212516, - "grad_norm": 1.5199605226516724, - "learning_rate": 7.78140703517588e-05, - "loss": 5.443, - "step": 22583 - }, - { - "epoch": 11.777835723598436, - "grad_norm": 1.4135801792144775, - "learning_rate": 7.781306532663318e-05, - "loss": 5.8503, - "step": 22584 - }, - { - "epoch": 11.778357235984355, - "grad_norm": 1.3290280103683472, - "learning_rate": 7.781206030150754e-05, - "loss": 5.666, - "step": 22585 - }, - { - "epoch": 11.778878748370273, - "grad_norm": 1.3210972547531128, - "learning_rate": 7.781105527638192e-05, - "loss": 5.68, - "step": 22586 - }, - { - "epoch": 11.779400260756193, - "grad_norm": 1.360033631324768, - "learning_rate": 7.781005025125628e-05, - "loss": 5.4132, - "step": 22587 - }, - { - "epoch": 11.779921773142112, - "grad_norm": 1.360029935836792, - "learning_rate": 7.780904522613066e-05, - "loss": 5.5163, - "step": 22588 - }, - { - "epoch": 11.780443285528031, - "grad_norm": 1.5235960483551025, - "learning_rate": 7.780804020100503e-05, - "loss": 5.0893, - "step": 22589 - }, - { - "epoch": 11.780964797913951, - "grad_norm": 1.5871440172195435, - "learning_rate": 7.78070351758794e-05, - "loss": 4.9198, - "step": 22590 - }, - { - "epoch": 11.78148631029987, - "grad_norm": 1.3662500381469727, - "learning_rate": 7.780603015075377e-05, - "loss": 5.3048, - "step": 22591 - }, - { - "epoch": 11.782007822685788, - "grad_norm": 1.5816771984100342, - "learning_rate": 7.780502512562815e-05, - "loss": 5.1533, - "step": 22592 - }, - { - "epoch": 11.782529335071708, - "grad_norm": 1.4431374073028564, - "learning_rate": 7.780402010050252e-05, - "loss": 4.9745, - "step": 22593 - }, - { - "epoch": 11.783050847457627, - "grad_norm": 1.3320066928863525, - "learning_rate": 7.780301507537689e-05, - "loss": 5.7833, - "step": 22594 - }, - { - "epoch": 11.783572359843546, - "grad_norm": 1.4902459383010864, - "learning_rate": 7.780201005025127e-05, - "loss": 5.4279, - "step": 22595 - }, - { - "epoch": 11.784093872229466, - "grad_norm": 1.379684329032898, - "learning_rate": 7.780100502512563e-05, - "loss": 5.5875, - "step": 22596 - }, - { - "epoch": 11.784615384615385, - "grad_norm": 1.3418806791305542, - "learning_rate": 7.780000000000001e-05, - "loss": 5.7309, - "step": 22597 - }, - { - "epoch": 11.785136897001303, - "grad_norm": 1.5177135467529297, - "learning_rate": 7.779899497487437e-05, - "loss": 5.6287, - "step": 22598 - }, - { - "epoch": 11.785658409387223, - "grad_norm": 1.567966341972351, - "learning_rate": 7.779798994974875e-05, - "loss": 5.2633, - "step": 22599 - }, - { - "epoch": 11.786179921773142, - "grad_norm": 1.4194515943527222, - "learning_rate": 7.779698492462311e-05, - "loss": 5.2517, - "step": 22600 - }, - { - "epoch": 11.786701434159061, - "grad_norm": 1.4837363958358765, - "learning_rate": 7.779597989949749e-05, - "loss": 5.1693, - "step": 22601 - }, - { - "epoch": 11.787222946544981, - "grad_norm": 1.324831485748291, - "learning_rate": 7.779497487437186e-05, - "loss": 5.2914, - "step": 22602 - }, - { - "epoch": 11.7877444589309, - "grad_norm": 1.3949034214019775, - "learning_rate": 7.779396984924623e-05, - "loss": 5.7782, - "step": 22603 - }, - { - "epoch": 11.788265971316818, - "grad_norm": 1.390512228012085, - "learning_rate": 7.779296482412061e-05, - "loss": 5.5478, - "step": 22604 - }, - { - "epoch": 11.788787483702738, - "grad_norm": 1.4182865619659424, - "learning_rate": 7.779195979899498e-05, - "loss": 5.0027, - "step": 22605 - }, - { - "epoch": 11.789308996088657, - "grad_norm": 1.4023958444595337, - "learning_rate": 7.779095477386935e-05, - "loss": 5.801, - "step": 22606 - }, - { - "epoch": 11.789830508474576, - "grad_norm": 1.4441583156585693, - "learning_rate": 7.778994974874372e-05, - "loss": 5.4366, - "step": 22607 - }, - { - "epoch": 11.790352020860496, - "grad_norm": 1.3786672353744507, - "learning_rate": 7.77889447236181e-05, - "loss": 5.6111, - "step": 22608 - }, - { - "epoch": 11.790873533246415, - "grad_norm": 1.4142861366271973, - "learning_rate": 7.778793969849246e-05, - "loss": 4.8512, - "step": 22609 - }, - { - "epoch": 11.791395045632333, - "grad_norm": 1.556159496307373, - "learning_rate": 7.778693467336684e-05, - "loss": 5.3905, - "step": 22610 - }, - { - "epoch": 11.791916558018253, - "grad_norm": 1.321419596672058, - "learning_rate": 7.77859296482412e-05, - "loss": 5.9656, - "step": 22611 - }, - { - "epoch": 11.792438070404172, - "grad_norm": 1.5288840532302856, - "learning_rate": 7.778492462311558e-05, - "loss": 4.8526, - "step": 22612 - }, - { - "epoch": 11.792959582790091, - "grad_norm": 1.531825065612793, - "learning_rate": 7.778391959798996e-05, - "loss": 5.5563, - "step": 22613 - }, - { - "epoch": 11.793481095176011, - "grad_norm": 1.3932687044143677, - "learning_rate": 7.778291457286434e-05, - "loss": 5.5183, - "step": 22614 - }, - { - "epoch": 11.79400260756193, - "grad_norm": 1.4611543416976929, - "learning_rate": 7.77819095477387e-05, - "loss": 5.4018, - "step": 22615 - }, - { - "epoch": 11.794524119947848, - "grad_norm": 1.4656734466552734, - "learning_rate": 7.778090452261308e-05, - "loss": 5.277, - "step": 22616 - }, - { - "epoch": 11.795045632333768, - "grad_norm": 1.4397114515304565, - "learning_rate": 7.777989949748744e-05, - "loss": 5.412, - "step": 22617 - }, - { - "epoch": 11.795567144719687, - "grad_norm": 1.3317029476165771, - "learning_rate": 7.77788944723618e-05, - "loss": 5.7058, - "step": 22618 - }, - { - "epoch": 11.796088657105607, - "grad_norm": 1.417751669883728, - "learning_rate": 7.777788944723618e-05, - "loss": 4.8532, - "step": 22619 - }, - { - "epoch": 11.796610169491526, - "grad_norm": 1.4027276039123535, - "learning_rate": 7.777688442211055e-05, - "loss": 5.4494, - "step": 22620 - }, - { - "epoch": 11.797131681877445, - "grad_norm": 1.506284475326538, - "learning_rate": 7.777587939698493e-05, - "loss": 5.548, - "step": 22621 - }, - { - "epoch": 11.797653194263363, - "grad_norm": 1.559933066368103, - "learning_rate": 7.777487437185929e-05, - "loss": 5.3562, - "step": 22622 - }, - { - "epoch": 11.798174706649283, - "grad_norm": 1.5100486278533936, - "learning_rate": 7.777386934673367e-05, - "loss": 5.5662, - "step": 22623 - }, - { - "epoch": 11.798696219035202, - "grad_norm": 1.4609471559524536, - "learning_rate": 7.777286432160805e-05, - "loss": 5.5185, - "step": 22624 - }, - { - "epoch": 11.799217731421122, - "grad_norm": 1.5066847801208496, - "learning_rate": 7.777185929648242e-05, - "loss": 5.5483, - "step": 22625 - }, - { - "epoch": 11.799739243807041, - "grad_norm": 1.5386645793914795, - "learning_rate": 7.777085427135679e-05, - "loss": 4.8114, - "step": 22626 - }, - { - "epoch": 11.800260756192959, - "grad_norm": 1.5480759143829346, - "learning_rate": 7.776984924623117e-05, - "loss": 5.1437, - "step": 22627 - }, - { - "epoch": 11.800782268578878, - "grad_norm": 1.615178108215332, - "learning_rate": 7.776884422110553e-05, - "loss": 5.4463, - "step": 22628 - }, - { - "epoch": 11.801303780964798, - "grad_norm": 1.5255157947540283, - "learning_rate": 7.776783919597991e-05, - "loss": 5.6629, - "step": 22629 - }, - { - "epoch": 11.801825293350717, - "grad_norm": 1.4355379343032837, - "learning_rate": 7.776683417085427e-05, - "loss": 5.4075, - "step": 22630 - }, - { - "epoch": 11.802346805736637, - "grad_norm": 1.455886721611023, - "learning_rate": 7.776582914572864e-05, - "loss": 5.5523, - "step": 22631 - }, - { - "epoch": 11.802868318122556, - "grad_norm": 1.5625265836715698, - "learning_rate": 7.776482412060301e-05, - "loss": 5.4104, - "step": 22632 - }, - { - "epoch": 11.803389830508475, - "grad_norm": 1.5349183082580566, - "learning_rate": 7.776381909547739e-05, - "loss": 5.732, - "step": 22633 - }, - { - "epoch": 11.803911342894393, - "grad_norm": 1.3888195753097534, - "learning_rate": 7.776281407035177e-05, - "loss": 5.7543, - "step": 22634 - }, - { - "epoch": 11.804432855280313, - "grad_norm": 1.380212664604187, - "learning_rate": 7.776180904522613e-05, - "loss": 5.7872, - "step": 22635 - }, - { - "epoch": 11.804954367666232, - "grad_norm": 1.3873121738433838, - "learning_rate": 7.776080402010051e-05, - "loss": 5.9743, - "step": 22636 - }, - { - "epoch": 11.805475880052152, - "grad_norm": 1.4057403802871704, - "learning_rate": 7.775979899497488e-05, - "loss": 5.47, - "step": 22637 - }, - { - "epoch": 11.805997392438071, - "grad_norm": 1.397611379623413, - "learning_rate": 7.775879396984925e-05, - "loss": 5.6726, - "step": 22638 - }, - { - "epoch": 11.806518904823989, - "grad_norm": 1.4628974199295044, - "learning_rate": 7.775778894472362e-05, - "loss": 5.7401, - "step": 22639 - }, - { - "epoch": 11.807040417209908, - "grad_norm": 1.396308422088623, - "learning_rate": 7.7756783919598e-05, - "loss": 5.6031, - "step": 22640 - }, - { - "epoch": 11.807561929595828, - "grad_norm": 1.4852187633514404, - "learning_rate": 7.775577889447236e-05, - "loss": 5.618, - "step": 22641 - }, - { - "epoch": 11.808083441981747, - "grad_norm": 1.308880090713501, - "learning_rate": 7.775477386934674e-05, - "loss": 5.3694, - "step": 22642 - }, - { - "epoch": 11.808604954367667, - "grad_norm": 1.5052186250686646, - "learning_rate": 7.77537688442211e-05, - "loss": 5.4973, - "step": 22643 - }, - { - "epoch": 11.809126466753586, - "grad_norm": 1.4929285049438477, - "learning_rate": 7.775276381909548e-05, - "loss": 5.4388, - "step": 22644 - }, - { - "epoch": 11.809647979139505, - "grad_norm": 1.4425764083862305, - "learning_rate": 7.775175879396986e-05, - "loss": 5.5968, - "step": 22645 - }, - { - "epoch": 11.810169491525423, - "grad_norm": 1.4013690948486328, - "learning_rate": 7.775075376884422e-05, - "loss": 5.3699, - "step": 22646 - }, - { - "epoch": 11.810691003911343, - "grad_norm": 1.5370193719863892, - "learning_rate": 7.77497487437186e-05, - "loss": 5.4315, - "step": 22647 - }, - { - "epoch": 11.811212516297262, - "grad_norm": 1.3738844394683838, - "learning_rate": 7.774874371859296e-05, - "loss": 5.4048, - "step": 22648 - }, - { - "epoch": 11.811734028683182, - "grad_norm": 1.3755747079849243, - "learning_rate": 7.774773869346734e-05, - "loss": 5.4961, - "step": 22649 - }, - { - "epoch": 11.812255541069101, - "grad_norm": 1.3077497482299805, - "learning_rate": 7.774673366834171e-05, - "loss": 5.7154, - "step": 22650 - }, - { - "epoch": 11.812777053455019, - "grad_norm": 1.4507712125778198, - "learning_rate": 7.774572864321608e-05, - "loss": 4.2373, - "step": 22651 - }, - { - "epoch": 11.813298565840938, - "grad_norm": 1.441184401512146, - "learning_rate": 7.774472361809045e-05, - "loss": 5.6634, - "step": 22652 - }, - { - "epoch": 11.813820078226858, - "grad_norm": 1.4922544956207275, - "learning_rate": 7.774371859296483e-05, - "loss": 5.0742, - "step": 22653 - }, - { - "epoch": 11.814341590612777, - "grad_norm": 1.8142421245574951, - "learning_rate": 7.77427135678392e-05, - "loss": 5.6118, - "step": 22654 - }, - { - "epoch": 11.814863102998697, - "grad_norm": 1.4250328540802002, - "learning_rate": 7.774170854271358e-05, - "loss": 5.5076, - "step": 22655 - }, - { - "epoch": 11.815384615384616, - "grad_norm": 1.462842345237732, - "learning_rate": 7.774070351758795e-05, - "loss": 5.4461, - "step": 22656 - }, - { - "epoch": 11.815906127770534, - "grad_norm": 1.4932646751403809, - "learning_rate": 7.773969849246232e-05, - "loss": 5.8046, - "step": 22657 - }, - { - "epoch": 11.816427640156453, - "grad_norm": 1.4065730571746826, - "learning_rate": 7.773869346733669e-05, - "loss": 5.6839, - "step": 22658 - }, - { - "epoch": 11.816949152542373, - "grad_norm": 1.3836950063705444, - "learning_rate": 7.773768844221105e-05, - "loss": 5.3066, - "step": 22659 - }, - { - "epoch": 11.817470664928292, - "grad_norm": 1.4914215803146362, - "learning_rate": 7.773668341708543e-05, - "loss": 5.6476, - "step": 22660 - }, - { - "epoch": 11.817992177314212, - "grad_norm": 1.5420743227005005, - "learning_rate": 7.77356783919598e-05, - "loss": 5.0739, - "step": 22661 - }, - { - "epoch": 11.818513689700131, - "grad_norm": 1.473806381225586, - "learning_rate": 7.773467336683417e-05, - "loss": 5.3445, - "step": 22662 - }, - { - "epoch": 11.819035202086049, - "grad_norm": 1.5632953643798828, - "learning_rate": 7.773366834170854e-05, - "loss": 5.5676, - "step": 22663 - }, - { - "epoch": 11.819556714471968, - "grad_norm": 1.4963737726211548, - "learning_rate": 7.773266331658292e-05, - "loss": 5.3732, - "step": 22664 - }, - { - "epoch": 11.820078226857888, - "grad_norm": 1.5482940673828125, - "learning_rate": 7.773165829145729e-05, - "loss": 4.7295, - "step": 22665 - }, - { - "epoch": 11.820599739243807, - "grad_norm": 1.5018850564956665, - "learning_rate": 7.773065326633167e-05, - "loss": 5.3457, - "step": 22666 - }, - { - "epoch": 11.821121251629727, - "grad_norm": 1.502486228942871, - "learning_rate": 7.772964824120604e-05, - "loss": 5.3797, - "step": 22667 - }, - { - "epoch": 11.821642764015646, - "grad_norm": 1.5150305032730103, - "learning_rate": 7.772864321608041e-05, - "loss": 5.0542, - "step": 22668 - }, - { - "epoch": 11.822164276401564, - "grad_norm": 1.6526433229446411, - "learning_rate": 7.772763819095478e-05, - "loss": 5.0098, - "step": 22669 - }, - { - "epoch": 11.822685788787483, - "grad_norm": 1.467862606048584, - "learning_rate": 7.772663316582916e-05, - "loss": 4.4549, - "step": 22670 - }, - { - "epoch": 11.823207301173403, - "grad_norm": 1.4273595809936523, - "learning_rate": 7.772562814070352e-05, - "loss": 5.5257, - "step": 22671 - }, - { - "epoch": 11.823728813559322, - "grad_norm": 1.436381220817566, - "learning_rate": 7.772462311557788e-05, - "loss": 5.5541, - "step": 22672 - }, - { - "epoch": 11.824250325945242, - "grad_norm": 1.3694087266921997, - "learning_rate": 7.772361809045226e-05, - "loss": 5.8117, - "step": 22673 - }, - { - "epoch": 11.824771838331161, - "grad_norm": 1.4815592765808105, - "learning_rate": 7.772261306532664e-05, - "loss": 5.7105, - "step": 22674 - }, - { - "epoch": 11.825293350717079, - "grad_norm": 1.4112731218338013, - "learning_rate": 7.772160804020102e-05, - "loss": 5.3238, - "step": 22675 - }, - { - "epoch": 11.825814863102998, - "grad_norm": 1.543005108833313, - "learning_rate": 7.772060301507538e-05, - "loss": 5.5464, - "step": 22676 - }, - { - "epoch": 11.826336375488918, - "grad_norm": 1.5518546104431152, - "learning_rate": 7.771959798994976e-05, - "loss": 5.6072, - "step": 22677 - }, - { - "epoch": 11.826857887874837, - "grad_norm": 1.4318475723266602, - "learning_rate": 7.771859296482412e-05, - "loss": 5.3797, - "step": 22678 - }, - { - "epoch": 11.827379400260757, - "grad_norm": 1.4969969987869263, - "learning_rate": 7.77175879396985e-05, - "loss": 5.5642, - "step": 22679 - }, - { - "epoch": 11.827900912646676, - "grad_norm": 1.4877046346664429, - "learning_rate": 7.771658291457287e-05, - "loss": 4.9703, - "step": 22680 - }, - { - "epoch": 11.828422425032594, - "grad_norm": 1.477501630783081, - "learning_rate": 7.771557788944724e-05, - "loss": 5.3417, - "step": 22681 - }, - { - "epoch": 11.828943937418513, - "grad_norm": 1.5503380298614502, - "learning_rate": 7.771457286432161e-05, - "loss": 5.4458, - "step": 22682 - }, - { - "epoch": 11.829465449804433, - "grad_norm": 1.4590044021606445, - "learning_rate": 7.771356783919599e-05, - "loss": 5.4496, - "step": 22683 - }, - { - "epoch": 11.829986962190352, - "grad_norm": 1.5131574869155884, - "learning_rate": 7.771256281407035e-05, - "loss": 5.5568, - "step": 22684 - }, - { - "epoch": 11.830508474576272, - "grad_norm": 1.4273806810379028, - "learning_rate": 7.771155778894473e-05, - "loss": 5.5218, - "step": 22685 - }, - { - "epoch": 11.831029986962191, - "grad_norm": 1.5326489210128784, - "learning_rate": 7.77105527638191e-05, - "loss": 5.5346, - "step": 22686 - }, - { - "epoch": 11.831551499348109, - "grad_norm": 1.5204427242279053, - "learning_rate": 7.770954773869347e-05, - "loss": 5.513, - "step": 22687 - }, - { - "epoch": 11.832073011734028, - "grad_norm": 1.4651696681976318, - "learning_rate": 7.770854271356785e-05, - "loss": 5.5848, - "step": 22688 - }, - { - "epoch": 11.832594524119948, - "grad_norm": 1.5152647495269775, - "learning_rate": 7.770753768844221e-05, - "loss": 5.6562, - "step": 22689 - }, - { - "epoch": 11.833116036505867, - "grad_norm": 1.524372935295105, - "learning_rate": 7.770653266331659e-05, - "loss": 5.3378, - "step": 22690 - }, - { - "epoch": 11.833637548891787, - "grad_norm": 1.3820195198059082, - "learning_rate": 7.770552763819095e-05, - "loss": 5.2402, - "step": 22691 - }, - { - "epoch": 11.834159061277706, - "grad_norm": 1.4915844202041626, - "learning_rate": 7.770452261306533e-05, - "loss": 5.598, - "step": 22692 - }, - { - "epoch": 11.834680573663624, - "grad_norm": 1.3802319765090942, - "learning_rate": 7.77035175879397e-05, - "loss": 5.7639, - "step": 22693 - }, - { - "epoch": 11.835202086049543, - "grad_norm": 1.4876441955566406, - "learning_rate": 7.770251256281407e-05, - "loss": 5.4509, - "step": 22694 - }, - { - "epoch": 11.835723598435463, - "grad_norm": 1.6112655401229858, - "learning_rate": 7.770150753768845e-05, - "loss": 4.8846, - "step": 22695 - }, - { - "epoch": 11.836245110821382, - "grad_norm": 1.4504104852676392, - "learning_rate": 7.770050251256283e-05, - "loss": 5.5573, - "step": 22696 - }, - { - "epoch": 11.836766623207302, - "grad_norm": 1.4873734712600708, - "learning_rate": 7.76994974874372e-05, - "loss": 5.3434, - "step": 22697 - }, - { - "epoch": 11.837288135593221, - "grad_norm": 1.5122020244598389, - "learning_rate": 7.769849246231156e-05, - "loss": 5.0015, - "step": 22698 - }, - { - "epoch": 11.837809647979139, - "grad_norm": 1.3753334283828735, - "learning_rate": 7.769748743718594e-05, - "loss": 5.6805, - "step": 22699 - }, - { - "epoch": 11.838331160365058, - "grad_norm": 1.3953170776367188, - "learning_rate": 7.76964824120603e-05, - "loss": 5.5356, - "step": 22700 - }, - { - "epoch": 11.838852672750978, - "grad_norm": 1.3814905881881714, - "learning_rate": 7.769547738693468e-05, - "loss": 5.7792, - "step": 22701 - }, - { - "epoch": 11.839374185136897, - "grad_norm": 1.3932191133499146, - "learning_rate": 7.769447236180904e-05, - "loss": 5.6008, - "step": 22702 - }, - { - "epoch": 11.839895697522817, - "grad_norm": 1.4615752696990967, - "learning_rate": 7.769346733668342e-05, - "loss": 5.0847, - "step": 22703 - }, - { - "epoch": 11.840417209908736, - "grad_norm": 1.4024382829666138, - "learning_rate": 7.769246231155778e-05, - "loss": 5.7398, - "step": 22704 - }, - { - "epoch": 11.840938722294654, - "grad_norm": 1.3653922080993652, - "learning_rate": 7.769145728643216e-05, - "loss": 5.4427, - "step": 22705 - }, - { - "epoch": 11.841460234680573, - "grad_norm": 1.3664058446884155, - "learning_rate": 7.769045226130654e-05, - "loss": 5.7548, - "step": 22706 - }, - { - "epoch": 11.841981747066493, - "grad_norm": 1.3710386753082275, - "learning_rate": 7.768944723618092e-05, - "loss": 5.2954, - "step": 22707 - }, - { - "epoch": 11.842503259452412, - "grad_norm": 1.6766345500946045, - "learning_rate": 7.768844221105528e-05, - "loss": 4.7559, - "step": 22708 - }, - { - "epoch": 11.843024771838332, - "grad_norm": 1.4093843698501587, - "learning_rate": 7.768743718592966e-05, - "loss": 5.4424, - "step": 22709 - }, - { - "epoch": 11.843546284224251, - "grad_norm": 1.2592823505401611, - "learning_rate": 7.768643216080402e-05, - "loss": 5.7294, - "step": 22710 - }, - { - "epoch": 11.844067796610169, - "grad_norm": 1.4486922025680542, - "learning_rate": 7.768542713567839e-05, - "loss": 5.7768, - "step": 22711 - }, - { - "epoch": 11.844589308996088, - "grad_norm": 1.4419364929199219, - "learning_rate": 7.768442211055277e-05, - "loss": 5.1853, - "step": 22712 - }, - { - "epoch": 11.845110821382008, - "grad_norm": 1.4274532794952393, - "learning_rate": 7.768341708542713e-05, - "loss": 5.6509, - "step": 22713 - }, - { - "epoch": 11.845632333767927, - "grad_norm": 1.49880850315094, - "learning_rate": 7.768241206030151e-05, - "loss": 5.2686, - "step": 22714 - }, - { - "epoch": 11.846153846153847, - "grad_norm": 1.4882546663284302, - "learning_rate": 7.768140703517587e-05, - "loss": 5.242, - "step": 22715 - }, - { - "epoch": 11.846675358539766, - "grad_norm": 1.4689980745315552, - "learning_rate": 7.768040201005025e-05, - "loss": 5.5217, - "step": 22716 - }, - { - "epoch": 11.847196870925684, - "grad_norm": 1.4190237522125244, - "learning_rate": 7.767939698492463e-05, - "loss": 5.5521, - "step": 22717 - }, - { - "epoch": 11.847718383311603, - "grad_norm": 1.3695807456970215, - "learning_rate": 7.7678391959799e-05, - "loss": 5.6601, - "step": 22718 - }, - { - "epoch": 11.848239895697523, - "grad_norm": 1.4892425537109375, - "learning_rate": 7.767738693467337e-05, - "loss": 5.3858, - "step": 22719 - }, - { - "epoch": 11.848761408083442, - "grad_norm": 1.4445255994796753, - "learning_rate": 7.767638190954775e-05, - "loss": 5.8437, - "step": 22720 - }, - { - "epoch": 11.849282920469362, - "grad_norm": 1.4237394332885742, - "learning_rate": 7.767537688442211e-05, - "loss": 5.1048, - "step": 22721 - }, - { - "epoch": 11.84980443285528, - "grad_norm": 1.4066697359085083, - "learning_rate": 7.767437185929649e-05, - "loss": 5.3903, - "step": 22722 - }, - { - "epoch": 11.850325945241199, - "grad_norm": 1.3540749549865723, - "learning_rate": 7.767336683417085e-05, - "loss": 5.7411, - "step": 22723 - }, - { - "epoch": 11.850847457627118, - "grad_norm": 1.441124439239502, - "learning_rate": 7.767236180904522e-05, - "loss": 5.7049, - "step": 22724 - }, - { - "epoch": 11.851368970013038, - "grad_norm": 1.5376390218734741, - "learning_rate": 7.76713567839196e-05, - "loss": 5.6738, - "step": 22725 - }, - { - "epoch": 11.851890482398957, - "grad_norm": 1.5435471534729004, - "learning_rate": 7.767035175879397e-05, - "loss": 5.0871, - "step": 22726 - }, - { - "epoch": 11.852411994784877, - "grad_norm": 1.3801062107086182, - "learning_rate": 7.766934673366835e-05, - "loss": 5.6365, - "step": 22727 - }, - { - "epoch": 11.852933507170796, - "grad_norm": 1.4566866159439087, - "learning_rate": 7.766834170854272e-05, - "loss": 5.398, - "step": 22728 - }, - { - "epoch": 11.853455019556714, - "grad_norm": 1.389764428138733, - "learning_rate": 7.76673366834171e-05, - "loss": 5.53, - "step": 22729 - }, - { - "epoch": 11.853976531942633, - "grad_norm": 1.3744151592254639, - "learning_rate": 7.766633165829146e-05, - "loss": 5.8851, - "step": 22730 - }, - { - "epoch": 11.854498044328553, - "grad_norm": 1.4052224159240723, - "learning_rate": 7.766532663316584e-05, - "loss": 5.7469, - "step": 22731 - }, - { - "epoch": 11.855019556714472, - "grad_norm": 1.7201809883117676, - "learning_rate": 7.76643216080402e-05, - "loss": 5.3621, - "step": 22732 - }, - { - "epoch": 11.855541069100392, - "grad_norm": 1.5014305114746094, - "learning_rate": 7.766331658291458e-05, - "loss": 5.5024, - "step": 22733 - }, - { - "epoch": 11.85606258148631, - "grad_norm": 1.3792978525161743, - "learning_rate": 7.766231155778894e-05, - "loss": 5.1503, - "step": 22734 - }, - { - "epoch": 11.856584093872229, - "grad_norm": 1.5915908813476562, - "learning_rate": 7.766130653266332e-05, - "loss": 5.5429, - "step": 22735 - }, - { - "epoch": 11.857105606258148, - "grad_norm": 1.421342134475708, - "learning_rate": 7.766030150753769e-05, - "loss": 5.5586, - "step": 22736 - }, - { - "epoch": 11.857627118644068, - "grad_norm": 1.5954725742340088, - "learning_rate": 7.765929648241206e-05, - "loss": 5.6307, - "step": 22737 - }, - { - "epoch": 11.858148631029987, - "grad_norm": 1.4558377265930176, - "learning_rate": 7.765829145728644e-05, - "loss": 5.4019, - "step": 22738 - }, - { - "epoch": 11.858670143415907, - "grad_norm": 1.4969691038131714, - "learning_rate": 7.76572864321608e-05, - "loss": 5.6081, - "step": 22739 - }, - { - "epoch": 11.859191655801826, - "grad_norm": 1.45846688747406, - "learning_rate": 7.765628140703518e-05, - "loss": 4.9655, - "step": 22740 - }, - { - "epoch": 11.859713168187744, - "grad_norm": 1.4759550094604492, - "learning_rate": 7.765527638190955e-05, - "loss": 5.7098, - "step": 22741 - }, - { - "epoch": 11.860234680573663, - "grad_norm": 1.3591625690460205, - "learning_rate": 7.765427135678392e-05, - "loss": 5.7386, - "step": 22742 - }, - { - "epoch": 11.860756192959583, - "grad_norm": 1.5931816101074219, - "learning_rate": 7.765326633165829e-05, - "loss": 5.0178, - "step": 22743 - }, - { - "epoch": 11.861277705345502, - "grad_norm": 1.4250962734222412, - "learning_rate": 7.765226130653267e-05, - "loss": 5.5771, - "step": 22744 - }, - { - "epoch": 11.861799217731422, - "grad_norm": 1.2878308296203613, - "learning_rate": 7.765125628140703e-05, - "loss": 5.3277, - "step": 22745 - }, - { - "epoch": 11.86232073011734, - "grad_norm": 1.4066941738128662, - "learning_rate": 7.765025125628141e-05, - "loss": 4.9266, - "step": 22746 - }, - { - "epoch": 11.862842242503259, - "grad_norm": 1.3655264377593994, - "learning_rate": 7.764924623115579e-05, - "loss": 5.4413, - "step": 22747 - }, - { - "epoch": 11.863363754889178, - "grad_norm": 1.3785651922225952, - "learning_rate": 7.764824120603016e-05, - "loss": 5.6103, - "step": 22748 - }, - { - "epoch": 11.863885267275098, - "grad_norm": 1.544202208518982, - "learning_rate": 7.764723618090453e-05, - "loss": 5.7629, - "step": 22749 - }, - { - "epoch": 11.864406779661017, - "grad_norm": 1.5446349382400513, - "learning_rate": 7.764623115577891e-05, - "loss": 4.9071, - "step": 22750 - }, - { - "epoch": 11.864928292046937, - "grad_norm": 1.3299455642700195, - "learning_rate": 7.764522613065327e-05, - "loss": 5.6792, - "step": 22751 - }, - { - "epoch": 11.865449804432854, - "grad_norm": 1.566326379776001, - "learning_rate": 7.764422110552764e-05, - "loss": 5.6187, - "step": 22752 - }, - { - "epoch": 11.865971316818774, - "grad_norm": 1.4433218240737915, - "learning_rate": 7.764321608040201e-05, - "loss": 5.1613, - "step": 22753 - }, - { - "epoch": 11.866492829204693, - "grad_norm": 1.488104224205017, - "learning_rate": 7.764221105527638e-05, - "loss": 5.1093, - "step": 22754 - }, - { - "epoch": 11.867014341590613, - "grad_norm": 1.5377894639968872, - "learning_rate": 7.764120603015076e-05, - "loss": 5.7258, - "step": 22755 - }, - { - "epoch": 11.867535853976532, - "grad_norm": 1.470167875289917, - "learning_rate": 7.764020100502512e-05, - "loss": 5.5673, - "step": 22756 - }, - { - "epoch": 11.868057366362452, - "grad_norm": 1.4235527515411377, - "learning_rate": 7.76391959798995e-05, - "loss": 5.3679, - "step": 22757 - }, - { - "epoch": 11.86857887874837, - "grad_norm": 1.5554255247116089, - "learning_rate": 7.763819095477388e-05, - "loss": 4.9945, - "step": 22758 - }, - { - "epoch": 11.869100391134289, - "grad_norm": 1.3428468704223633, - "learning_rate": 7.763718592964825e-05, - "loss": 5.6422, - "step": 22759 - }, - { - "epoch": 11.869621903520208, - "grad_norm": 1.5373897552490234, - "learning_rate": 7.763618090452262e-05, - "loss": 4.9548, - "step": 22760 - }, - { - "epoch": 11.870143415906128, - "grad_norm": 1.4787404537200928, - "learning_rate": 7.7635175879397e-05, - "loss": 5.4868, - "step": 22761 - }, - { - "epoch": 11.870664928292047, - "grad_norm": 1.615301489830017, - "learning_rate": 7.763417085427136e-05, - "loss": 5.359, - "step": 22762 - }, - { - "epoch": 11.871186440677967, - "grad_norm": 1.3885455131530762, - "learning_rate": 7.763316582914574e-05, - "loss": 5.6623, - "step": 22763 - }, - { - "epoch": 11.871707953063884, - "grad_norm": 1.3734537363052368, - "learning_rate": 7.76321608040201e-05, - "loss": 5.7026, - "step": 22764 - }, - { - "epoch": 11.872229465449804, - "grad_norm": 1.4371575117111206, - "learning_rate": 7.763115577889447e-05, - "loss": 4.9216, - "step": 22765 - }, - { - "epoch": 11.872750977835723, - "grad_norm": 1.3584342002868652, - "learning_rate": 7.763015075376884e-05, - "loss": 5.4639, - "step": 22766 - }, - { - "epoch": 11.873272490221643, - "grad_norm": 1.5779708623886108, - "learning_rate": 7.762914572864322e-05, - "loss": 5.1834, - "step": 22767 - }, - { - "epoch": 11.873794002607562, - "grad_norm": 1.4435522556304932, - "learning_rate": 7.76281407035176e-05, - "loss": 5.7174, - "step": 22768 - }, - { - "epoch": 11.874315514993482, - "grad_norm": 1.399422526359558, - "learning_rate": 7.762713567839196e-05, - "loss": 5.5019, - "step": 22769 - }, - { - "epoch": 11.8748370273794, - "grad_norm": 1.4205807447433472, - "learning_rate": 7.762613065326634e-05, - "loss": 5.5402, - "step": 22770 - }, - { - "epoch": 11.875358539765319, - "grad_norm": 1.4055366516113281, - "learning_rate": 7.76251256281407e-05, - "loss": 5.5026, - "step": 22771 - }, - { - "epoch": 11.875880052151238, - "grad_norm": 1.5622785091400146, - "learning_rate": 7.762412060301508e-05, - "loss": 5.3571, - "step": 22772 - }, - { - "epoch": 11.876401564537158, - "grad_norm": 1.4336961507797241, - "learning_rate": 7.762311557788945e-05, - "loss": 5.6756, - "step": 22773 - }, - { - "epoch": 11.876923076923077, - "grad_norm": 1.5095692873001099, - "learning_rate": 7.762211055276383e-05, - "loss": 5.5744, - "step": 22774 - }, - { - "epoch": 11.877444589308997, - "grad_norm": 1.450853943824768, - "learning_rate": 7.762110552763819e-05, - "loss": 5.4134, - "step": 22775 - }, - { - "epoch": 11.877966101694915, - "grad_norm": 1.4331743717193604, - "learning_rate": 7.762010050251257e-05, - "loss": 5.6386, - "step": 22776 - }, - { - "epoch": 11.878487614080834, - "grad_norm": 1.4617465734481812, - "learning_rate": 7.761909547738693e-05, - "loss": 5.5952, - "step": 22777 - }, - { - "epoch": 11.879009126466753, - "grad_norm": 1.61185622215271, - "learning_rate": 7.761809045226131e-05, - "loss": 4.8385, - "step": 22778 - }, - { - "epoch": 11.879530638852673, - "grad_norm": 1.49136221408844, - "learning_rate": 7.761708542713569e-05, - "loss": 5.7215, - "step": 22779 - }, - { - "epoch": 11.880052151238592, - "grad_norm": 1.435266375541687, - "learning_rate": 7.761608040201005e-05, - "loss": 5.469, - "step": 22780 - }, - { - "epoch": 11.880573663624512, - "grad_norm": 1.4352315664291382, - "learning_rate": 7.761507537688443e-05, - "loss": 5.2443, - "step": 22781 - }, - { - "epoch": 11.88109517601043, - "grad_norm": 1.4091715812683105, - "learning_rate": 7.76140703517588e-05, - "loss": 5.4824, - "step": 22782 - }, - { - "epoch": 11.881616688396349, - "grad_norm": 1.4578218460083008, - "learning_rate": 7.761306532663317e-05, - "loss": 5.2459, - "step": 22783 - }, - { - "epoch": 11.882138200782268, - "grad_norm": 1.4634780883789062, - "learning_rate": 7.761206030150754e-05, - "loss": 5.3587, - "step": 22784 - }, - { - "epoch": 11.882659713168188, - "grad_norm": 1.447589635848999, - "learning_rate": 7.761105527638191e-05, - "loss": 5.4676, - "step": 22785 - }, - { - "epoch": 11.883181225554107, - "grad_norm": 1.4280949831008911, - "learning_rate": 7.761005025125628e-05, - "loss": 5.1423, - "step": 22786 - }, - { - "epoch": 11.883702737940027, - "grad_norm": 1.5410606861114502, - "learning_rate": 7.760904522613066e-05, - "loss": 4.5789, - "step": 22787 - }, - { - "epoch": 11.884224250325945, - "grad_norm": 1.4003599882125854, - "learning_rate": 7.760804020100503e-05, - "loss": 5.6668, - "step": 22788 - }, - { - "epoch": 11.884745762711864, - "grad_norm": 1.3400455713272095, - "learning_rate": 7.760703517587941e-05, - "loss": 5.3984, - "step": 22789 - }, - { - "epoch": 11.885267275097783, - "grad_norm": 1.4327237606048584, - "learning_rate": 7.760603015075378e-05, - "loss": 5.4558, - "step": 22790 - }, - { - "epoch": 11.885788787483703, - "grad_norm": 1.387759804725647, - "learning_rate": 7.760502512562814e-05, - "loss": 5.7487, - "step": 22791 - }, - { - "epoch": 11.886310299869622, - "grad_norm": 1.3164583444595337, - "learning_rate": 7.760402010050252e-05, - "loss": 5.7215, - "step": 22792 - }, - { - "epoch": 11.886831812255542, - "grad_norm": 1.4991700649261475, - "learning_rate": 7.760301507537688e-05, - "loss": 5.3973, - "step": 22793 - }, - { - "epoch": 11.88735332464146, - "grad_norm": 1.348984956741333, - "learning_rate": 7.760201005025126e-05, - "loss": 5.3126, - "step": 22794 - }, - { - "epoch": 11.887874837027379, - "grad_norm": 1.466622233390808, - "learning_rate": 7.760100502512562e-05, - "loss": 5.583, - "step": 22795 - }, - { - "epoch": 11.888396349413298, - "grad_norm": 1.6002801656723022, - "learning_rate": 7.76e-05, - "loss": 4.7766, - "step": 22796 - }, - { - "epoch": 11.888917861799218, - "grad_norm": 1.462462067604065, - "learning_rate": 7.759899497487437e-05, - "loss": 5.5335, - "step": 22797 - }, - { - "epoch": 11.889439374185137, - "grad_norm": 1.622644305229187, - "learning_rate": 7.759798994974874e-05, - "loss": 5.4052, - "step": 22798 - }, - { - "epoch": 11.889960886571057, - "grad_norm": 1.441701889038086, - "learning_rate": 7.759698492462312e-05, - "loss": 5.7722, - "step": 22799 - }, - { - "epoch": 11.890482398956975, - "grad_norm": 1.5509207248687744, - "learning_rate": 7.75959798994975e-05, - "loss": 5.2706, - "step": 22800 - }, - { - "epoch": 11.891003911342894, - "grad_norm": 1.5040091276168823, - "learning_rate": 7.759497487437186e-05, - "loss": 5.4223, - "step": 22801 - }, - { - "epoch": 11.891525423728813, - "grad_norm": 1.3873541355133057, - "learning_rate": 7.759396984924624e-05, - "loss": 5.6891, - "step": 22802 - }, - { - "epoch": 11.892046936114733, - "grad_norm": 1.436231255531311, - "learning_rate": 7.75929648241206e-05, - "loss": 5.2115, - "step": 22803 - }, - { - "epoch": 11.892568448500652, - "grad_norm": 1.3234046697616577, - "learning_rate": 7.759195979899497e-05, - "loss": 5.5025, - "step": 22804 - }, - { - "epoch": 11.893089960886572, - "grad_norm": 1.5106326341629028, - "learning_rate": 7.759095477386935e-05, - "loss": 5.2402, - "step": 22805 - }, - { - "epoch": 11.89361147327249, - "grad_norm": 1.4511653184890747, - "learning_rate": 7.758994974874371e-05, - "loss": 5.7186, - "step": 22806 - }, - { - "epoch": 11.894132985658409, - "grad_norm": 1.402060866355896, - "learning_rate": 7.758894472361809e-05, - "loss": 5.6111, - "step": 22807 - }, - { - "epoch": 11.894654498044329, - "grad_norm": 1.4000260829925537, - "learning_rate": 7.758793969849247e-05, - "loss": 5.7007, - "step": 22808 - }, - { - "epoch": 11.895176010430248, - "grad_norm": 1.5257493257522583, - "learning_rate": 7.758693467336685e-05, - "loss": 5.0421, - "step": 22809 - }, - { - "epoch": 11.895697522816167, - "grad_norm": 1.494583010673523, - "learning_rate": 7.758592964824121e-05, - "loss": 5.6571, - "step": 22810 - }, - { - "epoch": 11.896219035202087, - "grad_norm": 1.4354771375656128, - "learning_rate": 7.758492462311559e-05, - "loss": 5.6048, - "step": 22811 - }, - { - "epoch": 11.896740547588005, - "grad_norm": 1.5401678085327148, - "learning_rate": 7.758391959798995e-05, - "loss": 5.4956, - "step": 22812 - }, - { - "epoch": 11.897262059973924, - "grad_norm": 1.3480920791625977, - "learning_rate": 7.758291457286433e-05, - "loss": 5.6419, - "step": 22813 - }, - { - "epoch": 11.897783572359844, - "grad_norm": 1.4230210781097412, - "learning_rate": 7.75819095477387e-05, - "loss": 5.3635, - "step": 22814 - }, - { - "epoch": 11.898305084745763, - "grad_norm": 1.4187970161437988, - "learning_rate": 7.758090452261307e-05, - "loss": 5.2659, - "step": 22815 - }, - { - "epoch": 11.898826597131682, - "grad_norm": 1.463417887687683, - "learning_rate": 7.757989949748744e-05, - "loss": 5.5211, - "step": 22816 - }, - { - "epoch": 11.8993481095176, - "grad_norm": 1.5144002437591553, - "learning_rate": 7.757889447236181e-05, - "loss": 4.9418, - "step": 22817 - }, - { - "epoch": 11.89986962190352, - "grad_norm": 1.4543139934539795, - "learning_rate": 7.757788944723618e-05, - "loss": 4.9694, - "step": 22818 - }, - { - "epoch": 11.900391134289439, - "grad_norm": 1.514482855796814, - "learning_rate": 7.757688442211056e-05, - "loss": 5.1917, - "step": 22819 - }, - { - "epoch": 11.900912646675359, - "grad_norm": 1.4549249410629272, - "learning_rate": 7.757587939698493e-05, - "loss": 5.747, - "step": 22820 - }, - { - "epoch": 11.901434159061278, - "grad_norm": 1.5126415491104126, - "learning_rate": 7.75748743718593e-05, - "loss": 5.079, - "step": 22821 - }, - { - "epoch": 11.901955671447197, - "grad_norm": 1.6441028118133545, - "learning_rate": 7.757386934673368e-05, - "loss": 5.105, - "step": 22822 - }, - { - "epoch": 11.902477183833117, - "grad_norm": 1.4159923791885376, - "learning_rate": 7.757286432160804e-05, - "loss": 5.5145, - "step": 22823 - }, - { - "epoch": 11.902998696219035, - "grad_norm": 1.548539400100708, - "learning_rate": 7.757185929648242e-05, - "loss": 5.5158, - "step": 22824 - }, - { - "epoch": 11.903520208604954, - "grad_norm": 1.4797399044036865, - "learning_rate": 7.757085427135678e-05, - "loss": 5.4004, - "step": 22825 - }, - { - "epoch": 11.904041720990874, - "grad_norm": 1.321890950202942, - "learning_rate": 7.756984924623116e-05, - "loss": 5.7552, - "step": 22826 - }, - { - "epoch": 11.904563233376793, - "grad_norm": 1.5082499980926514, - "learning_rate": 7.756884422110553e-05, - "loss": 5.5115, - "step": 22827 - }, - { - "epoch": 11.905084745762712, - "grad_norm": 1.4244741201400757, - "learning_rate": 7.75678391959799e-05, - "loss": 5.8067, - "step": 22828 - }, - { - "epoch": 11.90560625814863, - "grad_norm": 1.4083441495895386, - "learning_rate": 7.756683417085428e-05, - "loss": 5.0828, - "step": 22829 - }, - { - "epoch": 11.90612777053455, - "grad_norm": 1.3979841470718384, - "learning_rate": 7.756582914572866e-05, - "loss": 5.4976, - "step": 22830 - }, - { - "epoch": 11.906649282920469, - "grad_norm": 1.4337127208709717, - "learning_rate": 7.756482412060302e-05, - "loss": 5.6878, - "step": 22831 - }, - { - "epoch": 11.907170795306389, - "grad_norm": 1.4998655319213867, - "learning_rate": 7.756381909547739e-05, - "loss": 5.2629, - "step": 22832 - }, - { - "epoch": 11.907692307692308, - "grad_norm": 1.5177509784698486, - "learning_rate": 7.756281407035177e-05, - "loss": 5.2169, - "step": 22833 - }, - { - "epoch": 11.908213820078227, - "grad_norm": 1.465701699256897, - "learning_rate": 7.756180904522613e-05, - "loss": 5.1916, - "step": 22834 - }, - { - "epoch": 11.908735332464147, - "grad_norm": 1.3066152334213257, - "learning_rate": 7.756080402010051e-05, - "loss": 5.5898, - "step": 22835 - }, - { - "epoch": 11.909256844850065, - "grad_norm": 1.512636423110962, - "learning_rate": 7.755979899497487e-05, - "loss": 5.3812, - "step": 22836 - }, - { - "epoch": 11.909778357235984, - "grad_norm": 1.4630831480026245, - "learning_rate": 7.755879396984925e-05, - "loss": 5.0614, - "step": 22837 - }, - { - "epoch": 11.910299869621904, - "grad_norm": 1.4972333908081055, - "learning_rate": 7.755778894472361e-05, - "loss": 5.4945, - "step": 22838 - }, - { - "epoch": 11.910821382007823, - "grad_norm": 1.4103953838348389, - "learning_rate": 7.755678391959799e-05, - "loss": 5.6814, - "step": 22839 - }, - { - "epoch": 11.911342894393742, - "grad_norm": 1.4429258108139038, - "learning_rate": 7.755577889447237e-05, - "loss": 5.4453, - "step": 22840 - }, - { - "epoch": 11.91186440677966, - "grad_norm": 1.3515636920928955, - "learning_rate": 7.755477386934675e-05, - "loss": 5.4124, - "step": 22841 - }, - { - "epoch": 11.91238591916558, - "grad_norm": 1.4410792589187622, - "learning_rate": 7.755376884422111e-05, - "loss": 5.3818, - "step": 22842 - }, - { - "epoch": 11.9129074315515, - "grad_norm": 1.4222732782363892, - "learning_rate": 7.755276381909549e-05, - "loss": 5.5009, - "step": 22843 - }, - { - "epoch": 11.913428943937419, - "grad_norm": 1.4764423370361328, - "learning_rate": 7.755175879396985e-05, - "loss": 5.3812, - "step": 22844 - }, - { - "epoch": 11.913950456323338, - "grad_norm": 1.5048819780349731, - "learning_rate": 7.755075376884422e-05, - "loss": 5.2435, - "step": 22845 - }, - { - "epoch": 11.914471968709258, - "grad_norm": 1.5366404056549072, - "learning_rate": 7.75497487437186e-05, - "loss": 4.9811, - "step": 22846 - }, - { - "epoch": 11.914993481095175, - "grad_norm": 1.4410068988800049, - "learning_rate": 7.754874371859296e-05, - "loss": 5.2261, - "step": 22847 - }, - { - "epoch": 11.915514993481095, - "grad_norm": 1.3997759819030762, - "learning_rate": 7.754773869346734e-05, - "loss": 5.1885, - "step": 22848 - }, - { - "epoch": 11.916036505867014, - "grad_norm": 1.405009150505066, - "learning_rate": 7.754673366834172e-05, - "loss": 5.4254, - "step": 22849 - }, - { - "epoch": 11.916558018252934, - "grad_norm": 1.5465158224105835, - "learning_rate": 7.75457286432161e-05, - "loss": 5.4577, - "step": 22850 - }, - { - "epoch": 11.917079530638853, - "grad_norm": 1.5516701936721802, - "learning_rate": 7.754472361809046e-05, - "loss": 4.9255, - "step": 22851 - }, - { - "epoch": 11.917601043024773, - "grad_norm": 1.398799180984497, - "learning_rate": 7.754371859296484e-05, - "loss": 5.1286, - "step": 22852 - }, - { - "epoch": 11.91812255541069, - "grad_norm": 1.4291921854019165, - "learning_rate": 7.75427135678392e-05, - "loss": 5.2491, - "step": 22853 - }, - { - "epoch": 11.91864406779661, - "grad_norm": 1.3680471181869507, - "learning_rate": 7.754170854271358e-05, - "loss": 5.651, - "step": 22854 - }, - { - "epoch": 11.91916558018253, - "grad_norm": 1.4037113189697266, - "learning_rate": 7.754070351758794e-05, - "loss": 5.73, - "step": 22855 - }, - { - "epoch": 11.919687092568449, - "grad_norm": 1.36825430393219, - "learning_rate": 7.753969849246232e-05, - "loss": 5.5351, - "step": 22856 - }, - { - "epoch": 11.920208604954368, - "grad_norm": 1.4208530187606812, - "learning_rate": 7.753869346733668e-05, - "loss": 5.6324, - "step": 22857 - }, - { - "epoch": 11.920730117340288, - "grad_norm": 1.375556468963623, - "learning_rate": 7.753768844221105e-05, - "loss": 4.9496, - "step": 22858 - }, - { - "epoch": 11.921251629726205, - "grad_norm": 1.5284929275512695, - "learning_rate": 7.753668341708543e-05, - "loss": 5.7215, - "step": 22859 - }, - { - "epoch": 11.921773142112125, - "grad_norm": 1.4598140716552734, - "learning_rate": 7.75356783919598e-05, - "loss": 5.805, - "step": 22860 - }, - { - "epoch": 11.922294654498044, - "grad_norm": 1.6093276739120483, - "learning_rate": 7.753467336683418e-05, - "loss": 4.8365, - "step": 22861 - }, - { - "epoch": 11.922816166883964, - "grad_norm": 1.5204020738601685, - "learning_rate": 7.753366834170855e-05, - "loss": 5.2214, - "step": 22862 - }, - { - "epoch": 11.923337679269883, - "grad_norm": 1.3323092460632324, - "learning_rate": 7.753266331658292e-05, - "loss": 5.5469, - "step": 22863 - }, - { - "epoch": 11.923859191655803, - "grad_norm": 1.4833664894104004, - "learning_rate": 7.753165829145729e-05, - "loss": 5.2296, - "step": 22864 - }, - { - "epoch": 11.92438070404172, - "grad_norm": 1.4347758293151855, - "learning_rate": 7.753065326633167e-05, - "loss": 5.5105, - "step": 22865 - }, - { - "epoch": 11.92490221642764, - "grad_norm": 1.6885360479354858, - "learning_rate": 7.752964824120603e-05, - "loss": 5.1407, - "step": 22866 - }, - { - "epoch": 11.92542372881356, - "grad_norm": 1.5492643117904663, - "learning_rate": 7.752864321608041e-05, - "loss": 4.9557, - "step": 22867 - }, - { - "epoch": 11.925945241199479, - "grad_norm": 1.3427265882492065, - "learning_rate": 7.752763819095477e-05, - "loss": 5.7423, - "step": 22868 - }, - { - "epoch": 11.926466753585398, - "grad_norm": 1.4374852180480957, - "learning_rate": 7.752663316582915e-05, - "loss": 5.4093, - "step": 22869 - }, - { - "epoch": 11.926988265971318, - "grad_norm": 1.4844778776168823, - "learning_rate": 7.752562814070351e-05, - "loss": 5.2429, - "step": 22870 - }, - { - "epoch": 11.927509778357235, - "grad_norm": 1.4306800365447998, - "learning_rate": 7.752462311557789e-05, - "loss": 5.6985, - "step": 22871 - }, - { - "epoch": 11.928031290743155, - "grad_norm": 1.3779678344726562, - "learning_rate": 7.752361809045227e-05, - "loss": 5.278, - "step": 22872 - }, - { - "epoch": 11.928552803129074, - "grad_norm": 1.4241935014724731, - "learning_rate": 7.752261306532663e-05, - "loss": 4.9573, - "step": 22873 - }, - { - "epoch": 11.929074315514994, - "grad_norm": 1.4509252309799194, - "learning_rate": 7.752160804020101e-05, - "loss": 5.3836, - "step": 22874 - }, - { - "epoch": 11.929595827900913, - "grad_norm": 1.45831298828125, - "learning_rate": 7.752060301507538e-05, - "loss": 5.1369, - "step": 22875 - }, - { - "epoch": 11.930117340286833, - "grad_norm": 1.336169719696045, - "learning_rate": 7.751959798994975e-05, - "loss": 5.7205, - "step": 22876 - }, - { - "epoch": 11.93063885267275, - "grad_norm": 1.4821585416793823, - "learning_rate": 7.751859296482412e-05, - "loss": 5.5223, - "step": 22877 - }, - { - "epoch": 11.93116036505867, - "grad_norm": 1.3948441743850708, - "learning_rate": 7.75175879396985e-05, - "loss": 5.7977, - "step": 22878 - }, - { - "epoch": 11.93168187744459, - "grad_norm": 1.427140235900879, - "learning_rate": 7.751658291457286e-05, - "loss": 5.2725, - "step": 22879 - }, - { - "epoch": 11.932203389830509, - "grad_norm": 1.4401754140853882, - "learning_rate": 7.751557788944724e-05, - "loss": 5.047, - "step": 22880 - }, - { - "epoch": 11.932724902216428, - "grad_norm": 1.4448680877685547, - "learning_rate": 7.751457286432162e-05, - "loss": 5.0463, - "step": 22881 - }, - { - "epoch": 11.933246414602348, - "grad_norm": 1.4015953540802002, - "learning_rate": 7.7513567839196e-05, - "loss": 5.8469, - "step": 22882 - }, - { - "epoch": 11.933767926988265, - "grad_norm": 1.4136987924575806, - "learning_rate": 7.751256281407036e-05, - "loss": 5.5123, - "step": 22883 - }, - { - "epoch": 11.934289439374185, - "grad_norm": 1.3676910400390625, - "learning_rate": 7.751155778894472e-05, - "loss": 5.568, - "step": 22884 - }, - { - "epoch": 11.934810951760104, - "grad_norm": 1.512812852859497, - "learning_rate": 7.75105527638191e-05, - "loss": 4.5273, - "step": 22885 - }, - { - "epoch": 11.935332464146024, - "grad_norm": 1.3816696405410767, - "learning_rate": 7.750954773869346e-05, - "loss": 5.6683, - "step": 22886 - }, - { - "epoch": 11.935853976531943, - "grad_norm": 1.6008821725845337, - "learning_rate": 7.750854271356784e-05, - "loss": 5.052, - "step": 22887 - }, - { - "epoch": 11.936375488917863, - "grad_norm": 1.3696075677871704, - "learning_rate": 7.750753768844221e-05, - "loss": 5.8726, - "step": 22888 - }, - { - "epoch": 11.93689700130378, - "grad_norm": 1.3850027322769165, - "learning_rate": 7.750653266331658e-05, - "loss": 5.5269, - "step": 22889 - }, - { - "epoch": 11.9374185136897, - "grad_norm": 1.3178136348724365, - "learning_rate": 7.750552763819095e-05, - "loss": 5.4601, - "step": 22890 - }, - { - "epoch": 11.93794002607562, - "grad_norm": 1.3278322219848633, - "learning_rate": 7.750452261306533e-05, - "loss": 5.6932, - "step": 22891 - }, - { - "epoch": 11.938461538461539, - "grad_norm": 1.6154142618179321, - "learning_rate": 7.75035175879397e-05, - "loss": 5.0555, - "step": 22892 - }, - { - "epoch": 11.938983050847458, - "grad_norm": 1.6157270669937134, - "learning_rate": 7.750251256281408e-05, - "loss": 5.0396, - "step": 22893 - }, - { - "epoch": 11.939504563233378, - "grad_norm": 1.4964320659637451, - "learning_rate": 7.750150753768845e-05, - "loss": 5.38, - "step": 22894 - }, - { - "epoch": 11.940026075619295, - "grad_norm": 1.5105252265930176, - "learning_rate": 7.750050251256282e-05, - "loss": 5.2109, - "step": 22895 - }, - { - "epoch": 11.940547588005215, - "grad_norm": 1.4408823251724243, - "learning_rate": 7.749949748743719e-05, - "loss": 5.5066, - "step": 22896 - }, - { - "epoch": 11.941069100391134, - "grad_norm": 1.358117938041687, - "learning_rate": 7.749849246231155e-05, - "loss": 5.4111, - "step": 22897 - }, - { - "epoch": 11.941590612777054, - "grad_norm": 1.4580084085464478, - "learning_rate": 7.749748743718593e-05, - "loss": 4.7753, - "step": 22898 - }, - { - "epoch": 11.942112125162973, - "grad_norm": 1.4823791980743408, - "learning_rate": 7.74964824120603e-05, - "loss": 5.5798, - "step": 22899 - }, - { - "epoch": 11.94263363754889, - "grad_norm": 1.318467378616333, - "learning_rate": 7.749547738693467e-05, - "loss": 5.7957, - "step": 22900 - }, - { - "epoch": 11.94315514993481, - "grad_norm": 1.4421759843826294, - "learning_rate": 7.749447236180905e-05, - "loss": 5.3881, - "step": 22901 - }, - { - "epoch": 11.94367666232073, - "grad_norm": 1.5247275829315186, - "learning_rate": 7.749346733668343e-05, - "loss": 4.5736, - "step": 22902 - }, - { - "epoch": 11.94419817470665, - "grad_norm": 1.3596153259277344, - "learning_rate": 7.749246231155779e-05, - "loss": 5.6093, - "step": 22903 - }, - { - "epoch": 11.944719687092569, - "grad_norm": 1.572378158569336, - "learning_rate": 7.749145728643217e-05, - "loss": 5.5783, - "step": 22904 - }, - { - "epoch": 11.945241199478488, - "grad_norm": 1.4474315643310547, - "learning_rate": 7.749045226130654e-05, - "loss": 5.787, - "step": 22905 - }, - { - "epoch": 11.945762711864408, - "grad_norm": 1.5192053318023682, - "learning_rate": 7.748944723618091e-05, - "loss": 4.9053, - "step": 22906 - }, - { - "epoch": 11.946284224250325, - "grad_norm": 1.5773200988769531, - "learning_rate": 7.748844221105528e-05, - "loss": 5.7178, - "step": 22907 - }, - { - "epoch": 11.946805736636245, - "grad_norm": 1.4677947759628296, - "learning_rate": 7.748743718592966e-05, - "loss": 5.1982, - "step": 22908 - }, - { - "epoch": 11.947327249022164, - "grad_norm": 1.577429175376892, - "learning_rate": 7.748643216080402e-05, - "loss": 5.1003, - "step": 22909 - }, - { - "epoch": 11.947848761408084, - "grad_norm": 1.485586404800415, - "learning_rate": 7.74854271356784e-05, - "loss": 5.4856, - "step": 22910 - }, - { - "epoch": 11.948370273794003, - "grad_norm": 1.4528512954711914, - "learning_rate": 7.748442211055276e-05, - "loss": 5.4762, - "step": 22911 - }, - { - "epoch": 11.94889178617992, - "grad_norm": 1.4264497756958008, - "learning_rate": 7.748341708542714e-05, - "loss": 5.165, - "step": 22912 - }, - { - "epoch": 11.94941329856584, - "grad_norm": 1.3501282930374146, - "learning_rate": 7.748241206030152e-05, - "loss": 5.6086, - "step": 22913 - }, - { - "epoch": 11.94993481095176, - "grad_norm": 1.413609266281128, - "learning_rate": 7.748140703517588e-05, - "loss": 5.7157, - "step": 22914 - }, - { - "epoch": 11.95045632333768, - "grad_norm": 1.4211093187332153, - "learning_rate": 7.748040201005026e-05, - "loss": 5.5683, - "step": 22915 - }, - { - "epoch": 11.950977835723599, - "grad_norm": 1.4390029907226562, - "learning_rate": 7.747939698492462e-05, - "loss": 5.0202, - "step": 22916 - }, - { - "epoch": 11.951499348109518, - "grad_norm": 1.4236406087875366, - "learning_rate": 7.7478391959799e-05, - "loss": 5.7061, - "step": 22917 - }, - { - "epoch": 11.952020860495438, - "grad_norm": 1.4782980680465698, - "learning_rate": 7.747738693467337e-05, - "loss": 5.7844, - "step": 22918 - }, - { - "epoch": 11.952542372881355, - "grad_norm": 1.6496809720993042, - "learning_rate": 7.747638190954774e-05, - "loss": 5.45, - "step": 22919 - }, - { - "epoch": 11.953063885267275, - "grad_norm": 1.587364673614502, - "learning_rate": 7.747537688442211e-05, - "loss": 5.5383, - "step": 22920 - }, - { - "epoch": 11.953585397653194, - "grad_norm": 1.3951923847198486, - "learning_rate": 7.747437185929649e-05, - "loss": 5.617, - "step": 22921 - }, - { - "epoch": 11.954106910039114, - "grad_norm": 1.4872679710388184, - "learning_rate": 7.747336683417086e-05, - "loss": 5.3978, - "step": 22922 - }, - { - "epoch": 11.954628422425033, - "grad_norm": 1.5248669385910034, - "learning_rate": 7.747236180904524e-05, - "loss": 5.4296, - "step": 22923 - }, - { - "epoch": 11.955149934810951, - "grad_norm": 1.4441609382629395, - "learning_rate": 7.74713567839196e-05, - "loss": 5.4566, - "step": 22924 - }, - { - "epoch": 11.95567144719687, - "grad_norm": 1.6143574714660645, - "learning_rate": 7.747035175879397e-05, - "loss": 5.4018, - "step": 22925 - }, - { - "epoch": 11.95619295958279, - "grad_norm": 1.516904592514038, - "learning_rate": 7.746934673366835e-05, - "loss": 5.5865, - "step": 22926 - }, - { - "epoch": 11.95671447196871, - "grad_norm": 1.6117643117904663, - "learning_rate": 7.746834170854271e-05, - "loss": 5.324, - "step": 22927 - }, - { - "epoch": 11.957235984354629, - "grad_norm": 1.4899641275405884, - "learning_rate": 7.746733668341709e-05, - "loss": 5.3224, - "step": 22928 - }, - { - "epoch": 11.957757496740548, - "grad_norm": 1.378010869026184, - "learning_rate": 7.746633165829145e-05, - "loss": 5.2692, - "step": 22929 - }, - { - "epoch": 11.958279009126468, - "grad_norm": 1.4138506650924683, - "learning_rate": 7.746532663316583e-05, - "loss": 5.5553, - "step": 22930 - }, - { - "epoch": 11.958800521512385, - "grad_norm": 1.4516761302947998, - "learning_rate": 7.74643216080402e-05, - "loss": 5.3097, - "step": 22931 - }, - { - "epoch": 11.959322033898305, - "grad_norm": 1.3401198387145996, - "learning_rate": 7.746331658291457e-05, - "loss": 5.9274, - "step": 22932 - }, - { - "epoch": 11.959843546284224, - "grad_norm": 1.4307774305343628, - "learning_rate": 7.746231155778895e-05, - "loss": 5.5347, - "step": 22933 - }, - { - "epoch": 11.960365058670144, - "grad_norm": 1.419299840927124, - "learning_rate": 7.746130653266333e-05, - "loss": 5.0269, - "step": 22934 - }, - { - "epoch": 11.960886571056063, - "grad_norm": 1.4793978929519653, - "learning_rate": 7.74603015075377e-05, - "loss": 5.5296, - "step": 22935 - }, - { - "epoch": 11.961408083441981, - "grad_norm": 1.2592582702636719, - "learning_rate": 7.745929648241207e-05, - "loss": 4.9113, - "step": 22936 - }, - { - "epoch": 11.9619295958279, - "grad_norm": 1.4450180530548096, - "learning_rate": 7.745829145728644e-05, - "loss": 5.5124, - "step": 22937 - }, - { - "epoch": 11.96245110821382, - "grad_norm": 1.455039620399475, - "learning_rate": 7.74572864321608e-05, - "loss": 5.5062, - "step": 22938 - }, - { - "epoch": 11.96297262059974, - "grad_norm": 1.3687421083450317, - "learning_rate": 7.745628140703518e-05, - "loss": 4.9313, - "step": 22939 - }, - { - "epoch": 11.963494132985659, - "grad_norm": 1.4199001789093018, - "learning_rate": 7.745527638190954e-05, - "loss": 5.4903, - "step": 22940 - }, - { - "epoch": 11.964015645371578, - "grad_norm": 1.3811789751052856, - "learning_rate": 7.745427135678392e-05, - "loss": 5.5484, - "step": 22941 - }, - { - "epoch": 11.964537157757496, - "grad_norm": 1.5089401006698608, - "learning_rate": 7.74532663316583e-05, - "loss": 5.7181, - "step": 22942 - }, - { - "epoch": 11.965058670143415, - "grad_norm": 1.3901058435440063, - "learning_rate": 7.745226130653268e-05, - "loss": 5.6014, - "step": 22943 - }, - { - "epoch": 11.965580182529335, - "grad_norm": 1.4288290739059448, - "learning_rate": 7.745125628140704e-05, - "loss": 5.4775, - "step": 22944 - }, - { - "epoch": 11.966101694915254, - "grad_norm": 1.579694151878357, - "learning_rate": 7.745025125628142e-05, - "loss": 4.5908, - "step": 22945 - }, - { - "epoch": 11.966623207301174, - "grad_norm": 1.4494441747665405, - "learning_rate": 7.744924623115578e-05, - "loss": 5.1616, - "step": 22946 - }, - { - "epoch": 11.967144719687093, - "grad_norm": 1.377129077911377, - "learning_rate": 7.744824120603016e-05, - "loss": 5.4526, - "step": 22947 - }, - { - "epoch": 11.967666232073011, - "grad_norm": 1.545974612236023, - "learning_rate": 7.744723618090452e-05, - "loss": 5.161, - "step": 22948 - }, - { - "epoch": 11.96818774445893, - "grad_norm": 1.471352219581604, - "learning_rate": 7.74462311557789e-05, - "loss": 5.2824, - "step": 22949 - }, - { - "epoch": 11.96870925684485, - "grad_norm": 1.6461633443832397, - "learning_rate": 7.744522613065327e-05, - "loss": 5.3714, - "step": 22950 - }, - { - "epoch": 11.96923076923077, - "grad_norm": 1.549264669418335, - "learning_rate": 7.744422110552763e-05, - "loss": 5.4444, - "step": 22951 - }, - { - "epoch": 11.969752281616689, - "grad_norm": 1.4102518558502197, - "learning_rate": 7.744321608040201e-05, - "loss": 5.3566, - "step": 22952 - }, - { - "epoch": 11.970273794002608, - "grad_norm": 1.4235460758209229, - "learning_rate": 7.744221105527639e-05, - "loss": 5.5685, - "step": 22953 - }, - { - "epoch": 11.970795306388526, - "grad_norm": 1.4383094310760498, - "learning_rate": 7.744120603015076e-05, - "loss": 5.7895, - "step": 22954 - }, - { - "epoch": 11.971316818774445, - "grad_norm": 1.355224609375, - "learning_rate": 7.744020100502513e-05, - "loss": 5.3349, - "step": 22955 - }, - { - "epoch": 11.971838331160365, - "grad_norm": 1.3440808057785034, - "learning_rate": 7.74391959798995e-05, - "loss": 4.6717, - "step": 22956 - }, - { - "epoch": 11.972359843546284, - "grad_norm": 1.4453121423721313, - "learning_rate": 7.743819095477387e-05, - "loss": 5.3944, - "step": 22957 - }, - { - "epoch": 11.972881355932204, - "grad_norm": 1.4313324689865112, - "learning_rate": 7.743718592964825e-05, - "loss": 5.6264, - "step": 22958 - }, - { - "epoch": 11.973402868318123, - "grad_norm": 1.4688475131988525, - "learning_rate": 7.743618090452261e-05, - "loss": 5.669, - "step": 22959 - }, - { - "epoch": 11.973924380704041, - "grad_norm": 1.509272575378418, - "learning_rate": 7.743517587939699e-05, - "loss": 4.943, - "step": 22960 - }, - { - "epoch": 11.97444589308996, - "grad_norm": 1.462412714958191, - "learning_rate": 7.743417085427135e-05, - "loss": 5.1811, - "step": 22961 - }, - { - "epoch": 11.97496740547588, - "grad_norm": 1.3269699811935425, - "learning_rate": 7.743316582914573e-05, - "loss": 5.1096, - "step": 22962 - }, - { - "epoch": 11.9754889178618, - "grad_norm": 1.435983657836914, - "learning_rate": 7.743216080402011e-05, - "loss": 4.9752, - "step": 22963 - }, - { - "epoch": 11.976010430247719, - "grad_norm": 1.4768002033233643, - "learning_rate": 7.743115577889447e-05, - "loss": 5.4148, - "step": 22964 - }, - { - "epoch": 11.976531942633638, - "grad_norm": 1.3879103660583496, - "learning_rate": 7.743015075376885e-05, - "loss": 5.7411, - "step": 22965 - }, - { - "epoch": 11.977053455019556, - "grad_norm": 1.3144611120224, - "learning_rate": 7.742914572864322e-05, - "loss": 5.5622, - "step": 22966 - }, - { - "epoch": 11.977574967405475, - "grad_norm": 1.3055403232574463, - "learning_rate": 7.74281407035176e-05, - "loss": 5.8914, - "step": 22967 - }, - { - "epoch": 11.978096479791395, - "grad_norm": 1.3867517709732056, - "learning_rate": 7.742713567839196e-05, - "loss": 5.5364, - "step": 22968 - }, - { - "epoch": 11.978617992177314, - "grad_norm": 1.4226723909378052, - "learning_rate": 7.742613065326634e-05, - "loss": 5.385, - "step": 22969 - }, - { - "epoch": 11.979139504563234, - "grad_norm": 1.5965116024017334, - "learning_rate": 7.74251256281407e-05, - "loss": 5.1103, - "step": 22970 - }, - { - "epoch": 11.979661016949153, - "grad_norm": 1.4780265092849731, - "learning_rate": 7.742412060301508e-05, - "loss": 5.4493, - "step": 22971 - }, - { - "epoch": 11.980182529335071, - "grad_norm": 1.4549636840820312, - "learning_rate": 7.742311557788944e-05, - "loss": 5.4608, - "step": 22972 - }, - { - "epoch": 11.98070404172099, - "grad_norm": 1.5664701461791992, - "learning_rate": 7.742211055276382e-05, - "loss": 5.5927, - "step": 22973 - }, - { - "epoch": 11.98122555410691, - "grad_norm": 1.4730370044708252, - "learning_rate": 7.74211055276382e-05, - "loss": 5.505, - "step": 22974 - }, - { - "epoch": 11.98174706649283, - "grad_norm": 1.6222821474075317, - "learning_rate": 7.742010050251258e-05, - "loss": 5.2496, - "step": 22975 - }, - { - "epoch": 11.982268578878749, - "grad_norm": 1.451987862586975, - "learning_rate": 7.741909547738694e-05, - "loss": 5.7032, - "step": 22976 - }, - { - "epoch": 11.982790091264668, - "grad_norm": 1.5514923334121704, - "learning_rate": 7.74180904522613e-05, - "loss": 5.0709, - "step": 22977 - }, - { - "epoch": 11.983311603650586, - "grad_norm": 1.5507442951202393, - "learning_rate": 7.741708542713568e-05, - "loss": 5.2622, - "step": 22978 - }, - { - "epoch": 11.983833116036505, - "grad_norm": 1.3387459516525269, - "learning_rate": 7.741608040201005e-05, - "loss": 5.1802, - "step": 22979 - }, - { - "epoch": 11.984354628422425, - "grad_norm": 1.4337706565856934, - "learning_rate": 7.741507537688443e-05, - "loss": 4.9556, - "step": 22980 - }, - { - "epoch": 11.984876140808344, - "grad_norm": 1.4726160764694214, - "learning_rate": 7.741407035175879e-05, - "loss": 5.2272, - "step": 22981 - }, - { - "epoch": 11.985397653194264, - "grad_norm": 1.37355637550354, - "learning_rate": 7.741306532663317e-05, - "loss": 5.158, - "step": 22982 - }, - { - "epoch": 11.985919165580183, - "grad_norm": 1.6005475521087646, - "learning_rate": 7.741206030150754e-05, - "loss": 5.5226, - "step": 22983 - }, - { - "epoch": 11.986440677966101, - "grad_norm": 1.5161288976669312, - "learning_rate": 7.741105527638192e-05, - "loss": 5.5159, - "step": 22984 - }, - { - "epoch": 11.98696219035202, - "grad_norm": 1.4105861186981201, - "learning_rate": 7.741005025125629e-05, - "loss": 4.5088, - "step": 22985 - }, - { - "epoch": 11.98748370273794, - "grad_norm": 1.402233362197876, - "learning_rate": 7.740904522613066e-05, - "loss": 5.751, - "step": 22986 - }, - { - "epoch": 11.98800521512386, - "grad_norm": 1.411699891090393, - "learning_rate": 7.740804020100503e-05, - "loss": 4.953, - "step": 22987 - }, - { - "epoch": 11.988526727509779, - "grad_norm": 1.4951319694519043, - "learning_rate": 7.740703517587941e-05, - "loss": 5.6998, - "step": 22988 - }, - { - "epoch": 11.989048239895698, - "grad_norm": 1.3839956521987915, - "learning_rate": 7.740603015075377e-05, - "loss": 5.2351, - "step": 22989 - }, - { - "epoch": 11.989569752281616, - "grad_norm": 1.5212976932525635, - "learning_rate": 7.740502512562814e-05, - "loss": 5.2589, - "step": 22990 - }, - { - "epoch": 11.990091264667535, - "grad_norm": 1.4692249298095703, - "learning_rate": 7.740402010050251e-05, - "loss": 5.3548, - "step": 22991 - }, - { - "epoch": 11.990612777053455, - "grad_norm": 1.383193016052246, - "learning_rate": 7.740301507537688e-05, - "loss": 5.5483, - "step": 22992 - }, - { - "epoch": 11.991134289439374, - "grad_norm": 1.3485733270645142, - "learning_rate": 7.740201005025126e-05, - "loss": 5.0923, - "step": 22993 - }, - { - "epoch": 11.991655801825294, - "grad_norm": 1.415628433227539, - "learning_rate": 7.740100502512563e-05, - "loss": 5.7726, - "step": 22994 - }, - { - "epoch": 11.992177314211212, - "grad_norm": 1.4167263507843018, - "learning_rate": 7.740000000000001e-05, - "loss": 5.6277, - "step": 22995 - }, - { - "epoch": 11.992698826597131, - "grad_norm": 1.4581527709960938, - "learning_rate": 7.739899497487438e-05, - "loss": 5.8325, - "step": 22996 - }, - { - "epoch": 11.99322033898305, - "grad_norm": 1.4527504444122314, - "learning_rate": 7.739798994974875e-05, - "loss": 5.7563, - "step": 22997 - }, - { - "epoch": 11.99374185136897, - "grad_norm": 1.4024816751480103, - "learning_rate": 7.739698492462312e-05, - "loss": 5.6077, - "step": 22998 - }, - { - "epoch": 11.99426336375489, - "grad_norm": 1.55463445186615, - "learning_rate": 7.73959798994975e-05, - "loss": 5.3885, - "step": 22999 - }, - { - "epoch": 11.994784876140809, - "grad_norm": 1.548906683921814, - "learning_rate": 7.739497487437186e-05, - "loss": 5.4905, - "step": 23000 - }, - { - "epoch": 11.995306388526728, - "grad_norm": 1.4390546083450317, - "learning_rate": 7.739396984924624e-05, - "loss": 5.4738, - "step": 23001 - }, - { - "epoch": 11.995827900912646, - "grad_norm": 1.45039701461792, - "learning_rate": 7.73929648241206e-05, - "loss": 5.2864, - "step": 23002 - }, - { - "epoch": 11.996349413298566, - "grad_norm": 1.3973594903945923, - "learning_rate": 7.739195979899498e-05, - "loss": 5.6716, - "step": 23003 - }, - { - "epoch": 11.996870925684485, - "grad_norm": 1.4853302240371704, - "learning_rate": 7.739095477386936e-05, - "loss": 5.4836, - "step": 23004 - }, - { - "epoch": 11.997392438070404, - "grad_norm": 1.5190021991729736, - "learning_rate": 7.738994974874372e-05, - "loss": 5.2924, - "step": 23005 - }, - { - "epoch": 11.997913950456324, - "grad_norm": 1.6016267538070679, - "learning_rate": 7.73889447236181e-05, - "loss": 4.7034, - "step": 23006 - }, - { - "epoch": 11.998435462842242, - "grad_norm": 1.594670295715332, - "learning_rate": 7.738793969849246e-05, - "loss": 5.3146, - "step": 23007 - }, - { - "epoch": 11.998956975228161, - "grad_norm": 1.558101773262024, - "learning_rate": 7.738693467336684e-05, - "loss": 5.5484, - "step": 23008 - }, - { - "epoch": 11.99947848761408, - "grad_norm": 1.5748473405838013, - "learning_rate": 7.73859296482412e-05, - "loss": 5.0185, - "step": 23009 - }, - { - "epoch": 12.0, - "grad_norm": 1.6429901123046875, - "learning_rate": 7.738492462311558e-05, - "loss": 5.0599, - "step": 23010 - }, - { - "epoch": 12.00052151238592, - "grad_norm": 1.3868403434753418, - "learning_rate": 7.738391959798995e-05, - "loss": 5.6849, - "step": 23011 - }, - { - "epoch": 12.001043024771839, - "grad_norm": 1.55873441696167, - "learning_rate": 7.738291457286433e-05, - "loss": 5.4073, - "step": 23012 - }, - { - "epoch": 12.001564537157757, - "grad_norm": 1.5900051593780518, - "learning_rate": 7.738190954773869e-05, - "loss": 4.9917, - "step": 23013 - }, - { - "epoch": 12.002086049543676, - "grad_norm": 1.488173246383667, - "learning_rate": 7.738090452261307e-05, - "loss": 4.9508, - "step": 23014 - }, - { - "epoch": 12.002607561929596, - "grad_norm": 1.5797466039657593, - "learning_rate": 7.737989949748745e-05, - "loss": 5.2239, - "step": 23015 - }, - { - "epoch": 12.003129074315515, - "grad_norm": 1.5107700824737549, - "learning_rate": 7.737889447236182e-05, - "loss": 5.61, - "step": 23016 - }, - { - "epoch": 12.003650586701434, - "grad_norm": 1.4363987445831299, - "learning_rate": 7.737788944723619e-05, - "loss": 5.1831, - "step": 23017 - }, - { - "epoch": 12.004172099087354, - "grad_norm": 1.5059337615966797, - "learning_rate": 7.737688442211055e-05, - "loss": 5.5337, - "step": 23018 - }, - { - "epoch": 12.004693611473272, - "grad_norm": 1.3895223140716553, - "learning_rate": 7.737587939698493e-05, - "loss": 5.0801, - "step": 23019 - }, - { - "epoch": 12.005215123859191, - "grad_norm": 1.4425482749938965, - "learning_rate": 7.73748743718593e-05, - "loss": 5.018, - "step": 23020 - }, - { - "epoch": 12.00573663624511, - "grad_norm": 1.4788527488708496, - "learning_rate": 7.737386934673367e-05, - "loss": 5.2914, - "step": 23021 - }, - { - "epoch": 12.00625814863103, - "grad_norm": 1.4998434782028198, - "learning_rate": 7.737286432160804e-05, - "loss": 5.6597, - "step": 23022 - }, - { - "epoch": 12.00677966101695, - "grad_norm": 1.3338634967803955, - "learning_rate": 7.737185929648241e-05, - "loss": 5.0609, - "step": 23023 - }, - { - "epoch": 12.007301173402869, - "grad_norm": 1.4579410552978516, - "learning_rate": 7.737085427135679e-05, - "loss": 5.6053, - "step": 23024 - }, - { - "epoch": 12.007822685788787, - "grad_norm": 1.3267433643341064, - "learning_rate": 7.736984924623117e-05, - "loss": 5.5105, - "step": 23025 - }, - { - "epoch": 12.008344198174706, - "grad_norm": 1.4452365636825562, - "learning_rate": 7.736884422110553e-05, - "loss": 5.2404, - "step": 23026 - }, - { - "epoch": 12.008865710560626, - "grad_norm": 1.4934563636779785, - "learning_rate": 7.736783919597991e-05, - "loss": 5.4014, - "step": 23027 - }, - { - "epoch": 12.009387222946545, - "grad_norm": 1.4981578588485718, - "learning_rate": 7.736683417085428e-05, - "loss": 5.6698, - "step": 23028 - }, - { - "epoch": 12.009908735332465, - "grad_norm": 1.568068027496338, - "learning_rate": 7.736582914572865e-05, - "loss": 5.255, - "step": 23029 - }, - { - "epoch": 12.010430247718384, - "grad_norm": 1.7857692241668701, - "learning_rate": 7.736482412060302e-05, - "loss": 5.0551, - "step": 23030 - }, - { - "epoch": 12.010951760104302, - "grad_norm": 1.4389275312423706, - "learning_rate": 7.736381909547738e-05, - "loss": 5.2006, - "step": 23031 - }, - { - "epoch": 12.011473272490221, - "grad_norm": 1.4503684043884277, - "learning_rate": 7.736281407035176e-05, - "loss": 4.8194, - "step": 23032 - }, - { - "epoch": 12.01199478487614, - "grad_norm": 1.3615896701812744, - "learning_rate": 7.736180904522612e-05, - "loss": 5.8071, - "step": 23033 - }, - { - "epoch": 12.01251629726206, - "grad_norm": 1.374568223953247, - "learning_rate": 7.73608040201005e-05, - "loss": 5.9056, - "step": 23034 - }, - { - "epoch": 12.01303780964798, - "grad_norm": 1.4508581161499023, - "learning_rate": 7.735979899497488e-05, - "loss": 5.711, - "step": 23035 - }, - { - "epoch": 12.013559322033899, - "grad_norm": 1.4165550470352173, - "learning_rate": 7.735879396984926e-05, - "loss": 5.737, - "step": 23036 - }, - { - "epoch": 12.014080834419817, - "grad_norm": 1.6466091871261597, - "learning_rate": 7.735778894472362e-05, - "loss": 5.5826, - "step": 23037 - }, - { - "epoch": 12.014602346805736, - "grad_norm": 1.5673227310180664, - "learning_rate": 7.7356783919598e-05, - "loss": 5.1376, - "step": 23038 - }, - { - "epoch": 12.015123859191656, - "grad_norm": 1.4129509925842285, - "learning_rate": 7.735577889447236e-05, - "loss": 5.5523, - "step": 23039 - }, - { - "epoch": 12.015645371577575, - "grad_norm": 1.505897879600525, - "learning_rate": 7.735477386934674e-05, - "loss": 5.0505, - "step": 23040 - }, - { - "epoch": 12.016166883963495, - "grad_norm": 1.424403429031372, - "learning_rate": 7.73537688442211e-05, - "loss": 5.7055, - "step": 23041 - }, - { - "epoch": 12.016688396349414, - "grad_norm": 1.4854938983917236, - "learning_rate": 7.735276381909548e-05, - "loss": 5.6729, - "step": 23042 - }, - { - "epoch": 12.017209908735332, - "grad_norm": 1.5336570739746094, - "learning_rate": 7.735175879396985e-05, - "loss": 4.9899, - "step": 23043 - }, - { - "epoch": 12.017731421121251, - "grad_norm": 1.7156323194503784, - "learning_rate": 7.735075376884421e-05, - "loss": 5.4018, - "step": 23044 - }, - { - "epoch": 12.01825293350717, - "grad_norm": 1.4789094924926758, - "learning_rate": 7.734974874371859e-05, - "loss": 5.4424, - "step": 23045 - }, - { - "epoch": 12.01877444589309, - "grad_norm": 1.52579927444458, - "learning_rate": 7.734874371859297e-05, - "loss": 5.6689, - "step": 23046 - }, - { - "epoch": 12.01929595827901, - "grad_norm": 1.6701877117156982, - "learning_rate": 7.734773869346735e-05, - "loss": 5.6131, - "step": 23047 - }, - { - "epoch": 12.019817470664929, - "grad_norm": 1.4676882028579712, - "learning_rate": 7.734673366834171e-05, - "loss": 5.4915, - "step": 23048 - }, - { - "epoch": 12.020338983050847, - "grad_norm": 1.4788750410079956, - "learning_rate": 7.734572864321609e-05, - "loss": 5.487, - "step": 23049 - }, - { - "epoch": 12.020860495436766, - "grad_norm": 1.5135889053344727, - "learning_rate": 7.734472361809045e-05, - "loss": 5.4157, - "step": 23050 - }, - { - "epoch": 12.021382007822686, - "grad_norm": 1.4289710521697998, - "learning_rate": 7.734371859296483e-05, - "loss": 5.2048, - "step": 23051 - }, - { - "epoch": 12.021903520208605, - "grad_norm": 1.438616156578064, - "learning_rate": 7.73427135678392e-05, - "loss": 5.4265, - "step": 23052 - }, - { - "epoch": 12.022425032594525, - "grad_norm": 1.339760184288025, - "learning_rate": 7.734170854271357e-05, - "loss": 5.626, - "step": 23053 - }, - { - "epoch": 12.022946544980444, - "grad_norm": 1.4843639135360718, - "learning_rate": 7.734070351758794e-05, - "loss": 5.3591, - "step": 23054 - }, - { - "epoch": 12.023468057366362, - "grad_norm": 1.5094364881515503, - "learning_rate": 7.733969849246231e-05, - "loss": 5.5945, - "step": 23055 - }, - { - "epoch": 12.023989569752281, - "grad_norm": 1.5017175674438477, - "learning_rate": 7.733869346733669e-05, - "loss": 5.0144, - "step": 23056 - }, - { - "epoch": 12.0245110821382, - "grad_norm": 1.497463345527649, - "learning_rate": 7.733768844221106e-05, - "loss": 5.4758, - "step": 23057 - }, - { - "epoch": 12.02503259452412, - "grad_norm": 1.4867048263549805, - "learning_rate": 7.733668341708543e-05, - "loss": 5.4507, - "step": 23058 - }, - { - "epoch": 12.02555410691004, - "grad_norm": 1.5620288848876953, - "learning_rate": 7.73356783919598e-05, - "loss": 5.612, - "step": 23059 - }, - { - "epoch": 12.026075619295959, - "grad_norm": 1.4907606840133667, - "learning_rate": 7.733467336683418e-05, - "loss": 5.6935, - "step": 23060 - }, - { - "epoch": 12.026597131681877, - "grad_norm": 1.4035640954971313, - "learning_rate": 7.733366834170854e-05, - "loss": 5.0586, - "step": 23061 - }, - { - "epoch": 12.027118644067796, - "grad_norm": 1.515650749206543, - "learning_rate": 7.733266331658292e-05, - "loss": 5.6342, - "step": 23062 - }, - { - "epoch": 12.027640156453716, - "grad_norm": 1.674403429031372, - "learning_rate": 7.733165829145728e-05, - "loss": 5.0236, - "step": 23063 - }, - { - "epoch": 12.028161668839635, - "grad_norm": 1.5215201377868652, - "learning_rate": 7.733065326633166e-05, - "loss": 5.7664, - "step": 23064 - }, - { - "epoch": 12.028683181225555, - "grad_norm": 1.3893413543701172, - "learning_rate": 7.732964824120603e-05, - "loss": 5.6832, - "step": 23065 - }, - { - "epoch": 12.029204693611474, - "grad_norm": 1.37310791015625, - "learning_rate": 7.73286432160804e-05, - "loss": 5.5458, - "step": 23066 - }, - { - "epoch": 12.029726205997392, - "grad_norm": 1.408949375152588, - "learning_rate": 7.732763819095478e-05, - "loss": 5.4108, - "step": 23067 - }, - { - "epoch": 12.030247718383311, - "grad_norm": 1.455244541168213, - "learning_rate": 7.732663316582916e-05, - "loss": 5.8502, - "step": 23068 - }, - { - "epoch": 12.03076923076923, - "grad_norm": 1.5637232065200806, - "learning_rate": 7.732562814070352e-05, - "loss": 4.8855, - "step": 23069 - }, - { - "epoch": 12.03129074315515, - "grad_norm": 1.5385692119598389, - "learning_rate": 7.732462311557789e-05, - "loss": 5.0241, - "step": 23070 - }, - { - "epoch": 12.03181225554107, - "grad_norm": 1.517449140548706, - "learning_rate": 7.732361809045227e-05, - "loss": 4.9672, - "step": 23071 - }, - { - "epoch": 12.032333767926989, - "grad_norm": 1.5120311975479126, - "learning_rate": 7.732261306532663e-05, - "loss": 5.4023, - "step": 23072 - }, - { - "epoch": 12.032855280312907, - "grad_norm": 1.41050124168396, - "learning_rate": 7.732160804020101e-05, - "loss": 5.6811, - "step": 23073 - }, - { - "epoch": 12.033376792698826, - "grad_norm": 1.492136001586914, - "learning_rate": 7.732060301507537e-05, - "loss": 4.8404, - "step": 23074 - }, - { - "epoch": 12.033898305084746, - "grad_norm": 1.5372898578643799, - "learning_rate": 7.731959798994975e-05, - "loss": 4.9455, - "step": 23075 - }, - { - "epoch": 12.034419817470665, - "grad_norm": 1.401280164718628, - "learning_rate": 7.731859296482413e-05, - "loss": 5.54, - "step": 23076 - }, - { - "epoch": 12.034941329856585, - "grad_norm": 1.3830305337905884, - "learning_rate": 7.73175879396985e-05, - "loss": 5.7067, - "step": 23077 - }, - { - "epoch": 12.035462842242504, - "grad_norm": 1.3954341411590576, - "learning_rate": 7.731658291457287e-05, - "loss": 5.776, - "step": 23078 - }, - { - "epoch": 12.035984354628422, - "grad_norm": 1.4666849374771118, - "learning_rate": 7.731557788944725e-05, - "loss": 5.5772, - "step": 23079 - }, - { - "epoch": 12.036505867014341, - "grad_norm": 1.440699577331543, - "learning_rate": 7.731457286432161e-05, - "loss": 5.6447, - "step": 23080 - }, - { - "epoch": 12.03702737940026, - "grad_norm": 1.4799137115478516, - "learning_rate": 7.731356783919599e-05, - "loss": 5.4413, - "step": 23081 - }, - { - "epoch": 12.03754889178618, - "grad_norm": 1.412797212600708, - "learning_rate": 7.731256281407035e-05, - "loss": 5.6522, - "step": 23082 - }, - { - "epoch": 12.0380704041721, - "grad_norm": 1.506855845451355, - "learning_rate": 7.731155778894472e-05, - "loss": 5.7743, - "step": 23083 - }, - { - "epoch": 12.038591916558019, - "grad_norm": 1.3782081604003906, - "learning_rate": 7.73105527638191e-05, - "loss": 5.4172, - "step": 23084 - }, - { - "epoch": 12.039113428943937, - "grad_norm": 1.4747611284255981, - "learning_rate": 7.730954773869346e-05, - "loss": 5.5969, - "step": 23085 - }, - { - "epoch": 12.039634941329856, - "grad_norm": 1.4648793935775757, - "learning_rate": 7.730854271356784e-05, - "loss": 5.6073, - "step": 23086 - }, - { - "epoch": 12.040156453715776, - "grad_norm": 1.4057538509368896, - "learning_rate": 7.730753768844222e-05, - "loss": 5.5792, - "step": 23087 - }, - { - "epoch": 12.040677966101695, - "grad_norm": 1.5262291431427002, - "learning_rate": 7.73065326633166e-05, - "loss": 4.8959, - "step": 23088 - }, - { - "epoch": 12.041199478487615, - "grad_norm": 1.4551416635513306, - "learning_rate": 7.730552763819096e-05, - "loss": 5.1434, - "step": 23089 - }, - { - "epoch": 12.041720990873534, - "grad_norm": 1.5444854497909546, - "learning_rate": 7.730452261306534e-05, - "loss": 5.0794, - "step": 23090 - }, - { - "epoch": 12.042242503259452, - "grad_norm": 1.5522671937942505, - "learning_rate": 7.73035175879397e-05, - "loss": 5.4987, - "step": 23091 - }, - { - "epoch": 12.042764015645371, - "grad_norm": 1.6167161464691162, - "learning_rate": 7.730251256281408e-05, - "loss": 5.3182, - "step": 23092 - }, - { - "epoch": 12.04328552803129, - "grad_norm": 1.501484751701355, - "learning_rate": 7.730150753768844e-05, - "loss": 5.16, - "step": 23093 - }, - { - "epoch": 12.04380704041721, - "grad_norm": 1.597159504890442, - "learning_rate": 7.730050251256282e-05, - "loss": 5.6526, - "step": 23094 - }, - { - "epoch": 12.04432855280313, - "grad_norm": 1.5498296022415161, - "learning_rate": 7.729949748743718e-05, - "loss": 5.552, - "step": 23095 - }, - { - "epoch": 12.044850065189049, - "grad_norm": 1.4342327117919922, - "learning_rate": 7.729849246231156e-05, - "loss": 5.3567, - "step": 23096 - }, - { - "epoch": 12.045371577574967, - "grad_norm": 1.4757411479949951, - "learning_rate": 7.729748743718594e-05, - "loss": 5.397, - "step": 23097 - }, - { - "epoch": 12.045893089960886, - "grad_norm": 1.5573078393936157, - "learning_rate": 7.72964824120603e-05, - "loss": 5.4249, - "step": 23098 - }, - { - "epoch": 12.046414602346806, - "grad_norm": 1.480603575706482, - "learning_rate": 7.729547738693468e-05, - "loss": 5.3173, - "step": 23099 - }, - { - "epoch": 12.046936114732725, - "grad_norm": 1.4882498979568481, - "learning_rate": 7.729447236180905e-05, - "loss": 5.1823, - "step": 23100 - }, - { - "epoch": 12.047457627118645, - "grad_norm": 1.428487777709961, - "learning_rate": 7.729346733668342e-05, - "loss": 5.7096, - "step": 23101 - }, - { - "epoch": 12.047979139504564, - "grad_norm": 1.6970319747924805, - "learning_rate": 7.729246231155779e-05, - "loss": 4.7932, - "step": 23102 - }, - { - "epoch": 12.048500651890482, - "grad_norm": 1.5958157777786255, - "learning_rate": 7.729145728643217e-05, - "loss": 5.1405, - "step": 23103 - }, - { - "epoch": 12.049022164276401, - "grad_norm": 1.512058138847351, - "learning_rate": 7.729045226130653e-05, - "loss": 5.255, - "step": 23104 - }, - { - "epoch": 12.04954367666232, - "grad_norm": 1.442771315574646, - "learning_rate": 7.728944723618091e-05, - "loss": 5.4573, - "step": 23105 - }, - { - "epoch": 12.05006518904824, - "grad_norm": 1.5278420448303223, - "learning_rate": 7.728844221105527e-05, - "loss": 5.6034, - "step": 23106 - }, - { - "epoch": 12.05058670143416, - "grad_norm": 1.4150595664978027, - "learning_rate": 7.728743718592965e-05, - "loss": 5.5045, - "step": 23107 - }, - { - "epoch": 12.051108213820077, - "grad_norm": 1.6483243703842163, - "learning_rate": 7.728643216080403e-05, - "loss": 5.4618, - "step": 23108 - }, - { - "epoch": 12.051629726205997, - "grad_norm": 1.5049381256103516, - "learning_rate": 7.72854271356784e-05, - "loss": 5.5043, - "step": 23109 - }, - { - "epoch": 12.052151238591916, - "grad_norm": 1.5205689668655396, - "learning_rate": 7.728442211055277e-05, - "loss": 5.1002, - "step": 23110 - }, - { - "epoch": 12.052672750977836, - "grad_norm": 1.4858235120773315, - "learning_rate": 7.728341708542713e-05, - "loss": 5.9753, - "step": 23111 - }, - { - "epoch": 12.053194263363755, - "grad_norm": 1.3633630275726318, - "learning_rate": 7.728241206030151e-05, - "loss": 5.7302, - "step": 23112 - }, - { - "epoch": 12.053715775749675, - "grad_norm": 1.4039028882980347, - "learning_rate": 7.728140703517588e-05, - "loss": 5.5766, - "step": 23113 - }, - { - "epoch": 12.054237288135592, - "grad_norm": 1.515754222869873, - "learning_rate": 7.728040201005025e-05, - "loss": 5.1773, - "step": 23114 - }, - { - "epoch": 12.054758800521512, - "grad_norm": 1.4896279573440552, - "learning_rate": 7.727939698492462e-05, - "loss": 5.088, - "step": 23115 - }, - { - "epoch": 12.055280312907431, - "grad_norm": 1.4344727993011475, - "learning_rate": 7.7278391959799e-05, - "loss": 5.3875, - "step": 23116 - }, - { - "epoch": 12.05580182529335, - "grad_norm": 1.4616960287094116, - "learning_rate": 7.727738693467337e-05, - "loss": 5.495, - "step": 23117 - }, - { - "epoch": 12.05632333767927, - "grad_norm": 1.4395264387130737, - "learning_rate": 7.727638190954775e-05, - "loss": 5.5669, - "step": 23118 - }, - { - "epoch": 12.05684485006519, - "grad_norm": 1.3966515064239502, - "learning_rate": 7.727537688442212e-05, - "loss": 5.7163, - "step": 23119 - }, - { - "epoch": 12.057366362451107, - "grad_norm": 1.3422608375549316, - "learning_rate": 7.72743718592965e-05, - "loss": 5.6116, - "step": 23120 - }, - { - "epoch": 12.057887874837027, - "grad_norm": 1.4429709911346436, - "learning_rate": 7.727336683417086e-05, - "loss": 5.0624, - "step": 23121 - }, - { - "epoch": 12.058409387222946, - "grad_norm": 1.4024609327316284, - "learning_rate": 7.727236180904524e-05, - "loss": 5.5866, - "step": 23122 - }, - { - "epoch": 12.058930899608866, - "grad_norm": 1.3678864240646362, - "learning_rate": 7.72713567839196e-05, - "loss": 5.4809, - "step": 23123 - }, - { - "epoch": 12.059452411994785, - "grad_norm": 1.4872329235076904, - "learning_rate": 7.727035175879396e-05, - "loss": 4.7608, - "step": 23124 - }, - { - "epoch": 12.059973924380705, - "grad_norm": 1.5194522142410278, - "learning_rate": 7.726934673366834e-05, - "loss": 5.224, - "step": 23125 - }, - { - "epoch": 12.060495436766622, - "grad_norm": 1.6315858364105225, - "learning_rate": 7.726834170854271e-05, - "loss": 5.5373, - "step": 23126 - }, - { - "epoch": 12.061016949152542, - "grad_norm": 1.4971426725387573, - "learning_rate": 7.726733668341708e-05, - "loss": 4.9076, - "step": 23127 - }, - { - "epoch": 12.061538461538461, - "grad_norm": 1.614307165145874, - "learning_rate": 7.726633165829146e-05, - "loss": 5.7007, - "step": 23128 - }, - { - "epoch": 12.06205997392438, - "grad_norm": 1.38572096824646, - "learning_rate": 7.726532663316584e-05, - "loss": 5.5066, - "step": 23129 - }, - { - "epoch": 12.0625814863103, - "grad_norm": 1.4711697101593018, - "learning_rate": 7.72643216080402e-05, - "loss": 5.3969, - "step": 23130 - }, - { - "epoch": 12.06310299869622, - "grad_norm": 1.46401846408844, - "learning_rate": 7.726331658291458e-05, - "loss": 5.3729, - "step": 23131 - }, - { - "epoch": 12.063624511082137, - "grad_norm": 1.5941005945205688, - "learning_rate": 7.726231155778895e-05, - "loss": 5.4361, - "step": 23132 - }, - { - "epoch": 12.064146023468057, - "grad_norm": 1.4965379238128662, - "learning_rate": 7.726130653266332e-05, - "loss": 5.0809, - "step": 23133 - }, - { - "epoch": 12.064667535853976, - "grad_norm": 1.382506012916565, - "learning_rate": 7.726030150753769e-05, - "loss": 5.624, - "step": 23134 - }, - { - "epoch": 12.065189048239896, - "grad_norm": 1.5484803915023804, - "learning_rate": 7.725929648241207e-05, - "loss": 5.3123, - "step": 23135 - }, - { - "epoch": 12.065710560625815, - "grad_norm": 1.463524580001831, - "learning_rate": 7.725829145728643e-05, - "loss": 5.2147, - "step": 23136 - }, - { - "epoch": 12.066232073011735, - "grad_norm": 1.3822216987609863, - "learning_rate": 7.725728643216081e-05, - "loss": 5.6799, - "step": 23137 - }, - { - "epoch": 12.066753585397652, - "grad_norm": 1.5488626956939697, - "learning_rate": 7.725628140703519e-05, - "loss": 5.2549, - "step": 23138 - }, - { - "epoch": 12.067275097783572, - "grad_norm": 1.392382264137268, - "learning_rate": 7.725527638190955e-05, - "loss": 5.6464, - "step": 23139 - }, - { - "epoch": 12.067796610169491, - "grad_norm": 1.5545934438705444, - "learning_rate": 7.725427135678393e-05, - "loss": 5.5901, - "step": 23140 - }, - { - "epoch": 12.06831812255541, - "grad_norm": 1.5810672044754028, - "learning_rate": 7.725326633165829e-05, - "loss": 5.0754, - "step": 23141 - }, - { - "epoch": 12.06883963494133, - "grad_norm": 1.479113221168518, - "learning_rate": 7.725226130653267e-05, - "loss": 5.594, - "step": 23142 - }, - { - "epoch": 12.06936114732725, - "grad_norm": 1.5307499170303345, - "learning_rate": 7.725125628140704e-05, - "loss": 5.7683, - "step": 23143 - }, - { - "epoch": 12.069882659713167, - "grad_norm": 1.5482925176620483, - "learning_rate": 7.725025125628141e-05, - "loss": 5.3627, - "step": 23144 - }, - { - "epoch": 12.070404172099087, - "grad_norm": 1.4254382848739624, - "learning_rate": 7.724924623115578e-05, - "loss": 5.5368, - "step": 23145 - }, - { - "epoch": 12.070925684485006, - "grad_norm": 1.354294776916504, - "learning_rate": 7.724824120603016e-05, - "loss": 5.8986, - "step": 23146 - }, - { - "epoch": 12.071447196870926, - "grad_norm": 1.4707800149917603, - "learning_rate": 7.724723618090452e-05, - "loss": 5.4867, - "step": 23147 - }, - { - "epoch": 12.071968709256845, - "grad_norm": 1.5186947584152222, - "learning_rate": 7.72462311557789e-05, - "loss": 5.3528, - "step": 23148 - }, - { - "epoch": 12.072490221642765, - "grad_norm": 1.5172853469848633, - "learning_rate": 7.724522613065328e-05, - "loss": 5.2225, - "step": 23149 - }, - { - "epoch": 12.073011734028682, - "grad_norm": 1.6064374446868896, - "learning_rate": 7.724422110552764e-05, - "loss": 5.3446, - "step": 23150 - }, - { - "epoch": 12.073533246414602, - "grad_norm": 1.401328206062317, - "learning_rate": 7.724321608040202e-05, - "loss": 5.7057, - "step": 23151 - }, - { - "epoch": 12.074054758800521, - "grad_norm": 1.5704351663589478, - "learning_rate": 7.724221105527638e-05, - "loss": 5.1928, - "step": 23152 - }, - { - "epoch": 12.07457627118644, - "grad_norm": 1.5500965118408203, - "learning_rate": 7.724120603015076e-05, - "loss": 4.631, - "step": 23153 - }, - { - "epoch": 12.07509778357236, - "grad_norm": 1.4541308879852295, - "learning_rate": 7.724020100502512e-05, - "loss": 5.5314, - "step": 23154 - }, - { - "epoch": 12.07561929595828, - "grad_norm": 1.3724642992019653, - "learning_rate": 7.72391959798995e-05, - "loss": 5.7501, - "step": 23155 - }, - { - "epoch": 12.076140808344197, - "grad_norm": 1.4142793416976929, - "learning_rate": 7.723819095477387e-05, - "loss": 5.2688, - "step": 23156 - }, - { - "epoch": 12.076662320730117, - "grad_norm": 1.4733211994171143, - "learning_rate": 7.723718592964824e-05, - "loss": 5.0826, - "step": 23157 - }, - { - "epoch": 12.077183833116036, - "grad_norm": 1.4014768600463867, - "learning_rate": 7.723618090452262e-05, - "loss": 5.4662, - "step": 23158 - }, - { - "epoch": 12.077705345501956, - "grad_norm": 1.4360324144363403, - "learning_rate": 7.7235175879397e-05, - "loss": 5.4341, - "step": 23159 - }, - { - "epoch": 12.078226857887875, - "grad_norm": 1.4753880500793457, - "learning_rate": 7.723417085427136e-05, - "loss": 5.6949, - "step": 23160 - }, - { - "epoch": 12.078748370273795, - "grad_norm": 1.476901650428772, - "learning_rate": 7.723316582914574e-05, - "loss": 5.259, - "step": 23161 - }, - { - "epoch": 12.079269882659712, - "grad_norm": 1.4592376947402954, - "learning_rate": 7.72321608040201e-05, - "loss": 5.1868, - "step": 23162 - }, - { - "epoch": 12.079791395045632, - "grad_norm": 1.4176607131958008, - "learning_rate": 7.723115577889447e-05, - "loss": 5.6763, - "step": 23163 - }, - { - "epoch": 12.080312907431551, - "grad_norm": 1.394101619720459, - "learning_rate": 7.723015075376885e-05, - "loss": 5.428, - "step": 23164 - }, - { - "epoch": 12.08083441981747, - "grad_norm": 1.5066335201263428, - "learning_rate": 7.722914572864321e-05, - "loss": 5.2668, - "step": 23165 - }, - { - "epoch": 12.08135593220339, - "grad_norm": 1.4272500276565552, - "learning_rate": 7.722814070351759e-05, - "loss": 5.6717, - "step": 23166 - }, - { - "epoch": 12.08187744458931, - "grad_norm": 1.4151893854141235, - "learning_rate": 7.722713567839195e-05, - "loss": 5.3976, - "step": 23167 - }, - { - "epoch": 12.082398956975227, - "grad_norm": 1.3158751726150513, - "learning_rate": 7.722613065326633e-05, - "loss": 5.5846, - "step": 23168 - }, - { - "epoch": 12.082920469361147, - "grad_norm": 1.4716885089874268, - "learning_rate": 7.722512562814071e-05, - "loss": 5.342, - "step": 23169 - }, - { - "epoch": 12.083441981747066, - "grad_norm": 1.4621703624725342, - "learning_rate": 7.722412060301509e-05, - "loss": 5.5367, - "step": 23170 - }, - { - "epoch": 12.083963494132986, - "grad_norm": 1.3957018852233887, - "learning_rate": 7.722311557788945e-05, - "loss": 5.58, - "step": 23171 - }, - { - "epoch": 12.084485006518905, - "grad_norm": 1.489406943321228, - "learning_rate": 7.722211055276383e-05, - "loss": 5.8162, - "step": 23172 - }, - { - "epoch": 12.085006518904825, - "grad_norm": 1.4156359434127808, - "learning_rate": 7.72211055276382e-05, - "loss": 5.3326, - "step": 23173 - }, - { - "epoch": 12.085528031290742, - "grad_norm": 1.4730842113494873, - "learning_rate": 7.722010050251257e-05, - "loss": 5.8026, - "step": 23174 - }, - { - "epoch": 12.086049543676662, - "grad_norm": 1.493222951889038, - "learning_rate": 7.721909547738694e-05, - "loss": 5.2776, - "step": 23175 - }, - { - "epoch": 12.086571056062581, - "grad_norm": 1.4378817081451416, - "learning_rate": 7.721809045226131e-05, - "loss": 5.373, - "step": 23176 - }, - { - "epoch": 12.0870925684485, - "grad_norm": 1.4092351198196411, - "learning_rate": 7.721708542713568e-05, - "loss": 5.3503, - "step": 23177 - }, - { - "epoch": 12.08761408083442, - "grad_norm": 1.4170786142349243, - "learning_rate": 7.721608040201006e-05, - "loss": 5.2392, - "step": 23178 - }, - { - "epoch": 12.08813559322034, - "grad_norm": 1.6509698629379272, - "learning_rate": 7.721507537688443e-05, - "loss": 4.934, - "step": 23179 - }, - { - "epoch": 12.088657105606258, - "grad_norm": 1.4109649658203125, - "learning_rate": 7.72140703517588e-05, - "loss": 5.5416, - "step": 23180 - }, - { - "epoch": 12.089178617992177, - "grad_norm": 1.4339953660964966, - "learning_rate": 7.721306532663318e-05, - "loss": 4.9824, - "step": 23181 - }, - { - "epoch": 12.089700130378096, - "grad_norm": 1.3650174140930176, - "learning_rate": 7.721206030150754e-05, - "loss": 5.4538, - "step": 23182 - }, - { - "epoch": 12.090221642764016, - "grad_norm": 1.413261890411377, - "learning_rate": 7.721105527638192e-05, - "loss": 5.2586, - "step": 23183 - }, - { - "epoch": 12.090743155149935, - "grad_norm": 1.6468067169189453, - "learning_rate": 7.721005025125628e-05, - "loss": 5.0641, - "step": 23184 - }, - { - "epoch": 12.091264667535855, - "grad_norm": 1.465459942817688, - "learning_rate": 7.720904522613066e-05, - "loss": 5.508, - "step": 23185 - }, - { - "epoch": 12.091786179921773, - "grad_norm": 1.581454873085022, - "learning_rate": 7.720804020100502e-05, - "loss": 5.1751, - "step": 23186 - }, - { - "epoch": 12.092307692307692, - "grad_norm": 1.4704396724700928, - "learning_rate": 7.72070351758794e-05, - "loss": 5.603, - "step": 23187 - }, - { - "epoch": 12.092829204693611, - "grad_norm": 1.613256812095642, - "learning_rate": 7.720603015075377e-05, - "loss": 5.3642, - "step": 23188 - }, - { - "epoch": 12.093350717079531, - "grad_norm": 1.4250587224960327, - "learning_rate": 7.720502512562814e-05, - "loss": 5.7103, - "step": 23189 - }, - { - "epoch": 12.09387222946545, - "grad_norm": 1.4374372959136963, - "learning_rate": 7.720402010050252e-05, - "loss": 5.4226, - "step": 23190 - }, - { - "epoch": 12.09439374185137, - "grad_norm": 1.4899542331695557, - "learning_rate": 7.720301507537689e-05, - "loss": 5.2381, - "step": 23191 - }, - { - "epoch": 12.094915254237288, - "grad_norm": 1.4355809688568115, - "learning_rate": 7.720201005025126e-05, - "loss": 5.1474, - "step": 23192 - }, - { - "epoch": 12.095436766623207, - "grad_norm": 1.558153748512268, - "learning_rate": 7.720100502512563e-05, - "loss": 4.6743, - "step": 23193 - }, - { - "epoch": 12.095958279009126, - "grad_norm": 1.45798921585083, - "learning_rate": 7.72e-05, - "loss": 4.8972, - "step": 23194 - }, - { - "epoch": 12.096479791395046, - "grad_norm": 1.4685579538345337, - "learning_rate": 7.719899497487437e-05, - "loss": 5.2735, - "step": 23195 - }, - { - "epoch": 12.097001303780965, - "grad_norm": 1.6373865604400635, - "learning_rate": 7.719798994974875e-05, - "loss": 5.5401, - "step": 23196 - }, - { - "epoch": 12.097522816166883, - "grad_norm": 1.5046583414077759, - "learning_rate": 7.719698492462311e-05, - "loss": 5.3829, - "step": 23197 - }, - { - "epoch": 12.098044328552803, - "grad_norm": 1.4812555313110352, - "learning_rate": 7.719597989949749e-05, - "loss": 5.3505, - "step": 23198 - }, - { - "epoch": 12.098565840938722, - "grad_norm": 1.3717961311340332, - "learning_rate": 7.719497487437185e-05, - "loss": 5.6134, - "step": 23199 - }, - { - "epoch": 12.099087353324641, - "grad_norm": 1.6426846981048584, - "learning_rate": 7.719396984924623e-05, - "loss": 5.4598, - "step": 23200 - }, - { - "epoch": 12.099608865710561, - "grad_norm": 1.518883228302002, - "learning_rate": 7.719296482412061e-05, - "loss": 5.2238, - "step": 23201 - }, - { - "epoch": 12.10013037809648, - "grad_norm": 1.5520178079605103, - "learning_rate": 7.719195979899499e-05, - "loss": 5.2854, - "step": 23202 - }, - { - "epoch": 12.100651890482398, - "grad_norm": 1.4103515148162842, - "learning_rate": 7.719095477386935e-05, - "loss": 5.4528, - "step": 23203 - }, - { - "epoch": 12.101173402868318, - "grad_norm": 1.4383925199508667, - "learning_rate": 7.718994974874372e-05, - "loss": 5.8024, - "step": 23204 - }, - { - "epoch": 12.101694915254237, - "grad_norm": 1.559143304824829, - "learning_rate": 7.71889447236181e-05, - "loss": 5.312, - "step": 23205 - }, - { - "epoch": 12.102216427640156, - "grad_norm": 1.3964194059371948, - "learning_rate": 7.718793969849246e-05, - "loss": 5.2673, - "step": 23206 - }, - { - "epoch": 12.102737940026076, - "grad_norm": 1.4628323316574097, - "learning_rate": 7.718693467336684e-05, - "loss": 5.1973, - "step": 23207 - }, - { - "epoch": 12.103259452411995, - "grad_norm": 1.4317137002944946, - "learning_rate": 7.71859296482412e-05, - "loss": 5.4461, - "step": 23208 - }, - { - "epoch": 12.103780964797913, - "grad_norm": 1.471629023551941, - "learning_rate": 7.718492462311558e-05, - "loss": 5.5767, - "step": 23209 - }, - { - "epoch": 12.104302477183833, - "grad_norm": 1.4211130142211914, - "learning_rate": 7.718391959798996e-05, - "loss": 5.1819, - "step": 23210 - }, - { - "epoch": 12.104823989569752, - "grad_norm": 1.6804918050765991, - "learning_rate": 7.718291457286433e-05, - "loss": 5.7377, - "step": 23211 - }, - { - "epoch": 12.105345501955671, - "grad_norm": 1.6533293724060059, - "learning_rate": 7.71819095477387e-05, - "loss": 4.7257, - "step": 23212 - }, - { - "epoch": 12.105867014341591, - "grad_norm": 1.4809714555740356, - "learning_rate": 7.718090452261308e-05, - "loss": 5.6189, - "step": 23213 - }, - { - "epoch": 12.10638852672751, - "grad_norm": 1.6182283163070679, - "learning_rate": 7.717989949748744e-05, - "loss": 5.1938, - "step": 23214 - }, - { - "epoch": 12.106910039113428, - "grad_norm": 1.5200282335281372, - "learning_rate": 7.717889447236182e-05, - "loss": 5.5414, - "step": 23215 - }, - { - "epoch": 12.107431551499348, - "grad_norm": 1.4282560348510742, - "learning_rate": 7.717788944723618e-05, - "loss": 5.4383, - "step": 23216 - }, - { - "epoch": 12.107953063885267, - "grad_norm": 1.5305250883102417, - "learning_rate": 7.717688442211055e-05, - "loss": 4.7738, - "step": 23217 - }, - { - "epoch": 12.108474576271187, - "grad_norm": 1.5039963722229004, - "learning_rate": 7.717587939698493e-05, - "loss": 5.5709, - "step": 23218 - }, - { - "epoch": 12.108996088657106, - "grad_norm": 1.4642914533615112, - "learning_rate": 7.717487437185929e-05, - "loss": 5.6915, - "step": 23219 - }, - { - "epoch": 12.109517601043025, - "grad_norm": 1.437987208366394, - "learning_rate": 7.717386934673367e-05, - "loss": 5.4001, - "step": 23220 - }, - { - "epoch": 12.110039113428943, - "grad_norm": 1.4373502731323242, - "learning_rate": 7.717286432160804e-05, - "loss": 5.3281, - "step": 23221 - }, - { - "epoch": 12.110560625814863, - "grad_norm": 1.3864829540252686, - "learning_rate": 7.717185929648242e-05, - "loss": 5.2867, - "step": 23222 - }, - { - "epoch": 12.111082138200782, - "grad_norm": 1.4542267322540283, - "learning_rate": 7.717085427135679e-05, - "loss": 5.5894, - "step": 23223 - }, - { - "epoch": 12.111603650586702, - "grad_norm": 1.5707799196243286, - "learning_rate": 7.716984924623116e-05, - "loss": 5.1944, - "step": 23224 - }, - { - "epoch": 12.112125162972621, - "grad_norm": 1.5086551904678345, - "learning_rate": 7.716884422110553e-05, - "loss": 5.5396, - "step": 23225 - }, - { - "epoch": 12.11264667535854, - "grad_norm": 1.4224663972854614, - "learning_rate": 7.716783919597991e-05, - "loss": 5.4392, - "step": 23226 - }, - { - "epoch": 12.113168187744458, - "grad_norm": 1.4865385293960571, - "learning_rate": 7.716683417085427e-05, - "loss": 5.5434, - "step": 23227 - }, - { - "epoch": 12.113689700130378, - "grad_norm": 1.3933360576629639, - "learning_rate": 7.716582914572865e-05, - "loss": 5.5136, - "step": 23228 - }, - { - "epoch": 12.114211212516297, - "grad_norm": 1.562211513519287, - "learning_rate": 7.716482412060301e-05, - "loss": 4.8786, - "step": 23229 - }, - { - "epoch": 12.114732724902217, - "grad_norm": 1.6270502805709839, - "learning_rate": 7.716381909547739e-05, - "loss": 5.8043, - "step": 23230 - }, - { - "epoch": 12.115254237288136, - "grad_norm": 1.5078649520874023, - "learning_rate": 7.716281407035177e-05, - "loss": 5.1878, - "step": 23231 - }, - { - "epoch": 12.115775749674055, - "grad_norm": 1.5005996227264404, - "learning_rate": 7.716180904522613e-05, - "loss": 5.4168, - "step": 23232 - }, - { - "epoch": 12.116297262059973, - "grad_norm": 1.5568784475326538, - "learning_rate": 7.716080402010051e-05, - "loss": 5.3491, - "step": 23233 - }, - { - "epoch": 12.116818774445893, - "grad_norm": 1.3571484088897705, - "learning_rate": 7.715979899497488e-05, - "loss": 5.5368, - "step": 23234 - }, - { - "epoch": 12.117340286831812, - "grad_norm": 1.4131619930267334, - "learning_rate": 7.715879396984925e-05, - "loss": 5.3988, - "step": 23235 - }, - { - "epoch": 12.117861799217732, - "grad_norm": 1.3802992105484009, - "learning_rate": 7.715778894472362e-05, - "loss": 5.6483, - "step": 23236 - }, - { - "epoch": 12.118383311603651, - "grad_norm": 1.5045723915100098, - "learning_rate": 7.7156783919598e-05, - "loss": 5.44, - "step": 23237 - }, - { - "epoch": 12.11890482398957, - "grad_norm": 1.3542206287384033, - "learning_rate": 7.715577889447236e-05, - "loss": 5.4072, - "step": 23238 - }, - { - "epoch": 12.119426336375488, - "grad_norm": 1.4684351682662964, - "learning_rate": 7.715477386934674e-05, - "loss": 5.6189, - "step": 23239 - }, - { - "epoch": 12.119947848761408, - "grad_norm": 1.3552581071853638, - "learning_rate": 7.71537688442211e-05, - "loss": 5.8418, - "step": 23240 - }, - { - "epoch": 12.120469361147327, - "grad_norm": 1.3855769634246826, - "learning_rate": 7.715276381909548e-05, - "loss": 5.5141, - "step": 23241 - }, - { - "epoch": 12.120990873533247, - "grad_norm": 1.3863250017166138, - "learning_rate": 7.715175879396986e-05, - "loss": 5.7289, - "step": 23242 - }, - { - "epoch": 12.121512385919166, - "grad_norm": 1.4180080890655518, - "learning_rate": 7.715075376884422e-05, - "loss": 5.2798, - "step": 23243 - }, - { - "epoch": 12.122033898305085, - "grad_norm": 1.3867744207382202, - "learning_rate": 7.71497487437186e-05, - "loss": 5.1754, - "step": 23244 - }, - { - "epoch": 12.122555410691003, - "grad_norm": 1.4599356651306152, - "learning_rate": 7.714874371859296e-05, - "loss": 5.4251, - "step": 23245 - }, - { - "epoch": 12.123076923076923, - "grad_norm": 1.4853521585464478, - "learning_rate": 7.714773869346734e-05, - "loss": 4.8818, - "step": 23246 - }, - { - "epoch": 12.123598435462842, - "grad_norm": 1.4472476243972778, - "learning_rate": 7.71467336683417e-05, - "loss": 6.0063, - "step": 23247 - }, - { - "epoch": 12.124119947848762, - "grad_norm": 1.4048186540603638, - "learning_rate": 7.714572864321608e-05, - "loss": 5.1808, - "step": 23248 - }, - { - "epoch": 12.124641460234681, - "grad_norm": 1.510714054107666, - "learning_rate": 7.714472361809045e-05, - "loss": 5.553, - "step": 23249 - }, - { - "epoch": 12.1251629726206, - "grad_norm": 1.4529436826705933, - "learning_rate": 7.714371859296483e-05, - "loss": 5.5134, - "step": 23250 - }, - { - "epoch": 12.125684485006518, - "grad_norm": 1.5888347625732422, - "learning_rate": 7.71427135678392e-05, - "loss": 5.0681, - "step": 23251 - }, - { - "epoch": 12.126205997392438, - "grad_norm": 1.5713202953338623, - "learning_rate": 7.714170854271358e-05, - "loss": 5.0697, - "step": 23252 - }, - { - "epoch": 12.126727509778357, - "grad_norm": 1.456058144569397, - "learning_rate": 7.714070351758795e-05, - "loss": 5.3732, - "step": 23253 - }, - { - "epoch": 12.127249022164277, - "grad_norm": 1.3542330265045166, - "learning_rate": 7.713969849246232e-05, - "loss": 5.4632, - "step": 23254 - }, - { - "epoch": 12.127770534550196, - "grad_norm": 1.4905760288238525, - "learning_rate": 7.713869346733669e-05, - "loss": 5.2779, - "step": 23255 - }, - { - "epoch": 12.128292046936116, - "grad_norm": 1.505298376083374, - "learning_rate": 7.713768844221105e-05, - "loss": 4.6976, - "step": 23256 - }, - { - "epoch": 12.128813559322033, - "grad_norm": 1.4407790899276733, - "learning_rate": 7.713668341708543e-05, - "loss": 5.0925, - "step": 23257 - }, - { - "epoch": 12.129335071707953, - "grad_norm": 1.5303047895431519, - "learning_rate": 7.71356783919598e-05, - "loss": 5.2716, - "step": 23258 - }, - { - "epoch": 12.129856584093872, - "grad_norm": 1.4792834520339966, - "learning_rate": 7.713467336683417e-05, - "loss": 5.2168, - "step": 23259 - }, - { - "epoch": 12.130378096479792, - "grad_norm": 1.4654704332351685, - "learning_rate": 7.713366834170854e-05, - "loss": 5.3315, - "step": 23260 - }, - { - "epoch": 12.130899608865711, - "grad_norm": 1.4507076740264893, - "learning_rate": 7.713266331658291e-05, - "loss": 5.5352, - "step": 23261 - }, - { - "epoch": 12.13142112125163, - "grad_norm": 1.5841705799102783, - "learning_rate": 7.713165829145729e-05, - "loss": 5.4442, - "step": 23262 - }, - { - "epoch": 12.131942633637548, - "grad_norm": 1.5021167993545532, - "learning_rate": 7.713065326633167e-05, - "loss": 5.5044, - "step": 23263 - }, - { - "epoch": 12.132464146023468, - "grad_norm": 1.4707633256912231, - "learning_rate": 7.712964824120603e-05, - "loss": 5.4177, - "step": 23264 - }, - { - "epoch": 12.132985658409387, - "grad_norm": 1.4848772287368774, - "learning_rate": 7.712864321608041e-05, - "loss": 5.1925, - "step": 23265 - }, - { - "epoch": 12.133507170795307, - "grad_norm": 1.356120228767395, - "learning_rate": 7.712763819095478e-05, - "loss": 5.7198, - "step": 23266 - }, - { - "epoch": 12.134028683181226, - "grad_norm": 1.424798846244812, - "learning_rate": 7.712663316582915e-05, - "loss": 5.5113, - "step": 23267 - }, - { - "epoch": 12.134550195567146, - "grad_norm": 1.4528850317001343, - "learning_rate": 7.712562814070352e-05, - "loss": 5.2151, - "step": 23268 - }, - { - "epoch": 12.135071707953063, - "grad_norm": 1.3560599088668823, - "learning_rate": 7.71246231155779e-05, - "loss": 5.5187, - "step": 23269 - }, - { - "epoch": 12.135593220338983, - "grad_norm": 1.4713330268859863, - "learning_rate": 7.712361809045226e-05, - "loss": 5.2389, - "step": 23270 - }, - { - "epoch": 12.136114732724902, - "grad_norm": 1.4393237829208374, - "learning_rate": 7.712261306532664e-05, - "loss": 5.1559, - "step": 23271 - }, - { - "epoch": 12.136636245110822, - "grad_norm": 1.3506579399108887, - "learning_rate": 7.712160804020102e-05, - "loss": 5.6588, - "step": 23272 - }, - { - "epoch": 12.137157757496741, - "grad_norm": 1.4694457054138184, - "learning_rate": 7.712060301507538e-05, - "loss": 5.572, - "step": 23273 - }, - { - "epoch": 12.13767926988266, - "grad_norm": 1.6166958808898926, - "learning_rate": 7.711959798994976e-05, - "loss": 4.7243, - "step": 23274 - }, - { - "epoch": 12.138200782268578, - "grad_norm": 1.451624870300293, - "learning_rate": 7.711859296482412e-05, - "loss": 5.3111, - "step": 23275 - }, - { - "epoch": 12.138722294654498, - "grad_norm": 1.3972625732421875, - "learning_rate": 7.71175879396985e-05, - "loss": 5.6035, - "step": 23276 - }, - { - "epoch": 12.139243807040417, - "grad_norm": 1.5093107223510742, - "learning_rate": 7.711658291457286e-05, - "loss": 5.2821, - "step": 23277 - }, - { - "epoch": 12.139765319426337, - "grad_norm": 1.4752861261367798, - "learning_rate": 7.711557788944724e-05, - "loss": 5.6475, - "step": 23278 - }, - { - "epoch": 12.140286831812256, - "grad_norm": 1.4938879013061523, - "learning_rate": 7.71145728643216e-05, - "loss": 5.5052, - "step": 23279 - }, - { - "epoch": 12.140808344198176, - "grad_norm": 1.5232247114181519, - "learning_rate": 7.711356783919598e-05, - "loss": 5.3171, - "step": 23280 - }, - { - "epoch": 12.141329856584093, - "grad_norm": 1.5013631582260132, - "learning_rate": 7.711256281407035e-05, - "loss": 5.5011, - "step": 23281 - }, - { - "epoch": 12.141851368970013, - "grad_norm": 1.537766695022583, - "learning_rate": 7.711155778894473e-05, - "loss": 5.4879, - "step": 23282 - }, - { - "epoch": 12.142372881355932, - "grad_norm": 1.5254372358322144, - "learning_rate": 7.71105527638191e-05, - "loss": 5.6191, - "step": 23283 - }, - { - "epoch": 12.142894393741852, - "grad_norm": 1.5185115337371826, - "learning_rate": 7.710954773869347e-05, - "loss": 5.3931, - "step": 23284 - }, - { - "epoch": 12.143415906127771, - "grad_norm": 1.497800350189209, - "learning_rate": 7.710854271356785e-05, - "loss": 5.3358, - "step": 23285 - }, - { - "epoch": 12.14393741851369, - "grad_norm": 1.5053385496139526, - "learning_rate": 7.710753768844221e-05, - "loss": 5.5635, - "step": 23286 - }, - { - "epoch": 12.144458930899608, - "grad_norm": 1.9365413188934326, - "learning_rate": 7.710653266331659e-05, - "loss": 5.2961, - "step": 23287 - }, - { - "epoch": 12.144980443285528, - "grad_norm": 1.5243555307388306, - "learning_rate": 7.710552763819095e-05, - "loss": 5.7222, - "step": 23288 - }, - { - "epoch": 12.145501955671447, - "grad_norm": 1.4120352268218994, - "learning_rate": 7.710452261306533e-05, - "loss": 5.7926, - "step": 23289 - }, - { - "epoch": 12.146023468057367, - "grad_norm": 1.4759808778762817, - "learning_rate": 7.71035175879397e-05, - "loss": 5.3006, - "step": 23290 - }, - { - "epoch": 12.146544980443286, - "grad_norm": 1.302177906036377, - "learning_rate": 7.710251256281407e-05, - "loss": 5.8449, - "step": 23291 - }, - { - "epoch": 12.147066492829204, - "grad_norm": 1.472990870475769, - "learning_rate": 7.710150753768845e-05, - "loss": 5.3852, - "step": 23292 - }, - { - "epoch": 12.147588005215123, - "grad_norm": 1.3896753787994385, - "learning_rate": 7.710050251256283e-05, - "loss": 5.4171, - "step": 23293 - }, - { - "epoch": 12.148109517601043, - "grad_norm": 1.5493955612182617, - "learning_rate": 7.709949748743719e-05, - "loss": 5.4883, - "step": 23294 - }, - { - "epoch": 12.148631029986962, - "grad_norm": 1.4997193813323975, - "learning_rate": 7.709849246231157e-05, - "loss": 5.2254, - "step": 23295 - }, - { - "epoch": 12.149152542372882, - "grad_norm": 1.6209088563919067, - "learning_rate": 7.709748743718593e-05, - "loss": 5.4005, - "step": 23296 - }, - { - "epoch": 12.149674054758801, - "grad_norm": 1.5070935487747192, - "learning_rate": 7.70964824120603e-05, - "loss": 5.3359, - "step": 23297 - }, - { - "epoch": 12.150195567144719, - "grad_norm": 1.5476658344268799, - "learning_rate": 7.709547738693468e-05, - "loss": 5.4895, - "step": 23298 - }, - { - "epoch": 12.150717079530638, - "grad_norm": 1.4580047130584717, - "learning_rate": 7.709447236180904e-05, - "loss": 5.4815, - "step": 23299 - }, - { - "epoch": 12.151238591916558, - "grad_norm": 1.4353770017623901, - "learning_rate": 7.709346733668342e-05, - "loss": 5.6993, - "step": 23300 - }, - { - "epoch": 12.151760104302477, - "grad_norm": 1.3868427276611328, - "learning_rate": 7.709246231155778e-05, - "loss": 5.4958, - "step": 23301 - }, - { - "epoch": 12.152281616688397, - "grad_norm": 1.3699768781661987, - "learning_rate": 7.709145728643216e-05, - "loss": 5.7146, - "step": 23302 - }, - { - "epoch": 12.152803129074316, - "grad_norm": 1.6336146593093872, - "learning_rate": 7.709045226130654e-05, - "loss": 5.2244, - "step": 23303 - }, - { - "epoch": 12.153324641460234, - "grad_norm": 1.5282599925994873, - "learning_rate": 7.708944723618092e-05, - "loss": 5.3147, - "step": 23304 - }, - { - "epoch": 12.153846153846153, - "grad_norm": 1.3613404035568237, - "learning_rate": 7.708844221105528e-05, - "loss": 5.3852, - "step": 23305 - }, - { - "epoch": 12.154367666232073, - "grad_norm": 1.5635039806365967, - "learning_rate": 7.708743718592966e-05, - "loss": 5.4757, - "step": 23306 - }, - { - "epoch": 12.154889178617992, - "grad_norm": 1.5353165864944458, - "learning_rate": 7.708643216080402e-05, - "loss": 5.3019, - "step": 23307 - }, - { - "epoch": 12.155410691003912, - "grad_norm": 1.4703949689865112, - "learning_rate": 7.70854271356784e-05, - "loss": 5.7228, - "step": 23308 - }, - { - "epoch": 12.155932203389831, - "grad_norm": 1.4320669174194336, - "learning_rate": 7.708442211055277e-05, - "loss": 5.4785, - "step": 23309 - }, - { - "epoch": 12.156453715775749, - "grad_norm": 1.3571455478668213, - "learning_rate": 7.708341708542713e-05, - "loss": 5.505, - "step": 23310 - }, - { - "epoch": 12.156975228161668, - "grad_norm": 1.6154870986938477, - "learning_rate": 7.708241206030151e-05, - "loss": 4.6275, - "step": 23311 - }, - { - "epoch": 12.157496740547588, - "grad_norm": 1.4904800653457642, - "learning_rate": 7.708140703517589e-05, - "loss": 4.9685, - "step": 23312 - }, - { - "epoch": 12.158018252933507, - "grad_norm": 1.6181299686431885, - "learning_rate": 7.708040201005026e-05, - "loss": 5.1812, - "step": 23313 - }, - { - "epoch": 12.158539765319427, - "grad_norm": 1.513921856880188, - "learning_rate": 7.707939698492463e-05, - "loss": 5.1277, - "step": 23314 - }, - { - "epoch": 12.159061277705346, - "grad_norm": 1.3423831462860107, - "learning_rate": 7.7078391959799e-05, - "loss": 6.0305, - "step": 23315 - }, - { - "epoch": 12.159582790091264, - "grad_norm": 1.4354511499404907, - "learning_rate": 7.707738693467337e-05, - "loss": 5.1426, - "step": 23316 - }, - { - "epoch": 12.160104302477183, - "grad_norm": 1.5266344547271729, - "learning_rate": 7.707638190954775e-05, - "loss": 4.8829, - "step": 23317 - }, - { - "epoch": 12.160625814863103, - "grad_norm": 1.5573450326919556, - "learning_rate": 7.707537688442211e-05, - "loss": 5.8431, - "step": 23318 - }, - { - "epoch": 12.161147327249022, - "grad_norm": 1.4455690383911133, - "learning_rate": 7.707437185929649e-05, - "loss": 5.3474, - "step": 23319 - }, - { - "epoch": 12.161668839634942, - "grad_norm": 1.4828075170516968, - "learning_rate": 7.707336683417085e-05, - "loss": 5.7715, - "step": 23320 - }, - { - "epoch": 12.162190352020861, - "grad_norm": 1.454125165939331, - "learning_rate": 7.707236180904523e-05, - "loss": 5.8353, - "step": 23321 - }, - { - "epoch": 12.162711864406779, - "grad_norm": 1.6863183975219727, - "learning_rate": 7.70713567839196e-05, - "loss": 5.0384, - "step": 23322 - }, - { - "epoch": 12.163233376792698, - "grad_norm": 1.481653094291687, - "learning_rate": 7.707035175879397e-05, - "loss": 5.3017, - "step": 23323 - }, - { - "epoch": 12.163754889178618, - "grad_norm": 1.5463052988052368, - "learning_rate": 7.706934673366835e-05, - "loss": 5.3041, - "step": 23324 - }, - { - "epoch": 12.164276401564537, - "grad_norm": 1.4869391918182373, - "learning_rate": 7.706834170854272e-05, - "loss": 5.3943, - "step": 23325 - }, - { - "epoch": 12.164797913950457, - "grad_norm": 1.5012106895446777, - "learning_rate": 7.70673366834171e-05, - "loss": 5.4998, - "step": 23326 - }, - { - "epoch": 12.165319426336376, - "grad_norm": 1.43594229221344, - "learning_rate": 7.706633165829146e-05, - "loss": 5.7417, - "step": 23327 - }, - { - "epoch": 12.165840938722294, - "grad_norm": 1.489554762840271, - "learning_rate": 7.706532663316584e-05, - "loss": 5.2817, - "step": 23328 - }, - { - "epoch": 12.166362451108213, - "grad_norm": 1.6523733139038086, - "learning_rate": 7.70643216080402e-05, - "loss": 5.1725, - "step": 23329 - }, - { - "epoch": 12.166883963494133, - "grad_norm": 1.5325548648834229, - "learning_rate": 7.706331658291458e-05, - "loss": 5.2822, - "step": 23330 - }, - { - "epoch": 12.167405475880052, - "grad_norm": 1.3747526407241821, - "learning_rate": 7.706231155778894e-05, - "loss": 5.493, - "step": 23331 - }, - { - "epoch": 12.167926988265972, - "grad_norm": 1.4299225807189941, - "learning_rate": 7.706130653266332e-05, - "loss": 5.2501, - "step": 23332 - }, - { - "epoch": 12.168448500651891, - "grad_norm": 1.4282739162445068, - "learning_rate": 7.70603015075377e-05, - "loss": 5.5737, - "step": 23333 - }, - { - "epoch": 12.168970013037809, - "grad_norm": 1.4036917686462402, - "learning_rate": 7.705929648241208e-05, - "loss": 5.6002, - "step": 23334 - }, - { - "epoch": 12.169491525423728, - "grad_norm": 1.515649437904358, - "learning_rate": 7.705829145728644e-05, - "loss": 5.3753, - "step": 23335 - }, - { - "epoch": 12.170013037809648, - "grad_norm": 1.4405518770217896, - "learning_rate": 7.70572864321608e-05, - "loss": 5.8509, - "step": 23336 - }, - { - "epoch": 12.170534550195567, - "grad_norm": 1.3676750659942627, - "learning_rate": 7.705628140703518e-05, - "loss": 5.3341, - "step": 23337 - }, - { - "epoch": 12.171056062581487, - "grad_norm": 1.3554993867874146, - "learning_rate": 7.705527638190955e-05, - "loss": 5.86, - "step": 23338 - }, - { - "epoch": 12.171577574967406, - "grad_norm": 1.556240200996399, - "learning_rate": 7.705427135678392e-05, - "loss": 5.322, - "step": 23339 - }, - { - "epoch": 12.172099087353324, - "grad_norm": 1.4049954414367676, - "learning_rate": 7.705326633165829e-05, - "loss": 5.263, - "step": 23340 - }, - { - "epoch": 12.172620599739243, - "grad_norm": 1.468074083328247, - "learning_rate": 7.705226130653267e-05, - "loss": 5.0556, - "step": 23341 - }, - { - "epoch": 12.173142112125163, - "grad_norm": 1.3800314664840698, - "learning_rate": 7.705125628140703e-05, - "loss": 5.6102, - "step": 23342 - }, - { - "epoch": 12.173663624511082, - "grad_norm": 1.5552878379821777, - "learning_rate": 7.705025125628141e-05, - "loss": 5.5784, - "step": 23343 - }, - { - "epoch": 12.174185136897002, - "grad_norm": 1.5406370162963867, - "learning_rate": 7.704924623115579e-05, - "loss": 5.3496, - "step": 23344 - }, - { - "epoch": 12.174706649282921, - "grad_norm": 1.516253113746643, - "learning_rate": 7.704824120603016e-05, - "loss": 5.2539, - "step": 23345 - }, - { - "epoch": 12.175228161668839, - "grad_norm": 1.4414924383163452, - "learning_rate": 7.704723618090453e-05, - "loss": 5.5794, - "step": 23346 - }, - { - "epoch": 12.175749674054758, - "grad_norm": 1.510703682899475, - "learning_rate": 7.70462311557789e-05, - "loss": 5.3524, - "step": 23347 - }, - { - "epoch": 12.176271186440678, - "grad_norm": 1.4159233570098877, - "learning_rate": 7.704522613065327e-05, - "loss": 5.7146, - "step": 23348 - }, - { - "epoch": 12.176792698826597, - "grad_norm": 1.9038708209991455, - "learning_rate": 7.704422110552763e-05, - "loss": 5.2348, - "step": 23349 - }, - { - "epoch": 12.177314211212517, - "grad_norm": 1.6102526187896729, - "learning_rate": 7.704321608040201e-05, - "loss": 5.249, - "step": 23350 - }, - { - "epoch": 12.177835723598436, - "grad_norm": 1.4696005582809448, - "learning_rate": 7.704221105527638e-05, - "loss": 5.845, - "step": 23351 - }, - { - "epoch": 12.178357235984354, - "grad_norm": 1.581902265548706, - "learning_rate": 7.704120603015075e-05, - "loss": 5.2773, - "step": 23352 - }, - { - "epoch": 12.178878748370273, - "grad_norm": 1.41827392578125, - "learning_rate": 7.704020100502513e-05, - "loss": 5.6197, - "step": 23353 - }, - { - "epoch": 12.179400260756193, - "grad_norm": 1.4019994735717773, - "learning_rate": 7.703919597989951e-05, - "loss": 5.298, - "step": 23354 - }, - { - "epoch": 12.179921773142112, - "grad_norm": 1.4796041250228882, - "learning_rate": 7.703819095477387e-05, - "loss": 5.8274, - "step": 23355 - }, - { - "epoch": 12.180443285528032, - "grad_norm": 1.5686041116714478, - "learning_rate": 7.703718592964825e-05, - "loss": 5.4601, - "step": 23356 - }, - { - "epoch": 12.180964797913951, - "grad_norm": 1.4996767044067383, - "learning_rate": 7.703618090452262e-05, - "loss": 5.13, - "step": 23357 - }, - { - "epoch": 12.181486310299869, - "grad_norm": 1.429119348526001, - "learning_rate": 7.7035175879397e-05, - "loss": 5.5594, - "step": 23358 - }, - { - "epoch": 12.182007822685788, - "grad_norm": 1.4767425060272217, - "learning_rate": 7.703417085427136e-05, - "loss": 5.4482, - "step": 23359 - }, - { - "epoch": 12.182529335071708, - "grad_norm": 1.342010736465454, - "learning_rate": 7.703316582914574e-05, - "loss": 4.8981, - "step": 23360 - }, - { - "epoch": 12.183050847457627, - "grad_norm": 1.6532597541809082, - "learning_rate": 7.70321608040201e-05, - "loss": 4.9596, - "step": 23361 - }, - { - "epoch": 12.183572359843547, - "grad_norm": 1.296413779258728, - "learning_rate": 7.703115577889448e-05, - "loss": 5.7844, - "step": 23362 - }, - { - "epoch": 12.184093872229466, - "grad_norm": 1.324422836303711, - "learning_rate": 7.703015075376884e-05, - "loss": 4.78, - "step": 23363 - }, - { - "epoch": 12.184615384615384, - "grad_norm": 1.5211480855941772, - "learning_rate": 7.702914572864322e-05, - "loss": 5.3522, - "step": 23364 - }, - { - "epoch": 12.185136897001303, - "grad_norm": 1.5446710586547852, - "learning_rate": 7.70281407035176e-05, - "loss": 4.8354, - "step": 23365 - }, - { - "epoch": 12.185658409387223, - "grad_norm": 1.4676069021224976, - "learning_rate": 7.702713567839196e-05, - "loss": 5.8365, - "step": 23366 - }, - { - "epoch": 12.186179921773142, - "grad_norm": 1.5342400074005127, - "learning_rate": 7.702613065326634e-05, - "loss": 5.7587, - "step": 23367 - }, - { - "epoch": 12.186701434159062, - "grad_norm": 1.5539108514785767, - "learning_rate": 7.70251256281407e-05, - "loss": 5.2583, - "step": 23368 - }, - { - "epoch": 12.187222946544981, - "grad_norm": 1.3007473945617676, - "learning_rate": 7.702412060301508e-05, - "loss": 5.6048, - "step": 23369 - }, - { - "epoch": 12.187744458930899, - "grad_norm": 1.3909543752670288, - "learning_rate": 7.702311557788945e-05, - "loss": 5.616, - "step": 23370 - }, - { - "epoch": 12.188265971316818, - "grad_norm": 1.464339017868042, - "learning_rate": 7.702211055276382e-05, - "loss": 5.1818, - "step": 23371 - }, - { - "epoch": 12.188787483702738, - "grad_norm": 1.5235601663589478, - "learning_rate": 7.702110552763819e-05, - "loss": 5.4435, - "step": 23372 - }, - { - "epoch": 12.189308996088657, - "grad_norm": 1.445291519165039, - "learning_rate": 7.702010050251257e-05, - "loss": 5.6791, - "step": 23373 - }, - { - "epoch": 12.189830508474577, - "grad_norm": 1.5655055046081543, - "learning_rate": 7.701909547738693e-05, - "loss": 5.3876, - "step": 23374 - }, - { - "epoch": 12.190352020860496, - "grad_norm": 1.5699520111083984, - "learning_rate": 7.701809045226131e-05, - "loss": 5.5915, - "step": 23375 - }, - { - "epoch": 12.190873533246414, - "grad_norm": 1.473563551902771, - "learning_rate": 7.701708542713569e-05, - "loss": 5.4007, - "step": 23376 - }, - { - "epoch": 12.191395045632333, - "grad_norm": 1.3971940279006958, - "learning_rate": 7.701608040201005e-05, - "loss": 4.644, - "step": 23377 - }, - { - "epoch": 12.191916558018253, - "grad_norm": 1.4787750244140625, - "learning_rate": 7.701507537688443e-05, - "loss": 5.8785, - "step": 23378 - }, - { - "epoch": 12.192438070404172, - "grad_norm": 1.405579924583435, - "learning_rate": 7.701407035175879e-05, - "loss": 5.2865, - "step": 23379 - }, - { - "epoch": 12.192959582790092, - "grad_norm": 1.448868751525879, - "learning_rate": 7.701306532663317e-05, - "loss": 5.1594, - "step": 23380 - }, - { - "epoch": 12.193481095176011, - "grad_norm": 1.5433615446090698, - "learning_rate": 7.701206030150754e-05, - "loss": 4.8572, - "step": 23381 - }, - { - "epoch": 12.194002607561929, - "grad_norm": 1.5075712203979492, - "learning_rate": 7.701105527638191e-05, - "loss": 5.1814, - "step": 23382 - }, - { - "epoch": 12.194524119947848, - "grad_norm": 1.482069730758667, - "learning_rate": 7.701005025125628e-05, - "loss": 5.4948, - "step": 23383 - }, - { - "epoch": 12.195045632333768, - "grad_norm": 1.4244662523269653, - "learning_rate": 7.700904522613066e-05, - "loss": 4.959, - "step": 23384 - }, - { - "epoch": 12.195567144719687, - "grad_norm": 1.4528125524520874, - "learning_rate": 7.700804020100503e-05, - "loss": 5.5357, - "step": 23385 - }, - { - "epoch": 12.196088657105607, - "grad_norm": 1.3969098329544067, - "learning_rate": 7.700703517587941e-05, - "loss": 5.7862, - "step": 23386 - }, - { - "epoch": 12.196610169491525, - "grad_norm": 1.494681477546692, - "learning_rate": 7.700603015075378e-05, - "loss": 5.4779, - "step": 23387 - }, - { - "epoch": 12.197131681877444, - "grad_norm": 1.4752023220062256, - "learning_rate": 7.700502512562815e-05, - "loss": 5.5634, - "step": 23388 - }, - { - "epoch": 12.197653194263363, - "grad_norm": 1.5467218160629272, - "learning_rate": 7.700402010050252e-05, - "loss": 5.2846, - "step": 23389 - }, - { - "epoch": 12.198174706649283, - "grad_norm": 1.444936990737915, - "learning_rate": 7.700301507537688e-05, - "loss": 5.469, - "step": 23390 - }, - { - "epoch": 12.198696219035202, - "grad_norm": 1.4908379316329956, - "learning_rate": 7.700201005025126e-05, - "loss": 4.9276, - "step": 23391 - }, - { - "epoch": 12.199217731421122, - "grad_norm": 1.6406298875808716, - "learning_rate": 7.700100502512562e-05, - "loss": 5.255, - "step": 23392 - }, - { - "epoch": 12.19973924380704, - "grad_norm": 1.3495728969573975, - "learning_rate": 7.7e-05, - "loss": 5.6502, - "step": 23393 - }, - { - "epoch": 12.200260756192959, - "grad_norm": 1.4650436639785767, - "learning_rate": 7.699899497487437e-05, - "loss": 5.6506, - "step": 23394 - }, - { - "epoch": 12.200782268578878, - "grad_norm": 1.6269320249557495, - "learning_rate": 7.699798994974874e-05, - "loss": 5.2085, - "step": 23395 - }, - { - "epoch": 12.201303780964798, - "grad_norm": 1.5121986865997314, - "learning_rate": 7.699698492462312e-05, - "loss": 5.4597, - "step": 23396 - }, - { - "epoch": 12.201825293350717, - "grad_norm": 1.4394415616989136, - "learning_rate": 7.69959798994975e-05, - "loss": 5.7235, - "step": 23397 - }, - { - "epoch": 12.202346805736637, - "grad_norm": 1.3976236581802368, - "learning_rate": 7.699497487437186e-05, - "loss": 5.5565, - "step": 23398 - }, - { - "epoch": 12.202868318122555, - "grad_norm": 1.4677674770355225, - "learning_rate": 7.699396984924624e-05, - "loss": 5.6801, - "step": 23399 - }, - { - "epoch": 12.203389830508474, - "grad_norm": 1.4842697381973267, - "learning_rate": 7.69929648241206e-05, - "loss": 5.5434, - "step": 23400 - }, - { - "epoch": 12.203911342894393, - "grad_norm": 1.4093700647354126, - "learning_rate": 7.699195979899498e-05, - "loss": 5.5177, - "step": 23401 - }, - { - "epoch": 12.204432855280313, - "grad_norm": 1.4265549182891846, - "learning_rate": 7.699095477386935e-05, - "loss": 5.3644, - "step": 23402 - }, - { - "epoch": 12.204954367666232, - "grad_norm": 1.3934346437454224, - "learning_rate": 7.698994974874371e-05, - "loss": 5.7197, - "step": 23403 - }, - { - "epoch": 12.205475880052152, - "grad_norm": 1.568651556968689, - "learning_rate": 7.698894472361809e-05, - "loss": 5.3019, - "step": 23404 - }, - { - "epoch": 12.20599739243807, - "grad_norm": 1.4075555801391602, - "learning_rate": 7.698793969849247e-05, - "loss": 5.6702, - "step": 23405 - }, - { - "epoch": 12.206518904823989, - "grad_norm": 1.6549304723739624, - "learning_rate": 7.698693467336685e-05, - "loss": 5.516, - "step": 23406 - }, - { - "epoch": 12.207040417209909, - "grad_norm": 1.488179087638855, - "learning_rate": 7.698592964824121e-05, - "loss": 5.2979, - "step": 23407 - }, - { - "epoch": 12.207561929595828, - "grad_norm": 1.3907779455184937, - "learning_rate": 7.698492462311559e-05, - "loss": 5.5283, - "step": 23408 - }, - { - "epoch": 12.208083441981747, - "grad_norm": 1.348798155784607, - "learning_rate": 7.698391959798995e-05, - "loss": 5.9286, - "step": 23409 - }, - { - "epoch": 12.208604954367667, - "grad_norm": 1.4377814531326294, - "learning_rate": 7.698291457286433e-05, - "loss": 5.5903, - "step": 23410 - }, - { - "epoch": 12.209126466753585, - "grad_norm": 1.461807131767273, - "learning_rate": 7.69819095477387e-05, - "loss": 5.2634, - "step": 23411 - }, - { - "epoch": 12.209647979139504, - "grad_norm": 1.4917467832565308, - "learning_rate": 7.698090452261307e-05, - "loss": 5.2107, - "step": 23412 - }, - { - "epoch": 12.210169491525424, - "grad_norm": 1.4868557453155518, - "learning_rate": 7.697989949748744e-05, - "loss": 5.4532, - "step": 23413 - }, - { - "epoch": 12.210691003911343, - "grad_norm": 1.475351333618164, - "learning_rate": 7.697889447236181e-05, - "loss": 5.3339, - "step": 23414 - }, - { - "epoch": 12.211212516297262, - "grad_norm": 1.4658844470977783, - "learning_rate": 7.697788944723618e-05, - "loss": 4.8308, - "step": 23415 - }, - { - "epoch": 12.211734028683182, - "grad_norm": 1.484336495399475, - "learning_rate": 7.697688442211056e-05, - "loss": 5.2462, - "step": 23416 - }, - { - "epoch": 12.2122555410691, - "grad_norm": 1.5299919843673706, - "learning_rate": 7.697587939698493e-05, - "loss": 4.8613, - "step": 23417 - }, - { - "epoch": 12.212777053455019, - "grad_norm": 1.369123935699463, - "learning_rate": 7.69748743718593e-05, - "loss": 5.9503, - "step": 23418 - }, - { - "epoch": 12.213298565840939, - "grad_norm": 1.3725521564483643, - "learning_rate": 7.697386934673368e-05, - "loss": 5.4528, - "step": 23419 - }, - { - "epoch": 12.213820078226858, - "grad_norm": 1.421111822128296, - "learning_rate": 7.697286432160804e-05, - "loss": 4.9316, - "step": 23420 - }, - { - "epoch": 12.214341590612777, - "grad_norm": 1.3240050077438354, - "learning_rate": 7.697185929648242e-05, - "loss": 5.8193, - "step": 23421 - }, - { - "epoch": 12.214863102998697, - "grad_norm": 1.3601304292678833, - "learning_rate": 7.697085427135678e-05, - "loss": 5.9338, - "step": 23422 - }, - { - "epoch": 12.215384615384615, - "grad_norm": 1.4981346130371094, - "learning_rate": 7.696984924623116e-05, - "loss": 4.8694, - "step": 23423 - }, - { - "epoch": 12.215906127770534, - "grad_norm": 1.5075773000717163, - "learning_rate": 7.696884422110552e-05, - "loss": 5.2662, - "step": 23424 - }, - { - "epoch": 12.216427640156454, - "grad_norm": 1.3956466913223267, - "learning_rate": 7.69678391959799e-05, - "loss": 5.5857, - "step": 23425 - }, - { - "epoch": 12.216949152542373, - "grad_norm": 1.441998839378357, - "learning_rate": 7.696683417085428e-05, - "loss": 5.5369, - "step": 23426 - }, - { - "epoch": 12.217470664928292, - "grad_norm": 1.4464836120605469, - "learning_rate": 7.696582914572866e-05, - "loss": 5.6208, - "step": 23427 - }, - { - "epoch": 12.217992177314212, - "grad_norm": 1.5827168226242065, - "learning_rate": 7.696482412060302e-05, - "loss": 5.0801, - "step": 23428 - }, - { - "epoch": 12.21851368970013, - "grad_norm": 1.496633529663086, - "learning_rate": 7.696381909547739e-05, - "loss": 5.4215, - "step": 23429 - }, - { - "epoch": 12.219035202086049, - "grad_norm": 1.3296139240264893, - "learning_rate": 7.696281407035176e-05, - "loss": 5.145, - "step": 23430 - }, - { - "epoch": 12.219556714471969, - "grad_norm": 1.5011311769485474, - "learning_rate": 7.696180904522613e-05, - "loss": 5.5977, - "step": 23431 - }, - { - "epoch": 12.220078226857888, - "grad_norm": 1.4469687938690186, - "learning_rate": 7.69608040201005e-05, - "loss": 5.6083, - "step": 23432 - }, - { - "epoch": 12.220599739243807, - "grad_norm": 1.5205358266830444, - "learning_rate": 7.695979899497487e-05, - "loss": 5.5453, - "step": 23433 - }, - { - "epoch": 12.221121251629727, - "grad_norm": 1.426040768623352, - "learning_rate": 7.695879396984925e-05, - "loss": 5.3442, - "step": 23434 - }, - { - "epoch": 12.221642764015645, - "grad_norm": 1.5034383535385132, - "learning_rate": 7.695778894472361e-05, - "loss": 5.8048, - "step": 23435 - }, - { - "epoch": 12.222164276401564, - "grad_norm": 1.3825082778930664, - "learning_rate": 7.695678391959799e-05, - "loss": 5.2168, - "step": 23436 - }, - { - "epoch": 12.222685788787484, - "grad_norm": 1.3344391584396362, - "learning_rate": 7.695577889447237e-05, - "loss": 5.268, - "step": 23437 - }, - { - "epoch": 12.223207301173403, - "grad_norm": 1.468492031097412, - "learning_rate": 7.695477386934675e-05, - "loss": 5.4971, - "step": 23438 - }, - { - "epoch": 12.223728813559323, - "grad_norm": 1.4639095067977905, - "learning_rate": 7.695376884422111e-05, - "loss": 5.2452, - "step": 23439 - }, - { - "epoch": 12.224250325945242, - "grad_norm": 1.5493566989898682, - "learning_rate": 7.695276381909549e-05, - "loss": 5.6298, - "step": 23440 - }, - { - "epoch": 12.22477183833116, - "grad_norm": 1.4565658569335938, - "learning_rate": 7.695175879396985e-05, - "loss": 5.1287, - "step": 23441 - }, - { - "epoch": 12.22529335071708, - "grad_norm": 1.4800050258636475, - "learning_rate": 7.695075376884422e-05, - "loss": 4.8522, - "step": 23442 - }, - { - "epoch": 12.225814863102999, - "grad_norm": 1.4877023696899414, - "learning_rate": 7.69497487437186e-05, - "loss": 5.3778, - "step": 23443 - }, - { - "epoch": 12.226336375488918, - "grad_norm": 1.5119552612304688, - "learning_rate": 7.694874371859296e-05, - "loss": 5.3344, - "step": 23444 - }, - { - "epoch": 12.226857887874838, - "grad_norm": 1.4463722705841064, - "learning_rate": 7.694773869346734e-05, - "loss": 5.4279, - "step": 23445 - }, - { - "epoch": 12.227379400260757, - "grad_norm": 1.481207251548767, - "learning_rate": 7.694673366834171e-05, - "loss": 5.2532, - "step": 23446 - }, - { - "epoch": 12.227900912646675, - "grad_norm": 1.5031452178955078, - "learning_rate": 7.694572864321609e-05, - "loss": 5.6102, - "step": 23447 - }, - { - "epoch": 12.228422425032594, - "grad_norm": 1.6356819868087769, - "learning_rate": 7.694472361809046e-05, - "loss": 4.9041, - "step": 23448 - }, - { - "epoch": 12.228943937418514, - "grad_norm": 1.5679041147232056, - "learning_rate": 7.694371859296483e-05, - "loss": 5.0112, - "step": 23449 - }, - { - "epoch": 12.229465449804433, - "grad_norm": 1.4716136455535889, - "learning_rate": 7.69427135678392e-05, - "loss": 5.269, - "step": 23450 - }, - { - "epoch": 12.229986962190353, - "grad_norm": 1.5324437618255615, - "learning_rate": 7.694170854271358e-05, - "loss": 5.0637, - "step": 23451 - }, - { - "epoch": 12.230508474576272, - "grad_norm": 1.453991413116455, - "learning_rate": 7.694070351758794e-05, - "loss": 5.2728, - "step": 23452 - }, - { - "epoch": 12.23102998696219, - "grad_norm": 1.5617198944091797, - "learning_rate": 7.693969849246232e-05, - "loss": 5.1903, - "step": 23453 - }, - { - "epoch": 12.23155149934811, - "grad_norm": 1.438713788986206, - "learning_rate": 7.693869346733668e-05, - "loss": 5.7281, - "step": 23454 - }, - { - "epoch": 12.232073011734029, - "grad_norm": 1.4736517667770386, - "learning_rate": 7.693768844221106e-05, - "loss": 4.8486, - "step": 23455 - }, - { - "epoch": 12.232594524119948, - "grad_norm": 1.4834736585617065, - "learning_rate": 7.693668341708543e-05, - "loss": 5.3221, - "step": 23456 - }, - { - "epoch": 12.233116036505868, - "grad_norm": 1.3648407459259033, - "learning_rate": 7.69356783919598e-05, - "loss": 5.3461, - "step": 23457 - }, - { - "epoch": 12.233637548891787, - "grad_norm": 1.6084610223770142, - "learning_rate": 7.693467336683418e-05, - "loss": 4.6965, - "step": 23458 - }, - { - "epoch": 12.234159061277705, - "grad_norm": 1.4772447347640991, - "learning_rate": 7.693366834170855e-05, - "loss": 5.559, - "step": 23459 - }, - { - "epoch": 12.234680573663624, - "grad_norm": 1.44584059715271, - "learning_rate": 7.693266331658292e-05, - "loss": 5.2308, - "step": 23460 - }, - { - "epoch": 12.235202086049544, - "grad_norm": 1.4519597291946411, - "learning_rate": 7.693165829145729e-05, - "loss": 5.5279, - "step": 23461 - }, - { - "epoch": 12.235723598435463, - "grad_norm": 1.4691693782806396, - "learning_rate": 7.693065326633166e-05, - "loss": 5.3888, - "step": 23462 - }, - { - "epoch": 12.236245110821383, - "grad_norm": 1.5871127843856812, - "learning_rate": 7.692964824120603e-05, - "loss": 4.7803, - "step": 23463 - }, - { - "epoch": 12.236766623207302, - "grad_norm": 1.4981111288070679, - "learning_rate": 7.692864321608041e-05, - "loss": 5.356, - "step": 23464 - }, - { - "epoch": 12.23728813559322, - "grad_norm": 1.7245451211929321, - "learning_rate": 7.692763819095477e-05, - "loss": 5.0893, - "step": 23465 - }, - { - "epoch": 12.23780964797914, - "grad_norm": 1.3875981569290161, - "learning_rate": 7.692663316582915e-05, - "loss": 4.9763, - "step": 23466 - }, - { - "epoch": 12.238331160365059, - "grad_norm": 1.4713716506958008, - "learning_rate": 7.692562814070353e-05, - "loss": 5.3436, - "step": 23467 - }, - { - "epoch": 12.238852672750978, - "grad_norm": 1.5183501243591309, - "learning_rate": 7.69246231155779e-05, - "loss": 5.5228, - "step": 23468 - }, - { - "epoch": 12.239374185136898, - "grad_norm": 1.4351000785827637, - "learning_rate": 7.692361809045227e-05, - "loss": 5.7282, - "step": 23469 - }, - { - "epoch": 12.239895697522817, - "grad_norm": 1.433763861656189, - "learning_rate": 7.692261306532663e-05, - "loss": 5.7184, - "step": 23470 - }, - { - "epoch": 12.240417209908735, - "grad_norm": 1.4274888038635254, - "learning_rate": 7.692160804020101e-05, - "loss": 5.4598, - "step": 23471 - }, - { - "epoch": 12.240938722294654, - "grad_norm": 1.3823484182357788, - "learning_rate": 7.692060301507538e-05, - "loss": 5.4318, - "step": 23472 - }, - { - "epoch": 12.241460234680574, - "grad_norm": 1.5556508302688599, - "learning_rate": 7.691959798994975e-05, - "loss": 5.2309, - "step": 23473 - }, - { - "epoch": 12.241981747066493, - "grad_norm": 1.4689873456954956, - "learning_rate": 7.691859296482412e-05, - "loss": 5.4114, - "step": 23474 - }, - { - "epoch": 12.242503259452413, - "grad_norm": 1.4011679887771606, - "learning_rate": 7.69175879396985e-05, - "loss": 5.5183, - "step": 23475 - }, - { - "epoch": 12.243024771838332, - "grad_norm": 1.3916503190994263, - "learning_rate": 7.691658291457286e-05, - "loss": 5.2072, - "step": 23476 - }, - { - "epoch": 12.24354628422425, - "grad_norm": 1.4306871891021729, - "learning_rate": 7.691557788944724e-05, - "loss": 5.4965, - "step": 23477 - }, - { - "epoch": 12.24406779661017, - "grad_norm": 1.5122649669647217, - "learning_rate": 7.691457286432162e-05, - "loss": 5.4916, - "step": 23478 - }, - { - "epoch": 12.244589308996089, - "grad_norm": 1.4805233478546143, - "learning_rate": 7.691356783919599e-05, - "loss": 5.254, - "step": 23479 - }, - { - "epoch": 12.245110821382008, - "grad_norm": 1.44832181930542, - "learning_rate": 7.691256281407036e-05, - "loss": 5.5046, - "step": 23480 - }, - { - "epoch": 12.245632333767928, - "grad_norm": 1.51243257522583, - "learning_rate": 7.691155778894474e-05, - "loss": 5.1793, - "step": 23481 - }, - { - "epoch": 12.246153846153845, - "grad_norm": 1.3970242738723755, - "learning_rate": 7.69105527638191e-05, - "loss": 5.5904, - "step": 23482 - }, - { - "epoch": 12.246675358539765, - "grad_norm": 1.5265660285949707, - "learning_rate": 7.690954773869346e-05, - "loss": 4.9084, - "step": 23483 - }, - { - "epoch": 12.247196870925684, - "grad_norm": 1.5538488626480103, - "learning_rate": 7.690854271356784e-05, - "loss": 5.0855, - "step": 23484 - }, - { - "epoch": 12.247718383311604, - "grad_norm": 1.465204119682312, - "learning_rate": 7.69075376884422e-05, - "loss": 5.4725, - "step": 23485 - }, - { - "epoch": 12.248239895697523, - "grad_norm": 1.537168025970459, - "learning_rate": 7.690653266331658e-05, - "loss": 5.3468, - "step": 23486 - }, - { - "epoch": 12.248761408083443, - "grad_norm": 1.479641079902649, - "learning_rate": 7.690552763819096e-05, - "loss": 5.4702, - "step": 23487 - }, - { - "epoch": 12.24928292046936, - "grad_norm": 1.4460856914520264, - "learning_rate": 7.690452261306534e-05, - "loss": 5.453, - "step": 23488 - }, - { - "epoch": 12.24980443285528, - "grad_norm": 1.4692192077636719, - "learning_rate": 7.69035175879397e-05, - "loss": 5.2307, - "step": 23489 - }, - { - "epoch": 12.2503259452412, - "grad_norm": 1.4893276691436768, - "learning_rate": 7.690251256281408e-05, - "loss": 5.6183, - "step": 23490 - }, - { - "epoch": 12.250847457627119, - "grad_norm": 1.4596339464187622, - "learning_rate": 7.690150753768845e-05, - "loss": 5.2109, - "step": 23491 - }, - { - "epoch": 12.251368970013038, - "grad_norm": 1.6318341493606567, - "learning_rate": 7.690050251256282e-05, - "loss": 5.3446, - "step": 23492 - }, - { - "epoch": 12.251890482398958, - "grad_norm": 1.4798396825790405, - "learning_rate": 7.689949748743719e-05, - "loss": 5.2297, - "step": 23493 - }, - { - "epoch": 12.252411994784875, - "grad_norm": 1.426254391670227, - "learning_rate": 7.689849246231157e-05, - "loss": 5.7886, - "step": 23494 - }, - { - "epoch": 12.252933507170795, - "grad_norm": 1.47239351272583, - "learning_rate": 7.689748743718593e-05, - "loss": 5.4894, - "step": 23495 - }, - { - "epoch": 12.253455019556714, - "grad_norm": 1.454176425933838, - "learning_rate": 7.68964824120603e-05, - "loss": 5.0353, - "step": 23496 - }, - { - "epoch": 12.253976531942634, - "grad_norm": 2.3065714836120605, - "learning_rate": 7.689547738693467e-05, - "loss": 4.9133, - "step": 23497 - }, - { - "epoch": 12.254498044328553, - "grad_norm": 1.3974833488464355, - "learning_rate": 7.689447236180905e-05, - "loss": 5.7384, - "step": 23498 - }, - { - "epoch": 12.255019556714473, - "grad_norm": 1.5268901586532593, - "learning_rate": 7.689346733668343e-05, - "loss": 5.543, - "step": 23499 - }, - { - "epoch": 12.25554106910039, - "grad_norm": 1.372110366821289, - "learning_rate": 7.689246231155779e-05, - "loss": 5.6629, - "step": 23500 - }, - { - "epoch": 12.25606258148631, - "grad_norm": 1.3891956806182861, - "learning_rate": 7.689145728643217e-05, - "loss": 5.3797, - "step": 23501 - }, - { - "epoch": 12.25658409387223, - "grad_norm": 1.5568557977676392, - "learning_rate": 7.689045226130653e-05, - "loss": 5.0984, - "step": 23502 - }, - { - "epoch": 12.257105606258149, - "grad_norm": 1.3741075992584229, - "learning_rate": 7.688944723618091e-05, - "loss": 5.3692, - "step": 23503 - }, - { - "epoch": 12.257627118644068, - "grad_norm": 1.4389379024505615, - "learning_rate": 7.688844221105528e-05, - "loss": 5.3149, - "step": 23504 - }, - { - "epoch": 12.258148631029988, - "grad_norm": 2.152184009552002, - "learning_rate": 7.688743718592965e-05, - "loss": 4.4842, - "step": 23505 - }, - { - "epoch": 12.258670143415905, - "grad_norm": 1.5198057889938354, - "learning_rate": 7.688643216080402e-05, - "loss": 5.5752, - "step": 23506 - }, - { - "epoch": 12.259191655801825, - "grad_norm": 1.4948941469192505, - "learning_rate": 7.68854271356784e-05, - "loss": 5.4062, - "step": 23507 - }, - { - "epoch": 12.259713168187744, - "grad_norm": 1.3873770236968994, - "learning_rate": 7.688442211055277e-05, - "loss": 5.3707, - "step": 23508 - }, - { - "epoch": 12.260234680573664, - "grad_norm": 1.433823585510254, - "learning_rate": 7.688341708542714e-05, - "loss": 5.4269, - "step": 23509 - }, - { - "epoch": 12.260756192959583, - "grad_norm": 1.5629554986953735, - "learning_rate": 7.688241206030152e-05, - "loss": 5.65, - "step": 23510 - }, - { - "epoch": 12.261277705345503, - "grad_norm": 1.6349718570709229, - "learning_rate": 7.688140703517588e-05, - "loss": 5.2877, - "step": 23511 - }, - { - "epoch": 12.26179921773142, - "grad_norm": 1.3161325454711914, - "learning_rate": 7.688040201005026e-05, - "loss": 5.655, - "step": 23512 - }, - { - "epoch": 12.26232073011734, - "grad_norm": 1.3688303232192993, - "learning_rate": 7.687939698492462e-05, - "loss": 5.6284, - "step": 23513 - }, - { - "epoch": 12.26284224250326, - "grad_norm": 1.4010018110275269, - "learning_rate": 7.6878391959799e-05, - "loss": 5.5411, - "step": 23514 - }, - { - "epoch": 12.263363754889179, - "grad_norm": 1.3352155685424805, - "learning_rate": 7.687738693467336e-05, - "loss": 5.6485, - "step": 23515 - }, - { - "epoch": 12.263885267275098, - "grad_norm": 1.4744479656219482, - "learning_rate": 7.687638190954774e-05, - "loss": 5.6441, - "step": 23516 - }, - { - "epoch": 12.264406779661018, - "grad_norm": 1.378240704536438, - "learning_rate": 7.68753768844221e-05, - "loss": 5.363, - "step": 23517 - }, - { - "epoch": 12.264928292046935, - "grad_norm": 1.3530656099319458, - "learning_rate": 7.687437185929648e-05, - "loss": 5.7812, - "step": 23518 - }, - { - "epoch": 12.265449804432855, - "grad_norm": 1.4664875268936157, - "learning_rate": 7.687336683417086e-05, - "loss": 5.3958, - "step": 23519 - }, - { - "epoch": 12.265971316818774, - "grad_norm": 1.5010510683059692, - "learning_rate": 7.687236180904524e-05, - "loss": 5.2494, - "step": 23520 - }, - { - "epoch": 12.266492829204694, - "grad_norm": 1.4408434629440308, - "learning_rate": 7.68713567839196e-05, - "loss": 5.5437, - "step": 23521 - }, - { - "epoch": 12.267014341590613, - "grad_norm": 1.484817385673523, - "learning_rate": 7.687035175879397e-05, - "loss": 5.4117, - "step": 23522 - }, - { - "epoch": 12.267535853976533, - "grad_norm": 1.4730479717254639, - "learning_rate": 7.686934673366835e-05, - "loss": 5.3054, - "step": 23523 - }, - { - "epoch": 12.26805736636245, - "grad_norm": 1.4410946369171143, - "learning_rate": 7.686834170854271e-05, - "loss": 5.5291, - "step": 23524 - }, - { - "epoch": 12.26857887874837, - "grad_norm": 1.5051729679107666, - "learning_rate": 7.686733668341709e-05, - "loss": 5.3207, - "step": 23525 - }, - { - "epoch": 12.26910039113429, - "grad_norm": 1.577158808708191, - "learning_rate": 7.686633165829145e-05, - "loss": 5.3924, - "step": 23526 - }, - { - "epoch": 12.269621903520209, - "grad_norm": 1.5518120527267456, - "learning_rate": 7.686532663316583e-05, - "loss": 5.5911, - "step": 23527 - }, - { - "epoch": 12.270143415906128, - "grad_norm": 1.4769539833068848, - "learning_rate": 7.68643216080402e-05, - "loss": 5.3826, - "step": 23528 - }, - { - "epoch": 12.270664928292048, - "grad_norm": 1.4242756366729736, - "learning_rate": 7.686331658291457e-05, - "loss": 5.7021, - "step": 23529 - }, - { - "epoch": 12.271186440677965, - "grad_norm": 1.481825828552246, - "learning_rate": 7.686231155778895e-05, - "loss": 5.3296, - "step": 23530 - }, - { - "epoch": 12.271707953063885, - "grad_norm": 1.4833170175552368, - "learning_rate": 7.686130653266333e-05, - "loss": 5.5658, - "step": 23531 - }, - { - "epoch": 12.272229465449804, - "grad_norm": 1.5606683492660522, - "learning_rate": 7.686030150753769e-05, - "loss": 5.0721, - "step": 23532 - }, - { - "epoch": 12.272750977835724, - "grad_norm": 1.4315614700317383, - "learning_rate": 7.685929648241207e-05, - "loss": 5.5472, - "step": 23533 - }, - { - "epoch": 12.273272490221643, - "grad_norm": 1.4477698802947998, - "learning_rate": 7.685829145728643e-05, - "loss": 5.5709, - "step": 23534 - }, - { - "epoch": 12.273794002607563, - "grad_norm": 1.38666832447052, - "learning_rate": 7.68572864321608e-05, - "loss": 5.7203, - "step": 23535 - }, - { - "epoch": 12.27431551499348, - "grad_norm": 1.5146559476852417, - "learning_rate": 7.685628140703518e-05, - "loss": 5.414, - "step": 23536 - }, - { - "epoch": 12.2748370273794, - "grad_norm": 1.918952226638794, - "learning_rate": 7.685527638190954e-05, - "loss": 4.9108, - "step": 23537 - }, - { - "epoch": 12.27535853976532, - "grad_norm": 1.471876859664917, - "learning_rate": 7.685427135678392e-05, - "loss": 5.6064, - "step": 23538 - }, - { - "epoch": 12.275880052151239, - "grad_norm": 1.584214448928833, - "learning_rate": 7.68532663316583e-05, - "loss": 5.2214, - "step": 23539 - }, - { - "epoch": 12.276401564537158, - "grad_norm": 1.529974102973938, - "learning_rate": 7.685226130653267e-05, - "loss": 5.2193, - "step": 23540 - }, - { - "epoch": 12.276923076923078, - "grad_norm": 1.5200854539871216, - "learning_rate": 7.685125628140704e-05, - "loss": 5.5041, - "step": 23541 - }, - { - "epoch": 12.277444589308995, - "grad_norm": 1.4809788465499878, - "learning_rate": 7.685025125628142e-05, - "loss": 5.5802, - "step": 23542 - }, - { - "epoch": 12.277966101694915, - "grad_norm": 1.4301517009735107, - "learning_rate": 7.684924623115578e-05, - "loss": 5.4782, - "step": 23543 - }, - { - "epoch": 12.278487614080834, - "grad_norm": 1.4741039276123047, - "learning_rate": 7.684824120603016e-05, - "loss": 5.3981, - "step": 23544 - }, - { - "epoch": 12.279009126466754, - "grad_norm": 1.5055347681045532, - "learning_rate": 7.684723618090452e-05, - "loss": 5.5865, - "step": 23545 - }, - { - "epoch": 12.279530638852673, - "grad_norm": 1.4368410110473633, - "learning_rate": 7.68462311557789e-05, - "loss": 4.9875, - "step": 23546 - }, - { - "epoch": 12.280052151238593, - "grad_norm": 1.403823971748352, - "learning_rate": 7.684522613065327e-05, - "loss": 5.2696, - "step": 23547 - }, - { - "epoch": 12.28057366362451, - "grad_norm": 1.4264287948608398, - "learning_rate": 7.684422110552764e-05, - "loss": 5.3022, - "step": 23548 - }, - { - "epoch": 12.28109517601043, - "grad_norm": 1.3024344444274902, - "learning_rate": 7.684321608040201e-05, - "loss": 5.0571, - "step": 23549 - }, - { - "epoch": 12.28161668839635, - "grad_norm": 1.435333490371704, - "learning_rate": 7.684221105527639e-05, - "loss": 5.0737, - "step": 23550 - }, - { - "epoch": 12.282138200782269, - "grad_norm": 1.5997816324234009, - "learning_rate": 7.684120603015076e-05, - "loss": 5.4165, - "step": 23551 - }, - { - "epoch": 12.282659713168188, - "grad_norm": 1.406262755393982, - "learning_rate": 7.684020100502513e-05, - "loss": 5.4008, - "step": 23552 - }, - { - "epoch": 12.283181225554108, - "grad_norm": 1.5628767013549805, - "learning_rate": 7.68391959798995e-05, - "loss": 5.0606, - "step": 23553 - }, - { - "epoch": 12.283702737940025, - "grad_norm": 1.5092662572860718, - "learning_rate": 7.683819095477387e-05, - "loss": 5.1271, - "step": 23554 - }, - { - "epoch": 12.284224250325945, - "grad_norm": 1.5450851917266846, - "learning_rate": 7.683718592964825e-05, - "loss": 5.5982, - "step": 23555 - }, - { - "epoch": 12.284745762711864, - "grad_norm": 1.4596476554870605, - "learning_rate": 7.683618090452261e-05, - "loss": 5.4758, - "step": 23556 - }, - { - "epoch": 12.285267275097784, - "grad_norm": 1.4630751609802246, - "learning_rate": 7.683517587939699e-05, - "loss": 5.0783, - "step": 23557 - }, - { - "epoch": 12.285788787483703, - "grad_norm": 1.401288628578186, - "learning_rate": 7.683417085427135e-05, - "loss": 5.7694, - "step": 23558 - }, - { - "epoch": 12.286310299869623, - "grad_norm": 1.6064871549606323, - "learning_rate": 7.683316582914573e-05, - "loss": 5.1078, - "step": 23559 - }, - { - "epoch": 12.28683181225554, - "grad_norm": 1.4648468494415283, - "learning_rate": 7.683216080402011e-05, - "loss": 5.64, - "step": 23560 - }, - { - "epoch": 12.28735332464146, - "grad_norm": 1.391213059425354, - "learning_rate": 7.683115577889449e-05, - "loss": 5.8249, - "step": 23561 - }, - { - "epoch": 12.28787483702738, - "grad_norm": 1.3655915260314941, - "learning_rate": 7.683015075376885e-05, - "loss": 5.8063, - "step": 23562 - }, - { - "epoch": 12.288396349413299, - "grad_norm": 1.4405900239944458, - "learning_rate": 7.682914572864322e-05, - "loss": 5.7258, - "step": 23563 - }, - { - "epoch": 12.288917861799218, - "grad_norm": 1.5103713274002075, - "learning_rate": 7.68281407035176e-05, - "loss": 5.0084, - "step": 23564 - }, - { - "epoch": 12.289439374185136, - "grad_norm": 1.5414390563964844, - "learning_rate": 7.682713567839196e-05, - "loss": 5.2937, - "step": 23565 - }, - { - "epoch": 12.289960886571055, - "grad_norm": 1.604797124862671, - "learning_rate": 7.682613065326634e-05, - "loss": 5.2154, - "step": 23566 - }, - { - "epoch": 12.290482398956975, - "grad_norm": 1.4621418714523315, - "learning_rate": 7.68251256281407e-05, - "loss": 5.7437, - "step": 23567 - }, - { - "epoch": 12.291003911342894, - "grad_norm": 1.395200252532959, - "learning_rate": 7.682412060301508e-05, - "loss": 5.5055, - "step": 23568 - }, - { - "epoch": 12.291525423728814, - "grad_norm": 1.6037142276763916, - "learning_rate": 7.682311557788944e-05, - "loss": 4.1766, - "step": 23569 - }, - { - "epoch": 12.292046936114733, - "grad_norm": 1.401550531387329, - "learning_rate": 7.682211055276382e-05, - "loss": 5.5729, - "step": 23570 - }, - { - "epoch": 12.292568448500653, - "grad_norm": 1.5912796258926392, - "learning_rate": 7.68211055276382e-05, - "loss": 5.7182, - "step": 23571 - }, - { - "epoch": 12.29308996088657, - "grad_norm": 1.4828217029571533, - "learning_rate": 7.682010050251258e-05, - "loss": 5.2549, - "step": 23572 - }, - { - "epoch": 12.29361147327249, - "grad_norm": 1.4439067840576172, - "learning_rate": 7.681909547738694e-05, - "loss": 5.217, - "step": 23573 - }, - { - "epoch": 12.29413298565841, - "grad_norm": 1.5239394903182983, - "learning_rate": 7.681809045226132e-05, - "loss": 5.278, - "step": 23574 - }, - { - "epoch": 12.294654498044329, - "grad_norm": 1.4097518920898438, - "learning_rate": 7.681708542713568e-05, - "loss": 5.4917, - "step": 23575 - }, - { - "epoch": 12.295176010430248, - "grad_norm": 1.5155402421951294, - "learning_rate": 7.681608040201005e-05, - "loss": 5.1568, - "step": 23576 - }, - { - "epoch": 12.295697522816166, - "grad_norm": 1.4986567497253418, - "learning_rate": 7.681507537688442e-05, - "loss": 5.3506, - "step": 23577 - }, - { - "epoch": 12.296219035202085, - "grad_norm": 1.5848749876022339, - "learning_rate": 7.681407035175879e-05, - "loss": 5.1226, - "step": 23578 - }, - { - "epoch": 12.296740547588005, - "grad_norm": 1.5412745475769043, - "learning_rate": 7.681306532663317e-05, - "loss": 5.5389, - "step": 23579 - }, - { - "epoch": 12.297262059973924, - "grad_norm": 1.4789113998413086, - "learning_rate": 7.681206030150754e-05, - "loss": 5.548, - "step": 23580 - }, - { - "epoch": 12.297783572359844, - "grad_norm": 1.4641568660736084, - "learning_rate": 7.681105527638192e-05, - "loss": 5.4423, - "step": 23581 - }, - { - "epoch": 12.298305084745763, - "grad_norm": 1.5310620069503784, - "learning_rate": 7.681005025125629e-05, - "loss": 5.0952, - "step": 23582 - }, - { - "epoch": 12.298826597131681, - "grad_norm": 1.4498887062072754, - "learning_rate": 7.680904522613066e-05, - "loss": 5.4163, - "step": 23583 - }, - { - "epoch": 12.2993481095176, - "grad_norm": 1.4151877164840698, - "learning_rate": 7.680804020100503e-05, - "loss": 5.3434, - "step": 23584 - }, - { - "epoch": 12.29986962190352, - "grad_norm": 1.7020518779754639, - "learning_rate": 7.68070351758794e-05, - "loss": 4.9903, - "step": 23585 - }, - { - "epoch": 12.30039113428944, - "grad_norm": 1.353913426399231, - "learning_rate": 7.680603015075377e-05, - "loss": 5.7409, - "step": 23586 - }, - { - "epoch": 12.300912646675359, - "grad_norm": 1.5250664949417114, - "learning_rate": 7.680502512562815e-05, - "loss": 5.5964, - "step": 23587 - }, - { - "epoch": 12.301434159061278, - "grad_norm": 1.6111100912094116, - "learning_rate": 7.680402010050251e-05, - "loss": 5.1991, - "step": 23588 - }, - { - "epoch": 12.301955671447196, - "grad_norm": 1.4251304864883423, - "learning_rate": 7.680301507537688e-05, - "loss": 5.785, - "step": 23589 - }, - { - "epoch": 12.302477183833116, - "grad_norm": 1.349582314491272, - "learning_rate": 7.680201005025125e-05, - "loss": 5.5351, - "step": 23590 - }, - { - "epoch": 12.302998696219035, - "grad_norm": 1.5072436332702637, - "learning_rate": 7.680100502512563e-05, - "loss": 5.3655, - "step": 23591 - }, - { - "epoch": 12.303520208604954, - "grad_norm": 1.381162405014038, - "learning_rate": 7.680000000000001e-05, - "loss": 5.1491, - "step": 23592 - }, - { - "epoch": 12.304041720990874, - "grad_norm": 1.5225462913513184, - "learning_rate": 7.679899497487437e-05, - "loss": 4.7669, - "step": 23593 - }, - { - "epoch": 12.304563233376793, - "grad_norm": 1.499366044998169, - "learning_rate": 7.679798994974875e-05, - "loss": 5.52, - "step": 23594 - }, - { - "epoch": 12.305084745762711, - "grad_norm": 1.4931479692459106, - "learning_rate": 7.679698492462312e-05, - "loss": 5.3771, - "step": 23595 - }, - { - "epoch": 12.30560625814863, - "grad_norm": 1.4286094903945923, - "learning_rate": 7.67959798994975e-05, - "loss": 5.3746, - "step": 23596 - }, - { - "epoch": 12.30612777053455, - "grad_norm": 1.4306505918502808, - "learning_rate": 7.679497487437186e-05, - "loss": 5.3166, - "step": 23597 - }, - { - "epoch": 12.30664928292047, - "grad_norm": 1.5476855039596558, - "learning_rate": 7.679396984924624e-05, - "loss": 4.767, - "step": 23598 - }, - { - "epoch": 12.307170795306389, - "grad_norm": 1.4481042623519897, - "learning_rate": 7.67929648241206e-05, - "loss": 5.6851, - "step": 23599 - }, - { - "epoch": 12.307692307692308, - "grad_norm": 1.441007137298584, - "learning_rate": 7.679195979899498e-05, - "loss": 5.7031, - "step": 23600 - }, - { - "epoch": 12.308213820078226, - "grad_norm": 1.6182913780212402, - "learning_rate": 7.679095477386936e-05, - "loss": 4.9624, - "step": 23601 - }, - { - "epoch": 12.308735332464146, - "grad_norm": 1.4677131175994873, - "learning_rate": 7.678994974874372e-05, - "loss": 5.6574, - "step": 23602 - }, - { - "epoch": 12.309256844850065, - "grad_norm": 1.4375437498092651, - "learning_rate": 7.67889447236181e-05, - "loss": 5.3312, - "step": 23603 - }, - { - "epoch": 12.309778357235984, - "grad_norm": 1.4641664028167725, - "learning_rate": 7.678793969849246e-05, - "loss": 5.3092, - "step": 23604 - }, - { - "epoch": 12.310299869621904, - "grad_norm": 1.4859267473220825, - "learning_rate": 7.678693467336684e-05, - "loss": 5.4255, - "step": 23605 - }, - { - "epoch": 12.310821382007823, - "grad_norm": 1.4179842472076416, - "learning_rate": 7.67859296482412e-05, - "loss": 5.5159, - "step": 23606 - }, - { - "epoch": 12.311342894393741, - "grad_norm": 1.450583815574646, - "learning_rate": 7.678492462311558e-05, - "loss": 5.0103, - "step": 23607 - }, - { - "epoch": 12.31186440677966, - "grad_norm": 1.4421262741088867, - "learning_rate": 7.678391959798995e-05, - "loss": 5.4183, - "step": 23608 - }, - { - "epoch": 12.31238591916558, - "grad_norm": 1.7716171741485596, - "learning_rate": 7.678291457286432e-05, - "loss": 4.7523, - "step": 23609 - }, - { - "epoch": 12.3129074315515, - "grad_norm": 1.4396649599075317, - "learning_rate": 7.678190954773869e-05, - "loss": 5.5994, - "step": 23610 - }, - { - "epoch": 12.313428943937419, - "grad_norm": 1.5082061290740967, - "learning_rate": 7.678090452261307e-05, - "loss": 5.659, - "step": 23611 - }, - { - "epoch": 12.313950456323338, - "grad_norm": 1.4844192266464233, - "learning_rate": 7.677989949748744e-05, - "loss": 5.1157, - "step": 23612 - }, - { - "epoch": 12.314471968709256, - "grad_norm": 1.3942553997039795, - "learning_rate": 7.677889447236182e-05, - "loss": 4.3651, - "step": 23613 - }, - { - "epoch": 12.314993481095176, - "grad_norm": 1.4373472929000854, - "learning_rate": 7.677788944723619e-05, - "loss": 5.6214, - "step": 23614 - }, - { - "epoch": 12.315514993481095, - "grad_norm": 1.4922218322753906, - "learning_rate": 7.677688442211055e-05, - "loss": 5.4275, - "step": 23615 - }, - { - "epoch": 12.316036505867014, - "grad_norm": 1.519867181777954, - "learning_rate": 7.677587939698493e-05, - "loss": 5.3819, - "step": 23616 - }, - { - "epoch": 12.316558018252934, - "grad_norm": 1.5627973079681396, - "learning_rate": 7.677487437185929e-05, - "loss": 5.5159, - "step": 23617 - }, - { - "epoch": 12.317079530638853, - "grad_norm": 1.4274423122406006, - "learning_rate": 7.677386934673367e-05, - "loss": 5.5081, - "step": 23618 - }, - { - "epoch": 12.317601043024771, - "grad_norm": 1.4206336736679077, - "learning_rate": 7.677286432160804e-05, - "loss": 5.4786, - "step": 23619 - }, - { - "epoch": 12.31812255541069, - "grad_norm": 1.567037582397461, - "learning_rate": 7.677185929648241e-05, - "loss": 5.1983, - "step": 23620 - }, - { - "epoch": 12.31864406779661, - "grad_norm": 1.6467007398605347, - "learning_rate": 7.677085427135679e-05, - "loss": 5.0612, - "step": 23621 - }, - { - "epoch": 12.31916558018253, - "grad_norm": 1.5112676620483398, - "learning_rate": 7.676984924623117e-05, - "loss": 5.823, - "step": 23622 - }, - { - "epoch": 12.319687092568449, - "grad_norm": 1.4643086194992065, - "learning_rate": 7.676884422110553e-05, - "loss": 4.8157, - "step": 23623 - }, - { - "epoch": 12.320208604954368, - "grad_norm": 1.5048716068267822, - "learning_rate": 7.676783919597991e-05, - "loss": 5.1658, - "step": 23624 - }, - { - "epoch": 12.320730117340286, - "grad_norm": 1.4298841953277588, - "learning_rate": 7.676683417085428e-05, - "loss": 5.209, - "step": 23625 - }, - { - "epoch": 12.321251629726206, - "grad_norm": 1.4957914352416992, - "learning_rate": 7.676582914572865e-05, - "loss": 5.2019, - "step": 23626 - }, - { - "epoch": 12.321773142112125, - "grad_norm": 1.488970160484314, - "learning_rate": 7.676482412060302e-05, - "loss": 5.2745, - "step": 23627 - }, - { - "epoch": 12.322294654498045, - "grad_norm": 1.5350968837738037, - "learning_rate": 7.67638190954774e-05, - "loss": 5.4405, - "step": 23628 - }, - { - "epoch": 12.322816166883964, - "grad_norm": 1.3907203674316406, - "learning_rate": 7.676281407035176e-05, - "loss": 5.0682, - "step": 23629 - }, - { - "epoch": 12.323337679269883, - "grad_norm": 1.3493101596832275, - "learning_rate": 7.676180904522612e-05, - "loss": 5.4242, - "step": 23630 - }, - { - "epoch": 12.323859191655801, - "grad_norm": 1.4014216661453247, - "learning_rate": 7.67608040201005e-05, - "loss": 5.7191, - "step": 23631 - }, - { - "epoch": 12.32438070404172, - "grad_norm": 1.4452831745147705, - "learning_rate": 7.675979899497488e-05, - "loss": 5.4763, - "step": 23632 - }, - { - "epoch": 12.32490221642764, - "grad_norm": 1.471582055091858, - "learning_rate": 7.675879396984926e-05, - "loss": 5.4928, - "step": 23633 - }, - { - "epoch": 12.32542372881356, - "grad_norm": 1.4015012979507446, - "learning_rate": 7.675778894472362e-05, - "loss": 5.1767, - "step": 23634 - }, - { - "epoch": 12.325945241199479, - "grad_norm": 1.3674722909927368, - "learning_rate": 7.6756783919598e-05, - "loss": 5.4317, - "step": 23635 - }, - { - "epoch": 12.326466753585398, - "grad_norm": 1.4732887744903564, - "learning_rate": 7.675577889447236e-05, - "loss": 5.2916, - "step": 23636 - }, - { - "epoch": 12.326988265971316, - "grad_norm": 1.4947097301483154, - "learning_rate": 7.675477386934674e-05, - "loss": 4.9706, - "step": 23637 - }, - { - "epoch": 12.327509778357236, - "grad_norm": 1.4739835262298584, - "learning_rate": 7.67537688442211e-05, - "loss": 5.4793, - "step": 23638 - }, - { - "epoch": 12.328031290743155, - "grad_norm": 1.4755786657333374, - "learning_rate": 7.675276381909548e-05, - "loss": 5.5868, - "step": 23639 - }, - { - "epoch": 12.328552803129075, - "grad_norm": 1.507470965385437, - "learning_rate": 7.675175879396985e-05, - "loss": 5.4429, - "step": 23640 - }, - { - "epoch": 12.329074315514994, - "grad_norm": 1.3887451887130737, - "learning_rate": 7.675075376884423e-05, - "loss": 4.9723, - "step": 23641 - }, - { - "epoch": 12.329595827900913, - "grad_norm": 1.8033865690231323, - "learning_rate": 7.67497487437186e-05, - "loss": 5.5927, - "step": 23642 - }, - { - "epoch": 12.330117340286831, - "grad_norm": 1.5592985153198242, - "learning_rate": 7.674874371859297e-05, - "loss": 4.9597, - "step": 23643 - }, - { - "epoch": 12.33063885267275, - "grad_norm": 1.5028451681137085, - "learning_rate": 7.674773869346735e-05, - "loss": 5.2987, - "step": 23644 - }, - { - "epoch": 12.33116036505867, - "grad_norm": 1.4387253522872925, - "learning_rate": 7.674673366834171e-05, - "loss": 5.4767, - "step": 23645 - }, - { - "epoch": 12.33168187744459, - "grad_norm": 1.5133087635040283, - "learning_rate": 7.674572864321609e-05, - "loss": 5.2067, - "step": 23646 - }, - { - "epoch": 12.332203389830509, - "grad_norm": 1.541138768196106, - "learning_rate": 7.674472361809045e-05, - "loss": 5.2295, - "step": 23647 - }, - { - "epoch": 12.332724902216428, - "grad_norm": 1.4436063766479492, - "learning_rate": 7.674371859296483e-05, - "loss": 5.6369, - "step": 23648 - }, - { - "epoch": 12.333246414602346, - "grad_norm": 1.4134939908981323, - "learning_rate": 7.67427135678392e-05, - "loss": 5.48, - "step": 23649 - }, - { - "epoch": 12.333767926988266, - "grad_norm": 1.429584264755249, - "learning_rate": 7.674170854271357e-05, - "loss": 5.4802, - "step": 23650 - }, - { - "epoch": 12.334289439374185, - "grad_norm": 1.3951953649520874, - "learning_rate": 7.674070351758794e-05, - "loss": 5.6477, - "step": 23651 - }, - { - "epoch": 12.334810951760105, - "grad_norm": 1.3422776460647583, - "learning_rate": 7.673969849246231e-05, - "loss": 5.4358, - "step": 23652 - }, - { - "epoch": 12.335332464146024, - "grad_norm": 1.4771535396575928, - "learning_rate": 7.673869346733669e-05, - "loss": 5.1673, - "step": 23653 - }, - { - "epoch": 12.335853976531943, - "grad_norm": 1.443471074104309, - "learning_rate": 7.673768844221107e-05, - "loss": 5.2717, - "step": 23654 - }, - { - "epoch": 12.336375488917861, - "grad_norm": 1.5715981721878052, - "learning_rate": 7.673668341708543e-05, - "loss": 5.0272, - "step": 23655 - }, - { - "epoch": 12.33689700130378, - "grad_norm": 1.4622397422790527, - "learning_rate": 7.67356783919598e-05, - "loss": 5.051, - "step": 23656 - }, - { - "epoch": 12.3374185136897, - "grad_norm": 1.5086497068405151, - "learning_rate": 7.673467336683418e-05, - "loss": 5.4506, - "step": 23657 - }, - { - "epoch": 12.33794002607562, - "grad_norm": 1.4242887496948242, - "learning_rate": 7.673366834170854e-05, - "loss": 5.4185, - "step": 23658 - }, - { - "epoch": 12.338461538461539, - "grad_norm": 1.4471826553344727, - "learning_rate": 7.673266331658292e-05, - "loss": 5.539, - "step": 23659 - }, - { - "epoch": 12.338983050847457, - "grad_norm": 1.5423641204833984, - "learning_rate": 7.673165829145728e-05, - "loss": 4.9435, - "step": 23660 - }, - { - "epoch": 12.339504563233376, - "grad_norm": 1.3631783723831177, - "learning_rate": 7.673065326633166e-05, - "loss": 5.5042, - "step": 23661 - }, - { - "epoch": 12.340026075619296, - "grad_norm": 1.4827919006347656, - "learning_rate": 7.672964824120604e-05, - "loss": 5.5358, - "step": 23662 - }, - { - "epoch": 12.340547588005215, - "grad_norm": 1.6170425415039062, - "learning_rate": 7.672864321608042e-05, - "loss": 5.2686, - "step": 23663 - }, - { - "epoch": 12.341069100391135, - "grad_norm": 1.4040875434875488, - "learning_rate": 7.672763819095478e-05, - "loss": 5.7891, - "step": 23664 - }, - { - "epoch": 12.341590612777054, - "grad_norm": 1.440994143486023, - "learning_rate": 7.672663316582916e-05, - "loss": 5.5944, - "step": 23665 - }, - { - "epoch": 12.342112125162974, - "grad_norm": 1.3963356018066406, - "learning_rate": 7.672562814070352e-05, - "loss": 5.3525, - "step": 23666 - }, - { - "epoch": 12.342633637548891, - "grad_norm": 1.5071451663970947, - "learning_rate": 7.67246231155779e-05, - "loss": 5.1959, - "step": 23667 - }, - { - "epoch": 12.34315514993481, - "grad_norm": 1.5508922338485718, - "learning_rate": 7.672361809045226e-05, - "loss": 5.3707, - "step": 23668 - }, - { - "epoch": 12.34367666232073, - "grad_norm": 1.5107954740524292, - "learning_rate": 7.672261306532663e-05, - "loss": 5.6344, - "step": 23669 - }, - { - "epoch": 12.34419817470665, - "grad_norm": 1.493680715560913, - "learning_rate": 7.6721608040201e-05, - "loss": 5.4422, - "step": 23670 - }, - { - "epoch": 12.344719687092569, - "grad_norm": 1.4298105239868164, - "learning_rate": 7.672060301507537e-05, - "loss": 5.6975, - "step": 23671 - }, - { - "epoch": 12.345241199478487, - "grad_norm": 1.5458966493606567, - "learning_rate": 7.671959798994975e-05, - "loss": 5.4024, - "step": 23672 - }, - { - "epoch": 12.345762711864406, - "grad_norm": 1.5120104551315308, - "learning_rate": 7.671859296482413e-05, - "loss": 5.4573, - "step": 23673 - }, - { - "epoch": 12.346284224250326, - "grad_norm": 1.5521783828735352, - "learning_rate": 7.67175879396985e-05, - "loss": 4.5637, - "step": 23674 - }, - { - "epoch": 12.346805736636245, - "grad_norm": 1.5491816997528076, - "learning_rate": 7.671658291457287e-05, - "loss": 5.3356, - "step": 23675 - }, - { - "epoch": 12.347327249022165, - "grad_norm": 1.4213879108428955, - "learning_rate": 7.671557788944725e-05, - "loss": 5.5824, - "step": 23676 - }, - { - "epoch": 12.347848761408084, - "grad_norm": 1.4127589464187622, - "learning_rate": 7.671457286432161e-05, - "loss": 5.2708, - "step": 23677 - }, - { - "epoch": 12.348370273794002, - "grad_norm": 1.4159135818481445, - "learning_rate": 7.671356783919599e-05, - "loss": 5.7077, - "step": 23678 - }, - { - "epoch": 12.348891786179921, - "grad_norm": 1.5135833024978638, - "learning_rate": 7.671256281407035e-05, - "loss": 5.0548, - "step": 23679 - }, - { - "epoch": 12.34941329856584, - "grad_norm": 1.4795161485671997, - "learning_rate": 7.671155778894473e-05, - "loss": 5.6348, - "step": 23680 - }, - { - "epoch": 12.34993481095176, - "grad_norm": 1.636663556098938, - "learning_rate": 7.67105527638191e-05, - "loss": 5.1344, - "step": 23681 - }, - { - "epoch": 12.35045632333768, - "grad_norm": 1.4778704643249512, - "learning_rate": 7.670954773869347e-05, - "loss": 5.2474, - "step": 23682 - }, - { - "epoch": 12.350977835723599, - "grad_norm": 1.5429329872131348, - "learning_rate": 7.670854271356785e-05, - "loss": 5.1915, - "step": 23683 - }, - { - "epoch": 12.351499348109517, - "grad_norm": 1.8718957901000977, - "learning_rate": 7.670753768844221e-05, - "loss": 4.996, - "step": 23684 - }, - { - "epoch": 12.352020860495436, - "grad_norm": 1.5296001434326172, - "learning_rate": 7.670653266331659e-05, - "loss": 5.4835, - "step": 23685 - }, - { - "epoch": 12.352542372881356, - "grad_norm": 1.4372080564498901, - "learning_rate": 7.670552763819096e-05, - "loss": 5.3605, - "step": 23686 - }, - { - "epoch": 12.353063885267275, - "grad_norm": 1.4230800867080688, - "learning_rate": 7.670452261306533e-05, - "loss": 5.2265, - "step": 23687 - }, - { - "epoch": 12.353585397653195, - "grad_norm": 1.3419502973556519, - "learning_rate": 7.67035175879397e-05, - "loss": 5.8891, - "step": 23688 - }, - { - "epoch": 12.354106910039114, - "grad_norm": 1.440359354019165, - "learning_rate": 7.670251256281408e-05, - "loss": 5.0338, - "step": 23689 - }, - { - "epoch": 12.354628422425032, - "grad_norm": 1.41267728805542, - "learning_rate": 7.670150753768844e-05, - "loss": 5.6896, - "step": 23690 - }, - { - "epoch": 12.355149934810951, - "grad_norm": 1.4996075630187988, - "learning_rate": 7.670050251256282e-05, - "loss": 5.4835, - "step": 23691 - }, - { - "epoch": 12.35567144719687, - "grad_norm": 1.4483411312103271, - "learning_rate": 7.669949748743718e-05, - "loss": 5.3165, - "step": 23692 - }, - { - "epoch": 12.35619295958279, - "grad_norm": 1.4509235620498657, - "learning_rate": 7.669849246231156e-05, - "loss": 5.4318, - "step": 23693 - }, - { - "epoch": 12.35671447196871, - "grad_norm": 1.4060497283935547, - "learning_rate": 7.669748743718594e-05, - "loss": 5.6409, - "step": 23694 - }, - { - "epoch": 12.357235984354629, - "grad_norm": 1.3695839643478394, - "learning_rate": 7.66964824120603e-05, - "loss": 5.2531, - "step": 23695 - }, - { - "epoch": 12.357757496740547, - "grad_norm": 1.612196683883667, - "learning_rate": 7.669547738693468e-05, - "loss": 5.4142, - "step": 23696 - }, - { - "epoch": 12.358279009126466, - "grad_norm": 1.3526718616485596, - "learning_rate": 7.669447236180905e-05, - "loss": 5.8948, - "step": 23697 - }, - { - "epoch": 12.358800521512386, - "grad_norm": 1.5156418085098267, - "learning_rate": 7.669346733668342e-05, - "loss": 4.688, - "step": 23698 - }, - { - "epoch": 12.359322033898305, - "grad_norm": 1.5916551351547241, - "learning_rate": 7.669246231155779e-05, - "loss": 5.1795, - "step": 23699 - }, - { - "epoch": 12.359843546284225, - "grad_norm": 1.5424823760986328, - "learning_rate": 7.669145728643216e-05, - "loss": 5.3087, - "step": 23700 - }, - { - "epoch": 12.360365058670144, - "grad_norm": 1.5496686697006226, - "learning_rate": 7.669045226130653e-05, - "loss": 5.2375, - "step": 23701 - }, - { - "epoch": 12.360886571056062, - "grad_norm": 1.3954726457595825, - "learning_rate": 7.668944723618091e-05, - "loss": 5.5602, - "step": 23702 - }, - { - "epoch": 12.361408083441981, - "grad_norm": 1.55675208568573, - "learning_rate": 7.668844221105527e-05, - "loss": 5.4108, - "step": 23703 - }, - { - "epoch": 12.3619295958279, - "grad_norm": 1.487550139427185, - "learning_rate": 7.668743718592965e-05, - "loss": 5.3019, - "step": 23704 - }, - { - "epoch": 12.36245110821382, - "grad_norm": 1.4501068592071533, - "learning_rate": 7.668643216080403e-05, - "loss": 5.2373, - "step": 23705 - }, - { - "epoch": 12.36297262059974, - "grad_norm": 1.4231985807418823, - "learning_rate": 7.66854271356784e-05, - "loss": 5.1564, - "step": 23706 - }, - { - "epoch": 12.36349413298566, - "grad_norm": 1.4249277114868164, - "learning_rate": 7.668442211055277e-05, - "loss": 5.5049, - "step": 23707 - }, - { - "epoch": 12.364015645371577, - "grad_norm": 1.4019403457641602, - "learning_rate": 7.668341708542713e-05, - "loss": 5.5468, - "step": 23708 - }, - { - "epoch": 12.364537157757496, - "grad_norm": 1.3663320541381836, - "learning_rate": 7.668241206030151e-05, - "loss": 5.8751, - "step": 23709 - }, - { - "epoch": 12.365058670143416, - "grad_norm": 1.5527546405792236, - "learning_rate": 7.668140703517588e-05, - "loss": 4.8432, - "step": 23710 - }, - { - "epoch": 12.365580182529335, - "grad_norm": 1.4574214220046997, - "learning_rate": 7.668040201005025e-05, - "loss": 5.351, - "step": 23711 - }, - { - "epoch": 12.366101694915255, - "grad_norm": 1.4102742671966553, - "learning_rate": 7.667939698492462e-05, - "loss": 5.2538, - "step": 23712 - }, - { - "epoch": 12.366623207301174, - "grad_norm": 1.433205008506775, - "learning_rate": 7.6678391959799e-05, - "loss": 5.4038, - "step": 23713 - }, - { - "epoch": 12.367144719687092, - "grad_norm": 1.4234426021575928, - "learning_rate": 7.667738693467337e-05, - "loss": 5.502, - "step": 23714 - }, - { - "epoch": 12.367666232073011, - "grad_norm": 1.5028311014175415, - "learning_rate": 7.667638190954775e-05, - "loss": 5.6442, - "step": 23715 - }, - { - "epoch": 12.36818774445893, - "grad_norm": 1.4873422384262085, - "learning_rate": 7.667537688442212e-05, - "loss": 5.2097, - "step": 23716 - }, - { - "epoch": 12.36870925684485, - "grad_norm": 1.392982840538025, - "learning_rate": 7.667437185929649e-05, - "loss": 5.6197, - "step": 23717 - }, - { - "epoch": 12.36923076923077, - "grad_norm": 1.4624534845352173, - "learning_rate": 7.667336683417086e-05, - "loss": 5.296, - "step": 23718 - }, - { - "epoch": 12.36975228161669, - "grad_norm": 1.7282353639602661, - "learning_rate": 7.667236180904524e-05, - "loss": 5.0758, - "step": 23719 - }, - { - "epoch": 12.370273794002607, - "grad_norm": 1.5903007984161377, - "learning_rate": 7.66713567839196e-05, - "loss": 5.0268, - "step": 23720 - }, - { - "epoch": 12.370795306388526, - "grad_norm": 1.443490982055664, - "learning_rate": 7.667035175879398e-05, - "loss": 4.8423, - "step": 23721 - }, - { - "epoch": 12.371316818774446, - "grad_norm": 1.386910080909729, - "learning_rate": 7.666934673366834e-05, - "loss": 5.343, - "step": 23722 - }, - { - "epoch": 12.371838331160365, - "grad_norm": 1.414070963859558, - "learning_rate": 7.66683417085427e-05, - "loss": 5.8942, - "step": 23723 - }, - { - "epoch": 12.372359843546285, - "grad_norm": 1.5113693475723267, - "learning_rate": 7.666733668341708e-05, - "loss": 5.272, - "step": 23724 - }, - { - "epoch": 12.372881355932204, - "grad_norm": 1.4365779161453247, - "learning_rate": 7.666633165829146e-05, - "loss": 5.3439, - "step": 23725 - }, - { - "epoch": 12.373402868318122, - "grad_norm": 1.501918077468872, - "learning_rate": 7.666532663316584e-05, - "loss": 5.4954, - "step": 23726 - }, - { - "epoch": 12.373924380704041, - "grad_norm": 1.4559036493301392, - "learning_rate": 7.66643216080402e-05, - "loss": 5.1647, - "step": 23727 - }, - { - "epoch": 12.37444589308996, - "grad_norm": 1.4163917303085327, - "learning_rate": 7.666331658291458e-05, - "loss": 5.5564, - "step": 23728 - }, - { - "epoch": 12.37496740547588, - "grad_norm": 1.4073452949523926, - "learning_rate": 7.666231155778895e-05, - "loss": 5.5797, - "step": 23729 - }, - { - "epoch": 12.3754889178618, - "grad_norm": 1.3822050094604492, - "learning_rate": 7.666130653266332e-05, - "loss": 5.5231, - "step": 23730 - }, - { - "epoch": 12.37601043024772, - "grad_norm": 1.4354628324508667, - "learning_rate": 7.666030150753769e-05, - "loss": 5.4379, - "step": 23731 - }, - { - "epoch": 12.376531942633637, - "grad_norm": 1.4167670011520386, - "learning_rate": 7.665929648241207e-05, - "loss": 5.8126, - "step": 23732 - }, - { - "epoch": 12.377053455019556, - "grad_norm": 1.6715668439865112, - "learning_rate": 7.665829145728643e-05, - "loss": 4.9626, - "step": 23733 - }, - { - "epoch": 12.377574967405476, - "grad_norm": 1.4501445293426514, - "learning_rate": 7.665728643216081e-05, - "loss": 5.2342, - "step": 23734 - }, - { - "epoch": 12.378096479791395, - "grad_norm": 1.4735110998153687, - "learning_rate": 7.665628140703519e-05, - "loss": 5.4406, - "step": 23735 - }, - { - "epoch": 12.378617992177315, - "grad_norm": 1.6361209154129028, - "learning_rate": 7.665527638190955e-05, - "loss": 5.3364, - "step": 23736 - }, - { - "epoch": 12.379139504563234, - "grad_norm": 1.4286049604415894, - "learning_rate": 7.665427135678393e-05, - "loss": 5.5233, - "step": 23737 - }, - { - "epoch": 12.379661016949152, - "grad_norm": 1.528102993965149, - "learning_rate": 7.665326633165829e-05, - "loss": 5.3237, - "step": 23738 - }, - { - "epoch": 12.380182529335071, - "grad_norm": 1.5193580389022827, - "learning_rate": 7.665226130653267e-05, - "loss": 5.245, - "step": 23739 - }, - { - "epoch": 12.38070404172099, - "grad_norm": 1.5164334774017334, - "learning_rate": 7.665125628140703e-05, - "loss": 5.2387, - "step": 23740 - }, - { - "epoch": 12.38122555410691, - "grad_norm": 1.63292396068573, - "learning_rate": 7.665025125628141e-05, - "loss": 5.1478, - "step": 23741 - }, - { - "epoch": 12.38174706649283, - "grad_norm": 1.5880143642425537, - "learning_rate": 7.664924623115578e-05, - "loss": 4.6874, - "step": 23742 - }, - { - "epoch": 12.38226857887875, - "grad_norm": 1.5438241958618164, - "learning_rate": 7.664824120603015e-05, - "loss": 4.9601, - "step": 23743 - }, - { - "epoch": 12.382790091264667, - "grad_norm": 1.499232292175293, - "learning_rate": 7.664723618090452e-05, - "loss": 5.3103, - "step": 23744 - }, - { - "epoch": 12.383311603650586, - "grad_norm": 1.3890080451965332, - "learning_rate": 7.66462311557789e-05, - "loss": 5.4212, - "step": 23745 - }, - { - "epoch": 12.383833116036506, - "grad_norm": 1.3916972875595093, - "learning_rate": 7.664522613065327e-05, - "loss": 5.5923, - "step": 23746 - }, - { - "epoch": 12.384354628422425, - "grad_norm": 1.4140353202819824, - "learning_rate": 7.664422110552765e-05, - "loss": 5.6644, - "step": 23747 - }, - { - "epoch": 12.384876140808345, - "grad_norm": 1.4810951948165894, - "learning_rate": 7.664321608040202e-05, - "loss": 5.3137, - "step": 23748 - }, - { - "epoch": 12.385397653194264, - "grad_norm": 1.4368773698806763, - "learning_rate": 7.664221105527638e-05, - "loss": 5.3774, - "step": 23749 - }, - { - "epoch": 12.385919165580182, - "grad_norm": 1.454219102859497, - "learning_rate": 7.664120603015076e-05, - "loss": 5.8086, - "step": 23750 - }, - { - "epoch": 12.386440677966101, - "grad_norm": 1.3830466270446777, - "learning_rate": 7.664020100502512e-05, - "loss": 5.7732, - "step": 23751 - }, - { - "epoch": 12.38696219035202, - "grad_norm": 1.5816638469696045, - "learning_rate": 7.66391959798995e-05, - "loss": 5.0469, - "step": 23752 - }, - { - "epoch": 12.38748370273794, - "grad_norm": 1.6049405336380005, - "learning_rate": 7.663819095477386e-05, - "loss": 5.4273, - "step": 23753 - }, - { - "epoch": 12.38800521512386, - "grad_norm": 1.4875094890594482, - "learning_rate": 7.663718592964824e-05, - "loss": 5.3415, - "step": 23754 - }, - { - "epoch": 12.388526727509777, - "grad_norm": 1.4727206230163574, - "learning_rate": 7.663618090452262e-05, - "loss": 5.5229, - "step": 23755 - }, - { - "epoch": 12.389048239895697, - "grad_norm": 1.560967206954956, - "learning_rate": 7.6635175879397e-05, - "loss": 5.0815, - "step": 23756 - }, - { - "epoch": 12.389569752281616, - "grad_norm": 1.5528901815414429, - "learning_rate": 7.663417085427136e-05, - "loss": 5.3369, - "step": 23757 - }, - { - "epoch": 12.390091264667536, - "grad_norm": 1.3624002933502197, - "learning_rate": 7.663316582914574e-05, - "loss": 5.7886, - "step": 23758 - }, - { - "epoch": 12.390612777053455, - "grad_norm": 1.4848289489746094, - "learning_rate": 7.66321608040201e-05, - "loss": 5.5882, - "step": 23759 - }, - { - "epoch": 12.391134289439375, - "grad_norm": 1.476951241493225, - "learning_rate": 7.663115577889448e-05, - "loss": 5.5261, - "step": 23760 - }, - { - "epoch": 12.391655801825294, - "grad_norm": 1.394593358039856, - "learning_rate": 7.663015075376885e-05, - "loss": 5.8159, - "step": 23761 - }, - { - "epoch": 12.392177314211212, - "grad_norm": 2.0089752674102783, - "learning_rate": 7.662914572864321e-05, - "loss": 4.8575, - "step": 23762 - }, - { - "epoch": 12.392698826597131, - "grad_norm": 1.4256051778793335, - "learning_rate": 7.662814070351759e-05, - "loss": 5.4253, - "step": 23763 - }, - { - "epoch": 12.39322033898305, - "grad_norm": 1.578748345375061, - "learning_rate": 7.662713567839195e-05, - "loss": 5.4285, - "step": 23764 - }, - { - "epoch": 12.39374185136897, - "grad_norm": 1.482709527015686, - "learning_rate": 7.662613065326633e-05, - "loss": 5.4518, - "step": 23765 - }, - { - "epoch": 12.39426336375489, - "grad_norm": 1.5098317861557007, - "learning_rate": 7.662512562814071e-05, - "loss": 5.2705, - "step": 23766 - }, - { - "epoch": 12.394784876140807, - "grad_norm": 1.3870972394943237, - "learning_rate": 7.662412060301509e-05, - "loss": 5.6458, - "step": 23767 - }, - { - "epoch": 12.395306388526727, - "grad_norm": 1.3240457773208618, - "learning_rate": 7.662311557788945e-05, - "loss": 4.8305, - "step": 23768 - }, - { - "epoch": 12.395827900912646, - "grad_norm": 1.3355458974838257, - "learning_rate": 7.662211055276383e-05, - "loss": 5.736, - "step": 23769 - }, - { - "epoch": 12.396349413298566, - "grad_norm": 1.4091652631759644, - "learning_rate": 7.662110552763819e-05, - "loss": 5.5905, - "step": 23770 - }, - { - "epoch": 12.396870925684485, - "grad_norm": 1.6322332620620728, - "learning_rate": 7.662010050251257e-05, - "loss": 4.2978, - "step": 23771 - }, - { - "epoch": 12.397392438070405, - "grad_norm": 1.5264809131622314, - "learning_rate": 7.661909547738693e-05, - "loss": 5.3908, - "step": 23772 - }, - { - "epoch": 12.397913950456322, - "grad_norm": 1.480878472328186, - "learning_rate": 7.661809045226131e-05, - "loss": 5.1181, - "step": 23773 - }, - { - "epoch": 12.398435462842242, - "grad_norm": 1.5581618547439575, - "learning_rate": 7.661708542713568e-05, - "loss": 5.2572, - "step": 23774 - }, - { - "epoch": 12.398956975228161, - "grad_norm": 1.4146952629089355, - "learning_rate": 7.661608040201005e-05, - "loss": 5.1697, - "step": 23775 - }, - { - "epoch": 12.399478487614081, - "grad_norm": 1.4930747747421265, - "learning_rate": 7.661507537688443e-05, - "loss": 5.2314, - "step": 23776 - }, - { - "epoch": 12.4, - "grad_norm": 1.3794203996658325, - "learning_rate": 7.66140703517588e-05, - "loss": 5.5589, - "step": 23777 - }, - { - "epoch": 12.40052151238592, - "grad_norm": 1.4893410205841064, - "learning_rate": 7.661306532663317e-05, - "loss": 5.3771, - "step": 23778 - }, - { - "epoch": 12.401043024771838, - "grad_norm": 1.4471783638000488, - "learning_rate": 7.661206030150754e-05, - "loss": 5.5363, - "step": 23779 - }, - { - "epoch": 12.401564537157757, - "grad_norm": 1.506136178970337, - "learning_rate": 7.661105527638192e-05, - "loss": 4.3449, - "step": 23780 - }, - { - "epoch": 12.402086049543676, - "grad_norm": 1.3989169597625732, - "learning_rate": 7.661005025125628e-05, - "loss": 5.6992, - "step": 23781 - }, - { - "epoch": 12.402607561929596, - "grad_norm": 1.443914532661438, - "learning_rate": 7.660904522613066e-05, - "loss": 5.515, - "step": 23782 - }, - { - "epoch": 12.403129074315515, - "grad_norm": 1.4601478576660156, - "learning_rate": 7.660804020100502e-05, - "loss": 5.7207, - "step": 23783 - }, - { - "epoch": 12.403650586701435, - "grad_norm": 1.5320745706558228, - "learning_rate": 7.66070351758794e-05, - "loss": 5.2528, - "step": 23784 - }, - { - "epoch": 12.404172099087353, - "grad_norm": 1.4792823791503906, - "learning_rate": 7.660603015075377e-05, - "loss": 5.6863, - "step": 23785 - }, - { - "epoch": 12.404693611473272, - "grad_norm": 1.4663664102554321, - "learning_rate": 7.660502512562814e-05, - "loss": 5.3809, - "step": 23786 - }, - { - "epoch": 12.405215123859191, - "grad_norm": 1.407945156097412, - "learning_rate": 7.660402010050252e-05, - "loss": 5.7999, - "step": 23787 - }, - { - "epoch": 12.405736636245111, - "grad_norm": 1.4283978939056396, - "learning_rate": 7.660301507537689e-05, - "loss": 5.4627, - "step": 23788 - }, - { - "epoch": 12.40625814863103, - "grad_norm": 1.446805477142334, - "learning_rate": 7.660201005025126e-05, - "loss": 5.6867, - "step": 23789 - }, - { - "epoch": 12.40677966101695, - "grad_norm": 1.4864200353622437, - "learning_rate": 7.660100502512563e-05, - "loss": 5.608, - "step": 23790 - }, - { - "epoch": 12.407301173402868, - "grad_norm": 1.4039350748062134, - "learning_rate": 7.66e-05, - "loss": 5.7236, - "step": 23791 - }, - { - "epoch": 12.407822685788787, - "grad_norm": 1.4183052778244019, - "learning_rate": 7.659899497487437e-05, - "loss": 5.4293, - "step": 23792 - }, - { - "epoch": 12.408344198174706, - "grad_norm": 1.467413306236267, - "learning_rate": 7.659798994974875e-05, - "loss": 5.7466, - "step": 23793 - }, - { - "epoch": 12.408865710560626, - "grad_norm": 1.4488184452056885, - "learning_rate": 7.659698492462311e-05, - "loss": 5.4932, - "step": 23794 - }, - { - "epoch": 12.409387222946545, - "grad_norm": 1.4276982545852661, - "learning_rate": 7.659597989949749e-05, - "loss": 5.8916, - "step": 23795 - }, - { - "epoch": 12.409908735332465, - "grad_norm": 1.397135615348816, - "learning_rate": 7.659497487437187e-05, - "loss": 5.1112, - "step": 23796 - }, - { - "epoch": 12.410430247718383, - "grad_norm": 1.512736439704895, - "learning_rate": 7.659396984924625e-05, - "loss": 5.1141, - "step": 23797 - }, - { - "epoch": 12.410951760104302, - "grad_norm": 1.5082546472549438, - "learning_rate": 7.659296482412061e-05, - "loss": 5.5431, - "step": 23798 - }, - { - "epoch": 12.411473272490221, - "grad_norm": 1.381402850151062, - "learning_rate": 7.659195979899499e-05, - "loss": 5.4434, - "step": 23799 - }, - { - "epoch": 12.411994784876141, - "grad_norm": 1.4765366315841675, - "learning_rate": 7.659095477386935e-05, - "loss": 4.9069, - "step": 23800 - }, - { - "epoch": 12.41251629726206, - "grad_norm": 1.4581681489944458, - "learning_rate": 7.658994974874372e-05, - "loss": 5.2014, - "step": 23801 - }, - { - "epoch": 12.41303780964798, - "grad_norm": 1.4212056398391724, - "learning_rate": 7.65889447236181e-05, - "loss": 5.5983, - "step": 23802 - }, - { - "epoch": 12.413559322033898, - "grad_norm": 1.5099722146987915, - "learning_rate": 7.658793969849246e-05, - "loss": 4.5845, - "step": 23803 - }, - { - "epoch": 12.414080834419817, - "grad_norm": 1.4099961519241333, - "learning_rate": 7.658693467336684e-05, - "loss": 5.5637, - "step": 23804 - }, - { - "epoch": 12.414602346805736, - "grad_norm": 1.442097544670105, - "learning_rate": 7.65859296482412e-05, - "loss": 4.9078, - "step": 23805 - }, - { - "epoch": 12.415123859191656, - "grad_norm": 1.4677298069000244, - "learning_rate": 7.658492462311558e-05, - "loss": 5.8991, - "step": 23806 - }, - { - "epoch": 12.415645371577575, - "grad_norm": 1.5004733800888062, - "learning_rate": 7.658391959798996e-05, - "loss": 5.3621, - "step": 23807 - }, - { - "epoch": 12.416166883963495, - "grad_norm": 1.522870421409607, - "learning_rate": 7.658291457286433e-05, - "loss": 5.3912, - "step": 23808 - }, - { - "epoch": 12.416688396349413, - "grad_norm": 1.4441994428634644, - "learning_rate": 7.65819095477387e-05, - "loss": 5.321, - "step": 23809 - }, - { - "epoch": 12.417209908735332, - "grad_norm": 1.4667942523956299, - "learning_rate": 7.658090452261308e-05, - "loss": 5.3715, - "step": 23810 - }, - { - "epoch": 12.417731421121252, - "grad_norm": 1.401685357093811, - "learning_rate": 7.657989949748744e-05, - "loss": 5.5363, - "step": 23811 - }, - { - "epoch": 12.418252933507171, - "grad_norm": 1.710755467414856, - "learning_rate": 7.657889447236182e-05, - "loss": 5.3003, - "step": 23812 - }, - { - "epoch": 12.41877444589309, - "grad_norm": 1.4062641859054565, - "learning_rate": 7.657788944723618e-05, - "loss": 5.4908, - "step": 23813 - }, - { - "epoch": 12.41929595827901, - "grad_norm": 1.4042850732803345, - "learning_rate": 7.657688442211056e-05, - "loss": 5.3088, - "step": 23814 - }, - { - "epoch": 12.419817470664928, - "grad_norm": 1.543470025062561, - "learning_rate": 7.657587939698492e-05, - "loss": 5.1533, - "step": 23815 - }, - { - "epoch": 12.420338983050847, - "grad_norm": 1.6130540370941162, - "learning_rate": 7.65748743718593e-05, - "loss": 5.6161, - "step": 23816 - }, - { - "epoch": 12.420860495436767, - "grad_norm": 1.4108381271362305, - "learning_rate": 7.657386934673368e-05, - "loss": 6.0046, - "step": 23817 - }, - { - "epoch": 12.421382007822686, - "grad_norm": 1.4215340614318848, - "learning_rate": 7.657286432160804e-05, - "loss": 5.2988, - "step": 23818 - }, - { - "epoch": 12.421903520208605, - "grad_norm": 1.5006026029586792, - "learning_rate": 7.657185929648242e-05, - "loss": 5.2794, - "step": 23819 - }, - { - "epoch": 12.422425032594525, - "grad_norm": 1.3946694135665894, - "learning_rate": 7.657085427135679e-05, - "loss": 5.4232, - "step": 23820 - }, - { - "epoch": 12.422946544980443, - "grad_norm": 1.3157027959823608, - "learning_rate": 7.656984924623116e-05, - "loss": 5.243, - "step": 23821 - }, - { - "epoch": 12.423468057366362, - "grad_norm": 1.5194106101989746, - "learning_rate": 7.656884422110553e-05, - "loss": 5.2074, - "step": 23822 - }, - { - "epoch": 12.423989569752282, - "grad_norm": 1.4282373189926147, - "learning_rate": 7.65678391959799e-05, - "loss": 5.3838, - "step": 23823 - }, - { - "epoch": 12.424511082138201, - "grad_norm": 1.4137600660324097, - "learning_rate": 7.656683417085427e-05, - "loss": 5.1618, - "step": 23824 - }, - { - "epoch": 12.42503259452412, - "grad_norm": 1.4104396104812622, - "learning_rate": 7.656582914572865e-05, - "loss": 5.6612, - "step": 23825 - }, - { - "epoch": 12.42555410691004, - "grad_norm": 1.5428458452224731, - "learning_rate": 7.656482412060301e-05, - "loss": 5.2756, - "step": 23826 - }, - { - "epoch": 12.426075619295958, - "grad_norm": 1.4886205196380615, - "learning_rate": 7.656381909547739e-05, - "loss": 5.2185, - "step": 23827 - }, - { - "epoch": 12.426597131681877, - "grad_norm": 1.5071150064468384, - "learning_rate": 7.656281407035177e-05, - "loss": 5.5885, - "step": 23828 - }, - { - "epoch": 12.427118644067797, - "grad_norm": 1.444319725036621, - "learning_rate": 7.656180904522613e-05, - "loss": 5.2856, - "step": 23829 - }, - { - "epoch": 12.427640156453716, - "grad_norm": 1.4940437078475952, - "learning_rate": 7.656080402010051e-05, - "loss": 5.2211, - "step": 23830 - }, - { - "epoch": 12.428161668839635, - "grad_norm": 1.3786982297897339, - "learning_rate": 7.655979899497487e-05, - "loss": 5.4861, - "step": 23831 - }, - { - "epoch": 12.428683181225555, - "grad_norm": 1.4851627349853516, - "learning_rate": 7.655879396984925e-05, - "loss": 4.2424, - "step": 23832 - }, - { - "epoch": 12.429204693611473, - "grad_norm": 1.4165419340133667, - "learning_rate": 7.655778894472362e-05, - "loss": 5.5186, - "step": 23833 - }, - { - "epoch": 12.429726205997392, - "grad_norm": 1.4130934476852417, - "learning_rate": 7.6556783919598e-05, - "loss": 5.7447, - "step": 23834 - }, - { - "epoch": 12.430247718383312, - "grad_norm": 1.4840353727340698, - "learning_rate": 7.655577889447236e-05, - "loss": 4.8314, - "step": 23835 - }, - { - "epoch": 12.430769230769231, - "grad_norm": 1.3252582550048828, - "learning_rate": 7.655477386934674e-05, - "loss": 5.8509, - "step": 23836 - }, - { - "epoch": 12.43129074315515, - "grad_norm": 1.3644167184829712, - "learning_rate": 7.655376884422111e-05, - "loss": 5.6896, - "step": 23837 - }, - { - "epoch": 12.43181225554107, - "grad_norm": 1.4254096746444702, - "learning_rate": 7.655276381909549e-05, - "loss": 5.5191, - "step": 23838 - }, - { - "epoch": 12.432333767926988, - "grad_norm": 1.4065399169921875, - "learning_rate": 7.655175879396986e-05, - "loss": 5.3711, - "step": 23839 - }, - { - "epoch": 12.432855280312907, - "grad_norm": 1.3681453466415405, - "learning_rate": 7.655075376884423e-05, - "loss": 4.8664, - "step": 23840 - }, - { - "epoch": 12.433376792698827, - "grad_norm": 1.4717154502868652, - "learning_rate": 7.65497487437186e-05, - "loss": 5.5235, - "step": 23841 - }, - { - "epoch": 12.433898305084746, - "grad_norm": 1.5091842412948608, - "learning_rate": 7.654874371859296e-05, - "loss": 5.4984, - "step": 23842 - }, - { - "epoch": 12.434419817470665, - "grad_norm": 1.4147522449493408, - "learning_rate": 7.654773869346734e-05, - "loss": 5.7321, - "step": 23843 - }, - { - "epoch": 12.434941329856585, - "grad_norm": 1.3422292470932007, - "learning_rate": 7.65467336683417e-05, - "loss": 5.677, - "step": 23844 - }, - { - "epoch": 12.435462842242503, - "grad_norm": 1.4607919454574585, - "learning_rate": 7.654572864321608e-05, - "loss": 5.0811, - "step": 23845 - }, - { - "epoch": 12.435984354628422, - "grad_norm": 1.520227313041687, - "learning_rate": 7.654472361809045e-05, - "loss": 5.1744, - "step": 23846 - }, - { - "epoch": 12.436505867014342, - "grad_norm": 1.5551426410675049, - "learning_rate": 7.654371859296482e-05, - "loss": 5.5663, - "step": 23847 - }, - { - "epoch": 12.437027379400261, - "grad_norm": 1.4993596076965332, - "learning_rate": 7.65427135678392e-05, - "loss": 4.9552, - "step": 23848 - }, - { - "epoch": 12.43754889178618, - "grad_norm": 1.540223240852356, - "learning_rate": 7.654170854271358e-05, - "loss": 5.5629, - "step": 23849 - }, - { - "epoch": 12.438070404172098, - "grad_norm": 1.3231174945831299, - "learning_rate": 7.654070351758794e-05, - "loss": 5.7494, - "step": 23850 - }, - { - "epoch": 12.438591916558018, - "grad_norm": 1.4028513431549072, - "learning_rate": 7.653969849246232e-05, - "loss": 5.6408, - "step": 23851 - }, - { - "epoch": 12.439113428943937, - "grad_norm": 1.3757060766220093, - "learning_rate": 7.653869346733669e-05, - "loss": 4.8704, - "step": 23852 - }, - { - "epoch": 12.439634941329857, - "grad_norm": 1.3621817827224731, - "learning_rate": 7.653768844221106e-05, - "loss": 5.6849, - "step": 23853 - }, - { - "epoch": 12.440156453715776, - "grad_norm": 1.476741075515747, - "learning_rate": 7.653668341708543e-05, - "loss": 5.5682, - "step": 23854 - }, - { - "epoch": 12.440677966101696, - "grad_norm": 1.5105949640274048, - "learning_rate": 7.653567839195979e-05, - "loss": 5.5357, - "step": 23855 - }, - { - "epoch": 12.441199478487615, - "grad_norm": 1.5366222858428955, - "learning_rate": 7.653467336683417e-05, - "loss": 5.0613, - "step": 23856 - }, - { - "epoch": 12.441720990873533, - "grad_norm": 1.5610473155975342, - "learning_rate": 7.653366834170854e-05, - "loss": 5.2283, - "step": 23857 - }, - { - "epoch": 12.442242503259452, - "grad_norm": 1.4782990217208862, - "learning_rate": 7.653266331658291e-05, - "loss": 5.7343, - "step": 23858 - }, - { - "epoch": 12.442764015645372, - "grad_norm": 1.3663442134857178, - "learning_rate": 7.653165829145729e-05, - "loss": 5.3092, - "step": 23859 - }, - { - "epoch": 12.443285528031291, - "grad_norm": 1.5326111316680908, - "learning_rate": 7.653065326633167e-05, - "loss": 4.9813, - "step": 23860 - }, - { - "epoch": 12.44380704041721, - "grad_norm": 1.5193885564804077, - "learning_rate": 7.652964824120603e-05, - "loss": 4.9787, - "step": 23861 - }, - { - "epoch": 12.444328552803128, - "grad_norm": 1.4454288482666016, - "learning_rate": 7.652864321608041e-05, - "loss": 5.7573, - "step": 23862 - }, - { - "epoch": 12.444850065189048, - "grad_norm": 1.3996063470840454, - "learning_rate": 7.652763819095478e-05, - "loss": 5.1412, - "step": 23863 - }, - { - "epoch": 12.445371577574967, - "grad_norm": 1.545922040939331, - "learning_rate": 7.652663316582915e-05, - "loss": 5.609, - "step": 23864 - }, - { - "epoch": 12.445893089960887, - "grad_norm": 1.3752044439315796, - "learning_rate": 7.652562814070352e-05, - "loss": 5.5444, - "step": 23865 - }, - { - "epoch": 12.446414602346806, - "grad_norm": 1.3943368196487427, - "learning_rate": 7.65246231155779e-05, - "loss": 5.384, - "step": 23866 - }, - { - "epoch": 12.446936114732726, - "grad_norm": 1.4490538835525513, - "learning_rate": 7.652361809045226e-05, - "loss": 5.6049, - "step": 23867 - }, - { - "epoch": 12.447457627118643, - "grad_norm": 1.377888798713684, - "learning_rate": 7.652261306532664e-05, - "loss": 5.7445, - "step": 23868 - }, - { - "epoch": 12.447979139504563, - "grad_norm": 1.462333083152771, - "learning_rate": 7.652160804020102e-05, - "loss": 5.1237, - "step": 23869 - }, - { - "epoch": 12.448500651890482, - "grad_norm": 1.4214469194412231, - "learning_rate": 7.652060301507538e-05, - "loss": 5.5969, - "step": 23870 - }, - { - "epoch": 12.449022164276402, - "grad_norm": 1.5922590494155884, - "learning_rate": 7.651959798994976e-05, - "loss": 5.0325, - "step": 23871 - }, - { - "epoch": 12.449543676662321, - "grad_norm": 1.505969524383545, - "learning_rate": 7.651859296482412e-05, - "loss": 5.651, - "step": 23872 - }, - { - "epoch": 12.45006518904824, - "grad_norm": 1.4821701049804688, - "learning_rate": 7.65175879396985e-05, - "loss": 5.5641, - "step": 23873 - }, - { - "epoch": 12.450586701434158, - "grad_norm": 1.410272240638733, - "learning_rate": 7.651658291457286e-05, - "loss": 5.5392, - "step": 23874 - }, - { - "epoch": 12.451108213820078, - "grad_norm": 1.487518072128296, - "learning_rate": 7.651557788944724e-05, - "loss": 5.384, - "step": 23875 - }, - { - "epoch": 12.451629726205997, - "grad_norm": 1.4635494947433472, - "learning_rate": 7.65145728643216e-05, - "loss": 5.3759, - "step": 23876 - }, - { - "epoch": 12.452151238591917, - "grad_norm": 1.4112154245376587, - "learning_rate": 7.651356783919598e-05, - "loss": 5.0038, - "step": 23877 - }, - { - "epoch": 12.452672750977836, - "grad_norm": 1.466858148574829, - "learning_rate": 7.651256281407035e-05, - "loss": 4.9729, - "step": 23878 - }, - { - "epoch": 12.453194263363756, - "grad_norm": 1.4696847200393677, - "learning_rate": 7.651155778894473e-05, - "loss": 4.9088, - "step": 23879 - }, - { - "epoch": 12.453715775749673, - "grad_norm": 1.559050440788269, - "learning_rate": 7.65105527638191e-05, - "loss": 4.7958, - "step": 23880 - }, - { - "epoch": 12.454237288135593, - "grad_norm": 1.5882033109664917, - "learning_rate": 7.650954773869347e-05, - "loss": 5.3624, - "step": 23881 - }, - { - "epoch": 12.454758800521512, - "grad_norm": 1.4567261934280396, - "learning_rate": 7.650854271356785e-05, - "loss": 5.3868, - "step": 23882 - }, - { - "epoch": 12.455280312907432, - "grad_norm": 1.5936743021011353, - "learning_rate": 7.650753768844221e-05, - "loss": 5.2319, - "step": 23883 - }, - { - "epoch": 12.455801825293351, - "grad_norm": 1.4073845148086548, - "learning_rate": 7.650653266331659e-05, - "loss": 5.609, - "step": 23884 - }, - { - "epoch": 12.45632333767927, - "grad_norm": 1.4026669263839722, - "learning_rate": 7.650552763819095e-05, - "loss": 5.5536, - "step": 23885 - }, - { - "epoch": 12.456844850065188, - "grad_norm": 1.5067285299301147, - "learning_rate": 7.650452261306533e-05, - "loss": 5.3227, - "step": 23886 - }, - { - "epoch": 12.457366362451108, - "grad_norm": 1.3606045246124268, - "learning_rate": 7.65035175879397e-05, - "loss": 5.8216, - "step": 23887 - }, - { - "epoch": 12.457887874837027, - "grad_norm": 1.4823493957519531, - "learning_rate": 7.650251256281407e-05, - "loss": 5.4332, - "step": 23888 - }, - { - "epoch": 12.458409387222947, - "grad_norm": 1.473591923713684, - "learning_rate": 7.650150753768845e-05, - "loss": 5.1253, - "step": 23889 - }, - { - "epoch": 12.458930899608866, - "grad_norm": 1.4161796569824219, - "learning_rate": 7.650050251256283e-05, - "loss": 5.5614, - "step": 23890 - }, - { - "epoch": 12.459452411994786, - "grad_norm": 1.4895586967468262, - "learning_rate": 7.649949748743719e-05, - "loss": 5.5382, - "step": 23891 - }, - { - "epoch": 12.459973924380703, - "grad_norm": 1.4927915334701538, - "learning_rate": 7.649849246231157e-05, - "loss": 5.2074, - "step": 23892 - }, - { - "epoch": 12.460495436766623, - "grad_norm": 1.4737502336502075, - "learning_rate": 7.649748743718593e-05, - "loss": 5.8044, - "step": 23893 - }, - { - "epoch": 12.461016949152542, - "grad_norm": 1.4642428159713745, - "learning_rate": 7.64964824120603e-05, - "loss": 5.3438, - "step": 23894 - }, - { - "epoch": 12.461538461538462, - "grad_norm": 1.3166215419769287, - "learning_rate": 7.649547738693468e-05, - "loss": 5.678, - "step": 23895 - }, - { - "epoch": 12.462059973924381, - "grad_norm": 1.4340157508850098, - "learning_rate": 7.649447236180904e-05, - "loss": 5.5556, - "step": 23896 - }, - { - "epoch": 12.4625814863103, - "grad_norm": 1.4314852952957153, - "learning_rate": 7.649346733668342e-05, - "loss": 5.1315, - "step": 23897 - }, - { - "epoch": 12.463102998696218, - "grad_norm": 1.5475008487701416, - "learning_rate": 7.649246231155778e-05, - "loss": 4.9837, - "step": 23898 - }, - { - "epoch": 12.463624511082138, - "grad_norm": 1.4987730979919434, - "learning_rate": 7.649145728643216e-05, - "loss": 5.7163, - "step": 23899 - }, - { - "epoch": 12.464146023468057, - "grad_norm": 1.4902095794677734, - "learning_rate": 7.649045226130654e-05, - "loss": 5.6731, - "step": 23900 - }, - { - "epoch": 12.464667535853977, - "grad_norm": 1.381601333618164, - "learning_rate": 7.648944723618092e-05, - "loss": 5.7253, - "step": 23901 - }, - { - "epoch": 12.465189048239896, - "grad_norm": 1.4353667497634888, - "learning_rate": 7.648844221105528e-05, - "loss": 5.7145, - "step": 23902 - }, - { - "epoch": 12.465710560625816, - "grad_norm": 1.5271464586257935, - "learning_rate": 7.648743718592966e-05, - "loss": 5.2344, - "step": 23903 - }, - { - "epoch": 12.466232073011733, - "grad_norm": 1.5510101318359375, - "learning_rate": 7.648643216080402e-05, - "loss": 5.222, - "step": 23904 - }, - { - "epoch": 12.466753585397653, - "grad_norm": 1.4215399026870728, - "learning_rate": 7.64854271356784e-05, - "loss": 5.731, - "step": 23905 - }, - { - "epoch": 12.467275097783572, - "grad_norm": 1.3483597040176392, - "learning_rate": 7.648442211055276e-05, - "loss": 5.9383, - "step": 23906 - }, - { - "epoch": 12.467796610169492, - "grad_norm": 1.4476491212844849, - "learning_rate": 7.648341708542714e-05, - "loss": 5.5246, - "step": 23907 - }, - { - "epoch": 12.468318122555411, - "grad_norm": 1.5162078142166138, - "learning_rate": 7.64824120603015e-05, - "loss": 5.642, - "step": 23908 - }, - { - "epoch": 12.46883963494133, - "grad_norm": 1.6466444730758667, - "learning_rate": 7.648140703517588e-05, - "loss": 5.2493, - "step": 23909 - }, - { - "epoch": 12.469361147327248, - "grad_norm": 1.4474380016326904, - "learning_rate": 7.648040201005026e-05, - "loss": 5.5958, - "step": 23910 - }, - { - "epoch": 12.469882659713168, - "grad_norm": 1.4507007598876953, - "learning_rate": 7.647939698492463e-05, - "loss": 5.6084, - "step": 23911 - }, - { - "epoch": 12.470404172099087, - "grad_norm": 1.5435689687728882, - "learning_rate": 7.6478391959799e-05, - "loss": 5.146, - "step": 23912 - }, - { - "epoch": 12.470925684485007, - "grad_norm": 1.3129143714904785, - "learning_rate": 7.647738693467337e-05, - "loss": 5.8203, - "step": 23913 - }, - { - "epoch": 12.471447196870926, - "grad_norm": 1.48931086063385, - "learning_rate": 7.647638190954775e-05, - "loss": 5.1595, - "step": 23914 - }, - { - "epoch": 12.471968709256846, - "grad_norm": 1.418338418006897, - "learning_rate": 7.647537688442211e-05, - "loss": 5.3893, - "step": 23915 - }, - { - "epoch": 12.472490221642763, - "grad_norm": 1.4487195014953613, - "learning_rate": 7.647437185929649e-05, - "loss": 5.3915, - "step": 23916 - }, - { - "epoch": 12.473011734028683, - "grad_norm": 1.4759330749511719, - "learning_rate": 7.647336683417085e-05, - "loss": 5.416, - "step": 23917 - }, - { - "epoch": 12.473533246414602, - "grad_norm": 1.4914275407791138, - "learning_rate": 7.647236180904523e-05, - "loss": 5.6065, - "step": 23918 - }, - { - "epoch": 12.474054758800522, - "grad_norm": 1.4382572174072266, - "learning_rate": 7.64713567839196e-05, - "loss": 4.7425, - "step": 23919 - }, - { - "epoch": 12.474576271186441, - "grad_norm": 1.3593164682388306, - "learning_rate": 7.647035175879397e-05, - "loss": 5.7138, - "step": 23920 - }, - { - "epoch": 12.47509778357236, - "grad_norm": 1.5651980638504028, - "learning_rate": 7.646934673366835e-05, - "loss": 5.1222, - "step": 23921 - }, - { - "epoch": 12.475619295958278, - "grad_norm": 1.4208489656448364, - "learning_rate": 7.646834170854271e-05, - "loss": 5.1233, - "step": 23922 - }, - { - "epoch": 12.476140808344198, - "grad_norm": 1.446416974067688, - "learning_rate": 7.646733668341709e-05, - "loss": 5.6009, - "step": 23923 - }, - { - "epoch": 12.476662320730117, - "grad_norm": 1.577379822731018, - "learning_rate": 7.646633165829146e-05, - "loss": 5.351, - "step": 23924 - }, - { - "epoch": 12.477183833116037, - "grad_norm": 1.4392658472061157, - "learning_rate": 7.646532663316583e-05, - "loss": 5.4865, - "step": 23925 - }, - { - "epoch": 12.477705345501956, - "grad_norm": 1.4650200605392456, - "learning_rate": 7.64643216080402e-05, - "loss": 5.0934, - "step": 23926 - }, - { - "epoch": 12.478226857887876, - "grad_norm": 1.5383466482162476, - "learning_rate": 7.646331658291458e-05, - "loss": 5.1927, - "step": 23927 - }, - { - "epoch": 12.478748370273793, - "grad_norm": 1.4945564270019531, - "learning_rate": 7.646231155778894e-05, - "loss": 5.4315, - "step": 23928 - }, - { - "epoch": 12.479269882659713, - "grad_norm": 1.5480318069458008, - "learning_rate": 7.646130653266332e-05, - "loss": 5.3006, - "step": 23929 - }, - { - "epoch": 12.479791395045632, - "grad_norm": 1.5284807682037354, - "learning_rate": 7.64603015075377e-05, - "loss": 5.2525, - "step": 23930 - }, - { - "epoch": 12.480312907431552, - "grad_norm": 1.4392942190170288, - "learning_rate": 7.645929648241207e-05, - "loss": 5.5731, - "step": 23931 - }, - { - "epoch": 12.480834419817471, - "grad_norm": 1.4416391849517822, - "learning_rate": 7.645829145728644e-05, - "loss": 5.5038, - "step": 23932 - }, - { - "epoch": 12.48135593220339, - "grad_norm": 1.5883125066757202, - "learning_rate": 7.645728643216082e-05, - "loss": 5.6548, - "step": 23933 - }, - { - "epoch": 12.481877444589308, - "grad_norm": 1.59064519405365, - "learning_rate": 7.645628140703518e-05, - "loss": 5.7752, - "step": 23934 - }, - { - "epoch": 12.482398956975228, - "grad_norm": 1.4246289730072021, - "learning_rate": 7.645527638190955e-05, - "loss": 5.6915, - "step": 23935 - }, - { - "epoch": 12.482920469361147, - "grad_norm": 1.5602648258209229, - "learning_rate": 7.645427135678392e-05, - "loss": 5.1131, - "step": 23936 - }, - { - "epoch": 12.483441981747067, - "grad_norm": 1.308302402496338, - "learning_rate": 7.645326633165829e-05, - "loss": 5.8375, - "step": 23937 - }, - { - "epoch": 12.483963494132986, - "grad_norm": 1.6943588256835938, - "learning_rate": 7.645226130653267e-05, - "loss": 4.9007, - "step": 23938 - }, - { - "epoch": 12.484485006518906, - "grad_norm": 1.6048747301101685, - "learning_rate": 7.645125628140703e-05, - "loss": 5.5039, - "step": 23939 - }, - { - "epoch": 12.485006518904823, - "grad_norm": 1.5620343685150146, - "learning_rate": 7.645025125628141e-05, - "loss": 5.1718, - "step": 23940 - }, - { - "epoch": 12.485528031290743, - "grad_norm": 1.4623560905456543, - "learning_rate": 7.644924623115578e-05, - "loss": 5.4313, - "step": 23941 - }, - { - "epoch": 12.486049543676662, - "grad_norm": 1.442973017692566, - "learning_rate": 7.644824120603016e-05, - "loss": 5.6549, - "step": 23942 - }, - { - "epoch": 12.486571056062582, - "grad_norm": 1.394755244255066, - "learning_rate": 7.644723618090453e-05, - "loss": 5.3332, - "step": 23943 - }, - { - "epoch": 12.487092568448501, - "grad_norm": 1.4716360569000244, - "learning_rate": 7.64462311557789e-05, - "loss": 5.6972, - "step": 23944 - }, - { - "epoch": 12.487614080834419, - "grad_norm": 1.557801365852356, - "learning_rate": 7.644522613065327e-05, - "loss": 5.0637, - "step": 23945 - }, - { - "epoch": 12.488135593220338, - "grad_norm": 1.5008137226104736, - "learning_rate": 7.644422110552765e-05, - "loss": 5.502, - "step": 23946 - }, - { - "epoch": 12.488657105606258, - "grad_norm": 1.6561548709869385, - "learning_rate": 7.644321608040201e-05, - "loss": 4.9433, - "step": 23947 - }, - { - "epoch": 12.489178617992177, - "grad_norm": 1.4328820705413818, - "learning_rate": 7.644221105527638e-05, - "loss": 5.0531, - "step": 23948 - }, - { - "epoch": 12.489700130378097, - "grad_norm": 1.4527838230133057, - "learning_rate": 7.644120603015075e-05, - "loss": 5.646, - "step": 23949 - }, - { - "epoch": 12.490221642764016, - "grad_norm": 1.5010101795196533, - "learning_rate": 7.644020100502513e-05, - "loss": 5.3127, - "step": 23950 - }, - { - "epoch": 12.490743155149936, - "grad_norm": 1.5378553867340088, - "learning_rate": 7.643919597989951e-05, - "loss": 5.214, - "step": 23951 - }, - { - "epoch": 12.491264667535853, - "grad_norm": 1.436081051826477, - "learning_rate": 7.643819095477387e-05, - "loss": 5.6131, - "step": 23952 - }, - { - "epoch": 12.491786179921773, - "grad_norm": 1.431287169456482, - "learning_rate": 7.643718592964825e-05, - "loss": 5.1902, - "step": 23953 - }, - { - "epoch": 12.492307692307692, - "grad_norm": 1.4422441720962524, - "learning_rate": 7.643618090452262e-05, - "loss": 5.1914, - "step": 23954 - }, - { - "epoch": 12.492829204693612, - "grad_norm": 1.6241364479064941, - "learning_rate": 7.643517587939699e-05, - "loss": 5.0827, - "step": 23955 - }, - { - "epoch": 12.493350717079531, - "grad_norm": 1.4172743558883667, - "learning_rate": 7.643417085427136e-05, - "loss": 5.1411, - "step": 23956 - }, - { - "epoch": 12.493872229465449, - "grad_norm": 1.3096743822097778, - "learning_rate": 7.643316582914574e-05, - "loss": 5.87, - "step": 23957 - }, - { - "epoch": 12.494393741851368, - "grad_norm": 1.3828628063201904, - "learning_rate": 7.64321608040201e-05, - "loss": 5.5224, - "step": 23958 - }, - { - "epoch": 12.494915254237288, - "grad_norm": 1.4948405027389526, - "learning_rate": 7.643115577889448e-05, - "loss": 5.3998, - "step": 23959 - }, - { - "epoch": 12.495436766623207, - "grad_norm": 1.349073052406311, - "learning_rate": 7.643015075376884e-05, - "loss": 5.7904, - "step": 23960 - }, - { - "epoch": 12.495958279009127, - "grad_norm": 1.5361419916152954, - "learning_rate": 7.642914572864322e-05, - "loss": 5.3808, - "step": 23961 - }, - { - "epoch": 12.496479791395046, - "grad_norm": 1.447091817855835, - "learning_rate": 7.64281407035176e-05, - "loss": 4.7208, - "step": 23962 - }, - { - "epoch": 12.497001303780964, - "grad_norm": 1.4541609287261963, - "learning_rate": 7.642713567839196e-05, - "loss": 5.1941, - "step": 23963 - }, - { - "epoch": 12.497522816166883, - "grad_norm": 1.4621177911758423, - "learning_rate": 7.642613065326634e-05, - "loss": 5.4574, - "step": 23964 - }, - { - "epoch": 12.498044328552803, - "grad_norm": 1.5041385889053345, - "learning_rate": 7.64251256281407e-05, - "loss": 5.4247, - "step": 23965 - }, - { - "epoch": 12.498565840938722, - "grad_norm": 1.4481472969055176, - "learning_rate": 7.642412060301508e-05, - "loss": 5.912, - "step": 23966 - }, - { - "epoch": 12.499087353324642, - "grad_norm": 1.5137687921524048, - "learning_rate": 7.642311557788945e-05, - "loss": 5.4625, - "step": 23967 - }, - { - "epoch": 12.499608865710561, - "grad_norm": 1.4013690948486328, - "learning_rate": 7.642211055276382e-05, - "loss": 5.4516, - "step": 23968 - }, - { - "epoch": 12.500130378096479, - "grad_norm": 1.470645785331726, - "learning_rate": 7.642110552763819e-05, - "loss": 5.4676, - "step": 23969 - }, - { - "epoch": 12.500651890482398, - "grad_norm": 1.4191772937774658, - "learning_rate": 7.642010050251257e-05, - "loss": 5.7481, - "step": 23970 - }, - { - "epoch": 12.501173402868318, - "grad_norm": 1.3853963613510132, - "learning_rate": 7.641909547738694e-05, - "loss": 5.5958, - "step": 23971 - }, - { - "epoch": 12.501694915254237, - "grad_norm": 1.5253382921218872, - "learning_rate": 7.641809045226132e-05, - "loss": 5.2485, - "step": 23972 - }, - { - "epoch": 12.502216427640157, - "grad_norm": 1.407069206237793, - "learning_rate": 7.641708542713569e-05, - "loss": 5.8219, - "step": 23973 - }, - { - "epoch": 12.502737940026076, - "grad_norm": 1.477566123008728, - "learning_rate": 7.641608040201005e-05, - "loss": 5.0922, - "step": 23974 - }, - { - "epoch": 12.503259452411994, - "grad_norm": 1.5209016799926758, - "learning_rate": 7.641507537688443e-05, - "loss": 5.5627, - "step": 23975 - }, - { - "epoch": 12.503780964797913, - "grad_norm": 1.3633991479873657, - "learning_rate": 7.641407035175879e-05, - "loss": 5.8327, - "step": 23976 - }, - { - "epoch": 12.504302477183833, - "grad_norm": 1.4334821701049805, - "learning_rate": 7.641306532663317e-05, - "loss": 5.5376, - "step": 23977 - }, - { - "epoch": 12.504823989569752, - "grad_norm": 1.4631969928741455, - "learning_rate": 7.641206030150753e-05, - "loss": 5.2542, - "step": 23978 - }, - { - "epoch": 12.505345501955672, - "grad_norm": 1.6088911294937134, - "learning_rate": 7.641105527638191e-05, - "loss": 5.2371, - "step": 23979 - }, - { - "epoch": 12.505867014341591, - "grad_norm": 1.4348424673080444, - "learning_rate": 7.641005025125628e-05, - "loss": 4.975, - "step": 23980 - }, - { - "epoch": 12.506388526727509, - "grad_norm": 1.61935293674469, - "learning_rate": 7.640904522613065e-05, - "loss": 5.4608, - "step": 23981 - }, - { - "epoch": 12.506910039113428, - "grad_norm": 1.4549404382705688, - "learning_rate": 7.640804020100503e-05, - "loss": 5.3078, - "step": 23982 - }, - { - "epoch": 12.507431551499348, - "grad_norm": 1.5335890054702759, - "learning_rate": 7.640703517587941e-05, - "loss": 5.4256, - "step": 23983 - }, - { - "epoch": 12.507953063885267, - "grad_norm": 1.794306755065918, - "learning_rate": 7.640603015075377e-05, - "loss": 5.4584, - "step": 23984 - }, - { - "epoch": 12.508474576271187, - "grad_norm": 1.5369359254837036, - "learning_rate": 7.640502512562815e-05, - "loss": 5.3379, - "step": 23985 - }, - { - "epoch": 12.508996088657106, - "grad_norm": 1.442427396774292, - "learning_rate": 7.640402010050252e-05, - "loss": 5.6744, - "step": 23986 - }, - { - "epoch": 12.509517601043024, - "grad_norm": 1.3501238822937012, - "learning_rate": 7.64030150753769e-05, - "loss": 5.7066, - "step": 23987 - }, - { - "epoch": 12.510039113428943, - "grad_norm": 1.4165916442871094, - "learning_rate": 7.640201005025126e-05, - "loss": 5.3645, - "step": 23988 - }, - { - "epoch": 12.510560625814863, - "grad_norm": 1.65791654586792, - "learning_rate": 7.640100502512562e-05, - "loss": 5.4648, - "step": 23989 - }, - { - "epoch": 12.511082138200782, - "grad_norm": 1.428136944770813, - "learning_rate": 7.64e-05, - "loss": 5.5408, - "step": 23990 - }, - { - "epoch": 12.511603650586702, - "grad_norm": 1.5523273944854736, - "learning_rate": 7.639899497487438e-05, - "loss": 5.4954, - "step": 23991 - }, - { - "epoch": 12.512125162972621, - "grad_norm": 1.485776662826538, - "learning_rate": 7.639798994974876e-05, - "loss": 5.5272, - "step": 23992 - }, - { - "epoch": 12.512646675358539, - "grad_norm": 1.5057960748672485, - "learning_rate": 7.639698492462312e-05, - "loss": 5.5043, - "step": 23993 - }, - { - "epoch": 12.513168187744458, - "grad_norm": 1.5747747421264648, - "learning_rate": 7.63959798994975e-05, - "loss": 5.3336, - "step": 23994 - }, - { - "epoch": 12.513689700130378, - "grad_norm": 1.5737559795379639, - "learning_rate": 7.639497487437186e-05, - "loss": 5.3503, - "step": 23995 - }, - { - "epoch": 12.514211212516297, - "grad_norm": 1.345757246017456, - "learning_rate": 7.639396984924624e-05, - "loss": 5.3754, - "step": 23996 - }, - { - "epoch": 12.514732724902217, - "grad_norm": 1.3995518684387207, - "learning_rate": 7.63929648241206e-05, - "loss": 5.7906, - "step": 23997 - }, - { - "epoch": 12.515254237288136, - "grad_norm": 1.5265735387802124, - "learning_rate": 7.639195979899498e-05, - "loss": 5.1573, - "step": 23998 - }, - { - "epoch": 12.515775749674054, - "grad_norm": 1.5353530645370483, - "learning_rate": 7.639095477386935e-05, - "loss": 5.4419, - "step": 23999 - }, - { - "epoch": 12.516297262059974, - "grad_norm": 1.4458953142166138, - "learning_rate": 7.638994974874372e-05, - "loss": 5.6868, - "step": 24000 - }, - { - "epoch": 12.516818774445893, - "grad_norm": 1.326342225074768, - "learning_rate": 7.638894472361809e-05, - "loss": 5.685, - "step": 24001 - }, - { - "epoch": 12.517340286831812, - "grad_norm": 1.3961377143859863, - "learning_rate": 7.638793969849247e-05, - "loss": 5.6772, - "step": 24002 - }, - { - "epoch": 12.517861799217732, - "grad_norm": 1.3921561241149902, - "learning_rate": 7.638693467336684e-05, - "loss": 5.2917, - "step": 24003 - }, - { - "epoch": 12.518383311603651, - "grad_norm": 1.5722582340240479, - "learning_rate": 7.638592964824121e-05, - "loss": 4.948, - "step": 24004 - }, - { - "epoch": 12.518904823989569, - "grad_norm": 1.373491644859314, - "learning_rate": 7.638492462311559e-05, - "loss": 5.6027, - "step": 24005 - }, - { - "epoch": 12.519426336375489, - "grad_norm": 1.5441336631774902, - "learning_rate": 7.638391959798995e-05, - "loss": 5.2614, - "step": 24006 - }, - { - "epoch": 12.519947848761408, - "grad_norm": 1.7612119913101196, - "learning_rate": 7.638291457286433e-05, - "loss": 5.4208, - "step": 24007 - }, - { - "epoch": 12.520469361147327, - "grad_norm": 1.5488641262054443, - "learning_rate": 7.638190954773869e-05, - "loss": 5.6309, - "step": 24008 - }, - { - "epoch": 12.520990873533247, - "grad_norm": 1.3743047714233398, - "learning_rate": 7.638090452261307e-05, - "loss": 5.8355, - "step": 24009 - }, - { - "epoch": 12.521512385919166, - "grad_norm": 1.5425219535827637, - "learning_rate": 7.637989949748743e-05, - "loss": 5.2699, - "step": 24010 - }, - { - "epoch": 12.522033898305084, - "grad_norm": 1.4721312522888184, - "learning_rate": 7.637889447236181e-05, - "loss": 5.0939, - "step": 24011 - }, - { - "epoch": 12.522555410691004, - "grad_norm": 1.549061894416809, - "learning_rate": 7.637788944723619e-05, - "loss": 4.9992, - "step": 24012 - }, - { - "epoch": 12.523076923076923, - "grad_norm": 1.5016804933547974, - "learning_rate": 7.637688442211057e-05, - "loss": 5.4633, - "step": 24013 - }, - { - "epoch": 12.523598435462842, - "grad_norm": 1.5122665166854858, - "learning_rate": 7.637587939698493e-05, - "loss": 5.2116, - "step": 24014 - }, - { - "epoch": 12.524119947848762, - "grad_norm": 1.4193669557571411, - "learning_rate": 7.63748743718593e-05, - "loss": 5.2719, - "step": 24015 - }, - { - "epoch": 12.524641460234681, - "grad_norm": 1.5420687198638916, - "learning_rate": 7.637386934673367e-05, - "loss": 4.9329, - "step": 24016 - }, - { - "epoch": 12.525162972620599, - "grad_norm": 1.4554176330566406, - "learning_rate": 7.637286432160804e-05, - "loss": 5.7281, - "step": 24017 - }, - { - "epoch": 12.525684485006519, - "grad_norm": 1.5088129043579102, - "learning_rate": 7.637185929648242e-05, - "loss": 5.3336, - "step": 24018 - }, - { - "epoch": 12.526205997392438, - "grad_norm": 1.4380431175231934, - "learning_rate": 7.637085427135678e-05, - "loss": 5.6608, - "step": 24019 - }, - { - "epoch": 12.526727509778357, - "grad_norm": 1.5412853956222534, - "learning_rate": 7.636984924623116e-05, - "loss": 5.3897, - "step": 24020 - }, - { - "epoch": 12.527249022164277, - "grad_norm": 1.4521303176879883, - "learning_rate": 7.636884422110552e-05, - "loss": 5.2485, - "step": 24021 - }, - { - "epoch": 12.527770534550196, - "grad_norm": 1.5471569299697876, - "learning_rate": 7.63678391959799e-05, - "loss": 5.3092, - "step": 24022 - }, - { - "epoch": 12.528292046936114, - "grad_norm": 1.350314736366272, - "learning_rate": 7.636683417085428e-05, - "loss": 5.0287, - "step": 24023 - }, - { - "epoch": 12.528813559322034, - "grad_norm": 1.4373352527618408, - "learning_rate": 7.636582914572866e-05, - "loss": 5.2534, - "step": 24024 - }, - { - "epoch": 12.529335071707953, - "grad_norm": 1.428499460220337, - "learning_rate": 7.636482412060302e-05, - "loss": 5.3649, - "step": 24025 - }, - { - "epoch": 12.529856584093872, - "grad_norm": 1.3979252576828003, - "learning_rate": 7.63638190954774e-05, - "loss": 5.6563, - "step": 24026 - }, - { - "epoch": 12.530378096479792, - "grad_norm": 1.3768976926803589, - "learning_rate": 7.636281407035176e-05, - "loss": 5.9811, - "step": 24027 - }, - { - "epoch": 12.53089960886571, - "grad_norm": 1.4787874221801758, - "learning_rate": 7.636180904522613e-05, - "loss": 5.4166, - "step": 24028 - }, - { - "epoch": 12.531421121251629, - "grad_norm": 1.3674734830856323, - "learning_rate": 7.63608040201005e-05, - "loss": 5.7595, - "step": 24029 - }, - { - "epoch": 12.531942633637549, - "grad_norm": 1.4548860788345337, - "learning_rate": 7.635979899497487e-05, - "loss": 5.339, - "step": 24030 - }, - { - "epoch": 12.532464146023468, - "grad_norm": 1.5006753206253052, - "learning_rate": 7.635879396984925e-05, - "loss": 5.4244, - "step": 24031 - }, - { - "epoch": 12.532985658409387, - "grad_norm": 1.5715025663375854, - "learning_rate": 7.635778894472361e-05, - "loss": 5.5989, - "step": 24032 - }, - { - "epoch": 12.533507170795307, - "grad_norm": 1.319155216217041, - "learning_rate": 7.635678391959799e-05, - "loss": 4.2722, - "step": 24033 - }, - { - "epoch": 12.534028683181226, - "grad_norm": 1.5723530054092407, - "learning_rate": 7.635577889447237e-05, - "loss": 5.4504, - "step": 24034 - }, - { - "epoch": 12.534550195567144, - "grad_norm": 1.464119553565979, - "learning_rate": 7.635477386934675e-05, - "loss": 5.7512, - "step": 24035 - }, - { - "epoch": 12.535071707953064, - "grad_norm": 1.5393651723861694, - "learning_rate": 7.635376884422111e-05, - "loss": 4.9948, - "step": 24036 - }, - { - "epoch": 12.535593220338983, - "grad_norm": 1.4583607912063599, - "learning_rate": 7.635276381909549e-05, - "loss": 5.6383, - "step": 24037 - }, - { - "epoch": 12.536114732724903, - "grad_norm": 1.5604584217071533, - "learning_rate": 7.635175879396985e-05, - "loss": 5.1859, - "step": 24038 - }, - { - "epoch": 12.536636245110822, - "grad_norm": 1.8999650478363037, - "learning_rate": 7.635075376884423e-05, - "loss": 4.7615, - "step": 24039 - }, - { - "epoch": 12.53715775749674, - "grad_norm": 1.4389922618865967, - "learning_rate": 7.63497487437186e-05, - "loss": 5.4352, - "step": 24040 - }, - { - "epoch": 12.53767926988266, - "grad_norm": 1.4579598903656006, - "learning_rate": 7.634874371859296e-05, - "loss": 5.2612, - "step": 24041 - }, - { - "epoch": 12.538200782268579, - "grad_norm": 1.406278133392334, - "learning_rate": 7.634773869346734e-05, - "loss": 5.8563, - "step": 24042 - }, - { - "epoch": 12.538722294654498, - "grad_norm": 1.4361164569854736, - "learning_rate": 7.634673366834171e-05, - "loss": 5.7577, - "step": 24043 - }, - { - "epoch": 12.539243807040418, - "grad_norm": 1.4122111797332764, - "learning_rate": 7.634572864321609e-05, - "loss": 5.5339, - "step": 24044 - }, - { - "epoch": 12.539765319426337, - "grad_norm": 1.5974842309951782, - "learning_rate": 7.634472361809046e-05, - "loss": 4.9756, - "step": 24045 - }, - { - "epoch": 12.540286831812256, - "grad_norm": 1.5577232837677002, - "learning_rate": 7.634371859296483e-05, - "loss": 5.0403, - "step": 24046 - }, - { - "epoch": 12.540808344198174, - "grad_norm": 1.7335478067398071, - "learning_rate": 7.63427135678392e-05, - "loss": 5.1093, - "step": 24047 - }, - { - "epoch": 12.541329856584094, - "grad_norm": 1.455938458442688, - "learning_rate": 7.634170854271358e-05, - "loss": 5.3593, - "step": 24048 - }, - { - "epoch": 12.541851368970013, - "grad_norm": 1.579758882522583, - "learning_rate": 7.634070351758794e-05, - "loss": 5.2255, - "step": 24049 - }, - { - "epoch": 12.542372881355933, - "grad_norm": 1.3703843355178833, - "learning_rate": 7.633969849246232e-05, - "loss": 5.7102, - "step": 24050 - }, - { - "epoch": 12.542894393741852, - "grad_norm": 1.5027378797531128, - "learning_rate": 7.633869346733668e-05, - "loss": 5.3348, - "step": 24051 - }, - { - "epoch": 12.54341590612777, - "grad_norm": 1.5968945026397705, - "learning_rate": 7.633768844221106e-05, - "loss": 5.0147, - "step": 24052 - }, - { - "epoch": 12.54393741851369, - "grad_norm": 1.4413731098175049, - "learning_rate": 7.633668341708542e-05, - "loss": 5.3156, - "step": 24053 - }, - { - "epoch": 12.544458930899609, - "grad_norm": 1.4975122213363647, - "learning_rate": 7.63356783919598e-05, - "loss": 5.2105, - "step": 24054 - }, - { - "epoch": 12.544980443285528, - "grad_norm": 1.6527618169784546, - "learning_rate": 7.633467336683418e-05, - "loss": 5.4189, - "step": 24055 - }, - { - "epoch": 12.545501955671448, - "grad_norm": 1.5941699743270874, - "learning_rate": 7.633366834170854e-05, - "loss": 5.2952, - "step": 24056 - }, - { - "epoch": 12.546023468057367, - "grad_norm": 1.4538296461105347, - "learning_rate": 7.633266331658292e-05, - "loss": 5.5466, - "step": 24057 - }, - { - "epoch": 12.546544980443285, - "grad_norm": 1.4288579225540161, - "learning_rate": 7.633165829145729e-05, - "loss": 5.2128, - "step": 24058 - }, - { - "epoch": 12.547066492829204, - "grad_norm": 1.4165767431259155, - "learning_rate": 7.633065326633166e-05, - "loss": 5.3245, - "step": 24059 - }, - { - "epoch": 12.547588005215124, - "grad_norm": 1.5150104761123657, - "learning_rate": 7.632964824120603e-05, - "loss": 5.2358, - "step": 24060 - }, - { - "epoch": 12.548109517601043, - "grad_norm": 1.3911129236221313, - "learning_rate": 7.63286432160804e-05, - "loss": 5.0671, - "step": 24061 - }, - { - "epoch": 12.548631029986963, - "grad_norm": 1.5053627490997314, - "learning_rate": 7.632763819095477e-05, - "loss": 5.3879, - "step": 24062 - }, - { - "epoch": 12.549152542372882, - "grad_norm": 1.4448109865188599, - "learning_rate": 7.632663316582915e-05, - "loss": 5.7534, - "step": 24063 - }, - { - "epoch": 12.5496740547588, - "grad_norm": 1.4452301263809204, - "learning_rate": 7.632562814070353e-05, - "loss": 5.4577, - "step": 24064 - }, - { - "epoch": 12.55019556714472, - "grad_norm": 1.4025191068649292, - "learning_rate": 7.63246231155779e-05, - "loss": 5.5259, - "step": 24065 - }, - { - "epoch": 12.550717079530639, - "grad_norm": 1.4997824430465698, - "learning_rate": 7.632361809045227e-05, - "loss": 5.5111, - "step": 24066 - }, - { - "epoch": 12.551238591916558, - "grad_norm": 1.4373723268508911, - "learning_rate": 7.632261306532663e-05, - "loss": 5.6024, - "step": 24067 - }, - { - "epoch": 12.551760104302478, - "grad_norm": 1.6126000881195068, - "learning_rate": 7.632160804020101e-05, - "loss": 5.2945, - "step": 24068 - }, - { - "epoch": 12.552281616688397, - "grad_norm": 1.521464467048645, - "learning_rate": 7.632060301507537e-05, - "loss": 5.0887, - "step": 24069 - }, - { - "epoch": 12.552803129074315, - "grad_norm": 1.5646284818649292, - "learning_rate": 7.631959798994975e-05, - "loss": 5.3947, - "step": 24070 - }, - { - "epoch": 12.553324641460234, - "grad_norm": 1.463364601135254, - "learning_rate": 7.631859296482412e-05, - "loss": 5.6258, - "step": 24071 - }, - { - "epoch": 12.553846153846154, - "grad_norm": 1.5667576789855957, - "learning_rate": 7.63175879396985e-05, - "loss": 5.3443, - "step": 24072 - }, - { - "epoch": 12.554367666232073, - "grad_norm": 1.327050805091858, - "learning_rate": 7.631658291457286e-05, - "loss": 5.692, - "step": 24073 - }, - { - "epoch": 12.554889178617993, - "grad_norm": 1.3768302202224731, - "learning_rate": 7.631557788944724e-05, - "loss": 5.6739, - "step": 24074 - }, - { - "epoch": 12.555410691003912, - "grad_norm": 1.4015555381774902, - "learning_rate": 7.631457286432161e-05, - "loss": 5.7283, - "step": 24075 - }, - { - "epoch": 12.55593220338983, - "grad_norm": 1.4619792699813843, - "learning_rate": 7.631356783919599e-05, - "loss": 5.6194, - "step": 24076 - }, - { - "epoch": 12.55645371577575, - "grad_norm": 1.3818918466567993, - "learning_rate": 7.631256281407036e-05, - "loss": 5.5992, - "step": 24077 - }, - { - "epoch": 12.556975228161669, - "grad_norm": 1.4130626916885376, - "learning_rate": 7.631155778894473e-05, - "loss": 5.845, - "step": 24078 - }, - { - "epoch": 12.557496740547588, - "grad_norm": 1.5316866636276245, - "learning_rate": 7.63105527638191e-05, - "loss": 5.2808, - "step": 24079 - }, - { - "epoch": 12.558018252933508, - "grad_norm": 1.4711416959762573, - "learning_rate": 7.630954773869348e-05, - "loss": 4.8518, - "step": 24080 - }, - { - "epoch": 12.558539765319427, - "grad_norm": 1.7254583835601807, - "learning_rate": 7.630854271356784e-05, - "loss": 5.1424, - "step": 24081 - }, - { - "epoch": 12.559061277705345, - "grad_norm": 1.496216893196106, - "learning_rate": 7.63075376884422e-05, - "loss": 5.3217, - "step": 24082 - }, - { - "epoch": 12.559582790091264, - "grad_norm": 1.6095421314239502, - "learning_rate": 7.630653266331658e-05, - "loss": 5.7075, - "step": 24083 - }, - { - "epoch": 12.560104302477184, - "grad_norm": 1.4454114437103271, - "learning_rate": 7.630552763819096e-05, - "loss": 5.605, - "step": 24084 - }, - { - "epoch": 12.560625814863103, - "grad_norm": 1.481951355934143, - "learning_rate": 7.630452261306534e-05, - "loss": 5.2712, - "step": 24085 - }, - { - "epoch": 12.561147327249023, - "grad_norm": 1.4036610126495361, - "learning_rate": 7.63035175879397e-05, - "loss": 5.4912, - "step": 24086 - }, - { - "epoch": 12.561668839634942, - "grad_norm": 1.4880784749984741, - "learning_rate": 7.630251256281408e-05, - "loss": 5.4649, - "step": 24087 - }, - { - "epoch": 12.56219035202086, - "grad_norm": 1.4286760091781616, - "learning_rate": 7.630150753768844e-05, - "loss": 5.1267, - "step": 24088 - }, - { - "epoch": 12.56271186440678, - "grad_norm": 1.4319508075714111, - "learning_rate": 7.630050251256282e-05, - "loss": 5.6124, - "step": 24089 - }, - { - "epoch": 12.563233376792699, - "grad_norm": 1.4441328048706055, - "learning_rate": 7.629949748743719e-05, - "loss": 5.5669, - "step": 24090 - }, - { - "epoch": 12.563754889178618, - "grad_norm": 1.4575923681259155, - "learning_rate": 7.629849246231156e-05, - "loss": 5.3625, - "step": 24091 - }, - { - "epoch": 12.564276401564538, - "grad_norm": 1.4259700775146484, - "learning_rate": 7.629748743718593e-05, - "loss": 5.6959, - "step": 24092 - }, - { - "epoch": 12.564797913950457, - "grad_norm": 1.4856853485107422, - "learning_rate": 7.62964824120603e-05, - "loss": 5.4471, - "step": 24093 - }, - { - "epoch": 12.565319426336375, - "grad_norm": 1.5089404582977295, - "learning_rate": 7.629547738693467e-05, - "loss": 5.6335, - "step": 24094 - }, - { - "epoch": 12.565840938722294, - "grad_norm": 1.4292603731155396, - "learning_rate": 7.629447236180905e-05, - "loss": 5.81, - "step": 24095 - }, - { - "epoch": 12.566362451108214, - "grad_norm": 1.414827823638916, - "learning_rate": 7.629346733668343e-05, - "loss": 5.2227, - "step": 24096 - }, - { - "epoch": 12.566883963494133, - "grad_norm": 1.5426223278045654, - "learning_rate": 7.629246231155779e-05, - "loss": 5.6166, - "step": 24097 - }, - { - "epoch": 12.567405475880053, - "grad_norm": 1.4248595237731934, - "learning_rate": 7.629145728643217e-05, - "loss": 5.7097, - "step": 24098 - }, - { - "epoch": 12.567926988265972, - "grad_norm": 1.4977900981903076, - "learning_rate": 7.629045226130653e-05, - "loss": 5.6689, - "step": 24099 - }, - { - "epoch": 12.56844850065189, - "grad_norm": 1.4485907554626465, - "learning_rate": 7.628944723618091e-05, - "loss": 5.5703, - "step": 24100 - }, - { - "epoch": 12.56897001303781, - "grad_norm": 1.3246238231658936, - "learning_rate": 7.628844221105528e-05, - "loss": 5.7299, - "step": 24101 - }, - { - "epoch": 12.569491525423729, - "grad_norm": 1.5607991218566895, - "learning_rate": 7.628743718592965e-05, - "loss": 5.0582, - "step": 24102 - }, - { - "epoch": 12.570013037809648, - "grad_norm": 1.3766207695007324, - "learning_rate": 7.628643216080402e-05, - "loss": 5.5735, - "step": 24103 - }, - { - "epoch": 12.570534550195568, - "grad_norm": 1.3481364250183105, - "learning_rate": 7.62854271356784e-05, - "loss": 5.6675, - "step": 24104 - }, - { - "epoch": 12.571056062581487, - "grad_norm": 1.4065724611282349, - "learning_rate": 7.628442211055277e-05, - "loss": 5.339, - "step": 24105 - }, - { - "epoch": 12.571577574967405, - "grad_norm": 1.3406007289886475, - "learning_rate": 7.628341708542715e-05, - "loss": 5.5998, - "step": 24106 - }, - { - "epoch": 12.572099087353324, - "grad_norm": 1.3822495937347412, - "learning_rate": 7.628241206030152e-05, - "loss": 5.6214, - "step": 24107 - }, - { - "epoch": 12.572620599739244, - "grad_norm": 1.5310370922088623, - "learning_rate": 7.628140703517588e-05, - "loss": 4.7087, - "step": 24108 - }, - { - "epoch": 12.573142112125163, - "grad_norm": 1.5979671478271484, - "learning_rate": 7.628040201005026e-05, - "loss": 5.0719, - "step": 24109 - }, - { - "epoch": 12.573663624511083, - "grad_norm": 1.4765231609344482, - "learning_rate": 7.627939698492462e-05, - "loss": 5.4973, - "step": 24110 - }, - { - "epoch": 12.574185136897002, - "grad_norm": 1.418381690979004, - "learning_rate": 7.6278391959799e-05, - "loss": 5.5665, - "step": 24111 - }, - { - "epoch": 12.57470664928292, - "grad_norm": 1.3813358545303345, - "learning_rate": 7.627738693467336e-05, - "loss": 5.7308, - "step": 24112 - }, - { - "epoch": 12.57522816166884, - "grad_norm": 1.4717144966125488, - "learning_rate": 7.627638190954774e-05, - "loss": 5.2331, - "step": 24113 - }, - { - "epoch": 12.575749674054759, - "grad_norm": 1.4739454984664917, - "learning_rate": 7.62753768844221e-05, - "loss": 5.198, - "step": 24114 - }, - { - "epoch": 12.576271186440678, - "grad_norm": 1.4204965829849243, - "learning_rate": 7.627437185929648e-05, - "loss": 5.2445, - "step": 24115 - }, - { - "epoch": 12.576792698826598, - "grad_norm": 1.4052542448043823, - "learning_rate": 7.627336683417086e-05, - "loss": 5.2774, - "step": 24116 - }, - { - "epoch": 12.577314211212517, - "grad_norm": 1.5703935623168945, - "learning_rate": 7.627236180904524e-05, - "loss": 4.8241, - "step": 24117 - }, - { - "epoch": 12.577835723598435, - "grad_norm": 1.3988990783691406, - "learning_rate": 7.62713567839196e-05, - "loss": 5.2675, - "step": 24118 - }, - { - "epoch": 12.578357235984354, - "grad_norm": 1.496049404144287, - "learning_rate": 7.627035175879398e-05, - "loss": 5.4987, - "step": 24119 - }, - { - "epoch": 12.578878748370274, - "grad_norm": 1.5364041328430176, - "learning_rate": 7.626934673366835e-05, - "loss": 5.3965, - "step": 24120 - }, - { - "epoch": 12.579400260756193, - "grad_norm": 1.3627725839614868, - "learning_rate": 7.626834170854271e-05, - "loss": 5.2062, - "step": 24121 - }, - { - "epoch": 12.579921773142113, - "grad_norm": 1.5016710758209229, - "learning_rate": 7.626733668341709e-05, - "loss": 4.7963, - "step": 24122 - }, - { - "epoch": 12.58044328552803, - "grad_norm": 1.4281171560287476, - "learning_rate": 7.626633165829145e-05, - "loss": 5.7446, - "step": 24123 - }, - { - "epoch": 12.58096479791395, - "grad_norm": 1.4082679748535156, - "learning_rate": 7.626532663316583e-05, - "loss": 5.7332, - "step": 24124 - }, - { - "epoch": 12.58148631029987, - "grad_norm": 1.4607770442962646, - "learning_rate": 7.626432160804021e-05, - "loss": 5.6527, - "step": 24125 - }, - { - "epoch": 12.582007822685789, - "grad_norm": 1.447011947631836, - "learning_rate": 7.626331658291459e-05, - "loss": 5.2131, - "step": 24126 - }, - { - "epoch": 12.582529335071708, - "grad_norm": 1.355242371559143, - "learning_rate": 7.626231155778895e-05, - "loss": 5.8521, - "step": 24127 - }, - { - "epoch": 12.583050847457628, - "grad_norm": 1.4031840562820435, - "learning_rate": 7.626130653266333e-05, - "loss": 5.6994, - "step": 24128 - }, - { - "epoch": 12.583572359843547, - "grad_norm": 1.4380722045898438, - "learning_rate": 7.626030150753769e-05, - "loss": 5.2785, - "step": 24129 - }, - { - "epoch": 12.584093872229465, - "grad_norm": 1.5059834718704224, - "learning_rate": 7.625929648241207e-05, - "loss": 5.2479, - "step": 24130 - }, - { - "epoch": 12.584615384615384, - "grad_norm": 1.5301505327224731, - "learning_rate": 7.625829145728643e-05, - "loss": 5.0478, - "step": 24131 - }, - { - "epoch": 12.585136897001304, - "grad_norm": 1.5937585830688477, - "learning_rate": 7.625728643216081e-05, - "loss": 5.5311, - "step": 24132 - }, - { - "epoch": 12.585658409387223, - "grad_norm": 1.4250609874725342, - "learning_rate": 7.625628140703518e-05, - "loss": 5.2276, - "step": 24133 - }, - { - "epoch": 12.586179921773143, - "grad_norm": 1.439974308013916, - "learning_rate": 7.625527638190954e-05, - "loss": 5.6186, - "step": 24134 - }, - { - "epoch": 12.58670143415906, - "grad_norm": 1.5494141578674316, - "learning_rate": 7.625427135678392e-05, - "loss": 5.4496, - "step": 24135 - }, - { - "epoch": 12.58722294654498, - "grad_norm": 1.498809576034546, - "learning_rate": 7.62532663316583e-05, - "loss": 5.0745, - "step": 24136 - }, - { - "epoch": 12.5877444589309, - "grad_norm": 1.4965367317199707, - "learning_rate": 7.625226130653267e-05, - "loss": 5.1224, - "step": 24137 - }, - { - "epoch": 12.588265971316819, - "grad_norm": 1.5342637300491333, - "learning_rate": 7.625125628140704e-05, - "loss": 5.1519, - "step": 24138 - }, - { - "epoch": 12.588787483702738, - "grad_norm": 1.491075873374939, - "learning_rate": 7.625025125628142e-05, - "loss": 5.3477, - "step": 24139 - }, - { - "epoch": 12.589308996088658, - "grad_norm": 1.3220551013946533, - "learning_rate": 7.624924623115578e-05, - "loss": 5.7869, - "step": 24140 - }, - { - "epoch": 12.589830508474577, - "grad_norm": 1.4122811555862427, - "learning_rate": 7.624824120603016e-05, - "loss": 5.226, - "step": 24141 - }, - { - "epoch": 12.590352020860495, - "grad_norm": 1.4936463832855225, - "learning_rate": 7.624723618090452e-05, - "loss": 5.7548, - "step": 24142 - }, - { - "epoch": 12.590873533246414, - "grad_norm": 1.5473060607910156, - "learning_rate": 7.62462311557789e-05, - "loss": 5.0599, - "step": 24143 - }, - { - "epoch": 12.591395045632334, - "grad_norm": 1.491936206817627, - "learning_rate": 7.624522613065326e-05, - "loss": 5.5759, - "step": 24144 - }, - { - "epoch": 12.591916558018253, - "grad_norm": 1.4325995445251465, - "learning_rate": 7.624422110552764e-05, - "loss": 5.3913, - "step": 24145 - }, - { - "epoch": 12.592438070404173, - "grad_norm": 1.6516683101654053, - "learning_rate": 7.624321608040202e-05, - "loss": 4.3424, - "step": 24146 - }, - { - "epoch": 12.59295958279009, - "grad_norm": 1.422881007194519, - "learning_rate": 7.624221105527638e-05, - "loss": 5.312, - "step": 24147 - }, - { - "epoch": 12.59348109517601, - "grad_norm": 1.4374313354492188, - "learning_rate": 7.624120603015076e-05, - "loss": 5.4347, - "step": 24148 - }, - { - "epoch": 12.59400260756193, - "grad_norm": 1.5058659315109253, - "learning_rate": 7.624020100502513e-05, - "loss": 4.9774, - "step": 24149 - }, - { - "epoch": 12.594524119947849, - "grad_norm": 1.4113134145736694, - "learning_rate": 7.62391959798995e-05, - "loss": 5.6875, - "step": 24150 - }, - { - "epoch": 12.595045632333768, - "grad_norm": 1.4116578102111816, - "learning_rate": 7.623819095477387e-05, - "loss": 5.1119, - "step": 24151 - }, - { - "epoch": 12.595567144719688, - "grad_norm": 1.4490816593170166, - "learning_rate": 7.623718592964825e-05, - "loss": 5.3124, - "step": 24152 - }, - { - "epoch": 12.596088657105605, - "grad_norm": 1.430983543395996, - "learning_rate": 7.623618090452261e-05, - "loss": 5.4707, - "step": 24153 - }, - { - "epoch": 12.596610169491525, - "grad_norm": 1.5544211864471436, - "learning_rate": 7.623517587939699e-05, - "loss": 5.1152, - "step": 24154 - }, - { - "epoch": 12.597131681877444, - "grad_norm": 1.4214533567428589, - "learning_rate": 7.623417085427135e-05, - "loss": 5.2389, - "step": 24155 - }, - { - "epoch": 12.597653194263364, - "grad_norm": 1.6084270477294922, - "learning_rate": 7.623316582914573e-05, - "loss": 5.1936, - "step": 24156 - }, - { - "epoch": 12.598174706649283, - "grad_norm": 1.411137580871582, - "learning_rate": 7.623216080402011e-05, - "loss": 5.2389, - "step": 24157 - }, - { - "epoch": 12.598696219035203, - "grad_norm": 1.390541911125183, - "learning_rate": 7.623115577889449e-05, - "loss": 5.0296, - "step": 24158 - }, - { - "epoch": 12.59921773142112, - "grad_norm": 1.5025936365127563, - "learning_rate": 7.623015075376885e-05, - "loss": 5.0984, - "step": 24159 - }, - { - "epoch": 12.59973924380704, - "grad_norm": 1.4402844905853271, - "learning_rate": 7.622914572864321e-05, - "loss": 5.108, - "step": 24160 - }, - { - "epoch": 12.60026075619296, - "grad_norm": 1.3617522716522217, - "learning_rate": 7.622814070351759e-05, - "loss": 5.5347, - "step": 24161 - }, - { - "epoch": 12.600782268578879, - "grad_norm": 1.3534892797470093, - "learning_rate": 7.622713567839196e-05, - "loss": 5.1745, - "step": 24162 - }, - { - "epoch": 12.601303780964798, - "grad_norm": 1.454686164855957, - "learning_rate": 7.622613065326633e-05, - "loss": 5.5468, - "step": 24163 - }, - { - "epoch": 12.601825293350718, - "grad_norm": 1.3817729949951172, - "learning_rate": 7.62251256281407e-05, - "loss": 5.4126, - "step": 24164 - }, - { - "epoch": 12.602346805736635, - "grad_norm": 1.4556024074554443, - "learning_rate": 7.622412060301508e-05, - "loss": 4.6736, - "step": 24165 - }, - { - "epoch": 12.602868318122555, - "grad_norm": 1.3483344316482544, - "learning_rate": 7.622311557788945e-05, - "loss": 5.7941, - "step": 24166 - }, - { - "epoch": 12.603389830508474, - "grad_norm": 1.5422853231430054, - "learning_rate": 7.622211055276383e-05, - "loss": 5.2449, - "step": 24167 - }, - { - "epoch": 12.603911342894394, - "grad_norm": 1.4208157062530518, - "learning_rate": 7.62211055276382e-05, - "loss": 5.4183, - "step": 24168 - }, - { - "epoch": 12.604432855280313, - "grad_norm": 1.514634370803833, - "learning_rate": 7.622010050251257e-05, - "loss": 4.761, - "step": 24169 - }, - { - "epoch": 12.604954367666233, - "grad_norm": 1.5216585397720337, - "learning_rate": 7.621909547738694e-05, - "loss": 4.8503, - "step": 24170 - }, - { - "epoch": 12.60547588005215, - "grad_norm": 1.4797309637069702, - "learning_rate": 7.621809045226132e-05, - "loss": 5.4263, - "step": 24171 - }, - { - "epoch": 12.60599739243807, - "grad_norm": 1.4402559995651245, - "learning_rate": 7.621708542713568e-05, - "loss": 5.6771, - "step": 24172 - }, - { - "epoch": 12.60651890482399, - "grad_norm": 1.4043331146240234, - "learning_rate": 7.621608040201006e-05, - "loss": 5.6299, - "step": 24173 - }, - { - "epoch": 12.607040417209909, - "grad_norm": 1.594487190246582, - "learning_rate": 7.621507537688442e-05, - "loss": 5.0859, - "step": 24174 - }, - { - "epoch": 12.607561929595828, - "grad_norm": 1.631951928138733, - "learning_rate": 7.621407035175879e-05, - "loss": 5.3209, - "step": 24175 - }, - { - "epoch": 12.608083441981748, - "grad_norm": 1.357652187347412, - "learning_rate": 7.621306532663317e-05, - "loss": 5.9911, - "step": 24176 - }, - { - "epoch": 12.608604954367665, - "grad_norm": 1.6444132328033447, - "learning_rate": 7.621206030150754e-05, - "loss": 5.2708, - "step": 24177 - }, - { - "epoch": 12.609126466753585, - "grad_norm": 1.508195400238037, - "learning_rate": 7.621105527638192e-05, - "loss": 5.7682, - "step": 24178 - }, - { - "epoch": 12.609647979139504, - "grad_norm": 1.5563690662384033, - "learning_rate": 7.621005025125628e-05, - "loss": 5.2938, - "step": 24179 - }, - { - "epoch": 12.610169491525424, - "grad_norm": 1.2951006889343262, - "learning_rate": 7.620904522613066e-05, - "loss": 5.3951, - "step": 24180 - }, - { - "epoch": 12.610691003911343, - "grad_norm": 1.586787223815918, - "learning_rate": 7.620804020100503e-05, - "loss": 5.3108, - "step": 24181 - }, - { - "epoch": 12.611212516297263, - "grad_norm": 1.4401277303695679, - "learning_rate": 7.62070351758794e-05, - "loss": 5.3007, - "step": 24182 - }, - { - "epoch": 12.61173402868318, - "grad_norm": 1.432283639907837, - "learning_rate": 7.620603015075377e-05, - "loss": 5.3844, - "step": 24183 - }, - { - "epoch": 12.6122555410691, - "grad_norm": 1.4901829957962036, - "learning_rate": 7.620502512562815e-05, - "loss": 5.3679, - "step": 24184 - }, - { - "epoch": 12.61277705345502, - "grad_norm": 1.4038480520248413, - "learning_rate": 7.620402010050251e-05, - "loss": 5.6167, - "step": 24185 - }, - { - "epoch": 12.613298565840939, - "grad_norm": 1.417486548423767, - "learning_rate": 7.620301507537689e-05, - "loss": 5.289, - "step": 24186 - }, - { - "epoch": 12.613820078226858, - "grad_norm": 1.3204275369644165, - "learning_rate": 7.620201005025125e-05, - "loss": 5.5602, - "step": 24187 - }, - { - "epoch": 12.614341590612778, - "grad_norm": 1.4863862991333008, - "learning_rate": 7.620100502512563e-05, - "loss": 5.427, - "step": 24188 - }, - { - "epoch": 12.614863102998696, - "grad_norm": 1.3843861818313599, - "learning_rate": 7.620000000000001e-05, - "loss": 5.6665, - "step": 24189 - }, - { - "epoch": 12.615384615384615, - "grad_norm": 1.4128495454788208, - "learning_rate": 7.619899497487437e-05, - "loss": 5.6004, - "step": 24190 - }, - { - "epoch": 12.615906127770534, - "grad_norm": 1.4508839845657349, - "learning_rate": 7.619798994974875e-05, - "loss": 5.5374, - "step": 24191 - }, - { - "epoch": 12.616427640156454, - "grad_norm": 1.3601515293121338, - "learning_rate": 7.619698492462312e-05, - "loss": 5.6222, - "step": 24192 - }, - { - "epoch": 12.616949152542373, - "grad_norm": 1.5514222383499146, - "learning_rate": 7.61959798994975e-05, - "loss": 5.2341, - "step": 24193 - }, - { - "epoch": 12.617470664928293, - "grad_norm": 1.3885352611541748, - "learning_rate": 7.619497487437186e-05, - "loss": 5.6994, - "step": 24194 - }, - { - "epoch": 12.61799217731421, - "grad_norm": 1.531398892402649, - "learning_rate": 7.619396984924624e-05, - "loss": 4.3476, - "step": 24195 - }, - { - "epoch": 12.61851368970013, - "grad_norm": 1.4559667110443115, - "learning_rate": 7.61929648241206e-05, - "loss": 5.5837, - "step": 24196 - }, - { - "epoch": 12.61903520208605, - "grad_norm": 1.4265440702438354, - "learning_rate": 7.619195979899498e-05, - "loss": 5.2975, - "step": 24197 - }, - { - "epoch": 12.619556714471969, - "grad_norm": 1.3890931606292725, - "learning_rate": 7.619095477386936e-05, - "loss": 5.7354, - "step": 24198 - }, - { - "epoch": 12.620078226857888, - "grad_norm": 1.4274128675460815, - "learning_rate": 7.618994974874373e-05, - "loss": 5.22, - "step": 24199 - }, - { - "epoch": 12.620599739243808, - "grad_norm": 1.4132174253463745, - "learning_rate": 7.61889447236181e-05, - "loss": 5.011, - "step": 24200 - }, - { - "epoch": 12.621121251629726, - "grad_norm": 1.4926615953445435, - "learning_rate": 7.618793969849246e-05, - "loss": 5.4963, - "step": 24201 - }, - { - "epoch": 12.621642764015645, - "grad_norm": 1.6381510496139526, - "learning_rate": 7.618693467336684e-05, - "loss": 5.3921, - "step": 24202 - }, - { - "epoch": 12.622164276401564, - "grad_norm": 1.4361557960510254, - "learning_rate": 7.61859296482412e-05, - "loss": 5.6453, - "step": 24203 - }, - { - "epoch": 12.622685788787484, - "grad_norm": 1.5244101285934448, - "learning_rate": 7.618492462311558e-05, - "loss": 5.7121, - "step": 24204 - }, - { - "epoch": 12.623207301173403, - "grad_norm": 1.4404422044754028, - "learning_rate": 7.618391959798995e-05, - "loss": 5.801, - "step": 24205 - }, - { - "epoch": 12.623728813559323, - "grad_norm": 1.3525457382202148, - "learning_rate": 7.618291457286432e-05, - "loss": 5.5174, - "step": 24206 - }, - { - "epoch": 12.62425032594524, - "grad_norm": 1.478474497795105, - "learning_rate": 7.618190954773869e-05, - "loss": 5.5023, - "step": 24207 - }, - { - "epoch": 12.62477183833116, - "grad_norm": 1.5552101135253906, - "learning_rate": 7.618090452261307e-05, - "loss": 5.1016, - "step": 24208 - }, - { - "epoch": 12.62529335071708, - "grad_norm": 1.5484611988067627, - "learning_rate": 7.617989949748744e-05, - "loss": 4.7616, - "step": 24209 - }, - { - "epoch": 12.625814863102999, - "grad_norm": 1.5170210599899292, - "learning_rate": 7.617889447236182e-05, - "loss": 5.2322, - "step": 24210 - }, - { - "epoch": 12.626336375488918, - "grad_norm": 1.5304886102676392, - "learning_rate": 7.617788944723619e-05, - "loss": 5.1984, - "step": 24211 - }, - { - "epoch": 12.626857887874838, - "grad_norm": 1.483656406402588, - "learning_rate": 7.617688442211056e-05, - "loss": 5.0748, - "step": 24212 - }, - { - "epoch": 12.627379400260756, - "grad_norm": 1.4101771116256714, - "learning_rate": 7.617587939698493e-05, - "loss": 5.7327, - "step": 24213 - }, - { - "epoch": 12.627900912646675, - "grad_norm": 1.4998592138290405, - "learning_rate": 7.617487437185929e-05, - "loss": 5.3793, - "step": 24214 - }, - { - "epoch": 12.628422425032594, - "grad_norm": 1.4013618230819702, - "learning_rate": 7.617386934673367e-05, - "loss": 5.4732, - "step": 24215 - }, - { - "epoch": 12.628943937418514, - "grad_norm": 1.4045625925064087, - "learning_rate": 7.617286432160803e-05, - "loss": 5.621, - "step": 24216 - }, - { - "epoch": 12.629465449804433, - "grad_norm": 1.4038751125335693, - "learning_rate": 7.617185929648241e-05, - "loss": 5.324, - "step": 24217 - }, - { - "epoch": 12.629986962190351, - "grad_norm": 1.4875398874282837, - "learning_rate": 7.617085427135679e-05, - "loss": 4.9792, - "step": 24218 - }, - { - "epoch": 12.63050847457627, - "grad_norm": 1.4283933639526367, - "learning_rate": 7.616984924623117e-05, - "loss": 5.4929, - "step": 24219 - }, - { - "epoch": 12.63102998696219, - "grad_norm": 1.3719485998153687, - "learning_rate": 7.616884422110553e-05, - "loss": 5.4388, - "step": 24220 - }, - { - "epoch": 12.63155149934811, - "grad_norm": 1.5548839569091797, - "learning_rate": 7.616783919597991e-05, - "loss": 5.3351, - "step": 24221 - }, - { - "epoch": 12.632073011734029, - "grad_norm": 1.5367653369903564, - "learning_rate": 7.616683417085427e-05, - "loss": 5.122, - "step": 24222 - }, - { - "epoch": 12.632594524119948, - "grad_norm": 1.6774739027023315, - "learning_rate": 7.616582914572865e-05, - "loss": 5.4308, - "step": 24223 - }, - { - "epoch": 12.633116036505868, - "grad_norm": 1.422676682472229, - "learning_rate": 7.616482412060302e-05, - "loss": 5.3742, - "step": 24224 - }, - { - "epoch": 12.633637548891786, - "grad_norm": 1.4175447225570679, - "learning_rate": 7.61638190954774e-05, - "loss": 5.5007, - "step": 24225 - }, - { - "epoch": 12.634159061277705, - "grad_norm": 1.3326914310455322, - "learning_rate": 7.616281407035176e-05, - "loss": 5.4993, - "step": 24226 - }, - { - "epoch": 12.634680573663625, - "grad_norm": 1.5058164596557617, - "learning_rate": 7.616180904522612e-05, - "loss": 5.1884, - "step": 24227 - }, - { - "epoch": 12.635202086049544, - "grad_norm": 1.7090483903884888, - "learning_rate": 7.61608040201005e-05, - "loss": 4.4624, - "step": 24228 - }, - { - "epoch": 12.635723598435463, - "grad_norm": 1.3659987449645996, - "learning_rate": 7.615979899497488e-05, - "loss": 5.7738, - "step": 24229 - }, - { - "epoch": 12.636245110821381, - "grad_norm": 1.4246097803115845, - "learning_rate": 7.615879396984926e-05, - "loss": 5.5055, - "step": 24230 - }, - { - "epoch": 12.6367666232073, - "grad_norm": 1.437363624572754, - "learning_rate": 7.615778894472362e-05, - "loss": 5.1862, - "step": 24231 - }, - { - "epoch": 12.63728813559322, - "grad_norm": 1.4266610145568848, - "learning_rate": 7.6156783919598e-05, - "loss": 5.3939, - "step": 24232 - }, - { - "epoch": 12.63780964797914, - "grad_norm": 1.3684862852096558, - "learning_rate": 7.615577889447236e-05, - "loss": 5.3784, - "step": 24233 - }, - { - "epoch": 12.638331160365059, - "grad_norm": 1.4450666904449463, - "learning_rate": 7.615477386934674e-05, - "loss": 5.3802, - "step": 24234 - }, - { - "epoch": 12.638852672750978, - "grad_norm": 1.430763602256775, - "learning_rate": 7.61537688442211e-05, - "loss": 5.3984, - "step": 24235 - }, - { - "epoch": 12.639374185136898, - "grad_norm": 1.4839718341827393, - "learning_rate": 7.615276381909548e-05, - "loss": 5.4469, - "step": 24236 - }, - { - "epoch": 12.639895697522816, - "grad_norm": 1.4265727996826172, - "learning_rate": 7.615175879396985e-05, - "loss": 5.4513, - "step": 24237 - }, - { - "epoch": 12.640417209908735, - "grad_norm": 1.4096124172210693, - "learning_rate": 7.615075376884422e-05, - "loss": 5.3273, - "step": 24238 - }, - { - "epoch": 12.640938722294655, - "grad_norm": 1.3463743925094604, - "learning_rate": 7.61497487437186e-05, - "loss": 5.6453, - "step": 24239 - }, - { - "epoch": 12.641460234680574, - "grad_norm": 1.399750828742981, - "learning_rate": 7.614874371859297e-05, - "loss": 5.2865, - "step": 24240 - }, - { - "epoch": 12.641981747066493, - "grad_norm": 1.2925084829330444, - "learning_rate": 7.614773869346734e-05, - "loss": 4.9909, - "step": 24241 - }, - { - "epoch": 12.642503259452411, - "grad_norm": 1.5188477039337158, - "learning_rate": 7.614673366834171e-05, - "loss": 5.5364, - "step": 24242 - }, - { - "epoch": 12.64302477183833, - "grad_norm": 1.3568624258041382, - "learning_rate": 7.614572864321609e-05, - "loss": 5.6568, - "step": 24243 - }, - { - "epoch": 12.64354628422425, - "grad_norm": 1.3039069175720215, - "learning_rate": 7.614472361809045e-05, - "loss": 5.6859, - "step": 24244 - }, - { - "epoch": 12.64406779661017, - "grad_norm": 1.5273234844207764, - "learning_rate": 7.614371859296483e-05, - "loss": 5.3488, - "step": 24245 - }, - { - "epoch": 12.644589308996089, - "grad_norm": 1.4715096950531006, - "learning_rate": 7.614271356783919e-05, - "loss": 5.1486, - "step": 24246 - }, - { - "epoch": 12.645110821382008, - "grad_norm": 1.4775631427764893, - "learning_rate": 7.614170854271357e-05, - "loss": 5.4399, - "step": 24247 - }, - { - "epoch": 12.645632333767926, - "grad_norm": 1.5695005655288696, - "learning_rate": 7.614070351758793e-05, - "loss": 5.3482, - "step": 24248 - }, - { - "epoch": 12.646153846153846, - "grad_norm": 1.4540103673934937, - "learning_rate": 7.613969849246231e-05, - "loss": 5.7559, - "step": 24249 - }, - { - "epoch": 12.646675358539765, - "grad_norm": 1.5203204154968262, - "learning_rate": 7.613869346733669e-05, - "loss": 5.769, - "step": 24250 - }, - { - "epoch": 12.647196870925685, - "grad_norm": 1.4782140254974365, - "learning_rate": 7.613768844221107e-05, - "loss": 5.5672, - "step": 24251 - }, - { - "epoch": 12.647718383311604, - "grad_norm": 1.4632525444030762, - "learning_rate": 7.613668341708543e-05, - "loss": 5.4666, - "step": 24252 - }, - { - "epoch": 12.648239895697523, - "grad_norm": 1.3034495115280151, - "learning_rate": 7.61356783919598e-05, - "loss": 5.9652, - "step": 24253 - }, - { - "epoch": 12.648761408083441, - "grad_norm": 1.5588845014572144, - "learning_rate": 7.613467336683417e-05, - "loss": 5.5238, - "step": 24254 - }, - { - "epoch": 12.64928292046936, - "grad_norm": 1.4894484281539917, - "learning_rate": 7.613366834170854e-05, - "loss": 5.2221, - "step": 24255 - }, - { - "epoch": 12.64980443285528, - "grad_norm": 1.368666172027588, - "learning_rate": 7.613266331658292e-05, - "loss": 5.8006, - "step": 24256 - }, - { - "epoch": 12.6503259452412, - "grad_norm": 1.449100136756897, - "learning_rate": 7.613165829145728e-05, - "loss": 5.4617, - "step": 24257 - }, - { - "epoch": 12.650847457627119, - "grad_norm": 1.4456567764282227, - "learning_rate": 7.613065326633166e-05, - "loss": 5.7216, - "step": 24258 - }, - { - "epoch": 12.651368970013039, - "grad_norm": 1.4415283203125, - "learning_rate": 7.612964824120604e-05, - "loss": 5.6801, - "step": 24259 - }, - { - "epoch": 12.651890482398956, - "grad_norm": 1.4767874479293823, - "learning_rate": 7.612864321608041e-05, - "loss": 5.0167, - "step": 24260 - }, - { - "epoch": 12.652411994784876, - "grad_norm": 1.4466893672943115, - "learning_rate": 7.612763819095478e-05, - "loss": 4.8332, - "step": 24261 - }, - { - "epoch": 12.652933507170795, - "grad_norm": 1.4080402851104736, - "learning_rate": 7.612663316582916e-05, - "loss": 5.5815, - "step": 24262 - }, - { - "epoch": 12.653455019556715, - "grad_norm": 1.4494566917419434, - "learning_rate": 7.612562814070352e-05, - "loss": 5.0936, - "step": 24263 - }, - { - "epoch": 12.653976531942634, - "grad_norm": 1.4056930541992188, - "learning_rate": 7.61246231155779e-05, - "loss": 5.5623, - "step": 24264 - }, - { - "epoch": 12.654498044328554, - "grad_norm": 1.362209439277649, - "learning_rate": 7.612361809045226e-05, - "loss": 5.0709, - "step": 24265 - }, - { - "epoch": 12.655019556714471, - "grad_norm": 1.5332584381103516, - "learning_rate": 7.612261306532664e-05, - "loss": 5.5733, - "step": 24266 - }, - { - "epoch": 12.65554106910039, - "grad_norm": 1.4077486991882324, - "learning_rate": 7.6121608040201e-05, - "loss": 5.2072, - "step": 24267 - }, - { - "epoch": 12.65606258148631, - "grad_norm": 1.5075082778930664, - "learning_rate": 7.612060301507537e-05, - "loss": 5.2357, - "step": 24268 - }, - { - "epoch": 12.65658409387223, - "grad_norm": 1.445037841796875, - "learning_rate": 7.611959798994975e-05, - "loss": 5.385, - "step": 24269 - }, - { - "epoch": 12.657105606258149, - "grad_norm": 1.3983641862869263, - "learning_rate": 7.611859296482413e-05, - "loss": 5.1941, - "step": 24270 - }, - { - "epoch": 12.657627118644069, - "grad_norm": 1.421000361442566, - "learning_rate": 7.61175879396985e-05, - "loss": 5.3457, - "step": 24271 - }, - { - "epoch": 12.658148631029986, - "grad_norm": 1.534568190574646, - "learning_rate": 7.611658291457287e-05, - "loss": 5.2769, - "step": 24272 - }, - { - "epoch": 12.658670143415906, - "grad_norm": 1.410872220993042, - "learning_rate": 7.611557788944725e-05, - "loss": 5.0188, - "step": 24273 - }, - { - "epoch": 12.659191655801825, - "grad_norm": 1.5291202068328857, - "learning_rate": 7.611457286432161e-05, - "loss": 5.5128, - "step": 24274 - }, - { - "epoch": 12.659713168187745, - "grad_norm": 1.5109515190124512, - "learning_rate": 7.611356783919599e-05, - "loss": 5.331, - "step": 24275 - }, - { - "epoch": 12.660234680573664, - "grad_norm": 1.4862571954727173, - "learning_rate": 7.611256281407035e-05, - "loss": 5.2865, - "step": 24276 - }, - { - "epoch": 12.660756192959584, - "grad_norm": 1.642731785774231, - "learning_rate": 7.611155778894473e-05, - "loss": 5.5275, - "step": 24277 - }, - { - "epoch": 12.661277705345501, - "grad_norm": 1.6462132930755615, - "learning_rate": 7.61105527638191e-05, - "loss": 4.8973, - "step": 24278 - }, - { - "epoch": 12.66179921773142, - "grad_norm": 1.3594563007354736, - "learning_rate": 7.610954773869347e-05, - "loss": 5.7134, - "step": 24279 - }, - { - "epoch": 12.66232073011734, - "grad_norm": 1.4011937379837036, - "learning_rate": 7.610854271356785e-05, - "loss": 5.6743, - "step": 24280 - }, - { - "epoch": 12.66284224250326, - "grad_norm": 1.3196862936019897, - "learning_rate": 7.610753768844221e-05, - "loss": 5.9021, - "step": 24281 - }, - { - "epoch": 12.663363754889179, - "grad_norm": 1.422443151473999, - "learning_rate": 7.610653266331659e-05, - "loss": 5.2851, - "step": 24282 - }, - { - "epoch": 12.663885267275099, - "grad_norm": 1.485835075378418, - "learning_rate": 7.610552763819096e-05, - "loss": 5.1623, - "step": 24283 - }, - { - "epoch": 12.664406779661016, - "grad_norm": 1.583411455154419, - "learning_rate": 7.610452261306533e-05, - "loss": 5.2727, - "step": 24284 - }, - { - "epoch": 12.664928292046936, - "grad_norm": 1.4471313953399658, - "learning_rate": 7.61035175879397e-05, - "loss": 5.3968, - "step": 24285 - }, - { - "epoch": 12.665449804432855, - "grad_norm": 1.526421070098877, - "learning_rate": 7.610251256281408e-05, - "loss": 5.4136, - "step": 24286 - }, - { - "epoch": 12.665971316818775, - "grad_norm": 1.6667462587356567, - "learning_rate": 7.610150753768844e-05, - "loss": 5.5285, - "step": 24287 - }, - { - "epoch": 12.666492829204694, - "grad_norm": 1.3714112043380737, - "learning_rate": 7.610050251256282e-05, - "loss": 5.5651, - "step": 24288 - }, - { - "epoch": 12.667014341590614, - "grad_norm": 1.4060344696044922, - "learning_rate": 7.609949748743718e-05, - "loss": 5.3768, - "step": 24289 - }, - { - "epoch": 12.667535853976531, - "grad_norm": 1.5278794765472412, - "learning_rate": 7.609849246231156e-05, - "loss": 5.612, - "step": 24290 - }, - { - "epoch": 12.66805736636245, - "grad_norm": 1.4664849042892456, - "learning_rate": 7.609748743718594e-05, - "loss": 5.4703, - "step": 24291 - }, - { - "epoch": 12.66857887874837, - "grad_norm": 1.540303349494934, - "learning_rate": 7.609648241206032e-05, - "loss": 5.2535, - "step": 24292 - }, - { - "epoch": 12.66910039113429, - "grad_norm": 1.4502711296081543, - "learning_rate": 7.609547738693468e-05, - "loss": 5.5521, - "step": 24293 - }, - { - "epoch": 12.66962190352021, - "grad_norm": 1.422184944152832, - "learning_rate": 7.609447236180904e-05, - "loss": 5.4656, - "step": 24294 - }, - { - "epoch": 12.670143415906129, - "grad_norm": 1.4040900468826294, - "learning_rate": 7.609346733668342e-05, - "loss": 5.6685, - "step": 24295 - }, - { - "epoch": 12.670664928292046, - "grad_norm": 1.3601429462432861, - "learning_rate": 7.609246231155779e-05, - "loss": 5.7537, - "step": 24296 - }, - { - "epoch": 12.671186440677966, - "grad_norm": 1.3180136680603027, - "learning_rate": 7.609145728643216e-05, - "loss": 5.6173, - "step": 24297 - }, - { - "epoch": 12.671707953063885, - "grad_norm": 1.4264917373657227, - "learning_rate": 7.609045226130653e-05, - "loss": 5.591, - "step": 24298 - }, - { - "epoch": 12.672229465449805, - "grad_norm": 1.341850996017456, - "learning_rate": 7.60894472361809e-05, - "loss": 5.6738, - "step": 24299 - }, - { - "epoch": 12.672750977835724, - "grad_norm": 1.4943071603775024, - "learning_rate": 7.608844221105528e-05, - "loss": 5.3389, - "step": 24300 - }, - { - "epoch": 12.673272490221644, - "grad_norm": 1.4800583124160767, - "learning_rate": 7.608743718592966e-05, - "loss": 5.2033, - "step": 24301 - }, - { - "epoch": 12.673794002607561, - "grad_norm": 1.4654072523117065, - "learning_rate": 7.608643216080403e-05, - "loss": 5.293, - "step": 24302 - }, - { - "epoch": 12.67431551499348, - "grad_norm": 1.5404644012451172, - "learning_rate": 7.60854271356784e-05, - "loss": 5.7291, - "step": 24303 - }, - { - "epoch": 12.6748370273794, - "grad_norm": 1.4334239959716797, - "learning_rate": 7.608442211055277e-05, - "loss": 5.7201, - "step": 24304 - }, - { - "epoch": 12.67535853976532, - "grad_norm": 1.5415775775909424, - "learning_rate": 7.608341708542715e-05, - "loss": 5.176, - "step": 24305 - }, - { - "epoch": 12.67588005215124, - "grad_norm": 1.392251968383789, - "learning_rate": 7.608241206030151e-05, - "loss": 5.2615, - "step": 24306 - }, - { - "epoch": 12.676401564537159, - "grad_norm": 1.441267490386963, - "learning_rate": 7.608140703517587e-05, - "loss": 5.5743, - "step": 24307 - }, - { - "epoch": 12.676923076923076, - "grad_norm": 1.4565941095352173, - "learning_rate": 7.608040201005025e-05, - "loss": 5.8273, - "step": 24308 - }, - { - "epoch": 12.677444589308996, - "grad_norm": 1.5276778936386108, - "learning_rate": 7.607939698492462e-05, - "loss": 5.0985, - "step": 24309 - }, - { - "epoch": 12.677966101694915, - "grad_norm": 1.5106818675994873, - "learning_rate": 7.6078391959799e-05, - "loss": 5.6026, - "step": 24310 - }, - { - "epoch": 12.678487614080835, - "grad_norm": 1.4590986967086792, - "learning_rate": 7.607738693467337e-05, - "loss": 5.7015, - "step": 24311 - }, - { - "epoch": 12.679009126466754, - "grad_norm": 1.4629194736480713, - "learning_rate": 7.607638190954775e-05, - "loss": 5.5036, - "step": 24312 - }, - { - "epoch": 12.679530638852672, - "grad_norm": 1.3608206510543823, - "learning_rate": 7.607537688442211e-05, - "loss": 5.9413, - "step": 24313 - }, - { - "epoch": 12.680052151238591, - "grad_norm": 1.6010980606079102, - "learning_rate": 7.607437185929649e-05, - "loss": 5.2212, - "step": 24314 - }, - { - "epoch": 12.68057366362451, - "grad_norm": 1.4699190855026245, - "learning_rate": 7.607336683417086e-05, - "loss": 5.6009, - "step": 24315 - }, - { - "epoch": 12.68109517601043, - "grad_norm": 1.3533704280853271, - "learning_rate": 7.607236180904523e-05, - "loss": 5.8968, - "step": 24316 - }, - { - "epoch": 12.68161668839635, - "grad_norm": 1.510364055633545, - "learning_rate": 7.60713567839196e-05, - "loss": 5.5867, - "step": 24317 - }, - { - "epoch": 12.68213820078227, - "grad_norm": 1.5813214778900146, - "learning_rate": 7.607035175879398e-05, - "loss": 5.2547, - "step": 24318 - }, - { - "epoch": 12.682659713168189, - "grad_norm": 1.4414528608322144, - "learning_rate": 7.606934673366834e-05, - "loss": 5.7396, - "step": 24319 - }, - { - "epoch": 12.683181225554106, - "grad_norm": 1.5756957530975342, - "learning_rate": 7.606834170854272e-05, - "loss": 5.4813, - "step": 24320 - }, - { - "epoch": 12.683702737940026, - "grad_norm": 1.5599771738052368, - "learning_rate": 7.60673366834171e-05, - "loss": 5.4947, - "step": 24321 - }, - { - "epoch": 12.684224250325945, - "grad_norm": 1.5776969194412231, - "learning_rate": 7.606633165829146e-05, - "loss": 5.1298, - "step": 24322 - }, - { - "epoch": 12.684745762711865, - "grad_norm": 1.6920337677001953, - "learning_rate": 7.606532663316584e-05, - "loss": 5.1769, - "step": 24323 - }, - { - "epoch": 12.685267275097784, - "grad_norm": 1.4756355285644531, - "learning_rate": 7.60643216080402e-05, - "loss": 5.7018, - "step": 24324 - }, - { - "epoch": 12.685788787483702, - "grad_norm": 1.5206096172332764, - "learning_rate": 7.606331658291458e-05, - "loss": 5.205, - "step": 24325 - }, - { - "epoch": 12.686310299869621, - "grad_norm": 1.4926693439483643, - "learning_rate": 7.606231155778894e-05, - "loss": 5.3184, - "step": 24326 - }, - { - "epoch": 12.68683181225554, - "grad_norm": 1.3996894359588623, - "learning_rate": 7.606130653266332e-05, - "loss": 5.2571, - "step": 24327 - }, - { - "epoch": 12.68735332464146, - "grad_norm": 1.3785954713821411, - "learning_rate": 7.606030150753769e-05, - "loss": 5.5346, - "step": 24328 - }, - { - "epoch": 12.68787483702738, - "grad_norm": 1.4803731441497803, - "learning_rate": 7.605929648241206e-05, - "loss": 5.479, - "step": 24329 - }, - { - "epoch": 12.6883963494133, - "grad_norm": 1.4773006439208984, - "learning_rate": 7.605829145728643e-05, - "loss": 5.1236, - "step": 24330 - }, - { - "epoch": 12.688917861799219, - "grad_norm": 1.5420628786087036, - "learning_rate": 7.605728643216081e-05, - "loss": 5.3164, - "step": 24331 - }, - { - "epoch": 12.689439374185136, - "grad_norm": 1.3643741607666016, - "learning_rate": 7.605628140703518e-05, - "loss": 5.732, - "step": 24332 - }, - { - "epoch": 12.689960886571056, - "grad_norm": 1.3995733261108398, - "learning_rate": 7.605527638190955e-05, - "loss": 5.3489, - "step": 24333 - }, - { - "epoch": 12.690482398956975, - "grad_norm": 1.5113794803619385, - "learning_rate": 7.605427135678393e-05, - "loss": 5.0839, - "step": 24334 - }, - { - "epoch": 12.691003911342895, - "grad_norm": 1.4621622562408447, - "learning_rate": 7.605326633165829e-05, - "loss": 5.4285, - "step": 24335 - }, - { - "epoch": 12.691525423728814, - "grad_norm": 1.5336389541625977, - "learning_rate": 7.605226130653267e-05, - "loss": 5.5405, - "step": 24336 - }, - { - "epoch": 12.692046936114732, - "grad_norm": 1.4523630142211914, - "learning_rate": 7.605125628140703e-05, - "loss": 5.4894, - "step": 24337 - }, - { - "epoch": 12.692568448500651, - "grad_norm": 1.4674464464187622, - "learning_rate": 7.605025125628141e-05, - "loss": 5.1259, - "step": 24338 - }, - { - "epoch": 12.69308996088657, - "grad_norm": 1.5984127521514893, - "learning_rate": 7.604924623115578e-05, - "loss": 5.0148, - "step": 24339 - }, - { - "epoch": 12.69361147327249, - "grad_norm": 1.4757450819015503, - "learning_rate": 7.604824120603015e-05, - "loss": 5.5154, - "step": 24340 - }, - { - "epoch": 12.69413298565841, - "grad_norm": 1.5860075950622559, - "learning_rate": 7.604723618090453e-05, - "loss": 5.6452, - "step": 24341 - }, - { - "epoch": 12.69465449804433, - "grad_norm": 1.3731824159622192, - "learning_rate": 7.604623115577891e-05, - "loss": 5.7384, - "step": 24342 - }, - { - "epoch": 12.695176010430247, - "grad_norm": 1.5322567224502563, - "learning_rate": 7.604522613065327e-05, - "loss": 5.1629, - "step": 24343 - }, - { - "epoch": 12.695697522816166, - "grad_norm": 1.3123407363891602, - "learning_rate": 7.604422110552765e-05, - "loss": 4.9578, - "step": 24344 - }, - { - "epoch": 12.696219035202086, - "grad_norm": 1.508303165435791, - "learning_rate": 7.604321608040202e-05, - "loss": 5.8433, - "step": 24345 - }, - { - "epoch": 12.696740547588005, - "grad_norm": 1.4722509384155273, - "learning_rate": 7.604221105527638e-05, - "loss": 5.5069, - "step": 24346 - }, - { - "epoch": 12.697262059973925, - "grad_norm": 1.5153048038482666, - "learning_rate": 7.604120603015076e-05, - "loss": 5.4865, - "step": 24347 - }, - { - "epoch": 12.697783572359844, - "grad_norm": 1.566575050354004, - "learning_rate": 7.604020100502512e-05, - "loss": 4.9991, - "step": 24348 - }, - { - "epoch": 12.698305084745762, - "grad_norm": 1.408167839050293, - "learning_rate": 7.60391959798995e-05, - "loss": 5.5933, - "step": 24349 - }, - { - "epoch": 12.698826597131681, - "grad_norm": 1.6111527681350708, - "learning_rate": 7.603819095477386e-05, - "loss": 4.9464, - "step": 24350 - }, - { - "epoch": 12.6993481095176, - "grad_norm": 1.5153045654296875, - "learning_rate": 7.603718592964824e-05, - "loss": 5.143, - "step": 24351 - }, - { - "epoch": 12.69986962190352, - "grad_norm": 1.478361964225769, - "learning_rate": 7.603618090452262e-05, - "loss": 5.2975, - "step": 24352 - }, - { - "epoch": 12.70039113428944, - "grad_norm": 1.3774333000183105, - "learning_rate": 7.6035175879397e-05, - "loss": 5.7246, - "step": 24353 - }, - { - "epoch": 12.70091264667536, - "grad_norm": 1.4211243391036987, - "learning_rate": 7.603417085427136e-05, - "loss": 5.5883, - "step": 24354 - }, - { - "epoch": 12.701434159061277, - "grad_norm": 1.6116986274719238, - "learning_rate": 7.603316582914574e-05, - "loss": 5.1494, - "step": 24355 - }, - { - "epoch": 12.701955671447196, - "grad_norm": 1.473743200302124, - "learning_rate": 7.60321608040201e-05, - "loss": 5.2769, - "step": 24356 - }, - { - "epoch": 12.702477183833116, - "grad_norm": 1.3275612592697144, - "learning_rate": 7.603115577889448e-05, - "loss": 5.9453, - "step": 24357 - }, - { - "epoch": 12.702998696219035, - "grad_norm": 1.3406250476837158, - "learning_rate": 7.603015075376885e-05, - "loss": 5.6537, - "step": 24358 - }, - { - "epoch": 12.703520208604955, - "grad_norm": 1.5148389339447021, - "learning_rate": 7.602914572864322e-05, - "loss": 5.3399, - "step": 24359 - }, - { - "epoch": 12.704041720990874, - "grad_norm": 1.5284837484359741, - "learning_rate": 7.602814070351759e-05, - "loss": 5.4602, - "step": 24360 - }, - { - "epoch": 12.704563233376792, - "grad_norm": 1.5396493673324585, - "learning_rate": 7.602713567839195e-05, - "loss": 5.1218, - "step": 24361 - }, - { - "epoch": 12.705084745762711, - "grad_norm": 1.4979456663131714, - "learning_rate": 7.602613065326633e-05, - "loss": 5.3511, - "step": 24362 - }, - { - "epoch": 12.70560625814863, - "grad_norm": 1.4480172395706177, - "learning_rate": 7.602512562814071e-05, - "loss": 5.4436, - "step": 24363 - }, - { - "epoch": 12.70612777053455, - "grad_norm": 1.4359452724456787, - "learning_rate": 7.602412060301509e-05, - "loss": 5.2013, - "step": 24364 - }, - { - "epoch": 12.70664928292047, - "grad_norm": 1.4739799499511719, - "learning_rate": 7.602311557788945e-05, - "loss": 5.2594, - "step": 24365 - }, - { - "epoch": 12.70717079530639, - "grad_norm": 1.366713047027588, - "learning_rate": 7.602211055276383e-05, - "loss": 5.8227, - "step": 24366 - }, - { - "epoch": 12.707692307692307, - "grad_norm": 1.4721421003341675, - "learning_rate": 7.602110552763819e-05, - "loss": 5.2837, - "step": 24367 - }, - { - "epoch": 12.708213820078226, - "grad_norm": 1.477561354637146, - "learning_rate": 7.602010050251257e-05, - "loss": 5.0041, - "step": 24368 - }, - { - "epoch": 12.708735332464146, - "grad_norm": 1.521535038948059, - "learning_rate": 7.601909547738693e-05, - "loss": 5.3205, - "step": 24369 - }, - { - "epoch": 12.709256844850065, - "grad_norm": 1.5003838539123535, - "learning_rate": 7.601809045226131e-05, - "loss": 5.5445, - "step": 24370 - }, - { - "epoch": 12.709778357235985, - "grad_norm": 1.4894654750823975, - "learning_rate": 7.601708542713568e-05, - "loss": 5.2131, - "step": 24371 - }, - { - "epoch": 12.710299869621904, - "grad_norm": 1.4190751314163208, - "learning_rate": 7.601608040201005e-05, - "loss": 5.6079, - "step": 24372 - }, - { - "epoch": 12.710821382007822, - "grad_norm": 1.4663288593292236, - "learning_rate": 7.601507537688443e-05, - "loss": 5.2136, - "step": 24373 - }, - { - "epoch": 12.711342894393741, - "grad_norm": 1.4758223295211792, - "learning_rate": 7.60140703517588e-05, - "loss": 5.4003, - "step": 24374 - }, - { - "epoch": 12.711864406779661, - "grad_norm": 1.4517029523849487, - "learning_rate": 7.601306532663317e-05, - "loss": 5.1399, - "step": 24375 - }, - { - "epoch": 12.71238591916558, - "grad_norm": 1.4407105445861816, - "learning_rate": 7.601206030150754e-05, - "loss": 5.4242, - "step": 24376 - }, - { - "epoch": 12.7129074315515, - "grad_norm": 1.5691652297973633, - "learning_rate": 7.601105527638192e-05, - "loss": 5.3879, - "step": 24377 - }, - { - "epoch": 12.71342894393742, - "grad_norm": 1.342659831047058, - "learning_rate": 7.601005025125628e-05, - "loss": 5.6063, - "step": 24378 - }, - { - "epoch": 12.713950456323337, - "grad_norm": 1.5279192924499512, - "learning_rate": 7.600904522613066e-05, - "loss": 5.2738, - "step": 24379 - }, - { - "epoch": 12.714471968709256, - "grad_norm": 1.3627943992614746, - "learning_rate": 7.600804020100502e-05, - "loss": 5.5396, - "step": 24380 - }, - { - "epoch": 12.714993481095176, - "grad_norm": 1.3977882862091064, - "learning_rate": 7.60070351758794e-05, - "loss": 5.5392, - "step": 24381 - }, - { - "epoch": 12.715514993481095, - "grad_norm": 1.6020926237106323, - "learning_rate": 7.600603015075376e-05, - "loss": 5.3559, - "step": 24382 - }, - { - "epoch": 12.716036505867015, - "grad_norm": 1.4645613431930542, - "learning_rate": 7.600502512562814e-05, - "loss": 5.4186, - "step": 24383 - }, - { - "epoch": 12.716558018252934, - "grad_norm": 1.4330931901931763, - "learning_rate": 7.600402010050252e-05, - "loss": 5.1623, - "step": 24384 - }, - { - "epoch": 12.717079530638852, - "grad_norm": 1.4565563201904297, - "learning_rate": 7.60030150753769e-05, - "loss": 5.9705, - "step": 24385 - }, - { - "epoch": 12.717601043024771, - "grad_norm": 1.4503005743026733, - "learning_rate": 7.600201005025126e-05, - "loss": 5.9482, - "step": 24386 - }, - { - "epoch": 12.718122555410691, - "grad_norm": 1.749280571937561, - "learning_rate": 7.600100502512563e-05, - "loss": 5.4631, - "step": 24387 - }, - { - "epoch": 12.71864406779661, - "grad_norm": 1.303693413734436, - "learning_rate": 7.6e-05, - "loss": 5.2168, - "step": 24388 - }, - { - "epoch": 12.71916558018253, - "grad_norm": 1.3896561861038208, - "learning_rate": 7.599899497487437e-05, - "loss": 5.6433, - "step": 24389 - }, - { - "epoch": 12.71968709256845, - "grad_norm": 1.3905243873596191, - "learning_rate": 7.599798994974875e-05, - "loss": 5.6029, - "step": 24390 - }, - { - "epoch": 12.720208604954367, - "grad_norm": 1.3509730100631714, - "learning_rate": 7.599698492462311e-05, - "loss": 5.2764, - "step": 24391 - }, - { - "epoch": 12.720730117340286, - "grad_norm": 1.4457157850265503, - "learning_rate": 7.599597989949749e-05, - "loss": 5.3835, - "step": 24392 - }, - { - "epoch": 12.721251629726206, - "grad_norm": 1.4932770729064941, - "learning_rate": 7.599497487437187e-05, - "loss": 5.5482, - "step": 24393 - }, - { - "epoch": 12.721773142112125, - "grad_norm": 1.3761019706726074, - "learning_rate": 7.599396984924624e-05, - "loss": 5.5008, - "step": 24394 - }, - { - "epoch": 12.722294654498045, - "grad_norm": 1.4271925687789917, - "learning_rate": 7.599296482412061e-05, - "loss": 5.7056, - "step": 24395 - }, - { - "epoch": 12.722816166883963, - "grad_norm": 1.3666000366210938, - "learning_rate": 7.599195979899499e-05, - "loss": 5.7291, - "step": 24396 - }, - { - "epoch": 12.723337679269882, - "grad_norm": 1.6009079217910767, - "learning_rate": 7.599095477386935e-05, - "loss": 5.0507, - "step": 24397 - }, - { - "epoch": 12.723859191655801, - "grad_norm": 1.5036541223526, - "learning_rate": 7.598994974874373e-05, - "loss": 5.0745, - "step": 24398 - }, - { - "epoch": 12.724380704041721, - "grad_norm": 1.6462188959121704, - "learning_rate": 7.598894472361809e-05, - "loss": 5.1669, - "step": 24399 - }, - { - "epoch": 12.72490221642764, - "grad_norm": 1.3240766525268555, - "learning_rate": 7.598793969849246e-05, - "loss": 5.3907, - "step": 24400 - }, - { - "epoch": 12.72542372881356, - "grad_norm": 1.5743635892868042, - "learning_rate": 7.598693467336683e-05, - "loss": 5.1259, - "step": 24401 - }, - { - "epoch": 12.72594524119948, - "grad_norm": 1.3634172677993774, - "learning_rate": 7.59859296482412e-05, - "loss": 5.6334, - "step": 24402 - }, - { - "epoch": 12.726466753585397, - "grad_norm": 1.5094478130340576, - "learning_rate": 7.598492462311558e-05, - "loss": 5.2596, - "step": 24403 - }, - { - "epoch": 12.726988265971316, - "grad_norm": 1.4883472919464111, - "learning_rate": 7.598391959798995e-05, - "loss": 5.6445, - "step": 24404 - }, - { - "epoch": 12.727509778357236, - "grad_norm": 1.3886327743530273, - "learning_rate": 7.598291457286433e-05, - "loss": 5.3604, - "step": 24405 - }, - { - "epoch": 12.728031290743155, - "grad_norm": 1.5796140432357788, - "learning_rate": 7.59819095477387e-05, - "loss": 5.003, - "step": 24406 - }, - { - "epoch": 12.728552803129075, - "grad_norm": 1.5153332948684692, - "learning_rate": 7.598090452261307e-05, - "loss": 5.6326, - "step": 24407 - }, - { - "epoch": 12.729074315514993, - "grad_norm": 1.4648395776748657, - "learning_rate": 7.597989949748744e-05, - "loss": 5.3948, - "step": 24408 - }, - { - "epoch": 12.729595827900912, - "grad_norm": 1.4774514436721802, - "learning_rate": 7.597889447236182e-05, - "loss": 5.271, - "step": 24409 - }, - { - "epoch": 12.730117340286832, - "grad_norm": 1.3379167318344116, - "learning_rate": 7.597788944723618e-05, - "loss": 5.8556, - "step": 24410 - }, - { - "epoch": 12.730638852672751, - "grad_norm": 1.5000122785568237, - "learning_rate": 7.597688442211056e-05, - "loss": 5.9195, - "step": 24411 - }, - { - "epoch": 12.73116036505867, - "grad_norm": 1.4926040172576904, - "learning_rate": 7.597587939698492e-05, - "loss": 5.6912, - "step": 24412 - }, - { - "epoch": 12.73168187744459, - "grad_norm": 1.642241358757019, - "learning_rate": 7.59748743718593e-05, - "loss": 4.8152, - "step": 24413 - }, - { - "epoch": 12.73220338983051, - "grad_norm": 1.4520188570022583, - "learning_rate": 7.597386934673368e-05, - "loss": 5.5295, - "step": 24414 - }, - { - "epoch": 12.732724902216427, - "grad_norm": 1.4170966148376465, - "learning_rate": 7.597286432160804e-05, - "loss": 5.2973, - "step": 24415 - }, - { - "epoch": 12.733246414602347, - "grad_norm": 1.5295612812042236, - "learning_rate": 7.597185929648242e-05, - "loss": 5.3146, - "step": 24416 - }, - { - "epoch": 12.733767926988266, - "grad_norm": 1.4901432991027832, - "learning_rate": 7.597085427135679e-05, - "loss": 5.4067, - "step": 24417 - }, - { - "epoch": 12.734289439374185, - "grad_norm": 1.4407120943069458, - "learning_rate": 7.596984924623116e-05, - "loss": 5.824, - "step": 24418 - }, - { - "epoch": 12.734810951760105, - "grad_norm": 1.438481330871582, - "learning_rate": 7.596884422110553e-05, - "loss": 5.5781, - "step": 24419 - }, - { - "epoch": 12.735332464146023, - "grad_norm": 1.4472700357437134, - "learning_rate": 7.59678391959799e-05, - "loss": 5.5665, - "step": 24420 - }, - { - "epoch": 12.735853976531942, - "grad_norm": 1.4392955303192139, - "learning_rate": 7.596683417085427e-05, - "loss": 5.8269, - "step": 24421 - }, - { - "epoch": 12.736375488917862, - "grad_norm": 1.4716408252716064, - "learning_rate": 7.596582914572865e-05, - "loss": 5.4987, - "step": 24422 - }, - { - "epoch": 12.736897001303781, - "grad_norm": 1.4219905138015747, - "learning_rate": 7.596482412060301e-05, - "loss": 5.3849, - "step": 24423 - }, - { - "epoch": 12.7374185136897, - "grad_norm": 2.0042507648468018, - "learning_rate": 7.596381909547739e-05, - "loss": 5.2999, - "step": 24424 - }, - { - "epoch": 12.73794002607562, - "grad_norm": 1.4204896688461304, - "learning_rate": 7.596281407035177e-05, - "loss": 5.4794, - "step": 24425 - }, - { - "epoch": 12.73846153846154, - "grad_norm": 1.567097544670105, - "learning_rate": 7.596180904522613e-05, - "loss": 5.0879, - "step": 24426 - }, - { - "epoch": 12.738983050847457, - "grad_norm": 1.6261464357376099, - "learning_rate": 7.596080402010051e-05, - "loss": 5.2085, - "step": 24427 - }, - { - "epoch": 12.739504563233377, - "grad_norm": 1.5107901096343994, - "learning_rate": 7.595979899497487e-05, - "loss": 5.4325, - "step": 24428 - }, - { - "epoch": 12.740026075619296, - "grad_norm": 1.4872994422912598, - "learning_rate": 7.595879396984925e-05, - "loss": 5.4289, - "step": 24429 - }, - { - "epoch": 12.740547588005215, - "grad_norm": 1.710824728012085, - "learning_rate": 7.595778894472362e-05, - "loss": 5.6704, - "step": 24430 - }, - { - "epoch": 12.741069100391135, - "grad_norm": 1.5858311653137207, - "learning_rate": 7.5956783919598e-05, - "loss": 5.5291, - "step": 24431 - }, - { - "epoch": 12.741590612777053, - "grad_norm": 1.4226577281951904, - "learning_rate": 7.595577889447236e-05, - "loss": 5.49, - "step": 24432 - }, - { - "epoch": 12.742112125162972, - "grad_norm": 1.5252529382705688, - "learning_rate": 7.595477386934674e-05, - "loss": 4.5934, - "step": 24433 - }, - { - "epoch": 12.742633637548892, - "grad_norm": 1.5185630321502686, - "learning_rate": 7.595376884422111e-05, - "loss": 5.4964, - "step": 24434 - }, - { - "epoch": 12.743155149934811, - "grad_norm": 1.3094062805175781, - "learning_rate": 7.595276381909549e-05, - "loss": 4.762, - "step": 24435 - }, - { - "epoch": 12.74367666232073, - "grad_norm": 1.469008207321167, - "learning_rate": 7.595175879396986e-05, - "loss": 5.5265, - "step": 24436 - }, - { - "epoch": 12.74419817470665, - "grad_norm": 1.3892662525177002, - "learning_rate": 7.595075376884423e-05, - "loss": 5.6677, - "step": 24437 - }, - { - "epoch": 12.744719687092568, - "grad_norm": 1.4142245054244995, - "learning_rate": 7.59497487437186e-05, - "loss": 5.6014, - "step": 24438 - }, - { - "epoch": 12.745241199478487, - "grad_norm": 1.8250715732574463, - "learning_rate": 7.594874371859298e-05, - "loss": 4.9789, - "step": 24439 - }, - { - "epoch": 12.745762711864407, - "grad_norm": 1.457497477531433, - "learning_rate": 7.594773869346734e-05, - "loss": 5.7955, - "step": 24440 - }, - { - "epoch": 12.746284224250326, - "grad_norm": 1.483396053314209, - "learning_rate": 7.59467336683417e-05, - "loss": 5.2935, - "step": 24441 - }, - { - "epoch": 12.746805736636245, - "grad_norm": 1.3736979961395264, - "learning_rate": 7.594572864321608e-05, - "loss": 5.4291, - "step": 24442 - }, - { - "epoch": 12.747327249022165, - "grad_norm": 1.3602410554885864, - "learning_rate": 7.594472361809045e-05, - "loss": 5.5918, - "step": 24443 - }, - { - "epoch": 12.747848761408083, - "grad_norm": 1.5721389055252075, - "learning_rate": 7.594371859296482e-05, - "loss": 5.2534, - "step": 24444 - }, - { - "epoch": 12.748370273794002, - "grad_norm": 1.368338942527771, - "learning_rate": 7.59427135678392e-05, - "loss": 5.4527, - "step": 24445 - }, - { - "epoch": 12.748891786179922, - "grad_norm": 1.4265849590301514, - "learning_rate": 7.594170854271358e-05, - "loss": 5.4541, - "step": 24446 - }, - { - "epoch": 12.749413298565841, - "grad_norm": 1.603132963180542, - "learning_rate": 7.594070351758794e-05, - "loss": 5.1432, - "step": 24447 - }, - { - "epoch": 12.74993481095176, - "grad_norm": 1.4352918863296509, - "learning_rate": 7.593969849246232e-05, - "loss": 5.4494, - "step": 24448 - }, - { - "epoch": 12.75045632333768, - "grad_norm": 1.3804314136505127, - "learning_rate": 7.593869346733669e-05, - "loss": 5.5292, - "step": 24449 - }, - { - "epoch": 12.750977835723598, - "grad_norm": 1.5542335510253906, - "learning_rate": 7.593768844221106e-05, - "loss": 5.4825, - "step": 24450 - }, - { - "epoch": 12.751499348109517, - "grad_norm": 1.491312026977539, - "learning_rate": 7.593668341708543e-05, - "loss": 5.4275, - "step": 24451 - }, - { - "epoch": 12.752020860495437, - "grad_norm": 1.3868355751037598, - "learning_rate": 7.59356783919598e-05, - "loss": 5.2631, - "step": 24452 - }, - { - "epoch": 12.752542372881356, - "grad_norm": 1.4047391414642334, - "learning_rate": 7.593467336683417e-05, - "loss": 5.1587, - "step": 24453 - }, - { - "epoch": 12.753063885267276, - "grad_norm": 1.4351847171783447, - "learning_rate": 7.593366834170855e-05, - "loss": 5.4138, - "step": 24454 - }, - { - "epoch": 12.753585397653195, - "grad_norm": 1.4722750186920166, - "learning_rate": 7.593266331658293e-05, - "loss": 5.7876, - "step": 24455 - }, - { - "epoch": 12.754106910039113, - "grad_norm": 1.3697633743286133, - "learning_rate": 7.593165829145729e-05, - "loss": 5.6563, - "step": 24456 - }, - { - "epoch": 12.754628422425032, - "grad_norm": 1.3705617189407349, - "learning_rate": 7.593065326633167e-05, - "loss": 4.9764, - "step": 24457 - }, - { - "epoch": 12.755149934810952, - "grad_norm": 1.3481708765029907, - "learning_rate": 7.592964824120603e-05, - "loss": 5.1014, - "step": 24458 - }, - { - "epoch": 12.755671447196871, - "grad_norm": 1.5050820112228394, - "learning_rate": 7.592864321608041e-05, - "loss": 5.4981, - "step": 24459 - }, - { - "epoch": 12.75619295958279, - "grad_norm": 1.4407942295074463, - "learning_rate": 7.592763819095477e-05, - "loss": 5.5589, - "step": 24460 - }, - { - "epoch": 12.75671447196871, - "grad_norm": 1.5081263780593872, - "learning_rate": 7.592663316582915e-05, - "loss": 5.4533, - "step": 24461 - }, - { - "epoch": 12.757235984354628, - "grad_norm": 1.321273922920227, - "learning_rate": 7.592562814070352e-05, - "loss": 5.1324, - "step": 24462 - }, - { - "epoch": 12.757757496740547, - "grad_norm": 1.4555095434188843, - "learning_rate": 7.59246231155779e-05, - "loss": 5.3874, - "step": 24463 - }, - { - "epoch": 12.758279009126467, - "grad_norm": 1.5075820684432983, - "learning_rate": 7.592361809045226e-05, - "loss": 5.4763, - "step": 24464 - }, - { - "epoch": 12.758800521512386, - "grad_norm": 1.5498849153518677, - "learning_rate": 7.592261306532664e-05, - "loss": 5.1063, - "step": 24465 - }, - { - "epoch": 12.759322033898306, - "grad_norm": 1.5460013151168823, - "learning_rate": 7.592160804020101e-05, - "loss": 5.4162, - "step": 24466 - }, - { - "epoch": 12.759843546284225, - "grad_norm": 1.4269638061523438, - "learning_rate": 7.592060301507538e-05, - "loss": 5.355, - "step": 24467 - }, - { - "epoch": 12.760365058670143, - "grad_norm": 1.4727325439453125, - "learning_rate": 7.591959798994976e-05, - "loss": 5.2076, - "step": 24468 - }, - { - "epoch": 12.760886571056062, - "grad_norm": 1.4000864028930664, - "learning_rate": 7.591859296482412e-05, - "loss": 5.1894, - "step": 24469 - }, - { - "epoch": 12.761408083441982, - "grad_norm": 1.3517787456512451, - "learning_rate": 7.59175879396985e-05, - "loss": 5.0786, - "step": 24470 - }, - { - "epoch": 12.761929595827901, - "grad_norm": 1.4659368991851807, - "learning_rate": 7.591658291457286e-05, - "loss": 5.1884, - "step": 24471 - }, - { - "epoch": 12.76245110821382, - "grad_norm": 1.5233185291290283, - "learning_rate": 7.591557788944724e-05, - "loss": 5.384, - "step": 24472 - }, - { - "epoch": 12.76297262059974, - "grad_norm": 1.5392242670059204, - "learning_rate": 7.59145728643216e-05, - "loss": 5.4176, - "step": 24473 - }, - { - "epoch": 12.763494132985658, - "grad_norm": 1.5026501417160034, - "learning_rate": 7.591356783919598e-05, - "loss": 5.5277, - "step": 24474 - }, - { - "epoch": 12.764015645371577, - "grad_norm": 1.5138453245162964, - "learning_rate": 7.591256281407036e-05, - "loss": 5.1506, - "step": 24475 - }, - { - "epoch": 12.764537157757497, - "grad_norm": 1.4911173582077026, - "learning_rate": 7.591155778894474e-05, - "loss": 5.646, - "step": 24476 - }, - { - "epoch": 12.765058670143416, - "grad_norm": 1.6545356512069702, - "learning_rate": 7.59105527638191e-05, - "loss": 5.678, - "step": 24477 - }, - { - "epoch": 12.765580182529336, - "grad_norm": 1.4842973947525024, - "learning_rate": 7.590954773869348e-05, - "loss": 5.3708, - "step": 24478 - }, - { - "epoch": 12.766101694915255, - "grad_norm": 1.4955476522445679, - "learning_rate": 7.590854271356784e-05, - "loss": 5.028, - "step": 24479 - }, - { - "epoch": 12.766623207301173, - "grad_norm": 1.508634090423584, - "learning_rate": 7.590753768844221e-05, - "loss": 5.77, - "step": 24480 - }, - { - "epoch": 12.767144719687092, - "grad_norm": 1.4501996040344238, - "learning_rate": 7.590653266331659e-05, - "loss": 5.5554, - "step": 24481 - }, - { - "epoch": 12.767666232073012, - "grad_norm": 1.5697319507598877, - "learning_rate": 7.590552763819095e-05, - "loss": 5.2541, - "step": 24482 - }, - { - "epoch": 12.768187744458931, - "grad_norm": 1.634009838104248, - "learning_rate": 7.590452261306533e-05, - "loss": 5.4781, - "step": 24483 - }, - { - "epoch": 12.76870925684485, - "grad_norm": 1.407063603401184, - "learning_rate": 7.590351758793969e-05, - "loss": 5.7823, - "step": 24484 - }, - { - "epoch": 12.76923076923077, - "grad_norm": 1.4577723741531372, - "learning_rate": 7.590251256281407e-05, - "loss": 5.2127, - "step": 24485 - }, - { - "epoch": 12.769752281616688, - "grad_norm": 1.340656042098999, - "learning_rate": 7.590150753768845e-05, - "loss": 5.5296, - "step": 24486 - }, - { - "epoch": 12.770273794002607, - "grad_norm": 1.4389591217041016, - "learning_rate": 7.590050251256283e-05, - "loss": 5.6096, - "step": 24487 - }, - { - "epoch": 12.770795306388527, - "grad_norm": 1.636777400970459, - "learning_rate": 7.589949748743719e-05, - "loss": 4.9154, - "step": 24488 - }, - { - "epoch": 12.771316818774446, - "grad_norm": 1.4798144102096558, - "learning_rate": 7.589849246231157e-05, - "loss": 5.3381, - "step": 24489 - }, - { - "epoch": 12.771838331160366, - "grad_norm": 1.508229374885559, - "learning_rate": 7.589748743718593e-05, - "loss": 5.3649, - "step": 24490 - }, - { - "epoch": 12.772359843546283, - "grad_norm": 1.403053879737854, - "learning_rate": 7.589648241206031e-05, - "loss": 5.3456, - "step": 24491 - }, - { - "epoch": 12.772881355932203, - "grad_norm": 1.3963004350662231, - "learning_rate": 7.589547738693467e-05, - "loss": 5.6098, - "step": 24492 - }, - { - "epoch": 12.773402868318122, - "grad_norm": 1.49527907371521, - "learning_rate": 7.589447236180904e-05, - "loss": 5.3411, - "step": 24493 - }, - { - "epoch": 12.773924380704042, - "grad_norm": 1.3587065935134888, - "learning_rate": 7.589346733668342e-05, - "loss": 5.7066, - "step": 24494 - }, - { - "epoch": 12.774445893089961, - "grad_norm": 1.4166120290756226, - "learning_rate": 7.58924623115578e-05, - "loss": 5.298, - "step": 24495 - }, - { - "epoch": 12.77496740547588, - "grad_norm": 1.4985336065292358, - "learning_rate": 7.589145728643217e-05, - "loss": 5.5796, - "step": 24496 - }, - { - "epoch": 12.7754889178618, - "grad_norm": 1.3810404539108276, - "learning_rate": 7.589045226130654e-05, - "loss": 5.1456, - "step": 24497 - }, - { - "epoch": 12.776010430247718, - "grad_norm": 1.494794249534607, - "learning_rate": 7.588944723618091e-05, - "loss": 5.127, - "step": 24498 - }, - { - "epoch": 12.776531942633637, - "grad_norm": 1.365222454071045, - "learning_rate": 7.588844221105528e-05, - "loss": 5.1753, - "step": 24499 - }, - { - "epoch": 12.777053455019557, - "grad_norm": 1.471259355545044, - "learning_rate": 7.588743718592966e-05, - "loss": 5.4338, - "step": 24500 - }, - { - "epoch": 12.777574967405476, - "grad_norm": 1.4088819026947021, - "learning_rate": 7.588643216080402e-05, - "loss": 5.475, - "step": 24501 - }, - { - "epoch": 12.778096479791396, - "grad_norm": 1.3831071853637695, - "learning_rate": 7.58854271356784e-05, - "loss": 5.2094, - "step": 24502 - }, - { - "epoch": 12.778617992177313, - "grad_norm": 1.5086346864700317, - "learning_rate": 7.588442211055276e-05, - "loss": 5.1499, - "step": 24503 - }, - { - "epoch": 12.779139504563233, - "grad_norm": 1.4440158605575562, - "learning_rate": 7.588341708542714e-05, - "loss": 5.1501, - "step": 24504 - }, - { - "epoch": 12.779661016949152, - "grad_norm": 1.4619547128677368, - "learning_rate": 7.58824120603015e-05, - "loss": 5.5021, - "step": 24505 - }, - { - "epoch": 12.780182529335072, - "grad_norm": 1.4613354206085205, - "learning_rate": 7.588140703517588e-05, - "loss": 5.6212, - "step": 24506 - }, - { - "epoch": 12.780704041720991, - "grad_norm": 1.5420024394989014, - "learning_rate": 7.588040201005026e-05, - "loss": 5.2748, - "step": 24507 - }, - { - "epoch": 12.78122555410691, - "grad_norm": 1.5137053728103638, - "learning_rate": 7.587939698492463e-05, - "loss": 5.2766, - "step": 24508 - }, - { - "epoch": 12.78174706649283, - "grad_norm": 1.7658950090408325, - "learning_rate": 7.5878391959799e-05, - "loss": 4.9366, - "step": 24509 - }, - { - "epoch": 12.782268578878748, - "grad_norm": 1.461885690689087, - "learning_rate": 7.587738693467337e-05, - "loss": 5.6584, - "step": 24510 - }, - { - "epoch": 12.782790091264667, - "grad_norm": 1.4473845958709717, - "learning_rate": 7.587638190954775e-05, - "loss": 5.4796, - "step": 24511 - }, - { - "epoch": 12.783311603650587, - "grad_norm": 1.4687386751174927, - "learning_rate": 7.587537688442211e-05, - "loss": 5.2223, - "step": 24512 - }, - { - "epoch": 12.783833116036506, - "grad_norm": 1.4030112028121948, - "learning_rate": 7.587437185929649e-05, - "loss": 5.6082, - "step": 24513 - }, - { - "epoch": 12.784354628422426, - "grad_norm": 1.4951237440109253, - "learning_rate": 7.587336683417085e-05, - "loss": 5.5026, - "step": 24514 - }, - { - "epoch": 12.784876140808343, - "grad_norm": 1.42330002784729, - "learning_rate": 7.587236180904523e-05, - "loss": 5.2129, - "step": 24515 - }, - { - "epoch": 12.785397653194263, - "grad_norm": 1.3895014524459839, - "learning_rate": 7.587135678391961e-05, - "loss": 5.157, - "step": 24516 - }, - { - "epoch": 12.785919165580182, - "grad_norm": 1.3528995513916016, - "learning_rate": 7.587035175879399e-05, - "loss": 5.8409, - "step": 24517 - }, - { - "epoch": 12.786440677966102, - "grad_norm": 1.4449176788330078, - "learning_rate": 7.586934673366835e-05, - "loss": 5.4043, - "step": 24518 - }, - { - "epoch": 12.786962190352021, - "grad_norm": 1.6309062242507935, - "learning_rate": 7.586834170854271e-05, - "loss": 5.0753, - "step": 24519 - }, - { - "epoch": 12.78748370273794, - "grad_norm": 1.383406162261963, - "learning_rate": 7.586733668341709e-05, - "loss": 5.7901, - "step": 24520 - }, - { - "epoch": 12.78800521512386, - "grad_norm": 1.454232096672058, - "learning_rate": 7.586633165829146e-05, - "loss": 5.4033, - "step": 24521 - }, - { - "epoch": 12.788526727509778, - "grad_norm": 1.4895360469818115, - "learning_rate": 7.586532663316583e-05, - "loss": 5.2949, - "step": 24522 - }, - { - "epoch": 12.789048239895697, - "grad_norm": 1.5504213571548462, - "learning_rate": 7.58643216080402e-05, - "loss": 5.2135, - "step": 24523 - }, - { - "epoch": 12.789569752281617, - "grad_norm": 1.3436044454574585, - "learning_rate": 7.586331658291458e-05, - "loss": 5.9614, - "step": 24524 - }, - { - "epoch": 12.790091264667536, - "grad_norm": 1.5209850072860718, - "learning_rate": 7.586231155778894e-05, - "loss": 5.5668, - "step": 24525 - }, - { - "epoch": 12.790612777053456, - "grad_norm": 1.60165536403656, - "learning_rate": 7.586130653266332e-05, - "loss": 4.615, - "step": 24526 - }, - { - "epoch": 12.791134289439373, - "grad_norm": 1.5910780429840088, - "learning_rate": 7.58603015075377e-05, - "loss": 5.3874, - "step": 24527 - }, - { - "epoch": 12.791655801825293, - "grad_norm": 1.484256386756897, - "learning_rate": 7.585929648241207e-05, - "loss": 5.3925, - "step": 24528 - }, - { - "epoch": 12.792177314211212, - "grad_norm": 1.432582974433899, - "learning_rate": 7.585829145728644e-05, - "loss": 5.561, - "step": 24529 - }, - { - "epoch": 12.792698826597132, - "grad_norm": 1.3557921648025513, - "learning_rate": 7.585728643216082e-05, - "loss": 5.7018, - "step": 24530 - }, - { - "epoch": 12.793220338983051, - "grad_norm": 1.3070831298828125, - "learning_rate": 7.585628140703518e-05, - "loss": 5.1476, - "step": 24531 - }, - { - "epoch": 12.79374185136897, - "grad_norm": 1.5297027826309204, - "learning_rate": 7.585527638190956e-05, - "loss": 5.1804, - "step": 24532 - }, - { - "epoch": 12.794263363754888, - "grad_norm": 1.3827495574951172, - "learning_rate": 7.585427135678392e-05, - "loss": 5.7287, - "step": 24533 - }, - { - "epoch": 12.794784876140808, - "grad_norm": 1.4751304388046265, - "learning_rate": 7.585326633165829e-05, - "loss": 5.4131, - "step": 24534 - }, - { - "epoch": 12.795306388526727, - "grad_norm": 1.4296321868896484, - "learning_rate": 7.585226130653266e-05, - "loss": 5.4956, - "step": 24535 - }, - { - "epoch": 12.795827900912647, - "grad_norm": 1.2894346714019775, - "learning_rate": 7.585125628140703e-05, - "loss": 5.5422, - "step": 24536 - }, - { - "epoch": 12.796349413298566, - "grad_norm": 1.3907595872879028, - "learning_rate": 7.58502512562814e-05, - "loss": 5.5871, - "step": 24537 - }, - { - "epoch": 12.796870925684486, - "grad_norm": 1.3855032920837402, - "learning_rate": 7.584924623115578e-05, - "loss": 5.7338, - "step": 24538 - }, - { - "epoch": 12.797392438070403, - "grad_norm": 1.5798546075820923, - "learning_rate": 7.584824120603016e-05, - "loss": 5.3166, - "step": 24539 - }, - { - "epoch": 12.797913950456323, - "grad_norm": 1.3911021947860718, - "learning_rate": 7.584723618090453e-05, - "loss": 5.0137, - "step": 24540 - }, - { - "epoch": 12.798435462842242, - "grad_norm": 1.4940603971481323, - "learning_rate": 7.58462311557789e-05, - "loss": 5.1354, - "step": 24541 - }, - { - "epoch": 12.798956975228162, - "grad_norm": 1.3526928424835205, - "learning_rate": 7.584522613065327e-05, - "loss": 5.6737, - "step": 24542 - }, - { - "epoch": 12.799478487614081, - "grad_norm": 1.4715166091918945, - "learning_rate": 7.584422110552765e-05, - "loss": 5.5175, - "step": 24543 - }, - { - "epoch": 12.8, - "grad_norm": 1.5627928972244263, - "learning_rate": 7.584321608040201e-05, - "loss": 5.5459, - "step": 24544 - }, - { - "epoch": 12.800521512385918, - "grad_norm": 1.4839084148406982, - "learning_rate": 7.584221105527639e-05, - "loss": 5.1083, - "step": 24545 - }, - { - "epoch": 12.801043024771838, - "grad_norm": 1.4555813074111938, - "learning_rate": 7.584120603015075e-05, - "loss": 5.2082, - "step": 24546 - }, - { - "epoch": 12.801564537157757, - "grad_norm": 1.4859861135482788, - "learning_rate": 7.584020100502513e-05, - "loss": 5.6679, - "step": 24547 - }, - { - "epoch": 12.802086049543677, - "grad_norm": 1.43168306350708, - "learning_rate": 7.583919597989951e-05, - "loss": 5.4612, - "step": 24548 - }, - { - "epoch": 12.802607561929596, - "grad_norm": 1.4660041332244873, - "learning_rate": 7.583819095477387e-05, - "loss": 5.2578, - "step": 24549 - }, - { - "epoch": 12.803129074315516, - "grad_norm": 1.3965574502944946, - "learning_rate": 7.583718592964825e-05, - "loss": 5.5429, - "step": 24550 - }, - { - "epoch": 12.803650586701433, - "grad_norm": 1.3908474445343018, - "learning_rate": 7.583618090452261e-05, - "loss": 5.2491, - "step": 24551 - }, - { - "epoch": 12.804172099087353, - "grad_norm": 1.4310665130615234, - "learning_rate": 7.583517587939699e-05, - "loss": 5.2824, - "step": 24552 - }, - { - "epoch": 12.804693611473272, - "grad_norm": 1.4238231182098389, - "learning_rate": 7.583417085427136e-05, - "loss": 5.2977, - "step": 24553 - }, - { - "epoch": 12.805215123859192, - "grad_norm": 1.4451740980148315, - "learning_rate": 7.583316582914573e-05, - "loss": 5.4287, - "step": 24554 - }, - { - "epoch": 12.805736636245111, - "grad_norm": 1.395241379737854, - "learning_rate": 7.58321608040201e-05, - "loss": 5.518, - "step": 24555 - }, - { - "epoch": 12.80625814863103, - "grad_norm": 1.5774917602539062, - "learning_rate": 7.583115577889448e-05, - "loss": 4.7822, - "step": 24556 - }, - { - "epoch": 12.806779661016948, - "grad_norm": 1.5945773124694824, - "learning_rate": 7.583015075376884e-05, - "loss": 5.1921, - "step": 24557 - }, - { - "epoch": 12.807301173402868, - "grad_norm": 1.505287528038025, - "learning_rate": 7.582914572864322e-05, - "loss": 4.7526, - "step": 24558 - }, - { - "epoch": 12.807822685788787, - "grad_norm": 1.4281015396118164, - "learning_rate": 7.58281407035176e-05, - "loss": 5.2252, - "step": 24559 - }, - { - "epoch": 12.808344198174707, - "grad_norm": 1.511772632598877, - "learning_rate": 7.582713567839196e-05, - "loss": 5.1247, - "step": 24560 - }, - { - "epoch": 12.808865710560626, - "grad_norm": 1.492177128791809, - "learning_rate": 7.582613065326634e-05, - "loss": 5.5545, - "step": 24561 - }, - { - "epoch": 12.809387222946546, - "grad_norm": 1.4830312728881836, - "learning_rate": 7.58251256281407e-05, - "loss": 5.6446, - "step": 24562 - }, - { - "epoch": 12.809908735332463, - "grad_norm": 1.4984049797058105, - "learning_rate": 7.582412060301508e-05, - "loss": 5.4812, - "step": 24563 - }, - { - "epoch": 12.810430247718383, - "grad_norm": 1.5742549896240234, - "learning_rate": 7.582311557788944e-05, - "loss": 4.9532, - "step": 24564 - }, - { - "epoch": 12.810951760104302, - "grad_norm": 1.488010287284851, - "learning_rate": 7.582211055276382e-05, - "loss": 5.4845, - "step": 24565 - }, - { - "epoch": 12.811473272490222, - "grad_norm": 1.4581317901611328, - "learning_rate": 7.582110552763819e-05, - "loss": 5.5042, - "step": 24566 - }, - { - "epoch": 12.811994784876141, - "grad_norm": 1.4735196828842163, - "learning_rate": 7.582010050251256e-05, - "loss": 5.6493, - "step": 24567 - }, - { - "epoch": 12.81251629726206, - "grad_norm": 1.4385771751403809, - "learning_rate": 7.581909547738694e-05, - "loss": 5.741, - "step": 24568 - }, - { - "epoch": 12.813037809647978, - "grad_norm": 1.415540099143982, - "learning_rate": 7.581809045226132e-05, - "loss": 5.7768, - "step": 24569 - }, - { - "epoch": 12.813559322033898, - "grad_norm": 1.4559924602508545, - "learning_rate": 7.581708542713568e-05, - "loss": 5.509, - "step": 24570 - }, - { - "epoch": 12.814080834419817, - "grad_norm": 1.470900058746338, - "learning_rate": 7.581608040201006e-05, - "loss": 5.3688, - "step": 24571 - }, - { - "epoch": 12.814602346805737, - "grad_norm": 1.5045692920684814, - "learning_rate": 7.581507537688443e-05, - "loss": 5.4026, - "step": 24572 - }, - { - "epoch": 12.815123859191656, - "grad_norm": 1.4353454113006592, - "learning_rate": 7.581407035175879e-05, - "loss": 5.5834, - "step": 24573 - }, - { - "epoch": 12.815645371577576, - "grad_norm": 1.4498240947723389, - "learning_rate": 7.581306532663317e-05, - "loss": 5.8123, - "step": 24574 - }, - { - "epoch": 12.816166883963493, - "grad_norm": 1.5003852844238281, - "learning_rate": 7.581206030150753e-05, - "loss": 5.346, - "step": 24575 - }, - { - "epoch": 12.816688396349413, - "grad_norm": 1.4373533725738525, - "learning_rate": 7.581105527638191e-05, - "loss": 5.7674, - "step": 24576 - }, - { - "epoch": 12.817209908735332, - "grad_norm": 1.5096843242645264, - "learning_rate": 7.581005025125628e-05, - "loss": 5.6016, - "step": 24577 - }, - { - "epoch": 12.817731421121252, - "grad_norm": 1.4364408254623413, - "learning_rate": 7.580904522613065e-05, - "loss": 4.6783, - "step": 24578 - }, - { - "epoch": 12.818252933507171, - "grad_norm": 1.4959897994995117, - "learning_rate": 7.580804020100503e-05, - "loss": 5.0824, - "step": 24579 - }, - { - "epoch": 12.81877444589309, - "grad_norm": 1.4754608869552612, - "learning_rate": 7.580703517587941e-05, - "loss": 5.6108, - "step": 24580 - }, - { - "epoch": 12.819295958279008, - "grad_norm": 1.4132837057113647, - "learning_rate": 7.580603015075377e-05, - "loss": 5.539, - "step": 24581 - }, - { - "epoch": 12.819817470664928, - "grad_norm": 1.4900455474853516, - "learning_rate": 7.580502512562815e-05, - "loss": 5.2632, - "step": 24582 - }, - { - "epoch": 12.820338983050847, - "grad_norm": 1.5477955341339111, - "learning_rate": 7.580402010050252e-05, - "loss": 4.6157, - "step": 24583 - }, - { - "epoch": 12.820860495436767, - "grad_norm": 1.5465805530548096, - "learning_rate": 7.580301507537689e-05, - "loss": 5.1586, - "step": 24584 - }, - { - "epoch": 12.821382007822686, - "grad_norm": 1.506121039390564, - "learning_rate": 7.580201005025126e-05, - "loss": 5.1493, - "step": 24585 - }, - { - "epoch": 12.821903520208604, - "grad_norm": 1.4871801137924194, - "learning_rate": 7.580100502512562e-05, - "loss": 5.6211, - "step": 24586 - }, - { - "epoch": 12.822425032594523, - "grad_norm": 1.4952073097229004, - "learning_rate": 7.58e-05, - "loss": 5.4179, - "step": 24587 - }, - { - "epoch": 12.822946544980443, - "grad_norm": 1.3834048509597778, - "learning_rate": 7.579899497487438e-05, - "loss": 5.0119, - "step": 24588 - }, - { - "epoch": 12.823468057366362, - "grad_norm": 1.5951744318008423, - "learning_rate": 7.579798994974875e-05, - "loss": 4.8816, - "step": 24589 - }, - { - "epoch": 12.823989569752282, - "grad_norm": 1.4223023653030396, - "learning_rate": 7.579698492462312e-05, - "loss": 5.5994, - "step": 24590 - }, - { - "epoch": 12.824511082138201, - "grad_norm": 1.4151676893234253, - "learning_rate": 7.57959798994975e-05, - "loss": 5.5993, - "step": 24591 - }, - { - "epoch": 12.82503259452412, - "grad_norm": 1.41693913936615, - "learning_rate": 7.579497487437186e-05, - "loss": 5.3093, - "step": 24592 - }, - { - "epoch": 12.825554106910038, - "grad_norm": 1.494327187538147, - "learning_rate": 7.579396984924624e-05, - "loss": 5.5578, - "step": 24593 - }, - { - "epoch": 12.826075619295958, - "grad_norm": 1.4850293397903442, - "learning_rate": 7.57929648241206e-05, - "loss": 5.4049, - "step": 24594 - }, - { - "epoch": 12.826597131681877, - "grad_norm": 1.375154972076416, - "learning_rate": 7.579195979899498e-05, - "loss": 5.5599, - "step": 24595 - }, - { - "epoch": 12.827118644067797, - "grad_norm": 1.5015965700149536, - "learning_rate": 7.579095477386935e-05, - "loss": 5.7374, - "step": 24596 - }, - { - "epoch": 12.827640156453716, - "grad_norm": 1.6176033020019531, - "learning_rate": 7.578994974874372e-05, - "loss": 5.2466, - "step": 24597 - }, - { - "epoch": 12.828161668839634, - "grad_norm": 1.4474153518676758, - "learning_rate": 7.578894472361809e-05, - "loss": 5.3839, - "step": 24598 - }, - { - "epoch": 12.828683181225554, - "grad_norm": 1.465911865234375, - "learning_rate": 7.578793969849247e-05, - "loss": 5.2779, - "step": 24599 - }, - { - "epoch": 12.829204693611473, - "grad_norm": 1.452130675315857, - "learning_rate": 7.578693467336684e-05, - "loss": 5.4289, - "step": 24600 - }, - { - "epoch": 12.829726205997392, - "grad_norm": 1.3997102975845337, - "learning_rate": 7.578592964824121e-05, - "loss": 5.6301, - "step": 24601 - }, - { - "epoch": 12.830247718383312, - "grad_norm": 1.5454487800598145, - "learning_rate": 7.578492462311559e-05, - "loss": 5.2869, - "step": 24602 - }, - { - "epoch": 12.830769230769231, - "grad_norm": 1.4041472673416138, - "learning_rate": 7.578391959798995e-05, - "loss": 5.5771, - "step": 24603 - }, - { - "epoch": 12.83129074315515, - "grad_norm": 1.428382158279419, - "learning_rate": 7.578291457286433e-05, - "loss": 5.5219, - "step": 24604 - }, - { - "epoch": 12.831812255541069, - "grad_norm": 1.3847894668579102, - "learning_rate": 7.578190954773869e-05, - "loss": 5.4516, - "step": 24605 - }, - { - "epoch": 12.832333767926988, - "grad_norm": 1.3994027376174927, - "learning_rate": 7.578090452261307e-05, - "loss": 5.6267, - "step": 24606 - }, - { - "epoch": 12.832855280312907, - "grad_norm": 1.3892827033996582, - "learning_rate": 7.577989949748743e-05, - "loss": 5.3245, - "step": 24607 - }, - { - "epoch": 12.833376792698827, - "grad_norm": 1.3365362882614136, - "learning_rate": 7.577889447236181e-05, - "loss": 5.7082, - "step": 24608 - }, - { - "epoch": 12.833898305084746, - "grad_norm": 1.393423080444336, - "learning_rate": 7.577788944723619e-05, - "loss": 5.2247, - "step": 24609 - }, - { - "epoch": 12.834419817470664, - "grad_norm": 1.4267898797988892, - "learning_rate": 7.577688442211057e-05, - "loss": 5.4871, - "step": 24610 - }, - { - "epoch": 12.834941329856584, - "grad_norm": 1.383127212524414, - "learning_rate": 7.577587939698493e-05, - "loss": 5.5478, - "step": 24611 - }, - { - "epoch": 12.835462842242503, - "grad_norm": 1.5042227506637573, - "learning_rate": 7.57748743718593e-05, - "loss": 5.2516, - "step": 24612 - }, - { - "epoch": 12.835984354628422, - "grad_norm": 1.3994364738464355, - "learning_rate": 7.577386934673367e-05, - "loss": 5.3072, - "step": 24613 - }, - { - "epoch": 12.836505867014342, - "grad_norm": 1.4603432416915894, - "learning_rate": 7.577286432160804e-05, - "loss": 5.5117, - "step": 24614 - }, - { - "epoch": 12.837027379400261, - "grad_norm": 1.3548238277435303, - "learning_rate": 7.577185929648242e-05, - "loss": 5.6875, - "step": 24615 - }, - { - "epoch": 12.83754889178618, - "grad_norm": 1.4390456676483154, - "learning_rate": 7.577085427135678e-05, - "loss": 5.4838, - "step": 24616 - }, - { - "epoch": 12.838070404172099, - "grad_norm": 1.3834501504898071, - "learning_rate": 7.576984924623116e-05, - "loss": 5.3341, - "step": 24617 - }, - { - "epoch": 12.838591916558018, - "grad_norm": 1.5526729822158813, - "learning_rate": 7.576884422110552e-05, - "loss": 5.0434, - "step": 24618 - }, - { - "epoch": 12.839113428943937, - "grad_norm": 1.6381199359893799, - "learning_rate": 7.57678391959799e-05, - "loss": 5.147, - "step": 24619 - }, - { - "epoch": 12.839634941329857, - "grad_norm": 1.533531904220581, - "learning_rate": 7.576683417085428e-05, - "loss": 5.7222, - "step": 24620 - }, - { - "epoch": 12.840156453715776, - "grad_norm": 1.6220197677612305, - "learning_rate": 7.576582914572866e-05, - "loss": 4.5642, - "step": 24621 - }, - { - "epoch": 12.840677966101694, - "grad_norm": 1.6668591499328613, - "learning_rate": 7.576482412060302e-05, - "loss": 5.6086, - "step": 24622 - }, - { - "epoch": 12.841199478487614, - "grad_norm": 1.5508743524551392, - "learning_rate": 7.57638190954774e-05, - "loss": 4.7793, - "step": 24623 - }, - { - "epoch": 12.841720990873533, - "grad_norm": 1.5960558652877808, - "learning_rate": 7.576281407035176e-05, - "loss": 5.3743, - "step": 24624 - }, - { - "epoch": 12.842242503259452, - "grad_norm": 1.4818769693374634, - "learning_rate": 7.576180904522614e-05, - "loss": 5.4784, - "step": 24625 - }, - { - "epoch": 12.842764015645372, - "grad_norm": 1.5495015382766724, - "learning_rate": 7.57608040201005e-05, - "loss": 5.104, - "step": 24626 - }, - { - "epoch": 12.843285528031291, - "grad_norm": 1.4388843774795532, - "learning_rate": 7.575979899497487e-05, - "loss": 4.8785, - "step": 24627 - }, - { - "epoch": 12.843807040417209, - "grad_norm": 1.2963098287582397, - "learning_rate": 7.575879396984925e-05, - "loss": 5.8735, - "step": 24628 - }, - { - "epoch": 12.844328552803129, - "grad_norm": 1.4902575016021729, - "learning_rate": 7.575778894472362e-05, - "loss": 5.2214, - "step": 24629 - }, - { - "epoch": 12.844850065189048, - "grad_norm": 1.4846484661102295, - "learning_rate": 7.5756783919598e-05, - "loss": 5.3312, - "step": 24630 - }, - { - "epoch": 12.845371577574968, - "grad_norm": 1.3898589611053467, - "learning_rate": 7.575577889447237e-05, - "loss": 5.5731, - "step": 24631 - }, - { - "epoch": 12.845893089960887, - "grad_norm": 1.4773694276809692, - "learning_rate": 7.575477386934674e-05, - "loss": 5.3644, - "step": 24632 - }, - { - "epoch": 12.846414602346806, - "grad_norm": 1.3766512870788574, - "learning_rate": 7.575376884422111e-05, - "loss": 5.578, - "step": 24633 - }, - { - "epoch": 12.846936114732724, - "grad_norm": 1.4852056503295898, - "learning_rate": 7.575276381909549e-05, - "loss": 5.5608, - "step": 24634 - }, - { - "epoch": 12.847457627118644, - "grad_norm": 1.424828290939331, - "learning_rate": 7.575175879396985e-05, - "loss": 5.6975, - "step": 24635 - }, - { - "epoch": 12.847979139504563, - "grad_norm": 1.4773342609405518, - "learning_rate": 7.575075376884423e-05, - "loss": 5.0337, - "step": 24636 - }, - { - "epoch": 12.848500651890483, - "grad_norm": 1.4607136249542236, - "learning_rate": 7.574974874371859e-05, - "loss": 4.7321, - "step": 24637 - }, - { - "epoch": 12.849022164276402, - "grad_norm": 1.364302158355713, - "learning_rate": 7.574874371859297e-05, - "loss": 5.6721, - "step": 24638 - }, - { - "epoch": 12.849543676662321, - "grad_norm": 1.3853726387023926, - "learning_rate": 7.574773869346733e-05, - "loss": 5.4704, - "step": 24639 - }, - { - "epoch": 12.85006518904824, - "grad_norm": 1.3563792705535889, - "learning_rate": 7.574673366834171e-05, - "loss": 5.7899, - "step": 24640 - }, - { - "epoch": 12.850586701434159, - "grad_norm": 1.3884869813919067, - "learning_rate": 7.574572864321609e-05, - "loss": 5.4705, - "step": 24641 - }, - { - "epoch": 12.851108213820078, - "grad_norm": 1.3340996503829956, - "learning_rate": 7.574472361809045e-05, - "loss": 6.0538, - "step": 24642 - }, - { - "epoch": 12.851629726205998, - "grad_norm": 1.4597491025924683, - "learning_rate": 7.574371859296483e-05, - "loss": 5.3891, - "step": 24643 - }, - { - "epoch": 12.852151238591917, - "grad_norm": 1.4486027956008911, - "learning_rate": 7.57427135678392e-05, - "loss": 5.3116, - "step": 24644 - }, - { - "epoch": 12.852672750977836, - "grad_norm": 1.4889715909957886, - "learning_rate": 7.574170854271357e-05, - "loss": 5.6215, - "step": 24645 - }, - { - "epoch": 12.853194263363754, - "grad_norm": 1.5381659269332886, - "learning_rate": 7.574070351758794e-05, - "loss": 5.0078, - "step": 24646 - }, - { - "epoch": 12.853715775749674, - "grad_norm": 1.3356539011001587, - "learning_rate": 7.573969849246232e-05, - "loss": 5.4502, - "step": 24647 - }, - { - "epoch": 12.854237288135593, - "grad_norm": 1.4161639213562012, - "learning_rate": 7.573869346733668e-05, - "loss": 5.5974, - "step": 24648 - }, - { - "epoch": 12.854758800521513, - "grad_norm": 1.4979901313781738, - "learning_rate": 7.573768844221106e-05, - "loss": 5.2938, - "step": 24649 - }, - { - "epoch": 12.855280312907432, - "grad_norm": 1.42640221118927, - "learning_rate": 7.573668341708544e-05, - "loss": 5.3252, - "step": 24650 - }, - { - "epoch": 12.855801825293351, - "grad_norm": 1.4360921382904053, - "learning_rate": 7.573567839195981e-05, - "loss": 5.4554, - "step": 24651 - }, - { - "epoch": 12.85632333767927, - "grad_norm": 1.4213193655014038, - "learning_rate": 7.573467336683418e-05, - "loss": 5.3807, - "step": 24652 - }, - { - "epoch": 12.856844850065189, - "grad_norm": 1.3851252794265747, - "learning_rate": 7.573366834170854e-05, - "loss": 5.2522, - "step": 24653 - }, - { - "epoch": 12.857366362451108, - "grad_norm": 1.6763086318969727, - "learning_rate": 7.573266331658292e-05, - "loss": 5.4223, - "step": 24654 - }, - { - "epoch": 12.857887874837028, - "grad_norm": 1.446797251701355, - "learning_rate": 7.573165829145729e-05, - "loss": 5.49, - "step": 24655 - }, - { - "epoch": 12.858409387222947, - "grad_norm": 1.3818776607513428, - "learning_rate": 7.573065326633166e-05, - "loss": 5.6658, - "step": 24656 - }, - { - "epoch": 12.858930899608866, - "grad_norm": 1.310022234916687, - "learning_rate": 7.572964824120603e-05, - "loss": 5.9202, - "step": 24657 - }, - { - "epoch": 12.859452411994784, - "grad_norm": 1.4335439205169678, - "learning_rate": 7.57286432160804e-05, - "loss": 5.6191, - "step": 24658 - }, - { - "epoch": 12.859973924380704, - "grad_norm": 1.3717665672302246, - "learning_rate": 7.572763819095477e-05, - "loss": 5.8445, - "step": 24659 - }, - { - "epoch": 12.860495436766623, - "grad_norm": 1.4856759309768677, - "learning_rate": 7.572663316582915e-05, - "loss": 5.3872, - "step": 24660 - }, - { - "epoch": 12.861016949152543, - "grad_norm": 1.4241504669189453, - "learning_rate": 7.572562814070352e-05, - "loss": 5.7959, - "step": 24661 - }, - { - "epoch": 12.861538461538462, - "grad_norm": 1.4128996133804321, - "learning_rate": 7.57246231155779e-05, - "loss": 4.5976, - "step": 24662 - }, - { - "epoch": 12.862059973924381, - "grad_norm": 1.416710615158081, - "learning_rate": 7.572361809045227e-05, - "loss": 5.2748, - "step": 24663 - }, - { - "epoch": 12.8625814863103, - "grad_norm": 1.391680121421814, - "learning_rate": 7.572261306532664e-05, - "loss": 5.1431, - "step": 24664 - }, - { - "epoch": 12.863102998696219, - "grad_norm": 1.5697083473205566, - "learning_rate": 7.572160804020101e-05, - "loss": 5.0868, - "step": 24665 - }, - { - "epoch": 12.863624511082138, - "grad_norm": 1.4734899997711182, - "learning_rate": 7.572060301507537e-05, - "loss": 5.696, - "step": 24666 - }, - { - "epoch": 12.864146023468058, - "grad_norm": 1.3817274570465088, - "learning_rate": 7.571959798994975e-05, - "loss": 5.3746, - "step": 24667 - }, - { - "epoch": 12.864667535853977, - "grad_norm": 1.4497634172439575, - "learning_rate": 7.571859296482412e-05, - "loss": 5.4158, - "step": 24668 - }, - { - "epoch": 12.865189048239897, - "grad_norm": 1.335124135017395, - "learning_rate": 7.57175879396985e-05, - "loss": 5.2901, - "step": 24669 - }, - { - "epoch": 12.865710560625814, - "grad_norm": 1.4868993759155273, - "learning_rate": 7.571658291457287e-05, - "loss": 5.2843, - "step": 24670 - }, - { - "epoch": 12.866232073011734, - "grad_norm": 1.4996832609176636, - "learning_rate": 7.571557788944725e-05, - "loss": 5.1432, - "step": 24671 - }, - { - "epoch": 12.866753585397653, - "grad_norm": 1.3443577289581299, - "learning_rate": 7.571457286432161e-05, - "loss": 5.505, - "step": 24672 - }, - { - "epoch": 12.867275097783573, - "grad_norm": 1.4522989988327026, - "learning_rate": 7.571356783919599e-05, - "loss": 4.8426, - "step": 24673 - }, - { - "epoch": 12.867796610169492, - "grad_norm": 1.522265076637268, - "learning_rate": 7.571256281407036e-05, - "loss": 5.4489, - "step": 24674 - }, - { - "epoch": 12.868318122555412, - "grad_norm": 1.5269722938537598, - "learning_rate": 7.571155778894473e-05, - "loss": 5.2563, - "step": 24675 - }, - { - "epoch": 12.86883963494133, - "grad_norm": 1.4577420949935913, - "learning_rate": 7.57105527638191e-05, - "loss": 5.4515, - "step": 24676 - }, - { - "epoch": 12.869361147327249, - "grad_norm": 1.4942537546157837, - "learning_rate": 7.570954773869348e-05, - "loss": 5.1623, - "step": 24677 - }, - { - "epoch": 12.869882659713168, - "grad_norm": 1.4226210117340088, - "learning_rate": 7.570854271356784e-05, - "loss": 5.1736, - "step": 24678 - }, - { - "epoch": 12.870404172099088, - "grad_norm": 1.5292154550552368, - "learning_rate": 7.57075376884422e-05, - "loss": 5.6541, - "step": 24679 - }, - { - "epoch": 12.870925684485007, - "grad_norm": 1.4317790269851685, - "learning_rate": 7.570653266331658e-05, - "loss": 5.6224, - "step": 24680 - }, - { - "epoch": 12.871447196870925, - "grad_norm": 1.4248631000518799, - "learning_rate": 7.570552763819096e-05, - "loss": 5.3469, - "step": 24681 - }, - { - "epoch": 12.871968709256844, - "grad_norm": 1.353656530380249, - "learning_rate": 7.570452261306534e-05, - "loss": 5.6784, - "step": 24682 - }, - { - "epoch": 12.872490221642764, - "grad_norm": 1.4326950311660767, - "learning_rate": 7.57035175879397e-05, - "loss": 5.5582, - "step": 24683 - }, - { - "epoch": 12.873011734028683, - "grad_norm": 1.4663530588150024, - "learning_rate": 7.570251256281408e-05, - "loss": 4.844, - "step": 24684 - }, - { - "epoch": 12.873533246414603, - "grad_norm": 1.4593175649642944, - "learning_rate": 7.570150753768844e-05, - "loss": 4.8548, - "step": 24685 - }, - { - "epoch": 12.874054758800522, - "grad_norm": 1.5612571239471436, - "learning_rate": 7.570050251256282e-05, - "loss": 4.8014, - "step": 24686 - }, - { - "epoch": 12.874576271186442, - "grad_norm": 1.4868346452713013, - "learning_rate": 7.569949748743719e-05, - "loss": 5.2076, - "step": 24687 - }, - { - "epoch": 12.87509778357236, - "grad_norm": 1.3718276023864746, - "learning_rate": 7.569849246231156e-05, - "loss": 4.9069, - "step": 24688 - }, - { - "epoch": 12.875619295958279, - "grad_norm": 1.4213429689407349, - "learning_rate": 7.569748743718593e-05, - "loss": 4.9285, - "step": 24689 - }, - { - "epoch": 12.876140808344198, - "grad_norm": 1.3828840255737305, - "learning_rate": 7.56964824120603e-05, - "loss": 5.6519, - "step": 24690 - }, - { - "epoch": 12.876662320730118, - "grad_norm": 1.3778210878372192, - "learning_rate": 7.569547738693467e-05, - "loss": 5.5389, - "step": 24691 - }, - { - "epoch": 12.877183833116037, - "grad_norm": 1.522128701210022, - "learning_rate": 7.569447236180905e-05, - "loss": 5.2499, - "step": 24692 - }, - { - "epoch": 12.877705345501955, - "grad_norm": 1.3811395168304443, - "learning_rate": 7.569346733668343e-05, - "loss": 5.6035, - "step": 24693 - }, - { - "epoch": 12.878226857887874, - "grad_norm": 1.3600643873214722, - "learning_rate": 7.569246231155779e-05, - "loss": 5.8185, - "step": 24694 - }, - { - "epoch": 12.878748370273794, - "grad_norm": 1.3867524862289429, - "learning_rate": 7.569145728643217e-05, - "loss": 5.6079, - "step": 24695 - }, - { - "epoch": 12.879269882659713, - "grad_norm": 1.435459017753601, - "learning_rate": 7.569045226130653e-05, - "loss": 4.9936, - "step": 24696 - }, - { - "epoch": 12.879791395045633, - "grad_norm": 1.5101405382156372, - "learning_rate": 7.568944723618091e-05, - "loss": 5.5689, - "step": 24697 - }, - { - "epoch": 12.880312907431552, - "grad_norm": 1.4307211637496948, - "learning_rate": 7.568844221105527e-05, - "loss": 5.3757, - "step": 24698 - }, - { - "epoch": 12.880834419817472, - "grad_norm": 1.661227822303772, - "learning_rate": 7.568743718592965e-05, - "loss": 5.1283, - "step": 24699 - }, - { - "epoch": 12.88135593220339, - "grad_norm": 1.4922553300857544, - "learning_rate": 7.568643216080402e-05, - "loss": 5.1282, - "step": 24700 - }, - { - "epoch": 12.881877444589309, - "grad_norm": 1.5142545700073242, - "learning_rate": 7.56854271356784e-05, - "loss": 5.4349, - "step": 24701 - }, - { - "epoch": 12.882398956975228, - "grad_norm": 1.5512373447418213, - "learning_rate": 7.568442211055277e-05, - "loss": 5.2353, - "step": 24702 - }, - { - "epoch": 12.882920469361148, - "grad_norm": 1.57902991771698, - "learning_rate": 7.568341708542715e-05, - "loss": 5.4021, - "step": 24703 - }, - { - "epoch": 12.883441981747067, - "grad_norm": 1.8808839321136475, - "learning_rate": 7.568241206030151e-05, - "loss": 4.8933, - "step": 24704 - }, - { - "epoch": 12.883963494132985, - "grad_norm": 1.4376376867294312, - "learning_rate": 7.568140703517588e-05, - "loss": 5.8702, - "step": 24705 - }, - { - "epoch": 12.884485006518904, - "grad_norm": 1.512863039970398, - "learning_rate": 7.568040201005026e-05, - "loss": 5.3041, - "step": 24706 - }, - { - "epoch": 12.885006518904824, - "grad_norm": 1.391075611114502, - "learning_rate": 7.567939698492462e-05, - "loss": 5.6659, - "step": 24707 - }, - { - "epoch": 12.885528031290743, - "grad_norm": 1.4961916208267212, - "learning_rate": 7.5678391959799e-05, - "loss": 5.2566, - "step": 24708 - }, - { - "epoch": 12.886049543676663, - "grad_norm": 1.4994032382965088, - "learning_rate": 7.567738693467336e-05, - "loss": 5.5353, - "step": 24709 - }, - { - "epoch": 12.886571056062582, - "grad_norm": 1.4890272617340088, - "learning_rate": 7.567638190954774e-05, - "loss": 4.8463, - "step": 24710 - }, - { - "epoch": 12.887092568448502, - "grad_norm": 1.4404600858688354, - "learning_rate": 7.56753768844221e-05, - "loss": 5.1893, - "step": 24711 - }, - { - "epoch": 12.88761408083442, - "grad_norm": 1.529931902885437, - "learning_rate": 7.567437185929648e-05, - "loss": 5.4085, - "step": 24712 - }, - { - "epoch": 12.888135593220339, - "grad_norm": 1.4638240337371826, - "learning_rate": 7.567336683417086e-05, - "loss": 5.5004, - "step": 24713 - }, - { - "epoch": 12.888657105606258, - "grad_norm": 1.5970109701156616, - "learning_rate": 7.567236180904524e-05, - "loss": 5.5006, - "step": 24714 - }, - { - "epoch": 12.889178617992178, - "grad_norm": 1.4922162294387817, - "learning_rate": 7.56713567839196e-05, - "loss": 5.0644, - "step": 24715 - }, - { - "epoch": 12.889700130378097, - "grad_norm": 1.5717778205871582, - "learning_rate": 7.567035175879398e-05, - "loss": 5.2653, - "step": 24716 - }, - { - "epoch": 12.890221642764015, - "grad_norm": 1.7003695964813232, - "learning_rate": 7.566934673366834e-05, - "loss": 5.3433, - "step": 24717 - }, - { - "epoch": 12.890743155149934, - "grad_norm": 1.506618857383728, - "learning_rate": 7.566834170854272e-05, - "loss": 5.1638, - "step": 24718 - }, - { - "epoch": 12.891264667535854, - "grad_norm": 1.5073165893554688, - "learning_rate": 7.566733668341709e-05, - "loss": 5.5457, - "step": 24719 - }, - { - "epoch": 12.891786179921773, - "grad_norm": 1.3132332563400269, - "learning_rate": 7.566633165829145e-05, - "loss": 5.9267, - "step": 24720 - }, - { - "epoch": 12.892307692307693, - "grad_norm": 1.4223763942718506, - "learning_rate": 7.566532663316583e-05, - "loss": 5.6635, - "step": 24721 - }, - { - "epoch": 12.892829204693612, - "grad_norm": 1.4862831830978394, - "learning_rate": 7.56643216080402e-05, - "loss": 5.3067, - "step": 24722 - }, - { - "epoch": 12.89335071707953, - "grad_norm": 1.5342066287994385, - "learning_rate": 7.566331658291458e-05, - "loss": 4.9901, - "step": 24723 - }, - { - "epoch": 12.89387222946545, - "grad_norm": 1.773094654083252, - "learning_rate": 7.566231155778895e-05, - "loss": 5.005, - "step": 24724 - }, - { - "epoch": 12.894393741851369, - "grad_norm": 1.6567087173461914, - "learning_rate": 7.566130653266333e-05, - "loss": 5.1104, - "step": 24725 - }, - { - "epoch": 12.894915254237288, - "grad_norm": 1.5628445148468018, - "learning_rate": 7.566030150753769e-05, - "loss": 5.7579, - "step": 24726 - }, - { - "epoch": 12.895436766623208, - "grad_norm": 1.4400020837783813, - "learning_rate": 7.565929648241207e-05, - "loss": 5.4541, - "step": 24727 - }, - { - "epoch": 12.895958279009127, - "grad_norm": 1.5084190368652344, - "learning_rate": 7.565829145728643e-05, - "loss": 5.486, - "step": 24728 - }, - { - "epoch": 12.896479791395045, - "grad_norm": 1.5837458372116089, - "learning_rate": 7.565728643216081e-05, - "loss": 5.395, - "step": 24729 - }, - { - "epoch": 12.897001303780964, - "grad_norm": 1.6655513048171997, - "learning_rate": 7.565628140703517e-05, - "loss": 5.7062, - "step": 24730 - }, - { - "epoch": 12.897522816166884, - "grad_norm": 1.4584990739822388, - "learning_rate": 7.565527638190955e-05, - "loss": 5.7195, - "step": 24731 - }, - { - "epoch": 12.898044328552803, - "grad_norm": 1.4373011589050293, - "learning_rate": 7.565427135678392e-05, - "loss": 5.6163, - "step": 24732 - }, - { - "epoch": 12.898565840938723, - "grad_norm": 1.4039498567581177, - "learning_rate": 7.56532663316583e-05, - "loss": 5.2665, - "step": 24733 - }, - { - "epoch": 12.899087353324642, - "grad_norm": 1.4629631042480469, - "learning_rate": 7.565226130653267e-05, - "loss": 5.4701, - "step": 24734 - }, - { - "epoch": 12.89960886571056, - "grad_norm": 1.483049750328064, - "learning_rate": 7.565125628140704e-05, - "loss": 4.3813, - "step": 24735 - }, - { - "epoch": 12.90013037809648, - "grad_norm": 1.5531747341156006, - "learning_rate": 7.565025125628141e-05, - "loss": 5.398, - "step": 24736 - }, - { - "epoch": 12.900651890482399, - "grad_norm": 1.4685455560684204, - "learning_rate": 7.564924623115578e-05, - "loss": 5.5311, - "step": 24737 - }, - { - "epoch": 12.901173402868318, - "grad_norm": 1.425740122795105, - "learning_rate": 7.564824120603016e-05, - "loss": 5.3124, - "step": 24738 - }, - { - "epoch": 12.901694915254238, - "grad_norm": 1.323992133140564, - "learning_rate": 7.564723618090452e-05, - "loss": 5.7898, - "step": 24739 - }, - { - "epoch": 12.902216427640157, - "grad_norm": 1.5405912399291992, - "learning_rate": 7.56462311557789e-05, - "loss": 4.6799, - "step": 24740 - }, - { - "epoch": 12.902737940026075, - "grad_norm": 1.4073818922042847, - "learning_rate": 7.564522613065326e-05, - "loss": 5.7235, - "step": 24741 - }, - { - "epoch": 12.903259452411994, - "grad_norm": 1.5196220874786377, - "learning_rate": 7.564422110552764e-05, - "loss": 5.2907, - "step": 24742 - }, - { - "epoch": 12.903780964797914, - "grad_norm": 1.4104396104812622, - "learning_rate": 7.564321608040202e-05, - "loss": 5.2988, - "step": 24743 - }, - { - "epoch": 12.904302477183833, - "grad_norm": 1.4315919876098633, - "learning_rate": 7.56422110552764e-05, - "loss": 5.8048, - "step": 24744 - }, - { - "epoch": 12.904823989569753, - "grad_norm": 1.4866909980773926, - "learning_rate": 7.564120603015076e-05, - "loss": 5.6088, - "step": 24745 - }, - { - "epoch": 12.905345501955672, - "grad_norm": 1.43842613697052, - "learning_rate": 7.564020100502513e-05, - "loss": 5.254, - "step": 24746 - }, - { - "epoch": 12.90586701434159, - "grad_norm": 1.4491745233535767, - "learning_rate": 7.56391959798995e-05, - "loss": 5.6495, - "step": 24747 - }, - { - "epoch": 12.90638852672751, - "grad_norm": 1.3679744005203247, - "learning_rate": 7.563819095477387e-05, - "loss": 5.6551, - "step": 24748 - }, - { - "epoch": 12.906910039113429, - "grad_norm": 1.440392255783081, - "learning_rate": 7.563718592964825e-05, - "loss": 5.8958, - "step": 24749 - }, - { - "epoch": 12.907431551499348, - "grad_norm": 1.4384727478027344, - "learning_rate": 7.563618090452261e-05, - "loss": 5.4749, - "step": 24750 - }, - { - "epoch": 12.907953063885268, - "grad_norm": 1.4295719861984253, - "learning_rate": 7.563517587939699e-05, - "loss": 5.2561, - "step": 24751 - }, - { - "epoch": 12.908474576271187, - "grad_norm": 1.5152682065963745, - "learning_rate": 7.563417085427135e-05, - "loss": 4.9466, - "step": 24752 - }, - { - "epoch": 12.908996088657105, - "grad_norm": 1.4244900941848755, - "learning_rate": 7.563316582914573e-05, - "loss": 5.4927, - "step": 24753 - }, - { - "epoch": 12.909517601043024, - "grad_norm": 1.4309799671173096, - "learning_rate": 7.563216080402011e-05, - "loss": 5.4996, - "step": 24754 - }, - { - "epoch": 12.910039113428944, - "grad_norm": 1.5996869802474976, - "learning_rate": 7.563115577889449e-05, - "loss": 5.1045, - "step": 24755 - }, - { - "epoch": 12.910560625814863, - "grad_norm": 1.6982598304748535, - "learning_rate": 7.563015075376885e-05, - "loss": 5.3132, - "step": 24756 - }, - { - "epoch": 12.911082138200783, - "grad_norm": 1.5868597030639648, - "learning_rate": 7.562914572864323e-05, - "loss": 5.1141, - "step": 24757 - }, - { - "epoch": 12.911603650586702, - "grad_norm": 1.4919596910476685, - "learning_rate": 7.562814070351759e-05, - "loss": 5.2685, - "step": 24758 - }, - { - "epoch": 12.91212516297262, - "grad_norm": 1.4294100999832153, - "learning_rate": 7.562713567839196e-05, - "loss": 5.2396, - "step": 24759 - }, - { - "epoch": 12.91264667535854, - "grad_norm": 1.4403411149978638, - "learning_rate": 7.562613065326633e-05, - "loss": 5.4755, - "step": 24760 - }, - { - "epoch": 12.913168187744459, - "grad_norm": 1.3791896104812622, - "learning_rate": 7.56251256281407e-05, - "loss": 5.1694, - "step": 24761 - }, - { - "epoch": 12.913689700130378, - "grad_norm": 1.4054217338562012, - "learning_rate": 7.562412060301508e-05, - "loss": 5.2736, - "step": 24762 - }, - { - "epoch": 12.914211212516298, - "grad_norm": 1.3709959983825684, - "learning_rate": 7.562311557788945e-05, - "loss": 5.7586, - "step": 24763 - }, - { - "epoch": 12.914732724902217, - "grad_norm": 1.4125761985778809, - "learning_rate": 7.562211055276383e-05, - "loss": 5.3814, - "step": 24764 - }, - { - "epoch": 12.915254237288135, - "grad_norm": 1.5272672176361084, - "learning_rate": 7.56211055276382e-05, - "loss": 5.2205, - "step": 24765 - }, - { - "epoch": 12.915775749674054, - "grad_norm": 1.436463713645935, - "learning_rate": 7.562010050251257e-05, - "loss": 5.9344, - "step": 24766 - }, - { - "epoch": 12.916297262059974, - "grad_norm": 1.3474106788635254, - "learning_rate": 7.561909547738694e-05, - "loss": 5.451, - "step": 24767 - }, - { - "epoch": 12.916818774445893, - "grad_norm": 1.4660958051681519, - "learning_rate": 7.561809045226132e-05, - "loss": 5.5878, - "step": 24768 - }, - { - "epoch": 12.917340286831813, - "grad_norm": 1.4184856414794922, - "learning_rate": 7.561708542713568e-05, - "loss": 5.5349, - "step": 24769 - }, - { - "epoch": 12.917861799217732, - "grad_norm": 1.5144238471984863, - "learning_rate": 7.561608040201006e-05, - "loss": 5.4453, - "step": 24770 - }, - { - "epoch": 12.91838331160365, - "grad_norm": 1.5152181386947632, - "learning_rate": 7.561507537688442e-05, - "loss": 5.1079, - "step": 24771 - }, - { - "epoch": 12.91890482398957, - "grad_norm": 1.4755703210830688, - "learning_rate": 7.561407035175879e-05, - "loss": 5.3345, - "step": 24772 - }, - { - "epoch": 12.919426336375489, - "grad_norm": 1.494856357574463, - "learning_rate": 7.561306532663316e-05, - "loss": 4.9492, - "step": 24773 - }, - { - "epoch": 12.919947848761408, - "grad_norm": 1.443739652633667, - "learning_rate": 7.561206030150754e-05, - "loss": 5.3487, - "step": 24774 - }, - { - "epoch": 12.920469361147328, - "grad_norm": 1.5493388175964355, - "learning_rate": 7.561105527638192e-05, - "loss": 5.1933, - "step": 24775 - }, - { - "epoch": 12.920990873533245, - "grad_norm": 1.4273240566253662, - "learning_rate": 7.561005025125628e-05, - "loss": 5.8165, - "step": 24776 - }, - { - "epoch": 12.921512385919165, - "grad_norm": 1.3853821754455566, - "learning_rate": 7.560904522613066e-05, - "loss": 5.6322, - "step": 24777 - }, - { - "epoch": 12.922033898305084, - "grad_norm": 1.4013075828552246, - "learning_rate": 7.560804020100503e-05, - "loss": 5.8326, - "step": 24778 - }, - { - "epoch": 12.922555410691004, - "grad_norm": 1.5085194110870361, - "learning_rate": 7.56070351758794e-05, - "loss": 5.5342, - "step": 24779 - }, - { - "epoch": 12.923076923076923, - "grad_norm": 1.543379306793213, - "learning_rate": 7.560603015075377e-05, - "loss": 4.7945, - "step": 24780 - }, - { - "epoch": 12.923598435462843, - "grad_norm": 1.560597538948059, - "learning_rate": 7.560502512562815e-05, - "loss": 5.5286, - "step": 24781 - }, - { - "epoch": 12.924119947848762, - "grad_norm": 1.4275848865509033, - "learning_rate": 7.560402010050251e-05, - "loss": 5.5153, - "step": 24782 - }, - { - "epoch": 12.92464146023468, - "grad_norm": 1.4730587005615234, - "learning_rate": 7.560301507537689e-05, - "loss": 4.9346, - "step": 24783 - }, - { - "epoch": 12.9251629726206, - "grad_norm": 1.3778982162475586, - "learning_rate": 7.560201005025127e-05, - "loss": 5.6533, - "step": 24784 - }, - { - "epoch": 12.925684485006519, - "grad_norm": 1.561908483505249, - "learning_rate": 7.560100502512563e-05, - "loss": 5.1719, - "step": 24785 - }, - { - "epoch": 12.926205997392438, - "grad_norm": 1.3793351650238037, - "learning_rate": 7.560000000000001e-05, - "loss": 5.5108, - "step": 24786 - }, - { - "epoch": 12.926727509778358, - "grad_norm": 1.4578075408935547, - "learning_rate": 7.559899497487437e-05, - "loss": 5.1803, - "step": 24787 - }, - { - "epoch": 12.927249022164276, - "grad_norm": 1.4672389030456543, - "learning_rate": 7.559798994974875e-05, - "loss": 5.1678, - "step": 24788 - }, - { - "epoch": 12.927770534550195, - "grad_norm": 1.4986213445663452, - "learning_rate": 7.559698492462311e-05, - "loss": 5.1918, - "step": 24789 - }, - { - "epoch": 12.928292046936114, - "grad_norm": 1.4703031778335571, - "learning_rate": 7.559597989949749e-05, - "loss": 5.1953, - "step": 24790 - }, - { - "epoch": 12.928813559322034, - "grad_norm": 1.4692702293395996, - "learning_rate": 7.559497487437186e-05, - "loss": 5.4214, - "step": 24791 - }, - { - "epoch": 12.929335071707953, - "grad_norm": 1.6389929056167603, - "learning_rate": 7.559396984924623e-05, - "loss": 5.1505, - "step": 24792 - }, - { - "epoch": 12.929856584093873, - "grad_norm": 1.4495924711227417, - "learning_rate": 7.55929648241206e-05, - "loss": 4.9941, - "step": 24793 - }, - { - "epoch": 12.930378096479792, - "grad_norm": 1.4150946140289307, - "learning_rate": 7.559195979899498e-05, - "loss": 5.5168, - "step": 24794 - }, - { - "epoch": 12.93089960886571, - "grad_norm": 1.396484613418579, - "learning_rate": 7.559095477386935e-05, - "loss": 5.7857, - "step": 24795 - }, - { - "epoch": 12.93142112125163, - "grad_norm": 1.595192790031433, - "learning_rate": 7.558994974874373e-05, - "loss": 4.2998, - "step": 24796 - }, - { - "epoch": 12.931942633637549, - "grad_norm": 1.5848087072372437, - "learning_rate": 7.55889447236181e-05, - "loss": 5.4786, - "step": 24797 - }, - { - "epoch": 12.932464146023468, - "grad_norm": 1.5089913606643677, - "learning_rate": 7.558793969849247e-05, - "loss": 5.5465, - "step": 24798 - }, - { - "epoch": 12.932985658409388, - "grad_norm": 1.4019140005111694, - "learning_rate": 7.558693467336684e-05, - "loss": 4.6754, - "step": 24799 - }, - { - "epoch": 12.933507170795306, - "grad_norm": 1.4451634883880615, - "learning_rate": 7.55859296482412e-05, - "loss": 5.5873, - "step": 24800 - }, - { - "epoch": 12.934028683181225, - "grad_norm": 1.4193488359451294, - "learning_rate": 7.558492462311558e-05, - "loss": 5.5498, - "step": 24801 - }, - { - "epoch": 12.934550195567144, - "grad_norm": 1.4507259130477905, - "learning_rate": 7.558391959798994e-05, - "loss": 5.6571, - "step": 24802 - }, - { - "epoch": 12.935071707953064, - "grad_norm": 1.3644264936447144, - "learning_rate": 7.558291457286432e-05, - "loss": 5.0865, - "step": 24803 - }, - { - "epoch": 12.935593220338983, - "grad_norm": 1.4585180282592773, - "learning_rate": 7.55819095477387e-05, - "loss": 5.5353, - "step": 24804 - }, - { - "epoch": 12.936114732724903, - "grad_norm": 1.4994611740112305, - "learning_rate": 7.558090452261308e-05, - "loss": 5.5891, - "step": 24805 - }, - { - "epoch": 12.93663624511082, - "grad_norm": 1.4000085592269897, - "learning_rate": 7.557989949748744e-05, - "loss": 5.7094, - "step": 24806 - }, - { - "epoch": 12.93715775749674, - "grad_norm": 1.3854570388793945, - "learning_rate": 7.557889447236182e-05, - "loss": 5.9452, - "step": 24807 - }, - { - "epoch": 12.93767926988266, - "grad_norm": 1.5004512071609497, - "learning_rate": 7.557788944723618e-05, - "loss": 5.5201, - "step": 24808 - }, - { - "epoch": 12.938200782268579, - "grad_norm": 1.4625142812728882, - "learning_rate": 7.557688442211056e-05, - "loss": 5.4512, - "step": 24809 - }, - { - "epoch": 12.938722294654498, - "grad_norm": 1.396570086479187, - "learning_rate": 7.557587939698493e-05, - "loss": 5.7577, - "step": 24810 - }, - { - "epoch": 12.939243807040418, - "grad_norm": 1.462796926498413, - "learning_rate": 7.55748743718593e-05, - "loss": 5.7861, - "step": 24811 - }, - { - "epoch": 12.939765319426336, - "grad_norm": 1.419164776802063, - "learning_rate": 7.557386934673367e-05, - "loss": 5.3733, - "step": 24812 - }, - { - "epoch": 12.940286831812255, - "grad_norm": 1.421269178390503, - "learning_rate": 7.557286432160803e-05, - "loss": 5.4047, - "step": 24813 - }, - { - "epoch": 12.940808344198174, - "grad_norm": 1.40413498878479, - "learning_rate": 7.557185929648241e-05, - "loss": 5.6773, - "step": 24814 - }, - { - "epoch": 12.941329856584094, - "grad_norm": 1.4439491033554077, - "learning_rate": 7.557085427135679e-05, - "loss": 5.4638, - "step": 24815 - }, - { - "epoch": 12.941851368970013, - "grad_norm": 1.4479589462280273, - "learning_rate": 7.556984924623117e-05, - "loss": 5.131, - "step": 24816 - }, - { - "epoch": 12.942372881355933, - "grad_norm": 1.4161211252212524, - "learning_rate": 7.556884422110553e-05, - "loss": 5.3297, - "step": 24817 - }, - { - "epoch": 12.94289439374185, - "grad_norm": 1.430370569229126, - "learning_rate": 7.556783919597991e-05, - "loss": 5.3817, - "step": 24818 - }, - { - "epoch": 12.94341590612777, - "grad_norm": 1.4038915634155273, - "learning_rate": 7.556683417085427e-05, - "loss": 5.5971, - "step": 24819 - }, - { - "epoch": 12.94393741851369, - "grad_norm": 1.4134514331817627, - "learning_rate": 7.556582914572865e-05, - "loss": 5.5991, - "step": 24820 - }, - { - "epoch": 12.944458930899609, - "grad_norm": 1.538896083831787, - "learning_rate": 7.556482412060302e-05, - "loss": 5.156, - "step": 24821 - }, - { - "epoch": 12.944980443285528, - "grad_norm": 1.4824374914169312, - "learning_rate": 7.556381909547739e-05, - "loss": 5.4251, - "step": 24822 - }, - { - "epoch": 12.945501955671448, - "grad_norm": 1.5658413171768188, - "learning_rate": 7.556281407035176e-05, - "loss": 5.1457, - "step": 24823 - }, - { - "epoch": 12.946023468057366, - "grad_norm": 1.3248335123062134, - "learning_rate": 7.556180904522614e-05, - "loss": 5.7374, - "step": 24824 - }, - { - "epoch": 12.946544980443285, - "grad_norm": 1.4150969982147217, - "learning_rate": 7.556080402010051e-05, - "loss": 5.6304, - "step": 24825 - }, - { - "epoch": 12.947066492829205, - "grad_norm": 1.4679011106491089, - "learning_rate": 7.555979899497488e-05, - "loss": 5.5264, - "step": 24826 - }, - { - "epoch": 12.947588005215124, - "grad_norm": 1.4957494735717773, - "learning_rate": 7.555879396984926e-05, - "loss": 5.3678, - "step": 24827 - }, - { - "epoch": 12.948109517601043, - "grad_norm": 1.327399730682373, - "learning_rate": 7.555778894472362e-05, - "loss": 5.5366, - "step": 24828 - }, - { - "epoch": 12.948631029986963, - "grad_norm": 1.298120379447937, - "learning_rate": 7.5556783919598e-05, - "loss": 5.6457, - "step": 24829 - }, - { - "epoch": 12.94915254237288, - "grad_norm": 1.4371241331100464, - "learning_rate": 7.555577889447236e-05, - "loss": 5.1917, - "step": 24830 - }, - { - "epoch": 12.9496740547588, - "grad_norm": 1.4717812538146973, - "learning_rate": 7.555477386934674e-05, - "loss": 5.2117, - "step": 24831 - }, - { - "epoch": 12.95019556714472, - "grad_norm": 1.3676960468292236, - "learning_rate": 7.55537688442211e-05, - "loss": 5.6923, - "step": 24832 - }, - { - "epoch": 12.950717079530639, - "grad_norm": 1.4917563199996948, - "learning_rate": 7.555276381909548e-05, - "loss": 4.8156, - "step": 24833 - }, - { - "epoch": 12.951238591916558, - "grad_norm": 1.4077788591384888, - "learning_rate": 7.555175879396985e-05, - "loss": 5.8754, - "step": 24834 - }, - { - "epoch": 12.951760104302478, - "grad_norm": 1.412641167640686, - "learning_rate": 7.555075376884422e-05, - "loss": 5.2142, - "step": 24835 - }, - { - "epoch": 12.952281616688396, - "grad_norm": 1.527685284614563, - "learning_rate": 7.55497487437186e-05, - "loss": 5.4658, - "step": 24836 - }, - { - "epoch": 12.952803129074315, - "grad_norm": 1.5200613737106323, - "learning_rate": 7.554874371859298e-05, - "loss": 5.2716, - "step": 24837 - }, - { - "epoch": 12.953324641460235, - "grad_norm": 1.4599411487579346, - "learning_rate": 7.554773869346734e-05, - "loss": 5.4855, - "step": 24838 - }, - { - "epoch": 12.953846153846154, - "grad_norm": 1.5375727415084839, - "learning_rate": 7.554673366834171e-05, - "loss": 5.2622, - "step": 24839 - }, - { - "epoch": 12.954367666232073, - "grad_norm": 1.3947031497955322, - "learning_rate": 7.554572864321609e-05, - "loss": 5.5609, - "step": 24840 - }, - { - "epoch": 12.954889178617993, - "grad_norm": 1.4404702186584473, - "learning_rate": 7.554472361809045e-05, - "loss": 5.0811, - "step": 24841 - }, - { - "epoch": 12.95541069100391, - "grad_norm": 1.3956023454666138, - "learning_rate": 7.554371859296483e-05, - "loss": 5.4779, - "step": 24842 - }, - { - "epoch": 12.95593220338983, - "grad_norm": 1.4340088367462158, - "learning_rate": 7.554271356783919e-05, - "loss": 5.5091, - "step": 24843 - }, - { - "epoch": 12.95645371577575, - "grad_norm": 1.4932284355163574, - "learning_rate": 7.554170854271357e-05, - "loss": 5.069, - "step": 24844 - }, - { - "epoch": 12.956975228161669, - "grad_norm": 1.51760733127594, - "learning_rate": 7.554070351758795e-05, - "loss": 5.2169, - "step": 24845 - }, - { - "epoch": 12.957496740547588, - "grad_norm": 1.412537932395935, - "learning_rate": 7.553969849246233e-05, - "loss": 5.6892, - "step": 24846 - }, - { - "epoch": 12.958018252933508, - "grad_norm": 1.5469245910644531, - "learning_rate": 7.553869346733669e-05, - "loss": 5.5725, - "step": 24847 - }, - { - "epoch": 12.958539765319426, - "grad_norm": 1.3908629417419434, - "learning_rate": 7.553768844221107e-05, - "loss": 5.5032, - "step": 24848 - }, - { - "epoch": 12.959061277705345, - "grad_norm": 1.4897438287734985, - "learning_rate": 7.553668341708543e-05, - "loss": 5.4558, - "step": 24849 - }, - { - "epoch": 12.959582790091265, - "grad_norm": 1.4776113033294678, - "learning_rate": 7.553567839195981e-05, - "loss": 5.8042, - "step": 24850 - }, - { - "epoch": 12.960104302477184, - "grad_norm": 1.353235125541687, - "learning_rate": 7.553467336683417e-05, - "loss": 5.9104, - "step": 24851 - }, - { - "epoch": 12.960625814863103, - "grad_norm": 1.4854031801223755, - "learning_rate": 7.553366834170854e-05, - "loss": 5.196, - "step": 24852 - }, - { - "epoch": 12.961147327249023, - "grad_norm": 1.565555214881897, - "learning_rate": 7.553266331658292e-05, - "loss": 5.6112, - "step": 24853 - }, - { - "epoch": 12.96166883963494, - "grad_norm": 1.610148310661316, - "learning_rate": 7.553165829145728e-05, - "loss": 5.6234, - "step": 24854 - }, - { - "epoch": 12.96219035202086, - "grad_norm": 1.5746228694915771, - "learning_rate": 7.553065326633166e-05, - "loss": 5.4364, - "step": 24855 - }, - { - "epoch": 12.96271186440678, - "grad_norm": 1.3966357707977295, - "learning_rate": 7.552964824120604e-05, - "loss": 5.7366, - "step": 24856 - }, - { - "epoch": 12.963233376792699, - "grad_norm": 1.5200656652450562, - "learning_rate": 7.552864321608041e-05, - "loss": 5.2514, - "step": 24857 - }, - { - "epoch": 12.963754889178619, - "grad_norm": 1.4276994466781616, - "learning_rate": 7.552763819095478e-05, - "loss": 5.0177, - "step": 24858 - }, - { - "epoch": 12.964276401564538, - "grad_norm": 1.4425915479660034, - "learning_rate": 7.552663316582916e-05, - "loss": 5.4594, - "step": 24859 - }, - { - "epoch": 12.964797913950456, - "grad_norm": 1.4052938222885132, - "learning_rate": 7.552562814070352e-05, - "loss": 5.5855, - "step": 24860 - }, - { - "epoch": 12.965319426336375, - "grad_norm": 1.5571333169937134, - "learning_rate": 7.55246231155779e-05, - "loss": 5.5483, - "step": 24861 - }, - { - "epoch": 12.965840938722295, - "grad_norm": 1.4361435174942017, - "learning_rate": 7.552361809045226e-05, - "loss": 5.0715, - "step": 24862 - }, - { - "epoch": 12.966362451108214, - "grad_norm": 1.5516752004623413, - "learning_rate": 7.552261306532664e-05, - "loss": 5.3161, - "step": 24863 - }, - { - "epoch": 12.966883963494134, - "grad_norm": 1.5567405223846436, - "learning_rate": 7.5521608040201e-05, - "loss": 5.3579, - "step": 24864 - }, - { - "epoch": 12.967405475880053, - "grad_norm": 1.4082837104797363, - "learning_rate": 7.552060301507537e-05, - "loss": 5.1166, - "step": 24865 - }, - { - "epoch": 12.96792698826597, - "grad_norm": 1.446543574333191, - "learning_rate": 7.551959798994975e-05, - "loss": 5.8086, - "step": 24866 - }, - { - "epoch": 12.96844850065189, - "grad_norm": 1.4792859554290771, - "learning_rate": 7.551859296482412e-05, - "loss": 5.2138, - "step": 24867 - }, - { - "epoch": 12.96897001303781, - "grad_norm": 1.46998929977417, - "learning_rate": 7.55175879396985e-05, - "loss": 5.4319, - "step": 24868 - }, - { - "epoch": 12.969491525423729, - "grad_norm": 1.503684639930725, - "learning_rate": 7.551658291457287e-05, - "loss": 5.0115, - "step": 24869 - }, - { - "epoch": 12.970013037809649, - "grad_norm": 1.4610308408737183, - "learning_rate": 7.551557788944724e-05, - "loss": 5.0842, - "step": 24870 - }, - { - "epoch": 12.970534550195566, - "grad_norm": 1.397529125213623, - "learning_rate": 7.551457286432161e-05, - "loss": 5.224, - "step": 24871 - }, - { - "epoch": 12.971056062581486, - "grad_norm": 1.414838194847107, - "learning_rate": 7.551356783919599e-05, - "loss": 5.3745, - "step": 24872 - }, - { - "epoch": 12.971577574967405, - "grad_norm": 1.3712496757507324, - "learning_rate": 7.551256281407035e-05, - "loss": 5.6031, - "step": 24873 - }, - { - "epoch": 12.972099087353325, - "grad_norm": 1.615883469581604, - "learning_rate": 7.551155778894473e-05, - "loss": 5.0794, - "step": 24874 - }, - { - "epoch": 12.972620599739244, - "grad_norm": 1.4590216875076294, - "learning_rate": 7.551055276381909e-05, - "loss": 5.3046, - "step": 24875 - }, - { - "epoch": 12.973142112125164, - "grad_norm": 1.5282623767852783, - "learning_rate": 7.550954773869347e-05, - "loss": 5.1191, - "step": 24876 - }, - { - "epoch": 12.973663624511083, - "grad_norm": 1.4989322423934937, - "learning_rate": 7.550854271356785e-05, - "loss": 5.6628, - "step": 24877 - }, - { - "epoch": 12.974185136897, - "grad_norm": 1.4191858768463135, - "learning_rate": 7.550753768844221e-05, - "loss": 5.368, - "step": 24878 - }, - { - "epoch": 12.97470664928292, - "grad_norm": 1.4943689107894897, - "learning_rate": 7.550653266331659e-05, - "loss": 5.4281, - "step": 24879 - }, - { - "epoch": 12.97522816166884, - "grad_norm": 1.5637153387069702, - "learning_rate": 7.550552763819095e-05, - "loss": 5.2501, - "step": 24880 - }, - { - "epoch": 12.975749674054759, - "grad_norm": 2.25223445892334, - "learning_rate": 7.550452261306533e-05, - "loss": 4.6496, - "step": 24881 - }, - { - "epoch": 12.976271186440679, - "grad_norm": 1.3737996816635132, - "learning_rate": 7.55035175879397e-05, - "loss": 5.6518, - "step": 24882 - }, - { - "epoch": 12.976792698826596, - "grad_norm": 1.563730001449585, - "learning_rate": 7.550251256281407e-05, - "loss": 5.2636, - "step": 24883 - }, - { - "epoch": 12.977314211212516, - "grad_norm": 1.4592657089233398, - "learning_rate": 7.550150753768844e-05, - "loss": 5.4849, - "step": 24884 - }, - { - "epoch": 12.977835723598435, - "grad_norm": 1.6189231872558594, - "learning_rate": 7.550050251256282e-05, - "loss": 5.1686, - "step": 24885 - }, - { - "epoch": 12.978357235984355, - "grad_norm": 1.3698781728744507, - "learning_rate": 7.549949748743718e-05, - "loss": 5.6983, - "step": 24886 - }, - { - "epoch": 12.978878748370274, - "grad_norm": 1.4323832988739014, - "learning_rate": 7.549849246231156e-05, - "loss": 5.8107, - "step": 24887 - }, - { - "epoch": 12.979400260756194, - "grad_norm": 1.4170339107513428, - "learning_rate": 7.549748743718594e-05, - "loss": 5.6339, - "step": 24888 - }, - { - "epoch": 12.979921773142113, - "grad_norm": 1.4335356950759888, - "learning_rate": 7.549648241206031e-05, - "loss": 5.6271, - "step": 24889 - }, - { - "epoch": 12.98044328552803, - "grad_norm": 1.45816969871521, - "learning_rate": 7.549547738693468e-05, - "loss": 5.4166, - "step": 24890 - }, - { - "epoch": 12.98096479791395, - "grad_norm": 1.5053993463516235, - "learning_rate": 7.549447236180906e-05, - "loss": 5.6864, - "step": 24891 - }, - { - "epoch": 12.98148631029987, - "grad_norm": 1.3992886543273926, - "learning_rate": 7.549346733668342e-05, - "loss": 5.5012, - "step": 24892 - }, - { - "epoch": 12.98200782268579, - "grad_norm": 1.4984796047210693, - "learning_rate": 7.549246231155779e-05, - "loss": 5.3648, - "step": 24893 - }, - { - "epoch": 12.982529335071709, - "grad_norm": 1.51664137840271, - "learning_rate": 7.549145728643216e-05, - "loss": 4.8902, - "step": 24894 - }, - { - "epoch": 12.983050847457626, - "grad_norm": 1.4392110109329224, - "learning_rate": 7.549045226130653e-05, - "loss": 5.1816, - "step": 24895 - }, - { - "epoch": 12.983572359843546, - "grad_norm": 1.5878565311431885, - "learning_rate": 7.54894472361809e-05, - "loss": 5.4343, - "step": 24896 - }, - { - "epoch": 12.984093872229465, - "grad_norm": 1.399032473564148, - "learning_rate": 7.548844221105528e-05, - "loss": 5.9353, - "step": 24897 - }, - { - "epoch": 12.984615384615385, - "grad_norm": 1.5939244031906128, - "learning_rate": 7.548743718592966e-05, - "loss": 5.5592, - "step": 24898 - }, - { - "epoch": 12.985136897001304, - "grad_norm": 1.4364012479782104, - "learning_rate": 7.548643216080402e-05, - "loss": 5.9514, - "step": 24899 - }, - { - "epoch": 12.985658409387224, - "grad_norm": 1.401566982269287, - "learning_rate": 7.54854271356784e-05, - "loss": 5.8318, - "step": 24900 - }, - { - "epoch": 12.986179921773141, - "grad_norm": 1.4025949239730835, - "learning_rate": 7.548442211055277e-05, - "loss": 5.6004, - "step": 24901 - }, - { - "epoch": 12.98670143415906, - "grad_norm": 1.418747067451477, - "learning_rate": 7.548341708542714e-05, - "loss": 5.6355, - "step": 24902 - }, - { - "epoch": 12.98722294654498, - "grad_norm": 1.360119342803955, - "learning_rate": 7.548241206030151e-05, - "loss": 5.7012, - "step": 24903 - }, - { - "epoch": 12.9877444589309, - "grad_norm": 1.3518282175064087, - "learning_rate": 7.548140703517589e-05, - "loss": 5.5493, - "step": 24904 - }, - { - "epoch": 12.98826597131682, - "grad_norm": 1.4630800485610962, - "learning_rate": 7.548040201005025e-05, - "loss": 5.07, - "step": 24905 - }, - { - "epoch": 12.988787483702739, - "grad_norm": 1.3679165840148926, - "learning_rate": 7.547939698492462e-05, - "loss": 4.8339, - "step": 24906 - }, - { - "epoch": 12.989308996088656, - "grad_norm": 1.4355125427246094, - "learning_rate": 7.5478391959799e-05, - "loss": 5.1333, - "step": 24907 - }, - { - "epoch": 12.989830508474576, - "grad_norm": 1.4147162437438965, - "learning_rate": 7.547738693467337e-05, - "loss": 5.5698, - "step": 24908 - }, - { - "epoch": 12.990352020860495, - "grad_norm": 1.4937529563903809, - "learning_rate": 7.547638190954775e-05, - "loss": 5.1795, - "step": 24909 - }, - { - "epoch": 12.990873533246415, - "grad_norm": 1.434704065322876, - "learning_rate": 7.547537688442211e-05, - "loss": 5.3368, - "step": 24910 - }, - { - "epoch": 12.991395045632334, - "grad_norm": 1.3380588293075562, - "learning_rate": 7.547437185929649e-05, - "loss": 4.5438, - "step": 24911 - }, - { - "epoch": 12.991916558018254, - "grad_norm": 1.4174107313156128, - "learning_rate": 7.547336683417086e-05, - "loss": 5.2401, - "step": 24912 - }, - { - "epoch": 12.992438070404171, - "grad_norm": 1.4838937520980835, - "learning_rate": 7.547236180904523e-05, - "loss": 5.7458, - "step": 24913 - }, - { - "epoch": 12.99295958279009, - "grad_norm": 1.460187315940857, - "learning_rate": 7.54713567839196e-05, - "loss": 5.571, - "step": 24914 - }, - { - "epoch": 12.99348109517601, - "grad_norm": 1.4413350820541382, - "learning_rate": 7.547035175879398e-05, - "loss": 5.2312, - "step": 24915 - }, - { - "epoch": 12.99400260756193, - "grad_norm": 1.404197335243225, - "learning_rate": 7.546934673366834e-05, - "loss": 4.8674, - "step": 24916 - }, - { - "epoch": 12.99452411994785, - "grad_norm": 1.5999112129211426, - "learning_rate": 7.546834170854272e-05, - "loss": 4.9212, - "step": 24917 - }, - { - "epoch": 12.995045632333769, - "grad_norm": 1.9199494123458862, - "learning_rate": 7.54673366834171e-05, - "loss": 5.1977, - "step": 24918 - }, - { - "epoch": 12.995567144719686, - "grad_norm": 1.4525030851364136, - "learning_rate": 7.546633165829146e-05, - "loss": 5.0752, - "step": 24919 - }, - { - "epoch": 12.996088657105606, - "grad_norm": 1.5402097702026367, - "learning_rate": 7.546532663316584e-05, - "loss": 5.4852, - "step": 24920 - }, - { - "epoch": 12.996610169491525, - "grad_norm": 1.5111380815505981, - "learning_rate": 7.54643216080402e-05, - "loss": 4.8002, - "step": 24921 - }, - { - "epoch": 12.997131681877445, - "grad_norm": 1.5582941770553589, - "learning_rate": 7.546331658291458e-05, - "loss": 5.3745, - "step": 24922 - }, - { - "epoch": 12.997653194263364, - "grad_norm": 1.5431370735168457, - "learning_rate": 7.546231155778894e-05, - "loss": 5.4898, - "step": 24923 - }, - { - "epoch": 12.998174706649284, - "grad_norm": 1.4679903984069824, - "learning_rate": 7.546130653266332e-05, - "loss": 4.9059, - "step": 24924 - }, - { - "epoch": 12.998696219035201, - "grad_norm": 1.4424365758895874, - "learning_rate": 7.546030150753769e-05, - "loss": 5.8304, - "step": 24925 - }, - { - "epoch": 12.99921773142112, - "grad_norm": 1.449933409690857, - "learning_rate": 7.545929648241206e-05, - "loss": 5.4914, - "step": 24926 - }, - { - "epoch": 12.99973924380704, - "grad_norm": 1.4250776767730713, - "learning_rate": 7.545829145728643e-05, - "loss": 5.6755, - "step": 24927 - }, - { - "epoch": 13.00026075619296, - "grad_norm": 1.8117423057556152, - "learning_rate": 7.54572864321608e-05, - "loss": 5.054, - "step": 24928 - }, - { - "epoch": 13.00078226857888, - "grad_norm": 1.4200389385223389, - "learning_rate": 7.545628140703518e-05, - "loss": 5.4375, - "step": 24929 - }, - { - "epoch": 13.001303780964799, - "grad_norm": 1.3528181314468384, - "learning_rate": 7.545527638190956e-05, - "loss": 5.4881, - "step": 24930 - }, - { - "epoch": 13.001825293350716, - "grad_norm": 1.4377245903015137, - "learning_rate": 7.545427135678393e-05, - "loss": 5.7187, - "step": 24931 - }, - { - "epoch": 13.002346805736636, - "grad_norm": 1.444352388381958, - "learning_rate": 7.545326633165829e-05, - "loss": 5.4847, - "step": 24932 - }, - { - "epoch": 13.002868318122555, - "grad_norm": 1.4764409065246582, - "learning_rate": 7.545226130653267e-05, - "loss": 4.8908, - "step": 24933 - }, - { - "epoch": 13.003389830508475, - "grad_norm": 1.5274739265441895, - "learning_rate": 7.545125628140703e-05, - "loss": 5.2774, - "step": 24934 - }, - { - "epoch": 13.003911342894394, - "grad_norm": 1.571966290473938, - "learning_rate": 7.545025125628141e-05, - "loss": 5.018, - "step": 24935 - }, - { - "epoch": 13.004432855280314, - "grad_norm": 1.5012880563735962, - "learning_rate": 7.544924623115577e-05, - "loss": 5.1411, - "step": 24936 - }, - { - "epoch": 13.004954367666231, - "grad_norm": 1.4766172170639038, - "learning_rate": 7.544824120603015e-05, - "loss": 4.8378, - "step": 24937 - }, - { - "epoch": 13.00547588005215, - "grad_norm": 1.3836112022399902, - "learning_rate": 7.544723618090453e-05, - "loss": 5.8167, - "step": 24938 - }, - { - "epoch": 13.00599739243807, - "grad_norm": 1.4877537488937378, - "learning_rate": 7.544623115577891e-05, - "loss": 5.3696, - "step": 24939 - }, - { - "epoch": 13.00651890482399, - "grad_norm": 1.4720375537872314, - "learning_rate": 7.544522613065327e-05, - "loss": 5.3887, - "step": 24940 - }, - { - "epoch": 13.00704041720991, - "grad_norm": 1.3576501607894897, - "learning_rate": 7.544422110552765e-05, - "loss": 5.2396, - "step": 24941 - }, - { - "epoch": 13.007561929595829, - "grad_norm": 1.5126111507415771, - "learning_rate": 7.544321608040201e-05, - "loss": 5.5781, - "step": 24942 - }, - { - "epoch": 13.008083441981746, - "grad_norm": 1.4237885475158691, - "learning_rate": 7.544221105527639e-05, - "loss": 5.3022, - "step": 24943 - }, - { - "epoch": 13.008604954367666, - "grad_norm": 1.4641135931015015, - "learning_rate": 7.544120603015076e-05, - "loss": 5.4139, - "step": 24944 - }, - { - "epoch": 13.009126466753585, - "grad_norm": 1.343546748161316, - "learning_rate": 7.544020100502512e-05, - "loss": 5.3308, - "step": 24945 - }, - { - "epoch": 13.009647979139505, - "grad_norm": 1.4830869436264038, - "learning_rate": 7.54391959798995e-05, - "loss": 5.7493, - "step": 24946 - }, - { - "epoch": 13.010169491525424, - "grad_norm": 1.3791488409042358, - "learning_rate": 7.543819095477386e-05, - "loss": 5.2108, - "step": 24947 - }, - { - "epoch": 13.010691003911344, - "grad_norm": 1.5253902673721313, - "learning_rate": 7.543718592964824e-05, - "loss": 5.1736, - "step": 24948 - }, - { - "epoch": 13.011212516297261, - "grad_norm": 1.4549850225448608, - "learning_rate": 7.543618090452262e-05, - "loss": 5.599, - "step": 24949 - }, - { - "epoch": 13.01173402868318, - "grad_norm": 1.3687851428985596, - "learning_rate": 7.5435175879397e-05, - "loss": 5.2702, - "step": 24950 - }, - { - "epoch": 13.0122555410691, - "grad_norm": 1.52349853515625, - "learning_rate": 7.543417085427136e-05, - "loss": 5.5421, - "step": 24951 - }, - { - "epoch": 13.01277705345502, - "grad_norm": 1.5492075681686401, - "learning_rate": 7.543316582914574e-05, - "loss": 5.4474, - "step": 24952 - }, - { - "epoch": 13.01329856584094, - "grad_norm": 1.3883212804794312, - "learning_rate": 7.54321608040201e-05, - "loss": 5.7876, - "step": 24953 - }, - { - "epoch": 13.013820078226859, - "grad_norm": 1.4092342853546143, - "learning_rate": 7.543115577889448e-05, - "loss": 5.5784, - "step": 24954 - }, - { - "epoch": 13.014341590612776, - "grad_norm": 1.574446201324463, - "learning_rate": 7.543015075376884e-05, - "loss": 5.3385, - "step": 24955 - }, - { - "epoch": 13.014863102998696, - "grad_norm": 1.3892382383346558, - "learning_rate": 7.542914572864322e-05, - "loss": 5.3549, - "step": 24956 - }, - { - "epoch": 13.015384615384615, - "grad_norm": 1.4137601852416992, - "learning_rate": 7.542814070351759e-05, - "loss": 5.4973, - "step": 24957 - }, - { - "epoch": 13.015906127770535, - "grad_norm": 1.5254262685775757, - "learning_rate": 7.542713567839196e-05, - "loss": 5.1363, - "step": 24958 - }, - { - "epoch": 13.016427640156454, - "grad_norm": 1.489316701889038, - "learning_rate": 7.542613065326634e-05, - "loss": 5.514, - "step": 24959 - }, - { - "epoch": 13.016949152542374, - "grad_norm": 1.4311922788619995, - "learning_rate": 7.54251256281407e-05, - "loss": 5.4795, - "step": 24960 - }, - { - "epoch": 13.017470664928291, - "grad_norm": 1.4278647899627686, - "learning_rate": 7.542412060301508e-05, - "loss": 5.5704, - "step": 24961 - }, - { - "epoch": 13.01799217731421, - "grad_norm": 1.4737575054168701, - "learning_rate": 7.542311557788945e-05, - "loss": 5.3254, - "step": 24962 - }, - { - "epoch": 13.01851368970013, - "grad_norm": 1.448438048362732, - "learning_rate": 7.542211055276383e-05, - "loss": 5.3868, - "step": 24963 - }, - { - "epoch": 13.01903520208605, - "grad_norm": 1.520855188369751, - "learning_rate": 7.542110552763819e-05, - "loss": 5.0831, - "step": 24964 - }, - { - "epoch": 13.01955671447197, - "grad_norm": 1.5036416053771973, - "learning_rate": 7.542010050251257e-05, - "loss": 5.6077, - "step": 24965 - }, - { - "epoch": 13.020078226857889, - "grad_norm": 1.39872407913208, - "learning_rate": 7.541909547738693e-05, - "loss": 5.3136, - "step": 24966 - }, - { - "epoch": 13.020599739243806, - "grad_norm": 1.4274282455444336, - "learning_rate": 7.541809045226131e-05, - "loss": 5.7438, - "step": 24967 - }, - { - "epoch": 13.021121251629726, - "grad_norm": 1.4124516248703003, - "learning_rate": 7.541708542713567e-05, - "loss": 5.3484, - "step": 24968 - }, - { - "epoch": 13.021642764015645, - "grad_norm": 1.4041868448257446, - "learning_rate": 7.541608040201005e-05, - "loss": 5.3449, - "step": 24969 - }, - { - "epoch": 13.022164276401565, - "grad_norm": 1.3599072694778442, - "learning_rate": 7.541507537688443e-05, - "loss": 5.6724, - "step": 24970 - }, - { - "epoch": 13.022685788787484, - "grad_norm": 1.4578707218170166, - "learning_rate": 7.54140703517588e-05, - "loss": 5.6556, - "step": 24971 - }, - { - "epoch": 13.023207301173404, - "grad_norm": 1.442463994026184, - "learning_rate": 7.541306532663317e-05, - "loss": 5.4312, - "step": 24972 - }, - { - "epoch": 13.023728813559321, - "grad_norm": 1.4686410427093506, - "learning_rate": 7.541206030150754e-05, - "loss": 5.345, - "step": 24973 - }, - { - "epoch": 13.024250325945241, - "grad_norm": 1.5030460357666016, - "learning_rate": 7.541105527638191e-05, - "loss": 5.3385, - "step": 24974 - }, - { - "epoch": 13.02477183833116, - "grad_norm": 1.5403265953063965, - "learning_rate": 7.541005025125628e-05, - "loss": 4.6231, - "step": 24975 - }, - { - "epoch": 13.02529335071708, - "grad_norm": 1.8225224018096924, - "learning_rate": 7.540904522613066e-05, - "loss": 4.6271, - "step": 24976 - }, - { - "epoch": 13.025814863103, - "grad_norm": 1.9311195611953735, - "learning_rate": 7.540804020100502e-05, - "loss": 5.0377, - "step": 24977 - }, - { - "epoch": 13.026336375488917, - "grad_norm": 1.4456654787063599, - "learning_rate": 7.54070351758794e-05, - "loss": 4.8066, - "step": 24978 - }, - { - "epoch": 13.026857887874836, - "grad_norm": 1.4270296096801758, - "learning_rate": 7.540603015075378e-05, - "loss": 5.5561, - "step": 24979 - }, - { - "epoch": 13.027379400260756, - "grad_norm": 1.4739720821380615, - "learning_rate": 7.540502512562815e-05, - "loss": 4.9893, - "step": 24980 - }, - { - "epoch": 13.027900912646675, - "grad_norm": 1.4896410703659058, - "learning_rate": 7.540402010050252e-05, - "loss": 5.2836, - "step": 24981 - }, - { - "epoch": 13.028422425032595, - "grad_norm": 1.4746875762939453, - "learning_rate": 7.54030150753769e-05, - "loss": 5.5772, - "step": 24982 - }, - { - "epoch": 13.028943937418514, - "grad_norm": 1.6914674043655396, - "learning_rate": 7.540201005025126e-05, - "loss": 4.4894, - "step": 24983 - }, - { - "epoch": 13.029465449804432, - "grad_norm": 1.3429515361785889, - "learning_rate": 7.540100502512564e-05, - "loss": 5.8461, - "step": 24984 - }, - { - "epoch": 13.029986962190351, - "grad_norm": 1.4505583047866821, - "learning_rate": 7.54e-05, - "loss": 5.4922, - "step": 24985 - }, - { - "epoch": 13.030508474576271, - "grad_norm": 1.5194478034973145, - "learning_rate": 7.539899497487437e-05, - "loss": 4.9128, - "step": 24986 - }, - { - "epoch": 13.03102998696219, - "grad_norm": 1.4806746244430542, - "learning_rate": 7.539798994974875e-05, - "loss": 5.2395, - "step": 24987 - }, - { - "epoch": 13.03155149934811, - "grad_norm": 1.3566452264785767, - "learning_rate": 7.539698492462311e-05, - "loss": 5.6985, - "step": 24988 - }, - { - "epoch": 13.03207301173403, - "grad_norm": 1.389236569404602, - "learning_rate": 7.539597989949749e-05, - "loss": 5.4454, - "step": 24989 - }, - { - "epoch": 13.032594524119947, - "grad_norm": 1.45772385597229, - "learning_rate": 7.539497487437187e-05, - "loss": 4.7858, - "step": 24990 - }, - { - "epoch": 13.033116036505866, - "grad_norm": 1.4208375215530396, - "learning_rate": 7.539396984924624e-05, - "loss": 5.8702, - "step": 24991 - }, - { - "epoch": 13.033637548891786, - "grad_norm": 1.4874900579452515, - "learning_rate": 7.539296482412061e-05, - "loss": 5.2632, - "step": 24992 - }, - { - "epoch": 13.034159061277705, - "grad_norm": 1.6581958532333374, - "learning_rate": 7.539195979899499e-05, - "loss": 4.95, - "step": 24993 - }, - { - "epoch": 13.034680573663625, - "grad_norm": 1.505218505859375, - "learning_rate": 7.539095477386935e-05, - "loss": 5.3283, - "step": 24994 - }, - { - "epoch": 13.035202086049544, - "grad_norm": 1.5088810920715332, - "learning_rate": 7.538994974874373e-05, - "loss": 5.2763, - "step": 24995 - }, - { - "epoch": 13.035723598435462, - "grad_norm": 1.4907021522521973, - "learning_rate": 7.538894472361809e-05, - "loss": 5.2277, - "step": 24996 - }, - { - "epoch": 13.036245110821381, - "grad_norm": 1.4617527723312378, - "learning_rate": 7.538793969849247e-05, - "loss": 5.4886, - "step": 24997 - }, - { - "epoch": 13.036766623207301, - "grad_norm": 1.5130726099014282, - "learning_rate": 7.538693467336683e-05, - "loss": 5.1118, - "step": 24998 - }, - { - "epoch": 13.03728813559322, - "grad_norm": 1.6083691120147705, - "learning_rate": 7.538592964824121e-05, - "loss": 5.5955, - "step": 24999 - }, - { - "epoch": 13.03780964797914, - "grad_norm": 1.532025694847107, - "learning_rate": 7.538492462311559e-05, - "loss": 4.9124, - "step": 25000 - }, - { - "epoch": 13.03780964797914, - "eval_loss": 5.480602741241455, - "eval_runtime": 42.6816, - "eval_samples_per_second": 28.724, - "eval_steps_per_second": 3.608, - "step": 25000 - }, - { - "epoch": 13.03833116036506, - "grad_norm": 1.5681285858154297, - "learning_rate": 7.538391959798995e-05, - "loss": 5.4305, - "step": 25001 - }, - { - "epoch": 13.038852672750977, - "grad_norm": 1.3859326839447021, - "learning_rate": 7.538291457286433e-05, - "loss": 5.3414, - "step": 25002 - }, - { - "epoch": 13.039374185136897, - "grad_norm": 1.4273037910461426, - "learning_rate": 7.53819095477387e-05, - "loss": 5.1319, - "step": 25003 - }, - { - "epoch": 13.039895697522816, - "grad_norm": 1.4868451356887817, - "learning_rate": 7.538090452261307e-05, - "loss": 5.6115, - "step": 25004 - }, - { - "epoch": 13.040417209908735, - "grad_norm": 1.4755724668502808, - "learning_rate": 7.537989949748744e-05, - "loss": 5.1901, - "step": 25005 - }, - { - "epoch": 13.040938722294655, - "grad_norm": 1.3661320209503174, - "learning_rate": 7.537889447236182e-05, - "loss": 5.532, - "step": 25006 - }, - { - "epoch": 13.041460234680574, - "grad_norm": 1.506785273551941, - "learning_rate": 7.537788944723618e-05, - "loss": 5.6451, - "step": 25007 - }, - { - "epoch": 13.041981747066492, - "grad_norm": 1.433674693107605, - "learning_rate": 7.537688442211056e-05, - "loss": 5.8557, - "step": 25008 - }, - { - "epoch": 13.042503259452412, - "grad_norm": 1.4180102348327637, - "learning_rate": 7.537587939698492e-05, - "loss": 5.3097, - "step": 25009 - }, - { - "epoch": 13.043024771838331, - "grad_norm": 1.4880775213241577, - "learning_rate": 7.53748743718593e-05, - "loss": 5.2601, - "step": 25010 - }, - { - "epoch": 13.04354628422425, - "grad_norm": 1.4273602962493896, - "learning_rate": 7.537386934673368e-05, - "loss": 5.1229, - "step": 25011 - }, - { - "epoch": 13.04406779661017, - "grad_norm": 1.4471709728240967, - "learning_rate": 7.537286432160804e-05, - "loss": 5.2641, - "step": 25012 - }, - { - "epoch": 13.04458930899609, - "grad_norm": 1.4549627304077148, - "learning_rate": 7.537185929648242e-05, - "loss": 5.1747, - "step": 25013 - }, - { - "epoch": 13.045110821382007, - "grad_norm": 1.3808501958847046, - "learning_rate": 7.537085427135678e-05, - "loss": 5.1887, - "step": 25014 - }, - { - "epoch": 13.045632333767927, - "grad_norm": 1.4362021684646606, - "learning_rate": 7.536984924623116e-05, - "loss": 5.7697, - "step": 25015 - }, - { - "epoch": 13.046153846153846, - "grad_norm": 1.4059470891952515, - "learning_rate": 7.536884422110553e-05, - "loss": 5.5131, - "step": 25016 - }, - { - "epoch": 13.046675358539765, - "grad_norm": 1.3300868272781372, - "learning_rate": 7.53678391959799e-05, - "loss": 5.8265, - "step": 25017 - }, - { - "epoch": 13.047196870925685, - "grad_norm": 1.4380558729171753, - "learning_rate": 7.536683417085427e-05, - "loss": 5.283, - "step": 25018 - }, - { - "epoch": 13.047718383311604, - "grad_norm": 1.5371414422988892, - "learning_rate": 7.536582914572865e-05, - "loss": 5.0966, - "step": 25019 - }, - { - "epoch": 13.048239895697522, - "grad_norm": 1.8003718852996826, - "learning_rate": 7.536482412060301e-05, - "loss": 5.1469, - "step": 25020 - }, - { - "epoch": 13.048761408083442, - "grad_norm": 1.404632806777954, - "learning_rate": 7.536381909547739e-05, - "loss": 5.1903, - "step": 25021 - }, - { - "epoch": 13.049282920469361, - "grad_norm": 1.5047526359558105, - "learning_rate": 7.536281407035177e-05, - "loss": 5.1553, - "step": 25022 - }, - { - "epoch": 13.04980443285528, - "grad_norm": 1.447920322418213, - "learning_rate": 7.536180904522614e-05, - "loss": 5.3584, - "step": 25023 - }, - { - "epoch": 13.0503259452412, - "grad_norm": 1.5456644296646118, - "learning_rate": 7.536080402010051e-05, - "loss": 5.202, - "step": 25024 - }, - { - "epoch": 13.05084745762712, - "grad_norm": 1.4793750047683716, - "learning_rate": 7.535979899497487e-05, - "loss": 4.9771, - "step": 25025 - }, - { - "epoch": 13.051368970013037, - "grad_norm": 1.5456069707870483, - "learning_rate": 7.535879396984925e-05, - "loss": 5.7858, - "step": 25026 - }, - { - "epoch": 13.051890482398957, - "grad_norm": 1.538077473640442, - "learning_rate": 7.535778894472361e-05, - "loss": 5.0891, - "step": 25027 - }, - { - "epoch": 13.052411994784876, - "grad_norm": 1.4607704877853394, - "learning_rate": 7.535678391959799e-05, - "loss": 5.4683, - "step": 25028 - }, - { - "epoch": 13.052933507170795, - "grad_norm": 1.4046400785446167, - "learning_rate": 7.535577889447236e-05, - "loss": 5.6173, - "step": 25029 - }, - { - "epoch": 13.053455019556715, - "grad_norm": 1.4793953895568848, - "learning_rate": 7.535477386934673e-05, - "loss": 5.2467, - "step": 25030 - }, - { - "epoch": 13.053976531942634, - "grad_norm": 1.3344489336013794, - "learning_rate": 7.535376884422111e-05, - "loss": 5.549, - "step": 25031 - }, - { - "epoch": 13.054498044328552, - "grad_norm": 1.6344150304794312, - "learning_rate": 7.535276381909549e-05, - "loss": 5.2559, - "step": 25032 - }, - { - "epoch": 13.055019556714472, - "grad_norm": 1.4933382272720337, - "learning_rate": 7.535175879396985e-05, - "loss": 5.2285, - "step": 25033 - }, - { - "epoch": 13.055541069100391, - "grad_norm": 1.4500466585159302, - "learning_rate": 7.535075376884423e-05, - "loss": 5.2132, - "step": 25034 - }, - { - "epoch": 13.05606258148631, - "grad_norm": 1.4440773725509644, - "learning_rate": 7.53497487437186e-05, - "loss": 5.4344, - "step": 25035 - }, - { - "epoch": 13.05658409387223, - "grad_norm": 1.4005767107009888, - "learning_rate": 7.534874371859297e-05, - "loss": 5.6166, - "step": 25036 - }, - { - "epoch": 13.05710560625815, - "grad_norm": 1.4739924669265747, - "learning_rate": 7.534773869346734e-05, - "loss": 5.766, - "step": 25037 - }, - { - "epoch": 13.057627118644067, - "grad_norm": 1.4168720245361328, - "learning_rate": 7.53467336683417e-05, - "loss": 5.4681, - "step": 25038 - }, - { - "epoch": 13.058148631029987, - "grad_norm": 1.4937533140182495, - "learning_rate": 7.534572864321608e-05, - "loss": 5.598, - "step": 25039 - }, - { - "epoch": 13.058670143415906, - "grad_norm": 1.4248523712158203, - "learning_rate": 7.534472361809044e-05, - "loss": 5.347, - "step": 25040 - }, - { - "epoch": 13.059191655801826, - "grad_norm": 1.472029209136963, - "learning_rate": 7.534371859296482e-05, - "loss": 5.6407, - "step": 25041 - }, - { - "epoch": 13.059713168187745, - "grad_norm": 1.4908260107040405, - "learning_rate": 7.53427135678392e-05, - "loss": 5.5702, - "step": 25042 - }, - { - "epoch": 13.060234680573664, - "grad_norm": 1.367125153541565, - "learning_rate": 7.534170854271358e-05, - "loss": 5.2285, - "step": 25043 - }, - { - "epoch": 13.060756192959582, - "grad_norm": 1.3811912536621094, - "learning_rate": 7.534070351758794e-05, - "loss": 5.7186, - "step": 25044 - }, - { - "epoch": 13.061277705345502, - "grad_norm": 1.4316325187683105, - "learning_rate": 7.533969849246232e-05, - "loss": 5.1802, - "step": 25045 - }, - { - "epoch": 13.061799217731421, - "grad_norm": 1.4048737287521362, - "learning_rate": 7.533869346733668e-05, - "loss": 5.4774, - "step": 25046 - }, - { - "epoch": 13.06232073011734, - "grad_norm": 1.6188302040100098, - "learning_rate": 7.533768844221106e-05, - "loss": 4.7949, - "step": 25047 - }, - { - "epoch": 13.06284224250326, - "grad_norm": 1.411867380142212, - "learning_rate": 7.533668341708543e-05, - "loss": 5.5588, - "step": 25048 - }, - { - "epoch": 13.06336375488918, - "grad_norm": 1.5180368423461914, - "learning_rate": 7.53356783919598e-05, - "loss": 5.1152, - "step": 25049 - }, - { - "epoch": 13.063885267275097, - "grad_norm": 1.5499504804611206, - "learning_rate": 7.533467336683417e-05, - "loss": 5.4896, - "step": 25050 - }, - { - "epoch": 13.064406779661017, - "grad_norm": 1.3863475322723389, - "learning_rate": 7.533366834170855e-05, - "loss": 5.6585, - "step": 25051 - }, - { - "epoch": 13.064928292046936, - "grad_norm": 1.4281535148620605, - "learning_rate": 7.533266331658292e-05, - "loss": 5.8017, - "step": 25052 - }, - { - "epoch": 13.065449804432856, - "grad_norm": 1.483319878578186, - "learning_rate": 7.533165829145729e-05, - "loss": 5.3709, - "step": 25053 - }, - { - "epoch": 13.065971316818775, - "grad_norm": 1.4394121170043945, - "learning_rate": 7.533065326633167e-05, - "loss": 5.5648, - "step": 25054 - }, - { - "epoch": 13.066492829204694, - "grad_norm": 1.5054552555084229, - "learning_rate": 7.532964824120603e-05, - "loss": 5.061, - "step": 25055 - }, - { - "epoch": 13.067014341590612, - "grad_norm": 1.3593753576278687, - "learning_rate": 7.532864321608041e-05, - "loss": 5.5065, - "step": 25056 - }, - { - "epoch": 13.067535853976532, - "grad_norm": 1.4533852338790894, - "learning_rate": 7.532763819095477e-05, - "loss": 5.4714, - "step": 25057 - }, - { - "epoch": 13.068057366362451, - "grad_norm": 1.375229835510254, - "learning_rate": 7.532663316582915e-05, - "loss": 5.5585, - "step": 25058 - }, - { - "epoch": 13.06857887874837, - "grad_norm": 1.451245665550232, - "learning_rate": 7.532562814070352e-05, - "loss": 5.2783, - "step": 25059 - }, - { - "epoch": 13.06910039113429, - "grad_norm": 1.4193123579025269, - "learning_rate": 7.532462311557789e-05, - "loss": 5.5125, - "step": 25060 - }, - { - "epoch": 13.06962190352021, - "grad_norm": 1.4900869131088257, - "learning_rate": 7.532361809045226e-05, - "loss": 5.3788, - "step": 25061 - }, - { - "epoch": 13.070143415906127, - "grad_norm": 1.488438367843628, - "learning_rate": 7.532261306532664e-05, - "loss": 5.1694, - "step": 25062 - }, - { - "epoch": 13.070664928292047, - "grad_norm": 1.3810878992080688, - "learning_rate": 7.532160804020101e-05, - "loss": 5.7402, - "step": 25063 - }, - { - "epoch": 13.071186440677966, - "grad_norm": 1.3966197967529297, - "learning_rate": 7.532060301507538e-05, - "loss": 4.9495, - "step": 25064 - }, - { - "epoch": 13.071707953063886, - "grad_norm": 1.4670382738113403, - "learning_rate": 7.531959798994976e-05, - "loss": 5.1611, - "step": 25065 - }, - { - "epoch": 13.072229465449805, - "grad_norm": 1.3351085186004639, - "learning_rate": 7.531859296482412e-05, - "loss": 5.6076, - "step": 25066 - }, - { - "epoch": 13.072750977835724, - "grad_norm": 1.4964770078659058, - "learning_rate": 7.53175879396985e-05, - "loss": 5.1354, - "step": 25067 - }, - { - "epoch": 13.073272490221642, - "grad_norm": 1.5482255220413208, - "learning_rate": 7.531658291457286e-05, - "loss": 5.4422, - "step": 25068 - }, - { - "epoch": 13.073794002607562, - "grad_norm": 1.5550713539123535, - "learning_rate": 7.531557788944724e-05, - "loss": 5.0096, - "step": 25069 - }, - { - "epoch": 13.074315514993481, - "grad_norm": 1.3136382102966309, - "learning_rate": 7.53145728643216e-05, - "loss": 5.4884, - "step": 25070 - }, - { - "epoch": 13.0748370273794, - "grad_norm": 1.5356523990631104, - "learning_rate": 7.531356783919598e-05, - "loss": 5.2229, - "step": 25071 - }, - { - "epoch": 13.07535853976532, - "grad_norm": 1.3667304515838623, - "learning_rate": 7.531256281407036e-05, - "loss": 5.7175, - "step": 25072 - }, - { - "epoch": 13.075880052151238, - "grad_norm": 1.4704686403274536, - "learning_rate": 7.531155778894474e-05, - "loss": 5.6305, - "step": 25073 - }, - { - "epoch": 13.076401564537157, - "grad_norm": 1.3831396102905273, - "learning_rate": 7.53105527638191e-05, - "loss": 5.3539, - "step": 25074 - }, - { - "epoch": 13.076923076923077, - "grad_norm": 1.3950331211090088, - "learning_rate": 7.530954773869348e-05, - "loss": 5.5623, - "step": 25075 - }, - { - "epoch": 13.077444589308996, - "grad_norm": 1.4728302955627441, - "learning_rate": 7.530854271356784e-05, - "loss": 5.7949, - "step": 25076 - }, - { - "epoch": 13.077966101694916, - "grad_norm": 1.4279857873916626, - "learning_rate": 7.530753768844222e-05, - "loss": 5.6959, - "step": 25077 - }, - { - "epoch": 13.078487614080835, - "grad_norm": 1.4262562990188599, - "learning_rate": 7.530653266331659e-05, - "loss": 4.7335, - "step": 25078 - }, - { - "epoch": 13.079009126466753, - "grad_norm": 1.4884908199310303, - "learning_rate": 7.530552763819095e-05, - "loss": 5.054, - "step": 25079 - }, - { - "epoch": 13.079530638852672, - "grad_norm": 1.4779795408248901, - "learning_rate": 7.530452261306533e-05, - "loss": 5.6333, - "step": 25080 - }, - { - "epoch": 13.080052151238592, - "grad_norm": 1.4568383693695068, - "learning_rate": 7.530351758793969e-05, - "loss": 5.7795, - "step": 25081 - }, - { - "epoch": 13.080573663624511, - "grad_norm": 1.469770073890686, - "learning_rate": 7.530251256281407e-05, - "loss": 5.5671, - "step": 25082 - }, - { - "epoch": 13.08109517601043, - "grad_norm": 1.5236530303955078, - "learning_rate": 7.530150753768845e-05, - "loss": 5.6963, - "step": 25083 - }, - { - "epoch": 13.08161668839635, - "grad_norm": 1.540531039237976, - "learning_rate": 7.530050251256283e-05, - "loss": 5.4676, - "step": 25084 - }, - { - "epoch": 13.082138200782268, - "grad_norm": 1.5736092329025269, - "learning_rate": 7.529949748743719e-05, - "loss": 5.3604, - "step": 25085 - }, - { - "epoch": 13.082659713168187, - "grad_norm": 1.4377678632736206, - "learning_rate": 7.529849246231157e-05, - "loss": 4.9539, - "step": 25086 - }, - { - "epoch": 13.083181225554107, - "grad_norm": 1.4185621738433838, - "learning_rate": 7.529748743718593e-05, - "loss": 5.7506, - "step": 25087 - }, - { - "epoch": 13.083702737940026, - "grad_norm": 1.419871211051941, - "learning_rate": 7.529648241206031e-05, - "loss": 5.5598, - "step": 25088 - }, - { - "epoch": 13.084224250325946, - "grad_norm": 1.4907103776931763, - "learning_rate": 7.529547738693467e-05, - "loss": 5.4017, - "step": 25089 - }, - { - "epoch": 13.084745762711865, - "grad_norm": 1.472992181777954, - "learning_rate": 7.529447236180905e-05, - "loss": 5.4528, - "step": 25090 - }, - { - "epoch": 13.085267275097783, - "grad_norm": 1.439988374710083, - "learning_rate": 7.529346733668342e-05, - "loss": 5.3537, - "step": 25091 - }, - { - "epoch": 13.085788787483702, - "grad_norm": 1.504423975944519, - "learning_rate": 7.52924623115578e-05, - "loss": 5.1763, - "step": 25092 - }, - { - "epoch": 13.086310299869622, - "grad_norm": 1.5364910364151, - "learning_rate": 7.529145728643217e-05, - "loss": 5.4415, - "step": 25093 - }, - { - "epoch": 13.086831812255541, - "grad_norm": 1.5027742385864258, - "learning_rate": 7.529045226130654e-05, - "loss": 4.7892, - "step": 25094 - }, - { - "epoch": 13.08735332464146, - "grad_norm": 1.5341349840164185, - "learning_rate": 7.528944723618091e-05, - "loss": 5.343, - "step": 25095 - }, - { - "epoch": 13.08787483702738, - "grad_norm": 1.5523431301116943, - "learning_rate": 7.528844221105528e-05, - "loss": 5.4983, - "step": 25096 - }, - { - "epoch": 13.088396349413298, - "grad_norm": 1.6440255641937256, - "learning_rate": 7.528743718592966e-05, - "loss": 5.2632, - "step": 25097 - }, - { - "epoch": 13.088917861799217, - "grad_norm": 1.6088463068008423, - "learning_rate": 7.528643216080402e-05, - "loss": 5.4873, - "step": 25098 - }, - { - "epoch": 13.089439374185137, - "grad_norm": 1.656606674194336, - "learning_rate": 7.52854271356784e-05, - "loss": 4.9075, - "step": 25099 - }, - { - "epoch": 13.089960886571056, - "grad_norm": 1.4864325523376465, - "learning_rate": 7.528442211055276e-05, - "loss": 5.5789, - "step": 25100 - }, - { - "epoch": 13.090482398956976, - "grad_norm": 1.5176934003829956, - "learning_rate": 7.528341708542714e-05, - "loss": 5.2836, - "step": 25101 - }, - { - "epoch": 13.091003911342895, - "grad_norm": 1.4057013988494873, - "learning_rate": 7.52824120603015e-05, - "loss": 5.5386, - "step": 25102 - }, - { - "epoch": 13.091525423728813, - "grad_norm": 1.4734258651733398, - "learning_rate": 7.528140703517588e-05, - "loss": 5.2982, - "step": 25103 - }, - { - "epoch": 13.092046936114732, - "grad_norm": 1.4289977550506592, - "learning_rate": 7.528040201005026e-05, - "loss": 5.6123, - "step": 25104 - }, - { - "epoch": 13.092568448500652, - "grad_norm": 1.5152521133422852, - "learning_rate": 7.527939698492462e-05, - "loss": 5.5751, - "step": 25105 - }, - { - "epoch": 13.093089960886571, - "grad_norm": 1.7377262115478516, - "learning_rate": 7.5278391959799e-05, - "loss": 5.5361, - "step": 25106 - }, - { - "epoch": 13.09361147327249, - "grad_norm": 1.4620219469070435, - "learning_rate": 7.527738693467337e-05, - "loss": 5.4823, - "step": 25107 - }, - { - "epoch": 13.09413298565841, - "grad_norm": 1.376672387123108, - "learning_rate": 7.527638190954774e-05, - "loss": 5.4084, - "step": 25108 - }, - { - "epoch": 13.094654498044328, - "grad_norm": 1.4942291975021362, - "learning_rate": 7.527537688442211e-05, - "loss": 4.8811, - "step": 25109 - }, - { - "epoch": 13.095176010430247, - "grad_norm": 1.4416841268539429, - "learning_rate": 7.527437185929649e-05, - "loss": 5.336, - "step": 25110 - }, - { - "epoch": 13.095697522816167, - "grad_norm": 1.4809435606002808, - "learning_rate": 7.527336683417085e-05, - "loss": 5.582, - "step": 25111 - }, - { - "epoch": 13.096219035202086, - "grad_norm": 1.5408440828323364, - "learning_rate": 7.527236180904523e-05, - "loss": 5.1958, - "step": 25112 - }, - { - "epoch": 13.096740547588006, - "grad_norm": 1.3429187536239624, - "learning_rate": 7.52713567839196e-05, - "loss": 5.248, - "step": 25113 - }, - { - "epoch": 13.097262059973925, - "grad_norm": 1.489094853401184, - "learning_rate": 7.527035175879398e-05, - "loss": 5.1926, - "step": 25114 - }, - { - "epoch": 13.097783572359843, - "grad_norm": 1.5334630012512207, - "learning_rate": 7.526934673366835e-05, - "loss": 5.5628, - "step": 25115 - }, - { - "epoch": 13.098305084745762, - "grad_norm": 1.4755303859710693, - "learning_rate": 7.526834170854273e-05, - "loss": 5.4832, - "step": 25116 - }, - { - "epoch": 13.098826597131682, - "grad_norm": 1.5134422779083252, - "learning_rate": 7.526733668341709e-05, - "loss": 5.2358, - "step": 25117 - }, - { - "epoch": 13.099348109517601, - "grad_norm": 1.5732399225234985, - "learning_rate": 7.526633165829145e-05, - "loss": 4.5971, - "step": 25118 - }, - { - "epoch": 13.09986962190352, - "grad_norm": 1.4462542533874512, - "learning_rate": 7.526532663316583e-05, - "loss": 5.5894, - "step": 25119 - }, - { - "epoch": 13.10039113428944, - "grad_norm": 1.480206847190857, - "learning_rate": 7.52643216080402e-05, - "loss": 5.3836, - "step": 25120 - }, - { - "epoch": 13.100912646675358, - "grad_norm": 1.431334376335144, - "learning_rate": 7.526331658291457e-05, - "loss": 5.4143, - "step": 25121 - }, - { - "epoch": 13.101434159061277, - "grad_norm": 1.4676493406295776, - "learning_rate": 7.526231155778894e-05, - "loss": 5.2956, - "step": 25122 - }, - { - "epoch": 13.101955671447197, - "grad_norm": 1.5867829322814941, - "learning_rate": 7.526130653266332e-05, - "loss": 5.5868, - "step": 25123 - }, - { - "epoch": 13.102477183833116, - "grad_norm": 1.489991545677185, - "learning_rate": 7.52603015075377e-05, - "loss": 5.2759, - "step": 25124 - }, - { - "epoch": 13.102998696219036, - "grad_norm": 1.480041742324829, - "learning_rate": 7.525929648241207e-05, - "loss": 5.1028, - "step": 25125 - }, - { - "epoch": 13.103520208604955, - "grad_norm": 1.3973528146743774, - "learning_rate": 7.525829145728644e-05, - "loss": 5.4011, - "step": 25126 - }, - { - "epoch": 13.104041720990873, - "grad_norm": 1.4877779483795166, - "learning_rate": 7.525728643216081e-05, - "loss": 4.795, - "step": 25127 - }, - { - "epoch": 13.104563233376792, - "grad_norm": 1.6170357465744019, - "learning_rate": 7.525628140703518e-05, - "loss": 4.7331, - "step": 25128 - }, - { - "epoch": 13.105084745762712, - "grad_norm": 1.3885246515274048, - "learning_rate": 7.525527638190956e-05, - "loss": 5.6365, - "step": 25129 - }, - { - "epoch": 13.105606258148631, - "grad_norm": 1.3848153352737427, - "learning_rate": 7.525427135678392e-05, - "loss": 5.5304, - "step": 25130 - }, - { - "epoch": 13.10612777053455, - "grad_norm": 1.4276084899902344, - "learning_rate": 7.525326633165829e-05, - "loss": 5.598, - "step": 25131 - }, - { - "epoch": 13.10664928292047, - "grad_norm": 1.6110529899597168, - "learning_rate": 7.525226130653266e-05, - "loss": 5.1046, - "step": 25132 - }, - { - "epoch": 13.107170795306388, - "grad_norm": 1.3766005039215088, - "learning_rate": 7.525125628140704e-05, - "loss": 5.5362, - "step": 25133 - }, - { - "epoch": 13.107692307692307, - "grad_norm": 1.3922392129898071, - "learning_rate": 7.525025125628142e-05, - "loss": 5.7649, - "step": 25134 - }, - { - "epoch": 13.108213820078227, - "grad_norm": 1.407058835029602, - "learning_rate": 7.524924623115578e-05, - "loss": 5.4345, - "step": 25135 - }, - { - "epoch": 13.108735332464146, - "grad_norm": 1.4444992542266846, - "learning_rate": 7.524824120603016e-05, - "loss": 4.9895, - "step": 25136 - }, - { - "epoch": 13.109256844850066, - "grad_norm": 1.5205063819885254, - "learning_rate": 7.524723618090452e-05, - "loss": 5.4086, - "step": 25137 - }, - { - "epoch": 13.109778357235985, - "grad_norm": 1.4468477964401245, - "learning_rate": 7.52462311557789e-05, - "loss": 5.8485, - "step": 25138 - }, - { - "epoch": 13.110299869621903, - "grad_norm": 1.4355002641677856, - "learning_rate": 7.524522613065327e-05, - "loss": 5.4352, - "step": 25139 - }, - { - "epoch": 13.110821382007822, - "grad_norm": 1.4055415391921997, - "learning_rate": 7.524422110552764e-05, - "loss": 5.8012, - "step": 25140 - }, - { - "epoch": 13.111342894393742, - "grad_norm": 1.4706157445907593, - "learning_rate": 7.524321608040201e-05, - "loss": 5.4683, - "step": 25141 - }, - { - "epoch": 13.111864406779661, - "grad_norm": 1.3362462520599365, - "learning_rate": 7.524221105527639e-05, - "loss": 5.9222, - "step": 25142 - }, - { - "epoch": 13.11238591916558, - "grad_norm": 1.371922492980957, - "learning_rate": 7.524120603015075e-05, - "loss": 5.6387, - "step": 25143 - }, - { - "epoch": 13.1129074315515, - "grad_norm": 1.631081461906433, - "learning_rate": 7.524020100502513e-05, - "loss": 5.0964, - "step": 25144 - }, - { - "epoch": 13.113428943937418, - "grad_norm": 1.402008295059204, - "learning_rate": 7.523919597989951e-05, - "loss": 5.1552, - "step": 25145 - }, - { - "epoch": 13.113950456323337, - "grad_norm": 1.4351791143417358, - "learning_rate": 7.523819095477387e-05, - "loss": 5.4799, - "step": 25146 - }, - { - "epoch": 13.114471968709257, - "grad_norm": 1.4819391965866089, - "learning_rate": 7.523718592964825e-05, - "loss": 5.4855, - "step": 25147 - }, - { - "epoch": 13.114993481095176, - "grad_norm": 1.4288625717163086, - "learning_rate": 7.523618090452261e-05, - "loss": 5.1918, - "step": 25148 - }, - { - "epoch": 13.115514993481096, - "grad_norm": 1.4658851623535156, - "learning_rate": 7.523517587939699e-05, - "loss": 5.3301, - "step": 25149 - }, - { - "epoch": 13.116036505867015, - "grad_norm": 1.4835419654846191, - "learning_rate": 7.523417085427136e-05, - "loss": 5.3442, - "step": 25150 - }, - { - "epoch": 13.116558018252933, - "grad_norm": 1.4519551992416382, - "learning_rate": 7.523316582914573e-05, - "loss": 5.3356, - "step": 25151 - }, - { - "epoch": 13.117079530638852, - "grad_norm": 1.4122859239578247, - "learning_rate": 7.52321608040201e-05, - "loss": 5.2117, - "step": 25152 - }, - { - "epoch": 13.117601043024772, - "grad_norm": 1.409703016281128, - "learning_rate": 7.523115577889448e-05, - "loss": 5.2204, - "step": 25153 - }, - { - "epoch": 13.118122555410691, - "grad_norm": 1.4363669157028198, - "learning_rate": 7.523015075376885e-05, - "loss": 5.6138, - "step": 25154 - }, - { - "epoch": 13.11864406779661, - "grad_norm": 1.5355581045150757, - "learning_rate": 7.522914572864323e-05, - "loss": 5.0141, - "step": 25155 - }, - { - "epoch": 13.11916558018253, - "grad_norm": 1.4587815999984741, - "learning_rate": 7.52281407035176e-05, - "loss": 5.4516, - "step": 25156 - }, - { - "epoch": 13.119687092568448, - "grad_norm": 1.5065200328826904, - "learning_rate": 7.522713567839196e-05, - "loss": 5.1917, - "step": 25157 - }, - { - "epoch": 13.120208604954367, - "grad_norm": 1.281298041343689, - "learning_rate": 7.522613065326634e-05, - "loss": 5.2311, - "step": 25158 - }, - { - "epoch": 13.120730117340287, - "grad_norm": 1.365914225578308, - "learning_rate": 7.52251256281407e-05, - "loss": 5.395, - "step": 25159 - }, - { - "epoch": 13.121251629726206, - "grad_norm": 1.7724839448928833, - "learning_rate": 7.522412060301508e-05, - "loss": 4.9295, - "step": 25160 - }, - { - "epoch": 13.121773142112126, - "grad_norm": 1.4676743745803833, - "learning_rate": 7.522311557788944e-05, - "loss": 5.0951, - "step": 25161 - }, - { - "epoch": 13.122294654498045, - "grad_norm": 1.4871352910995483, - "learning_rate": 7.522211055276382e-05, - "loss": 5.1357, - "step": 25162 - }, - { - "epoch": 13.122816166883963, - "grad_norm": 1.485009789466858, - "learning_rate": 7.522110552763819e-05, - "loss": 5.4757, - "step": 25163 - }, - { - "epoch": 13.123337679269882, - "grad_norm": 1.4658973217010498, - "learning_rate": 7.522010050251256e-05, - "loss": 5.2949, - "step": 25164 - }, - { - "epoch": 13.123859191655802, - "grad_norm": 1.4807689189910889, - "learning_rate": 7.521909547738694e-05, - "loss": 5.5555, - "step": 25165 - }, - { - "epoch": 13.124380704041721, - "grad_norm": 1.4030925035476685, - "learning_rate": 7.521809045226132e-05, - "loss": 5.5406, - "step": 25166 - }, - { - "epoch": 13.12490221642764, - "grad_norm": 1.4906975030899048, - "learning_rate": 7.521708542713568e-05, - "loss": 5.1398, - "step": 25167 - }, - { - "epoch": 13.125423728813558, - "grad_norm": 1.5304378271102905, - "learning_rate": 7.521608040201006e-05, - "loss": 5.3768, - "step": 25168 - }, - { - "epoch": 13.125945241199478, - "grad_norm": 1.4647471904754639, - "learning_rate": 7.521507537688443e-05, - "loss": 5.4267, - "step": 25169 - }, - { - "epoch": 13.126466753585397, - "grad_norm": 1.4680771827697754, - "learning_rate": 7.52140703517588e-05, - "loss": 5.2132, - "step": 25170 - }, - { - "epoch": 13.126988265971317, - "grad_norm": 1.3946707248687744, - "learning_rate": 7.521306532663317e-05, - "loss": 5.6097, - "step": 25171 - }, - { - "epoch": 13.127509778357236, - "grad_norm": 1.4917488098144531, - "learning_rate": 7.521206030150753e-05, - "loss": 5.4537, - "step": 25172 - }, - { - "epoch": 13.128031290743156, - "grad_norm": 1.3621923923492432, - "learning_rate": 7.521105527638191e-05, - "loss": 5.6704, - "step": 25173 - }, - { - "epoch": 13.128552803129073, - "grad_norm": 1.4501692056655884, - "learning_rate": 7.521005025125629e-05, - "loss": 5.5443, - "step": 25174 - }, - { - "epoch": 13.129074315514993, - "grad_norm": 1.5247628688812256, - "learning_rate": 7.520904522613067e-05, - "loss": 5.2935, - "step": 25175 - }, - { - "epoch": 13.129595827900912, - "grad_norm": 1.421472191810608, - "learning_rate": 7.520804020100503e-05, - "loss": 5.3774, - "step": 25176 - }, - { - "epoch": 13.130117340286832, - "grad_norm": 1.4811326265335083, - "learning_rate": 7.520703517587941e-05, - "loss": 4.9176, - "step": 25177 - }, - { - "epoch": 13.130638852672751, - "grad_norm": 1.3955714702606201, - "learning_rate": 7.520603015075377e-05, - "loss": 5.4488, - "step": 25178 - }, - { - "epoch": 13.13116036505867, - "grad_norm": 1.464870572090149, - "learning_rate": 7.520502512562815e-05, - "loss": 5.255, - "step": 25179 - }, - { - "epoch": 13.131681877444588, - "grad_norm": 1.5355099439620972, - "learning_rate": 7.520402010050251e-05, - "loss": 5.7683, - "step": 25180 - }, - { - "epoch": 13.132203389830508, - "grad_norm": 1.6420077085494995, - "learning_rate": 7.520301507537689e-05, - "loss": 4.7608, - "step": 25181 - }, - { - "epoch": 13.132724902216427, - "grad_norm": 1.368011713027954, - "learning_rate": 7.520201005025126e-05, - "loss": 5.4534, - "step": 25182 - }, - { - "epoch": 13.133246414602347, - "grad_norm": 1.3707728385925293, - "learning_rate": 7.520100502512563e-05, - "loss": 5.4152, - "step": 25183 - }, - { - "epoch": 13.133767926988266, - "grad_norm": 1.4228885173797607, - "learning_rate": 7.52e-05, - "loss": 5.854, - "step": 25184 - }, - { - "epoch": 13.134289439374186, - "grad_norm": 1.5545129776000977, - "learning_rate": 7.519899497487438e-05, - "loss": 5.5057, - "step": 25185 - }, - { - "epoch": 13.134810951760103, - "grad_norm": 1.3742518424987793, - "learning_rate": 7.519798994974875e-05, - "loss": 5.0162, - "step": 25186 - }, - { - "epoch": 13.135332464146023, - "grad_norm": 1.6018249988555908, - "learning_rate": 7.519698492462312e-05, - "loss": 4.8228, - "step": 25187 - }, - { - "epoch": 13.135853976531942, - "grad_norm": 1.474749207496643, - "learning_rate": 7.51959798994975e-05, - "loss": 5.449, - "step": 25188 - }, - { - "epoch": 13.136375488917862, - "grad_norm": 1.3731311559677124, - "learning_rate": 7.519497487437186e-05, - "loss": 5.4235, - "step": 25189 - }, - { - "epoch": 13.136897001303781, - "grad_norm": 1.4387822151184082, - "learning_rate": 7.519396984924624e-05, - "loss": 5.7949, - "step": 25190 - }, - { - "epoch": 13.1374185136897, - "grad_norm": 1.5263595581054688, - "learning_rate": 7.51929648241206e-05, - "loss": 5.4052, - "step": 25191 - }, - { - "epoch": 13.137940026075619, - "grad_norm": 1.5006766319274902, - "learning_rate": 7.519195979899498e-05, - "loss": 5.2654, - "step": 25192 - }, - { - "epoch": 13.138461538461538, - "grad_norm": 1.4632511138916016, - "learning_rate": 7.519095477386934e-05, - "loss": 5.4663, - "step": 25193 - }, - { - "epoch": 13.138983050847457, - "grad_norm": 1.5952000617980957, - "learning_rate": 7.518994974874372e-05, - "loss": 5.6914, - "step": 25194 - }, - { - "epoch": 13.139504563233377, - "grad_norm": 1.3941690921783447, - "learning_rate": 7.518894472361809e-05, - "loss": 5.4478, - "step": 25195 - }, - { - "epoch": 13.140026075619296, - "grad_norm": 1.495408296585083, - "learning_rate": 7.518793969849246e-05, - "loss": 5.4356, - "step": 25196 - }, - { - "epoch": 13.140547588005216, - "grad_norm": 1.5246511697769165, - "learning_rate": 7.518693467336684e-05, - "loss": 5.5778, - "step": 25197 - }, - { - "epoch": 13.141069100391134, - "grad_norm": 1.4134644269943237, - "learning_rate": 7.51859296482412e-05, - "loss": 5.5395, - "step": 25198 - }, - { - "epoch": 13.141590612777053, - "grad_norm": 1.4303315877914429, - "learning_rate": 7.518492462311558e-05, - "loss": 5.0463, - "step": 25199 - }, - { - "epoch": 13.142112125162972, - "grad_norm": 1.4206912517547607, - "learning_rate": 7.518391959798995e-05, - "loss": 5.5757, - "step": 25200 - }, - { - "epoch": 13.142633637548892, - "grad_norm": 1.3214962482452393, - "learning_rate": 7.518291457286433e-05, - "loss": 5.9257, - "step": 25201 - }, - { - "epoch": 13.143155149934811, - "grad_norm": 1.439445972442627, - "learning_rate": 7.518190954773869e-05, - "loss": 4.849, - "step": 25202 - }, - { - "epoch": 13.14367666232073, - "grad_norm": 1.5055605173110962, - "learning_rate": 7.518090452261307e-05, - "loss": 5.3929, - "step": 25203 - }, - { - "epoch": 13.144198174706649, - "grad_norm": 1.4331488609313965, - "learning_rate": 7.517989949748743e-05, - "loss": 5.3445, - "step": 25204 - }, - { - "epoch": 13.144719687092568, - "grad_norm": 1.379417896270752, - "learning_rate": 7.517889447236181e-05, - "loss": 5.2591, - "step": 25205 - }, - { - "epoch": 13.145241199478487, - "grad_norm": 1.4135205745697021, - "learning_rate": 7.517788944723619e-05, - "loss": 5.4163, - "step": 25206 - }, - { - "epoch": 13.145762711864407, - "grad_norm": 1.3682451248168945, - "learning_rate": 7.517688442211057e-05, - "loss": 5.5839, - "step": 25207 - }, - { - "epoch": 13.146284224250326, - "grad_norm": 1.4360519647598267, - "learning_rate": 7.517587939698493e-05, - "loss": 5.5795, - "step": 25208 - }, - { - "epoch": 13.146805736636246, - "grad_norm": 1.602061152458191, - "learning_rate": 7.517487437185931e-05, - "loss": 4.9595, - "step": 25209 - }, - { - "epoch": 13.147327249022164, - "grad_norm": 1.447446346282959, - "learning_rate": 7.517386934673367e-05, - "loss": 5.5864, - "step": 25210 - }, - { - "epoch": 13.147848761408083, - "grad_norm": 1.464275598526001, - "learning_rate": 7.517286432160804e-05, - "loss": 5.2592, - "step": 25211 - }, - { - "epoch": 13.148370273794002, - "grad_norm": 1.403616189956665, - "learning_rate": 7.517185929648241e-05, - "loss": 5.6849, - "step": 25212 - }, - { - "epoch": 13.148891786179922, - "grad_norm": 1.4978772401809692, - "learning_rate": 7.517085427135678e-05, - "loss": 5.0599, - "step": 25213 - }, - { - "epoch": 13.149413298565841, - "grad_norm": 1.446114420890808, - "learning_rate": 7.516984924623116e-05, - "loss": 5.1957, - "step": 25214 - }, - { - "epoch": 13.14993481095176, - "grad_norm": 1.3357371091842651, - "learning_rate": 7.516884422110552e-05, - "loss": 5.2748, - "step": 25215 - }, - { - "epoch": 13.150456323337679, - "grad_norm": 1.348413348197937, - "learning_rate": 7.51678391959799e-05, - "loss": 5.4667, - "step": 25216 - }, - { - "epoch": 13.150977835723598, - "grad_norm": 1.4176950454711914, - "learning_rate": 7.516683417085428e-05, - "loss": 5.6824, - "step": 25217 - }, - { - "epoch": 13.151499348109517, - "grad_norm": 1.5069847106933594, - "learning_rate": 7.516582914572865e-05, - "loss": 5.2248, - "step": 25218 - }, - { - "epoch": 13.152020860495437, - "grad_norm": 1.639671802520752, - "learning_rate": 7.516482412060302e-05, - "loss": 5.3382, - "step": 25219 - }, - { - "epoch": 13.152542372881356, - "grad_norm": 1.4268959760665894, - "learning_rate": 7.51638190954774e-05, - "loss": 4.6472, - "step": 25220 - }, - { - "epoch": 13.153063885267276, - "grad_norm": 1.5120863914489746, - "learning_rate": 7.516281407035176e-05, - "loss": 5.1403, - "step": 25221 - }, - { - "epoch": 13.153585397653194, - "grad_norm": 1.3998470306396484, - "learning_rate": 7.516180904522614e-05, - "loss": 5.1004, - "step": 25222 - }, - { - "epoch": 13.154106910039113, - "grad_norm": 1.5750643014907837, - "learning_rate": 7.51608040201005e-05, - "loss": 5.4682, - "step": 25223 - }, - { - "epoch": 13.154628422425032, - "grad_norm": 1.4712989330291748, - "learning_rate": 7.515979899497487e-05, - "loss": 5.1003, - "step": 25224 - }, - { - "epoch": 13.155149934810952, - "grad_norm": 1.5370848178863525, - "learning_rate": 7.515879396984925e-05, - "loss": 5.2259, - "step": 25225 - }, - { - "epoch": 13.155671447196871, - "grad_norm": 1.4099410772323608, - "learning_rate": 7.515778894472362e-05, - "loss": 5.6413, - "step": 25226 - }, - { - "epoch": 13.156192959582791, - "grad_norm": 1.4605542421340942, - "learning_rate": 7.5156783919598e-05, - "loss": 5.514, - "step": 25227 - }, - { - "epoch": 13.156714471968709, - "grad_norm": 1.4837132692337036, - "learning_rate": 7.515577889447237e-05, - "loss": 5.1766, - "step": 25228 - }, - { - "epoch": 13.157235984354628, - "grad_norm": 1.7435322999954224, - "learning_rate": 7.515477386934674e-05, - "loss": 5.1242, - "step": 25229 - }, - { - "epoch": 13.157757496740548, - "grad_norm": 1.446797490119934, - "learning_rate": 7.515376884422111e-05, - "loss": 5.6827, - "step": 25230 - }, - { - "epoch": 13.158279009126467, - "grad_norm": 1.5141786336898804, - "learning_rate": 7.515276381909549e-05, - "loss": 5.4957, - "step": 25231 - }, - { - "epoch": 13.158800521512386, - "grad_norm": 1.5257445573806763, - "learning_rate": 7.515175879396985e-05, - "loss": 5.4489, - "step": 25232 - }, - { - "epoch": 13.159322033898306, - "grad_norm": 1.4821162223815918, - "learning_rate": 7.515075376884423e-05, - "loss": 4.9729, - "step": 25233 - }, - { - "epoch": 13.159843546284224, - "grad_norm": 1.4828994274139404, - "learning_rate": 7.514974874371859e-05, - "loss": 5.5719, - "step": 25234 - }, - { - "epoch": 13.160365058670143, - "grad_norm": 1.4445174932479858, - "learning_rate": 7.514874371859297e-05, - "loss": 5.5621, - "step": 25235 - }, - { - "epoch": 13.160886571056063, - "grad_norm": 1.6115525960922241, - "learning_rate": 7.514773869346733e-05, - "loss": 4.5553, - "step": 25236 - }, - { - "epoch": 13.161408083441982, - "grad_norm": 1.5476309061050415, - "learning_rate": 7.514673366834171e-05, - "loss": 5.5753, - "step": 25237 - }, - { - "epoch": 13.161929595827901, - "grad_norm": 1.367323637008667, - "learning_rate": 7.514572864321609e-05, - "loss": 5.1424, - "step": 25238 - }, - { - "epoch": 13.162451108213821, - "grad_norm": 1.524544358253479, - "learning_rate": 7.514472361809045e-05, - "loss": 5.5115, - "step": 25239 - }, - { - "epoch": 13.162972620599739, - "grad_norm": 1.5763441324234009, - "learning_rate": 7.514371859296483e-05, - "loss": 5.135, - "step": 25240 - }, - { - "epoch": 13.163494132985658, - "grad_norm": 1.365749716758728, - "learning_rate": 7.51427135678392e-05, - "loss": 5.4766, - "step": 25241 - }, - { - "epoch": 13.164015645371578, - "grad_norm": 1.470178484916687, - "learning_rate": 7.514170854271357e-05, - "loss": 5.5999, - "step": 25242 - }, - { - "epoch": 13.164537157757497, - "grad_norm": 1.6216228008270264, - "learning_rate": 7.514070351758794e-05, - "loss": 5.0079, - "step": 25243 - }, - { - "epoch": 13.165058670143416, - "grad_norm": 1.4924181699752808, - "learning_rate": 7.513969849246232e-05, - "loss": 4.8182, - "step": 25244 - }, - { - "epoch": 13.165580182529336, - "grad_norm": 1.4343562126159668, - "learning_rate": 7.513869346733668e-05, - "loss": 5.2449, - "step": 25245 - }, - { - "epoch": 13.166101694915254, - "grad_norm": 1.4974836111068726, - "learning_rate": 7.513768844221106e-05, - "loss": 5.8123, - "step": 25246 - }, - { - "epoch": 13.166623207301173, - "grad_norm": 1.3701577186584473, - "learning_rate": 7.513668341708544e-05, - "loss": 5.7129, - "step": 25247 - }, - { - "epoch": 13.167144719687093, - "grad_norm": 1.423564076423645, - "learning_rate": 7.513567839195981e-05, - "loss": 5.6504, - "step": 25248 - }, - { - "epoch": 13.167666232073012, - "grad_norm": 1.4094010591506958, - "learning_rate": 7.513467336683418e-05, - "loss": 5.6017, - "step": 25249 - }, - { - "epoch": 13.168187744458931, - "grad_norm": 1.47555673122406, - "learning_rate": 7.513366834170856e-05, - "loss": 5.4155, - "step": 25250 - }, - { - "epoch": 13.16870925684485, - "grad_norm": 1.4619511365890503, - "learning_rate": 7.513266331658292e-05, - "loss": 5.4583, - "step": 25251 - }, - { - "epoch": 13.169230769230769, - "grad_norm": 1.468177080154419, - "learning_rate": 7.513165829145728e-05, - "loss": 5.5679, - "step": 25252 - }, - { - "epoch": 13.169752281616688, - "grad_norm": 1.4287903308868408, - "learning_rate": 7.513065326633166e-05, - "loss": 5.6069, - "step": 25253 - }, - { - "epoch": 13.170273794002608, - "grad_norm": 1.3606998920440674, - "learning_rate": 7.512964824120603e-05, - "loss": 5.5437, - "step": 25254 - }, - { - "epoch": 13.170795306388527, - "grad_norm": 1.4613357782363892, - "learning_rate": 7.51286432160804e-05, - "loss": 5.1932, - "step": 25255 - }, - { - "epoch": 13.171316818774446, - "grad_norm": 1.461578369140625, - "learning_rate": 7.512763819095477e-05, - "loss": 5.3027, - "step": 25256 - }, - { - "epoch": 13.171838331160366, - "grad_norm": 1.492972731590271, - "learning_rate": 7.512663316582915e-05, - "loss": 4.9425, - "step": 25257 - }, - { - "epoch": 13.172359843546284, - "grad_norm": 1.5285316705703735, - "learning_rate": 7.512562814070352e-05, - "loss": 5.5542, - "step": 25258 - }, - { - "epoch": 13.172881355932203, - "grad_norm": 1.4932959079742432, - "learning_rate": 7.51246231155779e-05, - "loss": 4.8506, - "step": 25259 - }, - { - "epoch": 13.173402868318123, - "grad_norm": 1.439805269241333, - "learning_rate": 7.512361809045227e-05, - "loss": 5.4235, - "step": 25260 - }, - { - "epoch": 13.173924380704042, - "grad_norm": 1.4999237060546875, - "learning_rate": 7.512261306532664e-05, - "loss": 5.143, - "step": 25261 - }, - { - "epoch": 13.174445893089962, - "grad_norm": 1.5481159687042236, - "learning_rate": 7.512160804020101e-05, - "loss": 5.6293, - "step": 25262 - }, - { - "epoch": 13.17496740547588, - "grad_norm": 1.388663649559021, - "learning_rate": 7.512060301507539e-05, - "loss": 4.707, - "step": 25263 - }, - { - "epoch": 13.175488917861799, - "grad_norm": 1.475832462310791, - "learning_rate": 7.511959798994975e-05, - "loss": 5.4087, - "step": 25264 - }, - { - "epoch": 13.176010430247718, - "grad_norm": 1.4332634210586548, - "learning_rate": 7.511859296482411e-05, - "loss": 5.2103, - "step": 25265 - }, - { - "epoch": 13.176531942633638, - "grad_norm": 1.54741632938385, - "learning_rate": 7.511758793969849e-05, - "loss": 5.6008, - "step": 25266 - }, - { - "epoch": 13.177053455019557, - "grad_norm": 1.554762601852417, - "learning_rate": 7.511658291457287e-05, - "loss": 5.2034, - "step": 25267 - }, - { - "epoch": 13.177574967405477, - "grad_norm": 1.3685431480407715, - "learning_rate": 7.511557788944725e-05, - "loss": 5.6397, - "step": 25268 - }, - { - "epoch": 13.178096479791394, - "grad_norm": 1.3834539651870728, - "learning_rate": 7.511457286432161e-05, - "loss": 5.5578, - "step": 25269 - }, - { - "epoch": 13.178617992177314, - "grad_norm": 1.4845693111419678, - "learning_rate": 7.511356783919599e-05, - "loss": 5.3625, - "step": 25270 - }, - { - "epoch": 13.179139504563233, - "grad_norm": 1.409140706062317, - "learning_rate": 7.511256281407035e-05, - "loss": 5.4328, - "step": 25271 - }, - { - "epoch": 13.179661016949153, - "grad_norm": 1.583441972732544, - "learning_rate": 7.511155778894473e-05, - "loss": 4.9113, - "step": 25272 - }, - { - "epoch": 13.180182529335072, - "grad_norm": 1.3735960721969604, - "learning_rate": 7.51105527638191e-05, - "loss": 5.5509, - "step": 25273 - }, - { - "epoch": 13.180704041720992, - "grad_norm": 1.5326156616210938, - "learning_rate": 7.510954773869347e-05, - "loss": 5.3682, - "step": 25274 - }, - { - "epoch": 13.18122555410691, - "grad_norm": 1.471601128578186, - "learning_rate": 7.510854271356784e-05, - "loss": 5.291, - "step": 25275 - }, - { - "epoch": 13.181747066492829, - "grad_norm": 1.4952102899551392, - "learning_rate": 7.510753768844222e-05, - "loss": 5.2322, - "step": 25276 - }, - { - "epoch": 13.182268578878748, - "grad_norm": 1.4075672626495361, - "learning_rate": 7.510653266331658e-05, - "loss": 5.5866, - "step": 25277 - }, - { - "epoch": 13.182790091264668, - "grad_norm": 1.411667823791504, - "learning_rate": 7.510552763819096e-05, - "loss": 5.1595, - "step": 25278 - }, - { - "epoch": 13.183311603650587, - "grad_norm": 1.453439474105835, - "learning_rate": 7.510452261306534e-05, - "loss": 5.4423, - "step": 25279 - }, - { - "epoch": 13.183833116036507, - "grad_norm": 1.4613054990768433, - "learning_rate": 7.51035175879397e-05, - "loss": 4.9892, - "step": 25280 - }, - { - "epoch": 13.184354628422424, - "grad_norm": 1.3879953622817993, - "learning_rate": 7.510251256281408e-05, - "loss": 5.6154, - "step": 25281 - }, - { - "epoch": 13.184876140808344, - "grad_norm": 1.309019684791565, - "learning_rate": 7.510150753768844e-05, - "loss": 5.4199, - "step": 25282 - }, - { - "epoch": 13.185397653194263, - "grad_norm": 1.4620929956436157, - "learning_rate": 7.510050251256282e-05, - "loss": 4.8847, - "step": 25283 - }, - { - "epoch": 13.185919165580183, - "grad_norm": 1.3729901313781738, - "learning_rate": 7.509949748743718e-05, - "loss": 5.8207, - "step": 25284 - }, - { - "epoch": 13.186440677966102, - "grad_norm": 1.628687858581543, - "learning_rate": 7.509849246231156e-05, - "loss": 4.8705, - "step": 25285 - }, - { - "epoch": 13.186962190352022, - "grad_norm": 1.481257677078247, - "learning_rate": 7.509748743718593e-05, - "loss": 5.4343, - "step": 25286 - }, - { - "epoch": 13.18748370273794, - "grad_norm": 1.4264168739318848, - "learning_rate": 7.50964824120603e-05, - "loss": 5.2649, - "step": 25287 - }, - { - "epoch": 13.188005215123859, - "grad_norm": 1.3662102222442627, - "learning_rate": 7.509547738693468e-05, - "loss": 5.8929, - "step": 25288 - }, - { - "epoch": 13.188526727509778, - "grad_norm": 1.4284721612930298, - "learning_rate": 7.509447236180906e-05, - "loss": 5.8247, - "step": 25289 - }, - { - "epoch": 13.189048239895698, - "grad_norm": 1.5036088228225708, - "learning_rate": 7.509346733668342e-05, - "loss": 4.8536, - "step": 25290 - }, - { - "epoch": 13.189569752281617, - "grad_norm": 1.3249458074569702, - "learning_rate": 7.509246231155779e-05, - "loss": 5.895, - "step": 25291 - }, - { - "epoch": 13.190091264667537, - "grad_norm": 1.520720362663269, - "learning_rate": 7.509145728643217e-05, - "loss": 5.2981, - "step": 25292 - }, - { - "epoch": 13.190612777053454, - "grad_norm": 1.4117666482925415, - "learning_rate": 7.509045226130653e-05, - "loss": 5.212, - "step": 25293 - }, - { - "epoch": 13.191134289439374, - "grad_norm": 1.494834065437317, - "learning_rate": 7.508944723618091e-05, - "loss": 5.2485, - "step": 25294 - }, - { - "epoch": 13.191655801825293, - "grad_norm": 1.5160597562789917, - "learning_rate": 7.508844221105527e-05, - "loss": 5.4866, - "step": 25295 - }, - { - "epoch": 13.192177314211213, - "grad_norm": 1.5150141716003418, - "learning_rate": 7.508743718592965e-05, - "loss": 5.521, - "step": 25296 - }, - { - "epoch": 13.192698826597132, - "grad_norm": 1.477573275566101, - "learning_rate": 7.508643216080402e-05, - "loss": 5.4645, - "step": 25297 - }, - { - "epoch": 13.193220338983052, - "grad_norm": 1.4478883743286133, - "learning_rate": 7.508542713567839e-05, - "loss": 5.6425, - "step": 25298 - }, - { - "epoch": 13.19374185136897, - "grad_norm": 1.4807947874069214, - "learning_rate": 7.508442211055277e-05, - "loss": 5.3986, - "step": 25299 - }, - { - "epoch": 13.194263363754889, - "grad_norm": 1.466870903968811, - "learning_rate": 7.508341708542715e-05, - "loss": 5.6284, - "step": 25300 - }, - { - "epoch": 13.194784876140808, - "grad_norm": 1.518020510673523, - "learning_rate": 7.508241206030151e-05, - "loss": 5.6894, - "step": 25301 - }, - { - "epoch": 13.195306388526728, - "grad_norm": 1.4533865451812744, - "learning_rate": 7.508140703517589e-05, - "loss": 5.3161, - "step": 25302 - }, - { - "epoch": 13.195827900912647, - "grad_norm": 1.4525467157363892, - "learning_rate": 7.508040201005026e-05, - "loss": 5.6545, - "step": 25303 - }, - { - "epoch": 13.196349413298567, - "grad_norm": 1.4165788888931274, - "learning_rate": 7.507939698492462e-05, - "loss": 5.4238, - "step": 25304 - }, - { - "epoch": 13.196870925684484, - "grad_norm": 1.574947476387024, - "learning_rate": 7.5078391959799e-05, - "loss": 4.7021, - "step": 25305 - }, - { - "epoch": 13.197392438070404, - "grad_norm": 1.5506794452667236, - "learning_rate": 7.507738693467336e-05, - "loss": 4.9751, - "step": 25306 - }, - { - "epoch": 13.197913950456323, - "grad_norm": 1.5280377864837646, - "learning_rate": 7.507638190954774e-05, - "loss": 4.7964, - "step": 25307 - }, - { - "epoch": 13.198435462842243, - "grad_norm": 1.5694472789764404, - "learning_rate": 7.507537688442212e-05, - "loss": 5.2039, - "step": 25308 - }, - { - "epoch": 13.198956975228162, - "grad_norm": 1.4281989336013794, - "learning_rate": 7.50743718592965e-05, - "loss": 5.0129, - "step": 25309 - }, - { - "epoch": 13.199478487614082, - "grad_norm": 1.6032114028930664, - "learning_rate": 7.507336683417086e-05, - "loss": 5.0869, - "step": 25310 - }, - { - "epoch": 13.2, - "grad_norm": 1.5611605644226074, - "learning_rate": 7.507236180904524e-05, - "loss": 5.163, - "step": 25311 - }, - { - "epoch": 13.200521512385919, - "grad_norm": 1.5177199840545654, - "learning_rate": 7.50713567839196e-05, - "loss": 5.7383, - "step": 25312 - }, - { - "epoch": 13.201043024771838, - "grad_norm": 1.4981129169464111, - "learning_rate": 7.507035175879398e-05, - "loss": 5.1907, - "step": 25313 - }, - { - "epoch": 13.201564537157758, - "grad_norm": 1.5409106016159058, - "learning_rate": 7.506934673366834e-05, - "loss": 5.2101, - "step": 25314 - }, - { - "epoch": 13.202086049543677, - "grad_norm": 1.5703922510147095, - "learning_rate": 7.506834170854272e-05, - "loss": 5.1259, - "step": 25315 - }, - { - "epoch": 13.202607561929597, - "grad_norm": 1.4342584609985352, - "learning_rate": 7.506733668341709e-05, - "loss": 5.7659, - "step": 25316 - }, - { - "epoch": 13.203129074315514, - "grad_norm": 1.456813931465149, - "learning_rate": 7.506633165829145e-05, - "loss": 5.2367, - "step": 25317 - }, - { - "epoch": 13.203650586701434, - "grad_norm": 1.492476224899292, - "learning_rate": 7.506532663316583e-05, - "loss": 5.5118, - "step": 25318 - }, - { - "epoch": 13.204172099087353, - "grad_norm": 1.454817771911621, - "learning_rate": 7.50643216080402e-05, - "loss": 5.0384, - "step": 25319 - }, - { - "epoch": 13.204693611473273, - "grad_norm": 1.573767066001892, - "learning_rate": 7.506331658291458e-05, - "loss": 5.2877, - "step": 25320 - }, - { - "epoch": 13.205215123859192, - "grad_norm": 1.560809850692749, - "learning_rate": 7.506231155778895e-05, - "loss": 5.2358, - "step": 25321 - }, - { - "epoch": 13.205736636245112, - "grad_norm": 1.4942336082458496, - "learning_rate": 7.506130653266333e-05, - "loss": 5.0911, - "step": 25322 - }, - { - "epoch": 13.20625814863103, - "grad_norm": 1.330315113067627, - "learning_rate": 7.506030150753769e-05, - "loss": 4.8084, - "step": 25323 - }, - { - "epoch": 13.206779661016949, - "grad_norm": 1.4787858724594116, - "learning_rate": 7.505929648241207e-05, - "loss": 5.35, - "step": 25324 - }, - { - "epoch": 13.207301173402868, - "grad_norm": 1.4714666604995728, - "learning_rate": 7.505829145728643e-05, - "loss": 5.3506, - "step": 25325 - }, - { - "epoch": 13.207822685788788, - "grad_norm": 1.3703646659851074, - "learning_rate": 7.505728643216081e-05, - "loss": 5.4104, - "step": 25326 - }, - { - "epoch": 13.208344198174707, - "grad_norm": 1.3625128269195557, - "learning_rate": 7.505628140703517e-05, - "loss": 5.5092, - "step": 25327 - }, - { - "epoch": 13.208865710560627, - "grad_norm": 1.4992117881774902, - "learning_rate": 7.505527638190955e-05, - "loss": 5.6188, - "step": 25328 - }, - { - "epoch": 13.209387222946544, - "grad_norm": 1.4594110250473022, - "learning_rate": 7.505427135678393e-05, - "loss": 5.2019, - "step": 25329 - }, - { - "epoch": 13.209908735332464, - "grad_norm": 1.463136076927185, - "learning_rate": 7.50532663316583e-05, - "loss": 5.4361, - "step": 25330 - }, - { - "epoch": 13.210430247718383, - "grad_norm": 1.4826561212539673, - "learning_rate": 7.505226130653267e-05, - "loss": 5.4191, - "step": 25331 - }, - { - "epoch": 13.210951760104303, - "grad_norm": 1.4466357231140137, - "learning_rate": 7.505125628140704e-05, - "loss": 4.9051, - "step": 25332 - }, - { - "epoch": 13.211473272490222, - "grad_norm": 1.4381699562072754, - "learning_rate": 7.505025125628141e-05, - "loss": 5.2836, - "step": 25333 - }, - { - "epoch": 13.211994784876142, - "grad_norm": 1.4257413148880005, - "learning_rate": 7.504924623115578e-05, - "loss": 5.6269, - "step": 25334 - }, - { - "epoch": 13.21251629726206, - "grad_norm": 1.486267328262329, - "learning_rate": 7.504824120603016e-05, - "loss": 5.4114, - "step": 25335 - }, - { - "epoch": 13.213037809647979, - "grad_norm": 1.4078189134597778, - "learning_rate": 7.504723618090452e-05, - "loss": 5.4286, - "step": 25336 - }, - { - "epoch": 13.213559322033898, - "grad_norm": 1.406980276107788, - "learning_rate": 7.50462311557789e-05, - "loss": 5.5712, - "step": 25337 - }, - { - "epoch": 13.214080834419818, - "grad_norm": 1.371066927909851, - "learning_rate": 7.504522613065326e-05, - "loss": 5.4071, - "step": 25338 - }, - { - "epoch": 13.214602346805737, - "grad_norm": 1.5278620719909668, - "learning_rate": 7.504422110552764e-05, - "loss": 5.4969, - "step": 25339 - }, - { - "epoch": 13.215123859191657, - "grad_norm": 1.5154681205749512, - "learning_rate": 7.504321608040202e-05, - "loss": 5.5747, - "step": 25340 - }, - { - "epoch": 13.215645371577574, - "grad_norm": 1.3747785091400146, - "learning_rate": 7.50422110552764e-05, - "loss": 5.4525, - "step": 25341 - }, - { - "epoch": 13.216166883963494, - "grad_norm": 1.4558794498443604, - "learning_rate": 7.504120603015076e-05, - "loss": 5.3588, - "step": 25342 - }, - { - "epoch": 13.216688396349413, - "grad_norm": 1.4241397380828857, - "learning_rate": 7.504020100502514e-05, - "loss": 5.3524, - "step": 25343 - }, - { - "epoch": 13.217209908735333, - "grad_norm": 1.4441947937011719, - "learning_rate": 7.50391959798995e-05, - "loss": 5.4163, - "step": 25344 - }, - { - "epoch": 13.217731421121252, - "grad_norm": 1.4620333909988403, - "learning_rate": 7.503819095477387e-05, - "loss": 5.358, - "step": 25345 - }, - { - "epoch": 13.21825293350717, - "grad_norm": 1.6816033124923706, - "learning_rate": 7.503718592964824e-05, - "loss": 4.2642, - "step": 25346 - }, - { - "epoch": 13.21877444589309, - "grad_norm": 1.588183045387268, - "learning_rate": 7.503618090452261e-05, - "loss": 5.2059, - "step": 25347 - }, - { - "epoch": 13.219295958279009, - "grad_norm": 1.5693033933639526, - "learning_rate": 7.503517587939699e-05, - "loss": 5.8044, - "step": 25348 - }, - { - "epoch": 13.219817470664928, - "grad_norm": 1.4597482681274414, - "learning_rate": 7.503417085427135e-05, - "loss": 5.4203, - "step": 25349 - }, - { - "epoch": 13.220338983050848, - "grad_norm": 1.4356553554534912, - "learning_rate": 7.503316582914573e-05, - "loss": 5.4432, - "step": 25350 - }, - { - "epoch": 13.220860495436767, - "grad_norm": 1.4457191228866577, - "learning_rate": 7.50321608040201e-05, - "loss": 5.748, - "step": 25351 - }, - { - "epoch": 13.221382007822687, - "grad_norm": 1.3870149850845337, - "learning_rate": 7.503115577889448e-05, - "loss": 5.4931, - "step": 25352 - }, - { - "epoch": 13.221903520208604, - "grad_norm": 1.451483130455017, - "learning_rate": 7.503015075376885e-05, - "loss": 5.407, - "step": 25353 - }, - { - "epoch": 13.222425032594524, - "grad_norm": 1.5064964294433594, - "learning_rate": 7.502914572864323e-05, - "loss": 5.2266, - "step": 25354 - }, - { - "epoch": 13.222946544980443, - "grad_norm": 1.461282730102539, - "learning_rate": 7.502814070351759e-05, - "loss": 5.193, - "step": 25355 - }, - { - "epoch": 13.223468057366363, - "grad_norm": 1.5089267492294312, - "learning_rate": 7.502713567839197e-05, - "loss": 5.3562, - "step": 25356 - }, - { - "epoch": 13.223989569752282, - "grad_norm": 1.3725641965866089, - "learning_rate": 7.502613065326633e-05, - "loss": 5.6455, - "step": 25357 - }, - { - "epoch": 13.2245110821382, - "grad_norm": 1.394415020942688, - "learning_rate": 7.50251256281407e-05, - "loss": 5.5716, - "step": 25358 - }, - { - "epoch": 13.22503259452412, - "grad_norm": 1.3184003829956055, - "learning_rate": 7.502412060301507e-05, - "loss": 5.9532, - "step": 25359 - }, - { - "epoch": 13.225554106910039, - "grad_norm": 1.3919098377227783, - "learning_rate": 7.502311557788945e-05, - "loss": 5.7278, - "step": 25360 - }, - { - "epoch": 13.226075619295958, - "grad_norm": 1.3293943405151367, - "learning_rate": 7.502211055276383e-05, - "loss": 5.586, - "step": 25361 - }, - { - "epoch": 13.226597131681878, - "grad_norm": 1.4981939792633057, - "learning_rate": 7.50211055276382e-05, - "loss": 4.9895, - "step": 25362 - }, - { - "epoch": 13.227118644067797, - "grad_norm": 1.367714762687683, - "learning_rate": 7.502010050251257e-05, - "loss": 5.589, - "step": 25363 - }, - { - "epoch": 13.227640156453715, - "grad_norm": 1.5233795642852783, - "learning_rate": 7.501909547738694e-05, - "loss": 5.1297, - "step": 25364 - }, - { - "epoch": 13.228161668839634, - "grad_norm": 1.3977288007736206, - "learning_rate": 7.501809045226131e-05, - "loss": 5.3153, - "step": 25365 - }, - { - "epoch": 13.228683181225554, - "grad_norm": 1.5007826089859009, - "learning_rate": 7.501708542713568e-05, - "loss": 5.7703, - "step": 25366 - }, - { - "epoch": 13.229204693611473, - "grad_norm": 1.436737060546875, - "learning_rate": 7.501608040201006e-05, - "loss": 4.8979, - "step": 25367 - }, - { - "epoch": 13.229726205997393, - "grad_norm": 1.4515581130981445, - "learning_rate": 7.501507537688442e-05, - "loss": 5.2758, - "step": 25368 - }, - { - "epoch": 13.230247718383312, - "grad_norm": 1.560506820678711, - "learning_rate": 7.50140703517588e-05, - "loss": 5.3452, - "step": 25369 - }, - { - "epoch": 13.23076923076923, - "grad_norm": 1.4635729789733887, - "learning_rate": 7.501306532663316e-05, - "loss": 5.4294, - "step": 25370 - }, - { - "epoch": 13.23129074315515, - "grad_norm": 1.5501474142074585, - "learning_rate": 7.501206030150754e-05, - "loss": 5.6119, - "step": 25371 - }, - { - "epoch": 13.231812255541069, - "grad_norm": 1.4319087266921997, - "learning_rate": 7.501105527638192e-05, - "loss": 5.5077, - "step": 25372 - }, - { - "epoch": 13.232333767926988, - "grad_norm": 1.4985228776931763, - "learning_rate": 7.501005025125628e-05, - "loss": 4.9088, - "step": 25373 - }, - { - "epoch": 13.232855280312908, - "grad_norm": 1.5319523811340332, - "learning_rate": 7.500904522613066e-05, - "loss": 5.5647, - "step": 25374 - }, - { - "epoch": 13.233376792698827, - "grad_norm": 1.5170196294784546, - "learning_rate": 7.500804020100503e-05, - "loss": 4.7421, - "step": 25375 - }, - { - "epoch": 13.233898305084745, - "grad_norm": 1.56182861328125, - "learning_rate": 7.50070351758794e-05, - "loss": 4.8624, - "step": 25376 - }, - { - "epoch": 13.234419817470664, - "grad_norm": 1.3863128423690796, - "learning_rate": 7.500603015075377e-05, - "loss": 5.0806, - "step": 25377 - }, - { - "epoch": 13.234941329856584, - "grad_norm": 1.4402841329574585, - "learning_rate": 7.500502512562814e-05, - "loss": 5.6636, - "step": 25378 - }, - { - "epoch": 13.235462842242503, - "grad_norm": 1.5879734754562378, - "learning_rate": 7.500402010050251e-05, - "loss": 5.3308, - "step": 25379 - }, - { - "epoch": 13.235984354628423, - "grad_norm": 1.4170444011688232, - "learning_rate": 7.500301507537689e-05, - "loss": 5.7819, - "step": 25380 - }, - { - "epoch": 13.236505867014342, - "grad_norm": 1.4053277969360352, - "learning_rate": 7.500201005025126e-05, - "loss": 5.3469, - "step": 25381 - }, - { - "epoch": 13.23702737940026, - "grad_norm": 1.3464709520339966, - "learning_rate": 7.500100502512564e-05, - "loss": 5.641, - "step": 25382 - }, - { - "epoch": 13.23754889178618, - "grad_norm": 1.4172112941741943, - "learning_rate": 7.500000000000001e-05, - "loss": 5.5381, - "step": 25383 - }, - { - "epoch": 13.238070404172099, - "grad_norm": 1.3637399673461914, - "learning_rate": 7.499899497487437e-05, - "loss": 4.9254, - "step": 25384 - }, - { - "epoch": 13.238591916558018, - "grad_norm": 1.4713795185089111, - "learning_rate": 7.499798994974875e-05, - "loss": 5.7206, - "step": 25385 - }, - { - "epoch": 13.239113428943938, - "grad_norm": 1.6072629690170288, - "learning_rate": 7.499698492462311e-05, - "loss": 5.2498, - "step": 25386 - }, - { - "epoch": 13.239634941329857, - "grad_norm": 1.4919612407684326, - "learning_rate": 7.499597989949749e-05, - "loss": 5.301, - "step": 25387 - }, - { - "epoch": 13.240156453715775, - "grad_norm": 1.3701772689819336, - "learning_rate": 7.499497487437186e-05, - "loss": 5.3621, - "step": 25388 - }, - { - "epoch": 13.240677966101694, - "grad_norm": 1.5149915218353271, - "learning_rate": 7.499396984924623e-05, - "loss": 5.329, - "step": 25389 - }, - { - "epoch": 13.241199478487614, - "grad_norm": 1.341346025466919, - "learning_rate": 7.49929648241206e-05, - "loss": 5.7364, - "step": 25390 - }, - { - "epoch": 13.241720990873533, - "grad_norm": 1.3798847198486328, - "learning_rate": 7.499195979899498e-05, - "loss": 5.0959, - "step": 25391 - }, - { - "epoch": 13.242242503259453, - "grad_norm": 1.5736886262893677, - "learning_rate": 7.499095477386935e-05, - "loss": 5.0503, - "step": 25392 - }, - { - "epoch": 13.242764015645372, - "grad_norm": 1.4940001964569092, - "learning_rate": 7.498994974874373e-05, - "loss": 5.6135, - "step": 25393 - }, - { - "epoch": 13.24328552803129, - "grad_norm": 1.4454067945480347, - "learning_rate": 7.49889447236181e-05, - "loss": 5.3579, - "step": 25394 - }, - { - "epoch": 13.24380704041721, - "grad_norm": 1.5157781839370728, - "learning_rate": 7.498793969849247e-05, - "loss": 5.358, - "step": 25395 - }, - { - "epoch": 13.244328552803129, - "grad_norm": 1.4811928272247314, - "learning_rate": 7.498693467336684e-05, - "loss": 5.2597, - "step": 25396 - }, - { - "epoch": 13.244850065189048, - "grad_norm": 1.3369364738464355, - "learning_rate": 7.49859296482412e-05, - "loss": 5.8848, - "step": 25397 - }, - { - "epoch": 13.245371577574968, - "grad_norm": 1.4870104789733887, - "learning_rate": 7.498492462311558e-05, - "loss": 5.1615, - "step": 25398 - }, - { - "epoch": 13.245893089960887, - "grad_norm": 1.4334619045257568, - "learning_rate": 7.498391959798994e-05, - "loss": 5.412, - "step": 25399 - }, - { - "epoch": 13.246414602346805, - "grad_norm": 1.4014244079589844, - "learning_rate": 7.498291457286432e-05, - "loss": 5.4811, - "step": 25400 - }, - { - "epoch": 13.246936114732724, - "grad_norm": 1.560128927230835, - "learning_rate": 7.49819095477387e-05, - "loss": 5.3214, - "step": 25401 - }, - { - "epoch": 13.247457627118644, - "grad_norm": 1.4108891487121582, - "learning_rate": 7.498090452261308e-05, - "loss": 5.3792, - "step": 25402 - }, - { - "epoch": 13.247979139504563, - "grad_norm": 1.4482345581054688, - "learning_rate": 7.497989949748744e-05, - "loss": 5.5627, - "step": 25403 - }, - { - "epoch": 13.248500651890483, - "grad_norm": 1.469772219657898, - "learning_rate": 7.497889447236182e-05, - "loss": 5.1852, - "step": 25404 - }, - { - "epoch": 13.249022164276402, - "grad_norm": 1.4286941289901733, - "learning_rate": 7.497788944723618e-05, - "loss": 5.3633, - "step": 25405 - }, - { - "epoch": 13.24954367666232, - "grad_norm": 1.3124587535858154, - "learning_rate": 7.497688442211056e-05, - "loss": 5.2422, - "step": 25406 - }, - { - "epoch": 13.25006518904824, - "grad_norm": 1.4637466669082642, - "learning_rate": 7.497587939698493e-05, - "loss": 5.7955, - "step": 25407 - }, - { - "epoch": 13.250586701434159, - "grad_norm": 1.2904689311981201, - "learning_rate": 7.49748743718593e-05, - "loss": 5.6626, - "step": 25408 - }, - { - "epoch": 13.251108213820078, - "grad_norm": 1.3540548086166382, - "learning_rate": 7.497386934673367e-05, - "loss": 5.2314, - "step": 25409 - }, - { - "epoch": 13.251629726205998, - "grad_norm": 1.5281566381454468, - "learning_rate": 7.497286432160803e-05, - "loss": 5.127, - "step": 25410 - }, - { - "epoch": 13.252151238591917, - "grad_norm": 1.445682406425476, - "learning_rate": 7.497185929648241e-05, - "loss": 5.5945, - "step": 25411 - }, - { - "epoch": 13.252672750977835, - "grad_norm": 1.490857481956482, - "learning_rate": 7.497085427135679e-05, - "loss": 4.8647, - "step": 25412 - }, - { - "epoch": 13.253194263363755, - "grad_norm": 1.3601360321044922, - "learning_rate": 7.496984924623117e-05, - "loss": 5.7316, - "step": 25413 - }, - { - "epoch": 13.253715775749674, - "grad_norm": 1.5419914722442627, - "learning_rate": 7.496884422110553e-05, - "loss": 5.0059, - "step": 25414 - }, - { - "epoch": 13.254237288135593, - "grad_norm": 1.4919794797897339, - "learning_rate": 7.496783919597991e-05, - "loss": 4.798, - "step": 25415 - }, - { - "epoch": 13.254758800521513, - "grad_norm": 1.4225702285766602, - "learning_rate": 7.496683417085427e-05, - "loss": 5.6059, - "step": 25416 - }, - { - "epoch": 13.255280312907432, - "grad_norm": 1.4600328207015991, - "learning_rate": 7.496582914572865e-05, - "loss": 5.6937, - "step": 25417 - }, - { - "epoch": 13.25580182529335, - "grad_norm": 1.453451156616211, - "learning_rate": 7.496482412060301e-05, - "loss": 5.4528, - "step": 25418 - }, - { - "epoch": 13.25632333767927, - "grad_norm": 1.4664722681045532, - "learning_rate": 7.496381909547739e-05, - "loss": 5.2624, - "step": 25419 - }, - { - "epoch": 13.256844850065189, - "grad_norm": 1.3433711528778076, - "learning_rate": 7.496281407035176e-05, - "loss": 5.3387, - "step": 25420 - }, - { - "epoch": 13.257366362451108, - "grad_norm": 1.5245369672775269, - "learning_rate": 7.496180904522613e-05, - "loss": 5.3212, - "step": 25421 - }, - { - "epoch": 13.257887874837028, - "grad_norm": 1.396591305732727, - "learning_rate": 7.496080402010051e-05, - "loss": 5.4732, - "step": 25422 - }, - { - "epoch": 13.258409387222947, - "grad_norm": 1.4624009132385254, - "learning_rate": 7.495979899497488e-05, - "loss": 5.3436, - "step": 25423 - }, - { - "epoch": 13.258930899608865, - "grad_norm": 1.4881036281585693, - "learning_rate": 7.495879396984925e-05, - "loss": 5.6723, - "step": 25424 - }, - { - "epoch": 13.259452411994785, - "grad_norm": 1.444699764251709, - "learning_rate": 7.495778894472362e-05, - "loss": 5.5548, - "step": 25425 - }, - { - "epoch": 13.259973924380704, - "grad_norm": 1.483657956123352, - "learning_rate": 7.4956783919598e-05, - "loss": 5.7527, - "step": 25426 - }, - { - "epoch": 13.260495436766623, - "grad_norm": 1.5487092733383179, - "learning_rate": 7.495577889447236e-05, - "loss": 4.8058, - "step": 25427 - }, - { - "epoch": 13.261016949152543, - "grad_norm": 1.4776846170425415, - "learning_rate": 7.495477386934674e-05, - "loss": 5.5708, - "step": 25428 - }, - { - "epoch": 13.261538461538462, - "grad_norm": 1.4845448732376099, - "learning_rate": 7.49537688442211e-05, - "loss": 5.0636, - "step": 25429 - }, - { - "epoch": 13.26205997392438, - "grad_norm": 1.4320068359375, - "learning_rate": 7.495276381909548e-05, - "loss": 5.6014, - "step": 25430 - }, - { - "epoch": 13.2625814863103, - "grad_norm": 1.5208157300949097, - "learning_rate": 7.495175879396984e-05, - "loss": 5.3573, - "step": 25431 - }, - { - "epoch": 13.263102998696219, - "grad_norm": 1.4467675685882568, - "learning_rate": 7.495075376884422e-05, - "loss": 5.3684, - "step": 25432 - }, - { - "epoch": 13.263624511082138, - "grad_norm": 1.3831690549850464, - "learning_rate": 7.49497487437186e-05, - "loss": 5.4093, - "step": 25433 - }, - { - "epoch": 13.264146023468058, - "grad_norm": 1.4340338706970215, - "learning_rate": 7.494874371859298e-05, - "loss": 5.711, - "step": 25434 - }, - { - "epoch": 13.264667535853977, - "grad_norm": 1.487748146057129, - "learning_rate": 7.494773869346734e-05, - "loss": 5.3334, - "step": 25435 - }, - { - "epoch": 13.265189048239895, - "grad_norm": 1.6333342790603638, - "learning_rate": 7.494673366834172e-05, - "loss": 5.3091, - "step": 25436 - }, - { - "epoch": 13.265710560625815, - "grad_norm": 1.49601149559021, - "learning_rate": 7.494572864321608e-05, - "loss": 5.3201, - "step": 25437 - }, - { - "epoch": 13.266232073011734, - "grad_norm": 1.358510971069336, - "learning_rate": 7.494472361809045e-05, - "loss": 5.7732, - "step": 25438 - }, - { - "epoch": 13.266753585397653, - "grad_norm": 1.4911569356918335, - "learning_rate": 7.494371859296483e-05, - "loss": 5.4249, - "step": 25439 - }, - { - "epoch": 13.267275097783573, - "grad_norm": 1.6501615047454834, - "learning_rate": 7.494271356783919e-05, - "loss": 5.2455, - "step": 25440 - }, - { - "epoch": 13.26779661016949, - "grad_norm": 1.5945181846618652, - "learning_rate": 7.494170854271357e-05, - "loss": 5.4265, - "step": 25441 - }, - { - "epoch": 13.26831812255541, - "grad_norm": 1.5750492811203003, - "learning_rate": 7.494070351758795e-05, - "loss": 5.568, - "step": 25442 - }, - { - "epoch": 13.26883963494133, - "grad_norm": 1.5138028860092163, - "learning_rate": 7.493969849246232e-05, - "loss": 5.2356, - "step": 25443 - }, - { - "epoch": 13.269361147327249, - "grad_norm": 1.3969258069992065, - "learning_rate": 7.493869346733669e-05, - "loss": 5.4655, - "step": 25444 - }, - { - "epoch": 13.269882659713168, - "grad_norm": 1.5121150016784668, - "learning_rate": 7.493768844221107e-05, - "loss": 5.1723, - "step": 25445 - }, - { - "epoch": 13.270404172099088, - "grad_norm": 1.5728468894958496, - "learning_rate": 7.493668341708543e-05, - "loss": 5.2414, - "step": 25446 - }, - { - "epoch": 13.270925684485007, - "grad_norm": 1.387013554573059, - "learning_rate": 7.493567839195981e-05, - "loss": 5.7552, - "step": 25447 - }, - { - "epoch": 13.271447196870925, - "grad_norm": 1.639012336730957, - "learning_rate": 7.493467336683417e-05, - "loss": 5.1895, - "step": 25448 - }, - { - "epoch": 13.271968709256845, - "grad_norm": 1.4688339233398438, - "learning_rate": 7.493366834170855e-05, - "loss": 5.6309, - "step": 25449 - }, - { - "epoch": 13.272490221642764, - "grad_norm": 1.4740759134292603, - "learning_rate": 7.493266331658291e-05, - "loss": 5.5697, - "step": 25450 - }, - { - "epoch": 13.273011734028684, - "grad_norm": 1.387086272239685, - "learning_rate": 7.493165829145728e-05, - "loss": 5.1108, - "step": 25451 - }, - { - "epoch": 13.273533246414603, - "grad_norm": 1.4997508525848389, - "learning_rate": 7.493065326633166e-05, - "loss": 4.8153, - "step": 25452 - }, - { - "epoch": 13.27405475880052, - "grad_norm": 1.5837382078170776, - "learning_rate": 7.492964824120603e-05, - "loss": 5.4297, - "step": 25453 - }, - { - "epoch": 13.27457627118644, - "grad_norm": 1.3818271160125732, - "learning_rate": 7.492864321608041e-05, - "loss": 5.5643, - "step": 25454 - }, - { - "epoch": 13.27509778357236, - "grad_norm": 1.4828463792800903, - "learning_rate": 7.492763819095478e-05, - "loss": 5.3296, - "step": 25455 - }, - { - "epoch": 13.275619295958279, - "grad_norm": 1.43472421169281, - "learning_rate": 7.492663316582915e-05, - "loss": 5.6744, - "step": 25456 - }, - { - "epoch": 13.276140808344199, - "grad_norm": 1.4476120471954346, - "learning_rate": 7.492562814070352e-05, - "loss": 5.698, - "step": 25457 - }, - { - "epoch": 13.276662320730118, - "grad_norm": 1.4214569330215454, - "learning_rate": 7.49246231155779e-05, - "loss": 5.7965, - "step": 25458 - }, - { - "epoch": 13.277183833116036, - "grad_norm": 1.456536889076233, - "learning_rate": 7.492361809045226e-05, - "loss": 5.5261, - "step": 25459 - }, - { - "epoch": 13.277705345501955, - "grad_norm": 1.5149778127670288, - "learning_rate": 7.492261306532664e-05, - "loss": 4.8079, - "step": 25460 - }, - { - "epoch": 13.278226857887875, - "grad_norm": 1.4437638521194458, - "learning_rate": 7.4921608040201e-05, - "loss": 5.5186, - "step": 25461 - }, - { - "epoch": 13.278748370273794, - "grad_norm": 1.546217441558838, - "learning_rate": 7.492060301507538e-05, - "loss": 5.2631, - "step": 25462 - }, - { - "epoch": 13.279269882659714, - "grad_norm": 1.4815410375595093, - "learning_rate": 7.491959798994976e-05, - "loss": 4.3787, - "step": 25463 - }, - { - "epoch": 13.279791395045633, - "grad_norm": 1.4813884496688843, - "learning_rate": 7.491859296482412e-05, - "loss": 5.475, - "step": 25464 - }, - { - "epoch": 13.28031290743155, - "grad_norm": 1.5217376947402954, - "learning_rate": 7.49175879396985e-05, - "loss": 5.5502, - "step": 25465 - }, - { - "epoch": 13.28083441981747, - "grad_norm": 1.4435014724731445, - "learning_rate": 7.491658291457287e-05, - "loss": 5.6696, - "step": 25466 - }, - { - "epoch": 13.28135593220339, - "grad_norm": 1.399161696434021, - "learning_rate": 7.491557788944724e-05, - "loss": 5.5473, - "step": 25467 - }, - { - "epoch": 13.281877444589309, - "grad_norm": 1.4496208429336548, - "learning_rate": 7.491457286432161e-05, - "loss": 5.3645, - "step": 25468 - }, - { - "epoch": 13.282398956975229, - "grad_norm": 1.4534052610397339, - "learning_rate": 7.491356783919599e-05, - "loss": 5.688, - "step": 25469 - }, - { - "epoch": 13.282920469361148, - "grad_norm": 1.5404130220413208, - "learning_rate": 7.491256281407035e-05, - "loss": 5.6561, - "step": 25470 - }, - { - "epoch": 13.283441981747066, - "grad_norm": 1.6273983716964722, - "learning_rate": 7.491155778894473e-05, - "loss": 4.5418, - "step": 25471 - }, - { - "epoch": 13.283963494132985, - "grad_norm": 1.516241192817688, - "learning_rate": 7.491055276381909e-05, - "loss": 5.2987, - "step": 25472 - }, - { - "epoch": 13.284485006518905, - "grad_norm": 1.514333963394165, - "learning_rate": 7.490954773869347e-05, - "loss": 5.5389, - "step": 25473 - }, - { - "epoch": 13.285006518904824, - "grad_norm": 1.4123773574829102, - "learning_rate": 7.490854271356785e-05, - "loss": 5.5382, - "step": 25474 - }, - { - "epoch": 13.285528031290744, - "grad_norm": 1.5146337747573853, - "learning_rate": 7.490753768844223e-05, - "loss": 5.3066, - "step": 25475 - }, - { - "epoch": 13.286049543676663, - "grad_norm": 1.457753300666809, - "learning_rate": 7.490653266331659e-05, - "loss": 5.4247, - "step": 25476 - }, - { - "epoch": 13.28657105606258, - "grad_norm": 1.562856674194336, - "learning_rate": 7.490552763819095e-05, - "loss": 5.4172, - "step": 25477 - }, - { - "epoch": 13.2870925684485, - "grad_norm": 1.713830828666687, - "learning_rate": 7.490452261306533e-05, - "loss": 5.4161, - "step": 25478 - }, - { - "epoch": 13.28761408083442, - "grad_norm": 1.4195996522903442, - "learning_rate": 7.49035175879397e-05, - "loss": 5.4058, - "step": 25479 - }, - { - "epoch": 13.288135593220339, - "grad_norm": 1.4242713451385498, - "learning_rate": 7.490251256281407e-05, - "loss": 5.6569, - "step": 25480 - }, - { - "epoch": 13.288657105606259, - "grad_norm": 1.403554916381836, - "learning_rate": 7.490150753768844e-05, - "loss": 5.6674, - "step": 25481 - }, - { - "epoch": 13.289178617992178, - "grad_norm": 1.4927210807800293, - "learning_rate": 7.490050251256282e-05, - "loss": 5.5946, - "step": 25482 - }, - { - "epoch": 13.289700130378096, - "grad_norm": 1.5187278985977173, - "learning_rate": 7.48994974874372e-05, - "loss": 5.312, - "step": 25483 - }, - { - "epoch": 13.290221642764015, - "grad_norm": 1.4256985187530518, - "learning_rate": 7.489849246231157e-05, - "loss": 5.2557, - "step": 25484 - }, - { - "epoch": 13.290743155149935, - "grad_norm": 1.4232560396194458, - "learning_rate": 7.489748743718594e-05, - "loss": 5.6367, - "step": 25485 - }, - { - "epoch": 13.291264667535854, - "grad_norm": 1.4602237939834595, - "learning_rate": 7.489648241206031e-05, - "loss": 5.7261, - "step": 25486 - }, - { - "epoch": 13.291786179921774, - "grad_norm": 1.4894514083862305, - "learning_rate": 7.489547738693468e-05, - "loss": 5.429, - "step": 25487 - }, - { - "epoch": 13.292307692307693, - "grad_norm": 1.4317399263381958, - "learning_rate": 7.489447236180906e-05, - "loss": 5.6069, - "step": 25488 - }, - { - "epoch": 13.29282920469361, - "grad_norm": 1.4902106523513794, - "learning_rate": 7.489346733668342e-05, - "loss": 5.5808, - "step": 25489 - }, - { - "epoch": 13.29335071707953, - "grad_norm": 1.5316826105117798, - "learning_rate": 7.489246231155778e-05, - "loss": 5.651, - "step": 25490 - }, - { - "epoch": 13.29387222946545, - "grad_norm": 1.4146475791931152, - "learning_rate": 7.489145728643216e-05, - "loss": 5.7673, - "step": 25491 - }, - { - "epoch": 13.29439374185137, - "grad_norm": 1.4503220319747925, - "learning_rate": 7.489045226130653e-05, - "loss": 5.8194, - "step": 25492 - }, - { - "epoch": 13.294915254237289, - "grad_norm": 1.5079025030136108, - "learning_rate": 7.48894472361809e-05, - "loss": 5.5096, - "step": 25493 - }, - { - "epoch": 13.295436766623208, - "grad_norm": 1.4352809190750122, - "learning_rate": 7.488844221105528e-05, - "loss": 5.5479, - "step": 25494 - }, - { - "epoch": 13.295958279009126, - "grad_norm": 1.3879892826080322, - "learning_rate": 7.488743718592966e-05, - "loss": 5.4174, - "step": 25495 - }, - { - "epoch": 13.296479791395045, - "grad_norm": 1.5129196643829346, - "learning_rate": 7.488643216080402e-05, - "loss": 4.8313, - "step": 25496 - }, - { - "epoch": 13.297001303780965, - "grad_norm": 1.5429164171218872, - "learning_rate": 7.48854271356784e-05, - "loss": 5.0285, - "step": 25497 - }, - { - "epoch": 13.297522816166884, - "grad_norm": 1.3927971124649048, - "learning_rate": 7.488442211055277e-05, - "loss": 5.1617, - "step": 25498 - }, - { - "epoch": 13.298044328552804, - "grad_norm": 1.4243543148040771, - "learning_rate": 7.488341708542714e-05, - "loss": 5.0987, - "step": 25499 - }, - { - "epoch": 13.298565840938723, - "grad_norm": 1.442317247390747, - "learning_rate": 7.488241206030151e-05, - "loss": 5.7821, - "step": 25500 - }, - { - "epoch": 13.29908735332464, - "grad_norm": 1.4412719011306763, - "learning_rate": 7.488140703517589e-05, - "loss": 5.4878, - "step": 25501 - }, - { - "epoch": 13.29960886571056, - "grad_norm": 1.4520676136016846, - "learning_rate": 7.488040201005025e-05, - "loss": 5.3983, - "step": 25502 - }, - { - "epoch": 13.30013037809648, - "grad_norm": 1.5508865118026733, - "learning_rate": 7.487939698492463e-05, - "loss": 4.9832, - "step": 25503 - }, - { - "epoch": 13.3006518904824, - "grad_norm": 1.4898319244384766, - "learning_rate": 7.4878391959799e-05, - "loss": 4.9675, - "step": 25504 - }, - { - "epoch": 13.301173402868319, - "grad_norm": 1.3181625604629517, - "learning_rate": 7.487738693467337e-05, - "loss": 5.8454, - "step": 25505 - }, - { - "epoch": 13.301694915254238, - "grad_norm": 1.4271488189697266, - "learning_rate": 7.487638190954775e-05, - "loss": 5.6292, - "step": 25506 - }, - { - "epoch": 13.302216427640156, - "grad_norm": 1.4042320251464844, - "learning_rate": 7.487537688442211e-05, - "loss": 5.2591, - "step": 25507 - }, - { - "epoch": 13.302737940026075, - "grad_norm": 1.4861184358596802, - "learning_rate": 7.487437185929649e-05, - "loss": 5.3209, - "step": 25508 - }, - { - "epoch": 13.303259452411995, - "grad_norm": 1.5696898698806763, - "learning_rate": 7.487336683417085e-05, - "loss": 5.1774, - "step": 25509 - }, - { - "epoch": 13.303780964797914, - "grad_norm": 1.576454997062683, - "learning_rate": 7.487236180904523e-05, - "loss": 5.82, - "step": 25510 - }, - { - "epoch": 13.304302477183834, - "grad_norm": 1.4434504508972168, - "learning_rate": 7.48713567839196e-05, - "loss": 5.636, - "step": 25511 - }, - { - "epoch": 13.304823989569753, - "grad_norm": 1.4732856750488281, - "learning_rate": 7.487035175879397e-05, - "loss": 5.419, - "step": 25512 - }, - { - "epoch": 13.30534550195567, - "grad_norm": 1.5503524541854858, - "learning_rate": 7.486934673366834e-05, - "loss": 4.9895, - "step": 25513 - }, - { - "epoch": 13.30586701434159, - "grad_norm": 1.5306724309921265, - "learning_rate": 7.486834170854272e-05, - "loss": 5.0775, - "step": 25514 - }, - { - "epoch": 13.30638852672751, - "grad_norm": 1.4281803369522095, - "learning_rate": 7.48673366834171e-05, - "loss": 5.4563, - "step": 25515 - }, - { - "epoch": 13.30691003911343, - "grad_norm": 1.5258958339691162, - "learning_rate": 7.486633165829146e-05, - "loss": 5.4758, - "step": 25516 - }, - { - "epoch": 13.307431551499349, - "grad_norm": 1.337449073791504, - "learning_rate": 7.486532663316584e-05, - "loss": 4.4649, - "step": 25517 - }, - { - "epoch": 13.307953063885268, - "grad_norm": 1.4921514987945557, - "learning_rate": 7.48643216080402e-05, - "loss": 5.4881, - "step": 25518 - }, - { - "epoch": 13.308474576271186, - "grad_norm": 1.5289459228515625, - "learning_rate": 7.486331658291458e-05, - "loss": 5.0555, - "step": 25519 - }, - { - "epoch": 13.308996088657105, - "grad_norm": 1.4499554634094238, - "learning_rate": 7.486231155778894e-05, - "loss": 5.2757, - "step": 25520 - }, - { - "epoch": 13.309517601043025, - "grad_norm": 1.47807776927948, - "learning_rate": 7.486130653266332e-05, - "loss": 5.4779, - "step": 25521 - }, - { - "epoch": 13.310039113428944, - "grad_norm": 1.5748295783996582, - "learning_rate": 7.486030150753768e-05, - "loss": 5.6045, - "step": 25522 - }, - { - "epoch": 13.310560625814864, - "grad_norm": 1.446698546409607, - "learning_rate": 7.485929648241206e-05, - "loss": 5.596, - "step": 25523 - }, - { - "epoch": 13.311082138200783, - "grad_norm": 1.4704374074935913, - "learning_rate": 7.485829145728643e-05, - "loss": 5.7527, - "step": 25524 - }, - { - "epoch": 13.3116036505867, - "grad_norm": 1.5408073663711548, - "learning_rate": 7.48572864321608e-05, - "loss": 5.7377, - "step": 25525 - }, - { - "epoch": 13.31212516297262, - "grad_norm": 1.5659096240997314, - "learning_rate": 7.485628140703518e-05, - "loss": 5.3096, - "step": 25526 - }, - { - "epoch": 13.31264667535854, - "grad_norm": 1.5742545127868652, - "learning_rate": 7.485527638190956e-05, - "loss": 4.4117, - "step": 25527 - }, - { - "epoch": 13.31316818774446, - "grad_norm": 1.4061338901519775, - "learning_rate": 7.485427135678392e-05, - "loss": 5.4871, - "step": 25528 - }, - { - "epoch": 13.313689700130379, - "grad_norm": 1.5263487100601196, - "learning_rate": 7.48532663316583e-05, - "loss": 5.2008, - "step": 25529 - }, - { - "epoch": 13.314211212516298, - "grad_norm": 1.6418848037719727, - "learning_rate": 7.485226130653267e-05, - "loss": 5.4449, - "step": 25530 - }, - { - "epoch": 13.314732724902216, - "grad_norm": 1.5521506071090698, - "learning_rate": 7.485125628140703e-05, - "loss": 5.4927, - "step": 25531 - }, - { - "epoch": 13.315254237288135, - "grad_norm": 1.5622690916061401, - "learning_rate": 7.485025125628141e-05, - "loss": 5.6648, - "step": 25532 - }, - { - "epoch": 13.315775749674055, - "grad_norm": 1.3792476654052734, - "learning_rate": 7.484924623115577e-05, - "loss": 5.6607, - "step": 25533 - }, - { - "epoch": 13.316297262059974, - "grad_norm": 1.4701852798461914, - "learning_rate": 7.484824120603015e-05, - "loss": 5.5734, - "step": 25534 - }, - { - "epoch": 13.316818774445894, - "grad_norm": 1.487239956855774, - "learning_rate": 7.484723618090453e-05, - "loss": 5.1162, - "step": 25535 - }, - { - "epoch": 13.317340286831811, - "grad_norm": 1.7171566486358643, - "learning_rate": 7.48462311557789e-05, - "loss": 5.0515, - "step": 25536 - }, - { - "epoch": 13.31786179921773, - "grad_norm": 1.4166496992111206, - "learning_rate": 7.484522613065327e-05, - "loss": 5.7802, - "step": 25537 - }, - { - "epoch": 13.31838331160365, - "grad_norm": 1.4246013164520264, - "learning_rate": 7.484422110552765e-05, - "loss": 5.5315, - "step": 25538 - }, - { - "epoch": 13.31890482398957, - "grad_norm": 1.438037395477295, - "learning_rate": 7.484321608040201e-05, - "loss": 5.7734, - "step": 25539 - }, - { - "epoch": 13.31942633637549, - "grad_norm": 1.5860764980316162, - "learning_rate": 7.484221105527639e-05, - "loss": 5.1958, - "step": 25540 - }, - { - "epoch": 13.319947848761409, - "grad_norm": 1.4423797130584717, - "learning_rate": 7.484120603015076e-05, - "loss": 5.0965, - "step": 25541 - }, - { - "epoch": 13.320469361147328, - "grad_norm": 1.449641466140747, - "learning_rate": 7.484020100502513e-05, - "loss": 5.6765, - "step": 25542 - }, - { - "epoch": 13.320990873533246, - "grad_norm": 1.4187029600143433, - "learning_rate": 7.48391959798995e-05, - "loss": 5.3924, - "step": 25543 - }, - { - "epoch": 13.321512385919165, - "grad_norm": 1.4197331666946411, - "learning_rate": 7.483819095477386e-05, - "loss": 5.4747, - "step": 25544 - }, - { - "epoch": 13.322033898305085, - "grad_norm": 1.5158134698867798, - "learning_rate": 7.483718592964824e-05, - "loss": 5.2891, - "step": 25545 - }, - { - "epoch": 13.322555410691004, - "grad_norm": 1.570731520652771, - "learning_rate": 7.483618090452262e-05, - "loss": 4.6639, - "step": 25546 - }, - { - "epoch": 13.323076923076924, - "grad_norm": 1.4813400506973267, - "learning_rate": 7.4835175879397e-05, - "loss": 5.76, - "step": 25547 - }, - { - "epoch": 13.323598435462841, - "grad_norm": 1.591463565826416, - "learning_rate": 7.483417085427136e-05, - "loss": 5.0242, - "step": 25548 - }, - { - "epoch": 13.32411994784876, - "grad_norm": 1.4473334550857544, - "learning_rate": 7.483316582914574e-05, - "loss": 5.4688, - "step": 25549 - }, - { - "epoch": 13.32464146023468, - "grad_norm": 1.416987419128418, - "learning_rate": 7.48321608040201e-05, - "loss": 5.1433, - "step": 25550 - }, - { - "epoch": 13.3251629726206, - "grad_norm": 1.4789289236068726, - "learning_rate": 7.483115577889448e-05, - "loss": 5.5065, - "step": 25551 - }, - { - "epoch": 13.32568448500652, - "grad_norm": 1.623256802558899, - "learning_rate": 7.483015075376884e-05, - "loss": 5.3526, - "step": 25552 - }, - { - "epoch": 13.326205997392439, - "grad_norm": 1.3860317468643188, - "learning_rate": 7.482914572864322e-05, - "loss": 5.7319, - "step": 25553 - }, - { - "epoch": 13.326727509778356, - "grad_norm": 1.51033353805542, - "learning_rate": 7.482814070351759e-05, - "loss": 5.3128, - "step": 25554 - }, - { - "epoch": 13.327249022164276, - "grad_norm": 1.5349633693695068, - "learning_rate": 7.482713567839196e-05, - "loss": 5.1729, - "step": 25555 - }, - { - "epoch": 13.327770534550195, - "grad_norm": 1.5134656429290771, - "learning_rate": 7.482613065326634e-05, - "loss": 5.1573, - "step": 25556 - }, - { - "epoch": 13.328292046936115, - "grad_norm": 1.5234713554382324, - "learning_rate": 7.48251256281407e-05, - "loss": 5.4401, - "step": 25557 - }, - { - "epoch": 13.328813559322034, - "grad_norm": 1.4853897094726562, - "learning_rate": 7.482412060301508e-05, - "loss": 5.4037, - "step": 25558 - }, - { - "epoch": 13.329335071707954, - "grad_norm": 1.4234157800674438, - "learning_rate": 7.482311557788945e-05, - "loss": 5.5121, - "step": 25559 - }, - { - "epoch": 13.329856584093871, - "grad_norm": 1.537839651107788, - "learning_rate": 7.482211055276383e-05, - "loss": 5.1771, - "step": 25560 - }, - { - "epoch": 13.33037809647979, - "grad_norm": 1.5234686136245728, - "learning_rate": 7.482110552763819e-05, - "loss": 5.2573, - "step": 25561 - }, - { - "epoch": 13.33089960886571, - "grad_norm": 1.4342267513275146, - "learning_rate": 7.482010050251257e-05, - "loss": 5.3798, - "step": 25562 - }, - { - "epoch": 13.33142112125163, - "grad_norm": 1.4434552192687988, - "learning_rate": 7.481909547738693e-05, - "loss": 4.9074, - "step": 25563 - }, - { - "epoch": 13.33194263363755, - "grad_norm": 1.449726939201355, - "learning_rate": 7.481809045226131e-05, - "loss": 5.2803, - "step": 25564 - }, - { - "epoch": 13.332464146023469, - "grad_norm": 1.5331059694290161, - "learning_rate": 7.481708542713567e-05, - "loss": 5.0215, - "step": 25565 - }, - { - "epoch": 13.332985658409386, - "grad_norm": 1.5103806257247925, - "learning_rate": 7.481608040201005e-05, - "loss": 5.4239, - "step": 25566 - }, - { - "epoch": 13.333507170795306, - "grad_norm": 1.554749608039856, - "learning_rate": 7.481507537688443e-05, - "loss": 5.1711, - "step": 25567 - }, - { - "epoch": 13.334028683181225, - "grad_norm": 1.4405765533447266, - "learning_rate": 7.481407035175881e-05, - "loss": 5.5412, - "step": 25568 - }, - { - "epoch": 13.334550195567145, - "grad_norm": 1.3587651252746582, - "learning_rate": 7.481306532663317e-05, - "loss": 5.6807, - "step": 25569 - }, - { - "epoch": 13.335071707953064, - "grad_norm": 1.4758906364440918, - "learning_rate": 7.481206030150754e-05, - "loss": 5.7347, - "step": 25570 - }, - { - "epoch": 13.335593220338984, - "grad_norm": 1.5429096221923828, - "learning_rate": 7.481105527638191e-05, - "loss": 5.2594, - "step": 25571 - }, - { - "epoch": 13.336114732724901, - "grad_norm": 1.4282653331756592, - "learning_rate": 7.481005025125628e-05, - "loss": 5.7007, - "step": 25572 - }, - { - "epoch": 13.336636245110821, - "grad_norm": 1.3735647201538086, - "learning_rate": 7.480904522613066e-05, - "loss": 5.6683, - "step": 25573 - }, - { - "epoch": 13.33715775749674, - "grad_norm": 1.4209283590316772, - "learning_rate": 7.480804020100502e-05, - "loss": 4.9736, - "step": 25574 - }, - { - "epoch": 13.33767926988266, - "grad_norm": 1.4768247604370117, - "learning_rate": 7.48070351758794e-05, - "loss": 5.0097, - "step": 25575 - }, - { - "epoch": 13.33820078226858, - "grad_norm": 1.4996529817581177, - "learning_rate": 7.480603015075378e-05, - "loss": 5.2568, - "step": 25576 - }, - { - "epoch": 13.338722294654499, - "grad_norm": 1.4720381498336792, - "learning_rate": 7.480502512562815e-05, - "loss": 5.6309, - "step": 25577 - }, - { - "epoch": 13.339243807040416, - "grad_norm": 1.4799480438232422, - "learning_rate": 7.480402010050252e-05, - "loss": 5.3764, - "step": 25578 - }, - { - "epoch": 13.339765319426336, - "grad_norm": 1.5044090747833252, - "learning_rate": 7.48030150753769e-05, - "loss": 5.5646, - "step": 25579 - }, - { - "epoch": 13.340286831812255, - "grad_norm": 1.4270243644714355, - "learning_rate": 7.480201005025126e-05, - "loss": 5.1767, - "step": 25580 - }, - { - "epoch": 13.340808344198175, - "grad_norm": 1.54099440574646, - "learning_rate": 7.480100502512564e-05, - "loss": 5.2961, - "step": 25581 - }, - { - "epoch": 13.341329856584094, - "grad_norm": 1.3753052949905396, - "learning_rate": 7.48e-05, - "loss": 5.4752, - "step": 25582 - }, - { - "epoch": 13.341851368970014, - "grad_norm": 1.4975007772445679, - "learning_rate": 7.479899497487437e-05, - "loss": 5.6612, - "step": 25583 - }, - { - "epoch": 13.342372881355931, - "grad_norm": 1.4001284837722778, - "learning_rate": 7.479798994974874e-05, - "loss": 5.5031, - "step": 25584 - }, - { - "epoch": 13.342894393741851, - "grad_norm": 1.4902957677841187, - "learning_rate": 7.479698492462311e-05, - "loss": 5.3734, - "step": 25585 - }, - { - "epoch": 13.34341590612777, - "grad_norm": 1.6393481492996216, - "learning_rate": 7.479597989949749e-05, - "loss": 5.0605, - "step": 25586 - }, - { - "epoch": 13.34393741851369, - "grad_norm": 1.5268242359161377, - "learning_rate": 7.479497487437186e-05, - "loss": 5.5633, - "step": 25587 - }, - { - "epoch": 13.34445893089961, - "grad_norm": 1.5437623262405396, - "learning_rate": 7.479396984924624e-05, - "loss": 5.5552, - "step": 25588 - }, - { - "epoch": 13.344980443285529, - "grad_norm": 1.3292922973632812, - "learning_rate": 7.47929648241206e-05, - "loss": 5.3713, - "step": 25589 - }, - { - "epoch": 13.345501955671446, - "grad_norm": 1.4286381006240845, - "learning_rate": 7.479195979899498e-05, - "loss": 5.4373, - "step": 25590 - }, - { - "epoch": 13.346023468057366, - "grad_norm": 1.4227012395858765, - "learning_rate": 7.479095477386935e-05, - "loss": 5.7199, - "step": 25591 - }, - { - "epoch": 13.346544980443285, - "grad_norm": 1.351165771484375, - "learning_rate": 7.478994974874373e-05, - "loss": 5.7236, - "step": 25592 - }, - { - "epoch": 13.347066492829205, - "grad_norm": 1.5079376697540283, - "learning_rate": 7.478894472361809e-05, - "loss": 5.7103, - "step": 25593 - }, - { - "epoch": 13.347588005215124, - "grad_norm": 1.469797968864441, - "learning_rate": 7.478793969849247e-05, - "loss": 5.4346, - "step": 25594 - }, - { - "epoch": 13.348109517601044, - "grad_norm": 1.3638031482696533, - "learning_rate": 7.478693467336683e-05, - "loss": 5.6972, - "step": 25595 - }, - { - "epoch": 13.348631029986961, - "grad_norm": 1.4642471075057983, - "learning_rate": 7.478592964824121e-05, - "loss": 5.3389, - "step": 25596 - }, - { - "epoch": 13.349152542372881, - "grad_norm": 1.5090336799621582, - "learning_rate": 7.478492462311559e-05, - "loss": 5.5247, - "step": 25597 - }, - { - "epoch": 13.3496740547588, - "grad_norm": 1.4612860679626465, - "learning_rate": 7.478391959798995e-05, - "loss": 5.6164, - "step": 25598 - }, - { - "epoch": 13.35019556714472, - "grad_norm": 1.4524247646331787, - "learning_rate": 7.478291457286433e-05, - "loss": 5.401, - "step": 25599 - }, - { - "epoch": 13.35071707953064, - "grad_norm": 1.5271186828613281, - "learning_rate": 7.47819095477387e-05, - "loss": 5.0485, - "step": 25600 - }, - { - "epoch": 13.351238591916559, - "grad_norm": 1.4511741399765015, - "learning_rate": 7.478090452261307e-05, - "loss": 5.1249, - "step": 25601 - }, - { - "epoch": 13.351760104302477, - "grad_norm": 1.5485676527023315, - "learning_rate": 7.477989949748744e-05, - "loss": 5.1945, - "step": 25602 - }, - { - "epoch": 13.352281616688396, - "grad_norm": 1.6101113557815552, - "learning_rate": 7.477889447236181e-05, - "loss": 5.1073, - "step": 25603 - }, - { - "epoch": 13.352803129074315, - "grad_norm": 1.4590812921524048, - "learning_rate": 7.477788944723618e-05, - "loss": 4.8594, - "step": 25604 - }, - { - "epoch": 13.353324641460235, - "grad_norm": 1.408347249031067, - "learning_rate": 7.477688442211056e-05, - "loss": 5.6154, - "step": 25605 - }, - { - "epoch": 13.353846153846154, - "grad_norm": 1.3557367324829102, - "learning_rate": 7.477587939698492e-05, - "loss": 5.6507, - "step": 25606 - }, - { - "epoch": 13.354367666232074, - "grad_norm": 1.4424008131027222, - "learning_rate": 7.47748743718593e-05, - "loss": 5.4778, - "step": 25607 - }, - { - "epoch": 13.354889178617992, - "grad_norm": 1.4769909381866455, - "learning_rate": 7.477386934673368e-05, - "loss": 5.2171, - "step": 25608 - }, - { - "epoch": 13.355410691003911, - "grad_norm": 1.5071487426757812, - "learning_rate": 7.477286432160805e-05, - "loss": 5.4186, - "step": 25609 - }, - { - "epoch": 13.35593220338983, - "grad_norm": 1.4086124897003174, - "learning_rate": 7.477185929648242e-05, - "loss": 5.3972, - "step": 25610 - }, - { - "epoch": 13.35645371577575, - "grad_norm": 1.637149453163147, - "learning_rate": 7.477085427135678e-05, - "loss": 5.4532, - "step": 25611 - }, - { - "epoch": 13.35697522816167, - "grad_norm": 1.5026112794876099, - "learning_rate": 7.476984924623116e-05, - "loss": 5.4415, - "step": 25612 - }, - { - "epoch": 13.357496740547589, - "grad_norm": 1.5974375009536743, - "learning_rate": 7.476884422110553e-05, - "loss": 5.0971, - "step": 25613 - }, - { - "epoch": 13.358018252933507, - "grad_norm": 1.4003827571868896, - "learning_rate": 7.47678391959799e-05, - "loss": 5.7445, - "step": 25614 - }, - { - "epoch": 13.358539765319426, - "grad_norm": 1.4238201379776, - "learning_rate": 7.476683417085427e-05, - "loss": 5.5155, - "step": 25615 - }, - { - "epoch": 13.359061277705345, - "grad_norm": 1.4987165927886963, - "learning_rate": 7.476582914572864e-05, - "loss": 5.5398, - "step": 25616 - }, - { - "epoch": 13.359582790091265, - "grad_norm": 1.4484001398086548, - "learning_rate": 7.476482412060302e-05, - "loss": 5.3879, - "step": 25617 - }, - { - "epoch": 13.360104302477184, - "grad_norm": 1.4772412776947021, - "learning_rate": 7.47638190954774e-05, - "loss": 5.4973, - "step": 25618 - }, - { - "epoch": 13.360625814863104, - "grad_norm": 1.3976552486419678, - "learning_rate": 7.476281407035176e-05, - "loss": 5.856, - "step": 25619 - }, - { - "epoch": 13.361147327249022, - "grad_norm": 1.5786577463150024, - "learning_rate": 7.476180904522614e-05, - "loss": 5.3627, - "step": 25620 - }, - { - "epoch": 13.361668839634941, - "grad_norm": 1.4778320789337158, - "learning_rate": 7.476080402010051e-05, - "loss": 5.3258, - "step": 25621 - }, - { - "epoch": 13.36219035202086, - "grad_norm": 1.4685102701187134, - "learning_rate": 7.475979899497488e-05, - "loss": 5.528, - "step": 25622 - }, - { - "epoch": 13.36271186440678, - "grad_norm": 1.416680097579956, - "learning_rate": 7.475879396984925e-05, - "loss": 5.2404, - "step": 25623 - }, - { - "epoch": 13.3632333767927, - "grad_norm": 1.437638282775879, - "learning_rate": 7.475778894472361e-05, - "loss": 5.6444, - "step": 25624 - }, - { - "epoch": 13.363754889178619, - "grad_norm": 1.4124419689178467, - "learning_rate": 7.475678391959799e-05, - "loss": 5.1119, - "step": 25625 - }, - { - "epoch": 13.364276401564537, - "grad_norm": 1.4327555894851685, - "learning_rate": 7.475577889447236e-05, - "loss": 5.4685, - "step": 25626 - }, - { - "epoch": 13.364797913950456, - "grad_norm": 1.3395960330963135, - "learning_rate": 7.475477386934673e-05, - "loss": 5.4885, - "step": 25627 - }, - { - "epoch": 13.365319426336375, - "grad_norm": 1.2821604013442993, - "learning_rate": 7.475376884422111e-05, - "loss": 5.6666, - "step": 25628 - }, - { - "epoch": 13.365840938722295, - "grad_norm": 1.5461915731430054, - "learning_rate": 7.475276381909549e-05, - "loss": 5.3706, - "step": 25629 - }, - { - "epoch": 13.366362451108214, - "grad_norm": 1.4926625490188599, - "learning_rate": 7.475175879396985e-05, - "loss": 5.2751, - "step": 25630 - }, - { - "epoch": 13.366883963494132, - "grad_norm": 1.4054052829742432, - "learning_rate": 7.475075376884423e-05, - "loss": 5.3923, - "step": 25631 - }, - { - "epoch": 13.367405475880052, - "grad_norm": 1.5060489177703857, - "learning_rate": 7.47497487437186e-05, - "loss": 4.8704, - "step": 25632 - }, - { - "epoch": 13.367926988265971, - "grad_norm": 1.5691038370132446, - "learning_rate": 7.474874371859297e-05, - "loss": 5.1224, - "step": 25633 - }, - { - "epoch": 13.36844850065189, - "grad_norm": 1.4810845851898193, - "learning_rate": 7.474773869346734e-05, - "loss": 5.5049, - "step": 25634 - }, - { - "epoch": 13.36897001303781, - "grad_norm": 1.5178236961364746, - "learning_rate": 7.474673366834172e-05, - "loss": 5.5532, - "step": 25635 - }, - { - "epoch": 13.36949152542373, - "grad_norm": 1.3550325632095337, - "learning_rate": 7.474572864321608e-05, - "loss": 5.3052, - "step": 25636 - }, - { - "epoch": 13.370013037809649, - "grad_norm": 1.4664517641067505, - "learning_rate": 7.474472361809046e-05, - "loss": 5.1136, - "step": 25637 - }, - { - "epoch": 13.370534550195567, - "grad_norm": 1.3351376056671143, - "learning_rate": 7.474371859296484e-05, - "loss": 4.6674, - "step": 25638 - }, - { - "epoch": 13.371056062581486, - "grad_norm": 1.3599261045455933, - "learning_rate": 7.47427135678392e-05, - "loss": 5.6493, - "step": 25639 - }, - { - "epoch": 13.371577574967406, - "grad_norm": 1.402835726737976, - "learning_rate": 7.474170854271358e-05, - "loss": 5.391, - "step": 25640 - }, - { - "epoch": 13.372099087353325, - "grad_norm": 1.4513009786605835, - "learning_rate": 7.474070351758794e-05, - "loss": 4.9485, - "step": 25641 - }, - { - "epoch": 13.372620599739244, - "grad_norm": 1.4422439336776733, - "learning_rate": 7.473969849246232e-05, - "loss": 5.4695, - "step": 25642 - }, - { - "epoch": 13.373142112125162, - "grad_norm": 1.4548817873001099, - "learning_rate": 7.473869346733668e-05, - "loss": 5.2472, - "step": 25643 - }, - { - "epoch": 13.373663624511082, - "grad_norm": 1.3729034662246704, - "learning_rate": 7.473768844221106e-05, - "loss": 5.4184, - "step": 25644 - }, - { - "epoch": 13.374185136897001, - "grad_norm": 1.440813422203064, - "learning_rate": 7.473668341708543e-05, - "loss": 4.9386, - "step": 25645 - }, - { - "epoch": 13.37470664928292, - "grad_norm": 1.3942042589187622, - "learning_rate": 7.47356783919598e-05, - "loss": 5.5756, - "step": 25646 - }, - { - "epoch": 13.37522816166884, - "grad_norm": 1.5231152772903442, - "learning_rate": 7.473467336683417e-05, - "loss": 5.3009, - "step": 25647 - }, - { - "epoch": 13.37574967405476, - "grad_norm": 1.554020881652832, - "learning_rate": 7.473366834170855e-05, - "loss": 5.7521, - "step": 25648 - }, - { - "epoch": 13.376271186440677, - "grad_norm": 1.5375339984893799, - "learning_rate": 7.473266331658292e-05, - "loss": 5.151, - "step": 25649 - }, - { - "epoch": 13.376792698826597, - "grad_norm": 1.6193546056747437, - "learning_rate": 7.473165829145729e-05, - "loss": 5.0716, - "step": 25650 - }, - { - "epoch": 13.377314211212516, - "grad_norm": 1.395033597946167, - "learning_rate": 7.473065326633167e-05, - "loss": 5.5376, - "step": 25651 - }, - { - "epoch": 13.377835723598436, - "grad_norm": 1.3955535888671875, - "learning_rate": 7.472964824120603e-05, - "loss": 5.5587, - "step": 25652 - }, - { - "epoch": 13.378357235984355, - "grad_norm": 1.3478915691375732, - "learning_rate": 7.472864321608041e-05, - "loss": 5.9704, - "step": 25653 - }, - { - "epoch": 13.378878748370274, - "grad_norm": 1.477400779724121, - "learning_rate": 7.472763819095477e-05, - "loss": 5.3968, - "step": 25654 - }, - { - "epoch": 13.379400260756192, - "grad_norm": 1.404414176940918, - "learning_rate": 7.472663316582915e-05, - "loss": 5.7882, - "step": 25655 - }, - { - "epoch": 13.379921773142112, - "grad_norm": 1.4042272567749023, - "learning_rate": 7.472562814070351e-05, - "loss": 5.3531, - "step": 25656 - }, - { - "epoch": 13.380443285528031, - "grad_norm": 1.5719094276428223, - "learning_rate": 7.472462311557789e-05, - "loss": 5.0507, - "step": 25657 - }, - { - "epoch": 13.38096479791395, - "grad_norm": 1.410982370376587, - "learning_rate": 7.472361809045227e-05, - "loss": 5.6696, - "step": 25658 - }, - { - "epoch": 13.38148631029987, - "grad_norm": 1.4770731925964355, - "learning_rate": 7.472261306532665e-05, - "loss": 5.2387, - "step": 25659 - }, - { - "epoch": 13.38200782268579, - "grad_norm": 1.4308491945266724, - "learning_rate": 7.472160804020101e-05, - "loss": 5.7605, - "step": 25660 - }, - { - "epoch": 13.382529335071707, - "grad_norm": 1.4518773555755615, - "learning_rate": 7.472060301507539e-05, - "loss": 5.5057, - "step": 25661 - }, - { - "epoch": 13.383050847457627, - "grad_norm": 1.496427297592163, - "learning_rate": 7.471959798994975e-05, - "loss": 5.4727, - "step": 25662 - }, - { - "epoch": 13.383572359843546, - "grad_norm": 1.4180673360824585, - "learning_rate": 7.471859296482412e-05, - "loss": 5.4186, - "step": 25663 - }, - { - "epoch": 13.384093872229466, - "grad_norm": 1.5029807090759277, - "learning_rate": 7.47175879396985e-05, - "loss": 5.2266, - "step": 25664 - }, - { - "epoch": 13.384615384615385, - "grad_norm": 1.4320181608200073, - "learning_rate": 7.471658291457286e-05, - "loss": 5.5155, - "step": 25665 - }, - { - "epoch": 13.385136897001304, - "grad_norm": 1.5789037942886353, - "learning_rate": 7.471557788944724e-05, - "loss": 5.0076, - "step": 25666 - }, - { - "epoch": 13.385658409387222, - "grad_norm": 1.4821689128875732, - "learning_rate": 7.47145728643216e-05, - "loss": 5.1354, - "step": 25667 - }, - { - "epoch": 13.386179921773142, - "grad_norm": 1.4501688480377197, - "learning_rate": 7.471356783919598e-05, - "loss": 5.2539, - "step": 25668 - }, - { - "epoch": 13.386701434159061, - "grad_norm": 1.5472636222839355, - "learning_rate": 7.471256281407036e-05, - "loss": 5.5932, - "step": 25669 - }, - { - "epoch": 13.38722294654498, - "grad_norm": 1.5350309610366821, - "learning_rate": 7.471155778894474e-05, - "loss": 5.383, - "step": 25670 - }, - { - "epoch": 13.3877444589309, - "grad_norm": 1.5583363771438599, - "learning_rate": 7.47105527638191e-05, - "loss": 5.5647, - "step": 25671 - }, - { - "epoch": 13.38826597131682, - "grad_norm": 1.4599146842956543, - "learning_rate": 7.470954773869348e-05, - "loss": 4.8558, - "step": 25672 - }, - { - "epoch": 13.388787483702737, - "grad_norm": 1.5359036922454834, - "learning_rate": 7.470854271356784e-05, - "loss": 5.116, - "step": 25673 - }, - { - "epoch": 13.389308996088657, - "grad_norm": 1.3672057390213013, - "learning_rate": 7.470753768844222e-05, - "loss": 5.8933, - "step": 25674 - }, - { - "epoch": 13.389830508474576, - "grad_norm": 1.3970375061035156, - "learning_rate": 7.470653266331658e-05, - "loss": 5.2878, - "step": 25675 - }, - { - "epoch": 13.390352020860496, - "grad_norm": 1.4483989477157593, - "learning_rate": 7.470552763819095e-05, - "loss": 5.3768, - "step": 25676 - }, - { - "epoch": 13.390873533246415, - "grad_norm": 1.488216519355774, - "learning_rate": 7.470452261306533e-05, - "loss": 5.46, - "step": 25677 - }, - { - "epoch": 13.391395045632335, - "grad_norm": 1.395548939704895, - "learning_rate": 7.470351758793969e-05, - "loss": 5.6775, - "step": 25678 - }, - { - "epoch": 13.391916558018252, - "grad_norm": 1.4001498222351074, - "learning_rate": 7.470251256281407e-05, - "loss": 5.5676, - "step": 25679 - }, - { - "epoch": 13.392438070404172, - "grad_norm": 1.4096919298171997, - "learning_rate": 7.470150753768845e-05, - "loss": 5.6761, - "step": 25680 - }, - { - "epoch": 13.392959582790091, - "grad_norm": 1.5681188106536865, - "learning_rate": 7.470050251256282e-05, - "loss": 5.0092, - "step": 25681 - }, - { - "epoch": 13.39348109517601, - "grad_norm": 1.5594825744628906, - "learning_rate": 7.469949748743719e-05, - "loss": 5.2187, - "step": 25682 - }, - { - "epoch": 13.39400260756193, - "grad_norm": 1.4995973110198975, - "learning_rate": 7.469849246231157e-05, - "loss": 4.5949, - "step": 25683 - }, - { - "epoch": 13.39452411994785, - "grad_norm": 1.3732390403747559, - "learning_rate": 7.469748743718593e-05, - "loss": 5.5535, - "step": 25684 - }, - { - "epoch": 13.395045632333767, - "grad_norm": 1.4590548276901245, - "learning_rate": 7.469648241206031e-05, - "loss": 5.4645, - "step": 25685 - }, - { - "epoch": 13.395567144719687, - "grad_norm": 1.4474213123321533, - "learning_rate": 7.469547738693467e-05, - "loss": 5.4554, - "step": 25686 - }, - { - "epoch": 13.396088657105606, - "grad_norm": 1.5184473991394043, - "learning_rate": 7.469447236180905e-05, - "loss": 5.2476, - "step": 25687 - }, - { - "epoch": 13.396610169491526, - "grad_norm": 1.4180519580841064, - "learning_rate": 7.469346733668341e-05, - "loss": 5.5725, - "step": 25688 - }, - { - "epoch": 13.397131681877445, - "grad_norm": 1.54567289352417, - "learning_rate": 7.469246231155779e-05, - "loss": 4.9414, - "step": 25689 - }, - { - "epoch": 13.397653194263365, - "grad_norm": 1.4667245149612427, - "learning_rate": 7.469145728643217e-05, - "loss": 5.3278, - "step": 25690 - }, - { - "epoch": 13.398174706649282, - "grad_norm": 1.4029037952423096, - "learning_rate": 7.469045226130653e-05, - "loss": 5.6966, - "step": 25691 - }, - { - "epoch": 13.398696219035202, - "grad_norm": 1.4207602739334106, - "learning_rate": 7.468944723618091e-05, - "loss": 5.6032, - "step": 25692 - }, - { - "epoch": 13.399217731421121, - "grad_norm": 1.5073564052581787, - "learning_rate": 7.468844221105528e-05, - "loss": 5.5909, - "step": 25693 - }, - { - "epoch": 13.39973924380704, - "grad_norm": 1.4459848403930664, - "learning_rate": 7.468743718592965e-05, - "loss": 5.4975, - "step": 25694 - }, - { - "epoch": 13.40026075619296, - "grad_norm": 1.4309762716293335, - "learning_rate": 7.468643216080402e-05, - "loss": 5.5593, - "step": 25695 - }, - { - "epoch": 13.40078226857888, - "grad_norm": 1.459652066230774, - "learning_rate": 7.46854271356784e-05, - "loss": 5.321, - "step": 25696 - }, - { - "epoch": 13.401303780964797, - "grad_norm": 1.4077222347259521, - "learning_rate": 7.468442211055276e-05, - "loss": 5.6692, - "step": 25697 - }, - { - "epoch": 13.401825293350717, - "grad_norm": 1.4687877893447876, - "learning_rate": 7.468341708542714e-05, - "loss": 5.3568, - "step": 25698 - }, - { - "epoch": 13.402346805736636, - "grad_norm": 1.3625712394714355, - "learning_rate": 7.46824120603015e-05, - "loss": 4.3135, - "step": 25699 - }, - { - "epoch": 13.402868318122556, - "grad_norm": 1.4091373682022095, - "learning_rate": 7.468140703517588e-05, - "loss": 5.5904, - "step": 25700 - }, - { - "epoch": 13.403389830508475, - "grad_norm": 1.3913195133209229, - "learning_rate": 7.468040201005026e-05, - "loss": 5.2957, - "step": 25701 - }, - { - "epoch": 13.403911342894395, - "grad_norm": 1.4963539838790894, - "learning_rate": 7.467939698492464e-05, - "loss": 5.3414, - "step": 25702 - }, - { - "epoch": 13.404432855280312, - "grad_norm": 1.4907171726226807, - "learning_rate": 7.4678391959799e-05, - "loss": 4.8583, - "step": 25703 - }, - { - "epoch": 13.404954367666232, - "grad_norm": 1.4909913539886475, - "learning_rate": 7.467738693467337e-05, - "loss": 5.3484, - "step": 25704 - }, - { - "epoch": 13.405475880052151, - "grad_norm": 1.3909473419189453, - "learning_rate": 7.467638190954774e-05, - "loss": 5.5971, - "step": 25705 - }, - { - "epoch": 13.40599739243807, - "grad_norm": 1.368985652923584, - "learning_rate": 7.467537688442211e-05, - "loss": 5.7136, - "step": 25706 - }, - { - "epoch": 13.40651890482399, - "grad_norm": 1.4492480754852295, - "learning_rate": 7.467437185929649e-05, - "loss": 5.0845, - "step": 25707 - }, - { - "epoch": 13.40704041720991, - "grad_norm": 1.5859419107437134, - "learning_rate": 7.467336683417085e-05, - "loss": 4.8755, - "step": 25708 - }, - { - "epoch": 13.407561929595827, - "grad_norm": 1.5622738599777222, - "learning_rate": 7.467236180904523e-05, - "loss": 5.6294, - "step": 25709 - }, - { - "epoch": 13.408083441981747, - "grad_norm": 1.4414821863174438, - "learning_rate": 7.46713567839196e-05, - "loss": 5.6022, - "step": 25710 - }, - { - "epoch": 13.408604954367666, - "grad_norm": 1.4327222108840942, - "learning_rate": 7.467035175879398e-05, - "loss": 5.708, - "step": 25711 - }, - { - "epoch": 13.409126466753586, - "grad_norm": 1.4639970064163208, - "learning_rate": 7.466934673366835e-05, - "loss": 5.1007, - "step": 25712 - }, - { - "epoch": 13.409647979139505, - "grad_norm": 1.5644515752792358, - "learning_rate": 7.466834170854273e-05, - "loss": 5.092, - "step": 25713 - }, - { - "epoch": 13.410169491525423, - "grad_norm": 1.4968621730804443, - "learning_rate": 7.466733668341709e-05, - "loss": 5.7236, - "step": 25714 - }, - { - "epoch": 13.410691003911342, - "grad_norm": 1.4468327760696411, - "learning_rate": 7.466633165829147e-05, - "loss": 5.5253, - "step": 25715 - }, - { - "epoch": 13.411212516297262, - "grad_norm": 1.4137842655181885, - "learning_rate": 7.466532663316583e-05, - "loss": 5.5908, - "step": 25716 - }, - { - "epoch": 13.411734028683181, - "grad_norm": 1.612979769706726, - "learning_rate": 7.46643216080402e-05, - "loss": 5.4851, - "step": 25717 - }, - { - "epoch": 13.4122555410691, - "grad_norm": 1.3531816005706787, - "learning_rate": 7.466331658291457e-05, - "loss": 5.8046, - "step": 25718 - }, - { - "epoch": 13.41277705345502, - "grad_norm": 1.413576364517212, - "learning_rate": 7.466231155778894e-05, - "loss": 5.5687, - "step": 25719 - }, - { - "epoch": 13.41329856584094, - "grad_norm": 1.386243224143982, - "learning_rate": 7.466130653266332e-05, - "loss": 5.6071, - "step": 25720 - }, - { - "epoch": 13.413820078226857, - "grad_norm": 1.6928720474243164, - "learning_rate": 7.46603015075377e-05, - "loss": 4.6581, - "step": 25721 - }, - { - "epoch": 13.414341590612777, - "grad_norm": 1.5335665941238403, - "learning_rate": 7.465929648241207e-05, - "loss": 5.2963, - "step": 25722 - }, - { - "epoch": 13.414863102998696, - "grad_norm": 1.4856847524642944, - "learning_rate": 7.465829145728644e-05, - "loss": 5.9385, - "step": 25723 - }, - { - "epoch": 13.415384615384616, - "grad_norm": 1.4318376779556274, - "learning_rate": 7.465728643216081e-05, - "loss": 5.4577, - "step": 25724 - }, - { - "epoch": 13.415906127770535, - "grad_norm": 1.3441425561904907, - "learning_rate": 7.465628140703518e-05, - "loss": 5.6296, - "step": 25725 - }, - { - "epoch": 13.416427640156453, - "grad_norm": 1.420954704284668, - "learning_rate": 7.465527638190956e-05, - "loss": 5.2107, - "step": 25726 - }, - { - "epoch": 13.416949152542372, - "grad_norm": 1.4372855424880981, - "learning_rate": 7.465427135678392e-05, - "loss": 5.3871, - "step": 25727 - }, - { - "epoch": 13.417470664928292, - "grad_norm": 1.4491597414016724, - "learning_rate": 7.46532663316583e-05, - "loss": 5.4877, - "step": 25728 - }, - { - "epoch": 13.417992177314211, - "grad_norm": 1.4583172798156738, - "learning_rate": 7.465226130653266e-05, - "loss": 5.1172, - "step": 25729 - }, - { - "epoch": 13.41851368970013, - "grad_norm": 1.5192793607711792, - "learning_rate": 7.465125628140704e-05, - "loss": 5.607, - "step": 25730 - }, - { - "epoch": 13.41903520208605, - "grad_norm": 1.312098503112793, - "learning_rate": 7.465025125628142e-05, - "loss": 5.2955, - "step": 25731 - }, - { - "epoch": 13.419556714471968, - "grad_norm": 1.6170865297317505, - "learning_rate": 7.464924623115578e-05, - "loss": 5.4144, - "step": 25732 - }, - { - "epoch": 13.420078226857887, - "grad_norm": 1.3526105880737305, - "learning_rate": 7.464824120603016e-05, - "loss": 5.8623, - "step": 25733 - }, - { - "epoch": 13.420599739243807, - "grad_norm": 1.3872977495193481, - "learning_rate": 7.464723618090452e-05, - "loss": 5.582, - "step": 25734 - }, - { - "epoch": 13.421121251629726, - "grad_norm": 1.4210340976715088, - "learning_rate": 7.46462311557789e-05, - "loss": 5.5156, - "step": 25735 - }, - { - "epoch": 13.421642764015646, - "grad_norm": 1.3963134288787842, - "learning_rate": 7.464522613065327e-05, - "loss": 5.4318, - "step": 25736 - }, - { - "epoch": 13.422164276401565, - "grad_norm": 1.506334900856018, - "learning_rate": 7.464422110552764e-05, - "loss": 5.2795, - "step": 25737 - }, - { - "epoch": 13.422685788787483, - "grad_norm": 1.4962852001190186, - "learning_rate": 7.464321608040201e-05, - "loss": 5.5451, - "step": 25738 - }, - { - "epoch": 13.423207301173402, - "grad_norm": 1.4622297286987305, - "learning_rate": 7.464221105527639e-05, - "loss": 5.794, - "step": 25739 - }, - { - "epoch": 13.423728813559322, - "grad_norm": 1.4828394651412964, - "learning_rate": 7.464120603015075e-05, - "loss": 5.3586, - "step": 25740 - }, - { - "epoch": 13.424250325945241, - "grad_norm": 1.446427822113037, - "learning_rate": 7.464020100502513e-05, - "loss": 5.5556, - "step": 25741 - }, - { - "epoch": 13.42477183833116, - "grad_norm": 1.4946023225784302, - "learning_rate": 7.46391959798995e-05, - "loss": 5.195, - "step": 25742 - }, - { - "epoch": 13.42529335071708, - "grad_norm": 1.499858021736145, - "learning_rate": 7.463819095477387e-05, - "loss": 4.8704, - "step": 25743 - }, - { - "epoch": 13.425814863102998, - "grad_norm": 1.4598660469055176, - "learning_rate": 7.463718592964825e-05, - "loss": 5.2659, - "step": 25744 - }, - { - "epoch": 13.426336375488917, - "grad_norm": 1.5045286417007446, - "learning_rate": 7.463618090452261e-05, - "loss": 5.4861, - "step": 25745 - }, - { - "epoch": 13.426857887874837, - "grad_norm": 1.3390766382217407, - "learning_rate": 7.463517587939699e-05, - "loss": 4.9591, - "step": 25746 - }, - { - "epoch": 13.427379400260756, - "grad_norm": 1.3206419944763184, - "learning_rate": 7.463417085427135e-05, - "loss": 5.4552, - "step": 25747 - }, - { - "epoch": 13.427900912646676, - "grad_norm": 1.4373273849487305, - "learning_rate": 7.463316582914573e-05, - "loss": 5.5481, - "step": 25748 - }, - { - "epoch": 13.428422425032595, - "grad_norm": 1.3725289106369019, - "learning_rate": 7.46321608040201e-05, - "loss": 5.7542, - "step": 25749 - }, - { - "epoch": 13.428943937418513, - "grad_norm": 1.3941349983215332, - "learning_rate": 7.463115577889447e-05, - "loss": 4.9363, - "step": 25750 - }, - { - "epoch": 13.429465449804432, - "grad_norm": 1.4652057886123657, - "learning_rate": 7.463015075376885e-05, - "loss": 5.0224, - "step": 25751 - }, - { - "epoch": 13.429986962190352, - "grad_norm": 1.4516547918319702, - "learning_rate": 7.462914572864323e-05, - "loss": 5.4692, - "step": 25752 - }, - { - "epoch": 13.430508474576271, - "grad_norm": 1.4874000549316406, - "learning_rate": 7.46281407035176e-05, - "loss": 5.3026, - "step": 25753 - }, - { - "epoch": 13.43102998696219, - "grad_norm": 1.4034217596054077, - "learning_rate": 7.462713567839197e-05, - "loss": 5.3594, - "step": 25754 - }, - { - "epoch": 13.43155149934811, - "grad_norm": 1.4512810707092285, - "learning_rate": 7.462613065326634e-05, - "loss": 5.3288, - "step": 25755 - }, - { - "epoch": 13.432073011734028, - "grad_norm": 1.417320966720581, - "learning_rate": 7.46251256281407e-05, - "loss": 4.9557, - "step": 25756 - }, - { - "epoch": 13.432594524119947, - "grad_norm": 1.3907581567764282, - "learning_rate": 7.462412060301508e-05, - "loss": 5.6044, - "step": 25757 - }, - { - "epoch": 13.433116036505867, - "grad_norm": 1.3842668533325195, - "learning_rate": 7.462311557788944e-05, - "loss": 5.414, - "step": 25758 - }, - { - "epoch": 13.433637548891786, - "grad_norm": 1.4100455045700073, - "learning_rate": 7.462211055276382e-05, - "loss": 5.4194, - "step": 25759 - }, - { - "epoch": 13.434159061277706, - "grad_norm": 1.539253830909729, - "learning_rate": 7.462110552763818e-05, - "loss": 5.1229, - "step": 25760 - }, - { - "epoch": 13.434680573663625, - "grad_norm": 1.4331300258636475, - "learning_rate": 7.462010050251256e-05, - "loss": 5.6595, - "step": 25761 - }, - { - "epoch": 13.435202086049543, - "grad_norm": 1.5121443271636963, - "learning_rate": 7.461909547738694e-05, - "loss": 5.4901, - "step": 25762 - }, - { - "epoch": 13.435723598435462, - "grad_norm": 1.3742835521697998, - "learning_rate": 7.461809045226132e-05, - "loss": 5.4985, - "step": 25763 - }, - { - "epoch": 13.436245110821382, - "grad_norm": 1.4671132564544678, - "learning_rate": 7.461708542713568e-05, - "loss": 5.3447, - "step": 25764 - }, - { - "epoch": 13.436766623207301, - "grad_norm": 1.4815346002578735, - "learning_rate": 7.461608040201006e-05, - "loss": 5.7226, - "step": 25765 - }, - { - "epoch": 13.43728813559322, - "grad_norm": 1.3589967489242554, - "learning_rate": 7.461507537688442e-05, - "loss": 5.3828, - "step": 25766 - }, - { - "epoch": 13.43780964797914, - "grad_norm": 1.5116108655929565, - "learning_rate": 7.46140703517588e-05, - "loss": 5.4127, - "step": 25767 - }, - { - "epoch": 13.438331160365058, - "grad_norm": 1.502476453781128, - "learning_rate": 7.461306532663317e-05, - "loss": 5.5417, - "step": 25768 - }, - { - "epoch": 13.438852672750977, - "grad_norm": 1.4385216236114502, - "learning_rate": 7.461206030150753e-05, - "loss": 5.4728, - "step": 25769 - }, - { - "epoch": 13.439374185136897, - "grad_norm": 1.3518720865249634, - "learning_rate": 7.461105527638191e-05, - "loss": 5.6282, - "step": 25770 - }, - { - "epoch": 13.439895697522816, - "grad_norm": 1.640574336051941, - "learning_rate": 7.461005025125629e-05, - "loss": 4.5703, - "step": 25771 - }, - { - "epoch": 13.440417209908736, - "grad_norm": 1.4608571529388428, - "learning_rate": 7.460904522613066e-05, - "loss": 5.3401, - "step": 25772 - }, - { - "epoch": 13.440938722294655, - "grad_norm": 1.4023679494857788, - "learning_rate": 7.460804020100503e-05, - "loss": 5.4572, - "step": 25773 - }, - { - "epoch": 13.441460234680573, - "grad_norm": 1.3250154256820679, - "learning_rate": 7.46070351758794e-05, - "loss": 5.3791, - "step": 25774 - }, - { - "epoch": 13.441981747066492, - "grad_norm": 1.5098063945770264, - "learning_rate": 7.460603015075377e-05, - "loss": 4.9468, - "step": 25775 - }, - { - "epoch": 13.442503259452412, - "grad_norm": 1.6293339729309082, - "learning_rate": 7.460502512562815e-05, - "loss": 5.5215, - "step": 25776 - }, - { - "epoch": 13.443024771838331, - "grad_norm": 1.417500615119934, - "learning_rate": 7.460402010050251e-05, - "loss": 4.9634, - "step": 25777 - }, - { - "epoch": 13.44354628422425, - "grad_norm": 1.370314598083496, - "learning_rate": 7.460301507537689e-05, - "loss": 5.3518, - "step": 25778 - }, - { - "epoch": 13.44406779661017, - "grad_norm": 1.463358759880066, - "learning_rate": 7.460201005025126e-05, - "loss": 5.3308, - "step": 25779 - }, - { - "epoch": 13.444589308996088, - "grad_norm": 1.4858185052871704, - "learning_rate": 7.460100502512563e-05, - "loss": 5.5331, - "step": 25780 - }, - { - "epoch": 13.445110821382007, - "grad_norm": 1.423521876335144, - "learning_rate": 7.46e-05, - "loss": 5.1747, - "step": 25781 - }, - { - "epoch": 13.445632333767927, - "grad_norm": 1.3970584869384766, - "learning_rate": 7.459899497487438e-05, - "loss": 5.4251, - "step": 25782 - }, - { - "epoch": 13.446153846153846, - "grad_norm": 1.458516240119934, - "learning_rate": 7.459798994974875e-05, - "loss": 5.3551, - "step": 25783 - }, - { - "epoch": 13.446675358539766, - "grad_norm": 1.4815362691879272, - "learning_rate": 7.459698492462312e-05, - "loss": 5.6472, - "step": 25784 - }, - { - "epoch": 13.447196870925685, - "grad_norm": 1.5546815395355225, - "learning_rate": 7.45959798994975e-05, - "loss": 5.0378, - "step": 25785 - }, - { - "epoch": 13.447718383311603, - "grad_norm": 1.4199405908584595, - "learning_rate": 7.459497487437186e-05, - "loss": 5.5287, - "step": 25786 - }, - { - "epoch": 13.448239895697522, - "grad_norm": 1.3967225551605225, - "learning_rate": 7.459396984924624e-05, - "loss": 5.3675, - "step": 25787 - }, - { - "epoch": 13.448761408083442, - "grad_norm": 1.409605622291565, - "learning_rate": 7.45929648241206e-05, - "loss": 5.3944, - "step": 25788 - }, - { - "epoch": 13.449282920469361, - "grad_norm": 1.427430272102356, - "learning_rate": 7.459195979899498e-05, - "loss": 5.5058, - "step": 25789 - }, - { - "epoch": 13.44980443285528, - "grad_norm": 1.5583319664001465, - "learning_rate": 7.459095477386934e-05, - "loss": 5.4593, - "step": 25790 - }, - { - "epoch": 13.4503259452412, - "grad_norm": 1.5089486837387085, - "learning_rate": 7.458994974874372e-05, - "loss": 5.2153, - "step": 25791 - }, - { - "epoch": 13.450847457627118, - "grad_norm": 1.4515163898468018, - "learning_rate": 7.45889447236181e-05, - "loss": 5.64, - "step": 25792 - }, - { - "epoch": 13.451368970013037, - "grad_norm": 1.5082346200942993, - "learning_rate": 7.458793969849248e-05, - "loss": 5.4596, - "step": 25793 - }, - { - "epoch": 13.451890482398957, - "grad_norm": 1.3669602870941162, - "learning_rate": 7.458693467336684e-05, - "loss": 5.0045, - "step": 25794 - }, - { - "epoch": 13.452411994784876, - "grad_norm": 1.5132296085357666, - "learning_rate": 7.458592964824122e-05, - "loss": 4.9038, - "step": 25795 - }, - { - "epoch": 13.452933507170796, - "grad_norm": 1.403357744216919, - "learning_rate": 7.458492462311558e-05, - "loss": 5.2711, - "step": 25796 - }, - { - "epoch": 13.453455019556715, - "grad_norm": 1.3375333547592163, - "learning_rate": 7.458391959798995e-05, - "loss": 5.4691, - "step": 25797 - }, - { - "epoch": 13.453976531942633, - "grad_norm": 1.4695682525634766, - "learning_rate": 7.458291457286433e-05, - "loss": 5.3132, - "step": 25798 - }, - { - "epoch": 13.454498044328552, - "grad_norm": 1.3572773933410645, - "learning_rate": 7.458190954773869e-05, - "loss": 5.6159, - "step": 25799 - }, - { - "epoch": 13.455019556714472, - "grad_norm": 1.4240750074386597, - "learning_rate": 7.458090452261307e-05, - "loss": 5.4923, - "step": 25800 - }, - { - "epoch": 13.455541069100391, - "grad_norm": 1.3610186576843262, - "learning_rate": 7.457989949748743e-05, - "loss": 5.7566, - "step": 25801 - }, - { - "epoch": 13.45606258148631, - "grad_norm": 1.3745728731155396, - "learning_rate": 7.457889447236181e-05, - "loss": 5.7702, - "step": 25802 - }, - { - "epoch": 13.45658409387223, - "grad_norm": 1.431480050086975, - "learning_rate": 7.457788944723619e-05, - "loss": 5.4793, - "step": 25803 - }, - { - "epoch": 13.457105606258148, - "grad_norm": 1.4504729509353638, - "learning_rate": 7.457688442211057e-05, - "loss": 5.2754, - "step": 25804 - }, - { - "epoch": 13.457627118644067, - "grad_norm": 1.2721911668777466, - "learning_rate": 7.457587939698493e-05, - "loss": 5.7739, - "step": 25805 - }, - { - "epoch": 13.458148631029987, - "grad_norm": 1.5029208660125732, - "learning_rate": 7.457487437185931e-05, - "loss": 5.0333, - "step": 25806 - }, - { - "epoch": 13.458670143415906, - "grad_norm": 1.395711898803711, - "learning_rate": 7.457386934673367e-05, - "loss": 5.5502, - "step": 25807 - }, - { - "epoch": 13.459191655801826, - "grad_norm": 1.4403815269470215, - "learning_rate": 7.457286432160805e-05, - "loss": 5.3764, - "step": 25808 - }, - { - "epoch": 13.459713168187744, - "grad_norm": 1.4121901988983154, - "learning_rate": 7.457185929648241e-05, - "loss": 5.5473, - "step": 25809 - }, - { - "epoch": 13.460234680573663, - "grad_norm": 1.4377591609954834, - "learning_rate": 7.457085427135678e-05, - "loss": 5.4363, - "step": 25810 - }, - { - "epoch": 13.460756192959582, - "grad_norm": 1.5347790718078613, - "learning_rate": 7.456984924623116e-05, - "loss": 5.0654, - "step": 25811 - }, - { - "epoch": 13.461277705345502, - "grad_norm": 1.4654875993728638, - "learning_rate": 7.456884422110553e-05, - "loss": 5.4834, - "step": 25812 - }, - { - "epoch": 13.461799217731421, - "grad_norm": 1.3891584873199463, - "learning_rate": 7.456783919597991e-05, - "loss": 5.6157, - "step": 25813 - }, - { - "epoch": 13.46232073011734, - "grad_norm": 1.3398462533950806, - "learning_rate": 7.456683417085428e-05, - "loss": 5.6903, - "step": 25814 - }, - { - "epoch": 13.46284224250326, - "grad_norm": 1.5170092582702637, - "learning_rate": 7.456582914572865e-05, - "loss": 5.6203, - "step": 25815 - }, - { - "epoch": 13.463363754889178, - "grad_norm": 1.4461970329284668, - "learning_rate": 7.456482412060302e-05, - "loss": 4.7269, - "step": 25816 - }, - { - "epoch": 13.463885267275097, - "grad_norm": 1.4339324235916138, - "learning_rate": 7.45638190954774e-05, - "loss": 5.6637, - "step": 25817 - }, - { - "epoch": 13.464406779661017, - "grad_norm": 1.3859716653823853, - "learning_rate": 7.456281407035176e-05, - "loss": 5.6405, - "step": 25818 - }, - { - "epoch": 13.464928292046936, - "grad_norm": 1.4901890754699707, - "learning_rate": 7.456180904522614e-05, - "loss": 5.0904, - "step": 25819 - }, - { - "epoch": 13.465449804432856, - "grad_norm": 1.5199823379516602, - "learning_rate": 7.45608040201005e-05, - "loss": 5.5266, - "step": 25820 - }, - { - "epoch": 13.465971316818774, - "grad_norm": 1.4449909925460815, - "learning_rate": 7.455979899497488e-05, - "loss": 5.2191, - "step": 25821 - }, - { - "epoch": 13.466492829204693, - "grad_norm": 1.3983596563339233, - "learning_rate": 7.455879396984924e-05, - "loss": 5.9131, - "step": 25822 - }, - { - "epoch": 13.467014341590613, - "grad_norm": 1.4448316097259521, - "learning_rate": 7.455778894472362e-05, - "loss": 5.4456, - "step": 25823 - }, - { - "epoch": 13.467535853976532, - "grad_norm": 1.5587831735610962, - "learning_rate": 7.4556783919598e-05, - "loss": 5.2695, - "step": 25824 - }, - { - "epoch": 13.468057366362451, - "grad_norm": 1.5552594661712646, - "learning_rate": 7.455577889447236e-05, - "loss": 5.2706, - "step": 25825 - }, - { - "epoch": 13.468578878748371, - "grad_norm": 1.5522208213806152, - "learning_rate": 7.455477386934674e-05, - "loss": 5.2447, - "step": 25826 - }, - { - "epoch": 13.469100391134289, - "grad_norm": 1.4148774147033691, - "learning_rate": 7.45537688442211e-05, - "loss": 5.3895, - "step": 25827 - }, - { - "epoch": 13.469621903520208, - "grad_norm": 1.3500038385391235, - "learning_rate": 7.455276381909548e-05, - "loss": 5.7849, - "step": 25828 - }, - { - "epoch": 13.470143415906128, - "grad_norm": 1.352275013923645, - "learning_rate": 7.455175879396985e-05, - "loss": 4.8507, - "step": 25829 - }, - { - "epoch": 13.470664928292047, - "grad_norm": 1.4313663244247437, - "learning_rate": 7.455075376884423e-05, - "loss": 5.5664, - "step": 25830 - }, - { - "epoch": 13.471186440677966, - "grad_norm": 1.4129598140716553, - "learning_rate": 7.454974874371859e-05, - "loss": 5.6285, - "step": 25831 - }, - { - "epoch": 13.471707953063886, - "grad_norm": 1.4581941366195679, - "learning_rate": 7.454874371859297e-05, - "loss": 5.4077, - "step": 25832 - }, - { - "epoch": 13.472229465449804, - "grad_norm": 1.4769515991210938, - "learning_rate": 7.454773869346735e-05, - "loss": 5.3971, - "step": 25833 - }, - { - "epoch": 13.472750977835723, - "grad_norm": 1.4114179611206055, - "learning_rate": 7.454673366834172e-05, - "loss": 5.6117, - "step": 25834 - }, - { - "epoch": 13.473272490221643, - "grad_norm": 1.5007543563842773, - "learning_rate": 7.454572864321609e-05, - "loss": 5.0904, - "step": 25835 - }, - { - "epoch": 13.473794002607562, - "grad_norm": 1.4294143915176392, - "learning_rate": 7.454472361809045e-05, - "loss": 5.1709, - "step": 25836 - }, - { - "epoch": 13.474315514993481, - "grad_norm": 1.4615232944488525, - "learning_rate": 7.454371859296483e-05, - "loss": 5.5042, - "step": 25837 - }, - { - "epoch": 13.474837027379401, - "grad_norm": 1.4054678678512573, - "learning_rate": 7.45427135678392e-05, - "loss": 5.5467, - "step": 25838 - }, - { - "epoch": 13.475358539765319, - "grad_norm": 1.4967602491378784, - "learning_rate": 7.454170854271357e-05, - "loss": 5.4114, - "step": 25839 - }, - { - "epoch": 13.475880052151238, - "grad_norm": 1.4218305349349976, - "learning_rate": 7.454070351758794e-05, - "loss": 5.7032, - "step": 25840 - }, - { - "epoch": 13.476401564537158, - "grad_norm": 1.4123295545578003, - "learning_rate": 7.453969849246231e-05, - "loss": 5.2423, - "step": 25841 - }, - { - "epoch": 13.476923076923077, - "grad_norm": 1.3700181245803833, - "learning_rate": 7.453869346733668e-05, - "loss": 5.531, - "step": 25842 - }, - { - "epoch": 13.477444589308996, - "grad_norm": 1.4921541213989258, - "learning_rate": 7.453768844221106e-05, - "loss": 5.387, - "step": 25843 - }, - { - "epoch": 13.477966101694916, - "grad_norm": 1.4084131717681885, - "learning_rate": 7.453668341708543e-05, - "loss": 5.7106, - "step": 25844 - }, - { - "epoch": 13.478487614080834, - "grad_norm": 1.5282868146896362, - "learning_rate": 7.453567839195981e-05, - "loss": 5.2512, - "step": 25845 - }, - { - "epoch": 13.479009126466753, - "grad_norm": 1.5084800720214844, - "learning_rate": 7.453467336683418e-05, - "loss": 4.4248, - "step": 25846 - }, - { - "epoch": 13.479530638852673, - "grad_norm": 1.469380497932434, - "learning_rate": 7.453366834170855e-05, - "loss": 5.341, - "step": 25847 - }, - { - "epoch": 13.480052151238592, - "grad_norm": 1.4949133396148682, - "learning_rate": 7.453266331658292e-05, - "loss": 5.0434, - "step": 25848 - }, - { - "epoch": 13.480573663624511, - "grad_norm": 1.4612425565719604, - "learning_rate": 7.453165829145728e-05, - "loss": 5.7477, - "step": 25849 - }, - { - "epoch": 13.481095176010431, - "grad_norm": 1.4086154699325562, - "learning_rate": 7.453065326633166e-05, - "loss": 5.2406, - "step": 25850 - }, - { - "epoch": 13.481616688396349, - "grad_norm": 1.4912303686141968, - "learning_rate": 7.452964824120603e-05, - "loss": 5.5551, - "step": 25851 - }, - { - "epoch": 13.482138200782268, - "grad_norm": 1.4745655059814453, - "learning_rate": 7.45286432160804e-05, - "loss": 5.0839, - "step": 25852 - }, - { - "epoch": 13.482659713168188, - "grad_norm": 1.4148823022842407, - "learning_rate": 7.452763819095477e-05, - "loss": 5.3035, - "step": 25853 - }, - { - "epoch": 13.483181225554107, - "grad_norm": 1.4886530637741089, - "learning_rate": 7.452663316582915e-05, - "loss": 5.3835, - "step": 25854 - }, - { - "epoch": 13.483702737940026, - "grad_norm": 1.4907963275909424, - "learning_rate": 7.452562814070352e-05, - "loss": 5.2467, - "step": 25855 - }, - { - "epoch": 13.484224250325946, - "grad_norm": 1.4680252075195312, - "learning_rate": 7.45246231155779e-05, - "loss": 4.8174, - "step": 25856 - }, - { - "epoch": 13.484745762711864, - "grad_norm": 1.5300137996673584, - "learning_rate": 7.452361809045226e-05, - "loss": 4.9515, - "step": 25857 - }, - { - "epoch": 13.485267275097783, - "grad_norm": 1.539993405342102, - "learning_rate": 7.452261306532664e-05, - "loss": 5.6634, - "step": 25858 - }, - { - "epoch": 13.485788787483703, - "grad_norm": 1.51149582862854, - "learning_rate": 7.452160804020101e-05, - "loss": 5.2168, - "step": 25859 - }, - { - "epoch": 13.486310299869622, - "grad_norm": 1.3279024362564087, - "learning_rate": 7.452060301507538e-05, - "loss": 5.7916, - "step": 25860 - }, - { - "epoch": 13.486831812255542, - "grad_norm": 1.4908638000488281, - "learning_rate": 7.451959798994975e-05, - "loss": 4.9764, - "step": 25861 - }, - { - "epoch": 13.487353324641461, - "grad_norm": 1.4590305089950562, - "learning_rate": 7.451859296482411e-05, - "loss": 5.5383, - "step": 25862 - }, - { - "epoch": 13.487874837027379, - "grad_norm": 1.5093717575073242, - "learning_rate": 7.451758793969849e-05, - "loss": 5.4227, - "step": 25863 - }, - { - "epoch": 13.488396349413298, - "grad_norm": 1.4809075593948364, - "learning_rate": 7.451658291457287e-05, - "loss": 4.8143, - "step": 25864 - }, - { - "epoch": 13.488917861799218, - "grad_norm": 1.5347845554351807, - "learning_rate": 7.451557788944725e-05, - "loss": 5.4025, - "step": 25865 - }, - { - "epoch": 13.489439374185137, - "grad_norm": 1.4081379175186157, - "learning_rate": 7.451457286432161e-05, - "loss": 5.3954, - "step": 25866 - }, - { - "epoch": 13.489960886571057, - "grad_norm": 1.5008811950683594, - "learning_rate": 7.451356783919599e-05, - "loss": 5.6215, - "step": 25867 - }, - { - "epoch": 13.490482398956976, - "grad_norm": 1.4237391948699951, - "learning_rate": 7.451256281407035e-05, - "loss": 5.5102, - "step": 25868 - }, - { - "epoch": 13.491003911342894, - "grad_norm": 1.5548973083496094, - "learning_rate": 7.451155778894473e-05, - "loss": 5.1366, - "step": 25869 - }, - { - "epoch": 13.491525423728813, - "grad_norm": 1.4376118183135986, - "learning_rate": 7.45105527638191e-05, - "loss": 5.4914, - "step": 25870 - }, - { - "epoch": 13.492046936114733, - "grad_norm": 1.6306744813919067, - "learning_rate": 7.450954773869347e-05, - "loss": 4.7485, - "step": 25871 - }, - { - "epoch": 13.492568448500652, - "grad_norm": 1.4672616720199585, - "learning_rate": 7.450854271356784e-05, - "loss": 5.5098, - "step": 25872 - }, - { - "epoch": 13.493089960886572, - "grad_norm": 1.471908688545227, - "learning_rate": 7.450753768844222e-05, - "loss": 5.2155, - "step": 25873 - }, - { - "epoch": 13.493611473272491, - "grad_norm": 1.4668617248535156, - "learning_rate": 7.450653266331658e-05, - "loss": 5.2369, - "step": 25874 - }, - { - "epoch": 13.494132985658409, - "grad_norm": 1.474664330482483, - "learning_rate": 7.450552763819096e-05, - "loss": 5.6196, - "step": 25875 - }, - { - "epoch": 13.494654498044328, - "grad_norm": 1.4355820417404175, - "learning_rate": 7.450452261306534e-05, - "loss": 5.425, - "step": 25876 - }, - { - "epoch": 13.495176010430248, - "grad_norm": 1.4470003843307495, - "learning_rate": 7.45035175879397e-05, - "loss": 5.0652, - "step": 25877 - }, - { - "epoch": 13.495697522816167, - "grad_norm": 1.4440104961395264, - "learning_rate": 7.450251256281408e-05, - "loss": 5.0029, - "step": 25878 - }, - { - "epoch": 13.496219035202087, - "grad_norm": 1.4482178688049316, - "learning_rate": 7.450150753768844e-05, - "loss": 5.2514, - "step": 25879 - }, - { - "epoch": 13.496740547588006, - "grad_norm": 1.454601526260376, - "learning_rate": 7.450050251256282e-05, - "loss": 5.6576, - "step": 25880 - }, - { - "epoch": 13.497262059973924, - "grad_norm": 1.4809670448303223, - "learning_rate": 7.449949748743718e-05, - "loss": 5.494, - "step": 25881 - }, - { - "epoch": 13.497783572359843, - "grad_norm": 1.492385745048523, - "learning_rate": 7.449849246231156e-05, - "loss": 5.1223, - "step": 25882 - }, - { - "epoch": 13.498305084745763, - "grad_norm": 1.3625870943069458, - "learning_rate": 7.449748743718593e-05, - "loss": 5.704, - "step": 25883 - }, - { - "epoch": 13.498826597131682, - "grad_norm": 1.4367799758911133, - "learning_rate": 7.44964824120603e-05, - "loss": 5.4066, - "step": 25884 - }, - { - "epoch": 13.499348109517602, - "grad_norm": 1.4163209199905396, - "learning_rate": 7.449547738693468e-05, - "loss": 5.5574, - "step": 25885 - }, - { - "epoch": 13.499869621903521, - "grad_norm": 1.391627550125122, - "learning_rate": 7.449447236180906e-05, - "loss": 5.5572, - "step": 25886 - }, - { - "epoch": 13.500391134289439, - "grad_norm": 1.5171618461608887, - "learning_rate": 7.449346733668342e-05, - "loss": 5.4884, - "step": 25887 - }, - { - "epoch": 13.500912646675358, - "grad_norm": 1.4364819526672363, - "learning_rate": 7.44924623115578e-05, - "loss": 5.3345, - "step": 25888 - }, - { - "epoch": 13.501434159061278, - "grad_norm": 1.4262064695358276, - "learning_rate": 7.449145728643217e-05, - "loss": 5.7556, - "step": 25889 - }, - { - "epoch": 13.501955671447197, - "grad_norm": 1.540083646774292, - "learning_rate": 7.449045226130653e-05, - "loss": 5.6249, - "step": 25890 - }, - { - "epoch": 13.502477183833117, - "grad_norm": 1.4518067836761475, - "learning_rate": 7.448944723618091e-05, - "loss": 5.5693, - "step": 25891 - }, - { - "epoch": 13.502998696219036, - "grad_norm": 1.4950978755950928, - "learning_rate": 7.448844221105527e-05, - "loss": 5.6473, - "step": 25892 - }, - { - "epoch": 13.503520208604954, - "grad_norm": 1.3784857988357544, - "learning_rate": 7.448743718592965e-05, - "loss": 4.7384, - "step": 25893 - }, - { - "epoch": 13.504041720990873, - "grad_norm": 1.405648946762085, - "learning_rate": 7.448643216080401e-05, - "loss": 5.5047, - "step": 25894 - }, - { - "epoch": 13.504563233376793, - "grad_norm": 1.4763556718826294, - "learning_rate": 7.448542713567839e-05, - "loss": 5.6243, - "step": 25895 - }, - { - "epoch": 13.505084745762712, - "grad_norm": 1.3729089498519897, - "learning_rate": 7.448442211055277e-05, - "loss": 5.6672, - "step": 25896 - }, - { - "epoch": 13.505606258148632, - "grad_norm": 1.5258829593658447, - "learning_rate": 7.448341708542715e-05, - "loss": 5.6468, - "step": 25897 - }, - { - "epoch": 13.506127770534551, - "grad_norm": 1.375726580619812, - "learning_rate": 7.448241206030151e-05, - "loss": 5.5837, - "step": 25898 - }, - { - "epoch": 13.506649282920469, - "grad_norm": 1.3957444429397583, - "learning_rate": 7.448140703517589e-05, - "loss": 5.3249, - "step": 25899 - }, - { - "epoch": 13.507170795306388, - "grad_norm": 1.3999435901641846, - "learning_rate": 7.448040201005025e-05, - "loss": 5.3531, - "step": 25900 - }, - { - "epoch": 13.507692307692308, - "grad_norm": 1.4674938917160034, - "learning_rate": 7.447939698492463e-05, - "loss": 5.2208, - "step": 25901 - }, - { - "epoch": 13.508213820078227, - "grad_norm": 1.6256000995635986, - "learning_rate": 7.4478391959799e-05, - "loss": 5.0913, - "step": 25902 - }, - { - "epoch": 13.508735332464147, - "grad_norm": 1.3792133331298828, - "learning_rate": 7.447738693467336e-05, - "loss": 4.1037, - "step": 25903 - }, - { - "epoch": 13.509256844850064, - "grad_norm": 1.4572721719741821, - "learning_rate": 7.447638190954774e-05, - "loss": 5.3904, - "step": 25904 - }, - { - "epoch": 13.509778357235984, - "grad_norm": 1.5057754516601562, - "learning_rate": 7.447537688442212e-05, - "loss": 5.6223, - "step": 25905 - }, - { - "epoch": 13.510299869621903, - "grad_norm": 1.455460548400879, - "learning_rate": 7.44743718592965e-05, - "loss": 5.7147, - "step": 25906 - }, - { - "epoch": 13.510821382007823, - "grad_norm": 1.5177149772644043, - "learning_rate": 7.447336683417086e-05, - "loss": 5.4809, - "step": 25907 - }, - { - "epoch": 13.511342894393742, - "grad_norm": 1.473115086555481, - "learning_rate": 7.447236180904524e-05, - "loss": 5.288, - "step": 25908 - }, - { - "epoch": 13.511864406779662, - "grad_norm": 1.4698179960250854, - "learning_rate": 7.44713567839196e-05, - "loss": 5.7207, - "step": 25909 - }, - { - "epoch": 13.512385919165581, - "grad_norm": 1.609645962715149, - "learning_rate": 7.447035175879398e-05, - "loss": 5.231, - "step": 25910 - }, - { - "epoch": 13.512907431551499, - "grad_norm": 1.3645532131195068, - "learning_rate": 7.446934673366834e-05, - "loss": 5.5301, - "step": 25911 - }, - { - "epoch": 13.513428943937418, - "grad_norm": 1.5437088012695312, - "learning_rate": 7.446834170854272e-05, - "loss": 4.8578, - "step": 25912 - }, - { - "epoch": 13.513950456323338, - "grad_norm": 1.5836268663406372, - "learning_rate": 7.446733668341708e-05, - "loss": 5.2183, - "step": 25913 - }, - { - "epoch": 13.514471968709257, - "grad_norm": 1.4085338115692139, - "learning_rate": 7.446633165829146e-05, - "loss": 5.119, - "step": 25914 - }, - { - "epoch": 13.514993481095177, - "grad_norm": 1.5094655752182007, - "learning_rate": 7.446532663316583e-05, - "loss": 5.3904, - "step": 25915 - }, - { - "epoch": 13.515514993481094, - "grad_norm": 1.6087820529937744, - "learning_rate": 7.44643216080402e-05, - "loss": 4.8717, - "step": 25916 - }, - { - "epoch": 13.516036505867014, - "grad_norm": 1.5217869281768799, - "learning_rate": 7.446331658291458e-05, - "loss": 5.3468, - "step": 25917 - }, - { - "epoch": 13.516558018252933, - "grad_norm": 1.4924753904342651, - "learning_rate": 7.446231155778895e-05, - "loss": 4.8923, - "step": 25918 - }, - { - "epoch": 13.517079530638853, - "grad_norm": 1.3980692625045776, - "learning_rate": 7.446130653266332e-05, - "loss": 5.2736, - "step": 25919 - }, - { - "epoch": 13.517601043024772, - "grad_norm": 1.458905816078186, - "learning_rate": 7.446030150753769e-05, - "loss": 5.745, - "step": 25920 - }, - { - "epoch": 13.518122555410692, - "grad_norm": 1.4303532838821411, - "learning_rate": 7.445929648241207e-05, - "loss": 5.8091, - "step": 25921 - }, - { - "epoch": 13.518644067796611, - "grad_norm": 1.3283510208129883, - "learning_rate": 7.445829145728643e-05, - "loss": 5.8899, - "step": 25922 - }, - { - "epoch": 13.519165580182529, - "grad_norm": 1.4057729244232178, - "learning_rate": 7.445728643216081e-05, - "loss": 5.6153, - "step": 25923 - }, - { - "epoch": 13.519687092568448, - "grad_norm": 1.4421374797821045, - "learning_rate": 7.445628140703517e-05, - "loss": 5.0896, - "step": 25924 - }, - { - "epoch": 13.520208604954368, - "grad_norm": 1.449812650680542, - "learning_rate": 7.445527638190955e-05, - "loss": 5.7735, - "step": 25925 - }, - { - "epoch": 13.520730117340287, - "grad_norm": 1.495416522026062, - "learning_rate": 7.445427135678393e-05, - "loss": 5.7947, - "step": 25926 - }, - { - "epoch": 13.521251629726207, - "grad_norm": 1.3541388511657715, - "learning_rate": 7.44532663316583e-05, - "loss": 5.4728, - "step": 25927 - }, - { - "epoch": 13.521773142112124, - "grad_norm": 1.3956247568130493, - "learning_rate": 7.445226130653267e-05, - "loss": 5.465, - "step": 25928 - }, - { - "epoch": 13.522294654498044, - "grad_norm": 1.5294207334518433, - "learning_rate": 7.445125628140703e-05, - "loss": 5.2378, - "step": 25929 - }, - { - "epoch": 13.522816166883963, - "grad_norm": 1.555928111076355, - "learning_rate": 7.445025125628141e-05, - "loss": 5.1533, - "step": 25930 - }, - { - "epoch": 13.523337679269883, - "grad_norm": 1.605849266052246, - "learning_rate": 7.444924623115578e-05, - "loss": 5.5037, - "step": 25931 - }, - { - "epoch": 13.523859191655802, - "grad_norm": 1.4362053871154785, - "learning_rate": 7.444824120603015e-05, - "loss": 5.3092, - "step": 25932 - }, - { - "epoch": 13.524380704041722, - "grad_norm": 1.5531110763549805, - "learning_rate": 7.444723618090452e-05, - "loss": 5.1146, - "step": 25933 - }, - { - "epoch": 13.52490221642764, - "grad_norm": 1.5433335304260254, - "learning_rate": 7.44462311557789e-05, - "loss": 5.4141, - "step": 25934 - }, - { - "epoch": 13.525423728813559, - "grad_norm": 1.428383231163025, - "learning_rate": 7.444522613065326e-05, - "loss": 5.8697, - "step": 25935 - }, - { - "epoch": 13.525945241199478, - "grad_norm": 1.474097728729248, - "learning_rate": 7.444422110552764e-05, - "loss": 5.1844, - "step": 25936 - }, - { - "epoch": 13.526466753585398, - "grad_norm": 1.5129460096359253, - "learning_rate": 7.444321608040202e-05, - "loss": 5.4424, - "step": 25937 - }, - { - "epoch": 13.526988265971317, - "grad_norm": 1.5997051000595093, - "learning_rate": 7.44422110552764e-05, - "loss": 4.9856, - "step": 25938 - }, - { - "epoch": 13.527509778357237, - "grad_norm": 1.4350281953811646, - "learning_rate": 7.444120603015076e-05, - "loss": 5.5749, - "step": 25939 - }, - { - "epoch": 13.528031290743154, - "grad_norm": 1.448862910270691, - "learning_rate": 7.444020100502514e-05, - "loss": 4.8669, - "step": 25940 - }, - { - "epoch": 13.528552803129074, - "grad_norm": 1.5424367189407349, - "learning_rate": 7.44391959798995e-05, - "loss": 5.1068, - "step": 25941 - }, - { - "epoch": 13.529074315514993, - "grad_norm": 1.4703173637390137, - "learning_rate": 7.443819095477387e-05, - "loss": 5.5195, - "step": 25942 - }, - { - "epoch": 13.529595827900913, - "grad_norm": 1.4776593446731567, - "learning_rate": 7.443718592964824e-05, - "loss": 5.427, - "step": 25943 - }, - { - "epoch": 13.530117340286832, - "grad_norm": 1.4899742603302002, - "learning_rate": 7.443618090452261e-05, - "loss": 4.5995, - "step": 25944 - }, - { - "epoch": 13.530638852672752, - "grad_norm": 1.577268362045288, - "learning_rate": 7.443517587939699e-05, - "loss": 5.1151, - "step": 25945 - }, - { - "epoch": 13.53116036505867, - "grad_norm": 1.6029402017593384, - "learning_rate": 7.443417085427136e-05, - "loss": 4.8939, - "step": 25946 - }, - { - "epoch": 13.531681877444589, - "grad_norm": 1.4128305912017822, - "learning_rate": 7.443316582914574e-05, - "loss": 5.5488, - "step": 25947 - }, - { - "epoch": 13.532203389830508, - "grad_norm": 1.4772982597351074, - "learning_rate": 7.44321608040201e-05, - "loss": 5.7103, - "step": 25948 - }, - { - "epoch": 13.532724902216428, - "grad_norm": 1.4211106300354004, - "learning_rate": 7.443115577889448e-05, - "loss": 5.3892, - "step": 25949 - }, - { - "epoch": 13.533246414602347, - "grad_norm": 1.4242748022079468, - "learning_rate": 7.443015075376885e-05, - "loss": 5.5066, - "step": 25950 - }, - { - "epoch": 13.533767926988267, - "grad_norm": 1.4830518960952759, - "learning_rate": 7.442914572864323e-05, - "loss": 5.3333, - "step": 25951 - }, - { - "epoch": 13.534289439374184, - "grad_norm": 1.561877727508545, - "learning_rate": 7.442814070351759e-05, - "loss": 4.8842, - "step": 25952 - }, - { - "epoch": 13.534810951760104, - "grad_norm": 1.5283575057983398, - "learning_rate": 7.442713567839197e-05, - "loss": 5.061, - "step": 25953 - }, - { - "epoch": 13.535332464146023, - "grad_norm": 1.4257550239562988, - "learning_rate": 7.442613065326633e-05, - "loss": 5.7237, - "step": 25954 - }, - { - "epoch": 13.535853976531943, - "grad_norm": 1.5343230962753296, - "learning_rate": 7.44251256281407e-05, - "loss": 5.3933, - "step": 25955 - }, - { - "epoch": 13.536375488917862, - "grad_norm": 1.4083856344223022, - "learning_rate": 7.442412060301507e-05, - "loss": 5.2668, - "step": 25956 - }, - { - "epoch": 13.536897001303782, - "grad_norm": 1.3650084733963013, - "learning_rate": 7.442311557788945e-05, - "loss": 5.5279, - "step": 25957 - }, - { - "epoch": 13.5374185136897, - "grad_norm": 1.374729037284851, - "learning_rate": 7.442211055276383e-05, - "loss": 5.3821, - "step": 25958 - }, - { - "epoch": 13.537940026075619, - "grad_norm": 1.503204107284546, - "learning_rate": 7.44211055276382e-05, - "loss": 5.2006, - "step": 25959 - }, - { - "epoch": 13.538461538461538, - "grad_norm": 1.4023096561431885, - "learning_rate": 7.442010050251257e-05, - "loss": 5.6624, - "step": 25960 - }, - { - "epoch": 13.538983050847458, - "grad_norm": 1.4590502977371216, - "learning_rate": 7.441909547738694e-05, - "loss": 5.3592, - "step": 25961 - }, - { - "epoch": 13.539504563233377, - "grad_norm": 1.388390302658081, - "learning_rate": 7.441809045226131e-05, - "loss": 5.764, - "step": 25962 - }, - { - "epoch": 13.540026075619297, - "grad_norm": 1.434746503829956, - "learning_rate": 7.441708542713568e-05, - "loss": 5.6813, - "step": 25963 - }, - { - "epoch": 13.540547588005214, - "grad_norm": 1.396411418914795, - "learning_rate": 7.441608040201006e-05, - "loss": 5.4462, - "step": 25964 - }, - { - "epoch": 13.541069100391134, - "grad_norm": 1.2860091924667358, - "learning_rate": 7.441507537688442e-05, - "loss": 5.6052, - "step": 25965 - }, - { - "epoch": 13.541590612777053, - "grad_norm": 1.4205468893051147, - "learning_rate": 7.44140703517588e-05, - "loss": 5.5528, - "step": 25966 - }, - { - "epoch": 13.542112125162973, - "grad_norm": 1.3812165260314941, - "learning_rate": 7.441306532663318e-05, - "loss": 5.7332, - "step": 25967 - }, - { - "epoch": 13.542633637548892, - "grad_norm": 1.4035179615020752, - "learning_rate": 7.441206030150754e-05, - "loss": 5.6863, - "step": 25968 - }, - { - "epoch": 13.543155149934812, - "grad_norm": 1.3843671083450317, - "learning_rate": 7.441105527638192e-05, - "loss": 5.7062, - "step": 25969 - }, - { - "epoch": 13.54367666232073, - "grad_norm": 1.481192946434021, - "learning_rate": 7.441005025125628e-05, - "loss": 5.4784, - "step": 25970 - }, - { - "epoch": 13.544198174706649, - "grad_norm": 1.3759831190109253, - "learning_rate": 7.440904522613066e-05, - "loss": 5.7803, - "step": 25971 - }, - { - "epoch": 13.544719687092568, - "grad_norm": 1.4097306728363037, - "learning_rate": 7.440804020100502e-05, - "loss": 5.3642, - "step": 25972 - }, - { - "epoch": 13.545241199478488, - "grad_norm": 1.600789189338684, - "learning_rate": 7.44070351758794e-05, - "loss": 5.1343, - "step": 25973 - }, - { - "epoch": 13.545762711864407, - "grad_norm": 1.5550453662872314, - "learning_rate": 7.440603015075377e-05, - "loss": 5.351, - "step": 25974 - }, - { - "epoch": 13.546284224250327, - "grad_norm": 1.4413334131240845, - "learning_rate": 7.440502512562814e-05, - "loss": 5.5572, - "step": 25975 - }, - { - "epoch": 13.546805736636244, - "grad_norm": 1.4505242109298706, - "learning_rate": 7.440402010050251e-05, - "loss": 5.5466, - "step": 25976 - }, - { - "epoch": 13.547327249022164, - "grad_norm": 1.6240147352218628, - "learning_rate": 7.440301507537689e-05, - "loss": 4.8758, - "step": 25977 - }, - { - "epoch": 13.547848761408083, - "grad_norm": 1.4561899900436401, - "learning_rate": 7.440201005025126e-05, - "loss": 5.3084, - "step": 25978 - }, - { - "epoch": 13.548370273794003, - "grad_norm": 1.5579581260681152, - "learning_rate": 7.440100502512564e-05, - "loss": 5.5399, - "step": 25979 - }, - { - "epoch": 13.548891786179922, - "grad_norm": 1.5164270401000977, - "learning_rate": 7.44e-05, - "loss": 5.4031, - "step": 25980 - }, - { - "epoch": 13.549413298565842, - "grad_norm": 1.5167006254196167, - "learning_rate": 7.439899497487438e-05, - "loss": 5.1222, - "step": 25981 - }, - { - "epoch": 13.54993481095176, - "grad_norm": 1.4125583171844482, - "learning_rate": 7.439798994974875e-05, - "loss": 5.4795, - "step": 25982 - }, - { - "epoch": 13.550456323337679, - "grad_norm": 1.407762050628662, - "learning_rate": 7.439698492462311e-05, - "loss": 5.3182, - "step": 25983 - }, - { - "epoch": 13.550977835723598, - "grad_norm": 1.5006707906723022, - "learning_rate": 7.439597989949749e-05, - "loss": 4.8142, - "step": 25984 - }, - { - "epoch": 13.551499348109518, - "grad_norm": 1.430970549583435, - "learning_rate": 7.439497487437185e-05, - "loss": 5.546, - "step": 25985 - }, - { - "epoch": 13.552020860495437, - "grad_norm": 1.4947845935821533, - "learning_rate": 7.439396984924623e-05, - "loss": 5.5284, - "step": 25986 - }, - { - "epoch": 13.552542372881355, - "grad_norm": 1.5008940696716309, - "learning_rate": 7.439296482412061e-05, - "loss": 5.0784, - "step": 25987 - }, - { - "epoch": 13.553063885267274, - "grad_norm": 1.4941920042037964, - "learning_rate": 7.439195979899499e-05, - "loss": 5.2119, - "step": 25988 - }, - { - "epoch": 13.553585397653194, - "grad_norm": 1.4802685976028442, - "learning_rate": 7.439095477386935e-05, - "loss": 5.187, - "step": 25989 - }, - { - "epoch": 13.554106910039113, - "grad_norm": 1.4996001720428467, - "learning_rate": 7.438994974874373e-05, - "loss": 5.2142, - "step": 25990 - }, - { - "epoch": 13.554628422425033, - "grad_norm": 1.4696357250213623, - "learning_rate": 7.43889447236181e-05, - "loss": 5.5947, - "step": 25991 - }, - { - "epoch": 13.555149934810952, - "grad_norm": 1.2747399806976318, - "learning_rate": 7.438793969849247e-05, - "loss": 4.9744, - "step": 25992 - }, - { - "epoch": 13.555671447196872, - "grad_norm": 1.4503189325332642, - "learning_rate": 7.438693467336684e-05, - "loss": 5.2261, - "step": 25993 - }, - { - "epoch": 13.55619295958279, - "grad_norm": 1.3531697988510132, - "learning_rate": 7.438592964824121e-05, - "loss": 5.592, - "step": 25994 - }, - { - "epoch": 13.556714471968709, - "grad_norm": 1.600414514541626, - "learning_rate": 7.438492462311558e-05, - "loss": 4.9958, - "step": 25995 - }, - { - "epoch": 13.557235984354628, - "grad_norm": 1.483246922492981, - "learning_rate": 7.438391959798994e-05, - "loss": 5.454, - "step": 25996 - }, - { - "epoch": 13.557757496740548, - "grad_norm": 1.4340903759002686, - "learning_rate": 7.438291457286432e-05, - "loss": 5.4076, - "step": 25997 - }, - { - "epoch": 13.558279009126467, - "grad_norm": 1.4346660375595093, - "learning_rate": 7.43819095477387e-05, - "loss": 5.5081, - "step": 25998 - }, - { - "epoch": 13.558800521512385, - "grad_norm": 1.6049302816390991, - "learning_rate": 7.438090452261308e-05, - "loss": 4.9579, - "step": 25999 - }, - { - "epoch": 13.559322033898304, - "grad_norm": 1.406704306602478, - "learning_rate": 7.437989949748744e-05, - "loss": 5.4594, - "step": 26000 - }, - { - "epoch": 13.559843546284224, - "grad_norm": 1.4006972312927246, - "learning_rate": 7.437889447236182e-05, - "loss": 5.6853, - "step": 26001 - }, - { - "epoch": 13.560365058670143, - "grad_norm": 1.4542219638824463, - "learning_rate": 7.437788944723618e-05, - "loss": 5.4594, - "step": 26002 - }, - { - "epoch": 13.560886571056063, - "grad_norm": 1.4597899913787842, - "learning_rate": 7.437688442211056e-05, - "loss": 5.8747, - "step": 26003 - }, - { - "epoch": 13.561408083441982, - "grad_norm": 1.4665014743804932, - "learning_rate": 7.437587939698492e-05, - "loss": 5.4685, - "step": 26004 - }, - { - "epoch": 13.561929595827902, - "grad_norm": 1.480733036994934, - "learning_rate": 7.43748743718593e-05, - "loss": 5.6536, - "step": 26005 - }, - { - "epoch": 13.56245110821382, - "grad_norm": 1.3248893022537231, - "learning_rate": 7.437386934673367e-05, - "loss": 5.8838, - "step": 26006 - }, - { - "epoch": 13.562972620599739, - "grad_norm": 1.4129865169525146, - "learning_rate": 7.437286432160804e-05, - "loss": 5.1572, - "step": 26007 - }, - { - "epoch": 13.563494132985658, - "grad_norm": 1.5340595245361328, - "learning_rate": 7.437185929648241e-05, - "loss": 5.5518, - "step": 26008 - }, - { - "epoch": 13.564015645371578, - "grad_norm": 1.4122991561889648, - "learning_rate": 7.437085427135679e-05, - "loss": 5.7039, - "step": 26009 - }, - { - "epoch": 13.564537157757497, - "grad_norm": 1.442515254020691, - "learning_rate": 7.436984924623116e-05, - "loss": 5.6728, - "step": 26010 - }, - { - "epoch": 13.565058670143415, - "grad_norm": 1.4544196128845215, - "learning_rate": 7.436884422110553e-05, - "loss": 5.6622, - "step": 26011 - }, - { - "epoch": 13.565580182529335, - "grad_norm": 1.5100643634796143, - "learning_rate": 7.43678391959799e-05, - "loss": 5.0699, - "step": 26012 - }, - { - "epoch": 13.566101694915254, - "grad_norm": 1.4016236066818237, - "learning_rate": 7.436683417085427e-05, - "loss": 5.2161, - "step": 26013 - }, - { - "epoch": 13.566623207301173, - "grad_norm": 1.5255712270736694, - "learning_rate": 7.436582914572865e-05, - "loss": 5.519, - "step": 26014 - }, - { - "epoch": 13.567144719687093, - "grad_norm": 1.4653573036193848, - "learning_rate": 7.436482412060301e-05, - "loss": 5.3673, - "step": 26015 - }, - { - "epoch": 13.567666232073012, - "grad_norm": 1.583132028579712, - "learning_rate": 7.436381909547739e-05, - "loss": 5.1888, - "step": 26016 - }, - { - "epoch": 13.568187744458932, - "grad_norm": 1.6191048622131348, - "learning_rate": 7.436281407035176e-05, - "loss": 5.5302, - "step": 26017 - }, - { - "epoch": 13.56870925684485, - "grad_norm": 1.548568844795227, - "learning_rate": 7.436180904522613e-05, - "loss": 5.1323, - "step": 26018 - }, - { - "epoch": 13.569230769230769, - "grad_norm": 1.449314832687378, - "learning_rate": 7.436080402010051e-05, - "loss": 5.4913, - "step": 26019 - }, - { - "epoch": 13.569752281616688, - "grad_norm": 1.38150954246521, - "learning_rate": 7.435979899497489e-05, - "loss": 5.589, - "step": 26020 - }, - { - "epoch": 13.570273794002608, - "grad_norm": 1.444655179977417, - "learning_rate": 7.435879396984925e-05, - "loss": 5.7878, - "step": 26021 - }, - { - "epoch": 13.570795306388527, - "grad_norm": 1.4486509561538696, - "learning_rate": 7.435778894472362e-05, - "loss": 5.496, - "step": 26022 - }, - { - "epoch": 13.571316818774445, - "grad_norm": 1.4810986518859863, - "learning_rate": 7.4356783919598e-05, - "loss": 5.5789, - "step": 26023 - }, - { - "epoch": 13.571838331160365, - "grad_norm": 1.5056471824645996, - "learning_rate": 7.435577889447236e-05, - "loss": 5.5904, - "step": 26024 - }, - { - "epoch": 13.572359843546284, - "grad_norm": 1.3563398122787476, - "learning_rate": 7.435477386934674e-05, - "loss": 5.5999, - "step": 26025 - }, - { - "epoch": 13.572881355932203, - "grad_norm": 1.3871859312057495, - "learning_rate": 7.43537688442211e-05, - "loss": 5.422, - "step": 26026 - }, - { - "epoch": 13.573402868318123, - "grad_norm": 1.3316458463668823, - "learning_rate": 7.435276381909548e-05, - "loss": 5.5855, - "step": 26027 - }, - { - "epoch": 13.573924380704042, - "grad_norm": 1.4283597469329834, - "learning_rate": 7.435175879396984e-05, - "loss": 5.5955, - "step": 26028 - }, - { - "epoch": 13.57444589308996, - "grad_norm": 1.5681982040405273, - "learning_rate": 7.435075376884422e-05, - "loss": 5.4473, - "step": 26029 - }, - { - "epoch": 13.57496740547588, - "grad_norm": 1.5039085149765015, - "learning_rate": 7.43497487437186e-05, - "loss": 5.155, - "step": 26030 - }, - { - "epoch": 13.575488917861799, - "grad_norm": 1.488642930984497, - "learning_rate": 7.434874371859298e-05, - "loss": 5.4681, - "step": 26031 - }, - { - "epoch": 13.576010430247718, - "grad_norm": 1.3703835010528564, - "learning_rate": 7.434773869346734e-05, - "loss": 5.1261, - "step": 26032 - }, - { - "epoch": 13.576531942633638, - "grad_norm": 1.4371651411056519, - "learning_rate": 7.434673366834172e-05, - "loss": 5.2824, - "step": 26033 - }, - { - "epoch": 13.577053455019557, - "grad_norm": 1.3895591497421265, - "learning_rate": 7.434572864321608e-05, - "loss": 5.6247, - "step": 26034 - }, - { - "epoch": 13.577574967405475, - "grad_norm": 1.4925317764282227, - "learning_rate": 7.434472361809045e-05, - "loss": 5.4687, - "step": 26035 - }, - { - "epoch": 13.578096479791395, - "grad_norm": 1.4482593536376953, - "learning_rate": 7.434371859296483e-05, - "loss": 4.7855, - "step": 26036 - }, - { - "epoch": 13.578617992177314, - "grad_norm": 1.5875540971755981, - "learning_rate": 7.434271356783919e-05, - "loss": 5.2388, - "step": 26037 - }, - { - "epoch": 13.579139504563233, - "grad_norm": 1.4118850231170654, - "learning_rate": 7.434170854271357e-05, - "loss": 5.3682, - "step": 26038 - }, - { - "epoch": 13.579661016949153, - "grad_norm": 1.4788395166397095, - "learning_rate": 7.434070351758795e-05, - "loss": 5.654, - "step": 26039 - }, - { - "epoch": 13.580182529335072, - "grad_norm": 1.4811419248580933, - "learning_rate": 7.433969849246232e-05, - "loss": 4.9133, - "step": 26040 - }, - { - "epoch": 13.58070404172099, - "grad_norm": 1.489540696144104, - "learning_rate": 7.433869346733669e-05, - "loss": 5.4461, - "step": 26041 - }, - { - "epoch": 13.58122555410691, - "grad_norm": 1.4745806455612183, - "learning_rate": 7.433768844221107e-05, - "loss": 5.2191, - "step": 26042 - }, - { - "epoch": 13.581747066492829, - "grad_norm": 1.5811735391616821, - "learning_rate": 7.433668341708543e-05, - "loss": 5.3493, - "step": 26043 - }, - { - "epoch": 13.582268578878748, - "grad_norm": 1.5544021129608154, - "learning_rate": 7.433567839195981e-05, - "loss": 4.8793, - "step": 26044 - }, - { - "epoch": 13.582790091264668, - "grad_norm": 1.3718843460083008, - "learning_rate": 7.433467336683417e-05, - "loss": 5.8899, - "step": 26045 - }, - { - "epoch": 13.583311603650587, - "grad_norm": 1.551510214805603, - "learning_rate": 7.433366834170855e-05, - "loss": 5.4853, - "step": 26046 - }, - { - "epoch": 13.583833116036505, - "grad_norm": 1.3683111667633057, - "learning_rate": 7.433266331658291e-05, - "loss": 5.4532, - "step": 26047 - }, - { - "epoch": 13.584354628422425, - "grad_norm": 1.3143409490585327, - "learning_rate": 7.433165829145728e-05, - "loss": 5.7177, - "step": 26048 - }, - { - "epoch": 13.584876140808344, - "grad_norm": 1.4222733974456787, - "learning_rate": 7.433065326633166e-05, - "loss": 5.398, - "step": 26049 - }, - { - "epoch": 13.585397653194264, - "grad_norm": 1.468855857849121, - "learning_rate": 7.432964824120603e-05, - "loss": 5.5878, - "step": 26050 - }, - { - "epoch": 13.585919165580183, - "grad_norm": 1.3871091604232788, - "learning_rate": 7.432864321608041e-05, - "loss": 5.7379, - "step": 26051 - }, - { - "epoch": 13.586440677966102, - "grad_norm": 1.3512885570526123, - "learning_rate": 7.432763819095478e-05, - "loss": 5.5, - "step": 26052 - }, - { - "epoch": 13.58696219035202, - "grad_norm": 1.5669827461242676, - "learning_rate": 7.432663316582915e-05, - "loss": 4.1664, - "step": 26053 - }, - { - "epoch": 13.58748370273794, - "grad_norm": 1.47391676902771, - "learning_rate": 7.432562814070352e-05, - "loss": 5.4363, - "step": 26054 - }, - { - "epoch": 13.588005215123859, - "grad_norm": 1.4235639572143555, - "learning_rate": 7.43246231155779e-05, - "loss": 5.6182, - "step": 26055 - }, - { - "epoch": 13.588526727509779, - "grad_norm": 1.494527816772461, - "learning_rate": 7.432361809045226e-05, - "loss": 5.057, - "step": 26056 - }, - { - "epoch": 13.589048239895698, - "grad_norm": 1.597481608390808, - "learning_rate": 7.432261306532664e-05, - "loss": 4.9702, - "step": 26057 - }, - { - "epoch": 13.589569752281617, - "grad_norm": 1.4714155197143555, - "learning_rate": 7.4321608040201e-05, - "loss": 5.7136, - "step": 26058 - }, - { - "epoch": 13.590091264667535, - "grad_norm": 1.390350103378296, - "learning_rate": 7.432060301507538e-05, - "loss": 5.7085, - "step": 26059 - }, - { - "epoch": 13.590612777053455, - "grad_norm": 1.5258535146713257, - "learning_rate": 7.431959798994976e-05, - "loss": 5.2635, - "step": 26060 - }, - { - "epoch": 13.591134289439374, - "grad_norm": 1.3271933794021606, - "learning_rate": 7.431859296482414e-05, - "loss": 5.3009, - "step": 26061 - }, - { - "epoch": 13.591655801825294, - "grad_norm": 1.4481569528579712, - "learning_rate": 7.43175879396985e-05, - "loss": 5.2012, - "step": 26062 - }, - { - "epoch": 13.592177314211213, - "grad_norm": 1.433272361755371, - "learning_rate": 7.431658291457286e-05, - "loss": 4.9715, - "step": 26063 - }, - { - "epoch": 13.592698826597132, - "grad_norm": 1.515386939048767, - "learning_rate": 7.431557788944724e-05, - "loss": 5.1879, - "step": 26064 - }, - { - "epoch": 13.59322033898305, - "grad_norm": 1.432094693183899, - "learning_rate": 7.43145728643216e-05, - "loss": 4.9586, - "step": 26065 - }, - { - "epoch": 13.59374185136897, - "grad_norm": 1.38548743724823, - "learning_rate": 7.431356783919598e-05, - "loss": 5.4431, - "step": 26066 - }, - { - "epoch": 13.594263363754889, - "grad_norm": 1.4325021505355835, - "learning_rate": 7.431256281407035e-05, - "loss": 5.446, - "step": 26067 - }, - { - "epoch": 13.594784876140809, - "grad_norm": 1.3506349325180054, - "learning_rate": 7.431155778894473e-05, - "loss": 5.3938, - "step": 26068 - }, - { - "epoch": 13.595306388526728, - "grad_norm": 1.4106082916259766, - "learning_rate": 7.431055276381909e-05, - "loss": 5.2892, - "step": 26069 - }, - { - "epoch": 13.595827900912647, - "grad_norm": 1.4728944301605225, - "learning_rate": 7.430954773869347e-05, - "loss": 5.3158, - "step": 26070 - }, - { - "epoch": 13.596349413298565, - "grad_norm": 1.49068284034729, - "learning_rate": 7.430854271356785e-05, - "loss": 5.3951, - "step": 26071 - }, - { - "epoch": 13.596870925684485, - "grad_norm": 1.4055867195129395, - "learning_rate": 7.430753768844222e-05, - "loss": 5.6645, - "step": 26072 - }, - { - "epoch": 13.597392438070404, - "grad_norm": 1.4547690153121948, - "learning_rate": 7.430653266331659e-05, - "loss": 5.3575, - "step": 26073 - }, - { - "epoch": 13.597913950456324, - "grad_norm": 1.3729548454284668, - "learning_rate": 7.430552763819097e-05, - "loss": 5.6431, - "step": 26074 - }, - { - "epoch": 13.598435462842243, - "grad_norm": 1.4396542310714722, - "learning_rate": 7.430452261306533e-05, - "loss": 5.5288, - "step": 26075 - }, - { - "epoch": 13.598956975228162, - "grad_norm": 1.5782101154327393, - "learning_rate": 7.43035175879397e-05, - "loss": 4.9493, - "step": 26076 - }, - { - "epoch": 13.59947848761408, - "grad_norm": 1.4793769121170044, - "learning_rate": 7.430251256281407e-05, - "loss": 5.0505, - "step": 26077 - }, - { - "epoch": 13.6, - "grad_norm": 1.505983829498291, - "learning_rate": 7.430150753768844e-05, - "loss": 5.585, - "step": 26078 - }, - { - "epoch": 13.600521512385919, - "grad_norm": 1.422743558883667, - "learning_rate": 7.430050251256281e-05, - "loss": 5.7707, - "step": 26079 - }, - { - "epoch": 13.601043024771839, - "grad_norm": 1.7881399393081665, - "learning_rate": 7.429949748743719e-05, - "loss": 4.5366, - "step": 26080 - }, - { - "epoch": 13.601564537157758, - "grad_norm": 1.4123483896255493, - "learning_rate": 7.429849246231157e-05, - "loss": 5.6908, - "step": 26081 - }, - { - "epoch": 13.602086049543676, - "grad_norm": 1.3842480182647705, - "learning_rate": 7.429748743718593e-05, - "loss": 5.6, - "step": 26082 - }, - { - "epoch": 13.602607561929595, - "grad_norm": 1.3630621433258057, - "learning_rate": 7.429648241206031e-05, - "loss": 5.6323, - "step": 26083 - }, - { - "epoch": 13.603129074315515, - "grad_norm": 1.4178394079208374, - "learning_rate": 7.429547738693468e-05, - "loss": 5.668, - "step": 26084 - }, - { - "epoch": 13.603650586701434, - "grad_norm": 1.4774198532104492, - "learning_rate": 7.429447236180905e-05, - "loss": 5.4208, - "step": 26085 - }, - { - "epoch": 13.604172099087354, - "grad_norm": 1.4778708219528198, - "learning_rate": 7.429346733668342e-05, - "loss": 5.2447, - "step": 26086 - }, - { - "epoch": 13.604693611473273, - "grad_norm": 1.4368757009506226, - "learning_rate": 7.42924623115578e-05, - "loss": 5.1727, - "step": 26087 - }, - { - "epoch": 13.605215123859193, - "grad_norm": 1.3840830326080322, - "learning_rate": 7.429145728643216e-05, - "loss": 5.3899, - "step": 26088 - }, - { - "epoch": 13.60573663624511, - "grad_norm": 1.526163935661316, - "learning_rate": 7.429045226130653e-05, - "loss": 5.5228, - "step": 26089 - }, - { - "epoch": 13.60625814863103, - "grad_norm": 1.5301872491836548, - "learning_rate": 7.42894472361809e-05, - "loss": 5.3923, - "step": 26090 - }, - { - "epoch": 13.60677966101695, - "grad_norm": 1.5186803340911865, - "learning_rate": 7.428844221105528e-05, - "loss": 4.9935, - "step": 26091 - }, - { - "epoch": 13.607301173402869, - "grad_norm": 1.5396679639816284, - "learning_rate": 7.428743718592966e-05, - "loss": 5.3114, - "step": 26092 - }, - { - "epoch": 13.607822685788788, - "grad_norm": 1.5623466968536377, - "learning_rate": 7.428643216080402e-05, - "loss": 4.9858, - "step": 26093 - }, - { - "epoch": 13.608344198174706, - "grad_norm": 1.4633643627166748, - "learning_rate": 7.42854271356784e-05, - "loss": 5.2921, - "step": 26094 - }, - { - "epoch": 13.608865710560625, - "grad_norm": 1.4185411930084229, - "learning_rate": 7.428442211055276e-05, - "loss": 5.8455, - "step": 26095 - }, - { - "epoch": 13.609387222946545, - "grad_norm": 1.4201661348342896, - "learning_rate": 7.428341708542714e-05, - "loss": 5.4255, - "step": 26096 - }, - { - "epoch": 13.609908735332464, - "grad_norm": 1.5785168409347534, - "learning_rate": 7.428241206030151e-05, - "loss": 4.7963, - "step": 26097 - }, - { - "epoch": 13.610430247718384, - "grad_norm": 1.4296561479568481, - "learning_rate": 7.428140703517588e-05, - "loss": 5.4951, - "step": 26098 - }, - { - "epoch": 13.610951760104303, - "grad_norm": 1.6388473510742188, - "learning_rate": 7.428040201005025e-05, - "loss": 5.2203, - "step": 26099 - }, - { - "epoch": 13.611473272490223, - "grad_norm": 1.554927945137024, - "learning_rate": 7.427939698492463e-05, - "loss": 5.3799, - "step": 26100 - }, - { - "epoch": 13.61199478487614, - "grad_norm": 1.4610971212387085, - "learning_rate": 7.4278391959799e-05, - "loss": 5.483, - "step": 26101 - }, - { - "epoch": 13.61251629726206, - "grad_norm": 1.4932643175125122, - "learning_rate": 7.427738693467337e-05, - "loss": 5.1398, - "step": 26102 - }, - { - "epoch": 13.61303780964798, - "grad_norm": 1.421945571899414, - "learning_rate": 7.427638190954775e-05, - "loss": 5.5296, - "step": 26103 - }, - { - "epoch": 13.613559322033899, - "grad_norm": 1.4800184965133667, - "learning_rate": 7.427537688442211e-05, - "loss": 5.3602, - "step": 26104 - }, - { - "epoch": 13.614080834419818, - "grad_norm": 1.375724196434021, - "learning_rate": 7.427437185929649e-05, - "loss": 5.6454, - "step": 26105 - }, - { - "epoch": 13.614602346805736, - "grad_norm": 1.3794810771942139, - "learning_rate": 7.427336683417085e-05, - "loss": 5.5726, - "step": 26106 - }, - { - "epoch": 13.615123859191655, - "grad_norm": 1.4910796880722046, - "learning_rate": 7.427236180904523e-05, - "loss": 5.7046, - "step": 26107 - }, - { - "epoch": 13.615645371577575, - "grad_norm": 1.472817301750183, - "learning_rate": 7.42713567839196e-05, - "loss": 5.4161, - "step": 26108 - }, - { - "epoch": 13.616166883963494, - "grad_norm": 1.4683984518051147, - "learning_rate": 7.427035175879397e-05, - "loss": 5.4105, - "step": 26109 - }, - { - "epoch": 13.616688396349414, - "grad_norm": 1.410421371459961, - "learning_rate": 7.426934673366834e-05, - "loss": 5.5677, - "step": 26110 - }, - { - "epoch": 13.617209908735333, - "grad_norm": 1.9965509176254272, - "learning_rate": 7.426834170854272e-05, - "loss": 4.6763, - "step": 26111 - }, - { - "epoch": 13.617731421121253, - "grad_norm": 1.6112295389175415, - "learning_rate": 7.426733668341709e-05, - "loss": 5.2869, - "step": 26112 - }, - { - "epoch": 13.61825293350717, - "grad_norm": 1.4278899431228638, - "learning_rate": 7.426633165829147e-05, - "loss": 5.3888, - "step": 26113 - }, - { - "epoch": 13.61877444589309, - "grad_norm": 1.5451023578643799, - "learning_rate": 7.426532663316584e-05, - "loss": 5.2448, - "step": 26114 - }, - { - "epoch": 13.61929595827901, - "grad_norm": 1.495601773262024, - "learning_rate": 7.42643216080402e-05, - "loss": 5.5686, - "step": 26115 - }, - { - "epoch": 13.619817470664929, - "grad_norm": 1.3815990686416626, - "learning_rate": 7.426331658291458e-05, - "loss": 5.3057, - "step": 26116 - }, - { - "epoch": 13.620338983050848, - "grad_norm": 1.7193833589553833, - "learning_rate": 7.426231155778894e-05, - "loss": 5.1115, - "step": 26117 - }, - { - "epoch": 13.620860495436766, - "grad_norm": 1.385528802871704, - "learning_rate": 7.426130653266332e-05, - "loss": 5.2837, - "step": 26118 - }, - { - "epoch": 13.621382007822685, - "grad_norm": 1.4630603790283203, - "learning_rate": 7.426030150753768e-05, - "loss": 5.4018, - "step": 26119 - }, - { - "epoch": 13.621903520208605, - "grad_norm": 1.4596033096313477, - "learning_rate": 7.425929648241206e-05, - "loss": 5.4073, - "step": 26120 - }, - { - "epoch": 13.622425032594524, - "grad_norm": 1.503363013267517, - "learning_rate": 7.425829145728644e-05, - "loss": 5.5269, - "step": 26121 - }, - { - "epoch": 13.622946544980444, - "grad_norm": 1.3908815383911133, - "learning_rate": 7.425728643216082e-05, - "loss": 5.8433, - "step": 26122 - }, - { - "epoch": 13.623468057366363, - "grad_norm": 1.53335440158844, - "learning_rate": 7.425628140703518e-05, - "loss": 5.0917, - "step": 26123 - }, - { - "epoch": 13.62398956975228, - "grad_norm": 1.3855700492858887, - "learning_rate": 7.425527638190956e-05, - "loss": 5.5589, - "step": 26124 - }, - { - "epoch": 13.6245110821382, - "grad_norm": 1.5878850221633911, - "learning_rate": 7.425427135678392e-05, - "loss": 5.6055, - "step": 26125 - }, - { - "epoch": 13.62503259452412, - "grad_norm": 1.4044032096862793, - "learning_rate": 7.42532663316583e-05, - "loss": 5.7794, - "step": 26126 - }, - { - "epoch": 13.62555410691004, - "grad_norm": 1.4140361547470093, - "learning_rate": 7.425226130653267e-05, - "loss": 5.6732, - "step": 26127 - }, - { - "epoch": 13.626075619295959, - "grad_norm": 1.4245022535324097, - "learning_rate": 7.425125628140703e-05, - "loss": 5.5075, - "step": 26128 - }, - { - "epoch": 13.626597131681878, - "grad_norm": 1.4786078929901123, - "learning_rate": 7.425025125628141e-05, - "loss": 5.1137, - "step": 26129 - }, - { - "epoch": 13.627118644067796, - "grad_norm": 1.474573016166687, - "learning_rate": 7.424924623115577e-05, - "loss": 5.0354, - "step": 26130 - }, - { - "epoch": 13.627640156453715, - "grad_norm": 1.3722200393676758, - "learning_rate": 7.424824120603015e-05, - "loss": 5.5187, - "step": 26131 - }, - { - "epoch": 13.628161668839635, - "grad_norm": 1.5479954481124878, - "learning_rate": 7.424723618090453e-05, - "loss": 5.4685, - "step": 26132 - }, - { - "epoch": 13.628683181225554, - "grad_norm": 1.3789873123168945, - "learning_rate": 7.42462311557789e-05, - "loss": 5.6692, - "step": 26133 - }, - { - "epoch": 13.629204693611474, - "grad_norm": 1.4237197637557983, - "learning_rate": 7.424522613065327e-05, - "loss": 5.4826, - "step": 26134 - }, - { - "epoch": 13.629726205997393, - "grad_norm": 1.6209639310836792, - "learning_rate": 7.424422110552765e-05, - "loss": 5.4992, - "step": 26135 - }, - { - "epoch": 13.63024771838331, - "grad_norm": 1.5788004398345947, - "learning_rate": 7.424321608040201e-05, - "loss": 5.1654, - "step": 26136 - }, - { - "epoch": 13.63076923076923, - "grad_norm": 1.545098900794983, - "learning_rate": 7.424221105527639e-05, - "loss": 5.6515, - "step": 26137 - }, - { - "epoch": 13.63129074315515, - "grad_norm": 1.399854302406311, - "learning_rate": 7.424120603015075e-05, - "loss": 5.2923, - "step": 26138 - }, - { - "epoch": 13.63181225554107, - "grad_norm": 1.5861592292785645, - "learning_rate": 7.424020100502513e-05, - "loss": 5.591, - "step": 26139 - }, - { - "epoch": 13.632333767926989, - "grad_norm": 1.47199547290802, - "learning_rate": 7.42391959798995e-05, - "loss": 5.5389, - "step": 26140 - }, - { - "epoch": 13.632855280312908, - "grad_norm": 1.5486048460006714, - "learning_rate": 7.423819095477387e-05, - "loss": 5.1669, - "step": 26141 - }, - { - "epoch": 13.633376792698826, - "grad_norm": 1.517118215560913, - "learning_rate": 7.423718592964825e-05, - "loss": 5.251, - "step": 26142 - }, - { - "epoch": 13.633898305084745, - "grad_norm": 1.404724359512329, - "learning_rate": 7.423618090452262e-05, - "loss": 5.6496, - "step": 26143 - }, - { - "epoch": 13.634419817470665, - "grad_norm": 1.6063823699951172, - "learning_rate": 7.4235175879397e-05, - "loss": 5.179, - "step": 26144 - }, - { - "epoch": 13.634941329856584, - "grad_norm": 1.5282258987426758, - "learning_rate": 7.423417085427136e-05, - "loss": 5.6728, - "step": 26145 - }, - { - "epoch": 13.635462842242504, - "grad_norm": 1.493125081062317, - "learning_rate": 7.423316582914574e-05, - "loss": 4.722, - "step": 26146 - }, - { - "epoch": 13.635984354628423, - "grad_norm": 1.6052831411361694, - "learning_rate": 7.42321608040201e-05, - "loss": 5.0067, - "step": 26147 - }, - { - "epoch": 13.63650586701434, - "grad_norm": 1.4930953979492188, - "learning_rate": 7.423115577889448e-05, - "loss": 5.623, - "step": 26148 - }, - { - "epoch": 13.63702737940026, - "grad_norm": 1.4622656106948853, - "learning_rate": 7.423015075376884e-05, - "loss": 4.8117, - "step": 26149 - }, - { - "epoch": 13.63754889178618, - "grad_norm": 1.4713860750198364, - "learning_rate": 7.422914572864322e-05, - "loss": 5.5922, - "step": 26150 - }, - { - "epoch": 13.6380704041721, - "grad_norm": 1.4636738300323486, - "learning_rate": 7.422814070351758e-05, - "loss": 5.721, - "step": 26151 - }, - { - "epoch": 13.638591916558019, - "grad_norm": 1.5155296325683594, - "learning_rate": 7.422713567839196e-05, - "loss": 5.3482, - "step": 26152 - }, - { - "epoch": 13.639113428943938, - "grad_norm": 1.4691797494888306, - "learning_rate": 7.422613065326634e-05, - "loss": 5.5538, - "step": 26153 - }, - { - "epoch": 13.639634941329856, - "grad_norm": 1.4502317905426025, - "learning_rate": 7.422512562814072e-05, - "loss": 4.8343, - "step": 26154 - }, - { - "epoch": 13.640156453715775, - "grad_norm": 1.4991663694381714, - "learning_rate": 7.422412060301508e-05, - "loss": 5.5547, - "step": 26155 - }, - { - "epoch": 13.640677966101695, - "grad_norm": 1.488993525505066, - "learning_rate": 7.422311557788945e-05, - "loss": 5.2664, - "step": 26156 - }, - { - "epoch": 13.641199478487614, - "grad_norm": 1.7084804773330688, - "learning_rate": 7.422211055276382e-05, - "loss": 5.4609, - "step": 26157 - }, - { - "epoch": 13.641720990873534, - "grad_norm": 1.445791244506836, - "learning_rate": 7.422110552763819e-05, - "loss": 5.6316, - "step": 26158 - }, - { - "epoch": 13.642242503259453, - "grad_norm": 1.4453641176223755, - "learning_rate": 7.422010050251257e-05, - "loss": 5.5443, - "step": 26159 - }, - { - "epoch": 13.642764015645371, - "grad_norm": 1.5148558616638184, - "learning_rate": 7.421909547738693e-05, - "loss": 4.7972, - "step": 26160 - }, - { - "epoch": 13.64328552803129, - "grad_norm": 2.355069875717163, - "learning_rate": 7.421809045226131e-05, - "loss": 5.2027, - "step": 26161 - }, - { - "epoch": 13.64380704041721, - "grad_norm": 1.4367586374282837, - "learning_rate": 7.421708542713569e-05, - "loss": 5.6728, - "step": 26162 - }, - { - "epoch": 13.64432855280313, - "grad_norm": 1.488897681236267, - "learning_rate": 7.421608040201006e-05, - "loss": 5.3677, - "step": 26163 - }, - { - "epoch": 13.644850065189049, - "grad_norm": 1.4052753448486328, - "learning_rate": 7.421507537688443e-05, - "loss": 5.0378, - "step": 26164 - }, - { - "epoch": 13.645371577574968, - "grad_norm": 1.5003809928894043, - "learning_rate": 7.42140703517588e-05, - "loss": 5.4462, - "step": 26165 - }, - { - "epoch": 13.645893089960886, - "grad_norm": 1.6001914739608765, - "learning_rate": 7.421306532663317e-05, - "loss": 5.2903, - "step": 26166 - }, - { - "epoch": 13.646414602346805, - "grad_norm": 1.5029942989349365, - "learning_rate": 7.421206030150755e-05, - "loss": 5.6414, - "step": 26167 - }, - { - "epoch": 13.646936114732725, - "grad_norm": 1.3701965808868408, - "learning_rate": 7.421105527638191e-05, - "loss": 5.6194, - "step": 26168 - }, - { - "epoch": 13.647457627118644, - "grad_norm": 1.4534550905227661, - "learning_rate": 7.421005025125628e-05, - "loss": 5.0325, - "step": 26169 - }, - { - "epoch": 13.647979139504564, - "grad_norm": 1.2662545442581177, - "learning_rate": 7.420904522613065e-05, - "loss": 5.9199, - "step": 26170 - }, - { - "epoch": 13.648500651890483, - "grad_norm": 1.5127530097961426, - "learning_rate": 7.420804020100502e-05, - "loss": 5.5336, - "step": 26171 - }, - { - "epoch": 13.649022164276401, - "grad_norm": 1.4174138307571411, - "learning_rate": 7.42070351758794e-05, - "loss": 4.9872, - "step": 26172 - }, - { - "epoch": 13.64954367666232, - "grad_norm": 1.5074068307876587, - "learning_rate": 7.420603015075377e-05, - "loss": 5.0552, - "step": 26173 - }, - { - "epoch": 13.65006518904824, - "grad_norm": 1.5188313722610474, - "learning_rate": 7.420502512562815e-05, - "loss": 5.3935, - "step": 26174 - }, - { - "epoch": 13.65058670143416, - "grad_norm": 1.4314706325531006, - "learning_rate": 7.420402010050252e-05, - "loss": 5.5797, - "step": 26175 - }, - { - "epoch": 13.651108213820079, - "grad_norm": 1.4563297033309937, - "learning_rate": 7.42030150753769e-05, - "loss": 5.6362, - "step": 26176 - }, - { - "epoch": 13.651629726205996, - "grad_norm": 1.3818061351776123, - "learning_rate": 7.420201005025126e-05, - "loss": 5.0999, - "step": 26177 - }, - { - "epoch": 13.652151238591916, - "grad_norm": 1.4794371128082275, - "learning_rate": 7.420100502512564e-05, - "loss": 5.4999, - "step": 26178 - }, - { - "epoch": 13.652672750977835, - "grad_norm": 1.4799684286117554, - "learning_rate": 7.42e-05, - "loss": 5.7729, - "step": 26179 - }, - { - "epoch": 13.653194263363755, - "grad_norm": 1.5239866971969604, - "learning_rate": 7.419899497487438e-05, - "loss": 5.3849, - "step": 26180 - }, - { - "epoch": 13.653715775749674, - "grad_norm": 1.4996247291564941, - "learning_rate": 7.419798994974874e-05, - "loss": 5.2522, - "step": 26181 - }, - { - "epoch": 13.654237288135594, - "grad_norm": 1.409036636352539, - "learning_rate": 7.419698492462311e-05, - "loss": 4.8579, - "step": 26182 - }, - { - "epoch": 13.654758800521513, - "grad_norm": 1.4676793813705444, - "learning_rate": 7.419597989949749e-05, - "loss": 5.4014, - "step": 26183 - }, - { - "epoch": 13.655280312907431, - "grad_norm": 1.446311354637146, - "learning_rate": 7.419497487437186e-05, - "loss": 5.2416, - "step": 26184 - }, - { - "epoch": 13.65580182529335, - "grad_norm": 1.4883307218551636, - "learning_rate": 7.419396984924624e-05, - "loss": 5.8059, - "step": 26185 - }, - { - "epoch": 13.65632333767927, - "grad_norm": 1.6310043334960938, - "learning_rate": 7.41929648241206e-05, - "loss": 5.1787, - "step": 26186 - }, - { - "epoch": 13.65684485006519, - "grad_norm": 1.4577267169952393, - "learning_rate": 7.419195979899498e-05, - "loss": 5.2385, - "step": 26187 - }, - { - "epoch": 13.657366362451109, - "grad_norm": 1.6027939319610596, - "learning_rate": 7.419095477386935e-05, - "loss": 4.9974, - "step": 26188 - }, - { - "epoch": 13.657887874837026, - "grad_norm": 1.5702874660491943, - "learning_rate": 7.418994974874373e-05, - "loss": 5.5085, - "step": 26189 - }, - { - "epoch": 13.658409387222946, - "grad_norm": 1.4723410606384277, - "learning_rate": 7.418894472361809e-05, - "loss": 5.5255, - "step": 26190 - }, - { - "epoch": 13.658930899608865, - "grad_norm": 1.5120006799697876, - "learning_rate": 7.418793969849247e-05, - "loss": 5.8529, - "step": 26191 - }, - { - "epoch": 13.659452411994785, - "grad_norm": 1.392195224761963, - "learning_rate": 7.418693467336683e-05, - "loss": 5.5675, - "step": 26192 - }, - { - "epoch": 13.659973924380704, - "grad_norm": 1.4808648824691772, - "learning_rate": 7.418592964824121e-05, - "loss": 5.7374, - "step": 26193 - }, - { - "epoch": 13.660495436766624, - "grad_norm": 1.4854035377502441, - "learning_rate": 7.418492462311559e-05, - "loss": 5.336, - "step": 26194 - }, - { - "epoch": 13.661016949152543, - "grad_norm": 1.506764531135559, - "learning_rate": 7.418391959798995e-05, - "loss": 5.5313, - "step": 26195 - }, - { - "epoch": 13.661538461538461, - "grad_norm": 1.4673322439193726, - "learning_rate": 7.418291457286433e-05, - "loss": 5.5385, - "step": 26196 - }, - { - "epoch": 13.66205997392438, - "grad_norm": 1.5096347332000732, - "learning_rate": 7.41819095477387e-05, - "loss": 5.4562, - "step": 26197 - }, - { - "epoch": 13.6625814863103, - "grad_norm": 2.107394218444824, - "learning_rate": 7.418090452261307e-05, - "loss": 5.4657, - "step": 26198 - }, - { - "epoch": 13.66310299869622, - "grad_norm": 1.4587910175323486, - "learning_rate": 7.417989949748744e-05, - "loss": 5.5091, - "step": 26199 - }, - { - "epoch": 13.663624511082139, - "grad_norm": 1.4861432313919067, - "learning_rate": 7.417889447236181e-05, - "loss": 5.5744, - "step": 26200 - }, - { - "epoch": 13.664146023468057, - "grad_norm": 1.5024362802505493, - "learning_rate": 7.417788944723618e-05, - "loss": 5.3625, - "step": 26201 - }, - { - "epoch": 13.664667535853976, - "grad_norm": 1.5409218072891235, - "learning_rate": 7.417688442211056e-05, - "loss": 5.3109, - "step": 26202 - }, - { - "epoch": 13.665189048239895, - "grad_norm": 1.5802841186523438, - "learning_rate": 7.417587939698492e-05, - "loss": 5.2909, - "step": 26203 - }, - { - "epoch": 13.665710560625815, - "grad_norm": 1.5415915250778198, - "learning_rate": 7.41748743718593e-05, - "loss": 5.3321, - "step": 26204 - }, - { - "epoch": 13.666232073011734, - "grad_norm": 1.3742016553878784, - "learning_rate": 7.417386934673368e-05, - "loss": 5.4723, - "step": 26205 - }, - { - "epoch": 13.666753585397654, - "grad_norm": 1.4965910911560059, - "learning_rate": 7.417286432160805e-05, - "loss": 5.5317, - "step": 26206 - }, - { - "epoch": 13.667275097783573, - "grad_norm": 1.4204915761947632, - "learning_rate": 7.417185929648242e-05, - "loss": 5.0331, - "step": 26207 - }, - { - "epoch": 13.667796610169491, - "grad_norm": 1.4878114461898804, - "learning_rate": 7.417085427135678e-05, - "loss": 5.1348, - "step": 26208 - }, - { - "epoch": 13.66831812255541, - "grad_norm": 1.4559324979782104, - "learning_rate": 7.416984924623116e-05, - "loss": 4.9563, - "step": 26209 - }, - { - "epoch": 13.66883963494133, - "grad_norm": 1.5609060525894165, - "learning_rate": 7.416884422110552e-05, - "loss": 4.9849, - "step": 26210 - }, - { - "epoch": 13.66936114732725, - "grad_norm": 1.381268858909607, - "learning_rate": 7.41678391959799e-05, - "loss": 5.6484, - "step": 26211 - }, - { - "epoch": 13.669882659713169, - "grad_norm": 1.3492250442504883, - "learning_rate": 7.416683417085427e-05, - "loss": 5.5157, - "step": 26212 - }, - { - "epoch": 13.670404172099087, - "grad_norm": 1.4626526832580566, - "learning_rate": 7.416582914572864e-05, - "loss": 5.5079, - "step": 26213 - }, - { - "epoch": 13.670925684485006, - "grad_norm": 1.409006118774414, - "learning_rate": 7.416482412060302e-05, - "loss": 5.2888, - "step": 26214 - }, - { - "epoch": 13.671447196870925, - "grad_norm": 1.436029076576233, - "learning_rate": 7.41638190954774e-05, - "loss": 4.97, - "step": 26215 - }, - { - "epoch": 13.671968709256845, - "grad_norm": 1.354110598564148, - "learning_rate": 7.416281407035176e-05, - "loss": 5.7618, - "step": 26216 - }, - { - "epoch": 13.672490221642764, - "grad_norm": 1.5244249105453491, - "learning_rate": 7.416180904522614e-05, - "loss": 5.1877, - "step": 26217 - }, - { - "epoch": 13.673011734028684, - "grad_norm": 1.4458073377609253, - "learning_rate": 7.41608040201005e-05, - "loss": 5.3287, - "step": 26218 - }, - { - "epoch": 13.673533246414602, - "grad_norm": 1.4134246110916138, - "learning_rate": 7.415979899497488e-05, - "loss": 5.6795, - "step": 26219 - }, - { - "epoch": 13.674054758800521, - "grad_norm": 1.4877550601959229, - "learning_rate": 7.415879396984925e-05, - "loss": 5.4524, - "step": 26220 - }, - { - "epoch": 13.67457627118644, - "grad_norm": 1.5454896688461304, - "learning_rate": 7.415778894472361e-05, - "loss": 5.106, - "step": 26221 - }, - { - "epoch": 13.67509778357236, - "grad_norm": 1.449265956878662, - "learning_rate": 7.415678391959799e-05, - "loss": 5.4544, - "step": 26222 - }, - { - "epoch": 13.67561929595828, - "grad_norm": 1.4541906118392944, - "learning_rate": 7.415577889447235e-05, - "loss": 5.3673, - "step": 26223 - }, - { - "epoch": 13.676140808344199, - "grad_norm": 1.4044163227081299, - "learning_rate": 7.415477386934673e-05, - "loss": 5.5651, - "step": 26224 - }, - { - "epoch": 13.676662320730117, - "grad_norm": 1.3367114067077637, - "learning_rate": 7.415376884422111e-05, - "loss": 5.7251, - "step": 26225 - }, - { - "epoch": 13.677183833116036, - "grad_norm": 1.6734681129455566, - "learning_rate": 7.415276381909549e-05, - "loss": 4.5713, - "step": 26226 - }, - { - "epoch": 13.677705345501955, - "grad_norm": 1.4872862100601196, - "learning_rate": 7.415175879396985e-05, - "loss": 5.311, - "step": 26227 - }, - { - "epoch": 13.678226857887875, - "grad_norm": 1.4825828075408936, - "learning_rate": 7.415075376884423e-05, - "loss": 5.3993, - "step": 26228 - }, - { - "epoch": 13.678748370273794, - "grad_norm": 1.575156807899475, - "learning_rate": 7.41497487437186e-05, - "loss": 4.687, - "step": 26229 - }, - { - "epoch": 13.679269882659714, - "grad_norm": 1.4130020141601562, - "learning_rate": 7.414874371859297e-05, - "loss": 5.4552, - "step": 26230 - }, - { - "epoch": 13.679791395045632, - "grad_norm": 1.438066840171814, - "learning_rate": 7.414773869346734e-05, - "loss": 5.4389, - "step": 26231 - }, - { - "epoch": 13.680312907431551, - "grad_norm": 1.4070160388946533, - "learning_rate": 7.414673366834171e-05, - "loss": 5.9027, - "step": 26232 - }, - { - "epoch": 13.68083441981747, - "grad_norm": 1.4406101703643799, - "learning_rate": 7.414572864321608e-05, - "loss": 5.5522, - "step": 26233 - }, - { - "epoch": 13.68135593220339, - "grad_norm": 1.650753140449524, - "learning_rate": 7.414472361809046e-05, - "loss": 5.3567, - "step": 26234 - }, - { - "epoch": 13.68187744458931, - "grad_norm": 1.3738634586334229, - "learning_rate": 7.414371859296483e-05, - "loss": 5.5649, - "step": 26235 - }, - { - "epoch": 13.682398956975229, - "grad_norm": 1.468075156211853, - "learning_rate": 7.41427135678392e-05, - "loss": 5.1266, - "step": 26236 - }, - { - "epoch": 13.682920469361147, - "grad_norm": 1.5536202192306519, - "learning_rate": 7.414170854271358e-05, - "loss": 5.3822, - "step": 26237 - }, - { - "epoch": 13.683441981747066, - "grad_norm": 1.4811030626296997, - "learning_rate": 7.414070351758794e-05, - "loss": 5.5061, - "step": 26238 - }, - { - "epoch": 13.683963494132986, - "grad_norm": 1.5883451700210571, - "learning_rate": 7.413969849246232e-05, - "loss": 5.4024, - "step": 26239 - }, - { - "epoch": 13.684485006518905, - "grad_norm": 1.5401113033294678, - "learning_rate": 7.413869346733668e-05, - "loss": 5.5374, - "step": 26240 - }, - { - "epoch": 13.685006518904824, - "grad_norm": 1.4195226430892944, - "learning_rate": 7.413768844221106e-05, - "loss": 4.774, - "step": 26241 - }, - { - "epoch": 13.685528031290744, - "grad_norm": 1.4024386405944824, - "learning_rate": 7.413668341708542e-05, - "loss": 5.3215, - "step": 26242 - }, - { - "epoch": 13.686049543676662, - "grad_norm": 1.4251238107681274, - "learning_rate": 7.41356783919598e-05, - "loss": 5.3823, - "step": 26243 - }, - { - "epoch": 13.686571056062581, - "grad_norm": 1.4096269607543945, - "learning_rate": 7.413467336683417e-05, - "loss": 5.3796, - "step": 26244 - }, - { - "epoch": 13.6870925684485, - "grad_norm": 1.420304536819458, - "learning_rate": 7.413366834170854e-05, - "loss": 5.4005, - "step": 26245 - }, - { - "epoch": 13.68761408083442, - "grad_norm": 1.496395230293274, - "learning_rate": 7.413266331658292e-05, - "loss": 4.9784, - "step": 26246 - }, - { - "epoch": 13.68813559322034, - "grad_norm": 1.479042887687683, - "learning_rate": 7.41316582914573e-05, - "loss": 5.2394, - "step": 26247 - }, - { - "epoch": 13.688657105606259, - "grad_norm": 1.4018672704696655, - "learning_rate": 7.413065326633166e-05, - "loss": 5.3966, - "step": 26248 - }, - { - "epoch": 13.689178617992177, - "grad_norm": 1.4291772842407227, - "learning_rate": 7.412964824120603e-05, - "loss": 5.3348, - "step": 26249 - }, - { - "epoch": 13.689700130378096, - "grad_norm": 1.5117086172103882, - "learning_rate": 7.41286432160804e-05, - "loss": 5.3186, - "step": 26250 - }, - { - "epoch": 13.690221642764016, - "grad_norm": 1.394789695739746, - "learning_rate": 7.412763819095477e-05, - "loss": 5.7351, - "step": 26251 - }, - { - "epoch": 13.690743155149935, - "grad_norm": 1.51079523563385, - "learning_rate": 7.412663316582915e-05, - "loss": 5.4097, - "step": 26252 - }, - { - "epoch": 13.691264667535854, - "grad_norm": 1.3716630935668945, - "learning_rate": 7.412562814070351e-05, - "loss": 5.4202, - "step": 26253 - }, - { - "epoch": 13.691786179921774, - "grad_norm": 1.5489226579666138, - "learning_rate": 7.412462311557789e-05, - "loss": 4.93, - "step": 26254 - }, - { - "epoch": 13.692307692307692, - "grad_norm": 1.4554541110992432, - "learning_rate": 7.412361809045227e-05, - "loss": 5.4049, - "step": 26255 - }, - { - "epoch": 13.692829204693611, - "grad_norm": 1.4946415424346924, - "learning_rate": 7.412261306532665e-05, - "loss": 5.0249, - "step": 26256 - }, - { - "epoch": 13.69335071707953, - "grad_norm": 1.3879883289337158, - "learning_rate": 7.412160804020101e-05, - "loss": 5.4705, - "step": 26257 - }, - { - "epoch": 13.69387222946545, - "grad_norm": 1.5199731588363647, - "learning_rate": 7.412060301507539e-05, - "loss": 5.1561, - "step": 26258 - }, - { - "epoch": 13.69439374185137, - "grad_norm": 1.4834749698638916, - "learning_rate": 7.411959798994975e-05, - "loss": 5.2967, - "step": 26259 - }, - { - "epoch": 13.694915254237289, - "grad_norm": 1.4142404794692993, - "learning_rate": 7.411859296482413e-05, - "loss": 5.4138, - "step": 26260 - }, - { - "epoch": 13.695436766623207, - "grad_norm": 1.4084123373031616, - "learning_rate": 7.41175879396985e-05, - "loss": 5.7512, - "step": 26261 - }, - { - "epoch": 13.695958279009126, - "grad_norm": 1.4060670137405396, - "learning_rate": 7.411658291457286e-05, - "loss": 5.306, - "step": 26262 - }, - { - "epoch": 13.696479791395046, - "grad_norm": 1.3916294574737549, - "learning_rate": 7.411557788944724e-05, - "loss": 5.449, - "step": 26263 - }, - { - "epoch": 13.697001303780965, - "grad_norm": 1.3441957235336304, - "learning_rate": 7.41145728643216e-05, - "loss": 5.5937, - "step": 26264 - }, - { - "epoch": 13.697522816166884, - "grad_norm": 1.3253542184829712, - "learning_rate": 7.411356783919598e-05, - "loss": 5.8398, - "step": 26265 - }, - { - "epoch": 13.698044328552804, - "grad_norm": 1.4485461711883545, - "learning_rate": 7.411256281407036e-05, - "loss": 5.5493, - "step": 26266 - }, - { - "epoch": 13.698565840938722, - "grad_norm": 1.3540000915527344, - "learning_rate": 7.411155778894473e-05, - "loss": 5.8116, - "step": 26267 - }, - { - "epoch": 13.699087353324641, - "grad_norm": 1.4409502744674683, - "learning_rate": 7.41105527638191e-05, - "loss": 5.6855, - "step": 26268 - }, - { - "epoch": 13.69960886571056, - "grad_norm": 1.4635642766952515, - "learning_rate": 7.410954773869348e-05, - "loss": 5.6311, - "step": 26269 - }, - { - "epoch": 13.70013037809648, - "grad_norm": 1.4778673648834229, - "learning_rate": 7.410854271356784e-05, - "loss": 5.1935, - "step": 26270 - }, - { - "epoch": 13.7006518904824, - "grad_norm": 1.6008974313735962, - "learning_rate": 7.410753768844222e-05, - "loss": 5.1809, - "step": 26271 - }, - { - "epoch": 13.701173402868317, - "grad_norm": 1.5531880855560303, - "learning_rate": 7.410653266331658e-05, - "loss": 5.0368, - "step": 26272 - }, - { - "epoch": 13.701694915254237, - "grad_norm": 1.4938340187072754, - "learning_rate": 7.410552763819096e-05, - "loss": 5.2948, - "step": 26273 - }, - { - "epoch": 13.702216427640156, - "grad_norm": 1.3586103916168213, - "learning_rate": 7.410452261306533e-05, - "loss": 5.2642, - "step": 26274 - }, - { - "epoch": 13.702737940026076, - "grad_norm": 1.7340881824493408, - "learning_rate": 7.41035175879397e-05, - "loss": 4.6779, - "step": 26275 - }, - { - "epoch": 13.703259452411995, - "grad_norm": 1.3740848302841187, - "learning_rate": 7.410251256281408e-05, - "loss": 5.7208, - "step": 26276 - }, - { - "epoch": 13.703780964797915, - "grad_norm": 1.4257525205612183, - "learning_rate": 7.410150753768845e-05, - "loss": 5.655, - "step": 26277 - }, - { - "epoch": 13.704302477183834, - "grad_norm": 1.3289958238601685, - "learning_rate": 7.410050251256282e-05, - "loss": 5.0164, - "step": 26278 - }, - { - "epoch": 13.704823989569752, - "grad_norm": 1.3960164785385132, - "learning_rate": 7.409949748743719e-05, - "loss": 5.2162, - "step": 26279 - }, - { - "epoch": 13.705345501955671, - "grad_norm": 1.5484191179275513, - "learning_rate": 7.409849246231157e-05, - "loss": 5.1099, - "step": 26280 - }, - { - "epoch": 13.70586701434159, - "grad_norm": 1.546349048614502, - "learning_rate": 7.409748743718593e-05, - "loss": 5.1393, - "step": 26281 - }, - { - "epoch": 13.70638852672751, - "grad_norm": 1.3247578144073486, - "learning_rate": 7.409648241206031e-05, - "loss": 5.8952, - "step": 26282 - }, - { - "epoch": 13.70691003911343, - "grad_norm": 1.3990881443023682, - "learning_rate": 7.409547738693467e-05, - "loss": 5.4744, - "step": 26283 - }, - { - "epoch": 13.707431551499347, - "grad_norm": 1.5416021347045898, - "learning_rate": 7.409447236180905e-05, - "loss": 5.4675, - "step": 26284 - }, - { - "epoch": 13.707953063885267, - "grad_norm": 1.3134530782699585, - "learning_rate": 7.409346733668341e-05, - "loss": 5.6475, - "step": 26285 - }, - { - "epoch": 13.708474576271186, - "grad_norm": 1.3997832536697388, - "learning_rate": 7.409246231155779e-05, - "loss": 5.3321, - "step": 26286 - }, - { - "epoch": 13.708996088657106, - "grad_norm": 1.5522408485412598, - "learning_rate": 7.409145728643217e-05, - "loss": 5.0478, - "step": 26287 - }, - { - "epoch": 13.709517601043025, - "grad_norm": 1.4679230451583862, - "learning_rate": 7.409045226130653e-05, - "loss": 5.3113, - "step": 26288 - }, - { - "epoch": 13.710039113428945, - "grad_norm": 1.5354830026626587, - "learning_rate": 7.408944723618091e-05, - "loss": 5.533, - "step": 26289 - }, - { - "epoch": 13.710560625814864, - "grad_norm": 1.5053530931472778, - "learning_rate": 7.408844221105528e-05, - "loss": 4.8804, - "step": 26290 - }, - { - "epoch": 13.711082138200782, - "grad_norm": 1.4091129302978516, - "learning_rate": 7.408743718592965e-05, - "loss": 5.7551, - "step": 26291 - }, - { - "epoch": 13.711603650586701, - "grad_norm": 1.3390133380889893, - "learning_rate": 7.408643216080402e-05, - "loss": 4.7371, - "step": 26292 - }, - { - "epoch": 13.71212516297262, - "grad_norm": 1.6122639179229736, - "learning_rate": 7.40854271356784e-05, - "loss": 5.1205, - "step": 26293 - }, - { - "epoch": 13.71264667535854, - "grad_norm": 1.4704855680465698, - "learning_rate": 7.408442211055276e-05, - "loss": 5.6712, - "step": 26294 - }, - { - "epoch": 13.71316818774446, - "grad_norm": 1.3971049785614014, - "learning_rate": 7.408341708542714e-05, - "loss": 5.6084, - "step": 26295 - }, - { - "epoch": 13.713689700130377, - "grad_norm": 1.5945837497711182, - "learning_rate": 7.408241206030152e-05, - "loss": 5.0501, - "step": 26296 - }, - { - "epoch": 13.714211212516297, - "grad_norm": 1.5383433103561401, - "learning_rate": 7.40814070351759e-05, - "loss": 5.4105, - "step": 26297 - }, - { - "epoch": 13.714732724902216, - "grad_norm": 1.547374963760376, - "learning_rate": 7.408040201005026e-05, - "loss": 5.2056, - "step": 26298 - }, - { - "epoch": 13.715254237288136, - "grad_norm": 1.5434465408325195, - "learning_rate": 7.407939698492464e-05, - "loss": 5.1946, - "step": 26299 - }, - { - "epoch": 13.715775749674055, - "grad_norm": 1.4849501848220825, - "learning_rate": 7.4078391959799e-05, - "loss": 5.2289, - "step": 26300 - }, - { - "epoch": 13.716297262059975, - "grad_norm": 1.3837039470672607, - "learning_rate": 7.407738693467336e-05, - "loss": 5.8061, - "step": 26301 - }, - { - "epoch": 13.716818774445892, - "grad_norm": 1.6627229452133179, - "learning_rate": 7.407638190954774e-05, - "loss": 5.1194, - "step": 26302 - }, - { - "epoch": 13.717340286831812, - "grad_norm": 1.4262263774871826, - "learning_rate": 7.40753768844221e-05, - "loss": 5.9162, - "step": 26303 - }, - { - "epoch": 13.717861799217731, - "grad_norm": 1.4253407716751099, - "learning_rate": 7.407437185929648e-05, - "loss": 5.621, - "step": 26304 - }, - { - "epoch": 13.71838331160365, - "grad_norm": 1.5102624893188477, - "learning_rate": 7.407336683417085e-05, - "loss": 5.3633, - "step": 26305 - }, - { - "epoch": 13.71890482398957, - "grad_norm": 1.5098985433578491, - "learning_rate": 7.407236180904523e-05, - "loss": 5.3244, - "step": 26306 - }, - { - "epoch": 13.71942633637549, - "grad_norm": 1.4127471446990967, - "learning_rate": 7.40713567839196e-05, - "loss": 5.6632, - "step": 26307 - }, - { - "epoch": 13.719947848761407, - "grad_norm": 1.5025441646575928, - "learning_rate": 7.407035175879398e-05, - "loss": 5.4477, - "step": 26308 - }, - { - "epoch": 13.720469361147327, - "grad_norm": 1.4731711149215698, - "learning_rate": 7.406934673366835e-05, - "loss": 5.2424, - "step": 26309 - }, - { - "epoch": 13.720990873533246, - "grad_norm": 1.407908320426941, - "learning_rate": 7.406834170854272e-05, - "loss": 5.6632, - "step": 26310 - }, - { - "epoch": 13.721512385919166, - "grad_norm": 1.570426344871521, - "learning_rate": 7.406733668341709e-05, - "loss": 5.2882, - "step": 26311 - }, - { - "epoch": 13.722033898305085, - "grad_norm": 1.528321385383606, - "learning_rate": 7.406633165829147e-05, - "loss": 4.7139, - "step": 26312 - }, - { - "epoch": 13.722555410691005, - "grad_norm": 1.3895516395568848, - "learning_rate": 7.406532663316583e-05, - "loss": 5.5204, - "step": 26313 - }, - { - "epoch": 13.723076923076922, - "grad_norm": 1.5471476316452026, - "learning_rate": 7.40643216080402e-05, - "loss": 5.0588, - "step": 26314 - }, - { - "epoch": 13.723598435462842, - "grad_norm": 1.5119361877441406, - "learning_rate": 7.406331658291457e-05, - "loss": 5.358, - "step": 26315 - }, - { - "epoch": 13.724119947848761, - "grad_norm": 1.4902794361114502, - "learning_rate": 7.406231155778895e-05, - "loss": 4.9237, - "step": 26316 - }, - { - "epoch": 13.72464146023468, - "grad_norm": 1.342659831047058, - "learning_rate": 7.406130653266333e-05, - "loss": 5.852, - "step": 26317 - }, - { - "epoch": 13.7251629726206, - "grad_norm": 1.7578521966934204, - "learning_rate": 7.406030150753769e-05, - "loss": 5.4874, - "step": 26318 - }, - { - "epoch": 13.72568448500652, - "grad_norm": 1.5006173849105835, - "learning_rate": 7.405929648241207e-05, - "loss": 5.4944, - "step": 26319 - }, - { - "epoch": 13.726205997392437, - "grad_norm": 1.4205178022384644, - "learning_rate": 7.405829145728643e-05, - "loss": 5.5384, - "step": 26320 - }, - { - "epoch": 13.726727509778357, - "grad_norm": 1.4477325677871704, - "learning_rate": 7.405728643216081e-05, - "loss": 5.6302, - "step": 26321 - }, - { - "epoch": 13.727249022164276, - "grad_norm": 1.4187220335006714, - "learning_rate": 7.405628140703518e-05, - "loss": 5.484, - "step": 26322 - }, - { - "epoch": 13.727770534550196, - "grad_norm": 1.4639639854431152, - "learning_rate": 7.405527638190955e-05, - "loss": 5.4063, - "step": 26323 - }, - { - "epoch": 13.728292046936115, - "grad_norm": 1.4644054174423218, - "learning_rate": 7.405427135678392e-05, - "loss": 5.2331, - "step": 26324 - }, - { - "epoch": 13.728813559322035, - "grad_norm": 1.5963212251663208, - "learning_rate": 7.40532663316583e-05, - "loss": 4.912, - "step": 26325 - }, - { - "epoch": 13.729335071707952, - "grad_norm": 1.501006007194519, - "learning_rate": 7.405226130653266e-05, - "loss": 5.7069, - "step": 26326 - }, - { - "epoch": 13.729856584093872, - "grad_norm": 1.4457000494003296, - "learning_rate": 7.405125628140704e-05, - "loss": 5.2542, - "step": 26327 - }, - { - "epoch": 13.730378096479791, - "grad_norm": 1.4152408838272095, - "learning_rate": 7.405025125628142e-05, - "loss": 4.7322, - "step": 26328 - }, - { - "epoch": 13.73089960886571, - "grad_norm": 1.4741460084915161, - "learning_rate": 7.404924623115578e-05, - "loss": 5.5146, - "step": 26329 - }, - { - "epoch": 13.73142112125163, - "grad_norm": 1.4437048435211182, - "learning_rate": 7.404824120603016e-05, - "loss": 5.4486, - "step": 26330 - }, - { - "epoch": 13.73194263363755, - "grad_norm": 1.5044375658035278, - "learning_rate": 7.404723618090452e-05, - "loss": 5.2145, - "step": 26331 - }, - { - "epoch": 13.732464146023467, - "grad_norm": 1.4556844234466553, - "learning_rate": 7.40462311557789e-05, - "loss": 5.5573, - "step": 26332 - }, - { - "epoch": 13.732985658409387, - "grad_norm": 1.431643009185791, - "learning_rate": 7.404522613065327e-05, - "loss": 5.5591, - "step": 26333 - }, - { - "epoch": 13.733507170795306, - "grad_norm": 1.5585272312164307, - "learning_rate": 7.404422110552764e-05, - "loss": 4.5254, - "step": 26334 - }, - { - "epoch": 13.734028683181226, - "grad_norm": 1.3811918497085571, - "learning_rate": 7.404321608040201e-05, - "loss": 5.8452, - "step": 26335 - }, - { - "epoch": 13.734550195567145, - "grad_norm": 1.4615755081176758, - "learning_rate": 7.404221105527638e-05, - "loss": 5.1662, - "step": 26336 - }, - { - "epoch": 13.735071707953065, - "grad_norm": 1.423498511314392, - "learning_rate": 7.404120603015075e-05, - "loss": 5.5802, - "step": 26337 - }, - { - "epoch": 13.735593220338982, - "grad_norm": 1.4805662631988525, - "learning_rate": 7.404020100502513e-05, - "loss": 5.4495, - "step": 26338 - }, - { - "epoch": 13.736114732724902, - "grad_norm": 1.4652297496795654, - "learning_rate": 7.40391959798995e-05, - "loss": 5.3078, - "step": 26339 - }, - { - "epoch": 13.736636245110821, - "grad_norm": 1.468070387840271, - "learning_rate": 7.403819095477388e-05, - "loss": 5.3727, - "step": 26340 - }, - { - "epoch": 13.73715775749674, - "grad_norm": 1.5637736320495605, - "learning_rate": 7.403718592964825e-05, - "loss": 5.5458, - "step": 26341 - }, - { - "epoch": 13.73767926988266, - "grad_norm": 1.4767038822174072, - "learning_rate": 7.403618090452261e-05, - "loss": 5.5236, - "step": 26342 - }, - { - "epoch": 13.73820078226858, - "grad_norm": 1.3895175457000732, - "learning_rate": 7.403517587939699e-05, - "loss": 5.8886, - "step": 26343 - }, - { - "epoch": 13.738722294654497, - "grad_norm": 1.5004115104675293, - "learning_rate": 7.403417085427135e-05, - "loss": 5.6567, - "step": 26344 - }, - { - "epoch": 13.739243807040417, - "grad_norm": 1.5777119398117065, - "learning_rate": 7.403316582914573e-05, - "loss": 5.3782, - "step": 26345 - }, - { - "epoch": 13.739765319426336, - "grad_norm": 1.526443362236023, - "learning_rate": 7.40321608040201e-05, - "loss": 5.1267, - "step": 26346 - }, - { - "epoch": 13.740286831812256, - "grad_norm": 1.6024081707000732, - "learning_rate": 7.403115577889447e-05, - "loss": 5.3713, - "step": 26347 - }, - { - "epoch": 13.740808344198175, - "grad_norm": 1.530344843864441, - "learning_rate": 7.403015075376885e-05, - "loss": 5.265, - "step": 26348 - }, - { - "epoch": 13.741329856584095, - "grad_norm": 1.4765715599060059, - "learning_rate": 7.402914572864323e-05, - "loss": 5.3534, - "step": 26349 - }, - { - "epoch": 13.741851368970012, - "grad_norm": 1.507709264755249, - "learning_rate": 7.40281407035176e-05, - "loss": 5.2719, - "step": 26350 - }, - { - "epoch": 13.742372881355932, - "grad_norm": 1.5508559942245483, - "learning_rate": 7.402713567839197e-05, - "loss": 5.2962, - "step": 26351 - }, - { - "epoch": 13.742894393741851, - "grad_norm": 1.524999976158142, - "learning_rate": 7.402613065326634e-05, - "loss": 5.2288, - "step": 26352 - }, - { - "epoch": 13.74341590612777, - "grad_norm": 1.482681393623352, - "learning_rate": 7.402512562814071e-05, - "loss": 5.3317, - "step": 26353 - }, - { - "epoch": 13.74393741851369, - "grad_norm": 1.5423485040664673, - "learning_rate": 7.402412060301508e-05, - "loss": 5.2761, - "step": 26354 - }, - { - "epoch": 13.74445893089961, - "grad_norm": 1.5471887588500977, - "learning_rate": 7.402311557788944e-05, - "loss": 5.6069, - "step": 26355 - }, - { - "epoch": 13.744980443285527, - "grad_norm": 1.646363615989685, - "learning_rate": 7.402211055276382e-05, - "loss": 4.9835, - "step": 26356 - }, - { - "epoch": 13.745501955671447, - "grad_norm": 1.4680784940719604, - "learning_rate": 7.402110552763818e-05, - "loss": 5.463, - "step": 26357 - }, - { - "epoch": 13.746023468057366, - "grad_norm": 1.5570931434631348, - "learning_rate": 7.402010050251256e-05, - "loss": 5.247, - "step": 26358 - }, - { - "epoch": 13.746544980443286, - "grad_norm": 1.4824610948562622, - "learning_rate": 7.401909547738694e-05, - "loss": 5.716, - "step": 26359 - }, - { - "epoch": 13.747066492829205, - "grad_norm": 1.5080955028533936, - "learning_rate": 7.401809045226132e-05, - "loss": 5.1804, - "step": 26360 - }, - { - "epoch": 13.747588005215125, - "grad_norm": 1.4445420503616333, - "learning_rate": 7.401708542713568e-05, - "loss": 4.6785, - "step": 26361 - }, - { - "epoch": 13.748109517601042, - "grad_norm": 1.4829580783843994, - "learning_rate": 7.401608040201006e-05, - "loss": 5.1302, - "step": 26362 - }, - { - "epoch": 13.748631029986962, - "grad_norm": 1.5469622611999512, - "learning_rate": 7.401507537688442e-05, - "loss": 4.6272, - "step": 26363 - }, - { - "epoch": 13.749152542372881, - "grad_norm": 1.4333226680755615, - "learning_rate": 7.40140703517588e-05, - "loss": 5.4068, - "step": 26364 - }, - { - "epoch": 13.7496740547588, - "grad_norm": 1.5405176877975464, - "learning_rate": 7.401306532663317e-05, - "loss": 5.3051, - "step": 26365 - }, - { - "epoch": 13.75019556714472, - "grad_norm": 1.436424970626831, - "learning_rate": 7.401206030150754e-05, - "loss": 5.3225, - "step": 26366 - }, - { - "epoch": 13.750717079530638, - "grad_norm": 1.6037594079971313, - "learning_rate": 7.401105527638191e-05, - "loss": 4.6573, - "step": 26367 - }, - { - "epoch": 13.751238591916557, - "grad_norm": 1.5370171070098877, - "learning_rate": 7.401005025125629e-05, - "loss": 5.1641, - "step": 26368 - }, - { - "epoch": 13.751760104302477, - "grad_norm": 1.3944084644317627, - "learning_rate": 7.400904522613066e-05, - "loss": 5.8155, - "step": 26369 - }, - { - "epoch": 13.752281616688396, - "grad_norm": 1.4413623809814453, - "learning_rate": 7.400804020100503e-05, - "loss": 5.167, - "step": 26370 - }, - { - "epoch": 13.752803129074316, - "grad_norm": 1.385704755783081, - "learning_rate": 7.40070351758794e-05, - "loss": 5.7975, - "step": 26371 - }, - { - "epoch": 13.753324641460235, - "grad_norm": 1.5941667556762695, - "learning_rate": 7.400603015075377e-05, - "loss": 5.3999, - "step": 26372 - }, - { - "epoch": 13.753846153846155, - "grad_norm": 1.320093035697937, - "learning_rate": 7.400502512562815e-05, - "loss": 5.8138, - "step": 26373 - }, - { - "epoch": 13.754367666232072, - "grad_norm": 1.4092094898223877, - "learning_rate": 7.400402010050251e-05, - "loss": 5.4197, - "step": 26374 - }, - { - "epoch": 13.754889178617992, - "grad_norm": 1.3717724084854126, - "learning_rate": 7.400301507537689e-05, - "loss": 5.5153, - "step": 26375 - }, - { - "epoch": 13.755410691003911, - "grad_norm": 1.4430638551712036, - "learning_rate": 7.400201005025125e-05, - "loss": 5.5917, - "step": 26376 - }, - { - "epoch": 13.75593220338983, - "grad_norm": 1.3952490091323853, - "learning_rate": 7.400100502512563e-05, - "loss": 5.6751, - "step": 26377 - }, - { - "epoch": 13.75645371577575, - "grad_norm": 1.6894973516464233, - "learning_rate": 7.4e-05, - "loss": 4.2621, - "step": 26378 - }, - { - "epoch": 13.756975228161668, - "grad_norm": 1.7109872102737427, - "learning_rate": 7.399899497487437e-05, - "loss": 4.6341, - "step": 26379 - }, - { - "epoch": 13.757496740547587, - "grad_norm": 1.4606164693832397, - "learning_rate": 7.399798994974875e-05, - "loss": 5.5662, - "step": 26380 - }, - { - "epoch": 13.758018252933507, - "grad_norm": 1.4700651168823242, - "learning_rate": 7.399698492462312e-05, - "loss": 5.438, - "step": 26381 - }, - { - "epoch": 13.758539765319426, - "grad_norm": 1.4906933307647705, - "learning_rate": 7.39959798994975e-05, - "loss": 5.1271, - "step": 26382 - }, - { - "epoch": 13.759061277705346, - "grad_norm": 1.5538970232009888, - "learning_rate": 7.399497487437186e-05, - "loss": 5.456, - "step": 26383 - }, - { - "epoch": 13.759582790091265, - "grad_norm": 1.394538164138794, - "learning_rate": 7.399396984924624e-05, - "loss": 5.2725, - "step": 26384 - }, - { - "epoch": 13.760104302477185, - "grad_norm": 1.5566115379333496, - "learning_rate": 7.39929648241206e-05, - "loss": 4.9309, - "step": 26385 - }, - { - "epoch": 13.760625814863102, - "grad_norm": 1.4490926265716553, - "learning_rate": 7.399195979899498e-05, - "loss": 5.4758, - "step": 26386 - }, - { - "epoch": 13.761147327249022, - "grad_norm": 1.4888676404953003, - "learning_rate": 7.399095477386934e-05, - "loss": 5.4139, - "step": 26387 - }, - { - "epoch": 13.761668839634941, - "grad_norm": 1.5319782495498657, - "learning_rate": 7.398994974874372e-05, - "loss": 5.254, - "step": 26388 - }, - { - "epoch": 13.76219035202086, - "grad_norm": 1.5528079271316528, - "learning_rate": 7.39889447236181e-05, - "loss": 5.2903, - "step": 26389 - }, - { - "epoch": 13.76271186440678, - "grad_norm": 1.4860446453094482, - "learning_rate": 7.398793969849248e-05, - "loss": 5.1487, - "step": 26390 - }, - { - "epoch": 13.763233376792698, - "grad_norm": 1.4436328411102295, - "learning_rate": 7.398693467336684e-05, - "loss": 5.5762, - "step": 26391 - }, - { - "epoch": 13.763754889178617, - "grad_norm": 1.5115957260131836, - "learning_rate": 7.398592964824122e-05, - "loss": 4.9015, - "step": 26392 - }, - { - "epoch": 13.764276401564537, - "grad_norm": 1.5232385396957397, - "learning_rate": 7.398492462311558e-05, - "loss": 5.2507, - "step": 26393 - }, - { - "epoch": 13.764797913950456, - "grad_norm": 1.5084216594696045, - "learning_rate": 7.398391959798995e-05, - "loss": 5.4185, - "step": 26394 - }, - { - "epoch": 13.765319426336376, - "grad_norm": 1.5073093175888062, - "learning_rate": 7.398291457286432e-05, - "loss": 5.3633, - "step": 26395 - }, - { - "epoch": 13.765840938722295, - "grad_norm": 1.5302278995513916, - "learning_rate": 7.398190954773869e-05, - "loss": 5.4567, - "step": 26396 - }, - { - "epoch": 13.766362451108213, - "grad_norm": 1.546541452407837, - "learning_rate": 7.398090452261307e-05, - "loss": 4.2343, - "step": 26397 - }, - { - "epoch": 13.766883963494132, - "grad_norm": 1.4552042484283447, - "learning_rate": 7.397989949748743e-05, - "loss": 5.4573, - "step": 26398 - }, - { - "epoch": 13.767405475880052, - "grad_norm": 1.529159665107727, - "learning_rate": 7.397889447236181e-05, - "loss": 5.2786, - "step": 26399 - }, - { - "epoch": 13.767926988265971, - "grad_norm": 1.416229248046875, - "learning_rate": 7.397788944723619e-05, - "loss": 5.7447, - "step": 26400 - }, - { - "epoch": 13.76844850065189, - "grad_norm": 1.5637726783752441, - "learning_rate": 7.397688442211056e-05, - "loss": 5.0406, - "step": 26401 - }, - { - "epoch": 13.76897001303781, - "grad_norm": 1.4302639961242676, - "learning_rate": 7.397587939698493e-05, - "loss": 5.7047, - "step": 26402 - }, - { - "epoch": 13.769491525423728, - "grad_norm": 1.5397229194641113, - "learning_rate": 7.39748743718593e-05, - "loss": 5.0968, - "step": 26403 - }, - { - "epoch": 13.770013037809647, - "grad_norm": 1.434273600578308, - "learning_rate": 7.397386934673367e-05, - "loss": 5.5531, - "step": 26404 - }, - { - "epoch": 13.770534550195567, - "grad_norm": 1.9008731842041016, - "learning_rate": 7.397286432160805e-05, - "loss": 4.773, - "step": 26405 - }, - { - "epoch": 13.771056062581486, - "grad_norm": 1.4220852851867676, - "learning_rate": 7.397185929648241e-05, - "loss": 5.2731, - "step": 26406 - }, - { - "epoch": 13.771577574967406, - "grad_norm": 1.3954423666000366, - "learning_rate": 7.397085427135678e-05, - "loss": 5.4523, - "step": 26407 - }, - { - "epoch": 13.772099087353325, - "grad_norm": 1.5403112173080444, - "learning_rate": 7.396984924623115e-05, - "loss": 5.033, - "step": 26408 - }, - { - "epoch": 13.772620599739243, - "grad_norm": 1.490450143814087, - "learning_rate": 7.396884422110553e-05, - "loss": 5.4627, - "step": 26409 - }, - { - "epoch": 13.773142112125162, - "grad_norm": 1.5374774932861328, - "learning_rate": 7.396783919597991e-05, - "loss": 5.3327, - "step": 26410 - }, - { - "epoch": 13.773663624511082, - "grad_norm": 1.4153592586517334, - "learning_rate": 7.396683417085427e-05, - "loss": 5.8982, - "step": 26411 - }, - { - "epoch": 13.774185136897001, - "grad_norm": 1.4049193859100342, - "learning_rate": 7.396582914572865e-05, - "loss": 5.5068, - "step": 26412 - }, - { - "epoch": 13.77470664928292, - "grad_norm": 1.4835766553878784, - "learning_rate": 7.396482412060302e-05, - "loss": 4.6239, - "step": 26413 - }, - { - "epoch": 13.77522816166884, - "grad_norm": 1.5727577209472656, - "learning_rate": 7.39638190954774e-05, - "loss": 5.4333, - "step": 26414 - }, - { - "epoch": 13.775749674054758, - "grad_norm": 1.5635651350021362, - "learning_rate": 7.396281407035176e-05, - "loss": 4.8797, - "step": 26415 - }, - { - "epoch": 13.776271186440677, - "grad_norm": 1.4108856916427612, - "learning_rate": 7.396180904522614e-05, - "loss": 5.5554, - "step": 26416 - }, - { - "epoch": 13.776792698826597, - "grad_norm": 1.3529754877090454, - "learning_rate": 7.39608040201005e-05, - "loss": 5.0123, - "step": 26417 - }, - { - "epoch": 13.777314211212516, - "grad_norm": 1.5034209489822388, - "learning_rate": 7.395979899497488e-05, - "loss": 5.4143, - "step": 26418 - }, - { - "epoch": 13.777835723598436, - "grad_norm": 1.463484525680542, - "learning_rate": 7.395879396984924e-05, - "loss": 5.3837, - "step": 26419 - }, - { - "epoch": 13.778357235984355, - "grad_norm": 1.3996230363845825, - "learning_rate": 7.395778894472362e-05, - "loss": 5.0837, - "step": 26420 - }, - { - "epoch": 13.778878748370273, - "grad_norm": 1.3740538358688354, - "learning_rate": 7.3956783919598e-05, - "loss": 5.0725, - "step": 26421 - }, - { - "epoch": 13.779400260756193, - "grad_norm": 1.5479425191879272, - "learning_rate": 7.395577889447236e-05, - "loss": 5.329, - "step": 26422 - }, - { - "epoch": 13.779921773142112, - "grad_norm": 1.4755905866622925, - "learning_rate": 7.395477386934674e-05, - "loss": 5.3834, - "step": 26423 - }, - { - "epoch": 13.780443285528031, - "grad_norm": 1.4012584686279297, - "learning_rate": 7.39537688442211e-05, - "loss": 4.9891, - "step": 26424 - }, - { - "epoch": 13.780964797913951, - "grad_norm": 1.5003336668014526, - "learning_rate": 7.395276381909548e-05, - "loss": 5.2476, - "step": 26425 - }, - { - "epoch": 13.78148631029987, - "grad_norm": 1.4437271356582642, - "learning_rate": 7.395175879396985e-05, - "loss": 5.5242, - "step": 26426 - }, - { - "epoch": 13.782007822685788, - "grad_norm": 1.399918794631958, - "learning_rate": 7.395075376884423e-05, - "loss": 5.3508, - "step": 26427 - }, - { - "epoch": 13.782529335071708, - "grad_norm": 1.4936041831970215, - "learning_rate": 7.394974874371859e-05, - "loss": 5.0351, - "step": 26428 - }, - { - "epoch": 13.783050847457627, - "grad_norm": 1.4257824420928955, - "learning_rate": 7.394874371859297e-05, - "loss": 5.8234, - "step": 26429 - }, - { - "epoch": 13.783572359843546, - "grad_norm": 1.5363036394119263, - "learning_rate": 7.394773869346735e-05, - "loss": 5.2239, - "step": 26430 - }, - { - "epoch": 13.784093872229466, - "grad_norm": 1.4956098794937134, - "learning_rate": 7.394673366834172e-05, - "loss": 5.4911, - "step": 26431 - }, - { - "epoch": 13.784615384615385, - "grad_norm": 1.5966894626617432, - "learning_rate": 7.394572864321609e-05, - "loss": 4.975, - "step": 26432 - }, - { - "epoch": 13.785136897001303, - "grad_norm": 1.553889274597168, - "learning_rate": 7.394472361809047e-05, - "loss": 4.8457, - "step": 26433 - }, - { - "epoch": 13.785658409387223, - "grad_norm": 1.496362328529358, - "learning_rate": 7.394371859296483e-05, - "loss": 5.0984, - "step": 26434 - }, - { - "epoch": 13.786179921773142, - "grad_norm": 1.4430550336837769, - "learning_rate": 7.39427135678392e-05, - "loss": 5.6545, - "step": 26435 - }, - { - "epoch": 13.786701434159061, - "grad_norm": 1.3767973184585571, - "learning_rate": 7.394170854271357e-05, - "loss": 5.7211, - "step": 26436 - }, - { - "epoch": 13.787222946544981, - "grad_norm": 1.4855372905731201, - "learning_rate": 7.394070351758794e-05, - "loss": 5.5372, - "step": 26437 - }, - { - "epoch": 13.7877444589309, - "grad_norm": 1.446744680404663, - "learning_rate": 7.393969849246231e-05, - "loss": 5.3369, - "step": 26438 - }, - { - "epoch": 13.788265971316818, - "grad_norm": 1.4282217025756836, - "learning_rate": 7.393869346733668e-05, - "loss": 5.3068, - "step": 26439 - }, - { - "epoch": 13.788787483702738, - "grad_norm": 1.3969601392745972, - "learning_rate": 7.393768844221106e-05, - "loss": 5.3525, - "step": 26440 - }, - { - "epoch": 13.789308996088657, - "grad_norm": 1.5351313352584839, - "learning_rate": 7.393668341708543e-05, - "loss": 5.4492, - "step": 26441 - }, - { - "epoch": 13.789830508474576, - "grad_norm": 1.5437430143356323, - "learning_rate": 7.393567839195981e-05, - "loss": 4.9325, - "step": 26442 - }, - { - "epoch": 13.790352020860496, - "grad_norm": 1.4813867807388306, - "learning_rate": 7.393467336683418e-05, - "loss": 5.4112, - "step": 26443 - }, - { - "epoch": 13.790873533246415, - "grad_norm": 1.4538313150405884, - "learning_rate": 7.393366834170855e-05, - "loss": 5.2482, - "step": 26444 - }, - { - "epoch": 13.791395045632333, - "grad_norm": 1.517920732498169, - "learning_rate": 7.393266331658292e-05, - "loss": 5.0593, - "step": 26445 - }, - { - "epoch": 13.791916558018253, - "grad_norm": 1.4986337423324585, - "learning_rate": 7.39316582914573e-05, - "loss": 5.1683, - "step": 26446 - }, - { - "epoch": 13.792438070404172, - "grad_norm": 1.3524914979934692, - "learning_rate": 7.393065326633166e-05, - "loss": 5.911, - "step": 26447 - }, - { - "epoch": 13.792959582790091, - "grad_norm": 1.2871067523956299, - "learning_rate": 7.392964824120602e-05, - "loss": 5.7825, - "step": 26448 - }, - { - "epoch": 13.793481095176011, - "grad_norm": 1.3542218208312988, - "learning_rate": 7.39286432160804e-05, - "loss": 5.8188, - "step": 26449 - }, - { - "epoch": 13.79400260756193, - "grad_norm": 1.3945420980453491, - "learning_rate": 7.392763819095478e-05, - "loss": 5.7745, - "step": 26450 - }, - { - "epoch": 13.794524119947848, - "grad_norm": 1.483166217803955, - "learning_rate": 7.392663316582916e-05, - "loss": 5.2426, - "step": 26451 - }, - { - "epoch": 13.795045632333768, - "grad_norm": 1.41875422000885, - "learning_rate": 7.392562814070352e-05, - "loss": 5.4855, - "step": 26452 - }, - { - "epoch": 13.795567144719687, - "grad_norm": 1.4704698324203491, - "learning_rate": 7.39246231155779e-05, - "loss": 5.0032, - "step": 26453 - }, - { - "epoch": 13.796088657105607, - "grad_norm": 1.4065637588500977, - "learning_rate": 7.392361809045226e-05, - "loss": 5.6701, - "step": 26454 - }, - { - "epoch": 13.796610169491526, - "grad_norm": 1.4952120780944824, - "learning_rate": 7.392261306532664e-05, - "loss": 5.3275, - "step": 26455 - }, - { - "epoch": 13.797131681877445, - "grad_norm": 1.369248628616333, - "learning_rate": 7.3921608040201e-05, - "loss": 5.3767, - "step": 26456 - }, - { - "epoch": 13.797653194263363, - "grad_norm": 1.3602946996688843, - "learning_rate": 7.392060301507538e-05, - "loss": 5.5782, - "step": 26457 - }, - { - "epoch": 13.798174706649283, - "grad_norm": 1.5441423654556274, - "learning_rate": 7.391959798994975e-05, - "loss": 5.0225, - "step": 26458 - }, - { - "epoch": 13.798696219035202, - "grad_norm": 1.413991093635559, - "learning_rate": 7.391859296482413e-05, - "loss": 4.9619, - "step": 26459 - }, - { - "epoch": 13.799217731421122, - "grad_norm": 1.4765141010284424, - "learning_rate": 7.391758793969849e-05, - "loss": 5.4183, - "step": 26460 - }, - { - "epoch": 13.799739243807041, - "grad_norm": 1.3677716255187988, - "learning_rate": 7.391658291457287e-05, - "loss": 5.5329, - "step": 26461 - }, - { - "epoch": 13.800260756192959, - "grad_norm": 1.4586433172225952, - "learning_rate": 7.391557788944725e-05, - "loss": 5.0815, - "step": 26462 - }, - { - "epoch": 13.800782268578878, - "grad_norm": 1.3711371421813965, - "learning_rate": 7.391457286432161e-05, - "loss": 5.4382, - "step": 26463 - }, - { - "epoch": 13.801303780964798, - "grad_norm": 1.4584420919418335, - "learning_rate": 7.391356783919599e-05, - "loss": 5.0904, - "step": 26464 - }, - { - "epoch": 13.801825293350717, - "grad_norm": 1.5582571029663086, - "learning_rate": 7.391256281407035e-05, - "loss": 4.5633, - "step": 26465 - }, - { - "epoch": 13.802346805736637, - "grad_norm": 1.4537912607192993, - "learning_rate": 7.391155778894473e-05, - "loss": 5.4942, - "step": 26466 - }, - { - "epoch": 13.802868318122556, - "grad_norm": 1.40284264087677, - "learning_rate": 7.39105527638191e-05, - "loss": 5.3799, - "step": 26467 - }, - { - "epoch": 13.803389830508475, - "grad_norm": 1.4334890842437744, - "learning_rate": 7.390954773869347e-05, - "loss": 5.9143, - "step": 26468 - }, - { - "epoch": 13.803911342894393, - "grad_norm": 1.5458240509033203, - "learning_rate": 7.390854271356784e-05, - "loss": 5.1991, - "step": 26469 - }, - { - "epoch": 13.804432855280313, - "grad_norm": 1.4049530029296875, - "learning_rate": 7.390753768844221e-05, - "loss": 5.4987, - "step": 26470 - }, - { - "epoch": 13.804954367666232, - "grad_norm": 1.6335694789886475, - "learning_rate": 7.390653266331659e-05, - "loss": 4.9888, - "step": 26471 - }, - { - "epoch": 13.805475880052152, - "grad_norm": 1.434862494468689, - "learning_rate": 7.390552763819097e-05, - "loss": 4.7372, - "step": 26472 - }, - { - "epoch": 13.805997392438071, - "grad_norm": 1.4554126262664795, - "learning_rate": 7.390452261306533e-05, - "loss": 5.2764, - "step": 26473 - }, - { - "epoch": 13.806518904823989, - "grad_norm": 1.3685393333435059, - "learning_rate": 7.39035175879397e-05, - "loss": 5.4117, - "step": 26474 - }, - { - "epoch": 13.807040417209908, - "grad_norm": 1.4697195291519165, - "learning_rate": 7.390251256281408e-05, - "loss": 4.9632, - "step": 26475 - }, - { - "epoch": 13.807561929595828, - "grad_norm": 1.3608393669128418, - "learning_rate": 7.390150753768844e-05, - "loss": 5.3465, - "step": 26476 - }, - { - "epoch": 13.808083441981747, - "grad_norm": 1.4113236665725708, - "learning_rate": 7.390050251256282e-05, - "loss": 5.1723, - "step": 26477 - }, - { - "epoch": 13.808604954367667, - "grad_norm": 1.436285376548767, - "learning_rate": 7.389949748743718e-05, - "loss": 5.3155, - "step": 26478 - }, - { - "epoch": 13.809126466753586, - "grad_norm": 1.4620888233184814, - "learning_rate": 7.389849246231156e-05, - "loss": 4.5991, - "step": 26479 - }, - { - "epoch": 13.809647979139505, - "grad_norm": 1.450632929801941, - "learning_rate": 7.389748743718592e-05, - "loss": 5.2029, - "step": 26480 - }, - { - "epoch": 13.810169491525423, - "grad_norm": 1.3802186250686646, - "learning_rate": 7.38964824120603e-05, - "loss": 5.872, - "step": 26481 - }, - { - "epoch": 13.810691003911343, - "grad_norm": 1.368727445602417, - "learning_rate": 7.389547738693468e-05, - "loss": 5.7114, - "step": 26482 - }, - { - "epoch": 13.811212516297262, - "grad_norm": 1.3834394216537476, - "learning_rate": 7.389447236180906e-05, - "loss": 5.4965, - "step": 26483 - }, - { - "epoch": 13.811734028683182, - "grad_norm": 1.4570322036743164, - "learning_rate": 7.389346733668342e-05, - "loss": 5.7206, - "step": 26484 - }, - { - "epoch": 13.812255541069101, - "grad_norm": 1.382216453552246, - "learning_rate": 7.38924623115578e-05, - "loss": 5.6353, - "step": 26485 - }, - { - "epoch": 13.812777053455019, - "grad_norm": 1.5031565427780151, - "learning_rate": 7.389145728643216e-05, - "loss": 5.2228, - "step": 26486 - }, - { - "epoch": 13.813298565840938, - "grad_norm": 1.778857707977295, - "learning_rate": 7.389045226130653e-05, - "loss": 4.9284, - "step": 26487 - }, - { - "epoch": 13.813820078226858, - "grad_norm": 1.5163599252700806, - "learning_rate": 7.388944723618091e-05, - "loss": 4.9919, - "step": 26488 - }, - { - "epoch": 13.814341590612777, - "grad_norm": 1.482047438621521, - "learning_rate": 7.388844221105527e-05, - "loss": 5.1664, - "step": 26489 - }, - { - "epoch": 13.814863102998697, - "grad_norm": 1.4220746755599976, - "learning_rate": 7.388743718592965e-05, - "loss": 5.4121, - "step": 26490 - }, - { - "epoch": 13.815384615384616, - "grad_norm": 1.5180259943008423, - "learning_rate": 7.388643216080403e-05, - "loss": 4.7004, - "step": 26491 - }, - { - "epoch": 13.815906127770534, - "grad_norm": 1.4680911302566528, - "learning_rate": 7.38854271356784e-05, - "loss": 5.5885, - "step": 26492 - }, - { - "epoch": 13.816427640156453, - "grad_norm": 1.3919789791107178, - "learning_rate": 7.388442211055277e-05, - "loss": 5.7083, - "step": 26493 - }, - { - "epoch": 13.816949152542373, - "grad_norm": 1.44468355178833, - "learning_rate": 7.388341708542715e-05, - "loss": 5.2311, - "step": 26494 - }, - { - "epoch": 13.817470664928292, - "grad_norm": 1.4075679779052734, - "learning_rate": 7.388241206030151e-05, - "loss": 5.5748, - "step": 26495 - }, - { - "epoch": 13.817992177314212, - "grad_norm": 1.444176197052002, - "learning_rate": 7.388140703517589e-05, - "loss": 5.3789, - "step": 26496 - }, - { - "epoch": 13.818513689700131, - "grad_norm": 1.4759618043899536, - "learning_rate": 7.388040201005025e-05, - "loss": 5.7278, - "step": 26497 - }, - { - "epoch": 13.819035202086049, - "grad_norm": 1.4898877143859863, - "learning_rate": 7.387939698492463e-05, - "loss": 5.0296, - "step": 26498 - }, - { - "epoch": 13.819556714471968, - "grad_norm": 1.3908525705337524, - "learning_rate": 7.3878391959799e-05, - "loss": 5.8774, - "step": 26499 - }, - { - "epoch": 13.820078226857888, - "grad_norm": 1.4607182741165161, - "learning_rate": 7.387738693467336e-05, - "loss": 4.9837, - "step": 26500 - }, - { - "epoch": 13.820599739243807, - "grad_norm": 1.3323724269866943, - "learning_rate": 7.387638190954774e-05, - "loss": 5.7404, - "step": 26501 - }, - { - "epoch": 13.821121251629727, - "grad_norm": 1.4811209440231323, - "learning_rate": 7.387537688442212e-05, - "loss": 5.0073, - "step": 26502 - }, - { - "epoch": 13.821642764015646, - "grad_norm": 1.404847264289856, - "learning_rate": 7.387437185929649e-05, - "loss": 5.663, - "step": 26503 - }, - { - "epoch": 13.822164276401564, - "grad_norm": 1.3617666959762573, - "learning_rate": 7.387336683417086e-05, - "loss": 5.7464, - "step": 26504 - }, - { - "epoch": 13.822685788787483, - "grad_norm": 1.3998384475708008, - "learning_rate": 7.387236180904523e-05, - "loss": 5.607, - "step": 26505 - }, - { - "epoch": 13.823207301173403, - "grad_norm": 1.4475196599960327, - "learning_rate": 7.38713567839196e-05, - "loss": 5.0385, - "step": 26506 - }, - { - "epoch": 13.823728813559322, - "grad_norm": 1.4412034749984741, - "learning_rate": 7.387035175879398e-05, - "loss": 5.57, - "step": 26507 - }, - { - "epoch": 13.824250325945242, - "grad_norm": 1.5133788585662842, - "learning_rate": 7.386934673366834e-05, - "loss": 5.4613, - "step": 26508 - }, - { - "epoch": 13.824771838331161, - "grad_norm": 1.4022142887115479, - "learning_rate": 7.386834170854272e-05, - "loss": 5.0918, - "step": 26509 - }, - { - "epoch": 13.825293350717079, - "grad_norm": 1.5118639469146729, - "learning_rate": 7.386733668341708e-05, - "loss": 4.9873, - "step": 26510 - }, - { - "epoch": 13.825814863102998, - "grad_norm": 1.420628547668457, - "learning_rate": 7.386633165829146e-05, - "loss": 5.4842, - "step": 26511 - }, - { - "epoch": 13.826336375488918, - "grad_norm": 1.4550787210464478, - "learning_rate": 7.386532663316583e-05, - "loss": 5.8457, - "step": 26512 - }, - { - "epoch": 13.826857887874837, - "grad_norm": 1.3898526430130005, - "learning_rate": 7.38643216080402e-05, - "loss": 5.6544, - "step": 26513 - }, - { - "epoch": 13.827379400260757, - "grad_norm": 1.461527943611145, - "learning_rate": 7.386331658291458e-05, - "loss": 4.654, - "step": 26514 - }, - { - "epoch": 13.827900912646676, - "grad_norm": 1.4114949703216553, - "learning_rate": 7.386231155778895e-05, - "loss": 5.5261, - "step": 26515 - }, - { - "epoch": 13.828422425032594, - "grad_norm": 1.3397787809371948, - "learning_rate": 7.386130653266332e-05, - "loss": 5.8636, - "step": 26516 - }, - { - "epoch": 13.828943937418513, - "grad_norm": 1.4602088928222656, - "learning_rate": 7.386030150753769e-05, - "loss": 5.4346, - "step": 26517 - }, - { - "epoch": 13.829465449804433, - "grad_norm": 1.4274699687957764, - "learning_rate": 7.385929648241207e-05, - "loss": 5.0238, - "step": 26518 - }, - { - "epoch": 13.829986962190352, - "grad_norm": 1.4114453792572021, - "learning_rate": 7.385829145728643e-05, - "loss": 5.374, - "step": 26519 - }, - { - "epoch": 13.830508474576272, - "grad_norm": 1.4334321022033691, - "learning_rate": 7.385728643216081e-05, - "loss": 5.1367, - "step": 26520 - }, - { - "epoch": 13.831029986962191, - "grad_norm": 1.3766230344772339, - "learning_rate": 7.385628140703517e-05, - "loss": 5.4523, - "step": 26521 - }, - { - "epoch": 13.831551499348109, - "grad_norm": 1.425680160522461, - "learning_rate": 7.385527638190955e-05, - "loss": 5.9825, - "step": 26522 - }, - { - "epoch": 13.832073011734028, - "grad_norm": 1.4496358633041382, - "learning_rate": 7.385427135678393e-05, - "loss": 5.5025, - "step": 26523 - }, - { - "epoch": 13.832594524119948, - "grad_norm": 1.4499424695968628, - "learning_rate": 7.38532663316583e-05, - "loss": 5.3854, - "step": 26524 - }, - { - "epoch": 13.833116036505867, - "grad_norm": 1.425278902053833, - "learning_rate": 7.385226130653267e-05, - "loss": 5.166, - "step": 26525 - }, - { - "epoch": 13.833637548891787, - "grad_norm": 1.5153617858886719, - "learning_rate": 7.385125628140705e-05, - "loss": 5.5275, - "step": 26526 - }, - { - "epoch": 13.834159061277706, - "grad_norm": 1.5246303081512451, - "learning_rate": 7.385025125628141e-05, - "loss": 5.0897, - "step": 26527 - }, - { - "epoch": 13.834680573663624, - "grad_norm": 1.5999515056610107, - "learning_rate": 7.384924623115578e-05, - "loss": 5.1258, - "step": 26528 - }, - { - "epoch": 13.835202086049543, - "grad_norm": 1.4156593084335327, - "learning_rate": 7.384824120603015e-05, - "loss": 5.4218, - "step": 26529 - }, - { - "epoch": 13.835723598435463, - "grad_norm": 1.3648791313171387, - "learning_rate": 7.384723618090452e-05, - "loss": 5.7005, - "step": 26530 - }, - { - "epoch": 13.836245110821382, - "grad_norm": 1.4114359617233276, - "learning_rate": 7.38462311557789e-05, - "loss": 5.7446, - "step": 26531 - }, - { - "epoch": 13.836766623207302, - "grad_norm": 1.5978981256484985, - "learning_rate": 7.384522613065326e-05, - "loss": 5.2427, - "step": 26532 - }, - { - "epoch": 13.837288135593221, - "grad_norm": 1.3401691913604736, - "learning_rate": 7.384422110552764e-05, - "loss": 5.763, - "step": 26533 - }, - { - "epoch": 13.837809647979139, - "grad_norm": 1.4701534509658813, - "learning_rate": 7.384321608040202e-05, - "loss": 5.3439, - "step": 26534 - }, - { - "epoch": 13.838331160365058, - "grad_norm": 1.4014315605163574, - "learning_rate": 7.38422110552764e-05, - "loss": 4.9311, - "step": 26535 - }, - { - "epoch": 13.838852672750978, - "grad_norm": 1.4601953029632568, - "learning_rate": 7.384120603015076e-05, - "loss": 5.0572, - "step": 26536 - }, - { - "epoch": 13.839374185136897, - "grad_norm": 1.4669698476791382, - "learning_rate": 7.384020100502514e-05, - "loss": 5.3103, - "step": 26537 - }, - { - "epoch": 13.839895697522817, - "grad_norm": 1.479597568511963, - "learning_rate": 7.38391959798995e-05, - "loss": 5.3424, - "step": 26538 - }, - { - "epoch": 13.840417209908736, - "grad_norm": 1.478246808052063, - "learning_rate": 7.383819095477388e-05, - "loss": 5.669, - "step": 26539 - }, - { - "epoch": 13.840938722294654, - "grad_norm": 1.5578365325927734, - "learning_rate": 7.383718592964824e-05, - "loss": 5.5864, - "step": 26540 - }, - { - "epoch": 13.841460234680573, - "grad_norm": 1.44267737865448, - "learning_rate": 7.38361809045226e-05, - "loss": 5.4649, - "step": 26541 - }, - { - "epoch": 13.841981747066493, - "grad_norm": 1.509235143661499, - "learning_rate": 7.383517587939698e-05, - "loss": 5.2228, - "step": 26542 - }, - { - "epoch": 13.842503259452412, - "grad_norm": 1.5816330909729004, - "learning_rate": 7.383417085427136e-05, - "loss": 5.4159, - "step": 26543 - }, - { - "epoch": 13.843024771838332, - "grad_norm": 1.5089608430862427, - "learning_rate": 7.383316582914574e-05, - "loss": 5.6484, - "step": 26544 - }, - { - "epoch": 13.843546284224251, - "grad_norm": 1.3855961561203003, - "learning_rate": 7.38321608040201e-05, - "loss": 5.505, - "step": 26545 - }, - { - "epoch": 13.844067796610169, - "grad_norm": 1.456153392791748, - "learning_rate": 7.383115577889448e-05, - "loss": 5.4411, - "step": 26546 - }, - { - "epoch": 13.844589308996088, - "grad_norm": 1.4587428569793701, - "learning_rate": 7.383015075376885e-05, - "loss": 5.4471, - "step": 26547 - }, - { - "epoch": 13.845110821382008, - "grad_norm": 1.4712167978286743, - "learning_rate": 7.382914572864322e-05, - "loss": 5.3967, - "step": 26548 - }, - { - "epoch": 13.845632333767927, - "grad_norm": 1.3772804737091064, - "learning_rate": 7.382814070351759e-05, - "loss": 5.6875, - "step": 26549 - }, - { - "epoch": 13.846153846153847, - "grad_norm": 1.5212371349334717, - "learning_rate": 7.382713567839197e-05, - "loss": 4.8244, - "step": 26550 - }, - { - "epoch": 13.846675358539766, - "grad_norm": 1.519421935081482, - "learning_rate": 7.382613065326633e-05, - "loss": 5.0225, - "step": 26551 - }, - { - "epoch": 13.847196870925684, - "grad_norm": 1.409741759300232, - "learning_rate": 7.382512562814071e-05, - "loss": 5.6238, - "step": 26552 - }, - { - "epoch": 13.847718383311603, - "grad_norm": 1.4318193197250366, - "learning_rate": 7.382412060301507e-05, - "loss": 5.4995, - "step": 26553 - }, - { - "epoch": 13.848239895697523, - "grad_norm": 1.4362225532531738, - "learning_rate": 7.382311557788945e-05, - "loss": 4.9806, - "step": 26554 - }, - { - "epoch": 13.848761408083442, - "grad_norm": 1.4308249950408936, - "learning_rate": 7.382211055276383e-05, - "loss": 5.5641, - "step": 26555 - }, - { - "epoch": 13.849282920469362, - "grad_norm": 1.6544915437698364, - "learning_rate": 7.382110552763819e-05, - "loss": 4.6351, - "step": 26556 - }, - { - "epoch": 13.84980443285528, - "grad_norm": 1.5612237453460693, - "learning_rate": 7.382010050251257e-05, - "loss": 5.0211, - "step": 26557 - }, - { - "epoch": 13.850325945241199, - "grad_norm": 1.4336183071136475, - "learning_rate": 7.381909547738693e-05, - "loss": 5.3679, - "step": 26558 - }, - { - "epoch": 13.850847457627118, - "grad_norm": 1.467228651046753, - "learning_rate": 7.381809045226131e-05, - "loss": 5.4192, - "step": 26559 - }, - { - "epoch": 13.851368970013038, - "grad_norm": 1.5703413486480713, - "learning_rate": 7.381708542713568e-05, - "loss": 5.1837, - "step": 26560 - }, - { - "epoch": 13.851890482398957, - "grad_norm": 1.392774224281311, - "learning_rate": 7.381608040201005e-05, - "loss": 5.5974, - "step": 26561 - }, - { - "epoch": 13.852411994784877, - "grad_norm": 1.4373931884765625, - "learning_rate": 7.381507537688442e-05, - "loss": 5.3749, - "step": 26562 - }, - { - "epoch": 13.852933507170796, - "grad_norm": 1.4433844089508057, - "learning_rate": 7.38140703517588e-05, - "loss": 5.6808, - "step": 26563 - }, - { - "epoch": 13.853455019556714, - "grad_norm": 1.4451144933700562, - "learning_rate": 7.381306532663317e-05, - "loss": 5.2216, - "step": 26564 - }, - { - "epoch": 13.853976531942633, - "grad_norm": 1.3851608037948608, - "learning_rate": 7.381206030150755e-05, - "loss": 5.5259, - "step": 26565 - }, - { - "epoch": 13.854498044328553, - "grad_norm": 1.4585822820663452, - "learning_rate": 7.381105527638192e-05, - "loss": 5.5045, - "step": 26566 - }, - { - "epoch": 13.855019556714472, - "grad_norm": 1.503200650215149, - "learning_rate": 7.381005025125628e-05, - "loss": 5.4925, - "step": 26567 - }, - { - "epoch": 13.855541069100392, - "grad_norm": 1.4481111764907837, - "learning_rate": 7.380904522613066e-05, - "loss": 5.3854, - "step": 26568 - }, - { - "epoch": 13.85606258148631, - "grad_norm": 1.511469841003418, - "learning_rate": 7.380804020100502e-05, - "loss": 5.3785, - "step": 26569 - }, - { - "epoch": 13.856584093872229, - "grad_norm": 1.4648741483688354, - "learning_rate": 7.38070351758794e-05, - "loss": 5.0836, - "step": 26570 - }, - { - "epoch": 13.857105606258148, - "grad_norm": 1.4487144947052002, - "learning_rate": 7.380603015075377e-05, - "loss": 5.5105, - "step": 26571 - }, - { - "epoch": 13.857627118644068, - "grad_norm": 1.4355738162994385, - "learning_rate": 7.380502512562814e-05, - "loss": 5.4768, - "step": 26572 - }, - { - "epoch": 13.858148631029987, - "grad_norm": 1.4884871244430542, - "learning_rate": 7.380402010050251e-05, - "loss": 5.3628, - "step": 26573 - }, - { - "epoch": 13.858670143415907, - "grad_norm": 1.4084011316299438, - "learning_rate": 7.380301507537688e-05, - "loss": 5.4352, - "step": 26574 - }, - { - "epoch": 13.859191655801826, - "grad_norm": 1.4333350658416748, - "learning_rate": 7.380201005025126e-05, - "loss": 5.8894, - "step": 26575 - }, - { - "epoch": 13.859713168187744, - "grad_norm": 1.5169113874435425, - "learning_rate": 7.380100502512564e-05, - "loss": 4.8846, - "step": 26576 - }, - { - "epoch": 13.860234680573663, - "grad_norm": 1.3853192329406738, - "learning_rate": 7.38e-05, - "loss": 5.49, - "step": 26577 - }, - { - "epoch": 13.860756192959583, - "grad_norm": 1.4637190103530884, - "learning_rate": 7.379899497487438e-05, - "loss": 5.5796, - "step": 26578 - }, - { - "epoch": 13.861277705345502, - "grad_norm": 1.546027421951294, - "learning_rate": 7.379798994974875e-05, - "loss": 5.269, - "step": 26579 - }, - { - "epoch": 13.861799217731422, - "grad_norm": 1.3591747283935547, - "learning_rate": 7.379698492462311e-05, - "loss": 5.9401, - "step": 26580 - }, - { - "epoch": 13.86232073011734, - "grad_norm": 1.3200143575668335, - "learning_rate": 7.379597989949749e-05, - "loss": 5.6316, - "step": 26581 - }, - { - "epoch": 13.862842242503259, - "grad_norm": 1.3101123571395874, - "learning_rate": 7.379497487437185e-05, - "loss": 5.2788, - "step": 26582 - }, - { - "epoch": 13.863363754889178, - "grad_norm": 1.4713033437728882, - "learning_rate": 7.379396984924623e-05, - "loss": 5.3545, - "step": 26583 - }, - { - "epoch": 13.863885267275098, - "grad_norm": 1.4868366718292236, - "learning_rate": 7.379296482412061e-05, - "loss": 5.1477, - "step": 26584 - }, - { - "epoch": 13.864406779661017, - "grad_norm": 1.383829951286316, - "learning_rate": 7.379195979899499e-05, - "loss": 5.5549, - "step": 26585 - }, - { - "epoch": 13.864928292046937, - "grad_norm": 1.3996530771255493, - "learning_rate": 7.379095477386935e-05, - "loss": 5.8232, - "step": 26586 - }, - { - "epoch": 13.865449804432854, - "grad_norm": 1.4808762073516846, - "learning_rate": 7.378994974874373e-05, - "loss": 5.2201, - "step": 26587 - }, - { - "epoch": 13.865971316818774, - "grad_norm": 1.4125200510025024, - "learning_rate": 7.37889447236181e-05, - "loss": 5.5388, - "step": 26588 - }, - { - "epoch": 13.866492829204693, - "grad_norm": 1.4406697750091553, - "learning_rate": 7.378793969849247e-05, - "loss": 5.6218, - "step": 26589 - }, - { - "epoch": 13.867014341590613, - "grad_norm": 1.3532670736312866, - "learning_rate": 7.378693467336684e-05, - "loss": 5.6486, - "step": 26590 - }, - { - "epoch": 13.867535853976532, - "grad_norm": 1.4575310945510864, - "learning_rate": 7.378592964824121e-05, - "loss": 4.9575, - "step": 26591 - }, - { - "epoch": 13.868057366362452, - "grad_norm": 1.4450225830078125, - "learning_rate": 7.378492462311558e-05, - "loss": 5.365, - "step": 26592 - }, - { - "epoch": 13.86857887874837, - "grad_norm": 1.4031126499176025, - "learning_rate": 7.378391959798994e-05, - "loss": 5.6124, - "step": 26593 - }, - { - "epoch": 13.869100391134289, - "grad_norm": 1.5389498472213745, - "learning_rate": 7.378291457286432e-05, - "loss": 5.2427, - "step": 26594 - }, - { - "epoch": 13.869621903520208, - "grad_norm": 1.3555395603179932, - "learning_rate": 7.37819095477387e-05, - "loss": 5.0971, - "step": 26595 - }, - { - "epoch": 13.870143415906128, - "grad_norm": 1.465713620185852, - "learning_rate": 7.378090452261308e-05, - "loss": 5.5205, - "step": 26596 - }, - { - "epoch": 13.870664928292047, - "grad_norm": 1.4382522106170654, - "learning_rate": 7.377989949748744e-05, - "loss": 5.1945, - "step": 26597 - }, - { - "epoch": 13.871186440677967, - "grad_norm": 1.5855571031570435, - "learning_rate": 7.377889447236182e-05, - "loss": 5.4128, - "step": 26598 - }, - { - "epoch": 13.871707953063884, - "grad_norm": 1.4720386266708374, - "learning_rate": 7.377788944723618e-05, - "loss": 5.4909, - "step": 26599 - }, - { - "epoch": 13.872229465449804, - "grad_norm": 1.404062032699585, - "learning_rate": 7.377688442211056e-05, - "loss": 5.3406, - "step": 26600 - }, - { - "epoch": 13.872750977835723, - "grad_norm": 1.620447039604187, - "learning_rate": 7.377587939698492e-05, - "loss": 4.5968, - "step": 26601 - }, - { - "epoch": 13.873272490221643, - "grad_norm": 1.3941786289215088, - "learning_rate": 7.37748743718593e-05, - "loss": 5.2166, - "step": 26602 - }, - { - "epoch": 13.873794002607562, - "grad_norm": 1.5450648069381714, - "learning_rate": 7.377386934673367e-05, - "loss": 4.9481, - "step": 26603 - }, - { - "epoch": 13.874315514993482, - "grad_norm": 1.5950566530227661, - "learning_rate": 7.377286432160804e-05, - "loss": 4.8157, - "step": 26604 - }, - { - "epoch": 13.8748370273794, - "grad_norm": 1.5029428005218506, - "learning_rate": 7.377185929648242e-05, - "loss": 5.1117, - "step": 26605 - }, - { - "epoch": 13.875358539765319, - "grad_norm": 1.4175562858581543, - "learning_rate": 7.37708542713568e-05, - "loss": 5.6209, - "step": 26606 - }, - { - "epoch": 13.875880052151238, - "grad_norm": 1.4179774522781372, - "learning_rate": 7.376984924623116e-05, - "loss": 5.302, - "step": 26607 - }, - { - "epoch": 13.876401564537158, - "grad_norm": 1.432729959487915, - "learning_rate": 7.376884422110553e-05, - "loss": 5.6803, - "step": 26608 - }, - { - "epoch": 13.876923076923077, - "grad_norm": 1.5391219854354858, - "learning_rate": 7.37678391959799e-05, - "loss": 5.5157, - "step": 26609 - }, - { - "epoch": 13.877444589308997, - "grad_norm": 1.469746708869934, - "learning_rate": 7.376683417085427e-05, - "loss": 5.5035, - "step": 26610 - }, - { - "epoch": 13.877966101694915, - "grad_norm": 1.473394751548767, - "learning_rate": 7.376582914572865e-05, - "loss": 5.4138, - "step": 26611 - }, - { - "epoch": 13.878487614080834, - "grad_norm": 1.5536819696426392, - "learning_rate": 7.376482412060301e-05, - "loss": 5.176, - "step": 26612 - }, - { - "epoch": 13.879009126466753, - "grad_norm": 1.4682466983795166, - "learning_rate": 7.376381909547739e-05, - "loss": 5.367, - "step": 26613 - }, - { - "epoch": 13.879530638852673, - "grad_norm": 1.4385857582092285, - "learning_rate": 7.376281407035175e-05, - "loss": 5.2971, - "step": 26614 - }, - { - "epoch": 13.880052151238592, - "grad_norm": 1.482029676437378, - "learning_rate": 7.376180904522613e-05, - "loss": 5.1889, - "step": 26615 - }, - { - "epoch": 13.880573663624512, - "grad_norm": 1.427282452583313, - "learning_rate": 7.376080402010051e-05, - "loss": 5.3557, - "step": 26616 - }, - { - "epoch": 13.88109517601043, - "grad_norm": 1.4401918649673462, - "learning_rate": 7.375979899497489e-05, - "loss": 5.5741, - "step": 26617 - }, - { - "epoch": 13.881616688396349, - "grad_norm": 1.6312189102172852, - "learning_rate": 7.375879396984925e-05, - "loss": 5.3206, - "step": 26618 - }, - { - "epoch": 13.882138200782268, - "grad_norm": 1.4221608638763428, - "learning_rate": 7.375778894472363e-05, - "loss": 4.9026, - "step": 26619 - }, - { - "epoch": 13.882659713168188, - "grad_norm": 1.4329110383987427, - "learning_rate": 7.3756783919598e-05, - "loss": 5.3631, - "step": 26620 - }, - { - "epoch": 13.883181225554107, - "grad_norm": 1.452627420425415, - "learning_rate": 7.375577889447236e-05, - "loss": 5.2424, - "step": 26621 - }, - { - "epoch": 13.883702737940027, - "grad_norm": 1.323655128479004, - "learning_rate": 7.375477386934674e-05, - "loss": 5.9703, - "step": 26622 - }, - { - "epoch": 13.884224250325945, - "grad_norm": 1.5112659931182861, - "learning_rate": 7.37537688442211e-05, - "loss": 4.9537, - "step": 26623 - }, - { - "epoch": 13.884745762711864, - "grad_norm": 1.4555965662002563, - "learning_rate": 7.375276381909548e-05, - "loss": 5.4629, - "step": 26624 - }, - { - "epoch": 13.885267275097783, - "grad_norm": 1.733452320098877, - "learning_rate": 7.375175879396986e-05, - "loss": 4.9269, - "step": 26625 - }, - { - "epoch": 13.885788787483703, - "grad_norm": 1.4827934503555298, - "learning_rate": 7.375075376884423e-05, - "loss": 5.1021, - "step": 26626 - }, - { - "epoch": 13.886310299869622, - "grad_norm": 1.3795722723007202, - "learning_rate": 7.37497487437186e-05, - "loss": 5.2372, - "step": 26627 - }, - { - "epoch": 13.886831812255542, - "grad_norm": 1.4426764249801636, - "learning_rate": 7.374874371859298e-05, - "loss": 5.3715, - "step": 26628 - }, - { - "epoch": 13.88735332464146, - "grad_norm": 1.4397040605545044, - "learning_rate": 7.374773869346734e-05, - "loss": 5.6023, - "step": 26629 - }, - { - "epoch": 13.887874837027379, - "grad_norm": 1.3743141889572144, - "learning_rate": 7.374673366834172e-05, - "loss": 4.9576, - "step": 26630 - }, - { - "epoch": 13.888396349413298, - "grad_norm": 1.5843952894210815, - "learning_rate": 7.374572864321608e-05, - "loss": 5.3932, - "step": 26631 - }, - { - "epoch": 13.888917861799218, - "grad_norm": 1.4581767320632935, - "learning_rate": 7.374472361809046e-05, - "loss": 5.1086, - "step": 26632 - }, - { - "epoch": 13.889439374185137, - "grad_norm": 1.4186147451400757, - "learning_rate": 7.374371859296482e-05, - "loss": 5.5278, - "step": 26633 - }, - { - "epoch": 13.889960886571057, - "grad_norm": 1.4549742937088013, - "learning_rate": 7.374271356783919e-05, - "loss": 5.5259, - "step": 26634 - }, - { - "epoch": 13.890482398956975, - "grad_norm": 1.5221470594406128, - "learning_rate": 7.374170854271357e-05, - "loss": 5.3998, - "step": 26635 - }, - { - "epoch": 13.891003911342894, - "grad_norm": 1.5695905685424805, - "learning_rate": 7.374070351758794e-05, - "loss": 5.4041, - "step": 26636 - }, - { - "epoch": 13.891525423728813, - "grad_norm": 1.4858338832855225, - "learning_rate": 7.373969849246232e-05, - "loss": 5.6866, - "step": 26637 - }, - { - "epoch": 13.892046936114733, - "grad_norm": 1.5797772407531738, - "learning_rate": 7.373869346733669e-05, - "loss": 5.0802, - "step": 26638 - }, - { - "epoch": 13.892568448500652, - "grad_norm": 1.4593744277954102, - "learning_rate": 7.373768844221106e-05, - "loss": 5.2031, - "step": 26639 - }, - { - "epoch": 13.893089960886572, - "grad_norm": 1.37606680393219, - "learning_rate": 7.373668341708543e-05, - "loss": 5.6182, - "step": 26640 - }, - { - "epoch": 13.89361147327249, - "grad_norm": 1.4816226959228516, - "learning_rate": 7.37356783919598e-05, - "loss": 5.6605, - "step": 26641 - }, - { - "epoch": 13.894132985658409, - "grad_norm": 1.4543999433517456, - "learning_rate": 7.373467336683417e-05, - "loss": 5.1805, - "step": 26642 - }, - { - "epoch": 13.894654498044329, - "grad_norm": 1.4872883558273315, - "learning_rate": 7.373366834170855e-05, - "loss": 5.437, - "step": 26643 - }, - { - "epoch": 13.895176010430248, - "grad_norm": 1.4359242916107178, - "learning_rate": 7.373266331658291e-05, - "loss": 5.2835, - "step": 26644 - }, - { - "epoch": 13.895697522816167, - "grad_norm": 1.4933618307113647, - "learning_rate": 7.373165829145729e-05, - "loss": 5.3383, - "step": 26645 - }, - { - "epoch": 13.896219035202087, - "grad_norm": 1.5379003286361694, - "learning_rate": 7.373065326633167e-05, - "loss": 5.5382, - "step": 26646 - }, - { - "epoch": 13.896740547588005, - "grad_norm": 1.526021122932434, - "learning_rate": 7.372964824120603e-05, - "loss": 5.5964, - "step": 26647 - }, - { - "epoch": 13.897262059973924, - "grad_norm": 1.5674846172332764, - "learning_rate": 7.372864321608041e-05, - "loss": 5.7654, - "step": 26648 - }, - { - "epoch": 13.897783572359844, - "grad_norm": 1.75889253616333, - "learning_rate": 7.372763819095477e-05, - "loss": 4.7114, - "step": 26649 - }, - { - "epoch": 13.898305084745763, - "grad_norm": 1.3132448196411133, - "learning_rate": 7.372663316582915e-05, - "loss": 5.6897, - "step": 26650 - }, - { - "epoch": 13.898826597131682, - "grad_norm": 1.3682386875152588, - "learning_rate": 7.372562814070352e-05, - "loss": 5.9535, - "step": 26651 - }, - { - "epoch": 13.8993481095176, - "grad_norm": 1.4459233283996582, - "learning_rate": 7.37246231155779e-05, - "loss": 5.6753, - "step": 26652 - }, - { - "epoch": 13.89986962190352, - "grad_norm": 1.4184913635253906, - "learning_rate": 7.372361809045226e-05, - "loss": 5.2294, - "step": 26653 - }, - { - "epoch": 13.900391134289439, - "grad_norm": 1.4373600482940674, - "learning_rate": 7.372261306532664e-05, - "loss": 5.5242, - "step": 26654 - }, - { - "epoch": 13.900912646675359, - "grad_norm": 1.4687817096710205, - "learning_rate": 7.3721608040201e-05, - "loss": 5.2158, - "step": 26655 - }, - { - "epoch": 13.901434159061278, - "grad_norm": 1.3878799676895142, - "learning_rate": 7.372060301507538e-05, - "loss": 5.6253, - "step": 26656 - }, - { - "epoch": 13.901955671447197, - "grad_norm": 1.5440422296524048, - "learning_rate": 7.371959798994976e-05, - "loss": 5.387, - "step": 26657 - }, - { - "epoch": 13.902477183833117, - "grad_norm": 1.5765721797943115, - "learning_rate": 7.371859296482413e-05, - "loss": 5.3325, - "step": 26658 - }, - { - "epoch": 13.902998696219035, - "grad_norm": 1.5343388319015503, - "learning_rate": 7.37175879396985e-05, - "loss": 4.8661, - "step": 26659 - }, - { - "epoch": 13.903520208604954, - "grad_norm": 1.5436588525772095, - "learning_rate": 7.371658291457286e-05, - "loss": 5.3694, - "step": 26660 - }, - { - "epoch": 13.904041720990874, - "grad_norm": 1.7210440635681152, - "learning_rate": 7.371557788944724e-05, - "loss": 5.0818, - "step": 26661 - }, - { - "epoch": 13.904563233376793, - "grad_norm": 1.5256341695785522, - "learning_rate": 7.37145728643216e-05, - "loss": 5.2195, - "step": 26662 - }, - { - "epoch": 13.905084745762712, - "grad_norm": 1.4127004146575928, - "learning_rate": 7.371356783919598e-05, - "loss": 5.7659, - "step": 26663 - }, - { - "epoch": 13.90560625814863, - "grad_norm": 1.4423679113388062, - "learning_rate": 7.371256281407035e-05, - "loss": 5.3089, - "step": 26664 - }, - { - "epoch": 13.90612777053455, - "grad_norm": 1.508985996246338, - "learning_rate": 7.371155778894473e-05, - "loss": 5.4576, - "step": 26665 - }, - { - "epoch": 13.906649282920469, - "grad_norm": 1.4204457998275757, - "learning_rate": 7.371055276381909e-05, - "loss": 5.3685, - "step": 26666 - }, - { - "epoch": 13.907170795306389, - "grad_norm": 1.494886875152588, - "learning_rate": 7.370954773869347e-05, - "loss": 5.2995, - "step": 26667 - }, - { - "epoch": 13.907692307692308, - "grad_norm": 1.42082679271698, - "learning_rate": 7.370854271356785e-05, - "loss": 5.3842, - "step": 26668 - }, - { - "epoch": 13.908213820078227, - "grad_norm": 1.4341078996658325, - "learning_rate": 7.370753768844222e-05, - "loss": 5.5901, - "step": 26669 - }, - { - "epoch": 13.908735332464147, - "grad_norm": 1.5035512447357178, - "learning_rate": 7.370653266331659e-05, - "loss": 5.7014, - "step": 26670 - }, - { - "epoch": 13.909256844850065, - "grad_norm": 1.4467698335647583, - "learning_rate": 7.370552763819097e-05, - "loss": 5.3172, - "step": 26671 - }, - { - "epoch": 13.909778357235984, - "grad_norm": 1.6182844638824463, - "learning_rate": 7.370452261306533e-05, - "loss": 5.3316, - "step": 26672 - }, - { - "epoch": 13.910299869621904, - "grad_norm": 1.3526809215545654, - "learning_rate": 7.37035175879397e-05, - "loss": 5.2553, - "step": 26673 - }, - { - "epoch": 13.910821382007823, - "grad_norm": 1.5579122304916382, - "learning_rate": 7.370251256281407e-05, - "loss": 4.9359, - "step": 26674 - }, - { - "epoch": 13.911342894393742, - "grad_norm": 1.5064327716827393, - "learning_rate": 7.370150753768844e-05, - "loss": 4.5857, - "step": 26675 - }, - { - "epoch": 13.91186440677966, - "grad_norm": 1.452307939529419, - "learning_rate": 7.370050251256281e-05, - "loss": 5.5639, - "step": 26676 - }, - { - "epoch": 13.91238591916558, - "grad_norm": 1.479504942893982, - "learning_rate": 7.369949748743719e-05, - "loss": 5.7205, - "step": 26677 - }, - { - "epoch": 13.9129074315515, - "grad_norm": 1.3828312158584595, - "learning_rate": 7.369849246231157e-05, - "loss": 5.8899, - "step": 26678 - }, - { - "epoch": 13.913428943937419, - "grad_norm": 1.4434571266174316, - "learning_rate": 7.369748743718593e-05, - "loss": 5.5319, - "step": 26679 - }, - { - "epoch": 13.913950456323338, - "grad_norm": 1.5244044065475464, - "learning_rate": 7.369648241206031e-05, - "loss": 5.0372, - "step": 26680 - }, - { - "epoch": 13.914471968709258, - "grad_norm": 1.73919677734375, - "learning_rate": 7.369547738693468e-05, - "loss": 4.6015, - "step": 26681 - }, - { - "epoch": 13.914993481095175, - "grad_norm": 1.4555275440216064, - "learning_rate": 7.369447236180905e-05, - "loss": 5.7343, - "step": 26682 - }, - { - "epoch": 13.915514993481095, - "grad_norm": 1.5297082662582397, - "learning_rate": 7.369346733668342e-05, - "loss": 5.2371, - "step": 26683 - }, - { - "epoch": 13.916036505867014, - "grad_norm": 1.4727615118026733, - "learning_rate": 7.36924623115578e-05, - "loss": 5.3155, - "step": 26684 - }, - { - "epoch": 13.916558018252934, - "grad_norm": 1.4350155591964722, - "learning_rate": 7.369145728643216e-05, - "loss": 5.2208, - "step": 26685 - }, - { - "epoch": 13.917079530638853, - "grad_norm": 1.4707037210464478, - "learning_rate": 7.369045226130652e-05, - "loss": 5.6404, - "step": 26686 - }, - { - "epoch": 13.917601043024773, - "grad_norm": 1.3821433782577515, - "learning_rate": 7.36894472361809e-05, - "loss": 5.3282, - "step": 26687 - }, - { - "epoch": 13.91812255541069, - "grad_norm": 1.4766658544540405, - "learning_rate": 7.368844221105528e-05, - "loss": 5.226, - "step": 26688 - }, - { - "epoch": 13.91864406779661, - "grad_norm": 1.45279061794281, - "learning_rate": 7.368743718592966e-05, - "loss": 5.5047, - "step": 26689 - }, - { - "epoch": 13.91916558018253, - "grad_norm": 1.6684800386428833, - "learning_rate": 7.368643216080402e-05, - "loss": 4.6905, - "step": 26690 - }, - { - "epoch": 13.919687092568449, - "grad_norm": 1.4954174757003784, - "learning_rate": 7.36854271356784e-05, - "loss": 5.4543, - "step": 26691 - }, - { - "epoch": 13.920208604954368, - "grad_norm": 1.4659150838851929, - "learning_rate": 7.368442211055276e-05, - "loss": 5.4317, - "step": 26692 - }, - { - "epoch": 13.920730117340288, - "grad_norm": 1.4975770711898804, - "learning_rate": 7.368341708542714e-05, - "loss": 4.7188, - "step": 26693 - }, - { - "epoch": 13.921251629726205, - "grad_norm": 1.443884253501892, - "learning_rate": 7.36824120603015e-05, - "loss": 5.264, - "step": 26694 - }, - { - "epoch": 13.921773142112125, - "grad_norm": 1.4936622381210327, - "learning_rate": 7.368140703517588e-05, - "loss": 4.5737, - "step": 26695 - }, - { - "epoch": 13.922294654498044, - "grad_norm": 1.4416905641555786, - "learning_rate": 7.368040201005025e-05, - "loss": 5.4038, - "step": 26696 - }, - { - "epoch": 13.922816166883964, - "grad_norm": 1.4603726863861084, - "learning_rate": 7.367939698492463e-05, - "loss": 5.3269, - "step": 26697 - }, - { - "epoch": 13.923337679269883, - "grad_norm": 1.415736436843872, - "learning_rate": 7.3678391959799e-05, - "loss": 5.6547, - "step": 26698 - }, - { - "epoch": 13.923859191655803, - "grad_norm": 1.369124174118042, - "learning_rate": 7.367738693467338e-05, - "loss": 5.216, - "step": 26699 - }, - { - "epoch": 13.92438070404172, - "grad_norm": 1.4672560691833496, - "learning_rate": 7.367638190954775e-05, - "loss": 5.2612, - "step": 26700 - }, - { - "epoch": 13.92490221642764, - "grad_norm": 1.4825761318206787, - "learning_rate": 7.367537688442211e-05, - "loss": 5.6364, - "step": 26701 - }, - { - "epoch": 13.92542372881356, - "grad_norm": 1.5184375047683716, - "learning_rate": 7.367437185929649e-05, - "loss": 5.167, - "step": 26702 - }, - { - "epoch": 13.925945241199479, - "grad_norm": 1.2806264162063599, - "learning_rate": 7.367336683417085e-05, - "loss": 5.7637, - "step": 26703 - }, - { - "epoch": 13.926466753585398, - "grad_norm": 1.5501829385757446, - "learning_rate": 7.367236180904523e-05, - "loss": 5.1534, - "step": 26704 - }, - { - "epoch": 13.926988265971318, - "grad_norm": 1.418771505355835, - "learning_rate": 7.36713567839196e-05, - "loss": 5.4883, - "step": 26705 - }, - { - "epoch": 13.927509778357235, - "grad_norm": 1.4486854076385498, - "learning_rate": 7.367035175879397e-05, - "loss": 5.3028, - "step": 26706 - }, - { - "epoch": 13.928031290743155, - "grad_norm": 1.523600697517395, - "learning_rate": 7.366934673366834e-05, - "loss": 5.1216, - "step": 26707 - }, - { - "epoch": 13.928552803129074, - "grad_norm": 1.5072319507598877, - "learning_rate": 7.366834170854271e-05, - "loss": 5.0388, - "step": 26708 - }, - { - "epoch": 13.929074315514994, - "grad_norm": 1.4231427907943726, - "learning_rate": 7.366733668341709e-05, - "loss": 5.6187, - "step": 26709 - }, - { - "epoch": 13.929595827900913, - "grad_norm": 1.4185776710510254, - "learning_rate": 7.366633165829147e-05, - "loss": 5.6172, - "step": 26710 - }, - { - "epoch": 13.930117340286833, - "grad_norm": 1.4876748323440552, - "learning_rate": 7.366532663316583e-05, - "loss": 5.4194, - "step": 26711 - }, - { - "epoch": 13.93063885267275, - "grad_norm": 1.5744749307632446, - "learning_rate": 7.366432160804021e-05, - "loss": 5.3129, - "step": 26712 - }, - { - "epoch": 13.93116036505867, - "grad_norm": 1.3311198949813843, - "learning_rate": 7.366331658291458e-05, - "loss": 5.8718, - "step": 26713 - }, - { - "epoch": 13.93168187744459, - "grad_norm": 1.3622770309448242, - "learning_rate": 7.366231155778894e-05, - "loss": 5.6396, - "step": 26714 - }, - { - "epoch": 13.932203389830509, - "grad_norm": 1.4100186824798584, - "learning_rate": 7.366130653266332e-05, - "loss": 5.6709, - "step": 26715 - }, - { - "epoch": 13.932724902216428, - "grad_norm": 1.4706860780715942, - "learning_rate": 7.366030150753768e-05, - "loss": 5.4392, - "step": 26716 - }, - { - "epoch": 13.933246414602348, - "grad_norm": 1.4863117933273315, - "learning_rate": 7.365929648241206e-05, - "loss": 5.2469, - "step": 26717 - }, - { - "epoch": 13.933767926988265, - "grad_norm": 1.4887430667877197, - "learning_rate": 7.365829145728644e-05, - "loss": 5.3699, - "step": 26718 - }, - { - "epoch": 13.934289439374185, - "grad_norm": 1.4002934694290161, - "learning_rate": 7.365728643216082e-05, - "loss": 5.4467, - "step": 26719 - }, - { - "epoch": 13.934810951760104, - "grad_norm": 1.3688433170318604, - "learning_rate": 7.365628140703518e-05, - "loss": 5.6979, - "step": 26720 - }, - { - "epoch": 13.935332464146024, - "grad_norm": 1.6244536638259888, - "learning_rate": 7.365527638190956e-05, - "loss": 5.6054, - "step": 26721 - }, - { - "epoch": 13.935853976531943, - "grad_norm": 1.5005621910095215, - "learning_rate": 7.365427135678392e-05, - "loss": 5.5437, - "step": 26722 - }, - { - "epoch": 13.936375488917863, - "grad_norm": 1.4373232126235962, - "learning_rate": 7.36532663316583e-05, - "loss": 5.4355, - "step": 26723 - }, - { - "epoch": 13.93689700130378, - "grad_norm": 1.4109933376312256, - "learning_rate": 7.365226130653266e-05, - "loss": 5.6648, - "step": 26724 - }, - { - "epoch": 13.9374185136897, - "grad_norm": 1.5422117710113525, - "learning_rate": 7.365125628140704e-05, - "loss": 5.7122, - "step": 26725 - }, - { - "epoch": 13.93794002607562, - "grad_norm": 1.554109811782837, - "learning_rate": 7.365025125628141e-05, - "loss": 5.3159, - "step": 26726 - }, - { - "epoch": 13.938461538461539, - "grad_norm": 1.569801688194275, - "learning_rate": 7.364924623115577e-05, - "loss": 5.3349, - "step": 26727 - }, - { - "epoch": 13.938983050847458, - "grad_norm": 1.417837381362915, - "learning_rate": 7.364824120603015e-05, - "loss": 5.4791, - "step": 26728 - }, - { - "epoch": 13.939504563233378, - "grad_norm": 1.2948740720748901, - "learning_rate": 7.364723618090453e-05, - "loss": 6.0277, - "step": 26729 - }, - { - "epoch": 13.940026075619295, - "grad_norm": 1.576796054840088, - "learning_rate": 7.36462311557789e-05, - "loss": 5.3382, - "step": 26730 - }, - { - "epoch": 13.940547588005215, - "grad_norm": 1.473318338394165, - "learning_rate": 7.364522613065327e-05, - "loss": 5.6456, - "step": 26731 - }, - { - "epoch": 13.941069100391134, - "grad_norm": 1.545254111289978, - "learning_rate": 7.364422110552765e-05, - "loss": 5.399, - "step": 26732 - }, - { - "epoch": 13.941590612777054, - "grad_norm": 1.5057916641235352, - "learning_rate": 7.364321608040201e-05, - "loss": 5.2219, - "step": 26733 - }, - { - "epoch": 13.942112125162973, - "grad_norm": 1.4794948101043701, - "learning_rate": 7.364221105527639e-05, - "loss": 5.6555, - "step": 26734 - }, - { - "epoch": 13.94263363754889, - "grad_norm": 1.4810484647750854, - "learning_rate": 7.364120603015075e-05, - "loss": 5.6945, - "step": 26735 - }, - { - "epoch": 13.94315514993481, - "grad_norm": 1.4121111631393433, - "learning_rate": 7.364020100502513e-05, - "loss": 5.1214, - "step": 26736 - }, - { - "epoch": 13.94367666232073, - "grad_norm": 1.3786876201629639, - "learning_rate": 7.36391959798995e-05, - "loss": 5.5324, - "step": 26737 - }, - { - "epoch": 13.94419817470665, - "grad_norm": 1.443158507347107, - "learning_rate": 7.363819095477387e-05, - "loss": 4.6538, - "step": 26738 - }, - { - "epoch": 13.944719687092569, - "grad_norm": 1.5720232725143433, - "learning_rate": 7.363718592964825e-05, - "loss": 5.1649, - "step": 26739 - }, - { - "epoch": 13.945241199478488, - "grad_norm": 1.4947254657745361, - "learning_rate": 7.363618090452262e-05, - "loss": 5.5885, - "step": 26740 - }, - { - "epoch": 13.945762711864408, - "grad_norm": 1.5446851253509521, - "learning_rate": 7.363517587939699e-05, - "loss": 5.1162, - "step": 26741 - }, - { - "epoch": 13.946284224250325, - "grad_norm": 1.4884545803070068, - "learning_rate": 7.363417085427136e-05, - "loss": 5.0191, - "step": 26742 - }, - { - "epoch": 13.946805736636245, - "grad_norm": 1.4477216005325317, - "learning_rate": 7.363316582914574e-05, - "loss": 5.0399, - "step": 26743 - }, - { - "epoch": 13.947327249022164, - "grad_norm": 1.45907461643219, - "learning_rate": 7.36321608040201e-05, - "loss": 5.3398, - "step": 26744 - }, - { - "epoch": 13.947848761408084, - "grad_norm": 1.5139464139938354, - "learning_rate": 7.363115577889448e-05, - "loss": 5.6116, - "step": 26745 - }, - { - "epoch": 13.948370273794003, - "grad_norm": 1.4166967868804932, - "learning_rate": 7.363015075376884e-05, - "loss": 5.7158, - "step": 26746 - }, - { - "epoch": 13.94889178617992, - "grad_norm": 1.455305814743042, - "learning_rate": 7.362914572864322e-05, - "loss": 5.5612, - "step": 26747 - }, - { - "epoch": 13.94941329856584, - "grad_norm": 1.5572513341903687, - "learning_rate": 7.362814070351758e-05, - "loss": 5.2216, - "step": 26748 - }, - { - "epoch": 13.94993481095176, - "grad_norm": 1.5442153215408325, - "learning_rate": 7.362713567839196e-05, - "loss": 5.6762, - "step": 26749 - }, - { - "epoch": 13.95045632333768, - "grad_norm": 1.4083895683288574, - "learning_rate": 7.362613065326634e-05, - "loss": 5.4785, - "step": 26750 - }, - { - "epoch": 13.950977835723599, - "grad_norm": 1.4755198955535889, - "learning_rate": 7.362512562814072e-05, - "loss": 5.0154, - "step": 26751 - }, - { - "epoch": 13.951499348109518, - "grad_norm": 1.6276192665100098, - "learning_rate": 7.362412060301508e-05, - "loss": 4.7048, - "step": 26752 - }, - { - "epoch": 13.952020860495438, - "grad_norm": 1.4662811756134033, - "learning_rate": 7.362311557788945e-05, - "loss": 5.126, - "step": 26753 - }, - { - "epoch": 13.952542372881355, - "grad_norm": 1.5883311033248901, - "learning_rate": 7.362211055276382e-05, - "loss": 5.1382, - "step": 26754 - }, - { - "epoch": 13.953063885267275, - "grad_norm": 1.3924087285995483, - "learning_rate": 7.362110552763819e-05, - "loss": 5.0355, - "step": 26755 - }, - { - "epoch": 13.953585397653194, - "grad_norm": 1.526879072189331, - "learning_rate": 7.362010050251257e-05, - "loss": 4.9037, - "step": 26756 - }, - { - "epoch": 13.954106910039114, - "grad_norm": 1.4299628734588623, - "learning_rate": 7.361909547738693e-05, - "loss": 5.533, - "step": 26757 - }, - { - "epoch": 13.954628422425033, - "grad_norm": 1.489676594734192, - "learning_rate": 7.361809045226131e-05, - "loss": 4.9036, - "step": 26758 - }, - { - "epoch": 13.955149934810951, - "grad_norm": 1.375650405883789, - "learning_rate": 7.361708542713569e-05, - "loss": 5.2429, - "step": 26759 - }, - { - "epoch": 13.95567144719687, - "grad_norm": 1.4780923128128052, - "learning_rate": 7.361608040201006e-05, - "loss": 5.6222, - "step": 26760 - }, - { - "epoch": 13.95619295958279, - "grad_norm": 1.4481362104415894, - "learning_rate": 7.361507537688443e-05, - "loss": 5.3446, - "step": 26761 - }, - { - "epoch": 13.95671447196871, - "grad_norm": 1.4543001651763916, - "learning_rate": 7.36140703517588e-05, - "loss": 5.8181, - "step": 26762 - }, - { - "epoch": 13.957235984354629, - "grad_norm": 1.606495976448059, - "learning_rate": 7.361306532663317e-05, - "loss": 5.1825, - "step": 26763 - }, - { - "epoch": 13.957757496740548, - "grad_norm": 1.4093849658966064, - "learning_rate": 7.361206030150755e-05, - "loss": 5.5873, - "step": 26764 - }, - { - "epoch": 13.958279009126468, - "grad_norm": 1.470317006111145, - "learning_rate": 7.361105527638191e-05, - "loss": 5.5007, - "step": 26765 - }, - { - "epoch": 13.958800521512385, - "grad_norm": 1.532192349433899, - "learning_rate": 7.361005025125628e-05, - "loss": 5.4882, - "step": 26766 - }, - { - "epoch": 13.959322033898305, - "grad_norm": 1.3733165264129639, - "learning_rate": 7.360904522613065e-05, - "loss": 5.7388, - "step": 26767 - }, - { - "epoch": 13.959843546284224, - "grad_norm": 1.5592191219329834, - "learning_rate": 7.360804020100502e-05, - "loss": 5.2777, - "step": 26768 - }, - { - "epoch": 13.960365058670144, - "grad_norm": 1.450516700744629, - "learning_rate": 7.36070351758794e-05, - "loss": 5.4359, - "step": 26769 - }, - { - "epoch": 13.960886571056063, - "grad_norm": 1.3780673742294312, - "learning_rate": 7.360603015075377e-05, - "loss": 5.6496, - "step": 26770 - }, - { - "epoch": 13.961408083441981, - "grad_norm": 1.3412578105926514, - "learning_rate": 7.360502512562815e-05, - "loss": 5.6529, - "step": 26771 - }, - { - "epoch": 13.9619295958279, - "grad_norm": 1.409651517868042, - "learning_rate": 7.360402010050252e-05, - "loss": 5.1035, - "step": 26772 - }, - { - "epoch": 13.96245110821382, - "grad_norm": 1.4353704452514648, - "learning_rate": 7.36030150753769e-05, - "loss": 5.5726, - "step": 26773 - }, - { - "epoch": 13.96297262059974, - "grad_norm": 1.4615349769592285, - "learning_rate": 7.360201005025126e-05, - "loss": 5.3564, - "step": 26774 - }, - { - "epoch": 13.963494132985659, - "grad_norm": 1.4256513118743896, - "learning_rate": 7.360100502512564e-05, - "loss": 5.4341, - "step": 26775 - }, - { - "epoch": 13.964015645371578, - "grad_norm": 1.4297988414764404, - "learning_rate": 7.36e-05, - "loss": 5.7102, - "step": 26776 - }, - { - "epoch": 13.964537157757496, - "grad_norm": 1.3601183891296387, - "learning_rate": 7.359899497487438e-05, - "loss": 5.6929, - "step": 26777 - }, - { - "epoch": 13.965058670143415, - "grad_norm": 1.4266067743301392, - "learning_rate": 7.359798994974874e-05, - "loss": 5.4361, - "step": 26778 - }, - { - "epoch": 13.965580182529335, - "grad_norm": 1.4265316724777222, - "learning_rate": 7.359698492462312e-05, - "loss": 5.0847, - "step": 26779 - }, - { - "epoch": 13.966101694915254, - "grad_norm": 1.4128801822662354, - "learning_rate": 7.35959798994975e-05, - "loss": 4.9898, - "step": 26780 - }, - { - "epoch": 13.966623207301174, - "grad_norm": 1.4600292444229126, - "learning_rate": 7.359497487437186e-05, - "loss": 5.1557, - "step": 26781 - }, - { - "epoch": 13.967144719687093, - "grad_norm": 1.4585713148117065, - "learning_rate": 7.359396984924624e-05, - "loss": 5.4017, - "step": 26782 - }, - { - "epoch": 13.967666232073011, - "grad_norm": 1.49857497215271, - "learning_rate": 7.35929648241206e-05, - "loss": 5.6955, - "step": 26783 - }, - { - "epoch": 13.96818774445893, - "grad_norm": 1.5361857414245605, - "learning_rate": 7.359195979899498e-05, - "loss": 5.3786, - "step": 26784 - }, - { - "epoch": 13.96870925684485, - "grad_norm": 1.3994060754776, - "learning_rate": 7.359095477386935e-05, - "loss": 5.5702, - "step": 26785 - }, - { - "epoch": 13.96923076923077, - "grad_norm": 1.4731388092041016, - "learning_rate": 7.358994974874372e-05, - "loss": 4.9833, - "step": 26786 - }, - { - "epoch": 13.969752281616689, - "grad_norm": 1.425644874572754, - "learning_rate": 7.358894472361809e-05, - "loss": 5.5767, - "step": 26787 - }, - { - "epoch": 13.970273794002608, - "grad_norm": 1.3357279300689697, - "learning_rate": 7.358793969849247e-05, - "loss": 5.3458, - "step": 26788 - }, - { - "epoch": 13.970795306388526, - "grad_norm": 1.2940394878387451, - "learning_rate": 7.358693467336683e-05, - "loss": 5.8294, - "step": 26789 - }, - { - "epoch": 13.971316818774445, - "grad_norm": 1.3942792415618896, - "learning_rate": 7.358592964824121e-05, - "loss": 5.4873, - "step": 26790 - }, - { - "epoch": 13.971838331160365, - "grad_norm": 1.3350924253463745, - "learning_rate": 7.358492462311559e-05, - "loss": 5.4439, - "step": 26791 - }, - { - "epoch": 13.972359843546284, - "grad_norm": 1.5189212560653687, - "learning_rate": 7.358391959798996e-05, - "loss": 5.3676, - "step": 26792 - }, - { - "epoch": 13.972881355932204, - "grad_norm": 1.4011837244033813, - "learning_rate": 7.358291457286433e-05, - "loss": 5.6413, - "step": 26793 - }, - { - "epoch": 13.973402868318123, - "grad_norm": 1.5772035121917725, - "learning_rate": 7.358190954773869e-05, - "loss": 5.3732, - "step": 26794 - }, - { - "epoch": 13.973924380704041, - "grad_norm": 1.5933120250701904, - "learning_rate": 7.358090452261307e-05, - "loss": 5.5655, - "step": 26795 - }, - { - "epoch": 13.97444589308996, - "grad_norm": 1.6761027574539185, - "learning_rate": 7.357989949748743e-05, - "loss": 4.6845, - "step": 26796 - }, - { - "epoch": 13.97496740547588, - "grad_norm": 1.4810163974761963, - "learning_rate": 7.357889447236181e-05, - "loss": 5.5088, - "step": 26797 - }, - { - "epoch": 13.9754889178618, - "grad_norm": 1.4113966226577759, - "learning_rate": 7.357788944723618e-05, - "loss": 5.7016, - "step": 26798 - }, - { - "epoch": 13.976010430247719, - "grad_norm": 1.4204628467559814, - "learning_rate": 7.357688442211055e-05, - "loss": 5.3871, - "step": 26799 - }, - { - "epoch": 13.976531942633638, - "grad_norm": 1.398368000984192, - "learning_rate": 7.357587939698493e-05, - "loss": 5.1118, - "step": 26800 - }, - { - "epoch": 13.977053455019556, - "grad_norm": 1.4315775632858276, - "learning_rate": 7.357487437185931e-05, - "loss": 5.1235, - "step": 26801 - }, - { - "epoch": 13.977574967405475, - "grad_norm": 1.500160813331604, - "learning_rate": 7.357386934673367e-05, - "loss": 4.802, - "step": 26802 - }, - { - "epoch": 13.978096479791395, - "grad_norm": 1.5232983827590942, - "learning_rate": 7.357286432160805e-05, - "loss": 5.4479, - "step": 26803 - }, - { - "epoch": 13.978617992177314, - "grad_norm": 1.513954520225525, - "learning_rate": 7.357185929648242e-05, - "loss": 5.6248, - "step": 26804 - }, - { - "epoch": 13.979139504563234, - "grad_norm": 1.526707649230957, - "learning_rate": 7.35708542713568e-05, - "loss": 5.3217, - "step": 26805 - }, - { - "epoch": 13.979661016949153, - "grad_norm": 1.5071063041687012, - "learning_rate": 7.356984924623116e-05, - "loss": 5.1963, - "step": 26806 - }, - { - "epoch": 13.980182529335071, - "grad_norm": 1.4635404348373413, - "learning_rate": 7.356884422110552e-05, - "loss": 5.5328, - "step": 26807 - }, - { - "epoch": 13.98070404172099, - "grad_norm": 1.4978973865509033, - "learning_rate": 7.35678391959799e-05, - "loss": 5.5122, - "step": 26808 - }, - { - "epoch": 13.98122555410691, - "grad_norm": 1.441070318222046, - "learning_rate": 7.356683417085427e-05, - "loss": 5.2107, - "step": 26809 - }, - { - "epoch": 13.98174706649283, - "grad_norm": 1.4981123208999634, - "learning_rate": 7.356582914572864e-05, - "loss": 5.1212, - "step": 26810 - }, - { - "epoch": 13.982268578878749, - "grad_norm": 1.3488099575042725, - "learning_rate": 7.356482412060302e-05, - "loss": 5.7533, - "step": 26811 - }, - { - "epoch": 13.982790091264668, - "grad_norm": 1.738165020942688, - "learning_rate": 7.35638190954774e-05, - "loss": 4.5023, - "step": 26812 - }, - { - "epoch": 13.983311603650586, - "grad_norm": 1.5288413763046265, - "learning_rate": 7.356281407035176e-05, - "loss": 4.8685, - "step": 26813 - }, - { - "epoch": 13.983833116036505, - "grad_norm": 1.494642972946167, - "learning_rate": 7.356180904522614e-05, - "loss": 5.5253, - "step": 26814 - }, - { - "epoch": 13.984354628422425, - "grad_norm": 1.4947139024734497, - "learning_rate": 7.35608040201005e-05, - "loss": 5.1878, - "step": 26815 - }, - { - "epoch": 13.984876140808344, - "grad_norm": 1.3864974975585938, - "learning_rate": 7.355979899497488e-05, - "loss": 5.6899, - "step": 26816 - }, - { - "epoch": 13.985397653194264, - "grad_norm": 1.5101107358932495, - "learning_rate": 7.355879396984925e-05, - "loss": 4.8469, - "step": 26817 - }, - { - "epoch": 13.985919165580183, - "grad_norm": 1.545830488204956, - "learning_rate": 7.355778894472362e-05, - "loss": 4.8856, - "step": 26818 - }, - { - "epoch": 13.986440677966101, - "grad_norm": 1.4211416244506836, - "learning_rate": 7.355678391959799e-05, - "loss": 5.4441, - "step": 26819 - }, - { - "epoch": 13.98696219035202, - "grad_norm": 1.5769248008728027, - "learning_rate": 7.355577889447237e-05, - "loss": 5.1471, - "step": 26820 - }, - { - "epoch": 13.98748370273794, - "grad_norm": 1.4263787269592285, - "learning_rate": 7.355477386934674e-05, - "loss": 5.8847, - "step": 26821 - }, - { - "epoch": 13.98800521512386, - "grad_norm": 1.4535173177719116, - "learning_rate": 7.355376884422111e-05, - "loss": 5.1169, - "step": 26822 - }, - { - "epoch": 13.988526727509779, - "grad_norm": 1.378038763999939, - "learning_rate": 7.355276381909549e-05, - "loss": 5.845, - "step": 26823 - }, - { - "epoch": 13.989048239895698, - "grad_norm": 1.5194714069366455, - "learning_rate": 7.355175879396985e-05, - "loss": 5.2315, - "step": 26824 - }, - { - "epoch": 13.989569752281616, - "grad_norm": 1.5826665163040161, - "learning_rate": 7.355075376884423e-05, - "loss": 5.3273, - "step": 26825 - }, - { - "epoch": 13.990091264667535, - "grad_norm": 1.4472769498825073, - "learning_rate": 7.35497487437186e-05, - "loss": 5.8343, - "step": 26826 - }, - { - "epoch": 13.990612777053455, - "grad_norm": 1.6252646446228027, - "learning_rate": 7.354874371859297e-05, - "loss": 5.0606, - "step": 26827 - }, - { - "epoch": 13.991134289439374, - "grad_norm": 1.4935024976730347, - "learning_rate": 7.354773869346734e-05, - "loss": 5.8001, - "step": 26828 - }, - { - "epoch": 13.991655801825294, - "grad_norm": 1.4202709197998047, - "learning_rate": 7.354673366834171e-05, - "loss": 5.4945, - "step": 26829 - }, - { - "epoch": 13.992177314211212, - "grad_norm": 1.4242334365844727, - "learning_rate": 7.354572864321608e-05, - "loss": 4.8596, - "step": 26830 - }, - { - "epoch": 13.992698826597131, - "grad_norm": 1.4604341983795166, - "learning_rate": 7.354472361809046e-05, - "loss": 5.1734, - "step": 26831 - }, - { - "epoch": 13.99322033898305, - "grad_norm": 1.376772403717041, - "learning_rate": 7.354371859296483e-05, - "loss": 5.0823, - "step": 26832 - }, - { - "epoch": 13.99374185136897, - "grad_norm": 1.376705527305603, - "learning_rate": 7.35427135678392e-05, - "loss": 5.5001, - "step": 26833 - }, - { - "epoch": 13.99426336375489, - "grad_norm": 1.5069876909255981, - "learning_rate": 7.354170854271358e-05, - "loss": 5.5931, - "step": 26834 - }, - { - "epoch": 13.994784876140809, - "grad_norm": 1.488939642906189, - "learning_rate": 7.354070351758794e-05, - "loss": 4.9725, - "step": 26835 - }, - { - "epoch": 13.995306388526728, - "grad_norm": 1.4175881147384644, - "learning_rate": 7.353969849246232e-05, - "loss": 5.4144, - "step": 26836 - }, - { - "epoch": 13.995827900912646, - "grad_norm": 1.463783860206604, - "learning_rate": 7.353869346733668e-05, - "loss": 5.1871, - "step": 26837 - }, - { - "epoch": 13.996349413298566, - "grad_norm": 1.5515037775039673, - "learning_rate": 7.353768844221106e-05, - "loss": 5.3532, - "step": 26838 - }, - { - "epoch": 13.996870925684485, - "grad_norm": 1.4369971752166748, - "learning_rate": 7.353668341708542e-05, - "loss": 5.2837, - "step": 26839 - }, - { - "epoch": 13.997392438070404, - "grad_norm": 1.449455976486206, - "learning_rate": 7.35356783919598e-05, - "loss": 5.04, - "step": 26840 - }, - { - "epoch": 13.997913950456324, - "grad_norm": 1.4751110076904297, - "learning_rate": 7.353467336683417e-05, - "loss": 4.8468, - "step": 26841 - }, - { - "epoch": 13.998435462842242, - "grad_norm": 1.3613299131393433, - "learning_rate": 7.353366834170854e-05, - "loss": 5.6348, - "step": 26842 - }, - { - "epoch": 13.998956975228161, - "grad_norm": 1.4341375827789307, - "learning_rate": 7.353266331658292e-05, - "loss": 5.4747, - "step": 26843 - }, - { - "epoch": 13.99947848761408, - "grad_norm": 1.3866671323776245, - "learning_rate": 7.35316582914573e-05, - "loss": 5.5033, - "step": 26844 - }, - { - "epoch": 14.0, - "grad_norm": 1.6326735019683838, - "learning_rate": 7.353065326633166e-05, - "loss": 5.1622, - "step": 26845 - }, - { - "epoch": 14.00052151238592, - "grad_norm": 1.4589935541152954, - "learning_rate": 7.352964824120603e-05, - "loss": 4.7231, - "step": 26846 - }, - { - "epoch": 14.001043024771839, - "grad_norm": 1.4754462242126465, - "learning_rate": 7.35286432160804e-05, - "loss": 5.3489, - "step": 26847 - }, - { - "epoch": 14.001564537157757, - "grad_norm": 1.3962643146514893, - "learning_rate": 7.352763819095477e-05, - "loss": 5.5245, - "step": 26848 - }, - { - "epoch": 14.002086049543676, - "grad_norm": 1.4796777963638306, - "learning_rate": 7.352663316582915e-05, - "loss": 5.418, - "step": 26849 - }, - { - "epoch": 14.002607561929596, - "grad_norm": 1.4989272356033325, - "learning_rate": 7.352562814070351e-05, - "loss": 5.4504, - "step": 26850 - }, - { - "epoch": 14.003129074315515, - "grad_norm": 1.5473209619522095, - "learning_rate": 7.352462311557789e-05, - "loss": 5.5937, - "step": 26851 - }, - { - "epoch": 14.003650586701434, - "grad_norm": 1.580489993095398, - "learning_rate": 7.352361809045227e-05, - "loss": 4.5548, - "step": 26852 - }, - { - "epoch": 14.004172099087354, - "grad_norm": 1.5011030435562134, - "learning_rate": 7.352261306532665e-05, - "loss": 5.5232, - "step": 26853 - }, - { - "epoch": 14.004693611473272, - "grad_norm": 1.3724726438522339, - "learning_rate": 7.352160804020101e-05, - "loss": 5.619, - "step": 26854 - }, - { - "epoch": 14.005215123859191, - "grad_norm": 1.4519985914230347, - "learning_rate": 7.352060301507539e-05, - "loss": 5.6839, - "step": 26855 - }, - { - "epoch": 14.00573663624511, - "grad_norm": 1.4497004747390747, - "learning_rate": 7.351959798994975e-05, - "loss": 4.9564, - "step": 26856 - }, - { - "epoch": 14.00625814863103, - "grad_norm": 1.7752814292907715, - "learning_rate": 7.351859296482413e-05, - "loss": 4.8688, - "step": 26857 - }, - { - "epoch": 14.00677966101695, - "grad_norm": 1.3969091176986694, - "learning_rate": 7.35175879396985e-05, - "loss": 5.7232, - "step": 26858 - }, - { - "epoch": 14.007301173402869, - "grad_norm": 1.4468841552734375, - "learning_rate": 7.351658291457286e-05, - "loss": 5.5508, - "step": 26859 - }, - { - "epoch": 14.007822685788787, - "grad_norm": 1.3771315813064575, - "learning_rate": 7.351557788944724e-05, - "loss": 5.4213, - "step": 26860 - }, - { - "epoch": 14.008344198174706, - "grad_norm": 1.392442226409912, - "learning_rate": 7.35145728643216e-05, - "loss": 5.705, - "step": 26861 - }, - { - "epoch": 14.008865710560626, - "grad_norm": 1.4163898229599, - "learning_rate": 7.351356783919598e-05, - "loss": 5.6212, - "step": 26862 - }, - { - "epoch": 14.009387222946545, - "grad_norm": 1.455457091331482, - "learning_rate": 7.351256281407036e-05, - "loss": 5.5424, - "step": 26863 - }, - { - "epoch": 14.009908735332465, - "grad_norm": 1.3963004350662231, - "learning_rate": 7.351155778894473e-05, - "loss": 5.2199, - "step": 26864 - }, - { - "epoch": 14.010430247718384, - "grad_norm": 1.3810863494873047, - "learning_rate": 7.35105527638191e-05, - "loss": 5.539, - "step": 26865 - }, - { - "epoch": 14.010951760104302, - "grad_norm": 1.4181419610977173, - "learning_rate": 7.350954773869348e-05, - "loss": 5.3802, - "step": 26866 - }, - { - "epoch": 14.011473272490221, - "grad_norm": 1.458590030670166, - "learning_rate": 7.350854271356784e-05, - "loss": 5.6133, - "step": 26867 - }, - { - "epoch": 14.01199478487614, - "grad_norm": 1.4504090547561646, - "learning_rate": 7.350753768844222e-05, - "loss": 5.3067, - "step": 26868 - }, - { - "epoch": 14.01251629726206, - "grad_norm": 1.3107978105545044, - "learning_rate": 7.350653266331658e-05, - "loss": 5.945, - "step": 26869 - }, - { - "epoch": 14.01303780964798, - "grad_norm": 1.620133638381958, - "learning_rate": 7.350552763819096e-05, - "loss": 4.925, - "step": 26870 - }, - { - "epoch": 14.013559322033899, - "grad_norm": 1.615128993988037, - "learning_rate": 7.350452261306532e-05, - "loss": 4.9392, - "step": 26871 - }, - { - "epoch": 14.014080834419817, - "grad_norm": 1.477499008178711, - "learning_rate": 7.35035175879397e-05, - "loss": 5.5536, - "step": 26872 - }, - { - "epoch": 14.014602346805736, - "grad_norm": 1.7378100156784058, - "learning_rate": 7.350251256281408e-05, - "loss": 4.9507, - "step": 26873 - }, - { - "epoch": 14.015123859191656, - "grad_norm": 1.4452301263809204, - "learning_rate": 7.350150753768844e-05, - "loss": 5.0245, - "step": 26874 - }, - { - "epoch": 14.015645371577575, - "grad_norm": 1.4544755220413208, - "learning_rate": 7.350050251256282e-05, - "loss": 4.8899, - "step": 26875 - }, - { - "epoch": 14.016166883963495, - "grad_norm": 1.617704153060913, - "learning_rate": 7.349949748743719e-05, - "loss": 5.1211, - "step": 26876 - }, - { - "epoch": 14.016688396349414, - "grad_norm": 1.4898455142974854, - "learning_rate": 7.349849246231156e-05, - "loss": 5.1897, - "step": 26877 - }, - { - "epoch": 14.017209908735332, - "grad_norm": 1.4862133264541626, - "learning_rate": 7.349748743718593e-05, - "loss": 5.4621, - "step": 26878 - }, - { - "epoch": 14.017731421121251, - "grad_norm": 1.5044457912445068, - "learning_rate": 7.34964824120603e-05, - "loss": 5.2371, - "step": 26879 - }, - { - "epoch": 14.01825293350717, - "grad_norm": 1.4316030740737915, - "learning_rate": 7.349547738693467e-05, - "loss": 5.7105, - "step": 26880 - }, - { - "epoch": 14.01877444589309, - "grad_norm": 1.6181401014328003, - "learning_rate": 7.349447236180905e-05, - "loss": 5.4356, - "step": 26881 - }, - { - "epoch": 14.01929595827901, - "grad_norm": 1.539232611656189, - "learning_rate": 7.349346733668341e-05, - "loss": 5.3405, - "step": 26882 - }, - { - "epoch": 14.019817470664929, - "grad_norm": 1.4436768293380737, - "learning_rate": 7.349246231155779e-05, - "loss": 5.5902, - "step": 26883 - }, - { - "epoch": 14.020338983050847, - "grad_norm": 1.387526512145996, - "learning_rate": 7.349145728643217e-05, - "loss": 5.5701, - "step": 26884 - }, - { - "epoch": 14.020860495436766, - "grad_norm": 1.3931430578231812, - "learning_rate": 7.349045226130655e-05, - "loss": 5.6088, - "step": 26885 - }, - { - "epoch": 14.021382007822686, - "grad_norm": 1.453688144683838, - "learning_rate": 7.348944723618091e-05, - "loss": 5.5893, - "step": 26886 - }, - { - "epoch": 14.021903520208605, - "grad_norm": 1.371160864830017, - "learning_rate": 7.348844221105527e-05, - "loss": 4.6221, - "step": 26887 - }, - { - "epoch": 14.022425032594525, - "grad_norm": 1.42741858959198, - "learning_rate": 7.348743718592965e-05, - "loss": 5.5143, - "step": 26888 - }, - { - "epoch": 14.022946544980444, - "grad_norm": 1.4866973161697388, - "learning_rate": 7.348643216080402e-05, - "loss": 5.4048, - "step": 26889 - }, - { - "epoch": 14.023468057366362, - "grad_norm": 1.3564447164535522, - "learning_rate": 7.34854271356784e-05, - "loss": 5.5835, - "step": 26890 - }, - { - "epoch": 14.023989569752281, - "grad_norm": 1.4545187950134277, - "learning_rate": 7.348442211055276e-05, - "loss": 5.4094, - "step": 26891 - }, - { - "epoch": 14.0245110821382, - "grad_norm": 1.526732087135315, - "learning_rate": 7.348341708542714e-05, - "loss": 5.3082, - "step": 26892 - }, - { - "epoch": 14.02503259452412, - "grad_norm": 1.4828232526779175, - "learning_rate": 7.348241206030151e-05, - "loss": 5.2854, - "step": 26893 - }, - { - "epoch": 14.02555410691004, - "grad_norm": 1.4959583282470703, - "learning_rate": 7.348140703517589e-05, - "loss": 5.4689, - "step": 26894 - }, - { - "epoch": 14.026075619295959, - "grad_norm": 1.4909706115722656, - "learning_rate": 7.348040201005026e-05, - "loss": 5.609, - "step": 26895 - }, - { - "epoch": 14.026597131681877, - "grad_norm": 1.4228639602661133, - "learning_rate": 7.347939698492463e-05, - "loss": 5.7586, - "step": 26896 - }, - { - "epoch": 14.027118644067796, - "grad_norm": 1.456133246421814, - "learning_rate": 7.3478391959799e-05, - "loss": 5.5625, - "step": 26897 - }, - { - "epoch": 14.027640156453716, - "grad_norm": 1.423514485359192, - "learning_rate": 7.347738693467338e-05, - "loss": 5.1824, - "step": 26898 - }, - { - "epoch": 14.028161668839635, - "grad_norm": 1.41177499294281, - "learning_rate": 7.347638190954774e-05, - "loss": 5.5657, - "step": 26899 - }, - { - "epoch": 14.028683181225555, - "grad_norm": 1.4766080379486084, - "learning_rate": 7.34753768844221e-05, - "loss": 5.5209, - "step": 26900 - }, - { - "epoch": 14.029204693611474, - "grad_norm": 1.455452799797058, - "learning_rate": 7.347437185929648e-05, - "loss": 4.7298, - "step": 26901 - }, - { - "epoch": 14.029726205997392, - "grad_norm": 1.4418785572052002, - "learning_rate": 7.347336683417085e-05, - "loss": 5.4136, - "step": 26902 - }, - { - "epoch": 14.030247718383311, - "grad_norm": 1.3936691284179688, - "learning_rate": 7.347236180904523e-05, - "loss": 5.2357, - "step": 26903 - }, - { - "epoch": 14.03076923076923, - "grad_norm": 1.4701529741287231, - "learning_rate": 7.34713567839196e-05, - "loss": 5.6636, - "step": 26904 - }, - { - "epoch": 14.03129074315515, - "grad_norm": 1.4701812267303467, - "learning_rate": 7.347035175879398e-05, - "loss": 5.3333, - "step": 26905 - }, - { - "epoch": 14.03181225554107, - "grad_norm": 1.4231520891189575, - "learning_rate": 7.346934673366835e-05, - "loss": 5.7002, - "step": 26906 - }, - { - "epoch": 14.032333767926989, - "grad_norm": 1.5873533487319946, - "learning_rate": 7.346834170854272e-05, - "loss": 4.8229, - "step": 26907 - }, - { - "epoch": 14.032855280312907, - "grad_norm": 1.4588545560836792, - "learning_rate": 7.346733668341709e-05, - "loss": 5.3591, - "step": 26908 - }, - { - "epoch": 14.033376792698826, - "grad_norm": 1.4215220212936401, - "learning_rate": 7.346633165829147e-05, - "loss": 5.8304, - "step": 26909 - }, - { - "epoch": 14.033898305084746, - "grad_norm": 1.4498523473739624, - "learning_rate": 7.346532663316583e-05, - "loss": 5.0686, - "step": 26910 - }, - { - "epoch": 14.034419817470665, - "grad_norm": 1.4850677251815796, - "learning_rate": 7.346432160804021e-05, - "loss": 5.4009, - "step": 26911 - }, - { - "epoch": 14.034941329856585, - "grad_norm": 1.529175877571106, - "learning_rate": 7.346331658291457e-05, - "loss": 5.0259, - "step": 26912 - }, - { - "epoch": 14.035462842242504, - "grad_norm": 1.6054964065551758, - "learning_rate": 7.346231155778895e-05, - "loss": 5.5138, - "step": 26913 - }, - { - "epoch": 14.035984354628422, - "grad_norm": 1.5308665037155151, - "learning_rate": 7.346130653266333e-05, - "loss": 5.4189, - "step": 26914 - }, - { - "epoch": 14.036505867014341, - "grad_norm": 1.5901618003845215, - "learning_rate": 7.346030150753769e-05, - "loss": 5.517, - "step": 26915 - }, - { - "epoch": 14.03702737940026, - "grad_norm": 1.518808364868164, - "learning_rate": 7.345929648241207e-05, - "loss": 5.6443, - "step": 26916 - }, - { - "epoch": 14.03754889178618, - "grad_norm": 1.5329805612564087, - "learning_rate": 7.345829145728643e-05, - "loss": 4.8866, - "step": 26917 - }, - { - "epoch": 14.0380704041721, - "grad_norm": 1.5338971614837646, - "learning_rate": 7.345728643216081e-05, - "loss": 5.6613, - "step": 26918 - }, - { - "epoch": 14.038591916558019, - "grad_norm": 1.458968162536621, - "learning_rate": 7.345628140703518e-05, - "loss": 5.3021, - "step": 26919 - }, - { - "epoch": 14.039113428943937, - "grad_norm": 1.3792682886123657, - "learning_rate": 7.345527638190955e-05, - "loss": 5.7242, - "step": 26920 - }, - { - "epoch": 14.039634941329856, - "grad_norm": 1.4627803564071655, - "learning_rate": 7.345427135678392e-05, - "loss": 5.4116, - "step": 26921 - }, - { - "epoch": 14.040156453715776, - "grad_norm": 1.5662236213684082, - "learning_rate": 7.34532663316583e-05, - "loss": 5.4299, - "step": 26922 - }, - { - "epoch": 14.040677966101695, - "grad_norm": 1.3735179901123047, - "learning_rate": 7.345226130653266e-05, - "loss": 5.0843, - "step": 26923 - }, - { - "epoch": 14.041199478487615, - "grad_norm": 1.4943009614944458, - "learning_rate": 7.345125628140704e-05, - "loss": 5.4719, - "step": 26924 - }, - { - "epoch": 14.041720990873534, - "grad_norm": 1.4332152605056763, - "learning_rate": 7.345025125628142e-05, - "loss": 5.6289, - "step": 26925 - }, - { - "epoch": 14.042242503259452, - "grad_norm": 1.4733699560165405, - "learning_rate": 7.344924623115578e-05, - "loss": 5.3615, - "step": 26926 - }, - { - "epoch": 14.042764015645371, - "grad_norm": 1.380283236503601, - "learning_rate": 7.344824120603016e-05, - "loss": 5.5824, - "step": 26927 - }, - { - "epoch": 14.04328552803129, - "grad_norm": 1.629823923110962, - "learning_rate": 7.344723618090452e-05, - "loss": 5.243, - "step": 26928 - }, - { - "epoch": 14.04380704041721, - "grad_norm": 1.5369040966033936, - "learning_rate": 7.34462311557789e-05, - "loss": 5.4125, - "step": 26929 - }, - { - "epoch": 14.04432855280313, - "grad_norm": 1.4611072540283203, - "learning_rate": 7.344522613065326e-05, - "loss": 5.736, - "step": 26930 - }, - { - "epoch": 14.044850065189049, - "grad_norm": 1.578600525856018, - "learning_rate": 7.344422110552764e-05, - "loss": 4.8902, - "step": 26931 - }, - { - "epoch": 14.045371577574967, - "grad_norm": 1.4626238346099854, - "learning_rate": 7.3443216080402e-05, - "loss": 5.7985, - "step": 26932 - }, - { - "epoch": 14.045893089960886, - "grad_norm": 1.4488062858581543, - "learning_rate": 7.344221105527638e-05, - "loss": 5.4533, - "step": 26933 - }, - { - "epoch": 14.046414602346806, - "grad_norm": 1.3640638589859009, - "learning_rate": 7.344120603015076e-05, - "loss": 5.2329, - "step": 26934 - }, - { - "epoch": 14.046936114732725, - "grad_norm": 1.5202834606170654, - "learning_rate": 7.344020100502514e-05, - "loss": 5.6312, - "step": 26935 - }, - { - "epoch": 14.047457627118645, - "grad_norm": 1.466470718383789, - "learning_rate": 7.34391959798995e-05, - "loss": 5.6888, - "step": 26936 - }, - { - "epoch": 14.047979139504564, - "grad_norm": 1.472771167755127, - "learning_rate": 7.343819095477388e-05, - "loss": 5.5493, - "step": 26937 - }, - { - "epoch": 14.048500651890482, - "grad_norm": 1.4768167734146118, - "learning_rate": 7.343718592964825e-05, - "loss": 5.0305, - "step": 26938 - }, - { - "epoch": 14.049022164276401, - "grad_norm": 1.4908044338226318, - "learning_rate": 7.343618090452261e-05, - "loss": 5.1085, - "step": 26939 - }, - { - "epoch": 14.04954367666232, - "grad_norm": 1.4810231924057007, - "learning_rate": 7.343517587939699e-05, - "loss": 5.6078, - "step": 26940 - }, - { - "epoch": 14.05006518904824, - "grad_norm": 1.472760796546936, - "learning_rate": 7.343417085427135e-05, - "loss": 5.0588, - "step": 26941 - }, - { - "epoch": 14.05058670143416, - "grad_norm": 1.5106000900268555, - "learning_rate": 7.343316582914573e-05, - "loss": 5.3316, - "step": 26942 - }, - { - "epoch": 14.051108213820077, - "grad_norm": 1.6306064128875732, - "learning_rate": 7.34321608040201e-05, - "loss": 4.4783, - "step": 26943 - }, - { - "epoch": 14.051629726205997, - "grad_norm": 1.5006804466247559, - "learning_rate": 7.343115577889447e-05, - "loss": 5.2381, - "step": 26944 - }, - { - "epoch": 14.052151238591916, - "grad_norm": 1.566216230392456, - "learning_rate": 7.343015075376885e-05, - "loss": 5.0441, - "step": 26945 - }, - { - "epoch": 14.052672750977836, - "grad_norm": 1.5676963329315186, - "learning_rate": 7.342914572864323e-05, - "loss": 5.4666, - "step": 26946 - }, - { - "epoch": 14.053194263363755, - "grad_norm": 1.4918415546417236, - "learning_rate": 7.342814070351759e-05, - "loss": 5.9083, - "step": 26947 - }, - { - "epoch": 14.053715775749675, - "grad_norm": 1.537062168121338, - "learning_rate": 7.342713567839197e-05, - "loss": 5.2627, - "step": 26948 - }, - { - "epoch": 14.054237288135592, - "grad_norm": 1.420619010925293, - "learning_rate": 7.342613065326633e-05, - "loss": 5.7095, - "step": 26949 - }, - { - "epoch": 14.054758800521512, - "grad_norm": 1.5238409042358398, - "learning_rate": 7.342512562814071e-05, - "loss": 4.4825, - "step": 26950 - }, - { - "epoch": 14.055280312907431, - "grad_norm": 1.4303324222564697, - "learning_rate": 7.342412060301508e-05, - "loss": 5.4923, - "step": 26951 - }, - { - "epoch": 14.05580182529335, - "grad_norm": 1.526898980140686, - "learning_rate": 7.342311557788944e-05, - "loss": 4.7523, - "step": 26952 - }, - { - "epoch": 14.05632333767927, - "grad_norm": 1.4111510515213013, - "learning_rate": 7.342211055276382e-05, - "loss": 5.5588, - "step": 26953 - }, - { - "epoch": 14.05684485006519, - "grad_norm": 1.49845552444458, - "learning_rate": 7.34211055276382e-05, - "loss": 5.497, - "step": 26954 - }, - { - "epoch": 14.057366362451107, - "grad_norm": 1.407240390777588, - "learning_rate": 7.342010050251257e-05, - "loss": 5.5326, - "step": 26955 - }, - { - "epoch": 14.057887874837027, - "grad_norm": 1.5240910053253174, - "learning_rate": 7.341909547738694e-05, - "loss": 5.3038, - "step": 26956 - }, - { - "epoch": 14.058409387222946, - "grad_norm": 1.4933522939682007, - "learning_rate": 7.341809045226132e-05, - "loss": 5.11, - "step": 26957 - }, - { - "epoch": 14.058930899608866, - "grad_norm": 1.6219290494918823, - "learning_rate": 7.341708542713568e-05, - "loss": 5.0242, - "step": 26958 - }, - { - "epoch": 14.059452411994785, - "grad_norm": 1.6733415126800537, - "learning_rate": 7.341608040201006e-05, - "loss": 4.8423, - "step": 26959 - }, - { - "epoch": 14.059973924380705, - "grad_norm": 1.5590277910232544, - "learning_rate": 7.341507537688442e-05, - "loss": 5.7307, - "step": 26960 - }, - { - "epoch": 14.060495436766622, - "grad_norm": 1.3886915445327759, - "learning_rate": 7.34140703517588e-05, - "loss": 5.0253, - "step": 26961 - }, - { - "epoch": 14.061016949152542, - "grad_norm": 1.459175944328308, - "learning_rate": 7.341306532663316e-05, - "loss": 5.2531, - "step": 26962 - }, - { - "epoch": 14.061538461538461, - "grad_norm": 1.6267497539520264, - "learning_rate": 7.341206030150754e-05, - "loss": 4.8548, - "step": 26963 - }, - { - "epoch": 14.06205997392438, - "grad_norm": 1.505282998085022, - "learning_rate": 7.341105527638191e-05, - "loss": 4.7113, - "step": 26964 - }, - { - "epoch": 14.0625814863103, - "grad_norm": 1.5379111766815186, - "learning_rate": 7.341005025125628e-05, - "loss": 5.4143, - "step": 26965 - }, - { - "epoch": 14.06310299869622, - "grad_norm": 1.5097013711929321, - "learning_rate": 7.340904522613066e-05, - "loss": 5.3186, - "step": 26966 - }, - { - "epoch": 14.063624511082137, - "grad_norm": 1.4820483922958374, - "learning_rate": 7.340804020100503e-05, - "loss": 5.5865, - "step": 26967 - }, - { - "epoch": 14.064146023468057, - "grad_norm": 1.5100579261779785, - "learning_rate": 7.34070351758794e-05, - "loss": 5.347, - "step": 26968 - }, - { - "epoch": 14.064667535853976, - "grad_norm": 1.3259782791137695, - "learning_rate": 7.340603015075377e-05, - "loss": 5.5415, - "step": 26969 - }, - { - "epoch": 14.065189048239896, - "grad_norm": 1.3338228464126587, - "learning_rate": 7.340502512562815e-05, - "loss": 5.5601, - "step": 26970 - }, - { - "epoch": 14.065710560625815, - "grad_norm": 1.5008724927902222, - "learning_rate": 7.340402010050251e-05, - "loss": 5.2101, - "step": 26971 - }, - { - "epoch": 14.066232073011735, - "grad_norm": 1.5310075283050537, - "learning_rate": 7.340301507537689e-05, - "loss": 5.3626, - "step": 26972 - }, - { - "epoch": 14.066753585397652, - "grad_norm": 1.4345438480377197, - "learning_rate": 7.340201005025125e-05, - "loss": 5.1643, - "step": 26973 - }, - { - "epoch": 14.067275097783572, - "grad_norm": 1.4885064363479614, - "learning_rate": 7.340100502512563e-05, - "loss": 5.7009, - "step": 26974 - }, - { - "epoch": 14.067796610169491, - "grad_norm": 1.4291306734085083, - "learning_rate": 7.340000000000001e-05, - "loss": 5.3722, - "step": 26975 - }, - { - "epoch": 14.06831812255541, - "grad_norm": 1.4195934534072876, - "learning_rate": 7.339899497487439e-05, - "loss": 5.5797, - "step": 26976 - }, - { - "epoch": 14.06883963494133, - "grad_norm": 1.4768248796463013, - "learning_rate": 7.339798994974875e-05, - "loss": 4.9403, - "step": 26977 - }, - { - "epoch": 14.06936114732725, - "grad_norm": 1.5117374658584595, - "learning_rate": 7.339698492462313e-05, - "loss": 5.0273, - "step": 26978 - }, - { - "epoch": 14.069882659713167, - "grad_norm": 1.624394416809082, - "learning_rate": 7.339597989949749e-05, - "loss": 5.0392, - "step": 26979 - }, - { - "epoch": 14.070404172099087, - "grad_norm": 1.6045887470245361, - "learning_rate": 7.339497487437186e-05, - "loss": 4.7568, - "step": 26980 - }, - { - "epoch": 14.070925684485006, - "grad_norm": 1.4850027561187744, - "learning_rate": 7.339396984924624e-05, - "loss": 5.3149, - "step": 26981 - }, - { - "epoch": 14.071447196870926, - "grad_norm": 1.445961356163025, - "learning_rate": 7.33929648241206e-05, - "loss": 5.2994, - "step": 26982 - }, - { - "epoch": 14.071968709256845, - "grad_norm": 1.5436793565750122, - "learning_rate": 7.339195979899498e-05, - "loss": 5.6356, - "step": 26983 - }, - { - "epoch": 14.072490221642765, - "grad_norm": 1.358197808265686, - "learning_rate": 7.339095477386934e-05, - "loss": 4.7294, - "step": 26984 - }, - { - "epoch": 14.073011734028682, - "grad_norm": 1.4321110248565674, - "learning_rate": 7.338994974874372e-05, - "loss": 5.1181, - "step": 26985 - }, - { - "epoch": 14.073533246414602, - "grad_norm": 1.4105048179626465, - "learning_rate": 7.33889447236181e-05, - "loss": 5.5061, - "step": 26986 - }, - { - "epoch": 14.074054758800521, - "grad_norm": 1.3433825969696045, - "learning_rate": 7.338793969849247e-05, - "loss": 5.0372, - "step": 26987 - }, - { - "epoch": 14.07457627118644, - "grad_norm": 1.4765232801437378, - "learning_rate": 7.338693467336684e-05, - "loss": 5.5381, - "step": 26988 - }, - { - "epoch": 14.07509778357236, - "grad_norm": 1.53202486038208, - "learning_rate": 7.338592964824122e-05, - "loss": 5.4226, - "step": 26989 - }, - { - "epoch": 14.07561929595828, - "grad_norm": 1.353413701057434, - "learning_rate": 7.338492462311558e-05, - "loss": 5.6609, - "step": 26990 - }, - { - "epoch": 14.076140808344197, - "grad_norm": 1.4082856178283691, - "learning_rate": 7.338391959798996e-05, - "loss": 5.3151, - "step": 26991 - }, - { - "epoch": 14.076662320730117, - "grad_norm": 1.5153356790542603, - "learning_rate": 7.338291457286432e-05, - "loss": 5.1468, - "step": 26992 - }, - { - "epoch": 14.077183833116036, - "grad_norm": 1.5493944883346558, - "learning_rate": 7.338190954773869e-05, - "loss": 5.3373, - "step": 26993 - }, - { - "epoch": 14.077705345501956, - "grad_norm": 1.4763147830963135, - "learning_rate": 7.338090452261307e-05, - "loss": 5.4442, - "step": 26994 - }, - { - "epoch": 14.078226857887875, - "grad_norm": 1.4531883001327515, - "learning_rate": 7.337989949748743e-05, - "loss": 5.1473, - "step": 26995 - }, - { - "epoch": 14.078748370273795, - "grad_norm": 1.440550684928894, - "learning_rate": 7.337889447236181e-05, - "loss": 5.6688, - "step": 26996 - }, - { - "epoch": 14.079269882659712, - "grad_norm": 1.3294382095336914, - "learning_rate": 7.337788944723619e-05, - "loss": 5.3996, - "step": 26997 - }, - { - "epoch": 14.079791395045632, - "grad_norm": 1.4270083904266357, - "learning_rate": 7.337688442211056e-05, - "loss": 5.0207, - "step": 26998 - }, - { - "epoch": 14.080312907431551, - "grad_norm": 1.4045442342758179, - "learning_rate": 7.337587939698493e-05, - "loss": 5.524, - "step": 26999 - }, - { - "epoch": 14.08083441981747, - "grad_norm": 1.4707595109939575, - "learning_rate": 7.33748743718593e-05, - "loss": 5.3835, - "step": 27000 - }, - { - "epoch": 14.08135593220339, - "grad_norm": 1.510719895362854, - "learning_rate": 7.337386934673367e-05, - "loss": 5.2587, - "step": 27001 - }, - { - "epoch": 14.08187744458931, - "grad_norm": 1.5471960306167603, - "learning_rate": 7.337286432160805e-05, - "loss": 4.8126, - "step": 27002 - }, - { - "epoch": 14.082398956975227, - "grad_norm": 1.547533631324768, - "learning_rate": 7.337185929648241e-05, - "loss": 4.9195, - "step": 27003 - }, - { - "epoch": 14.082920469361147, - "grad_norm": 1.5873563289642334, - "learning_rate": 7.337085427135679e-05, - "loss": 5.1004, - "step": 27004 - }, - { - "epoch": 14.083441981747066, - "grad_norm": 1.5183711051940918, - "learning_rate": 7.336984924623115e-05, - "loss": 4.7681, - "step": 27005 - }, - { - "epoch": 14.083963494132986, - "grad_norm": 1.5216656923294067, - "learning_rate": 7.336884422110553e-05, - "loss": 5.5801, - "step": 27006 - }, - { - "epoch": 14.084485006518905, - "grad_norm": 1.4117413759231567, - "learning_rate": 7.336783919597991e-05, - "loss": 5.3513, - "step": 27007 - }, - { - "epoch": 14.085006518904825, - "grad_norm": 1.3514903783798218, - "learning_rate": 7.336683417085427e-05, - "loss": 5.5768, - "step": 27008 - }, - { - "epoch": 14.085528031290742, - "grad_norm": 1.4644232988357544, - "learning_rate": 7.336582914572865e-05, - "loss": 5.4679, - "step": 27009 - }, - { - "epoch": 14.086049543676662, - "grad_norm": 1.612889289855957, - "learning_rate": 7.336482412060302e-05, - "loss": 5.0104, - "step": 27010 - }, - { - "epoch": 14.086571056062581, - "grad_norm": 1.7338007688522339, - "learning_rate": 7.33638190954774e-05, - "loss": 4.8515, - "step": 27011 - }, - { - "epoch": 14.0870925684485, - "grad_norm": 1.7143433094024658, - "learning_rate": 7.336281407035176e-05, - "loss": 4.8111, - "step": 27012 - }, - { - "epoch": 14.08761408083442, - "grad_norm": 1.667224645614624, - "learning_rate": 7.336180904522614e-05, - "loss": 4.7538, - "step": 27013 - }, - { - "epoch": 14.08813559322034, - "grad_norm": 1.369659423828125, - "learning_rate": 7.33608040201005e-05, - "loss": 5.377, - "step": 27014 - }, - { - "epoch": 14.088657105606258, - "grad_norm": 1.4101405143737793, - "learning_rate": 7.335979899497488e-05, - "loss": 5.6342, - "step": 27015 - }, - { - "epoch": 14.089178617992177, - "grad_norm": 1.4364200830459595, - "learning_rate": 7.335879396984924e-05, - "loss": 5.4324, - "step": 27016 - }, - { - "epoch": 14.089700130378096, - "grad_norm": 1.4673963785171509, - "learning_rate": 7.335778894472362e-05, - "loss": 5.4297, - "step": 27017 - }, - { - "epoch": 14.090221642764016, - "grad_norm": 1.4626142978668213, - "learning_rate": 7.3356783919598e-05, - "loss": 5.2554, - "step": 27018 - }, - { - "epoch": 14.090743155149935, - "grad_norm": 1.5147954225540161, - "learning_rate": 7.335577889447236e-05, - "loss": 5.3369, - "step": 27019 - }, - { - "epoch": 14.091264667535855, - "grad_norm": 1.457413911819458, - "learning_rate": 7.335477386934674e-05, - "loss": 4.85, - "step": 27020 - }, - { - "epoch": 14.091786179921773, - "grad_norm": 1.4690111875534058, - "learning_rate": 7.33537688442211e-05, - "loss": 5.3016, - "step": 27021 - }, - { - "epoch": 14.092307692307692, - "grad_norm": 1.516996145248413, - "learning_rate": 7.335276381909548e-05, - "loss": 5.3119, - "step": 27022 - }, - { - "epoch": 14.092829204693611, - "grad_norm": 1.3609051704406738, - "learning_rate": 7.335175879396985e-05, - "loss": 5.2329, - "step": 27023 - }, - { - "epoch": 14.093350717079531, - "grad_norm": 1.481511116027832, - "learning_rate": 7.335075376884422e-05, - "loss": 5.0991, - "step": 27024 - }, - { - "epoch": 14.09387222946545, - "grad_norm": 1.4077115058898926, - "learning_rate": 7.334974874371859e-05, - "loss": 5.5002, - "step": 27025 - }, - { - "epoch": 14.09439374185137, - "grad_norm": 1.4823514223098755, - "learning_rate": 7.334874371859297e-05, - "loss": 5.1459, - "step": 27026 - }, - { - "epoch": 14.094915254237288, - "grad_norm": 1.3597832918167114, - "learning_rate": 7.334773869346734e-05, - "loss": 5.551, - "step": 27027 - }, - { - "epoch": 14.095436766623207, - "grad_norm": 1.6007215976715088, - "learning_rate": 7.334673366834172e-05, - "loss": 4.9726, - "step": 27028 - }, - { - "epoch": 14.095958279009126, - "grad_norm": 1.4971541166305542, - "learning_rate": 7.334572864321609e-05, - "loss": 5.1495, - "step": 27029 - }, - { - "epoch": 14.096479791395046, - "grad_norm": 1.5027902126312256, - "learning_rate": 7.334472361809046e-05, - "loss": 5.0862, - "step": 27030 - }, - { - "epoch": 14.097001303780965, - "grad_norm": 1.4562513828277588, - "learning_rate": 7.334371859296483e-05, - "loss": 5.7233, - "step": 27031 - }, - { - "epoch": 14.097522816166883, - "grad_norm": 1.3958607912063599, - "learning_rate": 7.334271356783919e-05, - "loss": 5.57, - "step": 27032 - }, - { - "epoch": 14.098044328552803, - "grad_norm": 1.4371981620788574, - "learning_rate": 7.334170854271357e-05, - "loss": 5.3901, - "step": 27033 - }, - { - "epoch": 14.098565840938722, - "grad_norm": 1.4037402868270874, - "learning_rate": 7.334070351758793e-05, - "loss": 5.3456, - "step": 27034 - }, - { - "epoch": 14.099087353324641, - "grad_norm": 1.4690951108932495, - "learning_rate": 7.333969849246231e-05, - "loss": 5.4477, - "step": 27035 - }, - { - "epoch": 14.099608865710561, - "grad_norm": 1.6597872972488403, - "learning_rate": 7.333869346733668e-05, - "loss": 5.1899, - "step": 27036 - }, - { - "epoch": 14.10013037809648, - "grad_norm": 1.6636402606964111, - "learning_rate": 7.333768844221105e-05, - "loss": 5.0831, - "step": 27037 - }, - { - "epoch": 14.100651890482398, - "grad_norm": 1.4132986068725586, - "learning_rate": 7.333668341708543e-05, - "loss": 5.6209, - "step": 27038 - }, - { - "epoch": 14.101173402868318, - "grad_norm": 1.4757620096206665, - "learning_rate": 7.333567839195981e-05, - "loss": 5.3182, - "step": 27039 - }, - { - "epoch": 14.101694915254237, - "grad_norm": 1.5559535026550293, - "learning_rate": 7.333467336683417e-05, - "loss": 5.3468, - "step": 27040 - }, - { - "epoch": 14.102216427640156, - "grad_norm": 1.4102482795715332, - "learning_rate": 7.333366834170855e-05, - "loss": 5.4503, - "step": 27041 - }, - { - "epoch": 14.102737940026076, - "grad_norm": 1.4898840188980103, - "learning_rate": 7.333266331658292e-05, - "loss": 5.145, - "step": 27042 - }, - { - "epoch": 14.103259452411995, - "grad_norm": 1.4442601203918457, - "learning_rate": 7.33316582914573e-05, - "loss": 5.2195, - "step": 27043 - }, - { - "epoch": 14.103780964797913, - "grad_norm": 1.3884971141815186, - "learning_rate": 7.333065326633166e-05, - "loss": 5.8017, - "step": 27044 - }, - { - "epoch": 14.104302477183833, - "grad_norm": 1.483178734779358, - "learning_rate": 7.332964824120602e-05, - "loss": 5.2426, - "step": 27045 - }, - { - "epoch": 14.104823989569752, - "grad_norm": 1.4444904327392578, - "learning_rate": 7.33286432160804e-05, - "loss": 5.69, - "step": 27046 - }, - { - "epoch": 14.105345501955671, - "grad_norm": 1.5036208629608154, - "learning_rate": 7.332763819095478e-05, - "loss": 5.2371, - "step": 27047 - }, - { - "epoch": 14.105867014341591, - "grad_norm": 1.4785077571868896, - "learning_rate": 7.332663316582916e-05, - "loss": 5.316, - "step": 27048 - }, - { - "epoch": 14.10638852672751, - "grad_norm": 1.48932683467865, - "learning_rate": 7.332562814070352e-05, - "loss": 5.3433, - "step": 27049 - }, - { - "epoch": 14.106910039113428, - "grad_norm": 1.4788758754730225, - "learning_rate": 7.33246231155779e-05, - "loss": 4.9079, - "step": 27050 - }, - { - "epoch": 14.107431551499348, - "grad_norm": 1.4703607559204102, - "learning_rate": 7.332361809045226e-05, - "loss": 4.9579, - "step": 27051 - }, - { - "epoch": 14.107953063885267, - "grad_norm": 1.4287683963775635, - "learning_rate": 7.332261306532664e-05, - "loss": 5.4374, - "step": 27052 - }, - { - "epoch": 14.108474576271187, - "grad_norm": 1.4590232372283936, - "learning_rate": 7.3321608040201e-05, - "loss": 5.4763, - "step": 27053 - }, - { - "epoch": 14.108996088657106, - "grad_norm": 1.5458266735076904, - "learning_rate": 7.332060301507538e-05, - "loss": 5.5374, - "step": 27054 - }, - { - "epoch": 14.109517601043025, - "grad_norm": 1.5580174922943115, - "learning_rate": 7.331959798994975e-05, - "loss": 4.6293, - "step": 27055 - }, - { - "epoch": 14.110039113428943, - "grad_norm": 1.40945565700531, - "learning_rate": 7.331859296482412e-05, - "loss": 5.2408, - "step": 27056 - }, - { - "epoch": 14.110560625814863, - "grad_norm": 1.3677732944488525, - "learning_rate": 7.331758793969849e-05, - "loss": 5.2794, - "step": 27057 - }, - { - "epoch": 14.111082138200782, - "grad_norm": 1.4609769582748413, - "learning_rate": 7.331658291457287e-05, - "loss": 5.5849, - "step": 27058 - }, - { - "epoch": 14.111603650586702, - "grad_norm": 1.5783048868179321, - "learning_rate": 7.331557788944724e-05, - "loss": 5.425, - "step": 27059 - }, - { - "epoch": 14.112125162972621, - "grad_norm": 1.466243028640747, - "learning_rate": 7.331457286432161e-05, - "loss": 5.0443, - "step": 27060 - }, - { - "epoch": 14.11264667535854, - "grad_norm": 1.692743182182312, - "learning_rate": 7.331356783919599e-05, - "loss": 5.0858, - "step": 27061 - }, - { - "epoch": 14.113168187744458, - "grad_norm": 1.5572959184646606, - "learning_rate": 7.331256281407035e-05, - "loss": 5.2547, - "step": 27062 - }, - { - "epoch": 14.113689700130378, - "grad_norm": 1.7037220001220703, - "learning_rate": 7.331155778894473e-05, - "loss": 5.4348, - "step": 27063 - }, - { - "epoch": 14.114211212516297, - "grad_norm": 1.5080400705337524, - "learning_rate": 7.33105527638191e-05, - "loss": 5.792, - "step": 27064 - }, - { - "epoch": 14.114732724902217, - "grad_norm": 1.3987113237380981, - "learning_rate": 7.330954773869347e-05, - "loss": 5.6384, - "step": 27065 - }, - { - "epoch": 14.115254237288136, - "grad_norm": 1.4800809621810913, - "learning_rate": 7.330854271356784e-05, - "loss": 5.6373, - "step": 27066 - }, - { - "epoch": 14.115775749674055, - "grad_norm": 1.6037331819534302, - "learning_rate": 7.330753768844221e-05, - "loss": 5.3818, - "step": 27067 - }, - { - "epoch": 14.116297262059973, - "grad_norm": 1.4308743476867676, - "learning_rate": 7.330653266331659e-05, - "loss": 5.5627, - "step": 27068 - }, - { - "epoch": 14.116818774445893, - "grad_norm": 1.5622793436050415, - "learning_rate": 7.330552763819097e-05, - "loss": 5.42, - "step": 27069 - }, - { - "epoch": 14.117340286831812, - "grad_norm": 1.5118988752365112, - "learning_rate": 7.330452261306533e-05, - "loss": 5.2206, - "step": 27070 - }, - { - "epoch": 14.117861799217732, - "grad_norm": 1.3727740049362183, - "learning_rate": 7.330351758793971e-05, - "loss": 5.1242, - "step": 27071 - }, - { - "epoch": 14.118383311603651, - "grad_norm": 1.504246473312378, - "learning_rate": 7.330251256281408e-05, - "loss": 5.3947, - "step": 27072 - }, - { - "epoch": 14.11890482398957, - "grad_norm": 1.5863585472106934, - "learning_rate": 7.330150753768844e-05, - "loss": 5.5447, - "step": 27073 - }, - { - "epoch": 14.119426336375488, - "grad_norm": 1.935731291770935, - "learning_rate": 7.330050251256282e-05, - "loss": 5.1874, - "step": 27074 - }, - { - "epoch": 14.119947848761408, - "grad_norm": 1.8895453214645386, - "learning_rate": 7.329949748743718e-05, - "loss": 5.2374, - "step": 27075 - }, - { - "epoch": 14.120469361147327, - "grad_norm": 1.5498055219650269, - "learning_rate": 7.329849246231156e-05, - "loss": 5.2522, - "step": 27076 - }, - { - "epoch": 14.120990873533247, - "grad_norm": 1.3865498304367065, - "learning_rate": 7.329748743718592e-05, - "loss": 5.4769, - "step": 27077 - }, - { - "epoch": 14.121512385919166, - "grad_norm": 1.4979712963104248, - "learning_rate": 7.32964824120603e-05, - "loss": 5.3904, - "step": 27078 - }, - { - "epoch": 14.122033898305085, - "grad_norm": 1.3477623462677002, - "learning_rate": 7.329547738693468e-05, - "loss": 5.0017, - "step": 27079 - }, - { - "epoch": 14.122555410691003, - "grad_norm": 1.5227361917495728, - "learning_rate": 7.329447236180906e-05, - "loss": 4.9792, - "step": 27080 - }, - { - "epoch": 14.123076923076923, - "grad_norm": 1.6090432405471802, - "learning_rate": 7.329346733668342e-05, - "loss": 5.3994, - "step": 27081 - }, - { - "epoch": 14.123598435462842, - "grad_norm": 1.3310611248016357, - "learning_rate": 7.32924623115578e-05, - "loss": 5.0064, - "step": 27082 - }, - { - "epoch": 14.124119947848762, - "grad_norm": 1.534145712852478, - "learning_rate": 7.329145728643216e-05, - "loss": 5.7013, - "step": 27083 - }, - { - "epoch": 14.124641460234681, - "grad_norm": 1.4824138879776, - "learning_rate": 7.329045226130654e-05, - "loss": 5.7482, - "step": 27084 - }, - { - "epoch": 14.1251629726206, - "grad_norm": 1.4732816219329834, - "learning_rate": 7.32894472361809e-05, - "loss": 5.6068, - "step": 27085 - }, - { - "epoch": 14.125684485006518, - "grad_norm": 1.4836137294769287, - "learning_rate": 7.328844221105527e-05, - "loss": 5.4504, - "step": 27086 - }, - { - "epoch": 14.126205997392438, - "grad_norm": 1.4181419610977173, - "learning_rate": 7.328743718592965e-05, - "loss": 5.7184, - "step": 27087 - }, - { - "epoch": 14.126727509778357, - "grad_norm": 1.5025229454040527, - "learning_rate": 7.328643216080403e-05, - "loss": 5.8437, - "step": 27088 - }, - { - "epoch": 14.127249022164277, - "grad_norm": 1.5615384578704834, - "learning_rate": 7.32854271356784e-05, - "loss": 5.3272, - "step": 27089 - }, - { - "epoch": 14.127770534550196, - "grad_norm": 1.5412967205047607, - "learning_rate": 7.328442211055277e-05, - "loss": 5.0856, - "step": 27090 - }, - { - "epoch": 14.128292046936116, - "grad_norm": 1.6379923820495605, - "learning_rate": 7.328341708542715e-05, - "loss": 5.4444, - "step": 27091 - }, - { - "epoch": 14.128813559322033, - "grad_norm": 1.45161771774292, - "learning_rate": 7.328241206030151e-05, - "loss": 5.2338, - "step": 27092 - }, - { - "epoch": 14.129335071707953, - "grad_norm": 1.892775058746338, - "learning_rate": 7.328140703517589e-05, - "loss": 4.9928, - "step": 27093 - }, - { - "epoch": 14.129856584093872, - "grad_norm": 1.559345006942749, - "learning_rate": 7.328040201005025e-05, - "loss": 5.3689, - "step": 27094 - }, - { - "epoch": 14.130378096479792, - "grad_norm": 1.3712787628173828, - "learning_rate": 7.327939698492463e-05, - "loss": 5.7156, - "step": 27095 - }, - { - "epoch": 14.130899608865711, - "grad_norm": 1.8041661977767944, - "learning_rate": 7.3278391959799e-05, - "loss": 4.8902, - "step": 27096 - }, - { - "epoch": 14.13142112125163, - "grad_norm": 1.5413931608200073, - "learning_rate": 7.327738693467337e-05, - "loss": 5.3175, - "step": 27097 - }, - { - "epoch": 14.131942633637548, - "grad_norm": 1.5107601881027222, - "learning_rate": 7.327638190954774e-05, - "loss": 5.1382, - "step": 27098 - }, - { - "epoch": 14.132464146023468, - "grad_norm": 1.5019197463989258, - "learning_rate": 7.327537688442211e-05, - "loss": 5.5119, - "step": 27099 - }, - { - "epoch": 14.132985658409387, - "grad_norm": 1.3601258993148804, - "learning_rate": 7.327437185929649e-05, - "loss": 5.5461, - "step": 27100 - }, - { - "epoch": 14.133507170795307, - "grad_norm": 1.5678355693817139, - "learning_rate": 7.327336683417086e-05, - "loss": 4.9278, - "step": 27101 - }, - { - "epoch": 14.134028683181226, - "grad_norm": 1.5653232336044312, - "learning_rate": 7.327236180904523e-05, - "loss": 5.2102, - "step": 27102 - }, - { - "epoch": 14.134550195567146, - "grad_norm": 1.5601693391799927, - "learning_rate": 7.32713567839196e-05, - "loss": 4.8845, - "step": 27103 - }, - { - "epoch": 14.135071707953063, - "grad_norm": 1.4565255641937256, - "learning_rate": 7.327035175879398e-05, - "loss": 5.7434, - "step": 27104 - }, - { - "epoch": 14.135593220338983, - "grad_norm": 1.3909814357757568, - "learning_rate": 7.326934673366834e-05, - "loss": 5.42, - "step": 27105 - }, - { - "epoch": 14.136114732724902, - "grad_norm": 1.438152551651001, - "learning_rate": 7.326834170854272e-05, - "loss": 5.2644, - "step": 27106 - }, - { - "epoch": 14.136636245110822, - "grad_norm": 1.5432438850402832, - "learning_rate": 7.326733668341708e-05, - "loss": 5.398, - "step": 27107 - }, - { - "epoch": 14.137157757496741, - "grad_norm": 1.438012719154358, - "learning_rate": 7.326633165829146e-05, - "loss": 5.6347, - "step": 27108 - }, - { - "epoch": 14.13767926988266, - "grad_norm": 1.3561068773269653, - "learning_rate": 7.326532663316584e-05, - "loss": 5.5383, - "step": 27109 - }, - { - "epoch": 14.138200782268578, - "grad_norm": 1.5052940845489502, - "learning_rate": 7.326432160804022e-05, - "loss": 5.4658, - "step": 27110 - }, - { - "epoch": 14.138722294654498, - "grad_norm": 1.49449622631073, - "learning_rate": 7.326331658291458e-05, - "loss": 5.6203, - "step": 27111 - }, - { - "epoch": 14.139243807040417, - "grad_norm": 1.4658457040786743, - "learning_rate": 7.326231155778894e-05, - "loss": 5.4846, - "step": 27112 - }, - { - "epoch": 14.139765319426337, - "grad_norm": 1.43537437915802, - "learning_rate": 7.326130653266332e-05, - "loss": 5.0746, - "step": 27113 - }, - { - "epoch": 14.140286831812256, - "grad_norm": 1.5695340633392334, - "learning_rate": 7.326030150753769e-05, - "loss": 4.8783, - "step": 27114 - }, - { - "epoch": 14.140808344198176, - "grad_norm": 1.5906258821487427, - "learning_rate": 7.325929648241206e-05, - "loss": 4.8649, - "step": 27115 - }, - { - "epoch": 14.141329856584093, - "grad_norm": 1.5330979824066162, - "learning_rate": 7.325829145728643e-05, - "loss": 5.6333, - "step": 27116 - }, - { - "epoch": 14.141851368970013, - "grad_norm": 1.4666411876678467, - "learning_rate": 7.32572864321608e-05, - "loss": 5.5278, - "step": 27117 - }, - { - "epoch": 14.142372881355932, - "grad_norm": 1.476550579071045, - "learning_rate": 7.325628140703517e-05, - "loss": 5.3708, - "step": 27118 - }, - { - "epoch": 14.142894393741852, - "grad_norm": 1.544860601425171, - "learning_rate": 7.325527638190955e-05, - "loss": 5.0239, - "step": 27119 - }, - { - "epoch": 14.143415906127771, - "grad_norm": 1.5729296207427979, - "learning_rate": 7.325427135678393e-05, - "loss": 5.0477, - "step": 27120 - }, - { - "epoch": 14.14393741851369, - "grad_norm": 1.5710042715072632, - "learning_rate": 7.32532663316583e-05, - "loss": 5.003, - "step": 27121 - }, - { - "epoch": 14.144458930899608, - "grad_norm": 1.501647710800171, - "learning_rate": 7.325226130653267e-05, - "loss": 5.8492, - "step": 27122 - }, - { - "epoch": 14.144980443285528, - "grad_norm": 1.504891037940979, - "learning_rate": 7.325125628140705e-05, - "loss": 5.2151, - "step": 27123 - }, - { - "epoch": 14.145501955671447, - "grad_norm": 1.467157006263733, - "learning_rate": 7.325025125628141e-05, - "loss": 5.4131, - "step": 27124 - }, - { - "epoch": 14.146023468057367, - "grad_norm": 1.5662587881088257, - "learning_rate": 7.324924623115577e-05, - "loss": 5.2642, - "step": 27125 - }, - { - "epoch": 14.146544980443286, - "grad_norm": 1.5353291034698486, - "learning_rate": 7.324824120603015e-05, - "loss": 5.156, - "step": 27126 - }, - { - "epoch": 14.147066492829204, - "grad_norm": 1.4948354959487915, - "learning_rate": 7.324723618090452e-05, - "loss": 5.4854, - "step": 27127 - }, - { - "epoch": 14.147588005215123, - "grad_norm": 1.5600003004074097, - "learning_rate": 7.32462311557789e-05, - "loss": 4.7412, - "step": 27128 - }, - { - "epoch": 14.148109517601043, - "grad_norm": 1.447126030921936, - "learning_rate": 7.324522613065327e-05, - "loss": 4.8743, - "step": 27129 - }, - { - "epoch": 14.148631029986962, - "grad_norm": 1.4712954759597778, - "learning_rate": 7.324422110552765e-05, - "loss": 5.5138, - "step": 27130 - }, - { - "epoch": 14.149152542372882, - "grad_norm": 1.4763023853302002, - "learning_rate": 7.324321608040201e-05, - "loss": 5.1192, - "step": 27131 - }, - { - "epoch": 14.149674054758801, - "grad_norm": 1.3705309629440308, - "learning_rate": 7.324221105527639e-05, - "loss": 5.7604, - "step": 27132 - }, - { - "epoch": 14.150195567144719, - "grad_norm": 1.6101802587509155, - "learning_rate": 7.324120603015076e-05, - "loss": 5.2628, - "step": 27133 - }, - { - "epoch": 14.150717079530638, - "grad_norm": 1.4304100275039673, - "learning_rate": 7.324020100502513e-05, - "loss": 5.234, - "step": 27134 - }, - { - "epoch": 14.151238591916558, - "grad_norm": 1.5048824548721313, - "learning_rate": 7.32391959798995e-05, - "loss": 4.8807, - "step": 27135 - }, - { - "epoch": 14.151760104302477, - "grad_norm": 1.4495106935501099, - "learning_rate": 7.323819095477388e-05, - "loss": 5.319, - "step": 27136 - }, - { - "epoch": 14.152281616688397, - "grad_norm": 1.502549171447754, - "learning_rate": 7.323718592964824e-05, - "loss": 5.1632, - "step": 27137 - }, - { - "epoch": 14.152803129074316, - "grad_norm": 1.5764570236206055, - "learning_rate": 7.32361809045226e-05, - "loss": 5.4746, - "step": 27138 - }, - { - "epoch": 14.153324641460234, - "grad_norm": 1.3918178081512451, - "learning_rate": 7.323517587939698e-05, - "loss": 5.7829, - "step": 27139 - }, - { - "epoch": 14.153846153846153, - "grad_norm": 1.4709405899047852, - "learning_rate": 7.323417085427136e-05, - "loss": 5.2778, - "step": 27140 - }, - { - "epoch": 14.154367666232073, - "grad_norm": 1.333682656288147, - "learning_rate": 7.323316582914574e-05, - "loss": 5.9167, - "step": 27141 - }, - { - "epoch": 14.154889178617992, - "grad_norm": 1.3965404033660889, - "learning_rate": 7.32321608040201e-05, - "loss": 5.6617, - "step": 27142 - }, - { - "epoch": 14.155410691003912, - "grad_norm": 1.566819429397583, - "learning_rate": 7.323115577889448e-05, - "loss": 5.649, - "step": 27143 - }, - { - "epoch": 14.155932203389831, - "grad_norm": 1.4535058736801147, - "learning_rate": 7.323015075376885e-05, - "loss": 5.3768, - "step": 27144 - }, - { - "epoch": 14.156453715775749, - "grad_norm": 1.3459033966064453, - "learning_rate": 7.322914572864322e-05, - "loss": 5.6616, - "step": 27145 - }, - { - "epoch": 14.156975228161668, - "grad_norm": 1.4239634275436401, - "learning_rate": 7.322814070351759e-05, - "loss": 5.5242, - "step": 27146 - }, - { - "epoch": 14.157496740547588, - "grad_norm": 1.4476709365844727, - "learning_rate": 7.322713567839197e-05, - "loss": 5.2555, - "step": 27147 - }, - { - "epoch": 14.158018252933507, - "grad_norm": 1.4330954551696777, - "learning_rate": 7.322613065326633e-05, - "loss": 5.6404, - "step": 27148 - }, - { - "epoch": 14.158539765319427, - "grad_norm": 1.4078445434570312, - "learning_rate": 7.322512562814071e-05, - "loss": 4.9693, - "step": 27149 - }, - { - "epoch": 14.159061277705346, - "grad_norm": 1.513153314590454, - "learning_rate": 7.322412060301509e-05, - "loss": 4.9786, - "step": 27150 - }, - { - "epoch": 14.159582790091264, - "grad_norm": 1.4434455633163452, - "learning_rate": 7.322311557788946e-05, - "loss": 5.5798, - "step": 27151 - }, - { - "epoch": 14.160104302477183, - "grad_norm": 1.3965842723846436, - "learning_rate": 7.322211055276383e-05, - "loss": 5.6412, - "step": 27152 - }, - { - "epoch": 14.160625814863103, - "grad_norm": 1.480010986328125, - "learning_rate": 7.322110552763819e-05, - "loss": 5.2152, - "step": 27153 - }, - { - "epoch": 14.161147327249022, - "grad_norm": 1.5230145454406738, - "learning_rate": 7.322010050251257e-05, - "loss": 4.7261, - "step": 27154 - }, - { - "epoch": 14.161668839634942, - "grad_norm": 1.4433557987213135, - "learning_rate": 7.321909547738693e-05, - "loss": 5.7253, - "step": 27155 - }, - { - "epoch": 14.162190352020861, - "grad_norm": 1.3705871105194092, - "learning_rate": 7.321809045226131e-05, - "loss": 5.7439, - "step": 27156 - }, - { - "epoch": 14.162711864406779, - "grad_norm": 1.5261682271957397, - "learning_rate": 7.321708542713568e-05, - "loss": 5.4958, - "step": 27157 - }, - { - "epoch": 14.163233376792698, - "grad_norm": 1.59833824634552, - "learning_rate": 7.321608040201005e-05, - "loss": 5.619, - "step": 27158 - }, - { - "epoch": 14.163754889178618, - "grad_norm": 1.5576152801513672, - "learning_rate": 7.321507537688442e-05, - "loss": 4.826, - "step": 27159 - }, - { - "epoch": 14.164276401564537, - "grad_norm": 1.4859559535980225, - "learning_rate": 7.32140703517588e-05, - "loss": 5.5445, - "step": 27160 - }, - { - "epoch": 14.164797913950457, - "grad_norm": 1.4559345245361328, - "learning_rate": 7.321306532663317e-05, - "loss": 5.2723, - "step": 27161 - }, - { - "epoch": 14.165319426336376, - "grad_norm": 1.3420257568359375, - "learning_rate": 7.321206030150755e-05, - "loss": 5.4853, - "step": 27162 - }, - { - "epoch": 14.165840938722294, - "grad_norm": 1.489880084991455, - "learning_rate": 7.321105527638192e-05, - "loss": 5.3863, - "step": 27163 - }, - { - "epoch": 14.166362451108213, - "grad_norm": 1.5364068746566772, - "learning_rate": 7.32100502512563e-05, - "loss": 4.9218, - "step": 27164 - }, - { - "epoch": 14.166883963494133, - "grad_norm": 1.301005482673645, - "learning_rate": 7.320904522613066e-05, - "loss": 5.5706, - "step": 27165 - }, - { - "epoch": 14.167405475880052, - "grad_norm": 1.3205894231796265, - "learning_rate": 7.320804020100502e-05, - "loss": 5.489, - "step": 27166 - }, - { - "epoch": 14.167926988265972, - "grad_norm": 1.504630446434021, - "learning_rate": 7.32070351758794e-05, - "loss": 5.1757, - "step": 27167 - }, - { - "epoch": 14.168448500651891, - "grad_norm": 1.406087875366211, - "learning_rate": 7.320603015075376e-05, - "loss": 5.5894, - "step": 27168 - }, - { - "epoch": 14.168970013037809, - "grad_norm": 1.4903349876403809, - "learning_rate": 7.320502512562814e-05, - "loss": 5.3981, - "step": 27169 - }, - { - "epoch": 14.169491525423728, - "grad_norm": 1.4248340129852295, - "learning_rate": 7.32040201005025e-05, - "loss": 5.5165, - "step": 27170 - }, - { - "epoch": 14.170013037809648, - "grad_norm": 1.4388628005981445, - "learning_rate": 7.320301507537688e-05, - "loss": 5.7663, - "step": 27171 - }, - { - "epoch": 14.170534550195567, - "grad_norm": 1.431009292602539, - "learning_rate": 7.320201005025126e-05, - "loss": 5.4129, - "step": 27172 - }, - { - "epoch": 14.171056062581487, - "grad_norm": 1.4947129487991333, - "learning_rate": 7.320100502512564e-05, - "loss": 5.3228, - "step": 27173 - }, - { - "epoch": 14.171577574967406, - "grad_norm": 1.489804744720459, - "learning_rate": 7.32e-05, - "loss": 5.1572, - "step": 27174 - }, - { - "epoch": 14.172099087353324, - "grad_norm": 1.4551734924316406, - "learning_rate": 7.319899497487438e-05, - "loss": 5.4952, - "step": 27175 - }, - { - "epoch": 14.172620599739243, - "grad_norm": 1.4447124004364014, - "learning_rate": 7.319798994974875e-05, - "loss": 5.1216, - "step": 27176 - }, - { - "epoch": 14.173142112125163, - "grad_norm": 1.4400486946105957, - "learning_rate": 7.319698492462312e-05, - "loss": 5.5357, - "step": 27177 - }, - { - "epoch": 14.173663624511082, - "grad_norm": 1.5080485343933105, - "learning_rate": 7.319597989949749e-05, - "loss": 4.9627, - "step": 27178 - }, - { - "epoch": 14.174185136897002, - "grad_norm": 1.5429857969284058, - "learning_rate": 7.319497487437185e-05, - "loss": 5.3929, - "step": 27179 - }, - { - "epoch": 14.174706649282921, - "grad_norm": 1.5156521797180176, - "learning_rate": 7.319396984924623e-05, - "loss": 5.3672, - "step": 27180 - }, - { - "epoch": 14.175228161668839, - "grad_norm": 1.362364411354065, - "learning_rate": 7.319296482412061e-05, - "loss": 5.7604, - "step": 27181 - }, - { - "epoch": 14.175749674054758, - "grad_norm": 1.475502371788025, - "learning_rate": 7.319195979899499e-05, - "loss": 4.8596, - "step": 27182 - }, - { - "epoch": 14.176271186440678, - "grad_norm": 1.4980660676956177, - "learning_rate": 7.319095477386935e-05, - "loss": 5.4394, - "step": 27183 - }, - { - "epoch": 14.176792698826597, - "grad_norm": 1.5411347150802612, - "learning_rate": 7.318994974874373e-05, - "loss": 4.8863, - "step": 27184 - }, - { - "epoch": 14.177314211212517, - "grad_norm": 1.3914109468460083, - "learning_rate": 7.318894472361809e-05, - "loss": 5.7207, - "step": 27185 - }, - { - "epoch": 14.177835723598436, - "grad_norm": 1.4637866020202637, - "learning_rate": 7.318793969849247e-05, - "loss": 5.4958, - "step": 27186 - }, - { - "epoch": 14.178357235984354, - "grad_norm": 1.3790674209594727, - "learning_rate": 7.318693467336683e-05, - "loss": 5.7422, - "step": 27187 - }, - { - "epoch": 14.178878748370273, - "grad_norm": 1.4498777389526367, - "learning_rate": 7.318592964824121e-05, - "loss": 5.4895, - "step": 27188 - }, - { - "epoch": 14.179400260756193, - "grad_norm": 1.5177955627441406, - "learning_rate": 7.318492462311558e-05, - "loss": 5.0719, - "step": 27189 - }, - { - "epoch": 14.179921773142112, - "grad_norm": 1.4832698106765747, - "learning_rate": 7.318391959798995e-05, - "loss": 5.6235, - "step": 27190 - }, - { - "epoch": 14.180443285528032, - "grad_norm": 1.483486533164978, - "learning_rate": 7.318291457286432e-05, - "loss": 5.487, - "step": 27191 - }, - { - "epoch": 14.180964797913951, - "grad_norm": 1.3036777973175049, - "learning_rate": 7.31819095477387e-05, - "loss": 5.854, - "step": 27192 - }, - { - "epoch": 14.181486310299869, - "grad_norm": 1.6212745904922485, - "learning_rate": 7.318090452261307e-05, - "loss": 5.3741, - "step": 27193 - }, - { - "epoch": 14.182007822685788, - "grad_norm": 1.3661625385284424, - "learning_rate": 7.317989949748744e-05, - "loss": 5.9508, - "step": 27194 - }, - { - "epoch": 14.182529335071708, - "grad_norm": 1.4193960428237915, - "learning_rate": 7.317889447236182e-05, - "loss": 5.4099, - "step": 27195 - }, - { - "epoch": 14.183050847457627, - "grad_norm": 1.415579915046692, - "learning_rate": 7.317788944723618e-05, - "loss": 5.782, - "step": 27196 - }, - { - "epoch": 14.183572359843547, - "grad_norm": 1.4814778566360474, - "learning_rate": 7.317688442211056e-05, - "loss": 5.1448, - "step": 27197 - }, - { - "epoch": 14.184093872229466, - "grad_norm": 1.3976147174835205, - "learning_rate": 7.317587939698492e-05, - "loss": 5.2323, - "step": 27198 - }, - { - "epoch": 14.184615384615384, - "grad_norm": 1.4749821424484253, - "learning_rate": 7.31748743718593e-05, - "loss": 5.5298, - "step": 27199 - }, - { - "epoch": 14.185136897001303, - "grad_norm": 1.4186656475067139, - "learning_rate": 7.317386934673366e-05, - "loss": 5.4437, - "step": 27200 - }, - { - "epoch": 14.185658409387223, - "grad_norm": 1.4419045448303223, - "learning_rate": 7.317286432160804e-05, - "loss": 5.3758, - "step": 27201 - }, - { - "epoch": 14.186179921773142, - "grad_norm": 1.3796026706695557, - "learning_rate": 7.317185929648242e-05, - "loss": 5.533, - "step": 27202 - }, - { - "epoch": 14.186701434159062, - "grad_norm": 1.4630990028381348, - "learning_rate": 7.31708542713568e-05, - "loss": 5.273, - "step": 27203 - }, - { - "epoch": 14.187222946544981, - "grad_norm": 1.4841080904006958, - "learning_rate": 7.316984924623116e-05, - "loss": 5.4995, - "step": 27204 - }, - { - "epoch": 14.187744458930899, - "grad_norm": 1.5076159238815308, - "learning_rate": 7.316884422110553e-05, - "loss": 4.7752, - "step": 27205 - }, - { - "epoch": 14.188265971316818, - "grad_norm": 1.5728312730789185, - "learning_rate": 7.31678391959799e-05, - "loss": 5.565, - "step": 27206 - }, - { - "epoch": 14.188787483702738, - "grad_norm": 1.4428430795669556, - "learning_rate": 7.316683417085427e-05, - "loss": 5.1767, - "step": 27207 - }, - { - "epoch": 14.189308996088657, - "grad_norm": 1.519760251045227, - "learning_rate": 7.316582914572865e-05, - "loss": 5.1376, - "step": 27208 - }, - { - "epoch": 14.189830508474577, - "grad_norm": 1.4841769933700562, - "learning_rate": 7.316482412060301e-05, - "loss": 5.4555, - "step": 27209 - }, - { - "epoch": 14.190352020860496, - "grad_norm": 1.4226421117782593, - "learning_rate": 7.316381909547739e-05, - "loss": 5.6837, - "step": 27210 - }, - { - "epoch": 14.190873533246414, - "grad_norm": 1.5850707292556763, - "learning_rate": 7.316281407035175e-05, - "loss": 5.0193, - "step": 27211 - }, - { - "epoch": 14.191395045632333, - "grad_norm": 1.5148652791976929, - "learning_rate": 7.316180904522613e-05, - "loss": 5.5803, - "step": 27212 - }, - { - "epoch": 14.191916558018253, - "grad_norm": 1.4596511125564575, - "learning_rate": 7.316080402010051e-05, - "loss": 5.6349, - "step": 27213 - }, - { - "epoch": 14.192438070404172, - "grad_norm": 1.4664758443832397, - "learning_rate": 7.315979899497489e-05, - "loss": 5.2277, - "step": 27214 - }, - { - "epoch": 14.192959582790092, - "grad_norm": 1.314332365989685, - "learning_rate": 7.315879396984925e-05, - "loss": 5.2574, - "step": 27215 - }, - { - "epoch": 14.193481095176011, - "grad_norm": 1.6141880750656128, - "learning_rate": 7.315778894472363e-05, - "loss": 5.2735, - "step": 27216 - }, - { - "epoch": 14.194002607561929, - "grad_norm": 1.6232140064239502, - "learning_rate": 7.315678391959799e-05, - "loss": 5.3862, - "step": 27217 - }, - { - "epoch": 14.194524119947848, - "grad_norm": 1.467341661453247, - "learning_rate": 7.315577889447236e-05, - "loss": 5.3952, - "step": 27218 - }, - { - "epoch": 14.195045632333768, - "grad_norm": 1.511054277420044, - "learning_rate": 7.315477386934674e-05, - "loss": 5.61, - "step": 27219 - }, - { - "epoch": 14.195567144719687, - "grad_norm": 1.4263521432876587, - "learning_rate": 7.31537688442211e-05, - "loss": 5.4555, - "step": 27220 - }, - { - "epoch": 14.196088657105607, - "grad_norm": 1.4854775667190552, - "learning_rate": 7.315276381909548e-05, - "loss": 5.1756, - "step": 27221 - }, - { - "epoch": 14.196610169491525, - "grad_norm": 1.5135750770568848, - "learning_rate": 7.315175879396986e-05, - "loss": 5.1588, - "step": 27222 - }, - { - "epoch": 14.197131681877444, - "grad_norm": 1.5286197662353516, - "learning_rate": 7.315075376884423e-05, - "loss": 4.7782, - "step": 27223 - }, - { - "epoch": 14.197653194263363, - "grad_norm": 1.4718308448791504, - "learning_rate": 7.31497487437186e-05, - "loss": 5.6709, - "step": 27224 - }, - { - "epoch": 14.198174706649283, - "grad_norm": 1.4337424039840698, - "learning_rate": 7.314874371859297e-05, - "loss": 5.5513, - "step": 27225 - }, - { - "epoch": 14.198696219035202, - "grad_norm": 1.3613030910491943, - "learning_rate": 7.314773869346734e-05, - "loss": 5.7295, - "step": 27226 - }, - { - "epoch": 14.199217731421122, - "grad_norm": 1.3673616647720337, - "learning_rate": 7.314673366834172e-05, - "loss": 5.751, - "step": 27227 - }, - { - "epoch": 14.19973924380704, - "grad_norm": 1.4920179843902588, - "learning_rate": 7.314572864321608e-05, - "loss": 5.4003, - "step": 27228 - }, - { - "epoch": 14.200260756192959, - "grad_norm": 1.490362286567688, - "learning_rate": 7.314472361809046e-05, - "loss": 5.2547, - "step": 27229 - }, - { - "epoch": 14.200782268578878, - "grad_norm": 1.4213807582855225, - "learning_rate": 7.314371859296482e-05, - "loss": 5.5905, - "step": 27230 - }, - { - "epoch": 14.201303780964798, - "grad_norm": 1.5121954679489136, - "learning_rate": 7.31427135678392e-05, - "loss": 5.5349, - "step": 27231 - }, - { - "epoch": 14.201825293350717, - "grad_norm": 1.3352218866348267, - "learning_rate": 7.314170854271357e-05, - "loss": 5.7394, - "step": 27232 - }, - { - "epoch": 14.202346805736637, - "grad_norm": 1.5209839344024658, - "learning_rate": 7.314070351758794e-05, - "loss": 5.1608, - "step": 27233 - }, - { - "epoch": 14.202868318122555, - "grad_norm": 1.5732306241989136, - "learning_rate": 7.313969849246232e-05, - "loss": 5.1975, - "step": 27234 - }, - { - "epoch": 14.203389830508474, - "grad_norm": 1.4817997217178345, - "learning_rate": 7.313869346733669e-05, - "loss": 5.4027, - "step": 27235 - }, - { - "epoch": 14.203911342894393, - "grad_norm": 1.478000521659851, - "learning_rate": 7.313768844221106e-05, - "loss": 5.6123, - "step": 27236 - }, - { - "epoch": 14.204432855280313, - "grad_norm": 1.574125051498413, - "learning_rate": 7.313668341708543e-05, - "loss": 4.9564, - "step": 27237 - }, - { - "epoch": 14.204954367666232, - "grad_norm": 1.444568157196045, - "learning_rate": 7.31356783919598e-05, - "loss": 5.7197, - "step": 27238 - }, - { - "epoch": 14.205475880052152, - "grad_norm": 1.4567217826843262, - "learning_rate": 7.313467336683417e-05, - "loss": 5.3104, - "step": 27239 - }, - { - "epoch": 14.20599739243807, - "grad_norm": 1.386392593383789, - "learning_rate": 7.313366834170855e-05, - "loss": 5.3624, - "step": 27240 - }, - { - "epoch": 14.206518904823989, - "grad_norm": 1.388288140296936, - "learning_rate": 7.313266331658291e-05, - "loss": 5.3403, - "step": 27241 - }, - { - "epoch": 14.207040417209909, - "grad_norm": 1.3962334394454956, - "learning_rate": 7.313165829145729e-05, - "loss": 5.6319, - "step": 27242 - }, - { - "epoch": 14.207561929595828, - "grad_norm": 1.5229636430740356, - "learning_rate": 7.313065326633167e-05, - "loss": 4.9588, - "step": 27243 - }, - { - "epoch": 14.208083441981747, - "grad_norm": 1.4855610132217407, - "learning_rate": 7.312964824120605e-05, - "loss": 5.3265, - "step": 27244 - }, - { - "epoch": 14.208604954367667, - "grad_norm": 1.4759796857833862, - "learning_rate": 7.312864321608041e-05, - "loss": 5.3967, - "step": 27245 - }, - { - "epoch": 14.209126466753585, - "grad_norm": 1.4178539514541626, - "learning_rate": 7.312763819095477e-05, - "loss": 5.5571, - "step": 27246 - }, - { - "epoch": 14.209647979139504, - "grad_norm": 1.5256099700927734, - "learning_rate": 7.312663316582915e-05, - "loss": 5.2919, - "step": 27247 - }, - { - "epoch": 14.210169491525424, - "grad_norm": 1.5339365005493164, - "learning_rate": 7.312562814070352e-05, - "loss": 5.4475, - "step": 27248 - }, - { - "epoch": 14.210691003911343, - "grad_norm": 1.4199373722076416, - "learning_rate": 7.31246231155779e-05, - "loss": 4.953, - "step": 27249 - }, - { - "epoch": 14.211212516297262, - "grad_norm": 1.4339454174041748, - "learning_rate": 7.312361809045226e-05, - "loss": 5.1769, - "step": 27250 - }, - { - "epoch": 14.211734028683182, - "grad_norm": 1.43657386302948, - "learning_rate": 7.312261306532664e-05, - "loss": 5.0392, - "step": 27251 - }, - { - "epoch": 14.2122555410691, - "grad_norm": 1.4352450370788574, - "learning_rate": 7.3121608040201e-05, - "loss": 5.6362, - "step": 27252 - }, - { - "epoch": 14.212777053455019, - "grad_norm": 1.3909823894500732, - "learning_rate": 7.312060301507538e-05, - "loss": 5.5358, - "step": 27253 - }, - { - "epoch": 14.213298565840939, - "grad_norm": 1.590221643447876, - "learning_rate": 7.311959798994976e-05, - "loss": 5.0028, - "step": 27254 - }, - { - "epoch": 14.213820078226858, - "grad_norm": 1.4002997875213623, - "learning_rate": 7.311859296482413e-05, - "loss": 5.2444, - "step": 27255 - }, - { - "epoch": 14.214341590612777, - "grad_norm": 1.3776838779449463, - "learning_rate": 7.31175879396985e-05, - "loss": 5.5415, - "step": 27256 - }, - { - "epoch": 14.214863102998697, - "grad_norm": 1.3634021282196045, - "learning_rate": 7.311658291457288e-05, - "loss": 5.5866, - "step": 27257 - }, - { - "epoch": 14.215384615384615, - "grad_norm": 1.496278166770935, - "learning_rate": 7.311557788944724e-05, - "loss": 5.8045, - "step": 27258 - }, - { - "epoch": 14.215906127770534, - "grad_norm": 1.5851322412490845, - "learning_rate": 7.31145728643216e-05, - "loss": 5.1982, - "step": 27259 - }, - { - "epoch": 14.216427640156454, - "grad_norm": 1.50608491897583, - "learning_rate": 7.311356783919598e-05, - "loss": 5.4151, - "step": 27260 - }, - { - "epoch": 14.216949152542373, - "grad_norm": 1.3819899559020996, - "learning_rate": 7.311256281407035e-05, - "loss": 5.8766, - "step": 27261 - }, - { - "epoch": 14.217470664928292, - "grad_norm": 1.5295580625534058, - "learning_rate": 7.311155778894472e-05, - "loss": 5.1268, - "step": 27262 - }, - { - "epoch": 14.217992177314212, - "grad_norm": 1.4227226972579956, - "learning_rate": 7.31105527638191e-05, - "loss": 5.5679, - "step": 27263 - }, - { - "epoch": 14.21851368970013, - "grad_norm": 1.398829698562622, - "learning_rate": 7.310954773869348e-05, - "loss": 5.245, - "step": 27264 - }, - { - "epoch": 14.219035202086049, - "grad_norm": 1.415742039680481, - "learning_rate": 7.310854271356784e-05, - "loss": 5.7169, - "step": 27265 - }, - { - "epoch": 14.219556714471969, - "grad_norm": 1.364363670349121, - "learning_rate": 7.310753768844222e-05, - "loss": 5.6408, - "step": 27266 - }, - { - "epoch": 14.220078226857888, - "grad_norm": 1.8075542449951172, - "learning_rate": 7.310653266331659e-05, - "loss": 5.3408, - "step": 27267 - }, - { - "epoch": 14.220599739243807, - "grad_norm": 1.3957653045654297, - "learning_rate": 7.310552763819096e-05, - "loss": 5.3424, - "step": 27268 - }, - { - "epoch": 14.221121251629727, - "grad_norm": 1.4244279861450195, - "learning_rate": 7.310452261306533e-05, - "loss": 5.4808, - "step": 27269 - }, - { - "epoch": 14.221642764015645, - "grad_norm": 1.4251354932785034, - "learning_rate": 7.31035175879397e-05, - "loss": 5.2947, - "step": 27270 - }, - { - "epoch": 14.222164276401564, - "grad_norm": 1.4166768789291382, - "learning_rate": 7.310251256281407e-05, - "loss": 5.7704, - "step": 27271 - }, - { - "epoch": 14.222685788787484, - "grad_norm": 1.3820013999938965, - "learning_rate": 7.310150753768843e-05, - "loss": 5.665, - "step": 27272 - }, - { - "epoch": 14.223207301173403, - "grad_norm": 1.4777919054031372, - "learning_rate": 7.310050251256281e-05, - "loss": 5.6909, - "step": 27273 - }, - { - "epoch": 14.223728813559323, - "grad_norm": 1.5357627868652344, - "learning_rate": 7.309949748743719e-05, - "loss": 5.3044, - "step": 27274 - }, - { - "epoch": 14.224250325945242, - "grad_norm": 1.4086380004882812, - "learning_rate": 7.309849246231157e-05, - "loss": 5.0209, - "step": 27275 - }, - { - "epoch": 14.22477183833116, - "grad_norm": 1.4369885921478271, - "learning_rate": 7.309748743718593e-05, - "loss": 5.62, - "step": 27276 - }, - { - "epoch": 14.22529335071708, - "grad_norm": 1.482466220855713, - "learning_rate": 7.309648241206031e-05, - "loss": 5.6127, - "step": 27277 - }, - { - "epoch": 14.225814863102999, - "grad_norm": 1.407321572303772, - "learning_rate": 7.309547738693467e-05, - "loss": 5.7001, - "step": 27278 - }, - { - "epoch": 14.226336375488918, - "grad_norm": 1.3797773122787476, - "learning_rate": 7.309447236180905e-05, - "loss": 5.2694, - "step": 27279 - }, - { - "epoch": 14.226857887874838, - "grad_norm": 1.4066593647003174, - "learning_rate": 7.309346733668342e-05, - "loss": 4.9554, - "step": 27280 - }, - { - "epoch": 14.227379400260757, - "grad_norm": 1.443768858909607, - "learning_rate": 7.30924623115578e-05, - "loss": 5.4694, - "step": 27281 - }, - { - "epoch": 14.227900912646675, - "grad_norm": 1.434364676475525, - "learning_rate": 7.309145728643216e-05, - "loss": 5.4498, - "step": 27282 - }, - { - "epoch": 14.228422425032594, - "grad_norm": 1.5249860286712646, - "learning_rate": 7.309045226130654e-05, - "loss": 4.902, - "step": 27283 - }, - { - "epoch": 14.228943937418514, - "grad_norm": 1.563340425491333, - "learning_rate": 7.308944723618091e-05, - "loss": 5.4862, - "step": 27284 - }, - { - "epoch": 14.229465449804433, - "grad_norm": 1.4683183431625366, - "learning_rate": 7.308844221105528e-05, - "loss": 5.6635, - "step": 27285 - }, - { - "epoch": 14.229986962190353, - "grad_norm": 1.481260061264038, - "learning_rate": 7.308743718592966e-05, - "loss": 4.458, - "step": 27286 - }, - { - "epoch": 14.230508474576272, - "grad_norm": 1.4826347827911377, - "learning_rate": 7.308643216080402e-05, - "loss": 5.3891, - "step": 27287 - }, - { - "epoch": 14.23102998696219, - "grad_norm": 1.4255194664001465, - "learning_rate": 7.30854271356784e-05, - "loss": 5.6463, - "step": 27288 - }, - { - "epoch": 14.23155149934811, - "grad_norm": 1.563452124595642, - "learning_rate": 7.308442211055276e-05, - "loss": 5.0557, - "step": 27289 - }, - { - "epoch": 14.232073011734029, - "grad_norm": 1.444115161895752, - "learning_rate": 7.308341708542714e-05, - "loss": 5.7019, - "step": 27290 - }, - { - "epoch": 14.232594524119948, - "grad_norm": 1.3775157928466797, - "learning_rate": 7.30824120603015e-05, - "loss": 5.8407, - "step": 27291 - }, - { - "epoch": 14.233116036505868, - "grad_norm": 1.6213982105255127, - "learning_rate": 7.308140703517588e-05, - "loss": 5.668, - "step": 27292 - }, - { - "epoch": 14.233637548891787, - "grad_norm": 1.790926218032837, - "learning_rate": 7.308040201005025e-05, - "loss": 4.6647, - "step": 27293 - }, - { - "epoch": 14.234159061277705, - "grad_norm": 1.5044662952423096, - "learning_rate": 7.307939698492462e-05, - "loss": 5.1062, - "step": 27294 - }, - { - "epoch": 14.234680573663624, - "grad_norm": 1.4272797107696533, - "learning_rate": 7.3078391959799e-05, - "loss": 5.3878, - "step": 27295 - }, - { - "epoch": 14.235202086049544, - "grad_norm": 1.5998446941375732, - "learning_rate": 7.307738693467338e-05, - "loss": 5.3925, - "step": 27296 - }, - { - "epoch": 14.235723598435463, - "grad_norm": 1.4371025562286377, - "learning_rate": 7.307638190954774e-05, - "loss": 5.5894, - "step": 27297 - }, - { - "epoch": 14.236245110821383, - "grad_norm": 1.7426761388778687, - "learning_rate": 7.307537688442211e-05, - "loss": 4.9216, - "step": 27298 - }, - { - "epoch": 14.236766623207302, - "grad_norm": 1.3583760261535645, - "learning_rate": 7.307437185929649e-05, - "loss": 5.4269, - "step": 27299 - }, - { - "epoch": 14.23728813559322, - "grad_norm": 1.5230202674865723, - "learning_rate": 7.307336683417085e-05, - "loss": 5.4224, - "step": 27300 - }, - { - "epoch": 14.23780964797914, - "grad_norm": 1.3382148742675781, - "learning_rate": 7.307236180904523e-05, - "loss": 4.4273, - "step": 27301 - }, - { - "epoch": 14.238331160365059, - "grad_norm": 1.4035459756851196, - "learning_rate": 7.30713567839196e-05, - "loss": 5.5305, - "step": 27302 - }, - { - "epoch": 14.238852672750978, - "grad_norm": 1.483431100845337, - "learning_rate": 7.307035175879397e-05, - "loss": 5.3454, - "step": 27303 - }, - { - "epoch": 14.239374185136898, - "grad_norm": 1.4702520370483398, - "learning_rate": 7.306934673366835e-05, - "loss": 5.302, - "step": 27304 - }, - { - "epoch": 14.239895697522817, - "grad_norm": 1.5252269506454468, - "learning_rate": 7.306834170854273e-05, - "loss": 5.4236, - "step": 27305 - }, - { - "epoch": 14.240417209908735, - "grad_norm": 1.4960097074508667, - "learning_rate": 7.306733668341709e-05, - "loss": 5.4778, - "step": 27306 - }, - { - "epoch": 14.240938722294654, - "grad_norm": 1.4212547540664673, - "learning_rate": 7.306633165829147e-05, - "loss": 5.2442, - "step": 27307 - }, - { - "epoch": 14.241460234680574, - "grad_norm": 1.4477211236953735, - "learning_rate": 7.306532663316583e-05, - "loss": 5.4861, - "step": 27308 - }, - { - "epoch": 14.241981747066493, - "grad_norm": 1.5423709154129028, - "learning_rate": 7.306432160804021e-05, - "loss": 5.102, - "step": 27309 - }, - { - "epoch": 14.242503259452413, - "grad_norm": 1.3819835186004639, - "learning_rate": 7.306331658291458e-05, - "loss": 5.6208, - "step": 27310 - }, - { - "epoch": 14.243024771838332, - "grad_norm": 1.3800822496414185, - "learning_rate": 7.306231155778894e-05, - "loss": 5.5215, - "step": 27311 - }, - { - "epoch": 14.24354628422425, - "grad_norm": 1.463265299797058, - "learning_rate": 7.306130653266332e-05, - "loss": 5.1405, - "step": 27312 - }, - { - "epoch": 14.24406779661017, - "grad_norm": 1.4437463283538818, - "learning_rate": 7.306030150753768e-05, - "loss": 5.2342, - "step": 27313 - }, - { - "epoch": 14.244589308996089, - "grad_norm": 1.6281908750534058, - "learning_rate": 7.305929648241206e-05, - "loss": 4.8531, - "step": 27314 - }, - { - "epoch": 14.245110821382008, - "grad_norm": 1.4291805028915405, - "learning_rate": 7.305829145728644e-05, - "loss": 4.9616, - "step": 27315 - }, - { - "epoch": 14.245632333767928, - "grad_norm": 1.5477478504180908, - "learning_rate": 7.305728643216082e-05, - "loss": 5.3448, - "step": 27316 - }, - { - "epoch": 14.246153846153845, - "grad_norm": 1.6123816967010498, - "learning_rate": 7.305628140703518e-05, - "loss": 5.2226, - "step": 27317 - }, - { - "epoch": 14.246675358539765, - "grad_norm": 1.508055567741394, - "learning_rate": 7.305527638190956e-05, - "loss": 4.6682, - "step": 27318 - }, - { - "epoch": 14.247196870925684, - "grad_norm": 1.440315842628479, - "learning_rate": 7.305427135678392e-05, - "loss": 5.3362, - "step": 27319 - }, - { - "epoch": 14.247718383311604, - "grad_norm": 1.4433022737503052, - "learning_rate": 7.30532663316583e-05, - "loss": 4.8389, - "step": 27320 - }, - { - "epoch": 14.248239895697523, - "grad_norm": 1.4750778675079346, - "learning_rate": 7.305226130653266e-05, - "loss": 5.6085, - "step": 27321 - }, - { - "epoch": 14.248761408083443, - "grad_norm": 1.4744148254394531, - "learning_rate": 7.305125628140704e-05, - "loss": 5.1633, - "step": 27322 - }, - { - "epoch": 14.24928292046936, - "grad_norm": 1.3542265892028809, - "learning_rate": 7.30502512562814e-05, - "loss": 5.6371, - "step": 27323 - }, - { - "epoch": 14.24980443285528, - "grad_norm": 1.5286089181900024, - "learning_rate": 7.304924623115578e-05, - "loss": 5.3784, - "step": 27324 - }, - { - "epoch": 14.2503259452412, - "grad_norm": 1.4177786111831665, - "learning_rate": 7.304824120603015e-05, - "loss": 5.4395, - "step": 27325 - }, - { - "epoch": 14.250847457627119, - "grad_norm": 1.440119981765747, - "learning_rate": 7.304723618090453e-05, - "loss": 5.6359, - "step": 27326 - }, - { - "epoch": 14.251368970013038, - "grad_norm": 1.4730457067489624, - "learning_rate": 7.30462311557789e-05, - "loss": 5.4333, - "step": 27327 - }, - { - "epoch": 14.251890482398958, - "grad_norm": 1.3955254554748535, - "learning_rate": 7.304522613065327e-05, - "loss": 5.2767, - "step": 27328 - }, - { - "epoch": 14.252411994784875, - "grad_norm": 1.781304121017456, - "learning_rate": 7.304422110552765e-05, - "loss": 5.2614, - "step": 27329 - }, - { - "epoch": 14.252933507170795, - "grad_norm": 1.559217095375061, - "learning_rate": 7.304321608040201e-05, - "loss": 5.4996, - "step": 27330 - }, - { - "epoch": 14.253455019556714, - "grad_norm": 1.5744870901107788, - "learning_rate": 7.304221105527639e-05, - "loss": 5.1946, - "step": 27331 - }, - { - "epoch": 14.253976531942634, - "grad_norm": 1.5614875555038452, - "learning_rate": 7.304120603015075e-05, - "loss": 5.4666, - "step": 27332 - }, - { - "epoch": 14.254498044328553, - "grad_norm": 1.4541552066802979, - "learning_rate": 7.304020100502513e-05, - "loss": 5.5444, - "step": 27333 - }, - { - "epoch": 14.255019556714473, - "grad_norm": 1.3982900381088257, - "learning_rate": 7.30391959798995e-05, - "loss": 5.4106, - "step": 27334 - }, - { - "epoch": 14.25554106910039, - "grad_norm": 1.4159704446792603, - "learning_rate": 7.303819095477387e-05, - "loss": 5.2256, - "step": 27335 - }, - { - "epoch": 14.25606258148631, - "grad_norm": 1.5504511594772339, - "learning_rate": 7.303718592964825e-05, - "loss": 4.9876, - "step": 27336 - }, - { - "epoch": 14.25658409387223, - "grad_norm": 1.4303057193756104, - "learning_rate": 7.303618090452263e-05, - "loss": 5.4683, - "step": 27337 - }, - { - "epoch": 14.257105606258149, - "grad_norm": 1.593218445777893, - "learning_rate": 7.303517587939699e-05, - "loss": 5.7061, - "step": 27338 - }, - { - "epoch": 14.257627118644068, - "grad_norm": 1.466259241104126, - "learning_rate": 7.303417085427136e-05, - "loss": 5.232, - "step": 27339 - }, - { - "epoch": 14.258148631029988, - "grad_norm": 1.4396835565567017, - "learning_rate": 7.303316582914573e-05, - "loss": 5.3998, - "step": 27340 - }, - { - "epoch": 14.258670143415905, - "grad_norm": 1.5036815404891968, - "learning_rate": 7.30321608040201e-05, - "loss": 5.3328, - "step": 27341 - }, - { - "epoch": 14.259191655801825, - "grad_norm": 1.5330265760421753, - "learning_rate": 7.303115577889448e-05, - "loss": 5.4874, - "step": 27342 - }, - { - "epoch": 14.259713168187744, - "grad_norm": 1.5450650453567505, - "learning_rate": 7.303015075376884e-05, - "loss": 5.3454, - "step": 27343 - }, - { - "epoch": 14.260234680573664, - "grad_norm": 1.4540239572525024, - "learning_rate": 7.302914572864322e-05, - "loss": 5.2745, - "step": 27344 - }, - { - "epoch": 14.260756192959583, - "grad_norm": 1.3969920873641968, - "learning_rate": 7.302814070351758e-05, - "loss": 5.5401, - "step": 27345 - }, - { - "epoch": 14.261277705345503, - "grad_norm": 1.4719725847244263, - "learning_rate": 7.302713567839196e-05, - "loss": 5.574, - "step": 27346 - }, - { - "epoch": 14.26179921773142, - "grad_norm": 1.498020052909851, - "learning_rate": 7.302613065326634e-05, - "loss": 5.1179, - "step": 27347 - }, - { - "epoch": 14.26232073011734, - "grad_norm": 1.498841404914856, - "learning_rate": 7.302512562814072e-05, - "loss": 5.1371, - "step": 27348 - }, - { - "epoch": 14.26284224250326, - "grad_norm": 1.5515542030334473, - "learning_rate": 7.302412060301508e-05, - "loss": 5.5567, - "step": 27349 - }, - { - "epoch": 14.263363754889179, - "grad_norm": 1.446483850479126, - "learning_rate": 7.302311557788946e-05, - "loss": 5.3859, - "step": 27350 - }, - { - "epoch": 14.263885267275098, - "grad_norm": 1.4374140501022339, - "learning_rate": 7.302211055276382e-05, - "loss": 5.6827, - "step": 27351 - }, - { - "epoch": 14.264406779661018, - "grad_norm": 1.5385171175003052, - "learning_rate": 7.302110552763819e-05, - "loss": 5.2828, - "step": 27352 - }, - { - "epoch": 14.264928292046935, - "grad_norm": 1.4892191886901855, - "learning_rate": 7.302010050251256e-05, - "loss": 5.5085, - "step": 27353 - }, - { - "epoch": 14.265449804432855, - "grad_norm": 1.4354424476623535, - "learning_rate": 7.301909547738693e-05, - "loss": 5.7253, - "step": 27354 - }, - { - "epoch": 14.265971316818774, - "grad_norm": 1.4219563007354736, - "learning_rate": 7.30180904522613e-05, - "loss": 5.6285, - "step": 27355 - }, - { - "epoch": 14.266492829204694, - "grad_norm": 1.527944564819336, - "learning_rate": 7.301708542713568e-05, - "loss": 5.4898, - "step": 27356 - }, - { - "epoch": 14.267014341590613, - "grad_norm": 1.3339481353759766, - "learning_rate": 7.301608040201006e-05, - "loss": 5.5757, - "step": 27357 - }, - { - "epoch": 14.267535853976533, - "grad_norm": 1.444425106048584, - "learning_rate": 7.301507537688443e-05, - "loss": 5.4457, - "step": 27358 - }, - { - "epoch": 14.26805736636245, - "grad_norm": 1.5400893688201904, - "learning_rate": 7.30140703517588e-05, - "loss": 4.9829, - "step": 27359 - }, - { - "epoch": 14.26857887874837, - "grad_norm": 1.4485074281692505, - "learning_rate": 7.301306532663317e-05, - "loss": 5.5972, - "step": 27360 - }, - { - "epoch": 14.26910039113429, - "grad_norm": 1.6577684879302979, - "learning_rate": 7.301206030150755e-05, - "loss": 5.1183, - "step": 27361 - }, - { - "epoch": 14.269621903520209, - "grad_norm": 1.5232212543487549, - "learning_rate": 7.301105527638191e-05, - "loss": 5.4756, - "step": 27362 - }, - { - "epoch": 14.270143415906128, - "grad_norm": 1.5928058624267578, - "learning_rate": 7.301005025125629e-05, - "loss": 5.4435, - "step": 27363 - }, - { - "epoch": 14.270664928292048, - "grad_norm": 1.456756830215454, - "learning_rate": 7.300904522613065e-05, - "loss": 5.4724, - "step": 27364 - }, - { - "epoch": 14.271186440677965, - "grad_norm": 1.4948891401290894, - "learning_rate": 7.300804020100502e-05, - "loss": 5.5486, - "step": 27365 - }, - { - "epoch": 14.271707953063885, - "grad_norm": 1.4610286951065063, - "learning_rate": 7.30070351758794e-05, - "loss": 4.9851, - "step": 27366 - }, - { - "epoch": 14.272229465449804, - "grad_norm": 1.5788755416870117, - "learning_rate": 7.300603015075377e-05, - "loss": 5.2745, - "step": 27367 - }, - { - "epoch": 14.272750977835724, - "grad_norm": 1.4676644802093506, - "learning_rate": 7.300502512562815e-05, - "loss": 5.0938, - "step": 27368 - }, - { - "epoch": 14.273272490221643, - "grad_norm": 1.4719160795211792, - "learning_rate": 7.300402010050251e-05, - "loss": 5.2781, - "step": 27369 - }, - { - "epoch": 14.273794002607563, - "grad_norm": 1.4142422676086426, - "learning_rate": 7.300301507537689e-05, - "loss": 4.7577, - "step": 27370 - }, - { - "epoch": 14.27431551499348, - "grad_norm": 1.4793493747711182, - "learning_rate": 7.300201005025126e-05, - "loss": 5.3576, - "step": 27371 - }, - { - "epoch": 14.2748370273794, - "grad_norm": 1.4387786388397217, - "learning_rate": 7.300100502512563e-05, - "loss": 5.8266, - "step": 27372 - }, - { - "epoch": 14.27535853976532, - "grad_norm": 1.5395426750183105, - "learning_rate": 7.3e-05, - "loss": 5.3784, - "step": 27373 - }, - { - "epoch": 14.275880052151239, - "grad_norm": 1.4667255878448486, - "learning_rate": 7.299899497487438e-05, - "loss": 5.5751, - "step": 27374 - }, - { - "epoch": 14.276401564537158, - "grad_norm": 1.5359019041061401, - "learning_rate": 7.299798994974874e-05, - "loss": 5.6446, - "step": 27375 - }, - { - "epoch": 14.276923076923078, - "grad_norm": 1.4388748407363892, - "learning_rate": 7.299698492462312e-05, - "loss": 5.4415, - "step": 27376 - }, - { - "epoch": 14.277444589308995, - "grad_norm": 1.543497920036316, - "learning_rate": 7.29959798994975e-05, - "loss": 5.1976, - "step": 27377 - }, - { - "epoch": 14.277966101694915, - "grad_norm": 1.4516485929489136, - "learning_rate": 7.299497487437186e-05, - "loss": 5.6099, - "step": 27378 - }, - { - "epoch": 14.278487614080834, - "grad_norm": 1.4513481855392456, - "learning_rate": 7.299396984924624e-05, - "loss": 4.9617, - "step": 27379 - }, - { - "epoch": 14.279009126466754, - "grad_norm": 1.4837782382965088, - "learning_rate": 7.29929648241206e-05, - "loss": 5.2316, - "step": 27380 - }, - { - "epoch": 14.279530638852673, - "grad_norm": 1.4634177684783936, - "learning_rate": 7.299195979899498e-05, - "loss": 5.1693, - "step": 27381 - }, - { - "epoch": 14.280052151238593, - "grad_norm": 1.5808354616165161, - "learning_rate": 7.299095477386935e-05, - "loss": 5.1263, - "step": 27382 - }, - { - "epoch": 14.28057366362451, - "grad_norm": 1.4774978160858154, - "learning_rate": 7.298994974874372e-05, - "loss": 5.3355, - "step": 27383 - }, - { - "epoch": 14.28109517601043, - "grad_norm": 1.4695812463760376, - "learning_rate": 7.298894472361809e-05, - "loss": 4.8298, - "step": 27384 - }, - { - "epoch": 14.28161668839635, - "grad_norm": 1.3788570165634155, - "learning_rate": 7.298793969849247e-05, - "loss": 5.6672, - "step": 27385 - }, - { - "epoch": 14.282138200782269, - "grad_norm": 1.3766332864761353, - "learning_rate": 7.298693467336683e-05, - "loss": 5.5087, - "step": 27386 - }, - { - "epoch": 14.282659713168188, - "grad_norm": 1.4634814262390137, - "learning_rate": 7.298592964824121e-05, - "loss": 5.4449, - "step": 27387 - }, - { - "epoch": 14.283181225554108, - "grad_norm": 1.4889185428619385, - "learning_rate": 7.298492462311559e-05, - "loss": 5.5199, - "step": 27388 - }, - { - "epoch": 14.283702737940025, - "grad_norm": 1.3976187705993652, - "learning_rate": 7.298391959798996e-05, - "loss": 5.766, - "step": 27389 - }, - { - "epoch": 14.284224250325945, - "grad_norm": 1.4465142488479614, - "learning_rate": 7.298291457286433e-05, - "loss": 5.7675, - "step": 27390 - }, - { - "epoch": 14.284745762711864, - "grad_norm": 1.5418596267700195, - "learning_rate": 7.298190954773869e-05, - "loss": 5.0603, - "step": 27391 - }, - { - "epoch": 14.285267275097784, - "grad_norm": 1.5821375846862793, - "learning_rate": 7.298090452261307e-05, - "loss": 4.8062, - "step": 27392 - }, - { - "epoch": 14.285788787483703, - "grad_norm": Infinity, - "learning_rate": 7.298090452261307e-05, - "loss": 5.8957, - "step": 27393 - }, - { - "epoch": 14.286310299869623, - "grad_norm": 1.429723858833313, - "learning_rate": 7.297989949748743e-05, - "loss": 4.5338, - "step": 27394 - }, - { - "epoch": 14.28683181225554, - "grad_norm": 1.4929630756378174, - "learning_rate": 7.297889447236181e-05, - "loss": 5.3426, - "step": 27395 - }, - { - "epoch": 14.28735332464146, - "grad_norm": 1.4872736930847168, - "learning_rate": 7.297788944723618e-05, - "loss": 5.5494, - "step": 27396 - }, - { - "epoch": 14.28787483702738, - "grad_norm": 1.4138108491897583, - "learning_rate": 7.297688442211055e-05, - "loss": 5.6932, - "step": 27397 - }, - { - "epoch": 14.288396349413299, - "grad_norm": 1.7120633125305176, - "learning_rate": 7.297587939698493e-05, - "loss": 5.008, - "step": 27398 - }, - { - "epoch": 14.288917861799218, - "grad_norm": 1.3868367671966553, - "learning_rate": 7.297487437185931e-05, - "loss": 5.2364, - "step": 27399 - }, - { - "epoch": 14.289439374185136, - "grad_norm": 1.5802234411239624, - "learning_rate": 7.297386934673367e-05, - "loss": 5.1803, - "step": 27400 - }, - { - "epoch": 14.289960886571055, - "grad_norm": 1.4652255773544312, - "learning_rate": 7.297286432160805e-05, - "loss": 5.0667, - "step": 27401 - }, - { - "epoch": 14.290482398956975, - "grad_norm": 1.4111429452896118, - "learning_rate": 7.297185929648242e-05, - "loss": 5.5551, - "step": 27402 - }, - { - "epoch": 14.291003911342894, - "grad_norm": 1.5009095668792725, - "learning_rate": 7.29708542713568e-05, - "loss": 5.2578, - "step": 27403 - }, - { - "epoch": 14.291525423728814, - "grad_norm": 1.4413847923278809, - "learning_rate": 7.296984924623116e-05, - "loss": 5.4794, - "step": 27404 - }, - { - "epoch": 14.292046936114733, - "grad_norm": 1.590943694114685, - "learning_rate": 7.296884422110552e-05, - "loss": 4.8703, - "step": 27405 - }, - { - "epoch": 14.292568448500653, - "grad_norm": 1.5355826616287231, - "learning_rate": 7.29678391959799e-05, - "loss": 4.9567, - "step": 27406 - }, - { - "epoch": 14.29308996088657, - "grad_norm": 1.5388612747192383, - "learning_rate": 7.296683417085426e-05, - "loss": 4.5651, - "step": 27407 - }, - { - "epoch": 14.29361147327249, - "grad_norm": 1.4259611368179321, - "learning_rate": 7.296582914572864e-05, - "loss": 5.313, - "step": 27408 - }, - { - "epoch": 14.29413298565841, - "grad_norm": 1.4333133697509766, - "learning_rate": 7.296482412060302e-05, - "loss": 5.5652, - "step": 27409 - }, - { - "epoch": 14.294654498044329, - "grad_norm": 1.3938486576080322, - "learning_rate": 7.29638190954774e-05, - "loss": 5.2156, - "step": 27410 - }, - { - "epoch": 14.295176010430248, - "grad_norm": 1.4745677709579468, - "learning_rate": 7.296281407035176e-05, - "loss": 5.5155, - "step": 27411 - }, - { - "epoch": 14.295697522816166, - "grad_norm": 1.3446909189224243, - "learning_rate": 7.296180904522614e-05, - "loss": 5.62, - "step": 27412 - }, - { - "epoch": 14.296219035202085, - "grad_norm": 1.4755216836929321, - "learning_rate": 7.29608040201005e-05, - "loss": 4.6842, - "step": 27413 - }, - { - "epoch": 14.296740547588005, - "grad_norm": 1.4578700065612793, - "learning_rate": 7.295979899497488e-05, - "loss": 5.2694, - "step": 27414 - }, - { - "epoch": 14.297262059973924, - "grad_norm": 1.6066657304763794, - "learning_rate": 7.295879396984925e-05, - "loss": 5.1118, - "step": 27415 - }, - { - "epoch": 14.297783572359844, - "grad_norm": 1.4480838775634766, - "learning_rate": 7.295778894472362e-05, - "loss": 5.5573, - "step": 27416 - }, - { - "epoch": 14.298305084745763, - "grad_norm": 1.5230098962783813, - "learning_rate": 7.295678391959799e-05, - "loss": 4.9817, - "step": 27417 - }, - { - "epoch": 14.298826597131681, - "grad_norm": 1.4324089288711548, - "learning_rate": 7.295577889447237e-05, - "loss": 5.7656, - "step": 27418 - }, - { - "epoch": 14.2993481095176, - "grad_norm": 1.423113226890564, - "learning_rate": 7.295477386934674e-05, - "loss": 5.7986, - "step": 27419 - }, - { - "epoch": 14.29986962190352, - "grad_norm": 1.5300703048706055, - "learning_rate": 7.295376884422111e-05, - "loss": 5.3327, - "step": 27420 - }, - { - "epoch": 14.30039113428944, - "grad_norm": 1.4866975545883179, - "learning_rate": 7.295276381909549e-05, - "loss": 5.4357, - "step": 27421 - }, - { - "epoch": 14.300912646675359, - "grad_norm": 1.5817064046859741, - "learning_rate": 7.295175879396985e-05, - "loss": 5.3046, - "step": 27422 - }, - { - "epoch": 14.301434159061278, - "grad_norm": 1.4705170392990112, - "learning_rate": 7.295075376884423e-05, - "loss": 5.3935, - "step": 27423 - }, - { - "epoch": 14.301955671447196, - "grad_norm": 1.5175158977508545, - "learning_rate": 7.294974874371859e-05, - "loss": 5.3295, - "step": 27424 - }, - { - "epoch": 14.302477183833116, - "grad_norm": 1.4500956535339355, - "learning_rate": 7.294874371859297e-05, - "loss": 5.071, - "step": 27425 - }, - { - "epoch": 14.302998696219035, - "grad_norm": 1.4936840534210205, - "learning_rate": 7.294773869346733e-05, - "loss": 5.5144, - "step": 27426 - }, - { - "epoch": 14.303520208604954, - "grad_norm": 1.461538553237915, - "learning_rate": 7.294673366834171e-05, - "loss": 5.7307, - "step": 27427 - }, - { - "epoch": 14.304041720990874, - "grad_norm": 1.3690671920776367, - "learning_rate": 7.294572864321608e-05, - "loss": 4.5548, - "step": 27428 - }, - { - "epoch": 14.304563233376793, - "grad_norm": 1.5518805980682373, - "learning_rate": 7.294472361809045e-05, - "loss": 5.6204, - "step": 27429 - }, - { - "epoch": 14.305084745762711, - "grad_norm": 1.3631479740142822, - "learning_rate": 7.294371859296483e-05, - "loss": 5.6889, - "step": 27430 - }, - { - "epoch": 14.30560625814863, - "grad_norm": 1.4839521646499634, - "learning_rate": 7.294271356783921e-05, - "loss": 5.8543, - "step": 27431 - }, - { - "epoch": 14.30612777053455, - "grad_norm": 1.4553381204605103, - "learning_rate": 7.294170854271357e-05, - "loss": 4.9997, - "step": 27432 - }, - { - "epoch": 14.30664928292047, - "grad_norm": 1.4323261976242065, - "learning_rate": 7.294070351758794e-05, - "loss": 5.2329, - "step": 27433 - }, - { - "epoch": 14.307170795306389, - "grad_norm": 1.451188087463379, - "learning_rate": 7.293969849246232e-05, - "loss": 5.55, - "step": 27434 - }, - { - "epoch": 14.307692307692308, - "grad_norm": 1.3303563594818115, - "learning_rate": 7.293869346733668e-05, - "loss": 5.9226, - "step": 27435 - }, - { - "epoch": 14.308213820078226, - "grad_norm": 1.4706032276153564, - "learning_rate": 7.293768844221106e-05, - "loss": 5.2359, - "step": 27436 - }, - { - "epoch": 14.308735332464146, - "grad_norm": 1.4650769233703613, - "learning_rate": 7.293668341708542e-05, - "loss": 5.2461, - "step": 27437 - }, - { - "epoch": 14.309256844850065, - "grad_norm": 1.5248686075210571, - "learning_rate": 7.29356783919598e-05, - "loss": 5.5307, - "step": 27438 - }, - { - "epoch": 14.309778357235984, - "grad_norm": 1.4946792125701904, - "learning_rate": 7.293467336683418e-05, - "loss": 5.0137, - "step": 27439 - }, - { - "epoch": 14.310299869621904, - "grad_norm": 1.6461896896362305, - "learning_rate": 7.293366834170856e-05, - "loss": 4.8529, - "step": 27440 - }, - { - "epoch": 14.310821382007823, - "grad_norm": 1.4075337648391724, - "learning_rate": 7.293266331658292e-05, - "loss": 5.4802, - "step": 27441 - }, - { - "epoch": 14.311342894393741, - "grad_norm": 1.5433053970336914, - "learning_rate": 7.29316582914573e-05, - "loss": 5.3915, - "step": 27442 - }, - { - "epoch": 14.31186440677966, - "grad_norm": 1.578058123588562, - "learning_rate": 7.293065326633166e-05, - "loss": 5.2357, - "step": 27443 - }, - { - "epoch": 14.31238591916558, - "grad_norm": 1.4983476400375366, - "learning_rate": 7.292964824120604e-05, - "loss": 5.078, - "step": 27444 - }, - { - "epoch": 14.3129074315515, - "grad_norm": 1.4702011346817017, - "learning_rate": 7.29286432160804e-05, - "loss": 5.3803, - "step": 27445 - }, - { - "epoch": 14.313428943937419, - "grad_norm": 1.4907379150390625, - "learning_rate": 7.292763819095477e-05, - "loss": 5.4359, - "step": 27446 - }, - { - "epoch": 14.313950456323338, - "grad_norm": 1.3938888311386108, - "learning_rate": 7.292663316582915e-05, - "loss": 5.6552, - "step": 27447 - }, - { - "epoch": 14.314471968709256, - "grad_norm": 1.4328768253326416, - "learning_rate": 7.292562814070351e-05, - "loss": 5.1002, - "step": 27448 - }, - { - "epoch": 14.314993481095176, - "grad_norm": 1.429824709892273, - "learning_rate": 7.292462311557789e-05, - "loss": 5.5015, - "step": 27449 - }, - { - "epoch": 14.315514993481095, - "grad_norm": 1.4663156270980835, - "learning_rate": 7.292361809045227e-05, - "loss": 5.5363, - "step": 27450 - }, - { - "epoch": 14.316036505867014, - "grad_norm": 1.827901005744934, - "learning_rate": 7.292261306532664e-05, - "loss": 5.2194, - "step": 27451 - }, - { - "epoch": 14.316558018252934, - "grad_norm": 1.622591257095337, - "learning_rate": 7.292160804020101e-05, - "loss": 5.0618, - "step": 27452 - }, - { - "epoch": 14.317079530638853, - "grad_norm": 1.586893081665039, - "learning_rate": 7.292060301507539e-05, - "loss": 5.3969, - "step": 27453 - }, - { - "epoch": 14.317601043024771, - "grad_norm": 1.5023853778839111, - "learning_rate": 7.291959798994975e-05, - "loss": 5.1119, - "step": 27454 - }, - { - "epoch": 14.31812255541069, - "grad_norm": 1.409922480583191, - "learning_rate": 7.291859296482413e-05, - "loss": 5.1566, - "step": 27455 - }, - { - "epoch": 14.31864406779661, - "grad_norm": 1.5489652156829834, - "learning_rate": 7.291758793969849e-05, - "loss": 5.5798, - "step": 27456 - }, - { - "epoch": 14.31916558018253, - "grad_norm": 1.538278341293335, - "learning_rate": 7.291658291457287e-05, - "loss": 5.0624, - "step": 27457 - }, - { - "epoch": 14.319687092568449, - "grad_norm": 1.545274019241333, - "learning_rate": 7.291557788944724e-05, - "loss": 5.4518, - "step": 27458 - }, - { - "epoch": 14.320208604954368, - "grad_norm": 1.5135568380355835, - "learning_rate": 7.291457286432161e-05, - "loss": 4.8557, - "step": 27459 - }, - { - "epoch": 14.320730117340286, - "grad_norm": 1.5176647901535034, - "learning_rate": 7.291356783919599e-05, - "loss": 5.1626, - "step": 27460 - }, - { - "epoch": 14.321251629726206, - "grad_norm": 1.4832367897033691, - "learning_rate": 7.291256281407036e-05, - "loss": 4.8578, - "step": 27461 - }, - { - "epoch": 14.321773142112125, - "grad_norm": 1.5253783464431763, - "learning_rate": 7.291155778894473e-05, - "loss": 5.4127, - "step": 27462 - }, - { - "epoch": 14.322294654498045, - "grad_norm": 1.4345839023590088, - "learning_rate": 7.29105527638191e-05, - "loss": 5.7144, - "step": 27463 - }, - { - "epoch": 14.322816166883964, - "grad_norm": 1.4969979524612427, - "learning_rate": 7.290954773869347e-05, - "loss": 5.7417, - "step": 27464 - }, - { - "epoch": 14.323337679269883, - "grad_norm": 1.6141074895858765, - "learning_rate": 7.290854271356784e-05, - "loss": 4.8035, - "step": 27465 - }, - { - "epoch": 14.323859191655801, - "grad_norm": 1.488839864730835, - "learning_rate": 7.290753768844222e-05, - "loss": 5.4275, - "step": 27466 - }, - { - "epoch": 14.32438070404172, - "grad_norm": 1.479506015777588, - "learning_rate": 7.290653266331658e-05, - "loss": 5.2807, - "step": 27467 - }, - { - "epoch": 14.32490221642764, - "grad_norm": 1.498016595840454, - "learning_rate": 7.290552763819096e-05, - "loss": 5.3197, - "step": 27468 - }, - { - "epoch": 14.32542372881356, - "grad_norm": 1.457808494567871, - "learning_rate": 7.290452261306532e-05, - "loss": 4.8532, - "step": 27469 - }, - { - "epoch": 14.325945241199479, - "grad_norm": 1.4311151504516602, - "learning_rate": 7.29035175879397e-05, - "loss": 5.9212, - "step": 27470 - }, - { - "epoch": 14.326466753585398, - "grad_norm": 1.4903500080108643, - "learning_rate": 7.290251256281408e-05, - "loss": 5.3105, - "step": 27471 - }, - { - "epoch": 14.326988265971316, - "grad_norm": 1.509214997291565, - "learning_rate": 7.290150753768844e-05, - "loss": 5.3877, - "step": 27472 - }, - { - "epoch": 14.327509778357236, - "grad_norm": 1.3766292333602905, - "learning_rate": 7.290050251256282e-05, - "loss": 5.4578, - "step": 27473 - }, - { - "epoch": 14.328031290743155, - "grad_norm": 1.365463376045227, - "learning_rate": 7.289949748743719e-05, - "loss": 5.6792, - "step": 27474 - }, - { - "epoch": 14.328552803129075, - "grad_norm": 1.3935855627059937, - "learning_rate": 7.289849246231156e-05, - "loss": 5.5715, - "step": 27475 - }, - { - "epoch": 14.329074315514994, - "grad_norm": 1.4434998035430908, - "learning_rate": 7.289748743718593e-05, - "loss": 5.431, - "step": 27476 - }, - { - "epoch": 14.329595827900913, - "grad_norm": 1.3183585405349731, - "learning_rate": 7.28964824120603e-05, - "loss": 5.6513, - "step": 27477 - }, - { - "epoch": 14.330117340286831, - "grad_norm": 1.519308090209961, - "learning_rate": 7.289547738693467e-05, - "loss": 5.0445, - "step": 27478 - }, - { - "epoch": 14.33063885267275, - "grad_norm": 1.454529881477356, - "learning_rate": 7.289447236180905e-05, - "loss": 5.4571, - "step": 27479 - }, - { - "epoch": 14.33116036505867, - "grad_norm": 1.371730089187622, - "learning_rate": 7.289346733668343e-05, - "loss": 5.4375, - "step": 27480 - }, - { - "epoch": 14.33168187744459, - "grad_norm": 1.4735101461410522, - "learning_rate": 7.28924623115578e-05, - "loss": 5.2869, - "step": 27481 - }, - { - "epoch": 14.332203389830509, - "grad_norm": 1.4615155458450317, - "learning_rate": 7.289145728643217e-05, - "loss": 5.3303, - "step": 27482 - }, - { - "epoch": 14.332724902216428, - "grad_norm": 1.3872510194778442, - "learning_rate": 7.289045226130655e-05, - "loss": 5.6194, - "step": 27483 - }, - { - "epoch": 14.333246414602346, - "grad_norm": 1.4903813600540161, - "learning_rate": 7.288944723618091e-05, - "loss": 5.2321, - "step": 27484 - }, - { - "epoch": 14.333767926988266, - "grad_norm": 1.5123133659362793, - "learning_rate": 7.288844221105527e-05, - "loss": 5.5484, - "step": 27485 - }, - { - "epoch": 14.334289439374185, - "grad_norm": 1.626212239265442, - "learning_rate": 7.288743718592965e-05, - "loss": 5.5409, - "step": 27486 - }, - { - "epoch": 14.334810951760105, - "grad_norm": 1.425682783126831, - "learning_rate": 7.288643216080402e-05, - "loss": 5.5576, - "step": 27487 - }, - { - "epoch": 14.335332464146024, - "grad_norm": 1.4601831436157227, - "learning_rate": 7.28854271356784e-05, - "loss": 5.2305, - "step": 27488 - }, - { - "epoch": 14.335853976531943, - "grad_norm": 1.6676387786865234, - "learning_rate": 7.288442211055276e-05, - "loss": 5.4868, - "step": 27489 - }, - { - "epoch": 14.336375488917861, - "grad_norm": 1.478464126586914, - "learning_rate": 7.288341708542714e-05, - "loss": 5.0543, - "step": 27490 - }, - { - "epoch": 14.33689700130378, - "grad_norm": 1.5951961278915405, - "learning_rate": 7.288241206030151e-05, - "loss": 5.4372, - "step": 27491 - }, - { - "epoch": 14.3374185136897, - "grad_norm": 1.4223047494888306, - "learning_rate": 7.288140703517589e-05, - "loss": 5.3052, - "step": 27492 - }, - { - "epoch": 14.33794002607562, - "grad_norm": 1.5344531536102295, - "learning_rate": 7.288040201005026e-05, - "loss": 5.7386, - "step": 27493 - }, - { - "epoch": 14.338461538461539, - "grad_norm": 1.3639038801193237, - "learning_rate": 7.287939698492463e-05, - "loss": 5.6676, - "step": 27494 - }, - { - "epoch": 14.338983050847457, - "grad_norm": 1.3661466836929321, - "learning_rate": 7.2878391959799e-05, - "loss": 5.5521, - "step": 27495 - }, - { - "epoch": 14.339504563233376, - "grad_norm": 1.4507789611816406, - "learning_rate": 7.287738693467338e-05, - "loss": 5.5652, - "step": 27496 - }, - { - "epoch": 14.340026075619296, - "grad_norm": 1.3794257640838623, - "learning_rate": 7.287638190954774e-05, - "loss": 5.1714, - "step": 27497 - }, - { - "epoch": 14.340547588005215, - "grad_norm": 1.5496329069137573, - "learning_rate": 7.28753768844221e-05, - "loss": 4.9416, - "step": 27498 - }, - { - "epoch": 14.341069100391135, - "grad_norm": 1.4350783824920654, - "learning_rate": 7.287437185929648e-05, - "loss": 5.2649, - "step": 27499 - }, - { - "epoch": 14.341590612777054, - "grad_norm": 1.4015618562698364, - "learning_rate": 7.287336683417085e-05, - "loss": 5.4201, - "step": 27500 - }, - { - "epoch": 14.342112125162974, - "grad_norm": 1.4831870794296265, - "learning_rate": 7.287236180904522e-05, - "loss": 5.4297, - "step": 27501 - }, - { - "epoch": 14.342633637548891, - "grad_norm": 1.5947537422180176, - "learning_rate": 7.28713567839196e-05, - "loss": 5.2367, - "step": 27502 - }, - { - "epoch": 14.34315514993481, - "grad_norm": 1.4614640474319458, - "learning_rate": 7.287035175879398e-05, - "loss": 5.1747, - "step": 27503 - }, - { - "epoch": 14.34367666232073, - "grad_norm": 1.3973021507263184, - "learning_rate": 7.286934673366834e-05, - "loss": 5.7724, - "step": 27504 - }, - { - "epoch": 14.34419817470665, - "grad_norm": 1.549750804901123, - "learning_rate": 7.286834170854272e-05, - "loss": 5.1747, - "step": 27505 - }, - { - "epoch": 14.344719687092569, - "grad_norm": 1.4175962209701538, - "learning_rate": 7.286733668341709e-05, - "loss": 5.4403, - "step": 27506 - }, - { - "epoch": 14.345241199478487, - "grad_norm": 1.4120837450027466, - "learning_rate": 7.286633165829146e-05, - "loss": 5.5791, - "step": 27507 - }, - { - "epoch": 14.345762711864406, - "grad_norm": 1.5028983354568481, - "learning_rate": 7.286532663316583e-05, - "loss": 5.1646, - "step": 27508 - }, - { - "epoch": 14.346284224250326, - "grad_norm": 1.467103123664856, - "learning_rate": 7.28643216080402e-05, - "loss": 5.6387, - "step": 27509 - }, - { - "epoch": 14.346805736636245, - "grad_norm": 1.6227364540100098, - "learning_rate": 7.286331658291457e-05, - "loss": 5.0672, - "step": 27510 - }, - { - "epoch": 14.347327249022165, - "grad_norm": 1.4558281898498535, - "learning_rate": 7.286231155778895e-05, - "loss": 5.2751, - "step": 27511 - }, - { - "epoch": 14.347848761408084, - "grad_norm": 1.3199410438537598, - "learning_rate": 7.286130653266333e-05, - "loss": 5.8937, - "step": 27512 - }, - { - "epoch": 14.348370273794002, - "grad_norm": 1.5531054735183716, - "learning_rate": 7.286030150753769e-05, - "loss": 5.2239, - "step": 27513 - }, - { - "epoch": 14.348891786179921, - "grad_norm": 1.5203312635421753, - "learning_rate": 7.285929648241207e-05, - "loss": 5.3984, - "step": 27514 - }, - { - "epoch": 14.34941329856584, - "grad_norm": 1.5603739023208618, - "learning_rate": 7.285829145728643e-05, - "loss": 5.1807, - "step": 27515 - }, - { - "epoch": 14.34993481095176, - "grad_norm": 1.48141348361969, - "learning_rate": 7.285728643216081e-05, - "loss": 5.1576, - "step": 27516 - }, - { - "epoch": 14.35045632333768, - "grad_norm": 1.3907461166381836, - "learning_rate": 7.285628140703517e-05, - "loss": 5.6045, - "step": 27517 - }, - { - "epoch": 14.350977835723599, - "grad_norm": 1.4803475141525269, - "learning_rate": 7.285527638190955e-05, - "loss": 5.4596, - "step": 27518 - }, - { - "epoch": 14.351499348109517, - "grad_norm": 1.5468178987503052, - "learning_rate": 7.285427135678392e-05, - "loss": 5.8264, - "step": 27519 - }, - { - "epoch": 14.352020860495436, - "grad_norm": 1.394327998161316, - "learning_rate": 7.28532663316583e-05, - "loss": 4.6619, - "step": 27520 - }, - { - "epoch": 14.352542372881356, - "grad_norm": 1.4731104373931885, - "learning_rate": 7.285226130653266e-05, - "loss": 5.1754, - "step": 27521 - }, - { - "epoch": 14.353063885267275, - "grad_norm": 1.5594784021377563, - "learning_rate": 7.285125628140704e-05, - "loss": 5.2541, - "step": 27522 - }, - { - "epoch": 14.353585397653195, - "grad_norm": 1.51421058177948, - "learning_rate": 7.285025125628141e-05, - "loss": 5.2421, - "step": 27523 - }, - { - "epoch": 14.354106910039114, - "grad_norm": 1.5547796487808228, - "learning_rate": 7.284924623115579e-05, - "loss": 5.1559, - "step": 27524 - }, - { - "epoch": 14.354628422425032, - "grad_norm": 1.439234733581543, - "learning_rate": 7.284824120603016e-05, - "loss": 5.6476, - "step": 27525 - }, - { - "epoch": 14.355149934810951, - "grad_norm": 1.5041128396987915, - "learning_rate": 7.284723618090452e-05, - "loss": 5.4048, - "step": 27526 - }, - { - "epoch": 14.35567144719687, - "grad_norm": 1.4282424449920654, - "learning_rate": 7.28462311557789e-05, - "loss": 4.9093, - "step": 27527 - }, - { - "epoch": 14.35619295958279, - "grad_norm": 1.970528483390808, - "learning_rate": 7.284522613065326e-05, - "loss": 5.3817, - "step": 27528 - }, - { - "epoch": 14.35671447196871, - "grad_norm": 1.387695074081421, - "learning_rate": 7.284422110552764e-05, - "loss": 5.6351, - "step": 27529 - }, - { - "epoch": 14.357235984354629, - "grad_norm": 1.4363348484039307, - "learning_rate": 7.2843216080402e-05, - "loss": 5.083, - "step": 27530 - }, - { - "epoch": 14.357757496740547, - "grad_norm": 1.3983062505722046, - "learning_rate": 7.284221105527638e-05, - "loss": 5.615, - "step": 27531 - }, - { - "epoch": 14.358279009126466, - "grad_norm": 1.4668991565704346, - "learning_rate": 7.284120603015076e-05, - "loss": 5.3182, - "step": 27532 - }, - { - "epoch": 14.358800521512386, - "grad_norm": 1.4466382265090942, - "learning_rate": 7.284020100502514e-05, - "loss": 5.2277, - "step": 27533 - }, - { - "epoch": 14.359322033898305, - "grad_norm": 1.5520015954971313, - "learning_rate": 7.28391959798995e-05, - "loss": 5.5486, - "step": 27534 - }, - { - "epoch": 14.359843546284225, - "grad_norm": 1.4535212516784668, - "learning_rate": 7.283819095477388e-05, - "loss": 5.2633, - "step": 27535 - }, - { - "epoch": 14.360365058670144, - "grad_norm": 1.4340425729751587, - "learning_rate": 7.283718592964824e-05, - "loss": 5.7504, - "step": 27536 - }, - { - "epoch": 14.360886571056062, - "grad_norm": 1.3371672630310059, - "learning_rate": 7.283618090452262e-05, - "loss": 5.4765, - "step": 27537 - }, - { - "epoch": 14.361408083441981, - "grad_norm": 1.5550291538238525, - "learning_rate": 7.283517587939699e-05, - "loss": 4.9285, - "step": 27538 - }, - { - "epoch": 14.3619295958279, - "grad_norm": 1.5957330465316772, - "learning_rate": 7.283417085427135e-05, - "loss": 4.7543, - "step": 27539 - }, - { - "epoch": 14.36245110821382, - "grad_norm": 1.4267586469650269, - "learning_rate": 7.283316582914573e-05, - "loss": 5.5164, - "step": 27540 - }, - { - "epoch": 14.36297262059974, - "grad_norm": 1.5070831775665283, - "learning_rate": 7.28321608040201e-05, - "loss": 5.3444, - "step": 27541 - }, - { - "epoch": 14.36349413298566, - "grad_norm": 1.6141208410263062, - "learning_rate": 7.283115577889447e-05, - "loss": 5.4401, - "step": 27542 - }, - { - "epoch": 14.364015645371577, - "grad_norm": 1.394382119178772, - "learning_rate": 7.283015075376885e-05, - "loss": 5.7596, - "step": 27543 - }, - { - "epoch": 14.364537157757496, - "grad_norm": 1.5034767389297485, - "learning_rate": 7.282914572864323e-05, - "loss": 5.3179, - "step": 27544 - }, - { - "epoch": 14.365058670143416, - "grad_norm": 1.5045875310897827, - "learning_rate": 7.282814070351759e-05, - "loss": 5.1891, - "step": 27545 - }, - { - "epoch": 14.365580182529335, - "grad_norm": 1.3071645498275757, - "learning_rate": 7.282713567839197e-05, - "loss": 5.1045, - "step": 27546 - }, - { - "epoch": 14.366101694915255, - "grad_norm": 1.3774950504302979, - "learning_rate": 7.282613065326633e-05, - "loss": 5.7486, - "step": 27547 - }, - { - "epoch": 14.366623207301174, - "grad_norm": 1.5498045682907104, - "learning_rate": 7.282512562814071e-05, - "loss": 5.0776, - "step": 27548 - }, - { - "epoch": 14.367144719687092, - "grad_norm": 1.5274643898010254, - "learning_rate": 7.282412060301508e-05, - "loss": 5.1576, - "step": 27549 - }, - { - "epoch": 14.367666232073011, - "grad_norm": 1.5538990497589111, - "learning_rate": 7.282311557788945e-05, - "loss": 4.9033, - "step": 27550 - }, - { - "epoch": 14.36818774445893, - "grad_norm": 1.4743868112564087, - "learning_rate": 7.282211055276382e-05, - "loss": 5.5234, - "step": 27551 - }, - { - "epoch": 14.36870925684485, - "grad_norm": 1.4400460720062256, - "learning_rate": 7.28211055276382e-05, - "loss": 5.6224, - "step": 27552 - }, - { - "epoch": 14.36923076923077, - "grad_norm": 1.470577597618103, - "learning_rate": 7.282010050251257e-05, - "loss": 5.4475, - "step": 27553 - }, - { - "epoch": 14.36975228161669, - "grad_norm": 1.4079021215438843, - "learning_rate": 7.281909547738694e-05, - "loss": 5.3497, - "step": 27554 - }, - { - "epoch": 14.370273794002607, - "grad_norm": 1.455165982246399, - "learning_rate": 7.281809045226132e-05, - "loss": 5.4691, - "step": 27555 - }, - { - "epoch": 14.370795306388526, - "grad_norm": 1.6710424423217773, - "learning_rate": 7.281708542713568e-05, - "loss": 4.7776, - "step": 27556 - }, - { - "epoch": 14.371316818774446, - "grad_norm": 1.60850191116333, - "learning_rate": 7.281608040201006e-05, - "loss": 4.7814, - "step": 27557 - }, - { - "epoch": 14.371838331160365, - "grad_norm": 1.511191487312317, - "learning_rate": 7.281507537688442e-05, - "loss": 5.3523, - "step": 27558 - }, - { - "epoch": 14.372359843546285, - "grad_norm": 1.4886810779571533, - "learning_rate": 7.28140703517588e-05, - "loss": 5.817, - "step": 27559 - }, - { - "epoch": 14.372881355932204, - "grad_norm": 1.4136666059494019, - "learning_rate": 7.281306532663316e-05, - "loss": 5.3974, - "step": 27560 - }, - { - "epoch": 14.373402868318122, - "grad_norm": 1.4605038166046143, - "learning_rate": 7.281206030150754e-05, - "loss": 5.069, - "step": 27561 - }, - { - "epoch": 14.373924380704041, - "grad_norm": 1.4737998247146606, - "learning_rate": 7.28110552763819e-05, - "loss": 5.6547, - "step": 27562 - }, - { - "epoch": 14.37444589308996, - "grad_norm": 1.4427639245986938, - "learning_rate": 7.281005025125628e-05, - "loss": 4.9386, - "step": 27563 - }, - { - "epoch": 14.37496740547588, - "grad_norm": 1.3967047929763794, - "learning_rate": 7.280904522613066e-05, - "loss": 5.5618, - "step": 27564 - }, - { - "epoch": 14.3754889178618, - "grad_norm": 1.5285439491271973, - "learning_rate": 7.280804020100503e-05, - "loss": 5.3108, - "step": 27565 - }, - { - "epoch": 14.37601043024772, - "grad_norm": 1.4670512676239014, - "learning_rate": 7.28070351758794e-05, - "loss": 4.8815, - "step": 27566 - }, - { - "epoch": 14.376531942633637, - "grad_norm": 1.4069863557815552, - "learning_rate": 7.280603015075377e-05, - "loss": 5.4389, - "step": 27567 - }, - { - "epoch": 14.377053455019556, - "grad_norm": 1.3812483549118042, - "learning_rate": 7.280502512562815e-05, - "loss": 5.239, - "step": 27568 - }, - { - "epoch": 14.377574967405476, - "grad_norm": 1.37523353099823, - "learning_rate": 7.280402010050251e-05, - "loss": 5.703, - "step": 27569 - }, - { - "epoch": 14.378096479791395, - "grad_norm": 1.4019538164138794, - "learning_rate": 7.280301507537689e-05, - "loss": 5.4652, - "step": 27570 - }, - { - "epoch": 14.378617992177315, - "grad_norm": 1.489944577217102, - "learning_rate": 7.280201005025125e-05, - "loss": 5.0633, - "step": 27571 - }, - { - "epoch": 14.379139504563234, - "grad_norm": 1.3382173776626587, - "learning_rate": 7.280100502512563e-05, - "loss": 5.4976, - "step": 27572 - }, - { - "epoch": 14.379661016949152, - "grad_norm": 1.4841772317886353, - "learning_rate": 7.280000000000001e-05, - "loss": 5.2958, - "step": 27573 - }, - { - "epoch": 14.380182529335071, - "grad_norm": 1.5747919082641602, - "learning_rate": 7.279899497487439e-05, - "loss": 5.0564, - "step": 27574 - }, - { - "epoch": 14.38070404172099, - "grad_norm": 1.512978196144104, - "learning_rate": 7.279798994974875e-05, - "loss": 5.179, - "step": 27575 - }, - { - "epoch": 14.38122555410691, - "grad_norm": 1.4918808937072754, - "learning_rate": 7.279698492462313e-05, - "loss": 5.1061, - "step": 27576 - }, - { - "epoch": 14.38174706649283, - "grad_norm": 1.4735060930252075, - "learning_rate": 7.279597989949749e-05, - "loss": 5.526, - "step": 27577 - }, - { - "epoch": 14.38226857887875, - "grad_norm": 1.419295310974121, - "learning_rate": 7.279497487437186e-05, - "loss": 5.3387, - "step": 27578 - }, - { - "epoch": 14.382790091264667, - "grad_norm": 1.5368515253067017, - "learning_rate": 7.279396984924623e-05, - "loss": 5.5125, - "step": 27579 - }, - { - "epoch": 14.383311603650586, - "grad_norm": 1.4590179920196533, - "learning_rate": 7.27929648241206e-05, - "loss": 5.7358, - "step": 27580 - }, - { - "epoch": 14.383833116036506, - "grad_norm": 1.4521738290786743, - "learning_rate": 7.279195979899498e-05, - "loss": 4.9136, - "step": 27581 - }, - { - "epoch": 14.384354628422425, - "grad_norm": 1.4377857446670532, - "learning_rate": 7.279095477386934e-05, - "loss": 5.7515, - "step": 27582 - }, - { - "epoch": 14.384876140808345, - "grad_norm": 1.4311844110488892, - "learning_rate": 7.278994974874372e-05, - "loss": 5.0427, - "step": 27583 - }, - { - "epoch": 14.385397653194264, - "grad_norm": 1.542914628982544, - "learning_rate": 7.27889447236181e-05, - "loss": 5.2271, - "step": 27584 - }, - { - "epoch": 14.385919165580182, - "grad_norm": 1.4536592960357666, - "learning_rate": 7.278793969849247e-05, - "loss": 5.829, - "step": 27585 - }, - { - "epoch": 14.386440677966101, - "grad_norm": 1.3909837007522583, - "learning_rate": 7.278693467336684e-05, - "loss": 5.3742, - "step": 27586 - }, - { - "epoch": 14.38696219035202, - "grad_norm": 1.4866034984588623, - "learning_rate": 7.278592964824122e-05, - "loss": 5.3987, - "step": 27587 - }, - { - "epoch": 14.38748370273794, - "grad_norm": 1.9148943424224854, - "learning_rate": 7.278492462311558e-05, - "loss": 4.9646, - "step": 27588 - }, - { - "epoch": 14.38800521512386, - "grad_norm": 1.397819995880127, - "learning_rate": 7.278391959798996e-05, - "loss": 5.779, - "step": 27589 - }, - { - "epoch": 14.388526727509777, - "grad_norm": 1.5458022356033325, - "learning_rate": 7.278291457286432e-05, - "loss": 4.9984, - "step": 27590 - }, - { - "epoch": 14.389048239895697, - "grad_norm": 1.4759503602981567, - "learning_rate": 7.278190954773869e-05, - "loss": 5.6125, - "step": 27591 - }, - { - "epoch": 14.389569752281616, - "grad_norm": 1.4542094469070435, - "learning_rate": 7.278090452261306e-05, - "loss": 5.3708, - "step": 27592 - }, - { - "epoch": 14.390091264667536, - "grad_norm": 1.465996265411377, - "learning_rate": 7.277989949748744e-05, - "loss": 5.226, - "step": 27593 - }, - { - "epoch": 14.390612777053455, - "grad_norm": 1.3688136339187622, - "learning_rate": 7.277889447236182e-05, - "loss": 5.6193, - "step": 27594 - }, - { - "epoch": 14.391134289439375, - "grad_norm": 1.5073856115341187, - "learning_rate": 7.277788944723618e-05, - "loss": 4.7274, - "step": 27595 - }, - { - "epoch": 14.391655801825294, - "grad_norm": 1.504684567451477, - "learning_rate": 7.277688442211056e-05, - "loss": 5.2116, - "step": 27596 - }, - { - "epoch": 14.392177314211212, - "grad_norm": 1.4186524152755737, - "learning_rate": 7.277587939698493e-05, - "loss": 5.3435, - "step": 27597 - }, - { - "epoch": 14.392698826597131, - "grad_norm": 1.4407153129577637, - "learning_rate": 7.27748743718593e-05, - "loss": 5.4847, - "step": 27598 - }, - { - "epoch": 14.39322033898305, - "grad_norm": 1.4323970079421997, - "learning_rate": 7.277386934673367e-05, - "loss": 5.5475, - "step": 27599 - }, - { - "epoch": 14.39374185136897, - "grad_norm": 1.4050661325454712, - "learning_rate": 7.277286432160805e-05, - "loss": 5.5076, - "step": 27600 - }, - { - "epoch": 14.39426336375489, - "grad_norm": 1.4234699010849, - "learning_rate": 7.277185929648241e-05, - "loss": 5.1492, - "step": 27601 - }, - { - "epoch": 14.394784876140807, - "grad_norm": 1.6301525831222534, - "learning_rate": 7.277085427135679e-05, - "loss": 4.7719, - "step": 27602 - }, - { - "epoch": 14.395306388526727, - "grad_norm": 1.6184967756271362, - "learning_rate": 7.276984924623115e-05, - "loss": 5.5347, - "step": 27603 - }, - { - "epoch": 14.395827900912646, - "grad_norm": 1.4663903713226318, - "learning_rate": 7.276884422110553e-05, - "loss": 5.0145, - "step": 27604 - }, - { - "epoch": 14.396349413298566, - "grad_norm": 1.7975527048110962, - "learning_rate": 7.276783919597991e-05, - "loss": 5.0368, - "step": 27605 - }, - { - "epoch": 14.396870925684485, - "grad_norm": 1.4398642778396606, - "learning_rate": 7.276683417085427e-05, - "loss": 5.4723, - "step": 27606 - }, - { - "epoch": 14.397392438070405, - "grad_norm": 1.5565979480743408, - "learning_rate": 7.276582914572865e-05, - "loss": 5.5326, - "step": 27607 - }, - { - "epoch": 14.397913950456322, - "grad_norm": 1.5106472969055176, - "learning_rate": 7.276482412060301e-05, - "loss": 5.2922, - "step": 27608 - }, - { - "epoch": 14.398435462842242, - "grad_norm": 1.5161550045013428, - "learning_rate": 7.276381909547739e-05, - "loss": 5.1155, - "step": 27609 - }, - { - "epoch": 14.398956975228161, - "grad_norm": 1.3271260261535645, - "learning_rate": 7.276281407035176e-05, - "loss": 5.65, - "step": 27610 - }, - { - "epoch": 14.399478487614081, - "grad_norm": 1.4156737327575684, - "learning_rate": 7.276180904522613e-05, - "loss": 5.1066, - "step": 27611 - }, - { - "epoch": 14.4, - "grad_norm": 1.4634207487106323, - "learning_rate": 7.27608040201005e-05, - "loss": 5.4179, - "step": 27612 - }, - { - "epoch": 14.40052151238592, - "grad_norm": 1.4780020713806152, - "learning_rate": 7.275979899497488e-05, - "loss": 5.6949, - "step": 27613 - }, - { - "epoch": 14.401043024771838, - "grad_norm": 1.5179702043533325, - "learning_rate": 7.275879396984925e-05, - "loss": 4.8475, - "step": 27614 - }, - { - "epoch": 14.401564537157757, - "grad_norm": 1.4587126970291138, - "learning_rate": 7.275778894472363e-05, - "loss": 5.6396, - "step": 27615 - }, - { - "epoch": 14.402086049543676, - "grad_norm": 1.5007832050323486, - "learning_rate": 7.2756783919598e-05, - "loss": 5.4136, - "step": 27616 - }, - { - "epoch": 14.402607561929596, - "grad_norm": 1.5162914991378784, - "learning_rate": 7.275577889447237e-05, - "loss": 5.3061, - "step": 27617 - }, - { - "epoch": 14.403129074315515, - "grad_norm": 1.358654499053955, - "learning_rate": 7.275477386934674e-05, - "loss": 5.8459, - "step": 27618 - }, - { - "epoch": 14.403650586701435, - "grad_norm": 1.4220609664916992, - "learning_rate": 7.27537688442211e-05, - "loss": 5.0533, - "step": 27619 - }, - { - "epoch": 14.404172099087353, - "grad_norm": 1.4709721803665161, - "learning_rate": 7.275276381909548e-05, - "loss": 5.0402, - "step": 27620 - }, - { - "epoch": 14.404693611473272, - "grad_norm": 1.576439619064331, - "learning_rate": 7.275175879396985e-05, - "loss": 5.1991, - "step": 27621 - }, - { - "epoch": 14.405215123859191, - "grad_norm": 1.3414099216461182, - "learning_rate": 7.275075376884422e-05, - "loss": 5.8174, - "step": 27622 - }, - { - "epoch": 14.405736636245111, - "grad_norm": 1.4068207740783691, - "learning_rate": 7.274974874371859e-05, - "loss": 5.5249, - "step": 27623 - }, - { - "epoch": 14.40625814863103, - "grad_norm": 1.419748067855835, - "learning_rate": 7.274874371859297e-05, - "loss": 5.5023, - "step": 27624 - }, - { - "epoch": 14.40677966101695, - "grad_norm": 1.4400179386138916, - "learning_rate": 7.274773869346734e-05, - "loss": 5.0627, - "step": 27625 - }, - { - "epoch": 14.407301173402868, - "grad_norm": 1.5194300413131714, - "learning_rate": 7.274673366834172e-05, - "loss": 5.4659, - "step": 27626 - }, - { - "epoch": 14.407822685788787, - "grad_norm": 1.5091147422790527, - "learning_rate": 7.274572864321609e-05, - "loss": 5.3377, - "step": 27627 - }, - { - "epoch": 14.408344198174706, - "grad_norm": 1.3843814134597778, - "learning_rate": 7.274472361809046e-05, - "loss": 5.5531, - "step": 27628 - }, - { - "epoch": 14.408865710560626, - "grad_norm": 1.4678709506988525, - "learning_rate": 7.274371859296483e-05, - "loss": 5.5789, - "step": 27629 - }, - { - "epoch": 14.409387222946545, - "grad_norm": 1.4849708080291748, - "learning_rate": 7.27427135678392e-05, - "loss": 5.5739, - "step": 27630 - }, - { - "epoch": 14.409908735332465, - "grad_norm": 1.5949519872665405, - "learning_rate": 7.274170854271357e-05, - "loss": 5.0549, - "step": 27631 - }, - { - "epoch": 14.410430247718383, - "grad_norm": 1.4980106353759766, - "learning_rate": 7.274070351758793e-05, - "loss": 5.0586, - "step": 27632 - }, - { - "epoch": 14.410951760104302, - "grad_norm": 1.3864704370498657, - "learning_rate": 7.273969849246231e-05, - "loss": 5.7898, - "step": 27633 - }, - { - "epoch": 14.411473272490221, - "grad_norm": 1.4171779155731201, - "learning_rate": 7.273869346733669e-05, - "loss": 4.6579, - "step": 27634 - }, - { - "epoch": 14.411994784876141, - "grad_norm": 1.5034793615341187, - "learning_rate": 7.273768844221107e-05, - "loss": 5.6096, - "step": 27635 - }, - { - "epoch": 14.41251629726206, - "grad_norm": 1.5430938005447388, - "learning_rate": 7.273668341708543e-05, - "loss": 4.7775, - "step": 27636 - }, - { - "epoch": 14.41303780964798, - "grad_norm": 1.440457820892334, - "learning_rate": 7.273567839195981e-05, - "loss": 4.9259, - "step": 27637 - }, - { - "epoch": 14.413559322033898, - "grad_norm": 1.4758105278015137, - "learning_rate": 7.273467336683417e-05, - "loss": 5.3109, - "step": 27638 - }, - { - "epoch": 14.414080834419817, - "grad_norm": 1.5456050634384155, - "learning_rate": 7.273366834170855e-05, - "loss": 5.1605, - "step": 27639 - }, - { - "epoch": 14.414602346805736, - "grad_norm": 1.5639119148254395, - "learning_rate": 7.273266331658292e-05, - "loss": 5.4143, - "step": 27640 - }, - { - "epoch": 14.415123859191656, - "grad_norm": 1.4406700134277344, - "learning_rate": 7.27316582914573e-05, - "loss": 4.9901, - "step": 27641 - }, - { - "epoch": 14.415645371577575, - "grad_norm": 1.467941164970398, - "learning_rate": 7.273065326633166e-05, - "loss": 5.6716, - "step": 27642 - }, - { - "epoch": 14.416166883963495, - "grad_norm": 1.4577616453170776, - "learning_rate": 7.272964824120604e-05, - "loss": 5.7237, - "step": 27643 - }, - { - "epoch": 14.416688396349413, - "grad_norm": 1.3786224126815796, - "learning_rate": 7.27286432160804e-05, - "loss": 5.494, - "step": 27644 - }, - { - "epoch": 14.417209908735332, - "grad_norm": 1.5208021402359009, - "learning_rate": 7.272763819095478e-05, - "loss": 5.1983, - "step": 27645 - }, - { - "epoch": 14.417731421121252, - "grad_norm": 1.5820552110671997, - "learning_rate": 7.272663316582916e-05, - "loss": 5.0089, - "step": 27646 - }, - { - "epoch": 14.418252933507171, - "grad_norm": 1.4551130533218384, - "learning_rate": 7.272562814070352e-05, - "loss": 5.3849, - "step": 27647 - }, - { - "epoch": 14.41877444589309, - "grad_norm": 1.447651743888855, - "learning_rate": 7.27246231155779e-05, - "loss": 5.4428, - "step": 27648 - }, - { - "epoch": 14.41929595827901, - "grad_norm": 1.5171215534210205, - "learning_rate": 7.272361809045226e-05, - "loss": 5.1578, - "step": 27649 - }, - { - "epoch": 14.419817470664928, - "grad_norm": 1.4382719993591309, - "learning_rate": 7.272261306532664e-05, - "loss": 5.8548, - "step": 27650 - }, - { - "epoch": 14.420338983050847, - "grad_norm": 1.5202951431274414, - "learning_rate": 7.2721608040201e-05, - "loss": 5.4418, - "step": 27651 - }, - { - "epoch": 14.420860495436767, - "grad_norm": 1.4068336486816406, - "learning_rate": 7.272060301507538e-05, - "loss": 5.4614, - "step": 27652 - }, - { - "epoch": 14.421382007822686, - "grad_norm": 1.525843620300293, - "learning_rate": 7.271959798994975e-05, - "loss": 5.2123, - "step": 27653 - }, - { - "epoch": 14.421903520208605, - "grad_norm": 1.5651401281356812, - "learning_rate": 7.271859296482412e-05, - "loss": 5.1019, - "step": 27654 - }, - { - "epoch": 14.422425032594525, - "grad_norm": 1.5088211297988892, - "learning_rate": 7.271758793969849e-05, - "loss": 5.1983, - "step": 27655 - }, - { - "epoch": 14.422946544980443, - "grad_norm": 1.521807312965393, - "learning_rate": 7.271658291457287e-05, - "loss": 5.8394, - "step": 27656 - }, - { - "epoch": 14.423468057366362, - "grad_norm": 1.5285305976867676, - "learning_rate": 7.271557788944724e-05, - "loss": 5.1647, - "step": 27657 - }, - { - "epoch": 14.423989569752282, - "grad_norm": 1.4264870882034302, - "learning_rate": 7.271457286432161e-05, - "loss": 5.6697, - "step": 27658 - }, - { - "epoch": 14.424511082138201, - "grad_norm": 1.3591221570968628, - "learning_rate": 7.271356783919599e-05, - "loss": 5.4783, - "step": 27659 - }, - { - "epoch": 14.42503259452412, - "grad_norm": 1.569525957107544, - "learning_rate": 7.271256281407035e-05, - "loss": 4.8244, - "step": 27660 - }, - { - "epoch": 14.42555410691004, - "grad_norm": 1.4206935167312622, - "learning_rate": 7.271155778894473e-05, - "loss": 5.3439, - "step": 27661 - }, - { - "epoch": 14.426075619295958, - "grad_norm": 1.724382758140564, - "learning_rate": 7.271055276381909e-05, - "loss": 5.296, - "step": 27662 - }, - { - "epoch": 14.426597131681877, - "grad_norm": 1.4348890781402588, - "learning_rate": 7.270954773869347e-05, - "loss": 5.7335, - "step": 27663 - }, - { - "epoch": 14.427118644067797, - "grad_norm": 1.4241927862167358, - "learning_rate": 7.270854271356783e-05, - "loss": 5.7369, - "step": 27664 - }, - { - "epoch": 14.427640156453716, - "grad_norm": 1.4064114093780518, - "learning_rate": 7.270753768844221e-05, - "loss": 5.8702, - "step": 27665 - }, - { - "epoch": 14.428161668839635, - "grad_norm": 1.5673658847808838, - "learning_rate": 7.270653266331659e-05, - "loss": 5.1883, - "step": 27666 - }, - { - "epoch": 14.428683181225555, - "grad_norm": 1.424349308013916, - "learning_rate": 7.270552763819097e-05, - "loss": 5.5786, - "step": 27667 - }, - { - "epoch": 14.429204693611473, - "grad_norm": 1.5355361700057983, - "learning_rate": 7.270452261306533e-05, - "loss": 5.2969, - "step": 27668 - }, - { - "epoch": 14.429726205997392, - "grad_norm": 1.5192137956619263, - "learning_rate": 7.270351758793971e-05, - "loss": 5.368, - "step": 27669 - }, - { - "epoch": 14.430247718383312, - "grad_norm": 1.5318351984024048, - "learning_rate": 7.270251256281407e-05, - "loss": 5.0327, - "step": 27670 - }, - { - "epoch": 14.430769230769231, - "grad_norm": 1.4656283855438232, - "learning_rate": 7.270150753768844e-05, - "loss": 5.5624, - "step": 27671 - }, - { - "epoch": 14.43129074315515, - "grad_norm": 1.5081496238708496, - "learning_rate": 7.270050251256282e-05, - "loss": 4.8374, - "step": 27672 - }, - { - "epoch": 14.43181225554107, - "grad_norm": 1.4863418340682983, - "learning_rate": 7.269949748743718e-05, - "loss": 5.5563, - "step": 27673 - }, - { - "epoch": 14.432333767926988, - "grad_norm": 1.4961192607879639, - "learning_rate": 7.269849246231156e-05, - "loss": 5.2957, - "step": 27674 - }, - { - "epoch": 14.432855280312907, - "grad_norm": 1.5365610122680664, - "learning_rate": 7.269748743718592e-05, - "loss": 5.1054, - "step": 27675 - }, - { - "epoch": 14.433376792698827, - "grad_norm": 1.471153736114502, - "learning_rate": 7.26964824120603e-05, - "loss": 5.0909, - "step": 27676 - }, - { - "epoch": 14.433898305084746, - "grad_norm": 1.3564966917037964, - "learning_rate": 7.269547738693468e-05, - "loss": 5.7511, - "step": 27677 - }, - { - "epoch": 14.434419817470665, - "grad_norm": 1.439067006111145, - "learning_rate": 7.269447236180906e-05, - "loss": 5.6651, - "step": 27678 - }, - { - "epoch": 14.434941329856585, - "grad_norm": 1.504670262336731, - "learning_rate": 7.269346733668342e-05, - "loss": 5.3846, - "step": 27679 - }, - { - "epoch": 14.435462842242503, - "grad_norm": 1.5574694871902466, - "learning_rate": 7.26924623115578e-05, - "loss": 5.4486, - "step": 27680 - }, - { - "epoch": 14.435984354628422, - "grad_norm": 1.5556209087371826, - "learning_rate": 7.269145728643216e-05, - "loss": 5.0894, - "step": 27681 - }, - { - "epoch": 14.436505867014342, - "grad_norm": 1.4284785985946655, - "learning_rate": 7.269045226130654e-05, - "loss": 5.7373, - "step": 27682 - }, - { - "epoch": 14.437027379400261, - "grad_norm": 1.4737839698791504, - "learning_rate": 7.26894472361809e-05, - "loss": 5.189, - "step": 27683 - }, - { - "epoch": 14.43754889178618, - "grad_norm": 1.528730869293213, - "learning_rate": 7.268844221105528e-05, - "loss": 5.0716, - "step": 27684 - }, - { - "epoch": 14.438070404172098, - "grad_norm": 1.3834675550460815, - "learning_rate": 7.268743718592965e-05, - "loss": 5.5831, - "step": 27685 - }, - { - "epoch": 14.438591916558018, - "grad_norm": 1.4192559719085693, - "learning_rate": 7.268643216080402e-05, - "loss": 5.358, - "step": 27686 - }, - { - "epoch": 14.439113428943937, - "grad_norm": 1.3601224422454834, - "learning_rate": 7.26854271356784e-05, - "loss": 5.6423, - "step": 27687 - }, - { - "epoch": 14.439634941329857, - "grad_norm": 1.3295645713806152, - "learning_rate": 7.268442211055277e-05, - "loss": 5.5172, - "step": 27688 - }, - { - "epoch": 14.440156453715776, - "grad_norm": 1.4197274446487427, - "learning_rate": 7.268341708542714e-05, - "loss": 4.945, - "step": 27689 - }, - { - "epoch": 14.440677966101696, - "grad_norm": 1.4548983573913574, - "learning_rate": 7.268241206030151e-05, - "loss": 5.8542, - "step": 27690 - }, - { - "epoch": 14.441199478487615, - "grad_norm": 1.4680837392807007, - "learning_rate": 7.268140703517589e-05, - "loss": 4.7951, - "step": 27691 - }, - { - "epoch": 14.441720990873533, - "grad_norm": 1.4789537191390991, - "learning_rate": 7.268040201005025e-05, - "loss": 5.1471, - "step": 27692 - }, - { - "epoch": 14.442242503259452, - "grad_norm": 1.3876022100448608, - "learning_rate": 7.267939698492463e-05, - "loss": 5.5583, - "step": 27693 - }, - { - "epoch": 14.442764015645372, - "grad_norm": 1.445919394493103, - "learning_rate": 7.267839195979899e-05, - "loss": 5.5631, - "step": 27694 - }, - { - "epoch": 14.443285528031291, - "grad_norm": 1.418931245803833, - "learning_rate": 7.267738693467337e-05, - "loss": 5.682, - "step": 27695 - }, - { - "epoch": 14.44380704041721, - "grad_norm": 1.4336682558059692, - "learning_rate": 7.267638190954774e-05, - "loss": 5.7093, - "step": 27696 - }, - { - "epoch": 14.444328552803128, - "grad_norm": 1.5327438116073608, - "learning_rate": 7.267537688442211e-05, - "loss": 5.1836, - "step": 27697 - }, - { - "epoch": 14.444850065189048, - "grad_norm": 1.5527621507644653, - "learning_rate": 7.267437185929649e-05, - "loss": 5.4599, - "step": 27698 - }, - { - "epoch": 14.445371577574967, - "grad_norm": 1.4755051136016846, - "learning_rate": 7.267336683417086e-05, - "loss": 5.2244, - "step": 27699 - }, - { - "epoch": 14.445893089960887, - "grad_norm": 1.5321329832077026, - "learning_rate": 7.267236180904523e-05, - "loss": 5.4423, - "step": 27700 - }, - { - "epoch": 14.446414602346806, - "grad_norm": 1.436448574066162, - "learning_rate": 7.26713567839196e-05, - "loss": 5.5744, - "step": 27701 - }, - { - "epoch": 14.446936114732726, - "grad_norm": 1.5852630138397217, - "learning_rate": 7.267035175879398e-05, - "loss": 5.4965, - "step": 27702 - }, - { - "epoch": 14.447457627118643, - "grad_norm": 1.543637990951538, - "learning_rate": 7.266934673366834e-05, - "loss": 5.3288, - "step": 27703 - }, - { - "epoch": 14.447979139504563, - "grad_norm": 1.5154595375061035, - "learning_rate": 7.266834170854272e-05, - "loss": 5.1237, - "step": 27704 - }, - { - "epoch": 14.448500651890482, - "grad_norm": 1.550469994544983, - "learning_rate": 7.266733668341708e-05, - "loss": 5.1771, - "step": 27705 - }, - { - "epoch": 14.449022164276402, - "grad_norm": 1.368315577507019, - "learning_rate": 7.266633165829146e-05, - "loss": 5.4059, - "step": 27706 - }, - { - "epoch": 14.449543676662321, - "grad_norm": 1.5344254970550537, - "learning_rate": 7.266532663316584e-05, - "loss": 5.5442, - "step": 27707 - }, - { - "epoch": 14.45006518904824, - "grad_norm": 1.4184614419937134, - "learning_rate": 7.266432160804021e-05, - "loss": 5.4949, - "step": 27708 - }, - { - "epoch": 14.450586701434158, - "grad_norm": 1.3642655611038208, - "learning_rate": 7.266331658291458e-05, - "loss": 5.6513, - "step": 27709 - }, - { - "epoch": 14.451108213820078, - "grad_norm": 1.4591944217681885, - "learning_rate": 7.266231155778896e-05, - "loss": 5.231, - "step": 27710 - }, - { - "epoch": 14.451629726205997, - "grad_norm": 1.5610713958740234, - "learning_rate": 7.266130653266332e-05, - "loss": 5.566, - "step": 27711 - }, - { - "epoch": 14.452151238591917, - "grad_norm": 1.4501713514328003, - "learning_rate": 7.266030150753769e-05, - "loss": 5.6188, - "step": 27712 - }, - { - "epoch": 14.452672750977836, - "grad_norm": 1.4586360454559326, - "learning_rate": 7.265929648241206e-05, - "loss": 5.523, - "step": 27713 - }, - { - "epoch": 14.453194263363756, - "grad_norm": 1.555533766746521, - "learning_rate": 7.265829145728643e-05, - "loss": 5.1676, - "step": 27714 - }, - { - "epoch": 14.453715775749673, - "grad_norm": 1.4611865282058716, - "learning_rate": 7.26572864321608e-05, - "loss": 5.4788, - "step": 27715 - }, - { - "epoch": 14.454237288135593, - "grad_norm": 1.4902434349060059, - "learning_rate": 7.265628140703517e-05, - "loss": 5.3554, - "step": 27716 - }, - { - "epoch": 14.454758800521512, - "grad_norm": 1.3879120349884033, - "learning_rate": 7.265527638190955e-05, - "loss": 5.7732, - "step": 27717 - }, - { - "epoch": 14.455280312907432, - "grad_norm": 1.660056710243225, - "learning_rate": 7.265427135678393e-05, - "loss": 4.7633, - "step": 27718 - }, - { - "epoch": 14.455801825293351, - "grad_norm": 1.444818377494812, - "learning_rate": 7.26532663316583e-05, - "loss": 5.4992, - "step": 27719 - }, - { - "epoch": 14.45632333767927, - "grad_norm": 1.4109668731689453, - "learning_rate": 7.265226130653267e-05, - "loss": 5.7103, - "step": 27720 - }, - { - "epoch": 14.456844850065188, - "grad_norm": 1.4111560583114624, - "learning_rate": 7.265125628140705e-05, - "loss": 5.2959, - "step": 27721 - }, - { - "epoch": 14.457366362451108, - "grad_norm": 1.4248099327087402, - "learning_rate": 7.265025125628141e-05, - "loss": 5.2787, - "step": 27722 - }, - { - "epoch": 14.457887874837027, - "grad_norm": 1.4393807649612427, - "learning_rate": 7.264924623115579e-05, - "loss": 5.6183, - "step": 27723 - }, - { - "epoch": 14.458409387222947, - "grad_norm": 1.5288970470428467, - "learning_rate": 7.264824120603015e-05, - "loss": 5.0213, - "step": 27724 - }, - { - "epoch": 14.458930899608866, - "grad_norm": 1.428405523300171, - "learning_rate": 7.264723618090452e-05, - "loss": 4.9787, - "step": 27725 - }, - { - "epoch": 14.459452411994786, - "grad_norm": 1.3811813592910767, - "learning_rate": 7.26462311557789e-05, - "loss": 5.3301, - "step": 27726 - }, - { - "epoch": 14.459973924380703, - "grad_norm": 1.4768126010894775, - "learning_rate": 7.264522613065327e-05, - "loss": 5.2472, - "step": 27727 - }, - { - "epoch": 14.460495436766623, - "grad_norm": 1.4634824991226196, - "learning_rate": 7.264422110552765e-05, - "loss": 5.5592, - "step": 27728 - }, - { - "epoch": 14.461016949152542, - "grad_norm": 1.4900482892990112, - "learning_rate": 7.264321608040201e-05, - "loss": 5.3206, - "step": 27729 - }, - { - "epoch": 14.461538461538462, - "grad_norm": 1.3889341354370117, - "learning_rate": 7.264221105527639e-05, - "loss": 5.4339, - "step": 27730 - }, - { - "epoch": 14.462059973924381, - "grad_norm": 1.4030961990356445, - "learning_rate": 7.264120603015076e-05, - "loss": 5.5131, - "step": 27731 - }, - { - "epoch": 14.4625814863103, - "grad_norm": 1.463734745979309, - "learning_rate": 7.264020100502513e-05, - "loss": 5.3997, - "step": 27732 - }, - { - "epoch": 14.463102998696218, - "grad_norm": 1.4711803197860718, - "learning_rate": 7.26391959798995e-05, - "loss": 5.4106, - "step": 27733 - }, - { - "epoch": 14.463624511082138, - "grad_norm": 1.6288126707077026, - "learning_rate": 7.263819095477388e-05, - "loss": 5.0328, - "step": 27734 - }, - { - "epoch": 14.464146023468057, - "grad_norm": 1.3677035570144653, - "learning_rate": 7.263718592964824e-05, - "loss": 5.6758, - "step": 27735 - }, - { - "epoch": 14.464667535853977, - "grad_norm": 1.4606349468231201, - "learning_rate": 7.263618090452262e-05, - "loss": 5.2466, - "step": 27736 - }, - { - "epoch": 14.465189048239896, - "grad_norm": 1.45546555519104, - "learning_rate": 7.263517587939698e-05, - "loss": 5.3705, - "step": 27737 - }, - { - "epoch": 14.465710560625816, - "grad_norm": 1.3759706020355225, - "learning_rate": 7.263417085427136e-05, - "loss": 5.5812, - "step": 27738 - }, - { - "epoch": 14.466232073011733, - "grad_norm": 1.4078596830368042, - "learning_rate": 7.263316582914574e-05, - "loss": 5.3421, - "step": 27739 - }, - { - "epoch": 14.466753585397653, - "grad_norm": 1.5589983463287354, - "learning_rate": 7.26321608040201e-05, - "loss": 5.2831, - "step": 27740 - }, - { - "epoch": 14.467275097783572, - "grad_norm": 1.4708397388458252, - "learning_rate": 7.263115577889448e-05, - "loss": 5.5382, - "step": 27741 - }, - { - "epoch": 14.467796610169492, - "grad_norm": 1.4526363611221313, - "learning_rate": 7.263015075376884e-05, - "loss": 5.6068, - "step": 27742 - }, - { - "epoch": 14.468318122555411, - "grad_norm": 1.4551074504852295, - "learning_rate": 7.262914572864322e-05, - "loss": 5.2787, - "step": 27743 - }, - { - "epoch": 14.46883963494133, - "grad_norm": 1.4054229259490967, - "learning_rate": 7.262814070351759e-05, - "loss": 5.4746, - "step": 27744 - }, - { - "epoch": 14.469361147327248, - "grad_norm": 1.4147849082946777, - "learning_rate": 7.262713567839196e-05, - "loss": 5.4136, - "step": 27745 - }, - { - "epoch": 14.469882659713168, - "grad_norm": 1.4099295139312744, - "learning_rate": 7.262613065326633e-05, - "loss": 5.307, - "step": 27746 - }, - { - "epoch": 14.470404172099087, - "grad_norm": 1.4026758670806885, - "learning_rate": 7.26251256281407e-05, - "loss": 5.8539, - "step": 27747 - }, - { - "epoch": 14.470925684485007, - "grad_norm": 1.529367446899414, - "learning_rate": 7.262412060301508e-05, - "loss": 5.426, - "step": 27748 - }, - { - "epoch": 14.471447196870926, - "grad_norm": 1.5660845041275024, - "learning_rate": 7.262311557788946e-05, - "loss": 5.493, - "step": 27749 - }, - { - "epoch": 14.471968709256846, - "grad_norm": 1.4588487148284912, - "learning_rate": 7.262211055276383e-05, - "loss": 5.2929, - "step": 27750 - }, - { - "epoch": 14.472490221642763, - "grad_norm": 1.5473473072052002, - "learning_rate": 7.262110552763819e-05, - "loss": 5.8364, - "step": 27751 - }, - { - "epoch": 14.473011734028683, - "grad_norm": 1.4805186986923218, - "learning_rate": 7.262010050251257e-05, - "loss": 4.9987, - "step": 27752 - }, - { - "epoch": 14.473533246414602, - "grad_norm": 1.5037000179290771, - "learning_rate": 7.261909547738693e-05, - "loss": 5.4912, - "step": 27753 - }, - { - "epoch": 14.474054758800522, - "grad_norm": 1.5688174962997437, - "learning_rate": 7.261809045226131e-05, - "loss": 5.0506, - "step": 27754 - }, - { - "epoch": 14.474576271186441, - "grad_norm": 1.4966380596160889, - "learning_rate": 7.261708542713567e-05, - "loss": 5.0404, - "step": 27755 - }, - { - "epoch": 14.47509778357236, - "grad_norm": 1.4708398580551147, - "learning_rate": 7.261608040201005e-05, - "loss": 5.583, - "step": 27756 - }, - { - "epoch": 14.475619295958278, - "grad_norm": 1.4749945402145386, - "learning_rate": 7.261507537688442e-05, - "loss": 5.2941, - "step": 27757 - }, - { - "epoch": 14.476140808344198, - "grad_norm": 1.4622371196746826, - "learning_rate": 7.26140703517588e-05, - "loss": 5.468, - "step": 27758 - }, - { - "epoch": 14.476662320730117, - "grad_norm": 1.4731217622756958, - "learning_rate": 7.261306532663317e-05, - "loss": 5.6188, - "step": 27759 - }, - { - "epoch": 14.477183833116037, - "grad_norm": 1.5235589742660522, - "learning_rate": 7.261206030150755e-05, - "loss": 4.9841, - "step": 27760 - }, - { - "epoch": 14.477705345501956, - "grad_norm": 1.4284309148788452, - "learning_rate": 7.261105527638191e-05, - "loss": 5.6334, - "step": 27761 - }, - { - "epoch": 14.478226857887876, - "grad_norm": 1.5896315574645996, - "learning_rate": 7.261005025125629e-05, - "loss": 4.7828, - "step": 27762 - }, - { - "epoch": 14.478748370273793, - "grad_norm": 1.6022361516952515, - "learning_rate": 7.260904522613066e-05, - "loss": 5.1194, - "step": 27763 - }, - { - "epoch": 14.479269882659713, - "grad_norm": 1.4188021421432495, - "learning_rate": 7.260804020100502e-05, - "loss": 5.3678, - "step": 27764 - }, - { - "epoch": 14.479791395045632, - "grad_norm": 1.530936598777771, - "learning_rate": 7.26070351758794e-05, - "loss": 4.9948, - "step": 27765 - }, - { - "epoch": 14.480312907431552, - "grad_norm": 1.3751217126846313, - "learning_rate": 7.260603015075376e-05, - "loss": 5.559, - "step": 27766 - }, - { - "epoch": 14.480834419817471, - "grad_norm": 1.4443784952163696, - "learning_rate": 7.260502512562814e-05, - "loss": 5.2153, - "step": 27767 - }, - { - "epoch": 14.48135593220339, - "grad_norm": 1.4627926349639893, - "learning_rate": 7.260402010050252e-05, - "loss": 5.2011, - "step": 27768 - }, - { - "epoch": 14.481877444589308, - "grad_norm": 1.4500964879989624, - "learning_rate": 7.26030150753769e-05, - "loss": 5.3407, - "step": 27769 - }, - { - "epoch": 14.482398956975228, - "grad_norm": 1.522821068763733, - "learning_rate": 7.260201005025126e-05, - "loss": 5.1913, - "step": 27770 - }, - { - "epoch": 14.482920469361147, - "grad_norm": 1.4358763694763184, - "learning_rate": 7.260100502512564e-05, - "loss": 5.0352, - "step": 27771 - }, - { - "epoch": 14.483441981747067, - "grad_norm": 1.5939900875091553, - "learning_rate": 7.26e-05, - "loss": 5.0295, - "step": 27772 - }, - { - "epoch": 14.483963494132986, - "grad_norm": 1.5440109968185425, - "learning_rate": 7.259899497487438e-05, - "loss": 4.9022, - "step": 27773 - }, - { - "epoch": 14.484485006518906, - "grad_norm": 1.519138216972351, - "learning_rate": 7.259798994974874e-05, - "loss": 5.1652, - "step": 27774 - }, - { - "epoch": 14.485006518904823, - "grad_norm": 1.5002444982528687, - "learning_rate": 7.259698492462312e-05, - "loss": 5.5496, - "step": 27775 - }, - { - "epoch": 14.485528031290743, - "grad_norm": 1.3176140785217285, - "learning_rate": 7.259597989949749e-05, - "loss": 5.9834, - "step": 27776 - }, - { - "epoch": 14.486049543676662, - "grad_norm": 1.5341355800628662, - "learning_rate": 7.259497487437186e-05, - "loss": 4.6882, - "step": 27777 - }, - { - "epoch": 14.486571056062582, - "grad_norm": 1.4376860857009888, - "learning_rate": 7.259396984924623e-05, - "loss": 5.4669, - "step": 27778 - }, - { - "epoch": 14.487092568448501, - "grad_norm": 1.4716298580169678, - "learning_rate": 7.259296482412061e-05, - "loss": 5.8602, - "step": 27779 - }, - { - "epoch": 14.487614080834419, - "grad_norm": 1.4544252157211304, - "learning_rate": 7.259195979899498e-05, - "loss": 4.8648, - "step": 27780 - }, - { - "epoch": 14.488135593220338, - "grad_norm": 1.4903558492660522, - "learning_rate": 7.259095477386935e-05, - "loss": 5.4138, - "step": 27781 - }, - { - "epoch": 14.488657105606258, - "grad_norm": 1.400352120399475, - "learning_rate": 7.258994974874373e-05, - "loss": 5.4725, - "step": 27782 - }, - { - "epoch": 14.489178617992177, - "grad_norm": 1.5347641706466675, - "learning_rate": 7.258894472361809e-05, - "loss": 5.2969, - "step": 27783 - }, - { - "epoch": 14.489700130378097, - "grad_norm": 1.4404325485229492, - "learning_rate": 7.258793969849247e-05, - "loss": 5.5077, - "step": 27784 - }, - { - "epoch": 14.490221642764016, - "grad_norm": 1.4458062648773193, - "learning_rate": 7.258693467336683e-05, - "loss": 5.0213, - "step": 27785 - }, - { - "epoch": 14.490743155149936, - "grad_norm": 1.39250910282135, - "learning_rate": 7.258592964824121e-05, - "loss": 5.5233, - "step": 27786 - }, - { - "epoch": 14.491264667535853, - "grad_norm": 1.4490833282470703, - "learning_rate": 7.258492462311558e-05, - "loss": 5.2382, - "step": 27787 - }, - { - "epoch": 14.491786179921773, - "grad_norm": 1.4546334743499756, - "learning_rate": 7.258391959798995e-05, - "loss": 5.005, - "step": 27788 - }, - { - "epoch": 14.492307692307692, - "grad_norm": 1.3636709451675415, - "learning_rate": 7.258291457286433e-05, - "loss": 5.5393, - "step": 27789 - }, - { - "epoch": 14.492829204693612, - "grad_norm": 1.3715928792953491, - "learning_rate": 7.258190954773871e-05, - "loss": 5.4288, - "step": 27790 - }, - { - "epoch": 14.493350717079531, - "grad_norm": 1.5435729026794434, - "learning_rate": 7.258090452261307e-05, - "loss": 4.9821, - "step": 27791 - }, - { - "epoch": 14.493872229465449, - "grad_norm": 1.3986783027648926, - "learning_rate": 7.257989949748744e-05, - "loss": 5.4613, - "step": 27792 - }, - { - "epoch": 14.494393741851368, - "grad_norm": 1.4486676454544067, - "learning_rate": 7.257889447236182e-05, - "loss": 5.3538, - "step": 27793 - }, - { - "epoch": 14.494915254237288, - "grad_norm": 1.3294181823730469, - "learning_rate": 7.257788944723618e-05, - "loss": 4.7557, - "step": 27794 - }, - { - "epoch": 14.495436766623207, - "grad_norm": 1.4566243886947632, - "learning_rate": 7.257688442211056e-05, - "loss": 5.624, - "step": 27795 - }, - { - "epoch": 14.495958279009127, - "grad_norm": 1.4289017915725708, - "learning_rate": 7.257587939698492e-05, - "loss": 5.4874, - "step": 27796 - }, - { - "epoch": 14.496479791395046, - "grad_norm": 1.475429654121399, - "learning_rate": 7.25748743718593e-05, - "loss": 5.2971, - "step": 27797 - }, - { - "epoch": 14.497001303780964, - "grad_norm": 1.4173730611801147, - "learning_rate": 7.257386934673366e-05, - "loss": 5.5276, - "step": 27798 - }, - { - "epoch": 14.497522816166883, - "grad_norm": 1.5801352262496948, - "learning_rate": 7.257286432160804e-05, - "loss": 5.0413, - "step": 27799 - }, - { - "epoch": 14.498044328552803, - "grad_norm": 1.50397789478302, - "learning_rate": 7.257185929648242e-05, - "loss": 5.4377, - "step": 27800 - }, - { - "epoch": 14.498565840938722, - "grad_norm": 1.3630328178405762, - "learning_rate": 7.25708542713568e-05, - "loss": 5.6084, - "step": 27801 - }, - { - "epoch": 14.499087353324642, - "grad_norm": 1.5982531309127808, - "learning_rate": 7.256984924623116e-05, - "loss": 5.2986, - "step": 27802 - }, - { - "epoch": 14.499608865710561, - "grad_norm": 1.5371696949005127, - "learning_rate": 7.256884422110554e-05, - "loss": 5.079, - "step": 27803 - }, - { - "epoch": 14.500130378096479, - "grad_norm": 1.4569350481033325, - "learning_rate": 7.25678391959799e-05, - "loss": 4.8015, - "step": 27804 - }, - { - "epoch": 14.500651890482398, - "grad_norm": 1.5000020265579224, - "learning_rate": 7.256683417085427e-05, - "loss": 5.4879, - "step": 27805 - }, - { - "epoch": 14.501173402868318, - "grad_norm": 1.4567855596542358, - "learning_rate": 7.256582914572865e-05, - "loss": 5.8752, - "step": 27806 - }, - { - "epoch": 14.501694915254237, - "grad_norm": 1.4723840951919556, - "learning_rate": 7.256482412060301e-05, - "loss": 4.8792, - "step": 27807 - }, - { - "epoch": 14.502216427640157, - "grad_norm": 1.4552149772644043, - "learning_rate": 7.256381909547739e-05, - "loss": 5.5449, - "step": 27808 - }, - { - "epoch": 14.502737940026076, - "grad_norm": 1.67232346534729, - "learning_rate": 7.256281407035177e-05, - "loss": 4.5688, - "step": 27809 - }, - { - "epoch": 14.503259452411994, - "grad_norm": 1.5669032335281372, - "learning_rate": 7.256180904522614e-05, - "loss": 5.0531, - "step": 27810 - }, - { - "epoch": 14.503780964797913, - "grad_norm": 1.624675989151001, - "learning_rate": 7.256080402010051e-05, - "loss": 5.0836, - "step": 27811 - }, - { - "epoch": 14.504302477183833, - "grad_norm": 1.6131874322891235, - "learning_rate": 7.255979899497489e-05, - "loss": 4.8233, - "step": 27812 - }, - { - "epoch": 14.504823989569752, - "grad_norm": 1.8054813146591187, - "learning_rate": 7.255879396984925e-05, - "loss": 5.3462, - "step": 27813 - }, - { - "epoch": 14.505345501955672, - "grad_norm": 1.4859895706176758, - "learning_rate": 7.255778894472363e-05, - "loss": 5.3245, - "step": 27814 - }, - { - "epoch": 14.505867014341591, - "grad_norm": 1.5362536907196045, - "learning_rate": 7.255678391959799e-05, - "loss": 5.3637, - "step": 27815 - }, - { - "epoch": 14.506388526727509, - "grad_norm": 1.4109423160552979, - "learning_rate": 7.255577889447237e-05, - "loss": 4.4423, - "step": 27816 - }, - { - "epoch": 14.506910039113428, - "grad_norm": 1.3675345182418823, - "learning_rate": 7.255477386934673e-05, - "loss": 5.6, - "step": 27817 - }, - { - "epoch": 14.507431551499348, - "grad_norm": 1.4735568761825562, - "learning_rate": 7.25537688442211e-05, - "loss": 5.1116, - "step": 27818 - }, - { - "epoch": 14.507953063885267, - "grad_norm": 1.4379968643188477, - "learning_rate": 7.255276381909548e-05, - "loss": 5.5676, - "step": 27819 - }, - { - "epoch": 14.508474576271187, - "grad_norm": 1.4421945810317993, - "learning_rate": 7.255175879396985e-05, - "loss": 5.6092, - "step": 27820 - }, - { - "epoch": 14.508996088657106, - "grad_norm": 1.5194189548492432, - "learning_rate": 7.255075376884423e-05, - "loss": 4.6623, - "step": 27821 - }, - { - "epoch": 14.509517601043024, - "grad_norm": 1.51508629322052, - "learning_rate": 7.25497487437186e-05, - "loss": 5.0365, - "step": 27822 - }, - { - "epoch": 14.510039113428943, - "grad_norm": 1.3899062871932983, - "learning_rate": 7.254874371859297e-05, - "loss": 5.75, - "step": 27823 - }, - { - "epoch": 14.510560625814863, - "grad_norm": 1.4113346338272095, - "learning_rate": 7.254773869346734e-05, - "loss": 5.5303, - "step": 27824 - }, - { - "epoch": 14.511082138200782, - "grad_norm": 1.4003528356552124, - "learning_rate": 7.254673366834172e-05, - "loss": 5.6023, - "step": 27825 - }, - { - "epoch": 14.511603650586702, - "grad_norm": 1.4544216394424438, - "learning_rate": 7.254572864321608e-05, - "loss": 5.5838, - "step": 27826 - }, - { - "epoch": 14.512125162972621, - "grad_norm": 1.4981330633163452, - "learning_rate": 7.254472361809046e-05, - "loss": 5.3384, - "step": 27827 - }, - { - "epoch": 14.512646675358539, - "grad_norm": 1.5760358572006226, - "learning_rate": 7.254371859296482e-05, - "loss": 5.0716, - "step": 27828 - }, - { - "epoch": 14.513168187744458, - "grad_norm": 1.6229082345962524, - "learning_rate": 7.25427135678392e-05, - "loss": 4.9964, - "step": 27829 - }, - { - "epoch": 14.513689700130378, - "grad_norm": 1.446816325187683, - "learning_rate": 7.254170854271356e-05, - "loss": 5.4483, - "step": 27830 - }, - { - "epoch": 14.514211212516297, - "grad_norm": 1.5357820987701416, - "learning_rate": 7.254070351758794e-05, - "loss": 5.3868, - "step": 27831 - }, - { - "epoch": 14.514732724902217, - "grad_norm": 1.4244036674499512, - "learning_rate": 7.253969849246232e-05, - "loss": 5.6825, - "step": 27832 - }, - { - "epoch": 14.515254237288136, - "grad_norm": 1.4912716150283813, - "learning_rate": 7.253869346733668e-05, - "loss": 5.4263, - "step": 27833 - }, - { - "epoch": 14.515775749674054, - "grad_norm": 1.598034381866455, - "learning_rate": 7.253768844221106e-05, - "loss": 4.7447, - "step": 27834 - }, - { - "epoch": 14.516297262059974, - "grad_norm": 1.5911602973937988, - "learning_rate": 7.253668341708543e-05, - "loss": 5.3774, - "step": 27835 - }, - { - "epoch": 14.516818774445893, - "grad_norm": 1.3794102668762207, - "learning_rate": 7.25356783919598e-05, - "loss": 5.7125, - "step": 27836 - }, - { - "epoch": 14.517340286831812, - "grad_norm": 1.4820152521133423, - "learning_rate": 7.253467336683417e-05, - "loss": 5.3123, - "step": 27837 - }, - { - "epoch": 14.517861799217732, - "grad_norm": 1.3321194648742676, - "learning_rate": 7.253366834170855e-05, - "loss": 5.9709, - "step": 27838 - }, - { - "epoch": 14.518383311603651, - "grad_norm": 1.422397255897522, - "learning_rate": 7.253266331658291e-05, - "loss": 5.601, - "step": 27839 - }, - { - "epoch": 14.518904823989569, - "grad_norm": 1.490161418914795, - "learning_rate": 7.253165829145729e-05, - "loss": 4.9152, - "step": 27840 - }, - { - "epoch": 14.519426336375489, - "grad_norm": 1.3901324272155762, - "learning_rate": 7.253065326633167e-05, - "loss": 5.7987, - "step": 27841 - }, - { - "epoch": 14.519947848761408, - "grad_norm": 1.433100938796997, - "learning_rate": 7.252964824120604e-05, - "loss": 4.9897, - "step": 27842 - }, - { - "epoch": 14.520469361147327, - "grad_norm": 1.4769761562347412, - "learning_rate": 7.252864321608041e-05, - "loss": 5.6884, - "step": 27843 - }, - { - "epoch": 14.520990873533247, - "grad_norm": 1.503115177154541, - "learning_rate": 7.252763819095477e-05, - "loss": 5.5578, - "step": 27844 - }, - { - "epoch": 14.521512385919166, - "grad_norm": 1.3669610023498535, - "learning_rate": 7.252663316582915e-05, - "loss": 5.6578, - "step": 27845 - }, - { - "epoch": 14.522033898305084, - "grad_norm": 1.4712785482406616, - "learning_rate": 7.252562814070351e-05, - "loss": 5.5068, - "step": 27846 - }, - { - "epoch": 14.522555410691004, - "grad_norm": 1.4427900314331055, - "learning_rate": 7.252462311557789e-05, - "loss": 5.7548, - "step": 27847 - }, - { - "epoch": 14.523076923076923, - "grad_norm": 1.4779796600341797, - "learning_rate": 7.252361809045226e-05, - "loss": 5.3225, - "step": 27848 - }, - { - "epoch": 14.523598435462842, - "grad_norm": 1.3925886154174805, - "learning_rate": 7.252261306532663e-05, - "loss": 5.3753, - "step": 27849 - }, - { - "epoch": 14.524119947848762, - "grad_norm": 1.4911679029464722, - "learning_rate": 7.2521608040201e-05, - "loss": 4.8647, - "step": 27850 - }, - { - "epoch": 14.524641460234681, - "grad_norm": 1.3647923469543457, - "learning_rate": 7.252060301507538e-05, - "loss": 5.3891, - "step": 27851 - }, - { - "epoch": 14.525162972620599, - "grad_norm": 1.4012115001678467, - "learning_rate": 7.251959798994975e-05, - "loss": 5.3568, - "step": 27852 - }, - { - "epoch": 14.525684485006519, - "grad_norm": 1.6014699935913086, - "learning_rate": 7.251859296482413e-05, - "loss": 5.3243, - "step": 27853 - }, - { - "epoch": 14.526205997392438, - "grad_norm": 1.5075442790985107, - "learning_rate": 7.25175879396985e-05, - "loss": 5.085, - "step": 27854 - }, - { - "epoch": 14.526727509778357, - "grad_norm": 1.4349801540374756, - "learning_rate": 7.251658291457287e-05, - "loss": 5.7092, - "step": 27855 - }, - { - "epoch": 14.527249022164277, - "grad_norm": 1.4905165433883667, - "learning_rate": 7.251557788944724e-05, - "loss": 5.3671, - "step": 27856 - }, - { - "epoch": 14.527770534550196, - "grad_norm": 1.3983147144317627, - "learning_rate": 7.25145728643216e-05, - "loss": 5.5352, - "step": 27857 - }, - { - "epoch": 14.528292046936114, - "grad_norm": 1.4585809707641602, - "learning_rate": 7.251356783919598e-05, - "loss": 5.6875, - "step": 27858 - }, - { - "epoch": 14.528813559322034, - "grad_norm": 1.388298511505127, - "learning_rate": 7.251256281407035e-05, - "loss": 5.7522, - "step": 27859 - }, - { - "epoch": 14.529335071707953, - "grad_norm": 1.3781304359436035, - "learning_rate": 7.251155778894472e-05, - "loss": 5.6294, - "step": 27860 - }, - { - "epoch": 14.529856584093872, - "grad_norm": 1.3912336826324463, - "learning_rate": 7.25105527638191e-05, - "loss": 5.5743, - "step": 27861 - }, - { - "epoch": 14.530378096479792, - "grad_norm": 1.3602464199066162, - "learning_rate": 7.250954773869348e-05, - "loss": 5.5523, - "step": 27862 - }, - { - "epoch": 14.53089960886571, - "grad_norm": 1.44057297706604, - "learning_rate": 7.250854271356784e-05, - "loss": 5.3145, - "step": 27863 - }, - { - "epoch": 14.531421121251629, - "grad_norm": 1.3622037172317505, - "learning_rate": 7.250753768844222e-05, - "loss": 5.3622, - "step": 27864 - }, - { - "epoch": 14.531942633637549, - "grad_norm": 1.4691115617752075, - "learning_rate": 7.250653266331659e-05, - "loss": 5.128, - "step": 27865 - }, - { - "epoch": 14.532464146023468, - "grad_norm": 1.517576813697815, - "learning_rate": 7.250552763819096e-05, - "loss": 5.3568, - "step": 27866 - }, - { - "epoch": 14.532985658409387, - "grad_norm": 1.517526388168335, - "learning_rate": 7.250452261306533e-05, - "loss": 5.0675, - "step": 27867 - }, - { - "epoch": 14.533507170795307, - "grad_norm": 1.4617775678634644, - "learning_rate": 7.25035175879397e-05, - "loss": 5.5341, - "step": 27868 - }, - { - "epoch": 14.534028683181226, - "grad_norm": 1.3911468982696533, - "learning_rate": 7.250251256281407e-05, - "loss": 5.5231, - "step": 27869 - }, - { - "epoch": 14.534550195567144, - "grad_norm": 1.455308437347412, - "learning_rate": 7.250150753768845e-05, - "loss": 5.4666, - "step": 27870 - }, - { - "epoch": 14.535071707953064, - "grad_norm": 1.491706132888794, - "learning_rate": 7.250050251256281e-05, - "loss": 4.9408, - "step": 27871 - }, - { - "epoch": 14.535593220338983, - "grad_norm": 1.7374087572097778, - "learning_rate": 7.249949748743719e-05, - "loss": 5.1516, - "step": 27872 - }, - { - "epoch": 14.536114732724903, - "grad_norm": 1.5148465633392334, - "learning_rate": 7.249849246231157e-05, - "loss": 5.6063, - "step": 27873 - }, - { - "epoch": 14.536636245110822, - "grad_norm": 1.473503828048706, - "learning_rate": 7.249748743718593e-05, - "loss": 5.3915, - "step": 27874 - }, - { - "epoch": 14.53715775749674, - "grad_norm": 1.6457650661468506, - "learning_rate": 7.249648241206031e-05, - "loss": 4.393, - "step": 27875 - }, - { - "epoch": 14.53767926988266, - "grad_norm": 1.3603637218475342, - "learning_rate": 7.249547738693467e-05, - "loss": 5.8615, - "step": 27876 - }, - { - "epoch": 14.538200782268579, - "grad_norm": 1.487370252609253, - "learning_rate": 7.249447236180905e-05, - "loss": 5.1896, - "step": 27877 - }, - { - "epoch": 14.538722294654498, - "grad_norm": 1.465965747833252, - "learning_rate": 7.249346733668342e-05, - "loss": 5.1027, - "step": 27878 - }, - { - "epoch": 14.539243807040418, - "grad_norm": 1.4736028909683228, - "learning_rate": 7.24924623115578e-05, - "loss": 5.6157, - "step": 27879 - }, - { - "epoch": 14.539765319426337, - "grad_norm": 1.4438899755477905, - "learning_rate": 7.249145728643216e-05, - "loss": 5.7896, - "step": 27880 - }, - { - "epoch": 14.540286831812256, - "grad_norm": 1.4497333765029907, - "learning_rate": 7.249045226130654e-05, - "loss": 5.4304, - "step": 27881 - }, - { - "epoch": 14.540808344198174, - "grad_norm": 1.4916462898254395, - "learning_rate": 7.248944723618091e-05, - "loss": 4.9061, - "step": 27882 - }, - { - "epoch": 14.541329856584094, - "grad_norm": 1.5112353563308716, - "learning_rate": 7.248844221105529e-05, - "loss": 4.361, - "step": 27883 - }, - { - "epoch": 14.541851368970013, - "grad_norm": 1.4771455526351929, - "learning_rate": 7.248743718592966e-05, - "loss": 5.015, - "step": 27884 - }, - { - "epoch": 14.542372881355933, - "grad_norm": 1.4780690670013428, - "learning_rate": 7.248643216080402e-05, - "loss": 5.028, - "step": 27885 - }, - { - "epoch": 14.542894393741852, - "grad_norm": 1.5131094455718994, - "learning_rate": 7.24854271356784e-05, - "loss": 5.5264, - "step": 27886 - }, - { - "epoch": 14.54341590612777, - "grad_norm": 1.3705509901046753, - "learning_rate": 7.248442211055276e-05, - "loss": 4.5952, - "step": 27887 - }, - { - "epoch": 14.54393741851369, - "grad_norm": 1.5237494707107544, - "learning_rate": 7.248341708542714e-05, - "loss": 5.5806, - "step": 27888 - }, - { - "epoch": 14.544458930899609, - "grad_norm": 1.4257622957229614, - "learning_rate": 7.24824120603015e-05, - "loss": 5.7374, - "step": 27889 - }, - { - "epoch": 14.544980443285528, - "grad_norm": 1.4877480268478394, - "learning_rate": 7.248140703517588e-05, - "loss": 5.2382, - "step": 27890 - }, - { - "epoch": 14.545501955671448, - "grad_norm": 1.4090967178344727, - "learning_rate": 7.248040201005025e-05, - "loss": 5.6638, - "step": 27891 - }, - { - "epoch": 14.546023468057367, - "grad_norm": 1.396342158317566, - "learning_rate": 7.247939698492462e-05, - "loss": 5.3175, - "step": 27892 - }, - { - "epoch": 14.546544980443285, - "grad_norm": 1.5543386936187744, - "learning_rate": 7.2478391959799e-05, - "loss": 5.3827, - "step": 27893 - }, - { - "epoch": 14.547066492829204, - "grad_norm": 1.338243842124939, - "learning_rate": 7.247738693467338e-05, - "loss": 5.4582, - "step": 27894 - }, - { - "epoch": 14.547588005215124, - "grad_norm": 1.363004207611084, - "learning_rate": 7.247638190954774e-05, - "loss": 4.5047, - "step": 27895 - }, - { - "epoch": 14.548109517601043, - "grad_norm": 1.4882783889770508, - "learning_rate": 7.247537688442212e-05, - "loss": 5.3893, - "step": 27896 - }, - { - "epoch": 14.548631029986963, - "grad_norm": 1.4327970743179321, - "learning_rate": 7.247437185929649e-05, - "loss": 5.5993, - "step": 27897 - }, - { - "epoch": 14.549152542372882, - "grad_norm": 1.4961345195770264, - "learning_rate": 7.247336683417085e-05, - "loss": 5.3277, - "step": 27898 - }, - { - "epoch": 14.5496740547588, - "grad_norm": 1.5322325229644775, - "learning_rate": 7.247236180904523e-05, - "loss": 5.5578, - "step": 27899 - }, - { - "epoch": 14.55019556714472, - "grad_norm": 1.392107605934143, - "learning_rate": 7.247135678391959e-05, - "loss": 5.3594, - "step": 27900 - }, - { - "epoch": 14.550717079530639, - "grad_norm": 1.3935805559158325, - "learning_rate": 7.247035175879397e-05, - "loss": 5.2738, - "step": 27901 - }, - { - "epoch": 14.551238591916558, - "grad_norm": 1.33814537525177, - "learning_rate": 7.246934673366835e-05, - "loss": 5.8979, - "step": 27902 - }, - { - "epoch": 14.551760104302478, - "grad_norm": 1.4824793338775635, - "learning_rate": 7.246834170854273e-05, - "loss": 4.9924, - "step": 27903 - }, - { - "epoch": 14.552281616688397, - "grad_norm": 1.558030605316162, - "learning_rate": 7.246733668341709e-05, - "loss": 4.7875, - "step": 27904 - }, - { - "epoch": 14.552803129074315, - "grad_norm": 1.576809048652649, - "learning_rate": 7.246633165829147e-05, - "loss": 4.8173, - "step": 27905 - }, - { - "epoch": 14.553324641460234, - "grad_norm": 1.461898684501648, - "learning_rate": 7.246532663316583e-05, - "loss": 5.3971, - "step": 27906 - }, - { - "epoch": 14.553846153846154, - "grad_norm": 1.554019808769226, - "learning_rate": 7.246432160804021e-05, - "loss": 5.6169, - "step": 27907 - }, - { - "epoch": 14.554367666232073, - "grad_norm": 1.4505081176757812, - "learning_rate": 7.246331658291457e-05, - "loss": 5.617, - "step": 27908 - }, - { - "epoch": 14.554889178617993, - "grad_norm": 1.6023842096328735, - "learning_rate": 7.246231155778895e-05, - "loss": 5.3943, - "step": 27909 - }, - { - "epoch": 14.555410691003912, - "grad_norm": 1.5015760660171509, - "learning_rate": 7.246130653266332e-05, - "loss": 5.5402, - "step": 27910 - }, - { - "epoch": 14.55593220338983, - "grad_norm": 1.3955868482589722, - "learning_rate": 7.246030150753768e-05, - "loss": 5.6496, - "step": 27911 - }, - { - "epoch": 14.55645371577575, - "grad_norm": 1.4908267259597778, - "learning_rate": 7.245929648241206e-05, - "loss": 5.5018, - "step": 27912 - }, - { - "epoch": 14.556975228161669, - "grad_norm": 1.5049296617507935, - "learning_rate": 7.245829145728644e-05, - "loss": 5.5048, - "step": 27913 - }, - { - "epoch": 14.557496740547588, - "grad_norm": 1.4449880123138428, - "learning_rate": 7.245728643216081e-05, - "loss": 5.2626, - "step": 27914 - }, - { - "epoch": 14.558018252933508, - "grad_norm": 1.5681626796722412, - "learning_rate": 7.245628140703518e-05, - "loss": 5.206, - "step": 27915 - }, - { - "epoch": 14.558539765319427, - "grad_norm": 1.46823251247406, - "learning_rate": 7.245527638190956e-05, - "loss": 4.668, - "step": 27916 - }, - { - "epoch": 14.559061277705345, - "grad_norm": 1.4520922899246216, - "learning_rate": 7.245427135678392e-05, - "loss": 5.4373, - "step": 27917 - }, - { - "epoch": 14.559582790091264, - "grad_norm": 1.5355932712554932, - "learning_rate": 7.24532663316583e-05, - "loss": 5.1388, - "step": 27918 - }, - { - "epoch": 14.560104302477184, - "grad_norm": 1.5628015995025635, - "learning_rate": 7.245226130653266e-05, - "loss": 5.2129, - "step": 27919 - }, - { - "epoch": 14.560625814863103, - "grad_norm": 1.5367755889892578, - "learning_rate": 7.245125628140704e-05, - "loss": 5.3881, - "step": 27920 - }, - { - "epoch": 14.561147327249023, - "grad_norm": 1.5808110237121582, - "learning_rate": 7.24502512562814e-05, - "loss": 5.1993, - "step": 27921 - }, - { - "epoch": 14.561668839634942, - "grad_norm": 1.5087239742279053, - "learning_rate": 7.244924623115578e-05, - "loss": 4.9104, - "step": 27922 - }, - { - "epoch": 14.56219035202086, - "grad_norm": 1.5801717042922974, - "learning_rate": 7.244824120603016e-05, - "loss": 5.0842, - "step": 27923 - }, - { - "epoch": 14.56271186440678, - "grad_norm": 1.4876720905303955, - "learning_rate": 7.244723618090452e-05, - "loss": 5.3273, - "step": 27924 - }, - { - "epoch": 14.563233376792699, - "grad_norm": 1.3276501893997192, - "learning_rate": 7.24462311557789e-05, - "loss": 5.6848, - "step": 27925 - }, - { - "epoch": 14.563754889178618, - "grad_norm": 1.508569359779358, - "learning_rate": 7.244522613065327e-05, - "loss": 5.2429, - "step": 27926 - }, - { - "epoch": 14.564276401564538, - "grad_norm": 1.599200963973999, - "learning_rate": 7.244422110552764e-05, - "loss": 4.6345, - "step": 27927 - }, - { - "epoch": 14.564797913950457, - "grad_norm": 1.6112041473388672, - "learning_rate": 7.244321608040201e-05, - "loss": 5.6246, - "step": 27928 - }, - { - "epoch": 14.565319426336375, - "grad_norm": 1.4979783296585083, - "learning_rate": 7.244221105527639e-05, - "loss": 5.3348, - "step": 27929 - }, - { - "epoch": 14.565840938722294, - "grad_norm": 1.4365742206573486, - "learning_rate": 7.244120603015075e-05, - "loss": 5.3314, - "step": 27930 - }, - { - "epoch": 14.566362451108214, - "grad_norm": 1.4465360641479492, - "learning_rate": 7.244020100502513e-05, - "loss": 5.34, - "step": 27931 - }, - { - "epoch": 14.566883963494133, - "grad_norm": 1.3453075885772705, - "learning_rate": 7.243919597989949e-05, - "loss": 5.1277, - "step": 27932 - }, - { - "epoch": 14.567405475880053, - "grad_norm": 1.4929431676864624, - "learning_rate": 7.243819095477387e-05, - "loss": 5.0886, - "step": 27933 - }, - { - "epoch": 14.567926988265972, - "grad_norm": 1.4714179039001465, - "learning_rate": 7.243718592964825e-05, - "loss": 5.7767, - "step": 27934 - }, - { - "epoch": 14.56844850065189, - "grad_norm": 1.4937664270401, - "learning_rate": 7.243618090452263e-05, - "loss": 5.5076, - "step": 27935 - }, - { - "epoch": 14.56897001303781, - "grad_norm": 1.4846196174621582, - "learning_rate": 7.243517587939699e-05, - "loss": 5.0914, - "step": 27936 - }, - { - "epoch": 14.569491525423729, - "grad_norm": 1.4035298824310303, - "learning_rate": 7.243417085427136e-05, - "loss": 5.4061, - "step": 27937 - }, - { - "epoch": 14.570013037809648, - "grad_norm": 1.4894604682922363, - "learning_rate": 7.243316582914573e-05, - "loss": 5.2096, - "step": 27938 - }, - { - "epoch": 14.570534550195568, - "grad_norm": 1.3579086065292358, - "learning_rate": 7.24321608040201e-05, - "loss": 5.5129, - "step": 27939 - }, - { - "epoch": 14.571056062581487, - "grad_norm": 1.377989649772644, - "learning_rate": 7.243115577889448e-05, - "loss": 5.3849, - "step": 27940 - }, - { - "epoch": 14.571577574967405, - "grad_norm": 1.4203096628189087, - "learning_rate": 7.243015075376884e-05, - "loss": 5.2194, - "step": 27941 - }, - { - "epoch": 14.572099087353324, - "grad_norm": 1.523910641670227, - "learning_rate": 7.242914572864322e-05, - "loss": 5.6717, - "step": 27942 - }, - { - "epoch": 14.572620599739244, - "grad_norm": 1.5797836780548096, - "learning_rate": 7.24281407035176e-05, - "loss": 5.4903, - "step": 27943 - }, - { - "epoch": 14.573142112125163, - "grad_norm": 1.4015170335769653, - "learning_rate": 7.242713567839197e-05, - "loss": 5.3102, - "step": 27944 - }, - { - "epoch": 14.573663624511083, - "grad_norm": 1.5849860906600952, - "learning_rate": 7.242613065326634e-05, - "loss": 5.1985, - "step": 27945 - }, - { - "epoch": 14.574185136897002, - "grad_norm": 1.4416043758392334, - "learning_rate": 7.242512562814071e-05, - "loss": 5.4247, - "step": 27946 - }, - { - "epoch": 14.57470664928292, - "grad_norm": 1.418518304824829, - "learning_rate": 7.242412060301508e-05, - "loss": 5.5171, - "step": 27947 - }, - { - "epoch": 14.57522816166884, - "grad_norm": 1.352144718170166, - "learning_rate": 7.242311557788946e-05, - "loss": 5.6436, - "step": 27948 - }, - { - "epoch": 14.575749674054759, - "grad_norm": 1.4197378158569336, - "learning_rate": 7.242211055276382e-05, - "loss": 5.5356, - "step": 27949 - }, - { - "epoch": 14.576271186440678, - "grad_norm": 1.3870290517807007, - "learning_rate": 7.242110552763819e-05, - "loss": 5.6428, - "step": 27950 - }, - { - "epoch": 14.576792698826598, - "grad_norm": 1.4848483800888062, - "learning_rate": 7.242010050251256e-05, - "loss": 5.5096, - "step": 27951 - }, - { - "epoch": 14.577314211212517, - "grad_norm": 1.5048930644989014, - "learning_rate": 7.241909547738693e-05, - "loss": 5.238, - "step": 27952 - }, - { - "epoch": 14.577835723598435, - "grad_norm": 1.433355450630188, - "learning_rate": 7.24180904522613e-05, - "loss": 5.1509, - "step": 27953 - }, - { - "epoch": 14.578357235984354, - "grad_norm": 1.510019063949585, - "learning_rate": 7.241708542713568e-05, - "loss": 4.6101, - "step": 27954 - }, - { - "epoch": 14.578878748370274, - "grad_norm": 1.5131990909576416, - "learning_rate": 7.241608040201006e-05, - "loss": 5.5674, - "step": 27955 - }, - { - "epoch": 14.579400260756193, - "grad_norm": 1.5114942789077759, - "learning_rate": 7.241507537688443e-05, - "loss": 5.7286, - "step": 27956 - }, - { - "epoch": 14.579921773142113, - "grad_norm": 1.5068049430847168, - "learning_rate": 7.24140703517588e-05, - "loss": 5.3119, - "step": 27957 - }, - { - "epoch": 14.58044328552803, - "grad_norm": 1.4744279384613037, - "learning_rate": 7.241306532663317e-05, - "loss": 4.969, - "step": 27958 - }, - { - "epoch": 14.58096479791395, - "grad_norm": 1.3491156101226807, - "learning_rate": 7.241206030150755e-05, - "loss": 5.4698, - "step": 27959 - }, - { - "epoch": 14.58148631029987, - "grad_norm": 1.4405107498168945, - "learning_rate": 7.241105527638191e-05, - "loss": 5.4082, - "step": 27960 - }, - { - "epoch": 14.582007822685789, - "grad_norm": 1.4494799375534058, - "learning_rate": 7.241005025125629e-05, - "loss": 5.5944, - "step": 27961 - }, - { - "epoch": 14.582529335071708, - "grad_norm": 1.5520588159561157, - "learning_rate": 7.240904522613065e-05, - "loss": 5.1959, - "step": 27962 - }, - { - "epoch": 14.583050847457628, - "grad_norm": 1.4303780794143677, - "learning_rate": 7.240804020100503e-05, - "loss": 5.5314, - "step": 27963 - }, - { - "epoch": 14.583572359843547, - "grad_norm": 1.3896713256835938, - "learning_rate": 7.240703517587941e-05, - "loss": 5.5401, - "step": 27964 - }, - { - "epoch": 14.584093872229465, - "grad_norm": 1.3579075336456299, - "learning_rate": 7.240603015075377e-05, - "loss": 5.5182, - "step": 27965 - }, - { - "epoch": 14.584615384615384, - "grad_norm": 1.407112956047058, - "learning_rate": 7.240502512562815e-05, - "loss": 5.2261, - "step": 27966 - }, - { - "epoch": 14.585136897001304, - "grad_norm": 1.5164244174957275, - "learning_rate": 7.240402010050251e-05, - "loss": 5.3393, - "step": 27967 - }, - { - "epoch": 14.585658409387223, - "grad_norm": 2.1050806045532227, - "learning_rate": 7.240301507537689e-05, - "loss": 4.6748, - "step": 27968 - }, - { - "epoch": 14.586179921773143, - "grad_norm": 1.4323612451553345, - "learning_rate": 7.240201005025126e-05, - "loss": 5.537, - "step": 27969 - }, - { - "epoch": 14.58670143415906, - "grad_norm": 1.5500739812850952, - "learning_rate": 7.240100502512563e-05, - "loss": 4.8334, - "step": 27970 - }, - { - "epoch": 14.58722294654498, - "grad_norm": 1.4492448568344116, - "learning_rate": 7.24e-05, - "loss": 5.1291, - "step": 27971 - }, - { - "epoch": 14.5877444589309, - "grad_norm": 1.4046571254730225, - "learning_rate": 7.239899497487438e-05, - "loss": 5.5494, - "step": 27972 - }, - { - "epoch": 14.588265971316819, - "grad_norm": 1.5623537302017212, - "learning_rate": 7.239798994974874e-05, - "loss": 5.3821, - "step": 27973 - }, - { - "epoch": 14.588787483702738, - "grad_norm": 1.654383897781372, - "learning_rate": 7.239698492462312e-05, - "loss": 4.9454, - "step": 27974 - }, - { - "epoch": 14.589308996088658, - "grad_norm": 1.543915033340454, - "learning_rate": 7.23959798994975e-05, - "loss": 5.3267, - "step": 27975 - }, - { - "epoch": 14.589830508474577, - "grad_norm": 1.6614588499069214, - "learning_rate": 7.239497487437187e-05, - "loss": 4.6466, - "step": 27976 - }, - { - "epoch": 14.590352020860495, - "grad_norm": 1.5977033376693726, - "learning_rate": 7.239396984924624e-05, - "loss": 5.103, - "step": 27977 - }, - { - "epoch": 14.590873533246414, - "grad_norm": 1.4357144832611084, - "learning_rate": 7.23929648241206e-05, - "loss": 5.5372, - "step": 27978 - }, - { - "epoch": 14.591395045632334, - "grad_norm": 1.4727895259857178, - "learning_rate": 7.239195979899498e-05, - "loss": 5.6543, - "step": 27979 - }, - { - "epoch": 14.591916558018253, - "grad_norm": 1.4912731647491455, - "learning_rate": 7.239095477386934e-05, - "loss": 5.2778, - "step": 27980 - }, - { - "epoch": 14.592438070404173, - "grad_norm": 1.6156896352767944, - "learning_rate": 7.238994974874372e-05, - "loss": 4.9152, - "step": 27981 - }, - { - "epoch": 14.59295958279009, - "grad_norm": 1.6242166757583618, - "learning_rate": 7.238894472361809e-05, - "loss": 5.3867, - "step": 27982 - }, - { - "epoch": 14.59348109517601, - "grad_norm": 1.664057970046997, - "learning_rate": 7.238793969849246e-05, - "loss": 5.0071, - "step": 27983 - }, - { - "epoch": 14.59400260756193, - "grad_norm": 1.5329540967941284, - "learning_rate": 7.238693467336683e-05, - "loss": 5.2059, - "step": 27984 - }, - { - "epoch": 14.594524119947849, - "grad_norm": 1.5543251037597656, - "learning_rate": 7.23859296482412e-05, - "loss": 5.368, - "step": 27985 - }, - { - "epoch": 14.595045632333768, - "grad_norm": 1.5007320642471313, - "learning_rate": 7.238492462311558e-05, - "loss": 5.2482, - "step": 27986 - }, - { - "epoch": 14.595567144719688, - "grad_norm": 1.360280156135559, - "learning_rate": 7.238391959798996e-05, - "loss": 5.0333, - "step": 27987 - }, - { - "epoch": 14.596088657105605, - "grad_norm": 1.4257696866989136, - "learning_rate": 7.238291457286433e-05, - "loss": 5.1999, - "step": 27988 - }, - { - "epoch": 14.596610169491525, - "grad_norm": 1.5119869709014893, - "learning_rate": 7.23819095477387e-05, - "loss": 5.3479, - "step": 27989 - }, - { - "epoch": 14.597131681877444, - "grad_norm": 1.5788111686706543, - "learning_rate": 7.238090452261307e-05, - "loss": 5.5469, - "step": 27990 - }, - { - "epoch": 14.597653194263364, - "grad_norm": 1.430322527885437, - "learning_rate": 7.237989949748743e-05, - "loss": 5.5571, - "step": 27991 - }, - { - "epoch": 14.598174706649283, - "grad_norm": 1.4739853143692017, - "learning_rate": 7.237889447236181e-05, - "loss": 5.5132, - "step": 27992 - }, - { - "epoch": 14.598696219035203, - "grad_norm": 1.4764485359191895, - "learning_rate": 7.237788944723617e-05, - "loss": 5.4843, - "step": 27993 - }, - { - "epoch": 14.59921773142112, - "grad_norm": 1.467971920967102, - "learning_rate": 7.237688442211055e-05, - "loss": 5.6304, - "step": 27994 - }, - { - "epoch": 14.59973924380704, - "grad_norm": 1.476412057876587, - "learning_rate": 7.237587939698493e-05, - "loss": 5.7819, - "step": 27995 - }, - { - "epoch": 14.60026075619296, - "grad_norm": 1.605830192565918, - "learning_rate": 7.237487437185931e-05, - "loss": 5.0201, - "step": 27996 - }, - { - "epoch": 14.600782268578879, - "grad_norm": 1.4195753335952759, - "learning_rate": 7.237386934673367e-05, - "loss": 5.7424, - "step": 27997 - }, - { - "epoch": 14.601303780964798, - "grad_norm": 1.4435501098632812, - "learning_rate": 7.237286432160805e-05, - "loss": 4.9928, - "step": 27998 - }, - { - "epoch": 14.601825293350718, - "grad_norm": 1.5351214408874512, - "learning_rate": 7.237185929648241e-05, - "loss": 5.4706, - "step": 27999 - }, - { - "epoch": 14.602346805736635, - "grad_norm": 1.594736933708191, - "learning_rate": 7.237085427135679e-05, - "loss": 5.0199, - "step": 28000 - }, - { - "epoch": 14.602868318122555, - "grad_norm": 1.4189056158065796, - "learning_rate": 7.236984924623116e-05, - "loss": 5.5337, - "step": 28001 - }, - { - "epoch": 14.603389830508474, - "grad_norm": 1.446407437324524, - "learning_rate": 7.236884422110553e-05, - "loss": 5.4512, - "step": 28002 - }, - { - "epoch": 14.603911342894394, - "grad_norm": 1.5739566087722778, - "learning_rate": 7.23678391959799e-05, - "loss": 5.176, - "step": 28003 - }, - { - "epoch": 14.604432855280313, - "grad_norm": 1.5108952522277832, - "learning_rate": 7.236683417085426e-05, - "loss": 5.7527, - "step": 28004 - }, - { - "epoch": 14.604954367666233, - "grad_norm": 1.3973886966705322, - "learning_rate": 7.236582914572864e-05, - "loss": 5.2288, - "step": 28005 - }, - { - "epoch": 14.60547588005215, - "grad_norm": 1.4772357940673828, - "learning_rate": 7.236482412060302e-05, - "loss": 5.4711, - "step": 28006 - }, - { - "epoch": 14.60599739243807, - "grad_norm": 1.5998121500015259, - "learning_rate": 7.23638190954774e-05, - "loss": 5.323, - "step": 28007 - }, - { - "epoch": 14.60651890482399, - "grad_norm": 1.3847894668579102, - "learning_rate": 7.236281407035176e-05, - "loss": 5.6462, - "step": 28008 - }, - { - "epoch": 14.607040417209909, - "grad_norm": 1.4223586320877075, - "learning_rate": 7.236180904522614e-05, - "loss": 5.5919, - "step": 28009 - }, - { - "epoch": 14.607561929595828, - "grad_norm": 1.541958212852478, - "learning_rate": 7.23608040201005e-05, - "loss": 5.7123, - "step": 28010 - }, - { - "epoch": 14.608083441981748, - "grad_norm": 1.9094480276107788, - "learning_rate": 7.235979899497488e-05, - "loss": 5.4288, - "step": 28011 - }, - { - "epoch": 14.608604954367665, - "grad_norm": 1.5376721620559692, - "learning_rate": 7.235879396984924e-05, - "loss": 5.0312, - "step": 28012 - }, - { - "epoch": 14.609126466753585, - "grad_norm": 1.412558913230896, - "learning_rate": 7.235778894472362e-05, - "loss": 5.5718, - "step": 28013 - }, - { - "epoch": 14.609647979139504, - "grad_norm": 1.4906580448150635, - "learning_rate": 7.235678391959799e-05, - "loss": 5.5313, - "step": 28014 - }, - { - "epoch": 14.610169491525424, - "grad_norm": 1.4512227773666382, - "learning_rate": 7.235577889447236e-05, - "loss": 5.6135, - "step": 28015 - }, - { - "epoch": 14.610691003911343, - "grad_norm": 1.3743159770965576, - "learning_rate": 7.235477386934674e-05, - "loss": 5.8599, - "step": 28016 - }, - { - "epoch": 14.611212516297263, - "grad_norm": 1.5411745309829712, - "learning_rate": 7.235376884422111e-05, - "loss": 4.6358, - "step": 28017 - }, - { - "epoch": 14.61173402868318, - "grad_norm": 1.54520845413208, - "learning_rate": 7.235276381909548e-05, - "loss": 5.4504, - "step": 28018 - }, - { - "epoch": 14.6122555410691, - "grad_norm": 1.542235016822815, - "learning_rate": 7.235175879396985e-05, - "loss": 5.1504, - "step": 28019 - }, - { - "epoch": 14.61277705345502, - "grad_norm": 1.4786159992218018, - "learning_rate": 7.235075376884423e-05, - "loss": 5.3895, - "step": 28020 - }, - { - "epoch": 14.613298565840939, - "grad_norm": 1.4350595474243164, - "learning_rate": 7.234974874371859e-05, - "loss": 5.183, - "step": 28021 - }, - { - "epoch": 14.613820078226858, - "grad_norm": 1.527738094329834, - "learning_rate": 7.234874371859297e-05, - "loss": 5.3716, - "step": 28022 - }, - { - "epoch": 14.614341590612778, - "grad_norm": 1.5216654539108276, - "learning_rate": 7.234773869346733e-05, - "loss": 5.2096, - "step": 28023 - }, - { - "epoch": 14.614863102998696, - "grad_norm": 1.4527535438537598, - "learning_rate": 7.234673366834171e-05, - "loss": 5.6906, - "step": 28024 - }, - { - "epoch": 14.615384615384615, - "grad_norm": 1.5394896268844604, - "learning_rate": 7.234572864321608e-05, - "loss": 5.2779, - "step": 28025 - }, - { - "epoch": 14.615906127770534, - "grad_norm": 1.5099927186965942, - "learning_rate": 7.234472361809045e-05, - "loss": 5.1625, - "step": 28026 - }, - { - "epoch": 14.616427640156454, - "grad_norm": 1.4793306589126587, - "learning_rate": 7.234371859296483e-05, - "loss": 5.5822, - "step": 28027 - }, - { - "epoch": 14.616949152542373, - "grad_norm": 1.4475871324539185, - "learning_rate": 7.234271356783921e-05, - "loss": 5.626, - "step": 28028 - }, - { - "epoch": 14.617470664928293, - "grad_norm": 1.5126404762268066, - "learning_rate": 7.234170854271357e-05, - "loss": 5.6785, - "step": 28029 - }, - { - "epoch": 14.61799217731421, - "grad_norm": 1.502301812171936, - "learning_rate": 7.234070351758794e-05, - "loss": 5.3574, - "step": 28030 - }, - { - "epoch": 14.61851368970013, - "grad_norm": 1.5177226066589355, - "learning_rate": 7.233969849246232e-05, - "loss": 5.4452, - "step": 28031 - }, - { - "epoch": 14.61903520208605, - "grad_norm": 1.4015069007873535, - "learning_rate": 7.233869346733668e-05, - "loss": 5.3573, - "step": 28032 - }, - { - "epoch": 14.619556714471969, - "grad_norm": 1.5564148426055908, - "learning_rate": 7.233768844221106e-05, - "loss": 5.2268, - "step": 28033 - }, - { - "epoch": 14.620078226857888, - "grad_norm": 1.4391882419586182, - "learning_rate": 7.233668341708542e-05, - "loss": 5.4005, - "step": 28034 - }, - { - "epoch": 14.620599739243808, - "grad_norm": 1.5460859537124634, - "learning_rate": 7.23356783919598e-05, - "loss": 4.8861, - "step": 28035 - }, - { - "epoch": 14.621121251629726, - "grad_norm": 1.3467835187911987, - "learning_rate": 7.233467336683418e-05, - "loss": 5.8648, - "step": 28036 - }, - { - "epoch": 14.621642764015645, - "grad_norm": 1.5800251960754395, - "learning_rate": 7.233366834170856e-05, - "loss": 5.4541, - "step": 28037 - }, - { - "epoch": 14.622164276401564, - "grad_norm": 1.444820523262024, - "learning_rate": 7.233266331658292e-05, - "loss": 5.567, - "step": 28038 - }, - { - "epoch": 14.622685788787484, - "grad_norm": 1.620198130607605, - "learning_rate": 7.23316582914573e-05, - "loss": 5.0313, - "step": 28039 - }, - { - "epoch": 14.623207301173403, - "grad_norm": 1.534997820854187, - "learning_rate": 7.233065326633166e-05, - "loss": 5.3123, - "step": 28040 - }, - { - "epoch": 14.623728813559323, - "grad_norm": 1.4978152513504028, - "learning_rate": 7.232964824120604e-05, - "loss": 5.4966, - "step": 28041 - }, - { - "epoch": 14.62425032594524, - "grad_norm": 1.494978904724121, - "learning_rate": 7.23286432160804e-05, - "loss": 5.3385, - "step": 28042 - }, - { - "epoch": 14.62477183833116, - "grad_norm": 1.4366554021835327, - "learning_rate": 7.232763819095478e-05, - "loss": 5.8428, - "step": 28043 - }, - { - "epoch": 14.62529335071708, - "grad_norm": 1.4840329885482788, - "learning_rate": 7.232663316582915e-05, - "loss": 5.6972, - "step": 28044 - }, - { - "epoch": 14.625814863102999, - "grad_norm": 1.5645627975463867, - "learning_rate": 7.232562814070351e-05, - "loss": 5.2837, - "step": 28045 - }, - { - "epoch": 14.626336375488918, - "grad_norm": 1.5362759828567505, - "learning_rate": 7.232462311557789e-05, - "loss": 5.2555, - "step": 28046 - }, - { - "epoch": 14.626857887874838, - "grad_norm": 1.59690260887146, - "learning_rate": 7.232361809045227e-05, - "loss": 4.962, - "step": 28047 - }, - { - "epoch": 14.627379400260756, - "grad_norm": 1.4377580881118774, - "learning_rate": 7.232261306532664e-05, - "loss": 5.5033, - "step": 28048 - }, - { - "epoch": 14.627900912646675, - "grad_norm": 1.499528169631958, - "learning_rate": 7.232160804020101e-05, - "loss": 5.1934, - "step": 28049 - }, - { - "epoch": 14.628422425032594, - "grad_norm": 1.4355498552322388, - "learning_rate": 7.232060301507539e-05, - "loss": 5.1571, - "step": 28050 - }, - { - "epoch": 14.628943937418514, - "grad_norm": 1.447056531906128, - "learning_rate": 7.231959798994975e-05, - "loss": 4.8334, - "step": 28051 - }, - { - "epoch": 14.629465449804433, - "grad_norm": 1.552718997001648, - "learning_rate": 7.231859296482413e-05, - "loss": 5.2716, - "step": 28052 - }, - { - "epoch": 14.629986962190351, - "grad_norm": 1.4355862140655518, - "learning_rate": 7.231758793969849e-05, - "loss": 5.3548, - "step": 28053 - }, - { - "epoch": 14.63050847457627, - "grad_norm": 1.350435495376587, - "learning_rate": 7.231658291457287e-05, - "loss": 5.5829, - "step": 28054 - }, - { - "epoch": 14.63102998696219, - "grad_norm": 1.515970230102539, - "learning_rate": 7.231557788944723e-05, - "loss": 5.1625, - "step": 28055 - }, - { - "epoch": 14.63155149934811, - "grad_norm": 1.5217311382293701, - "learning_rate": 7.231457286432161e-05, - "loss": 5.2319, - "step": 28056 - }, - { - "epoch": 14.632073011734029, - "grad_norm": 1.5027939081192017, - "learning_rate": 7.231356783919599e-05, - "loss": 5.2898, - "step": 28057 - }, - { - "epoch": 14.632594524119948, - "grad_norm": 1.64187753200531, - "learning_rate": 7.231256281407035e-05, - "loss": 4.8136, - "step": 28058 - }, - { - "epoch": 14.633116036505868, - "grad_norm": 1.454702377319336, - "learning_rate": 7.231155778894473e-05, - "loss": 5.4222, - "step": 28059 - }, - { - "epoch": 14.633637548891786, - "grad_norm": 1.4725525379180908, - "learning_rate": 7.23105527638191e-05, - "loss": 5.4143, - "step": 28060 - }, - { - "epoch": 14.634159061277705, - "grad_norm": 1.418113350868225, - "learning_rate": 7.230954773869347e-05, - "loss": 5.6502, - "step": 28061 - }, - { - "epoch": 14.634680573663625, - "grad_norm": 1.3824782371520996, - "learning_rate": 7.230854271356784e-05, - "loss": 5.3221, - "step": 28062 - }, - { - "epoch": 14.635202086049544, - "grad_norm": 1.5377013683319092, - "learning_rate": 7.230753768844222e-05, - "loss": 5.0215, - "step": 28063 - }, - { - "epoch": 14.635723598435463, - "grad_norm": 1.4318524599075317, - "learning_rate": 7.230653266331658e-05, - "loss": 4.8036, - "step": 28064 - }, - { - "epoch": 14.636245110821381, - "grad_norm": 1.4695144891738892, - "learning_rate": 7.230552763819096e-05, - "loss": 5.4955, - "step": 28065 - }, - { - "epoch": 14.6367666232073, - "grad_norm": 1.4555621147155762, - "learning_rate": 7.230452261306532e-05, - "loss": 5.7144, - "step": 28066 - }, - { - "epoch": 14.63728813559322, - "grad_norm": 1.4066228866577148, - "learning_rate": 7.23035175879397e-05, - "loss": 5.5651, - "step": 28067 - }, - { - "epoch": 14.63780964797914, - "grad_norm": 1.4220725297927856, - "learning_rate": 7.230251256281408e-05, - "loss": 4.5415, - "step": 28068 - }, - { - "epoch": 14.638331160365059, - "grad_norm": 1.4254662990570068, - "learning_rate": 7.230150753768846e-05, - "loss": 5.804, - "step": 28069 - }, - { - "epoch": 14.638852672750978, - "grad_norm": 1.4819610118865967, - "learning_rate": 7.230050251256282e-05, - "loss": 5.5196, - "step": 28070 - }, - { - "epoch": 14.639374185136898, - "grad_norm": 1.616431713104248, - "learning_rate": 7.229949748743718e-05, - "loss": 4.9181, - "step": 28071 - }, - { - "epoch": 14.639895697522816, - "grad_norm": 1.4027271270751953, - "learning_rate": 7.229849246231156e-05, - "loss": 5.8865, - "step": 28072 - }, - { - "epoch": 14.640417209908735, - "grad_norm": 1.479526162147522, - "learning_rate": 7.229748743718593e-05, - "loss": 5.2014, - "step": 28073 - }, - { - "epoch": 14.640938722294655, - "grad_norm": 1.3837097883224487, - "learning_rate": 7.22964824120603e-05, - "loss": 5.6253, - "step": 28074 - }, - { - "epoch": 14.641460234680574, - "grad_norm": 1.5124224424362183, - "learning_rate": 7.229547738693467e-05, - "loss": 5.6861, - "step": 28075 - }, - { - "epoch": 14.641981747066493, - "grad_norm": 1.4464548826217651, - "learning_rate": 7.229447236180905e-05, - "loss": 5.6364, - "step": 28076 - }, - { - "epoch": 14.642503259452411, - "grad_norm": 1.4539713859558105, - "learning_rate": 7.229346733668342e-05, - "loss": 5.7719, - "step": 28077 - }, - { - "epoch": 14.64302477183833, - "grad_norm": 1.5073076486587524, - "learning_rate": 7.22924623115578e-05, - "loss": 5.2897, - "step": 28078 - }, - { - "epoch": 14.64354628422425, - "grad_norm": 1.4004369974136353, - "learning_rate": 7.229145728643217e-05, - "loss": 5.5817, - "step": 28079 - }, - { - "epoch": 14.64406779661017, - "grad_norm": 1.358492374420166, - "learning_rate": 7.229045226130654e-05, - "loss": 5.0299, - "step": 28080 - }, - { - "epoch": 14.644589308996089, - "grad_norm": 1.5196316242218018, - "learning_rate": 7.228944723618091e-05, - "loss": 5.3023, - "step": 28081 - }, - { - "epoch": 14.645110821382008, - "grad_norm": 1.5064361095428467, - "learning_rate": 7.228844221105529e-05, - "loss": 4.9911, - "step": 28082 - }, - { - "epoch": 14.645632333767926, - "grad_norm": 1.4914406538009644, - "learning_rate": 7.228743718592965e-05, - "loss": 5.3503, - "step": 28083 - }, - { - "epoch": 14.646153846153846, - "grad_norm": 1.4332497119903564, - "learning_rate": 7.228643216080401e-05, - "loss": 5.3728, - "step": 28084 - }, - { - "epoch": 14.646675358539765, - "grad_norm": 1.4163998365402222, - "learning_rate": 7.228542713567839e-05, - "loss": 5.482, - "step": 28085 - }, - { - "epoch": 14.647196870925685, - "grad_norm": 1.478324055671692, - "learning_rate": 7.228442211055276e-05, - "loss": 5.7025, - "step": 28086 - }, - { - "epoch": 14.647718383311604, - "grad_norm": 1.5797703266143799, - "learning_rate": 7.228341708542713e-05, - "loss": 5.0705, - "step": 28087 - }, - { - "epoch": 14.648239895697523, - "grad_norm": 1.598757028579712, - "learning_rate": 7.228241206030151e-05, - "loss": 4.949, - "step": 28088 - }, - { - "epoch": 14.648761408083441, - "grad_norm": 1.5320329666137695, - "learning_rate": 7.228140703517589e-05, - "loss": 5.3692, - "step": 28089 - }, - { - "epoch": 14.64928292046936, - "grad_norm": 1.4285638332366943, - "learning_rate": 7.228040201005025e-05, - "loss": 5.3843, - "step": 28090 - }, - { - "epoch": 14.64980443285528, - "grad_norm": 1.445030927658081, - "learning_rate": 7.227939698492463e-05, - "loss": 5.8227, - "step": 28091 - }, - { - "epoch": 14.6503259452412, - "grad_norm": 1.5767546892166138, - "learning_rate": 7.2278391959799e-05, - "loss": 4.8972, - "step": 28092 - }, - { - "epoch": 14.650847457627119, - "grad_norm": 1.4194116592407227, - "learning_rate": 7.227738693467337e-05, - "loss": 5.3037, - "step": 28093 - }, - { - "epoch": 14.651368970013039, - "grad_norm": 1.462957739830017, - "learning_rate": 7.227638190954774e-05, - "loss": 4.9469, - "step": 28094 - }, - { - "epoch": 14.651890482398956, - "grad_norm": 1.3990936279296875, - "learning_rate": 7.227537688442212e-05, - "loss": 5.4519, - "step": 28095 - }, - { - "epoch": 14.652411994784876, - "grad_norm": 2.222689390182495, - "learning_rate": 7.227437185929648e-05, - "loss": 4.5882, - "step": 28096 - }, - { - "epoch": 14.652933507170795, - "grad_norm": 1.4791616201400757, - "learning_rate": 7.227336683417086e-05, - "loss": 5.1946, - "step": 28097 - }, - { - "epoch": 14.653455019556715, - "grad_norm": 1.3871874809265137, - "learning_rate": 7.227236180904524e-05, - "loss": 5.4466, - "step": 28098 - }, - { - "epoch": 14.653976531942634, - "grad_norm": 1.3762584924697876, - "learning_rate": 7.22713567839196e-05, - "loss": 5.7819, - "step": 28099 - }, - { - "epoch": 14.654498044328554, - "grad_norm": 1.536110758781433, - "learning_rate": 7.227035175879398e-05, - "loss": 5.478, - "step": 28100 - }, - { - "epoch": 14.655019556714471, - "grad_norm": 1.4418363571166992, - "learning_rate": 7.226934673366834e-05, - "loss": 5.3224, - "step": 28101 - }, - { - "epoch": 14.65554106910039, - "grad_norm": 1.403907299041748, - "learning_rate": 7.226834170854272e-05, - "loss": 5.3702, - "step": 28102 - }, - { - "epoch": 14.65606258148631, - "grad_norm": 1.466815710067749, - "learning_rate": 7.226733668341709e-05, - "loss": 5.5951, - "step": 28103 - }, - { - "epoch": 14.65658409387223, - "grad_norm": 1.5876373052597046, - "learning_rate": 7.226633165829146e-05, - "loss": 5.2101, - "step": 28104 - }, - { - "epoch": 14.657105606258149, - "grad_norm": 1.6359694004058838, - "learning_rate": 7.226532663316583e-05, - "loss": 5.3103, - "step": 28105 - }, - { - "epoch": 14.657627118644069, - "grad_norm": 1.4625928401947021, - "learning_rate": 7.22643216080402e-05, - "loss": 5.0327, - "step": 28106 - }, - { - "epoch": 14.658148631029986, - "grad_norm": 1.4175279140472412, - "learning_rate": 7.226331658291457e-05, - "loss": 5.9718, - "step": 28107 - }, - { - "epoch": 14.658670143415906, - "grad_norm": 1.4957834482192993, - "learning_rate": 7.226231155778895e-05, - "loss": 5.6512, - "step": 28108 - }, - { - "epoch": 14.659191655801825, - "grad_norm": 1.4261925220489502, - "learning_rate": 7.226130653266333e-05, - "loss": 5.571, - "step": 28109 - }, - { - "epoch": 14.659713168187745, - "grad_norm": 1.4031920433044434, - "learning_rate": 7.226030150753769e-05, - "loss": 5.4135, - "step": 28110 - }, - { - "epoch": 14.660234680573664, - "grad_norm": 1.3680752515792847, - "learning_rate": 7.225929648241207e-05, - "loss": 5.7767, - "step": 28111 - }, - { - "epoch": 14.660756192959584, - "grad_norm": 1.5410380363464355, - "learning_rate": 7.225829145728643e-05, - "loss": 5.5554, - "step": 28112 - }, - { - "epoch": 14.661277705345501, - "grad_norm": 1.4223462343215942, - "learning_rate": 7.225728643216081e-05, - "loss": 5.5536, - "step": 28113 - }, - { - "epoch": 14.66179921773142, - "grad_norm": 1.541030764579773, - "learning_rate": 7.225628140703517e-05, - "loss": 5.5858, - "step": 28114 - }, - { - "epoch": 14.66232073011734, - "grad_norm": 1.4607298374176025, - "learning_rate": 7.225527638190955e-05, - "loss": 5.551, - "step": 28115 - }, - { - "epoch": 14.66284224250326, - "grad_norm": 1.3986035585403442, - "learning_rate": 7.225427135678392e-05, - "loss": 5.556, - "step": 28116 - }, - { - "epoch": 14.663363754889179, - "grad_norm": 1.5912995338439941, - "learning_rate": 7.22532663316583e-05, - "loss": 5.2795, - "step": 28117 - }, - { - "epoch": 14.663885267275099, - "grad_norm": 1.4686657190322876, - "learning_rate": 7.225226130653267e-05, - "loss": 5.3268, - "step": 28118 - }, - { - "epoch": 14.664406779661016, - "grad_norm": 1.4905542135238647, - "learning_rate": 7.225125628140705e-05, - "loss": 4.9626, - "step": 28119 - }, - { - "epoch": 14.664928292046936, - "grad_norm": 1.356906771659851, - "learning_rate": 7.225025125628141e-05, - "loss": 5.6143, - "step": 28120 - }, - { - "epoch": 14.665449804432855, - "grad_norm": 1.4076216220855713, - "learning_rate": 7.224924623115579e-05, - "loss": 5.476, - "step": 28121 - }, - { - "epoch": 14.665971316818775, - "grad_norm": 1.3987082242965698, - "learning_rate": 7.224824120603016e-05, - "loss": 5.3922, - "step": 28122 - }, - { - "epoch": 14.666492829204694, - "grad_norm": 1.4661831855773926, - "learning_rate": 7.224723618090452e-05, - "loss": 5.7243, - "step": 28123 - }, - { - "epoch": 14.667014341590614, - "grad_norm": 1.460618257522583, - "learning_rate": 7.22462311557789e-05, - "loss": 5.5515, - "step": 28124 - }, - { - "epoch": 14.667535853976531, - "grad_norm": 1.3748971223831177, - "learning_rate": 7.224522613065326e-05, - "loss": 5.5255, - "step": 28125 - }, - { - "epoch": 14.66805736636245, - "grad_norm": 1.4378528594970703, - "learning_rate": 7.224422110552764e-05, - "loss": 5.2647, - "step": 28126 - }, - { - "epoch": 14.66857887874837, - "grad_norm": 1.386738896369934, - "learning_rate": 7.2243216080402e-05, - "loss": 4.9842, - "step": 28127 - }, - { - "epoch": 14.66910039113429, - "grad_norm": 1.4819945096969604, - "learning_rate": 7.224221105527638e-05, - "loss": 5.1302, - "step": 28128 - }, - { - "epoch": 14.66962190352021, - "grad_norm": 1.4874398708343506, - "learning_rate": 7.224120603015076e-05, - "loss": 5.8266, - "step": 28129 - }, - { - "epoch": 14.670143415906129, - "grad_norm": 1.4627703428268433, - "learning_rate": 7.224020100502514e-05, - "loss": 5.4983, - "step": 28130 - }, - { - "epoch": 14.670664928292046, - "grad_norm": 1.4871090650558472, - "learning_rate": 7.22391959798995e-05, - "loss": 5.3223, - "step": 28131 - }, - { - "epoch": 14.671186440677966, - "grad_norm": 1.4351727962493896, - "learning_rate": 7.223819095477388e-05, - "loss": 5.6504, - "step": 28132 - }, - { - "epoch": 14.671707953063885, - "grad_norm": 1.5209295749664307, - "learning_rate": 7.223718592964824e-05, - "loss": 5.1513, - "step": 28133 - }, - { - "epoch": 14.672229465449805, - "grad_norm": 1.4914367198944092, - "learning_rate": 7.223618090452262e-05, - "loss": 5.6763, - "step": 28134 - }, - { - "epoch": 14.672750977835724, - "grad_norm": 1.693715214729309, - "learning_rate": 7.223517587939699e-05, - "loss": 5.1142, - "step": 28135 - }, - { - "epoch": 14.673272490221644, - "grad_norm": 1.5869464874267578, - "learning_rate": 7.223417085427136e-05, - "loss": 5.0296, - "step": 28136 - }, - { - "epoch": 14.673794002607561, - "grad_norm": 1.565017580986023, - "learning_rate": 7.223316582914573e-05, - "loss": 5.4273, - "step": 28137 - }, - { - "epoch": 14.67431551499348, - "grad_norm": 1.5046159029006958, - "learning_rate": 7.22321608040201e-05, - "loss": 5.6526, - "step": 28138 - }, - { - "epoch": 14.6748370273794, - "grad_norm": 1.362112283706665, - "learning_rate": 7.223115577889448e-05, - "loss": 5.6844, - "step": 28139 - }, - { - "epoch": 14.67535853976532, - "grad_norm": 1.423667311668396, - "learning_rate": 7.223015075376885e-05, - "loss": 5.741, - "step": 28140 - }, - { - "epoch": 14.67588005215124, - "grad_norm": 1.4391424655914307, - "learning_rate": 7.222914572864323e-05, - "loss": 5.0209, - "step": 28141 - }, - { - "epoch": 14.676401564537159, - "grad_norm": 1.4779729843139648, - "learning_rate": 7.222814070351759e-05, - "loss": 5.0977, - "step": 28142 - }, - { - "epoch": 14.676923076923076, - "grad_norm": 1.4692564010620117, - "learning_rate": 7.222713567839197e-05, - "loss": 5.241, - "step": 28143 - }, - { - "epoch": 14.677444589308996, - "grad_norm": 1.5144970417022705, - "learning_rate": 7.222613065326633e-05, - "loss": 5.0863, - "step": 28144 - }, - { - "epoch": 14.677966101694915, - "grad_norm": 1.4394055604934692, - "learning_rate": 7.222512562814071e-05, - "loss": 5.5017, - "step": 28145 - }, - { - "epoch": 14.678487614080835, - "grad_norm": 1.5744074583053589, - "learning_rate": 7.222412060301507e-05, - "loss": 5.2759, - "step": 28146 - }, - { - "epoch": 14.679009126466754, - "grad_norm": 1.4384647607803345, - "learning_rate": 7.222311557788945e-05, - "loss": 5.3746, - "step": 28147 - }, - { - "epoch": 14.679530638852672, - "grad_norm": 1.484196662902832, - "learning_rate": 7.222211055276382e-05, - "loss": 5.212, - "step": 28148 - }, - { - "epoch": 14.680052151238591, - "grad_norm": 1.4263708591461182, - "learning_rate": 7.22211055276382e-05, - "loss": 4.6512, - "step": 28149 - }, - { - "epoch": 14.68057366362451, - "grad_norm": 1.4028003215789795, - "learning_rate": 7.222010050251257e-05, - "loss": 5.3045, - "step": 28150 - }, - { - "epoch": 14.68109517601043, - "grad_norm": 1.527626633644104, - "learning_rate": 7.221909547738694e-05, - "loss": 5.5244, - "step": 28151 - }, - { - "epoch": 14.68161668839635, - "grad_norm": 1.3645013570785522, - "learning_rate": 7.221809045226131e-05, - "loss": 5.7756, - "step": 28152 - }, - { - "epoch": 14.68213820078227, - "grad_norm": 1.5036036968231201, - "learning_rate": 7.221708542713568e-05, - "loss": 5.3177, - "step": 28153 - }, - { - "epoch": 14.682659713168189, - "grad_norm": 1.5129085779190063, - "learning_rate": 7.221608040201006e-05, - "loss": 5.2543, - "step": 28154 - }, - { - "epoch": 14.683181225554106, - "grad_norm": 1.6177122592926025, - "learning_rate": 7.221507537688442e-05, - "loss": 5.2857, - "step": 28155 - }, - { - "epoch": 14.683702737940026, - "grad_norm": 1.4480351209640503, - "learning_rate": 7.22140703517588e-05, - "loss": 5.1973, - "step": 28156 - }, - { - "epoch": 14.684224250325945, - "grad_norm": 2.2204015254974365, - "learning_rate": 7.221306532663316e-05, - "loss": 4.868, - "step": 28157 - }, - { - "epoch": 14.684745762711865, - "grad_norm": 1.3705087900161743, - "learning_rate": 7.221206030150754e-05, - "loss": 5.7051, - "step": 28158 - }, - { - "epoch": 14.685267275097784, - "grad_norm": 1.4836175441741943, - "learning_rate": 7.22110552763819e-05, - "loss": 5.3287, - "step": 28159 - }, - { - "epoch": 14.685788787483702, - "grad_norm": 1.4403913021087646, - "learning_rate": 7.221005025125628e-05, - "loss": 5.4785, - "step": 28160 - }, - { - "epoch": 14.686310299869621, - "grad_norm": 1.5646923780441284, - "learning_rate": 7.220904522613066e-05, - "loss": 5.4514, - "step": 28161 - }, - { - "epoch": 14.68683181225554, - "grad_norm": 1.4896273612976074, - "learning_rate": 7.220804020100504e-05, - "loss": 5.3447, - "step": 28162 - }, - { - "epoch": 14.68735332464146, - "grad_norm": 1.5137661695480347, - "learning_rate": 7.22070351758794e-05, - "loss": 5.4338, - "step": 28163 - }, - { - "epoch": 14.68787483702738, - "grad_norm": 1.457275390625, - "learning_rate": 7.220603015075377e-05, - "loss": 4.974, - "step": 28164 - }, - { - "epoch": 14.6883963494133, - "grad_norm": 1.4073340892791748, - "learning_rate": 7.220502512562814e-05, - "loss": 5.8568, - "step": 28165 - }, - { - "epoch": 14.688917861799219, - "grad_norm": 1.4968607425689697, - "learning_rate": 7.220402010050251e-05, - "loss": 5.499, - "step": 28166 - }, - { - "epoch": 14.689439374185136, - "grad_norm": 1.4751383066177368, - "learning_rate": 7.220301507537689e-05, - "loss": 5.3574, - "step": 28167 - }, - { - "epoch": 14.689960886571056, - "grad_norm": 1.4026161432266235, - "learning_rate": 7.220201005025125e-05, - "loss": 5.4814, - "step": 28168 - }, - { - "epoch": 14.690482398956975, - "grad_norm": 1.489212989807129, - "learning_rate": 7.220100502512563e-05, - "loss": 5.085, - "step": 28169 - }, - { - "epoch": 14.691003911342895, - "grad_norm": 1.4058120250701904, - "learning_rate": 7.22e-05, - "loss": 5.0366, - "step": 28170 - }, - { - "epoch": 14.691525423728814, - "grad_norm": 1.4367023706436157, - "learning_rate": 7.219899497487438e-05, - "loss": 5.5788, - "step": 28171 - }, - { - "epoch": 14.692046936114732, - "grad_norm": 1.460575819015503, - "learning_rate": 7.219798994974875e-05, - "loss": 5.5572, - "step": 28172 - }, - { - "epoch": 14.692568448500651, - "grad_norm": 1.54339599609375, - "learning_rate": 7.219698492462313e-05, - "loss": 5.2483, - "step": 28173 - }, - { - "epoch": 14.69308996088657, - "grad_norm": 1.4549883604049683, - "learning_rate": 7.219597989949749e-05, - "loss": 5.6114, - "step": 28174 - }, - { - "epoch": 14.69361147327249, - "grad_norm": 1.3347749710083008, - "learning_rate": 7.219497487437187e-05, - "loss": 5.0343, - "step": 28175 - }, - { - "epoch": 14.69413298565841, - "grad_norm": 1.441857933998108, - "learning_rate": 7.219396984924623e-05, - "loss": 4.9028, - "step": 28176 - }, - { - "epoch": 14.69465449804433, - "grad_norm": 1.4734344482421875, - "learning_rate": 7.21929648241206e-05, - "loss": 5.2901, - "step": 28177 - }, - { - "epoch": 14.695176010430247, - "grad_norm": 1.758072018623352, - "learning_rate": 7.219195979899498e-05, - "loss": 4.7856, - "step": 28178 - }, - { - "epoch": 14.695697522816166, - "grad_norm": 1.6053146123886108, - "learning_rate": 7.219095477386934e-05, - "loss": 5.0003, - "step": 28179 - }, - { - "epoch": 14.696219035202086, - "grad_norm": 1.6110655069351196, - "learning_rate": 7.218994974874372e-05, - "loss": 5.3736, - "step": 28180 - }, - { - "epoch": 14.696740547588005, - "grad_norm": 1.5266666412353516, - "learning_rate": 7.21889447236181e-05, - "loss": 5.7229, - "step": 28181 - }, - { - "epoch": 14.697262059973925, - "grad_norm": 1.4786460399627686, - "learning_rate": 7.218793969849247e-05, - "loss": 5.4951, - "step": 28182 - }, - { - "epoch": 14.697783572359844, - "grad_norm": 1.4230722188949585, - "learning_rate": 7.218693467336684e-05, - "loss": 5.4579, - "step": 28183 - }, - { - "epoch": 14.698305084745762, - "grad_norm": 1.535028100013733, - "learning_rate": 7.218592964824121e-05, - "loss": 5.0438, - "step": 28184 - }, - { - "epoch": 14.698826597131681, - "grad_norm": 1.5759721994400024, - "learning_rate": 7.218492462311558e-05, - "loss": 5.4524, - "step": 28185 - }, - { - "epoch": 14.6993481095176, - "grad_norm": 1.5206711292266846, - "learning_rate": 7.218391959798996e-05, - "loss": 5.4994, - "step": 28186 - }, - { - "epoch": 14.69986962190352, - "grad_norm": 1.4305146932601929, - "learning_rate": 7.218291457286432e-05, - "loss": 5.454, - "step": 28187 - }, - { - "epoch": 14.70039113428944, - "grad_norm": 1.6186145544052124, - "learning_rate": 7.21819095477387e-05, - "loss": 4.8757, - "step": 28188 - }, - { - "epoch": 14.70091264667536, - "grad_norm": 1.5152559280395508, - "learning_rate": 7.218090452261306e-05, - "loss": 5.7097, - "step": 28189 - }, - { - "epoch": 14.701434159061277, - "grad_norm": 1.4308090209960938, - "learning_rate": 7.217989949748744e-05, - "loss": 5.3886, - "step": 28190 - }, - { - "epoch": 14.701955671447196, - "grad_norm": 1.4486464262008667, - "learning_rate": 7.217889447236182e-05, - "loss": 5.2343, - "step": 28191 - }, - { - "epoch": 14.702477183833116, - "grad_norm": 1.4027429819107056, - "learning_rate": 7.217788944723618e-05, - "loss": 5.1541, - "step": 28192 - }, - { - "epoch": 14.702998696219035, - "grad_norm": 1.4367965459823608, - "learning_rate": 7.217688442211056e-05, - "loss": 5.6494, - "step": 28193 - }, - { - "epoch": 14.703520208604955, - "grad_norm": 1.443812608718872, - "learning_rate": 7.217587939698493e-05, - "loss": 5.1523, - "step": 28194 - }, - { - "epoch": 14.704041720990874, - "grad_norm": 1.4655039310455322, - "learning_rate": 7.21748743718593e-05, - "loss": 5.2704, - "step": 28195 - }, - { - "epoch": 14.704563233376792, - "grad_norm": 1.5851361751556396, - "learning_rate": 7.217386934673367e-05, - "loss": 5.2637, - "step": 28196 - }, - { - "epoch": 14.705084745762711, - "grad_norm": 1.4109044075012207, - "learning_rate": 7.217286432160805e-05, - "loss": 5.5971, - "step": 28197 - }, - { - "epoch": 14.70560625814863, - "grad_norm": 1.4803260564804077, - "learning_rate": 7.217185929648241e-05, - "loss": 5.2132, - "step": 28198 - }, - { - "epoch": 14.70612777053455, - "grad_norm": 1.421075701713562, - "learning_rate": 7.217085427135679e-05, - "loss": 5.6154, - "step": 28199 - }, - { - "epoch": 14.70664928292047, - "grad_norm": 1.4420520067214966, - "learning_rate": 7.216984924623115e-05, - "loss": 5.2614, - "step": 28200 - }, - { - "epoch": 14.70717079530639, - "grad_norm": 1.4863632917404175, - "learning_rate": 7.216884422110553e-05, - "loss": 5.6562, - "step": 28201 - }, - { - "epoch": 14.707692307692307, - "grad_norm": 1.4684005975723267, - "learning_rate": 7.216783919597991e-05, - "loss": 4.9777, - "step": 28202 - }, - { - "epoch": 14.708213820078226, - "grad_norm": 1.5669481754302979, - "learning_rate": 7.216683417085427e-05, - "loss": 5.2824, - "step": 28203 - }, - { - "epoch": 14.708735332464146, - "grad_norm": 1.3931195735931396, - "learning_rate": 7.216582914572865e-05, - "loss": 5.7996, - "step": 28204 - }, - { - "epoch": 14.709256844850065, - "grad_norm": 1.4823716878890991, - "learning_rate": 7.216482412060301e-05, - "loss": 5.2216, - "step": 28205 - }, - { - "epoch": 14.709778357235985, - "grad_norm": 1.4135807752609253, - "learning_rate": 7.216381909547739e-05, - "loss": 5.3725, - "step": 28206 - }, - { - "epoch": 14.710299869621904, - "grad_norm": 1.5749331712722778, - "learning_rate": 7.216281407035176e-05, - "loss": 4.6094, - "step": 28207 - }, - { - "epoch": 14.710821382007822, - "grad_norm": 1.4896013736724854, - "learning_rate": 7.216180904522613e-05, - "loss": 5.2563, - "step": 28208 - }, - { - "epoch": 14.711342894393741, - "grad_norm": 1.4666322469711304, - "learning_rate": 7.21608040201005e-05, - "loss": 5.2081, - "step": 28209 - }, - { - "epoch": 14.711864406779661, - "grad_norm": 1.4001638889312744, - "learning_rate": 7.215979899497488e-05, - "loss": 5.7995, - "step": 28210 - }, - { - "epoch": 14.71238591916558, - "grad_norm": 1.4972490072250366, - "learning_rate": 7.215879396984925e-05, - "loss": 5.2838, - "step": 28211 - }, - { - "epoch": 14.7129074315515, - "grad_norm": 1.499792218208313, - "learning_rate": 7.215778894472363e-05, - "loss": 5.3936, - "step": 28212 - }, - { - "epoch": 14.71342894393742, - "grad_norm": 1.3145692348480225, - "learning_rate": 7.2156783919598e-05, - "loss": 5.9015, - "step": 28213 - }, - { - "epoch": 14.713950456323337, - "grad_norm": 1.448065996170044, - "learning_rate": 7.215577889447237e-05, - "loss": 5.2874, - "step": 28214 - }, - { - "epoch": 14.714471968709256, - "grad_norm": 1.4744722843170166, - "learning_rate": 7.215477386934674e-05, - "loss": 5.6032, - "step": 28215 - }, - { - "epoch": 14.714993481095176, - "grad_norm": 1.522020697593689, - "learning_rate": 7.21537688442211e-05, - "loss": 4.9345, - "step": 28216 - }, - { - "epoch": 14.715514993481095, - "grad_norm": 1.4535695314407349, - "learning_rate": 7.215276381909548e-05, - "loss": 5.499, - "step": 28217 - }, - { - "epoch": 14.716036505867015, - "grad_norm": 1.5238364934921265, - "learning_rate": 7.215175879396984e-05, - "loss": 5.3798, - "step": 28218 - }, - { - "epoch": 14.716558018252934, - "grad_norm": 1.5729619264602661, - "learning_rate": 7.215075376884422e-05, - "loss": 5.2841, - "step": 28219 - }, - { - "epoch": 14.717079530638852, - "grad_norm": 1.3386579751968384, - "learning_rate": 7.214974874371859e-05, - "loss": 5.4626, - "step": 28220 - }, - { - "epoch": 14.717601043024771, - "grad_norm": 1.4146227836608887, - "learning_rate": 7.214874371859296e-05, - "loss": 5.603, - "step": 28221 - }, - { - "epoch": 14.718122555410691, - "grad_norm": 1.5771195888519287, - "learning_rate": 7.214773869346734e-05, - "loss": 5.0536, - "step": 28222 - }, - { - "epoch": 14.71864406779661, - "grad_norm": 1.503506064414978, - "learning_rate": 7.214673366834172e-05, - "loss": 5.5354, - "step": 28223 - }, - { - "epoch": 14.71916558018253, - "grad_norm": 1.456629753112793, - "learning_rate": 7.214572864321608e-05, - "loss": 4.7953, - "step": 28224 - }, - { - "epoch": 14.71968709256845, - "grad_norm": 1.4596984386444092, - "learning_rate": 7.214472361809046e-05, - "loss": 5.5949, - "step": 28225 - }, - { - "epoch": 14.720208604954367, - "grad_norm": 1.3697729110717773, - "learning_rate": 7.214371859296483e-05, - "loss": 5.8502, - "step": 28226 - }, - { - "epoch": 14.720730117340286, - "grad_norm": 1.5031572580337524, - "learning_rate": 7.21427135678392e-05, - "loss": 5.2277, - "step": 28227 - }, - { - "epoch": 14.721251629726206, - "grad_norm": 1.4293179512023926, - "learning_rate": 7.214170854271357e-05, - "loss": 5.619, - "step": 28228 - }, - { - "epoch": 14.721773142112125, - "grad_norm": 1.4785178899765015, - "learning_rate": 7.214070351758795e-05, - "loss": 5.1229, - "step": 28229 - }, - { - "epoch": 14.722294654498045, - "grad_norm": 1.5023335218429565, - "learning_rate": 7.213969849246231e-05, - "loss": 5.1625, - "step": 28230 - }, - { - "epoch": 14.722816166883963, - "grad_norm": 1.478812575340271, - "learning_rate": 7.213869346733669e-05, - "loss": 5.0986, - "step": 28231 - }, - { - "epoch": 14.723337679269882, - "grad_norm": 1.5936671495437622, - "learning_rate": 7.213768844221107e-05, - "loss": 5.1756, - "step": 28232 - }, - { - "epoch": 14.723859191655801, - "grad_norm": 1.5258030891418457, - "learning_rate": 7.213668341708543e-05, - "loss": 5.2876, - "step": 28233 - }, - { - "epoch": 14.724380704041721, - "grad_norm": 1.458328366279602, - "learning_rate": 7.213567839195981e-05, - "loss": 5.3506, - "step": 28234 - }, - { - "epoch": 14.72490221642764, - "grad_norm": 1.3459807634353638, - "learning_rate": 7.213467336683417e-05, - "loss": 4.8225, - "step": 28235 - }, - { - "epoch": 14.72542372881356, - "grad_norm": 1.4427366256713867, - "learning_rate": 7.213366834170855e-05, - "loss": 5.6207, - "step": 28236 - }, - { - "epoch": 14.72594524119948, - "grad_norm": 1.3654327392578125, - "learning_rate": 7.213266331658291e-05, - "loss": 5.6421, - "step": 28237 - }, - { - "epoch": 14.726466753585397, - "grad_norm": 1.6562551259994507, - "learning_rate": 7.213165829145729e-05, - "loss": 4.7617, - "step": 28238 - }, - { - "epoch": 14.726988265971316, - "grad_norm": 1.4640525579452515, - "learning_rate": 7.213065326633166e-05, - "loss": 5.3036, - "step": 28239 - }, - { - "epoch": 14.727509778357236, - "grad_norm": 1.4097212553024292, - "learning_rate": 7.212964824120603e-05, - "loss": 5.4425, - "step": 28240 - }, - { - "epoch": 14.728031290743155, - "grad_norm": 1.445619821548462, - "learning_rate": 7.21286432160804e-05, - "loss": 5.4059, - "step": 28241 - }, - { - "epoch": 14.728552803129075, - "grad_norm": 1.5069063901901245, - "learning_rate": 7.212763819095478e-05, - "loss": 5.1564, - "step": 28242 - }, - { - "epoch": 14.729074315514993, - "grad_norm": 1.6884853839874268, - "learning_rate": 7.212663316582915e-05, - "loss": 5.6337, - "step": 28243 - }, - { - "epoch": 14.729595827900912, - "grad_norm": 1.524701476097107, - "learning_rate": 7.212562814070352e-05, - "loss": 5.7377, - "step": 28244 - }, - { - "epoch": 14.730117340286832, - "grad_norm": 1.4497270584106445, - "learning_rate": 7.21246231155779e-05, - "loss": 5.6796, - "step": 28245 - }, - { - "epoch": 14.730638852672751, - "grad_norm": 1.439222812652588, - "learning_rate": 7.212361809045226e-05, - "loss": 5.69, - "step": 28246 - }, - { - "epoch": 14.73116036505867, - "grad_norm": 1.3670276403427124, - "learning_rate": 7.212261306532664e-05, - "loss": 5.8648, - "step": 28247 - }, - { - "epoch": 14.73168187744459, - "grad_norm": 1.4953721761703491, - "learning_rate": 7.2121608040201e-05, - "loss": 5.5673, - "step": 28248 - }, - { - "epoch": 14.73220338983051, - "grad_norm": 1.47298002243042, - "learning_rate": 7.212060301507538e-05, - "loss": 5.3708, - "step": 28249 - }, - { - "epoch": 14.732724902216427, - "grad_norm": 1.3440204858779907, - "learning_rate": 7.211959798994975e-05, - "loss": 5.6782, - "step": 28250 - }, - { - "epoch": 14.733246414602347, - "grad_norm": 1.3914991617202759, - "learning_rate": 7.211859296482412e-05, - "loss": 5.2406, - "step": 28251 - }, - { - "epoch": 14.733767926988266, - "grad_norm": 1.468814730644226, - "learning_rate": 7.21175879396985e-05, - "loss": 5.4138, - "step": 28252 - }, - { - "epoch": 14.734289439374185, - "grad_norm": 1.4674304723739624, - "learning_rate": 7.211658291457288e-05, - "loss": 5.685, - "step": 28253 - }, - { - "epoch": 14.734810951760105, - "grad_norm": 1.396816372871399, - "learning_rate": 7.211557788944724e-05, - "loss": 5.0301, - "step": 28254 - }, - { - "epoch": 14.735332464146023, - "grad_norm": 1.5079962015151978, - "learning_rate": 7.211457286432162e-05, - "loss": 5.2401, - "step": 28255 - }, - { - "epoch": 14.735853976531942, - "grad_norm": 1.468716025352478, - "learning_rate": 7.211356783919598e-05, - "loss": 5.484, - "step": 28256 - }, - { - "epoch": 14.736375488917862, - "grad_norm": 1.408874750137329, - "learning_rate": 7.211256281407035e-05, - "loss": 5.3789, - "step": 28257 - }, - { - "epoch": 14.736897001303781, - "grad_norm": 1.3703978061676025, - "learning_rate": 7.211155778894473e-05, - "loss": 5.6283, - "step": 28258 - }, - { - "epoch": 14.7374185136897, - "grad_norm": 1.3983272314071655, - "learning_rate": 7.211055276381909e-05, - "loss": 5.5287, - "step": 28259 - }, - { - "epoch": 14.73794002607562, - "grad_norm": 1.3526605367660522, - "learning_rate": 7.210954773869347e-05, - "loss": 5.5813, - "step": 28260 - }, - { - "epoch": 14.73846153846154, - "grad_norm": 1.5043106079101562, - "learning_rate": 7.210854271356783e-05, - "loss": 4.8432, - "step": 28261 - }, - { - "epoch": 14.738983050847457, - "grad_norm": 1.4631733894348145, - "learning_rate": 7.210753768844221e-05, - "loss": 5.2374, - "step": 28262 - }, - { - "epoch": 14.739504563233377, - "grad_norm": 1.6288541555404663, - "learning_rate": 7.210653266331659e-05, - "loss": 5.0913, - "step": 28263 - }, - { - "epoch": 14.740026075619296, - "grad_norm": 1.492732048034668, - "learning_rate": 7.210552763819097e-05, - "loss": 4.8816, - "step": 28264 - }, - { - "epoch": 14.740547588005215, - "grad_norm": 1.4587335586547852, - "learning_rate": 7.210452261306533e-05, - "loss": 5.4865, - "step": 28265 - }, - { - "epoch": 14.741069100391135, - "grad_norm": 1.4465841054916382, - "learning_rate": 7.210351758793971e-05, - "loss": 5.3657, - "step": 28266 - }, - { - "epoch": 14.741590612777053, - "grad_norm": 1.4896210432052612, - "learning_rate": 7.210251256281407e-05, - "loss": 5.2985, - "step": 28267 - }, - { - "epoch": 14.742112125162972, - "grad_norm": 1.473684310913086, - "learning_rate": 7.210150753768845e-05, - "loss": 4.4418, - "step": 28268 - }, - { - "epoch": 14.742633637548892, - "grad_norm": 1.350284218788147, - "learning_rate": 7.210050251256282e-05, - "loss": 5.8448, - "step": 28269 - }, - { - "epoch": 14.743155149934811, - "grad_norm": 1.3703519105911255, - "learning_rate": 7.209949748743718e-05, - "loss": 5.952, - "step": 28270 - }, - { - "epoch": 14.74367666232073, - "grad_norm": 1.5316839218139648, - "learning_rate": 7.209849246231156e-05, - "loss": 5.2272, - "step": 28271 - }, - { - "epoch": 14.74419817470665, - "grad_norm": 1.3518047332763672, - "learning_rate": 7.209748743718594e-05, - "loss": 5.7838, - "step": 28272 - }, - { - "epoch": 14.744719687092568, - "grad_norm": 1.5159269571304321, - "learning_rate": 7.209648241206031e-05, - "loss": 5.4492, - "step": 28273 - }, - { - "epoch": 14.745241199478487, - "grad_norm": 1.3966705799102783, - "learning_rate": 7.209547738693468e-05, - "loss": 5.4034, - "step": 28274 - }, - { - "epoch": 14.745762711864407, - "grad_norm": 1.4833393096923828, - "learning_rate": 7.209447236180906e-05, - "loss": 5.4608, - "step": 28275 - }, - { - "epoch": 14.746284224250326, - "grad_norm": 1.5949362516403198, - "learning_rate": 7.209346733668342e-05, - "loss": 5.2461, - "step": 28276 - }, - { - "epoch": 14.746805736636245, - "grad_norm": 1.4408074617385864, - "learning_rate": 7.20924623115578e-05, - "loss": 5.8062, - "step": 28277 - }, - { - "epoch": 14.747327249022165, - "grad_norm": 1.4951478242874146, - "learning_rate": 7.209145728643216e-05, - "loss": 5.3169, - "step": 28278 - }, - { - "epoch": 14.747848761408083, - "grad_norm": 1.3904082775115967, - "learning_rate": 7.209045226130654e-05, - "loss": 5.3637, - "step": 28279 - }, - { - "epoch": 14.748370273794002, - "grad_norm": 1.5419079065322876, - "learning_rate": 7.20894472361809e-05, - "loss": 5.5449, - "step": 28280 - }, - { - "epoch": 14.748891786179922, - "grad_norm": 1.4475973844528198, - "learning_rate": 7.208844221105528e-05, - "loss": 5.503, - "step": 28281 - }, - { - "epoch": 14.749413298565841, - "grad_norm": 1.4799203872680664, - "learning_rate": 7.208743718592965e-05, - "loss": 5.2097, - "step": 28282 - }, - { - "epoch": 14.74993481095176, - "grad_norm": 1.5139479637145996, - "learning_rate": 7.208643216080402e-05, - "loss": 5.6974, - "step": 28283 - }, - { - "epoch": 14.75045632333768, - "grad_norm": 1.370806336402893, - "learning_rate": 7.20854271356784e-05, - "loss": 5.6556, - "step": 28284 - }, - { - "epoch": 14.750977835723598, - "grad_norm": 1.4623289108276367, - "learning_rate": 7.208442211055277e-05, - "loss": 5.3927, - "step": 28285 - }, - { - "epoch": 14.751499348109517, - "grad_norm": 1.593437671661377, - "learning_rate": 7.208341708542714e-05, - "loss": 5.1311, - "step": 28286 - }, - { - "epoch": 14.752020860495437, - "grad_norm": 1.4435275793075562, - "learning_rate": 7.208241206030151e-05, - "loss": 5.7737, - "step": 28287 - }, - { - "epoch": 14.752542372881356, - "grad_norm": 1.4458776712417603, - "learning_rate": 7.208140703517589e-05, - "loss": 5.8312, - "step": 28288 - }, - { - "epoch": 14.753063885267276, - "grad_norm": 1.4766039848327637, - "learning_rate": 7.208040201005025e-05, - "loss": 5.165, - "step": 28289 - }, - { - "epoch": 14.753585397653195, - "grad_norm": 1.3518368005752563, - "learning_rate": 7.207939698492463e-05, - "loss": 5.7555, - "step": 28290 - }, - { - "epoch": 14.754106910039113, - "grad_norm": 1.3766859769821167, - "learning_rate": 7.207839195979899e-05, - "loss": 5.5264, - "step": 28291 - }, - { - "epoch": 14.754628422425032, - "grad_norm": 1.465477705001831, - "learning_rate": 7.207738693467337e-05, - "loss": 5.4147, - "step": 28292 - }, - { - "epoch": 14.755149934810952, - "grad_norm": 1.4935150146484375, - "learning_rate": 7.207638190954775e-05, - "loss": 5.1194, - "step": 28293 - }, - { - "epoch": 14.755671447196871, - "grad_norm": 1.6477320194244385, - "learning_rate": 7.207537688442213e-05, - "loss": 4.9388, - "step": 28294 - }, - { - "epoch": 14.75619295958279, - "grad_norm": 1.5052202939987183, - "learning_rate": 7.207437185929649e-05, - "loss": 5.4763, - "step": 28295 - }, - { - "epoch": 14.75671447196871, - "grad_norm": 1.6609479188919067, - "learning_rate": 7.207336683417085e-05, - "loss": 5.3993, - "step": 28296 - }, - { - "epoch": 14.757235984354628, - "grad_norm": 1.418387770652771, - "learning_rate": 7.207236180904523e-05, - "loss": 5.2504, - "step": 28297 - }, - { - "epoch": 14.757757496740547, - "grad_norm": 1.366052269935608, - "learning_rate": 7.20713567839196e-05, - "loss": 5.7176, - "step": 28298 - }, - { - "epoch": 14.758279009126467, - "grad_norm": 1.4041014909744263, - "learning_rate": 7.207035175879397e-05, - "loss": 5.0628, - "step": 28299 - }, - { - "epoch": 14.758800521512386, - "grad_norm": 1.445055603981018, - "learning_rate": 7.206934673366834e-05, - "loss": 5.2906, - "step": 28300 - }, - { - "epoch": 14.759322033898306, - "grad_norm": 1.4559251070022583, - "learning_rate": 7.206834170854272e-05, - "loss": 5.7374, - "step": 28301 - }, - { - "epoch": 14.759843546284225, - "grad_norm": 1.4874825477600098, - "learning_rate": 7.206733668341708e-05, - "loss": 5.5361, - "step": 28302 - }, - { - "epoch": 14.760365058670143, - "grad_norm": 1.5163094997406006, - "learning_rate": 7.206633165829146e-05, - "loss": 5.6911, - "step": 28303 - }, - { - "epoch": 14.760886571056062, - "grad_norm": 1.4885907173156738, - "learning_rate": 7.206532663316584e-05, - "loss": 5.5716, - "step": 28304 - }, - { - "epoch": 14.761408083441982, - "grad_norm": 1.5022963285446167, - "learning_rate": 7.206432160804021e-05, - "loss": 5.0909, - "step": 28305 - }, - { - "epoch": 14.761929595827901, - "grad_norm": 1.446107268333435, - "learning_rate": 7.206331658291458e-05, - "loss": 5.6658, - "step": 28306 - }, - { - "epoch": 14.76245110821382, - "grad_norm": 1.5042088031768799, - "learning_rate": 7.206231155778896e-05, - "loss": 5.3198, - "step": 28307 - }, - { - "epoch": 14.76297262059974, - "grad_norm": 1.3899598121643066, - "learning_rate": 7.206130653266332e-05, - "loss": 5.5407, - "step": 28308 - }, - { - "epoch": 14.763494132985658, - "grad_norm": 1.4568006992340088, - "learning_rate": 7.206030150753768e-05, - "loss": 5.4505, - "step": 28309 - }, - { - "epoch": 14.764015645371577, - "grad_norm": 1.3055572509765625, - "learning_rate": 7.205929648241206e-05, - "loss": 5.6943, - "step": 28310 - }, - { - "epoch": 14.764537157757497, - "grad_norm": 1.4284460544586182, - "learning_rate": 7.205829145728643e-05, - "loss": 4.9162, - "step": 28311 - }, - { - "epoch": 14.765058670143416, - "grad_norm": 1.3989551067352295, - "learning_rate": 7.20572864321608e-05, - "loss": 5.2262, - "step": 28312 - }, - { - "epoch": 14.765580182529336, - "grad_norm": 1.4879775047302246, - "learning_rate": 7.205628140703517e-05, - "loss": 5.427, - "step": 28313 - }, - { - "epoch": 14.766101694915255, - "grad_norm": 1.4066416025161743, - "learning_rate": 7.205527638190955e-05, - "loss": 5.1668, - "step": 28314 - }, - { - "epoch": 14.766623207301173, - "grad_norm": 1.7043136358261108, - "learning_rate": 7.205427135678392e-05, - "loss": 4.8676, - "step": 28315 - }, - { - "epoch": 14.767144719687092, - "grad_norm": 1.424278974533081, - "learning_rate": 7.20532663316583e-05, - "loss": 5.5553, - "step": 28316 - }, - { - "epoch": 14.767666232073012, - "grad_norm": 1.4645445346832275, - "learning_rate": 7.205226130653267e-05, - "loss": 5.299, - "step": 28317 - }, - { - "epoch": 14.768187744458931, - "grad_norm": 1.4382243156433105, - "learning_rate": 7.205125628140704e-05, - "loss": 4.7832, - "step": 28318 - }, - { - "epoch": 14.76870925684485, - "grad_norm": 1.355554461479187, - "learning_rate": 7.205025125628141e-05, - "loss": 5.153, - "step": 28319 - }, - { - "epoch": 14.76923076923077, - "grad_norm": 1.4121661186218262, - "learning_rate": 7.204924623115579e-05, - "loss": 5.7511, - "step": 28320 - }, - { - "epoch": 14.769752281616688, - "grad_norm": 1.5884666442871094, - "learning_rate": 7.204824120603015e-05, - "loss": 5.1549, - "step": 28321 - }, - { - "epoch": 14.770273794002607, - "grad_norm": 1.4780855178833008, - "learning_rate": 7.204723618090453e-05, - "loss": 5.1998, - "step": 28322 - }, - { - "epoch": 14.770795306388527, - "grad_norm": 1.6719478368759155, - "learning_rate": 7.204623115577889e-05, - "loss": 4.5059, - "step": 28323 - }, - { - "epoch": 14.771316818774446, - "grad_norm": 1.4553958177566528, - "learning_rate": 7.204522613065327e-05, - "loss": 5.2536, - "step": 28324 - }, - { - "epoch": 14.771838331160366, - "grad_norm": 1.476692795753479, - "learning_rate": 7.204422110552765e-05, - "loss": 5.7004, - "step": 28325 - }, - { - "epoch": 14.772359843546283, - "grad_norm": 1.4955296516418457, - "learning_rate": 7.204321608040201e-05, - "loss": 5.2673, - "step": 28326 - }, - { - "epoch": 14.772881355932203, - "grad_norm": 1.4121158123016357, - "learning_rate": 7.204221105527639e-05, - "loss": 5.394, - "step": 28327 - }, - { - "epoch": 14.773402868318122, - "grad_norm": 1.5238326787948608, - "learning_rate": 7.204120603015075e-05, - "loss": 5.2971, - "step": 28328 - }, - { - "epoch": 14.773924380704042, - "grad_norm": 1.478186011314392, - "learning_rate": 7.204020100502513e-05, - "loss": 5.2873, - "step": 28329 - }, - { - "epoch": 14.774445893089961, - "grad_norm": 1.4535528421401978, - "learning_rate": 7.20391959798995e-05, - "loss": 5.2777, - "step": 28330 - }, - { - "epoch": 14.77496740547588, - "grad_norm": 1.4435721635818481, - "learning_rate": 7.203819095477387e-05, - "loss": 5.5335, - "step": 28331 - }, - { - "epoch": 14.7754889178618, - "grad_norm": 1.4555764198303223, - "learning_rate": 7.203718592964824e-05, - "loss": 5.599, - "step": 28332 - }, - { - "epoch": 14.776010430247718, - "grad_norm": 1.4865517616271973, - "learning_rate": 7.203618090452262e-05, - "loss": 5.8387, - "step": 28333 - }, - { - "epoch": 14.776531942633637, - "grad_norm": 1.49631667137146, - "learning_rate": 7.203517587939698e-05, - "loss": 5.4954, - "step": 28334 - }, - { - "epoch": 14.777053455019557, - "grad_norm": 1.5333492755889893, - "learning_rate": 7.203417085427136e-05, - "loss": 5.3561, - "step": 28335 - }, - { - "epoch": 14.777574967405476, - "grad_norm": 1.323593258857727, - "learning_rate": 7.203316582914574e-05, - "loss": 5.5609, - "step": 28336 - }, - { - "epoch": 14.778096479791396, - "grad_norm": 1.4091992378234863, - "learning_rate": 7.20321608040201e-05, - "loss": 5.2189, - "step": 28337 - }, - { - "epoch": 14.778617992177313, - "grad_norm": 1.5417485237121582, - "learning_rate": 7.203115577889448e-05, - "loss": 4.4859, - "step": 28338 - }, - { - "epoch": 14.779139504563233, - "grad_norm": 1.4363986253738403, - "learning_rate": 7.203015075376884e-05, - "loss": 5.7113, - "step": 28339 - }, - { - "epoch": 14.779661016949152, - "grad_norm": 1.4685544967651367, - "learning_rate": 7.202914572864322e-05, - "loss": 5.7984, - "step": 28340 - }, - { - "epoch": 14.780182529335072, - "grad_norm": 1.5190253257751465, - "learning_rate": 7.202814070351759e-05, - "loss": 5.5261, - "step": 28341 - }, - { - "epoch": 14.780704041720991, - "grad_norm": 1.394889235496521, - "learning_rate": 7.202713567839196e-05, - "loss": 5.5064, - "step": 28342 - }, - { - "epoch": 14.78122555410691, - "grad_norm": 1.3970905542373657, - "learning_rate": 7.202613065326633e-05, - "loss": 5.5503, - "step": 28343 - }, - { - "epoch": 14.78174706649283, - "grad_norm": 1.434092402458191, - "learning_rate": 7.20251256281407e-05, - "loss": 5.2179, - "step": 28344 - }, - { - "epoch": 14.782268578878748, - "grad_norm": 1.4187980890274048, - "learning_rate": 7.202412060301508e-05, - "loss": 5.5165, - "step": 28345 - }, - { - "epoch": 14.782790091264667, - "grad_norm": 1.5131964683532715, - "learning_rate": 7.202311557788946e-05, - "loss": 5.0614, - "step": 28346 - }, - { - "epoch": 14.783311603650587, - "grad_norm": 1.5799658298492432, - "learning_rate": 7.202211055276383e-05, - "loss": 5.4775, - "step": 28347 - }, - { - "epoch": 14.783833116036506, - "grad_norm": 1.49422025680542, - "learning_rate": 7.20211055276382e-05, - "loss": 5.3311, - "step": 28348 - }, - { - "epoch": 14.784354628422426, - "grad_norm": 1.441029667854309, - "learning_rate": 7.202010050251257e-05, - "loss": 4.7424, - "step": 28349 - }, - { - "epoch": 14.784876140808343, - "grad_norm": 1.609880805015564, - "learning_rate": 7.201909547738693e-05, - "loss": 5.5226, - "step": 28350 - }, - { - "epoch": 14.785397653194263, - "grad_norm": 1.5126771926879883, - "learning_rate": 7.201809045226131e-05, - "loss": 5.3379, - "step": 28351 - }, - { - "epoch": 14.785919165580182, - "grad_norm": 1.391235589981079, - "learning_rate": 7.201708542713567e-05, - "loss": 5.4559, - "step": 28352 - }, - { - "epoch": 14.786440677966102, - "grad_norm": 1.435037612915039, - "learning_rate": 7.201608040201005e-05, - "loss": 5.6031, - "step": 28353 - }, - { - "epoch": 14.786962190352021, - "grad_norm": 1.3886114358901978, - "learning_rate": 7.201507537688442e-05, - "loss": 5.786, - "step": 28354 - }, - { - "epoch": 14.78748370273794, - "grad_norm": 1.6178139448165894, - "learning_rate": 7.20140703517588e-05, - "loss": 4.9623, - "step": 28355 - }, - { - "epoch": 14.78800521512386, - "grad_norm": 1.51095449924469, - "learning_rate": 7.201306532663317e-05, - "loss": 5.1405, - "step": 28356 - }, - { - "epoch": 14.788526727509778, - "grad_norm": 1.510780692100525, - "learning_rate": 7.201206030150755e-05, - "loss": 5.5524, - "step": 28357 - }, - { - "epoch": 14.789048239895697, - "grad_norm": 1.5358844995498657, - "learning_rate": 7.201105527638191e-05, - "loss": 5.3524, - "step": 28358 - }, - { - "epoch": 14.789569752281617, - "grad_norm": 1.4407707452774048, - "learning_rate": 7.201005025125629e-05, - "loss": 5.6449, - "step": 28359 - }, - { - "epoch": 14.790091264667536, - "grad_norm": 1.582916259765625, - "learning_rate": 7.200904522613066e-05, - "loss": 5.4356, - "step": 28360 - }, - { - "epoch": 14.790612777053456, - "grad_norm": 1.5410966873168945, - "learning_rate": 7.200804020100503e-05, - "loss": 5.3754, - "step": 28361 - }, - { - "epoch": 14.791134289439373, - "grad_norm": 1.4317045211791992, - "learning_rate": 7.20070351758794e-05, - "loss": 5.6931, - "step": 28362 - }, - { - "epoch": 14.791655801825293, - "grad_norm": 1.4341527223587036, - "learning_rate": 7.200603015075376e-05, - "loss": 5.2815, - "step": 28363 - }, - { - "epoch": 14.792177314211212, - "grad_norm": 1.5376836061477661, - "learning_rate": 7.200502512562814e-05, - "loss": 5.2624, - "step": 28364 - }, - { - "epoch": 14.792698826597132, - "grad_norm": 1.4844456911087036, - "learning_rate": 7.200402010050252e-05, - "loss": 5.4455, - "step": 28365 - }, - { - "epoch": 14.793220338983051, - "grad_norm": 1.4864697456359863, - "learning_rate": 7.20030150753769e-05, - "loss": 5.8301, - "step": 28366 - }, - { - "epoch": 14.79374185136897, - "grad_norm": 1.4701523780822754, - "learning_rate": 7.200201005025126e-05, - "loss": 5.625, - "step": 28367 - }, - { - "epoch": 14.794263363754888, - "grad_norm": 1.5076736211776733, - "learning_rate": 7.200100502512564e-05, - "loss": 5.1025, - "step": 28368 - }, - { - "epoch": 14.794784876140808, - "grad_norm": 1.4905478954315186, - "learning_rate": 7.2e-05, - "loss": 5.1958, - "step": 28369 - }, - { - "epoch": 14.795306388526727, - "grad_norm": 1.4511357545852661, - "learning_rate": 7.199899497487438e-05, - "loss": 5.0876, - "step": 28370 - }, - { - "epoch": 14.795827900912647, - "grad_norm": 1.5931291580200195, - "learning_rate": 7.199798994974874e-05, - "loss": 5.1619, - "step": 28371 - }, - { - "epoch": 14.796349413298566, - "grad_norm": 1.5082045793533325, - "learning_rate": 7.199698492462312e-05, - "loss": 5.1331, - "step": 28372 - }, - { - "epoch": 14.796870925684486, - "grad_norm": 1.428027868270874, - "learning_rate": 7.199597989949749e-05, - "loss": 5.1827, - "step": 28373 - }, - { - "epoch": 14.797392438070403, - "grad_norm": 1.5083097219467163, - "learning_rate": 7.199497487437186e-05, - "loss": 5.1981, - "step": 28374 - }, - { - "epoch": 14.797913950456323, - "grad_norm": 1.557309627532959, - "learning_rate": 7.199396984924623e-05, - "loss": 5.4803, - "step": 28375 - }, - { - "epoch": 14.798435462842242, - "grad_norm": 1.4711641073226929, - "learning_rate": 7.19929648241206e-05, - "loss": 5.3354, - "step": 28376 - }, - { - "epoch": 14.798956975228162, - "grad_norm": 1.447561502456665, - "learning_rate": 7.199195979899498e-05, - "loss": 5.436, - "step": 28377 - }, - { - "epoch": 14.799478487614081, - "grad_norm": 1.4566764831542969, - "learning_rate": 7.199095477386935e-05, - "loss": 5.0752, - "step": 28378 - }, - { - "epoch": 14.8, - "grad_norm": 1.4838379621505737, - "learning_rate": 7.198994974874373e-05, - "loss": 5.5133, - "step": 28379 - }, - { - "epoch": 14.800521512385918, - "grad_norm": 1.6570483446121216, - "learning_rate": 7.198894472361809e-05, - "loss": 4.7989, - "step": 28380 - }, - { - "epoch": 14.801043024771838, - "grad_norm": 1.5289421081542969, - "learning_rate": 7.198793969849247e-05, - "loss": 5.2668, - "step": 28381 - }, - { - "epoch": 14.801564537157757, - "grad_norm": 1.4748972654342651, - "learning_rate": 7.198693467336683e-05, - "loss": 5.2157, - "step": 28382 - }, - { - "epoch": 14.802086049543677, - "grad_norm": 1.4037212133407593, - "learning_rate": 7.198592964824121e-05, - "loss": 5.3171, - "step": 28383 - }, - { - "epoch": 14.802607561929596, - "grad_norm": 1.5365548133850098, - "learning_rate": 7.198492462311557e-05, - "loss": 5.1475, - "step": 28384 - }, - { - "epoch": 14.803129074315516, - "grad_norm": 1.3242210149765015, - "learning_rate": 7.198391959798995e-05, - "loss": 5.6616, - "step": 28385 - }, - { - "epoch": 14.803650586701433, - "grad_norm": 1.4808800220489502, - "learning_rate": 7.198291457286433e-05, - "loss": 5.3096, - "step": 28386 - }, - { - "epoch": 14.804172099087353, - "grad_norm": 1.8534784317016602, - "learning_rate": 7.198190954773871e-05, - "loss": 4.8502, - "step": 28387 - }, - { - "epoch": 14.804693611473272, - "grad_norm": 1.3904591798782349, - "learning_rate": 7.198090452261307e-05, - "loss": 5.4003, - "step": 28388 - }, - { - "epoch": 14.805215123859192, - "grad_norm": 1.394013524055481, - "learning_rate": 7.197989949748744e-05, - "loss": 5.4192, - "step": 28389 - }, - { - "epoch": 14.805736636245111, - "grad_norm": 1.357876181602478, - "learning_rate": 7.197889447236181e-05, - "loss": 5.3615, - "step": 28390 - }, - { - "epoch": 14.80625814863103, - "grad_norm": 1.4564399719238281, - "learning_rate": 7.197788944723618e-05, - "loss": 5.2074, - "step": 28391 - }, - { - "epoch": 14.806779661016948, - "grad_norm": 1.4632742404937744, - "learning_rate": 7.197688442211056e-05, - "loss": 5.4333, - "step": 28392 - }, - { - "epoch": 14.807301173402868, - "grad_norm": 1.5250595808029175, - "learning_rate": 7.197587939698492e-05, - "loss": 5.1162, - "step": 28393 - }, - { - "epoch": 14.807822685788787, - "grad_norm": 1.6097304821014404, - "learning_rate": 7.19748743718593e-05, - "loss": 5.1175, - "step": 28394 - }, - { - "epoch": 14.808344198174707, - "grad_norm": 1.4334156513214111, - "learning_rate": 7.197386934673366e-05, - "loss": 5.3288, - "step": 28395 - }, - { - "epoch": 14.808865710560626, - "grad_norm": 1.479110836982727, - "learning_rate": 7.197286432160804e-05, - "loss": 4.779, - "step": 28396 - }, - { - "epoch": 14.809387222946546, - "grad_norm": 1.4070950746536255, - "learning_rate": 7.197185929648242e-05, - "loss": 5.4859, - "step": 28397 - }, - { - "epoch": 14.809908735332463, - "grad_norm": 1.41886305809021, - "learning_rate": 7.19708542713568e-05, - "loss": 5.5098, - "step": 28398 - }, - { - "epoch": 14.810430247718383, - "grad_norm": 1.4982377290725708, - "learning_rate": 7.196984924623116e-05, - "loss": 5.5956, - "step": 28399 - }, - { - "epoch": 14.810951760104302, - "grad_norm": 1.4552727937698364, - "learning_rate": 7.196884422110554e-05, - "loss": 5.4708, - "step": 28400 - }, - { - "epoch": 14.811473272490222, - "grad_norm": 1.3452742099761963, - "learning_rate": 7.19678391959799e-05, - "loss": 5.5445, - "step": 28401 - }, - { - "epoch": 14.811994784876141, - "grad_norm": 1.5315724611282349, - "learning_rate": 7.196683417085427e-05, - "loss": 5.2928, - "step": 28402 - }, - { - "epoch": 14.81251629726206, - "grad_norm": 1.5228047370910645, - "learning_rate": 7.196582914572864e-05, - "loss": 4.9602, - "step": 28403 - }, - { - "epoch": 14.813037809647978, - "grad_norm": 1.4356403350830078, - "learning_rate": 7.196482412060301e-05, - "loss": 5.302, - "step": 28404 - }, - { - "epoch": 14.813559322033898, - "grad_norm": 1.3993754386901855, - "learning_rate": 7.196381909547739e-05, - "loss": 4.9415, - "step": 28405 - }, - { - "epoch": 14.814080834419817, - "grad_norm": 1.4802138805389404, - "learning_rate": 7.196281407035176e-05, - "loss": 4.922, - "step": 28406 - }, - { - "epoch": 14.814602346805737, - "grad_norm": 1.468979835510254, - "learning_rate": 7.196180904522614e-05, - "loss": 5.6573, - "step": 28407 - }, - { - "epoch": 14.815123859191656, - "grad_norm": 1.5335102081298828, - "learning_rate": 7.19608040201005e-05, - "loss": 4.9407, - "step": 28408 - }, - { - "epoch": 14.815645371577576, - "grad_norm": 1.5413364171981812, - "learning_rate": 7.195979899497488e-05, - "loss": 5.4383, - "step": 28409 - }, - { - "epoch": 14.816166883963493, - "grad_norm": 1.4566144943237305, - "learning_rate": 7.195879396984925e-05, - "loss": 5.4406, - "step": 28410 - }, - { - "epoch": 14.816688396349413, - "grad_norm": 1.570470929145813, - "learning_rate": 7.195778894472363e-05, - "loss": 4.8597, - "step": 28411 - }, - { - "epoch": 14.817209908735332, - "grad_norm": 1.4882588386535645, - "learning_rate": 7.195678391959799e-05, - "loss": 4.9482, - "step": 28412 - }, - { - "epoch": 14.817731421121252, - "grad_norm": 1.4591947793960571, - "learning_rate": 7.195577889447237e-05, - "loss": 4.8319, - "step": 28413 - }, - { - "epoch": 14.818252933507171, - "grad_norm": 1.4119561910629272, - "learning_rate": 7.195477386934673e-05, - "loss": 5.4829, - "step": 28414 - }, - { - "epoch": 14.81877444589309, - "grad_norm": 1.493438482284546, - "learning_rate": 7.195376884422111e-05, - "loss": 5.8317, - "step": 28415 - }, - { - "epoch": 14.819295958279008, - "grad_norm": 1.5846713781356812, - "learning_rate": 7.195276381909548e-05, - "loss": 4.6832, - "step": 28416 - }, - { - "epoch": 14.819817470664928, - "grad_norm": 1.5117803812026978, - "learning_rate": 7.195175879396985e-05, - "loss": 5.1369, - "step": 28417 - }, - { - "epoch": 14.820338983050847, - "grad_norm": 1.5690916776657104, - "learning_rate": 7.195075376884423e-05, - "loss": 5.1886, - "step": 28418 - }, - { - "epoch": 14.820860495436767, - "grad_norm": 1.364010214805603, - "learning_rate": 7.19497487437186e-05, - "loss": 5.9098, - "step": 28419 - }, - { - "epoch": 14.821382007822686, - "grad_norm": 1.5129034519195557, - "learning_rate": 7.194874371859297e-05, - "loss": 5.7335, - "step": 28420 - }, - { - "epoch": 14.821903520208604, - "grad_norm": 1.3470282554626465, - "learning_rate": 7.194773869346734e-05, - "loss": 5.4748, - "step": 28421 - }, - { - "epoch": 14.822425032594523, - "grad_norm": 1.5953823328018188, - "learning_rate": 7.194673366834171e-05, - "loss": 5.1413, - "step": 28422 - }, - { - "epoch": 14.822946544980443, - "grad_norm": 1.3719133138656616, - "learning_rate": 7.194572864321608e-05, - "loss": 5.3976, - "step": 28423 - }, - { - "epoch": 14.823468057366362, - "grad_norm": 1.4776089191436768, - "learning_rate": 7.194472361809046e-05, - "loss": 5.5218, - "step": 28424 - }, - { - "epoch": 14.823989569752282, - "grad_norm": 1.5377651453018188, - "learning_rate": 7.194371859296482e-05, - "loss": 5.13, - "step": 28425 - }, - { - "epoch": 14.824511082138201, - "grad_norm": 1.3713260889053345, - "learning_rate": 7.19427135678392e-05, - "loss": 5.4168, - "step": 28426 - }, - { - "epoch": 14.82503259452412, - "grad_norm": 1.419805645942688, - "learning_rate": 7.194170854271358e-05, - "loss": 5.7095, - "step": 28427 - }, - { - "epoch": 14.825554106910038, - "grad_norm": 1.399126648902893, - "learning_rate": 7.194070351758795e-05, - "loss": 5.3711, - "step": 28428 - }, - { - "epoch": 14.826075619295958, - "grad_norm": 1.5632078647613525, - "learning_rate": 7.193969849246232e-05, - "loss": 5.0673, - "step": 28429 - }, - { - "epoch": 14.826597131681877, - "grad_norm": 1.4745502471923828, - "learning_rate": 7.193869346733668e-05, - "loss": 5.6218, - "step": 28430 - }, - { - "epoch": 14.827118644067797, - "grad_norm": 1.5655299425125122, - "learning_rate": 7.193768844221106e-05, - "loss": 5.1065, - "step": 28431 - }, - { - "epoch": 14.827640156453716, - "grad_norm": 1.6170392036437988, - "learning_rate": 7.193668341708543e-05, - "loss": 5.3579, - "step": 28432 - }, - { - "epoch": 14.828161668839634, - "grad_norm": 1.551432490348816, - "learning_rate": 7.19356783919598e-05, - "loss": 5.1874, - "step": 28433 - }, - { - "epoch": 14.828683181225554, - "grad_norm": 1.5570260286331177, - "learning_rate": 7.193467336683417e-05, - "loss": 5.2596, - "step": 28434 - }, - { - "epoch": 14.829204693611473, - "grad_norm": 1.5406066179275513, - "learning_rate": 7.193366834170855e-05, - "loss": 5.4944, - "step": 28435 - }, - { - "epoch": 14.829726205997392, - "grad_norm": 1.499942421913147, - "learning_rate": 7.193266331658291e-05, - "loss": 5.4868, - "step": 28436 - }, - { - "epoch": 14.830247718383312, - "grad_norm": 1.4518357515335083, - "learning_rate": 7.193165829145729e-05, - "loss": 5.8492, - "step": 28437 - }, - { - "epoch": 14.830769230769231, - "grad_norm": 1.4348112344741821, - "learning_rate": 7.193065326633167e-05, - "loss": 5.9227, - "step": 28438 - }, - { - "epoch": 14.83129074315515, - "grad_norm": 1.4345792531967163, - "learning_rate": 7.192964824120604e-05, - "loss": 4.8307, - "step": 28439 - }, - { - "epoch": 14.831812255541069, - "grad_norm": 1.475465178489685, - "learning_rate": 7.192864321608041e-05, - "loss": 5.2229, - "step": 28440 - }, - { - "epoch": 14.832333767926988, - "grad_norm": 1.4975261688232422, - "learning_rate": 7.192763819095479e-05, - "loss": 4.8877, - "step": 28441 - }, - { - "epoch": 14.832855280312907, - "grad_norm": 1.599745273590088, - "learning_rate": 7.192663316582915e-05, - "loss": 5.386, - "step": 28442 - }, - { - "epoch": 14.833376792698827, - "grad_norm": 1.5644596815109253, - "learning_rate": 7.192562814070351e-05, - "loss": 4.8309, - "step": 28443 - }, - { - "epoch": 14.833898305084746, - "grad_norm": 1.4847742319107056, - "learning_rate": 7.192462311557789e-05, - "loss": 4.7934, - "step": 28444 - }, - { - "epoch": 14.834419817470664, - "grad_norm": 1.4732805490493774, - "learning_rate": 7.192361809045226e-05, - "loss": 5.3213, - "step": 28445 - }, - { - "epoch": 14.834941329856584, - "grad_norm": 1.6270469427108765, - "learning_rate": 7.192261306532663e-05, - "loss": 4.852, - "step": 28446 - }, - { - "epoch": 14.835462842242503, - "grad_norm": 1.5830719470977783, - "learning_rate": 7.192160804020101e-05, - "loss": 5.6551, - "step": 28447 - }, - { - "epoch": 14.835984354628422, - "grad_norm": 1.5434421300888062, - "learning_rate": 7.192060301507539e-05, - "loss": 5.3986, - "step": 28448 - }, - { - "epoch": 14.836505867014342, - "grad_norm": 1.5566529035568237, - "learning_rate": 7.191959798994975e-05, - "loss": 5.0085, - "step": 28449 - }, - { - "epoch": 14.837027379400261, - "grad_norm": 1.4844553470611572, - "learning_rate": 7.191859296482413e-05, - "loss": 4.5127, - "step": 28450 - }, - { - "epoch": 14.83754889178618, - "grad_norm": 1.530661702156067, - "learning_rate": 7.19175879396985e-05, - "loss": 4.7483, - "step": 28451 - }, - { - "epoch": 14.838070404172099, - "grad_norm": 1.5277894735336304, - "learning_rate": 7.191658291457287e-05, - "loss": 5.042, - "step": 28452 - }, - { - "epoch": 14.838591916558018, - "grad_norm": 1.551499843597412, - "learning_rate": 7.191557788944724e-05, - "loss": 5.145, - "step": 28453 - }, - { - "epoch": 14.839113428943937, - "grad_norm": 1.3951458930969238, - "learning_rate": 7.191457286432162e-05, - "loss": 4.9487, - "step": 28454 - }, - { - "epoch": 14.839634941329857, - "grad_norm": 1.373840570449829, - "learning_rate": 7.191356783919598e-05, - "loss": 5.5079, - "step": 28455 - }, - { - "epoch": 14.840156453715776, - "grad_norm": 1.4616882801055908, - "learning_rate": 7.191256281407034e-05, - "loss": 5.3934, - "step": 28456 - }, - { - "epoch": 14.840677966101694, - "grad_norm": 1.5003257989883423, - "learning_rate": 7.191155778894472e-05, - "loss": 4.8351, - "step": 28457 - }, - { - "epoch": 14.841199478487614, - "grad_norm": 1.5650326013565063, - "learning_rate": 7.19105527638191e-05, - "loss": 5.4433, - "step": 28458 - }, - { - "epoch": 14.841720990873533, - "grad_norm": 1.5418931245803833, - "learning_rate": 7.190954773869348e-05, - "loss": 5.3534, - "step": 28459 - }, - { - "epoch": 14.842242503259452, - "grad_norm": 1.4036118984222412, - "learning_rate": 7.190854271356784e-05, - "loss": 5.5627, - "step": 28460 - }, - { - "epoch": 14.842764015645372, - "grad_norm": 1.4264036417007446, - "learning_rate": 7.190753768844222e-05, - "loss": 5.3587, - "step": 28461 - }, - { - "epoch": 14.843285528031291, - "grad_norm": 1.3498971462249756, - "learning_rate": 7.190653266331658e-05, - "loss": 5.8245, - "step": 28462 - }, - { - "epoch": 14.843807040417209, - "grad_norm": 1.4877630472183228, - "learning_rate": 7.190552763819096e-05, - "loss": 5.2044, - "step": 28463 - }, - { - "epoch": 14.844328552803129, - "grad_norm": 1.4211270809173584, - "learning_rate": 7.190452261306533e-05, - "loss": 4.981, - "step": 28464 - }, - { - "epoch": 14.844850065189048, - "grad_norm": 1.5325255393981934, - "learning_rate": 7.19035175879397e-05, - "loss": 5.4268, - "step": 28465 - }, - { - "epoch": 14.845371577574968, - "grad_norm": 1.595265507698059, - "learning_rate": 7.190251256281407e-05, - "loss": 5.0014, - "step": 28466 - }, - { - "epoch": 14.845893089960887, - "grad_norm": 1.553394079208374, - "learning_rate": 7.190150753768845e-05, - "loss": 4.919, - "step": 28467 - }, - { - "epoch": 14.846414602346806, - "grad_norm": 1.4917656183242798, - "learning_rate": 7.190050251256282e-05, - "loss": 4.5267, - "step": 28468 - }, - { - "epoch": 14.846936114732724, - "grad_norm": 1.5339040756225586, - "learning_rate": 7.189949748743719e-05, - "loss": 5.317, - "step": 28469 - }, - { - "epoch": 14.847457627118644, - "grad_norm": 1.4710890054702759, - "learning_rate": 7.189849246231157e-05, - "loss": 4.9503, - "step": 28470 - }, - { - "epoch": 14.847979139504563, - "grad_norm": 1.5651543140411377, - "learning_rate": 7.189748743718593e-05, - "loss": 5.4159, - "step": 28471 - }, - { - "epoch": 14.848500651890483, - "grad_norm": 1.4557793140411377, - "learning_rate": 7.189648241206031e-05, - "loss": 5.4221, - "step": 28472 - }, - { - "epoch": 14.849022164276402, - "grad_norm": 1.467147946357727, - "learning_rate": 7.189547738693467e-05, - "loss": 5.4341, - "step": 28473 - }, - { - "epoch": 14.849543676662321, - "grad_norm": 1.5413587093353271, - "learning_rate": 7.189447236180905e-05, - "loss": 5.1178, - "step": 28474 - }, - { - "epoch": 14.85006518904824, - "grad_norm": 1.4408968687057495, - "learning_rate": 7.189346733668341e-05, - "loss": 5.8745, - "step": 28475 - }, - { - "epoch": 14.850586701434159, - "grad_norm": 1.4764072895050049, - "learning_rate": 7.189246231155779e-05, - "loss": 5.6623, - "step": 28476 - }, - { - "epoch": 14.851108213820078, - "grad_norm": 1.479858160018921, - "learning_rate": 7.189145728643216e-05, - "loss": 5.366, - "step": 28477 - }, - { - "epoch": 14.851629726205998, - "grad_norm": 1.5269756317138672, - "learning_rate": 7.189045226130653e-05, - "loss": 5.239, - "step": 28478 - }, - { - "epoch": 14.852151238591917, - "grad_norm": 1.5052521228790283, - "learning_rate": 7.188944723618091e-05, - "loss": 5.5053, - "step": 28479 - }, - { - "epoch": 14.852672750977836, - "grad_norm": 1.466199278831482, - "learning_rate": 7.188844221105529e-05, - "loss": 5.7134, - "step": 28480 - }, - { - "epoch": 14.853194263363754, - "grad_norm": 1.4813376665115356, - "learning_rate": 7.188743718592965e-05, - "loss": 5.4507, - "step": 28481 - }, - { - "epoch": 14.853715775749674, - "grad_norm": 1.4107283353805542, - "learning_rate": 7.188643216080402e-05, - "loss": 5.9255, - "step": 28482 - }, - { - "epoch": 14.854237288135593, - "grad_norm": 1.393752932548523, - "learning_rate": 7.18854271356784e-05, - "loss": 5.4088, - "step": 28483 - }, - { - "epoch": 14.854758800521513, - "grad_norm": 1.506169319152832, - "learning_rate": 7.188442211055276e-05, - "loss": 5.6168, - "step": 28484 - }, - { - "epoch": 14.855280312907432, - "grad_norm": 1.6763156652450562, - "learning_rate": 7.188341708542714e-05, - "loss": 5.3946, - "step": 28485 - }, - { - "epoch": 14.855801825293351, - "grad_norm": 1.3866115808486938, - "learning_rate": 7.18824120603015e-05, - "loss": 5.3661, - "step": 28486 - }, - { - "epoch": 14.85632333767927, - "grad_norm": 1.4301475286483765, - "learning_rate": 7.188140703517588e-05, - "loss": 5.3326, - "step": 28487 - }, - { - "epoch": 14.856844850065189, - "grad_norm": 1.5268535614013672, - "learning_rate": 7.188040201005025e-05, - "loss": 5.2061, - "step": 28488 - }, - { - "epoch": 14.857366362451108, - "grad_norm": 1.4306271076202393, - "learning_rate": 7.187939698492462e-05, - "loss": 5.188, - "step": 28489 - }, - { - "epoch": 14.857887874837028, - "grad_norm": 1.4544148445129395, - "learning_rate": 7.1878391959799e-05, - "loss": 5.0992, - "step": 28490 - }, - { - "epoch": 14.858409387222947, - "grad_norm": 1.4755271673202515, - "learning_rate": 7.187738693467338e-05, - "loss": 5.2298, - "step": 28491 - }, - { - "epoch": 14.858930899608866, - "grad_norm": 1.5356642007827759, - "learning_rate": 7.187638190954774e-05, - "loss": 5.5798, - "step": 28492 - }, - { - "epoch": 14.859452411994784, - "grad_norm": 1.4326049089431763, - "learning_rate": 7.187537688442212e-05, - "loss": 5.6761, - "step": 28493 - }, - { - "epoch": 14.859973924380704, - "grad_norm": 1.5045281648635864, - "learning_rate": 7.187437185929648e-05, - "loss": 5.4778, - "step": 28494 - }, - { - "epoch": 14.860495436766623, - "grad_norm": 1.52534019947052, - "learning_rate": 7.187336683417086e-05, - "loss": 5.1587, - "step": 28495 - }, - { - "epoch": 14.861016949152543, - "grad_norm": 1.5338770151138306, - "learning_rate": 7.187236180904523e-05, - "loss": 5.1416, - "step": 28496 - }, - { - "epoch": 14.861538461538462, - "grad_norm": 1.4782872200012207, - "learning_rate": 7.187135678391959e-05, - "loss": 5.2463, - "step": 28497 - }, - { - "epoch": 14.862059973924381, - "grad_norm": 1.52986741065979, - "learning_rate": 7.187035175879397e-05, - "loss": 5.4209, - "step": 28498 - }, - { - "epoch": 14.8625814863103, - "grad_norm": 1.4439809322357178, - "learning_rate": 7.186934673366835e-05, - "loss": 5.474, - "step": 28499 - }, - { - "epoch": 14.863102998696219, - "grad_norm": 1.4654728174209595, - "learning_rate": 7.186834170854272e-05, - "loss": 5.6038, - "step": 28500 - }, - { - "epoch": 14.863624511082138, - "grad_norm": 1.4442986249923706, - "learning_rate": 7.186733668341709e-05, - "loss": 5.5965, - "step": 28501 - }, - { - "epoch": 14.864146023468058, - "grad_norm": 1.444387674331665, - "learning_rate": 7.186633165829147e-05, - "loss": 5.6047, - "step": 28502 - }, - { - "epoch": 14.864667535853977, - "grad_norm": 1.3230631351470947, - "learning_rate": 7.186532663316583e-05, - "loss": 5.4063, - "step": 28503 - }, - { - "epoch": 14.865189048239897, - "grad_norm": 1.5281953811645508, - "learning_rate": 7.186432160804021e-05, - "loss": 5.1765, - "step": 28504 - }, - { - "epoch": 14.865710560625814, - "grad_norm": 1.514102578163147, - "learning_rate": 7.186331658291457e-05, - "loss": 4.942, - "step": 28505 - }, - { - "epoch": 14.866232073011734, - "grad_norm": 1.4939452409744263, - "learning_rate": 7.186231155778895e-05, - "loss": 5.6459, - "step": 28506 - }, - { - "epoch": 14.866753585397653, - "grad_norm": 1.4178060293197632, - "learning_rate": 7.186130653266332e-05, - "loss": 5.6221, - "step": 28507 - }, - { - "epoch": 14.867275097783573, - "grad_norm": 1.3868366479873657, - "learning_rate": 7.186030150753769e-05, - "loss": 5.2898, - "step": 28508 - }, - { - "epoch": 14.867796610169492, - "grad_norm": 1.4257410764694214, - "learning_rate": 7.185929648241206e-05, - "loss": 5.29, - "step": 28509 - }, - { - "epoch": 14.868318122555412, - "grad_norm": 1.3984729051589966, - "learning_rate": 7.185829145728644e-05, - "loss": 5.48, - "step": 28510 - }, - { - "epoch": 14.86883963494133, - "grad_norm": 1.4240092039108276, - "learning_rate": 7.185728643216081e-05, - "loss": 5.5168, - "step": 28511 - }, - { - "epoch": 14.869361147327249, - "grad_norm": 1.4490954875946045, - "learning_rate": 7.185628140703518e-05, - "loss": 5.3373, - "step": 28512 - }, - { - "epoch": 14.869882659713168, - "grad_norm": 1.47867751121521, - "learning_rate": 7.185527638190956e-05, - "loss": 5.6068, - "step": 28513 - }, - { - "epoch": 14.870404172099088, - "grad_norm": 1.4989025592803955, - "learning_rate": 7.185427135678392e-05, - "loss": 5.4276, - "step": 28514 - }, - { - "epoch": 14.870925684485007, - "grad_norm": 1.4092265367507935, - "learning_rate": 7.18532663316583e-05, - "loss": 5.4641, - "step": 28515 - }, - { - "epoch": 14.871447196870925, - "grad_norm": 1.4016475677490234, - "learning_rate": 7.185226130653266e-05, - "loss": 5.6234, - "step": 28516 - }, - { - "epoch": 14.871968709256844, - "grad_norm": 1.4328488111495972, - "learning_rate": 7.185125628140704e-05, - "loss": 5.249, - "step": 28517 - }, - { - "epoch": 14.872490221642764, - "grad_norm": 1.4412909746170044, - "learning_rate": 7.18502512562814e-05, - "loss": 5.411, - "step": 28518 - }, - { - "epoch": 14.873011734028683, - "grad_norm": 1.504004955291748, - "learning_rate": 7.184924623115578e-05, - "loss": 5.3801, - "step": 28519 - }, - { - "epoch": 14.873533246414603, - "grad_norm": 1.4553966522216797, - "learning_rate": 7.184824120603016e-05, - "loss": 5.4607, - "step": 28520 - }, - { - "epoch": 14.874054758800522, - "grad_norm": 1.4020143747329712, - "learning_rate": 7.184723618090454e-05, - "loss": 4.9835, - "step": 28521 - }, - { - "epoch": 14.874576271186442, - "grad_norm": 1.434863567352295, - "learning_rate": 7.18462311557789e-05, - "loss": 5.3752, - "step": 28522 - }, - { - "epoch": 14.87509778357236, - "grad_norm": 1.5193005800247192, - "learning_rate": 7.184522613065327e-05, - "loss": 5.4591, - "step": 28523 - }, - { - "epoch": 14.875619295958279, - "grad_norm": 1.5169168710708618, - "learning_rate": 7.184422110552764e-05, - "loss": 5.2539, - "step": 28524 - }, - { - "epoch": 14.876140808344198, - "grad_norm": 1.5206594467163086, - "learning_rate": 7.184321608040201e-05, - "loss": 5.2334, - "step": 28525 - }, - { - "epoch": 14.876662320730118, - "grad_norm": 1.441630482673645, - "learning_rate": 7.184221105527639e-05, - "loss": 5.7879, - "step": 28526 - }, - { - "epoch": 14.877183833116037, - "grad_norm": 1.5630656480789185, - "learning_rate": 7.184120603015075e-05, - "loss": 5.464, - "step": 28527 - }, - { - "epoch": 14.877705345501955, - "grad_norm": 1.582668423652649, - "learning_rate": 7.184020100502513e-05, - "loss": 5.5892, - "step": 28528 - }, - { - "epoch": 14.878226857887874, - "grad_norm": 1.4438753128051758, - "learning_rate": 7.183919597989949e-05, - "loss": 5.4492, - "step": 28529 - }, - { - "epoch": 14.878748370273794, - "grad_norm": 1.4977989196777344, - "learning_rate": 7.183819095477387e-05, - "loss": 4.9181, - "step": 28530 - }, - { - "epoch": 14.879269882659713, - "grad_norm": 1.6182748079299927, - "learning_rate": 7.183718592964825e-05, - "loss": 5.1795, - "step": 28531 - }, - { - "epoch": 14.879791395045633, - "grad_norm": 1.3695989847183228, - "learning_rate": 7.183618090452263e-05, - "loss": 5.7331, - "step": 28532 - }, - { - "epoch": 14.880312907431552, - "grad_norm": 1.4448599815368652, - "learning_rate": 7.183517587939699e-05, - "loss": 5.0581, - "step": 28533 - }, - { - "epoch": 14.880834419817472, - "grad_norm": 1.42927086353302, - "learning_rate": 7.183417085427137e-05, - "loss": 5.0363, - "step": 28534 - }, - { - "epoch": 14.88135593220339, - "grad_norm": 1.4527174234390259, - "learning_rate": 7.183316582914573e-05, - "loss": 5.4772, - "step": 28535 - }, - { - "epoch": 14.881877444589309, - "grad_norm": 1.4350348711013794, - "learning_rate": 7.18321608040201e-05, - "loss": 5.7193, - "step": 28536 - }, - { - "epoch": 14.882398956975228, - "grad_norm": 1.442254662513733, - "learning_rate": 7.183115577889447e-05, - "loss": 5.6013, - "step": 28537 - }, - { - "epoch": 14.882920469361148, - "grad_norm": 1.4083036184310913, - "learning_rate": 7.183015075376884e-05, - "loss": 5.5739, - "step": 28538 - }, - { - "epoch": 14.883441981747067, - "grad_norm": 1.5321266651153564, - "learning_rate": 7.182914572864322e-05, - "loss": 4.7514, - "step": 28539 - }, - { - "epoch": 14.883963494132985, - "grad_norm": 1.414580225944519, - "learning_rate": 7.18281407035176e-05, - "loss": 4.9258, - "step": 28540 - }, - { - "epoch": 14.884485006518904, - "grad_norm": 1.4018856287002563, - "learning_rate": 7.182713567839197e-05, - "loss": 5.6742, - "step": 28541 - }, - { - "epoch": 14.885006518904824, - "grad_norm": 1.4193462133407593, - "learning_rate": 7.182613065326634e-05, - "loss": 5.8229, - "step": 28542 - }, - { - "epoch": 14.885528031290743, - "grad_norm": 1.4967334270477295, - "learning_rate": 7.182512562814071e-05, - "loss": 5.4188, - "step": 28543 - }, - { - "epoch": 14.886049543676663, - "grad_norm": 1.5790925025939941, - "learning_rate": 7.182412060301508e-05, - "loss": 5.3826, - "step": 28544 - }, - { - "epoch": 14.886571056062582, - "grad_norm": 1.3971916437149048, - "learning_rate": 7.182311557788946e-05, - "loss": 5.7249, - "step": 28545 - }, - { - "epoch": 14.887092568448502, - "grad_norm": 1.434005618095398, - "learning_rate": 7.182211055276382e-05, - "loss": 5.399, - "step": 28546 - }, - { - "epoch": 14.88761408083442, - "grad_norm": 1.4434638023376465, - "learning_rate": 7.18211055276382e-05, - "loss": 5.0261, - "step": 28547 - }, - { - "epoch": 14.888135593220339, - "grad_norm": 1.5755486488342285, - "learning_rate": 7.182010050251256e-05, - "loss": 5.0893, - "step": 28548 - }, - { - "epoch": 14.888657105606258, - "grad_norm": 1.4088103771209717, - "learning_rate": 7.181909547738693e-05, - "loss": 5.5614, - "step": 28549 - }, - { - "epoch": 14.889178617992178, - "grad_norm": 1.3984018564224243, - "learning_rate": 7.18180904522613e-05, - "loss": 5.7938, - "step": 28550 - }, - { - "epoch": 14.889700130378097, - "grad_norm": 1.454939365386963, - "learning_rate": 7.181708542713568e-05, - "loss": 5.6006, - "step": 28551 - }, - { - "epoch": 14.890221642764015, - "grad_norm": 1.5137314796447754, - "learning_rate": 7.181608040201006e-05, - "loss": 5.4549, - "step": 28552 - }, - { - "epoch": 14.890743155149934, - "grad_norm": 1.3212642669677734, - "learning_rate": 7.181507537688442e-05, - "loss": 5.8129, - "step": 28553 - }, - { - "epoch": 14.891264667535854, - "grad_norm": 1.4778705835342407, - "learning_rate": 7.18140703517588e-05, - "loss": 5.1765, - "step": 28554 - }, - { - "epoch": 14.891786179921773, - "grad_norm": 1.452956199645996, - "learning_rate": 7.181306532663317e-05, - "loss": 5.8042, - "step": 28555 - }, - { - "epoch": 14.892307692307693, - "grad_norm": 1.4149595499038696, - "learning_rate": 7.181206030150754e-05, - "loss": 5.391, - "step": 28556 - }, - { - "epoch": 14.892829204693612, - "grad_norm": 1.4447195529937744, - "learning_rate": 7.181105527638191e-05, - "loss": 5.4602, - "step": 28557 - }, - { - "epoch": 14.89335071707953, - "grad_norm": 1.4163416624069214, - "learning_rate": 7.181005025125629e-05, - "loss": 5.6654, - "step": 28558 - }, - { - "epoch": 14.89387222946545, - "grad_norm": 1.42918860912323, - "learning_rate": 7.180904522613065e-05, - "loss": 5.4031, - "step": 28559 - }, - { - "epoch": 14.894393741851369, - "grad_norm": 1.4167267084121704, - "learning_rate": 7.180804020100503e-05, - "loss": 5.3974, - "step": 28560 - }, - { - "epoch": 14.894915254237288, - "grad_norm": 1.4058295488357544, - "learning_rate": 7.18070351758794e-05, - "loss": 5.596, - "step": 28561 - }, - { - "epoch": 14.895436766623208, - "grad_norm": 1.3447445631027222, - "learning_rate": 7.180603015075377e-05, - "loss": 5.7504, - "step": 28562 - }, - { - "epoch": 14.895958279009127, - "grad_norm": 1.4927042722702026, - "learning_rate": 7.180502512562815e-05, - "loss": 5.4397, - "step": 28563 - }, - { - "epoch": 14.896479791395045, - "grad_norm": 1.430709958076477, - "learning_rate": 7.180402010050251e-05, - "loss": 5.7546, - "step": 28564 - }, - { - "epoch": 14.897001303780964, - "grad_norm": 1.4624061584472656, - "learning_rate": 7.180301507537689e-05, - "loss": 5.0466, - "step": 28565 - }, - { - "epoch": 14.897522816166884, - "grad_norm": 1.4894922971725464, - "learning_rate": 7.180201005025125e-05, - "loss": 5.3254, - "step": 28566 - }, - { - "epoch": 14.898044328552803, - "grad_norm": 1.4027808904647827, - "learning_rate": 7.180100502512563e-05, - "loss": 5.3938, - "step": 28567 - }, - { - "epoch": 14.898565840938723, - "grad_norm": 1.4182857275009155, - "learning_rate": 7.18e-05, - "loss": 5.4782, - "step": 28568 - }, - { - "epoch": 14.899087353324642, - "grad_norm": 1.488807201385498, - "learning_rate": 7.179899497487437e-05, - "loss": 5.5033, - "step": 28569 - }, - { - "epoch": 14.89960886571056, - "grad_norm": 1.5593899488449097, - "learning_rate": 7.179798994974874e-05, - "loss": 4.9237, - "step": 28570 - }, - { - "epoch": 14.90013037809648, - "grad_norm": 1.4154565334320068, - "learning_rate": 7.179698492462312e-05, - "loss": 5.5958, - "step": 28571 - }, - { - "epoch": 14.900651890482399, - "grad_norm": 1.507839560508728, - "learning_rate": 7.17959798994975e-05, - "loss": 5.2761, - "step": 28572 - }, - { - "epoch": 14.901173402868318, - "grad_norm": 1.470206379890442, - "learning_rate": 7.179497487437187e-05, - "loss": 5.0811, - "step": 28573 - }, - { - "epoch": 14.901694915254238, - "grad_norm": 1.3512649536132812, - "learning_rate": 7.179396984924624e-05, - "loss": 5.6868, - "step": 28574 - }, - { - "epoch": 14.902216427640157, - "grad_norm": 1.4668861627578735, - "learning_rate": 7.17929648241206e-05, - "loss": 5.4509, - "step": 28575 - }, - { - "epoch": 14.902737940026075, - "grad_norm": 1.5129486322402954, - "learning_rate": 7.179195979899498e-05, - "loss": 5.3588, - "step": 28576 - }, - { - "epoch": 14.903259452411994, - "grad_norm": 1.4918949604034424, - "learning_rate": 7.179095477386934e-05, - "loss": 5.6984, - "step": 28577 - }, - { - "epoch": 14.903780964797914, - "grad_norm": 1.4602915048599243, - "learning_rate": 7.178994974874372e-05, - "loss": 5.6435, - "step": 28578 - }, - { - "epoch": 14.904302477183833, - "grad_norm": 1.3953052759170532, - "learning_rate": 7.178894472361809e-05, - "loss": 5.3959, - "step": 28579 - }, - { - "epoch": 14.904823989569753, - "grad_norm": 1.4091367721557617, - "learning_rate": 7.178793969849246e-05, - "loss": 5.2371, - "step": 28580 - }, - { - "epoch": 14.905345501955672, - "grad_norm": 1.455794334411621, - "learning_rate": 7.178693467336684e-05, - "loss": 5.1858, - "step": 28581 - }, - { - "epoch": 14.90586701434159, - "grad_norm": 1.5143053531646729, - "learning_rate": 7.178592964824122e-05, - "loss": 5.2005, - "step": 28582 - }, - { - "epoch": 14.90638852672751, - "grad_norm": 1.5901391506195068, - "learning_rate": 7.178492462311558e-05, - "loss": 5.3154, - "step": 28583 - }, - { - "epoch": 14.906910039113429, - "grad_norm": 1.6330052614212036, - "learning_rate": 7.178391959798996e-05, - "loss": 4.9081, - "step": 28584 - }, - { - "epoch": 14.907431551499348, - "grad_norm": 1.4562150239944458, - "learning_rate": 7.178291457286433e-05, - "loss": 4.9312, - "step": 28585 - }, - { - "epoch": 14.907953063885268, - "grad_norm": 1.4999668598175049, - "learning_rate": 7.17819095477387e-05, - "loss": 5.5974, - "step": 28586 - }, - { - "epoch": 14.908474576271187, - "grad_norm": 1.6403470039367676, - "learning_rate": 7.178090452261307e-05, - "loss": 5.606, - "step": 28587 - }, - { - "epoch": 14.908996088657105, - "grad_norm": 1.4517935514450073, - "learning_rate": 7.177989949748745e-05, - "loss": 5.2713, - "step": 28588 - }, - { - "epoch": 14.909517601043024, - "grad_norm": 1.402818202972412, - "learning_rate": 7.177889447236181e-05, - "loss": 5.7872, - "step": 28589 - }, - { - "epoch": 14.910039113428944, - "grad_norm": 1.4280301332473755, - "learning_rate": 7.177788944723617e-05, - "loss": 5.2489, - "step": 28590 - }, - { - "epoch": 14.910560625814863, - "grad_norm": 1.4343523979187012, - "learning_rate": 7.177688442211055e-05, - "loss": 5.6438, - "step": 28591 - }, - { - "epoch": 14.911082138200783, - "grad_norm": 1.4379621744155884, - "learning_rate": 7.177587939698493e-05, - "loss": 5.5759, - "step": 28592 - }, - { - "epoch": 14.911603650586702, - "grad_norm": 1.4925847053527832, - "learning_rate": 7.177487437185931e-05, - "loss": 4.8752, - "step": 28593 - }, - { - "epoch": 14.91212516297262, - "grad_norm": 1.5479967594146729, - "learning_rate": 7.177386934673367e-05, - "loss": 5.2418, - "step": 28594 - }, - { - "epoch": 14.91264667535854, - "grad_norm": 1.5298296213150024, - "learning_rate": 7.177286432160805e-05, - "loss": 4.9118, - "step": 28595 - }, - { - "epoch": 14.913168187744459, - "grad_norm": 1.497225046157837, - "learning_rate": 7.177185929648241e-05, - "loss": 5.2403, - "step": 28596 - }, - { - "epoch": 14.913689700130378, - "grad_norm": 1.5244134664535522, - "learning_rate": 7.177085427135679e-05, - "loss": 5.1606, - "step": 28597 - }, - { - "epoch": 14.914211212516298, - "grad_norm": 1.4511218070983887, - "learning_rate": 7.176984924623116e-05, - "loss": 5.7562, - "step": 28598 - }, - { - "epoch": 14.914732724902217, - "grad_norm": 1.4166908264160156, - "learning_rate": 7.176884422110553e-05, - "loss": 4.9948, - "step": 28599 - }, - { - "epoch": 14.915254237288135, - "grad_norm": 1.6513055562973022, - "learning_rate": 7.17678391959799e-05, - "loss": 5.1704, - "step": 28600 - }, - { - "epoch": 14.915775749674054, - "grad_norm": 1.4948807954788208, - "learning_rate": 7.176683417085428e-05, - "loss": 5.5428, - "step": 28601 - }, - { - "epoch": 14.916297262059974, - "grad_norm": 1.4504224061965942, - "learning_rate": 7.176582914572865e-05, - "loss": 5.2379, - "step": 28602 - }, - { - "epoch": 14.916818774445893, - "grad_norm": 1.4944734573364258, - "learning_rate": 7.176482412060302e-05, - "loss": 5.191, - "step": 28603 - }, - { - "epoch": 14.917340286831813, - "grad_norm": 1.4826029539108276, - "learning_rate": 7.17638190954774e-05, - "loss": 5.7172, - "step": 28604 - }, - { - "epoch": 14.917861799217732, - "grad_norm": 1.5684783458709717, - "learning_rate": 7.176281407035176e-05, - "loss": 5.448, - "step": 28605 - }, - { - "epoch": 14.91838331160365, - "grad_norm": 1.5861703157424927, - "learning_rate": 7.176180904522614e-05, - "loss": 4.3413, - "step": 28606 - }, - { - "epoch": 14.91890482398957, - "grad_norm": 1.4855420589447021, - "learning_rate": 7.17608040201005e-05, - "loss": 4.8423, - "step": 28607 - }, - { - "epoch": 14.919426336375489, - "grad_norm": 1.4785687923431396, - "learning_rate": 7.175979899497488e-05, - "loss": 5.3938, - "step": 28608 - }, - { - "epoch": 14.919947848761408, - "grad_norm": 1.5513410568237305, - "learning_rate": 7.175879396984924e-05, - "loss": 5.4591, - "step": 28609 - }, - { - "epoch": 14.920469361147328, - "grad_norm": 1.4795007705688477, - "learning_rate": 7.175778894472362e-05, - "loss": 5.3527, - "step": 28610 - }, - { - "epoch": 14.920990873533245, - "grad_norm": 1.4792543649673462, - "learning_rate": 7.175678391959799e-05, - "loss": 5.0938, - "step": 28611 - }, - { - "epoch": 14.921512385919165, - "grad_norm": 1.3774155378341675, - "learning_rate": 7.175577889447236e-05, - "loss": 5.8174, - "step": 28612 - }, - { - "epoch": 14.922033898305084, - "grad_norm": 1.481491208076477, - "learning_rate": 7.175477386934674e-05, - "loss": 5.4991, - "step": 28613 - }, - { - "epoch": 14.922555410691004, - "grad_norm": 1.5664291381835938, - "learning_rate": 7.175376884422112e-05, - "loss": 4.9276, - "step": 28614 - }, - { - "epoch": 14.923076923076923, - "grad_norm": 1.4393399953842163, - "learning_rate": 7.175276381909548e-05, - "loss": 5.4528, - "step": 28615 - }, - { - "epoch": 14.923598435462843, - "grad_norm": 1.5230170488357544, - "learning_rate": 7.175175879396985e-05, - "loss": 5.551, - "step": 28616 - }, - { - "epoch": 14.924119947848762, - "grad_norm": 1.597256064414978, - "learning_rate": 7.175075376884423e-05, - "loss": 4.686, - "step": 28617 - }, - { - "epoch": 14.92464146023468, - "grad_norm": 1.4927494525909424, - "learning_rate": 7.174974874371859e-05, - "loss": 5.5701, - "step": 28618 - }, - { - "epoch": 14.9251629726206, - "grad_norm": 1.5902197360992432, - "learning_rate": 7.174874371859297e-05, - "loss": 5.5252, - "step": 28619 - }, - { - "epoch": 14.925684485006519, - "grad_norm": 1.4441150426864624, - "learning_rate": 7.174773869346733e-05, - "loss": 5.2231, - "step": 28620 - }, - { - "epoch": 14.926205997392438, - "grad_norm": 1.3923417329788208, - "learning_rate": 7.174673366834171e-05, - "loss": 5.5077, - "step": 28621 - }, - { - "epoch": 14.926727509778358, - "grad_norm": 1.5291404724121094, - "learning_rate": 7.174572864321609e-05, - "loss": 5.4399, - "step": 28622 - }, - { - "epoch": 14.927249022164276, - "grad_norm": 1.4962871074676514, - "learning_rate": 7.174472361809047e-05, - "loss": 5.5526, - "step": 28623 - }, - { - "epoch": 14.927770534550195, - "grad_norm": 1.5145909786224365, - "learning_rate": 7.174371859296483e-05, - "loss": 5.5887, - "step": 28624 - }, - { - "epoch": 14.928292046936114, - "grad_norm": 1.3450857400894165, - "learning_rate": 7.174271356783921e-05, - "loss": 5.7805, - "step": 28625 - }, - { - "epoch": 14.928813559322034, - "grad_norm": 1.3729137182235718, - "learning_rate": 7.174170854271357e-05, - "loss": 5.6355, - "step": 28626 - }, - { - "epoch": 14.929335071707953, - "grad_norm": 1.4083070755004883, - "learning_rate": 7.174070351758795e-05, - "loss": 5.5669, - "step": 28627 - }, - { - "epoch": 14.929856584093873, - "grad_norm": 1.5167216062545776, - "learning_rate": 7.173969849246231e-05, - "loss": 5.4578, - "step": 28628 - }, - { - "epoch": 14.930378096479792, - "grad_norm": 1.4094116687774658, - "learning_rate": 7.173869346733668e-05, - "loss": 5.3137, - "step": 28629 - }, - { - "epoch": 14.93089960886571, - "grad_norm": 1.4761216640472412, - "learning_rate": 7.173768844221106e-05, - "loss": 5.1205, - "step": 28630 - }, - { - "epoch": 14.93142112125163, - "grad_norm": 1.552886962890625, - "learning_rate": 7.173668341708542e-05, - "loss": 4.7728, - "step": 28631 - }, - { - "epoch": 14.931942633637549, - "grad_norm": 1.547668695449829, - "learning_rate": 7.17356783919598e-05, - "loss": 5.3969, - "step": 28632 - }, - { - "epoch": 14.932464146023468, - "grad_norm": 1.5625660419464111, - "learning_rate": 7.173467336683418e-05, - "loss": 5.1843, - "step": 28633 - }, - { - "epoch": 14.932985658409388, - "grad_norm": 1.4544841051101685, - "learning_rate": 7.173366834170855e-05, - "loss": 5.5686, - "step": 28634 - }, - { - "epoch": 14.933507170795306, - "grad_norm": 1.4401129484176636, - "learning_rate": 7.173266331658292e-05, - "loss": 5.1295, - "step": 28635 - }, - { - "epoch": 14.934028683181225, - "grad_norm": 1.4355723857879639, - "learning_rate": 7.17316582914573e-05, - "loss": 5.6651, - "step": 28636 - }, - { - "epoch": 14.934550195567144, - "grad_norm": 1.485883116722107, - "learning_rate": 7.173065326633166e-05, - "loss": 5.2165, - "step": 28637 - }, - { - "epoch": 14.935071707953064, - "grad_norm": 1.425299048423767, - "learning_rate": 7.172964824120604e-05, - "loss": 5.6276, - "step": 28638 - }, - { - "epoch": 14.935593220338983, - "grad_norm": 1.4804247617721558, - "learning_rate": 7.17286432160804e-05, - "loss": 4.9658, - "step": 28639 - }, - { - "epoch": 14.936114732724903, - "grad_norm": 1.492874026298523, - "learning_rate": 7.172763819095478e-05, - "loss": 5.4473, - "step": 28640 - }, - { - "epoch": 14.93663624511082, - "grad_norm": 1.4532079696655273, - "learning_rate": 7.172663316582914e-05, - "loss": 5.6644, - "step": 28641 - }, - { - "epoch": 14.93715775749674, - "grad_norm": 1.604250431060791, - "learning_rate": 7.172562814070352e-05, - "loss": 4.8808, - "step": 28642 - }, - { - "epoch": 14.93767926988266, - "grad_norm": 1.4377472400665283, - "learning_rate": 7.17246231155779e-05, - "loss": 5.6787, - "step": 28643 - }, - { - "epoch": 14.938200782268579, - "grad_norm": 1.4968500137329102, - "learning_rate": 7.172361809045226e-05, - "loss": 5.2968, - "step": 28644 - }, - { - "epoch": 14.938722294654498, - "grad_norm": 1.5776867866516113, - "learning_rate": 7.172261306532664e-05, - "loss": 5.0384, - "step": 28645 - }, - { - "epoch": 14.939243807040418, - "grad_norm": 1.498242974281311, - "learning_rate": 7.1721608040201e-05, - "loss": 5.7009, - "step": 28646 - }, - { - "epoch": 14.939765319426336, - "grad_norm": 1.4323538541793823, - "learning_rate": 7.172060301507538e-05, - "loss": 5.5764, - "step": 28647 - }, - { - "epoch": 14.940286831812255, - "grad_norm": 1.532089114189148, - "learning_rate": 7.171959798994975e-05, - "loss": 5.2511, - "step": 28648 - }, - { - "epoch": 14.940808344198174, - "grad_norm": 1.5790698528289795, - "learning_rate": 7.171859296482413e-05, - "loss": 5.3183, - "step": 28649 - }, - { - "epoch": 14.941329856584094, - "grad_norm": 1.5105369091033936, - "learning_rate": 7.171758793969849e-05, - "loss": 4.8529, - "step": 28650 - }, - { - "epoch": 14.941851368970013, - "grad_norm": 1.4573839902877808, - "learning_rate": 7.171658291457287e-05, - "loss": 4.4762, - "step": 28651 - }, - { - "epoch": 14.942372881355933, - "grad_norm": 1.3724313974380493, - "learning_rate": 7.171557788944723e-05, - "loss": 5.4987, - "step": 28652 - }, - { - "epoch": 14.94289439374185, - "grad_norm": 1.438246488571167, - "learning_rate": 7.171457286432161e-05, - "loss": 5.8217, - "step": 28653 - }, - { - "epoch": 14.94341590612777, - "grad_norm": 1.5645272731781006, - "learning_rate": 7.171356783919599e-05, - "loss": 4.77, - "step": 28654 - }, - { - "epoch": 14.94393741851369, - "grad_norm": 1.6361523866653442, - "learning_rate": 7.171256281407035e-05, - "loss": 4.5458, - "step": 28655 - }, - { - "epoch": 14.944458930899609, - "grad_norm": 1.441245436668396, - "learning_rate": 7.171155778894473e-05, - "loss": 5.2776, - "step": 28656 - }, - { - "epoch": 14.944980443285528, - "grad_norm": 1.4473341703414917, - "learning_rate": 7.17105527638191e-05, - "loss": 5.0768, - "step": 28657 - }, - { - "epoch": 14.945501955671448, - "grad_norm": 1.4376143217086792, - "learning_rate": 7.170954773869347e-05, - "loss": 5.4546, - "step": 28658 - }, - { - "epoch": 14.946023468057366, - "grad_norm": 1.449034571647644, - "learning_rate": 7.170854271356784e-05, - "loss": 4.9609, - "step": 28659 - }, - { - "epoch": 14.946544980443285, - "grad_norm": 1.3720182180404663, - "learning_rate": 7.170753768844222e-05, - "loss": 5.8586, - "step": 28660 - }, - { - "epoch": 14.947066492829205, - "grad_norm": 1.3695019483566284, - "learning_rate": 7.170653266331658e-05, - "loss": 5.4249, - "step": 28661 - }, - { - "epoch": 14.947588005215124, - "grad_norm": 1.4081997871398926, - "learning_rate": 7.170552763819096e-05, - "loss": 5.7932, - "step": 28662 - }, - { - "epoch": 14.948109517601043, - "grad_norm": 1.3486179113388062, - "learning_rate": 7.170452261306532e-05, - "loss": 5.7482, - "step": 28663 - }, - { - "epoch": 14.948631029986963, - "grad_norm": 1.4321311712265015, - "learning_rate": 7.17035175879397e-05, - "loss": 5.247, - "step": 28664 - }, - { - "epoch": 14.94915254237288, - "grad_norm": 1.3675172328948975, - "learning_rate": 7.170251256281408e-05, - "loss": 5.5435, - "step": 28665 - }, - { - "epoch": 14.9496740547588, - "grad_norm": 1.5517513751983643, - "learning_rate": 7.170150753768845e-05, - "loss": 5.5078, - "step": 28666 - }, - { - "epoch": 14.95019556714472, - "grad_norm": 1.4665303230285645, - "learning_rate": 7.170050251256282e-05, - "loss": 5.1849, - "step": 28667 - }, - { - "epoch": 14.950717079530639, - "grad_norm": 1.5832356214523315, - "learning_rate": 7.169949748743718e-05, - "loss": 5.2357, - "step": 28668 - }, - { - "epoch": 14.951238591916558, - "grad_norm": 1.4969896078109741, - "learning_rate": 7.169849246231156e-05, - "loss": 4.9454, - "step": 28669 - }, - { - "epoch": 14.951760104302478, - "grad_norm": 1.4712553024291992, - "learning_rate": 7.169748743718593e-05, - "loss": 5.358, - "step": 28670 - }, - { - "epoch": 14.952281616688396, - "grad_norm": 1.4642703533172607, - "learning_rate": 7.16964824120603e-05, - "loss": 5.4412, - "step": 28671 - }, - { - "epoch": 14.952803129074315, - "grad_norm": 1.5053106546401978, - "learning_rate": 7.169547738693467e-05, - "loss": 5.6361, - "step": 28672 - }, - { - "epoch": 14.953324641460235, - "grad_norm": 1.5268771648406982, - "learning_rate": 7.169447236180905e-05, - "loss": 5.1085, - "step": 28673 - }, - { - "epoch": 14.953846153846154, - "grad_norm": 1.498722791671753, - "learning_rate": 7.169346733668342e-05, - "loss": 4.9676, - "step": 28674 - }, - { - "epoch": 14.954367666232073, - "grad_norm": 1.4016809463500977, - "learning_rate": 7.16924623115578e-05, - "loss": 5.4918, - "step": 28675 - }, - { - "epoch": 14.954889178617993, - "grad_norm": 1.513654112815857, - "learning_rate": 7.169145728643217e-05, - "loss": 4.7108, - "step": 28676 - }, - { - "epoch": 14.95541069100391, - "grad_norm": 1.5641177892684937, - "learning_rate": 7.169045226130654e-05, - "loss": 4.6352, - "step": 28677 - }, - { - "epoch": 14.95593220338983, - "grad_norm": 1.4486216306686401, - "learning_rate": 7.168944723618091e-05, - "loss": 5.468, - "step": 28678 - }, - { - "epoch": 14.95645371577575, - "grad_norm": 1.4731370210647583, - "learning_rate": 7.168844221105529e-05, - "loss": 5.5751, - "step": 28679 - }, - { - "epoch": 14.956975228161669, - "grad_norm": 1.4224133491516113, - "learning_rate": 7.168743718592965e-05, - "loss": 5.6919, - "step": 28680 - }, - { - "epoch": 14.957496740547588, - "grad_norm": 1.4449156522750854, - "learning_rate": 7.168643216080403e-05, - "loss": 5.4323, - "step": 28681 - }, - { - "epoch": 14.958018252933508, - "grad_norm": 1.4401071071624756, - "learning_rate": 7.168542713567839e-05, - "loss": 5.4474, - "step": 28682 - }, - { - "epoch": 14.958539765319426, - "grad_norm": 1.4329952001571655, - "learning_rate": 7.168442211055276e-05, - "loss": 5.2515, - "step": 28683 - }, - { - "epoch": 14.959061277705345, - "grad_norm": 1.3728079795837402, - "learning_rate": 7.168341708542713e-05, - "loss": 5.3729, - "step": 28684 - }, - { - "epoch": 14.959582790091265, - "grad_norm": 1.388664722442627, - "learning_rate": 7.168241206030151e-05, - "loss": 5.4933, - "step": 28685 - }, - { - "epoch": 14.960104302477184, - "grad_norm": 1.4649243354797363, - "learning_rate": 7.168140703517589e-05, - "loss": 5.1329, - "step": 28686 - }, - { - "epoch": 14.960625814863103, - "grad_norm": 1.4373661279678345, - "learning_rate": 7.168040201005025e-05, - "loss": 5.4094, - "step": 28687 - }, - { - "epoch": 14.961147327249023, - "grad_norm": 1.4167628288269043, - "learning_rate": 7.167939698492463e-05, - "loss": 5.6761, - "step": 28688 - }, - { - "epoch": 14.96166883963494, - "grad_norm": 1.5517792701721191, - "learning_rate": 7.1678391959799e-05, - "loss": 5.111, - "step": 28689 - }, - { - "epoch": 14.96219035202086, - "grad_norm": 1.5337998867034912, - "learning_rate": 7.167738693467337e-05, - "loss": 5.1616, - "step": 28690 - }, - { - "epoch": 14.96271186440678, - "grad_norm": 1.4408414363861084, - "learning_rate": 7.167638190954774e-05, - "loss": 5.3281, - "step": 28691 - }, - { - "epoch": 14.963233376792699, - "grad_norm": 1.3482372760772705, - "learning_rate": 7.167537688442212e-05, - "loss": 5.6694, - "step": 28692 - }, - { - "epoch": 14.963754889178619, - "grad_norm": 1.4203884601593018, - "learning_rate": 7.167437185929648e-05, - "loss": 5.2107, - "step": 28693 - }, - { - "epoch": 14.964276401564538, - "grad_norm": 1.4743424654006958, - "learning_rate": 7.167336683417086e-05, - "loss": 5.434, - "step": 28694 - }, - { - "epoch": 14.964797913950456, - "grad_norm": 1.4434056282043457, - "learning_rate": 7.167236180904524e-05, - "loss": 5.7652, - "step": 28695 - }, - { - "epoch": 14.965319426336375, - "grad_norm": 1.4742836952209473, - "learning_rate": 7.16713567839196e-05, - "loss": 5.5115, - "step": 28696 - }, - { - "epoch": 14.965840938722295, - "grad_norm": 1.4921780824661255, - "learning_rate": 7.167035175879398e-05, - "loss": 5.6294, - "step": 28697 - }, - { - "epoch": 14.966362451108214, - "grad_norm": 1.3196179866790771, - "learning_rate": 7.166934673366834e-05, - "loss": 5.7158, - "step": 28698 - }, - { - "epoch": 14.966883963494134, - "grad_norm": 1.3442649841308594, - "learning_rate": 7.166834170854272e-05, - "loss": 4.7841, - "step": 28699 - }, - { - "epoch": 14.967405475880053, - "grad_norm": 1.3880915641784668, - "learning_rate": 7.166733668341708e-05, - "loss": 5.2986, - "step": 28700 - }, - { - "epoch": 14.96792698826597, - "grad_norm": 1.3242672681808472, - "learning_rate": 7.166633165829146e-05, - "loss": 5.4306, - "step": 28701 - }, - { - "epoch": 14.96844850065189, - "grad_norm": 1.410508155822754, - "learning_rate": 7.166532663316583e-05, - "loss": 5.4946, - "step": 28702 - }, - { - "epoch": 14.96897001303781, - "grad_norm": 1.3651875257492065, - "learning_rate": 7.16643216080402e-05, - "loss": 5.7432, - "step": 28703 - }, - { - "epoch": 14.969491525423729, - "grad_norm": 1.4595333337783813, - "learning_rate": 7.166331658291457e-05, - "loss": 5.4658, - "step": 28704 - }, - { - "epoch": 14.970013037809649, - "grad_norm": 1.3509241342544556, - "learning_rate": 7.166231155778895e-05, - "loss": 5.1201, - "step": 28705 - }, - { - "epoch": 14.970534550195566, - "grad_norm": 1.6749680042266846, - "learning_rate": 7.166130653266332e-05, - "loss": 5.6887, - "step": 28706 - }, - { - "epoch": 14.971056062581486, - "grad_norm": 1.3925038576126099, - "learning_rate": 7.16603015075377e-05, - "loss": 5.3005, - "step": 28707 - }, - { - "epoch": 14.971577574967405, - "grad_norm": 1.4161747694015503, - "learning_rate": 7.165929648241207e-05, - "loss": 5.6963, - "step": 28708 - }, - { - "epoch": 14.972099087353325, - "grad_norm": 1.4100614786148071, - "learning_rate": 7.165829145728643e-05, - "loss": 5.5372, - "step": 28709 - }, - { - "epoch": 14.972620599739244, - "grad_norm": 1.4832944869995117, - "learning_rate": 7.165728643216081e-05, - "loss": 5.4686, - "step": 28710 - }, - { - "epoch": 14.973142112125164, - "grad_norm": 1.4336020946502686, - "learning_rate": 7.165628140703517e-05, - "loss": 5.741, - "step": 28711 - }, - { - "epoch": 14.973663624511083, - "grad_norm": 1.4400634765625, - "learning_rate": 7.165527638190955e-05, - "loss": 5.5362, - "step": 28712 - }, - { - "epoch": 14.974185136897, - "grad_norm": 1.369765281677246, - "learning_rate": 7.165427135678391e-05, - "loss": 5.3163, - "step": 28713 - }, - { - "epoch": 14.97470664928292, - "grad_norm": 1.5993236303329468, - "learning_rate": 7.165326633165829e-05, - "loss": 5.3853, - "step": 28714 - }, - { - "epoch": 14.97522816166884, - "grad_norm": 1.5911661386489868, - "learning_rate": 7.165226130653267e-05, - "loss": 5.0187, - "step": 28715 - }, - { - "epoch": 14.975749674054759, - "grad_norm": 1.5312795639038086, - "learning_rate": 7.165125628140705e-05, - "loss": 5.6851, - "step": 28716 - }, - { - "epoch": 14.976271186440679, - "grad_norm": 1.4324754476547241, - "learning_rate": 7.165025125628141e-05, - "loss": 5.635, - "step": 28717 - }, - { - "epoch": 14.976792698826596, - "grad_norm": 1.5472065210342407, - "learning_rate": 7.164924623115579e-05, - "loss": 5.2438, - "step": 28718 - }, - { - "epoch": 14.977314211212516, - "grad_norm": 1.403737187385559, - "learning_rate": 7.164824120603015e-05, - "loss": 5.7839, - "step": 28719 - }, - { - "epoch": 14.977835723598435, - "grad_norm": 1.4763922691345215, - "learning_rate": 7.164723618090453e-05, - "loss": 5.4063, - "step": 28720 - }, - { - "epoch": 14.978357235984355, - "grad_norm": 1.6391302347183228, - "learning_rate": 7.16462311557789e-05, - "loss": 4.8056, - "step": 28721 - }, - { - "epoch": 14.978878748370274, - "grad_norm": 1.4879814386367798, - "learning_rate": 7.164522613065326e-05, - "loss": 5.4352, - "step": 28722 - }, - { - "epoch": 14.979400260756194, - "grad_norm": 1.5302475690841675, - "learning_rate": 7.164422110552764e-05, - "loss": 4.8108, - "step": 28723 - }, - { - "epoch": 14.979921773142113, - "grad_norm": 1.5435317754745483, - "learning_rate": 7.1643216080402e-05, - "loss": 5.5326, - "step": 28724 - }, - { - "epoch": 14.98044328552803, - "grad_norm": 1.58011794090271, - "learning_rate": 7.164221105527638e-05, - "loss": 5.333, - "step": 28725 - }, - { - "epoch": 14.98096479791395, - "grad_norm": 1.5476481914520264, - "learning_rate": 7.164120603015076e-05, - "loss": 4.3638, - "step": 28726 - }, - { - "epoch": 14.98148631029987, - "grad_norm": 1.5259873867034912, - "learning_rate": 7.164020100502514e-05, - "loss": 5.6969, - "step": 28727 - }, - { - "epoch": 14.98200782268579, - "grad_norm": 1.4289370775222778, - "learning_rate": 7.16391959798995e-05, - "loss": 5.2811, - "step": 28728 - }, - { - "epoch": 14.982529335071709, - "grad_norm": 1.4673734903335571, - "learning_rate": 7.163819095477388e-05, - "loss": 5.4536, - "step": 28729 - }, - { - "epoch": 14.983050847457626, - "grad_norm": 1.4638323783874512, - "learning_rate": 7.163718592964824e-05, - "loss": 5.8664, - "step": 28730 - }, - { - "epoch": 14.983572359843546, - "grad_norm": 1.538002610206604, - "learning_rate": 7.163618090452262e-05, - "loss": 5.3419, - "step": 28731 - }, - { - "epoch": 14.984093872229465, - "grad_norm": 1.4747270345687866, - "learning_rate": 7.163517587939698e-05, - "loss": 4.7022, - "step": 28732 - }, - { - "epoch": 14.984615384615385, - "grad_norm": 1.5165743827819824, - "learning_rate": 7.163417085427136e-05, - "loss": 5.4074, - "step": 28733 - }, - { - "epoch": 14.985136897001304, - "grad_norm": 1.460039496421814, - "learning_rate": 7.163316582914573e-05, - "loss": 5.4383, - "step": 28734 - }, - { - "epoch": 14.985658409387224, - "grad_norm": 1.5856627225875854, - "learning_rate": 7.16321608040201e-05, - "loss": 5.2963, - "step": 28735 - }, - { - "epoch": 14.986179921773141, - "grad_norm": 1.442042589187622, - "learning_rate": 7.163115577889448e-05, - "loss": 5.4237, - "step": 28736 - }, - { - "epoch": 14.98670143415906, - "grad_norm": 1.4758368730545044, - "learning_rate": 7.163015075376885e-05, - "loss": 5.3933, - "step": 28737 - }, - { - "epoch": 14.98722294654498, - "grad_norm": 1.4830396175384521, - "learning_rate": 7.162914572864322e-05, - "loss": 5.3216, - "step": 28738 - }, - { - "epoch": 14.9877444589309, - "grad_norm": 1.3438105583190918, - "learning_rate": 7.162814070351759e-05, - "loss": 5.2257, - "step": 28739 - }, - { - "epoch": 14.98826597131682, - "grad_norm": 1.478973150253296, - "learning_rate": 7.162713567839197e-05, - "loss": 5.4409, - "step": 28740 - }, - { - "epoch": 14.988787483702739, - "grad_norm": 1.5200358629226685, - "learning_rate": 7.162613065326633e-05, - "loss": 5.2947, - "step": 28741 - }, - { - "epoch": 14.989308996088656, - "grad_norm": 1.534571886062622, - "learning_rate": 7.162512562814071e-05, - "loss": 5.2428, - "step": 28742 - }, - { - "epoch": 14.989830508474576, - "grad_norm": 1.499707579612732, - "learning_rate": 7.162412060301507e-05, - "loss": 5.501, - "step": 28743 - }, - { - "epoch": 14.990352020860495, - "grad_norm": 1.3675246238708496, - "learning_rate": 7.162311557788945e-05, - "loss": 5.8222, - "step": 28744 - }, - { - "epoch": 14.990873533246415, - "grad_norm": 1.430826187133789, - "learning_rate": 7.162211055276382e-05, - "loss": 5.4385, - "step": 28745 - }, - { - "epoch": 14.991395045632334, - "grad_norm": 1.5606554746627808, - "learning_rate": 7.16211055276382e-05, - "loss": 5.1306, - "step": 28746 - }, - { - "epoch": 14.991916558018254, - "grad_norm": 1.5068706274032593, - "learning_rate": 7.162010050251257e-05, - "loss": 5.0742, - "step": 28747 - }, - { - "epoch": 14.992438070404171, - "grad_norm": 1.600827932357788, - "learning_rate": 7.161909547738694e-05, - "loss": 5.6606, - "step": 28748 - }, - { - "epoch": 14.99295958279009, - "grad_norm": 1.5486794710159302, - "learning_rate": 7.161809045226131e-05, - "loss": 5.1085, - "step": 28749 - }, - { - "epoch": 14.99348109517601, - "grad_norm": 1.5057594776153564, - "learning_rate": 7.161708542713568e-05, - "loss": 5.1392, - "step": 28750 - }, - { - "epoch": 14.99400260756193, - "grad_norm": 1.4344435930252075, - "learning_rate": 7.161608040201006e-05, - "loss": 5.1967, - "step": 28751 - }, - { - "epoch": 14.99452411994785, - "grad_norm": 1.4849990606307983, - "learning_rate": 7.161507537688442e-05, - "loss": 5.6232, - "step": 28752 - }, - { - "epoch": 14.995045632333769, - "grad_norm": 1.313296914100647, - "learning_rate": 7.16140703517588e-05, - "loss": 5.5417, - "step": 28753 - }, - { - "epoch": 14.995567144719686, - "grad_norm": 1.447009563446045, - "learning_rate": 7.161306532663316e-05, - "loss": 4.8909, - "step": 28754 - }, - { - "epoch": 14.996088657105606, - "grad_norm": 1.429837942123413, - "learning_rate": 7.161206030150754e-05, - "loss": 5.1868, - "step": 28755 - }, - { - "epoch": 14.996610169491525, - "grad_norm": 1.432717204093933, - "learning_rate": 7.161105527638192e-05, - "loss": 5.465, - "step": 28756 - }, - { - "epoch": 14.997131681877445, - "grad_norm": 1.508255958557129, - "learning_rate": 7.16100502512563e-05, - "loss": 5.409, - "step": 28757 - }, - { - "epoch": 14.997653194263364, - "grad_norm": 1.331375002861023, - "learning_rate": 7.160904522613066e-05, - "loss": 5.7425, - "step": 28758 - }, - { - "epoch": 14.998174706649284, - "grad_norm": 1.39933180809021, - "learning_rate": 7.160804020100504e-05, - "loss": 5.716, - "step": 28759 - }, - { - "epoch": 14.998696219035201, - "grad_norm": 1.4322574138641357, - "learning_rate": 7.16070351758794e-05, - "loss": 5.3355, - "step": 28760 - }, - { - "epoch": 14.99921773142112, - "grad_norm": 1.4264367818832397, - "learning_rate": 7.160603015075377e-05, - "loss": 5.1726, - "step": 28761 - }, - { - "epoch": 14.99973924380704, - "grad_norm": 1.40431809425354, - "learning_rate": 7.160502512562814e-05, - "loss": 5.657, - "step": 28762 - }, - { - "epoch": 15.00026075619296, - "grad_norm": 1.6525298357009888, - "learning_rate": 7.160402010050251e-05, - "loss": 5.3913, - "step": 28763 - }, - { - "epoch": 15.00078226857888, - "grad_norm": 1.5092744827270508, - "learning_rate": 7.160301507537689e-05, - "loss": 4.9462, - "step": 28764 - }, - { - "epoch": 15.001303780964799, - "grad_norm": 1.532710075378418, - "learning_rate": 7.160201005025125e-05, - "loss": 5.0459, - "step": 28765 - }, - { - "epoch": 15.001825293350716, - "grad_norm": 1.4828742742538452, - "learning_rate": 7.160100502512563e-05, - "loss": 5.3585, - "step": 28766 - }, - { - "epoch": 15.002346805736636, - "grad_norm": 1.6898969411849976, - "learning_rate": 7.16e-05, - "loss": 5.0414, - "step": 28767 - }, - { - "epoch": 15.002868318122555, - "grad_norm": 1.4259090423583984, - "learning_rate": 7.159899497487438e-05, - "loss": 5.4224, - "step": 28768 - }, - { - "epoch": 15.003389830508475, - "grad_norm": 1.6078959703445435, - "learning_rate": 7.159798994974875e-05, - "loss": 4.9443, - "step": 28769 - }, - { - "epoch": 15.003911342894394, - "grad_norm": 1.4462345838546753, - "learning_rate": 7.159698492462313e-05, - "loss": 5.259, - "step": 28770 - }, - { - "epoch": 15.004432855280314, - "grad_norm": 1.4513009786605835, - "learning_rate": 7.159597989949749e-05, - "loss": 5.5397, - "step": 28771 - }, - { - "epoch": 15.004954367666231, - "grad_norm": 1.4159588813781738, - "learning_rate": 7.159497487437187e-05, - "loss": 4.8246, - "step": 28772 - }, - { - "epoch": 15.00547588005215, - "grad_norm": 1.6263099908828735, - "learning_rate": 7.159396984924623e-05, - "loss": 5.3909, - "step": 28773 - }, - { - "epoch": 15.00599739243807, - "grad_norm": 1.4494686126708984, - "learning_rate": 7.159296482412061e-05, - "loss": 5.779, - "step": 28774 - }, - { - "epoch": 15.00651890482399, - "grad_norm": 1.505715012550354, - "learning_rate": 7.159195979899497e-05, - "loss": 5.3623, - "step": 28775 - }, - { - "epoch": 15.00704041720991, - "grad_norm": 1.3422138690948486, - "learning_rate": 7.159095477386935e-05, - "loss": 5.6364, - "step": 28776 - }, - { - "epoch": 15.007561929595829, - "grad_norm": 1.5295192003250122, - "learning_rate": 7.158994974874373e-05, - "loss": 5.0962, - "step": 28777 - }, - { - "epoch": 15.008083441981746, - "grad_norm": 1.3739656209945679, - "learning_rate": 7.15889447236181e-05, - "loss": 5.5243, - "step": 28778 - }, - { - "epoch": 15.008604954367666, - "grad_norm": 1.358380675315857, - "learning_rate": 7.158793969849247e-05, - "loss": 5.6585, - "step": 28779 - }, - { - "epoch": 15.009126466753585, - "grad_norm": 1.663046956062317, - "learning_rate": 7.158693467336684e-05, - "loss": 5.1932, - "step": 28780 - }, - { - "epoch": 15.009647979139505, - "grad_norm": 1.538774013519287, - "learning_rate": 7.158592964824121e-05, - "loss": 5.0794, - "step": 28781 - }, - { - "epoch": 15.010169491525424, - "grad_norm": 1.4424406290054321, - "learning_rate": 7.158492462311558e-05, - "loss": 5.5451, - "step": 28782 - }, - { - "epoch": 15.010691003911344, - "grad_norm": 1.6574249267578125, - "learning_rate": 7.158391959798996e-05, - "loss": 4.7721, - "step": 28783 - }, - { - "epoch": 15.011212516297261, - "grad_norm": 1.3812527656555176, - "learning_rate": 7.158291457286432e-05, - "loss": 5.8809, - "step": 28784 - }, - { - "epoch": 15.01173402868318, - "grad_norm": 1.3632599115371704, - "learning_rate": 7.15819095477387e-05, - "loss": 5.6386, - "step": 28785 - }, - { - "epoch": 15.0122555410691, - "grad_norm": 1.421990156173706, - "learning_rate": 7.158090452261306e-05, - "loss": 5.84, - "step": 28786 - }, - { - "epoch": 15.01277705345502, - "grad_norm": 1.4958935976028442, - "learning_rate": 7.157989949748744e-05, - "loss": 5.5306, - "step": 28787 - }, - { - "epoch": 15.01329856584094, - "grad_norm": 1.575251817703247, - "learning_rate": 7.157889447236182e-05, - "loss": 5.0612, - "step": 28788 - }, - { - "epoch": 15.013820078226859, - "grad_norm": 1.5203949213027954, - "learning_rate": 7.157788944723618e-05, - "loss": 5.3774, - "step": 28789 - }, - { - "epoch": 15.014341590612776, - "grad_norm": 1.4855563640594482, - "learning_rate": 7.157688442211056e-05, - "loss": 4.6897, - "step": 28790 - }, - { - "epoch": 15.014863102998696, - "grad_norm": 1.359682559967041, - "learning_rate": 7.157587939698492e-05, - "loss": 5.9481, - "step": 28791 - }, - { - "epoch": 15.015384615384615, - "grad_norm": 1.4472228288650513, - "learning_rate": 7.15748743718593e-05, - "loss": 5.2749, - "step": 28792 - }, - { - "epoch": 15.015906127770535, - "grad_norm": 1.5162242650985718, - "learning_rate": 7.157386934673367e-05, - "loss": 5.5406, - "step": 28793 - }, - { - "epoch": 15.016427640156454, - "grad_norm": 1.7267752885818481, - "learning_rate": 7.157286432160804e-05, - "loss": 4.129, - "step": 28794 - }, - { - "epoch": 15.016949152542374, - "grad_norm": 1.4421755075454712, - "learning_rate": 7.157185929648241e-05, - "loss": 5.0618, - "step": 28795 - }, - { - "epoch": 15.017470664928291, - "grad_norm": 1.6397647857666016, - "learning_rate": 7.157085427135679e-05, - "loss": 5.1878, - "step": 28796 - }, - { - "epoch": 15.01799217731421, - "grad_norm": 1.479836344718933, - "learning_rate": 7.156984924623116e-05, - "loss": 5.1582, - "step": 28797 - }, - { - "epoch": 15.01851368970013, - "grad_norm": 1.469161868095398, - "learning_rate": 7.156884422110554e-05, - "loss": 5.5185, - "step": 28798 - }, - { - "epoch": 15.01903520208605, - "grad_norm": 1.543969988822937, - "learning_rate": 7.15678391959799e-05, - "loss": 4.8048, - "step": 28799 - }, - { - "epoch": 15.01955671447197, - "grad_norm": 1.4900189638137817, - "learning_rate": 7.156683417085428e-05, - "loss": 5.6398, - "step": 28800 - }, - { - "epoch": 15.020078226857889, - "grad_norm": 1.339597463607788, - "learning_rate": 7.156582914572865e-05, - "loss": 5.4221, - "step": 28801 - }, - { - "epoch": 15.020599739243806, - "grad_norm": 1.3894256353378296, - "learning_rate": 7.156482412060301e-05, - "loss": 5.6948, - "step": 28802 - }, - { - "epoch": 15.021121251629726, - "grad_norm": 1.6065609455108643, - "learning_rate": 7.156381909547739e-05, - "loss": 5.394, - "step": 28803 - }, - { - "epoch": 15.021642764015645, - "grad_norm": 1.4424705505371094, - "learning_rate": 7.156281407035175e-05, - "loss": 5.8872, - "step": 28804 - }, - { - "epoch": 15.022164276401565, - "grad_norm": 1.4750677347183228, - "learning_rate": 7.156180904522613e-05, - "loss": 5.7291, - "step": 28805 - }, - { - "epoch": 15.022685788787484, - "grad_norm": 1.637956142425537, - "learning_rate": 7.15608040201005e-05, - "loss": 5.4882, - "step": 28806 - }, - { - "epoch": 15.023207301173404, - "grad_norm": 1.4494270086288452, - "learning_rate": 7.155979899497487e-05, - "loss": 5.3475, - "step": 28807 - }, - { - "epoch": 15.023728813559321, - "grad_norm": 1.4985331296920776, - "learning_rate": 7.155879396984925e-05, - "loss": 5.1292, - "step": 28808 - }, - { - "epoch": 15.024250325945241, - "grad_norm": 1.4529738426208496, - "learning_rate": 7.155778894472363e-05, - "loss": 5.627, - "step": 28809 - }, - { - "epoch": 15.02477183833116, - "grad_norm": 1.441886305809021, - "learning_rate": 7.1556783919598e-05, - "loss": 5.4407, - "step": 28810 - }, - { - "epoch": 15.02529335071708, - "grad_norm": 1.5129750967025757, - "learning_rate": 7.155577889447237e-05, - "loss": 5.3869, - "step": 28811 - }, - { - "epoch": 15.025814863103, - "grad_norm": 1.476235032081604, - "learning_rate": 7.155477386934674e-05, - "loss": 5.0708, - "step": 28812 - }, - { - "epoch": 15.026336375488917, - "grad_norm": 1.5118719339370728, - "learning_rate": 7.155376884422111e-05, - "loss": 5.1149, - "step": 28813 - }, - { - "epoch": 15.026857887874836, - "grad_norm": 1.4996546506881714, - "learning_rate": 7.155276381909548e-05, - "loss": 5.6304, - "step": 28814 - }, - { - "epoch": 15.027379400260756, - "grad_norm": 1.4705839157104492, - "learning_rate": 7.155175879396984e-05, - "loss": 4.6083, - "step": 28815 - }, - { - "epoch": 15.027900912646675, - "grad_norm": 1.4936978816986084, - "learning_rate": 7.155075376884422e-05, - "loss": 5.0401, - "step": 28816 - }, - { - "epoch": 15.028422425032595, - "grad_norm": 1.8294819593429565, - "learning_rate": 7.154974874371859e-05, - "loss": 5.2383, - "step": 28817 - }, - { - "epoch": 15.028943937418514, - "grad_norm": 1.439602017402649, - "learning_rate": 7.154874371859296e-05, - "loss": 5.7824, - "step": 28818 - }, - { - "epoch": 15.029465449804432, - "grad_norm": 1.4859435558319092, - "learning_rate": 7.154773869346734e-05, - "loss": 5.2004, - "step": 28819 - }, - { - "epoch": 15.029986962190351, - "grad_norm": 1.4607715606689453, - "learning_rate": 7.154673366834172e-05, - "loss": 5.3147, - "step": 28820 - }, - { - "epoch": 15.030508474576271, - "grad_norm": 1.3796449899673462, - "learning_rate": 7.154572864321608e-05, - "loss": 4.3401, - "step": 28821 - }, - { - "epoch": 15.03102998696219, - "grad_norm": 1.4791381359100342, - "learning_rate": 7.154472361809046e-05, - "loss": 4.943, - "step": 28822 - }, - { - "epoch": 15.03155149934811, - "grad_norm": 1.479760766029358, - "learning_rate": 7.154371859296483e-05, - "loss": 4.7303, - "step": 28823 - }, - { - "epoch": 15.03207301173403, - "grad_norm": 1.5271408557891846, - "learning_rate": 7.15427135678392e-05, - "loss": 5.4615, - "step": 28824 - }, - { - "epoch": 15.032594524119947, - "grad_norm": 1.3575702905654907, - "learning_rate": 7.154170854271357e-05, - "loss": 5.6705, - "step": 28825 - }, - { - "epoch": 15.033116036505866, - "grad_norm": 1.4282563924789429, - "learning_rate": 7.154070351758795e-05, - "loss": 5.5813, - "step": 28826 - }, - { - "epoch": 15.033637548891786, - "grad_norm": 1.5819123983383179, - "learning_rate": 7.153969849246231e-05, - "loss": 5.2249, - "step": 28827 - }, - { - "epoch": 15.034159061277705, - "grad_norm": 1.490268349647522, - "learning_rate": 7.153869346733669e-05, - "loss": 5.1746, - "step": 28828 - }, - { - "epoch": 15.034680573663625, - "grad_norm": 1.4435425996780396, - "learning_rate": 7.153768844221107e-05, - "loss": 5.2465, - "step": 28829 - }, - { - "epoch": 15.035202086049544, - "grad_norm": 1.497698426246643, - "learning_rate": 7.153668341708543e-05, - "loss": 5.5486, - "step": 28830 - }, - { - "epoch": 15.035723598435462, - "grad_norm": 1.5131421089172363, - "learning_rate": 7.153567839195981e-05, - "loss": 4.6749, - "step": 28831 - }, - { - "epoch": 15.036245110821381, - "grad_norm": 1.4053012132644653, - "learning_rate": 7.153467336683417e-05, - "loss": 5.6059, - "step": 28832 - }, - { - "epoch": 15.036766623207301, - "grad_norm": 1.473907232284546, - "learning_rate": 7.153366834170855e-05, - "loss": 5.1365, - "step": 28833 - }, - { - "epoch": 15.03728813559322, - "grad_norm": 1.4321892261505127, - "learning_rate": 7.153266331658291e-05, - "loss": 5.2932, - "step": 28834 - }, - { - "epoch": 15.03780964797914, - "grad_norm": 1.546321153640747, - "learning_rate": 7.153165829145729e-05, - "loss": 5.1277, - "step": 28835 - }, - { - "epoch": 15.03833116036506, - "grad_norm": 1.4344371557235718, - "learning_rate": 7.153065326633166e-05, - "loss": 5.2466, - "step": 28836 - }, - { - "epoch": 15.038852672750977, - "grad_norm": 1.4754090309143066, - "learning_rate": 7.152964824120603e-05, - "loss": 5.4899, - "step": 28837 - }, - { - "epoch": 15.039374185136897, - "grad_norm": 1.4008880853652954, - "learning_rate": 7.15286432160804e-05, - "loss": 5.7937, - "step": 28838 - }, - { - "epoch": 15.039895697522816, - "grad_norm": 1.4668387174606323, - "learning_rate": 7.152763819095478e-05, - "loss": 5.5429, - "step": 28839 - }, - { - "epoch": 15.040417209908735, - "grad_norm": 1.5064904689788818, - "learning_rate": 7.152663316582915e-05, - "loss": 4.8388, - "step": 28840 - }, - { - "epoch": 15.040938722294655, - "grad_norm": 1.5402803421020508, - "learning_rate": 7.152562814070352e-05, - "loss": 5.095, - "step": 28841 - }, - { - "epoch": 15.041460234680574, - "grad_norm": 1.577242136001587, - "learning_rate": 7.15246231155779e-05, - "loss": 5.291, - "step": 28842 - }, - { - "epoch": 15.041981747066492, - "grad_norm": 1.3930110931396484, - "learning_rate": 7.152361809045226e-05, - "loss": 5.346, - "step": 28843 - }, - { - "epoch": 15.042503259452412, - "grad_norm": 1.4565554857254028, - "learning_rate": 7.152261306532664e-05, - "loss": 5.6947, - "step": 28844 - }, - { - "epoch": 15.043024771838331, - "grad_norm": 1.3500627279281616, - "learning_rate": 7.1521608040201e-05, - "loss": 5.4896, - "step": 28845 - }, - { - "epoch": 15.04354628422425, - "grad_norm": 1.3910415172576904, - "learning_rate": 7.152060301507538e-05, - "loss": 5.4508, - "step": 28846 - }, - { - "epoch": 15.04406779661017, - "grad_norm": 1.5313643217086792, - "learning_rate": 7.151959798994974e-05, - "loss": 5.4264, - "step": 28847 - }, - { - "epoch": 15.04458930899609, - "grad_norm": 1.455739140510559, - "learning_rate": 7.151859296482412e-05, - "loss": 5.4513, - "step": 28848 - }, - { - "epoch": 15.045110821382007, - "grad_norm": 1.384447455406189, - "learning_rate": 7.15175879396985e-05, - "loss": 5.3703, - "step": 28849 - }, - { - "epoch": 15.045632333767927, - "grad_norm": 1.5418479442596436, - "learning_rate": 7.151658291457288e-05, - "loss": 4.592, - "step": 28850 - }, - { - "epoch": 15.046153846153846, - "grad_norm": 1.5046991109848022, - "learning_rate": 7.151557788944724e-05, - "loss": 5.2092, - "step": 28851 - }, - { - "epoch": 15.046675358539765, - "grad_norm": 1.514387607574463, - "learning_rate": 7.151457286432162e-05, - "loss": 5.7226, - "step": 28852 - }, - { - "epoch": 15.047196870925685, - "grad_norm": 1.4052135944366455, - "learning_rate": 7.151356783919598e-05, - "loss": 4.3011, - "step": 28853 - }, - { - "epoch": 15.047718383311604, - "grad_norm": 1.5513349771499634, - "learning_rate": 7.151256281407036e-05, - "loss": 5.6014, - "step": 28854 - }, - { - "epoch": 15.048239895697522, - "grad_norm": 1.5160729885101318, - "learning_rate": 7.151155778894473e-05, - "loss": 4.8852, - "step": 28855 - }, - { - "epoch": 15.048761408083442, - "grad_norm": 1.4477425813674927, - "learning_rate": 7.151055276381909e-05, - "loss": 5.1647, - "step": 28856 - }, - { - "epoch": 15.049282920469361, - "grad_norm": 1.6007227897644043, - "learning_rate": 7.150954773869347e-05, - "loss": 5.4262, - "step": 28857 - }, - { - "epoch": 15.04980443285528, - "grad_norm": 1.5553786754608154, - "learning_rate": 7.150854271356783e-05, - "loss": 5.0283, - "step": 28858 - }, - { - "epoch": 15.0503259452412, - "grad_norm": 1.4909988641738892, - "learning_rate": 7.150753768844221e-05, - "loss": 5.2264, - "step": 28859 - }, - { - "epoch": 15.05084745762712, - "grad_norm": 1.4256234169006348, - "learning_rate": 7.150653266331659e-05, - "loss": 5.3761, - "step": 28860 - }, - { - "epoch": 15.051368970013037, - "grad_norm": 1.478050947189331, - "learning_rate": 7.150552763819097e-05, - "loss": 5.4356, - "step": 28861 - }, - { - "epoch": 15.051890482398957, - "grad_norm": 1.566428303718567, - "learning_rate": 7.150452261306533e-05, - "loss": 5.4581, - "step": 28862 - }, - { - "epoch": 15.052411994784876, - "grad_norm": 1.5278123617172241, - "learning_rate": 7.150351758793971e-05, - "loss": 5.5649, - "step": 28863 - }, - { - "epoch": 15.052933507170795, - "grad_norm": 1.4733365774154663, - "learning_rate": 7.150251256281407e-05, - "loss": 4.9919, - "step": 28864 - }, - { - "epoch": 15.053455019556715, - "grad_norm": 1.423715591430664, - "learning_rate": 7.150150753768845e-05, - "loss": 5.8353, - "step": 28865 - }, - { - "epoch": 15.053976531942634, - "grad_norm": 1.4385682344436646, - "learning_rate": 7.150050251256281e-05, - "loss": 5.5519, - "step": 28866 - }, - { - "epoch": 15.054498044328552, - "grad_norm": 1.4246594905853271, - "learning_rate": 7.149949748743719e-05, - "loss": 5.3503, - "step": 28867 - }, - { - "epoch": 15.055019556714472, - "grad_norm": 1.3816472291946411, - "learning_rate": 7.149849246231156e-05, - "loss": 5.5198, - "step": 28868 - }, - { - "epoch": 15.055541069100391, - "grad_norm": 1.3497544527053833, - "learning_rate": 7.149748743718593e-05, - "loss": 5.716, - "step": 28869 - }, - { - "epoch": 15.05606258148631, - "grad_norm": 1.419115424156189, - "learning_rate": 7.149648241206031e-05, - "loss": 5.6264, - "step": 28870 - }, - { - "epoch": 15.05658409387223, - "grad_norm": 1.4303467273712158, - "learning_rate": 7.149547738693468e-05, - "loss": 5.0302, - "step": 28871 - }, - { - "epoch": 15.05710560625815, - "grad_norm": 1.438164472579956, - "learning_rate": 7.149447236180905e-05, - "loss": 5.5741, - "step": 28872 - }, - { - "epoch": 15.057627118644067, - "grad_norm": 1.4476560354232788, - "learning_rate": 7.149346733668342e-05, - "loss": 5.3074, - "step": 28873 - }, - { - "epoch": 15.058148631029987, - "grad_norm": 1.5245305299758911, - "learning_rate": 7.14924623115578e-05, - "loss": 5.6523, - "step": 28874 - }, - { - "epoch": 15.058670143415906, - "grad_norm": 1.3458268642425537, - "learning_rate": 7.149145728643216e-05, - "loss": 5.2648, - "step": 28875 - }, - { - "epoch": 15.059191655801826, - "grad_norm": 1.510672926902771, - "learning_rate": 7.149045226130654e-05, - "loss": 5.6866, - "step": 28876 - }, - { - "epoch": 15.059713168187745, - "grad_norm": 1.5758062601089478, - "learning_rate": 7.14894472361809e-05, - "loss": 5.6382, - "step": 28877 - }, - { - "epoch": 15.060234680573664, - "grad_norm": 1.596310019493103, - "learning_rate": 7.148844221105528e-05, - "loss": 4.7085, - "step": 28878 - }, - { - "epoch": 15.060756192959582, - "grad_norm": 1.5607863664627075, - "learning_rate": 7.148743718592964e-05, - "loss": 4.436, - "step": 28879 - }, - { - "epoch": 15.061277705345502, - "grad_norm": 1.5862423181533813, - "learning_rate": 7.148643216080402e-05, - "loss": 5.407, - "step": 28880 - }, - { - "epoch": 15.061799217731421, - "grad_norm": 1.4584648609161377, - "learning_rate": 7.14854271356784e-05, - "loss": 5.1606, - "step": 28881 - }, - { - "epoch": 15.06232073011734, - "grad_norm": 1.5613422393798828, - "learning_rate": 7.148442211055276e-05, - "loss": 4.7233, - "step": 28882 - }, - { - "epoch": 15.06284224250326, - "grad_norm": 1.5399249792099, - "learning_rate": 7.148341708542714e-05, - "loss": 5.2613, - "step": 28883 - }, - { - "epoch": 15.06336375488918, - "grad_norm": 1.5518499612808228, - "learning_rate": 7.148241206030151e-05, - "loss": 5.4214, - "step": 28884 - }, - { - "epoch": 15.063885267275097, - "grad_norm": 1.588444471359253, - "learning_rate": 7.148140703517588e-05, - "loss": 5.1463, - "step": 28885 - }, - { - "epoch": 15.064406779661017, - "grad_norm": 1.5751436948776245, - "learning_rate": 7.148040201005025e-05, - "loss": 5.2571, - "step": 28886 - }, - { - "epoch": 15.064928292046936, - "grad_norm": 1.5078915357589722, - "learning_rate": 7.147939698492463e-05, - "loss": 5.4705, - "step": 28887 - }, - { - "epoch": 15.065449804432856, - "grad_norm": 1.4844189882278442, - "learning_rate": 7.147839195979899e-05, - "loss": 5.3055, - "step": 28888 - }, - { - "epoch": 15.065971316818775, - "grad_norm": 1.4628113508224487, - "learning_rate": 7.147738693467337e-05, - "loss": 5.5058, - "step": 28889 - }, - { - "epoch": 15.066492829204694, - "grad_norm": 1.3131061792373657, - "learning_rate": 7.147638190954775e-05, - "loss": 5.2995, - "step": 28890 - }, - { - "epoch": 15.067014341590612, - "grad_norm": 1.406394362449646, - "learning_rate": 7.147537688442212e-05, - "loss": 5.8129, - "step": 28891 - }, - { - "epoch": 15.067535853976532, - "grad_norm": 1.4251024723052979, - "learning_rate": 7.147437185929649e-05, - "loss": 5.4811, - "step": 28892 - }, - { - "epoch": 15.068057366362451, - "grad_norm": 1.5555423498153687, - "learning_rate": 7.147336683417087e-05, - "loss": 5.3117, - "step": 28893 - }, - { - "epoch": 15.06857887874837, - "grad_norm": 1.4877212047576904, - "learning_rate": 7.147236180904523e-05, - "loss": 5.0694, - "step": 28894 - }, - { - "epoch": 15.06910039113429, - "grad_norm": 1.4975770711898804, - "learning_rate": 7.14713567839196e-05, - "loss": 5.1578, - "step": 28895 - }, - { - "epoch": 15.06962190352021, - "grad_norm": 1.5107280015945435, - "learning_rate": 7.147035175879397e-05, - "loss": 5.4219, - "step": 28896 - }, - { - "epoch": 15.070143415906127, - "grad_norm": 1.425217628479004, - "learning_rate": 7.146934673366834e-05, - "loss": 5.6853, - "step": 28897 - }, - { - "epoch": 15.070664928292047, - "grad_norm": 1.4032572507858276, - "learning_rate": 7.146834170854272e-05, - "loss": 5.4431, - "step": 28898 - }, - { - "epoch": 15.071186440677966, - "grad_norm": 1.5202369689941406, - "learning_rate": 7.146733668341708e-05, - "loss": 5.2559, - "step": 28899 - }, - { - "epoch": 15.071707953063886, - "grad_norm": 1.4753310680389404, - "learning_rate": 7.146633165829146e-05, - "loss": 4.696, - "step": 28900 - }, - { - "epoch": 15.072229465449805, - "grad_norm": 1.5914028882980347, - "learning_rate": 7.146532663316583e-05, - "loss": 5.525, - "step": 28901 - }, - { - "epoch": 15.072750977835724, - "grad_norm": 1.5695390701293945, - "learning_rate": 7.146432160804021e-05, - "loss": 5.0267, - "step": 28902 - }, - { - "epoch": 15.073272490221642, - "grad_norm": 1.4306888580322266, - "learning_rate": 7.146331658291458e-05, - "loss": 5.5274, - "step": 28903 - }, - { - "epoch": 15.073794002607562, - "grad_norm": 1.4211045503616333, - "learning_rate": 7.146231155778895e-05, - "loss": 5.8594, - "step": 28904 - }, - { - "epoch": 15.074315514993481, - "grad_norm": 1.335365891456604, - "learning_rate": 7.146130653266332e-05, - "loss": 5.0985, - "step": 28905 - }, - { - "epoch": 15.0748370273794, - "grad_norm": 1.5315121412277222, - "learning_rate": 7.14603015075377e-05, - "loss": 5.2639, - "step": 28906 - }, - { - "epoch": 15.07535853976532, - "grad_norm": 1.4786714315414429, - "learning_rate": 7.145929648241206e-05, - "loss": 5.6138, - "step": 28907 - }, - { - "epoch": 15.075880052151238, - "grad_norm": 1.4788589477539062, - "learning_rate": 7.145829145728643e-05, - "loss": 5.49, - "step": 28908 - }, - { - "epoch": 15.076401564537157, - "grad_norm": 1.410479187965393, - "learning_rate": 7.14572864321608e-05, - "loss": 5.4614, - "step": 28909 - }, - { - "epoch": 15.076923076923077, - "grad_norm": 1.6251330375671387, - "learning_rate": 7.145628140703518e-05, - "loss": 4.6833, - "step": 28910 - }, - { - "epoch": 15.077444589308996, - "grad_norm": 1.5600355863571167, - "learning_rate": 7.145527638190956e-05, - "loss": 5.316, - "step": 28911 - }, - { - "epoch": 15.077966101694916, - "grad_norm": 1.4925483465194702, - "learning_rate": 7.145427135678392e-05, - "loss": 5.4552, - "step": 28912 - }, - { - "epoch": 15.078487614080835, - "grad_norm": 1.5392584800720215, - "learning_rate": 7.14532663316583e-05, - "loss": 5.2715, - "step": 28913 - }, - { - "epoch": 15.079009126466753, - "grad_norm": 1.5024949312210083, - "learning_rate": 7.145226130653267e-05, - "loss": 5.5497, - "step": 28914 - }, - { - "epoch": 15.079530638852672, - "grad_norm": 1.3707231283187866, - "learning_rate": 7.145125628140704e-05, - "loss": 5.5869, - "step": 28915 - }, - { - "epoch": 15.080052151238592, - "grad_norm": 1.5117924213409424, - "learning_rate": 7.145025125628141e-05, - "loss": 5.3891, - "step": 28916 - }, - { - "epoch": 15.080573663624511, - "grad_norm": 1.5013914108276367, - "learning_rate": 7.144924623115579e-05, - "loss": 5.6617, - "step": 28917 - }, - { - "epoch": 15.08109517601043, - "grad_norm": 1.5045989751815796, - "learning_rate": 7.144824120603015e-05, - "loss": 4.8163, - "step": 28918 - }, - { - "epoch": 15.08161668839635, - "grad_norm": 1.4401882886886597, - "learning_rate": 7.144723618090453e-05, - "loss": 5.0921, - "step": 28919 - }, - { - "epoch": 15.082138200782268, - "grad_norm": 1.478485345840454, - "learning_rate": 7.144623115577889e-05, - "loss": 5.677, - "step": 28920 - }, - { - "epoch": 15.082659713168187, - "grad_norm": 1.6924904584884644, - "learning_rate": 7.144522613065327e-05, - "loss": 4.5949, - "step": 28921 - }, - { - "epoch": 15.083181225554107, - "grad_norm": 1.490372896194458, - "learning_rate": 7.144422110552765e-05, - "loss": 5.4182, - "step": 28922 - }, - { - "epoch": 15.083702737940026, - "grad_norm": 1.3259464502334595, - "learning_rate": 7.144321608040201e-05, - "loss": 5.6872, - "step": 28923 - }, - { - "epoch": 15.084224250325946, - "grad_norm": 1.435198426246643, - "learning_rate": 7.144221105527639e-05, - "loss": 5.3351, - "step": 28924 - }, - { - "epoch": 15.084745762711865, - "grad_norm": 1.6211390495300293, - "learning_rate": 7.144120603015075e-05, - "loss": 5.2755, - "step": 28925 - }, - { - "epoch": 15.085267275097783, - "grad_norm": 1.499839425086975, - "learning_rate": 7.144020100502513e-05, - "loss": 5.3371, - "step": 28926 - }, - { - "epoch": 15.085788787483702, - "grad_norm": 1.3940523862838745, - "learning_rate": 7.14391959798995e-05, - "loss": 5.6963, - "step": 28927 - }, - { - "epoch": 15.086310299869622, - "grad_norm": 1.5216419696807861, - "learning_rate": 7.143819095477387e-05, - "loss": 5.1705, - "step": 28928 - }, - { - "epoch": 15.086831812255541, - "grad_norm": 1.40998375415802, - "learning_rate": 7.143718592964824e-05, - "loss": 5.6686, - "step": 28929 - }, - { - "epoch": 15.08735332464146, - "grad_norm": 1.4843734502792358, - "learning_rate": 7.143618090452262e-05, - "loss": 5.4397, - "step": 28930 - }, - { - "epoch": 15.08787483702738, - "grad_norm": 1.4487470388412476, - "learning_rate": 7.1435175879397e-05, - "loss": 5.6565, - "step": 28931 - }, - { - "epoch": 15.088396349413298, - "grad_norm": 1.4440490007400513, - "learning_rate": 7.143417085427137e-05, - "loss": 5.6362, - "step": 28932 - }, - { - "epoch": 15.088917861799217, - "grad_norm": 1.3741968870162964, - "learning_rate": 7.143316582914574e-05, - "loss": 5.7426, - "step": 28933 - }, - { - "epoch": 15.089439374185137, - "grad_norm": 1.3843262195587158, - "learning_rate": 7.14321608040201e-05, - "loss": 5.709, - "step": 28934 - }, - { - "epoch": 15.089960886571056, - "grad_norm": 1.4738620519638062, - "learning_rate": 7.143115577889448e-05, - "loss": 5.4669, - "step": 28935 - }, - { - "epoch": 15.090482398956976, - "grad_norm": 1.555630087852478, - "learning_rate": 7.143015075376884e-05, - "loss": 5.4933, - "step": 28936 - }, - { - "epoch": 15.091003911342895, - "grad_norm": 1.5510210990905762, - "learning_rate": 7.142914572864322e-05, - "loss": 5.1984, - "step": 28937 - }, - { - "epoch": 15.091525423728813, - "grad_norm": 1.4729846715927124, - "learning_rate": 7.142814070351758e-05, - "loss": 4.7357, - "step": 28938 - }, - { - "epoch": 15.092046936114732, - "grad_norm": 1.746331810951233, - "learning_rate": 7.142713567839196e-05, - "loss": 4.6816, - "step": 28939 - }, - { - "epoch": 15.092568448500652, - "grad_norm": 1.5910457372665405, - "learning_rate": 7.142613065326633e-05, - "loss": 5.1432, - "step": 28940 - }, - { - "epoch": 15.093089960886571, - "grad_norm": 1.5701767206192017, - "learning_rate": 7.14251256281407e-05, - "loss": 5.4806, - "step": 28941 - }, - { - "epoch": 15.09361147327249, - "grad_norm": 1.4326491355895996, - "learning_rate": 7.142412060301508e-05, - "loss": 5.2868, - "step": 28942 - }, - { - "epoch": 15.09413298565841, - "grad_norm": 1.5629723072052002, - "learning_rate": 7.142311557788946e-05, - "loss": 4.6854, - "step": 28943 - }, - { - "epoch": 15.094654498044328, - "grad_norm": 1.415118932723999, - "learning_rate": 7.142211055276382e-05, - "loss": 5.3194, - "step": 28944 - }, - { - "epoch": 15.095176010430247, - "grad_norm": 1.4162962436676025, - "learning_rate": 7.14211055276382e-05, - "loss": 5.3412, - "step": 28945 - }, - { - "epoch": 15.095697522816167, - "grad_norm": 1.4055882692337036, - "learning_rate": 7.142010050251257e-05, - "loss": 5.3284, - "step": 28946 - }, - { - "epoch": 15.096219035202086, - "grad_norm": 1.56875741481781, - "learning_rate": 7.141909547738694e-05, - "loss": 5.1753, - "step": 28947 - }, - { - "epoch": 15.096740547588006, - "grad_norm": 1.3937442302703857, - "learning_rate": 7.141809045226131e-05, - "loss": 5.4329, - "step": 28948 - }, - { - "epoch": 15.097262059973925, - "grad_norm": 1.4763462543487549, - "learning_rate": 7.141708542713567e-05, - "loss": 5.3916, - "step": 28949 - }, - { - "epoch": 15.097783572359843, - "grad_norm": 1.4998615980148315, - "learning_rate": 7.141608040201005e-05, - "loss": 5.3771, - "step": 28950 - }, - { - "epoch": 15.098305084745762, - "grad_norm": 1.408146619796753, - "learning_rate": 7.141507537688443e-05, - "loss": 5.2073, - "step": 28951 - }, - { - "epoch": 15.098826597131682, - "grad_norm": 1.494038462638855, - "learning_rate": 7.14140703517588e-05, - "loss": 5.5704, - "step": 28952 - }, - { - "epoch": 15.099348109517601, - "grad_norm": 1.5848718881607056, - "learning_rate": 7.141306532663317e-05, - "loss": 5.1567, - "step": 28953 - }, - { - "epoch": 15.09986962190352, - "grad_norm": 1.5129927396774292, - "learning_rate": 7.141206030150755e-05, - "loss": 5.4619, - "step": 28954 - }, - { - "epoch": 15.10039113428944, - "grad_norm": 1.4144443273544312, - "learning_rate": 7.141105527638191e-05, - "loss": 5.2435, - "step": 28955 - }, - { - "epoch": 15.100912646675358, - "grad_norm": 1.4986590147018433, - "learning_rate": 7.141005025125629e-05, - "loss": 5.4601, - "step": 28956 - }, - { - "epoch": 15.101434159061277, - "grad_norm": 1.4569087028503418, - "learning_rate": 7.140904522613065e-05, - "loss": 5.5611, - "step": 28957 - }, - { - "epoch": 15.101955671447197, - "grad_norm": 1.5059967041015625, - "learning_rate": 7.140804020100503e-05, - "loss": 5.8556, - "step": 28958 - }, - { - "epoch": 15.102477183833116, - "grad_norm": 1.4549798965454102, - "learning_rate": 7.14070351758794e-05, - "loss": 5.6016, - "step": 28959 - }, - { - "epoch": 15.102998696219036, - "grad_norm": 1.4329584836959839, - "learning_rate": 7.140603015075377e-05, - "loss": 5.3923, - "step": 28960 - }, - { - "epoch": 15.103520208604955, - "grad_norm": 1.3929848670959473, - "learning_rate": 7.140502512562814e-05, - "loss": 5.5664, - "step": 28961 - }, - { - "epoch": 15.104041720990873, - "grad_norm": 1.49989652633667, - "learning_rate": 7.140402010050252e-05, - "loss": 5.2271, - "step": 28962 - }, - { - "epoch": 15.104563233376792, - "grad_norm": 1.5136001110076904, - "learning_rate": 7.14030150753769e-05, - "loss": 5.2107, - "step": 28963 - }, - { - "epoch": 15.105084745762712, - "grad_norm": 1.9796257019042969, - "learning_rate": 7.140201005025126e-05, - "loss": 5.0144, - "step": 28964 - }, - { - "epoch": 15.105606258148631, - "grad_norm": 1.4628428220748901, - "learning_rate": 7.140100502512564e-05, - "loss": 5.0226, - "step": 28965 - }, - { - "epoch": 15.10612777053455, - "grad_norm": 1.341151237487793, - "learning_rate": 7.14e-05, - "loss": 5.3666, - "step": 28966 - }, - { - "epoch": 15.10664928292047, - "grad_norm": 1.4637469053268433, - "learning_rate": 7.139899497487438e-05, - "loss": 5.2468, - "step": 28967 - }, - { - "epoch": 15.107170795306388, - "grad_norm": 1.6683974266052246, - "learning_rate": 7.139798994974874e-05, - "loss": 4.8547, - "step": 28968 - }, - { - "epoch": 15.107692307692307, - "grad_norm": 1.6331274509429932, - "learning_rate": 7.139698492462312e-05, - "loss": 5.1775, - "step": 28969 - }, - { - "epoch": 15.108213820078227, - "grad_norm": 1.548012614250183, - "learning_rate": 7.139597989949748e-05, - "loss": 5.4758, - "step": 28970 - }, - { - "epoch": 15.108735332464146, - "grad_norm": 1.5210987329483032, - "learning_rate": 7.139497487437186e-05, - "loss": 5.0987, - "step": 28971 - }, - { - "epoch": 15.109256844850066, - "grad_norm": 1.4844462871551514, - "learning_rate": 7.139396984924624e-05, - "loss": 5.3036, - "step": 28972 - }, - { - "epoch": 15.109778357235985, - "grad_norm": 1.3759374618530273, - "learning_rate": 7.139296482412062e-05, - "loss": 5.6432, - "step": 28973 - }, - { - "epoch": 15.110299869621903, - "grad_norm": 1.4730433225631714, - "learning_rate": 7.139195979899498e-05, - "loss": 5.4567, - "step": 28974 - }, - { - "epoch": 15.110821382007822, - "grad_norm": 1.5208078622817993, - "learning_rate": 7.139095477386935e-05, - "loss": 5.554, - "step": 28975 - }, - { - "epoch": 15.111342894393742, - "grad_norm": 1.529284119606018, - "learning_rate": 7.138994974874372e-05, - "loss": 5.4257, - "step": 28976 - }, - { - "epoch": 15.111864406779661, - "grad_norm": 1.5474213361740112, - "learning_rate": 7.138894472361809e-05, - "loss": 5.2039, - "step": 28977 - }, - { - "epoch": 15.11238591916558, - "grad_norm": 1.4905202388763428, - "learning_rate": 7.138793969849247e-05, - "loss": 5.5938, - "step": 28978 - }, - { - "epoch": 15.1129074315515, - "grad_norm": 1.3677691221237183, - "learning_rate": 7.138693467336683e-05, - "loss": 5.6089, - "step": 28979 - }, - { - "epoch": 15.113428943937418, - "grad_norm": 1.4773911237716675, - "learning_rate": 7.138592964824121e-05, - "loss": 5.4169, - "step": 28980 - }, - { - "epoch": 15.113950456323337, - "grad_norm": 1.4439160823822021, - "learning_rate": 7.138492462311557e-05, - "loss": 5.5175, - "step": 28981 - }, - { - "epoch": 15.114471968709257, - "grad_norm": 1.3880728483200073, - "learning_rate": 7.138391959798995e-05, - "loss": 5.6056, - "step": 28982 - }, - { - "epoch": 15.114993481095176, - "grad_norm": 1.3507097959518433, - "learning_rate": 7.138291457286433e-05, - "loss": 5.5776, - "step": 28983 - }, - { - "epoch": 15.115514993481096, - "grad_norm": 1.4659535884857178, - "learning_rate": 7.138190954773871e-05, - "loss": 5.0224, - "step": 28984 - }, - { - "epoch": 15.116036505867015, - "grad_norm": 1.4414665699005127, - "learning_rate": 7.138090452261307e-05, - "loss": 5.1551, - "step": 28985 - }, - { - "epoch": 15.116558018252933, - "grad_norm": 1.5073434114456177, - "learning_rate": 7.137989949748745e-05, - "loss": 5.3196, - "step": 28986 - }, - { - "epoch": 15.117079530638852, - "grad_norm": 1.4076799154281616, - "learning_rate": 7.137889447236181e-05, - "loss": 5.376, - "step": 28987 - }, - { - "epoch": 15.117601043024772, - "grad_norm": Infinity, - "learning_rate": 7.137889447236181e-05, - "loss": 4.6917, - "step": 28988 - }, - { - "epoch": 15.118122555410691, - "grad_norm": 1.4623253345489502, - "learning_rate": 7.137788944723618e-05, - "loss": 5.6143, - "step": 28989 - }, - { - "epoch": 15.11864406779661, - "grad_norm": 1.5261727571487427, - "learning_rate": 7.137688442211056e-05, - "loss": 5.4663, - "step": 28990 - }, - { - "epoch": 15.11916558018253, - "grad_norm": 1.4280831813812256, - "learning_rate": 7.137587939698492e-05, - "loss": 5.1533, - "step": 28991 - }, - { - "epoch": 15.119687092568448, - "grad_norm": 1.4373342990875244, - "learning_rate": 7.13748743718593e-05, - "loss": 5.2576, - "step": 28992 - }, - { - "epoch": 15.120208604954367, - "grad_norm": 1.5543891191482544, - "learning_rate": 7.137386934673366e-05, - "loss": 5.3232, - "step": 28993 - }, - { - "epoch": 15.120730117340287, - "grad_norm": 1.4455937147140503, - "learning_rate": 7.137286432160804e-05, - "loss": 5.238, - "step": 28994 - }, - { - "epoch": 15.121251629726206, - "grad_norm": 1.4428068399429321, - "learning_rate": 7.137185929648242e-05, - "loss": 5.4613, - "step": 28995 - }, - { - "epoch": 15.121773142112126, - "grad_norm": 1.524838924407959, - "learning_rate": 7.13708542713568e-05, - "loss": 4.9826, - "step": 28996 - }, - { - "epoch": 15.122294654498045, - "grad_norm": 1.4304856061935425, - "learning_rate": 7.136984924623116e-05, - "loss": 5.7034, - "step": 28997 - }, - { - "epoch": 15.122816166883963, - "grad_norm": 1.4452167749404907, - "learning_rate": 7.136884422110554e-05, - "loss": 5.433, - "step": 28998 - }, - { - "epoch": 15.123337679269882, - "grad_norm": 1.342434287071228, - "learning_rate": 7.13678391959799e-05, - "loss": 5.6212, - "step": 28999 - }, - { - "epoch": 15.123859191655802, - "grad_norm": 1.4309464693069458, - "learning_rate": 7.136683417085428e-05, - "loss": 5.5933, - "step": 29000 - }, - { - "epoch": 15.124380704041721, - "grad_norm": 1.4471975564956665, - "learning_rate": 7.136582914572864e-05, - "loss": 5.2764, - "step": 29001 - }, - { - "epoch": 15.12490221642764, - "grad_norm": 1.5104763507843018, - "learning_rate": 7.136482412060301e-05, - "loss": 5.0428, - "step": 29002 - }, - { - "epoch": 15.125423728813558, - "grad_norm": 1.5696898698806763, - "learning_rate": 7.136381909547739e-05, - "loss": 5.3143, - "step": 29003 - }, - { - "epoch": 15.125945241199478, - "grad_norm": 1.4075406789779663, - "learning_rate": 7.136281407035176e-05, - "loss": 5.6435, - "step": 29004 - }, - { - "epoch": 15.126466753585397, - "grad_norm": 1.435833215713501, - "learning_rate": 7.136180904522614e-05, - "loss": 5.5908, - "step": 29005 - }, - { - "epoch": 15.126988265971317, - "grad_norm": 1.3716201782226562, - "learning_rate": 7.13608040201005e-05, - "loss": 5.4821, - "step": 29006 - }, - { - "epoch": 15.127509778357236, - "grad_norm": 1.4538233280181885, - "learning_rate": 7.135979899497488e-05, - "loss": 5.6437, - "step": 29007 - }, - { - "epoch": 15.128031290743156, - "grad_norm": 1.539462924003601, - "learning_rate": 7.135879396984925e-05, - "loss": 5.4521, - "step": 29008 - }, - { - "epoch": 15.128552803129073, - "grad_norm": 1.6915168762207031, - "learning_rate": 7.135778894472363e-05, - "loss": 4.924, - "step": 29009 - }, - { - "epoch": 15.129074315514993, - "grad_norm": 1.4632387161254883, - "learning_rate": 7.135678391959799e-05, - "loss": 5.5095, - "step": 29010 - }, - { - "epoch": 15.129595827900912, - "grad_norm": 1.4300282001495361, - "learning_rate": 7.135577889447237e-05, - "loss": 5.4327, - "step": 29011 - }, - { - "epoch": 15.130117340286832, - "grad_norm": 1.4685933589935303, - "learning_rate": 7.135477386934673e-05, - "loss": 5.4492, - "step": 29012 - }, - { - "epoch": 15.130638852672751, - "grad_norm": 1.4691903591156006, - "learning_rate": 7.135376884422111e-05, - "loss": 5.4468, - "step": 29013 - }, - { - "epoch": 15.13116036505867, - "grad_norm": 1.4280394315719604, - "learning_rate": 7.135276381909547e-05, - "loss": 5.1211, - "step": 29014 - }, - { - "epoch": 15.131681877444588, - "grad_norm": 1.4426642656326294, - "learning_rate": 7.135175879396985e-05, - "loss": 5.3176, - "step": 29015 - }, - { - "epoch": 15.132203389830508, - "grad_norm": 1.4958908557891846, - "learning_rate": 7.135075376884423e-05, - "loss": 4.9278, - "step": 29016 - }, - { - "epoch": 15.132724902216427, - "grad_norm": 1.5318886041641235, - "learning_rate": 7.13497487437186e-05, - "loss": 5.2839, - "step": 29017 - }, - { - "epoch": 15.133246414602347, - "grad_norm": 1.3786710500717163, - "learning_rate": 7.134874371859297e-05, - "loss": 5.5724, - "step": 29018 - }, - { - "epoch": 15.133767926988266, - "grad_norm": 1.4286922216415405, - "learning_rate": 7.134773869346734e-05, - "loss": 5.3679, - "step": 29019 - }, - { - "epoch": 15.134289439374186, - "grad_norm": 1.4419300556182861, - "learning_rate": 7.134673366834171e-05, - "loss": 5.7136, - "step": 29020 - }, - { - "epoch": 15.134810951760103, - "grad_norm": 1.4136335849761963, - "learning_rate": 7.134572864321608e-05, - "loss": 5.5669, - "step": 29021 - }, - { - "epoch": 15.135332464146023, - "grad_norm": 1.4925001859664917, - "learning_rate": 7.134472361809046e-05, - "loss": 5.6006, - "step": 29022 - }, - { - "epoch": 15.135853976531942, - "grad_norm": 1.4636709690093994, - "learning_rate": 7.134371859296482e-05, - "loss": 5.6432, - "step": 29023 - }, - { - "epoch": 15.136375488917862, - "grad_norm": 1.4070241451263428, - "learning_rate": 7.13427135678392e-05, - "loss": 5.5244, - "step": 29024 - }, - { - "epoch": 15.136897001303781, - "grad_norm": 1.5891765356063843, - "learning_rate": 7.134170854271358e-05, - "loss": 4.7607, - "step": 29025 - }, - { - "epoch": 15.1374185136897, - "grad_norm": 1.458716869354248, - "learning_rate": 7.134070351758795e-05, - "loss": 5.5585, - "step": 29026 - }, - { - "epoch": 15.137940026075619, - "grad_norm": 1.3933122158050537, - "learning_rate": 7.133969849246232e-05, - "loss": 5.5731, - "step": 29027 - }, - { - "epoch": 15.138461538461538, - "grad_norm": 1.391535758972168, - "learning_rate": 7.133869346733668e-05, - "loss": 5.5988, - "step": 29028 - }, - { - "epoch": 15.138983050847457, - "grad_norm": 1.3724972009658813, - "learning_rate": 7.133768844221106e-05, - "loss": 5.1434, - "step": 29029 - }, - { - "epoch": 15.139504563233377, - "grad_norm": 1.3958393335342407, - "learning_rate": 7.133668341708542e-05, - "loss": 5.6245, - "step": 29030 - }, - { - "epoch": 15.140026075619296, - "grad_norm": 1.3763427734375, - "learning_rate": 7.13356783919598e-05, - "loss": 5.5614, - "step": 29031 - }, - { - "epoch": 15.140547588005216, - "grad_norm": 1.397996187210083, - "learning_rate": 7.133467336683417e-05, - "loss": 5.3803, - "step": 29032 - }, - { - "epoch": 15.141069100391134, - "grad_norm": 1.4286565780639648, - "learning_rate": 7.133366834170854e-05, - "loss": 5.4641, - "step": 29033 - }, - { - "epoch": 15.141590612777053, - "grad_norm": 1.4353885650634766, - "learning_rate": 7.133266331658291e-05, - "loss": 5.6271, - "step": 29034 - }, - { - "epoch": 15.142112125162972, - "grad_norm": 1.5728447437286377, - "learning_rate": 7.133165829145729e-05, - "loss": 5.1306, - "step": 29035 - }, - { - "epoch": 15.142633637548892, - "grad_norm": 1.4712942838668823, - "learning_rate": 7.133065326633166e-05, - "loss": 5.4033, - "step": 29036 - }, - { - "epoch": 15.143155149934811, - "grad_norm": 1.3912672996520996, - "learning_rate": 7.132964824120604e-05, - "loss": 5.4614, - "step": 29037 - }, - { - "epoch": 15.14367666232073, - "grad_norm": 1.4003973007202148, - "learning_rate": 7.13286432160804e-05, - "loss": 5.5948, - "step": 29038 - }, - { - "epoch": 15.144198174706649, - "grad_norm": 1.4671008586883545, - "learning_rate": 7.132763819095478e-05, - "loss": 4.8619, - "step": 29039 - }, - { - "epoch": 15.144719687092568, - "grad_norm": 1.4142231941223145, - "learning_rate": 7.132663316582915e-05, - "loss": 5.4722, - "step": 29040 - }, - { - "epoch": 15.145241199478487, - "grad_norm": 1.561867356300354, - "learning_rate": 7.132562814070353e-05, - "loss": 4.9734, - "step": 29041 - }, - { - "epoch": 15.145762711864407, - "grad_norm": 1.426042914390564, - "learning_rate": 7.132462311557789e-05, - "loss": 5.7915, - "step": 29042 - }, - { - "epoch": 15.146284224250326, - "grad_norm": 1.3952854871749878, - "learning_rate": 7.132361809045225e-05, - "loss": 5.441, - "step": 29043 - }, - { - "epoch": 15.146805736636246, - "grad_norm": 1.4686346054077148, - "learning_rate": 7.132261306532663e-05, - "loss": 4.9324, - "step": 29044 - }, - { - "epoch": 15.147327249022164, - "grad_norm": 1.4551379680633545, - "learning_rate": 7.132160804020101e-05, - "loss": 5.6299, - "step": 29045 - }, - { - "epoch": 15.147848761408083, - "grad_norm": 1.4839674234390259, - "learning_rate": 7.132060301507539e-05, - "loss": 5.5071, - "step": 29046 - }, - { - "epoch": 15.148370273794002, - "grad_norm": 1.4997496604919434, - "learning_rate": 7.131959798994975e-05, - "loss": 5.8538, - "step": 29047 - }, - { - "epoch": 15.148891786179922, - "grad_norm": 1.4840813875198364, - "learning_rate": 7.131859296482413e-05, - "loss": 4.9332, - "step": 29048 - }, - { - "epoch": 15.149413298565841, - "grad_norm": 1.3952629566192627, - "learning_rate": 7.13175879396985e-05, - "loss": 5.512, - "step": 29049 - }, - { - "epoch": 15.14993481095176, - "grad_norm": 1.3677921295166016, - "learning_rate": 7.131658291457287e-05, - "loss": 5.0031, - "step": 29050 - }, - { - "epoch": 15.150456323337679, - "grad_norm": 1.5143532752990723, - "learning_rate": 7.131557788944724e-05, - "loss": 5.4013, - "step": 29051 - }, - { - "epoch": 15.150977835723598, - "grad_norm": 1.490962028503418, - "learning_rate": 7.131457286432161e-05, - "loss": 5.3656, - "step": 29052 - }, - { - "epoch": 15.151499348109517, - "grad_norm": 1.4711412191390991, - "learning_rate": 7.131356783919598e-05, - "loss": 5.4535, - "step": 29053 - }, - { - "epoch": 15.152020860495437, - "grad_norm": 1.5155580043792725, - "learning_rate": 7.131256281407036e-05, - "loss": 4.7827, - "step": 29054 - }, - { - "epoch": 15.152542372881356, - "grad_norm": 1.5409817695617676, - "learning_rate": 7.131155778894472e-05, - "loss": 5.1288, - "step": 29055 - }, - { - "epoch": 15.153063885267276, - "grad_norm": 1.4916616678237915, - "learning_rate": 7.13105527638191e-05, - "loss": 5.3661, - "step": 29056 - }, - { - "epoch": 15.153585397653194, - "grad_norm": 1.469427227973938, - "learning_rate": 7.130954773869348e-05, - "loss": 5.2616, - "step": 29057 - }, - { - "epoch": 15.154106910039113, - "grad_norm": 1.4927456378936768, - "learning_rate": 7.130854271356784e-05, - "loss": 5.3439, - "step": 29058 - }, - { - "epoch": 15.154628422425032, - "grad_norm": 1.381862998008728, - "learning_rate": 7.130753768844222e-05, - "loss": 5.0484, - "step": 29059 - }, - { - "epoch": 15.155149934810952, - "grad_norm": 1.444161295890808, - "learning_rate": 7.130653266331658e-05, - "loss": 4.2875, - "step": 29060 - }, - { - "epoch": 15.155671447196871, - "grad_norm": 1.4299434423446655, - "learning_rate": 7.130552763819096e-05, - "loss": 5.5152, - "step": 29061 - }, - { - "epoch": 15.156192959582791, - "grad_norm": 1.4419199228286743, - "learning_rate": 7.130452261306533e-05, - "loss": 4.9734, - "step": 29062 - }, - { - "epoch": 15.156714471968709, - "grad_norm": 1.4400181770324707, - "learning_rate": 7.13035175879397e-05, - "loss": 5.2818, - "step": 29063 - }, - { - "epoch": 15.157235984354628, - "grad_norm": 1.522199273109436, - "learning_rate": 7.130251256281407e-05, - "loss": 5.142, - "step": 29064 - }, - { - "epoch": 15.157757496740548, - "grad_norm": 1.5233980417251587, - "learning_rate": 7.130150753768845e-05, - "loss": 4.8287, - "step": 29065 - }, - { - "epoch": 15.158279009126467, - "grad_norm": 1.3926461935043335, - "learning_rate": 7.130050251256282e-05, - "loss": 5.5495, - "step": 29066 - }, - { - "epoch": 15.158800521512386, - "grad_norm": 1.424590826034546, - "learning_rate": 7.12994974874372e-05, - "loss": 5.4608, - "step": 29067 - }, - { - "epoch": 15.159322033898306, - "grad_norm": 1.5626611709594727, - "learning_rate": 7.129849246231157e-05, - "loss": 4.8506, - "step": 29068 - }, - { - "epoch": 15.159843546284224, - "grad_norm": 1.4464964866638184, - "learning_rate": 7.129748743718593e-05, - "loss": 5.5378, - "step": 29069 - }, - { - "epoch": 15.160365058670143, - "grad_norm": 1.3634130954742432, - "learning_rate": 7.129648241206031e-05, - "loss": 5.6078, - "step": 29070 - }, - { - "epoch": 15.160886571056063, - "grad_norm": 1.4363592863082886, - "learning_rate": 7.129547738693467e-05, - "loss": 5.6336, - "step": 29071 - }, - { - "epoch": 15.161408083441982, - "grad_norm": 1.4152342081069946, - "learning_rate": 7.129447236180905e-05, - "loss": 5.3938, - "step": 29072 - }, - { - "epoch": 15.161929595827901, - "grad_norm": 1.4939249753952026, - "learning_rate": 7.129346733668341e-05, - "loss": 5.1971, - "step": 29073 - }, - { - "epoch": 15.162451108213821, - "grad_norm": 1.510880947113037, - "learning_rate": 7.129246231155779e-05, - "loss": 4.8044, - "step": 29074 - }, - { - "epoch": 15.162972620599739, - "grad_norm": 1.4806228876113892, - "learning_rate": 7.129145728643216e-05, - "loss": 5.2847, - "step": 29075 - }, - { - "epoch": 15.163494132985658, - "grad_norm": 1.5433557033538818, - "learning_rate": 7.129045226130653e-05, - "loss": 5.2917, - "step": 29076 - }, - { - "epoch": 15.164015645371578, - "grad_norm": 1.5955464839935303, - "learning_rate": 7.128944723618091e-05, - "loss": 5.6031, - "step": 29077 - }, - { - "epoch": 15.164537157757497, - "grad_norm": 1.4277589321136475, - "learning_rate": 7.128844221105529e-05, - "loss": 5.5899, - "step": 29078 - }, - { - "epoch": 15.165058670143416, - "grad_norm": 1.5461323261260986, - "learning_rate": 7.128743718592965e-05, - "loss": 4.5077, - "step": 29079 - }, - { - "epoch": 15.165580182529336, - "grad_norm": 1.4442739486694336, - "learning_rate": 7.128643216080403e-05, - "loss": 5.3417, - "step": 29080 - }, - { - "epoch": 15.166101694915254, - "grad_norm": 1.4870091676712036, - "learning_rate": 7.12854271356784e-05, - "loss": 5.1711, - "step": 29081 - }, - { - "epoch": 15.166623207301173, - "grad_norm": 1.4961525201797485, - "learning_rate": 7.128442211055276e-05, - "loss": 4.8405, - "step": 29082 - }, - { - "epoch": 15.167144719687093, - "grad_norm": 1.4067193269729614, - "learning_rate": 7.128341708542714e-05, - "loss": 4.4454, - "step": 29083 - }, - { - "epoch": 15.167666232073012, - "grad_norm": 1.561736822128296, - "learning_rate": 7.12824120603015e-05, - "loss": 5.3293, - "step": 29084 - }, - { - "epoch": 15.168187744458931, - "grad_norm": 1.6595354080200195, - "learning_rate": 7.128140703517588e-05, - "loss": 5.3938, - "step": 29085 - }, - { - "epoch": 15.16870925684485, - "grad_norm": 1.586471438407898, - "learning_rate": 7.128040201005026e-05, - "loss": 5.2605, - "step": 29086 - }, - { - "epoch": 15.169230769230769, - "grad_norm": 1.458974838256836, - "learning_rate": 7.127939698492464e-05, - "loss": 5.1633, - "step": 29087 - }, - { - "epoch": 15.169752281616688, - "grad_norm": 1.4685744047164917, - "learning_rate": 7.1278391959799e-05, - "loss": 5.3059, - "step": 29088 - }, - { - "epoch": 15.170273794002608, - "grad_norm": 1.3904953002929688, - "learning_rate": 7.127738693467338e-05, - "loss": 5.1445, - "step": 29089 - }, - { - "epoch": 15.170795306388527, - "grad_norm": 1.5248311758041382, - "learning_rate": 7.127638190954774e-05, - "loss": 5.4002, - "step": 29090 - }, - { - "epoch": 15.171316818774446, - "grad_norm": 1.5510433912277222, - "learning_rate": 7.127537688442212e-05, - "loss": 5.3188, - "step": 29091 - }, - { - "epoch": 15.171838331160366, - "grad_norm": 1.4637070894241333, - "learning_rate": 7.127437185929648e-05, - "loss": 5.5029, - "step": 29092 - }, - { - "epoch": 15.172359843546284, - "grad_norm": 1.4515470266342163, - "learning_rate": 7.127336683417086e-05, - "loss": 5.3481, - "step": 29093 - }, - { - "epoch": 15.172881355932203, - "grad_norm": 1.493395209312439, - "learning_rate": 7.127236180904523e-05, - "loss": 5.4914, - "step": 29094 - }, - { - "epoch": 15.173402868318123, - "grad_norm": 1.318751573562622, - "learning_rate": 7.127135678391959e-05, - "loss": 5.6605, - "step": 29095 - }, - { - "epoch": 15.173924380704042, - "grad_norm": 1.4380578994750977, - "learning_rate": 7.127035175879397e-05, - "loss": 5.0638, - "step": 29096 - }, - { - "epoch": 15.174445893089962, - "grad_norm": 1.4950019121170044, - "learning_rate": 7.126934673366835e-05, - "loss": 5.4821, - "step": 29097 - }, - { - "epoch": 15.17496740547588, - "grad_norm": 1.3410086631774902, - "learning_rate": 7.126834170854272e-05, - "loss": 4.8569, - "step": 29098 - }, - { - "epoch": 15.175488917861799, - "grad_norm": 1.5146808624267578, - "learning_rate": 7.126733668341709e-05, - "loss": 4.8845, - "step": 29099 - }, - { - "epoch": 15.176010430247718, - "grad_norm": 1.5112154483795166, - "learning_rate": 7.126633165829147e-05, - "loss": 5.1368, - "step": 29100 - }, - { - "epoch": 15.176531942633638, - "grad_norm": 1.5761516094207764, - "learning_rate": 7.126532663316583e-05, - "loss": 5.2325, - "step": 29101 - }, - { - "epoch": 15.177053455019557, - "grad_norm": 1.6685019731521606, - "learning_rate": 7.126432160804021e-05, - "loss": 5.3374, - "step": 29102 - }, - { - "epoch": 15.177574967405477, - "grad_norm": 1.5778214931488037, - "learning_rate": 7.126331658291457e-05, - "loss": 4.7999, - "step": 29103 - }, - { - "epoch": 15.178096479791394, - "grad_norm": 1.432312250137329, - "learning_rate": 7.126231155778895e-05, - "loss": 5.5594, - "step": 29104 - }, - { - "epoch": 15.178617992177314, - "grad_norm": 1.4785010814666748, - "learning_rate": 7.126130653266331e-05, - "loss": 5.3495, - "step": 29105 - }, - { - "epoch": 15.179139504563233, - "grad_norm": 1.4270398616790771, - "learning_rate": 7.126030150753769e-05, - "loss": 5.4846, - "step": 29106 - }, - { - "epoch": 15.179661016949153, - "grad_norm": 1.5278854370117188, - "learning_rate": 7.125929648241207e-05, - "loss": 5.6027, - "step": 29107 - }, - { - "epoch": 15.180182529335072, - "grad_norm": 1.5614577531814575, - "learning_rate": 7.125829145728643e-05, - "loss": 5.1034, - "step": 29108 - }, - { - "epoch": 15.180704041720992, - "grad_norm": 1.4690035581588745, - "learning_rate": 7.125728643216081e-05, - "loss": 5.6178, - "step": 29109 - }, - { - "epoch": 15.18122555410691, - "grad_norm": 1.5037943124771118, - "learning_rate": 7.125628140703518e-05, - "loss": 5.2741, - "step": 29110 - }, - { - "epoch": 15.181747066492829, - "grad_norm": 1.442018747329712, - "learning_rate": 7.125527638190955e-05, - "loss": 5.457, - "step": 29111 - }, - { - "epoch": 15.182268578878748, - "grad_norm": 1.564052700996399, - "learning_rate": 7.125427135678392e-05, - "loss": 5.3729, - "step": 29112 - }, - { - "epoch": 15.182790091264668, - "grad_norm": 1.441823959350586, - "learning_rate": 7.12532663316583e-05, - "loss": 5.2737, - "step": 29113 - }, - { - "epoch": 15.183311603650587, - "grad_norm": 1.4704399108886719, - "learning_rate": 7.125226130653266e-05, - "loss": 5.7685, - "step": 29114 - }, - { - "epoch": 15.183833116036507, - "grad_norm": 1.4440233707427979, - "learning_rate": 7.125125628140704e-05, - "loss": 5.434, - "step": 29115 - }, - { - "epoch": 15.184354628422424, - "grad_norm": 1.4333513975143433, - "learning_rate": 7.12502512562814e-05, - "loss": 5.059, - "step": 29116 - }, - { - "epoch": 15.184876140808344, - "grad_norm": 1.591092824935913, - "learning_rate": 7.124924623115578e-05, - "loss": 5.1417, - "step": 29117 - }, - { - "epoch": 15.185397653194263, - "grad_norm": 1.4443446397781372, - "learning_rate": 7.124824120603016e-05, - "loss": 5.7497, - "step": 29118 - }, - { - "epoch": 15.185919165580183, - "grad_norm": 1.513028621673584, - "learning_rate": 7.124723618090454e-05, - "loss": 5.5676, - "step": 29119 - }, - { - "epoch": 15.186440677966102, - "grad_norm": 1.3937820196151733, - "learning_rate": 7.12462311557789e-05, - "loss": 5.322, - "step": 29120 - }, - { - "epoch": 15.186962190352022, - "grad_norm": 1.438401699066162, - "learning_rate": 7.124522613065326e-05, - "loss": 5.6547, - "step": 29121 - }, - { - "epoch": 15.18748370273794, - "grad_norm": 1.4555140733718872, - "learning_rate": 7.124422110552764e-05, - "loss": 5.2826, - "step": 29122 - }, - { - "epoch": 15.188005215123859, - "grad_norm": 1.4398791790008545, - "learning_rate": 7.124321608040201e-05, - "loss": 5.7094, - "step": 29123 - }, - { - "epoch": 15.188526727509778, - "grad_norm": 1.4832066297531128, - "learning_rate": 7.124221105527638e-05, - "loss": 5.1163, - "step": 29124 - }, - { - "epoch": 15.189048239895698, - "grad_norm": 1.4738657474517822, - "learning_rate": 7.124120603015075e-05, - "loss": 5.6767, - "step": 29125 - }, - { - "epoch": 15.189569752281617, - "grad_norm": 1.456805944442749, - "learning_rate": 7.124020100502513e-05, - "loss": 4.9472, - "step": 29126 - }, - { - "epoch": 15.190091264667537, - "grad_norm": 1.5146116018295288, - "learning_rate": 7.12391959798995e-05, - "loss": 5.1556, - "step": 29127 - }, - { - "epoch": 15.190612777053454, - "grad_norm": 1.4952772855758667, - "learning_rate": 7.123819095477388e-05, - "loss": 4.9264, - "step": 29128 - }, - { - "epoch": 15.191134289439374, - "grad_norm": 1.5599579811096191, - "learning_rate": 7.123718592964825e-05, - "loss": 5.2785, - "step": 29129 - }, - { - "epoch": 15.191655801825293, - "grad_norm": 1.5764679908752441, - "learning_rate": 7.123618090452262e-05, - "loss": 5.3397, - "step": 29130 - }, - { - "epoch": 15.192177314211213, - "grad_norm": 1.3770923614501953, - "learning_rate": 7.123517587939699e-05, - "loss": 5.4137, - "step": 29131 - }, - { - "epoch": 15.192698826597132, - "grad_norm": 1.5447611808776855, - "learning_rate": 7.123417085427137e-05, - "loss": 5.1089, - "step": 29132 - }, - { - "epoch": 15.193220338983052, - "grad_norm": 1.4251041412353516, - "learning_rate": 7.123316582914573e-05, - "loss": 5.6005, - "step": 29133 - }, - { - "epoch": 15.19374185136897, - "grad_norm": 1.4695042371749878, - "learning_rate": 7.123216080402011e-05, - "loss": 5.5516, - "step": 29134 - }, - { - "epoch": 15.194263363754889, - "grad_norm": 1.5098638534545898, - "learning_rate": 7.123115577889447e-05, - "loss": 5.1196, - "step": 29135 - }, - { - "epoch": 15.194784876140808, - "grad_norm": 1.5099310874938965, - "learning_rate": 7.123015075376884e-05, - "loss": 5.3962, - "step": 29136 - }, - { - "epoch": 15.195306388526728, - "grad_norm": 1.5616341829299927, - "learning_rate": 7.122914572864322e-05, - "loss": 5.1171, - "step": 29137 - }, - { - "epoch": 15.195827900912647, - "grad_norm": 1.4255425930023193, - "learning_rate": 7.122814070351759e-05, - "loss": 5.5468, - "step": 29138 - }, - { - "epoch": 15.196349413298567, - "grad_norm": 1.5018121004104614, - "learning_rate": 7.122713567839197e-05, - "loss": 4.8438, - "step": 29139 - }, - { - "epoch": 15.196870925684484, - "grad_norm": 1.408457636833191, - "learning_rate": 7.122613065326634e-05, - "loss": 5.7668, - "step": 29140 - }, - { - "epoch": 15.197392438070404, - "grad_norm": 1.4518191814422607, - "learning_rate": 7.122512562814071e-05, - "loss": 5.2227, - "step": 29141 - }, - { - "epoch": 15.197913950456323, - "grad_norm": 1.388339877128601, - "learning_rate": 7.122412060301508e-05, - "loss": 4.5772, - "step": 29142 - }, - { - "epoch": 15.198435462842243, - "grad_norm": 1.5343230962753296, - "learning_rate": 7.122311557788945e-05, - "loss": 5.3298, - "step": 29143 - }, - { - "epoch": 15.198956975228162, - "grad_norm": 1.4954180717468262, - "learning_rate": 7.122211055276382e-05, - "loss": 5.4006, - "step": 29144 - }, - { - "epoch": 15.199478487614082, - "grad_norm": 1.4627164602279663, - "learning_rate": 7.12211055276382e-05, - "loss": 5.3932, - "step": 29145 - }, - { - "epoch": 15.2, - "grad_norm": 1.4242260456085205, - "learning_rate": 7.122010050251256e-05, - "loss": 5.4733, - "step": 29146 - }, - { - "epoch": 15.200521512385919, - "grad_norm": 1.5321979522705078, - "learning_rate": 7.121909547738694e-05, - "loss": 4.9814, - "step": 29147 - }, - { - "epoch": 15.201043024771838, - "grad_norm": 1.4426547288894653, - "learning_rate": 7.12180904522613e-05, - "loss": 5.42, - "step": 29148 - }, - { - "epoch": 15.201564537157758, - "grad_norm": 1.4291707277297974, - "learning_rate": 7.121708542713568e-05, - "loss": 5.4717, - "step": 29149 - }, - { - "epoch": 15.202086049543677, - "grad_norm": 1.4988031387329102, - "learning_rate": 7.121608040201006e-05, - "loss": 4.8904, - "step": 29150 - }, - { - "epoch": 15.202607561929597, - "grad_norm": 1.3476476669311523, - "learning_rate": 7.121507537688442e-05, - "loss": 5.7488, - "step": 29151 - }, - { - "epoch": 15.203129074315514, - "grad_norm": 1.4425764083862305, - "learning_rate": 7.12140703517588e-05, - "loss": 5.3255, - "step": 29152 - }, - { - "epoch": 15.203650586701434, - "grad_norm": 1.4146543741226196, - "learning_rate": 7.121306532663317e-05, - "loss": 5.4096, - "step": 29153 - }, - { - "epoch": 15.204172099087353, - "grad_norm": 1.4881154298782349, - "learning_rate": 7.121206030150754e-05, - "loss": 5.3535, - "step": 29154 - }, - { - "epoch": 15.204693611473273, - "grad_norm": 1.367303490638733, - "learning_rate": 7.121105527638191e-05, - "loss": 5.5083, - "step": 29155 - }, - { - "epoch": 15.205215123859192, - "grad_norm": 1.4239705801010132, - "learning_rate": 7.121005025125629e-05, - "loss": 5.645, - "step": 29156 - }, - { - "epoch": 15.205736636245112, - "grad_norm": 1.4227157831192017, - "learning_rate": 7.120904522613065e-05, - "loss": 5.5217, - "step": 29157 - }, - { - "epoch": 15.20625814863103, - "grad_norm": 1.5438764095306396, - "learning_rate": 7.120804020100503e-05, - "loss": 5.3585, - "step": 29158 - }, - { - "epoch": 15.206779661016949, - "grad_norm": 1.4319339990615845, - "learning_rate": 7.12070351758794e-05, - "loss": 5.2934, - "step": 29159 - }, - { - "epoch": 15.207301173402868, - "grad_norm": 1.4700630903244019, - "learning_rate": 7.120603015075378e-05, - "loss": 5.3573, - "step": 29160 - }, - { - "epoch": 15.207822685788788, - "grad_norm": 1.4925076961517334, - "learning_rate": 7.120502512562815e-05, - "loss": 4.9017, - "step": 29161 - }, - { - "epoch": 15.208344198174707, - "grad_norm": 1.3639377355575562, - "learning_rate": 7.120402010050251e-05, - "loss": 5.325, - "step": 29162 - }, - { - "epoch": 15.208865710560627, - "grad_norm": 1.4609572887420654, - "learning_rate": 7.120301507537689e-05, - "loss": 5.4757, - "step": 29163 - }, - { - "epoch": 15.209387222946544, - "grad_norm": 1.4827523231506348, - "learning_rate": 7.120201005025125e-05, - "loss": 5.4588, - "step": 29164 - }, - { - "epoch": 15.209908735332464, - "grad_norm": 1.422369122505188, - "learning_rate": 7.120100502512563e-05, - "loss": 5.6691, - "step": 29165 - }, - { - "epoch": 15.210430247718383, - "grad_norm": 1.429643988609314, - "learning_rate": 7.12e-05, - "loss": 5.0762, - "step": 29166 - }, - { - "epoch": 15.210951760104303, - "grad_norm": 1.5050907135009766, - "learning_rate": 7.119899497487437e-05, - "loss": 5.1685, - "step": 29167 - }, - { - "epoch": 15.211473272490222, - "grad_norm": 1.6214591264724731, - "learning_rate": 7.119798994974874e-05, - "loss": 5.2729, - "step": 29168 - }, - { - "epoch": 15.211994784876142, - "grad_norm": 1.498030185699463, - "learning_rate": 7.119698492462312e-05, - "loss": 5.361, - "step": 29169 - }, - { - "epoch": 15.21251629726206, - "grad_norm": 1.5234391689300537, - "learning_rate": 7.11959798994975e-05, - "loss": 5.6009, - "step": 29170 - }, - { - "epoch": 15.213037809647979, - "grad_norm": 1.3983094692230225, - "learning_rate": 7.119497487437187e-05, - "loss": 5.6097, - "step": 29171 - }, - { - "epoch": 15.213559322033898, - "grad_norm": 1.4478343725204468, - "learning_rate": 7.119396984924624e-05, - "loss": 4.8635, - "step": 29172 - }, - { - "epoch": 15.214080834419818, - "grad_norm": 1.500260829925537, - "learning_rate": 7.119296482412061e-05, - "loss": 5.0577, - "step": 29173 - }, - { - "epoch": 15.214602346805737, - "grad_norm": 1.316672921180725, - "learning_rate": 7.119195979899498e-05, - "loss": 5.6918, - "step": 29174 - }, - { - "epoch": 15.215123859191657, - "grad_norm": 1.434207558631897, - "learning_rate": 7.119095477386934e-05, - "loss": 5.4138, - "step": 29175 - }, - { - "epoch": 15.215645371577574, - "grad_norm": 1.4147169589996338, - "learning_rate": 7.118994974874372e-05, - "loss": 5.4568, - "step": 29176 - }, - { - "epoch": 15.216166883963494, - "grad_norm": 1.4941564798355103, - "learning_rate": 7.118894472361808e-05, - "loss": 5.0491, - "step": 29177 - }, - { - "epoch": 15.216688396349413, - "grad_norm": 1.4595413208007812, - "learning_rate": 7.118793969849246e-05, - "loss": 5.522, - "step": 29178 - }, - { - "epoch": 15.217209908735333, - "grad_norm": 1.4628609418869019, - "learning_rate": 7.118693467336684e-05, - "loss": 5.6684, - "step": 29179 - }, - { - "epoch": 15.217731421121252, - "grad_norm": 1.4377925395965576, - "learning_rate": 7.118592964824122e-05, - "loss": 5.4969, - "step": 29180 - }, - { - "epoch": 15.21825293350717, - "grad_norm": 1.4442487955093384, - "learning_rate": 7.118492462311558e-05, - "loss": 5.7352, - "step": 29181 - }, - { - "epoch": 15.21877444589309, - "grad_norm": 1.4414522647857666, - "learning_rate": 7.118391959798996e-05, - "loss": 5.7265, - "step": 29182 - }, - { - "epoch": 15.219295958279009, - "grad_norm": 1.4220292568206787, - "learning_rate": 7.118291457286432e-05, - "loss": 5.5706, - "step": 29183 - }, - { - "epoch": 15.219817470664928, - "grad_norm": 1.412300944328308, - "learning_rate": 7.11819095477387e-05, - "loss": 5.7376, - "step": 29184 - }, - { - "epoch": 15.220338983050848, - "grad_norm": 1.3802800178527832, - "learning_rate": 7.118090452261307e-05, - "loss": 5.8763, - "step": 29185 - }, - { - "epoch": 15.220860495436767, - "grad_norm": 1.5501171350479126, - "learning_rate": 7.117989949748744e-05, - "loss": 5.3293, - "step": 29186 - }, - { - "epoch": 15.221382007822687, - "grad_norm": 1.4845623970031738, - "learning_rate": 7.117889447236181e-05, - "loss": 5.6513, - "step": 29187 - }, - { - "epoch": 15.221903520208604, - "grad_norm": 1.5787614583969116, - "learning_rate": 7.117788944723617e-05, - "loss": 5.0471, - "step": 29188 - }, - { - "epoch": 15.222425032594524, - "grad_norm": 1.7037742137908936, - "learning_rate": 7.117688442211055e-05, - "loss": 4.5109, - "step": 29189 - }, - { - "epoch": 15.222946544980443, - "grad_norm": 1.389772891998291, - "learning_rate": 7.117587939698493e-05, - "loss": 5.4648, - "step": 29190 - }, - { - "epoch": 15.223468057366363, - "grad_norm": 1.3605092763900757, - "learning_rate": 7.11748743718593e-05, - "loss": 5.6748, - "step": 29191 - }, - { - "epoch": 15.223989569752282, - "grad_norm": 1.3752412796020508, - "learning_rate": 7.117386934673367e-05, - "loss": 5.1244, - "step": 29192 - }, - { - "epoch": 15.2245110821382, - "grad_norm": 1.431384801864624, - "learning_rate": 7.117286432160805e-05, - "loss": 5.4054, - "step": 29193 - }, - { - "epoch": 15.22503259452412, - "grad_norm": 1.4368969202041626, - "learning_rate": 7.117185929648241e-05, - "loss": 5.6442, - "step": 29194 - }, - { - "epoch": 15.225554106910039, - "grad_norm": 1.44606351852417, - "learning_rate": 7.117085427135679e-05, - "loss": 5.4648, - "step": 29195 - }, - { - "epoch": 15.226075619295958, - "grad_norm": 1.4192358255386353, - "learning_rate": 7.116984924623115e-05, - "loss": 4.9874, - "step": 29196 - }, - { - "epoch": 15.226597131681878, - "grad_norm": 1.4571563005447388, - "learning_rate": 7.116884422110553e-05, - "loss": 5.3315, - "step": 29197 - }, - { - "epoch": 15.227118644067797, - "grad_norm": 1.5193946361541748, - "learning_rate": 7.11678391959799e-05, - "loss": 5.375, - "step": 29198 - }, - { - "epoch": 15.227640156453715, - "grad_norm": 1.4320123195648193, - "learning_rate": 7.116683417085427e-05, - "loss": 5.4761, - "step": 29199 - }, - { - "epoch": 15.228161668839634, - "grad_norm": 1.718379020690918, - "learning_rate": 7.116582914572865e-05, - "loss": 4.9132, - "step": 29200 - }, - { - "epoch": 15.228683181225554, - "grad_norm": 1.420807123184204, - "learning_rate": 7.116482412060302e-05, - "loss": 5.6224, - "step": 29201 - }, - { - "epoch": 15.229204693611473, - "grad_norm": 1.4570138454437256, - "learning_rate": 7.11638190954774e-05, - "loss": 5.3667, - "step": 29202 - }, - { - "epoch": 15.229726205997393, - "grad_norm": 1.5001001358032227, - "learning_rate": 7.116281407035176e-05, - "loss": 5.2659, - "step": 29203 - }, - { - "epoch": 15.230247718383312, - "grad_norm": 1.5709587335586548, - "learning_rate": 7.116180904522614e-05, - "loss": 5.0525, - "step": 29204 - }, - { - "epoch": 15.23076923076923, - "grad_norm": 1.4856010675430298, - "learning_rate": 7.11608040201005e-05, - "loss": 5.3558, - "step": 29205 - }, - { - "epoch": 15.23129074315515, - "grad_norm": 1.4104492664337158, - "learning_rate": 7.115979899497488e-05, - "loss": 5.4161, - "step": 29206 - }, - { - "epoch": 15.231812255541069, - "grad_norm": 1.4696192741394043, - "learning_rate": 7.115879396984924e-05, - "loss": 5.4708, - "step": 29207 - }, - { - "epoch": 15.232333767926988, - "grad_norm": 1.5627691745758057, - "learning_rate": 7.115778894472362e-05, - "loss": 4.8357, - "step": 29208 - }, - { - "epoch": 15.232855280312908, - "grad_norm": 1.4541746377944946, - "learning_rate": 7.115678391959799e-05, - "loss": 5.8107, - "step": 29209 - }, - { - "epoch": 15.233376792698827, - "grad_norm": 1.5496139526367188, - "learning_rate": 7.115577889447236e-05, - "loss": 5.2817, - "step": 29210 - }, - { - "epoch": 15.233898305084745, - "grad_norm": 1.4731202125549316, - "learning_rate": 7.115477386934674e-05, - "loss": 4.6094, - "step": 29211 - }, - { - "epoch": 15.234419817470664, - "grad_norm": 1.4015226364135742, - "learning_rate": 7.115376884422112e-05, - "loss": 5.4218, - "step": 29212 - }, - { - "epoch": 15.234941329856584, - "grad_norm": 1.5217863321304321, - "learning_rate": 7.115276381909548e-05, - "loss": 5.0055, - "step": 29213 - }, - { - "epoch": 15.235462842242503, - "grad_norm": 1.5536103248596191, - "learning_rate": 7.115175879396985e-05, - "loss": 5.6109, - "step": 29214 - }, - { - "epoch": 15.235984354628423, - "grad_norm": 1.4529712200164795, - "learning_rate": 7.115075376884422e-05, - "loss": 5.1843, - "step": 29215 - }, - { - "epoch": 15.236505867014342, - "grad_norm": 1.4078140258789062, - "learning_rate": 7.114974874371859e-05, - "loss": 5.3024, - "step": 29216 - }, - { - "epoch": 15.23702737940026, - "grad_norm": 1.6242976188659668, - "learning_rate": 7.114874371859297e-05, - "loss": 4.9754, - "step": 29217 - }, - { - "epoch": 15.23754889178618, - "grad_norm": 1.4981985092163086, - "learning_rate": 7.114773869346733e-05, - "loss": 5.41, - "step": 29218 - }, - { - "epoch": 15.238070404172099, - "grad_norm": 1.4471553564071655, - "learning_rate": 7.114673366834171e-05, - "loss": 5.5814, - "step": 29219 - }, - { - "epoch": 15.238591916558018, - "grad_norm": 1.4787113666534424, - "learning_rate": 7.114572864321609e-05, - "loss": 5.4081, - "step": 29220 - }, - { - "epoch": 15.239113428943938, - "grad_norm": 1.5602024793624878, - "learning_rate": 7.114472361809046e-05, - "loss": 5.5021, - "step": 29221 - }, - { - "epoch": 15.239634941329857, - "grad_norm": 1.4478280544281006, - "learning_rate": 7.114371859296483e-05, - "loss": 5.3845, - "step": 29222 - }, - { - "epoch": 15.240156453715775, - "grad_norm": 1.4182497262954712, - "learning_rate": 7.114271356783921e-05, - "loss": 5.3386, - "step": 29223 - }, - { - "epoch": 15.240677966101694, - "grad_norm": 1.5345991849899292, - "learning_rate": 7.114170854271357e-05, - "loss": 5.0601, - "step": 29224 - }, - { - "epoch": 15.241199478487614, - "grad_norm": 1.5500789880752563, - "learning_rate": 7.114070351758795e-05, - "loss": 5.3619, - "step": 29225 - }, - { - "epoch": 15.241720990873533, - "grad_norm": 1.6311488151550293, - "learning_rate": 7.113969849246231e-05, - "loss": 4.9737, - "step": 29226 - }, - { - "epoch": 15.242242503259453, - "grad_norm": 1.5715339183807373, - "learning_rate": 7.113869346733669e-05, - "loss": 5.4178, - "step": 29227 - }, - { - "epoch": 15.242764015645372, - "grad_norm": 1.4906986951828003, - "learning_rate": 7.113768844221106e-05, - "loss": 5.7101, - "step": 29228 - }, - { - "epoch": 15.24328552803129, - "grad_norm": 1.519370436668396, - "learning_rate": 7.113668341708542e-05, - "loss": 5.2404, - "step": 29229 - }, - { - "epoch": 15.24380704041721, - "grad_norm": 1.4089536666870117, - "learning_rate": 7.11356783919598e-05, - "loss": 5.6062, - "step": 29230 - }, - { - "epoch": 15.244328552803129, - "grad_norm": 1.4868764877319336, - "learning_rate": 7.113467336683418e-05, - "loss": 5.1736, - "step": 29231 - }, - { - "epoch": 15.244850065189048, - "grad_norm": 1.467621922492981, - "learning_rate": 7.113366834170855e-05, - "loss": 5.2968, - "step": 29232 - }, - { - "epoch": 15.245371577574968, - "grad_norm": 1.4765105247497559, - "learning_rate": 7.113266331658292e-05, - "loss": 5.4868, - "step": 29233 - }, - { - "epoch": 15.245893089960887, - "grad_norm": 1.5225013494491577, - "learning_rate": 7.11316582914573e-05, - "loss": 5.211, - "step": 29234 - }, - { - "epoch": 15.246414602346805, - "grad_norm": 1.4680237770080566, - "learning_rate": 7.113065326633166e-05, - "loss": 5.5159, - "step": 29235 - }, - { - "epoch": 15.246936114732724, - "grad_norm": 1.4028443098068237, - "learning_rate": 7.112964824120604e-05, - "loss": 4.6197, - "step": 29236 - }, - { - "epoch": 15.247457627118644, - "grad_norm": 1.474017858505249, - "learning_rate": 7.11286432160804e-05, - "loss": 5.0051, - "step": 29237 - }, - { - "epoch": 15.247979139504563, - "grad_norm": 1.508884310722351, - "learning_rate": 7.112763819095478e-05, - "loss": 4.9394, - "step": 29238 - }, - { - "epoch": 15.248500651890483, - "grad_norm": 1.6887837648391724, - "learning_rate": 7.112663316582914e-05, - "loss": 5.036, - "step": 29239 - }, - { - "epoch": 15.249022164276402, - "grad_norm": 1.6257001161575317, - "learning_rate": 7.112562814070352e-05, - "loss": 5.1522, - "step": 29240 - }, - { - "epoch": 15.24954367666232, - "grad_norm": 1.3846627473831177, - "learning_rate": 7.11246231155779e-05, - "loss": 5.5358, - "step": 29241 - }, - { - "epoch": 15.25006518904824, - "grad_norm": 1.537520408630371, - "learning_rate": 7.112361809045226e-05, - "loss": 5.3058, - "step": 29242 - }, - { - "epoch": 15.250586701434159, - "grad_norm": 1.5530458688735962, - "learning_rate": 7.112261306532664e-05, - "loss": 5.4403, - "step": 29243 - }, - { - "epoch": 15.251108213820078, - "grad_norm": 1.5129646062850952, - "learning_rate": 7.1121608040201e-05, - "loss": 5.1782, - "step": 29244 - }, - { - "epoch": 15.251629726205998, - "grad_norm": 1.4050081968307495, - "learning_rate": 7.112060301507538e-05, - "loss": 5.3821, - "step": 29245 - }, - { - "epoch": 15.252151238591917, - "grad_norm": 1.4168224334716797, - "learning_rate": 7.111959798994975e-05, - "loss": 5.7571, - "step": 29246 - }, - { - "epoch": 15.252672750977835, - "grad_norm": 1.354550838470459, - "learning_rate": 7.111859296482413e-05, - "loss": 5.594, - "step": 29247 - }, - { - "epoch": 15.253194263363755, - "grad_norm": 1.5746275186538696, - "learning_rate": 7.111758793969849e-05, - "loss": 5.1267, - "step": 29248 - }, - { - "epoch": 15.253715775749674, - "grad_norm": 1.4472053050994873, - "learning_rate": 7.111658291457287e-05, - "loss": 5.2917, - "step": 29249 - }, - { - "epoch": 15.254237288135593, - "grad_norm": 1.4611178636550903, - "learning_rate": 7.111557788944723e-05, - "loss": 5.4026, - "step": 29250 - }, - { - "epoch": 15.254758800521513, - "grad_norm": 1.3689262866973877, - "learning_rate": 7.111457286432161e-05, - "loss": 5.6506, - "step": 29251 - }, - { - "epoch": 15.255280312907432, - "grad_norm": 1.4092766046524048, - "learning_rate": 7.111356783919599e-05, - "loss": 5.2928, - "step": 29252 - }, - { - "epoch": 15.25580182529335, - "grad_norm": 1.5629675388336182, - "learning_rate": 7.111256281407037e-05, - "loss": 5.4194, - "step": 29253 - }, - { - "epoch": 15.25632333767927, - "grad_norm": 1.394439697265625, - "learning_rate": 7.111155778894473e-05, - "loss": 5.652, - "step": 29254 - }, - { - "epoch": 15.256844850065189, - "grad_norm": 1.447618007659912, - "learning_rate": 7.11105527638191e-05, - "loss": 4.6829, - "step": 29255 - }, - { - "epoch": 15.257366362451108, - "grad_norm": 1.419729232788086, - "learning_rate": 7.110954773869347e-05, - "loss": 5.4535, - "step": 29256 - }, - { - "epoch": 15.257887874837028, - "grad_norm": 1.558736801147461, - "learning_rate": 7.110854271356784e-05, - "loss": 5.2032, - "step": 29257 - }, - { - "epoch": 15.258409387222947, - "grad_norm": 1.50729238986969, - "learning_rate": 7.110753768844221e-05, - "loss": 5.5548, - "step": 29258 - }, - { - "epoch": 15.258930899608865, - "grad_norm": 1.5075544118881226, - "learning_rate": 7.110653266331658e-05, - "loss": 5.1194, - "step": 29259 - }, - { - "epoch": 15.259452411994785, - "grad_norm": 3.02138614654541, - "learning_rate": 7.110552763819096e-05, - "loss": 5.3687, - "step": 29260 - }, - { - "epoch": 15.259973924380704, - "grad_norm": 1.3951187133789062, - "learning_rate": 7.110452261306533e-05, - "loss": 5.686, - "step": 29261 - }, - { - "epoch": 15.260495436766623, - "grad_norm": 1.5155194997787476, - "learning_rate": 7.110351758793971e-05, - "loss": 5.1391, - "step": 29262 - }, - { - "epoch": 15.261016949152543, - "grad_norm": 1.5972416400909424, - "learning_rate": 7.110251256281408e-05, - "loss": 5.3235, - "step": 29263 - }, - { - "epoch": 15.261538461538462, - "grad_norm": 1.3795753717422485, - "learning_rate": 7.110150753768845e-05, - "loss": 5.4485, - "step": 29264 - }, - { - "epoch": 15.26205997392438, - "grad_norm": 1.5859390497207642, - "learning_rate": 7.110050251256282e-05, - "loss": 5.115, - "step": 29265 - }, - { - "epoch": 15.2625814863103, - "grad_norm": 1.583330512046814, - "learning_rate": 7.10994974874372e-05, - "loss": 5.5108, - "step": 29266 - }, - { - "epoch": 15.263102998696219, - "grad_norm": 1.4834638833999634, - "learning_rate": 7.109849246231156e-05, - "loss": 5.269, - "step": 29267 - }, - { - "epoch": 15.263624511082138, - "grad_norm": 1.4970821142196655, - "learning_rate": 7.109748743718592e-05, - "loss": 5.5431, - "step": 29268 - }, - { - "epoch": 15.264146023468058, - "grad_norm": 1.4608657360076904, - "learning_rate": 7.10964824120603e-05, - "loss": 5.5366, - "step": 29269 - }, - { - "epoch": 15.264667535853977, - "grad_norm": 1.7062788009643555, - "learning_rate": 7.109547738693467e-05, - "loss": 5.2902, - "step": 29270 - }, - { - "epoch": 15.265189048239895, - "grad_norm": 1.5142731666564941, - "learning_rate": 7.109447236180904e-05, - "loss": 5.138, - "step": 29271 - }, - { - "epoch": 15.265710560625815, - "grad_norm": 1.6138337850570679, - "learning_rate": 7.109346733668342e-05, - "loss": 4.7861, - "step": 29272 - }, - { - "epoch": 15.266232073011734, - "grad_norm": 1.5144842863082886, - "learning_rate": 7.10924623115578e-05, - "loss": 5.0427, - "step": 29273 - }, - { - "epoch": 15.266753585397653, - "grad_norm": 1.430625081062317, - "learning_rate": 7.109145728643216e-05, - "loss": 5.4273, - "step": 29274 - }, - { - "epoch": 15.267275097783573, - "grad_norm": 1.5087437629699707, - "learning_rate": 7.109045226130654e-05, - "loss": 5.2919, - "step": 29275 - }, - { - "epoch": 15.26779661016949, - "grad_norm": 1.497135877609253, - "learning_rate": 7.10894472361809e-05, - "loss": 5.5167, - "step": 29276 - }, - { - "epoch": 15.26831812255541, - "grad_norm": 1.492318868637085, - "learning_rate": 7.108844221105528e-05, - "loss": 5.2947, - "step": 29277 - }, - { - "epoch": 15.26883963494133, - "grad_norm": 1.5044126510620117, - "learning_rate": 7.108743718592965e-05, - "loss": 5.0413, - "step": 29278 - }, - { - "epoch": 15.269361147327249, - "grad_norm": 1.4316357374191284, - "learning_rate": 7.108643216080403e-05, - "loss": 5.6448, - "step": 29279 - }, - { - "epoch": 15.269882659713168, - "grad_norm": 1.4216103553771973, - "learning_rate": 7.108542713567839e-05, - "loss": 5.1982, - "step": 29280 - }, - { - "epoch": 15.270404172099088, - "grad_norm": 1.4883488416671753, - "learning_rate": 7.108442211055277e-05, - "loss": 5.1059, - "step": 29281 - }, - { - "epoch": 15.270925684485007, - "grad_norm": 1.5367505550384521, - "learning_rate": 7.108341708542715e-05, - "loss": 5.0148, - "step": 29282 - }, - { - "epoch": 15.271447196870925, - "grad_norm": 1.5230896472930908, - "learning_rate": 7.108241206030151e-05, - "loss": 5.0652, - "step": 29283 - }, - { - "epoch": 15.271968709256845, - "grad_norm": 1.63201904296875, - "learning_rate": 7.108140703517589e-05, - "loss": 4.5893, - "step": 29284 - }, - { - "epoch": 15.272490221642764, - "grad_norm": 1.490744948387146, - "learning_rate": 7.108040201005025e-05, - "loss": 5.7108, - "step": 29285 - }, - { - "epoch": 15.273011734028684, - "grad_norm": 1.468396782875061, - "learning_rate": 7.107939698492463e-05, - "loss": 5.4588, - "step": 29286 - }, - { - "epoch": 15.273533246414603, - "grad_norm": 1.4380784034729004, - "learning_rate": 7.1078391959799e-05, - "loss": 5.566, - "step": 29287 - }, - { - "epoch": 15.27405475880052, - "grad_norm": 1.6346771717071533, - "learning_rate": 7.107738693467337e-05, - "loss": 5.1915, - "step": 29288 - }, - { - "epoch": 15.27457627118644, - "grad_norm": 1.4833974838256836, - "learning_rate": 7.107638190954774e-05, - "loss": 5.3341, - "step": 29289 - }, - { - "epoch": 15.27509778357236, - "grad_norm": 1.5055322647094727, - "learning_rate": 7.107537688442211e-05, - "loss": 4.8328, - "step": 29290 - }, - { - "epoch": 15.275619295958279, - "grad_norm": 1.4856635332107544, - "learning_rate": 7.107437185929648e-05, - "loss": 5.3008, - "step": 29291 - }, - { - "epoch": 15.276140808344199, - "grad_norm": 1.5556002855300903, - "learning_rate": 7.107336683417086e-05, - "loss": 4.6245, - "step": 29292 - }, - { - "epoch": 15.276662320730118, - "grad_norm": 1.43301522731781, - "learning_rate": 7.107236180904523e-05, - "loss": 5.7796, - "step": 29293 - }, - { - "epoch": 15.277183833116036, - "grad_norm": 1.4609949588775635, - "learning_rate": 7.10713567839196e-05, - "loss": 5.5224, - "step": 29294 - }, - { - "epoch": 15.277705345501955, - "grad_norm": 1.6017494201660156, - "learning_rate": 7.107035175879398e-05, - "loss": 5.3137, - "step": 29295 - }, - { - "epoch": 15.278226857887875, - "grad_norm": 1.5692193508148193, - "learning_rate": 7.106934673366834e-05, - "loss": 5.5621, - "step": 29296 - }, - { - "epoch": 15.278748370273794, - "grad_norm": 1.3760876655578613, - "learning_rate": 7.106834170854272e-05, - "loss": 5.4452, - "step": 29297 - }, - { - "epoch": 15.279269882659714, - "grad_norm": 1.3698985576629639, - "learning_rate": 7.106733668341708e-05, - "loss": 5.0637, - "step": 29298 - }, - { - "epoch": 15.279791395045633, - "grad_norm": 1.5468356609344482, - "learning_rate": 7.106633165829146e-05, - "loss": 5.443, - "step": 29299 - }, - { - "epoch": 15.28031290743155, - "grad_norm": 1.4911890029907227, - "learning_rate": 7.106532663316583e-05, - "loss": 5.3503, - "step": 29300 - }, - { - "epoch": 15.28083441981747, - "grad_norm": 1.4646217823028564, - "learning_rate": 7.10643216080402e-05, - "loss": 5.1669, - "step": 29301 - }, - { - "epoch": 15.28135593220339, - "grad_norm": 1.485007643699646, - "learning_rate": 7.106331658291458e-05, - "loss": 5.436, - "step": 29302 - }, - { - "epoch": 15.281877444589309, - "grad_norm": 1.4798847436904907, - "learning_rate": 7.106231155778896e-05, - "loss": 5.4015, - "step": 29303 - }, - { - "epoch": 15.282398956975229, - "grad_norm": 1.4003758430480957, - "learning_rate": 7.106130653266332e-05, - "loss": 5.6618, - "step": 29304 - }, - { - "epoch": 15.282920469361148, - "grad_norm": 1.667283058166504, - "learning_rate": 7.10603015075377e-05, - "loss": 4.7938, - "step": 29305 - }, - { - "epoch": 15.283441981747066, - "grad_norm": 1.4326856136322021, - "learning_rate": 7.105929648241207e-05, - "loss": 5.4574, - "step": 29306 - }, - { - "epoch": 15.283963494132985, - "grad_norm": 1.439226508140564, - "learning_rate": 7.105829145728644e-05, - "loss": 5.1851, - "step": 29307 - }, - { - "epoch": 15.284485006518905, - "grad_norm": 1.36747407913208, - "learning_rate": 7.105728643216081e-05, - "loss": 5.6768, - "step": 29308 - }, - { - "epoch": 15.285006518904824, - "grad_norm": 1.4422301054000854, - "learning_rate": 7.105628140703517e-05, - "loss": 5.5489, - "step": 29309 - }, - { - "epoch": 15.285528031290744, - "grad_norm": 1.4019887447357178, - "learning_rate": 7.105527638190955e-05, - "loss": 5.667, - "step": 29310 - }, - { - "epoch": 15.286049543676663, - "grad_norm": 1.4363765716552734, - "learning_rate": 7.105427135678391e-05, - "loss": 5.3976, - "step": 29311 - }, - { - "epoch": 15.28657105606258, - "grad_norm": 1.501641035079956, - "learning_rate": 7.105326633165829e-05, - "loss": 5.1065, - "step": 29312 - }, - { - "epoch": 15.2870925684485, - "grad_norm": 1.4707731008529663, - "learning_rate": 7.105226130653267e-05, - "loss": 5.6657, - "step": 29313 - }, - { - "epoch": 15.28761408083442, - "grad_norm": 1.3933601379394531, - "learning_rate": 7.105125628140705e-05, - "loss": 5.389, - "step": 29314 - }, - { - "epoch": 15.288135593220339, - "grad_norm": 1.5441107749938965, - "learning_rate": 7.105025125628141e-05, - "loss": 5.1169, - "step": 29315 - }, - { - "epoch": 15.288657105606259, - "grad_norm": 1.4338984489440918, - "learning_rate": 7.104924623115579e-05, - "loss": 5.4118, - "step": 29316 - }, - { - "epoch": 15.289178617992178, - "grad_norm": 1.4471893310546875, - "learning_rate": 7.104824120603015e-05, - "loss": 5.1153, - "step": 29317 - }, - { - "epoch": 15.289700130378096, - "grad_norm": 1.4199351072311401, - "learning_rate": 7.104723618090453e-05, - "loss": 5.2912, - "step": 29318 - }, - { - "epoch": 15.290221642764015, - "grad_norm": 1.5104995965957642, - "learning_rate": 7.10462311557789e-05, - "loss": 5.3142, - "step": 29319 - }, - { - "epoch": 15.290743155149935, - "grad_norm": 1.546210765838623, - "learning_rate": 7.104522613065327e-05, - "loss": 5.4852, - "step": 29320 - }, - { - "epoch": 15.291264667535854, - "grad_norm": 1.4927351474761963, - "learning_rate": 7.104422110552764e-05, - "loss": 5.3964, - "step": 29321 - }, - { - "epoch": 15.291786179921774, - "grad_norm": 1.4840331077575684, - "learning_rate": 7.1043216080402e-05, - "loss": 5.2647, - "step": 29322 - }, - { - "epoch": 15.292307692307693, - "grad_norm": 1.4876492023468018, - "learning_rate": 7.104221105527638e-05, - "loss": 4.6135, - "step": 29323 - }, - { - "epoch": 15.29282920469361, - "grad_norm": 1.4561541080474854, - "learning_rate": 7.104120603015076e-05, - "loss": 4.9457, - "step": 29324 - }, - { - "epoch": 15.29335071707953, - "grad_norm": 1.3925868272781372, - "learning_rate": 7.104020100502514e-05, - "loss": 5.7239, - "step": 29325 - }, - { - "epoch": 15.29387222946545, - "grad_norm": 1.5435882806777954, - "learning_rate": 7.10391959798995e-05, - "loss": 5.6045, - "step": 29326 - }, - { - "epoch": 15.29439374185137, - "grad_norm": 1.5216354131698608, - "learning_rate": 7.103819095477388e-05, - "loss": 5.4493, - "step": 29327 - }, - { - "epoch": 15.294915254237289, - "grad_norm": 1.5252598524093628, - "learning_rate": 7.103718592964824e-05, - "loss": 5.538, - "step": 29328 - }, - { - "epoch": 15.295436766623208, - "grad_norm": 1.574095368385315, - "learning_rate": 7.103618090452262e-05, - "loss": 5.2045, - "step": 29329 - }, - { - "epoch": 15.295958279009126, - "grad_norm": 1.3456166982650757, - "learning_rate": 7.103517587939698e-05, - "loss": 5.6847, - "step": 29330 - }, - { - "epoch": 15.296479791395045, - "grad_norm": 1.518509030342102, - "learning_rate": 7.103417085427136e-05, - "loss": 5.47, - "step": 29331 - }, - { - "epoch": 15.297001303780965, - "grad_norm": 1.3422966003417969, - "learning_rate": 7.103316582914573e-05, - "loss": 5.8555, - "step": 29332 - }, - { - "epoch": 15.297522816166884, - "grad_norm": 1.498883605003357, - "learning_rate": 7.10321608040201e-05, - "loss": 5.6202, - "step": 29333 - }, - { - "epoch": 15.298044328552804, - "grad_norm": 1.4869309663772583, - "learning_rate": 7.103115577889448e-05, - "loss": 5.2633, - "step": 29334 - }, - { - "epoch": 15.298565840938723, - "grad_norm": 1.427056908607483, - "learning_rate": 7.103015075376885e-05, - "loss": 5.3598, - "step": 29335 - }, - { - "epoch": 15.29908735332464, - "grad_norm": 1.4353587627410889, - "learning_rate": 7.102914572864322e-05, - "loss": 4.7462, - "step": 29336 - }, - { - "epoch": 15.29960886571056, - "grad_norm": 1.4003654718399048, - "learning_rate": 7.102814070351759e-05, - "loss": 5.5436, - "step": 29337 - }, - { - "epoch": 15.30013037809648, - "grad_norm": 1.4537805318832397, - "learning_rate": 7.102713567839197e-05, - "loss": 5.4403, - "step": 29338 - }, - { - "epoch": 15.3006518904824, - "grad_norm": 1.5028003454208374, - "learning_rate": 7.102613065326633e-05, - "loss": 5.3887, - "step": 29339 - }, - { - "epoch": 15.301173402868319, - "grad_norm": 1.4674171209335327, - "learning_rate": 7.102512562814071e-05, - "loss": 5.395, - "step": 29340 - }, - { - "epoch": 15.301694915254238, - "grad_norm": 1.4670584201812744, - "learning_rate": 7.102412060301507e-05, - "loss": 5.1683, - "step": 29341 - }, - { - "epoch": 15.302216427640156, - "grad_norm": 1.4277158975601196, - "learning_rate": 7.102311557788945e-05, - "loss": 5.5403, - "step": 29342 - }, - { - "epoch": 15.302737940026075, - "grad_norm": 1.4988266229629517, - "learning_rate": 7.102211055276381e-05, - "loss": 4.6656, - "step": 29343 - }, - { - "epoch": 15.303259452411995, - "grad_norm": 1.4049707651138306, - "learning_rate": 7.102110552763819e-05, - "loss": 5.5765, - "step": 29344 - }, - { - "epoch": 15.303780964797914, - "grad_norm": 1.4259719848632812, - "learning_rate": 7.102010050251257e-05, - "loss": 5.631, - "step": 29345 - }, - { - "epoch": 15.304302477183834, - "grad_norm": 1.3259862661361694, - "learning_rate": 7.101909547738695e-05, - "loss": 5.6112, - "step": 29346 - }, - { - "epoch": 15.304823989569753, - "grad_norm": 1.312058448791504, - "learning_rate": 7.101809045226131e-05, - "loss": 5.6319, - "step": 29347 - }, - { - "epoch": 15.30534550195567, - "grad_norm": 1.5411399602890015, - "learning_rate": 7.101708542713568e-05, - "loss": 4.9062, - "step": 29348 - }, - { - "epoch": 15.30586701434159, - "grad_norm": 1.4422537088394165, - "learning_rate": 7.101608040201005e-05, - "loss": 5.0459, - "step": 29349 - }, - { - "epoch": 15.30638852672751, - "grad_norm": 1.5396050214767456, - "learning_rate": 7.101507537688442e-05, - "loss": 5.1656, - "step": 29350 - }, - { - "epoch": 15.30691003911343, - "grad_norm": 1.4763191938400269, - "learning_rate": 7.10140703517588e-05, - "loss": 5.4675, - "step": 29351 - }, - { - "epoch": 15.307431551499349, - "grad_norm": 1.3536689281463623, - "learning_rate": 7.101306532663316e-05, - "loss": 5.5382, - "step": 29352 - }, - { - "epoch": 15.307953063885268, - "grad_norm": 1.4902926683425903, - "learning_rate": 7.101206030150754e-05, - "loss": 5.4117, - "step": 29353 - }, - { - "epoch": 15.308474576271186, - "grad_norm": 1.519805669784546, - "learning_rate": 7.101105527638192e-05, - "loss": 5.5216, - "step": 29354 - }, - { - "epoch": 15.308996088657105, - "grad_norm": 1.430617094039917, - "learning_rate": 7.10100502512563e-05, - "loss": 5.5178, - "step": 29355 - }, - { - "epoch": 15.309517601043025, - "grad_norm": 1.474892020225525, - "learning_rate": 7.100904522613066e-05, - "loss": 5.4783, - "step": 29356 - }, - { - "epoch": 15.310039113428944, - "grad_norm": 1.5215966701507568, - "learning_rate": 7.100804020100504e-05, - "loss": 5.1554, - "step": 29357 - }, - { - "epoch": 15.310560625814864, - "grad_norm": 1.5608253479003906, - "learning_rate": 7.10070351758794e-05, - "loss": 4.9089, - "step": 29358 - }, - { - "epoch": 15.311082138200783, - "grad_norm": 1.5770255327224731, - "learning_rate": 7.100603015075378e-05, - "loss": 5.5241, - "step": 29359 - }, - { - "epoch": 15.3116036505867, - "grad_norm": 1.5551069974899292, - "learning_rate": 7.100502512562814e-05, - "loss": 5.5673, - "step": 29360 - }, - { - "epoch": 15.31212516297262, - "grad_norm": 1.4639376401901245, - "learning_rate": 7.100402010050251e-05, - "loss": 5.3974, - "step": 29361 - }, - { - "epoch": 15.31264667535854, - "grad_norm": 1.6201626062393188, - "learning_rate": 7.100301507537688e-05, - "loss": 4.8468, - "step": 29362 - }, - { - "epoch": 15.31316818774446, - "grad_norm": 1.4381189346313477, - "learning_rate": 7.100201005025125e-05, - "loss": 5.3553, - "step": 29363 - }, - { - "epoch": 15.313689700130379, - "grad_norm": 1.5057883262634277, - "learning_rate": 7.100100502512563e-05, - "loss": 5.2718, - "step": 29364 - }, - { - "epoch": 15.314211212516298, - "grad_norm": 1.3808417320251465, - "learning_rate": 7.1e-05, - "loss": 4.8633, - "step": 29365 - }, - { - "epoch": 15.314732724902216, - "grad_norm": 1.4758027791976929, - "learning_rate": 7.099899497487438e-05, - "loss": 5.5415, - "step": 29366 - }, - { - "epoch": 15.315254237288135, - "grad_norm": 1.6319524049758911, - "learning_rate": 7.099798994974875e-05, - "loss": 4.9194, - "step": 29367 - }, - { - "epoch": 15.315775749674055, - "grad_norm": 1.4908320903778076, - "learning_rate": 7.099698492462312e-05, - "loss": 5.2264, - "step": 29368 - }, - { - "epoch": 15.316297262059974, - "grad_norm": 1.5670061111450195, - "learning_rate": 7.099597989949749e-05, - "loss": 4.7307, - "step": 29369 - }, - { - "epoch": 15.316818774445894, - "grad_norm": 1.4453691244125366, - "learning_rate": 7.099497487437187e-05, - "loss": 5.5637, - "step": 29370 - }, - { - "epoch": 15.317340286831811, - "grad_norm": 1.3977118730545044, - "learning_rate": 7.099396984924623e-05, - "loss": 5.7228, - "step": 29371 - }, - { - "epoch": 15.31786179921773, - "grad_norm": 1.3598848581314087, - "learning_rate": 7.099296482412061e-05, - "loss": 5.5986, - "step": 29372 - }, - { - "epoch": 15.31838331160365, - "grad_norm": 1.4755065441131592, - "learning_rate": 7.099195979899497e-05, - "loss": 5.3857, - "step": 29373 - }, - { - "epoch": 15.31890482398957, - "grad_norm": 1.4938299655914307, - "learning_rate": 7.099095477386935e-05, - "loss": 5.4077, - "step": 29374 - }, - { - "epoch": 15.31942633637549, - "grad_norm": 1.4962185621261597, - "learning_rate": 7.098994974874373e-05, - "loss": 5.0862, - "step": 29375 - }, - { - "epoch": 15.319947848761409, - "grad_norm": 1.5540679693222046, - "learning_rate": 7.098894472361809e-05, - "loss": 5.2589, - "step": 29376 - }, - { - "epoch": 15.320469361147328, - "grad_norm": 1.4976190328598022, - "learning_rate": 7.098793969849247e-05, - "loss": 5.4112, - "step": 29377 - }, - { - "epoch": 15.320990873533246, - "grad_norm": 1.4402002096176147, - "learning_rate": 7.098693467336684e-05, - "loss": 5.5369, - "step": 29378 - }, - { - "epoch": 15.321512385919165, - "grad_norm": 1.4116286039352417, - "learning_rate": 7.098592964824121e-05, - "loss": 5.5559, - "step": 29379 - }, - { - "epoch": 15.322033898305085, - "grad_norm": 1.476027250289917, - "learning_rate": 7.098492462311558e-05, - "loss": 5.496, - "step": 29380 - }, - { - "epoch": 15.322555410691004, - "grad_norm": 1.592615008354187, - "learning_rate": 7.098391959798996e-05, - "loss": 5.5799, - "step": 29381 - }, - { - "epoch": 15.323076923076924, - "grad_norm": 1.4436957836151123, - "learning_rate": 7.098291457286432e-05, - "loss": 5.7652, - "step": 29382 - }, - { - "epoch": 15.323598435462841, - "grad_norm": 1.5761618614196777, - "learning_rate": 7.09819095477387e-05, - "loss": 5.1641, - "step": 29383 - }, - { - "epoch": 15.32411994784876, - "grad_norm": 1.5226973295211792, - "learning_rate": 7.098090452261306e-05, - "loss": 5.04, - "step": 29384 - }, - { - "epoch": 15.32464146023468, - "grad_norm": 1.4692763090133667, - "learning_rate": 7.097989949748744e-05, - "loss": 4.9619, - "step": 29385 - }, - { - "epoch": 15.3251629726206, - "grad_norm": 1.6267428398132324, - "learning_rate": 7.097889447236182e-05, - "loss": 4.735, - "step": 29386 - }, - { - "epoch": 15.32568448500652, - "grad_norm": 1.5578140020370483, - "learning_rate": 7.097788944723618e-05, - "loss": 4.8068, - "step": 29387 - }, - { - "epoch": 15.326205997392439, - "grad_norm": 1.556387186050415, - "learning_rate": 7.097688442211056e-05, - "loss": 5.2255, - "step": 29388 - }, - { - "epoch": 15.326727509778356, - "grad_norm": 1.435271143913269, - "learning_rate": 7.097587939698492e-05, - "loss": 5.5468, - "step": 29389 - }, - { - "epoch": 15.327249022164276, - "grad_norm": 1.4457241296768188, - "learning_rate": 7.09748743718593e-05, - "loss": 4.9683, - "step": 29390 - }, - { - "epoch": 15.327770534550195, - "grad_norm": 1.4462552070617676, - "learning_rate": 7.097386934673367e-05, - "loss": 5.5302, - "step": 29391 - }, - { - "epoch": 15.328292046936115, - "grad_norm": 1.515506386756897, - "learning_rate": 7.097286432160804e-05, - "loss": 5.4462, - "step": 29392 - }, - { - "epoch": 15.328813559322034, - "grad_norm": 1.5399415493011475, - "learning_rate": 7.097185929648241e-05, - "loss": 5.257, - "step": 29393 - }, - { - "epoch": 15.329335071707954, - "grad_norm": 1.425667405128479, - "learning_rate": 7.097085427135679e-05, - "loss": 5.3426, - "step": 29394 - }, - { - "epoch": 15.329856584093871, - "grad_norm": 1.4208918809890747, - "learning_rate": 7.096984924623116e-05, - "loss": 5.7774, - "step": 29395 - }, - { - "epoch": 15.33037809647979, - "grad_norm": 1.4325352907180786, - "learning_rate": 7.096884422110554e-05, - "loss": 5.5942, - "step": 29396 - }, - { - "epoch": 15.33089960886571, - "grad_norm": 1.6812721490859985, - "learning_rate": 7.09678391959799e-05, - "loss": 4.2323, - "step": 29397 - }, - { - "epoch": 15.33142112125163, - "grad_norm": 1.6203092336654663, - "learning_rate": 7.096683417085428e-05, - "loss": 4.9308, - "step": 29398 - }, - { - "epoch": 15.33194263363755, - "grad_norm": 1.4274495840072632, - "learning_rate": 7.096582914572865e-05, - "loss": 5.2838, - "step": 29399 - }, - { - "epoch": 15.332464146023469, - "grad_norm": 1.3630969524383545, - "learning_rate": 7.096482412060303e-05, - "loss": 5.3499, - "step": 29400 - }, - { - "epoch": 15.332985658409386, - "grad_norm": 1.5411399602890015, - "learning_rate": 7.096381909547739e-05, - "loss": 5.1631, - "step": 29401 - }, - { - "epoch": 15.333507170795306, - "grad_norm": 1.5021252632141113, - "learning_rate": 7.096281407035175e-05, - "loss": 5.6213, - "step": 29402 - }, - { - "epoch": 15.334028683181225, - "grad_norm": 1.569925308227539, - "learning_rate": 7.096180904522613e-05, - "loss": 5.0738, - "step": 29403 - }, - { - "epoch": 15.334550195567145, - "grad_norm": 1.5451292991638184, - "learning_rate": 7.09608040201005e-05, - "loss": 4.8355, - "step": 29404 - }, - { - "epoch": 15.335071707953064, - "grad_norm": 1.6290525197982788, - "learning_rate": 7.095979899497487e-05, - "loss": 5.388, - "step": 29405 - }, - { - "epoch": 15.335593220338984, - "grad_norm": 1.5279568433761597, - "learning_rate": 7.095879396984925e-05, - "loss": 4.5987, - "step": 29406 - }, - { - "epoch": 15.336114732724901, - "grad_norm": 1.4263486862182617, - "learning_rate": 7.095778894472363e-05, - "loss": 5.3906, - "step": 29407 - }, - { - "epoch": 15.336636245110821, - "grad_norm": 1.6105337142944336, - "learning_rate": 7.0956783919598e-05, - "loss": 5.1548, - "step": 29408 - }, - { - "epoch": 15.33715775749674, - "grad_norm": 1.576453447341919, - "learning_rate": 7.095577889447237e-05, - "loss": 4.8734, - "step": 29409 - }, - { - "epoch": 15.33767926988266, - "grad_norm": 1.4381731748580933, - "learning_rate": 7.095477386934674e-05, - "loss": 5.5272, - "step": 29410 - }, - { - "epoch": 15.33820078226858, - "grad_norm": 1.5227190256118774, - "learning_rate": 7.095376884422111e-05, - "loss": 5.0092, - "step": 29411 - }, - { - "epoch": 15.338722294654499, - "grad_norm": 1.5497066974639893, - "learning_rate": 7.095276381909548e-05, - "loss": 4.7989, - "step": 29412 - }, - { - "epoch": 15.339243807040416, - "grad_norm": 1.4424240589141846, - "learning_rate": 7.095175879396986e-05, - "loss": 5.4107, - "step": 29413 - }, - { - "epoch": 15.339765319426336, - "grad_norm": 1.4284729957580566, - "learning_rate": 7.095075376884422e-05, - "loss": 5.6906, - "step": 29414 - }, - { - "epoch": 15.340286831812255, - "grad_norm": 1.5288091897964478, - "learning_rate": 7.09497487437186e-05, - "loss": 5.656, - "step": 29415 - }, - { - "epoch": 15.340808344198175, - "grad_norm": 1.439754843711853, - "learning_rate": 7.094874371859298e-05, - "loss": 5.3044, - "step": 29416 - }, - { - "epoch": 15.341329856584094, - "grad_norm": 1.4828888177871704, - "learning_rate": 7.094773869346734e-05, - "loss": 5.6407, - "step": 29417 - }, - { - "epoch": 15.341851368970014, - "grad_norm": 1.53744637966156, - "learning_rate": 7.094673366834172e-05, - "loss": 5.568, - "step": 29418 - }, - { - "epoch": 15.342372881355931, - "grad_norm": 1.5117498636245728, - "learning_rate": 7.094572864321608e-05, - "loss": 4.6729, - "step": 29419 - }, - { - "epoch": 15.342894393741851, - "grad_norm": 1.4338788986206055, - "learning_rate": 7.094472361809046e-05, - "loss": 5.709, - "step": 29420 - }, - { - "epoch": 15.34341590612777, - "grad_norm": 1.5683404207229614, - "learning_rate": 7.094371859296482e-05, - "loss": 5.3939, - "step": 29421 - }, - { - "epoch": 15.34393741851369, - "grad_norm": 1.5334486961364746, - "learning_rate": 7.09427135678392e-05, - "loss": 4.9948, - "step": 29422 - }, - { - "epoch": 15.34445893089961, - "grad_norm": 1.4369977712631226, - "learning_rate": 7.094170854271357e-05, - "loss": 5.3662, - "step": 29423 - }, - { - "epoch": 15.344980443285529, - "grad_norm": 1.4915363788604736, - "learning_rate": 7.094070351758794e-05, - "loss": 4.7705, - "step": 29424 - }, - { - "epoch": 15.345501955671446, - "grad_norm": 1.5413792133331299, - "learning_rate": 7.093969849246231e-05, - "loss": 5.4536, - "step": 29425 - }, - { - "epoch": 15.346023468057366, - "grad_norm": 1.4257376194000244, - "learning_rate": 7.093869346733669e-05, - "loss": 5.6482, - "step": 29426 - }, - { - "epoch": 15.346544980443285, - "grad_norm": 1.4750468730926514, - "learning_rate": 7.093768844221106e-05, - "loss": 5.6041, - "step": 29427 - }, - { - "epoch": 15.347066492829205, - "grad_norm": 1.4488298892974854, - "learning_rate": 7.093668341708543e-05, - "loss": 5.5926, - "step": 29428 - }, - { - "epoch": 15.347588005215124, - "grad_norm": 1.466354489326477, - "learning_rate": 7.09356783919598e-05, - "loss": 5.3742, - "step": 29429 - }, - { - "epoch": 15.348109517601044, - "grad_norm": 1.3946584463119507, - "learning_rate": 7.093467336683417e-05, - "loss": 5.7022, - "step": 29430 - }, - { - "epoch": 15.348631029986961, - "grad_norm": 1.294505000114441, - "learning_rate": 7.093366834170855e-05, - "loss": 5.6307, - "step": 29431 - }, - { - "epoch": 15.349152542372881, - "grad_norm": 1.5080928802490234, - "learning_rate": 7.093266331658291e-05, - "loss": 5.2056, - "step": 29432 - }, - { - "epoch": 15.3496740547588, - "grad_norm": 1.5540968179702759, - "learning_rate": 7.093165829145729e-05, - "loss": 5.2871, - "step": 29433 - }, - { - "epoch": 15.35019556714472, - "grad_norm": 1.5119614601135254, - "learning_rate": 7.093065326633165e-05, - "loss": 5.4781, - "step": 29434 - }, - { - "epoch": 15.35071707953064, - "grad_norm": 1.5070730447769165, - "learning_rate": 7.092964824120603e-05, - "loss": 4.2775, - "step": 29435 - }, - { - "epoch": 15.351238591916559, - "grad_norm": 1.5155965089797974, - "learning_rate": 7.092864321608041e-05, - "loss": 5.4793, - "step": 29436 - }, - { - "epoch": 15.351760104302477, - "grad_norm": 1.511824369430542, - "learning_rate": 7.092763819095479e-05, - "loss": 5.1527, - "step": 29437 - }, - { - "epoch": 15.352281616688396, - "grad_norm": 1.510451078414917, - "learning_rate": 7.092663316582915e-05, - "loss": 5.2706, - "step": 29438 - }, - { - "epoch": 15.352803129074315, - "grad_norm": 1.3575663566589355, - "learning_rate": 7.092562814070353e-05, - "loss": 5.2428, - "step": 29439 - }, - { - "epoch": 15.353324641460235, - "grad_norm": 1.5115175247192383, - "learning_rate": 7.09246231155779e-05, - "loss": 5.4968, - "step": 29440 - }, - { - "epoch": 15.353846153846154, - "grad_norm": 1.4243559837341309, - "learning_rate": 7.092361809045226e-05, - "loss": 4.8828, - "step": 29441 - }, - { - "epoch": 15.354367666232074, - "grad_norm": 1.4339983463287354, - "learning_rate": 7.092261306532664e-05, - "loss": 5.1558, - "step": 29442 - }, - { - "epoch": 15.354889178617992, - "grad_norm": 1.469846248626709, - "learning_rate": 7.0921608040201e-05, - "loss": 5.2444, - "step": 29443 - }, - { - "epoch": 15.355410691003911, - "grad_norm": 1.359991431236267, - "learning_rate": 7.092060301507538e-05, - "loss": 5.4705, - "step": 29444 - }, - { - "epoch": 15.35593220338983, - "grad_norm": 1.5310231447219849, - "learning_rate": 7.091959798994974e-05, - "loss": 5.3698, - "step": 29445 - }, - { - "epoch": 15.35645371577575, - "grad_norm": 1.5244665145874023, - "learning_rate": 7.091859296482412e-05, - "loss": 5.6668, - "step": 29446 - }, - { - "epoch": 15.35697522816167, - "grad_norm": 1.416869044303894, - "learning_rate": 7.09175879396985e-05, - "loss": 5.3848, - "step": 29447 - }, - { - "epoch": 15.357496740547589, - "grad_norm": 1.531835675239563, - "learning_rate": 7.091658291457288e-05, - "loss": 4.8804, - "step": 29448 - }, - { - "epoch": 15.358018252933507, - "grad_norm": 1.4638569355010986, - "learning_rate": 7.091557788944724e-05, - "loss": 5.3677, - "step": 29449 - }, - { - "epoch": 15.358539765319426, - "grad_norm": 1.4765348434448242, - "learning_rate": 7.091457286432162e-05, - "loss": 5.5592, - "step": 29450 - }, - { - "epoch": 15.359061277705345, - "grad_norm": 1.5918322801589966, - "learning_rate": 7.091356783919598e-05, - "loss": 5.231, - "step": 29451 - }, - { - "epoch": 15.359582790091265, - "grad_norm": 1.5324763059616089, - "learning_rate": 7.091256281407036e-05, - "loss": 5.5651, - "step": 29452 - }, - { - "epoch": 15.360104302477184, - "grad_norm": 1.5340152978897095, - "learning_rate": 7.091155778894472e-05, - "loss": 5.2571, - "step": 29453 - }, - { - "epoch": 15.360625814863104, - "grad_norm": 1.4719208478927612, - "learning_rate": 7.091055276381909e-05, - "loss": 5.5596, - "step": 29454 - }, - { - "epoch": 15.361147327249022, - "grad_norm": 1.5601240396499634, - "learning_rate": 7.090954773869347e-05, - "loss": 4.797, - "step": 29455 - }, - { - "epoch": 15.361668839634941, - "grad_norm": 1.4410814046859741, - "learning_rate": 7.090854271356784e-05, - "loss": 5.6716, - "step": 29456 - }, - { - "epoch": 15.36219035202086, - "grad_norm": 1.5836313962936401, - "learning_rate": 7.090753768844222e-05, - "loss": 5.3112, - "step": 29457 - }, - { - "epoch": 15.36271186440678, - "grad_norm": 1.7886006832122803, - "learning_rate": 7.090653266331659e-05, - "loss": 5.1771, - "step": 29458 - }, - { - "epoch": 15.3632333767927, - "grad_norm": 1.5515748262405396, - "learning_rate": 7.090552763819096e-05, - "loss": 5.0074, - "step": 29459 - }, - { - "epoch": 15.363754889178619, - "grad_norm": 1.468180775642395, - "learning_rate": 7.090452261306533e-05, - "loss": 5.6685, - "step": 29460 - }, - { - "epoch": 15.364276401564537, - "grad_norm": 1.6270809173583984, - "learning_rate": 7.090351758793971e-05, - "loss": 4.7666, - "step": 29461 - }, - { - "epoch": 15.364797913950456, - "grad_norm": 1.5983784198760986, - "learning_rate": 7.090251256281407e-05, - "loss": 5.5874, - "step": 29462 - }, - { - "epoch": 15.365319426336375, - "grad_norm": 1.4494624137878418, - "learning_rate": 7.090150753768845e-05, - "loss": 5.4473, - "step": 29463 - }, - { - "epoch": 15.365840938722295, - "grad_norm": 1.581477165222168, - "learning_rate": 7.090050251256281e-05, - "loss": 5.1623, - "step": 29464 - }, - { - "epoch": 15.366362451108214, - "grad_norm": 1.3875422477722168, - "learning_rate": 7.089949748743719e-05, - "loss": 5.4901, - "step": 29465 - }, - { - "epoch": 15.366883963494132, - "grad_norm": 1.515095829963684, - "learning_rate": 7.089849246231156e-05, - "loss": 4.9265, - "step": 29466 - }, - { - "epoch": 15.367405475880052, - "grad_norm": 1.4531443119049072, - "learning_rate": 7.089748743718593e-05, - "loss": 5.3408, - "step": 29467 - }, - { - "epoch": 15.367926988265971, - "grad_norm": 1.4652445316314697, - "learning_rate": 7.089648241206031e-05, - "loss": 5.2715, - "step": 29468 - }, - { - "epoch": 15.36844850065189, - "grad_norm": 1.3688033819198608, - "learning_rate": 7.089547738693468e-05, - "loss": 5.6096, - "step": 29469 - }, - { - "epoch": 15.36897001303781, - "grad_norm": 1.6245063543319702, - "learning_rate": 7.089447236180905e-05, - "loss": 5.2802, - "step": 29470 - }, - { - "epoch": 15.36949152542373, - "grad_norm": 1.4012175798416138, - "learning_rate": 7.089346733668342e-05, - "loss": 5.5854, - "step": 29471 - }, - { - "epoch": 15.370013037809649, - "grad_norm": 1.5007888078689575, - "learning_rate": 7.08924623115578e-05, - "loss": 5.5889, - "step": 29472 - }, - { - "epoch": 15.370534550195567, - "grad_norm": 1.3954178094863892, - "learning_rate": 7.089145728643216e-05, - "loss": 5.6065, - "step": 29473 - }, - { - "epoch": 15.371056062581486, - "grad_norm": 1.5731464624404907, - "learning_rate": 7.089045226130654e-05, - "loss": 4.8426, - "step": 29474 - }, - { - "epoch": 15.371577574967406, - "grad_norm": 1.4906718730926514, - "learning_rate": 7.08894472361809e-05, - "loss": 5.0653, - "step": 29475 - }, - { - "epoch": 15.372099087353325, - "grad_norm": 1.4771842956542969, - "learning_rate": 7.088844221105528e-05, - "loss": 5.7005, - "step": 29476 - }, - { - "epoch": 15.372620599739244, - "grad_norm": 1.5862138271331787, - "learning_rate": 7.088743718592964e-05, - "loss": 5.4729, - "step": 29477 - }, - { - "epoch": 15.373142112125162, - "grad_norm": 1.4941807985305786, - "learning_rate": 7.088643216080402e-05, - "loss": 5.4593, - "step": 29478 - }, - { - "epoch": 15.373663624511082, - "grad_norm": 1.342432975769043, - "learning_rate": 7.08854271356784e-05, - "loss": 5.3999, - "step": 29479 - }, - { - "epoch": 15.374185136897001, - "grad_norm": 1.413649320602417, - "learning_rate": 7.088442211055276e-05, - "loss": 5.019, - "step": 29480 - }, - { - "epoch": 15.37470664928292, - "grad_norm": 1.5225557088851929, - "learning_rate": 7.088341708542714e-05, - "loss": 5.3928, - "step": 29481 - }, - { - "epoch": 15.37522816166884, - "grad_norm": 1.5688589811325073, - "learning_rate": 7.08824120603015e-05, - "loss": 5.6442, - "step": 29482 - }, - { - "epoch": 15.37574967405476, - "grad_norm": 1.3705451488494873, - "learning_rate": 7.088140703517588e-05, - "loss": 5.4481, - "step": 29483 - }, - { - "epoch": 15.376271186440677, - "grad_norm": 1.377962350845337, - "learning_rate": 7.088040201005025e-05, - "loss": 5.1338, - "step": 29484 - }, - { - "epoch": 15.376792698826597, - "grad_norm": 1.548869252204895, - "learning_rate": 7.087939698492463e-05, - "loss": 5.1079, - "step": 29485 - }, - { - "epoch": 15.377314211212516, - "grad_norm": 1.532320261001587, - "learning_rate": 7.087839195979899e-05, - "loss": 5.2124, - "step": 29486 - }, - { - "epoch": 15.377835723598436, - "grad_norm": 1.5276405811309814, - "learning_rate": 7.087738693467337e-05, - "loss": 5.1563, - "step": 29487 - }, - { - "epoch": 15.378357235984355, - "grad_norm": 1.3906761407852173, - "learning_rate": 7.087638190954775e-05, - "loss": 5.637, - "step": 29488 - }, - { - "epoch": 15.378878748370274, - "grad_norm": 1.4978618621826172, - "learning_rate": 7.087537688442212e-05, - "loss": 5.6066, - "step": 29489 - }, - { - "epoch": 15.379400260756192, - "grad_norm": 1.417959213256836, - "learning_rate": 7.087437185929649e-05, - "loss": 5.6684, - "step": 29490 - }, - { - "epoch": 15.379921773142112, - "grad_norm": 1.450194001197815, - "learning_rate": 7.087336683417087e-05, - "loss": 5.7553, - "step": 29491 - }, - { - "epoch": 15.380443285528031, - "grad_norm": 1.4563955068588257, - "learning_rate": 7.087236180904523e-05, - "loss": 5.0688, - "step": 29492 - }, - { - "epoch": 15.38096479791395, - "grad_norm": 1.469448447227478, - "learning_rate": 7.087135678391961e-05, - "loss": 5.5425, - "step": 29493 - }, - { - "epoch": 15.38148631029987, - "grad_norm": 2.136993885040283, - "learning_rate": 7.087035175879397e-05, - "loss": 4.4531, - "step": 29494 - }, - { - "epoch": 15.38200782268579, - "grad_norm": 1.4247077703475952, - "learning_rate": 7.086934673366834e-05, - "loss": 5.5385, - "step": 29495 - }, - { - "epoch": 15.382529335071707, - "grad_norm": 1.5677019357681274, - "learning_rate": 7.086834170854271e-05, - "loss": 4.6737, - "step": 29496 - }, - { - "epoch": 15.383050847457627, - "grad_norm": 1.4631104469299316, - "learning_rate": 7.086733668341708e-05, - "loss": 5.4546, - "step": 29497 - }, - { - "epoch": 15.383572359843546, - "grad_norm": 1.4256298542022705, - "learning_rate": 7.086633165829146e-05, - "loss": 5.4939, - "step": 29498 - }, - { - "epoch": 15.384093872229466, - "grad_norm": 1.3910245895385742, - "learning_rate": 7.086532663316583e-05, - "loss": 5.0762, - "step": 29499 - }, - { - "epoch": 15.384615384615385, - "grad_norm": 1.5248229503631592, - "learning_rate": 7.086432160804021e-05, - "loss": 5.7321, - "step": 29500 - }, - { - "epoch": 15.385136897001304, - "grad_norm": 1.443608283996582, - "learning_rate": 7.086331658291458e-05, - "loss": 5.6693, - "step": 29501 - }, - { - "epoch": 15.385658409387222, - "grad_norm": 1.4844220876693726, - "learning_rate": 7.086231155778895e-05, - "loss": 5.4416, - "step": 29502 - }, - { - "epoch": 15.386179921773142, - "grad_norm": 1.5295706987380981, - "learning_rate": 7.086130653266332e-05, - "loss": 4.7886, - "step": 29503 - }, - { - "epoch": 15.386701434159061, - "grad_norm": 1.4381541013717651, - "learning_rate": 7.08603015075377e-05, - "loss": 5.1235, - "step": 29504 - }, - { - "epoch": 15.38722294654498, - "grad_norm": 1.505983829498291, - "learning_rate": 7.085929648241206e-05, - "loss": 5.24, - "step": 29505 - }, - { - "epoch": 15.3877444589309, - "grad_norm": 1.505308985710144, - "learning_rate": 7.085829145728644e-05, - "loss": 5.0087, - "step": 29506 - }, - { - "epoch": 15.38826597131682, - "grad_norm": 1.5623494386672974, - "learning_rate": 7.08572864321608e-05, - "loss": 5.3989, - "step": 29507 - }, - { - "epoch": 15.388787483702737, - "grad_norm": 1.4522091150283813, - "learning_rate": 7.085628140703518e-05, - "loss": 5.478, - "step": 29508 - }, - { - "epoch": 15.389308996088657, - "grad_norm": 1.559999704360962, - "learning_rate": 7.085527638190956e-05, - "loss": 5.4437, - "step": 29509 - }, - { - "epoch": 15.389830508474576, - "grad_norm": 1.4526773691177368, - "learning_rate": 7.085427135678392e-05, - "loss": 5.3177, - "step": 29510 - }, - { - "epoch": 15.390352020860496, - "grad_norm": 1.4847043752670288, - "learning_rate": 7.08532663316583e-05, - "loss": 5.4479, - "step": 29511 - }, - { - "epoch": 15.390873533246415, - "grad_norm": 1.37345290184021, - "learning_rate": 7.085226130653266e-05, - "loss": 5.6504, - "step": 29512 - }, - { - "epoch": 15.391395045632335, - "grad_norm": 1.4435133934020996, - "learning_rate": 7.085125628140704e-05, - "loss": 5.4697, - "step": 29513 - }, - { - "epoch": 15.391916558018252, - "grad_norm": 1.5623966455459595, - "learning_rate": 7.08502512562814e-05, - "loss": 5.1465, - "step": 29514 - }, - { - "epoch": 15.392438070404172, - "grad_norm": 1.6322609186172485, - "learning_rate": 7.084924623115578e-05, - "loss": 5.4124, - "step": 29515 - }, - { - "epoch": 15.392959582790091, - "grad_norm": 1.4241400957107544, - "learning_rate": 7.084824120603015e-05, - "loss": 5.5326, - "step": 29516 - }, - { - "epoch": 15.39348109517601, - "grad_norm": 1.5854367017745972, - "learning_rate": 7.084723618090453e-05, - "loss": 5.5423, - "step": 29517 - }, - { - "epoch": 15.39400260756193, - "grad_norm": 1.5972968339920044, - "learning_rate": 7.084623115577889e-05, - "loss": 4.9141, - "step": 29518 - }, - { - "epoch": 15.39452411994785, - "grad_norm": 1.4286434650421143, - "learning_rate": 7.084522613065327e-05, - "loss": 5.4271, - "step": 29519 - }, - { - "epoch": 15.395045632333767, - "grad_norm": 1.5237102508544922, - "learning_rate": 7.084422110552765e-05, - "loss": 5.1988, - "step": 29520 - }, - { - "epoch": 15.395567144719687, - "grad_norm": 1.4271405935287476, - "learning_rate": 7.084321608040201e-05, - "loss": 5.3587, - "step": 29521 - }, - { - "epoch": 15.396088657105606, - "grad_norm": 1.541608214378357, - "learning_rate": 7.084221105527639e-05, - "loss": 5.3198, - "step": 29522 - }, - { - "epoch": 15.396610169491526, - "grad_norm": 1.5112498998641968, - "learning_rate": 7.084120603015075e-05, - "loss": 5.0447, - "step": 29523 - }, - { - "epoch": 15.397131681877445, - "grad_norm": 1.475095510482788, - "learning_rate": 7.084020100502513e-05, - "loss": 5.5876, - "step": 29524 - }, - { - "epoch": 15.397653194263365, - "grad_norm": 1.4374611377716064, - "learning_rate": 7.08391959798995e-05, - "loss": 5.3859, - "step": 29525 - }, - { - "epoch": 15.398174706649282, - "grad_norm": 1.5243589878082275, - "learning_rate": 7.083819095477387e-05, - "loss": 4.8675, - "step": 29526 - }, - { - "epoch": 15.398696219035202, - "grad_norm": 1.4520447254180908, - "learning_rate": 7.083718592964824e-05, - "loss": 5.4058, - "step": 29527 - }, - { - "epoch": 15.399217731421121, - "grad_norm": 1.484083890914917, - "learning_rate": 7.083618090452261e-05, - "loss": 4.9759, - "step": 29528 - }, - { - "epoch": 15.39973924380704, - "grad_norm": 1.614174485206604, - "learning_rate": 7.083517587939699e-05, - "loss": 5.0371, - "step": 29529 - }, - { - "epoch": 15.40026075619296, - "grad_norm": 1.4630359411239624, - "learning_rate": 7.083417085427137e-05, - "loss": 5.8009, - "step": 29530 - }, - { - "epoch": 15.40078226857888, - "grad_norm": 1.5003620386123657, - "learning_rate": 7.083316582914573e-05, - "loss": 5.1703, - "step": 29531 - }, - { - "epoch": 15.401303780964797, - "grad_norm": 1.421350121498108, - "learning_rate": 7.083216080402011e-05, - "loss": 4.9362, - "step": 29532 - }, - { - "epoch": 15.401825293350717, - "grad_norm": 1.6195141077041626, - "learning_rate": 7.083115577889448e-05, - "loss": 5.1746, - "step": 29533 - }, - { - "epoch": 15.402346805736636, - "grad_norm": 1.432075023651123, - "learning_rate": 7.083015075376884e-05, - "loss": 5.5218, - "step": 29534 - }, - { - "epoch": 15.402868318122556, - "grad_norm": 1.4234788417816162, - "learning_rate": 7.082914572864322e-05, - "loss": 5.5878, - "step": 29535 - }, - { - "epoch": 15.403389830508475, - "grad_norm": 1.3575215339660645, - "learning_rate": 7.082814070351758e-05, - "loss": 4.8386, - "step": 29536 - }, - { - "epoch": 15.403911342894395, - "grad_norm": 1.3796855211257935, - "learning_rate": 7.082713567839196e-05, - "loss": 5.6088, - "step": 29537 - }, - { - "epoch": 15.404432855280312, - "grad_norm": 1.5134105682373047, - "learning_rate": 7.082613065326633e-05, - "loss": 5.3274, - "step": 29538 - }, - { - "epoch": 15.404954367666232, - "grad_norm": 1.4971941709518433, - "learning_rate": 7.08251256281407e-05, - "loss": 5.7142, - "step": 29539 - }, - { - "epoch": 15.405475880052151, - "grad_norm": 1.4265614748001099, - "learning_rate": 7.082412060301508e-05, - "loss": 5.6567, - "step": 29540 - }, - { - "epoch": 15.40599739243807, - "grad_norm": 1.4602537155151367, - "learning_rate": 7.082311557788946e-05, - "loss": 5.4656, - "step": 29541 - }, - { - "epoch": 15.40651890482399, - "grad_norm": 1.5047004222869873, - "learning_rate": 7.082211055276382e-05, - "loss": 5.5282, - "step": 29542 - }, - { - "epoch": 15.40704041720991, - "grad_norm": 1.5932377576828003, - "learning_rate": 7.08211055276382e-05, - "loss": 4.8816, - "step": 29543 - }, - { - "epoch": 15.407561929595827, - "grad_norm": 1.6398977041244507, - "learning_rate": 7.082010050251257e-05, - "loss": 5.345, - "step": 29544 - }, - { - "epoch": 15.408083441981747, - "grad_norm": 1.5432499647140503, - "learning_rate": 7.081909547738694e-05, - "loss": 5.1098, - "step": 29545 - }, - { - "epoch": 15.408604954367666, - "grad_norm": 1.575682282447815, - "learning_rate": 7.081809045226131e-05, - "loss": 4.7624, - "step": 29546 - }, - { - "epoch": 15.409126466753586, - "grad_norm": 1.4707602262496948, - "learning_rate": 7.081708542713567e-05, - "loss": 5.368, - "step": 29547 - }, - { - "epoch": 15.409647979139505, - "grad_norm": 2.2985758781433105, - "learning_rate": 7.081608040201005e-05, - "loss": 5.2952, - "step": 29548 - }, - { - "epoch": 15.410169491525423, - "grad_norm": 1.550214409828186, - "learning_rate": 7.081507537688443e-05, - "loss": 5.4906, - "step": 29549 - }, - { - "epoch": 15.410691003911342, - "grad_norm": 1.5360442399978638, - "learning_rate": 7.08140703517588e-05, - "loss": 5.3567, - "step": 29550 - }, - { - "epoch": 15.411212516297262, - "grad_norm": 1.447292685508728, - "learning_rate": 7.081306532663317e-05, - "loss": 4.1785, - "step": 29551 - }, - { - "epoch": 15.411734028683181, - "grad_norm": 1.448920488357544, - "learning_rate": 7.081206030150755e-05, - "loss": 5.6326, - "step": 29552 - }, - { - "epoch": 15.4122555410691, - "grad_norm": 1.470446228981018, - "learning_rate": 7.081105527638191e-05, - "loss": 5.1297, - "step": 29553 - }, - { - "epoch": 15.41277705345502, - "grad_norm": 1.486828327178955, - "learning_rate": 7.081005025125629e-05, - "loss": 5.6922, - "step": 29554 - }, - { - "epoch": 15.41329856584094, - "grad_norm": 1.4561619758605957, - "learning_rate": 7.080904522613065e-05, - "loss": 5.7488, - "step": 29555 - }, - { - "epoch": 15.413820078226857, - "grad_norm": 1.4942246675491333, - "learning_rate": 7.080804020100503e-05, - "loss": 5.3823, - "step": 29556 - }, - { - "epoch": 15.414341590612777, - "grad_norm": 1.5058679580688477, - "learning_rate": 7.08070351758794e-05, - "loss": 5.2124, - "step": 29557 - }, - { - "epoch": 15.414863102998696, - "grad_norm": 1.4378795623779297, - "learning_rate": 7.080603015075377e-05, - "loss": 5.2824, - "step": 29558 - }, - { - "epoch": 15.415384615384616, - "grad_norm": 1.357006549835205, - "learning_rate": 7.080502512562814e-05, - "loss": 5.0268, - "step": 29559 - }, - { - "epoch": 15.415906127770535, - "grad_norm": 1.548862338066101, - "learning_rate": 7.080402010050252e-05, - "loss": 5.0198, - "step": 29560 - }, - { - "epoch": 15.416427640156453, - "grad_norm": 1.6583184003829956, - "learning_rate": 7.08030150753769e-05, - "loss": 5.2708, - "step": 29561 - }, - { - "epoch": 15.416949152542372, - "grad_norm": 1.3646725416183472, - "learning_rate": 7.080201005025126e-05, - "loss": 5.5889, - "step": 29562 - }, - { - "epoch": 15.417470664928292, - "grad_norm": 1.4311424493789673, - "learning_rate": 7.080100502512564e-05, - "loss": 5.469, - "step": 29563 - }, - { - "epoch": 15.417992177314211, - "grad_norm": 1.4635313749313354, - "learning_rate": 7.08e-05, - "loss": 5.2255, - "step": 29564 - }, - { - "epoch": 15.41851368970013, - "grad_norm": 1.4622552394866943, - "learning_rate": 7.079899497487438e-05, - "loss": 5.4022, - "step": 29565 - }, - { - "epoch": 15.41903520208605, - "grad_norm": 1.351656198501587, - "learning_rate": 7.079798994974874e-05, - "loss": 5.5965, - "step": 29566 - }, - { - "epoch": 15.419556714471968, - "grad_norm": 1.449845790863037, - "learning_rate": 7.079698492462312e-05, - "loss": 5.2911, - "step": 29567 - }, - { - "epoch": 15.420078226857887, - "grad_norm": 1.5032999515533447, - "learning_rate": 7.079597989949748e-05, - "loss": 5.5233, - "step": 29568 - }, - { - "epoch": 15.420599739243807, - "grad_norm": 1.5406436920166016, - "learning_rate": 7.079497487437186e-05, - "loss": 5.4829, - "step": 29569 - }, - { - "epoch": 15.421121251629726, - "grad_norm": 1.4365732669830322, - "learning_rate": 7.079396984924624e-05, - "loss": 5.5003, - "step": 29570 - }, - { - "epoch": 15.421642764015646, - "grad_norm": 1.3835101127624512, - "learning_rate": 7.079296482412062e-05, - "loss": 5.7901, - "step": 29571 - }, - { - "epoch": 15.422164276401565, - "grad_norm": 1.5694085359573364, - "learning_rate": 7.079195979899498e-05, - "loss": 5.4459, - "step": 29572 - }, - { - "epoch": 15.422685788787483, - "grad_norm": 1.45012629032135, - "learning_rate": 7.079095477386935e-05, - "loss": 5.1625, - "step": 29573 - }, - { - "epoch": 15.423207301173402, - "grad_norm": 1.5276774168014526, - "learning_rate": 7.078994974874372e-05, - "loss": 5.3154, - "step": 29574 - }, - { - "epoch": 15.423728813559322, - "grad_norm": 1.3844096660614014, - "learning_rate": 7.078894472361809e-05, - "loss": 5.5015, - "step": 29575 - }, - { - "epoch": 15.424250325945241, - "grad_norm": 1.5624725818634033, - "learning_rate": 7.078793969849247e-05, - "loss": 5.3282, - "step": 29576 - }, - { - "epoch": 15.42477183833116, - "grad_norm": 1.6406066417694092, - "learning_rate": 7.078693467336683e-05, - "loss": 5.1687, - "step": 29577 - }, - { - "epoch": 15.42529335071708, - "grad_norm": 1.4789659976959229, - "learning_rate": 7.078592964824121e-05, - "loss": 5.5453, - "step": 29578 - }, - { - "epoch": 15.425814863102998, - "grad_norm": 1.4823461771011353, - "learning_rate": 7.078492462311557e-05, - "loss": 5.308, - "step": 29579 - }, - { - "epoch": 15.426336375488917, - "grad_norm": 1.4986494779586792, - "learning_rate": 7.078391959798995e-05, - "loss": 4.8214, - "step": 29580 - }, - { - "epoch": 15.426857887874837, - "grad_norm": 1.4134584665298462, - "learning_rate": 7.078291457286433e-05, - "loss": 5.7069, - "step": 29581 - }, - { - "epoch": 15.427379400260756, - "grad_norm": 1.5302245616912842, - "learning_rate": 7.07819095477387e-05, - "loss": 5.199, - "step": 29582 - }, - { - "epoch": 15.427900912646676, - "grad_norm": 1.405515432357788, - "learning_rate": 7.078090452261307e-05, - "loss": 5.1832, - "step": 29583 - }, - { - "epoch": 15.428422425032595, - "grad_norm": 1.3691824674606323, - "learning_rate": 7.077989949748745e-05, - "loss": 5.2823, - "step": 29584 - }, - { - "epoch": 15.428943937418513, - "grad_norm": 1.2872645854949951, - "learning_rate": 7.077889447236181e-05, - "loss": 4.8932, - "step": 29585 - }, - { - "epoch": 15.429465449804432, - "grad_norm": 1.4782060384750366, - "learning_rate": 7.077788944723619e-05, - "loss": 5.7427, - "step": 29586 - }, - { - "epoch": 15.429986962190352, - "grad_norm": 1.4718554019927979, - "learning_rate": 7.077688442211055e-05, - "loss": 5.6756, - "step": 29587 - }, - { - "epoch": 15.430508474576271, - "grad_norm": 1.4041998386383057, - "learning_rate": 7.077587939698492e-05, - "loss": 5.2644, - "step": 29588 - }, - { - "epoch": 15.43102998696219, - "grad_norm": 1.5449012517929077, - "learning_rate": 7.07748743718593e-05, - "loss": 5.3102, - "step": 29589 - }, - { - "epoch": 15.43155149934811, - "grad_norm": 1.367257833480835, - "learning_rate": 7.077386934673367e-05, - "loss": 5.6383, - "step": 29590 - }, - { - "epoch": 15.432073011734028, - "grad_norm": 1.4013272523880005, - "learning_rate": 7.077286432160805e-05, - "loss": 4.9891, - "step": 29591 - }, - { - "epoch": 15.432594524119947, - "grad_norm": 1.441127896308899, - "learning_rate": 7.077185929648242e-05, - "loss": 5.0508, - "step": 29592 - }, - { - "epoch": 15.433116036505867, - "grad_norm": 1.378674864768982, - "learning_rate": 7.07708542713568e-05, - "loss": 5.6718, - "step": 29593 - }, - { - "epoch": 15.433637548891786, - "grad_norm": 1.3998055458068848, - "learning_rate": 7.076984924623116e-05, - "loss": 4.8891, - "step": 29594 - }, - { - "epoch": 15.434159061277706, - "grad_norm": 1.4207749366760254, - "learning_rate": 7.076884422110554e-05, - "loss": 5.9114, - "step": 29595 - }, - { - "epoch": 15.434680573663625, - "grad_norm": 1.4738154411315918, - "learning_rate": 7.07678391959799e-05, - "loss": 5.3692, - "step": 29596 - }, - { - "epoch": 15.435202086049543, - "grad_norm": 1.4386310577392578, - "learning_rate": 7.076683417085428e-05, - "loss": 5.4765, - "step": 29597 - }, - { - "epoch": 15.435723598435462, - "grad_norm": 1.3407903909683228, - "learning_rate": 7.076582914572864e-05, - "loss": 5.0232, - "step": 29598 - }, - { - "epoch": 15.436245110821382, - "grad_norm": 1.5039657354354858, - "learning_rate": 7.076482412060302e-05, - "loss": 5.5895, - "step": 29599 - }, - { - "epoch": 15.436766623207301, - "grad_norm": 1.520312786102295, - "learning_rate": 7.076381909547738e-05, - "loss": 5.345, - "step": 29600 - }, - { - "epoch": 15.43728813559322, - "grad_norm": 1.3942300081253052, - "learning_rate": 7.076281407035176e-05, - "loss": 5.6043, - "step": 29601 - }, - { - "epoch": 15.43780964797914, - "grad_norm": 1.4001415967941284, - "learning_rate": 7.076180904522614e-05, - "loss": 5.6145, - "step": 29602 - }, - { - "epoch": 15.438331160365058, - "grad_norm": 1.3699923753738403, - "learning_rate": 7.07608040201005e-05, - "loss": 5.4561, - "step": 29603 - }, - { - "epoch": 15.438852672750977, - "grad_norm": 1.5702497959136963, - "learning_rate": 7.075979899497488e-05, - "loss": 5.2234, - "step": 29604 - }, - { - "epoch": 15.439374185136897, - "grad_norm": 1.4122146368026733, - "learning_rate": 7.075879396984925e-05, - "loss": 5.4918, - "step": 29605 - }, - { - "epoch": 15.439895697522816, - "grad_norm": 1.3964678049087524, - "learning_rate": 7.075778894472362e-05, - "loss": 4.864, - "step": 29606 - }, - { - "epoch": 15.440417209908736, - "grad_norm": 1.5024381875991821, - "learning_rate": 7.075678391959799e-05, - "loss": 5.2271, - "step": 29607 - }, - { - "epoch": 15.440938722294655, - "grad_norm": 1.6814385652542114, - "learning_rate": 7.075577889447237e-05, - "loss": 4.9364, - "step": 29608 - }, - { - "epoch": 15.441460234680573, - "grad_norm": 1.5830061435699463, - "learning_rate": 7.075477386934673e-05, - "loss": 5.2519, - "step": 29609 - }, - { - "epoch": 15.441981747066492, - "grad_norm": 1.6121630668640137, - "learning_rate": 7.075376884422111e-05, - "loss": 5.1507, - "step": 29610 - }, - { - "epoch": 15.442503259452412, - "grad_norm": 1.4428331851959229, - "learning_rate": 7.075276381909549e-05, - "loss": 5.1669, - "step": 29611 - }, - { - "epoch": 15.443024771838331, - "grad_norm": 1.497091293334961, - "learning_rate": 7.075175879396986e-05, - "loss": 5.2744, - "step": 29612 - }, - { - "epoch": 15.44354628422425, - "grad_norm": 1.4903266429901123, - "learning_rate": 7.075075376884423e-05, - "loss": 5.1646, - "step": 29613 - }, - { - "epoch": 15.44406779661017, - "grad_norm": 1.5185997486114502, - "learning_rate": 7.074974874371859e-05, - "loss": 5.3623, - "step": 29614 - }, - { - "epoch": 15.444589308996088, - "grad_norm": 1.4407308101654053, - "learning_rate": 7.074874371859297e-05, - "loss": 5.535, - "step": 29615 - }, - { - "epoch": 15.445110821382007, - "grad_norm": 1.437556266784668, - "learning_rate": 7.074773869346734e-05, - "loss": 5.3431, - "step": 29616 - }, - { - "epoch": 15.445632333767927, - "grad_norm": 1.5059400796890259, - "learning_rate": 7.074673366834171e-05, - "loss": 5.4475, - "step": 29617 - }, - { - "epoch": 15.446153846153846, - "grad_norm": 1.608389139175415, - "learning_rate": 7.074572864321608e-05, - "loss": 5.171, - "step": 29618 - }, - { - "epoch": 15.446675358539766, - "grad_norm": 1.4422017335891724, - "learning_rate": 7.074472361809046e-05, - "loss": 5.2614, - "step": 29619 - }, - { - "epoch": 15.447196870925685, - "grad_norm": 1.5849167108535767, - "learning_rate": 7.074371859296482e-05, - "loss": 5.3063, - "step": 29620 - }, - { - "epoch": 15.447718383311603, - "grad_norm": 1.4444866180419922, - "learning_rate": 7.07427135678392e-05, - "loss": 4.7844, - "step": 29621 - }, - { - "epoch": 15.448239895697522, - "grad_norm": 1.5534603595733643, - "learning_rate": 7.074170854271357e-05, - "loss": 5.5338, - "step": 29622 - }, - { - "epoch": 15.448761408083442, - "grad_norm": 1.5264501571655273, - "learning_rate": 7.074070351758795e-05, - "loss": 5.059, - "step": 29623 - }, - { - "epoch": 15.449282920469361, - "grad_norm": 1.5578356981277466, - "learning_rate": 7.073969849246232e-05, - "loss": 5.1032, - "step": 29624 - }, - { - "epoch": 15.44980443285528, - "grad_norm": 1.5481518507003784, - "learning_rate": 7.07386934673367e-05, - "loss": 5.2292, - "step": 29625 - }, - { - "epoch": 15.4503259452412, - "grad_norm": 1.4260507822036743, - "learning_rate": 7.073768844221106e-05, - "loss": 5.2819, - "step": 29626 - }, - { - "epoch": 15.450847457627118, - "grad_norm": 1.4907326698303223, - "learning_rate": 7.073668341708542e-05, - "loss": 5.141, - "step": 29627 - }, - { - "epoch": 15.451368970013037, - "grad_norm": 1.4179877042770386, - "learning_rate": 7.07356783919598e-05, - "loss": 5.4145, - "step": 29628 - }, - { - "epoch": 15.451890482398957, - "grad_norm": 1.4200284481048584, - "learning_rate": 7.073467336683417e-05, - "loss": 5.565, - "step": 29629 - }, - { - "epoch": 15.452411994784876, - "grad_norm": 1.4873936176300049, - "learning_rate": 7.073366834170854e-05, - "loss": 5.5989, - "step": 29630 - }, - { - "epoch": 15.452933507170796, - "grad_norm": 1.4622176885604858, - "learning_rate": 7.073266331658292e-05, - "loss": 5.4698, - "step": 29631 - }, - { - "epoch": 15.453455019556715, - "grad_norm": 1.4618054628372192, - "learning_rate": 7.07316582914573e-05, - "loss": 5.5624, - "step": 29632 - }, - { - "epoch": 15.453976531942633, - "grad_norm": 1.501267671585083, - "learning_rate": 7.073065326633166e-05, - "loss": 5.3128, - "step": 29633 - }, - { - "epoch": 15.454498044328552, - "grad_norm": 1.5188133716583252, - "learning_rate": 7.072964824120604e-05, - "loss": 5.1546, - "step": 29634 - }, - { - "epoch": 15.455019556714472, - "grad_norm": 1.4764472246170044, - "learning_rate": 7.07286432160804e-05, - "loss": 4.7659, - "step": 29635 - }, - { - "epoch": 15.455541069100391, - "grad_norm": 1.500624656677246, - "learning_rate": 7.072763819095478e-05, - "loss": 4.9909, - "step": 29636 - }, - { - "epoch": 15.45606258148631, - "grad_norm": 1.4550176858901978, - "learning_rate": 7.072663316582915e-05, - "loss": 5.6096, - "step": 29637 - }, - { - "epoch": 15.45658409387223, - "grad_norm": 1.4890352487564087, - "learning_rate": 7.072562814070353e-05, - "loss": 5.5865, - "step": 29638 - }, - { - "epoch": 15.457105606258148, - "grad_norm": 1.410120964050293, - "learning_rate": 7.072462311557789e-05, - "loss": 5.564, - "step": 29639 - }, - { - "epoch": 15.457627118644067, - "grad_norm": 1.3942935466766357, - "learning_rate": 7.072361809045225e-05, - "loss": 5.7211, - "step": 29640 - }, - { - "epoch": 15.458148631029987, - "grad_norm": 1.5763134956359863, - "learning_rate": 7.072261306532663e-05, - "loss": 5.0607, - "step": 29641 - }, - { - "epoch": 15.458670143415906, - "grad_norm": 1.5173691511154175, - "learning_rate": 7.072160804020101e-05, - "loss": 5.3327, - "step": 29642 - }, - { - "epoch": 15.459191655801826, - "grad_norm": 1.5600018501281738, - "learning_rate": 7.072060301507539e-05, - "loss": 5.2901, - "step": 29643 - }, - { - "epoch": 15.459713168187744, - "grad_norm": 1.4765982627868652, - "learning_rate": 7.071959798994975e-05, - "loss": 5.4088, - "step": 29644 - }, - { - "epoch": 15.460234680573663, - "grad_norm": 1.4144312143325806, - "learning_rate": 7.071859296482413e-05, - "loss": 5.3649, - "step": 29645 - }, - { - "epoch": 15.460756192959582, - "grad_norm": 1.2828383445739746, - "learning_rate": 7.07175879396985e-05, - "loss": 5.8385, - "step": 29646 - }, - { - "epoch": 15.461277705345502, - "grad_norm": 1.4504365921020508, - "learning_rate": 7.071658291457287e-05, - "loss": 5.7333, - "step": 29647 - }, - { - "epoch": 15.461799217731421, - "grad_norm": 1.4201005697250366, - "learning_rate": 7.071557788944724e-05, - "loss": 5.1866, - "step": 29648 - }, - { - "epoch": 15.46232073011734, - "grad_norm": 1.3870352506637573, - "learning_rate": 7.071457286432161e-05, - "loss": 5.4895, - "step": 29649 - }, - { - "epoch": 15.46284224250326, - "grad_norm": 1.5437941551208496, - "learning_rate": 7.071356783919598e-05, - "loss": 5.7382, - "step": 29650 - }, - { - "epoch": 15.463363754889178, - "grad_norm": 1.513773798942566, - "learning_rate": 7.071256281407036e-05, - "loss": 5.1005, - "step": 29651 - }, - { - "epoch": 15.463885267275097, - "grad_norm": 1.4854575395584106, - "learning_rate": 7.071155778894472e-05, - "loss": 5.5668, - "step": 29652 - }, - { - "epoch": 15.464406779661017, - "grad_norm": 1.5179544687271118, - "learning_rate": 7.07105527638191e-05, - "loss": 5.0194, - "step": 29653 - }, - { - "epoch": 15.464928292046936, - "grad_norm": 1.4691311120986938, - "learning_rate": 7.070954773869348e-05, - "loss": 5.3163, - "step": 29654 - }, - { - "epoch": 15.465449804432856, - "grad_norm": 1.4095406532287598, - "learning_rate": 7.070854271356784e-05, - "loss": 5.4378, - "step": 29655 - }, - { - "epoch": 15.465971316818774, - "grad_norm": 1.5820074081420898, - "learning_rate": 7.070753768844222e-05, - "loss": 5.4457, - "step": 29656 - }, - { - "epoch": 15.466492829204693, - "grad_norm": 1.4046874046325684, - "learning_rate": 7.070653266331658e-05, - "loss": 5.7129, - "step": 29657 - }, - { - "epoch": 15.467014341590613, - "grad_norm": 1.4070909023284912, - "learning_rate": 7.070552763819096e-05, - "loss": 5.6835, - "step": 29658 - }, - { - "epoch": 15.467535853976532, - "grad_norm": 1.4900997877120972, - "learning_rate": 7.070452261306532e-05, - "loss": 5.2144, - "step": 29659 - }, - { - "epoch": 15.468057366362451, - "grad_norm": 1.5742108821868896, - "learning_rate": 7.07035175879397e-05, - "loss": 5.6229, - "step": 29660 - }, - { - "epoch": 15.468578878748371, - "grad_norm": 1.4454482793807983, - "learning_rate": 7.070251256281407e-05, - "loss": 5.5305, - "step": 29661 - }, - { - "epoch": 15.469100391134289, - "grad_norm": 1.509254813194275, - "learning_rate": 7.070150753768844e-05, - "loss": 5.4487, - "step": 29662 - }, - { - "epoch": 15.469621903520208, - "grad_norm": 1.4824298620224, - "learning_rate": 7.070050251256282e-05, - "loss": 5.6882, - "step": 29663 - }, - { - "epoch": 15.470143415906128, - "grad_norm": 1.4978344440460205, - "learning_rate": 7.06994974874372e-05, - "loss": 5.8415, - "step": 29664 - }, - { - "epoch": 15.470664928292047, - "grad_norm": 1.5148247480392456, - "learning_rate": 7.069849246231156e-05, - "loss": 5.4826, - "step": 29665 - }, - { - "epoch": 15.471186440677966, - "grad_norm": 1.4739954471588135, - "learning_rate": 7.069748743718593e-05, - "loss": 5.0488, - "step": 29666 - }, - { - "epoch": 15.471707953063886, - "grad_norm": 1.6225553750991821, - "learning_rate": 7.06964824120603e-05, - "loss": 4.4646, - "step": 29667 - }, - { - "epoch": 15.472229465449804, - "grad_norm": 1.4783589839935303, - "learning_rate": 7.069547738693467e-05, - "loss": 5.7432, - "step": 29668 - }, - { - "epoch": 15.472750977835723, - "grad_norm": 1.5139974355697632, - "learning_rate": 7.069447236180905e-05, - "loss": 5.1828, - "step": 29669 - }, - { - "epoch": 15.473272490221643, - "grad_norm": 1.531670093536377, - "learning_rate": 7.069346733668341e-05, - "loss": 5.1001, - "step": 29670 - }, - { - "epoch": 15.473794002607562, - "grad_norm": 1.5472794771194458, - "learning_rate": 7.069246231155779e-05, - "loss": 5.2673, - "step": 29671 - }, - { - "epoch": 15.474315514993481, - "grad_norm": 1.5028196573257446, - "learning_rate": 7.069145728643215e-05, - "loss": 5.2526, - "step": 29672 - }, - { - "epoch": 15.474837027379401, - "grad_norm": 1.458742380142212, - "learning_rate": 7.069045226130653e-05, - "loss": 5.6484, - "step": 29673 - }, - { - "epoch": 15.475358539765319, - "grad_norm": 1.5127419233322144, - "learning_rate": 7.068944723618091e-05, - "loss": 5.0239, - "step": 29674 - }, - { - "epoch": 15.475880052151238, - "grad_norm": 1.377558708190918, - "learning_rate": 7.068844221105529e-05, - "loss": 5.6177, - "step": 29675 - }, - { - "epoch": 15.476401564537158, - "grad_norm": 1.3718022108078003, - "learning_rate": 7.068743718592965e-05, - "loss": 5.3764, - "step": 29676 - }, - { - "epoch": 15.476923076923077, - "grad_norm": 1.3848201036453247, - "learning_rate": 7.068643216080403e-05, - "loss": 5.6315, - "step": 29677 - }, - { - "epoch": 15.477444589308996, - "grad_norm": 1.433778166770935, - "learning_rate": 7.06854271356784e-05, - "loss": 5.3604, - "step": 29678 - }, - { - "epoch": 15.477966101694916, - "grad_norm": 1.437677264213562, - "learning_rate": 7.068442211055277e-05, - "loss": 5.463, - "step": 29679 - }, - { - "epoch": 15.478487614080834, - "grad_norm": 1.4279510974884033, - "learning_rate": 7.068341708542714e-05, - "loss": 5.3963, - "step": 29680 - }, - { - "epoch": 15.479009126466753, - "grad_norm": 1.5222307443618774, - "learning_rate": 7.06824120603015e-05, - "loss": 5.3391, - "step": 29681 - }, - { - "epoch": 15.479530638852673, - "grad_norm": 1.515159010887146, - "learning_rate": 7.068140703517588e-05, - "loss": 5.0775, - "step": 29682 - }, - { - "epoch": 15.480052151238592, - "grad_norm": 1.3471401929855347, - "learning_rate": 7.068040201005026e-05, - "loss": 5.7657, - "step": 29683 - }, - { - "epoch": 15.480573663624511, - "grad_norm": 1.6295406818389893, - "learning_rate": 7.067939698492463e-05, - "loss": 5.2212, - "step": 29684 - }, - { - "epoch": 15.481095176010431, - "grad_norm": 1.4862338304519653, - "learning_rate": 7.0678391959799e-05, - "loss": 5.3154, - "step": 29685 - }, - { - "epoch": 15.481616688396349, - "grad_norm": 1.3492958545684814, - "learning_rate": 7.067738693467338e-05, - "loss": 5.7642, - "step": 29686 - }, - { - "epoch": 15.482138200782268, - "grad_norm": 1.6241532564163208, - "learning_rate": 7.067638190954774e-05, - "loss": 4.9871, - "step": 29687 - }, - { - "epoch": 15.482659713168188, - "grad_norm": 1.4315733909606934, - "learning_rate": 7.067537688442212e-05, - "loss": 5.6198, - "step": 29688 - }, - { - "epoch": 15.483181225554107, - "grad_norm": 1.4734489917755127, - "learning_rate": 7.067437185929648e-05, - "loss": 5.3938, - "step": 29689 - }, - { - "epoch": 15.483702737940026, - "grad_norm": 1.4832732677459717, - "learning_rate": 7.067336683417086e-05, - "loss": 5.4417, - "step": 29690 - }, - { - "epoch": 15.484224250325946, - "grad_norm": 1.436396837234497, - "learning_rate": 7.067236180904522e-05, - "loss": 5.409, - "step": 29691 - }, - { - "epoch": 15.484745762711864, - "grad_norm": 1.4367002248764038, - "learning_rate": 7.06713567839196e-05, - "loss": 5.5854, - "step": 29692 - }, - { - "epoch": 15.485267275097783, - "grad_norm": 1.401820182800293, - "learning_rate": 7.067035175879397e-05, - "loss": 5.4685, - "step": 29693 - }, - { - "epoch": 15.485788787483703, - "grad_norm": 1.7150884866714478, - "learning_rate": 7.066934673366834e-05, - "loss": 4.7839, - "step": 29694 - }, - { - "epoch": 15.486310299869622, - "grad_norm": 1.508100152015686, - "learning_rate": 7.066834170854272e-05, - "loss": 5.0688, - "step": 29695 - }, - { - "epoch": 15.486831812255542, - "grad_norm": 1.4716459512710571, - "learning_rate": 7.066733668341709e-05, - "loss": 4.888, - "step": 29696 - }, - { - "epoch": 15.487353324641461, - "grad_norm": 1.4668211936950684, - "learning_rate": 7.066633165829146e-05, - "loss": 4.4758, - "step": 29697 - }, - { - "epoch": 15.487874837027379, - "grad_norm": 1.447083592414856, - "learning_rate": 7.066532663316583e-05, - "loss": 5.4312, - "step": 29698 - }, - { - "epoch": 15.488396349413298, - "grad_norm": 1.5265862941741943, - "learning_rate": 7.066432160804021e-05, - "loss": 5.0184, - "step": 29699 - }, - { - "epoch": 15.488917861799218, - "grad_norm": 1.4284898042678833, - "learning_rate": 7.066331658291457e-05, - "loss": 4.9711, - "step": 29700 - }, - { - "epoch": 15.489439374185137, - "grad_norm": 1.5861599445343018, - "learning_rate": 7.066231155778895e-05, - "loss": 4.7551, - "step": 29701 - }, - { - "epoch": 15.489960886571057, - "grad_norm": 1.484700083732605, - "learning_rate": 7.066130653266331e-05, - "loss": 4.9657, - "step": 29702 - }, - { - "epoch": 15.490482398956976, - "grad_norm": 1.486958384513855, - "learning_rate": 7.066030150753769e-05, - "loss": 5.6232, - "step": 29703 - }, - { - "epoch": 15.491003911342894, - "grad_norm": 1.399328351020813, - "learning_rate": 7.065929648241207e-05, - "loss": 5.5511, - "step": 29704 - }, - { - "epoch": 15.491525423728813, - "grad_norm": 1.4129737615585327, - "learning_rate": 7.065829145728645e-05, - "loss": 5.5657, - "step": 29705 - }, - { - "epoch": 15.492046936114733, - "grad_norm": 1.422866702079773, - "learning_rate": 7.065728643216081e-05, - "loss": 5.2962, - "step": 29706 - }, - { - "epoch": 15.492568448500652, - "grad_norm": 1.3733655214309692, - "learning_rate": 7.065628140703518e-05, - "loss": 5.4949, - "step": 29707 - }, - { - "epoch": 15.493089960886572, - "grad_norm": 1.417636513710022, - "learning_rate": 7.065527638190955e-05, - "loss": 5.2925, - "step": 29708 - }, - { - "epoch": 15.493611473272491, - "grad_norm": 1.568176031112671, - "learning_rate": 7.065427135678392e-05, - "loss": 5.0327, - "step": 29709 - }, - { - "epoch": 15.494132985658409, - "grad_norm": 1.4728968143463135, - "learning_rate": 7.06532663316583e-05, - "loss": 5.7222, - "step": 29710 - }, - { - "epoch": 15.494654498044328, - "grad_norm": 1.4307410717010498, - "learning_rate": 7.065226130653266e-05, - "loss": 5.3377, - "step": 29711 - }, - { - "epoch": 15.495176010430248, - "grad_norm": 1.4904263019561768, - "learning_rate": 7.065125628140704e-05, - "loss": 5.5696, - "step": 29712 - }, - { - "epoch": 15.495697522816167, - "grad_norm": 1.493096947669983, - "learning_rate": 7.06502512562814e-05, - "loss": 5.5689, - "step": 29713 - }, - { - "epoch": 15.496219035202087, - "grad_norm": 1.4421977996826172, - "learning_rate": 7.064924623115578e-05, - "loss": 4.7959, - "step": 29714 - }, - { - "epoch": 15.496740547588006, - "grad_norm": 1.5103121995925903, - "learning_rate": 7.064824120603016e-05, - "loss": 5.4254, - "step": 29715 - }, - { - "epoch": 15.497262059973924, - "grad_norm": 1.4910341501235962, - "learning_rate": 7.064723618090454e-05, - "loss": 5.5567, - "step": 29716 - }, - { - "epoch": 15.497783572359843, - "grad_norm": 1.5045862197875977, - "learning_rate": 7.06462311557789e-05, - "loss": 5.3533, - "step": 29717 - }, - { - "epoch": 15.498305084745763, - "grad_norm": 1.3986520767211914, - "learning_rate": 7.064522613065328e-05, - "loss": 5.267, - "step": 29718 - }, - { - "epoch": 15.498826597131682, - "grad_norm": 1.4453613758087158, - "learning_rate": 7.064422110552764e-05, - "loss": 5.3499, - "step": 29719 - }, - { - "epoch": 15.499348109517602, - "grad_norm": 1.4646681547164917, - "learning_rate": 7.0643216080402e-05, - "loss": 5.2901, - "step": 29720 - }, - { - "epoch": 15.499869621903521, - "grad_norm": 1.588362455368042, - "learning_rate": 7.064221105527638e-05, - "loss": 4.8759, - "step": 29721 - }, - { - "epoch": 15.500391134289439, - "grad_norm": 1.487717628479004, - "learning_rate": 7.064120603015075e-05, - "loss": 5.5464, - "step": 29722 - }, - { - "epoch": 15.500912646675358, - "grad_norm": 1.7198896408081055, - "learning_rate": 7.064020100502513e-05, - "loss": 5.5057, - "step": 29723 - }, - { - "epoch": 15.501434159061278, - "grad_norm": 1.4188807010650635, - "learning_rate": 7.06391959798995e-05, - "loss": 5.548, - "step": 29724 - }, - { - "epoch": 15.501955671447197, - "grad_norm": 1.5355890989303589, - "learning_rate": 7.063819095477388e-05, - "loss": 5.4785, - "step": 29725 - }, - { - "epoch": 15.502477183833117, - "grad_norm": 1.484499216079712, - "learning_rate": 7.063718592964825e-05, - "loss": 5.4568, - "step": 29726 - }, - { - "epoch": 15.502998696219036, - "grad_norm": 1.4065556526184082, - "learning_rate": 7.063618090452262e-05, - "loss": 5.6368, - "step": 29727 - }, - { - "epoch": 15.503520208604954, - "grad_norm": 1.6094199419021606, - "learning_rate": 7.063517587939699e-05, - "loss": 5.688, - "step": 29728 - }, - { - "epoch": 15.504041720990873, - "grad_norm": 1.4081048965454102, - "learning_rate": 7.063417085427137e-05, - "loss": 5.1858, - "step": 29729 - }, - { - "epoch": 15.504563233376793, - "grad_norm": 1.514070987701416, - "learning_rate": 7.063316582914573e-05, - "loss": 5.4571, - "step": 29730 - }, - { - "epoch": 15.505084745762712, - "grad_norm": 1.483888030052185, - "learning_rate": 7.063216080402011e-05, - "loss": 5.2293, - "step": 29731 - }, - { - "epoch": 15.505606258148632, - "grad_norm": 1.4952235221862793, - "learning_rate": 7.063115577889447e-05, - "loss": 5.2144, - "step": 29732 - }, - { - "epoch": 15.506127770534551, - "grad_norm": 1.4558608531951904, - "learning_rate": 7.063015075376884e-05, - "loss": 5.8951, - "step": 29733 - }, - { - "epoch": 15.506649282920469, - "grad_norm": 1.534998893737793, - "learning_rate": 7.062914572864321e-05, - "loss": 4.9245, - "step": 29734 - }, - { - "epoch": 15.507170795306388, - "grad_norm": 1.428452730178833, - "learning_rate": 7.062814070351759e-05, - "loss": 5.7881, - "step": 29735 - }, - { - "epoch": 15.507692307692308, - "grad_norm": 1.4388796091079712, - "learning_rate": 7.062713567839197e-05, - "loss": 5.6742, - "step": 29736 - }, - { - "epoch": 15.508213820078227, - "grad_norm": 1.6274371147155762, - "learning_rate": 7.062613065326633e-05, - "loss": 4.8436, - "step": 29737 - }, - { - "epoch": 15.508735332464147, - "grad_norm": 1.4667145013809204, - "learning_rate": 7.062512562814071e-05, - "loss": 5.4955, - "step": 29738 - }, - { - "epoch": 15.509256844850064, - "grad_norm": 1.5027046203613281, - "learning_rate": 7.062412060301508e-05, - "loss": 5.3555, - "step": 29739 - }, - { - "epoch": 15.509778357235984, - "grad_norm": 1.515792965888977, - "learning_rate": 7.062311557788945e-05, - "loss": 5.7367, - "step": 29740 - }, - { - "epoch": 15.510299869621903, - "grad_norm": 1.5283528566360474, - "learning_rate": 7.062211055276382e-05, - "loss": 4.8268, - "step": 29741 - }, - { - "epoch": 15.510821382007823, - "grad_norm": 1.3812004327774048, - "learning_rate": 7.06211055276382e-05, - "loss": 5.5084, - "step": 29742 - }, - { - "epoch": 15.511342894393742, - "grad_norm": 1.5578882694244385, - "learning_rate": 7.062010050251256e-05, - "loss": 5.3913, - "step": 29743 - }, - { - "epoch": 15.511864406779662, - "grad_norm": 1.4343864917755127, - "learning_rate": 7.061909547738694e-05, - "loss": 5.5955, - "step": 29744 - }, - { - "epoch": 15.512385919165581, - "grad_norm": 1.5252264738082886, - "learning_rate": 7.061809045226132e-05, - "loss": 5.4692, - "step": 29745 - }, - { - "epoch": 15.512907431551499, - "grad_norm": 1.5099170207977295, - "learning_rate": 7.061708542713568e-05, - "loss": 5.3558, - "step": 29746 - }, - { - "epoch": 15.513428943937418, - "grad_norm": 1.4658761024475098, - "learning_rate": 7.061608040201006e-05, - "loss": 5.3545, - "step": 29747 - }, - { - "epoch": 15.513950456323338, - "grad_norm": 1.3477787971496582, - "learning_rate": 7.061507537688442e-05, - "loss": 5.434, - "step": 29748 - }, - { - "epoch": 15.514471968709257, - "grad_norm": 1.4713538885116577, - "learning_rate": 7.06140703517588e-05, - "loss": 5.1124, - "step": 29749 - }, - { - "epoch": 15.514993481095177, - "grad_norm": 1.4272589683532715, - "learning_rate": 7.061306532663316e-05, - "loss": 5.7358, - "step": 29750 - }, - { - "epoch": 15.515514993481094, - "grad_norm": 1.4134178161621094, - "learning_rate": 7.061206030150754e-05, - "loss": 5.9954, - "step": 29751 - }, - { - "epoch": 15.516036505867014, - "grad_norm": 1.40263831615448, - "learning_rate": 7.06110552763819e-05, - "loss": 5.3596, - "step": 29752 - }, - { - "epoch": 15.516558018252933, - "grad_norm": 1.4973150491714478, - "learning_rate": 7.061005025125628e-05, - "loss": 5.1567, - "step": 29753 - }, - { - "epoch": 15.517079530638853, - "grad_norm": 1.4745291471481323, - "learning_rate": 7.060904522613065e-05, - "loss": 5.0277, - "step": 29754 - }, - { - "epoch": 15.517601043024772, - "grad_norm": 1.5274412631988525, - "learning_rate": 7.060804020100503e-05, - "loss": 5.5163, - "step": 29755 - }, - { - "epoch": 15.518122555410692, - "grad_norm": 1.4831572771072388, - "learning_rate": 7.06070351758794e-05, - "loss": 5.4218, - "step": 29756 - }, - { - "epoch": 15.518644067796611, - "grad_norm": 1.5726218223571777, - "learning_rate": 7.060603015075378e-05, - "loss": 5.3881, - "step": 29757 - }, - { - "epoch": 15.519165580182529, - "grad_norm": 1.539580225944519, - "learning_rate": 7.060502512562815e-05, - "loss": 4.5145, - "step": 29758 - }, - { - "epoch": 15.519687092568448, - "grad_norm": 1.4816638231277466, - "learning_rate": 7.060402010050252e-05, - "loss": 5.3556, - "step": 29759 - }, - { - "epoch": 15.520208604954368, - "grad_norm": 1.6098450422286987, - "learning_rate": 7.060301507537689e-05, - "loss": 5.2538, - "step": 29760 - }, - { - "epoch": 15.520730117340287, - "grad_norm": 1.5723119974136353, - "learning_rate": 7.060201005025125e-05, - "loss": 4.6901, - "step": 29761 - }, - { - "epoch": 15.521251629726207, - "grad_norm": 1.4748954772949219, - "learning_rate": 7.060100502512563e-05, - "loss": 5.3719, - "step": 29762 - }, - { - "epoch": 15.521773142112124, - "grad_norm": 1.5230860710144043, - "learning_rate": 7.06e-05, - "loss": 5.4467, - "step": 29763 - }, - { - "epoch": 15.522294654498044, - "grad_norm": 1.6751346588134766, - "learning_rate": 7.059899497487437e-05, - "loss": 4.6599, - "step": 29764 - }, - { - "epoch": 15.522816166883963, - "grad_norm": 1.7366809844970703, - "learning_rate": 7.059798994974875e-05, - "loss": 4.9636, - "step": 29765 - }, - { - "epoch": 15.523337679269883, - "grad_norm": 1.345672607421875, - "learning_rate": 7.059698492462313e-05, - "loss": 5.3736, - "step": 29766 - }, - { - "epoch": 15.523859191655802, - "grad_norm": 1.4626842737197876, - "learning_rate": 7.059597989949749e-05, - "loss": 5.6302, - "step": 29767 - }, - { - "epoch": 15.524380704041722, - "grad_norm": 1.4428528547286987, - "learning_rate": 7.059497487437187e-05, - "loss": 5.595, - "step": 29768 - }, - { - "epoch": 15.52490221642764, - "grad_norm": 1.5451743602752686, - "learning_rate": 7.059396984924623e-05, - "loss": 5.2927, - "step": 29769 - }, - { - "epoch": 15.525423728813559, - "grad_norm": 1.4627478122711182, - "learning_rate": 7.059296482412061e-05, - "loss": 5.3079, - "step": 29770 - }, - { - "epoch": 15.525945241199478, - "grad_norm": 1.502657413482666, - "learning_rate": 7.059195979899498e-05, - "loss": 5.4731, - "step": 29771 - }, - { - "epoch": 15.526466753585398, - "grad_norm": 1.4977943897247314, - "learning_rate": 7.059095477386935e-05, - "loss": 4.7859, - "step": 29772 - }, - { - "epoch": 15.526988265971317, - "grad_norm": 1.4842380285263062, - "learning_rate": 7.058994974874372e-05, - "loss": 5.1261, - "step": 29773 - }, - { - "epoch": 15.527509778357237, - "grad_norm": 1.5325905084609985, - "learning_rate": 7.058894472361808e-05, - "loss": 5.0485, - "step": 29774 - }, - { - "epoch": 15.528031290743154, - "grad_norm": 1.5889365673065186, - "learning_rate": 7.058793969849246e-05, - "loss": 5.6515, - "step": 29775 - }, - { - "epoch": 15.528552803129074, - "grad_norm": 1.463250756263733, - "learning_rate": 7.058693467336684e-05, - "loss": 5.3048, - "step": 29776 - }, - { - "epoch": 15.529074315514993, - "grad_norm": 1.4086146354675293, - "learning_rate": 7.058592964824122e-05, - "loss": 5.5337, - "step": 29777 - }, - { - "epoch": 15.529595827900913, - "grad_norm": 1.450734257698059, - "learning_rate": 7.058492462311558e-05, - "loss": 5.2074, - "step": 29778 - }, - { - "epoch": 15.530117340286832, - "grad_norm": 1.3955553770065308, - "learning_rate": 7.058391959798996e-05, - "loss": 5.5409, - "step": 29779 - }, - { - "epoch": 15.530638852672752, - "grad_norm": 1.455403208732605, - "learning_rate": 7.058291457286432e-05, - "loss": 5.4548, - "step": 29780 - }, - { - "epoch": 15.53116036505867, - "grad_norm": 1.5893361568450928, - "learning_rate": 7.05819095477387e-05, - "loss": 5.0385, - "step": 29781 - }, - { - "epoch": 15.531681877444589, - "grad_norm": 1.5814707279205322, - "learning_rate": 7.058090452261307e-05, - "loss": 5.1502, - "step": 29782 - }, - { - "epoch": 15.532203389830508, - "grad_norm": 1.39535653591156, - "learning_rate": 7.057989949748744e-05, - "loss": 5.5591, - "step": 29783 - }, - { - "epoch": 15.532724902216428, - "grad_norm": 1.3255698680877686, - "learning_rate": 7.057889447236181e-05, - "loss": 5.8529, - "step": 29784 - }, - { - "epoch": 15.533246414602347, - "grad_norm": 1.5013705492019653, - "learning_rate": 7.057788944723619e-05, - "loss": 5.443, - "step": 29785 - }, - { - "epoch": 15.533767926988267, - "grad_norm": 1.4662271738052368, - "learning_rate": 7.057688442211056e-05, - "loss": 5.676, - "step": 29786 - }, - { - "epoch": 15.534289439374184, - "grad_norm": 1.3984819650650024, - "learning_rate": 7.057587939698493e-05, - "loss": 5.164, - "step": 29787 - }, - { - "epoch": 15.534810951760104, - "grad_norm": 1.4018551111221313, - "learning_rate": 7.05748743718593e-05, - "loss": 5.5252, - "step": 29788 - }, - { - "epoch": 15.535332464146023, - "grad_norm": 1.5171080827713013, - "learning_rate": 7.057386934673367e-05, - "loss": 5.3123, - "step": 29789 - }, - { - "epoch": 15.535853976531943, - "grad_norm": 1.5248485803604126, - "learning_rate": 7.057286432160805e-05, - "loss": 5.3584, - "step": 29790 - }, - { - "epoch": 15.536375488917862, - "grad_norm": 1.4990285634994507, - "learning_rate": 7.057185929648241e-05, - "loss": 5.1415, - "step": 29791 - }, - { - "epoch": 15.536897001303782, - "grad_norm": 1.4484447240829468, - "learning_rate": 7.057085427135679e-05, - "loss": 5.7667, - "step": 29792 - }, - { - "epoch": 15.5374185136897, - "grad_norm": 1.4058154821395874, - "learning_rate": 7.056984924623115e-05, - "loss": 4.9864, - "step": 29793 - }, - { - "epoch": 15.537940026075619, - "grad_norm": 1.4378197193145752, - "learning_rate": 7.056884422110553e-05, - "loss": 5.6076, - "step": 29794 - }, - { - "epoch": 15.538461538461538, - "grad_norm": 1.5059949159622192, - "learning_rate": 7.05678391959799e-05, - "loss": 5.6208, - "step": 29795 - }, - { - "epoch": 15.538983050847458, - "grad_norm": 1.4939093589782715, - "learning_rate": 7.056683417085427e-05, - "loss": 4.4723, - "step": 29796 - }, - { - "epoch": 15.539504563233377, - "grad_norm": 1.4853445291519165, - "learning_rate": 7.056582914572865e-05, - "loss": 5.6973, - "step": 29797 - }, - { - "epoch": 15.540026075619297, - "grad_norm": 1.54734468460083, - "learning_rate": 7.056482412060303e-05, - "loss": 5.2089, - "step": 29798 - }, - { - "epoch": 15.540547588005214, - "grad_norm": 1.4218579530715942, - "learning_rate": 7.05638190954774e-05, - "loss": 5.3431, - "step": 29799 - }, - { - "epoch": 15.541069100391134, - "grad_norm": 1.4414992332458496, - "learning_rate": 7.056281407035176e-05, - "loss": 5.4397, - "step": 29800 - }, - { - "epoch": 15.541590612777053, - "grad_norm": 1.4398777484893799, - "learning_rate": 7.056180904522614e-05, - "loss": 5.4143, - "step": 29801 - }, - { - "epoch": 15.542112125162973, - "grad_norm": 1.513824224472046, - "learning_rate": 7.05608040201005e-05, - "loss": 5.2995, - "step": 29802 - }, - { - "epoch": 15.542633637548892, - "grad_norm": 1.5201908349990845, - "learning_rate": 7.055979899497488e-05, - "loss": 5.6828, - "step": 29803 - }, - { - "epoch": 15.543155149934812, - "grad_norm": 1.6158503293991089, - "learning_rate": 7.055879396984924e-05, - "loss": 5.413, - "step": 29804 - }, - { - "epoch": 15.54367666232073, - "grad_norm": 1.5046491622924805, - "learning_rate": 7.055778894472362e-05, - "loss": 5.4036, - "step": 29805 - }, - { - "epoch": 15.544198174706649, - "grad_norm": 1.9255393743515015, - "learning_rate": 7.055678391959798e-05, - "loss": 4.7942, - "step": 29806 - }, - { - "epoch": 15.544719687092568, - "grad_norm": 1.5638474225997925, - "learning_rate": 7.055577889447236e-05, - "loss": 5.1776, - "step": 29807 - }, - { - "epoch": 15.545241199478488, - "grad_norm": 1.507620096206665, - "learning_rate": 7.055477386934674e-05, - "loss": 5.4937, - "step": 29808 - }, - { - "epoch": 15.545762711864407, - "grad_norm": 1.3841540813446045, - "learning_rate": 7.055376884422112e-05, - "loss": 5.369, - "step": 29809 - }, - { - "epoch": 15.546284224250327, - "grad_norm": 1.3752565383911133, - "learning_rate": 7.055276381909548e-05, - "loss": 5.2216, - "step": 29810 - }, - { - "epoch": 15.546805736636244, - "grad_norm": 1.6476832628250122, - "learning_rate": 7.055175879396986e-05, - "loss": 5.3487, - "step": 29811 - }, - { - "epoch": 15.547327249022164, - "grad_norm": 1.5123642683029175, - "learning_rate": 7.055075376884422e-05, - "loss": 5.0146, - "step": 29812 - }, - { - "epoch": 15.547848761408083, - "grad_norm": 1.5649917125701904, - "learning_rate": 7.054974874371859e-05, - "loss": 5.4072, - "step": 29813 - }, - { - "epoch": 15.548370273794003, - "grad_norm": 1.3651069402694702, - "learning_rate": 7.054874371859297e-05, - "loss": 5.8063, - "step": 29814 - }, - { - "epoch": 15.548891786179922, - "grad_norm": 1.4728069305419922, - "learning_rate": 7.054773869346733e-05, - "loss": 5.4221, - "step": 29815 - }, - { - "epoch": 15.549413298565842, - "grad_norm": 1.571724534034729, - "learning_rate": 7.054673366834171e-05, - "loss": 5.8254, - "step": 29816 - }, - { - "epoch": 15.54993481095176, - "grad_norm": 1.9240257740020752, - "learning_rate": 7.054572864321609e-05, - "loss": 5.2448, - "step": 29817 - }, - { - "epoch": 15.550456323337679, - "grad_norm": 1.4654467105865479, - "learning_rate": 7.054472361809046e-05, - "loss": 5.7089, - "step": 29818 - }, - { - "epoch": 15.550977835723598, - "grad_norm": 1.5009195804595947, - "learning_rate": 7.054371859296483e-05, - "loss": 5.5112, - "step": 29819 - }, - { - "epoch": 15.551499348109518, - "grad_norm": 1.5191709995269775, - "learning_rate": 7.05427135678392e-05, - "loss": 5.3179, - "step": 29820 - }, - { - "epoch": 15.552020860495437, - "grad_norm": 1.5352131128311157, - "learning_rate": 7.054170854271357e-05, - "loss": 4.9761, - "step": 29821 - }, - { - "epoch": 15.552542372881355, - "grad_norm": 1.4045053720474243, - "learning_rate": 7.054070351758795e-05, - "loss": 5.1678, - "step": 29822 - }, - { - "epoch": 15.553063885267274, - "grad_norm": 1.3714873790740967, - "learning_rate": 7.053969849246231e-05, - "loss": 5.7452, - "step": 29823 - }, - { - "epoch": 15.553585397653194, - "grad_norm": 1.7013514041900635, - "learning_rate": 7.053869346733669e-05, - "loss": 4.9531, - "step": 29824 - }, - { - "epoch": 15.554106910039113, - "grad_norm": 1.3601326942443848, - "learning_rate": 7.053768844221105e-05, - "loss": 5.6883, - "step": 29825 - }, - { - "epoch": 15.554628422425033, - "grad_norm": 1.4602686166763306, - "learning_rate": 7.053668341708542e-05, - "loss": 5.164, - "step": 29826 - }, - { - "epoch": 15.555149934810952, - "grad_norm": 1.4759551286697388, - "learning_rate": 7.05356783919598e-05, - "loss": 5.2029, - "step": 29827 - }, - { - "epoch": 15.555671447196872, - "grad_norm": 1.5075558423995972, - "learning_rate": 7.053467336683417e-05, - "loss": 5.4587, - "step": 29828 - }, - { - "epoch": 15.55619295958279, - "grad_norm": 1.5216131210327148, - "learning_rate": 7.053366834170855e-05, - "loss": 5.561, - "step": 29829 - }, - { - "epoch": 15.556714471968709, - "grad_norm": 1.5742125511169434, - "learning_rate": 7.053266331658292e-05, - "loss": 5.3917, - "step": 29830 - }, - { - "epoch": 15.557235984354628, - "grad_norm": 1.4046748876571655, - "learning_rate": 7.05316582914573e-05, - "loss": 5.7231, - "step": 29831 - }, - { - "epoch": 15.557757496740548, - "grad_norm": 1.3964786529541016, - "learning_rate": 7.053065326633166e-05, - "loss": 5.4838, - "step": 29832 - }, - { - "epoch": 15.558279009126467, - "grad_norm": 1.474071979522705, - "learning_rate": 7.052964824120604e-05, - "loss": 5.1342, - "step": 29833 - }, - { - "epoch": 15.558800521512385, - "grad_norm": 1.6422621011734009, - "learning_rate": 7.05286432160804e-05, - "loss": 5.1839, - "step": 29834 - }, - { - "epoch": 15.559322033898304, - "grad_norm": 1.5394941568374634, - "learning_rate": 7.052763819095478e-05, - "loss": 5.0178, - "step": 29835 - }, - { - "epoch": 15.559843546284224, - "grad_norm": 1.3236088752746582, - "learning_rate": 7.052663316582914e-05, - "loss": 4.8579, - "step": 29836 - }, - { - "epoch": 15.560365058670143, - "grad_norm": 1.4940462112426758, - "learning_rate": 7.052562814070352e-05, - "loss": 5.6638, - "step": 29837 - }, - { - "epoch": 15.560886571056063, - "grad_norm": 1.37168550491333, - "learning_rate": 7.05246231155779e-05, - "loss": 5.6013, - "step": 29838 - }, - { - "epoch": 15.561408083441982, - "grad_norm": 1.4741586446762085, - "learning_rate": 7.052361809045226e-05, - "loss": 5.3339, - "step": 29839 - }, - { - "epoch": 15.561929595827902, - "grad_norm": 1.4865258932113647, - "learning_rate": 7.052261306532664e-05, - "loss": 5.5279, - "step": 29840 - }, - { - "epoch": 15.56245110821382, - "grad_norm": 1.4546748399734497, - "learning_rate": 7.0521608040201e-05, - "loss": 4.793, - "step": 29841 - }, - { - "epoch": 15.562972620599739, - "grad_norm": 1.5693137645721436, - "learning_rate": 7.052060301507538e-05, - "loss": 5.6034, - "step": 29842 - }, - { - "epoch": 15.563494132985658, - "grad_norm": 1.513153314590454, - "learning_rate": 7.051959798994975e-05, - "loss": 5.1504, - "step": 29843 - }, - { - "epoch": 15.564015645371578, - "grad_norm": 1.545957326889038, - "learning_rate": 7.051859296482412e-05, - "loss": 5.4439, - "step": 29844 - }, - { - "epoch": 15.564537157757497, - "grad_norm": 1.6157091856002808, - "learning_rate": 7.051758793969849e-05, - "loss": 5.4018, - "step": 29845 - }, - { - "epoch": 15.565058670143415, - "grad_norm": 1.5408800840377808, - "learning_rate": 7.051658291457287e-05, - "loss": 5.4966, - "step": 29846 - }, - { - "epoch": 15.565580182529335, - "grad_norm": 1.4139115810394287, - "learning_rate": 7.051557788944723e-05, - "loss": 5.5746, - "step": 29847 - }, - { - "epoch": 15.566101694915254, - "grad_norm": 1.633948564529419, - "learning_rate": 7.051457286432161e-05, - "loss": 5.387, - "step": 29848 - }, - { - "epoch": 15.566623207301173, - "grad_norm": 1.5489752292633057, - "learning_rate": 7.051356783919599e-05, - "loss": 5.129, - "step": 29849 - }, - { - "epoch": 15.567144719687093, - "grad_norm": 1.6512142419815063, - "learning_rate": 7.051256281407036e-05, - "loss": 4.9282, - "step": 29850 - }, - { - "epoch": 15.567666232073012, - "grad_norm": 1.5166614055633545, - "learning_rate": 7.051155778894473e-05, - "loss": 5.3985, - "step": 29851 - }, - { - "epoch": 15.568187744458932, - "grad_norm": 1.4935499429702759, - "learning_rate": 7.05105527638191e-05, - "loss": 5.6653, - "step": 29852 - }, - { - "epoch": 15.56870925684485, - "grad_norm": 1.3796736001968384, - "learning_rate": 7.050954773869347e-05, - "loss": 5.265, - "step": 29853 - }, - { - "epoch": 15.569230769230769, - "grad_norm": 1.658752202987671, - "learning_rate": 7.050854271356784e-05, - "loss": 4.9538, - "step": 29854 - }, - { - "epoch": 15.569752281616688, - "grad_norm": 1.6187547445297241, - "learning_rate": 7.050753768844221e-05, - "loss": 4.9371, - "step": 29855 - }, - { - "epoch": 15.570273794002608, - "grad_norm": 1.4898301362991333, - "learning_rate": 7.050653266331658e-05, - "loss": 5.2277, - "step": 29856 - }, - { - "epoch": 15.570795306388527, - "grad_norm": 1.5860871076583862, - "learning_rate": 7.050552763819096e-05, - "loss": 5.4895, - "step": 29857 - }, - { - "epoch": 15.571316818774445, - "grad_norm": 1.4545207023620605, - "learning_rate": 7.050452261306533e-05, - "loss": 5.7731, - "step": 29858 - }, - { - "epoch": 15.571838331160365, - "grad_norm": 1.4629108905792236, - "learning_rate": 7.050351758793971e-05, - "loss": 4.9581, - "step": 29859 - }, - { - "epoch": 15.572359843546284, - "grad_norm": 1.4972574710845947, - "learning_rate": 7.050251256281408e-05, - "loss": 5.473, - "step": 29860 - }, - { - "epoch": 15.572881355932203, - "grad_norm": 1.406599521636963, - "learning_rate": 7.050150753768845e-05, - "loss": 4.6265, - "step": 29861 - }, - { - "epoch": 15.573402868318123, - "grad_norm": 1.5145223140716553, - "learning_rate": 7.050050251256282e-05, - "loss": 5.585, - "step": 29862 - }, - { - "epoch": 15.573924380704042, - "grad_norm": 1.7047865390777588, - "learning_rate": 7.04994974874372e-05, - "loss": 4.5371, - "step": 29863 - }, - { - "epoch": 15.57444589308996, - "grad_norm": 1.4536340236663818, - "learning_rate": 7.049849246231156e-05, - "loss": 5.6756, - "step": 29864 - }, - { - "epoch": 15.57496740547588, - "grad_norm": 1.3580827713012695, - "learning_rate": 7.049748743718594e-05, - "loss": 5.2571, - "step": 29865 - }, - { - "epoch": 15.575488917861799, - "grad_norm": 1.460701823234558, - "learning_rate": 7.04964824120603e-05, - "loss": 5.7662, - "step": 29866 - }, - { - "epoch": 15.576010430247718, - "grad_norm": 1.4651075601577759, - "learning_rate": 7.049547738693467e-05, - "loss": 5.7616, - "step": 29867 - }, - { - "epoch": 15.576531942633638, - "grad_norm": 1.643721103668213, - "learning_rate": 7.049447236180904e-05, - "loss": 4.5942, - "step": 29868 - }, - { - "epoch": 15.577053455019557, - "grad_norm": 1.545576810836792, - "learning_rate": 7.049346733668342e-05, - "loss": 5.3161, - "step": 29869 - }, - { - "epoch": 15.577574967405475, - "grad_norm": 1.4163509607315063, - "learning_rate": 7.04924623115578e-05, - "loss": 5.2046, - "step": 29870 - }, - { - "epoch": 15.578096479791395, - "grad_norm": 1.4633320569992065, - "learning_rate": 7.049145728643216e-05, - "loss": 4.7622, - "step": 29871 - }, - { - "epoch": 15.578617992177314, - "grad_norm": 1.4803954362869263, - "learning_rate": 7.049045226130654e-05, - "loss": 5.4683, - "step": 29872 - }, - { - "epoch": 15.579139504563233, - "grad_norm": 1.5595331192016602, - "learning_rate": 7.04894472361809e-05, - "loss": 5.155, - "step": 29873 - }, - { - "epoch": 15.579661016949153, - "grad_norm": 1.5382615327835083, - "learning_rate": 7.048844221105528e-05, - "loss": 4.9215, - "step": 29874 - }, - { - "epoch": 15.580182529335072, - "grad_norm": 1.5639911890029907, - "learning_rate": 7.048743718592965e-05, - "loss": 5.1985, - "step": 29875 - }, - { - "epoch": 15.58070404172099, - "grad_norm": 1.555514931678772, - "learning_rate": 7.048643216080403e-05, - "loss": 5.2237, - "step": 29876 - }, - { - "epoch": 15.58122555410691, - "grad_norm": 1.431175947189331, - "learning_rate": 7.048542713567839e-05, - "loss": 5.1746, - "step": 29877 - }, - { - "epoch": 15.581747066492829, - "grad_norm": 1.5792667865753174, - "learning_rate": 7.048442211055277e-05, - "loss": 5.3228, - "step": 29878 - }, - { - "epoch": 15.582268578878748, - "grad_norm": 1.5162075757980347, - "learning_rate": 7.048341708542715e-05, - "loss": 5.5811, - "step": 29879 - }, - { - "epoch": 15.582790091264668, - "grad_norm": 1.4221224784851074, - "learning_rate": 7.048241206030151e-05, - "loss": 4.6255, - "step": 29880 - }, - { - "epoch": 15.583311603650587, - "grad_norm": 1.467383861541748, - "learning_rate": 7.048140703517589e-05, - "loss": 5.2627, - "step": 29881 - }, - { - "epoch": 15.583833116036505, - "grad_norm": 1.5884525775909424, - "learning_rate": 7.048040201005025e-05, - "loss": 5.2404, - "step": 29882 - }, - { - "epoch": 15.584354628422425, - "grad_norm": 1.479169487953186, - "learning_rate": 7.047939698492463e-05, - "loss": 5.2846, - "step": 29883 - }, - { - "epoch": 15.584876140808344, - "grad_norm": 1.506241798400879, - "learning_rate": 7.0478391959799e-05, - "loss": 5.0177, - "step": 29884 - }, - { - "epoch": 15.585397653194264, - "grad_norm": 1.3723769187927246, - "learning_rate": 7.047738693467337e-05, - "loss": 5.346, - "step": 29885 - }, - { - "epoch": 15.585919165580183, - "grad_norm": 1.533069372177124, - "learning_rate": 7.047638190954774e-05, - "loss": 5.3081, - "step": 29886 - }, - { - "epoch": 15.586440677966102, - "grad_norm": 1.4475507736206055, - "learning_rate": 7.047537688442211e-05, - "loss": 5.3327, - "step": 29887 - }, - { - "epoch": 15.58696219035202, - "grad_norm": 1.4839651584625244, - "learning_rate": 7.047437185929648e-05, - "loss": 5.5337, - "step": 29888 - }, - { - "epoch": 15.58748370273794, - "grad_norm": 1.4660507440567017, - "learning_rate": 7.047336683417086e-05, - "loss": 5.4932, - "step": 29889 - }, - { - "epoch": 15.588005215123859, - "grad_norm": 1.5035451650619507, - "learning_rate": 7.047236180904523e-05, - "loss": 5.3973, - "step": 29890 - }, - { - "epoch": 15.588526727509779, - "grad_norm": 1.5033352375030518, - "learning_rate": 7.047135678391961e-05, - "loss": 5.4778, - "step": 29891 - }, - { - "epoch": 15.589048239895698, - "grad_norm": 1.4247105121612549, - "learning_rate": 7.047035175879398e-05, - "loss": 4.7245, - "step": 29892 - }, - { - "epoch": 15.589569752281617, - "grad_norm": 1.6096715927124023, - "learning_rate": 7.046934673366834e-05, - "loss": 4.9723, - "step": 29893 - }, - { - "epoch": 15.590091264667535, - "grad_norm": 1.391749620437622, - "learning_rate": 7.046834170854272e-05, - "loss": 5.4632, - "step": 29894 - }, - { - "epoch": 15.590612777053455, - "grad_norm": 1.534865140914917, - "learning_rate": 7.046733668341708e-05, - "loss": 5.1682, - "step": 29895 - }, - { - "epoch": 15.591134289439374, - "grad_norm": 1.460228681564331, - "learning_rate": 7.046633165829146e-05, - "loss": 5.4874, - "step": 29896 - }, - { - "epoch": 15.591655801825294, - "grad_norm": 1.5613734722137451, - "learning_rate": 7.046532663316582e-05, - "loss": 5.0661, - "step": 29897 - }, - { - "epoch": 15.592177314211213, - "grad_norm": 1.36672043800354, - "learning_rate": 7.04643216080402e-05, - "loss": 5.1435, - "step": 29898 - }, - { - "epoch": 15.592698826597132, - "grad_norm": 1.4785797595977783, - "learning_rate": 7.046331658291458e-05, - "loss": 5.4307, - "step": 29899 - }, - { - "epoch": 15.59322033898305, - "grad_norm": 1.4932688474655151, - "learning_rate": 7.046231155778896e-05, - "loss": 5.3701, - "step": 29900 - }, - { - "epoch": 15.59374185136897, - "grad_norm": 1.4935871362686157, - "learning_rate": 7.046130653266332e-05, - "loss": 5.4197, - "step": 29901 - }, - { - "epoch": 15.594263363754889, - "grad_norm": 1.4162698984146118, - "learning_rate": 7.04603015075377e-05, - "loss": 4.9563, - "step": 29902 - }, - { - "epoch": 15.594784876140809, - "grad_norm": 1.5761297941207886, - "learning_rate": 7.045929648241206e-05, - "loss": 5.3335, - "step": 29903 - }, - { - "epoch": 15.595306388526728, - "grad_norm": 1.519510269165039, - "learning_rate": 7.045829145728644e-05, - "loss": 5.3549, - "step": 29904 - }, - { - "epoch": 15.595827900912647, - "grad_norm": 1.4000787734985352, - "learning_rate": 7.04572864321608e-05, - "loss": 4.852, - "step": 29905 - }, - { - "epoch": 15.596349413298565, - "grad_norm": 1.4698258638381958, - "learning_rate": 7.045628140703517e-05, - "loss": 5.4642, - "step": 29906 - }, - { - "epoch": 15.596870925684485, - "grad_norm": 1.474560260772705, - "learning_rate": 7.045527638190955e-05, - "loss": 5.2063, - "step": 29907 - }, - { - "epoch": 15.597392438070404, - "grad_norm": 1.5502727031707764, - "learning_rate": 7.045427135678391e-05, - "loss": 5.2943, - "step": 29908 - }, - { - "epoch": 15.597913950456324, - "grad_norm": 1.366568922996521, - "learning_rate": 7.045326633165829e-05, - "loss": 5.6402, - "step": 29909 - }, - { - "epoch": 15.598435462842243, - "grad_norm": 1.414322018623352, - "learning_rate": 7.045226130653267e-05, - "loss": 5.3582, - "step": 29910 - }, - { - "epoch": 15.598956975228162, - "grad_norm": 1.5684572458267212, - "learning_rate": 7.045125628140705e-05, - "loss": 5.3808, - "step": 29911 - }, - { - "epoch": 15.59947848761408, - "grad_norm": 1.6201958656311035, - "learning_rate": 7.045025125628141e-05, - "loss": 5.006, - "step": 29912 - }, - { - "epoch": 15.6, - "grad_norm": 1.4744551181793213, - "learning_rate": 7.044924623115579e-05, - "loss": 5.6099, - "step": 29913 - }, - { - "epoch": 15.600521512385919, - "grad_norm": 1.5173089504241943, - "learning_rate": 7.044824120603015e-05, - "loss": 5.5942, - "step": 29914 - }, - { - "epoch": 15.601043024771839, - "grad_norm": 1.4416636228561401, - "learning_rate": 7.044723618090453e-05, - "loss": 5.6945, - "step": 29915 - }, - { - "epoch": 15.601564537157758, - "grad_norm": 1.392016887664795, - "learning_rate": 7.04462311557789e-05, - "loss": 5.6053, - "step": 29916 - }, - { - "epoch": 15.602086049543676, - "grad_norm": 1.6438734531402588, - "learning_rate": 7.044522613065327e-05, - "loss": 5.1342, - "step": 29917 - }, - { - "epoch": 15.602607561929595, - "grad_norm": 1.4649128913879395, - "learning_rate": 7.044422110552764e-05, - "loss": 5.683, - "step": 29918 - }, - { - "epoch": 15.603129074315515, - "grad_norm": 1.6050852537155151, - "learning_rate": 7.044321608040201e-05, - "loss": 5.6314, - "step": 29919 - }, - { - "epoch": 15.603650586701434, - "grad_norm": 1.3818148374557495, - "learning_rate": 7.044221105527639e-05, - "loss": 5.708, - "step": 29920 - }, - { - "epoch": 15.604172099087354, - "grad_norm": 1.5242812633514404, - "learning_rate": 7.044120603015076e-05, - "loss": 5.078, - "step": 29921 - }, - { - "epoch": 15.604693611473273, - "grad_norm": 1.536713719367981, - "learning_rate": 7.044020100502513e-05, - "loss": 5.4355, - "step": 29922 - }, - { - "epoch": 15.605215123859193, - "grad_norm": 1.4669018983840942, - "learning_rate": 7.04391959798995e-05, - "loss": 5.2589, - "step": 29923 - }, - { - "epoch": 15.60573663624511, - "grad_norm": 1.4739636182785034, - "learning_rate": 7.043819095477388e-05, - "loss": 5.4632, - "step": 29924 - }, - { - "epoch": 15.60625814863103, - "grad_norm": 1.4418977499008179, - "learning_rate": 7.043718592964824e-05, - "loss": 5.5346, - "step": 29925 - }, - { - "epoch": 15.60677966101695, - "grad_norm": 1.4832143783569336, - "learning_rate": 7.043618090452262e-05, - "loss": 5.3099, - "step": 29926 - }, - { - "epoch": 15.607301173402869, - "grad_norm": 1.4884930849075317, - "learning_rate": 7.043517587939698e-05, - "loss": 5.7038, - "step": 29927 - }, - { - "epoch": 15.607822685788788, - "grad_norm": 1.4383413791656494, - "learning_rate": 7.043417085427136e-05, - "loss": 5.5042, - "step": 29928 - }, - { - "epoch": 15.608344198174706, - "grad_norm": 1.4774445295333862, - "learning_rate": 7.043316582914573e-05, - "loss": 5.4597, - "step": 29929 - }, - { - "epoch": 15.608865710560625, - "grad_norm": 1.4659324884414673, - "learning_rate": 7.04321608040201e-05, - "loss": 5.2099, - "step": 29930 - }, - { - "epoch": 15.609387222946545, - "grad_norm": 1.5013165473937988, - "learning_rate": 7.043115577889448e-05, - "loss": 5.1303, - "step": 29931 - }, - { - "epoch": 15.609908735332464, - "grad_norm": 1.407395362854004, - "learning_rate": 7.043015075376884e-05, - "loss": 5.0874, - "step": 29932 - }, - { - "epoch": 15.610430247718384, - "grad_norm": 1.39747154712677, - "learning_rate": 7.042914572864322e-05, - "loss": 5.5611, - "step": 29933 - }, - { - "epoch": 15.610951760104303, - "grad_norm": 1.5613833665847778, - "learning_rate": 7.042814070351759e-05, - "loss": 4.9609, - "step": 29934 - }, - { - "epoch": 15.611473272490223, - "grad_norm": 1.4261175394058228, - "learning_rate": 7.042713567839196e-05, - "loss": 4.3914, - "step": 29935 - }, - { - "epoch": 15.61199478487614, - "grad_norm": 1.5079468488693237, - "learning_rate": 7.042613065326633e-05, - "loss": 5.1145, - "step": 29936 - }, - { - "epoch": 15.61251629726206, - "grad_norm": 1.526282787322998, - "learning_rate": 7.042512562814071e-05, - "loss": 4.8837, - "step": 29937 - }, - { - "epoch": 15.61303780964798, - "grad_norm": 1.4468494653701782, - "learning_rate": 7.042412060301507e-05, - "loss": 5.6409, - "step": 29938 - }, - { - "epoch": 15.613559322033899, - "grad_norm": 1.7262965440750122, - "learning_rate": 7.042311557788945e-05, - "loss": 4.9102, - "step": 29939 - }, - { - "epoch": 15.614080834419818, - "grad_norm": 1.44370436668396, - "learning_rate": 7.042211055276383e-05, - "loss": 5.1259, - "step": 29940 - }, - { - "epoch": 15.614602346805736, - "grad_norm": 1.4548479318618774, - "learning_rate": 7.04211055276382e-05, - "loss": 5.7495, - "step": 29941 - }, - { - "epoch": 15.615123859191655, - "grad_norm": 1.5498237609863281, - "learning_rate": 7.042010050251257e-05, - "loss": 5.5402, - "step": 29942 - }, - { - "epoch": 15.615645371577575, - "grad_norm": 1.5302948951721191, - "learning_rate": 7.041909547738695e-05, - "loss": 5.1612, - "step": 29943 - }, - { - "epoch": 15.616166883963494, - "grad_norm": 1.5376778841018677, - "learning_rate": 7.041809045226131e-05, - "loss": 5.4204, - "step": 29944 - }, - { - "epoch": 15.616688396349414, - "grad_norm": 1.4004099369049072, - "learning_rate": 7.041708542713569e-05, - "loss": 5.1446, - "step": 29945 - }, - { - "epoch": 15.617209908735333, - "grad_norm": 1.3878200054168701, - "learning_rate": 7.041608040201005e-05, - "loss": 5.2523, - "step": 29946 - }, - { - "epoch": 15.617731421121253, - "grad_norm": 1.4165589809417725, - "learning_rate": 7.041507537688442e-05, - "loss": 5.4634, - "step": 29947 - }, - { - "epoch": 15.61825293350717, - "grad_norm": 1.432448148727417, - "learning_rate": 7.04140703517588e-05, - "loss": 5.5763, - "step": 29948 - }, - { - "epoch": 15.61877444589309, - "grad_norm": 1.5806833505630493, - "learning_rate": 7.041306532663316e-05, - "loss": 4.6218, - "step": 29949 - }, - { - "epoch": 15.61929595827901, - "grad_norm": 1.4274758100509644, - "learning_rate": 7.041206030150754e-05, - "loss": 5.3154, - "step": 29950 - }, - { - "epoch": 15.619817470664929, - "grad_norm": 1.5513089895248413, - "learning_rate": 7.041105527638192e-05, - "loss": 5.1289, - "step": 29951 - }, - { - "epoch": 15.620338983050848, - "grad_norm": 1.4450125694274902, - "learning_rate": 7.041005025125629e-05, - "loss": 5.5936, - "step": 29952 - }, - { - "epoch": 15.620860495436766, - "grad_norm": 1.4358642101287842, - "learning_rate": 7.040904522613066e-05, - "loss": 5.5387, - "step": 29953 - }, - { - "epoch": 15.621382007822685, - "grad_norm": 1.4499359130859375, - "learning_rate": 7.040804020100504e-05, - "loss": 5.2146, - "step": 29954 - }, - { - "epoch": 15.621903520208605, - "grad_norm": 1.4747754335403442, - "learning_rate": 7.04070351758794e-05, - "loss": 5.5442, - "step": 29955 - }, - { - "epoch": 15.622425032594524, - "grad_norm": 1.5905131101608276, - "learning_rate": 7.040603015075378e-05, - "loss": 5.0565, - "step": 29956 - }, - { - "epoch": 15.622946544980444, - "grad_norm": 1.44203519821167, - "learning_rate": 7.040502512562814e-05, - "loss": 5.2919, - "step": 29957 - }, - { - "epoch": 15.623468057366363, - "grad_norm": 1.5652529001235962, - "learning_rate": 7.040402010050252e-05, - "loss": 5.3853, - "step": 29958 - }, - { - "epoch": 15.62398956975228, - "grad_norm": 1.4141149520874023, - "learning_rate": 7.040301507537688e-05, - "loss": 5.3913, - "step": 29959 - }, - { - "epoch": 15.6245110821382, - "grad_norm": 1.524466633796692, - "learning_rate": 7.040201005025126e-05, - "loss": 4.9386, - "step": 29960 - }, - { - "epoch": 15.62503259452412, - "grad_norm": 1.350555658340454, - "learning_rate": 7.040100502512564e-05, - "loss": 4.8937, - "step": 29961 - }, - { - "epoch": 15.62555410691004, - "grad_norm": 1.6304889917373657, - "learning_rate": 7.04e-05, - "loss": 5.0244, - "step": 29962 - }, - { - "epoch": 15.626075619295959, - "grad_norm": 1.462827205657959, - "learning_rate": 7.039899497487438e-05, - "loss": 5.257, - "step": 29963 - }, - { - "epoch": 15.626597131681878, - "grad_norm": 1.3728485107421875, - "learning_rate": 7.039798994974875e-05, - "loss": 5.6536, - "step": 29964 - }, - { - "epoch": 15.627118644067796, - "grad_norm": 1.3408362865447998, - "learning_rate": 7.039698492462312e-05, - "loss": 5.8831, - "step": 29965 - }, - { - "epoch": 15.627640156453715, - "grad_norm": 1.429350733757019, - "learning_rate": 7.039597989949749e-05, - "loss": 5.2888, - "step": 29966 - }, - { - "epoch": 15.628161668839635, - "grad_norm": 1.4728062152862549, - "learning_rate": 7.039497487437187e-05, - "loss": 4.9436, - "step": 29967 - }, - { - "epoch": 15.628683181225554, - "grad_norm": 1.4137309789657593, - "learning_rate": 7.039396984924623e-05, - "loss": 5.6368, - "step": 29968 - }, - { - "epoch": 15.629204693611474, - "grad_norm": 1.5079853534698486, - "learning_rate": 7.039296482412061e-05, - "loss": 5.2758, - "step": 29969 - }, - { - "epoch": 15.629726205997393, - "grad_norm": 1.662076711654663, - "learning_rate": 7.039195979899497e-05, - "loss": 5.1512, - "step": 29970 - }, - { - "epoch": 15.63024771838331, - "grad_norm": 1.5657864809036255, - "learning_rate": 7.039095477386935e-05, - "loss": 5.7042, - "step": 29971 - }, - { - "epoch": 15.63076923076923, - "grad_norm": 1.4936258792877197, - "learning_rate": 7.038994974874373e-05, - "loss": 5.1216, - "step": 29972 - }, - { - "epoch": 15.63129074315515, - "grad_norm": 1.4902905225753784, - "learning_rate": 7.038894472361809e-05, - "loss": 5.4731, - "step": 29973 - }, - { - "epoch": 15.63181225554107, - "grad_norm": 1.3768261671066284, - "learning_rate": 7.038793969849247e-05, - "loss": 5.9513, - "step": 29974 - }, - { - "epoch": 15.632333767926989, - "grad_norm": 1.4883761405944824, - "learning_rate": 7.038693467336683e-05, - "loss": 5.5183, - "step": 29975 - }, - { - "epoch": 15.632855280312908, - "grad_norm": 1.5111212730407715, - "learning_rate": 7.038592964824121e-05, - "loss": 5.285, - "step": 29976 - }, - { - "epoch": 15.633376792698826, - "grad_norm": 1.566623330116272, - "learning_rate": 7.038492462311558e-05, - "loss": 5.4991, - "step": 29977 - }, - { - "epoch": 15.633898305084745, - "grad_norm": 1.4468623399734497, - "learning_rate": 7.038391959798995e-05, - "loss": 5.6106, - "step": 29978 - }, - { - "epoch": 15.634419817470665, - "grad_norm": 1.48420250415802, - "learning_rate": 7.038291457286432e-05, - "loss": 5.3263, - "step": 29979 - }, - { - "epoch": 15.634941329856584, - "grad_norm": 1.487633466720581, - "learning_rate": 7.03819095477387e-05, - "loss": 5.111, - "step": 29980 - }, - { - "epoch": 15.635462842242504, - "grad_norm": 1.549830675125122, - "learning_rate": 7.038090452261306e-05, - "loss": 5.2857, - "step": 29981 - }, - { - "epoch": 15.635984354628423, - "grad_norm": 1.4480869770050049, - "learning_rate": 7.037989949748744e-05, - "loss": 5.3869, - "step": 29982 - }, - { - "epoch": 15.63650586701434, - "grad_norm": 1.6220782995224, - "learning_rate": 7.037889447236182e-05, - "loss": 5.3479, - "step": 29983 - }, - { - "epoch": 15.63702737940026, - "grad_norm": 1.5577313899993896, - "learning_rate": 7.03778894472362e-05, - "loss": 4.7684, - "step": 29984 - }, - { - "epoch": 15.63754889178618, - "grad_norm": 1.4343575239181519, - "learning_rate": 7.037688442211056e-05, - "loss": 5.6796, - "step": 29985 - }, - { - "epoch": 15.6380704041721, - "grad_norm": 1.5110361576080322, - "learning_rate": 7.037587939698492e-05, - "loss": 5.0033, - "step": 29986 - }, - { - "epoch": 15.638591916558019, - "grad_norm": 1.631688117980957, - "learning_rate": 7.03748743718593e-05, - "loss": 4.8838, - "step": 29987 - }, - { - "epoch": 15.639113428943938, - "grad_norm": 1.5702265501022339, - "learning_rate": 7.037386934673366e-05, - "loss": 4.6927, - "step": 29988 - }, - { - "epoch": 15.639634941329856, - "grad_norm": 1.6152788400650024, - "learning_rate": 7.037286432160804e-05, - "loss": 5.1932, - "step": 29989 - }, - { - "epoch": 15.640156453715775, - "grad_norm": 1.5983808040618896, - "learning_rate": 7.03718592964824e-05, - "loss": 4.8768, - "step": 29990 - }, - { - "epoch": 15.640677966101695, - "grad_norm": 1.4208405017852783, - "learning_rate": 7.037085427135678e-05, - "loss": 4.9428, - "step": 29991 - }, - { - "epoch": 15.641199478487614, - "grad_norm": 1.5560824871063232, - "learning_rate": 7.036984924623116e-05, - "loss": 5.261, - "step": 29992 - }, - { - "epoch": 15.641720990873534, - "grad_norm": 1.3879438638687134, - "learning_rate": 7.036884422110554e-05, - "loss": 5.5944, - "step": 29993 - }, - { - "epoch": 15.642242503259453, - "grad_norm": 1.468414306640625, - "learning_rate": 7.03678391959799e-05, - "loss": 5.3608, - "step": 29994 - }, - { - "epoch": 15.642764015645371, - "grad_norm": 1.3937424421310425, - "learning_rate": 7.036683417085428e-05, - "loss": 5.316, - "step": 29995 - }, - { - "epoch": 15.64328552803129, - "grad_norm": 1.456793189048767, - "learning_rate": 7.036582914572865e-05, - "loss": 5.7648, - "step": 29996 - }, - { - "epoch": 15.64380704041721, - "grad_norm": 1.5667072534561157, - "learning_rate": 7.036482412060302e-05, - "loss": 4.9671, - "step": 29997 - }, - { - "epoch": 15.64432855280313, - "grad_norm": 1.564516544342041, - "learning_rate": 7.036381909547739e-05, - "loss": 5.2179, - "step": 29998 - }, - { - "epoch": 15.644850065189049, - "grad_norm": 1.4960548877716064, - "learning_rate": 7.036281407035175e-05, - "loss": 5.0949, - "step": 29999 - }, - { - "epoch": 15.645371577574968, - "grad_norm": 1.4661023616790771, - "learning_rate": 7.036180904522613e-05, - "loss": 5.3394, - "step": 30000 - }, - { - "epoch": 15.645371577574968, - "eval_loss": 5.427452087402344, - "eval_runtime": 42.67, - "eval_samples_per_second": 28.732, - "eval_steps_per_second": 3.609, - "step": 30000 - }, - { - "epoch": 15.645893089960886, - "grad_norm": 1.5810165405273438, - "learning_rate": 7.03608040201005e-05, - "loss": 4.7133, - "step": 30001 - }, - { - "epoch": 15.646414602346805, - "grad_norm": 1.5174697637557983, - "learning_rate": 7.035979899497487e-05, - "loss": 5.1704, - "step": 30002 - }, - { - "epoch": 15.646936114732725, - "grad_norm": 1.4082316160202026, - "learning_rate": 7.035879396984925e-05, - "loss": 5.2054, - "step": 30003 - }, - { - "epoch": 15.647457627118644, - "grad_norm": 1.4817538261413574, - "learning_rate": 7.035778894472363e-05, - "loss": 5.0175, - "step": 30004 - }, - { - "epoch": 15.647979139504564, - "grad_norm": 1.4781922101974487, - "learning_rate": 7.035678391959799e-05, - "loss": 5.6151, - "step": 30005 - }, - { - "epoch": 15.648500651890483, - "grad_norm": 1.4207042455673218, - "learning_rate": 7.035577889447237e-05, - "loss": 5.2115, - "step": 30006 - }, - { - "epoch": 15.649022164276401, - "grad_norm": 1.3230258226394653, - "learning_rate": 7.035477386934673e-05, - "loss": 5.8577, - "step": 30007 - }, - { - "epoch": 15.64954367666232, - "grad_norm": 1.5758445262908936, - "learning_rate": 7.035376884422111e-05, - "loss": 5.2338, - "step": 30008 - }, - { - "epoch": 15.65006518904824, - "grad_norm": 1.5014439821243286, - "learning_rate": 7.035276381909548e-05, - "loss": 5.0921, - "step": 30009 - }, - { - "epoch": 15.65058670143416, - "grad_norm": 1.5421696901321411, - "learning_rate": 7.035175879396985e-05, - "loss": 5.3465, - "step": 30010 - }, - { - "epoch": 15.651108213820079, - "grad_norm": 1.4673150777816772, - "learning_rate": 7.035075376884422e-05, - "loss": 5.6166, - "step": 30011 - }, - { - "epoch": 15.651629726205996, - "grad_norm": 1.5223835706710815, - "learning_rate": 7.03497487437186e-05, - "loss": 4.7889, - "step": 30012 - }, - { - "epoch": 15.652151238591916, - "grad_norm": 1.3539470434188843, - "learning_rate": 7.034874371859297e-05, - "loss": 5.7861, - "step": 30013 - }, - { - "epoch": 15.652672750977835, - "grad_norm": 1.5034499168395996, - "learning_rate": 7.034773869346734e-05, - "loss": 5.0802, - "step": 30014 - }, - { - "epoch": 15.653194263363755, - "grad_norm": 1.5000252723693848, - "learning_rate": 7.034673366834172e-05, - "loss": 5.3096, - "step": 30015 - }, - { - "epoch": 15.653715775749674, - "grad_norm": 1.3573576211929321, - "learning_rate": 7.034572864321608e-05, - "loss": 5.1289, - "step": 30016 - }, - { - "epoch": 15.654237288135594, - "grad_norm": 1.5119774341583252, - "learning_rate": 7.034472361809046e-05, - "loss": 5.0959, - "step": 30017 - }, - { - "epoch": 15.654758800521513, - "grad_norm": 1.4813077449798584, - "learning_rate": 7.034371859296482e-05, - "loss": 5.4628, - "step": 30018 - }, - { - "epoch": 15.655280312907431, - "grad_norm": 1.5783803462982178, - "learning_rate": 7.03427135678392e-05, - "loss": 5.2695, - "step": 30019 - }, - { - "epoch": 15.65580182529335, - "grad_norm": 1.5176588296890259, - "learning_rate": 7.034170854271357e-05, - "loss": 5.3559, - "step": 30020 - }, - { - "epoch": 15.65632333767927, - "grad_norm": 1.440812349319458, - "learning_rate": 7.034070351758794e-05, - "loss": 5.4147, - "step": 30021 - }, - { - "epoch": 15.65684485006519, - "grad_norm": 1.381279468536377, - "learning_rate": 7.033969849246231e-05, - "loss": 4.8754, - "step": 30022 - }, - { - "epoch": 15.657366362451109, - "grad_norm": 1.4483320713043213, - "learning_rate": 7.033869346733669e-05, - "loss": 5.7707, - "step": 30023 - }, - { - "epoch": 15.657887874837026, - "grad_norm": 1.2826277017593384, - "learning_rate": 7.033768844221106e-05, - "loss": 4.3647, - "step": 30024 - }, - { - "epoch": 15.658409387222946, - "grad_norm": 1.6106362342834473, - "learning_rate": 7.033668341708543e-05, - "loss": 5.127, - "step": 30025 - }, - { - "epoch": 15.658930899608865, - "grad_norm": 1.4412578344345093, - "learning_rate": 7.03356783919598e-05, - "loss": 5.7045, - "step": 30026 - }, - { - "epoch": 15.659452411994785, - "grad_norm": 1.5877286195755005, - "learning_rate": 7.033467336683417e-05, - "loss": 5.5094, - "step": 30027 - }, - { - "epoch": 15.659973924380704, - "grad_norm": 1.38240385055542, - "learning_rate": 7.033366834170855e-05, - "loss": 5.493, - "step": 30028 - }, - { - "epoch": 15.660495436766624, - "grad_norm": 1.405619502067566, - "learning_rate": 7.033266331658291e-05, - "loss": 5.6836, - "step": 30029 - }, - { - "epoch": 15.661016949152543, - "grad_norm": 1.4457433223724365, - "learning_rate": 7.033165829145729e-05, - "loss": 5.5485, - "step": 30030 - }, - { - "epoch": 15.661538461538461, - "grad_norm": 1.459743857383728, - "learning_rate": 7.033065326633165e-05, - "loss": 5.3881, - "step": 30031 - }, - { - "epoch": 15.66205997392438, - "grad_norm": 1.5080881118774414, - "learning_rate": 7.032964824120603e-05, - "loss": 5.2047, - "step": 30032 - }, - { - "epoch": 15.6625814863103, - "grad_norm": 1.384797215461731, - "learning_rate": 7.032864321608041e-05, - "loss": 5.7627, - "step": 30033 - }, - { - "epoch": 15.66310299869622, - "grad_norm": 1.4035999774932861, - "learning_rate": 7.032763819095479e-05, - "loss": 5.3854, - "step": 30034 - }, - { - "epoch": 15.663624511082139, - "grad_norm": 1.3466969728469849, - "learning_rate": 7.032663316582915e-05, - "loss": 5.1963, - "step": 30035 - }, - { - "epoch": 15.664146023468057, - "grad_norm": 1.3868387937545776, - "learning_rate": 7.032562814070353e-05, - "loss": 4.9735, - "step": 30036 - }, - { - "epoch": 15.664667535853976, - "grad_norm": 1.4800297021865845, - "learning_rate": 7.03246231155779e-05, - "loss": 5.4404, - "step": 30037 - }, - { - "epoch": 15.665189048239895, - "grad_norm": 1.3516308069229126, - "learning_rate": 7.032361809045227e-05, - "loss": 5.6714, - "step": 30038 - }, - { - "epoch": 15.665710560625815, - "grad_norm": 1.5503063201904297, - "learning_rate": 7.032261306532664e-05, - "loss": 5.4439, - "step": 30039 - }, - { - "epoch": 15.666232073011734, - "grad_norm": 1.5086185932159424, - "learning_rate": 7.0321608040201e-05, - "loss": 5.0039, - "step": 30040 - }, - { - "epoch": 15.666753585397654, - "grad_norm": 1.3921794891357422, - "learning_rate": 7.032060301507538e-05, - "loss": 5.6902, - "step": 30041 - }, - { - "epoch": 15.667275097783573, - "grad_norm": 1.4566251039505005, - "learning_rate": 7.031959798994974e-05, - "loss": 5.3333, - "step": 30042 - }, - { - "epoch": 15.667796610169491, - "grad_norm": 1.4193425178527832, - "learning_rate": 7.031859296482412e-05, - "loss": 5.7759, - "step": 30043 - }, - { - "epoch": 15.66831812255541, - "grad_norm": 1.3709461688995361, - "learning_rate": 7.03175879396985e-05, - "loss": 5.7155, - "step": 30044 - }, - { - "epoch": 15.66883963494133, - "grad_norm": 1.3801155090332031, - "learning_rate": 7.031658291457288e-05, - "loss": 5.63, - "step": 30045 - }, - { - "epoch": 15.66936114732725, - "grad_norm": 1.451909065246582, - "learning_rate": 7.031557788944724e-05, - "loss": 5.5007, - "step": 30046 - }, - { - "epoch": 15.669882659713169, - "grad_norm": 1.4451613426208496, - "learning_rate": 7.031457286432162e-05, - "loss": 5.4585, - "step": 30047 - }, - { - "epoch": 15.670404172099087, - "grad_norm": 1.4272741079330444, - "learning_rate": 7.031356783919598e-05, - "loss": 5.7499, - "step": 30048 - }, - { - "epoch": 15.670925684485006, - "grad_norm": 1.3623384237289429, - "learning_rate": 7.031256281407036e-05, - "loss": 5.7812, - "step": 30049 - }, - { - "epoch": 15.671447196870925, - "grad_norm": 1.5727695226669312, - "learning_rate": 7.031155778894472e-05, - "loss": 5.0478, - "step": 30050 - }, - { - "epoch": 15.671968709256845, - "grad_norm": 1.4335187673568726, - "learning_rate": 7.03105527638191e-05, - "loss": 5.6912, - "step": 30051 - }, - { - "epoch": 15.672490221642764, - "grad_norm": 1.4679251909255981, - "learning_rate": 7.030954773869347e-05, - "loss": 5.8462, - "step": 30052 - }, - { - "epoch": 15.673011734028684, - "grad_norm": 1.4858371019363403, - "learning_rate": 7.030854271356784e-05, - "loss": 5.268, - "step": 30053 - }, - { - "epoch": 15.673533246414602, - "grad_norm": 1.565849781036377, - "learning_rate": 7.030753768844222e-05, - "loss": 4.897, - "step": 30054 - }, - { - "epoch": 15.674054758800521, - "grad_norm": 1.4003568887710571, - "learning_rate": 7.030653266331659e-05, - "loss": 5.8525, - "step": 30055 - }, - { - "epoch": 15.67457627118644, - "grad_norm": 1.5551717281341553, - "learning_rate": 7.030552763819096e-05, - "loss": 5.2306, - "step": 30056 - }, - { - "epoch": 15.67509778357236, - "grad_norm": 1.4572714567184448, - "learning_rate": 7.030452261306533e-05, - "loss": 5.17, - "step": 30057 - }, - { - "epoch": 15.67561929595828, - "grad_norm": 1.478036642074585, - "learning_rate": 7.03035175879397e-05, - "loss": 5.2421, - "step": 30058 - }, - { - "epoch": 15.676140808344199, - "grad_norm": 1.5803945064544678, - "learning_rate": 7.030251256281407e-05, - "loss": 5.1637, - "step": 30059 - }, - { - "epoch": 15.676662320730117, - "grad_norm": 1.4095864295959473, - "learning_rate": 7.030150753768845e-05, - "loss": 5.145, - "step": 30060 - }, - { - "epoch": 15.677183833116036, - "grad_norm": 1.631874680519104, - "learning_rate": 7.030050251256281e-05, - "loss": 4.9089, - "step": 30061 - }, - { - "epoch": 15.677705345501955, - "grad_norm": 1.5062364339828491, - "learning_rate": 7.029949748743719e-05, - "loss": 5.4427, - "step": 30062 - }, - { - "epoch": 15.678226857887875, - "grad_norm": 1.4489744901657104, - "learning_rate": 7.029849246231155e-05, - "loss": 5.37, - "step": 30063 - }, - { - "epoch": 15.678748370273794, - "grad_norm": 1.4426242113113403, - "learning_rate": 7.029748743718593e-05, - "loss": 5.3656, - "step": 30064 - }, - { - "epoch": 15.679269882659714, - "grad_norm": 1.4968575239181519, - "learning_rate": 7.029648241206031e-05, - "loss": 5.1945, - "step": 30065 - }, - { - "epoch": 15.679791395045632, - "grad_norm": 1.4142004251480103, - "learning_rate": 7.029547738693467e-05, - "loss": 5.7279, - "step": 30066 - }, - { - "epoch": 15.680312907431551, - "grad_norm": 1.4767961502075195, - "learning_rate": 7.029447236180905e-05, - "loss": 5.3014, - "step": 30067 - }, - { - "epoch": 15.68083441981747, - "grad_norm": 1.4306780099868774, - "learning_rate": 7.029346733668342e-05, - "loss": 5.2876, - "step": 30068 - }, - { - "epoch": 15.68135593220339, - "grad_norm": 1.476169466972351, - "learning_rate": 7.02924623115578e-05, - "loss": 5.2493, - "step": 30069 - }, - { - "epoch": 15.68187744458931, - "grad_norm": 1.5283794403076172, - "learning_rate": 7.029145728643216e-05, - "loss": 5.1722, - "step": 30070 - }, - { - "epoch": 15.682398956975229, - "grad_norm": 1.491104245185852, - "learning_rate": 7.029045226130654e-05, - "loss": 5.1334, - "step": 30071 - }, - { - "epoch": 15.682920469361147, - "grad_norm": 1.72818922996521, - "learning_rate": 7.02894472361809e-05, - "loss": 5.3454, - "step": 30072 - }, - { - "epoch": 15.683441981747066, - "grad_norm": 1.4341708421707153, - "learning_rate": 7.028844221105528e-05, - "loss": 4.7769, - "step": 30073 - }, - { - "epoch": 15.683963494132986, - "grad_norm": 1.4728963375091553, - "learning_rate": 7.028743718592966e-05, - "loss": 4.9205, - "step": 30074 - }, - { - "epoch": 15.684485006518905, - "grad_norm": 1.5121079683303833, - "learning_rate": 7.028643216080403e-05, - "loss": 5.5626, - "step": 30075 - }, - { - "epoch": 15.685006518904824, - "grad_norm": 1.4936178922653198, - "learning_rate": 7.02854271356784e-05, - "loss": 5.5618, - "step": 30076 - }, - { - "epoch": 15.685528031290744, - "grad_norm": 1.515897512435913, - "learning_rate": 7.028442211055278e-05, - "loss": 5.4672, - "step": 30077 - }, - { - "epoch": 15.686049543676662, - "grad_norm": 2.133845806121826, - "learning_rate": 7.028341708542714e-05, - "loss": 5.0213, - "step": 30078 - }, - { - "epoch": 15.686571056062581, - "grad_norm": 1.446158528327942, - "learning_rate": 7.02824120603015e-05, - "loss": 5.3292, - "step": 30079 - }, - { - "epoch": 15.6870925684485, - "grad_norm": 1.6821244955062866, - "learning_rate": 7.028140703517588e-05, - "loss": 5.1643, - "step": 30080 - }, - { - "epoch": 15.68761408083442, - "grad_norm": 1.4575139284133911, - "learning_rate": 7.028040201005025e-05, - "loss": 5.3318, - "step": 30081 - }, - { - "epoch": 15.68813559322034, - "grad_norm": 1.480717658996582, - "learning_rate": 7.027939698492462e-05, - "loss": 5.3023, - "step": 30082 - }, - { - "epoch": 15.688657105606259, - "grad_norm": 1.512333869934082, - "learning_rate": 7.027839195979899e-05, - "loss": 5.2625, - "step": 30083 - }, - { - "epoch": 15.689178617992177, - "grad_norm": 1.5238534212112427, - "learning_rate": 7.027738693467337e-05, - "loss": 5.4947, - "step": 30084 - }, - { - "epoch": 15.689700130378096, - "grad_norm": 1.4411053657531738, - "learning_rate": 7.027638190954774e-05, - "loss": 4.9276, - "step": 30085 - }, - { - "epoch": 15.690221642764016, - "grad_norm": 1.5673589706420898, - "learning_rate": 7.027537688442212e-05, - "loss": 5.4591, - "step": 30086 - }, - { - "epoch": 15.690743155149935, - "grad_norm": 1.546901822090149, - "learning_rate": 7.027437185929649e-05, - "loss": 5.3921, - "step": 30087 - }, - { - "epoch": 15.691264667535854, - "grad_norm": 1.443274974822998, - "learning_rate": 7.027336683417086e-05, - "loss": 5.4802, - "step": 30088 - }, - { - "epoch": 15.691786179921774, - "grad_norm": 1.6030664443969727, - "learning_rate": 7.027236180904523e-05, - "loss": 5.5033, - "step": 30089 - }, - { - "epoch": 15.692307692307692, - "grad_norm": 1.4636894464492798, - "learning_rate": 7.02713567839196e-05, - "loss": 5.1709, - "step": 30090 - }, - { - "epoch": 15.692829204693611, - "grad_norm": 1.4852144718170166, - "learning_rate": 7.027035175879397e-05, - "loss": 5.3215, - "step": 30091 - }, - { - "epoch": 15.69335071707953, - "grad_norm": 1.4966167211532593, - "learning_rate": 7.026934673366834e-05, - "loss": 5.5281, - "step": 30092 - }, - { - "epoch": 15.69387222946545, - "grad_norm": 1.5151972770690918, - "learning_rate": 7.026834170854271e-05, - "loss": 5.2597, - "step": 30093 - }, - { - "epoch": 15.69439374185137, - "grad_norm": 1.4905990362167358, - "learning_rate": 7.026733668341709e-05, - "loss": 5.5019, - "step": 30094 - }, - { - "epoch": 15.694915254237289, - "grad_norm": 1.3769769668579102, - "learning_rate": 7.026633165829147e-05, - "loss": 5.4832, - "step": 30095 - }, - { - "epoch": 15.695436766623207, - "grad_norm": 1.3926029205322266, - "learning_rate": 7.026532663316583e-05, - "loss": 5.5165, - "step": 30096 - }, - { - "epoch": 15.695958279009126, - "grad_norm": 1.3695495128631592, - "learning_rate": 7.026432160804021e-05, - "loss": 5.3511, - "step": 30097 - }, - { - "epoch": 15.696479791395046, - "grad_norm": 1.3098804950714111, - "learning_rate": 7.026331658291458e-05, - "loss": 5.3845, - "step": 30098 - }, - { - "epoch": 15.697001303780965, - "grad_norm": 1.4412856101989746, - "learning_rate": 7.026231155778895e-05, - "loss": 5.3102, - "step": 30099 - }, - { - "epoch": 15.697522816166884, - "grad_norm": 1.56721031665802, - "learning_rate": 7.026130653266332e-05, - "loss": 5.2577, - "step": 30100 - }, - { - "epoch": 15.698044328552804, - "grad_norm": 1.6231889724731445, - "learning_rate": 7.02603015075377e-05, - "loss": 4.7194, - "step": 30101 - }, - { - "epoch": 15.698565840938722, - "grad_norm": 1.4875494241714478, - "learning_rate": 7.025929648241206e-05, - "loss": 5.2453, - "step": 30102 - }, - { - "epoch": 15.699087353324641, - "grad_norm": 1.4730647802352905, - "learning_rate": 7.025829145728644e-05, - "loss": 5.741, - "step": 30103 - }, - { - "epoch": 15.69960886571056, - "grad_norm": 1.4078725576400757, - "learning_rate": 7.02572864321608e-05, - "loss": 5.9139, - "step": 30104 - }, - { - "epoch": 15.70013037809648, - "grad_norm": 1.502928376197815, - "learning_rate": 7.025628140703518e-05, - "loss": 5.3416, - "step": 30105 - }, - { - "epoch": 15.7006518904824, - "grad_norm": 1.4487597942352295, - "learning_rate": 7.025527638190956e-05, - "loss": 5.7301, - "step": 30106 - }, - { - "epoch": 15.701173402868317, - "grad_norm": 1.526648998260498, - "learning_rate": 7.025427135678392e-05, - "loss": 5.0563, - "step": 30107 - }, - { - "epoch": 15.701694915254237, - "grad_norm": 1.458340048789978, - "learning_rate": 7.02532663316583e-05, - "loss": 5.3989, - "step": 30108 - }, - { - "epoch": 15.702216427640156, - "grad_norm": 1.5044630765914917, - "learning_rate": 7.025226130653266e-05, - "loss": 5.5301, - "step": 30109 - }, - { - "epoch": 15.702737940026076, - "grad_norm": 1.3620439767837524, - "learning_rate": 7.025125628140704e-05, - "loss": 5.7877, - "step": 30110 - }, - { - "epoch": 15.703259452411995, - "grad_norm": 1.4689435958862305, - "learning_rate": 7.02502512562814e-05, - "loss": 5.4506, - "step": 30111 - }, - { - "epoch": 15.703780964797915, - "grad_norm": 1.4611653089523315, - "learning_rate": 7.024924623115578e-05, - "loss": 5.1924, - "step": 30112 - }, - { - "epoch": 15.704302477183834, - "grad_norm": 1.5253775119781494, - "learning_rate": 7.024824120603015e-05, - "loss": 5.2959, - "step": 30113 - }, - { - "epoch": 15.704823989569752, - "grad_norm": 1.468432068824768, - "learning_rate": 7.024723618090453e-05, - "loss": 5.2127, - "step": 30114 - }, - { - "epoch": 15.705345501955671, - "grad_norm": 1.5194251537322998, - "learning_rate": 7.02462311557789e-05, - "loss": 5.3093, - "step": 30115 - }, - { - "epoch": 15.70586701434159, - "grad_norm": 1.569901704788208, - "learning_rate": 7.024522613065328e-05, - "loss": 5.6463, - "step": 30116 - }, - { - "epoch": 15.70638852672751, - "grad_norm": 1.4999823570251465, - "learning_rate": 7.024422110552765e-05, - "loss": 5.6806, - "step": 30117 - }, - { - "epoch": 15.70691003911343, - "grad_norm": 1.480406641960144, - "learning_rate": 7.024321608040202e-05, - "loss": 5.3573, - "step": 30118 - }, - { - "epoch": 15.707431551499347, - "grad_norm": 1.4196051359176636, - "learning_rate": 7.024221105527639e-05, - "loss": 5.4391, - "step": 30119 - }, - { - "epoch": 15.707953063885267, - "grad_norm": 1.4991166591644287, - "learning_rate": 7.024120603015075e-05, - "loss": 5.3981, - "step": 30120 - }, - { - "epoch": 15.708474576271186, - "grad_norm": 1.5334124565124512, - "learning_rate": 7.024020100502513e-05, - "loss": 5.1219, - "step": 30121 - }, - { - "epoch": 15.708996088657106, - "grad_norm": 1.4536302089691162, - "learning_rate": 7.02391959798995e-05, - "loss": 5.4865, - "step": 30122 - }, - { - "epoch": 15.709517601043025, - "grad_norm": 1.5017125606536865, - "learning_rate": 7.023819095477387e-05, - "loss": 4.9673, - "step": 30123 - }, - { - "epoch": 15.710039113428945, - "grad_norm": 1.4831936359405518, - "learning_rate": 7.023718592964824e-05, - "loss": 5.1858, - "step": 30124 - }, - { - "epoch": 15.710560625814864, - "grad_norm": 1.379374384880066, - "learning_rate": 7.023618090452261e-05, - "loss": 5.5918, - "step": 30125 - }, - { - "epoch": 15.711082138200782, - "grad_norm": 1.5081045627593994, - "learning_rate": 7.023517587939699e-05, - "loss": 5.1555, - "step": 30126 - }, - { - "epoch": 15.711603650586701, - "grad_norm": 1.414910078048706, - "learning_rate": 7.023417085427137e-05, - "loss": 5.789, - "step": 30127 - }, - { - "epoch": 15.71212516297262, - "grad_norm": 1.4945827722549438, - "learning_rate": 7.023316582914573e-05, - "loss": 5.3886, - "step": 30128 - }, - { - "epoch": 15.71264667535854, - "grad_norm": 1.4862589836120605, - "learning_rate": 7.023216080402011e-05, - "loss": 5.1549, - "step": 30129 - }, - { - "epoch": 15.71316818774446, - "grad_norm": 1.437087059020996, - "learning_rate": 7.023115577889448e-05, - "loss": 5.8176, - "step": 30130 - }, - { - "epoch": 15.713689700130377, - "grad_norm": 1.4808441400527954, - "learning_rate": 7.023015075376885e-05, - "loss": 5.5085, - "step": 30131 - }, - { - "epoch": 15.714211212516297, - "grad_norm": 1.364935040473938, - "learning_rate": 7.022914572864322e-05, - "loss": 5.461, - "step": 30132 - }, - { - "epoch": 15.714732724902216, - "grad_norm": 1.5012834072113037, - "learning_rate": 7.022814070351758e-05, - "loss": 5.357, - "step": 30133 - }, - { - "epoch": 15.715254237288136, - "grad_norm": 1.3821066617965698, - "learning_rate": 7.022713567839196e-05, - "loss": 5.3101, - "step": 30134 - }, - { - "epoch": 15.715775749674055, - "grad_norm": 1.281141996383667, - "learning_rate": 7.022613065326632e-05, - "loss": 5.901, - "step": 30135 - }, - { - "epoch": 15.716297262059975, - "grad_norm": 1.3726869821548462, - "learning_rate": 7.02251256281407e-05, - "loss": 5.5845, - "step": 30136 - }, - { - "epoch": 15.716818774445892, - "grad_norm": 1.4535654783248901, - "learning_rate": 7.022412060301508e-05, - "loss": 5.5668, - "step": 30137 - }, - { - "epoch": 15.717340286831812, - "grad_norm": 1.486040472984314, - "learning_rate": 7.022311557788946e-05, - "loss": 5.7832, - "step": 30138 - }, - { - "epoch": 15.717861799217731, - "grad_norm": 1.5427513122558594, - "learning_rate": 7.022211055276382e-05, - "loss": 5.0528, - "step": 30139 - }, - { - "epoch": 15.71838331160365, - "grad_norm": 1.3634629249572754, - "learning_rate": 7.02211055276382e-05, - "loss": 4.7972, - "step": 30140 - }, - { - "epoch": 15.71890482398957, - "grad_norm": 1.4050172567367554, - "learning_rate": 7.022010050251256e-05, - "loss": 5.7308, - "step": 30141 - }, - { - "epoch": 15.71942633637549, - "grad_norm": 1.4565869569778442, - "learning_rate": 7.021909547738694e-05, - "loss": 5.2719, - "step": 30142 - }, - { - "epoch": 15.719947848761407, - "grad_norm": 1.4980944395065308, - "learning_rate": 7.02180904522613e-05, - "loss": 5.7321, - "step": 30143 - }, - { - "epoch": 15.720469361147327, - "grad_norm": 1.4566271305084229, - "learning_rate": 7.021708542713568e-05, - "loss": 5.284, - "step": 30144 - }, - { - "epoch": 15.720990873533246, - "grad_norm": 1.5803946256637573, - "learning_rate": 7.021608040201005e-05, - "loss": 5.2503, - "step": 30145 - }, - { - "epoch": 15.721512385919166, - "grad_norm": 1.444300651550293, - "learning_rate": 7.021507537688443e-05, - "loss": 5.74, - "step": 30146 - }, - { - "epoch": 15.722033898305085, - "grad_norm": 1.4602336883544922, - "learning_rate": 7.02140703517588e-05, - "loss": 5.9577, - "step": 30147 - }, - { - "epoch": 15.722555410691005, - "grad_norm": 1.4776623249053955, - "learning_rate": 7.021306532663317e-05, - "loss": 5.7194, - "step": 30148 - }, - { - "epoch": 15.723076923076922, - "grad_norm": 1.5597072839736938, - "learning_rate": 7.021206030150755e-05, - "loss": 5.1718, - "step": 30149 - }, - { - "epoch": 15.723598435462842, - "grad_norm": 1.5934633016586304, - "learning_rate": 7.021105527638191e-05, - "loss": 5.3503, - "step": 30150 - }, - { - "epoch": 15.724119947848761, - "grad_norm": 1.513995885848999, - "learning_rate": 7.021005025125629e-05, - "loss": 5.1708, - "step": 30151 - }, - { - "epoch": 15.72464146023468, - "grad_norm": 1.4050647020339966, - "learning_rate": 7.020904522613065e-05, - "loss": 5.5868, - "step": 30152 - }, - { - "epoch": 15.7251629726206, - "grad_norm": 1.4873361587524414, - "learning_rate": 7.020804020100503e-05, - "loss": 5.2993, - "step": 30153 - }, - { - "epoch": 15.72568448500652, - "grad_norm": 1.5016478300094604, - "learning_rate": 7.02070351758794e-05, - "loss": 5.6047, - "step": 30154 - }, - { - "epoch": 15.726205997392437, - "grad_norm": 1.4087773561477661, - "learning_rate": 7.020603015075377e-05, - "loss": 5.6429, - "step": 30155 - }, - { - "epoch": 15.726727509778357, - "grad_norm": 1.486862301826477, - "learning_rate": 7.020502512562814e-05, - "loss": 5.0503, - "step": 30156 - }, - { - "epoch": 15.727249022164276, - "grad_norm": 1.3419684171676636, - "learning_rate": 7.020402010050251e-05, - "loss": 5.8188, - "step": 30157 - }, - { - "epoch": 15.727770534550196, - "grad_norm": 1.3484305143356323, - "learning_rate": 7.020301507537689e-05, - "loss": 5.7336, - "step": 30158 - }, - { - "epoch": 15.728292046936115, - "grad_norm": 1.397395372390747, - "learning_rate": 7.020201005025126e-05, - "loss": 5.7866, - "step": 30159 - }, - { - "epoch": 15.728813559322035, - "grad_norm": 1.437585711479187, - "learning_rate": 7.020100502512563e-05, - "loss": 5.0045, - "step": 30160 - }, - { - "epoch": 15.729335071707952, - "grad_norm": 1.348577618598938, - "learning_rate": 7.02e-05, - "loss": 5.7505, - "step": 30161 - }, - { - "epoch": 15.729856584093872, - "grad_norm": 1.6531016826629639, - "learning_rate": 7.019899497487438e-05, - "loss": 4.7938, - "step": 30162 - }, - { - "epoch": 15.730378096479791, - "grad_norm": 1.3978906869888306, - "learning_rate": 7.019798994974874e-05, - "loss": 5.3425, - "step": 30163 - }, - { - "epoch": 15.73089960886571, - "grad_norm": 1.4985617399215698, - "learning_rate": 7.019698492462312e-05, - "loss": 5.2903, - "step": 30164 - }, - { - "epoch": 15.73142112125163, - "grad_norm": 1.495753526687622, - "learning_rate": 7.019597989949748e-05, - "loss": 5.2949, - "step": 30165 - }, - { - "epoch": 15.73194263363755, - "grad_norm": 1.5717226266860962, - "learning_rate": 7.019497487437186e-05, - "loss": 5.5327, - "step": 30166 - }, - { - "epoch": 15.732464146023467, - "grad_norm": 1.5810070037841797, - "learning_rate": 7.019396984924624e-05, - "loss": 5.518, - "step": 30167 - }, - { - "epoch": 15.732985658409387, - "grad_norm": 1.4294079542160034, - "learning_rate": 7.019296482412062e-05, - "loss": 5.162, - "step": 30168 - }, - { - "epoch": 15.733507170795306, - "grad_norm": 1.4951432943344116, - "learning_rate": 7.019195979899498e-05, - "loss": 5.3655, - "step": 30169 - }, - { - "epoch": 15.734028683181226, - "grad_norm": 1.3723925352096558, - "learning_rate": 7.019095477386936e-05, - "loss": 5.7716, - "step": 30170 - }, - { - "epoch": 15.734550195567145, - "grad_norm": 1.3757320642471313, - "learning_rate": 7.018994974874372e-05, - "loss": 5.5961, - "step": 30171 - }, - { - "epoch": 15.735071707953065, - "grad_norm": 1.527732491493225, - "learning_rate": 7.018894472361809e-05, - "loss": 5.2398, - "step": 30172 - }, - { - "epoch": 15.735593220338982, - "grad_norm": 1.4175294637680054, - "learning_rate": 7.018793969849246e-05, - "loss": 5.2482, - "step": 30173 - }, - { - "epoch": 15.736114732724902, - "grad_norm": 1.5223379135131836, - "learning_rate": 7.018693467336683e-05, - "loss": 4.8564, - "step": 30174 - }, - { - "epoch": 15.736636245110821, - "grad_norm": 1.5282152891159058, - "learning_rate": 7.018592964824121e-05, - "loss": 5.3646, - "step": 30175 - }, - { - "epoch": 15.73715775749674, - "grad_norm": 1.4716777801513672, - "learning_rate": 7.018492462311557e-05, - "loss": 5.467, - "step": 30176 - }, - { - "epoch": 15.73767926988266, - "grad_norm": 1.4521106481552124, - "learning_rate": 7.018391959798995e-05, - "loss": 5.671, - "step": 30177 - }, - { - "epoch": 15.73820078226858, - "grad_norm": 1.5095267295837402, - "learning_rate": 7.018291457286433e-05, - "loss": 4.8514, - "step": 30178 - }, - { - "epoch": 15.738722294654497, - "grad_norm": 1.5374425649642944, - "learning_rate": 7.01819095477387e-05, - "loss": 5.4933, - "step": 30179 - }, - { - "epoch": 15.739243807040417, - "grad_norm": 1.4597798585891724, - "learning_rate": 7.018090452261307e-05, - "loss": 5.2971, - "step": 30180 - }, - { - "epoch": 15.739765319426336, - "grad_norm": 1.4977372884750366, - "learning_rate": 7.017989949748745e-05, - "loss": 5.3486, - "step": 30181 - }, - { - "epoch": 15.740286831812256, - "grad_norm": 1.4383758306503296, - "learning_rate": 7.017889447236181e-05, - "loss": 5.6902, - "step": 30182 - }, - { - "epoch": 15.740808344198175, - "grad_norm": 1.5290420055389404, - "learning_rate": 7.017788944723619e-05, - "loss": 5.3454, - "step": 30183 - }, - { - "epoch": 15.741329856584095, - "grad_norm": 1.5572789907455444, - "learning_rate": 7.017688442211055e-05, - "loss": 5.4163, - "step": 30184 - }, - { - "epoch": 15.741851368970012, - "grad_norm": 1.4298185110092163, - "learning_rate": 7.017587939698492e-05, - "loss": 5.234, - "step": 30185 - }, - { - "epoch": 15.742372881355932, - "grad_norm": 1.3295146226882935, - "learning_rate": 7.01748743718593e-05, - "loss": 5.9275, - "step": 30186 - }, - { - "epoch": 15.742894393741851, - "grad_norm": 1.4323266744613647, - "learning_rate": 7.017386934673367e-05, - "loss": 5.6231, - "step": 30187 - }, - { - "epoch": 15.74341590612777, - "grad_norm": 1.5963854789733887, - "learning_rate": 7.017286432160805e-05, - "loss": 4.8824, - "step": 30188 - }, - { - "epoch": 15.74393741851369, - "grad_norm": 1.3458635807037354, - "learning_rate": 7.017185929648242e-05, - "loss": 5.7491, - "step": 30189 - }, - { - "epoch": 15.74445893089961, - "grad_norm": 1.4752349853515625, - "learning_rate": 7.017085427135679e-05, - "loss": 5.607, - "step": 30190 - }, - { - "epoch": 15.744980443285527, - "grad_norm": 1.4376788139343262, - "learning_rate": 7.016984924623116e-05, - "loss": 5.6167, - "step": 30191 - }, - { - "epoch": 15.745501955671447, - "grad_norm": 1.5190824270248413, - "learning_rate": 7.016884422110554e-05, - "loss": 5.164, - "step": 30192 - }, - { - "epoch": 15.746023468057366, - "grad_norm": 1.416258454322815, - "learning_rate": 7.01678391959799e-05, - "loss": 5.3686, - "step": 30193 - }, - { - "epoch": 15.746544980443286, - "grad_norm": 1.5090930461883545, - "learning_rate": 7.016683417085428e-05, - "loss": 5.5298, - "step": 30194 - }, - { - "epoch": 15.747066492829205, - "grad_norm": 1.510217308998108, - "learning_rate": 7.016582914572864e-05, - "loss": 4.8194, - "step": 30195 - }, - { - "epoch": 15.747588005215125, - "grad_norm": 1.4666091203689575, - "learning_rate": 7.016482412060302e-05, - "loss": 5.7695, - "step": 30196 - }, - { - "epoch": 15.748109517601042, - "grad_norm": 1.5803765058517456, - "learning_rate": 7.016381909547738e-05, - "loss": 4.9248, - "step": 30197 - }, - { - "epoch": 15.748631029986962, - "grad_norm": 1.4112569093704224, - "learning_rate": 7.016281407035176e-05, - "loss": 5.0087, - "step": 30198 - }, - { - "epoch": 15.749152542372881, - "grad_norm": 1.4698779582977295, - "learning_rate": 7.016180904522614e-05, - "loss": 5.3277, - "step": 30199 - }, - { - "epoch": 15.7496740547588, - "grad_norm": 1.4512596130371094, - "learning_rate": 7.01608040201005e-05, - "loss": 5.5048, - "step": 30200 - }, - { - "epoch": 15.75019556714472, - "grad_norm": 1.3966971635818481, - "learning_rate": 7.015979899497488e-05, - "loss": 5.6992, - "step": 30201 - }, - { - "epoch": 15.750717079530638, - "grad_norm": 1.435630202293396, - "learning_rate": 7.015879396984925e-05, - "loss": 5.341, - "step": 30202 - }, - { - "epoch": 15.751238591916557, - "grad_norm": 1.3827801942825317, - "learning_rate": 7.015778894472362e-05, - "loss": 5.1577, - "step": 30203 - }, - { - "epoch": 15.751760104302477, - "grad_norm": 1.4235506057739258, - "learning_rate": 7.015678391959799e-05, - "loss": 5.4024, - "step": 30204 - }, - { - "epoch": 15.752281616688396, - "grad_norm": 1.44083833694458, - "learning_rate": 7.015577889447237e-05, - "loss": 4.8322, - "step": 30205 - }, - { - "epoch": 15.752803129074316, - "grad_norm": 1.4391957521438599, - "learning_rate": 7.015477386934673e-05, - "loss": 4.8388, - "step": 30206 - }, - { - "epoch": 15.753324641460235, - "grad_norm": 1.4313545227050781, - "learning_rate": 7.015376884422111e-05, - "loss": 5.0861, - "step": 30207 - }, - { - "epoch": 15.753846153846155, - "grad_norm": 1.5501798391342163, - "learning_rate": 7.015276381909549e-05, - "loss": 4.9923, - "step": 30208 - }, - { - "epoch": 15.754367666232072, - "grad_norm": 1.4115206003189087, - "learning_rate": 7.015175879396986e-05, - "loss": 5.3569, - "step": 30209 - }, - { - "epoch": 15.754889178617992, - "grad_norm": 1.3633978366851807, - "learning_rate": 7.015075376884423e-05, - "loss": 5.4291, - "step": 30210 - }, - { - "epoch": 15.755410691003911, - "grad_norm": 1.4322537183761597, - "learning_rate": 7.01497487437186e-05, - "loss": 4.844, - "step": 30211 - }, - { - "epoch": 15.75593220338983, - "grad_norm": 1.370767593383789, - "learning_rate": 7.014874371859297e-05, - "loss": 5.597, - "step": 30212 - }, - { - "epoch": 15.75645371577575, - "grad_norm": 1.5235148668289185, - "learning_rate": 7.014773869346733e-05, - "loss": 5.3128, - "step": 30213 - }, - { - "epoch": 15.756975228161668, - "grad_norm": 1.5164039134979248, - "learning_rate": 7.014673366834171e-05, - "loss": 5.1904, - "step": 30214 - }, - { - "epoch": 15.757496740547587, - "grad_norm": 1.426954984664917, - "learning_rate": 7.014572864321608e-05, - "loss": 5.1578, - "step": 30215 - }, - { - "epoch": 15.758018252933507, - "grad_norm": 1.465244174003601, - "learning_rate": 7.014472361809045e-05, - "loss": 5.2268, - "step": 30216 - }, - { - "epoch": 15.758539765319426, - "grad_norm": 1.4287030696868896, - "learning_rate": 7.014371859296482e-05, - "loss": 5.0801, - "step": 30217 - }, - { - "epoch": 15.759061277705346, - "grad_norm": 1.5752320289611816, - "learning_rate": 7.01427135678392e-05, - "loss": 5.6727, - "step": 30218 - }, - { - "epoch": 15.759582790091265, - "grad_norm": 1.5236029624938965, - "learning_rate": 7.014170854271357e-05, - "loss": 5.4103, - "step": 30219 - }, - { - "epoch": 15.760104302477185, - "grad_norm": 1.4693915843963623, - "learning_rate": 7.014070351758795e-05, - "loss": 5.785, - "step": 30220 - }, - { - "epoch": 15.760625814863102, - "grad_norm": 1.4478628635406494, - "learning_rate": 7.013969849246232e-05, - "loss": 5.7221, - "step": 30221 - }, - { - "epoch": 15.761147327249022, - "grad_norm": 1.5109699964523315, - "learning_rate": 7.01386934673367e-05, - "loss": 5.4545, - "step": 30222 - }, - { - "epoch": 15.761668839634941, - "grad_norm": 1.5151166915893555, - "learning_rate": 7.013768844221106e-05, - "loss": 5.2182, - "step": 30223 - }, - { - "epoch": 15.76219035202086, - "grad_norm": 1.4266761541366577, - "learning_rate": 7.013668341708544e-05, - "loss": 5.2744, - "step": 30224 - }, - { - "epoch": 15.76271186440678, - "grad_norm": 1.4167400598526, - "learning_rate": 7.01356783919598e-05, - "loss": 5.2544, - "step": 30225 - }, - { - "epoch": 15.763233376792698, - "grad_norm": 1.5913041830062866, - "learning_rate": 7.013467336683416e-05, - "loss": 5.3824, - "step": 30226 - }, - { - "epoch": 15.763754889178617, - "grad_norm": 1.4887992143630981, - "learning_rate": 7.013366834170854e-05, - "loss": 5.6152, - "step": 30227 - }, - { - "epoch": 15.764276401564537, - "grad_norm": 1.49791419506073, - "learning_rate": 7.013266331658292e-05, - "loss": 5.4871, - "step": 30228 - }, - { - "epoch": 15.764797913950456, - "grad_norm": 1.4867668151855469, - "learning_rate": 7.01316582914573e-05, - "loss": 5.2592, - "step": 30229 - }, - { - "epoch": 15.765319426336376, - "grad_norm": 1.5742599964141846, - "learning_rate": 7.013065326633166e-05, - "loss": 5.2856, - "step": 30230 - }, - { - "epoch": 15.765840938722295, - "grad_norm": 1.4172331094741821, - "learning_rate": 7.012964824120604e-05, - "loss": 5.6812, - "step": 30231 - }, - { - "epoch": 15.766362451108213, - "grad_norm": 1.443945288658142, - "learning_rate": 7.01286432160804e-05, - "loss": 5.4395, - "step": 30232 - }, - { - "epoch": 15.766883963494132, - "grad_norm": 1.770742654800415, - "learning_rate": 7.012763819095478e-05, - "loss": 5.3092, - "step": 30233 - }, - { - "epoch": 15.767405475880052, - "grad_norm": 1.455727458000183, - "learning_rate": 7.012663316582915e-05, - "loss": 5.6324, - "step": 30234 - }, - { - "epoch": 15.767926988265971, - "grad_norm": 1.4393950700759888, - "learning_rate": 7.012562814070352e-05, - "loss": 5.4127, - "step": 30235 - }, - { - "epoch": 15.76844850065189, - "grad_norm": 1.4052542448043823, - "learning_rate": 7.012462311557789e-05, - "loss": 5.6617, - "step": 30236 - }, - { - "epoch": 15.76897001303781, - "grad_norm": 1.4439090490341187, - "learning_rate": 7.012361809045227e-05, - "loss": 5.6982, - "step": 30237 - }, - { - "epoch": 15.769491525423728, - "grad_norm": 1.579169511795044, - "learning_rate": 7.012261306532663e-05, - "loss": 4.7904, - "step": 30238 - }, - { - "epoch": 15.770013037809647, - "grad_norm": 1.7143194675445557, - "learning_rate": 7.012160804020101e-05, - "loss": 5.1403, - "step": 30239 - }, - { - "epoch": 15.770534550195567, - "grad_norm": 1.6813476085662842, - "learning_rate": 7.012060301507539e-05, - "loss": 5.4355, - "step": 30240 - }, - { - "epoch": 15.771056062581486, - "grad_norm": 1.523077368736267, - "learning_rate": 7.011959798994975e-05, - "loss": 5.1652, - "step": 30241 - }, - { - "epoch": 15.771577574967406, - "grad_norm": 1.5805588960647583, - "learning_rate": 7.011859296482413e-05, - "loss": 5.2963, - "step": 30242 - }, - { - "epoch": 15.772099087353325, - "grad_norm": 1.6018493175506592, - "learning_rate": 7.011758793969849e-05, - "loss": 5.2192, - "step": 30243 - }, - { - "epoch": 15.772620599739243, - "grad_norm": 1.6076209545135498, - "learning_rate": 7.011658291457287e-05, - "loss": 5.6409, - "step": 30244 - }, - { - "epoch": 15.773142112125162, - "grad_norm": 1.5822476148605347, - "learning_rate": 7.011557788944723e-05, - "loss": 5.0721, - "step": 30245 - }, - { - "epoch": 15.773663624511082, - "grad_norm": 1.4780622720718384, - "learning_rate": 7.011457286432161e-05, - "loss": 5.1289, - "step": 30246 - }, - { - "epoch": 15.774185136897001, - "grad_norm": 1.583699345588684, - "learning_rate": 7.011356783919598e-05, - "loss": 5.3503, - "step": 30247 - }, - { - "epoch": 15.77470664928292, - "grad_norm": 1.3723703622817993, - "learning_rate": 7.011256281407035e-05, - "loss": 5.8964, - "step": 30248 - }, - { - "epoch": 15.77522816166884, - "grad_norm": 1.5845354795455933, - "learning_rate": 7.011155778894473e-05, - "loss": 5.2421, - "step": 30249 - }, - { - "epoch": 15.775749674054758, - "grad_norm": 1.4369146823883057, - "learning_rate": 7.011055276381911e-05, - "loss": 5.6531, - "step": 30250 - }, - { - "epoch": 15.776271186440677, - "grad_norm": 1.5074659585952759, - "learning_rate": 7.010954773869347e-05, - "loss": 5.274, - "step": 30251 - }, - { - "epoch": 15.776792698826597, - "grad_norm": 1.5115385055541992, - "learning_rate": 7.010854271356784e-05, - "loss": 5.2251, - "step": 30252 - }, - { - "epoch": 15.777314211212516, - "grad_norm": 1.4266650676727295, - "learning_rate": 7.010753768844222e-05, - "loss": 5.2512, - "step": 30253 - }, - { - "epoch": 15.777835723598436, - "grad_norm": 1.4803398847579956, - "learning_rate": 7.010653266331658e-05, - "loss": 5.5569, - "step": 30254 - }, - { - "epoch": 15.778357235984355, - "grad_norm": 1.4643216133117676, - "learning_rate": 7.010552763819096e-05, - "loss": 5.5092, - "step": 30255 - }, - { - "epoch": 15.778878748370273, - "grad_norm": 1.4863308668136597, - "learning_rate": 7.010452261306532e-05, - "loss": 5.3568, - "step": 30256 - }, - { - "epoch": 15.779400260756193, - "grad_norm": 1.4342734813690186, - "learning_rate": 7.01035175879397e-05, - "loss": 5.7933, - "step": 30257 - }, - { - "epoch": 15.779921773142112, - "grad_norm": 1.4146156311035156, - "learning_rate": 7.010251256281407e-05, - "loss": 5.8327, - "step": 30258 - }, - { - "epoch": 15.780443285528031, - "grad_norm": 1.4502081871032715, - "learning_rate": 7.010150753768844e-05, - "loss": 5.606, - "step": 30259 - }, - { - "epoch": 15.780964797913951, - "grad_norm": 1.4534488916397095, - "learning_rate": 7.010050251256282e-05, - "loss": 5.5607, - "step": 30260 - }, - { - "epoch": 15.78148631029987, - "grad_norm": 1.457743525505066, - "learning_rate": 7.00994974874372e-05, - "loss": 5.5754, - "step": 30261 - }, - { - "epoch": 15.782007822685788, - "grad_norm": 1.4803214073181152, - "learning_rate": 7.009849246231156e-05, - "loss": 5.47, - "step": 30262 - }, - { - "epoch": 15.782529335071708, - "grad_norm": 1.5743012428283691, - "learning_rate": 7.009748743718594e-05, - "loss": 5.3209, - "step": 30263 - }, - { - "epoch": 15.783050847457627, - "grad_norm": 1.4814375638961792, - "learning_rate": 7.00964824120603e-05, - "loss": 5.7683, - "step": 30264 - }, - { - "epoch": 15.783572359843546, - "grad_norm": 1.4395512342453003, - "learning_rate": 7.009547738693467e-05, - "loss": 5.2735, - "step": 30265 - }, - { - "epoch": 15.784093872229466, - "grad_norm": 1.5958936214447021, - "learning_rate": 7.009447236180905e-05, - "loss": 4.9997, - "step": 30266 - }, - { - "epoch": 15.784615384615385, - "grad_norm": 1.4827110767364502, - "learning_rate": 7.009346733668341e-05, - "loss": 5.1913, - "step": 30267 - }, - { - "epoch": 15.785136897001303, - "grad_norm": 1.4316502809524536, - "learning_rate": 7.009246231155779e-05, - "loss": 5.1399, - "step": 30268 - }, - { - "epoch": 15.785658409387223, - "grad_norm": 1.4640116691589355, - "learning_rate": 7.009145728643217e-05, - "loss": 5.7357, - "step": 30269 - }, - { - "epoch": 15.786179921773142, - "grad_norm": 1.3993936777114868, - "learning_rate": 7.009045226130655e-05, - "loss": 5.2903, - "step": 30270 - }, - { - "epoch": 15.786701434159061, - "grad_norm": 1.4506365060806274, - "learning_rate": 7.008944723618091e-05, - "loss": 5.2692, - "step": 30271 - }, - { - "epoch": 15.787222946544981, - "grad_norm": 1.4127753973007202, - "learning_rate": 7.008844221105529e-05, - "loss": 5.5118, - "step": 30272 - }, - { - "epoch": 15.7877444589309, - "grad_norm": 1.3579820394515991, - "learning_rate": 7.008743718592965e-05, - "loss": 5.153, - "step": 30273 - }, - { - "epoch": 15.788265971316818, - "grad_norm": 1.4421052932739258, - "learning_rate": 7.008643216080403e-05, - "loss": 4.9209, - "step": 30274 - }, - { - "epoch": 15.788787483702738, - "grad_norm": 1.3605066537857056, - "learning_rate": 7.00854271356784e-05, - "loss": 5.5654, - "step": 30275 - }, - { - "epoch": 15.789308996088657, - "grad_norm": 1.4109781980514526, - "learning_rate": 7.008442211055277e-05, - "loss": 5.3318, - "step": 30276 - }, - { - "epoch": 15.789830508474576, - "grad_norm": 1.5437045097351074, - "learning_rate": 7.008341708542714e-05, - "loss": 5.061, - "step": 30277 - }, - { - "epoch": 15.790352020860496, - "grad_norm": 1.4617958068847656, - "learning_rate": 7.00824120603015e-05, - "loss": 5.4884, - "step": 30278 - }, - { - "epoch": 15.790873533246415, - "grad_norm": 1.4619839191436768, - "learning_rate": 7.008140703517588e-05, - "loss": 5.3513, - "step": 30279 - }, - { - "epoch": 15.791395045632333, - "grad_norm": 1.4338233470916748, - "learning_rate": 7.008040201005026e-05, - "loss": 5.2349, - "step": 30280 - }, - { - "epoch": 15.791916558018253, - "grad_norm": 1.573731780052185, - "learning_rate": 7.007939698492463e-05, - "loss": 4.7327, - "step": 30281 - }, - { - "epoch": 15.792438070404172, - "grad_norm": 1.5923352241516113, - "learning_rate": 7.0078391959799e-05, - "loss": 5.4223, - "step": 30282 - }, - { - "epoch": 15.792959582790091, - "grad_norm": 1.5130772590637207, - "learning_rate": 7.007738693467338e-05, - "loss": 4.702, - "step": 30283 - }, - { - "epoch": 15.793481095176011, - "grad_norm": 1.4688860177993774, - "learning_rate": 7.007638190954774e-05, - "loss": 5.3455, - "step": 30284 - }, - { - "epoch": 15.79400260756193, - "grad_norm": 1.4514305591583252, - "learning_rate": 7.007537688442212e-05, - "loss": 5.5233, - "step": 30285 - }, - { - "epoch": 15.794524119947848, - "grad_norm": 1.5359165668487549, - "learning_rate": 7.007437185929648e-05, - "loss": 5.3806, - "step": 30286 - }, - { - "epoch": 15.795045632333768, - "grad_norm": 1.5345733165740967, - "learning_rate": 7.007336683417086e-05, - "loss": 4.7766, - "step": 30287 - }, - { - "epoch": 15.795567144719687, - "grad_norm": 1.4962658882141113, - "learning_rate": 7.007236180904522e-05, - "loss": 5.6989, - "step": 30288 - }, - { - "epoch": 15.796088657105607, - "grad_norm": 1.4354151487350464, - "learning_rate": 7.00713567839196e-05, - "loss": 5.7055, - "step": 30289 - }, - { - "epoch": 15.796610169491526, - "grad_norm": 1.4488197565078735, - "learning_rate": 7.007035175879398e-05, - "loss": 5.5342, - "step": 30290 - }, - { - "epoch": 15.797131681877445, - "grad_norm": 1.547020673751831, - "learning_rate": 7.006934673366834e-05, - "loss": 5.0435, - "step": 30291 - }, - { - "epoch": 15.797653194263363, - "grad_norm": 1.4433211088180542, - "learning_rate": 7.006834170854272e-05, - "loss": 5.1811, - "step": 30292 - }, - { - "epoch": 15.798174706649283, - "grad_norm": 1.5391932725906372, - "learning_rate": 7.006733668341709e-05, - "loss": 5.3777, - "step": 30293 - }, - { - "epoch": 15.798696219035202, - "grad_norm": 1.5522042512893677, - "learning_rate": 7.006633165829146e-05, - "loss": 5.3493, - "step": 30294 - }, - { - "epoch": 15.799217731421122, - "grad_norm": 1.396227478981018, - "learning_rate": 7.006532663316583e-05, - "loss": 5.1243, - "step": 30295 - }, - { - "epoch": 15.799739243807041, - "grad_norm": 1.583461046218872, - "learning_rate": 7.00643216080402e-05, - "loss": 5.2669, - "step": 30296 - }, - { - "epoch": 15.800260756192959, - "grad_norm": 1.489331841468811, - "learning_rate": 7.006331658291457e-05, - "loss": 5.3373, - "step": 30297 - }, - { - "epoch": 15.800782268578878, - "grad_norm": 1.3951588869094849, - "learning_rate": 7.006231155778895e-05, - "loss": 5.582, - "step": 30298 - }, - { - "epoch": 15.801303780964798, - "grad_norm": 1.397723913192749, - "learning_rate": 7.006130653266331e-05, - "loss": 5.4156, - "step": 30299 - }, - { - "epoch": 15.801825293350717, - "grad_norm": 1.5721948146820068, - "learning_rate": 7.006030150753769e-05, - "loss": 5.3557, - "step": 30300 - }, - { - "epoch": 15.802346805736637, - "grad_norm": 1.460800290107727, - "learning_rate": 7.005929648241207e-05, - "loss": 5.4339, - "step": 30301 - }, - { - "epoch": 15.802868318122556, - "grad_norm": 1.3733633756637573, - "learning_rate": 7.005829145728645e-05, - "loss": 5.776, - "step": 30302 - }, - { - "epoch": 15.803389830508475, - "grad_norm": 1.366523265838623, - "learning_rate": 7.005728643216081e-05, - "loss": 5.6456, - "step": 30303 - }, - { - "epoch": 15.803911342894393, - "grad_norm": 1.5622129440307617, - "learning_rate": 7.005628140703519e-05, - "loss": 5.3028, - "step": 30304 - }, - { - "epoch": 15.804432855280313, - "grad_norm": 1.323290228843689, - "learning_rate": 7.005527638190955e-05, - "loss": 5.6053, - "step": 30305 - }, - { - "epoch": 15.804954367666232, - "grad_norm": 1.6503397226333618, - "learning_rate": 7.005427135678392e-05, - "loss": 4.9613, - "step": 30306 - }, - { - "epoch": 15.805475880052152, - "grad_norm": 1.5183258056640625, - "learning_rate": 7.00532663316583e-05, - "loss": 5.176, - "step": 30307 - }, - { - "epoch": 15.805997392438071, - "grad_norm": 1.4939725399017334, - "learning_rate": 7.005226130653266e-05, - "loss": 5.2501, - "step": 30308 - }, - { - "epoch": 15.806518904823989, - "grad_norm": 1.5180881023406982, - "learning_rate": 7.005125628140704e-05, - "loss": 5.6875, - "step": 30309 - }, - { - "epoch": 15.807040417209908, - "grad_norm": 1.5132060050964355, - "learning_rate": 7.00502512562814e-05, - "loss": 5.9217, - "step": 30310 - }, - { - "epoch": 15.807561929595828, - "grad_norm": 1.427817702293396, - "learning_rate": 7.004924623115578e-05, - "loss": 5.5494, - "step": 30311 - }, - { - "epoch": 15.808083441981747, - "grad_norm": 1.5039196014404297, - "learning_rate": 7.004824120603016e-05, - "loss": 5.0666, - "step": 30312 - }, - { - "epoch": 15.808604954367667, - "grad_norm": 1.46401846408844, - "learning_rate": 7.004723618090453e-05, - "loss": 4.9176, - "step": 30313 - }, - { - "epoch": 15.809126466753586, - "grad_norm": 1.4994993209838867, - "learning_rate": 7.00462311557789e-05, - "loss": 5.2735, - "step": 30314 - }, - { - "epoch": 15.809647979139505, - "grad_norm": 1.496349573135376, - "learning_rate": 7.004522613065328e-05, - "loss": 5.1483, - "step": 30315 - }, - { - "epoch": 15.810169491525423, - "grad_norm": 1.4826056957244873, - "learning_rate": 7.004422110552764e-05, - "loss": 5.2919, - "step": 30316 - }, - { - "epoch": 15.810691003911343, - "grad_norm": 1.3570637702941895, - "learning_rate": 7.004321608040202e-05, - "loss": 5.1636, - "step": 30317 - }, - { - "epoch": 15.811212516297262, - "grad_norm": 1.5748114585876465, - "learning_rate": 7.004221105527638e-05, - "loss": 5.3891, - "step": 30318 - }, - { - "epoch": 15.811734028683182, - "grad_norm": 1.402992606163025, - "learning_rate": 7.004120603015075e-05, - "loss": 5.6146, - "step": 30319 - }, - { - "epoch": 15.812255541069101, - "grad_norm": 1.4344443082809448, - "learning_rate": 7.004020100502512e-05, - "loss": 4.8849, - "step": 30320 - }, - { - "epoch": 15.812777053455019, - "grad_norm": 1.518977403640747, - "learning_rate": 7.00391959798995e-05, - "loss": 5.667, - "step": 30321 - }, - { - "epoch": 15.813298565840938, - "grad_norm": 1.4516323804855347, - "learning_rate": 7.003819095477388e-05, - "loss": 5.0646, - "step": 30322 - }, - { - "epoch": 15.813820078226858, - "grad_norm": 1.407728672027588, - "learning_rate": 7.003718592964824e-05, - "loss": 5.6019, - "step": 30323 - }, - { - "epoch": 15.814341590612777, - "grad_norm": 1.5073602199554443, - "learning_rate": 7.003618090452262e-05, - "loss": 5.2507, - "step": 30324 - }, - { - "epoch": 15.814863102998697, - "grad_norm": 1.479079246520996, - "learning_rate": 7.003517587939699e-05, - "loss": 5.2116, - "step": 30325 - }, - { - "epoch": 15.815384615384616, - "grad_norm": 1.486585259437561, - "learning_rate": 7.003417085427136e-05, - "loss": 5.5101, - "step": 30326 - }, - { - "epoch": 15.815906127770534, - "grad_norm": 1.4673086404800415, - "learning_rate": 7.003316582914573e-05, - "loss": 5.7109, - "step": 30327 - }, - { - "epoch": 15.816427640156453, - "grad_norm": 1.4464459419250488, - "learning_rate": 7.00321608040201e-05, - "loss": 5.2859, - "step": 30328 - }, - { - "epoch": 15.816949152542373, - "grad_norm": 1.7978161573410034, - "learning_rate": 7.003115577889447e-05, - "loss": 4.7755, - "step": 30329 - }, - { - "epoch": 15.817470664928292, - "grad_norm": 1.3725334405899048, - "learning_rate": 7.003015075376885e-05, - "loss": 5.2073, - "step": 30330 - }, - { - "epoch": 15.817992177314212, - "grad_norm": 1.5304675102233887, - "learning_rate": 7.002914572864321e-05, - "loss": 5.0979, - "step": 30331 - }, - { - "epoch": 15.818513689700131, - "grad_norm": 1.4424408674240112, - "learning_rate": 7.002814070351759e-05, - "loss": 5.1601, - "step": 30332 - }, - { - "epoch": 15.819035202086049, - "grad_norm": 1.3884862661361694, - "learning_rate": 7.002713567839197e-05, - "loss": 5.3976, - "step": 30333 - }, - { - "epoch": 15.819556714471968, - "grad_norm": 1.3795528411865234, - "learning_rate": 7.002613065326633e-05, - "loss": 5.8865, - "step": 30334 - }, - { - "epoch": 15.820078226857888, - "grad_norm": 1.3934326171875, - "learning_rate": 7.002512562814071e-05, - "loss": 5.7565, - "step": 30335 - }, - { - "epoch": 15.820599739243807, - "grad_norm": 1.6211481094360352, - "learning_rate": 7.002412060301508e-05, - "loss": 4.8788, - "step": 30336 - }, - { - "epoch": 15.821121251629727, - "grad_norm": 1.4858163595199585, - "learning_rate": 7.002311557788945e-05, - "loss": 5.5005, - "step": 30337 - }, - { - "epoch": 15.821642764015646, - "grad_norm": 1.4018003940582275, - "learning_rate": 7.002211055276382e-05, - "loss": 5.3265, - "step": 30338 - }, - { - "epoch": 15.822164276401564, - "grad_norm": 1.449812412261963, - "learning_rate": 7.00211055276382e-05, - "loss": 5.5353, - "step": 30339 - }, - { - "epoch": 15.822685788787483, - "grad_norm": 1.4659650325775146, - "learning_rate": 7.002010050251256e-05, - "loss": 5.0264, - "step": 30340 - }, - { - "epoch": 15.823207301173403, - "grad_norm": 1.4892125129699707, - "learning_rate": 7.001909547738694e-05, - "loss": 5.1945, - "step": 30341 - }, - { - "epoch": 15.823728813559322, - "grad_norm": 1.3576672077178955, - "learning_rate": 7.001809045226131e-05, - "loss": 5.8044, - "step": 30342 - }, - { - "epoch": 15.824250325945242, - "grad_norm": 1.4073001146316528, - "learning_rate": 7.001708542713569e-05, - "loss": 5.4917, - "step": 30343 - }, - { - "epoch": 15.824771838331161, - "grad_norm": 1.5428497791290283, - "learning_rate": 7.001608040201006e-05, - "loss": 4.9806, - "step": 30344 - }, - { - "epoch": 15.825293350717079, - "grad_norm": 1.4903943538665771, - "learning_rate": 7.001507537688442e-05, - "loss": 5.4686, - "step": 30345 - }, - { - "epoch": 15.825814863102998, - "grad_norm": 1.5884997844696045, - "learning_rate": 7.00140703517588e-05, - "loss": 5.3543, - "step": 30346 - }, - { - "epoch": 15.826336375488918, - "grad_norm": 1.5219172239303589, - "learning_rate": 7.001306532663316e-05, - "loss": 5.4876, - "step": 30347 - }, - { - "epoch": 15.826857887874837, - "grad_norm": 1.500691533088684, - "learning_rate": 7.001206030150754e-05, - "loss": 5.8044, - "step": 30348 - }, - { - "epoch": 15.827379400260757, - "grad_norm": 1.4560953378677368, - "learning_rate": 7.00110552763819e-05, - "loss": 5.0561, - "step": 30349 - }, - { - "epoch": 15.827900912646676, - "grad_norm": 1.508933663368225, - "learning_rate": 7.001005025125628e-05, - "loss": 5.5507, - "step": 30350 - }, - { - "epoch": 15.828422425032594, - "grad_norm": 1.6466952562332153, - "learning_rate": 7.000904522613065e-05, - "loss": 5.3459, - "step": 30351 - }, - { - "epoch": 15.828943937418513, - "grad_norm": 1.5006712675094604, - "learning_rate": 7.000804020100503e-05, - "loss": 5.1996, - "step": 30352 - }, - { - "epoch": 15.829465449804433, - "grad_norm": 1.4051467180252075, - "learning_rate": 7.00070351758794e-05, - "loss": 5.5875, - "step": 30353 - }, - { - "epoch": 15.829986962190352, - "grad_norm": 1.4293581247329712, - "learning_rate": 7.000603015075378e-05, - "loss": 4.8604, - "step": 30354 - }, - { - "epoch": 15.830508474576272, - "grad_norm": 1.599920630455017, - "learning_rate": 7.000502512562815e-05, - "loss": 4.8441, - "step": 30355 - }, - { - "epoch": 15.831029986962191, - "grad_norm": 1.6312376260757446, - "learning_rate": 7.000402010050252e-05, - "loss": 5.4411, - "step": 30356 - }, - { - "epoch": 15.831551499348109, - "grad_norm": 1.4122059345245361, - "learning_rate": 7.000301507537689e-05, - "loss": 4.9716, - "step": 30357 - }, - { - "epoch": 15.832073011734028, - "grad_norm": 1.5736958980560303, - "learning_rate": 7.000201005025125e-05, - "loss": 5.1735, - "step": 30358 - }, - { - "epoch": 15.832594524119948, - "grad_norm": 1.4192613363265991, - "learning_rate": 7.000100502512563e-05, - "loss": 5.5552, - "step": 30359 - }, - { - "epoch": 15.833116036505867, - "grad_norm": 1.4193211793899536, - "learning_rate": 7e-05, - "loss": 5.3639, - "step": 30360 - }, - { - "epoch": 15.833637548891787, - "grad_norm": 1.498274564743042, - "learning_rate": 6.999899497487437e-05, - "loss": 5.3534, - "step": 30361 - }, - { - "epoch": 15.834159061277706, - "grad_norm": 1.4515576362609863, - "learning_rate": 6.999798994974875e-05, - "loss": 4.7174, - "step": 30362 - }, - { - "epoch": 15.834680573663624, - "grad_norm": 1.5957297086715698, - "learning_rate": 6.999698492462313e-05, - "loss": 5.1258, - "step": 30363 - }, - { - "epoch": 15.835202086049543, - "grad_norm": 1.5344537496566772, - "learning_rate": 6.999597989949749e-05, - "loss": 5.6413, - "step": 30364 - }, - { - "epoch": 15.835723598435463, - "grad_norm": 1.4449163675308228, - "learning_rate": 6.999497487437187e-05, - "loss": 5.3065, - "step": 30365 - }, - { - "epoch": 15.836245110821382, - "grad_norm": 1.6206272840499878, - "learning_rate": 6.999396984924623e-05, - "loss": 5.2545, - "step": 30366 - }, - { - "epoch": 15.836766623207302, - "grad_norm": 1.4722568988800049, - "learning_rate": 6.999296482412061e-05, - "loss": 4.6085, - "step": 30367 - }, - { - "epoch": 15.837288135593221, - "grad_norm": 1.4736316204071045, - "learning_rate": 6.999195979899498e-05, - "loss": 5.0595, - "step": 30368 - }, - { - "epoch": 15.837809647979139, - "grad_norm": 1.5801132917404175, - "learning_rate": 6.999095477386935e-05, - "loss": 5.4711, - "step": 30369 - }, - { - "epoch": 15.838331160365058, - "grad_norm": 1.3795987367630005, - "learning_rate": 6.998994974874372e-05, - "loss": 5.7658, - "step": 30370 - }, - { - "epoch": 15.838852672750978, - "grad_norm": 1.4492318630218506, - "learning_rate": 6.998894472361808e-05, - "loss": 5.4105, - "step": 30371 - }, - { - "epoch": 15.839374185136897, - "grad_norm": 1.7159430980682373, - "learning_rate": 6.998793969849246e-05, - "loss": 5.3871, - "step": 30372 - }, - { - "epoch": 15.839895697522817, - "grad_norm": 1.4795138835906982, - "learning_rate": 6.998693467336684e-05, - "loss": 5.3272, - "step": 30373 - }, - { - "epoch": 15.840417209908736, - "grad_norm": 1.4877936840057373, - "learning_rate": 6.998592964824122e-05, - "loss": 5.245, - "step": 30374 - }, - { - "epoch": 15.840938722294654, - "grad_norm": 1.506435751914978, - "learning_rate": 6.998492462311558e-05, - "loss": 5.4601, - "step": 30375 - }, - { - "epoch": 15.841460234680573, - "grad_norm": 1.3766324520111084, - "learning_rate": 6.998391959798996e-05, - "loss": 5.6365, - "step": 30376 - }, - { - "epoch": 15.841981747066493, - "grad_norm": 1.4744007587432861, - "learning_rate": 6.998291457286432e-05, - "loss": 5.6981, - "step": 30377 - }, - { - "epoch": 15.842503259452412, - "grad_norm": 1.608835220336914, - "learning_rate": 6.99819095477387e-05, - "loss": 4.8463, - "step": 30378 - }, - { - "epoch": 15.843024771838332, - "grad_norm": 1.6056876182556152, - "learning_rate": 6.998090452261306e-05, - "loss": 5.3291, - "step": 30379 - }, - { - "epoch": 15.843546284224251, - "grad_norm": 1.473455786705017, - "learning_rate": 6.997989949748744e-05, - "loss": 5.5035, - "step": 30380 - }, - { - "epoch": 15.844067796610169, - "grad_norm": 1.5780917406082153, - "learning_rate": 6.99788944723618e-05, - "loss": 5.3531, - "step": 30381 - }, - { - "epoch": 15.844589308996088, - "grad_norm": 1.4314134120941162, - "learning_rate": 6.997788944723618e-05, - "loss": 5.2002, - "step": 30382 - }, - { - "epoch": 15.845110821382008, - "grad_norm": 1.4993404150009155, - "learning_rate": 6.997688442211056e-05, - "loss": 5.0333, - "step": 30383 - }, - { - "epoch": 15.845632333767927, - "grad_norm": 1.4315739870071411, - "learning_rate": 6.997587939698493e-05, - "loss": 5.6636, - "step": 30384 - }, - { - "epoch": 15.846153846153847, - "grad_norm": 1.52027428150177, - "learning_rate": 6.99748743718593e-05, - "loss": 5.5697, - "step": 30385 - }, - { - "epoch": 15.846675358539766, - "grad_norm": 1.5204343795776367, - "learning_rate": 6.997386934673367e-05, - "loss": 5.7566, - "step": 30386 - }, - { - "epoch": 15.847196870925684, - "grad_norm": 1.5659565925598145, - "learning_rate": 6.997286432160805e-05, - "loss": 5.4243, - "step": 30387 - }, - { - "epoch": 15.847718383311603, - "grad_norm": 1.5209474563598633, - "learning_rate": 6.997185929648241e-05, - "loss": 5.0206, - "step": 30388 - }, - { - "epoch": 15.848239895697523, - "grad_norm": 1.5513736009597778, - "learning_rate": 6.997085427135679e-05, - "loss": 5.1445, - "step": 30389 - }, - { - "epoch": 15.848761408083442, - "grad_norm": 1.3855254650115967, - "learning_rate": 6.996984924623115e-05, - "loss": 5.7442, - "step": 30390 - }, - { - "epoch": 15.849282920469362, - "grad_norm": 1.5939538478851318, - "learning_rate": 6.996884422110553e-05, - "loss": 5.2, - "step": 30391 - }, - { - "epoch": 15.84980443285528, - "grad_norm": 1.4568800926208496, - "learning_rate": 6.99678391959799e-05, - "loss": 5.523, - "step": 30392 - }, - { - "epoch": 15.850325945241199, - "grad_norm": 1.4494556188583374, - "learning_rate": 6.996683417085427e-05, - "loss": 5.0846, - "step": 30393 - }, - { - "epoch": 15.850847457627118, - "grad_norm": 1.4347728490829468, - "learning_rate": 6.996582914572865e-05, - "loss": 5.635, - "step": 30394 - }, - { - "epoch": 15.851368970013038, - "grad_norm": 1.5396414995193481, - "learning_rate": 6.996482412060303e-05, - "loss": 5.2459, - "step": 30395 - }, - { - "epoch": 15.851890482398957, - "grad_norm": 1.4824477434158325, - "learning_rate": 6.996381909547739e-05, - "loss": 5.4001, - "step": 30396 - }, - { - "epoch": 15.852411994784877, - "grad_norm": 1.4852873086929321, - "learning_rate": 6.996281407035177e-05, - "loss": 5.5832, - "step": 30397 - }, - { - "epoch": 15.852933507170796, - "grad_norm": 1.5934408903121948, - "learning_rate": 6.996180904522613e-05, - "loss": 5.563, - "step": 30398 - }, - { - "epoch": 15.853455019556714, - "grad_norm": 1.5287351608276367, - "learning_rate": 6.99608040201005e-05, - "loss": 5.241, - "step": 30399 - }, - { - "epoch": 15.853976531942633, - "grad_norm": 1.5906566381454468, - "learning_rate": 6.995979899497488e-05, - "loss": 5.1592, - "step": 30400 - }, - { - "epoch": 15.854498044328553, - "grad_norm": 1.4209178686141968, - "learning_rate": 6.995879396984924e-05, - "loss": 5.5639, - "step": 30401 - }, - { - "epoch": 15.855019556714472, - "grad_norm": 1.4845441579818726, - "learning_rate": 6.995778894472362e-05, - "loss": 5.3534, - "step": 30402 - }, - { - "epoch": 15.855541069100392, - "grad_norm": 1.4019620418548584, - "learning_rate": 6.9956783919598e-05, - "loss": 5.6128, - "step": 30403 - }, - { - "epoch": 15.85606258148631, - "grad_norm": 1.466338872909546, - "learning_rate": 6.995577889447237e-05, - "loss": 5.295, - "step": 30404 - }, - { - "epoch": 15.856584093872229, - "grad_norm": 1.467340111732483, - "learning_rate": 6.995477386934674e-05, - "loss": 5.6049, - "step": 30405 - }, - { - "epoch": 15.857105606258148, - "grad_norm": 1.6030455827713013, - "learning_rate": 6.995376884422112e-05, - "loss": 5.1364, - "step": 30406 - }, - { - "epoch": 15.857627118644068, - "grad_norm": 1.5240498781204224, - "learning_rate": 6.995276381909548e-05, - "loss": 5.3148, - "step": 30407 - }, - { - "epoch": 15.858148631029987, - "grad_norm": 1.4664510488510132, - "learning_rate": 6.995175879396986e-05, - "loss": 5.3868, - "step": 30408 - }, - { - "epoch": 15.858670143415907, - "grad_norm": 1.476261854171753, - "learning_rate": 6.995075376884422e-05, - "loss": 5.4668, - "step": 30409 - }, - { - "epoch": 15.859191655801826, - "grad_norm": 1.4599480628967285, - "learning_rate": 6.99497487437186e-05, - "loss": 5.4515, - "step": 30410 - }, - { - "epoch": 15.859713168187744, - "grad_norm": 1.4538235664367676, - "learning_rate": 6.994874371859296e-05, - "loss": 5.4526, - "step": 30411 - }, - { - "epoch": 15.860234680573663, - "grad_norm": 1.4472113847732544, - "learning_rate": 6.994773869346733e-05, - "loss": 5.5173, - "step": 30412 - }, - { - "epoch": 15.860756192959583, - "grad_norm": 1.4640308618545532, - "learning_rate": 6.994673366834171e-05, - "loss": 5.3521, - "step": 30413 - }, - { - "epoch": 15.861277705345502, - "grad_norm": 1.4631510972976685, - "learning_rate": 6.994572864321608e-05, - "loss": 5.2826, - "step": 30414 - }, - { - "epoch": 15.861799217731422, - "grad_norm": 1.4297021627426147, - "learning_rate": 6.994472361809046e-05, - "loss": 5.5989, - "step": 30415 - }, - { - "epoch": 15.86232073011734, - "grad_norm": 1.5185526609420776, - "learning_rate": 6.994371859296483e-05, - "loss": 5.1739, - "step": 30416 - }, - { - "epoch": 15.862842242503259, - "grad_norm": 1.5432533025741577, - "learning_rate": 6.99427135678392e-05, - "loss": 5.2642, - "step": 30417 - }, - { - "epoch": 15.863363754889178, - "grad_norm": 1.6125239133834839, - "learning_rate": 6.994170854271357e-05, - "loss": 4.7749, - "step": 30418 - }, - { - "epoch": 15.863885267275098, - "grad_norm": 1.445418357849121, - "learning_rate": 6.994070351758795e-05, - "loss": 5.4322, - "step": 30419 - }, - { - "epoch": 15.864406779661017, - "grad_norm": 1.4589712619781494, - "learning_rate": 6.993969849246231e-05, - "loss": 5.4965, - "step": 30420 - }, - { - "epoch": 15.864928292046937, - "grad_norm": 1.4509190320968628, - "learning_rate": 6.993869346733669e-05, - "loss": 4.8855, - "step": 30421 - }, - { - "epoch": 15.865449804432854, - "grad_norm": 1.4240574836730957, - "learning_rate": 6.993768844221105e-05, - "loss": 5.5068, - "step": 30422 - }, - { - "epoch": 15.865971316818774, - "grad_norm": 1.485298752784729, - "learning_rate": 6.993668341708543e-05, - "loss": 5.4887, - "step": 30423 - }, - { - "epoch": 15.866492829204693, - "grad_norm": 1.5533255338668823, - "learning_rate": 6.993567839195981e-05, - "loss": 5.4599, - "step": 30424 - }, - { - "epoch": 15.867014341590613, - "grad_norm": 1.5429524183273315, - "learning_rate": 6.993467336683417e-05, - "loss": 5.1763, - "step": 30425 - }, - { - "epoch": 15.867535853976532, - "grad_norm": 1.4788702726364136, - "learning_rate": 6.993366834170855e-05, - "loss": 5.1141, - "step": 30426 - }, - { - "epoch": 15.868057366362452, - "grad_norm": 1.5168298482894897, - "learning_rate": 6.993266331658292e-05, - "loss": 5.4283, - "step": 30427 - }, - { - "epoch": 15.86857887874837, - "grad_norm": 1.4471083879470825, - "learning_rate": 6.993165829145729e-05, - "loss": 5.691, - "step": 30428 - }, - { - "epoch": 15.869100391134289, - "grad_norm": 1.7111717462539673, - "learning_rate": 6.993065326633166e-05, - "loss": 4.7194, - "step": 30429 - }, - { - "epoch": 15.869621903520208, - "grad_norm": 1.5433863401412964, - "learning_rate": 6.992964824120604e-05, - "loss": 5.3917, - "step": 30430 - }, - { - "epoch": 15.870143415906128, - "grad_norm": 1.45786452293396, - "learning_rate": 6.99286432160804e-05, - "loss": 5.7097, - "step": 30431 - }, - { - "epoch": 15.870664928292047, - "grad_norm": 1.5138249397277832, - "learning_rate": 6.992763819095478e-05, - "loss": 5.5106, - "step": 30432 - }, - { - "epoch": 15.871186440677967, - "grad_norm": 1.423102855682373, - "learning_rate": 6.992663316582914e-05, - "loss": 5.0892, - "step": 30433 - }, - { - "epoch": 15.871707953063884, - "grad_norm": 1.4221900701522827, - "learning_rate": 6.992562814070352e-05, - "loss": 5.7819, - "step": 30434 - }, - { - "epoch": 15.872229465449804, - "grad_norm": 1.4909576177597046, - "learning_rate": 6.99246231155779e-05, - "loss": 5.397, - "step": 30435 - }, - { - "epoch": 15.872750977835723, - "grad_norm": 1.4447462558746338, - "learning_rate": 6.992361809045228e-05, - "loss": 5.4251, - "step": 30436 - }, - { - "epoch": 15.873272490221643, - "grad_norm": 1.5446988344192505, - "learning_rate": 6.992261306532664e-05, - "loss": 5.6919, - "step": 30437 - }, - { - "epoch": 15.873794002607562, - "grad_norm": 1.47120201587677, - "learning_rate": 6.9921608040201e-05, - "loss": 5.2147, - "step": 30438 - }, - { - "epoch": 15.874315514993482, - "grad_norm": 1.4714611768722534, - "learning_rate": 6.992060301507538e-05, - "loss": 4.6976, - "step": 30439 - }, - { - "epoch": 15.8748370273794, - "grad_norm": 1.6353895664215088, - "learning_rate": 6.991959798994975e-05, - "loss": 4.7489, - "step": 30440 - }, - { - "epoch": 15.875358539765319, - "grad_norm": 1.4875680208206177, - "learning_rate": 6.991859296482412e-05, - "loss": 5.6537, - "step": 30441 - }, - { - "epoch": 15.875880052151238, - "grad_norm": 1.4406567811965942, - "learning_rate": 6.991758793969849e-05, - "loss": 5.1543, - "step": 30442 - }, - { - "epoch": 15.876401564537158, - "grad_norm": 1.4801162481307983, - "learning_rate": 6.991658291457287e-05, - "loss": 5.1982, - "step": 30443 - }, - { - "epoch": 15.876923076923077, - "grad_norm": 1.4139851331710815, - "learning_rate": 6.991557788944724e-05, - "loss": 5.3198, - "step": 30444 - }, - { - "epoch": 15.877444589308997, - "grad_norm": 1.3892194032669067, - "learning_rate": 6.991457286432162e-05, - "loss": 5.4081, - "step": 30445 - }, - { - "epoch": 15.877966101694915, - "grad_norm": 1.4366719722747803, - "learning_rate": 6.991356783919599e-05, - "loss": 5.481, - "step": 30446 - }, - { - "epoch": 15.878487614080834, - "grad_norm": 1.4273765087127686, - "learning_rate": 6.991256281407036e-05, - "loss": 5.7242, - "step": 30447 - }, - { - "epoch": 15.879009126466753, - "grad_norm": 1.5065226554870605, - "learning_rate": 6.991155778894473e-05, - "loss": 5.3761, - "step": 30448 - }, - { - "epoch": 15.879530638852673, - "grad_norm": 1.5708869695663452, - "learning_rate": 6.99105527638191e-05, - "loss": 5.2311, - "step": 30449 - }, - { - "epoch": 15.880052151238592, - "grad_norm": 1.6013001203536987, - "learning_rate": 6.990954773869347e-05, - "loss": 5.2526, - "step": 30450 - }, - { - "epoch": 15.880573663624512, - "grad_norm": 1.5219001770019531, - "learning_rate": 6.990854271356783e-05, - "loss": 5.2146, - "step": 30451 - }, - { - "epoch": 15.88109517601043, - "grad_norm": 1.4283350706100464, - "learning_rate": 6.990753768844221e-05, - "loss": 5.5954, - "step": 30452 - }, - { - "epoch": 15.881616688396349, - "grad_norm": 1.5214449167251587, - "learning_rate": 6.990653266331658e-05, - "loss": 5.7109, - "step": 30453 - }, - { - "epoch": 15.882138200782268, - "grad_norm": 1.5626554489135742, - "learning_rate": 6.990552763819095e-05, - "loss": 5.5268, - "step": 30454 - }, - { - "epoch": 15.882659713168188, - "grad_norm": 1.4695441722869873, - "learning_rate": 6.990452261306533e-05, - "loss": 5.4198, - "step": 30455 - }, - { - "epoch": 15.883181225554107, - "grad_norm": 1.4451723098754883, - "learning_rate": 6.990351758793971e-05, - "loss": 4.7547, - "step": 30456 - }, - { - "epoch": 15.883702737940027, - "grad_norm": 1.4845136404037476, - "learning_rate": 6.990251256281407e-05, - "loss": 5.424, - "step": 30457 - }, - { - "epoch": 15.884224250325945, - "grad_norm": 1.5134435892105103, - "learning_rate": 6.990150753768845e-05, - "loss": 5.6631, - "step": 30458 - }, - { - "epoch": 15.884745762711864, - "grad_norm": 1.4327776432037354, - "learning_rate": 6.990050251256282e-05, - "loss": 5.7436, - "step": 30459 - }, - { - "epoch": 15.885267275097783, - "grad_norm": 1.536957859992981, - "learning_rate": 6.98994974874372e-05, - "loss": 4.7989, - "step": 30460 - }, - { - "epoch": 15.885788787483703, - "grad_norm": 1.50222647190094, - "learning_rate": 6.989849246231156e-05, - "loss": 5.4608, - "step": 30461 - }, - { - "epoch": 15.886310299869622, - "grad_norm": 1.5650010108947754, - "learning_rate": 6.989748743718594e-05, - "loss": 4.9168, - "step": 30462 - }, - { - "epoch": 15.886831812255542, - "grad_norm": 1.5856962203979492, - "learning_rate": 6.98964824120603e-05, - "loss": 5.3551, - "step": 30463 - }, - { - "epoch": 15.88735332464146, - "grad_norm": 1.4486721754074097, - "learning_rate": 6.989547738693466e-05, - "loss": 5.5541, - "step": 30464 - }, - { - "epoch": 15.887874837027379, - "grad_norm": 1.488252878189087, - "learning_rate": 6.989447236180904e-05, - "loss": 5.313, - "step": 30465 - }, - { - "epoch": 15.888396349413298, - "grad_norm": 1.3538436889648438, - "learning_rate": 6.989346733668342e-05, - "loss": 5.8893, - "step": 30466 - }, - { - "epoch": 15.888917861799218, - "grad_norm": 1.537735104560852, - "learning_rate": 6.98924623115578e-05, - "loss": 4.864, - "step": 30467 - }, - { - "epoch": 15.889439374185137, - "grad_norm": 1.4194749593734741, - "learning_rate": 6.989145728643216e-05, - "loss": 5.1102, - "step": 30468 - }, - { - "epoch": 15.889960886571057, - "grad_norm": 1.4197466373443604, - "learning_rate": 6.989045226130654e-05, - "loss": 5.3251, - "step": 30469 - }, - { - "epoch": 15.890482398956975, - "grad_norm": 1.4738084077835083, - "learning_rate": 6.98894472361809e-05, - "loss": 4.8791, - "step": 30470 - }, - { - "epoch": 15.891003911342894, - "grad_norm": 1.510414958000183, - "learning_rate": 6.988844221105528e-05, - "loss": 4.7777, - "step": 30471 - }, - { - "epoch": 15.891525423728813, - "grad_norm": 1.5517263412475586, - "learning_rate": 6.988743718592965e-05, - "loss": 5.3337, - "step": 30472 - }, - { - "epoch": 15.892046936114733, - "grad_norm": 1.4085667133331299, - "learning_rate": 6.988643216080402e-05, - "loss": 5.4821, - "step": 30473 - }, - { - "epoch": 15.892568448500652, - "grad_norm": 1.7838771343231201, - "learning_rate": 6.988542713567839e-05, - "loss": 5.3606, - "step": 30474 - }, - { - "epoch": 15.893089960886572, - "grad_norm": 1.3910187482833862, - "learning_rate": 6.988442211055277e-05, - "loss": 5.5465, - "step": 30475 - }, - { - "epoch": 15.89361147327249, - "grad_norm": 1.500244379043579, - "learning_rate": 6.988341708542714e-05, - "loss": 5.2499, - "step": 30476 - }, - { - "epoch": 15.894132985658409, - "grad_norm": 1.4267665147781372, - "learning_rate": 6.988241206030151e-05, - "loss": 5.4144, - "step": 30477 - }, - { - "epoch": 15.894654498044329, - "grad_norm": 1.6558616161346436, - "learning_rate": 6.988140703517589e-05, - "loss": 4.9437, - "step": 30478 - }, - { - "epoch": 15.895176010430248, - "grad_norm": 1.5679224729537964, - "learning_rate": 6.988040201005025e-05, - "loss": 4.9519, - "step": 30479 - }, - { - "epoch": 15.895697522816167, - "grad_norm": 1.5100799798965454, - "learning_rate": 6.987939698492463e-05, - "loss": 5.5371, - "step": 30480 - }, - { - "epoch": 15.896219035202087, - "grad_norm": 1.4489526748657227, - "learning_rate": 6.987839195979899e-05, - "loss": 5.7444, - "step": 30481 - }, - { - "epoch": 15.896740547588005, - "grad_norm": 1.3624541759490967, - "learning_rate": 6.987738693467337e-05, - "loss": 5.8007, - "step": 30482 - }, - { - "epoch": 15.897262059973924, - "grad_norm": 1.487384557723999, - "learning_rate": 6.987638190954773e-05, - "loss": 5.7486, - "step": 30483 - }, - { - "epoch": 15.897783572359844, - "grad_norm": 1.4637142419815063, - "learning_rate": 6.987537688442211e-05, - "loss": 4.6277, - "step": 30484 - }, - { - "epoch": 15.898305084745763, - "grad_norm": 1.3824745416641235, - "learning_rate": 6.987437185929648e-05, - "loss": 5.833, - "step": 30485 - }, - { - "epoch": 15.898826597131682, - "grad_norm": 1.4130640029907227, - "learning_rate": 6.987336683417085e-05, - "loss": 5.6217, - "step": 30486 - }, - { - "epoch": 15.8993481095176, - "grad_norm": 1.3449180126190186, - "learning_rate": 6.987236180904523e-05, - "loss": 5.7016, - "step": 30487 - }, - { - "epoch": 15.89986962190352, - "grad_norm": 1.4384335279464722, - "learning_rate": 6.987135678391961e-05, - "loss": 5.0689, - "step": 30488 - }, - { - "epoch": 15.900391134289439, - "grad_norm": 1.4603253602981567, - "learning_rate": 6.987035175879397e-05, - "loss": 5.2696, - "step": 30489 - }, - { - "epoch": 15.900912646675359, - "grad_norm": 1.6167163848876953, - "learning_rate": 6.986934673366835e-05, - "loss": 5.0485, - "step": 30490 - }, - { - "epoch": 15.901434159061278, - "grad_norm": 1.4862834215164185, - "learning_rate": 6.986834170854272e-05, - "loss": 5.2703, - "step": 30491 - }, - { - "epoch": 15.901955671447197, - "grad_norm": 1.5175169706344604, - "learning_rate": 6.986733668341708e-05, - "loss": 5.8584, - "step": 30492 - }, - { - "epoch": 15.902477183833117, - "grad_norm": 1.5673614740371704, - "learning_rate": 6.986633165829146e-05, - "loss": 5.4398, - "step": 30493 - }, - { - "epoch": 15.902998696219035, - "grad_norm": 1.4730908870697021, - "learning_rate": 6.986532663316582e-05, - "loss": 5.3463, - "step": 30494 - }, - { - "epoch": 15.903520208604954, - "grad_norm": 1.5304926633834839, - "learning_rate": 6.98643216080402e-05, - "loss": 5.5915, - "step": 30495 - }, - { - "epoch": 15.904041720990874, - "grad_norm": 1.5485382080078125, - "learning_rate": 6.986331658291458e-05, - "loss": 5.2899, - "step": 30496 - }, - { - "epoch": 15.904563233376793, - "grad_norm": 1.4278963804244995, - "learning_rate": 6.986231155778896e-05, - "loss": 4.8495, - "step": 30497 - }, - { - "epoch": 15.905084745762712, - "grad_norm": 1.5443533658981323, - "learning_rate": 6.986130653266332e-05, - "loss": 5.1062, - "step": 30498 - }, - { - "epoch": 15.90560625814863, - "grad_norm": 1.6409554481506348, - "learning_rate": 6.98603015075377e-05, - "loss": 4.2532, - "step": 30499 - }, - { - "epoch": 15.90612777053455, - "grad_norm": 1.407578468322754, - "learning_rate": 6.985929648241206e-05, - "loss": 5.0452, - "step": 30500 - }, - { - "epoch": 15.906649282920469, - "grad_norm": 1.4793956279754639, - "learning_rate": 6.985829145728644e-05, - "loss": 5.4016, - "step": 30501 - }, - { - "epoch": 15.907170795306389, - "grad_norm": 1.45779287815094, - "learning_rate": 6.98572864321608e-05, - "loss": 5.1559, - "step": 30502 - }, - { - "epoch": 15.907692307692308, - "grad_norm": 1.5360219478607178, - "learning_rate": 6.985628140703518e-05, - "loss": 5.3559, - "step": 30503 - }, - { - "epoch": 15.908213820078227, - "grad_norm": 1.6594562530517578, - "learning_rate": 6.985527638190955e-05, - "loss": 5.4329, - "step": 30504 - }, - { - "epoch": 15.908735332464147, - "grad_norm": 1.4395743608474731, - "learning_rate": 6.985427135678391e-05, - "loss": 5.8561, - "step": 30505 - }, - { - "epoch": 15.909256844850065, - "grad_norm": 1.4825384616851807, - "learning_rate": 6.985326633165829e-05, - "loss": 5.1941, - "step": 30506 - }, - { - "epoch": 15.909778357235984, - "grad_norm": 1.5066946744918823, - "learning_rate": 6.985226130653267e-05, - "loss": 5.4394, - "step": 30507 - }, - { - "epoch": 15.910299869621904, - "grad_norm": 1.4870461225509644, - "learning_rate": 6.985125628140705e-05, - "loss": 5.2497, - "step": 30508 - }, - { - "epoch": 15.910821382007823, - "grad_norm": 1.4796022176742554, - "learning_rate": 6.985025125628141e-05, - "loss": 5.4006, - "step": 30509 - }, - { - "epoch": 15.911342894393742, - "grad_norm": 1.5225374698638916, - "learning_rate": 6.984924623115579e-05, - "loss": 5.0695, - "step": 30510 - }, - { - "epoch": 15.91186440677966, - "grad_norm": 1.3843533992767334, - "learning_rate": 6.984824120603015e-05, - "loss": 5.5079, - "step": 30511 - }, - { - "epoch": 15.91238591916558, - "grad_norm": 1.554186224937439, - "learning_rate": 6.984723618090453e-05, - "loss": 4.9157, - "step": 30512 - }, - { - "epoch": 15.9129074315515, - "grad_norm": 1.5130693912506104, - "learning_rate": 6.98462311557789e-05, - "loss": 5.2974, - "step": 30513 - }, - { - "epoch": 15.913428943937419, - "grad_norm": 1.472098708152771, - "learning_rate": 6.984522613065327e-05, - "loss": 5.427, - "step": 30514 - }, - { - "epoch": 15.913950456323338, - "grad_norm": 1.4889572858810425, - "learning_rate": 6.984422110552764e-05, - "loss": 5.2998, - "step": 30515 - }, - { - "epoch": 15.914471968709258, - "grad_norm": 1.5021134614944458, - "learning_rate": 6.984321608040201e-05, - "loss": 4.8687, - "step": 30516 - }, - { - "epoch": 15.914993481095175, - "grad_norm": 1.5493137836456299, - "learning_rate": 6.984221105527639e-05, - "loss": 5.4167, - "step": 30517 - }, - { - "epoch": 15.915514993481095, - "grad_norm": 1.552289605140686, - "learning_rate": 6.984120603015076e-05, - "loss": 5.3652, - "step": 30518 - }, - { - "epoch": 15.916036505867014, - "grad_norm": 1.51752507686615, - "learning_rate": 6.984020100502513e-05, - "loss": 5.2375, - "step": 30519 - }, - { - "epoch": 15.916558018252934, - "grad_norm": 1.5014289617538452, - "learning_rate": 6.98391959798995e-05, - "loss": 5.5662, - "step": 30520 - }, - { - "epoch": 15.917079530638853, - "grad_norm": 1.506789207458496, - "learning_rate": 6.983819095477388e-05, - "loss": 5.2389, - "step": 30521 - }, - { - "epoch": 15.917601043024773, - "grad_norm": 1.5766680240631104, - "learning_rate": 6.983718592964824e-05, - "loss": 5.5072, - "step": 30522 - }, - { - "epoch": 15.91812255541069, - "grad_norm": 1.5283777713775635, - "learning_rate": 6.983618090452262e-05, - "loss": 5.5723, - "step": 30523 - }, - { - "epoch": 15.91864406779661, - "grad_norm": 1.4786508083343506, - "learning_rate": 6.983517587939698e-05, - "loss": 5.1099, - "step": 30524 - }, - { - "epoch": 15.91916558018253, - "grad_norm": 1.5761826038360596, - "learning_rate": 6.983417085427136e-05, - "loss": 5.2308, - "step": 30525 - }, - { - "epoch": 15.919687092568449, - "grad_norm": 1.4257081747055054, - "learning_rate": 6.983316582914572e-05, - "loss": 5.5923, - "step": 30526 - }, - { - "epoch": 15.920208604954368, - "grad_norm": 1.4256097078323364, - "learning_rate": 6.98321608040201e-05, - "loss": 5.6298, - "step": 30527 - }, - { - "epoch": 15.920730117340288, - "grad_norm": 1.4084041118621826, - "learning_rate": 6.983115577889448e-05, - "loss": 5.6928, - "step": 30528 - }, - { - "epoch": 15.921251629726205, - "grad_norm": 1.4722398519515991, - "learning_rate": 6.983015075376886e-05, - "loss": 5.465, - "step": 30529 - }, - { - "epoch": 15.921773142112125, - "grad_norm": 1.4442715644836426, - "learning_rate": 6.982914572864322e-05, - "loss": 5.3873, - "step": 30530 - }, - { - "epoch": 15.922294654498044, - "grad_norm": 1.6161645650863647, - "learning_rate": 6.982814070351759e-05, - "loss": 4.9658, - "step": 30531 - }, - { - "epoch": 15.922816166883964, - "grad_norm": 1.4163345098495483, - "learning_rate": 6.982713567839196e-05, - "loss": 5.7547, - "step": 30532 - }, - { - "epoch": 15.923337679269883, - "grad_norm": 1.5296803712844849, - "learning_rate": 6.982613065326633e-05, - "loss": 5.4842, - "step": 30533 - }, - { - "epoch": 15.923859191655803, - "grad_norm": 1.4596439599990845, - "learning_rate": 6.98251256281407e-05, - "loss": 5.6049, - "step": 30534 - }, - { - "epoch": 15.92438070404172, - "grad_norm": 1.4150499105453491, - "learning_rate": 6.982412060301507e-05, - "loss": 5.5159, - "step": 30535 - }, - { - "epoch": 15.92490221642764, - "grad_norm": 1.405438780784607, - "learning_rate": 6.982311557788945e-05, - "loss": 5.7522, - "step": 30536 - }, - { - "epoch": 15.92542372881356, - "grad_norm": 1.4298430681228638, - "learning_rate": 6.982211055276383e-05, - "loss": 5.439, - "step": 30537 - }, - { - "epoch": 15.925945241199479, - "grad_norm": 1.618770956993103, - "learning_rate": 6.98211055276382e-05, - "loss": 4.7703, - "step": 30538 - }, - { - "epoch": 15.926466753585398, - "grad_norm": 1.4642211198806763, - "learning_rate": 6.982010050251257e-05, - "loss": 5.2547, - "step": 30539 - }, - { - "epoch": 15.926988265971318, - "grad_norm": 1.4510276317596436, - "learning_rate": 6.981909547738695e-05, - "loss": 5.4716, - "step": 30540 - }, - { - "epoch": 15.927509778357235, - "grad_norm": 1.3594450950622559, - "learning_rate": 6.981809045226131e-05, - "loss": 5.6783, - "step": 30541 - }, - { - "epoch": 15.928031290743155, - "grad_norm": 1.5075780153274536, - "learning_rate": 6.981708542713569e-05, - "loss": 5.2341, - "step": 30542 - }, - { - "epoch": 15.928552803129074, - "grad_norm": 1.5501152276992798, - "learning_rate": 6.981608040201005e-05, - "loss": 5.5084, - "step": 30543 - }, - { - "epoch": 15.929074315514994, - "grad_norm": 1.492543339729309, - "learning_rate": 6.981507537688442e-05, - "loss": 5.1334, - "step": 30544 - }, - { - "epoch": 15.929595827900913, - "grad_norm": 1.5194264650344849, - "learning_rate": 6.98140703517588e-05, - "loss": 4.8223, - "step": 30545 - }, - { - "epoch": 15.930117340286833, - "grad_norm": 1.5439493656158447, - "learning_rate": 6.981306532663316e-05, - "loss": 5.0145, - "step": 30546 - }, - { - "epoch": 15.93063885267275, - "grad_norm": 1.459333896636963, - "learning_rate": 6.981206030150754e-05, - "loss": 5.3605, - "step": 30547 - }, - { - "epoch": 15.93116036505867, - "grad_norm": 1.473097324371338, - "learning_rate": 6.981105527638191e-05, - "loss": 5.4467, - "step": 30548 - }, - { - "epoch": 15.93168187744459, - "grad_norm": 1.5625369548797607, - "learning_rate": 6.981005025125629e-05, - "loss": 5.2001, - "step": 30549 - }, - { - "epoch": 15.932203389830509, - "grad_norm": 1.3918023109436035, - "learning_rate": 6.980904522613066e-05, - "loss": 5.6362, - "step": 30550 - }, - { - "epoch": 15.932724902216428, - "grad_norm": 1.4947805404663086, - "learning_rate": 6.980804020100503e-05, - "loss": 4.9681, - "step": 30551 - }, - { - "epoch": 15.933246414602348, - "grad_norm": 1.4852923154830933, - "learning_rate": 6.98070351758794e-05, - "loss": 5.1923, - "step": 30552 - }, - { - "epoch": 15.933767926988265, - "grad_norm": 1.5104726552963257, - "learning_rate": 6.980603015075378e-05, - "loss": 5.5183, - "step": 30553 - }, - { - "epoch": 15.934289439374185, - "grad_norm": 1.4657177925109863, - "learning_rate": 6.980502512562814e-05, - "loss": 5.6832, - "step": 30554 - }, - { - "epoch": 15.934810951760104, - "grad_norm": 1.5006704330444336, - "learning_rate": 6.980402010050252e-05, - "loss": 5.4792, - "step": 30555 - }, - { - "epoch": 15.935332464146024, - "grad_norm": 1.343180775642395, - "learning_rate": 6.980301507537688e-05, - "loss": 5.66, - "step": 30556 - }, - { - "epoch": 15.935853976531943, - "grad_norm": 1.483411431312561, - "learning_rate": 6.980201005025126e-05, - "loss": 5.3568, - "step": 30557 - }, - { - "epoch": 15.936375488917863, - "grad_norm": 1.5658529996871948, - "learning_rate": 6.980100502512564e-05, - "loss": 4.793, - "step": 30558 - }, - { - "epoch": 15.93689700130378, - "grad_norm": 1.6157095432281494, - "learning_rate": 6.98e-05, - "loss": 5.1744, - "step": 30559 - }, - { - "epoch": 15.9374185136897, - "grad_norm": 1.421636700630188, - "learning_rate": 6.979899497487438e-05, - "loss": 5.5081, - "step": 30560 - }, - { - "epoch": 15.93794002607562, - "grad_norm": 1.4810876846313477, - "learning_rate": 6.979798994974874e-05, - "loss": 5.4907, - "step": 30561 - }, - { - "epoch": 15.938461538461539, - "grad_norm": 1.4467607736587524, - "learning_rate": 6.979698492462312e-05, - "loss": 5.2786, - "step": 30562 - }, - { - "epoch": 15.938983050847458, - "grad_norm": 1.5469354391098022, - "learning_rate": 6.979597989949749e-05, - "loss": 5.5457, - "step": 30563 - }, - { - "epoch": 15.939504563233378, - "grad_norm": 1.6746810674667358, - "learning_rate": 6.979497487437186e-05, - "loss": 5.2981, - "step": 30564 - }, - { - "epoch": 15.940026075619295, - "grad_norm": 1.4682585000991821, - "learning_rate": 6.979396984924623e-05, - "loss": 5.2259, - "step": 30565 - }, - { - "epoch": 15.940547588005215, - "grad_norm": 1.5570967197418213, - "learning_rate": 6.97929648241206e-05, - "loss": 5.1437, - "step": 30566 - }, - { - "epoch": 15.941069100391134, - "grad_norm": 1.7059297561645508, - "learning_rate": 6.979195979899497e-05, - "loss": 5.1725, - "step": 30567 - }, - { - "epoch": 15.941590612777054, - "grad_norm": 1.4981975555419922, - "learning_rate": 6.979095477386935e-05, - "loss": 5.6455, - "step": 30568 - }, - { - "epoch": 15.942112125162973, - "grad_norm": 1.5595357418060303, - "learning_rate": 6.978994974874373e-05, - "loss": 5.3786, - "step": 30569 - }, - { - "epoch": 15.94263363754889, - "grad_norm": 1.6488229036331177, - "learning_rate": 6.97889447236181e-05, - "loss": 5.5236, - "step": 30570 - }, - { - "epoch": 15.94315514993481, - "grad_norm": 1.6151862144470215, - "learning_rate": 6.978793969849247e-05, - "loss": 5.1955, - "step": 30571 - }, - { - "epoch": 15.94367666232073, - "grad_norm": 1.4873361587524414, - "learning_rate": 6.978693467336683e-05, - "loss": 5.311, - "step": 30572 - }, - { - "epoch": 15.94419817470665, - "grad_norm": 1.5375458002090454, - "learning_rate": 6.978592964824121e-05, - "loss": 5.1063, - "step": 30573 - }, - { - "epoch": 15.944719687092569, - "grad_norm": 1.500336766242981, - "learning_rate": 6.978492462311558e-05, - "loss": 5.6715, - "step": 30574 - }, - { - "epoch": 15.945241199478488, - "grad_norm": 1.517096996307373, - "learning_rate": 6.978391959798995e-05, - "loss": 4.8429, - "step": 30575 - }, - { - "epoch": 15.945762711864408, - "grad_norm": 1.4847476482391357, - "learning_rate": 6.978291457286432e-05, - "loss": 5.0792, - "step": 30576 - }, - { - "epoch": 15.946284224250325, - "grad_norm": 1.334309458732605, - "learning_rate": 6.97819095477387e-05, - "loss": 5.4444, - "step": 30577 - }, - { - "epoch": 15.946805736636245, - "grad_norm": 1.3794628381729126, - "learning_rate": 6.978090452261307e-05, - "loss": 5.1903, - "step": 30578 - }, - { - "epoch": 15.947327249022164, - "grad_norm": 1.4924976825714111, - "learning_rate": 6.977989949748745e-05, - "loss": 5.2437, - "step": 30579 - }, - { - "epoch": 15.947848761408084, - "grad_norm": 1.3449240922927856, - "learning_rate": 6.977889447236181e-05, - "loss": 5.5655, - "step": 30580 - }, - { - "epoch": 15.948370273794003, - "grad_norm": 1.4085640907287598, - "learning_rate": 6.977788944723619e-05, - "loss": 5.8722, - "step": 30581 - }, - { - "epoch": 15.94889178617992, - "grad_norm": 1.4542884826660156, - "learning_rate": 6.977688442211056e-05, - "loss": 4.9206, - "step": 30582 - }, - { - "epoch": 15.94941329856584, - "grad_norm": 1.5679001808166504, - "learning_rate": 6.977587939698493e-05, - "loss": 5.366, - "step": 30583 - }, - { - "epoch": 15.94993481095176, - "grad_norm": 1.4923242330551147, - "learning_rate": 6.97748743718593e-05, - "loss": 5.1141, - "step": 30584 - }, - { - "epoch": 15.95045632333768, - "grad_norm": 1.4613264799118042, - "learning_rate": 6.977386934673366e-05, - "loss": 5.0599, - "step": 30585 - }, - { - "epoch": 15.950977835723599, - "grad_norm": 1.4012250900268555, - "learning_rate": 6.977286432160804e-05, - "loss": 4.8523, - "step": 30586 - }, - { - "epoch": 15.951499348109518, - "grad_norm": 1.436112880706787, - "learning_rate": 6.97718592964824e-05, - "loss": 5.3189, - "step": 30587 - }, - { - "epoch": 15.952020860495438, - "grad_norm": 1.4335441589355469, - "learning_rate": 6.977085427135678e-05, - "loss": 5.7938, - "step": 30588 - }, - { - "epoch": 15.952542372881355, - "grad_norm": 1.5905483961105347, - "learning_rate": 6.976984924623116e-05, - "loss": 5.2278, - "step": 30589 - }, - { - "epoch": 15.953063885267275, - "grad_norm": 1.5670089721679688, - "learning_rate": 6.976884422110554e-05, - "loss": 5.2917, - "step": 30590 - }, - { - "epoch": 15.953585397653194, - "grad_norm": 1.4105052947998047, - "learning_rate": 6.97678391959799e-05, - "loss": 5.4735, - "step": 30591 - }, - { - "epoch": 15.954106910039114, - "grad_norm": 1.4165291786193848, - "learning_rate": 6.976683417085428e-05, - "loss": 5.76, - "step": 30592 - }, - { - "epoch": 15.954628422425033, - "grad_norm": 1.4285995960235596, - "learning_rate": 6.976582914572865e-05, - "loss": 5.6249, - "step": 30593 - }, - { - "epoch": 15.955149934810951, - "grad_norm": 1.5160346031188965, - "learning_rate": 6.976482412060302e-05, - "loss": 5.5453, - "step": 30594 - }, - { - "epoch": 15.95567144719687, - "grad_norm": 1.464688777923584, - "learning_rate": 6.976381909547739e-05, - "loss": 5.4471, - "step": 30595 - }, - { - "epoch": 15.95619295958279, - "grad_norm": 1.4814977645874023, - "learning_rate": 6.976281407035177e-05, - "loss": 5.2563, - "step": 30596 - }, - { - "epoch": 15.95671447196871, - "grad_norm": 1.6632636785507202, - "learning_rate": 6.976180904522613e-05, - "loss": 5.1775, - "step": 30597 - }, - { - "epoch": 15.957235984354629, - "grad_norm": 1.4240761995315552, - "learning_rate": 6.976080402010051e-05, - "loss": 5.2918, - "step": 30598 - }, - { - "epoch": 15.957757496740548, - "grad_norm": 1.5652005672454834, - "learning_rate": 6.975979899497489e-05, - "loss": 5.1628, - "step": 30599 - }, - { - "epoch": 15.958279009126468, - "grad_norm": 1.4978325366973877, - "learning_rate": 6.975879396984925e-05, - "loss": 5.5343, - "step": 30600 - }, - { - "epoch": 15.958800521512385, - "grad_norm": 1.4447420835494995, - "learning_rate": 6.975778894472363e-05, - "loss": 5.5724, - "step": 30601 - }, - { - "epoch": 15.959322033898305, - "grad_norm": 1.3589071035385132, - "learning_rate": 6.975678391959799e-05, - "loss": 5.7952, - "step": 30602 - }, - { - "epoch": 15.959843546284224, - "grad_norm": 1.54403817653656, - "learning_rate": 6.975577889447237e-05, - "loss": 4.9543, - "step": 30603 - }, - { - "epoch": 15.960365058670144, - "grad_norm": 1.4937928915023804, - "learning_rate": 6.975477386934673e-05, - "loss": 5.4136, - "step": 30604 - }, - { - "epoch": 15.960886571056063, - "grad_norm": 1.552374005317688, - "learning_rate": 6.975376884422111e-05, - "loss": 5.4055, - "step": 30605 - }, - { - "epoch": 15.961408083441981, - "grad_norm": 1.551119089126587, - "learning_rate": 6.975276381909548e-05, - "loss": 5.3265, - "step": 30606 - }, - { - "epoch": 15.9619295958279, - "grad_norm": 1.3994163274765015, - "learning_rate": 6.975175879396985e-05, - "loss": 5.5571, - "step": 30607 - }, - { - "epoch": 15.96245110821382, - "grad_norm": 1.338018536567688, - "learning_rate": 6.975075376884422e-05, - "loss": 5.8717, - "step": 30608 - }, - { - "epoch": 15.96297262059974, - "grad_norm": 1.410319447517395, - "learning_rate": 6.97497487437186e-05, - "loss": 5.6026, - "step": 30609 - }, - { - "epoch": 15.963494132985659, - "grad_norm": 1.4660770893096924, - "learning_rate": 6.974874371859297e-05, - "loss": 5.2006, - "step": 30610 - }, - { - "epoch": 15.964015645371578, - "grad_norm": 1.577864646911621, - "learning_rate": 6.974773869346734e-05, - "loss": 5.1585, - "step": 30611 - }, - { - "epoch": 15.964537157757496, - "grad_norm": 1.5334296226501465, - "learning_rate": 6.974673366834172e-05, - "loss": 5.6559, - "step": 30612 - }, - { - "epoch": 15.965058670143415, - "grad_norm": 1.5045878887176514, - "learning_rate": 6.974572864321608e-05, - "loss": 5.7478, - "step": 30613 - }, - { - "epoch": 15.965580182529335, - "grad_norm": 1.3998953104019165, - "learning_rate": 6.974472361809046e-05, - "loss": 5.4755, - "step": 30614 - }, - { - "epoch": 15.966101694915254, - "grad_norm": 1.3904056549072266, - "learning_rate": 6.974371859296482e-05, - "loss": 5.5681, - "step": 30615 - }, - { - "epoch": 15.966623207301174, - "grad_norm": 1.5973035097122192, - "learning_rate": 6.97427135678392e-05, - "loss": 5.4518, - "step": 30616 - }, - { - "epoch": 15.967144719687093, - "grad_norm": 1.4743354320526123, - "learning_rate": 6.974170854271356e-05, - "loss": 5.6529, - "step": 30617 - }, - { - "epoch": 15.967666232073011, - "grad_norm": 1.4561185836791992, - "learning_rate": 6.974070351758794e-05, - "loss": 5.1494, - "step": 30618 - }, - { - "epoch": 15.96818774445893, - "grad_norm": 1.3941494226455688, - "learning_rate": 6.973969849246232e-05, - "loss": 5.4776, - "step": 30619 - }, - { - "epoch": 15.96870925684485, - "grad_norm": 1.3494110107421875, - "learning_rate": 6.97386934673367e-05, - "loss": 5.5545, - "step": 30620 - }, - { - "epoch": 15.96923076923077, - "grad_norm": 1.4165136814117432, - "learning_rate": 6.973768844221106e-05, - "loss": 5.3575, - "step": 30621 - }, - { - "epoch": 15.969752281616689, - "grad_norm": 1.4111732244491577, - "learning_rate": 6.973668341708544e-05, - "loss": 5.2062, - "step": 30622 - }, - { - "epoch": 15.970273794002608, - "grad_norm": 1.4120473861694336, - "learning_rate": 6.97356783919598e-05, - "loss": 5.8121, - "step": 30623 - }, - { - "epoch": 15.970795306388526, - "grad_norm": 1.4634065628051758, - "learning_rate": 6.973467336683417e-05, - "loss": 5.5427, - "step": 30624 - }, - { - "epoch": 15.971316818774445, - "grad_norm": 1.5014352798461914, - "learning_rate": 6.973366834170855e-05, - "loss": 5.0224, - "step": 30625 - }, - { - "epoch": 15.971838331160365, - "grad_norm": 1.4198403358459473, - "learning_rate": 6.973266331658291e-05, - "loss": 5.453, - "step": 30626 - }, - { - "epoch": 15.972359843546284, - "grad_norm": 1.4322142601013184, - "learning_rate": 6.973165829145729e-05, - "loss": 5.3581, - "step": 30627 - }, - { - "epoch": 15.972881355932204, - "grad_norm": 1.65389883518219, - "learning_rate": 6.973065326633165e-05, - "loss": 4.8223, - "step": 30628 - }, - { - "epoch": 15.973402868318123, - "grad_norm": 1.4208296537399292, - "learning_rate": 6.972964824120603e-05, - "loss": 5.3786, - "step": 30629 - }, - { - "epoch": 15.973924380704041, - "grad_norm": 1.503032922744751, - "learning_rate": 6.972864321608041e-05, - "loss": 5.4808, - "step": 30630 - }, - { - "epoch": 15.97444589308996, - "grad_norm": 1.4899228811264038, - "learning_rate": 6.972763819095479e-05, - "loss": 5.3266, - "step": 30631 - }, - { - "epoch": 15.97496740547588, - "grad_norm": 1.5279884338378906, - "learning_rate": 6.972663316582915e-05, - "loss": 5.0554, - "step": 30632 - }, - { - "epoch": 15.9754889178618, - "grad_norm": 1.4246957302093506, - "learning_rate": 6.972562814070353e-05, - "loss": 5.6907, - "step": 30633 - }, - { - "epoch": 15.976010430247719, - "grad_norm": 1.5697423219680786, - "learning_rate": 6.972462311557789e-05, - "loss": 5.1341, - "step": 30634 - }, - { - "epoch": 15.976531942633638, - "grad_norm": 1.4729206562042236, - "learning_rate": 6.972361809045227e-05, - "loss": 5.3915, - "step": 30635 - }, - { - "epoch": 15.977053455019556, - "grad_norm": 1.4184612035751343, - "learning_rate": 6.972261306532663e-05, - "loss": 5.6344, - "step": 30636 - }, - { - "epoch": 15.977574967405475, - "grad_norm": 1.5247238874435425, - "learning_rate": 6.9721608040201e-05, - "loss": 5.3438, - "step": 30637 - }, - { - "epoch": 15.978096479791395, - "grad_norm": 1.5522558689117432, - "learning_rate": 6.972060301507538e-05, - "loss": 5.0635, - "step": 30638 - }, - { - "epoch": 15.978617992177314, - "grad_norm": 1.473021388053894, - "learning_rate": 6.971959798994974e-05, - "loss": 5.6355, - "step": 30639 - }, - { - "epoch": 15.979139504563234, - "grad_norm": 1.4672517776489258, - "learning_rate": 6.971859296482412e-05, - "loss": 5.5308, - "step": 30640 - }, - { - "epoch": 15.979661016949153, - "grad_norm": 1.4990756511688232, - "learning_rate": 6.97175879396985e-05, - "loss": 5.5324, - "step": 30641 - }, - { - "epoch": 15.980182529335071, - "grad_norm": 1.4561917781829834, - "learning_rate": 6.971658291457287e-05, - "loss": 5.1707, - "step": 30642 - }, - { - "epoch": 15.98070404172099, - "grad_norm": 1.4909234046936035, - "learning_rate": 6.971557788944724e-05, - "loss": 5.3533, - "step": 30643 - }, - { - "epoch": 15.98122555410691, - "grad_norm": 1.6253206729888916, - "learning_rate": 6.971457286432162e-05, - "loss": 5.2287, - "step": 30644 - }, - { - "epoch": 15.98174706649283, - "grad_norm": 1.5763137340545654, - "learning_rate": 6.971356783919598e-05, - "loss": 4.8747, - "step": 30645 - }, - { - "epoch": 15.982268578878749, - "grad_norm": 1.439733624458313, - "learning_rate": 6.971256281407036e-05, - "loss": 5.489, - "step": 30646 - }, - { - "epoch": 15.982790091264668, - "grad_norm": 1.405836820602417, - "learning_rate": 6.971155778894472e-05, - "loss": 5.5814, - "step": 30647 - }, - { - "epoch": 15.983311603650586, - "grad_norm": 1.4562348127365112, - "learning_rate": 6.97105527638191e-05, - "loss": 5.2316, - "step": 30648 - }, - { - "epoch": 15.983833116036505, - "grad_norm": 1.5395832061767578, - "learning_rate": 6.970954773869346e-05, - "loss": 5.5234, - "step": 30649 - }, - { - "epoch": 15.984354628422425, - "grad_norm": 1.5050523281097412, - "learning_rate": 6.970854271356784e-05, - "loss": 5.4651, - "step": 30650 - }, - { - "epoch": 15.984876140808344, - "grad_norm": 1.458001732826233, - "learning_rate": 6.970753768844222e-05, - "loss": 5.7403, - "step": 30651 - }, - { - "epoch": 15.985397653194264, - "grad_norm": 1.4809937477111816, - "learning_rate": 6.970653266331658e-05, - "loss": 5.4148, - "step": 30652 - }, - { - "epoch": 15.985919165580183, - "grad_norm": 1.439950704574585, - "learning_rate": 6.970552763819096e-05, - "loss": 5.7742, - "step": 30653 - }, - { - "epoch": 15.986440677966101, - "grad_norm": 1.495163083076477, - "learning_rate": 6.970452261306533e-05, - "loss": 5.4237, - "step": 30654 - }, - { - "epoch": 15.98696219035202, - "grad_norm": 1.4540765285491943, - "learning_rate": 6.97035175879397e-05, - "loss": 5.4744, - "step": 30655 - }, - { - "epoch": 15.98748370273794, - "grad_norm": 1.5739954710006714, - "learning_rate": 6.970251256281407e-05, - "loss": 5.2633, - "step": 30656 - }, - { - "epoch": 15.98800521512386, - "grad_norm": 1.36847722530365, - "learning_rate": 6.970150753768845e-05, - "loss": 5.2676, - "step": 30657 - }, - { - "epoch": 15.988526727509779, - "grad_norm": 1.4379595518112183, - "learning_rate": 6.970050251256281e-05, - "loss": 5.4446, - "step": 30658 - }, - { - "epoch": 15.989048239895698, - "grad_norm": 1.5577547550201416, - "learning_rate": 6.969949748743719e-05, - "loss": 5.3093, - "step": 30659 - }, - { - "epoch": 15.989569752281616, - "grad_norm": 1.4520848989486694, - "learning_rate": 6.969849246231155e-05, - "loss": 5.5338, - "step": 30660 - }, - { - "epoch": 15.990091264667535, - "grad_norm": 1.5237507820129395, - "learning_rate": 6.969748743718593e-05, - "loss": 5.1066, - "step": 30661 - }, - { - "epoch": 15.990612777053455, - "grad_norm": 1.3882689476013184, - "learning_rate": 6.969648241206031e-05, - "loss": 5.5834, - "step": 30662 - }, - { - "epoch": 15.991134289439374, - "grad_norm": 1.4231932163238525, - "learning_rate": 6.969547738693469e-05, - "loss": 5.3751, - "step": 30663 - }, - { - "epoch": 15.991655801825294, - "grad_norm": 1.5476806163787842, - "learning_rate": 6.969447236180905e-05, - "loss": 4.8789, - "step": 30664 - }, - { - "epoch": 15.992177314211212, - "grad_norm": 1.3835195302963257, - "learning_rate": 6.969346733668342e-05, - "loss": 5.6326, - "step": 30665 - }, - { - "epoch": 15.992698826597131, - "grad_norm": 1.4711713790893555, - "learning_rate": 6.969246231155779e-05, - "loss": 5.2101, - "step": 30666 - }, - { - "epoch": 15.99322033898305, - "grad_norm": 1.4689857959747314, - "learning_rate": 6.969145728643216e-05, - "loss": 5.4065, - "step": 30667 - }, - { - "epoch": 15.99374185136897, - "grad_norm": 1.4382920265197754, - "learning_rate": 6.969045226130654e-05, - "loss": 5.516, - "step": 30668 - }, - { - "epoch": 15.99426336375489, - "grad_norm": 1.571627140045166, - "learning_rate": 6.96894472361809e-05, - "loss": 5.024, - "step": 30669 - }, - { - "epoch": 15.994784876140809, - "grad_norm": 1.5832043886184692, - "learning_rate": 6.968844221105528e-05, - "loss": 4.9485, - "step": 30670 - }, - { - "epoch": 15.995306388526728, - "grad_norm": 1.4603594541549683, - "learning_rate": 6.968743718592966e-05, - "loss": 5.4435, - "step": 30671 - }, - { - "epoch": 15.995827900912646, - "grad_norm": 1.5879589319229126, - "learning_rate": 6.968643216080403e-05, - "loss": 4.913, - "step": 30672 - }, - { - "epoch": 15.996349413298566, - "grad_norm": 1.282865047454834, - "learning_rate": 6.96854271356784e-05, - "loss": 5.8756, - "step": 30673 - }, - { - "epoch": 15.996870925684485, - "grad_norm": 1.5468186140060425, - "learning_rate": 6.968442211055278e-05, - "loss": 5.5638, - "step": 30674 - }, - { - "epoch": 15.997392438070404, - "grad_norm": 1.4741789102554321, - "learning_rate": 6.968341708542714e-05, - "loss": 5.8262, - "step": 30675 - }, - { - "epoch": 15.997913950456324, - "grad_norm": 1.3789113759994507, - "learning_rate": 6.968241206030152e-05, - "loss": 5.7342, - "step": 30676 - }, - { - "epoch": 15.998435462842242, - "grad_norm": 1.506566047668457, - "learning_rate": 6.968140703517588e-05, - "loss": 4.7364, - "step": 30677 - }, - { - "epoch": 15.998956975228161, - "grad_norm": 1.5108981132507324, - "learning_rate": 6.968040201005025e-05, - "loss": 5.0951, - "step": 30678 - }, - { - "epoch": 15.99947848761408, - "grad_norm": 1.6002683639526367, - "learning_rate": 6.967939698492462e-05, - "loss": 5.0293, - "step": 30679 - }, - { - "epoch": 16.0, - "grad_norm": 1.6395447254180908, - "learning_rate": 6.967839195979899e-05, - "loss": 5.3493, - "step": 30680 - }, - { - "epoch": 16.00052151238592, - "grad_norm": 1.4383502006530762, - "learning_rate": 6.967738693467337e-05, - "loss": 5.4975, - "step": 30681 - }, - { - "epoch": 16.00104302477184, - "grad_norm": 1.4164187908172607, - "learning_rate": 6.967638190954774e-05, - "loss": 4.9437, - "step": 30682 - }, - { - "epoch": 16.00156453715776, - "grad_norm": 1.4440916776657104, - "learning_rate": 6.967537688442212e-05, - "loss": 5.4525, - "step": 30683 - }, - { - "epoch": 16.002086049543678, - "grad_norm": 1.5494117736816406, - "learning_rate": 6.967437185929649e-05, - "loss": 5.6075, - "step": 30684 - }, - { - "epoch": 16.002607561929597, - "grad_norm": 1.5211825370788574, - "learning_rate": 6.967336683417086e-05, - "loss": 5.3034, - "step": 30685 - }, - { - "epoch": 16.003129074315513, - "grad_norm": 1.4596824645996094, - "learning_rate": 6.967236180904523e-05, - "loss": 5.6188, - "step": 30686 - }, - { - "epoch": 16.003650586701433, - "grad_norm": 1.4196031093597412, - "learning_rate": 6.96713567839196e-05, - "loss": 5.4304, - "step": 30687 - }, - { - "epoch": 16.004172099087352, - "grad_norm": 1.4587527513504028, - "learning_rate": 6.967035175879397e-05, - "loss": 5.4926, - "step": 30688 - }, - { - "epoch": 16.00469361147327, - "grad_norm": 1.62749445438385, - "learning_rate": 6.966934673366835e-05, - "loss": 5.1088, - "step": 30689 - }, - { - "epoch": 16.00521512385919, - "grad_norm": 1.4530503749847412, - "learning_rate": 6.966834170854271e-05, - "loss": 5.4619, - "step": 30690 - }, - { - "epoch": 16.00573663624511, - "grad_norm": 1.6244885921478271, - "learning_rate": 6.966733668341709e-05, - "loss": 4.9491, - "step": 30691 - }, - { - "epoch": 16.00625814863103, - "grad_norm": 1.4052766561508179, - "learning_rate": 6.966633165829147e-05, - "loss": 5.3446, - "step": 30692 - }, - { - "epoch": 16.00677966101695, - "grad_norm": 1.492931842803955, - "learning_rate": 6.966532663316583e-05, - "loss": 5.0921, - "step": 30693 - }, - { - "epoch": 16.00730117340287, - "grad_norm": 1.4445297718048096, - "learning_rate": 6.966432160804021e-05, - "loss": 4.983, - "step": 30694 - }, - { - "epoch": 16.00782268578879, - "grad_norm": 1.5709081888198853, - "learning_rate": 6.966331658291457e-05, - "loss": 5.2576, - "step": 30695 - }, - { - "epoch": 16.008344198174708, - "grad_norm": 1.4803013801574707, - "learning_rate": 6.966231155778895e-05, - "loss": 5.4899, - "step": 30696 - }, - { - "epoch": 16.008865710560627, - "grad_norm": 1.5680888891220093, - "learning_rate": 6.966130653266332e-05, - "loss": 5.2439, - "step": 30697 - }, - { - "epoch": 16.009387222946543, - "grad_norm": 1.5051196813583374, - "learning_rate": 6.96603015075377e-05, - "loss": 5.573, - "step": 30698 - }, - { - "epoch": 16.009908735332463, - "grad_norm": 1.5143128633499146, - "learning_rate": 6.965929648241206e-05, - "loss": 5.3146, - "step": 30699 - }, - { - "epoch": 16.010430247718382, - "grad_norm": 1.5312607288360596, - "learning_rate": 6.965829145728644e-05, - "loss": 4.7296, - "step": 30700 - }, - { - "epoch": 16.0109517601043, - "grad_norm": 1.3519586324691772, - "learning_rate": 6.96572864321608e-05, - "loss": 5.3393, - "step": 30701 - }, - { - "epoch": 16.01147327249022, - "grad_norm": 1.4549139738082886, - "learning_rate": 6.965628140703518e-05, - "loss": 5.4058, - "step": 30702 - }, - { - "epoch": 16.01199478487614, - "grad_norm": 1.598228931427002, - "learning_rate": 6.965527638190956e-05, - "loss": 5.465, - "step": 30703 - }, - { - "epoch": 16.01251629726206, - "grad_norm": 1.4141230583190918, - "learning_rate": 6.965427135678392e-05, - "loss": 5.3034, - "step": 30704 - }, - { - "epoch": 16.01303780964798, - "grad_norm": 1.5903054475784302, - "learning_rate": 6.96532663316583e-05, - "loss": 4.9786, - "step": 30705 - }, - { - "epoch": 16.0135593220339, - "grad_norm": 1.4474236965179443, - "learning_rate": 6.965226130653266e-05, - "loss": 5.3327, - "step": 30706 - }, - { - "epoch": 16.01408083441982, - "grad_norm": 1.463449478149414, - "learning_rate": 6.965125628140704e-05, - "loss": 5.4723, - "step": 30707 - }, - { - "epoch": 16.014602346805738, - "grad_norm": 1.4115749597549438, - "learning_rate": 6.96502512562814e-05, - "loss": 5.6797, - "step": 30708 - }, - { - "epoch": 16.015123859191657, - "grad_norm": 1.4481877088546753, - "learning_rate": 6.964924623115578e-05, - "loss": 5.3943, - "step": 30709 - }, - { - "epoch": 16.015645371577573, - "grad_norm": 1.4080175161361694, - "learning_rate": 6.964824120603015e-05, - "loss": 5.3358, - "step": 30710 - }, - { - "epoch": 16.016166883963493, - "grad_norm": 1.3580437898635864, - "learning_rate": 6.964723618090452e-05, - "loss": 5.5235, - "step": 30711 - }, - { - "epoch": 16.016688396349412, - "grad_norm": 1.396807312965393, - "learning_rate": 6.96462311557789e-05, - "loss": 5.8003, - "step": 30712 - }, - { - "epoch": 16.01720990873533, - "grad_norm": 1.4874883890151978, - "learning_rate": 6.964522613065328e-05, - "loss": 5.3258, - "step": 30713 - }, - { - "epoch": 16.01773142112125, - "grad_norm": 1.4225165843963623, - "learning_rate": 6.964422110552764e-05, - "loss": 5.4203, - "step": 30714 - }, - { - "epoch": 16.01825293350717, - "grad_norm": 1.3836719989776611, - "learning_rate": 6.964321608040202e-05, - "loss": 5.505, - "step": 30715 - }, - { - "epoch": 16.01877444589309, - "grad_norm": 1.4745255708694458, - "learning_rate": 6.964221105527639e-05, - "loss": 5.4366, - "step": 30716 - }, - { - "epoch": 16.01929595827901, - "grad_norm": 1.3978497982025146, - "learning_rate": 6.964120603015075e-05, - "loss": 5.6013, - "step": 30717 - }, - { - "epoch": 16.01981747066493, - "grad_norm": 1.6980338096618652, - "learning_rate": 6.964020100502513e-05, - "loss": 5.1502, - "step": 30718 - }, - { - "epoch": 16.02033898305085, - "grad_norm": 1.4987436532974243, - "learning_rate": 6.963919597989949e-05, - "loss": 5.1799, - "step": 30719 - }, - { - "epoch": 16.020860495436768, - "grad_norm": 1.480669379234314, - "learning_rate": 6.963819095477387e-05, - "loss": 5.1352, - "step": 30720 - }, - { - "epoch": 16.021382007822687, - "grad_norm": 1.5366579294204712, - "learning_rate": 6.963718592964823e-05, - "loss": 4.5396, - "step": 30721 - }, - { - "epoch": 16.021903520208603, - "grad_norm": 1.5701473951339722, - "learning_rate": 6.963618090452261e-05, - "loss": 4.9298, - "step": 30722 - }, - { - "epoch": 16.022425032594523, - "grad_norm": 1.6364071369171143, - "learning_rate": 6.963517587939699e-05, - "loss": 5.2456, - "step": 30723 - }, - { - "epoch": 16.022946544980442, - "grad_norm": 1.3644506931304932, - "learning_rate": 6.963417085427137e-05, - "loss": 5.0971, - "step": 30724 - }, - { - "epoch": 16.02346805736636, - "grad_norm": 1.473729133605957, - "learning_rate": 6.963316582914573e-05, - "loss": 5.2716, - "step": 30725 - }, - { - "epoch": 16.02398956975228, - "grad_norm": 1.4491453170776367, - "learning_rate": 6.963216080402011e-05, - "loss": 5.7795, - "step": 30726 - }, - { - "epoch": 16.0245110821382, - "grad_norm": 1.4092381000518799, - "learning_rate": 6.963115577889447e-05, - "loss": 5.6047, - "step": 30727 - }, - { - "epoch": 16.02503259452412, - "grad_norm": 1.6308404207229614, - "learning_rate": 6.963015075376885e-05, - "loss": 4.6785, - "step": 30728 - }, - { - "epoch": 16.02555410691004, - "grad_norm": 1.5323253870010376, - "learning_rate": 6.962914572864322e-05, - "loss": 5.3031, - "step": 30729 - }, - { - "epoch": 16.02607561929596, - "grad_norm": 1.4357200860977173, - "learning_rate": 6.962814070351758e-05, - "loss": 5.362, - "step": 30730 - }, - { - "epoch": 16.02659713168188, - "grad_norm": 1.5199280977249146, - "learning_rate": 6.962713567839196e-05, - "loss": 5.3031, - "step": 30731 - }, - { - "epoch": 16.027118644067798, - "grad_norm": 1.4421982765197754, - "learning_rate": 6.962613065326634e-05, - "loss": 5.4466, - "step": 30732 - }, - { - "epoch": 16.027640156453717, - "grad_norm": 1.4634263515472412, - "learning_rate": 6.962512562814071e-05, - "loss": 4.8971, - "step": 30733 - }, - { - "epoch": 16.028161668839633, - "grad_norm": 1.4031203985214233, - "learning_rate": 6.962412060301508e-05, - "loss": 5.439, - "step": 30734 - }, - { - "epoch": 16.028683181225553, - "grad_norm": 1.5594736337661743, - "learning_rate": 6.962311557788946e-05, - "loss": 4.6326, - "step": 30735 - }, - { - "epoch": 16.029204693611472, - "grad_norm": 1.5561511516571045, - "learning_rate": 6.962211055276382e-05, - "loss": 5.2251, - "step": 30736 - }, - { - "epoch": 16.02972620599739, - "grad_norm": 1.508277177810669, - "learning_rate": 6.96211055276382e-05, - "loss": 5.2005, - "step": 30737 - }, - { - "epoch": 16.03024771838331, - "grad_norm": 1.4919604063034058, - "learning_rate": 6.962010050251256e-05, - "loss": 5.4909, - "step": 30738 - }, - { - "epoch": 16.03076923076923, - "grad_norm": 1.524104356765747, - "learning_rate": 6.961909547738694e-05, - "loss": 5.2384, - "step": 30739 - }, - { - "epoch": 16.03129074315515, - "grad_norm": 1.5942885875701904, - "learning_rate": 6.96180904522613e-05, - "loss": 4.9453, - "step": 30740 - }, - { - "epoch": 16.03181225554107, - "grad_norm": 1.545280933380127, - "learning_rate": 6.961708542713568e-05, - "loss": 5.125, - "step": 30741 - }, - { - "epoch": 16.03233376792699, - "grad_norm": 1.372029185295105, - "learning_rate": 6.961608040201005e-05, - "loss": 5.4816, - "step": 30742 - }, - { - "epoch": 16.03285528031291, - "grad_norm": 1.3651143312454224, - "learning_rate": 6.961507537688443e-05, - "loss": 5.7728, - "step": 30743 - }, - { - "epoch": 16.033376792698828, - "grad_norm": 1.4839656352996826, - "learning_rate": 6.96140703517588e-05, - "loss": 5.4413, - "step": 30744 - }, - { - "epoch": 16.033898305084747, - "grad_norm": 1.4083585739135742, - "learning_rate": 6.961306532663317e-05, - "loss": 5.0156, - "step": 30745 - }, - { - "epoch": 16.034419817470663, - "grad_norm": 1.410679817199707, - "learning_rate": 6.961206030150755e-05, - "loss": 5.2947, - "step": 30746 - }, - { - "epoch": 16.034941329856583, - "grad_norm": 1.5891562700271606, - "learning_rate": 6.961105527638191e-05, - "loss": 5.1398, - "step": 30747 - }, - { - "epoch": 16.035462842242502, - "grad_norm": 1.3878774642944336, - "learning_rate": 6.961005025125629e-05, - "loss": 5.1432, - "step": 30748 - }, - { - "epoch": 16.03598435462842, - "grad_norm": 1.3956979513168335, - "learning_rate": 6.960904522613065e-05, - "loss": 5.6003, - "step": 30749 - }, - { - "epoch": 16.03650586701434, - "grad_norm": 1.4173128604888916, - "learning_rate": 6.960804020100503e-05, - "loss": 5.6588, - "step": 30750 - }, - { - "epoch": 16.03702737940026, - "grad_norm": 1.5231291055679321, - "learning_rate": 6.96070351758794e-05, - "loss": 5.4948, - "step": 30751 - }, - { - "epoch": 16.03754889178618, - "grad_norm": 1.5192276239395142, - "learning_rate": 6.960603015075377e-05, - "loss": 5.6147, - "step": 30752 - }, - { - "epoch": 16.0380704041721, - "grad_norm": 1.5405147075653076, - "learning_rate": 6.960502512562815e-05, - "loss": 5.1765, - "step": 30753 - }, - { - "epoch": 16.03859191655802, - "grad_norm": 1.430746078491211, - "learning_rate": 6.960402010050253e-05, - "loss": 5.5227, - "step": 30754 - }, - { - "epoch": 16.03911342894394, - "grad_norm": 1.572342872619629, - "learning_rate": 6.960301507537689e-05, - "loss": 5.3694, - "step": 30755 - }, - { - "epoch": 16.039634941329858, - "grad_norm": 1.5026111602783203, - "learning_rate": 6.960201005025127e-05, - "loss": 5.3678, - "step": 30756 - }, - { - "epoch": 16.040156453715777, - "grad_norm": 1.5530282258987427, - "learning_rate": 6.960100502512563e-05, - "loss": 4.7465, - "step": 30757 - }, - { - "epoch": 16.040677966101693, - "grad_norm": 1.5196616649627686, - "learning_rate": 6.96e-05, - "loss": 4.7238, - "step": 30758 - }, - { - "epoch": 16.041199478487613, - "grad_norm": 1.5245023965835571, - "learning_rate": 6.959899497487438e-05, - "loss": 5.3098, - "step": 30759 - }, - { - "epoch": 16.041720990873532, - "grad_norm": 1.5207396745681763, - "learning_rate": 6.959798994974874e-05, - "loss": 5.3664, - "step": 30760 - }, - { - "epoch": 16.042242503259452, - "grad_norm": 1.515708565711975, - "learning_rate": 6.959698492462312e-05, - "loss": 5.1696, - "step": 30761 - }, - { - "epoch": 16.04276401564537, - "grad_norm": 1.5146576166152954, - "learning_rate": 6.959597989949748e-05, - "loss": 5.5416, - "step": 30762 - }, - { - "epoch": 16.04328552803129, - "grad_norm": 1.4251456260681152, - "learning_rate": 6.959497487437186e-05, - "loss": 5.5477, - "step": 30763 - }, - { - "epoch": 16.04380704041721, - "grad_norm": 1.4102466106414795, - "learning_rate": 6.959396984924624e-05, - "loss": 5.6486, - "step": 30764 - }, - { - "epoch": 16.04432855280313, - "grad_norm": 1.579641580581665, - "learning_rate": 6.959296482412062e-05, - "loss": 5.125, - "step": 30765 - }, - { - "epoch": 16.04485006518905, - "grad_norm": 1.4488950967788696, - "learning_rate": 6.959195979899498e-05, - "loss": 5.2877, - "step": 30766 - }, - { - "epoch": 16.04537157757497, - "grad_norm": 1.6173425912857056, - "learning_rate": 6.959095477386936e-05, - "loss": 4.7283, - "step": 30767 - }, - { - "epoch": 16.045893089960888, - "grad_norm": 1.4309731721878052, - "learning_rate": 6.958994974874372e-05, - "loss": 5.3824, - "step": 30768 - }, - { - "epoch": 16.046414602346807, - "grad_norm": 1.4833085536956787, - "learning_rate": 6.95889447236181e-05, - "loss": 4.993, - "step": 30769 - }, - { - "epoch": 16.046936114732723, - "grad_norm": 1.6186977624893188, - "learning_rate": 6.958793969849246e-05, - "loss": 5.1882, - "step": 30770 - }, - { - "epoch": 16.047457627118643, - "grad_norm": 1.495772123336792, - "learning_rate": 6.958693467336683e-05, - "loss": 5.6713, - "step": 30771 - }, - { - "epoch": 16.047979139504562, - "grad_norm": 1.4899786710739136, - "learning_rate": 6.95859296482412e-05, - "loss": 4.9879, - "step": 30772 - }, - { - "epoch": 16.048500651890482, - "grad_norm": 1.4436489343643188, - "learning_rate": 6.958492462311558e-05, - "loss": 5.2566, - "step": 30773 - }, - { - "epoch": 16.0490221642764, - "grad_norm": 1.536820411682129, - "learning_rate": 6.958391959798996e-05, - "loss": 5.066, - "step": 30774 - }, - { - "epoch": 16.04954367666232, - "grad_norm": 1.6025874614715576, - "learning_rate": 6.958291457286433e-05, - "loss": 5.4341, - "step": 30775 - }, - { - "epoch": 16.05006518904824, - "grad_norm": 1.5532057285308838, - "learning_rate": 6.95819095477387e-05, - "loss": 5.2187, - "step": 30776 - }, - { - "epoch": 16.05058670143416, - "grad_norm": 1.387904405593872, - "learning_rate": 6.958090452261307e-05, - "loss": 5.5832, - "step": 30777 - }, - { - "epoch": 16.05110821382008, - "grad_norm": 1.5432369709014893, - "learning_rate": 6.957989949748745e-05, - "loss": 5.0217, - "step": 30778 - }, - { - "epoch": 16.051629726206, - "grad_norm": 1.4339786767959595, - "learning_rate": 6.957889447236181e-05, - "loss": 5.3005, - "step": 30779 - }, - { - "epoch": 16.052151238591918, - "grad_norm": 1.4091196060180664, - "learning_rate": 6.957788944723619e-05, - "loss": 5.377, - "step": 30780 - }, - { - "epoch": 16.052672750977834, - "grad_norm": 1.4629555940628052, - "learning_rate": 6.957688442211055e-05, - "loss": 5.2593, - "step": 30781 - }, - { - "epoch": 16.053194263363753, - "grad_norm": 1.4373677968978882, - "learning_rate": 6.957587939698493e-05, - "loss": 5.5236, - "step": 30782 - }, - { - "epoch": 16.053715775749673, - "grad_norm": 1.4925565719604492, - "learning_rate": 6.95748743718593e-05, - "loss": 5.4025, - "step": 30783 - }, - { - "epoch": 16.054237288135592, - "grad_norm": 1.4511865377426147, - "learning_rate": 6.957386934673367e-05, - "loss": 5.4201, - "step": 30784 - }, - { - "epoch": 16.054758800521512, - "grad_norm": 1.5480666160583496, - "learning_rate": 6.957286432160805e-05, - "loss": 4.7855, - "step": 30785 - }, - { - "epoch": 16.05528031290743, - "grad_norm": 1.5205507278442383, - "learning_rate": 6.957185929648241e-05, - "loss": 5.0553, - "step": 30786 - }, - { - "epoch": 16.05580182529335, - "grad_norm": 1.4433856010437012, - "learning_rate": 6.957085427135679e-05, - "loss": 5.3648, - "step": 30787 - }, - { - "epoch": 16.05632333767927, - "grad_norm": 1.5701196193695068, - "learning_rate": 6.956984924623116e-05, - "loss": 4.8602, - "step": 30788 - }, - { - "epoch": 16.05684485006519, - "grad_norm": 1.552069067955017, - "learning_rate": 6.956884422110553e-05, - "loss": 5.0333, - "step": 30789 - }, - { - "epoch": 16.05736636245111, - "grad_norm": 1.454399585723877, - "learning_rate": 6.95678391959799e-05, - "loss": 5.5901, - "step": 30790 - }, - { - "epoch": 16.05788787483703, - "grad_norm": 1.445487380027771, - "learning_rate": 6.956683417085428e-05, - "loss": 5.3426, - "step": 30791 - }, - { - "epoch": 16.058409387222948, - "grad_norm": 1.4732953310012817, - "learning_rate": 6.956582914572864e-05, - "loss": 4.86, - "step": 30792 - }, - { - "epoch": 16.058930899608864, - "grad_norm": 1.4283004999160767, - "learning_rate": 6.956482412060302e-05, - "loss": 5.4564, - "step": 30793 - }, - { - "epoch": 16.059452411994783, - "grad_norm": 1.4534945487976074, - "learning_rate": 6.956381909547738e-05, - "loss": 5.4108, - "step": 30794 - }, - { - "epoch": 16.059973924380703, - "grad_norm": 1.4278135299682617, - "learning_rate": 6.956281407035176e-05, - "loss": 5.595, - "step": 30795 - }, - { - "epoch": 16.060495436766622, - "grad_norm": 1.446648359298706, - "learning_rate": 6.956180904522614e-05, - "loss": 5.2716, - "step": 30796 - }, - { - "epoch": 16.061016949152542, - "grad_norm": 1.4647767543792725, - "learning_rate": 6.95608040201005e-05, - "loss": 5.1787, - "step": 30797 - }, - { - "epoch": 16.06153846153846, - "grad_norm": 1.4536501169204712, - "learning_rate": 6.955979899497488e-05, - "loss": 5.4565, - "step": 30798 - }, - { - "epoch": 16.06205997392438, - "grad_norm": 1.533584713935852, - "learning_rate": 6.955879396984924e-05, - "loss": 5.6364, - "step": 30799 - }, - { - "epoch": 16.0625814863103, - "grad_norm": 1.474989414215088, - "learning_rate": 6.955778894472362e-05, - "loss": 5.1992, - "step": 30800 - }, - { - "epoch": 16.06310299869622, - "grad_norm": 1.4761842489242554, - "learning_rate": 6.955678391959799e-05, - "loss": 5.5837, - "step": 30801 - }, - { - "epoch": 16.06362451108214, - "grad_norm": 1.454040765762329, - "learning_rate": 6.955577889447236e-05, - "loss": 5.4461, - "step": 30802 - }, - { - "epoch": 16.06414602346806, - "grad_norm": 1.5145896673202515, - "learning_rate": 6.955477386934673e-05, - "loss": 4.9639, - "step": 30803 - }, - { - "epoch": 16.064667535853978, - "grad_norm": 1.465885043144226, - "learning_rate": 6.95537688442211e-05, - "loss": 5.1112, - "step": 30804 - }, - { - "epoch": 16.065189048239894, - "grad_norm": 1.4393587112426758, - "learning_rate": 6.955276381909548e-05, - "loss": 5.2282, - "step": 30805 - }, - { - "epoch": 16.065710560625813, - "grad_norm": 1.4786702394485474, - "learning_rate": 6.955175879396986e-05, - "loss": 5.4531, - "step": 30806 - }, - { - "epoch": 16.066232073011733, - "grad_norm": 1.3969494104385376, - "learning_rate": 6.955075376884423e-05, - "loss": 5.4046, - "step": 30807 - }, - { - "epoch": 16.066753585397652, - "grad_norm": 1.5604004859924316, - "learning_rate": 6.95497487437186e-05, - "loss": 4.4956, - "step": 30808 - }, - { - "epoch": 16.067275097783572, - "grad_norm": 1.6156693696975708, - "learning_rate": 6.954874371859297e-05, - "loss": 4.9775, - "step": 30809 - }, - { - "epoch": 16.06779661016949, - "grad_norm": 1.5552117824554443, - "learning_rate": 6.954773869346733e-05, - "loss": 5.5124, - "step": 30810 - }, - { - "epoch": 16.06831812255541, - "grad_norm": 1.5377196073532104, - "learning_rate": 6.954673366834171e-05, - "loss": 5.3951, - "step": 30811 - }, - { - "epoch": 16.06883963494133, - "grad_norm": 1.4350299835205078, - "learning_rate": 6.954572864321608e-05, - "loss": 5.7282, - "step": 30812 - }, - { - "epoch": 16.06936114732725, - "grad_norm": 1.5535482168197632, - "learning_rate": 6.954472361809045e-05, - "loss": 5.2758, - "step": 30813 - }, - { - "epoch": 16.06988265971317, - "grad_norm": 1.4437572956085205, - "learning_rate": 6.954371859296482e-05, - "loss": 5.4664, - "step": 30814 - }, - { - "epoch": 16.07040417209909, - "grad_norm": 1.4444600343704224, - "learning_rate": 6.95427135678392e-05, - "loss": 5.6582, - "step": 30815 - }, - { - "epoch": 16.070925684485008, - "grad_norm": 1.4941636323928833, - "learning_rate": 6.954170854271357e-05, - "loss": 5.4987, - "step": 30816 - }, - { - "epoch": 16.071447196870924, - "grad_norm": 1.460801601409912, - "learning_rate": 6.954070351758795e-05, - "loss": 5.7744, - "step": 30817 - }, - { - "epoch": 16.071968709256844, - "grad_norm": 1.4305002689361572, - "learning_rate": 6.953969849246232e-05, - "loss": 5.193, - "step": 30818 - }, - { - "epoch": 16.072490221642763, - "grad_norm": 1.4666286706924438, - "learning_rate": 6.953869346733669e-05, - "loss": 5.299, - "step": 30819 - }, - { - "epoch": 16.073011734028682, - "grad_norm": 1.448548674583435, - "learning_rate": 6.953768844221106e-05, - "loss": 5.6526, - "step": 30820 - }, - { - "epoch": 16.073533246414602, - "grad_norm": 1.4030627012252808, - "learning_rate": 6.953668341708543e-05, - "loss": 5.3281, - "step": 30821 - }, - { - "epoch": 16.07405475880052, - "grad_norm": 1.3624361753463745, - "learning_rate": 6.95356783919598e-05, - "loss": 5.1978, - "step": 30822 - }, - { - "epoch": 16.07457627118644, - "grad_norm": 1.4505882263183594, - "learning_rate": 6.953467336683416e-05, - "loss": 5.3194, - "step": 30823 - }, - { - "epoch": 16.07509778357236, - "grad_norm": 1.4724020957946777, - "learning_rate": 6.953366834170854e-05, - "loss": 5.1698, - "step": 30824 - }, - { - "epoch": 16.07561929595828, - "grad_norm": 1.3356804847717285, - "learning_rate": 6.953266331658292e-05, - "loss": 4.446, - "step": 30825 - }, - { - "epoch": 16.0761408083442, - "grad_norm": 1.4150460958480835, - "learning_rate": 6.95316582914573e-05, - "loss": 5.5115, - "step": 30826 - }, - { - "epoch": 16.07666232073012, - "grad_norm": 1.7111420631408691, - "learning_rate": 6.953065326633166e-05, - "loss": 5.0623, - "step": 30827 - }, - { - "epoch": 16.077183833116038, - "grad_norm": 1.5638868808746338, - "learning_rate": 6.952964824120604e-05, - "loss": 5.1468, - "step": 30828 - }, - { - "epoch": 16.077705345501954, - "grad_norm": 1.3936872482299805, - "learning_rate": 6.95286432160804e-05, - "loss": 5.2815, - "step": 30829 - }, - { - "epoch": 16.078226857887874, - "grad_norm": 1.4560637474060059, - "learning_rate": 6.952763819095478e-05, - "loss": 5.6044, - "step": 30830 - }, - { - "epoch": 16.078748370273793, - "grad_norm": 1.5335006713867188, - "learning_rate": 6.952663316582915e-05, - "loss": 4.9223, - "step": 30831 - }, - { - "epoch": 16.079269882659712, - "grad_norm": 1.3708525896072388, - "learning_rate": 6.952562814070352e-05, - "loss": 5.7652, - "step": 30832 - }, - { - "epoch": 16.079791395045632, - "grad_norm": 1.451716423034668, - "learning_rate": 6.952462311557789e-05, - "loss": 5.2468, - "step": 30833 - }, - { - "epoch": 16.08031290743155, - "grad_norm": 1.4747563600540161, - "learning_rate": 6.952361809045227e-05, - "loss": 5.1203, - "step": 30834 - }, - { - "epoch": 16.08083441981747, - "grad_norm": 1.8183717727661133, - "learning_rate": 6.952261306532663e-05, - "loss": 4.413, - "step": 30835 - }, - { - "epoch": 16.08135593220339, - "grad_norm": 1.525620937347412, - "learning_rate": 6.952160804020101e-05, - "loss": 4.991, - "step": 30836 - }, - { - "epoch": 16.08187744458931, - "grad_norm": 1.4709784984588623, - "learning_rate": 6.952060301507539e-05, - "loss": 5.3778, - "step": 30837 - }, - { - "epoch": 16.08239895697523, - "grad_norm": 1.4669103622436523, - "learning_rate": 6.951959798994975e-05, - "loss": 5.048, - "step": 30838 - }, - { - "epoch": 16.08292046936115, - "grad_norm": 1.5365883111953735, - "learning_rate": 6.951859296482413e-05, - "loss": 4.8725, - "step": 30839 - }, - { - "epoch": 16.083441981747068, - "grad_norm": 1.4692875146865845, - "learning_rate": 6.951758793969849e-05, - "loss": 5.0296, - "step": 30840 - }, - { - "epoch": 16.083963494132984, - "grad_norm": 1.5271166563034058, - "learning_rate": 6.951658291457287e-05, - "loss": 5.6641, - "step": 30841 - }, - { - "epoch": 16.084485006518904, - "grad_norm": 1.489038109779358, - "learning_rate": 6.951557788944723e-05, - "loss": 5.356, - "step": 30842 - }, - { - "epoch": 16.085006518904823, - "grad_norm": 1.479081392288208, - "learning_rate": 6.951457286432161e-05, - "loss": 5.4339, - "step": 30843 - }, - { - "epoch": 16.085528031290742, - "grad_norm": 1.411893606185913, - "learning_rate": 6.951356783919598e-05, - "loss": 5.5943, - "step": 30844 - }, - { - "epoch": 16.086049543676662, - "grad_norm": 1.4794155359268188, - "learning_rate": 6.951256281407035e-05, - "loss": 5.4013, - "step": 30845 - }, - { - "epoch": 16.08657105606258, - "grad_norm": 1.391089677810669, - "learning_rate": 6.951155778894473e-05, - "loss": 5.5906, - "step": 30846 - }, - { - "epoch": 16.0870925684485, - "grad_norm": 1.456228256225586, - "learning_rate": 6.951055276381911e-05, - "loss": 5.1769, - "step": 30847 - }, - { - "epoch": 16.08761408083442, - "grad_norm": 1.529538869857788, - "learning_rate": 6.950954773869347e-05, - "loss": 5.4167, - "step": 30848 - }, - { - "epoch": 16.08813559322034, - "grad_norm": 1.5385688543319702, - "learning_rate": 6.950854271356785e-05, - "loss": 5.0542, - "step": 30849 - }, - { - "epoch": 16.08865710560626, - "grad_norm": 1.464777946472168, - "learning_rate": 6.950753768844222e-05, - "loss": 4.6664, - "step": 30850 - }, - { - "epoch": 16.08917861799218, - "grad_norm": 1.449520468711853, - "learning_rate": 6.950653266331658e-05, - "loss": 5.1737, - "step": 30851 - }, - { - "epoch": 16.089700130378098, - "grad_norm": 1.4036203622817993, - "learning_rate": 6.950552763819096e-05, - "loss": 5.5393, - "step": 30852 - }, - { - "epoch": 16.090221642764014, - "grad_norm": 1.5774061679840088, - "learning_rate": 6.950452261306532e-05, - "loss": 5.2717, - "step": 30853 - }, - { - "epoch": 16.090743155149934, - "grad_norm": 1.5585153102874756, - "learning_rate": 6.95035175879397e-05, - "loss": 5.2473, - "step": 30854 - }, - { - "epoch": 16.091264667535853, - "grad_norm": 1.5414174795150757, - "learning_rate": 6.950251256281406e-05, - "loss": 5.1009, - "step": 30855 - }, - { - "epoch": 16.091786179921773, - "grad_norm": 1.6362191438674927, - "learning_rate": 6.950150753768844e-05, - "loss": 5.2775, - "step": 30856 - }, - { - "epoch": 16.092307692307692, - "grad_norm": 1.4857923984527588, - "learning_rate": 6.950050251256282e-05, - "loss": 5.5684, - "step": 30857 - }, - { - "epoch": 16.09282920469361, - "grad_norm": 1.3971922397613525, - "learning_rate": 6.94994974874372e-05, - "loss": 5.2494, - "step": 30858 - }, - { - "epoch": 16.09335071707953, - "grad_norm": 1.5428810119628906, - "learning_rate": 6.949849246231156e-05, - "loss": 4.7019, - "step": 30859 - }, - { - "epoch": 16.09387222946545, - "grad_norm": 1.491753101348877, - "learning_rate": 6.949748743718594e-05, - "loss": 5.2434, - "step": 30860 - }, - { - "epoch": 16.09439374185137, - "grad_norm": 1.6509264707565308, - "learning_rate": 6.94964824120603e-05, - "loss": 5.3474, - "step": 30861 - }, - { - "epoch": 16.09491525423729, - "grad_norm": 1.9592207670211792, - "learning_rate": 6.949547738693468e-05, - "loss": 5.2409, - "step": 30862 - }, - { - "epoch": 16.09543676662321, - "grad_norm": 1.4251818656921387, - "learning_rate": 6.949447236180905e-05, - "loss": 5.3696, - "step": 30863 - }, - { - "epoch": 16.09595827900913, - "grad_norm": 1.4616318941116333, - "learning_rate": 6.949346733668341e-05, - "loss": 5.5675, - "step": 30864 - }, - { - "epoch": 16.096479791395044, - "grad_norm": 1.376705527305603, - "learning_rate": 6.949246231155779e-05, - "loss": 5.4529, - "step": 30865 - }, - { - "epoch": 16.097001303780964, - "grad_norm": 1.4795376062393188, - "learning_rate": 6.949145728643217e-05, - "loss": 5.3177, - "step": 30866 - }, - { - "epoch": 16.097522816166883, - "grad_norm": 1.6817528009414673, - "learning_rate": 6.949045226130654e-05, - "loss": 5.3335, - "step": 30867 - }, - { - "epoch": 16.098044328552803, - "grad_norm": 1.4022592306137085, - "learning_rate": 6.948944723618091e-05, - "loss": 5.4305, - "step": 30868 - }, - { - "epoch": 16.098565840938722, - "grad_norm": 1.4894222021102905, - "learning_rate": 6.948844221105529e-05, - "loss": 4.7963, - "step": 30869 - }, - { - "epoch": 16.09908735332464, - "grad_norm": 1.5019277334213257, - "learning_rate": 6.948743718592965e-05, - "loss": 5.6161, - "step": 30870 - }, - { - "epoch": 16.09960886571056, - "grad_norm": 1.5007206201553345, - "learning_rate": 6.948643216080403e-05, - "loss": 5.1349, - "step": 30871 - }, - { - "epoch": 16.10013037809648, - "grad_norm": 1.554226040840149, - "learning_rate": 6.948542713567839e-05, - "loss": 5.6397, - "step": 30872 - }, - { - "epoch": 16.1006518904824, - "grad_norm": 1.5866203308105469, - "learning_rate": 6.948442211055277e-05, - "loss": 5.0422, - "step": 30873 - }, - { - "epoch": 16.10117340286832, - "grad_norm": 1.5310615301132202, - "learning_rate": 6.948341708542713e-05, - "loss": 5.063, - "step": 30874 - }, - { - "epoch": 16.10169491525424, - "grad_norm": 1.42096745967865, - "learning_rate": 6.948241206030151e-05, - "loss": 5.7081, - "step": 30875 - }, - { - "epoch": 16.102216427640155, - "grad_norm": 1.3846937417984009, - "learning_rate": 6.948140703517588e-05, - "loss": 5.6365, - "step": 30876 - }, - { - "epoch": 16.102737940026074, - "grad_norm": 1.4241719245910645, - "learning_rate": 6.948040201005025e-05, - "loss": 5.6761, - "step": 30877 - }, - { - "epoch": 16.103259452411994, - "grad_norm": 1.4752211570739746, - "learning_rate": 6.947939698492463e-05, - "loss": 5.2547, - "step": 30878 - }, - { - "epoch": 16.103780964797913, - "grad_norm": 1.5101789236068726, - "learning_rate": 6.9478391959799e-05, - "loss": 5.5612, - "step": 30879 - }, - { - "epoch": 16.104302477183833, - "grad_norm": 1.4149963855743408, - "learning_rate": 6.947738693467337e-05, - "loss": 5.3995, - "step": 30880 - }, - { - "epoch": 16.104823989569752, - "grad_norm": 1.4295270442962646, - "learning_rate": 6.947638190954774e-05, - "loss": 5.5771, - "step": 30881 - }, - { - "epoch": 16.10534550195567, - "grad_norm": 1.3884633779525757, - "learning_rate": 6.947537688442212e-05, - "loss": 4.7332, - "step": 30882 - }, - { - "epoch": 16.10586701434159, - "grad_norm": 1.4202823638916016, - "learning_rate": 6.947437185929648e-05, - "loss": 5.6213, - "step": 30883 - }, - { - "epoch": 16.10638852672751, - "grad_norm": 1.4770076274871826, - "learning_rate": 6.947336683417086e-05, - "loss": 5.2598, - "step": 30884 - }, - { - "epoch": 16.10691003911343, - "grad_norm": 1.522358775138855, - "learning_rate": 6.947236180904522e-05, - "loss": 5.3174, - "step": 30885 - }, - { - "epoch": 16.10743155149935, - "grad_norm": 1.4711663722991943, - "learning_rate": 6.94713567839196e-05, - "loss": 5.3898, - "step": 30886 - }, - { - "epoch": 16.10795306388527, - "grad_norm": 1.4753514528274536, - "learning_rate": 6.947035175879398e-05, - "loss": 4.8557, - "step": 30887 - }, - { - "epoch": 16.108474576271185, - "grad_norm": 1.395708680152893, - "learning_rate": 6.946934673366836e-05, - "loss": 5.3784, - "step": 30888 - }, - { - "epoch": 16.108996088657104, - "grad_norm": 1.3904067277908325, - "learning_rate": 6.946834170854272e-05, - "loss": 5.6937, - "step": 30889 - }, - { - "epoch": 16.109517601043024, - "grad_norm": 1.3827471733093262, - "learning_rate": 6.946733668341708e-05, - "loss": 5.3526, - "step": 30890 - }, - { - "epoch": 16.110039113428943, - "grad_norm": 1.398087739944458, - "learning_rate": 6.946633165829146e-05, - "loss": 4.6958, - "step": 30891 - }, - { - "epoch": 16.110560625814863, - "grad_norm": 1.5586256980895996, - "learning_rate": 6.946532663316583e-05, - "loss": 4.9995, - "step": 30892 - }, - { - "epoch": 16.111082138200782, - "grad_norm": 1.4514992237091064, - "learning_rate": 6.94643216080402e-05, - "loss": 5.6206, - "step": 30893 - }, - { - "epoch": 16.1116036505867, - "grad_norm": 1.4911390542984009, - "learning_rate": 6.946331658291457e-05, - "loss": 5.0377, - "step": 30894 - }, - { - "epoch": 16.11212516297262, - "grad_norm": 1.3648226261138916, - "learning_rate": 6.946231155778895e-05, - "loss": 5.6885, - "step": 30895 - }, - { - "epoch": 16.11264667535854, - "grad_norm": 1.5237088203430176, - "learning_rate": 6.946130653266331e-05, - "loss": 5.0068, - "step": 30896 - }, - { - "epoch": 16.11316818774446, - "grad_norm": 1.6293805837631226, - "learning_rate": 6.946030150753769e-05, - "loss": 5.1995, - "step": 30897 - }, - { - "epoch": 16.11368970013038, - "grad_norm": 1.6354091167449951, - "learning_rate": 6.945929648241207e-05, - "loss": 4.6147, - "step": 30898 - }, - { - "epoch": 16.1142112125163, - "grad_norm": 1.5455366373062134, - "learning_rate": 6.945829145728644e-05, - "loss": 5.6476, - "step": 30899 - }, - { - "epoch": 16.114732724902215, - "grad_norm": 1.561474323272705, - "learning_rate": 6.945728643216081e-05, - "loss": 5.0705, - "step": 30900 - }, - { - "epoch": 16.115254237288134, - "grad_norm": 1.4214694499969482, - "learning_rate": 6.945628140703519e-05, - "loss": 5.3772, - "step": 30901 - }, - { - "epoch": 16.115775749674054, - "grad_norm": 1.5568349361419678, - "learning_rate": 6.945527638190955e-05, - "loss": 4.9576, - "step": 30902 - }, - { - "epoch": 16.116297262059973, - "grad_norm": 1.608067274093628, - "learning_rate": 6.945427135678392e-05, - "loss": 5.0342, - "step": 30903 - }, - { - "epoch": 16.116818774445893, - "grad_norm": 1.479416012763977, - "learning_rate": 6.945326633165829e-05, - "loss": 5.1131, - "step": 30904 - }, - { - "epoch": 16.117340286831812, - "grad_norm": 1.4494872093200684, - "learning_rate": 6.945226130653266e-05, - "loss": 5.6296, - "step": 30905 - }, - { - "epoch": 16.11786179921773, - "grad_norm": 1.4571239948272705, - "learning_rate": 6.945125628140704e-05, - "loss": 5.3533, - "step": 30906 - }, - { - "epoch": 16.11838331160365, - "grad_norm": 1.4834619760513306, - "learning_rate": 6.945025125628141e-05, - "loss": 5.6539, - "step": 30907 - }, - { - "epoch": 16.11890482398957, - "grad_norm": 1.8632839918136597, - "learning_rate": 6.944924623115579e-05, - "loss": 4.5655, - "step": 30908 - }, - { - "epoch": 16.11942633637549, - "grad_norm": 1.5673547983169556, - "learning_rate": 6.944824120603016e-05, - "loss": 5.3448, - "step": 30909 - }, - { - "epoch": 16.11994784876141, - "grad_norm": 1.4211896657943726, - "learning_rate": 6.944723618090453e-05, - "loss": 4.957, - "step": 30910 - }, - { - "epoch": 16.12046936114733, - "grad_norm": 1.5035418272018433, - "learning_rate": 6.94462311557789e-05, - "loss": 5.3665, - "step": 30911 - }, - { - "epoch": 16.120990873533245, - "grad_norm": 1.5876163244247437, - "learning_rate": 6.944522613065328e-05, - "loss": 5.4336, - "step": 30912 - }, - { - "epoch": 16.121512385919164, - "grad_norm": 1.491905927658081, - "learning_rate": 6.944422110552764e-05, - "loss": 5.5141, - "step": 30913 - }, - { - "epoch": 16.122033898305084, - "grad_norm": 1.4798401594161987, - "learning_rate": 6.944321608040202e-05, - "loss": 5.275, - "step": 30914 - }, - { - "epoch": 16.122555410691003, - "grad_norm": 1.5043474435806274, - "learning_rate": 6.944221105527638e-05, - "loss": 5.0673, - "step": 30915 - }, - { - "epoch": 16.123076923076923, - "grad_norm": 1.4829188585281372, - "learning_rate": 6.944120603015075e-05, - "loss": 5.589, - "step": 30916 - }, - { - "epoch": 16.123598435462842, - "grad_norm": 1.6205490827560425, - "learning_rate": 6.944020100502512e-05, - "loss": 5.4894, - "step": 30917 - }, - { - "epoch": 16.12411994784876, - "grad_norm": 1.5188641548156738, - "learning_rate": 6.94391959798995e-05, - "loss": 5.2745, - "step": 30918 - }, - { - "epoch": 16.12464146023468, - "grad_norm": 1.5637530088424683, - "learning_rate": 6.943819095477388e-05, - "loss": 5.6209, - "step": 30919 - }, - { - "epoch": 16.1251629726206, - "grad_norm": 1.4995582103729248, - "learning_rate": 6.943718592964824e-05, - "loss": 5.5466, - "step": 30920 - }, - { - "epoch": 16.12568448500652, - "grad_norm": 1.3802902698516846, - "learning_rate": 6.943618090452262e-05, - "loss": 5.61, - "step": 30921 - }, - { - "epoch": 16.12620599739244, - "grad_norm": 1.5374958515167236, - "learning_rate": 6.943517587939699e-05, - "loss": 4.9995, - "step": 30922 - }, - { - "epoch": 16.12672750977836, - "grad_norm": 1.446828007698059, - "learning_rate": 6.943417085427136e-05, - "loss": 5.5109, - "step": 30923 - }, - { - "epoch": 16.127249022164275, - "grad_norm": 1.517698049545288, - "learning_rate": 6.943316582914573e-05, - "loss": 5.2388, - "step": 30924 - }, - { - "epoch": 16.127770534550194, - "grad_norm": 1.506032109260559, - "learning_rate": 6.94321608040201e-05, - "loss": 5.1549, - "step": 30925 - }, - { - "epoch": 16.128292046936114, - "grad_norm": 1.4762961864471436, - "learning_rate": 6.943115577889447e-05, - "loss": 5.8069, - "step": 30926 - }, - { - "epoch": 16.128813559322033, - "grad_norm": 1.5680407285690308, - "learning_rate": 6.943015075376885e-05, - "loss": 5.2227, - "step": 30927 - }, - { - "epoch": 16.129335071707953, - "grad_norm": 1.452688217163086, - "learning_rate": 6.942914572864323e-05, - "loss": 5.1458, - "step": 30928 - }, - { - "epoch": 16.129856584093872, - "grad_norm": 1.4103760719299316, - "learning_rate": 6.94281407035176e-05, - "loss": 5.4656, - "step": 30929 - }, - { - "epoch": 16.13037809647979, - "grad_norm": 1.4444475173950195, - "learning_rate": 6.942713567839197e-05, - "loss": 5.7472, - "step": 30930 - }, - { - "epoch": 16.13089960886571, - "grad_norm": 1.454750657081604, - "learning_rate": 6.942613065326633e-05, - "loss": 5.2178, - "step": 30931 - }, - { - "epoch": 16.13142112125163, - "grad_norm": 1.6125377416610718, - "learning_rate": 6.942512562814071e-05, - "loss": 5.059, - "step": 30932 - }, - { - "epoch": 16.13194263363755, - "grad_norm": 1.6152496337890625, - "learning_rate": 6.942412060301507e-05, - "loss": 5.2947, - "step": 30933 - }, - { - "epoch": 16.13246414602347, - "grad_norm": 1.4299639463424683, - "learning_rate": 6.942311557788945e-05, - "loss": 5.6048, - "step": 30934 - }, - { - "epoch": 16.13298565840939, - "grad_norm": 1.64900803565979, - "learning_rate": 6.942211055276382e-05, - "loss": 5.2609, - "step": 30935 - }, - { - "epoch": 16.133507170795305, - "grad_norm": 1.4914608001708984, - "learning_rate": 6.94211055276382e-05, - "loss": 5.6914, - "step": 30936 - }, - { - "epoch": 16.134028683181224, - "grad_norm": 1.452596664428711, - "learning_rate": 6.942010050251256e-05, - "loss": 5.5591, - "step": 30937 - }, - { - "epoch": 16.134550195567144, - "grad_norm": 1.421158790588379, - "learning_rate": 6.941909547738694e-05, - "loss": 5.015, - "step": 30938 - }, - { - "epoch": 16.135071707953063, - "grad_norm": 1.443095088005066, - "learning_rate": 6.941809045226131e-05, - "loss": 5.7045, - "step": 30939 - }, - { - "epoch": 16.135593220338983, - "grad_norm": 1.5500558614730835, - "learning_rate": 6.941708542713569e-05, - "loss": 5.115, - "step": 30940 - }, - { - "epoch": 16.136114732724902, - "grad_norm": 1.448595643043518, - "learning_rate": 6.941608040201006e-05, - "loss": 5.5953, - "step": 30941 - }, - { - "epoch": 16.13663624511082, - "grad_norm": 1.432407021522522, - "learning_rate": 6.941507537688443e-05, - "loss": 5.5767, - "step": 30942 - }, - { - "epoch": 16.13715775749674, - "grad_norm": 1.5897576808929443, - "learning_rate": 6.94140703517588e-05, - "loss": 5.4488, - "step": 30943 - }, - { - "epoch": 16.13767926988266, - "grad_norm": 1.4501134157180786, - "learning_rate": 6.941306532663316e-05, - "loss": 5.1651, - "step": 30944 - }, - { - "epoch": 16.13820078226858, - "grad_norm": 1.7125779390335083, - "learning_rate": 6.941206030150754e-05, - "loss": 5.2522, - "step": 30945 - }, - { - "epoch": 16.1387222946545, - "grad_norm": 1.4800430536270142, - "learning_rate": 6.94110552763819e-05, - "loss": 5.4693, - "step": 30946 - }, - { - "epoch": 16.13924380704042, - "grad_norm": 1.4681727886199951, - "learning_rate": 6.941005025125628e-05, - "loss": 5.2779, - "step": 30947 - }, - { - "epoch": 16.139765319426335, - "grad_norm": 1.5413490533828735, - "learning_rate": 6.940904522613066e-05, - "loss": 5.1265, - "step": 30948 - }, - { - "epoch": 16.140286831812254, - "grad_norm": 1.4120099544525146, - "learning_rate": 6.940804020100504e-05, - "loss": 5.5996, - "step": 30949 - }, - { - "epoch": 16.140808344198174, - "grad_norm": 1.40192449092865, - "learning_rate": 6.94070351758794e-05, - "loss": 5.6911, - "step": 30950 - }, - { - "epoch": 16.141329856584093, - "grad_norm": 1.3953180313110352, - "learning_rate": 6.940603015075378e-05, - "loss": 4.9823, - "step": 30951 - }, - { - "epoch": 16.141851368970013, - "grad_norm": 1.4177546501159668, - "learning_rate": 6.940502512562814e-05, - "loss": 5.0636, - "step": 30952 - }, - { - "epoch": 16.142372881355932, - "grad_norm": 1.5062530040740967, - "learning_rate": 6.940402010050252e-05, - "loss": 5.4872, - "step": 30953 - }, - { - "epoch": 16.14289439374185, - "grad_norm": 1.530922293663025, - "learning_rate": 6.940301507537689e-05, - "loss": 5.2536, - "step": 30954 - }, - { - "epoch": 16.14341590612777, - "grad_norm": 1.6139620542526245, - "learning_rate": 6.940201005025126e-05, - "loss": 5.3911, - "step": 30955 - }, - { - "epoch": 16.14393741851369, - "grad_norm": 1.5162869691848755, - "learning_rate": 6.940100502512563e-05, - "loss": 5.3082, - "step": 30956 - }, - { - "epoch": 16.14445893089961, - "grad_norm": 1.5649179220199585, - "learning_rate": 6.939999999999999e-05, - "loss": 5.1973, - "step": 30957 - }, - { - "epoch": 16.14498044328553, - "grad_norm": 1.6975173950195312, - "learning_rate": 6.939899497487437e-05, - "loss": 4.9799, - "step": 30958 - }, - { - "epoch": 16.14550195567145, - "grad_norm": 1.4940794706344604, - "learning_rate": 6.939798994974875e-05, - "loss": 5.6167, - "step": 30959 - }, - { - "epoch": 16.146023468057365, - "grad_norm": 1.5439335107803345, - "learning_rate": 6.939698492462313e-05, - "loss": 5.3682, - "step": 30960 - }, - { - "epoch": 16.146544980443284, - "grad_norm": 1.441157341003418, - "learning_rate": 6.939597989949749e-05, - "loss": 5.4158, - "step": 30961 - }, - { - "epoch": 16.147066492829204, - "grad_norm": 1.4640307426452637, - "learning_rate": 6.939497487437187e-05, - "loss": 5.3336, - "step": 30962 - }, - { - "epoch": 16.147588005215123, - "grad_norm": 1.5664806365966797, - "learning_rate": 6.939396984924623e-05, - "loss": 5.0662, - "step": 30963 - }, - { - "epoch": 16.148109517601043, - "grad_norm": 1.5051360130310059, - "learning_rate": 6.939296482412061e-05, - "loss": 4.8969, - "step": 30964 - }, - { - "epoch": 16.148631029986962, - "grad_norm": 1.6253182888031006, - "learning_rate": 6.939195979899497e-05, - "loss": 5.1532, - "step": 30965 - }, - { - "epoch": 16.14915254237288, - "grad_norm": 1.4935022592544556, - "learning_rate": 6.939095477386935e-05, - "loss": 5.5081, - "step": 30966 - }, - { - "epoch": 16.1496740547588, - "grad_norm": 1.574904203414917, - "learning_rate": 6.938994974874372e-05, - "loss": 5.1349, - "step": 30967 - }, - { - "epoch": 16.15019556714472, - "grad_norm": 1.483721137046814, - "learning_rate": 6.93889447236181e-05, - "loss": 5.2587, - "step": 30968 - }, - { - "epoch": 16.15071707953064, - "grad_norm": 1.5140016078948975, - "learning_rate": 6.938793969849246e-05, - "loss": 5.2813, - "step": 30969 - }, - { - "epoch": 16.15123859191656, - "grad_norm": 1.5099924802780151, - "learning_rate": 6.938693467336684e-05, - "loss": 5.4512, - "step": 30970 - }, - { - "epoch": 16.151760104302475, - "grad_norm": 1.5085556507110596, - "learning_rate": 6.938592964824121e-05, - "loss": 5.5783, - "step": 30971 - }, - { - "epoch": 16.152281616688395, - "grad_norm": 1.4936567544937134, - "learning_rate": 6.938492462311558e-05, - "loss": 5.2276, - "step": 30972 - }, - { - "epoch": 16.152803129074314, - "grad_norm": 1.3854644298553467, - "learning_rate": 6.938391959798996e-05, - "loss": 5.5905, - "step": 30973 - }, - { - "epoch": 16.153324641460234, - "grad_norm": 1.505040168762207, - "learning_rate": 6.938291457286432e-05, - "loss": 5.3087, - "step": 30974 - }, - { - "epoch": 16.153846153846153, - "grad_norm": 1.5396931171417236, - "learning_rate": 6.93819095477387e-05, - "loss": 5.4977, - "step": 30975 - }, - { - "epoch": 16.154367666232073, - "grad_norm": 1.480103611946106, - "learning_rate": 6.938090452261306e-05, - "loss": 5.3681, - "step": 30976 - }, - { - "epoch": 16.154889178617992, - "grad_norm": 1.6618238687515259, - "learning_rate": 6.937989949748744e-05, - "loss": 5.2108, - "step": 30977 - }, - { - "epoch": 16.15541069100391, - "grad_norm": 1.3472121953964233, - "learning_rate": 6.93788944723618e-05, - "loss": 5.5637, - "step": 30978 - }, - { - "epoch": 16.15593220338983, - "grad_norm": 1.523094892501831, - "learning_rate": 6.937788944723618e-05, - "loss": 5.7395, - "step": 30979 - }, - { - "epoch": 16.15645371577575, - "grad_norm": 1.5398280620574951, - "learning_rate": 6.937688442211056e-05, - "loss": 5.3711, - "step": 30980 - }, - { - "epoch": 16.15697522816167, - "grad_norm": 1.453578233718872, - "learning_rate": 6.937587939698494e-05, - "loss": 5.6975, - "step": 30981 - }, - { - "epoch": 16.15749674054759, - "grad_norm": 1.5759199857711792, - "learning_rate": 6.93748743718593e-05, - "loss": 5.0156, - "step": 30982 - }, - { - "epoch": 16.158018252933505, - "grad_norm": 1.4901840686798096, - "learning_rate": 6.937386934673367e-05, - "loss": 5.0428, - "step": 30983 - }, - { - "epoch": 16.158539765319425, - "grad_norm": 1.4848973751068115, - "learning_rate": 6.937286432160805e-05, - "loss": 5.3937, - "step": 30984 - }, - { - "epoch": 16.159061277705344, - "grad_norm": 1.5060938596725464, - "learning_rate": 6.937185929648241e-05, - "loss": 5.5763, - "step": 30985 - }, - { - "epoch": 16.159582790091264, - "grad_norm": 1.4460431337356567, - "learning_rate": 6.937085427135679e-05, - "loss": 5.5586, - "step": 30986 - }, - { - "epoch": 16.160104302477183, - "grad_norm": 1.4057544469833374, - "learning_rate": 6.936984924623115e-05, - "loss": 3.734, - "step": 30987 - }, - { - "epoch": 16.160625814863103, - "grad_norm": 1.4940873384475708, - "learning_rate": 6.936884422110553e-05, - "loss": 5.6839, - "step": 30988 - }, - { - "epoch": 16.161147327249022, - "grad_norm": 1.5541502237319946, - "learning_rate": 6.93678391959799e-05, - "loss": 4.8537, - "step": 30989 - }, - { - "epoch": 16.16166883963494, - "grad_norm": 1.4994029998779297, - "learning_rate": 6.936683417085427e-05, - "loss": 5.2429, - "step": 30990 - }, - { - "epoch": 16.16219035202086, - "grad_norm": 1.3941715955734253, - "learning_rate": 6.936582914572865e-05, - "loss": 5.7115, - "step": 30991 - }, - { - "epoch": 16.16271186440678, - "grad_norm": 1.399877667427063, - "learning_rate": 6.936482412060303e-05, - "loss": 5.4179, - "step": 30992 - }, - { - "epoch": 16.1632333767927, - "grad_norm": 1.536030650138855, - "learning_rate": 6.936381909547739e-05, - "loss": 5.1837, - "step": 30993 - }, - { - "epoch": 16.16375488917862, - "grad_norm": 1.6162053346633911, - "learning_rate": 6.936281407035177e-05, - "loss": 5.3524, - "step": 30994 - }, - { - "epoch": 16.164276401564535, - "grad_norm": 1.6573278903961182, - "learning_rate": 6.936180904522613e-05, - "loss": 4.9049, - "step": 30995 - }, - { - "epoch": 16.164797913950455, - "grad_norm": 1.5498918294906616, - "learning_rate": 6.93608040201005e-05, - "loss": 5.3106, - "step": 30996 - }, - { - "epoch": 16.165319426336374, - "grad_norm": 1.3725453615188599, - "learning_rate": 6.935979899497488e-05, - "loss": 5.5318, - "step": 30997 - }, - { - "epoch": 16.165840938722294, - "grad_norm": 1.5398285388946533, - "learning_rate": 6.935879396984924e-05, - "loss": 5.0013, - "step": 30998 - }, - { - "epoch": 16.166362451108213, - "grad_norm": 1.4664270877838135, - "learning_rate": 6.935778894472362e-05, - "loss": 5.2751, - "step": 30999 - }, - { - "epoch": 16.166883963494133, - "grad_norm": 1.367972731590271, - "learning_rate": 6.9356783919598e-05, - "loss": 5.5325, - "step": 31000 - }, - { - "epoch": 16.167405475880052, - "grad_norm": 1.4945025444030762, - "learning_rate": 6.935577889447237e-05, - "loss": 5.0126, - "step": 31001 - }, - { - "epoch": 16.16792698826597, - "grad_norm": 1.4393197298049927, - "learning_rate": 6.935477386934674e-05, - "loss": 4.7882, - "step": 31002 - }, - { - "epoch": 16.16844850065189, - "grad_norm": 1.5353190898895264, - "learning_rate": 6.935376884422112e-05, - "loss": 5.1167, - "step": 31003 - }, - { - "epoch": 16.16897001303781, - "grad_norm": 1.4792746305465698, - "learning_rate": 6.935276381909548e-05, - "loss": 5.3125, - "step": 31004 - }, - { - "epoch": 16.16949152542373, - "grad_norm": 1.433932900428772, - "learning_rate": 6.935175879396986e-05, - "loss": 5.4604, - "step": 31005 - }, - { - "epoch": 16.17001303780965, - "grad_norm": 1.4247996807098389, - "learning_rate": 6.935075376884422e-05, - "loss": 5.5028, - "step": 31006 - }, - { - "epoch": 16.170534550195566, - "grad_norm": 1.4772855043411255, - "learning_rate": 6.93497487437186e-05, - "loss": 4.9803, - "step": 31007 - }, - { - "epoch": 16.171056062581485, - "grad_norm": 1.540046215057373, - "learning_rate": 6.934874371859296e-05, - "loss": 5.3829, - "step": 31008 - }, - { - "epoch": 16.171577574967404, - "grad_norm": 1.4577100276947021, - "learning_rate": 6.934773869346733e-05, - "loss": 5.2757, - "step": 31009 - }, - { - "epoch": 16.172099087353324, - "grad_norm": 1.4610847234725952, - "learning_rate": 6.93467336683417e-05, - "loss": 5.3101, - "step": 31010 - }, - { - "epoch": 16.172620599739243, - "grad_norm": 1.5822687149047852, - "learning_rate": 6.934572864321608e-05, - "loss": 5.3218, - "step": 31011 - }, - { - "epoch": 16.173142112125163, - "grad_norm": 1.471044659614563, - "learning_rate": 6.934472361809046e-05, - "loss": 5.3674, - "step": 31012 - }, - { - "epoch": 16.173663624511082, - "grad_norm": 1.5042380094528198, - "learning_rate": 6.934371859296483e-05, - "loss": 5.4827, - "step": 31013 - }, - { - "epoch": 16.174185136897, - "grad_norm": 1.4603718519210815, - "learning_rate": 6.93427135678392e-05, - "loss": 5.6867, - "step": 31014 - }, - { - "epoch": 16.17470664928292, - "grad_norm": 1.460034966468811, - "learning_rate": 6.934170854271357e-05, - "loss": 5.2185, - "step": 31015 - }, - { - "epoch": 16.17522816166884, - "grad_norm": 1.402159333229065, - "learning_rate": 6.934070351758795e-05, - "loss": 5.4275, - "step": 31016 - }, - { - "epoch": 16.17574967405476, - "grad_norm": 1.4719126224517822, - "learning_rate": 6.933969849246231e-05, - "loss": 5.3275, - "step": 31017 - }, - { - "epoch": 16.17627118644068, - "grad_norm": 1.4382659196853638, - "learning_rate": 6.933869346733669e-05, - "loss": 5.3734, - "step": 31018 - }, - { - "epoch": 16.176792698826596, - "grad_norm": 1.4584487676620483, - "learning_rate": 6.933768844221105e-05, - "loss": 5.6709, - "step": 31019 - }, - { - "epoch": 16.177314211212515, - "grad_norm": 1.5017905235290527, - "learning_rate": 6.933668341708543e-05, - "loss": 5.1959, - "step": 31020 - }, - { - "epoch": 16.177835723598434, - "grad_norm": 1.4500471353530884, - "learning_rate": 6.933567839195981e-05, - "loss": 5.3786, - "step": 31021 - }, - { - "epoch": 16.178357235984354, - "grad_norm": 1.5153350830078125, - "learning_rate": 6.933467336683419e-05, - "loss": 5.4702, - "step": 31022 - }, - { - "epoch": 16.178878748370273, - "grad_norm": 1.4129160642623901, - "learning_rate": 6.933366834170855e-05, - "loss": 4.8131, - "step": 31023 - }, - { - "epoch": 16.179400260756193, - "grad_norm": 1.5010240077972412, - "learning_rate": 6.933266331658291e-05, - "loss": 5.4957, - "step": 31024 - }, - { - "epoch": 16.179921773142112, - "grad_norm": 1.4935449361801147, - "learning_rate": 6.933165829145729e-05, - "loss": 4.6558, - "step": 31025 - }, - { - "epoch": 16.180443285528032, - "grad_norm": 1.496087908744812, - "learning_rate": 6.933065326633166e-05, - "loss": 5.576, - "step": 31026 - }, - { - "epoch": 16.18096479791395, - "grad_norm": 1.5449767112731934, - "learning_rate": 6.932964824120603e-05, - "loss": 5.3195, - "step": 31027 - }, - { - "epoch": 16.18148631029987, - "grad_norm": 1.4260962009429932, - "learning_rate": 6.93286432160804e-05, - "loss": 5.4215, - "step": 31028 - }, - { - "epoch": 16.18200782268579, - "grad_norm": 1.4913897514343262, - "learning_rate": 6.932763819095478e-05, - "loss": 5.3332, - "step": 31029 - }, - { - "epoch": 16.18252933507171, - "grad_norm": 1.4770723581314087, - "learning_rate": 6.932663316582914e-05, - "loss": 5.4414, - "step": 31030 - }, - { - "epoch": 16.183050847457626, - "grad_norm": 1.488770604133606, - "learning_rate": 6.932562814070352e-05, - "loss": 5.3183, - "step": 31031 - }, - { - "epoch": 16.183572359843545, - "grad_norm": 1.514716625213623, - "learning_rate": 6.93246231155779e-05, - "loss": 5.1597, - "step": 31032 - }, - { - "epoch": 16.184093872229464, - "grad_norm": 1.4983950853347778, - "learning_rate": 6.932361809045227e-05, - "loss": 5.7753, - "step": 31033 - }, - { - "epoch": 16.184615384615384, - "grad_norm": 1.462860345840454, - "learning_rate": 6.932261306532664e-05, - "loss": 5.6212, - "step": 31034 - }, - { - "epoch": 16.185136897001303, - "grad_norm": 1.3769932985305786, - "learning_rate": 6.932160804020102e-05, - "loss": 5.8616, - "step": 31035 - }, - { - "epoch": 16.185658409387223, - "grad_norm": 1.4208359718322754, - "learning_rate": 6.932060301507538e-05, - "loss": 5.3965, - "step": 31036 - }, - { - "epoch": 16.186179921773142, - "grad_norm": 1.3949577808380127, - "learning_rate": 6.931959798994974e-05, - "loss": 5.3119, - "step": 31037 - }, - { - "epoch": 16.186701434159062, - "grad_norm": 1.4958423376083374, - "learning_rate": 6.931859296482412e-05, - "loss": 5.2557, - "step": 31038 - }, - { - "epoch": 16.18722294654498, - "grad_norm": 1.4033560752868652, - "learning_rate": 6.931758793969849e-05, - "loss": 5.3622, - "step": 31039 - }, - { - "epoch": 16.1877444589309, - "grad_norm": 2.0020644664764404, - "learning_rate": 6.931658291457286e-05, - "loss": 5.1232, - "step": 31040 - }, - { - "epoch": 16.18826597131682, - "grad_norm": 1.3849378824234009, - "learning_rate": 6.931557788944724e-05, - "loss": 5.2298, - "step": 31041 - }, - { - "epoch": 16.18878748370274, - "grad_norm": 1.433516025543213, - "learning_rate": 6.931457286432162e-05, - "loss": 5.7402, - "step": 31042 - }, - { - "epoch": 16.189308996088656, - "grad_norm": 1.4487532377243042, - "learning_rate": 6.931356783919598e-05, - "loss": 5.4034, - "step": 31043 - }, - { - "epoch": 16.189830508474575, - "grad_norm": 1.5431175231933594, - "learning_rate": 6.931256281407036e-05, - "loss": 4.9047, - "step": 31044 - }, - { - "epoch": 16.190352020860495, - "grad_norm": 1.491159200668335, - "learning_rate": 6.931155778894473e-05, - "loss": 5.4132, - "step": 31045 - }, - { - "epoch": 16.190873533246414, - "grad_norm": 1.4863322973251343, - "learning_rate": 6.93105527638191e-05, - "loss": 5.0194, - "step": 31046 - }, - { - "epoch": 16.191395045632333, - "grad_norm": 1.4713115692138672, - "learning_rate": 6.930954773869347e-05, - "loss": 5.3142, - "step": 31047 - }, - { - "epoch": 16.191916558018253, - "grad_norm": 1.509300947189331, - "learning_rate": 6.930854271356785e-05, - "loss": 5.1429, - "step": 31048 - }, - { - "epoch": 16.192438070404172, - "grad_norm": 1.3954026699066162, - "learning_rate": 6.930753768844221e-05, - "loss": 5.4507, - "step": 31049 - }, - { - "epoch": 16.192959582790092, - "grad_norm": 1.4906244277954102, - "learning_rate": 6.930653266331658e-05, - "loss": 5.5347, - "step": 31050 - }, - { - "epoch": 16.19348109517601, - "grad_norm": 1.4364922046661377, - "learning_rate": 6.930552763819095e-05, - "loss": 5.4764, - "step": 31051 - }, - { - "epoch": 16.19400260756193, - "grad_norm": 1.473679542541504, - "learning_rate": 6.930452261306533e-05, - "loss": 4.6905, - "step": 31052 - }, - { - "epoch": 16.19452411994785, - "grad_norm": 1.4882627725601196, - "learning_rate": 6.930351758793971e-05, - "loss": 5.1453, - "step": 31053 - }, - { - "epoch": 16.195045632333766, - "grad_norm": 1.4740679264068604, - "learning_rate": 6.930251256281407e-05, - "loss": 5.5045, - "step": 31054 - }, - { - "epoch": 16.195567144719686, - "grad_norm": 1.5473634004592896, - "learning_rate": 6.930150753768845e-05, - "loss": 5.2574, - "step": 31055 - }, - { - "epoch": 16.196088657105605, - "grad_norm": 1.6102575063705444, - "learning_rate": 6.930050251256282e-05, - "loss": 5.1624, - "step": 31056 - }, - { - "epoch": 16.196610169491525, - "grad_norm": 1.9668787717819214, - "learning_rate": 6.929949748743719e-05, - "loss": 4.8126, - "step": 31057 - }, - { - "epoch": 16.197131681877444, - "grad_norm": 1.5714538097381592, - "learning_rate": 6.929849246231156e-05, - "loss": 5.4216, - "step": 31058 - }, - { - "epoch": 16.197653194263363, - "grad_norm": 1.511050820350647, - "learning_rate": 6.929748743718593e-05, - "loss": 5.441, - "step": 31059 - }, - { - "epoch": 16.198174706649283, - "grad_norm": 1.48995840549469, - "learning_rate": 6.92964824120603e-05, - "loss": 5.227, - "step": 31060 - }, - { - "epoch": 16.198696219035202, - "grad_norm": 1.4933931827545166, - "learning_rate": 6.929547738693468e-05, - "loss": 5.6681, - "step": 31061 - }, - { - "epoch": 16.199217731421122, - "grad_norm": 1.4405485391616821, - "learning_rate": 6.929447236180905e-05, - "loss": 5.2446, - "step": 31062 - }, - { - "epoch": 16.19973924380704, - "grad_norm": 1.641489028930664, - "learning_rate": 6.929346733668342e-05, - "loss": 4.6417, - "step": 31063 - }, - { - "epoch": 16.20026075619296, - "grad_norm": 1.6750874519348145, - "learning_rate": 6.92924623115578e-05, - "loss": 5.2454, - "step": 31064 - }, - { - "epoch": 16.20078226857888, - "grad_norm": 1.3739182949066162, - "learning_rate": 6.929145728643216e-05, - "loss": 4.9979, - "step": 31065 - }, - { - "epoch": 16.201303780964796, - "grad_norm": 1.5971739292144775, - "learning_rate": 6.929045226130654e-05, - "loss": 5.3022, - "step": 31066 - }, - { - "epoch": 16.201825293350716, - "grad_norm": 1.446584939956665, - "learning_rate": 6.92894472361809e-05, - "loss": 5.4874, - "step": 31067 - }, - { - "epoch": 16.202346805736635, - "grad_norm": 1.5779374837875366, - "learning_rate": 6.928844221105528e-05, - "loss": 5.0957, - "step": 31068 - }, - { - "epoch": 16.202868318122555, - "grad_norm": 1.5435078144073486, - "learning_rate": 6.928743718592965e-05, - "loss": 5.0175, - "step": 31069 - }, - { - "epoch": 16.203389830508474, - "grad_norm": 1.612547516822815, - "learning_rate": 6.928643216080402e-05, - "loss": 4.5034, - "step": 31070 - }, - { - "epoch": 16.203911342894393, - "grad_norm": 1.4271867275238037, - "learning_rate": 6.928542713567839e-05, - "loss": 4.8651, - "step": 31071 - }, - { - "epoch": 16.204432855280313, - "grad_norm": 1.4698894023895264, - "learning_rate": 6.928442211055277e-05, - "loss": 5.0419, - "step": 31072 - }, - { - "epoch": 16.204954367666232, - "grad_norm": 1.4936611652374268, - "learning_rate": 6.928341708542714e-05, - "loss": 5.3123, - "step": 31073 - }, - { - "epoch": 16.205475880052152, - "grad_norm": 1.6179996728897095, - "learning_rate": 6.928241206030152e-05, - "loss": 4.8703, - "step": 31074 - }, - { - "epoch": 16.20599739243807, - "grad_norm": 1.5059226751327515, - "learning_rate": 6.928140703517589e-05, - "loss": 5.3944, - "step": 31075 - }, - { - "epoch": 16.20651890482399, - "grad_norm": 1.4225107431411743, - "learning_rate": 6.928040201005025e-05, - "loss": 5.5073, - "step": 31076 - }, - { - "epoch": 16.20704041720991, - "grad_norm": 1.5877107381820679, - "learning_rate": 6.927939698492463e-05, - "loss": 5.0435, - "step": 31077 - }, - { - "epoch": 16.207561929595826, - "grad_norm": 1.5257872343063354, - "learning_rate": 6.927839195979899e-05, - "loss": 5.5803, - "step": 31078 - }, - { - "epoch": 16.208083441981746, - "grad_norm": 1.42218816280365, - "learning_rate": 6.927738693467337e-05, - "loss": 5.3323, - "step": 31079 - }, - { - "epoch": 16.208604954367665, - "grad_norm": 1.4694528579711914, - "learning_rate": 6.927638190954773e-05, - "loss": 5.6669, - "step": 31080 - }, - { - "epoch": 16.209126466753585, - "grad_norm": 1.4443883895874023, - "learning_rate": 6.927537688442211e-05, - "loss": 5.1932, - "step": 31081 - }, - { - "epoch": 16.209647979139504, - "grad_norm": 1.548561692237854, - "learning_rate": 6.927437185929649e-05, - "loss": 5.4565, - "step": 31082 - }, - { - "epoch": 16.210169491525424, - "grad_norm": 1.537735104560852, - "learning_rate": 6.927336683417087e-05, - "loss": 5.1837, - "step": 31083 - }, - { - "epoch": 16.210691003911343, - "grad_norm": 1.4334149360656738, - "learning_rate": 6.927236180904523e-05, - "loss": 5.7532, - "step": 31084 - }, - { - "epoch": 16.211212516297262, - "grad_norm": 1.4960253238677979, - "learning_rate": 6.927135678391961e-05, - "loss": 4.8881, - "step": 31085 - }, - { - "epoch": 16.211734028683182, - "grad_norm": 1.4487791061401367, - "learning_rate": 6.927035175879397e-05, - "loss": 4.8722, - "step": 31086 - }, - { - "epoch": 16.2122555410691, - "grad_norm": 1.480347752571106, - "learning_rate": 6.926934673366835e-05, - "loss": 5.6693, - "step": 31087 - }, - { - "epoch": 16.21277705345502, - "grad_norm": 1.6001046895980835, - "learning_rate": 6.926834170854272e-05, - "loss": 5.1815, - "step": 31088 - }, - { - "epoch": 16.21329856584094, - "grad_norm": 1.4910993576049805, - "learning_rate": 6.926733668341708e-05, - "loss": 5.2773, - "step": 31089 - }, - { - "epoch": 16.213820078226856, - "grad_norm": 1.5827549695968628, - "learning_rate": 6.926633165829146e-05, - "loss": 5.1379, - "step": 31090 - }, - { - "epoch": 16.214341590612776, - "grad_norm": 1.4520812034606934, - "learning_rate": 6.926532663316582e-05, - "loss": 5.7729, - "step": 31091 - }, - { - "epoch": 16.214863102998695, - "grad_norm": 1.3357058763504028, - "learning_rate": 6.92643216080402e-05, - "loss": 5.7751, - "step": 31092 - }, - { - "epoch": 16.215384615384615, - "grad_norm": 1.538414716720581, - "learning_rate": 6.926331658291458e-05, - "loss": 5.0563, - "step": 31093 - }, - { - "epoch": 16.215906127770534, - "grad_norm": 1.4891408681869507, - "learning_rate": 6.926231155778896e-05, - "loss": 5.1795, - "step": 31094 - }, - { - "epoch": 16.216427640156454, - "grad_norm": 1.5099854469299316, - "learning_rate": 6.926130653266332e-05, - "loss": 5.4529, - "step": 31095 - }, - { - "epoch": 16.216949152542373, - "grad_norm": 1.4191516637802124, - "learning_rate": 6.92603015075377e-05, - "loss": 5.4716, - "step": 31096 - }, - { - "epoch": 16.217470664928292, - "grad_norm": 1.5285416841506958, - "learning_rate": 6.925929648241206e-05, - "loss": 4.7333, - "step": 31097 - }, - { - "epoch": 16.217992177314212, - "grad_norm": 1.4937270879745483, - "learning_rate": 6.925829145728644e-05, - "loss": 5.3786, - "step": 31098 - }, - { - "epoch": 16.21851368970013, - "grad_norm": 1.4908466339111328, - "learning_rate": 6.92572864321608e-05, - "loss": 5.1821, - "step": 31099 - }, - { - "epoch": 16.21903520208605, - "grad_norm": 1.620050311088562, - "learning_rate": 6.925628140703518e-05, - "loss": 5.4333, - "step": 31100 - }, - { - "epoch": 16.21955671447197, - "grad_norm": 1.4987469911575317, - "learning_rate": 6.925527638190955e-05, - "loss": 5.6617, - "step": 31101 - }, - { - "epoch": 16.220078226857886, - "grad_norm": 1.502287745475769, - "learning_rate": 6.925427135678392e-05, - "loss": 5.3741, - "step": 31102 - }, - { - "epoch": 16.220599739243806, - "grad_norm": 1.5648324489593506, - "learning_rate": 6.92532663316583e-05, - "loss": 5.4678, - "step": 31103 - }, - { - "epoch": 16.221121251629725, - "grad_norm": 1.4424359798431396, - "learning_rate": 6.925226130653267e-05, - "loss": 5.4778, - "step": 31104 - }, - { - "epoch": 16.221642764015645, - "grad_norm": 1.491478681564331, - "learning_rate": 6.925125628140704e-05, - "loss": 5.4773, - "step": 31105 - }, - { - "epoch": 16.222164276401564, - "grad_norm": 1.5185734033584595, - "learning_rate": 6.925025125628141e-05, - "loss": 5.2158, - "step": 31106 - }, - { - "epoch": 16.222685788787484, - "grad_norm": 1.5501961708068848, - "learning_rate": 6.924924623115579e-05, - "loss": 5.4024, - "step": 31107 - }, - { - "epoch": 16.223207301173403, - "grad_norm": 1.419853687286377, - "learning_rate": 6.924824120603015e-05, - "loss": 5.4851, - "step": 31108 - }, - { - "epoch": 16.223728813559323, - "grad_norm": 1.6301162242889404, - "learning_rate": 6.924723618090453e-05, - "loss": 5.3487, - "step": 31109 - }, - { - "epoch": 16.224250325945242, - "grad_norm": 1.5110739469528198, - "learning_rate": 6.924623115577889e-05, - "loss": 5.2956, - "step": 31110 - }, - { - "epoch": 16.22477183833116, - "grad_norm": 1.4063645601272583, - "learning_rate": 6.924522613065327e-05, - "loss": 5.5912, - "step": 31111 - }, - { - "epoch": 16.22529335071708, - "grad_norm": 1.5101561546325684, - "learning_rate": 6.924422110552763e-05, - "loss": 5.6429, - "step": 31112 - }, - { - "epoch": 16.225814863103, - "grad_norm": 1.5045820474624634, - "learning_rate": 6.924321608040201e-05, - "loss": 5.2703, - "step": 31113 - }, - { - "epoch": 16.226336375488916, - "grad_norm": 1.6005206108093262, - "learning_rate": 6.924221105527639e-05, - "loss": 5.023, - "step": 31114 - }, - { - "epoch": 16.226857887874836, - "grad_norm": 1.6010268926620483, - "learning_rate": 6.924120603015077e-05, - "loss": 5.0502, - "step": 31115 - }, - { - "epoch": 16.227379400260755, - "grad_norm": 1.5830497741699219, - "learning_rate": 6.924020100502513e-05, - "loss": 5.4597, - "step": 31116 - }, - { - "epoch": 16.227900912646675, - "grad_norm": 1.533555030822754, - "learning_rate": 6.92391959798995e-05, - "loss": 5.199, - "step": 31117 - }, - { - "epoch": 16.228422425032594, - "grad_norm": 1.4514802694320679, - "learning_rate": 6.923819095477387e-05, - "loss": 5.7166, - "step": 31118 - }, - { - "epoch": 16.228943937418514, - "grad_norm": 1.459532380104065, - "learning_rate": 6.923718592964824e-05, - "loss": 4.8776, - "step": 31119 - }, - { - "epoch": 16.229465449804433, - "grad_norm": 1.4712469577789307, - "learning_rate": 6.923618090452262e-05, - "loss": 5.151, - "step": 31120 - }, - { - "epoch": 16.229986962190353, - "grad_norm": 1.4706616401672363, - "learning_rate": 6.923517587939698e-05, - "loss": 5.6858, - "step": 31121 - }, - { - "epoch": 16.230508474576272, - "grad_norm": 1.4809606075286865, - "learning_rate": 6.923417085427136e-05, - "loss": 5.3276, - "step": 31122 - }, - { - "epoch": 16.23102998696219, - "grad_norm": 1.5044983625411987, - "learning_rate": 6.923316582914572e-05, - "loss": 5.1247, - "step": 31123 - }, - { - "epoch": 16.23155149934811, - "grad_norm": 1.4824814796447754, - "learning_rate": 6.92321608040201e-05, - "loss": 5.5281, - "step": 31124 - }, - { - "epoch": 16.23207301173403, - "grad_norm": 1.3190925121307373, - "learning_rate": 6.923115577889448e-05, - "loss": 5.4757, - "step": 31125 - }, - { - "epoch": 16.232594524119946, - "grad_norm": 1.4410829544067383, - "learning_rate": 6.923015075376886e-05, - "loss": 5.6442, - "step": 31126 - }, - { - "epoch": 16.233116036505866, - "grad_norm": 1.530422568321228, - "learning_rate": 6.922914572864322e-05, - "loss": 5.4087, - "step": 31127 - }, - { - "epoch": 16.233637548891785, - "grad_norm": 1.4779512882232666, - "learning_rate": 6.92281407035176e-05, - "loss": 5.4477, - "step": 31128 - }, - { - "epoch": 16.234159061277705, - "grad_norm": 1.5835564136505127, - "learning_rate": 6.922713567839196e-05, - "loss": 4.9246, - "step": 31129 - }, - { - "epoch": 16.234680573663624, - "grad_norm": 1.5404410362243652, - "learning_rate": 6.922613065326633e-05, - "loss": 5.1954, - "step": 31130 - }, - { - "epoch": 16.235202086049544, - "grad_norm": 1.4374843835830688, - "learning_rate": 6.92251256281407e-05, - "loss": 5.0901, - "step": 31131 - }, - { - "epoch": 16.235723598435463, - "grad_norm": 1.4760843515396118, - "learning_rate": 6.922412060301507e-05, - "loss": 5.3988, - "step": 31132 - }, - { - "epoch": 16.236245110821383, - "grad_norm": 1.459097146987915, - "learning_rate": 6.922311557788945e-05, - "loss": 5.4148, - "step": 31133 - }, - { - "epoch": 16.236766623207302, - "grad_norm": 1.4868260622024536, - "learning_rate": 6.922211055276382e-05, - "loss": 5.31, - "step": 31134 - }, - { - "epoch": 16.23728813559322, - "grad_norm": 1.4610999822616577, - "learning_rate": 6.92211055276382e-05, - "loss": 5.7052, - "step": 31135 - }, - { - "epoch": 16.23780964797914, - "grad_norm": 1.4065773487091064, - "learning_rate": 6.922010050251257e-05, - "loss": 5.4779, - "step": 31136 - }, - { - "epoch": 16.23833116036506, - "grad_norm": 1.6316365003585815, - "learning_rate": 6.921909547738694e-05, - "loss": 4.9049, - "step": 31137 - }, - { - "epoch": 16.238852672750976, - "grad_norm": 1.4870314598083496, - "learning_rate": 6.921809045226131e-05, - "loss": 5.5419, - "step": 31138 - }, - { - "epoch": 16.239374185136896, - "grad_norm": 1.4268778562545776, - "learning_rate": 6.921708542713569e-05, - "loss": 5.5851, - "step": 31139 - }, - { - "epoch": 16.239895697522815, - "grad_norm": 1.4559993743896484, - "learning_rate": 6.921608040201005e-05, - "loss": 5.1562, - "step": 31140 - }, - { - "epoch": 16.240417209908735, - "grad_norm": 1.4792026281356812, - "learning_rate": 6.921507537688443e-05, - "loss": 5.6098, - "step": 31141 - }, - { - "epoch": 16.240938722294654, - "grad_norm": 1.4512722492218018, - "learning_rate": 6.92140703517588e-05, - "loss": 5.7943, - "step": 31142 - }, - { - "epoch": 16.241460234680574, - "grad_norm": 1.686238169670105, - "learning_rate": 6.921306532663316e-05, - "loss": 4.812, - "step": 31143 - }, - { - "epoch": 16.241981747066493, - "grad_norm": 1.422866702079773, - "learning_rate": 6.921206030150754e-05, - "loss": 5.1444, - "step": 31144 - }, - { - "epoch": 16.242503259452413, - "grad_norm": 1.5243974924087524, - "learning_rate": 6.921105527638191e-05, - "loss": 5.1518, - "step": 31145 - }, - { - "epoch": 16.243024771838332, - "grad_norm": 1.578454613685608, - "learning_rate": 6.921005025125629e-05, - "loss": 5.1257, - "step": 31146 - }, - { - "epoch": 16.24354628422425, - "grad_norm": 1.5294760465621948, - "learning_rate": 6.920904522613066e-05, - "loss": 5.5434, - "step": 31147 - }, - { - "epoch": 16.24406779661017, - "grad_norm": 1.4657998085021973, - "learning_rate": 6.920804020100503e-05, - "loss": 5.3318, - "step": 31148 - }, - { - "epoch": 16.24458930899609, - "grad_norm": 1.4548890590667725, - "learning_rate": 6.92070351758794e-05, - "loss": 5.4532, - "step": 31149 - }, - { - "epoch": 16.245110821382006, - "grad_norm": 1.3977382183074951, - "learning_rate": 6.920603015075378e-05, - "loss": 5.4701, - "step": 31150 - }, - { - "epoch": 16.245632333767926, - "grad_norm": 1.5284249782562256, - "learning_rate": 6.920502512562814e-05, - "loss": 5.183, - "step": 31151 - }, - { - "epoch": 16.246153846153845, - "grad_norm": 1.4206089973449707, - "learning_rate": 6.920402010050252e-05, - "loss": 5.5516, - "step": 31152 - }, - { - "epoch": 16.246675358539765, - "grad_norm": 1.4672527313232422, - "learning_rate": 6.920301507537688e-05, - "loss": 5.457, - "step": 31153 - }, - { - "epoch": 16.247196870925684, - "grad_norm": 1.5014852285385132, - "learning_rate": 6.920201005025126e-05, - "loss": 4.8887, - "step": 31154 - }, - { - "epoch": 16.247718383311604, - "grad_norm": 1.5018209218978882, - "learning_rate": 6.920100502512564e-05, - "loss": 5.0426, - "step": 31155 - }, - { - "epoch": 16.248239895697523, - "grad_norm": 1.445389986038208, - "learning_rate": 6.92e-05, - "loss": 5.4669, - "step": 31156 - }, - { - "epoch": 16.248761408083443, - "grad_norm": 1.5328763723373413, - "learning_rate": 6.919899497487438e-05, - "loss": 5.1019, - "step": 31157 - }, - { - "epoch": 16.249282920469362, - "grad_norm": 1.5564296245574951, - "learning_rate": 6.919798994974874e-05, - "loss": 5.1448, - "step": 31158 - }, - { - "epoch": 16.24980443285528, - "grad_norm": 1.5688992738723755, - "learning_rate": 6.919698492462312e-05, - "loss": 4.8091, - "step": 31159 - }, - { - "epoch": 16.2503259452412, - "grad_norm": 1.3605997562408447, - "learning_rate": 6.919597989949749e-05, - "loss": 5.8587, - "step": 31160 - }, - { - "epoch": 16.250847457627117, - "grad_norm": 1.3417707681655884, - "learning_rate": 6.919497487437186e-05, - "loss": 5.5647, - "step": 31161 - }, - { - "epoch": 16.251368970013036, - "grad_norm": 1.493355393409729, - "learning_rate": 6.919396984924623e-05, - "loss": 5.2846, - "step": 31162 - }, - { - "epoch": 16.251890482398956, - "grad_norm": 1.4408022165298462, - "learning_rate": 6.91929648241206e-05, - "loss": 5.0119, - "step": 31163 - }, - { - "epoch": 16.252411994784875, - "grad_norm": 1.4317432641983032, - "learning_rate": 6.919195979899497e-05, - "loss": 5.2902, - "step": 31164 - }, - { - "epoch": 16.252933507170795, - "grad_norm": 1.4374948740005493, - "learning_rate": 6.919095477386935e-05, - "loss": 5.1085, - "step": 31165 - }, - { - "epoch": 16.253455019556714, - "grad_norm": 1.5186349153518677, - "learning_rate": 6.918994974874373e-05, - "loss": 5.54, - "step": 31166 - }, - { - "epoch": 16.253976531942634, - "grad_norm": 1.492283582687378, - "learning_rate": 6.91889447236181e-05, - "loss": 5.6786, - "step": 31167 - }, - { - "epoch": 16.254498044328553, - "grad_norm": 1.4649333953857422, - "learning_rate": 6.918793969849247e-05, - "loss": 5.3592, - "step": 31168 - }, - { - "epoch": 16.255019556714473, - "grad_norm": 1.4844154119491577, - "learning_rate": 6.918693467336683e-05, - "loss": 5.4314, - "step": 31169 - }, - { - "epoch": 16.255541069100392, - "grad_norm": 1.3289729356765747, - "learning_rate": 6.918592964824121e-05, - "loss": 5.7101, - "step": 31170 - }, - { - "epoch": 16.25606258148631, - "grad_norm": 1.4748516082763672, - "learning_rate": 6.918492462311557e-05, - "loss": 4.9995, - "step": 31171 - }, - { - "epoch": 16.25658409387223, - "grad_norm": 1.4208893775939941, - "learning_rate": 6.918391959798995e-05, - "loss": 5.1643, - "step": 31172 - }, - { - "epoch": 16.257105606258147, - "grad_norm": 1.4513126611709595, - "learning_rate": 6.918291457286432e-05, - "loss": 5.3287, - "step": 31173 - }, - { - "epoch": 16.257627118644066, - "grad_norm": 1.51252281665802, - "learning_rate": 6.91819095477387e-05, - "loss": 5.0338, - "step": 31174 - }, - { - "epoch": 16.258148631029986, - "grad_norm": 1.4612311124801636, - "learning_rate": 6.918090452261307e-05, - "loss": 5.7926, - "step": 31175 - }, - { - "epoch": 16.258670143415905, - "grad_norm": 1.555660367012024, - "learning_rate": 6.917989949748745e-05, - "loss": 4.8385, - "step": 31176 - }, - { - "epoch": 16.259191655801825, - "grad_norm": 1.5507440567016602, - "learning_rate": 6.917889447236181e-05, - "loss": 5.4225, - "step": 31177 - }, - { - "epoch": 16.259713168187744, - "grad_norm": 1.5529522895812988, - "learning_rate": 6.917788944723619e-05, - "loss": 5.5473, - "step": 31178 - }, - { - "epoch": 16.260234680573664, - "grad_norm": 1.522326111793518, - "learning_rate": 6.917688442211056e-05, - "loss": 5.3007, - "step": 31179 - }, - { - "epoch": 16.260756192959583, - "grad_norm": 1.5410088300704956, - "learning_rate": 6.917587939698493e-05, - "loss": 5.436, - "step": 31180 - }, - { - "epoch": 16.261277705345503, - "grad_norm": 1.4564471244812012, - "learning_rate": 6.91748743718593e-05, - "loss": 5.7276, - "step": 31181 - }, - { - "epoch": 16.261799217731422, - "grad_norm": 1.4776140451431274, - "learning_rate": 6.917386934673366e-05, - "loss": 5.5467, - "step": 31182 - }, - { - "epoch": 16.26232073011734, - "grad_norm": 1.4679334163665771, - "learning_rate": 6.917286432160804e-05, - "loss": 5.6026, - "step": 31183 - }, - { - "epoch": 16.26284224250326, - "grad_norm": 1.3723526000976562, - "learning_rate": 6.91718592964824e-05, - "loss": 5.0661, - "step": 31184 - }, - { - "epoch": 16.263363754889177, - "grad_norm": 1.4877872467041016, - "learning_rate": 6.917085427135678e-05, - "loss": 5.3253, - "step": 31185 - }, - { - "epoch": 16.263885267275096, - "grad_norm": 1.551662802696228, - "learning_rate": 6.916984924623116e-05, - "loss": 5.1842, - "step": 31186 - }, - { - "epoch": 16.264406779661016, - "grad_norm": 1.4470540285110474, - "learning_rate": 6.916884422110554e-05, - "loss": 5.4921, - "step": 31187 - }, - { - "epoch": 16.264928292046935, - "grad_norm": 1.4354807138442993, - "learning_rate": 6.91678391959799e-05, - "loss": 5.3626, - "step": 31188 - }, - { - "epoch": 16.265449804432855, - "grad_norm": 1.657882571220398, - "learning_rate": 6.916683417085428e-05, - "loss": 5.0075, - "step": 31189 - }, - { - "epoch": 16.265971316818774, - "grad_norm": 1.4575449228286743, - "learning_rate": 6.916582914572864e-05, - "loss": 5.3677, - "step": 31190 - }, - { - "epoch": 16.266492829204694, - "grad_norm": 1.4526653289794922, - "learning_rate": 6.916482412060302e-05, - "loss": 5.4621, - "step": 31191 - }, - { - "epoch": 16.267014341590613, - "grad_norm": 1.443003535270691, - "learning_rate": 6.916381909547739e-05, - "loss": 5.4653, - "step": 31192 - }, - { - "epoch": 16.267535853976533, - "grad_norm": 1.5030765533447266, - "learning_rate": 6.916281407035176e-05, - "loss": 5.5544, - "step": 31193 - }, - { - "epoch": 16.268057366362452, - "grad_norm": 1.452349066734314, - "learning_rate": 6.916180904522613e-05, - "loss": 5.5443, - "step": 31194 - }, - { - "epoch": 16.26857887874837, - "grad_norm": 1.468144178390503, - "learning_rate": 6.91608040201005e-05, - "loss": 5.5023, - "step": 31195 - }, - { - "epoch": 16.26910039113429, - "grad_norm": 1.3157298564910889, - "learning_rate": 6.915979899497488e-05, - "loss": 5.8094, - "step": 31196 - }, - { - "epoch": 16.269621903520207, - "grad_norm": 1.5338844060897827, - "learning_rate": 6.915879396984925e-05, - "loss": 5.1388, - "step": 31197 - }, - { - "epoch": 16.270143415906126, - "grad_norm": 1.568088412284851, - "learning_rate": 6.915778894472363e-05, - "loss": 5.5713, - "step": 31198 - }, - { - "epoch": 16.270664928292046, - "grad_norm": 1.3681912422180176, - "learning_rate": 6.915678391959799e-05, - "loss": 5.5144, - "step": 31199 - }, - { - "epoch": 16.271186440677965, - "grad_norm": 1.4830693006515503, - "learning_rate": 6.915577889447237e-05, - "loss": 5.324, - "step": 31200 - }, - { - "epoch": 16.271707953063885, - "grad_norm": 1.351847767829895, - "learning_rate": 6.915477386934673e-05, - "loss": 5.706, - "step": 31201 - }, - { - "epoch": 16.272229465449804, - "grad_norm": 1.523917555809021, - "learning_rate": 6.915376884422111e-05, - "loss": 5.4364, - "step": 31202 - }, - { - "epoch": 16.272750977835724, - "grad_norm": 1.4081720113754272, - "learning_rate": 6.915276381909547e-05, - "loss": 5.4004, - "step": 31203 - }, - { - "epoch": 16.273272490221643, - "grad_norm": 1.4545812606811523, - "learning_rate": 6.915175879396985e-05, - "loss": 5.5107, - "step": 31204 - }, - { - "epoch": 16.273794002607563, - "grad_norm": 1.5296237468719482, - "learning_rate": 6.915075376884422e-05, - "loss": 5.1814, - "step": 31205 - }, - { - "epoch": 16.274315514993482, - "grad_norm": 1.4336802959442139, - "learning_rate": 6.91497487437186e-05, - "loss": 5.5067, - "step": 31206 - }, - { - "epoch": 16.2748370273794, - "grad_norm": 1.534881353378296, - "learning_rate": 6.914874371859297e-05, - "loss": 5.2452, - "step": 31207 - }, - { - "epoch": 16.27535853976532, - "grad_norm": 1.3093602657318115, - "learning_rate": 6.914773869346735e-05, - "loss": 4.904, - "step": 31208 - }, - { - "epoch": 16.275880052151237, - "grad_norm": 1.5695298910140991, - "learning_rate": 6.914673366834171e-05, - "loss": 5.1777, - "step": 31209 - }, - { - "epoch": 16.276401564537156, - "grad_norm": 1.4136409759521484, - "learning_rate": 6.914572864321608e-05, - "loss": 5.6982, - "step": 31210 - }, - { - "epoch": 16.276923076923076, - "grad_norm": 1.4662851095199585, - "learning_rate": 6.914472361809046e-05, - "loss": 4.8913, - "step": 31211 - }, - { - "epoch": 16.277444589308995, - "grad_norm": 1.4879060983657837, - "learning_rate": 6.914371859296482e-05, - "loss": 5.2742, - "step": 31212 - }, - { - "epoch": 16.277966101694915, - "grad_norm": 1.578859567642212, - "learning_rate": 6.91427135678392e-05, - "loss": 5.267, - "step": 31213 - }, - { - "epoch": 16.278487614080834, - "grad_norm": 1.5114257335662842, - "learning_rate": 6.914170854271356e-05, - "loss": 4.9727, - "step": 31214 - }, - { - "epoch": 16.279009126466754, - "grad_norm": 1.419581651687622, - "learning_rate": 6.914070351758794e-05, - "loss": 5.7282, - "step": 31215 - }, - { - "epoch": 16.279530638852673, - "grad_norm": 1.5299999713897705, - "learning_rate": 6.913969849246232e-05, - "loss": 4.8939, - "step": 31216 - }, - { - "epoch": 16.280052151238593, - "grad_norm": 1.4603723287582397, - "learning_rate": 6.91386934673367e-05, - "loss": 5.5721, - "step": 31217 - }, - { - "epoch": 16.280573663624512, - "grad_norm": 1.4693877696990967, - "learning_rate": 6.913768844221106e-05, - "loss": 5.5191, - "step": 31218 - }, - { - "epoch": 16.28109517601043, - "grad_norm": 1.5239485502243042, - "learning_rate": 6.913668341708544e-05, - "loss": 4.9307, - "step": 31219 - }, - { - "epoch": 16.28161668839635, - "grad_norm": 1.5082708597183228, - "learning_rate": 6.91356783919598e-05, - "loss": 5.4136, - "step": 31220 - }, - { - "epoch": 16.282138200782267, - "grad_norm": 1.465074062347412, - "learning_rate": 6.913467336683418e-05, - "loss": 5.4136, - "step": 31221 - }, - { - "epoch": 16.282659713168186, - "grad_norm": 1.5279515981674194, - "learning_rate": 6.913366834170855e-05, - "loss": 5.145, - "step": 31222 - }, - { - "epoch": 16.283181225554106, - "grad_norm": 1.454884648323059, - "learning_rate": 6.913266331658291e-05, - "loss": 5.6819, - "step": 31223 - }, - { - "epoch": 16.283702737940025, - "grad_norm": 1.5164965391159058, - "learning_rate": 6.913165829145729e-05, - "loss": 5.4672, - "step": 31224 - }, - { - "epoch": 16.284224250325945, - "grad_norm": 1.3841710090637207, - "learning_rate": 6.913065326633165e-05, - "loss": 5.514, - "step": 31225 - }, - { - "epoch": 16.284745762711864, - "grad_norm": 1.4138258695602417, - "learning_rate": 6.912964824120603e-05, - "loss": 5.7555, - "step": 31226 - }, - { - "epoch": 16.285267275097784, - "grad_norm": 1.4587318897247314, - "learning_rate": 6.912864321608041e-05, - "loss": 5.132, - "step": 31227 - }, - { - "epoch": 16.285788787483703, - "grad_norm": 1.4891409873962402, - "learning_rate": 6.912763819095479e-05, - "loss": 5.5914, - "step": 31228 - }, - { - "epoch": 16.286310299869623, - "grad_norm": 1.4580469131469727, - "learning_rate": 6.912663316582915e-05, - "loss": 5.6618, - "step": 31229 - }, - { - "epoch": 16.286831812255542, - "grad_norm": 1.5582109689712524, - "learning_rate": 6.912562814070353e-05, - "loss": 5.0162, - "step": 31230 - }, - { - "epoch": 16.28735332464146, - "grad_norm": 1.4468975067138672, - "learning_rate": 6.912462311557789e-05, - "loss": 5.4752, - "step": 31231 - }, - { - "epoch": 16.28787483702738, - "grad_norm": 1.5847190618515015, - "learning_rate": 6.912361809045227e-05, - "loss": 4.7837, - "step": 31232 - }, - { - "epoch": 16.288396349413297, - "grad_norm": 1.4403737783432007, - "learning_rate": 6.912261306532663e-05, - "loss": 5.5088, - "step": 31233 - }, - { - "epoch": 16.288917861799217, - "grad_norm": 1.5605820417404175, - "learning_rate": 6.912160804020101e-05, - "loss": 4.9008, - "step": 31234 - }, - { - "epoch": 16.289439374185136, - "grad_norm": 1.5190587043762207, - "learning_rate": 6.912060301507538e-05, - "loss": 5.1531, - "step": 31235 - }, - { - "epoch": 16.289960886571055, - "grad_norm": 1.4247583150863647, - "learning_rate": 6.911959798994975e-05, - "loss": 5.6407, - "step": 31236 - }, - { - "epoch": 16.290482398956975, - "grad_norm": 1.5523450374603271, - "learning_rate": 6.911859296482413e-05, - "loss": 4.8086, - "step": 31237 - }, - { - "epoch": 16.291003911342894, - "grad_norm": 1.5023653507232666, - "learning_rate": 6.91175879396985e-05, - "loss": 5.4157, - "step": 31238 - }, - { - "epoch": 16.291525423728814, - "grad_norm": 1.4596022367477417, - "learning_rate": 6.911658291457287e-05, - "loss": 5.7254, - "step": 31239 - }, - { - "epoch": 16.292046936114733, - "grad_norm": 1.4000180959701538, - "learning_rate": 6.911557788944724e-05, - "loss": 5.6521, - "step": 31240 - }, - { - "epoch": 16.292568448500653, - "grad_norm": 1.400017499923706, - "learning_rate": 6.911457286432162e-05, - "loss": 5.6176, - "step": 31241 - }, - { - "epoch": 16.293089960886572, - "grad_norm": 1.5628305673599243, - "learning_rate": 6.911356783919598e-05, - "loss": 5.4478, - "step": 31242 - }, - { - "epoch": 16.29361147327249, - "grad_norm": 1.424174427986145, - "learning_rate": 6.911256281407036e-05, - "loss": 5.6502, - "step": 31243 - }, - { - "epoch": 16.294132985658408, - "grad_norm": 1.417543649673462, - "learning_rate": 6.911155778894472e-05, - "loss": 5.1974, - "step": 31244 - }, - { - "epoch": 16.294654498044327, - "grad_norm": 1.5960663557052612, - "learning_rate": 6.91105527638191e-05, - "loss": 5.4081, - "step": 31245 - }, - { - "epoch": 16.295176010430247, - "grad_norm": 1.5144634246826172, - "learning_rate": 6.910954773869346e-05, - "loss": 5.348, - "step": 31246 - }, - { - "epoch": 16.295697522816166, - "grad_norm": 1.5649641752243042, - "learning_rate": 6.910854271356784e-05, - "loss": 4.9098, - "step": 31247 - }, - { - "epoch": 16.296219035202085, - "grad_norm": 1.610914945602417, - "learning_rate": 6.910753768844222e-05, - "loss": 5.3447, - "step": 31248 - }, - { - "epoch": 16.296740547588005, - "grad_norm": 1.556652545928955, - "learning_rate": 6.910653266331658e-05, - "loss": 5.228, - "step": 31249 - }, - { - "epoch": 16.297262059973924, - "grad_norm": 1.4710885286331177, - "learning_rate": 6.910552763819096e-05, - "loss": 5.504, - "step": 31250 - }, - { - "epoch": 16.297783572359844, - "grad_norm": 1.5255745649337769, - "learning_rate": 6.910452261306533e-05, - "loss": 5.1473, - "step": 31251 - }, - { - "epoch": 16.298305084745763, - "grad_norm": 1.504607915878296, - "learning_rate": 6.91035175879397e-05, - "loss": 5.1172, - "step": 31252 - }, - { - "epoch": 16.298826597131683, - "grad_norm": 1.646968126296997, - "learning_rate": 6.910251256281407e-05, - "loss": 4.73, - "step": 31253 - }, - { - "epoch": 16.299348109517602, - "grad_norm": 1.4869636297225952, - "learning_rate": 6.910150753768845e-05, - "loss": 5.5766, - "step": 31254 - }, - { - "epoch": 16.29986962190352, - "grad_norm": 1.406297206878662, - "learning_rate": 6.910050251256281e-05, - "loss": 4.8736, - "step": 31255 - }, - { - "epoch": 16.300391134289438, - "grad_norm": 1.5708647966384888, - "learning_rate": 6.909949748743719e-05, - "loss": 4.6274, - "step": 31256 - }, - { - "epoch": 16.300912646675357, - "grad_norm": 1.5810109376907349, - "learning_rate": 6.909849246231157e-05, - "loss": 5.0635, - "step": 31257 - }, - { - "epoch": 16.301434159061277, - "grad_norm": 1.518337607383728, - "learning_rate": 6.909748743718594e-05, - "loss": 5.4153, - "step": 31258 - }, - { - "epoch": 16.301955671447196, - "grad_norm": 1.465943694114685, - "learning_rate": 6.909648241206031e-05, - "loss": 5.4488, - "step": 31259 - }, - { - "epoch": 16.302477183833116, - "grad_norm": 1.3215973377227783, - "learning_rate": 6.909547738693469e-05, - "loss": 5.5549, - "step": 31260 - }, - { - "epoch": 16.302998696219035, - "grad_norm": 1.3173540830612183, - "learning_rate": 6.909447236180905e-05, - "loss": 5.701, - "step": 31261 - }, - { - "epoch": 16.303520208604954, - "grad_norm": 1.62455415725708, - "learning_rate": 6.909346733668341e-05, - "loss": 4.9435, - "step": 31262 - }, - { - "epoch": 16.304041720990874, - "grad_norm": 1.5001991987228394, - "learning_rate": 6.909246231155779e-05, - "loss": 5.3616, - "step": 31263 - }, - { - "epoch": 16.304563233376793, - "grad_norm": 1.4514966011047363, - "learning_rate": 6.909145728643216e-05, - "loss": 5.4493, - "step": 31264 - }, - { - "epoch": 16.305084745762713, - "grad_norm": 1.5461254119873047, - "learning_rate": 6.909045226130653e-05, - "loss": 5.6012, - "step": 31265 - }, - { - "epoch": 16.305606258148632, - "grad_norm": 1.4280036687850952, - "learning_rate": 6.90894472361809e-05, - "loss": 5.0724, - "step": 31266 - }, - { - "epoch": 16.30612777053455, - "grad_norm": 1.4334359169006348, - "learning_rate": 6.908844221105528e-05, - "loss": 5.3279, - "step": 31267 - }, - { - "epoch": 16.306649282920468, - "grad_norm": 1.395806908607483, - "learning_rate": 6.908743718592965e-05, - "loss": 5.7179, - "step": 31268 - }, - { - "epoch": 16.307170795306387, - "grad_norm": 1.4503413438796997, - "learning_rate": 6.908643216080403e-05, - "loss": 5.7819, - "step": 31269 - }, - { - "epoch": 16.307692307692307, - "grad_norm": 1.54376220703125, - "learning_rate": 6.90854271356784e-05, - "loss": 5.4849, - "step": 31270 - }, - { - "epoch": 16.308213820078226, - "grad_norm": 1.4756830930709839, - "learning_rate": 6.908442211055277e-05, - "loss": 5.4708, - "step": 31271 - }, - { - "epoch": 16.308735332464146, - "grad_norm": 1.4799803495407104, - "learning_rate": 6.908341708542714e-05, - "loss": 5.1967, - "step": 31272 - }, - { - "epoch": 16.309256844850065, - "grad_norm": 1.5036073923110962, - "learning_rate": 6.908241206030152e-05, - "loss": 5.4543, - "step": 31273 - }, - { - "epoch": 16.309778357235984, - "grad_norm": 1.4551055431365967, - "learning_rate": 6.908140703517588e-05, - "loss": 5.3086, - "step": 31274 - }, - { - "epoch": 16.310299869621904, - "grad_norm": 1.4543179273605347, - "learning_rate": 6.908040201005024e-05, - "loss": 5.5706, - "step": 31275 - }, - { - "epoch": 16.310821382007823, - "grad_norm": 1.4630993604660034, - "learning_rate": 6.907939698492462e-05, - "loss": 5.1712, - "step": 31276 - }, - { - "epoch": 16.311342894393743, - "grad_norm": 1.4701939821243286, - "learning_rate": 6.9078391959799e-05, - "loss": 5.3825, - "step": 31277 - }, - { - "epoch": 16.311864406779662, - "grad_norm": 1.4726046323776245, - "learning_rate": 6.907738693467338e-05, - "loss": 5.0443, - "step": 31278 - }, - { - "epoch": 16.312385919165582, - "grad_norm": 1.4095540046691895, - "learning_rate": 6.907638190954774e-05, - "loss": 5.4287, - "step": 31279 - }, - { - "epoch": 16.312907431551498, - "grad_norm": 1.4833619594573975, - "learning_rate": 6.907537688442212e-05, - "loss": 5.4447, - "step": 31280 - }, - { - "epoch": 16.313428943937417, - "grad_norm": 1.5654155015945435, - "learning_rate": 6.907437185929648e-05, - "loss": 5.6097, - "step": 31281 - }, - { - "epoch": 16.313950456323337, - "grad_norm": 1.5312641859054565, - "learning_rate": 6.907336683417086e-05, - "loss": 5.4514, - "step": 31282 - }, - { - "epoch": 16.314471968709256, - "grad_norm": 1.559386134147644, - "learning_rate": 6.907236180904523e-05, - "loss": 5.6064, - "step": 31283 - }, - { - "epoch": 16.314993481095176, - "grad_norm": 1.537150502204895, - "learning_rate": 6.90713567839196e-05, - "loss": 5.331, - "step": 31284 - }, - { - "epoch": 16.315514993481095, - "grad_norm": 1.4087860584259033, - "learning_rate": 6.907035175879397e-05, - "loss": 5.5754, - "step": 31285 - }, - { - "epoch": 16.316036505867014, - "grad_norm": 1.439221739768982, - "learning_rate": 6.906934673366835e-05, - "loss": 5.5437, - "step": 31286 - }, - { - "epoch": 16.316558018252934, - "grad_norm": 1.4157744646072388, - "learning_rate": 6.906834170854271e-05, - "loss": 5.1712, - "step": 31287 - }, - { - "epoch": 16.317079530638853, - "grad_norm": 1.4544053077697754, - "learning_rate": 6.906733668341709e-05, - "loss": 5.7976, - "step": 31288 - }, - { - "epoch": 16.317601043024773, - "grad_norm": 1.5816882848739624, - "learning_rate": 6.906633165829147e-05, - "loss": 4.9377, - "step": 31289 - }, - { - "epoch": 16.318122555410692, - "grad_norm": 1.54128897190094, - "learning_rate": 6.906532663316583e-05, - "loss": 4.9852, - "step": 31290 - }, - { - "epoch": 16.318644067796612, - "grad_norm": 1.5138211250305176, - "learning_rate": 6.906432160804021e-05, - "loss": 5.5167, - "step": 31291 - }, - { - "epoch": 16.319165580182528, - "grad_norm": 1.4980460405349731, - "learning_rate": 6.906331658291457e-05, - "loss": 5.3663, - "step": 31292 - }, - { - "epoch": 16.319687092568447, - "grad_norm": 1.435627818107605, - "learning_rate": 6.906231155778895e-05, - "loss": 5.0088, - "step": 31293 - }, - { - "epoch": 16.320208604954367, - "grad_norm": 1.512392282485962, - "learning_rate": 6.906130653266332e-05, - "loss": 4.8489, - "step": 31294 - }, - { - "epoch": 16.320730117340286, - "grad_norm": 1.5622304677963257, - "learning_rate": 6.906030150753769e-05, - "loss": 5.0977, - "step": 31295 - }, - { - "epoch": 16.321251629726206, - "grad_norm": 1.5381481647491455, - "learning_rate": 6.905929648241206e-05, - "loss": 5.0898, - "step": 31296 - }, - { - "epoch": 16.321773142112125, - "grad_norm": 1.4878348112106323, - "learning_rate": 6.905829145728644e-05, - "loss": 5.0534, - "step": 31297 - }, - { - "epoch": 16.322294654498045, - "grad_norm": 1.5919172763824463, - "learning_rate": 6.90572864321608e-05, - "loss": 5.2938, - "step": 31298 - }, - { - "epoch": 16.322816166883964, - "grad_norm": 1.5337930917739868, - "learning_rate": 6.905628140703518e-05, - "loss": 5.3141, - "step": 31299 - }, - { - "epoch": 16.323337679269883, - "grad_norm": 1.4267339706420898, - "learning_rate": 6.905527638190955e-05, - "loss": 5.647, - "step": 31300 - }, - { - "epoch": 16.323859191655803, - "grad_norm": 1.506076455116272, - "learning_rate": 6.905427135678393e-05, - "loss": 5.4333, - "step": 31301 - }, - { - "epoch": 16.324380704041722, - "grad_norm": 1.7269736528396606, - "learning_rate": 6.90532663316583e-05, - "loss": 4.8798, - "step": 31302 - }, - { - "epoch": 16.324902216427642, - "grad_norm": 1.5383551120758057, - "learning_rate": 6.905226130653266e-05, - "loss": 5.0544, - "step": 31303 - }, - { - "epoch": 16.325423728813558, - "grad_norm": 1.5923889875411987, - "learning_rate": 6.905125628140704e-05, - "loss": 5.4424, - "step": 31304 - }, - { - "epoch": 16.325945241199477, - "grad_norm": 1.5872708559036255, - "learning_rate": 6.90502512562814e-05, - "loss": 4.9055, - "step": 31305 - }, - { - "epoch": 16.326466753585397, - "grad_norm": 1.8498812913894653, - "learning_rate": 6.904924623115578e-05, - "loss": 5.4021, - "step": 31306 - }, - { - "epoch": 16.326988265971316, - "grad_norm": 1.6337701082229614, - "learning_rate": 6.904824120603015e-05, - "loss": 4.8566, - "step": 31307 - }, - { - "epoch": 16.327509778357236, - "grad_norm": 1.5288203954696655, - "learning_rate": 6.904723618090452e-05, - "loss": 5.6325, - "step": 31308 - }, - { - "epoch": 16.328031290743155, - "grad_norm": 1.472981572151184, - "learning_rate": 6.90462311557789e-05, - "loss": 5.6517, - "step": 31309 - }, - { - "epoch": 16.328552803129075, - "grad_norm": 1.5649245977401733, - "learning_rate": 6.904522613065328e-05, - "loss": 5.0182, - "step": 31310 - }, - { - "epoch": 16.329074315514994, - "grad_norm": 1.5460681915283203, - "learning_rate": 6.904422110552764e-05, - "loss": 5.1401, - "step": 31311 - }, - { - "epoch": 16.329595827900913, - "grad_norm": 1.409224033355713, - "learning_rate": 6.904321608040202e-05, - "loss": 5.8093, - "step": 31312 - }, - { - "epoch": 16.330117340286833, - "grad_norm": 1.4421751499176025, - "learning_rate": 6.904221105527639e-05, - "loss": 5.4908, - "step": 31313 - }, - { - "epoch": 16.330638852672752, - "grad_norm": 1.496168613433838, - "learning_rate": 6.904120603015076e-05, - "loss": 5.613, - "step": 31314 - }, - { - "epoch": 16.331160365058672, - "grad_norm": 1.5203732252120972, - "learning_rate": 6.904020100502513e-05, - "loss": 5.1162, - "step": 31315 - }, - { - "epoch": 16.331681877444588, - "grad_norm": 1.4174896478652954, - "learning_rate": 6.903919597989949e-05, - "loss": 5.0389, - "step": 31316 - }, - { - "epoch": 16.332203389830507, - "grad_norm": 1.4883846044540405, - "learning_rate": 6.903819095477387e-05, - "loss": 5.0027, - "step": 31317 - }, - { - "epoch": 16.332724902216427, - "grad_norm": 1.4776694774627686, - "learning_rate": 6.903718592964823e-05, - "loss": 5.7521, - "step": 31318 - }, - { - "epoch": 16.333246414602346, - "grad_norm": 1.4567378759384155, - "learning_rate": 6.903618090452261e-05, - "loss": 5.216, - "step": 31319 - }, - { - "epoch": 16.333767926988266, - "grad_norm": 1.4200201034545898, - "learning_rate": 6.903517587939699e-05, - "loss": 5.473, - "step": 31320 - }, - { - "epoch": 16.334289439374185, - "grad_norm": 1.6450248956680298, - "learning_rate": 6.903417085427137e-05, - "loss": 4.9807, - "step": 31321 - }, - { - "epoch": 16.334810951760105, - "grad_norm": 1.4071166515350342, - "learning_rate": 6.903316582914573e-05, - "loss": 5.7406, - "step": 31322 - }, - { - "epoch": 16.335332464146024, - "grad_norm": 1.5345059633255005, - "learning_rate": 6.903216080402011e-05, - "loss": 5.189, - "step": 31323 - }, - { - "epoch": 16.335853976531943, - "grad_norm": 1.4464019536972046, - "learning_rate": 6.903115577889447e-05, - "loss": 5.6992, - "step": 31324 - }, - { - "epoch": 16.336375488917863, - "grad_norm": 1.4235539436340332, - "learning_rate": 6.903015075376885e-05, - "loss": 5.3245, - "step": 31325 - }, - { - "epoch": 16.336897001303782, - "grad_norm": 1.42299485206604, - "learning_rate": 6.902914572864322e-05, - "loss": 5.8712, - "step": 31326 - }, - { - "epoch": 16.3374185136897, - "grad_norm": 1.4761815071105957, - "learning_rate": 6.90281407035176e-05, - "loss": 5.8271, - "step": 31327 - }, - { - "epoch": 16.337940026075618, - "grad_norm": 1.4862767457962036, - "learning_rate": 6.902713567839196e-05, - "loss": 5.2627, - "step": 31328 - }, - { - "epoch": 16.338461538461537, - "grad_norm": 1.4116549491882324, - "learning_rate": 6.902613065326634e-05, - "loss": 5.4658, - "step": 31329 - }, - { - "epoch": 16.338983050847457, - "grad_norm": 1.4406287670135498, - "learning_rate": 6.902512562814071e-05, - "loss": 4.6741, - "step": 31330 - }, - { - "epoch": 16.339504563233376, - "grad_norm": 1.5698314905166626, - "learning_rate": 6.902412060301508e-05, - "loss": 4.7017, - "step": 31331 - }, - { - "epoch": 16.340026075619296, - "grad_norm": 1.514410138130188, - "learning_rate": 6.902311557788946e-05, - "loss": 5.5212, - "step": 31332 - }, - { - "epoch": 16.340547588005215, - "grad_norm": 1.41310715675354, - "learning_rate": 6.902211055276382e-05, - "loss": 5.3261, - "step": 31333 - }, - { - "epoch": 16.341069100391135, - "grad_norm": 1.525736689567566, - "learning_rate": 6.90211055276382e-05, - "loss": 5.2288, - "step": 31334 - }, - { - "epoch": 16.341590612777054, - "grad_norm": 1.5990067720413208, - "learning_rate": 6.902010050251256e-05, - "loss": 5.1003, - "step": 31335 - }, - { - "epoch": 16.342112125162974, - "grad_norm": 1.425395131111145, - "learning_rate": 6.901909547738694e-05, - "loss": 5.7655, - "step": 31336 - }, - { - "epoch": 16.342633637548893, - "grad_norm": 1.4302016496658325, - "learning_rate": 6.90180904522613e-05, - "loss": 5.1695, - "step": 31337 - }, - { - "epoch": 16.343155149934812, - "grad_norm": 1.4791518449783325, - "learning_rate": 6.901708542713568e-05, - "loss": 5.1392, - "step": 31338 - }, - { - "epoch": 16.343676662320732, - "grad_norm": 1.5140976905822754, - "learning_rate": 6.901608040201005e-05, - "loss": 5.4317, - "step": 31339 - }, - { - "epoch": 16.344198174706648, - "grad_norm": 1.5036382675170898, - "learning_rate": 6.901507537688442e-05, - "loss": 5.6927, - "step": 31340 - }, - { - "epoch": 16.344719687092567, - "grad_norm": 1.4393466711044312, - "learning_rate": 6.90140703517588e-05, - "loss": 5.6112, - "step": 31341 - }, - { - "epoch": 16.345241199478487, - "grad_norm": 1.5401616096496582, - "learning_rate": 6.901306532663317e-05, - "loss": 5.3054, - "step": 31342 - }, - { - "epoch": 16.345762711864406, - "grad_norm": 1.4413738250732422, - "learning_rate": 6.901206030150754e-05, - "loss": 5.1656, - "step": 31343 - }, - { - "epoch": 16.346284224250326, - "grad_norm": 1.5150539875030518, - "learning_rate": 6.901105527638191e-05, - "loss": 5.4422, - "step": 31344 - }, - { - "epoch": 16.346805736636245, - "grad_norm": 1.4767959117889404, - "learning_rate": 6.901005025125629e-05, - "loss": 5.6114, - "step": 31345 - }, - { - "epoch": 16.347327249022165, - "grad_norm": 1.6474640369415283, - "learning_rate": 6.900904522613065e-05, - "loss": 4.8659, - "step": 31346 - }, - { - "epoch": 16.347848761408084, - "grad_norm": 1.4681963920593262, - "learning_rate": 6.900804020100503e-05, - "loss": 5.3085, - "step": 31347 - }, - { - "epoch": 16.348370273794004, - "grad_norm": 1.511553406715393, - "learning_rate": 6.900703517587939e-05, - "loss": 5.3556, - "step": 31348 - }, - { - "epoch": 16.348891786179923, - "grad_norm": 1.6291661262512207, - "learning_rate": 6.900603015075377e-05, - "loss": 5.2552, - "step": 31349 - }, - { - "epoch": 16.349413298565842, - "grad_norm": 1.399874210357666, - "learning_rate": 6.900502512562815e-05, - "loss": 5.3926, - "step": 31350 - }, - { - "epoch": 16.34993481095176, - "grad_norm": 1.4337115287780762, - "learning_rate": 6.900402010050253e-05, - "loss": 5.3546, - "step": 31351 - }, - { - "epoch": 16.350456323337678, - "grad_norm": 1.5041087865829468, - "learning_rate": 6.900301507537689e-05, - "loss": 5.4248, - "step": 31352 - }, - { - "epoch": 16.350977835723597, - "grad_norm": 1.4396296739578247, - "learning_rate": 6.900201005025127e-05, - "loss": 5.4866, - "step": 31353 - }, - { - "epoch": 16.351499348109517, - "grad_norm": 1.4640523195266724, - "learning_rate": 6.900100502512563e-05, - "loss": 5.456, - "step": 31354 - }, - { - "epoch": 16.352020860495436, - "grad_norm": 1.6403312683105469, - "learning_rate": 6.9e-05, - "loss": 4.4875, - "step": 31355 - }, - { - "epoch": 16.352542372881356, - "grad_norm": 1.3589069843292236, - "learning_rate": 6.899899497487437e-05, - "loss": 5.5492, - "step": 31356 - }, - { - "epoch": 16.353063885267275, - "grad_norm": 1.6231597661972046, - "learning_rate": 6.899798994974874e-05, - "loss": 5.5151, - "step": 31357 - }, - { - "epoch": 16.353585397653195, - "grad_norm": 1.5152119398117065, - "learning_rate": 6.899698492462312e-05, - "loss": 4.7883, - "step": 31358 - }, - { - "epoch": 16.354106910039114, - "grad_norm": 1.4809434413909912, - "learning_rate": 6.899597989949748e-05, - "loss": 5.3221, - "step": 31359 - }, - { - "epoch": 16.354628422425034, - "grad_norm": 1.5151695013046265, - "learning_rate": 6.899497487437186e-05, - "loss": 5.2874, - "step": 31360 - }, - { - "epoch": 16.355149934810953, - "grad_norm": 1.4297206401824951, - "learning_rate": 6.899396984924624e-05, - "loss": 5.456, - "step": 31361 - }, - { - "epoch": 16.355671447196872, - "grad_norm": 1.4868309497833252, - "learning_rate": 6.899296482412061e-05, - "loss": 5.1897, - "step": 31362 - }, - { - "epoch": 16.35619295958279, - "grad_norm": 1.5012940168380737, - "learning_rate": 6.899195979899498e-05, - "loss": 5.2518, - "step": 31363 - }, - { - "epoch": 16.356714471968708, - "grad_norm": 1.4325059652328491, - "learning_rate": 6.899095477386936e-05, - "loss": 4.8565, - "step": 31364 - }, - { - "epoch": 16.357235984354627, - "grad_norm": 1.4928745031356812, - "learning_rate": 6.898994974874372e-05, - "loss": 5.5845, - "step": 31365 - }, - { - "epoch": 16.357757496740547, - "grad_norm": 1.5800449848175049, - "learning_rate": 6.89889447236181e-05, - "loss": 5.0335, - "step": 31366 - }, - { - "epoch": 16.358279009126466, - "grad_norm": 1.435771107673645, - "learning_rate": 6.898793969849246e-05, - "loss": 5.558, - "step": 31367 - }, - { - "epoch": 16.358800521512386, - "grad_norm": 1.590396523475647, - "learning_rate": 6.898693467336683e-05, - "loss": 5.4759, - "step": 31368 - }, - { - "epoch": 16.359322033898305, - "grad_norm": 1.5736823081970215, - "learning_rate": 6.89859296482412e-05, - "loss": 5.4333, - "step": 31369 - }, - { - "epoch": 16.359843546284225, - "grad_norm": 1.4675753116607666, - "learning_rate": 6.898492462311558e-05, - "loss": 5.6642, - "step": 31370 - }, - { - "epoch": 16.360365058670144, - "grad_norm": 1.4114046096801758, - "learning_rate": 6.898391959798996e-05, - "loss": 5.7537, - "step": 31371 - }, - { - "epoch": 16.360886571056064, - "grad_norm": 1.5001496076583862, - "learning_rate": 6.898291457286432e-05, - "loss": 5.4254, - "step": 31372 - }, - { - "epoch": 16.361408083441983, - "grad_norm": 1.5954521894454956, - "learning_rate": 6.89819095477387e-05, - "loss": 5.1979, - "step": 31373 - }, - { - "epoch": 16.361929595827903, - "grad_norm": 1.5034642219543457, - "learning_rate": 6.898090452261307e-05, - "loss": 5.6631, - "step": 31374 - }, - { - "epoch": 16.36245110821382, - "grad_norm": 1.5352011919021606, - "learning_rate": 6.897989949748744e-05, - "loss": 5.3832, - "step": 31375 - }, - { - "epoch": 16.362972620599738, - "grad_norm": 1.393612265586853, - "learning_rate": 6.897889447236181e-05, - "loss": 5.7706, - "step": 31376 - }, - { - "epoch": 16.363494132985657, - "grad_norm": 1.3504966497421265, - "learning_rate": 6.897788944723619e-05, - "loss": 5.1577, - "step": 31377 - }, - { - "epoch": 16.364015645371577, - "grad_norm": 1.4589227437973022, - "learning_rate": 6.897688442211055e-05, - "loss": 5.7165, - "step": 31378 - }, - { - "epoch": 16.364537157757496, - "grad_norm": 1.5199185609817505, - "learning_rate": 6.897587939698493e-05, - "loss": 5.3611, - "step": 31379 - }, - { - "epoch": 16.365058670143416, - "grad_norm": 1.5511219501495361, - "learning_rate": 6.89748743718593e-05, - "loss": 5.7015, - "step": 31380 - }, - { - "epoch": 16.365580182529335, - "grad_norm": 1.473868727684021, - "learning_rate": 6.897386934673367e-05, - "loss": 5.1109, - "step": 31381 - }, - { - "epoch": 16.366101694915255, - "grad_norm": 1.3872687816619873, - "learning_rate": 6.897286432160805e-05, - "loss": 5.7459, - "step": 31382 - }, - { - "epoch": 16.366623207301174, - "grad_norm": 1.4107608795166016, - "learning_rate": 6.897185929648241e-05, - "loss": 5.5284, - "step": 31383 - }, - { - "epoch": 16.367144719687094, - "grad_norm": 1.5353628396987915, - "learning_rate": 6.897085427135679e-05, - "loss": 5.1862, - "step": 31384 - }, - { - "epoch": 16.367666232073013, - "grad_norm": 1.4989163875579834, - "learning_rate": 6.896984924623116e-05, - "loss": 5.3022, - "step": 31385 - }, - { - "epoch": 16.368187744458933, - "grad_norm": 1.5068252086639404, - "learning_rate": 6.896884422110553e-05, - "loss": 5.3046, - "step": 31386 - }, - { - "epoch": 16.36870925684485, - "grad_norm": 1.4152882099151611, - "learning_rate": 6.89678391959799e-05, - "loss": 5.537, - "step": 31387 - }, - { - "epoch": 16.369230769230768, - "grad_norm": 1.4673343896865845, - "learning_rate": 6.896683417085428e-05, - "loss": 5.4425, - "step": 31388 - }, - { - "epoch": 16.369752281616687, - "grad_norm": 1.686995506286621, - "learning_rate": 6.896582914572864e-05, - "loss": 5.5093, - "step": 31389 - }, - { - "epoch": 16.370273794002607, - "grad_norm": 1.5616475343704224, - "learning_rate": 6.896482412060302e-05, - "loss": 5.2094, - "step": 31390 - }, - { - "epoch": 16.370795306388526, - "grad_norm": 1.412637710571289, - "learning_rate": 6.89638190954774e-05, - "loss": 5.6569, - "step": 31391 - }, - { - "epoch": 16.371316818774446, - "grad_norm": 1.6278115510940552, - "learning_rate": 6.896281407035177e-05, - "loss": 5.1612, - "step": 31392 - }, - { - "epoch": 16.371838331160365, - "grad_norm": 1.5247102975845337, - "learning_rate": 6.896180904522614e-05, - "loss": 4.6558, - "step": 31393 - }, - { - "epoch": 16.372359843546285, - "grad_norm": 1.4950047731399536, - "learning_rate": 6.896080402010052e-05, - "loss": 4.9089, - "step": 31394 - }, - { - "epoch": 16.372881355932204, - "grad_norm": 1.4651354551315308, - "learning_rate": 6.895979899497488e-05, - "loss": 5.3474, - "step": 31395 - }, - { - "epoch": 16.373402868318124, - "grad_norm": 1.5753048658370972, - "learning_rate": 6.895879396984924e-05, - "loss": 5.4624, - "step": 31396 - }, - { - "epoch": 16.373924380704043, - "grad_norm": 1.5409475564956665, - "learning_rate": 6.895778894472362e-05, - "loss": 5.441, - "step": 31397 - }, - { - "epoch": 16.374445893089963, - "grad_norm": 1.583217740058899, - "learning_rate": 6.895678391959799e-05, - "loss": 5.0521, - "step": 31398 - }, - { - "epoch": 16.37496740547588, - "grad_norm": 1.517371654510498, - "learning_rate": 6.895577889447236e-05, - "loss": 5.1928, - "step": 31399 - }, - { - "epoch": 16.375488917861798, - "grad_norm": 1.4381568431854248, - "learning_rate": 6.895477386934673e-05, - "loss": 5.4591, - "step": 31400 - }, - { - "epoch": 16.376010430247717, - "grad_norm": 1.605342149734497, - "learning_rate": 6.89537688442211e-05, - "loss": 5.3103, - "step": 31401 - }, - { - "epoch": 16.376531942633637, - "grad_norm": 1.516343355178833, - "learning_rate": 6.895276381909548e-05, - "loss": 5.1973, - "step": 31402 - }, - { - "epoch": 16.377053455019556, - "grad_norm": 1.3715795278549194, - "learning_rate": 6.895175879396986e-05, - "loss": 5.7206, - "step": 31403 - }, - { - "epoch": 16.377574967405476, - "grad_norm": 1.5293593406677246, - "learning_rate": 6.895075376884423e-05, - "loss": 5.1948, - "step": 31404 - }, - { - "epoch": 16.378096479791395, - "grad_norm": 1.4066822528839111, - "learning_rate": 6.89497487437186e-05, - "loss": 5.4395, - "step": 31405 - }, - { - "epoch": 16.378617992177315, - "grad_norm": 1.5539097785949707, - "learning_rate": 6.894874371859297e-05, - "loss": 5.1586, - "step": 31406 - }, - { - "epoch": 16.379139504563234, - "grad_norm": 1.551988124847412, - "learning_rate": 6.894773869346735e-05, - "loss": 5.2223, - "step": 31407 - }, - { - "epoch": 16.379661016949154, - "grad_norm": 1.5352592468261719, - "learning_rate": 6.894673366834171e-05, - "loss": 5.1946, - "step": 31408 - }, - { - "epoch": 16.380182529335073, - "grad_norm": 1.414577841758728, - "learning_rate": 6.894572864321607e-05, - "loss": 5.5527, - "step": 31409 - }, - { - "epoch": 16.380704041720993, - "grad_norm": 1.4834299087524414, - "learning_rate": 6.894472361809045e-05, - "loss": 5.0849, - "step": 31410 - }, - { - "epoch": 16.38122555410691, - "grad_norm": 1.4784510135650635, - "learning_rate": 6.894371859296483e-05, - "loss": 5.3561, - "step": 31411 - }, - { - "epoch": 16.381747066492828, - "grad_norm": 1.4112548828125, - "learning_rate": 6.894271356783921e-05, - "loss": 5.5563, - "step": 31412 - }, - { - "epoch": 16.382268578878747, - "grad_norm": 1.5239983797073364, - "learning_rate": 6.894170854271357e-05, - "loss": 5.0105, - "step": 31413 - }, - { - "epoch": 16.382790091264667, - "grad_norm": 1.468925952911377, - "learning_rate": 6.894070351758795e-05, - "loss": 4.8571, - "step": 31414 - }, - { - "epoch": 16.383311603650586, - "grad_norm": 1.5784952640533447, - "learning_rate": 6.893969849246231e-05, - "loss": 5.5561, - "step": 31415 - }, - { - "epoch": 16.383833116036506, - "grad_norm": 1.476928472518921, - "learning_rate": 6.893869346733669e-05, - "loss": 5.7645, - "step": 31416 - }, - { - "epoch": 16.384354628422425, - "grad_norm": 1.6360756158828735, - "learning_rate": 6.893768844221106e-05, - "loss": 5.4363, - "step": 31417 - }, - { - "epoch": 16.384876140808345, - "grad_norm": 1.6301159858703613, - "learning_rate": 6.893668341708543e-05, - "loss": 4.9735, - "step": 31418 - }, - { - "epoch": 16.385397653194264, - "grad_norm": 1.6102248430252075, - "learning_rate": 6.89356783919598e-05, - "loss": 4.9493, - "step": 31419 - }, - { - "epoch": 16.385919165580184, - "grad_norm": 1.5386136770248413, - "learning_rate": 6.893467336683418e-05, - "loss": 5.6489, - "step": 31420 - }, - { - "epoch": 16.386440677966103, - "grad_norm": 1.2797887325286865, - "learning_rate": 6.893366834170854e-05, - "loss": 5.309, - "step": 31421 - }, - { - "epoch": 16.386962190352023, - "grad_norm": 1.583454966545105, - "learning_rate": 6.893266331658292e-05, - "loss": 4.9988, - "step": 31422 - }, - { - "epoch": 16.38748370273794, - "grad_norm": 1.712294340133667, - "learning_rate": 6.89316582914573e-05, - "loss": 4.8986, - "step": 31423 - }, - { - "epoch": 16.388005215123858, - "grad_norm": 1.5195013284683228, - "learning_rate": 6.893065326633166e-05, - "loss": 5.0471, - "step": 31424 - }, - { - "epoch": 16.388526727509777, - "grad_norm": 1.3878997564315796, - "learning_rate": 6.892964824120604e-05, - "loss": 5.5395, - "step": 31425 - }, - { - "epoch": 16.389048239895697, - "grad_norm": 1.3800928592681885, - "learning_rate": 6.89286432160804e-05, - "loss": 5.6013, - "step": 31426 - }, - { - "epoch": 16.389569752281616, - "grad_norm": 1.5995478630065918, - "learning_rate": 6.892763819095478e-05, - "loss": 4.9784, - "step": 31427 - }, - { - "epoch": 16.390091264667536, - "grad_norm": 1.3562729358673096, - "learning_rate": 6.892663316582914e-05, - "loss": 5.0309, - "step": 31428 - }, - { - "epoch": 16.390612777053455, - "grad_norm": 1.5492995977401733, - "learning_rate": 6.892562814070352e-05, - "loss": 5.2819, - "step": 31429 - }, - { - "epoch": 16.391134289439375, - "grad_norm": 1.4751415252685547, - "learning_rate": 6.892462311557789e-05, - "loss": 5.2379, - "step": 31430 - }, - { - "epoch": 16.391655801825294, - "grad_norm": 1.4896831512451172, - "learning_rate": 6.892361809045226e-05, - "loss": 5.256, - "step": 31431 - }, - { - "epoch": 16.392177314211214, - "grad_norm": 1.4806556701660156, - "learning_rate": 6.892261306532664e-05, - "loss": 5.7211, - "step": 31432 - }, - { - "epoch": 16.392698826597133, - "grad_norm": 1.4367865324020386, - "learning_rate": 6.892160804020102e-05, - "loss": 5.3799, - "step": 31433 - }, - { - "epoch": 16.39322033898305, - "grad_norm": 1.418871521949768, - "learning_rate": 6.892060301507538e-05, - "loss": 5.7837, - "step": 31434 - }, - { - "epoch": 16.39374185136897, - "grad_norm": 1.6299155950546265, - "learning_rate": 6.891959798994975e-05, - "loss": 4.8882, - "step": 31435 - }, - { - "epoch": 16.394263363754888, - "grad_norm": 1.4075229167938232, - "learning_rate": 6.891859296482413e-05, - "loss": 5.7941, - "step": 31436 - }, - { - "epoch": 16.394784876140807, - "grad_norm": 1.3940610885620117, - "learning_rate": 6.891758793969849e-05, - "loss": 5.6929, - "step": 31437 - }, - { - "epoch": 16.395306388526727, - "grad_norm": 1.3852134943008423, - "learning_rate": 6.891658291457287e-05, - "loss": 5.4437, - "step": 31438 - }, - { - "epoch": 16.395827900912646, - "grad_norm": 1.3431754112243652, - "learning_rate": 6.891557788944723e-05, - "loss": 5.7947, - "step": 31439 - }, - { - "epoch": 16.396349413298566, - "grad_norm": 1.6359868049621582, - "learning_rate": 6.891457286432161e-05, - "loss": 5.1503, - "step": 31440 - }, - { - "epoch": 16.396870925684485, - "grad_norm": 1.4573370218276978, - "learning_rate": 6.891356783919597e-05, - "loss": 5.7116, - "step": 31441 - }, - { - "epoch": 16.397392438070405, - "grad_norm": 1.5254086256027222, - "learning_rate": 6.891256281407035e-05, - "loss": 4.6615, - "step": 31442 - }, - { - "epoch": 16.397913950456324, - "grad_norm": 1.5613716840744019, - "learning_rate": 6.891155778894473e-05, - "loss": 5.4738, - "step": 31443 - }, - { - "epoch": 16.398435462842244, - "grad_norm": 1.6009904146194458, - "learning_rate": 6.891055276381911e-05, - "loss": 4.7937, - "step": 31444 - }, - { - "epoch": 16.398956975228163, - "grad_norm": 1.5517284870147705, - "learning_rate": 6.890954773869347e-05, - "loss": 4.9301, - "step": 31445 - }, - { - "epoch": 16.39947848761408, - "grad_norm": 1.5141171216964722, - "learning_rate": 6.890854271356785e-05, - "loss": 4.9411, - "step": 31446 - }, - { - "epoch": 16.4, - "grad_norm": 1.571169137954712, - "learning_rate": 6.890753768844221e-05, - "loss": 5.3504, - "step": 31447 - }, - { - "epoch": 16.400521512385918, - "grad_norm": 1.451866626739502, - "learning_rate": 6.890653266331658e-05, - "loss": 5.5038, - "step": 31448 - }, - { - "epoch": 16.401043024771838, - "grad_norm": 1.521067500114441, - "learning_rate": 6.890552763819096e-05, - "loss": 5.4683, - "step": 31449 - }, - { - "epoch": 16.401564537157757, - "grad_norm": 1.4772270917892456, - "learning_rate": 6.890452261306532e-05, - "loss": 5.6837, - "step": 31450 - }, - { - "epoch": 16.402086049543676, - "grad_norm": 1.4776654243469238, - "learning_rate": 6.89035175879397e-05, - "loss": 5.5615, - "step": 31451 - }, - { - "epoch": 16.402607561929596, - "grad_norm": 1.711431622505188, - "learning_rate": 6.890251256281406e-05, - "loss": 5.1658, - "step": 31452 - }, - { - "epoch": 16.403129074315515, - "grad_norm": 1.5539606809616089, - "learning_rate": 6.890150753768844e-05, - "loss": 5.3862, - "step": 31453 - }, - { - "epoch": 16.403650586701435, - "grad_norm": 1.3859986066818237, - "learning_rate": 6.890050251256282e-05, - "loss": 5.5677, - "step": 31454 - }, - { - "epoch": 16.404172099087354, - "grad_norm": 1.4754540920257568, - "learning_rate": 6.88994974874372e-05, - "loss": 5.6371, - "step": 31455 - }, - { - "epoch": 16.404693611473274, - "grad_norm": 1.5024789571762085, - "learning_rate": 6.889849246231156e-05, - "loss": 5.5614, - "step": 31456 - }, - { - "epoch": 16.405215123859193, - "grad_norm": 1.418412446975708, - "learning_rate": 6.889748743718594e-05, - "loss": 5.2999, - "step": 31457 - }, - { - "epoch": 16.40573663624511, - "grad_norm": 1.7404556274414062, - "learning_rate": 6.88964824120603e-05, - "loss": 5.1911, - "step": 31458 - }, - { - "epoch": 16.40625814863103, - "grad_norm": 1.6103993654251099, - "learning_rate": 6.889547738693468e-05, - "loss": 5.0801, - "step": 31459 - }, - { - "epoch": 16.406779661016948, - "grad_norm": 1.5212409496307373, - "learning_rate": 6.889447236180905e-05, - "loss": 5.2465, - "step": 31460 - }, - { - "epoch": 16.407301173402868, - "grad_norm": 1.6127387285232544, - "learning_rate": 6.889346733668341e-05, - "loss": 4.6567, - "step": 31461 - }, - { - "epoch": 16.407822685788787, - "grad_norm": 1.4135453701019287, - "learning_rate": 6.889246231155779e-05, - "loss": 5.605, - "step": 31462 - }, - { - "epoch": 16.408344198174706, - "grad_norm": 1.6409058570861816, - "learning_rate": 6.889145728643217e-05, - "loss": 5.256, - "step": 31463 - }, - { - "epoch": 16.408865710560626, - "grad_norm": 1.487801432609558, - "learning_rate": 6.889045226130654e-05, - "loss": 5.2905, - "step": 31464 - }, - { - "epoch": 16.409387222946545, - "grad_norm": 1.4383563995361328, - "learning_rate": 6.888944723618091e-05, - "loss": 5.1185, - "step": 31465 - }, - { - "epoch": 16.409908735332465, - "grad_norm": 1.4105162620544434, - "learning_rate": 6.888844221105529e-05, - "loss": 5.7692, - "step": 31466 - }, - { - "epoch": 16.410430247718384, - "grad_norm": 1.4209829568862915, - "learning_rate": 6.888743718592965e-05, - "loss": 5.6434, - "step": 31467 - }, - { - "epoch": 16.410951760104304, - "grad_norm": 1.5452382564544678, - "learning_rate": 6.888643216080403e-05, - "loss": 5.3587, - "step": 31468 - }, - { - "epoch": 16.411473272490223, - "grad_norm": 1.4586119651794434, - "learning_rate": 6.888542713567839e-05, - "loss": 5.1039, - "step": 31469 - }, - { - "epoch": 16.41199478487614, - "grad_norm": 1.6741055250167847, - "learning_rate": 6.888442211055277e-05, - "loss": 5.4133, - "step": 31470 - }, - { - "epoch": 16.41251629726206, - "grad_norm": 1.5768665075302124, - "learning_rate": 6.888341708542713e-05, - "loss": 4.9325, - "step": 31471 - }, - { - "epoch": 16.413037809647978, - "grad_norm": 1.415107011795044, - "learning_rate": 6.888241206030151e-05, - "loss": 5.8905, - "step": 31472 - }, - { - "epoch": 16.413559322033898, - "grad_norm": 1.3954713344573975, - "learning_rate": 6.888140703517588e-05, - "loss": 5.6566, - "step": 31473 - }, - { - "epoch": 16.414080834419817, - "grad_norm": 1.372789978981018, - "learning_rate": 6.888040201005025e-05, - "loss": 5.2211, - "step": 31474 - }, - { - "epoch": 16.414602346805736, - "grad_norm": 1.552462100982666, - "learning_rate": 6.887939698492463e-05, - "loss": 5.346, - "step": 31475 - }, - { - "epoch": 16.415123859191656, - "grad_norm": 1.5293842554092407, - "learning_rate": 6.8878391959799e-05, - "loss": 5.4013, - "step": 31476 - }, - { - "epoch": 16.415645371577575, - "grad_norm": 1.515419602394104, - "learning_rate": 6.887738693467337e-05, - "loss": 5.5721, - "step": 31477 - }, - { - "epoch": 16.416166883963495, - "grad_norm": 1.4344475269317627, - "learning_rate": 6.887638190954774e-05, - "loss": 5.0241, - "step": 31478 - }, - { - "epoch": 16.416688396349414, - "grad_norm": 1.5662590265274048, - "learning_rate": 6.887537688442212e-05, - "loss": 5.1152, - "step": 31479 - }, - { - "epoch": 16.417209908735334, - "grad_norm": 1.4616304636001587, - "learning_rate": 6.887437185929648e-05, - "loss": 5.1677, - "step": 31480 - }, - { - "epoch": 16.417731421121253, - "grad_norm": 1.4107712507247925, - "learning_rate": 6.887336683417086e-05, - "loss": 5.4475, - "step": 31481 - }, - { - "epoch": 16.41825293350717, - "grad_norm": 1.4588507413864136, - "learning_rate": 6.887236180904522e-05, - "loss": 5.5389, - "step": 31482 - }, - { - "epoch": 16.41877444589309, - "grad_norm": 1.4740833044052124, - "learning_rate": 6.88713567839196e-05, - "loss": 5.6419, - "step": 31483 - }, - { - "epoch": 16.419295958279008, - "grad_norm": 1.5671931505203247, - "learning_rate": 6.887035175879398e-05, - "loss": 5.5535, - "step": 31484 - }, - { - "epoch": 16.419817470664928, - "grad_norm": 1.4411901235580444, - "learning_rate": 6.886934673366836e-05, - "loss": 5.3024, - "step": 31485 - }, - { - "epoch": 16.420338983050847, - "grad_norm": 1.4808135032653809, - "learning_rate": 6.886834170854272e-05, - "loss": 5.4568, - "step": 31486 - }, - { - "epoch": 16.420860495436767, - "grad_norm": 1.4360417127609253, - "learning_rate": 6.88673366834171e-05, - "loss": 5.414, - "step": 31487 - }, - { - "epoch": 16.421382007822686, - "grad_norm": 1.5471729040145874, - "learning_rate": 6.886633165829146e-05, - "loss": 5.4119, - "step": 31488 - }, - { - "epoch": 16.421903520208605, - "grad_norm": 1.4682108163833618, - "learning_rate": 6.886532663316583e-05, - "loss": 5.5782, - "step": 31489 - }, - { - "epoch": 16.422425032594525, - "grad_norm": 1.5334874391555786, - "learning_rate": 6.88643216080402e-05, - "loss": 5.3761, - "step": 31490 - }, - { - "epoch": 16.422946544980444, - "grad_norm": 1.433405876159668, - "learning_rate": 6.886331658291457e-05, - "loss": 5.3645, - "step": 31491 - }, - { - "epoch": 16.423468057366364, - "grad_norm": 1.5292413234710693, - "learning_rate": 6.886231155778895e-05, - "loss": 4.7656, - "step": 31492 - }, - { - "epoch": 16.423989569752283, - "grad_norm": 1.3334449529647827, - "learning_rate": 6.886130653266331e-05, - "loss": 4.7488, - "step": 31493 - }, - { - "epoch": 16.4245110821382, - "grad_norm": 1.5337753295898438, - "learning_rate": 6.886030150753769e-05, - "loss": 5.2678, - "step": 31494 - }, - { - "epoch": 16.42503259452412, - "grad_norm": 1.4876986742019653, - "learning_rate": 6.885929648241207e-05, - "loss": 5.0128, - "step": 31495 - }, - { - "epoch": 16.425554106910038, - "grad_norm": 1.3829442262649536, - "learning_rate": 6.885829145728644e-05, - "loss": 5.0891, - "step": 31496 - }, - { - "epoch": 16.426075619295958, - "grad_norm": 1.4518061876296997, - "learning_rate": 6.885728643216081e-05, - "loss": 5.3715, - "step": 31497 - }, - { - "epoch": 16.426597131681877, - "grad_norm": 1.4462257623672485, - "learning_rate": 6.885628140703519e-05, - "loss": 5.269, - "step": 31498 - }, - { - "epoch": 16.427118644067797, - "grad_norm": 1.5157294273376465, - "learning_rate": 6.885527638190955e-05, - "loss": 5.0394, - "step": 31499 - }, - { - "epoch": 16.427640156453716, - "grad_norm": 1.5038143396377563, - "learning_rate": 6.885427135678393e-05, - "loss": 5.3398, - "step": 31500 - }, - { - "epoch": 16.428161668839635, - "grad_norm": 1.4919135570526123, - "learning_rate": 6.885326633165829e-05, - "loss": 5.6225, - "step": 31501 - }, - { - "epoch": 16.428683181225555, - "grad_norm": 1.521572470664978, - "learning_rate": 6.885226130653266e-05, - "loss": 5.0184, - "step": 31502 - }, - { - "epoch": 16.429204693611474, - "grad_norm": 1.44117271900177, - "learning_rate": 6.885125628140703e-05, - "loss": 5.6783, - "step": 31503 - }, - { - "epoch": 16.429726205997394, - "grad_norm": 1.4048638343811035, - "learning_rate": 6.885025125628141e-05, - "loss": 5.509, - "step": 31504 - }, - { - "epoch": 16.430247718383313, - "grad_norm": 1.4751418828964233, - "learning_rate": 6.884924623115579e-05, - "loss": 5.6158, - "step": 31505 - }, - { - "epoch": 16.43076923076923, - "grad_norm": 1.458517074584961, - "learning_rate": 6.884824120603015e-05, - "loss": 5.2798, - "step": 31506 - }, - { - "epoch": 16.43129074315515, - "grad_norm": 1.4607861042022705, - "learning_rate": 6.884723618090453e-05, - "loss": 5.2851, - "step": 31507 - }, - { - "epoch": 16.431812255541068, - "grad_norm": 1.5317684412002563, - "learning_rate": 6.88462311557789e-05, - "loss": 5.2471, - "step": 31508 - }, - { - "epoch": 16.432333767926988, - "grad_norm": 1.369711995124817, - "learning_rate": 6.884522613065327e-05, - "loss": 5.4488, - "step": 31509 - }, - { - "epoch": 16.432855280312907, - "grad_norm": 1.482417106628418, - "learning_rate": 6.884422110552764e-05, - "loss": 5.3076, - "step": 31510 - }, - { - "epoch": 16.433376792698827, - "grad_norm": 1.4428211450576782, - "learning_rate": 6.884321608040202e-05, - "loss": 5.5823, - "step": 31511 - }, - { - "epoch": 16.433898305084746, - "grad_norm": 1.3518507480621338, - "learning_rate": 6.884221105527638e-05, - "loss": 5.1812, - "step": 31512 - }, - { - "epoch": 16.434419817470665, - "grad_norm": 1.415927529335022, - "learning_rate": 6.884120603015076e-05, - "loss": 5.2734, - "step": 31513 - }, - { - "epoch": 16.434941329856585, - "grad_norm": 1.5061949491500854, - "learning_rate": 6.884020100502512e-05, - "loss": 5.3903, - "step": 31514 - }, - { - "epoch": 16.435462842242504, - "grad_norm": 1.4957412481307983, - "learning_rate": 6.88391959798995e-05, - "loss": 5.3112, - "step": 31515 - }, - { - "epoch": 16.435984354628424, - "grad_norm": 1.5200008153915405, - "learning_rate": 6.883819095477388e-05, - "loss": 4.8825, - "step": 31516 - }, - { - "epoch": 16.43650586701434, - "grad_norm": 1.4909648895263672, - "learning_rate": 6.883718592964824e-05, - "loss": 5.3189, - "step": 31517 - }, - { - "epoch": 16.43702737940026, - "grad_norm": 1.4690457582473755, - "learning_rate": 6.883618090452262e-05, - "loss": 5.1718, - "step": 31518 - }, - { - "epoch": 16.43754889178618, - "grad_norm": 1.480153203010559, - "learning_rate": 6.883517587939698e-05, - "loss": 5.2545, - "step": 31519 - }, - { - "epoch": 16.438070404172098, - "grad_norm": 1.497173547744751, - "learning_rate": 6.883417085427136e-05, - "loss": 5.3692, - "step": 31520 - }, - { - "epoch": 16.438591916558018, - "grad_norm": 1.5277981758117676, - "learning_rate": 6.883316582914573e-05, - "loss": 5.4073, - "step": 31521 - }, - { - "epoch": 16.439113428943937, - "grad_norm": 1.4134913682937622, - "learning_rate": 6.88321608040201e-05, - "loss": 5.6578, - "step": 31522 - }, - { - "epoch": 16.439634941329857, - "grad_norm": 1.5068495273590088, - "learning_rate": 6.883115577889447e-05, - "loss": 5.1101, - "step": 31523 - }, - { - "epoch": 16.440156453715776, - "grad_norm": 1.4709407091140747, - "learning_rate": 6.883015075376885e-05, - "loss": 5.037, - "step": 31524 - }, - { - "epoch": 16.440677966101696, - "grad_norm": 1.849890947341919, - "learning_rate": 6.882914572864322e-05, - "loss": 5.1029, - "step": 31525 - }, - { - "epoch": 16.441199478487615, - "grad_norm": 1.775009036064148, - "learning_rate": 6.88281407035176e-05, - "loss": 5.2951, - "step": 31526 - }, - { - "epoch": 16.441720990873534, - "grad_norm": 1.4482699632644653, - "learning_rate": 6.882713567839197e-05, - "loss": 5.2993, - "step": 31527 - }, - { - "epoch": 16.442242503259454, - "grad_norm": 1.449783205986023, - "learning_rate": 6.882613065326633e-05, - "loss": 5.3403, - "step": 31528 - }, - { - "epoch": 16.442764015645373, - "grad_norm": 1.3784775733947754, - "learning_rate": 6.882512562814071e-05, - "loss": 5.4974, - "step": 31529 - }, - { - "epoch": 16.44328552803129, - "grad_norm": 1.4608654975891113, - "learning_rate": 6.882412060301507e-05, - "loss": 5.0796, - "step": 31530 - }, - { - "epoch": 16.44380704041721, - "grad_norm": 1.6479482650756836, - "learning_rate": 6.882311557788945e-05, - "loss": 4.964, - "step": 31531 - }, - { - "epoch": 16.444328552803128, - "grad_norm": 1.5592666864395142, - "learning_rate": 6.882211055276382e-05, - "loss": 5.2746, - "step": 31532 - }, - { - "epoch": 16.444850065189048, - "grad_norm": 1.4192198514938354, - "learning_rate": 6.882110552763819e-05, - "loss": 5.6051, - "step": 31533 - }, - { - "epoch": 16.445371577574967, - "grad_norm": 1.4706310033798218, - "learning_rate": 6.882010050251256e-05, - "loss": 5.3567, - "step": 31534 - }, - { - "epoch": 16.445893089960887, - "grad_norm": 1.5065410137176514, - "learning_rate": 6.881909547738694e-05, - "loss": 4.942, - "step": 31535 - }, - { - "epoch": 16.446414602346806, - "grad_norm": 1.901411533355713, - "learning_rate": 6.881809045226131e-05, - "loss": 5.5669, - "step": 31536 - }, - { - "epoch": 16.446936114732726, - "grad_norm": 1.504901647567749, - "learning_rate": 6.881708542713569e-05, - "loss": 5.6176, - "step": 31537 - }, - { - "epoch": 16.447457627118645, - "grad_norm": 1.476576566696167, - "learning_rate": 6.881608040201005e-05, - "loss": 5.4429, - "step": 31538 - }, - { - "epoch": 16.447979139504564, - "grad_norm": 1.427633285522461, - "learning_rate": 6.881507537688443e-05, - "loss": 5.0558, - "step": 31539 - }, - { - "epoch": 16.448500651890484, - "grad_norm": 1.6396561861038208, - "learning_rate": 6.88140703517588e-05, - "loss": 5.291, - "step": 31540 - }, - { - "epoch": 16.4490221642764, - "grad_norm": 1.4416791200637817, - "learning_rate": 6.881306532663316e-05, - "loss": 5.5062, - "step": 31541 - }, - { - "epoch": 16.44954367666232, - "grad_norm": 1.4100072383880615, - "learning_rate": 6.881206030150754e-05, - "loss": 5.4616, - "step": 31542 - }, - { - "epoch": 16.45006518904824, - "grad_norm": 1.4648839235305786, - "learning_rate": 6.88110552763819e-05, - "loss": 5.6051, - "step": 31543 - }, - { - "epoch": 16.45058670143416, - "grad_norm": 1.5893884897232056, - "learning_rate": 6.881005025125628e-05, - "loss": 5.4, - "step": 31544 - }, - { - "epoch": 16.451108213820078, - "grad_norm": 1.5761593580245972, - "learning_rate": 6.880904522613066e-05, - "loss": 5.6205, - "step": 31545 - }, - { - "epoch": 16.451629726205997, - "grad_norm": 1.454721450805664, - "learning_rate": 6.880804020100504e-05, - "loss": 5.6521, - "step": 31546 - }, - { - "epoch": 16.452151238591917, - "grad_norm": 1.478990077972412, - "learning_rate": 6.88070351758794e-05, - "loss": 5.4025, - "step": 31547 - }, - { - "epoch": 16.452672750977836, - "grad_norm": 1.4893959760665894, - "learning_rate": 6.880603015075378e-05, - "loss": 5.2702, - "step": 31548 - }, - { - "epoch": 16.453194263363756, - "grad_norm": 1.5743579864501953, - "learning_rate": 6.880502512562814e-05, - "loss": 5.5533, - "step": 31549 - }, - { - "epoch": 16.453715775749675, - "grad_norm": 1.4575482606887817, - "learning_rate": 6.880402010050252e-05, - "loss": 5.2077, - "step": 31550 - }, - { - "epoch": 16.454237288135594, - "grad_norm": 1.4635769128799438, - "learning_rate": 6.880301507537689e-05, - "loss": 5.4693, - "step": 31551 - }, - { - "epoch": 16.454758800521514, - "grad_norm": 1.5621908903121948, - "learning_rate": 6.880201005025126e-05, - "loss": 5.3861, - "step": 31552 - }, - { - "epoch": 16.45528031290743, - "grad_norm": 1.5089327096939087, - "learning_rate": 6.880100502512563e-05, - "loss": 4.9017, - "step": 31553 - }, - { - "epoch": 16.45580182529335, - "grad_norm": 1.6257036924362183, - "learning_rate": 6.879999999999999e-05, - "loss": 4.569, - "step": 31554 - }, - { - "epoch": 16.45632333767927, - "grad_norm": 1.484552025794983, - "learning_rate": 6.879899497487437e-05, - "loss": 5.1339, - "step": 31555 - }, - { - "epoch": 16.45684485006519, - "grad_norm": 1.5543324947357178, - "learning_rate": 6.879798994974875e-05, - "loss": 4.6298, - "step": 31556 - }, - { - "epoch": 16.457366362451108, - "grad_norm": 1.6917449235916138, - "learning_rate": 6.879698492462313e-05, - "loss": 5.2793, - "step": 31557 - }, - { - "epoch": 16.457887874837027, - "grad_norm": 1.4682215452194214, - "learning_rate": 6.879597989949749e-05, - "loss": 5.5908, - "step": 31558 - }, - { - "epoch": 16.458409387222947, - "grad_norm": 1.4689205884933472, - "learning_rate": 6.879497487437187e-05, - "loss": 5.5199, - "step": 31559 - }, - { - "epoch": 16.458930899608866, - "grad_norm": 1.529730200767517, - "learning_rate": 6.879396984924623e-05, - "loss": 5.353, - "step": 31560 - }, - { - "epoch": 16.459452411994786, - "grad_norm": 1.4763551950454712, - "learning_rate": 6.879296482412061e-05, - "loss": 4.8555, - "step": 31561 - }, - { - "epoch": 16.459973924380705, - "grad_norm": 1.4406198263168335, - "learning_rate": 6.879195979899497e-05, - "loss": 5.6944, - "step": 31562 - }, - { - "epoch": 16.460495436766625, - "grad_norm": 1.5634723901748657, - "learning_rate": 6.879095477386935e-05, - "loss": 5.3357, - "step": 31563 - }, - { - "epoch": 16.461016949152544, - "grad_norm": 1.5407304763793945, - "learning_rate": 6.878994974874372e-05, - "loss": 4.6626, - "step": 31564 - }, - { - "epoch": 16.46153846153846, - "grad_norm": 1.4920364618301392, - "learning_rate": 6.87889447236181e-05, - "loss": 5.1402, - "step": 31565 - }, - { - "epoch": 16.46205997392438, - "grad_norm": 1.4621562957763672, - "learning_rate": 6.878793969849247e-05, - "loss": 5.7839, - "step": 31566 - }, - { - "epoch": 16.4625814863103, - "grad_norm": 1.511030912399292, - "learning_rate": 6.878693467336685e-05, - "loss": 4.8457, - "step": 31567 - }, - { - "epoch": 16.46310299869622, - "grad_norm": 1.4316483736038208, - "learning_rate": 6.878592964824121e-05, - "loss": 5.3682, - "step": 31568 - }, - { - "epoch": 16.463624511082138, - "grad_norm": 1.436259150505066, - "learning_rate": 6.878492462311558e-05, - "loss": 5.5691, - "step": 31569 - }, - { - "epoch": 16.464146023468057, - "grad_norm": 1.4188871383666992, - "learning_rate": 6.878391959798996e-05, - "loss": 5.322, - "step": 31570 - }, - { - "epoch": 16.464667535853977, - "grad_norm": 1.469642162322998, - "learning_rate": 6.878291457286432e-05, - "loss": 5.3124, - "step": 31571 - }, - { - "epoch": 16.465189048239896, - "grad_norm": 1.3788864612579346, - "learning_rate": 6.87819095477387e-05, - "loss": 5.4108, - "step": 31572 - }, - { - "epoch": 16.465710560625816, - "grad_norm": 1.413347601890564, - "learning_rate": 6.878090452261306e-05, - "loss": 5.392, - "step": 31573 - }, - { - "epoch": 16.466232073011735, - "grad_norm": 1.5033146142959595, - "learning_rate": 6.877989949748744e-05, - "loss": 5.6446, - "step": 31574 - }, - { - "epoch": 16.466753585397655, - "grad_norm": 1.4008598327636719, - "learning_rate": 6.87788944723618e-05, - "loss": 5.468, - "step": 31575 - }, - { - "epoch": 16.467275097783574, - "grad_norm": 1.3982207775115967, - "learning_rate": 6.877788944723618e-05, - "loss": 5.7116, - "step": 31576 - }, - { - "epoch": 16.46779661016949, - "grad_norm": 1.687286376953125, - "learning_rate": 6.877688442211056e-05, - "loss": 4.5928, - "step": 31577 - }, - { - "epoch": 16.46831812255541, - "grad_norm": 1.5650418996810913, - "learning_rate": 6.877587939698494e-05, - "loss": 4.7744, - "step": 31578 - }, - { - "epoch": 16.46883963494133, - "grad_norm": 1.534508466720581, - "learning_rate": 6.87748743718593e-05, - "loss": 5.6012, - "step": 31579 - }, - { - "epoch": 16.46936114732725, - "grad_norm": 1.5389155149459839, - "learning_rate": 6.877386934673368e-05, - "loss": 5.225, - "step": 31580 - }, - { - "epoch": 16.469882659713168, - "grad_norm": 1.5734031200408936, - "learning_rate": 6.877286432160804e-05, - "loss": 4.7741, - "step": 31581 - }, - { - "epoch": 16.470404172099087, - "grad_norm": 1.4974159002304077, - "learning_rate": 6.877185929648241e-05, - "loss": 5.8797, - "step": 31582 - }, - { - "epoch": 16.470925684485007, - "grad_norm": 1.5481483936309814, - "learning_rate": 6.877085427135679e-05, - "loss": 5.3109, - "step": 31583 - }, - { - "epoch": 16.471447196870926, - "grad_norm": 1.4955310821533203, - "learning_rate": 6.876984924623115e-05, - "loss": 4.8503, - "step": 31584 - }, - { - "epoch": 16.471968709256846, - "grad_norm": 1.4631552696228027, - "learning_rate": 6.876884422110553e-05, - "loss": 4.8486, - "step": 31585 - }, - { - "epoch": 16.472490221642765, - "grad_norm": 1.4453177452087402, - "learning_rate": 6.87678391959799e-05, - "loss": 5.6215, - "step": 31586 - }, - { - "epoch": 16.473011734028685, - "grad_norm": 1.4452402591705322, - "learning_rate": 6.876683417085428e-05, - "loss": 5.3852, - "step": 31587 - }, - { - "epoch": 16.473533246414604, - "grad_norm": 1.63565993309021, - "learning_rate": 6.876582914572865e-05, - "loss": 5.0481, - "step": 31588 - }, - { - "epoch": 16.47405475880052, - "grad_norm": 1.3847382068634033, - "learning_rate": 6.876482412060303e-05, - "loss": 5.4186, - "step": 31589 - }, - { - "epoch": 16.47457627118644, - "grad_norm": 1.3949429988861084, - "learning_rate": 6.876381909547739e-05, - "loss": 5.5296, - "step": 31590 - }, - { - "epoch": 16.47509778357236, - "grad_norm": 1.39295494556427, - "learning_rate": 6.876281407035177e-05, - "loss": 5.3235, - "step": 31591 - }, - { - "epoch": 16.47561929595828, - "grad_norm": 1.4669668674468994, - "learning_rate": 6.876180904522613e-05, - "loss": 5.5396, - "step": 31592 - }, - { - "epoch": 16.476140808344198, - "grad_norm": 1.4244694709777832, - "learning_rate": 6.876080402010051e-05, - "loss": 5.7812, - "step": 31593 - }, - { - "epoch": 16.476662320730117, - "grad_norm": 1.4643081426620483, - "learning_rate": 6.875979899497487e-05, - "loss": 5.0858, - "step": 31594 - }, - { - "epoch": 16.477183833116037, - "grad_norm": 1.4949363470077515, - "learning_rate": 6.875879396984924e-05, - "loss": 5.53, - "step": 31595 - }, - { - "epoch": 16.477705345501956, - "grad_norm": 1.5006904602050781, - "learning_rate": 6.875778894472362e-05, - "loss": 5.5472, - "step": 31596 - }, - { - "epoch": 16.478226857887876, - "grad_norm": 1.476503610610962, - "learning_rate": 6.8756783919598e-05, - "loss": 5.2905, - "step": 31597 - }, - { - "epoch": 16.478748370273795, - "grad_norm": 1.4219965934753418, - "learning_rate": 6.875577889447237e-05, - "loss": 5.1907, - "step": 31598 - }, - { - "epoch": 16.479269882659715, - "grad_norm": 1.3973084688186646, - "learning_rate": 6.875477386934674e-05, - "loss": 5.7347, - "step": 31599 - }, - { - "epoch": 16.479791395045634, - "grad_norm": 1.47309410572052, - "learning_rate": 6.875376884422111e-05, - "loss": 5.6656, - "step": 31600 - }, - { - "epoch": 16.48031290743155, - "grad_norm": 1.490279197692871, - "learning_rate": 6.875276381909548e-05, - "loss": 5.2151, - "step": 31601 - }, - { - "epoch": 16.48083441981747, - "grad_norm": 1.5514237880706787, - "learning_rate": 6.875175879396986e-05, - "loss": 5.463, - "step": 31602 - }, - { - "epoch": 16.48135593220339, - "grad_norm": 1.5942668914794922, - "learning_rate": 6.875075376884422e-05, - "loss": 5.5689, - "step": 31603 - }, - { - "epoch": 16.48187744458931, - "grad_norm": 1.45650053024292, - "learning_rate": 6.87497487437186e-05, - "loss": 5.5334, - "step": 31604 - }, - { - "epoch": 16.482398956975228, - "grad_norm": 1.5800390243530273, - "learning_rate": 6.874874371859296e-05, - "loss": 4.9292, - "step": 31605 - }, - { - "epoch": 16.482920469361147, - "grad_norm": 1.4555641412734985, - "learning_rate": 6.874773869346734e-05, - "loss": 5.7411, - "step": 31606 - }, - { - "epoch": 16.483441981747067, - "grad_norm": 1.4024330377578735, - "learning_rate": 6.874673366834172e-05, - "loss": 5.8458, - "step": 31607 - }, - { - "epoch": 16.483963494132986, - "grad_norm": 1.5250940322875977, - "learning_rate": 6.874572864321608e-05, - "loss": 5.4737, - "step": 31608 - }, - { - "epoch": 16.484485006518906, - "grad_norm": 1.3987886905670166, - "learning_rate": 6.874472361809046e-05, - "loss": 5.6416, - "step": 31609 - }, - { - "epoch": 16.485006518904825, - "grad_norm": 1.3914990425109863, - "learning_rate": 6.874371859296482e-05, - "loss": 5.5346, - "step": 31610 - }, - { - "epoch": 16.485528031290745, - "grad_norm": 1.4400948286056519, - "learning_rate": 6.87427135678392e-05, - "loss": 5.4628, - "step": 31611 - }, - { - "epoch": 16.486049543676664, - "grad_norm": 1.6058839559555054, - "learning_rate": 6.874170854271357e-05, - "loss": 5.2398, - "step": 31612 - }, - { - "epoch": 16.48657105606258, - "grad_norm": 1.4019441604614258, - "learning_rate": 6.874070351758794e-05, - "loss": 5.2063, - "step": 31613 - }, - { - "epoch": 16.4870925684485, - "grad_norm": 1.5777907371520996, - "learning_rate": 6.873969849246231e-05, - "loss": 5.6346, - "step": 31614 - }, - { - "epoch": 16.48761408083442, - "grad_norm": 1.4229921102523804, - "learning_rate": 6.873869346733669e-05, - "loss": 5.1604, - "step": 31615 - }, - { - "epoch": 16.48813559322034, - "grad_norm": 1.5164068937301636, - "learning_rate": 6.873768844221105e-05, - "loss": 5.2713, - "step": 31616 - }, - { - "epoch": 16.488657105606258, - "grad_norm": 1.4925905466079712, - "learning_rate": 6.873668341708543e-05, - "loss": 5.5712, - "step": 31617 - }, - { - "epoch": 16.489178617992177, - "grad_norm": 1.559501051902771, - "learning_rate": 6.873567839195981e-05, - "loss": 5.3, - "step": 31618 - }, - { - "epoch": 16.489700130378097, - "grad_norm": 1.5119789838790894, - "learning_rate": 6.873467336683418e-05, - "loss": 4.8141, - "step": 31619 - }, - { - "epoch": 16.490221642764016, - "grad_norm": 1.4569865465164185, - "learning_rate": 6.873366834170855e-05, - "loss": 5.4556, - "step": 31620 - }, - { - "epoch": 16.490743155149936, - "grad_norm": 1.5367742776870728, - "learning_rate": 6.873266331658291e-05, - "loss": 5.4558, - "step": 31621 - }, - { - "epoch": 16.491264667535855, - "grad_norm": 1.5353158712387085, - "learning_rate": 6.873165829145729e-05, - "loss": 5.5721, - "step": 31622 - }, - { - "epoch": 16.491786179921775, - "grad_norm": 1.468582034111023, - "learning_rate": 6.873065326633166e-05, - "loss": 5.6102, - "step": 31623 - }, - { - "epoch": 16.49230769230769, - "grad_norm": 1.4470860958099365, - "learning_rate": 6.872964824120603e-05, - "loss": 5.4036, - "step": 31624 - }, - { - "epoch": 16.49282920469361, - "grad_norm": 1.4472681283950806, - "learning_rate": 6.87286432160804e-05, - "loss": 5.1613, - "step": 31625 - }, - { - "epoch": 16.49335071707953, - "grad_norm": 1.4498213529586792, - "learning_rate": 6.872763819095478e-05, - "loss": 5.4749, - "step": 31626 - }, - { - "epoch": 16.49387222946545, - "grad_norm": 1.4506962299346924, - "learning_rate": 6.872663316582914e-05, - "loss": 5.349, - "step": 31627 - }, - { - "epoch": 16.49439374185137, - "grad_norm": 1.6514379978179932, - "learning_rate": 6.872562814070352e-05, - "loss": 5.1324, - "step": 31628 - }, - { - "epoch": 16.494915254237288, - "grad_norm": 1.5837290287017822, - "learning_rate": 6.87246231155779e-05, - "loss": 5.0866, - "step": 31629 - }, - { - "epoch": 16.495436766623207, - "grad_norm": 1.587170958518982, - "learning_rate": 6.872361809045227e-05, - "loss": 5.3105, - "step": 31630 - }, - { - "epoch": 16.495958279009127, - "grad_norm": 1.4991482496261597, - "learning_rate": 6.872261306532664e-05, - "loss": 5.7244, - "step": 31631 - }, - { - "epoch": 16.496479791395046, - "grad_norm": 1.4571247100830078, - "learning_rate": 6.872160804020102e-05, - "loss": 5.2473, - "step": 31632 - }, - { - "epoch": 16.497001303780966, - "grad_norm": 1.486035943031311, - "learning_rate": 6.872060301507538e-05, - "loss": 5.7817, - "step": 31633 - }, - { - "epoch": 16.497522816166885, - "grad_norm": 1.519545555114746, - "learning_rate": 6.871959798994974e-05, - "loss": 5.2991, - "step": 31634 - }, - { - "epoch": 16.498044328552805, - "grad_norm": 1.5607945919036865, - "learning_rate": 6.871859296482412e-05, - "loss": 5.5814, - "step": 31635 - }, - { - "epoch": 16.49856584093872, - "grad_norm": 1.6242576837539673, - "learning_rate": 6.871758793969849e-05, - "loss": 5.298, - "step": 31636 - }, - { - "epoch": 16.49908735332464, - "grad_norm": 1.6042903661727905, - "learning_rate": 6.871658291457286e-05, - "loss": 5.102, - "step": 31637 - }, - { - "epoch": 16.49960886571056, - "grad_norm": 1.515305757522583, - "learning_rate": 6.871557788944724e-05, - "loss": 5.5231, - "step": 31638 - }, - { - "epoch": 16.50013037809648, - "grad_norm": 1.3865524530410767, - "learning_rate": 6.871457286432162e-05, - "loss": 5.2012, - "step": 31639 - }, - { - "epoch": 16.5006518904824, - "grad_norm": 1.4839369058609009, - "learning_rate": 6.871356783919598e-05, - "loss": 5.0312, - "step": 31640 - }, - { - "epoch": 16.501173402868318, - "grad_norm": 1.5297791957855225, - "learning_rate": 6.871256281407036e-05, - "loss": 5.2636, - "step": 31641 - }, - { - "epoch": 16.501694915254237, - "grad_norm": 1.4687018394470215, - "learning_rate": 6.871155778894473e-05, - "loss": 5.5641, - "step": 31642 - }, - { - "epoch": 16.502216427640157, - "grad_norm": 1.4543172121047974, - "learning_rate": 6.87105527638191e-05, - "loss": 5.5943, - "step": 31643 - }, - { - "epoch": 16.502737940026076, - "grad_norm": 1.4142889976501465, - "learning_rate": 6.870954773869347e-05, - "loss": 5.82, - "step": 31644 - }, - { - "epoch": 16.503259452411996, - "grad_norm": 1.508291482925415, - "learning_rate": 6.870854271356785e-05, - "loss": 5.2766, - "step": 31645 - }, - { - "epoch": 16.503780964797915, - "grad_norm": 1.4976019859313965, - "learning_rate": 6.870753768844221e-05, - "loss": 5.4104, - "step": 31646 - }, - { - "epoch": 16.504302477183835, - "grad_norm": 1.324669361114502, - "learning_rate": 6.870653266331657e-05, - "loss": 5.8712, - "step": 31647 - }, - { - "epoch": 16.50482398956975, - "grad_norm": 1.5160701274871826, - "learning_rate": 6.870552763819095e-05, - "loss": 5.4888, - "step": 31648 - }, - { - "epoch": 16.50534550195567, - "grad_norm": 1.4812867641448975, - "learning_rate": 6.870452261306533e-05, - "loss": 4.9726, - "step": 31649 - }, - { - "epoch": 16.50586701434159, - "grad_norm": 1.542826771736145, - "learning_rate": 6.870351758793971e-05, - "loss": 5.1768, - "step": 31650 - }, - { - "epoch": 16.50638852672751, - "grad_norm": 1.3804315328598022, - "learning_rate": 6.870251256281407e-05, - "loss": 5.819, - "step": 31651 - }, - { - "epoch": 16.50691003911343, - "grad_norm": 1.6290278434753418, - "learning_rate": 6.870150753768845e-05, - "loss": 4.9031, - "step": 31652 - }, - { - "epoch": 16.507431551499348, - "grad_norm": 1.4632799625396729, - "learning_rate": 6.870050251256281e-05, - "loss": 4.7473, - "step": 31653 - }, - { - "epoch": 16.507953063885267, - "grad_norm": 1.371263861656189, - "learning_rate": 6.869949748743719e-05, - "loss": 5.7699, - "step": 31654 - }, - { - "epoch": 16.508474576271187, - "grad_norm": 1.4458378553390503, - "learning_rate": 6.869849246231156e-05, - "loss": 4.9653, - "step": 31655 - }, - { - "epoch": 16.508996088657106, - "grad_norm": 1.578955054283142, - "learning_rate": 6.869748743718593e-05, - "loss": 5.3714, - "step": 31656 - }, - { - "epoch": 16.509517601043026, - "grad_norm": 1.540718913078308, - "learning_rate": 6.86964824120603e-05, - "loss": 5.1384, - "step": 31657 - }, - { - "epoch": 16.510039113428945, - "grad_norm": 1.3718857765197754, - "learning_rate": 6.869547738693468e-05, - "loss": 5.486, - "step": 31658 - }, - { - "epoch": 16.510560625814865, - "grad_norm": 1.5581218004226685, - "learning_rate": 6.869447236180905e-05, - "loss": 5.2964, - "step": 31659 - }, - { - "epoch": 16.51108213820078, - "grad_norm": 1.4355064630508423, - "learning_rate": 6.869346733668343e-05, - "loss": 5.7848, - "step": 31660 - }, - { - "epoch": 16.5116036505867, - "grad_norm": 1.4028218984603882, - "learning_rate": 6.86924623115578e-05, - "loss": 5.5471, - "step": 31661 - }, - { - "epoch": 16.51212516297262, - "grad_norm": 1.448853850364685, - "learning_rate": 6.869145728643216e-05, - "loss": 5.1281, - "step": 31662 - }, - { - "epoch": 16.51264667535854, - "grad_norm": 1.516817569732666, - "learning_rate": 6.869045226130654e-05, - "loss": 5.5699, - "step": 31663 - }, - { - "epoch": 16.51316818774446, - "grad_norm": 1.427974820137024, - "learning_rate": 6.86894472361809e-05, - "loss": 5.3756, - "step": 31664 - }, - { - "epoch": 16.513689700130378, - "grad_norm": 1.7629843950271606, - "learning_rate": 6.868844221105528e-05, - "loss": 4.2512, - "step": 31665 - }, - { - "epoch": 16.514211212516297, - "grad_norm": 1.4452588558197021, - "learning_rate": 6.868743718592964e-05, - "loss": 5.838, - "step": 31666 - }, - { - "epoch": 16.514732724902217, - "grad_norm": 1.4854373931884766, - "learning_rate": 6.868643216080402e-05, - "loss": 5.5963, - "step": 31667 - }, - { - "epoch": 16.515254237288136, - "grad_norm": 1.4263315200805664, - "learning_rate": 6.868542713567839e-05, - "loss": 5.5701, - "step": 31668 - }, - { - "epoch": 16.515775749674056, - "grad_norm": 1.4765028953552246, - "learning_rate": 6.868442211055276e-05, - "loss": 4.9583, - "step": 31669 - }, - { - "epoch": 16.516297262059975, - "grad_norm": 1.428579568862915, - "learning_rate": 6.868341708542714e-05, - "loss": 5.4824, - "step": 31670 - }, - { - "epoch": 16.516818774445895, - "grad_norm": 1.4761269092559814, - "learning_rate": 6.868241206030152e-05, - "loss": 5.4125, - "step": 31671 - }, - { - "epoch": 16.51734028683181, - "grad_norm": 1.4993075132369995, - "learning_rate": 6.868140703517588e-05, - "loss": 5.4718, - "step": 31672 - }, - { - "epoch": 16.51786179921773, - "grad_norm": 1.4499928951263428, - "learning_rate": 6.868040201005026e-05, - "loss": 5.6031, - "step": 31673 - }, - { - "epoch": 16.51838331160365, - "grad_norm": 1.531223177909851, - "learning_rate": 6.867939698492463e-05, - "loss": 5.2911, - "step": 31674 - }, - { - "epoch": 16.51890482398957, - "grad_norm": 1.598676085472107, - "learning_rate": 6.867839195979899e-05, - "loss": 5.3124, - "step": 31675 - }, - { - "epoch": 16.51942633637549, - "grad_norm": 1.5528815984725952, - "learning_rate": 6.867738693467337e-05, - "loss": 5.0958, - "step": 31676 - }, - { - "epoch": 16.519947848761408, - "grad_norm": 1.3907850980758667, - "learning_rate": 6.867638190954773e-05, - "loss": 5.0246, - "step": 31677 - }, - { - "epoch": 16.520469361147327, - "grad_norm": 1.472508430480957, - "learning_rate": 6.867537688442211e-05, - "loss": 5.3032, - "step": 31678 - }, - { - "epoch": 16.520990873533247, - "grad_norm": 1.495861530303955, - "learning_rate": 6.867437185929649e-05, - "loss": 5.312, - "step": 31679 - }, - { - "epoch": 16.521512385919166, - "grad_norm": 1.441819190979004, - "learning_rate": 6.867336683417087e-05, - "loss": 5.3273, - "step": 31680 - }, - { - "epoch": 16.522033898305086, - "grad_norm": 1.4437059164047241, - "learning_rate": 6.867236180904523e-05, - "loss": 5.2403, - "step": 31681 - }, - { - "epoch": 16.522555410691005, - "grad_norm": 1.392138957977295, - "learning_rate": 6.867135678391961e-05, - "loss": 5.4369, - "step": 31682 - }, - { - "epoch": 16.523076923076925, - "grad_norm": 1.5668007135391235, - "learning_rate": 6.867035175879397e-05, - "loss": 5.1764, - "step": 31683 - }, - { - "epoch": 16.52359843546284, - "grad_norm": 1.4668787717819214, - "learning_rate": 6.866934673366835e-05, - "loss": 5.6985, - "step": 31684 - }, - { - "epoch": 16.52411994784876, - "grad_norm": 1.4631197452545166, - "learning_rate": 6.866834170854271e-05, - "loss": 5.3862, - "step": 31685 - }, - { - "epoch": 16.52464146023468, - "grad_norm": 1.4383333921432495, - "learning_rate": 6.866733668341709e-05, - "loss": 5.2176, - "step": 31686 - }, - { - "epoch": 16.5251629726206, - "grad_norm": 1.436447262763977, - "learning_rate": 6.866633165829146e-05, - "loss": 5.3326, - "step": 31687 - }, - { - "epoch": 16.52568448500652, - "grad_norm": 1.4406789541244507, - "learning_rate": 6.866532663316582e-05, - "loss": 5.1135, - "step": 31688 - }, - { - "epoch": 16.526205997392438, - "grad_norm": 1.4602856636047363, - "learning_rate": 6.86643216080402e-05, - "loss": 5.529, - "step": 31689 - }, - { - "epoch": 16.526727509778357, - "grad_norm": 1.4394398927688599, - "learning_rate": 6.866331658291458e-05, - "loss": 5.5929, - "step": 31690 - }, - { - "epoch": 16.527249022164277, - "grad_norm": 1.4382332563400269, - "learning_rate": 6.866231155778895e-05, - "loss": 5.321, - "step": 31691 - }, - { - "epoch": 16.527770534550196, - "grad_norm": 1.4467424154281616, - "learning_rate": 6.866130653266332e-05, - "loss": 5.231, - "step": 31692 - }, - { - "epoch": 16.528292046936116, - "grad_norm": 1.4332661628723145, - "learning_rate": 6.86603015075377e-05, - "loss": 5.4074, - "step": 31693 - }, - { - "epoch": 16.528813559322035, - "grad_norm": 1.4333457946777344, - "learning_rate": 6.865929648241206e-05, - "loss": 5.7832, - "step": 31694 - }, - { - "epoch": 16.529335071707955, - "grad_norm": 1.48317551612854, - "learning_rate": 6.865829145728644e-05, - "loss": 5.2843, - "step": 31695 - }, - { - "epoch": 16.52985658409387, - "grad_norm": 1.5993813276290894, - "learning_rate": 6.86572864321608e-05, - "loss": 4.8812, - "step": 31696 - }, - { - "epoch": 16.53037809647979, - "grad_norm": 1.410029411315918, - "learning_rate": 6.865628140703518e-05, - "loss": 5.5033, - "step": 31697 - }, - { - "epoch": 16.53089960886571, - "grad_norm": 1.4207673072814941, - "learning_rate": 6.865527638190955e-05, - "loss": 5.4977, - "step": 31698 - }, - { - "epoch": 16.53142112125163, - "grad_norm": 1.4541505575180054, - "learning_rate": 6.865427135678392e-05, - "loss": 5.2732, - "step": 31699 - }, - { - "epoch": 16.53194263363755, - "grad_norm": 1.4071919918060303, - "learning_rate": 6.86532663316583e-05, - "loss": 5.2229, - "step": 31700 - }, - { - "epoch": 16.532464146023468, - "grad_norm": 1.4518100023269653, - "learning_rate": 6.865226130653267e-05, - "loss": 5.4892, - "step": 31701 - }, - { - "epoch": 16.532985658409387, - "grad_norm": 1.5936201810836792, - "learning_rate": 6.865125628140704e-05, - "loss": 5.3814, - "step": 31702 - }, - { - "epoch": 16.533507170795307, - "grad_norm": 1.5361013412475586, - "learning_rate": 6.865025125628141e-05, - "loss": 5.3024, - "step": 31703 - }, - { - "epoch": 16.534028683181226, - "grad_norm": 1.519648551940918, - "learning_rate": 6.864924623115579e-05, - "loss": 5.388, - "step": 31704 - }, - { - "epoch": 16.534550195567146, - "grad_norm": 1.5176059007644653, - "learning_rate": 6.864824120603015e-05, - "loss": 5.115, - "step": 31705 - }, - { - "epoch": 16.535071707953065, - "grad_norm": 1.396774411201477, - "learning_rate": 6.864723618090453e-05, - "loss": 5.4013, - "step": 31706 - }, - { - "epoch": 16.53559322033898, - "grad_norm": 1.5269083976745605, - "learning_rate": 6.864623115577889e-05, - "loss": 5.1149, - "step": 31707 - }, - { - "epoch": 16.5361147327249, - "grad_norm": 1.428453803062439, - "learning_rate": 6.864522613065327e-05, - "loss": 5.1302, - "step": 31708 - }, - { - "epoch": 16.53663624511082, - "grad_norm": 1.5300451517105103, - "learning_rate": 6.864422110552763e-05, - "loss": 5.2371, - "step": 31709 - }, - { - "epoch": 16.53715775749674, - "grad_norm": 1.4250613451004028, - "learning_rate": 6.864321608040201e-05, - "loss": 5.5342, - "step": 31710 - }, - { - "epoch": 16.53767926988266, - "grad_norm": 1.4323896169662476, - "learning_rate": 6.864221105527639e-05, - "loss": 5.5604, - "step": 31711 - }, - { - "epoch": 16.53820078226858, - "grad_norm": 1.5507025718688965, - "learning_rate": 6.864120603015077e-05, - "loss": 5.4034, - "step": 31712 - }, - { - "epoch": 16.538722294654498, - "grad_norm": 1.5455169677734375, - "learning_rate": 6.864020100502513e-05, - "loss": 5.4965, - "step": 31713 - }, - { - "epoch": 16.539243807040418, - "grad_norm": 1.4942984580993652, - "learning_rate": 6.86391959798995e-05, - "loss": 5.1636, - "step": 31714 - }, - { - "epoch": 16.539765319426337, - "grad_norm": 1.3936381340026855, - "learning_rate": 6.863819095477387e-05, - "loss": 5.2961, - "step": 31715 - }, - { - "epoch": 16.540286831812256, - "grad_norm": 1.4135288000106812, - "learning_rate": 6.863718592964824e-05, - "loss": 5.8134, - "step": 31716 - }, - { - "epoch": 16.540808344198176, - "grad_norm": 1.4060463905334473, - "learning_rate": 6.863618090452262e-05, - "loss": 5.7609, - "step": 31717 - }, - { - "epoch": 16.541329856584095, - "grad_norm": 1.4403984546661377, - "learning_rate": 6.863517587939698e-05, - "loss": 5.2868, - "step": 31718 - }, - { - "epoch": 16.541851368970015, - "grad_norm": 1.4015544652938843, - "learning_rate": 6.863417085427136e-05, - "loss": 5.2716, - "step": 31719 - }, - { - "epoch": 16.54237288135593, - "grad_norm": Infinity, - "learning_rate": 6.863417085427136e-05, - "loss": 5.1176, - "step": 31720 - }, - { - "epoch": 16.54289439374185, - "grad_norm": 1.5108873844146729, - "learning_rate": 6.863316582914574e-05, - "loss": 5.2165, - "step": 31721 - }, - { - "epoch": 16.54341590612777, - "grad_norm": 1.3917577266693115, - "learning_rate": 6.863216080402011e-05, - "loss": 5.7737, - "step": 31722 - }, - { - "epoch": 16.54393741851369, - "grad_norm": 1.4226608276367188, - "learning_rate": 6.863115577889448e-05, - "loss": 5.5183, - "step": 31723 - }, - { - "epoch": 16.54445893089961, - "grad_norm": 1.4402449131011963, - "learning_rate": 6.863015075376886e-05, - "loss": 5.4168, - "step": 31724 - }, - { - "epoch": 16.544980443285528, - "grad_norm": 1.5147724151611328, - "learning_rate": 6.862914572864322e-05, - "loss": 5.1407, - "step": 31725 - }, - { - "epoch": 16.545501955671448, - "grad_norm": 1.4132556915283203, - "learning_rate": 6.86281407035176e-05, - "loss": 4.8144, - "step": 31726 - }, - { - "epoch": 16.546023468057367, - "grad_norm": 1.4122813940048218, - "learning_rate": 6.862713567839196e-05, - "loss": 5.3953, - "step": 31727 - }, - { - "epoch": 16.546544980443286, - "grad_norm": 1.4687728881835938, - "learning_rate": 6.862613065326633e-05, - "loss": 5.2523, - "step": 31728 - }, - { - "epoch": 16.547066492829206, - "grad_norm": 1.4519565105438232, - "learning_rate": 6.86251256281407e-05, - "loss": 5.4453, - "step": 31729 - }, - { - "epoch": 16.547588005215125, - "grad_norm": 1.4250752925872803, - "learning_rate": 6.862412060301507e-05, - "loss": 5.2829, - "step": 31730 - }, - { - "epoch": 16.54810951760104, - "grad_norm": 1.4443093538284302, - "learning_rate": 6.862311557788945e-05, - "loss": 5.2219, - "step": 31731 - }, - { - "epoch": 16.54863102998696, - "grad_norm": 1.3728580474853516, - "learning_rate": 6.862211055276382e-05, - "loss": 4.782, - "step": 31732 - }, - { - "epoch": 16.54915254237288, - "grad_norm": 1.443861484527588, - "learning_rate": 6.86211055276382e-05, - "loss": 5.5466, - "step": 31733 - }, - { - "epoch": 16.5496740547588, - "grad_norm": 1.4279615879058838, - "learning_rate": 6.862010050251257e-05, - "loss": 5.5486, - "step": 31734 - }, - { - "epoch": 16.55019556714472, - "grad_norm": 1.4932620525360107, - "learning_rate": 6.861909547738694e-05, - "loss": 5.5355, - "step": 31735 - }, - { - "epoch": 16.55071707953064, - "grad_norm": 1.6095308065414429, - "learning_rate": 6.861809045226131e-05, - "loss": 5.1158, - "step": 31736 - }, - { - "epoch": 16.551238591916558, - "grad_norm": 1.4348841905593872, - "learning_rate": 6.861708542713569e-05, - "loss": 5.373, - "step": 31737 - }, - { - "epoch": 16.551760104302478, - "grad_norm": 1.5494601726531982, - "learning_rate": 6.861608040201005e-05, - "loss": 5.0645, - "step": 31738 - }, - { - "epoch": 16.552281616688397, - "grad_norm": 1.4508315324783325, - "learning_rate": 6.861507537688443e-05, - "loss": 5.7718, - "step": 31739 - }, - { - "epoch": 16.552803129074317, - "grad_norm": 1.4338085651397705, - "learning_rate": 6.861407035175879e-05, - "loss": 4.6435, - "step": 31740 - }, - { - "epoch": 16.553324641460236, - "grad_norm": 1.5107218027114868, - "learning_rate": 6.861306532663317e-05, - "loss": 5.7011, - "step": 31741 - }, - { - "epoch": 16.553846153846155, - "grad_norm": 1.4490885734558105, - "learning_rate": 6.861206030150755e-05, - "loss": 5.6455, - "step": 31742 - }, - { - "epoch": 16.55436766623207, - "grad_norm": 1.3639578819274902, - "learning_rate": 6.861105527638191e-05, - "loss": 5.404, - "step": 31743 - }, - { - "epoch": 16.55488917861799, - "grad_norm": 1.4840247631072998, - "learning_rate": 6.861005025125629e-05, - "loss": 5.6851, - "step": 31744 - }, - { - "epoch": 16.55541069100391, - "grad_norm": 1.4888404607772827, - "learning_rate": 6.860904522613065e-05, - "loss": 5.2509, - "step": 31745 - }, - { - "epoch": 16.55593220338983, - "grad_norm": 1.3964686393737793, - "learning_rate": 6.860804020100503e-05, - "loss": 5.5534, - "step": 31746 - }, - { - "epoch": 16.55645371577575, - "grad_norm": 1.5131151676177979, - "learning_rate": 6.86070351758794e-05, - "loss": 5.3472, - "step": 31747 - }, - { - "epoch": 16.55697522816167, - "grad_norm": 1.5055270195007324, - "learning_rate": 6.860603015075377e-05, - "loss": 5.476, - "step": 31748 - }, - { - "epoch": 16.557496740547588, - "grad_norm": 1.4821758270263672, - "learning_rate": 6.860502512562814e-05, - "loss": 5.2252, - "step": 31749 - }, - { - "epoch": 16.558018252933508, - "grad_norm": 1.5010623931884766, - "learning_rate": 6.860402010050252e-05, - "loss": 5.5167, - "step": 31750 - }, - { - "epoch": 16.558539765319427, - "grad_norm": 1.4742720127105713, - "learning_rate": 6.860301507537688e-05, - "loss": 5.1283, - "step": 31751 - }, - { - "epoch": 16.559061277705347, - "grad_norm": 1.3924617767333984, - "learning_rate": 6.860201005025126e-05, - "loss": 5.4063, - "step": 31752 - }, - { - "epoch": 16.559582790091266, - "grad_norm": 1.415818214416504, - "learning_rate": 6.860100502512564e-05, - "loss": 5.3374, - "step": 31753 - }, - { - "epoch": 16.560104302477185, - "grad_norm": 1.4692749977111816, - "learning_rate": 6.860000000000001e-05, - "loss": 5.4593, - "step": 31754 - }, - { - "epoch": 16.5606258148631, - "grad_norm": 1.503350019454956, - "learning_rate": 6.859899497487438e-05, - "loss": 5.3759, - "step": 31755 - }, - { - "epoch": 16.56114732724902, - "grad_norm": 1.5380640029907227, - "learning_rate": 6.859798994974874e-05, - "loss": 5.1153, - "step": 31756 - }, - { - "epoch": 16.56166883963494, - "grad_norm": 1.4745564460754395, - "learning_rate": 6.859698492462312e-05, - "loss": 5.1736, - "step": 31757 - }, - { - "epoch": 16.56219035202086, - "grad_norm": 1.3950718641281128, - "learning_rate": 6.859597989949748e-05, - "loss": 5.6979, - "step": 31758 - }, - { - "epoch": 16.56271186440678, - "grad_norm": 1.5142191648483276, - "learning_rate": 6.859497487437186e-05, - "loss": 5.5974, - "step": 31759 - }, - { - "epoch": 16.5632333767927, - "grad_norm": 1.4515448808670044, - "learning_rate": 6.859396984924623e-05, - "loss": 5.5234, - "step": 31760 - }, - { - "epoch": 16.563754889178618, - "grad_norm": 1.5559196472167969, - "learning_rate": 6.85929648241206e-05, - "loss": 5.1787, - "step": 31761 - }, - { - "epoch": 16.564276401564538, - "grad_norm": 1.508660078048706, - "learning_rate": 6.859195979899498e-05, - "loss": 5.0737, - "step": 31762 - }, - { - "epoch": 16.564797913950457, - "grad_norm": 1.4668874740600586, - "learning_rate": 6.859095477386936e-05, - "loss": 5.3153, - "step": 31763 - }, - { - "epoch": 16.565319426336377, - "grad_norm": 1.5736151933670044, - "learning_rate": 6.858994974874372e-05, - "loss": 4.6752, - "step": 31764 - }, - { - "epoch": 16.565840938722296, - "grad_norm": 1.3454478979110718, - "learning_rate": 6.85889447236181e-05, - "loss": 5.2309, - "step": 31765 - }, - { - "epoch": 16.566362451108215, - "grad_norm": 1.4460217952728271, - "learning_rate": 6.858793969849247e-05, - "loss": 5.7528, - "step": 31766 - }, - { - "epoch": 16.56688396349413, - "grad_norm": 1.5007386207580566, - "learning_rate": 6.858693467336684e-05, - "loss": 5.3206, - "step": 31767 - }, - { - "epoch": 16.56740547588005, - "grad_norm": 1.4924925565719604, - "learning_rate": 6.858592964824121e-05, - "loss": 5.3539, - "step": 31768 - }, - { - "epoch": 16.56792698826597, - "grad_norm": 1.3316951990127563, - "learning_rate": 6.858492462311557e-05, - "loss": 5.4666, - "step": 31769 - }, - { - "epoch": 16.56844850065189, - "grad_norm": 1.6352949142456055, - "learning_rate": 6.858391959798995e-05, - "loss": 5.0374, - "step": 31770 - }, - { - "epoch": 16.56897001303781, - "grad_norm": 1.4097951650619507, - "learning_rate": 6.858291457286432e-05, - "loss": 5.5644, - "step": 31771 - }, - { - "epoch": 16.56949152542373, - "grad_norm": 1.6226999759674072, - "learning_rate": 6.858190954773869e-05, - "loss": 5.4272, - "step": 31772 - }, - { - "epoch": 16.570013037809648, - "grad_norm": 1.523787260055542, - "learning_rate": 6.858090452261307e-05, - "loss": 5.4011, - "step": 31773 - }, - { - "epoch": 16.570534550195568, - "grad_norm": 1.495197057723999, - "learning_rate": 6.857989949748745e-05, - "loss": 5.469, - "step": 31774 - }, - { - "epoch": 16.571056062581487, - "grad_norm": 1.4608339071273804, - "learning_rate": 6.857889447236181e-05, - "loss": 5.2652, - "step": 31775 - }, - { - "epoch": 16.571577574967407, - "grad_norm": 1.38694167137146, - "learning_rate": 6.857788944723619e-05, - "loss": 5.7193, - "step": 31776 - }, - { - "epoch": 16.572099087353326, - "grad_norm": 1.4634335041046143, - "learning_rate": 6.857688442211056e-05, - "loss": 5.2301, - "step": 31777 - }, - { - "epoch": 16.572620599739246, - "grad_norm": 1.7041175365447998, - "learning_rate": 6.857587939698493e-05, - "loss": 4.8583, - "step": 31778 - }, - { - "epoch": 16.57314211212516, - "grad_norm": 1.4305096864700317, - "learning_rate": 6.85748743718593e-05, - "loss": 5.3696, - "step": 31779 - }, - { - "epoch": 16.57366362451108, - "grad_norm": 1.3821494579315186, - "learning_rate": 6.857386934673367e-05, - "loss": 5.7692, - "step": 31780 - }, - { - "epoch": 16.574185136897, - "grad_norm": 1.4929012060165405, - "learning_rate": 6.857286432160804e-05, - "loss": 5.4272, - "step": 31781 - }, - { - "epoch": 16.57470664928292, - "grad_norm": 1.5246950387954712, - "learning_rate": 6.85718592964824e-05, - "loss": 4.9595, - "step": 31782 - }, - { - "epoch": 16.57522816166884, - "grad_norm": 1.493899941444397, - "learning_rate": 6.857085427135678e-05, - "loss": 4.7122, - "step": 31783 - }, - { - "epoch": 16.57574967405476, - "grad_norm": 1.5034503936767578, - "learning_rate": 6.856984924623116e-05, - "loss": 4.8799, - "step": 31784 - }, - { - "epoch": 16.576271186440678, - "grad_norm": 1.612138271331787, - "learning_rate": 6.856884422110554e-05, - "loss": 4.921, - "step": 31785 - }, - { - "epoch": 16.576792698826598, - "grad_norm": 2.118063449859619, - "learning_rate": 6.85678391959799e-05, - "loss": 5.0651, - "step": 31786 - }, - { - "epoch": 16.577314211212517, - "grad_norm": 1.4699867963790894, - "learning_rate": 6.856683417085428e-05, - "loss": 5.3677, - "step": 31787 - }, - { - "epoch": 16.577835723598437, - "grad_norm": 1.4850114583969116, - "learning_rate": 6.856582914572864e-05, - "loss": 5.5366, - "step": 31788 - }, - { - "epoch": 16.578357235984356, - "grad_norm": 1.4850448369979858, - "learning_rate": 6.856482412060302e-05, - "loss": 5.2406, - "step": 31789 - }, - { - "epoch": 16.578878748370272, - "grad_norm": 1.7758609056472778, - "learning_rate": 6.856381909547739e-05, - "loss": 5.3107, - "step": 31790 - }, - { - "epoch": 16.57940026075619, - "grad_norm": 1.4418960809707642, - "learning_rate": 6.856281407035176e-05, - "loss": 4.9315, - "step": 31791 - }, - { - "epoch": 16.57992177314211, - "grad_norm": 1.474555253982544, - "learning_rate": 6.856180904522613e-05, - "loss": 5.2297, - "step": 31792 - }, - { - "epoch": 16.58044328552803, - "grad_norm": 1.578627586364746, - "learning_rate": 6.85608040201005e-05, - "loss": 5.4818, - "step": 31793 - }, - { - "epoch": 16.58096479791395, - "grad_norm": 1.5467692613601685, - "learning_rate": 6.855979899497488e-05, - "loss": 4.8844, - "step": 31794 - }, - { - "epoch": 16.58148631029987, - "grad_norm": 1.4057316780090332, - "learning_rate": 6.855879396984925e-05, - "loss": 5.5666, - "step": 31795 - }, - { - "epoch": 16.58200782268579, - "grad_norm": 1.4855581521987915, - "learning_rate": 6.855778894472363e-05, - "loss": 5.443, - "step": 31796 - }, - { - "epoch": 16.58252933507171, - "grad_norm": 1.623557448387146, - "learning_rate": 6.855678391959799e-05, - "loss": 5.1603, - "step": 31797 - }, - { - "epoch": 16.583050847457628, - "grad_norm": 1.5114301443099976, - "learning_rate": 6.855577889447237e-05, - "loss": 5.4549, - "step": 31798 - }, - { - "epoch": 16.583572359843547, - "grad_norm": 1.5366238355636597, - "learning_rate": 6.855477386934673e-05, - "loss": 5.2634, - "step": 31799 - }, - { - "epoch": 16.584093872229467, - "grad_norm": 1.384951114654541, - "learning_rate": 6.855376884422111e-05, - "loss": 5.6739, - "step": 31800 - }, - { - "epoch": 16.584615384615386, - "grad_norm": 1.550540566444397, - "learning_rate": 6.855276381909547e-05, - "loss": 5.6141, - "step": 31801 - }, - { - "epoch": 16.585136897001306, - "grad_norm": 1.404815435409546, - "learning_rate": 6.855175879396985e-05, - "loss": 5.5869, - "step": 31802 - }, - { - "epoch": 16.58565840938722, - "grad_norm": 1.5107088088989258, - "learning_rate": 6.855075376884422e-05, - "loss": 5.2047, - "step": 31803 - }, - { - "epoch": 16.58617992177314, - "grad_norm": 1.4218575954437256, - "learning_rate": 6.85497487437186e-05, - "loss": 5.7635, - "step": 31804 - }, - { - "epoch": 16.58670143415906, - "grad_norm": 1.4871801137924194, - "learning_rate": 6.854874371859297e-05, - "loss": 5.681, - "step": 31805 - }, - { - "epoch": 16.58722294654498, - "grad_norm": 1.5488145351409912, - "learning_rate": 6.854773869346735e-05, - "loss": 4.7306, - "step": 31806 - }, - { - "epoch": 16.5877444589309, - "grad_norm": 1.5847187042236328, - "learning_rate": 6.854673366834171e-05, - "loss": 5.3348, - "step": 31807 - }, - { - "epoch": 16.58826597131682, - "grad_norm": 1.498538851737976, - "learning_rate": 6.854572864321608e-05, - "loss": 4.8845, - "step": 31808 - }, - { - "epoch": 16.58878748370274, - "grad_norm": 1.5194839239120483, - "learning_rate": 6.854472361809046e-05, - "loss": 5.3274, - "step": 31809 - }, - { - "epoch": 16.589308996088658, - "grad_norm": 1.4038681983947754, - "learning_rate": 6.854371859296482e-05, - "loss": 5.5458, - "step": 31810 - }, - { - "epoch": 16.589830508474577, - "grad_norm": 1.4093307256698608, - "learning_rate": 6.85427135678392e-05, - "loss": 5.5869, - "step": 31811 - }, - { - "epoch": 16.590352020860497, - "grad_norm": 1.4300788640975952, - "learning_rate": 6.854170854271356e-05, - "loss": 5.7271, - "step": 31812 - }, - { - "epoch": 16.590873533246416, - "grad_norm": 1.4762706756591797, - "learning_rate": 6.854070351758794e-05, - "loss": 5.7298, - "step": 31813 - }, - { - "epoch": 16.591395045632332, - "grad_norm": 1.5354472398757935, - "learning_rate": 6.853969849246232e-05, - "loss": 5.1725, - "step": 31814 - }, - { - "epoch": 16.59191655801825, - "grad_norm": 1.4573270082473755, - "learning_rate": 6.85386934673367e-05, - "loss": 5.1419, - "step": 31815 - }, - { - "epoch": 16.59243807040417, - "grad_norm": 1.5633689165115356, - "learning_rate": 6.853768844221106e-05, - "loss": 4.5874, - "step": 31816 - }, - { - "epoch": 16.59295958279009, - "grad_norm": 1.4894908666610718, - "learning_rate": 6.853668341708544e-05, - "loss": 5.6387, - "step": 31817 - }, - { - "epoch": 16.59348109517601, - "grad_norm": 1.450129508972168, - "learning_rate": 6.85356783919598e-05, - "loss": 5.2175, - "step": 31818 - }, - { - "epoch": 16.59400260756193, - "grad_norm": 1.4928871393203735, - "learning_rate": 6.853467336683418e-05, - "loss": 5.6575, - "step": 31819 - }, - { - "epoch": 16.59452411994785, - "grad_norm": 1.539548397064209, - "learning_rate": 6.853366834170854e-05, - "loss": 5.3415, - "step": 31820 - }, - { - "epoch": 16.59504563233377, - "grad_norm": 1.454768419265747, - "learning_rate": 6.853266331658291e-05, - "loss": 5.5495, - "step": 31821 - }, - { - "epoch": 16.595567144719688, - "grad_norm": 1.416604995727539, - "learning_rate": 6.853165829145729e-05, - "loss": 5.4415, - "step": 31822 - }, - { - "epoch": 16.596088657105607, - "grad_norm": 1.5218690633773804, - "learning_rate": 6.853065326633165e-05, - "loss": 5.3862, - "step": 31823 - }, - { - "epoch": 16.596610169491527, - "grad_norm": 1.491566777229309, - "learning_rate": 6.852964824120603e-05, - "loss": 5.5571, - "step": 31824 - }, - { - "epoch": 16.597131681877446, - "grad_norm": 1.4965934753417969, - "learning_rate": 6.85286432160804e-05, - "loss": 5.2095, - "step": 31825 - }, - { - "epoch": 16.597653194263362, - "grad_norm": 1.4559623003005981, - "learning_rate": 6.852763819095478e-05, - "loss": 5.0099, - "step": 31826 - }, - { - "epoch": 16.59817470664928, - "grad_norm": 1.516855239868164, - "learning_rate": 6.852663316582915e-05, - "loss": 4.9343, - "step": 31827 - }, - { - "epoch": 16.5986962190352, - "grad_norm": 1.47390615940094, - "learning_rate": 6.852562814070353e-05, - "loss": 5.2223, - "step": 31828 - }, - { - "epoch": 16.59921773142112, - "grad_norm": 1.5157479047775269, - "learning_rate": 6.852462311557789e-05, - "loss": 5.4117, - "step": 31829 - }, - { - "epoch": 16.59973924380704, - "grad_norm": 1.399772047996521, - "learning_rate": 6.852361809045227e-05, - "loss": 5.5028, - "step": 31830 - }, - { - "epoch": 16.60026075619296, - "grad_norm": 1.5147318840026855, - "learning_rate": 6.852261306532663e-05, - "loss": 5.3494, - "step": 31831 - }, - { - "epoch": 16.60078226857888, - "grad_norm": 1.4527376890182495, - "learning_rate": 6.852160804020101e-05, - "loss": 5.5278, - "step": 31832 - }, - { - "epoch": 16.6013037809648, - "grad_norm": 1.4492485523223877, - "learning_rate": 6.852060301507537e-05, - "loss": 5.2507, - "step": 31833 - }, - { - "epoch": 16.601825293350718, - "grad_norm": 1.4392709732055664, - "learning_rate": 6.851959798994975e-05, - "loss": 5.828, - "step": 31834 - }, - { - "epoch": 16.602346805736637, - "grad_norm": 1.471445918083191, - "learning_rate": 6.851859296482413e-05, - "loss": 4.6498, - "step": 31835 - }, - { - "epoch": 16.602868318122557, - "grad_norm": 1.6285513639450073, - "learning_rate": 6.85175879396985e-05, - "loss": 5.1865, - "step": 31836 - }, - { - "epoch": 16.603389830508476, - "grad_norm": 1.511054277420044, - "learning_rate": 6.851658291457287e-05, - "loss": 5.4284, - "step": 31837 - }, - { - "epoch": 16.603911342894392, - "grad_norm": 1.5129042863845825, - "learning_rate": 6.851557788944724e-05, - "loss": 5.0763, - "step": 31838 - }, - { - "epoch": 16.60443285528031, - "grad_norm": 1.4672644138336182, - "learning_rate": 6.851457286432161e-05, - "loss": 5.404, - "step": 31839 - }, - { - "epoch": 16.60495436766623, - "grad_norm": 1.4345884323120117, - "learning_rate": 6.851356783919598e-05, - "loss": 5.5415, - "step": 31840 - }, - { - "epoch": 16.60547588005215, - "grad_norm": 1.5286099910736084, - "learning_rate": 6.851256281407036e-05, - "loss": 5.283, - "step": 31841 - }, - { - "epoch": 16.60599739243807, - "grad_norm": 1.470200777053833, - "learning_rate": 6.851155778894472e-05, - "loss": 5.5557, - "step": 31842 - }, - { - "epoch": 16.60651890482399, - "grad_norm": 1.6150331497192383, - "learning_rate": 6.85105527638191e-05, - "loss": 5.3101, - "step": 31843 - }, - { - "epoch": 16.60704041720991, - "grad_norm": 1.3887778520584106, - "learning_rate": 6.850954773869346e-05, - "loss": 5.8323, - "step": 31844 - }, - { - "epoch": 16.60756192959583, - "grad_norm": 1.4820208549499512, - "learning_rate": 6.850854271356784e-05, - "loss": 5.2583, - "step": 31845 - }, - { - "epoch": 16.608083441981748, - "grad_norm": 1.459841251373291, - "learning_rate": 6.850753768844222e-05, - "loss": 5.004, - "step": 31846 - }, - { - "epoch": 16.608604954367667, - "grad_norm": 1.490854024887085, - "learning_rate": 6.85065326633166e-05, - "loss": 5.502, - "step": 31847 - }, - { - "epoch": 16.609126466753587, - "grad_norm": 1.5440709590911865, - "learning_rate": 6.850552763819096e-05, - "loss": 4.3563, - "step": 31848 - }, - { - "epoch": 16.609647979139506, - "grad_norm": 1.536443829536438, - "learning_rate": 6.850452261306532e-05, - "loss": 5.4576, - "step": 31849 - }, - { - "epoch": 16.610169491525422, - "grad_norm": 1.4842041730880737, - "learning_rate": 6.85035175879397e-05, - "loss": 5.1197, - "step": 31850 - }, - { - "epoch": 16.61069100391134, - "grad_norm": 1.5766043663024902, - "learning_rate": 6.850251256281407e-05, - "loss": 4.8271, - "step": 31851 - }, - { - "epoch": 16.61121251629726, - "grad_norm": 1.506008267402649, - "learning_rate": 6.850150753768844e-05, - "loss": 4.9587, - "step": 31852 - }, - { - "epoch": 16.61173402868318, - "grad_norm": 1.5134464502334595, - "learning_rate": 6.850050251256281e-05, - "loss": 5.3567, - "step": 31853 - }, - { - "epoch": 16.6122555410691, - "grad_norm": 1.6432462930679321, - "learning_rate": 6.849949748743719e-05, - "loss": 5.0022, - "step": 31854 - }, - { - "epoch": 16.61277705345502, - "grad_norm": 1.501944661140442, - "learning_rate": 6.849849246231156e-05, - "loss": 5.2709, - "step": 31855 - }, - { - "epoch": 16.61329856584094, - "grad_norm": 1.5880593061447144, - "learning_rate": 6.849748743718594e-05, - "loss": 5.2623, - "step": 31856 - }, - { - "epoch": 16.61382007822686, - "grad_norm": 1.462196946144104, - "learning_rate": 6.849648241206031e-05, - "loss": 4.9677, - "step": 31857 - }, - { - "epoch": 16.614341590612778, - "grad_norm": 1.4702727794647217, - "learning_rate": 6.849547738693468e-05, - "loss": 5.424, - "step": 31858 - }, - { - "epoch": 16.614863102998697, - "grad_norm": 1.458552360534668, - "learning_rate": 6.849447236180905e-05, - "loss": 5.0189, - "step": 31859 - }, - { - "epoch": 16.615384615384617, - "grad_norm": 1.441962480545044, - "learning_rate": 6.849346733668343e-05, - "loss": 5.1317, - "step": 31860 - }, - { - "epoch": 16.615906127770536, - "grad_norm": 1.5440874099731445, - "learning_rate": 6.849246231155779e-05, - "loss": 5.0432, - "step": 31861 - }, - { - "epoch": 16.616427640156452, - "grad_norm": 1.5573375225067139, - "learning_rate": 6.849145728643216e-05, - "loss": 5.4556, - "step": 31862 - }, - { - "epoch": 16.61694915254237, - "grad_norm": 1.5333975553512573, - "learning_rate": 6.849045226130653e-05, - "loss": 5.4828, - "step": 31863 - }, - { - "epoch": 16.61747066492829, - "grad_norm": 1.6344773769378662, - "learning_rate": 6.84894472361809e-05, - "loss": 5.0109, - "step": 31864 - }, - { - "epoch": 16.61799217731421, - "grad_norm": 1.434840202331543, - "learning_rate": 6.848844221105528e-05, - "loss": 5.7956, - "step": 31865 - }, - { - "epoch": 16.61851368970013, - "grad_norm": 1.527025580406189, - "learning_rate": 6.848743718592965e-05, - "loss": 5.3184, - "step": 31866 - }, - { - "epoch": 16.61903520208605, - "grad_norm": 1.4150981903076172, - "learning_rate": 6.848643216080403e-05, - "loss": 5.6621, - "step": 31867 - }, - { - "epoch": 16.61955671447197, - "grad_norm": 1.416407585144043, - "learning_rate": 6.84854271356784e-05, - "loss": 5.5301, - "step": 31868 - }, - { - "epoch": 16.62007822685789, - "grad_norm": 1.4617884159088135, - "learning_rate": 6.848442211055277e-05, - "loss": 5.5013, - "step": 31869 - }, - { - "epoch": 16.620599739243808, - "grad_norm": 1.4561933279037476, - "learning_rate": 6.848341708542714e-05, - "loss": 5.4533, - "step": 31870 - }, - { - "epoch": 16.621121251629727, - "grad_norm": 1.4716582298278809, - "learning_rate": 6.848241206030152e-05, - "loss": 5.2156, - "step": 31871 - }, - { - "epoch": 16.621642764015647, - "grad_norm": 1.4080970287322998, - "learning_rate": 6.848140703517588e-05, - "loss": 5.5056, - "step": 31872 - }, - { - "epoch": 16.622164276401566, - "grad_norm": 1.4663978815078735, - "learning_rate": 6.848040201005026e-05, - "loss": 4.9122, - "step": 31873 - }, - { - "epoch": 16.622685788787482, - "grad_norm": 1.5482224225997925, - "learning_rate": 6.847939698492462e-05, - "loss": 5.6171, - "step": 31874 - }, - { - "epoch": 16.6232073011734, - "grad_norm": 1.4037301540374756, - "learning_rate": 6.8478391959799e-05, - "loss": 5.4715, - "step": 31875 - }, - { - "epoch": 16.62372881355932, - "grad_norm": 1.5918887853622437, - "learning_rate": 6.847738693467338e-05, - "loss": 4.9029, - "step": 31876 - }, - { - "epoch": 16.62425032594524, - "grad_norm": 1.5359920263290405, - "learning_rate": 6.847638190954774e-05, - "loss": 4.9092, - "step": 31877 - }, - { - "epoch": 16.62477183833116, - "grad_norm": 1.516543984413147, - "learning_rate": 6.847537688442212e-05, - "loss": 5.1078, - "step": 31878 - }, - { - "epoch": 16.62529335071708, - "grad_norm": 1.5235021114349365, - "learning_rate": 6.847437185929648e-05, - "loss": 5.5048, - "step": 31879 - }, - { - "epoch": 16.625814863103, - "grad_norm": 1.4682456254959106, - "learning_rate": 6.847336683417086e-05, - "loss": 5.5521, - "step": 31880 - }, - { - "epoch": 16.62633637548892, - "grad_norm": 1.591693639755249, - "learning_rate": 6.847236180904523e-05, - "loss": 5.4981, - "step": 31881 - }, - { - "epoch": 16.626857887874838, - "grad_norm": 1.2662756443023682, - "learning_rate": 6.84713567839196e-05, - "loss": 5.9601, - "step": 31882 - }, - { - "epoch": 16.627379400260757, - "grad_norm": 1.5226033926010132, - "learning_rate": 6.847035175879397e-05, - "loss": 5.486, - "step": 31883 - }, - { - "epoch": 16.627900912646677, - "grad_norm": 1.4315450191497803, - "learning_rate": 6.846934673366835e-05, - "loss": 5.4798, - "step": 31884 - }, - { - "epoch": 16.628422425032596, - "grad_norm": 1.4928046464920044, - "learning_rate": 6.846834170854271e-05, - "loss": 5.186, - "step": 31885 - }, - { - "epoch": 16.628943937418512, - "grad_norm": 1.557258129119873, - "learning_rate": 6.846733668341709e-05, - "loss": 5.3907, - "step": 31886 - }, - { - "epoch": 16.62946544980443, - "grad_norm": 1.6724591255187988, - "learning_rate": 6.846633165829147e-05, - "loss": 4.934, - "step": 31887 - }, - { - "epoch": 16.62998696219035, - "grad_norm": 1.4886794090270996, - "learning_rate": 6.846532663316583e-05, - "loss": 5.0048, - "step": 31888 - }, - { - "epoch": 16.63050847457627, - "grad_norm": 1.5613658428192139, - "learning_rate": 6.846432160804021e-05, - "loss": 4.6686, - "step": 31889 - }, - { - "epoch": 16.63102998696219, - "grad_norm": 1.431496262550354, - "learning_rate": 6.846331658291457e-05, - "loss": 5.5014, - "step": 31890 - }, - { - "epoch": 16.63155149934811, - "grad_norm": 1.5014609098434448, - "learning_rate": 6.846231155778895e-05, - "loss": 5.0991, - "step": 31891 - }, - { - "epoch": 16.63207301173403, - "grad_norm": 1.6725143194198608, - "learning_rate": 6.846130653266331e-05, - "loss": 4.6843, - "step": 31892 - }, - { - "epoch": 16.63259452411995, - "grad_norm": 1.658812403678894, - "learning_rate": 6.846030150753769e-05, - "loss": 4.8975, - "step": 31893 - }, - { - "epoch": 16.633116036505868, - "grad_norm": 1.6173521280288696, - "learning_rate": 6.845929648241206e-05, - "loss": 5.3553, - "step": 31894 - }, - { - "epoch": 16.633637548891787, - "grad_norm": 1.4931799173355103, - "learning_rate": 6.845829145728643e-05, - "loss": 5.2964, - "step": 31895 - }, - { - "epoch": 16.634159061277707, - "grad_norm": 1.5480148792266846, - "learning_rate": 6.845728643216081e-05, - "loss": 5.2093, - "step": 31896 - }, - { - "epoch": 16.634680573663623, - "grad_norm": 1.4691095352172852, - "learning_rate": 6.845628140703519e-05, - "loss": 5.4832, - "step": 31897 - }, - { - "epoch": 16.635202086049542, - "grad_norm": 1.4638532400131226, - "learning_rate": 6.845527638190955e-05, - "loss": 5.5678, - "step": 31898 - }, - { - "epoch": 16.63572359843546, - "grad_norm": 1.4019490480422974, - "learning_rate": 6.845427135678393e-05, - "loss": 5.1125, - "step": 31899 - }, - { - "epoch": 16.63624511082138, - "grad_norm": 1.611497402191162, - "learning_rate": 6.84532663316583e-05, - "loss": 4.758, - "step": 31900 - }, - { - "epoch": 16.6367666232073, - "grad_norm": 1.567062258720398, - "learning_rate": 6.845226130653266e-05, - "loss": 4.9861, - "step": 31901 - }, - { - "epoch": 16.63728813559322, - "grad_norm": 1.543563723564148, - "learning_rate": 6.845125628140704e-05, - "loss": 5.1787, - "step": 31902 - }, - { - "epoch": 16.63780964797914, - "grad_norm": 1.4323350191116333, - "learning_rate": 6.84502512562814e-05, - "loss": 5.6205, - "step": 31903 - }, - { - "epoch": 16.63833116036506, - "grad_norm": 1.5520819425582886, - "learning_rate": 6.844924623115578e-05, - "loss": 5.5593, - "step": 31904 - }, - { - "epoch": 16.63885267275098, - "grad_norm": 1.4478727579116821, - "learning_rate": 6.844824120603014e-05, - "loss": 5.2705, - "step": 31905 - }, - { - "epoch": 16.639374185136898, - "grad_norm": 1.4533765316009521, - "learning_rate": 6.844723618090452e-05, - "loss": 5.5308, - "step": 31906 - }, - { - "epoch": 16.639895697522817, - "grad_norm": 1.53481924533844, - "learning_rate": 6.84462311557789e-05, - "loss": 5.3357, - "step": 31907 - }, - { - "epoch": 16.640417209908737, - "grad_norm": 1.5570785999298096, - "learning_rate": 6.844522613065328e-05, - "loss": 5.1334, - "step": 31908 - }, - { - "epoch": 16.640938722294656, - "grad_norm": 1.5452288389205933, - "learning_rate": 6.844422110552764e-05, - "loss": 5.6066, - "step": 31909 - }, - { - "epoch": 16.641460234680572, - "grad_norm": 1.5492029190063477, - "learning_rate": 6.844321608040202e-05, - "loss": 5.3216, - "step": 31910 - }, - { - "epoch": 16.64198174706649, - "grad_norm": 1.483486533164978, - "learning_rate": 6.844221105527638e-05, - "loss": 5.4735, - "step": 31911 - }, - { - "epoch": 16.64250325945241, - "grad_norm": 1.4873508214950562, - "learning_rate": 6.844120603015076e-05, - "loss": 5.5659, - "step": 31912 - }, - { - "epoch": 16.64302477183833, - "grad_norm": 1.4417425394058228, - "learning_rate": 6.844020100502513e-05, - "loss": 5.6749, - "step": 31913 - }, - { - "epoch": 16.64354628422425, - "grad_norm": 1.4467220306396484, - "learning_rate": 6.843919597989949e-05, - "loss": 4.8219, - "step": 31914 - }, - { - "epoch": 16.64406779661017, - "grad_norm": 1.6462583541870117, - "learning_rate": 6.843819095477387e-05, - "loss": 5.1864, - "step": 31915 - }, - { - "epoch": 16.64458930899609, - "grad_norm": 1.5797569751739502, - "learning_rate": 6.843718592964825e-05, - "loss": 4.7511, - "step": 31916 - }, - { - "epoch": 16.64511082138201, - "grad_norm": 1.548746943473816, - "learning_rate": 6.843618090452262e-05, - "loss": 4.9183, - "step": 31917 - }, - { - "epoch": 16.645632333767928, - "grad_norm": 1.5686547756195068, - "learning_rate": 6.843517587939699e-05, - "loss": 5.2533, - "step": 31918 - }, - { - "epoch": 16.646153846153847, - "grad_norm": 1.4747228622436523, - "learning_rate": 6.843417085427137e-05, - "loss": 5.0514, - "step": 31919 - }, - { - "epoch": 16.646675358539767, - "grad_norm": 1.5008950233459473, - "learning_rate": 6.843316582914573e-05, - "loss": 5.4627, - "step": 31920 - }, - { - "epoch": 16.647196870925683, - "grad_norm": 1.5788356065750122, - "learning_rate": 6.843216080402011e-05, - "loss": 5.3666, - "step": 31921 - }, - { - "epoch": 16.647718383311602, - "grad_norm": 1.4959403276443481, - "learning_rate": 6.843115577889447e-05, - "loss": 4.5811, - "step": 31922 - }, - { - "epoch": 16.64823989569752, - "grad_norm": 1.6255667209625244, - "learning_rate": 6.843015075376885e-05, - "loss": 5.4326, - "step": 31923 - }, - { - "epoch": 16.64876140808344, - "grad_norm": 1.4038654565811157, - "learning_rate": 6.842914572864321e-05, - "loss": 5.3508, - "step": 31924 - }, - { - "epoch": 16.64928292046936, - "grad_norm": 1.5200715065002441, - "learning_rate": 6.842814070351759e-05, - "loss": 4.6382, - "step": 31925 - }, - { - "epoch": 16.64980443285528, - "grad_norm": 1.3741405010223389, - "learning_rate": 6.842713567839196e-05, - "loss": 5.2967, - "step": 31926 - }, - { - "epoch": 16.6503259452412, - "grad_norm": 1.4211163520812988, - "learning_rate": 6.842613065326633e-05, - "loss": 5.131, - "step": 31927 - }, - { - "epoch": 16.65084745762712, - "grad_norm": 1.4884880781173706, - "learning_rate": 6.842512562814071e-05, - "loss": 5.4846, - "step": 31928 - }, - { - "epoch": 16.65136897001304, - "grad_norm": 1.3818668127059937, - "learning_rate": 6.842412060301508e-05, - "loss": 5.0727, - "step": 31929 - }, - { - "epoch": 16.651890482398958, - "grad_norm": 1.3905547857284546, - "learning_rate": 6.842311557788945e-05, - "loss": 5.7815, - "step": 31930 - }, - { - "epoch": 16.652411994784877, - "grad_norm": 1.5259240865707397, - "learning_rate": 6.842211055276382e-05, - "loss": 5.0637, - "step": 31931 - }, - { - "epoch": 16.652933507170797, - "grad_norm": 1.4397190809249878, - "learning_rate": 6.84211055276382e-05, - "loss": 5.6769, - "step": 31932 - }, - { - "epoch": 16.653455019556713, - "grad_norm": 1.4992903470993042, - "learning_rate": 6.842010050251256e-05, - "loss": 4.7232, - "step": 31933 - }, - { - "epoch": 16.653976531942632, - "grad_norm": 1.388408899307251, - "learning_rate": 6.841909547738694e-05, - "loss": 5.6064, - "step": 31934 - }, - { - "epoch": 16.65449804432855, - "grad_norm": 1.4343599081039429, - "learning_rate": 6.84180904522613e-05, - "loss": 5.7613, - "step": 31935 - }, - { - "epoch": 16.65501955671447, - "grad_norm": 1.4131217002868652, - "learning_rate": 6.841708542713568e-05, - "loss": 5.6839, - "step": 31936 - }, - { - "epoch": 16.65554106910039, - "grad_norm": 1.4816687107086182, - "learning_rate": 6.841608040201006e-05, - "loss": 5.0489, - "step": 31937 - }, - { - "epoch": 16.65606258148631, - "grad_norm": 1.4833767414093018, - "learning_rate": 6.841507537688444e-05, - "loss": 5.5992, - "step": 31938 - }, - { - "epoch": 16.65658409387223, - "grad_norm": 1.5674302577972412, - "learning_rate": 6.84140703517588e-05, - "loss": 5.617, - "step": 31939 - }, - { - "epoch": 16.65710560625815, - "grad_norm": 1.3806949853897095, - "learning_rate": 6.841306532663318e-05, - "loss": 5.3472, - "step": 31940 - }, - { - "epoch": 16.65762711864407, - "grad_norm": 1.42318594455719, - "learning_rate": 6.841206030150754e-05, - "loss": 5.202, - "step": 31941 - }, - { - "epoch": 16.658148631029988, - "grad_norm": 1.446267008781433, - "learning_rate": 6.841105527638191e-05, - "loss": 5.4369, - "step": 31942 - }, - { - "epoch": 16.658670143415907, - "grad_norm": 1.4571664333343506, - "learning_rate": 6.841005025125629e-05, - "loss": 5.3911, - "step": 31943 - }, - { - "epoch": 16.659191655801827, - "grad_norm": 1.3605232238769531, - "learning_rate": 6.840904522613065e-05, - "loss": 4.948, - "step": 31944 - }, - { - "epoch": 16.659713168187743, - "grad_norm": 1.7863389253616333, - "learning_rate": 6.840804020100503e-05, - "loss": 4.9962, - "step": 31945 - }, - { - "epoch": 16.660234680573662, - "grad_norm": 1.455582618713379, - "learning_rate": 6.840703517587939e-05, - "loss": 5.7022, - "step": 31946 - }, - { - "epoch": 16.66075619295958, - "grad_norm": 1.357267141342163, - "learning_rate": 6.840603015075377e-05, - "loss": 5.2918, - "step": 31947 - }, - { - "epoch": 16.6612777053455, - "grad_norm": 1.469768762588501, - "learning_rate": 6.840502512562815e-05, - "loss": 5.2693, - "step": 31948 - }, - { - "epoch": 16.66179921773142, - "grad_norm": 1.4580644369125366, - "learning_rate": 6.840402010050252e-05, - "loss": 4.9595, - "step": 31949 - }, - { - "epoch": 16.66232073011734, - "grad_norm": 1.4501991271972656, - "learning_rate": 6.840301507537689e-05, - "loss": 5.604, - "step": 31950 - }, - { - "epoch": 16.66284224250326, - "grad_norm": 1.5719468593597412, - "learning_rate": 6.840201005025127e-05, - "loss": 4.8334, - "step": 31951 - }, - { - "epoch": 16.66336375488918, - "grad_norm": 1.8010900020599365, - "learning_rate": 6.840100502512563e-05, - "loss": 5.4659, - "step": 31952 - }, - { - "epoch": 16.6638852672751, - "grad_norm": 1.5608073472976685, - "learning_rate": 6.840000000000001e-05, - "loss": 5.6785, - "step": 31953 - }, - { - "epoch": 16.664406779661018, - "grad_norm": 1.5459074974060059, - "learning_rate": 6.839899497487437e-05, - "loss": 5.1916, - "step": 31954 - }, - { - "epoch": 16.664928292046937, - "grad_norm": 1.4627587795257568, - "learning_rate": 6.839798994974874e-05, - "loss": 4.9402, - "step": 31955 - }, - { - "epoch": 16.665449804432857, - "grad_norm": 1.506183385848999, - "learning_rate": 6.839698492462312e-05, - "loss": 5.5794, - "step": 31956 - }, - { - "epoch": 16.665971316818773, - "grad_norm": 1.4704970121383667, - "learning_rate": 6.839597989949748e-05, - "loss": 5.1187, - "step": 31957 - }, - { - "epoch": 16.666492829204692, - "grad_norm": 1.6960220336914062, - "learning_rate": 6.839497487437186e-05, - "loss": 4.9431, - "step": 31958 - }, - { - "epoch": 16.667014341590612, - "grad_norm": 1.632696509361267, - "learning_rate": 6.839396984924624e-05, - "loss": 5.119, - "step": 31959 - }, - { - "epoch": 16.66753585397653, - "grad_norm": 1.5150455236434937, - "learning_rate": 6.839296482412061e-05, - "loss": 5.4251, - "step": 31960 - }, - { - "epoch": 16.66805736636245, - "grad_norm": 1.4508358240127563, - "learning_rate": 6.839195979899498e-05, - "loss": 5.0038, - "step": 31961 - }, - { - "epoch": 16.66857887874837, - "grad_norm": 1.459735631942749, - "learning_rate": 6.839095477386936e-05, - "loss": 5.5181, - "step": 31962 - }, - { - "epoch": 16.66910039113429, - "grad_norm": 1.4761251211166382, - "learning_rate": 6.838994974874372e-05, - "loss": 5.0162, - "step": 31963 - }, - { - "epoch": 16.66962190352021, - "grad_norm": 1.431638479232788, - "learning_rate": 6.83889447236181e-05, - "loss": 4.8434, - "step": 31964 - }, - { - "epoch": 16.67014341590613, - "grad_norm": 1.5756245851516724, - "learning_rate": 6.838793969849246e-05, - "loss": 5.2848, - "step": 31965 - }, - { - "epoch": 16.670664928292048, - "grad_norm": 1.5461002588272095, - "learning_rate": 6.838693467336684e-05, - "loss": 4.9217, - "step": 31966 - }, - { - "epoch": 16.671186440677968, - "grad_norm": 1.3792314529418945, - "learning_rate": 6.83859296482412e-05, - "loss": 5.2632, - "step": 31967 - }, - { - "epoch": 16.671707953063887, - "grad_norm": 1.4643452167510986, - "learning_rate": 6.838492462311558e-05, - "loss": 4.9405, - "step": 31968 - }, - { - "epoch": 16.672229465449803, - "grad_norm": 1.4723987579345703, - "learning_rate": 6.838391959798996e-05, - "loss": 5.1936, - "step": 31969 - }, - { - "epoch": 16.672750977835722, - "grad_norm": 1.4659594297409058, - "learning_rate": 6.838291457286432e-05, - "loss": 5.1202, - "step": 31970 - }, - { - "epoch": 16.673272490221642, - "grad_norm": 1.4708499908447266, - "learning_rate": 6.83819095477387e-05, - "loss": 5.3598, - "step": 31971 - }, - { - "epoch": 16.67379400260756, - "grad_norm": 1.6018763780593872, - "learning_rate": 6.838090452261307e-05, - "loss": 5.2556, - "step": 31972 - }, - { - "epoch": 16.67431551499348, - "grad_norm": 1.5926071405410767, - "learning_rate": 6.837989949748744e-05, - "loss": 5.1462, - "step": 31973 - }, - { - "epoch": 16.6748370273794, - "grad_norm": 1.5126168727874756, - "learning_rate": 6.837889447236181e-05, - "loss": 5.473, - "step": 31974 - }, - { - "epoch": 16.67535853976532, - "grad_norm": 1.3723220825195312, - "learning_rate": 6.837788944723619e-05, - "loss": 5.4722, - "step": 31975 - }, - { - "epoch": 16.67588005215124, - "grad_norm": 1.4753799438476562, - "learning_rate": 6.837688442211055e-05, - "loss": 5.2273, - "step": 31976 - }, - { - "epoch": 16.67640156453716, - "grad_norm": 1.6215736865997314, - "learning_rate": 6.837587939698493e-05, - "loss": 4.846, - "step": 31977 - }, - { - "epoch": 16.676923076923078, - "grad_norm": 1.4044663906097412, - "learning_rate": 6.837487437185929e-05, - "loss": 4.4987, - "step": 31978 - }, - { - "epoch": 16.677444589308998, - "grad_norm": 1.4604347944259644, - "learning_rate": 6.837386934673367e-05, - "loss": 5.6386, - "step": 31979 - }, - { - "epoch": 16.677966101694913, - "grad_norm": 1.4653055667877197, - "learning_rate": 6.837286432160805e-05, - "loss": 5.4327, - "step": 31980 - }, - { - "epoch": 16.678487614080833, - "grad_norm": 1.5169107913970947, - "learning_rate": 6.837185929648241e-05, - "loss": 5.0267, - "step": 31981 - }, - { - "epoch": 16.679009126466752, - "grad_norm": 1.5345293283462524, - "learning_rate": 6.837085427135679e-05, - "loss": 5.3701, - "step": 31982 - }, - { - "epoch": 16.679530638852672, - "grad_norm": 1.5204527378082275, - "learning_rate": 6.836984924623115e-05, - "loss": 4.9365, - "step": 31983 - }, - { - "epoch": 16.68005215123859, - "grad_norm": 1.5136934518814087, - "learning_rate": 6.836884422110553e-05, - "loss": 5.3612, - "step": 31984 - }, - { - "epoch": 16.68057366362451, - "grad_norm": 1.5329545736312866, - "learning_rate": 6.83678391959799e-05, - "loss": 5.0328, - "step": 31985 - }, - { - "epoch": 16.68109517601043, - "grad_norm": 1.458855390548706, - "learning_rate": 6.836683417085427e-05, - "loss": 5.4262, - "step": 31986 - }, - { - "epoch": 16.68161668839635, - "grad_norm": 1.5793513059616089, - "learning_rate": 6.836582914572864e-05, - "loss": 5.2297, - "step": 31987 - }, - { - "epoch": 16.68213820078227, - "grad_norm": 1.553560733795166, - "learning_rate": 6.836482412060302e-05, - "loss": 5.343, - "step": 31988 - }, - { - "epoch": 16.68265971316819, - "grad_norm": 1.5456444025039673, - "learning_rate": 6.83638190954774e-05, - "loss": 4.4416, - "step": 31989 - }, - { - "epoch": 16.683181225554108, - "grad_norm": 1.5801692008972168, - "learning_rate": 6.836281407035177e-05, - "loss": 5.6659, - "step": 31990 - }, - { - "epoch": 16.683702737940028, - "grad_norm": 1.5634796619415283, - "learning_rate": 6.836180904522614e-05, - "loss": 4.7547, - "step": 31991 - }, - { - "epoch": 16.684224250325947, - "grad_norm": 1.505982518196106, - "learning_rate": 6.836080402010051e-05, - "loss": 5.3472, - "step": 31992 - }, - { - "epoch": 16.684745762711863, - "grad_norm": 1.51729154586792, - "learning_rate": 6.835979899497488e-05, - "loss": 5.2128, - "step": 31993 - }, - { - "epoch": 16.685267275097782, - "grad_norm": 1.4766892194747925, - "learning_rate": 6.835879396984924e-05, - "loss": 5.3472, - "step": 31994 - }, - { - "epoch": 16.685788787483702, - "grad_norm": 1.5803604125976562, - "learning_rate": 6.835778894472362e-05, - "loss": 5.1877, - "step": 31995 - }, - { - "epoch": 16.68631029986962, - "grad_norm": 1.4269686937332153, - "learning_rate": 6.835678391959798e-05, - "loss": 5.5263, - "step": 31996 - }, - { - "epoch": 16.68683181225554, - "grad_norm": 1.4586241245269775, - "learning_rate": 6.835577889447236e-05, - "loss": 5.6591, - "step": 31997 - }, - { - "epoch": 16.68735332464146, - "grad_norm": 1.5935258865356445, - "learning_rate": 6.835477386934673e-05, - "loss": 4.847, - "step": 31998 - }, - { - "epoch": 16.68787483702738, - "grad_norm": 1.5129070281982422, - "learning_rate": 6.83537688442211e-05, - "loss": 5.1345, - "step": 31999 - }, - { - "epoch": 16.6883963494133, - "grad_norm": 1.6582225561141968, - "learning_rate": 6.835276381909548e-05, - "loss": 5.0564, - "step": 32000 - }, - { - "epoch": 16.68891786179922, - "grad_norm": 1.6449384689331055, - "learning_rate": 6.835175879396986e-05, - "loss": 5.2391, - "step": 32001 - }, - { - "epoch": 16.689439374185138, - "grad_norm": 1.54106605052948, - "learning_rate": 6.835075376884422e-05, - "loss": 5.4856, - "step": 32002 - }, - { - "epoch": 16.689960886571058, - "grad_norm": 1.4999771118164062, - "learning_rate": 6.83497487437186e-05, - "loss": 5.5691, - "step": 32003 - }, - { - "epoch": 16.690482398956973, - "grad_norm": 1.5358470678329468, - "learning_rate": 6.834874371859297e-05, - "loss": 4.9833, - "step": 32004 - }, - { - "epoch": 16.691003911342893, - "grad_norm": 1.423054575920105, - "learning_rate": 6.834773869346734e-05, - "loss": 5.5241, - "step": 32005 - }, - { - "epoch": 16.691525423728812, - "grad_norm": 1.4493337869644165, - "learning_rate": 6.834673366834171e-05, - "loss": 5.2989, - "step": 32006 - }, - { - "epoch": 16.692046936114732, - "grad_norm": 1.5359723567962646, - "learning_rate": 6.834572864321607e-05, - "loss": 5.279, - "step": 32007 - }, - { - "epoch": 16.69256844850065, - "grad_norm": 1.4938026666641235, - "learning_rate": 6.834472361809045e-05, - "loss": 4.6092, - "step": 32008 - }, - { - "epoch": 16.69308996088657, - "grad_norm": 1.3895890712738037, - "learning_rate": 6.834371859296483e-05, - "loss": 5.2818, - "step": 32009 - }, - { - "epoch": 16.69361147327249, - "grad_norm": 1.4527480602264404, - "learning_rate": 6.83427135678392e-05, - "loss": 5.2511, - "step": 32010 - }, - { - "epoch": 16.69413298565841, - "grad_norm": 1.5115550756454468, - "learning_rate": 6.834170854271357e-05, - "loss": 5.1459, - "step": 32011 - }, - { - "epoch": 16.69465449804433, - "grad_norm": 1.4334189891815186, - "learning_rate": 6.834070351758795e-05, - "loss": 5.7758, - "step": 32012 - }, - { - "epoch": 16.69517601043025, - "grad_norm": 1.4479719400405884, - "learning_rate": 6.833969849246231e-05, - "loss": 5.33, - "step": 32013 - }, - { - "epoch": 16.695697522816168, - "grad_norm": 1.4262350797653198, - "learning_rate": 6.833869346733669e-05, - "loss": 5.5394, - "step": 32014 - }, - { - "epoch": 16.696219035202088, - "grad_norm": 1.6067804098129272, - "learning_rate": 6.833768844221106e-05, - "loss": 5.0638, - "step": 32015 - }, - { - "epoch": 16.696740547588004, - "grad_norm": 1.419331669807434, - "learning_rate": 6.833668341708543e-05, - "loss": 5.5816, - "step": 32016 - }, - { - "epoch": 16.697262059973923, - "grad_norm": 1.5293445587158203, - "learning_rate": 6.83356783919598e-05, - "loss": 5.2251, - "step": 32017 - }, - { - "epoch": 16.697783572359842, - "grad_norm": 1.5045663118362427, - "learning_rate": 6.833467336683417e-05, - "loss": 5.1528, - "step": 32018 - }, - { - "epoch": 16.698305084745762, - "grad_norm": 1.4855873584747314, - "learning_rate": 6.833366834170854e-05, - "loss": 5.4556, - "step": 32019 - }, - { - "epoch": 16.69882659713168, - "grad_norm": 1.4492416381835938, - "learning_rate": 6.833266331658292e-05, - "loss": 5.7569, - "step": 32020 - }, - { - "epoch": 16.6993481095176, - "grad_norm": 1.4488780498504639, - "learning_rate": 6.83316582914573e-05, - "loss": 5.3055, - "step": 32021 - }, - { - "epoch": 16.69986962190352, - "grad_norm": 1.5006591081619263, - "learning_rate": 6.833065326633166e-05, - "loss": 5.1597, - "step": 32022 - }, - { - "epoch": 16.70039113428944, - "grad_norm": 1.4456722736358643, - "learning_rate": 6.832964824120604e-05, - "loss": 5.1311, - "step": 32023 - }, - { - "epoch": 16.70091264667536, - "grad_norm": 1.4838725328445435, - "learning_rate": 6.83286432160804e-05, - "loss": 5.1238, - "step": 32024 - }, - { - "epoch": 16.70143415906128, - "grad_norm": 1.446617841720581, - "learning_rate": 6.832763819095478e-05, - "loss": 5.656, - "step": 32025 - }, - { - "epoch": 16.701955671447198, - "grad_norm": 1.4924781322479248, - "learning_rate": 6.832663316582914e-05, - "loss": 5.2886, - "step": 32026 - }, - { - "epoch": 16.702477183833118, - "grad_norm": 1.4063756465911865, - "learning_rate": 6.832562814070352e-05, - "loss": 5.3434, - "step": 32027 - }, - { - "epoch": 16.702998696219034, - "grad_norm": 1.5528002977371216, - "learning_rate": 6.832462311557789e-05, - "loss": 5.5495, - "step": 32028 - }, - { - "epoch": 16.703520208604953, - "grad_norm": 1.4852066040039062, - "learning_rate": 6.832361809045226e-05, - "loss": 5.0583, - "step": 32029 - }, - { - "epoch": 16.704041720990872, - "grad_norm": 1.5015804767608643, - "learning_rate": 6.832261306532664e-05, - "loss": 5.7979, - "step": 32030 - }, - { - "epoch": 16.704563233376792, - "grad_norm": 1.5225262641906738, - "learning_rate": 6.832160804020102e-05, - "loss": 5.3236, - "step": 32031 - }, - { - "epoch": 16.70508474576271, - "grad_norm": 1.4748858213424683, - "learning_rate": 6.832060301507538e-05, - "loss": 5.4485, - "step": 32032 - }, - { - "epoch": 16.70560625814863, - "grad_norm": 1.3844953775405884, - "learning_rate": 6.831959798994976e-05, - "loss": 5.4748, - "step": 32033 - }, - { - "epoch": 16.70612777053455, - "grad_norm": 1.4835209846496582, - "learning_rate": 6.831859296482413e-05, - "loss": 5.4723, - "step": 32034 - }, - { - "epoch": 16.70664928292047, - "grad_norm": 1.5100219249725342, - "learning_rate": 6.831758793969849e-05, - "loss": 5.3815, - "step": 32035 - }, - { - "epoch": 16.70717079530639, - "grad_norm": 1.4313688278198242, - "learning_rate": 6.831658291457287e-05, - "loss": 5.5927, - "step": 32036 - }, - { - "epoch": 16.70769230769231, - "grad_norm": 1.5883454084396362, - "learning_rate": 6.831557788944723e-05, - "loss": 4.6124, - "step": 32037 - }, - { - "epoch": 16.708213820078228, - "grad_norm": 1.521105408668518, - "learning_rate": 6.831457286432161e-05, - "loss": 5.4857, - "step": 32038 - }, - { - "epoch": 16.708735332464148, - "grad_norm": 1.6098339557647705, - "learning_rate": 6.831356783919597e-05, - "loss": 4.9547, - "step": 32039 - }, - { - "epoch": 16.709256844850064, - "grad_norm": 1.5180692672729492, - "learning_rate": 6.831256281407035e-05, - "loss": 5.277, - "step": 32040 - }, - { - "epoch": 16.709778357235983, - "grad_norm": 1.4843460321426392, - "learning_rate": 6.831155778894473e-05, - "loss": 5.3106, - "step": 32041 - }, - { - "epoch": 16.710299869621903, - "grad_norm": 1.5732197761535645, - "learning_rate": 6.831055276381911e-05, - "loss": 5.2819, - "step": 32042 - }, - { - "epoch": 16.710821382007822, - "grad_norm": 1.3269795179367065, - "learning_rate": 6.830954773869347e-05, - "loss": 5.6678, - "step": 32043 - }, - { - "epoch": 16.71134289439374, - "grad_norm": 1.4089423418045044, - "learning_rate": 6.830854271356785e-05, - "loss": 5.3553, - "step": 32044 - }, - { - "epoch": 16.71186440677966, - "grad_norm": 1.4202280044555664, - "learning_rate": 6.830753768844221e-05, - "loss": 5.7832, - "step": 32045 - }, - { - "epoch": 16.71238591916558, - "grad_norm": 1.5122604370117188, - "learning_rate": 6.830653266331659e-05, - "loss": 5.229, - "step": 32046 - }, - { - "epoch": 16.7129074315515, - "grad_norm": 1.5257827043533325, - "learning_rate": 6.830552763819096e-05, - "loss": 5.3114, - "step": 32047 - }, - { - "epoch": 16.71342894393742, - "grad_norm": 1.420432209968567, - "learning_rate": 6.830452261306532e-05, - "loss": 5.1025, - "step": 32048 - }, - { - "epoch": 16.71395045632334, - "grad_norm": 1.5778098106384277, - "learning_rate": 6.83035175879397e-05, - "loss": 4.5882, - "step": 32049 - }, - { - "epoch": 16.714471968709258, - "grad_norm": 1.4636292457580566, - "learning_rate": 6.830251256281408e-05, - "loss": 5.6923, - "step": 32050 - }, - { - "epoch": 16.714993481095178, - "grad_norm": 1.6086596250534058, - "learning_rate": 6.830150753768845e-05, - "loss": 5.2698, - "step": 32051 - }, - { - "epoch": 16.715514993481094, - "grad_norm": 1.5577521324157715, - "learning_rate": 6.830050251256282e-05, - "loss": 5.6871, - "step": 32052 - }, - { - "epoch": 16.716036505867013, - "grad_norm": 1.4369738101959229, - "learning_rate": 6.82994974874372e-05, - "loss": 5.7495, - "step": 32053 - }, - { - "epoch": 16.716558018252933, - "grad_norm": 1.4845877885818481, - "learning_rate": 6.829849246231156e-05, - "loss": 5.3827, - "step": 32054 - }, - { - "epoch": 16.717079530638852, - "grad_norm": 1.454537272453308, - "learning_rate": 6.829748743718594e-05, - "loss": 5.3787, - "step": 32055 - }, - { - "epoch": 16.71760104302477, - "grad_norm": 1.5892754793167114, - "learning_rate": 6.82964824120603e-05, - "loss": 5.2177, - "step": 32056 - }, - { - "epoch": 16.71812255541069, - "grad_norm": 1.588005781173706, - "learning_rate": 6.829547738693468e-05, - "loss": 5.4405, - "step": 32057 - }, - { - "epoch": 16.71864406779661, - "grad_norm": 1.462024211883545, - "learning_rate": 6.829447236180904e-05, - "loss": 5.4795, - "step": 32058 - }, - { - "epoch": 16.71916558018253, - "grad_norm": 1.503610610961914, - "learning_rate": 6.829346733668342e-05, - "loss": 5.0622, - "step": 32059 - }, - { - "epoch": 16.71968709256845, - "grad_norm": 1.5807548761367798, - "learning_rate": 6.829246231155779e-05, - "loss": 5.0496, - "step": 32060 - }, - { - "epoch": 16.72020860495437, - "grad_norm": 1.3107032775878906, - "learning_rate": 6.829145728643216e-05, - "loss": 5.7079, - "step": 32061 - }, - { - "epoch": 16.72073011734029, - "grad_norm": 1.5398658514022827, - "learning_rate": 6.829045226130654e-05, - "loss": 5.0112, - "step": 32062 - }, - { - "epoch": 16.721251629726208, - "grad_norm": 1.5147812366485596, - "learning_rate": 6.82894472361809e-05, - "loss": 5.0293, - "step": 32063 - }, - { - "epoch": 16.721773142112124, - "grad_norm": 1.4100427627563477, - "learning_rate": 6.828844221105528e-05, - "loss": 5.7493, - "step": 32064 - }, - { - "epoch": 16.722294654498043, - "grad_norm": 1.5007672309875488, - "learning_rate": 6.828743718592965e-05, - "loss": 5.5515, - "step": 32065 - }, - { - "epoch": 16.722816166883963, - "grad_norm": 1.3731693029403687, - "learning_rate": 6.828643216080403e-05, - "loss": 5.1137, - "step": 32066 - }, - { - "epoch": 16.723337679269882, - "grad_norm": 1.468955159187317, - "learning_rate": 6.828542713567839e-05, - "loss": 5.1844, - "step": 32067 - }, - { - "epoch": 16.7238591916558, - "grad_norm": 1.483717918395996, - "learning_rate": 6.828442211055277e-05, - "loss": 5.5012, - "step": 32068 - }, - { - "epoch": 16.72438070404172, - "grad_norm": 1.6542272567749023, - "learning_rate": 6.828341708542713e-05, - "loss": 4.5993, - "step": 32069 - }, - { - "epoch": 16.72490221642764, - "grad_norm": 1.4523786306381226, - "learning_rate": 6.828241206030151e-05, - "loss": 5.4939, - "step": 32070 - }, - { - "epoch": 16.72542372881356, - "grad_norm": 1.4198640584945679, - "learning_rate": 6.828140703517589e-05, - "loss": 5.4627, - "step": 32071 - }, - { - "epoch": 16.72594524119948, - "grad_norm": 1.5143439769744873, - "learning_rate": 6.828040201005027e-05, - "loss": 5.5713, - "step": 32072 - }, - { - "epoch": 16.7264667535854, - "grad_norm": 1.636934757232666, - "learning_rate": 6.827939698492463e-05, - "loss": 4.6676, - "step": 32073 - }, - { - "epoch": 16.72698826597132, - "grad_norm": 1.459311604499817, - "learning_rate": 6.8278391959799e-05, - "loss": 5.3797, - "step": 32074 - }, - { - "epoch": 16.727509778357238, - "grad_norm": 1.544616937637329, - "learning_rate": 6.827738693467337e-05, - "loss": 5.028, - "step": 32075 - }, - { - "epoch": 16.728031290743154, - "grad_norm": 1.3669708967208862, - "learning_rate": 6.827638190954774e-05, - "loss": 5.5136, - "step": 32076 - }, - { - "epoch": 16.728552803129073, - "grad_norm": 1.5376347303390503, - "learning_rate": 6.827537688442211e-05, - "loss": 5.035, - "step": 32077 - }, - { - "epoch": 16.729074315514993, - "grad_norm": 1.515159249305725, - "learning_rate": 6.827437185929648e-05, - "loss": 5.096, - "step": 32078 - }, - { - "epoch": 16.729595827900912, - "grad_norm": 1.5381991863250732, - "learning_rate": 6.827336683417086e-05, - "loss": 5.2108, - "step": 32079 - }, - { - "epoch": 16.73011734028683, - "grad_norm": 1.4692661762237549, - "learning_rate": 6.827236180904522e-05, - "loss": 5.2953, - "step": 32080 - }, - { - "epoch": 16.73063885267275, - "grad_norm": 1.7885565757751465, - "learning_rate": 6.82713567839196e-05, - "loss": 4.577, - "step": 32081 - }, - { - "epoch": 16.73116036505867, - "grad_norm": 1.5153635740280151, - "learning_rate": 6.827035175879398e-05, - "loss": 4.8147, - "step": 32082 - }, - { - "epoch": 16.73168187744459, - "grad_norm": 1.442362904548645, - "learning_rate": 6.826934673366835e-05, - "loss": 4.9, - "step": 32083 - }, - { - "epoch": 16.73220338983051, - "grad_norm": 1.5675828456878662, - "learning_rate": 6.826834170854272e-05, - "loss": 4.9915, - "step": 32084 - }, - { - "epoch": 16.73272490221643, - "grad_norm": 1.4527041912078857, - "learning_rate": 6.82673366834171e-05, - "loss": 5.4928, - "step": 32085 - }, - { - "epoch": 16.73324641460235, - "grad_norm": 1.5254976749420166, - "learning_rate": 6.826633165829146e-05, - "loss": 4.3864, - "step": 32086 - }, - { - "epoch": 16.733767926988264, - "grad_norm": 1.4401723146438599, - "learning_rate": 6.826532663316582e-05, - "loss": 5.1929, - "step": 32087 - }, - { - "epoch": 16.734289439374184, - "grad_norm": 1.5075974464416504, - "learning_rate": 6.82643216080402e-05, - "loss": 4.896, - "step": 32088 - }, - { - "epoch": 16.734810951760103, - "grad_norm": 1.5956988334655762, - "learning_rate": 6.826331658291457e-05, - "loss": 5.0649, - "step": 32089 - }, - { - "epoch": 16.735332464146023, - "grad_norm": 1.5353254079818726, - "learning_rate": 6.826231155778894e-05, - "loss": 5.0025, - "step": 32090 - }, - { - "epoch": 16.735853976531942, - "grad_norm": 1.5404528379440308, - "learning_rate": 6.826130653266332e-05, - "loss": 5.3508, - "step": 32091 - }, - { - "epoch": 16.73637548891786, - "grad_norm": 1.391295075416565, - "learning_rate": 6.82603015075377e-05, - "loss": 5.5469, - "step": 32092 - }, - { - "epoch": 16.73689700130378, - "grad_norm": 1.4764683246612549, - "learning_rate": 6.825929648241206e-05, - "loss": 5.1145, - "step": 32093 - }, - { - "epoch": 16.7374185136897, - "grad_norm": 1.486870288848877, - "learning_rate": 6.825829145728644e-05, - "loss": 5.0594, - "step": 32094 - }, - { - "epoch": 16.73794002607562, - "grad_norm": 1.59083092212677, - "learning_rate": 6.825728643216081e-05, - "loss": 5.0461, - "step": 32095 - }, - { - "epoch": 16.73846153846154, - "grad_norm": 1.441457748413086, - "learning_rate": 6.825628140703518e-05, - "loss": 5.5432, - "step": 32096 - }, - { - "epoch": 16.73898305084746, - "grad_norm": 1.488957166671753, - "learning_rate": 6.825527638190955e-05, - "loss": 5.4442, - "step": 32097 - }, - { - "epoch": 16.73950456323338, - "grad_norm": 1.4642499685287476, - "learning_rate": 6.825427135678393e-05, - "loss": 5.5825, - "step": 32098 - }, - { - "epoch": 16.740026075619298, - "grad_norm": 1.4976849555969238, - "learning_rate": 6.825326633165829e-05, - "loss": 5.3616, - "step": 32099 - }, - { - "epoch": 16.740547588005214, - "grad_norm": 1.7170675992965698, - "learning_rate": 6.825226130653266e-05, - "loss": 4.7723, - "step": 32100 - }, - { - "epoch": 16.741069100391133, - "grad_norm": 1.477333426475525, - "learning_rate": 6.825125628140703e-05, - "loss": 5.649, - "step": 32101 - }, - { - "epoch": 16.741590612777053, - "grad_norm": 1.5829054117202759, - "learning_rate": 6.825025125628141e-05, - "loss": 5.2818, - "step": 32102 - }, - { - "epoch": 16.742112125162972, - "grad_norm": 1.4638057947158813, - "learning_rate": 6.824924623115579e-05, - "loss": 5.1177, - "step": 32103 - }, - { - "epoch": 16.74263363754889, - "grad_norm": 1.469643473625183, - "learning_rate": 6.824824120603015e-05, - "loss": 5.3045, - "step": 32104 - }, - { - "epoch": 16.74315514993481, - "grad_norm": 1.5875632762908936, - "learning_rate": 6.824723618090453e-05, - "loss": 5.0973, - "step": 32105 - }, - { - "epoch": 16.74367666232073, - "grad_norm": 1.503047227859497, - "learning_rate": 6.82462311557789e-05, - "loss": 5.5399, - "step": 32106 - }, - { - "epoch": 16.74419817470665, - "grad_norm": 1.4575330018997192, - "learning_rate": 6.824522613065327e-05, - "loss": 5.7665, - "step": 32107 - }, - { - "epoch": 16.74471968709257, - "grad_norm": 1.5517851114273071, - "learning_rate": 6.824422110552764e-05, - "loss": 5.1736, - "step": 32108 - }, - { - "epoch": 16.74524119947849, - "grad_norm": 1.4675567150115967, - "learning_rate": 6.824321608040202e-05, - "loss": 5.2823, - "step": 32109 - }, - { - "epoch": 16.74576271186441, - "grad_norm": 1.4470287561416626, - "learning_rate": 6.824221105527638e-05, - "loss": 5.679, - "step": 32110 - }, - { - "epoch": 16.746284224250324, - "grad_norm": 1.561443567276001, - "learning_rate": 6.824120603015076e-05, - "loss": 5.4162, - "step": 32111 - }, - { - "epoch": 16.746805736636244, - "grad_norm": 1.4478700160980225, - "learning_rate": 6.824020100502512e-05, - "loss": 5.4495, - "step": 32112 - }, - { - "epoch": 16.747327249022163, - "grad_norm": 1.476166009902954, - "learning_rate": 6.82391959798995e-05, - "loss": 5.6406, - "step": 32113 - }, - { - "epoch": 16.747848761408083, - "grad_norm": 1.5210410356521606, - "learning_rate": 6.823819095477388e-05, - "loss": 5.3948, - "step": 32114 - }, - { - "epoch": 16.748370273794002, - "grad_norm": 1.5466742515563965, - "learning_rate": 6.823718592964824e-05, - "loss": 5.3212, - "step": 32115 - }, - { - "epoch": 16.74889178617992, - "grad_norm": 1.4556159973144531, - "learning_rate": 6.823618090452262e-05, - "loss": 5.2083, - "step": 32116 - }, - { - "epoch": 16.74941329856584, - "grad_norm": 1.4717140197753906, - "learning_rate": 6.823517587939698e-05, - "loss": 5.3185, - "step": 32117 - }, - { - "epoch": 16.74993481095176, - "grad_norm": 1.3798179626464844, - "learning_rate": 6.823417085427136e-05, - "loss": 5.6402, - "step": 32118 - }, - { - "epoch": 16.75045632333768, - "grad_norm": 1.376600980758667, - "learning_rate": 6.823316582914573e-05, - "loss": 5.0926, - "step": 32119 - }, - { - "epoch": 16.7509778357236, - "grad_norm": 1.403267502784729, - "learning_rate": 6.82321608040201e-05, - "loss": 5.4897, - "step": 32120 - }, - { - "epoch": 16.75149934810952, - "grad_norm": 1.5837265253067017, - "learning_rate": 6.823115577889447e-05, - "loss": 5.289, - "step": 32121 - }, - { - "epoch": 16.75202086049544, - "grad_norm": 1.574583649635315, - "learning_rate": 6.823015075376885e-05, - "loss": 5.4006, - "step": 32122 - }, - { - "epoch": 16.752542372881354, - "grad_norm": 1.4509285688400269, - "learning_rate": 6.822914572864322e-05, - "loss": 5.7117, - "step": 32123 - }, - { - "epoch": 16.753063885267274, - "grad_norm": 1.4814643859863281, - "learning_rate": 6.82281407035176e-05, - "loss": 5.506, - "step": 32124 - }, - { - "epoch": 16.753585397653193, - "grad_norm": 1.4786255359649658, - "learning_rate": 6.822713567839197e-05, - "loss": 5.6403, - "step": 32125 - }, - { - "epoch": 16.754106910039113, - "grad_norm": 1.5042423009872437, - "learning_rate": 6.822613065326634e-05, - "loss": 5.1768, - "step": 32126 - }, - { - "epoch": 16.754628422425032, - "grad_norm": 1.4331161975860596, - "learning_rate": 6.822512562814071e-05, - "loss": 5.4156, - "step": 32127 - }, - { - "epoch": 16.75514993481095, - "grad_norm": 1.5388671159744263, - "learning_rate": 6.822412060301507e-05, - "loss": 5.4051, - "step": 32128 - }, - { - "epoch": 16.75567144719687, - "grad_norm": 1.4540215730667114, - "learning_rate": 6.822311557788945e-05, - "loss": 5.4767, - "step": 32129 - }, - { - "epoch": 16.75619295958279, - "grad_norm": 1.3237954378128052, - "learning_rate": 6.822211055276381e-05, - "loss": 5.6123, - "step": 32130 - }, - { - "epoch": 16.75671447196871, - "grad_norm": 1.4257392883300781, - "learning_rate": 6.822110552763819e-05, - "loss": 5.2283, - "step": 32131 - }, - { - "epoch": 16.75723598435463, - "grad_norm": 1.4451539516448975, - "learning_rate": 6.822010050251256e-05, - "loss": 5.5549, - "step": 32132 - }, - { - "epoch": 16.75775749674055, - "grad_norm": 1.4449673891067505, - "learning_rate": 6.821909547738693e-05, - "loss": 5.2065, - "step": 32133 - }, - { - "epoch": 16.75827900912647, - "grad_norm": 1.3807724714279175, - "learning_rate": 6.821809045226131e-05, - "loss": 5.4929, - "step": 32134 - }, - { - "epoch": 16.758800521512384, - "grad_norm": 1.622580647468567, - "learning_rate": 6.821708542713569e-05, - "loss": 5.1972, - "step": 32135 - }, - { - "epoch": 16.759322033898304, - "grad_norm": 1.4418615102767944, - "learning_rate": 6.821608040201005e-05, - "loss": 5.5234, - "step": 32136 - }, - { - "epoch": 16.759843546284223, - "grad_norm": 1.500157117843628, - "learning_rate": 6.821507537688443e-05, - "loss": 5.5876, - "step": 32137 - }, - { - "epoch": 16.760365058670143, - "grad_norm": 1.5800188779830933, - "learning_rate": 6.82140703517588e-05, - "loss": 5.1733, - "step": 32138 - }, - { - "epoch": 16.760886571056062, - "grad_norm": 1.4927252531051636, - "learning_rate": 6.821306532663317e-05, - "loss": 5.3077, - "step": 32139 - }, - { - "epoch": 16.76140808344198, - "grad_norm": 1.6553534269332886, - "learning_rate": 6.821206030150754e-05, - "loss": 4.5784, - "step": 32140 - }, - { - "epoch": 16.7619295958279, - "grad_norm": 1.5384092330932617, - "learning_rate": 6.82110552763819e-05, - "loss": 5.1933, - "step": 32141 - }, - { - "epoch": 16.76245110821382, - "grad_norm": 1.480545997619629, - "learning_rate": 6.821005025125628e-05, - "loss": 5.459, - "step": 32142 - }, - { - "epoch": 16.76297262059974, - "grad_norm": 1.4364148378372192, - "learning_rate": 6.820904522613066e-05, - "loss": 5.4782, - "step": 32143 - }, - { - "epoch": 16.76349413298566, - "grad_norm": 1.4268953800201416, - "learning_rate": 6.820804020100504e-05, - "loss": 5.6595, - "step": 32144 - }, - { - "epoch": 16.76401564537158, - "grad_norm": 1.5145984888076782, - "learning_rate": 6.82070351758794e-05, - "loss": 5.3405, - "step": 32145 - }, - { - "epoch": 16.7645371577575, - "grad_norm": 1.4740502834320068, - "learning_rate": 6.820603015075378e-05, - "loss": 5.5224, - "step": 32146 - }, - { - "epoch": 16.765058670143414, - "grad_norm": 1.525972843170166, - "learning_rate": 6.820502512562814e-05, - "loss": 5.0288, - "step": 32147 - }, - { - "epoch": 16.765580182529334, - "grad_norm": 1.4325059652328491, - "learning_rate": 6.820402010050252e-05, - "loss": 5.0563, - "step": 32148 - }, - { - "epoch": 16.766101694915253, - "grad_norm": 1.4547858238220215, - "learning_rate": 6.820301507537688e-05, - "loss": 5.6503, - "step": 32149 - }, - { - "epoch": 16.766623207301173, - "grad_norm": 1.4337540864944458, - "learning_rate": 6.820201005025126e-05, - "loss": 5.752, - "step": 32150 - }, - { - "epoch": 16.767144719687092, - "grad_norm": 1.4925023317337036, - "learning_rate": 6.820100502512563e-05, - "loss": 5.1012, - "step": 32151 - }, - { - "epoch": 16.76766623207301, - "grad_norm": 1.4462909698486328, - "learning_rate": 6.82e-05, - "loss": 5.8546, - "step": 32152 - }, - { - "epoch": 16.76818774445893, - "grad_norm": 1.5949749946594238, - "learning_rate": 6.819899497487437e-05, - "loss": 4.3826, - "step": 32153 - }, - { - "epoch": 16.76870925684485, - "grad_norm": 1.4138106107711792, - "learning_rate": 6.819798994974875e-05, - "loss": 5.7878, - "step": 32154 - }, - { - "epoch": 16.76923076923077, - "grad_norm": 1.456467866897583, - "learning_rate": 6.819698492462312e-05, - "loss": 5.4614, - "step": 32155 - }, - { - "epoch": 16.76975228161669, - "grad_norm": 1.4989033937454224, - "learning_rate": 6.819597989949749e-05, - "loss": 5.5149, - "step": 32156 - }, - { - "epoch": 16.77027379400261, - "grad_norm": 1.5333174467086792, - "learning_rate": 6.819497487437187e-05, - "loss": 5.1161, - "step": 32157 - }, - { - "epoch": 16.77079530638853, - "grad_norm": 1.4783287048339844, - "learning_rate": 6.819396984924623e-05, - "loss": 5.4088, - "step": 32158 - }, - { - "epoch": 16.771316818774444, - "grad_norm": 1.4731082916259766, - "learning_rate": 6.819296482412061e-05, - "loss": 5.2375, - "step": 32159 - }, - { - "epoch": 16.771838331160364, - "grad_norm": 1.514878273010254, - "learning_rate": 6.819195979899497e-05, - "loss": 5.1704, - "step": 32160 - }, - { - "epoch": 16.772359843546283, - "grad_norm": 1.530027151107788, - "learning_rate": 6.819095477386935e-05, - "loss": 5.0929, - "step": 32161 - }, - { - "epoch": 16.772881355932203, - "grad_norm": 1.3581746816635132, - "learning_rate": 6.818994974874371e-05, - "loss": 5.7854, - "step": 32162 - }, - { - "epoch": 16.773402868318122, - "grad_norm": 1.423215389251709, - "learning_rate": 6.818894472361809e-05, - "loss": 5.1918, - "step": 32163 - }, - { - "epoch": 16.77392438070404, - "grad_norm": 1.373935580253601, - "learning_rate": 6.818793969849247e-05, - "loss": 5.18, - "step": 32164 - }, - { - "epoch": 16.77444589308996, - "grad_norm": 1.4696706533432007, - "learning_rate": 6.818693467336685e-05, - "loss": 5.4242, - "step": 32165 - }, - { - "epoch": 16.77496740547588, - "grad_norm": 1.4267797470092773, - "learning_rate": 6.818592964824121e-05, - "loss": 5.4275, - "step": 32166 - }, - { - "epoch": 16.7754889178618, - "grad_norm": 1.5421521663665771, - "learning_rate": 6.818492462311558e-05, - "loss": 4.8055, - "step": 32167 - }, - { - "epoch": 16.77601043024772, - "grad_norm": 1.459506869316101, - "learning_rate": 6.818391959798995e-05, - "loss": 5.0041, - "step": 32168 - }, - { - "epoch": 16.77653194263364, - "grad_norm": 1.4908419847488403, - "learning_rate": 6.818291457286432e-05, - "loss": 5.8071, - "step": 32169 - }, - { - "epoch": 16.777053455019555, - "grad_norm": 1.5660477876663208, - "learning_rate": 6.81819095477387e-05, - "loss": 5.2161, - "step": 32170 - }, - { - "epoch": 16.777574967405474, - "grad_norm": 1.525856375694275, - "learning_rate": 6.818090452261306e-05, - "loss": 5.703, - "step": 32171 - }, - { - "epoch": 16.778096479791394, - "grad_norm": 1.4893053770065308, - "learning_rate": 6.817989949748744e-05, - "loss": 5.5031, - "step": 32172 - }, - { - "epoch": 16.778617992177313, - "grad_norm": 1.56302809715271, - "learning_rate": 6.81788944723618e-05, - "loss": 5.1926, - "step": 32173 - }, - { - "epoch": 16.779139504563233, - "grad_norm": 1.4364020824432373, - "learning_rate": 6.817788944723618e-05, - "loss": 5.1163, - "step": 32174 - }, - { - "epoch": 16.779661016949152, - "grad_norm": 1.4721269607543945, - "learning_rate": 6.817688442211056e-05, - "loss": 5.3535, - "step": 32175 - }, - { - "epoch": 16.78018252933507, - "grad_norm": 1.5558595657348633, - "learning_rate": 6.817587939698494e-05, - "loss": 5.1643, - "step": 32176 - }, - { - "epoch": 16.78070404172099, - "grad_norm": 1.3646856546401978, - "learning_rate": 6.81748743718593e-05, - "loss": 5.4677, - "step": 32177 - }, - { - "epoch": 16.78122555410691, - "grad_norm": 1.471401572227478, - "learning_rate": 6.817386934673368e-05, - "loss": 5.5488, - "step": 32178 - }, - { - "epoch": 16.78174706649283, - "grad_norm": 1.4115381240844727, - "learning_rate": 6.817286432160804e-05, - "loss": 5.3739, - "step": 32179 - }, - { - "epoch": 16.78226857887875, - "grad_norm": 1.461485505104065, - "learning_rate": 6.817185929648241e-05, - "loss": 5.7767, - "step": 32180 - }, - { - "epoch": 16.78279009126467, - "grad_norm": 1.4169188737869263, - "learning_rate": 6.817085427135679e-05, - "loss": 5.527, - "step": 32181 - }, - { - "epoch": 16.78331160365059, - "grad_norm": 1.5787427425384521, - "learning_rate": 6.816984924623115e-05, - "loss": 4.9011, - "step": 32182 - }, - { - "epoch": 16.783833116036504, - "grad_norm": 1.3940954208374023, - "learning_rate": 6.816884422110553e-05, - "loss": 5.7509, - "step": 32183 - }, - { - "epoch": 16.784354628422424, - "grad_norm": 1.597353458404541, - "learning_rate": 6.81678391959799e-05, - "loss": 5.1301, - "step": 32184 - }, - { - "epoch": 16.784876140808343, - "grad_norm": 1.4991800785064697, - "learning_rate": 6.816683417085428e-05, - "loss": 5.2222, - "step": 32185 - }, - { - "epoch": 16.785397653194263, - "grad_norm": 1.5732795000076294, - "learning_rate": 6.816582914572865e-05, - "loss": 5.3281, - "step": 32186 - }, - { - "epoch": 16.785919165580182, - "grad_norm": 1.4665584564208984, - "learning_rate": 6.816482412060303e-05, - "loss": 4.7169, - "step": 32187 - }, - { - "epoch": 16.7864406779661, - "grad_norm": 1.464357614517212, - "learning_rate": 6.816381909547739e-05, - "loss": 5.5783, - "step": 32188 - }, - { - "epoch": 16.78696219035202, - "grad_norm": 1.4957258701324463, - "learning_rate": 6.816281407035177e-05, - "loss": 4.7752, - "step": 32189 - }, - { - "epoch": 16.78748370273794, - "grad_norm": 1.4736332893371582, - "learning_rate": 6.816180904522613e-05, - "loss": 5.5465, - "step": 32190 - }, - { - "epoch": 16.78800521512386, - "grad_norm": 1.5552585124969482, - "learning_rate": 6.816080402010051e-05, - "loss": 5.4028, - "step": 32191 - }, - { - "epoch": 16.78852672750978, - "grad_norm": 1.3648723363876343, - "learning_rate": 6.815979899497487e-05, - "loss": 5.5991, - "step": 32192 - }, - { - "epoch": 16.7890482398957, - "grad_norm": 1.5452773571014404, - "learning_rate": 6.815879396984925e-05, - "loss": 5.3919, - "step": 32193 - }, - { - "epoch": 16.789569752281615, - "grad_norm": 1.4785019159317017, - "learning_rate": 6.815778894472362e-05, - "loss": 5.259, - "step": 32194 - }, - { - "epoch": 16.790091264667534, - "grad_norm": 1.6074814796447754, - "learning_rate": 6.8156783919598e-05, - "loss": 5.3228, - "step": 32195 - }, - { - "epoch": 16.790612777053454, - "grad_norm": 1.6142023801803589, - "learning_rate": 6.815577889447237e-05, - "loss": 4.876, - "step": 32196 - }, - { - "epoch": 16.791134289439373, - "grad_norm": 1.654758095741272, - "learning_rate": 6.815477386934674e-05, - "loss": 5.301, - "step": 32197 - }, - { - "epoch": 16.791655801825293, - "grad_norm": 1.5032848119735718, - "learning_rate": 6.815376884422111e-05, - "loss": 5.715, - "step": 32198 - }, - { - "epoch": 16.792177314211212, - "grad_norm": 1.5121691226959229, - "learning_rate": 6.815276381909548e-05, - "loss": 5.2213, - "step": 32199 - }, - { - "epoch": 16.79269882659713, - "grad_norm": 1.4614659547805786, - "learning_rate": 6.815175879396986e-05, - "loss": 5.4412, - "step": 32200 - }, - { - "epoch": 16.79322033898305, - "grad_norm": 1.5777974128723145, - "learning_rate": 6.815075376884422e-05, - "loss": 5.1107, - "step": 32201 - }, - { - "epoch": 16.79374185136897, - "grad_norm": 1.6537309885025024, - "learning_rate": 6.81497487437186e-05, - "loss": 4.4437, - "step": 32202 - }, - { - "epoch": 16.79426336375489, - "grad_norm": 1.432127833366394, - "learning_rate": 6.814874371859296e-05, - "loss": 5.7576, - "step": 32203 - }, - { - "epoch": 16.79478487614081, - "grad_norm": 1.5189236402511597, - "learning_rate": 6.814773869346734e-05, - "loss": 5.2628, - "step": 32204 - }, - { - "epoch": 16.79530638852673, - "grad_norm": 1.5158084630966187, - "learning_rate": 6.814673366834172e-05, - "loss": 5.1339, - "step": 32205 - }, - { - "epoch": 16.795827900912645, - "grad_norm": 1.518705129623413, - "learning_rate": 6.81457286432161e-05, - "loss": 5.0622, - "step": 32206 - }, - { - "epoch": 16.796349413298564, - "grad_norm": 1.4828519821166992, - "learning_rate": 6.814472361809046e-05, - "loss": 5.4874, - "step": 32207 - }, - { - "epoch": 16.796870925684484, - "grad_norm": 1.5688568353652954, - "learning_rate": 6.814371859296482e-05, - "loss": 5.4659, - "step": 32208 - }, - { - "epoch": 16.797392438070403, - "grad_norm": 1.4416998624801636, - "learning_rate": 6.81427135678392e-05, - "loss": 5.5827, - "step": 32209 - }, - { - "epoch": 16.797913950456323, - "grad_norm": 1.6394208669662476, - "learning_rate": 6.814170854271357e-05, - "loss": 4.8147, - "step": 32210 - }, - { - "epoch": 16.798435462842242, - "grad_norm": 1.3674403429031372, - "learning_rate": 6.814070351758794e-05, - "loss": 5.8231, - "step": 32211 - }, - { - "epoch": 16.798956975228162, - "grad_norm": 1.4532010555267334, - "learning_rate": 6.813969849246231e-05, - "loss": 5.5624, - "step": 32212 - }, - { - "epoch": 16.79947848761408, - "grad_norm": 1.550743818283081, - "learning_rate": 6.813869346733669e-05, - "loss": 5.2968, - "step": 32213 - }, - { - "epoch": 16.8, - "grad_norm": 1.5190784931182861, - "learning_rate": 6.813768844221105e-05, - "loss": 5.3274, - "step": 32214 - }, - { - "epoch": 16.80052151238592, - "grad_norm": 1.59329092502594, - "learning_rate": 6.813668341708543e-05, - "loss": 5.4457, - "step": 32215 - }, - { - "epoch": 16.80104302477184, - "grad_norm": 1.5243127346038818, - "learning_rate": 6.81356783919598e-05, - "loss": 4.6692, - "step": 32216 - }, - { - "epoch": 16.80156453715776, - "grad_norm": 1.5941133499145508, - "learning_rate": 6.813467336683418e-05, - "loss": 5.2067, - "step": 32217 - }, - { - "epoch": 16.802086049543675, - "grad_norm": 1.618507981300354, - "learning_rate": 6.813366834170855e-05, - "loss": 5.1478, - "step": 32218 - }, - { - "epoch": 16.802607561929594, - "grad_norm": 1.4811009168624878, - "learning_rate": 6.813266331658293e-05, - "loss": 5.576, - "step": 32219 - }, - { - "epoch": 16.803129074315514, - "grad_norm": 1.4906744956970215, - "learning_rate": 6.813165829145729e-05, - "loss": 5.5973, - "step": 32220 - }, - { - "epoch": 16.803650586701433, - "grad_norm": 1.5996023416519165, - "learning_rate": 6.813065326633165e-05, - "loss": 5.5371, - "step": 32221 - }, - { - "epoch": 16.804172099087353, - "grad_norm": 1.4527695178985596, - "learning_rate": 6.812964824120603e-05, - "loss": 5.2659, - "step": 32222 - }, - { - "epoch": 16.804693611473272, - "grad_norm": 1.4184855222702026, - "learning_rate": 6.81286432160804e-05, - "loss": 5.5417, - "step": 32223 - }, - { - "epoch": 16.805215123859192, - "grad_norm": 1.4366674423217773, - "learning_rate": 6.812763819095477e-05, - "loss": 5.5835, - "step": 32224 - }, - { - "epoch": 16.80573663624511, - "grad_norm": 1.4732416868209839, - "learning_rate": 6.812663316582915e-05, - "loss": 5.4492, - "step": 32225 - }, - { - "epoch": 16.80625814863103, - "grad_norm": 1.459498643875122, - "learning_rate": 6.812562814070353e-05, - "loss": 5.2794, - "step": 32226 - }, - { - "epoch": 16.80677966101695, - "grad_norm": 1.4049906730651855, - "learning_rate": 6.81246231155779e-05, - "loss": 5.5433, - "step": 32227 - }, - { - "epoch": 16.80730117340287, - "grad_norm": 1.4600849151611328, - "learning_rate": 6.812361809045227e-05, - "loss": 5.4881, - "step": 32228 - }, - { - "epoch": 16.80782268578879, - "grad_norm": 1.5257198810577393, - "learning_rate": 6.812261306532664e-05, - "loss": 5.4153, - "step": 32229 - }, - { - "epoch": 16.808344198174705, - "grad_norm": 1.4667681455612183, - "learning_rate": 6.812160804020101e-05, - "loss": 5.3627, - "step": 32230 - }, - { - "epoch": 16.808865710560625, - "grad_norm": 1.4093654155731201, - "learning_rate": 6.812060301507538e-05, - "loss": 5.255, - "step": 32231 - }, - { - "epoch": 16.809387222946544, - "grad_norm": 1.4354609251022339, - "learning_rate": 6.811959798994976e-05, - "loss": 5.409, - "step": 32232 - }, - { - "epoch": 16.809908735332463, - "grad_norm": 1.4511804580688477, - "learning_rate": 6.811859296482412e-05, - "loss": 5.2829, - "step": 32233 - }, - { - "epoch": 16.810430247718383, - "grad_norm": 1.631495475769043, - "learning_rate": 6.811758793969848e-05, - "loss": 4.9771, - "step": 32234 - }, - { - "epoch": 16.810951760104302, - "grad_norm": 1.4254612922668457, - "learning_rate": 6.811658291457286e-05, - "loss": 5.5648, - "step": 32235 - }, - { - "epoch": 16.811473272490222, - "grad_norm": 1.4607248306274414, - "learning_rate": 6.811557788944724e-05, - "loss": 5.1943, - "step": 32236 - }, - { - "epoch": 16.81199478487614, - "grad_norm": 1.4698255062103271, - "learning_rate": 6.811457286432162e-05, - "loss": 5.2882, - "step": 32237 - }, - { - "epoch": 16.81251629726206, - "grad_norm": 1.430342674255371, - "learning_rate": 6.811356783919598e-05, - "loss": 5.0732, - "step": 32238 - }, - { - "epoch": 16.81303780964798, - "grad_norm": 1.530485987663269, - "learning_rate": 6.811256281407036e-05, - "loss": 5.3367, - "step": 32239 - }, - { - "epoch": 16.8135593220339, - "grad_norm": 1.4782066345214844, - "learning_rate": 6.811155778894472e-05, - "loss": 5.3626, - "step": 32240 - }, - { - "epoch": 16.81408083441982, - "grad_norm": 1.5078420639038086, - "learning_rate": 6.81105527638191e-05, - "loss": 4.5488, - "step": 32241 - }, - { - "epoch": 16.814602346805735, - "grad_norm": 1.3685628175735474, - "learning_rate": 6.810954773869347e-05, - "loss": 5.7081, - "step": 32242 - }, - { - "epoch": 16.815123859191655, - "grad_norm": 1.5352864265441895, - "learning_rate": 6.810854271356784e-05, - "loss": 5.4687, - "step": 32243 - }, - { - "epoch": 16.815645371577574, - "grad_norm": 1.4922908544540405, - "learning_rate": 6.810753768844221e-05, - "loss": 5.3035, - "step": 32244 - }, - { - "epoch": 16.816166883963493, - "grad_norm": 1.6320977210998535, - "learning_rate": 6.810653266331659e-05, - "loss": 4.9977, - "step": 32245 - }, - { - "epoch": 16.816688396349413, - "grad_norm": 1.4462674856185913, - "learning_rate": 6.810552763819096e-05, - "loss": 4.7986, - "step": 32246 - }, - { - "epoch": 16.817209908735332, - "grad_norm": 1.4855844974517822, - "learning_rate": 6.810452261306533e-05, - "loss": 5.1601, - "step": 32247 - }, - { - "epoch": 16.817731421121252, - "grad_norm": 1.5015418529510498, - "learning_rate": 6.81035175879397e-05, - "loss": 4.8939, - "step": 32248 - }, - { - "epoch": 16.81825293350717, - "grad_norm": 1.4044885635375977, - "learning_rate": 6.810251256281407e-05, - "loss": 5.5958, - "step": 32249 - }, - { - "epoch": 16.81877444589309, - "grad_norm": 1.5564625263214111, - "learning_rate": 6.810150753768845e-05, - "loss": 5.1493, - "step": 32250 - }, - { - "epoch": 16.81929595827901, - "grad_norm": 1.462203860282898, - "learning_rate": 6.810050251256281e-05, - "loss": 5.6381, - "step": 32251 - }, - { - "epoch": 16.81981747066493, - "grad_norm": 1.4473809003829956, - "learning_rate": 6.809949748743719e-05, - "loss": 4.9622, - "step": 32252 - }, - { - "epoch": 16.820338983050846, - "grad_norm": 1.4460340738296509, - "learning_rate": 6.809849246231156e-05, - "loss": 5.5568, - "step": 32253 - }, - { - "epoch": 16.820860495436765, - "grad_norm": 1.7312417030334473, - "learning_rate": 6.809748743718593e-05, - "loss": 4.3886, - "step": 32254 - }, - { - "epoch": 16.821382007822685, - "grad_norm": 1.464523434638977, - "learning_rate": 6.80964824120603e-05, - "loss": 5.8156, - "step": 32255 - }, - { - "epoch": 16.821903520208604, - "grad_norm": 1.4283992052078247, - "learning_rate": 6.809547738693468e-05, - "loss": 5.4809, - "step": 32256 - }, - { - "epoch": 16.822425032594523, - "grad_norm": 1.3495358228683472, - "learning_rate": 6.809447236180905e-05, - "loss": 5.6428, - "step": 32257 - }, - { - "epoch": 16.822946544980443, - "grad_norm": 1.57086181640625, - "learning_rate": 6.809346733668343e-05, - "loss": 5.3073, - "step": 32258 - }, - { - "epoch": 16.823468057366362, - "grad_norm": 1.4902983903884888, - "learning_rate": 6.80924623115578e-05, - "loss": 5.068, - "step": 32259 - }, - { - "epoch": 16.823989569752282, - "grad_norm": 1.3659354448318481, - "learning_rate": 6.809145728643216e-05, - "loss": 5.3183, - "step": 32260 - }, - { - "epoch": 16.8245110821382, - "grad_norm": 1.4588451385498047, - "learning_rate": 6.809045226130654e-05, - "loss": 5.338, - "step": 32261 - }, - { - "epoch": 16.82503259452412, - "grad_norm": 1.4269752502441406, - "learning_rate": 6.80894472361809e-05, - "loss": 5.5153, - "step": 32262 - }, - { - "epoch": 16.82555410691004, - "grad_norm": 1.5318281650543213, - "learning_rate": 6.808844221105528e-05, - "loss": 5.4477, - "step": 32263 - }, - { - "epoch": 16.82607561929596, - "grad_norm": 1.4717127084732056, - "learning_rate": 6.808743718592964e-05, - "loss": 5.1391, - "step": 32264 - }, - { - "epoch": 16.82659713168188, - "grad_norm": 1.3730554580688477, - "learning_rate": 6.808643216080402e-05, - "loss": 5.8, - "step": 32265 - }, - { - "epoch": 16.827118644067795, - "grad_norm": 1.4311480522155762, - "learning_rate": 6.80854271356784e-05, - "loss": 5.3432, - "step": 32266 - }, - { - "epoch": 16.827640156453715, - "grad_norm": 1.4840831756591797, - "learning_rate": 6.808442211055278e-05, - "loss": 5.3272, - "step": 32267 - }, - { - "epoch": 16.828161668839634, - "grad_norm": 1.5875296592712402, - "learning_rate": 6.808341708542714e-05, - "loss": 5.3552, - "step": 32268 - }, - { - "epoch": 16.828683181225554, - "grad_norm": 1.4675390720367432, - "learning_rate": 6.808241206030152e-05, - "loss": 5.5292, - "step": 32269 - }, - { - "epoch": 16.829204693611473, - "grad_norm": 1.487448811531067, - "learning_rate": 6.808140703517588e-05, - "loss": 5.655, - "step": 32270 - }, - { - "epoch": 16.829726205997392, - "grad_norm": 1.6335320472717285, - "learning_rate": 6.808040201005026e-05, - "loss": 5.4538, - "step": 32271 - }, - { - "epoch": 16.830247718383312, - "grad_norm": 1.5432835817337036, - "learning_rate": 6.807939698492463e-05, - "loss": 5.1874, - "step": 32272 - }, - { - "epoch": 16.83076923076923, - "grad_norm": 1.6164653301239014, - "learning_rate": 6.807839195979899e-05, - "loss": 4.7992, - "step": 32273 - }, - { - "epoch": 16.83129074315515, - "grad_norm": 1.5519556999206543, - "learning_rate": 6.807738693467337e-05, - "loss": 5.6583, - "step": 32274 - }, - { - "epoch": 16.83181225554107, - "grad_norm": 1.4818847179412842, - "learning_rate": 6.807638190954773e-05, - "loss": 5.1017, - "step": 32275 - }, - { - "epoch": 16.83233376792699, - "grad_norm": 1.4086637496948242, - "learning_rate": 6.807537688442211e-05, - "loss": 5.2852, - "step": 32276 - }, - { - "epoch": 16.832855280312906, - "grad_norm": 1.4884984493255615, - "learning_rate": 6.807437185929649e-05, - "loss": 5.5994, - "step": 32277 - }, - { - "epoch": 16.833376792698825, - "grad_norm": 1.402036190032959, - "learning_rate": 6.807336683417087e-05, - "loss": 5.5945, - "step": 32278 - }, - { - "epoch": 16.833898305084745, - "grad_norm": 1.3730614185333252, - "learning_rate": 6.807236180904523e-05, - "loss": 5.6016, - "step": 32279 - }, - { - "epoch": 16.834419817470664, - "grad_norm": 1.4380699396133423, - "learning_rate": 6.807135678391961e-05, - "loss": 5.6197, - "step": 32280 - }, - { - "epoch": 16.834941329856584, - "grad_norm": 1.4394521713256836, - "learning_rate": 6.807035175879397e-05, - "loss": 5.7669, - "step": 32281 - }, - { - "epoch": 16.835462842242503, - "grad_norm": 1.666674017906189, - "learning_rate": 6.806934673366835e-05, - "loss": 5.1824, - "step": 32282 - }, - { - "epoch": 16.835984354628422, - "grad_norm": 1.5262519121170044, - "learning_rate": 6.806834170854271e-05, - "loss": 5.6693, - "step": 32283 - }, - { - "epoch": 16.836505867014342, - "grad_norm": 1.408313512802124, - "learning_rate": 6.806733668341709e-05, - "loss": 5.3513, - "step": 32284 - }, - { - "epoch": 16.83702737940026, - "grad_norm": 1.4788936376571655, - "learning_rate": 6.806633165829146e-05, - "loss": 5.236, - "step": 32285 - }, - { - "epoch": 16.83754889178618, - "grad_norm": 1.5548593997955322, - "learning_rate": 6.806532663316583e-05, - "loss": 5.3283, - "step": 32286 - }, - { - "epoch": 16.8380704041721, - "grad_norm": 1.4647866487503052, - "learning_rate": 6.80643216080402e-05, - "loss": 5.5431, - "step": 32287 - }, - { - "epoch": 16.83859191655802, - "grad_norm": 1.8245185613632202, - "learning_rate": 6.806331658291458e-05, - "loss": 4.4265, - "step": 32288 - }, - { - "epoch": 16.839113428943936, - "grad_norm": 1.6040338277816772, - "learning_rate": 6.806231155778895e-05, - "loss": 5.1472, - "step": 32289 - }, - { - "epoch": 16.839634941329855, - "grad_norm": 1.5158417224884033, - "learning_rate": 6.806130653266332e-05, - "loss": 5.5369, - "step": 32290 - }, - { - "epoch": 16.840156453715775, - "grad_norm": 1.4937772750854492, - "learning_rate": 6.80603015075377e-05, - "loss": 5.5289, - "step": 32291 - }, - { - "epoch": 16.840677966101694, - "grad_norm": 1.5024772882461548, - "learning_rate": 6.805929648241206e-05, - "loss": 5.3196, - "step": 32292 - }, - { - "epoch": 16.841199478487614, - "grad_norm": 1.4447509050369263, - "learning_rate": 6.805829145728644e-05, - "loss": 5.7246, - "step": 32293 - }, - { - "epoch": 16.841720990873533, - "grad_norm": 1.552960991859436, - "learning_rate": 6.80572864321608e-05, - "loss": 5.2145, - "step": 32294 - }, - { - "epoch": 16.842242503259452, - "grad_norm": 1.5452227592468262, - "learning_rate": 6.805628140703518e-05, - "loss": 4.8153, - "step": 32295 - }, - { - "epoch": 16.842764015645372, - "grad_norm": 1.6260719299316406, - "learning_rate": 6.805527638190954e-05, - "loss": 4.9387, - "step": 32296 - }, - { - "epoch": 16.84328552803129, - "grad_norm": 1.5294049978256226, - "learning_rate": 6.805427135678392e-05, - "loss": 4.9174, - "step": 32297 - }, - { - "epoch": 16.84380704041721, - "grad_norm": 1.4775010347366333, - "learning_rate": 6.80532663316583e-05, - "loss": 5.4644, - "step": 32298 - }, - { - "epoch": 16.84432855280313, - "grad_norm": 1.3698045015335083, - "learning_rate": 6.805226130653268e-05, - "loss": 5.6984, - "step": 32299 - }, - { - "epoch": 16.84485006518905, - "grad_norm": 1.4460890293121338, - "learning_rate": 6.805125628140704e-05, - "loss": 5.526, - "step": 32300 - }, - { - "epoch": 16.845371577574966, - "grad_norm": 1.4469985961914062, - "learning_rate": 6.80502512562814e-05, - "loss": 5.3451, - "step": 32301 - }, - { - "epoch": 16.845893089960885, - "grad_norm": 1.4390296936035156, - "learning_rate": 6.804924623115578e-05, - "loss": 5.4194, - "step": 32302 - }, - { - "epoch": 16.846414602346805, - "grad_norm": 1.5403863191604614, - "learning_rate": 6.804824120603015e-05, - "loss": 5.2988, - "step": 32303 - }, - { - "epoch": 16.846936114732724, - "grad_norm": 1.4167232513427734, - "learning_rate": 6.804723618090453e-05, - "loss": 5.5034, - "step": 32304 - }, - { - "epoch": 16.847457627118644, - "grad_norm": 1.3592467308044434, - "learning_rate": 6.804623115577889e-05, - "loss": 5.4431, - "step": 32305 - }, - { - "epoch": 16.847979139504563, - "grad_norm": 1.4841742515563965, - "learning_rate": 6.804522613065327e-05, - "loss": 5.5803, - "step": 32306 - }, - { - "epoch": 16.848500651890483, - "grad_norm": 1.4531583786010742, - "learning_rate": 6.804422110552763e-05, - "loss": 5.0393, - "step": 32307 - }, - { - "epoch": 16.849022164276402, - "grad_norm": 1.4909645318984985, - "learning_rate": 6.804321608040201e-05, - "loss": 5.2716, - "step": 32308 - }, - { - "epoch": 16.84954367666232, - "grad_norm": 1.438641905784607, - "learning_rate": 6.804221105527639e-05, - "loss": 5.451, - "step": 32309 - }, - { - "epoch": 16.85006518904824, - "grad_norm": 1.450132966041565, - "learning_rate": 6.804120603015077e-05, - "loss": 5.0258, - "step": 32310 - }, - { - "epoch": 16.85058670143416, - "grad_norm": 1.4128117561340332, - "learning_rate": 6.804020100502513e-05, - "loss": 5.6001, - "step": 32311 - }, - { - "epoch": 16.85110821382008, - "grad_norm": 1.4158730506896973, - "learning_rate": 6.803919597989951e-05, - "loss": 5.2885, - "step": 32312 - }, - { - "epoch": 16.851629726205996, - "grad_norm": 1.517215609550476, - "learning_rate": 6.803819095477387e-05, - "loss": 5.1261, - "step": 32313 - }, - { - "epoch": 16.852151238591915, - "grad_norm": 1.461003303527832, - "learning_rate": 6.803718592964824e-05, - "loss": 5.5772, - "step": 32314 - }, - { - "epoch": 16.852672750977835, - "grad_norm": 1.5857937335968018, - "learning_rate": 6.803618090452261e-05, - "loss": 5.1324, - "step": 32315 - }, - { - "epoch": 16.853194263363754, - "grad_norm": 1.4597573280334473, - "learning_rate": 6.803517587939698e-05, - "loss": 5.5591, - "step": 32316 - }, - { - "epoch": 16.853715775749674, - "grad_norm": 1.64104425907135, - "learning_rate": 6.803417085427136e-05, - "loss": 4.8194, - "step": 32317 - }, - { - "epoch": 16.854237288135593, - "grad_norm": 1.4670978784561157, - "learning_rate": 6.803316582914573e-05, - "loss": 5.3145, - "step": 32318 - }, - { - "epoch": 16.854758800521513, - "grad_norm": 1.5061482191085815, - "learning_rate": 6.803216080402011e-05, - "loss": 5.5036, - "step": 32319 - }, - { - "epoch": 16.855280312907432, - "grad_norm": 1.5042190551757812, - "learning_rate": 6.803115577889448e-05, - "loss": 4.8788, - "step": 32320 - }, - { - "epoch": 16.85580182529335, - "grad_norm": 1.4566477537155151, - "learning_rate": 6.803015075376885e-05, - "loss": 5.5967, - "step": 32321 - }, - { - "epoch": 16.85632333767927, - "grad_norm": 1.5195703506469727, - "learning_rate": 6.802914572864322e-05, - "loss": 5.5489, - "step": 32322 - }, - { - "epoch": 16.85684485006519, - "grad_norm": 1.4620803594589233, - "learning_rate": 6.80281407035176e-05, - "loss": 5.4269, - "step": 32323 - }, - { - "epoch": 16.85736636245111, - "grad_norm": 1.5707426071166992, - "learning_rate": 6.802713567839196e-05, - "loss": 5.123, - "step": 32324 - }, - { - "epoch": 16.857887874837026, - "grad_norm": 1.450875163078308, - "learning_rate": 6.802613065326634e-05, - "loss": 5.3864, - "step": 32325 - }, - { - "epoch": 16.858409387222945, - "grad_norm": 1.4025403261184692, - "learning_rate": 6.80251256281407e-05, - "loss": 5.7229, - "step": 32326 - }, - { - "epoch": 16.858930899608865, - "grad_norm": 1.6100084781646729, - "learning_rate": 6.802412060301507e-05, - "loss": 5.0825, - "step": 32327 - }, - { - "epoch": 16.859452411994784, - "grad_norm": 1.498835563659668, - "learning_rate": 6.802311557788944e-05, - "loss": 5.3448, - "step": 32328 - }, - { - "epoch": 16.859973924380704, - "grad_norm": 1.4917734861373901, - "learning_rate": 6.802211055276382e-05, - "loss": 5.1804, - "step": 32329 - }, - { - "epoch": 16.860495436766623, - "grad_norm": 1.5474884510040283, - "learning_rate": 6.80211055276382e-05, - "loss": 4.9646, - "step": 32330 - }, - { - "epoch": 16.861016949152543, - "grad_norm": 1.5999699831008911, - "learning_rate": 6.802010050251256e-05, - "loss": 5.3083, - "step": 32331 - }, - { - "epoch": 16.861538461538462, - "grad_norm": 1.524290919303894, - "learning_rate": 6.801909547738694e-05, - "loss": 5.7092, - "step": 32332 - }, - { - "epoch": 16.86205997392438, - "grad_norm": 1.4705445766448975, - "learning_rate": 6.801809045226131e-05, - "loss": 5.32, - "step": 32333 - }, - { - "epoch": 16.8625814863103, - "grad_norm": 1.531370997428894, - "learning_rate": 6.801708542713568e-05, - "loss": 5.0256, - "step": 32334 - }, - { - "epoch": 16.86310299869622, - "grad_norm": 1.4705036878585815, - "learning_rate": 6.801608040201005e-05, - "loss": 5.2815, - "step": 32335 - }, - { - "epoch": 16.86362451108214, - "grad_norm": 1.4154504537582397, - "learning_rate": 6.801507537688443e-05, - "loss": 5.7015, - "step": 32336 - }, - { - "epoch": 16.864146023468056, - "grad_norm": 1.4681625366210938, - "learning_rate": 6.801407035175879e-05, - "loss": 5.0413, - "step": 32337 - }, - { - "epoch": 16.864667535853975, - "grad_norm": 1.5421240329742432, - "learning_rate": 6.801306532663317e-05, - "loss": 5.3368, - "step": 32338 - }, - { - "epoch": 16.865189048239895, - "grad_norm": 1.5045236349105835, - "learning_rate": 6.801206030150755e-05, - "loss": 5.4863, - "step": 32339 - }, - { - "epoch": 16.865710560625814, - "grad_norm": 1.504016637802124, - "learning_rate": 6.801105527638191e-05, - "loss": 5.5495, - "step": 32340 - }, - { - "epoch": 16.866232073011734, - "grad_norm": 1.4321191310882568, - "learning_rate": 6.801005025125629e-05, - "loss": 5.5052, - "step": 32341 - }, - { - "epoch": 16.866753585397653, - "grad_norm": 1.4413570165634155, - "learning_rate": 6.800904522613065e-05, - "loss": 5.5977, - "step": 32342 - }, - { - "epoch": 16.867275097783573, - "grad_norm": 1.4629067182540894, - "learning_rate": 6.800804020100503e-05, - "loss": 5.2107, - "step": 32343 - }, - { - "epoch": 16.867796610169492, - "grad_norm": 1.4168341159820557, - "learning_rate": 6.80070351758794e-05, - "loss": 5.6439, - "step": 32344 - }, - { - "epoch": 16.86831812255541, - "grad_norm": 1.4293676614761353, - "learning_rate": 6.800603015075377e-05, - "loss": 5.6695, - "step": 32345 - }, - { - "epoch": 16.86883963494133, - "grad_norm": 1.4909430742263794, - "learning_rate": 6.800502512562814e-05, - "loss": 5.4145, - "step": 32346 - }, - { - "epoch": 16.86936114732725, - "grad_norm": 1.5386234521865845, - "learning_rate": 6.800402010050252e-05, - "loss": 5.3797, - "step": 32347 - }, - { - "epoch": 16.86988265971317, - "grad_norm": 1.4167425632476807, - "learning_rate": 6.800301507537688e-05, - "loss": 5.313, - "step": 32348 - }, - { - "epoch": 16.870404172099086, - "grad_norm": 1.4678267240524292, - "learning_rate": 6.800201005025126e-05, - "loss": 5.2998, - "step": 32349 - }, - { - "epoch": 16.870925684485005, - "grad_norm": 1.3805166482925415, - "learning_rate": 6.800100502512564e-05, - "loss": 5.5877, - "step": 32350 - }, - { - "epoch": 16.871447196870925, - "grad_norm": 1.5353916883468628, - "learning_rate": 6.800000000000001e-05, - "loss": 5.0314, - "step": 32351 - }, - { - "epoch": 16.871968709256844, - "grad_norm": 1.4218164682388306, - "learning_rate": 6.799899497487438e-05, - "loss": 5.3752, - "step": 32352 - }, - { - "epoch": 16.872490221642764, - "grad_norm": 1.5239075422286987, - "learning_rate": 6.799798994974874e-05, - "loss": 5.5058, - "step": 32353 - }, - { - "epoch": 16.873011734028683, - "grad_norm": 1.4978108406066895, - "learning_rate": 6.799698492462312e-05, - "loss": 5.4224, - "step": 32354 - }, - { - "epoch": 16.873533246414603, - "grad_norm": 1.5130846500396729, - "learning_rate": 6.799597989949748e-05, - "loss": 5.5021, - "step": 32355 - }, - { - "epoch": 16.874054758800522, - "grad_norm": 1.3958134651184082, - "learning_rate": 6.799497487437186e-05, - "loss": 5.2377, - "step": 32356 - }, - { - "epoch": 16.87457627118644, - "grad_norm": 1.5282251834869385, - "learning_rate": 6.799396984924623e-05, - "loss": 4.5441, - "step": 32357 - }, - { - "epoch": 16.87509778357236, - "grad_norm": 1.4294945001602173, - "learning_rate": 6.79929648241206e-05, - "loss": 5.8107, - "step": 32358 - }, - { - "epoch": 16.87561929595828, - "grad_norm": 1.5477867126464844, - "learning_rate": 6.799195979899498e-05, - "loss": 5.2826, - "step": 32359 - }, - { - "epoch": 16.876140808344196, - "grad_norm": 1.4709736108779907, - "learning_rate": 6.799095477386936e-05, - "loss": 5.34, - "step": 32360 - }, - { - "epoch": 16.876662320730116, - "grad_norm": 1.5822782516479492, - "learning_rate": 6.798994974874372e-05, - "loss": 5.1394, - "step": 32361 - }, - { - "epoch": 16.877183833116035, - "grad_norm": 1.4112669229507446, - "learning_rate": 6.79889447236181e-05, - "loss": 5.4785, - "step": 32362 - }, - { - "epoch": 16.877705345501955, - "grad_norm": 1.5523914098739624, - "learning_rate": 6.798793969849247e-05, - "loss": 5.1709, - "step": 32363 - }, - { - "epoch": 16.878226857887874, - "grad_norm": 1.41044282913208, - "learning_rate": 6.798693467336684e-05, - "loss": 5.6549, - "step": 32364 - }, - { - "epoch": 16.878748370273794, - "grad_norm": 1.4799189567565918, - "learning_rate": 6.798592964824121e-05, - "loss": 5.4895, - "step": 32365 - }, - { - "epoch": 16.879269882659713, - "grad_norm": 1.428674340248108, - "learning_rate": 6.798492462311557e-05, - "loss": 5.5695, - "step": 32366 - }, - { - "epoch": 16.879791395045633, - "grad_norm": 1.3389296531677246, - "learning_rate": 6.798391959798995e-05, - "loss": 5.7833, - "step": 32367 - }, - { - "epoch": 16.880312907431552, - "grad_norm": 1.4089906215667725, - "learning_rate": 6.798291457286431e-05, - "loss": 5.1084, - "step": 32368 - }, - { - "epoch": 16.88083441981747, - "grad_norm": 1.5674139261245728, - "learning_rate": 6.798190954773869e-05, - "loss": 5.8572, - "step": 32369 - }, - { - "epoch": 16.88135593220339, - "grad_norm": 1.4656537771224976, - "learning_rate": 6.798090452261307e-05, - "loss": 5.4551, - "step": 32370 - }, - { - "epoch": 16.88187744458931, - "grad_norm": 1.4765628576278687, - "learning_rate": 6.797989949748745e-05, - "loss": 5.3568, - "step": 32371 - }, - { - "epoch": 16.88239895697523, - "grad_norm": 1.5358625650405884, - "learning_rate": 6.797889447236181e-05, - "loss": 5.1087, - "step": 32372 - }, - { - "epoch": 16.882920469361146, - "grad_norm": 1.393275260925293, - "learning_rate": 6.797788944723619e-05, - "loss": 5.8279, - "step": 32373 - }, - { - "epoch": 16.883441981747065, - "grad_norm": 1.5468108654022217, - "learning_rate": 6.797688442211055e-05, - "loss": 5.0335, - "step": 32374 - }, - { - "epoch": 16.883963494132985, - "grad_norm": 1.4667243957519531, - "learning_rate": 6.797587939698493e-05, - "loss": 4.9925, - "step": 32375 - }, - { - "epoch": 16.884485006518904, - "grad_norm": 1.502632737159729, - "learning_rate": 6.79748743718593e-05, - "loss": 5.0556, - "step": 32376 - }, - { - "epoch": 16.885006518904824, - "grad_norm": 1.4619288444519043, - "learning_rate": 6.797386934673367e-05, - "loss": 5.7896, - "step": 32377 - }, - { - "epoch": 16.885528031290743, - "grad_norm": 1.46075439453125, - "learning_rate": 6.797286432160804e-05, - "loss": 5.3907, - "step": 32378 - }, - { - "epoch": 16.886049543676663, - "grad_norm": 1.5204224586486816, - "learning_rate": 6.797185929648242e-05, - "loss": 5.2373, - "step": 32379 - }, - { - "epoch": 16.886571056062582, - "grad_norm": 1.5632343292236328, - "learning_rate": 6.79708542713568e-05, - "loss": 5.467, - "step": 32380 - }, - { - "epoch": 16.8870925684485, - "grad_norm": 1.563793659210205, - "learning_rate": 6.796984924623116e-05, - "loss": 5.5479, - "step": 32381 - }, - { - "epoch": 16.88761408083442, - "grad_norm": 1.5477232933044434, - "learning_rate": 6.796884422110554e-05, - "loss": 5.3936, - "step": 32382 - }, - { - "epoch": 16.88813559322034, - "grad_norm": 1.4253052473068237, - "learning_rate": 6.79678391959799e-05, - "loss": 5.4299, - "step": 32383 - }, - { - "epoch": 16.888657105606256, - "grad_norm": 1.4054404497146606, - "learning_rate": 6.796683417085428e-05, - "loss": 5.505, - "step": 32384 - }, - { - "epoch": 16.889178617992176, - "grad_norm": 1.4547144174575806, - "learning_rate": 6.796582914572864e-05, - "loss": 5.3758, - "step": 32385 - }, - { - "epoch": 16.889700130378095, - "grad_norm": 1.3562346696853638, - "learning_rate": 6.796482412060302e-05, - "loss": 5.4331, - "step": 32386 - }, - { - "epoch": 16.890221642764015, - "grad_norm": 1.5622116327285767, - "learning_rate": 6.796381909547738e-05, - "loss": 5.2867, - "step": 32387 - }, - { - "epoch": 16.890743155149934, - "grad_norm": 1.6270904541015625, - "learning_rate": 6.796281407035176e-05, - "loss": 5.2917, - "step": 32388 - }, - { - "epoch": 16.891264667535854, - "grad_norm": 1.52131986618042, - "learning_rate": 6.796180904522613e-05, - "loss": 5.3574, - "step": 32389 - }, - { - "epoch": 16.891786179921773, - "grad_norm": 1.5438766479492188, - "learning_rate": 6.79608040201005e-05, - "loss": 4.9773, - "step": 32390 - }, - { - "epoch": 16.892307692307693, - "grad_norm": 1.4843555688858032, - "learning_rate": 6.795979899497488e-05, - "loss": 5.2643, - "step": 32391 - }, - { - "epoch": 16.892829204693612, - "grad_norm": 1.5478272438049316, - "learning_rate": 6.795879396984926e-05, - "loss": 4.9886, - "step": 32392 - }, - { - "epoch": 16.89335071707953, - "grad_norm": 1.5448975563049316, - "learning_rate": 6.795778894472362e-05, - "loss": 5.5898, - "step": 32393 - }, - { - "epoch": 16.89387222946545, - "grad_norm": 1.4098066091537476, - "learning_rate": 6.795678391959799e-05, - "loss": 5.4836, - "step": 32394 - }, - { - "epoch": 16.89439374185137, - "grad_norm": 1.670881986618042, - "learning_rate": 6.795577889447237e-05, - "loss": 5.0142, - "step": 32395 - }, - { - "epoch": 16.894915254237286, - "grad_norm": 1.634682536125183, - "learning_rate": 6.795477386934673e-05, - "loss": 4.7982, - "step": 32396 - }, - { - "epoch": 16.895436766623206, - "grad_norm": 1.5118247270584106, - "learning_rate": 6.795376884422111e-05, - "loss": 5.4586, - "step": 32397 - }, - { - "epoch": 16.895958279009125, - "grad_norm": 1.450194001197815, - "learning_rate": 6.795276381909547e-05, - "loss": 5.1116, - "step": 32398 - }, - { - "epoch": 16.896479791395045, - "grad_norm": 1.4802435636520386, - "learning_rate": 6.795175879396985e-05, - "loss": 5.1491, - "step": 32399 - }, - { - "epoch": 16.897001303780964, - "grad_norm": 1.4285637140274048, - "learning_rate": 6.795075376884423e-05, - "loss": 5.1806, - "step": 32400 - }, - { - "epoch": 16.897522816166884, - "grad_norm": 1.5475327968597412, - "learning_rate": 6.79497487437186e-05, - "loss": 5.1658, - "step": 32401 - }, - { - "epoch": 16.898044328552803, - "grad_norm": 1.5188158750534058, - "learning_rate": 6.794874371859297e-05, - "loss": 5.2091, - "step": 32402 - }, - { - "epoch": 16.898565840938723, - "grad_norm": 1.4493523836135864, - "learning_rate": 6.794773869346735e-05, - "loss": 4.792, - "step": 32403 - }, - { - "epoch": 16.899087353324642, - "grad_norm": 1.564681053161621, - "learning_rate": 6.794673366834171e-05, - "loss": 4.2918, - "step": 32404 - }, - { - "epoch": 16.89960886571056, - "grad_norm": 1.5450899600982666, - "learning_rate": 6.794572864321609e-05, - "loss": 4.6961, - "step": 32405 - }, - { - "epoch": 16.90013037809648, - "grad_norm": 1.53985595703125, - "learning_rate": 6.794472361809045e-05, - "loss": 4.5179, - "step": 32406 - }, - { - "epoch": 16.9006518904824, - "grad_norm": 1.6042312383651733, - "learning_rate": 6.794371859296482e-05, - "loss": 5.0994, - "step": 32407 - }, - { - "epoch": 16.901173402868316, - "grad_norm": 1.499404788017273, - "learning_rate": 6.79427135678392e-05, - "loss": 4.9225, - "step": 32408 - }, - { - "epoch": 16.901694915254236, - "grad_norm": 1.4318393468856812, - "learning_rate": 6.794170854271356e-05, - "loss": 4.8485, - "step": 32409 - }, - { - "epoch": 16.902216427640155, - "grad_norm": 1.575245976448059, - "learning_rate": 6.794070351758794e-05, - "loss": 4.7498, - "step": 32410 - }, - { - "epoch": 16.902737940026075, - "grad_norm": 1.523076057434082, - "learning_rate": 6.793969849246232e-05, - "loss": 4.8459, - "step": 32411 - }, - { - "epoch": 16.903259452411994, - "grad_norm": 1.3801064491271973, - "learning_rate": 6.79386934673367e-05, - "loss": 5.6031, - "step": 32412 - }, - { - "epoch": 16.903780964797914, - "grad_norm": 1.5999196767807007, - "learning_rate": 6.793768844221106e-05, - "loss": 5.4116, - "step": 32413 - }, - { - "epoch": 16.904302477183833, - "grad_norm": 1.5234853029251099, - "learning_rate": 6.793668341708544e-05, - "loss": 5.3604, - "step": 32414 - }, - { - "epoch": 16.904823989569753, - "grad_norm": 1.4929800033569336, - "learning_rate": 6.79356783919598e-05, - "loss": 4.963, - "step": 32415 - }, - { - "epoch": 16.905345501955672, - "grad_norm": 1.4898545742034912, - "learning_rate": 6.793467336683418e-05, - "loss": 5.3165, - "step": 32416 - }, - { - "epoch": 16.90586701434159, - "grad_norm": 1.5031137466430664, - "learning_rate": 6.793366834170854e-05, - "loss": 5.2267, - "step": 32417 - }, - { - "epoch": 16.90638852672751, - "grad_norm": 1.5518840551376343, - "learning_rate": 6.793266331658292e-05, - "loss": 5.5924, - "step": 32418 - }, - { - "epoch": 16.90691003911343, - "grad_norm": 1.4476866722106934, - "learning_rate": 6.793165829145729e-05, - "loss": 5.6544, - "step": 32419 - }, - { - "epoch": 16.907431551499347, - "grad_norm": 1.3894962072372437, - "learning_rate": 6.793065326633166e-05, - "loss": 5.3898, - "step": 32420 - }, - { - "epoch": 16.907953063885266, - "grad_norm": 1.4671924114227295, - "learning_rate": 6.792964824120604e-05, - "loss": 5.3894, - "step": 32421 - }, - { - "epoch": 16.908474576271185, - "grad_norm": 1.380654215812683, - "learning_rate": 6.79286432160804e-05, - "loss": 5.1498, - "step": 32422 - }, - { - "epoch": 16.908996088657105, - "grad_norm": 1.6227922439575195, - "learning_rate": 6.792763819095478e-05, - "loss": 4.993, - "step": 32423 - }, - { - "epoch": 16.909517601043024, - "grad_norm": 1.4840996265411377, - "learning_rate": 6.792663316582915e-05, - "loss": 4.6775, - "step": 32424 - }, - { - "epoch": 16.910039113428944, - "grad_norm": 1.6057088375091553, - "learning_rate": 6.792562814070353e-05, - "loss": 5.5235, - "step": 32425 - }, - { - "epoch": 16.910560625814863, - "grad_norm": 1.524800181388855, - "learning_rate": 6.792462311557789e-05, - "loss": 5.0843, - "step": 32426 - }, - { - "epoch": 16.911082138200783, - "grad_norm": 1.4572134017944336, - "learning_rate": 6.792361809045227e-05, - "loss": 5.5798, - "step": 32427 - }, - { - "epoch": 16.911603650586702, - "grad_norm": 1.4275416135787964, - "learning_rate": 6.792261306532663e-05, - "loss": 5.6972, - "step": 32428 - }, - { - "epoch": 16.91212516297262, - "grad_norm": 1.4458866119384766, - "learning_rate": 6.792160804020101e-05, - "loss": 5.2846, - "step": 32429 - }, - { - "epoch": 16.91264667535854, - "grad_norm": 1.3837119340896606, - "learning_rate": 6.792060301507537e-05, - "loss": 5.5417, - "step": 32430 - }, - { - "epoch": 16.91316818774446, - "grad_norm": 1.4822882413864136, - "learning_rate": 6.791959798994975e-05, - "loss": 5.3521, - "step": 32431 - }, - { - "epoch": 16.913689700130377, - "grad_norm": 1.5889843702316284, - "learning_rate": 6.791859296482413e-05, - "loss": 4.9364, - "step": 32432 - }, - { - "epoch": 16.914211212516296, - "grad_norm": 1.3969556093215942, - "learning_rate": 6.79175879396985e-05, - "loss": 5.4135, - "step": 32433 - }, - { - "epoch": 16.914732724902215, - "grad_norm": 1.4677510261535645, - "learning_rate": 6.791658291457287e-05, - "loss": 4.9149, - "step": 32434 - }, - { - "epoch": 16.915254237288135, - "grad_norm": 1.4858578443527222, - "learning_rate": 6.791557788944724e-05, - "loss": 5.4426, - "step": 32435 - }, - { - "epoch": 16.915775749674054, - "grad_norm": 1.5129386186599731, - "learning_rate": 6.791457286432161e-05, - "loss": 5.1704, - "step": 32436 - }, - { - "epoch": 16.916297262059974, - "grad_norm": 1.4411780834197998, - "learning_rate": 6.791356783919598e-05, - "loss": 5.5826, - "step": 32437 - }, - { - "epoch": 16.916818774445893, - "grad_norm": 1.6230751276016235, - "learning_rate": 6.791256281407036e-05, - "loss": 5.1289, - "step": 32438 - }, - { - "epoch": 16.917340286831813, - "grad_norm": 1.428541898727417, - "learning_rate": 6.791155778894472e-05, - "loss": 5.628, - "step": 32439 - }, - { - "epoch": 16.917861799217732, - "grad_norm": 1.3998253345489502, - "learning_rate": 6.79105527638191e-05, - "loss": 5.7819, - "step": 32440 - }, - { - "epoch": 16.91838331160365, - "grad_norm": 1.402811050415039, - "learning_rate": 6.790954773869346e-05, - "loss": 5.2663, - "step": 32441 - }, - { - "epoch": 16.91890482398957, - "grad_norm": 1.4845298528671265, - "learning_rate": 6.790854271356784e-05, - "loss": 4.8641, - "step": 32442 - }, - { - "epoch": 16.919426336375487, - "grad_norm": 1.4814549684524536, - "learning_rate": 6.790753768844222e-05, - "loss": 5.4626, - "step": 32443 - }, - { - "epoch": 16.919947848761407, - "grad_norm": 1.3921464681625366, - "learning_rate": 6.79065326633166e-05, - "loss": 5.7124, - "step": 32444 - }, - { - "epoch": 16.920469361147326, - "grad_norm": 1.3104885816574097, - "learning_rate": 6.790552763819096e-05, - "loss": 5.759, - "step": 32445 - }, - { - "epoch": 16.920990873533245, - "grad_norm": 1.507103681564331, - "learning_rate": 6.790452261306532e-05, - "loss": 5.0896, - "step": 32446 - }, - { - "epoch": 16.921512385919165, - "grad_norm": 1.6472971439361572, - "learning_rate": 6.79035175879397e-05, - "loss": 5.159, - "step": 32447 - }, - { - "epoch": 16.922033898305084, - "grad_norm": 1.6453166007995605, - "learning_rate": 6.790251256281407e-05, - "loss": 5.6292, - "step": 32448 - }, - { - "epoch": 16.922555410691004, - "grad_norm": 1.6022409200668335, - "learning_rate": 6.790150753768844e-05, - "loss": 5.1275, - "step": 32449 - }, - { - "epoch": 16.923076923076923, - "grad_norm": 1.5402127504348755, - "learning_rate": 6.790050251256281e-05, - "loss": 5.3025, - "step": 32450 - }, - { - "epoch": 16.923598435462843, - "grad_norm": 1.6186796426773071, - "learning_rate": 6.789949748743719e-05, - "loss": 5.0808, - "step": 32451 - }, - { - "epoch": 16.924119947848762, - "grad_norm": 1.5059268474578857, - "learning_rate": 6.789849246231156e-05, - "loss": 5.5736, - "step": 32452 - }, - { - "epoch": 16.92464146023468, - "grad_norm": 1.47416090965271, - "learning_rate": 6.789748743718594e-05, - "loss": 5.4597, - "step": 32453 - }, - { - "epoch": 16.9251629726206, - "grad_norm": 1.5530303716659546, - "learning_rate": 6.78964824120603e-05, - "loss": 5.3305, - "step": 32454 - }, - { - "epoch": 16.92568448500652, - "grad_norm": 1.4411883354187012, - "learning_rate": 6.789547738693468e-05, - "loss": 5.6362, - "step": 32455 - }, - { - "epoch": 16.926205997392437, - "grad_norm": 1.4205687046051025, - "learning_rate": 6.789447236180905e-05, - "loss": 5.583, - "step": 32456 - }, - { - "epoch": 16.926727509778356, - "grad_norm": 1.5574069023132324, - "learning_rate": 6.789346733668343e-05, - "loss": 5.441, - "step": 32457 - }, - { - "epoch": 16.927249022164276, - "grad_norm": 1.4625929594039917, - "learning_rate": 6.789246231155779e-05, - "loss": 5.5049, - "step": 32458 - }, - { - "epoch": 16.927770534550195, - "grad_norm": 1.3688398599624634, - "learning_rate": 6.789145728643215e-05, - "loss": 5.557, - "step": 32459 - }, - { - "epoch": 16.928292046936114, - "grad_norm": 1.4979876279830933, - "learning_rate": 6.789045226130653e-05, - "loss": 5.0321, - "step": 32460 - }, - { - "epoch": 16.928813559322034, - "grad_norm": 1.5938234329223633, - "learning_rate": 6.78894472361809e-05, - "loss": 4.8214, - "step": 32461 - }, - { - "epoch": 16.929335071707953, - "grad_norm": 1.4277130365371704, - "learning_rate": 6.788844221105527e-05, - "loss": 5.6107, - "step": 32462 - }, - { - "epoch": 16.929856584093873, - "grad_norm": 1.4846256971359253, - "learning_rate": 6.788743718592965e-05, - "loss": 5.551, - "step": 32463 - }, - { - "epoch": 16.930378096479792, - "grad_norm": 1.4373703002929688, - "learning_rate": 6.788643216080403e-05, - "loss": 5.6521, - "step": 32464 - }, - { - "epoch": 16.93089960886571, - "grad_norm": 1.4478901624679565, - "learning_rate": 6.78854271356784e-05, - "loss": 5.5588, - "step": 32465 - }, - { - "epoch": 16.93142112125163, - "grad_norm": 1.5088919401168823, - "learning_rate": 6.788442211055277e-05, - "loss": 5.2031, - "step": 32466 - }, - { - "epoch": 16.931942633637547, - "grad_norm": 1.6543411016464233, - "learning_rate": 6.788341708542714e-05, - "loss": 4.6802, - "step": 32467 - }, - { - "epoch": 16.932464146023467, - "grad_norm": 1.4922834634780884, - "learning_rate": 6.788241206030151e-05, - "loss": 5.3909, - "step": 32468 - }, - { - "epoch": 16.932985658409386, - "grad_norm": 1.4276663064956665, - "learning_rate": 6.788140703517588e-05, - "loss": 5.5653, - "step": 32469 - }, - { - "epoch": 16.933507170795306, - "grad_norm": 1.6414177417755127, - "learning_rate": 6.788040201005026e-05, - "loss": 5.0289, - "step": 32470 - }, - { - "epoch": 16.934028683181225, - "grad_norm": 1.3962595462799072, - "learning_rate": 6.787939698492462e-05, - "loss": 5.4565, - "step": 32471 - }, - { - "epoch": 16.934550195567144, - "grad_norm": 1.5477362871170044, - "learning_rate": 6.7878391959799e-05, - "loss": 5.0547, - "step": 32472 - }, - { - "epoch": 16.935071707953064, - "grad_norm": 1.51752769947052, - "learning_rate": 6.787738693467338e-05, - "loss": 5.2489, - "step": 32473 - }, - { - "epoch": 16.935593220338983, - "grad_norm": 1.4608659744262695, - "learning_rate": 6.787638190954774e-05, - "loss": 5.6711, - "step": 32474 - }, - { - "epoch": 16.936114732724903, - "grad_norm": 1.3969889879226685, - "learning_rate": 6.787537688442212e-05, - "loss": 5.6891, - "step": 32475 - }, - { - "epoch": 16.936636245110822, - "grad_norm": 1.4909021854400635, - "learning_rate": 6.787437185929648e-05, - "loss": 5.5743, - "step": 32476 - }, - { - "epoch": 16.937157757496742, - "grad_norm": 1.5796184539794922, - "learning_rate": 6.787336683417086e-05, - "loss": 5.3939, - "step": 32477 - }, - { - "epoch": 16.93767926988266, - "grad_norm": 1.506629228591919, - "learning_rate": 6.787236180904522e-05, - "loss": 5.3945, - "step": 32478 - }, - { - "epoch": 16.938200782268577, - "grad_norm": 1.454669713973999, - "learning_rate": 6.78713567839196e-05, - "loss": 5.5389, - "step": 32479 - }, - { - "epoch": 16.938722294654497, - "grad_norm": 1.384960651397705, - "learning_rate": 6.787035175879397e-05, - "loss": 5.3641, - "step": 32480 - }, - { - "epoch": 16.939243807040416, - "grad_norm": 1.3818535804748535, - "learning_rate": 6.786934673366834e-05, - "loss": 5.3624, - "step": 32481 - }, - { - "epoch": 16.939765319426336, - "grad_norm": 1.7369868755340576, - "learning_rate": 6.786834170854271e-05, - "loss": 5.0155, - "step": 32482 - }, - { - "epoch": 16.940286831812255, - "grad_norm": 1.3935303688049316, - "learning_rate": 6.786733668341709e-05, - "loss": 5.8009, - "step": 32483 - }, - { - "epoch": 16.940808344198174, - "grad_norm": 1.5462543964385986, - "learning_rate": 6.786633165829146e-05, - "loss": 5.194, - "step": 32484 - }, - { - "epoch": 16.941329856584094, - "grad_norm": 1.3278051614761353, - "learning_rate": 6.786532663316584e-05, - "loss": 5.8406, - "step": 32485 - }, - { - "epoch": 16.941851368970013, - "grad_norm": 1.5530050992965698, - "learning_rate": 6.78643216080402e-05, - "loss": 5.4171, - "step": 32486 - }, - { - "epoch": 16.942372881355933, - "grad_norm": 1.5688409805297852, - "learning_rate": 6.786331658291457e-05, - "loss": 5.4099, - "step": 32487 - }, - { - "epoch": 16.942894393741852, - "grad_norm": 1.7084956169128418, - "learning_rate": 6.786231155778895e-05, - "loss": 4.8713, - "step": 32488 - }, - { - "epoch": 16.943415906127772, - "grad_norm": 1.4010425806045532, - "learning_rate": 6.786130653266331e-05, - "loss": 5.5267, - "step": 32489 - }, - { - "epoch": 16.94393741851369, - "grad_norm": 1.5105950832366943, - "learning_rate": 6.786030150753769e-05, - "loss": 5.6022, - "step": 32490 - }, - { - "epoch": 16.944458930899607, - "grad_norm": 1.536176085472107, - "learning_rate": 6.785929648241206e-05, - "loss": 5.2281, - "step": 32491 - }, - { - "epoch": 16.944980443285527, - "grad_norm": 1.4717206954956055, - "learning_rate": 6.785829145728643e-05, - "loss": 5.3244, - "step": 32492 - }, - { - "epoch": 16.945501955671446, - "grad_norm": 1.4649834632873535, - "learning_rate": 6.785728643216081e-05, - "loss": 4.979, - "step": 32493 - }, - { - "epoch": 16.946023468057366, - "grad_norm": 1.569586992263794, - "learning_rate": 6.785628140703519e-05, - "loss": 5.109, - "step": 32494 - }, - { - "epoch": 16.946544980443285, - "grad_norm": 1.5353845357894897, - "learning_rate": 6.785527638190955e-05, - "loss": 5.5801, - "step": 32495 - }, - { - "epoch": 16.947066492829205, - "grad_norm": 1.583788275718689, - "learning_rate": 6.785427135678393e-05, - "loss": 5.4363, - "step": 32496 - }, - { - "epoch": 16.947588005215124, - "grad_norm": 1.5092730522155762, - "learning_rate": 6.78532663316583e-05, - "loss": 5.5068, - "step": 32497 - }, - { - "epoch": 16.948109517601043, - "grad_norm": 1.515787959098816, - "learning_rate": 6.785226130653267e-05, - "loss": 5.4917, - "step": 32498 - }, - { - "epoch": 16.948631029986963, - "grad_norm": 1.4549691677093506, - "learning_rate": 6.785125628140704e-05, - "loss": 5.3303, - "step": 32499 - }, - { - "epoch": 16.949152542372882, - "grad_norm": 1.4001710414886475, - "learning_rate": 6.78502512562814e-05, - "loss": 5.65, - "step": 32500 - }, - { - "epoch": 16.949674054758802, - "grad_norm": 1.6653918027877808, - "learning_rate": 6.784924623115578e-05, - "loss": 5.0974, - "step": 32501 - }, - { - "epoch": 16.95019556714472, - "grad_norm": 1.4158194065093994, - "learning_rate": 6.784824120603014e-05, - "loss": 5.6024, - "step": 32502 - }, - { - "epoch": 16.950717079530637, - "grad_norm": 1.4104406833648682, - "learning_rate": 6.784723618090452e-05, - "loss": 5.5611, - "step": 32503 - }, - { - "epoch": 16.951238591916557, - "grad_norm": 1.5437138080596924, - "learning_rate": 6.78462311557789e-05, - "loss": 5.1861, - "step": 32504 - }, - { - "epoch": 16.951760104302476, - "grad_norm": 1.5005415678024292, - "learning_rate": 6.784522613065328e-05, - "loss": 4.9843, - "step": 32505 - }, - { - "epoch": 16.952281616688396, - "grad_norm": 1.4291985034942627, - "learning_rate": 6.784422110552764e-05, - "loss": 5.4527, - "step": 32506 - }, - { - "epoch": 16.952803129074315, - "grad_norm": 1.512047529220581, - "learning_rate": 6.784321608040202e-05, - "loss": 5.0667, - "step": 32507 - }, - { - "epoch": 16.953324641460235, - "grad_norm": 1.4717110395431519, - "learning_rate": 6.784221105527638e-05, - "loss": 5.2194, - "step": 32508 - }, - { - "epoch": 16.953846153846154, - "grad_norm": 1.4788250923156738, - "learning_rate": 6.784120603015076e-05, - "loss": 4.8092, - "step": 32509 - }, - { - "epoch": 16.954367666232073, - "grad_norm": 1.4280763864517212, - "learning_rate": 6.784020100502513e-05, - "loss": 5.744, - "step": 32510 - }, - { - "epoch": 16.954889178617993, - "grad_norm": 1.4562805891036987, - "learning_rate": 6.78391959798995e-05, - "loss": 5.3434, - "step": 32511 - }, - { - "epoch": 16.955410691003912, - "grad_norm": 1.5127441883087158, - "learning_rate": 6.783819095477387e-05, - "loss": 5.4389, - "step": 32512 - }, - { - "epoch": 16.955932203389832, - "grad_norm": 1.4529786109924316, - "learning_rate": 6.783718592964825e-05, - "loss": 5.8001, - "step": 32513 - }, - { - "epoch": 16.95645371577575, - "grad_norm": 1.5164039134979248, - "learning_rate": 6.783618090452262e-05, - "loss": 5.0905, - "step": 32514 - }, - { - "epoch": 16.956975228161667, - "grad_norm": 1.543525218963623, - "learning_rate": 6.783517587939699e-05, - "loss": 5.0411, - "step": 32515 - }, - { - "epoch": 16.957496740547587, - "grad_norm": 1.50077486038208, - "learning_rate": 6.783417085427137e-05, - "loss": 5.1831, - "step": 32516 - }, - { - "epoch": 16.958018252933506, - "grad_norm": 1.497809886932373, - "learning_rate": 6.783316582914573e-05, - "loss": 5.6489, - "step": 32517 - }, - { - "epoch": 16.958539765319426, - "grad_norm": 1.4587361812591553, - "learning_rate": 6.783216080402011e-05, - "loss": 5.2305, - "step": 32518 - }, - { - "epoch": 16.959061277705345, - "grad_norm": 1.5162676572799683, - "learning_rate": 6.783115577889447e-05, - "loss": 5.5771, - "step": 32519 - }, - { - "epoch": 16.959582790091265, - "grad_norm": 1.3966959714889526, - "learning_rate": 6.783015075376885e-05, - "loss": 5.6773, - "step": 32520 - }, - { - "epoch": 16.960104302477184, - "grad_norm": 1.513175368309021, - "learning_rate": 6.782914572864321e-05, - "loss": 5.1244, - "step": 32521 - }, - { - "epoch": 16.960625814863103, - "grad_norm": 1.63186514377594, - "learning_rate": 6.782814070351759e-05, - "loss": 4.8423, - "step": 32522 - }, - { - "epoch": 16.961147327249023, - "grad_norm": 1.5202438831329346, - "learning_rate": 6.782713567839196e-05, - "loss": 4.8323, - "step": 32523 - }, - { - "epoch": 16.961668839634942, - "grad_norm": 1.5268582105636597, - "learning_rate": 6.782613065326633e-05, - "loss": 5.4536, - "step": 32524 - }, - { - "epoch": 16.962190352020862, - "grad_norm": 1.4350453615188599, - "learning_rate": 6.782512562814071e-05, - "loss": 5.5015, - "step": 32525 - }, - { - "epoch": 16.96271186440678, - "grad_norm": 1.4945721626281738, - "learning_rate": 6.782412060301508e-05, - "loss": 5.6765, - "step": 32526 - }, - { - "epoch": 16.963233376792697, - "grad_norm": 1.450631856918335, - "learning_rate": 6.782311557788945e-05, - "loss": 5.4143, - "step": 32527 - }, - { - "epoch": 16.963754889178617, - "grad_norm": 1.5579357147216797, - "learning_rate": 6.782211055276382e-05, - "loss": 5.1957, - "step": 32528 - }, - { - "epoch": 16.964276401564536, - "grad_norm": 1.4341241121292114, - "learning_rate": 6.78211055276382e-05, - "loss": 5.6335, - "step": 32529 - }, - { - "epoch": 16.964797913950456, - "grad_norm": 1.431963562965393, - "learning_rate": 6.782010050251256e-05, - "loss": 5.4324, - "step": 32530 - }, - { - "epoch": 16.965319426336375, - "grad_norm": 1.5753487348556519, - "learning_rate": 6.781909547738694e-05, - "loss": 5.2753, - "step": 32531 - }, - { - "epoch": 16.965840938722295, - "grad_norm": 1.5286152362823486, - "learning_rate": 6.78180904522613e-05, - "loss": 5.2421, - "step": 32532 - }, - { - "epoch": 16.966362451108214, - "grad_norm": 1.4572099447250366, - "learning_rate": 6.781708542713568e-05, - "loss": 5.3035, - "step": 32533 - }, - { - "epoch": 16.966883963494134, - "grad_norm": 1.5085810422897339, - "learning_rate": 6.781608040201006e-05, - "loss": 5.0268, - "step": 32534 - }, - { - "epoch": 16.967405475880053, - "grad_norm": 1.4976749420166016, - "learning_rate": 6.781507537688444e-05, - "loss": 5.0977, - "step": 32535 - }, - { - "epoch": 16.967926988265972, - "grad_norm": 1.4044971466064453, - "learning_rate": 6.78140703517588e-05, - "loss": 5.8287, - "step": 32536 - }, - { - "epoch": 16.968448500651892, - "grad_norm": 1.4583863019943237, - "learning_rate": 6.781306532663318e-05, - "loss": 4.9915, - "step": 32537 - }, - { - "epoch": 16.96897001303781, - "grad_norm": 1.5127754211425781, - "learning_rate": 6.781206030150754e-05, - "loss": 5.4736, - "step": 32538 - }, - { - "epoch": 16.969491525423727, - "grad_norm": 1.4333122968673706, - "learning_rate": 6.78110552763819e-05, - "loss": 4.8429, - "step": 32539 - }, - { - "epoch": 16.970013037809647, - "grad_norm": 1.4185161590576172, - "learning_rate": 6.781005025125628e-05, - "loss": 5.6353, - "step": 32540 - }, - { - "epoch": 16.970534550195566, - "grad_norm": 1.563348650932312, - "learning_rate": 6.780904522613065e-05, - "loss": 5.3613, - "step": 32541 - }, - { - "epoch": 16.971056062581486, - "grad_norm": 1.541674017906189, - "learning_rate": 6.780804020100503e-05, - "loss": 4.9248, - "step": 32542 - }, - { - "epoch": 16.971577574967405, - "grad_norm": 1.4914451837539673, - "learning_rate": 6.780703517587939e-05, - "loss": 5.4834, - "step": 32543 - }, - { - "epoch": 16.972099087353325, - "grad_norm": 1.5243611335754395, - "learning_rate": 6.780603015075377e-05, - "loss": 5.1817, - "step": 32544 - }, - { - "epoch": 16.972620599739244, - "grad_norm": 1.3938015699386597, - "learning_rate": 6.780502512562815e-05, - "loss": 5.5486, - "step": 32545 - }, - { - "epoch": 16.973142112125164, - "grad_norm": 1.7328482866287231, - "learning_rate": 6.780402010050252e-05, - "loss": 4.7936, - "step": 32546 - }, - { - "epoch": 16.973663624511083, - "grad_norm": 1.5499812364578247, - "learning_rate": 6.780301507537689e-05, - "loss": 5.2962, - "step": 32547 - }, - { - "epoch": 16.974185136897002, - "grad_norm": 1.503544569015503, - "learning_rate": 6.780201005025127e-05, - "loss": 4.9077, - "step": 32548 - }, - { - "epoch": 16.974706649282922, - "grad_norm": 1.590570330619812, - "learning_rate": 6.780100502512563e-05, - "loss": 4.3892, - "step": 32549 - }, - { - "epoch": 16.975228161668838, - "grad_norm": 1.6227085590362549, - "learning_rate": 6.780000000000001e-05, - "loss": 4.8424, - "step": 32550 - }, - { - "epoch": 16.975749674054757, - "grad_norm": 1.6105303764343262, - "learning_rate": 6.779899497487437e-05, - "loss": 5.0224, - "step": 32551 - }, - { - "epoch": 16.976271186440677, - "grad_norm": 1.442943811416626, - "learning_rate": 6.779798994974875e-05, - "loss": 5.1003, - "step": 32552 - }, - { - "epoch": 16.976792698826596, - "grad_norm": 1.5783569812774658, - "learning_rate": 6.779698492462311e-05, - "loss": 5.1149, - "step": 32553 - }, - { - "epoch": 16.977314211212516, - "grad_norm": 1.4210820198059082, - "learning_rate": 6.779597989949749e-05, - "loss": 5.5537, - "step": 32554 - }, - { - "epoch": 16.977835723598435, - "grad_norm": 1.5846909284591675, - "learning_rate": 6.779497487437187e-05, - "loss": 5.2629, - "step": 32555 - }, - { - "epoch": 16.978357235984355, - "grad_norm": 1.5144847631454468, - "learning_rate": 6.779396984924623e-05, - "loss": 4.8956, - "step": 32556 - }, - { - "epoch": 16.978878748370274, - "grad_norm": 1.4400230646133423, - "learning_rate": 6.779296482412061e-05, - "loss": 5.7281, - "step": 32557 - }, - { - "epoch": 16.979400260756194, - "grad_norm": 1.504347324371338, - "learning_rate": 6.779195979899498e-05, - "loss": 4.6207, - "step": 32558 - }, - { - "epoch": 16.979921773142113, - "grad_norm": 1.5274969339370728, - "learning_rate": 6.779095477386935e-05, - "loss": 5.1705, - "step": 32559 - }, - { - "epoch": 16.980443285528033, - "grad_norm": 1.4403496980667114, - "learning_rate": 6.778994974874372e-05, - "loss": 5.3427, - "step": 32560 - }, - { - "epoch": 16.980964797913952, - "grad_norm": 1.493645429611206, - "learning_rate": 6.77889447236181e-05, - "loss": 5.0072, - "step": 32561 - }, - { - "epoch": 16.98148631029987, - "grad_norm": 1.5158759355545044, - "learning_rate": 6.778793969849246e-05, - "loss": 5.1369, - "step": 32562 - }, - { - "epoch": 16.982007822685787, - "grad_norm": 1.5361461639404297, - "learning_rate": 6.778693467336684e-05, - "loss": 4.9126, - "step": 32563 - }, - { - "epoch": 16.982529335071707, - "grad_norm": 1.6894100904464722, - "learning_rate": 6.77859296482412e-05, - "loss": 4.8708, - "step": 32564 - }, - { - "epoch": 16.983050847457626, - "grad_norm": 1.4957232475280762, - "learning_rate": 6.778492462311558e-05, - "loss": 5.3323, - "step": 32565 - }, - { - "epoch": 16.983572359843546, - "grad_norm": 1.6225494146347046, - "learning_rate": 6.778391959798996e-05, - "loss": 5.5759, - "step": 32566 - }, - { - "epoch": 16.984093872229465, - "grad_norm": 1.4962069988250732, - "learning_rate": 6.778291457286432e-05, - "loss": 5.3217, - "step": 32567 - }, - { - "epoch": 16.984615384615385, - "grad_norm": 1.4344429969787598, - "learning_rate": 6.77819095477387e-05, - "loss": 5.487, - "step": 32568 - }, - { - "epoch": 16.985136897001304, - "grad_norm": 1.4702370166778564, - "learning_rate": 6.778090452261306e-05, - "loss": 5.1864, - "step": 32569 - }, - { - "epoch": 16.985658409387224, - "grad_norm": 1.547245979309082, - "learning_rate": 6.777989949748744e-05, - "loss": 5.3752, - "step": 32570 - }, - { - "epoch": 16.986179921773143, - "grad_norm": 1.5361382961273193, - "learning_rate": 6.777889447236181e-05, - "loss": 5.141, - "step": 32571 - }, - { - "epoch": 16.986701434159063, - "grad_norm": 1.3966479301452637, - "learning_rate": 6.777788944723618e-05, - "loss": 5.3503, - "step": 32572 - }, - { - "epoch": 16.987222946544982, - "grad_norm": 1.5505366325378418, - "learning_rate": 6.777688442211055e-05, - "loss": 5.2526, - "step": 32573 - }, - { - "epoch": 16.987744458930898, - "grad_norm": 1.4683705568313599, - "learning_rate": 6.777587939698493e-05, - "loss": 5.2492, - "step": 32574 - }, - { - "epoch": 16.988265971316817, - "grad_norm": 1.403518557548523, - "learning_rate": 6.77748743718593e-05, - "loss": 5.7132, - "step": 32575 - }, - { - "epoch": 16.988787483702737, - "grad_norm": 1.46780526638031, - "learning_rate": 6.777386934673368e-05, - "loss": 5.5735, - "step": 32576 - }, - { - "epoch": 16.989308996088656, - "grad_norm": 1.4418790340423584, - "learning_rate": 6.777286432160805e-05, - "loss": 5.2221, - "step": 32577 - }, - { - "epoch": 16.989830508474576, - "grad_norm": 1.4316335916519165, - "learning_rate": 6.777185929648242e-05, - "loss": 5.6872, - "step": 32578 - }, - { - "epoch": 16.990352020860495, - "grad_norm": 1.4341421127319336, - "learning_rate": 6.777085427135679e-05, - "loss": 5.8249, - "step": 32579 - }, - { - "epoch": 16.990873533246415, - "grad_norm": 1.5960557460784912, - "learning_rate": 6.776984924623115e-05, - "loss": 5.0144, - "step": 32580 - }, - { - "epoch": 16.991395045632334, - "grad_norm": 1.5283814668655396, - "learning_rate": 6.776884422110553e-05, - "loss": 5.6476, - "step": 32581 - }, - { - "epoch": 16.991916558018254, - "grad_norm": 1.6673715114593506, - "learning_rate": 6.77678391959799e-05, - "loss": 4.8409, - "step": 32582 - }, - { - "epoch": 16.992438070404173, - "grad_norm": 1.5923595428466797, - "learning_rate": 6.776683417085427e-05, - "loss": 5.3573, - "step": 32583 - }, - { - "epoch": 16.992959582790093, - "grad_norm": 1.4287127256393433, - "learning_rate": 6.776582914572864e-05, - "loss": 5.4149, - "step": 32584 - }, - { - "epoch": 16.993481095176012, - "grad_norm": 1.4457000494003296, - "learning_rate": 6.776482412060302e-05, - "loss": 5.6279, - "step": 32585 - }, - { - "epoch": 16.994002607561928, - "grad_norm": 1.5080779790878296, - "learning_rate": 6.776381909547739e-05, - "loss": 5.0433, - "step": 32586 - }, - { - "epoch": 16.994524119947847, - "grad_norm": 1.4879759550094604, - "learning_rate": 6.776281407035177e-05, - "loss": 5.1047, - "step": 32587 - }, - { - "epoch": 16.995045632333767, - "grad_norm": 1.4900236129760742, - "learning_rate": 6.776180904522614e-05, - "loss": 5.2436, - "step": 32588 - }, - { - "epoch": 16.995567144719686, - "grad_norm": 1.633459210395813, - "learning_rate": 6.776080402010051e-05, - "loss": 4.7248, - "step": 32589 - }, - { - "epoch": 16.996088657105606, - "grad_norm": 1.4386694431304932, - "learning_rate": 6.775979899497488e-05, - "loss": 5.2788, - "step": 32590 - }, - { - "epoch": 16.996610169491525, - "grad_norm": 1.4044768810272217, - "learning_rate": 6.775879396984926e-05, - "loss": 5.5121, - "step": 32591 - }, - { - "epoch": 16.997131681877445, - "grad_norm": 1.5841816663742065, - "learning_rate": 6.775778894472362e-05, - "loss": 5.2981, - "step": 32592 - }, - { - "epoch": 16.997653194263364, - "grad_norm": 1.3984915018081665, - "learning_rate": 6.775678391959798e-05, - "loss": 5.674, - "step": 32593 - }, - { - "epoch": 16.998174706649284, - "grad_norm": 1.5158419609069824, - "learning_rate": 6.775577889447236e-05, - "loss": 5.5053, - "step": 32594 - }, - { - "epoch": 16.998696219035203, - "grad_norm": 1.4182237386703491, - "learning_rate": 6.775477386934674e-05, - "loss": 5.4576, - "step": 32595 - }, - { - "epoch": 16.999217731421123, - "grad_norm": 1.425229787826538, - "learning_rate": 6.775376884422112e-05, - "loss": 5.5624, - "step": 32596 - }, - { - "epoch": 16.999739243807042, - "grad_norm": 1.5428942441940308, - "learning_rate": 6.775276381909548e-05, - "loss": 5.1246, - "step": 32597 - }, - { - "epoch": 17.000260756192958, - "grad_norm": 1.5670382976531982, - "learning_rate": 6.775175879396986e-05, - "loss": 5.4251, - "step": 32598 - }, - { - "epoch": 17.000782268578877, - "grad_norm": 1.5354171991348267, - "learning_rate": 6.775075376884422e-05, - "loss": 5.1945, - "step": 32599 - }, - { - "epoch": 17.001303780964797, - "grad_norm": 1.522490382194519, - "learning_rate": 6.77497487437186e-05, - "loss": 5.4182, - "step": 32600 - }, - { - "epoch": 17.001825293350716, - "grad_norm": 1.6278958320617676, - "learning_rate": 6.774874371859297e-05, - "loss": 4.8089, - "step": 32601 - }, - { - "epoch": 17.002346805736636, - "grad_norm": 1.4539995193481445, - "learning_rate": 6.774773869346734e-05, - "loss": 5.0148, - "step": 32602 - }, - { - "epoch": 17.002868318122555, - "grad_norm": 1.6232582330703735, - "learning_rate": 6.774673366834171e-05, - "loss": 4.9318, - "step": 32603 - }, - { - "epoch": 17.003389830508475, - "grad_norm": 1.519358515739441, - "learning_rate": 6.774572864321609e-05, - "loss": 5.1992, - "step": 32604 - }, - { - "epoch": 17.003911342894394, - "grad_norm": 1.586458444595337, - "learning_rate": 6.774472361809045e-05, - "loss": 5.3822, - "step": 32605 - }, - { - "epoch": 17.004432855280314, - "grad_norm": 1.4602147340774536, - "learning_rate": 6.774371859296483e-05, - "loss": 5.6345, - "step": 32606 - }, - { - "epoch": 17.004954367666233, - "grad_norm": 1.7083770036697388, - "learning_rate": 6.77427135678392e-05, - "loss": 4.621, - "step": 32607 - }, - { - "epoch": 17.005475880052153, - "grad_norm": 1.4725090265274048, - "learning_rate": 6.774170854271357e-05, - "loss": 4.8784, - "step": 32608 - }, - { - "epoch": 17.005997392438072, - "grad_norm": 1.5603736639022827, - "learning_rate": 6.774070351758795e-05, - "loss": 5.6224, - "step": 32609 - }, - { - "epoch": 17.006518904823988, - "grad_norm": 1.568301796913147, - "learning_rate": 6.773969849246231e-05, - "loss": 5.349, - "step": 32610 - }, - { - "epoch": 17.007040417209907, - "grad_norm": 1.4682643413543701, - "learning_rate": 6.773869346733669e-05, - "loss": 5.4426, - "step": 32611 - }, - { - "epoch": 17.007561929595827, - "grad_norm": 1.3911458253860474, - "learning_rate": 6.773768844221105e-05, - "loss": 5.5832, - "step": 32612 - }, - { - "epoch": 17.008083441981746, - "grad_norm": 1.457090139389038, - "learning_rate": 6.773668341708543e-05, - "loss": 5.5655, - "step": 32613 - }, - { - "epoch": 17.008604954367666, - "grad_norm": 1.5606069564819336, - "learning_rate": 6.77356783919598e-05, - "loss": 5.4483, - "step": 32614 - }, - { - "epoch": 17.009126466753585, - "grad_norm": 1.5327119827270508, - "learning_rate": 6.773467336683417e-05, - "loss": 5.3619, - "step": 32615 - }, - { - "epoch": 17.009647979139505, - "grad_norm": 1.5005731582641602, - "learning_rate": 6.773366834170854e-05, - "loss": 4.9634, - "step": 32616 - }, - { - "epoch": 17.010169491525424, - "grad_norm": 1.5539613962173462, - "learning_rate": 6.773266331658292e-05, - "loss": 5.4253, - "step": 32617 - }, - { - "epoch": 17.010691003911344, - "grad_norm": 1.4329841136932373, - "learning_rate": 6.77316582914573e-05, - "loss": 5.5229, - "step": 32618 - }, - { - "epoch": 17.011212516297263, - "grad_norm": 1.5503770112991333, - "learning_rate": 6.773065326633166e-05, - "loss": 5.6102, - "step": 32619 - }, - { - "epoch": 17.011734028683183, - "grad_norm": 1.4633795022964478, - "learning_rate": 6.772964824120604e-05, - "loss": 5.5581, - "step": 32620 - }, - { - "epoch": 17.012255541069102, - "grad_norm": 1.458422064781189, - "learning_rate": 6.77286432160804e-05, - "loss": 5.1167, - "step": 32621 - }, - { - "epoch": 17.012777053455018, - "grad_norm": 1.5293638706207275, - "learning_rate": 6.772763819095478e-05, - "loss": 5.1624, - "step": 32622 - }, - { - "epoch": 17.013298565840937, - "grad_norm": 1.5398755073547363, - "learning_rate": 6.772663316582914e-05, - "loss": 5.1968, - "step": 32623 - }, - { - "epoch": 17.013820078226857, - "grad_norm": 1.4639934301376343, - "learning_rate": 6.772562814070352e-05, - "loss": 5.3432, - "step": 32624 - }, - { - "epoch": 17.014341590612776, - "grad_norm": 1.4043803215026855, - "learning_rate": 6.772462311557788e-05, - "loss": 5.3961, - "step": 32625 - }, - { - "epoch": 17.014863102998696, - "grad_norm": 1.5539071559906006, - "learning_rate": 6.772361809045226e-05, - "loss": 5.2406, - "step": 32626 - }, - { - "epoch": 17.015384615384615, - "grad_norm": 1.4409493207931519, - "learning_rate": 6.772261306532664e-05, - "loss": 5.2237, - "step": 32627 - }, - { - "epoch": 17.015906127770535, - "grad_norm": 1.3490709066390991, - "learning_rate": 6.772160804020102e-05, - "loss": 5.5171, - "step": 32628 - }, - { - "epoch": 17.016427640156454, - "grad_norm": 1.5530462265014648, - "learning_rate": 6.772060301507538e-05, - "loss": 5.1377, - "step": 32629 - }, - { - "epoch": 17.016949152542374, - "grad_norm": 1.6804955005645752, - "learning_rate": 6.771959798994976e-05, - "loss": 4.8724, - "step": 32630 - }, - { - "epoch": 17.017470664928293, - "grad_norm": 1.6028428077697754, - "learning_rate": 6.771859296482412e-05, - "loss": 4.7949, - "step": 32631 - }, - { - "epoch": 17.017992177314213, - "grad_norm": 1.5633089542388916, - "learning_rate": 6.771758793969849e-05, - "loss": 5.1963, - "step": 32632 - }, - { - "epoch": 17.018513689700132, - "grad_norm": 1.562380313873291, - "learning_rate": 6.771658291457287e-05, - "loss": 5.3035, - "step": 32633 - }, - { - "epoch": 17.019035202086048, - "grad_norm": 1.6581627130508423, - "learning_rate": 6.771557788944723e-05, - "loss": 5.0388, - "step": 32634 - }, - { - "epoch": 17.019556714471967, - "grad_norm": 1.5315324068069458, - "learning_rate": 6.771457286432161e-05, - "loss": 5.6578, - "step": 32635 - }, - { - "epoch": 17.020078226857887, - "grad_norm": 1.5168949365615845, - "learning_rate": 6.771356783919597e-05, - "loss": 5.2445, - "step": 32636 - }, - { - "epoch": 17.020599739243806, - "grad_norm": 1.4864188432693481, - "learning_rate": 6.771256281407035e-05, - "loss": 5.685, - "step": 32637 - }, - { - "epoch": 17.021121251629726, - "grad_norm": 1.5467888116836548, - "learning_rate": 6.771155778894473e-05, - "loss": 5.2636, - "step": 32638 - }, - { - "epoch": 17.021642764015645, - "grad_norm": 1.3261765241622925, - "learning_rate": 6.77105527638191e-05, - "loss": 5.7187, - "step": 32639 - }, - { - "epoch": 17.022164276401565, - "grad_norm": 1.4192267656326294, - "learning_rate": 6.770954773869347e-05, - "loss": 5.4172, - "step": 32640 - }, - { - "epoch": 17.022685788787484, - "grad_norm": 1.3727198839187622, - "learning_rate": 6.770854271356785e-05, - "loss": 5.7233, - "step": 32641 - }, - { - "epoch": 17.023207301173404, - "grad_norm": 1.3780335187911987, - "learning_rate": 6.770753768844221e-05, - "loss": 5.4708, - "step": 32642 - }, - { - "epoch": 17.023728813559323, - "grad_norm": 1.4693769216537476, - "learning_rate": 6.770653266331659e-05, - "loss": 5.233, - "step": 32643 - }, - { - "epoch": 17.024250325945243, - "grad_norm": 1.5226670503616333, - "learning_rate": 6.770552763819095e-05, - "loss": 5.6181, - "step": 32644 - }, - { - "epoch": 17.02477183833116, - "grad_norm": 1.4405134916305542, - "learning_rate": 6.770452261306533e-05, - "loss": 5.5426, - "step": 32645 - }, - { - "epoch": 17.025293350717078, - "grad_norm": 1.4239792823791504, - "learning_rate": 6.77035175879397e-05, - "loss": 5.4821, - "step": 32646 - }, - { - "epoch": 17.025814863102998, - "grad_norm": 1.4079293012619019, - "learning_rate": 6.770251256281407e-05, - "loss": 5.6757, - "step": 32647 - }, - { - "epoch": 17.026336375488917, - "grad_norm": 1.458410382270813, - "learning_rate": 6.770150753768845e-05, - "loss": 5.264, - "step": 32648 - }, - { - "epoch": 17.026857887874836, - "grad_norm": 1.4556620121002197, - "learning_rate": 6.770050251256282e-05, - "loss": 5.4918, - "step": 32649 - }, - { - "epoch": 17.027379400260756, - "grad_norm": 1.4818741083145142, - "learning_rate": 6.76994974874372e-05, - "loss": 5.4618, - "step": 32650 - }, - { - "epoch": 17.027900912646675, - "grad_norm": 1.427308201789856, - "learning_rate": 6.769849246231156e-05, - "loss": 5.5824, - "step": 32651 - }, - { - "epoch": 17.028422425032595, - "grad_norm": 1.5307976007461548, - "learning_rate": 6.769748743718594e-05, - "loss": 5.3834, - "step": 32652 - }, - { - "epoch": 17.028943937418514, - "grad_norm": 1.4232450723648071, - "learning_rate": 6.76964824120603e-05, - "loss": 5.0242, - "step": 32653 - }, - { - "epoch": 17.029465449804434, - "grad_norm": 1.506750464439392, - "learning_rate": 6.769547738693468e-05, - "loss": 5.1017, - "step": 32654 - }, - { - "epoch": 17.029986962190353, - "grad_norm": 1.495019555091858, - "learning_rate": 6.769447236180904e-05, - "loss": 5.1299, - "step": 32655 - }, - { - "epoch": 17.030508474576273, - "grad_norm": 1.5037857294082642, - "learning_rate": 6.769346733668342e-05, - "loss": 5.1428, - "step": 32656 - }, - { - "epoch": 17.03102998696219, - "grad_norm": 1.382068157196045, - "learning_rate": 6.769246231155779e-05, - "loss": 5.3528, - "step": 32657 - }, - { - "epoch": 17.031551499348108, - "grad_norm": 1.5090495347976685, - "learning_rate": 6.769145728643216e-05, - "loss": 4.8587, - "step": 32658 - }, - { - "epoch": 17.032073011734028, - "grad_norm": 1.5756263732910156, - "learning_rate": 6.769045226130654e-05, - "loss": 5.2772, - "step": 32659 - }, - { - "epoch": 17.032594524119947, - "grad_norm": 1.432529330253601, - "learning_rate": 6.76894472361809e-05, - "loss": 5.2049, - "step": 32660 - }, - { - "epoch": 17.033116036505866, - "grad_norm": 1.6055145263671875, - "learning_rate": 6.768844221105528e-05, - "loss": 4.8575, - "step": 32661 - }, - { - "epoch": 17.033637548891786, - "grad_norm": 1.5106993913650513, - "learning_rate": 6.768743718592965e-05, - "loss": 5.4161, - "step": 32662 - }, - { - "epoch": 17.034159061277705, - "grad_norm": 1.5019325017929077, - "learning_rate": 6.768643216080403e-05, - "loss": 4.9665, - "step": 32663 - }, - { - "epoch": 17.034680573663625, - "grad_norm": 1.482788324356079, - "learning_rate": 6.768542713567839e-05, - "loss": 5.2746, - "step": 32664 - }, - { - "epoch": 17.035202086049544, - "grad_norm": 1.4645472764968872, - "learning_rate": 6.768442211055277e-05, - "loss": 4.9096, - "step": 32665 - }, - { - "epoch": 17.035723598435464, - "grad_norm": 1.536759376525879, - "learning_rate": 6.768341708542713e-05, - "loss": 5.1574, - "step": 32666 - }, - { - "epoch": 17.036245110821383, - "grad_norm": 1.5662240982055664, - "learning_rate": 6.768241206030151e-05, - "loss": 5.1911, - "step": 32667 - }, - { - "epoch": 17.036766623207303, - "grad_norm": 1.5822181701660156, - "learning_rate": 6.768140703517589e-05, - "loss": 5.0747, - "step": 32668 - }, - { - "epoch": 17.03728813559322, - "grad_norm": 1.4780006408691406, - "learning_rate": 6.768040201005026e-05, - "loss": 5.1508, - "step": 32669 - }, - { - "epoch": 17.037809647979138, - "grad_norm": 1.522508978843689, - "learning_rate": 6.767939698492463e-05, - "loss": 5.4859, - "step": 32670 - }, - { - "epoch": 17.038331160365058, - "grad_norm": 1.5402580499649048, - "learning_rate": 6.767839195979901e-05, - "loss": 5.2505, - "step": 32671 - }, - { - "epoch": 17.038852672750977, - "grad_norm": 1.5036377906799316, - "learning_rate": 6.767738693467337e-05, - "loss": 5.2457, - "step": 32672 - }, - { - "epoch": 17.039374185136897, - "grad_norm": 1.5307968854904175, - "learning_rate": 6.767638190954774e-05, - "loss": 5.2834, - "step": 32673 - }, - { - "epoch": 17.039895697522816, - "grad_norm": 1.4937106370925903, - "learning_rate": 6.767537688442211e-05, - "loss": 5.245, - "step": 32674 - }, - { - "epoch": 17.040417209908735, - "grad_norm": 1.3737032413482666, - "learning_rate": 6.767437185929648e-05, - "loss": 5.0971, - "step": 32675 - }, - { - "epoch": 17.040938722294655, - "grad_norm": 1.5311421155929565, - "learning_rate": 6.767336683417086e-05, - "loss": 5.1244, - "step": 32676 - }, - { - "epoch": 17.041460234680574, - "grad_norm": 1.4968583583831787, - "learning_rate": 6.767236180904522e-05, - "loss": 5.5738, - "step": 32677 - }, - { - "epoch": 17.041981747066494, - "grad_norm": 1.38335120677948, - "learning_rate": 6.76713567839196e-05, - "loss": 5.6969, - "step": 32678 - }, - { - "epoch": 17.042503259452413, - "grad_norm": 1.6288938522338867, - "learning_rate": 6.767035175879398e-05, - "loss": 5.2654, - "step": 32679 - }, - { - "epoch": 17.043024771838333, - "grad_norm": 1.5604724884033203, - "learning_rate": 6.766934673366835e-05, - "loss": 4.5048, - "step": 32680 - }, - { - "epoch": 17.04354628422425, - "grad_norm": 1.5755701065063477, - "learning_rate": 6.766834170854272e-05, - "loss": 4.9701, - "step": 32681 - }, - { - "epoch": 17.044067796610168, - "grad_norm": 1.5454376935958862, - "learning_rate": 6.76673366834171e-05, - "loss": 4.8289, - "step": 32682 - }, - { - "epoch": 17.044589308996088, - "grad_norm": 1.6236348152160645, - "learning_rate": 6.766633165829146e-05, - "loss": 4.7291, - "step": 32683 - }, - { - "epoch": 17.045110821382007, - "grad_norm": 1.4493699073791504, - "learning_rate": 6.766532663316584e-05, - "loss": 5.1784, - "step": 32684 - }, - { - "epoch": 17.045632333767927, - "grad_norm": 1.5046217441558838, - "learning_rate": 6.76643216080402e-05, - "loss": 5.361, - "step": 32685 - }, - { - "epoch": 17.046153846153846, - "grad_norm": 1.4661073684692383, - "learning_rate": 6.766331658291457e-05, - "loss": 5.584, - "step": 32686 - }, - { - "epoch": 17.046675358539765, - "grad_norm": 1.5264085531234741, - "learning_rate": 6.766231155778894e-05, - "loss": 5.313, - "step": 32687 - }, - { - "epoch": 17.047196870925685, - "grad_norm": 1.6515746116638184, - "learning_rate": 6.766130653266332e-05, - "loss": 4.8227, - "step": 32688 - }, - { - "epoch": 17.047718383311604, - "grad_norm": 1.4653654098510742, - "learning_rate": 6.76603015075377e-05, - "loss": 5.3798, - "step": 32689 - }, - { - "epoch": 17.048239895697524, - "grad_norm": 1.4337238073349, - "learning_rate": 6.765929648241206e-05, - "loss": 5.4721, - "step": 32690 - }, - { - "epoch": 17.048761408083443, - "grad_norm": 1.4874672889709473, - "learning_rate": 6.765829145728644e-05, - "loss": 5.5412, - "step": 32691 - }, - { - "epoch": 17.049282920469363, - "grad_norm": 1.4808942079544067, - "learning_rate": 6.76572864321608e-05, - "loss": 5.2112, - "step": 32692 - }, - { - "epoch": 17.04980443285528, - "grad_norm": 1.4875415563583374, - "learning_rate": 6.765628140703518e-05, - "loss": 4.5986, - "step": 32693 - }, - { - "epoch": 17.050325945241198, - "grad_norm": 2.0480470657348633, - "learning_rate": 6.765527638190955e-05, - "loss": 5.1848, - "step": 32694 - }, - { - "epoch": 17.050847457627118, - "grad_norm": 1.696158528327942, - "learning_rate": 6.765427135678393e-05, - "loss": 5.1928, - "step": 32695 - }, - { - "epoch": 17.051368970013037, - "grad_norm": 1.4214346408843994, - "learning_rate": 6.765326633165829e-05, - "loss": 5.2864, - "step": 32696 - }, - { - "epoch": 17.051890482398957, - "grad_norm": 1.5173922777175903, - "learning_rate": 6.765226130653267e-05, - "loss": 5.3088, - "step": 32697 - }, - { - "epoch": 17.052411994784876, - "grad_norm": 1.4901471138000488, - "learning_rate": 6.765125628140703e-05, - "loss": 5.0739, - "step": 32698 - }, - { - "epoch": 17.052933507170795, - "grad_norm": 1.4665706157684326, - "learning_rate": 6.765025125628141e-05, - "loss": 4.9309, - "step": 32699 - }, - { - "epoch": 17.053455019556715, - "grad_norm": 1.4632797241210938, - "learning_rate": 6.764924623115579e-05, - "loss": 5.4377, - "step": 32700 - }, - { - "epoch": 17.053976531942634, - "grad_norm": 1.39524507522583, - "learning_rate": 6.764824120603015e-05, - "loss": 5.6188, - "step": 32701 - }, - { - "epoch": 17.054498044328554, - "grad_norm": 1.512126088142395, - "learning_rate": 6.764723618090453e-05, - "loss": 4.7002, - "step": 32702 - }, - { - "epoch": 17.055019556714473, - "grad_norm": 1.4467215538024902, - "learning_rate": 6.76462311557789e-05, - "loss": 5.5849, - "step": 32703 - }, - { - "epoch": 17.055541069100393, - "grad_norm": 1.5909221172332764, - "learning_rate": 6.764522613065327e-05, - "loss": 4.8495, - "step": 32704 - }, - { - "epoch": 17.05606258148631, - "grad_norm": 1.435481309890747, - "learning_rate": 6.764422110552764e-05, - "loss": 5.7049, - "step": 32705 - }, - { - "epoch": 17.056584093872228, - "grad_norm": 1.5251350402832031, - "learning_rate": 6.764321608040201e-05, - "loss": 5.5684, - "step": 32706 - }, - { - "epoch": 17.057105606258148, - "grad_norm": 1.549417495727539, - "learning_rate": 6.764221105527638e-05, - "loss": 5.1989, - "step": 32707 - }, - { - "epoch": 17.057627118644067, - "grad_norm": 1.6187587976455688, - "learning_rate": 6.764120603015076e-05, - "loss": 5.1165, - "step": 32708 - }, - { - "epoch": 17.058148631029987, - "grad_norm": 1.523267388343811, - "learning_rate": 6.764020100502513e-05, - "loss": 5.23, - "step": 32709 - }, - { - "epoch": 17.058670143415906, - "grad_norm": 1.4713726043701172, - "learning_rate": 6.763919597989951e-05, - "loss": 5.7277, - "step": 32710 - }, - { - "epoch": 17.059191655801826, - "grad_norm": 1.6037746667861938, - "learning_rate": 6.763819095477388e-05, - "loss": 5.0004, - "step": 32711 - }, - { - "epoch": 17.059713168187745, - "grad_norm": 1.4857662916183472, - "learning_rate": 6.763718592964824e-05, - "loss": 5.5369, - "step": 32712 - }, - { - "epoch": 17.060234680573664, - "grad_norm": 1.4755051136016846, - "learning_rate": 6.763618090452262e-05, - "loss": 5.1794, - "step": 32713 - }, - { - "epoch": 17.060756192959584, - "grad_norm": 1.531886100769043, - "learning_rate": 6.763517587939698e-05, - "loss": 5.3273, - "step": 32714 - }, - { - "epoch": 17.061277705345503, - "grad_norm": 1.5029942989349365, - "learning_rate": 6.763417085427136e-05, - "loss": 5.4988, - "step": 32715 - }, - { - "epoch": 17.061799217731423, - "grad_norm": 1.4419468641281128, - "learning_rate": 6.763316582914572e-05, - "loss": 5.1225, - "step": 32716 - }, - { - "epoch": 17.06232073011734, - "grad_norm": 1.5093393325805664, - "learning_rate": 6.76321608040201e-05, - "loss": 5.6438, - "step": 32717 - }, - { - "epoch": 17.062842242503258, - "grad_norm": 1.5515681505203247, - "learning_rate": 6.763115577889447e-05, - "loss": 5.1907, - "step": 32718 - }, - { - "epoch": 17.063363754889178, - "grad_norm": 1.3721911907196045, - "learning_rate": 6.763015075376884e-05, - "loss": 4.8127, - "step": 32719 - }, - { - "epoch": 17.063885267275097, - "grad_norm": 1.464516520500183, - "learning_rate": 6.762914572864322e-05, - "loss": 5.6205, - "step": 32720 - }, - { - "epoch": 17.064406779661017, - "grad_norm": 1.5770237445831299, - "learning_rate": 6.76281407035176e-05, - "loss": 5.3732, - "step": 32721 - }, - { - "epoch": 17.064928292046936, - "grad_norm": 1.4601962566375732, - "learning_rate": 6.762713567839196e-05, - "loss": 5.4901, - "step": 32722 - }, - { - "epoch": 17.065449804432856, - "grad_norm": 1.5009537935256958, - "learning_rate": 6.762613065326634e-05, - "loss": 5.3904, - "step": 32723 - }, - { - "epoch": 17.065971316818775, - "grad_norm": 1.5564525127410889, - "learning_rate": 6.76251256281407e-05, - "loss": 4.9784, - "step": 32724 - }, - { - "epoch": 17.066492829204694, - "grad_norm": 1.7365459203720093, - "learning_rate": 6.762412060301507e-05, - "loss": 4.6995, - "step": 32725 - }, - { - "epoch": 17.067014341590614, - "grad_norm": 1.4277204275131226, - "learning_rate": 6.762311557788945e-05, - "loss": 5.4229, - "step": 32726 - }, - { - "epoch": 17.067535853976533, - "grad_norm": 1.5390962362289429, - "learning_rate": 6.762211055276381e-05, - "loss": 5.0059, - "step": 32727 - }, - { - "epoch": 17.068057366362453, - "grad_norm": 1.6140438318252563, - "learning_rate": 6.762110552763819e-05, - "loss": 4.7953, - "step": 32728 - }, - { - "epoch": 17.06857887874837, - "grad_norm": 1.5227715969085693, - "learning_rate": 6.762010050251257e-05, - "loss": 5.314, - "step": 32729 - }, - { - "epoch": 17.06910039113429, - "grad_norm": 1.567001223564148, - "learning_rate": 6.761909547738695e-05, - "loss": 5.3642, - "step": 32730 - }, - { - "epoch": 17.069621903520208, - "grad_norm": 1.4918063879013062, - "learning_rate": 6.761809045226131e-05, - "loss": 5.0481, - "step": 32731 - }, - { - "epoch": 17.070143415906127, - "grad_norm": 1.3693894147872925, - "learning_rate": 6.761708542713569e-05, - "loss": 5.4249, - "step": 32732 - }, - { - "epoch": 17.070664928292047, - "grad_norm": 1.4580495357513428, - "learning_rate": 6.761608040201005e-05, - "loss": 5.7355, - "step": 32733 - }, - { - "epoch": 17.071186440677966, - "grad_norm": 1.4347749948501587, - "learning_rate": 6.761507537688443e-05, - "loss": 5.5107, - "step": 32734 - }, - { - "epoch": 17.071707953063886, - "grad_norm": 1.3903610706329346, - "learning_rate": 6.76140703517588e-05, - "loss": 5.5455, - "step": 32735 - }, - { - "epoch": 17.072229465449805, - "grad_norm": 1.4497184753417969, - "learning_rate": 6.761306532663317e-05, - "loss": 5.2837, - "step": 32736 - }, - { - "epoch": 17.072750977835724, - "grad_norm": 1.4266901016235352, - "learning_rate": 6.761206030150754e-05, - "loss": 5.693, - "step": 32737 - }, - { - "epoch": 17.073272490221644, - "grad_norm": 1.4772578477859497, - "learning_rate": 6.761105527638191e-05, - "loss": 5.5469, - "step": 32738 - }, - { - "epoch": 17.073794002607563, - "grad_norm": 1.592820644378662, - "learning_rate": 6.761005025125628e-05, - "loss": 4.814, - "step": 32739 - }, - { - "epoch": 17.07431551499348, - "grad_norm": 1.4884593486785889, - "learning_rate": 6.760904522613066e-05, - "loss": 4.9805, - "step": 32740 - }, - { - "epoch": 17.0748370273794, - "grad_norm": 1.4994062185287476, - "learning_rate": 6.760804020100503e-05, - "loss": 5.6004, - "step": 32741 - }, - { - "epoch": 17.07535853976532, - "grad_norm": 1.4601818323135376, - "learning_rate": 6.76070351758794e-05, - "loss": 5.4466, - "step": 32742 - }, - { - "epoch": 17.075880052151238, - "grad_norm": 1.5023047924041748, - "learning_rate": 6.760603015075378e-05, - "loss": 5.5298, - "step": 32743 - }, - { - "epoch": 17.076401564537157, - "grad_norm": 1.4750057458877563, - "learning_rate": 6.760502512562814e-05, - "loss": 5.5194, - "step": 32744 - }, - { - "epoch": 17.076923076923077, - "grad_norm": 1.5165528059005737, - "learning_rate": 6.760402010050252e-05, - "loss": 5.8886, - "step": 32745 - }, - { - "epoch": 17.077444589308996, - "grad_norm": 1.5506043434143066, - "learning_rate": 6.760301507537688e-05, - "loss": 4.8954, - "step": 32746 - }, - { - "epoch": 17.077966101694916, - "grad_norm": 1.5712453126907349, - "learning_rate": 6.760201005025126e-05, - "loss": 5.424, - "step": 32747 - }, - { - "epoch": 17.078487614080835, - "grad_norm": 1.5930473804473877, - "learning_rate": 6.760100502512563e-05, - "loss": 5.4255, - "step": 32748 - }, - { - "epoch": 17.079009126466755, - "grad_norm": 1.539815068244934, - "learning_rate": 6.76e-05, - "loss": 5.2409, - "step": 32749 - }, - { - "epoch": 17.079530638852674, - "grad_norm": 1.4858801364898682, - "learning_rate": 6.759899497487438e-05, - "loss": 5.3951, - "step": 32750 - }, - { - "epoch": 17.080052151238593, - "grad_norm": 1.4898788928985596, - "learning_rate": 6.759798994974876e-05, - "loss": 5.3002, - "step": 32751 - }, - { - "epoch": 17.08057366362451, - "grad_norm": 1.6627202033996582, - "learning_rate": 6.759698492462312e-05, - "loss": 4.9211, - "step": 32752 - }, - { - "epoch": 17.08109517601043, - "grad_norm": 1.5020124912261963, - "learning_rate": 6.759597989949749e-05, - "loss": 5.3173, - "step": 32753 - }, - { - "epoch": 17.08161668839635, - "grad_norm": 1.4598956108093262, - "learning_rate": 6.759497487437187e-05, - "loss": 5.7208, - "step": 32754 - }, - { - "epoch": 17.082138200782268, - "grad_norm": 1.4948495626449585, - "learning_rate": 6.759396984924623e-05, - "loss": 5.3327, - "step": 32755 - }, - { - "epoch": 17.082659713168187, - "grad_norm": 1.5163607597351074, - "learning_rate": 6.759296482412061e-05, - "loss": 5.3067, - "step": 32756 - }, - { - "epoch": 17.083181225554107, - "grad_norm": 1.4442147016525269, - "learning_rate": 6.759195979899497e-05, - "loss": 5.3452, - "step": 32757 - }, - { - "epoch": 17.083702737940026, - "grad_norm": 1.590556263923645, - "learning_rate": 6.759095477386935e-05, - "loss": 5.5242, - "step": 32758 - }, - { - "epoch": 17.084224250325946, - "grad_norm": 1.453879714012146, - "learning_rate": 6.758994974874371e-05, - "loss": 5.1154, - "step": 32759 - }, - { - "epoch": 17.084745762711865, - "grad_norm": 1.5159367322921753, - "learning_rate": 6.758894472361809e-05, - "loss": 5.4297, - "step": 32760 - }, - { - "epoch": 17.085267275097785, - "grad_norm": 1.5488022565841675, - "learning_rate": 6.758793969849247e-05, - "loss": 5.1387, - "step": 32761 - }, - { - "epoch": 17.085788787483704, - "grad_norm": 1.5027401447296143, - "learning_rate": 6.758693467336685e-05, - "loss": 5.2209, - "step": 32762 - }, - { - "epoch": 17.086310299869623, - "grad_norm": 1.4556102752685547, - "learning_rate": 6.758592964824121e-05, - "loss": 5.4966, - "step": 32763 - }, - { - "epoch": 17.08683181225554, - "grad_norm": 1.6420769691467285, - "learning_rate": 6.758492462311559e-05, - "loss": 4.8161, - "step": 32764 - }, - { - "epoch": 17.08735332464146, - "grad_norm": 1.593593716621399, - "learning_rate": 6.758391959798995e-05, - "loss": 5.3397, - "step": 32765 - }, - { - "epoch": 17.08787483702738, - "grad_norm": 1.5082271099090576, - "learning_rate": 6.758291457286432e-05, - "loss": 5.633, - "step": 32766 - }, - { - "epoch": 17.088396349413298, - "grad_norm": 1.4972529411315918, - "learning_rate": 6.75819095477387e-05, - "loss": 5.3673, - "step": 32767 - }, - { - "epoch": 17.088917861799217, - "grad_norm": 1.5016146898269653, - "learning_rate": 6.758090452261306e-05, - "loss": 5.1869, - "step": 32768 - }, - { - "epoch": 17.089439374185137, - "grad_norm": 1.4750423431396484, - "learning_rate": 6.757989949748744e-05, - "loss": 5.3986, - "step": 32769 - }, - { - "epoch": 17.089960886571056, - "grad_norm": 1.467110276222229, - "learning_rate": 6.75788944723618e-05, - "loss": 5.5351, - "step": 32770 - }, - { - "epoch": 17.090482398956976, - "grad_norm": 1.5313583612442017, - "learning_rate": 6.757788944723618e-05, - "loss": 5.5083, - "step": 32771 - }, - { - "epoch": 17.091003911342895, - "grad_norm": 1.411472201347351, - "learning_rate": 6.757688442211056e-05, - "loss": 5.0354, - "step": 32772 - }, - { - "epoch": 17.091525423728815, - "grad_norm": 1.4584510326385498, - "learning_rate": 6.757587939698494e-05, - "loss": 5.4222, - "step": 32773 - }, - { - "epoch": 17.092046936114734, - "grad_norm": 1.5042393207550049, - "learning_rate": 6.75748743718593e-05, - "loss": 5.2893, - "step": 32774 - }, - { - "epoch": 17.092568448500653, - "grad_norm": 1.5010935068130493, - "learning_rate": 6.757386934673368e-05, - "loss": 5.4366, - "step": 32775 - }, - { - "epoch": 17.09308996088657, - "grad_norm": 1.5393823385238647, - "learning_rate": 6.757286432160804e-05, - "loss": 5.3788, - "step": 32776 - }, - { - "epoch": 17.09361147327249, - "grad_norm": 1.5449455976486206, - "learning_rate": 6.757185929648242e-05, - "loss": 5.0745, - "step": 32777 - }, - { - "epoch": 17.09413298565841, - "grad_norm": 1.4584803581237793, - "learning_rate": 6.757085427135678e-05, - "loss": 5.262, - "step": 32778 - }, - { - "epoch": 17.094654498044328, - "grad_norm": 1.4740959405899048, - "learning_rate": 6.756984924623115e-05, - "loss": 5.6124, - "step": 32779 - }, - { - "epoch": 17.095176010430247, - "grad_norm": 1.6076184511184692, - "learning_rate": 6.756884422110553e-05, - "loss": 4.9143, - "step": 32780 - }, - { - "epoch": 17.095697522816167, - "grad_norm": 1.4670206308364868, - "learning_rate": 6.75678391959799e-05, - "loss": 5.4286, - "step": 32781 - }, - { - "epoch": 17.096219035202086, - "grad_norm": 1.415895700454712, - "learning_rate": 6.756683417085428e-05, - "loss": 5.3001, - "step": 32782 - }, - { - "epoch": 17.096740547588006, - "grad_norm": 1.4286445379257202, - "learning_rate": 6.756582914572865e-05, - "loss": 5.4492, - "step": 32783 - }, - { - "epoch": 17.097262059973925, - "grad_norm": 1.4280061721801758, - "learning_rate": 6.756482412060302e-05, - "loss": 5.6781, - "step": 32784 - }, - { - "epoch": 17.097783572359845, - "grad_norm": 1.4306566715240479, - "learning_rate": 6.756381909547739e-05, - "loss": 5.4856, - "step": 32785 - }, - { - "epoch": 17.098305084745764, - "grad_norm": 1.5090337991714478, - "learning_rate": 6.756281407035177e-05, - "loss": 5.1862, - "step": 32786 - }, - { - "epoch": 17.098826597131684, - "grad_norm": 1.4397791624069214, - "learning_rate": 6.756180904522613e-05, - "loss": 5.3678, - "step": 32787 - }, - { - "epoch": 17.0993481095176, - "grad_norm": 1.4638454914093018, - "learning_rate": 6.756080402010051e-05, - "loss": 5.8525, - "step": 32788 - }, - { - "epoch": 17.09986962190352, - "grad_norm": 1.4718399047851562, - "learning_rate": 6.755979899497487e-05, - "loss": 5.0696, - "step": 32789 - }, - { - "epoch": 17.10039113428944, - "grad_norm": 1.487788438796997, - "learning_rate": 6.755879396984925e-05, - "loss": 5.1477, - "step": 32790 - }, - { - "epoch": 17.100912646675358, - "grad_norm": 1.4335778951644897, - "learning_rate": 6.755778894472361e-05, - "loss": 5.7454, - "step": 32791 - }, - { - "epoch": 17.101434159061277, - "grad_norm": 1.8780076503753662, - "learning_rate": 6.755678391959799e-05, - "loss": 5.1329, - "step": 32792 - }, - { - "epoch": 17.101955671447197, - "grad_norm": 1.4681329727172852, - "learning_rate": 6.755577889447237e-05, - "loss": 5.4907, - "step": 32793 - }, - { - "epoch": 17.102477183833116, - "grad_norm": 1.3760312795639038, - "learning_rate": 6.755477386934673e-05, - "loss": 4.5476, - "step": 32794 - }, - { - "epoch": 17.102998696219036, - "grad_norm": 1.4816341400146484, - "learning_rate": 6.755376884422111e-05, - "loss": 5.2057, - "step": 32795 - }, - { - "epoch": 17.103520208604955, - "grad_norm": 1.4538564682006836, - "learning_rate": 6.755276381909548e-05, - "loss": 5.2927, - "step": 32796 - }, - { - "epoch": 17.104041720990875, - "grad_norm": 1.3845133781433105, - "learning_rate": 6.755175879396985e-05, - "loss": 5.7526, - "step": 32797 - }, - { - "epoch": 17.104563233376794, - "grad_norm": 1.5140972137451172, - "learning_rate": 6.755075376884422e-05, - "loss": 4.9992, - "step": 32798 - }, - { - "epoch": 17.105084745762714, - "grad_norm": 1.3504012823104858, - "learning_rate": 6.75497487437186e-05, - "loss": 5.3655, - "step": 32799 - }, - { - "epoch": 17.10560625814863, - "grad_norm": 1.520885705947876, - "learning_rate": 6.754874371859296e-05, - "loss": 5.5593, - "step": 32800 - }, - { - "epoch": 17.10612777053455, - "grad_norm": 1.5622756481170654, - "learning_rate": 6.754773869346734e-05, - "loss": 5.5413, - "step": 32801 - }, - { - "epoch": 17.10664928292047, - "grad_norm": 1.5471041202545166, - "learning_rate": 6.754673366834172e-05, - "loss": 4.9693, - "step": 32802 - }, - { - "epoch": 17.107170795306388, - "grad_norm": 1.4357013702392578, - "learning_rate": 6.75457286432161e-05, - "loss": 5.3392, - "step": 32803 - }, - { - "epoch": 17.107692307692307, - "grad_norm": 1.4703725576400757, - "learning_rate": 6.754472361809046e-05, - "loss": 4.8009, - "step": 32804 - }, - { - "epoch": 17.108213820078227, - "grad_norm": 1.4283376932144165, - "learning_rate": 6.754371859296482e-05, - "loss": 5.8543, - "step": 32805 - }, - { - "epoch": 17.108735332464146, - "grad_norm": 1.4282352924346924, - "learning_rate": 6.75427135678392e-05, - "loss": 4.7559, - "step": 32806 - }, - { - "epoch": 17.109256844850066, - "grad_norm": 1.4978197813034058, - "learning_rate": 6.754170854271356e-05, - "loss": 5.4388, - "step": 32807 - }, - { - "epoch": 17.109778357235985, - "grad_norm": 1.5037217140197754, - "learning_rate": 6.754070351758794e-05, - "loss": 4.7253, - "step": 32808 - }, - { - "epoch": 17.110299869621905, - "grad_norm": 1.6101045608520508, - "learning_rate": 6.753969849246231e-05, - "loss": 5.0356, - "step": 32809 - }, - { - "epoch": 17.110821382007824, - "grad_norm": 1.5568077564239502, - "learning_rate": 6.753869346733668e-05, - "loss": 5.1991, - "step": 32810 - }, - { - "epoch": 17.111342894393744, - "grad_norm": 1.4889644384384155, - "learning_rate": 6.753768844221105e-05, - "loss": 5.3513, - "step": 32811 - }, - { - "epoch": 17.11186440677966, - "grad_norm": 1.6457443237304688, - "learning_rate": 6.753668341708543e-05, - "loss": 5.0389, - "step": 32812 - }, - { - "epoch": 17.11238591916558, - "grad_norm": 1.5254173278808594, - "learning_rate": 6.75356783919598e-05, - "loss": 5.0333, - "step": 32813 - }, - { - "epoch": 17.1129074315515, - "grad_norm": 1.5038663148880005, - "learning_rate": 6.753467336683418e-05, - "loss": 5.3343, - "step": 32814 - }, - { - "epoch": 17.113428943937418, - "grad_norm": 1.4917720556259155, - "learning_rate": 6.753366834170855e-05, - "loss": 5.2058, - "step": 32815 - }, - { - "epoch": 17.113950456323337, - "grad_norm": 1.589188575744629, - "learning_rate": 6.753266331658292e-05, - "loss": 5.2679, - "step": 32816 - }, - { - "epoch": 17.114471968709257, - "grad_norm": 1.554328203201294, - "learning_rate": 6.753165829145729e-05, - "loss": 5.386, - "step": 32817 - }, - { - "epoch": 17.114993481095176, - "grad_norm": 1.6161556243896484, - "learning_rate": 6.753065326633165e-05, - "loss": 5.5792, - "step": 32818 - }, - { - "epoch": 17.115514993481096, - "grad_norm": 1.5397775173187256, - "learning_rate": 6.752964824120603e-05, - "loss": 5.3347, - "step": 32819 - }, - { - "epoch": 17.116036505867015, - "grad_norm": 1.511048674583435, - "learning_rate": 6.75286432160804e-05, - "loss": 5.3704, - "step": 32820 - }, - { - "epoch": 17.116558018252935, - "grad_norm": 1.4122600555419922, - "learning_rate": 6.752763819095477e-05, - "loss": 5.5296, - "step": 32821 - }, - { - "epoch": 17.117079530638854, - "grad_norm": 1.5023800134658813, - "learning_rate": 6.752663316582915e-05, - "loss": 5.1898, - "step": 32822 - }, - { - "epoch": 17.117601043024774, - "grad_norm": 1.3877084255218506, - "learning_rate": 6.752562814070353e-05, - "loss": 5.4509, - "step": 32823 - }, - { - "epoch": 17.11812255541069, - "grad_norm": 1.5519635677337646, - "learning_rate": 6.752462311557789e-05, - "loss": 5.2064, - "step": 32824 - }, - { - "epoch": 17.11864406779661, - "grad_norm": 1.5306978225708008, - "learning_rate": 6.752361809045227e-05, - "loss": 5.3802, - "step": 32825 - }, - { - "epoch": 17.11916558018253, - "grad_norm": 1.488799810409546, - "learning_rate": 6.752261306532664e-05, - "loss": 5.3087, - "step": 32826 - }, - { - "epoch": 17.119687092568448, - "grad_norm": 1.4921048879623413, - "learning_rate": 6.752160804020101e-05, - "loss": 5.3779, - "step": 32827 - }, - { - "epoch": 17.120208604954367, - "grad_norm": 1.4637751579284668, - "learning_rate": 6.752060301507538e-05, - "loss": 5.6631, - "step": 32828 - }, - { - "epoch": 17.120730117340287, - "grad_norm": 1.5233283042907715, - "learning_rate": 6.751959798994976e-05, - "loss": 5.3544, - "step": 32829 - }, - { - "epoch": 17.121251629726206, - "grad_norm": 1.4425312280654907, - "learning_rate": 6.751859296482412e-05, - "loss": 5.3871, - "step": 32830 - }, - { - "epoch": 17.121773142112126, - "grad_norm": 1.3826619386672974, - "learning_rate": 6.75175879396985e-05, - "loss": 5.7644, - "step": 32831 - }, - { - "epoch": 17.122294654498045, - "grad_norm": 1.6309010982513428, - "learning_rate": 6.751658291457286e-05, - "loss": 5.1088, - "step": 32832 - }, - { - "epoch": 17.122816166883965, - "grad_norm": 1.4904911518096924, - "learning_rate": 6.751557788944724e-05, - "loss": 4.7848, - "step": 32833 - }, - { - "epoch": 17.123337679269884, - "grad_norm": 1.5239274501800537, - "learning_rate": 6.751457286432162e-05, - "loss": 5.687, - "step": 32834 - }, - { - "epoch": 17.1238591916558, - "grad_norm": 1.4433038234710693, - "learning_rate": 6.751356783919598e-05, - "loss": 5.3633, - "step": 32835 - }, - { - "epoch": 17.12438070404172, - "grad_norm": 1.3901371955871582, - "learning_rate": 6.751256281407036e-05, - "loss": 5.4589, - "step": 32836 - }, - { - "epoch": 17.12490221642764, - "grad_norm": 1.4332612752914429, - "learning_rate": 6.751155778894472e-05, - "loss": 5.7135, - "step": 32837 - }, - { - "epoch": 17.12542372881356, - "grad_norm": 1.5117411613464355, - "learning_rate": 6.75105527638191e-05, - "loss": 5.6013, - "step": 32838 - }, - { - "epoch": 17.125945241199478, - "grad_norm": 1.41855788230896, - "learning_rate": 6.750954773869347e-05, - "loss": 4.719, - "step": 32839 - }, - { - "epoch": 17.126466753585397, - "grad_norm": 1.4722753763198853, - "learning_rate": 6.750854271356784e-05, - "loss": 5.1567, - "step": 32840 - }, - { - "epoch": 17.126988265971317, - "grad_norm": 1.4507497549057007, - "learning_rate": 6.750753768844221e-05, - "loss": 4.9548, - "step": 32841 - }, - { - "epoch": 17.127509778357236, - "grad_norm": 1.5878984928131104, - "learning_rate": 6.750653266331659e-05, - "loss": 5.1994, - "step": 32842 - }, - { - "epoch": 17.128031290743156, - "grad_norm": 1.3931608200073242, - "learning_rate": 6.750552763819096e-05, - "loss": 5.4339, - "step": 32843 - }, - { - "epoch": 17.128552803129075, - "grad_norm": 1.6434316635131836, - "learning_rate": 6.750452261306534e-05, - "loss": 4.5363, - "step": 32844 - }, - { - "epoch": 17.129074315514995, - "grad_norm": 1.5650259256362915, - "learning_rate": 6.75035175879397e-05, - "loss": 5.3401, - "step": 32845 - }, - { - "epoch": 17.129595827900914, - "grad_norm": 1.3892371654510498, - "learning_rate": 6.750251256281407e-05, - "loss": 5.7455, - "step": 32846 - }, - { - "epoch": 17.13011734028683, - "grad_norm": 1.6764248609542847, - "learning_rate": 6.750150753768845e-05, - "loss": 4.9104, - "step": 32847 - }, - { - "epoch": 17.13063885267275, - "grad_norm": 1.4890100955963135, - "learning_rate": 6.750050251256281e-05, - "loss": 4.8331, - "step": 32848 - }, - { - "epoch": 17.13116036505867, - "grad_norm": 1.4367995262145996, - "learning_rate": 6.749949748743719e-05, - "loss": 5.5359, - "step": 32849 - }, - { - "epoch": 17.13168187744459, - "grad_norm": 1.5571117401123047, - "learning_rate": 6.749849246231155e-05, - "loss": 5.4754, - "step": 32850 - }, - { - "epoch": 17.132203389830508, - "grad_norm": 1.4580280780792236, - "learning_rate": 6.749748743718593e-05, - "loss": 5.6168, - "step": 32851 - }, - { - "epoch": 17.132724902216427, - "grad_norm": 1.4597853422164917, - "learning_rate": 6.74964824120603e-05, - "loss": 5.5372, - "step": 32852 - }, - { - "epoch": 17.133246414602347, - "grad_norm": 2.064976215362549, - "learning_rate": 6.749547738693467e-05, - "loss": 5.1256, - "step": 32853 - }, - { - "epoch": 17.133767926988266, - "grad_norm": 1.5614848136901855, - "learning_rate": 6.749447236180905e-05, - "loss": 5.2288, - "step": 32854 - }, - { - "epoch": 17.134289439374186, - "grad_norm": 1.5833646059036255, - "learning_rate": 6.749346733668343e-05, - "loss": 5.1776, - "step": 32855 - }, - { - "epoch": 17.134810951760105, - "grad_norm": 1.4972188472747803, - "learning_rate": 6.74924623115578e-05, - "loss": 5.3338, - "step": 32856 - }, - { - "epoch": 17.135332464146025, - "grad_norm": 1.5132651329040527, - "learning_rate": 6.749145728643217e-05, - "loss": 5.3822, - "step": 32857 - }, - { - "epoch": 17.135853976531944, - "grad_norm": 1.5867760181427002, - "learning_rate": 6.749045226130654e-05, - "loss": 4.8915, - "step": 32858 - }, - { - "epoch": 17.13637548891786, - "grad_norm": 1.5057703256607056, - "learning_rate": 6.74894472361809e-05, - "loss": 5.3337, - "step": 32859 - }, - { - "epoch": 17.13689700130378, - "grad_norm": 1.6862560510635376, - "learning_rate": 6.748844221105528e-05, - "loss": 4.5638, - "step": 32860 - }, - { - "epoch": 17.1374185136897, - "grad_norm": 1.4507765769958496, - "learning_rate": 6.748743718592964e-05, - "loss": 5.3378, - "step": 32861 - }, - { - "epoch": 17.13794002607562, - "grad_norm": 1.4768725633621216, - "learning_rate": 6.748643216080402e-05, - "loss": 5.6065, - "step": 32862 - }, - { - "epoch": 17.138461538461538, - "grad_norm": 1.4905898571014404, - "learning_rate": 6.74854271356784e-05, - "loss": 5.334, - "step": 32863 - }, - { - "epoch": 17.138983050847457, - "grad_norm": 1.457514762878418, - "learning_rate": 6.748442211055278e-05, - "loss": 5.5363, - "step": 32864 - }, - { - "epoch": 17.139504563233377, - "grad_norm": 1.4460097551345825, - "learning_rate": 6.748341708542714e-05, - "loss": 5.0871, - "step": 32865 - }, - { - "epoch": 17.140026075619296, - "grad_norm": 1.582625389099121, - "learning_rate": 6.748241206030152e-05, - "loss": 5.2255, - "step": 32866 - }, - { - "epoch": 17.140547588005216, - "grad_norm": 1.665555477142334, - "learning_rate": 6.748140703517588e-05, - "loss": 5.021, - "step": 32867 - }, - { - "epoch": 17.141069100391135, - "grad_norm": 1.5599218606948853, - "learning_rate": 6.748040201005026e-05, - "loss": 5.4604, - "step": 32868 - }, - { - "epoch": 17.141590612777055, - "grad_norm": 1.4208279848098755, - "learning_rate": 6.747939698492462e-05, - "loss": 5.7108, - "step": 32869 - }, - { - "epoch": 17.142112125162974, - "grad_norm": 1.4826055765151978, - "learning_rate": 6.7478391959799e-05, - "loss": 5.5218, - "step": 32870 - }, - { - "epoch": 17.14263363754889, - "grad_norm": 1.455627202987671, - "learning_rate": 6.747738693467337e-05, - "loss": 5.7385, - "step": 32871 - }, - { - "epoch": 17.14315514993481, - "grad_norm": 1.4555275440216064, - "learning_rate": 6.747638190954773e-05, - "loss": 5.4537, - "step": 32872 - }, - { - "epoch": 17.14367666232073, - "grad_norm": 1.5068496465682983, - "learning_rate": 6.747537688442211e-05, - "loss": 5.5484, - "step": 32873 - }, - { - "epoch": 17.14419817470665, - "grad_norm": 1.4454697370529175, - "learning_rate": 6.747437185929649e-05, - "loss": 5.4672, - "step": 32874 - }, - { - "epoch": 17.144719687092568, - "grad_norm": 1.4496955871582031, - "learning_rate": 6.747336683417086e-05, - "loss": 5.1031, - "step": 32875 - }, - { - "epoch": 17.145241199478487, - "grad_norm": 1.554413914680481, - "learning_rate": 6.747236180904523e-05, - "loss": 5.3543, - "step": 32876 - }, - { - "epoch": 17.145762711864407, - "grad_norm": 1.6058363914489746, - "learning_rate": 6.74713567839196e-05, - "loss": 4.9776, - "step": 32877 - }, - { - "epoch": 17.146284224250326, - "grad_norm": 1.410929560661316, - "learning_rate": 6.747035175879397e-05, - "loss": 5.3268, - "step": 32878 - }, - { - "epoch": 17.146805736636246, - "grad_norm": 1.3689547777175903, - "learning_rate": 6.746934673366835e-05, - "loss": 5.3072, - "step": 32879 - }, - { - "epoch": 17.147327249022165, - "grad_norm": 1.4115556478500366, - "learning_rate": 6.746834170854271e-05, - "loss": 4.9705, - "step": 32880 - }, - { - "epoch": 17.147848761408085, - "grad_norm": 1.4599069356918335, - "learning_rate": 6.746733668341709e-05, - "loss": 5.3587, - "step": 32881 - }, - { - "epoch": 17.148370273794004, - "grad_norm": 1.469376802444458, - "learning_rate": 6.746633165829145e-05, - "loss": 5.4348, - "step": 32882 - }, - { - "epoch": 17.14889178617992, - "grad_norm": 1.4895715713500977, - "learning_rate": 6.746532663316583e-05, - "loss": 5.2161, - "step": 32883 - }, - { - "epoch": 17.14941329856584, - "grad_norm": 1.5645273923873901, - "learning_rate": 6.746432160804021e-05, - "loss": 5.542, - "step": 32884 - }, - { - "epoch": 17.14993481095176, - "grad_norm": 1.512799620628357, - "learning_rate": 6.746331658291457e-05, - "loss": 5.4569, - "step": 32885 - }, - { - "epoch": 17.15045632333768, - "grad_norm": 1.4767218828201294, - "learning_rate": 6.746231155778895e-05, - "loss": 5.4797, - "step": 32886 - }, - { - "epoch": 17.150977835723598, - "grad_norm": 1.4763649702072144, - "learning_rate": 6.746130653266332e-05, - "loss": 4.9479, - "step": 32887 - }, - { - "epoch": 17.151499348109517, - "grad_norm": 1.5591144561767578, - "learning_rate": 6.74603015075377e-05, - "loss": 5.3848, - "step": 32888 - }, - { - "epoch": 17.152020860495437, - "grad_norm": 1.5869780778884888, - "learning_rate": 6.745929648241206e-05, - "loss": 4.7545, - "step": 32889 - }, - { - "epoch": 17.152542372881356, - "grad_norm": 1.6871610879898071, - "learning_rate": 6.745829145728644e-05, - "loss": 4.9892, - "step": 32890 - }, - { - "epoch": 17.153063885267276, - "grad_norm": 1.5329129695892334, - "learning_rate": 6.74572864321608e-05, - "loss": 5.2002, - "step": 32891 - }, - { - "epoch": 17.153585397653195, - "grad_norm": 1.5414183139801025, - "learning_rate": 6.745628140703518e-05, - "loss": 5.1994, - "step": 32892 - }, - { - "epoch": 17.154106910039115, - "grad_norm": 1.7387856245040894, - "learning_rate": 6.745527638190954e-05, - "loss": 4.8237, - "step": 32893 - }, - { - "epoch": 17.154628422425034, - "grad_norm": 1.6126216650009155, - "learning_rate": 6.745427135678392e-05, - "loss": 5.2741, - "step": 32894 - }, - { - "epoch": 17.15514993481095, - "grad_norm": 1.4583334922790527, - "learning_rate": 6.74532663316583e-05, - "loss": 5.3261, - "step": 32895 - }, - { - "epoch": 17.15567144719687, - "grad_norm": 1.3954870700836182, - "learning_rate": 6.745226130653268e-05, - "loss": 5.0533, - "step": 32896 - }, - { - "epoch": 17.15619295958279, - "grad_norm": 1.517376184463501, - "learning_rate": 6.745125628140704e-05, - "loss": 5.6509, - "step": 32897 - }, - { - "epoch": 17.15671447196871, - "grad_norm": 1.4280763864517212, - "learning_rate": 6.74502512562814e-05, - "loss": 5.1056, - "step": 32898 - }, - { - "epoch": 17.157235984354628, - "grad_norm": 1.416228175163269, - "learning_rate": 6.744924623115578e-05, - "loss": 5.1582, - "step": 32899 - }, - { - "epoch": 17.157757496740548, - "grad_norm": 1.4682676792144775, - "learning_rate": 6.744824120603015e-05, - "loss": 5.5805, - "step": 32900 - }, - { - "epoch": 17.158279009126467, - "grad_norm": 1.4207981824874878, - "learning_rate": 6.744723618090453e-05, - "loss": 5.2059, - "step": 32901 - }, - { - "epoch": 17.158800521512386, - "grad_norm": 1.447361946105957, - "learning_rate": 6.744623115577889e-05, - "loss": 5.0032, - "step": 32902 - }, - { - "epoch": 17.159322033898306, - "grad_norm": 1.5244979858398438, - "learning_rate": 6.744522613065327e-05, - "loss": 4.8954, - "step": 32903 - }, - { - "epoch": 17.159843546284225, - "grad_norm": 1.5691324472427368, - "learning_rate": 6.744422110552765e-05, - "loss": 5.4251, - "step": 32904 - }, - { - "epoch": 17.160365058670145, - "grad_norm": 1.5234642028808594, - "learning_rate": 6.744321608040202e-05, - "loss": 5.1315, - "step": 32905 - }, - { - "epoch": 17.160886571056064, - "grad_norm": 1.5097606182098389, - "learning_rate": 6.744221105527639e-05, - "loss": 5.4229, - "step": 32906 - }, - { - "epoch": 17.16140808344198, - "grad_norm": 1.416056513786316, - "learning_rate": 6.744120603015076e-05, - "loss": 4.92, - "step": 32907 - }, - { - "epoch": 17.1619295958279, - "grad_norm": 1.5040582418441772, - "learning_rate": 6.744020100502513e-05, - "loss": 5.1045, - "step": 32908 - }, - { - "epoch": 17.16245110821382, - "grad_norm": 1.5126540660858154, - "learning_rate": 6.743919597989951e-05, - "loss": 5.5679, - "step": 32909 - }, - { - "epoch": 17.16297262059974, - "grad_norm": 1.5757725238800049, - "learning_rate": 6.743819095477387e-05, - "loss": 5.4934, - "step": 32910 - }, - { - "epoch": 17.163494132985658, - "grad_norm": 1.4590747356414795, - "learning_rate": 6.743718592964824e-05, - "loss": 5.0341, - "step": 32911 - }, - { - "epoch": 17.164015645371578, - "grad_norm": 1.4553097486495972, - "learning_rate": 6.743618090452261e-05, - "loss": 5.7921, - "step": 32912 - }, - { - "epoch": 17.164537157757497, - "grad_norm": 1.4581011533737183, - "learning_rate": 6.743517587939698e-05, - "loss": 5.0727, - "step": 32913 - }, - { - "epoch": 17.165058670143416, - "grad_norm": 1.6015175580978394, - "learning_rate": 6.743417085427136e-05, - "loss": 5.4312, - "step": 32914 - }, - { - "epoch": 17.165580182529336, - "grad_norm": 1.557355523109436, - "learning_rate": 6.743316582914573e-05, - "loss": 5.0298, - "step": 32915 - }, - { - "epoch": 17.166101694915255, - "grad_norm": 1.591027855873108, - "learning_rate": 6.743216080402011e-05, - "loss": 5.5812, - "step": 32916 - }, - { - "epoch": 17.166623207301175, - "grad_norm": 1.5983645915985107, - "learning_rate": 6.743115577889448e-05, - "loss": 5.1457, - "step": 32917 - }, - { - "epoch": 17.167144719687094, - "grad_norm": 1.4609453678131104, - "learning_rate": 6.743015075376885e-05, - "loss": 5.645, - "step": 32918 - }, - { - "epoch": 17.16766623207301, - "grad_norm": 1.4419039487838745, - "learning_rate": 6.742914572864322e-05, - "loss": 4.784, - "step": 32919 - }, - { - "epoch": 17.16818774445893, - "grad_norm": 1.4688886404037476, - "learning_rate": 6.74281407035176e-05, - "loss": 5.4446, - "step": 32920 - }, - { - "epoch": 17.16870925684485, - "grad_norm": 1.4568467140197754, - "learning_rate": 6.742713567839196e-05, - "loss": 4.9233, - "step": 32921 - }, - { - "epoch": 17.16923076923077, - "grad_norm": 1.619380235671997, - "learning_rate": 6.742613065326634e-05, - "loss": 4.872, - "step": 32922 - }, - { - "epoch": 17.169752281616688, - "grad_norm": 1.3487818241119385, - "learning_rate": 6.74251256281407e-05, - "loss": 5.4562, - "step": 32923 - }, - { - "epoch": 17.170273794002608, - "grad_norm": 1.5321142673492432, - "learning_rate": 6.742412060301508e-05, - "loss": 5.0061, - "step": 32924 - }, - { - "epoch": 17.170795306388527, - "grad_norm": 1.3323802947998047, - "learning_rate": 6.742311557788946e-05, - "loss": 5.4478, - "step": 32925 - }, - { - "epoch": 17.171316818774446, - "grad_norm": 1.4525063037872314, - "learning_rate": 6.742211055276382e-05, - "loss": 5.3856, - "step": 32926 - }, - { - "epoch": 17.171838331160366, - "grad_norm": 1.4502235651016235, - "learning_rate": 6.74211055276382e-05, - "loss": 5.6462, - "step": 32927 - }, - { - "epoch": 17.172359843546285, - "grad_norm": 1.394148349761963, - "learning_rate": 6.742010050251256e-05, - "loss": 5.2367, - "step": 32928 - }, - { - "epoch": 17.172881355932205, - "grad_norm": 1.4198122024536133, - "learning_rate": 6.741909547738694e-05, - "loss": 5.3237, - "step": 32929 - }, - { - "epoch": 17.17340286831812, - "grad_norm": 1.5226898193359375, - "learning_rate": 6.74180904522613e-05, - "loss": 5.5998, - "step": 32930 - }, - { - "epoch": 17.17392438070404, - "grad_norm": 1.5790024995803833, - "learning_rate": 6.741708542713568e-05, - "loss": 4.9491, - "step": 32931 - }, - { - "epoch": 17.17444589308996, - "grad_norm": 1.542162299156189, - "learning_rate": 6.741608040201005e-05, - "loss": 5.06, - "step": 32932 - }, - { - "epoch": 17.17496740547588, - "grad_norm": 1.5510246753692627, - "learning_rate": 6.741507537688443e-05, - "loss": 5.2562, - "step": 32933 - }, - { - "epoch": 17.1754889178618, - "grad_norm": 1.4274033308029175, - "learning_rate": 6.741407035175879e-05, - "loss": 5.2639, - "step": 32934 - }, - { - "epoch": 17.176010430247718, - "grad_norm": 1.4763379096984863, - "learning_rate": 6.741306532663317e-05, - "loss": 5.0229, - "step": 32935 - }, - { - "epoch": 17.176531942633638, - "grad_norm": 1.4923748970031738, - "learning_rate": 6.741206030150755e-05, - "loss": 5.4986, - "step": 32936 - }, - { - "epoch": 17.177053455019557, - "grad_norm": 1.3514550924301147, - "learning_rate": 6.741105527638192e-05, - "loss": 5.0466, - "step": 32937 - }, - { - "epoch": 17.177574967405477, - "grad_norm": 1.5190341472625732, - "learning_rate": 6.741005025125629e-05, - "loss": 5.0656, - "step": 32938 - }, - { - "epoch": 17.178096479791396, - "grad_norm": 1.5011537075042725, - "learning_rate": 6.740904522613065e-05, - "loss": 5.4953, - "step": 32939 - }, - { - "epoch": 17.178617992177315, - "grad_norm": 1.4527158737182617, - "learning_rate": 6.740804020100503e-05, - "loss": 5.4899, - "step": 32940 - }, - { - "epoch": 17.179139504563235, - "grad_norm": 1.5406252145767212, - "learning_rate": 6.74070351758794e-05, - "loss": 4.5985, - "step": 32941 - }, - { - "epoch": 17.17966101694915, - "grad_norm": 1.5671554803848267, - "learning_rate": 6.740603015075377e-05, - "loss": 5.4745, - "step": 32942 - }, - { - "epoch": 17.18018252933507, - "grad_norm": 1.600188136100769, - "learning_rate": 6.740502512562814e-05, - "loss": 5.2246, - "step": 32943 - }, - { - "epoch": 17.18070404172099, - "grad_norm": 1.4949333667755127, - "learning_rate": 6.740402010050251e-05, - "loss": 5.3168, - "step": 32944 - }, - { - "epoch": 17.18122555410691, - "grad_norm": 1.397896647453308, - "learning_rate": 6.740301507537688e-05, - "loss": 5.4146, - "step": 32945 - }, - { - "epoch": 17.18174706649283, - "grad_norm": 1.5179786682128906, - "learning_rate": 6.740201005025126e-05, - "loss": 4.9287, - "step": 32946 - }, - { - "epoch": 17.182268578878748, - "grad_norm": 1.389293909072876, - "learning_rate": 6.740100502512563e-05, - "loss": 5.5433, - "step": 32947 - }, - { - "epoch": 17.182790091264668, - "grad_norm": 1.5862549543380737, - "learning_rate": 6.740000000000001e-05, - "loss": 4.6919, - "step": 32948 - }, - { - "epoch": 17.183311603650587, - "grad_norm": 1.495482325553894, - "learning_rate": 6.739899497487438e-05, - "loss": 5.1954, - "step": 32949 - }, - { - "epoch": 17.183833116036507, - "grad_norm": 1.6546146869659424, - "learning_rate": 6.739798994974875e-05, - "loss": 5.1123, - "step": 32950 - }, - { - "epoch": 17.184354628422426, - "grad_norm": 1.4995152950286865, - "learning_rate": 6.739698492462312e-05, - "loss": 5.3682, - "step": 32951 - }, - { - "epoch": 17.184876140808345, - "grad_norm": 1.443284034729004, - "learning_rate": 6.739597989949748e-05, - "loss": 5.3948, - "step": 32952 - }, - { - "epoch": 17.185397653194265, - "grad_norm": 1.4093999862670898, - "learning_rate": 6.739497487437186e-05, - "loss": 5.6315, - "step": 32953 - }, - { - "epoch": 17.18591916558018, - "grad_norm": 1.6893444061279297, - "learning_rate": 6.739396984924622e-05, - "loss": 5.0516, - "step": 32954 - }, - { - "epoch": 17.1864406779661, - "grad_norm": 1.5030590295791626, - "learning_rate": 6.73929648241206e-05, - "loss": 4.9421, - "step": 32955 - }, - { - "epoch": 17.18696219035202, - "grad_norm": 1.4450631141662598, - "learning_rate": 6.739195979899498e-05, - "loss": 5.6119, - "step": 32956 - }, - { - "epoch": 17.18748370273794, - "grad_norm": 1.5362632274627686, - "learning_rate": 6.739095477386936e-05, - "loss": 5.0703, - "step": 32957 - }, - { - "epoch": 17.18800521512386, - "grad_norm": 1.5217534303665161, - "learning_rate": 6.738994974874372e-05, - "loss": 5.2104, - "step": 32958 - }, - { - "epoch": 17.188526727509778, - "grad_norm": 1.4878727197647095, - "learning_rate": 6.73889447236181e-05, - "loss": 5.0472, - "step": 32959 - }, - { - "epoch": 17.189048239895698, - "grad_norm": 1.429185390472412, - "learning_rate": 6.738793969849246e-05, - "loss": 5.3472, - "step": 32960 - }, - { - "epoch": 17.189569752281617, - "grad_norm": 1.5073646306991577, - "learning_rate": 6.738693467336684e-05, - "loss": 5.4654, - "step": 32961 - }, - { - "epoch": 17.190091264667537, - "grad_norm": 1.4468257427215576, - "learning_rate": 6.73859296482412e-05, - "loss": 5.1945, - "step": 32962 - }, - { - "epoch": 17.190612777053456, - "grad_norm": 1.4590836763381958, - "learning_rate": 6.738492462311558e-05, - "loss": 5.5116, - "step": 32963 - }, - { - "epoch": 17.191134289439375, - "grad_norm": 1.462514877319336, - "learning_rate": 6.738391959798995e-05, - "loss": 5.4036, - "step": 32964 - }, - { - "epoch": 17.191655801825295, - "grad_norm": 1.539069652557373, - "learning_rate": 6.738291457286431e-05, - "loss": 5.2431, - "step": 32965 - }, - { - "epoch": 17.19217731421121, - "grad_norm": 1.5758827924728394, - "learning_rate": 6.738190954773869e-05, - "loss": 5.0424, - "step": 32966 - }, - { - "epoch": 17.19269882659713, - "grad_norm": 1.5653212070465088, - "learning_rate": 6.738090452261307e-05, - "loss": 5.1583, - "step": 32967 - }, - { - "epoch": 17.19322033898305, - "grad_norm": 1.4738274812698364, - "learning_rate": 6.737989949748745e-05, - "loss": 5.0173, - "step": 32968 - }, - { - "epoch": 17.19374185136897, - "grad_norm": 1.5968079566955566, - "learning_rate": 6.737889447236181e-05, - "loss": 5.4328, - "step": 32969 - }, - { - "epoch": 17.19426336375489, - "grad_norm": 1.5280812978744507, - "learning_rate": 6.737788944723619e-05, - "loss": 5.6135, - "step": 32970 - }, - { - "epoch": 17.194784876140808, - "grad_norm": 1.4790552854537964, - "learning_rate": 6.737688442211055e-05, - "loss": 5.4406, - "step": 32971 - }, - { - "epoch": 17.195306388526728, - "grad_norm": 1.4183121919631958, - "learning_rate": 6.737587939698493e-05, - "loss": 5.9783, - "step": 32972 - }, - { - "epoch": 17.195827900912647, - "grad_norm": 1.614562749862671, - "learning_rate": 6.73748743718593e-05, - "loss": 4.8376, - "step": 32973 - }, - { - "epoch": 17.196349413298567, - "grad_norm": 1.410207986831665, - "learning_rate": 6.737386934673367e-05, - "loss": 5.3791, - "step": 32974 - }, - { - "epoch": 17.196870925684486, - "grad_norm": 1.4985867738723755, - "learning_rate": 6.737286432160804e-05, - "loss": 5.522, - "step": 32975 - }, - { - "epoch": 17.197392438070406, - "grad_norm": 1.4103248119354248, - "learning_rate": 6.737185929648241e-05, - "loss": 5.0306, - "step": 32976 - }, - { - "epoch": 17.197913950456325, - "grad_norm": 1.516087532043457, - "learning_rate": 6.737085427135679e-05, - "loss": 5.0275, - "step": 32977 - }, - { - "epoch": 17.19843546284224, - "grad_norm": 1.4929207563400269, - "learning_rate": 6.736984924623116e-05, - "loss": 5.1097, - "step": 32978 - }, - { - "epoch": 17.19895697522816, - "grad_norm": 1.4872592687606812, - "learning_rate": 6.736884422110553e-05, - "loss": 5.5908, - "step": 32979 - }, - { - "epoch": 17.19947848761408, - "grad_norm": 1.6106736660003662, - "learning_rate": 6.73678391959799e-05, - "loss": 5.2366, - "step": 32980 - }, - { - "epoch": 17.2, - "grad_norm": 1.5341112613677979, - "learning_rate": 6.736683417085428e-05, - "loss": 5.2078, - "step": 32981 - }, - { - "epoch": 17.20052151238592, - "grad_norm": 1.4327343702316284, - "learning_rate": 6.736582914572864e-05, - "loss": 5.4407, - "step": 32982 - }, - { - "epoch": 17.201043024771838, - "grad_norm": 1.5070419311523438, - "learning_rate": 6.736482412060302e-05, - "loss": 5.5602, - "step": 32983 - }, - { - "epoch": 17.201564537157758, - "grad_norm": 1.4391369819641113, - "learning_rate": 6.736381909547738e-05, - "loss": 5.6127, - "step": 32984 - }, - { - "epoch": 17.202086049543677, - "grad_norm": 1.6736966371536255, - "learning_rate": 6.736281407035176e-05, - "loss": 5.1789, - "step": 32985 - }, - { - "epoch": 17.202607561929597, - "grad_norm": 1.5302575826644897, - "learning_rate": 6.736180904522613e-05, - "loss": 5.552, - "step": 32986 - }, - { - "epoch": 17.203129074315516, - "grad_norm": 1.4787553548812866, - "learning_rate": 6.73608040201005e-05, - "loss": 5.3203, - "step": 32987 - }, - { - "epoch": 17.203650586701436, - "grad_norm": 1.4686959981918335, - "learning_rate": 6.735979899497488e-05, - "loss": 5.6412, - "step": 32988 - }, - { - "epoch": 17.204172099087355, - "grad_norm": 1.432977318763733, - "learning_rate": 6.735879396984926e-05, - "loss": 5.3903, - "step": 32989 - }, - { - "epoch": 17.20469361147327, - "grad_norm": 1.6083422899246216, - "learning_rate": 6.735778894472362e-05, - "loss": 4.789, - "step": 32990 - }, - { - "epoch": 17.20521512385919, - "grad_norm": 1.5510125160217285, - "learning_rate": 6.735678391959799e-05, - "loss": 5.6311, - "step": 32991 - }, - { - "epoch": 17.20573663624511, - "grad_norm": 1.5995690822601318, - "learning_rate": 6.735577889447237e-05, - "loss": 4.8253, - "step": 32992 - }, - { - "epoch": 17.20625814863103, - "grad_norm": 1.4871851205825806, - "learning_rate": 6.735477386934673e-05, - "loss": 5.1167, - "step": 32993 - }, - { - "epoch": 17.20677966101695, - "grad_norm": 1.4823243618011475, - "learning_rate": 6.735376884422111e-05, - "loss": 5.3402, - "step": 32994 - }, - { - "epoch": 17.20730117340287, - "grad_norm": 1.4872583150863647, - "learning_rate": 6.735276381909547e-05, - "loss": 5.5683, - "step": 32995 - }, - { - "epoch": 17.207822685788788, - "grad_norm": 1.4722498655319214, - "learning_rate": 6.735175879396985e-05, - "loss": 5.4301, - "step": 32996 - }, - { - "epoch": 17.208344198174707, - "grad_norm": 1.5104588270187378, - "learning_rate": 6.735075376884423e-05, - "loss": 5.2326, - "step": 32997 - }, - { - "epoch": 17.208865710560627, - "grad_norm": 1.416021466255188, - "learning_rate": 6.73497487437186e-05, - "loss": 5.1825, - "step": 32998 - }, - { - "epoch": 17.209387222946546, - "grad_norm": 1.6700868606567383, - "learning_rate": 6.734874371859297e-05, - "loss": 5.3422, - "step": 32999 - }, - { - "epoch": 17.209908735332466, - "grad_norm": 1.5616004467010498, - "learning_rate": 6.734773869346735e-05, - "loss": 5.4967, - "step": 33000 - }, - { - "epoch": 17.210430247718385, - "grad_norm": 1.460205078125, - "learning_rate": 6.734673366834171e-05, - "loss": 5.2729, - "step": 33001 - }, - { - "epoch": 17.2109517601043, - "grad_norm": 1.4771742820739746, - "learning_rate": 6.734572864321609e-05, - "loss": 5.2296, - "step": 33002 - }, - { - "epoch": 17.21147327249022, - "grad_norm": 1.4089467525482178, - "learning_rate": 6.734472361809045e-05, - "loss": 5.6486, - "step": 33003 - }, - { - "epoch": 17.21199478487614, - "grad_norm": 1.3384062051773071, - "learning_rate": 6.734371859296483e-05, - "loss": 5.7257, - "step": 33004 - }, - { - "epoch": 17.21251629726206, - "grad_norm": 1.4010330438613892, - "learning_rate": 6.73427135678392e-05, - "loss": 5.7266, - "step": 33005 - }, - { - "epoch": 17.21303780964798, - "grad_norm": 1.5016255378723145, - "learning_rate": 6.734170854271356e-05, - "loss": 5.3597, - "step": 33006 - }, - { - "epoch": 17.2135593220339, - "grad_norm": 1.600670576095581, - "learning_rate": 6.734070351758794e-05, - "loss": 5.0756, - "step": 33007 - }, - { - "epoch": 17.214080834419818, - "grad_norm": 1.3905823230743408, - "learning_rate": 6.733969849246232e-05, - "loss": 5.815, - "step": 33008 - }, - { - "epoch": 17.214602346805737, - "grad_norm": 1.4914847612380981, - "learning_rate": 6.73386934673367e-05, - "loss": 5.3183, - "step": 33009 - }, - { - "epoch": 17.215123859191657, - "grad_norm": 1.8379583358764648, - "learning_rate": 6.733768844221106e-05, - "loss": 4.621, - "step": 33010 - }, - { - "epoch": 17.215645371577576, - "grad_norm": 1.593700885772705, - "learning_rate": 6.733668341708544e-05, - "loss": 5.4909, - "step": 33011 - }, - { - "epoch": 17.216166883963496, - "grad_norm": 1.599282145500183, - "learning_rate": 6.73356783919598e-05, - "loss": 5.3386, - "step": 33012 - }, - { - "epoch": 17.216688396349415, - "grad_norm": 1.4589089155197144, - "learning_rate": 6.733467336683418e-05, - "loss": 5.6896, - "step": 33013 - }, - { - "epoch": 17.21720990873533, - "grad_norm": 1.5570796728134155, - "learning_rate": 6.733366834170854e-05, - "loss": 5.2422, - "step": 33014 - }, - { - "epoch": 17.21773142112125, - "grad_norm": 1.4735710620880127, - "learning_rate": 6.733266331658292e-05, - "loss": 5.0286, - "step": 33015 - }, - { - "epoch": 17.21825293350717, - "grad_norm": 1.5435670614242554, - "learning_rate": 6.733165829145728e-05, - "loss": 5.2346, - "step": 33016 - }, - { - "epoch": 17.21877444589309, - "grad_norm": 1.4969120025634766, - "learning_rate": 6.733065326633166e-05, - "loss": 5.2332, - "step": 33017 - }, - { - "epoch": 17.21929595827901, - "grad_norm": 1.5742106437683105, - "learning_rate": 6.732964824120604e-05, - "loss": 5.465, - "step": 33018 - }, - { - "epoch": 17.21981747066493, - "grad_norm": 1.5299242734909058, - "learning_rate": 6.73286432160804e-05, - "loss": 4.8944, - "step": 33019 - }, - { - "epoch": 17.220338983050848, - "grad_norm": 1.469718337059021, - "learning_rate": 6.732763819095478e-05, - "loss": 5.5035, - "step": 33020 - }, - { - "epoch": 17.220860495436767, - "grad_norm": 1.5592552423477173, - "learning_rate": 6.732663316582915e-05, - "loss": 4.7859, - "step": 33021 - }, - { - "epoch": 17.221382007822687, - "grad_norm": 1.411392092704773, - "learning_rate": 6.732562814070352e-05, - "loss": 5.5939, - "step": 33022 - }, - { - "epoch": 17.221903520208606, - "grad_norm": 1.4857913255691528, - "learning_rate": 6.732462311557789e-05, - "loss": 5.4589, - "step": 33023 - }, - { - "epoch": 17.222425032594526, - "grad_norm": 1.519877314567566, - "learning_rate": 6.732361809045227e-05, - "loss": 5.3058, - "step": 33024 - }, - { - "epoch": 17.22294654498044, - "grad_norm": 1.6130982637405396, - "learning_rate": 6.732261306532663e-05, - "loss": 5.4047, - "step": 33025 - }, - { - "epoch": 17.22346805736636, - "grad_norm": 1.6669623851776123, - "learning_rate": 6.732160804020101e-05, - "loss": 5.0843, - "step": 33026 - }, - { - "epoch": 17.22398956975228, - "grad_norm": 1.542178750038147, - "learning_rate": 6.732060301507537e-05, - "loss": 4.9327, - "step": 33027 - }, - { - "epoch": 17.2245110821382, - "grad_norm": 1.5398890972137451, - "learning_rate": 6.731959798994975e-05, - "loss": 5.2641, - "step": 33028 - }, - { - "epoch": 17.22503259452412, - "grad_norm": 1.564886450767517, - "learning_rate": 6.731859296482413e-05, - "loss": 5.2447, - "step": 33029 - }, - { - "epoch": 17.22555410691004, - "grad_norm": 1.4806571006774902, - "learning_rate": 6.73175879396985e-05, - "loss": 5.4506, - "step": 33030 - }, - { - "epoch": 17.22607561929596, - "grad_norm": 1.513751745223999, - "learning_rate": 6.731658291457287e-05, - "loss": 5.2669, - "step": 33031 - }, - { - "epoch": 17.226597131681878, - "grad_norm": 1.4606151580810547, - "learning_rate": 6.731557788944723e-05, - "loss": 5.3583, - "step": 33032 - }, - { - "epoch": 17.227118644067797, - "grad_norm": 1.5345149040222168, - "learning_rate": 6.731457286432161e-05, - "loss": 5.5676, - "step": 33033 - }, - { - "epoch": 17.227640156453717, - "grad_norm": 1.569079875946045, - "learning_rate": 6.731356783919598e-05, - "loss": 5.0151, - "step": 33034 - }, - { - "epoch": 17.228161668839636, - "grad_norm": 1.4353673458099365, - "learning_rate": 6.731256281407035e-05, - "loss": 5.5836, - "step": 33035 - }, - { - "epoch": 17.228683181225556, - "grad_norm": 1.5186940431594849, - "learning_rate": 6.731155778894472e-05, - "loss": 5.2923, - "step": 33036 - }, - { - "epoch": 17.22920469361147, - "grad_norm": 1.5227168798446655, - "learning_rate": 6.73105527638191e-05, - "loss": 5.2045, - "step": 33037 - }, - { - "epoch": 17.22972620599739, - "grad_norm": 1.5670908689498901, - "learning_rate": 6.730954773869347e-05, - "loss": 5.3307, - "step": 33038 - }, - { - "epoch": 17.23024771838331, - "grad_norm": 1.5845212936401367, - "learning_rate": 6.730854271356785e-05, - "loss": 5.5631, - "step": 33039 - }, - { - "epoch": 17.23076923076923, - "grad_norm": 1.5276299715042114, - "learning_rate": 6.730753768844222e-05, - "loss": 4.9217, - "step": 33040 - }, - { - "epoch": 17.23129074315515, - "grad_norm": 1.451278805732727, - "learning_rate": 6.73065326633166e-05, - "loss": 5.1013, - "step": 33041 - }, - { - "epoch": 17.23181225554107, - "grad_norm": 1.4404869079589844, - "learning_rate": 6.730552763819096e-05, - "loss": 5.1484, - "step": 33042 - }, - { - "epoch": 17.23233376792699, - "grad_norm": 1.5303912162780762, - "learning_rate": 6.730452261306534e-05, - "loss": 4.9234, - "step": 33043 - }, - { - "epoch": 17.232855280312908, - "grad_norm": 1.5982868671417236, - "learning_rate": 6.73035175879397e-05, - "loss": 5.0624, - "step": 33044 - }, - { - "epoch": 17.233376792698827, - "grad_norm": 1.5918546915054321, - "learning_rate": 6.730251256281406e-05, - "loss": 5.2703, - "step": 33045 - }, - { - "epoch": 17.233898305084747, - "grad_norm": 1.5596381425857544, - "learning_rate": 6.730150753768844e-05, - "loss": 5.6807, - "step": 33046 - }, - { - "epoch": 17.234419817470666, - "grad_norm": 1.387915849685669, - "learning_rate": 6.730050251256281e-05, - "loss": 5.7737, - "step": 33047 - }, - { - "epoch": 17.234941329856586, - "grad_norm": 1.5169873237609863, - "learning_rate": 6.729949748743718e-05, - "loss": 5.2902, - "step": 33048 - }, - { - "epoch": 17.2354628422425, - "grad_norm": 1.3742892742156982, - "learning_rate": 6.729849246231156e-05, - "loss": 5.6768, - "step": 33049 - }, - { - "epoch": 17.23598435462842, - "grad_norm": 1.4264678955078125, - "learning_rate": 6.729748743718594e-05, - "loss": 5.4213, - "step": 33050 - }, - { - "epoch": 17.23650586701434, - "grad_norm": 1.6054863929748535, - "learning_rate": 6.72964824120603e-05, - "loss": 4.9492, - "step": 33051 - }, - { - "epoch": 17.23702737940026, - "grad_norm": 1.4192224740982056, - "learning_rate": 6.729547738693468e-05, - "loss": 5.1617, - "step": 33052 - }, - { - "epoch": 17.23754889178618, - "grad_norm": 1.5397831201553345, - "learning_rate": 6.729447236180905e-05, - "loss": 5.5401, - "step": 33053 - }, - { - "epoch": 17.2380704041721, - "grad_norm": 1.678920865058899, - "learning_rate": 6.729346733668342e-05, - "loss": 5.1019, - "step": 33054 - }, - { - "epoch": 17.23859191655802, - "grad_norm": 1.5028958320617676, - "learning_rate": 6.729246231155779e-05, - "loss": 5.1992, - "step": 33055 - }, - { - "epoch": 17.239113428943938, - "grad_norm": 1.434629201889038, - "learning_rate": 6.729145728643217e-05, - "loss": 5.2653, - "step": 33056 - }, - { - "epoch": 17.239634941329857, - "grad_norm": 1.583923578262329, - "learning_rate": 6.729045226130653e-05, - "loss": 4.7602, - "step": 33057 - }, - { - "epoch": 17.240156453715777, - "grad_norm": 1.4719865322113037, - "learning_rate": 6.728944723618091e-05, - "loss": 5.7704, - "step": 33058 - }, - { - "epoch": 17.240677966101696, - "grad_norm": 1.5597851276397705, - "learning_rate": 6.728844221105529e-05, - "loss": 5.614, - "step": 33059 - }, - { - "epoch": 17.241199478487616, - "grad_norm": 1.4889317750930786, - "learning_rate": 6.728743718592965e-05, - "loss": 5.5494, - "step": 33060 - }, - { - "epoch": 17.24172099087353, - "grad_norm": 1.3837171792984009, - "learning_rate": 6.728643216080403e-05, - "loss": 5.5966, - "step": 33061 - }, - { - "epoch": 17.24224250325945, - "grad_norm": 1.4758646488189697, - "learning_rate": 6.728542713567839e-05, - "loss": 5.4834, - "step": 33062 - }, - { - "epoch": 17.24276401564537, - "grad_norm": 1.519622564315796, - "learning_rate": 6.728442211055277e-05, - "loss": 5.1589, - "step": 33063 - }, - { - "epoch": 17.24328552803129, - "grad_norm": 1.5133532285690308, - "learning_rate": 6.728341708542714e-05, - "loss": 5.2835, - "step": 33064 - }, - { - "epoch": 17.24380704041721, - "grad_norm": 1.5009416341781616, - "learning_rate": 6.728241206030151e-05, - "loss": 5.5358, - "step": 33065 - }, - { - "epoch": 17.24432855280313, - "grad_norm": 1.500701904296875, - "learning_rate": 6.728140703517588e-05, - "loss": 5.1598, - "step": 33066 - }, - { - "epoch": 17.24485006518905, - "grad_norm": 1.5384846925735474, - "learning_rate": 6.728040201005026e-05, - "loss": 5.147, - "step": 33067 - }, - { - "epoch": 17.245371577574968, - "grad_norm": 1.5217021703720093, - "learning_rate": 6.727939698492462e-05, - "loss": 5.0667, - "step": 33068 - }, - { - "epoch": 17.245893089960887, - "grad_norm": 1.624153971672058, - "learning_rate": 6.7278391959799e-05, - "loss": 5.4558, - "step": 33069 - }, - { - "epoch": 17.246414602346807, - "grad_norm": 1.5640376806259155, - "learning_rate": 6.727738693467338e-05, - "loss": 5.1108, - "step": 33070 - }, - { - "epoch": 17.246936114732726, - "grad_norm": 1.4836621284484863, - "learning_rate": 6.727638190954774e-05, - "loss": 5.097, - "step": 33071 - }, - { - "epoch": 17.247457627118646, - "grad_norm": 1.447358250617981, - "learning_rate": 6.727537688442212e-05, - "loss": 5.501, - "step": 33072 - }, - { - "epoch": 17.24797913950456, - "grad_norm": 1.4946554899215698, - "learning_rate": 6.727437185929648e-05, - "loss": 5.418, - "step": 33073 - }, - { - "epoch": 17.24850065189048, - "grad_norm": 1.4443635940551758, - "learning_rate": 6.727336683417086e-05, - "loss": 5.5153, - "step": 33074 - }, - { - "epoch": 17.2490221642764, - "grad_norm": 1.5026990175247192, - "learning_rate": 6.727236180904522e-05, - "loss": 5.544, - "step": 33075 - }, - { - "epoch": 17.24954367666232, - "grad_norm": 1.4703046083450317, - "learning_rate": 6.72713567839196e-05, - "loss": 5.6513, - "step": 33076 - }, - { - "epoch": 17.25006518904824, - "grad_norm": 1.5484355688095093, - "learning_rate": 6.727035175879397e-05, - "loss": 4.8765, - "step": 33077 - }, - { - "epoch": 17.25058670143416, - "grad_norm": 1.3599857091903687, - "learning_rate": 6.726934673366834e-05, - "loss": 5.4428, - "step": 33078 - }, - { - "epoch": 17.25110821382008, - "grad_norm": 1.3206490278244019, - "learning_rate": 6.726834170854272e-05, - "loss": 4.6021, - "step": 33079 - }, - { - "epoch": 17.251629726205998, - "grad_norm": 1.498935341835022, - "learning_rate": 6.72673366834171e-05, - "loss": 5.4066, - "step": 33080 - }, - { - "epoch": 17.252151238591917, - "grad_norm": 1.537130355834961, - "learning_rate": 6.726633165829146e-05, - "loss": 5.607, - "step": 33081 - }, - { - "epoch": 17.252672750977837, - "grad_norm": 1.6036616563796997, - "learning_rate": 6.726532663316584e-05, - "loss": 4.3552, - "step": 33082 - }, - { - "epoch": 17.253194263363756, - "grad_norm": 1.571157693862915, - "learning_rate": 6.72643216080402e-05, - "loss": 5.1717, - "step": 33083 - }, - { - "epoch": 17.253715775749676, - "grad_norm": 1.4563874006271362, - "learning_rate": 6.726331658291457e-05, - "loss": 5.3483, - "step": 33084 - }, - { - "epoch": 17.25423728813559, - "grad_norm": 1.4639335870742798, - "learning_rate": 6.726231155778895e-05, - "loss": 5.4082, - "step": 33085 - }, - { - "epoch": 17.25475880052151, - "grad_norm": 1.5027341842651367, - "learning_rate": 6.726130653266331e-05, - "loss": 5.2037, - "step": 33086 - }, - { - "epoch": 17.25528031290743, - "grad_norm": 1.6195493936538696, - "learning_rate": 6.726030150753769e-05, - "loss": 5.5882, - "step": 33087 - }, - { - "epoch": 17.25580182529335, - "grad_norm": 1.470340609550476, - "learning_rate": 6.725929648241205e-05, - "loss": 5.2375, - "step": 33088 - }, - { - "epoch": 17.25632333767927, - "grad_norm": 1.4694015979766846, - "learning_rate": 6.725829145728643e-05, - "loss": 5.4597, - "step": 33089 - }, - { - "epoch": 17.25684485006519, - "grad_norm": 1.4675050973892212, - "learning_rate": 6.725728643216081e-05, - "loss": 5.1999, - "step": 33090 - }, - { - "epoch": 17.25736636245111, - "grad_norm": 1.4413299560546875, - "learning_rate": 6.725628140703519e-05, - "loss": 5.5012, - "step": 33091 - }, - { - "epoch": 17.257887874837028, - "grad_norm": 1.450779914855957, - "learning_rate": 6.725527638190955e-05, - "loss": 5.6489, - "step": 33092 - }, - { - "epoch": 17.258409387222947, - "grad_norm": 1.3951330184936523, - "learning_rate": 6.725427135678393e-05, - "loss": 5.6849, - "step": 33093 - }, - { - "epoch": 17.258930899608867, - "grad_norm": 1.4722871780395508, - "learning_rate": 6.72532663316583e-05, - "loss": 5.5087, - "step": 33094 - }, - { - "epoch": 17.259452411994786, - "grad_norm": 1.42384934425354, - "learning_rate": 6.725226130653267e-05, - "loss": 5.4987, - "step": 33095 - }, - { - "epoch": 17.259973924380706, - "grad_norm": 1.4726128578186035, - "learning_rate": 6.725125628140704e-05, - "loss": 5.3534, - "step": 33096 - }, - { - "epoch": 17.26049543676662, - "grad_norm": 1.3934811353683472, - "learning_rate": 6.725025125628141e-05, - "loss": 5.2719, - "step": 33097 - }, - { - "epoch": 17.26101694915254, - "grad_norm": 1.6118996143341064, - "learning_rate": 6.724924623115578e-05, - "loss": 4.9341, - "step": 33098 - }, - { - "epoch": 17.26153846153846, - "grad_norm": 1.5051982402801514, - "learning_rate": 6.724824120603016e-05, - "loss": 4.9932, - "step": 33099 - }, - { - "epoch": 17.26205997392438, - "grad_norm": 1.5010708570480347, - "learning_rate": 6.724723618090453e-05, - "loss": 5.5156, - "step": 33100 - }, - { - "epoch": 17.2625814863103, - "grad_norm": 1.4464389085769653, - "learning_rate": 6.72462311557789e-05, - "loss": 5.5339, - "step": 33101 - }, - { - "epoch": 17.26310299869622, - "grad_norm": 1.452811360359192, - "learning_rate": 6.724522613065328e-05, - "loss": 5.3761, - "step": 33102 - }, - { - "epoch": 17.26362451108214, - "grad_norm": 1.4266804456710815, - "learning_rate": 6.724422110552764e-05, - "loss": 5.4284, - "step": 33103 - }, - { - "epoch": 17.264146023468058, - "grad_norm": 1.4337427616119385, - "learning_rate": 6.724321608040202e-05, - "loss": 5.4107, - "step": 33104 - }, - { - "epoch": 17.264667535853977, - "grad_norm": 1.3999773263931274, - "learning_rate": 6.724221105527638e-05, - "loss": 5.181, - "step": 33105 - }, - { - "epoch": 17.265189048239897, - "grad_norm": 1.4316717386245728, - "learning_rate": 6.724120603015076e-05, - "loss": 5.7232, - "step": 33106 - }, - { - "epoch": 17.265710560625816, - "grad_norm": 1.5130348205566406, - "learning_rate": 6.724020100502512e-05, - "loss": 5.6844, - "step": 33107 - }, - { - "epoch": 17.266232073011736, - "grad_norm": 1.507133960723877, - "learning_rate": 6.72391959798995e-05, - "loss": 5.0935, - "step": 33108 - }, - { - "epoch": 17.26675358539765, - "grad_norm": 1.3988518714904785, - "learning_rate": 6.723819095477387e-05, - "loss": 5.3655, - "step": 33109 - }, - { - "epoch": 17.26727509778357, - "grad_norm": 1.4704922437667847, - "learning_rate": 6.723718592964824e-05, - "loss": 5.4655, - "step": 33110 - }, - { - "epoch": 17.26779661016949, - "grad_norm": 1.5172432661056519, - "learning_rate": 6.723618090452262e-05, - "loss": 5.5026, - "step": 33111 - }, - { - "epoch": 17.26831812255541, - "grad_norm": 1.5471817255020142, - "learning_rate": 6.723517587939699e-05, - "loss": 5.2943, - "step": 33112 - }, - { - "epoch": 17.26883963494133, - "grad_norm": 1.468357801437378, - "learning_rate": 6.723417085427136e-05, - "loss": 5.4901, - "step": 33113 - }, - { - "epoch": 17.26936114732725, - "grad_norm": 1.3486647605895996, - "learning_rate": 6.723316582914573e-05, - "loss": 4.85, - "step": 33114 - }, - { - "epoch": 17.26988265971317, - "grad_norm": 1.4096494913101196, - "learning_rate": 6.72321608040201e-05, - "loss": 5.4003, - "step": 33115 - }, - { - "epoch": 17.270404172099088, - "grad_norm": 1.6548469066619873, - "learning_rate": 6.723115577889447e-05, - "loss": 5.4647, - "step": 33116 - }, - { - "epoch": 17.270925684485007, - "grad_norm": 1.4954118728637695, - "learning_rate": 6.723015075376885e-05, - "loss": 5.6496, - "step": 33117 - }, - { - "epoch": 17.271447196870927, - "grad_norm": 1.5319968461990356, - "learning_rate": 6.722914572864321e-05, - "loss": 4.7432, - "step": 33118 - }, - { - "epoch": 17.271968709256846, - "grad_norm": 1.5681389570236206, - "learning_rate": 6.722814070351759e-05, - "loss": 5.2887, - "step": 33119 - }, - { - "epoch": 17.272490221642762, - "grad_norm": 1.5804660320281982, - "learning_rate": 6.722713567839195e-05, - "loss": 5.4533, - "step": 33120 - }, - { - "epoch": 17.27301173402868, - "grad_norm": 1.593161702156067, - "learning_rate": 6.722613065326633e-05, - "loss": 4.7542, - "step": 33121 - }, - { - "epoch": 17.2735332464146, - "grad_norm": 1.577492117881775, - "learning_rate": 6.722512562814071e-05, - "loss": 5.1895, - "step": 33122 - }, - { - "epoch": 17.27405475880052, - "grad_norm": 1.5749127864837646, - "learning_rate": 6.722412060301509e-05, - "loss": 5.0513, - "step": 33123 - }, - { - "epoch": 17.27457627118644, - "grad_norm": 1.4444574117660522, - "learning_rate": 6.722311557788945e-05, - "loss": 5.2353, - "step": 33124 - }, - { - "epoch": 17.27509778357236, - "grad_norm": 1.4535558223724365, - "learning_rate": 6.722211055276382e-05, - "loss": 5.4569, - "step": 33125 - }, - { - "epoch": 17.27561929595828, - "grad_norm": 1.5269397497177124, - "learning_rate": 6.72211055276382e-05, - "loss": 5.5832, - "step": 33126 - }, - { - "epoch": 17.2761408083442, - "grad_norm": 1.4970554113388062, - "learning_rate": 6.722010050251256e-05, - "loss": 5.2811, - "step": 33127 - }, - { - "epoch": 17.276662320730118, - "grad_norm": 1.4499982595443726, - "learning_rate": 6.721909547738694e-05, - "loss": 5.112, - "step": 33128 - }, - { - "epoch": 17.277183833116037, - "grad_norm": 1.5835902690887451, - "learning_rate": 6.72180904522613e-05, - "loss": 5.5609, - "step": 33129 - }, - { - "epoch": 17.277705345501957, - "grad_norm": 1.4528114795684814, - "learning_rate": 6.721708542713568e-05, - "loss": 5.4103, - "step": 33130 - }, - { - "epoch": 17.278226857887876, - "grad_norm": 1.4258383512496948, - "learning_rate": 6.721608040201006e-05, - "loss": 5.5491, - "step": 33131 - }, - { - "epoch": 17.278748370273792, - "grad_norm": 1.4439412355422974, - "learning_rate": 6.721507537688443e-05, - "loss": 5.6825, - "step": 33132 - }, - { - "epoch": 17.27926988265971, - "grad_norm": 1.5801351070404053, - "learning_rate": 6.72140703517588e-05, - "loss": 4.8651, - "step": 33133 - }, - { - "epoch": 17.27979139504563, - "grad_norm": 1.485285758972168, - "learning_rate": 6.721306532663318e-05, - "loss": 4.4824, - "step": 33134 - }, - { - "epoch": 17.28031290743155, - "grad_norm": 1.4575996398925781, - "learning_rate": 6.721206030150754e-05, - "loss": 5.4403, - "step": 33135 - }, - { - "epoch": 17.28083441981747, - "grad_norm": 1.5435138940811157, - "learning_rate": 6.721105527638192e-05, - "loss": 5.1151, - "step": 33136 - }, - { - "epoch": 17.28135593220339, - "grad_norm": 1.4718621969223022, - "learning_rate": 6.721005025125628e-05, - "loss": 5.0595, - "step": 33137 - }, - { - "epoch": 17.28187744458931, - "grad_norm": 1.5386159420013428, - "learning_rate": 6.720904522613065e-05, - "loss": 5.1874, - "step": 33138 - }, - { - "epoch": 17.28239895697523, - "grad_norm": 1.4540226459503174, - "learning_rate": 6.720804020100503e-05, - "loss": 5.1577, - "step": 33139 - }, - { - "epoch": 17.282920469361148, - "grad_norm": 1.475630521774292, - "learning_rate": 6.720703517587939e-05, - "loss": 4.8422, - "step": 33140 - }, - { - "epoch": 17.283441981747067, - "grad_norm": 1.6125669479370117, - "learning_rate": 6.720603015075377e-05, - "loss": 4.4939, - "step": 33141 - }, - { - "epoch": 17.283963494132987, - "grad_norm": 1.4516061544418335, - "learning_rate": 6.720502512562815e-05, - "loss": 5.4468, - "step": 33142 - }, - { - "epoch": 17.284485006518906, - "grad_norm": 1.5348562002182007, - "learning_rate": 6.720402010050252e-05, - "loss": 5.2769, - "step": 33143 - }, - { - "epoch": 17.285006518904822, - "grad_norm": 1.4671552181243896, - "learning_rate": 6.720301507537689e-05, - "loss": 5.545, - "step": 33144 - }, - { - "epoch": 17.285528031290742, - "grad_norm": 1.4223171472549438, - "learning_rate": 6.720201005025127e-05, - "loss": 5.734, - "step": 33145 - }, - { - "epoch": 17.28604954367666, - "grad_norm": 1.5763932466506958, - "learning_rate": 6.720100502512563e-05, - "loss": 5.3947, - "step": 33146 - }, - { - "epoch": 17.28657105606258, - "grad_norm": 1.5405575037002563, - "learning_rate": 6.720000000000001e-05, - "loss": 5.3235, - "step": 33147 - }, - { - "epoch": 17.2870925684485, - "grad_norm": 1.5947425365447998, - "learning_rate": 6.719899497487437e-05, - "loss": 5.3613, - "step": 33148 - }, - { - "epoch": 17.28761408083442, - "grad_norm": 1.4649916887283325, - "learning_rate": 6.719798994974875e-05, - "loss": 5.5909, - "step": 33149 - }, - { - "epoch": 17.28813559322034, - "grad_norm": 1.4919161796569824, - "learning_rate": 6.719698492462311e-05, - "loss": 5.4265, - "step": 33150 - }, - { - "epoch": 17.28865710560626, - "grad_norm": 1.5208284854888916, - "learning_rate": 6.719597989949749e-05, - "loss": 5.2367, - "step": 33151 - }, - { - "epoch": 17.289178617992178, - "grad_norm": 1.462421178817749, - "learning_rate": 6.719497487437187e-05, - "loss": 5.0673, - "step": 33152 - }, - { - "epoch": 17.289700130378097, - "grad_norm": 1.5937293767929077, - "learning_rate": 6.719396984924623e-05, - "loss": 4.9681, - "step": 33153 - }, - { - "epoch": 17.290221642764017, - "grad_norm": 1.513319492340088, - "learning_rate": 6.719296482412061e-05, - "loss": 5.2845, - "step": 33154 - }, - { - "epoch": 17.290743155149936, - "grad_norm": 1.570441722869873, - "learning_rate": 6.719195979899498e-05, - "loss": 5.2329, - "step": 33155 - }, - { - "epoch": 17.291264667535852, - "grad_norm": 1.4280297756195068, - "learning_rate": 6.719095477386935e-05, - "loss": 5.57, - "step": 33156 - }, - { - "epoch": 17.291786179921772, - "grad_norm": 1.588638186454773, - "learning_rate": 6.718994974874372e-05, - "loss": 5.0189, - "step": 33157 - }, - { - "epoch": 17.29230769230769, - "grad_norm": 1.5735361576080322, - "learning_rate": 6.71889447236181e-05, - "loss": 5.2035, - "step": 33158 - }, - { - "epoch": 17.29282920469361, - "grad_norm": 1.6605991125106812, - "learning_rate": 6.718793969849246e-05, - "loss": 5.3493, - "step": 33159 - }, - { - "epoch": 17.29335071707953, - "grad_norm": 1.5678064823150635, - "learning_rate": 6.718693467336684e-05, - "loss": 5.3412, - "step": 33160 - }, - { - "epoch": 17.29387222946545, - "grad_norm": 1.3842918872833252, - "learning_rate": 6.71859296482412e-05, - "loss": 4.9148, - "step": 33161 - }, - { - "epoch": 17.29439374185137, - "grad_norm": 1.4697318077087402, - "learning_rate": 6.718492462311558e-05, - "loss": 5.0472, - "step": 33162 - }, - { - "epoch": 17.29491525423729, - "grad_norm": 1.5619704723358154, - "learning_rate": 6.718391959798996e-05, - "loss": 4.5757, - "step": 33163 - }, - { - "epoch": 17.295436766623208, - "grad_norm": 1.530806303024292, - "learning_rate": 6.718291457286432e-05, - "loss": 5.1879, - "step": 33164 - }, - { - "epoch": 17.295958279009128, - "grad_norm": 1.4949417114257812, - "learning_rate": 6.71819095477387e-05, - "loss": 4.8458, - "step": 33165 - }, - { - "epoch": 17.296479791395047, - "grad_norm": 1.6334277391433716, - "learning_rate": 6.718090452261306e-05, - "loss": 5.4912, - "step": 33166 - }, - { - "epoch": 17.297001303780966, - "grad_norm": 1.492998480796814, - "learning_rate": 6.717989949748744e-05, - "loss": 5.406, - "step": 33167 - }, - { - "epoch": 17.297522816166882, - "grad_norm": 1.4616643190383911, - "learning_rate": 6.71788944723618e-05, - "loss": 5.2367, - "step": 33168 - }, - { - "epoch": 17.298044328552802, - "grad_norm": 1.3032419681549072, - "learning_rate": 6.717788944723618e-05, - "loss": 4.8502, - "step": 33169 - }, - { - "epoch": 17.29856584093872, - "grad_norm": 1.5514568090438843, - "learning_rate": 6.717688442211055e-05, - "loss": 5.2081, - "step": 33170 - }, - { - "epoch": 17.29908735332464, - "grad_norm": 1.6480389833450317, - "learning_rate": 6.717587939698493e-05, - "loss": 5.2, - "step": 33171 - }, - { - "epoch": 17.29960886571056, - "grad_norm": 1.4397997856140137, - "learning_rate": 6.71748743718593e-05, - "loss": 5.6867, - "step": 33172 - }, - { - "epoch": 17.30013037809648, - "grad_norm": 1.515960454940796, - "learning_rate": 6.717386934673368e-05, - "loss": 5.4481, - "step": 33173 - }, - { - "epoch": 17.3006518904824, - "grad_norm": 1.4769114255905151, - "learning_rate": 6.717286432160805e-05, - "loss": 5.2841, - "step": 33174 - }, - { - "epoch": 17.30117340286832, - "grad_norm": 1.6379542350769043, - "learning_rate": 6.717185929648242e-05, - "loss": 4.9416, - "step": 33175 - }, - { - "epoch": 17.301694915254238, - "grad_norm": 1.3936405181884766, - "learning_rate": 6.717085427135679e-05, - "loss": 5.6795, - "step": 33176 - }, - { - "epoch": 17.302216427640158, - "grad_norm": 1.4576603174209595, - "learning_rate": 6.716984924623115e-05, - "loss": 4.9436, - "step": 33177 - }, - { - "epoch": 17.302737940026077, - "grad_norm": 1.5046980381011963, - "learning_rate": 6.716884422110553e-05, - "loss": 5.4864, - "step": 33178 - }, - { - "epoch": 17.303259452411996, - "grad_norm": 1.546419382095337, - "learning_rate": 6.71678391959799e-05, - "loss": 4.5243, - "step": 33179 - }, - { - "epoch": 17.303780964797912, - "grad_norm": 1.6308013200759888, - "learning_rate": 6.716683417085427e-05, - "loss": 5.6363, - "step": 33180 - }, - { - "epoch": 17.304302477183832, - "grad_norm": 1.5208423137664795, - "learning_rate": 6.716582914572864e-05, - "loss": 5.4821, - "step": 33181 - }, - { - "epoch": 17.30482398956975, - "grad_norm": 1.4783250093460083, - "learning_rate": 6.716482412060301e-05, - "loss": 5.5555, - "step": 33182 - }, - { - "epoch": 17.30534550195567, - "grad_norm": 1.5645936727523804, - "learning_rate": 6.716381909547739e-05, - "loss": 5.428, - "step": 33183 - }, - { - "epoch": 17.30586701434159, - "grad_norm": 1.579830288887024, - "learning_rate": 6.716281407035177e-05, - "loss": 5.6782, - "step": 33184 - }, - { - "epoch": 17.30638852672751, - "grad_norm": 1.605069637298584, - "learning_rate": 6.716180904522613e-05, - "loss": 4.8672, - "step": 33185 - }, - { - "epoch": 17.30691003911343, - "grad_norm": 1.4614670276641846, - "learning_rate": 6.716080402010051e-05, - "loss": 5.2024, - "step": 33186 - }, - { - "epoch": 17.30743155149935, - "grad_norm": 1.3892738819122314, - "learning_rate": 6.715979899497488e-05, - "loss": 5.7253, - "step": 33187 - }, - { - "epoch": 17.307953063885268, - "grad_norm": 1.490455985069275, - "learning_rate": 6.715879396984925e-05, - "loss": 5.0238, - "step": 33188 - }, - { - "epoch": 17.308474576271188, - "grad_norm": 1.476836085319519, - "learning_rate": 6.715778894472362e-05, - "loss": 5.3054, - "step": 33189 - }, - { - "epoch": 17.308996088657107, - "grad_norm": 1.5626593828201294, - "learning_rate": 6.7156783919598e-05, - "loss": 5.1933, - "step": 33190 - }, - { - "epoch": 17.309517601043027, - "grad_norm": 1.4611499309539795, - "learning_rate": 6.715577889447236e-05, - "loss": 5.4942, - "step": 33191 - }, - { - "epoch": 17.310039113428942, - "grad_norm": 1.4913069009780884, - "learning_rate": 6.715477386934674e-05, - "loss": 5.6672, - "step": 33192 - }, - { - "epoch": 17.310560625814862, - "grad_norm": 1.617273211479187, - "learning_rate": 6.715376884422112e-05, - "loss": 5.1062, - "step": 33193 - }, - { - "epoch": 17.31108213820078, - "grad_norm": 1.4564327001571655, - "learning_rate": 6.715276381909548e-05, - "loss": 5.2344, - "step": 33194 - }, - { - "epoch": 17.3116036505867, - "grad_norm": 1.599757194519043, - "learning_rate": 6.715175879396986e-05, - "loss": 5.462, - "step": 33195 - }, - { - "epoch": 17.31212516297262, - "grad_norm": 1.5887442827224731, - "learning_rate": 6.715075376884422e-05, - "loss": 4.7409, - "step": 33196 - }, - { - "epoch": 17.31264667535854, - "grad_norm": 1.4789084196090698, - "learning_rate": 6.71497487437186e-05, - "loss": 5.8818, - "step": 33197 - }, - { - "epoch": 17.31316818774446, - "grad_norm": 1.5181585550308228, - "learning_rate": 6.714874371859296e-05, - "loss": 4.9669, - "step": 33198 - }, - { - "epoch": 17.31368970013038, - "grad_norm": 1.5422979593276978, - "learning_rate": 6.714773869346734e-05, - "loss": 5.3478, - "step": 33199 - }, - { - "epoch": 17.314211212516298, - "grad_norm": 1.5148438215255737, - "learning_rate": 6.71467336683417e-05, - "loss": 4.943, - "step": 33200 - }, - { - "epoch": 17.314732724902218, - "grad_norm": 1.4661107063293457, - "learning_rate": 6.714572864321608e-05, - "loss": 5.4885, - "step": 33201 - }, - { - "epoch": 17.315254237288137, - "grad_norm": 1.4110069274902344, - "learning_rate": 6.714472361809045e-05, - "loss": 5.403, - "step": 33202 - }, - { - "epoch": 17.315775749674053, - "grad_norm": 1.3942290544509888, - "learning_rate": 6.714371859296483e-05, - "loss": 5.5083, - "step": 33203 - }, - { - "epoch": 17.316297262059972, - "grad_norm": 1.659695029258728, - "learning_rate": 6.71427135678392e-05, - "loss": 5.3198, - "step": 33204 - }, - { - "epoch": 17.316818774445892, - "grad_norm": 1.5211834907531738, - "learning_rate": 6.714170854271357e-05, - "loss": 5.5309, - "step": 33205 - }, - { - "epoch": 17.31734028683181, - "grad_norm": 1.4818496704101562, - "learning_rate": 6.714070351758795e-05, - "loss": 5.6084, - "step": 33206 - }, - { - "epoch": 17.31786179921773, - "grad_norm": 1.4255462884902954, - "learning_rate": 6.713969849246231e-05, - "loss": 5.6652, - "step": 33207 - }, - { - "epoch": 17.31838331160365, - "grad_norm": 1.5564340353012085, - "learning_rate": 6.713869346733669e-05, - "loss": 5.3376, - "step": 33208 - }, - { - "epoch": 17.31890482398957, - "grad_norm": 1.4262070655822754, - "learning_rate": 6.713768844221105e-05, - "loss": 5.5301, - "step": 33209 - }, - { - "epoch": 17.31942633637549, - "grad_norm": 1.4322673082351685, - "learning_rate": 6.713668341708543e-05, - "loss": 5.5563, - "step": 33210 - }, - { - "epoch": 17.31994784876141, - "grad_norm": 1.450416922569275, - "learning_rate": 6.71356783919598e-05, - "loss": 5.3191, - "step": 33211 - }, - { - "epoch": 17.320469361147328, - "grad_norm": 1.4206316471099854, - "learning_rate": 6.713467336683417e-05, - "loss": 5.2099, - "step": 33212 - }, - { - "epoch": 17.320990873533248, - "grad_norm": 1.3772287368774414, - "learning_rate": 6.713366834170855e-05, - "loss": 5.8706, - "step": 33213 - }, - { - "epoch": 17.321512385919167, - "grad_norm": 1.4742846488952637, - "learning_rate": 6.713266331658293e-05, - "loss": 5.4429, - "step": 33214 - }, - { - "epoch": 17.322033898305083, - "grad_norm": 1.4918235540390015, - "learning_rate": 6.713165829145729e-05, - "loss": 5.1708, - "step": 33215 - }, - { - "epoch": 17.322555410691002, - "grad_norm": 1.4645241498947144, - "learning_rate": 6.713065326633167e-05, - "loss": 5.6119, - "step": 33216 - }, - { - "epoch": 17.323076923076922, - "grad_norm": 1.4755951166152954, - "learning_rate": 6.712964824120603e-05, - "loss": 5.665, - "step": 33217 - }, - { - "epoch": 17.32359843546284, - "grad_norm": 1.4705549478530884, - "learning_rate": 6.71286432160804e-05, - "loss": 5.5169, - "step": 33218 - }, - { - "epoch": 17.32411994784876, - "grad_norm": 1.5725562572479248, - "learning_rate": 6.712763819095478e-05, - "loss": 4.893, - "step": 33219 - }, - { - "epoch": 17.32464146023468, - "grad_norm": 1.4593894481658936, - "learning_rate": 6.712663316582914e-05, - "loss": 5.9263, - "step": 33220 - }, - { - "epoch": 17.3251629726206, - "grad_norm": 1.4249464273452759, - "learning_rate": 6.712562814070352e-05, - "loss": 5.1909, - "step": 33221 - }, - { - "epoch": 17.32568448500652, - "grad_norm": 1.4653352499008179, - "learning_rate": 6.712462311557788e-05, - "loss": 4.9265, - "step": 33222 - }, - { - "epoch": 17.32620599739244, - "grad_norm": 1.4481717348098755, - "learning_rate": 6.712361809045226e-05, - "loss": 5.4974, - "step": 33223 - }, - { - "epoch": 17.326727509778358, - "grad_norm": 1.5504660606384277, - "learning_rate": 6.712261306532664e-05, - "loss": 5.4364, - "step": 33224 - }, - { - "epoch": 17.327249022164278, - "grad_norm": 1.452991247177124, - "learning_rate": 6.712160804020102e-05, - "loss": 5.2479, - "step": 33225 - }, - { - "epoch": 17.327770534550197, - "grad_norm": 1.496942162513733, - "learning_rate": 6.712060301507538e-05, - "loss": 5.4417, - "step": 33226 - }, - { - "epoch": 17.328292046936113, - "grad_norm": 1.5147522687911987, - "learning_rate": 6.711959798994976e-05, - "loss": 5.3122, - "step": 33227 - }, - { - "epoch": 17.328813559322032, - "grad_norm": 1.490186333656311, - "learning_rate": 6.711859296482412e-05, - "loss": 5.4852, - "step": 33228 - }, - { - "epoch": 17.329335071707952, - "grad_norm": 1.4018722772598267, - "learning_rate": 6.71175879396985e-05, - "loss": 5.7745, - "step": 33229 - }, - { - "epoch": 17.32985658409387, - "grad_norm": 1.5242911577224731, - "learning_rate": 6.711658291457287e-05, - "loss": 5.1237, - "step": 33230 - }, - { - "epoch": 17.33037809647979, - "grad_norm": 1.3994967937469482, - "learning_rate": 6.711557788944723e-05, - "loss": 4.971, - "step": 33231 - }, - { - "epoch": 17.33089960886571, - "grad_norm": 1.6286417245864868, - "learning_rate": 6.711457286432161e-05, - "loss": 5.2154, - "step": 33232 - }, - { - "epoch": 17.33142112125163, - "grad_norm": 1.5904589891433716, - "learning_rate": 6.711356783919599e-05, - "loss": 5.648, - "step": 33233 - }, - { - "epoch": 17.33194263363755, - "grad_norm": 1.531175136566162, - "learning_rate": 6.711256281407036e-05, - "loss": 5.2367, - "step": 33234 - }, - { - "epoch": 17.33246414602347, - "grad_norm": 1.4969462156295776, - "learning_rate": 6.711155778894473e-05, - "loss": 5.3353, - "step": 33235 - }, - { - "epoch": 17.332985658409388, - "grad_norm": 1.494615077972412, - "learning_rate": 6.71105527638191e-05, - "loss": 5.4642, - "step": 33236 - }, - { - "epoch": 17.333507170795308, - "grad_norm": 1.4615050554275513, - "learning_rate": 6.710954773869347e-05, - "loss": 5.5628, - "step": 33237 - }, - { - "epoch": 17.334028683181227, - "grad_norm": 1.5205121040344238, - "learning_rate": 6.710854271356785e-05, - "loss": 5.2111, - "step": 33238 - }, - { - "epoch": 17.334550195567143, - "grad_norm": 1.4522955417633057, - "learning_rate": 6.710753768844221e-05, - "loss": 5.0421, - "step": 33239 - }, - { - "epoch": 17.335071707953063, - "grad_norm": 1.5782256126403809, - "learning_rate": 6.710653266331659e-05, - "loss": 5.0426, - "step": 33240 - }, - { - "epoch": 17.335593220338982, - "grad_norm": 1.4247515201568604, - "learning_rate": 6.710552763819095e-05, - "loss": 5.2889, - "step": 33241 - }, - { - "epoch": 17.3361147327249, - "grad_norm": 1.37389075756073, - "learning_rate": 6.710452261306533e-05, - "loss": 5.4888, - "step": 33242 - }, - { - "epoch": 17.33663624511082, - "grad_norm": 1.4760550260543823, - "learning_rate": 6.71035175879397e-05, - "loss": 4.9202, - "step": 33243 - }, - { - "epoch": 17.33715775749674, - "grad_norm": 1.549900770187378, - "learning_rate": 6.710251256281407e-05, - "loss": 4.7648, - "step": 33244 - }, - { - "epoch": 17.33767926988266, - "grad_norm": 1.4825389385223389, - "learning_rate": 6.710150753768845e-05, - "loss": 5.5162, - "step": 33245 - }, - { - "epoch": 17.33820078226858, - "grad_norm": 1.4502508640289307, - "learning_rate": 6.710050251256282e-05, - "loss": 5.4234, - "step": 33246 - }, - { - "epoch": 17.3387222946545, - "grad_norm": 1.4650189876556396, - "learning_rate": 6.70994974874372e-05, - "loss": 5.312, - "step": 33247 - }, - { - "epoch": 17.33924380704042, - "grad_norm": 1.440493106842041, - "learning_rate": 6.709849246231156e-05, - "loss": 4.8152, - "step": 33248 - }, - { - "epoch": 17.339765319426338, - "grad_norm": 1.4682263135910034, - "learning_rate": 6.709748743718594e-05, - "loss": 5.5043, - "step": 33249 - }, - { - "epoch": 17.340286831812257, - "grad_norm": 1.5719629526138306, - "learning_rate": 6.70964824120603e-05, - "loss": 5.193, - "step": 33250 - }, - { - "epoch": 17.340808344198173, - "grad_norm": 1.420749545097351, - "learning_rate": 6.709547738693468e-05, - "loss": 5.5757, - "step": 33251 - }, - { - "epoch": 17.341329856584093, - "grad_norm": 1.524451732635498, - "learning_rate": 6.709447236180904e-05, - "loss": 5.0751, - "step": 33252 - }, - { - "epoch": 17.341851368970012, - "grad_norm": 1.5972613096237183, - "learning_rate": 6.709346733668342e-05, - "loss": 4.8599, - "step": 33253 - }, - { - "epoch": 17.34237288135593, - "grad_norm": 1.5457258224487305, - "learning_rate": 6.70924623115578e-05, - "loss": 5.3253, - "step": 33254 - }, - { - "epoch": 17.34289439374185, - "grad_norm": 1.469899296760559, - "learning_rate": 6.709145728643218e-05, - "loss": 5.3576, - "step": 33255 - }, - { - "epoch": 17.34341590612777, - "grad_norm": 1.4682813882827759, - "learning_rate": 6.709045226130654e-05, - "loss": 5.5739, - "step": 33256 - }, - { - "epoch": 17.34393741851369, - "grad_norm": 1.4549336433410645, - "learning_rate": 6.70894472361809e-05, - "loss": 4.8927, - "step": 33257 - }, - { - "epoch": 17.34445893089961, - "grad_norm": 1.4896405935287476, - "learning_rate": 6.708844221105528e-05, - "loss": 5.1153, - "step": 33258 - }, - { - "epoch": 17.34498044328553, - "grad_norm": 1.4678148031234741, - "learning_rate": 6.708743718592965e-05, - "loss": 5.5373, - "step": 33259 - }, - { - "epoch": 17.34550195567145, - "grad_norm": 1.540353775024414, - "learning_rate": 6.708643216080402e-05, - "loss": 5.0122, - "step": 33260 - }, - { - "epoch": 17.346023468057368, - "grad_norm": 1.4068691730499268, - "learning_rate": 6.708542713567839e-05, - "loss": 5.7613, - "step": 33261 - }, - { - "epoch": 17.346544980443287, - "grad_norm": 1.5258499383926392, - "learning_rate": 6.708442211055277e-05, - "loss": 5.6161, - "step": 33262 - }, - { - "epoch": 17.347066492829203, - "grad_norm": 1.4104093313217163, - "learning_rate": 6.708341708542713e-05, - "loss": 5.5217, - "step": 33263 - }, - { - "epoch": 17.347588005215123, - "grad_norm": 1.4445139169692993, - "learning_rate": 6.708241206030151e-05, - "loss": 5.2846, - "step": 33264 - }, - { - "epoch": 17.348109517601042, - "grad_norm": 1.3789421319961548, - "learning_rate": 6.708140703517589e-05, - "loss": 5.4516, - "step": 33265 - }, - { - "epoch": 17.34863102998696, - "grad_norm": 1.4387985467910767, - "learning_rate": 6.708040201005026e-05, - "loss": 5.3738, - "step": 33266 - }, - { - "epoch": 17.34915254237288, - "grad_norm": 1.5338661670684814, - "learning_rate": 6.707939698492463e-05, - "loss": 5.4728, - "step": 33267 - }, - { - "epoch": 17.3496740547588, - "grad_norm": 1.5096923112869263, - "learning_rate": 6.7078391959799e-05, - "loss": 5.0071, - "step": 33268 - }, - { - "epoch": 17.35019556714472, - "grad_norm": 1.5378172397613525, - "learning_rate": 6.707738693467337e-05, - "loss": 5.3125, - "step": 33269 - }, - { - "epoch": 17.35071707953064, - "grad_norm": 1.4495646953582764, - "learning_rate": 6.707638190954773e-05, - "loss": 5.6163, - "step": 33270 - }, - { - "epoch": 17.35123859191656, - "grad_norm": 1.5388805866241455, - "learning_rate": 6.707537688442211e-05, - "loss": 5.5217, - "step": 33271 - }, - { - "epoch": 17.35176010430248, - "grad_norm": 1.5537958145141602, - "learning_rate": 6.707437185929648e-05, - "loss": 5.036, - "step": 33272 - }, - { - "epoch": 17.352281616688398, - "grad_norm": 1.508648157119751, - "learning_rate": 6.707336683417085e-05, - "loss": 4.7866, - "step": 33273 - }, - { - "epoch": 17.352803129074317, - "grad_norm": 1.6014647483825684, - "learning_rate": 6.707236180904522e-05, - "loss": 5.2154, - "step": 33274 - }, - { - "epoch": 17.353324641460233, - "grad_norm": 1.486573576927185, - "learning_rate": 6.70713567839196e-05, - "loss": 5.3862, - "step": 33275 - }, - { - "epoch": 17.353846153846153, - "grad_norm": 1.4437408447265625, - "learning_rate": 6.707035175879397e-05, - "loss": 4.9848, - "step": 33276 - }, - { - "epoch": 17.354367666232072, - "grad_norm": 1.5180683135986328, - "learning_rate": 6.706934673366835e-05, - "loss": 4.8545, - "step": 33277 - }, - { - "epoch": 17.35488917861799, - "grad_norm": 1.5918524265289307, - "learning_rate": 6.706834170854272e-05, - "loss": 5.0233, - "step": 33278 - }, - { - "epoch": 17.35541069100391, - "grad_norm": 1.616398572921753, - "learning_rate": 6.70673366834171e-05, - "loss": 5.2491, - "step": 33279 - }, - { - "epoch": 17.35593220338983, - "grad_norm": 1.4517589807510376, - "learning_rate": 6.706633165829146e-05, - "loss": 5.5103, - "step": 33280 - }, - { - "epoch": 17.35645371577575, - "grad_norm": 1.4538401365280151, - "learning_rate": 6.706532663316584e-05, - "loss": 5.5721, - "step": 33281 - }, - { - "epoch": 17.35697522816167, - "grad_norm": 1.5214110612869263, - "learning_rate": 6.70643216080402e-05, - "loss": 5.0706, - "step": 33282 - }, - { - "epoch": 17.35749674054759, - "grad_norm": 1.416880488395691, - "learning_rate": 6.706331658291458e-05, - "loss": 5.7093, - "step": 33283 - }, - { - "epoch": 17.35801825293351, - "grad_norm": 1.510191798210144, - "learning_rate": 6.706231155778894e-05, - "loss": 5.043, - "step": 33284 - }, - { - "epoch": 17.358539765319428, - "grad_norm": 1.4894198179244995, - "learning_rate": 6.706130653266332e-05, - "loss": 5.545, - "step": 33285 - }, - { - "epoch": 17.359061277705347, - "grad_norm": 1.5111842155456543, - "learning_rate": 6.70603015075377e-05, - "loss": 5.2214, - "step": 33286 - }, - { - "epoch": 17.359582790091263, - "grad_norm": 1.4494503736495972, - "learning_rate": 6.705929648241206e-05, - "loss": 5.4127, - "step": 33287 - }, - { - "epoch": 17.360104302477183, - "grad_norm": 1.5593048334121704, - "learning_rate": 6.705829145728644e-05, - "loss": 5.1627, - "step": 33288 - }, - { - "epoch": 17.360625814863102, - "grad_norm": 1.528159737586975, - "learning_rate": 6.70572864321608e-05, - "loss": 5.4747, - "step": 33289 - }, - { - "epoch": 17.36114732724902, - "grad_norm": 1.5171750783920288, - "learning_rate": 6.705628140703518e-05, - "loss": 5.5792, - "step": 33290 - }, - { - "epoch": 17.36166883963494, - "grad_norm": 1.5074782371520996, - "learning_rate": 6.705527638190955e-05, - "loss": 5.158, - "step": 33291 - }, - { - "epoch": 17.36219035202086, - "grad_norm": 1.4312578439712524, - "learning_rate": 6.705427135678392e-05, - "loss": 5.337, - "step": 33292 - }, - { - "epoch": 17.36271186440678, - "grad_norm": 1.5035767555236816, - "learning_rate": 6.705326633165829e-05, - "loss": 5.4988, - "step": 33293 - }, - { - "epoch": 17.3632333767927, - "grad_norm": 1.5542173385620117, - "learning_rate": 6.705226130653267e-05, - "loss": 5.3462, - "step": 33294 - }, - { - "epoch": 17.36375488917862, - "grad_norm": 1.4892315864562988, - "learning_rate": 6.705125628140703e-05, - "loss": 5.6049, - "step": 33295 - }, - { - "epoch": 17.36427640156454, - "grad_norm": 1.4602221250534058, - "learning_rate": 6.705025125628141e-05, - "loss": 5.5096, - "step": 33296 - }, - { - "epoch": 17.364797913950458, - "grad_norm": 1.6699146032333374, - "learning_rate": 6.704924623115579e-05, - "loss": 5.3029, - "step": 33297 - }, - { - "epoch": 17.365319426336377, - "grad_norm": 1.510675311088562, - "learning_rate": 6.704824120603015e-05, - "loss": 5.2456, - "step": 33298 - }, - { - "epoch": 17.365840938722293, - "grad_norm": 1.4095947742462158, - "learning_rate": 6.704723618090453e-05, - "loss": 4.7944, - "step": 33299 - }, - { - "epoch": 17.366362451108213, - "grad_norm": 1.4368574619293213, - "learning_rate": 6.70462311557789e-05, - "loss": 5.3269, - "step": 33300 - }, - { - "epoch": 17.366883963494132, - "grad_norm": 1.416461706161499, - "learning_rate": 6.704522613065327e-05, - "loss": 5.0957, - "step": 33301 - }, - { - "epoch": 17.36740547588005, - "grad_norm": 1.4320602416992188, - "learning_rate": 6.704422110552764e-05, - "loss": 5.3166, - "step": 33302 - }, - { - "epoch": 17.36792698826597, - "grad_norm": 1.3926359415054321, - "learning_rate": 6.704321608040201e-05, - "loss": 5.7496, - "step": 33303 - }, - { - "epoch": 17.36844850065189, - "grad_norm": 1.3798729181289673, - "learning_rate": 6.704221105527638e-05, - "loss": 5.8203, - "step": 33304 - }, - { - "epoch": 17.36897001303781, - "grad_norm": 1.5494803190231323, - "learning_rate": 6.704120603015076e-05, - "loss": 5.2629, - "step": 33305 - }, - { - "epoch": 17.36949152542373, - "grad_norm": 1.5308970212936401, - "learning_rate": 6.704020100502513e-05, - "loss": 4.972, - "step": 33306 - }, - { - "epoch": 17.37001303780965, - "grad_norm": 1.4289151430130005, - "learning_rate": 6.703919597989951e-05, - "loss": 5.7106, - "step": 33307 - }, - { - "epoch": 17.37053455019557, - "grad_norm": 1.5010827779769897, - "learning_rate": 6.703819095477388e-05, - "loss": 5.2, - "step": 33308 - }, - { - "epoch": 17.371056062581488, - "grad_norm": 1.6940085887908936, - "learning_rate": 6.703718592964825e-05, - "loss": 4.7772, - "step": 33309 - }, - { - "epoch": 17.371577574967404, - "grad_norm": 1.5264337062835693, - "learning_rate": 6.703618090452262e-05, - "loss": 4.7844, - "step": 33310 - }, - { - "epoch": 17.372099087353323, - "grad_norm": 1.49103844165802, - "learning_rate": 6.703517587939698e-05, - "loss": 5.3426, - "step": 33311 - }, - { - "epoch": 17.372620599739243, - "grad_norm": 1.5894649028778076, - "learning_rate": 6.703417085427136e-05, - "loss": 5.1003, - "step": 33312 - }, - { - "epoch": 17.373142112125162, - "grad_norm": 1.5057048797607422, - "learning_rate": 6.703316582914572e-05, - "loss": 5.8182, - "step": 33313 - }, - { - "epoch": 17.37366362451108, - "grad_norm": 1.402477741241455, - "learning_rate": 6.70321608040201e-05, - "loss": 5.3998, - "step": 33314 - }, - { - "epoch": 17.374185136897, - "grad_norm": 1.494332194328308, - "learning_rate": 6.703115577889447e-05, - "loss": 5.4828, - "step": 33315 - }, - { - "epoch": 17.37470664928292, - "grad_norm": 1.3700257539749146, - "learning_rate": 6.703015075376884e-05, - "loss": 5.5576, - "step": 33316 - }, - { - "epoch": 17.37522816166884, - "grad_norm": 1.5250554084777832, - "learning_rate": 6.702914572864322e-05, - "loss": 5.2957, - "step": 33317 - }, - { - "epoch": 17.37574967405476, - "grad_norm": 1.5218008756637573, - "learning_rate": 6.70281407035176e-05, - "loss": 4.9751, - "step": 33318 - }, - { - "epoch": 17.37627118644068, - "grad_norm": 1.5109528303146362, - "learning_rate": 6.702713567839196e-05, - "loss": 5.1179, - "step": 33319 - }, - { - "epoch": 17.3767926988266, - "grad_norm": 1.6224979162216187, - "learning_rate": 6.702613065326634e-05, - "loss": 4.7204, - "step": 33320 - }, - { - "epoch": 17.377314211212518, - "grad_norm": 1.4317749738693237, - "learning_rate": 6.70251256281407e-05, - "loss": 5.4391, - "step": 33321 - }, - { - "epoch": 17.377835723598434, - "grad_norm": 1.350717306137085, - "learning_rate": 6.702412060301508e-05, - "loss": 5.4098, - "step": 33322 - }, - { - "epoch": 17.378357235984353, - "grad_norm": 1.5577723979949951, - "learning_rate": 6.702311557788945e-05, - "loss": 5.2119, - "step": 33323 - }, - { - "epoch": 17.378878748370273, - "grad_norm": 1.5646779537200928, - "learning_rate": 6.702211055276381e-05, - "loss": 5.4156, - "step": 33324 - }, - { - "epoch": 17.379400260756192, - "grad_norm": 1.5695018768310547, - "learning_rate": 6.702110552763819e-05, - "loss": 5.178, - "step": 33325 - }, - { - "epoch": 17.37992177314211, - "grad_norm": 1.4942240715026855, - "learning_rate": 6.702010050251257e-05, - "loss": 4.8478, - "step": 33326 - }, - { - "epoch": 17.38044328552803, - "grad_norm": 1.5628708600997925, - "learning_rate": 6.701909547738695e-05, - "loss": 5.6197, - "step": 33327 - }, - { - "epoch": 17.38096479791395, - "grad_norm": 1.4393888711929321, - "learning_rate": 6.701809045226131e-05, - "loss": 5.1081, - "step": 33328 - }, - { - "epoch": 17.38148631029987, - "grad_norm": 1.569771409034729, - "learning_rate": 6.701708542713569e-05, - "loss": 5.3521, - "step": 33329 - }, - { - "epoch": 17.38200782268579, - "grad_norm": 1.3715975284576416, - "learning_rate": 6.701608040201005e-05, - "loss": 5.2551, - "step": 33330 - }, - { - "epoch": 17.38252933507171, - "grad_norm": 1.3442103862762451, - "learning_rate": 6.701507537688443e-05, - "loss": 4.8815, - "step": 33331 - }, - { - "epoch": 17.38305084745763, - "grad_norm": 1.5002572536468506, - "learning_rate": 6.70140703517588e-05, - "loss": 5.2933, - "step": 33332 - }, - { - "epoch": 17.383572359843548, - "grad_norm": 1.4916445016860962, - "learning_rate": 6.701306532663317e-05, - "loss": 5.4251, - "step": 33333 - }, - { - "epoch": 17.384093872229464, - "grad_norm": 1.4967323541641235, - "learning_rate": 6.701206030150754e-05, - "loss": 5.3184, - "step": 33334 - }, - { - "epoch": 17.384615384615383, - "grad_norm": 1.4301798343658447, - "learning_rate": 6.701105527638191e-05, - "loss": 5.6372, - "step": 33335 - }, - { - "epoch": 17.385136897001303, - "grad_norm": 1.597723364830017, - "learning_rate": 6.701005025125628e-05, - "loss": 5.1683, - "step": 33336 - }, - { - "epoch": 17.385658409387222, - "grad_norm": 1.5463420152664185, - "learning_rate": 6.700904522613066e-05, - "loss": 5.1107, - "step": 33337 - }, - { - "epoch": 17.38617992177314, - "grad_norm": 1.4795109033584595, - "learning_rate": 6.700804020100503e-05, - "loss": 5.2401, - "step": 33338 - }, - { - "epoch": 17.38670143415906, - "grad_norm": 1.5979831218719482, - "learning_rate": 6.70070351758794e-05, - "loss": 5.147, - "step": 33339 - }, - { - "epoch": 17.38722294654498, - "grad_norm": 1.446309208869934, - "learning_rate": 6.700603015075378e-05, - "loss": 4.321, - "step": 33340 - }, - { - "epoch": 17.3877444589309, - "grad_norm": 1.4715323448181152, - "learning_rate": 6.700502512562814e-05, - "loss": 5.6124, - "step": 33341 - }, - { - "epoch": 17.38826597131682, - "grad_norm": 1.5259476900100708, - "learning_rate": 6.700402010050252e-05, - "loss": 5.5421, - "step": 33342 - }, - { - "epoch": 17.38878748370274, - "grad_norm": 1.5545669794082642, - "learning_rate": 6.700301507537688e-05, - "loss": 5.4783, - "step": 33343 - }, - { - "epoch": 17.38930899608866, - "grad_norm": 1.478026270866394, - "learning_rate": 6.700201005025126e-05, - "loss": 5.4344, - "step": 33344 - }, - { - "epoch": 17.389830508474578, - "grad_norm": 1.5149037837982178, - "learning_rate": 6.700100502512562e-05, - "loss": 5.5141, - "step": 33345 - }, - { - "epoch": 17.390352020860494, - "grad_norm": 1.447161316871643, - "learning_rate": 6.7e-05, - "loss": 5.6699, - "step": 33346 - }, - { - "epoch": 17.390873533246413, - "grad_norm": 1.4778060913085938, - "learning_rate": 6.699899497487438e-05, - "loss": 5.1445, - "step": 33347 - }, - { - "epoch": 17.391395045632333, - "grad_norm": 1.4476680755615234, - "learning_rate": 6.699798994974876e-05, - "loss": 4.9997, - "step": 33348 - }, - { - "epoch": 17.391916558018252, - "grad_norm": 1.4786076545715332, - "learning_rate": 6.699698492462312e-05, - "loss": 5.6836, - "step": 33349 - }, - { - "epoch": 17.39243807040417, - "grad_norm": 1.4523953199386597, - "learning_rate": 6.699597989949749e-05, - "loss": 5.3827, - "step": 33350 - }, - { - "epoch": 17.39295958279009, - "grad_norm": 1.5675325393676758, - "learning_rate": 6.699497487437186e-05, - "loss": 4.7767, - "step": 33351 - }, - { - "epoch": 17.39348109517601, - "grad_norm": 1.4686639308929443, - "learning_rate": 6.699396984924623e-05, - "loss": 5.5464, - "step": 33352 - }, - { - "epoch": 17.39400260756193, - "grad_norm": 1.6835224628448486, - "learning_rate": 6.69929648241206e-05, - "loss": 5.4047, - "step": 33353 - }, - { - "epoch": 17.39452411994785, - "grad_norm": 1.407196283340454, - "learning_rate": 6.699195979899497e-05, - "loss": 5.0106, - "step": 33354 - }, - { - "epoch": 17.39504563233377, - "grad_norm": 1.5315074920654297, - "learning_rate": 6.699095477386935e-05, - "loss": 5.4374, - "step": 33355 - }, - { - "epoch": 17.39556714471969, - "grad_norm": 1.5192471742630005, - "learning_rate": 6.698994974874371e-05, - "loss": 5.2218, - "step": 33356 - }, - { - "epoch": 17.396088657105608, - "grad_norm": 1.5894134044647217, - "learning_rate": 6.698894472361809e-05, - "loss": 4.8999, - "step": 33357 - }, - { - "epoch": 17.396610169491524, - "grad_norm": 1.3911851644515991, - "learning_rate": 6.698793969849247e-05, - "loss": 5.5698, - "step": 33358 - }, - { - "epoch": 17.397131681877443, - "grad_norm": 1.4347749948501587, - "learning_rate": 6.698693467336685e-05, - "loss": 5.5104, - "step": 33359 - }, - { - "epoch": 17.397653194263363, - "grad_norm": 1.5029618740081787, - "learning_rate": 6.698592964824121e-05, - "loss": 5.3089, - "step": 33360 - }, - { - "epoch": 17.398174706649282, - "grad_norm": 1.4655876159667969, - "learning_rate": 6.698492462311559e-05, - "loss": 5.265, - "step": 33361 - }, - { - "epoch": 17.3986962190352, - "grad_norm": 1.3924915790557861, - "learning_rate": 6.698391959798995e-05, - "loss": 5.5763, - "step": 33362 - }, - { - "epoch": 17.39921773142112, - "grad_norm": 1.4144177436828613, - "learning_rate": 6.698291457286433e-05, - "loss": 5.1312, - "step": 33363 - }, - { - "epoch": 17.39973924380704, - "grad_norm": 1.4794998168945312, - "learning_rate": 6.69819095477387e-05, - "loss": 5.0788, - "step": 33364 - }, - { - "epoch": 17.40026075619296, - "grad_norm": 1.3483731746673584, - "learning_rate": 6.698090452261306e-05, - "loss": 5.4778, - "step": 33365 - }, - { - "epoch": 17.40078226857888, - "grad_norm": 1.5851805210113525, - "learning_rate": 6.697989949748744e-05, - "loss": 5.1411, - "step": 33366 - }, - { - "epoch": 17.4013037809648, - "grad_norm": 1.439660668373108, - "learning_rate": 6.697889447236181e-05, - "loss": 5.359, - "step": 33367 - }, - { - "epoch": 17.40182529335072, - "grad_norm": 1.4661223888397217, - "learning_rate": 6.697788944723619e-05, - "loss": 5.4059, - "step": 33368 - }, - { - "epoch": 17.402346805736638, - "grad_norm": 1.4442590475082397, - "learning_rate": 6.697688442211056e-05, - "loss": 5.5956, - "step": 33369 - }, - { - "epoch": 17.402868318122554, - "grad_norm": 1.535129189491272, - "learning_rate": 6.697587939698493e-05, - "loss": 4.8672, - "step": 33370 - }, - { - "epoch": 17.403389830508473, - "grad_norm": 1.4925034046173096, - "learning_rate": 6.69748743718593e-05, - "loss": 5.4492, - "step": 33371 - }, - { - "epoch": 17.403911342894393, - "grad_norm": 1.4940823316574097, - "learning_rate": 6.697386934673368e-05, - "loss": 5.3661, - "step": 33372 - }, - { - "epoch": 17.404432855280312, - "grad_norm": 1.4812257289886475, - "learning_rate": 6.697286432160804e-05, - "loss": 4.8743, - "step": 33373 - }, - { - "epoch": 17.40495436766623, - "grad_norm": 1.5105160474777222, - "learning_rate": 6.697185929648242e-05, - "loss": 5.4142, - "step": 33374 - }, - { - "epoch": 17.40547588005215, - "grad_norm": 1.591681957244873, - "learning_rate": 6.697085427135678e-05, - "loss": 4.7602, - "step": 33375 - }, - { - "epoch": 17.40599739243807, - "grad_norm": 1.4522515535354614, - "learning_rate": 6.696984924623116e-05, - "loss": 5.5426, - "step": 33376 - }, - { - "epoch": 17.40651890482399, - "grad_norm": 1.4685931205749512, - "learning_rate": 6.696884422110553e-05, - "loss": 5.32, - "step": 33377 - }, - { - "epoch": 17.40704041720991, - "grad_norm": 1.304081916809082, - "learning_rate": 6.69678391959799e-05, - "loss": 5.5398, - "step": 33378 - }, - { - "epoch": 17.40756192959583, - "grad_norm": 1.4807417392730713, - "learning_rate": 6.696683417085428e-05, - "loss": 5.0898, - "step": 33379 - }, - { - "epoch": 17.40808344198175, - "grad_norm": 1.4419134855270386, - "learning_rate": 6.696582914572865e-05, - "loss": 5.1452, - "step": 33380 - }, - { - "epoch": 17.408604954367668, - "grad_norm": 1.4522358179092407, - "learning_rate": 6.696482412060302e-05, - "loss": 5.3972, - "step": 33381 - }, - { - "epoch": 17.409126466753584, - "grad_norm": 1.4647465944290161, - "learning_rate": 6.696381909547739e-05, - "loss": 5.4331, - "step": 33382 - }, - { - "epoch": 17.409647979139503, - "grad_norm": 1.506130576133728, - "learning_rate": 6.696281407035177e-05, - "loss": 5.4074, - "step": 33383 - }, - { - "epoch": 17.410169491525423, - "grad_norm": 1.394020438194275, - "learning_rate": 6.696180904522613e-05, - "loss": 5.7874, - "step": 33384 - }, - { - "epoch": 17.410691003911342, - "grad_norm": 1.4645332098007202, - "learning_rate": 6.696080402010051e-05, - "loss": 5.8053, - "step": 33385 - }, - { - "epoch": 17.41121251629726, - "grad_norm": 1.552578330039978, - "learning_rate": 6.695979899497487e-05, - "loss": 5.3209, - "step": 33386 - }, - { - "epoch": 17.41173402868318, - "grad_norm": 1.600856900215149, - "learning_rate": 6.695879396984925e-05, - "loss": 5.0006, - "step": 33387 - }, - { - "epoch": 17.4122555410691, - "grad_norm": 1.506874918937683, - "learning_rate": 6.695778894472363e-05, - "loss": 5.2559, - "step": 33388 - }, - { - "epoch": 17.41277705345502, - "grad_norm": 1.5019475221633911, - "learning_rate": 6.6956783919598e-05, - "loss": 5.1934, - "step": 33389 - }, - { - "epoch": 17.41329856584094, - "grad_norm": 1.4603872299194336, - "learning_rate": 6.695577889447237e-05, - "loss": 5.6321, - "step": 33390 - }, - { - "epoch": 17.41382007822686, - "grad_norm": 1.4812606573104858, - "learning_rate": 6.695477386934673e-05, - "loss": 5.4891, - "step": 33391 - }, - { - "epoch": 17.41434159061278, - "grad_norm": 1.4687554836273193, - "learning_rate": 6.695376884422111e-05, - "loss": 5.3157, - "step": 33392 - }, - { - "epoch": 17.414863102998694, - "grad_norm": 1.5121663808822632, - "learning_rate": 6.695276381909548e-05, - "loss": 5.5617, - "step": 33393 - }, - { - "epoch": 17.415384615384614, - "grad_norm": 1.4370312690734863, - "learning_rate": 6.695175879396985e-05, - "loss": 5.2216, - "step": 33394 - }, - { - "epoch": 17.415906127770533, - "grad_norm": 1.4511982202529907, - "learning_rate": 6.695075376884422e-05, - "loss": 5.7065, - "step": 33395 - }, - { - "epoch": 17.416427640156453, - "grad_norm": 1.5566942691802979, - "learning_rate": 6.69497487437186e-05, - "loss": 5.2102, - "step": 33396 - }, - { - "epoch": 17.416949152542372, - "grad_norm": 1.4614759683609009, - "learning_rate": 6.694874371859296e-05, - "loss": 5.0203, - "step": 33397 - }, - { - "epoch": 17.41747066492829, - "grad_norm": 1.4133148193359375, - "learning_rate": 6.694773869346734e-05, - "loss": 5.0788, - "step": 33398 - }, - { - "epoch": 17.41799217731421, - "grad_norm": 1.5128679275512695, - "learning_rate": 6.694673366834172e-05, - "loss": 5.0232, - "step": 33399 - }, - { - "epoch": 17.41851368970013, - "grad_norm": 1.3955103158950806, - "learning_rate": 6.69457286432161e-05, - "loss": 5.8089, - "step": 33400 - }, - { - "epoch": 17.41903520208605, - "grad_norm": 1.7728441953659058, - "learning_rate": 6.694472361809046e-05, - "loss": 5.2146, - "step": 33401 - }, - { - "epoch": 17.41955671447197, - "grad_norm": 1.5319862365722656, - "learning_rate": 6.694371859296484e-05, - "loss": 5.1129, - "step": 33402 - }, - { - "epoch": 17.42007822685789, - "grad_norm": 1.4230257272720337, - "learning_rate": 6.69427135678392e-05, - "loss": 5.6501, - "step": 33403 - }, - { - "epoch": 17.42059973924381, - "grad_norm": 1.5069741010665894, - "learning_rate": 6.694170854271356e-05, - "loss": 5.5578, - "step": 33404 - }, - { - "epoch": 17.421121251629724, - "grad_norm": 1.4823344945907593, - "learning_rate": 6.694070351758794e-05, - "loss": 5.3896, - "step": 33405 - }, - { - "epoch": 17.421642764015644, - "grad_norm": 1.586643099784851, - "learning_rate": 6.69396984924623e-05, - "loss": 5.0997, - "step": 33406 - }, - { - "epoch": 17.422164276401563, - "grad_norm": 1.5451557636260986, - "learning_rate": 6.693869346733668e-05, - "loss": 5.0284, - "step": 33407 - }, - { - "epoch": 17.422685788787483, - "grad_norm": 1.419773817062378, - "learning_rate": 6.693768844221106e-05, - "loss": 5.3897, - "step": 33408 - }, - { - "epoch": 17.423207301173402, - "grad_norm": 1.4194345474243164, - "learning_rate": 6.693668341708544e-05, - "loss": 5.5917, - "step": 33409 - }, - { - "epoch": 17.423728813559322, - "grad_norm": 1.4225023984909058, - "learning_rate": 6.69356783919598e-05, - "loss": 5.3747, - "step": 33410 - }, - { - "epoch": 17.42425032594524, - "grad_norm": 1.4442819356918335, - "learning_rate": 6.693467336683418e-05, - "loss": 5.3892, - "step": 33411 - }, - { - "epoch": 17.42477183833116, - "grad_norm": 1.4609419107437134, - "learning_rate": 6.693366834170855e-05, - "loss": 5.4657, - "step": 33412 - }, - { - "epoch": 17.42529335071708, - "grad_norm": 1.4340753555297852, - "learning_rate": 6.693266331658292e-05, - "loss": 5.4272, - "step": 33413 - }, - { - "epoch": 17.425814863103, - "grad_norm": 1.461288332939148, - "learning_rate": 6.693165829145729e-05, - "loss": 5.6586, - "step": 33414 - }, - { - "epoch": 17.42633637548892, - "grad_norm": 1.4543256759643555, - "learning_rate": 6.693065326633167e-05, - "loss": 5.1243, - "step": 33415 - }, - { - "epoch": 17.42685788787484, - "grad_norm": 1.591681957244873, - "learning_rate": 6.692964824120603e-05, - "loss": 5.0296, - "step": 33416 - }, - { - "epoch": 17.427379400260754, - "grad_norm": 1.4125231504440308, - "learning_rate": 6.69286432160804e-05, - "loss": 5.7179, - "step": 33417 - }, - { - "epoch": 17.427900912646674, - "grad_norm": 1.5724565982818604, - "learning_rate": 6.692763819095477e-05, - "loss": 5.0699, - "step": 33418 - }, - { - "epoch": 17.428422425032593, - "grad_norm": 1.4777381420135498, - "learning_rate": 6.692663316582915e-05, - "loss": 5.6745, - "step": 33419 - }, - { - "epoch": 17.428943937418513, - "grad_norm": 1.4690351486206055, - "learning_rate": 6.692562814070353e-05, - "loss": 5.2018, - "step": 33420 - }, - { - "epoch": 17.429465449804432, - "grad_norm": 1.5413535833358765, - "learning_rate": 6.692462311557789e-05, - "loss": 5.3191, - "step": 33421 - }, - { - "epoch": 17.429986962190352, - "grad_norm": 1.5026785135269165, - "learning_rate": 6.692361809045227e-05, - "loss": 5.6279, - "step": 33422 - }, - { - "epoch": 17.43050847457627, - "grad_norm": 1.6291320323944092, - "learning_rate": 6.692261306532663e-05, - "loss": 5.0699, - "step": 33423 - }, - { - "epoch": 17.43102998696219, - "grad_norm": 1.5108582973480225, - "learning_rate": 6.692160804020101e-05, - "loss": 5.2898, - "step": 33424 - }, - { - "epoch": 17.43155149934811, - "grad_norm": 1.5346630811691284, - "learning_rate": 6.692060301507538e-05, - "loss": 5.3551, - "step": 33425 - }, - { - "epoch": 17.43207301173403, - "grad_norm": 1.5172280073165894, - "learning_rate": 6.691959798994975e-05, - "loss": 5.1765, - "step": 33426 - }, - { - "epoch": 17.43259452411995, - "grad_norm": 1.5742782354354858, - "learning_rate": 6.691859296482412e-05, - "loss": 5.4742, - "step": 33427 - }, - { - "epoch": 17.43311603650587, - "grad_norm": 1.3925607204437256, - "learning_rate": 6.69175879396985e-05, - "loss": 5.3325, - "step": 33428 - }, - { - "epoch": 17.433637548891785, - "grad_norm": 1.482879877090454, - "learning_rate": 6.691658291457287e-05, - "loss": 5.4536, - "step": 33429 - }, - { - "epoch": 17.434159061277704, - "grad_norm": 1.5765290260314941, - "learning_rate": 6.691557788944724e-05, - "loss": 5.1975, - "step": 33430 - }, - { - "epoch": 17.434680573663623, - "grad_norm": 1.445927619934082, - "learning_rate": 6.691457286432162e-05, - "loss": 5.328, - "step": 33431 - }, - { - "epoch": 17.435202086049543, - "grad_norm": 1.5870797634124756, - "learning_rate": 6.691356783919598e-05, - "loss": 5.31, - "step": 33432 - }, - { - "epoch": 17.435723598435462, - "grad_norm": 1.4691380262374878, - "learning_rate": 6.691256281407036e-05, - "loss": 5.6373, - "step": 33433 - }, - { - "epoch": 17.436245110821382, - "grad_norm": 1.5584250688552856, - "learning_rate": 6.691155778894472e-05, - "loss": 5.1729, - "step": 33434 - }, - { - "epoch": 17.4367666232073, - "grad_norm": 1.406686782836914, - "learning_rate": 6.69105527638191e-05, - "loss": 5.5716, - "step": 33435 - }, - { - "epoch": 17.43728813559322, - "grad_norm": 1.439328908920288, - "learning_rate": 6.690954773869346e-05, - "loss": 5.349, - "step": 33436 - }, - { - "epoch": 17.43780964797914, - "grad_norm": 1.5305980443954468, - "learning_rate": 6.690854271356784e-05, - "loss": 5.2621, - "step": 33437 - }, - { - "epoch": 17.43833116036506, - "grad_norm": 1.4466997385025024, - "learning_rate": 6.690753768844221e-05, - "loss": 5.5381, - "step": 33438 - }, - { - "epoch": 17.43885267275098, - "grad_norm": 1.4293235540390015, - "learning_rate": 6.690653266331658e-05, - "loss": 5.397, - "step": 33439 - }, - { - "epoch": 17.4393741851369, - "grad_norm": 1.3869141340255737, - "learning_rate": 6.690552763819096e-05, - "loss": 5.0727, - "step": 33440 - }, - { - "epoch": 17.439895697522815, - "grad_norm": 1.4625264406204224, - "learning_rate": 6.690452261306534e-05, - "loss": 4.8833, - "step": 33441 - }, - { - "epoch": 17.440417209908734, - "grad_norm": 1.4170868396759033, - "learning_rate": 6.69035175879397e-05, - "loss": 4.9248, - "step": 33442 - }, - { - "epoch": 17.440938722294653, - "grad_norm": 1.4564176797866821, - "learning_rate": 6.690251256281407e-05, - "loss": 5.3085, - "step": 33443 - }, - { - "epoch": 17.441460234680573, - "grad_norm": 1.532454013824463, - "learning_rate": 6.690150753768845e-05, - "loss": 5.4331, - "step": 33444 - }, - { - "epoch": 17.441981747066492, - "grad_norm": 1.485054612159729, - "learning_rate": 6.690050251256281e-05, - "loss": 5.3198, - "step": 33445 - }, - { - "epoch": 17.442503259452412, - "grad_norm": 1.4993305206298828, - "learning_rate": 6.689949748743719e-05, - "loss": 5.3497, - "step": 33446 - }, - { - "epoch": 17.44302477183833, - "grad_norm": 1.559287428855896, - "learning_rate": 6.689849246231155e-05, - "loss": 5.0438, - "step": 33447 - }, - { - "epoch": 17.44354628422425, - "grad_norm": 1.4844106435775757, - "learning_rate": 6.689748743718593e-05, - "loss": 5.2888, - "step": 33448 - }, - { - "epoch": 17.44406779661017, - "grad_norm": 1.450250267982483, - "learning_rate": 6.68964824120603e-05, - "loss": 5.3924, - "step": 33449 - }, - { - "epoch": 17.44458930899609, - "grad_norm": 1.5296880006790161, - "learning_rate": 6.689547738693467e-05, - "loss": 5.2523, - "step": 33450 - }, - { - "epoch": 17.44511082138201, - "grad_norm": 1.5038554668426514, - "learning_rate": 6.689447236180905e-05, - "loss": 5.2753, - "step": 33451 - }, - { - "epoch": 17.44563233376793, - "grad_norm": 1.6183041334152222, - "learning_rate": 6.689346733668343e-05, - "loss": 5.3079, - "step": 33452 - }, - { - "epoch": 17.446153846153845, - "grad_norm": 1.4974493980407715, - "learning_rate": 6.689246231155779e-05, - "loss": 5.1431, - "step": 33453 - }, - { - "epoch": 17.446675358539764, - "grad_norm": 1.4454435110092163, - "learning_rate": 6.689145728643217e-05, - "loss": 5.6045, - "step": 33454 - }, - { - "epoch": 17.447196870925683, - "grad_norm": 1.4477466344833374, - "learning_rate": 6.689045226130653e-05, - "loss": 5.7088, - "step": 33455 - }, - { - "epoch": 17.447718383311603, - "grad_norm": 1.5472747087478638, - "learning_rate": 6.688944723618091e-05, - "loss": 5.3334, - "step": 33456 - }, - { - "epoch": 17.448239895697522, - "grad_norm": 1.5871977806091309, - "learning_rate": 6.688844221105528e-05, - "loss": 5.0604, - "step": 33457 - }, - { - "epoch": 17.448761408083442, - "grad_norm": 1.3523386716842651, - "learning_rate": 6.688743718592964e-05, - "loss": 5.6409, - "step": 33458 - }, - { - "epoch": 17.44928292046936, - "grad_norm": 1.341492772102356, - "learning_rate": 6.688643216080402e-05, - "loss": 5.6424, - "step": 33459 - }, - { - "epoch": 17.44980443285528, - "grad_norm": 1.5359411239624023, - "learning_rate": 6.68854271356784e-05, - "loss": 5.2636, - "step": 33460 - }, - { - "epoch": 17.4503259452412, - "grad_norm": 1.4575995206832886, - "learning_rate": 6.688442211055277e-05, - "loss": 5.5405, - "step": 33461 - }, - { - "epoch": 17.45084745762712, - "grad_norm": 1.4974840879440308, - "learning_rate": 6.688341708542714e-05, - "loss": 4.8354, - "step": 33462 - }, - { - "epoch": 17.45136897001304, - "grad_norm": 1.9118720293045044, - "learning_rate": 6.688241206030152e-05, - "loss": 5.0692, - "step": 33463 - }, - { - "epoch": 17.45189048239896, - "grad_norm": 1.517846941947937, - "learning_rate": 6.688140703517588e-05, - "loss": 5.4709, - "step": 33464 - }, - { - "epoch": 17.452411994784875, - "grad_norm": 1.5454707145690918, - "learning_rate": 6.688040201005026e-05, - "loss": 5.0994, - "step": 33465 - }, - { - "epoch": 17.452933507170794, - "grad_norm": 1.5073370933532715, - "learning_rate": 6.687939698492462e-05, - "loss": 5.5712, - "step": 33466 - }, - { - "epoch": 17.453455019556714, - "grad_norm": 1.469252109527588, - "learning_rate": 6.6878391959799e-05, - "loss": 5.1803, - "step": 33467 - }, - { - "epoch": 17.453976531942633, - "grad_norm": 1.681057095527649, - "learning_rate": 6.687738693467337e-05, - "loss": 4.7956, - "step": 33468 - }, - { - "epoch": 17.454498044328552, - "grad_norm": 1.5131666660308838, - "learning_rate": 6.687638190954774e-05, - "loss": 4.9839, - "step": 33469 - }, - { - "epoch": 17.455019556714472, - "grad_norm": 1.4671692848205566, - "learning_rate": 6.687537688442211e-05, - "loss": 5.2324, - "step": 33470 - }, - { - "epoch": 17.45554106910039, - "grad_norm": 1.4797028303146362, - "learning_rate": 6.687437185929649e-05, - "loss": 5.7024, - "step": 33471 - }, - { - "epoch": 17.45606258148631, - "grad_norm": 1.4669514894485474, - "learning_rate": 6.687336683417086e-05, - "loss": 5.7934, - "step": 33472 - }, - { - "epoch": 17.45658409387223, - "grad_norm": 1.63804292678833, - "learning_rate": 6.687236180904523e-05, - "loss": 5.2055, - "step": 33473 - }, - { - "epoch": 17.45710560625815, - "grad_norm": 1.4778004884719849, - "learning_rate": 6.68713567839196e-05, - "loss": 5.1445, - "step": 33474 - }, - { - "epoch": 17.45762711864407, - "grad_norm": 1.4870740175247192, - "learning_rate": 6.687035175879397e-05, - "loss": 5.5789, - "step": 33475 - }, - { - "epoch": 17.45814863102999, - "grad_norm": 1.5420019626617432, - "learning_rate": 6.686934673366835e-05, - "loss": 4.8956, - "step": 33476 - }, - { - "epoch": 17.458670143415905, - "grad_norm": 1.5009231567382812, - "learning_rate": 6.686834170854271e-05, - "loss": 5.6337, - "step": 33477 - }, - { - "epoch": 17.459191655801824, - "grad_norm": 1.476914882659912, - "learning_rate": 6.686733668341709e-05, - "loss": 5.1692, - "step": 33478 - }, - { - "epoch": 17.459713168187744, - "grad_norm": 1.3784942626953125, - "learning_rate": 6.686633165829145e-05, - "loss": 5.3096, - "step": 33479 - }, - { - "epoch": 17.460234680573663, - "grad_norm": 1.4420616626739502, - "learning_rate": 6.686532663316583e-05, - "loss": 5.5707, - "step": 33480 - }, - { - "epoch": 17.460756192959582, - "grad_norm": 1.438058853149414, - "learning_rate": 6.686432160804021e-05, - "loss": 5.1297, - "step": 33481 - }, - { - "epoch": 17.461277705345502, - "grad_norm": 1.5284255743026733, - "learning_rate": 6.686331658291459e-05, - "loss": 5.0355, - "step": 33482 - }, - { - "epoch": 17.46179921773142, - "grad_norm": 1.6728577613830566, - "learning_rate": 6.686231155778895e-05, - "loss": 4.6254, - "step": 33483 - }, - { - "epoch": 17.46232073011734, - "grad_norm": 1.3940612077713013, - "learning_rate": 6.686130653266332e-05, - "loss": 5.6026, - "step": 33484 - }, - { - "epoch": 17.46284224250326, - "grad_norm": 1.4673092365264893, - "learning_rate": 6.68603015075377e-05, - "loss": 5.7139, - "step": 33485 - }, - { - "epoch": 17.46336375488918, - "grad_norm": 1.4897104501724243, - "learning_rate": 6.685929648241206e-05, - "loss": 5.247, - "step": 33486 - }, - { - "epoch": 17.4638852672751, - "grad_norm": 1.565875768661499, - "learning_rate": 6.685829145728644e-05, - "loss": 5.4224, - "step": 33487 - }, - { - "epoch": 17.46440677966102, - "grad_norm": 1.4719622135162354, - "learning_rate": 6.68572864321608e-05, - "loss": 5.4058, - "step": 33488 - }, - { - "epoch": 17.464928292046935, - "grad_norm": 1.4342900514602661, - "learning_rate": 6.685628140703518e-05, - "loss": 5.3396, - "step": 33489 - }, - { - "epoch": 17.465449804432854, - "grad_norm": 1.4089603424072266, - "learning_rate": 6.685527638190954e-05, - "loss": 5.8537, - "step": 33490 - }, - { - "epoch": 17.465971316818774, - "grad_norm": 1.4695247411727905, - "learning_rate": 6.685427135678392e-05, - "loss": 4.9305, - "step": 33491 - }, - { - "epoch": 17.466492829204693, - "grad_norm": 1.419036865234375, - "learning_rate": 6.68532663316583e-05, - "loss": 5.3849, - "step": 33492 - }, - { - "epoch": 17.467014341590613, - "grad_norm": 1.4535226821899414, - "learning_rate": 6.685226130653268e-05, - "loss": 5.4034, - "step": 33493 - }, - { - "epoch": 17.467535853976532, - "grad_norm": 1.4767887592315674, - "learning_rate": 6.685125628140704e-05, - "loss": 5.3438, - "step": 33494 - }, - { - "epoch": 17.46805736636245, - "grad_norm": 1.4260567426681519, - "learning_rate": 6.685025125628142e-05, - "loss": 5.5399, - "step": 33495 - }, - { - "epoch": 17.46857887874837, - "grad_norm": 1.4666277170181274, - "learning_rate": 6.684924623115578e-05, - "loss": 5.426, - "step": 33496 - }, - { - "epoch": 17.46910039113429, - "grad_norm": 1.5771691799163818, - "learning_rate": 6.684824120603015e-05, - "loss": 4.918, - "step": 33497 - }, - { - "epoch": 17.46962190352021, - "grad_norm": 1.535502552986145, - "learning_rate": 6.684723618090452e-05, - "loss": 5.0628, - "step": 33498 - }, - { - "epoch": 17.47014341590613, - "grad_norm": 1.5861425399780273, - "learning_rate": 6.684623115577889e-05, - "loss": 5.1203, - "step": 33499 - }, - { - "epoch": 17.470664928292045, - "grad_norm": 1.508304238319397, - "learning_rate": 6.684522613065327e-05, - "loss": 5.1636, - "step": 33500 - }, - { - "epoch": 17.471186440677965, - "grad_norm": 1.519816517829895, - "learning_rate": 6.684422110552764e-05, - "loss": 5.4293, - "step": 33501 - }, - { - "epoch": 17.471707953063884, - "grad_norm": 1.4848240613937378, - "learning_rate": 6.684321608040202e-05, - "loss": 5.6302, - "step": 33502 - }, - { - "epoch": 17.472229465449804, - "grad_norm": 1.5345518589019775, - "learning_rate": 6.684221105527639e-05, - "loss": 4.8362, - "step": 33503 - }, - { - "epoch": 17.472750977835723, - "grad_norm": 1.4793061017990112, - "learning_rate": 6.684120603015076e-05, - "loss": 5.3737, - "step": 33504 - }, - { - "epoch": 17.473272490221643, - "grad_norm": 1.3838433027267456, - "learning_rate": 6.684020100502513e-05, - "loss": 5.5717, - "step": 33505 - }, - { - "epoch": 17.473794002607562, - "grad_norm": 1.5499197244644165, - "learning_rate": 6.68391959798995e-05, - "loss": 5.3729, - "step": 33506 - }, - { - "epoch": 17.47431551499348, - "grad_norm": 1.5816171169281006, - "learning_rate": 6.683819095477387e-05, - "loss": 5.6421, - "step": 33507 - }, - { - "epoch": 17.4748370273794, - "grad_norm": 1.4248586893081665, - "learning_rate": 6.683718592964825e-05, - "loss": 5.1068, - "step": 33508 - }, - { - "epoch": 17.47535853976532, - "grad_norm": 1.4413464069366455, - "learning_rate": 6.683618090452261e-05, - "loss": 5.6054, - "step": 33509 - }, - { - "epoch": 17.47588005215124, - "grad_norm": 1.478460669517517, - "learning_rate": 6.683517587939698e-05, - "loss": 5.5342, - "step": 33510 - }, - { - "epoch": 17.47640156453716, - "grad_norm": 1.5583748817443848, - "learning_rate": 6.683417085427135e-05, - "loss": 4.9795, - "step": 33511 - }, - { - "epoch": 17.476923076923075, - "grad_norm": 1.4952244758605957, - "learning_rate": 6.683316582914573e-05, - "loss": 5.6182, - "step": 33512 - }, - { - "epoch": 17.477444589308995, - "grad_norm": 1.4532502889633179, - "learning_rate": 6.683216080402011e-05, - "loss": 5.4582, - "step": 33513 - }, - { - "epoch": 17.477966101694914, - "grad_norm": 1.5680408477783203, - "learning_rate": 6.683115577889447e-05, - "loss": 4.8428, - "step": 33514 - }, - { - "epoch": 17.478487614080834, - "grad_norm": 1.3751276731491089, - "learning_rate": 6.683015075376885e-05, - "loss": 5.697, - "step": 33515 - }, - { - "epoch": 17.479009126466753, - "grad_norm": 1.539723515510559, - "learning_rate": 6.682914572864322e-05, - "loss": 5.222, - "step": 33516 - }, - { - "epoch": 17.479530638852673, - "grad_norm": 1.5359681844711304, - "learning_rate": 6.68281407035176e-05, - "loss": 5.4974, - "step": 33517 - }, - { - "epoch": 17.480052151238592, - "grad_norm": 1.5961744785308838, - "learning_rate": 6.682713567839196e-05, - "loss": 5.2092, - "step": 33518 - }, - { - "epoch": 17.48057366362451, - "grad_norm": 1.51680326461792, - "learning_rate": 6.682613065326634e-05, - "loss": 5.4328, - "step": 33519 - }, - { - "epoch": 17.48109517601043, - "grad_norm": 1.49820077419281, - "learning_rate": 6.68251256281407e-05, - "loss": 5.3644, - "step": 33520 - }, - { - "epoch": 17.48161668839635, - "grad_norm": 1.5129673480987549, - "learning_rate": 6.682412060301508e-05, - "loss": 5.4348, - "step": 33521 - }, - { - "epoch": 17.48213820078227, - "grad_norm": 1.564743161201477, - "learning_rate": 6.682311557788946e-05, - "loss": 5.2788, - "step": 33522 - }, - { - "epoch": 17.48265971316819, - "grad_norm": 1.4221248626708984, - "learning_rate": 6.682211055276382e-05, - "loss": 5.3871, - "step": 33523 - }, - { - "epoch": 17.483181225554105, - "grad_norm": 1.522007703781128, - "learning_rate": 6.68211055276382e-05, - "loss": 5.6406, - "step": 33524 - }, - { - "epoch": 17.483702737940025, - "grad_norm": 1.487996220588684, - "learning_rate": 6.682010050251256e-05, - "loss": 5.4128, - "step": 33525 - }, - { - "epoch": 17.484224250325944, - "grad_norm": 1.5830345153808594, - "learning_rate": 6.681909547738694e-05, - "loss": 4.6409, - "step": 33526 - }, - { - "epoch": 17.484745762711864, - "grad_norm": 1.43160879611969, - "learning_rate": 6.68180904522613e-05, - "loss": 5.6553, - "step": 33527 - }, - { - "epoch": 17.485267275097783, - "grad_norm": 1.5266607999801636, - "learning_rate": 6.681708542713568e-05, - "loss": 5.1455, - "step": 33528 - }, - { - "epoch": 17.485788787483703, - "grad_norm": 1.4455790519714355, - "learning_rate": 6.681608040201005e-05, - "loss": 5.4504, - "step": 33529 - }, - { - "epoch": 17.486310299869622, - "grad_norm": 1.3521860837936401, - "learning_rate": 6.681507537688442e-05, - "loss": 5.2934, - "step": 33530 - }, - { - "epoch": 17.48683181225554, - "grad_norm": 1.3788613080978394, - "learning_rate": 6.681407035175879e-05, - "loss": 5.4867, - "step": 33531 - }, - { - "epoch": 17.48735332464146, - "grad_norm": 1.4932641983032227, - "learning_rate": 6.681306532663317e-05, - "loss": 4.8435, - "step": 33532 - }, - { - "epoch": 17.48787483702738, - "grad_norm": 1.4172641038894653, - "learning_rate": 6.681206030150754e-05, - "loss": 5.6636, - "step": 33533 - }, - { - "epoch": 17.4883963494133, - "grad_norm": 1.3882536888122559, - "learning_rate": 6.681105527638192e-05, - "loss": 5.7971, - "step": 33534 - }, - { - "epoch": 17.48891786179922, - "grad_norm": 1.548936367034912, - "learning_rate": 6.681005025125629e-05, - "loss": 4.9391, - "step": 33535 - }, - { - "epoch": 17.489439374185135, - "grad_norm": 1.4632668495178223, - "learning_rate": 6.680904522613065e-05, - "loss": 5.7876, - "step": 33536 - }, - { - "epoch": 17.489960886571055, - "grad_norm": 1.5263110399246216, - "learning_rate": 6.680804020100503e-05, - "loss": 4.9114, - "step": 33537 - }, - { - "epoch": 17.490482398956974, - "grad_norm": 1.5349773168563843, - "learning_rate": 6.68070351758794e-05, - "loss": 5.3014, - "step": 33538 - }, - { - "epoch": 17.491003911342894, - "grad_norm": 1.4554063081741333, - "learning_rate": 6.680603015075377e-05, - "loss": 5.4729, - "step": 33539 - }, - { - "epoch": 17.491525423728813, - "grad_norm": 1.4763081073760986, - "learning_rate": 6.680502512562814e-05, - "loss": 5.5152, - "step": 33540 - }, - { - "epoch": 17.492046936114733, - "grad_norm": 1.452770709991455, - "learning_rate": 6.680402010050251e-05, - "loss": 5.4009, - "step": 33541 - }, - { - "epoch": 17.492568448500652, - "grad_norm": 1.3868911266326904, - "learning_rate": 6.680301507537689e-05, - "loss": 5.4403, - "step": 33542 - }, - { - "epoch": 17.49308996088657, - "grad_norm": 1.515199065208435, - "learning_rate": 6.680201005025127e-05, - "loss": 5.1869, - "step": 33543 - }, - { - "epoch": 17.49361147327249, - "grad_norm": 1.5757421255111694, - "learning_rate": 6.680100502512563e-05, - "loss": 5.6033, - "step": 33544 - }, - { - "epoch": 17.49413298565841, - "grad_norm": 1.5854285955429077, - "learning_rate": 6.680000000000001e-05, - "loss": 5.023, - "step": 33545 - }, - { - "epoch": 17.49465449804433, - "grad_norm": 1.5450571775436401, - "learning_rate": 6.679899497487438e-05, - "loss": 4.7958, - "step": 33546 - }, - { - "epoch": 17.49517601043025, - "grad_norm": 1.4891716241836548, - "learning_rate": 6.679798994974875e-05, - "loss": 5.7182, - "step": 33547 - }, - { - "epoch": 17.495697522816165, - "grad_norm": 1.582636833190918, - "learning_rate": 6.679698492462312e-05, - "loss": 5.2431, - "step": 33548 - }, - { - "epoch": 17.496219035202085, - "grad_norm": 1.514701008796692, - "learning_rate": 6.67959798994975e-05, - "loss": 5.7405, - "step": 33549 - }, - { - "epoch": 17.496740547588004, - "grad_norm": 1.5487916469573975, - "learning_rate": 6.679497487437186e-05, - "loss": 5.2309, - "step": 33550 - }, - { - "epoch": 17.497262059973924, - "grad_norm": 1.4171514511108398, - "learning_rate": 6.679396984924622e-05, - "loss": 4.8839, - "step": 33551 - }, - { - "epoch": 17.497783572359843, - "grad_norm": 1.3275659084320068, - "learning_rate": 6.67929648241206e-05, - "loss": 5.6371, - "step": 33552 - }, - { - "epoch": 17.498305084745763, - "grad_norm": 1.5101946592330933, - "learning_rate": 6.679195979899498e-05, - "loss": 5.2267, - "step": 33553 - }, - { - "epoch": 17.498826597131682, - "grad_norm": 1.4753997325897217, - "learning_rate": 6.679095477386936e-05, - "loss": 5.5606, - "step": 33554 - }, - { - "epoch": 17.4993481095176, - "grad_norm": 1.5931370258331299, - "learning_rate": 6.678994974874372e-05, - "loss": 5.5364, - "step": 33555 - }, - { - "epoch": 17.49986962190352, - "grad_norm": 1.5078400373458862, - "learning_rate": 6.67889447236181e-05, - "loss": 5.5726, - "step": 33556 - }, - { - "epoch": 17.50039113428944, - "grad_norm": 1.7046397924423218, - "learning_rate": 6.678793969849246e-05, - "loss": 4.4278, - "step": 33557 - }, - { - "epoch": 17.50091264667536, - "grad_norm": 1.5490761995315552, - "learning_rate": 6.678693467336684e-05, - "loss": 4.4357, - "step": 33558 - }, - { - "epoch": 17.50143415906128, - "grad_norm": 1.5909647941589355, - "learning_rate": 6.67859296482412e-05, - "loss": 5.1081, - "step": 33559 - }, - { - "epoch": 17.501955671447195, - "grad_norm": 1.5092285871505737, - "learning_rate": 6.678492462311558e-05, - "loss": 5.7141, - "step": 33560 - }, - { - "epoch": 17.502477183833115, - "grad_norm": 1.5692945718765259, - "learning_rate": 6.678391959798995e-05, - "loss": 5.025, - "step": 33561 - }, - { - "epoch": 17.502998696219034, - "grad_norm": 1.6386104822158813, - "learning_rate": 6.678291457286433e-05, - "loss": 4.9107, - "step": 33562 - }, - { - "epoch": 17.503520208604954, - "grad_norm": 1.6459720134735107, - "learning_rate": 6.67819095477387e-05, - "loss": 5.0206, - "step": 33563 - }, - { - "epoch": 17.504041720990873, - "grad_norm": 1.469666600227356, - "learning_rate": 6.678090452261307e-05, - "loss": 5.5328, - "step": 33564 - }, - { - "epoch": 17.504563233376793, - "grad_norm": 1.498950481414795, - "learning_rate": 6.677989949748745e-05, - "loss": 5.468, - "step": 33565 - }, - { - "epoch": 17.505084745762712, - "grad_norm": 1.4868378639221191, - "learning_rate": 6.677889447236181e-05, - "loss": 5.3955, - "step": 33566 - }, - { - "epoch": 17.50560625814863, - "grad_norm": 1.4994423389434814, - "learning_rate": 6.677788944723619e-05, - "loss": 5.6555, - "step": 33567 - }, - { - "epoch": 17.50612777053455, - "grad_norm": 1.5074726343154907, - "learning_rate": 6.677688442211055e-05, - "loss": 5.2838, - "step": 33568 - }, - { - "epoch": 17.50664928292047, - "grad_norm": 1.4139057397842407, - "learning_rate": 6.677587939698493e-05, - "loss": 5.3472, - "step": 33569 - }, - { - "epoch": 17.50717079530639, - "grad_norm": 1.629477620124817, - "learning_rate": 6.67748743718593e-05, - "loss": 5.022, - "step": 33570 - }, - { - "epoch": 17.50769230769231, - "grad_norm": 1.5378221273422241, - "learning_rate": 6.677386934673367e-05, - "loss": 5.1959, - "step": 33571 - }, - { - "epoch": 17.508213820078225, - "grad_norm": 1.4986706972122192, - "learning_rate": 6.677286432160804e-05, - "loss": 5.0108, - "step": 33572 - }, - { - "epoch": 17.508735332464145, - "grad_norm": 1.516480565071106, - "learning_rate": 6.677185929648241e-05, - "loss": 5.4887, - "step": 33573 - }, - { - "epoch": 17.509256844850064, - "grad_norm": 1.4564619064331055, - "learning_rate": 6.677085427135679e-05, - "loss": 5.5599, - "step": 33574 - }, - { - "epoch": 17.509778357235984, - "grad_norm": 1.6503117084503174, - "learning_rate": 6.676984924623117e-05, - "loss": 4.4819, - "step": 33575 - }, - { - "epoch": 17.510299869621903, - "grad_norm": 1.5032923221588135, - "learning_rate": 6.676884422110553e-05, - "loss": 4.9698, - "step": 33576 - }, - { - "epoch": 17.510821382007823, - "grad_norm": 1.454858422279358, - "learning_rate": 6.67678391959799e-05, - "loss": 5.3213, - "step": 33577 - }, - { - "epoch": 17.511342894393742, - "grad_norm": 1.4162172079086304, - "learning_rate": 6.676683417085428e-05, - "loss": 5.5863, - "step": 33578 - }, - { - "epoch": 17.51186440677966, - "grad_norm": 1.5438073873519897, - "learning_rate": 6.676582914572864e-05, - "loss": 5.1078, - "step": 33579 - }, - { - "epoch": 17.51238591916558, - "grad_norm": 1.501839280128479, - "learning_rate": 6.676482412060302e-05, - "loss": 5.4797, - "step": 33580 - }, - { - "epoch": 17.5129074315515, - "grad_norm": 1.5382083654403687, - "learning_rate": 6.676381909547738e-05, - "loss": 5.3909, - "step": 33581 - }, - { - "epoch": 17.51342894393742, - "grad_norm": 1.6107404232025146, - "learning_rate": 6.676281407035176e-05, - "loss": 5.1726, - "step": 33582 - }, - { - "epoch": 17.513950456323336, - "grad_norm": 1.4606279134750366, - "learning_rate": 6.676180904522614e-05, - "loss": 5.0782, - "step": 33583 - }, - { - "epoch": 17.514471968709255, - "grad_norm": 1.4372286796569824, - "learning_rate": 6.676080402010052e-05, - "loss": 5.485, - "step": 33584 - }, - { - "epoch": 17.514993481095175, - "grad_norm": 1.5472735166549683, - "learning_rate": 6.675979899497488e-05, - "loss": 5.4155, - "step": 33585 - }, - { - "epoch": 17.515514993481094, - "grad_norm": 1.3883097171783447, - "learning_rate": 6.675879396984926e-05, - "loss": 5.7492, - "step": 33586 - }, - { - "epoch": 17.516036505867014, - "grad_norm": 1.4100937843322754, - "learning_rate": 6.675778894472362e-05, - "loss": 4.9691, - "step": 33587 - }, - { - "epoch": 17.516558018252933, - "grad_norm": 1.4207518100738525, - "learning_rate": 6.6756783919598e-05, - "loss": 5.6347, - "step": 33588 - }, - { - "epoch": 17.517079530638853, - "grad_norm": 1.4281411170959473, - "learning_rate": 6.675577889447236e-05, - "loss": 5.8982, - "step": 33589 - }, - { - "epoch": 17.517601043024772, - "grad_norm": 1.4364161491394043, - "learning_rate": 6.675477386934673e-05, - "loss": 5.793, - "step": 33590 - }, - { - "epoch": 17.51812255541069, - "grad_norm": 1.5742452144622803, - "learning_rate": 6.67537688442211e-05, - "loss": 5.0185, - "step": 33591 - }, - { - "epoch": 17.51864406779661, - "grad_norm": 1.3904318809509277, - "learning_rate": 6.675276381909547e-05, - "loss": 5.5428, - "step": 33592 - }, - { - "epoch": 17.51916558018253, - "grad_norm": 1.4912211894989014, - "learning_rate": 6.675175879396985e-05, - "loss": 5.5415, - "step": 33593 - }, - { - "epoch": 17.51968709256845, - "grad_norm": 1.5904203653335571, - "learning_rate": 6.675075376884423e-05, - "loss": 4.9657, - "step": 33594 - }, - { - "epoch": 17.52020860495437, - "grad_norm": 1.543765664100647, - "learning_rate": 6.67497487437186e-05, - "loss": 5.0986, - "step": 33595 - }, - { - "epoch": 17.520730117340285, - "grad_norm": 1.5020555257797241, - "learning_rate": 6.674874371859297e-05, - "loss": 5.0867, - "step": 33596 - }, - { - "epoch": 17.521251629726205, - "grad_norm": 1.4749630689620972, - "learning_rate": 6.674773869346735e-05, - "loss": 5.1648, - "step": 33597 - }, - { - "epoch": 17.521773142112124, - "grad_norm": 1.4686397314071655, - "learning_rate": 6.674673366834171e-05, - "loss": 5.2412, - "step": 33598 - }, - { - "epoch": 17.522294654498044, - "grad_norm": 1.490548849105835, - "learning_rate": 6.674572864321609e-05, - "loss": 5.5105, - "step": 33599 - }, - { - "epoch": 17.522816166883963, - "grad_norm": 1.5316661596298218, - "learning_rate": 6.674472361809045e-05, - "loss": 5.3177, - "step": 33600 - }, - { - "epoch": 17.523337679269883, - "grad_norm": 1.4707355499267578, - "learning_rate": 6.674371859296483e-05, - "loss": 5.5006, - "step": 33601 - }, - { - "epoch": 17.523859191655802, - "grad_norm": 1.445454716682434, - "learning_rate": 6.67427135678392e-05, - "loss": 5.0029, - "step": 33602 - }, - { - "epoch": 17.52438070404172, - "grad_norm": 1.415960669517517, - "learning_rate": 6.674170854271356e-05, - "loss": 5.7087, - "step": 33603 - }, - { - "epoch": 17.52490221642764, - "grad_norm": 1.4890999794006348, - "learning_rate": 6.674070351758794e-05, - "loss": 5.4903, - "step": 33604 - }, - { - "epoch": 17.52542372881356, - "grad_norm": 1.5075147151947021, - "learning_rate": 6.673969849246231e-05, - "loss": 5.202, - "step": 33605 - }, - { - "epoch": 17.52594524119948, - "grad_norm": 1.4761990308761597, - "learning_rate": 6.673869346733669e-05, - "loss": 5.0487, - "step": 33606 - }, - { - "epoch": 17.526466753585396, - "grad_norm": 1.3872963190078735, - "learning_rate": 6.673768844221106e-05, - "loss": 5.2417, - "step": 33607 - }, - { - "epoch": 17.526988265971315, - "grad_norm": 1.717200517654419, - "learning_rate": 6.673668341708543e-05, - "loss": 5.1929, - "step": 33608 - }, - { - "epoch": 17.527509778357235, - "grad_norm": 1.5887441635131836, - "learning_rate": 6.67356783919598e-05, - "loss": 5.0995, - "step": 33609 - }, - { - "epoch": 17.528031290743154, - "grad_norm": 1.567001223564148, - "learning_rate": 6.673467336683418e-05, - "loss": 5.6378, - "step": 33610 - }, - { - "epoch": 17.528552803129074, - "grad_norm": 1.524603009223938, - "learning_rate": 6.673366834170854e-05, - "loss": 4.9643, - "step": 33611 - }, - { - "epoch": 17.529074315514993, - "grad_norm": 1.4020081758499146, - "learning_rate": 6.673266331658292e-05, - "loss": 5.3662, - "step": 33612 - }, - { - "epoch": 17.529595827900913, - "grad_norm": 1.5260341167449951, - "learning_rate": 6.673165829145728e-05, - "loss": 5.1805, - "step": 33613 - }, - { - "epoch": 17.530117340286832, - "grad_norm": 1.514857292175293, - "learning_rate": 6.673065326633166e-05, - "loss": 5.655, - "step": 33614 - }, - { - "epoch": 17.53063885267275, - "grad_norm": 1.4716734886169434, - "learning_rate": 6.672964824120604e-05, - "loss": 5.2438, - "step": 33615 - }, - { - "epoch": 17.53116036505867, - "grad_norm": 1.7494533061981201, - "learning_rate": 6.67286432160804e-05, - "loss": 4.3148, - "step": 33616 - }, - { - "epoch": 17.53168187744459, - "grad_norm": 1.4963124990463257, - "learning_rate": 6.672763819095478e-05, - "loss": 4.317, - "step": 33617 - }, - { - "epoch": 17.53220338983051, - "grad_norm": 1.479438066482544, - "learning_rate": 6.672663316582915e-05, - "loss": 5.3836, - "step": 33618 - }, - { - "epoch": 17.532724902216426, - "grad_norm": 1.5507454872131348, - "learning_rate": 6.672562814070352e-05, - "loss": 5.028, - "step": 33619 - }, - { - "epoch": 17.533246414602345, - "grad_norm": 1.4634819030761719, - "learning_rate": 6.672462311557789e-05, - "loss": 5.407, - "step": 33620 - }, - { - "epoch": 17.533767926988265, - "grad_norm": 1.5122042894363403, - "learning_rate": 6.672361809045227e-05, - "loss": 5.2915, - "step": 33621 - }, - { - "epoch": 17.534289439374184, - "grad_norm": 1.4723451137542725, - "learning_rate": 6.672261306532663e-05, - "loss": 5.3107, - "step": 33622 - }, - { - "epoch": 17.534810951760104, - "grad_norm": 1.5636420249938965, - "learning_rate": 6.672160804020101e-05, - "loss": 5.0189, - "step": 33623 - }, - { - "epoch": 17.535332464146023, - "grad_norm": 1.5002046823501587, - "learning_rate": 6.672060301507537e-05, - "loss": 4.6782, - "step": 33624 - }, - { - "epoch": 17.535853976531943, - "grad_norm": 1.4689173698425293, - "learning_rate": 6.671959798994975e-05, - "loss": 5.3595, - "step": 33625 - }, - { - "epoch": 17.536375488917862, - "grad_norm": 1.4571763277053833, - "learning_rate": 6.671859296482413e-05, - "loss": 5.277, - "step": 33626 - }, - { - "epoch": 17.53689700130378, - "grad_norm": 1.3910627365112305, - "learning_rate": 6.67175879396985e-05, - "loss": 5.493, - "step": 33627 - }, - { - "epoch": 17.5374185136897, - "grad_norm": 1.4405733346939087, - "learning_rate": 6.671658291457287e-05, - "loss": 5.2626, - "step": 33628 - }, - { - "epoch": 17.53794002607562, - "grad_norm": 1.5706112384796143, - "learning_rate": 6.671557788944723e-05, - "loss": 5.0165, - "step": 33629 - }, - { - "epoch": 17.53846153846154, - "grad_norm": 1.4711456298828125, - "learning_rate": 6.671457286432161e-05, - "loss": 5.5881, - "step": 33630 - }, - { - "epoch": 17.538983050847456, - "grad_norm": 1.4380877017974854, - "learning_rate": 6.671356783919598e-05, - "loss": 5.3846, - "step": 33631 - }, - { - "epoch": 17.539504563233375, - "grad_norm": 1.5314782857894897, - "learning_rate": 6.671256281407035e-05, - "loss": 4.8974, - "step": 33632 - }, - { - "epoch": 17.540026075619295, - "grad_norm": 1.4686771631240845, - "learning_rate": 6.671155778894472e-05, - "loss": 5.3661, - "step": 33633 - }, - { - "epoch": 17.540547588005214, - "grad_norm": 1.5379958152770996, - "learning_rate": 6.67105527638191e-05, - "loss": 5.2994, - "step": 33634 - }, - { - "epoch": 17.541069100391134, - "grad_norm": 1.4266431331634521, - "learning_rate": 6.670954773869347e-05, - "loss": 5.2493, - "step": 33635 - }, - { - "epoch": 17.541590612777053, - "grad_norm": 1.4842463731765747, - "learning_rate": 6.670854271356785e-05, - "loss": 5.2025, - "step": 33636 - }, - { - "epoch": 17.542112125162973, - "grad_norm": 1.6071094274520874, - "learning_rate": 6.670753768844222e-05, - "loss": 5.0505, - "step": 33637 - }, - { - "epoch": 17.542633637548892, - "grad_norm": 1.4466701745986938, - "learning_rate": 6.67065326633166e-05, - "loss": 5.5867, - "step": 33638 - }, - { - "epoch": 17.54315514993481, - "grad_norm": 1.4442840814590454, - "learning_rate": 6.670552763819096e-05, - "loss": 5.3218, - "step": 33639 - }, - { - "epoch": 17.54367666232073, - "grad_norm": 1.5439329147338867, - "learning_rate": 6.670452261306534e-05, - "loss": 4.9258, - "step": 33640 - }, - { - "epoch": 17.54419817470665, - "grad_norm": 1.5394783020019531, - "learning_rate": 6.67035175879397e-05, - "loss": 5.1029, - "step": 33641 - }, - { - "epoch": 17.54471968709257, - "grad_norm": 1.4558593034744263, - "learning_rate": 6.670251256281408e-05, - "loss": 5.6461, - "step": 33642 - }, - { - "epoch": 17.545241199478486, - "grad_norm": 1.5075854063034058, - "learning_rate": 6.670150753768844e-05, - "loss": 5.3562, - "step": 33643 - }, - { - "epoch": 17.545762711864406, - "grad_norm": 1.5236035585403442, - "learning_rate": 6.67005025125628e-05, - "loss": 5.211, - "step": 33644 - }, - { - "epoch": 17.546284224250325, - "grad_norm": 1.4719504117965698, - "learning_rate": 6.669949748743718e-05, - "loss": 5.7418, - "step": 33645 - }, - { - "epoch": 17.546805736636244, - "grad_norm": 1.4759455919265747, - "learning_rate": 6.669849246231156e-05, - "loss": 5.4494, - "step": 33646 - }, - { - "epoch": 17.547327249022164, - "grad_norm": 1.5810338258743286, - "learning_rate": 6.669748743718594e-05, - "loss": 5.3793, - "step": 33647 - }, - { - "epoch": 17.547848761408083, - "grad_norm": 1.5113155841827393, - "learning_rate": 6.66964824120603e-05, - "loss": 5.1755, - "step": 33648 - }, - { - "epoch": 17.548370273794003, - "grad_norm": 1.4526629447937012, - "learning_rate": 6.669547738693468e-05, - "loss": 5.7052, - "step": 33649 - }, - { - "epoch": 17.548891786179922, - "grad_norm": 1.5794836282730103, - "learning_rate": 6.669447236180905e-05, - "loss": 5.4931, - "step": 33650 - }, - { - "epoch": 17.54941329856584, - "grad_norm": 1.4392940998077393, - "learning_rate": 6.669346733668342e-05, - "loss": 5.5864, - "step": 33651 - }, - { - "epoch": 17.54993481095176, - "grad_norm": 1.5202467441558838, - "learning_rate": 6.669246231155779e-05, - "loss": 5.4677, - "step": 33652 - }, - { - "epoch": 17.55045632333768, - "grad_norm": 1.519304633140564, - "learning_rate": 6.669145728643217e-05, - "loss": 4.8759, - "step": 33653 - }, - { - "epoch": 17.5509778357236, - "grad_norm": 1.4954559803009033, - "learning_rate": 6.669045226130653e-05, - "loss": 5.1882, - "step": 33654 - }, - { - "epoch": 17.551499348109516, - "grad_norm": 1.469001293182373, - "learning_rate": 6.668944723618091e-05, - "loss": 5.3801, - "step": 33655 - }, - { - "epoch": 17.552020860495436, - "grad_norm": 1.4140528440475464, - "learning_rate": 6.668844221105529e-05, - "loss": 5.3347, - "step": 33656 - }, - { - "epoch": 17.552542372881355, - "grad_norm": 1.3711529970169067, - "learning_rate": 6.668743718592965e-05, - "loss": 5.4455, - "step": 33657 - }, - { - "epoch": 17.553063885267274, - "grad_norm": 1.3301188945770264, - "learning_rate": 6.668643216080403e-05, - "loss": 5.63, - "step": 33658 - }, - { - "epoch": 17.553585397653194, - "grad_norm": 1.5203075408935547, - "learning_rate": 6.668542713567839e-05, - "loss": 5.5518, - "step": 33659 - }, - { - "epoch": 17.554106910039113, - "grad_norm": 1.5048304796218872, - "learning_rate": 6.668442211055277e-05, - "loss": 5.2824, - "step": 33660 - }, - { - "epoch": 17.554628422425033, - "grad_norm": 1.5061664581298828, - "learning_rate": 6.668341708542713e-05, - "loss": 4.7134, - "step": 33661 - }, - { - "epoch": 17.555149934810952, - "grad_norm": 1.5149650573730469, - "learning_rate": 6.668241206030151e-05, - "loss": 5.2948, - "step": 33662 - }, - { - "epoch": 17.555671447196872, - "grad_norm": 1.4104220867156982, - "learning_rate": 6.668140703517588e-05, - "loss": 5.52, - "step": 33663 - }, - { - "epoch": 17.55619295958279, - "grad_norm": 1.5604751110076904, - "learning_rate": 6.668040201005025e-05, - "loss": 5.6831, - "step": 33664 - }, - { - "epoch": 17.55671447196871, - "grad_norm": 1.5430617332458496, - "learning_rate": 6.667939698492462e-05, - "loss": 5.694, - "step": 33665 - }, - { - "epoch": 17.557235984354627, - "grad_norm": 1.4992448091506958, - "learning_rate": 6.6678391959799e-05, - "loss": 5.6967, - "step": 33666 - }, - { - "epoch": 17.557757496740546, - "grad_norm": 1.7155048847198486, - "learning_rate": 6.667738693467337e-05, - "loss": 4.9862, - "step": 33667 - }, - { - "epoch": 17.558279009126466, - "grad_norm": 1.4662829637527466, - "learning_rate": 6.667638190954775e-05, - "loss": 5.2965, - "step": 33668 - }, - { - "epoch": 17.558800521512385, - "grad_norm": 1.5863327980041504, - "learning_rate": 6.667537688442212e-05, - "loss": 4.7476, - "step": 33669 - }, - { - "epoch": 17.559322033898304, - "grad_norm": 1.5220351219177246, - "learning_rate": 6.667437185929648e-05, - "loss": 5.355, - "step": 33670 - }, - { - "epoch": 17.559843546284224, - "grad_norm": 1.4135812520980835, - "learning_rate": 6.667336683417086e-05, - "loss": 5.1745, - "step": 33671 - }, - { - "epoch": 17.560365058670143, - "grad_norm": 1.5421332120895386, - "learning_rate": 6.667236180904522e-05, - "loss": 4.8216, - "step": 33672 - }, - { - "epoch": 17.560886571056063, - "grad_norm": 1.551348328590393, - "learning_rate": 6.66713567839196e-05, - "loss": 5.6274, - "step": 33673 - }, - { - "epoch": 17.561408083441982, - "grad_norm": 1.5774986743927002, - "learning_rate": 6.667035175879396e-05, - "loss": 5.2699, - "step": 33674 - }, - { - "epoch": 17.561929595827902, - "grad_norm": 1.5038342475891113, - "learning_rate": 6.666934673366834e-05, - "loss": 5.3302, - "step": 33675 - }, - { - "epoch": 17.56245110821382, - "grad_norm": 1.4705525636672974, - "learning_rate": 6.666834170854272e-05, - "loss": 5.3696, - "step": 33676 - }, - { - "epoch": 17.56297262059974, - "grad_norm": 1.4745301008224487, - "learning_rate": 6.66673366834171e-05, - "loss": 5.3296, - "step": 33677 - }, - { - "epoch": 17.56349413298566, - "grad_norm": 1.47714364528656, - "learning_rate": 6.666633165829146e-05, - "loss": 5.3645, - "step": 33678 - }, - { - "epoch": 17.564015645371576, - "grad_norm": 1.5404696464538574, - "learning_rate": 6.666532663316584e-05, - "loss": 5.1053, - "step": 33679 - }, - { - "epoch": 17.564537157757496, - "grad_norm": 1.5076885223388672, - "learning_rate": 6.66643216080402e-05, - "loss": 5.5291, - "step": 33680 - }, - { - "epoch": 17.565058670143415, - "grad_norm": 1.570454478263855, - "learning_rate": 6.666331658291458e-05, - "loss": 4.4599, - "step": 33681 - }, - { - "epoch": 17.565580182529335, - "grad_norm": 1.468189001083374, - "learning_rate": 6.666231155778895e-05, - "loss": 5.1342, - "step": 33682 - }, - { - "epoch": 17.566101694915254, - "grad_norm": 1.565236210823059, - "learning_rate": 6.666130653266331e-05, - "loss": 5.1939, - "step": 33683 - }, - { - "epoch": 17.566623207301173, - "grad_norm": 1.4437369108200073, - "learning_rate": 6.666030150753769e-05, - "loss": 5.4193, - "step": 33684 - }, - { - "epoch": 17.567144719687093, - "grad_norm": 1.52808678150177, - "learning_rate": 6.665929648241205e-05, - "loss": 5.729, - "step": 33685 - }, - { - "epoch": 17.567666232073012, - "grad_norm": 1.5112067461013794, - "learning_rate": 6.665829145728643e-05, - "loss": 5.6051, - "step": 33686 - }, - { - "epoch": 17.568187744458932, - "grad_norm": 1.5656968355178833, - "learning_rate": 6.665728643216081e-05, - "loss": 4.6542, - "step": 33687 - }, - { - "epoch": 17.56870925684485, - "grad_norm": 1.4708634614944458, - "learning_rate": 6.665628140703519e-05, - "loss": 5.3319, - "step": 33688 - }, - { - "epoch": 17.56923076923077, - "grad_norm": 1.4556068181991577, - "learning_rate": 6.665527638190955e-05, - "loss": 5.4015, - "step": 33689 - }, - { - "epoch": 17.569752281616687, - "grad_norm": 1.493565559387207, - "learning_rate": 6.665427135678393e-05, - "loss": 5.3053, - "step": 33690 - }, - { - "epoch": 17.570273794002606, - "grad_norm": 1.6009663343429565, - "learning_rate": 6.665326633165829e-05, - "loss": 5.3105, - "step": 33691 - }, - { - "epoch": 17.570795306388526, - "grad_norm": 1.543242335319519, - "learning_rate": 6.665226130653267e-05, - "loss": 5.2045, - "step": 33692 - }, - { - "epoch": 17.571316818774445, - "grad_norm": 1.416947603225708, - "learning_rate": 6.665125628140704e-05, - "loss": 5.354, - "step": 33693 - }, - { - "epoch": 17.571838331160365, - "grad_norm": 1.5502263307571411, - "learning_rate": 6.665025125628141e-05, - "loss": 5.404, - "step": 33694 - }, - { - "epoch": 17.572359843546284, - "grad_norm": 1.5582188367843628, - "learning_rate": 6.664924623115578e-05, - "loss": 5.5557, - "step": 33695 - }, - { - "epoch": 17.572881355932203, - "grad_norm": 1.634811282157898, - "learning_rate": 6.664824120603015e-05, - "loss": 5.4009, - "step": 33696 - }, - { - "epoch": 17.573402868318123, - "grad_norm": 1.5006394386291504, - "learning_rate": 6.664723618090453e-05, - "loss": 5.3488, - "step": 33697 - }, - { - "epoch": 17.573924380704042, - "grad_norm": 1.470304250717163, - "learning_rate": 6.66462311557789e-05, - "loss": 5.2474, - "step": 33698 - }, - { - "epoch": 17.574445893089962, - "grad_norm": 1.5794919729232788, - "learning_rate": 6.664522613065327e-05, - "loss": 5.7548, - "step": 33699 - }, - { - "epoch": 17.57496740547588, - "grad_norm": 1.4947253465652466, - "learning_rate": 6.664422110552764e-05, - "loss": 5.8375, - "step": 33700 - }, - { - "epoch": 17.5754889178618, - "grad_norm": 1.640751838684082, - "learning_rate": 6.664321608040202e-05, - "loss": 4.6767, - "step": 33701 - }, - { - "epoch": 17.576010430247717, - "grad_norm": 1.4546747207641602, - "learning_rate": 6.664221105527638e-05, - "loss": 5.5339, - "step": 33702 - }, - { - "epoch": 17.576531942633636, - "grad_norm": 1.5838881731033325, - "learning_rate": 6.664120603015076e-05, - "loss": 5.333, - "step": 33703 - }, - { - "epoch": 17.577053455019556, - "grad_norm": 1.49124276638031, - "learning_rate": 6.664020100502512e-05, - "loss": 5.5637, - "step": 33704 - }, - { - "epoch": 17.577574967405475, - "grad_norm": 1.516244888305664, - "learning_rate": 6.66391959798995e-05, - "loss": 5.0493, - "step": 33705 - }, - { - "epoch": 17.578096479791395, - "grad_norm": 1.6370948553085327, - "learning_rate": 6.663819095477387e-05, - "loss": 5.3325, - "step": 33706 - }, - { - "epoch": 17.578617992177314, - "grad_norm": 1.573280692100525, - "learning_rate": 6.663718592964824e-05, - "loss": 5.5464, - "step": 33707 - }, - { - "epoch": 17.579139504563233, - "grad_norm": 1.4545621871948242, - "learning_rate": 6.663618090452262e-05, - "loss": 5.4381, - "step": 33708 - }, - { - "epoch": 17.579661016949153, - "grad_norm": 1.536813735961914, - "learning_rate": 6.663517587939699e-05, - "loss": 5.105, - "step": 33709 - }, - { - "epoch": 17.580182529335072, - "grad_norm": 1.5321704149246216, - "learning_rate": 6.663417085427136e-05, - "loss": 5.2441, - "step": 33710 - }, - { - "epoch": 17.580704041720992, - "grad_norm": 1.4790980815887451, - "learning_rate": 6.663316582914573e-05, - "loss": 5.4938, - "step": 33711 - }, - { - "epoch": 17.58122555410691, - "grad_norm": 1.491873860359192, - "learning_rate": 6.66321608040201e-05, - "loss": 5.7178, - "step": 33712 - }, - { - "epoch": 17.58174706649283, - "grad_norm": 1.5181962251663208, - "learning_rate": 6.663115577889447e-05, - "loss": 5.413, - "step": 33713 - }, - { - "epoch": 17.582268578878747, - "grad_norm": 1.6142183542251587, - "learning_rate": 6.663015075376885e-05, - "loss": 5.0219, - "step": 33714 - }, - { - "epoch": 17.582790091264666, - "grad_norm": 1.4339420795440674, - "learning_rate": 6.662914572864321e-05, - "loss": 5.5702, - "step": 33715 - }, - { - "epoch": 17.583311603650586, - "grad_norm": 1.433240532875061, - "learning_rate": 6.662814070351759e-05, - "loss": 5.7526, - "step": 33716 - }, - { - "epoch": 17.583833116036505, - "grad_norm": 1.4996200799942017, - "learning_rate": 6.662713567839197e-05, - "loss": 5.1479, - "step": 33717 - }, - { - "epoch": 17.584354628422425, - "grad_norm": 1.6656213998794556, - "learning_rate": 6.662613065326635e-05, - "loss": 5.115, - "step": 33718 - }, - { - "epoch": 17.584876140808344, - "grad_norm": 1.421303153038025, - "learning_rate": 6.662512562814071e-05, - "loss": 5.0037, - "step": 33719 - }, - { - "epoch": 17.585397653194264, - "grad_norm": 1.7406703233718872, - "learning_rate": 6.662412060301509e-05, - "loss": 4.337, - "step": 33720 - }, - { - "epoch": 17.585919165580183, - "grad_norm": 1.533309817314148, - "learning_rate": 6.662311557788945e-05, - "loss": 5.4689, - "step": 33721 - }, - { - "epoch": 17.586440677966102, - "grad_norm": 1.382738471031189, - "learning_rate": 6.662211055276382e-05, - "loss": 4.8571, - "step": 33722 - }, - { - "epoch": 17.586962190352022, - "grad_norm": 1.5203123092651367, - "learning_rate": 6.66211055276382e-05, - "loss": 5.6117, - "step": 33723 - }, - { - "epoch": 17.58748370273794, - "grad_norm": 1.6312896013259888, - "learning_rate": 6.662010050251256e-05, - "loss": 5.3036, - "step": 33724 - }, - { - "epoch": 17.58800521512386, - "grad_norm": 1.5435765981674194, - "learning_rate": 6.661909547738694e-05, - "loss": 4.8804, - "step": 33725 - }, - { - "epoch": 17.588526727509777, - "grad_norm": 1.529320240020752, - "learning_rate": 6.66180904522613e-05, - "loss": 5.6202, - "step": 33726 - }, - { - "epoch": 17.589048239895696, - "grad_norm": 1.4398858547210693, - "learning_rate": 6.661708542713568e-05, - "loss": 5.3196, - "step": 33727 - }, - { - "epoch": 17.589569752281616, - "grad_norm": 1.573088526725769, - "learning_rate": 6.661608040201006e-05, - "loss": 5.1118, - "step": 33728 - }, - { - "epoch": 17.590091264667535, - "grad_norm": 1.626304268836975, - "learning_rate": 6.661507537688443e-05, - "loss": 5.3575, - "step": 33729 - }, - { - "epoch": 17.590612777053455, - "grad_norm": 1.4716562032699585, - "learning_rate": 6.66140703517588e-05, - "loss": 5.056, - "step": 33730 - }, - { - "epoch": 17.591134289439374, - "grad_norm": 1.5783181190490723, - "learning_rate": 6.661306532663318e-05, - "loss": 5.4429, - "step": 33731 - }, - { - "epoch": 17.591655801825294, - "grad_norm": 1.417047142982483, - "learning_rate": 6.661206030150754e-05, - "loss": 5.7955, - "step": 33732 - }, - { - "epoch": 17.592177314211213, - "grad_norm": 1.4808237552642822, - "learning_rate": 6.661105527638192e-05, - "loss": 5.1222, - "step": 33733 - }, - { - "epoch": 17.592698826597132, - "grad_norm": 1.5179468393325806, - "learning_rate": 6.661005025125628e-05, - "loss": 4.9214, - "step": 33734 - }, - { - "epoch": 17.593220338983052, - "grad_norm": 1.4209370613098145, - "learning_rate": 6.660904522613066e-05, - "loss": 5.4295, - "step": 33735 - }, - { - "epoch": 17.59374185136897, - "grad_norm": 1.4800810813903809, - "learning_rate": 6.660804020100502e-05, - "loss": 5.6346, - "step": 33736 - }, - { - "epoch": 17.59426336375489, - "grad_norm": 1.5786670446395874, - "learning_rate": 6.66070351758794e-05, - "loss": 4.7558, - "step": 33737 - }, - { - "epoch": 17.594784876140807, - "grad_norm": 1.5224394798278809, - "learning_rate": 6.660603015075378e-05, - "loss": 5.4386, - "step": 33738 - }, - { - "epoch": 17.595306388526726, - "grad_norm": 1.4232096672058105, - "learning_rate": 6.660502512562814e-05, - "loss": 5.6777, - "step": 33739 - }, - { - "epoch": 17.595827900912646, - "grad_norm": 1.5230361223220825, - "learning_rate": 6.660402010050252e-05, - "loss": 5.2491, - "step": 33740 - }, - { - "epoch": 17.596349413298565, - "grad_norm": 1.4490054845809937, - "learning_rate": 6.660301507537689e-05, - "loss": 5.2858, - "step": 33741 - }, - { - "epoch": 17.596870925684485, - "grad_norm": 1.4635945558547974, - "learning_rate": 6.660201005025126e-05, - "loss": 4.8286, - "step": 33742 - }, - { - "epoch": 17.597392438070404, - "grad_norm": 1.4475090503692627, - "learning_rate": 6.660100502512563e-05, - "loss": 5.3005, - "step": 33743 - }, - { - "epoch": 17.597913950456324, - "grad_norm": 1.625119686126709, - "learning_rate": 6.66e-05, - "loss": 5.0339, - "step": 33744 - }, - { - "epoch": 17.598435462842243, - "grad_norm": 1.4772495031356812, - "learning_rate": 6.659899497487437e-05, - "loss": 5.6718, - "step": 33745 - }, - { - "epoch": 17.598956975228162, - "grad_norm": 1.3886334896087646, - "learning_rate": 6.659798994974875e-05, - "loss": 5.815, - "step": 33746 - }, - { - "epoch": 17.599478487614082, - "grad_norm": 1.4445483684539795, - "learning_rate": 6.659698492462311e-05, - "loss": 5.2054, - "step": 33747 - }, - { - "epoch": 17.6, - "grad_norm": 1.4985733032226562, - "learning_rate": 6.659597989949749e-05, - "loss": 5.0516, - "step": 33748 - }, - { - "epoch": 17.60052151238592, - "grad_norm": 1.343237042427063, - "learning_rate": 6.659497487437187e-05, - "loss": 4.6615, - "step": 33749 - }, - { - "epoch": 17.601043024771837, - "grad_norm": 1.4917412996292114, - "learning_rate": 6.659396984924623e-05, - "loss": 5.3123, - "step": 33750 - }, - { - "epoch": 17.601564537157756, - "grad_norm": 1.5030463933944702, - "learning_rate": 6.659296482412061e-05, - "loss": 5.0499, - "step": 33751 - }, - { - "epoch": 17.602086049543676, - "grad_norm": 1.6039479970932007, - "learning_rate": 6.659195979899497e-05, - "loss": 4.57, - "step": 33752 - }, - { - "epoch": 17.602607561929595, - "grad_norm": 1.5410643815994263, - "learning_rate": 6.659095477386935e-05, - "loss": 5.5559, - "step": 33753 - }, - { - "epoch": 17.603129074315515, - "grad_norm": 1.5341347455978394, - "learning_rate": 6.658994974874372e-05, - "loss": 5.1812, - "step": 33754 - }, - { - "epoch": 17.603650586701434, - "grad_norm": 1.4772324562072754, - "learning_rate": 6.65889447236181e-05, - "loss": 5.6167, - "step": 33755 - }, - { - "epoch": 17.604172099087354, - "grad_norm": 1.4263862371444702, - "learning_rate": 6.658793969849246e-05, - "loss": 4.7809, - "step": 33756 - }, - { - "epoch": 17.604693611473273, - "grad_norm": 1.6111737489700317, - "learning_rate": 6.658693467336684e-05, - "loss": 5.2436, - "step": 33757 - }, - { - "epoch": 17.605215123859193, - "grad_norm": 1.4762698411941528, - "learning_rate": 6.658592964824121e-05, - "loss": 5.412, - "step": 33758 - }, - { - "epoch": 17.605736636245112, - "grad_norm": 1.4229278564453125, - "learning_rate": 6.658492462311559e-05, - "loss": 5.7681, - "step": 33759 - }, - { - "epoch": 17.60625814863103, - "grad_norm": 1.4975672960281372, - "learning_rate": 6.658391959798996e-05, - "loss": 5.1172, - "step": 33760 - }, - { - "epoch": 17.60677966101695, - "grad_norm": 1.5242888927459717, - "learning_rate": 6.658291457286433e-05, - "loss": 4.9736, - "step": 33761 - }, - { - "epoch": 17.607301173402867, - "grad_norm": 1.4693795442581177, - "learning_rate": 6.65819095477387e-05, - "loss": 5.6989, - "step": 33762 - }, - { - "epoch": 17.607822685788786, - "grad_norm": 1.4379431009292603, - "learning_rate": 6.658090452261306e-05, - "loss": 5.7966, - "step": 33763 - }, - { - "epoch": 17.608344198174706, - "grad_norm": 1.38744056224823, - "learning_rate": 6.657989949748744e-05, - "loss": 5.5788, - "step": 33764 - }, - { - "epoch": 17.608865710560625, - "grad_norm": 1.3683186769485474, - "learning_rate": 6.65788944723618e-05, - "loss": 5.749, - "step": 33765 - }, - { - "epoch": 17.609387222946545, - "grad_norm": 1.5162110328674316, - "learning_rate": 6.657788944723618e-05, - "loss": 5.4777, - "step": 33766 - }, - { - "epoch": 17.609908735332464, - "grad_norm": 1.5629044771194458, - "learning_rate": 6.657688442211055e-05, - "loss": 5.4041, - "step": 33767 - }, - { - "epoch": 17.610430247718384, - "grad_norm": 1.5068835020065308, - "learning_rate": 6.657587939698492e-05, - "loss": 5.6522, - "step": 33768 - }, - { - "epoch": 17.610951760104303, - "grad_norm": 1.4826602935791016, - "learning_rate": 6.65748743718593e-05, - "loss": 5.122, - "step": 33769 - }, - { - "epoch": 17.611473272490223, - "grad_norm": 1.4078724384307861, - "learning_rate": 6.657386934673368e-05, - "loss": 5.7527, - "step": 33770 - }, - { - "epoch": 17.611994784876142, - "grad_norm": 1.460314154624939, - "learning_rate": 6.657286432160804e-05, - "loss": 5.5369, - "step": 33771 - }, - { - "epoch": 17.61251629726206, - "grad_norm": 1.4740344285964966, - "learning_rate": 6.657185929648242e-05, - "loss": 5.5821, - "step": 33772 - }, - { - "epoch": 17.613037809647977, - "grad_norm": 1.4589219093322754, - "learning_rate": 6.657085427135679e-05, - "loss": 5.3075, - "step": 33773 - }, - { - "epoch": 17.613559322033897, - "grad_norm": 1.4922641515731812, - "learning_rate": 6.656984924623116e-05, - "loss": 5.3301, - "step": 33774 - }, - { - "epoch": 17.614080834419816, - "grad_norm": 1.5355814695358276, - "learning_rate": 6.656884422110553e-05, - "loss": 4.8599, - "step": 33775 - }, - { - "epoch": 17.614602346805736, - "grad_norm": 1.449438214302063, - "learning_rate": 6.65678391959799e-05, - "loss": 5.562, - "step": 33776 - }, - { - "epoch": 17.615123859191655, - "grad_norm": 1.5283610820770264, - "learning_rate": 6.656683417085427e-05, - "loss": 4.9283, - "step": 33777 - }, - { - "epoch": 17.615645371577575, - "grad_norm": 1.506994605064392, - "learning_rate": 6.656582914572864e-05, - "loss": 5.3903, - "step": 33778 - }, - { - "epoch": 17.616166883963494, - "grad_norm": 1.556771993637085, - "learning_rate": 6.656482412060301e-05, - "loss": 5.2816, - "step": 33779 - }, - { - "epoch": 17.616688396349414, - "grad_norm": 1.5162992477416992, - "learning_rate": 6.656381909547739e-05, - "loss": 5.4064, - "step": 33780 - }, - { - "epoch": 17.617209908735333, - "grad_norm": 1.5130563974380493, - "learning_rate": 6.656281407035177e-05, - "loss": 5.271, - "step": 33781 - }, - { - "epoch": 17.617731421121253, - "grad_norm": 1.5461342334747314, - "learning_rate": 6.656180904522613e-05, - "loss": 5.5198, - "step": 33782 - }, - { - "epoch": 17.618252933507172, - "grad_norm": 1.493440866470337, - "learning_rate": 6.656080402010051e-05, - "loss": 5.0257, - "step": 33783 - }, - { - "epoch": 17.61877444589309, - "grad_norm": 1.4186168909072876, - "learning_rate": 6.655979899497488e-05, - "loss": 5.6198, - "step": 33784 - }, - { - "epoch": 17.619295958279007, - "grad_norm": 1.5349383354187012, - "learning_rate": 6.655879396984925e-05, - "loss": 5.4171, - "step": 33785 - }, - { - "epoch": 17.619817470664927, - "grad_norm": 1.4538053274154663, - "learning_rate": 6.655778894472362e-05, - "loss": 5.2591, - "step": 33786 - }, - { - "epoch": 17.620338983050846, - "grad_norm": 1.6187940835952759, - "learning_rate": 6.6556783919598e-05, - "loss": 4.5785, - "step": 33787 - }, - { - "epoch": 17.620860495436766, - "grad_norm": 1.4495912790298462, - "learning_rate": 6.655577889447236e-05, - "loss": 4.9868, - "step": 33788 - }, - { - "epoch": 17.621382007822685, - "grad_norm": 1.4841169118881226, - "learning_rate": 6.655477386934674e-05, - "loss": 5.5063, - "step": 33789 - }, - { - "epoch": 17.621903520208605, - "grad_norm": 1.446797490119934, - "learning_rate": 6.655376884422112e-05, - "loss": 5.2298, - "step": 33790 - }, - { - "epoch": 17.622425032594524, - "grad_norm": 1.5024536848068237, - "learning_rate": 6.655276381909548e-05, - "loss": 5.2061, - "step": 33791 - }, - { - "epoch": 17.622946544980444, - "grad_norm": 1.4112651348114014, - "learning_rate": 6.655175879396986e-05, - "loss": 5.6213, - "step": 33792 - }, - { - "epoch": 17.623468057366363, - "grad_norm": 1.4014456272125244, - "learning_rate": 6.655075376884422e-05, - "loss": 5.589, - "step": 33793 - }, - { - "epoch": 17.623989569752283, - "grad_norm": 1.5408058166503906, - "learning_rate": 6.65497487437186e-05, - "loss": 5.2963, - "step": 33794 - }, - { - "epoch": 17.624511082138202, - "grad_norm": 1.5169403553009033, - "learning_rate": 6.654874371859296e-05, - "loss": 5.2042, - "step": 33795 - }, - { - "epoch": 17.62503259452412, - "grad_norm": 1.359486699104309, - "learning_rate": 6.654773869346734e-05, - "loss": 4.5958, - "step": 33796 - }, - { - "epoch": 17.625554106910037, - "grad_norm": 1.3975286483764648, - "learning_rate": 6.65467336683417e-05, - "loss": 5.0587, - "step": 33797 - }, - { - "epoch": 17.626075619295957, - "grad_norm": 1.4145923852920532, - "learning_rate": 6.654572864321608e-05, - "loss": 5.576, - "step": 33798 - }, - { - "epoch": 17.626597131681876, - "grad_norm": 1.4635958671569824, - "learning_rate": 6.654472361809045e-05, - "loss": 5.3606, - "step": 33799 - }, - { - "epoch": 17.627118644067796, - "grad_norm": 1.4422733783721924, - "learning_rate": 6.654371859296483e-05, - "loss": 5.3383, - "step": 33800 - }, - { - "epoch": 17.627640156453715, - "grad_norm": 1.4125279188156128, - "learning_rate": 6.65427135678392e-05, - "loss": 5.7087, - "step": 33801 - }, - { - "epoch": 17.628161668839635, - "grad_norm": 1.5115190744400024, - "learning_rate": 6.654170854271357e-05, - "loss": 5.5336, - "step": 33802 - }, - { - "epoch": 17.628683181225554, - "grad_norm": 1.4467918872833252, - "learning_rate": 6.654070351758795e-05, - "loss": 5.4829, - "step": 33803 - }, - { - "epoch": 17.629204693611474, - "grad_norm": 1.4647235870361328, - "learning_rate": 6.653969849246231e-05, - "loss": 5.6157, - "step": 33804 - }, - { - "epoch": 17.629726205997393, - "grad_norm": 1.5650229454040527, - "learning_rate": 6.653869346733669e-05, - "loss": 4.8279, - "step": 33805 - }, - { - "epoch": 17.630247718383313, - "grad_norm": 1.6451488733291626, - "learning_rate": 6.653768844221105e-05, - "loss": 5.0798, - "step": 33806 - }, - { - "epoch": 17.630769230769232, - "grad_norm": 1.6121498346328735, - "learning_rate": 6.653668341708543e-05, - "loss": 5.0958, - "step": 33807 - }, - { - "epoch": 17.63129074315515, - "grad_norm": 1.5337408781051636, - "learning_rate": 6.65356783919598e-05, - "loss": 5.5043, - "step": 33808 - }, - { - "epoch": 17.631812255541067, - "grad_norm": 1.5322473049163818, - "learning_rate": 6.653467336683417e-05, - "loss": 5.4167, - "step": 33809 - }, - { - "epoch": 17.632333767926987, - "grad_norm": 1.513409972190857, - "learning_rate": 6.653366834170855e-05, - "loss": 5.469, - "step": 33810 - }, - { - "epoch": 17.632855280312906, - "grad_norm": 1.4426590204238892, - "learning_rate": 6.653266331658293e-05, - "loss": 5.3086, - "step": 33811 - }, - { - "epoch": 17.633376792698826, - "grad_norm": Infinity, - "learning_rate": 6.653266331658293e-05, - "loss": 4.8068, - "step": 33812 - }, - { - "epoch": 17.633898305084745, - "grad_norm": 1.4448779821395874, - "learning_rate": 6.653165829145729e-05, - "loss": 5.5443, - "step": 33813 - }, - { - "epoch": 17.634419817470665, - "grad_norm": 1.5438796281814575, - "learning_rate": 6.653065326633167e-05, - "loss": 4.8116, - "step": 33814 - }, - { - "epoch": 17.634941329856584, - "grad_norm": 1.4495450258255005, - "learning_rate": 6.652964824120603e-05, - "loss": 5.1423, - "step": 33815 - }, - { - "epoch": 17.635462842242504, - "grad_norm": 1.5102604627609253, - "learning_rate": 6.652864321608041e-05, - "loss": 5.2453, - "step": 33816 - }, - { - "epoch": 17.635984354628423, - "grad_norm": 1.6123223304748535, - "learning_rate": 6.652763819095478e-05, - "loss": 5.366, - "step": 33817 - }, - { - "epoch": 17.636505867014343, - "grad_norm": 1.5805411338806152, - "learning_rate": 6.652663316582914e-05, - "loss": 5.0092, - "step": 33818 - }, - { - "epoch": 17.637027379400262, - "grad_norm": 1.4305447340011597, - "learning_rate": 6.652562814070352e-05, - "loss": 5.4225, - "step": 33819 - }, - { - "epoch": 17.63754889178618, - "grad_norm": 1.5143747329711914, - "learning_rate": 6.652462311557788e-05, - "loss": 5.3565, - "step": 33820 - }, - { - "epoch": 17.638070404172097, - "grad_norm": 1.4664747714996338, - "learning_rate": 6.652361809045226e-05, - "loss": 5.2885, - "step": 33821 - }, - { - "epoch": 17.638591916558017, - "grad_norm": 1.4960533380508423, - "learning_rate": 6.652261306532664e-05, - "loss": 5.2998, - "step": 33822 - }, - { - "epoch": 17.639113428943936, - "grad_norm": 1.4765934944152832, - "learning_rate": 6.652160804020102e-05, - "loss": 5.3076, - "step": 33823 - }, - { - "epoch": 17.639634941329856, - "grad_norm": 1.4125553369522095, - "learning_rate": 6.652060301507538e-05, - "loss": 5.4089, - "step": 33824 - }, - { - "epoch": 17.640156453715775, - "grad_norm": 1.5316276550292969, - "learning_rate": 6.651959798994976e-05, - "loss": 5.1998, - "step": 33825 - }, - { - "epoch": 17.640677966101695, - "grad_norm": 1.4357267618179321, - "learning_rate": 6.651859296482412e-05, - "loss": 5.4879, - "step": 33826 - }, - { - "epoch": 17.641199478487614, - "grad_norm": 1.4521565437316895, - "learning_rate": 6.65175879396985e-05, - "loss": 5.0126, - "step": 33827 - }, - { - "epoch": 17.641720990873534, - "grad_norm": 1.5098233222961426, - "learning_rate": 6.651658291457286e-05, - "loss": 5.4746, - "step": 33828 - }, - { - "epoch": 17.642242503259453, - "grad_norm": 1.4050415754318237, - "learning_rate": 6.651557788944724e-05, - "loss": 4.963, - "step": 33829 - }, - { - "epoch": 17.642764015645373, - "grad_norm": 1.4532086849212646, - "learning_rate": 6.65145728643216e-05, - "loss": 5.5061, - "step": 33830 - }, - { - "epoch": 17.643285528031292, - "grad_norm": 1.458490014076233, - "learning_rate": 6.651356783919598e-05, - "loss": 5.2467, - "step": 33831 - }, - { - "epoch": 17.64380704041721, - "grad_norm": 1.401644229888916, - "learning_rate": 6.651256281407036e-05, - "loss": 5.7751, - "step": 33832 - }, - { - "epoch": 17.644328552803128, - "grad_norm": 1.4745346307754517, - "learning_rate": 6.651155778894473e-05, - "loss": 5.0297, - "step": 33833 - }, - { - "epoch": 17.644850065189047, - "grad_norm": 1.5701780319213867, - "learning_rate": 6.65105527638191e-05, - "loss": 5.2247, - "step": 33834 - }, - { - "epoch": 17.645371577574966, - "grad_norm": 1.446345329284668, - "learning_rate": 6.650954773869347e-05, - "loss": 5.6006, - "step": 33835 - }, - { - "epoch": 17.645893089960886, - "grad_norm": 1.4327127933502197, - "learning_rate": 6.650854271356785e-05, - "loss": 5.5744, - "step": 33836 - }, - { - "epoch": 17.646414602346805, - "grad_norm": 1.4238333702087402, - "learning_rate": 6.650753768844221e-05, - "loss": 5.4359, - "step": 33837 - }, - { - "epoch": 17.646936114732725, - "grad_norm": 1.53669011592865, - "learning_rate": 6.650653266331659e-05, - "loss": 5.3665, - "step": 33838 - }, - { - "epoch": 17.647457627118644, - "grad_norm": 1.5340012311935425, - "learning_rate": 6.650552763819095e-05, - "loss": 5.0239, - "step": 33839 - }, - { - "epoch": 17.647979139504564, - "grad_norm": 1.449972152709961, - "learning_rate": 6.650452261306533e-05, - "loss": 5.3324, - "step": 33840 - }, - { - "epoch": 17.648500651890483, - "grad_norm": 1.6092926263809204, - "learning_rate": 6.65035175879397e-05, - "loss": 5.0087, - "step": 33841 - }, - { - "epoch": 17.649022164276403, - "grad_norm": 1.451465129852295, - "learning_rate": 6.650251256281407e-05, - "loss": 5.4494, - "step": 33842 - }, - { - "epoch": 17.649543676662322, - "grad_norm": 1.4876304864883423, - "learning_rate": 6.650150753768845e-05, - "loss": 5.4229, - "step": 33843 - }, - { - "epoch": 17.65006518904824, - "grad_norm": 1.3650668859481812, - "learning_rate": 6.650050251256281e-05, - "loss": 5.4394, - "step": 33844 - }, - { - "epoch": 17.650586701434158, - "grad_norm": 1.3925808668136597, - "learning_rate": 6.649949748743719e-05, - "loss": 5.7454, - "step": 33845 - }, - { - "epoch": 17.651108213820077, - "grad_norm": 1.3614554405212402, - "learning_rate": 6.649849246231156e-05, - "loss": 5.6958, - "step": 33846 - }, - { - "epoch": 17.651629726205996, - "grad_norm": 1.4332492351531982, - "learning_rate": 6.649748743718593e-05, - "loss": 5.6354, - "step": 33847 - }, - { - "epoch": 17.652151238591916, - "grad_norm": 1.456476092338562, - "learning_rate": 6.64964824120603e-05, - "loss": 5.7842, - "step": 33848 - }, - { - "epoch": 17.652672750977835, - "grad_norm": 1.494954228401184, - "learning_rate": 6.649547738693468e-05, - "loss": 5.399, - "step": 33849 - }, - { - "epoch": 17.653194263363755, - "grad_norm": 1.4491567611694336, - "learning_rate": 6.649447236180904e-05, - "loss": 5.5687, - "step": 33850 - }, - { - "epoch": 17.653715775749674, - "grad_norm": 1.4236538410186768, - "learning_rate": 6.649346733668342e-05, - "loss": 5.5914, - "step": 33851 - }, - { - "epoch": 17.654237288135594, - "grad_norm": 1.4422253370285034, - "learning_rate": 6.64924623115578e-05, - "loss": 5.5362, - "step": 33852 - }, - { - "epoch": 17.654758800521513, - "grad_norm": 1.4629368782043457, - "learning_rate": 6.649145728643217e-05, - "loss": 5.5788, - "step": 33853 - }, - { - "epoch": 17.655280312907433, - "grad_norm": 1.4266023635864258, - "learning_rate": 6.649045226130654e-05, - "loss": 5.3217, - "step": 33854 - }, - { - "epoch": 17.655801825293352, - "grad_norm": 1.4266482591629028, - "learning_rate": 6.648944723618092e-05, - "loss": 5.4001, - "step": 33855 - }, - { - "epoch": 17.656323337679268, - "grad_norm": 1.477031946182251, - "learning_rate": 6.648844221105528e-05, - "loss": 5.2308, - "step": 33856 - }, - { - "epoch": 17.656844850065188, - "grad_norm": 1.466384768486023, - "learning_rate": 6.648743718592965e-05, - "loss": 5.5058, - "step": 33857 - }, - { - "epoch": 17.657366362451107, - "grad_norm": 1.3970139026641846, - "learning_rate": 6.648643216080402e-05, - "loss": 5.8083, - "step": 33858 - }, - { - "epoch": 17.657887874837026, - "grad_norm": 1.4201661348342896, - "learning_rate": 6.648542713567839e-05, - "loss": 5.2677, - "step": 33859 - }, - { - "epoch": 17.658409387222946, - "grad_norm": 1.4833662509918213, - "learning_rate": 6.648442211055277e-05, - "loss": 5.3023, - "step": 33860 - }, - { - "epoch": 17.658930899608865, - "grad_norm": 1.3651678562164307, - "learning_rate": 6.648341708542713e-05, - "loss": 5.4226, - "step": 33861 - }, - { - "epoch": 17.659452411994785, - "grad_norm": 1.3860450983047485, - "learning_rate": 6.648241206030151e-05, - "loss": 5.4318, - "step": 33862 - }, - { - "epoch": 17.659973924380704, - "grad_norm": 1.4952644109725952, - "learning_rate": 6.648140703517589e-05, - "loss": 5.1795, - "step": 33863 - }, - { - "epoch": 17.660495436766624, - "grad_norm": 1.484712839126587, - "learning_rate": 6.648040201005026e-05, - "loss": 5.4457, - "step": 33864 - }, - { - "epoch": 17.661016949152543, - "grad_norm": 1.6080079078674316, - "learning_rate": 6.647939698492463e-05, - "loss": 4.7939, - "step": 33865 - }, - { - "epoch": 17.661538461538463, - "grad_norm": 1.4690585136413574, - "learning_rate": 6.6478391959799e-05, - "loss": 5.1035, - "step": 33866 - }, - { - "epoch": 17.662059973924382, - "grad_norm": 1.4821181297302246, - "learning_rate": 6.647738693467337e-05, - "loss": 5.1969, - "step": 33867 - }, - { - "epoch": 17.6625814863103, - "grad_norm": 1.412678837776184, - "learning_rate": 6.647638190954775e-05, - "loss": 5.8397, - "step": 33868 - }, - { - "epoch": 17.663102998696218, - "grad_norm": 1.3921016454696655, - "learning_rate": 6.647537688442211e-05, - "loss": 5.4288, - "step": 33869 - }, - { - "epoch": 17.663624511082137, - "grad_norm": 1.5297200679779053, - "learning_rate": 6.647437185929648e-05, - "loss": 5.6231, - "step": 33870 - }, - { - "epoch": 17.664146023468057, - "grad_norm": 1.4491889476776123, - "learning_rate": 6.647336683417085e-05, - "loss": 5.3223, - "step": 33871 - }, - { - "epoch": 17.664667535853976, - "grad_norm": 1.6279913187026978, - "learning_rate": 6.647236180904523e-05, - "loss": 5.179, - "step": 33872 - }, - { - "epoch": 17.665189048239895, - "grad_norm": 1.521485447883606, - "learning_rate": 6.647135678391961e-05, - "loss": 4.9657, - "step": 33873 - }, - { - "epoch": 17.665710560625815, - "grad_norm": 1.503202199935913, - "learning_rate": 6.647035175879397e-05, - "loss": 5.2446, - "step": 33874 - }, - { - "epoch": 17.666232073011734, - "grad_norm": 1.4568456411361694, - "learning_rate": 6.646934673366835e-05, - "loss": 5.2215, - "step": 33875 - }, - { - "epoch": 17.666753585397654, - "grad_norm": 1.3720821142196655, - "learning_rate": 6.646834170854272e-05, - "loss": 5.6146, - "step": 33876 - }, - { - "epoch": 17.667275097783573, - "grad_norm": 2.3169100284576416, - "learning_rate": 6.64673366834171e-05, - "loss": 4.9517, - "step": 33877 - }, - { - "epoch": 17.667796610169493, - "grad_norm": 1.475656270980835, - "learning_rate": 6.646633165829146e-05, - "loss": 5.371, - "step": 33878 - }, - { - "epoch": 17.668318122555412, - "grad_norm": 1.4679182767868042, - "learning_rate": 6.646532663316584e-05, - "loss": 5.5775, - "step": 33879 - }, - { - "epoch": 17.668839634941328, - "grad_norm": 1.5183557271957397, - "learning_rate": 6.64643216080402e-05, - "loss": 5.0717, - "step": 33880 - }, - { - "epoch": 17.669361147327248, - "grad_norm": 1.4274855852127075, - "learning_rate": 6.646331658291458e-05, - "loss": 5.4067, - "step": 33881 - }, - { - "epoch": 17.669882659713167, - "grad_norm": 1.5732702016830444, - "learning_rate": 6.646231155778894e-05, - "loss": 5.2105, - "step": 33882 - }, - { - "epoch": 17.670404172099087, - "grad_norm": 1.493910312652588, - "learning_rate": 6.646130653266332e-05, - "loss": 5.5915, - "step": 33883 - }, - { - "epoch": 17.670925684485006, - "grad_norm": 1.6213699579238892, - "learning_rate": 6.64603015075377e-05, - "loss": 5.1789, - "step": 33884 - }, - { - "epoch": 17.671447196870925, - "grad_norm": 1.6003233194351196, - "learning_rate": 6.645929648241206e-05, - "loss": 5.3253, - "step": 33885 - }, - { - "epoch": 17.671968709256845, - "grad_norm": 1.6233675479888916, - "learning_rate": 6.645829145728644e-05, - "loss": 5.2121, - "step": 33886 - }, - { - "epoch": 17.672490221642764, - "grad_norm": 1.5195295810699463, - "learning_rate": 6.64572864321608e-05, - "loss": 5.1551, - "step": 33887 - }, - { - "epoch": 17.673011734028684, - "grad_norm": 1.4437282085418701, - "learning_rate": 6.645628140703518e-05, - "loss": 5.4272, - "step": 33888 - }, - { - "epoch": 17.673533246414603, - "grad_norm": 1.4974250793457031, - "learning_rate": 6.645527638190955e-05, - "loss": 5.1523, - "step": 33889 - }, - { - "epoch": 17.674054758800523, - "grad_norm": 1.5292688608169556, - "learning_rate": 6.645427135678392e-05, - "loss": 5.2353, - "step": 33890 - }, - { - "epoch": 17.674576271186442, - "grad_norm": 1.431607723236084, - "learning_rate": 6.645326633165829e-05, - "loss": 5.3304, - "step": 33891 - }, - { - "epoch": 17.675097783572358, - "grad_norm": 1.4660619497299194, - "learning_rate": 6.645226130653267e-05, - "loss": 5.6571, - "step": 33892 - }, - { - "epoch": 17.675619295958278, - "grad_norm": 1.4676519632339478, - "learning_rate": 6.645125628140704e-05, - "loss": 5.3781, - "step": 33893 - }, - { - "epoch": 17.676140808344197, - "grad_norm": 1.4927010536193848, - "learning_rate": 6.645025125628142e-05, - "loss": 5.5255, - "step": 33894 - }, - { - "epoch": 17.676662320730117, - "grad_norm": 1.4480130672454834, - "learning_rate": 6.644924623115579e-05, - "loss": 5.6221, - "step": 33895 - }, - { - "epoch": 17.677183833116036, - "grad_norm": 1.4894375801086426, - "learning_rate": 6.644824120603015e-05, - "loss": 5.4627, - "step": 33896 - }, - { - "epoch": 17.677705345501955, - "grad_norm": 1.5619641542434692, - "learning_rate": 6.644723618090453e-05, - "loss": 5.4296, - "step": 33897 - }, - { - "epoch": 17.678226857887875, - "grad_norm": 1.5440536737442017, - "learning_rate": 6.644623115577889e-05, - "loss": 5.445, - "step": 33898 - }, - { - "epoch": 17.678748370273794, - "grad_norm": 1.5323095321655273, - "learning_rate": 6.644522613065327e-05, - "loss": 5.3187, - "step": 33899 - }, - { - "epoch": 17.679269882659714, - "grad_norm": 1.5132790803909302, - "learning_rate": 6.644422110552763e-05, - "loss": 5.613, - "step": 33900 - }, - { - "epoch": 17.679791395045633, - "grad_norm": 1.6001657247543335, - "learning_rate": 6.644321608040201e-05, - "loss": 4.9786, - "step": 33901 - }, - { - "epoch": 17.680312907431553, - "grad_norm": 1.5349171161651611, - "learning_rate": 6.644221105527638e-05, - "loss": 5.2091, - "step": 33902 - }, - { - "epoch": 17.680834419817472, - "grad_norm": 1.5584683418273926, - "learning_rate": 6.644120603015075e-05, - "loss": 4.927, - "step": 33903 - }, - { - "epoch": 17.681355932203388, - "grad_norm": 1.443010687828064, - "learning_rate": 6.644020100502513e-05, - "loss": 4.8382, - "step": 33904 - }, - { - "epoch": 17.681877444589308, - "grad_norm": 1.4086185693740845, - "learning_rate": 6.643919597989951e-05, - "loss": 5.3392, - "step": 33905 - }, - { - "epoch": 17.682398956975227, - "grad_norm": 1.454107642173767, - "learning_rate": 6.643819095477387e-05, - "loss": 5.2746, - "step": 33906 - }, - { - "epoch": 17.682920469361147, - "grad_norm": 1.4081306457519531, - "learning_rate": 6.643718592964825e-05, - "loss": 5.1607, - "step": 33907 - }, - { - "epoch": 17.683441981747066, - "grad_norm": 1.4575961828231812, - "learning_rate": 6.643618090452262e-05, - "loss": 5.5815, - "step": 33908 - }, - { - "epoch": 17.683963494132986, - "grad_norm": 1.4838870763778687, - "learning_rate": 6.6435175879397e-05, - "loss": 5.239, - "step": 33909 - }, - { - "epoch": 17.684485006518905, - "grad_norm": 1.3310797214508057, - "learning_rate": 6.643417085427136e-05, - "loss": 5.9214, - "step": 33910 - }, - { - "epoch": 17.685006518904824, - "grad_norm": 1.4356521368026733, - "learning_rate": 6.643316582914572e-05, - "loss": 5.4567, - "step": 33911 - }, - { - "epoch": 17.685528031290744, - "grad_norm": 1.4327622652053833, - "learning_rate": 6.64321608040201e-05, - "loss": 4.9907, - "step": 33912 - }, - { - "epoch": 17.686049543676663, - "grad_norm": 1.5596448183059692, - "learning_rate": 6.643115577889448e-05, - "loss": 5.248, - "step": 33913 - }, - { - "epoch": 17.686571056062583, - "grad_norm": 1.4986680746078491, - "learning_rate": 6.643015075376886e-05, - "loss": 5.2715, - "step": 33914 - }, - { - "epoch": 17.687092568448502, - "grad_norm": 1.4337475299835205, - "learning_rate": 6.642914572864322e-05, - "loss": 5.3356, - "step": 33915 - }, - { - "epoch": 17.687614080834418, - "grad_norm": 1.3556197881698608, - "learning_rate": 6.64281407035176e-05, - "loss": 5.6934, - "step": 33916 - }, - { - "epoch": 17.688135593220338, - "grad_norm": 1.4887497425079346, - "learning_rate": 6.642713567839196e-05, - "loss": 5.4294, - "step": 33917 - }, - { - "epoch": 17.688657105606257, - "grad_norm": 1.4488849639892578, - "learning_rate": 6.642613065326634e-05, - "loss": 4.9872, - "step": 33918 - }, - { - "epoch": 17.689178617992177, - "grad_norm": 1.4602442979812622, - "learning_rate": 6.64251256281407e-05, - "loss": 5.2515, - "step": 33919 - }, - { - "epoch": 17.689700130378096, - "grad_norm": 1.4292163848876953, - "learning_rate": 6.642412060301508e-05, - "loss": 5.8074, - "step": 33920 - }, - { - "epoch": 17.690221642764016, - "grad_norm": 1.3874404430389404, - "learning_rate": 6.642311557788945e-05, - "loss": 5.6882, - "step": 33921 - }, - { - "epoch": 17.690743155149935, - "grad_norm": 1.546589732170105, - "learning_rate": 6.642211055276382e-05, - "loss": 4.545, - "step": 33922 - }, - { - "epoch": 17.691264667535854, - "grad_norm": 1.4791722297668457, - "learning_rate": 6.642110552763819e-05, - "loss": 5.3847, - "step": 33923 - }, - { - "epoch": 17.691786179921774, - "grad_norm": 1.4502544403076172, - "learning_rate": 6.642010050251257e-05, - "loss": 5.519, - "step": 33924 - }, - { - "epoch": 17.692307692307693, - "grad_norm": 1.47048819065094, - "learning_rate": 6.641909547738694e-05, - "loss": 5.526, - "step": 33925 - }, - { - "epoch": 17.692829204693613, - "grad_norm": 1.4873194694519043, - "learning_rate": 6.641809045226131e-05, - "loss": 5.2702, - "step": 33926 - }, - { - "epoch": 17.693350717079532, - "grad_norm": 1.5172910690307617, - "learning_rate": 6.641708542713569e-05, - "loss": 5.4209, - "step": 33927 - }, - { - "epoch": 17.69387222946545, - "grad_norm": 1.6104000806808472, - "learning_rate": 6.641608040201005e-05, - "loss": 5.0777, - "step": 33928 - }, - { - "epoch": 17.694393741851368, - "grad_norm": 1.5024957656860352, - "learning_rate": 6.641507537688443e-05, - "loss": 5.2314, - "step": 33929 - }, - { - "epoch": 17.694915254237287, - "grad_norm": 1.4198925495147705, - "learning_rate": 6.641407035175879e-05, - "loss": 4.2081, - "step": 33930 - }, - { - "epoch": 17.695436766623207, - "grad_norm": 1.4405436515808105, - "learning_rate": 6.641306532663317e-05, - "loss": 4.6771, - "step": 33931 - }, - { - "epoch": 17.695958279009126, - "grad_norm": 1.4244331121444702, - "learning_rate": 6.641206030150754e-05, - "loss": 5.7691, - "step": 33932 - }, - { - "epoch": 17.696479791395046, - "grad_norm": 1.4938485622406006, - "learning_rate": 6.641105527638191e-05, - "loss": 5.0114, - "step": 33933 - }, - { - "epoch": 17.697001303780965, - "grad_norm": 1.5359841585159302, - "learning_rate": 6.641005025125628e-05, - "loss": 5.1511, - "step": 33934 - }, - { - "epoch": 17.697522816166884, - "grad_norm": 1.4772708415985107, - "learning_rate": 6.640904522613065e-05, - "loss": 5.4924, - "step": 33935 - }, - { - "epoch": 17.698044328552804, - "grad_norm": 1.5132079124450684, - "learning_rate": 6.640804020100503e-05, - "loss": 5.0828, - "step": 33936 - }, - { - "epoch": 17.698565840938723, - "grad_norm": 1.5220805406570435, - "learning_rate": 6.64070351758794e-05, - "loss": 5.1595, - "step": 33937 - }, - { - "epoch": 17.699087353324643, - "grad_norm": 1.5240825414657593, - "learning_rate": 6.640603015075377e-05, - "loss": 5.2319, - "step": 33938 - }, - { - "epoch": 17.69960886571056, - "grad_norm": 1.4871296882629395, - "learning_rate": 6.640502512562814e-05, - "loss": 5.3343, - "step": 33939 - }, - { - "epoch": 17.70013037809648, - "grad_norm": 1.6370012760162354, - "learning_rate": 6.640402010050252e-05, - "loss": 4.7728, - "step": 33940 - }, - { - "epoch": 17.700651890482398, - "grad_norm": 1.4474058151245117, - "learning_rate": 6.640301507537688e-05, - "loss": 5.7329, - "step": 33941 - }, - { - "epoch": 17.701173402868317, - "grad_norm": 1.4073457717895508, - "learning_rate": 6.640201005025126e-05, - "loss": 5.2567, - "step": 33942 - }, - { - "epoch": 17.701694915254237, - "grad_norm": 1.4269779920578003, - "learning_rate": 6.640100502512562e-05, - "loss": 5.0222, - "step": 33943 - }, - { - "epoch": 17.702216427640156, - "grad_norm": 1.5119004249572754, - "learning_rate": 6.64e-05, - "loss": 5.3722, - "step": 33944 - }, - { - "epoch": 17.702737940026076, - "grad_norm": 1.5115355253219604, - "learning_rate": 6.639899497487438e-05, - "loss": 5.1401, - "step": 33945 - }, - { - "epoch": 17.703259452411995, - "grad_norm": 1.4069793224334717, - "learning_rate": 6.639798994974876e-05, - "loss": 4.9536, - "step": 33946 - }, - { - "epoch": 17.703780964797915, - "grad_norm": 1.4682292938232422, - "learning_rate": 6.639698492462312e-05, - "loss": 5.181, - "step": 33947 - }, - { - "epoch": 17.704302477183834, - "grad_norm": 1.5309064388275146, - "learning_rate": 6.63959798994975e-05, - "loss": 5.1188, - "step": 33948 - }, - { - "epoch": 17.704823989569753, - "grad_norm": 1.5165921449661255, - "learning_rate": 6.639497487437186e-05, - "loss": 5.3185, - "step": 33949 - }, - { - "epoch": 17.705345501955673, - "grad_norm": 1.5475646257400513, - "learning_rate": 6.639396984924623e-05, - "loss": 5.5732, - "step": 33950 - }, - { - "epoch": 17.705867014341592, - "grad_norm": 1.5823655128479004, - "learning_rate": 6.63929648241206e-05, - "loss": 5.1366, - "step": 33951 - }, - { - "epoch": 17.70638852672751, - "grad_norm": 1.523358941078186, - "learning_rate": 6.639195979899497e-05, - "loss": 5.1217, - "step": 33952 - }, - { - "epoch": 17.706910039113428, - "grad_norm": 1.4531477689743042, - "learning_rate": 6.639095477386935e-05, - "loss": 5.2526, - "step": 33953 - }, - { - "epoch": 17.707431551499347, - "grad_norm": 1.4679646492004395, - "learning_rate": 6.638994974874371e-05, - "loss": 5.1577, - "step": 33954 - }, - { - "epoch": 17.707953063885267, - "grad_norm": 1.584568738937378, - "learning_rate": 6.638894472361809e-05, - "loss": 5.4387, - "step": 33955 - }, - { - "epoch": 17.708474576271186, - "grad_norm": 1.4758305549621582, - "learning_rate": 6.638793969849247e-05, - "loss": 5.1792, - "step": 33956 - }, - { - "epoch": 17.708996088657106, - "grad_norm": 1.5371962785720825, - "learning_rate": 6.638693467336685e-05, - "loss": 4.9521, - "step": 33957 - }, - { - "epoch": 17.709517601043025, - "grad_norm": 1.5461273193359375, - "learning_rate": 6.638592964824121e-05, - "loss": 5.164, - "step": 33958 - }, - { - "epoch": 17.710039113428945, - "grad_norm": 1.4966219663619995, - "learning_rate": 6.638492462311559e-05, - "loss": 5.6426, - "step": 33959 - }, - { - "epoch": 17.710560625814864, - "grad_norm": 1.3943805694580078, - "learning_rate": 6.638391959798995e-05, - "loss": 5.7572, - "step": 33960 - }, - { - "epoch": 17.711082138200783, - "grad_norm": 1.5811325311660767, - "learning_rate": 6.638291457286433e-05, - "loss": 5.1492, - "step": 33961 - }, - { - "epoch": 17.711603650586703, - "grad_norm": 1.54188871383667, - "learning_rate": 6.63819095477387e-05, - "loss": 5.3182, - "step": 33962 - }, - { - "epoch": 17.71212516297262, - "grad_norm": 1.768578290939331, - "learning_rate": 6.638090452261306e-05, - "loss": 4.8658, - "step": 33963 - }, - { - "epoch": 17.71264667535854, - "grad_norm": 1.583997368812561, - "learning_rate": 6.637989949748744e-05, - "loss": 5.3972, - "step": 33964 - }, - { - "epoch": 17.713168187744458, - "grad_norm": 1.5256150960922241, - "learning_rate": 6.637889447236181e-05, - "loss": 5.3428, - "step": 33965 - }, - { - "epoch": 17.713689700130377, - "grad_norm": 1.6452378034591675, - "learning_rate": 6.637788944723619e-05, - "loss": 5.1093, - "step": 33966 - }, - { - "epoch": 17.714211212516297, - "grad_norm": 1.4007986783981323, - "learning_rate": 6.637688442211056e-05, - "loss": 5.4862, - "step": 33967 - }, - { - "epoch": 17.714732724902216, - "grad_norm": 1.4481621980667114, - "learning_rate": 6.637587939698493e-05, - "loss": 5.6233, - "step": 33968 - }, - { - "epoch": 17.715254237288136, - "grad_norm": 1.5582596063613892, - "learning_rate": 6.63748743718593e-05, - "loss": 5.2842, - "step": 33969 - }, - { - "epoch": 17.715775749674055, - "grad_norm": 1.5077180862426758, - "learning_rate": 6.637386934673368e-05, - "loss": 5.1885, - "step": 33970 - }, - { - "epoch": 17.716297262059975, - "grad_norm": 1.4837169647216797, - "learning_rate": 6.637286432160804e-05, - "loss": 5.4177, - "step": 33971 - }, - { - "epoch": 17.716818774445894, - "grad_norm": 1.4707247018814087, - "learning_rate": 6.637185929648242e-05, - "loss": 5.1212, - "step": 33972 - }, - { - "epoch": 17.717340286831814, - "grad_norm": 1.5598130226135254, - "learning_rate": 6.637085427135678e-05, - "loss": 4.7059, - "step": 33973 - }, - { - "epoch": 17.717861799217733, - "grad_norm": 1.4122270345687866, - "learning_rate": 6.636984924623116e-05, - "loss": 5.567, - "step": 33974 - }, - { - "epoch": 17.71838331160365, - "grad_norm": 1.4736398458480835, - "learning_rate": 6.636884422110552e-05, - "loss": 5.5962, - "step": 33975 - }, - { - "epoch": 17.71890482398957, - "grad_norm": 1.4897257089614868, - "learning_rate": 6.63678391959799e-05, - "loss": 5.5385, - "step": 33976 - }, - { - "epoch": 17.719426336375488, - "grad_norm": 1.3955022096633911, - "learning_rate": 6.636683417085428e-05, - "loss": 5.4917, - "step": 33977 - }, - { - "epoch": 17.719947848761407, - "grad_norm": 1.296190857887268, - "learning_rate": 6.636582914572864e-05, - "loss": 4.9687, - "step": 33978 - }, - { - "epoch": 17.720469361147327, - "grad_norm": 1.4385679960250854, - "learning_rate": 6.636482412060302e-05, - "loss": 5.411, - "step": 33979 - }, - { - "epoch": 17.720990873533246, - "grad_norm": 1.4437214136123657, - "learning_rate": 6.636381909547739e-05, - "loss": 5.4045, - "step": 33980 - }, - { - "epoch": 17.721512385919166, - "grad_norm": 1.5357213020324707, - "learning_rate": 6.636281407035176e-05, - "loss": 5.1633, - "step": 33981 - }, - { - "epoch": 17.722033898305085, - "grad_norm": 1.5092896223068237, - "learning_rate": 6.636180904522613e-05, - "loss": 5.2877, - "step": 33982 - }, - { - "epoch": 17.722555410691005, - "grad_norm": 1.53424072265625, - "learning_rate": 6.63608040201005e-05, - "loss": 5.1418, - "step": 33983 - }, - { - "epoch": 17.723076923076924, - "grad_norm": 1.4948614835739136, - "learning_rate": 6.635979899497487e-05, - "loss": 5.4838, - "step": 33984 - }, - { - "epoch": 17.723598435462844, - "grad_norm": 1.5220615863800049, - "learning_rate": 6.635879396984925e-05, - "loss": 5.2881, - "step": 33985 - }, - { - "epoch": 17.724119947848763, - "grad_norm": 1.5628411769866943, - "learning_rate": 6.635778894472363e-05, - "loss": 5.1657, - "step": 33986 - }, - { - "epoch": 17.72464146023468, - "grad_norm": 1.518025517463684, - "learning_rate": 6.6356783919598e-05, - "loss": 5.1628, - "step": 33987 - }, - { - "epoch": 17.7251629726206, - "grad_norm": 1.4870736598968506, - "learning_rate": 6.635577889447237e-05, - "loss": 5.262, - "step": 33988 - }, - { - "epoch": 17.725684485006518, - "grad_norm": 1.4452954530715942, - "learning_rate": 6.635477386934673e-05, - "loss": 5.4286, - "step": 33989 - }, - { - "epoch": 17.726205997392437, - "grad_norm": 1.511130928993225, - "learning_rate": 6.635376884422111e-05, - "loss": 5.3685, - "step": 33990 - }, - { - "epoch": 17.726727509778357, - "grad_norm": 1.4646263122558594, - "learning_rate": 6.635276381909547e-05, - "loss": 5.63, - "step": 33991 - }, - { - "epoch": 17.727249022164276, - "grad_norm": 1.4366792440414429, - "learning_rate": 6.635175879396985e-05, - "loss": 5.4624, - "step": 33992 - }, - { - "epoch": 17.727770534550196, - "grad_norm": 1.5110094547271729, - "learning_rate": 6.635075376884422e-05, - "loss": 5.326, - "step": 33993 - }, - { - "epoch": 17.728292046936115, - "grad_norm": 1.5028971433639526, - "learning_rate": 6.63497487437186e-05, - "loss": 5.508, - "step": 33994 - }, - { - "epoch": 17.728813559322035, - "grad_norm": 1.5843050479888916, - "learning_rate": 6.634874371859296e-05, - "loss": 4.9835, - "step": 33995 - }, - { - "epoch": 17.729335071707954, - "grad_norm": 1.4506911039352417, - "learning_rate": 6.634773869346734e-05, - "loss": 5.5396, - "step": 33996 - }, - { - "epoch": 17.729856584093874, - "grad_norm": 1.4093742370605469, - "learning_rate": 6.634673366834171e-05, - "loss": 5.3419, - "step": 33997 - }, - { - "epoch": 17.730378096479793, - "grad_norm": 1.4181358814239502, - "learning_rate": 6.634572864321609e-05, - "loss": 5.2916, - "step": 33998 - }, - { - "epoch": 17.73089960886571, - "grad_norm": 1.4684287309646606, - "learning_rate": 6.634472361809046e-05, - "loss": 5.5967, - "step": 33999 - }, - { - "epoch": 17.73142112125163, - "grad_norm": 1.5258405208587646, - "learning_rate": 6.634371859296483e-05, - "loss": 5.3881, - "step": 34000 - }, - { - "epoch": 17.731942633637548, - "grad_norm": 1.5774190425872803, - "learning_rate": 6.63427135678392e-05, - "loss": 5.2617, - "step": 34001 - }, - { - "epoch": 17.732464146023467, - "grad_norm": 1.6051017045974731, - "learning_rate": 6.634170854271358e-05, - "loss": 4.7995, - "step": 34002 - }, - { - "epoch": 17.732985658409387, - "grad_norm": 1.6786003112792969, - "learning_rate": 6.634070351758794e-05, - "loss": 5.1396, - "step": 34003 - }, - { - "epoch": 17.733507170795306, - "grad_norm": 1.4347326755523682, - "learning_rate": 6.63396984924623e-05, - "loss": 5.3605, - "step": 34004 - }, - { - "epoch": 17.734028683181226, - "grad_norm": 1.3448669910430908, - "learning_rate": 6.633869346733668e-05, - "loss": 5.3233, - "step": 34005 - }, - { - "epoch": 17.734550195567145, - "grad_norm": 1.4563310146331787, - "learning_rate": 6.633768844221106e-05, - "loss": 5.6125, - "step": 34006 - }, - { - "epoch": 17.735071707953065, - "grad_norm": 1.4407594203948975, - "learning_rate": 6.633668341708544e-05, - "loss": 5.4792, - "step": 34007 - }, - { - "epoch": 17.735593220338984, - "grad_norm": 1.5474656820297241, - "learning_rate": 6.63356783919598e-05, - "loss": 5.3747, - "step": 34008 - }, - { - "epoch": 17.736114732724904, - "grad_norm": 1.5788911581039429, - "learning_rate": 6.633467336683418e-05, - "loss": 4.9608, - "step": 34009 - }, - { - "epoch": 17.736636245110823, - "grad_norm": 1.584781289100647, - "learning_rate": 6.633366834170854e-05, - "loss": 5.238, - "step": 34010 - }, - { - "epoch": 17.73715775749674, - "grad_norm": 1.44032621383667, - "learning_rate": 6.633266331658292e-05, - "loss": 5.5393, - "step": 34011 - }, - { - "epoch": 17.73767926988266, - "grad_norm": 1.4892401695251465, - "learning_rate": 6.633165829145729e-05, - "loss": 5.1824, - "step": 34012 - }, - { - "epoch": 17.738200782268578, - "grad_norm": 1.5925078392028809, - "learning_rate": 6.633065326633166e-05, - "loss": 4.8028, - "step": 34013 - }, - { - "epoch": 17.738722294654497, - "grad_norm": 1.4390100240707397, - "learning_rate": 6.632964824120603e-05, - "loss": 5.6108, - "step": 34014 - }, - { - "epoch": 17.739243807040417, - "grad_norm": 1.498437762260437, - "learning_rate": 6.632864321608041e-05, - "loss": 5.3658, - "step": 34015 - }, - { - "epoch": 17.739765319426336, - "grad_norm": 1.5017285346984863, - "learning_rate": 6.632763819095477e-05, - "loss": 5.2876, - "step": 34016 - }, - { - "epoch": 17.740286831812256, - "grad_norm": 1.3372889757156372, - "learning_rate": 6.632663316582915e-05, - "loss": 4.9044, - "step": 34017 - }, - { - "epoch": 17.740808344198175, - "grad_norm": 1.4238203763961792, - "learning_rate": 6.632562814070353e-05, - "loss": 5.4756, - "step": 34018 - }, - { - "epoch": 17.741329856584095, - "grad_norm": 1.5349379777908325, - "learning_rate": 6.632462311557789e-05, - "loss": 5.4139, - "step": 34019 - }, - { - "epoch": 17.741851368970014, - "grad_norm": 1.5294885635375977, - "learning_rate": 6.632361809045227e-05, - "loss": 5.3083, - "step": 34020 - }, - { - "epoch": 17.742372881355934, - "grad_norm": 1.5907020568847656, - "learning_rate": 6.632261306532663e-05, - "loss": 5.4858, - "step": 34021 - }, - { - "epoch": 17.742894393741853, - "grad_norm": 1.4713807106018066, - "learning_rate": 6.632160804020101e-05, - "loss": 5.2902, - "step": 34022 - }, - { - "epoch": 17.74341590612777, - "grad_norm": 1.386351227760315, - "learning_rate": 6.632060301507538e-05, - "loss": 5.7511, - "step": 34023 - }, - { - "epoch": 17.74393741851369, - "grad_norm": 1.48618745803833, - "learning_rate": 6.631959798994975e-05, - "loss": 5.4261, - "step": 34024 - }, - { - "epoch": 17.744458930899608, - "grad_norm": 1.4960335493087769, - "learning_rate": 6.631859296482412e-05, - "loss": 5.6106, - "step": 34025 - }, - { - "epoch": 17.744980443285527, - "grad_norm": 1.4809279441833496, - "learning_rate": 6.63175879396985e-05, - "loss": 5.2712, - "step": 34026 - }, - { - "epoch": 17.745501955671447, - "grad_norm": 1.5545375347137451, - "learning_rate": 6.631658291457287e-05, - "loss": 5.1411, - "step": 34027 - }, - { - "epoch": 17.746023468057366, - "grad_norm": 1.4441710710525513, - "learning_rate": 6.631557788944725e-05, - "loss": 5.5798, - "step": 34028 - }, - { - "epoch": 17.746544980443286, - "grad_norm": 1.6738885641098022, - "learning_rate": 6.631457286432162e-05, - "loss": 4.7865, - "step": 34029 - }, - { - "epoch": 17.747066492829205, - "grad_norm": 1.3681094646453857, - "learning_rate": 6.631356783919598e-05, - "loss": 5.6539, - "step": 34030 - }, - { - "epoch": 17.747588005215125, - "grad_norm": 1.6925127506256104, - "learning_rate": 6.631256281407036e-05, - "loss": 5.002, - "step": 34031 - }, - { - "epoch": 17.748109517601044, - "grad_norm": 1.5873538255691528, - "learning_rate": 6.631155778894472e-05, - "loss": 5.3818, - "step": 34032 - }, - { - "epoch": 17.748631029986964, - "grad_norm": 1.524775505065918, - "learning_rate": 6.63105527638191e-05, - "loss": 5.767, - "step": 34033 - }, - { - "epoch": 17.749152542372883, - "grad_norm": 1.4148629903793335, - "learning_rate": 6.630954773869346e-05, - "loss": 5.3173, - "step": 34034 - }, - { - "epoch": 17.7496740547588, - "grad_norm": 1.5661494731903076, - "learning_rate": 6.630854271356784e-05, - "loss": 4.8247, - "step": 34035 - }, - { - "epoch": 17.75019556714472, - "grad_norm": 1.5565531253814697, - "learning_rate": 6.63075376884422e-05, - "loss": 5.1137, - "step": 34036 - }, - { - "epoch": 17.750717079530638, - "grad_norm": 1.617518424987793, - "learning_rate": 6.630653266331658e-05, - "loss": 5.4182, - "step": 34037 - }, - { - "epoch": 17.751238591916557, - "grad_norm": 1.4515782594680786, - "learning_rate": 6.630552763819096e-05, - "loss": 5.5014, - "step": 34038 - }, - { - "epoch": 17.751760104302477, - "grad_norm": 1.442868709564209, - "learning_rate": 6.630452261306534e-05, - "loss": 5.4058, - "step": 34039 - }, - { - "epoch": 17.752281616688396, - "grad_norm": 1.501365303993225, - "learning_rate": 6.63035175879397e-05, - "loss": 4.9482, - "step": 34040 - }, - { - "epoch": 17.752803129074316, - "grad_norm": 1.458452820777893, - "learning_rate": 6.630251256281408e-05, - "loss": 4.9089, - "step": 34041 - }, - { - "epoch": 17.753324641460235, - "grad_norm": 1.446770191192627, - "learning_rate": 6.630150753768845e-05, - "loss": 5.3489, - "step": 34042 - }, - { - "epoch": 17.753846153846155, - "grad_norm": 1.604756474494934, - "learning_rate": 6.630050251256281e-05, - "loss": 5.0985, - "step": 34043 - }, - { - "epoch": 17.754367666232074, - "grad_norm": 1.5058866739273071, - "learning_rate": 6.629949748743719e-05, - "loss": 5.6879, - "step": 34044 - }, - { - "epoch": 17.754889178617994, - "grad_norm": 1.5655930042266846, - "learning_rate": 6.629849246231155e-05, - "loss": 5.3617, - "step": 34045 - }, - { - "epoch": 17.75541069100391, - "grad_norm": 1.4845702648162842, - "learning_rate": 6.629748743718593e-05, - "loss": 5.5881, - "step": 34046 - }, - { - "epoch": 17.75593220338983, - "grad_norm": 1.4271787405014038, - "learning_rate": 6.629648241206031e-05, - "loss": 5.604, - "step": 34047 - }, - { - "epoch": 17.75645371577575, - "grad_norm": 1.526403546333313, - "learning_rate": 6.629547738693469e-05, - "loss": 4.8927, - "step": 34048 - }, - { - "epoch": 17.756975228161668, - "grad_norm": 1.5337697267532349, - "learning_rate": 6.629447236180905e-05, - "loss": 5.3205, - "step": 34049 - }, - { - "epoch": 17.757496740547587, - "grad_norm": 1.4769103527069092, - "learning_rate": 6.629346733668343e-05, - "loss": 5.1893, - "step": 34050 - }, - { - "epoch": 17.758018252933507, - "grad_norm": 1.6541460752487183, - "learning_rate": 6.629246231155779e-05, - "loss": 5.0246, - "step": 34051 - }, - { - "epoch": 17.758539765319426, - "grad_norm": 1.5017657279968262, - "learning_rate": 6.629145728643217e-05, - "loss": 5.5085, - "step": 34052 - }, - { - "epoch": 17.759061277705346, - "grad_norm": 1.3816566467285156, - "learning_rate": 6.629045226130653e-05, - "loss": 5.4103, - "step": 34053 - }, - { - "epoch": 17.759582790091265, - "grad_norm": 1.464143991470337, - "learning_rate": 6.628944723618091e-05, - "loss": 5.8631, - "step": 34054 - }, - { - "epoch": 17.760104302477185, - "grad_norm": 1.5266371965408325, - "learning_rate": 6.628844221105528e-05, - "loss": 5.5927, - "step": 34055 - }, - { - "epoch": 17.760625814863104, - "grad_norm": 1.4708077907562256, - "learning_rate": 6.628743718592964e-05, - "loss": 5.679, - "step": 34056 - }, - { - "epoch": 17.761147327249024, - "grad_norm": 1.5320889949798584, - "learning_rate": 6.628643216080402e-05, - "loss": 5.6736, - "step": 34057 - }, - { - "epoch": 17.761668839634943, - "grad_norm": 1.392321228981018, - "learning_rate": 6.62854271356784e-05, - "loss": 5.4249, - "step": 34058 - }, - { - "epoch": 17.76219035202086, - "grad_norm": 1.4703823328018188, - "learning_rate": 6.628442211055277e-05, - "loss": 5.3536, - "step": 34059 - }, - { - "epoch": 17.76271186440678, - "grad_norm": 1.4343229532241821, - "learning_rate": 6.628341708542714e-05, - "loss": 5.6212, - "step": 34060 - }, - { - "epoch": 17.763233376792698, - "grad_norm": 1.447316288948059, - "learning_rate": 6.628241206030152e-05, - "loss": 5.6336, - "step": 34061 - }, - { - "epoch": 17.763754889178617, - "grad_norm": 1.4628902673721313, - "learning_rate": 6.628140703517588e-05, - "loss": 5.7239, - "step": 34062 - }, - { - "epoch": 17.764276401564537, - "grad_norm": 1.4735627174377441, - "learning_rate": 6.628040201005026e-05, - "loss": 5.317, - "step": 34063 - }, - { - "epoch": 17.764797913950456, - "grad_norm": 1.541876196861267, - "learning_rate": 6.627939698492462e-05, - "loss": 5.5583, - "step": 34064 - }, - { - "epoch": 17.765319426336376, - "grad_norm": 1.5245206356048584, - "learning_rate": 6.6278391959799e-05, - "loss": 5.3153, - "step": 34065 - }, - { - "epoch": 17.765840938722295, - "grad_norm": 1.5133216381072998, - "learning_rate": 6.627738693467336e-05, - "loss": 5.3205, - "step": 34066 - }, - { - "epoch": 17.766362451108215, - "grad_norm": 1.4117406606674194, - "learning_rate": 6.627638190954774e-05, - "loss": 5.1594, - "step": 34067 - }, - { - "epoch": 17.766883963494134, - "grad_norm": 1.6245986223220825, - "learning_rate": 6.627537688442212e-05, - "loss": 5.2965, - "step": 34068 - }, - { - "epoch": 17.767405475880054, - "grad_norm": 1.5102895498275757, - "learning_rate": 6.627437185929648e-05, - "loss": 4.8158, - "step": 34069 - }, - { - "epoch": 17.76792698826597, - "grad_norm": 1.4122233390808105, - "learning_rate": 6.627336683417086e-05, - "loss": 4.8252, - "step": 34070 - }, - { - "epoch": 17.76844850065189, - "grad_norm": 1.4739338159561157, - "learning_rate": 6.627236180904523e-05, - "loss": 5.4903, - "step": 34071 - }, - { - "epoch": 17.76897001303781, - "grad_norm": 1.4737794399261475, - "learning_rate": 6.62713567839196e-05, - "loss": 5.394, - "step": 34072 - }, - { - "epoch": 17.769491525423728, - "grad_norm": 1.4276959896087646, - "learning_rate": 6.627035175879397e-05, - "loss": 5.1141, - "step": 34073 - }, - { - "epoch": 17.770013037809647, - "grad_norm": 1.4630588293075562, - "learning_rate": 6.626934673366835e-05, - "loss": 5.6762, - "step": 34074 - }, - { - "epoch": 17.770534550195567, - "grad_norm": 1.5317208766937256, - "learning_rate": 6.626834170854271e-05, - "loss": 5.1598, - "step": 34075 - }, - { - "epoch": 17.771056062581486, - "grad_norm": 1.4011359214782715, - "learning_rate": 6.626733668341709e-05, - "loss": 5.5981, - "step": 34076 - }, - { - "epoch": 17.771577574967406, - "grad_norm": 1.4914757013320923, - "learning_rate": 6.626633165829145e-05, - "loss": 5.7157, - "step": 34077 - }, - { - "epoch": 17.772099087353325, - "grad_norm": 1.5700732469558716, - "learning_rate": 6.626532663316583e-05, - "loss": 5.2566, - "step": 34078 - }, - { - "epoch": 17.772620599739245, - "grad_norm": 1.659585952758789, - "learning_rate": 6.626432160804021e-05, - "loss": 5.3603, - "step": 34079 - }, - { - "epoch": 17.773142112125164, - "grad_norm": 1.417339563369751, - "learning_rate": 6.626331658291459e-05, - "loss": 5.6049, - "step": 34080 - }, - { - "epoch": 17.773663624511084, - "grad_norm": 1.5806554555892944, - "learning_rate": 6.626231155778895e-05, - "loss": 5.142, - "step": 34081 - }, - { - "epoch": 17.774185136897, - "grad_norm": 1.5389351844787598, - "learning_rate": 6.626130653266331e-05, - "loss": 5.3522, - "step": 34082 - }, - { - "epoch": 17.77470664928292, - "grad_norm": 1.5545969009399414, - "learning_rate": 6.626030150753769e-05, - "loss": 4.6723, - "step": 34083 - }, - { - "epoch": 17.77522816166884, - "grad_norm": 1.5485390424728394, - "learning_rate": 6.625929648241206e-05, - "loss": 5.6948, - "step": 34084 - }, - { - "epoch": 17.775749674054758, - "grad_norm": 1.5437757968902588, - "learning_rate": 6.625829145728643e-05, - "loss": 5.2709, - "step": 34085 - }, - { - "epoch": 17.776271186440677, - "grad_norm": 1.4965580701828003, - "learning_rate": 6.62572864321608e-05, - "loss": 5.1718, - "step": 34086 - }, - { - "epoch": 17.776792698826597, - "grad_norm": 1.5819000005722046, - "learning_rate": 6.625628140703518e-05, - "loss": 5.1059, - "step": 34087 - }, - { - "epoch": 17.777314211212516, - "grad_norm": 1.4199316501617432, - "learning_rate": 6.625527638190955e-05, - "loss": 5.185, - "step": 34088 - }, - { - "epoch": 17.777835723598436, - "grad_norm": 1.3812531232833862, - "learning_rate": 6.625427135678393e-05, - "loss": 5.1865, - "step": 34089 - }, - { - "epoch": 17.778357235984355, - "grad_norm": 1.4293076992034912, - "learning_rate": 6.62532663316583e-05, - "loss": 4.9047, - "step": 34090 - }, - { - "epoch": 17.778878748370275, - "grad_norm": 1.3740917444229126, - "learning_rate": 6.625226130653267e-05, - "loss": 5.6706, - "step": 34091 - }, - { - "epoch": 17.779400260756194, - "grad_norm": 1.6101309061050415, - "learning_rate": 6.625125628140704e-05, - "loss": 5.0422, - "step": 34092 - }, - { - "epoch": 17.779921773142114, - "grad_norm": 1.6216946840286255, - "learning_rate": 6.625025125628142e-05, - "loss": 5.3548, - "step": 34093 - }, - { - "epoch": 17.78044328552803, - "grad_norm": 1.4708176851272583, - "learning_rate": 6.624924623115578e-05, - "loss": 5.0864, - "step": 34094 - }, - { - "epoch": 17.78096479791395, - "grad_norm": 1.5334604978561401, - "learning_rate": 6.624824120603016e-05, - "loss": 5.1087, - "step": 34095 - }, - { - "epoch": 17.78148631029987, - "grad_norm": 1.5152472257614136, - "learning_rate": 6.624723618090452e-05, - "loss": 5.181, - "step": 34096 - }, - { - "epoch": 17.782007822685788, - "grad_norm": 1.4161919355392456, - "learning_rate": 6.624623115577889e-05, - "loss": 5.3471, - "step": 34097 - }, - { - "epoch": 17.782529335071708, - "grad_norm": 1.6311007738113403, - "learning_rate": 6.624522613065327e-05, - "loss": 5.4504, - "step": 34098 - }, - { - "epoch": 17.783050847457627, - "grad_norm": 1.6146471500396729, - "learning_rate": 6.624422110552764e-05, - "loss": 5.123, - "step": 34099 - }, - { - "epoch": 17.783572359843546, - "grad_norm": 1.5950535535812378, - "learning_rate": 6.624321608040202e-05, - "loss": 5.4654, - "step": 34100 - }, - { - "epoch": 17.784093872229466, - "grad_norm": 1.5153263807296753, - "learning_rate": 6.624221105527639e-05, - "loss": 5.2997, - "step": 34101 - }, - { - "epoch": 17.784615384615385, - "grad_norm": 1.6389403343200684, - "learning_rate": 6.624120603015076e-05, - "loss": 4.8207, - "step": 34102 - }, - { - "epoch": 17.785136897001305, - "grad_norm": 1.5448570251464844, - "learning_rate": 6.624020100502513e-05, - "loss": 5.0458, - "step": 34103 - }, - { - "epoch": 17.785658409387224, - "grad_norm": 1.466753363609314, - "learning_rate": 6.62391959798995e-05, - "loss": 5.4656, - "step": 34104 - }, - { - "epoch": 17.786179921773144, - "grad_norm": 1.4027987718582153, - "learning_rate": 6.623819095477387e-05, - "loss": 5.5565, - "step": 34105 - }, - { - "epoch": 17.78670143415906, - "grad_norm": 1.4886294603347778, - "learning_rate": 6.623718592964825e-05, - "loss": 5.4626, - "step": 34106 - }, - { - "epoch": 17.78722294654498, - "grad_norm": 1.5093833208084106, - "learning_rate": 6.623618090452261e-05, - "loss": 5.5134, - "step": 34107 - }, - { - "epoch": 17.7877444589309, - "grad_norm": 1.5809425115585327, - "learning_rate": 6.623517587939699e-05, - "loss": 5.1823, - "step": 34108 - }, - { - "epoch": 17.788265971316818, - "grad_norm": 1.548783779144287, - "learning_rate": 6.623417085427135e-05, - "loss": 5.0598, - "step": 34109 - }, - { - "epoch": 17.788787483702738, - "grad_norm": 1.4810954332351685, - "learning_rate": 6.623316582914573e-05, - "loss": 5.1004, - "step": 34110 - }, - { - "epoch": 17.789308996088657, - "grad_norm": 1.514634132385254, - "learning_rate": 6.623216080402011e-05, - "loss": 5.5315, - "step": 34111 - }, - { - "epoch": 17.789830508474576, - "grad_norm": 1.5924838781356812, - "learning_rate": 6.623115577889447e-05, - "loss": 4.5643, - "step": 34112 - }, - { - "epoch": 17.790352020860496, - "grad_norm": 1.5920065641403198, - "learning_rate": 6.623015075376885e-05, - "loss": 5.6889, - "step": 34113 - }, - { - "epoch": 17.790873533246415, - "grad_norm": 1.5513619184494019, - "learning_rate": 6.622914572864322e-05, - "loss": 5.036, - "step": 34114 - }, - { - "epoch": 17.791395045632335, - "grad_norm": 1.455264687538147, - "learning_rate": 6.62281407035176e-05, - "loss": 5.2154, - "step": 34115 - }, - { - "epoch": 17.791916558018254, - "grad_norm": 1.4811134338378906, - "learning_rate": 6.622713567839196e-05, - "loss": 5.4472, - "step": 34116 - }, - { - "epoch": 17.792438070404174, - "grad_norm": 1.5069597959518433, - "learning_rate": 6.622613065326634e-05, - "loss": 5.4174, - "step": 34117 - }, - { - "epoch": 17.79295958279009, - "grad_norm": 1.400193691253662, - "learning_rate": 6.62251256281407e-05, - "loss": 5.6491, - "step": 34118 - }, - { - "epoch": 17.79348109517601, - "grad_norm": 1.4338458776474, - "learning_rate": 6.622412060301508e-05, - "loss": 4.9351, - "step": 34119 - }, - { - "epoch": 17.79400260756193, - "grad_norm": 1.5411001443862915, - "learning_rate": 6.622311557788946e-05, - "loss": 5.0466, - "step": 34120 - }, - { - "epoch": 17.794524119947848, - "grad_norm": 1.4247519969940186, - "learning_rate": 6.622211055276383e-05, - "loss": 5.6181, - "step": 34121 - }, - { - "epoch": 17.795045632333768, - "grad_norm": 1.3847562074661255, - "learning_rate": 6.62211055276382e-05, - "loss": 5.4639, - "step": 34122 - }, - { - "epoch": 17.795567144719687, - "grad_norm": 1.5717153549194336, - "learning_rate": 6.622010050251256e-05, - "loss": 5.1208, - "step": 34123 - }, - { - "epoch": 17.796088657105607, - "grad_norm": 1.4580587148666382, - "learning_rate": 6.621909547738694e-05, - "loss": 5.6597, - "step": 34124 - }, - { - "epoch": 17.796610169491526, - "grad_norm": 1.4694101810455322, - "learning_rate": 6.62180904522613e-05, - "loss": 5.2462, - "step": 34125 - }, - { - "epoch": 17.797131681877445, - "grad_norm": 1.33254873752594, - "learning_rate": 6.621708542713568e-05, - "loss": 4.8385, - "step": 34126 - }, - { - "epoch": 17.797653194263365, - "grad_norm": 1.5079607963562012, - "learning_rate": 6.621608040201005e-05, - "loss": 4.7002, - "step": 34127 - }, - { - "epoch": 17.798174706649284, - "grad_norm": 1.5180739164352417, - "learning_rate": 6.621507537688442e-05, - "loss": 5.3188, - "step": 34128 - }, - { - "epoch": 17.7986962190352, - "grad_norm": 1.4293779134750366, - "learning_rate": 6.621407035175879e-05, - "loss": 5.2707, - "step": 34129 - }, - { - "epoch": 17.79921773142112, - "grad_norm": 1.4428153038024902, - "learning_rate": 6.621306532663317e-05, - "loss": 5.5586, - "step": 34130 - }, - { - "epoch": 17.79973924380704, - "grad_norm": 1.3518590927124023, - "learning_rate": 6.621206030150754e-05, - "loss": 5.3518, - "step": 34131 - }, - { - "epoch": 17.80026075619296, - "grad_norm": 1.4788901805877686, - "learning_rate": 6.621105527638192e-05, - "loss": 5.2263, - "step": 34132 - }, - { - "epoch": 17.800782268578878, - "grad_norm": 1.4910988807678223, - "learning_rate": 6.621005025125629e-05, - "loss": 5.3799, - "step": 34133 - }, - { - "epoch": 17.801303780964798, - "grad_norm": 1.4465559720993042, - "learning_rate": 6.620904522613066e-05, - "loss": 5.5595, - "step": 34134 - }, - { - "epoch": 17.801825293350717, - "grad_norm": 1.666839599609375, - "learning_rate": 6.620804020100503e-05, - "loss": 5.204, - "step": 34135 - }, - { - "epoch": 17.802346805736637, - "grad_norm": 1.605843424797058, - "learning_rate": 6.620703517587939e-05, - "loss": 5.4852, - "step": 34136 - }, - { - "epoch": 17.802868318122556, - "grad_norm": 1.4259769916534424, - "learning_rate": 6.620603015075377e-05, - "loss": 5.6396, - "step": 34137 - }, - { - "epoch": 17.803389830508475, - "grad_norm": 1.552046537399292, - "learning_rate": 6.620502512562813e-05, - "loss": 4.9099, - "step": 34138 - }, - { - "epoch": 17.803911342894395, - "grad_norm": 1.401660680770874, - "learning_rate": 6.620402010050251e-05, - "loss": 5.3042, - "step": 34139 - }, - { - "epoch": 17.804432855280314, - "grad_norm": 1.6784077882766724, - "learning_rate": 6.620301507537689e-05, - "loss": 4.7102, - "step": 34140 - }, - { - "epoch": 17.804954367666234, - "grad_norm": 1.4585713148117065, - "learning_rate": 6.620201005025127e-05, - "loss": 4.952, - "step": 34141 - }, - { - "epoch": 17.80547588005215, - "grad_norm": 1.512412428855896, - "learning_rate": 6.620100502512563e-05, - "loss": 5.1914, - "step": 34142 - }, - { - "epoch": 17.80599739243807, - "grad_norm": 1.5335512161254883, - "learning_rate": 6.620000000000001e-05, - "loss": 5.3396, - "step": 34143 - }, - { - "epoch": 17.80651890482399, - "grad_norm": 1.4567714929580688, - "learning_rate": 6.619899497487437e-05, - "loss": 5.6313, - "step": 34144 - }, - { - "epoch": 17.807040417209908, - "grad_norm": 1.5505163669586182, - "learning_rate": 6.619798994974875e-05, - "loss": 5.2584, - "step": 34145 - }, - { - "epoch": 17.807561929595828, - "grad_norm": 1.5707868337631226, - "learning_rate": 6.619698492462312e-05, - "loss": 5.1976, - "step": 34146 - }, - { - "epoch": 17.808083441981747, - "grad_norm": 1.5511524677276611, - "learning_rate": 6.61959798994975e-05, - "loss": 5.419, - "step": 34147 - }, - { - "epoch": 17.808604954367667, - "grad_norm": 1.5905958414077759, - "learning_rate": 6.619497487437186e-05, - "loss": 5.2128, - "step": 34148 - }, - { - "epoch": 17.809126466753586, - "grad_norm": 1.648732304573059, - "learning_rate": 6.619396984924622e-05, - "loss": 4.9393, - "step": 34149 - }, - { - "epoch": 17.809647979139505, - "grad_norm": 1.60069739818573, - "learning_rate": 6.61929648241206e-05, - "loss": 5.2973, - "step": 34150 - }, - { - "epoch": 17.810169491525425, - "grad_norm": 1.559180736541748, - "learning_rate": 6.619195979899498e-05, - "loss": 5.1804, - "step": 34151 - }, - { - "epoch": 17.810691003911344, - "grad_norm": 1.461225152015686, - "learning_rate": 6.619095477386936e-05, - "loss": 5.6263, - "step": 34152 - }, - { - "epoch": 17.81121251629726, - "grad_norm": 1.5481172800064087, - "learning_rate": 6.618994974874372e-05, - "loss": 5.2492, - "step": 34153 - }, - { - "epoch": 17.81173402868318, - "grad_norm": 1.4384114742279053, - "learning_rate": 6.61889447236181e-05, - "loss": 5.6567, - "step": 34154 - }, - { - "epoch": 17.8122555410691, - "grad_norm": 1.4931539297103882, - "learning_rate": 6.618793969849246e-05, - "loss": 5.2217, - "step": 34155 - }, - { - "epoch": 17.81277705345502, - "grad_norm": 1.3814979791641235, - "learning_rate": 6.618693467336684e-05, - "loss": 5.2595, - "step": 34156 - }, - { - "epoch": 17.813298565840938, - "grad_norm": 1.474827527999878, - "learning_rate": 6.61859296482412e-05, - "loss": 5.3994, - "step": 34157 - }, - { - "epoch": 17.813820078226858, - "grad_norm": 1.5105987787246704, - "learning_rate": 6.618492462311558e-05, - "loss": 5.4001, - "step": 34158 - }, - { - "epoch": 17.814341590612777, - "grad_norm": 1.426007628440857, - "learning_rate": 6.618391959798995e-05, - "loss": 5.6906, - "step": 34159 - }, - { - "epoch": 17.814863102998697, - "grad_norm": 1.5066930055618286, - "learning_rate": 6.618291457286432e-05, - "loss": 5.4432, - "step": 34160 - }, - { - "epoch": 17.815384615384616, - "grad_norm": 1.5136616230010986, - "learning_rate": 6.61819095477387e-05, - "loss": 5.5499, - "step": 34161 - }, - { - "epoch": 17.815906127770536, - "grad_norm": 1.5226930379867554, - "learning_rate": 6.618090452261307e-05, - "loss": 5.4659, - "step": 34162 - }, - { - "epoch": 17.816427640156455, - "grad_norm": 1.5295212268829346, - "learning_rate": 6.617989949748744e-05, - "loss": 5.2271, - "step": 34163 - }, - { - "epoch": 17.816949152542374, - "grad_norm": 1.5003018379211426, - "learning_rate": 6.617889447236181e-05, - "loss": 5.3546, - "step": 34164 - }, - { - "epoch": 17.81747066492829, - "grad_norm": 1.852987289428711, - "learning_rate": 6.617788944723619e-05, - "loss": 5.0304, - "step": 34165 - }, - { - "epoch": 17.81799217731421, - "grad_norm": 1.3983187675476074, - "learning_rate": 6.617688442211055e-05, - "loss": 5.7503, - "step": 34166 - }, - { - "epoch": 17.81851368970013, - "grad_norm": 1.5318094491958618, - "learning_rate": 6.617587939698493e-05, - "loss": 4.9793, - "step": 34167 - }, - { - "epoch": 17.81903520208605, - "grad_norm": 1.5291134119033813, - "learning_rate": 6.617487437185929e-05, - "loss": 4.8036, - "step": 34168 - }, - { - "epoch": 17.819556714471968, - "grad_norm": 1.5256781578063965, - "learning_rate": 6.617386934673367e-05, - "loss": 5.487, - "step": 34169 - }, - { - "epoch": 17.820078226857888, - "grad_norm": 1.4821258783340454, - "learning_rate": 6.617286432160804e-05, - "loss": 5.0929, - "step": 34170 - }, - { - "epoch": 17.820599739243807, - "grad_norm": 1.5533053874969482, - "learning_rate": 6.617185929648241e-05, - "loss": 5.1573, - "step": 34171 - }, - { - "epoch": 17.821121251629727, - "grad_norm": 1.5665349960327148, - "learning_rate": 6.617085427135679e-05, - "loss": 4.977, - "step": 34172 - }, - { - "epoch": 17.821642764015646, - "grad_norm": 1.4240509271621704, - "learning_rate": 6.616984924623117e-05, - "loss": 5.4387, - "step": 34173 - }, - { - "epoch": 17.822164276401566, - "grad_norm": 1.453436255455017, - "learning_rate": 6.616884422110553e-05, - "loss": 5.5724, - "step": 34174 - }, - { - "epoch": 17.822685788787485, - "grad_norm": 1.4486750364303589, - "learning_rate": 6.616783919597991e-05, - "loss": 5.2324, - "step": 34175 - }, - { - "epoch": 17.823207301173404, - "grad_norm": 1.4306252002716064, - "learning_rate": 6.616683417085427e-05, - "loss": 5.6746, - "step": 34176 - }, - { - "epoch": 17.82372881355932, - "grad_norm": 1.5455257892608643, - "learning_rate": 6.616582914572864e-05, - "loss": 5.6496, - "step": 34177 - }, - { - "epoch": 17.82425032594524, - "grad_norm": 1.4717196226119995, - "learning_rate": 6.616482412060302e-05, - "loss": 5.6606, - "step": 34178 - }, - { - "epoch": 17.82477183833116, - "grad_norm": 1.4590615034103394, - "learning_rate": 6.616381909547738e-05, - "loss": 5.5873, - "step": 34179 - }, - { - "epoch": 17.82529335071708, - "grad_norm": 1.4441953897476196, - "learning_rate": 6.616281407035176e-05, - "loss": 5.3262, - "step": 34180 - }, - { - "epoch": 17.825814863103, - "grad_norm": 1.5191038846969604, - "learning_rate": 6.616180904522614e-05, - "loss": 5.4373, - "step": 34181 - }, - { - "epoch": 17.826336375488918, - "grad_norm": 1.4156248569488525, - "learning_rate": 6.616080402010051e-05, - "loss": 5.7195, - "step": 34182 - }, - { - "epoch": 17.826857887874837, - "grad_norm": 1.5161134004592896, - "learning_rate": 6.615979899497488e-05, - "loss": 5.3318, - "step": 34183 - }, - { - "epoch": 17.827379400260757, - "grad_norm": 1.5400136709213257, - "learning_rate": 6.615879396984926e-05, - "loss": 5.2867, - "step": 34184 - }, - { - "epoch": 17.827900912646676, - "grad_norm": 1.351003885269165, - "learning_rate": 6.615778894472362e-05, - "loss": 5.3927, - "step": 34185 - }, - { - "epoch": 17.828422425032596, - "grad_norm": 1.5704249143600464, - "learning_rate": 6.6156783919598e-05, - "loss": 4.9257, - "step": 34186 - }, - { - "epoch": 17.828943937418515, - "grad_norm": 1.4762811660766602, - "learning_rate": 6.615577889447236e-05, - "loss": 5.3818, - "step": 34187 - }, - { - "epoch": 17.829465449804434, - "grad_norm": 1.4651257991790771, - "learning_rate": 6.615477386934674e-05, - "loss": 5.2169, - "step": 34188 - }, - { - "epoch": 17.82998696219035, - "grad_norm": 1.584814429283142, - "learning_rate": 6.61537688442211e-05, - "loss": 5.172, - "step": 34189 - }, - { - "epoch": 17.83050847457627, - "grad_norm": 1.4631388187408447, - "learning_rate": 6.615276381909547e-05, - "loss": 5.4748, - "step": 34190 - }, - { - "epoch": 17.83102998696219, - "grad_norm": 1.5782526731491089, - "learning_rate": 6.615175879396985e-05, - "loss": 5.1783, - "step": 34191 - }, - { - "epoch": 17.83155149934811, - "grad_norm": 1.426953911781311, - "learning_rate": 6.615075376884423e-05, - "loss": 5.2351, - "step": 34192 - }, - { - "epoch": 17.83207301173403, - "grad_norm": 1.4784955978393555, - "learning_rate": 6.61497487437186e-05, - "loss": 5.3305, - "step": 34193 - }, - { - "epoch": 17.832594524119948, - "grad_norm": 1.5964387655258179, - "learning_rate": 6.614874371859297e-05, - "loss": 4.9802, - "step": 34194 - }, - { - "epoch": 17.833116036505867, - "grad_norm": 1.5952011346817017, - "learning_rate": 6.614773869346735e-05, - "loss": 4.6885, - "step": 34195 - }, - { - "epoch": 17.833637548891787, - "grad_norm": 1.5009689331054688, - "learning_rate": 6.614673366834171e-05, - "loss": 4.9583, - "step": 34196 - }, - { - "epoch": 17.834159061277706, - "grad_norm": 1.442628264427185, - "learning_rate": 6.614572864321609e-05, - "loss": 5.3865, - "step": 34197 - }, - { - "epoch": 17.834680573663626, - "grad_norm": 1.62046480178833, - "learning_rate": 6.614472361809045e-05, - "loss": 4.909, - "step": 34198 - }, - { - "epoch": 17.835202086049545, - "grad_norm": 1.5355955362319946, - "learning_rate": 6.614371859296483e-05, - "loss": 5.2237, - "step": 34199 - }, - { - "epoch": 17.835723598435465, - "grad_norm": 1.5196722745895386, - "learning_rate": 6.61427135678392e-05, - "loss": 5.3342, - "step": 34200 - }, - { - "epoch": 17.83624511082138, - "grad_norm": 1.4143521785736084, - "learning_rate": 6.614170854271357e-05, - "loss": 5.627, - "step": 34201 - }, - { - "epoch": 17.8367666232073, - "grad_norm": 1.4236629009246826, - "learning_rate": 6.614070351758795e-05, - "loss": 5.5351, - "step": 34202 - }, - { - "epoch": 17.83728813559322, - "grad_norm": 1.4348660707473755, - "learning_rate": 6.613969849246231e-05, - "loss": 5.4767, - "step": 34203 - }, - { - "epoch": 17.83780964797914, - "grad_norm": 1.4315319061279297, - "learning_rate": 6.613869346733669e-05, - "loss": 5.6239, - "step": 34204 - }, - { - "epoch": 17.83833116036506, - "grad_norm": 1.440545916557312, - "learning_rate": 6.613768844221106e-05, - "loss": 5.3444, - "step": 34205 - }, - { - "epoch": 17.838852672750978, - "grad_norm": 1.455419898033142, - "learning_rate": 6.613668341708543e-05, - "loss": 5.4415, - "step": 34206 - }, - { - "epoch": 17.839374185136897, - "grad_norm": 1.8297001123428345, - "learning_rate": 6.61356783919598e-05, - "loss": 5.5604, - "step": 34207 - }, - { - "epoch": 17.839895697522817, - "grad_norm": 1.4895025491714478, - "learning_rate": 6.613467336683418e-05, - "loss": 5.5389, - "step": 34208 - }, - { - "epoch": 17.840417209908736, - "grad_norm": 1.5638506412506104, - "learning_rate": 6.613366834170854e-05, - "loss": 5.2634, - "step": 34209 - }, - { - "epoch": 17.840938722294656, - "grad_norm": 1.4221298694610596, - "learning_rate": 6.613266331658292e-05, - "loss": 5.2549, - "step": 34210 - }, - { - "epoch": 17.841460234680575, - "grad_norm": 1.448691725730896, - "learning_rate": 6.613165829145728e-05, - "loss": 5.6286, - "step": 34211 - }, - { - "epoch": 17.841981747066495, - "grad_norm": 1.5602831840515137, - "learning_rate": 6.613065326633166e-05, - "loss": 5.2697, - "step": 34212 - }, - { - "epoch": 17.84250325945241, - "grad_norm": 1.3921191692352295, - "learning_rate": 6.612964824120604e-05, - "loss": 5.5511, - "step": 34213 - }, - { - "epoch": 17.84302477183833, - "grad_norm": 1.4501956701278687, - "learning_rate": 6.612864321608042e-05, - "loss": 5.3092, - "step": 34214 - }, - { - "epoch": 17.84354628422425, - "grad_norm": 1.4256457090377808, - "learning_rate": 6.612763819095478e-05, - "loss": 5.248, - "step": 34215 - }, - { - "epoch": 17.84406779661017, - "grad_norm": 1.6580601930618286, - "learning_rate": 6.612663316582914e-05, - "loss": 5.0267, - "step": 34216 - }, - { - "epoch": 17.84458930899609, - "grad_norm": 1.71620774269104, - "learning_rate": 6.612562814070352e-05, - "loss": 5.0205, - "step": 34217 - }, - { - "epoch": 17.845110821382008, - "grad_norm": 1.4386459589004517, - "learning_rate": 6.612462311557789e-05, - "loss": 5.3301, - "step": 34218 - }, - { - "epoch": 17.845632333767927, - "grad_norm": 1.405339002609253, - "learning_rate": 6.612361809045226e-05, - "loss": 5.361, - "step": 34219 - }, - { - "epoch": 17.846153846153847, - "grad_norm": 1.5633009672164917, - "learning_rate": 6.612261306532663e-05, - "loss": 5.1953, - "step": 34220 - }, - { - "epoch": 17.846675358539766, - "grad_norm": 1.4191161394119263, - "learning_rate": 6.6121608040201e-05, - "loss": 5.4512, - "step": 34221 - }, - { - "epoch": 17.847196870925686, - "grad_norm": 1.7495454549789429, - "learning_rate": 6.612060301507538e-05, - "loss": 5.464, - "step": 34222 - }, - { - "epoch": 17.847718383311605, - "grad_norm": 1.5335725545883179, - "learning_rate": 6.611959798994976e-05, - "loss": 5.1904, - "step": 34223 - }, - { - "epoch": 17.848239895697525, - "grad_norm": 1.614998459815979, - "learning_rate": 6.611859296482413e-05, - "loss": 5.2055, - "step": 34224 - }, - { - "epoch": 17.84876140808344, - "grad_norm": 1.5965970754623413, - "learning_rate": 6.61175879396985e-05, - "loss": 5.152, - "step": 34225 - }, - { - "epoch": 17.84928292046936, - "grad_norm": 1.5307005643844604, - "learning_rate": 6.611658291457287e-05, - "loss": 5.3936, - "step": 34226 - }, - { - "epoch": 17.84980443285528, - "grad_norm": 1.5912830829620361, - "learning_rate": 6.611557788944725e-05, - "loss": 5.1247, - "step": 34227 - }, - { - "epoch": 17.8503259452412, - "grad_norm": 1.4778438806533813, - "learning_rate": 6.611457286432161e-05, - "loss": 5.5917, - "step": 34228 - }, - { - "epoch": 17.85084745762712, - "grad_norm": 1.4843554496765137, - "learning_rate": 6.611356783919597e-05, - "loss": 5.2679, - "step": 34229 - }, - { - "epoch": 17.851368970013038, - "grad_norm": 1.5290446281433105, - "learning_rate": 6.611256281407035e-05, - "loss": 5.0466, - "step": 34230 - }, - { - "epoch": 17.851890482398957, - "grad_norm": 1.6280843019485474, - "learning_rate": 6.611155778894472e-05, - "loss": 5.3551, - "step": 34231 - }, - { - "epoch": 17.852411994784877, - "grad_norm": 1.5427229404449463, - "learning_rate": 6.61105527638191e-05, - "loss": 5.2826, - "step": 34232 - }, - { - "epoch": 17.852933507170796, - "grad_norm": 1.3898035287857056, - "learning_rate": 6.610954773869347e-05, - "loss": 5.5814, - "step": 34233 - }, - { - "epoch": 17.853455019556716, - "grad_norm": 1.409363031387329, - "learning_rate": 6.610854271356785e-05, - "loss": 5.5992, - "step": 34234 - }, - { - "epoch": 17.853976531942635, - "grad_norm": 1.4842053651809692, - "learning_rate": 6.610753768844221e-05, - "loss": 5.6128, - "step": 34235 - }, - { - "epoch": 17.85449804432855, - "grad_norm": 1.5171129703521729, - "learning_rate": 6.610653266331659e-05, - "loss": 5.0907, - "step": 34236 - }, - { - "epoch": 17.85501955671447, - "grad_norm": 1.5887953042984009, - "learning_rate": 6.610552763819096e-05, - "loss": 5.2574, - "step": 34237 - }, - { - "epoch": 17.85554106910039, - "grad_norm": 1.5280817747116089, - "learning_rate": 6.610452261306533e-05, - "loss": 5.4802, - "step": 34238 - }, - { - "epoch": 17.85606258148631, - "grad_norm": 1.3522779941558838, - "learning_rate": 6.61035175879397e-05, - "loss": 5.8811, - "step": 34239 - }, - { - "epoch": 17.85658409387223, - "grad_norm": 1.4000474214553833, - "learning_rate": 6.610251256281408e-05, - "loss": 5.6149, - "step": 34240 - }, - { - "epoch": 17.85710560625815, - "grad_norm": 1.5271005630493164, - "learning_rate": 6.610150753768844e-05, - "loss": 5.2299, - "step": 34241 - }, - { - "epoch": 17.857627118644068, - "grad_norm": 1.5306812524795532, - "learning_rate": 6.610050251256282e-05, - "loss": 5.1524, - "step": 34242 - }, - { - "epoch": 17.858148631029987, - "grad_norm": 1.5865569114685059, - "learning_rate": 6.60994974874372e-05, - "loss": 4.8119, - "step": 34243 - }, - { - "epoch": 17.858670143415907, - "grad_norm": 1.5330123901367188, - "learning_rate": 6.609849246231156e-05, - "loss": 5.6831, - "step": 34244 - }, - { - "epoch": 17.859191655801826, - "grad_norm": 1.5897490978240967, - "learning_rate": 6.609748743718594e-05, - "loss": 4.8735, - "step": 34245 - }, - { - "epoch": 17.859713168187746, - "grad_norm": 1.3767491579055786, - "learning_rate": 6.60964824120603e-05, - "loss": 5.5944, - "step": 34246 - }, - { - "epoch": 17.860234680573665, - "grad_norm": 1.501340627670288, - "learning_rate": 6.609547738693468e-05, - "loss": 5.7418, - "step": 34247 - }, - { - "epoch": 17.860756192959585, - "grad_norm": 1.5212697982788086, - "learning_rate": 6.609447236180904e-05, - "loss": 5.1356, - "step": 34248 - }, - { - "epoch": 17.8612777053455, - "grad_norm": 1.5443267822265625, - "learning_rate": 6.609346733668342e-05, - "loss": 5.4035, - "step": 34249 - }, - { - "epoch": 17.86179921773142, - "grad_norm": 1.4423816204071045, - "learning_rate": 6.609246231155779e-05, - "loss": 5.6803, - "step": 34250 - }, - { - "epoch": 17.86232073011734, - "grad_norm": 1.5504013299942017, - "learning_rate": 6.609145728643216e-05, - "loss": 5.4724, - "step": 34251 - }, - { - "epoch": 17.86284224250326, - "grad_norm": 1.4825057983398438, - "learning_rate": 6.609045226130653e-05, - "loss": 5.4633, - "step": 34252 - }, - { - "epoch": 17.86336375488918, - "grad_norm": 1.5905287265777588, - "learning_rate": 6.608944723618091e-05, - "loss": 4.8042, - "step": 34253 - }, - { - "epoch": 17.863885267275098, - "grad_norm": 1.644788146018982, - "learning_rate": 6.608844221105528e-05, - "loss": 5.2166, - "step": 34254 - }, - { - "epoch": 17.864406779661017, - "grad_norm": 1.5036271810531616, - "learning_rate": 6.608743718592965e-05, - "loss": 4.7817, - "step": 34255 - }, - { - "epoch": 17.864928292046937, - "grad_norm": 1.5006879568099976, - "learning_rate": 6.608643216080403e-05, - "loss": 5.3809, - "step": 34256 - }, - { - "epoch": 17.865449804432856, - "grad_norm": 1.6307622194290161, - "learning_rate": 6.608542713567839e-05, - "loss": 4.8047, - "step": 34257 - }, - { - "epoch": 17.865971316818776, - "grad_norm": 1.7038416862487793, - "learning_rate": 6.608442211055277e-05, - "loss": 4.8781, - "step": 34258 - }, - { - "epoch": 17.866492829204695, - "grad_norm": 1.5110116004943848, - "learning_rate": 6.608341708542713e-05, - "loss": 5.3178, - "step": 34259 - }, - { - "epoch": 17.86701434159061, - "grad_norm": 1.5513111352920532, - "learning_rate": 6.608241206030151e-05, - "loss": 5.2132, - "step": 34260 - }, - { - "epoch": 17.86753585397653, - "grad_norm": 1.484167218208313, - "learning_rate": 6.608140703517588e-05, - "loss": 5.394, - "step": 34261 - }, - { - "epoch": 17.86805736636245, - "grad_norm": 1.4068678617477417, - "learning_rate": 6.608040201005025e-05, - "loss": 5.244, - "step": 34262 - }, - { - "epoch": 17.86857887874837, - "grad_norm": 1.4469465017318726, - "learning_rate": 6.607939698492462e-05, - "loss": 5.0151, - "step": 34263 - }, - { - "epoch": 17.86910039113429, - "grad_norm": 1.479839563369751, - "learning_rate": 6.6078391959799e-05, - "loss": 5.507, - "step": 34264 - }, - { - "epoch": 17.86962190352021, - "grad_norm": 1.5507227182388306, - "learning_rate": 6.607738693467337e-05, - "loss": 5.1836, - "step": 34265 - }, - { - "epoch": 17.870143415906128, - "grad_norm": 1.3847005367279053, - "learning_rate": 6.607638190954775e-05, - "loss": 5.6938, - "step": 34266 - }, - { - "epoch": 17.870664928292047, - "grad_norm": 1.4785410165786743, - "learning_rate": 6.607537688442212e-05, - "loss": 5.4739, - "step": 34267 - }, - { - "epoch": 17.871186440677967, - "grad_norm": 1.5126827955245972, - "learning_rate": 6.607437185929649e-05, - "loss": 5.401, - "step": 34268 - }, - { - "epoch": 17.871707953063886, - "grad_norm": 1.4535393714904785, - "learning_rate": 6.607336683417086e-05, - "loss": 5.3828, - "step": 34269 - }, - { - "epoch": 17.872229465449806, - "grad_norm": 1.57003915309906, - "learning_rate": 6.607236180904522e-05, - "loss": 4.995, - "step": 34270 - }, - { - "epoch": 17.872750977835725, - "grad_norm": 1.4679940938949585, - "learning_rate": 6.60713567839196e-05, - "loss": 5.1758, - "step": 34271 - }, - { - "epoch": 17.87327249022164, - "grad_norm": 1.6485371589660645, - "learning_rate": 6.607035175879396e-05, - "loss": 5.1855, - "step": 34272 - }, - { - "epoch": 17.87379400260756, - "grad_norm": 1.5646096467971802, - "learning_rate": 6.606934673366834e-05, - "loss": 4.9806, - "step": 34273 - }, - { - "epoch": 17.87431551499348, - "grad_norm": 1.646519660949707, - "learning_rate": 6.606834170854272e-05, - "loss": 5.5725, - "step": 34274 - }, - { - "epoch": 17.8748370273794, - "grad_norm": 1.467394232749939, - "learning_rate": 6.60673366834171e-05, - "loss": 5.2025, - "step": 34275 - }, - { - "epoch": 17.87535853976532, - "grad_norm": 1.5502254962921143, - "learning_rate": 6.606633165829146e-05, - "loss": 5.1567, - "step": 34276 - }, - { - "epoch": 17.87588005215124, - "grad_norm": 1.545447587966919, - "learning_rate": 6.606532663316584e-05, - "loss": 5.2575, - "step": 34277 - }, - { - "epoch": 17.876401564537158, - "grad_norm": 1.4885976314544678, - "learning_rate": 6.60643216080402e-05, - "loss": 5.2117, - "step": 34278 - }, - { - "epoch": 17.876923076923077, - "grad_norm": 1.3436576128005981, - "learning_rate": 6.606331658291458e-05, - "loss": 5.69, - "step": 34279 - }, - { - "epoch": 17.877444589308997, - "grad_norm": 1.5994162559509277, - "learning_rate": 6.606231155778895e-05, - "loss": 5.28, - "step": 34280 - }, - { - "epoch": 17.877966101694916, - "grad_norm": 1.5656797885894775, - "learning_rate": 6.606130653266332e-05, - "loss": 5.2027, - "step": 34281 - }, - { - "epoch": 17.878487614080836, - "grad_norm": 1.5898449420928955, - "learning_rate": 6.606030150753769e-05, - "loss": 4.9238, - "step": 34282 - }, - { - "epoch": 17.879009126466755, - "grad_norm": 1.3857134580612183, - "learning_rate": 6.605929648241205e-05, - "loss": 5.5318, - "step": 34283 - }, - { - "epoch": 17.87953063885267, - "grad_norm": 1.4773529767990112, - "learning_rate": 6.605829145728643e-05, - "loss": 4.7084, - "step": 34284 - }, - { - "epoch": 17.88005215123859, - "grad_norm": 1.4176915884017944, - "learning_rate": 6.605728643216081e-05, - "loss": 4.9889, - "step": 34285 - }, - { - "epoch": 17.88057366362451, - "grad_norm": 1.5539906024932861, - "learning_rate": 6.605628140703519e-05, - "loss": 5.4707, - "step": 34286 - }, - { - "epoch": 17.88109517601043, - "grad_norm": 1.457578182220459, - "learning_rate": 6.605527638190955e-05, - "loss": 5.1965, - "step": 34287 - }, - { - "epoch": 17.88161668839635, - "grad_norm": 1.4837476015090942, - "learning_rate": 6.605427135678393e-05, - "loss": 5.6014, - "step": 34288 - }, - { - "epoch": 17.88213820078227, - "grad_norm": 1.4923933744430542, - "learning_rate": 6.605326633165829e-05, - "loss": 5.6288, - "step": 34289 - }, - { - "epoch": 17.882659713168188, - "grad_norm": 1.4792006015777588, - "learning_rate": 6.605226130653267e-05, - "loss": 5.2508, - "step": 34290 - }, - { - "epoch": 17.883181225554107, - "grad_norm": 1.4770957231521606, - "learning_rate": 6.605125628140703e-05, - "loss": 4.68, - "step": 34291 - }, - { - "epoch": 17.883702737940027, - "grad_norm": 1.4279297590255737, - "learning_rate": 6.605025125628141e-05, - "loss": 5.4131, - "step": 34292 - }, - { - "epoch": 17.884224250325946, - "grad_norm": 1.4511011838912964, - "learning_rate": 6.604924623115578e-05, - "loss": 5.6574, - "step": 34293 - }, - { - "epoch": 17.884745762711866, - "grad_norm": 1.6181857585906982, - "learning_rate": 6.604824120603015e-05, - "loss": 5.2491, - "step": 34294 - }, - { - "epoch": 17.885267275097785, - "grad_norm": 1.5415500402450562, - "learning_rate": 6.604723618090453e-05, - "loss": 5.0471, - "step": 34295 - }, - { - "epoch": 17.8857887874837, - "grad_norm": 1.475870132446289, - "learning_rate": 6.60462311557789e-05, - "loss": 5.2694, - "step": 34296 - }, - { - "epoch": 17.88631029986962, - "grad_norm": 1.546311616897583, - "learning_rate": 6.604522613065327e-05, - "loss": 5.3764, - "step": 34297 - }, - { - "epoch": 17.88683181225554, - "grad_norm": 1.5309628248214722, - "learning_rate": 6.604422110552764e-05, - "loss": 5.1227, - "step": 34298 - }, - { - "epoch": 17.88735332464146, - "grad_norm": 1.5206081867218018, - "learning_rate": 6.604321608040202e-05, - "loss": 5.5534, - "step": 34299 - }, - { - "epoch": 17.88787483702738, - "grad_norm": 1.305435299873352, - "learning_rate": 6.604221105527638e-05, - "loss": 4.8824, - "step": 34300 - }, - { - "epoch": 17.8883963494133, - "grad_norm": 1.4808733463287354, - "learning_rate": 6.604120603015076e-05, - "loss": 5.3793, - "step": 34301 - }, - { - "epoch": 17.888917861799218, - "grad_norm": 1.5884743928909302, - "learning_rate": 6.604020100502512e-05, - "loss": 4.5152, - "step": 34302 - }, - { - "epoch": 17.889439374185137, - "grad_norm": 1.5793156623840332, - "learning_rate": 6.60391959798995e-05, - "loss": 5.1627, - "step": 34303 - }, - { - "epoch": 17.889960886571057, - "grad_norm": 1.4587676525115967, - "learning_rate": 6.603819095477386e-05, - "loss": 5.1129, - "step": 34304 - }, - { - "epoch": 17.890482398956976, - "grad_norm": 1.6242718696594238, - "learning_rate": 6.603718592964824e-05, - "loss": 5.2755, - "step": 34305 - }, - { - "epoch": 17.891003911342896, - "grad_norm": 1.5037269592285156, - "learning_rate": 6.603618090452262e-05, - "loss": 5.4531, - "step": 34306 - }, - { - "epoch": 17.891525423728815, - "grad_norm": 1.7668569087982178, - "learning_rate": 6.6035175879397e-05, - "loss": 4.8486, - "step": 34307 - }, - { - "epoch": 17.89204693611473, - "grad_norm": 1.4393941164016724, - "learning_rate": 6.603417085427136e-05, - "loss": 4.9392, - "step": 34308 - }, - { - "epoch": 17.89256844850065, - "grad_norm": 1.5389803647994995, - "learning_rate": 6.603316582914573e-05, - "loss": 5.3733, - "step": 34309 - }, - { - "epoch": 17.89308996088657, - "grad_norm": 1.504636287689209, - "learning_rate": 6.60321608040201e-05, - "loss": 5.3089, - "step": 34310 - }, - { - "epoch": 17.89361147327249, - "grad_norm": 1.4715403318405151, - "learning_rate": 6.603115577889447e-05, - "loss": 5.288, - "step": 34311 - }, - { - "epoch": 17.89413298565841, - "grad_norm": 1.571930170059204, - "learning_rate": 6.603015075376885e-05, - "loss": 5.2378, - "step": 34312 - }, - { - "epoch": 17.89465449804433, - "grad_norm": 1.5191696882247925, - "learning_rate": 6.602914572864321e-05, - "loss": 5.3735, - "step": 34313 - }, - { - "epoch": 17.895176010430248, - "grad_norm": 1.438056230545044, - "learning_rate": 6.602814070351759e-05, - "loss": 5.5818, - "step": 34314 - }, - { - "epoch": 17.895697522816167, - "grad_norm": 1.4287487268447876, - "learning_rate": 6.602713567839197e-05, - "loss": 4.8871, - "step": 34315 - }, - { - "epoch": 17.896219035202087, - "grad_norm": 1.6304067373275757, - "learning_rate": 6.602613065326634e-05, - "loss": 4.9426, - "step": 34316 - }, - { - "epoch": 17.896740547588006, - "grad_norm": 1.4841417074203491, - "learning_rate": 6.602512562814071e-05, - "loss": 5.62, - "step": 34317 - }, - { - "epoch": 17.897262059973926, - "grad_norm": 1.5853943824768066, - "learning_rate": 6.602412060301509e-05, - "loss": 4.9016, - "step": 34318 - }, - { - "epoch": 17.89778357235984, - "grad_norm": 1.5410048961639404, - "learning_rate": 6.602311557788945e-05, - "loss": 5.7982, - "step": 34319 - }, - { - "epoch": 17.89830508474576, - "grad_norm": 1.4535164833068848, - "learning_rate": 6.602211055276383e-05, - "loss": 5.204, - "step": 34320 - }, - { - "epoch": 17.89882659713168, - "grad_norm": 1.477927803993225, - "learning_rate": 6.602110552763819e-05, - "loss": 4.9445, - "step": 34321 - }, - { - "epoch": 17.8993481095176, - "grad_norm": 1.4723994731903076, - "learning_rate": 6.602010050251256e-05, - "loss": 5.2182, - "step": 34322 - }, - { - "epoch": 17.89986962190352, - "grad_norm": 1.5383005142211914, - "learning_rate": 6.601909547738693e-05, - "loss": 5.1965, - "step": 34323 - }, - { - "epoch": 17.90039113428944, - "grad_norm": 1.4806584119796753, - "learning_rate": 6.60180904522613e-05, - "loss": 5.5848, - "step": 34324 - }, - { - "epoch": 17.90091264667536, - "grad_norm": 1.5270910263061523, - "learning_rate": 6.601708542713568e-05, - "loss": 4.8923, - "step": 34325 - }, - { - "epoch": 17.901434159061278, - "grad_norm": 1.4437390565872192, - "learning_rate": 6.601608040201005e-05, - "loss": 5.5137, - "step": 34326 - }, - { - "epoch": 17.901955671447197, - "grad_norm": 1.3967571258544922, - "learning_rate": 6.601507537688443e-05, - "loss": 5.7095, - "step": 34327 - }, - { - "epoch": 17.902477183833117, - "grad_norm": 1.466966986656189, - "learning_rate": 6.60140703517588e-05, - "loss": 5.2091, - "step": 34328 - }, - { - "epoch": 17.902998696219036, - "grad_norm": 1.5472118854522705, - "learning_rate": 6.601306532663317e-05, - "loss": 5.5125, - "step": 34329 - }, - { - "epoch": 17.903520208604956, - "grad_norm": 1.4890903234481812, - "learning_rate": 6.601206030150754e-05, - "loss": 5.1248, - "step": 34330 - }, - { - "epoch": 17.904041720990875, - "grad_norm": 1.4807450771331787, - "learning_rate": 6.601105527638192e-05, - "loss": 5.6028, - "step": 34331 - }, - { - "epoch": 17.90456323337679, - "grad_norm": 1.4130516052246094, - "learning_rate": 6.601005025125628e-05, - "loss": 4.68, - "step": 34332 - }, - { - "epoch": 17.90508474576271, - "grad_norm": 1.6209840774536133, - "learning_rate": 6.600904522613066e-05, - "loss": 4.883, - "step": 34333 - }, - { - "epoch": 17.90560625814863, - "grad_norm": 1.5422109365463257, - "learning_rate": 6.600804020100502e-05, - "loss": 5.2461, - "step": 34334 - }, - { - "epoch": 17.90612777053455, - "grad_norm": 1.5997483730316162, - "learning_rate": 6.60070351758794e-05, - "loss": 5.5061, - "step": 34335 - }, - { - "epoch": 17.90664928292047, - "grad_norm": 1.549616813659668, - "learning_rate": 6.600603015075378e-05, - "loss": 5.1344, - "step": 34336 - }, - { - "epoch": 17.90717079530639, - "grad_norm": 1.4406410455703735, - "learning_rate": 6.600502512562814e-05, - "loss": 4.6982, - "step": 34337 - }, - { - "epoch": 17.907692307692308, - "grad_norm": 1.4506278038024902, - "learning_rate": 6.600402010050252e-05, - "loss": 5.6724, - "step": 34338 - }, - { - "epoch": 17.908213820078227, - "grad_norm": 1.4972416162490845, - "learning_rate": 6.600301507537689e-05, - "loss": 5.3252, - "step": 34339 - }, - { - "epoch": 17.908735332464147, - "grad_norm": 1.525876522064209, - "learning_rate": 6.600201005025126e-05, - "loss": 5.3308, - "step": 34340 - }, - { - "epoch": 17.909256844850066, - "grad_norm": 1.4741392135620117, - "learning_rate": 6.600100502512563e-05, - "loss": 5.336, - "step": 34341 - }, - { - "epoch": 17.909778357235986, - "grad_norm": 1.383142113685608, - "learning_rate": 6.6e-05, - "loss": 5.6342, - "step": 34342 - }, - { - "epoch": 17.910299869621902, - "grad_norm": 1.5795763731002808, - "learning_rate": 6.599899497487437e-05, - "loss": 5.3659, - "step": 34343 - }, - { - "epoch": 17.91082138200782, - "grad_norm": 1.6061278581619263, - "learning_rate": 6.599798994974875e-05, - "loss": 4.8205, - "step": 34344 - }, - { - "epoch": 17.91134289439374, - "grad_norm": 1.5197378396987915, - "learning_rate": 6.599698492462311e-05, - "loss": 5.072, - "step": 34345 - }, - { - "epoch": 17.91186440677966, - "grad_norm": 1.4489303827285767, - "learning_rate": 6.599597989949749e-05, - "loss": 5.2671, - "step": 34346 - }, - { - "epoch": 17.91238591916558, - "grad_norm": 1.5640021562576294, - "learning_rate": 6.599497487437187e-05, - "loss": 5.0491, - "step": 34347 - }, - { - "epoch": 17.9129074315515, - "grad_norm": 1.4317678213119507, - "learning_rate": 6.599396984924623e-05, - "loss": 5.4981, - "step": 34348 - }, - { - "epoch": 17.91342894393742, - "grad_norm": 1.3751047849655151, - "learning_rate": 6.599296482412061e-05, - "loss": 5.2875, - "step": 34349 - }, - { - "epoch": 17.913950456323338, - "grad_norm": 1.4384973049163818, - "learning_rate": 6.599195979899497e-05, - "loss": 5.344, - "step": 34350 - }, - { - "epoch": 17.914471968709258, - "grad_norm": 1.5145318508148193, - "learning_rate": 6.599095477386935e-05, - "loss": 5.1262, - "step": 34351 - }, - { - "epoch": 17.914993481095177, - "grad_norm": 1.4930682182312012, - "learning_rate": 6.598994974874372e-05, - "loss": 5.2098, - "step": 34352 - }, - { - "epoch": 17.915514993481096, - "grad_norm": 1.473304033279419, - "learning_rate": 6.59889447236181e-05, - "loss": 5.4621, - "step": 34353 - }, - { - "epoch": 17.916036505867016, - "grad_norm": 1.450059175491333, - "learning_rate": 6.598793969849246e-05, - "loss": 5.723, - "step": 34354 - }, - { - "epoch": 17.916558018252932, - "grad_norm": 1.5123645067214966, - "learning_rate": 6.598693467336684e-05, - "loss": 5.2225, - "step": 34355 - }, - { - "epoch": 17.91707953063885, - "grad_norm": 1.4717576503753662, - "learning_rate": 6.598592964824121e-05, - "loss": 5.4957, - "step": 34356 - }, - { - "epoch": 17.91760104302477, - "grad_norm": 1.5175846815109253, - "learning_rate": 6.598492462311559e-05, - "loss": 4.6423, - "step": 34357 - }, - { - "epoch": 17.91812255541069, - "grad_norm": 1.5194475650787354, - "learning_rate": 6.598391959798996e-05, - "loss": 5.451, - "step": 34358 - }, - { - "epoch": 17.91864406779661, - "grad_norm": 1.444828987121582, - "learning_rate": 6.598291457286433e-05, - "loss": 5.2631, - "step": 34359 - }, - { - "epoch": 17.91916558018253, - "grad_norm": 1.4019463062286377, - "learning_rate": 6.59819095477387e-05, - "loss": 5.3991, - "step": 34360 - }, - { - "epoch": 17.91968709256845, - "grad_norm": 1.4619684219360352, - "learning_rate": 6.598090452261308e-05, - "loss": 5.4606, - "step": 34361 - }, - { - "epoch": 17.920208604954368, - "grad_norm": 1.4813050031661987, - "learning_rate": 6.597989949748744e-05, - "loss": 5.4336, - "step": 34362 - }, - { - "epoch": 17.920730117340288, - "grad_norm": 1.4892215728759766, - "learning_rate": 6.59788944723618e-05, - "loss": 5.1176, - "step": 34363 - }, - { - "epoch": 17.921251629726207, - "grad_norm": 1.3983666896820068, - "learning_rate": 6.597788944723618e-05, - "loss": 5.6227, - "step": 34364 - }, - { - "epoch": 17.921773142112126, - "grad_norm": 1.4170631170272827, - "learning_rate": 6.597688442211055e-05, - "loss": 5.3566, - "step": 34365 - }, - { - "epoch": 17.922294654498046, - "grad_norm": 1.4657639265060425, - "learning_rate": 6.597587939698492e-05, - "loss": 5.2947, - "step": 34366 - }, - { - "epoch": 17.922816166883962, - "grad_norm": 1.7334684133529663, - "learning_rate": 6.59748743718593e-05, - "loss": 4.9584, - "step": 34367 - }, - { - "epoch": 17.92333767926988, - "grad_norm": 1.6044389009475708, - "learning_rate": 6.597386934673368e-05, - "loss": 5.1436, - "step": 34368 - }, - { - "epoch": 17.9238591916558, - "grad_norm": 1.4594050645828247, - "learning_rate": 6.597286432160804e-05, - "loss": 5.6422, - "step": 34369 - }, - { - "epoch": 17.92438070404172, - "grad_norm": 1.5653172731399536, - "learning_rate": 6.597185929648242e-05, - "loss": 5.2493, - "step": 34370 - }, - { - "epoch": 17.92490221642764, - "grad_norm": 1.4057302474975586, - "learning_rate": 6.597085427135679e-05, - "loss": 5.6529, - "step": 34371 - }, - { - "epoch": 17.92542372881356, - "grad_norm": 1.3936306238174438, - "learning_rate": 6.596984924623116e-05, - "loss": 5.3327, - "step": 34372 - }, - { - "epoch": 17.92594524119948, - "grad_norm": 1.4910236597061157, - "learning_rate": 6.596884422110553e-05, - "loss": 5.3041, - "step": 34373 - }, - { - "epoch": 17.926466753585398, - "grad_norm": 1.5565458536148071, - "learning_rate": 6.59678391959799e-05, - "loss": 5.1271, - "step": 34374 - }, - { - "epoch": 17.926988265971318, - "grad_norm": 1.4892969131469727, - "learning_rate": 6.596683417085427e-05, - "loss": 5.5242, - "step": 34375 - }, - { - "epoch": 17.927509778357237, - "grad_norm": 1.438807725906372, - "learning_rate": 6.596582914572865e-05, - "loss": 5.5105, - "step": 34376 - }, - { - "epoch": 17.928031290743156, - "grad_norm": 1.6103761196136475, - "learning_rate": 6.596482412060303e-05, - "loss": 5.1425, - "step": 34377 - }, - { - "epoch": 17.928552803129076, - "grad_norm": 1.4833029508590698, - "learning_rate": 6.596381909547739e-05, - "loss": 5.5946, - "step": 34378 - }, - { - "epoch": 17.929074315514992, - "grad_norm": 1.445305347442627, - "learning_rate": 6.596281407035177e-05, - "loss": 5.6615, - "step": 34379 - }, - { - "epoch": 17.92959582790091, - "grad_norm": 1.556133508682251, - "learning_rate": 6.596180904522613e-05, - "loss": 5.048, - "step": 34380 - }, - { - "epoch": 17.93011734028683, - "grad_norm": 1.5786863565444946, - "learning_rate": 6.596080402010051e-05, - "loss": 4.9046, - "step": 34381 - }, - { - "epoch": 17.93063885267275, - "grad_norm": 1.5990296602249146, - "learning_rate": 6.595979899497487e-05, - "loss": 4.73, - "step": 34382 - }, - { - "epoch": 17.93116036505867, - "grad_norm": 1.4951269626617432, - "learning_rate": 6.595879396984925e-05, - "loss": 5.5411, - "step": 34383 - }, - { - "epoch": 17.93168187744459, - "grad_norm": 1.5027637481689453, - "learning_rate": 6.595778894472362e-05, - "loss": 5.2993, - "step": 34384 - }, - { - "epoch": 17.93220338983051, - "grad_norm": 1.5903836488723755, - "learning_rate": 6.5956783919598e-05, - "loss": 5.1278, - "step": 34385 - }, - { - "epoch": 17.932724902216428, - "grad_norm": 1.4844489097595215, - "learning_rate": 6.595577889447236e-05, - "loss": 5.1519, - "step": 34386 - }, - { - "epoch": 17.933246414602348, - "grad_norm": 1.5740009546279907, - "learning_rate": 6.595477386934674e-05, - "loss": 5.332, - "step": 34387 - }, - { - "epoch": 17.933767926988267, - "grad_norm": 1.5752532482147217, - "learning_rate": 6.595376884422111e-05, - "loss": 5.5796, - "step": 34388 - }, - { - "epoch": 17.934289439374187, - "grad_norm": 1.6262003183364868, - "learning_rate": 6.595276381909548e-05, - "loss": 5.1002, - "step": 34389 - }, - { - "epoch": 17.934810951760106, - "grad_norm": 1.558205485343933, - "learning_rate": 6.595175879396986e-05, - "loss": 5.14, - "step": 34390 - }, - { - "epoch": 17.935332464146022, - "grad_norm": 1.520970344543457, - "learning_rate": 6.595075376884422e-05, - "loss": 5.729, - "step": 34391 - }, - { - "epoch": 17.93585397653194, - "grad_norm": 1.5807749032974243, - "learning_rate": 6.59497487437186e-05, - "loss": 5.4206, - "step": 34392 - }, - { - "epoch": 17.93637548891786, - "grad_norm": 1.468370795249939, - "learning_rate": 6.594874371859296e-05, - "loss": 5.3128, - "step": 34393 - }, - { - "epoch": 17.93689700130378, - "grad_norm": 1.4803082942962646, - "learning_rate": 6.594773869346734e-05, - "loss": 5.4769, - "step": 34394 - }, - { - "epoch": 17.9374185136897, - "grad_norm": 1.4807759523391724, - "learning_rate": 6.59467336683417e-05, - "loss": 5.4717, - "step": 34395 - }, - { - "epoch": 17.93794002607562, - "grad_norm": 1.4631083011627197, - "learning_rate": 6.594572864321608e-05, - "loss": 5.6441, - "step": 34396 - }, - { - "epoch": 17.93846153846154, - "grad_norm": 1.7901641130447388, - "learning_rate": 6.594472361809046e-05, - "loss": 5.0724, - "step": 34397 - }, - { - "epoch": 17.938983050847458, - "grad_norm": 1.708248496055603, - "learning_rate": 6.594371859296484e-05, - "loss": 4.6501, - "step": 34398 - }, - { - "epoch": 17.939504563233378, - "grad_norm": 1.558355689048767, - "learning_rate": 6.59427135678392e-05, - "loss": 5.0007, - "step": 34399 - }, - { - "epoch": 17.940026075619297, - "grad_norm": 1.4739186763763428, - "learning_rate": 6.594170854271358e-05, - "loss": 5.6871, - "step": 34400 - }, - { - "epoch": 17.940547588005217, - "grad_norm": 1.566986322402954, - "learning_rate": 6.594070351758794e-05, - "loss": 5.3276, - "step": 34401 - }, - { - "epoch": 17.941069100391136, - "grad_norm": 1.5484529733657837, - "learning_rate": 6.593969849246231e-05, - "loss": 5.3662, - "step": 34402 - }, - { - "epoch": 17.941590612777052, - "grad_norm": 1.4450653791427612, - "learning_rate": 6.593869346733669e-05, - "loss": 5.3392, - "step": 34403 - }, - { - "epoch": 17.94211212516297, - "grad_norm": 1.424186110496521, - "learning_rate": 6.593768844221105e-05, - "loss": 5.3798, - "step": 34404 - }, - { - "epoch": 17.94263363754889, - "grad_norm": 1.4306763410568237, - "learning_rate": 6.593668341708543e-05, - "loss": 5.571, - "step": 34405 - }, - { - "epoch": 17.94315514993481, - "grad_norm": 1.423705816268921, - "learning_rate": 6.593567839195979e-05, - "loss": 5.7543, - "step": 34406 - }, - { - "epoch": 17.94367666232073, - "grad_norm": 1.462921380996704, - "learning_rate": 6.593467336683417e-05, - "loss": 5.2965, - "step": 34407 - }, - { - "epoch": 17.94419817470665, - "grad_norm": 1.4385993480682373, - "learning_rate": 6.593366834170855e-05, - "loss": 4.5254, - "step": 34408 - }, - { - "epoch": 17.94471968709257, - "grad_norm": 1.4086865186691284, - "learning_rate": 6.593266331658293e-05, - "loss": 5.2288, - "step": 34409 - }, - { - "epoch": 17.945241199478488, - "grad_norm": 1.443440318107605, - "learning_rate": 6.593165829145729e-05, - "loss": 5.2817, - "step": 34410 - }, - { - "epoch": 17.945762711864408, - "grad_norm": 1.417703628540039, - "learning_rate": 6.593065326633167e-05, - "loss": 5.4867, - "step": 34411 - }, - { - "epoch": 17.946284224250327, - "grad_norm": 1.394863247871399, - "learning_rate": 6.592964824120603e-05, - "loss": 5.5516, - "step": 34412 - }, - { - "epoch": 17.946805736636247, - "grad_norm": 1.509758710861206, - "learning_rate": 6.592864321608041e-05, - "loss": 4.4577, - "step": 34413 - }, - { - "epoch": 17.947327249022166, - "grad_norm": 1.4747332334518433, - "learning_rate": 6.592763819095477e-05, - "loss": 5.5588, - "step": 34414 - }, - { - "epoch": 17.947848761408082, - "grad_norm": 1.4741194248199463, - "learning_rate": 6.592663316582914e-05, - "loss": 5.5316, - "step": 34415 - }, - { - "epoch": 17.948370273794, - "grad_norm": 1.452453374862671, - "learning_rate": 6.592562814070352e-05, - "loss": 4.9555, - "step": 34416 - }, - { - "epoch": 17.94889178617992, - "grad_norm": 1.4843635559082031, - "learning_rate": 6.59246231155779e-05, - "loss": 4.9671, - "step": 34417 - }, - { - "epoch": 17.94941329856584, - "grad_norm": 1.475831389427185, - "learning_rate": 6.592361809045227e-05, - "loss": 4.971, - "step": 34418 - }, - { - "epoch": 17.94993481095176, - "grad_norm": 1.4550071954727173, - "learning_rate": 6.592261306532664e-05, - "loss": 5.0599, - "step": 34419 - }, - { - "epoch": 17.95045632333768, - "grad_norm": 1.414887547492981, - "learning_rate": 6.592160804020101e-05, - "loss": 5.196, - "step": 34420 - }, - { - "epoch": 17.9509778357236, - "grad_norm": 1.5802339315414429, - "learning_rate": 6.592060301507538e-05, - "loss": 5.207, - "step": 34421 - }, - { - "epoch": 17.951499348109518, - "grad_norm": 1.4896469116210938, - "learning_rate": 6.591959798994976e-05, - "loss": 5.2987, - "step": 34422 - }, - { - "epoch": 17.952020860495438, - "grad_norm": 1.4796608686447144, - "learning_rate": 6.591859296482412e-05, - "loss": 5.2857, - "step": 34423 - }, - { - "epoch": 17.952542372881357, - "grad_norm": 1.4489291906356812, - "learning_rate": 6.59175879396985e-05, - "loss": 5.1175, - "step": 34424 - }, - { - "epoch": 17.953063885267277, - "grad_norm": 1.3709566593170166, - "learning_rate": 6.591658291457286e-05, - "loss": 5.6324, - "step": 34425 - }, - { - "epoch": 17.953585397653193, - "grad_norm": 1.511335849761963, - "learning_rate": 6.591557788944724e-05, - "loss": 5.4055, - "step": 34426 - }, - { - "epoch": 17.954106910039112, - "grad_norm": 1.529376745223999, - "learning_rate": 6.59145728643216e-05, - "loss": 5.0681, - "step": 34427 - }, - { - "epoch": 17.95462842242503, - "grad_norm": 1.6351845264434814, - "learning_rate": 6.591356783919598e-05, - "loss": 5.1163, - "step": 34428 - }, - { - "epoch": 17.95514993481095, - "grad_norm": 1.4918276071548462, - "learning_rate": 6.591256281407036e-05, - "loss": 4.6064, - "step": 34429 - }, - { - "epoch": 17.95567144719687, - "grad_norm": 1.4514238834381104, - "learning_rate": 6.591155778894473e-05, - "loss": 5.4008, - "step": 34430 - }, - { - "epoch": 17.95619295958279, - "grad_norm": 1.5568983554840088, - "learning_rate": 6.59105527638191e-05, - "loss": 5.6156, - "step": 34431 - }, - { - "epoch": 17.95671447196871, - "grad_norm": 1.4124045372009277, - "learning_rate": 6.590954773869347e-05, - "loss": 5.15, - "step": 34432 - }, - { - "epoch": 17.95723598435463, - "grad_norm": 1.6143410205841064, - "learning_rate": 6.590854271356785e-05, - "loss": 5.3033, - "step": 34433 - }, - { - "epoch": 17.957757496740548, - "grad_norm": 1.4711146354675293, - "learning_rate": 6.590753768844221e-05, - "loss": 5.1967, - "step": 34434 - }, - { - "epoch": 17.958279009126468, - "grad_norm": 1.3660919666290283, - "learning_rate": 6.590653266331659e-05, - "loss": 4.7957, - "step": 34435 - }, - { - "epoch": 17.958800521512387, - "grad_norm": 1.4333277940750122, - "learning_rate": 6.590552763819095e-05, - "loss": 5.4178, - "step": 34436 - }, - { - "epoch": 17.959322033898307, - "grad_norm": 1.4448387622833252, - "learning_rate": 6.590452261306533e-05, - "loss": 5.4867, - "step": 34437 - }, - { - "epoch": 17.959843546284226, - "grad_norm": 1.556132197380066, - "learning_rate": 6.59035175879397e-05, - "loss": 5.5271, - "step": 34438 - }, - { - "epoch": 17.960365058670142, - "grad_norm": 1.5021482706069946, - "learning_rate": 6.590251256281407e-05, - "loss": 4.9552, - "step": 34439 - }, - { - "epoch": 17.96088657105606, - "grad_norm": 1.3885306119918823, - "learning_rate": 6.590150753768845e-05, - "loss": 5.7048, - "step": 34440 - }, - { - "epoch": 17.96140808344198, - "grad_norm": 1.4817050695419312, - "learning_rate": 6.590050251256281e-05, - "loss": 5.4861, - "step": 34441 - }, - { - "epoch": 17.9619295958279, - "grad_norm": 1.4771947860717773, - "learning_rate": 6.589949748743719e-05, - "loss": 5.8164, - "step": 34442 - }, - { - "epoch": 17.96245110821382, - "grad_norm": 1.427932620048523, - "learning_rate": 6.589849246231156e-05, - "loss": 4.9818, - "step": 34443 - }, - { - "epoch": 17.96297262059974, - "grad_norm": 1.4942162036895752, - "learning_rate": 6.589748743718593e-05, - "loss": 5.1518, - "step": 34444 - }, - { - "epoch": 17.96349413298566, - "grad_norm": 1.4958653450012207, - "learning_rate": 6.58964824120603e-05, - "loss": 5.401, - "step": 34445 - }, - { - "epoch": 17.96401564537158, - "grad_norm": 1.6489388942718506, - "learning_rate": 6.589547738693468e-05, - "loss": 4.5806, - "step": 34446 - }, - { - "epoch": 17.964537157757498, - "grad_norm": 2.018339157104492, - "learning_rate": 6.589447236180904e-05, - "loss": 4.3656, - "step": 34447 - }, - { - "epoch": 17.965058670143417, - "grad_norm": 1.5560673475265503, - "learning_rate": 6.589346733668342e-05, - "loss": 5.3096, - "step": 34448 - }, - { - "epoch": 17.965580182529337, - "grad_norm": 1.4995695352554321, - "learning_rate": 6.58924623115578e-05, - "loss": 5.0362, - "step": 34449 - }, - { - "epoch": 17.966101694915253, - "grad_norm": 1.547061800956726, - "learning_rate": 6.589145728643217e-05, - "loss": 4.7571, - "step": 34450 - }, - { - "epoch": 17.966623207301172, - "grad_norm": 1.4818675518035889, - "learning_rate": 6.589045226130654e-05, - "loss": 5.5762, - "step": 34451 - }, - { - "epoch": 17.96714471968709, - "grad_norm": 1.5967682600021362, - "learning_rate": 6.588944723618092e-05, - "loss": 4.9524, - "step": 34452 - }, - { - "epoch": 17.96766623207301, - "grad_norm": 1.4125715494155884, - "learning_rate": 6.588844221105528e-05, - "loss": 5.1431, - "step": 34453 - }, - { - "epoch": 17.96818774445893, - "grad_norm": 1.3975938558578491, - "learning_rate": 6.588743718592966e-05, - "loss": 5.5986, - "step": 34454 - }, - { - "epoch": 17.96870925684485, - "grad_norm": 1.4413443803787231, - "learning_rate": 6.588643216080402e-05, - "loss": 5.5699, - "step": 34455 - }, - { - "epoch": 17.96923076923077, - "grad_norm": 1.505035400390625, - "learning_rate": 6.588542713567839e-05, - "loss": 4.9469, - "step": 34456 - }, - { - "epoch": 17.96975228161669, - "grad_norm": 1.6174718141555786, - "learning_rate": 6.588442211055276e-05, - "loss": 5.0715, - "step": 34457 - }, - { - "epoch": 17.97027379400261, - "grad_norm": 1.4666956663131714, - "learning_rate": 6.588341708542713e-05, - "loss": 4.8327, - "step": 34458 - }, - { - "epoch": 17.970795306388528, - "grad_norm": 1.6872104406356812, - "learning_rate": 6.58824120603015e-05, - "loss": 5.0049, - "step": 34459 - }, - { - "epoch": 17.971316818774447, - "grad_norm": 1.4665522575378418, - "learning_rate": 6.588140703517588e-05, - "loss": 5.1514, - "step": 34460 - }, - { - "epoch": 17.971838331160367, - "grad_norm": 1.442374587059021, - "learning_rate": 6.588040201005026e-05, - "loss": 4.9683, - "step": 34461 - }, - { - "epoch": 17.972359843546283, - "grad_norm": 1.5501861572265625, - "learning_rate": 6.587939698492463e-05, - "loss": 5.0206, - "step": 34462 - }, - { - "epoch": 17.972881355932202, - "grad_norm": 1.5179953575134277, - "learning_rate": 6.5878391959799e-05, - "loss": 5.3598, - "step": 34463 - }, - { - "epoch": 17.97340286831812, - "grad_norm": 1.5812222957611084, - "learning_rate": 6.587738693467337e-05, - "loss": 4.858, - "step": 34464 - }, - { - "epoch": 17.97392438070404, - "grad_norm": 1.5433059930801392, - "learning_rate": 6.587638190954775e-05, - "loss": 5.112, - "step": 34465 - }, - { - "epoch": 17.97444589308996, - "grad_norm": 1.4881670475006104, - "learning_rate": 6.587537688442211e-05, - "loss": 5.2423, - "step": 34466 - }, - { - "epoch": 17.97496740547588, - "grad_norm": 1.480857014656067, - "learning_rate": 6.587437185929649e-05, - "loss": 5.2695, - "step": 34467 - }, - { - "epoch": 17.9754889178618, - "grad_norm": 1.5090144872665405, - "learning_rate": 6.587336683417085e-05, - "loss": 5.5395, - "step": 34468 - }, - { - "epoch": 17.97601043024772, - "grad_norm": 1.6066844463348389, - "learning_rate": 6.587236180904523e-05, - "loss": 5.0667, - "step": 34469 - }, - { - "epoch": 17.97653194263364, - "grad_norm": 1.5069674253463745, - "learning_rate": 6.587135678391961e-05, - "loss": 5.3961, - "step": 34470 - }, - { - "epoch": 17.977053455019558, - "grad_norm": 1.6355369091033936, - "learning_rate": 6.587035175879397e-05, - "loss": 4.2723, - "step": 34471 - }, - { - "epoch": 17.977574967405477, - "grad_norm": 1.4382572174072266, - "learning_rate": 6.586934673366835e-05, - "loss": 4.7113, - "step": 34472 - }, - { - "epoch": 17.978096479791397, - "grad_norm": 1.3749853372573853, - "learning_rate": 6.586834170854271e-05, - "loss": 5.6679, - "step": 34473 - }, - { - "epoch": 17.978617992177313, - "grad_norm": 1.6042677164077759, - "learning_rate": 6.586733668341709e-05, - "loss": 5.1639, - "step": 34474 - }, - { - "epoch": 17.979139504563232, - "grad_norm": 1.4687360525131226, - "learning_rate": 6.586633165829146e-05, - "loss": 5.4139, - "step": 34475 - }, - { - "epoch": 17.97966101694915, - "grad_norm": 1.4729738235473633, - "learning_rate": 6.586532663316583e-05, - "loss": 5.3963, - "step": 34476 - }, - { - "epoch": 17.98018252933507, - "grad_norm": 1.4514473676681519, - "learning_rate": 6.58643216080402e-05, - "loss": 5.5556, - "step": 34477 - }, - { - "epoch": 17.98070404172099, - "grad_norm": 1.4739985466003418, - "learning_rate": 6.586331658291458e-05, - "loss": 5.3095, - "step": 34478 - }, - { - "epoch": 17.98122555410691, - "grad_norm": 1.5298302173614502, - "learning_rate": 6.586231155778894e-05, - "loss": 5.3047, - "step": 34479 - }, - { - "epoch": 17.98174706649283, - "grad_norm": 1.4847583770751953, - "learning_rate": 6.586130653266332e-05, - "loss": 5.5505, - "step": 34480 - }, - { - "epoch": 17.98226857887875, - "grad_norm": 1.4698156118392944, - "learning_rate": 6.58603015075377e-05, - "loss": 5.1537, - "step": 34481 - }, - { - "epoch": 17.98279009126467, - "grad_norm": 1.6611104011535645, - "learning_rate": 6.585929648241206e-05, - "loss": 5.0434, - "step": 34482 - }, - { - "epoch": 17.983311603650588, - "grad_norm": 1.413561463356018, - "learning_rate": 6.585829145728644e-05, - "loss": 5.5948, - "step": 34483 - }, - { - "epoch": 17.983833116036507, - "grad_norm": 1.6320198774337769, - "learning_rate": 6.58572864321608e-05, - "loss": 5.3974, - "step": 34484 - }, - { - "epoch": 17.984354628422427, - "grad_norm": 1.5658491849899292, - "learning_rate": 6.585628140703518e-05, - "loss": 5.1025, - "step": 34485 - }, - { - "epoch": 17.984876140808343, - "grad_norm": 1.4075894355773926, - "learning_rate": 6.585527638190954e-05, - "loss": 5.5595, - "step": 34486 - }, - { - "epoch": 17.985397653194262, - "grad_norm": 1.4674696922302246, - "learning_rate": 6.585427135678392e-05, - "loss": 5.7313, - "step": 34487 - }, - { - "epoch": 17.98591916558018, - "grad_norm": 1.478952169418335, - "learning_rate": 6.585326633165829e-05, - "loss": 5.3665, - "step": 34488 - }, - { - "epoch": 17.9864406779661, - "grad_norm": 1.503686547279358, - "learning_rate": 6.585226130653266e-05, - "loss": 5.2726, - "step": 34489 - }, - { - "epoch": 17.98696219035202, - "grad_norm": 1.5387918949127197, - "learning_rate": 6.585125628140704e-05, - "loss": 5.4846, - "step": 34490 - }, - { - "epoch": 17.98748370273794, - "grad_norm": 1.6141655445098877, - "learning_rate": 6.585025125628142e-05, - "loss": 5.0813, - "step": 34491 - }, - { - "epoch": 17.98800521512386, - "grad_norm": 1.4289445877075195, - "learning_rate": 6.584924623115578e-05, - "loss": 5.5869, - "step": 34492 - }, - { - "epoch": 17.98852672750978, - "grad_norm": 1.3425426483154297, - "learning_rate": 6.584824120603016e-05, - "loss": 5.4578, - "step": 34493 - }, - { - "epoch": 17.9890482398957, - "grad_norm": 1.4658864736557007, - "learning_rate": 6.584723618090453e-05, - "loss": 4.7531, - "step": 34494 - }, - { - "epoch": 17.989569752281618, - "grad_norm": 1.4604839086532593, - "learning_rate": 6.584623115577889e-05, - "loss": 5.3061, - "step": 34495 - }, - { - "epoch": 17.990091264667537, - "grad_norm": 1.4926074743270874, - "learning_rate": 6.584522613065327e-05, - "loss": 5.0724, - "step": 34496 - }, - { - "epoch": 17.990612777053457, - "grad_norm": 1.4288212060928345, - "learning_rate": 6.584422110552763e-05, - "loss": 4.904, - "step": 34497 - }, - { - "epoch": 17.991134289439373, - "grad_norm": 1.5141072273254395, - "learning_rate": 6.584321608040201e-05, - "loss": 5.4866, - "step": 34498 - }, - { - "epoch": 17.991655801825292, - "grad_norm": 1.503017544746399, - "learning_rate": 6.584221105527638e-05, - "loss": 5.4056, - "step": 34499 - }, - { - "epoch": 17.99217731421121, - "grad_norm": 1.513900637626648, - "learning_rate": 6.584120603015075e-05, - "loss": 5.3111, - "step": 34500 - }, - { - "epoch": 17.99269882659713, - "grad_norm": 1.5654958486557007, - "learning_rate": 6.584020100502513e-05, - "loss": 5.1362, - "step": 34501 - }, - { - "epoch": 17.99322033898305, - "grad_norm": 1.4661364555358887, - "learning_rate": 6.583919597989951e-05, - "loss": 5.3756, - "step": 34502 - }, - { - "epoch": 17.99374185136897, - "grad_norm": 1.4246188402175903, - "learning_rate": 6.583819095477387e-05, - "loss": 5.4712, - "step": 34503 - }, - { - "epoch": 17.99426336375489, - "grad_norm": 1.4901608228683472, - "learning_rate": 6.583718592964825e-05, - "loss": 5.2447, - "step": 34504 - }, - { - "epoch": 17.99478487614081, - "grad_norm": 1.4214166402816772, - "learning_rate": 6.583618090452262e-05, - "loss": 5.5812, - "step": 34505 - }, - { - "epoch": 17.99530638852673, - "grad_norm": 1.4669297933578491, - "learning_rate": 6.583517587939699e-05, - "loss": 5.6186, - "step": 34506 - }, - { - "epoch": 17.995827900912648, - "grad_norm": 1.5792323350906372, - "learning_rate": 6.583417085427136e-05, - "loss": 5.7059, - "step": 34507 - }, - { - "epoch": 17.996349413298567, - "grad_norm": 1.542504072189331, - "learning_rate": 6.583316582914572e-05, - "loss": 5.6651, - "step": 34508 - }, - { - "epoch": 17.996870925684483, - "grad_norm": 1.3837231397628784, - "learning_rate": 6.58321608040201e-05, - "loss": 5.4374, - "step": 34509 - }, - { - "epoch": 17.997392438070403, - "grad_norm": 1.4747819900512695, - "learning_rate": 6.583115577889448e-05, - "loss": 5.2826, - "step": 34510 - }, - { - "epoch": 17.997913950456322, - "grad_norm": 1.5706905126571655, - "learning_rate": 6.583015075376886e-05, - "loss": 5.6431, - "step": 34511 - }, - { - "epoch": 17.99843546284224, - "grad_norm": 1.495792269706726, - "learning_rate": 6.582914572864322e-05, - "loss": 5.5918, - "step": 34512 - }, - { - "epoch": 17.99895697522816, - "grad_norm": 1.5378512144088745, - "learning_rate": 6.58281407035176e-05, - "loss": 5.5235, - "step": 34513 - }, - { - "epoch": 17.99947848761408, - "grad_norm": 1.5648638010025024, - "learning_rate": 6.582713567839196e-05, - "loss": 5.0495, - "step": 34514 - }, - { - "epoch": 18.0, - "grad_norm": 1.7369029521942139, - "learning_rate": 6.582613065326634e-05, - "loss": 4.642, - "step": 34515 - }, - { - "epoch": 18.00052151238592, - "grad_norm": 1.535120964050293, - "learning_rate": 6.58251256281407e-05, - "loss": 5.4772, - "step": 34516 - }, - { - "epoch": 18.00104302477184, - "grad_norm": 1.4991706609725952, - "learning_rate": 6.582412060301508e-05, - "loss": 4.7877, - "step": 34517 - }, - { - "epoch": 18.00156453715776, - "grad_norm": 1.4832313060760498, - "learning_rate": 6.582311557788945e-05, - "loss": 5.1773, - "step": 34518 - }, - { - "epoch": 18.002086049543678, - "grad_norm": 1.4328093528747559, - "learning_rate": 6.582211055276382e-05, - "loss": 5.7447, - "step": 34519 - }, - { - "epoch": 18.002607561929597, - "grad_norm": 1.4386639595031738, - "learning_rate": 6.582110552763819e-05, - "loss": 5.6265, - "step": 34520 - }, - { - "epoch": 18.003129074315513, - "grad_norm": 1.546228289604187, - "learning_rate": 6.582010050251257e-05, - "loss": 5.7554, - "step": 34521 - }, - { - "epoch": 18.003650586701433, - "grad_norm": 1.3976783752441406, - "learning_rate": 6.581909547738694e-05, - "loss": 5.6754, - "step": 34522 - }, - { - "epoch": 18.004172099087352, - "grad_norm": 1.477152705192566, - "learning_rate": 6.581809045226131e-05, - "loss": 5.3829, - "step": 34523 - }, - { - "epoch": 18.00469361147327, - "grad_norm": 1.3992375135421753, - "learning_rate": 6.581708542713569e-05, - "loss": 5.6274, - "step": 34524 - }, - { - "epoch": 18.00521512385919, - "grad_norm": 1.6800516843795776, - "learning_rate": 6.581608040201005e-05, - "loss": 5.2934, - "step": 34525 - }, - { - "epoch": 18.00573663624511, - "grad_norm": 1.4416236877441406, - "learning_rate": 6.581507537688443e-05, - "loss": 5.7029, - "step": 34526 - }, - { - "epoch": 18.00625814863103, - "grad_norm": 1.5177481174468994, - "learning_rate": 6.581407035175879e-05, - "loss": 5.4666, - "step": 34527 - }, - { - "epoch": 18.00677966101695, - "grad_norm": 1.5266560316085815, - "learning_rate": 6.581306532663317e-05, - "loss": 5.0459, - "step": 34528 - }, - { - "epoch": 18.00730117340287, - "grad_norm": 1.4611364603042603, - "learning_rate": 6.581206030150753e-05, - "loss": 5.114, - "step": 34529 - }, - { - "epoch": 18.00782268578879, - "grad_norm": 1.6384785175323486, - "learning_rate": 6.581105527638191e-05, - "loss": 4.9461, - "step": 34530 - }, - { - "epoch": 18.008344198174708, - "grad_norm": 1.5622379779815674, - "learning_rate": 6.581005025125629e-05, - "loss": 5.1997, - "step": 34531 - }, - { - "epoch": 18.008865710560627, - "grad_norm": 1.5216342210769653, - "learning_rate": 6.580904522613067e-05, - "loss": 5.3772, - "step": 34532 - }, - { - "epoch": 18.009387222946543, - "grad_norm": 1.5181798934936523, - "learning_rate": 6.580804020100503e-05, - "loss": 5.6937, - "step": 34533 - }, - { - "epoch": 18.009908735332463, - "grad_norm": 1.4834717512130737, - "learning_rate": 6.58070351758794e-05, - "loss": 5.6316, - "step": 34534 - }, - { - "epoch": 18.010430247718382, - "grad_norm": 1.5943796634674072, - "learning_rate": 6.580603015075377e-05, - "loss": 4.9181, - "step": 34535 - }, - { - "epoch": 18.0109517601043, - "grad_norm": 1.5342224836349487, - "learning_rate": 6.580502512562814e-05, - "loss": 5.6433, - "step": 34536 - }, - { - "epoch": 18.01147327249022, - "grad_norm": 1.4739564657211304, - "learning_rate": 6.580402010050252e-05, - "loss": 5.6559, - "step": 34537 - }, - { - "epoch": 18.01199478487614, - "grad_norm": 1.4124770164489746, - "learning_rate": 6.580301507537688e-05, - "loss": 5.7322, - "step": 34538 - }, - { - "epoch": 18.01251629726206, - "grad_norm": 1.5247951745986938, - "learning_rate": 6.580201005025126e-05, - "loss": 5.2637, - "step": 34539 - }, - { - "epoch": 18.01303780964798, - "grad_norm": 1.7152971029281616, - "learning_rate": 6.580100502512562e-05, - "loss": 5.2072, - "step": 34540 - }, - { - "epoch": 18.0135593220339, - "grad_norm": 1.5179792642593384, - "learning_rate": 6.58e-05, - "loss": 5.5805, - "step": 34541 - }, - { - "epoch": 18.01408083441982, - "grad_norm": 1.5255911350250244, - "learning_rate": 6.579899497487438e-05, - "loss": 5.1937, - "step": 34542 - }, - { - "epoch": 18.014602346805738, - "grad_norm": 1.4795796871185303, - "learning_rate": 6.579798994974876e-05, - "loss": 5.7144, - "step": 34543 - }, - { - "epoch": 18.015123859191657, - "grad_norm": 1.5454121828079224, - "learning_rate": 6.579698492462312e-05, - "loss": 5.1325, - "step": 34544 - }, - { - "epoch": 18.015645371577573, - "grad_norm": 1.5040340423583984, - "learning_rate": 6.57959798994975e-05, - "loss": 4.9165, - "step": 34545 - }, - { - "epoch": 18.016166883963493, - "grad_norm": 1.562425136566162, - "learning_rate": 6.579497487437186e-05, - "loss": 4.7336, - "step": 34546 - }, - { - "epoch": 18.016688396349412, - "grad_norm": 1.492027997970581, - "learning_rate": 6.579396984924624e-05, - "loss": 4.866, - "step": 34547 - }, - { - "epoch": 18.01720990873533, - "grad_norm": 1.5027825832366943, - "learning_rate": 6.57929648241206e-05, - "loss": 5.172, - "step": 34548 - }, - { - "epoch": 18.01773142112125, - "grad_norm": 1.3953830003738403, - "learning_rate": 6.579195979899497e-05, - "loss": 5.7471, - "step": 34549 - }, - { - "epoch": 18.01825293350717, - "grad_norm": 1.4958491325378418, - "learning_rate": 6.579095477386935e-05, - "loss": 5.4687, - "step": 34550 - }, - { - "epoch": 18.01877444589309, - "grad_norm": 1.5545681715011597, - "learning_rate": 6.578994974874372e-05, - "loss": 5.4791, - "step": 34551 - }, - { - "epoch": 18.01929595827901, - "grad_norm": 1.6165564060211182, - "learning_rate": 6.57889447236181e-05, - "loss": 5.125, - "step": 34552 - }, - { - "epoch": 18.01981747066493, - "grad_norm": 1.4830657243728638, - "learning_rate": 6.578793969849247e-05, - "loss": 5.327, - "step": 34553 - }, - { - "epoch": 18.02033898305085, - "grad_norm": 1.5338672399520874, - "learning_rate": 6.578693467336684e-05, - "loss": 5.3565, - "step": 34554 - }, - { - "epoch": 18.020860495436768, - "grad_norm": 1.3909610509872437, - "learning_rate": 6.578592964824121e-05, - "loss": 4.6126, - "step": 34555 - }, - { - "epoch": 18.021382007822687, - "grad_norm": 1.539527416229248, - "learning_rate": 6.578492462311559e-05, - "loss": 5.5139, - "step": 34556 - }, - { - "epoch": 18.021903520208603, - "grad_norm": 1.5561306476593018, - "learning_rate": 6.578391959798995e-05, - "loss": 5.0995, - "step": 34557 - }, - { - "epoch": 18.022425032594523, - "grad_norm": 1.4354084730148315, - "learning_rate": 6.578291457286433e-05, - "loss": 5.7345, - "step": 34558 - }, - { - "epoch": 18.022946544980442, - "grad_norm": 1.4375252723693848, - "learning_rate": 6.578190954773869e-05, - "loss": 5.3047, - "step": 34559 - }, - { - "epoch": 18.02346805736636, - "grad_norm": 1.4918491840362549, - "learning_rate": 6.578090452261307e-05, - "loss": 5.2178, - "step": 34560 - }, - { - "epoch": 18.02398956975228, - "grad_norm": 1.5885992050170898, - "learning_rate": 6.577989949748743e-05, - "loss": 5.0412, - "step": 34561 - }, - { - "epoch": 18.0245110821382, - "grad_norm": 1.483041763305664, - "learning_rate": 6.577889447236181e-05, - "loss": 5.2471, - "step": 34562 - }, - { - "epoch": 18.02503259452412, - "grad_norm": 1.4388571977615356, - "learning_rate": 6.577788944723619e-05, - "loss": 5.0853, - "step": 34563 - }, - { - "epoch": 18.02555410691004, - "grad_norm": 1.5534882545471191, - "learning_rate": 6.577688442211055e-05, - "loss": 5.0776, - "step": 34564 - }, - { - "epoch": 18.02607561929596, - "grad_norm": 1.5898467302322388, - "learning_rate": 6.577587939698493e-05, - "loss": 4.8765, - "step": 34565 - }, - { - "epoch": 18.02659713168188, - "grad_norm": 1.4725756645202637, - "learning_rate": 6.57748743718593e-05, - "loss": 5.3694, - "step": 34566 - }, - { - "epoch": 18.027118644067798, - "grad_norm": 1.3981398344039917, - "learning_rate": 6.577386934673367e-05, - "loss": 5.5904, - "step": 34567 - }, - { - "epoch": 18.027640156453717, - "grad_norm": 1.4878056049346924, - "learning_rate": 6.577286432160804e-05, - "loss": 5.2232, - "step": 34568 - }, - { - "epoch": 18.028161668839633, - "grad_norm": 1.5437144041061401, - "learning_rate": 6.577185929648242e-05, - "loss": 5.2577, - "step": 34569 - }, - { - "epoch": 18.028683181225553, - "grad_norm": 1.4957984685897827, - "learning_rate": 6.577085427135678e-05, - "loss": 5.4858, - "step": 34570 - }, - { - "epoch": 18.029204693611472, - "grad_norm": 1.512265920639038, - "learning_rate": 6.576984924623116e-05, - "loss": 5.3819, - "step": 34571 - }, - { - "epoch": 18.02972620599739, - "grad_norm": 1.5195114612579346, - "learning_rate": 6.576884422110554e-05, - "loss": 5.248, - "step": 34572 - }, - { - "epoch": 18.03024771838331, - "grad_norm": 1.518963098526001, - "learning_rate": 6.576783919597991e-05, - "loss": 5.6539, - "step": 34573 - }, - { - "epoch": 18.03076923076923, - "grad_norm": 1.4354969263076782, - "learning_rate": 6.576683417085428e-05, - "loss": 5.7035, - "step": 34574 - }, - { - "epoch": 18.03129074315515, - "grad_norm": 1.6240463256835938, - "learning_rate": 6.576582914572864e-05, - "loss": 4.9394, - "step": 34575 - }, - { - "epoch": 18.03181225554107, - "grad_norm": 1.5011909008026123, - "learning_rate": 6.576482412060302e-05, - "loss": 5.1787, - "step": 34576 - }, - { - "epoch": 18.03233376792699, - "grad_norm": 1.503113865852356, - "learning_rate": 6.576381909547739e-05, - "loss": 5.5052, - "step": 34577 - }, - { - "epoch": 18.03285528031291, - "grad_norm": 1.4374912977218628, - "learning_rate": 6.576281407035176e-05, - "loss": 5.1866, - "step": 34578 - }, - { - "epoch": 18.033376792698828, - "grad_norm": 1.781506061553955, - "learning_rate": 6.576180904522613e-05, - "loss": 5.4219, - "step": 34579 - }, - { - "epoch": 18.033898305084747, - "grad_norm": 1.5514981746673584, - "learning_rate": 6.57608040201005e-05, - "loss": 5.4388, - "step": 34580 - }, - { - "epoch": 18.034419817470663, - "grad_norm": 1.5693433284759521, - "learning_rate": 6.575979899497487e-05, - "loss": 5.1358, - "step": 34581 - }, - { - "epoch": 18.034941329856583, - "grad_norm": 1.472252607345581, - "learning_rate": 6.575879396984925e-05, - "loss": 5.3515, - "step": 34582 - }, - { - "epoch": 18.035462842242502, - "grad_norm": 1.4153048992156982, - "learning_rate": 6.575778894472363e-05, - "loss": 5.4055, - "step": 34583 - }, - { - "epoch": 18.03598435462842, - "grad_norm": 1.5130982398986816, - "learning_rate": 6.5756783919598e-05, - "loss": 5.4355, - "step": 34584 - }, - { - "epoch": 18.03650586701434, - "grad_norm": 1.599351167678833, - "learning_rate": 6.575577889447237e-05, - "loss": 5.3867, - "step": 34585 - }, - { - "epoch": 18.03702737940026, - "grad_norm": 1.6010029315948486, - "learning_rate": 6.575477386934674e-05, - "loss": 5.5749, - "step": 34586 - }, - { - "epoch": 18.03754889178618, - "grad_norm": 1.6208842992782593, - "learning_rate": 6.575376884422111e-05, - "loss": 4.8557, - "step": 34587 - }, - { - "epoch": 18.0380704041721, - "grad_norm": 1.6073371171951294, - "learning_rate": 6.575276381909547e-05, - "loss": 5.1565, - "step": 34588 - }, - { - "epoch": 18.03859191655802, - "grad_norm": 1.5532559156417847, - "learning_rate": 6.575175879396985e-05, - "loss": 5.3807, - "step": 34589 - }, - { - "epoch": 18.03911342894394, - "grad_norm": 1.5103278160095215, - "learning_rate": 6.575075376884422e-05, - "loss": 5.2288, - "step": 34590 - }, - { - "epoch": 18.039634941329858, - "grad_norm": 1.5695115327835083, - "learning_rate": 6.57497487437186e-05, - "loss": 5.665, - "step": 34591 - }, - { - "epoch": 18.040156453715777, - "grad_norm": 1.639782428741455, - "learning_rate": 6.574874371859296e-05, - "loss": 5.1793, - "step": 34592 - }, - { - "epoch": 18.040677966101693, - "grad_norm": 1.5369418859481812, - "learning_rate": 6.574773869346734e-05, - "loss": 5.306, - "step": 34593 - }, - { - "epoch": 18.041199478487613, - "grad_norm": 1.6074708700180054, - "learning_rate": 6.574673366834171e-05, - "loss": 5.3109, - "step": 34594 - }, - { - "epoch": 18.041720990873532, - "grad_norm": 1.5674928426742554, - "learning_rate": 6.574572864321609e-05, - "loss": 5.149, - "step": 34595 - }, - { - "epoch": 18.042242503259452, - "grad_norm": 1.508589744567871, - "learning_rate": 6.574472361809046e-05, - "loss": 5.0721, - "step": 34596 - }, - { - "epoch": 18.04276401564537, - "grad_norm": 1.5352027416229248, - "learning_rate": 6.574371859296483e-05, - "loss": 5.5259, - "step": 34597 - }, - { - "epoch": 18.04328552803129, - "grad_norm": 1.5572913885116577, - "learning_rate": 6.57427135678392e-05, - "loss": 5.1563, - "step": 34598 - }, - { - "epoch": 18.04380704041721, - "grad_norm": 1.4075987339019775, - "learning_rate": 6.574170854271358e-05, - "loss": 5.4353, - "step": 34599 - }, - { - "epoch": 18.04432855280313, - "grad_norm": 1.464223027229309, - "learning_rate": 6.574070351758794e-05, - "loss": 5.4848, - "step": 34600 - }, - { - "epoch": 18.04485006518905, - "grad_norm": 1.3523657321929932, - "learning_rate": 6.57396984924623e-05, - "loss": 5.8222, - "step": 34601 - }, - { - "epoch": 18.04537157757497, - "grad_norm": 1.4958513975143433, - "learning_rate": 6.573869346733668e-05, - "loss": 5.2151, - "step": 34602 - }, - { - "epoch": 18.045893089960888, - "grad_norm": 1.4361563920974731, - "learning_rate": 6.573768844221106e-05, - "loss": 5.3382, - "step": 34603 - }, - { - "epoch": 18.046414602346807, - "grad_norm": 1.503666639328003, - "learning_rate": 6.573668341708544e-05, - "loss": 5.2727, - "step": 34604 - }, - { - "epoch": 18.046936114732723, - "grad_norm": 1.5056921243667603, - "learning_rate": 6.57356783919598e-05, - "loss": 5.6894, - "step": 34605 - }, - { - "epoch": 18.047457627118643, - "grad_norm": 1.5336576700210571, - "learning_rate": 6.573467336683418e-05, - "loss": 5.4023, - "step": 34606 - }, - { - "epoch": 18.047979139504562, - "grad_norm": 1.4512757062911987, - "learning_rate": 6.573366834170854e-05, - "loss": 5.4506, - "step": 34607 - }, - { - "epoch": 18.048500651890482, - "grad_norm": 1.478692650794983, - "learning_rate": 6.573266331658292e-05, - "loss": 5.2107, - "step": 34608 - }, - { - "epoch": 18.0490221642764, - "grad_norm": 1.4543477296829224, - "learning_rate": 6.573165829145729e-05, - "loss": 5.499, - "step": 34609 - }, - { - "epoch": 18.04954367666232, - "grad_norm": 1.4903199672698975, - "learning_rate": 6.573065326633166e-05, - "loss": 5.3322, - "step": 34610 - }, - { - "epoch": 18.05006518904824, - "grad_norm": 1.5416743755340576, - "learning_rate": 6.572964824120603e-05, - "loss": 5.0808, - "step": 34611 - }, - { - "epoch": 18.05058670143416, - "grad_norm": 1.4331419467926025, - "learning_rate": 6.57286432160804e-05, - "loss": 5.5223, - "step": 34612 - }, - { - "epoch": 18.05110821382008, - "grad_norm": 1.5897860527038574, - "learning_rate": 6.572763819095477e-05, - "loss": 5.3195, - "step": 34613 - }, - { - "epoch": 18.051629726206, - "grad_norm": 1.4615106582641602, - "learning_rate": 6.572663316582915e-05, - "loss": 5.2033, - "step": 34614 - }, - { - "epoch": 18.052151238591918, - "grad_norm": 1.5043411254882812, - "learning_rate": 6.572562814070353e-05, - "loss": 5.2385, - "step": 34615 - }, - { - "epoch": 18.052672750977834, - "grad_norm": 1.5116440057754517, - "learning_rate": 6.572462311557789e-05, - "loss": 5.4361, - "step": 34616 - }, - { - "epoch": 18.053194263363753, - "grad_norm": 1.4523956775665283, - "learning_rate": 6.572361809045227e-05, - "loss": 5.5032, - "step": 34617 - }, - { - "epoch": 18.053715775749673, - "grad_norm": 1.448911428451538, - "learning_rate": 6.572261306532663e-05, - "loss": 5.2659, - "step": 34618 - }, - { - "epoch": 18.054237288135592, - "grad_norm": 1.5118372440338135, - "learning_rate": 6.572160804020101e-05, - "loss": 5.626, - "step": 34619 - }, - { - "epoch": 18.054758800521512, - "grad_norm": 1.5289254188537598, - "learning_rate": 6.572060301507537e-05, - "loss": 4.9954, - "step": 34620 - }, - { - "epoch": 18.05528031290743, - "grad_norm": 1.469541072845459, - "learning_rate": 6.571959798994975e-05, - "loss": 5.5143, - "step": 34621 - }, - { - "epoch": 18.05580182529335, - "grad_norm": 1.5532082319259644, - "learning_rate": 6.571859296482412e-05, - "loss": 5.4735, - "step": 34622 - }, - { - "epoch": 18.05632333767927, - "grad_norm": 1.4450894594192505, - "learning_rate": 6.57175879396985e-05, - "loss": 5.2722, - "step": 34623 - }, - { - "epoch": 18.05684485006519, - "grad_norm": 1.5134862661361694, - "learning_rate": 6.571658291457287e-05, - "loss": 5.2481, - "step": 34624 - }, - { - "epoch": 18.05736636245111, - "grad_norm": 1.5844768285751343, - "learning_rate": 6.571557788944725e-05, - "loss": 5.6222, - "step": 34625 - }, - { - "epoch": 18.05788787483703, - "grad_norm": 1.6586672067642212, - "learning_rate": 6.571457286432161e-05, - "loss": 5.3307, - "step": 34626 - }, - { - "epoch": 18.058409387222948, - "grad_norm": 1.6478641033172607, - "learning_rate": 6.571356783919599e-05, - "loss": 5.512, - "step": 34627 - }, - { - "epoch": 18.058930899608864, - "grad_norm": 1.580997109413147, - "learning_rate": 6.571256281407036e-05, - "loss": 4.8734, - "step": 34628 - }, - { - "epoch": 18.059452411994783, - "grad_norm": 1.4740139245986938, - "learning_rate": 6.571155778894472e-05, - "loss": 5.4667, - "step": 34629 - }, - { - "epoch": 18.059973924380703, - "grad_norm": 1.4440287351608276, - "learning_rate": 6.57105527638191e-05, - "loss": 5.8202, - "step": 34630 - }, - { - "epoch": 18.060495436766622, - "grad_norm": 1.3620930910110474, - "learning_rate": 6.570954773869346e-05, - "loss": 5.7675, - "step": 34631 - }, - { - "epoch": 18.061016949152542, - "grad_norm": 1.5663306713104248, - "learning_rate": 6.570854271356784e-05, - "loss": 5.1806, - "step": 34632 - }, - { - "epoch": 18.06153846153846, - "grad_norm": 1.587952733039856, - "learning_rate": 6.57075376884422e-05, - "loss": 5.0576, - "step": 34633 - }, - { - "epoch": 18.06205997392438, - "grad_norm": 1.5675674676895142, - "learning_rate": 6.570653266331658e-05, - "loss": 5.3668, - "step": 34634 - }, - { - "epoch": 18.0625814863103, - "grad_norm": 1.4197580814361572, - "learning_rate": 6.570552763819096e-05, - "loss": 5.7254, - "step": 34635 - }, - { - "epoch": 18.06310299869622, - "grad_norm": 1.4922350645065308, - "learning_rate": 6.570452261306534e-05, - "loss": 5.0202, - "step": 34636 - }, - { - "epoch": 18.06362451108214, - "grad_norm": 1.5960174798965454, - "learning_rate": 6.57035175879397e-05, - "loss": 5.2306, - "step": 34637 - }, - { - "epoch": 18.06414602346806, - "grad_norm": 1.4535239934921265, - "learning_rate": 6.570251256281408e-05, - "loss": 5.5079, - "step": 34638 - }, - { - "epoch": 18.064667535853978, - "grad_norm": 1.4340791702270508, - "learning_rate": 6.570150753768844e-05, - "loss": 5.3525, - "step": 34639 - }, - { - "epoch": 18.065189048239894, - "grad_norm": 1.5600826740264893, - "learning_rate": 6.570050251256282e-05, - "loss": 5.1839, - "step": 34640 - }, - { - "epoch": 18.065710560625813, - "grad_norm": 1.4844518899917603, - "learning_rate": 6.569949748743719e-05, - "loss": 5.583, - "step": 34641 - }, - { - "epoch": 18.066232073011733, - "grad_norm": 1.5528546571731567, - "learning_rate": 6.569849246231155e-05, - "loss": 5.384, - "step": 34642 - }, - { - "epoch": 18.066753585397652, - "grad_norm": 1.468652606010437, - "learning_rate": 6.569748743718593e-05, - "loss": 5.2699, - "step": 34643 - }, - { - "epoch": 18.067275097783572, - "grad_norm": 1.5190958976745605, - "learning_rate": 6.56964824120603e-05, - "loss": 5.4236, - "step": 34644 - }, - { - "epoch": 18.06779661016949, - "grad_norm": 1.5703768730163574, - "learning_rate": 6.569547738693468e-05, - "loss": 4.9771, - "step": 34645 - }, - { - "epoch": 18.06831812255541, - "grad_norm": 1.6199052333831787, - "learning_rate": 6.569447236180905e-05, - "loss": 5.3266, - "step": 34646 - }, - { - "epoch": 18.06883963494133, - "grad_norm": 1.5301989316940308, - "learning_rate": 6.569346733668343e-05, - "loss": 5.0812, - "step": 34647 - }, - { - "epoch": 18.06936114732725, - "grad_norm": 1.481387972831726, - "learning_rate": 6.569246231155779e-05, - "loss": 5.1372, - "step": 34648 - }, - { - "epoch": 18.06988265971317, - "grad_norm": 1.6264225244522095, - "learning_rate": 6.569145728643217e-05, - "loss": 5.154, - "step": 34649 - }, - { - "epoch": 18.07040417209909, - "grad_norm": 1.5865304470062256, - "learning_rate": 6.569045226130653e-05, - "loss": 5.4165, - "step": 34650 - }, - { - "epoch": 18.070925684485008, - "grad_norm": 1.4768562316894531, - "learning_rate": 6.568944723618091e-05, - "loss": 5.2928, - "step": 34651 - }, - { - "epoch": 18.071447196870924, - "grad_norm": 1.6357741355895996, - "learning_rate": 6.568844221105528e-05, - "loss": 5.0816, - "step": 34652 - }, - { - "epoch": 18.071968709256844, - "grad_norm": 1.5055735111236572, - "learning_rate": 6.568743718592965e-05, - "loss": 5.0562, - "step": 34653 - }, - { - "epoch": 18.072490221642763, - "grad_norm": 1.5756511688232422, - "learning_rate": 6.568643216080402e-05, - "loss": 4.7764, - "step": 34654 - }, - { - "epoch": 18.073011734028682, - "grad_norm": 1.5165220499038696, - "learning_rate": 6.56854271356784e-05, - "loss": 5.2413, - "step": 34655 - }, - { - "epoch": 18.073533246414602, - "grad_norm": 1.6151864528656006, - "learning_rate": 6.568442211055277e-05, - "loss": 5.5243, - "step": 34656 - }, - { - "epoch": 18.07405475880052, - "grad_norm": 1.5366954803466797, - "learning_rate": 6.568341708542714e-05, - "loss": 4.672, - "step": 34657 - }, - { - "epoch": 18.07457627118644, - "grad_norm": 1.454894781112671, - "learning_rate": 6.568241206030151e-05, - "loss": 5.7864, - "step": 34658 - }, - { - "epoch": 18.07509778357236, - "grad_norm": 1.467299461364746, - "learning_rate": 6.568140703517588e-05, - "loss": 5.2823, - "step": 34659 - }, - { - "epoch": 18.07561929595828, - "grad_norm": 1.7381941080093384, - "learning_rate": 6.568040201005026e-05, - "loss": 4.52, - "step": 34660 - }, - { - "epoch": 18.0761408083442, - "grad_norm": 1.442779302597046, - "learning_rate": 6.567939698492462e-05, - "loss": 5.5065, - "step": 34661 - }, - { - "epoch": 18.07666232073012, - "grad_norm": 1.5210611820220947, - "learning_rate": 6.5678391959799e-05, - "loss": 4.8792, - "step": 34662 - }, - { - "epoch": 18.077183833116038, - "grad_norm": 1.5103472471237183, - "learning_rate": 6.567738693467336e-05, - "loss": 5.3209, - "step": 34663 - }, - { - "epoch": 18.077705345501954, - "grad_norm": 1.501133680343628, - "learning_rate": 6.567638190954774e-05, - "loss": 5.354, - "step": 34664 - }, - { - "epoch": 18.078226857887874, - "grad_norm": 1.5217347145080566, - "learning_rate": 6.567537688442212e-05, - "loss": 5.5244, - "step": 34665 - }, - { - "epoch": 18.078748370273793, - "grad_norm": 1.4500813484191895, - "learning_rate": 6.56743718592965e-05, - "loss": 5.2248, - "step": 34666 - }, - { - "epoch": 18.079269882659712, - "grad_norm": 1.5539237260818481, - "learning_rate": 6.567336683417086e-05, - "loss": 5.467, - "step": 34667 - }, - { - "epoch": 18.079791395045632, - "grad_norm": 1.5291252136230469, - "learning_rate": 6.567236180904523e-05, - "loss": 5.4098, - "step": 34668 - }, - { - "epoch": 18.08031290743155, - "grad_norm": 1.3955698013305664, - "learning_rate": 6.56713567839196e-05, - "loss": 5.5886, - "step": 34669 - }, - { - "epoch": 18.08083441981747, - "grad_norm": 1.548580288887024, - "learning_rate": 6.567035175879397e-05, - "loss": 5.1342, - "step": 34670 - }, - { - "epoch": 18.08135593220339, - "grad_norm": 1.4891387224197388, - "learning_rate": 6.566934673366835e-05, - "loss": 5.2413, - "step": 34671 - }, - { - "epoch": 18.08187744458931, - "grad_norm": 1.4824316501617432, - "learning_rate": 6.566834170854271e-05, - "loss": 5.5576, - "step": 34672 - }, - { - "epoch": 18.08239895697523, - "grad_norm": 1.5341542959213257, - "learning_rate": 6.566733668341709e-05, - "loss": 5.4321, - "step": 34673 - }, - { - "epoch": 18.08292046936115, - "grad_norm": 1.3576890230178833, - "learning_rate": 6.566633165829145e-05, - "loss": 5.3471, - "step": 34674 - }, - { - "epoch": 18.083441981747068, - "grad_norm": 1.4840565919876099, - "learning_rate": 6.566532663316583e-05, - "loss": 5.1134, - "step": 34675 - }, - { - "epoch": 18.083963494132984, - "grad_norm": 1.4401267766952515, - "learning_rate": 6.566432160804021e-05, - "loss": 5.6173, - "step": 34676 - }, - { - "epoch": 18.084485006518904, - "grad_norm": 1.4396402835845947, - "learning_rate": 6.566331658291459e-05, - "loss": 5.5454, - "step": 34677 - }, - { - "epoch": 18.085006518904823, - "grad_norm": 1.4900788068771362, - "learning_rate": 6.566231155778895e-05, - "loss": 5.0067, - "step": 34678 - }, - { - "epoch": 18.085528031290742, - "grad_norm": 1.5447666645050049, - "learning_rate": 6.566130653266333e-05, - "loss": 5.3049, - "step": 34679 - }, - { - "epoch": 18.086049543676662, - "grad_norm": 1.4292688369750977, - "learning_rate": 6.566030150753769e-05, - "loss": 5.4874, - "step": 34680 - }, - { - "epoch": 18.08657105606258, - "grad_norm": 1.5779412984848022, - "learning_rate": 6.565929648241206e-05, - "loss": 5.3022, - "step": 34681 - }, - { - "epoch": 18.0870925684485, - "grad_norm": 1.5314241647720337, - "learning_rate": 6.565829145728643e-05, - "loss": 5.1228, - "step": 34682 - }, - { - "epoch": 18.08761408083442, - "grad_norm": 1.550503134727478, - "learning_rate": 6.56572864321608e-05, - "loss": 5.6373, - "step": 34683 - }, - { - "epoch": 18.08813559322034, - "grad_norm": 1.4603941440582275, - "learning_rate": 6.565628140703518e-05, - "loss": 5.6687, - "step": 34684 - }, - { - "epoch": 18.08865710560626, - "grad_norm": 1.536291480064392, - "learning_rate": 6.565527638190955e-05, - "loss": 5.3633, - "step": 34685 - }, - { - "epoch": 18.08917861799218, - "grad_norm": 1.487256646156311, - "learning_rate": 6.565427135678393e-05, - "loss": 5.1725, - "step": 34686 - }, - { - "epoch": 18.089700130378098, - "grad_norm": 1.5196473598480225, - "learning_rate": 6.56532663316583e-05, - "loss": 5.0312, - "step": 34687 - }, - { - "epoch": 18.090221642764014, - "grad_norm": 1.6211988925933838, - "learning_rate": 6.565226130653267e-05, - "loss": 5.1287, - "step": 34688 - }, - { - "epoch": 18.090743155149934, - "grad_norm": 1.490683913230896, - "learning_rate": 6.565125628140704e-05, - "loss": 5.4484, - "step": 34689 - }, - { - "epoch": 18.091264667535853, - "grad_norm": 1.5342042446136475, - "learning_rate": 6.565025125628142e-05, - "loss": 5.0193, - "step": 34690 - }, - { - "epoch": 18.091786179921773, - "grad_norm": 1.5123234987258911, - "learning_rate": 6.564924623115578e-05, - "loss": 4.9961, - "step": 34691 - }, - { - "epoch": 18.092307692307692, - "grad_norm": 1.5340791940689087, - "learning_rate": 6.564824120603016e-05, - "loss": 5.0627, - "step": 34692 - }, - { - "epoch": 18.09282920469361, - "grad_norm": 1.5588375329971313, - "learning_rate": 6.564723618090452e-05, - "loss": 5.0206, - "step": 34693 - }, - { - "epoch": 18.09335071707953, - "grad_norm": 1.6013777256011963, - "learning_rate": 6.564623115577889e-05, - "loss": 5.3824, - "step": 34694 - }, - { - "epoch": 18.09387222946545, - "grad_norm": 1.5170643329620361, - "learning_rate": 6.564522613065326e-05, - "loss": 5.3069, - "step": 34695 - }, - { - "epoch": 18.09439374185137, - "grad_norm": 1.5521310567855835, - "learning_rate": 6.564422110552764e-05, - "loss": 5.4812, - "step": 34696 - }, - { - "epoch": 18.09491525423729, - "grad_norm": 1.5121878385543823, - "learning_rate": 6.564321608040202e-05, - "loss": 4.9242, - "step": 34697 - }, - { - "epoch": 18.09543676662321, - "grad_norm": 1.5706411600112915, - "learning_rate": 6.564221105527638e-05, - "loss": 4.4191, - "step": 34698 - }, - { - "epoch": 18.09595827900913, - "grad_norm": 1.531265139579773, - "learning_rate": 6.564120603015076e-05, - "loss": 5.3713, - "step": 34699 - }, - { - "epoch": 18.096479791395044, - "grad_norm": 1.4260010719299316, - "learning_rate": 6.564020100502513e-05, - "loss": 5.6904, - "step": 34700 - }, - { - "epoch": 18.097001303780964, - "grad_norm": 1.5078775882720947, - "learning_rate": 6.56391959798995e-05, - "loss": 5.3762, - "step": 34701 - }, - { - "epoch": 18.097522816166883, - "grad_norm": 1.4709781408309937, - "learning_rate": 6.563819095477387e-05, - "loss": 5.0558, - "step": 34702 - }, - { - "epoch": 18.098044328552803, - "grad_norm": 1.5705885887145996, - "learning_rate": 6.563718592964825e-05, - "loss": 5.0662, - "step": 34703 - }, - { - "epoch": 18.098565840938722, - "grad_norm": 1.5149872303009033, - "learning_rate": 6.563618090452261e-05, - "loss": 4.705, - "step": 34704 - }, - { - "epoch": 18.09908735332464, - "grad_norm": 1.4991281032562256, - "learning_rate": 6.563517587939699e-05, - "loss": 5.6538, - "step": 34705 - }, - { - "epoch": 18.09960886571056, - "grad_norm": 1.5288684368133545, - "learning_rate": 6.563417085427137e-05, - "loss": 5.0997, - "step": 34706 - }, - { - "epoch": 18.10013037809648, - "grad_norm": 1.5611311197280884, - "learning_rate": 6.563316582914573e-05, - "loss": 5.0954, - "step": 34707 - }, - { - "epoch": 18.1006518904824, - "grad_norm": 1.5838377475738525, - "learning_rate": 6.563216080402011e-05, - "loss": 5.4684, - "step": 34708 - }, - { - "epoch": 18.10117340286832, - "grad_norm": 1.5602283477783203, - "learning_rate": 6.563115577889447e-05, - "loss": 5.1135, - "step": 34709 - }, - { - "epoch": 18.10169491525424, - "grad_norm": 1.5354801416397095, - "learning_rate": 6.563015075376885e-05, - "loss": 5.0882, - "step": 34710 - }, - { - "epoch": 18.102216427640155, - "grad_norm": 1.4216192960739136, - "learning_rate": 6.562914572864321e-05, - "loss": 5.4656, - "step": 34711 - }, - { - "epoch": 18.102737940026074, - "grad_norm": 1.3924976587295532, - "learning_rate": 6.562814070351759e-05, - "loss": 5.6316, - "step": 34712 - }, - { - "epoch": 18.103259452411994, - "grad_norm": 1.3719847202301025, - "learning_rate": 6.562713567839196e-05, - "loss": 4.708, - "step": 34713 - }, - { - "epoch": 18.103780964797913, - "grad_norm": 1.4247363805770874, - "learning_rate": 6.562613065326633e-05, - "loss": 5.4642, - "step": 34714 - }, - { - "epoch": 18.104302477183833, - "grad_norm": 1.434099555015564, - "learning_rate": 6.56251256281407e-05, - "loss": 5.6319, - "step": 34715 - }, - { - "epoch": 18.104823989569752, - "grad_norm": 1.486373782157898, - "learning_rate": 6.562412060301508e-05, - "loss": 5.4936, - "step": 34716 - }, - { - "epoch": 18.10534550195567, - "grad_norm": 1.4984190464019775, - "learning_rate": 6.562311557788945e-05, - "loss": 5.252, - "step": 34717 - }, - { - "epoch": 18.10586701434159, - "grad_norm": 1.5200908184051514, - "learning_rate": 6.562211055276383e-05, - "loss": 5.3486, - "step": 34718 - }, - { - "epoch": 18.10638852672751, - "grad_norm": 1.5217214822769165, - "learning_rate": 6.56211055276382e-05, - "loss": 5.2665, - "step": 34719 - }, - { - "epoch": 18.10691003911343, - "grad_norm": 1.4659438133239746, - "learning_rate": 6.562010050251257e-05, - "loss": 5.2444, - "step": 34720 - }, - { - "epoch": 18.10743155149935, - "grad_norm": 1.489905595779419, - "learning_rate": 6.561909547738694e-05, - "loss": 5.5706, - "step": 34721 - }, - { - "epoch": 18.10795306388527, - "grad_norm": 1.5095866918563843, - "learning_rate": 6.56180904522613e-05, - "loss": 5.4657, - "step": 34722 - }, - { - "epoch": 18.108474576271185, - "grad_norm": 1.4741783142089844, - "learning_rate": 6.561708542713568e-05, - "loss": 5.578, - "step": 34723 - }, - { - "epoch": 18.108996088657104, - "grad_norm": 1.3754218816757202, - "learning_rate": 6.561608040201004e-05, - "loss": 5.6938, - "step": 34724 - }, - { - "epoch": 18.109517601043024, - "grad_norm": 1.5434041023254395, - "learning_rate": 6.561507537688442e-05, - "loss": 5.1074, - "step": 34725 - }, - { - "epoch": 18.110039113428943, - "grad_norm": 1.4458158016204834, - "learning_rate": 6.56140703517588e-05, - "loss": 5.6353, - "step": 34726 - }, - { - "epoch": 18.110560625814863, - "grad_norm": 1.5700881481170654, - "learning_rate": 6.561306532663318e-05, - "loss": 5.1557, - "step": 34727 - }, - { - "epoch": 18.111082138200782, - "grad_norm": 1.551288366317749, - "learning_rate": 6.561206030150754e-05, - "loss": 5.3828, - "step": 34728 - }, - { - "epoch": 18.1116036505867, - "grad_norm": 1.4929635524749756, - "learning_rate": 6.561105527638192e-05, - "loss": 4.8684, - "step": 34729 - }, - { - "epoch": 18.11212516297262, - "grad_norm": 1.4571599960327148, - "learning_rate": 6.561005025125628e-05, - "loss": 5.5977, - "step": 34730 - }, - { - "epoch": 18.11264667535854, - "grad_norm": 1.4532625675201416, - "learning_rate": 6.560904522613066e-05, - "loss": 5.0411, - "step": 34731 - }, - { - "epoch": 18.11316818774446, - "grad_norm": 1.4782838821411133, - "learning_rate": 6.560804020100503e-05, - "loss": 5.5598, - "step": 34732 - }, - { - "epoch": 18.11368970013038, - "grad_norm": 1.5391632318496704, - "learning_rate": 6.56070351758794e-05, - "loss": 5.2991, - "step": 34733 - }, - { - "epoch": 18.1142112125163, - "grad_norm": 1.535821795463562, - "learning_rate": 6.560603015075377e-05, - "loss": 4.8132, - "step": 34734 - }, - { - "epoch": 18.114732724902215, - "grad_norm": 1.415401577949524, - "learning_rate": 6.560502512562813e-05, - "loss": 5.4479, - "step": 34735 - }, - { - "epoch": 18.115254237288134, - "grad_norm": 1.4523394107818604, - "learning_rate": 6.560402010050251e-05, - "loss": 5.835, - "step": 34736 - }, - { - "epoch": 18.115775749674054, - "grad_norm": 1.603868007659912, - "learning_rate": 6.560301507537689e-05, - "loss": 5.401, - "step": 34737 - }, - { - "epoch": 18.116297262059973, - "grad_norm": 1.5369523763656616, - "learning_rate": 6.560201005025127e-05, - "loss": 5.2468, - "step": 34738 - }, - { - "epoch": 18.116818774445893, - "grad_norm": 1.5394657850265503, - "learning_rate": 6.560100502512563e-05, - "loss": 5.2049, - "step": 34739 - }, - { - "epoch": 18.117340286831812, - "grad_norm": 1.5186843872070312, - "learning_rate": 6.560000000000001e-05, - "loss": 5.5596, - "step": 34740 - }, - { - "epoch": 18.11786179921773, - "grad_norm": 1.5636845827102661, - "learning_rate": 6.559899497487437e-05, - "loss": 5.2315, - "step": 34741 - }, - { - "epoch": 18.11838331160365, - "grad_norm": 1.5405263900756836, - "learning_rate": 6.559798994974875e-05, - "loss": 5.6729, - "step": 34742 - }, - { - "epoch": 18.11890482398957, - "grad_norm": 1.5722442865371704, - "learning_rate": 6.559698492462312e-05, - "loss": 5.1296, - "step": 34743 - }, - { - "epoch": 18.11942633637549, - "grad_norm": 1.557265281677246, - "learning_rate": 6.559597989949749e-05, - "loss": 4.9102, - "step": 34744 - }, - { - "epoch": 18.11994784876141, - "grad_norm": 1.5416128635406494, - "learning_rate": 6.559497487437186e-05, - "loss": 5.5092, - "step": 34745 - }, - { - "epoch": 18.12046936114733, - "grad_norm": 1.5286929607391357, - "learning_rate": 6.559396984924624e-05, - "loss": 5.6569, - "step": 34746 - }, - { - "epoch": 18.120990873533245, - "grad_norm": 1.4312111139297485, - "learning_rate": 6.559296482412061e-05, - "loss": 5.4523, - "step": 34747 - }, - { - "epoch": 18.121512385919164, - "grad_norm": 1.503358244895935, - "learning_rate": 6.559195979899498e-05, - "loss": 5.488, - "step": 34748 - }, - { - "epoch": 18.122033898305084, - "grad_norm": 1.6116706132888794, - "learning_rate": 6.559095477386936e-05, - "loss": 4.6766, - "step": 34749 - }, - { - "epoch": 18.122555410691003, - "grad_norm": 1.5582810640335083, - "learning_rate": 6.558994974874372e-05, - "loss": 5.288, - "step": 34750 - }, - { - "epoch": 18.123076923076923, - "grad_norm": 1.4830929040908813, - "learning_rate": 6.55889447236181e-05, - "loss": 4.614, - "step": 34751 - }, - { - "epoch": 18.123598435462842, - "grad_norm": 1.5543071031570435, - "learning_rate": 6.558793969849246e-05, - "loss": 5.1815, - "step": 34752 - }, - { - "epoch": 18.12411994784876, - "grad_norm": 1.4314194917678833, - "learning_rate": 6.558693467336684e-05, - "loss": 5.5246, - "step": 34753 - }, - { - "epoch": 18.12464146023468, - "grad_norm": 1.4149539470672607, - "learning_rate": 6.55859296482412e-05, - "loss": 4.9751, - "step": 34754 - }, - { - "epoch": 18.1251629726206, - "grad_norm": 1.4905495643615723, - "learning_rate": 6.558492462311558e-05, - "loss": 5.3322, - "step": 34755 - }, - { - "epoch": 18.12568448500652, - "grad_norm": 1.5954179763793945, - "learning_rate": 6.558391959798995e-05, - "loss": 4.996, - "step": 34756 - }, - { - "epoch": 18.12620599739244, - "grad_norm": 1.4741817712783813, - "learning_rate": 6.558291457286432e-05, - "loss": 5.4455, - "step": 34757 - }, - { - "epoch": 18.12672750977836, - "grad_norm": 1.4491560459136963, - "learning_rate": 6.55819095477387e-05, - "loss": 5.4061, - "step": 34758 - }, - { - "epoch": 18.127249022164275, - "grad_norm": 1.458097219467163, - "learning_rate": 6.558090452261308e-05, - "loss": 5.5017, - "step": 34759 - }, - { - "epoch": 18.127770534550194, - "grad_norm": 1.5456874370574951, - "learning_rate": 6.557989949748744e-05, - "loss": 5.1826, - "step": 34760 - }, - { - "epoch": 18.128292046936114, - "grad_norm": 1.4613863229751587, - "learning_rate": 6.557889447236181e-05, - "loss": 5.2529, - "step": 34761 - }, - { - "epoch": 18.128813559322033, - "grad_norm": 1.490883469581604, - "learning_rate": 6.557788944723619e-05, - "loss": 5.4128, - "step": 34762 - }, - { - "epoch": 18.129335071707953, - "grad_norm": 1.5510258674621582, - "learning_rate": 6.557688442211055e-05, - "loss": 5.0327, - "step": 34763 - }, - { - "epoch": 18.129856584093872, - "grad_norm": 1.6998423337936401, - "learning_rate": 6.557587939698493e-05, - "loss": 5.353, - "step": 34764 - }, - { - "epoch": 18.13037809647979, - "grad_norm": 1.536425232887268, - "learning_rate": 6.557487437185929e-05, - "loss": 5.1388, - "step": 34765 - }, - { - "epoch": 18.13089960886571, - "grad_norm": 1.5780502557754517, - "learning_rate": 6.557386934673367e-05, - "loss": 5.1475, - "step": 34766 - }, - { - "epoch": 18.13142112125163, - "grad_norm": 1.4200111627578735, - "learning_rate": 6.557286432160803e-05, - "loss": 5.4249, - "step": 34767 - }, - { - "epoch": 18.13194263363755, - "grad_norm": 1.576996922492981, - "learning_rate": 6.557185929648241e-05, - "loss": 5.1582, - "step": 34768 - }, - { - "epoch": 18.13246414602347, - "grad_norm": 1.4303525686264038, - "learning_rate": 6.557085427135679e-05, - "loss": 5.3291, - "step": 34769 - }, - { - "epoch": 18.13298565840939, - "grad_norm": 1.5240280628204346, - "learning_rate": 6.556984924623117e-05, - "loss": 5.145, - "step": 34770 - }, - { - "epoch": 18.133507170795305, - "grad_norm": 1.5700992345809937, - "learning_rate": 6.556884422110553e-05, - "loss": 5.0276, - "step": 34771 - }, - { - "epoch": 18.134028683181224, - "grad_norm": 1.4257880449295044, - "learning_rate": 6.556783919597991e-05, - "loss": 5.6319, - "step": 34772 - }, - { - "epoch": 18.134550195567144, - "grad_norm": 1.4043596982955933, - "learning_rate": 6.556683417085427e-05, - "loss": 5.516, - "step": 34773 - }, - { - "epoch": 18.135071707953063, - "grad_norm": 1.4611157178878784, - "learning_rate": 6.556582914572864e-05, - "loss": 4.9041, - "step": 34774 - }, - { - "epoch": 18.135593220338983, - "grad_norm": 1.4195982217788696, - "learning_rate": 6.556482412060302e-05, - "loss": 5.2208, - "step": 34775 - }, - { - "epoch": 18.136114732724902, - "grad_norm": 1.669226050376892, - "learning_rate": 6.556381909547738e-05, - "loss": 4.9217, - "step": 34776 - }, - { - "epoch": 18.13663624511082, - "grad_norm": 1.4859085083007812, - "learning_rate": 6.556281407035176e-05, - "loss": 5.5533, - "step": 34777 - }, - { - "epoch": 18.13715775749674, - "grad_norm": 1.444925308227539, - "learning_rate": 6.556180904522614e-05, - "loss": 5.3828, - "step": 34778 - }, - { - "epoch": 18.13767926988266, - "grad_norm": 1.4358723163604736, - "learning_rate": 6.556080402010051e-05, - "loss": 4.9216, - "step": 34779 - }, - { - "epoch": 18.13820078226858, - "grad_norm": 1.5191117525100708, - "learning_rate": 6.555979899497488e-05, - "loss": 5.0092, - "step": 34780 - }, - { - "epoch": 18.1387222946545, - "grad_norm": 1.5146043300628662, - "learning_rate": 6.555879396984926e-05, - "loss": 5.2127, - "step": 34781 - }, - { - "epoch": 18.13924380704042, - "grad_norm": 1.4268568754196167, - "learning_rate": 6.555778894472362e-05, - "loss": 5.266, - "step": 34782 - }, - { - "epoch": 18.139765319426335, - "grad_norm": 1.5672972202301025, - "learning_rate": 6.5556783919598e-05, - "loss": 4.8385, - "step": 34783 - }, - { - "epoch": 18.140286831812254, - "grad_norm": 1.5006475448608398, - "learning_rate": 6.555577889447236e-05, - "loss": 4.6466, - "step": 34784 - }, - { - "epoch": 18.140808344198174, - "grad_norm": 1.7708674669265747, - "learning_rate": 6.555477386934674e-05, - "loss": 4.6203, - "step": 34785 - }, - { - "epoch": 18.141329856584093, - "grad_norm": 1.4927825927734375, - "learning_rate": 6.55537688442211e-05, - "loss": 5.5905, - "step": 34786 - }, - { - "epoch": 18.141851368970013, - "grad_norm": 1.5249882936477661, - "learning_rate": 6.555276381909547e-05, - "loss": 5.6026, - "step": 34787 - }, - { - "epoch": 18.142372881355932, - "grad_norm": 1.5089328289031982, - "learning_rate": 6.555175879396985e-05, - "loss": 4.7361, - "step": 34788 - }, - { - "epoch": 18.14289439374185, - "grad_norm": 1.4815776348114014, - "learning_rate": 6.555075376884422e-05, - "loss": 5.2353, - "step": 34789 - }, - { - "epoch": 18.14341590612777, - "grad_norm": 1.5689575672149658, - "learning_rate": 6.55497487437186e-05, - "loss": 5.1548, - "step": 34790 - }, - { - "epoch": 18.14393741851369, - "grad_norm": 1.4064397811889648, - "learning_rate": 6.554874371859297e-05, - "loss": 5.477, - "step": 34791 - }, - { - "epoch": 18.14445893089961, - "grad_norm": 1.438963770866394, - "learning_rate": 6.554773869346734e-05, - "loss": 5.5769, - "step": 34792 - }, - { - "epoch": 18.14498044328553, - "grad_norm": 1.4705028533935547, - "learning_rate": 6.554673366834171e-05, - "loss": 5.2047, - "step": 34793 - }, - { - "epoch": 18.14550195567145, - "grad_norm": 1.6379690170288086, - "learning_rate": 6.554572864321609e-05, - "loss": 5.208, - "step": 34794 - }, - { - "epoch": 18.146023468057365, - "grad_norm": 1.703682780265808, - "learning_rate": 6.554472361809045e-05, - "loss": 5.1877, - "step": 34795 - }, - { - "epoch": 18.146544980443284, - "grad_norm": 1.564548373222351, - "learning_rate": 6.554371859296483e-05, - "loss": 5.3858, - "step": 34796 - }, - { - "epoch": 18.147066492829204, - "grad_norm": 1.5450574159622192, - "learning_rate": 6.554271356783919e-05, - "loss": 5.2099, - "step": 34797 - }, - { - "epoch": 18.147588005215123, - "grad_norm": 1.4630632400512695, - "learning_rate": 6.554170854271357e-05, - "loss": 4.8974, - "step": 34798 - }, - { - "epoch": 18.148109517601043, - "grad_norm": 1.522316336631775, - "learning_rate": 6.554070351758795e-05, - "loss": 5.138, - "step": 34799 - }, - { - "epoch": 18.148631029986962, - "grad_norm": 1.5295764207839966, - "learning_rate": 6.553969849246231e-05, - "loss": 4.9352, - "step": 34800 - }, - { - "epoch": 18.14915254237288, - "grad_norm": 1.441697120666504, - "learning_rate": 6.553869346733669e-05, - "loss": 5.7123, - "step": 34801 - }, - { - "epoch": 18.1496740547588, - "grad_norm": 1.4220669269561768, - "learning_rate": 6.553768844221105e-05, - "loss": 5.5786, - "step": 34802 - }, - { - "epoch": 18.15019556714472, - "grad_norm": 1.414972186088562, - "learning_rate": 6.553668341708543e-05, - "loss": 5.8154, - "step": 34803 - }, - { - "epoch": 18.15071707953064, - "grad_norm": 1.4886680841445923, - "learning_rate": 6.55356783919598e-05, - "loss": 5.6275, - "step": 34804 - }, - { - "epoch": 18.15123859191656, - "grad_norm": 1.4749085903167725, - "learning_rate": 6.553467336683417e-05, - "loss": 5.3677, - "step": 34805 - }, - { - "epoch": 18.151760104302475, - "grad_norm": 1.5885953903198242, - "learning_rate": 6.553366834170854e-05, - "loss": 5.2762, - "step": 34806 - }, - { - "epoch": 18.152281616688395, - "grad_norm": 1.691893458366394, - "learning_rate": 6.553266331658292e-05, - "loss": 4.6892, - "step": 34807 - }, - { - "epoch": 18.152803129074314, - "grad_norm": 1.3592748641967773, - "learning_rate": 6.553165829145728e-05, - "loss": 5.5479, - "step": 34808 - }, - { - "epoch": 18.153324641460234, - "grad_norm": 1.5086095333099365, - "learning_rate": 6.553065326633166e-05, - "loss": 5.5778, - "step": 34809 - }, - { - "epoch": 18.153846153846153, - "grad_norm": 1.6411833763122559, - "learning_rate": 6.552964824120604e-05, - "loss": 5.2297, - "step": 34810 - }, - { - "epoch": 18.154367666232073, - "grad_norm": 1.4941132068634033, - "learning_rate": 6.552864321608041e-05, - "loss": 5.5801, - "step": 34811 - }, - { - "epoch": 18.154889178617992, - "grad_norm": 1.6270151138305664, - "learning_rate": 6.552763819095478e-05, - "loss": 5.5104, - "step": 34812 - }, - { - "epoch": 18.15541069100391, - "grad_norm": 1.6110855340957642, - "learning_rate": 6.552663316582916e-05, - "loss": 5.2098, - "step": 34813 - }, - { - "epoch": 18.15593220338983, - "grad_norm": 1.5446337461471558, - "learning_rate": 6.552562814070352e-05, - "loss": 4.9073, - "step": 34814 - }, - { - "epoch": 18.15645371577575, - "grad_norm": 1.5922949314117432, - "learning_rate": 6.552462311557789e-05, - "loss": 4.6378, - "step": 34815 - }, - { - "epoch": 18.15697522816167, - "grad_norm": 1.4414341449737549, - "learning_rate": 6.552361809045226e-05, - "loss": 5.5202, - "step": 34816 - }, - { - "epoch": 18.15749674054759, - "grad_norm": 1.5039165019989014, - "learning_rate": 6.552261306532663e-05, - "loss": 5.0949, - "step": 34817 - }, - { - "epoch": 18.158018252933505, - "grad_norm": 1.5626249313354492, - "learning_rate": 6.5521608040201e-05, - "loss": 5.063, - "step": 34818 - }, - { - "epoch": 18.158539765319425, - "grad_norm": 1.4279576539993286, - "learning_rate": 6.552060301507538e-05, - "loss": 5.4147, - "step": 34819 - }, - { - "epoch": 18.159061277705344, - "grad_norm": 1.4760702848434448, - "learning_rate": 6.551959798994976e-05, - "loss": 5.3884, - "step": 34820 - }, - { - "epoch": 18.159582790091264, - "grad_norm": 1.4193261861801147, - "learning_rate": 6.551859296482413e-05, - "loss": 5.4898, - "step": 34821 - }, - { - "epoch": 18.160104302477183, - "grad_norm": 1.4374274015426636, - "learning_rate": 6.55175879396985e-05, - "loss": 5.3901, - "step": 34822 - }, - { - "epoch": 18.160625814863103, - "grad_norm": 1.558549404144287, - "learning_rate": 6.551658291457287e-05, - "loss": 4.9953, - "step": 34823 - }, - { - "epoch": 18.161147327249022, - "grad_norm": 1.5603439807891846, - "learning_rate": 6.551557788944724e-05, - "loss": 5.2498, - "step": 34824 - }, - { - "epoch": 18.16166883963494, - "grad_norm": 1.5648305416107178, - "learning_rate": 6.551457286432161e-05, - "loss": 5.5491, - "step": 34825 - }, - { - "epoch": 18.16219035202086, - "grad_norm": 1.36715829372406, - "learning_rate": 6.551356783919599e-05, - "loss": 5.3994, - "step": 34826 - }, - { - "epoch": 18.16271186440678, - "grad_norm": 1.484755516052246, - "learning_rate": 6.551256281407035e-05, - "loss": 5.2725, - "step": 34827 - }, - { - "epoch": 18.1632333767927, - "grad_norm": 1.4975780248641968, - "learning_rate": 6.551155778894472e-05, - "loss": 5.0361, - "step": 34828 - }, - { - "epoch": 18.16375488917862, - "grad_norm": 1.9857290983200073, - "learning_rate": 6.55105527638191e-05, - "loss": 5.5029, - "step": 34829 - }, - { - "epoch": 18.164276401564535, - "grad_norm": 1.5992182493209839, - "learning_rate": 6.550954773869347e-05, - "loss": 5.1238, - "step": 34830 - }, - { - "epoch": 18.164797913950455, - "grad_norm": 1.4990993738174438, - "learning_rate": 6.550854271356785e-05, - "loss": 5.3654, - "step": 34831 - }, - { - "epoch": 18.165319426336374, - "grad_norm": 1.5324193239212036, - "learning_rate": 6.550753768844221e-05, - "loss": 5.1376, - "step": 34832 - }, - { - "epoch": 18.165840938722294, - "grad_norm": 1.504879355430603, - "learning_rate": 6.550653266331659e-05, - "loss": 5.4207, - "step": 34833 - }, - { - "epoch": 18.166362451108213, - "grad_norm": 1.577104926109314, - "learning_rate": 6.550552763819096e-05, - "loss": 5.0177, - "step": 34834 - }, - { - "epoch": 18.166883963494133, - "grad_norm": 1.4338055849075317, - "learning_rate": 6.550452261306533e-05, - "loss": 5.6527, - "step": 34835 - }, - { - "epoch": 18.167405475880052, - "grad_norm": 1.4941486120224, - "learning_rate": 6.55035175879397e-05, - "loss": 5.2445, - "step": 34836 - }, - { - "epoch": 18.16792698826597, - "grad_norm": 1.595936894416809, - "learning_rate": 6.550251256281408e-05, - "loss": 5.4488, - "step": 34837 - }, - { - "epoch": 18.16844850065189, - "grad_norm": 1.5036816596984863, - "learning_rate": 6.550150753768844e-05, - "loss": 5.6253, - "step": 34838 - }, - { - "epoch": 18.16897001303781, - "grad_norm": 1.4664523601531982, - "learning_rate": 6.550050251256282e-05, - "loss": 5.0494, - "step": 34839 - }, - { - "epoch": 18.16949152542373, - "grad_norm": 1.5147820711135864, - "learning_rate": 6.54994974874372e-05, - "loss": 5.2882, - "step": 34840 - }, - { - "epoch": 18.17001303780965, - "grad_norm": 1.4081207513809204, - "learning_rate": 6.549849246231156e-05, - "loss": 5.5182, - "step": 34841 - }, - { - "epoch": 18.170534550195566, - "grad_norm": 1.5154842138290405, - "learning_rate": 6.549748743718594e-05, - "loss": 5.4794, - "step": 34842 - }, - { - "epoch": 18.171056062581485, - "grad_norm": 1.4003199338912964, - "learning_rate": 6.54964824120603e-05, - "loss": 5.3934, - "step": 34843 - }, - { - "epoch": 18.171577574967404, - "grad_norm": 1.4939452409744263, - "learning_rate": 6.549547738693468e-05, - "loss": 5.3668, - "step": 34844 - }, - { - "epoch": 18.172099087353324, - "grad_norm": 1.5782525539398193, - "learning_rate": 6.549447236180904e-05, - "loss": 5.5585, - "step": 34845 - }, - { - "epoch": 18.172620599739243, - "grad_norm": 1.6183388233184814, - "learning_rate": 6.549346733668342e-05, - "loss": 5.0846, - "step": 34846 - }, - { - "epoch": 18.173142112125163, - "grad_norm": 1.4439342021942139, - "learning_rate": 6.549246231155779e-05, - "loss": 5.4851, - "step": 34847 - }, - { - "epoch": 18.173663624511082, - "grad_norm": 1.5396722555160522, - "learning_rate": 6.549145728643216e-05, - "loss": 5.0394, - "step": 34848 - }, - { - "epoch": 18.174185136897, - "grad_norm": 1.549690842628479, - "learning_rate": 6.549045226130653e-05, - "loss": 5.1813, - "step": 34849 - }, - { - "epoch": 18.17470664928292, - "grad_norm": 1.4745891094207764, - "learning_rate": 6.54894472361809e-05, - "loss": 5.2724, - "step": 34850 - }, - { - "epoch": 18.17522816166884, - "grad_norm": 1.4597036838531494, - "learning_rate": 6.548844221105528e-05, - "loss": 5.6108, - "step": 34851 - }, - { - "epoch": 18.17574967405476, - "grad_norm": 1.4187307357788086, - "learning_rate": 6.548743718592966e-05, - "loss": 5.6991, - "step": 34852 - }, - { - "epoch": 18.17627118644068, - "grad_norm": 1.4298580884933472, - "learning_rate": 6.548643216080403e-05, - "loss": 5.3496, - "step": 34853 - }, - { - "epoch": 18.176792698826596, - "grad_norm": 1.548215389251709, - "learning_rate": 6.548542713567839e-05, - "loss": 4.8909, - "step": 34854 - }, - { - "epoch": 18.177314211212515, - "grad_norm": 1.4845858812332153, - "learning_rate": 6.548442211055277e-05, - "loss": 5.1576, - "step": 34855 - }, - { - "epoch": 18.177835723598434, - "grad_norm": 1.492318868637085, - "learning_rate": 6.548341708542713e-05, - "loss": 4.8227, - "step": 34856 - }, - { - "epoch": 18.178357235984354, - "grad_norm": 1.4679827690124512, - "learning_rate": 6.548241206030151e-05, - "loss": 5.2984, - "step": 34857 - }, - { - "epoch": 18.178878748370273, - "grad_norm": 1.3732436895370483, - "learning_rate": 6.548140703517587e-05, - "loss": 5.5394, - "step": 34858 - }, - { - "epoch": 18.179400260756193, - "grad_norm": 1.5245345830917358, - "learning_rate": 6.548040201005025e-05, - "loss": 5.2395, - "step": 34859 - }, - { - "epoch": 18.179921773142112, - "grad_norm": 1.454908013343811, - "learning_rate": 6.547939698492463e-05, - "loss": 5.6756, - "step": 34860 - }, - { - "epoch": 18.180443285528032, - "grad_norm": 1.5772024393081665, - "learning_rate": 6.547839195979901e-05, - "loss": 5.3701, - "step": 34861 - }, - { - "epoch": 18.18096479791395, - "grad_norm": 1.4381818771362305, - "learning_rate": 6.547738693467337e-05, - "loss": 5.4803, - "step": 34862 - }, - { - "epoch": 18.18148631029987, - "grad_norm": 1.529289722442627, - "learning_rate": 6.547638190954775e-05, - "loss": 5.4907, - "step": 34863 - }, - { - "epoch": 18.18200782268579, - "grad_norm": 1.5543384552001953, - "learning_rate": 6.547537688442211e-05, - "loss": 5.3666, - "step": 34864 - }, - { - "epoch": 18.18252933507171, - "grad_norm": 1.6258039474487305, - "learning_rate": 6.547437185929649e-05, - "loss": 5.0658, - "step": 34865 - }, - { - "epoch": 18.183050847457626, - "grad_norm": 1.5146974325180054, - "learning_rate": 6.547336683417086e-05, - "loss": 4.8877, - "step": 34866 - }, - { - "epoch": 18.183572359843545, - "grad_norm": 1.5242600440979004, - "learning_rate": 6.547236180904522e-05, - "loss": 5.8429, - "step": 34867 - }, - { - "epoch": 18.184093872229464, - "grad_norm": 1.8700261116027832, - "learning_rate": 6.54713567839196e-05, - "loss": 4.7958, - "step": 34868 - }, - { - "epoch": 18.184615384615384, - "grad_norm": 1.44826340675354, - "learning_rate": 6.547035175879396e-05, - "loss": 5.4566, - "step": 34869 - }, - { - "epoch": 18.185136897001303, - "grad_norm": 1.536860704421997, - "learning_rate": 6.546934673366834e-05, - "loss": 5.2347, - "step": 34870 - }, - { - "epoch": 18.185658409387223, - "grad_norm": 1.4871758222579956, - "learning_rate": 6.546834170854272e-05, - "loss": 4.9105, - "step": 34871 - }, - { - "epoch": 18.186179921773142, - "grad_norm": 1.555174469947815, - "learning_rate": 6.54673366834171e-05, - "loss": 5.0048, - "step": 34872 - }, - { - "epoch": 18.186701434159062, - "grad_norm": 1.4288769960403442, - "learning_rate": 6.546633165829146e-05, - "loss": 5.4919, - "step": 34873 - }, - { - "epoch": 18.18722294654498, - "grad_norm": 1.5099208354949951, - "learning_rate": 6.546532663316584e-05, - "loss": 4.9789, - "step": 34874 - }, - { - "epoch": 18.1877444589309, - "grad_norm": 1.408856749534607, - "learning_rate": 6.54643216080402e-05, - "loss": 5.4968, - "step": 34875 - }, - { - "epoch": 18.18826597131682, - "grad_norm": 1.4189766645431519, - "learning_rate": 6.546331658291458e-05, - "loss": 5.9798, - "step": 34876 - }, - { - "epoch": 18.18878748370274, - "grad_norm": 1.5772254467010498, - "learning_rate": 6.546231155778894e-05, - "loss": 5.4231, - "step": 34877 - }, - { - "epoch": 18.189308996088656, - "grad_norm": 1.5328242778778076, - "learning_rate": 6.546130653266332e-05, - "loss": 5.3255, - "step": 34878 - }, - { - "epoch": 18.189830508474575, - "grad_norm": 1.3975861072540283, - "learning_rate": 6.546030150753769e-05, - "loss": 5.0906, - "step": 34879 - }, - { - "epoch": 18.190352020860495, - "grad_norm": 1.54291832447052, - "learning_rate": 6.545929648241206e-05, - "loss": 5.7682, - "step": 34880 - }, - { - "epoch": 18.190873533246414, - "grad_norm": 1.4059083461761475, - "learning_rate": 6.545829145728644e-05, - "loss": 5.6107, - "step": 34881 - }, - { - "epoch": 18.191395045632333, - "grad_norm": 1.4131250381469727, - "learning_rate": 6.54572864321608e-05, - "loss": 6.0269, - "step": 34882 - }, - { - "epoch": 18.191916558018253, - "grad_norm": 1.5313061475753784, - "learning_rate": 6.545628140703518e-05, - "loss": 4.904, - "step": 34883 - }, - { - "epoch": 18.192438070404172, - "grad_norm": 1.4025726318359375, - "learning_rate": 6.545527638190955e-05, - "loss": 5.6513, - "step": 34884 - }, - { - "epoch": 18.192959582790092, - "grad_norm": 1.5110735893249512, - "learning_rate": 6.545427135678393e-05, - "loss": 5.4355, - "step": 34885 - }, - { - "epoch": 18.19348109517601, - "grad_norm": 1.4641635417938232, - "learning_rate": 6.545326633165829e-05, - "loss": 5.3327, - "step": 34886 - }, - { - "epoch": 18.19400260756193, - "grad_norm": 1.3486363887786865, - "learning_rate": 6.545226130653267e-05, - "loss": 4.8032, - "step": 34887 - }, - { - "epoch": 18.19452411994785, - "grad_norm": 1.432440161705017, - "learning_rate": 6.545125628140703e-05, - "loss": 5.4287, - "step": 34888 - }, - { - "epoch": 18.195045632333766, - "grad_norm": 1.5041319131851196, - "learning_rate": 6.545025125628141e-05, - "loss": 5.8263, - "step": 34889 - }, - { - "epoch": 18.195567144719686, - "grad_norm": 1.626662015914917, - "learning_rate": 6.544924623115578e-05, - "loss": 4.9387, - "step": 34890 - }, - { - "epoch": 18.196088657105605, - "grad_norm": 1.5161420106887817, - "learning_rate": 6.544824120603015e-05, - "loss": 5.394, - "step": 34891 - }, - { - "epoch": 18.196610169491525, - "grad_norm": 1.6143486499786377, - "learning_rate": 6.544723618090453e-05, - "loss": 5.4262, - "step": 34892 - }, - { - "epoch": 18.197131681877444, - "grad_norm": 1.7086371183395386, - "learning_rate": 6.54462311557789e-05, - "loss": 4.7685, - "step": 34893 - }, - { - "epoch": 18.197653194263363, - "grad_norm": 1.5377107858657837, - "learning_rate": 6.544522613065327e-05, - "loss": 4.9892, - "step": 34894 - }, - { - "epoch": 18.198174706649283, - "grad_norm": 1.4700813293457031, - "learning_rate": 6.544422110552764e-05, - "loss": 5.7902, - "step": 34895 - }, - { - "epoch": 18.198696219035202, - "grad_norm": 1.5194870233535767, - "learning_rate": 6.544321608040201e-05, - "loss": 5.3173, - "step": 34896 - }, - { - "epoch": 18.199217731421122, - "grad_norm": 1.4761676788330078, - "learning_rate": 6.544221105527638e-05, - "loss": 5.1231, - "step": 34897 - }, - { - "epoch": 18.19973924380704, - "grad_norm": 1.5172455310821533, - "learning_rate": 6.544120603015076e-05, - "loss": 5.2889, - "step": 34898 - }, - { - "epoch": 18.20026075619296, - "grad_norm": 1.5345903635025024, - "learning_rate": 6.544020100502512e-05, - "loss": 5.1841, - "step": 34899 - }, - { - "epoch": 18.20078226857888, - "grad_norm": 1.39176607131958, - "learning_rate": 6.54391959798995e-05, - "loss": 5.3672, - "step": 34900 - }, - { - "epoch": 18.201303780964796, - "grad_norm": 1.470988392829895, - "learning_rate": 6.543819095477388e-05, - "loss": 5.1991, - "step": 34901 - }, - { - "epoch": 18.201825293350716, - "grad_norm": 1.4883195161819458, - "learning_rate": 6.543718592964825e-05, - "loss": 5.2369, - "step": 34902 - }, - { - "epoch": 18.202346805736635, - "grad_norm": 1.4885742664337158, - "learning_rate": 6.543618090452262e-05, - "loss": 4.8383, - "step": 34903 - }, - { - "epoch": 18.202868318122555, - "grad_norm": 1.5049000978469849, - "learning_rate": 6.5435175879397e-05, - "loss": 4.538, - "step": 34904 - }, - { - "epoch": 18.203389830508474, - "grad_norm": 1.4914672374725342, - "learning_rate": 6.543417085427136e-05, - "loss": 5.2485, - "step": 34905 - }, - { - "epoch": 18.203911342894393, - "grad_norm": 1.3827276229858398, - "learning_rate": 6.543316582914574e-05, - "loss": 5.4979, - "step": 34906 - }, - { - "epoch": 18.204432855280313, - "grad_norm": 1.4302587509155273, - "learning_rate": 6.54321608040201e-05, - "loss": 5.6093, - "step": 34907 - }, - { - "epoch": 18.204954367666232, - "grad_norm": 1.3937782049179077, - "learning_rate": 6.543115577889447e-05, - "loss": 5.6041, - "step": 34908 - }, - { - "epoch": 18.205475880052152, - "grad_norm": 1.589322805404663, - "learning_rate": 6.543015075376885e-05, - "loss": 5.1134, - "step": 34909 - }, - { - "epoch": 18.20599739243807, - "grad_norm": 1.413887619972229, - "learning_rate": 6.542914572864321e-05, - "loss": 4.9726, - "step": 34910 - }, - { - "epoch": 18.20651890482399, - "grad_norm": 1.5907660722732544, - "learning_rate": 6.542814070351759e-05, - "loss": 4.744, - "step": 34911 - }, - { - "epoch": 18.20704041720991, - "grad_norm": 1.653412938117981, - "learning_rate": 6.542713567839197e-05, - "loss": 4.5515, - "step": 34912 - }, - { - "epoch": 18.207561929595826, - "grad_norm": 1.493399739265442, - "learning_rate": 6.542613065326634e-05, - "loss": 5.2282, - "step": 34913 - }, - { - "epoch": 18.208083441981746, - "grad_norm": 1.5394561290740967, - "learning_rate": 6.542512562814071e-05, - "loss": 5.3628, - "step": 34914 - }, - { - "epoch": 18.208604954367665, - "grad_norm": 1.549996256828308, - "learning_rate": 6.542412060301509e-05, - "loss": 4.5695, - "step": 34915 - }, - { - "epoch": 18.209126466753585, - "grad_norm": 1.6083146333694458, - "learning_rate": 6.542311557788945e-05, - "loss": 5.2977, - "step": 34916 - }, - { - "epoch": 18.209647979139504, - "grad_norm": 1.5066291093826294, - "learning_rate": 6.542211055276383e-05, - "loss": 5.3512, - "step": 34917 - }, - { - "epoch": 18.210169491525424, - "grad_norm": 1.6417279243469238, - "learning_rate": 6.542110552763819e-05, - "loss": 4.5723, - "step": 34918 - }, - { - "epoch": 18.210691003911343, - "grad_norm": 1.4482747316360474, - "learning_rate": 6.542010050251257e-05, - "loss": 4.2383, - "step": 34919 - }, - { - "epoch": 18.211212516297262, - "grad_norm": 1.6391679048538208, - "learning_rate": 6.541909547738693e-05, - "loss": 5.3475, - "step": 34920 - }, - { - "epoch": 18.211734028683182, - "grad_norm": 1.5966691970825195, - "learning_rate": 6.54180904522613e-05, - "loss": 4.5324, - "step": 34921 - }, - { - "epoch": 18.2122555410691, - "grad_norm": 1.5638182163238525, - "learning_rate": 6.541708542713568e-05, - "loss": 5.2908, - "step": 34922 - }, - { - "epoch": 18.21277705345502, - "grad_norm": 1.4671496152877808, - "learning_rate": 6.541608040201005e-05, - "loss": 5.349, - "step": 34923 - }, - { - "epoch": 18.21329856584094, - "grad_norm": 1.4323121309280396, - "learning_rate": 6.541507537688443e-05, - "loss": 5.4634, - "step": 34924 - }, - { - "epoch": 18.213820078226856, - "grad_norm": 1.4304111003875732, - "learning_rate": 6.54140703517588e-05, - "loss": 5.3667, - "step": 34925 - }, - { - "epoch": 18.214341590612776, - "grad_norm": 1.5397720336914062, - "learning_rate": 6.541306532663317e-05, - "loss": 5.4003, - "step": 34926 - }, - { - "epoch": 18.214863102998695, - "grad_norm": 1.5086771249771118, - "learning_rate": 6.541206030150754e-05, - "loss": 4.8089, - "step": 34927 - }, - { - "epoch": 18.215384615384615, - "grad_norm": 1.9160431623458862, - "learning_rate": 6.541105527638192e-05, - "loss": 4.9003, - "step": 34928 - }, - { - "epoch": 18.215906127770534, - "grad_norm": 1.5085468292236328, - "learning_rate": 6.541005025125628e-05, - "loss": 5.5162, - "step": 34929 - }, - { - "epoch": 18.216427640156454, - "grad_norm": 1.4818345308303833, - "learning_rate": 6.540904522613066e-05, - "loss": 5.0746, - "step": 34930 - }, - { - "epoch": 18.216949152542373, - "grad_norm": 1.4832135438919067, - "learning_rate": 6.540804020100502e-05, - "loss": 5.2095, - "step": 34931 - }, - { - "epoch": 18.217470664928292, - "grad_norm": 1.4490822553634644, - "learning_rate": 6.54070351758794e-05, - "loss": 5.7031, - "step": 34932 - }, - { - "epoch": 18.217992177314212, - "grad_norm": 1.4371428489685059, - "learning_rate": 6.540603015075378e-05, - "loss": 5.84, - "step": 34933 - }, - { - "epoch": 18.21851368970013, - "grad_norm": 1.4846360683441162, - "learning_rate": 6.540502512562814e-05, - "loss": 5.2933, - "step": 34934 - }, - { - "epoch": 18.21903520208605, - "grad_norm": 1.3966889381408691, - "learning_rate": 6.540402010050252e-05, - "loss": 5.3803, - "step": 34935 - }, - { - "epoch": 18.21955671447197, - "grad_norm": 1.4686295986175537, - "learning_rate": 6.540301507537688e-05, - "loss": 4.9963, - "step": 34936 - }, - { - "epoch": 18.220078226857886, - "grad_norm": 1.4865243434906006, - "learning_rate": 6.540201005025126e-05, - "loss": 5.3597, - "step": 34937 - }, - { - "epoch": 18.220599739243806, - "grad_norm": 1.4970570802688599, - "learning_rate": 6.540100502512563e-05, - "loss": 5.1109, - "step": 34938 - }, - { - "epoch": 18.221121251629725, - "grad_norm": 1.5543824434280396, - "learning_rate": 6.54e-05, - "loss": 5.633, - "step": 34939 - }, - { - "epoch": 18.221642764015645, - "grad_norm": 1.444128394126892, - "learning_rate": 6.539899497487437e-05, - "loss": 5.4458, - "step": 34940 - }, - { - "epoch": 18.222164276401564, - "grad_norm": 1.373309850692749, - "learning_rate": 6.539798994974875e-05, - "loss": 5.5022, - "step": 34941 - }, - { - "epoch": 18.222685788787484, - "grad_norm": 1.6551028490066528, - "learning_rate": 6.539698492462311e-05, - "loss": 4.9663, - "step": 34942 - }, - { - "epoch": 18.223207301173403, - "grad_norm": 1.4227426052093506, - "learning_rate": 6.539597989949749e-05, - "loss": 5.7229, - "step": 34943 - }, - { - "epoch": 18.223728813559323, - "grad_norm": 1.4319027662277222, - "learning_rate": 6.539497487437187e-05, - "loss": 5.4229, - "step": 34944 - }, - { - "epoch": 18.224250325945242, - "grad_norm": 1.489188313484192, - "learning_rate": 6.539396984924624e-05, - "loss": 5.5823, - "step": 34945 - }, - { - "epoch": 18.22477183833116, - "grad_norm": 1.490453839302063, - "learning_rate": 6.539296482412061e-05, - "loss": 5.3553, - "step": 34946 - }, - { - "epoch": 18.22529335071708, - "grad_norm": 1.4017893075942993, - "learning_rate": 6.539195979899497e-05, - "loss": 5.2463, - "step": 34947 - }, - { - "epoch": 18.225814863103, - "grad_norm": 1.5393755435943604, - "learning_rate": 6.539095477386935e-05, - "loss": 5.6232, - "step": 34948 - }, - { - "epoch": 18.226336375488916, - "grad_norm": 1.4903268814086914, - "learning_rate": 6.538994974874371e-05, - "loss": 5.2934, - "step": 34949 - }, - { - "epoch": 18.226857887874836, - "grad_norm": 1.4268423318862915, - "learning_rate": 6.538894472361809e-05, - "loss": 5.7189, - "step": 34950 - }, - { - "epoch": 18.227379400260755, - "grad_norm": 1.5113489627838135, - "learning_rate": 6.538793969849246e-05, - "loss": 5.3047, - "step": 34951 - }, - { - "epoch": 18.227900912646675, - "grad_norm": 1.667169213294983, - "learning_rate": 6.538693467336683e-05, - "loss": 5.182, - "step": 34952 - }, - { - "epoch": 18.228422425032594, - "grad_norm": 1.5584176778793335, - "learning_rate": 6.538592964824121e-05, - "loss": 4.8539, - "step": 34953 - }, - { - "epoch": 18.228943937418514, - "grad_norm": 1.6588366031646729, - "learning_rate": 6.538492462311559e-05, - "loss": 5.1831, - "step": 34954 - }, - { - "epoch": 18.229465449804433, - "grad_norm": 1.480884313583374, - "learning_rate": 6.538391959798995e-05, - "loss": 5.1815, - "step": 34955 - }, - { - "epoch": 18.229986962190353, - "grad_norm": 1.6008315086364746, - "learning_rate": 6.538291457286433e-05, - "loss": 4.819, - "step": 34956 - }, - { - "epoch": 18.230508474576272, - "grad_norm": 1.616977572441101, - "learning_rate": 6.53819095477387e-05, - "loss": 5.3242, - "step": 34957 - }, - { - "epoch": 18.23102998696219, - "grad_norm": 1.4958577156066895, - "learning_rate": 6.538090452261307e-05, - "loss": 4.8731, - "step": 34958 - }, - { - "epoch": 18.23155149934811, - "grad_norm": 1.6139802932739258, - "learning_rate": 6.537989949748744e-05, - "loss": 5.3742, - "step": 34959 - }, - { - "epoch": 18.23207301173403, - "grad_norm": 1.5121251344680786, - "learning_rate": 6.53788944723618e-05, - "loss": 5.1271, - "step": 34960 - }, - { - "epoch": 18.232594524119946, - "grad_norm": 1.5334523916244507, - "learning_rate": 6.537788944723618e-05, - "loss": 4.9628, - "step": 34961 - }, - { - "epoch": 18.233116036505866, - "grad_norm": 1.5490156412124634, - "learning_rate": 6.537688442211054e-05, - "loss": 5.1167, - "step": 34962 - }, - { - "epoch": 18.233637548891785, - "grad_norm": 1.4508469104766846, - "learning_rate": 6.537587939698492e-05, - "loss": 5.6574, - "step": 34963 - }, - { - "epoch": 18.234159061277705, - "grad_norm": 1.4784696102142334, - "learning_rate": 6.53748743718593e-05, - "loss": 5.3553, - "step": 34964 - }, - { - "epoch": 18.234680573663624, - "grad_norm": 1.5792509317398071, - "learning_rate": 6.537386934673368e-05, - "loss": 5.1535, - "step": 34965 - }, - { - "epoch": 18.235202086049544, - "grad_norm": 1.5628973245620728, - "learning_rate": 6.537286432160804e-05, - "loss": 5.3379, - "step": 34966 - }, - { - "epoch": 18.235723598435463, - "grad_norm": 1.428718090057373, - "learning_rate": 6.537185929648242e-05, - "loss": 5.2931, - "step": 34967 - }, - { - "epoch": 18.236245110821383, - "grad_norm": 1.4996012449264526, - "learning_rate": 6.537085427135678e-05, - "loss": 5.3151, - "step": 34968 - }, - { - "epoch": 18.236766623207302, - "grad_norm": 1.4652284383773804, - "learning_rate": 6.536984924623116e-05, - "loss": 5.2653, - "step": 34969 - }, - { - "epoch": 18.23728813559322, - "grad_norm": 1.4968318939208984, - "learning_rate": 6.536884422110553e-05, - "loss": 5.2984, - "step": 34970 - }, - { - "epoch": 18.23780964797914, - "grad_norm": 1.414518117904663, - "learning_rate": 6.53678391959799e-05, - "loss": 5.187, - "step": 34971 - }, - { - "epoch": 18.23833116036506, - "grad_norm": 1.383997917175293, - "learning_rate": 6.536683417085427e-05, - "loss": 5.5866, - "step": 34972 - }, - { - "epoch": 18.238852672750976, - "grad_norm": 1.3529374599456787, - "learning_rate": 6.536582914572865e-05, - "loss": 5.7564, - "step": 34973 - }, - { - "epoch": 18.239374185136896, - "grad_norm": 1.5095305442810059, - "learning_rate": 6.536482412060302e-05, - "loss": 5.5031, - "step": 34974 - }, - { - "epoch": 18.239895697522815, - "grad_norm": 1.564564824104309, - "learning_rate": 6.536381909547739e-05, - "loss": 4.8906, - "step": 34975 - }, - { - "epoch": 18.240417209908735, - "grad_norm": 1.481202483177185, - "learning_rate": 6.536281407035177e-05, - "loss": 5.0803, - "step": 34976 - }, - { - "epoch": 18.240938722294654, - "grad_norm": 1.4377580881118774, - "learning_rate": 6.536180904522613e-05, - "loss": 5.6235, - "step": 34977 - }, - { - "epoch": 18.241460234680574, - "grad_norm": 1.5630308389663696, - "learning_rate": 6.536080402010051e-05, - "loss": 5.1241, - "step": 34978 - }, - { - "epoch": 18.241981747066493, - "grad_norm": 1.5421499013900757, - "learning_rate": 6.535979899497487e-05, - "loss": 5.3461, - "step": 34979 - }, - { - "epoch": 18.242503259452413, - "grad_norm": 1.4625457525253296, - "learning_rate": 6.535879396984925e-05, - "loss": 5.1414, - "step": 34980 - }, - { - "epoch": 18.243024771838332, - "grad_norm": 1.4876269102096558, - "learning_rate": 6.535778894472362e-05, - "loss": 5.542, - "step": 34981 - }, - { - "epoch": 18.24354628422425, - "grad_norm": 1.6286735534667969, - "learning_rate": 6.535678391959799e-05, - "loss": 5.1831, - "step": 34982 - }, - { - "epoch": 18.24406779661017, - "grad_norm": 1.5157155990600586, - "learning_rate": 6.535577889447236e-05, - "loss": 5.4166, - "step": 34983 - }, - { - "epoch": 18.24458930899609, - "grad_norm": 1.5572270154953003, - "learning_rate": 6.535477386934674e-05, - "loss": 4.8215, - "step": 34984 - }, - { - "epoch": 18.245110821382006, - "grad_norm": 1.6048136949539185, - "learning_rate": 6.535376884422111e-05, - "loss": 4.9738, - "step": 34985 - }, - { - "epoch": 18.245632333767926, - "grad_norm": 1.459631323814392, - "learning_rate": 6.535276381909549e-05, - "loss": 5.3908, - "step": 34986 - }, - { - "epoch": 18.246153846153845, - "grad_norm": 1.38510000705719, - "learning_rate": 6.535175879396986e-05, - "loss": 5.4302, - "step": 34987 - }, - { - "epoch": 18.246675358539765, - "grad_norm": 1.457414150238037, - "learning_rate": 6.535075376884422e-05, - "loss": 5.4087, - "step": 34988 - }, - { - "epoch": 18.247196870925684, - "grad_norm": 1.522373914718628, - "learning_rate": 6.53497487437186e-05, - "loss": 5.2812, - "step": 34989 - }, - { - "epoch": 18.247718383311604, - "grad_norm": 1.6016508340835571, - "learning_rate": 6.534874371859296e-05, - "loss": 5.1616, - "step": 34990 - }, - { - "epoch": 18.248239895697523, - "grad_norm": 1.5636253356933594, - "learning_rate": 6.534773869346734e-05, - "loss": 5.4699, - "step": 34991 - }, - { - "epoch": 18.248761408083443, - "grad_norm": 1.3814116716384888, - "learning_rate": 6.53467336683417e-05, - "loss": 5.6258, - "step": 34992 - }, - { - "epoch": 18.249282920469362, - "grad_norm": 1.4744046926498413, - "learning_rate": 6.534572864321608e-05, - "loss": 5.1171, - "step": 34993 - }, - { - "epoch": 18.24980443285528, - "grad_norm": 1.4139642715454102, - "learning_rate": 6.534472361809046e-05, - "loss": 5.5253, - "step": 34994 - }, - { - "epoch": 18.2503259452412, - "grad_norm": 1.4739009141921997, - "learning_rate": 6.534371859296484e-05, - "loss": 5.2792, - "step": 34995 - }, - { - "epoch": 18.250847457627117, - "grad_norm": 1.4067747592926025, - "learning_rate": 6.53427135678392e-05, - "loss": 5.6618, - "step": 34996 - }, - { - "epoch": 18.251368970013036, - "grad_norm": 1.541038990020752, - "learning_rate": 6.534170854271358e-05, - "loss": 5.1972, - "step": 34997 - }, - { - "epoch": 18.251890482398956, - "grad_norm": 1.5615936517715454, - "learning_rate": 6.534070351758794e-05, - "loss": 5.0422, - "step": 34998 - }, - { - "epoch": 18.252411994784875, - "grad_norm": 2.518430233001709, - "learning_rate": 6.533969849246232e-05, - "loss": 4.4554, - "step": 34999 - }, - { - "epoch": 18.252933507170795, - "grad_norm": 1.599814772605896, - "learning_rate": 6.533869346733669e-05, - "loss": 4.9589, - "step": 35000 - }, - { - "epoch": 18.252933507170795, - "eval_loss": 5.407883644104004, - "eval_runtime": 42.6695, - "eval_samples_per_second": 28.732, - "eval_steps_per_second": 3.609, - "step": 35000 - }, - { - "epoch": 18.253455019556714, - "grad_norm": 1.4744583368301392, - "learning_rate": 6.533768844221105e-05, - "loss": 5.6146, - "step": 35001 - }, - { - "epoch": 18.253976531942634, - "grad_norm": 1.5093780755996704, - "learning_rate": 6.533668341708543e-05, - "loss": 5.5566, - "step": 35002 - }, - { - "epoch": 18.254498044328553, - "grad_norm": 1.4635560512542725, - "learning_rate": 6.533567839195979e-05, - "loss": 4.7602, - "step": 35003 - }, - { - "epoch": 18.255019556714473, - "grad_norm": 1.4679769277572632, - "learning_rate": 6.533467336683417e-05, - "loss": 5.2049, - "step": 35004 - }, - { - "epoch": 18.255541069100392, - "grad_norm": 1.4327971935272217, - "learning_rate": 6.533366834170855e-05, - "loss": 5.5009, - "step": 35005 - }, - { - "epoch": 18.25606258148631, - "grad_norm": 1.4482378959655762, - "learning_rate": 6.533266331658293e-05, - "loss": 5.3461, - "step": 35006 - }, - { - "epoch": 18.25658409387223, - "grad_norm": 1.4593786001205444, - "learning_rate": 6.533165829145729e-05, - "loss": 5.2237, - "step": 35007 - }, - { - "epoch": 18.257105606258147, - "grad_norm": 1.5640251636505127, - "learning_rate": 6.533065326633167e-05, - "loss": 5.0657, - "step": 35008 - }, - { - "epoch": 18.257627118644066, - "grad_norm": 1.5037530660629272, - "learning_rate": 6.532964824120603e-05, - "loss": 4.9187, - "step": 35009 - }, - { - "epoch": 18.258148631029986, - "grad_norm": 1.3998653888702393, - "learning_rate": 6.532864321608041e-05, - "loss": 5.3342, - "step": 35010 - }, - { - "epoch": 18.258670143415905, - "grad_norm": 1.4953688383102417, - "learning_rate": 6.532763819095477e-05, - "loss": 5.0601, - "step": 35011 - }, - { - "epoch": 18.259191655801825, - "grad_norm": 1.463054895401001, - "learning_rate": 6.532663316582915e-05, - "loss": 5.3887, - "step": 35012 - }, - { - "epoch": 18.259713168187744, - "grad_norm": 1.44282066822052, - "learning_rate": 6.532562814070352e-05, - "loss": 5.3184, - "step": 35013 - }, - { - "epoch": 18.260234680573664, - "grad_norm": 1.399288535118103, - "learning_rate": 6.53246231155779e-05, - "loss": 5.563, - "step": 35014 - }, - { - "epoch": 18.260756192959583, - "grad_norm": 1.474032998085022, - "learning_rate": 6.532361809045227e-05, - "loss": 5.0232, - "step": 35015 - }, - { - "epoch": 18.261277705345503, - "grad_norm": 1.4506990909576416, - "learning_rate": 6.532261306532664e-05, - "loss": 5.6039, - "step": 35016 - }, - { - "epoch": 18.261799217731422, - "grad_norm": 1.3858733177185059, - "learning_rate": 6.532160804020101e-05, - "loss": 5.6229, - "step": 35017 - }, - { - "epoch": 18.26232073011734, - "grad_norm": 1.4483445882797241, - "learning_rate": 6.532060301507538e-05, - "loss": 5.1575, - "step": 35018 - }, - { - "epoch": 18.26284224250326, - "grad_norm": 1.5089054107666016, - "learning_rate": 6.531959798994976e-05, - "loss": 5.6749, - "step": 35019 - }, - { - "epoch": 18.263363754889177, - "grad_norm": 1.5345643758773804, - "learning_rate": 6.531859296482412e-05, - "loss": 4.9062, - "step": 35020 - }, - { - "epoch": 18.263885267275096, - "grad_norm": 1.5761226415634155, - "learning_rate": 6.53175879396985e-05, - "loss": 5.0435, - "step": 35021 - }, - { - "epoch": 18.264406779661016, - "grad_norm": 1.5919557809829712, - "learning_rate": 6.531658291457286e-05, - "loss": 5.1726, - "step": 35022 - }, - { - "epoch": 18.264928292046935, - "grad_norm": 1.417968988418579, - "learning_rate": 6.531557788944724e-05, - "loss": 5.4721, - "step": 35023 - }, - { - "epoch": 18.265449804432855, - "grad_norm": 1.5379531383514404, - "learning_rate": 6.53145728643216e-05, - "loss": 5.6888, - "step": 35024 - }, - { - "epoch": 18.265971316818774, - "grad_norm": 1.709830403327942, - "learning_rate": 6.531356783919598e-05, - "loss": 5.115, - "step": 35025 - }, - { - "epoch": 18.266492829204694, - "grad_norm": 1.543633222579956, - "learning_rate": 6.531256281407036e-05, - "loss": 4.8994, - "step": 35026 - }, - { - "epoch": 18.267014341590613, - "grad_norm": 1.597963571548462, - "learning_rate": 6.531155778894472e-05, - "loss": 5.0651, - "step": 35027 - }, - { - "epoch": 18.267535853976533, - "grad_norm": 1.4522123336791992, - "learning_rate": 6.53105527638191e-05, - "loss": 5.102, - "step": 35028 - }, - { - "epoch": 18.268057366362452, - "grad_norm": 1.4743984937667847, - "learning_rate": 6.530954773869347e-05, - "loss": 5.4168, - "step": 35029 - }, - { - "epoch": 18.26857887874837, - "grad_norm": 1.477513074874878, - "learning_rate": 6.530854271356784e-05, - "loss": 5.2536, - "step": 35030 - }, - { - "epoch": 18.26910039113429, - "grad_norm": 1.4317578077316284, - "learning_rate": 6.530753768844221e-05, - "loss": 5.0996, - "step": 35031 - }, - { - "epoch": 18.269621903520207, - "grad_norm": 1.428789496421814, - "learning_rate": 6.530653266331659e-05, - "loss": 5.117, - "step": 35032 - }, - { - "epoch": 18.270143415906126, - "grad_norm": 1.5860494375228882, - "learning_rate": 6.530552763819095e-05, - "loss": 4.809, - "step": 35033 - }, - { - "epoch": 18.270664928292046, - "grad_norm": 1.4391672611236572, - "learning_rate": 6.530452261306533e-05, - "loss": 4.9687, - "step": 35034 - }, - { - "epoch": 18.271186440677965, - "grad_norm": 1.4418835639953613, - "learning_rate": 6.53035175879397e-05, - "loss": 5.528, - "step": 35035 - }, - { - "epoch": 18.271707953063885, - "grad_norm": 1.4729626178741455, - "learning_rate": 6.530251256281408e-05, - "loss": 5.2565, - "step": 35036 - }, - { - "epoch": 18.272229465449804, - "grad_norm": 1.4033286571502686, - "learning_rate": 6.530150753768845e-05, - "loss": 4.9116, - "step": 35037 - }, - { - "epoch": 18.272750977835724, - "grad_norm": 1.4527310132980347, - "learning_rate": 6.530050251256283e-05, - "loss": 5.459, - "step": 35038 - }, - { - "epoch": 18.273272490221643, - "grad_norm": 1.5872374773025513, - "learning_rate": 6.529949748743719e-05, - "loss": 5.334, - "step": 35039 - }, - { - "epoch": 18.273794002607563, - "grad_norm": 1.3629285097122192, - "learning_rate": 6.529849246231155e-05, - "loss": 5.4291, - "step": 35040 - }, - { - "epoch": 18.274315514993482, - "grad_norm": 1.4781479835510254, - "learning_rate": 6.529748743718593e-05, - "loss": 5.0934, - "step": 35041 - }, - { - "epoch": 18.2748370273794, - "grad_norm": 1.5002739429473877, - "learning_rate": 6.52964824120603e-05, - "loss": 4.7202, - "step": 35042 - }, - { - "epoch": 18.27535853976532, - "grad_norm": 1.6170668601989746, - "learning_rate": 6.529547738693467e-05, - "loss": 5.4367, - "step": 35043 - }, - { - "epoch": 18.275880052151237, - "grad_norm": 1.4809002876281738, - "learning_rate": 6.529447236180904e-05, - "loss": 5.6315, - "step": 35044 - }, - { - "epoch": 18.276401564537156, - "grad_norm": 1.5057294368743896, - "learning_rate": 6.529346733668342e-05, - "loss": 5.1497, - "step": 35045 - }, - { - "epoch": 18.276923076923076, - "grad_norm": 1.4007129669189453, - "learning_rate": 6.52924623115578e-05, - "loss": 5.6067, - "step": 35046 - }, - { - "epoch": 18.277444589308995, - "grad_norm": 1.524448275566101, - "learning_rate": 6.529145728643217e-05, - "loss": 5.753, - "step": 35047 - }, - { - "epoch": 18.277966101694915, - "grad_norm": 1.59407639503479, - "learning_rate": 6.529045226130654e-05, - "loss": 5.0088, - "step": 35048 - }, - { - "epoch": 18.278487614080834, - "grad_norm": 1.3730294704437256, - "learning_rate": 6.528944723618091e-05, - "loss": 5.6526, - "step": 35049 - }, - { - "epoch": 18.279009126466754, - "grad_norm": 1.4471385478973389, - "learning_rate": 6.528844221105528e-05, - "loss": 5.2925, - "step": 35050 - }, - { - "epoch": 18.279530638852673, - "grad_norm": 1.5030394792556763, - "learning_rate": 6.528743718592966e-05, - "loss": 5.6132, - "step": 35051 - }, - { - "epoch": 18.280052151238593, - "grad_norm": 1.5707652568817139, - "learning_rate": 6.528643216080402e-05, - "loss": 5.1568, - "step": 35052 - }, - { - "epoch": 18.280573663624512, - "grad_norm": 1.4697586297988892, - "learning_rate": 6.528542713567839e-05, - "loss": 4.9857, - "step": 35053 - }, - { - "epoch": 18.28109517601043, - "grad_norm": 1.4924544095993042, - "learning_rate": 6.528442211055276e-05, - "loss": 5.4866, - "step": 35054 - }, - { - "epoch": 18.28161668839635, - "grad_norm": 1.5782535076141357, - "learning_rate": 6.528341708542714e-05, - "loss": 4.8994, - "step": 35055 - }, - { - "epoch": 18.282138200782267, - "grad_norm": 1.4191405773162842, - "learning_rate": 6.528241206030152e-05, - "loss": 5.4888, - "step": 35056 - }, - { - "epoch": 18.282659713168186, - "grad_norm": 1.4657505750656128, - "learning_rate": 6.528140703517588e-05, - "loss": 5.043, - "step": 35057 - }, - { - "epoch": 18.283181225554106, - "grad_norm": 1.4569674730300903, - "learning_rate": 6.528040201005026e-05, - "loss": 5.4564, - "step": 35058 - }, - { - "epoch": 18.283702737940025, - "grad_norm": 1.6470770835876465, - "learning_rate": 6.527939698492463e-05, - "loss": 5.2388, - "step": 35059 - }, - { - "epoch": 18.284224250325945, - "grad_norm": 1.5584067106246948, - "learning_rate": 6.5278391959799e-05, - "loss": 5.1508, - "step": 35060 - }, - { - "epoch": 18.284745762711864, - "grad_norm": 1.3580663204193115, - "learning_rate": 6.527738693467337e-05, - "loss": 5.9433, - "step": 35061 - }, - { - "epoch": 18.285267275097784, - "grad_norm": 1.402835488319397, - "learning_rate": 6.527638190954775e-05, - "loss": 5.6628, - "step": 35062 - }, - { - "epoch": 18.285788787483703, - "grad_norm": 1.4722617864608765, - "learning_rate": 6.527537688442211e-05, - "loss": 5.2383, - "step": 35063 - }, - { - "epoch": 18.286310299869623, - "grad_norm": 1.444960594177246, - "learning_rate": 6.527437185929649e-05, - "loss": 5.3096, - "step": 35064 - }, - { - "epoch": 18.286831812255542, - "grad_norm": 1.541443109512329, - "learning_rate": 6.527336683417085e-05, - "loss": 5.2715, - "step": 35065 - }, - { - "epoch": 18.28735332464146, - "grad_norm": 1.509670615196228, - "learning_rate": 6.527236180904523e-05, - "loss": 4.8869, - "step": 35066 - }, - { - "epoch": 18.28787483702738, - "grad_norm": 1.3948801755905151, - "learning_rate": 6.527135678391961e-05, - "loss": 5.5176, - "step": 35067 - }, - { - "epoch": 18.288396349413297, - "grad_norm": 1.4195419549942017, - "learning_rate": 6.527035175879397e-05, - "loss": 5.3927, - "step": 35068 - }, - { - "epoch": 18.288917861799217, - "grad_norm": 1.9095288515090942, - "learning_rate": 6.526934673366835e-05, - "loss": 4.3727, - "step": 35069 - }, - { - "epoch": 18.289439374185136, - "grad_norm": 1.4740098714828491, - "learning_rate": 6.526834170854271e-05, - "loss": 5.4788, - "step": 35070 - }, - { - "epoch": 18.289960886571055, - "grad_norm": 1.5836005210876465, - "learning_rate": 6.526733668341709e-05, - "loss": 5.2222, - "step": 35071 - }, - { - "epoch": 18.290482398956975, - "grad_norm": 1.4758213758468628, - "learning_rate": 6.526633165829146e-05, - "loss": 5.1607, - "step": 35072 - }, - { - "epoch": 18.291003911342894, - "grad_norm": 1.5381370782852173, - "learning_rate": 6.526532663316583e-05, - "loss": 5.0456, - "step": 35073 - }, - { - "epoch": 18.291525423728814, - "grad_norm": 1.5346479415893555, - "learning_rate": 6.52643216080402e-05, - "loss": 5.3359, - "step": 35074 - }, - { - "epoch": 18.292046936114733, - "grad_norm": 1.5240410566329956, - "learning_rate": 6.526331658291458e-05, - "loss": 4.9471, - "step": 35075 - }, - { - "epoch": 18.292568448500653, - "grad_norm": 1.4305683374404907, - "learning_rate": 6.526231155778895e-05, - "loss": 4.8442, - "step": 35076 - }, - { - "epoch": 18.293089960886572, - "grad_norm": 1.5130236148834229, - "learning_rate": 6.526130653266333e-05, - "loss": 5.5346, - "step": 35077 - }, - { - "epoch": 18.29361147327249, - "grad_norm": 1.5122520923614502, - "learning_rate": 6.52603015075377e-05, - "loss": 5.576, - "step": 35078 - }, - { - "epoch": 18.294132985658408, - "grad_norm": 1.3756455183029175, - "learning_rate": 6.525929648241207e-05, - "loss": 5.5373, - "step": 35079 - }, - { - "epoch": 18.294654498044327, - "grad_norm": 1.4993983507156372, - "learning_rate": 6.525829145728644e-05, - "loss": 5.5226, - "step": 35080 - }, - { - "epoch": 18.295176010430247, - "grad_norm": 1.4946441650390625, - "learning_rate": 6.52572864321608e-05, - "loss": 5.3777, - "step": 35081 - }, - { - "epoch": 18.295697522816166, - "grad_norm": 1.5364803075790405, - "learning_rate": 6.525628140703518e-05, - "loss": 5.2701, - "step": 35082 - }, - { - "epoch": 18.296219035202085, - "grad_norm": 1.4952648878097534, - "learning_rate": 6.525527638190954e-05, - "loss": 5.4548, - "step": 35083 - }, - { - "epoch": 18.296740547588005, - "grad_norm": 1.432321310043335, - "learning_rate": 6.525427135678392e-05, - "loss": 5.8266, - "step": 35084 - }, - { - "epoch": 18.297262059973924, - "grad_norm": 1.5266563892364502, - "learning_rate": 6.525326633165829e-05, - "loss": 5.1661, - "step": 35085 - }, - { - "epoch": 18.297783572359844, - "grad_norm": 1.5056899785995483, - "learning_rate": 6.525226130653266e-05, - "loss": 5.6401, - "step": 35086 - }, - { - "epoch": 18.298305084745763, - "grad_norm": 1.594820261001587, - "learning_rate": 6.525125628140704e-05, - "loss": 5.449, - "step": 35087 - }, - { - "epoch": 18.298826597131683, - "grad_norm": 1.616301417350769, - "learning_rate": 6.525025125628142e-05, - "loss": 5.1367, - "step": 35088 - }, - { - "epoch": 18.299348109517602, - "grad_norm": 1.5118192434310913, - "learning_rate": 6.524924623115578e-05, - "loss": 4.9854, - "step": 35089 - }, - { - "epoch": 18.29986962190352, - "grad_norm": 1.4813098907470703, - "learning_rate": 6.524824120603016e-05, - "loss": 5.2121, - "step": 35090 - }, - { - "epoch": 18.300391134289438, - "grad_norm": 1.492374062538147, - "learning_rate": 6.524723618090453e-05, - "loss": 4.7307, - "step": 35091 - }, - { - "epoch": 18.300912646675357, - "grad_norm": 1.51215660572052, - "learning_rate": 6.52462311557789e-05, - "loss": 5.6453, - "step": 35092 - }, - { - "epoch": 18.301434159061277, - "grad_norm": 1.5311630964279175, - "learning_rate": 6.524522613065327e-05, - "loss": 5.7132, - "step": 35093 - }, - { - "epoch": 18.301955671447196, - "grad_norm": 1.5042660236358643, - "learning_rate": 6.524422110552763e-05, - "loss": 5.3321, - "step": 35094 - }, - { - "epoch": 18.302477183833116, - "grad_norm": 1.3796346187591553, - "learning_rate": 6.524321608040201e-05, - "loss": 5.4953, - "step": 35095 - }, - { - "epoch": 18.302998696219035, - "grad_norm": 1.4991424083709717, - "learning_rate": 6.524221105527637e-05, - "loss": 5.2295, - "step": 35096 - }, - { - "epoch": 18.303520208604954, - "grad_norm": 1.557072401046753, - "learning_rate": 6.524120603015075e-05, - "loss": 5.3073, - "step": 35097 - }, - { - "epoch": 18.304041720990874, - "grad_norm": 1.4419294595718384, - "learning_rate": 6.524020100502513e-05, - "loss": 5.7083, - "step": 35098 - }, - { - "epoch": 18.304563233376793, - "grad_norm": 1.5642204284667969, - "learning_rate": 6.523919597989951e-05, - "loss": 4.8507, - "step": 35099 - }, - { - "epoch": 18.305084745762713, - "grad_norm": 1.6245651245117188, - "learning_rate": 6.523819095477387e-05, - "loss": 5.0838, - "step": 35100 - }, - { - "epoch": 18.305606258148632, - "grad_norm": 1.489628791809082, - "learning_rate": 6.523718592964825e-05, - "loss": 5.614, - "step": 35101 - }, - { - "epoch": 18.30612777053455, - "grad_norm": 1.5807197093963623, - "learning_rate": 6.523618090452261e-05, - "loss": 5.003, - "step": 35102 - }, - { - "epoch": 18.306649282920468, - "grad_norm": 1.660123586654663, - "learning_rate": 6.523517587939699e-05, - "loss": 5.0949, - "step": 35103 - }, - { - "epoch": 18.307170795306387, - "grad_norm": 1.5544617176055908, - "learning_rate": 6.523417085427136e-05, - "loss": 4.9605, - "step": 35104 - }, - { - "epoch": 18.307692307692307, - "grad_norm": 1.5405970811843872, - "learning_rate": 6.523316582914573e-05, - "loss": 5.1883, - "step": 35105 - }, - { - "epoch": 18.308213820078226, - "grad_norm": 1.5640230178833008, - "learning_rate": 6.52321608040201e-05, - "loss": 4.8407, - "step": 35106 - }, - { - "epoch": 18.308735332464146, - "grad_norm": 1.4821656942367554, - "learning_rate": 6.523115577889448e-05, - "loss": 5.2668, - "step": 35107 - }, - { - "epoch": 18.309256844850065, - "grad_norm": 1.521856665611267, - "learning_rate": 6.523015075376885e-05, - "loss": 5.5759, - "step": 35108 - }, - { - "epoch": 18.309778357235984, - "grad_norm": 1.3172255754470825, - "learning_rate": 6.522914572864322e-05, - "loss": 5.4853, - "step": 35109 - }, - { - "epoch": 18.310299869621904, - "grad_norm": 1.457727313041687, - "learning_rate": 6.52281407035176e-05, - "loss": 5.345, - "step": 35110 - }, - { - "epoch": 18.310821382007823, - "grad_norm": 1.4871201515197754, - "learning_rate": 6.522713567839196e-05, - "loss": 4.9154, - "step": 35111 - }, - { - "epoch": 18.311342894393743, - "grad_norm": 1.4918783903121948, - "learning_rate": 6.522613065326634e-05, - "loss": 5.4383, - "step": 35112 - }, - { - "epoch": 18.311864406779662, - "grad_norm": 1.4231157302856445, - "learning_rate": 6.52251256281407e-05, - "loss": 5.0122, - "step": 35113 - }, - { - "epoch": 18.312385919165582, - "grad_norm": 1.5278922319412231, - "learning_rate": 6.522412060301508e-05, - "loss": 5.5652, - "step": 35114 - }, - { - "epoch": 18.312907431551498, - "grad_norm": 1.7249114513397217, - "learning_rate": 6.522311557788944e-05, - "loss": 4.6803, - "step": 35115 - }, - { - "epoch": 18.313428943937417, - "grad_norm": 1.52769935131073, - "learning_rate": 6.522211055276382e-05, - "loss": 5.3064, - "step": 35116 - }, - { - "epoch": 18.313950456323337, - "grad_norm": 1.5816044807434082, - "learning_rate": 6.522110552763819e-05, - "loss": 5.1862, - "step": 35117 - }, - { - "epoch": 18.314471968709256, - "grad_norm": 1.4829825162887573, - "learning_rate": 6.522010050251256e-05, - "loss": 5.6199, - "step": 35118 - }, - { - "epoch": 18.314993481095176, - "grad_norm": 1.4761645793914795, - "learning_rate": 6.521909547738694e-05, - "loss": 5.2687, - "step": 35119 - }, - { - "epoch": 18.315514993481095, - "grad_norm": 1.425998330116272, - "learning_rate": 6.52180904522613e-05, - "loss": 4.9591, - "step": 35120 - }, - { - "epoch": 18.316036505867014, - "grad_norm": 1.5633008480072021, - "learning_rate": 6.521708542713568e-05, - "loss": 5.3384, - "step": 35121 - }, - { - "epoch": 18.316558018252934, - "grad_norm": 1.5759329795837402, - "learning_rate": 6.521608040201005e-05, - "loss": 5.0275, - "step": 35122 - }, - { - "epoch": 18.317079530638853, - "grad_norm": 1.451280951499939, - "learning_rate": 6.521507537688443e-05, - "loss": 4.906, - "step": 35123 - }, - { - "epoch": 18.317601043024773, - "grad_norm": 1.5154730081558228, - "learning_rate": 6.521407035175879e-05, - "loss": 5.5236, - "step": 35124 - }, - { - "epoch": 18.318122555410692, - "grad_norm": 1.4819393157958984, - "learning_rate": 6.521306532663317e-05, - "loss": 5.4434, - "step": 35125 - }, - { - "epoch": 18.318644067796612, - "grad_norm": 1.6486972570419312, - "learning_rate": 6.521206030150753e-05, - "loss": 4.8115, - "step": 35126 - }, - { - "epoch": 18.319165580182528, - "grad_norm": 1.51009202003479, - "learning_rate": 6.521105527638191e-05, - "loss": 5.51, - "step": 35127 - }, - { - "epoch": 18.319687092568447, - "grad_norm": 1.591661810874939, - "learning_rate": 6.521005025125629e-05, - "loss": 4.784, - "step": 35128 - }, - { - "epoch": 18.320208604954367, - "grad_norm": 1.5596026182174683, - "learning_rate": 6.520904522613067e-05, - "loss": 4.8631, - "step": 35129 - }, - { - "epoch": 18.320730117340286, - "grad_norm": 1.5759214162826538, - "learning_rate": 6.520804020100503e-05, - "loss": 5.3815, - "step": 35130 - }, - { - "epoch": 18.321251629726206, - "grad_norm": 1.4733295440673828, - "learning_rate": 6.520703517587941e-05, - "loss": 5.177, - "step": 35131 - }, - { - "epoch": 18.321773142112125, - "grad_norm": 1.6423019170761108, - "learning_rate": 6.520603015075377e-05, - "loss": 5.2366, - "step": 35132 - }, - { - "epoch": 18.322294654498045, - "grad_norm": 1.4718286991119385, - "learning_rate": 6.520502512562814e-05, - "loss": 5.5968, - "step": 35133 - }, - { - "epoch": 18.322816166883964, - "grad_norm": 1.524812936782837, - "learning_rate": 6.520402010050251e-05, - "loss": 5.1604, - "step": 35134 - }, - { - "epoch": 18.323337679269883, - "grad_norm": 1.4660134315490723, - "learning_rate": 6.520301507537688e-05, - "loss": 5.3487, - "step": 35135 - }, - { - "epoch": 18.323859191655803, - "grad_norm": 1.5009397268295288, - "learning_rate": 6.520201005025126e-05, - "loss": 5.6135, - "step": 35136 - }, - { - "epoch": 18.324380704041722, - "grad_norm": 1.463448405265808, - "learning_rate": 6.520100502512562e-05, - "loss": 5.4579, - "step": 35137 - }, - { - "epoch": 18.324902216427642, - "grad_norm": 1.4928309917449951, - "learning_rate": 6.52e-05, - "loss": 5.0648, - "step": 35138 - }, - { - "epoch": 18.325423728813558, - "grad_norm": 1.6146224737167358, - "learning_rate": 6.519899497487438e-05, - "loss": 5.3982, - "step": 35139 - }, - { - "epoch": 18.325945241199477, - "grad_norm": 1.488438367843628, - "learning_rate": 6.519798994974875e-05, - "loss": 5.439, - "step": 35140 - }, - { - "epoch": 18.326466753585397, - "grad_norm": 1.4773155450820923, - "learning_rate": 6.519698492462312e-05, - "loss": 4.8582, - "step": 35141 - }, - { - "epoch": 18.326988265971316, - "grad_norm": 1.4975755214691162, - "learning_rate": 6.51959798994975e-05, - "loss": 5.6043, - "step": 35142 - }, - { - "epoch": 18.327509778357236, - "grad_norm": 1.5166904926300049, - "learning_rate": 6.519497487437186e-05, - "loss": 5.2688, - "step": 35143 - }, - { - "epoch": 18.328031290743155, - "grad_norm": 1.5336179733276367, - "learning_rate": 6.519396984924624e-05, - "loss": 5.3195, - "step": 35144 - }, - { - "epoch": 18.328552803129075, - "grad_norm": 1.5348587036132812, - "learning_rate": 6.51929648241206e-05, - "loss": 5.4915, - "step": 35145 - }, - { - "epoch": 18.329074315514994, - "grad_norm": 1.5124449729919434, - "learning_rate": 6.519195979899497e-05, - "loss": 5.049, - "step": 35146 - }, - { - "epoch": 18.329595827900913, - "grad_norm": 1.4915082454681396, - "learning_rate": 6.519095477386935e-05, - "loss": 5.498, - "step": 35147 - }, - { - "epoch": 18.330117340286833, - "grad_norm": 1.4981940984725952, - "learning_rate": 6.518994974874372e-05, - "loss": 5.4586, - "step": 35148 - }, - { - "epoch": 18.330638852672752, - "grad_norm": 1.544198751449585, - "learning_rate": 6.51889447236181e-05, - "loss": 5.1423, - "step": 35149 - }, - { - "epoch": 18.331160365058672, - "grad_norm": 1.4918171167373657, - "learning_rate": 6.518793969849247e-05, - "loss": 5.2995, - "step": 35150 - }, - { - "epoch": 18.331681877444588, - "grad_norm": 1.6736890077590942, - "learning_rate": 6.518693467336684e-05, - "loss": 5.5729, - "step": 35151 - }, - { - "epoch": 18.332203389830507, - "grad_norm": 1.4865087270736694, - "learning_rate": 6.518592964824121e-05, - "loss": 5.2274, - "step": 35152 - }, - { - "epoch": 18.332724902216427, - "grad_norm": 1.4728646278381348, - "learning_rate": 6.518492462311559e-05, - "loss": 5.0253, - "step": 35153 - }, - { - "epoch": 18.333246414602346, - "grad_norm": 1.4921391010284424, - "learning_rate": 6.518391959798995e-05, - "loss": 5.1798, - "step": 35154 - }, - { - "epoch": 18.333767926988266, - "grad_norm": 1.6297333240509033, - "learning_rate": 6.518291457286433e-05, - "loss": 4.8407, - "step": 35155 - }, - { - "epoch": 18.334289439374185, - "grad_norm": 1.4869496822357178, - "learning_rate": 6.518190954773869e-05, - "loss": 5.5811, - "step": 35156 - }, - { - "epoch": 18.334810951760105, - "grad_norm": 1.544710636138916, - "learning_rate": 6.518090452261307e-05, - "loss": 4.7884, - "step": 35157 - }, - { - "epoch": 18.335332464146024, - "grad_norm": 1.5675817728042603, - "learning_rate": 6.517989949748743e-05, - "loss": 5.4433, - "step": 35158 - }, - { - "epoch": 18.335853976531943, - "grad_norm": 1.5041277408599854, - "learning_rate": 6.517889447236181e-05, - "loss": 5.1943, - "step": 35159 - }, - { - "epoch": 18.336375488917863, - "grad_norm": 1.5333248376846313, - "learning_rate": 6.517788944723619e-05, - "loss": 5.3319, - "step": 35160 - }, - { - "epoch": 18.336897001303782, - "grad_norm": 1.5351759195327759, - "learning_rate": 6.517688442211055e-05, - "loss": 5.3293, - "step": 35161 - }, - { - "epoch": 18.3374185136897, - "grad_norm": 1.5114445686340332, - "learning_rate": 6.517587939698493e-05, - "loss": 5.4527, - "step": 35162 - }, - { - "epoch": 18.337940026075618, - "grad_norm": 1.5147082805633545, - "learning_rate": 6.51748743718593e-05, - "loss": 5.5439, - "step": 35163 - }, - { - "epoch": 18.338461538461537, - "grad_norm": 1.598482370376587, - "learning_rate": 6.517386934673367e-05, - "loss": 5.1629, - "step": 35164 - }, - { - "epoch": 18.338983050847457, - "grad_norm": 1.5303378105163574, - "learning_rate": 6.517286432160804e-05, - "loss": 5.0091, - "step": 35165 - }, - { - "epoch": 18.339504563233376, - "grad_norm": 1.6546776294708252, - "learning_rate": 6.517185929648242e-05, - "loss": 4.9706, - "step": 35166 - }, - { - "epoch": 18.340026075619296, - "grad_norm": 1.5059641599655151, - "learning_rate": 6.517085427135678e-05, - "loss": 5.0661, - "step": 35167 - }, - { - "epoch": 18.340547588005215, - "grad_norm": 1.449028730392456, - "learning_rate": 6.516984924623116e-05, - "loss": 5.6342, - "step": 35168 - }, - { - "epoch": 18.341069100391135, - "grad_norm": 1.4861338138580322, - "learning_rate": 6.516884422110554e-05, - "loss": 4.6347, - "step": 35169 - }, - { - "epoch": 18.341590612777054, - "grad_norm": 1.4365317821502686, - "learning_rate": 6.516783919597991e-05, - "loss": 5.3018, - "step": 35170 - }, - { - "epoch": 18.342112125162974, - "grad_norm": 1.9669784307479858, - "learning_rate": 6.516683417085428e-05, - "loss": 4.9078, - "step": 35171 - }, - { - "epoch": 18.342633637548893, - "grad_norm": 1.4798266887664795, - "learning_rate": 6.516582914572866e-05, - "loss": 5.2858, - "step": 35172 - }, - { - "epoch": 18.343155149934812, - "grad_norm": 1.4691230058670044, - "learning_rate": 6.516482412060302e-05, - "loss": 5.5603, - "step": 35173 - }, - { - "epoch": 18.343676662320732, - "grad_norm": 1.5285770893096924, - "learning_rate": 6.516381909547738e-05, - "loss": 5.2734, - "step": 35174 - }, - { - "epoch": 18.344198174706648, - "grad_norm": 1.438864827156067, - "learning_rate": 6.516281407035176e-05, - "loss": 5.5221, - "step": 35175 - }, - { - "epoch": 18.344719687092567, - "grad_norm": 1.5140881538391113, - "learning_rate": 6.516180904522613e-05, - "loss": 5.5927, - "step": 35176 - }, - { - "epoch": 18.345241199478487, - "grad_norm": 1.5184677839279175, - "learning_rate": 6.51608040201005e-05, - "loss": 5.6412, - "step": 35177 - }, - { - "epoch": 18.345762711864406, - "grad_norm": 1.644676685333252, - "learning_rate": 6.515979899497487e-05, - "loss": 5.5343, - "step": 35178 - }, - { - "epoch": 18.346284224250326, - "grad_norm": 1.6149630546569824, - "learning_rate": 6.515879396984925e-05, - "loss": 5.0041, - "step": 35179 - }, - { - "epoch": 18.346805736636245, - "grad_norm": 1.5279104709625244, - "learning_rate": 6.515778894472362e-05, - "loss": 5.2118, - "step": 35180 - }, - { - "epoch": 18.347327249022165, - "grad_norm": 1.440938115119934, - "learning_rate": 6.5156783919598e-05, - "loss": 5.6172, - "step": 35181 - }, - { - "epoch": 18.347848761408084, - "grad_norm": 1.5831209421157837, - "learning_rate": 6.515577889447237e-05, - "loss": 4.6745, - "step": 35182 - }, - { - "epoch": 18.348370273794004, - "grad_norm": 1.6178001165390015, - "learning_rate": 6.515477386934674e-05, - "loss": 4.9988, - "step": 35183 - }, - { - "epoch": 18.348891786179923, - "grad_norm": 1.5885592699050903, - "learning_rate": 6.515376884422111e-05, - "loss": 5.3116, - "step": 35184 - }, - { - "epoch": 18.349413298565842, - "grad_norm": 1.516729712486267, - "learning_rate": 6.515276381909549e-05, - "loss": 5.42, - "step": 35185 - }, - { - "epoch": 18.34993481095176, - "grad_norm": 1.4849730730056763, - "learning_rate": 6.515175879396985e-05, - "loss": 5.3537, - "step": 35186 - }, - { - "epoch": 18.350456323337678, - "grad_norm": 1.6143810749053955, - "learning_rate": 6.515075376884421e-05, - "loss": 5.4134, - "step": 35187 - }, - { - "epoch": 18.350977835723597, - "grad_norm": 1.534790277481079, - "learning_rate": 6.514974874371859e-05, - "loss": 5.0033, - "step": 35188 - }, - { - "epoch": 18.351499348109517, - "grad_norm": 1.7004790306091309, - "learning_rate": 6.514874371859297e-05, - "loss": 5.0305, - "step": 35189 - }, - { - "epoch": 18.352020860495436, - "grad_norm": 1.513424038887024, - "learning_rate": 6.514773869346735e-05, - "loss": 5.1107, - "step": 35190 - }, - { - "epoch": 18.352542372881356, - "grad_norm": 1.425889492034912, - "learning_rate": 6.514673366834171e-05, - "loss": 4.942, - "step": 35191 - }, - { - "epoch": 18.353063885267275, - "grad_norm": 1.4612765312194824, - "learning_rate": 6.514572864321609e-05, - "loss": 5.2527, - "step": 35192 - }, - { - "epoch": 18.353585397653195, - "grad_norm": 1.5380311012268066, - "learning_rate": 6.514472361809045e-05, - "loss": 5.0215, - "step": 35193 - }, - { - "epoch": 18.354106910039114, - "grad_norm": 1.5127739906311035, - "learning_rate": 6.514371859296483e-05, - "loss": 5.4098, - "step": 35194 - }, - { - "epoch": 18.354628422425034, - "grad_norm": 1.4190622568130493, - "learning_rate": 6.51427135678392e-05, - "loss": 5.6236, - "step": 35195 - }, - { - "epoch": 18.355149934810953, - "grad_norm": 1.5216056108474731, - "learning_rate": 6.514170854271357e-05, - "loss": 5.133, - "step": 35196 - }, - { - "epoch": 18.355671447196872, - "grad_norm": 1.4676140546798706, - "learning_rate": 6.514070351758794e-05, - "loss": 5.5132, - "step": 35197 - }, - { - "epoch": 18.35619295958279, - "grad_norm": 1.5536983013153076, - "learning_rate": 6.513969849246232e-05, - "loss": 4.7484, - "step": 35198 - }, - { - "epoch": 18.356714471968708, - "grad_norm": 1.588032603263855, - "learning_rate": 6.513869346733668e-05, - "loss": 4.976, - "step": 35199 - }, - { - "epoch": 18.357235984354627, - "grad_norm": 1.468733549118042, - "learning_rate": 6.513768844221106e-05, - "loss": 5.6676, - "step": 35200 - }, - { - "epoch": 18.357757496740547, - "grad_norm": 1.4783326387405396, - "learning_rate": 6.513668341708544e-05, - "loss": 5.2403, - "step": 35201 - }, - { - "epoch": 18.358279009126466, - "grad_norm": 1.6664625406265259, - "learning_rate": 6.51356783919598e-05, - "loss": 5.0735, - "step": 35202 - }, - { - "epoch": 18.358800521512386, - "grad_norm": 1.547019362449646, - "learning_rate": 6.513467336683418e-05, - "loss": 5.2245, - "step": 35203 - }, - { - "epoch": 18.359322033898305, - "grad_norm": 1.615593433380127, - "learning_rate": 6.513366834170854e-05, - "loss": 4.5041, - "step": 35204 - }, - { - "epoch": 18.359843546284225, - "grad_norm": 1.4702873229980469, - "learning_rate": 6.513266331658292e-05, - "loss": 5.25, - "step": 35205 - }, - { - "epoch": 18.360365058670144, - "grad_norm": 1.5272537469863892, - "learning_rate": 6.513165829145728e-05, - "loss": 5.3529, - "step": 35206 - }, - { - "epoch": 18.360886571056064, - "grad_norm": 1.4862070083618164, - "learning_rate": 6.513065326633166e-05, - "loss": 5.7859, - "step": 35207 - }, - { - "epoch": 18.361408083441983, - "grad_norm": 1.4013558626174927, - "learning_rate": 6.512964824120603e-05, - "loss": 5.6293, - "step": 35208 - }, - { - "epoch": 18.361929595827903, - "grad_norm": 1.678311824798584, - "learning_rate": 6.51286432160804e-05, - "loss": 5.1165, - "step": 35209 - }, - { - "epoch": 18.36245110821382, - "grad_norm": 1.4972341060638428, - "learning_rate": 6.512763819095478e-05, - "loss": 5.3459, - "step": 35210 - }, - { - "epoch": 18.362972620599738, - "grad_norm": 1.4877958297729492, - "learning_rate": 6.512663316582916e-05, - "loss": 5.2868, - "step": 35211 - }, - { - "epoch": 18.363494132985657, - "grad_norm": 1.600679636001587, - "learning_rate": 6.512562814070352e-05, - "loss": 4.9573, - "step": 35212 - }, - { - "epoch": 18.364015645371577, - "grad_norm": 1.6013747453689575, - "learning_rate": 6.512462311557789e-05, - "loss": 5.1246, - "step": 35213 - }, - { - "epoch": 18.364537157757496, - "grad_norm": 1.4502426385879517, - "learning_rate": 6.512361809045227e-05, - "loss": 5.3007, - "step": 35214 - }, - { - "epoch": 18.365058670143416, - "grad_norm": 1.5188653469085693, - "learning_rate": 6.512261306532663e-05, - "loss": 5.8121, - "step": 35215 - }, - { - "epoch": 18.365580182529335, - "grad_norm": 1.6589570045471191, - "learning_rate": 6.512160804020101e-05, - "loss": 5.2324, - "step": 35216 - }, - { - "epoch": 18.366101694915255, - "grad_norm": 1.7599300146102905, - "learning_rate": 6.512060301507537e-05, - "loss": 5.1238, - "step": 35217 - }, - { - "epoch": 18.366623207301174, - "grad_norm": 1.5365235805511475, - "learning_rate": 6.511959798994975e-05, - "loss": 4.8656, - "step": 35218 - }, - { - "epoch": 18.367144719687094, - "grad_norm": 1.5722875595092773, - "learning_rate": 6.511859296482412e-05, - "loss": 4.9021, - "step": 35219 - }, - { - "epoch": 18.367666232073013, - "grad_norm": 1.3917123079299927, - "learning_rate": 6.511758793969849e-05, - "loss": 5.5974, - "step": 35220 - }, - { - "epoch": 18.368187744458933, - "grad_norm": 1.4243500232696533, - "learning_rate": 6.511658291457287e-05, - "loss": 5.5722, - "step": 35221 - }, - { - "epoch": 18.36870925684485, - "grad_norm": 1.4543038606643677, - "learning_rate": 6.511557788944725e-05, - "loss": 5.3488, - "step": 35222 - }, - { - "epoch": 18.369230769230768, - "grad_norm": 1.4754700660705566, - "learning_rate": 6.511457286432161e-05, - "loss": 5.2769, - "step": 35223 - }, - { - "epoch": 18.369752281616687, - "grad_norm": 1.427228331565857, - "learning_rate": 6.511356783919599e-05, - "loss": 5.4373, - "step": 35224 - }, - { - "epoch": 18.370273794002607, - "grad_norm": 1.4878312349319458, - "learning_rate": 6.511256281407036e-05, - "loss": 5.1905, - "step": 35225 - }, - { - "epoch": 18.370795306388526, - "grad_norm": 1.4094617366790771, - "learning_rate": 6.511155778894472e-05, - "loss": 5.2975, - "step": 35226 - }, - { - "epoch": 18.371316818774446, - "grad_norm": 1.4788113832473755, - "learning_rate": 6.51105527638191e-05, - "loss": 5.7644, - "step": 35227 - }, - { - "epoch": 18.371838331160365, - "grad_norm": 1.34215247631073, - "learning_rate": 6.510954773869346e-05, - "loss": 5.6988, - "step": 35228 - }, - { - "epoch": 18.372359843546285, - "grad_norm": 1.4588916301727295, - "learning_rate": 6.510854271356784e-05, - "loss": 5.3242, - "step": 35229 - }, - { - "epoch": 18.372881355932204, - "grad_norm": 1.528038740158081, - "learning_rate": 6.510753768844222e-05, - "loss": 5.3378, - "step": 35230 - }, - { - "epoch": 18.373402868318124, - "grad_norm": 1.4804543256759644, - "learning_rate": 6.51065326633166e-05, - "loss": 5.1651, - "step": 35231 - }, - { - "epoch": 18.373924380704043, - "grad_norm": 1.5230283737182617, - "learning_rate": 6.510552763819096e-05, - "loss": 5.029, - "step": 35232 - }, - { - "epoch": 18.374445893089963, - "grad_norm": 1.5226306915283203, - "learning_rate": 6.510452261306534e-05, - "loss": 5.6098, - "step": 35233 - }, - { - "epoch": 18.37496740547588, - "grad_norm": 1.435072422027588, - "learning_rate": 6.51035175879397e-05, - "loss": 5.1473, - "step": 35234 - }, - { - "epoch": 18.375488917861798, - "grad_norm": 1.4489738941192627, - "learning_rate": 6.510251256281408e-05, - "loss": 4.9171, - "step": 35235 - }, - { - "epoch": 18.376010430247717, - "grad_norm": 1.5729494094848633, - "learning_rate": 6.510150753768844e-05, - "loss": 5.4577, - "step": 35236 - }, - { - "epoch": 18.376531942633637, - "grad_norm": 1.5707871913909912, - "learning_rate": 6.510050251256282e-05, - "loss": 5.6443, - "step": 35237 - }, - { - "epoch": 18.377053455019556, - "grad_norm": 1.555337905883789, - "learning_rate": 6.509949748743719e-05, - "loss": 5.2593, - "step": 35238 - }, - { - "epoch": 18.377574967405476, - "grad_norm": 1.600921630859375, - "learning_rate": 6.509849246231155e-05, - "loss": 4.8542, - "step": 35239 - }, - { - "epoch": 18.378096479791395, - "grad_norm": 1.6057699918746948, - "learning_rate": 6.509748743718593e-05, - "loss": 5.3746, - "step": 35240 - }, - { - "epoch": 18.378617992177315, - "grad_norm": 1.6773979663848877, - "learning_rate": 6.50964824120603e-05, - "loss": 5.1454, - "step": 35241 - }, - { - "epoch": 18.379139504563234, - "grad_norm": 1.441369891166687, - "learning_rate": 6.509547738693468e-05, - "loss": 5.3756, - "step": 35242 - }, - { - "epoch": 18.379661016949154, - "grad_norm": 1.4600188732147217, - "learning_rate": 6.509447236180905e-05, - "loss": 5.2631, - "step": 35243 - }, - { - "epoch": 18.380182529335073, - "grad_norm": 1.449599027633667, - "learning_rate": 6.509346733668343e-05, - "loss": 5.4044, - "step": 35244 - }, - { - "epoch": 18.380704041720993, - "grad_norm": 1.607900619506836, - "learning_rate": 6.509246231155779e-05, - "loss": 5.5113, - "step": 35245 - }, - { - "epoch": 18.38122555410691, - "grad_norm": 1.5998753309249878, - "learning_rate": 6.509145728643217e-05, - "loss": 4.9982, - "step": 35246 - }, - { - "epoch": 18.381747066492828, - "grad_norm": 1.419301986694336, - "learning_rate": 6.509045226130653e-05, - "loss": 5.174, - "step": 35247 - }, - { - "epoch": 18.382268578878747, - "grad_norm": 1.4373308420181274, - "learning_rate": 6.508944723618091e-05, - "loss": 5.3976, - "step": 35248 - }, - { - "epoch": 18.382790091264667, - "grad_norm": 1.8252958059310913, - "learning_rate": 6.508844221105527e-05, - "loss": 4.9375, - "step": 35249 - }, - { - "epoch": 18.383311603650586, - "grad_norm": 1.4874064922332764, - "learning_rate": 6.508743718592965e-05, - "loss": 5.4297, - "step": 35250 - }, - { - "epoch": 18.383833116036506, - "grad_norm": 1.4181715250015259, - "learning_rate": 6.508643216080402e-05, - "loss": 5.5747, - "step": 35251 - }, - { - "epoch": 18.384354628422425, - "grad_norm": 1.4135088920593262, - "learning_rate": 6.50854271356784e-05, - "loss": 4.9607, - "step": 35252 - }, - { - "epoch": 18.384876140808345, - "grad_norm": 1.6797513961791992, - "learning_rate": 6.508442211055277e-05, - "loss": 5.2303, - "step": 35253 - }, - { - "epoch": 18.385397653194264, - "grad_norm": 1.49441397190094, - "learning_rate": 6.508341708542714e-05, - "loss": 5.446, - "step": 35254 - }, - { - "epoch": 18.385919165580184, - "grad_norm": 1.5303007364273071, - "learning_rate": 6.508241206030151e-05, - "loss": 4.9379, - "step": 35255 - }, - { - "epoch": 18.386440677966103, - "grad_norm": 1.5456660985946655, - "learning_rate": 6.508140703517588e-05, - "loss": 5.3577, - "step": 35256 - }, - { - "epoch": 18.386962190352023, - "grad_norm": 1.534050464630127, - "learning_rate": 6.508040201005026e-05, - "loss": 5.0826, - "step": 35257 - }, - { - "epoch": 18.38748370273794, - "grad_norm": 1.5652581453323364, - "learning_rate": 6.507939698492462e-05, - "loss": 5.0116, - "step": 35258 - }, - { - "epoch": 18.388005215123858, - "grad_norm": 1.5942519903182983, - "learning_rate": 6.5078391959799e-05, - "loss": 5.5833, - "step": 35259 - }, - { - "epoch": 18.388526727509777, - "grad_norm": 1.4917103052139282, - "learning_rate": 6.507738693467336e-05, - "loss": 4.8085, - "step": 35260 - }, - { - "epoch": 18.389048239895697, - "grad_norm": 1.506113052368164, - "learning_rate": 6.507638190954774e-05, - "loss": 5.5569, - "step": 35261 - }, - { - "epoch": 18.389569752281616, - "grad_norm": 1.4957773685455322, - "learning_rate": 6.507537688442212e-05, - "loss": 5.0019, - "step": 35262 - }, - { - "epoch": 18.390091264667536, - "grad_norm": 1.501278281211853, - "learning_rate": 6.50743718592965e-05, - "loss": 5.6986, - "step": 35263 - }, - { - "epoch": 18.390612777053455, - "grad_norm": 1.5174301862716675, - "learning_rate": 6.507336683417086e-05, - "loss": 5.0302, - "step": 35264 - }, - { - "epoch": 18.391134289439375, - "grad_norm": 1.4908496141433716, - "learning_rate": 6.507236180904524e-05, - "loss": 5.4177, - "step": 35265 - }, - { - "epoch": 18.391655801825294, - "grad_norm": 1.4700311422348022, - "learning_rate": 6.50713567839196e-05, - "loss": 5.3105, - "step": 35266 - }, - { - "epoch": 18.392177314211214, - "grad_norm": 1.4361238479614258, - "learning_rate": 6.507035175879397e-05, - "loss": 5.1158, - "step": 35267 - }, - { - "epoch": 18.392698826597133, - "grad_norm": 1.5050125122070312, - "learning_rate": 6.506934673366834e-05, - "loss": 5.3698, - "step": 35268 - }, - { - "epoch": 18.39322033898305, - "grad_norm": 1.5255146026611328, - "learning_rate": 6.506834170854271e-05, - "loss": 5.0041, - "step": 35269 - }, - { - "epoch": 18.39374185136897, - "grad_norm": 1.3809155225753784, - "learning_rate": 6.506733668341709e-05, - "loss": 5.1002, - "step": 35270 - }, - { - "epoch": 18.394263363754888, - "grad_norm": 1.5239441394805908, - "learning_rate": 6.506633165829145e-05, - "loss": 4.9689, - "step": 35271 - }, - { - "epoch": 18.394784876140807, - "grad_norm": 1.4743595123291016, - "learning_rate": 6.506532663316583e-05, - "loss": 5.7889, - "step": 35272 - }, - { - "epoch": 18.395306388526727, - "grad_norm": 1.5137022733688354, - "learning_rate": 6.50643216080402e-05, - "loss": 5.3488, - "step": 35273 - }, - { - "epoch": 18.395827900912646, - "grad_norm": 1.4848301410675049, - "learning_rate": 6.506331658291458e-05, - "loss": 5.4443, - "step": 35274 - }, - { - "epoch": 18.396349413298566, - "grad_norm": 1.462069034576416, - "learning_rate": 6.506231155778895e-05, - "loss": 5.7674, - "step": 35275 - }, - { - "epoch": 18.396870925684485, - "grad_norm": 1.4704978466033936, - "learning_rate": 6.506130653266333e-05, - "loss": 5.2562, - "step": 35276 - }, - { - "epoch": 18.397392438070405, - "grad_norm": 1.490984559059143, - "learning_rate": 6.506030150753769e-05, - "loss": 5.2877, - "step": 35277 - }, - { - "epoch": 18.397913950456324, - "grad_norm": 1.5030500888824463, - "learning_rate": 6.505929648241207e-05, - "loss": 5.55, - "step": 35278 - }, - { - "epoch": 18.398435462842244, - "grad_norm": 1.5085588693618774, - "learning_rate": 6.505829145728643e-05, - "loss": 5.1266, - "step": 35279 - }, - { - "epoch": 18.398956975228163, - "grad_norm": 1.4518979787826538, - "learning_rate": 6.50572864321608e-05, - "loss": 5.179, - "step": 35280 - }, - { - "epoch": 18.39947848761408, - "grad_norm": 1.4150406122207642, - "learning_rate": 6.505628140703517e-05, - "loss": 5.5495, - "step": 35281 - }, - { - "epoch": 18.4, - "grad_norm": 1.4545100927352905, - "learning_rate": 6.505527638190955e-05, - "loss": 5.7078, - "step": 35282 - }, - { - "epoch": 18.400521512385918, - "grad_norm": 1.4610793590545654, - "learning_rate": 6.505427135678393e-05, - "loss": 5.5073, - "step": 35283 - }, - { - "epoch": 18.401043024771838, - "grad_norm": 1.5517970323562622, - "learning_rate": 6.50532663316583e-05, - "loss": 5.2492, - "step": 35284 - }, - { - "epoch": 18.401564537157757, - "grad_norm": 1.5593278408050537, - "learning_rate": 6.505226130653267e-05, - "loss": 5.3431, - "step": 35285 - }, - { - "epoch": 18.402086049543676, - "grad_norm": 1.496038556098938, - "learning_rate": 6.505125628140704e-05, - "loss": 4.8134, - "step": 35286 - }, - { - "epoch": 18.402607561929596, - "grad_norm": 1.461105465888977, - "learning_rate": 6.505025125628141e-05, - "loss": 5.3797, - "step": 35287 - }, - { - "epoch": 18.403129074315515, - "grad_norm": 1.5215188264846802, - "learning_rate": 6.504924623115578e-05, - "loss": 5.5118, - "step": 35288 - }, - { - "epoch": 18.403650586701435, - "grad_norm": 1.519545316696167, - "learning_rate": 6.504824120603016e-05, - "loss": 5.0402, - "step": 35289 - }, - { - "epoch": 18.404172099087354, - "grad_norm": 1.4486206769943237, - "learning_rate": 6.504723618090452e-05, - "loss": 5.4524, - "step": 35290 - }, - { - "epoch": 18.404693611473274, - "grad_norm": 1.5370471477508545, - "learning_rate": 6.50462311557789e-05, - "loss": 5.2161, - "step": 35291 - }, - { - "epoch": 18.405215123859193, - "grad_norm": 1.5663869380950928, - "learning_rate": 6.504522613065326e-05, - "loss": 5.285, - "step": 35292 - }, - { - "epoch": 18.40573663624511, - "grad_norm": 1.6275314092636108, - "learning_rate": 6.504422110552764e-05, - "loss": 4.4001, - "step": 35293 - }, - { - "epoch": 18.40625814863103, - "grad_norm": 1.4769930839538574, - "learning_rate": 6.504321608040202e-05, - "loss": 5.482, - "step": 35294 - }, - { - "epoch": 18.406779661016948, - "grad_norm": 1.4571819305419922, - "learning_rate": 6.504221105527638e-05, - "loss": 5.5107, - "step": 35295 - }, - { - "epoch": 18.407301173402868, - "grad_norm": 1.4298310279846191, - "learning_rate": 6.504120603015076e-05, - "loss": 5.6881, - "step": 35296 - }, - { - "epoch": 18.407822685788787, - "grad_norm": 1.5141212940216064, - "learning_rate": 6.504020100502513e-05, - "loss": 5.2055, - "step": 35297 - }, - { - "epoch": 18.408344198174706, - "grad_norm": 1.4593467712402344, - "learning_rate": 6.50391959798995e-05, - "loss": 5.7636, - "step": 35298 - }, - { - "epoch": 18.408865710560626, - "grad_norm": 1.5348776578903198, - "learning_rate": 6.503819095477387e-05, - "loss": 5.1077, - "step": 35299 - }, - { - "epoch": 18.409387222946545, - "grad_norm": 1.6128573417663574, - "learning_rate": 6.503718592964825e-05, - "loss": 4.9064, - "step": 35300 - }, - { - "epoch": 18.409908735332465, - "grad_norm": 1.7701175212860107, - "learning_rate": 6.503618090452261e-05, - "loss": 4.4341, - "step": 35301 - }, - { - "epoch": 18.410430247718384, - "grad_norm": 1.4944113492965698, - "learning_rate": 6.503517587939699e-05, - "loss": 5.2663, - "step": 35302 - }, - { - "epoch": 18.410951760104304, - "grad_norm": 1.6672803163528442, - "learning_rate": 6.503417085427136e-05, - "loss": 5.2584, - "step": 35303 - }, - { - "epoch": 18.411473272490223, - "grad_norm": 1.586869478225708, - "learning_rate": 6.503316582914574e-05, - "loss": 5.0572, - "step": 35304 - }, - { - "epoch": 18.41199478487614, - "grad_norm": 1.427100419998169, - "learning_rate": 6.503216080402011e-05, - "loss": 5.6722, - "step": 35305 - }, - { - "epoch": 18.41251629726206, - "grad_norm": 1.4616844654083252, - "learning_rate": 6.503115577889447e-05, - "loss": 5.5391, - "step": 35306 - }, - { - "epoch": 18.413037809647978, - "grad_norm": 1.4312878847122192, - "learning_rate": 6.503015075376885e-05, - "loss": 5.2727, - "step": 35307 - }, - { - "epoch": 18.413559322033898, - "grad_norm": 1.4902793169021606, - "learning_rate": 6.502914572864321e-05, - "loss": 5.5041, - "step": 35308 - }, - { - "epoch": 18.414080834419817, - "grad_norm": 1.493479609489441, - "learning_rate": 6.502814070351759e-05, - "loss": 5.294, - "step": 35309 - }, - { - "epoch": 18.414602346805736, - "grad_norm": 1.519619107246399, - "learning_rate": 6.502713567839196e-05, - "loss": 5.171, - "step": 35310 - }, - { - "epoch": 18.415123859191656, - "grad_norm": 1.5770891904830933, - "learning_rate": 6.502613065326633e-05, - "loss": 5.1954, - "step": 35311 - }, - { - "epoch": 18.415645371577575, - "grad_norm": 1.3564246892929077, - "learning_rate": 6.50251256281407e-05, - "loss": 5.2473, - "step": 35312 - }, - { - "epoch": 18.416166883963495, - "grad_norm": 1.5160160064697266, - "learning_rate": 6.502412060301508e-05, - "loss": 5.1289, - "step": 35313 - }, - { - "epoch": 18.416688396349414, - "grad_norm": 1.4503087997436523, - "learning_rate": 6.502311557788945e-05, - "loss": 5.1837, - "step": 35314 - }, - { - "epoch": 18.417209908735334, - "grad_norm": 1.4963157176971436, - "learning_rate": 6.502211055276383e-05, - "loss": 5.743, - "step": 35315 - }, - { - "epoch": 18.417731421121253, - "grad_norm": 1.448970913887024, - "learning_rate": 6.50211055276382e-05, - "loss": 5.6131, - "step": 35316 - }, - { - "epoch": 18.41825293350717, - "grad_norm": 1.5873438119888306, - "learning_rate": 6.502010050251257e-05, - "loss": 5.1507, - "step": 35317 - }, - { - "epoch": 18.41877444589309, - "grad_norm": 1.4814525842666626, - "learning_rate": 6.501909547738694e-05, - "loss": 5.3282, - "step": 35318 - }, - { - "epoch": 18.419295958279008, - "grad_norm": 1.6601386070251465, - "learning_rate": 6.50180904522613e-05, - "loss": 4.3975, - "step": 35319 - }, - { - "epoch": 18.419817470664928, - "grad_norm": 1.5429420471191406, - "learning_rate": 6.501708542713568e-05, - "loss": 5.6248, - "step": 35320 - }, - { - "epoch": 18.420338983050847, - "grad_norm": 1.5141254663467407, - "learning_rate": 6.501608040201004e-05, - "loss": 5.2372, - "step": 35321 - }, - { - "epoch": 18.420860495436767, - "grad_norm": 1.4814646244049072, - "learning_rate": 6.501507537688442e-05, - "loss": 5.1427, - "step": 35322 - }, - { - "epoch": 18.421382007822686, - "grad_norm": 1.4261384010314941, - "learning_rate": 6.50140703517588e-05, - "loss": 5.6754, - "step": 35323 - }, - { - "epoch": 18.421903520208605, - "grad_norm": 1.4649937152862549, - "learning_rate": 6.501306532663318e-05, - "loss": 5.6532, - "step": 35324 - }, - { - "epoch": 18.422425032594525, - "grad_norm": 1.6732724905014038, - "learning_rate": 6.501206030150754e-05, - "loss": 4.7962, - "step": 35325 - }, - { - "epoch": 18.422946544980444, - "grad_norm": 1.4529629945755005, - "learning_rate": 6.501105527638192e-05, - "loss": 5.4328, - "step": 35326 - }, - { - "epoch": 18.423468057366364, - "grad_norm": 1.4552761316299438, - "learning_rate": 6.501005025125628e-05, - "loss": 5.4543, - "step": 35327 - }, - { - "epoch": 18.423989569752283, - "grad_norm": 1.551533579826355, - "learning_rate": 6.500904522613066e-05, - "loss": 5.053, - "step": 35328 - }, - { - "epoch": 18.4245110821382, - "grad_norm": 1.5224037170410156, - "learning_rate": 6.500804020100503e-05, - "loss": 5.4789, - "step": 35329 - }, - { - "epoch": 18.42503259452412, - "grad_norm": 1.4882076978683472, - "learning_rate": 6.50070351758794e-05, - "loss": 5.3402, - "step": 35330 - }, - { - "epoch": 18.425554106910038, - "grad_norm": 1.6162794828414917, - "learning_rate": 6.500603015075377e-05, - "loss": 4.9185, - "step": 35331 - }, - { - "epoch": 18.426075619295958, - "grad_norm": 1.4745005369186401, - "learning_rate": 6.500502512562813e-05, - "loss": 5.422, - "step": 35332 - }, - { - "epoch": 18.426597131681877, - "grad_norm": 1.5176873207092285, - "learning_rate": 6.500402010050251e-05, - "loss": 5.3841, - "step": 35333 - }, - { - "epoch": 18.427118644067797, - "grad_norm": 1.3660645484924316, - "learning_rate": 6.500301507537689e-05, - "loss": 5.867, - "step": 35334 - }, - { - "epoch": 18.427640156453716, - "grad_norm": 1.6045658588409424, - "learning_rate": 6.500201005025127e-05, - "loss": 4.8194, - "step": 35335 - }, - { - "epoch": 18.428161668839635, - "grad_norm": 1.5610361099243164, - "learning_rate": 6.500100502512563e-05, - "loss": 5.2886, - "step": 35336 - }, - { - "epoch": 18.428683181225555, - "grad_norm": 1.614809274673462, - "learning_rate": 6.500000000000001e-05, - "loss": 5.0943, - "step": 35337 - }, - { - "epoch": 18.429204693611474, - "grad_norm": 1.4449462890625, - "learning_rate": 6.499899497487437e-05, - "loss": 5.5332, - "step": 35338 - }, - { - "epoch": 18.429726205997394, - "grad_norm": 1.394823670387268, - "learning_rate": 6.499798994974875e-05, - "loss": 5.6594, - "step": 35339 - }, - { - "epoch": 18.430247718383313, - "grad_norm": 1.4832926988601685, - "learning_rate": 6.499698492462311e-05, - "loss": 5.5112, - "step": 35340 - }, - { - "epoch": 18.43076923076923, - "grad_norm": 1.4940158128738403, - "learning_rate": 6.499597989949749e-05, - "loss": 4.9424, - "step": 35341 - }, - { - "epoch": 18.43129074315515, - "grad_norm": 1.5216963291168213, - "learning_rate": 6.499497487437186e-05, - "loss": 5.3823, - "step": 35342 - }, - { - "epoch": 18.431812255541068, - "grad_norm": 1.4266589879989624, - "learning_rate": 6.499396984924623e-05, - "loss": 5.7538, - "step": 35343 - }, - { - "epoch": 18.432333767926988, - "grad_norm": 1.6211663484573364, - "learning_rate": 6.499296482412061e-05, - "loss": 5.1607, - "step": 35344 - }, - { - "epoch": 18.432855280312907, - "grad_norm": 1.4464757442474365, - "learning_rate": 6.499195979899498e-05, - "loss": 4.7223, - "step": 35345 - }, - { - "epoch": 18.433376792698827, - "grad_norm": 1.4377882480621338, - "learning_rate": 6.499095477386935e-05, - "loss": 5.6721, - "step": 35346 - }, - { - "epoch": 18.433898305084746, - "grad_norm": 1.47059965133667, - "learning_rate": 6.498994974874372e-05, - "loss": 5.567, - "step": 35347 - }, - { - "epoch": 18.434419817470665, - "grad_norm": 1.5374125242233276, - "learning_rate": 6.49889447236181e-05, - "loss": 5.026, - "step": 35348 - }, - { - "epoch": 18.434941329856585, - "grad_norm": 1.520053744316101, - "learning_rate": 6.498793969849246e-05, - "loss": 5.662, - "step": 35349 - }, - { - "epoch": 18.435462842242504, - "grad_norm": 1.554174542427063, - "learning_rate": 6.498693467336684e-05, - "loss": 4.7984, - "step": 35350 - }, - { - "epoch": 18.435984354628424, - "grad_norm": 1.4888135194778442, - "learning_rate": 6.49859296482412e-05, - "loss": 5.654, - "step": 35351 - }, - { - "epoch": 18.43650586701434, - "grad_norm": 1.4486197233200073, - "learning_rate": 6.498492462311558e-05, - "loss": 5.4912, - "step": 35352 - }, - { - "epoch": 18.43702737940026, - "grad_norm": 1.4718955755233765, - "learning_rate": 6.498391959798994e-05, - "loss": 5.4259, - "step": 35353 - }, - { - "epoch": 18.43754889178618, - "grad_norm": 1.5083993673324585, - "learning_rate": 6.498291457286432e-05, - "loss": 4.7653, - "step": 35354 - }, - { - "epoch": 18.438070404172098, - "grad_norm": 1.5512456893920898, - "learning_rate": 6.49819095477387e-05, - "loss": 5.5338, - "step": 35355 - }, - { - "epoch": 18.438591916558018, - "grad_norm": 1.5420596599578857, - "learning_rate": 6.498090452261308e-05, - "loss": 5.3927, - "step": 35356 - }, - { - "epoch": 18.439113428943937, - "grad_norm": 1.65750253200531, - "learning_rate": 6.497989949748744e-05, - "loss": 5.3095, - "step": 35357 - }, - { - "epoch": 18.439634941329857, - "grad_norm": 1.4846305847167969, - "learning_rate": 6.497889447236182e-05, - "loss": 5.3905, - "step": 35358 - }, - { - "epoch": 18.440156453715776, - "grad_norm": 1.5360530614852905, - "learning_rate": 6.497788944723618e-05, - "loss": 4.9349, - "step": 35359 - }, - { - "epoch": 18.440677966101696, - "grad_norm": 1.5036289691925049, - "learning_rate": 6.497688442211055e-05, - "loss": 5.0728, - "step": 35360 - }, - { - "epoch": 18.441199478487615, - "grad_norm": 1.5356239080429077, - "learning_rate": 6.497587939698493e-05, - "loss": 5.6877, - "step": 35361 - }, - { - "epoch": 18.441720990873534, - "grad_norm": 1.5183510780334473, - "learning_rate": 6.497487437185929e-05, - "loss": 5.3498, - "step": 35362 - }, - { - "epoch": 18.442242503259454, - "grad_norm": 1.5315098762512207, - "learning_rate": 6.497386934673367e-05, - "loss": 5.2903, - "step": 35363 - }, - { - "epoch": 18.442764015645373, - "grad_norm": 1.5531846284866333, - "learning_rate": 6.497286432160805e-05, - "loss": 5.2089, - "step": 35364 - }, - { - "epoch": 18.44328552803129, - "grad_norm": 1.4574592113494873, - "learning_rate": 6.497185929648242e-05, - "loss": 5.72, - "step": 35365 - }, - { - "epoch": 18.44380704041721, - "grad_norm": 1.4751760959625244, - "learning_rate": 6.497085427135679e-05, - "loss": 5.3536, - "step": 35366 - }, - { - "epoch": 18.444328552803128, - "grad_norm": 1.452718734741211, - "learning_rate": 6.496984924623117e-05, - "loss": 4.902, - "step": 35367 - }, - { - "epoch": 18.444850065189048, - "grad_norm": 1.4632970094680786, - "learning_rate": 6.496884422110553e-05, - "loss": 5.1402, - "step": 35368 - }, - { - "epoch": 18.445371577574967, - "grad_norm": 1.4100139141082764, - "learning_rate": 6.496783919597991e-05, - "loss": 5.1328, - "step": 35369 - }, - { - "epoch": 18.445893089960887, - "grad_norm": 1.5236485004425049, - "learning_rate": 6.496683417085427e-05, - "loss": 4.778, - "step": 35370 - }, - { - "epoch": 18.446414602346806, - "grad_norm": 1.6898021697998047, - "learning_rate": 6.496582914572865e-05, - "loss": 4.5007, - "step": 35371 - }, - { - "epoch": 18.446936114732726, - "grad_norm": 1.4275513887405396, - "learning_rate": 6.496482412060301e-05, - "loss": 5.5937, - "step": 35372 - }, - { - "epoch": 18.447457627118645, - "grad_norm": 1.4258006811141968, - "learning_rate": 6.496381909547738e-05, - "loss": 5.6156, - "step": 35373 - }, - { - "epoch": 18.447979139504564, - "grad_norm": 1.8907052278518677, - "learning_rate": 6.496281407035176e-05, - "loss": 4.7683, - "step": 35374 - }, - { - "epoch": 18.448500651890484, - "grad_norm": 1.4136124849319458, - "learning_rate": 6.496180904522613e-05, - "loss": 5.559, - "step": 35375 - }, - { - "epoch": 18.4490221642764, - "grad_norm": 1.500968098640442, - "learning_rate": 6.496080402010051e-05, - "loss": 4.9055, - "step": 35376 - }, - { - "epoch": 18.44954367666232, - "grad_norm": 1.4806249141693115, - "learning_rate": 6.495979899497488e-05, - "loss": 5.5007, - "step": 35377 - }, - { - "epoch": 18.45006518904824, - "grad_norm": 1.4415949583053589, - "learning_rate": 6.495879396984925e-05, - "loss": 5.6106, - "step": 35378 - }, - { - "epoch": 18.45058670143416, - "grad_norm": 1.6371302604675293, - "learning_rate": 6.495778894472362e-05, - "loss": 5.0532, - "step": 35379 - }, - { - "epoch": 18.451108213820078, - "grad_norm": 1.4344274997711182, - "learning_rate": 6.4956783919598e-05, - "loss": 5.3266, - "step": 35380 - }, - { - "epoch": 18.451629726205997, - "grad_norm": 1.3941668272018433, - "learning_rate": 6.495577889447236e-05, - "loss": 5.7062, - "step": 35381 - }, - { - "epoch": 18.452151238591917, - "grad_norm": 1.5668227672576904, - "learning_rate": 6.495477386934674e-05, - "loss": 5.7404, - "step": 35382 - }, - { - "epoch": 18.452672750977836, - "grad_norm": 1.5487717390060425, - "learning_rate": 6.49537688442211e-05, - "loss": 5.2419, - "step": 35383 - }, - { - "epoch": 18.453194263363756, - "grad_norm": 1.4686191082000732, - "learning_rate": 6.495276381909548e-05, - "loss": 5.3603, - "step": 35384 - }, - { - "epoch": 18.453715775749675, - "grad_norm": 1.994951605796814, - "learning_rate": 6.495175879396986e-05, - "loss": 4.6264, - "step": 35385 - }, - { - "epoch": 18.454237288135594, - "grad_norm": 1.4223012924194336, - "learning_rate": 6.495075376884422e-05, - "loss": 5.7303, - "step": 35386 - }, - { - "epoch": 18.454758800521514, - "grad_norm": 1.5756138563156128, - "learning_rate": 6.49497487437186e-05, - "loss": 5.3568, - "step": 35387 - }, - { - "epoch": 18.45528031290743, - "grad_norm": 1.5661418437957764, - "learning_rate": 6.494874371859297e-05, - "loss": 5.1827, - "step": 35388 - }, - { - "epoch": 18.45580182529335, - "grad_norm": 1.4975249767303467, - "learning_rate": 6.494773869346734e-05, - "loss": 5.2714, - "step": 35389 - }, - { - "epoch": 18.45632333767927, - "grad_norm": 1.5480414628982544, - "learning_rate": 6.494673366834171e-05, - "loss": 5.1693, - "step": 35390 - }, - { - "epoch": 18.45684485006519, - "grad_norm": 1.3362300395965576, - "learning_rate": 6.494572864321609e-05, - "loss": 5.0241, - "step": 35391 - }, - { - "epoch": 18.457366362451108, - "grad_norm": 1.4789241552352905, - "learning_rate": 6.494472361809045e-05, - "loss": 5.3813, - "step": 35392 - }, - { - "epoch": 18.457887874837027, - "grad_norm": 1.4637984037399292, - "learning_rate": 6.494371859296483e-05, - "loss": 5.5245, - "step": 35393 - }, - { - "epoch": 18.458409387222947, - "grad_norm": 1.5049444437026978, - "learning_rate": 6.494271356783919e-05, - "loss": 5.3576, - "step": 35394 - }, - { - "epoch": 18.458930899608866, - "grad_norm": 1.4522918462753296, - "learning_rate": 6.494170854271357e-05, - "loss": 5.1529, - "step": 35395 - }, - { - "epoch": 18.459452411994786, - "grad_norm": 1.533366084098816, - "learning_rate": 6.494070351758795e-05, - "loss": 5.3989, - "step": 35396 - }, - { - "epoch": 18.459973924380705, - "grad_norm": 1.4364441633224487, - "learning_rate": 6.493969849246233e-05, - "loss": 5.5818, - "step": 35397 - }, - { - "epoch": 18.460495436766625, - "grad_norm": 1.4394077062606812, - "learning_rate": 6.493869346733669e-05, - "loss": 5.4892, - "step": 35398 - }, - { - "epoch": 18.461016949152544, - "grad_norm": 1.9129582643508911, - "learning_rate": 6.493768844221105e-05, - "loss": 4.8712, - "step": 35399 - }, - { - "epoch": 18.46153846153846, - "grad_norm": 1.5635312795639038, - "learning_rate": 6.493668341708543e-05, - "loss": 5.2926, - "step": 35400 - }, - { - "epoch": 18.46205997392438, - "grad_norm": 1.6109960079193115, - "learning_rate": 6.49356783919598e-05, - "loss": 5.3274, - "step": 35401 - }, - { - "epoch": 18.4625814863103, - "grad_norm": 1.443228006362915, - "learning_rate": 6.493467336683417e-05, - "loss": 5.3535, - "step": 35402 - }, - { - "epoch": 18.46310299869622, - "grad_norm": 1.5739573240280151, - "learning_rate": 6.493366834170854e-05, - "loss": 4.8543, - "step": 35403 - }, - { - "epoch": 18.463624511082138, - "grad_norm": 1.463592290878296, - "learning_rate": 6.493266331658292e-05, - "loss": 4.5751, - "step": 35404 - }, - { - "epoch": 18.464146023468057, - "grad_norm": 1.6104319095611572, - "learning_rate": 6.49316582914573e-05, - "loss": 5.4133, - "step": 35405 - }, - { - "epoch": 18.464667535853977, - "grad_norm": 1.4912549257278442, - "learning_rate": 6.493065326633167e-05, - "loss": 5.2075, - "step": 35406 - }, - { - "epoch": 18.465189048239896, - "grad_norm": 1.5088138580322266, - "learning_rate": 6.492964824120604e-05, - "loss": 5.5302, - "step": 35407 - }, - { - "epoch": 18.465710560625816, - "grad_norm": 1.4186854362487793, - "learning_rate": 6.492864321608041e-05, - "loss": 5.865, - "step": 35408 - }, - { - "epoch": 18.466232073011735, - "grad_norm": 1.5519641637802124, - "learning_rate": 6.492763819095478e-05, - "loss": 5.3448, - "step": 35409 - }, - { - "epoch": 18.466753585397655, - "grad_norm": 1.4350141286849976, - "learning_rate": 6.492663316582916e-05, - "loss": 5.0024, - "step": 35410 - }, - { - "epoch": 18.467275097783574, - "grad_norm": 1.6955196857452393, - "learning_rate": 6.492562814070352e-05, - "loss": 4.4941, - "step": 35411 - }, - { - "epoch": 18.46779661016949, - "grad_norm": 1.4555683135986328, - "learning_rate": 6.492462311557788e-05, - "loss": 5.4659, - "step": 35412 - }, - { - "epoch": 18.46831812255541, - "grad_norm": 1.5380946397781372, - "learning_rate": 6.492361809045226e-05, - "loss": 5.2456, - "step": 35413 - }, - { - "epoch": 18.46883963494133, - "grad_norm": 1.585091471672058, - "learning_rate": 6.492261306532663e-05, - "loss": 5.0918, - "step": 35414 - }, - { - "epoch": 18.46936114732725, - "grad_norm": 1.5110771656036377, - "learning_rate": 6.4921608040201e-05, - "loss": 5.6213, - "step": 35415 - }, - { - "epoch": 18.469882659713168, - "grad_norm": 1.6065860986709595, - "learning_rate": 6.492060301507538e-05, - "loss": 5.3905, - "step": 35416 - }, - { - "epoch": 18.470404172099087, - "grad_norm": 1.4187955856323242, - "learning_rate": 6.491959798994976e-05, - "loss": 5.5393, - "step": 35417 - }, - { - "epoch": 18.470925684485007, - "grad_norm": 1.46259605884552, - "learning_rate": 6.491859296482412e-05, - "loss": 5.5303, - "step": 35418 - }, - { - "epoch": 18.471447196870926, - "grad_norm": 1.4321526288986206, - "learning_rate": 6.49175879396985e-05, - "loss": 5.4976, - "step": 35419 - }, - { - "epoch": 18.471968709256846, - "grad_norm": 1.3810888528823853, - "learning_rate": 6.491658291457287e-05, - "loss": 5.3922, - "step": 35420 - }, - { - "epoch": 18.472490221642765, - "grad_norm": 1.5117462873458862, - "learning_rate": 6.491557788944724e-05, - "loss": 5.2183, - "step": 35421 - }, - { - "epoch": 18.473011734028685, - "grad_norm": 1.389697551727295, - "learning_rate": 6.491457286432161e-05, - "loss": 5.5407, - "step": 35422 - }, - { - "epoch": 18.473533246414604, - "grad_norm": 1.342136263847351, - "learning_rate": 6.491356783919599e-05, - "loss": 5.8574, - "step": 35423 - }, - { - "epoch": 18.47405475880052, - "grad_norm": 1.5424485206604004, - "learning_rate": 6.491256281407035e-05, - "loss": 5.1128, - "step": 35424 - }, - { - "epoch": 18.47457627118644, - "grad_norm": 1.5053235292434692, - "learning_rate": 6.491155778894471e-05, - "loss": 5.6577, - "step": 35425 - }, - { - "epoch": 18.47509778357236, - "grad_norm": 1.4760935306549072, - "learning_rate": 6.491055276381909e-05, - "loss": 5.5241, - "step": 35426 - }, - { - "epoch": 18.47561929595828, - "grad_norm": 1.5390331745147705, - "learning_rate": 6.490954773869347e-05, - "loss": 5.3238, - "step": 35427 - }, - { - "epoch": 18.476140808344198, - "grad_norm": 1.4704557657241821, - "learning_rate": 6.490854271356785e-05, - "loss": 5.1908, - "step": 35428 - }, - { - "epoch": 18.476662320730117, - "grad_norm": 1.4879099130630493, - "learning_rate": 6.490753768844221e-05, - "loss": 5.4092, - "step": 35429 - }, - { - "epoch": 18.477183833116037, - "grad_norm": 1.4675546884536743, - "learning_rate": 6.490653266331659e-05, - "loss": 5.7129, - "step": 35430 - }, - { - "epoch": 18.477705345501956, - "grad_norm": 1.493099570274353, - "learning_rate": 6.490552763819095e-05, - "loss": 5.5864, - "step": 35431 - }, - { - "epoch": 18.478226857887876, - "grad_norm": 1.5453606843948364, - "learning_rate": 6.490452261306533e-05, - "loss": 5.1499, - "step": 35432 - }, - { - "epoch": 18.478748370273795, - "grad_norm": 1.5509778261184692, - "learning_rate": 6.49035175879397e-05, - "loss": 4.963, - "step": 35433 - }, - { - "epoch": 18.479269882659715, - "grad_norm": 1.5753614902496338, - "learning_rate": 6.490251256281407e-05, - "loss": 5.4965, - "step": 35434 - }, - { - "epoch": 18.479791395045634, - "grad_norm": 1.4161410331726074, - "learning_rate": 6.490150753768844e-05, - "loss": 5.7232, - "step": 35435 - }, - { - "epoch": 18.48031290743155, - "grad_norm": 1.5373061895370483, - "learning_rate": 6.490050251256282e-05, - "loss": 5.1043, - "step": 35436 - }, - { - "epoch": 18.48083441981747, - "grad_norm": 1.5283647775650024, - "learning_rate": 6.48994974874372e-05, - "loss": 5.5081, - "step": 35437 - }, - { - "epoch": 18.48135593220339, - "grad_norm": 1.6201008558273315, - "learning_rate": 6.489849246231157e-05, - "loss": 5.0912, - "step": 35438 - }, - { - "epoch": 18.48187744458931, - "grad_norm": 1.4762250185012817, - "learning_rate": 6.489748743718594e-05, - "loss": 5.3188, - "step": 35439 - }, - { - "epoch": 18.482398956975228, - "grad_norm": 1.3981010913848877, - "learning_rate": 6.48964824120603e-05, - "loss": 5.4128, - "step": 35440 - }, - { - "epoch": 18.482920469361147, - "grad_norm": 1.4846478700637817, - "learning_rate": 6.489547738693468e-05, - "loss": 5.5662, - "step": 35441 - }, - { - "epoch": 18.483441981747067, - "grad_norm": 1.5230200290679932, - "learning_rate": 6.489447236180904e-05, - "loss": 5.1054, - "step": 35442 - }, - { - "epoch": 18.483963494132986, - "grad_norm": 1.551759123802185, - "learning_rate": 6.489346733668342e-05, - "loss": 5.0776, - "step": 35443 - }, - { - "epoch": 18.484485006518906, - "grad_norm": 1.5195060968399048, - "learning_rate": 6.489246231155778e-05, - "loss": 5.6296, - "step": 35444 - }, - { - "epoch": 18.485006518904825, - "grad_norm": 1.5474330186843872, - "learning_rate": 6.489145728643216e-05, - "loss": 5.4563, - "step": 35445 - }, - { - "epoch": 18.485528031290745, - "grad_norm": 1.5079458951950073, - "learning_rate": 6.489045226130653e-05, - "loss": 5.4901, - "step": 35446 - }, - { - "epoch": 18.486049543676664, - "grad_norm": 1.5125809907913208, - "learning_rate": 6.48894472361809e-05, - "loss": 5.3729, - "step": 35447 - }, - { - "epoch": 18.48657105606258, - "grad_norm": 1.5129650831222534, - "learning_rate": 6.488844221105528e-05, - "loss": 5.0258, - "step": 35448 - }, - { - "epoch": 18.4870925684485, - "grad_norm": 1.4959920644760132, - "learning_rate": 6.488743718592966e-05, - "loss": 5.571, - "step": 35449 - }, - { - "epoch": 18.48761408083442, - "grad_norm": 1.5242972373962402, - "learning_rate": 6.488643216080402e-05, - "loss": 4.815, - "step": 35450 - }, - { - "epoch": 18.48813559322034, - "grad_norm": 1.495603084564209, - "learning_rate": 6.48854271356784e-05, - "loss": 5.4233, - "step": 35451 - }, - { - "epoch": 18.488657105606258, - "grad_norm": 1.5951530933380127, - "learning_rate": 6.488442211055277e-05, - "loss": 5.4481, - "step": 35452 - }, - { - "epoch": 18.489178617992177, - "grad_norm": 1.5429401397705078, - "learning_rate": 6.488341708542713e-05, - "loss": 5.0311, - "step": 35453 - }, - { - "epoch": 18.489700130378097, - "grad_norm": 1.567263126373291, - "learning_rate": 6.488241206030151e-05, - "loss": 5.3736, - "step": 35454 - }, - { - "epoch": 18.490221642764016, - "grad_norm": 1.5790342092514038, - "learning_rate": 6.488140703517587e-05, - "loss": 5.2556, - "step": 35455 - }, - { - "epoch": 18.490743155149936, - "grad_norm": 1.5973724126815796, - "learning_rate": 6.488040201005025e-05, - "loss": 5.0487, - "step": 35456 - }, - { - "epoch": 18.491264667535855, - "grad_norm": 1.541680097579956, - "learning_rate": 6.487939698492463e-05, - "loss": 5.0512, - "step": 35457 - }, - { - "epoch": 18.491786179921775, - "grad_norm": 1.3658303022384644, - "learning_rate": 6.4878391959799e-05, - "loss": 5.7681, - "step": 35458 - }, - { - "epoch": 18.49230769230769, - "grad_norm": 1.540747046470642, - "learning_rate": 6.487738693467337e-05, - "loss": 5.7482, - "step": 35459 - }, - { - "epoch": 18.49282920469361, - "grad_norm": 1.6963053941726685, - "learning_rate": 6.487638190954775e-05, - "loss": 4.9647, - "step": 35460 - }, - { - "epoch": 18.49335071707953, - "grad_norm": 1.4181220531463623, - "learning_rate": 6.487537688442211e-05, - "loss": 5.2612, - "step": 35461 - }, - { - "epoch": 18.49387222946545, - "grad_norm": 1.4932340383529663, - "learning_rate": 6.487437185929649e-05, - "loss": 5.025, - "step": 35462 - }, - { - "epoch": 18.49439374185137, - "grad_norm": 1.4796345233917236, - "learning_rate": 6.487336683417086e-05, - "loss": 5.3683, - "step": 35463 - }, - { - "epoch": 18.494915254237288, - "grad_norm": 1.3907248973846436, - "learning_rate": 6.487236180904523e-05, - "loss": 5.8619, - "step": 35464 - }, - { - "epoch": 18.495436766623207, - "grad_norm": 1.4548245668411255, - "learning_rate": 6.48713567839196e-05, - "loss": 5.1801, - "step": 35465 - }, - { - "epoch": 18.495958279009127, - "grad_norm": 1.5921149253845215, - "learning_rate": 6.487035175879396e-05, - "loss": 5.0937, - "step": 35466 - }, - { - "epoch": 18.496479791395046, - "grad_norm": 1.5150690078735352, - "learning_rate": 6.486934673366834e-05, - "loss": 5.1891, - "step": 35467 - }, - { - "epoch": 18.497001303780966, - "grad_norm": 1.598847508430481, - "learning_rate": 6.486834170854272e-05, - "loss": 5.609, - "step": 35468 - }, - { - "epoch": 18.497522816166885, - "grad_norm": 1.4594804048538208, - "learning_rate": 6.48673366834171e-05, - "loss": 5.1867, - "step": 35469 - }, - { - "epoch": 18.498044328552805, - "grad_norm": 1.4736852645874023, - "learning_rate": 6.486633165829146e-05, - "loss": 5.5875, - "step": 35470 - }, - { - "epoch": 18.49856584093872, - "grad_norm": 1.5286160707473755, - "learning_rate": 6.486532663316584e-05, - "loss": 5.3453, - "step": 35471 - }, - { - "epoch": 18.49908735332464, - "grad_norm": 1.4607089757919312, - "learning_rate": 6.48643216080402e-05, - "loss": 5.0675, - "step": 35472 - }, - { - "epoch": 18.49960886571056, - "grad_norm": 1.4371898174285889, - "learning_rate": 6.486331658291458e-05, - "loss": 5.5728, - "step": 35473 - }, - { - "epoch": 18.50013037809648, - "grad_norm": 1.5173791646957397, - "learning_rate": 6.486231155778894e-05, - "loss": 5.0847, - "step": 35474 - }, - { - "epoch": 18.5006518904824, - "grad_norm": 1.483396053314209, - "learning_rate": 6.486130653266332e-05, - "loss": 5.6492, - "step": 35475 - }, - { - "epoch": 18.501173402868318, - "grad_norm": 1.5216718912124634, - "learning_rate": 6.486030150753769e-05, - "loss": 4.988, - "step": 35476 - }, - { - "epoch": 18.501694915254237, - "grad_norm": 1.6171027421951294, - "learning_rate": 6.485929648241206e-05, - "loss": 5.0624, - "step": 35477 - }, - { - "epoch": 18.502216427640157, - "grad_norm": 1.410894751548767, - "learning_rate": 6.485829145728644e-05, - "loss": 5.6148, - "step": 35478 - }, - { - "epoch": 18.502737940026076, - "grad_norm": 1.5431923866271973, - "learning_rate": 6.48572864321608e-05, - "loss": 5.5598, - "step": 35479 - }, - { - "epoch": 18.503259452411996, - "grad_norm": 1.459151029586792, - "learning_rate": 6.485628140703518e-05, - "loss": 5.0877, - "step": 35480 - }, - { - "epoch": 18.503780964797915, - "grad_norm": 1.4986822605133057, - "learning_rate": 6.485527638190955e-05, - "loss": 5.5132, - "step": 35481 - }, - { - "epoch": 18.504302477183835, - "grad_norm": 1.4528541564941406, - "learning_rate": 6.485427135678393e-05, - "loss": 5.1542, - "step": 35482 - }, - { - "epoch": 18.50482398956975, - "grad_norm": 1.4425474405288696, - "learning_rate": 6.485326633165829e-05, - "loss": 4.9311, - "step": 35483 - }, - { - "epoch": 18.50534550195567, - "grad_norm": 1.5117945671081543, - "learning_rate": 6.485226130653267e-05, - "loss": 4.7707, - "step": 35484 - }, - { - "epoch": 18.50586701434159, - "grad_norm": 1.47257661819458, - "learning_rate": 6.485125628140703e-05, - "loss": 5.7236, - "step": 35485 - }, - { - "epoch": 18.50638852672751, - "grad_norm": 1.4577170610427856, - "learning_rate": 6.485025125628141e-05, - "loss": 5.7277, - "step": 35486 - }, - { - "epoch": 18.50691003911343, - "grad_norm": 1.464302659034729, - "learning_rate": 6.484924623115577e-05, - "loss": 5.2901, - "step": 35487 - }, - { - "epoch": 18.507431551499348, - "grad_norm": 1.4555132389068604, - "learning_rate": 6.484824120603015e-05, - "loss": 5.3892, - "step": 35488 - }, - { - "epoch": 18.507953063885267, - "grad_norm": 1.491204023361206, - "learning_rate": 6.484723618090453e-05, - "loss": 5.3865, - "step": 35489 - }, - { - "epoch": 18.508474576271187, - "grad_norm": 1.5877994298934937, - "learning_rate": 6.484623115577891e-05, - "loss": 4.993, - "step": 35490 - }, - { - "epoch": 18.508996088657106, - "grad_norm": 1.4624348878860474, - "learning_rate": 6.484522613065327e-05, - "loss": 5.1995, - "step": 35491 - }, - { - "epoch": 18.509517601043026, - "grad_norm": 1.446710228919983, - "learning_rate": 6.484422110552764e-05, - "loss": 5.2028, - "step": 35492 - }, - { - "epoch": 18.510039113428945, - "grad_norm": 1.626200795173645, - "learning_rate": 6.484321608040201e-05, - "loss": 5.1737, - "step": 35493 - }, - { - "epoch": 18.510560625814865, - "grad_norm": 1.464115858078003, - "learning_rate": 6.484221105527638e-05, - "loss": 4.9925, - "step": 35494 - }, - { - "epoch": 18.51108213820078, - "grad_norm": 1.4619052410125732, - "learning_rate": 6.484120603015076e-05, - "loss": 5.4528, - "step": 35495 - }, - { - "epoch": 18.5116036505867, - "grad_norm": 1.5576401948928833, - "learning_rate": 6.484020100502512e-05, - "loss": 5.1253, - "step": 35496 - }, - { - "epoch": 18.51212516297262, - "grad_norm": 1.4755072593688965, - "learning_rate": 6.48391959798995e-05, - "loss": 5.503, - "step": 35497 - }, - { - "epoch": 18.51264667535854, - "grad_norm": 1.4425896406173706, - "learning_rate": 6.483819095477388e-05, - "loss": 5.631, - "step": 35498 - }, - { - "epoch": 18.51316818774446, - "grad_norm": 1.4946441650390625, - "learning_rate": 6.483718592964825e-05, - "loss": 5.0423, - "step": 35499 - }, - { - "epoch": 18.513689700130378, - "grad_norm": 1.4918228387832642, - "learning_rate": 6.483618090452262e-05, - "loss": 5.131, - "step": 35500 - }, - { - "epoch": 18.514211212516297, - "grad_norm": 1.6058791875839233, - "learning_rate": 6.4835175879397e-05, - "loss": 5.1695, - "step": 35501 - }, - { - "epoch": 18.514732724902217, - "grad_norm": 1.5280064344406128, - "learning_rate": 6.483417085427136e-05, - "loss": 5.6078, - "step": 35502 - }, - { - "epoch": 18.515254237288136, - "grad_norm": 1.5316718816757202, - "learning_rate": 6.483316582914574e-05, - "loss": 5.4516, - "step": 35503 - }, - { - "epoch": 18.515775749674056, - "grad_norm": 1.6612154245376587, - "learning_rate": 6.48321608040201e-05, - "loss": 5.0401, - "step": 35504 - }, - { - "epoch": 18.516297262059975, - "grad_norm": 1.480027675628662, - "learning_rate": 6.483115577889447e-05, - "loss": 5.1944, - "step": 35505 - }, - { - "epoch": 18.516818774445895, - "grad_norm": 1.4312092065811157, - "learning_rate": 6.483015075376884e-05, - "loss": 5.1764, - "step": 35506 - }, - { - "epoch": 18.51734028683181, - "grad_norm": 1.3732590675354004, - "learning_rate": 6.482914572864321e-05, - "loss": 5.5659, - "step": 35507 - }, - { - "epoch": 18.51786179921773, - "grad_norm": 1.6740384101867676, - "learning_rate": 6.482814070351759e-05, - "loss": 5.1727, - "step": 35508 - }, - { - "epoch": 18.51838331160365, - "grad_norm": 1.5454546213150024, - "learning_rate": 6.482713567839196e-05, - "loss": 5.1987, - "step": 35509 - }, - { - "epoch": 18.51890482398957, - "grad_norm": 1.507055640220642, - "learning_rate": 6.482613065326634e-05, - "loss": 5.4581, - "step": 35510 - }, - { - "epoch": 18.51942633637549, - "grad_norm": 1.3497278690338135, - "learning_rate": 6.48251256281407e-05, - "loss": 5.5685, - "step": 35511 - }, - { - "epoch": 18.519947848761408, - "grad_norm": 1.5519951581954956, - "learning_rate": 6.482412060301508e-05, - "loss": 4.9936, - "step": 35512 - }, - { - "epoch": 18.520469361147327, - "grad_norm": 1.4718763828277588, - "learning_rate": 6.482311557788945e-05, - "loss": 5.5069, - "step": 35513 - }, - { - "epoch": 18.520990873533247, - "grad_norm": 1.5588761568069458, - "learning_rate": 6.482211055276383e-05, - "loss": 4.9248, - "step": 35514 - }, - { - "epoch": 18.521512385919166, - "grad_norm": 1.436373233795166, - "learning_rate": 6.482110552763819e-05, - "loss": 5.3652, - "step": 35515 - }, - { - "epoch": 18.522033898305086, - "grad_norm": 1.5915199518203735, - "learning_rate": 6.482010050251257e-05, - "loss": 5.4193, - "step": 35516 - }, - { - "epoch": 18.522555410691005, - "grad_norm": 1.5224963426589966, - "learning_rate": 6.481909547738693e-05, - "loss": 5.2056, - "step": 35517 - }, - { - "epoch": 18.523076923076925, - "grad_norm": 1.5738803148269653, - "learning_rate": 6.481809045226131e-05, - "loss": 5.0447, - "step": 35518 - }, - { - "epoch": 18.52359843546284, - "grad_norm": 1.4802074432373047, - "learning_rate": 6.481708542713569e-05, - "loss": 5.4094, - "step": 35519 - }, - { - "epoch": 18.52411994784876, - "grad_norm": 1.5555838346481323, - "learning_rate": 6.481608040201005e-05, - "loss": 5.3868, - "step": 35520 - }, - { - "epoch": 18.52464146023468, - "grad_norm": 1.4200795888900757, - "learning_rate": 6.481507537688443e-05, - "loss": 5.8335, - "step": 35521 - }, - { - "epoch": 18.5251629726206, - "grad_norm": 1.4517983198165894, - "learning_rate": 6.48140703517588e-05, - "loss": 5.2918, - "step": 35522 - }, - { - "epoch": 18.52568448500652, - "grad_norm": 1.5185976028442383, - "learning_rate": 6.481306532663317e-05, - "loss": 5.2263, - "step": 35523 - }, - { - "epoch": 18.526205997392438, - "grad_norm": 1.382768154144287, - "learning_rate": 6.481206030150754e-05, - "loss": 5.0499, - "step": 35524 - }, - { - "epoch": 18.526727509778357, - "grad_norm": 1.565409779548645, - "learning_rate": 6.481105527638191e-05, - "loss": 4.9161, - "step": 35525 - }, - { - "epoch": 18.527249022164277, - "grad_norm": 1.5516471862792969, - "learning_rate": 6.481005025125628e-05, - "loss": 5.7253, - "step": 35526 - }, - { - "epoch": 18.527770534550196, - "grad_norm": 1.5027854442596436, - "learning_rate": 6.480904522613066e-05, - "loss": 5.4209, - "step": 35527 - }, - { - "epoch": 18.528292046936116, - "grad_norm": 1.4789053201675415, - "learning_rate": 6.480804020100502e-05, - "loss": 5.5322, - "step": 35528 - }, - { - "epoch": 18.528813559322035, - "grad_norm": 1.4199090003967285, - "learning_rate": 6.48070351758794e-05, - "loss": 5.1008, - "step": 35529 - }, - { - "epoch": 18.529335071707955, - "grad_norm": 1.4920481443405151, - "learning_rate": 6.480603015075378e-05, - "loss": 5.5376, - "step": 35530 - }, - { - "epoch": 18.52985658409387, - "grad_norm": 1.4184472560882568, - "learning_rate": 6.480502512562815e-05, - "loss": 5.4905, - "step": 35531 - }, - { - "epoch": 18.53037809647979, - "grad_norm": 1.5036133527755737, - "learning_rate": 6.480402010050252e-05, - "loss": 5.4261, - "step": 35532 - }, - { - "epoch": 18.53089960886571, - "grad_norm": 1.5692049264907837, - "learning_rate": 6.480301507537688e-05, - "loss": 5.4678, - "step": 35533 - }, - { - "epoch": 18.53142112125163, - "grad_norm": 1.4867217540740967, - "learning_rate": 6.480201005025126e-05, - "loss": 5.2224, - "step": 35534 - }, - { - "epoch": 18.53194263363755, - "grad_norm": 1.4854336977005005, - "learning_rate": 6.480100502512563e-05, - "loss": 5.421, - "step": 35535 - }, - { - "epoch": 18.532464146023468, - "grad_norm": 1.4389443397521973, - "learning_rate": 6.48e-05, - "loss": 5.6175, - "step": 35536 - }, - { - "epoch": 18.532985658409387, - "grad_norm": 1.4565304517745972, - "learning_rate": 6.479899497487437e-05, - "loss": 5.0368, - "step": 35537 - }, - { - "epoch": 18.533507170795307, - "grad_norm": 1.6402233839035034, - "learning_rate": 6.479798994974875e-05, - "loss": 4.7655, - "step": 35538 - }, - { - "epoch": 18.534028683181226, - "grad_norm": 1.5245435237884521, - "learning_rate": 6.479698492462312e-05, - "loss": 5.4058, - "step": 35539 - }, - { - "epoch": 18.534550195567146, - "grad_norm": 1.437264084815979, - "learning_rate": 6.47959798994975e-05, - "loss": 5.2931, - "step": 35540 - }, - { - "epoch": 18.535071707953065, - "grad_norm": 1.5119725465774536, - "learning_rate": 6.479497487437187e-05, - "loss": 5.4938, - "step": 35541 - }, - { - "epoch": 18.53559322033898, - "grad_norm": 1.4952521324157715, - "learning_rate": 6.479396984924624e-05, - "loss": 5.4068, - "step": 35542 - }, - { - "epoch": 18.5361147327249, - "grad_norm": 1.504055380821228, - "learning_rate": 6.479296482412061e-05, - "loss": 5.4531, - "step": 35543 - }, - { - "epoch": 18.53663624511082, - "grad_norm": 1.534546136856079, - "learning_rate": 6.479195979899498e-05, - "loss": 5.3904, - "step": 35544 - }, - { - "epoch": 18.53715775749674, - "grad_norm": 1.5656923055648804, - "learning_rate": 6.479095477386935e-05, - "loss": 5.0032, - "step": 35545 - }, - { - "epoch": 18.53767926988266, - "grad_norm": 1.4622232913970947, - "learning_rate": 6.478994974874371e-05, - "loss": 5.5152, - "step": 35546 - }, - { - "epoch": 18.53820078226858, - "grad_norm": 1.573738932609558, - "learning_rate": 6.478894472361809e-05, - "loss": 5.2028, - "step": 35547 - }, - { - "epoch": 18.538722294654498, - "grad_norm": 1.4976190328598022, - "learning_rate": 6.478793969849246e-05, - "loss": 5.4686, - "step": 35548 - }, - { - "epoch": 18.539243807040418, - "grad_norm": 1.4427175521850586, - "learning_rate": 6.478693467336683e-05, - "loss": 5.4573, - "step": 35549 - }, - { - "epoch": 18.539765319426337, - "grad_norm": 1.4563417434692383, - "learning_rate": 6.478592964824121e-05, - "loss": 5.3768, - "step": 35550 - }, - { - "epoch": 18.540286831812256, - "grad_norm": 1.4356193542480469, - "learning_rate": 6.478492462311559e-05, - "loss": 5.4663, - "step": 35551 - }, - { - "epoch": 18.540808344198176, - "grad_norm": 1.5954298973083496, - "learning_rate": 6.478391959798995e-05, - "loss": 4.8864, - "step": 35552 - }, - { - "epoch": 18.541329856584095, - "grad_norm": 1.5576984882354736, - "learning_rate": 6.478291457286433e-05, - "loss": 5.1557, - "step": 35553 - }, - { - "epoch": 18.541851368970015, - "grad_norm": 1.4916774034500122, - "learning_rate": 6.47819095477387e-05, - "loss": 5.4654, - "step": 35554 - }, - { - "epoch": 18.54237288135593, - "grad_norm": 1.4641087055206299, - "learning_rate": 6.478090452261307e-05, - "loss": 5.2205, - "step": 35555 - }, - { - "epoch": 18.54289439374185, - "grad_norm": 1.5349711179733276, - "learning_rate": 6.477989949748744e-05, - "loss": 4.9694, - "step": 35556 - }, - { - "epoch": 18.54341590612777, - "grad_norm": 1.3819282054901123, - "learning_rate": 6.477889447236182e-05, - "loss": 5.0068, - "step": 35557 - }, - { - "epoch": 18.54393741851369, - "grad_norm": 1.5060696601867676, - "learning_rate": 6.477788944723618e-05, - "loss": 5.4714, - "step": 35558 - }, - { - "epoch": 18.54445893089961, - "grad_norm": 1.713645100593567, - "learning_rate": 6.477688442211056e-05, - "loss": 5.1647, - "step": 35559 - }, - { - "epoch": 18.544980443285528, - "grad_norm": 1.5000172853469849, - "learning_rate": 6.477587939698494e-05, - "loss": 5.2198, - "step": 35560 - }, - { - "epoch": 18.545501955671448, - "grad_norm": 1.512481451034546, - "learning_rate": 6.47748743718593e-05, - "loss": 4.9982, - "step": 35561 - }, - { - "epoch": 18.546023468057367, - "grad_norm": 1.477190613746643, - "learning_rate": 6.477386934673368e-05, - "loss": 5.4669, - "step": 35562 - }, - { - "epoch": 18.546544980443286, - "grad_norm": 1.4803235530853271, - "learning_rate": 6.477286432160804e-05, - "loss": 5.3387, - "step": 35563 - }, - { - "epoch": 18.547066492829206, - "grad_norm": 1.485863208770752, - "learning_rate": 6.477185929648242e-05, - "loss": 4.661, - "step": 35564 - }, - { - "epoch": 18.547588005215125, - "grad_norm": 1.606426477432251, - "learning_rate": 6.477085427135678e-05, - "loss": 4.9948, - "step": 35565 - }, - { - "epoch": 18.54810951760104, - "grad_norm": 1.438741683959961, - "learning_rate": 6.476984924623116e-05, - "loss": 5.6791, - "step": 35566 - }, - { - "epoch": 18.54863102998696, - "grad_norm": 1.409909725189209, - "learning_rate": 6.476884422110553e-05, - "loss": 5.4527, - "step": 35567 - }, - { - "epoch": 18.54915254237288, - "grad_norm": 1.4105454683303833, - "learning_rate": 6.47678391959799e-05, - "loss": 5.5576, - "step": 35568 - }, - { - "epoch": 18.5496740547588, - "grad_norm": 1.404860496520996, - "learning_rate": 6.476683417085427e-05, - "loss": 5.0654, - "step": 35569 - }, - { - "epoch": 18.55019556714472, - "grad_norm": 1.5405018329620361, - "learning_rate": 6.476582914572865e-05, - "loss": 5.0055, - "step": 35570 - }, - { - "epoch": 18.55071707953064, - "grad_norm": 1.5294269323349, - "learning_rate": 6.476482412060302e-05, - "loss": 5.3264, - "step": 35571 - }, - { - "epoch": 18.551238591916558, - "grad_norm": 1.4796496629714966, - "learning_rate": 6.476381909547739e-05, - "loss": 5.1241, - "step": 35572 - }, - { - "epoch": 18.551760104302478, - "grad_norm": 1.5621120929718018, - "learning_rate": 6.476281407035177e-05, - "loss": 5.2591, - "step": 35573 - }, - { - "epoch": 18.552281616688397, - "grad_norm": 1.4804742336273193, - "learning_rate": 6.476180904522613e-05, - "loss": 4.9445, - "step": 35574 - }, - { - "epoch": 18.552803129074317, - "grad_norm": 1.321298599243164, - "learning_rate": 6.476080402010051e-05, - "loss": 5.2601, - "step": 35575 - }, - { - "epoch": 18.553324641460236, - "grad_norm": 1.4469120502471924, - "learning_rate": 6.475979899497487e-05, - "loss": 5.4447, - "step": 35576 - }, - { - "epoch": 18.553846153846155, - "grad_norm": 1.4912959337234497, - "learning_rate": 6.475879396984925e-05, - "loss": 4.9665, - "step": 35577 - }, - { - "epoch": 18.55436766623207, - "grad_norm": 1.4086633920669556, - "learning_rate": 6.475778894472361e-05, - "loss": 5.6664, - "step": 35578 - }, - { - "epoch": 18.55488917861799, - "grad_norm": 1.5440562963485718, - "learning_rate": 6.475678391959799e-05, - "loss": 5.1566, - "step": 35579 - }, - { - "epoch": 18.55541069100391, - "grad_norm": 1.43160879611969, - "learning_rate": 6.475577889447236e-05, - "loss": 4.9944, - "step": 35580 - }, - { - "epoch": 18.55593220338983, - "grad_norm": 1.49031662940979, - "learning_rate": 6.475477386934673e-05, - "loss": 5.2582, - "step": 35581 - }, - { - "epoch": 18.55645371577575, - "grad_norm": 1.5763808488845825, - "learning_rate": 6.475376884422111e-05, - "loss": 5.6148, - "step": 35582 - }, - { - "epoch": 18.55697522816167, - "grad_norm": 1.4925205707550049, - "learning_rate": 6.475276381909549e-05, - "loss": 5.0341, - "step": 35583 - }, - { - "epoch": 18.557496740547588, - "grad_norm": 1.408478021621704, - "learning_rate": 6.475175879396985e-05, - "loss": 5.6439, - "step": 35584 - }, - { - "epoch": 18.558018252933508, - "grad_norm": 1.4134036302566528, - "learning_rate": 6.475075376884422e-05, - "loss": 5.4019, - "step": 35585 - }, - { - "epoch": 18.558539765319427, - "grad_norm": 1.4603469371795654, - "learning_rate": 6.47497487437186e-05, - "loss": 5.2438, - "step": 35586 - }, - { - "epoch": 18.559061277705347, - "grad_norm": 1.5013766288757324, - "learning_rate": 6.474874371859296e-05, - "loss": 5.1946, - "step": 35587 - }, - { - "epoch": 18.559582790091266, - "grad_norm": 1.4313961267471313, - "learning_rate": 6.474773869346734e-05, - "loss": 5.1489, - "step": 35588 - }, - { - "epoch": 18.560104302477185, - "grad_norm": 1.6062086820602417, - "learning_rate": 6.47467336683417e-05, - "loss": 5.0221, - "step": 35589 - }, - { - "epoch": 18.5606258148631, - "grad_norm": 1.4948956966400146, - "learning_rate": 6.474572864321608e-05, - "loss": 5.3412, - "step": 35590 - }, - { - "epoch": 18.56114732724902, - "grad_norm": 1.4754451513290405, - "learning_rate": 6.474472361809046e-05, - "loss": 5.204, - "step": 35591 - }, - { - "epoch": 18.56166883963494, - "grad_norm": 1.4590420722961426, - "learning_rate": 6.474371859296484e-05, - "loss": 5.0794, - "step": 35592 - }, - { - "epoch": 18.56219035202086, - "grad_norm": 1.4891713857650757, - "learning_rate": 6.47427135678392e-05, - "loss": 5.5038, - "step": 35593 - }, - { - "epoch": 18.56271186440678, - "grad_norm": 1.519974708557129, - "learning_rate": 6.474170854271358e-05, - "loss": 5.1938, - "step": 35594 - }, - { - "epoch": 18.5632333767927, - "grad_norm": 1.5187040567398071, - "learning_rate": 6.474070351758794e-05, - "loss": 4.9275, - "step": 35595 - }, - { - "epoch": 18.563754889178618, - "grad_norm": 1.4492067098617554, - "learning_rate": 6.473969849246232e-05, - "loss": 5.1744, - "step": 35596 - }, - { - "epoch": 18.564276401564538, - "grad_norm": 1.523616075515747, - "learning_rate": 6.473869346733668e-05, - "loss": 5.2482, - "step": 35597 - }, - { - "epoch": 18.564797913950457, - "grad_norm": 1.5563427209854126, - "learning_rate": 6.473768844221105e-05, - "loss": 4.7921, - "step": 35598 - }, - { - "epoch": 18.565319426336377, - "grad_norm": 1.571441411972046, - "learning_rate": 6.473668341708543e-05, - "loss": 5.3674, - "step": 35599 - }, - { - "epoch": 18.565840938722296, - "grad_norm": 1.5123358964920044, - "learning_rate": 6.473567839195979e-05, - "loss": 5.4699, - "step": 35600 - }, - { - "epoch": 18.566362451108215, - "grad_norm": 1.5096668004989624, - "learning_rate": 6.473467336683417e-05, - "loss": 5.2786, - "step": 35601 - }, - { - "epoch": 18.56688396349413, - "grad_norm": 1.491349220275879, - "learning_rate": 6.473366834170855e-05, - "loss": 5.1126, - "step": 35602 - }, - { - "epoch": 18.56740547588005, - "grad_norm": 1.4231665134429932, - "learning_rate": 6.473266331658292e-05, - "loss": 5.8665, - "step": 35603 - }, - { - "epoch": 18.56792698826597, - "grad_norm": 1.5278055667877197, - "learning_rate": 6.473165829145729e-05, - "loss": 5.5027, - "step": 35604 - }, - { - "epoch": 18.56844850065189, - "grad_norm": 1.3443926572799683, - "learning_rate": 6.473065326633167e-05, - "loss": 4.8755, - "step": 35605 - }, - { - "epoch": 18.56897001303781, - "grad_norm": 1.5191975831985474, - "learning_rate": 6.472964824120603e-05, - "loss": 4.6944, - "step": 35606 - }, - { - "epoch": 18.56949152542373, - "grad_norm": 1.4903696775436401, - "learning_rate": 6.472864321608041e-05, - "loss": 5.1913, - "step": 35607 - }, - { - "epoch": 18.570013037809648, - "grad_norm": 1.5194417238235474, - "learning_rate": 6.472763819095477e-05, - "loss": 5.054, - "step": 35608 - }, - { - "epoch": 18.570534550195568, - "grad_norm": 1.4624252319335938, - "learning_rate": 6.472663316582915e-05, - "loss": 5.3377, - "step": 35609 - }, - { - "epoch": 18.571056062581487, - "grad_norm": 1.4921990633010864, - "learning_rate": 6.472562814070352e-05, - "loss": 4.8174, - "step": 35610 - }, - { - "epoch": 18.571577574967407, - "grad_norm": 1.5260392427444458, - "learning_rate": 6.472462311557789e-05, - "loss": 5.1678, - "step": 35611 - }, - { - "epoch": 18.572099087353326, - "grad_norm": 1.4944303035736084, - "learning_rate": 6.472361809045227e-05, - "loss": 5.4034, - "step": 35612 - }, - { - "epoch": 18.572620599739246, - "grad_norm": 1.4951120615005493, - "learning_rate": 6.472261306532663e-05, - "loss": 5.143, - "step": 35613 - }, - { - "epoch": 18.57314211212516, - "grad_norm": 1.5664386749267578, - "learning_rate": 6.472160804020101e-05, - "loss": 5.4, - "step": 35614 - }, - { - "epoch": 18.57366362451108, - "grad_norm": 1.5019649267196655, - "learning_rate": 6.472060301507538e-05, - "loss": 5.3375, - "step": 35615 - }, - { - "epoch": 18.574185136897, - "grad_norm": 1.5554442405700684, - "learning_rate": 6.471959798994975e-05, - "loss": 5.0062, - "step": 35616 - }, - { - "epoch": 18.57470664928292, - "grad_norm": 1.6129951477050781, - "learning_rate": 6.471859296482412e-05, - "loss": 5.1277, - "step": 35617 - }, - { - "epoch": 18.57522816166884, - "grad_norm": 1.5531551837921143, - "learning_rate": 6.47175879396985e-05, - "loss": 5.8224, - "step": 35618 - }, - { - "epoch": 18.57574967405476, - "grad_norm": 1.3367098569869995, - "learning_rate": 6.471658291457286e-05, - "loss": 5.6557, - "step": 35619 - }, - { - "epoch": 18.576271186440678, - "grad_norm": 1.5051144361495972, - "learning_rate": 6.471557788944724e-05, - "loss": 4.995, - "step": 35620 - }, - { - "epoch": 18.576792698826598, - "grad_norm": 1.556246280670166, - "learning_rate": 6.47145728643216e-05, - "loss": 5.3415, - "step": 35621 - }, - { - "epoch": 18.577314211212517, - "grad_norm": 1.4459372758865356, - "learning_rate": 6.471356783919598e-05, - "loss": 5.6476, - "step": 35622 - }, - { - "epoch": 18.577835723598437, - "grad_norm": 1.3812949657440186, - "learning_rate": 6.471256281407036e-05, - "loss": 5.6466, - "step": 35623 - }, - { - "epoch": 18.578357235984356, - "grad_norm": 1.3929649591445923, - "learning_rate": 6.471155778894474e-05, - "loss": 5.4131, - "step": 35624 - }, - { - "epoch": 18.578878748370272, - "grad_norm": 1.516778826713562, - "learning_rate": 6.47105527638191e-05, - "loss": 5.3772, - "step": 35625 - }, - { - "epoch": 18.57940026075619, - "grad_norm": 1.4588415622711182, - "learning_rate": 6.470954773869347e-05, - "loss": 5.1021, - "step": 35626 - }, - { - "epoch": 18.57992177314211, - "grad_norm": 1.5326430797576904, - "learning_rate": 6.470854271356784e-05, - "loss": 5.336, - "step": 35627 - }, - { - "epoch": 18.58044328552803, - "grad_norm": 1.4224209785461426, - "learning_rate": 6.470753768844221e-05, - "loss": 5.3616, - "step": 35628 - }, - { - "epoch": 18.58096479791395, - "grad_norm": 1.4800015687942505, - "learning_rate": 6.470653266331659e-05, - "loss": 5.6189, - "step": 35629 - }, - { - "epoch": 18.58148631029987, - "grad_norm": 1.4834058284759521, - "learning_rate": 6.470552763819095e-05, - "loss": 5.2629, - "step": 35630 - }, - { - "epoch": 18.58200782268579, - "grad_norm": 1.4612185955047607, - "learning_rate": 6.470452261306533e-05, - "loss": 5.2755, - "step": 35631 - }, - { - "epoch": 18.58252933507171, - "grad_norm": 1.642467975616455, - "learning_rate": 6.47035175879397e-05, - "loss": 5.251, - "step": 35632 - }, - { - "epoch": 18.583050847457628, - "grad_norm": 1.5199463367462158, - "learning_rate": 6.470251256281408e-05, - "loss": 5.2039, - "step": 35633 - }, - { - "epoch": 18.583572359843547, - "grad_norm": 1.502884030342102, - "learning_rate": 6.470150753768845e-05, - "loss": 5.4855, - "step": 35634 - }, - { - "epoch": 18.584093872229467, - "grad_norm": 1.439645528793335, - "learning_rate": 6.470050251256283e-05, - "loss": 5.6512, - "step": 35635 - }, - { - "epoch": 18.584615384615386, - "grad_norm": 1.5089534521102905, - "learning_rate": 6.469949748743719e-05, - "loss": 4.8927, - "step": 35636 - }, - { - "epoch": 18.585136897001306, - "grad_norm": 1.485379934310913, - "learning_rate": 6.469849246231157e-05, - "loss": 5.6404, - "step": 35637 - }, - { - "epoch": 18.58565840938722, - "grad_norm": 1.520887017250061, - "learning_rate": 6.469748743718593e-05, - "loss": 5.4014, - "step": 35638 - }, - { - "epoch": 18.58617992177314, - "grad_norm": 1.4602385759353638, - "learning_rate": 6.46964824120603e-05, - "loss": 5.1413, - "step": 35639 - }, - { - "epoch": 18.58670143415906, - "grad_norm": 1.437320590019226, - "learning_rate": 6.469547738693467e-05, - "loss": 4.6434, - "step": 35640 - }, - { - "epoch": 18.58722294654498, - "grad_norm": 1.4553226232528687, - "learning_rate": 6.469447236180904e-05, - "loss": 5.4599, - "step": 35641 - }, - { - "epoch": 18.5877444589309, - "grad_norm": 1.6096646785736084, - "learning_rate": 6.469346733668342e-05, - "loss": 5.5541, - "step": 35642 - }, - { - "epoch": 18.58826597131682, - "grad_norm": 1.6640825271606445, - "learning_rate": 6.46924623115578e-05, - "loss": 5.3014, - "step": 35643 - }, - { - "epoch": 18.58878748370274, - "grad_norm": 1.4291355609893799, - "learning_rate": 6.469145728643217e-05, - "loss": 5.8131, - "step": 35644 - }, - { - "epoch": 18.589308996088658, - "grad_norm": 1.4709298610687256, - "learning_rate": 6.469045226130654e-05, - "loss": 5.1337, - "step": 35645 - }, - { - "epoch": 18.589830508474577, - "grad_norm": 1.5349431037902832, - "learning_rate": 6.468944723618091e-05, - "loss": 5.3621, - "step": 35646 - }, - { - "epoch": 18.590352020860497, - "grad_norm": 1.4615540504455566, - "learning_rate": 6.468844221105528e-05, - "loss": 5.4458, - "step": 35647 - }, - { - "epoch": 18.590873533246416, - "grad_norm": 1.4666386842727661, - "learning_rate": 6.468743718592966e-05, - "loss": 5.1142, - "step": 35648 - }, - { - "epoch": 18.591395045632332, - "grad_norm": 1.5116899013519287, - "learning_rate": 6.468643216080402e-05, - "loss": 5.5801, - "step": 35649 - }, - { - "epoch": 18.59191655801825, - "grad_norm": 1.4550970792770386, - "learning_rate": 6.46854271356784e-05, - "loss": 5.6166, - "step": 35650 - }, - { - "epoch": 18.59243807040417, - "grad_norm": 1.4989086389541626, - "learning_rate": 6.468442211055276e-05, - "loss": 5.4623, - "step": 35651 - }, - { - "epoch": 18.59295958279009, - "grad_norm": 1.4663373231887817, - "learning_rate": 6.468341708542714e-05, - "loss": 5.11, - "step": 35652 - }, - { - "epoch": 18.59348109517601, - "grad_norm": 1.4460784196853638, - "learning_rate": 6.468241206030152e-05, - "loss": 5.7037, - "step": 35653 - }, - { - "epoch": 18.59400260756193, - "grad_norm": 1.3866466283798218, - "learning_rate": 6.468140703517588e-05, - "loss": 5.6477, - "step": 35654 - }, - { - "epoch": 18.59452411994785, - "grad_norm": 1.504508376121521, - "learning_rate": 6.468040201005026e-05, - "loss": 5.0559, - "step": 35655 - }, - { - "epoch": 18.59504563233377, - "grad_norm": 1.4549195766448975, - "learning_rate": 6.467939698492462e-05, - "loss": 5.4825, - "step": 35656 - }, - { - "epoch": 18.595567144719688, - "grad_norm": 1.5147217512130737, - "learning_rate": 6.4678391959799e-05, - "loss": 5.0815, - "step": 35657 - }, - { - "epoch": 18.596088657105607, - "grad_norm": 1.5177127122879028, - "learning_rate": 6.467738693467337e-05, - "loss": 5.6046, - "step": 35658 - }, - { - "epoch": 18.596610169491527, - "grad_norm": 1.556951880455017, - "learning_rate": 6.467638190954774e-05, - "loss": 4.6431, - "step": 35659 - }, - { - "epoch": 18.597131681877446, - "grad_norm": 1.526574730873108, - "learning_rate": 6.467537688442211e-05, - "loss": 5.2371, - "step": 35660 - }, - { - "epoch": 18.597653194263362, - "grad_norm": 1.4985172748565674, - "learning_rate": 6.467437185929649e-05, - "loss": 5.5868, - "step": 35661 - }, - { - "epoch": 18.59817470664928, - "grad_norm": 1.6932499408721924, - "learning_rate": 6.467336683417085e-05, - "loss": 5.7849, - "step": 35662 - }, - { - "epoch": 18.5986962190352, - "grad_norm": 1.4690706729888916, - "learning_rate": 6.467236180904523e-05, - "loss": 5.6624, - "step": 35663 - }, - { - "epoch": 18.59921773142112, - "grad_norm": 1.5380942821502686, - "learning_rate": 6.46713567839196e-05, - "loss": 5.2306, - "step": 35664 - }, - { - "epoch": 18.59973924380704, - "grad_norm": 1.5486863851547241, - "learning_rate": 6.467035175879397e-05, - "loss": 5.2321, - "step": 35665 - }, - { - "epoch": 18.60026075619296, - "grad_norm": 1.4356722831726074, - "learning_rate": 6.466934673366835e-05, - "loss": 5.4949, - "step": 35666 - }, - { - "epoch": 18.60078226857888, - "grad_norm": 1.4122802019119263, - "learning_rate": 6.466834170854271e-05, - "loss": 5.8333, - "step": 35667 - }, - { - "epoch": 18.6013037809648, - "grad_norm": 1.5342355966567993, - "learning_rate": 6.466733668341709e-05, - "loss": 5.2377, - "step": 35668 - }, - { - "epoch": 18.601825293350718, - "grad_norm": 1.5379911661148071, - "learning_rate": 6.466633165829145e-05, - "loss": 5.1378, - "step": 35669 - }, - { - "epoch": 18.602346805736637, - "grad_norm": 1.5277397632598877, - "learning_rate": 6.466532663316583e-05, - "loss": 5.5737, - "step": 35670 - }, - { - "epoch": 18.602868318122557, - "grad_norm": 1.4944877624511719, - "learning_rate": 6.46643216080402e-05, - "loss": 5.3562, - "step": 35671 - }, - { - "epoch": 18.603389830508476, - "grad_norm": 1.6642173528671265, - "learning_rate": 6.466331658291457e-05, - "loss": 5.2812, - "step": 35672 - }, - { - "epoch": 18.603911342894392, - "grad_norm": 1.5946834087371826, - "learning_rate": 6.466231155778895e-05, - "loss": 5.2073, - "step": 35673 - }, - { - "epoch": 18.60443285528031, - "grad_norm": 1.565137505531311, - "learning_rate": 6.466130653266333e-05, - "loss": 4.7934, - "step": 35674 - }, - { - "epoch": 18.60495436766623, - "grad_norm": 1.613046407699585, - "learning_rate": 6.46603015075377e-05, - "loss": 5.1202, - "step": 35675 - }, - { - "epoch": 18.60547588005215, - "grad_norm": 1.461641788482666, - "learning_rate": 6.465929648241207e-05, - "loss": 5.2907, - "step": 35676 - }, - { - "epoch": 18.60599739243807, - "grad_norm": 1.5208925008773804, - "learning_rate": 6.465829145728644e-05, - "loss": 5.4786, - "step": 35677 - }, - { - "epoch": 18.60651890482399, - "grad_norm": 1.5136189460754395, - "learning_rate": 6.46572864321608e-05, - "loss": 5.0732, - "step": 35678 - }, - { - "epoch": 18.60704041720991, - "grad_norm": 1.402705430984497, - "learning_rate": 6.465628140703518e-05, - "loss": 5.7, - "step": 35679 - }, - { - "epoch": 18.60756192959583, - "grad_norm": 1.5396026372909546, - "learning_rate": 6.465527638190954e-05, - "loss": 5.2216, - "step": 35680 - }, - { - "epoch": 18.608083441981748, - "grad_norm": 1.6411998271942139, - "learning_rate": 6.465427135678392e-05, - "loss": 5.3357, - "step": 35681 - }, - { - "epoch": 18.608604954367667, - "grad_norm": 1.4079591035842896, - "learning_rate": 6.465326633165828e-05, - "loss": 5.7004, - "step": 35682 - }, - { - "epoch": 18.609126466753587, - "grad_norm": 1.4946318864822388, - "learning_rate": 6.465226130653266e-05, - "loss": 5.3283, - "step": 35683 - }, - { - "epoch": 18.609647979139506, - "grad_norm": 1.5205613374710083, - "learning_rate": 6.465125628140704e-05, - "loss": 5.419, - "step": 35684 - }, - { - "epoch": 18.610169491525422, - "grad_norm": 1.369079351425171, - "learning_rate": 6.465025125628142e-05, - "loss": 4.9419, - "step": 35685 - }, - { - "epoch": 18.61069100391134, - "grad_norm": 1.4976699352264404, - "learning_rate": 6.464924623115578e-05, - "loss": 5.144, - "step": 35686 - }, - { - "epoch": 18.61121251629726, - "grad_norm": 1.4628801345825195, - "learning_rate": 6.464824120603016e-05, - "loss": 5.1055, - "step": 35687 - }, - { - "epoch": 18.61173402868318, - "grad_norm": 1.5604889392852783, - "learning_rate": 6.464723618090452e-05, - "loss": 5.4881, - "step": 35688 - }, - { - "epoch": 18.6122555410691, - "grad_norm": 1.5043004751205444, - "learning_rate": 6.46462311557789e-05, - "loss": 5.5629, - "step": 35689 - }, - { - "epoch": 18.61277705345502, - "grad_norm": 1.5009984970092773, - "learning_rate": 6.464522613065327e-05, - "loss": 5.3097, - "step": 35690 - }, - { - "epoch": 18.61329856584094, - "grad_norm": 1.4242819547653198, - "learning_rate": 6.464422110552763e-05, - "loss": 5.6209, - "step": 35691 - }, - { - "epoch": 18.61382007822686, - "grad_norm": 1.6416399478912354, - "learning_rate": 6.464321608040201e-05, - "loss": 5.1948, - "step": 35692 - }, - { - "epoch": 18.614341590612778, - "grad_norm": 1.4774624109268188, - "learning_rate": 6.464221105527639e-05, - "loss": 4.8668, - "step": 35693 - }, - { - "epoch": 18.614863102998697, - "grad_norm": 1.4252318143844604, - "learning_rate": 6.464120603015076e-05, - "loss": 5.75, - "step": 35694 - }, - { - "epoch": 18.615384615384617, - "grad_norm": 1.5123910903930664, - "learning_rate": 6.464020100502513e-05, - "loss": 5.1209, - "step": 35695 - }, - { - "epoch": 18.615906127770536, - "grad_norm": 1.38404381275177, - "learning_rate": 6.463919597989951e-05, - "loss": 5.8219, - "step": 35696 - }, - { - "epoch": 18.616427640156452, - "grad_norm": 1.5775744915008545, - "learning_rate": 6.463819095477387e-05, - "loss": 5.3173, - "step": 35697 - }, - { - "epoch": 18.61694915254237, - "grad_norm": 1.53171706199646, - "learning_rate": 6.463718592964825e-05, - "loss": 5.5244, - "step": 35698 - }, - { - "epoch": 18.61747066492829, - "grad_norm": 1.5157113075256348, - "learning_rate": 6.463618090452261e-05, - "loss": 5.4933, - "step": 35699 - }, - { - "epoch": 18.61799217731421, - "grad_norm": 1.4244669675827026, - "learning_rate": 6.463517587939699e-05, - "loss": 5.5625, - "step": 35700 - }, - { - "epoch": 18.61851368970013, - "grad_norm": 1.5404554605484009, - "learning_rate": 6.463417085427136e-05, - "loss": 4.9083, - "step": 35701 - }, - { - "epoch": 18.61903520208605, - "grad_norm": 1.511816143989563, - "learning_rate": 6.463316582914573e-05, - "loss": 5.408, - "step": 35702 - }, - { - "epoch": 18.61955671447197, - "grad_norm": 1.6500056982040405, - "learning_rate": 6.46321608040201e-05, - "loss": 4.9072, - "step": 35703 - }, - { - "epoch": 18.62007822685789, - "grad_norm": 1.4363733530044556, - "learning_rate": 6.463115577889448e-05, - "loss": 5.2935, - "step": 35704 - }, - { - "epoch": 18.620599739243808, - "grad_norm": 1.4194766283035278, - "learning_rate": 6.463015075376885e-05, - "loss": 5.5554, - "step": 35705 - }, - { - "epoch": 18.621121251629727, - "grad_norm": 1.431024193763733, - "learning_rate": 6.462914572864322e-05, - "loss": 5.423, - "step": 35706 - }, - { - "epoch": 18.621642764015647, - "grad_norm": 1.6386945247650146, - "learning_rate": 6.46281407035176e-05, - "loss": 5.0412, - "step": 35707 - }, - { - "epoch": 18.622164276401566, - "grad_norm": 1.5235464572906494, - "learning_rate": 6.462713567839196e-05, - "loss": 5.4026, - "step": 35708 - }, - { - "epoch": 18.622685788787482, - "grad_norm": 1.6194361448287964, - "learning_rate": 6.462613065326634e-05, - "loss": 4.6249, - "step": 35709 - }, - { - "epoch": 18.6232073011734, - "grad_norm": 1.5814553499221802, - "learning_rate": 6.46251256281407e-05, - "loss": 4.9726, - "step": 35710 - }, - { - "epoch": 18.62372881355932, - "grad_norm": 1.6135741472244263, - "learning_rate": 6.462412060301508e-05, - "loss": 5.2646, - "step": 35711 - }, - { - "epoch": 18.62425032594524, - "grad_norm": 1.6042035818099976, - "learning_rate": 6.462311557788944e-05, - "loss": 5.278, - "step": 35712 - }, - { - "epoch": 18.62477183833116, - "grad_norm": 1.6219844818115234, - "learning_rate": 6.462211055276382e-05, - "loss": 4.9808, - "step": 35713 - }, - { - "epoch": 18.62529335071708, - "grad_norm": 1.4816056489944458, - "learning_rate": 6.46211055276382e-05, - "loss": 5.4879, - "step": 35714 - }, - { - "epoch": 18.625814863103, - "grad_norm": 1.5391602516174316, - "learning_rate": 6.462010050251258e-05, - "loss": 5.3604, - "step": 35715 - }, - { - "epoch": 18.62633637548892, - "grad_norm": 1.5850592851638794, - "learning_rate": 6.461909547738694e-05, - "loss": 5.4922, - "step": 35716 - }, - { - "epoch": 18.626857887874838, - "grad_norm": 1.5333458185195923, - "learning_rate": 6.461809045226132e-05, - "loss": 5.6645, - "step": 35717 - }, - { - "epoch": 18.627379400260757, - "grad_norm": 1.4506787061691284, - "learning_rate": 6.461708542713568e-05, - "loss": 5.6141, - "step": 35718 - }, - { - "epoch": 18.627900912646677, - "grad_norm": 1.560341238975525, - "learning_rate": 6.461608040201005e-05, - "loss": 5.1058, - "step": 35719 - }, - { - "epoch": 18.628422425032596, - "grad_norm": 1.5256457328796387, - "learning_rate": 6.461507537688443e-05, - "loss": 5.0338, - "step": 35720 - }, - { - "epoch": 18.628943937418512, - "grad_norm": 1.544498324394226, - "learning_rate": 6.461407035175879e-05, - "loss": 5.2393, - "step": 35721 - }, - { - "epoch": 18.62946544980443, - "grad_norm": 1.4463434219360352, - "learning_rate": 6.461306532663317e-05, - "loss": 4.3408, - "step": 35722 - }, - { - "epoch": 18.62998696219035, - "grad_norm": 1.5248974561691284, - "learning_rate": 6.461206030150753e-05, - "loss": 5.1851, - "step": 35723 - }, - { - "epoch": 18.63050847457627, - "grad_norm": 1.4676568508148193, - "learning_rate": 6.461105527638191e-05, - "loss": 5.5468, - "step": 35724 - }, - { - "epoch": 18.63102998696219, - "grad_norm": 1.448968529701233, - "learning_rate": 6.461005025125629e-05, - "loss": 4.7021, - "step": 35725 - }, - { - "epoch": 18.63155149934811, - "grad_norm": 1.4675486087799072, - "learning_rate": 6.460904522613067e-05, - "loss": 5.4268, - "step": 35726 - }, - { - "epoch": 18.63207301173403, - "grad_norm": 1.551528811454773, - "learning_rate": 6.460804020100503e-05, - "loss": 4.8253, - "step": 35727 - }, - { - "epoch": 18.63259452411995, - "grad_norm": 1.5216543674468994, - "learning_rate": 6.460703517587941e-05, - "loss": 5.5637, - "step": 35728 - }, - { - "epoch": 18.633116036505868, - "grad_norm": 1.447148323059082, - "learning_rate": 6.460603015075377e-05, - "loss": 5.1024, - "step": 35729 - }, - { - "epoch": 18.633637548891787, - "grad_norm": 1.4456433057785034, - "learning_rate": 6.460502512562815e-05, - "loss": 5.6436, - "step": 35730 - }, - { - "epoch": 18.634159061277707, - "grad_norm": 1.518314242362976, - "learning_rate": 6.460402010050251e-05, - "loss": 4.7601, - "step": 35731 - }, - { - "epoch": 18.634680573663623, - "grad_norm": 1.5895026922225952, - "learning_rate": 6.460301507537688e-05, - "loss": 5.2427, - "step": 35732 - }, - { - "epoch": 18.635202086049542, - "grad_norm": 1.4320745468139648, - "learning_rate": 6.460201005025126e-05, - "loss": 5.6824, - "step": 35733 - }, - { - "epoch": 18.63572359843546, - "grad_norm": 1.590407371520996, - "learning_rate": 6.460100502512563e-05, - "loss": 4.0585, - "step": 35734 - }, - { - "epoch": 18.63624511082138, - "grad_norm": 1.6626660823822021, - "learning_rate": 6.460000000000001e-05, - "loss": 5.2052, - "step": 35735 - }, - { - "epoch": 18.6367666232073, - "grad_norm": 1.5386260747909546, - "learning_rate": 6.459899497487438e-05, - "loss": 4.9133, - "step": 35736 - }, - { - "epoch": 18.63728813559322, - "grad_norm": 1.3685499429702759, - "learning_rate": 6.459798994974875e-05, - "loss": 5.8768, - "step": 35737 - }, - { - "epoch": 18.63780964797914, - "grad_norm": 1.5671584606170654, - "learning_rate": 6.459698492462312e-05, - "loss": 4.8872, - "step": 35738 - }, - { - "epoch": 18.63833116036506, - "grad_norm": 1.6712560653686523, - "learning_rate": 6.45959798994975e-05, - "loss": 4.5802, - "step": 35739 - }, - { - "epoch": 18.63885267275098, - "grad_norm": 1.5600121021270752, - "learning_rate": 6.459497487437186e-05, - "loss": 5.3992, - "step": 35740 - }, - { - "epoch": 18.639374185136898, - "grad_norm": 1.4680752754211426, - "learning_rate": 6.459396984924624e-05, - "loss": 5.5519, - "step": 35741 - }, - { - "epoch": 18.639895697522817, - "grad_norm": 1.5317106246948242, - "learning_rate": 6.45929648241206e-05, - "loss": 5.2691, - "step": 35742 - }, - { - "epoch": 18.640417209908737, - "grad_norm": 1.4375470876693726, - "learning_rate": 6.459195979899498e-05, - "loss": 5.1133, - "step": 35743 - }, - { - "epoch": 18.640938722294656, - "grad_norm": 1.5963140726089478, - "learning_rate": 6.459095477386934e-05, - "loss": 5.2218, - "step": 35744 - }, - { - "epoch": 18.641460234680572, - "grad_norm": 1.4930917024612427, - "learning_rate": 6.458994974874372e-05, - "loss": 5.4117, - "step": 35745 - }, - { - "epoch": 18.64198174706649, - "grad_norm": 1.5607619285583496, - "learning_rate": 6.45889447236181e-05, - "loss": 5.1923, - "step": 35746 - }, - { - "epoch": 18.64250325945241, - "grad_norm": 1.4626344442367554, - "learning_rate": 6.458793969849246e-05, - "loss": 5.5924, - "step": 35747 - }, - { - "epoch": 18.64302477183833, - "grad_norm": 1.4379467964172363, - "learning_rate": 6.458693467336684e-05, - "loss": 5.3966, - "step": 35748 - }, - { - "epoch": 18.64354628422425, - "grad_norm": 1.4184541702270508, - "learning_rate": 6.45859296482412e-05, - "loss": 5.8415, - "step": 35749 - }, - { - "epoch": 18.64406779661017, - "grad_norm": 1.500709891319275, - "learning_rate": 6.458492462311558e-05, - "loss": 5.0863, - "step": 35750 - }, - { - "epoch": 18.64458930899609, - "grad_norm": 1.3310719728469849, - "learning_rate": 6.458391959798995e-05, - "loss": 5.6213, - "step": 35751 - }, - { - "epoch": 18.64511082138201, - "grad_norm": 1.4872798919677734, - "learning_rate": 6.458291457286433e-05, - "loss": 5.5593, - "step": 35752 - }, - { - "epoch": 18.645632333767928, - "grad_norm": 1.587708592414856, - "learning_rate": 6.458190954773869e-05, - "loss": 5.3322, - "step": 35753 - }, - { - "epoch": 18.646153846153847, - "grad_norm": 1.401796817779541, - "learning_rate": 6.458090452261307e-05, - "loss": 5.5252, - "step": 35754 - }, - { - "epoch": 18.646675358539767, - "grad_norm": 1.4356905221939087, - "learning_rate": 6.457989949748743e-05, - "loss": 5.2006, - "step": 35755 - }, - { - "epoch": 18.647196870925683, - "grad_norm": 1.4411189556121826, - "learning_rate": 6.457889447236181e-05, - "loss": 5.6274, - "step": 35756 - }, - { - "epoch": 18.647718383311602, - "grad_norm": 1.4890936613082886, - "learning_rate": 6.457788944723619e-05, - "loss": 4.2956, - "step": 35757 - }, - { - "epoch": 18.64823989569752, - "grad_norm": 1.516455888748169, - "learning_rate": 6.457688442211055e-05, - "loss": 5.1727, - "step": 35758 - }, - { - "epoch": 18.64876140808344, - "grad_norm": 1.4456534385681152, - "learning_rate": 6.457587939698493e-05, - "loss": 4.9408, - "step": 35759 - }, - { - "epoch": 18.64928292046936, - "grad_norm": 1.503006100654602, - "learning_rate": 6.45748743718593e-05, - "loss": 4.8298, - "step": 35760 - }, - { - "epoch": 18.64980443285528, - "grad_norm": 1.4841952323913574, - "learning_rate": 6.457386934673367e-05, - "loss": 5.1959, - "step": 35761 - }, - { - "epoch": 18.6503259452412, - "grad_norm": 1.4460926055908203, - "learning_rate": 6.457286432160804e-05, - "loss": 5.0603, - "step": 35762 - }, - { - "epoch": 18.65084745762712, - "grad_norm": 1.4572546482086182, - "learning_rate": 6.457185929648241e-05, - "loss": 5.3988, - "step": 35763 - }, - { - "epoch": 18.65136897001304, - "grad_norm": 1.5059317350387573, - "learning_rate": 6.457085427135678e-05, - "loss": 4.9519, - "step": 35764 - }, - { - "epoch": 18.651890482398958, - "grad_norm": 1.4994629621505737, - "learning_rate": 6.456984924623116e-05, - "loss": 5.282, - "step": 35765 - }, - { - "epoch": 18.652411994784877, - "grad_norm": 1.5077558755874634, - "learning_rate": 6.456884422110553e-05, - "loss": 5.3685, - "step": 35766 - }, - { - "epoch": 18.652933507170797, - "grad_norm": 1.5312817096710205, - "learning_rate": 6.456783919597991e-05, - "loss": 5.5764, - "step": 35767 - }, - { - "epoch": 18.653455019556713, - "grad_norm": 1.5252366065979004, - "learning_rate": 6.456683417085428e-05, - "loss": 5.3226, - "step": 35768 - }, - { - "epoch": 18.653976531942632, - "grad_norm": 1.3596715927124023, - "learning_rate": 6.456582914572865e-05, - "loss": 5.691, - "step": 35769 - }, - { - "epoch": 18.65449804432855, - "grad_norm": 1.4140753746032715, - "learning_rate": 6.456482412060302e-05, - "loss": 5.9027, - "step": 35770 - }, - { - "epoch": 18.65501955671447, - "grad_norm": 1.514192819595337, - "learning_rate": 6.456381909547738e-05, - "loss": 5.4687, - "step": 35771 - }, - { - "epoch": 18.65554106910039, - "grad_norm": 1.4447715282440186, - "learning_rate": 6.456281407035176e-05, - "loss": 5.5384, - "step": 35772 - }, - { - "epoch": 18.65606258148631, - "grad_norm": 1.5164729356765747, - "learning_rate": 6.456180904522613e-05, - "loss": 5.6949, - "step": 35773 - }, - { - "epoch": 18.65658409387223, - "grad_norm": 1.4898416996002197, - "learning_rate": 6.45608040201005e-05, - "loss": 5.2513, - "step": 35774 - }, - { - "epoch": 18.65710560625815, - "grad_norm": 1.6705743074417114, - "learning_rate": 6.455979899497487e-05, - "loss": 5.2746, - "step": 35775 - }, - { - "epoch": 18.65762711864407, - "grad_norm": 1.5918811559677124, - "learning_rate": 6.455879396984925e-05, - "loss": 5.0631, - "step": 35776 - }, - { - "epoch": 18.658148631029988, - "grad_norm": 1.5300389528274536, - "learning_rate": 6.455778894472362e-05, - "loss": 5.1861, - "step": 35777 - }, - { - "epoch": 18.658670143415907, - "grad_norm": 1.5191282033920288, - "learning_rate": 6.4556783919598e-05, - "loss": 5.0186, - "step": 35778 - }, - { - "epoch": 18.659191655801827, - "grad_norm": 1.5296348333358765, - "learning_rate": 6.455577889447237e-05, - "loss": 5.4964, - "step": 35779 - }, - { - "epoch": 18.659713168187743, - "grad_norm": 1.552445888519287, - "learning_rate": 6.455477386934674e-05, - "loss": 5.1891, - "step": 35780 - }, - { - "epoch": 18.660234680573662, - "grad_norm": 1.6074129343032837, - "learning_rate": 6.455376884422111e-05, - "loss": 5.1237, - "step": 35781 - }, - { - "epoch": 18.66075619295958, - "grad_norm": 1.5651556253433228, - "learning_rate": 6.455276381909548e-05, - "loss": 5.4602, - "step": 35782 - }, - { - "epoch": 18.6612777053455, - "grad_norm": 1.4472140073776245, - "learning_rate": 6.455175879396985e-05, - "loss": 5.3256, - "step": 35783 - }, - { - "epoch": 18.66179921773142, - "grad_norm": 1.5698143243789673, - "learning_rate": 6.455075376884421e-05, - "loss": 5.9515, - "step": 35784 - }, - { - "epoch": 18.66232073011734, - "grad_norm": 1.5940662622451782, - "learning_rate": 6.454974874371859e-05, - "loss": 5.0846, - "step": 35785 - }, - { - "epoch": 18.66284224250326, - "grad_norm": 1.4774161577224731, - "learning_rate": 6.454874371859297e-05, - "loss": 5.4936, - "step": 35786 - }, - { - "epoch": 18.66336375488918, - "grad_norm": 1.499632477760315, - "learning_rate": 6.454773869346735e-05, - "loss": 5.398, - "step": 35787 - }, - { - "epoch": 18.6638852672751, - "grad_norm": 1.4496902227401733, - "learning_rate": 6.454673366834171e-05, - "loss": 5.4012, - "step": 35788 - }, - { - "epoch": 18.664406779661018, - "grad_norm": 1.5756906270980835, - "learning_rate": 6.454572864321609e-05, - "loss": 5.1978, - "step": 35789 - }, - { - "epoch": 18.664928292046937, - "grad_norm": 1.5237839221954346, - "learning_rate": 6.454472361809045e-05, - "loss": 5.5109, - "step": 35790 - }, - { - "epoch": 18.665449804432857, - "grad_norm": 1.494284987449646, - "learning_rate": 6.454371859296483e-05, - "loss": 5.1215, - "step": 35791 - }, - { - "epoch": 18.665971316818773, - "grad_norm": 1.5743721723556519, - "learning_rate": 6.45427135678392e-05, - "loss": 5.106, - "step": 35792 - }, - { - "epoch": 18.666492829204692, - "grad_norm": 1.548122763633728, - "learning_rate": 6.454170854271357e-05, - "loss": 4.8674, - "step": 35793 - }, - { - "epoch": 18.667014341590612, - "grad_norm": 1.5801239013671875, - "learning_rate": 6.454070351758794e-05, - "loss": 4.8568, - "step": 35794 - }, - { - "epoch": 18.66753585397653, - "grad_norm": 1.7037566900253296, - "learning_rate": 6.453969849246232e-05, - "loss": 5.6387, - "step": 35795 - }, - { - "epoch": 18.66805736636245, - "grad_norm": 1.6037253141403198, - "learning_rate": 6.453869346733668e-05, - "loss": 5.3808, - "step": 35796 - }, - { - "epoch": 18.66857887874837, - "grad_norm": 1.4332889318466187, - "learning_rate": 6.453768844221106e-05, - "loss": 4.8014, - "step": 35797 - }, - { - "epoch": 18.66910039113429, - "grad_norm": 1.535971760749817, - "learning_rate": 6.453668341708544e-05, - "loss": 5.2199, - "step": 35798 - }, - { - "epoch": 18.66962190352021, - "grad_norm": 1.5308573246002197, - "learning_rate": 6.45356783919598e-05, - "loss": 5.2749, - "step": 35799 - }, - { - "epoch": 18.67014341590613, - "grad_norm": 1.5455416440963745, - "learning_rate": 6.453467336683418e-05, - "loss": 5.3524, - "step": 35800 - }, - { - "epoch": 18.670664928292048, - "grad_norm": 1.5711168050765991, - "learning_rate": 6.453366834170854e-05, - "loss": 5.0899, - "step": 35801 - }, - { - "epoch": 18.671186440677968, - "grad_norm": 1.5153120756149292, - "learning_rate": 6.453266331658292e-05, - "loss": 5.2082, - "step": 35802 - }, - { - "epoch": 18.671707953063887, - "grad_norm": 1.5068107843399048, - "learning_rate": 6.453165829145728e-05, - "loss": 5.2647, - "step": 35803 - }, - { - "epoch": 18.672229465449803, - "grad_norm": 1.542945384979248, - "learning_rate": 6.453065326633166e-05, - "loss": 5.39, - "step": 35804 - }, - { - "epoch": 18.672750977835722, - "grad_norm": 1.4741911888122559, - "learning_rate": 6.452964824120603e-05, - "loss": 4.944, - "step": 35805 - }, - { - "epoch": 18.673272490221642, - "grad_norm": 1.69082510471344, - "learning_rate": 6.45286432160804e-05, - "loss": 5.0588, - "step": 35806 - }, - { - "epoch": 18.67379400260756, - "grad_norm": 1.5544863939285278, - "learning_rate": 6.452763819095478e-05, - "loss": 5.4232, - "step": 35807 - }, - { - "epoch": 18.67431551499348, - "grad_norm": 1.5561741590499878, - "learning_rate": 6.452663316582916e-05, - "loss": 5.571, - "step": 35808 - }, - { - "epoch": 18.6748370273794, - "grad_norm": 1.4946218729019165, - "learning_rate": 6.452562814070352e-05, - "loss": 5.3819, - "step": 35809 - }, - { - "epoch": 18.67535853976532, - "grad_norm": 1.4943900108337402, - "learning_rate": 6.45246231155779e-05, - "loss": 5.3449, - "step": 35810 - }, - { - "epoch": 18.67588005215124, - "grad_norm": 1.4619845151901245, - "learning_rate": 6.452361809045227e-05, - "loss": 5.1864, - "step": 35811 - }, - { - "epoch": 18.67640156453716, - "grad_norm": 1.610729455947876, - "learning_rate": 6.452261306532663e-05, - "loss": 5.2922, - "step": 35812 - }, - { - "epoch": 18.676923076923078, - "grad_norm": 1.5920456647872925, - "learning_rate": 6.452160804020101e-05, - "loss": 4.7326, - "step": 35813 - }, - { - "epoch": 18.677444589308998, - "grad_norm": 1.4704784154891968, - "learning_rate": 6.452060301507537e-05, - "loss": 5.3544, - "step": 35814 - }, - { - "epoch": 18.677966101694913, - "grad_norm": 1.4732096195220947, - "learning_rate": 6.451959798994975e-05, - "loss": 5.5782, - "step": 35815 - }, - { - "epoch": 18.678487614080833, - "grad_norm": 1.6220703125, - "learning_rate": 6.451859296482411e-05, - "loss": 4.6908, - "step": 35816 - }, - { - "epoch": 18.679009126466752, - "grad_norm": 1.4792379140853882, - "learning_rate": 6.451758793969849e-05, - "loss": 5.1441, - "step": 35817 - }, - { - "epoch": 18.679530638852672, - "grad_norm": 1.550882339477539, - "learning_rate": 6.451658291457287e-05, - "loss": 4.8606, - "step": 35818 - }, - { - "epoch": 18.68005215123859, - "grad_norm": 1.5322669744491577, - "learning_rate": 6.451557788944725e-05, - "loss": 5.408, - "step": 35819 - }, - { - "epoch": 18.68057366362451, - "grad_norm": 1.508384346961975, - "learning_rate": 6.451457286432161e-05, - "loss": 5.418, - "step": 35820 - }, - { - "epoch": 18.68109517601043, - "grad_norm": 1.496322751045227, - "learning_rate": 6.451356783919599e-05, - "loss": 5.5945, - "step": 35821 - }, - { - "epoch": 18.68161668839635, - "grad_norm": 1.4665178060531616, - "learning_rate": 6.451256281407035e-05, - "loss": 5.4386, - "step": 35822 - }, - { - "epoch": 18.68213820078227, - "grad_norm": 1.4962414503097534, - "learning_rate": 6.451155778894473e-05, - "loss": 5.5519, - "step": 35823 - }, - { - "epoch": 18.68265971316819, - "grad_norm": 1.465462327003479, - "learning_rate": 6.45105527638191e-05, - "loss": 5.5345, - "step": 35824 - }, - { - "epoch": 18.683181225554108, - "grad_norm": 1.5143828392028809, - "learning_rate": 6.450954773869346e-05, - "loss": 5.6081, - "step": 35825 - }, - { - "epoch": 18.683702737940028, - "grad_norm": 1.6393991708755493, - "learning_rate": 6.450854271356784e-05, - "loss": 5.1193, - "step": 35826 - }, - { - "epoch": 18.684224250325947, - "grad_norm": 1.409268856048584, - "learning_rate": 6.450753768844222e-05, - "loss": 5.3908, - "step": 35827 - }, - { - "epoch": 18.684745762711863, - "grad_norm": 1.5955629348754883, - "learning_rate": 6.45065326633166e-05, - "loss": 5.5188, - "step": 35828 - }, - { - "epoch": 18.685267275097782, - "grad_norm": 1.5691931247711182, - "learning_rate": 6.450552763819096e-05, - "loss": 5.5986, - "step": 35829 - }, - { - "epoch": 18.685788787483702, - "grad_norm": 1.5182652473449707, - "learning_rate": 6.450452261306534e-05, - "loss": 5.2076, - "step": 35830 - }, - { - "epoch": 18.68631029986962, - "grad_norm": 1.515506386756897, - "learning_rate": 6.45035175879397e-05, - "loss": 5.7458, - "step": 35831 - }, - { - "epoch": 18.68683181225554, - "grad_norm": 1.483201503753662, - "learning_rate": 6.450251256281408e-05, - "loss": 5.3837, - "step": 35832 - }, - { - "epoch": 18.68735332464146, - "grad_norm": 1.503985047340393, - "learning_rate": 6.450150753768844e-05, - "loss": 5.5059, - "step": 35833 - }, - { - "epoch": 18.68787483702738, - "grad_norm": 1.4424192905426025, - "learning_rate": 6.450050251256282e-05, - "loss": 5.5429, - "step": 35834 - }, - { - "epoch": 18.6883963494133, - "grad_norm": 1.4684611558914185, - "learning_rate": 6.449949748743718e-05, - "loss": 5.5643, - "step": 35835 - }, - { - "epoch": 18.68891786179922, - "grad_norm": 1.6057968139648438, - "learning_rate": 6.449849246231156e-05, - "loss": 4.6394, - "step": 35836 - }, - { - "epoch": 18.689439374185138, - "grad_norm": 1.4764420986175537, - "learning_rate": 6.449748743718593e-05, - "loss": 5.3702, - "step": 35837 - }, - { - "epoch": 18.689960886571058, - "grad_norm": 1.584904670715332, - "learning_rate": 6.44964824120603e-05, - "loss": 5.1189, - "step": 35838 - }, - { - "epoch": 18.690482398956973, - "grad_norm": 1.5156574249267578, - "learning_rate": 6.449547738693468e-05, - "loss": 5.2726, - "step": 35839 - }, - { - "epoch": 18.691003911342893, - "grad_norm": 1.587459921836853, - "learning_rate": 6.449447236180905e-05, - "loss": 5.3815, - "step": 35840 - }, - { - "epoch": 18.691525423728812, - "grad_norm": 1.5527766942977905, - "learning_rate": 6.449346733668342e-05, - "loss": 5.427, - "step": 35841 - }, - { - "epoch": 18.692046936114732, - "grad_norm": 1.412217378616333, - "learning_rate": 6.449246231155779e-05, - "loss": 5.6249, - "step": 35842 - }, - { - "epoch": 18.69256844850065, - "grad_norm": 1.498233437538147, - "learning_rate": 6.449145728643217e-05, - "loss": 5.2736, - "step": 35843 - }, - { - "epoch": 18.69308996088657, - "grad_norm": 1.467262625694275, - "learning_rate": 6.449045226130653e-05, - "loss": 5.7096, - "step": 35844 - }, - { - "epoch": 18.69361147327249, - "grad_norm": 1.629478096961975, - "learning_rate": 6.448944723618091e-05, - "loss": 5.2919, - "step": 35845 - }, - { - "epoch": 18.69413298565841, - "grad_norm": 1.5553669929504395, - "learning_rate": 6.448844221105527e-05, - "loss": 4.806, - "step": 35846 - }, - { - "epoch": 18.69465449804433, - "grad_norm": 1.4115216732025146, - "learning_rate": 6.448743718592965e-05, - "loss": 5.396, - "step": 35847 - }, - { - "epoch": 18.69517601043025, - "grad_norm": 1.467923879623413, - "learning_rate": 6.448643216080403e-05, - "loss": 5.2609, - "step": 35848 - }, - { - "epoch": 18.695697522816168, - "grad_norm": 1.53794527053833, - "learning_rate": 6.44854271356784e-05, - "loss": 5.3818, - "step": 35849 - }, - { - "epoch": 18.696219035202088, - "grad_norm": 1.4258854389190674, - "learning_rate": 6.448442211055277e-05, - "loss": 5.5802, - "step": 35850 - }, - { - "epoch": 18.696740547588004, - "grad_norm": 1.5675604343414307, - "learning_rate": 6.448341708542713e-05, - "loss": 4.7035, - "step": 35851 - }, - { - "epoch": 18.697262059973923, - "grad_norm": 1.5047305822372437, - "learning_rate": 6.448241206030151e-05, - "loss": 5.5237, - "step": 35852 - }, - { - "epoch": 18.697783572359842, - "grad_norm": 1.460528016090393, - "learning_rate": 6.448140703517588e-05, - "loss": 5.5958, - "step": 35853 - }, - { - "epoch": 18.698305084745762, - "grad_norm": 1.5070680379867554, - "learning_rate": 6.448040201005025e-05, - "loss": 5.4623, - "step": 35854 - }, - { - "epoch": 18.69882659713168, - "grad_norm": 1.4966051578521729, - "learning_rate": 6.447939698492462e-05, - "loss": 5.1239, - "step": 35855 - }, - { - "epoch": 18.6993481095176, - "grad_norm": 1.4453483819961548, - "learning_rate": 6.4478391959799e-05, - "loss": 5.1837, - "step": 35856 - }, - { - "epoch": 18.69986962190352, - "grad_norm": 1.5746926069259644, - "learning_rate": 6.447738693467336e-05, - "loss": 4.7999, - "step": 35857 - }, - { - "epoch": 18.70039113428944, - "grad_norm": 1.473960041999817, - "learning_rate": 6.447638190954774e-05, - "loss": 5.5455, - "step": 35858 - }, - { - "epoch": 18.70091264667536, - "grad_norm": 1.5786218643188477, - "learning_rate": 6.447537688442212e-05, - "loss": 4.5817, - "step": 35859 - }, - { - "epoch": 18.70143415906128, - "grad_norm": 1.434946894645691, - "learning_rate": 6.44743718592965e-05, - "loss": 5.5126, - "step": 35860 - }, - { - "epoch": 18.701955671447198, - "grad_norm": 1.4372460842132568, - "learning_rate": 6.447336683417086e-05, - "loss": 5.6747, - "step": 35861 - }, - { - "epoch": 18.702477183833118, - "grad_norm": 1.4418162107467651, - "learning_rate": 6.447236180904524e-05, - "loss": 5.4345, - "step": 35862 - }, - { - "epoch": 18.702998696219034, - "grad_norm": 1.539210557937622, - "learning_rate": 6.44713567839196e-05, - "loss": 4.9327, - "step": 35863 - }, - { - "epoch": 18.703520208604953, - "grad_norm": 1.581088900566101, - "learning_rate": 6.447035175879397e-05, - "loss": 5.1434, - "step": 35864 - }, - { - "epoch": 18.704041720990872, - "grad_norm": 1.559638500213623, - "learning_rate": 6.446934673366834e-05, - "loss": 5.2323, - "step": 35865 - }, - { - "epoch": 18.704563233376792, - "grad_norm": 1.4857606887817383, - "learning_rate": 6.446834170854271e-05, - "loss": 5.1399, - "step": 35866 - }, - { - "epoch": 18.70508474576271, - "grad_norm": 1.5214449167251587, - "learning_rate": 6.446733668341709e-05, - "loss": 5.093, - "step": 35867 - }, - { - "epoch": 18.70560625814863, - "grad_norm": 1.612060785293579, - "learning_rate": 6.446633165829146e-05, - "loss": 4.7051, - "step": 35868 - }, - { - "epoch": 18.70612777053455, - "grad_norm": 1.4832314252853394, - "learning_rate": 6.446532663316584e-05, - "loss": 5.4856, - "step": 35869 - }, - { - "epoch": 18.70664928292047, - "grad_norm": 1.464050054550171, - "learning_rate": 6.44643216080402e-05, - "loss": 5.5356, - "step": 35870 - }, - { - "epoch": 18.70717079530639, - "grad_norm": 1.510913610458374, - "learning_rate": 6.446331658291458e-05, - "loss": 5.4065, - "step": 35871 - }, - { - "epoch": 18.70769230769231, - "grad_norm": 1.4881433248519897, - "learning_rate": 6.446231155778895e-05, - "loss": 5.671, - "step": 35872 - }, - { - "epoch": 18.708213820078228, - "grad_norm": 1.5408648252487183, - "learning_rate": 6.446130653266333e-05, - "loss": 5.3345, - "step": 35873 - }, - { - "epoch": 18.708735332464148, - "grad_norm": 1.5621751546859741, - "learning_rate": 6.446030150753769e-05, - "loss": 5.1055, - "step": 35874 - }, - { - "epoch": 18.709256844850064, - "grad_norm": 1.6275897026062012, - "learning_rate": 6.445929648241207e-05, - "loss": 4.4156, - "step": 35875 - }, - { - "epoch": 18.709778357235983, - "grad_norm": 1.5488805770874023, - "learning_rate": 6.445829145728643e-05, - "loss": 5.2016, - "step": 35876 - }, - { - "epoch": 18.710299869621903, - "grad_norm": 1.416773796081543, - "learning_rate": 6.44572864321608e-05, - "loss": 5.5555, - "step": 35877 - }, - { - "epoch": 18.710821382007822, - "grad_norm": 1.5676122903823853, - "learning_rate": 6.445628140703517e-05, - "loss": 5.4308, - "step": 35878 - }, - { - "epoch": 18.71134289439374, - "grad_norm": 1.4382609128952026, - "learning_rate": 6.445527638190955e-05, - "loss": 4.8076, - "step": 35879 - }, - { - "epoch": 18.71186440677966, - "grad_norm": 1.58767569065094, - "learning_rate": 6.445427135678393e-05, - "loss": 5.0166, - "step": 35880 - }, - { - "epoch": 18.71238591916558, - "grad_norm": 1.406355381011963, - "learning_rate": 6.44532663316583e-05, - "loss": 5.5933, - "step": 35881 - }, - { - "epoch": 18.7129074315515, - "grad_norm": 1.4473050832748413, - "learning_rate": 6.445226130653267e-05, - "loss": 5.1367, - "step": 35882 - }, - { - "epoch": 18.71342894393742, - "grad_norm": 1.3843144178390503, - "learning_rate": 6.445125628140704e-05, - "loss": 5.4483, - "step": 35883 - }, - { - "epoch": 18.71395045632334, - "grad_norm": 1.4565273523330688, - "learning_rate": 6.445025125628141e-05, - "loss": 5.4261, - "step": 35884 - }, - { - "epoch": 18.714471968709258, - "grad_norm": 1.4990873336791992, - "learning_rate": 6.444924623115578e-05, - "loss": 5.1823, - "step": 35885 - }, - { - "epoch": 18.714993481095178, - "grad_norm": 1.5340697765350342, - "learning_rate": 6.444824120603016e-05, - "loss": 5.4962, - "step": 35886 - }, - { - "epoch": 18.715514993481094, - "grad_norm": 1.3688395023345947, - "learning_rate": 6.444723618090452e-05, - "loss": 5.1884, - "step": 35887 - }, - { - "epoch": 18.716036505867013, - "grad_norm": 1.5042369365692139, - "learning_rate": 6.44462311557789e-05, - "loss": 5.3413, - "step": 35888 - }, - { - "epoch": 18.716558018252933, - "grad_norm": 1.5028636455535889, - "learning_rate": 6.444522613065328e-05, - "loss": 4.8842, - "step": 35889 - }, - { - "epoch": 18.717079530638852, - "grad_norm": 1.5264544486999512, - "learning_rate": 6.444422110552765e-05, - "loss": 4.9118, - "step": 35890 - }, - { - "epoch": 18.71760104302477, - "grad_norm": 1.4580771923065186, - "learning_rate": 6.444321608040202e-05, - "loss": 5.2195, - "step": 35891 - }, - { - "epoch": 18.71812255541069, - "grad_norm": 1.4936691522598267, - "learning_rate": 6.444221105527638e-05, - "loss": 4.8478, - "step": 35892 - }, - { - "epoch": 18.71864406779661, - "grad_norm": 1.5331989526748657, - "learning_rate": 6.444120603015076e-05, - "loss": 5.5033, - "step": 35893 - }, - { - "epoch": 18.71916558018253, - "grad_norm": 1.5714527368545532, - "learning_rate": 6.444020100502512e-05, - "loss": 5.6474, - "step": 35894 - }, - { - "epoch": 18.71968709256845, - "grad_norm": 1.5396742820739746, - "learning_rate": 6.44391959798995e-05, - "loss": 5.3222, - "step": 35895 - }, - { - "epoch": 18.72020860495437, - "grad_norm": 1.5309150218963623, - "learning_rate": 6.443819095477387e-05, - "loss": 5.1256, - "step": 35896 - }, - { - "epoch": 18.72073011734029, - "grad_norm": 1.538632869720459, - "learning_rate": 6.443718592964824e-05, - "loss": 5.451, - "step": 35897 - }, - { - "epoch": 18.721251629726208, - "grad_norm": 1.4195797443389893, - "learning_rate": 6.443618090452261e-05, - "loss": 5.5289, - "step": 35898 - }, - { - "epoch": 18.721773142112124, - "grad_norm": 1.565274953842163, - "learning_rate": 6.443517587939699e-05, - "loss": 5.5902, - "step": 35899 - }, - { - "epoch": 18.722294654498043, - "grad_norm": 1.4483401775360107, - "learning_rate": 6.443417085427136e-05, - "loss": 5.5122, - "step": 35900 - }, - { - "epoch": 18.722816166883963, - "grad_norm": 1.4502251148223877, - "learning_rate": 6.443316582914574e-05, - "loss": 5.0233, - "step": 35901 - }, - { - "epoch": 18.723337679269882, - "grad_norm": 1.4447886943817139, - "learning_rate": 6.44321608040201e-05, - "loss": 5.5414, - "step": 35902 - }, - { - "epoch": 18.7238591916558, - "grad_norm": 1.502145528793335, - "learning_rate": 6.443115577889448e-05, - "loss": 5.1978, - "step": 35903 - }, - { - "epoch": 18.72438070404172, - "grad_norm": 1.3791937828063965, - "learning_rate": 6.443015075376885e-05, - "loss": 5.7487, - "step": 35904 - }, - { - "epoch": 18.72490221642764, - "grad_norm": 1.5792959928512573, - "learning_rate": 6.442914572864321e-05, - "loss": 5.1401, - "step": 35905 - }, - { - "epoch": 18.72542372881356, - "grad_norm": 1.5177900791168213, - "learning_rate": 6.442814070351759e-05, - "loss": 5.2445, - "step": 35906 - }, - { - "epoch": 18.72594524119948, - "grad_norm": 1.4903252124786377, - "learning_rate": 6.442713567839195e-05, - "loss": 4.7671, - "step": 35907 - }, - { - "epoch": 18.7264667535854, - "grad_norm": 1.5932835340499878, - "learning_rate": 6.442613065326633e-05, - "loss": 5.1247, - "step": 35908 - }, - { - "epoch": 18.72698826597132, - "grad_norm": 1.6513744592666626, - "learning_rate": 6.44251256281407e-05, - "loss": 5.3099, - "step": 35909 - }, - { - "epoch": 18.727509778357238, - "grad_norm": 1.5338518619537354, - "learning_rate": 6.442412060301507e-05, - "loss": 5.397, - "step": 35910 - }, - { - "epoch": 18.728031290743154, - "grad_norm": 1.580959439277649, - "learning_rate": 6.442311557788945e-05, - "loss": 4.9072, - "step": 35911 - }, - { - "epoch": 18.728552803129073, - "grad_norm": 1.5294533967971802, - "learning_rate": 6.442211055276383e-05, - "loss": 5.3437, - "step": 35912 - }, - { - "epoch": 18.729074315514993, - "grad_norm": 1.4677773714065552, - "learning_rate": 6.44211055276382e-05, - "loss": 5.2594, - "step": 35913 - }, - { - "epoch": 18.729595827900912, - "grad_norm": 1.4779075384140015, - "learning_rate": 6.442010050251257e-05, - "loss": 4.7745, - "step": 35914 - }, - { - "epoch": 18.73011734028683, - "grad_norm": 1.4289888143539429, - "learning_rate": 6.441909547738694e-05, - "loss": 5.6996, - "step": 35915 - }, - { - "epoch": 18.73063885267275, - "grad_norm": 1.40090811252594, - "learning_rate": 6.441809045226131e-05, - "loss": 5.7723, - "step": 35916 - }, - { - "epoch": 18.73116036505867, - "grad_norm": 1.579135775566101, - "learning_rate": 6.441708542713568e-05, - "loss": 5.2758, - "step": 35917 - }, - { - "epoch": 18.73168187744459, - "grad_norm": 1.5520106554031372, - "learning_rate": 6.441608040201004e-05, - "loss": 5.1556, - "step": 35918 - }, - { - "epoch": 18.73220338983051, - "grad_norm": 1.5295913219451904, - "learning_rate": 6.441507537688442e-05, - "loss": 5.3044, - "step": 35919 - }, - { - "epoch": 18.73272490221643, - "grad_norm": 1.4292582273483276, - "learning_rate": 6.44140703517588e-05, - "loss": 5.7273, - "step": 35920 - }, - { - "epoch": 18.73324641460235, - "grad_norm": 1.5562400817871094, - "learning_rate": 6.441306532663318e-05, - "loss": 5.1322, - "step": 35921 - }, - { - "epoch": 18.733767926988264, - "grad_norm": 1.413487195968628, - "learning_rate": 6.441206030150754e-05, - "loss": 5.4699, - "step": 35922 - }, - { - "epoch": 18.734289439374184, - "grad_norm": 1.5121992826461792, - "learning_rate": 6.441105527638192e-05, - "loss": 5.4812, - "step": 35923 - }, - { - "epoch": 18.734810951760103, - "grad_norm": 1.4073642492294312, - "learning_rate": 6.441005025125628e-05, - "loss": 5.3461, - "step": 35924 - }, - { - "epoch": 18.735332464146023, - "grad_norm": 1.479889154434204, - "learning_rate": 6.440904522613066e-05, - "loss": 5.448, - "step": 35925 - }, - { - "epoch": 18.735853976531942, - "grad_norm": 1.4538196325302124, - "learning_rate": 6.440804020100502e-05, - "loss": 5.5391, - "step": 35926 - }, - { - "epoch": 18.73637548891786, - "grad_norm": 1.4855526685714722, - "learning_rate": 6.44070351758794e-05, - "loss": 5.1415, - "step": 35927 - }, - { - "epoch": 18.73689700130378, - "grad_norm": 1.4763131141662598, - "learning_rate": 6.440603015075377e-05, - "loss": 5.5997, - "step": 35928 - }, - { - "epoch": 18.7374185136897, - "grad_norm": 1.4911435842514038, - "learning_rate": 6.440502512562814e-05, - "loss": 5.2742, - "step": 35929 - }, - { - "epoch": 18.73794002607562, - "grad_norm": 1.4628106355667114, - "learning_rate": 6.440402010050251e-05, - "loss": 5.4732, - "step": 35930 - }, - { - "epoch": 18.73846153846154, - "grad_norm": 1.504129409790039, - "learning_rate": 6.440301507537689e-05, - "loss": 4.6388, - "step": 35931 - }, - { - "epoch": 18.73898305084746, - "grad_norm": 1.4977718591690063, - "learning_rate": 6.440201005025126e-05, - "loss": 5.5933, - "step": 35932 - }, - { - "epoch": 18.73950456323338, - "grad_norm": 1.5340303182601929, - "learning_rate": 6.440100502512563e-05, - "loss": 5.4104, - "step": 35933 - }, - { - "epoch": 18.740026075619298, - "grad_norm": 1.5769332647323608, - "learning_rate": 6.440000000000001e-05, - "loss": 5.3708, - "step": 35934 - }, - { - "epoch": 18.740547588005214, - "grad_norm": 1.4378165006637573, - "learning_rate": 6.439899497487437e-05, - "loss": 4.9009, - "step": 35935 - }, - { - "epoch": 18.741069100391133, - "grad_norm": 1.5983855724334717, - "learning_rate": 6.439798994974875e-05, - "loss": 5.3889, - "step": 35936 - }, - { - "epoch": 18.741590612777053, - "grad_norm": 1.5478078126907349, - "learning_rate": 6.439698492462311e-05, - "loss": 5.0589, - "step": 35937 - }, - { - "epoch": 18.742112125162972, - "grad_norm": 1.4716432094573975, - "learning_rate": 6.439597989949749e-05, - "loss": 5.5704, - "step": 35938 - }, - { - "epoch": 18.74263363754889, - "grad_norm": 1.477070927619934, - "learning_rate": 6.439497487437186e-05, - "loss": 5.6062, - "step": 35939 - }, - { - "epoch": 18.74315514993481, - "grad_norm": 1.4988014698028564, - "learning_rate": 6.439396984924623e-05, - "loss": 5.7342, - "step": 35940 - }, - { - "epoch": 18.74367666232073, - "grad_norm": 1.5080150365829468, - "learning_rate": 6.439296482412061e-05, - "loss": 5.0092, - "step": 35941 - }, - { - "epoch": 18.74419817470665, - "grad_norm": 1.396349549293518, - "learning_rate": 6.439195979899499e-05, - "loss": 5.3552, - "step": 35942 - }, - { - "epoch": 18.74471968709257, - "grad_norm": 1.493714451789856, - "learning_rate": 6.439095477386935e-05, - "loss": 5.1735, - "step": 35943 - }, - { - "epoch": 18.74524119947849, - "grad_norm": 1.4889450073242188, - "learning_rate": 6.438994974874372e-05, - "loss": 5.518, - "step": 35944 - }, - { - "epoch": 18.74576271186441, - "grad_norm": 1.3912254571914673, - "learning_rate": 6.43889447236181e-05, - "loss": 5.7902, - "step": 35945 - }, - { - "epoch": 18.746284224250324, - "grad_norm": 1.4617716073989868, - "learning_rate": 6.438793969849246e-05, - "loss": 5.747, - "step": 35946 - }, - { - "epoch": 18.746805736636244, - "grad_norm": 1.4233908653259277, - "learning_rate": 6.438693467336684e-05, - "loss": 5.6954, - "step": 35947 - }, - { - "epoch": 18.747327249022163, - "grad_norm": 1.5063837766647339, - "learning_rate": 6.43859296482412e-05, - "loss": 5.0712, - "step": 35948 - }, - { - "epoch": 18.747848761408083, - "grad_norm": 1.3843729496002197, - "learning_rate": 6.438492462311558e-05, - "loss": 5.5407, - "step": 35949 - }, - { - "epoch": 18.748370273794002, - "grad_norm": 1.4871999025344849, - "learning_rate": 6.438391959798994e-05, - "loss": 5.5751, - "step": 35950 - }, - { - "epoch": 18.74889178617992, - "grad_norm": 1.5002293586730957, - "learning_rate": 6.438291457286432e-05, - "loss": 5.4059, - "step": 35951 - }, - { - "epoch": 18.74941329856584, - "grad_norm": 1.4471940994262695, - "learning_rate": 6.43819095477387e-05, - "loss": 5.2221, - "step": 35952 - }, - { - "epoch": 18.74993481095176, - "grad_norm": 1.4587483406066895, - "learning_rate": 6.438090452261308e-05, - "loss": 5.3665, - "step": 35953 - }, - { - "epoch": 18.75045632333768, - "grad_norm": 1.454163908958435, - "learning_rate": 6.437989949748744e-05, - "loss": 5.5564, - "step": 35954 - }, - { - "epoch": 18.7509778357236, - "grad_norm": 1.5216068029403687, - "learning_rate": 6.437889447236182e-05, - "loss": 5.4751, - "step": 35955 - }, - { - "epoch": 18.75149934810952, - "grad_norm": 1.7101479768753052, - "learning_rate": 6.437788944723618e-05, - "loss": 4.3614, - "step": 35956 - }, - { - "epoch": 18.75202086049544, - "grad_norm": 1.5059953927993774, - "learning_rate": 6.437688442211055e-05, - "loss": 5.4161, - "step": 35957 - }, - { - "epoch": 18.752542372881354, - "grad_norm": 1.4646583795547485, - "learning_rate": 6.437587939698493e-05, - "loss": 5.1433, - "step": 35958 - }, - { - "epoch": 18.753063885267274, - "grad_norm": 1.5954256057739258, - "learning_rate": 6.437487437185929e-05, - "loss": 4.8411, - "step": 35959 - }, - { - "epoch": 18.753585397653193, - "grad_norm": 1.462733268737793, - "learning_rate": 6.437386934673367e-05, - "loss": 5.5399, - "step": 35960 - }, - { - "epoch": 18.754106910039113, - "grad_norm": 1.3803651332855225, - "learning_rate": 6.437286432160805e-05, - "loss": 5.6655, - "step": 35961 - }, - { - "epoch": 18.754628422425032, - "grad_norm": 1.4766440391540527, - "learning_rate": 6.437185929648242e-05, - "loss": 5.3898, - "step": 35962 - }, - { - "epoch": 18.75514993481095, - "grad_norm": 1.5125455856323242, - "learning_rate": 6.437085427135679e-05, - "loss": 5.4823, - "step": 35963 - }, - { - "epoch": 18.75567144719687, - "grad_norm": 1.4845635890960693, - "learning_rate": 6.436984924623117e-05, - "loss": 5.2042, - "step": 35964 - }, - { - "epoch": 18.75619295958279, - "grad_norm": 1.5359193086624146, - "learning_rate": 6.436884422110553e-05, - "loss": 4.9357, - "step": 35965 - }, - { - "epoch": 18.75671447196871, - "grad_norm": 1.5510168075561523, - "learning_rate": 6.436783919597991e-05, - "loss": 4.8557, - "step": 35966 - }, - { - "epoch": 18.75723598435463, - "grad_norm": 1.52571439743042, - "learning_rate": 6.436683417085427e-05, - "loss": 5.4275, - "step": 35967 - }, - { - "epoch": 18.75775749674055, - "grad_norm": 1.5027728080749512, - "learning_rate": 6.436582914572865e-05, - "loss": 5.2151, - "step": 35968 - }, - { - "epoch": 18.75827900912647, - "grad_norm": 1.4956024885177612, - "learning_rate": 6.436482412060301e-05, - "loss": 5.7419, - "step": 35969 - }, - { - "epoch": 18.758800521512384, - "grad_norm": 1.4039554595947266, - "learning_rate": 6.436381909547738e-05, - "loss": 5.3327, - "step": 35970 - }, - { - "epoch": 18.759322033898304, - "grad_norm": 1.4389750957489014, - "learning_rate": 6.436281407035176e-05, - "loss": 5.5641, - "step": 35971 - }, - { - "epoch": 18.759843546284223, - "grad_norm": 1.479677438735962, - "learning_rate": 6.436180904522613e-05, - "loss": 5.4728, - "step": 35972 - }, - { - "epoch": 18.760365058670143, - "grad_norm": 1.440555453300476, - "learning_rate": 6.436080402010051e-05, - "loss": 5.5631, - "step": 35973 - }, - { - "epoch": 18.760886571056062, - "grad_norm": 1.4581302404403687, - "learning_rate": 6.435979899497488e-05, - "loss": 5.2504, - "step": 35974 - }, - { - "epoch": 18.76140808344198, - "grad_norm": 1.4555846452713013, - "learning_rate": 6.435879396984925e-05, - "loss": 5.5736, - "step": 35975 - }, - { - "epoch": 18.7619295958279, - "grad_norm": 1.464991569519043, - "learning_rate": 6.435778894472362e-05, - "loss": 5.7302, - "step": 35976 - }, - { - "epoch": 18.76245110821382, - "grad_norm": 1.4912514686584473, - "learning_rate": 6.4356783919598e-05, - "loss": 4.9978, - "step": 35977 - }, - { - "epoch": 18.76297262059974, - "grad_norm": 1.5738859176635742, - "learning_rate": 6.435577889447236e-05, - "loss": 5.1236, - "step": 35978 - }, - { - "epoch": 18.76349413298566, - "grad_norm": 1.416872262954712, - "learning_rate": 6.435477386934674e-05, - "loss": 5.732, - "step": 35979 - }, - { - "epoch": 18.76401564537158, - "grad_norm": 1.5594245195388794, - "learning_rate": 6.43537688442211e-05, - "loss": 5.7775, - "step": 35980 - }, - { - "epoch": 18.7645371577575, - "grad_norm": 1.5753251314163208, - "learning_rate": 6.435276381909548e-05, - "loss": 5.057, - "step": 35981 - }, - { - "epoch": 18.765058670143414, - "grad_norm": 1.4689850807189941, - "learning_rate": 6.435175879396986e-05, - "loss": 5.5669, - "step": 35982 - }, - { - "epoch": 18.765580182529334, - "grad_norm": 1.50339937210083, - "learning_rate": 6.435075376884424e-05, - "loss": 5.4985, - "step": 35983 - }, - { - "epoch": 18.766101694915253, - "grad_norm": 1.5625276565551758, - "learning_rate": 6.43497487437186e-05, - "loss": 4.8275, - "step": 35984 - }, - { - "epoch": 18.766623207301173, - "grad_norm": 1.6479352712631226, - "learning_rate": 6.434874371859296e-05, - "loss": 5.0595, - "step": 35985 - }, - { - "epoch": 18.767144719687092, - "grad_norm": 1.5480881929397583, - "learning_rate": 6.434773869346734e-05, - "loss": 5.4488, - "step": 35986 - }, - { - "epoch": 18.76766623207301, - "grad_norm": 1.5219255685806274, - "learning_rate": 6.43467336683417e-05, - "loss": 5.1787, - "step": 35987 - }, - { - "epoch": 18.76818774445893, - "grad_norm": 1.4788928031921387, - "learning_rate": 6.434572864321608e-05, - "loss": 5.2386, - "step": 35988 - }, - { - "epoch": 18.76870925684485, - "grad_norm": 1.417788028717041, - "learning_rate": 6.434472361809045e-05, - "loss": 5.6073, - "step": 35989 - }, - { - "epoch": 18.76923076923077, - "grad_norm": 1.4561620950698853, - "learning_rate": 6.434371859296483e-05, - "loss": 5.3993, - "step": 35990 - }, - { - "epoch": 18.76975228161669, - "grad_norm": 1.5165493488311768, - "learning_rate": 6.434271356783919e-05, - "loss": 5.3941, - "step": 35991 - }, - { - "epoch": 18.77027379400261, - "grad_norm": 1.5922296047210693, - "learning_rate": 6.434170854271357e-05, - "loss": 5.0538, - "step": 35992 - }, - { - "epoch": 18.77079530638853, - "grad_norm": 1.4400945901870728, - "learning_rate": 6.434070351758795e-05, - "loss": 5.8271, - "step": 35993 - }, - { - "epoch": 18.771316818774444, - "grad_norm": 1.5726474523544312, - "learning_rate": 6.433969849246232e-05, - "loss": 4.7619, - "step": 35994 - }, - { - "epoch": 18.771838331160364, - "grad_norm": 1.5216251611709595, - "learning_rate": 6.433869346733669e-05, - "loss": 5.2877, - "step": 35995 - }, - { - "epoch": 18.772359843546283, - "grad_norm": 1.5327459573745728, - "learning_rate": 6.433768844221107e-05, - "loss": 5.3404, - "step": 35996 - }, - { - "epoch": 18.772881355932203, - "grad_norm": 1.4364293813705444, - "learning_rate": 6.433668341708543e-05, - "loss": 5.3564, - "step": 35997 - }, - { - "epoch": 18.773402868318122, - "grad_norm": 1.5556209087371826, - "learning_rate": 6.43356783919598e-05, - "loss": 5.2787, - "step": 35998 - }, - { - "epoch": 18.77392438070404, - "grad_norm": 1.5219001770019531, - "learning_rate": 6.433467336683417e-05, - "loss": 5.1653, - "step": 35999 - }, - { - "epoch": 18.77444589308996, - "grad_norm": 1.5169719457626343, - "learning_rate": 6.433366834170854e-05, - "loss": 5.6007, - "step": 36000 - }, - { - "epoch": 18.77496740547588, - "grad_norm": 1.446241855621338, - "learning_rate": 6.433266331658291e-05, - "loss": 5.5105, - "step": 36001 - }, - { - "epoch": 18.7754889178618, - "grad_norm": 1.365213394165039, - "learning_rate": 6.433165829145729e-05, - "loss": 5.625, - "step": 36002 - }, - { - "epoch": 18.77601043024772, - "grad_norm": 1.5360474586486816, - "learning_rate": 6.433065326633167e-05, - "loss": 5.0326, - "step": 36003 - }, - { - "epoch": 18.77653194263364, - "grad_norm": 1.5679458379745483, - "learning_rate": 6.432964824120603e-05, - "loss": 5.1289, - "step": 36004 - }, - { - "epoch": 18.777053455019555, - "grad_norm": 1.4371522665023804, - "learning_rate": 6.432864321608041e-05, - "loss": 5.1662, - "step": 36005 - }, - { - "epoch": 18.777574967405474, - "grad_norm": 1.5886493921279907, - "learning_rate": 6.432763819095478e-05, - "loss": 5.4969, - "step": 36006 - }, - { - "epoch": 18.778096479791394, - "grad_norm": 1.5960646867752075, - "learning_rate": 6.432663316582915e-05, - "loss": 4.8358, - "step": 36007 - }, - { - "epoch": 18.778617992177313, - "grad_norm": 1.4446533918380737, - "learning_rate": 6.432562814070352e-05, - "loss": 5.2533, - "step": 36008 - }, - { - "epoch": 18.779139504563233, - "grad_norm": 1.350625991821289, - "learning_rate": 6.43246231155779e-05, - "loss": 5.6095, - "step": 36009 - }, - { - "epoch": 18.779661016949152, - "grad_norm": 1.4180002212524414, - "learning_rate": 6.432361809045226e-05, - "loss": 5.4002, - "step": 36010 - }, - { - "epoch": 18.78018252933507, - "grad_norm": 1.3934980630874634, - "learning_rate": 6.432261306532663e-05, - "loss": 5.5321, - "step": 36011 - }, - { - "epoch": 18.78070404172099, - "grad_norm": 1.456923007965088, - "learning_rate": 6.4321608040201e-05, - "loss": 5.8476, - "step": 36012 - }, - { - "epoch": 18.78122555410691, - "grad_norm": 1.4901783466339111, - "learning_rate": 6.432060301507538e-05, - "loss": 4.9783, - "step": 36013 - }, - { - "epoch": 18.78174706649283, - "grad_norm": 1.5256454944610596, - "learning_rate": 6.431959798994976e-05, - "loss": 4.8575, - "step": 36014 - }, - { - "epoch": 18.78226857887875, - "grad_norm": 1.4159520864486694, - "learning_rate": 6.431859296482412e-05, - "loss": 5.7019, - "step": 36015 - }, - { - "epoch": 18.78279009126467, - "grad_norm": 1.4676216840744019, - "learning_rate": 6.43175879396985e-05, - "loss": 4.9824, - "step": 36016 - }, - { - "epoch": 18.78331160365059, - "grad_norm": 1.5069823265075684, - "learning_rate": 6.431658291457287e-05, - "loss": 5.3904, - "step": 36017 - }, - { - "epoch": 18.783833116036504, - "grad_norm": 1.574047327041626, - "learning_rate": 6.431557788944724e-05, - "loss": 5.4438, - "step": 36018 - }, - { - "epoch": 18.784354628422424, - "grad_norm": 1.4844332933425903, - "learning_rate": 6.431457286432161e-05, - "loss": 5.1596, - "step": 36019 - }, - { - "epoch": 18.784876140808343, - "grad_norm": 1.6502926349639893, - "learning_rate": 6.431356783919599e-05, - "loss": 5.2086, - "step": 36020 - }, - { - "epoch": 18.785397653194263, - "grad_norm": 1.4404512643814087, - "learning_rate": 6.431256281407035e-05, - "loss": 5.0398, - "step": 36021 - }, - { - "epoch": 18.785919165580182, - "grad_norm": 1.5548290014266968, - "learning_rate": 6.431155778894473e-05, - "loss": 5.4709, - "step": 36022 - }, - { - "epoch": 18.7864406779661, - "grad_norm": 1.6028720140457153, - "learning_rate": 6.43105527638191e-05, - "loss": 5.17, - "step": 36023 - }, - { - "epoch": 18.78696219035202, - "grad_norm": 1.516111969947815, - "learning_rate": 6.430954773869347e-05, - "loss": 4.9369, - "step": 36024 - }, - { - "epoch": 18.78748370273794, - "grad_norm": 1.4384026527404785, - "learning_rate": 6.430854271356785e-05, - "loss": 4.9671, - "step": 36025 - }, - { - "epoch": 18.78800521512386, - "grad_norm": 1.4424917697906494, - "learning_rate": 6.430753768844221e-05, - "loss": 5.4508, - "step": 36026 - }, - { - "epoch": 18.78852672750978, - "grad_norm": 1.4626296758651733, - "learning_rate": 6.430653266331659e-05, - "loss": 5.5546, - "step": 36027 - }, - { - "epoch": 18.7890482398957, - "grad_norm": 1.5384074449539185, - "learning_rate": 6.430552763819095e-05, - "loss": 5.2985, - "step": 36028 - }, - { - "epoch": 18.789569752281615, - "grad_norm": 1.4901114702224731, - "learning_rate": 6.430452261306533e-05, - "loss": 5.698, - "step": 36029 - }, - { - "epoch": 18.790091264667534, - "grad_norm": 1.6116293668746948, - "learning_rate": 6.43035175879397e-05, - "loss": 4.6158, - "step": 36030 - }, - { - "epoch": 18.790612777053454, - "grad_norm": 1.6068745851516724, - "learning_rate": 6.430251256281407e-05, - "loss": 5.5172, - "step": 36031 - }, - { - "epoch": 18.791134289439373, - "grad_norm": 1.544826626777649, - "learning_rate": 6.430150753768844e-05, - "loss": 5.3338, - "step": 36032 - }, - { - "epoch": 18.791655801825293, - "grad_norm": 1.5216360092163086, - "learning_rate": 6.430050251256282e-05, - "loss": 5.1879, - "step": 36033 - }, - { - "epoch": 18.792177314211212, - "grad_norm": 1.494059443473816, - "learning_rate": 6.42994974874372e-05, - "loss": 5.557, - "step": 36034 - }, - { - "epoch": 18.79269882659713, - "grad_norm": 1.464355707168579, - "learning_rate": 6.429849246231157e-05, - "loss": 5.1137, - "step": 36035 - }, - { - "epoch": 18.79322033898305, - "grad_norm": 1.4487351179122925, - "learning_rate": 6.429748743718594e-05, - "loss": 4.7595, - "step": 36036 - }, - { - "epoch": 18.79374185136897, - "grad_norm": 1.3909642696380615, - "learning_rate": 6.42964824120603e-05, - "loss": 5.5904, - "step": 36037 - }, - { - "epoch": 18.79426336375489, - "grad_norm": 1.5639530420303345, - "learning_rate": 6.429547738693468e-05, - "loss": 5.5759, - "step": 36038 - }, - { - "epoch": 18.79478487614081, - "grad_norm": 1.4806147813796997, - "learning_rate": 6.429447236180904e-05, - "loss": 5.4042, - "step": 36039 - }, - { - "epoch": 18.79530638852673, - "grad_norm": 1.5979598760604858, - "learning_rate": 6.429346733668342e-05, - "loss": 4.8099, - "step": 36040 - }, - { - "epoch": 18.795827900912645, - "grad_norm": 1.509364128112793, - "learning_rate": 6.429246231155778e-05, - "loss": 4.9721, - "step": 36041 - }, - { - "epoch": 18.796349413298564, - "grad_norm": 1.4226834774017334, - "learning_rate": 6.429145728643216e-05, - "loss": 4.9521, - "step": 36042 - }, - { - "epoch": 18.796870925684484, - "grad_norm": 1.4375622272491455, - "learning_rate": 6.429045226130654e-05, - "loss": 5.7821, - "step": 36043 - }, - { - "epoch": 18.797392438070403, - "grad_norm": 1.4903947114944458, - "learning_rate": 6.428944723618092e-05, - "loss": 5.1339, - "step": 36044 - }, - { - "epoch": 18.797913950456323, - "grad_norm": 1.6116632223129272, - "learning_rate": 6.428844221105528e-05, - "loss": 4.8203, - "step": 36045 - }, - { - "epoch": 18.798435462842242, - "grad_norm": 1.4433597326278687, - "learning_rate": 6.428743718592966e-05, - "loss": 5.5789, - "step": 36046 - }, - { - "epoch": 18.798956975228162, - "grad_norm": 1.5817699432373047, - "learning_rate": 6.428643216080402e-05, - "loss": 5.3466, - "step": 36047 - }, - { - "epoch": 18.79947848761408, - "grad_norm": 1.5329012870788574, - "learning_rate": 6.42854271356784e-05, - "loss": 5.064, - "step": 36048 - }, - { - "epoch": 18.8, - "grad_norm": 1.5491595268249512, - "learning_rate": 6.428442211055277e-05, - "loss": 4.861, - "step": 36049 - }, - { - "epoch": 18.80052151238592, - "grad_norm": 1.5053445100784302, - "learning_rate": 6.428341708542713e-05, - "loss": 5.2523, - "step": 36050 - }, - { - "epoch": 18.80104302477184, - "grad_norm": 1.5213111639022827, - "learning_rate": 6.428241206030151e-05, - "loss": 5.5073, - "step": 36051 - }, - { - "epoch": 18.80156453715776, - "grad_norm": 1.483224868774414, - "learning_rate": 6.428140703517587e-05, - "loss": 5.2766, - "step": 36052 - }, - { - "epoch": 18.802086049543675, - "grad_norm": 1.4543591737747192, - "learning_rate": 6.428040201005025e-05, - "loss": 5.4459, - "step": 36053 - }, - { - "epoch": 18.802607561929594, - "grad_norm": 1.4117722511291504, - "learning_rate": 6.427939698492463e-05, - "loss": 5.3106, - "step": 36054 - }, - { - "epoch": 18.803129074315514, - "grad_norm": 1.439598798751831, - "learning_rate": 6.4278391959799e-05, - "loss": 5.0341, - "step": 36055 - }, - { - "epoch": 18.803650586701433, - "grad_norm": 1.470990777015686, - "learning_rate": 6.427738693467337e-05, - "loss": 5.1052, - "step": 36056 - }, - { - "epoch": 18.804172099087353, - "grad_norm": 1.5455282926559448, - "learning_rate": 6.427638190954775e-05, - "loss": 4.9904, - "step": 36057 - }, - { - "epoch": 18.804693611473272, - "grad_norm": 1.4508600234985352, - "learning_rate": 6.427537688442211e-05, - "loss": 5.5365, - "step": 36058 - }, - { - "epoch": 18.805215123859192, - "grad_norm": 1.4391283988952637, - "learning_rate": 6.427437185929649e-05, - "loss": 5.6732, - "step": 36059 - }, - { - "epoch": 18.80573663624511, - "grad_norm": 1.4928991794586182, - "learning_rate": 6.427336683417085e-05, - "loss": 5.267, - "step": 36060 - }, - { - "epoch": 18.80625814863103, - "grad_norm": 1.504171371459961, - "learning_rate": 6.427236180904523e-05, - "loss": 4.806, - "step": 36061 - }, - { - "epoch": 18.80677966101695, - "grad_norm": 1.5151803493499756, - "learning_rate": 6.42713567839196e-05, - "loss": 5.2609, - "step": 36062 - }, - { - "epoch": 18.80730117340287, - "grad_norm": 1.5344252586364746, - "learning_rate": 6.427035175879397e-05, - "loss": 5.4522, - "step": 36063 - }, - { - "epoch": 18.80782268578879, - "grad_norm": 1.4511191844940186, - "learning_rate": 6.426934673366835e-05, - "loss": 5.552, - "step": 36064 - }, - { - "epoch": 18.808344198174705, - "grad_norm": 1.5820181369781494, - "learning_rate": 6.426834170854272e-05, - "loss": 5.2848, - "step": 36065 - }, - { - "epoch": 18.808865710560625, - "grad_norm": 1.4871257543563843, - "learning_rate": 6.42673366834171e-05, - "loss": 5.2409, - "step": 36066 - }, - { - "epoch": 18.809387222946544, - "grad_norm": 1.5056977272033691, - "learning_rate": 6.426633165829146e-05, - "loss": 4.9768, - "step": 36067 - }, - { - "epoch": 18.809908735332463, - "grad_norm": 1.5940700769424438, - "learning_rate": 6.426532663316584e-05, - "loss": 5.254, - "step": 36068 - }, - { - "epoch": 18.810430247718383, - "grad_norm": 1.492523193359375, - "learning_rate": 6.42643216080402e-05, - "loss": 4.9955, - "step": 36069 - }, - { - "epoch": 18.810951760104302, - "grad_norm": 1.476087212562561, - "learning_rate": 6.426331658291458e-05, - "loss": 5.2455, - "step": 36070 - }, - { - "epoch": 18.811473272490222, - "grad_norm": 1.4331799745559692, - "learning_rate": 6.426231155778894e-05, - "loss": 5.4012, - "step": 36071 - }, - { - "epoch": 18.81199478487614, - "grad_norm": 1.444035530090332, - "learning_rate": 6.426130653266332e-05, - "loss": 5.2477, - "step": 36072 - }, - { - "epoch": 18.81251629726206, - "grad_norm": 1.4727575778961182, - "learning_rate": 6.426030150753768e-05, - "loss": 5.4795, - "step": 36073 - }, - { - "epoch": 18.81303780964798, - "grad_norm": 1.4322048425674438, - "learning_rate": 6.425929648241206e-05, - "loss": 5.2227, - "step": 36074 - }, - { - "epoch": 18.8135593220339, - "grad_norm": 1.4474053382873535, - "learning_rate": 6.425829145728644e-05, - "loss": 5.2086, - "step": 36075 - }, - { - "epoch": 18.81408083441982, - "grad_norm": 1.4845854043960571, - "learning_rate": 6.425728643216082e-05, - "loss": 5.5118, - "step": 36076 - }, - { - "epoch": 18.814602346805735, - "grad_norm": 1.385987639427185, - "learning_rate": 6.425628140703518e-05, - "loss": 5.3209, - "step": 36077 - }, - { - "epoch": 18.815123859191655, - "grad_norm": 1.4590829610824585, - "learning_rate": 6.425527638190955e-05, - "loss": 5.5627, - "step": 36078 - }, - { - "epoch": 18.815645371577574, - "grad_norm": 1.51578688621521, - "learning_rate": 6.425427135678392e-05, - "loss": 5.4238, - "step": 36079 - }, - { - "epoch": 18.816166883963493, - "grad_norm": 1.431358814239502, - "learning_rate": 6.425326633165829e-05, - "loss": 5.2687, - "step": 36080 - }, - { - "epoch": 18.816688396349413, - "grad_norm": 1.5030032396316528, - "learning_rate": 6.425226130653267e-05, - "loss": 5.2143, - "step": 36081 - }, - { - "epoch": 18.817209908735332, - "grad_norm": 1.5048463344573975, - "learning_rate": 6.425125628140703e-05, - "loss": 5.2971, - "step": 36082 - }, - { - "epoch": 18.817731421121252, - "grad_norm": 1.5764142274856567, - "learning_rate": 6.425025125628141e-05, - "loss": 4.8038, - "step": 36083 - }, - { - "epoch": 18.81825293350717, - "grad_norm": 1.581382155418396, - "learning_rate": 6.424924623115577e-05, - "loss": 5.2385, - "step": 36084 - }, - { - "epoch": 18.81877444589309, - "grad_norm": 1.636052131652832, - "learning_rate": 6.424824120603015e-05, - "loss": 5.0311, - "step": 36085 - }, - { - "epoch": 18.81929595827901, - "grad_norm": 1.6845051050186157, - "learning_rate": 6.424723618090453e-05, - "loss": 4.889, - "step": 36086 - }, - { - "epoch": 18.81981747066493, - "grad_norm": 1.5255126953125, - "learning_rate": 6.42462311557789e-05, - "loss": 5.1711, - "step": 36087 - }, - { - "epoch": 18.820338983050846, - "grad_norm": 1.5924474000930786, - "learning_rate": 6.424522613065327e-05, - "loss": 5.2888, - "step": 36088 - }, - { - "epoch": 18.820860495436765, - "grad_norm": 1.471899390220642, - "learning_rate": 6.424422110552765e-05, - "loss": 5.2278, - "step": 36089 - }, - { - "epoch": 18.821382007822685, - "grad_norm": 1.5269067287445068, - "learning_rate": 6.424321608040201e-05, - "loss": 5.2542, - "step": 36090 - }, - { - "epoch": 18.821903520208604, - "grad_norm": 1.5013633966445923, - "learning_rate": 6.424221105527638e-05, - "loss": 4.661, - "step": 36091 - }, - { - "epoch": 18.822425032594523, - "grad_norm": 1.4967750310897827, - "learning_rate": 6.424120603015075e-05, - "loss": 4.9938, - "step": 36092 - }, - { - "epoch": 18.822946544980443, - "grad_norm": 1.6618824005126953, - "learning_rate": 6.424020100502512e-05, - "loss": 5.3534, - "step": 36093 - }, - { - "epoch": 18.823468057366362, - "grad_norm": 1.5356123447418213, - "learning_rate": 6.42391959798995e-05, - "loss": 5.2957, - "step": 36094 - }, - { - "epoch": 18.823989569752282, - "grad_norm": 1.6126197576522827, - "learning_rate": 6.423819095477387e-05, - "loss": 5.3641, - "step": 36095 - }, - { - "epoch": 18.8245110821382, - "grad_norm": 1.4369739294052124, - "learning_rate": 6.423718592964825e-05, - "loss": 5.4883, - "step": 36096 - }, - { - "epoch": 18.82503259452412, - "grad_norm": 1.4441215991973877, - "learning_rate": 6.423618090452262e-05, - "loss": 4.685, - "step": 36097 - }, - { - "epoch": 18.82555410691004, - "grad_norm": 1.465701937675476, - "learning_rate": 6.4235175879397e-05, - "loss": 5.3209, - "step": 36098 - }, - { - "epoch": 18.82607561929596, - "grad_norm": 1.5968279838562012, - "learning_rate": 6.423417085427136e-05, - "loss": 4.8652, - "step": 36099 - }, - { - "epoch": 18.82659713168188, - "grad_norm": 1.4692115783691406, - "learning_rate": 6.423316582914574e-05, - "loss": 5.377, - "step": 36100 - }, - { - "epoch": 18.827118644067795, - "grad_norm": 1.4457939863204956, - "learning_rate": 6.42321608040201e-05, - "loss": 5.2752, - "step": 36101 - }, - { - "epoch": 18.827640156453715, - "grad_norm": 1.5108660459518433, - "learning_rate": 6.423115577889448e-05, - "loss": 5.1735, - "step": 36102 - }, - { - "epoch": 18.828161668839634, - "grad_norm": 1.5377298593521118, - "learning_rate": 6.423015075376884e-05, - "loss": 5.2679, - "step": 36103 - }, - { - "epoch": 18.828683181225554, - "grad_norm": 1.3418734073638916, - "learning_rate": 6.422914572864321e-05, - "loss": 5.0887, - "step": 36104 - }, - { - "epoch": 18.829204693611473, - "grad_norm": 1.4896987676620483, - "learning_rate": 6.422814070351759e-05, - "loss": 5.2941, - "step": 36105 - }, - { - "epoch": 18.829726205997392, - "grad_norm": 1.5635124444961548, - "learning_rate": 6.422713567839196e-05, - "loss": 5.5966, - "step": 36106 - }, - { - "epoch": 18.830247718383312, - "grad_norm": Infinity, - "learning_rate": 6.422713567839196e-05, - "loss": 4.8514, - "step": 36107 - }, - { - "epoch": 18.83076923076923, - "grad_norm": 1.451920986175537, - "learning_rate": 6.422613065326634e-05, - "loss": 5.3455, - "step": 36108 - }, - { - "epoch": 18.83129074315515, - "grad_norm": 1.5483399629592896, - "learning_rate": 6.42251256281407e-05, - "loss": 5.2777, - "step": 36109 - }, - { - "epoch": 18.83181225554107, - "grad_norm": 1.5404943227767944, - "learning_rate": 6.422412060301508e-05, - "loss": 5.5924, - "step": 36110 - }, - { - "epoch": 18.83233376792699, - "grad_norm": 1.4740252494812012, - "learning_rate": 6.422311557788945e-05, - "loss": 5.6482, - "step": 36111 - }, - { - "epoch": 18.832855280312906, - "grad_norm": 1.510061264038086, - "learning_rate": 6.422211055276383e-05, - "loss": 5.3455, - "step": 36112 - }, - { - "epoch": 18.833376792698825, - "grad_norm": 1.5111699104309082, - "learning_rate": 6.422110552763819e-05, - "loss": 5.4569, - "step": 36113 - }, - { - "epoch": 18.833898305084745, - "grad_norm": 1.4755043983459473, - "learning_rate": 6.422010050251257e-05, - "loss": 5.2555, - "step": 36114 - }, - { - "epoch": 18.834419817470664, - "grad_norm": 1.4287505149841309, - "learning_rate": 6.421909547738693e-05, - "loss": 5.45, - "step": 36115 - }, - { - "epoch": 18.834941329856584, - "grad_norm": 1.5825756788253784, - "learning_rate": 6.421809045226131e-05, - "loss": 5.3062, - "step": 36116 - }, - { - "epoch": 18.835462842242503, - "grad_norm": 1.4397374391555786, - "learning_rate": 6.421708542713569e-05, - "loss": 5.4148, - "step": 36117 - }, - { - "epoch": 18.835984354628422, - "grad_norm": 1.4076414108276367, - "learning_rate": 6.421608040201005e-05, - "loss": 5.4608, - "step": 36118 - }, - { - "epoch": 18.836505867014342, - "grad_norm": 1.4894334077835083, - "learning_rate": 6.421507537688443e-05, - "loss": 5.6938, - "step": 36119 - }, - { - "epoch": 18.83702737940026, - "grad_norm": 1.4657782316207886, - "learning_rate": 6.42140703517588e-05, - "loss": 5.497, - "step": 36120 - }, - { - "epoch": 18.83754889178618, - "grad_norm": 1.4420604705810547, - "learning_rate": 6.421306532663317e-05, - "loss": 5.3888, - "step": 36121 - }, - { - "epoch": 18.8380704041721, - "grad_norm": 1.3516170978546143, - "learning_rate": 6.421206030150754e-05, - "loss": 4.6801, - "step": 36122 - }, - { - "epoch": 18.83859191655802, - "grad_norm": 1.532957673072815, - "learning_rate": 6.421105527638191e-05, - "loss": 5.2078, - "step": 36123 - }, - { - "epoch": 18.839113428943936, - "grad_norm": 1.4825836420059204, - "learning_rate": 6.421005025125628e-05, - "loss": 5.3477, - "step": 36124 - }, - { - "epoch": 18.839634941329855, - "grad_norm": 1.5326720476150513, - "learning_rate": 6.420904522613066e-05, - "loss": 4.6331, - "step": 36125 - }, - { - "epoch": 18.840156453715775, - "grad_norm": 1.4762625694274902, - "learning_rate": 6.420804020100502e-05, - "loss": 5.4156, - "step": 36126 - }, - { - "epoch": 18.840677966101694, - "grad_norm": 1.5373018980026245, - "learning_rate": 6.42070351758794e-05, - "loss": 5.1059, - "step": 36127 - }, - { - "epoch": 18.841199478487614, - "grad_norm": 1.528296709060669, - "learning_rate": 6.420603015075378e-05, - "loss": 5.1478, - "step": 36128 - }, - { - "epoch": 18.841720990873533, - "grad_norm": 1.5118441581726074, - "learning_rate": 6.420502512562815e-05, - "loss": 5.1437, - "step": 36129 - }, - { - "epoch": 18.842242503259452, - "grad_norm": 1.5167953968048096, - "learning_rate": 6.420402010050252e-05, - "loss": 5.3557, - "step": 36130 - }, - { - "epoch": 18.842764015645372, - "grad_norm": 1.506500244140625, - "learning_rate": 6.420301507537688e-05, - "loss": 5.4865, - "step": 36131 - }, - { - "epoch": 18.84328552803129, - "grad_norm": 1.5104295015335083, - "learning_rate": 6.420201005025126e-05, - "loss": 5.3155, - "step": 36132 - }, - { - "epoch": 18.84380704041721, - "grad_norm": 1.5090841054916382, - "learning_rate": 6.420100502512562e-05, - "loss": 5.2028, - "step": 36133 - }, - { - "epoch": 18.84432855280313, - "grad_norm": 1.527465581893921, - "learning_rate": 6.42e-05, - "loss": 5.3877, - "step": 36134 - }, - { - "epoch": 18.84485006518905, - "grad_norm": 1.4443304538726807, - "learning_rate": 6.419899497487437e-05, - "loss": 5.7053, - "step": 36135 - }, - { - "epoch": 18.845371577574966, - "grad_norm": 1.480575680732727, - "learning_rate": 6.419798994974874e-05, - "loss": 5.336, - "step": 36136 - }, - { - "epoch": 18.845893089960885, - "grad_norm": 1.5768316984176636, - "learning_rate": 6.419698492462312e-05, - "loss": 4.5486, - "step": 36137 - }, - { - "epoch": 18.846414602346805, - "grad_norm": 1.4727017879486084, - "learning_rate": 6.41959798994975e-05, - "loss": 5.2689, - "step": 36138 - }, - { - "epoch": 18.846936114732724, - "grad_norm": 1.5554426908493042, - "learning_rate": 6.419497487437186e-05, - "loss": 4.9502, - "step": 36139 - }, - { - "epoch": 18.847457627118644, - "grad_norm": 1.6397885084152222, - "learning_rate": 6.419396984924624e-05, - "loss": 5.5564, - "step": 36140 - }, - { - "epoch": 18.847979139504563, - "grad_norm": 1.5764665603637695, - "learning_rate": 6.41929648241206e-05, - "loss": 5.7077, - "step": 36141 - }, - { - "epoch": 18.848500651890483, - "grad_norm": 1.5532081127166748, - "learning_rate": 6.419195979899498e-05, - "loss": 5.6569, - "step": 36142 - }, - { - "epoch": 18.849022164276402, - "grad_norm": 1.4353117942810059, - "learning_rate": 6.419095477386935e-05, - "loss": 5.3956, - "step": 36143 - }, - { - "epoch": 18.84954367666232, - "grad_norm": 1.6036063432693481, - "learning_rate": 6.418994974874371e-05, - "loss": 5.0505, - "step": 36144 - }, - { - "epoch": 18.85006518904824, - "grad_norm": 1.6070334911346436, - "learning_rate": 6.418894472361809e-05, - "loss": 5.1452, - "step": 36145 - }, - { - "epoch": 18.85058670143416, - "grad_norm": 1.4581546783447266, - "learning_rate": 6.418793969849245e-05, - "loss": 5.3766, - "step": 36146 - }, - { - "epoch": 18.85110821382008, - "grad_norm": 1.5928704738616943, - "learning_rate": 6.418693467336683e-05, - "loss": 5.3131, - "step": 36147 - }, - { - "epoch": 18.851629726205996, - "grad_norm": 1.4039537906646729, - "learning_rate": 6.418592964824121e-05, - "loss": 5.1189, - "step": 36148 - }, - { - "epoch": 18.852151238591915, - "grad_norm": 1.4385722875595093, - "learning_rate": 6.418492462311559e-05, - "loss": 5.3115, - "step": 36149 - }, - { - "epoch": 18.852672750977835, - "grad_norm": 1.549341082572937, - "learning_rate": 6.418391959798995e-05, - "loss": 5.1702, - "step": 36150 - }, - { - "epoch": 18.853194263363754, - "grad_norm": 1.5574125051498413, - "learning_rate": 6.418291457286433e-05, - "loss": 5.4016, - "step": 36151 - }, - { - "epoch": 18.853715775749674, - "grad_norm": 1.4301645755767822, - "learning_rate": 6.41819095477387e-05, - "loss": 5.3148, - "step": 36152 - }, - { - "epoch": 18.854237288135593, - "grad_norm": 1.4813175201416016, - "learning_rate": 6.418090452261307e-05, - "loss": 5.4461, - "step": 36153 - }, - { - "epoch": 18.854758800521513, - "grad_norm": 1.4868955612182617, - "learning_rate": 6.417989949748744e-05, - "loss": 5.5258, - "step": 36154 - }, - { - "epoch": 18.855280312907432, - "grad_norm": 1.41548490524292, - "learning_rate": 6.417889447236181e-05, - "loss": 5.7512, - "step": 36155 - }, - { - "epoch": 18.85580182529335, - "grad_norm": 1.651811122894287, - "learning_rate": 6.417788944723618e-05, - "loss": 4.6088, - "step": 36156 - }, - { - "epoch": 18.85632333767927, - "grad_norm": 1.407164216041565, - "learning_rate": 6.417688442211056e-05, - "loss": 5.4805, - "step": 36157 - }, - { - "epoch": 18.85684485006519, - "grad_norm": 1.5518611669540405, - "learning_rate": 6.417587939698493e-05, - "loss": 5.1078, - "step": 36158 - }, - { - "epoch": 18.85736636245111, - "grad_norm": 1.5467054843902588, - "learning_rate": 6.41748743718593e-05, - "loss": 5.4464, - "step": 36159 - }, - { - "epoch": 18.857887874837026, - "grad_norm": 1.4400272369384766, - "learning_rate": 6.417386934673368e-05, - "loss": 5.5942, - "step": 36160 - }, - { - "epoch": 18.858409387222945, - "grad_norm": 1.6352183818817139, - "learning_rate": 6.417286432160804e-05, - "loss": 5.4098, - "step": 36161 - }, - { - "epoch": 18.858930899608865, - "grad_norm": 1.4745895862579346, - "learning_rate": 6.417185929648242e-05, - "loss": 5.2622, - "step": 36162 - }, - { - "epoch": 18.859452411994784, - "grad_norm": 1.5266833305358887, - "learning_rate": 6.417085427135678e-05, - "loss": 5.373, - "step": 36163 - }, - { - "epoch": 18.859973924380704, - "grad_norm": 1.5257980823516846, - "learning_rate": 6.416984924623116e-05, - "loss": 5.6442, - "step": 36164 - }, - { - "epoch": 18.860495436766623, - "grad_norm": 1.5030465126037598, - "learning_rate": 6.416884422110552e-05, - "loss": 4.8075, - "step": 36165 - }, - { - "epoch": 18.861016949152543, - "grad_norm": 1.5006550550460815, - "learning_rate": 6.41678391959799e-05, - "loss": 5.7362, - "step": 36166 - }, - { - "epoch": 18.861538461538462, - "grad_norm": 1.541820764541626, - "learning_rate": 6.416683417085427e-05, - "loss": 5.1161, - "step": 36167 - }, - { - "epoch": 18.86205997392438, - "grad_norm": 1.418144941329956, - "learning_rate": 6.416582914572864e-05, - "loss": 5.4494, - "step": 36168 - }, - { - "epoch": 18.8625814863103, - "grad_norm": 1.4381521940231323, - "learning_rate": 6.416482412060302e-05, - "loss": 5.2734, - "step": 36169 - }, - { - "epoch": 18.86310299869622, - "grad_norm": 1.519903540611267, - "learning_rate": 6.41638190954774e-05, - "loss": 5.4159, - "step": 36170 - }, - { - "epoch": 18.86362451108214, - "grad_norm": 1.5013196468353271, - "learning_rate": 6.416281407035176e-05, - "loss": 5.121, - "step": 36171 - }, - { - "epoch": 18.864146023468056, - "grad_norm": 1.4469938278198242, - "learning_rate": 6.416180904522613e-05, - "loss": 5.3576, - "step": 36172 - }, - { - "epoch": 18.864667535853975, - "grad_norm": 1.504755973815918, - "learning_rate": 6.416080402010051e-05, - "loss": 5.0637, - "step": 36173 - }, - { - "epoch": 18.865189048239895, - "grad_norm": 1.5609660148620605, - "learning_rate": 6.415979899497487e-05, - "loss": 5.1176, - "step": 36174 - }, - { - "epoch": 18.865710560625814, - "grad_norm": 1.4792709350585938, - "learning_rate": 6.415879396984925e-05, - "loss": 5.3452, - "step": 36175 - }, - { - "epoch": 18.866232073011734, - "grad_norm": 1.6429461240768433, - "learning_rate": 6.415778894472361e-05, - "loss": 4.4522, - "step": 36176 - }, - { - "epoch": 18.866753585397653, - "grad_norm": 1.5945658683776855, - "learning_rate": 6.415678391959799e-05, - "loss": 4.6088, - "step": 36177 - }, - { - "epoch": 18.867275097783573, - "grad_norm": 1.464270830154419, - "learning_rate": 6.415577889447237e-05, - "loss": 5.6644, - "step": 36178 - }, - { - "epoch": 18.867796610169492, - "grad_norm": 1.5146056413650513, - "learning_rate": 6.415477386934675e-05, - "loss": 5.625, - "step": 36179 - }, - { - "epoch": 18.86831812255541, - "grad_norm": 1.5702866315841675, - "learning_rate": 6.415376884422111e-05, - "loss": 4.7964, - "step": 36180 - }, - { - "epoch": 18.86883963494133, - "grad_norm": 1.4289476871490479, - "learning_rate": 6.415276381909549e-05, - "loss": 5.6681, - "step": 36181 - }, - { - "epoch": 18.86936114732725, - "grad_norm": 1.6191656589508057, - "learning_rate": 6.415175879396985e-05, - "loss": 4.7698, - "step": 36182 - }, - { - "epoch": 18.86988265971317, - "grad_norm": 1.5618919134140015, - "learning_rate": 6.415075376884423e-05, - "loss": 5.1574, - "step": 36183 - }, - { - "epoch": 18.870404172099086, - "grad_norm": 1.555629014968872, - "learning_rate": 6.41497487437186e-05, - "loss": 5.3518, - "step": 36184 - }, - { - "epoch": 18.870925684485005, - "grad_norm": 1.428525686264038, - "learning_rate": 6.414874371859296e-05, - "loss": 5.4144, - "step": 36185 - }, - { - "epoch": 18.871447196870925, - "grad_norm": 1.4993728399276733, - "learning_rate": 6.414773869346734e-05, - "loss": 5.3761, - "step": 36186 - }, - { - "epoch": 18.871968709256844, - "grad_norm": 1.5075191259384155, - "learning_rate": 6.41467336683417e-05, - "loss": 5.0582, - "step": 36187 - }, - { - "epoch": 18.872490221642764, - "grad_norm": 1.4090582132339478, - "learning_rate": 6.414572864321608e-05, - "loss": 5.2219, - "step": 36188 - }, - { - "epoch": 18.873011734028683, - "grad_norm": 1.5127609968185425, - "learning_rate": 6.414472361809046e-05, - "loss": 4.7753, - "step": 36189 - }, - { - "epoch": 18.873533246414603, - "grad_norm": 1.4577314853668213, - "learning_rate": 6.414371859296484e-05, - "loss": 5.3963, - "step": 36190 - }, - { - "epoch": 18.874054758800522, - "grad_norm": 1.683051586151123, - "learning_rate": 6.41427135678392e-05, - "loss": 4.5575, - "step": 36191 - }, - { - "epoch": 18.87457627118644, - "grad_norm": 1.4989440441131592, - "learning_rate": 6.414170854271358e-05, - "loss": 5.295, - "step": 36192 - }, - { - "epoch": 18.87509778357236, - "grad_norm": 1.4751557111740112, - "learning_rate": 6.414070351758794e-05, - "loss": 5.5287, - "step": 36193 - }, - { - "epoch": 18.87561929595828, - "grad_norm": 1.4696446657180786, - "learning_rate": 6.413969849246232e-05, - "loss": 5.0422, - "step": 36194 - }, - { - "epoch": 18.876140808344196, - "grad_norm": 1.6558433771133423, - "learning_rate": 6.413869346733668e-05, - "loss": 4.8848, - "step": 36195 - }, - { - "epoch": 18.876662320730116, - "grad_norm": 1.3356479406356812, - "learning_rate": 6.413768844221106e-05, - "loss": 5.3599, - "step": 36196 - }, - { - "epoch": 18.877183833116035, - "grad_norm": 1.4148725271224976, - "learning_rate": 6.413668341708543e-05, - "loss": 5.1478, - "step": 36197 - }, - { - "epoch": 18.877705345501955, - "grad_norm": 1.5477231740951538, - "learning_rate": 6.41356783919598e-05, - "loss": 5.4894, - "step": 36198 - }, - { - "epoch": 18.878226857887874, - "grad_norm": 1.4769586324691772, - "learning_rate": 6.413467336683418e-05, - "loss": 5.5393, - "step": 36199 - }, - { - "epoch": 18.878748370273794, - "grad_norm": 1.5485038757324219, - "learning_rate": 6.413366834170855e-05, - "loss": 5.4837, - "step": 36200 - }, - { - "epoch": 18.879269882659713, - "grad_norm": 1.4943690299987793, - "learning_rate": 6.413266331658292e-05, - "loss": 5.6402, - "step": 36201 - }, - { - "epoch": 18.879791395045633, - "grad_norm": 1.426095724105835, - "learning_rate": 6.413165829145729e-05, - "loss": 5.2068, - "step": 36202 - }, - { - "epoch": 18.880312907431552, - "grad_norm": 1.410951852798462, - "learning_rate": 6.413065326633167e-05, - "loss": 5.4858, - "step": 36203 - }, - { - "epoch": 18.88083441981747, - "grad_norm": 1.4840108156204224, - "learning_rate": 6.412964824120603e-05, - "loss": 5.1904, - "step": 36204 - }, - { - "epoch": 18.88135593220339, - "grad_norm": 1.5329428911209106, - "learning_rate": 6.412864321608041e-05, - "loss": 4.9698, - "step": 36205 - }, - { - "epoch": 18.88187744458931, - "grad_norm": 1.5627620220184326, - "learning_rate": 6.412763819095477e-05, - "loss": 4.9359, - "step": 36206 - }, - { - "epoch": 18.88239895697523, - "grad_norm": 1.5360904932022095, - "learning_rate": 6.412663316582915e-05, - "loss": 4.9533, - "step": 36207 - }, - { - "epoch": 18.882920469361146, - "grad_norm": 1.4812527894973755, - "learning_rate": 6.412562814070351e-05, - "loss": 5.069, - "step": 36208 - }, - { - "epoch": 18.883441981747065, - "grad_norm": 1.6092174053192139, - "learning_rate": 6.412462311557789e-05, - "loss": 4.9909, - "step": 36209 - }, - { - "epoch": 18.883963494132985, - "grad_norm": 1.415519118309021, - "learning_rate": 6.412361809045227e-05, - "loss": 5.6325, - "step": 36210 - }, - { - "epoch": 18.884485006518904, - "grad_norm": 1.4367152452468872, - "learning_rate": 6.412261306532663e-05, - "loss": 5.0477, - "step": 36211 - }, - { - "epoch": 18.885006518904824, - "grad_norm": 1.4186406135559082, - "learning_rate": 6.412160804020101e-05, - "loss": 5.6373, - "step": 36212 - }, - { - "epoch": 18.885528031290743, - "grad_norm": 1.3370003700256348, - "learning_rate": 6.412060301507538e-05, - "loss": 5.0403, - "step": 36213 - }, - { - "epoch": 18.886049543676663, - "grad_norm": 1.3751076459884644, - "learning_rate": 6.411959798994975e-05, - "loss": 5.5715, - "step": 36214 - }, - { - "epoch": 18.886571056062582, - "grad_norm": 1.5535491704940796, - "learning_rate": 6.411859296482412e-05, - "loss": 5.1477, - "step": 36215 - }, - { - "epoch": 18.8870925684485, - "grad_norm": 1.5548862218856812, - "learning_rate": 6.41175879396985e-05, - "loss": 5.4689, - "step": 36216 - }, - { - "epoch": 18.88761408083442, - "grad_norm": 1.6026164293289185, - "learning_rate": 6.411658291457286e-05, - "loss": 4.7455, - "step": 36217 - }, - { - "epoch": 18.88813559322034, - "grad_norm": 1.4965481758117676, - "learning_rate": 6.411557788944724e-05, - "loss": 5.4677, - "step": 36218 - }, - { - "epoch": 18.888657105606256, - "grad_norm": 1.517593502998352, - "learning_rate": 6.411457286432162e-05, - "loss": 5.47, - "step": 36219 - }, - { - "epoch": 18.889178617992176, - "grad_norm": 1.476999044418335, - "learning_rate": 6.4113567839196e-05, - "loss": 5.4333, - "step": 36220 - }, - { - "epoch": 18.889700130378095, - "grad_norm": 1.4837355613708496, - "learning_rate": 6.411256281407036e-05, - "loss": 5.6582, - "step": 36221 - }, - { - "epoch": 18.890221642764015, - "grad_norm": 1.4419198036193848, - "learning_rate": 6.411155778894474e-05, - "loss": 5.5284, - "step": 36222 - }, - { - "epoch": 18.890743155149934, - "grad_norm": 1.5367094278335571, - "learning_rate": 6.41105527638191e-05, - "loss": 4.9759, - "step": 36223 - }, - { - "epoch": 18.891264667535854, - "grad_norm": 1.3293428421020508, - "learning_rate": 6.410954773869346e-05, - "loss": 4.8429, - "step": 36224 - }, - { - "epoch": 18.891786179921773, - "grad_norm": 1.4388452768325806, - "learning_rate": 6.410854271356784e-05, - "loss": 5.3419, - "step": 36225 - }, - { - "epoch": 18.892307692307693, - "grad_norm": 1.537064552307129, - "learning_rate": 6.41075376884422e-05, - "loss": 5.3593, - "step": 36226 - }, - { - "epoch": 18.892829204693612, - "grad_norm": 1.5382177829742432, - "learning_rate": 6.410653266331658e-05, - "loss": 5.3, - "step": 36227 - }, - { - "epoch": 18.89335071707953, - "grad_norm": 1.4849672317504883, - "learning_rate": 6.410552763819095e-05, - "loss": 5.032, - "step": 36228 - }, - { - "epoch": 18.89387222946545, - "grad_norm": 1.4590810537338257, - "learning_rate": 6.410452261306533e-05, - "loss": 5.278, - "step": 36229 - }, - { - "epoch": 18.89439374185137, - "grad_norm": 1.5812088251113892, - "learning_rate": 6.41035175879397e-05, - "loss": 5.8198, - "step": 36230 - }, - { - "epoch": 18.894915254237286, - "grad_norm": 1.4040464162826538, - "learning_rate": 6.410251256281408e-05, - "loss": 4.9985, - "step": 36231 - }, - { - "epoch": 18.895436766623206, - "grad_norm": 1.4660000801086426, - "learning_rate": 6.410150753768845e-05, - "loss": 5.6376, - "step": 36232 - }, - { - "epoch": 18.895958279009125, - "grad_norm": 1.5645114183425903, - "learning_rate": 6.410050251256282e-05, - "loss": 5.267, - "step": 36233 - }, - { - "epoch": 18.896479791395045, - "grad_norm": 1.4926010370254517, - "learning_rate": 6.409949748743719e-05, - "loss": 5.2456, - "step": 36234 - }, - { - "epoch": 18.897001303780964, - "grad_norm": 1.6270456314086914, - "learning_rate": 6.409849246231157e-05, - "loss": 4.8949, - "step": 36235 - }, - { - "epoch": 18.897522816166884, - "grad_norm": 1.4939253330230713, - "learning_rate": 6.409748743718593e-05, - "loss": 4.7594, - "step": 36236 - }, - { - "epoch": 18.898044328552803, - "grad_norm": 1.540641188621521, - "learning_rate": 6.40964824120603e-05, - "loss": 4.6584, - "step": 36237 - }, - { - "epoch": 18.898565840938723, - "grad_norm": 1.4230870008468628, - "learning_rate": 6.409547738693467e-05, - "loss": 5.6161, - "step": 36238 - }, - { - "epoch": 18.899087353324642, - "grad_norm": 1.5657404661178589, - "learning_rate": 6.409447236180904e-05, - "loss": 4.9062, - "step": 36239 - }, - { - "epoch": 18.89960886571056, - "grad_norm": 1.5106126070022583, - "learning_rate": 6.409346733668341e-05, - "loss": 5.5243, - "step": 36240 - }, - { - "epoch": 18.90013037809648, - "grad_norm": 1.3716099262237549, - "learning_rate": 6.409246231155779e-05, - "loss": 5.5547, - "step": 36241 - }, - { - "epoch": 18.9006518904824, - "grad_norm": 1.449914574623108, - "learning_rate": 6.409145728643217e-05, - "loss": 5.3415, - "step": 36242 - }, - { - "epoch": 18.901173402868316, - "grad_norm": 1.4893356561660767, - "learning_rate": 6.409045226130653e-05, - "loss": 5.4049, - "step": 36243 - }, - { - "epoch": 18.901694915254236, - "grad_norm": 1.4949487447738647, - "learning_rate": 6.408944723618091e-05, - "loss": 5.4026, - "step": 36244 - }, - { - "epoch": 18.902216427640155, - "grad_norm": 1.4770277738571167, - "learning_rate": 6.408844221105528e-05, - "loss": 5.0551, - "step": 36245 - }, - { - "epoch": 18.902737940026075, - "grad_norm": 1.5279767513275146, - "learning_rate": 6.408743718592965e-05, - "loss": 5.3989, - "step": 36246 - }, - { - "epoch": 18.903259452411994, - "grad_norm": 1.39360773563385, - "learning_rate": 6.408643216080402e-05, - "loss": 4.9144, - "step": 36247 - }, - { - "epoch": 18.903780964797914, - "grad_norm": 1.6055200099945068, - "learning_rate": 6.40854271356784e-05, - "loss": 4.7782, - "step": 36248 - }, - { - "epoch": 18.904302477183833, - "grad_norm": 1.4155393838882446, - "learning_rate": 6.408442211055276e-05, - "loss": 5.2885, - "step": 36249 - }, - { - "epoch": 18.904823989569753, - "grad_norm": 1.534401774406433, - "learning_rate": 6.408341708542714e-05, - "loss": 5.2177, - "step": 36250 - }, - { - "epoch": 18.905345501955672, - "grad_norm": 1.4786614179611206, - "learning_rate": 6.408241206030152e-05, - "loss": 5.0644, - "step": 36251 - }, - { - "epoch": 18.90586701434159, - "grad_norm": 1.5742942094802856, - "learning_rate": 6.408140703517588e-05, - "loss": 5.2134, - "step": 36252 - }, - { - "epoch": 18.90638852672751, - "grad_norm": 1.5724858045578003, - "learning_rate": 6.408040201005026e-05, - "loss": 5.0606, - "step": 36253 - }, - { - "epoch": 18.90691003911343, - "grad_norm": 1.4771459102630615, - "learning_rate": 6.407939698492462e-05, - "loss": 5.5542, - "step": 36254 - }, - { - "epoch": 18.907431551499347, - "grad_norm": 1.5088170766830444, - "learning_rate": 6.4078391959799e-05, - "loss": 5.0269, - "step": 36255 - }, - { - "epoch": 18.907953063885266, - "grad_norm": 1.5115199089050293, - "learning_rate": 6.407738693467337e-05, - "loss": 5.3521, - "step": 36256 - }, - { - "epoch": 18.908474576271185, - "grad_norm": 1.5129362344741821, - "learning_rate": 6.407638190954774e-05, - "loss": 4.9043, - "step": 36257 - }, - { - "epoch": 18.908996088657105, - "grad_norm": 1.5595279932022095, - "learning_rate": 6.407537688442211e-05, - "loss": 5.4909, - "step": 36258 - }, - { - "epoch": 18.909517601043024, - "grad_norm": 1.4718765020370483, - "learning_rate": 6.407437185929649e-05, - "loss": 5.0456, - "step": 36259 - }, - { - "epoch": 18.910039113428944, - "grad_norm": 1.6125346422195435, - "learning_rate": 6.407336683417085e-05, - "loss": 5.1327, - "step": 36260 - }, - { - "epoch": 18.910560625814863, - "grad_norm": 1.5639050006866455, - "learning_rate": 6.407236180904523e-05, - "loss": 5.3801, - "step": 36261 - }, - { - "epoch": 18.911082138200783, - "grad_norm": 1.6114286184310913, - "learning_rate": 6.40713567839196e-05, - "loss": 4.9951, - "step": 36262 - }, - { - "epoch": 18.911603650586702, - "grad_norm": 1.4626898765563965, - "learning_rate": 6.407035175879398e-05, - "loss": 5.3983, - "step": 36263 - }, - { - "epoch": 18.91212516297262, - "grad_norm": 1.4688057899475098, - "learning_rate": 6.406934673366835e-05, - "loss": 5.3434, - "step": 36264 - }, - { - "epoch": 18.91264667535854, - "grad_norm": 1.415566325187683, - "learning_rate": 6.406834170854271e-05, - "loss": 5.7499, - "step": 36265 - }, - { - "epoch": 18.91316818774446, - "grad_norm": 1.4452229738235474, - "learning_rate": 6.406733668341709e-05, - "loss": 5.4272, - "step": 36266 - }, - { - "epoch": 18.913689700130377, - "grad_norm": 1.5191586017608643, - "learning_rate": 6.406633165829145e-05, - "loss": 5.1583, - "step": 36267 - }, - { - "epoch": 18.914211212516296, - "grad_norm": 1.381970763206482, - "learning_rate": 6.406532663316583e-05, - "loss": 5.4643, - "step": 36268 - }, - { - "epoch": 18.914732724902215, - "grad_norm": 1.5015459060668945, - "learning_rate": 6.40643216080402e-05, - "loss": 5.3138, - "step": 36269 - }, - { - "epoch": 18.915254237288135, - "grad_norm": 1.5229072570800781, - "learning_rate": 6.406331658291457e-05, - "loss": 4.8448, - "step": 36270 - }, - { - "epoch": 18.915775749674054, - "grad_norm": 1.4963020086288452, - "learning_rate": 6.406231155778895e-05, - "loss": 5.2093, - "step": 36271 - }, - { - "epoch": 18.916297262059974, - "grad_norm": 1.4080429077148438, - "learning_rate": 6.406130653266333e-05, - "loss": 5.5149, - "step": 36272 - }, - { - "epoch": 18.916818774445893, - "grad_norm": 1.5292623043060303, - "learning_rate": 6.40603015075377e-05, - "loss": 5.2252, - "step": 36273 - }, - { - "epoch": 18.917340286831813, - "grad_norm": 1.3280973434448242, - "learning_rate": 6.405929648241207e-05, - "loss": 5.8189, - "step": 36274 - }, - { - "epoch": 18.917861799217732, - "grad_norm": 1.5941418409347534, - "learning_rate": 6.405829145728644e-05, - "loss": 4.955, - "step": 36275 - }, - { - "epoch": 18.91838331160365, - "grad_norm": 1.5420382022857666, - "learning_rate": 6.405728643216081e-05, - "loss": 5.2073, - "step": 36276 - }, - { - "epoch": 18.91890482398957, - "grad_norm": 1.4519355297088623, - "learning_rate": 6.405628140703518e-05, - "loss": 5.6307, - "step": 36277 - }, - { - "epoch": 18.919426336375487, - "grad_norm": 1.5104440450668335, - "learning_rate": 6.405527638190954e-05, - "loss": 5.6372, - "step": 36278 - }, - { - "epoch": 18.919947848761407, - "grad_norm": 1.470607042312622, - "learning_rate": 6.405427135678392e-05, - "loss": 5.0959, - "step": 36279 - }, - { - "epoch": 18.920469361147326, - "grad_norm": 1.5599162578582764, - "learning_rate": 6.405326633165828e-05, - "loss": 4.8659, - "step": 36280 - }, - { - "epoch": 18.920990873533245, - "grad_norm": 1.583740234375, - "learning_rate": 6.405226130653266e-05, - "loss": 5.3151, - "step": 36281 - }, - { - "epoch": 18.921512385919165, - "grad_norm": 1.5363425016403198, - "learning_rate": 6.405125628140704e-05, - "loss": 5.0949, - "step": 36282 - }, - { - "epoch": 18.922033898305084, - "grad_norm": 1.3608624935150146, - "learning_rate": 6.405025125628142e-05, - "loss": 5.348, - "step": 36283 - }, - { - "epoch": 18.922555410691004, - "grad_norm": 1.427709937095642, - "learning_rate": 6.404924623115578e-05, - "loss": 5.6384, - "step": 36284 - }, - { - "epoch": 18.923076923076923, - "grad_norm": 1.5290457010269165, - "learning_rate": 6.404824120603016e-05, - "loss": 5.4431, - "step": 36285 - }, - { - "epoch": 18.923598435462843, - "grad_norm": 1.4528067111968994, - "learning_rate": 6.404723618090452e-05, - "loss": 5.2626, - "step": 36286 - }, - { - "epoch": 18.924119947848762, - "grad_norm": 1.4299062490463257, - "learning_rate": 6.40462311557789e-05, - "loss": 5.53, - "step": 36287 - }, - { - "epoch": 18.92464146023468, - "grad_norm": 1.513229489326477, - "learning_rate": 6.404522613065327e-05, - "loss": 5.1313, - "step": 36288 - }, - { - "epoch": 18.9251629726206, - "grad_norm": 1.4293532371520996, - "learning_rate": 6.404422110552764e-05, - "loss": 5.2942, - "step": 36289 - }, - { - "epoch": 18.92568448500652, - "grad_norm": 1.5070669651031494, - "learning_rate": 6.404321608040201e-05, - "loss": 5.2009, - "step": 36290 - }, - { - "epoch": 18.926205997392437, - "grad_norm": 1.3984112739562988, - "learning_rate": 6.404221105527639e-05, - "loss": 5.0371, - "step": 36291 - }, - { - "epoch": 18.926727509778356, - "grad_norm": 1.4432040452957153, - "learning_rate": 6.404120603015076e-05, - "loss": 5.5534, - "step": 36292 - }, - { - "epoch": 18.927249022164276, - "grad_norm": 1.5959495306015015, - "learning_rate": 6.404020100502513e-05, - "loss": 5.3301, - "step": 36293 - }, - { - "epoch": 18.927770534550195, - "grad_norm": 1.4927648305892944, - "learning_rate": 6.40391959798995e-05, - "loss": 4.9522, - "step": 36294 - }, - { - "epoch": 18.928292046936114, - "grad_norm": 1.4653586149215698, - "learning_rate": 6.403819095477387e-05, - "loss": 5.0528, - "step": 36295 - }, - { - "epoch": 18.928813559322034, - "grad_norm": 1.5588667392730713, - "learning_rate": 6.403718592964825e-05, - "loss": 4.6189, - "step": 36296 - }, - { - "epoch": 18.929335071707953, - "grad_norm": 1.532081127166748, - "learning_rate": 6.403618090452261e-05, - "loss": 5.3178, - "step": 36297 - }, - { - "epoch": 18.929856584093873, - "grad_norm": 1.5147581100463867, - "learning_rate": 6.403517587939699e-05, - "loss": 5.1475, - "step": 36298 - }, - { - "epoch": 18.930378096479792, - "grad_norm": 1.6128572225570679, - "learning_rate": 6.403417085427135e-05, - "loss": 5.1585, - "step": 36299 - }, - { - "epoch": 18.93089960886571, - "grad_norm": 1.5722092390060425, - "learning_rate": 6.403316582914573e-05, - "loss": 5.4534, - "step": 36300 - }, - { - "epoch": 18.93142112125163, - "grad_norm": 1.4924544095993042, - "learning_rate": 6.40321608040201e-05, - "loss": 5.381, - "step": 36301 - }, - { - "epoch": 18.931942633637547, - "grad_norm": 1.6154961585998535, - "learning_rate": 6.403115577889447e-05, - "loss": 5.2774, - "step": 36302 - }, - { - "epoch": 18.932464146023467, - "grad_norm": 1.497407078742981, - "learning_rate": 6.403015075376885e-05, - "loss": 5.1194, - "step": 36303 - }, - { - "epoch": 18.932985658409386, - "grad_norm": 1.711442470550537, - "learning_rate": 6.402914572864322e-05, - "loss": 4.4724, - "step": 36304 - }, - { - "epoch": 18.933507170795306, - "grad_norm": 1.5402292013168335, - "learning_rate": 6.40281407035176e-05, - "loss": 5.4043, - "step": 36305 - }, - { - "epoch": 18.934028683181225, - "grad_norm": 1.5562809705734253, - "learning_rate": 6.402713567839196e-05, - "loss": 5.442, - "step": 36306 - }, - { - "epoch": 18.934550195567144, - "grad_norm": 1.5067366361618042, - "learning_rate": 6.402613065326634e-05, - "loss": 5.784, - "step": 36307 - }, - { - "epoch": 18.935071707953064, - "grad_norm": 1.4032869338989258, - "learning_rate": 6.40251256281407e-05, - "loss": 5.679, - "step": 36308 - }, - { - "epoch": 18.935593220338983, - "grad_norm": 1.4953317642211914, - "learning_rate": 6.402412060301508e-05, - "loss": 5.0396, - "step": 36309 - }, - { - "epoch": 18.936114732724903, - "grad_norm": 1.5478482246398926, - "learning_rate": 6.402311557788944e-05, - "loss": 5.0592, - "step": 36310 - }, - { - "epoch": 18.936636245110822, - "grad_norm": 1.5210950374603271, - "learning_rate": 6.402211055276382e-05, - "loss": 4.7872, - "step": 36311 - }, - { - "epoch": 18.937157757496742, - "grad_norm": 1.5244574546813965, - "learning_rate": 6.40211055276382e-05, - "loss": 5.439, - "step": 36312 - }, - { - "epoch": 18.93767926988266, - "grad_norm": 1.5688457489013672, - "learning_rate": 6.402010050251258e-05, - "loss": 5.3549, - "step": 36313 - }, - { - "epoch": 18.938200782268577, - "grad_norm": 1.5115646123886108, - "learning_rate": 6.401909547738694e-05, - "loss": 4.9958, - "step": 36314 - }, - { - "epoch": 18.938722294654497, - "grad_norm": 1.4146430492401123, - "learning_rate": 6.401809045226132e-05, - "loss": 5.4082, - "step": 36315 - }, - { - "epoch": 18.939243807040416, - "grad_norm": 1.4981220960617065, - "learning_rate": 6.401708542713568e-05, - "loss": 5.0569, - "step": 36316 - }, - { - "epoch": 18.939765319426336, - "grad_norm": 1.5089508295059204, - "learning_rate": 6.401608040201005e-05, - "loss": 5.2073, - "step": 36317 - }, - { - "epoch": 18.940286831812255, - "grad_norm": 1.6205538511276245, - "learning_rate": 6.401507537688442e-05, - "loss": 4.5975, - "step": 36318 - }, - { - "epoch": 18.940808344198174, - "grad_norm": 1.595171570777893, - "learning_rate": 6.401407035175879e-05, - "loss": 5.6086, - "step": 36319 - }, - { - "epoch": 18.941329856584094, - "grad_norm": 1.5850378274917603, - "learning_rate": 6.401306532663317e-05, - "loss": 5.3111, - "step": 36320 - }, - { - "epoch": 18.941851368970013, - "grad_norm": 1.5371571779251099, - "learning_rate": 6.401206030150753e-05, - "loss": 5.109, - "step": 36321 - }, - { - "epoch": 18.942372881355933, - "grad_norm": 1.5438822507858276, - "learning_rate": 6.401105527638191e-05, - "loss": 5.2879, - "step": 36322 - }, - { - "epoch": 18.942894393741852, - "grad_norm": 1.548506498336792, - "learning_rate": 6.401005025125629e-05, - "loss": 4.7352, - "step": 36323 - }, - { - "epoch": 18.943415906127772, - "grad_norm": 1.563644289970398, - "learning_rate": 6.400904522613066e-05, - "loss": 4.8368, - "step": 36324 - }, - { - "epoch": 18.94393741851369, - "grad_norm": 1.5550519227981567, - "learning_rate": 6.400804020100503e-05, - "loss": 4.7403, - "step": 36325 - }, - { - "epoch": 18.944458930899607, - "grad_norm": 1.5488699674606323, - "learning_rate": 6.40070351758794e-05, - "loss": 5.0924, - "step": 36326 - }, - { - "epoch": 18.944980443285527, - "grad_norm": 1.5228408575057983, - "learning_rate": 6.400603015075377e-05, - "loss": 4.8859, - "step": 36327 - }, - { - "epoch": 18.945501955671446, - "grad_norm": 1.5239155292510986, - "learning_rate": 6.400502512562815e-05, - "loss": 5.4988, - "step": 36328 - }, - { - "epoch": 18.946023468057366, - "grad_norm": 1.5264171361923218, - "learning_rate": 6.400402010050251e-05, - "loss": 5.5263, - "step": 36329 - }, - { - "epoch": 18.946544980443285, - "grad_norm": 1.4120473861694336, - "learning_rate": 6.400301507537688e-05, - "loss": 5.0121, - "step": 36330 - }, - { - "epoch": 18.947066492829205, - "grad_norm": 1.5149757862091064, - "learning_rate": 6.400201005025126e-05, - "loss": 5.4649, - "step": 36331 - }, - { - "epoch": 18.947588005215124, - "grad_norm": 1.6116588115692139, - "learning_rate": 6.400100502512563e-05, - "loss": 5.4928, - "step": 36332 - }, - { - "epoch": 18.948109517601043, - "grad_norm": 1.4784910678863525, - "learning_rate": 6.400000000000001e-05, - "loss": 5.099, - "step": 36333 - }, - { - "epoch": 18.948631029986963, - "grad_norm": 1.5227853059768677, - "learning_rate": 6.399899497487437e-05, - "loss": 5.1638, - "step": 36334 - }, - { - "epoch": 18.949152542372882, - "grad_norm": 1.5314266681671143, - "learning_rate": 6.399798994974875e-05, - "loss": 4.7931, - "step": 36335 - }, - { - "epoch": 18.949674054758802, - "grad_norm": 1.7029438018798828, - "learning_rate": 6.399698492462312e-05, - "loss": 5.1359, - "step": 36336 - }, - { - "epoch": 18.95019556714472, - "grad_norm": 1.5243538618087769, - "learning_rate": 6.39959798994975e-05, - "loss": 5.3436, - "step": 36337 - }, - { - "epoch": 18.950717079530637, - "grad_norm": 1.4239857196807861, - "learning_rate": 6.399497487437186e-05, - "loss": 5.6126, - "step": 36338 - }, - { - "epoch": 18.951238591916557, - "grad_norm": 1.542472004890442, - "learning_rate": 6.399396984924624e-05, - "loss": 5.3236, - "step": 36339 - }, - { - "epoch": 18.951760104302476, - "grad_norm": 1.4876352548599243, - "learning_rate": 6.39929648241206e-05, - "loss": 5.3279, - "step": 36340 - }, - { - "epoch": 18.952281616688396, - "grad_norm": 1.4652842283248901, - "learning_rate": 6.399195979899498e-05, - "loss": 4.48, - "step": 36341 - }, - { - "epoch": 18.952803129074315, - "grad_norm": 1.3962911367416382, - "learning_rate": 6.399095477386934e-05, - "loss": 5.5819, - "step": 36342 - }, - { - "epoch": 18.953324641460235, - "grad_norm": 1.4073245525360107, - "learning_rate": 6.398994974874372e-05, - "loss": 5.2623, - "step": 36343 - }, - { - "epoch": 18.953846153846154, - "grad_norm": 1.5652891397476196, - "learning_rate": 6.39889447236181e-05, - "loss": 4.485, - "step": 36344 - }, - { - "epoch": 18.954367666232073, - "grad_norm": 1.494625210762024, - "learning_rate": 6.398793969849246e-05, - "loss": 4.5897, - "step": 36345 - }, - { - "epoch": 18.954889178617993, - "grad_norm": 1.5235084295272827, - "learning_rate": 6.398693467336684e-05, - "loss": 5.4312, - "step": 36346 - }, - { - "epoch": 18.955410691003912, - "grad_norm": 1.5086491107940674, - "learning_rate": 6.39859296482412e-05, - "loss": 5.3733, - "step": 36347 - }, - { - "epoch": 18.955932203389832, - "grad_norm": 1.399393081665039, - "learning_rate": 6.398492462311558e-05, - "loss": 5.6341, - "step": 36348 - }, - { - "epoch": 18.95645371577575, - "grad_norm": 1.6006807088851929, - "learning_rate": 6.398391959798995e-05, - "loss": 5.485, - "step": 36349 - }, - { - "epoch": 18.956975228161667, - "grad_norm": 1.5168216228485107, - "learning_rate": 6.398291457286433e-05, - "loss": 5.2761, - "step": 36350 - }, - { - "epoch": 18.957496740547587, - "grad_norm": 1.352529764175415, - "learning_rate": 6.398190954773869e-05, - "loss": 5.6392, - "step": 36351 - }, - { - "epoch": 18.958018252933506, - "grad_norm": 1.7657673358917236, - "learning_rate": 6.398090452261307e-05, - "loss": 4.7742, - "step": 36352 - }, - { - "epoch": 18.958539765319426, - "grad_norm": 1.475597858428955, - "learning_rate": 6.397989949748745e-05, - "loss": 5.2816, - "step": 36353 - }, - { - "epoch": 18.959061277705345, - "grad_norm": 1.504118800163269, - "learning_rate": 6.397889447236182e-05, - "loss": 5.5358, - "step": 36354 - }, - { - "epoch": 18.959582790091265, - "grad_norm": 1.5021814107894897, - "learning_rate": 6.397788944723619e-05, - "loss": 5.0309, - "step": 36355 - }, - { - "epoch": 18.960104302477184, - "grad_norm": 1.5989235639572144, - "learning_rate": 6.397688442211057e-05, - "loss": 5.4913, - "step": 36356 - }, - { - "epoch": 18.960625814863103, - "grad_norm": 1.4271684885025024, - "learning_rate": 6.397587939698493e-05, - "loss": 5.6721, - "step": 36357 - }, - { - "epoch": 18.961147327249023, - "grad_norm": 1.6072583198547363, - "learning_rate": 6.39748743718593e-05, - "loss": 5.0589, - "step": 36358 - }, - { - "epoch": 18.961668839634942, - "grad_norm": 1.5302656888961792, - "learning_rate": 6.397386934673367e-05, - "loss": 5.143, - "step": 36359 - }, - { - "epoch": 18.962190352020862, - "grad_norm": 1.420785665512085, - "learning_rate": 6.397286432160804e-05, - "loss": 5.3878, - "step": 36360 - }, - { - "epoch": 18.96271186440678, - "grad_norm": 1.4502924680709839, - "learning_rate": 6.397185929648241e-05, - "loss": 5.2445, - "step": 36361 - }, - { - "epoch": 18.963233376792697, - "grad_norm": 1.5360506772994995, - "learning_rate": 6.397085427135678e-05, - "loss": 5.0256, - "step": 36362 - }, - { - "epoch": 18.963754889178617, - "grad_norm": 1.5216636657714844, - "learning_rate": 6.396984924623116e-05, - "loss": 5.621, - "step": 36363 - }, - { - "epoch": 18.964276401564536, - "grad_norm": 1.5981419086456299, - "learning_rate": 6.396884422110553e-05, - "loss": 5.2899, - "step": 36364 - }, - { - "epoch": 18.964797913950456, - "grad_norm": 1.4571863412857056, - "learning_rate": 6.396783919597991e-05, - "loss": 5.0993, - "step": 36365 - }, - { - "epoch": 18.965319426336375, - "grad_norm": 1.5443283319473267, - "learning_rate": 6.396683417085428e-05, - "loss": 5.4587, - "step": 36366 - }, - { - "epoch": 18.965840938722295, - "grad_norm": 1.5857517719268799, - "learning_rate": 6.396582914572865e-05, - "loss": 4.9785, - "step": 36367 - }, - { - "epoch": 18.966362451108214, - "grad_norm": 1.5439568758010864, - "learning_rate": 6.396482412060302e-05, - "loss": 5.2066, - "step": 36368 - }, - { - "epoch": 18.966883963494134, - "grad_norm": 1.5963279008865356, - "learning_rate": 6.39638190954774e-05, - "loss": 5.2824, - "step": 36369 - }, - { - "epoch": 18.967405475880053, - "grad_norm": 1.5480585098266602, - "learning_rate": 6.396281407035176e-05, - "loss": 5.5399, - "step": 36370 - }, - { - "epoch": 18.967926988265972, - "grad_norm": 1.6044108867645264, - "learning_rate": 6.396180904522612e-05, - "loss": 4.8455, - "step": 36371 - }, - { - "epoch": 18.968448500651892, - "grad_norm": 1.6003870964050293, - "learning_rate": 6.39608040201005e-05, - "loss": 4.9205, - "step": 36372 - }, - { - "epoch": 18.96897001303781, - "grad_norm": 1.481105923652649, - "learning_rate": 6.395979899497488e-05, - "loss": 5.0724, - "step": 36373 - }, - { - "epoch": 18.969491525423727, - "grad_norm": 1.4993730783462524, - "learning_rate": 6.395879396984926e-05, - "loss": 5.4487, - "step": 36374 - }, - { - "epoch": 18.970013037809647, - "grad_norm": 1.443029761314392, - "learning_rate": 6.395778894472362e-05, - "loss": 5.1632, - "step": 36375 - }, - { - "epoch": 18.970534550195566, - "grad_norm": 1.5736000537872314, - "learning_rate": 6.3956783919598e-05, - "loss": 5.199, - "step": 36376 - }, - { - "epoch": 18.971056062581486, - "grad_norm": 1.5692921876907349, - "learning_rate": 6.395577889447236e-05, - "loss": 4.8155, - "step": 36377 - }, - { - "epoch": 18.971577574967405, - "grad_norm": 1.5377954244613647, - "learning_rate": 6.395477386934674e-05, - "loss": 5.1676, - "step": 36378 - }, - { - "epoch": 18.972099087353325, - "grad_norm": 1.523167371749878, - "learning_rate": 6.39537688442211e-05, - "loss": 5.5383, - "step": 36379 - }, - { - "epoch": 18.972620599739244, - "grad_norm": 1.5092257261276245, - "learning_rate": 6.395276381909548e-05, - "loss": 5.3072, - "step": 36380 - }, - { - "epoch": 18.973142112125164, - "grad_norm": 1.4799182415008545, - "learning_rate": 6.395175879396985e-05, - "loss": 5.1202, - "step": 36381 - }, - { - "epoch": 18.973663624511083, - "grad_norm": 1.4440547227859497, - "learning_rate": 6.395075376884423e-05, - "loss": 5.2105, - "step": 36382 - }, - { - "epoch": 18.974185136897002, - "grad_norm": 1.565718173980713, - "learning_rate": 6.394974874371859e-05, - "loss": 5.1607, - "step": 36383 - }, - { - "epoch": 18.974706649282922, - "grad_norm": 1.5154064893722534, - "learning_rate": 6.394874371859297e-05, - "loss": 5.1281, - "step": 36384 - }, - { - "epoch": 18.975228161668838, - "grad_norm": 1.5831751823425293, - "learning_rate": 6.394773869346735e-05, - "loss": 5.3439, - "step": 36385 - }, - { - "epoch": 18.975749674054757, - "grad_norm": 1.510176658630371, - "learning_rate": 6.394673366834171e-05, - "loss": 5.4103, - "step": 36386 - }, - { - "epoch": 18.976271186440677, - "grad_norm": 1.4515647888183594, - "learning_rate": 6.394572864321609e-05, - "loss": 5.6268, - "step": 36387 - }, - { - "epoch": 18.976792698826596, - "grad_norm": 1.393825888633728, - "learning_rate": 6.394472361809045e-05, - "loss": 5.5522, - "step": 36388 - }, - { - "epoch": 18.977314211212516, - "grad_norm": 1.4746861457824707, - "learning_rate": 6.394371859296483e-05, - "loss": 5.3951, - "step": 36389 - }, - { - "epoch": 18.977835723598435, - "grad_norm": 1.5197993516921997, - "learning_rate": 6.39427135678392e-05, - "loss": 5.0111, - "step": 36390 - }, - { - "epoch": 18.978357235984355, - "grad_norm": 1.5499286651611328, - "learning_rate": 6.394170854271357e-05, - "loss": 5.2341, - "step": 36391 - }, - { - "epoch": 18.978878748370274, - "grad_norm": 1.4168745279312134, - "learning_rate": 6.394070351758794e-05, - "loss": 5.3837, - "step": 36392 - }, - { - "epoch": 18.979400260756194, - "grad_norm": 1.4090020656585693, - "learning_rate": 6.393969849246231e-05, - "loss": 5.3674, - "step": 36393 - }, - { - "epoch": 18.979921773142113, - "grad_norm": 1.5054775476455688, - "learning_rate": 6.393869346733669e-05, - "loss": 5.3589, - "step": 36394 - }, - { - "epoch": 18.980443285528033, - "grad_norm": 1.536129117012024, - "learning_rate": 6.393768844221107e-05, - "loss": 5.1643, - "step": 36395 - }, - { - "epoch": 18.980964797913952, - "grad_norm": 1.5385679006576538, - "learning_rate": 6.393668341708543e-05, - "loss": 4.9146, - "step": 36396 - }, - { - "epoch": 18.98148631029987, - "grad_norm": 1.4788508415222168, - "learning_rate": 6.39356783919598e-05, - "loss": 5.2362, - "step": 36397 - }, - { - "epoch": 18.982007822685787, - "grad_norm": 1.4977564811706543, - "learning_rate": 6.393467336683418e-05, - "loss": 5.3996, - "step": 36398 - }, - { - "epoch": 18.982529335071707, - "grad_norm": 1.4907888174057007, - "learning_rate": 6.393366834170854e-05, - "loss": 5.5442, - "step": 36399 - }, - { - "epoch": 18.983050847457626, - "grad_norm": 1.4022825956344604, - "learning_rate": 6.393266331658292e-05, - "loss": 5.51, - "step": 36400 - }, - { - "epoch": 18.983572359843546, - "grad_norm": 1.5182963609695435, - "learning_rate": 6.393165829145728e-05, - "loss": 5.1229, - "step": 36401 - }, - { - "epoch": 18.984093872229465, - "grad_norm": 1.5249119997024536, - "learning_rate": 6.393065326633166e-05, - "loss": 5.1968, - "step": 36402 - }, - { - "epoch": 18.984615384615385, - "grad_norm": 1.4615013599395752, - "learning_rate": 6.392964824120602e-05, - "loss": 5.2629, - "step": 36403 - }, - { - "epoch": 18.985136897001304, - "grad_norm": 1.473999261856079, - "learning_rate": 6.39286432160804e-05, - "loss": 5.4992, - "step": 36404 - }, - { - "epoch": 18.985658409387224, - "grad_norm": 1.4354653358459473, - "learning_rate": 6.392763819095478e-05, - "loss": 4.9285, - "step": 36405 - }, - { - "epoch": 18.986179921773143, - "grad_norm": 1.6344817876815796, - "learning_rate": 6.392663316582916e-05, - "loss": 5.1617, - "step": 36406 - }, - { - "epoch": 18.986701434159063, - "grad_norm": 1.5544605255126953, - "learning_rate": 6.392562814070352e-05, - "loss": 5.482, - "step": 36407 - }, - { - "epoch": 18.987222946544982, - "grad_norm": 1.4644179344177246, - "learning_rate": 6.39246231155779e-05, - "loss": 5.5054, - "step": 36408 - }, - { - "epoch": 18.987744458930898, - "grad_norm": 1.363000512123108, - "learning_rate": 6.392361809045226e-05, - "loss": 5.1858, - "step": 36409 - }, - { - "epoch": 18.988265971316817, - "grad_norm": 1.4165173768997192, - "learning_rate": 6.392261306532663e-05, - "loss": 5.2271, - "step": 36410 - }, - { - "epoch": 18.988787483702737, - "grad_norm": 1.5173660516738892, - "learning_rate": 6.392160804020101e-05, - "loss": 5.3691, - "step": 36411 - }, - { - "epoch": 18.989308996088656, - "grad_norm": 1.3722710609436035, - "learning_rate": 6.392060301507537e-05, - "loss": 5.5191, - "step": 36412 - }, - { - "epoch": 18.989830508474576, - "grad_norm": 1.490736484527588, - "learning_rate": 6.391959798994975e-05, - "loss": 5.502, - "step": 36413 - }, - { - "epoch": 18.990352020860495, - "grad_norm": 1.508996605873108, - "learning_rate": 6.391859296482411e-05, - "loss": 5.1428, - "step": 36414 - }, - { - "epoch": 18.990873533246415, - "grad_norm": 1.5832735300064087, - "learning_rate": 6.391758793969849e-05, - "loss": 5.3106, - "step": 36415 - }, - { - "epoch": 18.991395045632334, - "grad_norm": 1.6087205410003662, - "learning_rate": 6.391658291457287e-05, - "loss": 5.3761, - "step": 36416 - }, - { - "epoch": 18.991916558018254, - "grad_norm": 1.5368000268936157, - "learning_rate": 6.391557788944725e-05, - "loss": 5.506, - "step": 36417 - }, - { - "epoch": 18.992438070404173, - "grad_norm": 1.5611159801483154, - "learning_rate": 6.391457286432161e-05, - "loss": 5.2054, - "step": 36418 - }, - { - "epoch": 18.992959582790093, - "grad_norm": 1.4925205707550049, - "learning_rate": 6.391356783919599e-05, - "loss": 5.2638, - "step": 36419 - }, - { - "epoch": 18.993481095176012, - "grad_norm": 1.4701879024505615, - "learning_rate": 6.391256281407035e-05, - "loss": 5.7408, - "step": 36420 - }, - { - "epoch": 18.994002607561928, - "grad_norm": 1.449507474899292, - "learning_rate": 6.391155778894473e-05, - "loss": 5.7025, - "step": 36421 - }, - { - "epoch": 18.994524119947847, - "grad_norm": 1.5352505445480347, - "learning_rate": 6.39105527638191e-05, - "loss": 5.4507, - "step": 36422 - }, - { - "epoch": 18.995045632333767, - "grad_norm": 1.6228381395339966, - "learning_rate": 6.390954773869346e-05, - "loss": 5.0959, - "step": 36423 - }, - { - "epoch": 18.995567144719686, - "grad_norm": 1.5831514596939087, - "learning_rate": 6.390854271356784e-05, - "loss": 5.3233, - "step": 36424 - }, - { - "epoch": 18.996088657105606, - "grad_norm": 1.456286907196045, - "learning_rate": 6.390753768844222e-05, - "loss": 5.5739, - "step": 36425 - }, - { - "epoch": 18.996610169491525, - "grad_norm": 1.5418107509613037, - "learning_rate": 6.390653266331659e-05, - "loss": 5.4024, - "step": 36426 - }, - { - "epoch": 18.997131681877445, - "grad_norm": 1.3206504583358765, - "learning_rate": 6.390552763819096e-05, - "loss": 5.5905, - "step": 36427 - }, - { - "epoch": 18.997653194263364, - "grad_norm": 1.5055292844772339, - "learning_rate": 6.390452261306534e-05, - "loss": 5.0207, - "step": 36428 - }, - { - "epoch": 18.998174706649284, - "grad_norm": 1.3307043313980103, - "learning_rate": 6.39035175879397e-05, - "loss": 4.848, - "step": 36429 - }, - { - "epoch": 18.998696219035203, - "grad_norm": 1.4130092859268188, - "learning_rate": 6.390251256281408e-05, - "loss": 5.1887, - "step": 36430 - }, - { - "epoch": 18.999217731421123, - "grad_norm": 1.5378360748291016, - "learning_rate": 6.390150753768844e-05, - "loss": 5.5752, - "step": 36431 - }, - { - "epoch": 18.999739243807042, - "grad_norm": 1.5153274536132812, - "learning_rate": 6.390050251256282e-05, - "loss": 5.5561, - "step": 36432 - }, - { - "epoch": 19.000260756192958, - "grad_norm": 1.4498170614242554, - "learning_rate": 6.389949748743718e-05, - "loss": 5.812, - "step": 36433 - }, - { - "epoch": 19.000782268578877, - "grad_norm": 1.4657938480377197, - "learning_rate": 6.389849246231156e-05, - "loss": 5.3594, - "step": 36434 - }, - { - "epoch": 19.001303780964797, - "grad_norm": 1.5366445779800415, - "learning_rate": 6.389748743718593e-05, - "loss": 5.3402, - "step": 36435 - }, - { - "epoch": 19.001825293350716, - "grad_norm": 1.6665617227554321, - "learning_rate": 6.38964824120603e-05, - "loss": 5.2992, - "step": 36436 - }, - { - "epoch": 19.002346805736636, - "grad_norm": 1.477283000946045, - "learning_rate": 6.389547738693468e-05, - "loss": 5.2712, - "step": 36437 - }, - { - "epoch": 19.002868318122555, - "grad_norm": 1.4257358312606812, - "learning_rate": 6.389447236180905e-05, - "loss": 5.6476, - "step": 36438 - }, - { - "epoch": 19.003389830508475, - "grad_norm": 1.5418530702590942, - "learning_rate": 6.389346733668342e-05, - "loss": 5.3388, - "step": 36439 - }, - { - "epoch": 19.003911342894394, - "grad_norm": 1.3973771333694458, - "learning_rate": 6.389246231155779e-05, - "loss": 4.8605, - "step": 36440 - }, - { - "epoch": 19.004432855280314, - "grad_norm": 1.510692834854126, - "learning_rate": 6.389145728643217e-05, - "loss": 5.2427, - "step": 36441 - }, - { - "epoch": 19.004954367666233, - "grad_norm": 1.4329012632369995, - "learning_rate": 6.389045226130653e-05, - "loss": 5.4912, - "step": 36442 - }, - { - "epoch": 19.005475880052153, - "grad_norm": 1.3920092582702637, - "learning_rate": 6.388944723618091e-05, - "loss": 5.2158, - "step": 36443 - }, - { - "epoch": 19.005997392438072, - "grad_norm": 1.531011700630188, - "learning_rate": 6.388844221105527e-05, - "loss": 5.5185, - "step": 36444 - }, - { - "epoch": 19.006518904823988, - "grad_norm": 1.7794307470321655, - "learning_rate": 6.388743718592965e-05, - "loss": 4.9228, - "step": 36445 - }, - { - "epoch": 19.007040417209907, - "grad_norm": 1.6493555307388306, - "learning_rate": 6.388643216080403e-05, - "loss": 4.8674, - "step": 36446 - }, - { - "epoch": 19.007561929595827, - "grad_norm": 1.436018943786621, - "learning_rate": 6.38854271356784e-05, - "loss": 5.1412, - "step": 36447 - }, - { - "epoch": 19.008083441981746, - "grad_norm": 1.4513887166976929, - "learning_rate": 6.388442211055277e-05, - "loss": 5.5302, - "step": 36448 - }, - { - "epoch": 19.008604954367666, - "grad_norm": 1.4046200513839722, - "learning_rate": 6.388341708542715e-05, - "loss": 5.5264, - "step": 36449 - }, - { - "epoch": 19.009126466753585, - "grad_norm": 1.4527695178985596, - "learning_rate": 6.388241206030151e-05, - "loss": 4.807, - "step": 36450 - }, - { - "epoch": 19.009647979139505, - "grad_norm": 1.4804778099060059, - "learning_rate": 6.388140703517588e-05, - "loss": 5.2941, - "step": 36451 - }, - { - "epoch": 19.010169491525424, - "grad_norm": 1.4075803756713867, - "learning_rate": 6.388040201005025e-05, - "loss": 5.3264, - "step": 36452 - }, - { - "epoch": 19.010691003911344, - "grad_norm": 1.472478985786438, - "learning_rate": 6.387939698492462e-05, - "loss": 5.326, - "step": 36453 - }, - { - "epoch": 19.011212516297263, - "grad_norm": 1.424439787864685, - "learning_rate": 6.3878391959799e-05, - "loss": 5.3651, - "step": 36454 - }, - { - "epoch": 19.011734028683183, - "grad_norm": 1.4651509523391724, - "learning_rate": 6.387738693467336e-05, - "loss": 4.9113, - "step": 36455 - }, - { - "epoch": 19.012255541069102, - "grad_norm": 1.5615006685256958, - "learning_rate": 6.387638190954774e-05, - "loss": 5.1468, - "step": 36456 - }, - { - "epoch": 19.012777053455018, - "grad_norm": 1.4683409929275513, - "learning_rate": 6.387537688442212e-05, - "loss": 5.4286, - "step": 36457 - }, - { - "epoch": 19.013298565840937, - "grad_norm": 1.4806933403015137, - "learning_rate": 6.38743718592965e-05, - "loss": 5.2872, - "step": 36458 - }, - { - "epoch": 19.013820078226857, - "grad_norm": 1.400978922843933, - "learning_rate": 6.387336683417086e-05, - "loss": 5.6631, - "step": 36459 - }, - { - "epoch": 19.014341590612776, - "grad_norm": 1.4781547784805298, - "learning_rate": 6.387236180904524e-05, - "loss": 5.3451, - "step": 36460 - }, - { - "epoch": 19.014863102998696, - "grad_norm": 1.4590935707092285, - "learning_rate": 6.38713567839196e-05, - "loss": 5.3484, - "step": 36461 - }, - { - "epoch": 19.015384615384615, - "grad_norm": 1.5076823234558105, - "learning_rate": 6.387035175879398e-05, - "loss": 5.279, - "step": 36462 - }, - { - "epoch": 19.015906127770535, - "grad_norm": 1.5022331476211548, - "learning_rate": 6.386934673366834e-05, - "loss": 4.1949, - "step": 36463 - }, - { - "epoch": 19.016427640156454, - "grad_norm": 1.5384430885314941, - "learning_rate": 6.38683417085427e-05, - "loss": 5.0968, - "step": 36464 - }, - { - "epoch": 19.016949152542374, - "grad_norm": 1.5601495504379272, - "learning_rate": 6.386733668341708e-05, - "loss": 5.4517, - "step": 36465 - }, - { - "epoch": 19.017470664928293, - "grad_norm": 1.5081920623779297, - "learning_rate": 6.386633165829146e-05, - "loss": 5.4368, - "step": 36466 - }, - { - "epoch": 19.017992177314213, - "grad_norm": 1.5114338397979736, - "learning_rate": 6.386532663316584e-05, - "loss": 5.4837, - "step": 36467 - }, - { - "epoch": 19.018513689700132, - "grad_norm": 1.4646339416503906, - "learning_rate": 6.38643216080402e-05, - "loss": 5.4553, - "step": 36468 - }, - { - "epoch": 19.019035202086048, - "grad_norm": 1.6463786363601685, - "learning_rate": 6.386331658291458e-05, - "loss": 5.0925, - "step": 36469 - }, - { - "epoch": 19.019556714471967, - "grad_norm": 1.493266224861145, - "learning_rate": 6.386231155778895e-05, - "loss": 5.1596, - "step": 36470 - }, - { - "epoch": 19.020078226857887, - "grad_norm": 1.5242294073104858, - "learning_rate": 6.386130653266332e-05, - "loss": 5.4097, - "step": 36471 - }, - { - "epoch": 19.020599739243806, - "grad_norm": 1.5348807573318481, - "learning_rate": 6.386030150753769e-05, - "loss": 5.0576, - "step": 36472 - }, - { - "epoch": 19.021121251629726, - "grad_norm": 1.5065150260925293, - "learning_rate": 6.385929648241207e-05, - "loss": 5.7928, - "step": 36473 - }, - { - "epoch": 19.021642764015645, - "grad_norm": 1.395912766456604, - "learning_rate": 6.385829145728643e-05, - "loss": 5.5437, - "step": 36474 - }, - { - "epoch": 19.022164276401565, - "grad_norm": 1.4397414922714233, - "learning_rate": 6.385728643216081e-05, - "loss": 5.5168, - "step": 36475 - }, - { - "epoch": 19.022685788787484, - "grad_norm": 1.4051600694656372, - "learning_rate": 6.385628140703517e-05, - "loss": 4.7824, - "step": 36476 - }, - { - "epoch": 19.023207301173404, - "grad_norm": 1.4835326671600342, - "learning_rate": 6.385527638190955e-05, - "loss": 5.6432, - "step": 36477 - }, - { - "epoch": 19.023728813559323, - "grad_norm": 1.5000804662704468, - "learning_rate": 6.385427135678393e-05, - "loss": 5.496, - "step": 36478 - }, - { - "epoch": 19.024250325945243, - "grad_norm": 1.5501950979232788, - "learning_rate": 6.385326633165829e-05, - "loss": 5.4527, - "step": 36479 - }, - { - "epoch": 19.02477183833116, - "grad_norm": 1.4072762727737427, - "learning_rate": 6.385226130653267e-05, - "loss": 5.6153, - "step": 36480 - }, - { - "epoch": 19.025293350717078, - "grad_norm": 1.399797797203064, - "learning_rate": 6.385125628140703e-05, - "loss": 5.4149, - "step": 36481 - }, - { - "epoch": 19.025814863102998, - "grad_norm": 1.6464295387268066, - "learning_rate": 6.385025125628141e-05, - "loss": 4.8524, - "step": 36482 - }, - { - "epoch": 19.026336375488917, - "grad_norm": 1.603816032409668, - "learning_rate": 6.384924623115578e-05, - "loss": 4.9213, - "step": 36483 - }, - { - "epoch": 19.026857887874836, - "grad_norm": 1.501155138015747, - "learning_rate": 6.384824120603015e-05, - "loss": 5.3296, - "step": 36484 - }, - { - "epoch": 19.027379400260756, - "grad_norm": 1.4615312814712524, - "learning_rate": 6.384723618090452e-05, - "loss": 4.9667, - "step": 36485 - }, - { - "epoch": 19.027900912646675, - "grad_norm": 1.5007359981536865, - "learning_rate": 6.38462311557789e-05, - "loss": 5.3927, - "step": 36486 - }, - { - "epoch": 19.028422425032595, - "grad_norm": 1.4750616550445557, - "learning_rate": 6.384522613065327e-05, - "loss": 5.2445, - "step": 36487 - }, - { - "epoch": 19.028943937418514, - "grad_norm": 1.6725836992263794, - "learning_rate": 6.384422110552765e-05, - "loss": 4.7844, - "step": 36488 - }, - { - "epoch": 19.029465449804434, - "grad_norm": 1.4435590505599976, - "learning_rate": 6.384321608040202e-05, - "loss": 5.5217, - "step": 36489 - }, - { - "epoch": 19.029986962190353, - "grad_norm": 1.4350696802139282, - "learning_rate": 6.384221105527638e-05, - "loss": 5.4831, - "step": 36490 - }, - { - "epoch": 19.030508474576273, - "grad_norm": 1.4217041730880737, - "learning_rate": 6.384120603015076e-05, - "loss": 5.4418, - "step": 36491 - }, - { - "epoch": 19.03102998696219, - "grad_norm": 1.4142299890518188, - "learning_rate": 6.384020100502512e-05, - "loss": 5.796, - "step": 36492 - }, - { - "epoch": 19.031551499348108, - "grad_norm": 1.5102044343948364, - "learning_rate": 6.38391959798995e-05, - "loss": 5.1056, - "step": 36493 - }, - { - "epoch": 19.032073011734028, - "grad_norm": 1.5957304239273071, - "learning_rate": 6.383819095477387e-05, - "loss": 5.441, - "step": 36494 - }, - { - "epoch": 19.032594524119947, - "grad_norm": 1.422868013381958, - "learning_rate": 6.383718592964824e-05, - "loss": 4.9152, - "step": 36495 - }, - { - "epoch": 19.033116036505866, - "grad_norm": 1.4956989288330078, - "learning_rate": 6.383618090452261e-05, - "loss": 5.2306, - "step": 36496 - }, - { - "epoch": 19.033637548891786, - "grad_norm": 1.5107190608978271, - "learning_rate": 6.383517587939699e-05, - "loss": 5.4643, - "step": 36497 - }, - { - "epoch": 19.034159061277705, - "grad_norm": 1.5417370796203613, - "learning_rate": 6.383417085427136e-05, - "loss": 5.1927, - "step": 36498 - }, - { - "epoch": 19.034680573663625, - "grad_norm": 1.391204833984375, - "learning_rate": 6.383316582914574e-05, - "loss": 5.282, - "step": 36499 - }, - { - "epoch": 19.035202086049544, - "grad_norm": 1.5229167938232422, - "learning_rate": 6.38321608040201e-05, - "loss": 5.4241, - "step": 36500 - }, - { - "epoch": 19.035723598435464, - "grad_norm": 1.5110135078430176, - "learning_rate": 6.383115577889448e-05, - "loss": 4.9811, - "step": 36501 - }, - { - "epoch": 19.036245110821383, - "grad_norm": 1.5072760581970215, - "learning_rate": 6.383015075376885e-05, - "loss": 5.1421, - "step": 36502 - }, - { - "epoch": 19.036766623207303, - "grad_norm": 1.499022364616394, - "learning_rate": 6.382914572864321e-05, - "loss": 5.4395, - "step": 36503 - }, - { - "epoch": 19.03728813559322, - "grad_norm": 1.4008692502975464, - "learning_rate": 6.382814070351759e-05, - "loss": 5.4162, - "step": 36504 - }, - { - "epoch": 19.037809647979138, - "grad_norm": 1.6091115474700928, - "learning_rate": 6.382713567839195e-05, - "loss": 5.0299, - "step": 36505 - }, - { - "epoch": 19.038331160365058, - "grad_norm": 1.591385841369629, - "learning_rate": 6.382613065326633e-05, - "loss": 5.0674, - "step": 36506 - }, - { - "epoch": 19.038852672750977, - "grad_norm": 1.4665042161941528, - "learning_rate": 6.382512562814071e-05, - "loss": 5.3333, - "step": 36507 - }, - { - "epoch": 19.039374185136897, - "grad_norm": 1.5577585697174072, - "learning_rate": 6.382412060301509e-05, - "loss": 5.5395, - "step": 36508 - }, - { - "epoch": 19.039895697522816, - "grad_norm": 1.5146288871765137, - "learning_rate": 6.382311557788945e-05, - "loss": 5.5293, - "step": 36509 - }, - { - "epoch": 19.040417209908735, - "grad_norm": 1.5342308282852173, - "learning_rate": 6.382211055276383e-05, - "loss": 5.0126, - "step": 36510 - }, - { - "epoch": 19.040938722294655, - "grad_norm": 1.4932032823562622, - "learning_rate": 6.38211055276382e-05, - "loss": 4.8517, - "step": 36511 - }, - { - "epoch": 19.041460234680574, - "grad_norm": 1.514173150062561, - "learning_rate": 6.382010050251257e-05, - "loss": 5.2624, - "step": 36512 - }, - { - "epoch": 19.041981747066494, - "grad_norm": 1.4733203649520874, - "learning_rate": 6.381909547738694e-05, - "loss": 5.6135, - "step": 36513 - }, - { - "epoch": 19.042503259452413, - "grad_norm": 1.5975241661071777, - "learning_rate": 6.381809045226131e-05, - "loss": 5.2755, - "step": 36514 - }, - { - "epoch": 19.043024771838333, - "grad_norm": 1.4733407497406006, - "learning_rate": 6.381708542713568e-05, - "loss": 5.4039, - "step": 36515 - }, - { - "epoch": 19.04354628422425, - "grad_norm": 1.4654357433319092, - "learning_rate": 6.381608040201004e-05, - "loss": 5.4956, - "step": 36516 - }, - { - "epoch": 19.044067796610168, - "grad_norm": 1.487318515777588, - "learning_rate": 6.381507537688442e-05, - "loss": 5.6068, - "step": 36517 - }, - { - "epoch": 19.044589308996088, - "grad_norm": 1.5200185775756836, - "learning_rate": 6.38140703517588e-05, - "loss": 5.5286, - "step": 36518 - }, - { - "epoch": 19.045110821382007, - "grad_norm": 1.5736103057861328, - "learning_rate": 6.381306532663318e-05, - "loss": 5.3334, - "step": 36519 - }, - { - "epoch": 19.045632333767927, - "grad_norm": 1.4616059064865112, - "learning_rate": 6.381206030150754e-05, - "loss": 5.4751, - "step": 36520 - }, - { - "epoch": 19.046153846153846, - "grad_norm": 1.6072853803634644, - "learning_rate": 6.381105527638192e-05, - "loss": 5.0529, - "step": 36521 - }, - { - "epoch": 19.046675358539765, - "grad_norm": 1.6348141431808472, - "learning_rate": 6.381005025125628e-05, - "loss": 5.2557, - "step": 36522 - }, - { - "epoch": 19.047196870925685, - "grad_norm": 1.4991405010223389, - "learning_rate": 6.380904522613066e-05, - "loss": 5.6913, - "step": 36523 - }, - { - "epoch": 19.047718383311604, - "grad_norm": 1.5230505466461182, - "learning_rate": 6.380804020100502e-05, - "loss": 5.4666, - "step": 36524 - }, - { - "epoch": 19.048239895697524, - "grad_norm": 1.5461788177490234, - "learning_rate": 6.38070351758794e-05, - "loss": 5.4028, - "step": 36525 - }, - { - "epoch": 19.048761408083443, - "grad_norm": 1.4686331748962402, - "learning_rate": 6.380603015075377e-05, - "loss": 5.2241, - "step": 36526 - }, - { - "epoch": 19.049282920469363, - "grad_norm": 1.5382760763168335, - "learning_rate": 6.380502512562814e-05, - "loss": 5.0061, - "step": 36527 - }, - { - "epoch": 19.04980443285528, - "grad_norm": 1.5505443811416626, - "learning_rate": 6.380402010050252e-05, - "loss": 5.4528, - "step": 36528 - }, - { - "epoch": 19.050325945241198, - "grad_norm": 1.4329123497009277, - "learning_rate": 6.38030150753769e-05, - "loss": 5.6293, - "step": 36529 - }, - { - "epoch": 19.050847457627118, - "grad_norm": 1.5236488580703735, - "learning_rate": 6.380201005025126e-05, - "loss": 5.4299, - "step": 36530 - }, - { - "epoch": 19.051368970013037, - "grad_norm": 1.5810667276382446, - "learning_rate": 6.380100502512563e-05, - "loss": 5.294, - "step": 36531 - }, - { - "epoch": 19.051890482398957, - "grad_norm": 1.6183044910430908, - "learning_rate": 6.38e-05, - "loss": 5.161, - "step": 36532 - }, - { - "epoch": 19.052411994784876, - "grad_norm": 1.475118637084961, - "learning_rate": 6.379899497487437e-05, - "loss": 5.2044, - "step": 36533 - }, - { - "epoch": 19.052933507170795, - "grad_norm": 1.473341703414917, - "learning_rate": 6.379798994974875e-05, - "loss": 5.1578, - "step": 36534 - }, - { - "epoch": 19.053455019556715, - "grad_norm": 1.4159265756607056, - "learning_rate": 6.379698492462311e-05, - "loss": 5.5254, - "step": 36535 - }, - { - "epoch": 19.053976531942634, - "grad_norm": 1.757319450378418, - "learning_rate": 6.379597989949749e-05, - "loss": 4.918, - "step": 36536 - }, - { - "epoch": 19.054498044328554, - "grad_norm": 1.3613264560699463, - "learning_rate": 6.379497487437185e-05, - "loss": 5.5014, - "step": 36537 - }, - { - "epoch": 19.055019556714473, - "grad_norm": 1.5040158033370972, - "learning_rate": 6.379396984924623e-05, - "loss": 4.9637, - "step": 36538 - }, - { - "epoch": 19.055541069100393, - "grad_norm": 1.5155035257339478, - "learning_rate": 6.379296482412061e-05, - "loss": 5.3774, - "step": 36539 - }, - { - "epoch": 19.05606258148631, - "grad_norm": 1.5342602729797363, - "learning_rate": 6.379195979899499e-05, - "loss": 5.2978, - "step": 36540 - }, - { - "epoch": 19.056584093872228, - "grad_norm": 1.4987059831619263, - "learning_rate": 6.379095477386935e-05, - "loss": 5.2034, - "step": 36541 - }, - { - "epoch": 19.057105606258148, - "grad_norm": 1.4527872800827026, - "learning_rate": 6.378994974874373e-05, - "loss": 5.3865, - "step": 36542 - }, - { - "epoch": 19.057627118644067, - "grad_norm": 1.4571586847305298, - "learning_rate": 6.37889447236181e-05, - "loss": 5.5738, - "step": 36543 - }, - { - "epoch": 19.058148631029987, - "grad_norm": 1.4864294528961182, - "learning_rate": 6.378793969849246e-05, - "loss": 5.1064, - "step": 36544 - }, - { - "epoch": 19.058670143415906, - "grad_norm": 1.54171621799469, - "learning_rate": 6.378693467336684e-05, - "loss": 5.1263, - "step": 36545 - }, - { - "epoch": 19.059191655801826, - "grad_norm": 1.5183409452438354, - "learning_rate": 6.37859296482412e-05, - "loss": 4.8823, - "step": 36546 - }, - { - "epoch": 19.059713168187745, - "grad_norm": 1.5672645568847656, - "learning_rate": 6.378492462311558e-05, - "loss": 4.8046, - "step": 36547 - }, - { - "epoch": 19.060234680573664, - "grad_norm": 1.4592701196670532, - "learning_rate": 6.378391959798996e-05, - "loss": 5.5306, - "step": 36548 - }, - { - "epoch": 19.060756192959584, - "grad_norm": 1.4693222045898438, - "learning_rate": 6.378291457286433e-05, - "loss": 5.4183, - "step": 36549 - }, - { - "epoch": 19.061277705345503, - "grad_norm": 1.4959423542022705, - "learning_rate": 6.37819095477387e-05, - "loss": 5.2775, - "step": 36550 - }, - { - "epoch": 19.061799217731423, - "grad_norm": 1.5169011354446411, - "learning_rate": 6.378090452261308e-05, - "loss": 5.5043, - "step": 36551 - }, - { - "epoch": 19.06232073011734, - "grad_norm": 1.4433250427246094, - "learning_rate": 6.377989949748744e-05, - "loss": 5.4013, - "step": 36552 - }, - { - "epoch": 19.062842242503258, - "grad_norm": 1.5331544876098633, - "learning_rate": 6.377889447236182e-05, - "loss": 4.9131, - "step": 36553 - }, - { - "epoch": 19.063363754889178, - "grad_norm": 1.6172598600387573, - "learning_rate": 6.377788944723618e-05, - "loss": 5.0992, - "step": 36554 - }, - { - "epoch": 19.063885267275097, - "grad_norm": 1.6446874141693115, - "learning_rate": 6.377688442211056e-05, - "loss": 4.5598, - "step": 36555 - }, - { - "epoch": 19.064406779661017, - "grad_norm": 1.6799814701080322, - "learning_rate": 6.377587939698492e-05, - "loss": 4.8566, - "step": 36556 - }, - { - "epoch": 19.064928292046936, - "grad_norm": 1.6671537160873413, - "learning_rate": 6.377487437185929e-05, - "loss": 5.4178, - "step": 36557 - }, - { - "epoch": 19.065449804432856, - "grad_norm": 1.6838674545288086, - "learning_rate": 6.377386934673367e-05, - "loss": 5.0808, - "step": 36558 - }, - { - "epoch": 19.065971316818775, - "grad_norm": 1.6032413244247437, - "learning_rate": 6.377286432160804e-05, - "loss": 5.0605, - "step": 36559 - }, - { - "epoch": 19.066492829204694, - "grad_norm": 1.5606235265731812, - "learning_rate": 6.377185929648242e-05, - "loss": 5.3278, - "step": 36560 - }, - { - "epoch": 19.067014341590614, - "grad_norm": 1.4803038835525513, - "learning_rate": 6.377085427135679e-05, - "loss": 5.6547, - "step": 36561 - }, - { - "epoch": 19.067535853976533, - "grad_norm": 1.5093634128570557, - "learning_rate": 6.376984924623116e-05, - "loss": 5.2453, - "step": 36562 - }, - { - "epoch": 19.068057366362453, - "grad_norm": 1.5818591117858887, - "learning_rate": 6.376884422110553e-05, - "loss": 5.366, - "step": 36563 - }, - { - "epoch": 19.06857887874837, - "grad_norm": 1.543556571006775, - "learning_rate": 6.37678391959799e-05, - "loss": 5.2485, - "step": 36564 - }, - { - "epoch": 19.06910039113429, - "grad_norm": 1.5193630456924438, - "learning_rate": 6.376683417085427e-05, - "loss": 5.2204, - "step": 36565 - }, - { - "epoch": 19.069621903520208, - "grad_norm": 1.562666654586792, - "learning_rate": 6.376582914572865e-05, - "loss": 5.0643, - "step": 36566 - }, - { - "epoch": 19.070143415906127, - "grad_norm": 1.5432220697402954, - "learning_rate": 6.376482412060301e-05, - "loss": 5.401, - "step": 36567 - }, - { - "epoch": 19.070664928292047, - "grad_norm": 1.6121044158935547, - "learning_rate": 6.376381909547739e-05, - "loss": 5.0907, - "step": 36568 - }, - { - "epoch": 19.071186440677966, - "grad_norm": 1.477071762084961, - "learning_rate": 6.376281407035176e-05, - "loss": 5.5063, - "step": 36569 - }, - { - "epoch": 19.071707953063886, - "grad_norm": 1.5291582345962524, - "learning_rate": 6.376180904522613e-05, - "loss": 5.4276, - "step": 36570 - }, - { - "epoch": 19.072229465449805, - "grad_norm": 1.6010515689849854, - "learning_rate": 6.376080402010051e-05, - "loss": 4.6766, - "step": 36571 - }, - { - "epoch": 19.072750977835724, - "grad_norm": 1.5369950532913208, - "learning_rate": 6.375979899497487e-05, - "loss": 5.354, - "step": 36572 - }, - { - "epoch": 19.073272490221644, - "grad_norm": 1.6257680654525757, - "learning_rate": 6.375879396984925e-05, - "loss": 4.9673, - "step": 36573 - }, - { - "epoch": 19.073794002607563, - "grad_norm": 1.3857817649841309, - "learning_rate": 6.375778894472362e-05, - "loss": 4.3686, - "step": 36574 - }, - { - "epoch": 19.07431551499348, - "grad_norm": 1.5214663743972778, - "learning_rate": 6.3756783919598e-05, - "loss": 5.4902, - "step": 36575 - }, - { - "epoch": 19.0748370273794, - "grad_norm": 1.4161297082901, - "learning_rate": 6.375577889447236e-05, - "loss": 5.6936, - "step": 36576 - }, - { - "epoch": 19.07535853976532, - "grad_norm": 1.5125356912612915, - "learning_rate": 6.375477386934674e-05, - "loss": 5.6921, - "step": 36577 - }, - { - "epoch": 19.075880052151238, - "grad_norm": 1.4842946529388428, - "learning_rate": 6.37537688442211e-05, - "loss": 5.6686, - "step": 36578 - }, - { - "epoch": 19.076401564537157, - "grad_norm": 1.4877196550369263, - "learning_rate": 6.375276381909548e-05, - "loss": 5.6936, - "step": 36579 - }, - { - "epoch": 19.076923076923077, - "grad_norm": 1.462684154510498, - "learning_rate": 6.375175879396986e-05, - "loss": 5.1331, - "step": 36580 - }, - { - "epoch": 19.077444589308996, - "grad_norm": 1.510770559310913, - "learning_rate": 6.375075376884423e-05, - "loss": 5.0803, - "step": 36581 - }, - { - "epoch": 19.077966101694916, - "grad_norm": 1.5933682918548584, - "learning_rate": 6.37497487437186e-05, - "loss": 5.0981, - "step": 36582 - }, - { - "epoch": 19.078487614080835, - "grad_norm": 1.4598448276519775, - "learning_rate": 6.374874371859296e-05, - "loss": 5.0569, - "step": 36583 - }, - { - "epoch": 19.079009126466755, - "grad_norm": 1.6159898042678833, - "learning_rate": 6.374773869346734e-05, - "loss": 4.8164, - "step": 36584 - }, - { - "epoch": 19.079530638852674, - "grad_norm": 1.4630815982818604, - "learning_rate": 6.37467336683417e-05, - "loss": 5.3505, - "step": 36585 - }, - { - "epoch": 19.080052151238593, - "grad_norm": 1.4564765691757202, - "learning_rate": 6.374572864321608e-05, - "loss": 5.5377, - "step": 36586 - }, - { - "epoch": 19.08057366362451, - "grad_norm": 1.5831578969955444, - "learning_rate": 6.374472361809045e-05, - "loss": 5.0242, - "step": 36587 - }, - { - "epoch": 19.08109517601043, - "grad_norm": 1.4714878797531128, - "learning_rate": 6.374371859296483e-05, - "loss": 5.4547, - "step": 36588 - }, - { - "epoch": 19.08161668839635, - "grad_norm": 1.492266058921814, - "learning_rate": 6.374271356783919e-05, - "loss": 5.3775, - "step": 36589 - }, - { - "epoch": 19.082138200782268, - "grad_norm": 1.4242206811904907, - "learning_rate": 6.374170854271357e-05, - "loss": 5.4771, - "step": 36590 - }, - { - "epoch": 19.082659713168187, - "grad_norm": 1.4412202835083008, - "learning_rate": 6.374070351758795e-05, - "loss": 5.4024, - "step": 36591 - }, - { - "epoch": 19.083181225554107, - "grad_norm": 1.4410947561264038, - "learning_rate": 6.373969849246232e-05, - "loss": 5.2565, - "step": 36592 - }, - { - "epoch": 19.083702737940026, - "grad_norm": 1.4974161386489868, - "learning_rate": 6.373869346733669e-05, - "loss": 5.1217, - "step": 36593 - }, - { - "epoch": 19.084224250325946, - "grad_norm": 1.516315221786499, - "learning_rate": 6.373768844221107e-05, - "loss": 5.3457, - "step": 36594 - }, - { - "epoch": 19.084745762711865, - "grad_norm": 1.5183523893356323, - "learning_rate": 6.373668341708543e-05, - "loss": 4.8831, - "step": 36595 - }, - { - "epoch": 19.085267275097785, - "grad_norm": 1.5317456722259521, - "learning_rate": 6.37356783919598e-05, - "loss": 5.4046, - "step": 36596 - }, - { - "epoch": 19.085788787483704, - "grad_norm": 1.4952510595321655, - "learning_rate": 6.373467336683417e-05, - "loss": 5.7256, - "step": 36597 - }, - { - "epoch": 19.086310299869623, - "grad_norm": 1.515519380569458, - "learning_rate": 6.373366834170854e-05, - "loss": 5.4626, - "step": 36598 - }, - { - "epoch": 19.08683181225554, - "grad_norm": 1.6007788181304932, - "learning_rate": 6.373266331658291e-05, - "loss": 5.3058, - "step": 36599 - }, - { - "epoch": 19.08735332464146, - "grad_norm": 1.6001778841018677, - "learning_rate": 6.373165829145729e-05, - "loss": 5.0834, - "step": 36600 - }, - { - "epoch": 19.08787483702738, - "grad_norm": 1.5111722946166992, - "learning_rate": 6.373065326633167e-05, - "loss": 5.3711, - "step": 36601 - }, - { - "epoch": 19.088396349413298, - "grad_norm": 1.53868567943573, - "learning_rate": 6.372964824120603e-05, - "loss": 4.7364, - "step": 36602 - }, - { - "epoch": 19.088917861799217, - "grad_norm": 1.5261507034301758, - "learning_rate": 6.372864321608041e-05, - "loss": 5.1144, - "step": 36603 - }, - { - "epoch": 19.089439374185137, - "grad_norm": 1.563050627708435, - "learning_rate": 6.372763819095478e-05, - "loss": 5.2185, - "step": 36604 - }, - { - "epoch": 19.089960886571056, - "grad_norm": 1.6243329048156738, - "learning_rate": 6.372663316582915e-05, - "loss": 5.4385, - "step": 36605 - }, - { - "epoch": 19.090482398956976, - "grad_norm": 1.4927983283996582, - "learning_rate": 6.372562814070352e-05, - "loss": 4.7481, - "step": 36606 - }, - { - "epoch": 19.091003911342895, - "grad_norm": 1.6091526746749878, - "learning_rate": 6.37246231155779e-05, - "loss": 5.0116, - "step": 36607 - }, - { - "epoch": 19.091525423728815, - "grad_norm": 1.5347274541854858, - "learning_rate": 6.372361809045226e-05, - "loss": 5.2605, - "step": 36608 - }, - { - "epoch": 19.092046936114734, - "grad_norm": 1.5131807327270508, - "learning_rate": 6.372261306532664e-05, - "loss": 5.2358, - "step": 36609 - }, - { - "epoch": 19.092568448500653, - "grad_norm": 1.5860117673873901, - "learning_rate": 6.3721608040201e-05, - "loss": 4.2869, - "step": 36610 - }, - { - "epoch": 19.09308996088657, - "grad_norm": 1.569191575050354, - "learning_rate": 6.372060301507538e-05, - "loss": 4.7987, - "step": 36611 - }, - { - "epoch": 19.09361147327249, - "grad_norm": 1.5398077964782715, - "learning_rate": 6.371959798994976e-05, - "loss": 5.1365, - "step": 36612 - }, - { - "epoch": 19.09413298565841, - "grad_norm": 1.4519495964050293, - "learning_rate": 6.371859296482412e-05, - "loss": 5.5062, - "step": 36613 - }, - { - "epoch": 19.094654498044328, - "grad_norm": 1.5475866794586182, - "learning_rate": 6.37175879396985e-05, - "loss": 5.4709, - "step": 36614 - }, - { - "epoch": 19.095176010430247, - "grad_norm": 1.482215166091919, - "learning_rate": 6.371658291457286e-05, - "loss": 5.4117, - "step": 36615 - }, - { - "epoch": 19.095697522816167, - "grad_norm": 1.5125893354415894, - "learning_rate": 6.371557788944724e-05, - "loss": 5.1712, - "step": 36616 - }, - { - "epoch": 19.096219035202086, - "grad_norm": 1.5505627393722534, - "learning_rate": 6.37145728643216e-05, - "loss": 4.6246, - "step": 36617 - }, - { - "epoch": 19.096740547588006, - "grad_norm": 1.5194066762924194, - "learning_rate": 6.371356783919598e-05, - "loss": 5.5317, - "step": 36618 - }, - { - "epoch": 19.097262059973925, - "grad_norm": 1.6286145448684692, - "learning_rate": 6.371256281407035e-05, - "loss": 5.2625, - "step": 36619 - }, - { - "epoch": 19.097783572359845, - "grad_norm": 1.5543354749679565, - "learning_rate": 6.371155778894473e-05, - "loss": 5.3475, - "step": 36620 - }, - { - "epoch": 19.098305084745764, - "grad_norm": 1.5435441732406616, - "learning_rate": 6.37105527638191e-05, - "loss": 5.7836, - "step": 36621 - }, - { - "epoch": 19.098826597131684, - "grad_norm": 1.5697647333145142, - "learning_rate": 6.370954773869348e-05, - "loss": 5.5183, - "step": 36622 - }, - { - "epoch": 19.0993481095176, - "grad_norm": 1.414245367050171, - "learning_rate": 6.370854271356785e-05, - "loss": 5.1591, - "step": 36623 - }, - { - "epoch": 19.09986962190352, - "grad_norm": 1.4757938385009766, - "learning_rate": 6.370753768844221e-05, - "loss": 5.1909, - "step": 36624 - }, - { - "epoch": 19.10039113428944, - "grad_norm": 1.4704240560531616, - "learning_rate": 6.370653266331659e-05, - "loss": 4.8111, - "step": 36625 - }, - { - "epoch": 19.100912646675358, - "grad_norm": 1.5882112979888916, - "learning_rate": 6.370552763819095e-05, - "loss": 5.3636, - "step": 36626 - }, - { - "epoch": 19.101434159061277, - "grad_norm": 1.4745354652404785, - "learning_rate": 6.370452261306533e-05, - "loss": 5.0016, - "step": 36627 - }, - { - "epoch": 19.101955671447197, - "grad_norm": 1.4902204275131226, - "learning_rate": 6.37035175879397e-05, - "loss": 5.0485, - "step": 36628 - }, - { - "epoch": 19.102477183833116, - "grad_norm": 1.547548532485962, - "learning_rate": 6.370251256281407e-05, - "loss": 4.6967, - "step": 36629 - }, - { - "epoch": 19.102998696219036, - "grad_norm": 1.4887185096740723, - "learning_rate": 6.370150753768844e-05, - "loss": 5.3562, - "step": 36630 - }, - { - "epoch": 19.103520208604955, - "grad_norm": 1.453380823135376, - "learning_rate": 6.370050251256281e-05, - "loss": 5.0912, - "step": 36631 - }, - { - "epoch": 19.104041720990875, - "grad_norm": 1.5910871028900146, - "learning_rate": 6.369949748743719e-05, - "loss": 5.1222, - "step": 36632 - }, - { - "epoch": 19.104563233376794, - "grad_norm": 1.508381962776184, - "learning_rate": 6.369849246231157e-05, - "loss": 5.1156, - "step": 36633 - }, - { - "epoch": 19.105084745762714, - "grad_norm": 1.5169093608856201, - "learning_rate": 6.369748743718593e-05, - "loss": 5.444, - "step": 36634 - }, - { - "epoch": 19.10560625814863, - "grad_norm": 1.4904595613479614, - "learning_rate": 6.369648241206031e-05, - "loss": 5.091, - "step": 36635 - }, - { - "epoch": 19.10612777053455, - "grad_norm": 1.5362987518310547, - "learning_rate": 6.369547738693468e-05, - "loss": 5.0145, - "step": 36636 - }, - { - "epoch": 19.10664928292047, - "grad_norm": 1.5943119525909424, - "learning_rate": 6.369447236180904e-05, - "loss": 5.1033, - "step": 36637 - }, - { - "epoch": 19.107170795306388, - "grad_norm": 1.8877424001693726, - "learning_rate": 6.369346733668342e-05, - "loss": 4.3844, - "step": 36638 - }, - { - "epoch": 19.107692307692307, - "grad_norm": 1.522752046585083, - "learning_rate": 6.369246231155778e-05, - "loss": 5.6336, - "step": 36639 - }, - { - "epoch": 19.108213820078227, - "grad_norm": 1.5923227071762085, - "learning_rate": 6.369145728643216e-05, - "loss": 5.1493, - "step": 36640 - }, - { - "epoch": 19.108735332464146, - "grad_norm": 1.5385719537734985, - "learning_rate": 6.369045226130654e-05, - "loss": 5.2029, - "step": 36641 - }, - { - "epoch": 19.109256844850066, - "grad_norm": 1.4773063659667969, - "learning_rate": 6.368944723618092e-05, - "loss": 5.3868, - "step": 36642 - }, - { - "epoch": 19.109778357235985, - "grad_norm": 1.523688793182373, - "learning_rate": 6.368844221105528e-05, - "loss": 5.4164, - "step": 36643 - }, - { - "epoch": 19.110299869621905, - "grad_norm": 1.4781272411346436, - "learning_rate": 6.368743718592966e-05, - "loss": 5.3316, - "step": 36644 - }, - { - "epoch": 19.110821382007824, - "grad_norm": 1.5225461721420288, - "learning_rate": 6.368643216080402e-05, - "loss": 5.4767, - "step": 36645 - }, - { - "epoch": 19.111342894393744, - "grad_norm": 1.5292868614196777, - "learning_rate": 6.36854271356784e-05, - "loss": 5.3126, - "step": 36646 - }, - { - "epoch": 19.11186440677966, - "grad_norm": 1.4513019323349, - "learning_rate": 6.368442211055276e-05, - "loss": 5.4346, - "step": 36647 - }, - { - "epoch": 19.11238591916558, - "grad_norm": 1.5161265134811401, - "learning_rate": 6.368341708542714e-05, - "loss": 5.283, - "step": 36648 - }, - { - "epoch": 19.1129074315515, - "grad_norm": 1.483806848526001, - "learning_rate": 6.368241206030151e-05, - "loss": 5.59, - "step": 36649 - }, - { - "epoch": 19.113428943937418, - "grad_norm": 1.5715782642364502, - "learning_rate": 6.368140703517587e-05, - "loss": 5.1454, - "step": 36650 - }, - { - "epoch": 19.113950456323337, - "grad_norm": 1.9295519590377808, - "learning_rate": 6.368040201005025e-05, - "loss": 4.9613, - "step": 36651 - }, - { - "epoch": 19.114471968709257, - "grad_norm": 1.5225756168365479, - "learning_rate": 6.367939698492463e-05, - "loss": 5.497, - "step": 36652 - }, - { - "epoch": 19.114993481095176, - "grad_norm": 1.585646629333496, - "learning_rate": 6.3678391959799e-05, - "loss": 4.8232, - "step": 36653 - }, - { - "epoch": 19.115514993481096, - "grad_norm": 1.4593799114227295, - "learning_rate": 6.367738693467337e-05, - "loss": 5.4117, - "step": 36654 - }, - { - "epoch": 19.116036505867015, - "grad_norm": 1.4406676292419434, - "learning_rate": 6.367638190954775e-05, - "loss": 5.5029, - "step": 36655 - }, - { - "epoch": 19.116558018252935, - "grad_norm": 1.45802640914917, - "learning_rate": 6.367537688442211e-05, - "loss": 5.4294, - "step": 36656 - }, - { - "epoch": 19.117079530638854, - "grad_norm": 1.4387221336364746, - "learning_rate": 6.367437185929649e-05, - "loss": 5.5685, - "step": 36657 - }, - { - "epoch": 19.117601043024774, - "grad_norm": 1.4047938585281372, - "learning_rate": 6.367336683417085e-05, - "loss": 5.6091, - "step": 36658 - }, - { - "epoch": 19.11812255541069, - "grad_norm": 1.4875246286392212, - "learning_rate": 6.367236180904523e-05, - "loss": 5.2661, - "step": 36659 - }, - { - "epoch": 19.11864406779661, - "grad_norm": 1.5932927131652832, - "learning_rate": 6.36713567839196e-05, - "loss": 5.1668, - "step": 36660 - }, - { - "epoch": 19.11916558018253, - "grad_norm": 1.4666935205459595, - "learning_rate": 6.367035175879397e-05, - "loss": 5.2257, - "step": 36661 - }, - { - "epoch": 19.119687092568448, - "grad_norm": 1.6545078754425049, - "learning_rate": 6.366934673366835e-05, - "loss": 4.8783, - "step": 36662 - }, - { - "epoch": 19.120208604954367, - "grad_norm": 1.4923994541168213, - "learning_rate": 6.366834170854272e-05, - "loss": 5.4607, - "step": 36663 - }, - { - "epoch": 19.120730117340287, - "grad_norm": 1.4454237222671509, - "learning_rate": 6.366733668341709e-05, - "loss": 5.6011, - "step": 36664 - }, - { - "epoch": 19.121251629726206, - "grad_norm": 1.544828176498413, - "learning_rate": 6.366633165829146e-05, - "loss": 5.2578, - "step": 36665 - }, - { - "epoch": 19.121773142112126, - "grad_norm": 1.5267301797866821, - "learning_rate": 6.366532663316584e-05, - "loss": 5.2091, - "step": 36666 - }, - { - "epoch": 19.122294654498045, - "grad_norm": 1.4454506635665894, - "learning_rate": 6.36643216080402e-05, - "loss": 5.6723, - "step": 36667 - }, - { - "epoch": 19.122816166883965, - "grad_norm": 1.4652971029281616, - "learning_rate": 6.366331658291458e-05, - "loss": 5.1305, - "step": 36668 - }, - { - "epoch": 19.123337679269884, - "grad_norm": 1.6296154260635376, - "learning_rate": 6.366231155778894e-05, - "loss": 4.9305, - "step": 36669 - }, - { - "epoch": 19.1238591916558, - "grad_norm": 1.498972773551941, - "learning_rate": 6.366130653266332e-05, - "loss": 5.5859, - "step": 36670 - }, - { - "epoch": 19.12438070404172, - "grad_norm": 1.5503907203674316, - "learning_rate": 6.366030150753768e-05, - "loss": 5.2768, - "step": 36671 - }, - { - "epoch": 19.12490221642764, - "grad_norm": 1.580798625946045, - "learning_rate": 6.365929648241206e-05, - "loss": 4.6612, - "step": 36672 - }, - { - "epoch": 19.12542372881356, - "grad_norm": 1.5076416730880737, - "learning_rate": 6.365829145728644e-05, - "loss": 5.6805, - "step": 36673 - }, - { - "epoch": 19.125945241199478, - "grad_norm": 1.4653459787368774, - "learning_rate": 6.365728643216082e-05, - "loss": 5.4984, - "step": 36674 - }, - { - "epoch": 19.126466753585397, - "grad_norm": 1.5098403692245483, - "learning_rate": 6.365628140703518e-05, - "loss": 5.5689, - "step": 36675 - }, - { - "epoch": 19.126988265971317, - "grad_norm": 1.5670430660247803, - "learning_rate": 6.365527638190955e-05, - "loss": 5.4045, - "step": 36676 - }, - { - "epoch": 19.127509778357236, - "grad_norm": 1.6365662813186646, - "learning_rate": 6.365427135678392e-05, - "loss": 4.9307, - "step": 36677 - }, - { - "epoch": 19.128031290743156, - "grad_norm": 1.390795111656189, - "learning_rate": 6.365326633165829e-05, - "loss": 5.4529, - "step": 36678 - }, - { - "epoch": 19.128552803129075, - "grad_norm": 1.5535682439804077, - "learning_rate": 6.365226130653267e-05, - "loss": 4.885, - "step": 36679 - }, - { - "epoch": 19.129074315514995, - "grad_norm": 1.6099435091018677, - "learning_rate": 6.365125628140703e-05, - "loss": 5.5162, - "step": 36680 - }, - { - "epoch": 19.129595827900914, - "grad_norm": 1.4569209814071655, - "learning_rate": 6.365025125628141e-05, - "loss": 5.2667, - "step": 36681 - }, - { - "epoch": 19.13011734028683, - "grad_norm": 1.5156009197235107, - "learning_rate": 6.364924623115579e-05, - "loss": 5.4432, - "step": 36682 - }, - { - "epoch": 19.13063885267275, - "grad_norm": 1.4014796018600464, - "learning_rate": 6.364824120603016e-05, - "loss": 5.4455, - "step": 36683 - }, - { - "epoch": 19.13116036505867, - "grad_norm": 1.5175632238388062, - "learning_rate": 6.364723618090453e-05, - "loss": 5.5212, - "step": 36684 - }, - { - "epoch": 19.13168187744459, - "grad_norm": 1.5460649728775024, - "learning_rate": 6.36462311557789e-05, - "loss": 5.2031, - "step": 36685 - }, - { - "epoch": 19.132203389830508, - "grad_norm": 1.5894851684570312, - "learning_rate": 6.364522613065327e-05, - "loss": 5.0905, - "step": 36686 - }, - { - "epoch": 19.132724902216427, - "grad_norm": 1.450091004371643, - "learning_rate": 6.364422110552765e-05, - "loss": 5.7567, - "step": 36687 - }, - { - "epoch": 19.133246414602347, - "grad_norm": 1.463516116142273, - "learning_rate": 6.364321608040201e-05, - "loss": 5.249, - "step": 36688 - }, - { - "epoch": 19.133767926988266, - "grad_norm": 1.557680606842041, - "learning_rate": 6.364221105527638e-05, - "loss": 5.2718, - "step": 36689 - }, - { - "epoch": 19.134289439374186, - "grad_norm": 1.5359597206115723, - "learning_rate": 6.364120603015075e-05, - "loss": 5.1885, - "step": 36690 - }, - { - "epoch": 19.134810951760105, - "grad_norm": 1.513721227645874, - "learning_rate": 6.364020100502512e-05, - "loss": 5.2285, - "step": 36691 - }, - { - "epoch": 19.135332464146025, - "grad_norm": 1.5070686340332031, - "learning_rate": 6.36391959798995e-05, - "loss": 5.2909, - "step": 36692 - }, - { - "epoch": 19.135853976531944, - "grad_norm": 1.6413143873214722, - "learning_rate": 6.363819095477387e-05, - "loss": 5.2341, - "step": 36693 - }, - { - "epoch": 19.13637548891786, - "grad_norm": 1.4899499416351318, - "learning_rate": 6.363718592964825e-05, - "loss": 4.8422, - "step": 36694 - }, - { - "epoch": 19.13689700130378, - "grad_norm": 1.553184151649475, - "learning_rate": 6.363618090452262e-05, - "loss": 5.456, - "step": 36695 - }, - { - "epoch": 19.1374185136897, - "grad_norm": 1.5641809701919556, - "learning_rate": 6.3635175879397e-05, - "loss": 4.6049, - "step": 36696 - }, - { - "epoch": 19.13794002607562, - "grad_norm": 1.4524518251419067, - "learning_rate": 6.363417085427136e-05, - "loss": 5.2979, - "step": 36697 - }, - { - "epoch": 19.138461538461538, - "grad_norm": 1.5345206260681152, - "learning_rate": 6.363316582914574e-05, - "loss": 4.7639, - "step": 36698 - }, - { - "epoch": 19.138983050847457, - "grad_norm": 1.3980988264083862, - "learning_rate": 6.36321608040201e-05, - "loss": 5.2046, - "step": 36699 - }, - { - "epoch": 19.139504563233377, - "grad_norm": 1.519384503364563, - "learning_rate": 6.363115577889448e-05, - "loss": 5.0816, - "step": 36700 - }, - { - "epoch": 19.140026075619296, - "grad_norm": 1.4587405920028687, - "learning_rate": 6.363015075376884e-05, - "loss": 5.2695, - "step": 36701 - }, - { - "epoch": 19.140547588005216, - "grad_norm": 1.5970838069915771, - "learning_rate": 6.362914572864322e-05, - "loss": 5.1794, - "step": 36702 - }, - { - "epoch": 19.141069100391135, - "grad_norm": 1.4958934783935547, - "learning_rate": 6.36281407035176e-05, - "loss": 5.2617, - "step": 36703 - }, - { - "epoch": 19.141590612777055, - "grad_norm": 1.515602707862854, - "learning_rate": 6.362713567839196e-05, - "loss": 5.5411, - "step": 36704 - }, - { - "epoch": 19.142112125162974, - "grad_norm": 1.4052804708480835, - "learning_rate": 6.362613065326634e-05, - "loss": 5.8909, - "step": 36705 - }, - { - "epoch": 19.14263363754889, - "grad_norm": 1.4969438314437866, - "learning_rate": 6.36251256281407e-05, - "loss": 5.366, - "step": 36706 - }, - { - "epoch": 19.14315514993481, - "grad_norm": 1.5021653175354004, - "learning_rate": 6.362412060301508e-05, - "loss": 5.0925, - "step": 36707 - }, - { - "epoch": 19.14367666232073, - "grad_norm": 1.423660159111023, - "learning_rate": 6.362311557788945e-05, - "loss": 5.4912, - "step": 36708 - }, - { - "epoch": 19.14419817470665, - "grad_norm": 1.5736644268035889, - "learning_rate": 6.362211055276382e-05, - "loss": 5.1471, - "step": 36709 - }, - { - "epoch": 19.144719687092568, - "grad_norm": 1.5139347314834595, - "learning_rate": 6.362110552763819e-05, - "loss": 5.2065, - "step": 36710 - }, - { - "epoch": 19.145241199478487, - "grad_norm": 1.5472419261932373, - "learning_rate": 6.362010050251257e-05, - "loss": 5.2008, - "step": 36711 - }, - { - "epoch": 19.145762711864407, - "grad_norm": 1.3990893363952637, - "learning_rate": 6.361909547738693e-05, - "loss": 5.6672, - "step": 36712 - }, - { - "epoch": 19.146284224250326, - "grad_norm": 1.4895142316818237, - "learning_rate": 6.361809045226131e-05, - "loss": 5.4265, - "step": 36713 - }, - { - "epoch": 19.146805736636246, - "grad_norm": 1.5120099782943726, - "learning_rate": 6.361708542713569e-05, - "loss": 5.7039, - "step": 36714 - }, - { - "epoch": 19.147327249022165, - "grad_norm": 1.5210435390472412, - "learning_rate": 6.361608040201006e-05, - "loss": 5.5757, - "step": 36715 - }, - { - "epoch": 19.147848761408085, - "grad_norm": 1.439422845840454, - "learning_rate": 6.361507537688443e-05, - "loss": 5.5971, - "step": 36716 - }, - { - "epoch": 19.148370273794004, - "grad_norm": 1.4505170583724976, - "learning_rate": 6.361407035175879e-05, - "loss": 5.5323, - "step": 36717 - }, - { - "epoch": 19.14889178617992, - "grad_norm": 1.6118375062942505, - "learning_rate": 6.361306532663317e-05, - "loss": 5.0422, - "step": 36718 - }, - { - "epoch": 19.14941329856584, - "grad_norm": 1.4072320461273193, - "learning_rate": 6.361206030150753e-05, - "loss": 5.846, - "step": 36719 - }, - { - "epoch": 19.14993481095176, - "grad_norm": 1.4924556016921997, - "learning_rate": 6.361105527638191e-05, - "loss": 5.2857, - "step": 36720 - }, - { - "epoch": 19.15045632333768, - "grad_norm": 1.4349915981292725, - "learning_rate": 6.361005025125628e-05, - "loss": 5.372, - "step": 36721 - }, - { - "epoch": 19.150977835723598, - "grad_norm": 1.5508767366409302, - "learning_rate": 6.360904522613065e-05, - "loss": 5.0448, - "step": 36722 - }, - { - "epoch": 19.151499348109517, - "grad_norm": 1.4822667837142944, - "learning_rate": 6.360804020100503e-05, - "loss": 5.1718, - "step": 36723 - }, - { - "epoch": 19.152020860495437, - "grad_norm": 1.5021687746047974, - "learning_rate": 6.360703517587941e-05, - "loss": 5.3285, - "step": 36724 - }, - { - "epoch": 19.152542372881356, - "grad_norm": 1.5797396898269653, - "learning_rate": 6.360603015075377e-05, - "loss": 4.7987, - "step": 36725 - }, - { - "epoch": 19.153063885267276, - "grad_norm": 1.550919771194458, - "learning_rate": 6.360502512562815e-05, - "loss": 5.483, - "step": 36726 - }, - { - "epoch": 19.153585397653195, - "grad_norm": 1.5435576438903809, - "learning_rate": 6.360402010050252e-05, - "loss": 5.1494, - "step": 36727 - }, - { - "epoch": 19.154106910039115, - "grad_norm": 1.5107754468917847, - "learning_rate": 6.36030150753769e-05, - "loss": 5.3998, - "step": 36728 - }, - { - "epoch": 19.154628422425034, - "grad_norm": 1.5041286945343018, - "learning_rate": 6.360201005025126e-05, - "loss": 5.3143, - "step": 36729 - }, - { - "epoch": 19.15514993481095, - "grad_norm": 1.4255475997924805, - "learning_rate": 6.360100502512562e-05, - "loss": 5.5105, - "step": 36730 - }, - { - "epoch": 19.15567144719687, - "grad_norm": 1.6097819805145264, - "learning_rate": 6.36e-05, - "loss": 4.6624, - "step": 36731 - }, - { - "epoch": 19.15619295958279, - "grad_norm": 1.448428750038147, - "learning_rate": 6.359899497487437e-05, - "loss": 5.5891, - "step": 36732 - }, - { - "epoch": 19.15671447196871, - "grad_norm": 1.6476534605026245, - "learning_rate": 6.359798994974874e-05, - "loss": 5.0132, - "step": 36733 - }, - { - "epoch": 19.157235984354628, - "grad_norm": 1.4491063356399536, - "learning_rate": 6.359698492462312e-05, - "loss": 5.2638, - "step": 36734 - }, - { - "epoch": 19.157757496740548, - "grad_norm": 1.4501032829284668, - "learning_rate": 6.35959798994975e-05, - "loss": 5.4452, - "step": 36735 - }, - { - "epoch": 19.158279009126467, - "grad_norm": 1.5640023946762085, - "learning_rate": 6.359497487437186e-05, - "loss": 5.1814, - "step": 36736 - }, - { - "epoch": 19.158800521512386, - "grad_norm": 1.462880253791809, - "learning_rate": 6.359396984924624e-05, - "loss": 5.1419, - "step": 36737 - }, - { - "epoch": 19.159322033898306, - "grad_norm": 1.3963650465011597, - "learning_rate": 6.35929648241206e-05, - "loss": 5.7111, - "step": 36738 - }, - { - "epoch": 19.159843546284225, - "grad_norm": 1.5613547563552856, - "learning_rate": 6.359195979899498e-05, - "loss": 5.1805, - "step": 36739 - }, - { - "epoch": 19.160365058670145, - "grad_norm": 1.4732403755187988, - "learning_rate": 6.359095477386935e-05, - "loss": 5.4086, - "step": 36740 - }, - { - "epoch": 19.160886571056064, - "grad_norm": 1.4188930988311768, - "learning_rate": 6.358994974874373e-05, - "loss": 5.4696, - "step": 36741 - }, - { - "epoch": 19.16140808344198, - "grad_norm": 1.5713647603988647, - "learning_rate": 6.358894472361809e-05, - "loss": 4.8627, - "step": 36742 - }, - { - "epoch": 19.1619295958279, - "grad_norm": 1.5737862586975098, - "learning_rate": 6.358793969849245e-05, - "loss": 5.2368, - "step": 36743 - }, - { - "epoch": 19.16245110821382, - "grad_norm": 1.4565887451171875, - "learning_rate": 6.358693467336683e-05, - "loss": 5.0875, - "step": 36744 - }, - { - "epoch": 19.16297262059974, - "grad_norm": 1.4393203258514404, - "learning_rate": 6.358592964824121e-05, - "loss": 5.2719, - "step": 36745 - }, - { - "epoch": 19.163494132985658, - "grad_norm": 1.711273431777954, - "learning_rate": 6.358492462311559e-05, - "loss": 4.6576, - "step": 36746 - }, - { - "epoch": 19.164015645371578, - "grad_norm": 1.5344659090042114, - "learning_rate": 6.358391959798995e-05, - "loss": 5.1877, - "step": 36747 - }, - { - "epoch": 19.164537157757497, - "grad_norm": 1.4457507133483887, - "learning_rate": 6.358291457286433e-05, - "loss": 5.3724, - "step": 36748 - }, - { - "epoch": 19.165058670143416, - "grad_norm": 1.5495229959487915, - "learning_rate": 6.35819095477387e-05, - "loss": 5.1971, - "step": 36749 - }, - { - "epoch": 19.165580182529336, - "grad_norm": 1.5006396770477295, - "learning_rate": 6.358090452261307e-05, - "loss": 5.5803, - "step": 36750 - }, - { - "epoch": 19.166101694915255, - "grad_norm": 1.5812116861343384, - "learning_rate": 6.357989949748744e-05, - "loss": 5.0205, - "step": 36751 - }, - { - "epoch": 19.166623207301175, - "grad_norm": 1.5254671573638916, - "learning_rate": 6.357889447236181e-05, - "loss": 5.1561, - "step": 36752 - }, - { - "epoch": 19.167144719687094, - "grad_norm": 1.4609397649765015, - "learning_rate": 6.357788944723618e-05, - "loss": 5.0739, - "step": 36753 - }, - { - "epoch": 19.16766623207301, - "grad_norm": 1.593964695930481, - "learning_rate": 6.357688442211056e-05, - "loss": 5.4341, - "step": 36754 - }, - { - "epoch": 19.16818774445893, - "grad_norm": 1.5672165155410767, - "learning_rate": 6.357587939698493e-05, - "loss": 5.3494, - "step": 36755 - }, - { - "epoch": 19.16870925684485, - "grad_norm": 1.514840006828308, - "learning_rate": 6.35748743718593e-05, - "loss": 5.3843, - "step": 36756 - }, - { - "epoch": 19.16923076923077, - "grad_norm": 1.5333455801010132, - "learning_rate": 6.357386934673368e-05, - "loss": 5.0843, - "step": 36757 - }, - { - "epoch": 19.169752281616688, - "grad_norm": 1.435300588607788, - "learning_rate": 6.357286432160804e-05, - "loss": 5.2276, - "step": 36758 - }, - { - "epoch": 19.170273794002608, - "grad_norm": 1.4682116508483887, - "learning_rate": 6.357185929648242e-05, - "loss": 5.5223, - "step": 36759 - }, - { - "epoch": 19.170795306388527, - "grad_norm": 1.5379986763000488, - "learning_rate": 6.357085427135678e-05, - "loss": 5.249, - "step": 36760 - }, - { - "epoch": 19.171316818774446, - "grad_norm": 1.6812586784362793, - "learning_rate": 6.356984924623116e-05, - "loss": 5.2766, - "step": 36761 - }, - { - "epoch": 19.171838331160366, - "grad_norm": 1.508056402206421, - "learning_rate": 6.356884422110552e-05, - "loss": 5.0748, - "step": 36762 - }, - { - "epoch": 19.172359843546285, - "grad_norm": 1.5261307954788208, - "learning_rate": 6.35678391959799e-05, - "loss": 5.2117, - "step": 36763 - }, - { - "epoch": 19.172881355932205, - "grad_norm": 1.5969741344451904, - "learning_rate": 6.356683417085427e-05, - "loss": 4.8812, - "step": 36764 - }, - { - "epoch": 19.17340286831812, - "grad_norm": 1.4700530767440796, - "learning_rate": 6.356582914572864e-05, - "loss": 5.6947, - "step": 36765 - }, - { - "epoch": 19.17392438070404, - "grad_norm": 1.667560338973999, - "learning_rate": 6.356482412060302e-05, - "loss": 5.18, - "step": 36766 - }, - { - "epoch": 19.17444589308996, - "grad_norm": 1.4506348371505737, - "learning_rate": 6.35638190954774e-05, - "loss": 5.4567, - "step": 36767 - }, - { - "epoch": 19.17496740547588, - "grad_norm": 1.5589851140975952, - "learning_rate": 6.356281407035176e-05, - "loss": 5.1669, - "step": 36768 - }, - { - "epoch": 19.1754889178618, - "grad_norm": 1.488966941833496, - "learning_rate": 6.356180904522613e-05, - "loss": 5.6474, - "step": 36769 - }, - { - "epoch": 19.176010430247718, - "grad_norm": 1.4959592819213867, - "learning_rate": 6.35608040201005e-05, - "loss": 5.3277, - "step": 36770 - }, - { - "epoch": 19.176531942633638, - "grad_norm": 1.4872239828109741, - "learning_rate": 6.355979899497487e-05, - "loss": 4.7649, - "step": 36771 - }, - { - "epoch": 19.177053455019557, - "grad_norm": 1.3071706295013428, - "learning_rate": 6.355879396984925e-05, - "loss": 5.7023, - "step": 36772 - }, - { - "epoch": 19.177574967405477, - "grad_norm": 1.4639469385147095, - "learning_rate": 6.355778894472361e-05, - "loss": 5.094, - "step": 36773 - }, - { - "epoch": 19.178096479791396, - "grad_norm": 1.461814045906067, - "learning_rate": 6.355678391959799e-05, - "loss": 5.6071, - "step": 36774 - }, - { - "epoch": 19.178617992177315, - "grad_norm": 1.4291685819625854, - "learning_rate": 6.355577889447237e-05, - "loss": 5.619, - "step": 36775 - }, - { - "epoch": 19.179139504563235, - "grad_norm": 1.5129731893539429, - "learning_rate": 6.355477386934675e-05, - "loss": 4.7896, - "step": 36776 - }, - { - "epoch": 19.17966101694915, - "grad_norm": 1.447580337524414, - "learning_rate": 6.355376884422111e-05, - "loss": 5.4114, - "step": 36777 - }, - { - "epoch": 19.18018252933507, - "grad_norm": 1.4864143133163452, - "learning_rate": 6.355276381909549e-05, - "loss": 4.917, - "step": 36778 - }, - { - "epoch": 19.18070404172099, - "grad_norm": 1.4383151531219482, - "learning_rate": 6.355175879396985e-05, - "loss": 5.608, - "step": 36779 - }, - { - "epoch": 19.18122555410691, - "grad_norm": 1.5035390853881836, - "learning_rate": 6.355075376884423e-05, - "loss": 5.5742, - "step": 36780 - }, - { - "epoch": 19.18174706649283, - "grad_norm": 1.4522112607955933, - "learning_rate": 6.35497487437186e-05, - "loss": 4.9576, - "step": 36781 - }, - { - "epoch": 19.182268578878748, - "grad_norm": 1.4563677310943604, - "learning_rate": 6.354874371859296e-05, - "loss": 5.539, - "step": 36782 - }, - { - "epoch": 19.182790091264668, - "grad_norm": 1.439776062965393, - "learning_rate": 6.354773869346734e-05, - "loss": 5.5646, - "step": 36783 - }, - { - "epoch": 19.183311603650587, - "grad_norm": 1.4637523889541626, - "learning_rate": 6.35467336683417e-05, - "loss": 5.616, - "step": 36784 - }, - { - "epoch": 19.183833116036507, - "grad_norm": 1.6338952779769897, - "learning_rate": 6.354572864321608e-05, - "loss": 4.8155, - "step": 36785 - }, - { - "epoch": 19.184354628422426, - "grad_norm": 1.721680760383606, - "learning_rate": 6.354472361809046e-05, - "loss": 5.2407, - "step": 36786 - }, - { - "epoch": 19.184876140808345, - "grad_norm": 1.3643306493759155, - "learning_rate": 6.354371859296483e-05, - "loss": 5.6971, - "step": 36787 - }, - { - "epoch": 19.185397653194265, - "grad_norm": 1.6221256256103516, - "learning_rate": 6.35427135678392e-05, - "loss": 5.1571, - "step": 36788 - }, - { - "epoch": 19.18591916558018, - "grad_norm": 1.4043117761611938, - "learning_rate": 6.354170854271358e-05, - "loss": 5.5485, - "step": 36789 - }, - { - "epoch": 19.1864406779661, - "grad_norm": 1.4833513498306274, - "learning_rate": 6.354070351758794e-05, - "loss": 5.2985, - "step": 36790 - }, - { - "epoch": 19.18696219035202, - "grad_norm": 1.6393437385559082, - "learning_rate": 6.353969849246232e-05, - "loss": 5.322, - "step": 36791 - }, - { - "epoch": 19.18748370273794, - "grad_norm": 1.4731892347335815, - "learning_rate": 6.353869346733668e-05, - "loss": 5.4432, - "step": 36792 - }, - { - "epoch": 19.18800521512386, - "grad_norm": 1.5758880376815796, - "learning_rate": 6.353768844221106e-05, - "loss": 5.0793, - "step": 36793 - }, - { - "epoch": 19.188526727509778, - "grad_norm": 1.5631556510925293, - "learning_rate": 6.353668341708542e-05, - "loss": 5.0857, - "step": 36794 - }, - { - "epoch": 19.189048239895698, - "grad_norm": 1.5493124723434448, - "learning_rate": 6.35356783919598e-05, - "loss": 5.4534, - "step": 36795 - }, - { - "epoch": 19.189569752281617, - "grad_norm": 1.452446699142456, - "learning_rate": 6.353467336683418e-05, - "loss": 5.5016, - "step": 36796 - }, - { - "epoch": 19.190091264667537, - "grad_norm": 1.5019510984420776, - "learning_rate": 6.353366834170854e-05, - "loss": 5.0226, - "step": 36797 - }, - { - "epoch": 19.190612777053456, - "grad_norm": 1.5245356559753418, - "learning_rate": 6.353266331658292e-05, - "loss": 5.1922, - "step": 36798 - }, - { - "epoch": 19.191134289439375, - "grad_norm": 1.5438936948776245, - "learning_rate": 6.353165829145729e-05, - "loss": 5.3812, - "step": 36799 - }, - { - "epoch": 19.191655801825295, - "grad_norm": 1.6431673765182495, - "learning_rate": 6.353065326633166e-05, - "loss": 4.8042, - "step": 36800 - }, - { - "epoch": 19.19217731421121, - "grad_norm": 1.5544520616531372, - "learning_rate": 6.352964824120603e-05, - "loss": 5.3633, - "step": 36801 - }, - { - "epoch": 19.19269882659713, - "grad_norm": 1.439686894416809, - "learning_rate": 6.35286432160804e-05, - "loss": 5.8256, - "step": 36802 - }, - { - "epoch": 19.19322033898305, - "grad_norm": 1.4189653396606445, - "learning_rate": 6.352763819095477e-05, - "loss": 5.6226, - "step": 36803 - }, - { - "epoch": 19.19374185136897, - "grad_norm": 1.5858079195022583, - "learning_rate": 6.352663316582915e-05, - "loss": 4.9998, - "step": 36804 - }, - { - "epoch": 19.19426336375489, - "grad_norm": 1.5106598138809204, - "learning_rate": 6.352562814070351e-05, - "loss": 5.2401, - "step": 36805 - }, - { - "epoch": 19.194784876140808, - "grad_norm": 1.5000866651535034, - "learning_rate": 6.352462311557789e-05, - "loss": 4.7224, - "step": 36806 - }, - { - "epoch": 19.195306388526728, - "grad_norm": 1.5284020900726318, - "learning_rate": 6.352361809045227e-05, - "loss": 5.0151, - "step": 36807 - }, - { - "epoch": 19.195827900912647, - "grad_norm": 1.5355902910232544, - "learning_rate": 6.352261306532665e-05, - "loss": 5.1299, - "step": 36808 - }, - { - "epoch": 19.196349413298567, - "grad_norm": 1.601561427116394, - "learning_rate": 6.352160804020101e-05, - "loss": 5.4361, - "step": 36809 - }, - { - "epoch": 19.196870925684486, - "grad_norm": 1.605168342590332, - "learning_rate": 6.352060301507538e-05, - "loss": 4.852, - "step": 36810 - }, - { - "epoch": 19.197392438070406, - "grad_norm": 1.6390225887298584, - "learning_rate": 6.351959798994975e-05, - "loss": 5.3375, - "step": 36811 - }, - { - "epoch": 19.197913950456325, - "grad_norm": 1.5888879299163818, - "learning_rate": 6.351859296482412e-05, - "loss": 5.013, - "step": 36812 - }, - { - "epoch": 19.19843546284224, - "grad_norm": 1.4969247579574585, - "learning_rate": 6.35175879396985e-05, - "loss": 5.4347, - "step": 36813 - }, - { - "epoch": 19.19895697522816, - "grad_norm": 1.6226699352264404, - "learning_rate": 6.351658291457286e-05, - "loss": 5.0224, - "step": 36814 - }, - { - "epoch": 19.19947848761408, - "grad_norm": 1.5531498193740845, - "learning_rate": 6.351557788944724e-05, - "loss": 5.3062, - "step": 36815 - }, - { - "epoch": 19.2, - "grad_norm": 1.434910774230957, - "learning_rate": 6.351457286432161e-05, - "loss": 5.5387, - "step": 36816 - }, - { - "epoch": 19.20052151238592, - "grad_norm": 1.4743329286575317, - "learning_rate": 6.351356783919599e-05, - "loss": 4.9693, - "step": 36817 - }, - { - "epoch": 19.201043024771838, - "grad_norm": 1.6672056913375854, - "learning_rate": 6.351256281407036e-05, - "loss": 4.8509, - "step": 36818 - }, - { - "epoch": 19.201564537157758, - "grad_norm": 1.6185163259506226, - "learning_rate": 6.351155778894473e-05, - "loss": 5.3013, - "step": 36819 - }, - { - "epoch": 19.202086049543677, - "grad_norm": 1.499768853187561, - "learning_rate": 6.35105527638191e-05, - "loss": 5.2753, - "step": 36820 - }, - { - "epoch": 19.202607561929597, - "grad_norm": 1.6774084568023682, - "learning_rate": 6.350954773869348e-05, - "loss": 5.1828, - "step": 36821 - }, - { - "epoch": 19.203129074315516, - "grad_norm": 1.4903481006622314, - "learning_rate": 6.350854271356784e-05, - "loss": 4.9863, - "step": 36822 - }, - { - "epoch": 19.203650586701436, - "grad_norm": 1.634554147720337, - "learning_rate": 6.35075376884422e-05, - "loss": 5.3333, - "step": 36823 - }, - { - "epoch": 19.204172099087355, - "grad_norm": 1.5463955402374268, - "learning_rate": 6.350653266331658e-05, - "loss": 5.1927, - "step": 36824 - }, - { - "epoch": 19.20469361147327, - "grad_norm": 1.5335463285446167, - "learning_rate": 6.350552763819095e-05, - "loss": 5.6385, - "step": 36825 - }, - { - "epoch": 19.20521512385919, - "grad_norm": 1.4487677812576294, - "learning_rate": 6.350452261306533e-05, - "loss": 5.4294, - "step": 36826 - }, - { - "epoch": 19.20573663624511, - "grad_norm": 1.4891185760498047, - "learning_rate": 6.35035175879397e-05, - "loss": 5.3142, - "step": 36827 - }, - { - "epoch": 19.20625814863103, - "grad_norm": 1.6271955966949463, - "learning_rate": 6.350251256281408e-05, - "loss": 5.4027, - "step": 36828 - }, - { - "epoch": 19.20677966101695, - "grad_norm": 1.6103308200836182, - "learning_rate": 6.350150753768845e-05, - "loss": 5.3095, - "step": 36829 - }, - { - "epoch": 19.20730117340287, - "grad_norm": 1.5681896209716797, - "learning_rate": 6.350050251256282e-05, - "loss": 5.2383, - "step": 36830 - }, - { - "epoch": 19.207822685788788, - "grad_norm": 1.5440258979797363, - "learning_rate": 6.349949748743719e-05, - "loss": 5.376, - "step": 36831 - }, - { - "epoch": 19.208344198174707, - "grad_norm": 1.476962685585022, - "learning_rate": 6.349849246231157e-05, - "loss": 5.45, - "step": 36832 - }, - { - "epoch": 19.208865710560627, - "grad_norm": 1.3936923742294312, - "learning_rate": 6.349748743718593e-05, - "loss": 5.0786, - "step": 36833 - }, - { - "epoch": 19.209387222946546, - "grad_norm": 1.4395536184310913, - "learning_rate": 6.349648241206031e-05, - "loss": 5.5912, - "step": 36834 - }, - { - "epoch": 19.209908735332466, - "grad_norm": 1.5874015092849731, - "learning_rate": 6.349547738693467e-05, - "loss": 5.1842, - "step": 36835 - }, - { - "epoch": 19.210430247718385, - "grad_norm": 1.4127824306488037, - "learning_rate": 6.349447236180905e-05, - "loss": 4.7363, - "step": 36836 - }, - { - "epoch": 19.2109517601043, - "grad_norm": 1.4570986032485962, - "learning_rate": 6.349346733668343e-05, - "loss": 5.4811, - "step": 36837 - }, - { - "epoch": 19.21147327249022, - "grad_norm": 1.4810131788253784, - "learning_rate": 6.349246231155779e-05, - "loss": 5.4135, - "step": 36838 - }, - { - "epoch": 19.21199478487614, - "grad_norm": 1.6047756671905518, - "learning_rate": 6.349145728643217e-05, - "loss": 4.9279, - "step": 36839 - }, - { - "epoch": 19.21251629726206, - "grad_norm": 1.500387191772461, - "learning_rate": 6.349045226130653e-05, - "loss": 4.7995, - "step": 36840 - }, - { - "epoch": 19.21303780964798, - "grad_norm": 1.5432854890823364, - "learning_rate": 6.348944723618091e-05, - "loss": 5.3838, - "step": 36841 - }, - { - "epoch": 19.2135593220339, - "grad_norm": 1.4466543197631836, - "learning_rate": 6.348844221105528e-05, - "loss": 5.3336, - "step": 36842 - }, - { - "epoch": 19.214080834419818, - "grad_norm": 1.5626252889633179, - "learning_rate": 6.348743718592965e-05, - "loss": 5.5827, - "step": 36843 - }, - { - "epoch": 19.214602346805737, - "grad_norm": 1.5846391916275024, - "learning_rate": 6.348643216080402e-05, - "loss": 5.4965, - "step": 36844 - }, - { - "epoch": 19.215123859191657, - "grad_norm": 1.5805906057357788, - "learning_rate": 6.34854271356784e-05, - "loss": 5.6206, - "step": 36845 - }, - { - "epoch": 19.215645371577576, - "grad_norm": 1.5605173110961914, - "learning_rate": 6.348442211055276e-05, - "loss": 5.4108, - "step": 36846 - }, - { - "epoch": 19.216166883963496, - "grad_norm": 1.5346235036849976, - "learning_rate": 6.348341708542714e-05, - "loss": 5.419, - "step": 36847 - }, - { - "epoch": 19.216688396349415, - "grad_norm": 1.381447672843933, - "learning_rate": 6.348241206030152e-05, - "loss": 4.5753, - "step": 36848 - }, - { - "epoch": 19.21720990873533, - "grad_norm": 1.6419659852981567, - "learning_rate": 6.348140703517588e-05, - "loss": 5.0773, - "step": 36849 - }, - { - "epoch": 19.21773142112125, - "grad_norm": 1.4510810375213623, - "learning_rate": 6.348040201005026e-05, - "loss": 5.709, - "step": 36850 - }, - { - "epoch": 19.21825293350717, - "grad_norm": 1.534165859222412, - "learning_rate": 6.347939698492462e-05, - "loss": 5.59, - "step": 36851 - }, - { - "epoch": 19.21877444589309, - "grad_norm": 1.5674177408218384, - "learning_rate": 6.3478391959799e-05, - "loss": 5.2574, - "step": 36852 - }, - { - "epoch": 19.21929595827901, - "grad_norm": 1.443166971206665, - "learning_rate": 6.347738693467336e-05, - "loss": 5.6958, - "step": 36853 - }, - { - "epoch": 19.21981747066493, - "grad_norm": 1.4661056995391846, - "learning_rate": 6.347638190954774e-05, - "loss": 5.1274, - "step": 36854 - }, - { - "epoch": 19.220338983050848, - "grad_norm": 1.628048300743103, - "learning_rate": 6.34753768844221e-05, - "loss": 5.0349, - "step": 36855 - }, - { - "epoch": 19.220860495436767, - "grad_norm": 1.49128258228302, - "learning_rate": 6.347437185929648e-05, - "loss": 5.4518, - "step": 36856 - }, - { - "epoch": 19.221382007822687, - "grad_norm": 1.4849789142608643, - "learning_rate": 6.347336683417086e-05, - "loss": 5.6713, - "step": 36857 - }, - { - "epoch": 19.221903520208606, - "grad_norm": 1.6297568082809448, - "learning_rate": 6.347236180904524e-05, - "loss": 4.7927, - "step": 36858 - }, - { - "epoch": 19.222425032594526, - "grad_norm": 1.4678562879562378, - "learning_rate": 6.34713567839196e-05, - "loss": 5.3589, - "step": 36859 - }, - { - "epoch": 19.22294654498044, - "grad_norm": 1.4813907146453857, - "learning_rate": 6.347035175879398e-05, - "loss": 5.4533, - "step": 36860 - }, - { - "epoch": 19.22346805736636, - "grad_norm": 1.5493189096450806, - "learning_rate": 6.346934673366835e-05, - "loss": 5.3319, - "step": 36861 - }, - { - "epoch": 19.22398956975228, - "grad_norm": 1.4202569723129272, - "learning_rate": 6.346834170854271e-05, - "loss": 4.9688, - "step": 36862 - }, - { - "epoch": 19.2245110821382, - "grad_norm": 1.45529305934906, - "learning_rate": 6.346733668341709e-05, - "loss": 5.271, - "step": 36863 - }, - { - "epoch": 19.22503259452412, - "grad_norm": 1.5787216424942017, - "learning_rate": 6.346633165829145e-05, - "loss": 4.9847, - "step": 36864 - }, - { - "epoch": 19.22555410691004, - "grad_norm": 1.7754377126693726, - "learning_rate": 6.346532663316583e-05, - "loss": 4.4854, - "step": 36865 - }, - { - "epoch": 19.22607561929596, - "grad_norm": 1.476708173751831, - "learning_rate": 6.34643216080402e-05, - "loss": 5.4723, - "step": 36866 - }, - { - "epoch": 19.226597131681878, - "grad_norm": 1.489437460899353, - "learning_rate": 6.346331658291457e-05, - "loss": 5.0789, - "step": 36867 - }, - { - "epoch": 19.227118644067797, - "grad_norm": 1.492141604423523, - "learning_rate": 6.346231155778895e-05, - "loss": 5.3091, - "step": 36868 - }, - { - "epoch": 19.227640156453717, - "grad_norm": 1.4575738906860352, - "learning_rate": 6.346130653266333e-05, - "loss": 5.7123, - "step": 36869 - }, - { - "epoch": 19.228161668839636, - "grad_norm": 1.484814167022705, - "learning_rate": 6.346030150753769e-05, - "loss": 5.5521, - "step": 36870 - }, - { - "epoch": 19.228683181225556, - "grad_norm": 1.4843875169754028, - "learning_rate": 6.345929648241207e-05, - "loss": 5.4517, - "step": 36871 - }, - { - "epoch": 19.22920469361147, - "grad_norm": 1.4497064352035522, - "learning_rate": 6.345829145728643e-05, - "loss": 5.0019, - "step": 36872 - }, - { - "epoch": 19.22972620599739, - "grad_norm": 1.5412789583206177, - "learning_rate": 6.345728643216081e-05, - "loss": 4.7164, - "step": 36873 - }, - { - "epoch": 19.23024771838331, - "grad_norm": 1.4401829242706299, - "learning_rate": 6.345628140703518e-05, - "loss": 5.4663, - "step": 36874 - }, - { - "epoch": 19.23076923076923, - "grad_norm": 1.6556521654129028, - "learning_rate": 6.345527638190954e-05, - "loss": 5.1637, - "step": 36875 - }, - { - "epoch": 19.23129074315515, - "grad_norm": 1.4327027797698975, - "learning_rate": 6.345427135678392e-05, - "loss": 5.6371, - "step": 36876 - }, - { - "epoch": 19.23181225554107, - "grad_norm": 1.6042107343673706, - "learning_rate": 6.34532663316583e-05, - "loss": 4.8231, - "step": 36877 - }, - { - "epoch": 19.23233376792699, - "grad_norm": 1.4737534523010254, - "learning_rate": 6.345226130653267e-05, - "loss": 5.5331, - "step": 36878 - }, - { - "epoch": 19.232855280312908, - "grad_norm": 1.5860285758972168, - "learning_rate": 6.345125628140704e-05, - "loss": 4.9038, - "step": 36879 - }, - { - "epoch": 19.233376792698827, - "grad_norm": 1.3962498903274536, - "learning_rate": 6.345025125628142e-05, - "loss": 5.6514, - "step": 36880 - }, - { - "epoch": 19.233898305084747, - "grad_norm": 1.6860625743865967, - "learning_rate": 6.344924623115578e-05, - "loss": 5.0277, - "step": 36881 - }, - { - "epoch": 19.234419817470666, - "grad_norm": 1.4662601947784424, - "learning_rate": 6.344824120603016e-05, - "loss": 5.2007, - "step": 36882 - }, - { - "epoch": 19.234941329856586, - "grad_norm": 1.4557886123657227, - "learning_rate": 6.344723618090452e-05, - "loss": 5.5913, - "step": 36883 - }, - { - "epoch": 19.2354628422425, - "grad_norm": 1.5355714559555054, - "learning_rate": 6.34462311557789e-05, - "loss": 5.1394, - "step": 36884 - }, - { - "epoch": 19.23598435462842, - "grad_norm": 1.529517412185669, - "learning_rate": 6.344522613065326e-05, - "loss": 5.7525, - "step": 36885 - }, - { - "epoch": 19.23650586701434, - "grad_norm": 1.4590818881988525, - "learning_rate": 6.344422110552764e-05, - "loss": 5.1185, - "step": 36886 - }, - { - "epoch": 19.23702737940026, - "grad_norm": 1.6308363676071167, - "learning_rate": 6.344321608040201e-05, - "loss": 4.5298, - "step": 36887 - }, - { - "epoch": 19.23754889178618, - "grad_norm": 1.6101256608963013, - "learning_rate": 6.344221105527638e-05, - "loss": 5.475, - "step": 36888 - }, - { - "epoch": 19.2380704041721, - "grad_norm": 1.4836009740829468, - "learning_rate": 6.344120603015076e-05, - "loss": 5.3024, - "step": 36889 - }, - { - "epoch": 19.23859191655802, - "grad_norm": 1.5156887769699097, - "learning_rate": 6.344020100502513e-05, - "loss": 5.0775, - "step": 36890 - }, - { - "epoch": 19.239113428943938, - "grad_norm": 1.5670709609985352, - "learning_rate": 6.34391959798995e-05, - "loss": 5.6457, - "step": 36891 - }, - { - "epoch": 19.239634941329857, - "grad_norm": 1.5336781740188599, - "learning_rate": 6.343819095477387e-05, - "loss": 5.1949, - "step": 36892 - }, - { - "epoch": 19.240156453715777, - "grad_norm": 1.4362921714782715, - "learning_rate": 6.343718592964825e-05, - "loss": 5.5624, - "step": 36893 - }, - { - "epoch": 19.240677966101696, - "grad_norm": 1.687852144241333, - "learning_rate": 6.343618090452261e-05, - "loss": 5.0137, - "step": 36894 - }, - { - "epoch": 19.241199478487616, - "grad_norm": 1.4673957824707031, - "learning_rate": 6.343517587939699e-05, - "loss": 5.4657, - "step": 36895 - }, - { - "epoch": 19.24172099087353, - "grad_norm": 1.449641466140747, - "learning_rate": 6.343417085427135e-05, - "loss": 5.3665, - "step": 36896 - }, - { - "epoch": 19.24224250325945, - "grad_norm": 1.4761532545089722, - "learning_rate": 6.343316582914573e-05, - "loss": 4.6071, - "step": 36897 - }, - { - "epoch": 19.24276401564537, - "grad_norm": 1.4906343221664429, - "learning_rate": 6.34321608040201e-05, - "loss": 5.523, - "step": 36898 - }, - { - "epoch": 19.24328552803129, - "grad_norm": 1.6711719036102295, - "learning_rate": 6.343115577889447e-05, - "loss": 5.0888, - "step": 36899 - }, - { - "epoch": 19.24380704041721, - "grad_norm": 1.5183913707733154, - "learning_rate": 6.343015075376885e-05, - "loss": 5.4914, - "step": 36900 - }, - { - "epoch": 19.24432855280313, - "grad_norm": 1.5291370153427124, - "learning_rate": 6.342914572864323e-05, - "loss": 5.1587, - "step": 36901 - }, - { - "epoch": 19.24485006518905, - "grad_norm": 1.5439311265945435, - "learning_rate": 6.342814070351759e-05, - "loss": 4.7495, - "step": 36902 - }, - { - "epoch": 19.245371577574968, - "grad_norm": 1.484724760055542, - "learning_rate": 6.342713567839196e-05, - "loss": 5.1885, - "step": 36903 - }, - { - "epoch": 19.245893089960887, - "grad_norm": 1.4891382455825806, - "learning_rate": 6.342613065326634e-05, - "loss": 5.5848, - "step": 36904 - }, - { - "epoch": 19.246414602346807, - "grad_norm": 1.5138885974884033, - "learning_rate": 6.34251256281407e-05, - "loss": 5.3052, - "step": 36905 - }, - { - "epoch": 19.246936114732726, - "grad_norm": 1.6237576007843018, - "learning_rate": 6.342412060301508e-05, - "loss": 4.905, - "step": 36906 - }, - { - "epoch": 19.247457627118646, - "grad_norm": 1.4857239723205566, - "learning_rate": 6.342311557788944e-05, - "loss": 5.2908, - "step": 36907 - }, - { - "epoch": 19.24797913950456, - "grad_norm": 1.6112921237945557, - "learning_rate": 6.342211055276382e-05, - "loss": 4.8324, - "step": 36908 - }, - { - "epoch": 19.24850065189048, - "grad_norm": 1.4779003858566284, - "learning_rate": 6.34211055276382e-05, - "loss": 5.6801, - "step": 36909 - }, - { - "epoch": 19.2490221642764, - "grad_norm": 1.391730546951294, - "learning_rate": 6.342010050251258e-05, - "loss": 5.4727, - "step": 36910 - }, - { - "epoch": 19.24954367666232, - "grad_norm": 1.5292145013809204, - "learning_rate": 6.341909547738694e-05, - "loss": 4.8833, - "step": 36911 - }, - { - "epoch": 19.25006518904824, - "grad_norm": 1.4813306331634521, - "learning_rate": 6.341809045226132e-05, - "loss": 5.2467, - "step": 36912 - }, - { - "epoch": 19.25058670143416, - "grad_norm": 1.4558688402175903, - "learning_rate": 6.341708542713568e-05, - "loss": 5.5094, - "step": 36913 - }, - { - "epoch": 19.25110821382008, - "grad_norm": 1.5205247402191162, - "learning_rate": 6.341608040201006e-05, - "loss": 5.0898, - "step": 36914 - }, - { - "epoch": 19.251629726205998, - "grad_norm": 1.5173863172531128, - "learning_rate": 6.341507537688442e-05, - "loss": 5.4024, - "step": 36915 - }, - { - "epoch": 19.252151238591917, - "grad_norm": 1.5855995416641235, - "learning_rate": 6.341407035175879e-05, - "loss": 5.2493, - "step": 36916 - }, - { - "epoch": 19.252672750977837, - "grad_norm": 1.4934707880020142, - "learning_rate": 6.341306532663317e-05, - "loss": 4.9347, - "step": 36917 - }, - { - "epoch": 19.253194263363756, - "grad_norm": 1.5738959312438965, - "learning_rate": 6.341206030150753e-05, - "loss": 5.0986, - "step": 36918 - }, - { - "epoch": 19.253715775749676, - "grad_norm": 1.435224175453186, - "learning_rate": 6.341105527638191e-05, - "loss": 5.2367, - "step": 36919 - }, - { - "epoch": 19.25423728813559, - "grad_norm": 1.5308747291564941, - "learning_rate": 6.341005025125629e-05, - "loss": 4.9427, - "step": 36920 - }, - { - "epoch": 19.25475880052151, - "grad_norm": 1.5276764631271362, - "learning_rate": 6.340904522613066e-05, - "loss": 5.0881, - "step": 36921 - }, - { - "epoch": 19.25528031290743, - "grad_norm": 1.3677276372909546, - "learning_rate": 6.340804020100503e-05, - "loss": 5.5581, - "step": 36922 - }, - { - "epoch": 19.25580182529335, - "grad_norm": 1.5276122093200684, - "learning_rate": 6.34070351758794e-05, - "loss": 5.3721, - "step": 36923 - }, - { - "epoch": 19.25632333767927, - "grad_norm": 1.3930071592330933, - "learning_rate": 6.340603015075377e-05, - "loss": 5.5698, - "step": 36924 - }, - { - "epoch": 19.25684485006519, - "grad_norm": 1.5334687232971191, - "learning_rate": 6.340502512562815e-05, - "loss": 5.4448, - "step": 36925 - }, - { - "epoch": 19.25736636245111, - "grad_norm": 1.5532457828521729, - "learning_rate": 6.340402010050251e-05, - "loss": 5.3501, - "step": 36926 - }, - { - "epoch": 19.257887874837028, - "grad_norm": 1.536877989768982, - "learning_rate": 6.340301507537689e-05, - "loss": 5.756, - "step": 36927 - }, - { - "epoch": 19.258409387222947, - "grad_norm": 1.5355427265167236, - "learning_rate": 6.340201005025125e-05, - "loss": 5.2108, - "step": 36928 - }, - { - "epoch": 19.258930899608867, - "grad_norm": 1.4861279726028442, - "learning_rate": 6.340100502512563e-05, - "loss": 5.3329, - "step": 36929 - }, - { - "epoch": 19.259452411994786, - "grad_norm": 1.4631205797195435, - "learning_rate": 6.340000000000001e-05, - "loss": 5.1522, - "step": 36930 - }, - { - "epoch": 19.259973924380706, - "grad_norm": 1.4840400218963623, - "learning_rate": 6.339899497487437e-05, - "loss": 5.1723, - "step": 36931 - }, - { - "epoch": 19.26049543676662, - "grad_norm": 1.5493239164352417, - "learning_rate": 6.339798994974875e-05, - "loss": 5.145, - "step": 36932 - }, - { - "epoch": 19.26101694915254, - "grad_norm": 1.5541789531707764, - "learning_rate": 6.339698492462312e-05, - "loss": 5.6941, - "step": 36933 - }, - { - "epoch": 19.26153846153846, - "grad_norm": 1.593537449836731, - "learning_rate": 6.33959798994975e-05, - "loss": 5.5049, - "step": 36934 - }, - { - "epoch": 19.26205997392438, - "grad_norm": 1.5609166622161865, - "learning_rate": 6.339497487437186e-05, - "loss": 5.4556, - "step": 36935 - }, - { - "epoch": 19.2625814863103, - "grad_norm": 1.4707036018371582, - "learning_rate": 6.339396984924624e-05, - "loss": 5.2974, - "step": 36936 - }, - { - "epoch": 19.26310299869622, - "grad_norm": 1.4938044548034668, - "learning_rate": 6.33929648241206e-05, - "loss": 5.3953, - "step": 36937 - }, - { - "epoch": 19.26362451108214, - "grad_norm": 1.614590048789978, - "learning_rate": 6.339195979899498e-05, - "loss": 5.3994, - "step": 36938 - }, - { - "epoch": 19.264146023468058, - "grad_norm": 1.5082640647888184, - "learning_rate": 6.339095477386934e-05, - "loss": 5.241, - "step": 36939 - }, - { - "epoch": 19.264667535853977, - "grad_norm": 1.6660542488098145, - "learning_rate": 6.338994974874372e-05, - "loss": 4.5689, - "step": 36940 - }, - { - "epoch": 19.265189048239897, - "grad_norm": 1.620902419090271, - "learning_rate": 6.33889447236181e-05, - "loss": 5.2737, - "step": 36941 - }, - { - "epoch": 19.265710560625816, - "grad_norm": 1.464135766029358, - "learning_rate": 6.338793969849246e-05, - "loss": 5.2803, - "step": 36942 - }, - { - "epoch": 19.266232073011736, - "grad_norm": 1.5170650482177734, - "learning_rate": 6.338693467336684e-05, - "loss": 5.4217, - "step": 36943 - }, - { - "epoch": 19.26675358539765, - "grad_norm": 1.6289030313491821, - "learning_rate": 6.33859296482412e-05, - "loss": 4.9642, - "step": 36944 - }, - { - "epoch": 19.26727509778357, - "grad_norm": 1.6857249736785889, - "learning_rate": 6.338492462311558e-05, - "loss": 5.2609, - "step": 36945 - }, - { - "epoch": 19.26779661016949, - "grad_norm": 1.5560426712036133, - "learning_rate": 6.338391959798995e-05, - "loss": 5.1615, - "step": 36946 - }, - { - "epoch": 19.26831812255541, - "grad_norm": 1.5423920154571533, - "learning_rate": 6.338291457286432e-05, - "loss": 5.5504, - "step": 36947 - }, - { - "epoch": 19.26883963494133, - "grad_norm": 1.5581380128860474, - "learning_rate": 6.338190954773869e-05, - "loss": 5.2994, - "step": 36948 - }, - { - "epoch": 19.26936114732725, - "grad_norm": 1.473510980606079, - "learning_rate": 6.338090452261307e-05, - "loss": 5.3017, - "step": 36949 - }, - { - "epoch": 19.26988265971317, - "grad_norm": 1.5097030401229858, - "learning_rate": 6.337989949748744e-05, - "loss": 5.4822, - "step": 36950 - }, - { - "epoch": 19.270404172099088, - "grad_norm": 1.6531695127487183, - "learning_rate": 6.337889447236182e-05, - "loss": 5.11, - "step": 36951 - }, - { - "epoch": 19.270925684485007, - "grad_norm": 1.4781410694122314, - "learning_rate": 6.337788944723619e-05, - "loss": 5.2756, - "step": 36952 - }, - { - "epoch": 19.271447196870927, - "grad_norm": 1.440588355064392, - "learning_rate": 6.337688442211056e-05, - "loss": 5.1442, - "step": 36953 - }, - { - "epoch": 19.271968709256846, - "grad_norm": 1.429126262664795, - "learning_rate": 6.337587939698493e-05, - "loss": 5.4935, - "step": 36954 - }, - { - "epoch": 19.272490221642762, - "grad_norm": 1.5640674829483032, - "learning_rate": 6.337487437185929e-05, - "loss": 5.1456, - "step": 36955 - }, - { - "epoch": 19.27301173402868, - "grad_norm": 1.5524988174438477, - "learning_rate": 6.337386934673367e-05, - "loss": 4.6682, - "step": 36956 - }, - { - "epoch": 19.2735332464146, - "grad_norm": 1.6039880514144897, - "learning_rate": 6.337286432160803e-05, - "loss": 5.557, - "step": 36957 - }, - { - "epoch": 19.27405475880052, - "grad_norm": 1.5884076356887817, - "learning_rate": 6.337185929648241e-05, - "loss": 5.2362, - "step": 36958 - }, - { - "epoch": 19.27457627118644, - "grad_norm": 1.5353890657424927, - "learning_rate": 6.337085427135678e-05, - "loss": 4.8334, - "step": 36959 - }, - { - "epoch": 19.27509778357236, - "grad_norm": 1.6452640295028687, - "learning_rate": 6.336984924623115e-05, - "loss": 5.3837, - "step": 36960 - }, - { - "epoch": 19.27561929595828, - "grad_norm": 1.5327543020248413, - "learning_rate": 6.336884422110553e-05, - "loss": 5.5065, - "step": 36961 - }, - { - "epoch": 19.2761408083442, - "grad_norm": 1.554150938987732, - "learning_rate": 6.336783919597991e-05, - "loss": 5.0363, - "step": 36962 - }, - { - "epoch": 19.276662320730118, - "grad_norm": 1.4831501245498657, - "learning_rate": 6.336683417085427e-05, - "loss": 4.9323, - "step": 36963 - }, - { - "epoch": 19.277183833116037, - "grad_norm": 1.4818572998046875, - "learning_rate": 6.336582914572865e-05, - "loss": 5.5608, - "step": 36964 - }, - { - "epoch": 19.277705345501957, - "grad_norm": 1.4605873823165894, - "learning_rate": 6.336482412060302e-05, - "loss": 5.2915, - "step": 36965 - }, - { - "epoch": 19.278226857887876, - "grad_norm": 1.5389604568481445, - "learning_rate": 6.33638190954774e-05, - "loss": 4.7589, - "step": 36966 - }, - { - "epoch": 19.278748370273792, - "grad_norm": 1.5020772218704224, - "learning_rate": 6.336281407035176e-05, - "loss": 5.5426, - "step": 36967 - }, - { - "epoch": 19.27926988265971, - "grad_norm": 1.5246011018753052, - "learning_rate": 6.336180904522612e-05, - "loss": 5.2249, - "step": 36968 - }, - { - "epoch": 19.27979139504563, - "grad_norm": 1.4716694355010986, - "learning_rate": 6.33608040201005e-05, - "loss": 5.5469, - "step": 36969 - }, - { - "epoch": 19.28031290743155, - "grad_norm": 1.5546784400939941, - "learning_rate": 6.335979899497488e-05, - "loss": 5.2756, - "step": 36970 - }, - { - "epoch": 19.28083441981747, - "grad_norm": 1.5236930847167969, - "learning_rate": 6.335879396984926e-05, - "loss": 5.084, - "step": 36971 - }, - { - "epoch": 19.28135593220339, - "grad_norm": 1.636731743812561, - "learning_rate": 6.335778894472362e-05, - "loss": 5.2955, - "step": 36972 - }, - { - "epoch": 19.28187744458931, - "grad_norm": 1.525723934173584, - "learning_rate": 6.3356783919598e-05, - "loss": 5.5118, - "step": 36973 - }, - { - "epoch": 19.28239895697523, - "grad_norm": 1.5043638944625854, - "learning_rate": 6.335577889447236e-05, - "loss": 5.458, - "step": 36974 - }, - { - "epoch": 19.282920469361148, - "grad_norm": 1.5032025575637817, - "learning_rate": 6.335477386934674e-05, - "loss": 5.5807, - "step": 36975 - }, - { - "epoch": 19.283441981747067, - "grad_norm": 1.4374566078186035, - "learning_rate": 6.33537688442211e-05, - "loss": 5.7435, - "step": 36976 - }, - { - "epoch": 19.283963494132987, - "grad_norm": 1.5648101568222046, - "learning_rate": 6.335276381909548e-05, - "loss": 5.6351, - "step": 36977 - }, - { - "epoch": 19.284485006518906, - "grad_norm": 1.6216416358947754, - "learning_rate": 6.335175879396985e-05, - "loss": 5.6382, - "step": 36978 - }, - { - "epoch": 19.285006518904822, - "grad_norm": 1.4489424228668213, - "learning_rate": 6.335075376884423e-05, - "loss": 5.7505, - "step": 36979 - }, - { - "epoch": 19.285528031290742, - "grad_norm": 1.5426013469696045, - "learning_rate": 6.334974874371859e-05, - "loss": 5.5321, - "step": 36980 - }, - { - "epoch": 19.28604954367666, - "grad_norm": 1.529561161994934, - "learning_rate": 6.334874371859297e-05, - "loss": 5.1118, - "step": 36981 - }, - { - "epoch": 19.28657105606258, - "grad_norm": 1.445841908454895, - "learning_rate": 6.334773869346734e-05, - "loss": 5.66, - "step": 36982 - }, - { - "epoch": 19.2870925684485, - "grad_norm": 1.5005908012390137, - "learning_rate": 6.334673366834171e-05, - "loss": 5.524, - "step": 36983 - }, - { - "epoch": 19.28761408083442, - "grad_norm": 1.5497288703918457, - "learning_rate": 6.334572864321609e-05, - "loss": 5.4262, - "step": 36984 - }, - { - "epoch": 19.28813559322034, - "grad_norm": 1.54937744140625, - "learning_rate": 6.334472361809045e-05, - "loss": 5.3626, - "step": 36985 - }, - { - "epoch": 19.28865710560626, - "grad_norm": 1.5920655727386475, - "learning_rate": 6.334371859296483e-05, - "loss": 5.1219, - "step": 36986 - }, - { - "epoch": 19.289178617992178, - "grad_norm": 1.4081624746322632, - "learning_rate": 6.33427135678392e-05, - "loss": 5.7339, - "step": 36987 - }, - { - "epoch": 19.289700130378097, - "grad_norm": 1.5500937700271606, - "learning_rate": 6.334170854271357e-05, - "loss": 5.3096, - "step": 36988 - }, - { - "epoch": 19.290221642764017, - "grad_norm": 1.564881443977356, - "learning_rate": 6.334070351758794e-05, - "loss": 5.0718, - "step": 36989 - }, - { - "epoch": 19.290743155149936, - "grad_norm": 1.5192279815673828, - "learning_rate": 6.333969849246231e-05, - "loss": 5.0125, - "step": 36990 - }, - { - "epoch": 19.291264667535852, - "grad_norm": 1.4879258871078491, - "learning_rate": 6.333869346733669e-05, - "loss": 5.0246, - "step": 36991 - }, - { - "epoch": 19.291786179921772, - "grad_norm": 1.4760690927505493, - "learning_rate": 6.333768844221107e-05, - "loss": 5.3884, - "step": 36992 - }, - { - "epoch": 19.29230769230769, - "grad_norm": 1.4323617219924927, - "learning_rate": 6.333668341708543e-05, - "loss": 5.6063, - "step": 36993 - }, - { - "epoch": 19.29282920469361, - "grad_norm": 1.4789483547210693, - "learning_rate": 6.333567839195981e-05, - "loss": 5.4262, - "step": 36994 - }, - { - "epoch": 19.29335071707953, - "grad_norm": 1.4340264797210693, - "learning_rate": 6.333467336683418e-05, - "loss": 5.5647, - "step": 36995 - }, - { - "epoch": 19.29387222946545, - "grad_norm": 1.4710607528686523, - "learning_rate": 6.333366834170854e-05, - "loss": 5.6171, - "step": 36996 - }, - { - "epoch": 19.29439374185137, - "grad_norm": 1.5966708660125732, - "learning_rate": 6.333266331658292e-05, - "loss": 5.3733, - "step": 36997 - }, - { - "epoch": 19.29491525423729, - "grad_norm": 1.5931636095046997, - "learning_rate": 6.333165829145728e-05, - "loss": 5.1957, - "step": 36998 - }, - { - "epoch": 19.295436766623208, - "grad_norm": 1.5114933252334595, - "learning_rate": 6.333065326633166e-05, - "loss": 5.2603, - "step": 36999 - }, - { - "epoch": 19.295958279009128, - "grad_norm": 1.4077621698379517, - "learning_rate": 6.332964824120602e-05, - "loss": 5.1852, - "step": 37000 - }, - { - "epoch": 19.296479791395047, - "grad_norm": 1.5346025228500366, - "learning_rate": 6.33286432160804e-05, - "loss": 5.1795, - "step": 37001 - }, - { - "epoch": 19.297001303780966, - "grad_norm": 1.7000153064727783, - "learning_rate": 6.332763819095478e-05, - "loss": 4.8945, - "step": 37002 - }, - { - "epoch": 19.297522816166882, - "grad_norm": 1.5090970993041992, - "learning_rate": 6.332663316582916e-05, - "loss": 5.0531, - "step": 37003 - }, - { - "epoch": 19.298044328552802, - "grad_norm": 1.6013832092285156, - "learning_rate": 6.332562814070352e-05, - "loss": 5.0197, - "step": 37004 - }, - { - "epoch": 19.29856584093872, - "grad_norm": 1.710984468460083, - "learning_rate": 6.33246231155779e-05, - "loss": 5.0483, - "step": 37005 - }, - { - "epoch": 19.29908735332464, - "grad_norm": 1.511130928993225, - "learning_rate": 6.332361809045226e-05, - "loss": 4.8332, - "step": 37006 - }, - { - "epoch": 19.29960886571056, - "grad_norm": 1.4409373998641968, - "learning_rate": 6.332261306532664e-05, - "loss": 5.5192, - "step": 37007 - }, - { - "epoch": 19.30013037809648, - "grad_norm": 1.410717248916626, - "learning_rate": 6.3321608040201e-05, - "loss": 5.8575, - "step": 37008 - }, - { - "epoch": 19.3006518904824, - "grad_norm": 1.4723435640335083, - "learning_rate": 6.332060301507537e-05, - "loss": 5.5105, - "step": 37009 - }, - { - "epoch": 19.30117340286832, - "grad_norm": 1.4109721183776855, - "learning_rate": 6.331959798994975e-05, - "loss": 5.1421, - "step": 37010 - }, - { - "epoch": 19.301694915254238, - "grad_norm": 1.450457215309143, - "learning_rate": 6.331859296482413e-05, - "loss": 5.5569, - "step": 37011 - }, - { - "epoch": 19.302216427640158, - "grad_norm": 1.3753290176391602, - "learning_rate": 6.33175879396985e-05, - "loss": 5.4319, - "step": 37012 - }, - { - "epoch": 19.302737940026077, - "grad_norm": 1.5892975330352783, - "learning_rate": 6.331658291457287e-05, - "loss": 5.2195, - "step": 37013 - }, - { - "epoch": 19.303259452411996, - "grad_norm": 1.611024022102356, - "learning_rate": 6.331557788944725e-05, - "loss": 5.1911, - "step": 37014 - }, - { - "epoch": 19.303780964797912, - "grad_norm": 1.5561045408248901, - "learning_rate": 6.331457286432161e-05, - "loss": 5.0182, - "step": 37015 - }, - { - "epoch": 19.304302477183832, - "grad_norm": 1.3958892822265625, - "learning_rate": 6.331356783919599e-05, - "loss": 5.5558, - "step": 37016 - }, - { - "epoch": 19.30482398956975, - "grad_norm": 1.5448585748672485, - "learning_rate": 6.331256281407035e-05, - "loss": 4.0007, - "step": 37017 - }, - { - "epoch": 19.30534550195567, - "grad_norm": 1.4566551446914673, - "learning_rate": 6.331155778894473e-05, - "loss": 5.4945, - "step": 37018 - }, - { - "epoch": 19.30586701434159, - "grad_norm": 1.5056254863739014, - "learning_rate": 6.33105527638191e-05, - "loss": 4.992, - "step": 37019 - }, - { - "epoch": 19.30638852672751, - "grad_norm": 1.4802403450012207, - "learning_rate": 6.330954773869347e-05, - "loss": 5.1516, - "step": 37020 - }, - { - "epoch": 19.30691003911343, - "grad_norm": 1.5480082035064697, - "learning_rate": 6.330854271356784e-05, - "loss": 5.5624, - "step": 37021 - }, - { - "epoch": 19.30743155149935, - "grad_norm": 1.5364937782287598, - "learning_rate": 6.330753768844221e-05, - "loss": 5.049, - "step": 37022 - }, - { - "epoch": 19.307953063885268, - "grad_norm": 1.6175222396850586, - "learning_rate": 6.330653266331659e-05, - "loss": 4.6898, - "step": 37023 - }, - { - "epoch": 19.308474576271188, - "grad_norm": 1.3721835613250732, - "learning_rate": 6.330552763819096e-05, - "loss": 5.1701, - "step": 37024 - }, - { - "epoch": 19.308996088657107, - "grad_norm": 1.5426119565963745, - "learning_rate": 6.330452261306533e-05, - "loss": 5.1736, - "step": 37025 - }, - { - "epoch": 19.309517601043027, - "grad_norm": 1.5243970155715942, - "learning_rate": 6.33035175879397e-05, - "loss": 5.1837, - "step": 37026 - }, - { - "epoch": 19.310039113428942, - "grad_norm": 1.519598126411438, - "learning_rate": 6.330251256281408e-05, - "loss": 5.479, - "step": 37027 - }, - { - "epoch": 19.310560625814862, - "grad_norm": 1.491750717163086, - "learning_rate": 6.330150753768844e-05, - "loss": 4.9179, - "step": 37028 - }, - { - "epoch": 19.31108213820078, - "grad_norm": 1.5575309991836548, - "learning_rate": 6.330050251256282e-05, - "loss": 4.5672, - "step": 37029 - }, - { - "epoch": 19.3116036505867, - "grad_norm": 1.5337893962860107, - "learning_rate": 6.329949748743718e-05, - "loss": 5.4104, - "step": 37030 - }, - { - "epoch": 19.31212516297262, - "grad_norm": 1.444305181503296, - "learning_rate": 6.329849246231156e-05, - "loss": 5.0335, - "step": 37031 - }, - { - "epoch": 19.31264667535854, - "grad_norm": 1.3032722473144531, - "learning_rate": 6.329748743718594e-05, - "loss": 5.132, - "step": 37032 - }, - { - "epoch": 19.31316818774446, - "grad_norm": 1.4571813344955444, - "learning_rate": 6.329648241206032e-05, - "loss": 5.6325, - "step": 37033 - }, - { - "epoch": 19.31368970013038, - "grad_norm": 1.3492635488510132, - "learning_rate": 6.329547738693468e-05, - "loss": 5.0656, - "step": 37034 - }, - { - "epoch": 19.314211212516298, - "grad_norm": 1.4143400192260742, - "learning_rate": 6.329447236180904e-05, - "loss": 5.2471, - "step": 37035 - }, - { - "epoch": 19.314732724902218, - "grad_norm": 1.5203739404678345, - "learning_rate": 6.329346733668342e-05, - "loss": 4.7781, - "step": 37036 - }, - { - "epoch": 19.315254237288137, - "grad_norm": 1.5239968299865723, - "learning_rate": 6.329246231155779e-05, - "loss": 5.3128, - "step": 37037 - }, - { - "epoch": 19.315775749674053, - "grad_norm": 1.4645259380340576, - "learning_rate": 6.329145728643216e-05, - "loss": 5.3126, - "step": 37038 - }, - { - "epoch": 19.316297262059972, - "grad_norm": 1.5467439889907837, - "learning_rate": 6.329045226130653e-05, - "loss": 5.4619, - "step": 37039 - }, - { - "epoch": 19.316818774445892, - "grad_norm": 1.5318442583084106, - "learning_rate": 6.32894472361809e-05, - "loss": 5.3047, - "step": 37040 - }, - { - "epoch": 19.31734028683181, - "grad_norm": 1.5713196992874146, - "learning_rate": 6.328844221105527e-05, - "loss": 5.1694, - "step": 37041 - }, - { - "epoch": 19.31786179921773, - "grad_norm": 1.5806161165237427, - "learning_rate": 6.328743718592965e-05, - "loss": 5.6307, - "step": 37042 - }, - { - "epoch": 19.31838331160365, - "grad_norm": 1.5741537809371948, - "learning_rate": 6.328643216080403e-05, - "loss": 5.3628, - "step": 37043 - }, - { - "epoch": 19.31890482398957, - "grad_norm": 1.5723493099212646, - "learning_rate": 6.32854271356784e-05, - "loss": 5.5148, - "step": 37044 - }, - { - "epoch": 19.31942633637549, - "grad_norm": 1.5280975103378296, - "learning_rate": 6.328442211055277e-05, - "loss": 5.2795, - "step": 37045 - }, - { - "epoch": 19.31994784876141, - "grad_norm": 1.6120126247406006, - "learning_rate": 6.328341708542715e-05, - "loss": 4.9336, - "step": 37046 - }, - { - "epoch": 19.320469361147328, - "grad_norm": 1.616157054901123, - "learning_rate": 6.328241206030151e-05, - "loss": 5.4347, - "step": 37047 - }, - { - "epoch": 19.320990873533248, - "grad_norm": 1.4936193227767944, - "learning_rate": 6.328140703517588e-05, - "loss": 5.3858, - "step": 37048 - }, - { - "epoch": 19.321512385919167, - "grad_norm": 1.510345697402954, - "learning_rate": 6.328040201005025e-05, - "loss": 4.8475, - "step": 37049 - }, - { - "epoch": 19.322033898305083, - "grad_norm": 1.5535348653793335, - "learning_rate": 6.327939698492462e-05, - "loss": 5.1352, - "step": 37050 - }, - { - "epoch": 19.322555410691002, - "grad_norm": 1.4663493633270264, - "learning_rate": 6.3278391959799e-05, - "loss": 5.141, - "step": 37051 - }, - { - "epoch": 19.323076923076922, - "grad_norm": 1.5262188911437988, - "learning_rate": 6.327738693467337e-05, - "loss": 5.1044, - "step": 37052 - }, - { - "epoch": 19.32359843546284, - "grad_norm": 1.391670823097229, - "learning_rate": 6.327638190954775e-05, - "loss": 5.6062, - "step": 37053 - }, - { - "epoch": 19.32411994784876, - "grad_norm": 1.5654526948928833, - "learning_rate": 6.327537688442211e-05, - "loss": 5.4623, - "step": 37054 - }, - { - "epoch": 19.32464146023468, - "grad_norm": 1.4188475608825684, - "learning_rate": 6.327437185929649e-05, - "loss": 5.0733, - "step": 37055 - }, - { - "epoch": 19.3251629726206, - "grad_norm": 1.451438546180725, - "learning_rate": 6.327336683417086e-05, - "loss": 5.2397, - "step": 37056 - }, - { - "epoch": 19.32568448500652, - "grad_norm": 1.5199898481369019, - "learning_rate": 6.327236180904523e-05, - "loss": 5.1675, - "step": 37057 - }, - { - "epoch": 19.32620599739244, - "grad_norm": 1.4219231605529785, - "learning_rate": 6.32713567839196e-05, - "loss": 4.5553, - "step": 37058 - }, - { - "epoch": 19.326727509778358, - "grad_norm": 1.477130651473999, - "learning_rate": 6.327035175879398e-05, - "loss": 5.2435, - "step": 37059 - }, - { - "epoch": 19.327249022164278, - "grad_norm": 1.5212773084640503, - "learning_rate": 6.326934673366834e-05, - "loss": 5.2433, - "step": 37060 - }, - { - "epoch": 19.327770534550197, - "grad_norm": 1.4492864608764648, - "learning_rate": 6.326834170854272e-05, - "loss": 4.9764, - "step": 37061 - }, - { - "epoch": 19.328292046936113, - "grad_norm": 1.5227199792861938, - "learning_rate": 6.326733668341708e-05, - "loss": 5.5993, - "step": 37062 - }, - { - "epoch": 19.328813559322032, - "grad_norm": 1.3341858386993408, - "learning_rate": 6.326633165829146e-05, - "loss": 5.3899, - "step": 37063 - }, - { - "epoch": 19.329335071707952, - "grad_norm": 1.4651274681091309, - "learning_rate": 6.326532663316584e-05, - "loss": 5.3836, - "step": 37064 - }, - { - "epoch": 19.32985658409387, - "grad_norm": 1.557844877243042, - "learning_rate": 6.32643216080402e-05, - "loss": 5.8565, - "step": 37065 - }, - { - "epoch": 19.33037809647979, - "grad_norm": 1.4713866710662842, - "learning_rate": 6.326331658291458e-05, - "loss": 5.0659, - "step": 37066 - }, - { - "epoch": 19.33089960886571, - "grad_norm": 1.4952170848846436, - "learning_rate": 6.326231155778895e-05, - "loss": 5.5119, - "step": 37067 - }, - { - "epoch": 19.33142112125163, - "grad_norm": 1.4446187019348145, - "learning_rate": 6.326130653266332e-05, - "loss": 5.23, - "step": 37068 - }, - { - "epoch": 19.33194263363755, - "grad_norm": 1.3665767908096313, - "learning_rate": 6.326030150753769e-05, - "loss": 5.5613, - "step": 37069 - }, - { - "epoch": 19.33246414602347, - "grad_norm": 1.4433425664901733, - "learning_rate": 6.325929648241207e-05, - "loss": 5.3037, - "step": 37070 - }, - { - "epoch": 19.332985658409388, - "grad_norm": 1.4764549732208252, - "learning_rate": 6.325829145728643e-05, - "loss": 5.4244, - "step": 37071 - }, - { - "epoch": 19.333507170795308, - "grad_norm": 1.4227054119110107, - "learning_rate": 6.325728643216081e-05, - "loss": 5.5109, - "step": 37072 - }, - { - "epoch": 19.334028683181227, - "grad_norm": 1.590537667274475, - "learning_rate": 6.325628140703517e-05, - "loss": 4.9628, - "step": 37073 - }, - { - "epoch": 19.334550195567143, - "grad_norm": 1.5459904670715332, - "learning_rate": 6.325527638190955e-05, - "loss": 5.4186, - "step": 37074 - }, - { - "epoch": 19.335071707953063, - "grad_norm": 1.4941155910491943, - "learning_rate": 6.325427135678393e-05, - "loss": 5.5436, - "step": 37075 - }, - { - "epoch": 19.335593220338982, - "grad_norm": 1.5174111127853394, - "learning_rate": 6.325326633165829e-05, - "loss": 5.5859, - "step": 37076 - }, - { - "epoch": 19.3361147327249, - "grad_norm": 1.3993632793426514, - "learning_rate": 6.325226130653267e-05, - "loss": 5.5369, - "step": 37077 - }, - { - "epoch": 19.33663624511082, - "grad_norm": 1.520076036453247, - "learning_rate": 6.325125628140703e-05, - "loss": 4.9423, - "step": 37078 - }, - { - "epoch": 19.33715775749674, - "grad_norm": 1.476530909538269, - "learning_rate": 6.325025125628141e-05, - "loss": 5.491, - "step": 37079 - }, - { - "epoch": 19.33767926988266, - "grad_norm": 1.497899055480957, - "learning_rate": 6.324924623115578e-05, - "loss": 5.3347, - "step": 37080 - }, - { - "epoch": 19.33820078226858, - "grad_norm": 1.6110990047454834, - "learning_rate": 6.324824120603015e-05, - "loss": 5.0702, - "step": 37081 - }, - { - "epoch": 19.3387222946545, - "grad_norm": 1.511828064918518, - "learning_rate": 6.324723618090452e-05, - "loss": 4.9993, - "step": 37082 - }, - { - "epoch": 19.33924380704042, - "grad_norm": 1.3735371828079224, - "learning_rate": 6.32462311557789e-05, - "loss": 5.8972, - "step": 37083 - }, - { - "epoch": 19.339765319426338, - "grad_norm": 1.4447212219238281, - "learning_rate": 6.324522613065327e-05, - "loss": 5.4688, - "step": 37084 - }, - { - "epoch": 19.340286831812257, - "grad_norm": 1.4063836336135864, - "learning_rate": 6.324422110552765e-05, - "loss": 5.2246, - "step": 37085 - }, - { - "epoch": 19.340808344198173, - "grad_norm": 1.4194180965423584, - "learning_rate": 6.324321608040202e-05, - "loss": 5.5086, - "step": 37086 - }, - { - "epoch": 19.341329856584093, - "grad_norm": 1.440015196800232, - "learning_rate": 6.32422110552764e-05, - "loss": 5.0298, - "step": 37087 - }, - { - "epoch": 19.341851368970012, - "grad_norm": 1.549672245979309, - "learning_rate": 6.324120603015076e-05, - "loss": 5.1629, - "step": 37088 - }, - { - "epoch": 19.34237288135593, - "grad_norm": 1.545289158821106, - "learning_rate": 6.324020100502512e-05, - "loss": 5.3528, - "step": 37089 - }, - { - "epoch": 19.34289439374185, - "grad_norm": 1.5546116828918457, - "learning_rate": 6.32391959798995e-05, - "loss": 5.4503, - "step": 37090 - }, - { - "epoch": 19.34341590612777, - "grad_norm": 1.4881292581558228, - "learning_rate": 6.323819095477386e-05, - "loss": 5.4775, - "step": 37091 - }, - { - "epoch": 19.34393741851369, - "grad_norm": 1.4726693630218506, - "learning_rate": 6.323718592964824e-05, - "loss": 5.452, - "step": 37092 - }, - { - "epoch": 19.34445893089961, - "grad_norm": 1.5209449529647827, - "learning_rate": 6.32361809045226e-05, - "loss": 5.2722, - "step": 37093 - }, - { - "epoch": 19.34498044328553, - "grad_norm": 1.5259023904800415, - "learning_rate": 6.323517587939698e-05, - "loss": 5.3292, - "step": 37094 - }, - { - "epoch": 19.34550195567145, - "grad_norm": 1.4960318803787231, - "learning_rate": 6.323417085427136e-05, - "loss": 5.4169, - "step": 37095 - }, - { - "epoch": 19.346023468057368, - "grad_norm": 1.6557385921478271, - "learning_rate": 6.323316582914574e-05, - "loss": 5.1099, - "step": 37096 - }, - { - "epoch": 19.346544980443287, - "grad_norm": 1.5973485708236694, - "learning_rate": 6.32321608040201e-05, - "loss": 4.8045, - "step": 37097 - }, - { - "epoch": 19.347066492829203, - "grad_norm": 1.703015923500061, - "learning_rate": 6.323115577889448e-05, - "loss": 4.6729, - "step": 37098 - }, - { - "epoch": 19.347588005215123, - "grad_norm": 1.5253891944885254, - "learning_rate": 6.323015075376885e-05, - "loss": 5.37, - "step": 37099 - }, - { - "epoch": 19.348109517601042, - "grad_norm": 1.587241768836975, - "learning_rate": 6.322914572864322e-05, - "loss": 5.337, - "step": 37100 - }, - { - "epoch": 19.34863102998696, - "grad_norm": 1.5907340049743652, - "learning_rate": 6.322814070351759e-05, - "loss": 5.0856, - "step": 37101 - }, - { - "epoch": 19.34915254237288, - "grad_norm": 1.68035888671875, - "learning_rate": 6.322713567839195e-05, - "loss": 5.2875, - "step": 37102 - }, - { - "epoch": 19.3496740547588, - "grad_norm": 1.4841622114181519, - "learning_rate": 6.322613065326633e-05, - "loss": 4.9518, - "step": 37103 - }, - { - "epoch": 19.35019556714472, - "grad_norm": 1.5364917516708374, - "learning_rate": 6.322512562814071e-05, - "loss": 5.4003, - "step": 37104 - }, - { - "epoch": 19.35071707953064, - "grad_norm": 1.4059512615203857, - "learning_rate": 6.322412060301509e-05, - "loss": 5.2571, - "step": 37105 - }, - { - "epoch": 19.35123859191656, - "grad_norm": 1.6129995584487915, - "learning_rate": 6.322311557788945e-05, - "loss": 4.8882, - "step": 37106 - }, - { - "epoch": 19.35176010430248, - "grad_norm": 1.5219221115112305, - "learning_rate": 6.322211055276383e-05, - "loss": 5.4591, - "step": 37107 - }, - { - "epoch": 19.352281616688398, - "grad_norm": 1.5046744346618652, - "learning_rate": 6.322110552763819e-05, - "loss": 5.1932, - "step": 37108 - }, - { - "epoch": 19.352803129074317, - "grad_norm": 1.640858769416809, - "learning_rate": 6.322010050251257e-05, - "loss": 4.9884, - "step": 37109 - }, - { - "epoch": 19.353324641460233, - "grad_norm": 1.5399210453033447, - "learning_rate": 6.321909547738693e-05, - "loss": 5.4409, - "step": 37110 - }, - { - "epoch": 19.353846153846153, - "grad_norm": 1.4734032154083252, - "learning_rate": 6.321809045226131e-05, - "loss": 5.5055, - "step": 37111 - }, - { - "epoch": 19.354367666232072, - "grad_norm": 1.6703662872314453, - "learning_rate": 6.321708542713568e-05, - "loss": 5.5863, - "step": 37112 - }, - { - "epoch": 19.35488917861799, - "grad_norm": 1.5822257995605469, - "learning_rate": 6.321608040201005e-05, - "loss": 5.2479, - "step": 37113 - }, - { - "epoch": 19.35541069100391, - "grad_norm": 1.5929591655731201, - "learning_rate": 6.321507537688442e-05, - "loss": 5.2665, - "step": 37114 - }, - { - "epoch": 19.35593220338983, - "grad_norm": 1.5502634048461914, - "learning_rate": 6.32140703517588e-05, - "loss": 5.4112, - "step": 37115 - }, - { - "epoch": 19.35645371577575, - "grad_norm": 1.504163146018982, - "learning_rate": 6.321306532663317e-05, - "loss": 5.7628, - "step": 37116 - }, - { - "epoch": 19.35697522816167, - "grad_norm": 1.5411347150802612, - "learning_rate": 6.321206030150754e-05, - "loss": 5.441, - "step": 37117 - }, - { - "epoch": 19.35749674054759, - "grad_norm": 1.5393457412719727, - "learning_rate": 6.321105527638192e-05, - "loss": 5.4607, - "step": 37118 - }, - { - "epoch": 19.35801825293351, - "grad_norm": 1.5102952718734741, - "learning_rate": 6.321005025125628e-05, - "loss": 5.0934, - "step": 37119 - }, - { - "epoch": 19.358539765319428, - "grad_norm": 1.6449638605117798, - "learning_rate": 6.320904522613066e-05, - "loss": 5.2486, - "step": 37120 - }, - { - "epoch": 19.359061277705347, - "grad_norm": 1.4409879446029663, - "learning_rate": 6.320804020100502e-05, - "loss": 5.6407, - "step": 37121 - }, - { - "epoch": 19.359582790091263, - "grad_norm": 1.5536202192306519, - "learning_rate": 6.32070351758794e-05, - "loss": 5.259, - "step": 37122 - }, - { - "epoch": 19.360104302477183, - "grad_norm": 1.4153131246566772, - "learning_rate": 6.320603015075376e-05, - "loss": 5.1754, - "step": 37123 - }, - { - "epoch": 19.360625814863102, - "grad_norm": 1.4174671173095703, - "learning_rate": 6.320502512562814e-05, - "loss": 5.3541, - "step": 37124 - }, - { - "epoch": 19.36114732724902, - "grad_norm": 1.5260249376296997, - "learning_rate": 6.320402010050252e-05, - "loss": 4.8405, - "step": 37125 - }, - { - "epoch": 19.36166883963494, - "grad_norm": 1.4917182922363281, - "learning_rate": 6.32030150753769e-05, - "loss": 5.4925, - "step": 37126 - }, - { - "epoch": 19.36219035202086, - "grad_norm": 1.599554181098938, - "learning_rate": 6.320201005025126e-05, - "loss": 5.2647, - "step": 37127 - }, - { - "epoch": 19.36271186440678, - "grad_norm": 1.5066841840744019, - "learning_rate": 6.320100502512563e-05, - "loss": 5.1892, - "step": 37128 - }, - { - "epoch": 19.3632333767927, - "grad_norm": 1.503145694732666, - "learning_rate": 6.32e-05, - "loss": 4.9431, - "step": 37129 - }, - { - "epoch": 19.36375488917862, - "grad_norm": 1.4827722311019897, - "learning_rate": 6.319899497487437e-05, - "loss": 4.7366, - "step": 37130 - }, - { - "epoch": 19.36427640156454, - "grad_norm": 1.4827415943145752, - "learning_rate": 6.319798994974875e-05, - "loss": 5.32, - "step": 37131 - }, - { - "epoch": 19.364797913950458, - "grad_norm": 1.5232783555984497, - "learning_rate": 6.319698492462311e-05, - "loss": 5.3045, - "step": 37132 - }, - { - "epoch": 19.365319426336377, - "grad_norm": 1.5192553997039795, - "learning_rate": 6.319597989949749e-05, - "loss": 4.8675, - "step": 37133 - }, - { - "epoch": 19.365840938722293, - "grad_norm": 1.5345357656478882, - "learning_rate": 6.319497487437185e-05, - "loss": 5.319, - "step": 37134 - }, - { - "epoch": 19.366362451108213, - "grad_norm": 1.5717198848724365, - "learning_rate": 6.319396984924623e-05, - "loss": 5.2626, - "step": 37135 - }, - { - "epoch": 19.366883963494132, - "grad_norm": 1.5151216983795166, - "learning_rate": 6.319296482412061e-05, - "loss": 5.2011, - "step": 37136 - }, - { - "epoch": 19.36740547588005, - "grad_norm": 1.580197811126709, - "learning_rate": 6.319195979899499e-05, - "loss": 5.0186, - "step": 37137 - }, - { - "epoch": 19.36792698826597, - "grad_norm": 1.4931432008743286, - "learning_rate": 6.319095477386935e-05, - "loss": 5.0168, - "step": 37138 - }, - { - "epoch": 19.36844850065189, - "grad_norm": 1.5474936962127686, - "learning_rate": 6.318994974874373e-05, - "loss": 4.8129, - "step": 37139 - }, - { - "epoch": 19.36897001303781, - "grad_norm": 1.4531172513961792, - "learning_rate": 6.318894472361809e-05, - "loss": 5.4508, - "step": 37140 - }, - { - "epoch": 19.36949152542373, - "grad_norm": 1.5608268976211548, - "learning_rate": 6.318793969849246e-05, - "loss": 5.1669, - "step": 37141 - }, - { - "epoch": 19.37001303780965, - "grad_norm": 1.523646593093872, - "learning_rate": 6.318693467336684e-05, - "loss": 5.4248, - "step": 37142 - }, - { - "epoch": 19.37053455019557, - "grad_norm": 1.5354210138320923, - "learning_rate": 6.31859296482412e-05, - "loss": 5.3409, - "step": 37143 - }, - { - "epoch": 19.371056062581488, - "grad_norm": 1.5741533041000366, - "learning_rate": 6.318492462311558e-05, - "loss": 5.0989, - "step": 37144 - }, - { - "epoch": 19.371577574967404, - "grad_norm": 1.6116901636123657, - "learning_rate": 6.318391959798996e-05, - "loss": 5.3771, - "step": 37145 - }, - { - "epoch": 19.372099087353323, - "grad_norm": 1.4636410474777222, - "learning_rate": 6.318291457286433e-05, - "loss": 5.7453, - "step": 37146 - }, - { - "epoch": 19.372620599739243, - "grad_norm": 1.532753348350525, - "learning_rate": 6.31819095477387e-05, - "loss": 5.1635, - "step": 37147 - }, - { - "epoch": 19.373142112125162, - "grad_norm": 1.4354628324508667, - "learning_rate": 6.318090452261308e-05, - "loss": 5.078, - "step": 37148 - }, - { - "epoch": 19.37366362451108, - "grad_norm": 1.4830875396728516, - "learning_rate": 6.317989949748744e-05, - "loss": 5.2865, - "step": 37149 - }, - { - "epoch": 19.374185136897, - "grad_norm": 1.390494465827942, - "learning_rate": 6.317889447236182e-05, - "loss": 4.8135, - "step": 37150 - }, - { - "epoch": 19.37470664928292, - "grad_norm": 1.4069291353225708, - "learning_rate": 6.317788944723618e-05, - "loss": 5.5019, - "step": 37151 - }, - { - "epoch": 19.37522816166884, - "grad_norm": 1.520014762878418, - "learning_rate": 6.317688442211056e-05, - "loss": 5.469, - "step": 37152 - }, - { - "epoch": 19.37574967405476, - "grad_norm": 1.543834924697876, - "learning_rate": 6.317587939698492e-05, - "loss": 4.9698, - "step": 37153 - }, - { - "epoch": 19.37627118644068, - "grad_norm": 1.4796831607818604, - "learning_rate": 6.31748743718593e-05, - "loss": 5.5475, - "step": 37154 - }, - { - "epoch": 19.3767926988266, - "grad_norm": 1.471279263496399, - "learning_rate": 6.317386934673367e-05, - "loss": 4.8241, - "step": 37155 - }, - { - "epoch": 19.377314211212518, - "grad_norm": 1.569157361984253, - "learning_rate": 6.317286432160804e-05, - "loss": 5.7546, - "step": 37156 - }, - { - "epoch": 19.377835723598434, - "grad_norm": 1.5846130847930908, - "learning_rate": 6.317185929648242e-05, - "loss": 5.2128, - "step": 37157 - }, - { - "epoch": 19.378357235984353, - "grad_norm": 1.6638171672821045, - "learning_rate": 6.317085427135679e-05, - "loss": 5.3114, - "step": 37158 - }, - { - "epoch": 19.378878748370273, - "grad_norm": 1.4874213933944702, - "learning_rate": 6.316984924623116e-05, - "loss": 4.9927, - "step": 37159 - }, - { - "epoch": 19.379400260756192, - "grad_norm": 1.6096141338348389, - "learning_rate": 6.316884422110553e-05, - "loss": 4.9932, - "step": 37160 - }, - { - "epoch": 19.37992177314211, - "grad_norm": 1.4527568817138672, - "learning_rate": 6.31678391959799e-05, - "loss": 5.1799, - "step": 37161 - }, - { - "epoch": 19.38044328552803, - "grad_norm": 1.3724361658096313, - "learning_rate": 6.316683417085427e-05, - "loss": 5.3762, - "step": 37162 - }, - { - "epoch": 19.38096479791395, - "grad_norm": 1.448685884475708, - "learning_rate": 6.316582914572865e-05, - "loss": 5.766, - "step": 37163 - }, - { - "epoch": 19.38148631029987, - "grad_norm": 1.5163205862045288, - "learning_rate": 6.316482412060301e-05, - "loss": 5.1211, - "step": 37164 - }, - { - "epoch": 19.38200782268579, - "grad_norm": 1.5153307914733887, - "learning_rate": 6.316381909547739e-05, - "loss": 5.5643, - "step": 37165 - }, - { - "epoch": 19.38252933507171, - "grad_norm": 1.5981110334396362, - "learning_rate": 6.316281407035177e-05, - "loss": 4.7278, - "step": 37166 - }, - { - "epoch": 19.38305084745763, - "grad_norm": 1.5576859712600708, - "learning_rate": 6.316180904522615e-05, - "loss": 5.7755, - "step": 37167 - }, - { - "epoch": 19.383572359843548, - "grad_norm": 1.4697257280349731, - "learning_rate": 6.316080402010051e-05, - "loss": 5.3523, - "step": 37168 - }, - { - "epoch": 19.384093872229464, - "grad_norm": 1.5471277236938477, - "learning_rate": 6.315979899497487e-05, - "loss": 4.5826, - "step": 37169 - }, - { - "epoch": 19.384615384615383, - "grad_norm": 1.4236682653427124, - "learning_rate": 6.315879396984925e-05, - "loss": 5.5613, - "step": 37170 - }, - { - "epoch": 19.385136897001303, - "grad_norm": 1.5070534944534302, - "learning_rate": 6.315778894472362e-05, - "loss": 5.3282, - "step": 37171 - }, - { - "epoch": 19.385658409387222, - "grad_norm": 1.4455013275146484, - "learning_rate": 6.3156783919598e-05, - "loss": 5.2175, - "step": 37172 - }, - { - "epoch": 19.38617992177314, - "grad_norm": 1.4438649415969849, - "learning_rate": 6.315577889447236e-05, - "loss": 5.5264, - "step": 37173 - }, - { - "epoch": 19.38670143415906, - "grad_norm": 1.523566722869873, - "learning_rate": 6.315477386934674e-05, - "loss": 5.3955, - "step": 37174 - }, - { - "epoch": 19.38722294654498, - "grad_norm": 1.515112280845642, - "learning_rate": 6.31537688442211e-05, - "loss": 5.424, - "step": 37175 - }, - { - "epoch": 19.3877444589309, - "grad_norm": 1.6169953346252441, - "learning_rate": 6.315276381909548e-05, - "loss": 5.0866, - "step": 37176 - }, - { - "epoch": 19.38826597131682, - "grad_norm": 1.5474475622177124, - "learning_rate": 6.315175879396986e-05, - "loss": 5.1086, - "step": 37177 - }, - { - "epoch": 19.38878748370274, - "grad_norm": 1.5691883563995361, - "learning_rate": 6.315075376884423e-05, - "loss": 5.3893, - "step": 37178 - }, - { - "epoch": 19.38930899608866, - "grad_norm": 1.4614287614822388, - "learning_rate": 6.31497487437186e-05, - "loss": 4.8538, - "step": 37179 - }, - { - "epoch": 19.389830508474578, - "grad_norm": 1.499411940574646, - "learning_rate": 6.314874371859298e-05, - "loss": 5.592, - "step": 37180 - }, - { - "epoch": 19.390352020860494, - "grad_norm": 1.5720584392547607, - "learning_rate": 6.314773869346734e-05, - "loss": 5.2985, - "step": 37181 - }, - { - "epoch": 19.390873533246413, - "grad_norm": 1.6698482036590576, - "learning_rate": 6.31467336683417e-05, - "loss": 5.2644, - "step": 37182 - }, - { - "epoch": 19.391395045632333, - "grad_norm": 1.524125337600708, - "learning_rate": 6.314572864321608e-05, - "loss": 5.1625, - "step": 37183 - }, - { - "epoch": 19.391916558018252, - "grad_norm": 1.497400164604187, - "learning_rate": 6.314472361809045e-05, - "loss": 5.0787, - "step": 37184 - }, - { - "epoch": 19.39243807040417, - "grad_norm": 1.393250823020935, - "learning_rate": 6.314371859296482e-05, - "loss": 4.6639, - "step": 37185 - }, - { - "epoch": 19.39295958279009, - "grad_norm": 1.509372353553772, - "learning_rate": 6.31427135678392e-05, - "loss": 5.2969, - "step": 37186 - }, - { - "epoch": 19.39348109517601, - "grad_norm": 1.5587784051895142, - "learning_rate": 6.314170854271358e-05, - "loss": 5.5173, - "step": 37187 - }, - { - "epoch": 19.39400260756193, - "grad_norm": 1.4776206016540527, - "learning_rate": 6.314070351758794e-05, - "loss": 5.4593, - "step": 37188 - }, - { - "epoch": 19.39452411994785, - "grad_norm": 1.5518769025802612, - "learning_rate": 6.313969849246232e-05, - "loss": 4.6403, - "step": 37189 - }, - { - "epoch": 19.39504563233377, - "grad_norm": 1.533639669418335, - "learning_rate": 6.313869346733669e-05, - "loss": 5.1828, - "step": 37190 - }, - { - "epoch": 19.39556714471969, - "grad_norm": 1.6061030626296997, - "learning_rate": 6.313768844221106e-05, - "loss": 4.6874, - "step": 37191 - }, - { - "epoch": 19.396088657105608, - "grad_norm": 1.4598623514175415, - "learning_rate": 6.313668341708543e-05, - "loss": 5.1044, - "step": 37192 - }, - { - "epoch": 19.396610169491524, - "grad_norm": 1.4822458028793335, - "learning_rate": 6.31356783919598e-05, - "loss": 5.5344, - "step": 37193 - }, - { - "epoch": 19.397131681877443, - "grad_norm": 1.5150407552719116, - "learning_rate": 6.313467336683417e-05, - "loss": 5.1483, - "step": 37194 - }, - { - "epoch": 19.397653194263363, - "grad_norm": 1.5336297750473022, - "learning_rate": 6.313366834170853e-05, - "loss": 5.3455, - "step": 37195 - }, - { - "epoch": 19.398174706649282, - "grad_norm": 1.4362671375274658, - "learning_rate": 6.313266331658291e-05, - "loss": 5.544, - "step": 37196 - }, - { - "epoch": 19.3986962190352, - "grad_norm": 1.4620648622512817, - "learning_rate": 6.313165829145729e-05, - "loss": 5.1571, - "step": 37197 - }, - { - "epoch": 19.39921773142112, - "grad_norm": 1.4666802883148193, - "learning_rate": 6.313065326633167e-05, - "loss": 5.0899, - "step": 37198 - }, - { - "epoch": 19.39973924380704, - "grad_norm": 1.4427659511566162, - "learning_rate": 6.312964824120603e-05, - "loss": 5.1333, - "step": 37199 - }, - { - "epoch": 19.40026075619296, - "grad_norm": 1.452598214149475, - "learning_rate": 6.312864321608041e-05, - "loss": 4.9409, - "step": 37200 - }, - { - "epoch": 19.40078226857888, - "grad_norm": 1.655092716217041, - "learning_rate": 6.312763819095477e-05, - "loss": 5.014, - "step": 37201 - }, - { - "epoch": 19.4013037809648, - "grad_norm": 1.5137276649475098, - "learning_rate": 6.312663316582915e-05, - "loss": 5.1864, - "step": 37202 - }, - { - "epoch": 19.40182529335072, - "grad_norm": 1.560375690460205, - "learning_rate": 6.312562814070352e-05, - "loss": 5.4316, - "step": 37203 - }, - { - "epoch": 19.402346805736638, - "grad_norm": 1.5381048917770386, - "learning_rate": 6.31246231155779e-05, - "loss": 4.7365, - "step": 37204 - }, - { - "epoch": 19.402868318122554, - "grad_norm": 1.4114108085632324, - "learning_rate": 6.312361809045226e-05, - "loss": 5.9943, - "step": 37205 - }, - { - "epoch": 19.403389830508473, - "grad_norm": 1.756009578704834, - "learning_rate": 6.312261306532664e-05, - "loss": 5.3833, - "step": 37206 - }, - { - "epoch": 19.403911342894393, - "grad_norm": 1.4191328287124634, - "learning_rate": 6.312160804020101e-05, - "loss": 4.8109, - "step": 37207 - }, - { - "epoch": 19.404432855280312, - "grad_norm": 1.4172736406326294, - "learning_rate": 6.312060301507538e-05, - "loss": 5.4628, - "step": 37208 - }, - { - "epoch": 19.40495436766623, - "grad_norm": 1.6597981452941895, - "learning_rate": 6.311959798994976e-05, - "loss": 4.8706, - "step": 37209 - }, - { - "epoch": 19.40547588005215, - "grad_norm": 1.5348907709121704, - "learning_rate": 6.311859296482412e-05, - "loss": 5.3727, - "step": 37210 - }, - { - "epoch": 19.40599739243807, - "grad_norm": 1.5799050331115723, - "learning_rate": 6.31175879396985e-05, - "loss": 5.4354, - "step": 37211 - }, - { - "epoch": 19.40651890482399, - "grad_norm": 1.5251089334487915, - "learning_rate": 6.311658291457286e-05, - "loss": 5.3834, - "step": 37212 - }, - { - "epoch": 19.40704041720991, - "grad_norm": 1.4434407949447632, - "learning_rate": 6.311557788944724e-05, - "loss": 5.3558, - "step": 37213 - }, - { - "epoch": 19.40756192959583, - "grad_norm": 1.5814133882522583, - "learning_rate": 6.31145728643216e-05, - "loss": 5.2056, - "step": 37214 - }, - { - "epoch": 19.40808344198175, - "grad_norm": 1.609458088874817, - "learning_rate": 6.311356783919598e-05, - "loss": 5.6505, - "step": 37215 - }, - { - "epoch": 19.408604954367668, - "grad_norm": 1.6148344278335571, - "learning_rate": 6.311256281407035e-05, - "loss": 5.252, - "step": 37216 - }, - { - "epoch": 19.409126466753584, - "grad_norm": 1.559738278388977, - "learning_rate": 6.311155778894473e-05, - "loss": 5.2684, - "step": 37217 - }, - { - "epoch": 19.409647979139503, - "grad_norm": 1.470294713973999, - "learning_rate": 6.31105527638191e-05, - "loss": 5.3866, - "step": 37218 - }, - { - "epoch": 19.410169491525423, - "grad_norm": 1.3613698482513428, - "learning_rate": 6.310954773869348e-05, - "loss": 5.3841, - "step": 37219 - }, - { - "epoch": 19.410691003911342, - "grad_norm": 1.487229824066162, - "learning_rate": 6.310854271356785e-05, - "loss": 5.0261, - "step": 37220 - }, - { - "epoch": 19.41121251629726, - "grad_norm": 1.6053881645202637, - "learning_rate": 6.310753768844221e-05, - "loss": 5.3562, - "step": 37221 - }, - { - "epoch": 19.41173402868318, - "grad_norm": 1.5299992561340332, - "learning_rate": 6.310653266331659e-05, - "loss": 5.3561, - "step": 37222 - }, - { - "epoch": 19.4122555410691, - "grad_norm": 1.5384016036987305, - "learning_rate": 6.310552763819095e-05, - "loss": 4.6831, - "step": 37223 - }, - { - "epoch": 19.41277705345502, - "grad_norm": 1.5678869485855103, - "learning_rate": 6.310452261306533e-05, - "loss": 5.1611, - "step": 37224 - }, - { - "epoch": 19.41329856584094, - "grad_norm": 1.4890213012695312, - "learning_rate": 6.31035175879397e-05, - "loss": 5.0934, - "step": 37225 - }, - { - "epoch": 19.41382007822686, - "grad_norm": 1.4977561235427856, - "learning_rate": 6.310251256281407e-05, - "loss": 5.1157, - "step": 37226 - }, - { - "epoch": 19.41434159061278, - "grad_norm": 1.5159685611724854, - "learning_rate": 6.310150753768844e-05, - "loss": 5.5567, - "step": 37227 - }, - { - "epoch": 19.414863102998694, - "grad_norm": 1.5723031759262085, - "learning_rate": 6.310050251256281e-05, - "loss": 5.4754, - "step": 37228 - }, - { - "epoch": 19.415384615384614, - "grad_norm": 1.5406476259231567, - "learning_rate": 6.309949748743719e-05, - "loss": 5.2093, - "step": 37229 - }, - { - "epoch": 19.415906127770533, - "grad_norm": 1.4024009704589844, - "learning_rate": 6.309849246231157e-05, - "loss": 5.265, - "step": 37230 - }, - { - "epoch": 19.416427640156453, - "grad_norm": 1.4275270700454712, - "learning_rate": 6.309748743718593e-05, - "loss": 4.9464, - "step": 37231 - }, - { - "epoch": 19.416949152542372, - "grad_norm": 1.5062472820281982, - "learning_rate": 6.309648241206031e-05, - "loss": 5.0283, - "step": 37232 - }, - { - "epoch": 19.41747066492829, - "grad_norm": 1.4368422031402588, - "learning_rate": 6.309547738693468e-05, - "loss": 5.4871, - "step": 37233 - }, - { - "epoch": 19.41799217731421, - "grad_norm": 1.6018155813217163, - "learning_rate": 6.309447236180904e-05, - "loss": 5.2045, - "step": 37234 - }, - { - "epoch": 19.41851368970013, - "grad_norm": 1.5483193397521973, - "learning_rate": 6.309346733668342e-05, - "loss": 5.1693, - "step": 37235 - }, - { - "epoch": 19.41903520208605, - "grad_norm": 1.5866891145706177, - "learning_rate": 6.309246231155778e-05, - "loss": 5.3571, - "step": 37236 - }, - { - "epoch": 19.41955671447197, - "grad_norm": 1.5753493309020996, - "learning_rate": 6.309145728643216e-05, - "loss": 5.2074, - "step": 37237 - }, - { - "epoch": 19.42007822685789, - "grad_norm": 1.5064932107925415, - "learning_rate": 6.309045226130654e-05, - "loss": 5.521, - "step": 37238 - }, - { - "epoch": 19.42059973924381, - "grad_norm": 1.583794355392456, - "learning_rate": 6.308944723618092e-05, - "loss": 5.6401, - "step": 37239 - }, - { - "epoch": 19.421121251629724, - "grad_norm": 1.7183427810668945, - "learning_rate": 6.308844221105528e-05, - "loss": 5.0604, - "step": 37240 - }, - { - "epoch": 19.421642764015644, - "grad_norm": 1.4739699363708496, - "learning_rate": 6.308743718592966e-05, - "loss": 5.6901, - "step": 37241 - }, - { - "epoch": 19.422164276401563, - "grad_norm": 1.602225661277771, - "learning_rate": 6.308643216080402e-05, - "loss": 5.2367, - "step": 37242 - }, - { - "epoch": 19.422685788787483, - "grad_norm": 1.4460948705673218, - "learning_rate": 6.30854271356784e-05, - "loss": 5.6569, - "step": 37243 - }, - { - "epoch": 19.423207301173402, - "grad_norm": 1.569582223892212, - "learning_rate": 6.308442211055276e-05, - "loss": 5.5332, - "step": 37244 - }, - { - "epoch": 19.423728813559322, - "grad_norm": 1.575559139251709, - "learning_rate": 6.308341708542714e-05, - "loss": 5.565, - "step": 37245 - }, - { - "epoch": 19.42425032594524, - "grad_norm": 1.5398699045181274, - "learning_rate": 6.30824120603015e-05, - "loss": 5.384, - "step": 37246 - }, - { - "epoch": 19.42477183833116, - "grad_norm": 1.5141342878341675, - "learning_rate": 6.308140703517588e-05, - "loss": 5.3436, - "step": 37247 - }, - { - "epoch": 19.42529335071708, - "grad_norm": 1.5421720743179321, - "learning_rate": 6.308040201005025e-05, - "loss": 5.4526, - "step": 37248 - }, - { - "epoch": 19.425814863103, - "grad_norm": 1.475429654121399, - "learning_rate": 6.307939698492463e-05, - "loss": 5.3894, - "step": 37249 - }, - { - "epoch": 19.42633637548892, - "grad_norm": 1.575569987297058, - "learning_rate": 6.3078391959799e-05, - "loss": 5.478, - "step": 37250 - }, - { - "epoch": 19.42685788787484, - "grad_norm": 1.4934653043746948, - "learning_rate": 6.307738693467337e-05, - "loss": 5.7038, - "step": 37251 - }, - { - "epoch": 19.427379400260754, - "grad_norm": 1.4902020692825317, - "learning_rate": 6.307638190954775e-05, - "loss": 5.5386, - "step": 37252 - }, - { - "epoch": 19.427900912646674, - "grad_norm": 1.519992470741272, - "learning_rate": 6.307537688442211e-05, - "loss": 5.0999, - "step": 37253 - }, - { - "epoch": 19.428422425032593, - "grad_norm": 1.9740692377090454, - "learning_rate": 6.307437185929649e-05, - "loss": 5.151, - "step": 37254 - }, - { - "epoch": 19.428943937418513, - "grad_norm": 1.5554062128067017, - "learning_rate": 6.307336683417085e-05, - "loss": 5.1663, - "step": 37255 - }, - { - "epoch": 19.429465449804432, - "grad_norm": 1.4907764196395874, - "learning_rate": 6.307236180904523e-05, - "loss": 5.4956, - "step": 37256 - }, - { - "epoch": 19.429986962190352, - "grad_norm": 1.504427433013916, - "learning_rate": 6.30713567839196e-05, - "loss": 5.2189, - "step": 37257 - }, - { - "epoch": 19.43050847457627, - "grad_norm": 1.4602121114730835, - "learning_rate": 6.307035175879397e-05, - "loss": 5.4462, - "step": 37258 - }, - { - "epoch": 19.43102998696219, - "grad_norm": 1.4749996662139893, - "learning_rate": 6.306934673366835e-05, - "loss": 5.4368, - "step": 37259 - }, - { - "epoch": 19.43155149934811, - "grad_norm": 1.6066274642944336, - "learning_rate": 6.306834170854273e-05, - "loss": 4.8398, - "step": 37260 - }, - { - "epoch": 19.43207301173403, - "grad_norm": 1.488830804824829, - "learning_rate": 6.306733668341709e-05, - "loss": 5.5997, - "step": 37261 - }, - { - "epoch": 19.43259452411995, - "grad_norm": 1.4294817447662354, - "learning_rate": 6.306633165829146e-05, - "loss": 5.2905, - "step": 37262 - }, - { - "epoch": 19.43311603650587, - "grad_norm": 1.55461585521698, - "learning_rate": 6.306532663316583e-05, - "loss": 5.4724, - "step": 37263 - }, - { - "epoch": 19.433637548891785, - "grad_norm": 1.5447301864624023, - "learning_rate": 6.30643216080402e-05, - "loss": 5.2932, - "step": 37264 - }, - { - "epoch": 19.434159061277704, - "grad_norm": 1.4225887060165405, - "learning_rate": 6.306331658291458e-05, - "loss": 5.5343, - "step": 37265 - }, - { - "epoch": 19.434680573663623, - "grad_norm": 1.4907426834106445, - "learning_rate": 6.306231155778894e-05, - "loss": 4.9991, - "step": 37266 - }, - { - "epoch": 19.435202086049543, - "grad_norm": 1.5347858667373657, - "learning_rate": 6.306130653266332e-05, - "loss": 5.1725, - "step": 37267 - }, - { - "epoch": 19.435723598435462, - "grad_norm": 1.6455618143081665, - "learning_rate": 6.306030150753768e-05, - "loss": 5.0643, - "step": 37268 - }, - { - "epoch": 19.436245110821382, - "grad_norm": 1.580331802368164, - "learning_rate": 6.305929648241206e-05, - "loss": 4.9387, - "step": 37269 - }, - { - "epoch": 19.4367666232073, - "grad_norm": 1.5663504600524902, - "learning_rate": 6.305829145728644e-05, - "loss": 5.1564, - "step": 37270 - }, - { - "epoch": 19.43728813559322, - "grad_norm": 1.4984995126724243, - "learning_rate": 6.305728643216082e-05, - "loss": 5.0121, - "step": 37271 - }, - { - "epoch": 19.43780964797914, - "grad_norm": 1.522286057472229, - "learning_rate": 6.305628140703518e-05, - "loss": 5.4763, - "step": 37272 - }, - { - "epoch": 19.43833116036506, - "grad_norm": 1.5093772411346436, - "learning_rate": 6.305527638190956e-05, - "loss": 5.4766, - "step": 37273 - }, - { - "epoch": 19.43885267275098, - "grad_norm": 1.4995659589767456, - "learning_rate": 6.305427135678392e-05, - "loss": 5.2377, - "step": 37274 - }, - { - "epoch": 19.4393741851369, - "grad_norm": 1.5183773040771484, - "learning_rate": 6.305326633165829e-05, - "loss": 4.9542, - "step": 37275 - }, - { - "epoch": 19.439895697522815, - "grad_norm": 1.486081838607788, - "learning_rate": 6.305226130653266e-05, - "loss": 5.4226, - "step": 37276 - }, - { - "epoch": 19.440417209908734, - "grad_norm": 1.5112732648849487, - "learning_rate": 6.305125628140703e-05, - "loss": 5.2461, - "step": 37277 - }, - { - "epoch": 19.440938722294653, - "grad_norm": 1.5091496706008911, - "learning_rate": 6.30502512562814e-05, - "loss": 5.4919, - "step": 37278 - }, - { - "epoch": 19.441460234680573, - "grad_norm": 1.4098429679870605, - "learning_rate": 6.304924623115578e-05, - "loss": 5.8317, - "step": 37279 - }, - { - "epoch": 19.441981747066492, - "grad_norm": 1.521650791168213, - "learning_rate": 6.304824120603016e-05, - "loss": 5.3419, - "step": 37280 - }, - { - "epoch": 19.442503259452412, - "grad_norm": 1.4135823249816895, - "learning_rate": 6.304723618090453e-05, - "loss": 5.5825, - "step": 37281 - }, - { - "epoch": 19.44302477183833, - "grad_norm": 1.4460318088531494, - "learning_rate": 6.30462311557789e-05, - "loss": 5.4736, - "step": 37282 - }, - { - "epoch": 19.44354628422425, - "grad_norm": 1.6256747245788574, - "learning_rate": 6.304522613065327e-05, - "loss": 4.9557, - "step": 37283 - }, - { - "epoch": 19.44406779661017, - "grad_norm": 1.418568730354309, - "learning_rate": 6.304422110552765e-05, - "loss": 5.2874, - "step": 37284 - }, - { - "epoch": 19.44458930899609, - "grad_norm": 1.5083210468292236, - "learning_rate": 6.304321608040201e-05, - "loss": 5.4561, - "step": 37285 - }, - { - "epoch": 19.44511082138201, - "grad_norm": 1.5574920177459717, - "learning_rate": 6.304221105527639e-05, - "loss": 5.1684, - "step": 37286 - }, - { - "epoch": 19.44563233376793, - "grad_norm": 1.6772947311401367, - "learning_rate": 6.304120603015075e-05, - "loss": 5.0604, - "step": 37287 - }, - { - "epoch": 19.446153846153845, - "grad_norm": 1.4920275211334229, - "learning_rate": 6.304020100502512e-05, - "loss": 5.3984, - "step": 37288 - }, - { - "epoch": 19.446675358539764, - "grad_norm": 1.5521111488342285, - "learning_rate": 6.30391959798995e-05, - "loss": 5.4675, - "step": 37289 - }, - { - "epoch": 19.447196870925683, - "grad_norm": 1.5375505685806274, - "learning_rate": 6.303819095477387e-05, - "loss": 5.1897, - "step": 37290 - }, - { - "epoch": 19.447718383311603, - "grad_norm": 1.5636916160583496, - "learning_rate": 6.303718592964825e-05, - "loss": 5.2452, - "step": 37291 - }, - { - "epoch": 19.448239895697522, - "grad_norm": 1.5020076036453247, - "learning_rate": 6.303618090452261e-05, - "loss": 5.5928, - "step": 37292 - }, - { - "epoch": 19.448761408083442, - "grad_norm": 1.5464260578155518, - "learning_rate": 6.303517587939699e-05, - "loss": 5.4023, - "step": 37293 - }, - { - "epoch": 19.44928292046936, - "grad_norm": 1.420599341392517, - "learning_rate": 6.303417085427136e-05, - "loss": 5.6967, - "step": 37294 - }, - { - "epoch": 19.44980443285528, - "grad_norm": 1.493533968925476, - "learning_rate": 6.303316582914573e-05, - "loss": 4.9638, - "step": 37295 - }, - { - "epoch": 19.4503259452412, - "grad_norm": 1.5219279527664185, - "learning_rate": 6.30321608040201e-05, - "loss": 5.168, - "step": 37296 - }, - { - "epoch": 19.45084745762712, - "grad_norm": 1.4469178915023804, - "learning_rate": 6.303115577889448e-05, - "loss": 5.5201, - "step": 37297 - }, - { - "epoch": 19.45136897001304, - "grad_norm": 1.6388332843780518, - "learning_rate": 6.303015075376884e-05, - "loss": 5.0313, - "step": 37298 - }, - { - "epoch": 19.45189048239896, - "grad_norm": 1.402552604675293, - "learning_rate": 6.302914572864322e-05, - "loss": 5.049, - "step": 37299 - }, - { - "epoch": 19.452411994784875, - "grad_norm": 1.4198784828186035, - "learning_rate": 6.30281407035176e-05, - "loss": 5.7073, - "step": 37300 - }, - { - "epoch": 19.452933507170794, - "grad_norm": 1.4833933115005493, - "learning_rate": 6.302713567839196e-05, - "loss": 5.2907, - "step": 37301 - }, - { - "epoch": 19.453455019556714, - "grad_norm": 1.4306979179382324, - "learning_rate": 6.302613065326634e-05, - "loss": 5.4608, - "step": 37302 - }, - { - "epoch": 19.453976531942633, - "grad_norm": 1.4787994623184204, - "learning_rate": 6.30251256281407e-05, - "loss": 5.3125, - "step": 37303 - }, - { - "epoch": 19.454498044328552, - "grad_norm": 1.4288735389709473, - "learning_rate": 6.302412060301508e-05, - "loss": 5.2381, - "step": 37304 - }, - { - "epoch": 19.455019556714472, - "grad_norm": 1.341896414756775, - "learning_rate": 6.302311557788945e-05, - "loss": 5.5058, - "step": 37305 - }, - { - "epoch": 19.45554106910039, - "grad_norm": 1.5564379692077637, - "learning_rate": 6.302211055276382e-05, - "loss": 5.4784, - "step": 37306 - }, - { - "epoch": 19.45606258148631, - "grad_norm": 1.6468271017074585, - "learning_rate": 6.302110552763819e-05, - "loss": 4.9523, - "step": 37307 - }, - { - "epoch": 19.45658409387223, - "grad_norm": 1.4131602048873901, - "learning_rate": 6.302010050251257e-05, - "loss": 5.4534, - "step": 37308 - }, - { - "epoch": 19.45710560625815, - "grad_norm": 1.5736078023910522, - "learning_rate": 6.301909547738693e-05, - "loss": 5.0176, - "step": 37309 - }, - { - "epoch": 19.45762711864407, - "grad_norm": 1.4280222654342651, - "learning_rate": 6.301809045226131e-05, - "loss": 5.2648, - "step": 37310 - }, - { - "epoch": 19.45814863102999, - "grad_norm": 1.4703959226608276, - "learning_rate": 6.301708542713569e-05, - "loss": 5.5572, - "step": 37311 - }, - { - "epoch": 19.458670143415905, - "grad_norm": 1.501389980316162, - "learning_rate": 6.301608040201006e-05, - "loss": 4.7714, - "step": 37312 - }, - { - "epoch": 19.459191655801824, - "grad_norm": 1.5663864612579346, - "learning_rate": 6.301507537688443e-05, - "loss": 4.9158, - "step": 37313 - }, - { - "epoch": 19.459713168187744, - "grad_norm": 1.5946868658065796, - "learning_rate": 6.301407035175879e-05, - "loss": 4.8477, - "step": 37314 - }, - { - "epoch": 19.460234680573663, - "grad_norm": 1.4271913766860962, - "learning_rate": 6.301306532663317e-05, - "loss": 5.4187, - "step": 37315 - }, - { - "epoch": 19.460756192959582, - "grad_norm": 1.4007647037506104, - "learning_rate": 6.301206030150753e-05, - "loss": 5.1085, - "step": 37316 - }, - { - "epoch": 19.461277705345502, - "grad_norm": 1.4066468477249146, - "learning_rate": 6.301105527638191e-05, - "loss": 5.3755, - "step": 37317 - }, - { - "epoch": 19.46179921773142, - "grad_norm": 1.612689733505249, - "learning_rate": 6.301005025125628e-05, - "loss": 4.5325, - "step": 37318 - }, - { - "epoch": 19.46232073011734, - "grad_norm": 1.5783993005752563, - "learning_rate": 6.300904522613065e-05, - "loss": 5.2231, - "step": 37319 - }, - { - "epoch": 19.46284224250326, - "grad_norm": 1.4529337882995605, - "learning_rate": 6.300804020100503e-05, - "loss": 5.774, - "step": 37320 - }, - { - "epoch": 19.46336375488918, - "grad_norm": 1.5188593864440918, - "learning_rate": 6.300703517587941e-05, - "loss": 5.7188, - "step": 37321 - }, - { - "epoch": 19.4638852672751, - "grad_norm": 1.5579113960266113, - "learning_rate": 6.300603015075377e-05, - "loss": 5.2305, - "step": 37322 - }, - { - "epoch": 19.46440677966102, - "grad_norm": 1.4882932901382446, - "learning_rate": 6.300502512562815e-05, - "loss": 5.1786, - "step": 37323 - }, - { - "epoch": 19.464928292046935, - "grad_norm": 1.5697908401489258, - "learning_rate": 6.300402010050252e-05, - "loss": 4.9005, - "step": 37324 - }, - { - "epoch": 19.465449804432854, - "grad_norm": 1.3653483390808105, - "learning_rate": 6.30030150753769e-05, - "loss": 5.6436, - "step": 37325 - }, - { - "epoch": 19.465971316818774, - "grad_norm": 1.416479229927063, - "learning_rate": 6.300201005025126e-05, - "loss": 5.2184, - "step": 37326 - }, - { - "epoch": 19.466492829204693, - "grad_norm": 1.54704749584198, - "learning_rate": 6.300100502512562e-05, - "loss": 5.0929, - "step": 37327 - }, - { - "epoch": 19.467014341590613, - "grad_norm": 1.5011281967163086, - "learning_rate": 6.3e-05, - "loss": 5.5939, - "step": 37328 - }, - { - "epoch": 19.467535853976532, - "grad_norm": 1.5293469429016113, - "learning_rate": 6.299899497487436e-05, - "loss": 5.0441, - "step": 37329 - }, - { - "epoch": 19.46805736636245, - "grad_norm": 1.578230857849121, - "learning_rate": 6.299798994974874e-05, - "loss": 4.9815, - "step": 37330 - }, - { - "epoch": 19.46857887874837, - "grad_norm": 1.4376766681671143, - "learning_rate": 6.299698492462312e-05, - "loss": 5.383, - "step": 37331 - }, - { - "epoch": 19.46910039113429, - "grad_norm": 1.5833238363265991, - "learning_rate": 6.29959798994975e-05, - "loss": 5.3092, - "step": 37332 - }, - { - "epoch": 19.46962190352021, - "grad_norm": 1.5258651971817017, - "learning_rate": 6.299497487437186e-05, - "loss": 4.6548, - "step": 37333 - }, - { - "epoch": 19.47014341590613, - "grad_norm": 1.5256139039993286, - "learning_rate": 6.299396984924624e-05, - "loss": 5.6907, - "step": 37334 - }, - { - "epoch": 19.470664928292045, - "grad_norm": 1.6399085521697998, - "learning_rate": 6.29929648241206e-05, - "loss": 4.6621, - "step": 37335 - }, - { - "epoch": 19.471186440677965, - "grad_norm": 1.5760735273361206, - "learning_rate": 6.299195979899498e-05, - "loss": 5.3937, - "step": 37336 - }, - { - "epoch": 19.471707953063884, - "grad_norm": 1.657070517539978, - "learning_rate": 6.299095477386935e-05, - "loss": 5.4718, - "step": 37337 - }, - { - "epoch": 19.472229465449804, - "grad_norm": 1.5149132013320923, - "learning_rate": 6.298994974874372e-05, - "loss": 5.2985, - "step": 37338 - }, - { - "epoch": 19.472750977835723, - "grad_norm": 1.502623438835144, - "learning_rate": 6.298894472361809e-05, - "loss": 5.4277, - "step": 37339 - }, - { - "epoch": 19.473272490221643, - "grad_norm": 1.5746971368789673, - "learning_rate": 6.298793969849247e-05, - "loss": 5.1482, - "step": 37340 - }, - { - "epoch": 19.473794002607562, - "grad_norm": 1.5487323999404907, - "learning_rate": 6.298693467336684e-05, - "loss": 5.3819, - "step": 37341 - }, - { - "epoch": 19.47431551499348, - "grad_norm": 1.6664633750915527, - "learning_rate": 6.298592964824121e-05, - "loss": 5.1589, - "step": 37342 - }, - { - "epoch": 19.4748370273794, - "grad_norm": 1.679703712463379, - "learning_rate": 6.298492462311559e-05, - "loss": 5.2432, - "step": 37343 - }, - { - "epoch": 19.47535853976532, - "grad_norm": 1.556282877922058, - "learning_rate": 6.298391959798995e-05, - "loss": 5.226, - "step": 37344 - }, - { - "epoch": 19.47588005215124, - "grad_norm": 1.5468372106552124, - "learning_rate": 6.298291457286433e-05, - "loss": 5.1671, - "step": 37345 - }, - { - "epoch": 19.47640156453716, - "grad_norm": 1.5020066499710083, - "learning_rate": 6.298190954773869e-05, - "loss": 5.4424, - "step": 37346 - }, - { - "epoch": 19.476923076923075, - "grad_norm": 1.4907149076461792, - "learning_rate": 6.298090452261307e-05, - "loss": 5.4206, - "step": 37347 - }, - { - "epoch": 19.477444589308995, - "grad_norm": 1.5200107097625732, - "learning_rate": 6.297989949748743e-05, - "loss": 5.817, - "step": 37348 - }, - { - "epoch": 19.477966101694914, - "grad_norm": 1.4848884344100952, - "learning_rate": 6.297889447236181e-05, - "loss": 5.0979, - "step": 37349 - }, - { - "epoch": 19.478487614080834, - "grad_norm": 1.5240153074264526, - "learning_rate": 6.297788944723618e-05, - "loss": 5.428, - "step": 37350 - }, - { - "epoch": 19.479009126466753, - "grad_norm": 1.6533585786819458, - "learning_rate": 6.297688442211055e-05, - "loss": 5.5099, - "step": 37351 - }, - { - "epoch": 19.479530638852673, - "grad_norm": 1.7335293292999268, - "learning_rate": 6.297587939698493e-05, - "loss": 5.0175, - "step": 37352 - }, - { - "epoch": 19.480052151238592, - "grad_norm": 1.518573522567749, - "learning_rate": 6.297487437185931e-05, - "loss": 5.2858, - "step": 37353 - }, - { - "epoch": 19.48057366362451, - "grad_norm": 1.4349814653396606, - "learning_rate": 6.297386934673367e-05, - "loss": 5.8855, - "step": 37354 - }, - { - "epoch": 19.48109517601043, - "grad_norm": 1.6711530685424805, - "learning_rate": 6.297286432160804e-05, - "loss": 5.1467, - "step": 37355 - }, - { - "epoch": 19.48161668839635, - "grad_norm": 1.5619884729385376, - "learning_rate": 6.297185929648242e-05, - "loss": 5.4166, - "step": 37356 - }, - { - "epoch": 19.48213820078227, - "grad_norm": 1.4860761165618896, - "learning_rate": 6.297085427135678e-05, - "loss": 5.2916, - "step": 37357 - }, - { - "epoch": 19.48265971316819, - "grad_norm": 1.5150444507598877, - "learning_rate": 6.296984924623116e-05, - "loss": 5.1904, - "step": 37358 - }, - { - "epoch": 19.483181225554105, - "grad_norm": 1.5208262205123901, - "learning_rate": 6.296884422110552e-05, - "loss": 4.955, - "step": 37359 - }, - { - "epoch": 19.483702737940025, - "grad_norm": 1.535759687423706, - "learning_rate": 6.29678391959799e-05, - "loss": 5.3011, - "step": 37360 - }, - { - "epoch": 19.484224250325944, - "grad_norm": 1.5087066888809204, - "learning_rate": 6.296683417085428e-05, - "loss": 5.1726, - "step": 37361 - }, - { - "epoch": 19.484745762711864, - "grad_norm": 1.5211093425750732, - "learning_rate": 6.296582914572866e-05, - "loss": 5.1205, - "step": 37362 - }, - { - "epoch": 19.485267275097783, - "grad_norm": 1.5269780158996582, - "learning_rate": 6.296482412060302e-05, - "loss": 5.2067, - "step": 37363 - }, - { - "epoch": 19.485788787483703, - "grad_norm": 1.4537190198898315, - "learning_rate": 6.29638190954774e-05, - "loss": 5.5389, - "step": 37364 - }, - { - "epoch": 19.486310299869622, - "grad_norm": 1.4940224885940552, - "learning_rate": 6.296281407035176e-05, - "loss": 5.0526, - "step": 37365 - }, - { - "epoch": 19.48683181225554, - "grad_norm": 1.5141774415969849, - "learning_rate": 6.296180904522614e-05, - "loss": 5.6359, - "step": 37366 - }, - { - "epoch": 19.48735332464146, - "grad_norm": 1.5057686567306519, - "learning_rate": 6.29608040201005e-05, - "loss": 5.8364, - "step": 37367 - }, - { - "epoch": 19.48787483702738, - "grad_norm": 1.545931100845337, - "learning_rate": 6.295979899497487e-05, - "loss": 5.3301, - "step": 37368 - }, - { - "epoch": 19.4883963494133, - "grad_norm": 1.4198249578475952, - "learning_rate": 6.295879396984925e-05, - "loss": 5.3101, - "step": 37369 - }, - { - "epoch": 19.48891786179922, - "grad_norm": 1.5603911876678467, - "learning_rate": 6.295778894472361e-05, - "loss": 5.0234, - "step": 37370 - }, - { - "epoch": 19.489439374185135, - "grad_norm": 1.4139888286590576, - "learning_rate": 6.295678391959799e-05, - "loss": 4.992, - "step": 37371 - }, - { - "epoch": 19.489960886571055, - "grad_norm": 1.4619789123535156, - "learning_rate": 6.295577889447237e-05, - "loss": 5.3645, - "step": 37372 - }, - { - "epoch": 19.490482398956974, - "grad_norm": 1.4725536108016968, - "learning_rate": 6.295477386934674e-05, - "loss": 5.1141, - "step": 37373 - }, - { - "epoch": 19.491003911342894, - "grad_norm": 1.5416631698608398, - "learning_rate": 6.295376884422111e-05, - "loss": 5.143, - "step": 37374 - }, - { - "epoch": 19.491525423728813, - "grad_norm": 1.5572096109390259, - "learning_rate": 6.295276381909549e-05, - "loss": 5.4227, - "step": 37375 - }, - { - "epoch": 19.492046936114733, - "grad_norm": 1.5277761220932007, - "learning_rate": 6.295175879396985e-05, - "loss": 5.6088, - "step": 37376 - }, - { - "epoch": 19.492568448500652, - "grad_norm": 1.4732385873794556, - "learning_rate": 6.295075376884423e-05, - "loss": 5.6025, - "step": 37377 - }, - { - "epoch": 19.49308996088657, - "grad_norm": 1.5243701934814453, - "learning_rate": 6.294974874371859e-05, - "loss": 5.3534, - "step": 37378 - }, - { - "epoch": 19.49361147327249, - "grad_norm": 1.4268461465835571, - "learning_rate": 6.294874371859297e-05, - "loss": 5.4602, - "step": 37379 - }, - { - "epoch": 19.49413298565841, - "grad_norm": 1.6526100635528564, - "learning_rate": 6.294773869346734e-05, - "loss": 4.9007, - "step": 37380 - }, - { - "epoch": 19.49465449804433, - "grad_norm": 1.5461496114730835, - "learning_rate": 6.294673366834171e-05, - "loss": 5.207, - "step": 37381 - }, - { - "epoch": 19.49517601043025, - "grad_norm": 1.4336929321289062, - "learning_rate": 6.294572864321609e-05, - "loss": 5.4362, - "step": 37382 - }, - { - "epoch": 19.495697522816165, - "grad_norm": 1.496029257774353, - "learning_rate": 6.294472361809046e-05, - "loss": 5.4709, - "step": 37383 - }, - { - "epoch": 19.496219035202085, - "grad_norm": 1.4630082845687866, - "learning_rate": 6.294371859296483e-05, - "loss": 5.5147, - "step": 37384 - }, - { - "epoch": 19.496740547588004, - "grad_norm": 1.467675805091858, - "learning_rate": 6.29427135678392e-05, - "loss": 5.1248, - "step": 37385 - }, - { - "epoch": 19.497262059973924, - "grad_norm": 1.6054035425186157, - "learning_rate": 6.294170854271358e-05, - "loss": 5.4171, - "step": 37386 - }, - { - "epoch": 19.497783572359843, - "grad_norm": 1.576171875, - "learning_rate": 6.294070351758794e-05, - "loss": 5.35, - "step": 37387 - }, - { - "epoch": 19.498305084745763, - "grad_norm": 1.5490212440490723, - "learning_rate": 6.293969849246232e-05, - "loss": 5.2228, - "step": 37388 - }, - { - "epoch": 19.498826597131682, - "grad_norm": 1.468489646911621, - "learning_rate": 6.293869346733668e-05, - "loss": 5.5472, - "step": 37389 - }, - { - "epoch": 19.4993481095176, - "grad_norm": 1.8731147050857544, - "learning_rate": 6.293768844221106e-05, - "loss": 4.5855, - "step": 37390 - }, - { - "epoch": 19.49986962190352, - "grad_norm": 1.525352120399475, - "learning_rate": 6.293668341708542e-05, - "loss": 5.3013, - "step": 37391 - }, - { - "epoch": 19.50039113428944, - "grad_norm": 1.5804531574249268, - "learning_rate": 6.29356783919598e-05, - "loss": 5.2738, - "step": 37392 - }, - { - "epoch": 19.50091264667536, - "grad_norm": 1.5275425910949707, - "learning_rate": 6.293467336683418e-05, - "loss": 5.4325, - "step": 37393 - }, - { - "epoch": 19.50143415906128, - "grad_norm": 1.3847652673721313, - "learning_rate": 6.293366834170854e-05, - "loss": 4.6321, - "step": 37394 - }, - { - "epoch": 19.501955671447195, - "grad_norm": 1.6265003681182861, - "learning_rate": 6.293266331658292e-05, - "loss": 5.2354, - "step": 37395 - }, - { - "epoch": 19.502477183833115, - "grad_norm": 1.4969078302383423, - "learning_rate": 6.293165829145729e-05, - "loss": 5.5002, - "step": 37396 - }, - { - "epoch": 19.502998696219034, - "grad_norm": 1.4565858840942383, - "learning_rate": 6.293065326633166e-05, - "loss": 5.5101, - "step": 37397 - }, - { - "epoch": 19.503520208604954, - "grad_norm": 1.4968385696411133, - "learning_rate": 6.292964824120603e-05, - "loss": 5.2815, - "step": 37398 - }, - { - "epoch": 19.504041720990873, - "grad_norm": 1.4382553100585938, - "learning_rate": 6.29286432160804e-05, - "loss": 5.6591, - "step": 37399 - }, - { - "epoch": 19.504563233376793, - "grad_norm": 1.502134919166565, - "learning_rate": 6.292763819095477e-05, - "loss": 5.4695, - "step": 37400 - }, - { - "epoch": 19.505084745762712, - "grad_norm": 1.5889514684677124, - "learning_rate": 6.292663316582915e-05, - "loss": 5.2816, - "step": 37401 - }, - { - "epoch": 19.50560625814863, - "grad_norm": 1.7039299011230469, - "learning_rate": 6.292562814070351e-05, - "loss": 5.0244, - "step": 37402 - }, - { - "epoch": 19.50612777053455, - "grad_norm": 1.4913221597671509, - "learning_rate": 6.292462311557789e-05, - "loss": 5.0428, - "step": 37403 - }, - { - "epoch": 19.50664928292047, - "grad_norm": 1.5622193813323975, - "learning_rate": 6.292361809045227e-05, - "loss": 5.0815, - "step": 37404 - }, - { - "epoch": 19.50717079530639, - "grad_norm": 1.5759025812149048, - "learning_rate": 6.292261306532665e-05, - "loss": 4.9676, - "step": 37405 - }, - { - "epoch": 19.50769230769231, - "grad_norm": 1.4436661005020142, - "learning_rate": 6.292160804020101e-05, - "loss": 5.4342, - "step": 37406 - }, - { - "epoch": 19.508213820078225, - "grad_norm": 1.501172423362732, - "learning_rate": 6.292060301507537e-05, - "loss": 5.6958, - "step": 37407 - }, - { - "epoch": 19.508735332464145, - "grad_norm": 1.4558210372924805, - "learning_rate": 6.291959798994975e-05, - "loss": 4.8576, - "step": 37408 - }, - { - "epoch": 19.509256844850064, - "grad_norm": 1.4975327253341675, - "learning_rate": 6.291859296482412e-05, - "loss": 5.134, - "step": 37409 - }, - { - "epoch": 19.509778357235984, - "grad_norm": 1.4757581949234009, - "learning_rate": 6.29175879396985e-05, - "loss": 5.7075, - "step": 37410 - }, - { - "epoch": 19.510299869621903, - "grad_norm": 1.5648298263549805, - "learning_rate": 6.291658291457286e-05, - "loss": 5.2389, - "step": 37411 - }, - { - "epoch": 19.510821382007823, - "grad_norm": 1.6016122102737427, - "learning_rate": 6.291557788944724e-05, - "loss": 4.7801, - "step": 37412 - }, - { - "epoch": 19.511342894393742, - "grad_norm": 1.4645133018493652, - "learning_rate": 6.291457286432161e-05, - "loss": 5.4104, - "step": 37413 - }, - { - "epoch": 19.51186440677966, - "grad_norm": 1.4620314836502075, - "learning_rate": 6.291356783919599e-05, - "loss": 5.0597, - "step": 37414 - }, - { - "epoch": 19.51238591916558, - "grad_norm": 1.4295207262039185, - "learning_rate": 6.291256281407036e-05, - "loss": 5.3201, - "step": 37415 - }, - { - "epoch": 19.5129074315515, - "grad_norm": 1.4604666233062744, - "learning_rate": 6.291155778894473e-05, - "loss": 5.4127, - "step": 37416 - }, - { - "epoch": 19.51342894393742, - "grad_norm": 1.5334651470184326, - "learning_rate": 6.29105527638191e-05, - "loss": 5.3196, - "step": 37417 - }, - { - "epoch": 19.513950456323336, - "grad_norm": 1.5397158861160278, - "learning_rate": 6.290954773869348e-05, - "loss": 5.1471, - "step": 37418 - }, - { - "epoch": 19.514471968709255, - "grad_norm": 1.4055157899856567, - "learning_rate": 6.290854271356784e-05, - "loss": 5.2655, - "step": 37419 - }, - { - "epoch": 19.514993481095175, - "grad_norm": 1.4520074129104614, - "learning_rate": 6.290753768844222e-05, - "loss": 5.5835, - "step": 37420 - }, - { - "epoch": 19.515514993481094, - "grad_norm": 1.5064903497695923, - "learning_rate": 6.290653266331658e-05, - "loss": 5.1775, - "step": 37421 - }, - { - "epoch": 19.516036505867014, - "grad_norm": 1.4846075773239136, - "learning_rate": 6.290552763819095e-05, - "loss": 5.2161, - "step": 37422 - }, - { - "epoch": 19.516558018252933, - "grad_norm": 1.4353463649749756, - "learning_rate": 6.290452261306532e-05, - "loss": 5.1005, - "step": 37423 - }, - { - "epoch": 19.517079530638853, - "grad_norm": 1.4706088304519653, - "learning_rate": 6.29035175879397e-05, - "loss": 5.4241, - "step": 37424 - }, - { - "epoch": 19.517601043024772, - "grad_norm": 1.5441031455993652, - "learning_rate": 6.290251256281408e-05, - "loss": 5.0299, - "step": 37425 - }, - { - "epoch": 19.51812255541069, - "grad_norm": 1.5890207290649414, - "learning_rate": 6.290150753768844e-05, - "loss": 4.8899, - "step": 37426 - }, - { - "epoch": 19.51864406779661, - "grad_norm": 1.5170304775238037, - "learning_rate": 6.290050251256282e-05, - "loss": 5.4802, - "step": 37427 - }, - { - "epoch": 19.51916558018253, - "grad_norm": 1.4741225242614746, - "learning_rate": 6.289949748743719e-05, - "loss": 5.1109, - "step": 37428 - }, - { - "epoch": 19.51968709256845, - "grad_norm": 1.4384995698928833, - "learning_rate": 6.289849246231156e-05, - "loss": 5.4311, - "step": 37429 - }, - { - "epoch": 19.52020860495437, - "grad_norm": 1.5288879871368408, - "learning_rate": 6.289748743718593e-05, - "loss": 5.3896, - "step": 37430 - }, - { - "epoch": 19.520730117340285, - "grad_norm": 1.590402603149414, - "learning_rate": 6.28964824120603e-05, - "loss": 4.6944, - "step": 37431 - }, - { - "epoch": 19.521251629726205, - "grad_norm": 1.6978366374969482, - "learning_rate": 6.289547738693467e-05, - "loss": 4.8139, - "step": 37432 - }, - { - "epoch": 19.521773142112124, - "grad_norm": 1.6142961978912354, - "learning_rate": 6.289447236180905e-05, - "loss": 4.7552, - "step": 37433 - }, - { - "epoch": 19.522294654498044, - "grad_norm": 1.5438636541366577, - "learning_rate": 6.289346733668343e-05, - "loss": 5.3293, - "step": 37434 - }, - { - "epoch": 19.522816166883963, - "grad_norm": 1.4304120540618896, - "learning_rate": 6.289246231155779e-05, - "loss": 5.0658, - "step": 37435 - }, - { - "epoch": 19.523337679269883, - "grad_norm": 1.544596552848816, - "learning_rate": 6.289145728643217e-05, - "loss": 5.0356, - "step": 37436 - }, - { - "epoch": 19.523859191655802, - "grad_norm": 1.3785384893417358, - "learning_rate": 6.289045226130653e-05, - "loss": 5.6539, - "step": 37437 - }, - { - "epoch": 19.52438070404172, - "grad_norm": 1.5315215587615967, - "learning_rate": 6.288944723618091e-05, - "loss": 5.1332, - "step": 37438 - }, - { - "epoch": 19.52490221642764, - "grad_norm": 1.5645599365234375, - "learning_rate": 6.288844221105527e-05, - "loss": 4.8235, - "step": 37439 - }, - { - "epoch": 19.52542372881356, - "grad_norm": 1.51400625705719, - "learning_rate": 6.288743718592965e-05, - "loss": 5.4424, - "step": 37440 - }, - { - "epoch": 19.52594524119948, - "grad_norm": 1.4766571521759033, - "learning_rate": 6.288643216080402e-05, - "loss": 5.5457, - "step": 37441 - }, - { - "epoch": 19.526466753585396, - "grad_norm": 1.4231263399124146, - "learning_rate": 6.28854271356784e-05, - "loss": 5.2135, - "step": 37442 - }, - { - "epoch": 19.526988265971315, - "grad_norm": 1.6407511234283447, - "learning_rate": 6.288442211055276e-05, - "loss": 5.0354, - "step": 37443 - }, - { - "epoch": 19.527509778357235, - "grad_norm": 1.515535831451416, - "learning_rate": 6.288341708542714e-05, - "loss": 5.4358, - "step": 37444 - }, - { - "epoch": 19.528031290743154, - "grad_norm": 1.5242292881011963, - "learning_rate": 6.288241206030151e-05, - "loss": 5.5564, - "step": 37445 - }, - { - "epoch": 19.528552803129074, - "grad_norm": 1.5468130111694336, - "learning_rate": 6.288140703517589e-05, - "loss": 5.5873, - "step": 37446 - }, - { - "epoch": 19.529074315514993, - "grad_norm": 1.4609287977218628, - "learning_rate": 6.288040201005026e-05, - "loss": 5.7852, - "step": 37447 - }, - { - "epoch": 19.529595827900913, - "grad_norm": 1.4723258018493652, - "learning_rate": 6.287939698492462e-05, - "loss": 5.633, - "step": 37448 - }, - { - "epoch": 19.530117340286832, - "grad_norm": 1.5329313278198242, - "learning_rate": 6.2878391959799e-05, - "loss": 5.4327, - "step": 37449 - }, - { - "epoch": 19.53063885267275, - "grad_norm": 1.5618882179260254, - "learning_rate": 6.287738693467336e-05, - "loss": 5.363, - "step": 37450 - }, - { - "epoch": 19.53116036505867, - "grad_norm": 1.494633436203003, - "learning_rate": 6.287638190954774e-05, - "loss": 5.3407, - "step": 37451 - }, - { - "epoch": 19.53168187744459, - "grad_norm": 1.554058313369751, - "learning_rate": 6.28753768844221e-05, - "loss": 5.2192, - "step": 37452 - }, - { - "epoch": 19.53220338983051, - "grad_norm": 1.5984495878219604, - "learning_rate": 6.287437185929648e-05, - "loss": 5.1761, - "step": 37453 - }, - { - "epoch": 19.532724902216426, - "grad_norm": 1.6217752695083618, - "learning_rate": 6.287336683417086e-05, - "loss": 5.0292, - "step": 37454 - }, - { - "epoch": 19.533246414602345, - "grad_norm": 1.528728723526001, - "learning_rate": 6.287236180904524e-05, - "loss": 5.4789, - "step": 37455 - }, - { - "epoch": 19.533767926988265, - "grad_norm": 1.5629384517669678, - "learning_rate": 6.28713567839196e-05, - "loss": 5.2033, - "step": 37456 - }, - { - "epoch": 19.534289439374184, - "grad_norm": 1.4917786121368408, - "learning_rate": 6.287035175879398e-05, - "loss": 5.3138, - "step": 37457 - }, - { - "epoch": 19.534810951760104, - "grad_norm": 1.6691511869430542, - "learning_rate": 6.286934673366835e-05, - "loss": 5.307, - "step": 37458 - }, - { - "epoch": 19.535332464146023, - "grad_norm": 1.5341060161590576, - "learning_rate": 6.286834170854272e-05, - "loss": 5.4368, - "step": 37459 - }, - { - "epoch": 19.535853976531943, - "grad_norm": 1.5439406633377075, - "learning_rate": 6.286733668341709e-05, - "loss": 5.2648, - "step": 37460 - }, - { - "epoch": 19.536375488917862, - "grad_norm": 1.5557283163070679, - "learning_rate": 6.286633165829145e-05, - "loss": 5.4651, - "step": 37461 - }, - { - "epoch": 19.53689700130378, - "grad_norm": 1.4684910774230957, - "learning_rate": 6.286532663316583e-05, - "loss": 5.2367, - "step": 37462 - }, - { - "epoch": 19.5374185136897, - "grad_norm": 1.430099368095398, - "learning_rate": 6.28643216080402e-05, - "loss": 5.5626, - "step": 37463 - }, - { - "epoch": 19.53794002607562, - "grad_norm": 1.4476091861724854, - "learning_rate": 6.286331658291457e-05, - "loss": 5.1099, - "step": 37464 - }, - { - "epoch": 19.53846153846154, - "grad_norm": 1.5505305528640747, - "learning_rate": 6.286231155778895e-05, - "loss": 5.2486, - "step": 37465 - }, - { - "epoch": 19.538983050847456, - "grad_norm": 1.4357610940933228, - "learning_rate": 6.286130653266333e-05, - "loss": 4.7092, - "step": 37466 - }, - { - "epoch": 19.539504563233375, - "grad_norm": 1.442327618598938, - "learning_rate": 6.286030150753769e-05, - "loss": 5.4076, - "step": 37467 - }, - { - "epoch": 19.540026075619295, - "grad_norm": 1.4965447187423706, - "learning_rate": 6.285929648241207e-05, - "loss": 5.7182, - "step": 37468 - }, - { - "epoch": 19.540547588005214, - "grad_norm": 1.4274893999099731, - "learning_rate": 6.285829145728643e-05, - "loss": 5.6063, - "step": 37469 - }, - { - "epoch": 19.541069100391134, - "grad_norm": 1.4446918964385986, - "learning_rate": 6.285728643216081e-05, - "loss": 5.4581, - "step": 37470 - }, - { - "epoch": 19.541590612777053, - "grad_norm": 1.4935729503631592, - "learning_rate": 6.285628140703518e-05, - "loss": 5.1614, - "step": 37471 - }, - { - "epoch": 19.542112125162973, - "grad_norm": 1.6776635646820068, - "learning_rate": 6.285527638190955e-05, - "loss": 4.6668, - "step": 37472 - }, - { - "epoch": 19.542633637548892, - "grad_norm": 1.4967087507247925, - "learning_rate": 6.285427135678392e-05, - "loss": 5.6446, - "step": 37473 - }, - { - "epoch": 19.54315514993481, - "grad_norm": 1.6585583686828613, - "learning_rate": 6.28532663316583e-05, - "loss": 5.1118, - "step": 37474 - }, - { - "epoch": 19.54367666232073, - "grad_norm": 1.5823849439620972, - "learning_rate": 6.285226130653267e-05, - "loss": 5.2226, - "step": 37475 - }, - { - "epoch": 19.54419817470665, - "grad_norm": 1.548598289489746, - "learning_rate": 6.285125628140704e-05, - "loss": 5.4156, - "step": 37476 - }, - { - "epoch": 19.54471968709257, - "grad_norm": 1.4502068758010864, - "learning_rate": 6.285025125628142e-05, - "loss": 4.6628, - "step": 37477 - }, - { - "epoch": 19.545241199478486, - "grad_norm": 1.391873836517334, - "learning_rate": 6.284924623115578e-05, - "loss": 5.6292, - "step": 37478 - }, - { - "epoch": 19.545762711864406, - "grad_norm": 1.5155748128890991, - "learning_rate": 6.284824120603016e-05, - "loss": 5.4312, - "step": 37479 - }, - { - "epoch": 19.546284224250325, - "grad_norm": 1.5796457529067993, - "learning_rate": 6.284723618090452e-05, - "loss": 5.0863, - "step": 37480 - }, - { - "epoch": 19.546805736636244, - "grad_norm": 1.536846399307251, - "learning_rate": 6.28462311557789e-05, - "loss": 5.4377, - "step": 37481 - }, - { - "epoch": 19.547327249022164, - "grad_norm": 1.585491418838501, - "learning_rate": 6.284522613065326e-05, - "loss": 5.0156, - "step": 37482 - }, - { - "epoch": 19.547848761408083, - "grad_norm": 1.6833882331848145, - "learning_rate": 6.284422110552764e-05, - "loss": 4.8335, - "step": 37483 - }, - { - "epoch": 19.548370273794003, - "grad_norm": 1.5156689882278442, - "learning_rate": 6.2843216080402e-05, - "loss": 4.9986, - "step": 37484 - }, - { - "epoch": 19.548891786179922, - "grad_norm": 1.426401138305664, - "learning_rate": 6.284221105527638e-05, - "loss": 5.8179, - "step": 37485 - }, - { - "epoch": 19.54941329856584, - "grad_norm": 1.5560848712921143, - "learning_rate": 6.284120603015076e-05, - "loss": 5.3171, - "step": 37486 - }, - { - "epoch": 19.54993481095176, - "grad_norm": 1.5668470859527588, - "learning_rate": 6.284020100502513e-05, - "loss": 5.1523, - "step": 37487 - }, - { - "epoch": 19.55045632333768, - "grad_norm": 1.4982200860977173, - "learning_rate": 6.28391959798995e-05, - "loss": 5.3335, - "step": 37488 - }, - { - "epoch": 19.5509778357236, - "grad_norm": 1.5658247470855713, - "learning_rate": 6.283819095477387e-05, - "loss": 5.3295, - "step": 37489 - }, - { - "epoch": 19.551499348109516, - "grad_norm": 1.405168056488037, - "learning_rate": 6.283718592964825e-05, - "loss": 5.5674, - "step": 37490 - }, - { - "epoch": 19.552020860495436, - "grad_norm": 1.4721736907958984, - "learning_rate": 6.283618090452261e-05, - "loss": 5.6618, - "step": 37491 - }, - { - "epoch": 19.552542372881355, - "grad_norm": 1.4577295780181885, - "learning_rate": 6.283517587939699e-05, - "loss": 5.1682, - "step": 37492 - }, - { - "epoch": 19.553063885267274, - "grad_norm": 1.537173867225647, - "learning_rate": 6.283417085427135e-05, - "loss": 5.4976, - "step": 37493 - }, - { - "epoch": 19.553585397653194, - "grad_norm": 1.5518070459365845, - "learning_rate": 6.283316582914573e-05, - "loss": 5.5164, - "step": 37494 - }, - { - "epoch": 19.554106910039113, - "grad_norm": 1.455331802368164, - "learning_rate": 6.283216080402011e-05, - "loss": 5.6294, - "step": 37495 - }, - { - "epoch": 19.554628422425033, - "grad_norm": 1.4764269590377808, - "learning_rate": 6.283115577889449e-05, - "loss": 5.4742, - "step": 37496 - }, - { - "epoch": 19.555149934810952, - "grad_norm": 1.4882757663726807, - "learning_rate": 6.283015075376885e-05, - "loss": 5.5503, - "step": 37497 - }, - { - "epoch": 19.555671447196872, - "grad_norm": 1.4425289630889893, - "learning_rate": 6.282914572864323e-05, - "loss": 5.2949, - "step": 37498 - }, - { - "epoch": 19.55619295958279, - "grad_norm": 1.537475824356079, - "learning_rate": 6.282814070351759e-05, - "loss": 5.5836, - "step": 37499 - }, - { - "epoch": 19.55671447196871, - "grad_norm": 1.4583909511566162, - "learning_rate": 6.282713567839196e-05, - "loss": 5.2354, - "step": 37500 - }, - { - "epoch": 19.557235984354627, - "grad_norm": 1.4233314990997314, - "learning_rate": 6.282613065326633e-05, - "loss": 5.6107, - "step": 37501 - }, - { - "epoch": 19.557757496740546, - "grad_norm": 1.4939262866973877, - "learning_rate": 6.28251256281407e-05, - "loss": 5.591, - "step": 37502 - }, - { - "epoch": 19.558279009126466, - "grad_norm": 1.4904749393463135, - "learning_rate": 6.282412060301508e-05, - "loss": 5.1102, - "step": 37503 - }, - { - "epoch": 19.558800521512385, - "grad_norm": 1.5590099096298218, - "learning_rate": 6.282311557788944e-05, - "loss": 4.6874, - "step": 37504 - }, - { - "epoch": 19.559322033898304, - "grad_norm": 1.55458402633667, - "learning_rate": 6.282211055276382e-05, - "loss": 4.9892, - "step": 37505 - }, - { - "epoch": 19.559843546284224, - "grad_norm": 1.6992926597595215, - "learning_rate": 6.28211055276382e-05, - "loss": 4.987, - "step": 37506 - }, - { - "epoch": 19.560365058670143, - "grad_norm": 1.3965368270874023, - "learning_rate": 6.282010050251257e-05, - "loss": 5.3039, - "step": 37507 - }, - { - "epoch": 19.560886571056063, - "grad_norm": 1.4321143627166748, - "learning_rate": 6.281909547738694e-05, - "loss": 5.4974, - "step": 37508 - }, - { - "epoch": 19.561408083441982, - "grad_norm": 1.50222647190094, - "learning_rate": 6.281809045226132e-05, - "loss": 5.1136, - "step": 37509 - }, - { - "epoch": 19.561929595827902, - "grad_norm": 1.5559730529785156, - "learning_rate": 6.281708542713568e-05, - "loss": 5.5758, - "step": 37510 - }, - { - "epoch": 19.56245110821382, - "grad_norm": 1.4700188636779785, - "learning_rate": 6.281608040201006e-05, - "loss": 5.6807, - "step": 37511 - }, - { - "epoch": 19.56297262059974, - "grad_norm": 1.4860584735870361, - "learning_rate": 6.281507537688442e-05, - "loss": 5.3676, - "step": 37512 - }, - { - "epoch": 19.56349413298566, - "grad_norm": 1.4578760862350464, - "learning_rate": 6.28140703517588e-05, - "loss": 5.3806, - "step": 37513 - }, - { - "epoch": 19.564015645371576, - "grad_norm": 1.3947306871414185, - "learning_rate": 6.281306532663316e-05, - "loss": 5.4997, - "step": 37514 - }, - { - "epoch": 19.564537157757496, - "grad_norm": 1.4823371171951294, - "learning_rate": 6.281206030150754e-05, - "loss": 5.272, - "step": 37515 - }, - { - "epoch": 19.565058670143415, - "grad_norm": 1.4940375089645386, - "learning_rate": 6.281105527638192e-05, - "loss": 5.3478, - "step": 37516 - }, - { - "epoch": 19.565580182529335, - "grad_norm": 1.4621760845184326, - "learning_rate": 6.281005025125628e-05, - "loss": 5.3436, - "step": 37517 - }, - { - "epoch": 19.566101694915254, - "grad_norm": 1.6056277751922607, - "learning_rate": 6.280904522613066e-05, - "loss": 5.2662, - "step": 37518 - }, - { - "epoch": 19.566623207301173, - "grad_norm": 1.461944818496704, - "learning_rate": 6.280804020100503e-05, - "loss": 5.169, - "step": 37519 - }, - { - "epoch": 19.567144719687093, - "grad_norm": 1.5362483263015747, - "learning_rate": 6.28070351758794e-05, - "loss": 4.8722, - "step": 37520 - }, - { - "epoch": 19.567666232073012, - "grad_norm": 1.6124696731567383, - "learning_rate": 6.280603015075377e-05, - "loss": 4.8766, - "step": 37521 - }, - { - "epoch": 19.568187744458932, - "grad_norm": 1.556427240371704, - "learning_rate": 6.280502512562815e-05, - "loss": 4.9939, - "step": 37522 - }, - { - "epoch": 19.56870925684485, - "grad_norm": 1.5003563165664673, - "learning_rate": 6.280402010050251e-05, - "loss": 5.2814, - "step": 37523 - }, - { - "epoch": 19.56923076923077, - "grad_norm": 1.5423680543899536, - "learning_rate": 6.280301507537689e-05, - "loss": 5.3686, - "step": 37524 - }, - { - "epoch": 19.569752281616687, - "grad_norm": 1.6424102783203125, - "learning_rate": 6.280201005025125e-05, - "loss": 5.544, - "step": 37525 - }, - { - "epoch": 19.570273794002606, - "grad_norm": 1.7642232179641724, - "learning_rate": 6.280100502512563e-05, - "loss": 4.9591, - "step": 37526 - }, - { - "epoch": 19.570795306388526, - "grad_norm": 1.4571833610534668, - "learning_rate": 6.280000000000001e-05, - "loss": 5.2986, - "step": 37527 - }, - { - "epoch": 19.571316818774445, - "grad_norm": 1.4873907566070557, - "learning_rate": 6.279899497487437e-05, - "loss": 5.4254, - "step": 37528 - }, - { - "epoch": 19.571838331160365, - "grad_norm": 1.4587247371673584, - "learning_rate": 6.279798994974875e-05, - "loss": 5.5735, - "step": 37529 - }, - { - "epoch": 19.572359843546284, - "grad_norm": 1.6283143758773804, - "learning_rate": 6.279698492462311e-05, - "loss": 5.089, - "step": 37530 - }, - { - "epoch": 19.572881355932203, - "grad_norm": 1.646662712097168, - "learning_rate": 6.279597989949749e-05, - "loss": 4.7903, - "step": 37531 - }, - { - "epoch": 19.573402868318123, - "grad_norm": 1.4829233884811401, - "learning_rate": 6.279497487437186e-05, - "loss": 5.5188, - "step": 37532 - }, - { - "epoch": 19.573924380704042, - "grad_norm": 1.62949538230896, - "learning_rate": 6.279396984924623e-05, - "loss": 4.7079, - "step": 37533 - }, - { - "epoch": 19.574445893089962, - "grad_norm": 1.532666802406311, - "learning_rate": 6.27929648241206e-05, - "loss": 5.2303, - "step": 37534 - }, - { - "epoch": 19.57496740547588, - "grad_norm": 1.6331340074539185, - "learning_rate": 6.279195979899498e-05, - "loss": 5.6569, - "step": 37535 - }, - { - "epoch": 19.5754889178618, - "grad_norm": 1.6694132089614868, - "learning_rate": 6.279095477386935e-05, - "loss": 5.2909, - "step": 37536 - }, - { - "epoch": 19.576010430247717, - "grad_norm": 1.6019362211227417, - "learning_rate": 6.278994974874373e-05, - "loss": 5.4318, - "step": 37537 - }, - { - "epoch": 19.576531942633636, - "grad_norm": 1.5223824977874756, - "learning_rate": 6.27889447236181e-05, - "loss": 4.7419, - "step": 37538 - }, - { - "epoch": 19.577053455019556, - "grad_norm": 1.4767334461212158, - "learning_rate": 6.278793969849247e-05, - "loss": 5.3535, - "step": 37539 - }, - { - "epoch": 19.577574967405475, - "grad_norm": 1.5782471895217896, - "learning_rate": 6.278693467336684e-05, - "loss": 5.1381, - "step": 37540 - }, - { - "epoch": 19.578096479791395, - "grad_norm": 1.5867197513580322, - "learning_rate": 6.27859296482412e-05, - "loss": 5.2569, - "step": 37541 - }, - { - "epoch": 19.578617992177314, - "grad_norm": 1.5299867391586304, - "learning_rate": 6.278492462311558e-05, - "loss": 5.0196, - "step": 37542 - }, - { - "epoch": 19.579139504563233, - "grad_norm": 1.504335880279541, - "learning_rate": 6.278391959798995e-05, - "loss": 5.0948, - "step": 37543 - }, - { - "epoch": 19.579661016949153, - "grad_norm": 1.4975916147232056, - "learning_rate": 6.278291457286432e-05, - "loss": 5.6012, - "step": 37544 - }, - { - "epoch": 19.580182529335072, - "grad_norm": 1.6199015378952026, - "learning_rate": 6.278190954773869e-05, - "loss": 5.6067, - "step": 37545 - }, - { - "epoch": 19.580704041720992, - "grad_norm": 1.5995440483093262, - "learning_rate": 6.278090452261307e-05, - "loss": 5.0517, - "step": 37546 - }, - { - "epoch": 19.58122555410691, - "grad_norm": 1.5336226224899292, - "learning_rate": 6.277989949748744e-05, - "loss": 5.2133, - "step": 37547 - }, - { - "epoch": 19.58174706649283, - "grad_norm": 1.4558686017990112, - "learning_rate": 6.277889447236182e-05, - "loss": 5.4197, - "step": 37548 - }, - { - "epoch": 19.582268578878747, - "grad_norm": 1.5445451736450195, - "learning_rate": 6.277788944723619e-05, - "loss": 5.2175, - "step": 37549 - }, - { - "epoch": 19.582790091264666, - "grad_norm": 1.478776454925537, - "learning_rate": 6.277688442211056e-05, - "loss": 5.3742, - "step": 37550 - }, - { - "epoch": 19.583311603650586, - "grad_norm": 1.583756446838379, - "learning_rate": 6.277587939698493e-05, - "loss": 5.229, - "step": 37551 - }, - { - "epoch": 19.583833116036505, - "grad_norm": 1.6518765687942505, - "learning_rate": 6.27748743718593e-05, - "loss": 4.6818, - "step": 37552 - }, - { - "epoch": 19.584354628422425, - "grad_norm": 1.4976038932800293, - "learning_rate": 6.277386934673367e-05, - "loss": 5.4114, - "step": 37553 - }, - { - "epoch": 19.584876140808344, - "grad_norm": 1.3866770267486572, - "learning_rate": 6.277286432160803e-05, - "loss": 5.405, - "step": 37554 - }, - { - "epoch": 19.585397653194264, - "grad_norm": 1.5570836067199707, - "learning_rate": 6.277185929648241e-05, - "loss": 5.3777, - "step": 37555 - }, - { - "epoch": 19.585919165580183, - "grad_norm": 1.51217782497406, - "learning_rate": 6.277085427135679e-05, - "loss": 5.2728, - "step": 37556 - }, - { - "epoch": 19.586440677966102, - "grad_norm": 1.4295389652252197, - "learning_rate": 6.276984924623117e-05, - "loss": 5.6566, - "step": 37557 - }, - { - "epoch": 19.586962190352022, - "grad_norm": 1.582895040512085, - "learning_rate": 6.276884422110553e-05, - "loss": 5.2283, - "step": 37558 - }, - { - "epoch": 19.58748370273794, - "grad_norm": 1.5975871086120605, - "learning_rate": 6.276783919597991e-05, - "loss": 4.8399, - "step": 37559 - }, - { - "epoch": 19.58800521512386, - "grad_norm": 1.5466372966766357, - "learning_rate": 6.276683417085427e-05, - "loss": 5.2187, - "step": 37560 - }, - { - "epoch": 19.588526727509777, - "grad_norm": 1.6074717044830322, - "learning_rate": 6.276582914572865e-05, - "loss": 5.2391, - "step": 37561 - }, - { - "epoch": 19.589048239895696, - "grad_norm": 1.4472650289535522, - "learning_rate": 6.276482412060302e-05, - "loss": 5.3369, - "step": 37562 - }, - { - "epoch": 19.589569752281616, - "grad_norm": 1.5407295227050781, - "learning_rate": 6.27638190954774e-05, - "loss": 5.4798, - "step": 37563 - }, - { - "epoch": 19.590091264667535, - "grad_norm": 1.4994728565216064, - "learning_rate": 6.276281407035176e-05, - "loss": 5.3987, - "step": 37564 - }, - { - "epoch": 19.590612777053455, - "grad_norm": 1.391235589981079, - "learning_rate": 6.276180904522614e-05, - "loss": 4.9206, - "step": 37565 - }, - { - "epoch": 19.591134289439374, - "grad_norm": 1.470260500907898, - "learning_rate": 6.27608040201005e-05, - "loss": 5.3633, - "step": 37566 - }, - { - "epoch": 19.591655801825294, - "grad_norm": 1.4692065715789795, - "learning_rate": 6.275979899497488e-05, - "loss": 5.3129, - "step": 37567 - }, - { - "epoch": 19.592177314211213, - "grad_norm": 1.5469679832458496, - "learning_rate": 6.275879396984926e-05, - "loss": 4.7844, - "step": 37568 - }, - { - "epoch": 19.592698826597132, - "grad_norm": 1.6025574207305908, - "learning_rate": 6.275778894472362e-05, - "loss": 5.0649, - "step": 37569 - }, - { - "epoch": 19.593220338983052, - "grad_norm": 1.45127272605896, - "learning_rate": 6.2756783919598e-05, - "loss": 5.2027, - "step": 37570 - }, - { - "epoch": 19.59374185136897, - "grad_norm": 1.496595025062561, - "learning_rate": 6.275577889447236e-05, - "loss": 5.1162, - "step": 37571 - }, - { - "epoch": 19.59426336375489, - "grad_norm": 1.502832293510437, - "learning_rate": 6.275477386934674e-05, - "loss": 5.6622, - "step": 37572 - }, - { - "epoch": 19.594784876140807, - "grad_norm": 1.4173272848129272, - "learning_rate": 6.27537688442211e-05, - "loss": 5.5247, - "step": 37573 - }, - { - "epoch": 19.595306388526726, - "grad_norm": 1.5187772512435913, - "learning_rate": 6.275276381909548e-05, - "loss": 5.591, - "step": 37574 - }, - { - "epoch": 19.595827900912646, - "grad_norm": 1.5332928895950317, - "learning_rate": 6.275175879396985e-05, - "loss": 5.3176, - "step": 37575 - }, - { - "epoch": 19.596349413298565, - "grad_norm": 1.6054776906967163, - "learning_rate": 6.275075376884422e-05, - "loss": 5.5292, - "step": 37576 - }, - { - "epoch": 19.596870925684485, - "grad_norm": 1.5825316905975342, - "learning_rate": 6.274974874371859e-05, - "loss": 4.8125, - "step": 37577 - }, - { - "epoch": 19.597392438070404, - "grad_norm": 1.494697093963623, - "learning_rate": 6.274874371859297e-05, - "loss": 5.2449, - "step": 37578 - }, - { - "epoch": 19.597913950456324, - "grad_norm": 1.5960270166397095, - "learning_rate": 6.274773869346734e-05, - "loss": 5.0087, - "step": 37579 - }, - { - "epoch": 19.598435462842243, - "grad_norm": 1.541136622428894, - "learning_rate": 6.274673366834171e-05, - "loss": 5.2362, - "step": 37580 - }, - { - "epoch": 19.598956975228162, - "grad_norm": 1.3700608015060425, - "learning_rate": 6.274572864321609e-05, - "loss": 5.8244, - "step": 37581 - }, - { - "epoch": 19.599478487614082, - "grad_norm": 1.4307323694229126, - "learning_rate": 6.274472361809045e-05, - "loss": 5.0418, - "step": 37582 - }, - { - "epoch": 19.6, - "grad_norm": 1.4940412044525146, - "learning_rate": 6.274371859296483e-05, - "loss": 5.4635, - "step": 37583 - }, - { - "epoch": 19.60052151238592, - "grad_norm": 1.5038869380950928, - "learning_rate": 6.274271356783919e-05, - "loss": 5.2173, - "step": 37584 - }, - { - "epoch": 19.601043024771837, - "grad_norm": 1.6027987003326416, - "learning_rate": 6.274170854271357e-05, - "loss": 4.8352, - "step": 37585 - }, - { - "epoch": 19.601564537157756, - "grad_norm": 1.4765115976333618, - "learning_rate": 6.274070351758793e-05, - "loss": 5.3093, - "step": 37586 - }, - { - "epoch": 19.602086049543676, - "grad_norm": 1.5005013942718506, - "learning_rate": 6.273969849246231e-05, - "loss": 5.242, - "step": 37587 - }, - { - "epoch": 19.602607561929595, - "grad_norm": 1.4754964113235474, - "learning_rate": 6.273869346733669e-05, - "loss": 5.0594, - "step": 37588 - }, - { - "epoch": 19.603129074315515, - "grad_norm": 1.446650743484497, - "learning_rate": 6.273768844221107e-05, - "loss": 5.1257, - "step": 37589 - }, - { - "epoch": 19.603650586701434, - "grad_norm": 1.4760850667953491, - "learning_rate": 6.273668341708543e-05, - "loss": 5.1751, - "step": 37590 - }, - { - "epoch": 19.604172099087354, - "grad_norm": 1.550814151763916, - "learning_rate": 6.273567839195981e-05, - "loss": 5.6976, - "step": 37591 - }, - { - "epoch": 19.604693611473273, - "grad_norm": 1.4828503131866455, - "learning_rate": 6.273467336683417e-05, - "loss": 5.7513, - "step": 37592 - }, - { - "epoch": 19.605215123859193, - "grad_norm": 1.5224705934524536, - "learning_rate": 6.273366834170854e-05, - "loss": 5.3214, - "step": 37593 - }, - { - "epoch": 19.605736636245112, - "grad_norm": 1.4208425283432007, - "learning_rate": 6.273266331658292e-05, - "loss": 5.3941, - "step": 37594 - }, - { - "epoch": 19.60625814863103, - "grad_norm": 1.626675009727478, - "learning_rate": 6.273165829145728e-05, - "loss": 5.0443, - "step": 37595 - }, - { - "epoch": 19.60677966101695, - "grad_norm": 1.5927811861038208, - "learning_rate": 6.273065326633166e-05, - "loss": 4.8796, - "step": 37596 - }, - { - "epoch": 19.607301173402867, - "grad_norm": 1.4182718992233276, - "learning_rate": 6.272964824120602e-05, - "loss": 5.5953, - "step": 37597 - }, - { - "epoch": 19.607822685788786, - "grad_norm": 1.3938947916030884, - "learning_rate": 6.27286432160804e-05, - "loss": 5.7721, - "step": 37598 - }, - { - "epoch": 19.608344198174706, - "grad_norm": 1.5096721649169922, - "learning_rate": 6.272763819095478e-05, - "loss": 5.3634, - "step": 37599 - }, - { - "epoch": 19.608865710560625, - "grad_norm": 1.4636753797531128, - "learning_rate": 6.272663316582916e-05, - "loss": 5.6679, - "step": 37600 - }, - { - "epoch": 19.609387222946545, - "grad_norm": 1.5411666631698608, - "learning_rate": 6.272562814070352e-05, - "loss": 5.3685, - "step": 37601 - }, - { - "epoch": 19.609908735332464, - "grad_norm": 1.5273106098175049, - "learning_rate": 6.27246231155779e-05, - "loss": 5.4342, - "step": 37602 - }, - { - "epoch": 19.610430247718384, - "grad_norm": 1.51449716091156, - "learning_rate": 6.272361809045226e-05, - "loss": 5.3833, - "step": 37603 - }, - { - "epoch": 19.610951760104303, - "grad_norm": 1.3736467361450195, - "learning_rate": 6.272261306532664e-05, - "loss": 5.3503, - "step": 37604 - }, - { - "epoch": 19.611473272490223, - "grad_norm": 1.4307125806808472, - "learning_rate": 6.2721608040201e-05, - "loss": 5.684, - "step": 37605 - }, - { - "epoch": 19.611994784876142, - "grad_norm": 1.5913656949996948, - "learning_rate": 6.272060301507538e-05, - "loss": 5.2938, - "step": 37606 - }, - { - "epoch": 19.61251629726206, - "grad_norm": 1.4736003875732422, - "learning_rate": 6.271959798994975e-05, - "loss": 5.2257, - "step": 37607 - }, - { - "epoch": 19.613037809647977, - "grad_norm": 1.5495914220809937, - "learning_rate": 6.271859296482412e-05, - "loss": 5.067, - "step": 37608 - }, - { - "epoch": 19.613559322033897, - "grad_norm": 1.458991289138794, - "learning_rate": 6.27175879396985e-05, - "loss": 5.6035, - "step": 37609 - }, - { - "epoch": 19.614080834419816, - "grad_norm": 1.5523844957351685, - "learning_rate": 6.271658291457287e-05, - "loss": 5.2848, - "step": 37610 - }, - { - "epoch": 19.614602346805736, - "grad_norm": 1.5820930004119873, - "learning_rate": 6.271557788944724e-05, - "loss": 5.1735, - "step": 37611 - }, - { - "epoch": 19.615123859191655, - "grad_norm": 1.5134700536727905, - "learning_rate": 6.271457286432161e-05, - "loss": 4.6003, - "step": 37612 - }, - { - "epoch": 19.615645371577575, - "grad_norm": 1.4905681610107422, - "learning_rate": 6.271356783919599e-05, - "loss": 5.4521, - "step": 37613 - }, - { - "epoch": 19.616166883963494, - "grad_norm": 1.4900822639465332, - "learning_rate": 6.271256281407035e-05, - "loss": 5.4371, - "step": 37614 - }, - { - "epoch": 19.616688396349414, - "grad_norm": 1.4078669548034668, - "learning_rate": 6.271155778894473e-05, - "loss": 4.8921, - "step": 37615 - }, - { - "epoch": 19.617209908735333, - "grad_norm": 1.565551519393921, - "learning_rate": 6.271055276381909e-05, - "loss": 5.3998, - "step": 37616 - }, - { - "epoch": 19.617731421121253, - "grad_norm": 1.4072154760360718, - "learning_rate": 6.270954773869347e-05, - "loss": 5.1363, - "step": 37617 - }, - { - "epoch": 19.618252933507172, - "grad_norm": 1.4375736713409424, - "learning_rate": 6.270854271356784e-05, - "loss": 4.5125, - "step": 37618 - }, - { - "epoch": 19.61877444589309, - "grad_norm": 1.4776878356933594, - "learning_rate": 6.270753768844221e-05, - "loss": 5.2903, - "step": 37619 - }, - { - "epoch": 19.619295958279007, - "grad_norm": 1.435483694076538, - "learning_rate": 6.270653266331659e-05, - "loss": 5.2808, - "step": 37620 - }, - { - "epoch": 19.619817470664927, - "grad_norm": 1.6224850416183472, - "learning_rate": 6.270552763819096e-05, - "loss": 5.5047, - "step": 37621 - }, - { - "epoch": 19.620338983050846, - "grad_norm": 1.7488044500350952, - "learning_rate": 6.270452261306533e-05, - "loss": 4.3046, - "step": 37622 - }, - { - "epoch": 19.620860495436766, - "grad_norm": 1.4290629625320435, - "learning_rate": 6.27035175879397e-05, - "loss": 5.5056, - "step": 37623 - }, - { - "epoch": 19.621382007822685, - "grad_norm": 1.4691249132156372, - "learning_rate": 6.270251256281408e-05, - "loss": 5.3162, - "step": 37624 - }, - { - "epoch": 19.621903520208605, - "grad_norm": 1.4943100214004517, - "learning_rate": 6.270150753768844e-05, - "loss": 5.4307, - "step": 37625 - }, - { - "epoch": 19.622425032594524, - "grad_norm": 1.5160104036331177, - "learning_rate": 6.270050251256282e-05, - "loss": 5.2294, - "step": 37626 - }, - { - "epoch": 19.622946544980444, - "grad_norm": 1.5675759315490723, - "learning_rate": 6.269949748743718e-05, - "loss": 5.1675, - "step": 37627 - }, - { - "epoch": 19.623468057366363, - "grad_norm": 1.4964457750320435, - "learning_rate": 6.269849246231156e-05, - "loss": 5.3027, - "step": 37628 - }, - { - "epoch": 19.623989569752283, - "grad_norm": 1.741913080215454, - "learning_rate": 6.269748743718594e-05, - "loss": 4.5475, - "step": 37629 - }, - { - "epoch": 19.624511082138202, - "grad_norm": 1.4432686567306519, - "learning_rate": 6.269648241206032e-05, - "loss": 5.6251, - "step": 37630 - }, - { - "epoch": 19.62503259452412, - "grad_norm": 1.5002470016479492, - "learning_rate": 6.269547738693468e-05, - "loss": 5.543, - "step": 37631 - }, - { - "epoch": 19.625554106910037, - "grad_norm": 1.4617347717285156, - "learning_rate": 6.269447236180906e-05, - "loss": 4.9584, - "step": 37632 - }, - { - "epoch": 19.626075619295957, - "grad_norm": 1.554542899131775, - "learning_rate": 6.269346733668342e-05, - "loss": 5.8027, - "step": 37633 - }, - { - "epoch": 19.626597131681876, - "grad_norm": 1.450566291809082, - "learning_rate": 6.269246231155779e-05, - "loss": 5.5116, - "step": 37634 - }, - { - "epoch": 19.627118644067796, - "grad_norm": 1.5099658966064453, - "learning_rate": 6.269145728643216e-05, - "loss": 5.4224, - "step": 37635 - }, - { - "epoch": 19.627640156453715, - "grad_norm": 1.5328614711761475, - "learning_rate": 6.269045226130653e-05, - "loss": 5.506, - "step": 37636 - }, - { - "epoch": 19.628161668839635, - "grad_norm": 1.6219265460968018, - "learning_rate": 6.26894472361809e-05, - "loss": 4.9139, - "step": 37637 - }, - { - "epoch": 19.628683181225554, - "grad_norm": 1.6054140329360962, - "learning_rate": 6.268844221105527e-05, - "loss": 5.379, - "step": 37638 - }, - { - "epoch": 19.629204693611474, - "grad_norm": 1.5029445886611938, - "learning_rate": 6.268743718592965e-05, - "loss": 5.3809, - "step": 37639 - }, - { - "epoch": 19.629726205997393, - "grad_norm": 1.4691535234451294, - "learning_rate": 6.268643216080403e-05, - "loss": 5.4861, - "step": 37640 - }, - { - "epoch": 19.630247718383313, - "grad_norm": 1.445295810699463, - "learning_rate": 6.26854271356784e-05, - "loss": 5.5806, - "step": 37641 - }, - { - "epoch": 19.630769230769232, - "grad_norm": 1.4059147834777832, - "learning_rate": 6.268442211055277e-05, - "loss": 5.6724, - "step": 37642 - }, - { - "epoch": 19.63129074315515, - "grad_norm": 1.4308180809020996, - "learning_rate": 6.268341708542715e-05, - "loss": 5.4149, - "step": 37643 - }, - { - "epoch": 19.631812255541067, - "grad_norm": 1.5484178066253662, - "learning_rate": 6.268241206030151e-05, - "loss": 5.064, - "step": 37644 - }, - { - "epoch": 19.632333767926987, - "grad_norm": 1.4323620796203613, - "learning_rate": 6.268140703517589e-05, - "loss": 5.5499, - "step": 37645 - }, - { - "epoch": 19.632855280312906, - "grad_norm": 1.6358072757720947, - "learning_rate": 6.268040201005025e-05, - "loss": 5.0233, - "step": 37646 - }, - { - "epoch": 19.633376792698826, - "grad_norm": 1.4526996612548828, - "learning_rate": 6.267939698492462e-05, - "loss": 5.3598, - "step": 37647 - }, - { - "epoch": 19.633898305084745, - "grad_norm": 1.4445244073867798, - "learning_rate": 6.2678391959799e-05, - "loss": 5.3885, - "step": 37648 - }, - { - "epoch": 19.634419817470665, - "grad_norm": 1.5908727645874023, - "learning_rate": 6.267738693467337e-05, - "loss": 5.194, - "step": 37649 - }, - { - "epoch": 19.634941329856584, - "grad_norm": 1.6128159761428833, - "learning_rate": 6.267638190954775e-05, - "loss": 5.2101, - "step": 37650 - }, - { - "epoch": 19.635462842242504, - "grad_norm": 1.5362493991851807, - "learning_rate": 6.267537688442211e-05, - "loss": 5.4873, - "step": 37651 - }, - { - "epoch": 19.635984354628423, - "grad_norm": 1.5178464651107788, - "learning_rate": 6.267437185929649e-05, - "loss": 5.3452, - "step": 37652 - }, - { - "epoch": 19.636505867014343, - "grad_norm": 1.576580286026001, - "learning_rate": 6.267336683417086e-05, - "loss": 5.2976, - "step": 37653 - }, - { - "epoch": 19.637027379400262, - "grad_norm": 1.43178129196167, - "learning_rate": 6.267236180904523e-05, - "loss": 5.7311, - "step": 37654 - }, - { - "epoch": 19.63754889178618, - "grad_norm": 1.5006719827651978, - "learning_rate": 6.26713567839196e-05, - "loss": 5.6302, - "step": 37655 - }, - { - "epoch": 19.638070404172097, - "grad_norm": 1.5514339208602905, - "learning_rate": 6.267035175879398e-05, - "loss": 5.3075, - "step": 37656 - }, - { - "epoch": 19.638591916558017, - "grad_norm": 1.5391095876693726, - "learning_rate": 6.266934673366834e-05, - "loss": 4.9979, - "step": 37657 - }, - { - "epoch": 19.639113428943936, - "grad_norm": 1.56154465675354, - "learning_rate": 6.266834170854272e-05, - "loss": 4.8533, - "step": 37658 - }, - { - "epoch": 19.639634941329856, - "grad_norm": 1.4972847700119019, - "learning_rate": 6.266733668341708e-05, - "loss": 5.3583, - "step": 37659 - }, - { - "epoch": 19.640156453715775, - "grad_norm": 1.49470055103302, - "learning_rate": 6.266633165829146e-05, - "loss": 5.2732, - "step": 37660 - }, - { - "epoch": 19.640677966101695, - "grad_norm": 1.4360369443893433, - "learning_rate": 6.266532663316584e-05, - "loss": 5.5246, - "step": 37661 - }, - { - "epoch": 19.641199478487614, - "grad_norm": 1.4864399433135986, - "learning_rate": 6.26643216080402e-05, - "loss": 5.6585, - "step": 37662 - }, - { - "epoch": 19.641720990873534, - "grad_norm": 1.4260280132293701, - "learning_rate": 6.266331658291458e-05, - "loss": 5.4164, - "step": 37663 - }, - { - "epoch": 19.642242503259453, - "grad_norm": 1.7840913534164429, - "learning_rate": 6.266231155778894e-05, - "loss": 4.2047, - "step": 37664 - }, - { - "epoch": 19.642764015645373, - "grad_norm": 1.465456247329712, - "learning_rate": 6.266130653266332e-05, - "loss": 5.6235, - "step": 37665 - }, - { - "epoch": 19.643285528031292, - "grad_norm": 1.6156920194625854, - "learning_rate": 6.266030150753769e-05, - "loss": 5.1372, - "step": 37666 - }, - { - "epoch": 19.64380704041721, - "grad_norm": 1.5833464860916138, - "learning_rate": 6.265929648241206e-05, - "loss": 5.6132, - "step": 37667 - }, - { - "epoch": 19.644328552803128, - "grad_norm": 1.4955601692199707, - "learning_rate": 6.265829145728643e-05, - "loss": 5.5649, - "step": 37668 - }, - { - "epoch": 19.644850065189047, - "grad_norm": 1.4541289806365967, - "learning_rate": 6.26572864321608e-05, - "loss": 5.4881, - "step": 37669 - }, - { - "epoch": 19.645371577574966, - "grad_norm": 1.608767032623291, - "learning_rate": 6.265628140703518e-05, - "loss": 5.198, - "step": 37670 - }, - { - "epoch": 19.645893089960886, - "grad_norm": 1.4522544145584106, - "learning_rate": 6.265527638190956e-05, - "loss": 5.331, - "step": 37671 - }, - { - "epoch": 19.646414602346805, - "grad_norm": 1.4974626302719116, - "learning_rate": 6.265427135678393e-05, - "loss": 5.3959, - "step": 37672 - }, - { - "epoch": 19.646936114732725, - "grad_norm": 1.5265084505081177, - "learning_rate": 6.265326633165829e-05, - "loss": 5.1391, - "step": 37673 - }, - { - "epoch": 19.647457627118644, - "grad_norm": 1.4977551698684692, - "learning_rate": 6.265226130653267e-05, - "loss": 5.144, - "step": 37674 - }, - { - "epoch": 19.647979139504564, - "grad_norm": 1.4018847942352295, - "learning_rate": 6.265125628140703e-05, - "loss": 5.8881, - "step": 37675 - }, - { - "epoch": 19.648500651890483, - "grad_norm": 1.5085691213607788, - "learning_rate": 6.265025125628141e-05, - "loss": 5.6515, - "step": 37676 - }, - { - "epoch": 19.649022164276403, - "grad_norm": 1.5129985809326172, - "learning_rate": 6.264924623115577e-05, - "loss": 4.6483, - "step": 37677 - }, - { - "epoch": 19.649543676662322, - "grad_norm": 1.505188226699829, - "learning_rate": 6.264824120603015e-05, - "loss": 5.5479, - "step": 37678 - }, - { - "epoch": 19.65006518904824, - "grad_norm": 1.464761734008789, - "learning_rate": 6.264723618090452e-05, - "loss": 5.7337, - "step": 37679 - }, - { - "epoch": 19.650586701434158, - "grad_norm": 1.4081918001174927, - "learning_rate": 6.26462311557789e-05, - "loss": 5.3074, - "step": 37680 - }, - { - "epoch": 19.651108213820077, - "grad_norm": 1.5023560523986816, - "learning_rate": 6.264522613065327e-05, - "loss": 5.535, - "step": 37681 - }, - { - "epoch": 19.651629726205996, - "grad_norm": 1.3931162357330322, - "learning_rate": 6.264422110552765e-05, - "loss": 5.3269, - "step": 37682 - }, - { - "epoch": 19.652151238591916, - "grad_norm": 1.5462297201156616, - "learning_rate": 6.264321608040201e-05, - "loss": 4.4096, - "step": 37683 - }, - { - "epoch": 19.652672750977835, - "grad_norm": 1.5756338834762573, - "learning_rate": 6.264221105527639e-05, - "loss": 5.2541, - "step": 37684 - }, - { - "epoch": 19.653194263363755, - "grad_norm": 1.5468881130218506, - "learning_rate": 6.264120603015076e-05, - "loss": 5.296, - "step": 37685 - }, - { - "epoch": 19.653715775749674, - "grad_norm": 1.64215886592865, - "learning_rate": 6.264020100502512e-05, - "loss": 5.0474, - "step": 37686 - }, - { - "epoch": 19.654237288135594, - "grad_norm": 1.5330426692962646, - "learning_rate": 6.26391959798995e-05, - "loss": 4.6743, - "step": 37687 - }, - { - "epoch": 19.654758800521513, - "grad_norm": 1.566285490989685, - "learning_rate": 6.263819095477386e-05, - "loss": 5.219, - "step": 37688 - }, - { - "epoch": 19.655280312907433, - "grad_norm": 1.596718668937683, - "learning_rate": 6.263718592964824e-05, - "loss": 5.4018, - "step": 37689 - }, - { - "epoch": 19.655801825293352, - "grad_norm": 1.639778733253479, - "learning_rate": 6.263618090452262e-05, - "loss": 5.1801, - "step": 37690 - }, - { - "epoch": 19.656323337679268, - "grad_norm": 1.445666790008545, - "learning_rate": 6.2635175879397e-05, - "loss": 4.8818, - "step": 37691 - }, - { - "epoch": 19.656844850065188, - "grad_norm": 1.5076650381088257, - "learning_rate": 6.263417085427136e-05, - "loss": 5.271, - "step": 37692 - }, - { - "epoch": 19.657366362451107, - "grad_norm": 1.4407343864440918, - "learning_rate": 6.263316582914574e-05, - "loss": 5.4046, - "step": 37693 - }, - { - "epoch": 19.657887874837026, - "grad_norm": 1.5431572198867798, - "learning_rate": 6.26321608040201e-05, - "loss": 5.5447, - "step": 37694 - }, - { - "epoch": 19.658409387222946, - "grad_norm": 1.6018019914627075, - "learning_rate": 6.263115577889448e-05, - "loss": 4.9305, - "step": 37695 - }, - { - "epoch": 19.658930899608865, - "grad_norm": 1.6334898471832275, - "learning_rate": 6.263015075376885e-05, - "loss": 5.2877, - "step": 37696 - }, - { - "epoch": 19.659452411994785, - "grad_norm": 1.4812756776809692, - "learning_rate": 6.262914572864322e-05, - "loss": 5.5454, - "step": 37697 - }, - { - "epoch": 19.659973924380704, - "grad_norm": 1.405326247215271, - "learning_rate": 6.262814070351759e-05, - "loss": 5.3841, - "step": 37698 - }, - { - "epoch": 19.660495436766624, - "grad_norm": 1.4963828325271606, - "learning_rate": 6.262713567839197e-05, - "loss": 5.1248, - "step": 37699 - }, - { - "epoch": 19.661016949152543, - "grad_norm": 1.4346935749053955, - "learning_rate": 6.262613065326633e-05, - "loss": 4.7042, - "step": 37700 - }, - { - "epoch": 19.661538461538463, - "grad_norm": 1.4753185510635376, - "learning_rate": 6.262512562814071e-05, - "loss": 5.2905, - "step": 37701 - }, - { - "epoch": 19.662059973924382, - "grad_norm": 1.4415271282196045, - "learning_rate": 6.262412060301508e-05, - "loss": 5.4079, - "step": 37702 - }, - { - "epoch": 19.6625814863103, - "grad_norm": 1.4726511240005493, - "learning_rate": 6.262311557788945e-05, - "loss": 4.812, - "step": 37703 - }, - { - "epoch": 19.663102998696218, - "grad_norm": 1.477829933166504, - "learning_rate": 6.262211055276383e-05, - "loss": 5.453, - "step": 37704 - }, - { - "epoch": 19.663624511082137, - "grad_norm": 1.5461270809173584, - "learning_rate": 6.262110552763819e-05, - "loss": 4.8501, - "step": 37705 - }, - { - "epoch": 19.664146023468057, - "grad_norm": 1.5563873052597046, - "learning_rate": 6.262010050251257e-05, - "loss": 5.3168, - "step": 37706 - }, - { - "epoch": 19.664667535853976, - "grad_norm": 1.491679310798645, - "learning_rate": 6.261909547738693e-05, - "loss": 5.3774, - "step": 37707 - }, - { - "epoch": 19.665189048239895, - "grad_norm": 1.5337982177734375, - "learning_rate": 6.261809045226131e-05, - "loss": 5.4782, - "step": 37708 - }, - { - "epoch": 19.665710560625815, - "grad_norm": 1.9858604669570923, - "learning_rate": 6.261708542713568e-05, - "loss": 4.8895, - "step": 37709 - }, - { - "epoch": 19.666232073011734, - "grad_norm": 1.5323824882507324, - "learning_rate": 6.261608040201005e-05, - "loss": 5.0373, - "step": 37710 - }, - { - "epoch": 19.666753585397654, - "grad_norm": 1.5937687158584595, - "learning_rate": 6.261507537688443e-05, - "loss": 5.2399, - "step": 37711 - }, - { - "epoch": 19.667275097783573, - "grad_norm": 1.4722965955734253, - "learning_rate": 6.261407035175881e-05, - "loss": 5.4287, - "step": 37712 - }, - { - "epoch": 19.667796610169493, - "grad_norm": 1.477921962738037, - "learning_rate": 6.261306532663317e-05, - "loss": 5.3096, - "step": 37713 - }, - { - "epoch": 19.668318122555412, - "grad_norm": 1.3599882125854492, - "learning_rate": 6.261206030150754e-05, - "loss": 4.4335, - "step": 37714 - }, - { - "epoch": 19.668839634941328, - "grad_norm": 1.5106711387634277, - "learning_rate": 6.261105527638192e-05, - "loss": 4.9404, - "step": 37715 - }, - { - "epoch": 19.669361147327248, - "grad_norm": 1.4957044124603271, - "learning_rate": 6.261005025125628e-05, - "loss": 5.1446, - "step": 37716 - }, - { - "epoch": 19.669882659713167, - "grad_norm": 1.4679063558578491, - "learning_rate": 6.260904522613066e-05, - "loss": 5.3381, - "step": 37717 - }, - { - "epoch": 19.670404172099087, - "grad_norm": 1.5154857635498047, - "learning_rate": 6.260804020100502e-05, - "loss": 5.5359, - "step": 37718 - }, - { - "epoch": 19.670925684485006, - "grad_norm": 1.5127296447753906, - "learning_rate": 6.26070351758794e-05, - "loss": 5.4362, - "step": 37719 - }, - { - "epoch": 19.671447196870925, - "grad_norm": 1.628928542137146, - "learning_rate": 6.260603015075376e-05, - "loss": 5.218, - "step": 37720 - }, - { - "epoch": 19.671968709256845, - "grad_norm": 1.502188801765442, - "learning_rate": 6.260502512562814e-05, - "loss": 5.5269, - "step": 37721 - }, - { - "epoch": 19.672490221642764, - "grad_norm": 1.488709807395935, - "learning_rate": 6.260402010050252e-05, - "loss": 5.0923, - "step": 37722 - }, - { - "epoch": 19.673011734028684, - "grad_norm": 1.4561975002288818, - "learning_rate": 6.26030150753769e-05, - "loss": 5.7409, - "step": 37723 - }, - { - "epoch": 19.673533246414603, - "grad_norm": 1.5619852542877197, - "learning_rate": 6.260201005025126e-05, - "loss": 5.19, - "step": 37724 - }, - { - "epoch": 19.674054758800523, - "grad_norm": 1.457887887954712, - "learning_rate": 6.260100502512564e-05, - "loss": 5.4114, - "step": 37725 - }, - { - "epoch": 19.674576271186442, - "grad_norm": 1.5466421842575073, - "learning_rate": 6.26e-05, - "loss": 5.1683, - "step": 37726 - }, - { - "epoch": 19.675097783572358, - "grad_norm": 1.5486266613006592, - "learning_rate": 6.259899497487437e-05, - "loss": 5.1546, - "step": 37727 - }, - { - "epoch": 19.675619295958278, - "grad_norm": 1.5473213195800781, - "learning_rate": 6.259798994974875e-05, - "loss": 5.3938, - "step": 37728 - }, - { - "epoch": 19.676140808344197, - "grad_norm": 1.4843559265136719, - "learning_rate": 6.259698492462311e-05, - "loss": 5.6119, - "step": 37729 - }, - { - "epoch": 19.676662320730117, - "grad_norm": 1.477612018585205, - "learning_rate": 6.259597989949749e-05, - "loss": 5.3071, - "step": 37730 - }, - { - "epoch": 19.677183833116036, - "grad_norm": 1.435394525527954, - "learning_rate": 6.259497487437185e-05, - "loss": 5.4872, - "step": 37731 - }, - { - "epoch": 19.677705345501955, - "grad_norm": 1.3838953971862793, - "learning_rate": 6.259396984924623e-05, - "loss": 5.7507, - "step": 37732 - }, - { - "epoch": 19.678226857887875, - "grad_norm": 1.4059388637542725, - "learning_rate": 6.259296482412061e-05, - "loss": 5.7324, - "step": 37733 - }, - { - "epoch": 19.678748370273794, - "grad_norm": 1.5324269533157349, - "learning_rate": 6.259195979899499e-05, - "loss": 5.0334, - "step": 37734 - }, - { - "epoch": 19.679269882659714, - "grad_norm": 1.525691270828247, - "learning_rate": 6.259095477386935e-05, - "loss": 5.0279, - "step": 37735 - }, - { - "epoch": 19.679791395045633, - "grad_norm": 1.5493007898330688, - "learning_rate": 6.258994974874373e-05, - "loss": 5.2455, - "step": 37736 - }, - { - "epoch": 19.680312907431553, - "grad_norm": 1.5372636318206787, - "learning_rate": 6.258894472361809e-05, - "loss": 5.0935, - "step": 37737 - }, - { - "epoch": 19.680834419817472, - "grad_norm": 1.441118597984314, - "learning_rate": 6.258793969849247e-05, - "loss": 5.3325, - "step": 37738 - }, - { - "epoch": 19.681355932203388, - "grad_norm": 1.5197871923446655, - "learning_rate": 6.258693467336683e-05, - "loss": 5.4174, - "step": 37739 - }, - { - "epoch": 19.681877444589308, - "grad_norm": 1.4089950323104858, - "learning_rate": 6.25859296482412e-05, - "loss": 5.5369, - "step": 37740 - }, - { - "epoch": 19.682398956975227, - "grad_norm": 1.5526202917099, - "learning_rate": 6.258492462311558e-05, - "loss": 5.123, - "step": 37741 - }, - { - "epoch": 19.682920469361147, - "grad_norm": 1.5623321533203125, - "learning_rate": 6.258391959798995e-05, - "loss": 4.9804, - "step": 37742 - }, - { - "epoch": 19.683441981747066, - "grad_norm": 1.5126683712005615, - "learning_rate": 6.258291457286433e-05, - "loss": 5.4399, - "step": 37743 - }, - { - "epoch": 19.683963494132986, - "grad_norm": 1.4831838607788086, - "learning_rate": 6.25819095477387e-05, - "loss": 4.9608, - "step": 37744 - }, - { - "epoch": 19.684485006518905, - "grad_norm": 1.5250674486160278, - "learning_rate": 6.258090452261307e-05, - "loss": 5.172, - "step": 37745 - }, - { - "epoch": 19.685006518904824, - "grad_norm": 1.4581689834594727, - "learning_rate": 6.257989949748744e-05, - "loss": 5.4039, - "step": 37746 - }, - { - "epoch": 19.685528031290744, - "grad_norm": 1.5732287168502808, - "learning_rate": 6.257889447236182e-05, - "loss": 4.6906, - "step": 37747 - }, - { - "epoch": 19.686049543676663, - "grad_norm": 1.5067365169525146, - "learning_rate": 6.257788944723618e-05, - "loss": 5.4114, - "step": 37748 - }, - { - "epoch": 19.686571056062583, - "grad_norm": 1.5502276420593262, - "learning_rate": 6.257688442211056e-05, - "loss": 5.0355, - "step": 37749 - }, - { - "epoch": 19.687092568448502, - "grad_norm": 1.5083999633789062, - "learning_rate": 6.257587939698492e-05, - "loss": 5.2252, - "step": 37750 - }, - { - "epoch": 19.687614080834418, - "grad_norm": 1.569830298423767, - "learning_rate": 6.25748743718593e-05, - "loss": 5.0373, - "step": 37751 - }, - { - "epoch": 19.688135593220338, - "grad_norm": 1.610499620437622, - "learning_rate": 6.257386934673366e-05, - "loss": 5.2319, - "step": 37752 - }, - { - "epoch": 19.688657105606257, - "grad_norm": 1.4957754611968994, - "learning_rate": 6.257286432160804e-05, - "loss": 5.494, - "step": 37753 - }, - { - "epoch": 19.689178617992177, - "grad_norm": 1.6002564430236816, - "learning_rate": 6.257185929648242e-05, - "loss": 5.113, - "step": 37754 - }, - { - "epoch": 19.689700130378096, - "grad_norm": 1.5098798274993896, - "learning_rate": 6.257085427135678e-05, - "loss": 5.4296, - "step": 37755 - }, - { - "epoch": 19.690221642764016, - "grad_norm": 1.6264398097991943, - "learning_rate": 6.256984924623116e-05, - "loss": 4.9672, - "step": 37756 - }, - { - "epoch": 19.690743155149935, - "grad_norm": 1.3986549377441406, - "learning_rate": 6.256884422110553e-05, - "loss": 5.2593, - "step": 37757 - }, - { - "epoch": 19.691264667535854, - "grad_norm": 1.440482497215271, - "learning_rate": 6.25678391959799e-05, - "loss": 5.5761, - "step": 37758 - }, - { - "epoch": 19.691786179921774, - "grad_norm": 1.5494471788406372, - "learning_rate": 6.256683417085427e-05, - "loss": 5.503, - "step": 37759 - }, - { - "epoch": 19.692307692307693, - "grad_norm": 1.50187349319458, - "learning_rate": 6.256582914572865e-05, - "loss": 4.5023, - "step": 37760 - }, - { - "epoch": 19.692829204693613, - "grad_norm": 1.5744003057479858, - "learning_rate": 6.256482412060301e-05, - "loss": 4.9213, - "step": 37761 - }, - { - "epoch": 19.693350717079532, - "grad_norm": 1.455190896987915, - "learning_rate": 6.256381909547739e-05, - "loss": 5.4741, - "step": 37762 - }, - { - "epoch": 19.69387222946545, - "grad_norm": 1.5965027809143066, - "learning_rate": 6.256281407035177e-05, - "loss": 5.3953, - "step": 37763 - }, - { - "epoch": 19.694393741851368, - "grad_norm": 1.544043779373169, - "learning_rate": 6.256180904522614e-05, - "loss": 4.8939, - "step": 37764 - }, - { - "epoch": 19.694915254237287, - "grad_norm": 1.5488483905792236, - "learning_rate": 6.256080402010051e-05, - "loss": 5.3882, - "step": 37765 - }, - { - "epoch": 19.695436766623207, - "grad_norm": 1.462303876876831, - "learning_rate": 6.255979899497487e-05, - "loss": 5.0861, - "step": 37766 - }, - { - "epoch": 19.695958279009126, - "grad_norm": 1.6059184074401855, - "learning_rate": 6.255879396984925e-05, - "loss": 5.2646, - "step": 37767 - }, - { - "epoch": 19.696479791395046, - "grad_norm": 1.4851332902908325, - "learning_rate": 6.255778894472362e-05, - "loss": 5.5366, - "step": 37768 - }, - { - "epoch": 19.697001303780965, - "grad_norm": 1.4932317733764648, - "learning_rate": 6.255678391959799e-05, - "loss": 5.1809, - "step": 37769 - }, - { - "epoch": 19.697522816166884, - "grad_norm": 1.507222294807434, - "learning_rate": 6.255577889447236e-05, - "loss": 5.0557, - "step": 37770 - }, - { - "epoch": 19.698044328552804, - "grad_norm": 1.4107666015625, - "learning_rate": 6.255477386934673e-05, - "loss": 5.1493, - "step": 37771 - }, - { - "epoch": 19.698565840938723, - "grad_norm": 1.613178014755249, - "learning_rate": 6.25537688442211e-05, - "loss": 5.396, - "step": 37772 - }, - { - "epoch": 19.699087353324643, - "grad_norm": 1.4749068021774292, - "learning_rate": 6.255276381909548e-05, - "loss": 5.6813, - "step": 37773 - }, - { - "epoch": 19.69960886571056, - "grad_norm": 1.693174123764038, - "learning_rate": 6.255175879396985e-05, - "loss": 5.1729, - "step": 37774 - }, - { - "epoch": 19.70013037809648, - "grad_norm": 1.438726782798767, - "learning_rate": 6.255075376884423e-05, - "loss": 5.7595, - "step": 37775 - }, - { - "epoch": 19.700651890482398, - "grad_norm": 1.439436435699463, - "learning_rate": 6.25497487437186e-05, - "loss": 4.9995, - "step": 37776 - }, - { - "epoch": 19.701173402868317, - "grad_norm": 1.388101577758789, - "learning_rate": 6.254874371859297e-05, - "loss": 5.3256, - "step": 37777 - }, - { - "epoch": 19.701694915254237, - "grad_norm": 1.556438684463501, - "learning_rate": 6.254773869346734e-05, - "loss": 5.2898, - "step": 37778 - }, - { - "epoch": 19.702216427640156, - "grad_norm": 1.6184853315353394, - "learning_rate": 6.25467336683417e-05, - "loss": 5.334, - "step": 37779 - }, - { - "epoch": 19.702737940026076, - "grad_norm": 1.504356026649475, - "learning_rate": 6.254572864321608e-05, - "loss": 5.2109, - "step": 37780 - }, - { - "epoch": 19.703259452411995, - "grad_norm": 1.4765268564224243, - "learning_rate": 6.254472361809045e-05, - "loss": 5.548, - "step": 37781 - }, - { - "epoch": 19.703780964797915, - "grad_norm": 1.479588508605957, - "learning_rate": 6.254371859296482e-05, - "loss": 5.3464, - "step": 37782 - }, - { - "epoch": 19.704302477183834, - "grad_norm": 1.4930155277252197, - "learning_rate": 6.25427135678392e-05, - "loss": 5.114, - "step": 37783 - }, - { - "epoch": 19.704823989569753, - "grad_norm": 1.5175000429153442, - "learning_rate": 6.254170854271358e-05, - "loss": 5.0785, - "step": 37784 - }, - { - "epoch": 19.705345501955673, - "grad_norm": 1.5469098091125488, - "learning_rate": 6.254070351758794e-05, - "loss": 5.4572, - "step": 37785 - }, - { - "epoch": 19.705867014341592, - "grad_norm": 1.6336795091629028, - "learning_rate": 6.253969849246232e-05, - "loss": 5.1819, - "step": 37786 - }, - { - "epoch": 19.70638852672751, - "grad_norm": 1.6442217826843262, - "learning_rate": 6.253869346733669e-05, - "loss": 4.8745, - "step": 37787 - }, - { - "epoch": 19.706910039113428, - "grad_norm": 1.5792070627212524, - "learning_rate": 6.253768844221106e-05, - "loss": 5.2774, - "step": 37788 - }, - { - "epoch": 19.707431551499347, - "grad_norm": 1.4850196838378906, - "learning_rate": 6.253668341708543e-05, - "loss": 5.1423, - "step": 37789 - }, - { - "epoch": 19.707953063885267, - "grad_norm": 1.451205849647522, - "learning_rate": 6.25356783919598e-05, - "loss": 5.513, - "step": 37790 - }, - { - "epoch": 19.708474576271186, - "grad_norm": 1.4933828115463257, - "learning_rate": 6.253467336683417e-05, - "loss": 5.6319, - "step": 37791 - }, - { - "epoch": 19.708996088657106, - "grad_norm": 1.5349767208099365, - "learning_rate": 6.253366834170855e-05, - "loss": 5.112, - "step": 37792 - }, - { - "epoch": 19.709517601043025, - "grad_norm": 1.5057306289672852, - "learning_rate": 6.253266331658291e-05, - "loss": 4.8375, - "step": 37793 - }, - { - "epoch": 19.710039113428945, - "grad_norm": 1.5328397750854492, - "learning_rate": 6.253165829145729e-05, - "loss": 5.3934, - "step": 37794 - }, - { - "epoch": 19.710560625814864, - "grad_norm": 1.5326961278915405, - "learning_rate": 6.253065326633167e-05, - "loss": 5.3965, - "step": 37795 - }, - { - "epoch": 19.711082138200783, - "grad_norm": 1.4378913640975952, - "learning_rate": 6.252964824120603e-05, - "loss": 5.7705, - "step": 37796 - }, - { - "epoch": 19.711603650586703, - "grad_norm": 1.5641452074050903, - "learning_rate": 6.252864321608041e-05, - "loss": 5.2552, - "step": 37797 - }, - { - "epoch": 19.71212516297262, - "grad_norm": 1.4623820781707764, - "learning_rate": 6.252763819095477e-05, - "loss": 5.1806, - "step": 37798 - }, - { - "epoch": 19.71264667535854, - "grad_norm": 1.4451181888580322, - "learning_rate": 6.252663316582915e-05, - "loss": 5.5482, - "step": 37799 - }, - { - "epoch": 19.713168187744458, - "grad_norm": 1.4379671812057495, - "learning_rate": 6.252562814070352e-05, - "loss": 5.0405, - "step": 37800 - }, - { - "epoch": 19.713689700130377, - "grad_norm": 1.6108121871948242, - "learning_rate": 6.25246231155779e-05, - "loss": 5.1643, - "step": 37801 - }, - { - "epoch": 19.714211212516297, - "grad_norm": 1.434021234512329, - "learning_rate": 6.252361809045226e-05, - "loss": 5.7419, - "step": 37802 - }, - { - "epoch": 19.714732724902216, - "grad_norm": 1.6004538536071777, - "learning_rate": 6.252261306532664e-05, - "loss": 5.4776, - "step": 37803 - }, - { - "epoch": 19.715254237288136, - "grad_norm": 1.6579724550247192, - "learning_rate": 6.252160804020101e-05, - "loss": 5.0847, - "step": 37804 - }, - { - "epoch": 19.715775749674055, - "grad_norm": 1.5093960762023926, - "learning_rate": 6.252060301507539e-05, - "loss": 5.2128, - "step": 37805 - }, - { - "epoch": 19.716297262059975, - "grad_norm": 1.4800500869750977, - "learning_rate": 6.251959798994976e-05, - "loss": 5.2295, - "step": 37806 - }, - { - "epoch": 19.716818774445894, - "grad_norm": 1.459172010421753, - "learning_rate": 6.251859296482412e-05, - "loss": 5.5372, - "step": 37807 - }, - { - "epoch": 19.717340286831814, - "grad_norm": 1.4869908094406128, - "learning_rate": 6.25175879396985e-05, - "loss": 5.4684, - "step": 37808 - }, - { - "epoch": 19.717861799217733, - "grad_norm": 1.4305486679077148, - "learning_rate": 6.251658291457286e-05, - "loss": 5.5493, - "step": 37809 - }, - { - "epoch": 19.71838331160365, - "grad_norm": 1.462227463722229, - "learning_rate": 6.251557788944724e-05, - "loss": 4.9258, - "step": 37810 - }, - { - "epoch": 19.71890482398957, - "grad_norm": 1.5091060400009155, - "learning_rate": 6.25145728643216e-05, - "loss": 5.4094, - "step": 37811 - }, - { - "epoch": 19.719426336375488, - "grad_norm": 1.5153913497924805, - "learning_rate": 6.251356783919598e-05, - "loss": 5.24, - "step": 37812 - }, - { - "epoch": 19.719947848761407, - "grad_norm": 1.5013216733932495, - "learning_rate": 6.251256281407035e-05, - "loss": 4.5806, - "step": 37813 - }, - { - "epoch": 19.720469361147327, - "grad_norm": 1.5234694480895996, - "learning_rate": 6.251155778894472e-05, - "loss": 5.2576, - "step": 37814 - }, - { - "epoch": 19.720990873533246, - "grad_norm": 1.411712408065796, - "learning_rate": 6.25105527638191e-05, - "loss": 5.6703, - "step": 37815 - }, - { - "epoch": 19.721512385919166, - "grad_norm": 1.4780032634735107, - "learning_rate": 6.250954773869348e-05, - "loss": 5.5083, - "step": 37816 - }, - { - "epoch": 19.722033898305085, - "grad_norm": 1.5177258253097534, - "learning_rate": 6.250854271356784e-05, - "loss": 5.7371, - "step": 37817 - }, - { - "epoch": 19.722555410691005, - "grad_norm": 1.442824363708496, - "learning_rate": 6.250753768844222e-05, - "loss": 4.8508, - "step": 37818 - }, - { - "epoch": 19.723076923076924, - "grad_norm": 1.5782191753387451, - "learning_rate": 6.250653266331659e-05, - "loss": 5.2344, - "step": 37819 - }, - { - "epoch": 19.723598435462844, - "grad_norm": 1.513321042060852, - "learning_rate": 6.250552763819095e-05, - "loss": 5.2039, - "step": 37820 - }, - { - "epoch": 19.724119947848763, - "grad_norm": 1.5104763507843018, - "learning_rate": 6.250452261306533e-05, - "loss": 5.3411, - "step": 37821 - }, - { - "epoch": 19.72464146023468, - "grad_norm": 1.5687726736068726, - "learning_rate": 6.250351758793969e-05, - "loss": 5.3951, - "step": 37822 - }, - { - "epoch": 19.7251629726206, - "grad_norm": 1.4483989477157593, - "learning_rate": 6.250251256281407e-05, - "loss": 5.7638, - "step": 37823 - }, - { - "epoch": 19.725684485006518, - "grad_norm": 1.5446287393569946, - "learning_rate": 6.250150753768845e-05, - "loss": 5.5115, - "step": 37824 - }, - { - "epoch": 19.726205997392437, - "grad_norm": 1.5480376482009888, - "learning_rate": 6.250050251256283e-05, - "loss": 4.7803, - "step": 37825 - }, - { - "epoch": 19.726727509778357, - "grad_norm": 1.5428684949874878, - "learning_rate": 6.249949748743719e-05, - "loss": 4.9716, - "step": 37826 - }, - { - "epoch": 19.727249022164276, - "grad_norm": 1.5290207862854004, - "learning_rate": 6.249849246231157e-05, - "loss": 5.2947, - "step": 37827 - }, - { - "epoch": 19.727770534550196, - "grad_norm": 1.4951180219650269, - "learning_rate": 6.249748743718593e-05, - "loss": 4.4958, - "step": 37828 - }, - { - "epoch": 19.728292046936115, - "grad_norm": 1.4740008115768433, - "learning_rate": 6.249648241206031e-05, - "loss": 5.2654, - "step": 37829 - }, - { - "epoch": 19.728813559322035, - "grad_norm": 1.5051628351211548, - "learning_rate": 6.249547738693467e-05, - "loss": 5.1327, - "step": 37830 - }, - { - "epoch": 19.729335071707954, - "grad_norm": 1.6264816522598267, - "learning_rate": 6.249447236180905e-05, - "loss": 4.9842, - "step": 37831 - }, - { - "epoch": 19.729856584093874, - "grad_norm": 1.5140687227249146, - "learning_rate": 6.249346733668342e-05, - "loss": 4.8895, - "step": 37832 - }, - { - "epoch": 19.730378096479793, - "grad_norm": 1.4988443851470947, - "learning_rate": 6.249246231155778e-05, - "loss": 5.5743, - "step": 37833 - }, - { - "epoch": 19.73089960886571, - "grad_norm": 1.433921456336975, - "learning_rate": 6.249145728643216e-05, - "loss": 5.3213, - "step": 37834 - }, - { - "epoch": 19.73142112125163, - "grad_norm": 1.4616093635559082, - "learning_rate": 6.249045226130654e-05, - "loss": 5.2684, - "step": 37835 - }, - { - "epoch": 19.731942633637548, - "grad_norm": 1.4831050634384155, - "learning_rate": 6.248944723618091e-05, - "loss": 5.7318, - "step": 37836 - }, - { - "epoch": 19.732464146023467, - "grad_norm": 1.5151301622390747, - "learning_rate": 6.248844221105528e-05, - "loss": 5.4818, - "step": 37837 - }, - { - "epoch": 19.732985658409387, - "grad_norm": 1.5885009765625, - "learning_rate": 6.248743718592966e-05, - "loss": 5.328, - "step": 37838 - }, - { - "epoch": 19.733507170795306, - "grad_norm": 1.5005912780761719, - "learning_rate": 6.248643216080402e-05, - "loss": 5.2076, - "step": 37839 - }, - { - "epoch": 19.734028683181226, - "grad_norm": 1.4275038242340088, - "learning_rate": 6.24854271356784e-05, - "loss": 5.1536, - "step": 37840 - }, - { - "epoch": 19.734550195567145, - "grad_norm": 1.6004914045333862, - "learning_rate": 6.248442211055276e-05, - "loss": 5.3333, - "step": 37841 - }, - { - "epoch": 19.735071707953065, - "grad_norm": 1.5905088186264038, - "learning_rate": 6.248341708542714e-05, - "loss": 4.7954, - "step": 37842 - }, - { - "epoch": 19.735593220338984, - "grad_norm": 1.3899710178375244, - "learning_rate": 6.24824120603015e-05, - "loss": 4.2084, - "step": 37843 - }, - { - "epoch": 19.736114732724904, - "grad_norm": 1.581222653388977, - "learning_rate": 6.248140703517588e-05, - "loss": 5.0418, - "step": 37844 - }, - { - "epoch": 19.736636245110823, - "grad_norm": 1.5096070766448975, - "learning_rate": 6.248040201005026e-05, - "loss": 5.6808, - "step": 37845 - }, - { - "epoch": 19.73715775749674, - "grad_norm": 1.4473145008087158, - "learning_rate": 6.247939698492462e-05, - "loss": 5.0456, - "step": 37846 - }, - { - "epoch": 19.73767926988266, - "grad_norm": 1.4710744619369507, - "learning_rate": 6.2478391959799e-05, - "loss": 5.5726, - "step": 37847 - }, - { - "epoch": 19.738200782268578, - "grad_norm": 1.4862555265426636, - "learning_rate": 6.247738693467337e-05, - "loss": 5.1843, - "step": 37848 - }, - { - "epoch": 19.738722294654497, - "grad_norm": 1.5971487760543823, - "learning_rate": 6.247638190954774e-05, - "loss": 5.1056, - "step": 37849 - }, - { - "epoch": 19.739243807040417, - "grad_norm": 1.450016975402832, - "learning_rate": 6.247537688442211e-05, - "loss": 5.4541, - "step": 37850 - }, - { - "epoch": 19.739765319426336, - "grad_norm": 1.494406819343567, - "learning_rate": 6.247437185929649e-05, - "loss": 4.9449, - "step": 37851 - }, - { - "epoch": 19.740286831812256, - "grad_norm": 1.4493924379348755, - "learning_rate": 6.247336683417085e-05, - "loss": 4.8702, - "step": 37852 - }, - { - "epoch": 19.740808344198175, - "grad_norm": 1.5237534046173096, - "learning_rate": 6.247236180904523e-05, - "loss": 5.021, - "step": 37853 - }, - { - "epoch": 19.741329856584095, - "grad_norm": 1.3231899738311768, - "learning_rate": 6.247135678391959e-05, - "loss": 5.3605, - "step": 37854 - }, - { - "epoch": 19.741851368970014, - "grad_norm": 1.510671854019165, - "learning_rate": 6.247035175879397e-05, - "loss": 5.0252, - "step": 37855 - }, - { - "epoch": 19.742372881355934, - "grad_norm": 1.491106390953064, - "learning_rate": 6.246934673366835e-05, - "loss": 5.2942, - "step": 37856 - }, - { - "epoch": 19.742894393741853, - "grad_norm": 1.5492260456085205, - "learning_rate": 6.246834170854273e-05, - "loss": 5.5271, - "step": 37857 - }, - { - "epoch": 19.74341590612777, - "grad_norm": 1.4395604133605957, - "learning_rate": 6.246733668341709e-05, - "loss": 5.3536, - "step": 37858 - }, - { - "epoch": 19.74393741851369, - "grad_norm": 1.4375420808792114, - "learning_rate": 6.246633165829146e-05, - "loss": 5.6136, - "step": 37859 - }, - { - "epoch": 19.744458930899608, - "grad_norm": 1.3927416801452637, - "learning_rate": 6.246532663316583e-05, - "loss": 5.4892, - "step": 37860 - }, - { - "epoch": 19.744980443285527, - "grad_norm": 1.4871512651443481, - "learning_rate": 6.24643216080402e-05, - "loss": 5.3566, - "step": 37861 - }, - { - "epoch": 19.745501955671447, - "grad_norm": 1.4942916631698608, - "learning_rate": 6.246331658291458e-05, - "loss": 5.4643, - "step": 37862 - }, - { - "epoch": 19.746023468057366, - "grad_norm": 1.5165129899978638, - "learning_rate": 6.246231155778894e-05, - "loss": 4.7036, - "step": 37863 - }, - { - "epoch": 19.746544980443286, - "grad_norm": 1.4744783639907837, - "learning_rate": 6.246130653266332e-05, - "loss": 5.2952, - "step": 37864 - }, - { - "epoch": 19.747066492829205, - "grad_norm": 1.5818086862564087, - "learning_rate": 6.24603015075377e-05, - "loss": 4.7931, - "step": 37865 - }, - { - "epoch": 19.747588005215125, - "grad_norm": 1.559005618095398, - "learning_rate": 6.245929648241207e-05, - "loss": 5.2935, - "step": 37866 - }, - { - "epoch": 19.748109517601044, - "grad_norm": 1.5769602060317993, - "learning_rate": 6.245829145728644e-05, - "loss": 5.5368, - "step": 37867 - }, - { - "epoch": 19.748631029986964, - "grad_norm": 1.4772735834121704, - "learning_rate": 6.245728643216082e-05, - "loss": 5.065, - "step": 37868 - }, - { - "epoch": 19.749152542372883, - "grad_norm": 1.6992079019546509, - "learning_rate": 6.245628140703518e-05, - "loss": 4.9169, - "step": 37869 - }, - { - "epoch": 19.7496740547588, - "grad_norm": 1.5731006860733032, - "learning_rate": 6.245527638190956e-05, - "loss": 5.2755, - "step": 37870 - }, - { - "epoch": 19.75019556714472, - "grad_norm": 1.5770598649978638, - "learning_rate": 6.245427135678392e-05, - "loss": 5.3413, - "step": 37871 - }, - { - "epoch": 19.750717079530638, - "grad_norm": 1.4807945489883423, - "learning_rate": 6.24532663316583e-05, - "loss": 5.4304, - "step": 37872 - }, - { - "epoch": 19.751238591916557, - "grad_norm": 1.4881576299667358, - "learning_rate": 6.245226130653266e-05, - "loss": 5.3335, - "step": 37873 - }, - { - "epoch": 19.751760104302477, - "grad_norm": 1.4085116386413574, - "learning_rate": 6.245125628140703e-05, - "loss": 5.1879, - "step": 37874 - }, - { - "epoch": 19.752281616688396, - "grad_norm": 1.3979103565216064, - "learning_rate": 6.24502512562814e-05, - "loss": 5.1988, - "step": 37875 - }, - { - "epoch": 19.752803129074316, - "grad_norm": 1.5044398307800293, - "learning_rate": 6.244924623115578e-05, - "loss": 5.0744, - "step": 37876 - }, - { - "epoch": 19.753324641460235, - "grad_norm": 1.3856213092803955, - "learning_rate": 6.244824120603016e-05, - "loss": 5.45, - "step": 37877 - }, - { - "epoch": 19.753846153846155, - "grad_norm": 1.638953685760498, - "learning_rate": 6.244723618090453e-05, - "loss": 5.3878, - "step": 37878 - }, - { - "epoch": 19.754367666232074, - "grad_norm": 1.4830732345581055, - "learning_rate": 6.24462311557789e-05, - "loss": 5.073, - "step": 37879 - }, - { - "epoch": 19.754889178617994, - "grad_norm": 1.6036432981491089, - "learning_rate": 6.244522613065327e-05, - "loss": 5.1853, - "step": 37880 - }, - { - "epoch": 19.75541069100391, - "grad_norm": 1.4311625957489014, - "learning_rate": 6.244422110552765e-05, - "loss": 5.6717, - "step": 37881 - }, - { - "epoch": 19.75593220338983, - "grad_norm": 1.5456891059875488, - "learning_rate": 6.244321608040201e-05, - "loss": 5.2779, - "step": 37882 - }, - { - "epoch": 19.75645371577575, - "grad_norm": 1.4808745384216309, - "learning_rate": 6.244221105527639e-05, - "loss": 5.4308, - "step": 37883 - }, - { - "epoch": 19.756975228161668, - "grad_norm": 1.6404788494110107, - "learning_rate": 6.244120603015075e-05, - "loss": 4.6887, - "step": 37884 - }, - { - "epoch": 19.757496740547587, - "grad_norm": 1.6766293048858643, - "learning_rate": 6.244020100502513e-05, - "loss": 5.2705, - "step": 37885 - }, - { - "epoch": 19.758018252933507, - "grad_norm": 1.459170937538147, - "learning_rate": 6.243919597989951e-05, - "loss": 5.1837, - "step": 37886 - }, - { - "epoch": 19.758539765319426, - "grad_norm": 1.4335148334503174, - "learning_rate": 6.243819095477387e-05, - "loss": 5.6594, - "step": 37887 - }, - { - "epoch": 19.759061277705346, - "grad_norm": 1.5123573541641235, - "learning_rate": 6.243718592964825e-05, - "loss": 5.3992, - "step": 37888 - }, - { - "epoch": 19.759582790091265, - "grad_norm": 1.4564015865325928, - "learning_rate": 6.243618090452261e-05, - "loss": 5.4688, - "step": 37889 - }, - { - "epoch": 19.760104302477185, - "grad_norm": 1.5538996458053589, - "learning_rate": 6.243517587939699e-05, - "loss": 5.2644, - "step": 37890 - }, - { - "epoch": 19.760625814863104, - "grad_norm": 1.4885549545288086, - "learning_rate": 6.243417085427136e-05, - "loss": 4.8914, - "step": 37891 - }, - { - "epoch": 19.761147327249024, - "grad_norm": 1.5047187805175781, - "learning_rate": 6.243316582914573e-05, - "loss": 5.3489, - "step": 37892 - }, - { - "epoch": 19.761668839634943, - "grad_norm": 1.5376787185668945, - "learning_rate": 6.24321608040201e-05, - "loss": 4.8675, - "step": 37893 - }, - { - "epoch": 19.76219035202086, - "grad_norm": 1.5502614974975586, - "learning_rate": 6.243115577889448e-05, - "loss": 5.3057, - "step": 37894 - }, - { - "epoch": 19.76271186440678, - "grad_norm": 1.5024405717849731, - "learning_rate": 6.243015075376884e-05, - "loss": 5.4517, - "step": 37895 - }, - { - "epoch": 19.763233376792698, - "grad_norm": 1.478054404258728, - "learning_rate": 6.242914572864322e-05, - "loss": 5.5504, - "step": 37896 - }, - { - "epoch": 19.763754889178617, - "grad_norm": 1.4159501791000366, - "learning_rate": 6.24281407035176e-05, - "loss": 4.9256, - "step": 37897 - }, - { - "epoch": 19.764276401564537, - "grad_norm": 1.5996776819229126, - "learning_rate": 6.242713567839197e-05, - "loss": 5.2488, - "step": 37898 - }, - { - "epoch": 19.764797913950456, - "grad_norm": 1.3936280012130737, - "learning_rate": 6.242613065326634e-05, - "loss": 5.4103, - "step": 37899 - }, - { - "epoch": 19.765319426336376, - "grad_norm": 1.4405661821365356, - "learning_rate": 6.24251256281407e-05, - "loss": 5.5773, - "step": 37900 - }, - { - "epoch": 19.765840938722295, - "grad_norm": 1.6067155599594116, - "learning_rate": 6.242412060301508e-05, - "loss": 5.2757, - "step": 37901 - }, - { - "epoch": 19.766362451108215, - "grad_norm": 1.5616137981414795, - "learning_rate": 6.242311557788944e-05, - "loss": 5.0349, - "step": 37902 - }, - { - "epoch": 19.766883963494134, - "grad_norm": 1.45431387424469, - "learning_rate": 6.242211055276382e-05, - "loss": 5.581, - "step": 37903 - }, - { - "epoch": 19.767405475880054, - "grad_norm": 1.5646575689315796, - "learning_rate": 6.242110552763819e-05, - "loss": 5.4041, - "step": 37904 - }, - { - "epoch": 19.76792698826597, - "grad_norm": 1.534603238105774, - "learning_rate": 6.242010050251256e-05, - "loss": 5.2977, - "step": 37905 - }, - { - "epoch": 19.76844850065189, - "grad_norm": 1.3910027742385864, - "learning_rate": 6.241909547738693e-05, - "loss": 5.7659, - "step": 37906 - }, - { - "epoch": 19.76897001303781, - "grad_norm": 1.561954140663147, - "learning_rate": 6.24180904522613e-05, - "loss": 5.3435, - "step": 37907 - }, - { - "epoch": 19.769491525423728, - "grad_norm": 1.3996227979660034, - "learning_rate": 6.241708542713568e-05, - "loss": 5.7221, - "step": 37908 - }, - { - "epoch": 19.770013037809647, - "grad_norm": 1.5145820379257202, - "learning_rate": 6.241608040201006e-05, - "loss": 4.8949, - "step": 37909 - }, - { - "epoch": 19.770534550195567, - "grad_norm": 1.5196846723556519, - "learning_rate": 6.241507537688443e-05, - "loss": 5.5613, - "step": 37910 - }, - { - "epoch": 19.771056062581486, - "grad_norm": 1.6124944686889648, - "learning_rate": 6.24140703517588e-05, - "loss": 5.4493, - "step": 37911 - }, - { - "epoch": 19.771577574967406, - "grad_norm": 1.518170952796936, - "learning_rate": 6.241306532663317e-05, - "loss": 5.1691, - "step": 37912 - }, - { - "epoch": 19.772099087353325, - "grad_norm": 1.507743239402771, - "learning_rate": 6.241206030150753e-05, - "loss": 4.3393, - "step": 37913 - }, - { - "epoch": 19.772620599739245, - "grad_norm": 1.4252804517745972, - "learning_rate": 6.241105527638191e-05, - "loss": 5.4384, - "step": 37914 - }, - { - "epoch": 19.773142112125164, - "grad_norm": 1.4261409044265747, - "learning_rate": 6.241005025125627e-05, - "loss": 5.182, - "step": 37915 - }, - { - "epoch": 19.773663624511084, - "grad_norm": 1.6514545679092407, - "learning_rate": 6.240904522613065e-05, - "loss": 5.1475, - "step": 37916 - }, - { - "epoch": 19.774185136897, - "grad_norm": 1.5993057489395142, - "learning_rate": 6.240804020100503e-05, - "loss": 5.1386, - "step": 37917 - }, - { - "epoch": 19.77470664928292, - "grad_norm": 1.5274536609649658, - "learning_rate": 6.240703517587941e-05, - "loss": 4.7128, - "step": 37918 - }, - { - "epoch": 19.77522816166884, - "grad_norm": 1.4728682041168213, - "learning_rate": 6.240603015075377e-05, - "loss": 5.7014, - "step": 37919 - }, - { - "epoch": 19.775749674054758, - "grad_norm": 1.5567195415496826, - "learning_rate": 6.240502512562815e-05, - "loss": 5.2897, - "step": 37920 - }, - { - "epoch": 19.776271186440677, - "grad_norm": 1.5591979026794434, - "learning_rate": 6.240402010050251e-05, - "loss": 4.785, - "step": 37921 - }, - { - "epoch": 19.776792698826597, - "grad_norm": 1.4901695251464844, - "learning_rate": 6.240301507537689e-05, - "loss": 5.5088, - "step": 37922 - }, - { - "epoch": 19.777314211212516, - "grad_norm": 1.5758423805236816, - "learning_rate": 6.240201005025126e-05, - "loss": 5.4174, - "step": 37923 - }, - { - "epoch": 19.777835723598436, - "grad_norm": 1.6006519794464111, - "learning_rate": 6.240100502512563e-05, - "loss": 4.9604, - "step": 37924 - }, - { - "epoch": 19.778357235984355, - "grad_norm": 1.5476160049438477, - "learning_rate": 6.24e-05, - "loss": 4.912, - "step": 37925 - }, - { - "epoch": 19.778878748370275, - "grad_norm": 1.553688645362854, - "learning_rate": 6.239899497487436e-05, - "loss": 5.3405, - "step": 37926 - }, - { - "epoch": 19.779400260756194, - "grad_norm": 1.5041866302490234, - "learning_rate": 6.239798994974874e-05, - "loss": 5.2451, - "step": 37927 - }, - { - "epoch": 19.779921773142114, - "grad_norm": 1.5947567224502563, - "learning_rate": 6.239698492462312e-05, - "loss": 5.357, - "step": 37928 - }, - { - "epoch": 19.78044328552803, - "grad_norm": 1.445551872253418, - "learning_rate": 6.23959798994975e-05, - "loss": 5.5081, - "step": 37929 - }, - { - "epoch": 19.78096479791395, - "grad_norm": 1.4842232465744019, - "learning_rate": 6.239497487437186e-05, - "loss": 5.2026, - "step": 37930 - }, - { - "epoch": 19.78148631029987, - "grad_norm": 1.618261456489563, - "learning_rate": 6.239396984924624e-05, - "loss": 5.254, - "step": 37931 - }, - { - "epoch": 19.782007822685788, - "grad_norm": 1.6319878101348877, - "learning_rate": 6.23929648241206e-05, - "loss": 5.2088, - "step": 37932 - }, - { - "epoch": 19.782529335071708, - "grad_norm": 1.631272554397583, - "learning_rate": 6.239195979899498e-05, - "loss": 4.4034, - "step": 37933 - }, - { - "epoch": 19.783050847457627, - "grad_norm": 1.582678198814392, - "learning_rate": 6.239095477386935e-05, - "loss": 4.6646, - "step": 37934 - }, - { - "epoch": 19.783572359843546, - "grad_norm": 1.624849557876587, - "learning_rate": 6.238994974874372e-05, - "loss": 5.4356, - "step": 37935 - }, - { - "epoch": 19.784093872229466, - "grad_norm": 1.5852569341659546, - "learning_rate": 6.238894472361809e-05, - "loss": 5.3472, - "step": 37936 - }, - { - "epoch": 19.784615384615385, - "grad_norm": 1.5713002681732178, - "learning_rate": 6.238793969849247e-05, - "loss": 5.8222, - "step": 37937 - }, - { - "epoch": 19.785136897001305, - "grad_norm": 1.6058754920959473, - "learning_rate": 6.238693467336684e-05, - "loss": 5.301, - "step": 37938 - }, - { - "epoch": 19.785658409387224, - "grad_norm": 1.4954670667648315, - "learning_rate": 6.238592964824121e-05, - "loss": 5.6014, - "step": 37939 - }, - { - "epoch": 19.786179921773144, - "grad_norm": 1.6555826663970947, - "learning_rate": 6.238492462311558e-05, - "loss": 5.096, - "step": 37940 - }, - { - "epoch": 19.78670143415906, - "grad_norm": 1.4372092485427856, - "learning_rate": 6.238391959798995e-05, - "loss": 5.05, - "step": 37941 - }, - { - "epoch": 19.78722294654498, - "grad_norm": 1.5183593034744263, - "learning_rate": 6.238291457286433e-05, - "loss": 5.0198, - "step": 37942 - }, - { - "epoch": 19.7877444589309, - "grad_norm": 1.54574716091156, - "learning_rate": 6.238190954773869e-05, - "loss": 5.0073, - "step": 37943 - }, - { - "epoch": 19.788265971316818, - "grad_norm": 1.5377893447875977, - "learning_rate": 6.238090452261307e-05, - "loss": 5.4108, - "step": 37944 - }, - { - "epoch": 19.788787483702738, - "grad_norm": 1.5202785730361938, - "learning_rate": 6.237989949748743e-05, - "loss": 5.4633, - "step": 37945 - }, - { - "epoch": 19.789308996088657, - "grad_norm": 1.457612156867981, - "learning_rate": 6.237889447236181e-05, - "loss": 5.0786, - "step": 37946 - }, - { - "epoch": 19.789830508474576, - "grad_norm": 1.5222173929214478, - "learning_rate": 6.237788944723618e-05, - "loss": 5.0004, - "step": 37947 - }, - { - "epoch": 19.790352020860496, - "grad_norm": 1.410109281539917, - "learning_rate": 6.237688442211055e-05, - "loss": 5.2428, - "step": 37948 - }, - { - "epoch": 19.790873533246415, - "grad_norm": 1.531026840209961, - "learning_rate": 6.237587939698493e-05, - "loss": 5.4401, - "step": 37949 - }, - { - "epoch": 19.791395045632335, - "grad_norm": 1.6082311868667603, - "learning_rate": 6.237487437185931e-05, - "loss": 5.1511, - "step": 37950 - }, - { - "epoch": 19.791916558018254, - "grad_norm": 1.5064042806625366, - "learning_rate": 6.237386934673367e-05, - "loss": 4.7823, - "step": 37951 - }, - { - "epoch": 19.792438070404174, - "grad_norm": 1.5897302627563477, - "learning_rate": 6.237286432160804e-05, - "loss": 5.196, - "step": 37952 - }, - { - "epoch": 19.79295958279009, - "grad_norm": 1.5613031387329102, - "learning_rate": 6.237185929648242e-05, - "loss": 5.3008, - "step": 37953 - }, - { - "epoch": 19.79348109517601, - "grad_norm": 1.513170599937439, - "learning_rate": 6.237085427135678e-05, - "loss": 4.8138, - "step": 37954 - }, - { - "epoch": 19.79400260756193, - "grad_norm": 1.534480333328247, - "learning_rate": 6.236984924623116e-05, - "loss": 5.5589, - "step": 37955 - }, - { - "epoch": 19.794524119947848, - "grad_norm": 1.5180028676986694, - "learning_rate": 6.236884422110552e-05, - "loss": 5.6102, - "step": 37956 - }, - { - "epoch": 19.795045632333768, - "grad_norm": 1.3917313814163208, - "learning_rate": 6.23678391959799e-05, - "loss": 4.9517, - "step": 37957 - }, - { - "epoch": 19.795567144719687, - "grad_norm": 1.543663740158081, - "learning_rate": 6.236683417085428e-05, - "loss": 5.3366, - "step": 37958 - }, - { - "epoch": 19.796088657105607, - "grad_norm": 1.4171018600463867, - "learning_rate": 6.236582914572866e-05, - "loss": 5.616, - "step": 37959 - }, - { - "epoch": 19.796610169491526, - "grad_norm": 1.5353766679763794, - "learning_rate": 6.236482412060302e-05, - "loss": 5.531, - "step": 37960 - }, - { - "epoch": 19.797131681877445, - "grad_norm": 1.4977765083312988, - "learning_rate": 6.23638190954774e-05, - "loss": 5.1348, - "step": 37961 - }, - { - "epoch": 19.797653194263365, - "grad_norm": 1.5105645656585693, - "learning_rate": 6.236281407035176e-05, - "loss": 5.7478, - "step": 37962 - }, - { - "epoch": 19.798174706649284, - "grad_norm": 1.4885071516036987, - "learning_rate": 6.236180904522614e-05, - "loss": 5.0597, - "step": 37963 - }, - { - "epoch": 19.7986962190352, - "grad_norm": 1.6074190139770508, - "learning_rate": 6.23608040201005e-05, - "loss": 4.1745, - "step": 37964 - }, - { - "epoch": 19.79921773142112, - "grad_norm": 1.4167407751083374, - "learning_rate": 6.235979899497488e-05, - "loss": 5.9348, - "step": 37965 - }, - { - "epoch": 19.79973924380704, - "grad_norm": 1.4617085456848145, - "learning_rate": 6.235879396984925e-05, - "loss": 5.5878, - "step": 37966 - }, - { - "epoch": 19.80026075619296, - "grad_norm": 1.5409557819366455, - "learning_rate": 6.235778894472361e-05, - "loss": 5.1928, - "step": 37967 - }, - { - "epoch": 19.800782268578878, - "grad_norm": 1.5670197010040283, - "learning_rate": 6.235678391959799e-05, - "loss": 5.0331, - "step": 37968 - }, - { - "epoch": 19.801303780964798, - "grad_norm": 1.4615172147750854, - "learning_rate": 6.235577889447237e-05, - "loss": 5.2866, - "step": 37969 - }, - { - "epoch": 19.801825293350717, - "grad_norm": 1.4920437335968018, - "learning_rate": 6.235477386934674e-05, - "loss": 5.3059, - "step": 37970 - }, - { - "epoch": 19.802346805736637, - "grad_norm": 1.4995673894882202, - "learning_rate": 6.235376884422111e-05, - "loss": 5.1503, - "step": 37971 - }, - { - "epoch": 19.802868318122556, - "grad_norm": 1.530464768409729, - "learning_rate": 6.235276381909549e-05, - "loss": 4.8525, - "step": 37972 - }, - { - "epoch": 19.803389830508475, - "grad_norm": 1.4550292491912842, - "learning_rate": 6.235175879396985e-05, - "loss": 5.317, - "step": 37973 - }, - { - "epoch": 19.803911342894395, - "grad_norm": 1.5828214883804321, - "learning_rate": 6.235075376884423e-05, - "loss": 5.4188, - "step": 37974 - }, - { - "epoch": 19.804432855280314, - "grad_norm": 1.4898735284805298, - "learning_rate": 6.234974874371859e-05, - "loss": 5.151, - "step": 37975 - }, - { - "epoch": 19.804954367666234, - "grad_norm": 1.5183597803115845, - "learning_rate": 6.234874371859297e-05, - "loss": 4.9042, - "step": 37976 - }, - { - "epoch": 19.80547588005215, - "grad_norm": 1.5182961225509644, - "learning_rate": 6.234773869346733e-05, - "loss": 5.1099, - "step": 37977 - }, - { - "epoch": 19.80599739243807, - "grad_norm": 1.554877758026123, - "learning_rate": 6.234673366834171e-05, - "loss": 4.5279, - "step": 37978 - }, - { - "epoch": 19.80651890482399, - "grad_norm": 1.5954318046569824, - "learning_rate": 6.234572864321609e-05, - "loss": 5.2507, - "step": 37979 - }, - { - "epoch": 19.807040417209908, - "grad_norm": 1.5655957460403442, - "learning_rate": 6.234472361809045e-05, - "loss": 5.2511, - "step": 37980 - }, - { - "epoch": 19.807561929595828, - "grad_norm": 1.600876808166504, - "learning_rate": 6.234371859296483e-05, - "loss": 5.3997, - "step": 37981 - }, - { - "epoch": 19.808083441981747, - "grad_norm": 1.4760515689849854, - "learning_rate": 6.23427135678392e-05, - "loss": 5.6209, - "step": 37982 - }, - { - "epoch": 19.808604954367667, - "grad_norm": 1.5411690473556519, - "learning_rate": 6.234170854271357e-05, - "loss": 5.2906, - "step": 37983 - }, - { - "epoch": 19.809126466753586, - "grad_norm": 1.3796164989471436, - "learning_rate": 6.234070351758794e-05, - "loss": 5.6678, - "step": 37984 - }, - { - "epoch": 19.809647979139505, - "grad_norm": 1.5040525197982788, - "learning_rate": 6.233969849246232e-05, - "loss": 5.1564, - "step": 37985 - }, - { - "epoch": 19.810169491525425, - "grad_norm": 1.504699468612671, - "learning_rate": 6.233869346733668e-05, - "loss": 5.3602, - "step": 37986 - }, - { - "epoch": 19.810691003911344, - "grad_norm": 1.3707400560379028, - "learning_rate": 6.233768844221106e-05, - "loss": 5.6488, - "step": 37987 - }, - { - "epoch": 19.81121251629726, - "grad_norm": 1.589952826499939, - "learning_rate": 6.233668341708542e-05, - "loss": 5.508, - "step": 37988 - }, - { - "epoch": 19.81173402868318, - "grad_norm": 1.4846524000167847, - "learning_rate": 6.23356783919598e-05, - "loss": 4.7587, - "step": 37989 - }, - { - "epoch": 19.8122555410691, - "grad_norm": 1.4885581731796265, - "learning_rate": 6.233467336683418e-05, - "loss": 5.7483, - "step": 37990 - }, - { - "epoch": 19.81277705345502, - "grad_norm": 1.443688988685608, - "learning_rate": 6.233366834170856e-05, - "loss": 5.5744, - "step": 37991 - }, - { - "epoch": 19.813298565840938, - "grad_norm": 1.4822362661361694, - "learning_rate": 6.233266331658292e-05, - "loss": 5.6398, - "step": 37992 - }, - { - "epoch": 19.813820078226858, - "grad_norm": 1.5231585502624512, - "learning_rate": 6.233165829145728e-05, - "loss": 4.8695, - "step": 37993 - }, - { - "epoch": 19.814341590612777, - "grad_norm": 1.5086250305175781, - "learning_rate": 6.233065326633166e-05, - "loss": 5.2059, - "step": 37994 - }, - { - "epoch": 19.814863102998697, - "grad_norm": 1.513839840888977, - "learning_rate": 6.232964824120603e-05, - "loss": 5.6504, - "step": 37995 - }, - { - "epoch": 19.815384615384616, - "grad_norm": 1.5438265800476074, - "learning_rate": 6.23286432160804e-05, - "loss": 5.3498, - "step": 37996 - }, - { - "epoch": 19.815906127770536, - "grad_norm": 1.6211137771606445, - "learning_rate": 6.232763819095477e-05, - "loss": 5.095, - "step": 37997 - }, - { - "epoch": 19.816427640156455, - "grad_norm": 1.534665584564209, - "learning_rate": 6.232663316582915e-05, - "loss": 5.5688, - "step": 37998 - }, - { - "epoch": 19.816949152542374, - "grad_norm": 1.517006278038025, - "learning_rate": 6.232562814070352e-05, - "loss": 5.4629, - "step": 37999 - }, - { - "epoch": 19.81747066492829, - "grad_norm": 1.6067497730255127, - "learning_rate": 6.23246231155779e-05, - "loss": 5.231, - "step": 38000 - }, - { - "epoch": 19.81799217731421, - "grad_norm": 1.624896764755249, - "learning_rate": 6.232361809045227e-05, - "loss": 5.4177, - "step": 38001 - }, - { - "epoch": 19.81851368970013, - "grad_norm": 1.704095721244812, - "learning_rate": 6.232261306532664e-05, - "loss": 5.3702, - "step": 38002 - }, - { - "epoch": 19.81903520208605, - "grad_norm": 1.5762667655944824, - "learning_rate": 6.232160804020101e-05, - "loss": 5.0502, - "step": 38003 - }, - { - "epoch": 19.819556714471968, - "grad_norm": 1.501335859298706, - "learning_rate": 6.232060301507539e-05, - "loss": 5.061, - "step": 38004 - }, - { - "epoch": 19.820078226857888, - "grad_norm": 2.148737668991089, - "learning_rate": 6.231959798994975e-05, - "loss": 4.6081, - "step": 38005 - }, - { - "epoch": 19.820599739243807, - "grad_norm": 1.45090651512146, - "learning_rate": 6.231859296482412e-05, - "loss": 4.9161, - "step": 38006 - }, - { - "epoch": 19.821121251629727, - "grad_norm": 1.464372992515564, - "learning_rate": 6.231758793969849e-05, - "loss": 5.4176, - "step": 38007 - }, - { - "epoch": 19.821642764015646, - "grad_norm": 1.4733357429504395, - "learning_rate": 6.231658291457286e-05, - "loss": 5.3914, - "step": 38008 - }, - { - "epoch": 19.822164276401566, - "grad_norm": 1.3993964195251465, - "learning_rate": 6.231557788944723e-05, - "loss": 5.5884, - "step": 38009 - }, - { - "epoch": 19.822685788787485, - "grad_norm": 1.5895321369171143, - "learning_rate": 6.231457286432161e-05, - "loss": 5.2185, - "step": 38010 - }, - { - "epoch": 19.823207301173404, - "grad_norm": 1.6116583347320557, - "learning_rate": 6.231356783919599e-05, - "loss": 4.7286, - "step": 38011 - }, - { - "epoch": 19.82372881355932, - "grad_norm": 1.4666268825531006, - "learning_rate": 6.231256281407035e-05, - "loss": 5.1135, - "step": 38012 - }, - { - "epoch": 19.82425032594524, - "grad_norm": 1.5283137559890747, - "learning_rate": 6.231155778894473e-05, - "loss": 4.8095, - "step": 38013 - }, - { - "epoch": 19.82477183833116, - "grad_norm": 1.5556869506835938, - "learning_rate": 6.23105527638191e-05, - "loss": 5.3667, - "step": 38014 - }, - { - "epoch": 19.82529335071708, - "grad_norm": 1.4338719844818115, - "learning_rate": 6.230954773869347e-05, - "loss": 5.4184, - "step": 38015 - }, - { - "epoch": 19.825814863103, - "grad_norm": 1.48334801197052, - "learning_rate": 6.230854271356784e-05, - "loss": 5.2668, - "step": 38016 - }, - { - "epoch": 19.826336375488918, - "grad_norm": 1.4707789421081543, - "learning_rate": 6.230753768844222e-05, - "loss": 5.5442, - "step": 38017 - }, - { - "epoch": 19.826857887874837, - "grad_norm": 1.5436851978302002, - "learning_rate": 6.230653266331658e-05, - "loss": 4.8246, - "step": 38018 - }, - { - "epoch": 19.827379400260757, - "grad_norm": 1.40238356590271, - "learning_rate": 6.230552763819096e-05, - "loss": 5.2318, - "step": 38019 - }, - { - "epoch": 19.827900912646676, - "grad_norm": 1.562132477760315, - "learning_rate": 6.230452261306534e-05, - "loss": 5.4156, - "step": 38020 - }, - { - "epoch": 19.828422425032596, - "grad_norm": 1.5392040014266968, - "learning_rate": 6.23035175879397e-05, - "loss": 5.4697, - "step": 38021 - }, - { - "epoch": 19.828943937418515, - "grad_norm": 1.4969412088394165, - "learning_rate": 6.230251256281408e-05, - "loss": 5.2666, - "step": 38022 - }, - { - "epoch": 19.829465449804434, - "grad_norm": 1.5227526426315308, - "learning_rate": 6.230150753768844e-05, - "loss": 5.1344, - "step": 38023 - }, - { - "epoch": 19.82998696219035, - "grad_norm": 1.471665382385254, - "learning_rate": 6.230050251256282e-05, - "loss": 5.1429, - "step": 38024 - }, - { - "epoch": 19.83050847457627, - "grad_norm": 1.4799587726593018, - "learning_rate": 6.229949748743719e-05, - "loss": 4.9117, - "step": 38025 - }, - { - "epoch": 19.83102998696219, - "grad_norm": 1.625701904296875, - "learning_rate": 6.229849246231156e-05, - "loss": 5.2241, - "step": 38026 - }, - { - "epoch": 19.83155149934811, - "grad_norm": 1.5110909938812256, - "learning_rate": 6.229748743718593e-05, - "loss": 4.9161, - "step": 38027 - }, - { - "epoch": 19.83207301173403, - "grad_norm": 1.4727725982666016, - "learning_rate": 6.22964824120603e-05, - "loss": 5.3655, - "step": 38028 - }, - { - "epoch": 19.832594524119948, - "grad_norm": 1.5114096403121948, - "learning_rate": 6.229547738693467e-05, - "loss": 5.381, - "step": 38029 - }, - { - "epoch": 19.833116036505867, - "grad_norm": 1.6261932849884033, - "learning_rate": 6.229447236180905e-05, - "loss": 5.3231, - "step": 38030 - }, - { - "epoch": 19.833637548891787, - "grad_norm": 1.558958649635315, - "learning_rate": 6.229346733668343e-05, - "loss": 4.9138, - "step": 38031 - }, - { - "epoch": 19.834159061277706, - "grad_norm": 1.6377509832382202, - "learning_rate": 6.229246231155779e-05, - "loss": 4.5949, - "step": 38032 - }, - { - "epoch": 19.834680573663626, - "grad_norm": 1.4197070598602295, - "learning_rate": 6.229145728643217e-05, - "loss": 5.3803, - "step": 38033 - }, - { - "epoch": 19.835202086049545, - "grad_norm": 1.4567188024520874, - "learning_rate": 6.229045226130653e-05, - "loss": 5.5583, - "step": 38034 - }, - { - "epoch": 19.835723598435465, - "grad_norm": 1.525467038154602, - "learning_rate": 6.228944723618091e-05, - "loss": 5.1433, - "step": 38035 - }, - { - "epoch": 19.83624511082138, - "grad_norm": 1.5763332843780518, - "learning_rate": 6.228844221105527e-05, - "loss": 5.2427, - "step": 38036 - }, - { - "epoch": 19.8367666232073, - "grad_norm": 1.5999610424041748, - "learning_rate": 6.228743718592965e-05, - "loss": 5.4941, - "step": 38037 - }, - { - "epoch": 19.83728813559322, - "grad_norm": 1.5742014646530151, - "learning_rate": 6.228643216080402e-05, - "loss": 5.4147, - "step": 38038 - }, - { - "epoch": 19.83780964797914, - "grad_norm": 1.6569479703903198, - "learning_rate": 6.22854271356784e-05, - "loss": 5.1514, - "step": 38039 - }, - { - "epoch": 19.83833116036506, - "grad_norm": 1.541446328163147, - "learning_rate": 6.228442211055277e-05, - "loss": 5.3103, - "step": 38040 - }, - { - "epoch": 19.838852672750978, - "grad_norm": 1.6543647050857544, - "learning_rate": 6.228341708542715e-05, - "loss": 4.8745, - "step": 38041 - }, - { - "epoch": 19.839374185136897, - "grad_norm": 1.4653807878494263, - "learning_rate": 6.228241206030151e-05, - "loss": 5.3947, - "step": 38042 - }, - { - "epoch": 19.839895697522817, - "grad_norm": 1.5723772048950195, - "learning_rate": 6.228140703517589e-05, - "loss": 5.4279, - "step": 38043 - }, - { - "epoch": 19.840417209908736, - "grad_norm": 1.4371367692947388, - "learning_rate": 6.228040201005026e-05, - "loss": 5.4613, - "step": 38044 - }, - { - "epoch": 19.840938722294656, - "grad_norm": 1.3919565677642822, - "learning_rate": 6.227939698492462e-05, - "loss": 5.686, - "step": 38045 - }, - { - "epoch": 19.841460234680575, - "grad_norm": 1.613714575767517, - "learning_rate": 6.2278391959799e-05, - "loss": 5.4214, - "step": 38046 - }, - { - "epoch": 19.841981747066495, - "grad_norm": 1.465040683746338, - "learning_rate": 6.227738693467336e-05, - "loss": 5.1775, - "step": 38047 - }, - { - "epoch": 19.84250325945241, - "grad_norm": 1.5393273830413818, - "learning_rate": 6.227638190954774e-05, - "loss": 5.2407, - "step": 38048 - }, - { - "epoch": 19.84302477183833, - "grad_norm": 1.5612901449203491, - "learning_rate": 6.22753768844221e-05, - "loss": 4.8177, - "step": 38049 - }, - { - "epoch": 19.84354628422425, - "grad_norm": 1.5520093441009521, - "learning_rate": 6.227437185929648e-05, - "loss": 4.6307, - "step": 38050 - }, - { - "epoch": 19.84406779661017, - "grad_norm": 1.4969916343688965, - "learning_rate": 6.227336683417086e-05, - "loss": 5.4872, - "step": 38051 - }, - { - "epoch": 19.84458930899609, - "grad_norm": 1.460471510887146, - "learning_rate": 6.227236180904524e-05, - "loss": 5.5307, - "step": 38052 - }, - { - "epoch": 19.845110821382008, - "grad_norm": 1.4979948997497559, - "learning_rate": 6.22713567839196e-05, - "loss": 5.2736, - "step": 38053 - }, - { - "epoch": 19.845632333767927, - "grad_norm": 1.5570520162582397, - "learning_rate": 6.227035175879398e-05, - "loss": 5.0863, - "step": 38054 - }, - { - "epoch": 19.846153846153847, - "grad_norm": 1.5851945877075195, - "learning_rate": 6.226934673366834e-05, - "loss": 5.3066, - "step": 38055 - }, - { - "epoch": 19.846675358539766, - "grad_norm": 1.4957586526870728, - "learning_rate": 6.226834170854272e-05, - "loss": 5.5075, - "step": 38056 - }, - { - "epoch": 19.847196870925686, - "grad_norm": 1.723888635635376, - "learning_rate": 6.226733668341709e-05, - "loss": 4.7469, - "step": 38057 - }, - { - "epoch": 19.847718383311605, - "grad_norm": 1.4808363914489746, - "learning_rate": 6.226633165829146e-05, - "loss": 5.3174, - "step": 38058 - }, - { - "epoch": 19.848239895697525, - "grad_norm": 1.4626599550247192, - "learning_rate": 6.226532663316583e-05, - "loss": 5.4731, - "step": 38059 - }, - { - "epoch": 19.84876140808344, - "grad_norm": 1.6283854246139526, - "learning_rate": 6.226432160804019e-05, - "loss": 5.6096, - "step": 38060 - }, - { - "epoch": 19.84928292046936, - "grad_norm": 1.5155583620071411, - "learning_rate": 6.226331658291457e-05, - "loss": 5.3367, - "step": 38061 - }, - { - "epoch": 19.84980443285528, - "grad_norm": 1.631174087524414, - "learning_rate": 6.226231155778895e-05, - "loss": 5.4392, - "step": 38062 - }, - { - "epoch": 19.8503259452412, - "grad_norm": 1.6434369087219238, - "learning_rate": 6.226130653266333e-05, - "loss": 5.1768, - "step": 38063 - }, - { - "epoch": 19.85084745762712, - "grad_norm": 1.601196527481079, - "learning_rate": 6.226030150753769e-05, - "loss": 5.4631, - "step": 38064 - }, - { - "epoch": 19.851368970013038, - "grad_norm": 1.4975336790084839, - "learning_rate": 6.225929648241207e-05, - "loss": 5.5221, - "step": 38065 - }, - { - "epoch": 19.851890482398957, - "grad_norm": 1.5268256664276123, - "learning_rate": 6.225829145728643e-05, - "loss": 5.2, - "step": 38066 - }, - { - "epoch": 19.852411994784877, - "grad_norm": 1.6482908725738525, - "learning_rate": 6.225728643216081e-05, - "loss": 5.017, - "step": 38067 - }, - { - "epoch": 19.852933507170796, - "grad_norm": 1.5571174621582031, - "learning_rate": 6.225628140703517e-05, - "loss": 5.2844, - "step": 38068 - }, - { - "epoch": 19.853455019556716, - "grad_norm": 1.6609224081039429, - "learning_rate": 6.225527638190955e-05, - "loss": 5.5004, - "step": 38069 - }, - { - "epoch": 19.853976531942635, - "grad_norm": 1.583567500114441, - "learning_rate": 6.225427135678392e-05, - "loss": 5.0465, - "step": 38070 - }, - { - "epoch": 19.85449804432855, - "grad_norm": 1.367842674255371, - "learning_rate": 6.22532663316583e-05, - "loss": 5.7591, - "step": 38071 - }, - { - "epoch": 19.85501955671447, - "grad_norm": 1.5644207000732422, - "learning_rate": 6.225226130653267e-05, - "loss": 5.0872, - "step": 38072 - }, - { - "epoch": 19.85554106910039, - "grad_norm": 1.4315071105957031, - "learning_rate": 6.225125628140704e-05, - "loss": 4.8854, - "step": 38073 - }, - { - "epoch": 19.85606258148631, - "grad_norm": 1.5598132610321045, - "learning_rate": 6.225025125628141e-05, - "loss": 5.5906, - "step": 38074 - }, - { - "epoch": 19.85658409387223, - "grad_norm": 1.4406980276107788, - "learning_rate": 6.224924623115578e-05, - "loss": 5.3627, - "step": 38075 - }, - { - "epoch": 19.85710560625815, - "grad_norm": 1.5026601552963257, - "learning_rate": 6.224824120603016e-05, - "loss": 4.8173, - "step": 38076 - }, - { - "epoch": 19.857627118644068, - "grad_norm": 1.4293718338012695, - "learning_rate": 6.224723618090452e-05, - "loss": 5.5717, - "step": 38077 - }, - { - "epoch": 19.858148631029987, - "grad_norm": 1.4541141986846924, - "learning_rate": 6.22462311557789e-05, - "loss": 5.1425, - "step": 38078 - }, - { - "epoch": 19.858670143415907, - "grad_norm": 1.5465751886367798, - "learning_rate": 6.224522613065326e-05, - "loss": 4.8516, - "step": 38079 - }, - { - "epoch": 19.859191655801826, - "grad_norm": 1.5505495071411133, - "learning_rate": 6.224422110552764e-05, - "loss": 5.4299, - "step": 38080 - }, - { - "epoch": 19.859713168187746, - "grad_norm": 1.5347495079040527, - "learning_rate": 6.2243216080402e-05, - "loss": 5.1183, - "step": 38081 - }, - { - "epoch": 19.860234680573665, - "grad_norm": 1.5249630212783813, - "learning_rate": 6.224221105527638e-05, - "loss": 5.0767, - "step": 38082 - }, - { - "epoch": 19.860756192959585, - "grad_norm": 1.6139954328536987, - "learning_rate": 6.224120603015076e-05, - "loss": 4.7193, - "step": 38083 - }, - { - "epoch": 19.8612777053455, - "grad_norm": 1.3960182666778564, - "learning_rate": 6.224020100502514e-05, - "loss": 5.7539, - "step": 38084 - }, - { - "epoch": 19.86179921773142, - "grad_norm": 1.5544743537902832, - "learning_rate": 6.22391959798995e-05, - "loss": 5.4848, - "step": 38085 - }, - { - "epoch": 19.86232073011734, - "grad_norm": 1.4651744365692139, - "learning_rate": 6.223819095477387e-05, - "loss": 4.9023, - "step": 38086 - }, - { - "epoch": 19.86284224250326, - "grad_norm": 1.378217339515686, - "learning_rate": 6.223718592964824e-05, - "loss": 5.5016, - "step": 38087 - }, - { - "epoch": 19.86336375488918, - "grad_norm": 1.5137689113616943, - "learning_rate": 6.223618090452261e-05, - "loss": 5.2097, - "step": 38088 - }, - { - "epoch": 19.863885267275098, - "grad_norm": 1.4685672521591187, - "learning_rate": 6.223517587939699e-05, - "loss": 5.2524, - "step": 38089 - }, - { - "epoch": 19.864406779661017, - "grad_norm": 1.4706100225448608, - "learning_rate": 6.223417085427135e-05, - "loss": 5.2208, - "step": 38090 - }, - { - "epoch": 19.864928292046937, - "grad_norm": 1.4041749238967896, - "learning_rate": 6.223316582914573e-05, - "loss": 5.3538, - "step": 38091 - }, - { - "epoch": 19.865449804432856, - "grad_norm": 1.5164860486984253, - "learning_rate": 6.223216080402011e-05, - "loss": 5.2101, - "step": 38092 - }, - { - "epoch": 19.865971316818776, - "grad_norm": 1.4187732934951782, - "learning_rate": 6.223115577889448e-05, - "loss": 5.0221, - "step": 38093 - }, - { - "epoch": 19.866492829204695, - "grad_norm": 1.5141184329986572, - "learning_rate": 6.223015075376885e-05, - "loss": 5.4959, - "step": 38094 - }, - { - "epoch": 19.86701434159061, - "grad_norm": 1.5288240909576416, - "learning_rate": 6.222914572864323e-05, - "loss": 5.3952, - "step": 38095 - }, - { - "epoch": 19.86753585397653, - "grad_norm": 1.4411494731903076, - "learning_rate": 6.222814070351759e-05, - "loss": 5.4695, - "step": 38096 - }, - { - "epoch": 19.86805736636245, - "grad_norm": 1.428125023841858, - "learning_rate": 6.222713567839197e-05, - "loss": 5.3916, - "step": 38097 - }, - { - "epoch": 19.86857887874837, - "grad_norm": 1.389845609664917, - "learning_rate": 6.222613065326633e-05, - "loss": 5.7761, - "step": 38098 - }, - { - "epoch": 19.86910039113429, - "grad_norm": 1.4753390550613403, - "learning_rate": 6.22251256281407e-05, - "loss": 5.7875, - "step": 38099 - }, - { - "epoch": 19.86962190352021, - "grad_norm": 1.570302128791809, - "learning_rate": 6.222412060301508e-05, - "loss": 5.0258, - "step": 38100 - }, - { - "epoch": 19.870143415906128, - "grad_norm": 1.5758273601531982, - "learning_rate": 6.222311557788944e-05, - "loss": 5.1677, - "step": 38101 - }, - { - "epoch": 19.870664928292047, - "grad_norm": 1.4620864391326904, - "learning_rate": 6.222211055276382e-05, - "loss": 5.4278, - "step": 38102 - }, - { - "epoch": 19.871186440677967, - "grad_norm": 1.450624942779541, - "learning_rate": 6.22211055276382e-05, - "loss": 5.7174, - "step": 38103 - }, - { - "epoch": 19.871707953063886, - "grad_norm": 1.5064587593078613, - "learning_rate": 6.222010050251257e-05, - "loss": 5.6004, - "step": 38104 - }, - { - "epoch": 19.872229465449806, - "grad_norm": 1.4028217792510986, - "learning_rate": 6.221909547738694e-05, - "loss": 5.5525, - "step": 38105 - }, - { - "epoch": 19.872750977835725, - "grad_norm": 1.4189088344573975, - "learning_rate": 6.221809045226132e-05, - "loss": 5.6656, - "step": 38106 - }, - { - "epoch": 19.87327249022164, - "grad_norm": 1.433026671409607, - "learning_rate": 6.221708542713568e-05, - "loss": 5.0296, - "step": 38107 - }, - { - "epoch": 19.87379400260756, - "grad_norm": 1.619960904121399, - "learning_rate": 6.221608040201006e-05, - "loss": 5.2135, - "step": 38108 - }, - { - "epoch": 19.87431551499348, - "grad_norm": 1.4980055093765259, - "learning_rate": 6.221507537688442e-05, - "loss": 5.4753, - "step": 38109 - }, - { - "epoch": 19.8748370273794, - "grad_norm": 1.436659812927246, - "learning_rate": 6.22140703517588e-05, - "loss": 5.0363, - "step": 38110 - }, - { - "epoch": 19.87535853976532, - "grad_norm": 1.4961206912994385, - "learning_rate": 6.221306532663316e-05, - "loss": 5.4807, - "step": 38111 - }, - { - "epoch": 19.87588005215124, - "grad_norm": 1.480179786682129, - "learning_rate": 6.221206030150754e-05, - "loss": 5.5051, - "step": 38112 - }, - { - "epoch": 19.876401564537158, - "grad_norm": 1.4929604530334473, - "learning_rate": 6.221105527638192e-05, - "loss": 5.3254, - "step": 38113 - }, - { - "epoch": 19.876923076923077, - "grad_norm": 1.5296822786331177, - "learning_rate": 6.221005025125628e-05, - "loss": 5.2485, - "step": 38114 - }, - { - "epoch": 19.877444589308997, - "grad_norm": 1.557716727256775, - "learning_rate": 6.220904522613066e-05, - "loss": 4.9773, - "step": 38115 - }, - { - "epoch": 19.877966101694916, - "grad_norm": 1.4799778461456299, - "learning_rate": 6.220804020100503e-05, - "loss": 5.4878, - "step": 38116 - }, - { - "epoch": 19.878487614080836, - "grad_norm": 1.4203803539276123, - "learning_rate": 6.22070351758794e-05, - "loss": 5.7354, - "step": 38117 - }, - { - "epoch": 19.879009126466755, - "grad_norm": 1.4516133069992065, - "learning_rate": 6.220603015075377e-05, - "loss": 5.558, - "step": 38118 - }, - { - "epoch": 19.87953063885267, - "grad_norm": 1.5652246475219727, - "learning_rate": 6.220502512562815e-05, - "loss": 4.752, - "step": 38119 - }, - { - "epoch": 19.88005215123859, - "grad_norm": 1.4018625020980835, - "learning_rate": 6.220402010050251e-05, - "loss": 5.4666, - "step": 38120 - }, - { - "epoch": 19.88057366362451, - "grad_norm": 1.5595145225524902, - "learning_rate": 6.220301507537689e-05, - "loss": 5.5208, - "step": 38121 - }, - { - "epoch": 19.88109517601043, - "grad_norm": 1.4398109912872314, - "learning_rate": 6.220201005025125e-05, - "loss": 5.6426, - "step": 38122 - }, - { - "epoch": 19.88161668839635, - "grad_norm": 1.6427068710327148, - "learning_rate": 6.220100502512563e-05, - "loss": 4.7887, - "step": 38123 - }, - { - "epoch": 19.88213820078227, - "grad_norm": 1.6221426725387573, - "learning_rate": 6.220000000000001e-05, - "loss": 4.5102, - "step": 38124 - }, - { - "epoch": 19.882659713168188, - "grad_norm": 1.498834490776062, - "learning_rate": 6.219899497487437e-05, - "loss": 5.4057, - "step": 38125 - }, - { - "epoch": 19.883181225554107, - "grad_norm": 1.4918246269226074, - "learning_rate": 6.219798994974875e-05, - "loss": 5.2287, - "step": 38126 - }, - { - "epoch": 19.883702737940027, - "grad_norm": 1.6179622411727905, - "learning_rate": 6.219698492462311e-05, - "loss": 4.8194, - "step": 38127 - }, - { - "epoch": 19.884224250325946, - "grad_norm": 1.6153532266616821, - "learning_rate": 6.219597989949749e-05, - "loss": 4.994, - "step": 38128 - }, - { - "epoch": 19.884745762711866, - "grad_norm": 1.5056079626083374, - "learning_rate": 6.219497487437186e-05, - "loss": 5.2541, - "step": 38129 - }, - { - "epoch": 19.885267275097785, - "grad_norm": 1.5173094272613525, - "learning_rate": 6.219396984924623e-05, - "loss": 4.8681, - "step": 38130 - }, - { - "epoch": 19.8857887874837, - "grad_norm": 1.4664044380187988, - "learning_rate": 6.21929648241206e-05, - "loss": 5.5091, - "step": 38131 - }, - { - "epoch": 19.88631029986962, - "grad_norm": 1.5254746675491333, - "learning_rate": 6.219195979899498e-05, - "loss": 5.1538, - "step": 38132 - }, - { - "epoch": 19.88683181225554, - "grad_norm": 1.5804431438446045, - "learning_rate": 6.219095477386935e-05, - "loss": 5.2793, - "step": 38133 - }, - { - "epoch": 19.88735332464146, - "grad_norm": 1.4151618480682373, - "learning_rate": 6.218994974874373e-05, - "loss": 5.1958, - "step": 38134 - }, - { - "epoch": 19.88787483702738, - "grad_norm": 1.5552607774734497, - "learning_rate": 6.21889447236181e-05, - "loss": 5.1933, - "step": 38135 - }, - { - "epoch": 19.8883963494133, - "grad_norm": 1.4916871786117554, - "learning_rate": 6.218793969849247e-05, - "loss": 5.5832, - "step": 38136 - }, - { - "epoch": 19.888917861799218, - "grad_norm": 1.6262755393981934, - "learning_rate": 6.218693467336684e-05, - "loss": 4.8665, - "step": 38137 - }, - { - "epoch": 19.889439374185137, - "grad_norm": 1.5290266275405884, - "learning_rate": 6.21859296482412e-05, - "loss": 5.2546, - "step": 38138 - }, - { - "epoch": 19.889960886571057, - "grad_norm": 1.4643021821975708, - "learning_rate": 6.218492462311558e-05, - "loss": 5.6464, - "step": 38139 - }, - { - "epoch": 19.890482398956976, - "grad_norm": 1.4137722253799438, - "learning_rate": 6.218391959798994e-05, - "loss": 4.9865, - "step": 38140 - }, - { - "epoch": 19.891003911342896, - "grad_norm": 1.4223109483718872, - "learning_rate": 6.218291457286432e-05, - "loss": 5.629, - "step": 38141 - }, - { - "epoch": 19.891525423728815, - "grad_norm": 1.4793031215667725, - "learning_rate": 6.218190954773869e-05, - "loss": 5.0585, - "step": 38142 - }, - { - "epoch": 19.89204693611473, - "grad_norm": 1.5550274848937988, - "learning_rate": 6.218090452261306e-05, - "loss": 4.5658, - "step": 38143 - }, - { - "epoch": 19.89256844850065, - "grad_norm": 1.5505367517471313, - "learning_rate": 6.217989949748744e-05, - "loss": 4.9584, - "step": 38144 - }, - { - "epoch": 19.89308996088657, - "grad_norm": 1.4890332221984863, - "learning_rate": 6.217889447236182e-05, - "loss": 5.3106, - "step": 38145 - }, - { - "epoch": 19.89361147327249, - "grad_norm": 1.5142959356307983, - "learning_rate": 6.217788944723618e-05, - "loss": 5.1914, - "step": 38146 - }, - { - "epoch": 19.89413298565841, - "grad_norm": 1.4256622791290283, - "learning_rate": 6.217688442211056e-05, - "loss": 5.5909, - "step": 38147 - }, - { - "epoch": 19.89465449804433, - "grad_norm": 1.4868316650390625, - "learning_rate": 6.217587939698493e-05, - "loss": 5.3283, - "step": 38148 - }, - { - "epoch": 19.895176010430248, - "grad_norm": 1.531471848487854, - "learning_rate": 6.21748743718593e-05, - "loss": 4.2213, - "step": 38149 - }, - { - "epoch": 19.895697522816167, - "grad_norm": 1.5173839330673218, - "learning_rate": 6.217386934673367e-05, - "loss": 5.342, - "step": 38150 - }, - { - "epoch": 19.896219035202087, - "grad_norm": 1.498688817024231, - "learning_rate": 6.217286432160805e-05, - "loss": 5.4315, - "step": 38151 - }, - { - "epoch": 19.896740547588006, - "grad_norm": 1.3598445653915405, - "learning_rate": 6.217185929648241e-05, - "loss": 4.921, - "step": 38152 - }, - { - "epoch": 19.897262059973926, - "grad_norm": 1.461531162261963, - "learning_rate": 6.217085427135679e-05, - "loss": 5.71, - "step": 38153 - }, - { - "epoch": 19.89778357235984, - "grad_norm": 1.5162572860717773, - "learning_rate": 6.216984924623117e-05, - "loss": 5.2894, - "step": 38154 - }, - { - "epoch": 19.89830508474576, - "grad_norm": 1.477660059928894, - "learning_rate": 6.216884422110553e-05, - "loss": 5.3669, - "step": 38155 - }, - { - "epoch": 19.89882659713168, - "grad_norm": 1.6160160303115845, - "learning_rate": 6.216783919597991e-05, - "loss": 4.8417, - "step": 38156 - }, - { - "epoch": 19.8993481095176, - "grad_norm": 1.5090303421020508, - "learning_rate": 6.216683417085427e-05, - "loss": 5.3364, - "step": 38157 - }, - { - "epoch": 19.89986962190352, - "grad_norm": 1.4430267810821533, - "learning_rate": 6.216582914572865e-05, - "loss": 5.3982, - "step": 38158 - }, - { - "epoch": 19.90039113428944, - "grad_norm": 1.49335515499115, - "learning_rate": 6.216482412060301e-05, - "loss": 5.0079, - "step": 38159 - }, - { - "epoch": 19.90091264667536, - "grad_norm": 1.4905319213867188, - "learning_rate": 6.216381909547739e-05, - "loss": 5.2631, - "step": 38160 - }, - { - "epoch": 19.901434159061278, - "grad_norm": 1.4842778444290161, - "learning_rate": 6.216281407035176e-05, - "loss": 5.4243, - "step": 38161 - }, - { - "epoch": 19.901955671447197, - "grad_norm": 1.4998399019241333, - "learning_rate": 6.216180904522613e-05, - "loss": 4.7933, - "step": 38162 - }, - { - "epoch": 19.902477183833117, - "grad_norm": 1.4168721437454224, - "learning_rate": 6.21608040201005e-05, - "loss": 5.4006, - "step": 38163 - }, - { - "epoch": 19.902998696219036, - "grad_norm": 1.7023727893829346, - "learning_rate": 6.215979899497488e-05, - "loss": 5.024, - "step": 38164 - }, - { - "epoch": 19.903520208604956, - "grad_norm": 1.5093584060668945, - "learning_rate": 6.215879396984925e-05, - "loss": 5.4139, - "step": 38165 - }, - { - "epoch": 19.904041720990875, - "grad_norm": 1.5375622510910034, - "learning_rate": 6.215778894472362e-05, - "loss": 5.381, - "step": 38166 - }, - { - "epoch": 19.90456323337679, - "grad_norm": 1.5324050188064575, - "learning_rate": 6.2156783919598e-05, - "loss": 5.2118, - "step": 38167 - }, - { - "epoch": 19.90508474576271, - "grad_norm": 1.4609447717666626, - "learning_rate": 6.215577889447236e-05, - "loss": 4.709, - "step": 38168 - }, - { - "epoch": 19.90560625814863, - "grad_norm": 1.4736759662628174, - "learning_rate": 6.215477386934674e-05, - "loss": 5.4876, - "step": 38169 - }, - { - "epoch": 19.90612777053455, - "grad_norm": 1.4363259077072144, - "learning_rate": 6.21537688442211e-05, - "loss": 4.7938, - "step": 38170 - }, - { - "epoch": 19.90664928292047, - "grad_norm": 1.446358323097229, - "learning_rate": 6.215276381909548e-05, - "loss": 5.587, - "step": 38171 - }, - { - "epoch": 19.90717079530639, - "grad_norm": 1.515824794769287, - "learning_rate": 6.215175879396985e-05, - "loss": 5.3575, - "step": 38172 - }, - { - "epoch": 19.907692307692308, - "grad_norm": 1.4200032949447632, - "learning_rate": 6.215075376884422e-05, - "loss": 4.5062, - "step": 38173 - }, - { - "epoch": 19.908213820078227, - "grad_norm": 1.3990845680236816, - "learning_rate": 6.21497487437186e-05, - "loss": 5.1961, - "step": 38174 - }, - { - "epoch": 19.908735332464147, - "grad_norm": 1.483668565750122, - "learning_rate": 6.214874371859298e-05, - "loss": 4.9355, - "step": 38175 - }, - { - "epoch": 19.909256844850066, - "grad_norm": 1.5391247272491455, - "learning_rate": 6.214773869346734e-05, - "loss": 4.6317, - "step": 38176 - }, - { - "epoch": 19.909778357235986, - "grad_norm": 1.472774624824524, - "learning_rate": 6.214673366834172e-05, - "loss": 5.5288, - "step": 38177 - }, - { - "epoch": 19.910299869621902, - "grad_norm": 1.4478139877319336, - "learning_rate": 6.214572864321609e-05, - "loss": 5.4348, - "step": 38178 - }, - { - "epoch": 19.91082138200782, - "grad_norm": 1.4396167993545532, - "learning_rate": 6.214472361809045e-05, - "loss": 5.2163, - "step": 38179 - }, - { - "epoch": 19.91134289439374, - "grad_norm": 1.4811393022537231, - "learning_rate": 6.214371859296483e-05, - "loss": 5.3467, - "step": 38180 - }, - { - "epoch": 19.91186440677966, - "grad_norm": 1.5732979774475098, - "learning_rate": 6.214271356783919e-05, - "loss": 5.3665, - "step": 38181 - }, - { - "epoch": 19.91238591916558, - "grad_norm": 1.4391684532165527, - "learning_rate": 6.214170854271357e-05, - "loss": 5.9374, - "step": 38182 - }, - { - "epoch": 19.9129074315515, - "grad_norm": 1.5206786394119263, - "learning_rate": 6.214070351758793e-05, - "loss": 5.4421, - "step": 38183 - }, - { - "epoch": 19.91342894393742, - "grad_norm": 1.4679421186447144, - "learning_rate": 6.213969849246231e-05, - "loss": 5.2188, - "step": 38184 - }, - { - "epoch": 19.913950456323338, - "grad_norm": 1.562019944190979, - "learning_rate": 6.213869346733669e-05, - "loss": 5.58, - "step": 38185 - }, - { - "epoch": 19.914471968709258, - "grad_norm": 1.4214998483657837, - "learning_rate": 6.213768844221107e-05, - "loss": 5.4855, - "step": 38186 - }, - { - "epoch": 19.914993481095177, - "grad_norm": 1.5634026527404785, - "learning_rate": 6.213668341708543e-05, - "loss": 5.0633, - "step": 38187 - }, - { - "epoch": 19.915514993481096, - "grad_norm": 1.5691609382629395, - "learning_rate": 6.213567839195981e-05, - "loss": 5.3099, - "step": 38188 - }, - { - "epoch": 19.916036505867016, - "grad_norm": 1.5828901529312134, - "learning_rate": 6.213467336683417e-05, - "loss": 5.3425, - "step": 38189 - }, - { - "epoch": 19.916558018252932, - "grad_norm": 1.4627023935317993, - "learning_rate": 6.213366834170855e-05, - "loss": 5.525, - "step": 38190 - }, - { - "epoch": 19.91707953063885, - "grad_norm": 1.436439037322998, - "learning_rate": 6.213266331658292e-05, - "loss": 5.7044, - "step": 38191 - }, - { - "epoch": 19.91760104302477, - "grad_norm": 1.528156042098999, - "learning_rate": 6.213165829145728e-05, - "loss": 5.4609, - "step": 38192 - }, - { - "epoch": 19.91812255541069, - "grad_norm": 1.3592088222503662, - "learning_rate": 6.213065326633166e-05, - "loss": 5.565, - "step": 38193 - }, - { - "epoch": 19.91864406779661, - "grad_norm": 1.4342150688171387, - "learning_rate": 6.212964824120604e-05, - "loss": 5.4675, - "step": 38194 - }, - { - "epoch": 19.91916558018253, - "grad_norm": 1.3914097547531128, - "learning_rate": 6.212864321608041e-05, - "loss": 5.8754, - "step": 38195 - }, - { - "epoch": 19.91968709256845, - "grad_norm": 1.3995081186294556, - "learning_rate": 6.212763819095478e-05, - "loss": 5.0407, - "step": 38196 - }, - { - "epoch": 19.920208604954368, - "grad_norm": 1.4648427963256836, - "learning_rate": 6.212663316582916e-05, - "loss": 5.3271, - "step": 38197 - }, - { - "epoch": 19.920730117340288, - "grad_norm": 1.386121392250061, - "learning_rate": 6.212562814070352e-05, - "loss": 5.6145, - "step": 38198 - }, - { - "epoch": 19.921251629726207, - "grad_norm": 1.4974291324615479, - "learning_rate": 6.21246231155779e-05, - "loss": 4.7265, - "step": 38199 - }, - { - "epoch": 19.921773142112126, - "grad_norm": 1.4993605613708496, - "learning_rate": 6.212361809045226e-05, - "loss": 5.1517, - "step": 38200 - }, - { - "epoch": 19.922294654498046, - "grad_norm": 1.7060974836349487, - "learning_rate": 6.212261306532664e-05, - "loss": 5.1871, - "step": 38201 - }, - { - "epoch": 19.922816166883962, - "grad_norm": 1.5000334978103638, - "learning_rate": 6.2121608040201e-05, - "loss": 5.4437, - "step": 38202 - }, - { - "epoch": 19.92333767926988, - "grad_norm": 1.5411897897720337, - "learning_rate": 6.212060301507538e-05, - "loss": 5.4723, - "step": 38203 - }, - { - "epoch": 19.9238591916558, - "grad_norm": 1.485679030418396, - "learning_rate": 6.211959798994975e-05, - "loss": 5.0698, - "step": 38204 - }, - { - "epoch": 19.92438070404172, - "grad_norm": 1.4133687019348145, - "learning_rate": 6.211859296482412e-05, - "loss": 4.3637, - "step": 38205 - }, - { - "epoch": 19.92490221642764, - "grad_norm": 1.444465160369873, - "learning_rate": 6.21175879396985e-05, - "loss": 4.9207, - "step": 38206 - }, - { - "epoch": 19.92542372881356, - "grad_norm": 1.4572407007217407, - "learning_rate": 6.211658291457287e-05, - "loss": 5.3377, - "step": 38207 - }, - { - "epoch": 19.92594524119948, - "grad_norm": 1.569268822669983, - "learning_rate": 6.211557788944724e-05, - "loss": 5.0791, - "step": 38208 - }, - { - "epoch": 19.926466753585398, - "grad_norm": 1.512372612953186, - "learning_rate": 6.211457286432161e-05, - "loss": 5.4726, - "step": 38209 - }, - { - "epoch": 19.926988265971318, - "grad_norm": 1.5261820554733276, - "learning_rate": 6.211356783919599e-05, - "loss": 5.1882, - "step": 38210 - }, - { - "epoch": 19.927509778357237, - "grad_norm": 1.5607030391693115, - "learning_rate": 6.211256281407035e-05, - "loss": 5.3144, - "step": 38211 - }, - { - "epoch": 19.928031290743156, - "grad_norm": 1.6497431993484497, - "learning_rate": 6.211155778894473e-05, - "loss": 5.2583, - "step": 38212 - }, - { - "epoch": 19.928552803129076, - "grad_norm": 1.500420331954956, - "learning_rate": 6.211055276381909e-05, - "loss": 5.3685, - "step": 38213 - }, - { - "epoch": 19.929074315514992, - "grad_norm": 1.4268224239349365, - "learning_rate": 6.210954773869347e-05, - "loss": 5.5477, - "step": 38214 - }, - { - "epoch": 19.92959582790091, - "grad_norm": 1.5568667650222778, - "learning_rate": 6.210854271356785e-05, - "loss": 4.8751, - "step": 38215 - }, - { - "epoch": 19.93011734028683, - "grad_norm": 1.579789400100708, - "learning_rate": 6.210753768844223e-05, - "loss": 5.1579, - "step": 38216 - }, - { - "epoch": 19.93063885267275, - "grad_norm": 1.480023741722107, - "learning_rate": 6.210653266331659e-05, - "loss": 5.3738, - "step": 38217 - }, - { - "epoch": 19.93116036505867, - "grad_norm": 1.5211929082870483, - "learning_rate": 6.210552763819095e-05, - "loss": 5.1022, - "step": 38218 - }, - { - "epoch": 19.93168187744459, - "grad_norm": 1.4928573369979858, - "learning_rate": 6.210452261306533e-05, - "loss": 5.267, - "step": 38219 - }, - { - "epoch": 19.93220338983051, - "grad_norm": 1.6059174537658691, - "learning_rate": 6.21035175879397e-05, - "loss": 4.565, - "step": 38220 - }, - { - "epoch": 19.932724902216428, - "grad_norm": 1.387036919593811, - "learning_rate": 6.210251256281407e-05, - "loss": 5.0912, - "step": 38221 - }, - { - "epoch": 19.933246414602348, - "grad_norm": 1.4522606134414673, - "learning_rate": 6.210150753768844e-05, - "loss": 5.5052, - "step": 38222 - }, - { - "epoch": 19.933767926988267, - "grad_norm": 1.3652220964431763, - "learning_rate": 6.210050251256282e-05, - "loss": 5.4326, - "step": 38223 - }, - { - "epoch": 19.934289439374187, - "grad_norm": 1.460880994796753, - "learning_rate": 6.209949748743718e-05, - "loss": 5.6017, - "step": 38224 - }, - { - "epoch": 19.934810951760106, - "grad_norm": 1.4801867008209229, - "learning_rate": 6.209849246231156e-05, - "loss": 5.3201, - "step": 38225 - }, - { - "epoch": 19.935332464146022, - "grad_norm": 1.4583388566970825, - "learning_rate": 6.209748743718594e-05, - "loss": 5.4775, - "step": 38226 - }, - { - "epoch": 19.93585397653194, - "grad_norm": 1.499648094177246, - "learning_rate": 6.209648241206031e-05, - "loss": 5.2127, - "step": 38227 - }, - { - "epoch": 19.93637548891786, - "grad_norm": 1.5414307117462158, - "learning_rate": 6.209547738693468e-05, - "loss": 5.5175, - "step": 38228 - }, - { - "epoch": 19.93689700130378, - "grad_norm": 1.5581543445587158, - "learning_rate": 6.209447236180906e-05, - "loss": 5.4307, - "step": 38229 - }, - { - "epoch": 19.9374185136897, - "grad_norm": 1.5137557983398438, - "learning_rate": 6.209346733668342e-05, - "loss": 5.1771, - "step": 38230 - }, - { - "epoch": 19.93794002607562, - "grad_norm": 1.5382746458053589, - "learning_rate": 6.209246231155778e-05, - "loss": 4.8634, - "step": 38231 - }, - { - "epoch": 19.93846153846154, - "grad_norm": 1.453658103942871, - "learning_rate": 6.209145728643216e-05, - "loss": 5.041, - "step": 38232 - }, - { - "epoch": 19.938983050847458, - "grad_norm": 1.5428329706192017, - "learning_rate": 6.209045226130653e-05, - "loss": 5.0689, - "step": 38233 - }, - { - "epoch": 19.939504563233378, - "grad_norm": 1.6713454723358154, - "learning_rate": 6.20894472361809e-05, - "loss": 4.6827, - "step": 38234 - }, - { - "epoch": 19.940026075619297, - "grad_norm": 1.572962760925293, - "learning_rate": 6.208844221105527e-05, - "loss": 5.4348, - "step": 38235 - }, - { - "epoch": 19.940547588005217, - "grad_norm": 1.4381239414215088, - "learning_rate": 6.208743718592965e-05, - "loss": 5.3034, - "step": 38236 - }, - { - "epoch": 19.941069100391136, - "grad_norm": 1.5577903985977173, - "learning_rate": 6.208643216080402e-05, - "loss": 5.0238, - "step": 38237 - }, - { - "epoch": 19.941590612777052, - "grad_norm": 1.5006067752838135, - "learning_rate": 6.20854271356784e-05, - "loss": 5.3092, - "step": 38238 - }, - { - "epoch": 19.94211212516297, - "grad_norm": 1.4910619258880615, - "learning_rate": 6.208442211055277e-05, - "loss": 5.5342, - "step": 38239 - }, - { - "epoch": 19.94263363754889, - "grad_norm": 1.4897197484970093, - "learning_rate": 6.208341708542714e-05, - "loss": 5.4778, - "step": 38240 - }, - { - "epoch": 19.94315514993481, - "grad_norm": 1.578852891921997, - "learning_rate": 6.208241206030151e-05, - "loss": 5.2282, - "step": 38241 - }, - { - "epoch": 19.94367666232073, - "grad_norm": 1.5442230701446533, - "learning_rate": 6.208140703517589e-05, - "loss": 5.0632, - "step": 38242 - }, - { - "epoch": 19.94419817470665, - "grad_norm": 1.5175142288208008, - "learning_rate": 6.208040201005025e-05, - "loss": 5.895, - "step": 38243 - }, - { - "epoch": 19.94471968709257, - "grad_norm": 1.5281156301498413, - "learning_rate": 6.207939698492463e-05, - "loss": 5.3891, - "step": 38244 - }, - { - "epoch": 19.945241199478488, - "grad_norm": 1.616683840751648, - "learning_rate": 6.207839195979899e-05, - "loss": 5.2726, - "step": 38245 - }, - { - "epoch": 19.945762711864408, - "grad_norm": 1.6550337076187134, - "learning_rate": 6.207738693467337e-05, - "loss": 4.6522, - "step": 38246 - }, - { - "epoch": 19.946284224250327, - "grad_norm": 1.5008409023284912, - "learning_rate": 6.207638190954775e-05, - "loss": 5.3102, - "step": 38247 - }, - { - "epoch": 19.946805736636247, - "grad_norm": 1.6091493368148804, - "learning_rate": 6.207537688442211e-05, - "loss": 5.0733, - "step": 38248 - }, - { - "epoch": 19.947327249022166, - "grad_norm": 1.4614239931106567, - "learning_rate": 6.207437185929649e-05, - "loss": 5.5875, - "step": 38249 - }, - { - "epoch": 19.947848761408082, - "grad_norm": 1.460827350616455, - "learning_rate": 6.207336683417085e-05, - "loss": 5.675, - "step": 38250 - }, - { - "epoch": 19.948370273794, - "grad_norm": 1.5288445949554443, - "learning_rate": 6.207236180904523e-05, - "loss": 5.2832, - "step": 38251 - }, - { - "epoch": 19.94889178617992, - "grad_norm": 1.5294309854507446, - "learning_rate": 6.20713567839196e-05, - "loss": 4.9536, - "step": 38252 - }, - { - "epoch": 19.94941329856584, - "grad_norm": 1.5054280757904053, - "learning_rate": 6.207035175879397e-05, - "loss": 5.4047, - "step": 38253 - }, - { - "epoch": 19.94993481095176, - "grad_norm": 1.6656136512756348, - "learning_rate": 6.206934673366834e-05, - "loss": 5.0735, - "step": 38254 - }, - { - "epoch": 19.95045632333768, - "grad_norm": 1.4616235494613647, - "learning_rate": 6.206834170854272e-05, - "loss": 5.4085, - "step": 38255 - }, - { - "epoch": 19.9509778357236, - "grad_norm": 1.5389493703842163, - "learning_rate": 6.206733668341708e-05, - "loss": 4.961, - "step": 38256 - }, - { - "epoch": 19.951499348109518, - "grad_norm": 1.4426450729370117, - "learning_rate": 6.206633165829146e-05, - "loss": 5.5164, - "step": 38257 - }, - { - "epoch": 19.952020860495438, - "grad_norm": 1.469177007675171, - "learning_rate": 6.206532663316584e-05, - "loss": 5.3368, - "step": 38258 - }, - { - "epoch": 19.952542372881357, - "grad_norm": 1.5553861856460571, - "learning_rate": 6.20643216080402e-05, - "loss": 5.289, - "step": 38259 - }, - { - "epoch": 19.953063885267277, - "grad_norm": 1.5059492588043213, - "learning_rate": 6.206331658291458e-05, - "loss": 5.4239, - "step": 38260 - }, - { - "epoch": 19.953585397653193, - "grad_norm": 1.5416524410247803, - "learning_rate": 6.206231155778894e-05, - "loss": 5.112, - "step": 38261 - }, - { - "epoch": 19.954106910039112, - "grad_norm": 1.53389573097229, - "learning_rate": 6.206130653266332e-05, - "loss": 5.1772, - "step": 38262 - }, - { - "epoch": 19.95462842242503, - "grad_norm": 1.5212841033935547, - "learning_rate": 6.206030150753769e-05, - "loss": 5.3659, - "step": 38263 - }, - { - "epoch": 19.95514993481095, - "grad_norm": 1.7363357543945312, - "learning_rate": 6.205929648241206e-05, - "loss": 4.8177, - "step": 38264 - }, - { - "epoch": 19.95567144719687, - "grad_norm": 1.4889999628067017, - "learning_rate": 6.205829145728643e-05, - "loss": 5.366, - "step": 38265 - }, - { - "epoch": 19.95619295958279, - "grad_norm": 1.517215371131897, - "learning_rate": 6.20572864321608e-05, - "loss": 5.418, - "step": 38266 - }, - { - "epoch": 19.95671447196871, - "grad_norm": 1.475551724433899, - "learning_rate": 6.205628140703518e-05, - "loss": 5.3996, - "step": 38267 - }, - { - "epoch": 19.95723598435463, - "grad_norm": 1.5664246082305908, - "learning_rate": 6.205527638190956e-05, - "loss": 5.4523, - "step": 38268 - }, - { - "epoch": 19.957757496740548, - "grad_norm": 1.5209778547286987, - "learning_rate": 6.205427135678393e-05, - "loss": 5.5006, - "step": 38269 - }, - { - "epoch": 19.958279009126468, - "grad_norm": 1.5092730522155762, - "learning_rate": 6.20532663316583e-05, - "loss": 4.9878, - "step": 38270 - }, - { - "epoch": 19.958800521512387, - "grad_norm": 1.461768388748169, - "learning_rate": 6.205226130653267e-05, - "loss": 5.5595, - "step": 38271 - }, - { - "epoch": 19.959322033898307, - "grad_norm": 1.7307533025741577, - "learning_rate": 6.205125628140703e-05, - "loss": 4.8672, - "step": 38272 - }, - { - "epoch": 19.959843546284226, - "grad_norm": 1.5503100156784058, - "learning_rate": 6.205025125628141e-05, - "loss": 5.2294, - "step": 38273 - }, - { - "epoch": 19.960365058670142, - "grad_norm": 1.490294337272644, - "learning_rate": 6.204924623115577e-05, - "loss": 4.8679, - "step": 38274 - }, - { - "epoch": 19.96088657105606, - "grad_norm": 1.4753285646438599, - "learning_rate": 6.204824120603015e-05, - "loss": 5.3733, - "step": 38275 - }, - { - "epoch": 19.96140808344198, - "grad_norm": 1.6775805950164795, - "learning_rate": 6.204723618090452e-05, - "loss": 5.3552, - "step": 38276 - }, - { - "epoch": 19.9619295958279, - "grad_norm": 1.5332751274108887, - "learning_rate": 6.20462311557789e-05, - "loss": 5.5506, - "step": 38277 - }, - { - "epoch": 19.96245110821382, - "grad_norm": 1.5473403930664062, - "learning_rate": 6.204522613065327e-05, - "loss": 5.1156, - "step": 38278 - }, - { - "epoch": 19.96297262059974, - "grad_norm": 1.5635771751403809, - "learning_rate": 6.204422110552765e-05, - "loss": 4.7657, - "step": 38279 - }, - { - "epoch": 19.96349413298566, - "grad_norm": 1.455479383468628, - "learning_rate": 6.204321608040201e-05, - "loss": 5.2005, - "step": 38280 - }, - { - "epoch": 19.96401564537158, - "grad_norm": 1.551373839378357, - "learning_rate": 6.204221105527639e-05, - "loss": 5.3, - "step": 38281 - }, - { - "epoch": 19.964537157757498, - "grad_norm": 1.428611159324646, - "learning_rate": 6.204120603015076e-05, - "loss": 5.1217, - "step": 38282 - }, - { - "epoch": 19.965058670143417, - "grad_norm": 1.4961553812026978, - "learning_rate": 6.204020100502513e-05, - "loss": 5.3162, - "step": 38283 - }, - { - "epoch": 19.965580182529337, - "grad_norm": 1.5838223695755005, - "learning_rate": 6.20391959798995e-05, - "loss": 5.0899, - "step": 38284 - }, - { - "epoch": 19.966101694915253, - "grad_norm": 1.5647261142730713, - "learning_rate": 6.203819095477386e-05, - "loss": 5.204, - "step": 38285 - }, - { - "epoch": 19.966623207301172, - "grad_norm": 1.4564136266708374, - "learning_rate": 6.203718592964824e-05, - "loss": 5.5877, - "step": 38286 - }, - { - "epoch": 19.96714471968709, - "grad_norm": 1.4654467105865479, - "learning_rate": 6.203618090452262e-05, - "loss": 5.4059, - "step": 38287 - }, - { - "epoch": 19.96766623207301, - "grad_norm": 1.4822232723236084, - "learning_rate": 6.2035175879397e-05, - "loss": 5.4883, - "step": 38288 - }, - { - "epoch": 19.96818774445893, - "grad_norm": 1.4983853101730347, - "learning_rate": 6.203417085427136e-05, - "loss": 5.0726, - "step": 38289 - }, - { - "epoch": 19.96870925684485, - "grad_norm": 1.5626475811004639, - "learning_rate": 6.203316582914574e-05, - "loss": 5.3834, - "step": 38290 - }, - { - "epoch": 19.96923076923077, - "grad_norm": 1.4274855852127075, - "learning_rate": 6.20321608040201e-05, - "loss": 5.6162, - "step": 38291 - }, - { - "epoch": 19.96975228161669, - "grad_norm": 1.5643380880355835, - "learning_rate": 6.203115577889448e-05, - "loss": 5.6857, - "step": 38292 - }, - { - "epoch": 19.97027379400261, - "grad_norm": 1.492038607597351, - "learning_rate": 6.203015075376884e-05, - "loss": 5.3568, - "step": 38293 - }, - { - "epoch": 19.970795306388528, - "grad_norm": 1.524474859237671, - "learning_rate": 6.202914572864322e-05, - "loss": 5.5221, - "step": 38294 - }, - { - "epoch": 19.971316818774447, - "grad_norm": 1.5520282983779907, - "learning_rate": 6.202814070351759e-05, - "loss": 4.8685, - "step": 38295 - }, - { - "epoch": 19.971838331160367, - "grad_norm": 1.4594491720199585, - "learning_rate": 6.202713567839196e-05, - "loss": 5.5657, - "step": 38296 - }, - { - "epoch": 19.972359843546283, - "grad_norm": 1.428917646408081, - "learning_rate": 6.202613065326633e-05, - "loss": 5.7834, - "step": 38297 - }, - { - "epoch": 19.972881355932202, - "grad_norm": 1.534371018409729, - "learning_rate": 6.20251256281407e-05, - "loss": 4.8774, - "step": 38298 - }, - { - "epoch": 19.97340286831812, - "grad_norm": 1.5672705173492432, - "learning_rate": 6.202412060301508e-05, - "loss": 5.3706, - "step": 38299 - }, - { - "epoch": 19.97392438070404, - "grad_norm": 1.5536917448043823, - "learning_rate": 6.202311557788945e-05, - "loss": 4.9969, - "step": 38300 - }, - { - "epoch": 19.97444589308996, - "grad_norm": 1.5125210285186768, - "learning_rate": 6.202211055276383e-05, - "loss": 5.4372, - "step": 38301 - }, - { - "epoch": 19.97496740547588, - "grad_norm": 1.4361066818237305, - "learning_rate": 6.202110552763819e-05, - "loss": 5.0774, - "step": 38302 - }, - { - "epoch": 19.9754889178618, - "grad_norm": 1.5917662382125854, - "learning_rate": 6.202010050251257e-05, - "loss": 4.5265, - "step": 38303 - }, - { - "epoch": 19.97601043024772, - "grad_norm": 1.5558242797851562, - "learning_rate": 6.201909547738693e-05, - "loss": 5.1149, - "step": 38304 - }, - { - "epoch": 19.97653194263364, - "grad_norm": 1.4747296571731567, - "learning_rate": 6.201809045226131e-05, - "loss": 5.5078, - "step": 38305 - }, - { - "epoch": 19.977053455019558, - "grad_norm": 1.590537667274475, - "learning_rate": 6.201708542713567e-05, - "loss": 4.7747, - "step": 38306 - }, - { - "epoch": 19.977574967405477, - "grad_norm": 1.449483871459961, - "learning_rate": 6.201608040201005e-05, - "loss": 5.2204, - "step": 38307 - }, - { - "epoch": 19.978096479791397, - "grad_norm": 1.3806482553482056, - "learning_rate": 6.201507537688443e-05, - "loss": 5.6059, - "step": 38308 - }, - { - "epoch": 19.978617992177313, - "grad_norm": 1.481704592704773, - "learning_rate": 6.201407035175881e-05, - "loss": 5.6264, - "step": 38309 - }, - { - "epoch": 19.979139504563232, - "grad_norm": Infinity, - "learning_rate": 6.201407035175881e-05, - "loss": 5.3032, - "step": 38310 - }, - { - "epoch": 19.97966101694915, - "grad_norm": 1.51116943359375, - "learning_rate": 6.201306532663317e-05, - "loss": 5.2179, - "step": 38311 - }, - { - "epoch": 19.98018252933507, - "grad_norm": 1.5021331310272217, - "learning_rate": 6.201206030150754e-05, - "loss": 5.6067, - "step": 38312 - }, - { - "epoch": 19.98070404172099, - "grad_norm": 1.4031672477722168, - "learning_rate": 6.201105527638191e-05, - "loss": 5.4398, - "step": 38313 - }, - { - "epoch": 19.98122555410691, - "grad_norm": 1.4915101528167725, - "learning_rate": 6.201005025125628e-05, - "loss": 5.519, - "step": 38314 - }, - { - "epoch": 19.98174706649283, - "grad_norm": 1.5065876245498657, - "learning_rate": 6.200904522613066e-05, - "loss": 5.0572, - "step": 38315 - }, - { - "epoch": 19.98226857887875, - "grad_norm": 1.4821653366088867, - "learning_rate": 6.200804020100502e-05, - "loss": 5.5387, - "step": 38316 - }, - { - "epoch": 19.98279009126467, - "grad_norm": 1.4739785194396973, - "learning_rate": 6.20070351758794e-05, - "loss": 5.4162, - "step": 38317 - }, - { - "epoch": 19.983311603650588, - "grad_norm": 1.4773660898208618, - "learning_rate": 6.200603015075376e-05, - "loss": 5.5508, - "step": 38318 - }, - { - "epoch": 19.983833116036507, - "grad_norm": 1.6143766641616821, - "learning_rate": 6.200502512562814e-05, - "loss": 5.2878, - "step": 38319 - }, - { - "epoch": 19.984354628422427, - "grad_norm": 1.536381483078003, - "learning_rate": 6.200402010050252e-05, - "loss": 4.9506, - "step": 38320 - }, - { - "epoch": 19.984876140808343, - "grad_norm": 1.4987151622772217, - "learning_rate": 6.20030150753769e-05, - "loss": 5.298, - "step": 38321 - }, - { - "epoch": 19.985397653194262, - "grad_norm": 1.5543229579925537, - "learning_rate": 6.200201005025126e-05, - "loss": 5.2184, - "step": 38322 - }, - { - "epoch": 19.98591916558018, - "grad_norm": 1.497510313987732, - "learning_rate": 6.200100502512564e-05, - "loss": 5.3506, - "step": 38323 - }, - { - "epoch": 19.9864406779661, - "grad_norm": 1.4817461967468262, - "learning_rate": 6.2e-05, - "loss": 5.1208, - "step": 38324 - }, - { - "epoch": 19.98696219035202, - "grad_norm": 1.4718077182769775, - "learning_rate": 6.199899497487438e-05, - "loss": 5.3013, - "step": 38325 - }, - { - "epoch": 19.98748370273794, - "grad_norm": 1.6227890253067017, - "learning_rate": 6.199798994974874e-05, - "loss": 4.8002, - "step": 38326 - }, - { - "epoch": 19.98800521512386, - "grad_norm": 1.4617931842803955, - "learning_rate": 6.199698492462311e-05, - "loss": 5.5222, - "step": 38327 - }, - { - "epoch": 19.98852672750978, - "grad_norm": 1.4979294538497925, - "learning_rate": 6.199597989949749e-05, - "loss": 5.3152, - "step": 38328 - }, - { - "epoch": 19.9890482398957, - "grad_norm": 1.4967265129089355, - "learning_rate": 6.199497487437186e-05, - "loss": 5.0194, - "step": 38329 - }, - { - "epoch": 19.989569752281618, - "grad_norm": 1.577396035194397, - "learning_rate": 6.199396984924624e-05, - "loss": 5.285, - "step": 38330 - }, - { - "epoch": 19.990091264667537, - "grad_norm": 1.4141192436218262, - "learning_rate": 6.199296482412061e-05, - "loss": 5.4598, - "step": 38331 - }, - { - "epoch": 19.990612777053457, - "grad_norm": 1.7098125219345093, - "learning_rate": 6.199195979899498e-05, - "loss": 4.5923, - "step": 38332 - }, - { - "epoch": 19.991134289439373, - "grad_norm": 1.480564832687378, - "learning_rate": 6.199095477386935e-05, - "loss": 5.5478, - "step": 38333 - }, - { - "epoch": 19.991655801825292, - "grad_norm": 1.4652355909347534, - "learning_rate": 6.198994974874373e-05, - "loss": 5.4401, - "step": 38334 - }, - { - "epoch": 19.99217731421121, - "grad_norm": 1.4865045547485352, - "learning_rate": 6.198894472361809e-05, - "loss": 5.1785, - "step": 38335 - }, - { - "epoch": 19.99269882659713, - "grad_norm": 1.6490668058395386, - "learning_rate": 6.198793969849247e-05, - "loss": 5.1903, - "step": 38336 - }, - { - "epoch": 19.99322033898305, - "grad_norm": 1.4895744323730469, - "learning_rate": 6.198693467336683e-05, - "loss": 5.154, - "step": 38337 - }, - { - "epoch": 19.99374185136897, - "grad_norm": 1.494354248046875, - "learning_rate": 6.198592964824121e-05, - "loss": 5.6342, - "step": 38338 - }, - { - "epoch": 19.99426336375489, - "grad_norm": 1.5652570724487305, - "learning_rate": 6.198492462311558e-05, - "loss": 4.8167, - "step": 38339 - }, - { - "epoch": 19.99478487614081, - "grad_norm": 1.5241749286651611, - "learning_rate": 6.198391959798995e-05, - "loss": 5.0666, - "step": 38340 - }, - { - "epoch": 19.99530638852673, - "grad_norm": 1.5899394750595093, - "learning_rate": 6.198291457286433e-05, - "loss": 5.7087, - "step": 38341 - }, - { - "epoch": 19.995827900912648, - "grad_norm": 1.6270229816436768, - "learning_rate": 6.19819095477387e-05, - "loss": 5.4033, - "step": 38342 - }, - { - "epoch": 19.996349413298567, - "grad_norm": 1.4796744585037231, - "learning_rate": 6.198090452261307e-05, - "loss": 4.8208, - "step": 38343 - }, - { - "epoch": 19.996870925684483, - "grad_norm": 1.5884077548980713, - "learning_rate": 6.197989949748744e-05, - "loss": 4.8114, - "step": 38344 - }, - { - "epoch": 19.997392438070403, - "grad_norm": 1.5920491218566895, - "learning_rate": 6.197889447236182e-05, - "loss": 5.3038, - "step": 38345 - }, - { - "epoch": 19.997913950456322, - "grad_norm": 1.4736557006835938, - "learning_rate": 6.197788944723618e-05, - "loss": 5.5012, - "step": 38346 - }, - { - "epoch": 19.99843546284224, - "grad_norm": 1.6492938995361328, - "learning_rate": 6.197688442211056e-05, - "loss": 4.8642, - "step": 38347 - }, - { - "epoch": 19.99895697522816, - "grad_norm": 1.4494704008102417, - "learning_rate": 6.197587939698492e-05, - "loss": 5.2915, - "step": 38348 - }, - { - "epoch": 19.99947848761408, - "grad_norm": 1.5519440174102783, - "learning_rate": 6.19748743718593e-05, - "loss": 4.7947, - "step": 38349 - }, - { - "epoch": 20.0, - "grad_norm": 1.7670592069625854, - "learning_rate": 6.197386934673368e-05, - "loss": 5.0612, - "step": 38350 - }, - { - "epoch": 20.00052151238592, - "grad_norm": 1.4991551637649536, - "learning_rate": 6.197286432160805e-05, - "loss": 5.0481, - "step": 38351 - }, - { - "epoch": 20.00104302477184, - "grad_norm": 1.5075438022613525, - "learning_rate": 6.197185929648242e-05, - "loss": 5.2745, - "step": 38352 - }, - { - "epoch": 20.00156453715776, - "grad_norm": 1.5979329347610474, - "learning_rate": 6.197085427135678e-05, - "loss": 5.4155, - "step": 38353 - }, - { - "epoch": 20.002086049543678, - "grad_norm": 1.5887905359268188, - "learning_rate": 6.196984924623116e-05, - "loss": 4.9441, - "step": 38354 - }, - { - "epoch": 20.002607561929597, - "grad_norm": 1.5453920364379883, - "learning_rate": 6.196884422110553e-05, - "loss": 5.5281, - "step": 38355 - }, - { - "epoch": 20.003129074315513, - "grad_norm": 1.5092674493789673, - "learning_rate": 6.19678391959799e-05, - "loss": 5.3524, - "step": 38356 - }, - { - "epoch": 20.003650586701433, - "grad_norm": 1.5544883012771606, - "learning_rate": 6.196683417085427e-05, - "loss": 5.4691, - "step": 38357 - }, - { - "epoch": 20.004172099087352, - "grad_norm": 1.4410196542739868, - "learning_rate": 6.196582914572865e-05, - "loss": 4.9703, - "step": 38358 - }, - { - "epoch": 20.00469361147327, - "grad_norm": 1.503227949142456, - "learning_rate": 6.196482412060301e-05, - "loss": 5.4416, - "step": 38359 - }, - { - "epoch": 20.00521512385919, - "grad_norm": 1.4966946840286255, - "learning_rate": 6.196381909547739e-05, - "loss": 4.7147, - "step": 38360 - }, - { - "epoch": 20.00573663624511, - "grad_norm": 1.542250156402588, - "learning_rate": 6.196281407035177e-05, - "loss": 5.1907, - "step": 38361 - }, - { - "epoch": 20.00625814863103, - "grad_norm": 1.478912591934204, - "learning_rate": 6.196180904522614e-05, - "loss": 5.469, - "step": 38362 - }, - { - "epoch": 20.00677966101695, - "grad_norm": 1.4546810388565063, - "learning_rate": 6.196080402010051e-05, - "loss": 4.7283, - "step": 38363 - }, - { - "epoch": 20.00730117340287, - "grad_norm": 1.4002141952514648, - "learning_rate": 6.195979899497489e-05, - "loss": 4.9661, - "step": 38364 - }, - { - "epoch": 20.00782268578879, - "grad_norm": 1.4379291534423828, - "learning_rate": 6.195879396984925e-05, - "loss": 5.4524, - "step": 38365 - }, - { - "epoch": 20.008344198174708, - "grad_norm": 1.453396201133728, - "learning_rate": 6.195778894472361e-05, - "loss": 5.2206, - "step": 38366 - }, - { - "epoch": 20.008865710560627, - "grad_norm": 1.5911533832550049, - "learning_rate": 6.195678391959799e-05, - "loss": 5.0024, - "step": 38367 - }, - { - "epoch": 20.009387222946543, - "grad_norm": 1.5506181716918945, - "learning_rate": 6.195577889447236e-05, - "loss": 4.8984, - "step": 38368 - }, - { - "epoch": 20.009908735332463, - "grad_norm": 1.531511902809143, - "learning_rate": 6.195477386934673e-05, - "loss": 5.112, - "step": 38369 - }, - { - "epoch": 20.010430247718382, - "grad_norm": 1.4704431295394897, - "learning_rate": 6.195376884422111e-05, - "loss": 5.2469, - "step": 38370 - }, - { - "epoch": 20.0109517601043, - "grad_norm": 1.482861876487732, - "learning_rate": 6.195276381909549e-05, - "loss": 5.2703, - "step": 38371 - }, - { - "epoch": 20.01147327249022, - "grad_norm": 1.3952692747116089, - "learning_rate": 6.195175879396985e-05, - "loss": 5.6045, - "step": 38372 - }, - { - "epoch": 20.01199478487614, - "grad_norm": 1.463992714881897, - "learning_rate": 6.195075376884423e-05, - "loss": 5.2716, - "step": 38373 - }, - { - "epoch": 20.01251629726206, - "grad_norm": 1.5541954040527344, - "learning_rate": 6.19497487437186e-05, - "loss": 5.1242, - "step": 38374 - }, - { - "epoch": 20.01303780964798, - "grad_norm": 1.5170527696609497, - "learning_rate": 6.194874371859297e-05, - "loss": 5.2831, - "step": 38375 - }, - { - "epoch": 20.0135593220339, - "grad_norm": 1.5074068307876587, - "learning_rate": 6.194773869346734e-05, - "loss": 4.9258, - "step": 38376 - }, - { - "epoch": 20.01408083441982, - "grad_norm": 1.4942498207092285, - "learning_rate": 6.194673366834172e-05, - "loss": 5.442, - "step": 38377 - }, - { - "epoch": 20.014602346805738, - "grad_norm": 1.554732084274292, - "learning_rate": 6.194572864321608e-05, - "loss": 4.9194, - "step": 38378 - }, - { - "epoch": 20.015123859191657, - "grad_norm": 1.6314871311187744, - "learning_rate": 6.194472361809044e-05, - "loss": 5.437, - "step": 38379 - }, - { - "epoch": 20.015645371577573, - "grad_norm": 1.5272865295410156, - "learning_rate": 6.194371859296482e-05, - "loss": 4.9432, - "step": 38380 - }, - { - "epoch": 20.016166883963493, - "grad_norm": 1.544514536857605, - "learning_rate": 6.19427135678392e-05, - "loss": 5.093, - "step": 38381 - }, - { - "epoch": 20.016688396349412, - "grad_norm": 1.5155658721923828, - "learning_rate": 6.194170854271358e-05, - "loss": 5.0238, - "step": 38382 - }, - { - "epoch": 20.01720990873533, - "grad_norm": 1.590888261795044, - "learning_rate": 6.194070351758794e-05, - "loss": 5.2328, - "step": 38383 - }, - { - "epoch": 20.01773142112125, - "grad_norm": 1.5151185989379883, - "learning_rate": 6.193969849246232e-05, - "loss": 5.07, - "step": 38384 - }, - { - "epoch": 20.01825293350717, - "grad_norm": 1.49502694606781, - "learning_rate": 6.193869346733668e-05, - "loss": 5.1957, - "step": 38385 - }, - { - "epoch": 20.01877444589309, - "grad_norm": 1.5676205158233643, - "learning_rate": 6.193768844221106e-05, - "loss": 5.5284, - "step": 38386 - }, - { - "epoch": 20.01929595827901, - "grad_norm": 1.3869047164916992, - "learning_rate": 6.193668341708543e-05, - "loss": 5.3116, - "step": 38387 - }, - { - "epoch": 20.01981747066493, - "grad_norm": 1.587816834449768, - "learning_rate": 6.19356783919598e-05, - "loss": 5.137, - "step": 38388 - }, - { - "epoch": 20.02033898305085, - "grad_norm": 1.4600805044174194, - "learning_rate": 6.193467336683417e-05, - "loss": 4.8914, - "step": 38389 - }, - { - "epoch": 20.020860495436768, - "grad_norm": 1.598531723022461, - "learning_rate": 6.193366834170855e-05, - "loss": 5.5073, - "step": 38390 - }, - { - "epoch": 20.021382007822687, - "grad_norm": 1.4647703170776367, - "learning_rate": 6.193266331658291e-05, - "loss": 5.0429, - "step": 38391 - }, - { - "epoch": 20.021903520208603, - "grad_norm": 1.545299768447876, - "learning_rate": 6.193165829145729e-05, - "loss": 5.4232, - "step": 38392 - }, - { - "epoch": 20.022425032594523, - "grad_norm": 1.6224243640899658, - "learning_rate": 6.193065326633167e-05, - "loss": 5.0732, - "step": 38393 - }, - { - "epoch": 20.022946544980442, - "grad_norm": 1.4504443407058716, - "learning_rate": 6.192964824120603e-05, - "loss": 5.6731, - "step": 38394 - }, - { - "epoch": 20.02346805736636, - "grad_norm": 1.5926960706710815, - "learning_rate": 6.192864321608041e-05, - "loss": 5.6407, - "step": 38395 - }, - { - "epoch": 20.02398956975228, - "grad_norm": 1.5381401777267456, - "learning_rate": 6.192763819095477e-05, - "loss": 5.4208, - "step": 38396 - }, - { - "epoch": 20.0245110821382, - "grad_norm": 1.6528329849243164, - "learning_rate": 6.192663316582915e-05, - "loss": 4.7361, - "step": 38397 - }, - { - "epoch": 20.02503259452412, - "grad_norm": 1.6026922464370728, - "learning_rate": 6.192562814070351e-05, - "loss": 5.5348, - "step": 38398 - }, - { - "epoch": 20.02555410691004, - "grad_norm": 1.551564335823059, - "learning_rate": 6.192462311557789e-05, - "loss": 5.4295, - "step": 38399 - }, - { - "epoch": 20.02607561929596, - "grad_norm": 1.5610331296920776, - "learning_rate": 6.192361809045226e-05, - "loss": 5.3704, - "step": 38400 - }, - { - "epoch": 20.02659713168188, - "grad_norm": 1.5625617504119873, - "learning_rate": 6.192261306532663e-05, - "loss": 5.058, - "step": 38401 - }, - { - "epoch": 20.027118644067798, - "grad_norm": 1.3963245153427124, - "learning_rate": 6.192160804020101e-05, - "loss": 5.7464, - "step": 38402 - }, - { - "epoch": 20.027640156453717, - "grad_norm": 1.5664513111114502, - "learning_rate": 6.192060301507539e-05, - "loss": 4.778, - "step": 38403 - }, - { - "epoch": 20.028161668839633, - "grad_norm": 1.4568419456481934, - "learning_rate": 6.191959798994975e-05, - "loss": 5.6187, - "step": 38404 - }, - { - "epoch": 20.028683181225553, - "grad_norm": 1.5702580213546753, - "learning_rate": 6.191859296482412e-05, - "loss": 5.4227, - "step": 38405 - }, - { - "epoch": 20.029204693611472, - "grad_norm": 1.5674772262573242, - "learning_rate": 6.19175879396985e-05, - "loss": 5.3551, - "step": 38406 - }, - { - "epoch": 20.02972620599739, - "grad_norm": 1.5585013628005981, - "learning_rate": 6.191658291457286e-05, - "loss": 5.1697, - "step": 38407 - }, - { - "epoch": 20.03024771838331, - "grad_norm": 1.642568826675415, - "learning_rate": 6.191557788944724e-05, - "loss": 5.1243, - "step": 38408 - }, - { - "epoch": 20.03076923076923, - "grad_norm": 1.5893622636795044, - "learning_rate": 6.19145728643216e-05, - "loss": 4.8495, - "step": 38409 - }, - { - "epoch": 20.03129074315515, - "grad_norm": 1.5313950777053833, - "learning_rate": 6.191356783919598e-05, - "loss": 5.0677, - "step": 38410 - }, - { - "epoch": 20.03181225554107, - "grad_norm": 1.4648582935333252, - "learning_rate": 6.191256281407035e-05, - "loss": 5.5975, - "step": 38411 - }, - { - "epoch": 20.03233376792699, - "grad_norm": 1.5536527633666992, - "learning_rate": 6.191155778894472e-05, - "loss": 5.0323, - "step": 38412 - }, - { - "epoch": 20.03285528031291, - "grad_norm": 1.557538390159607, - "learning_rate": 6.19105527638191e-05, - "loss": 5.0763, - "step": 38413 - }, - { - "epoch": 20.033376792698828, - "grad_norm": 1.4583901166915894, - "learning_rate": 6.190954773869348e-05, - "loss": 5.4506, - "step": 38414 - }, - { - "epoch": 20.033898305084747, - "grad_norm": 1.5870263576507568, - "learning_rate": 6.190854271356784e-05, - "loss": 5.0072, - "step": 38415 - }, - { - "epoch": 20.034419817470663, - "grad_norm": 1.5255483388900757, - "learning_rate": 6.190753768844222e-05, - "loss": 5.3761, - "step": 38416 - }, - { - "epoch": 20.034941329856583, - "grad_norm": 1.5742522478103638, - "learning_rate": 6.190653266331659e-05, - "loss": 5.2756, - "step": 38417 - }, - { - "epoch": 20.035462842242502, - "grad_norm": 1.5301873683929443, - "learning_rate": 6.190552763819096e-05, - "loss": 4.8402, - "step": 38418 - }, - { - "epoch": 20.03598435462842, - "grad_norm": 1.5005854368209839, - "learning_rate": 6.190452261306533e-05, - "loss": 5.2236, - "step": 38419 - }, - { - "epoch": 20.03650586701434, - "grad_norm": 1.4867439270019531, - "learning_rate": 6.190351758793969e-05, - "loss": 5.0934, - "step": 38420 - }, - { - "epoch": 20.03702737940026, - "grad_norm": 1.5350110530853271, - "learning_rate": 6.190251256281407e-05, - "loss": 5.5612, - "step": 38421 - }, - { - "epoch": 20.03754889178618, - "grad_norm": 1.4691708087921143, - "learning_rate": 6.190150753768845e-05, - "loss": 5.5288, - "step": 38422 - }, - { - "epoch": 20.0380704041721, - "grad_norm": 1.512378215789795, - "learning_rate": 6.190050251256282e-05, - "loss": 4.7789, - "step": 38423 - }, - { - "epoch": 20.03859191655802, - "grad_norm": 1.4904849529266357, - "learning_rate": 6.189949748743719e-05, - "loss": 5.4314, - "step": 38424 - }, - { - "epoch": 20.03911342894394, - "grad_norm": 1.4355167150497437, - "learning_rate": 6.189849246231157e-05, - "loss": 5.5798, - "step": 38425 - }, - { - "epoch": 20.039634941329858, - "grad_norm": 1.4825509786605835, - "learning_rate": 6.189748743718593e-05, - "loss": 5.4187, - "step": 38426 - }, - { - "epoch": 20.040156453715777, - "grad_norm": 1.5370471477508545, - "learning_rate": 6.189648241206031e-05, - "loss": 5.4004, - "step": 38427 - }, - { - "epoch": 20.040677966101693, - "grad_norm": 1.4789812564849854, - "learning_rate": 6.189547738693467e-05, - "loss": 5.6624, - "step": 38428 - }, - { - "epoch": 20.041199478487613, - "grad_norm": 1.4799489974975586, - "learning_rate": 6.189447236180905e-05, - "loss": 5.2217, - "step": 38429 - }, - { - "epoch": 20.041720990873532, - "grad_norm": 1.5122636556625366, - "learning_rate": 6.189346733668342e-05, - "loss": 4.8582, - "step": 38430 - }, - { - "epoch": 20.042242503259452, - "grad_norm": 1.543449878692627, - "learning_rate": 6.18924623115578e-05, - "loss": 5.2453, - "step": 38431 - }, - { - "epoch": 20.04276401564537, - "grad_norm": 1.5097383260726929, - "learning_rate": 6.189145728643216e-05, - "loss": 5.0603, - "step": 38432 - }, - { - "epoch": 20.04328552803129, - "grad_norm": 1.4585131406784058, - "learning_rate": 6.189045226130654e-05, - "loss": 5.3117, - "step": 38433 - }, - { - "epoch": 20.04380704041721, - "grad_norm": 1.5462977886199951, - "learning_rate": 6.188944723618091e-05, - "loss": 5.0589, - "step": 38434 - }, - { - "epoch": 20.04432855280313, - "grad_norm": 1.436532735824585, - "learning_rate": 6.188844221105528e-05, - "loss": 5.6479, - "step": 38435 - }, - { - "epoch": 20.04485006518905, - "grad_norm": 1.4964061975479126, - "learning_rate": 6.188743718592966e-05, - "loss": 5.0756, - "step": 38436 - }, - { - "epoch": 20.04537157757497, - "grad_norm": 1.4857820272445679, - "learning_rate": 6.188643216080402e-05, - "loss": 5.39, - "step": 38437 - }, - { - "epoch": 20.045893089960888, - "grad_norm": 1.4438871145248413, - "learning_rate": 6.18854271356784e-05, - "loss": 4.9049, - "step": 38438 - }, - { - "epoch": 20.046414602346807, - "grad_norm": 1.551041841506958, - "learning_rate": 6.188442211055276e-05, - "loss": 4.8662, - "step": 38439 - }, - { - "epoch": 20.046936114732723, - "grad_norm": 1.5907386541366577, - "learning_rate": 6.188341708542714e-05, - "loss": 4.9587, - "step": 38440 - }, - { - "epoch": 20.047457627118643, - "grad_norm": 1.4488600492477417, - "learning_rate": 6.18824120603015e-05, - "loss": 5.4533, - "step": 38441 - }, - { - "epoch": 20.047979139504562, - "grad_norm": 1.4760756492614746, - "learning_rate": 6.188140703517588e-05, - "loss": 5.5056, - "step": 38442 - }, - { - "epoch": 20.048500651890482, - "grad_norm": 1.4011493921279907, - "learning_rate": 6.188040201005026e-05, - "loss": 5.743, - "step": 38443 - }, - { - "epoch": 20.0490221642764, - "grad_norm": 1.5420926809310913, - "learning_rate": 6.187939698492464e-05, - "loss": 5.4779, - "step": 38444 - }, - { - "epoch": 20.04954367666232, - "grad_norm": 1.4932574033737183, - "learning_rate": 6.1878391959799e-05, - "loss": 5.3426, - "step": 38445 - }, - { - "epoch": 20.05006518904824, - "grad_norm": 1.466773509979248, - "learning_rate": 6.187738693467337e-05, - "loss": 5.6908, - "step": 38446 - }, - { - "epoch": 20.05058670143416, - "grad_norm": 1.4316977262496948, - "learning_rate": 6.187638190954774e-05, - "loss": 5.0673, - "step": 38447 - }, - { - "epoch": 20.05110821382008, - "grad_norm": 1.5069615840911865, - "learning_rate": 6.187537688442211e-05, - "loss": 5.3042, - "step": 38448 - }, - { - "epoch": 20.051629726206, - "grad_norm": 1.4266856908798218, - "learning_rate": 6.187437185929649e-05, - "loss": 5.8059, - "step": 38449 - }, - { - "epoch": 20.052151238591918, - "grad_norm": 1.508659839630127, - "learning_rate": 6.187336683417085e-05, - "loss": 5.6164, - "step": 38450 - }, - { - "epoch": 20.052672750977834, - "grad_norm": 1.5979679822921753, - "learning_rate": 6.187236180904523e-05, - "loss": 4.9606, - "step": 38451 - }, - { - "epoch": 20.053194263363753, - "grad_norm": 1.755431056022644, - "learning_rate": 6.187135678391959e-05, - "loss": 4.9024, - "step": 38452 - }, - { - "epoch": 20.053715775749673, - "grad_norm": 1.6115193367004395, - "learning_rate": 6.187035175879397e-05, - "loss": 4.673, - "step": 38453 - }, - { - "epoch": 20.054237288135592, - "grad_norm": 1.4572842121124268, - "learning_rate": 6.186934673366835e-05, - "loss": 5.317, - "step": 38454 - }, - { - "epoch": 20.054758800521512, - "grad_norm": 1.66477370262146, - "learning_rate": 6.186834170854273e-05, - "loss": 5.1272, - "step": 38455 - }, - { - "epoch": 20.05528031290743, - "grad_norm": 1.5466547012329102, - "learning_rate": 6.186733668341709e-05, - "loss": 4.9858, - "step": 38456 - }, - { - "epoch": 20.05580182529335, - "grad_norm": 1.4053394794464111, - "learning_rate": 6.186633165829147e-05, - "loss": 5.6473, - "step": 38457 - }, - { - "epoch": 20.05632333767927, - "grad_norm": 1.480117917060852, - "learning_rate": 6.186532663316583e-05, - "loss": 5.2639, - "step": 38458 - }, - { - "epoch": 20.05684485006519, - "grad_norm": 1.6367876529693604, - "learning_rate": 6.18643216080402e-05, - "loss": 4.9262, - "step": 38459 - }, - { - "epoch": 20.05736636245111, - "grad_norm": 1.5097612142562866, - "learning_rate": 6.186331658291457e-05, - "loss": 5.1067, - "step": 38460 - }, - { - "epoch": 20.05788787483703, - "grad_norm": 1.4982255697250366, - "learning_rate": 6.186231155778894e-05, - "loss": 5.7607, - "step": 38461 - }, - { - "epoch": 20.058409387222948, - "grad_norm": 1.635248064994812, - "learning_rate": 6.186130653266332e-05, - "loss": 5.104, - "step": 38462 - }, - { - "epoch": 20.058930899608864, - "grad_norm": 1.446808099746704, - "learning_rate": 6.18603015075377e-05, - "loss": 5.5374, - "step": 38463 - }, - { - "epoch": 20.059452411994783, - "grad_norm": 1.4944933652877808, - "learning_rate": 6.185929648241207e-05, - "loss": 5.4496, - "step": 38464 - }, - { - "epoch": 20.059973924380703, - "grad_norm": 1.5801339149475098, - "learning_rate": 6.185829145728644e-05, - "loss": 5.5124, - "step": 38465 - }, - { - "epoch": 20.060495436766622, - "grad_norm": 1.5195292234420776, - "learning_rate": 6.185728643216081e-05, - "loss": 5.101, - "step": 38466 - }, - { - "epoch": 20.061016949152542, - "grad_norm": 1.4888685941696167, - "learning_rate": 6.185628140703518e-05, - "loss": 5.0348, - "step": 38467 - }, - { - "epoch": 20.06153846153846, - "grad_norm": 1.5030338764190674, - "learning_rate": 6.185527638190956e-05, - "loss": 4.9932, - "step": 38468 - }, - { - "epoch": 20.06205997392438, - "grad_norm": 1.5674996376037598, - "learning_rate": 6.185427135678392e-05, - "loss": 5.2346, - "step": 38469 - }, - { - "epoch": 20.0625814863103, - "grad_norm": 1.5799942016601562, - "learning_rate": 6.18532663316583e-05, - "loss": 5.3575, - "step": 38470 - }, - { - "epoch": 20.06310299869622, - "grad_norm": 1.4671993255615234, - "learning_rate": 6.185226130653266e-05, - "loss": 5.436, - "step": 38471 - }, - { - "epoch": 20.06362451108214, - "grad_norm": 1.5939100980758667, - "learning_rate": 6.185125628140703e-05, - "loss": 5.2234, - "step": 38472 - }, - { - "epoch": 20.06414602346806, - "grad_norm": 1.534515380859375, - "learning_rate": 6.18502512562814e-05, - "loss": 4.6969, - "step": 38473 - }, - { - "epoch": 20.064667535853978, - "grad_norm": 1.5435783863067627, - "learning_rate": 6.184924623115578e-05, - "loss": 5.4023, - "step": 38474 - }, - { - "epoch": 20.065189048239894, - "grad_norm": 1.5855234861373901, - "learning_rate": 6.184824120603016e-05, - "loss": 4.8683, - "step": 38475 - }, - { - "epoch": 20.065710560625813, - "grad_norm": 1.4912241697311401, - "learning_rate": 6.184723618090452e-05, - "loss": 5.3877, - "step": 38476 - }, - { - "epoch": 20.066232073011733, - "grad_norm": 1.502462387084961, - "learning_rate": 6.18462311557789e-05, - "loss": 5.6019, - "step": 38477 - }, - { - "epoch": 20.066753585397652, - "grad_norm": 1.551421880722046, - "learning_rate": 6.184522613065327e-05, - "loss": 5.3343, - "step": 38478 - }, - { - "epoch": 20.067275097783572, - "grad_norm": 1.5208722352981567, - "learning_rate": 6.184422110552764e-05, - "loss": 5.0349, - "step": 38479 - }, - { - "epoch": 20.06779661016949, - "grad_norm": 1.4309135675430298, - "learning_rate": 6.184321608040201e-05, - "loss": 5.3525, - "step": 38480 - }, - { - "epoch": 20.06831812255541, - "grad_norm": 1.5060803890228271, - "learning_rate": 6.184221105527639e-05, - "loss": 5.2212, - "step": 38481 - }, - { - "epoch": 20.06883963494133, - "grad_norm": 1.456944465637207, - "learning_rate": 6.184120603015075e-05, - "loss": 5.3446, - "step": 38482 - }, - { - "epoch": 20.06936114732725, - "grad_norm": 1.5476816892623901, - "learning_rate": 6.184020100502513e-05, - "loss": 5.2039, - "step": 38483 - }, - { - "epoch": 20.06988265971317, - "grad_norm": 1.5427982807159424, - "learning_rate": 6.18391959798995e-05, - "loss": 4.9848, - "step": 38484 - }, - { - "epoch": 20.07040417209909, - "grad_norm": 1.5014585256576538, - "learning_rate": 6.183819095477387e-05, - "loss": 5.3976, - "step": 38485 - }, - { - "epoch": 20.070925684485008, - "grad_norm": 1.53799569606781, - "learning_rate": 6.183718592964825e-05, - "loss": 4.7688, - "step": 38486 - }, - { - "epoch": 20.071447196870924, - "grad_norm": 1.5701849460601807, - "learning_rate": 6.183618090452261e-05, - "loss": 4.6645, - "step": 38487 - }, - { - "epoch": 20.071968709256844, - "grad_norm": 1.5447826385498047, - "learning_rate": 6.183517587939699e-05, - "loss": 5.5599, - "step": 38488 - }, - { - "epoch": 20.072490221642763, - "grad_norm": 1.5328484773635864, - "learning_rate": 6.183417085427135e-05, - "loss": 5.6055, - "step": 38489 - }, - { - "epoch": 20.073011734028682, - "grad_norm": 1.597350001335144, - "learning_rate": 6.183316582914573e-05, - "loss": 5.2684, - "step": 38490 - }, - { - "epoch": 20.073533246414602, - "grad_norm": 1.5024501085281372, - "learning_rate": 6.18321608040201e-05, - "loss": 5.5539, - "step": 38491 - }, - { - "epoch": 20.07405475880052, - "grad_norm": 1.5314534902572632, - "learning_rate": 6.183115577889447e-05, - "loss": 5.2335, - "step": 38492 - }, - { - "epoch": 20.07457627118644, - "grad_norm": 1.6881284713745117, - "learning_rate": 6.183015075376884e-05, - "loss": 4.8615, - "step": 38493 - }, - { - "epoch": 20.07509778357236, - "grad_norm": 1.5235024690628052, - "learning_rate": 6.182914572864322e-05, - "loss": 5.0037, - "step": 38494 - }, - { - "epoch": 20.07561929595828, - "grad_norm": 1.5761784315109253, - "learning_rate": 6.18281407035176e-05, - "loss": 5.1075, - "step": 38495 - }, - { - "epoch": 20.0761408083442, - "grad_norm": 1.5596257448196411, - "learning_rate": 6.182713567839197e-05, - "loss": 4.8236, - "step": 38496 - }, - { - "epoch": 20.07666232073012, - "grad_norm": 1.5764896869659424, - "learning_rate": 6.182613065326634e-05, - "loss": 5.2057, - "step": 38497 - }, - { - "epoch": 20.077183833116038, - "grad_norm": 1.5630866289138794, - "learning_rate": 6.18251256281407e-05, - "loss": 5.222, - "step": 38498 - }, - { - "epoch": 20.077705345501954, - "grad_norm": 1.4481397867202759, - "learning_rate": 6.182412060301508e-05, - "loss": 5.4137, - "step": 38499 - }, - { - "epoch": 20.078226857887874, - "grad_norm": 1.4301581382751465, - "learning_rate": 6.182311557788944e-05, - "loss": 5.1653, - "step": 38500 - }, - { - "epoch": 20.078748370273793, - "grad_norm": 1.6191366910934448, - "learning_rate": 6.182211055276382e-05, - "loss": 4.8714, - "step": 38501 - }, - { - "epoch": 20.079269882659712, - "grad_norm": 1.448967695236206, - "learning_rate": 6.182110552763819e-05, - "loss": 5.3517, - "step": 38502 - }, - { - "epoch": 20.079791395045632, - "grad_norm": 1.455005407333374, - "learning_rate": 6.182010050251256e-05, - "loss": 4.832, - "step": 38503 - }, - { - "epoch": 20.08031290743155, - "grad_norm": 1.4309290647506714, - "learning_rate": 6.181909547738694e-05, - "loss": 5.4649, - "step": 38504 - }, - { - "epoch": 20.08083441981747, - "grad_norm": 1.4761850833892822, - "learning_rate": 6.181809045226132e-05, - "loss": 5.4937, - "step": 38505 - }, - { - "epoch": 20.08135593220339, - "grad_norm": 1.533424973487854, - "learning_rate": 6.181708542713568e-05, - "loss": 5.5969, - "step": 38506 - }, - { - "epoch": 20.08187744458931, - "grad_norm": 1.4834061861038208, - "learning_rate": 6.181608040201006e-05, - "loss": 4.6386, - "step": 38507 - }, - { - "epoch": 20.08239895697523, - "grad_norm": 1.5381070375442505, - "learning_rate": 6.181507537688443e-05, - "loss": 5.0967, - "step": 38508 - }, - { - "epoch": 20.08292046936115, - "grad_norm": 1.4019349813461304, - "learning_rate": 6.18140703517588e-05, - "loss": 4.6498, - "step": 38509 - }, - { - "epoch": 20.083441981747068, - "grad_norm": 1.573056697845459, - "learning_rate": 6.181306532663317e-05, - "loss": 5.3443, - "step": 38510 - }, - { - "epoch": 20.083963494132984, - "grad_norm": 1.5414665937423706, - "learning_rate": 6.181206030150755e-05, - "loss": 5.4254, - "step": 38511 - }, - { - "epoch": 20.084485006518904, - "grad_norm": 1.452172875404358, - "learning_rate": 6.181105527638191e-05, - "loss": 5.659, - "step": 38512 - }, - { - "epoch": 20.085006518904823, - "grad_norm": 1.4776780605316162, - "learning_rate": 6.181005025125627e-05, - "loss": 5.3069, - "step": 38513 - }, - { - "epoch": 20.085528031290742, - "grad_norm": 1.4714559316635132, - "learning_rate": 6.180904522613065e-05, - "loss": 5.4353, - "step": 38514 - }, - { - "epoch": 20.086049543676662, - "grad_norm": 1.5292280912399292, - "learning_rate": 6.180804020100503e-05, - "loss": 5.4732, - "step": 38515 - }, - { - "epoch": 20.08657105606258, - "grad_norm": 1.5390563011169434, - "learning_rate": 6.180703517587941e-05, - "loss": 5.1489, - "step": 38516 - }, - { - "epoch": 20.0870925684485, - "grad_norm": 1.5135494470596313, - "learning_rate": 6.180603015075377e-05, - "loss": 5.3783, - "step": 38517 - }, - { - "epoch": 20.08761408083442, - "grad_norm": 1.503393530845642, - "learning_rate": 6.180502512562815e-05, - "loss": 5.5059, - "step": 38518 - }, - { - "epoch": 20.08813559322034, - "grad_norm": 1.566335678100586, - "learning_rate": 6.180402010050251e-05, - "loss": 5.031, - "step": 38519 - }, - { - "epoch": 20.08865710560626, - "grad_norm": 1.5201599597930908, - "learning_rate": 6.180301507537689e-05, - "loss": 4.9813, - "step": 38520 - }, - { - "epoch": 20.08917861799218, - "grad_norm": 1.494736671447754, - "learning_rate": 6.180201005025126e-05, - "loss": 5.3736, - "step": 38521 - }, - { - "epoch": 20.089700130378098, - "grad_norm": 1.4916061162948608, - "learning_rate": 6.180100502512563e-05, - "loss": 5.2267, - "step": 38522 - }, - { - "epoch": 20.090221642764014, - "grad_norm": 1.4387646913528442, - "learning_rate": 6.18e-05, - "loss": 5.5116, - "step": 38523 - }, - { - "epoch": 20.090743155149934, - "grad_norm": 1.4683613777160645, - "learning_rate": 6.179899497487438e-05, - "loss": 5.0749, - "step": 38524 - }, - { - "epoch": 20.091264667535853, - "grad_norm": 1.5496107339859009, - "learning_rate": 6.179798994974875e-05, - "loss": 4.8635, - "step": 38525 - }, - { - "epoch": 20.091786179921773, - "grad_norm": 1.5670212507247925, - "learning_rate": 6.179698492462312e-05, - "loss": 4.7193, - "step": 38526 - }, - { - "epoch": 20.092307692307692, - "grad_norm": 1.6760023832321167, - "learning_rate": 6.17959798994975e-05, - "loss": 4.7146, - "step": 38527 - }, - { - "epoch": 20.09282920469361, - "grad_norm": 1.6453315019607544, - "learning_rate": 6.179497487437186e-05, - "loss": 5.575, - "step": 38528 - }, - { - "epoch": 20.09335071707953, - "grad_norm": 1.6068875789642334, - "learning_rate": 6.179396984924624e-05, - "loss": 5.6045, - "step": 38529 - }, - { - "epoch": 20.09387222946545, - "grad_norm": 1.4400311708450317, - "learning_rate": 6.17929648241206e-05, - "loss": 5.3338, - "step": 38530 - }, - { - "epoch": 20.09439374185137, - "grad_norm": 1.463665246963501, - "learning_rate": 6.179195979899498e-05, - "loss": 5.4333, - "step": 38531 - }, - { - "epoch": 20.09491525423729, - "grad_norm": 1.4831563234329224, - "learning_rate": 6.179095477386934e-05, - "loss": 5.5331, - "step": 38532 - }, - { - "epoch": 20.09543676662321, - "grad_norm": 1.5855357646942139, - "learning_rate": 6.178994974874372e-05, - "loss": 5.0125, - "step": 38533 - }, - { - "epoch": 20.09595827900913, - "grad_norm": 1.495572805404663, - "learning_rate": 6.178894472361809e-05, - "loss": 5.7021, - "step": 38534 - }, - { - "epoch": 20.096479791395044, - "grad_norm": 1.4683012962341309, - "learning_rate": 6.178793969849246e-05, - "loss": 5.3324, - "step": 38535 - }, - { - "epoch": 20.097001303780964, - "grad_norm": 1.7425317764282227, - "learning_rate": 6.178693467336684e-05, - "loss": 5.3896, - "step": 38536 - }, - { - "epoch": 20.097522816166883, - "grad_norm": 1.4906350374221802, - "learning_rate": 6.178592964824122e-05, - "loss": 5.5473, - "step": 38537 - }, - { - "epoch": 20.098044328552803, - "grad_norm": 1.5587809085845947, - "learning_rate": 6.178492462311558e-05, - "loss": 5.0221, - "step": 38538 - }, - { - "epoch": 20.098565840938722, - "grad_norm": 1.655808925628662, - "learning_rate": 6.178391959798995e-05, - "loss": 5.0678, - "step": 38539 - }, - { - "epoch": 20.09908735332464, - "grad_norm": 1.4382820129394531, - "learning_rate": 6.178291457286433e-05, - "loss": 5.5419, - "step": 38540 - }, - { - "epoch": 20.09960886571056, - "grad_norm": 1.5981334447860718, - "learning_rate": 6.178190954773869e-05, - "loss": 5.009, - "step": 38541 - }, - { - "epoch": 20.10013037809648, - "grad_norm": 1.475565791130066, - "learning_rate": 6.178090452261307e-05, - "loss": 4.9232, - "step": 38542 - }, - { - "epoch": 20.1006518904824, - "grad_norm": 1.655060052871704, - "learning_rate": 6.177989949748743e-05, - "loss": 5.2709, - "step": 38543 - }, - { - "epoch": 20.10117340286832, - "grad_norm": 1.405463457107544, - "learning_rate": 6.177889447236181e-05, - "loss": 5.574, - "step": 38544 - }, - { - "epoch": 20.10169491525424, - "grad_norm": 1.4235963821411133, - "learning_rate": 6.177788944723619e-05, - "loss": 5.5684, - "step": 38545 - }, - { - "epoch": 20.102216427640155, - "grad_norm": 1.578945279121399, - "learning_rate": 6.177688442211057e-05, - "loss": 5.2131, - "step": 38546 - }, - { - "epoch": 20.102737940026074, - "grad_norm": 1.4678783416748047, - "learning_rate": 6.177587939698493e-05, - "loss": 5.6409, - "step": 38547 - }, - { - "epoch": 20.103259452411994, - "grad_norm": 1.5249897241592407, - "learning_rate": 6.177487437185931e-05, - "loss": 5.2485, - "step": 38548 - }, - { - "epoch": 20.103780964797913, - "grad_norm": 1.3929344415664673, - "learning_rate": 6.177386934673367e-05, - "loss": 5.7874, - "step": 38549 - }, - { - "epoch": 20.104302477183833, - "grad_norm": 1.5698636770248413, - "learning_rate": 6.177286432160805e-05, - "loss": 5.1236, - "step": 38550 - }, - { - "epoch": 20.104823989569752, - "grad_norm": 1.5356385707855225, - "learning_rate": 6.177185929648241e-05, - "loss": 5.1866, - "step": 38551 - }, - { - "epoch": 20.10534550195567, - "grad_norm": 1.5756529569625854, - "learning_rate": 6.177085427135678e-05, - "loss": 5.0823, - "step": 38552 - }, - { - "epoch": 20.10586701434159, - "grad_norm": 1.631987452507019, - "learning_rate": 6.176984924623116e-05, - "loss": 5.015, - "step": 38553 - }, - { - "epoch": 20.10638852672751, - "grad_norm": 1.4420053958892822, - "learning_rate": 6.176884422110552e-05, - "loss": 5.1761, - "step": 38554 - }, - { - "epoch": 20.10691003911343, - "grad_norm": 1.5529810190200806, - "learning_rate": 6.17678391959799e-05, - "loss": 5.1748, - "step": 38555 - }, - { - "epoch": 20.10743155149935, - "grad_norm": 1.5226433277130127, - "learning_rate": 6.176683417085428e-05, - "loss": 5.5658, - "step": 38556 - }, - { - "epoch": 20.10795306388527, - "grad_norm": 1.5063096284866333, - "learning_rate": 6.176582914572865e-05, - "loss": 5.3938, - "step": 38557 - }, - { - "epoch": 20.108474576271185, - "grad_norm": 1.465323805809021, - "learning_rate": 6.176482412060302e-05, - "loss": 5.4899, - "step": 38558 - }, - { - "epoch": 20.108996088657104, - "grad_norm": 1.571457862854004, - "learning_rate": 6.17638190954774e-05, - "loss": 5.0036, - "step": 38559 - }, - { - "epoch": 20.109517601043024, - "grad_norm": 1.561355710029602, - "learning_rate": 6.176281407035176e-05, - "loss": 5.291, - "step": 38560 - }, - { - "epoch": 20.110039113428943, - "grad_norm": 1.5571049451828003, - "learning_rate": 6.176180904522614e-05, - "loss": 5.8926, - "step": 38561 - }, - { - "epoch": 20.110560625814863, - "grad_norm": 1.5960458517074585, - "learning_rate": 6.17608040201005e-05, - "loss": 5.2228, - "step": 38562 - }, - { - "epoch": 20.111082138200782, - "grad_norm": 1.4739036560058594, - "learning_rate": 6.175979899497488e-05, - "loss": 5.2315, - "step": 38563 - }, - { - "epoch": 20.1116036505867, - "grad_norm": 1.560827612876892, - "learning_rate": 6.175879396984924e-05, - "loss": 4.9656, - "step": 38564 - }, - { - "epoch": 20.11212516297262, - "grad_norm": 1.4980367422103882, - "learning_rate": 6.175778894472361e-05, - "loss": 5.0656, - "step": 38565 - }, - { - "epoch": 20.11264667535854, - "grad_norm": 1.438679575920105, - "learning_rate": 6.175678391959799e-05, - "loss": 4.7451, - "step": 38566 - }, - { - "epoch": 20.11316818774446, - "grad_norm": 1.404511570930481, - "learning_rate": 6.175577889447236e-05, - "loss": 5.7304, - "step": 38567 - }, - { - "epoch": 20.11368970013038, - "grad_norm": 1.4491422176361084, - "learning_rate": 6.175477386934674e-05, - "loss": 5.3432, - "step": 38568 - }, - { - "epoch": 20.1142112125163, - "grad_norm": 1.558864951133728, - "learning_rate": 6.175376884422111e-05, - "loss": 5.3514, - "step": 38569 - }, - { - "epoch": 20.114732724902215, - "grad_norm": 1.4954278469085693, - "learning_rate": 6.175276381909548e-05, - "loss": 5.2829, - "step": 38570 - }, - { - "epoch": 20.115254237288134, - "grad_norm": 1.5221354961395264, - "learning_rate": 6.175175879396985e-05, - "loss": 5.2184, - "step": 38571 - }, - { - "epoch": 20.115775749674054, - "grad_norm": 1.4322227239608765, - "learning_rate": 6.175075376884423e-05, - "loss": 5.1887, - "step": 38572 - }, - { - "epoch": 20.116297262059973, - "grad_norm": 1.552506923675537, - "learning_rate": 6.174974874371859e-05, - "loss": 5.3747, - "step": 38573 - }, - { - "epoch": 20.116818774445893, - "grad_norm": 1.6186401844024658, - "learning_rate": 6.174874371859297e-05, - "loss": 4.3894, - "step": 38574 - }, - { - "epoch": 20.117340286831812, - "grad_norm": 1.479161024093628, - "learning_rate": 6.174773869346733e-05, - "loss": 5.4412, - "step": 38575 - }, - { - "epoch": 20.11786179921773, - "grad_norm": 1.5059820413589478, - "learning_rate": 6.174673366834171e-05, - "loss": 5.3221, - "step": 38576 - }, - { - "epoch": 20.11838331160365, - "grad_norm": 1.537973165512085, - "learning_rate": 6.174572864321609e-05, - "loss": 5.3658, - "step": 38577 - }, - { - "epoch": 20.11890482398957, - "grad_norm": 1.4992201328277588, - "learning_rate": 6.174472361809045e-05, - "loss": 5.2155, - "step": 38578 - }, - { - "epoch": 20.11942633637549, - "grad_norm": 1.398603916168213, - "learning_rate": 6.174371859296483e-05, - "loss": 5.5064, - "step": 38579 - }, - { - "epoch": 20.11994784876141, - "grad_norm": 1.5721955299377441, - "learning_rate": 6.17427135678392e-05, - "loss": 4.9478, - "step": 38580 - }, - { - "epoch": 20.12046936114733, - "grad_norm": 1.50698983669281, - "learning_rate": 6.174170854271357e-05, - "loss": 4.8125, - "step": 38581 - }, - { - "epoch": 20.120990873533245, - "grad_norm": 1.4552392959594727, - "learning_rate": 6.174070351758794e-05, - "loss": 5.0577, - "step": 38582 - }, - { - "epoch": 20.121512385919164, - "grad_norm": 1.3634613752365112, - "learning_rate": 6.173969849246232e-05, - "loss": 5.8709, - "step": 38583 - }, - { - "epoch": 20.122033898305084, - "grad_norm": 1.615193247795105, - "learning_rate": 6.173869346733668e-05, - "loss": 5.0255, - "step": 38584 - }, - { - "epoch": 20.122555410691003, - "grad_norm": 1.461206316947937, - "learning_rate": 6.173768844221106e-05, - "loss": 5.0813, - "step": 38585 - }, - { - "epoch": 20.123076923076923, - "grad_norm": 1.9166373014450073, - "learning_rate": 6.173668341708542e-05, - "loss": 4.8397, - "step": 38586 - }, - { - "epoch": 20.123598435462842, - "grad_norm": 1.6170282363891602, - "learning_rate": 6.17356783919598e-05, - "loss": 5.0112, - "step": 38587 - }, - { - "epoch": 20.12411994784876, - "grad_norm": 1.5713094472885132, - "learning_rate": 6.173467336683418e-05, - "loss": 4.8932, - "step": 38588 - }, - { - "epoch": 20.12464146023468, - "grad_norm": 1.4417084455490112, - "learning_rate": 6.173366834170856e-05, - "loss": 5.453, - "step": 38589 - }, - { - "epoch": 20.1251629726206, - "grad_norm": 1.4695556163787842, - "learning_rate": 6.173266331658292e-05, - "loss": 5.3211, - "step": 38590 - }, - { - "epoch": 20.12568448500652, - "grad_norm": 1.5307310819625854, - "learning_rate": 6.173165829145728e-05, - "loss": 5.5081, - "step": 38591 - }, - { - "epoch": 20.12620599739244, - "grad_norm": 1.5614421367645264, - "learning_rate": 6.173065326633166e-05, - "loss": 4.6494, - "step": 38592 - }, - { - "epoch": 20.12672750977836, - "grad_norm": 1.4611682891845703, - "learning_rate": 6.172964824120603e-05, - "loss": 5.3175, - "step": 38593 - }, - { - "epoch": 20.127249022164275, - "grad_norm": 1.6017662286758423, - "learning_rate": 6.17286432160804e-05, - "loss": 4.9029, - "step": 38594 - }, - { - "epoch": 20.127770534550194, - "grad_norm": 1.4874608516693115, - "learning_rate": 6.172763819095477e-05, - "loss": 5.4427, - "step": 38595 - }, - { - "epoch": 20.128292046936114, - "grad_norm": 1.5627154111862183, - "learning_rate": 6.172663316582915e-05, - "loss": 5.4239, - "step": 38596 - }, - { - "epoch": 20.128813559322033, - "grad_norm": 1.5337722301483154, - "learning_rate": 6.172562814070352e-05, - "loss": 5.0623, - "step": 38597 - }, - { - "epoch": 20.129335071707953, - "grad_norm": 1.557196855545044, - "learning_rate": 6.17246231155779e-05, - "loss": 5.3106, - "step": 38598 - }, - { - "epoch": 20.129856584093872, - "grad_norm": 1.57356858253479, - "learning_rate": 6.172361809045227e-05, - "loss": 5.0455, - "step": 38599 - }, - { - "epoch": 20.13037809647979, - "grad_norm": 1.4545104503631592, - "learning_rate": 6.172261306532664e-05, - "loss": 5.4049, - "step": 38600 - }, - { - "epoch": 20.13089960886571, - "grad_norm": 1.4383140802383423, - "learning_rate": 6.172160804020101e-05, - "loss": 5.6317, - "step": 38601 - }, - { - "epoch": 20.13142112125163, - "grad_norm": 1.4411762952804565, - "learning_rate": 6.172060301507539e-05, - "loss": 5.6866, - "step": 38602 - }, - { - "epoch": 20.13194263363755, - "grad_norm": 1.4621316194534302, - "learning_rate": 6.171959798994975e-05, - "loss": 5.7408, - "step": 38603 - }, - { - "epoch": 20.13246414602347, - "grad_norm": 1.5645672082901, - "learning_rate": 6.171859296482413e-05, - "loss": 5.1359, - "step": 38604 - }, - { - "epoch": 20.13298565840939, - "grad_norm": 1.5689210891723633, - "learning_rate": 6.171758793969849e-05, - "loss": 5.2809, - "step": 38605 - }, - { - "epoch": 20.133507170795305, - "grad_norm": 1.6742188930511475, - "learning_rate": 6.171658291457286e-05, - "loss": 5.2945, - "step": 38606 - }, - { - "epoch": 20.134028683181224, - "grad_norm": 1.4837597608566284, - "learning_rate": 6.171557788944723e-05, - "loss": 5.6012, - "step": 38607 - }, - { - "epoch": 20.134550195567144, - "grad_norm": 1.5046340227127075, - "learning_rate": 6.171457286432161e-05, - "loss": 5.6144, - "step": 38608 - }, - { - "epoch": 20.135071707953063, - "grad_norm": 1.644165277481079, - "learning_rate": 6.171356783919599e-05, - "loss": 5.2163, - "step": 38609 - }, - { - "epoch": 20.135593220338983, - "grad_norm": 1.4691990613937378, - "learning_rate": 6.171256281407035e-05, - "loss": 5.1466, - "step": 38610 - }, - { - "epoch": 20.136114732724902, - "grad_norm": 1.4932986497879028, - "learning_rate": 6.171155778894473e-05, - "loss": 5.7095, - "step": 38611 - }, - { - "epoch": 20.13663624511082, - "grad_norm": 1.5249931812286377, - "learning_rate": 6.17105527638191e-05, - "loss": 5.201, - "step": 38612 - }, - { - "epoch": 20.13715775749674, - "grad_norm": 1.4290790557861328, - "learning_rate": 6.170954773869347e-05, - "loss": 5.3403, - "step": 38613 - }, - { - "epoch": 20.13767926988266, - "grad_norm": 1.5265834331512451, - "learning_rate": 6.170854271356784e-05, - "loss": 5.1564, - "step": 38614 - }, - { - "epoch": 20.13820078226858, - "grad_norm": 1.5553442239761353, - "learning_rate": 6.170753768844222e-05, - "loss": 5.1308, - "step": 38615 - }, - { - "epoch": 20.1387222946545, - "grad_norm": 1.556208848953247, - "learning_rate": 6.170653266331658e-05, - "loss": 5.2464, - "step": 38616 - }, - { - "epoch": 20.13924380704042, - "grad_norm": 1.4675995111465454, - "learning_rate": 6.170552763819096e-05, - "loss": 5.3501, - "step": 38617 - }, - { - "epoch": 20.139765319426335, - "grad_norm": 1.556364893913269, - "learning_rate": 6.170452261306534e-05, - "loss": 5.6397, - "step": 38618 - }, - { - "epoch": 20.140286831812254, - "grad_norm": 1.7445341348648071, - "learning_rate": 6.17035175879397e-05, - "loss": 4.4371, - "step": 38619 - }, - { - "epoch": 20.140808344198174, - "grad_norm": 1.6007767915725708, - "learning_rate": 6.170251256281408e-05, - "loss": 5.3573, - "step": 38620 - }, - { - "epoch": 20.141329856584093, - "grad_norm": 1.6099179983139038, - "learning_rate": 6.170150753768844e-05, - "loss": 5.2296, - "step": 38621 - }, - { - "epoch": 20.141851368970013, - "grad_norm": 1.5737632513046265, - "learning_rate": 6.170050251256282e-05, - "loss": 5.4101, - "step": 38622 - }, - { - "epoch": 20.142372881355932, - "grad_norm": 1.5527278184890747, - "learning_rate": 6.169949748743718e-05, - "loss": 5.1139, - "step": 38623 - }, - { - "epoch": 20.14289439374185, - "grad_norm": 1.5857445001602173, - "learning_rate": 6.169849246231156e-05, - "loss": 5.3968, - "step": 38624 - }, - { - "epoch": 20.14341590612777, - "grad_norm": 1.484298825263977, - "learning_rate": 6.169748743718593e-05, - "loss": 5.2113, - "step": 38625 - }, - { - "epoch": 20.14393741851369, - "grad_norm": 1.5412592887878418, - "learning_rate": 6.16964824120603e-05, - "loss": 5.2804, - "step": 38626 - }, - { - "epoch": 20.14445893089961, - "grad_norm": 1.4974403381347656, - "learning_rate": 6.169547738693467e-05, - "loss": 5.375, - "step": 38627 - }, - { - "epoch": 20.14498044328553, - "grad_norm": 1.6157993078231812, - "learning_rate": 6.169447236180905e-05, - "loss": 5.2016, - "step": 38628 - }, - { - "epoch": 20.14550195567145, - "grad_norm": 1.571169137954712, - "learning_rate": 6.169346733668342e-05, - "loss": 4.4208, - "step": 38629 - }, - { - "epoch": 20.146023468057365, - "grad_norm": 1.428978443145752, - "learning_rate": 6.16924623115578e-05, - "loss": 5.568, - "step": 38630 - }, - { - "epoch": 20.146544980443284, - "grad_norm": 1.478332281112671, - "learning_rate": 6.169145728643217e-05, - "loss": 5.4636, - "step": 38631 - }, - { - "epoch": 20.147066492829204, - "grad_norm": 1.4465315341949463, - "learning_rate": 6.169045226130653e-05, - "loss": 5.2345, - "step": 38632 - }, - { - "epoch": 20.147588005215123, - "grad_norm": 1.4252408742904663, - "learning_rate": 6.168944723618091e-05, - "loss": 5.5172, - "step": 38633 - }, - { - "epoch": 20.148109517601043, - "grad_norm": 1.4164742231369019, - "learning_rate": 6.168844221105527e-05, - "loss": 5.2634, - "step": 38634 - }, - { - "epoch": 20.148631029986962, - "grad_norm": 1.615415096282959, - "learning_rate": 6.168743718592965e-05, - "loss": 5.5901, - "step": 38635 - }, - { - "epoch": 20.14915254237288, - "grad_norm": 1.532148003578186, - "learning_rate": 6.168643216080401e-05, - "loss": 5.1214, - "step": 38636 - }, - { - "epoch": 20.1496740547588, - "grad_norm": 1.5574300289154053, - "learning_rate": 6.168542713567839e-05, - "loss": 5.5176, - "step": 38637 - }, - { - "epoch": 20.15019556714472, - "grad_norm": 1.6399857997894287, - "learning_rate": 6.168442211055277e-05, - "loss": 5.4529, - "step": 38638 - }, - { - "epoch": 20.15071707953064, - "grad_norm": 1.5439329147338867, - "learning_rate": 6.168341708542715e-05, - "loss": 5.3374, - "step": 38639 - }, - { - "epoch": 20.15123859191656, - "grad_norm": 1.5264943838119507, - "learning_rate": 6.168241206030151e-05, - "loss": 5.5258, - "step": 38640 - }, - { - "epoch": 20.151760104302475, - "grad_norm": 1.5109431743621826, - "learning_rate": 6.168140703517589e-05, - "loss": 5.3282, - "step": 38641 - }, - { - "epoch": 20.152281616688395, - "grad_norm": 1.5024092197418213, - "learning_rate": 6.168040201005025e-05, - "loss": 5.4645, - "step": 38642 - }, - { - "epoch": 20.152803129074314, - "grad_norm": 1.5650906562805176, - "learning_rate": 6.167939698492463e-05, - "loss": 5.3073, - "step": 38643 - }, - { - "epoch": 20.153324641460234, - "grad_norm": 1.5399954319000244, - "learning_rate": 6.1678391959799e-05, - "loss": 4.9895, - "step": 38644 - }, - { - "epoch": 20.153846153846153, - "grad_norm": 1.5510436296463013, - "learning_rate": 6.167738693467336e-05, - "loss": 5.3852, - "step": 38645 - }, - { - "epoch": 20.154367666232073, - "grad_norm": 1.566744327545166, - "learning_rate": 6.167638190954774e-05, - "loss": 5.175, - "step": 38646 - }, - { - "epoch": 20.154889178617992, - "grad_norm": 1.5004661083221436, - "learning_rate": 6.16753768844221e-05, - "loss": 5.266, - "step": 38647 - }, - { - "epoch": 20.15541069100391, - "grad_norm": 1.5350818634033203, - "learning_rate": 6.167437185929648e-05, - "loss": 5.1929, - "step": 38648 - }, - { - "epoch": 20.15593220338983, - "grad_norm": 1.539420247077942, - "learning_rate": 6.167336683417086e-05, - "loss": 5.2232, - "step": 38649 - }, - { - "epoch": 20.15645371577575, - "grad_norm": 1.6019084453582764, - "learning_rate": 6.167236180904524e-05, - "loss": 5.2918, - "step": 38650 - }, - { - "epoch": 20.15697522816167, - "grad_norm": 1.476466417312622, - "learning_rate": 6.16713567839196e-05, - "loss": 5.2299, - "step": 38651 - }, - { - "epoch": 20.15749674054759, - "grad_norm": 1.5431426763534546, - "learning_rate": 6.167035175879398e-05, - "loss": 5.1314, - "step": 38652 - }, - { - "epoch": 20.158018252933505, - "grad_norm": 1.5486465692520142, - "learning_rate": 6.166934673366834e-05, - "loss": 4.7199, - "step": 38653 - }, - { - "epoch": 20.158539765319425, - "grad_norm": 1.5984957218170166, - "learning_rate": 6.166834170854272e-05, - "loss": 5.3981, - "step": 38654 - }, - { - "epoch": 20.159061277705344, - "grad_norm": 1.5200992822647095, - "learning_rate": 6.166733668341709e-05, - "loss": 5.3666, - "step": 38655 - }, - { - "epoch": 20.159582790091264, - "grad_norm": 1.6320445537567139, - "learning_rate": 6.166633165829146e-05, - "loss": 5.0336, - "step": 38656 - }, - { - "epoch": 20.160104302477183, - "grad_norm": 1.6178951263427734, - "learning_rate": 6.166532663316583e-05, - "loss": 5.2347, - "step": 38657 - }, - { - "epoch": 20.160625814863103, - "grad_norm": 1.6060850620269775, - "learning_rate": 6.16643216080402e-05, - "loss": 5.4943, - "step": 38658 - }, - { - "epoch": 20.161147327249022, - "grad_norm": 1.4348559379577637, - "learning_rate": 6.166331658291458e-05, - "loss": 5.0866, - "step": 38659 - }, - { - "epoch": 20.16166883963494, - "grad_norm": 1.5869314670562744, - "learning_rate": 6.166231155778895e-05, - "loss": 5.4427, - "step": 38660 - }, - { - "epoch": 20.16219035202086, - "grad_norm": 1.5707459449768066, - "learning_rate": 6.166130653266332e-05, - "loss": 4.9008, - "step": 38661 - }, - { - "epoch": 20.16271186440678, - "grad_norm": 1.59604811668396, - "learning_rate": 6.166030150753769e-05, - "loss": 5.0528, - "step": 38662 - }, - { - "epoch": 20.1632333767927, - "grad_norm": 1.3991843461990356, - "learning_rate": 6.165929648241207e-05, - "loss": 5.2545, - "step": 38663 - }, - { - "epoch": 20.16375488917862, - "grad_norm": 1.445783019065857, - "learning_rate": 6.165829145728643e-05, - "loss": 5.7448, - "step": 38664 - }, - { - "epoch": 20.164276401564535, - "grad_norm": 1.605556607246399, - "learning_rate": 6.165728643216081e-05, - "loss": 4.818, - "step": 38665 - }, - { - "epoch": 20.164797913950455, - "grad_norm": 1.5854179859161377, - "learning_rate": 6.165628140703517e-05, - "loss": 5.0649, - "step": 38666 - }, - { - "epoch": 20.165319426336374, - "grad_norm": 1.4289045333862305, - "learning_rate": 6.165527638190955e-05, - "loss": 5.0823, - "step": 38667 - }, - { - "epoch": 20.165840938722294, - "grad_norm": 1.534348726272583, - "learning_rate": 6.165427135678392e-05, - "loss": 5.5518, - "step": 38668 - }, - { - "epoch": 20.166362451108213, - "grad_norm": 1.5558463335037231, - "learning_rate": 6.16532663316583e-05, - "loss": 5.6122, - "step": 38669 - }, - { - "epoch": 20.166883963494133, - "grad_norm": 1.5152127742767334, - "learning_rate": 6.165226130653267e-05, - "loss": 5.6267, - "step": 38670 - }, - { - "epoch": 20.167405475880052, - "grad_norm": 1.5328717231750488, - "learning_rate": 6.165125628140704e-05, - "loss": 5.1223, - "step": 38671 - }, - { - "epoch": 20.16792698826597, - "grad_norm": 1.5127960443496704, - "learning_rate": 6.165025125628141e-05, - "loss": 5.0523, - "step": 38672 - }, - { - "epoch": 20.16844850065189, - "grad_norm": 1.5602655410766602, - "learning_rate": 6.164924623115578e-05, - "loss": 5.4612, - "step": 38673 - }, - { - "epoch": 20.16897001303781, - "grad_norm": 1.5324584245681763, - "learning_rate": 6.164824120603016e-05, - "loss": 5.0763, - "step": 38674 - }, - { - "epoch": 20.16949152542373, - "grad_norm": 1.4503953456878662, - "learning_rate": 6.164723618090452e-05, - "loss": 5.7364, - "step": 38675 - }, - { - "epoch": 20.17001303780965, - "grad_norm": 1.5634844303131104, - "learning_rate": 6.16462311557789e-05, - "loss": 5.068, - "step": 38676 - }, - { - "epoch": 20.170534550195566, - "grad_norm": 1.5550934076309204, - "learning_rate": 6.164522613065326e-05, - "loss": 5.1348, - "step": 38677 - }, - { - "epoch": 20.171056062581485, - "grad_norm": 1.5124202966690063, - "learning_rate": 6.164422110552764e-05, - "loss": 5.2685, - "step": 38678 - }, - { - "epoch": 20.171577574967404, - "grad_norm": 1.5481520891189575, - "learning_rate": 6.164321608040202e-05, - "loss": 5.3227, - "step": 38679 - }, - { - "epoch": 20.172099087353324, - "grad_norm": 1.480562686920166, - "learning_rate": 6.16422110552764e-05, - "loss": 5.244, - "step": 38680 - }, - { - "epoch": 20.172620599739243, - "grad_norm": 1.5333843231201172, - "learning_rate": 6.164120603015076e-05, - "loss": 4.7925, - "step": 38681 - }, - { - "epoch": 20.173142112125163, - "grad_norm": 1.5219974517822266, - "learning_rate": 6.164020100502514e-05, - "loss": 5.5282, - "step": 38682 - }, - { - "epoch": 20.173663624511082, - "grad_norm": 1.5302159786224365, - "learning_rate": 6.16391959798995e-05, - "loss": 5.599, - "step": 38683 - }, - { - "epoch": 20.174185136897, - "grad_norm": 1.4423072338104248, - "learning_rate": 6.163819095477388e-05, - "loss": 5.5866, - "step": 38684 - }, - { - "epoch": 20.17470664928292, - "grad_norm": 1.5089632272720337, - "learning_rate": 6.163718592964824e-05, - "loss": 5.5561, - "step": 38685 - }, - { - "epoch": 20.17522816166884, - "grad_norm": 1.494275450706482, - "learning_rate": 6.163618090452261e-05, - "loss": 5.4933, - "step": 38686 - }, - { - "epoch": 20.17574967405476, - "grad_norm": 1.5518304109573364, - "learning_rate": 6.163517587939699e-05, - "loss": 5.1324, - "step": 38687 - }, - { - "epoch": 20.17627118644068, - "grad_norm": 1.6110292673110962, - "learning_rate": 6.163417085427135e-05, - "loss": 5.096, - "step": 38688 - }, - { - "epoch": 20.176792698826596, - "grad_norm": 1.4092140197753906, - "learning_rate": 6.163316582914573e-05, - "loss": 4.3003, - "step": 38689 - }, - { - "epoch": 20.177314211212515, - "grad_norm": 1.4345428943634033, - "learning_rate": 6.16321608040201e-05, - "loss": 5.3422, - "step": 38690 - }, - { - "epoch": 20.177835723598434, - "grad_norm": 1.5705370903015137, - "learning_rate": 6.163115577889448e-05, - "loss": 5.4919, - "step": 38691 - }, - { - "epoch": 20.178357235984354, - "grad_norm": 1.5396379232406616, - "learning_rate": 6.163015075376885e-05, - "loss": 5.2908, - "step": 38692 - }, - { - "epoch": 20.178878748370273, - "grad_norm": 2.0110301971435547, - "learning_rate": 6.162914572864323e-05, - "loss": 5.1717, - "step": 38693 - }, - { - "epoch": 20.179400260756193, - "grad_norm": 1.6019877195358276, - "learning_rate": 6.162814070351759e-05, - "loss": 5.5089, - "step": 38694 - }, - { - "epoch": 20.179921773142112, - "grad_norm": 1.5300891399383545, - "learning_rate": 6.162713567839197e-05, - "loss": 5.4047, - "step": 38695 - }, - { - "epoch": 20.180443285528032, - "grad_norm": 1.590362548828125, - "learning_rate": 6.162613065326633e-05, - "loss": 5.2489, - "step": 38696 - }, - { - "epoch": 20.18096479791395, - "grad_norm": 1.5862038135528564, - "learning_rate": 6.162512562814071e-05, - "loss": 4.9228, - "step": 38697 - }, - { - "epoch": 20.18148631029987, - "grad_norm": 1.4764759540557861, - "learning_rate": 6.162412060301507e-05, - "loss": 5.6622, - "step": 38698 - }, - { - "epoch": 20.18200782268579, - "grad_norm": 1.4284685850143433, - "learning_rate": 6.162311557788945e-05, - "loss": 5.2658, - "step": 38699 - }, - { - "epoch": 20.18252933507171, - "grad_norm": 1.4263982772827148, - "learning_rate": 6.162211055276383e-05, - "loss": 5.7437, - "step": 38700 - }, - { - "epoch": 20.183050847457626, - "grad_norm": 1.5178149938583374, - "learning_rate": 6.16211055276382e-05, - "loss": 5.7092, - "step": 38701 - }, - { - "epoch": 20.183572359843545, - "grad_norm": 1.5071231126785278, - "learning_rate": 6.162010050251257e-05, - "loss": 5.3013, - "step": 38702 - }, - { - "epoch": 20.184093872229464, - "grad_norm": 1.575380563735962, - "learning_rate": 6.161909547738694e-05, - "loss": 5.2241, - "step": 38703 - }, - { - "epoch": 20.184615384615384, - "grad_norm": 1.644905924797058, - "learning_rate": 6.161809045226131e-05, - "loss": 5.1562, - "step": 38704 - }, - { - "epoch": 20.185136897001303, - "grad_norm": 1.4710954427719116, - "learning_rate": 6.161708542713568e-05, - "loss": 5.5142, - "step": 38705 - }, - { - "epoch": 20.185658409387223, - "grad_norm": 1.4019136428833008, - "learning_rate": 6.161608040201006e-05, - "loss": 5.4239, - "step": 38706 - }, - { - "epoch": 20.186179921773142, - "grad_norm": 1.4951791763305664, - "learning_rate": 6.161507537688442e-05, - "loss": 4.6579, - "step": 38707 - }, - { - "epoch": 20.186701434159062, - "grad_norm": 1.5546544790267944, - "learning_rate": 6.16140703517588e-05, - "loss": 5.2469, - "step": 38708 - }, - { - "epoch": 20.18722294654498, - "grad_norm": 1.5502313375473022, - "learning_rate": 6.161306532663316e-05, - "loss": 4.629, - "step": 38709 - }, - { - "epoch": 20.1877444589309, - "grad_norm": 1.53248131275177, - "learning_rate": 6.161206030150754e-05, - "loss": 5.5422, - "step": 38710 - }, - { - "epoch": 20.18826597131682, - "grad_norm": 1.5384491682052612, - "learning_rate": 6.161105527638192e-05, - "loss": 5.5666, - "step": 38711 - }, - { - "epoch": 20.18878748370274, - "grad_norm": 1.4549921751022339, - "learning_rate": 6.161005025125628e-05, - "loss": 5.6778, - "step": 38712 - }, - { - "epoch": 20.189308996088656, - "grad_norm": 1.3993197679519653, - "learning_rate": 6.160904522613066e-05, - "loss": 5.5648, - "step": 38713 - }, - { - "epoch": 20.189830508474575, - "grad_norm": 1.4174277782440186, - "learning_rate": 6.160804020100502e-05, - "loss": 5.5546, - "step": 38714 - }, - { - "epoch": 20.190352020860495, - "grad_norm": 1.6495996713638306, - "learning_rate": 6.16070351758794e-05, - "loss": 5.1813, - "step": 38715 - }, - { - "epoch": 20.190873533246414, - "grad_norm": 1.5890578031539917, - "learning_rate": 6.160603015075377e-05, - "loss": 5.1441, - "step": 38716 - }, - { - "epoch": 20.191395045632333, - "grad_norm": 1.5440881252288818, - "learning_rate": 6.160502512562814e-05, - "loss": 5.3273, - "step": 38717 - }, - { - "epoch": 20.191916558018253, - "grad_norm": 1.4479783773422241, - "learning_rate": 6.160402010050251e-05, - "loss": 5.331, - "step": 38718 - }, - { - "epoch": 20.192438070404172, - "grad_norm": 1.4906413555145264, - "learning_rate": 6.160301507537689e-05, - "loss": 5.2766, - "step": 38719 - }, - { - "epoch": 20.192959582790092, - "grad_norm": 1.6548335552215576, - "learning_rate": 6.160201005025125e-05, - "loss": 5.1345, - "step": 38720 - }, - { - "epoch": 20.19348109517601, - "grad_norm": 1.5615806579589844, - "learning_rate": 6.160100502512563e-05, - "loss": 5.1456, - "step": 38721 - }, - { - "epoch": 20.19400260756193, - "grad_norm": 1.6723201274871826, - "learning_rate": 6.16e-05, - "loss": 4.8182, - "step": 38722 - }, - { - "epoch": 20.19452411994785, - "grad_norm": 1.6104260683059692, - "learning_rate": 6.159899497487438e-05, - "loss": 5.149, - "step": 38723 - }, - { - "epoch": 20.195045632333766, - "grad_norm": 1.4527119398117065, - "learning_rate": 6.159798994974875e-05, - "loss": 5.4252, - "step": 38724 - }, - { - "epoch": 20.195567144719686, - "grad_norm": 1.4763766527175903, - "learning_rate": 6.159698492462311e-05, - "loss": 5.8091, - "step": 38725 - }, - { - "epoch": 20.196088657105605, - "grad_norm": 1.4666398763656616, - "learning_rate": 6.159597989949749e-05, - "loss": 5.3024, - "step": 38726 - }, - { - "epoch": 20.196610169491525, - "grad_norm": 1.557051181793213, - "learning_rate": 6.159497487437186e-05, - "loss": 4.9849, - "step": 38727 - }, - { - "epoch": 20.197131681877444, - "grad_norm": 1.580950140953064, - "learning_rate": 6.159396984924623e-05, - "loss": 5.052, - "step": 38728 - }, - { - "epoch": 20.197653194263363, - "grad_norm": 1.5560944080352783, - "learning_rate": 6.15929648241206e-05, - "loss": 5.379, - "step": 38729 - }, - { - "epoch": 20.198174706649283, - "grad_norm": 1.5086809396743774, - "learning_rate": 6.159195979899497e-05, - "loss": 5.3295, - "step": 38730 - }, - { - "epoch": 20.198696219035202, - "grad_norm": 1.4507672786712646, - "learning_rate": 6.159095477386935e-05, - "loss": 5.4818, - "step": 38731 - }, - { - "epoch": 20.199217731421122, - "grad_norm": 1.5740967988967896, - "learning_rate": 6.158994974874373e-05, - "loss": 4.9317, - "step": 38732 - }, - { - "epoch": 20.19973924380704, - "grad_norm": 1.6050904989242554, - "learning_rate": 6.15889447236181e-05, - "loss": 5.3358, - "step": 38733 - }, - { - "epoch": 20.20026075619296, - "grad_norm": 1.5623772144317627, - "learning_rate": 6.158793969849247e-05, - "loss": 5.2654, - "step": 38734 - }, - { - "epoch": 20.20078226857888, - "grad_norm": 1.5484157800674438, - "learning_rate": 6.158693467336684e-05, - "loss": 4.8295, - "step": 38735 - }, - { - "epoch": 20.201303780964796, - "grad_norm": 1.5139784812927246, - "learning_rate": 6.158592964824121e-05, - "loss": 4.6044, - "step": 38736 - }, - { - "epoch": 20.201825293350716, - "grad_norm": 1.591526985168457, - "learning_rate": 6.158492462311558e-05, - "loss": 5.2619, - "step": 38737 - }, - { - "epoch": 20.202346805736635, - "grad_norm": 1.5008195638656616, - "learning_rate": 6.158391959798994e-05, - "loss": 5.1938, - "step": 38738 - }, - { - "epoch": 20.202868318122555, - "grad_norm": 1.5061639547348022, - "learning_rate": 6.158291457286432e-05, - "loss": 5.3082, - "step": 38739 - }, - { - "epoch": 20.203389830508474, - "grad_norm": 1.4229801893234253, - "learning_rate": 6.158190954773869e-05, - "loss": 5.8232, - "step": 38740 - }, - { - "epoch": 20.203911342894393, - "grad_norm": 1.5004254579544067, - "learning_rate": 6.158090452261306e-05, - "loss": 5.2783, - "step": 38741 - }, - { - "epoch": 20.204432855280313, - "grad_norm": 1.5466594696044922, - "learning_rate": 6.157989949748744e-05, - "loss": 5.0645, - "step": 38742 - }, - { - "epoch": 20.204954367666232, - "grad_norm": 1.518619418144226, - "learning_rate": 6.157889447236182e-05, - "loss": 5.5035, - "step": 38743 - }, - { - "epoch": 20.205475880052152, - "grad_norm": 1.4989635944366455, - "learning_rate": 6.157788944723618e-05, - "loss": 5.4791, - "step": 38744 - }, - { - "epoch": 20.20599739243807, - "grad_norm": 1.459814190864563, - "learning_rate": 6.157688442211056e-05, - "loss": 5.6589, - "step": 38745 - }, - { - "epoch": 20.20651890482399, - "grad_norm": 1.5033451318740845, - "learning_rate": 6.157587939698493e-05, - "loss": 5.0698, - "step": 38746 - }, - { - "epoch": 20.20704041720991, - "grad_norm": 1.4514124393463135, - "learning_rate": 6.15748743718593e-05, - "loss": 5.1882, - "step": 38747 - }, - { - "epoch": 20.207561929595826, - "grad_norm": 1.44204843044281, - "learning_rate": 6.157386934673367e-05, - "loss": 4.6966, - "step": 38748 - }, - { - "epoch": 20.208083441981746, - "grad_norm": 1.6254905462265015, - "learning_rate": 6.157286432160805e-05, - "loss": 5.5012, - "step": 38749 - }, - { - "epoch": 20.208604954367665, - "grad_norm": 1.5037719011306763, - "learning_rate": 6.157185929648241e-05, - "loss": 4.9922, - "step": 38750 - }, - { - "epoch": 20.209126466753585, - "grad_norm": 1.5800042152404785, - "learning_rate": 6.157085427135679e-05, - "loss": 5.2329, - "step": 38751 - }, - { - "epoch": 20.209647979139504, - "grad_norm": 1.4816044569015503, - "learning_rate": 6.156984924623117e-05, - "loss": 5.5745, - "step": 38752 - }, - { - "epoch": 20.210169491525424, - "grad_norm": 1.603934407234192, - "learning_rate": 6.156884422110553e-05, - "loss": 5.2091, - "step": 38753 - }, - { - "epoch": 20.210691003911343, - "grad_norm": 1.3865203857421875, - "learning_rate": 6.156783919597991e-05, - "loss": 5.5024, - "step": 38754 - }, - { - "epoch": 20.211212516297262, - "grad_norm": 1.6053229570388794, - "learning_rate": 6.156683417085427e-05, - "loss": 5.4094, - "step": 38755 - }, - { - "epoch": 20.211734028683182, - "grad_norm": 1.4980101585388184, - "learning_rate": 6.156582914572865e-05, - "loss": 5.5568, - "step": 38756 - }, - { - "epoch": 20.2122555410691, - "grad_norm": 1.5558741092681885, - "learning_rate": 6.156482412060301e-05, - "loss": 5.1382, - "step": 38757 - }, - { - "epoch": 20.21277705345502, - "grad_norm": 1.4673783779144287, - "learning_rate": 6.156381909547739e-05, - "loss": 5.6164, - "step": 38758 - }, - { - "epoch": 20.21329856584094, - "grad_norm": 1.5643507242202759, - "learning_rate": 6.156281407035176e-05, - "loss": 5.5524, - "step": 38759 - }, - { - "epoch": 20.213820078226856, - "grad_norm": 1.5511448383331299, - "learning_rate": 6.156180904522613e-05, - "loss": 5.1981, - "step": 38760 - }, - { - "epoch": 20.214341590612776, - "grad_norm": 1.6431188583374023, - "learning_rate": 6.15608040201005e-05, - "loss": 4.9717, - "step": 38761 - }, - { - "epoch": 20.214863102998695, - "grad_norm": 1.4964299201965332, - "learning_rate": 6.155979899497488e-05, - "loss": 5.6104, - "step": 38762 - }, - { - "epoch": 20.215384615384615, - "grad_norm": 1.451062560081482, - "learning_rate": 6.155879396984925e-05, - "loss": 4.3644, - "step": 38763 - }, - { - "epoch": 20.215906127770534, - "grad_norm": 1.4916681051254272, - "learning_rate": 6.155778894472362e-05, - "loss": 5.0697, - "step": 38764 - }, - { - "epoch": 20.216427640156454, - "grad_norm": 1.4641718864440918, - "learning_rate": 6.1556783919598e-05, - "loss": 5.4302, - "step": 38765 - }, - { - "epoch": 20.216949152542373, - "grad_norm": 1.4921820163726807, - "learning_rate": 6.155577889447236e-05, - "loss": 5.3383, - "step": 38766 - }, - { - "epoch": 20.217470664928292, - "grad_norm": 1.583154559135437, - "learning_rate": 6.155477386934674e-05, - "loss": 5.113, - "step": 38767 - }, - { - "epoch": 20.217992177314212, - "grad_norm": 1.5793778896331787, - "learning_rate": 6.15537688442211e-05, - "loss": 5.3046, - "step": 38768 - }, - { - "epoch": 20.21851368970013, - "grad_norm": 1.526437759399414, - "learning_rate": 6.155276381909548e-05, - "loss": 5.0363, - "step": 38769 - }, - { - "epoch": 20.21903520208605, - "grad_norm": 1.5280686616897583, - "learning_rate": 6.155175879396984e-05, - "loss": 5.8405, - "step": 38770 - }, - { - "epoch": 20.21955671447197, - "grad_norm": 1.626832365989685, - "learning_rate": 6.155075376884422e-05, - "loss": 5.034, - "step": 38771 - }, - { - "epoch": 20.220078226857886, - "grad_norm": 1.589781403541565, - "learning_rate": 6.15497487437186e-05, - "loss": 4.8952, - "step": 38772 - }, - { - "epoch": 20.220599739243806, - "grad_norm": 1.562495470046997, - "learning_rate": 6.154874371859298e-05, - "loss": 5.4704, - "step": 38773 - }, - { - "epoch": 20.221121251629725, - "grad_norm": 1.5884995460510254, - "learning_rate": 6.154773869346734e-05, - "loss": 5.3986, - "step": 38774 - }, - { - "epoch": 20.221642764015645, - "grad_norm": 1.5738818645477295, - "learning_rate": 6.154673366834172e-05, - "loss": 5.0101, - "step": 38775 - }, - { - "epoch": 20.222164276401564, - "grad_norm": 1.5095783472061157, - "learning_rate": 6.154572864321608e-05, - "loss": 5.1955, - "step": 38776 - }, - { - "epoch": 20.222685788787484, - "grad_norm": 1.4916489124298096, - "learning_rate": 6.154472361809046e-05, - "loss": 5.2976, - "step": 38777 - }, - { - "epoch": 20.223207301173403, - "grad_norm": 1.4893434047698975, - "learning_rate": 6.154371859296483e-05, - "loss": 5.4731, - "step": 38778 - }, - { - "epoch": 20.223728813559323, - "grad_norm": 1.39323091506958, - "learning_rate": 6.154271356783919e-05, - "loss": 5.3792, - "step": 38779 - }, - { - "epoch": 20.224250325945242, - "grad_norm": 1.4979183673858643, - "learning_rate": 6.154170854271357e-05, - "loss": 5.5766, - "step": 38780 - }, - { - "epoch": 20.22477183833116, - "grad_norm": 1.5326833724975586, - "learning_rate": 6.154070351758793e-05, - "loss": 5.0445, - "step": 38781 - }, - { - "epoch": 20.22529335071708, - "grad_norm": 1.5402565002441406, - "learning_rate": 6.153969849246231e-05, - "loss": 4.9427, - "step": 38782 - }, - { - "epoch": 20.225814863103, - "grad_norm": 1.6214228868484497, - "learning_rate": 6.153869346733669e-05, - "loss": 5.4567, - "step": 38783 - }, - { - "epoch": 20.226336375488916, - "grad_norm": 1.5909479856491089, - "learning_rate": 6.153768844221107e-05, - "loss": 4.8861, - "step": 38784 - }, - { - "epoch": 20.226857887874836, - "grad_norm": 1.5378336906433105, - "learning_rate": 6.153668341708543e-05, - "loss": 5.1053, - "step": 38785 - }, - { - "epoch": 20.227379400260755, - "grad_norm": 1.5735191106796265, - "learning_rate": 6.153567839195981e-05, - "loss": 5.3115, - "step": 38786 - }, - { - "epoch": 20.227900912646675, - "grad_norm": 1.6072754859924316, - "learning_rate": 6.153467336683417e-05, - "loss": 4.6644, - "step": 38787 - }, - { - "epoch": 20.228422425032594, - "grad_norm": 1.3864837884902954, - "learning_rate": 6.153366834170855e-05, - "loss": 5.5825, - "step": 38788 - }, - { - "epoch": 20.228943937418514, - "grad_norm": 1.6598174571990967, - "learning_rate": 6.153266331658291e-05, - "loss": 4.4815, - "step": 38789 - }, - { - "epoch": 20.229465449804433, - "grad_norm": 1.593738317489624, - "learning_rate": 6.153165829145729e-05, - "loss": 5.2755, - "step": 38790 - }, - { - "epoch": 20.229986962190353, - "grad_norm": 1.5579906702041626, - "learning_rate": 6.153065326633166e-05, - "loss": 5.2043, - "step": 38791 - }, - { - "epoch": 20.230508474576272, - "grad_norm": 1.4602326154708862, - "learning_rate": 6.152964824120603e-05, - "loss": 4.8043, - "step": 38792 - }, - { - "epoch": 20.23102998696219, - "grad_norm": 1.4637948274612427, - "learning_rate": 6.152864321608041e-05, - "loss": 5.6723, - "step": 38793 - }, - { - "epoch": 20.23155149934811, - "grad_norm": 1.4770451784133911, - "learning_rate": 6.152763819095478e-05, - "loss": 5.3415, - "step": 38794 - }, - { - "epoch": 20.23207301173403, - "grad_norm": 1.3597707748413086, - "learning_rate": 6.152663316582915e-05, - "loss": 5.7734, - "step": 38795 - }, - { - "epoch": 20.232594524119946, - "grad_norm": 1.532223105430603, - "learning_rate": 6.152562814070352e-05, - "loss": 5.4036, - "step": 38796 - }, - { - "epoch": 20.233116036505866, - "grad_norm": 1.3785998821258545, - "learning_rate": 6.15246231155779e-05, - "loss": 5.4101, - "step": 38797 - }, - { - "epoch": 20.233637548891785, - "grad_norm": 1.540358066558838, - "learning_rate": 6.152361809045226e-05, - "loss": 5.4152, - "step": 38798 - }, - { - "epoch": 20.234159061277705, - "grad_norm": 1.451528787612915, - "learning_rate": 6.152261306532664e-05, - "loss": 5.0288, - "step": 38799 - }, - { - "epoch": 20.234680573663624, - "grad_norm": 1.5864378213882446, - "learning_rate": 6.1521608040201e-05, - "loss": 5.1697, - "step": 38800 - }, - { - "epoch": 20.235202086049544, - "grad_norm": 1.5600072145462036, - "learning_rate": 6.152060301507538e-05, - "loss": 5.0437, - "step": 38801 - }, - { - "epoch": 20.235723598435463, - "grad_norm": 1.5615878105163574, - "learning_rate": 6.151959798994974e-05, - "loss": 4.9346, - "step": 38802 - }, - { - "epoch": 20.236245110821383, - "grad_norm": 1.4146225452423096, - "learning_rate": 6.151859296482412e-05, - "loss": 5.5031, - "step": 38803 - }, - { - "epoch": 20.236766623207302, - "grad_norm": 1.5940543413162231, - "learning_rate": 6.15175879396985e-05, - "loss": 5.3446, - "step": 38804 - }, - { - "epoch": 20.23728813559322, - "grad_norm": 1.5338404178619385, - "learning_rate": 6.151658291457286e-05, - "loss": 5.2861, - "step": 38805 - }, - { - "epoch": 20.23780964797914, - "grad_norm": 1.6093162298202515, - "learning_rate": 6.151557788944724e-05, - "loss": 5.5733, - "step": 38806 - }, - { - "epoch": 20.23833116036506, - "grad_norm": 1.5166798830032349, - "learning_rate": 6.151457286432161e-05, - "loss": 4.9553, - "step": 38807 - }, - { - "epoch": 20.238852672750976, - "grad_norm": 1.4622304439544678, - "learning_rate": 6.151356783919598e-05, - "loss": 5.664, - "step": 38808 - }, - { - "epoch": 20.239374185136896, - "grad_norm": 1.4933046102523804, - "learning_rate": 6.151256281407035e-05, - "loss": 4.556, - "step": 38809 - }, - { - "epoch": 20.239895697522815, - "grad_norm": 1.5863615274429321, - "learning_rate": 6.151155778894473e-05, - "loss": 5.2111, - "step": 38810 - }, - { - "epoch": 20.240417209908735, - "grad_norm": 1.5060656070709229, - "learning_rate": 6.151055276381909e-05, - "loss": 5.4308, - "step": 38811 - }, - { - "epoch": 20.240938722294654, - "grad_norm": 1.4615967273712158, - "learning_rate": 6.150954773869347e-05, - "loss": 5.883, - "step": 38812 - }, - { - "epoch": 20.241460234680574, - "grad_norm": 1.4310134649276733, - "learning_rate": 6.150854271356785e-05, - "loss": 5.5608, - "step": 38813 - }, - { - "epoch": 20.241981747066493, - "grad_norm": 1.444650650024414, - "learning_rate": 6.150753768844222e-05, - "loss": 5.1797, - "step": 38814 - }, - { - "epoch": 20.242503259452413, - "grad_norm": 1.6298139095306396, - "learning_rate": 6.150653266331659e-05, - "loss": 5.0461, - "step": 38815 - }, - { - "epoch": 20.243024771838332, - "grad_norm": 1.5772768259048462, - "learning_rate": 6.150552763819097e-05, - "loss": 4.8134, - "step": 38816 - }, - { - "epoch": 20.24354628422425, - "grad_norm": 1.4875357151031494, - "learning_rate": 6.150452261306533e-05, - "loss": 5.5939, - "step": 38817 - }, - { - "epoch": 20.24406779661017, - "grad_norm": 1.443407654762268, - "learning_rate": 6.15035175879397e-05, - "loss": 5.6761, - "step": 38818 - }, - { - "epoch": 20.24458930899609, - "grad_norm": 1.488156795501709, - "learning_rate": 6.150251256281407e-05, - "loss": 5.3069, - "step": 38819 - }, - { - "epoch": 20.245110821382006, - "grad_norm": 1.4697115421295166, - "learning_rate": 6.150150753768844e-05, - "loss": 5.7286, - "step": 38820 - }, - { - "epoch": 20.245632333767926, - "grad_norm": 1.5686125755310059, - "learning_rate": 6.150050251256282e-05, - "loss": 4.8816, - "step": 38821 - }, - { - "epoch": 20.246153846153845, - "grad_norm": 1.6092056035995483, - "learning_rate": 6.149949748743718e-05, - "loss": 5.0332, - "step": 38822 - }, - { - "epoch": 20.246675358539765, - "grad_norm": 1.4796721935272217, - "learning_rate": 6.149849246231156e-05, - "loss": 5.1921, - "step": 38823 - }, - { - "epoch": 20.247196870925684, - "grad_norm": 1.6173590421676636, - "learning_rate": 6.149748743718594e-05, - "loss": 4.9955, - "step": 38824 - }, - { - "epoch": 20.247718383311604, - "grad_norm": 1.621656060218811, - "learning_rate": 6.149648241206031e-05, - "loss": 5.2917, - "step": 38825 - }, - { - "epoch": 20.248239895697523, - "grad_norm": 1.3533858060836792, - "learning_rate": 6.149547738693468e-05, - "loss": 5.6795, - "step": 38826 - }, - { - "epoch": 20.248761408083443, - "grad_norm": 1.5897855758666992, - "learning_rate": 6.149447236180906e-05, - "loss": 5.2868, - "step": 38827 - }, - { - "epoch": 20.249282920469362, - "grad_norm": 1.4255290031433105, - "learning_rate": 6.149346733668342e-05, - "loss": 5.6681, - "step": 38828 - }, - { - "epoch": 20.24980443285528, - "grad_norm": 1.4905554056167603, - "learning_rate": 6.14924623115578e-05, - "loss": 4.9006, - "step": 38829 - }, - { - "epoch": 20.2503259452412, - "grad_norm": 1.6326013803482056, - "learning_rate": 6.149145728643216e-05, - "loss": 5.2261, - "step": 38830 - }, - { - "epoch": 20.250847457627117, - "grad_norm": 1.55451500415802, - "learning_rate": 6.149045226130653e-05, - "loss": 5.4792, - "step": 38831 - }, - { - "epoch": 20.251368970013036, - "grad_norm": 1.4840644598007202, - "learning_rate": 6.14894472361809e-05, - "loss": 5.4717, - "step": 38832 - }, - { - "epoch": 20.251890482398956, - "grad_norm": 1.5045934915542603, - "learning_rate": 6.148844221105528e-05, - "loss": 5.4938, - "step": 38833 - }, - { - "epoch": 20.252411994784875, - "grad_norm": 1.5231295824050903, - "learning_rate": 6.148743718592966e-05, - "loss": 4.9925, - "step": 38834 - }, - { - "epoch": 20.252933507170795, - "grad_norm": 1.4792925119400024, - "learning_rate": 6.148643216080402e-05, - "loss": 5.6739, - "step": 38835 - }, - { - "epoch": 20.253455019556714, - "grad_norm": 1.5991653203964233, - "learning_rate": 6.14854271356784e-05, - "loss": 5.5947, - "step": 38836 - }, - { - "epoch": 20.253976531942634, - "grad_norm": 1.6063240766525269, - "learning_rate": 6.148442211055277e-05, - "loss": 5.6252, - "step": 38837 - }, - { - "epoch": 20.254498044328553, - "grad_norm": 1.4987483024597168, - "learning_rate": 6.148341708542714e-05, - "loss": 5.459, - "step": 38838 - }, - { - "epoch": 20.255019556714473, - "grad_norm": 1.4964790344238281, - "learning_rate": 6.148241206030151e-05, - "loss": 5.5283, - "step": 38839 - }, - { - "epoch": 20.255541069100392, - "grad_norm": 1.439732551574707, - "learning_rate": 6.148140703517589e-05, - "loss": 5.2748, - "step": 38840 - }, - { - "epoch": 20.25606258148631, - "grad_norm": 1.4735980033874512, - "learning_rate": 6.148040201005025e-05, - "loss": 5.0775, - "step": 38841 - }, - { - "epoch": 20.25658409387223, - "grad_norm": 1.637336015701294, - "learning_rate": 6.147939698492463e-05, - "loss": 5.0317, - "step": 38842 - }, - { - "epoch": 20.257105606258147, - "grad_norm": 1.7421656847000122, - "learning_rate": 6.147839195979899e-05, - "loss": 4.839, - "step": 38843 - }, - { - "epoch": 20.257627118644066, - "grad_norm": 1.575321078300476, - "learning_rate": 6.147738693467337e-05, - "loss": 5.4285, - "step": 38844 - }, - { - "epoch": 20.258148631029986, - "grad_norm": 1.659794569015503, - "learning_rate": 6.147638190954775e-05, - "loss": 5.581, - "step": 38845 - }, - { - "epoch": 20.258670143415905, - "grad_norm": 1.4580484628677368, - "learning_rate": 6.147537688442211e-05, - "loss": 5.3262, - "step": 38846 - }, - { - "epoch": 20.259191655801825, - "grad_norm": 1.5640933513641357, - "learning_rate": 6.147437185929649e-05, - "loss": 4.7674, - "step": 38847 - }, - { - "epoch": 20.259713168187744, - "grad_norm": 1.4847848415374756, - "learning_rate": 6.147336683417085e-05, - "loss": 5.7128, - "step": 38848 - }, - { - "epoch": 20.260234680573664, - "grad_norm": 1.547215461730957, - "learning_rate": 6.147236180904523e-05, - "loss": 5.3642, - "step": 38849 - }, - { - "epoch": 20.260756192959583, - "grad_norm": 1.4898394346237183, - "learning_rate": 6.14713567839196e-05, - "loss": 5.5543, - "step": 38850 - }, - { - "epoch": 20.261277705345503, - "grad_norm": 1.487136960029602, - "learning_rate": 6.147035175879397e-05, - "loss": 5.2852, - "step": 38851 - }, - { - "epoch": 20.261799217731422, - "grad_norm": 1.5314915180206299, - "learning_rate": 6.146934673366834e-05, - "loss": 5.5285, - "step": 38852 - }, - { - "epoch": 20.26232073011734, - "grad_norm": 1.6019150018692017, - "learning_rate": 6.146834170854272e-05, - "loss": 5.0234, - "step": 38853 - }, - { - "epoch": 20.26284224250326, - "grad_norm": 1.465004563331604, - "learning_rate": 6.14673366834171e-05, - "loss": 5.4149, - "step": 38854 - }, - { - "epoch": 20.263363754889177, - "grad_norm": 1.4701566696166992, - "learning_rate": 6.146633165829147e-05, - "loss": 5.2787, - "step": 38855 - }, - { - "epoch": 20.263885267275096, - "grad_norm": 1.4532235860824585, - "learning_rate": 6.146532663316584e-05, - "loss": 5.728, - "step": 38856 - }, - { - "epoch": 20.264406779661016, - "grad_norm": 1.5708858966827393, - "learning_rate": 6.14643216080402e-05, - "loss": 5.075, - "step": 38857 - }, - { - "epoch": 20.264928292046935, - "grad_norm": 1.4573662281036377, - "learning_rate": 6.146331658291458e-05, - "loss": 5.4086, - "step": 38858 - }, - { - "epoch": 20.265449804432855, - "grad_norm": 1.404242992401123, - "learning_rate": 6.146231155778894e-05, - "loss": 4.7339, - "step": 38859 - }, - { - "epoch": 20.265971316818774, - "grad_norm": 1.4999991655349731, - "learning_rate": 6.146130653266332e-05, - "loss": 5.6051, - "step": 38860 - }, - { - "epoch": 20.266492829204694, - "grad_norm": 1.5094375610351562, - "learning_rate": 6.146030150753768e-05, - "loss": 5.4867, - "step": 38861 - }, - { - "epoch": 20.267014341590613, - "grad_norm": 1.4439409971237183, - "learning_rate": 6.145929648241206e-05, - "loss": 5.2829, - "step": 38862 - }, - { - "epoch": 20.267535853976533, - "grad_norm": 1.5473002195358276, - "learning_rate": 6.145829145728643e-05, - "loss": 4.9703, - "step": 38863 - }, - { - "epoch": 20.268057366362452, - "grad_norm": 1.5631005764007568, - "learning_rate": 6.14572864321608e-05, - "loss": 5.2262, - "step": 38864 - }, - { - "epoch": 20.26857887874837, - "grad_norm": 1.5108368396759033, - "learning_rate": 6.145628140703518e-05, - "loss": 5.1104, - "step": 38865 - }, - { - "epoch": 20.26910039113429, - "grad_norm": 1.588607668876648, - "learning_rate": 6.145527638190956e-05, - "loss": 5.4489, - "step": 38866 - }, - { - "epoch": 20.269621903520207, - "grad_norm": 1.5584031343460083, - "learning_rate": 6.145427135678392e-05, - "loss": 4.8257, - "step": 38867 - }, - { - "epoch": 20.270143415906126, - "grad_norm": 1.5939509868621826, - "learning_rate": 6.14532663316583e-05, - "loss": 5.2251, - "step": 38868 - }, - { - "epoch": 20.270664928292046, - "grad_norm": 1.534656286239624, - "learning_rate": 6.145226130653267e-05, - "loss": 5.141, - "step": 38869 - }, - { - "epoch": 20.271186440677965, - "grad_norm": 1.550120234489441, - "learning_rate": 6.145125628140704e-05, - "loss": 5.3279, - "step": 38870 - }, - { - "epoch": 20.271707953063885, - "grad_norm": 1.5358978509902954, - "learning_rate": 6.145025125628141e-05, - "loss": 5.0588, - "step": 38871 - }, - { - "epoch": 20.272229465449804, - "grad_norm": 1.5765703916549683, - "learning_rate": 6.144924623115577e-05, - "loss": 4.7941, - "step": 38872 - }, - { - "epoch": 20.272750977835724, - "grad_norm": 1.4579061269760132, - "learning_rate": 6.144824120603015e-05, - "loss": 5.201, - "step": 38873 - }, - { - "epoch": 20.273272490221643, - "grad_norm": 1.525787115097046, - "learning_rate": 6.144723618090453e-05, - "loss": 4.7932, - "step": 38874 - }, - { - "epoch": 20.273794002607563, - "grad_norm": 1.549570918083191, - "learning_rate": 6.14462311557789e-05, - "loss": 5.5149, - "step": 38875 - }, - { - "epoch": 20.274315514993482, - "grad_norm": 1.5403556823730469, - "learning_rate": 6.144522613065327e-05, - "loss": 5.1153, - "step": 38876 - }, - { - "epoch": 20.2748370273794, - "grad_norm": 1.57559335231781, - "learning_rate": 6.144422110552765e-05, - "loss": 5.0205, - "step": 38877 - }, - { - "epoch": 20.27535853976532, - "grad_norm": 1.4571866989135742, - "learning_rate": 6.144321608040201e-05, - "loss": 5.7639, - "step": 38878 - }, - { - "epoch": 20.275880052151237, - "grad_norm": 1.5154949426651, - "learning_rate": 6.144221105527639e-05, - "loss": 4.924, - "step": 38879 - }, - { - "epoch": 20.276401564537156, - "grad_norm": 1.5160510540008545, - "learning_rate": 6.144120603015075e-05, - "loss": 5.4196, - "step": 38880 - }, - { - "epoch": 20.276923076923076, - "grad_norm": 1.4504001140594482, - "learning_rate": 6.144020100502513e-05, - "loss": 5.4527, - "step": 38881 - }, - { - "epoch": 20.277444589308995, - "grad_norm": 1.6148287057876587, - "learning_rate": 6.14391959798995e-05, - "loss": 5.5924, - "step": 38882 - }, - { - "epoch": 20.277966101694915, - "grad_norm": 1.531987190246582, - "learning_rate": 6.143819095477387e-05, - "loss": 4.5716, - "step": 38883 - }, - { - "epoch": 20.278487614080834, - "grad_norm": 1.5701878070831299, - "learning_rate": 6.143718592964824e-05, - "loss": 4.9742, - "step": 38884 - }, - { - "epoch": 20.279009126466754, - "grad_norm": 1.496734619140625, - "learning_rate": 6.143618090452262e-05, - "loss": 5.5565, - "step": 38885 - }, - { - "epoch": 20.279530638852673, - "grad_norm": 1.4867477416992188, - "learning_rate": 6.1435175879397e-05, - "loss": 5.2839, - "step": 38886 - }, - { - "epoch": 20.280052151238593, - "grad_norm": 1.5807726383209229, - "learning_rate": 6.143417085427136e-05, - "loss": 4.8485, - "step": 38887 - }, - { - "epoch": 20.280573663624512, - "grad_norm": 1.6672688722610474, - "learning_rate": 6.143316582914574e-05, - "loss": 4.6759, - "step": 38888 - }, - { - "epoch": 20.28109517601043, - "grad_norm": 1.534371018409729, - "learning_rate": 6.14321608040201e-05, - "loss": 5.3765, - "step": 38889 - }, - { - "epoch": 20.28161668839635, - "grad_norm": 1.4836466312408447, - "learning_rate": 6.143115577889448e-05, - "loss": 5.4724, - "step": 38890 - }, - { - "epoch": 20.282138200782267, - "grad_norm": 1.4868474006652832, - "learning_rate": 6.143015075376884e-05, - "loss": 5.1598, - "step": 38891 - }, - { - "epoch": 20.282659713168186, - "grad_norm": 1.51041579246521, - "learning_rate": 6.142914572864322e-05, - "loss": 4.9031, - "step": 38892 - }, - { - "epoch": 20.283181225554106, - "grad_norm": 1.4888370037078857, - "learning_rate": 6.142814070351759e-05, - "loss": 5.2599, - "step": 38893 - }, - { - "epoch": 20.283702737940025, - "grad_norm": 1.4658361673355103, - "learning_rate": 6.142713567839196e-05, - "loss": 5.6359, - "step": 38894 - }, - { - "epoch": 20.284224250325945, - "grad_norm": 1.598999261856079, - "learning_rate": 6.142613065326633e-05, - "loss": 4.8818, - "step": 38895 - }, - { - "epoch": 20.284745762711864, - "grad_norm": 1.4351089000701904, - "learning_rate": 6.14251256281407e-05, - "loss": 5.5946, - "step": 38896 - }, - { - "epoch": 20.285267275097784, - "grad_norm": 1.378549337387085, - "learning_rate": 6.142412060301508e-05, - "loss": 4.8948, - "step": 38897 - }, - { - "epoch": 20.285788787483703, - "grad_norm": 1.5197657346725464, - "learning_rate": 6.142311557788945e-05, - "loss": 5.2293, - "step": 38898 - }, - { - "epoch": 20.286310299869623, - "grad_norm": 1.5250890254974365, - "learning_rate": 6.142211055276382e-05, - "loss": 5.128, - "step": 38899 - }, - { - "epoch": 20.286831812255542, - "grad_norm": 1.4345823526382446, - "learning_rate": 6.142110552763819e-05, - "loss": 5.3729, - "step": 38900 - }, - { - "epoch": 20.28735332464146, - "grad_norm": 1.4713832139968872, - "learning_rate": 6.142010050251257e-05, - "loss": 5.0619, - "step": 38901 - }, - { - "epoch": 20.28787483702738, - "grad_norm": 1.5416258573532104, - "learning_rate": 6.141909547738693e-05, - "loss": 5.327, - "step": 38902 - }, - { - "epoch": 20.288396349413297, - "grad_norm": 1.6048376560211182, - "learning_rate": 6.141809045226131e-05, - "loss": 5.318, - "step": 38903 - }, - { - "epoch": 20.288917861799217, - "grad_norm": 1.8454334735870361, - "learning_rate": 6.141708542713567e-05, - "loss": 4.76, - "step": 38904 - }, - { - "epoch": 20.289439374185136, - "grad_norm": 1.4412399530410767, - "learning_rate": 6.141608040201005e-05, - "loss": 5.2224, - "step": 38905 - }, - { - "epoch": 20.289960886571055, - "grad_norm": 1.4729448556900024, - "learning_rate": 6.141507537688443e-05, - "loss": 5.6735, - "step": 38906 - }, - { - "epoch": 20.290482398956975, - "grad_norm": 1.4507662057876587, - "learning_rate": 6.141407035175881e-05, - "loss": 5.0682, - "step": 38907 - }, - { - "epoch": 20.291003911342894, - "grad_norm": 1.5077345371246338, - "learning_rate": 6.141306532663317e-05, - "loss": 4.9456, - "step": 38908 - }, - { - "epoch": 20.291525423728814, - "grad_norm": 1.5296891927719116, - "learning_rate": 6.141206030150755e-05, - "loss": 5.0344, - "step": 38909 - }, - { - "epoch": 20.292046936114733, - "grad_norm": 1.4503198862075806, - "learning_rate": 6.141105527638191e-05, - "loss": 5.7422, - "step": 38910 - }, - { - "epoch": 20.292568448500653, - "grad_norm": 1.5403175354003906, - "learning_rate": 6.141005025125628e-05, - "loss": 5.46, - "step": 38911 - }, - { - "epoch": 20.293089960886572, - "grad_norm": 1.6044268608093262, - "learning_rate": 6.140904522613066e-05, - "loss": 5.2829, - "step": 38912 - }, - { - "epoch": 20.29361147327249, - "grad_norm": 1.5751328468322754, - "learning_rate": 6.140804020100502e-05, - "loss": 5.2846, - "step": 38913 - }, - { - "epoch": 20.294132985658408, - "grad_norm": 1.4373857975006104, - "learning_rate": 6.14070351758794e-05, - "loss": 5.6541, - "step": 38914 - }, - { - "epoch": 20.294654498044327, - "grad_norm": 1.4545822143554688, - "learning_rate": 6.140603015075376e-05, - "loss": 4.9846, - "step": 38915 - }, - { - "epoch": 20.295176010430247, - "grad_norm": 1.640956163406372, - "learning_rate": 6.140502512562814e-05, - "loss": 4.9976, - "step": 38916 - }, - { - "epoch": 20.295697522816166, - "grad_norm": 1.5287598371505737, - "learning_rate": 6.140402010050252e-05, - "loss": 5.272, - "step": 38917 - }, - { - "epoch": 20.296219035202085, - "grad_norm": 1.6217031478881836, - "learning_rate": 6.14030150753769e-05, - "loss": 5.0824, - "step": 38918 - }, - { - "epoch": 20.296740547588005, - "grad_norm": 1.4491448402404785, - "learning_rate": 6.140201005025126e-05, - "loss": 5.2191, - "step": 38919 - }, - { - "epoch": 20.297262059973924, - "grad_norm": 1.5324313640594482, - "learning_rate": 6.140100502512564e-05, - "loss": 5.2206, - "step": 38920 - }, - { - "epoch": 20.297783572359844, - "grad_norm": 1.455832839012146, - "learning_rate": 6.14e-05, - "loss": 5.5269, - "step": 38921 - }, - { - "epoch": 20.298305084745763, - "grad_norm": 1.5848917961120605, - "learning_rate": 6.139899497487438e-05, - "loss": 5.6879, - "step": 38922 - }, - { - "epoch": 20.298826597131683, - "grad_norm": 1.508481502532959, - "learning_rate": 6.139798994974874e-05, - "loss": 5.1503, - "step": 38923 - }, - { - "epoch": 20.299348109517602, - "grad_norm": 1.6080601215362549, - "learning_rate": 6.139698492462311e-05, - "loss": 5.1727, - "step": 38924 - }, - { - "epoch": 20.29986962190352, - "grad_norm": 1.5506482124328613, - "learning_rate": 6.139597989949749e-05, - "loss": 5.2633, - "step": 38925 - }, - { - "epoch": 20.300391134289438, - "grad_norm": 1.4910155534744263, - "learning_rate": 6.139497487437186e-05, - "loss": 5.1452, - "step": 38926 - }, - { - "epoch": 20.300912646675357, - "grad_norm": 1.5613034963607788, - "learning_rate": 6.139396984924624e-05, - "loss": 5.0506, - "step": 38927 - }, - { - "epoch": 20.301434159061277, - "grad_norm": 1.5240859985351562, - "learning_rate": 6.13929648241206e-05, - "loss": 5.3936, - "step": 38928 - }, - { - "epoch": 20.301955671447196, - "grad_norm": 1.4174308776855469, - "learning_rate": 6.139195979899498e-05, - "loss": 5.5042, - "step": 38929 - }, - { - "epoch": 20.302477183833116, - "grad_norm": 1.3952689170837402, - "learning_rate": 6.139095477386935e-05, - "loss": 5.0879, - "step": 38930 - }, - { - "epoch": 20.302998696219035, - "grad_norm": 1.5198860168457031, - "learning_rate": 6.138994974874373e-05, - "loss": 5.1708, - "step": 38931 - }, - { - "epoch": 20.303520208604954, - "grad_norm": 1.3821953535079956, - "learning_rate": 6.138894472361809e-05, - "loss": 5.562, - "step": 38932 - }, - { - "epoch": 20.304041720990874, - "grad_norm": 1.582653522491455, - "learning_rate": 6.138793969849247e-05, - "loss": 5.0181, - "step": 38933 - }, - { - "epoch": 20.304563233376793, - "grad_norm": 1.4439626932144165, - "learning_rate": 6.138693467336683e-05, - "loss": 5.1071, - "step": 38934 - }, - { - "epoch": 20.305084745762713, - "grad_norm": 1.4683512449264526, - "learning_rate": 6.138592964824121e-05, - "loss": 5.4204, - "step": 38935 - }, - { - "epoch": 20.305606258148632, - "grad_norm": 1.5318756103515625, - "learning_rate": 6.138492462311557e-05, - "loss": 5.3111, - "step": 38936 - }, - { - "epoch": 20.30612777053455, - "grad_norm": 1.5385550260543823, - "learning_rate": 6.138391959798995e-05, - "loss": 5.1534, - "step": 38937 - }, - { - "epoch": 20.306649282920468, - "grad_norm": 1.6668226718902588, - "learning_rate": 6.138291457286433e-05, - "loss": 4.3553, - "step": 38938 - }, - { - "epoch": 20.307170795306387, - "grad_norm": 1.4483027458190918, - "learning_rate": 6.13819095477387e-05, - "loss": 5.2084, - "step": 38939 - }, - { - "epoch": 20.307692307692307, - "grad_norm": 1.515108585357666, - "learning_rate": 6.138090452261307e-05, - "loss": 5.097, - "step": 38940 - }, - { - "epoch": 20.308213820078226, - "grad_norm": 1.4388914108276367, - "learning_rate": 6.137989949748744e-05, - "loss": 5.4656, - "step": 38941 - }, - { - "epoch": 20.308735332464146, - "grad_norm": 1.5518642663955688, - "learning_rate": 6.137889447236181e-05, - "loss": 5.2271, - "step": 38942 - }, - { - "epoch": 20.309256844850065, - "grad_norm": 1.513202428817749, - "learning_rate": 6.137788944723618e-05, - "loss": 5.3544, - "step": 38943 - }, - { - "epoch": 20.309778357235984, - "grad_norm": 1.5441851615905762, - "learning_rate": 6.137688442211056e-05, - "loss": 5.1668, - "step": 38944 - }, - { - "epoch": 20.310299869621904, - "grad_norm": 1.4754576683044434, - "learning_rate": 6.137587939698492e-05, - "loss": 5.1113, - "step": 38945 - }, - { - "epoch": 20.310821382007823, - "grad_norm": 1.6175622940063477, - "learning_rate": 6.13748743718593e-05, - "loss": 5.3332, - "step": 38946 - }, - { - "epoch": 20.311342894393743, - "grad_norm": 1.5251471996307373, - "learning_rate": 6.137386934673368e-05, - "loss": 5.1744, - "step": 38947 - }, - { - "epoch": 20.311864406779662, - "grad_norm": 1.5198582410812378, - "learning_rate": 6.137286432160805e-05, - "loss": 5.5547, - "step": 38948 - }, - { - "epoch": 20.312385919165582, - "grad_norm": 1.625289797782898, - "learning_rate": 6.137185929648242e-05, - "loss": 5.0455, - "step": 38949 - }, - { - "epoch": 20.312907431551498, - "grad_norm": 1.5139297246932983, - "learning_rate": 6.137085427135678e-05, - "loss": 5.0525, - "step": 38950 - }, - { - "epoch": 20.313428943937417, - "grad_norm": 1.5706355571746826, - "learning_rate": 6.136984924623116e-05, - "loss": 5.4745, - "step": 38951 - }, - { - "epoch": 20.313950456323337, - "grad_norm": 1.5285108089447021, - "learning_rate": 6.136884422110552e-05, - "loss": 5.5993, - "step": 38952 - }, - { - "epoch": 20.314471968709256, - "grad_norm": 1.6407617330551147, - "learning_rate": 6.13678391959799e-05, - "loss": 4.7721, - "step": 38953 - }, - { - "epoch": 20.314993481095176, - "grad_norm": 1.5688549280166626, - "learning_rate": 6.136683417085427e-05, - "loss": 5.4783, - "step": 38954 - }, - { - "epoch": 20.315514993481095, - "grad_norm": 1.4947811365127563, - "learning_rate": 6.136582914572864e-05, - "loss": 5.6479, - "step": 38955 - }, - { - "epoch": 20.316036505867014, - "grad_norm": 1.586327075958252, - "learning_rate": 6.136482412060301e-05, - "loss": 4.9663, - "step": 38956 - }, - { - "epoch": 20.316558018252934, - "grad_norm": 1.5007834434509277, - "learning_rate": 6.136381909547739e-05, - "loss": 4.9901, - "step": 38957 - }, - { - "epoch": 20.317079530638853, - "grad_norm": 1.7143219709396362, - "learning_rate": 6.136281407035176e-05, - "loss": 5.022, - "step": 38958 - }, - { - "epoch": 20.317601043024773, - "grad_norm": 1.592516303062439, - "learning_rate": 6.136180904522614e-05, - "loss": 5.1819, - "step": 38959 - }, - { - "epoch": 20.318122555410692, - "grad_norm": 1.5869141817092896, - "learning_rate": 6.13608040201005e-05, - "loss": 5.4718, - "step": 38960 - }, - { - "epoch": 20.318644067796612, - "grad_norm": 1.4803764820098877, - "learning_rate": 6.135979899497488e-05, - "loss": 5.2283, - "step": 38961 - }, - { - "epoch": 20.319165580182528, - "grad_norm": 1.473483920097351, - "learning_rate": 6.135879396984925e-05, - "loss": 5.6238, - "step": 38962 - }, - { - "epoch": 20.319687092568447, - "grad_norm": 1.499606728553772, - "learning_rate": 6.135778894472363e-05, - "loss": 5.5183, - "step": 38963 - }, - { - "epoch": 20.320208604954367, - "grad_norm": 1.530145287513733, - "learning_rate": 6.135678391959799e-05, - "loss": 5.3769, - "step": 38964 - }, - { - "epoch": 20.320730117340286, - "grad_norm": 1.500069499015808, - "learning_rate": 6.135577889447236e-05, - "loss": 4.775, - "step": 38965 - }, - { - "epoch": 20.321251629726206, - "grad_norm": 1.3895155191421509, - "learning_rate": 6.135477386934673e-05, - "loss": 5.7833, - "step": 38966 - }, - { - "epoch": 20.321773142112125, - "grad_norm": 1.4355623722076416, - "learning_rate": 6.135376884422111e-05, - "loss": 5.4409, - "step": 38967 - }, - { - "epoch": 20.322294654498045, - "grad_norm": 1.5078169107437134, - "learning_rate": 6.135276381909549e-05, - "loss": 4.9519, - "step": 38968 - }, - { - "epoch": 20.322816166883964, - "grad_norm": 1.5116007328033447, - "learning_rate": 6.135175879396985e-05, - "loss": 5.4823, - "step": 38969 - }, - { - "epoch": 20.323337679269883, - "grad_norm": 1.439882516860962, - "learning_rate": 6.135075376884423e-05, - "loss": 5.598, - "step": 38970 - }, - { - "epoch": 20.323859191655803, - "grad_norm": 1.6008241176605225, - "learning_rate": 6.13497487437186e-05, - "loss": 5.4548, - "step": 38971 - }, - { - "epoch": 20.324380704041722, - "grad_norm": 1.4671337604522705, - "learning_rate": 6.134874371859297e-05, - "loss": 5.586, - "step": 38972 - }, - { - "epoch": 20.324902216427642, - "grad_norm": 1.577132225036621, - "learning_rate": 6.134773869346734e-05, - "loss": 4.9266, - "step": 38973 - }, - { - "epoch": 20.325423728813558, - "grad_norm": 1.432394027709961, - "learning_rate": 6.134673366834171e-05, - "loss": 5.1645, - "step": 38974 - }, - { - "epoch": 20.325945241199477, - "grad_norm": 1.6237038373947144, - "learning_rate": 6.134572864321608e-05, - "loss": 5.1524, - "step": 38975 - }, - { - "epoch": 20.326466753585397, - "grad_norm": 1.5865139961242676, - "learning_rate": 6.134472361809046e-05, - "loss": 5.1184, - "step": 38976 - }, - { - "epoch": 20.326988265971316, - "grad_norm": 1.4645291566848755, - "learning_rate": 6.134371859296482e-05, - "loss": 5.42, - "step": 38977 - }, - { - "epoch": 20.327509778357236, - "grad_norm": 1.4829438924789429, - "learning_rate": 6.13427135678392e-05, - "loss": 5.2008, - "step": 38978 - }, - { - "epoch": 20.328031290743155, - "grad_norm": 1.441131591796875, - "learning_rate": 6.134170854271358e-05, - "loss": 5.635, - "step": 38979 - }, - { - "epoch": 20.328552803129075, - "grad_norm": 1.4779366254806519, - "learning_rate": 6.134070351758794e-05, - "loss": 5.3405, - "step": 38980 - }, - { - "epoch": 20.329074315514994, - "grad_norm": 1.4987767934799194, - "learning_rate": 6.133969849246232e-05, - "loss": 5.0317, - "step": 38981 - }, - { - "epoch": 20.329595827900913, - "grad_norm": 1.5270270109176636, - "learning_rate": 6.133869346733668e-05, - "loss": 5.2158, - "step": 38982 - }, - { - "epoch": 20.330117340286833, - "grad_norm": 1.648970365524292, - "learning_rate": 6.133768844221106e-05, - "loss": 4.6947, - "step": 38983 - }, - { - "epoch": 20.330638852672752, - "grad_norm": 1.6118874549865723, - "learning_rate": 6.133668341708543e-05, - "loss": 5.3233, - "step": 38984 - }, - { - "epoch": 20.331160365058672, - "grad_norm": 1.5482470989227295, - "learning_rate": 6.13356783919598e-05, - "loss": 5.2165, - "step": 38985 - }, - { - "epoch": 20.331681877444588, - "grad_norm": 1.5853029489517212, - "learning_rate": 6.133467336683417e-05, - "loss": 4.8628, - "step": 38986 - }, - { - "epoch": 20.332203389830507, - "grad_norm": 1.5033226013183594, - "learning_rate": 6.133366834170855e-05, - "loss": 5.3854, - "step": 38987 - }, - { - "epoch": 20.332724902216427, - "grad_norm": 1.6188560724258423, - "learning_rate": 6.133266331658292e-05, - "loss": 5.2042, - "step": 38988 - }, - { - "epoch": 20.333246414602346, - "grad_norm": 1.6049602031707764, - "learning_rate": 6.13316582914573e-05, - "loss": 4.5603, - "step": 38989 - }, - { - "epoch": 20.333767926988266, - "grad_norm": 1.6667308807373047, - "learning_rate": 6.133065326633167e-05, - "loss": 5.0909, - "step": 38990 - }, - { - "epoch": 20.334289439374185, - "grad_norm": 1.532781958580017, - "learning_rate": 6.132964824120603e-05, - "loss": 5.115, - "step": 38991 - }, - { - "epoch": 20.334810951760105, - "grad_norm": 1.5039350986480713, - "learning_rate": 6.132864321608041e-05, - "loss": 5.416, - "step": 38992 - }, - { - "epoch": 20.335332464146024, - "grad_norm": 1.4996626377105713, - "learning_rate": 6.132763819095477e-05, - "loss": 5.6543, - "step": 38993 - }, - { - "epoch": 20.335853976531943, - "grad_norm": 1.4587947130203247, - "learning_rate": 6.132663316582915e-05, - "loss": 5.528, - "step": 38994 - }, - { - "epoch": 20.336375488917863, - "grad_norm": 1.6142001152038574, - "learning_rate": 6.132562814070351e-05, - "loss": 5.4709, - "step": 38995 - }, - { - "epoch": 20.336897001303782, - "grad_norm": 1.4943121671676636, - "learning_rate": 6.132462311557789e-05, - "loss": 5.0228, - "step": 38996 - }, - { - "epoch": 20.3374185136897, - "grad_norm": 1.60222589969635, - "learning_rate": 6.132361809045226e-05, - "loss": 5.3925, - "step": 38997 - }, - { - "epoch": 20.337940026075618, - "grad_norm": 1.4420368671417236, - "learning_rate": 6.132261306532663e-05, - "loss": 5.776, - "step": 38998 - }, - { - "epoch": 20.338461538461537, - "grad_norm": 1.5439374446868896, - "learning_rate": 6.132160804020101e-05, - "loss": 4.8956, - "step": 38999 - }, - { - "epoch": 20.338983050847457, - "grad_norm": 1.4922178983688354, - "learning_rate": 6.132060301507539e-05, - "loss": 5.2825, - "step": 39000 - }, - { - "epoch": 20.339504563233376, - "grad_norm": 1.4057302474975586, - "learning_rate": 6.131959798994975e-05, - "loss": 5.6373, - "step": 39001 - }, - { - "epoch": 20.340026075619296, - "grad_norm": 1.4534698724746704, - "learning_rate": 6.131859296482413e-05, - "loss": 5.5259, - "step": 39002 - }, - { - "epoch": 20.340547588005215, - "grad_norm": 1.5071537494659424, - "learning_rate": 6.13175879396985e-05, - "loss": 4.3066, - "step": 39003 - }, - { - "epoch": 20.341069100391135, - "grad_norm": 1.4628397226333618, - "learning_rate": 6.131658291457286e-05, - "loss": 5.1649, - "step": 39004 - }, - { - "epoch": 20.341590612777054, - "grad_norm": 1.5064796209335327, - "learning_rate": 6.131557788944724e-05, - "loss": 5.5567, - "step": 39005 - }, - { - "epoch": 20.342112125162974, - "grad_norm": 1.5753031969070435, - "learning_rate": 6.13145728643216e-05, - "loss": 5.135, - "step": 39006 - }, - { - "epoch": 20.342633637548893, - "grad_norm": 1.6645632982254028, - "learning_rate": 6.131356783919598e-05, - "loss": 4.3897, - "step": 39007 - }, - { - "epoch": 20.343155149934812, - "grad_norm": 1.5725023746490479, - "learning_rate": 6.131256281407036e-05, - "loss": 4.7589, - "step": 39008 - }, - { - "epoch": 20.343676662320732, - "grad_norm": 1.5553046464920044, - "learning_rate": 6.131155778894474e-05, - "loss": 4.8187, - "step": 39009 - }, - { - "epoch": 20.344198174706648, - "grad_norm": 1.551418662071228, - "learning_rate": 6.13105527638191e-05, - "loss": 5.4027, - "step": 39010 - }, - { - "epoch": 20.344719687092567, - "grad_norm": 1.5608811378479004, - "learning_rate": 6.130954773869348e-05, - "loss": 5.6389, - "step": 39011 - }, - { - "epoch": 20.345241199478487, - "grad_norm": 1.6240736246109009, - "learning_rate": 6.130854271356784e-05, - "loss": 5.0011, - "step": 39012 - }, - { - "epoch": 20.345762711864406, - "grad_norm": 1.516531229019165, - "learning_rate": 6.130753768844222e-05, - "loss": 5.3784, - "step": 39013 - }, - { - "epoch": 20.346284224250326, - "grad_norm": 1.5389937162399292, - "learning_rate": 6.130653266331658e-05, - "loss": 5.3206, - "step": 39014 - }, - { - "epoch": 20.346805736636245, - "grad_norm": 1.5237147808074951, - "learning_rate": 6.130552763819096e-05, - "loss": 5.5844, - "step": 39015 - }, - { - "epoch": 20.347327249022165, - "grad_norm": 1.4138357639312744, - "learning_rate": 6.130452261306533e-05, - "loss": 5.4417, - "step": 39016 - }, - { - "epoch": 20.347848761408084, - "grad_norm": 1.6032546758651733, - "learning_rate": 6.130351758793969e-05, - "loss": 5.1158, - "step": 39017 - }, - { - "epoch": 20.348370273794004, - "grad_norm": 1.590702772140503, - "learning_rate": 6.130251256281407e-05, - "loss": 4.1346, - "step": 39018 - }, - { - "epoch": 20.348891786179923, - "grad_norm": 1.5176955461502075, - "learning_rate": 6.130150753768845e-05, - "loss": 5.7603, - "step": 39019 - }, - { - "epoch": 20.349413298565842, - "grad_norm": 1.535244345664978, - "learning_rate": 6.130050251256282e-05, - "loss": 4.7776, - "step": 39020 - }, - { - "epoch": 20.34993481095176, - "grad_norm": 1.6550427675247192, - "learning_rate": 6.129949748743719e-05, - "loss": 5.3546, - "step": 39021 - }, - { - "epoch": 20.350456323337678, - "grad_norm": 1.5235251188278198, - "learning_rate": 6.129849246231157e-05, - "loss": 5.1901, - "step": 39022 - }, - { - "epoch": 20.350977835723597, - "grad_norm": 1.5626846551895142, - "learning_rate": 6.129748743718593e-05, - "loss": 5.0505, - "step": 39023 - }, - { - "epoch": 20.351499348109517, - "grad_norm": 1.5114418268203735, - "learning_rate": 6.129648241206031e-05, - "loss": 4.9701, - "step": 39024 - }, - { - "epoch": 20.352020860495436, - "grad_norm": 1.4964227676391602, - "learning_rate": 6.129547738693467e-05, - "loss": 5.4275, - "step": 39025 - }, - { - "epoch": 20.352542372881356, - "grad_norm": 1.5470404624938965, - "learning_rate": 6.129447236180905e-05, - "loss": 4.594, - "step": 39026 - }, - { - "epoch": 20.353063885267275, - "grad_norm": 1.5921827554702759, - "learning_rate": 6.129346733668341e-05, - "loss": 5.1422, - "step": 39027 - }, - { - "epoch": 20.353585397653195, - "grad_norm": 1.5512826442718506, - "learning_rate": 6.129246231155779e-05, - "loss": 5.1524, - "step": 39028 - }, - { - "epoch": 20.354106910039114, - "grad_norm": 1.3832217454910278, - "learning_rate": 6.129145728643217e-05, - "loss": 5.5142, - "step": 39029 - }, - { - "epoch": 20.354628422425034, - "grad_norm": 1.5491729974746704, - "learning_rate": 6.129045226130653e-05, - "loss": 5.1783, - "step": 39030 - }, - { - "epoch": 20.355149934810953, - "grad_norm": 1.5985569953918457, - "learning_rate": 6.128944723618091e-05, - "loss": 4.7055, - "step": 39031 - }, - { - "epoch": 20.355671447196872, - "grad_norm": 1.5520789623260498, - "learning_rate": 6.128844221105528e-05, - "loss": 5.1855, - "step": 39032 - }, - { - "epoch": 20.35619295958279, - "grad_norm": 1.5621240139007568, - "learning_rate": 6.128743718592965e-05, - "loss": 5.2747, - "step": 39033 - }, - { - "epoch": 20.356714471968708, - "grad_norm": 1.4997938871383667, - "learning_rate": 6.128643216080402e-05, - "loss": 5.4516, - "step": 39034 - }, - { - "epoch": 20.357235984354627, - "grad_norm": 1.6321650743484497, - "learning_rate": 6.12854271356784e-05, - "loss": 5.2169, - "step": 39035 - }, - { - "epoch": 20.357757496740547, - "grad_norm": 1.5056384801864624, - "learning_rate": 6.128442211055276e-05, - "loss": 5.3889, - "step": 39036 - }, - { - "epoch": 20.358279009126466, - "grad_norm": 1.5455660820007324, - "learning_rate": 6.128341708542714e-05, - "loss": 5.4873, - "step": 39037 - }, - { - "epoch": 20.358800521512386, - "grad_norm": 1.4223275184631348, - "learning_rate": 6.12824120603015e-05, - "loss": 5.2598, - "step": 39038 - }, - { - "epoch": 20.359322033898305, - "grad_norm": 1.7271310091018677, - "learning_rate": 6.128140703517588e-05, - "loss": 4.7592, - "step": 39039 - }, - { - "epoch": 20.359843546284225, - "grad_norm": 1.3792104721069336, - "learning_rate": 6.128040201005026e-05, - "loss": 5.232, - "step": 39040 - }, - { - "epoch": 20.360365058670144, - "grad_norm": 1.6100587844848633, - "learning_rate": 6.127939698492464e-05, - "loss": 5.3511, - "step": 39041 - }, - { - "epoch": 20.360886571056064, - "grad_norm": 1.6421093940734863, - "learning_rate": 6.1278391959799e-05, - "loss": 5.2991, - "step": 39042 - }, - { - "epoch": 20.361408083441983, - "grad_norm": 1.5531854629516602, - "learning_rate": 6.127738693467336e-05, - "loss": 4.9846, - "step": 39043 - }, - { - "epoch": 20.361929595827903, - "grad_norm": 1.509024739265442, - "learning_rate": 6.127638190954774e-05, - "loss": 5.3293, - "step": 39044 - }, - { - "epoch": 20.36245110821382, - "grad_norm": 1.599167823791504, - "learning_rate": 6.127537688442211e-05, - "loss": 5.2123, - "step": 39045 - }, - { - "epoch": 20.362972620599738, - "grad_norm": 1.4890978336334229, - "learning_rate": 6.127437185929648e-05, - "loss": 5.4733, - "step": 39046 - }, - { - "epoch": 20.363494132985657, - "grad_norm": 1.5485782623291016, - "learning_rate": 6.127336683417085e-05, - "loss": 5.1353, - "step": 39047 - }, - { - "epoch": 20.364015645371577, - "grad_norm": 1.521567940711975, - "learning_rate": 6.127236180904523e-05, - "loss": 5.0983, - "step": 39048 - }, - { - "epoch": 20.364537157757496, - "grad_norm": 1.465916633605957, - "learning_rate": 6.127135678391959e-05, - "loss": 5.4984, - "step": 39049 - }, - { - "epoch": 20.365058670143416, - "grad_norm": 1.5336171388626099, - "learning_rate": 6.127035175879397e-05, - "loss": 5.3191, - "step": 39050 - }, - { - "epoch": 20.365580182529335, - "grad_norm": 1.538618564605713, - "learning_rate": 6.126934673366835e-05, - "loss": 5.3631, - "step": 39051 - }, - { - "epoch": 20.366101694915255, - "grad_norm": 1.5858047008514404, - "learning_rate": 6.126834170854272e-05, - "loss": 4.9975, - "step": 39052 - }, - { - "epoch": 20.366623207301174, - "grad_norm": 1.5527147054672241, - "learning_rate": 6.126733668341709e-05, - "loss": 5.5239, - "step": 39053 - }, - { - "epoch": 20.367144719687094, - "grad_norm": 1.5079368352890015, - "learning_rate": 6.126633165829147e-05, - "loss": 5.5225, - "step": 39054 - }, - { - "epoch": 20.367666232073013, - "grad_norm": 1.5120962858200073, - "learning_rate": 6.126532663316583e-05, - "loss": 5.3256, - "step": 39055 - }, - { - "epoch": 20.368187744458933, - "grad_norm": 1.5087090730667114, - "learning_rate": 6.126432160804021e-05, - "loss": 5.3695, - "step": 39056 - }, - { - "epoch": 20.36870925684485, - "grad_norm": 1.5351253747940063, - "learning_rate": 6.126331658291457e-05, - "loss": 5.2667, - "step": 39057 - }, - { - "epoch": 20.369230769230768, - "grad_norm": 1.5603647232055664, - "learning_rate": 6.126231155778894e-05, - "loss": 5.12, - "step": 39058 - }, - { - "epoch": 20.369752281616687, - "grad_norm": 1.6095304489135742, - "learning_rate": 6.126130653266332e-05, - "loss": 4.5243, - "step": 39059 - }, - { - "epoch": 20.370273794002607, - "grad_norm": 1.4848781824111938, - "learning_rate": 6.126030150753769e-05, - "loss": 5.1777, - "step": 39060 - }, - { - "epoch": 20.370795306388526, - "grad_norm": 1.540753960609436, - "learning_rate": 6.125929648241207e-05, - "loss": 4.9288, - "step": 39061 - }, - { - "epoch": 20.371316818774446, - "grad_norm": 1.5512019395828247, - "learning_rate": 6.125829145728644e-05, - "loss": 4.8188, - "step": 39062 - }, - { - "epoch": 20.371838331160365, - "grad_norm": 1.5257257223129272, - "learning_rate": 6.125728643216081e-05, - "loss": 5.4349, - "step": 39063 - }, - { - "epoch": 20.372359843546285, - "grad_norm": 1.6056439876556396, - "learning_rate": 6.125628140703518e-05, - "loss": 5.3893, - "step": 39064 - }, - { - "epoch": 20.372881355932204, - "grad_norm": 1.5176252126693726, - "learning_rate": 6.125527638190956e-05, - "loss": 4.8913, - "step": 39065 - }, - { - "epoch": 20.373402868318124, - "grad_norm": 1.5902800559997559, - "learning_rate": 6.125427135678392e-05, - "loss": 5.3132, - "step": 39066 - }, - { - "epoch": 20.373924380704043, - "grad_norm": 1.456115484237671, - "learning_rate": 6.12532663316583e-05, - "loss": 5.5958, - "step": 39067 - }, - { - "epoch": 20.374445893089963, - "grad_norm": 1.4676082134246826, - "learning_rate": 6.125226130653266e-05, - "loss": 5.7208, - "step": 39068 - }, - { - "epoch": 20.37496740547588, - "grad_norm": 1.3680689334869385, - "learning_rate": 6.125125628140704e-05, - "loss": 4.8922, - "step": 39069 - }, - { - "epoch": 20.375488917861798, - "grad_norm": 1.550582766532898, - "learning_rate": 6.12502512562814e-05, - "loss": 4.938, - "step": 39070 - }, - { - "epoch": 20.376010430247717, - "grad_norm": 1.6184269189834595, - "learning_rate": 6.124924623115578e-05, - "loss": 5.2703, - "step": 39071 - }, - { - "epoch": 20.376531942633637, - "grad_norm": 1.5899096727371216, - "learning_rate": 6.124824120603016e-05, - "loss": 5.0748, - "step": 39072 - }, - { - "epoch": 20.377053455019556, - "grad_norm": 1.558464765548706, - "learning_rate": 6.124723618090452e-05, - "loss": 5.3616, - "step": 39073 - }, - { - "epoch": 20.377574967405476, - "grad_norm": 1.5973069667816162, - "learning_rate": 6.12462311557789e-05, - "loss": 4.4712, - "step": 39074 - }, - { - "epoch": 20.378096479791395, - "grad_norm": 1.5174379348754883, - "learning_rate": 6.124522613065327e-05, - "loss": 5.6013, - "step": 39075 - }, - { - "epoch": 20.378617992177315, - "grad_norm": 1.587668538093567, - "learning_rate": 6.124422110552764e-05, - "loss": 5.0132, - "step": 39076 - }, - { - "epoch": 20.379139504563234, - "grad_norm": 1.4951015710830688, - "learning_rate": 6.124321608040201e-05, - "loss": 5.1294, - "step": 39077 - }, - { - "epoch": 20.379661016949154, - "grad_norm": 1.501212239265442, - "learning_rate": 6.124221105527639e-05, - "loss": 5.2672, - "step": 39078 - }, - { - "epoch": 20.380182529335073, - "grad_norm": 1.4350157976150513, - "learning_rate": 6.124120603015075e-05, - "loss": 5.1364, - "step": 39079 - }, - { - "epoch": 20.380704041720993, - "grad_norm": 1.4404677152633667, - "learning_rate": 6.124020100502513e-05, - "loss": 5.6768, - "step": 39080 - }, - { - "epoch": 20.38122555410691, - "grad_norm": 1.4691946506500244, - "learning_rate": 6.12391959798995e-05, - "loss": 5.258, - "step": 39081 - }, - { - "epoch": 20.381747066492828, - "grad_norm": 1.592268943786621, - "learning_rate": 6.123819095477388e-05, - "loss": 5.3727, - "step": 39082 - }, - { - "epoch": 20.382268578878747, - "grad_norm": 1.5507586002349854, - "learning_rate": 6.123718592964825e-05, - "loss": 4.9015, - "step": 39083 - }, - { - "epoch": 20.382790091264667, - "grad_norm": 1.4289288520812988, - "learning_rate": 6.123618090452261e-05, - "loss": 5.2646, - "step": 39084 - }, - { - "epoch": 20.383311603650586, - "grad_norm": 1.566942811012268, - "learning_rate": 6.123517587939699e-05, - "loss": 5.1315, - "step": 39085 - }, - { - "epoch": 20.383833116036506, - "grad_norm": 1.4899202585220337, - "learning_rate": 6.123417085427135e-05, - "loss": 5.5673, - "step": 39086 - }, - { - "epoch": 20.384354628422425, - "grad_norm": 1.497159481048584, - "learning_rate": 6.123316582914573e-05, - "loss": 5.3705, - "step": 39087 - }, - { - "epoch": 20.384876140808345, - "grad_norm": 1.5891838073730469, - "learning_rate": 6.12321608040201e-05, - "loss": 5.0816, - "step": 39088 - }, - { - "epoch": 20.385397653194264, - "grad_norm": 1.6166093349456787, - "learning_rate": 6.123115577889447e-05, - "loss": 4.8056, - "step": 39089 - }, - { - "epoch": 20.385919165580184, - "grad_norm": 1.5258103609085083, - "learning_rate": 6.123015075376884e-05, - "loss": 4.975, - "step": 39090 - }, - { - "epoch": 20.386440677966103, - "grad_norm": 1.5827770233154297, - "learning_rate": 6.122914572864322e-05, - "loss": 5.3785, - "step": 39091 - }, - { - "epoch": 20.386962190352023, - "grad_norm": 1.5706799030303955, - "learning_rate": 6.12281407035176e-05, - "loss": 5.2484, - "step": 39092 - }, - { - "epoch": 20.38748370273794, - "grad_norm": 1.4847716093063354, - "learning_rate": 6.122713567839197e-05, - "loss": 5.3095, - "step": 39093 - }, - { - "epoch": 20.388005215123858, - "grad_norm": 1.5119279623031616, - "learning_rate": 6.122613065326634e-05, - "loss": 5.6387, - "step": 39094 - }, - { - "epoch": 20.388526727509777, - "grad_norm": 1.545487642288208, - "learning_rate": 6.122512562814071e-05, - "loss": 5.599, - "step": 39095 - }, - { - "epoch": 20.389048239895697, - "grad_norm": 1.476743221282959, - "learning_rate": 6.122412060301508e-05, - "loss": 5.6704, - "step": 39096 - }, - { - "epoch": 20.389569752281616, - "grad_norm": 1.4909508228302002, - "learning_rate": 6.122311557788944e-05, - "loss": 5.2748, - "step": 39097 - }, - { - "epoch": 20.390091264667536, - "grad_norm": 1.4329723119735718, - "learning_rate": 6.122211055276382e-05, - "loss": 5.4032, - "step": 39098 - }, - { - "epoch": 20.390612777053455, - "grad_norm": 1.5163997411727905, - "learning_rate": 6.122110552763818e-05, - "loss": 5.1672, - "step": 39099 - }, - { - "epoch": 20.391134289439375, - "grad_norm": 1.5240025520324707, - "learning_rate": 6.122010050251256e-05, - "loss": 5.441, - "step": 39100 - }, - { - "epoch": 20.391655801825294, - "grad_norm": 1.5419052839279175, - "learning_rate": 6.121909547738694e-05, - "loss": 5.3711, - "step": 39101 - }, - { - "epoch": 20.392177314211214, - "grad_norm": 1.4940335750579834, - "learning_rate": 6.121809045226132e-05, - "loss": 5.3362, - "step": 39102 - }, - { - "epoch": 20.392698826597133, - "grad_norm": 1.5756571292877197, - "learning_rate": 6.121708542713568e-05, - "loss": 4.5446, - "step": 39103 - }, - { - "epoch": 20.39322033898305, - "grad_norm": 1.5270276069641113, - "learning_rate": 6.121608040201006e-05, - "loss": 5.0847, - "step": 39104 - }, - { - "epoch": 20.39374185136897, - "grad_norm": 1.548668384552002, - "learning_rate": 6.121507537688442e-05, - "loss": 5.3695, - "step": 39105 - }, - { - "epoch": 20.394263363754888, - "grad_norm": 1.4212546348571777, - "learning_rate": 6.12140703517588e-05, - "loss": 5.3153, - "step": 39106 - }, - { - "epoch": 20.394784876140807, - "grad_norm": 1.5117441415786743, - "learning_rate": 6.121306532663317e-05, - "loss": 5.4751, - "step": 39107 - }, - { - "epoch": 20.395306388526727, - "grad_norm": 1.365805983543396, - "learning_rate": 6.121206030150754e-05, - "loss": 5.8326, - "step": 39108 - }, - { - "epoch": 20.395827900912646, - "grad_norm": 1.5644761323928833, - "learning_rate": 6.121105527638191e-05, - "loss": 4.9445, - "step": 39109 - }, - { - "epoch": 20.396349413298566, - "grad_norm": 1.5726583003997803, - "learning_rate": 6.121005025125627e-05, - "loss": 5.2832, - "step": 39110 - }, - { - "epoch": 20.396870925684485, - "grad_norm": 1.4119763374328613, - "learning_rate": 6.120904522613065e-05, - "loss": 5.6537, - "step": 39111 - }, - { - "epoch": 20.397392438070405, - "grad_norm": 1.4441804885864258, - "learning_rate": 6.120804020100503e-05, - "loss": 5.4232, - "step": 39112 - }, - { - "epoch": 20.397913950456324, - "grad_norm": 1.5257585048675537, - "learning_rate": 6.12070351758794e-05, - "loss": 4.9246, - "step": 39113 - }, - { - "epoch": 20.398435462842244, - "grad_norm": 1.6661722660064697, - "learning_rate": 6.120603015075377e-05, - "loss": 5.1367, - "step": 39114 - }, - { - "epoch": 20.398956975228163, - "grad_norm": 1.5135685205459595, - "learning_rate": 6.120502512562815e-05, - "loss": 5.0041, - "step": 39115 - }, - { - "epoch": 20.39947848761408, - "grad_norm": 1.5521724224090576, - "learning_rate": 6.120402010050251e-05, - "loss": 5.6358, - "step": 39116 - }, - { - "epoch": 20.4, - "grad_norm": 1.4465432167053223, - "learning_rate": 6.120301507537689e-05, - "loss": 5.8501, - "step": 39117 - }, - { - "epoch": 20.400521512385918, - "grad_norm": 1.5845431089401245, - "learning_rate": 6.120201005025125e-05, - "loss": 5.306, - "step": 39118 - }, - { - "epoch": 20.401043024771838, - "grad_norm": 1.5388000011444092, - "learning_rate": 6.120100502512563e-05, - "loss": 5.2636, - "step": 39119 - }, - { - "epoch": 20.401564537157757, - "grad_norm": 1.568698763847351, - "learning_rate": 6.12e-05, - "loss": 5.0672, - "step": 39120 - }, - { - "epoch": 20.402086049543676, - "grad_norm": 1.642250418663025, - "learning_rate": 6.119899497487437e-05, - "loss": 5.0477, - "step": 39121 - }, - { - "epoch": 20.402607561929596, - "grad_norm": 1.4970066547393799, - "learning_rate": 6.119798994974875e-05, - "loss": 5.7093, - "step": 39122 - }, - { - "epoch": 20.403129074315515, - "grad_norm": 1.4126724004745483, - "learning_rate": 6.119698492462312e-05, - "loss": 5.501, - "step": 39123 - }, - { - "epoch": 20.403650586701435, - "grad_norm": 1.59050714969635, - "learning_rate": 6.11959798994975e-05, - "loss": 4.9626, - "step": 39124 - }, - { - "epoch": 20.404172099087354, - "grad_norm": 1.5347026586532593, - "learning_rate": 6.119497487437186e-05, - "loss": 5.2787, - "step": 39125 - }, - { - "epoch": 20.404693611473274, - "grad_norm": 1.6729317903518677, - "learning_rate": 6.119396984924624e-05, - "loss": 5.2993, - "step": 39126 - }, - { - "epoch": 20.405215123859193, - "grad_norm": 1.5412582159042358, - "learning_rate": 6.11929648241206e-05, - "loss": 5.4915, - "step": 39127 - }, - { - "epoch": 20.40573663624511, - "grad_norm": 1.4775314331054688, - "learning_rate": 6.119195979899498e-05, - "loss": 5.1497, - "step": 39128 - }, - { - "epoch": 20.40625814863103, - "grad_norm": 1.5748220682144165, - "learning_rate": 6.119095477386934e-05, - "loss": 5.094, - "step": 39129 - }, - { - "epoch": 20.406779661016948, - "grad_norm": 1.4944684505462646, - "learning_rate": 6.118994974874372e-05, - "loss": 5.6307, - "step": 39130 - }, - { - "epoch": 20.407301173402868, - "grad_norm": 1.4201107025146484, - "learning_rate": 6.118894472361809e-05, - "loss": 5.7429, - "step": 39131 - }, - { - "epoch": 20.407822685788787, - "grad_norm": 1.566638469696045, - "learning_rate": 6.118793969849246e-05, - "loss": 5.1446, - "step": 39132 - }, - { - "epoch": 20.408344198174706, - "grad_norm": 1.4989084005355835, - "learning_rate": 6.118693467336684e-05, - "loss": 5.0668, - "step": 39133 - }, - { - "epoch": 20.408865710560626, - "grad_norm": 1.566967248916626, - "learning_rate": 6.118592964824122e-05, - "loss": 5.4076, - "step": 39134 - }, - { - "epoch": 20.409387222946545, - "grad_norm": 1.6560512781143188, - "learning_rate": 6.118492462311558e-05, - "loss": 4.9024, - "step": 39135 - }, - { - "epoch": 20.409908735332465, - "grad_norm": 1.5748809576034546, - "learning_rate": 6.118391959798996e-05, - "loss": 5.2771, - "step": 39136 - }, - { - "epoch": 20.410430247718384, - "grad_norm": 1.5949653387069702, - "learning_rate": 6.118291457286433e-05, - "loss": 5.4099, - "step": 39137 - }, - { - "epoch": 20.410951760104304, - "grad_norm": 1.6597089767456055, - "learning_rate": 6.118190954773869e-05, - "loss": 5.3435, - "step": 39138 - }, - { - "epoch": 20.411473272490223, - "grad_norm": 1.569689154624939, - "learning_rate": 6.118090452261307e-05, - "loss": 5.3263, - "step": 39139 - }, - { - "epoch": 20.41199478487614, - "grad_norm": 1.5590767860412598, - "learning_rate": 6.117989949748743e-05, - "loss": 5.3222, - "step": 39140 - }, - { - "epoch": 20.41251629726206, - "grad_norm": 1.573208212852478, - "learning_rate": 6.117889447236181e-05, - "loss": 5.2463, - "step": 39141 - }, - { - "epoch": 20.413037809647978, - "grad_norm": 1.6863559484481812, - "learning_rate": 6.117788944723619e-05, - "loss": 5.4989, - "step": 39142 - }, - { - "epoch": 20.413559322033898, - "grad_norm": 1.6088703870773315, - "learning_rate": 6.117688442211056e-05, - "loss": 5.3232, - "step": 39143 - }, - { - "epoch": 20.414080834419817, - "grad_norm": 1.528210997581482, - "learning_rate": 6.117587939698493e-05, - "loss": 5.6033, - "step": 39144 - }, - { - "epoch": 20.414602346805736, - "grad_norm": 1.6275591850280762, - "learning_rate": 6.117487437185931e-05, - "loss": 5.844, - "step": 39145 - }, - { - "epoch": 20.415123859191656, - "grad_norm": 1.556462287902832, - "learning_rate": 6.117386934673367e-05, - "loss": 4.6781, - "step": 39146 - }, - { - "epoch": 20.415645371577575, - "grad_norm": 1.5109871625900269, - "learning_rate": 6.117286432160805e-05, - "loss": 5.4118, - "step": 39147 - }, - { - "epoch": 20.416166883963495, - "grad_norm": 1.5809930562973022, - "learning_rate": 6.117185929648241e-05, - "loss": 4.8862, - "step": 39148 - }, - { - "epoch": 20.416688396349414, - "grad_norm": 1.5480096340179443, - "learning_rate": 6.117085427135679e-05, - "loss": 5.2902, - "step": 39149 - }, - { - "epoch": 20.417209908735334, - "grad_norm": 1.5376540422439575, - "learning_rate": 6.116984924623116e-05, - "loss": 5.4216, - "step": 39150 - }, - { - "epoch": 20.417731421121253, - "grad_norm": 1.6855875253677368, - "learning_rate": 6.116884422110552e-05, - "loss": 4.5425, - "step": 39151 - }, - { - "epoch": 20.41825293350717, - "grad_norm": 1.6210318803787231, - "learning_rate": 6.11678391959799e-05, - "loss": 5.0848, - "step": 39152 - }, - { - "epoch": 20.41877444589309, - "grad_norm": 1.6392858028411865, - "learning_rate": 6.116683417085428e-05, - "loss": 4.9807, - "step": 39153 - }, - { - "epoch": 20.419295958279008, - "grad_norm": 1.653232455253601, - "learning_rate": 6.116582914572865e-05, - "loss": 5.2879, - "step": 39154 - }, - { - "epoch": 20.419817470664928, - "grad_norm": 1.643033742904663, - "learning_rate": 6.116482412060302e-05, - "loss": 5.1741, - "step": 39155 - }, - { - "epoch": 20.420338983050847, - "grad_norm": 1.5167585611343384, - "learning_rate": 6.11638190954774e-05, - "loss": 5.6661, - "step": 39156 - }, - { - "epoch": 20.420860495436767, - "grad_norm": 1.5611923933029175, - "learning_rate": 6.116281407035176e-05, - "loss": 5.4721, - "step": 39157 - }, - { - "epoch": 20.421382007822686, - "grad_norm": 1.5772531032562256, - "learning_rate": 6.116180904522614e-05, - "loss": 5.3757, - "step": 39158 - }, - { - "epoch": 20.421903520208605, - "grad_norm": 1.5584921836853027, - "learning_rate": 6.11608040201005e-05, - "loss": 4.846, - "step": 39159 - }, - { - "epoch": 20.422425032594525, - "grad_norm": 1.5448660850524902, - "learning_rate": 6.115979899497488e-05, - "loss": 5.0396, - "step": 39160 - }, - { - "epoch": 20.422946544980444, - "grad_norm": 1.5479423999786377, - "learning_rate": 6.115879396984924e-05, - "loss": 5.1938, - "step": 39161 - }, - { - "epoch": 20.423468057366364, - "grad_norm": 1.4870119094848633, - "learning_rate": 6.115778894472362e-05, - "loss": 5.3273, - "step": 39162 - }, - { - "epoch": 20.423989569752283, - "grad_norm": 1.557992935180664, - "learning_rate": 6.1156783919598e-05, - "loss": 5.3715, - "step": 39163 - }, - { - "epoch": 20.4245110821382, - "grad_norm": 1.4583230018615723, - "learning_rate": 6.115577889447236e-05, - "loss": 5.34, - "step": 39164 - }, - { - "epoch": 20.42503259452412, - "grad_norm": 1.5566149950027466, - "learning_rate": 6.115477386934674e-05, - "loss": 5.1713, - "step": 39165 - }, - { - "epoch": 20.425554106910038, - "grad_norm": 1.4666873216629028, - "learning_rate": 6.11537688442211e-05, - "loss": 5.3737, - "step": 39166 - }, - { - "epoch": 20.426075619295958, - "grad_norm": 1.5978419780731201, - "learning_rate": 6.115276381909548e-05, - "loss": 4.9386, - "step": 39167 - }, - { - "epoch": 20.426597131681877, - "grad_norm": 1.451133370399475, - "learning_rate": 6.115175879396985e-05, - "loss": 5.7342, - "step": 39168 - }, - { - "epoch": 20.427118644067797, - "grad_norm": 1.6309210062026978, - "learning_rate": 6.115075376884423e-05, - "loss": 4.6965, - "step": 39169 - }, - { - "epoch": 20.427640156453716, - "grad_norm": 1.4942840337753296, - "learning_rate": 6.114974874371859e-05, - "loss": 5.468, - "step": 39170 - }, - { - "epoch": 20.428161668839635, - "grad_norm": 1.4919672012329102, - "learning_rate": 6.114874371859297e-05, - "loss": 5.5374, - "step": 39171 - }, - { - "epoch": 20.428683181225555, - "grad_norm": 1.4606890678405762, - "learning_rate": 6.114773869346733e-05, - "loss": 5.3868, - "step": 39172 - }, - { - "epoch": 20.429204693611474, - "grad_norm": 1.445459246635437, - "learning_rate": 6.114673366834171e-05, - "loss": 5.749, - "step": 39173 - }, - { - "epoch": 20.429726205997394, - "grad_norm": 1.6059123277664185, - "learning_rate": 6.114572864321609e-05, - "loss": 5.2537, - "step": 39174 - }, - { - "epoch": 20.430247718383313, - "grad_norm": 1.61715567111969, - "learning_rate": 6.114472361809047e-05, - "loss": 5.348, - "step": 39175 - }, - { - "epoch": 20.43076923076923, - "grad_norm": 1.5249617099761963, - "learning_rate": 6.114371859296483e-05, - "loss": 5.587, - "step": 39176 - }, - { - "epoch": 20.43129074315515, - "grad_norm": 1.499993085861206, - "learning_rate": 6.11427135678392e-05, - "loss": 5.5058, - "step": 39177 - }, - { - "epoch": 20.431812255541068, - "grad_norm": 1.5570179224014282, - "learning_rate": 6.114170854271357e-05, - "loss": 5.6777, - "step": 39178 - }, - { - "epoch": 20.432333767926988, - "grad_norm": 1.6335922479629517, - "learning_rate": 6.114070351758794e-05, - "loss": 5.1646, - "step": 39179 - }, - { - "epoch": 20.432855280312907, - "grad_norm": 1.5311355590820312, - "learning_rate": 6.113969849246231e-05, - "loss": 5.4258, - "step": 39180 - }, - { - "epoch": 20.433376792698827, - "grad_norm": 1.510166049003601, - "learning_rate": 6.113869346733668e-05, - "loss": 5.1941, - "step": 39181 - }, - { - "epoch": 20.433898305084746, - "grad_norm": 1.691862940788269, - "learning_rate": 6.113768844221106e-05, - "loss": 4.5655, - "step": 39182 - }, - { - "epoch": 20.434419817470665, - "grad_norm": 1.5858371257781982, - "learning_rate": 6.113668341708543e-05, - "loss": 5.4818, - "step": 39183 - }, - { - "epoch": 20.434941329856585, - "grad_norm": 1.6371864080429077, - "learning_rate": 6.113567839195981e-05, - "loss": 4.891, - "step": 39184 - }, - { - "epoch": 20.435462842242504, - "grad_norm": 1.647875189781189, - "learning_rate": 6.113467336683418e-05, - "loss": 5.0171, - "step": 39185 - }, - { - "epoch": 20.435984354628424, - "grad_norm": 1.6351635456085205, - "learning_rate": 6.113366834170855e-05, - "loss": 5.2249, - "step": 39186 - }, - { - "epoch": 20.43650586701434, - "grad_norm": 1.546930193901062, - "learning_rate": 6.113266331658292e-05, - "loss": 5.0325, - "step": 39187 - }, - { - "epoch": 20.43702737940026, - "grad_norm": 1.6765334606170654, - "learning_rate": 6.11316582914573e-05, - "loss": 5.2269, - "step": 39188 - }, - { - "epoch": 20.43754889178618, - "grad_norm": 1.4339665174484253, - "learning_rate": 6.113065326633166e-05, - "loss": 5.4486, - "step": 39189 - }, - { - "epoch": 20.438070404172098, - "grad_norm": 1.5157150030136108, - "learning_rate": 6.112964824120602e-05, - "loss": 5.4449, - "step": 39190 - }, - { - "epoch": 20.438591916558018, - "grad_norm": 1.4742637872695923, - "learning_rate": 6.11286432160804e-05, - "loss": 5.3775, - "step": 39191 - }, - { - "epoch": 20.439113428943937, - "grad_norm": 1.5768489837646484, - "learning_rate": 6.112763819095477e-05, - "loss": 5.3375, - "step": 39192 - }, - { - "epoch": 20.439634941329857, - "grad_norm": 1.5502500534057617, - "learning_rate": 6.112663316582914e-05, - "loss": 5.3344, - "step": 39193 - }, - { - "epoch": 20.440156453715776, - "grad_norm": 1.462510585784912, - "learning_rate": 6.112562814070352e-05, - "loss": 5.2455, - "step": 39194 - }, - { - "epoch": 20.440677966101696, - "grad_norm": 1.4972797632217407, - "learning_rate": 6.11246231155779e-05, - "loss": 5.4439, - "step": 39195 - }, - { - "epoch": 20.441199478487615, - "grad_norm": 1.4721014499664307, - "learning_rate": 6.112361809045226e-05, - "loss": 5.4188, - "step": 39196 - }, - { - "epoch": 20.441720990873534, - "grad_norm": 1.483281135559082, - "learning_rate": 6.112261306532664e-05, - "loss": 5.4233, - "step": 39197 - }, - { - "epoch": 20.442242503259454, - "grad_norm": 1.6059657335281372, - "learning_rate": 6.1121608040201e-05, - "loss": 5.0426, - "step": 39198 - }, - { - "epoch": 20.442764015645373, - "grad_norm": 1.4513877630233765, - "learning_rate": 6.112060301507538e-05, - "loss": 5.7841, - "step": 39199 - }, - { - "epoch": 20.44328552803129, - "grad_norm": 1.502186894416809, - "learning_rate": 6.111959798994975e-05, - "loss": 5.2735, - "step": 39200 - }, - { - "epoch": 20.44380704041721, - "grad_norm": 1.637000560760498, - "learning_rate": 6.111859296482413e-05, - "loss": 4.9812, - "step": 39201 - }, - { - "epoch": 20.444328552803128, - "grad_norm": 1.4670974016189575, - "learning_rate": 6.111758793969849e-05, - "loss": 5.4211, - "step": 39202 - }, - { - "epoch": 20.444850065189048, - "grad_norm": 1.5129468441009521, - "learning_rate": 6.111658291457287e-05, - "loss": 5.2644, - "step": 39203 - }, - { - "epoch": 20.445371577574967, - "grad_norm": 1.4795082807540894, - "learning_rate": 6.111557788944725e-05, - "loss": 5.5089, - "step": 39204 - }, - { - "epoch": 20.445893089960887, - "grad_norm": 1.4679921865463257, - "learning_rate": 6.111457286432161e-05, - "loss": 5.3141, - "step": 39205 - }, - { - "epoch": 20.446414602346806, - "grad_norm": 1.5355230569839478, - "learning_rate": 6.111356783919599e-05, - "loss": 5.6228, - "step": 39206 - }, - { - "epoch": 20.446936114732726, - "grad_norm": 1.4816838502883911, - "learning_rate": 6.111256281407035e-05, - "loss": 5.1623, - "step": 39207 - }, - { - "epoch": 20.447457627118645, - "grad_norm": 1.514768362045288, - "learning_rate": 6.111155778894473e-05, - "loss": 5.2152, - "step": 39208 - }, - { - "epoch": 20.447979139504564, - "grad_norm": 1.4594944715499878, - "learning_rate": 6.11105527638191e-05, - "loss": 5.3124, - "step": 39209 - }, - { - "epoch": 20.448500651890484, - "grad_norm": 1.6265099048614502, - "learning_rate": 6.110954773869347e-05, - "loss": 5.1866, - "step": 39210 - }, - { - "epoch": 20.4490221642764, - "grad_norm": 1.4618339538574219, - "learning_rate": 6.110854271356784e-05, - "loss": 4.6618, - "step": 39211 - }, - { - "epoch": 20.44954367666232, - "grad_norm": 1.4374983310699463, - "learning_rate": 6.110753768844221e-05, - "loss": 5.4524, - "step": 39212 - }, - { - "epoch": 20.45006518904824, - "grad_norm": 1.4458436965942383, - "learning_rate": 6.110653266331658e-05, - "loss": 5.4549, - "step": 39213 - }, - { - "epoch": 20.45058670143416, - "grad_norm": 1.5121983289718628, - "learning_rate": 6.110552763819096e-05, - "loss": 5.4661, - "step": 39214 - }, - { - "epoch": 20.451108213820078, - "grad_norm": 1.4858014583587646, - "learning_rate": 6.110452261306533e-05, - "loss": 5.1677, - "step": 39215 - }, - { - "epoch": 20.451629726205997, - "grad_norm": 1.5144952535629272, - "learning_rate": 6.11035175879397e-05, - "loss": 5.1272, - "step": 39216 - }, - { - "epoch": 20.452151238591917, - "grad_norm": 1.5353615283966064, - "learning_rate": 6.110251256281408e-05, - "loss": 5.3849, - "step": 39217 - }, - { - "epoch": 20.452672750977836, - "grad_norm": 1.5405497550964355, - "learning_rate": 6.110150753768844e-05, - "loss": 5.0416, - "step": 39218 - }, - { - "epoch": 20.453194263363756, - "grad_norm": 1.659932017326355, - "learning_rate": 6.110050251256282e-05, - "loss": 5.146, - "step": 39219 - }, - { - "epoch": 20.453715775749675, - "grad_norm": 1.574473261833191, - "learning_rate": 6.109949748743718e-05, - "loss": 5.3654, - "step": 39220 - }, - { - "epoch": 20.454237288135594, - "grad_norm": 1.5099859237670898, - "learning_rate": 6.109849246231156e-05, - "loss": 5.415, - "step": 39221 - }, - { - "epoch": 20.454758800521514, - "grad_norm": 1.4849106073379517, - "learning_rate": 6.109748743718593e-05, - "loss": 5.4117, - "step": 39222 - }, - { - "epoch": 20.45528031290743, - "grad_norm": 1.4712542295455933, - "learning_rate": 6.10964824120603e-05, - "loss": 5.3523, - "step": 39223 - }, - { - "epoch": 20.45580182529335, - "grad_norm": 1.6596806049346924, - "learning_rate": 6.109547738693467e-05, - "loss": 4.9182, - "step": 39224 - }, - { - "epoch": 20.45632333767927, - "grad_norm": 1.6348894834518433, - "learning_rate": 6.109447236180905e-05, - "loss": 4.8502, - "step": 39225 - }, - { - "epoch": 20.45684485006519, - "grad_norm": 1.6819812059402466, - "learning_rate": 6.109346733668342e-05, - "loss": 4.9856, - "step": 39226 - }, - { - "epoch": 20.457366362451108, - "grad_norm": 1.464590072631836, - "learning_rate": 6.10924623115578e-05, - "loss": 5.7342, - "step": 39227 - }, - { - "epoch": 20.457887874837027, - "grad_norm": 1.4976483583450317, - "learning_rate": 6.109145728643217e-05, - "loss": 5.1473, - "step": 39228 - }, - { - "epoch": 20.458409387222947, - "grad_norm": 1.4871894121170044, - "learning_rate": 6.109045226130654e-05, - "loss": 5.7313, - "step": 39229 - }, - { - "epoch": 20.458930899608866, - "grad_norm": 1.6520514488220215, - "learning_rate": 6.108944723618091e-05, - "loss": 5.283, - "step": 39230 - }, - { - "epoch": 20.459452411994786, - "grad_norm": 1.5655161142349243, - "learning_rate": 6.108844221105527e-05, - "loss": 5.2381, - "step": 39231 - }, - { - "epoch": 20.459973924380705, - "grad_norm": 1.5517462491989136, - "learning_rate": 6.108743718592965e-05, - "loss": 4.9867, - "step": 39232 - }, - { - "epoch": 20.460495436766625, - "grad_norm": 1.5415490865707397, - "learning_rate": 6.108643216080401e-05, - "loss": 5.505, - "step": 39233 - }, - { - "epoch": 20.461016949152544, - "grad_norm": 1.611360788345337, - "learning_rate": 6.108542713567839e-05, - "loss": 5.1393, - "step": 39234 - }, - { - "epoch": 20.46153846153846, - "grad_norm": 1.5025660991668701, - "learning_rate": 6.108442211055277e-05, - "loss": 5.4201, - "step": 39235 - }, - { - "epoch": 20.46205997392438, - "grad_norm": 1.4526742696762085, - "learning_rate": 6.108341708542715e-05, - "loss": 5.4098, - "step": 39236 - }, - { - "epoch": 20.4625814863103, - "grad_norm": 1.4513765573501587, - "learning_rate": 6.108241206030151e-05, - "loss": 5.1847, - "step": 39237 - }, - { - "epoch": 20.46310299869622, - "grad_norm": 1.5924623012542725, - "learning_rate": 6.108140703517589e-05, - "loss": 5.2675, - "step": 39238 - }, - { - "epoch": 20.463624511082138, - "grad_norm": 1.610077977180481, - "learning_rate": 6.108040201005025e-05, - "loss": 5.3199, - "step": 39239 - }, - { - "epoch": 20.464146023468057, - "grad_norm": 1.4391592741012573, - "learning_rate": 6.107939698492463e-05, - "loss": 5.203, - "step": 39240 - }, - { - "epoch": 20.464667535853977, - "grad_norm": 1.4315147399902344, - "learning_rate": 6.1078391959799e-05, - "loss": 5.5225, - "step": 39241 - }, - { - "epoch": 20.465189048239896, - "grad_norm": 1.4747183322906494, - "learning_rate": 6.107738693467337e-05, - "loss": 5.5624, - "step": 39242 - }, - { - "epoch": 20.465710560625816, - "grad_norm": 1.4516838788986206, - "learning_rate": 6.107638190954774e-05, - "loss": 5.31, - "step": 39243 - }, - { - "epoch": 20.466232073011735, - "grad_norm": 1.6226801872253418, - "learning_rate": 6.10753768844221e-05, - "loss": 4.8929, - "step": 39244 - }, - { - "epoch": 20.466753585397655, - "grad_norm": 1.4757024049758911, - "learning_rate": 6.107437185929648e-05, - "loss": 5.2256, - "step": 39245 - }, - { - "epoch": 20.467275097783574, - "grad_norm": 1.8639414310455322, - "learning_rate": 6.107336683417086e-05, - "loss": 4.8316, - "step": 39246 - }, - { - "epoch": 20.46779661016949, - "grad_norm": 1.4654583930969238, - "learning_rate": 6.107236180904524e-05, - "loss": 5.6576, - "step": 39247 - }, - { - "epoch": 20.46831812255541, - "grad_norm": 1.5810139179229736, - "learning_rate": 6.10713567839196e-05, - "loss": 5.4652, - "step": 39248 - }, - { - "epoch": 20.46883963494133, - "grad_norm": 1.6509428024291992, - "learning_rate": 6.107035175879398e-05, - "loss": 5.2123, - "step": 39249 - }, - { - "epoch": 20.46936114732725, - "grad_norm": 1.4936994314193726, - "learning_rate": 6.106934673366834e-05, - "loss": 5.0404, - "step": 39250 - }, - { - "epoch": 20.469882659713168, - "grad_norm": 1.5470963716506958, - "learning_rate": 6.106834170854272e-05, - "loss": 4.9574, - "step": 39251 - }, - { - "epoch": 20.470404172099087, - "grad_norm": 1.5847070217132568, - "learning_rate": 6.106733668341708e-05, - "loss": 5.3926, - "step": 39252 - }, - { - "epoch": 20.470925684485007, - "grad_norm": 1.471047043800354, - "learning_rate": 6.106633165829146e-05, - "loss": 5.2604, - "step": 39253 - }, - { - "epoch": 20.471447196870926, - "grad_norm": 1.8438405990600586, - "learning_rate": 6.106532663316583e-05, - "loss": 5.0121, - "step": 39254 - }, - { - "epoch": 20.471968709256846, - "grad_norm": 1.5046148300170898, - "learning_rate": 6.10643216080402e-05, - "loss": 5.4523, - "step": 39255 - }, - { - "epoch": 20.472490221642765, - "grad_norm": 1.4635696411132812, - "learning_rate": 6.106331658291458e-05, - "loss": 5.5091, - "step": 39256 - }, - { - "epoch": 20.473011734028685, - "grad_norm": 1.5024489164352417, - "learning_rate": 6.106231155778895e-05, - "loss": 5.2482, - "step": 39257 - }, - { - "epoch": 20.473533246414604, - "grad_norm": 1.581940770149231, - "learning_rate": 6.106130653266332e-05, - "loss": 5.3334, - "step": 39258 - }, - { - "epoch": 20.47405475880052, - "grad_norm": 1.5555427074432373, - "learning_rate": 6.106030150753769e-05, - "loss": 5.3749, - "step": 39259 - }, - { - "epoch": 20.47457627118644, - "grad_norm": 1.5814120769500732, - "learning_rate": 6.105929648241207e-05, - "loss": 5.0587, - "step": 39260 - }, - { - "epoch": 20.47509778357236, - "grad_norm": 1.5108728408813477, - "learning_rate": 6.105829145728643e-05, - "loss": 5.3813, - "step": 39261 - }, - { - "epoch": 20.47561929595828, - "grad_norm": 1.575006365776062, - "learning_rate": 6.105728643216081e-05, - "loss": 5.1302, - "step": 39262 - }, - { - "epoch": 20.476140808344198, - "grad_norm": 1.5255606174468994, - "learning_rate": 6.105628140703517e-05, - "loss": 4.9115, - "step": 39263 - }, - { - "epoch": 20.476662320730117, - "grad_norm": 1.5296711921691895, - "learning_rate": 6.105527638190955e-05, - "loss": 5.3278, - "step": 39264 - }, - { - "epoch": 20.477183833116037, - "grad_norm": 1.5366332530975342, - "learning_rate": 6.105427135678391e-05, - "loss": 5.5106, - "step": 39265 - }, - { - "epoch": 20.477705345501956, - "grad_norm": 1.585924744606018, - "learning_rate": 6.105326633165829e-05, - "loss": 4.8841, - "step": 39266 - }, - { - "epoch": 20.478226857887876, - "grad_norm": 1.4706027507781982, - "learning_rate": 6.105226130653267e-05, - "loss": 5.686, - "step": 39267 - }, - { - "epoch": 20.478748370273795, - "grad_norm": 1.5690721273422241, - "learning_rate": 6.105125628140705e-05, - "loss": 5.265, - "step": 39268 - }, - { - "epoch": 20.479269882659715, - "grad_norm": 1.4599096775054932, - "learning_rate": 6.105025125628141e-05, - "loss": 5.3254, - "step": 39269 - }, - { - "epoch": 20.479791395045634, - "grad_norm": 1.5083411931991577, - "learning_rate": 6.104924623115578e-05, - "loss": 5.6519, - "step": 39270 - }, - { - "epoch": 20.48031290743155, - "grad_norm": 1.470821499824524, - "learning_rate": 6.104824120603015e-05, - "loss": 5.3031, - "step": 39271 - }, - { - "epoch": 20.48083441981747, - "grad_norm": 1.5599735975265503, - "learning_rate": 6.104723618090452e-05, - "loss": 5.4226, - "step": 39272 - }, - { - "epoch": 20.48135593220339, - "grad_norm": 1.4817174673080444, - "learning_rate": 6.10462311557789e-05, - "loss": 5.5017, - "step": 39273 - }, - { - "epoch": 20.48187744458931, - "grad_norm": 1.5488996505737305, - "learning_rate": 6.104522613065326e-05, - "loss": 5.0908, - "step": 39274 - }, - { - "epoch": 20.482398956975228, - "grad_norm": 1.5446836948394775, - "learning_rate": 6.104422110552764e-05, - "loss": 5.2754, - "step": 39275 - }, - { - "epoch": 20.482920469361147, - "grad_norm": 1.7011641263961792, - "learning_rate": 6.104321608040202e-05, - "loss": 5.5051, - "step": 39276 - }, - { - "epoch": 20.483441981747067, - "grad_norm": 1.506603717803955, - "learning_rate": 6.10422110552764e-05, - "loss": 5.0818, - "step": 39277 - }, - { - "epoch": 20.483963494132986, - "grad_norm": 1.61571204662323, - "learning_rate": 6.104120603015076e-05, - "loss": 5.4402, - "step": 39278 - }, - { - "epoch": 20.484485006518906, - "grad_norm": 1.5746686458587646, - "learning_rate": 6.104020100502514e-05, - "loss": 5.1099, - "step": 39279 - }, - { - "epoch": 20.485006518904825, - "grad_norm": 1.7260318994522095, - "learning_rate": 6.10391959798995e-05, - "loss": 5.1174, - "step": 39280 - }, - { - "epoch": 20.485528031290745, - "grad_norm": 1.5812554359436035, - "learning_rate": 6.103819095477388e-05, - "loss": 5.0754, - "step": 39281 - }, - { - "epoch": 20.486049543676664, - "grad_norm": 1.6353305578231812, - "learning_rate": 6.103718592964824e-05, - "loss": 5.0108, - "step": 39282 - }, - { - "epoch": 20.48657105606258, - "grad_norm": 1.4977896213531494, - "learning_rate": 6.103618090452261e-05, - "loss": 5.3568, - "step": 39283 - }, - { - "epoch": 20.4870925684485, - "grad_norm": 1.5921846628189087, - "learning_rate": 6.103517587939698e-05, - "loss": 5.6261, - "step": 39284 - }, - { - "epoch": 20.48761408083442, - "grad_norm": 1.479851484298706, - "learning_rate": 6.1034170854271356e-05, - "loss": 5.4782, - "step": 39285 - }, - { - "epoch": 20.48813559322034, - "grad_norm": 1.5566447973251343, - "learning_rate": 6.1033165829145734e-05, - "loss": 5.3916, - "step": 39286 - }, - { - "epoch": 20.488657105606258, - "grad_norm": 1.6301968097686768, - "learning_rate": 6.10321608040201e-05, - "loss": 5.179, - "step": 39287 - }, - { - "epoch": 20.489178617992177, - "grad_norm": 1.6149908304214478, - "learning_rate": 6.1031155778894476e-05, - "loss": 4.7348, - "step": 39288 - }, - { - "epoch": 20.489700130378097, - "grad_norm": 1.4717965126037598, - "learning_rate": 6.103015075376885e-05, - "loss": 5.4496, - "step": 39289 - }, - { - "epoch": 20.490221642764016, - "grad_norm": 1.4751356840133667, - "learning_rate": 6.1029145728643225e-05, - "loss": 5.5731, - "step": 39290 - }, - { - "epoch": 20.490743155149936, - "grad_norm": 1.7154840230941772, - "learning_rate": 6.102814070351759e-05, - "loss": 5.1433, - "step": 39291 - }, - { - "epoch": 20.491264667535855, - "grad_norm": 1.461373209953308, - "learning_rate": 6.102713567839197e-05, - "loss": 5.455, - "step": 39292 - }, - { - "epoch": 20.491786179921775, - "grad_norm": 1.5330816507339478, - "learning_rate": 6.102613065326633e-05, - "loss": 5.479, - "step": 39293 - }, - { - "epoch": 20.49230769230769, - "grad_norm": 1.511727213859558, - "learning_rate": 6.102512562814071e-05, - "loss": 5.2021, - "step": 39294 - }, - { - "epoch": 20.49282920469361, - "grad_norm": 1.5649257898330688, - "learning_rate": 6.102412060301508e-05, - "loss": 5.4161, - "step": 39295 - }, - { - "epoch": 20.49335071707953, - "grad_norm": 1.5506330728530884, - "learning_rate": 6.1023115577889444e-05, - "loss": 5.6021, - "step": 39296 - }, - { - "epoch": 20.49387222946545, - "grad_norm": 1.5315881967544556, - "learning_rate": 6.102211055276382e-05, - "loss": 4.9586, - "step": 39297 - }, - { - "epoch": 20.49439374185137, - "grad_norm": 1.7004175186157227, - "learning_rate": 6.1021105527638186e-05, - "loss": 4.7243, - "step": 39298 - }, - { - "epoch": 20.494915254237288, - "grad_norm": 1.442698359489441, - "learning_rate": 6.1020100502512564e-05, - "loss": 5.3075, - "step": 39299 - }, - { - "epoch": 20.495436766623207, - "grad_norm": 1.4816226959228516, - "learning_rate": 6.1019095477386935e-05, - "loss": 5.5148, - "step": 39300 - }, - { - "epoch": 20.495958279009127, - "grad_norm": 1.3376781940460205, - "learning_rate": 6.101809045226131e-05, - "loss": 5.7856, - "step": 39301 - }, - { - "epoch": 20.496479791395046, - "grad_norm": 1.428850769996643, - "learning_rate": 6.101708542713568e-05, - "loss": 5.4598, - "step": 39302 - }, - { - "epoch": 20.497001303780966, - "grad_norm": 1.4255536794662476, - "learning_rate": 6.1016080402010055e-05, - "loss": 5.3882, - "step": 39303 - }, - { - "epoch": 20.497522816166885, - "grad_norm": 1.6311066150665283, - "learning_rate": 6.1015075376884426e-05, - "loss": 5.1186, - "step": 39304 - }, - { - "epoch": 20.498044328552805, - "grad_norm": 1.402056097984314, - "learning_rate": 6.1014070351758804e-05, - "loss": 5.6825, - "step": 39305 - }, - { - "epoch": 20.49856584093872, - "grad_norm": 1.555078148841858, - "learning_rate": 6.101306532663317e-05, - "loss": 5.4639, - "step": 39306 - }, - { - "epoch": 20.49908735332464, - "grad_norm": 1.4640473127365112, - "learning_rate": 6.1012060301507546e-05, - "loss": 5.5691, - "step": 39307 - }, - { - "epoch": 20.49960886571056, - "grad_norm": 1.532235860824585, - "learning_rate": 6.101105527638191e-05, - "loss": 5.4043, - "step": 39308 - }, - { - "epoch": 20.50013037809648, - "grad_norm": 1.9495059251785278, - "learning_rate": 6.101005025125628e-05, - "loss": 4.4664, - "step": 39309 - }, - { - "epoch": 20.5006518904824, - "grad_norm": 1.5615993738174438, - "learning_rate": 6.100904522613066e-05, - "loss": 5.2994, - "step": 39310 - }, - { - "epoch": 20.501173402868318, - "grad_norm": 1.519203782081604, - "learning_rate": 6.1008040201005024e-05, - "loss": 4.7192, - "step": 39311 - }, - { - "epoch": 20.501694915254237, - "grad_norm": 1.5965657234191895, - "learning_rate": 6.10070351758794e-05, - "loss": 5.5527, - "step": 39312 - }, - { - "epoch": 20.502216427640157, - "grad_norm": 1.5153594017028809, - "learning_rate": 6.1006030150753766e-05, - "loss": 5.504, - "step": 39313 - }, - { - "epoch": 20.502737940026076, - "grad_norm": 1.4216541051864624, - "learning_rate": 6.1005025125628143e-05, - "loss": 5.5458, - "step": 39314 - }, - { - "epoch": 20.503259452411996, - "grad_norm": 1.528558373451233, - "learning_rate": 6.1004020100502515e-05, - "loss": 5.194, - "step": 39315 - }, - { - "epoch": 20.503780964797915, - "grad_norm": 1.5273253917694092, - "learning_rate": 6.100301507537689e-05, - "loss": 4.8383, - "step": 39316 - }, - { - "epoch": 20.504302477183835, - "grad_norm": 1.35048508644104, - "learning_rate": 6.100201005025126e-05, - "loss": 4.7407, - "step": 39317 - }, - { - "epoch": 20.50482398956975, - "grad_norm": 1.5074056386947632, - "learning_rate": 6.1001005025125634e-05, - "loss": 5.3911, - "step": 39318 - }, - { - "epoch": 20.50534550195567, - "grad_norm": 1.495975375175476, - "learning_rate": 6.1e-05, - "loss": 5.265, - "step": 39319 - }, - { - "epoch": 20.50586701434159, - "grad_norm": 1.5368255376815796, - "learning_rate": 6.0998994974874377e-05, - "loss": 5.0668, - "step": 39320 - }, - { - "epoch": 20.50638852672751, - "grad_norm": 1.4520970582962036, - "learning_rate": 6.099798994974875e-05, - "loss": 5.4531, - "step": 39321 - }, - { - "epoch": 20.50691003911343, - "grad_norm": 1.4855247735977173, - "learning_rate": 6.0996984924623125e-05, - "loss": 4.6894, - "step": 39322 - }, - { - "epoch": 20.507431551499348, - "grad_norm": 1.6418043375015259, - "learning_rate": 6.099597989949749e-05, - "loss": 5.0289, - "step": 39323 - }, - { - "epoch": 20.507953063885267, - "grad_norm": 1.5531940460205078, - "learning_rate": 6.099497487437186e-05, - "loss": 5.5306, - "step": 39324 - }, - { - "epoch": 20.508474576271187, - "grad_norm": 1.5128228664398193, - "learning_rate": 6.099396984924624e-05, - "loss": 4.9188, - "step": 39325 - }, - { - "epoch": 20.508996088657106, - "grad_norm": 1.4974497556686401, - "learning_rate": 6.09929648241206e-05, - "loss": 5.313, - "step": 39326 - }, - { - "epoch": 20.509517601043026, - "grad_norm": 1.6299883127212524, - "learning_rate": 6.099195979899498e-05, - "loss": 4.8403, - "step": 39327 - }, - { - "epoch": 20.510039113428945, - "grad_norm": 1.551372766494751, - "learning_rate": 6.0990954773869345e-05, - "loss": 5.4634, - "step": 39328 - }, - { - "epoch": 20.510560625814865, - "grad_norm": 1.5218173265457153, - "learning_rate": 6.098994974874372e-05, - "loss": 5.1974, - "step": 39329 - }, - { - "epoch": 20.51108213820078, - "grad_norm": 1.4747446775436401, - "learning_rate": 6.0988944723618094e-05, - "loss": 5.1762, - "step": 39330 - }, - { - "epoch": 20.5116036505867, - "grad_norm": 1.6974270343780518, - "learning_rate": 6.098793969849247e-05, - "loss": 4.8721, - "step": 39331 - }, - { - "epoch": 20.51212516297262, - "grad_norm": 1.5009088516235352, - "learning_rate": 6.0986934673366836e-05, - "loss": 5.0687, - "step": 39332 - }, - { - "epoch": 20.51264667535854, - "grad_norm": 1.543951392173767, - "learning_rate": 6.0985929648241214e-05, - "loss": 5.0862, - "step": 39333 - }, - { - "epoch": 20.51316818774446, - "grad_norm": 1.546045184135437, - "learning_rate": 6.098492462311558e-05, - "loss": 5.3447, - "step": 39334 - }, - { - "epoch": 20.513689700130378, - "grad_norm": 1.4431819915771484, - "learning_rate": 6.0983919597989956e-05, - "loss": 5.3641, - "step": 39335 - }, - { - "epoch": 20.514211212516297, - "grad_norm": 1.4401087760925293, - "learning_rate": 6.098291457286433e-05, - "loss": 5.4214, - "step": 39336 - }, - { - "epoch": 20.514732724902217, - "grad_norm": 1.5025286674499512, - "learning_rate": 6.098190954773869e-05, - "loss": 4.9465, - "step": 39337 - }, - { - "epoch": 20.515254237288136, - "grad_norm": 1.4988389015197754, - "learning_rate": 6.098090452261307e-05, - "loss": 5.4828, - "step": 39338 - }, - { - "epoch": 20.515775749674056, - "grad_norm": 1.4675507545471191, - "learning_rate": 6.097989949748743e-05, - "loss": 5.2669, - "step": 39339 - }, - { - "epoch": 20.516297262059975, - "grad_norm": 1.472285509109497, - "learning_rate": 6.097889447236181e-05, - "loss": 5.3688, - "step": 39340 - }, - { - "epoch": 20.516818774445895, - "grad_norm": 1.4791316986083984, - "learning_rate": 6.097788944723618e-05, - "loss": 5.1714, - "step": 39341 - }, - { - "epoch": 20.51734028683181, - "grad_norm": 1.4187883138656616, - "learning_rate": 6.097688442211056e-05, - "loss": 5.5328, - "step": 39342 - }, - { - "epoch": 20.51786179921773, - "grad_norm": 1.5268903970718384, - "learning_rate": 6.0975879396984924e-05, - "loss": 5.3544, - "step": 39343 - }, - { - "epoch": 20.51838331160365, - "grad_norm": 1.591433048248291, - "learning_rate": 6.09748743718593e-05, - "loss": 5.5056, - "step": 39344 - }, - { - "epoch": 20.51890482398957, - "grad_norm": 1.6669795513153076, - "learning_rate": 6.0973869346733666e-05, - "loss": 5.4731, - "step": 39345 - }, - { - "epoch": 20.51942633637549, - "grad_norm": 1.4789931774139404, - "learning_rate": 6.0972864321608044e-05, - "loss": 5.4483, - "step": 39346 - }, - { - "epoch": 20.519947848761408, - "grad_norm": 1.6159300804138184, - "learning_rate": 6.0971859296482415e-05, - "loss": 4.892, - "step": 39347 - }, - { - "epoch": 20.520469361147327, - "grad_norm": 1.3779780864715576, - "learning_rate": 6.097085427135679e-05, - "loss": 5.4075, - "step": 39348 - }, - { - "epoch": 20.520990873533247, - "grad_norm": 1.543480634689331, - "learning_rate": 6.096984924623116e-05, - "loss": 5.3206, - "step": 39349 - }, - { - "epoch": 20.521512385919166, - "grad_norm": 1.5860562324523926, - "learning_rate": 6.096884422110553e-05, - "loss": 5.3015, - "step": 39350 - }, - { - "epoch": 20.522033898305086, - "grad_norm": 1.4648233652114868, - "learning_rate": 6.0967839195979906e-05, - "loss": 5.3243, - "step": 39351 - }, - { - "epoch": 20.522555410691005, - "grad_norm": 1.6231242418289185, - "learning_rate": 6.096683417085427e-05, - "loss": 4.8403, - "step": 39352 - }, - { - "epoch": 20.523076923076925, - "grad_norm": 1.4124469757080078, - "learning_rate": 6.096582914572865e-05, - "loss": 5.3699, - "step": 39353 - }, - { - "epoch": 20.52359843546284, - "grad_norm": 1.4449304342269897, - "learning_rate": 6.096482412060301e-05, - "loss": 5.4157, - "step": 39354 - }, - { - "epoch": 20.52411994784876, - "grad_norm": 1.4580754041671753, - "learning_rate": 6.096381909547739e-05, - "loss": 5.3524, - "step": 39355 - }, - { - "epoch": 20.52464146023468, - "grad_norm": 1.4876171350479126, - "learning_rate": 6.096281407035176e-05, - "loss": 4.6418, - "step": 39356 - }, - { - "epoch": 20.5251629726206, - "grad_norm": 1.4255387783050537, - "learning_rate": 6.096180904522614e-05, - "loss": 5.443, - "step": 39357 - }, - { - "epoch": 20.52568448500652, - "grad_norm": 1.560710072517395, - "learning_rate": 6.0960804020100504e-05, - "loss": 5.5355, - "step": 39358 - }, - { - "epoch": 20.526205997392438, - "grad_norm": 1.402662754058838, - "learning_rate": 6.095979899497488e-05, - "loss": 4.5727, - "step": 39359 - }, - { - "epoch": 20.526727509778357, - "grad_norm": 1.4244173765182495, - "learning_rate": 6.0958793969849246e-05, - "loss": 5.6894, - "step": 39360 - }, - { - "epoch": 20.527249022164277, - "grad_norm": 1.4352562427520752, - "learning_rate": 6.0957788944723624e-05, - "loss": 4.8222, - "step": 39361 - }, - { - "epoch": 20.527770534550196, - "grad_norm": 1.5219932794570923, - "learning_rate": 6.0956783919597995e-05, - "loss": 5.2166, - "step": 39362 - }, - { - "epoch": 20.528292046936116, - "grad_norm": 1.5687726736068726, - "learning_rate": 6.095577889447236e-05, - "loss": 5.521, - "step": 39363 - }, - { - "epoch": 20.528813559322035, - "grad_norm": 1.6225850582122803, - "learning_rate": 6.095477386934674e-05, - "loss": 5.0688, - "step": 39364 - }, - { - "epoch": 20.529335071707955, - "grad_norm": 1.5433387756347656, - "learning_rate": 6.09537688442211e-05, - "loss": 5.1837, - "step": 39365 - }, - { - "epoch": 20.52985658409387, - "grad_norm": 1.4791748523712158, - "learning_rate": 6.095276381909548e-05, - "loss": 4.2965, - "step": 39366 - }, - { - "epoch": 20.53037809647979, - "grad_norm": 1.5635392665863037, - "learning_rate": 6.095175879396985e-05, - "loss": 4.865, - "step": 39367 - }, - { - "epoch": 20.53089960886571, - "grad_norm": 1.5515981912612915, - "learning_rate": 6.095075376884423e-05, - "loss": 5.3706, - "step": 39368 - }, - { - "epoch": 20.53142112125163, - "grad_norm": 1.529679775238037, - "learning_rate": 6.094974874371859e-05, - "loss": 5.3264, - "step": 39369 - }, - { - "epoch": 20.53194263363755, - "grad_norm": 1.4900251626968384, - "learning_rate": 6.094874371859297e-05, - "loss": 5.5384, - "step": 39370 - }, - { - "epoch": 20.532464146023468, - "grad_norm": 1.4516819715499878, - "learning_rate": 6.094773869346734e-05, - "loss": 5.6852, - "step": 39371 - }, - { - "epoch": 20.532985658409387, - "grad_norm": 1.6036107540130615, - "learning_rate": 6.094673366834172e-05, - "loss": 5.3745, - "step": 39372 - }, - { - "epoch": 20.533507170795307, - "grad_norm": 1.6219121217727661, - "learning_rate": 6.094572864321608e-05, - "loss": 5.3281, - "step": 39373 - }, - { - "epoch": 20.534028683181226, - "grad_norm": 1.4624727964401245, - "learning_rate": 6.094472361809046e-05, - "loss": 5.5453, - "step": 39374 - }, - { - "epoch": 20.534550195567146, - "grad_norm": 1.6031413078308105, - "learning_rate": 6.0943718592964825e-05, - "loss": 5.0036, - "step": 39375 - }, - { - "epoch": 20.535071707953065, - "grad_norm": 1.54746413230896, - "learning_rate": 6.0942713567839196e-05, - "loss": 5.3341, - "step": 39376 - }, - { - "epoch": 20.53559322033898, - "grad_norm": 1.595733404159546, - "learning_rate": 6.0941708542713574e-05, - "loss": 5.3268, - "step": 39377 - }, - { - "epoch": 20.5361147327249, - "grad_norm": 1.5625718832015991, - "learning_rate": 6.094070351758794e-05, - "loss": 5.4521, - "step": 39378 - }, - { - "epoch": 20.53663624511082, - "grad_norm": 1.6058812141418457, - "learning_rate": 6.0939698492462316e-05, - "loss": 5.2401, - "step": 39379 - }, - { - "epoch": 20.53715775749674, - "grad_norm": 1.5571467876434326, - "learning_rate": 6.093869346733668e-05, - "loss": 5.3479, - "step": 39380 - }, - { - "epoch": 20.53767926988266, - "grad_norm": 1.5247060060501099, - "learning_rate": 6.093768844221106e-05, - "loss": 5.5184, - "step": 39381 - }, - { - "epoch": 20.53820078226858, - "grad_norm": 1.5689324140548706, - "learning_rate": 6.093668341708543e-05, - "loss": 5.7521, - "step": 39382 - }, - { - "epoch": 20.538722294654498, - "grad_norm": 1.6441315412521362, - "learning_rate": 6.093567839195981e-05, - "loss": 5.0187, - "step": 39383 - }, - { - "epoch": 20.539243807040418, - "grad_norm": 1.5081523656845093, - "learning_rate": 6.093467336683417e-05, - "loss": 5.2386, - "step": 39384 - }, - { - "epoch": 20.539765319426337, - "grad_norm": 1.588078498840332, - "learning_rate": 6.093366834170855e-05, - "loss": 5.1381, - "step": 39385 - }, - { - "epoch": 20.540286831812256, - "grad_norm": 1.6446964740753174, - "learning_rate": 6.0932663316582913e-05, - "loss": 5.1853, - "step": 39386 - }, - { - "epoch": 20.540808344198176, - "grad_norm": 1.5577090978622437, - "learning_rate": 6.093165829145729e-05, - "loss": 5.232, - "step": 39387 - }, - { - "epoch": 20.541329856584095, - "grad_norm": 1.6448345184326172, - "learning_rate": 6.093065326633166e-05, - "loss": 5.0021, - "step": 39388 - }, - { - "epoch": 20.541851368970015, - "grad_norm": 1.476454734802246, - "learning_rate": 6.0929648241206027e-05, - "loss": 5.6116, - "step": 39389 - }, - { - "epoch": 20.54237288135593, - "grad_norm": 1.5331435203552246, - "learning_rate": 6.0928643216080404e-05, - "loss": 5.2872, - "step": 39390 - }, - { - "epoch": 20.54289439374185, - "grad_norm": 1.5109621286392212, - "learning_rate": 6.0927638190954775e-05, - "loss": 5.3416, - "step": 39391 - }, - { - "epoch": 20.54341590612777, - "grad_norm": 1.4367281198501587, - "learning_rate": 6.092663316582915e-05, - "loss": 5.3387, - "step": 39392 - }, - { - "epoch": 20.54393741851369, - "grad_norm": 1.6654082536697388, - "learning_rate": 6.092562814070352e-05, - "loss": 4.5465, - "step": 39393 - }, - { - "epoch": 20.54445893089961, - "grad_norm": 1.5357283353805542, - "learning_rate": 6.0924623115577895e-05, - "loss": 5.6508, - "step": 39394 - }, - { - "epoch": 20.544980443285528, - "grad_norm": 1.4247822761535645, - "learning_rate": 6.092361809045226e-05, - "loss": 5.4437, - "step": 39395 - }, - { - "epoch": 20.545501955671448, - "grad_norm": 1.5824918746948242, - "learning_rate": 6.092261306532664e-05, - "loss": 5.2144, - "step": 39396 - }, - { - "epoch": 20.546023468057367, - "grad_norm": 1.4608968496322632, - "learning_rate": 6.092160804020101e-05, - "loss": 5.2426, - "step": 39397 - }, - { - "epoch": 20.546544980443286, - "grad_norm": 1.4815287590026855, - "learning_rate": 6.0920603015075386e-05, - "loss": 5.4486, - "step": 39398 - }, - { - "epoch": 20.547066492829206, - "grad_norm": 1.546678066253662, - "learning_rate": 6.091959798994975e-05, - "loss": 5.3167, - "step": 39399 - }, - { - "epoch": 20.547588005215125, - "grad_norm": 1.4661179780960083, - "learning_rate": 6.091859296482413e-05, - "loss": 5.4071, - "step": 39400 - }, - { - "epoch": 20.54810951760104, - "grad_norm": 1.5644499063491821, - "learning_rate": 6.091758793969849e-05, - "loss": 4.7755, - "step": 39401 - }, - { - "epoch": 20.54863102998696, - "grad_norm": 1.493137001991272, - "learning_rate": 6.0916582914572864e-05, - "loss": 5.5109, - "step": 39402 - }, - { - "epoch": 20.54915254237288, - "grad_norm": 1.5213409662246704, - "learning_rate": 6.091557788944724e-05, - "loss": 5.0593, - "step": 39403 - }, - { - "epoch": 20.5496740547588, - "grad_norm": 1.5352951288223267, - "learning_rate": 6.0914572864321606e-05, - "loss": 5.6213, - "step": 39404 - }, - { - "epoch": 20.55019556714472, - "grad_norm": 1.4461774826049805, - "learning_rate": 6.0913567839195984e-05, - "loss": 5.4989, - "step": 39405 - }, - { - "epoch": 20.55071707953064, - "grad_norm": 1.6170477867126465, - "learning_rate": 6.091256281407035e-05, - "loss": 5.505, - "step": 39406 - }, - { - "epoch": 20.551238591916558, - "grad_norm": 1.4707235097885132, - "learning_rate": 6.0911557788944726e-05, - "loss": 5.133, - "step": 39407 - }, - { - "epoch": 20.551760104302478, - "grad_norm": 1.526599645614624, - "learning_rate": 6.09105527638191e-05, - "loss": 4.9323, - "step": 39408 - }, - { - "epoch": 20.552281616688397, - "grad_norm": 1.5554530620574951, - "learning_rate": 6.0909547738693475e-05, - "loss": 5.1551, - "step": 39409 - }, - { - "epoch": 20.552803129074317, - "grad_norm": 1.5319855213165283, - "learning_rate": 6.090854271356784e-05, - "loss": 4.7772, - "step": 39410 - }, - { - "epoch": 20.553324641460236, - "grad_norm": 1.4569919109344482, - "learning_rate": 6.090753768844222e-05, - "loss": 5.5713, - "step": 39411 - }, - { - "epoch": 20.553846153846155, - "grad_norm": 1.6351990699768066, - "learning_rate": 6.090653266331659e-05, - "loss": 5.202, - "step": 39412 - }, - { - "epoch": 20.55436766623207, - "grad_norm": 1.5905256271362305, - "learning_rate": 6.0905527638190966e-05, - "loss": 5.1375, - "step": 39413 - }, - { - "epoch": 20.55488917861799, - "grad_norm": 1.7902603149414062, - "learning_rate": 6.090452261306533e-05, - "loss": 5.7295, - "step": 39414 - }, - { - "epoch": 20.55541069100391, - "grad_norm": 1.6377737522125244, - "learning_rate": 6.090351758793971e-05, - "loss": 4.7697, - "step": 39415 - }, - { - "epoch": 20.55593220338983, - "grad_norm": 1.5110399723052979, - "learning_rate": 6.090251256281407e-05, - "loss": 5.1417, - "step": 39416 - }, - { - "epoch": 20.55645371577575, - "grad_norm": 1.5144548416137695, - "learning_rate": 6.090150753768844e-05, - "loss": 5.1747, - "step": 39417 - }, - { - "epoch": 20.55697522816167, - "grad_norm": 1.5214625597000122, - "learning_rate": 6.090050251256282e-05, - "loss": 5.3781, - "step": 39418 - }, - { - "epoch": 20.557496740547588, - "grad_norm": 1.3808985948562622, - "learning_rate": 6.0899497487437185e-05, - "loss": 5.5016, - "step": 39419 - }, - { - "epoch": 20.558018252933508, - "grad_norm": 1.5280511379241943, - "learning_rate": 6.089849246231156e-05, - "loss": 5.503, - "step": 39420 - }, - { - "epoch": 20.558539765319427, - "grad_norm": 1.4521267414093018, - "learning_rate": 6.089748743718593e-05, - "loss": 5.4234, - "step": 39421 - }, - { - "epoch": 20.559061277705347, - "grad_norm": 1.4117993116378784, - "learning_rate": 6.0896482412060305e-05, - "loss": 5.8622, - "step": 39422 - }, - { - "epoch": 20.559582790091266, - "grad_norm": 1.4404985904693604, - "learning_rate": 6.0895477386934676e-05, - "loss": 5.44, - "step": 39423 - }, - { - "epoch": 20.560104302477185, - "grad_norm": 1.5323840379714966, - "learning_rate": 6.0894472361809054e-05, - "loss": 5.4201, - "step": 39424 - }, - { - "epoch": 20.5606258148631, - "grad_norm": 1.5557546615600586, - "learning_rate": 6.089346733668342e-05, - "loss": 5.2495, - "step": 39425 - }, - { - "epoch": 20.56114732724902, - "grad_norm": 1.4497714042663574, - "learning_rate": 6.0892462311557796e-05, - "loss": 5.2772, - "step": 39426 - }, - { - "epoch": 20.56166883963494, - "grad_norm": 1.4826403856277466, - "learning_rate": 6.089145728643216e-05, - "loss": 5.1972, - "step": 39427 - }, - { - "epoch": 20.56219035202086, - "grad_norm": 1.4309693574905396, - "learning_rate": 6.089045226130654e-05, - "loss": 5.2927, - "step": 39428 - }, - { - "epoch": 20.56271186440678, - "grad_norm": 1.6100399494171143, - "learning_rate": 6.088944723618091e-05, - "loss": 4.951, - "step": 39429 - }, - { - "epoch": 20.5632333767927, - "grad_norm": 1.5190948247909546, - "learning_rate": 6.0888442211055274e-05, - "loss": 5.207, - "step": 39430 - }, - { - "epoch": 20.563754889178618, - "grad_norm": 1.4372402429580688, - "learning_rate": 6.088743718592965e-05, - "loss": 5.4809, - "step": 39431 - }, - { - "epoch": 20.564276401564538, - "grad_norm": 1.450183629989624, - "learning_rate": 6.0886432160804016e-05, - "loss": 5.4361, - "step": 39432 - }, - { - "epoch": 20.564797913950457, - "grad_norm": 1.6359535455703735, - "learning_rate": 6.0885427135678393e-05, - "loss": 4.7277, - "step": 39433 - }, - { - "epoch": 20.565319426336377, - "grad_norm": 1.4612196683883667, - "learning_rate": 6.0884422110552765e-05, - "loss": 5.3584, - "step": 39434 - }, - { - "epoch": 20.565840938722296, - "grad_norm": 1.5562682151794434, - "learning_rate": 6.088341708542714e-05, - "loss": 5.044, - "step": 39435 - }, - { - "epoch": 20.566362451108215, - "grad_norm": 1.5783534049987793, - "learning_rate": 6.088241206030151e-05, - "loss": 5.2621, - "step": 39436 - }, - { - "epoch": 20.56688396349413, - "grad_norm": 1.5121049880981445, - "learning_rate": 6.0881407035175884e-05, - "loss": 4.9292, - "step": 39437 - }, - { - "epoch": 20.56740547588005, - "grad_norm": 1.6122666597366333, - "learning_rate": 6.0880402010050256e-05, - "loss": 5.0181, - "step": 39438 - }, - { - "epoch": 20.56792698826597, - "grad_norm": 1.5026912689208984, - "learning_rate": 6.087939698492463e-05, - "loss": 5.2683, - "step": 39439 - }, - { - "epoch": 20.56844850065189, - "grad_norm": 1.4204106330871582, - "learning_rate": 6.0878391959799e-05, - "loss": 5.5137, - "step": 39440 - }, - { - "epoch": 20.56897001303781, - "grad_norm": 1.641945481300354, - "learning_rate": 6.0877386934673375e-05, - "loss": 4.9206, - "step": 39441 - }, - { - "epoch": 20.56949152542373, - "grad_norm": 1.5947831869125366, - "learning_rate": 6.087638190954774e-05, - "loss": 5.4065, - "step": 39442 - }, - { - "epoch": 20.570013037809648, - "grad_norm": 1.611958384513855, - "learning_rate": 6.087537688442211e-05, - "loss": 5.0679, - "step": 39443 - }, - { - "epoch": 20.570534550195568, - "grad_norm": 1.5477532148361206, - "learning_rate": 6.087437185929649e-05, - "loss": 5.5253, - "step": 39444 - }, - { - "epoch": 20.571056062581487, - "grad_norm": 1.5274550914764404, - "learning_rate": 6.087336683417085e-05, - "loss": 5.42, - "step": 39445 - }, - { - "epoch": 20.571577574967407, - "grad_norm": 1.5009684562683105, - "learning_rate": 6.087236180904523e-05, - "loss": 5.4681, - "step": 39446 - }, - { - "epoch": 20.572099087353326, - "grad_norm": 1.5220609903335571, - "learning_rate": 6.0871356783919595e-05, - "loss": 5.2783, - "step": 39447 - }, - { - "epoch": 20.572620599739246, - "grad_norm": 1.6397931575775146, - "learning_rate": 6.087035175879397e-05, - "loss": 5.5902, - "step": 39448 - }, - { - "epoch": 20.57314211212516, - "grad_norm": 1.4499843120574951, - "learning_rate": 6.0869346733668344e-05, - "loss": 5.3103, - "step": 39449 - }, - { - "epoch": 20.57366362451108, - "grad_norm": 1.3678580522537231, - "learning_rate": 6.086834170854272e-05, - "loss": 4.9661, - "step": 39450 - }, - { - "epoch": 20.574185136897, - "grad_norm": 1.504172682762146, - "learning_rate": 6.0867336683417086e-05, - "loss": 5.3044, - "step": 39451 - }, - { - "epoch": 20.57470664928292, - "grad_norm": 1.5688613653182983, - "learning_rate": 6.0866331658291464e-05, - "loss": 5.1527, - "step": 39452 - }, - { - "epoch": 20.57522816166884, - "grad_norm": 1.4701305627822876, - "learning_rate": 6.086532663316583e-05, - "loss": 5.6125, - "step": 39453 - }, - { - "epoch": 20.57574967405476, - "grad_norm": 1.5266461372375488, - "learning_rate": 6.0864321608040206e-05, - "loss": 5.3919, - "step": 39454 - }, - { - "epoch": 20.576271186440678, - "grad_norm": 1.5410836935043335, - "learning_rate": 6.086331658291458e-05, - "loss": 4.9354, - "step": 39455 - }, - { - "epoch": 20.576792698826598, - "grad_norm": 1.47757089138031, - "learning_rate": 6.086231155778894e-05, - "loss": 5.4498, - "step": 39456 - }, - { - "epoch": 20.577314211212517, - "grad_norm": 1.5593279600143433, - "learning_rate": 6.086130653266332e-05, - "loss": 4.706, - "step": 39457 - }, - { - "epoch": 20.577835723598437, - "grad_norm": 1.5631204843521118, - "learning_rate": 6.086030150753769e-05, - "loss": 5.2485, - "step": 39458 - }, - { - "epoch": 20.578357235984356, - "grad_norm": 1.5886244773864746, - "learning_rate": 6.085929648241207e-05, - "loss": 4.7512, - "step": 39459 - }, - { - "epoch": 20.578878748370272, - "grad_norm": 1.4447076320648193, - "learning_rate": 6.085829145728643e-05, - "loss": 5.5566, - "step": 39460 - }, - { - "epoch": 20.57940026075619, - "grad_norm": 1.3683221340179443, - "learning_rate": 6.085728643216081e-05, - "loss": 5.3489, - "step": 39461 - }, - { - "epoch": 20.57992177314211, - "grad_norm": 1.506969690322876, - "learning_rate": 6.0856281407035174e-05, - "loss": 5.3612, - "step": 39462 - }, - { - "epoch": 20.58044328552803, - "grad_norm": 1.473534107208252, - "learning_rate": 6.085527638190955e-05, - "loss": 5.748, - "step": 39463 - }, - { - "epoch": 20.58096479791395, - "grad_norm": 1.3983136415481567, - "learning_rate": 6.085427135678392e-05, - "loss": 5.3461, - "step": 39464 - }, - { - "epoch": 20.58148631029987, - "grad_norm": 1.5360524654388428, - "learning_rate": 6.08532663316583e-05, - "loss": 5.2095, - "step": 39465 - }, - { - "epoch": 20.58200782268579, - "grad_norm": 1.469625473022461, - "learning_rate": 6.0852261306532665e-05, - "loss": 5.0779, - "step": 39466 - }, - { - "epoch": 20.58252933507171, - "grad_norm": 1.5269228219985962, - "learning_rate": 6.085125628140704e-05, - "loss": 4.9791, - "step": 39467 - }, - { - "epoch": 20.583050847457628, - "grad_norm": 1.5214533805847168, - "learning_rate": 6.085025125628141e-05, - "loss": 5.511, - "step": 39468 - }, - { - "epoch": 20.583572359843547, - "grad_norm": 1.4907368421554565, - "learning_rate": 6.084924623115578e-05, - "loss": 5.5028, - "step": 39469 - }, - { - "epoch": 20.584093872229467, - "grad_norm": 1.6120752096176147, - "learning_rate": 6.0848241206030156e-05, - "loss": 5.0576, - "step": 39470 - }, - { - "epoch": 20.584615384615386, - "grad_norm": 1.574238657951355, - "learning_rate": 6.084723618090452e-05, - "loss": 4.9604, - "step": 39471 - }, - { - "epoch": 20.585136897001306, - "grad_norm": 1.4375874996185303, - "learning_rate": 6.08462311557789e-05, - "loss": 5.6272, - "step": 39472 - }, - { - "epoch": 20.58565840938722, - "grad_norm": 1.5566617250442505, - "learning_rate": 6.084522613065326e-05, - "loss": 5.2702, - "step": 39473 - }, - { - "epoch": 20.58617992177314, - "grad_norm": 1.4980430603027344, - "learning_rate": 6.084422110552764e-05, - "loss": 5.2359, - "step": 39474 - }, - { - "epoch": 20.58670143415906, - "grad_norm": 1.4835655689239502, - "learning_rate": 6.084321608040201e-05, - "loss": 5.0265, - "step": 39475 - }, - { - "epoch": 20.58722294654498, - "grad_norm": 1.55263090133667, - "learning_rate": 6.084221105527639e-05, - "loss": 5.579, - "step": 39476 - }, - { - "epoch": 20.5877444589309, - "grad_norm": 1.648334264755249, - "learning_rate": 6.0841206030150754e-05, - "loss": 5.1807, - "step": 39477 - }, - { - "epoch": 20.58826597131682, - "grad_norm": 1.586987018585205, - "learning_rate": 6.084020100502513e-05, - "loss": 5.2615, - "step": 39478 - }, - { - "epoch": 20.58878748370274, - "grad_norm": 1.4790875911712646, - "learning_rate": 6.08391959798995e-05, - "loss": 5.4848, - "step": 39479 - }, - { - "epoch": 20.589308996088658, - "grad_norm": 1.5620747804641724, - "learning_rate": 6.083819095477388e-05, - "loss": 5.1716, - "step": 39480 - }, - { - "epoch": 20.589830508474577, - "grad_norm": 1.5078541040420532, - "learning_rate": 6.0837185929648245e-05, - "loss": 5.0079, - "step": 39481 - }, - { - "epoch": 20.590352020860497, - "grad_norm": 1.4756380319595337, - "learning_rate": 6.083618090452261e-05, - "loss": 5.3985, - "step": 39482 - }, - { - "epoch": 20.590873533246416, - "grad_norm": 1.601255178451538, - "learning_rate": 6.083517587939699e-05, - "loss": 5.1856, - "step": 39483 - }, - { - "epoch": 20.591395045632332, - "grad_norm": 1.4116466045379639, - "learning_rate": 6.083417085427136e-05, - "loss": 5.4748, - "step": 39484 - }, - { - "epoch": 20.59191655801825, - "grad_norm": 1.4749634265899658, - "learning_rate": 6.0833165829145736e-05, - "loss": 5.4071, - "step": 39485 - }, - { - "epoch": 20.59243807040417, - "grad_norm": 1.4961082935333252, - "learning_rate": 6.08321608040201e-05, - "loss": 5.5108, - "step": 39486 - }, - { - "epoch": 20.59295958279009, - "grad_norm": 1.7391705513000488, - "learning_rate": 6.083115577889448e-05, - "loss": 5.0293, - "step": 39487 - }, - { - "epoch": 20.59348109517601, - "grad_norm": 1.4929144382476807, - "learning_rate": 6.083015075376884e-05, - "loss": 5.5841, - "step": 39488 - }, - { - "epoch": 20.59400260756193, - "grad_norm": 1.5907635688781738, - "learning_rate": 6.082914572864322e-05, - "loss": 4.9332, - "step": 39489 - }, - { - "epoch": 20.59452411994785, - "grad_norm": 1.5337276458740234, - "learning_rate": 6.082814070351759e-05, - "loss": 5.4847, - "step": 39490 - }, - { - "epoch": 20.59504563233377, - "grad_norm": 1.6205893754959106, - "learning_rate": 6.082713567839197e-05, - "loss": 5.0998, - "step": 39491 - }, - { - "epoch": 20.595567144719688, - "grad_norm": 1.4510581493377686, - "learning_rate": 6.082613065326633e-05, - "loss": 5.4899, - "step": 39492 - }, - { - "epoch": 20.596088657105607, - "grad_norm": 1.5349059104919434, - "learning_rate": 6.082512562814071e-05, - "loss": 5.448, - "step": 39493 - }, - { - "epoch": 20.596610169491527, - "grad_norm": 1.655225157737732, - "learning_rate": 6.0824120603015075e-05, - "loss": 5.0008, - "step": 39494 - }, - { - "epoch": 20.597131681877446, - "grad_norm": 1.622947096824646, - "learning_rate": 6.082311557788945e-05, - "loss": 5.115, - "step": 39495 - }, - { - "epoch": 20.597653194263362, - "grad_norm": 1.4599788188934326, - "learning_rate": 6.0822110552763824e-05, - "loss": 5.6301, - "step": 39496 - }, - { - "epoch": 20.59817470664928, - "grad_norm": 1.6917649507522583, - "learning_rate": 6.082110552763819e-05, - "loss": 5.0848, - "step": 39497 - }, - { - "epoch": 20.5986962190352, - "grad_norm": 1.4262168407440186, - "learning_rate": 6.0820100502512566e-05, - "loss": 5.3798, - "step": 39498 - }, - { - "epoch": 20.59921773142112, - "grad_norm": 1.5220191478729248, - "learning_rate": 6.081909547738693e-05, - "loss": 5.1316, - "step": 39499 - }, - { - "epoch": 20.59973924380704, - "grad_norm": 1.4505013227462769, - "learning_rate": 6.081809045226131e-05, - "loss": 5.5714, - "step": 39500 - }, - { - "epoch": 20.60026075619296, - "grad_norm": 1.4815289974212646, - "learning_rate": 6.081708542713568e-05, - "loss": 5.0286, - "step": 39501 - }, - { - "epoch": 20.60078226857888, - "grad_norm": 1.54777991771698, - "learning_rate": 6.081608040201006e-05, - "loss": 4.8765, - "step": 39502 - }, - { - "epoch": 20.6013037809648, - "grad_norm": 1.4883688688278198, - "learning_rate": 6.081507537688442e-05, - "loss": 4.9519, - "step": 39503 - }, - { - "epoch": 20.601825293350718, - "grad_norm": 1.4861541986465454, - "learning_rate": 6.08140703517588e-05, - "loss": 5.4027, - "step": 39504 - }, - { - "epoch": 20.602346805736637, - "grad_norm": 1.6370915174484253, - "learning_rate": 6.081306532663317e-05, - "loss": 4.831, - "step": 39505 - }, - { - "epoch": 20.602868318122557, - "grad_norm": 1.5818172693252563, - "learning_rate": 6.081206030150755e-05, - "loss": 4.8733, - "step": 39506 - }, - { - "epoch": 20.603389830508476, - "grad_norm": 1.4497076272964478, - "learning_rate": 6.081105527638191e-05, - "loss": 5.6802, - "step": 39507 - }, - { - "epoch": 20.603911342894392, - "grad_norm": 1.494806170463562, - "learning_rate": 6.081005025125629e-05, - "loss": 5.1823, - "step": 39508 - }, - { - "epoch": 20.60443285528031, - "grad_norm": 1.5388914346694946, - "learning_rate": 6.0809045226130654e-05, - "loss": 4.8847, - "step": 39509 - }, - { - "epoch": 20.60495436766623, - "grad_norm": 1.6000969409942627, - "learning_rate": 6.0808040201005025e-05, - "loss": 5.1719, - "step": 39510 - }, - { - "epoch": 20.60547588005215, - "grad_norm": 1.5799751281738281, - "learning_rate": 6.08070351758794e-05, - "loss": 5.3492, - "step": 39511 - }, - { - "epoch": 20.60599739243807, - "grad_norm": 1.5413357019424438, - "learning_rate": 6.080603015075377e-05, - "loss": 4.9651, - "step": 39512 - }, - { - "epoch": 20.60651890482399, - "grad_norm": 1.460109829902649, - "learning_rate": 6.0805025125628145e-05, - "loss": 5.1348, - "step": 39513 - }, - { - "epoch": 20.60704041720991, - "grad_norm": 1.5084465742111206, - "learning_rate": 6.080402010050251e-05, - "loss": 4.9882, - "step": 39514 - }, - { - "epoch": 20.60756192959583, - "grad_norm": 1.46568763256073, - "learning_rate": 6.080301507537689e-05, - "loss": 4.6816, - "step": 39515 - }, - { - "epoch": 20.608083441981748, - "grad_norm": 1.523721694946289, - "learning_rate": 6.080201005025126e-05, - "loss": 5.0635, - "step": 39516 - }, - { - "epoch": 20.608604954367667, - "grad_norm": 1.568245768547058, - "learning_rate": 6.0801005025125636e-05, - "loss": 5.5582, - "step": 39517 - }, - { - "epoch": 20.609126466753587, - "grad_norm": 1.4532641172409058, - "learning_rate": 6.08e-05, - "loss": 5.6076, - "step": 39518 - }, - { - "epoch": 20.609647979139506, - "grad_norm": 1.527274250984192, - "learning_rate": 6.079899497487438e-05, - "loss": 5.5414, - "step": 39519 - }, - { - "epoch": 20.610169491525422, - "grad_norm": 1.5536152124404907, - "learning_rate": 6.079798994974874e-05, - "loss": 5.2574, - "step": 39520 - }, - { - "epoch": 20.61069100391134, - "grad_norm": 1.600590705871582, - "learning_rate": 6.079698492462312e-05, - "loss": 4.9268, - "step": 39521 - }, - { - "epoch": 20.61121251629726, - "grad_norm": 1.5556484460830688, - "learning_rate": 6.079597989949749e-05, - "loss": 4.7436, - "step": 39522 - }, - { - "epoch": 20.61173402868318, - "grad_norm": 1.600376844406128, - "learning_rate": 6.0794974874371856e-05, - "loss": 4.9797, - "step": 39523 - }, - { - "epoch": 20.6122555410691, - "grad_norm": 1.6710917949676514, - "learning_rate": 6.0793969849246234e-05, - "loss": 5.0929, - "step": 39524 - }, - { - "epoch": 20.61277705345502, - "grad_norm": 1.6319414377212524, - "learning_rate": 6.0792964824120605e-05, - "loss": 4.7066, - "step": 39525 - }, - { - "epoch": 20.61329856584094, - "grad_norm": 1.6205097436904907, - "learning_rate": 6.079195979899498e-05, - "loss": 5.2977, - "step": 39526 - }, - { - "epoch": 20.61382007822686, - "grad_norm": 1.4910601377487183, - "learning_rate": 6.079095477386935e-05, - "loss": 4.7023, - "step": 39527 - }, - { - "epoch": 20.614341590612778, - "grad_norm": 1.448333501815796, - "learning_rate": 6.0789949748743725e-05, - "loss": 5.8077, - "step": 39528 - }, - { - "epoch": 20.614863102998697, - "grad_norm": 1.457947850227356, - "learning_rate": 6.078894472361809e-05, - "loss": 5.5462, - "step": 39529 - }, - { - "epoch": 20.615384615384617, - "grad_norm": 1.4835091829299927, - "learning_rate": 6.078793969849247e-05, - "loss": 5.3286, - "step": 39530 - }, - { - "epoch": 20.615906127770536, - "grad_norm": 1.4405900239944458, - "learning_rate": 6.078693467336684e-05, - "loss": 5.3172, - "step": 39531 - }, - { - "epoch": 20.616427640156452, - "grad_norm": 1.4012694358825684, - "learning_rate": 6.0785929648241216e-05, - "loss": 5.3669, - "step": 39532 - }, - { - "epoch": 20.61694915254237, - "grad_norm": 1.3665969371795654, - "learning_rate": 6.078492462311558e-05, - "loss": 5.5389, - "step": 39533 - }, - { - "epoch": 20.61747066492829, - "grad_norm": 1.4401227235794067, - "learning_rate": 6.078391959798996e-05, - "loss": 5.5834, - "step": 39534 - }, - { - "epoch": 20.61799217731421, - "grad_norm": 1.44844388961792, - "learning_rate": 6.078291457286432e-05, - "loss": 5.2541, - "step": 39535 - }, - { - "epoch": 20.61851368970013, - "grad_norm": 1.4645276069641113, - "learning_rate": 6.078190954773869e-05, - "loss": 5.6591, - "step": 39536 - }, - { - "epoch": 20.61903520208605, - "grad_norm": 1.5074151754379272, - "learning_rate": 6.078090452261307e-05, - "loss": 5.1913, - "step": 39537 - }, - { - "epoch": 20.61955671447197, - "grad_norm": 1.5155527591705322, - "learning_rate": 6.0779899497487435e-05, - "loss": 5.3865, - "step": 39538 - }, - { - "epoch": 20.62007822685789, - "grad_norm": 1.4852958917617798, - "learning_rate": 6.077889447236181e-05, - "loss": 4.9842, - "step": 39539 - }, - { - "epoch": 20.620599739243808, - "grad_norm": 1.511531949043274, - "learning_rate": 6.077788944723618e-05, - "loss": 5.5712, - "step": 39540 - }, - { - "epoch": 20.621121251629727, - "grad_norm": 1.6284053325653076, - "learning_rate": 6.0776884422110555e-05, - "loss": 5.133, - "step": 39541 - }, - { - "epoch": 20.621642764015647, - "grad_norm": 1.4609575271606445, - "learning_rate": 6.0775879396984926e-05, - "loss": 5.5109, - "step": 39542 - }, - { - "epoch": 20.622164276401566, - "grad_norm": 1.5815978050231934, - "learning_rate": 6.0774874371859304e-05, - "loss": 5.4523, - "step": 39543 - }, - { - "epoch": 20.622685788787482, - "grad_norm": 1.497287631034851, - "learning_rate": 6.077386934673367e-05, - "loss": 5.4339, - "step": 39544 - }, - { - "epoch": 20.6232073011734, - "grad_norm": 1.506528615951538, - "learning_rate": 6.0772864321608046e-05, - "loss": 5.285, - "step": 39545 - }, - { - "epoch": 20.62372881355932, - "grad_norm": 1.5729997158050537, - "learning_rate": 6.077185929648242e-05, - "loss": 5.3299, - "step": 39546 - }, - { - "epoch": 20.62425032594524, - "grad_norm": 1.4790287017822266, - "learning_rate": 6.0770854271356795e-05, - "loss": 5.045, - "step": 39547 - }, - { - "epoch": 20.62477183833116, - "grad_norm": 1.4249814748764038, - "learning_rate": 6.076984924623116e-05, - "loss": 5.4296, - "step": 39548 - }, - { - "epoch": 20.62529335071708, - "grad_norm": 1.5219523906707764, - "learning_rate": 6.0768844221105524e-05, - "loss": 5.4245, - "step": 39549 - }, - { - "epoch": 20.625814863103, - "grad_norm": 1.627550482749939, - "learning_rate": 6.07678391959799e-05, - "loss": 5.2756, - "step": 39550 - }, - { - "epoch": 20.62633637548892, - "grad_norm": 1.5636066198349, - "learning_rate": 6.076683417085427e-05, - "loss": 5.5839, - "step": 39551 - }, - { - "epoch": 20.626857887874838, - "grad_norm": 1.5191186666488647, - "learning_rate": 6.076582914572865e-05, - "loss": 5.322, - "step": 39552 - }, - { - "epoch": 20.627379400260757, - "grad_norm": 1.4723035097122192, - "learning_rate": 6.0764824120603015e-05, - "loss": 5.5293, - "step": 39553 - }, - { - "epoch": 20.627900912646677, - "grad_norm": 1.5393054485321045, - "learning_rate": 6.076381909547739e-05, - "loss": 4.9926, - "step": 39554 - }, - { - "epoch": 20.628422425032596, - "grad_norm": 1.442798137664795, - "learning_rate": 6.076281407035176e-05, - "loss": 5.4889, - "step": 39555 - }, - { - "epoch": 20.628943937418512, - "grad_norm": 1.5653629302978516, - "learning_rate": 6.0761809045226134e-05, - "loss": 5.5575, - "step": 39556 - }, - { - "epoch": 20.62946544980443, - "grad_norm": 1.4968180656433105, - "learning_rate": 6.0760804020100506e-05, - "loss": 5.8928, - "step": 39557 - }, - { - "epoch": 20.62998696219035, - "grad_norm": 1.4395779371261597, - "learning_rate": 6.075979899497488e-05, - "loss": 5.3611, - "step": 39558 - }, - { - "epoch": 20.63050847457627, - "grad_norm": 1.4254486560821533, - "learning_rate": 6.075879396984925e-05, - "loss": 5.5934, - "step": 39559 - }, - { - "epoch": 20.63102998696219, - "grad_norm": 1.4557905197143555, - "learning_rate": 6.0757788944723625e-05, - "loss": 5.5971, - "step": 39560 - }, - { - "epoch": 20.63155149934811, - "grad_norm": 1.5105934143066406, - "learning_rate": 6.075678391959799e-05, - "loss": 4.9387, - "step": 39561 - }, - { - "epoch": 20.63207301173403, - "grad_norm": 1.5561039447784424, - "learning_rate": 6.075577889447236e-05, - "loss": 5.2759, - "step": 39562 - }, - { - "epoch": 20.63259452411995, - "grad_norm": 1.3952305316925049, - "learning_rate": 6.075477386934674e-05, - "loss": 4.9302, - "step": 39563 - }, - { - "epoch": 20.633116036505868, - "grad_norm": 1.5250401496887207, - "learning_rate": 6.07537688442211e-05, - "loss": 5.224, - "step": 39564 - }, - { - "epoch": 20.633637548891787, - "grad_norm": 1.6308919191360474, - "learning_rate": 6.075276381909548e-05, - "loss": 5.0653, - "step": 39565 - }, - { - "epoch": 20.634159061277707, - "grad_norm": 1.4848275184631348, - "learning_rate": 6.075175879396985e-05, - "loss": 4.702, - "step": 39566 - }, - { - "epoch": 20.634680573663623, - "grad_norm": 1.5058636665344238, - "learning_rate": 6.075075376884423e-05, - "loss": 5.1686, - "step": 39567 - }, - { - "epoch": 20.635202086049542, - "grad_norm": 1.5807693004608154, - "learning_rate": 6.0749748743718594e-05, - "loss": 5.3642, - "step": 39568 - }, - { - "epoch": 20.63572359843546, - "grad_norm": 1.400421380996704, - "learning_rate": 6.074874371859297e-05, - "loss": 5.8684, - "step": 39569 - }, - { - "epoch": 20.63624511082138, - "grad_norm": 1.5733561515808105, - "learning_rate": 6.0747738693467336e-05, - "loss": 5.7101, - "step": 39570 - }, - { - "epoch": 20.6367666232073, - "grad_norm": 1.549376130104065, - "learning_rate": 6.0746733668341714e-05, - "loss": 4.9195, - "step": 39571 - }, - { - "epoch": 20.63728813559322, - "grad_norm": 1.4177049398422241, - "learning_rate": 6.0745728643216085e-05, - "loss": 5.1644, - "step": 39572 - }, - { - "epoch": 20.63780964797914, - "grad_norm": 1.3962563276290894, - "learning_rate": 6.074472361809046e-05, - "loss": 5.0966, - "step": 39573 - }, - { - "epoch": 20.63833116036506, - "grad_norm": 1.5535407066345215, - "learning_rate": 6.074371859296483e-05, - "loss": 5.1353, - "step": 39574 - }, - { - "epoch": 20.63885267275098, - "grad_norm": 1.412893295288086, - "learning_rate": 6.074271356783919e-05, - "loss": 5.629, - "step": 39575 - }, - { - "epoch": 20.639374185136898, - "grad_norm": 1.6047130823135376, - "learning_rate": 6.074170854271357e-05, - "loss": 5.0091, - "step": 39576 - }, - { - "epoch": 20.639895697522817, - "grad_norm": 1.509798526763916, - "learning_rate": 6.074070351758794e-05, - "loss": 5.4926, - "step": 39577 - }, - { - "epoch": 20.640417209908737, - "grad_norm": 1.5203871726989746, - "learning_rate": 6.073969849246232e-05, - "loss": 5.5318, - "step": 39578 - }, - { - "epoch": 20.640938722294656, - "grad_norm": 1.4767358303070068, - "learning_rate": 6.073869346733668e-05, - "loss": 5.3676, - "step": 39579 - }, - { - "epoch": 20.641460234680572, - "grad_norm": 1.5503684282302856, - "learning_rate": 6.073768844221106e-05, - "loss": 5.1495, - "step": 39580 - }, - { - "epoch": 20.64198174706649, - "grad_norm": 1.5477228164672852, - "learning_rate": 6.0736683417085424e-05, - "loss": 5.2735, - "step": 39581 - }, - { - "epoch": 20.64250325945241, - "grad_norm": 1.5348738431930542, - "learning_rate": 6.07356783919598e-05, - "loss": 5.2717, - "step": 39582 - }, - { - "epoch": 20.64302477183833, - "grad_norm": 1.5274512767791748, - "learning_rate": 6.073467336683417e-05, - "loss": 5.3435, - "step": 39583 - }, - { - "epoch": 20.64354628422425, - "grad_norm": 1.4755600690841675, - "learning_rate": 6.073366834170855e-05, - "loss": 5.5555, - "step": 39584 - }, - { - "epoch": 20.64406779661017, - "grad_norm": 1.5469361543655396, - "learning_rate": 6.0732663316582915e-05, - "loss": 5.205, - "step": 39585 - }, - { - "epoch": 20.64458930899609, - "grad_norm": 1.611266851425171, - "learning_rate": 6.073165829145729e-05, - "loss": 5.1897, - "step": 39586 - }, - { - "epoch": 20.64511082138201, - "grad_norm": 1.4687286615371704, - "learning_rate": 6.073065326633166e-05, - "loss": 4.7972, - "step": 39587 - }, - { - "epoch": 20.645632333767928, - "grad_norm": 2.843839168548584, - "learning_rate": 6.0729648241206035e-05, - "loss": 5.1662, - "step": 39588 - }, - { - "epoch": 20.646153846153847, - "grad_norm": 1.5008503198623657, - "learning_rate": 6.0728643216080406e-05, - "loss": 5.3612, - "step": 39589 - }, - { - "epoch": 20.646675358539767, - "grad_norm": 1.557961106300354, - "learning_rate": 6.072763819095477e-05, - "loss": 5.3283, - "step": 39590 - }, - { - "epoch": 20.647196870925683, - "grad_norm": 1.5532467365264893, - "learning_rate": 6.072663316582915e-05, - "loss": 4.8042, - "step": 39591 - }, - { - "epoch": 20.647718383311602, - "grad_norm": 1.5034074783325195, - "learning_rate": 6.072562814070352e-05, - "loss": 4.9573, - "step": 39592 - }, - { - "epoch": 20.64823989569752, - "grad_norm": 1.5052039623260498, - "learning_rate": 6.07246231155779e-05, - "loss": 5.2592, - "step": 39593 - }, - { - "epoch": 20.64876140808344, - "grad_norm": 1.5911632776260376, - "learning_rate": 6.072361809045226e-05, - "loss": 5.1202, - "step": 39594 - }, - { - "epoch": 20.64928292046936, - "grad_norm": 1.5099008083343506, - "learning_rate": 6.072261306532664e-05, - "loss": 5.2463, - "step": 39595 - }, - { - "epoch": 20.64980443285528, - "grad_norm": 1.5546927452087402, - "learning_rate": 6.0721608040201004e-05, - "loss": 5.2759, - "step": 39596 - }, - { - "epoch": 20.6503259452412, - "grad_norm": 1.4716174602508545, - "learning_rate": 6.072060301507538e-05, - "loss": 5.5306, - "step": 39597 - }, - { - "epoch": 20.65084745762712, - "grad_norm": 1.476719856262207, - "learning_rate": 6.071959798994975e-05, - "loss": 5.0525, - "step": 39598 - }, - { - "epoch": 20.65136897001304, - "grad_norm": 1.4898656606674194, - "learning_rate": 6.071859296482413e-05, - "loss": 5.0215, - "step": 39599 - }, - { - "epoch": 20.651890482398958, - "grad_norm": 1.3997082710266113, - "learning_rate": 6.0717587939698495e-05, - "loss": 5.7421, - "step": 39600 - }, - { - "epoch": 20.652411994784877, - "grad_norm": 1.5321660041809082, - "learning_rate": 6.071658291457287e-05, - "loss": 5.2857, - "step": 39601 - }, - { - "epoch": 20.652933507170797, - "grad_norm": 1.5518457889556885, - "learning_rate": 6.071557788944724e-05, - "loss": 4.5992, - "step": 39602 - }, - { - "epoch": 20.653455019556713, - "grad_norm": 1.588607668876648, - "learning_rate": 6.071457286432161e-05, - "loss": 5.1962, - "step": 39603 - }, - { - "epoch": 20.653976531942632, - "grad_norm": 1.6560239791870117, - "learning_rate": 6.0713567839195986e-05, - "loss": 5.0278, - "step": 39604 - }, - { - "epoch": 20.65449804432855, - "grad_norm": 1.4610310792922974, - "learning_rate": 6.071256281407035e-05, - "loss": 5.6813, - "step": 39605 - }, - { - "epoch": 20.65501955671447, - "grad_norm": 1.6536736488342285, - "learning_rate": 6.071155778894473e-05, - "loss": 5.2484, - "step": 39606 - }, - { - "epoch": 20.65554106910039, - "grad_norm": 1.5251692533493042, - "learning_rate": 6.071055276381909e-05, - "loss": 5.5003, - "step": 39607 - }, - { - "epoch": 20.65606258148631, - "grad_norm": 1.461708426475525, - "learning_rate": 6.070954773869347e-05, - "loss": 5.2168, - "step": 39608 - }, - { - "epoch": 20.65658409387223, - "grad_norm": 1.4689382314682007, - "learning_rate": 6.070854271356784e-05, - "loss": 5.2112, - "step": 39609 - }, - { - "epoch": 20.65710560625815, - "grad_norm": 1.552442193031311, - "learning_rate": 6.070753768844222e-05, - "loss": 4.8881, - "step": 39610 - }, - { - "epoch": 20.65762711864407, - "grad_norm": 1.8615492582321167, - "learning_rate": 6.070653266331658e-05, - "loss": 5.1729, - "step": 39611 - }, - { - "epoch": 20.658148631029988, - "grad_norm": 1.5445365905761719, - "learning_rate": 6.070552763819096e-05, - "loss": 5.2546, - "step": 39612 - }, - { - "epoch": 20.658670143415907, - "grad_norm": 1.5995570421218872, - "learning_rate": 6.070452261306533e-05, - "loss": 5.6215, - "step": 39613 - }, - { - "epoch": 20.659191655801827, - "grad_norm": 1.5695749521255493, - "learning_rate": 6.070351758793971e-05, - "loss": 4.6172, - "step": 39614 - }, - { - "epoch": 20.659713168187743, - "grad_norm": 1.6100183725357056, - "learning_rate": 6.0702512562814074e-05, - "loss": 4.8409, - "step": 39615 - }, - { - "epoch": 20.660234680573662, - "grad_norm": 1.5412354469299316, - "learning_rate": 6.070150753768844e-05, - "loss": 5.4417, - "step": 39616 - }, - { - "epoch": 20.66075619295958, - "grad_norm": 1.5960217714309692, - "learning_rate": 6.0700502512562816e-05, - "loss": 5.077, - "step": 39617 - }, - { - "epoch": 20.6612777053455, - "grad_norm": 1.4516631364822388, - "learning_rate": 6.069949748743719e-05, - "loss": 5.6287, - "step": 39618 - }, - { - "epoch": 20.66179921773142, - "grad_norm": 1.5934832096099854, - "learning_rate": 6.0698492462311565e-05, - "loss": 5.3119, - "step": 39619 - }, - { - "epoch": 20.66232073011734, - "grad_norm": 1.5315513610839844, - "learning_rate": 6.069748743718593e-05, - "loss": 5.1281, - "step": 39620 - }, - { - "epoch": 20.66284224250326, - "grad_norm": 1.5744798183441162, - "learning_rate": 6.069648241206031e-05, - "loss": 5.7684, - "step": 39621 - }, - { - "epoch": 20.66336375488918, - "grad_norm": 1.48493230342865, - "learning_rate": 6.069547738693467e-05, - "loss": 5.0384, - "step": 39622 - }, - { - "epoch": 20.6638852672751, - "grad_norm": 1.6417945623397827, - "learning_rate": 6.069447236180905e-05, - "loss": 4.8651, - "step": 39623 - }, - { - "epoch": 20.664406779661018, - "grad_norm": 1.575063943862915, - "learning_rate": 6.069346733668342e-05, - "loss": 5.1701, - "step": 39624 - }, - { - "epoch": 20.664928292046937, - "grad_norm": 1.5158315896987915, - "learning_rate": 6.06924623115578e-05, - "loss": 5.2761, - "step": 39625 - }, - { - "epoch": 20.665449804432857, - "grad_norm": 1.4806312322616577, - "learning_rate": 6.069145728643216e-05, - "loss": 5.2551, - "step": 39626 - }, - { - "epoch": 20.665971316818773, - "grad_norm": 1.5863522291183472, - "learning_rate": 6.069045226130654e-05, - "loss": 4.9347, - "step": 39627 - }, - { - "epoch": 20.666492829204692, - "grad_norm": 1.6204432249069214, - "learning_rate": 6.0689447236180904e-05, - "loss": 5.3022, - "step": 39628 - }, - { - "epoch": 20.667014341590612, - "grad_norm": 1.4518715143203735, - "learning_rate": 6.0688442211055275e-05, - "loss": 5.591, - "step": 39629 - }, - { - "epoch": 20.66753585397653, - "grad_norm": 1.4831781387329102, - "learning_rate": 6.068743718592965e-05, - "loss": 4.9552, - "step": 39630 - }, - { - "epoch": 20.66805736636245, - "grad_norm": 1.4503012895584106, - "learning_rate": 6.068643216080402e-05, - "loss": 4.9653, - "step": 39631 - }, - { - "epoch": 20.66857887874837, - "grad_norm": 1.5947444438934326, - "learning_rate": 6.0685427135678395e-05, - "loss": 5.4755, - "step": 39632 - }, - { - "epoch": 20.66910039113429, - "grad_norm": 1.6464022397994995, - "learning_rate": 6.0684422110552766e-05, - "loss": 4.9038, - "step": 39633 - }, - { - "epoch": 20.66962190352021, - "grad_norm": 1.7467135190963745, - "learning_rate": 6.0683417085427144e-05, - "loss": 5.139, - "step": 39634 - }, - { - "epoch": 20.67014341590613, - "grad_norm": 1.4447052478790283, - "learning_rate": 6.068241206030151e-05, - "loss": 5.5703, - "step": 39635 - }, - { - "epoch": 20.670664928292048, - "grad_norm": 1.4708151817321777, - "learning_rate": 6.0681407035175886e-05, - "loss": 5.3737, - "step": 39636 - }, - { - "epoch": 20.671186440677968, - "grad_norm": 1.4892035722732544, - "learning_rate": 6.068040201005025e-05, - "loss": 4.665, - "step": 39637 - }, - { - "epoch": 20.671707953063887, - "grad_norm": 1.614014744758606, - "learning_rate": 6.067939698492463e-05, - "loss": 5.1746, - "step": 39638 - }, - { - "epoch": 20.672229465449803, - "grad_norm": 1.3913809061050415, - "learning_rate": 6.0678391959799e-05, - "loss": 5.2747, - "step": 39639 - }, - { - "epoch": 20.672750977835722, - "grad_norm": 1.4664220809936523, - "learning_rate": 6.067738693467338e-05, - "loss": 5.5587, - "step": 39640 - }, - { - "epoch": 20.673272490221642, - "grad_norm": 1.5246155261993408, - "learning_rate": 6.067638190954774e-05, - "loss": 5.223, - "step": 39641 - }, - { - "epoch": 20.67379400260756, - "grad_norm": 1.5164976119995117, - "learning_rate": 6.0675376884422106e-05, - "loss": 5.3623, - "step": 39642 - }, - { - "epoch": 20.67431551499348, - "grad_norm": 1.5193370580673218, - "learning_rate": 6.0674371859296484e-05, - "loss": 5.215, - "step": 39643 - }, - { - "epoch": 20.6748370273794, - "grad_norm": 1.384103536605835, - "learning_rate": 6.0673366834170855e-05, - "loss": 4.7754, - "step": 39644 - }, - { - "epoch": 20.67535853976532, - "grad_norm": 1.607710599899292, - "learning_rate": 6.067236180904523e-05, - "loss": 5.1621, - "step": 39645 - }, - { - "epoch": 20.67588005215124, - "grad_norm": 1.5798412561416626, - "learning_rate": 6.06713567839196e-05, - "loss": 4.9091, - "step": 39646 - }, - { - "epoch": 20.67640156453716, - "grad_norm": 1.4984991550445557, - "learning_rate": 6.0670351758793975e-05, - "loss": 5.3283, - "step": 39647 - }, - { - "epoch": 20.676923076923078, - "grad_norm": 1.456598162651062, - "learning_rate": 6.066934673366834e-05, - "loss": 5.2855, - "step": 39648 - }, - { - "epoch": 20.677444589308998, - "grad_norm": 1.545089602470398, - "learning_rate": 6.066834170854272e-05, - "loss": 5.2577, - "step": 39649 - }, - { - "epoch": 20.677966101694913, - "grad_norm": 1.6115305423736572, - "learning_rate": 6.066733668341709e-05, - "loss": 4.7761, - "step": 39650 - }, - { - "epoch": 20.678487614080833, - "grad_norm": 1.640951156616211, - "learning_rate": 6.0666331658291466e-05, - "loss": 5.0087, - "step": 39651 - }, - { - "epoch": 20.679009126466752, - "grad_norm": 1.422574520111084, - "learning_rate": 6.066532663316583e-05, - "loss": 5.6314, - "step": 39652 - }, - { - "epoch": 20.679530638852672, - "grad_norm": 1.5351743698120117, - "learning_rate": 6.066432160804021e-05, - "loss": 5.6608, - "step": 39653 - }, - { - "epoch": 20.68005215123859, - "grad_norm": 1.5877008438110352, - "learning_rate": 6.066331658291458e-05, - "loss": 5.2251, - "step": 39654 - }, - { - "epoch": 20.68057366362451, - "grad_norm": 1.6308757066726685, - "learning_rate": 6.066231155778894e-05, - "loss": 5.3004, - "step": 39655 - }, - { - "epoch": 20.68109517601043, - "grad_norm": 1.4891036748886108, - "learning_rate": 6.066130653266332e-05, - "loss": 5.2709, - "step": 39656 - }, - { - "epoch": 20.68161668839635, - "grad_norm": 1.5929555892944336, - "learning_rate": 6.0660301507537685e-05, - "loss": 5.1301, - "step": 39657 - }, - { - "epoch": 20.68213820078227, - "grad_norm": 1.4326093196868896, - "learning_rate": 6.065929648241206e-05, - "loss": 5.3955, - "step": 39658 - }, - { - "epoch": 20.68265971316819, - "grad_norm": 1.4973465204238892, - "learning_rate": 6.0658291457286434e-05, - "loss": 5.5967, - "step": 39659 - }, - { - "epoch": 20.683181225554108, - "grad_norm": 1.519844889640808, - "learning_rate": 6.065728643216081e-05, - "loss": 5.5867, - "step": 39660 - }, - { - "epoch": 20.683702737940028, - "grad_norm": 1.4781899452209473, - "learning_rate": 6.0656281407035176e-05, - "loss": 5.2477, - "step": 39661 - }, - { - "epoch": 20.684224250325947, - "grad_norm": 1.4774765968322754, - "learning_rate": 6.0655276381909554e-05, - "loss": 5.0375, - "step": 39662 - }, - { - "epoch": 20.684745762711863, - "grad_norm": 1.664771556854248, - "learning_rate": 6.065427135678392e-05, - "loss": 5.1633, - "step": 39663 - }, - { - "epoch": 20.685267275097782, - "grad_norm": 1.452271580696106, - "learning_rate": 6.0653266331658296e-05, - "loss": 5.2912, - "step": 39664 - }, - { - "epoch": 20.685788787483702, - "grad_norm": 1.5154359340667725, - "learning_rate": 6.065226130653267e-05, - "loss": 5.2144, - "step": 39665 - }, - { - "epoch": 20.68631029986962, - "grad_norm": 1.5261255502700806, - "learning_rate": 6.0651256281407045e-05, - "loss": 5.1983, - "step": 39666 - }, - { - "epoch": 20.68683181225554, - "grad_norm": 1.4827284812927246, - "learning_rate": 6.065025125628141e-05, - "loss": 5.6337, - "step": 39667 - }, - { - "epoch": 20.68735332464146, - "grad_norm": 1.419244647026062, - "learning_rate": 6.0649246231155774e-05, - "loss": 5.6312, - "step": 39668 - }, - { - "epoch": 20.68787483702738, - "grad_norm": 1.500741958618164, - "learning_rate": 6.064824120603015e-05, - "loss": 5.3013, - "step": 39669 - }, - { - "epoch": 20.6883963494133, - "grad_norm": 1.5659035444259644, - "learning_rate": 6.064723618090452e-05, - "loss": 5.4496, - "step": 39670 - }, - { - "epoch": 20.68891786179922, - "grad_norm": 1.4246485233306885, - "learning_rate": 6.06462311557789e-05, - "loss": 5.8158, - "step": 39671 - }, - { - "epoch": 20.689439374185138, - "grad_norm": 1.5327054262161255, - "learning_rate": 6.0645226130653265e-05, - "loss": 5.5889, - "step": 39672 - }, - { - "epoch": 20.689960886571058, - "grad_norm": 1.4768836498260498, - "learning_rate": 6.064422110552764e-05, - "loss": 5.3547, - "step": 39673 - }, - { - "epoch": 20.690482398956973, - "grad_norm": 1.4829355478286743, - "learning_rate": 6.064321608040201e-05, - "loss": 5.0484, - "step": 39674 - }, - { - "epoch": 20.691003911342893, - "grad_norm": 1.4919958114624023, - "learning_rate": 6.0642211055276384e-05, - "loss": 4.9077, - "step": 39675 - }, - { - "epoch": 20.691525423728812, - "grad_norm": 1.4404346942901611, - "learning_rate": 6.0641206030150756e-05, - "loss": 5.7267, - "step": 39676 - }, - { - "epoch": 20.692046936114732, - "grad_norm": 1.586681604385376, - "learning_rate": 6.064020100502513e-05, - "loss": 5.1418, - "step": 39677 - }, - { - "epoch": 20.69256844850065, - "grad_norm": 1.4936484098434448, - "learning_rate": 6.06391959798995e-05, - "loss": 5.6661, - "step": 39678 - }, - { - "epoch": 20.69308996088657, - "grad_norm": 1.5253725051879883, - "learning_rate": 6.0638190954773875e-05, - "loss": 5.4229, - "step": 39679 - }, - { - "epoch": 20.69361147327249, - "grad_norm": 1.5829837322235107, - "learning_rate": 6.0637185929648247e-05, - "loss": 5.115, - "step": 39680 - }, - { - "epoch": 20.69413298565841, - "grad_norm": 1.5386154651641846, - "learning_rate": 6.0636180904522624e-05, - "loss": 5.4345, - "step": 39681 - }, - { - "epoch": 20.69465449804433, - "grad_norm": 1.4490097761154175, - "learning_rate": 6.063517587939699e-05, - "loss": 5.1775, - "step": 39682 - }, - { - "epoch": 20.69517601043025, - "grad_norm": 1.5062490701675415, - "learning_rate": 6.063417085427135e-05, - "loss": 5.4783, - "step": 39683 - }, - { - "epoch": 20.695697522816168, - "grad_norm": 1.6646472215652466, - "learning_rate": 6.063316582914573e-05, - "loss": 5.4656, - "step": 39684 - }, - { - "epoch": 20.696219035202088, - "grad_norm": 1.5669032335281372, - "learning_rate": 6.06321608040201e-05, - "loss": 5.1794, - "step": 39685 - }, - { - "epoch": 20.696740547588004, - "grad_norm": 1.6104190349578857, - "learning_rate": 6.063115577889448e-05, - "loss": 4.7927, - "step": 39686 - }, - { - "epoch": 20.697262059973923, - "grad_norm": 1.4725980758666992, - "learning_rate": 6.0630150753768844e-05, - "loss": 5.1545, - "step": 39687 - }, - { - "epoch": 20.697783572359842, - "grad_norm": 1.4490851163864136, - "learning_rate": 6.062914572864322e-05, - "loss": 5.4966, - "step": 39688 - }, - { - "epoch": 20.698305084745762, - "grad_norm": 1.5122294425964355, - "learning_rate": 6.0628140703517586e-05, - "loss": 5.4112, - "step": 39689 - }, - { - "epoch": 20.69882659713168, - "grad_norm": 1.3854050636291504, - "learning_rate": 6.0627135678391964e-05, - "loss": 5.6632, - "step": 39690 - }, - { - "epoch": 20.6993481095176, - "grad_norm": 1.464016079902649, - "learning_rate": 6.0626130653266335e-05, - "loss": 5.1103, - "step": 39691 - }, - { - "epoch": 20.69986962190352, - "grad_norm": 1.5136443376541138, - "learning_rate": 6.062512562814071e-05, - "loss": 5.4228, - "step": 39692 - }, - { - "epoch": 20.70039113428944, - "grad_norm": 1.6263755559921265, - "learning_rate": 6.062412060301508e-05, - "loss": 5.2283, - "step": 39693 - }, - { - "epoch": 20.70091264667536, - "grad_norm": 1.5163757801055908, - "learning_rate": 6.0623115577889455e-05, - "loss": 5.1964, - "step": 39694 - }, - { - "epoch": 20.70143415906128, - "grad_norm": 1.481034755706787, - "learning_rate": 6.062211055276382e-05, - "loss": 5.2653, - "step": 39695 - }, - { - "epoch": 20.701955671447198, - "grad_norm": 1.5828278064727783, - "learning_rate": 6.062110552763819e-05, - "loss": 5.4427, - "step": 39696 - }, - { - "epoch": 20.702477183833118, - "grad_norm": 1.5008360147476196, - "learning_rate": 6.062010050251257e-05, - "loss": 4.5259, - "step": 39697 - }, - { - "epoch": 20.702998696219034, - "grad_norm": 1.4895949363708496, - "learning_rate": 6.061909547738693e-05, - "loss": 5.6325, - "step": 39698 - }, - { - "epoch": 20.703520208604953, - "grad_norm": 1.436837911605835, - "learning_rate": 6.061809045226131e-05, - "loss": 5.627, - "step": 39699 - }, - { - "epoch": 20.704041720990872, - "grad_norm": 1.5880860090255737, - "learning_rate": 6.061708542713568e-05, - "loss": 4.9563, - "step": 39700 - }, - { - "epoch": 20.704563233376792, - "grad_norm": 1.4658045768737793, - "learning_rate": 6.061608040201006e-05, - "loss": 5.315, - "step": 39701 - }, - { - "epoch": 20.70508474576271, - "grad_norm": 1.5733054876327515, - "learning_rate": 6.061507537688442e-05, - "loss": 5.1037, - "step": 39702 - }, - { - "epoch": 20.70560625814863, - "grad_norm": 1.5945769548416138, - "learning_rate": 6.06140703517588e-05, - "loss": 4.8891, - "step": 39703 - }, - { - "epoch": 20.70612777053455, - "grad_norm": 1.5164551734924316, - "learning_rate": 6.0613065326633165e-05, - "loss": 4.8761, - "step": 39704 - }, - { - "epoch": 20.70664928292047, - "grad_norm": 1.5165902376174927, - "learning_rate": 6.061206030150754e-05, - "loss": 5.2305, - "step": 39705 - }, - { - "epoch": 20.70717079530639, - "grad_norm": 1.4905699491500854, - "learning_rate": 6.0611055276381914e-05, - "loss": 5.3704, - "step": 39706 - }, - { - "epoch": 20.70769230769231, - "grad_norm": 1.4542635679244995, - "learning_rate": 6.061005025125629e-05, - "loss": 5.2978, - "step": 39707 - }, - { - "epoch": 20.708213820078228, - "grad_norm": 1.5128779411315918, - "learning_rate": 6.0609045226130656e-05, - "loss": 5.4578, - "step": 39708 - }, - { - "epoch": 20.708735332464148, - "grad_norm": 1.486722469329834, - "learning_rate": 6.060804020100502e-05, - "loss": 4.7348, - "step": 39709 - }, - { - "epoch": 20.709256844850064, - "grad_norm": 1.461548089981079, - "learning_rate": 6.06070351758794e-05, - "loss": 5.5692, - "step": 39710 - }, - { - "epoch": 20.709778357235983, - "grad_norm": 1.4298595190048218, - "learning_rate": 6.060603015075377e-05, - "loss": 5.455, - "step": 39711 - }, - { - "epoch": 20.710299869621903, - "grad_norm": 1.5788531303405762, - "learning_rate": 6.060502512562815e-05, - "loss": 4.9828, - "step": 39712 - }, - { - "epoch": 20.710821382007822, - "grad_norm": 1.4364399909973145, - "learning_rate": 6.060402010050251e-05, - "loss": 5.6243, - "step": 39713 - }, - { - "epoch": 20.71134289439374, - "grad_norm": 1.448679804801941, - "learning_rate": 6.060301507537689e-05, - "loss": 5.2822, - "step": 39714 - }, - { - "epoch": 20.71186440677966, - "grad_norm": 1.632962703704834, - "learning_rate": 6.0602010050251254e-05, - "loss": 5.2341, - "step": 39715 - }, - { - "epoch": 20.71238591916558, - "grad_norm": 1.5599056482315063, - "learning_rate": 6.060100502512563e-05, - "loss": 5.524, - "step": 39716 - }, - { - "epoch": 20.7129074315515, - "grad_norm": 1.5329740047454834, - "learning_rate": 6.06e-05, - "loss": 5.3756, - "step": 39717 - }, - { - "epoch": 20.71342894393742, - "grad_norm": 1.4626308679580688, - "learning_rate": 6.059899497487438e-05, - "loss": 5.1181, - "step": 39718 - }, - { - "epoch": 20.71395045632334, - "grad_norm": 1.4925694465637207, - "learning_rate": 6.0597989949748745e-05, - "loss": 5.2811, - "step": 39719 - }, - { - "epoch": 20.714471968709258, - "grad_norm": 1.5021424293518066, - "learning_rate": 6.059698492462312e-05, - "loss": 5.3948, - "step": 39720 - }, - { - "epoch": 20.714993481095178, - "grad_norm": 1.6545783281326294, - "learning_rate": 6.0595979899497494e-05, - "loss": 4.6826, - "step": 39721 - }, - { - "epoch": 20.715514993481094, - "grad_norm": 1.5768743753433228, - "learning_rate": 6.059497487437186e-05, - "loss": 5.5131, - "step": 39722 - }, - { - "epoch": 20.716036505867013, - "grad_norm": 1.390627145767212, - "learning_rate": 6.0593969849246236e-05, - "loss": 5.6471, - "step": 39723 - }, - { - "epoch": 20.716558018252933, - "grad_norm": 1.5367443561553955, - "learning_rate": 6.05929648241206e-05, - "loss": 4.9448, - "step": 39724 - }, - { - "epoch": 20.717079530638852, - "grad_norm": 1.5401952266693115, - "learning_rate": 6.059195979899498e-05, - "loss": 5.3741, - "step": 39725 - }, - { - "epoch": 20.71760104302477, - "grad_norm": 1.6512949466705322, - "learning_rate": 6.059095477386935e-05, - "loss": 5.0313, - "step": 39726 - }, - { - "epoch": 20.71812255541069, - "grad_norm": 1.5453531742095947, - "learning_rate": 6.058994974874373e-05, - "loss": 4.8017, - "step": 39727 - }, - { - "epoch": 20.71864406779661, - "grad_norm": 1.5704362392425537, - "learning_rate": 6.058894472361809e-05, - "loss": 5.5133, - "step": 39728 - }, - { - "epoch": 20.71916558018253, - "grad_norm": 1.4298028945922852, - "learning_rate": 6.058793969849247e-05, - "loss": 5.2659, - "step": 39729 - }, - { - "epoch": 20.71968709256845, - "grad_norm": 1.5677509307861328, - "learning_rate": 6.058693467336683e-05, - "loss": 5.5212, - "step": 39730 - }, - { - "epoch": 20.72020860495437, - "grad_norm": 1.5907855033874512, - "learning_rate": 6.058592964824121e-05, - "loss": 5.4314, - "step": 39731 - }, - { - "epoch": 20.72073011734029, - "grad_norm": 1.4736322164535522, - "learning_rate": 6.058492462311558e-05, - "loss": 4.6506, - "step": 39732 - }, - { - "epoch": 20.721251629726208, - "grad_norm": 1.6135705709457397, - "learning_rate": 6.058391959798996e-05, - "loss": 5.0766, - "step": 39733 - }, - { - "epoch": 20.721773142112124, - "grad_norm": 1.609515905380249, - "learning_rate": 6.0582914572864324e-05, - "loss": 5.2169, - "step": 39734 - }, - { - "epoch": 20.722294654498043, - "grad_norm": 1.5136406421661377, - "learning_rate": 6.058190954773869e-05, - "loss": 5.4938, - "step": 39735 - }, - { - "epoch": 20.722816166883963, - "grad_norm": 1.3771679401397705, - "learning_rate": 6.0580904522613066e-05, - "loss": 5.7415, - "step": 39736 - }, - { - "epoch": 20.723337679269882, - "grad_norm": 1.4625364542007446, - "learning_rate": 6.057989949748744e-05, - "loss": 5.3351, - "step": 39737 - }, - { - "epoch": 20.7238591916558, - "grad_norm": 1.4680311679840088, - "learning_rate": 6.0578894472361815e-05, - "loss": 5.7621, - "step": 39738 - }, - { - "epoch": 20.72438070404172, - "grad_norm": 1.4878965616226196, - "learning_rate": 6.057788944723618e-05, - "loss": 5.4067, - "step": 39739 - }, - { - "epoch": 20.72490221642764, - "grad_norm": 1.3818632364273071, - "learning_rate": 6.057688442211056e-05, - "loss": 5.5887, - "step": 39740 - }, - { - "epoch": 20.72542372881356, - "grad_norm": 1.4431909322738647, - "learning_rate": 6.057587939698493e-05, - "loss": 5.2527, - "step": 39741 - }, - { - "epoch": 20.72594524119948, - "grad_norm": 1.433445930480957, - "learning_rate": 6.0574874371859306e-05, - "loss": 5.1012, - "step": 39742 - }, - { - "epoch": 20.7264667535854, - "grad_norm": 1.4869837760925293, - "learning_rate": 6.057386934673367e-05, - "loss": 5.2846, - "step": 39743 - }, - { - "epoch": 20.72698826597132, - "grad_norm": 1.55743408203125, - "learning_rate": 6.057286432160805e-05, - "loss": 5.0319, - "step": 39744 - }, - { - "epoch": 20.727509778357238, - "grad_norm": 1.4954663515090942, - "learning_rate": 6.057185929648241e-05, - "loss": 5.3212, - "step": 39745 - }, - { - "epoch": 20.728031290743154, - "grad_norm": 1.5554485321044922, - "learning_rate": 6.057085427135679e-05, - "loss": 4.932, - "step": 39746 - }, - { - "epoch": 20.728552803129073, - "grad_norm": 1.6133321523666382, - "learning_rate": 6.056984924623116e-05, - "loss": 5.2647, - "step": 39747 - }, - { - "epoch": 20.729074315514993, - "grad_norm": 1.5690889358520508, - "learning_rate": 6.0568844221105525e-05, - "loss": 4.9635, - "step": 39748 - }, - { - "epoch": 20.729595827900912, - "grad_norm": 1.571123719215393, - "learning_rate": 6.05678391959799e-05, - "loss": 4.5981, - "step": 39749 - }, - { - "epoch": 20.73011734028683, - "grad_norm": 1.5257474184036255, - "learning_rate": 6.056683417085427e-05, - "loss": 5.3633, - "step": 39750 - }, - { - "epoch": 20.73063885267275, - "grad_norm": 1.4855064153671265, - "learning_rate": 6.0565829145728645e-05, - "loss": 5.6341, - "step": 39751 - }, - { - "epoch": 20.73116036505867, - "grad_norm": 1.6079655885696411, - "learning_rate": 6.0564824120603016e-05, - "loss": 5.0567, - "step": 39752 - }, - { - "epoch": 20.73168187744459, - "grad_norm": 1.5049333572387695, - "learning_rate": 6.0563819095477394e-05, - "loss": 5.5275, - "step": 39753 - }, - { - "epoch": 20.73220338983051, - "grad_norm": 1.526338815689087, - "learning_rate": 6.056281407035176e-05, - "loss": 5.2811, - "step": 39754 - }, - { - "epoch": 20.73272490221643, - "grad_norm": 1.5201102495193481, - "learning_rate": 6.0561809045226136e-05, - "loss": 5.5283, - "step": 39755 - }, - { - "epoch": 20.73324641460235, - "grad_norm": 1.6012699604034424, - "learning_rate": 6.05608040201005e-05, - "loss": 5.2551, - "step": 39756 - }, - { - "epoch": 20.733767926988264, - "grad_norm": 1.4903942346572876, - "learning_rate": 6.055979899497488e-05, - "loss": 5.5, - "step": 39757 - }, - { - "epoch": 20.734289439374184, - "grad_norm": 1.6086437702178955, - "learning_rate": 6.055879396984925e-05, - "loss": 5.5873, - "step": 39758 - }, - { - "epoch": 20.734810951760103, - "grad_norm": 1.5619721412658691, - "learning_rate": 6.055778894472363e-05, - "loss": 4.9673, - "step": 39759 - }, - { - "epoch": 20.735332464146023, - "grad_norm": 1.5650744438171387, - "learning_rate": 6.055678391959799e-05, - "loss": 4.3292, - "step": 39760 - }, - { - "epoch": 20.735853976531942, - "grad_norm": 1.517235279083252, - "learning_rate": 6.0555778894472356e-05, - "loss": 5.2056, - "step": 39761 - }, - { - "epoch": 20.73637548891786, - "grad_norm": 1.7537144422531128, - "learning_rate": 6.0554773869346734e-05, - "loss": 4.4085, - "step": 39762 - }, - { - "epoch": 20.73689700130378, - "grad_norm": 1.5310475826263428, - "learning_rate": 6.0553768844221105e-05, - "loss": 5.6475, - "step": 39763 - }, - { - "epoch": 20.7374185136897, - "grad_norm": 1.5442466735839844, - "learning_rate": 6.055276381909548e-05, - "loss": 5.3191, - "step": 39764 - }, - { - "epoch": 20.73794002607562, - "grad_norm": 1.5191656351089478, - "learning_rate": 6.055175879396985e-05, - "loss": 5.4069, - "step": 39765 - }, - { - "epoch": 20.73846153846154, - "grad_norm": 1.4222437143325806, - "learning_rate": 6.0550753768844225e-05, - "loss": 5.7249, - "step": 39766 - }, - { - "epoch": 20.73898305084746, - "grad_norm": 1.4908653497695923, - "learning_rate": 6.0549748743718596e-05, - "loss": 5.613, - "step": 39767 - }, - { - "epoch": 20.73950456323338, - "grad_norm": 1.5584337711334229, - "learning_rate": 6.0548743718592974e-05, - "loss": 5.3289, - "step": 39768 - }, - { - "epoch": 20.740026075619298, - "grad_norm": 1.5817900896072388, - "learning_rate": 6.054773869346734e-05, - "loss": 4.9118, - "step": 39769 - }, - { - "epoch": 20.740547588005214, - "grad_norm": 1.4674632549285889, - "learning_rate": 6.0546733668341716e-05, - "loss": 5.5338, - "step": 39770 - }, - { - "epoch": 20.741069100391133, - "grad_norm": 1.5344324111938477, - "learning_rate": 6.054572864321608e-05, - "loss": 5.5126, - "step": 39771 - }, - { - "epoch": 20.741590612777053, - "grad_norm": 1.515645146369934, - "learning_rate": 6.054472361809046e-05, - "loss": 5.2481, - "step": 39772 - }, - { - "epoch": 20.742112125162972, - "grad_norm": 1.47941255569458, - "learning_rate": 6.054371859296483e-05, - "loss": 5.6244, - "step": 39773 - }, - { - "epoch": 20.74263363754889, - "grad_norm": 1.5873017311096191, - "learning_rate": 6.054271356783921e-05, - "loss": 5.1201, - "step": 39774 - }, - { - "epoch": 20.74315514993481, - "grad_norm": 1.5855456590652466, - "learning_rate": 6.054170854271357e-05, - "loss": 5.1248, - "step": 39775 - }, - { - "epoch": 20.74367666232073, - "grad_norm": 1.5980288982391357, - "learning_rate": 6.0540703517587935e-05, - "loss": 5.0643, - "step": 39776 - }, - { - "epoch": 20.74419817470665, - "grad_norm": 1.5541105270385742, - "learning_rate": 6.053969849246231e-05, - "loss": 5.4156, - "step": 39777 - }, - { - "epoch": 20.74471968709257, - "grad_norm": 1.4421323537826538, - "learning_rate": 6.0538693467336684e-05, - "loss": 5.5487, - "step": 39778 - }, - { - "epoch": 20.74524119947849, - "grad_norm": 1.4712700843811035, - "learning_rate": 6.053768844221106e-05, - "loss": 5.399, - "step": 39779 - }, - { - "epoch": 20.74576271186441, - "grad_norm": 1.466018557548523, - "learning_rate": 6.0536683417085426e-05, - "loss": 4.8383, - "step": 39780 - }, - { - "epoch": 20.746284224250324, - "grad_norm": 1.5672438144683838, - "learning_rate": 6.0535678391959804e-05, - "loss": 5.0133, - "step": 39781 - }, - { - "epoch": 20.746805736636244, - "grad_norm": 1.5790257453918457, - "learning_rate": 6.053467336683417e-05, - "loss": 5.1544, - "step": 39782 - }, - { - "epoch": 20.747327249022163, - "grad_norm": 1.5260897874832153, - "learning_rate": 6.0533668341708546e-05, - "loss": 5.2601, - "step": 39783 - }, - { - "epoch": 20.747848761408083, - "grad_norm": 1.5079429149627686, - "learning_rate": 6.053266331658292e-05, - "loss": 5.343, - "step": 39784 - }, - { - "epoch": 20.748370273794002, - "grad_norm": 1.4808851480484009, - "learning_rate": 6.0531658291457295e-05, - "loss": 5.5798, - "step": 39785 - }, - { - "epoch": 20.74889178617992, - "grad_norm": 1.553613305091858, - "learning_rate": 6.053065326633166e-05, - "loss": 5.0639, - "step": 39786 - }, - { - "epoch": 20.74941329856584, - "grad_norm": 1.5047022104263306, - "learning_rate": 6.052964824120604e-05, - "loss": 5.0546, - "step": 39787 - }, - { - "epoch": 20.74993481095176, - "grad_norm": 1.512042760848999, - "learning_rate": 6.052864321608041e-05, - "loss": 5.3988, - "step": 39788 - }, - { - "epoch": 20.75045632333768, - "grad_norm": 1.5827561616897583, - "learning_rate": 6.052763819095477e-05, - "loss": 5.4751, - "step": 39789 - }, - { - "epoch": 20.7509778357236, - "grad_norm": 1.5344197750091553, - "learning_rate": 6.052663316582915e-05, - "loss": 5.6466, - "step": 39790 - }, - { - "epoch": 20.75149934810952, - "grad_norm": 1.4560250043869019, - "learning_rate": 6.0525628140703515e-05, - "loss": 5.7269, - "step": 39791 - }, - { - "epoch": 20.75202086049544, - "grad_norm": 1.6703704595565796, - "learning_rate": 6.052462311557789e-05, - "loss": 5.0548, - "step": 39792 - }, - { - "epoch": 20.752542372881354, - "grad_norm": 1.6121684312820435, - "learning_rate": 6.0523618090452263e-05, - "loss": 5.3991, - "step": 39793 - }, - { - "epoch": 20.753063885267274, - "grad_norm": 1.5770491361618042, - "learning_rate": 6.052261306532664e-05, - "loss": 4.9547, - "step": 39794 - }, - { - "epoch": 20.753585397653193, - "grad_norm": 1.5843219757080078, - "learning_rate": 6.0521608040201006e-05, - "loss": 5.4891, - "step": 39795 - }, - { - "epoch": 20.754106910039113, - "grad_norm": 1.429232120513916, - "learning_rate": 6.0520603015075383e-05, - "loss": 4.9084, - "step": 39796 - }, - { - "epoch": 20.754628422425032, - "grad_norm": 1.50742769241333, - "learning_rate": 6.051959798994975e-05, - "loss": 5.1303, - "step": 39797 - }, - { - "epoch": 20.75514993481095, - "grad_norm": 1.4886915683746338, - "learning_rate": 6.0518592964824126e-05, - "loss": 5.1417, - "step": 39798 - }, - { - "epoch": 20.75567144719687, - "grad_norm": 1.5768355131149292, - "learning_rate": 6.0517587939698497e-05, - "loss": 5.4628, - "step": 39799 - }, - { - "epoch": 20.75619295958279, - "grad_norm": 1.484576940536499, - "learning_rate": 6.0516582914572874e-05, - "loss": 5.7427, - "step": 39800 - }, - { - "epoch": 20.75671447196871, - "grad_norm": 1.456667423248291, - "learning_rate": 6.051557788944724e-05, - "loss": 5.041, - "step": 39801 - }, - { - "epoch": 20.75723598435463, - "grad_norm": 1.5110745429992676, - "learning_rate": 6.05145728643216e-05, - "loss": 5.2433, - "step": 39802 - }, - { - "epoch": 20.75775749674055, - "grad_norm": 1.4115207195281982, - "learning_rate": 6.051356783919598e-05, - "loss": 5.4306, - "step": 39803 - }, - { - "epoch": 20.75827900912647, - "grad_norm": 1.579367995262146, - "learning_rate": 6.051256281407035e-05, - "loss": 5.2368, - "step": 39804 - }, - { - "epoch": 20.758800521512384, - "grad_norm": 1.4721304178237915, - "learning_rate": 6.051155778894473e-05, - "loss": 5.2844, - "step": 39805 - }, - { - "epoch": 20.759322033898304, - "grad_norm": 1.4999877214431763, - "learning_rate": 6.0510552763819094e-05, - "loss": 5.6235, - "step": 39806 - }, - { - "epoch": 20.759843546284223, - "grad_norm": 1.6428139209747314, - "learning_rate": 6.050954773869347e-05, - "loss": 5.3177, - "step": 39807 - }, - { - "epoch": 20.760365058670143, - "grad_norm": 1.550929307937622, - "learning_rate": 6.050854271356784e-05, - "loss": 5.2686, - "step": 39808 - }, - { - "epoch": 20.760886571056062, - "grad_norm": 1.4819782972335815, - "learning_rate": 6.050753768844222e-05, - "loss": 5.5498, - "step": 39809 - }, - { - "epoch": 20.76140808344198, - "grad_norm": 1.4415926933288574, - "learning_rate": 6.0506532663316585e-05, - "loss": 5.045, - "step": 39810 - }, - { - "epoch": 20.7619295958279, - "grad_norm": 1.4308089017868042, - "learning_rate": 6.050552763819096e-05, - "loss": 5.5597, - "step": 39811 - }, - { - "epoch": 20.76245110821382, - "grad_norm": 1.5281836986541748, - "learning_rate": 6.050452261306533e-05, - "loss": 5.6835, - "step": 39812 - }, - { - "epoch": 20.76297262059974, - "grad_norm": 1.6392236948013306, - "learning_rate": 6.0503517587939705e-05, - "loss": 4.9248, - "step": 39813 - }, - { - "epoch": 20.76349413298566, - "grad_norm": 1.5648926496505737, - "learning_rate": 6.0502512562814076e-05, - "loss": 5.3039, - "step": 39814 - }, - { - "epoch": 20.76401564537158, - "grad_norm": 1.4695162773132324, - "learning_rate": 6.050150753768844e-05, - "loss": 5.2771, - "step": 39815 - }, - { - "epoch": 20.7645371577575, - "grad_norm": 1.455304741859436, - "learning_rate": 6.050050251256282e-05, - "loss": 5.4062, - "step": 39816 - }, - { - "epoch": 20.765058670143414, - "grad_norm": 1.500754952430725, - "learning_rate": 6.049949748743718e-05, - "loss": 5.4293, - "step": 39817 - }, - { - "epoch": 20.765580182529334, - "grad_norm": 1.6058802604675293, - "learning_rate": 6.049849246231156e-05, - "loss": 5.4247, - "step": 39818 - }, - { - "epoch": 20.766101694915253, - "grad_norm": 1.5973701477050781, - "learning_rate": 6.049748743718593e-05, - "loss": 5.1767, - "step": 39819 - }, - { - "epoch": 20.766623207301173, - "grad_norm": 1.5378121137619019, - "learning_rate": 6.049648241206031e-05, - "loss": 5.4508, - "step": 39820 - }, - { - "epoch": 20.767144719687092, - "grad_norm": 1.4487895965576172, - "learning_rate": 6.049547738693467e-05, - "loss": 5.385, - "step": 39821 - }, - { - "epoch": 20.76766623207301, - "grad_norm": 1.4878191947937012, - "learning_rate": 6.049447236180905e-05, - "loss": 5.0518, - "step": 39822 - }, - { - "epoch": 20.76818774445893, - "grad_norm": 1.5537110567092896, - "learning_rate": 6.0493467336683415e-05, - "loss": 5.254, - "step": 39823 - }, - { - "epoch": 20.76870925684485, - "grad_norm": 1.5708539485931396, - "learning_rate": 6.049246231155779e-05, - "loss": 5.5589, - "step": 39824 - }, - { - "epoch": 20.76923076923077, - "grad_norm": 1.5808264017105103, - "learning_rate": 6.0491457286432164e-05, - "loss": 4.7519, - "step": 39825 - }, - { - "epoch": 20.76975228161669, - "grad_norm": 1.555550217628479, - "learning_rate": 6.049045226130654e-05, - "loss": 5.5497, - "step": 39826 - }, - { - "epoch": 20.77027379400261, - "grad_norm": 1.6048319339752197, - "learning_rate": 6.0489447236180906e-05, - "loss": 5.4876, - "step": 39827 - }, - { - "epoch": 20.77079530638853, - "grad_norm": 1.5714980363845825, - "learning_rate": 6.048844221105527e-05, - "loss": 5.2624, - "step": 39828 - }, - { - "epoch": 20.771316818774444, - "grad_norm": 1.6500221490859985, - "learning_rate": 6.048743718592965e-05, - "loss": 5.3867, - "step": 39829 - }, - { - "epoch": 20.771838331160364, - "grad_norm": 1.4877474308013916, - "learning_rate": 6.048643216080402e-05, - "loss": 5.7078, - "step": 39830 - }, - { - "epoch": 20.772359843546283, - "grad_norm": 1.5882434844970703, - "learning_rate": 6.04854271356784e-05, - "loss": 5.1284, - "step": 39831 - }, - { - "epoch": 20.772881355932203, - "grad_norm": 1.5127390623092651, - "learning_rate": 6.048442211055276e-05, - "loss": 5.0011, - "step": 39832 - }, - { - "epoch": 20.773402868318122, - "grad_norm": 1.5394182205200195, - "learning_rate": 6.048341708542714e-05, - "loss": 5.4263, - "step": 39833 - }, - { - "epoch": 20.77392438070404, - "grad_norm": 1.508617639541626, - "learning_rate": 6.048241206030151e-05, - "loss": 5.3419, - "step": 39834 - }, - { - "epoch": 20.77444589308996, - "grad_norm": 1.4134243726730347, - "learning_rate": 6.048140703517589e-05, - "loss": 5.7115, - "step": 39835 - }, - { - "epoch": 20.77496740547588, - "grad_norm": 1.4994351863861084, - "learning_rate": 6.048040201005025e-05, - "loss": 5.4973, - "step": 39836 - }, - { - "epoch": 20.7754889178618, - "grad_norm": 1.4198002815246582, - "learning_rate": 6.047939698492463e-05, - "loss": 5.2973, - "step": 39837 - }, - { - "epoch": 20.77601043024772, - "grad_norm": 1.5055568218231201, - "learning_rate": 6.0478391959798995e-05, - "loss": 5.1774, - "step": 39838 - }, - { - "epoch": 20.77653194263364, - "grad_norm": 1.4391368627548218, - "learning_rate": 6.047738693467337e-05, - "loss": 5.7397, - "step": 39839 - }, - { - "epoch": 20.777053455019555, - "grad_norm": 1.5909496545791626, - "learning_rate": 6.0476381909547744e-05, - "loss": 5.1295, - "step": 39840 - }, - { - "epoch": 20.777574967405474, - "grad_norm": 1.5903970003128052, - "learning_rate": 6.047537688442211e-05, - "loss": 5.135, - "step": 39841 - }, - { - "epoch": 20.778096479791394, - "grad_norm": 1.4708921909332275, - "learning_rate": 6.0474371859296486e-05, - "loss": 5.5788, - "step": 39842 - }, - { - "epoch": 20.778617992177313, - "grad_norm": 1.3997968435287476, - "learning_rate": 6.047336683417085e-05, - "loss": 5.6846, - "step": 39843 - }, - { - "epoch": 20.779139504563233, - "grad_norm": 1.6252117156982422, - "learning_rate": 6.047236180904523e-05, - "loss": 5.4533, - "step": 39844 - }, - { - "epoch": 20.779661016949152, - "grad_norm": 1.515307068824768, - "learning_rate": 6.04713567839196e-05, - "loss": 5.4832, - "step": 39845 - }, - { - "epoch": 20.78018252933507, - "grad_norm": 1.7766960859298706, - "learning_rate": 6.047035175879398e-05, - "loss": 5.4787, - "step": 39846 - }, - { - "epoch": 20.78070404172099, - "grad_norm": 1.410578727722168, - "learning_rate": 6.046934673366834e-05, - "loss": 5.664, - "step": 39847 - }, - { - "epoch": 20.78122555410691, - "grad_norm": 1.4207721948623657, - "learning_rate": 6.046834170854272e-05, - "loss": 5.5757, - "step": 39848 - }, - { - "epoch": 20.78174706649283, - "grad_norm": 1.5073217153549194, - "learning_rate": 6.046733668341708e-05, - "loss": 5.3726, - "step": 39849 - }, - { - "epoch": 20.78226857887875, - "grad_norm": 1.5117231607437134, - "learning_rate": 6.046633165829146e-05, - "loss": 5.3304, - "step": 39850 - }, - { - "epoch": 20.78279009126467, - "grad_norm": 1.631959080696106, - "learning_rate": 6.046532663316583e-05, - "loss": 4.6168, - "step": 39851 - }, - { - "epoch": 20.78331160365059, - "grad_norm": 1.6062592267990112, - "learning_rate": 6.046432160804021e-05, - "loss": 5.378, - "step": 39852 - }, - { - "epoch": 20.783833116036504, - "grad_norm": 1.61146879196167, - "learning_rate": 6.0463316582914574e-05, - "loss": 5.3774, - "step": 39853 - }, - { - "epoch": 20.784354628422424, - "grad_norm": 1.574057698249817, - "learning_rate": 6.0462311557788945e-05, - "loss": 5.5193, - "step": 39854 - }, - { - "epoch": 20.784876140808343, - "grad_norm": 1.5598788261413574, - "learning_rate": 6.046130653266332e-05, - "loss": 5.188, - "step": 39855 - }, - { - "epoch": 20.785397653194263, - "grad_norm": 1.4512594938278198, - "learning_rate": 6.046030150753769e-05, - "loss": 5.465, - "step": 39856 - }, - { - "epoch": 20.785919165580182, - "grad_norm": 1.619172215461731, - "learning_rate": 6.0459296482412065e-05, - "loss": 4.7827, - "step": 39857 - }, - { - "epoch": 20.7864406779661, - "grad_norm": 1.5253984928131104, - "learning_rate": 6.045829145728643e-05, - "loss": 5.6884, - "step": 39858 - }, - { - "epoch": 20.78696219035202, - "grad_norm": 1.5285894870758057, - "learning_rate": 6.045728643216081e-05, - "loss": 5.343, - "step": 39859 - }, - { - "epoch": 20.78748370273794, - "grad_norm": 1.4380850791931152, - "learning_rate": 6.045628140703518e-05, - "loss": 5.4463, - "step": 39860 - }, - { - "epoch": 20.78800521512386, - "grad_norm": 1.7334266901016235, - "learning_rate": 6.0455276381909556e-05, - "loss": 4.3763, - "step": 39861 - }, - { - "epoch": 20.78852672750978, - "grad_norm": 1.4341411590576172, - "learning_rate": 6.045427135678392e-05, - "loss": 5.4011, - "step": 39862 - }, - { - "epoch": 20.7890482398957, - "grad_norm": 1.5777955055236816, - "learning_rate": 6.04532663316583e-05, - "loss": 5.4974, - "step": 39863 - }, - { - "epoch": 20.789569752281615, - "grad_norm": 1.4106073379516602, - "learning_rate": 6.045226130653266e-05, - "loss": 5.5509, - "step": 39864 - }, - { - "epoch": 20.790091264667534, - "grad_norm": 1.4069663286209106, - "learning_rate": 6.045125628140704e-05, - "loss": 5.1899, - "step": 39865 - }, - { - "epoch": 20.790612777053454, - "grad_norm": 1.4910531044006348, - "learning_rate": 6.045025125628141e-05, - "loss": 5.0952, - "step": 39866 - }, - { - "epoch": 20.791134289439373, - "grad_norm": 1.4396672248840332, - "learning_rate": 6.044924623115579e-05, - "loss": 5.1368, - "step": 39867 - }, - { - "epoch": 20.791655801825293, - "grad_norm": 1.5694156885147095, - "learning_rate": 6.044824120603015e-05, - "loss": 5.2465, - "step": 39868 - }, - { - "epoch": 20.792177314211212, - "grad_norm": 1.3601713180541992, - "learning_rate": 6.044723618090452e-05, - "loss": 4.721, - "step": 39869 - }, - { - "epoch": 20.79269882659713, - "grad_norm": 1.3907192945480347, - "learning_rate": 6.0446231155778895e-05, - "loss": 5.4633, - "step": 39870 - }, - { - "epoch": 20.79322033898305, - "grad_norm": 1.4873064756393433, - "learning_rate": 6.0445226130653266e-05, - "loss": 5.6899, - "step": 39871 - }, - { - "epoch": 20.79374185136897, - "grad_norm": 1.4381142854690552, - "learning_rate": 6.0444221105527644e-05, - "loss": 5.5967, - "step": 39872 - }, - { - "epoch": 20.79426336375489, - "grad_norm": 1.4789106845855713, - "learning_rate": 6.044321608040201e-05, - "loss": 4.2494, - "step": 39873 - }, - { - "epoch": 20.79478487614081, - "grad_norm": 1.672726035118103, - "learning_rate": 6.0442211055276386e-05, - "loss": 4.83, - "step": 39874 - }, - { - "epoch": 20.79530638852673, - "grad_norm": 1.6276755332946777, - "learning_rate": 6.044120603015076e-05, - "loss": 5.1632, - "step": 39875 - }, - { - "epoch": 20.795827900912645, - "grad_norm": 1.5594706535339355, - "learning_rate": 6.0440201005025135e-05, - "loss": 5.4431, - "step": 39876 - }, - { - "epoch": 20.796349413298564, - "grad_norm": 1.6361676454544067, - "learning_rate": 6.04391959798995e-05, - "loss": 5.1623, - "step": 39877 - }, - { - "epoch": 20.796870925684484, - "grad_norm": 1.5639970302581787, - "learning_rate": 6.043819095477388e-05, - "loss": 4.7074, - "step": 39878 - }, - { - "epoch": 20.797392438070403, - "grad_norm": 1.642386794090271, - "learning_rate": 6.043718592964824e-05, - "loss": 4.812, - "step": 39879 - }, - { - "epoch": 20.797913950456323, - "grad_norm": 1.4882620573043823, - "learning_rate": 6.043618090452262e-05, - "loss": 5.4587, - "step": 39880 - }, - { - "epoch": 20.798435462842242, - "grad_norm": 1.4787524938583374, - "learning_rate": 6.043517587939699e-05, - "loss": 5.3744, - "step": 39881 - }, - { - "epoch": 20.798956975228162, - "grad_norm": 1.483515977859497, - "learning_rate": 6.0434170854271355e-05, - "loss": 5.019, - "step": 39882 - }, - { - "epoch": 20.79947848761408, - "grad_norm": 1.5972687005996704, - "learning_rate": 6.043316582914573e-05, - "loss": 5.2904, - "step": 39883 - }, - { - "epoch": 20.8, - "grad_norm": 1.4910686016082764, - "learning_rate": 6.04321608040201e-05, - "loss": 5.6277, - "step": 39884 - }, - { - "epoch": 20.80052151238592, - "grad_norm": 1.4789589643478394, - "learning_rate": 6.0431155778894475e-05, - "loss": 5.5252, - "step": 39885 - }, - { - "epoch": 20.80104302477184, - "grad_norm": 1.5255082845687866, - "learning_rate": 6.0430150753768846e-05, - "loss": 4.5761, - "step": 39886 - }, - { - "epoch": 20.80156453715776, - "grad_norm": 1.440598487854004, - "learning_rate": 6.0429145728643224e-05, - "loss": 5.2248, - "step": 39887 - }, - { - "epoch": 20.802086049543675, - "grad_norm": 1.4021072387695312, - "learning_rate": 6.042814070351759e-05, - "loss": 5.3205, - "step": 39888 - }, - { - "epoch": 20.802607561929594, - "grad_norm": 1.4987972974777222, - "learning_rate": 6.0427135678391966e-05, - "loss": 5.4646, - "step": 39889 - }, - { - "epoch": 20.803129074315514, - "grad_norm": 1.49226713180542, - "learning_rate": 6.042613065326633e-05, - "loss": 5.0423, - "step": 39890 - }, - { - "epoch": 20.803650586701433, - "grad_norm": 1.5054028034210205, - "learning_rate": 6.042512562814071e-05, - "loss": 5.5228, - "step": 39891 - }, - { - "epoch": 20.804172099087353, - "grad_norm": 1.6311123371124268, - "learning_rate": 6.042412060301508e-05, - "loss": 5.1491, - "step": 39892 - }, - { - "epoch": 20.804693611473272, - "grad_norm": 1.6004329919815063, - "learning_rate": 6.042311557788946e-05, - "loss": 5.3356, - "step": 39893 - }, - { - "epoch": 20.805215123859192, - "grad_norm": 1.5624935626983643, - "learning_rate": 6.042211055276382e-05, - "loss": 4.4504, - "step": 39894 - }, - { - "epoch": 20.80573663624511, - "grad_norm": 1.4448143243789673, - "learning_rate": 6.042110552763819e-05, - "loss": 5.4986, - "step": 39895 - }, - { - "epoch": 20.80625814863103, - "grad_norm": 1.511906623840332, - "learning_rate": 6.042010050251257e-05, - "loss": 4.5871, - "step": 39896 - }, - { - "epoch": 20.80677966101695, - "grad_norm": 1.5463826656341553, - "learning_rate": 6.0419095477386934e-05, - "loss": 5.2836, - "step": 39897 - }, - { - "epoch": 20.80730117340287, - "grad_norm": 1.5231024026870728, - "learning_rate": 6.041809045226131e-05, - "loss": 5.3128, - "step": 39898 - }, - { - "epoch": 20.80782268578879, - "grad_norm": 1.4506170749664307, - "learning_rate": 6.0417085427135676e-05, - "loss": 5.4482, - "step": 39899 - }, - { - "epoch": 20.808344198174705, - "grad_norm": 1.4772603511810303, - "learning_rate": 6.0416080402010054e-05, - "loss": 5.052, - "step": 39900 - }, - { - "epoch": 20.808865710560625, - "grad_norm": 1.4169104099273682, - "learning_rate": 6.0415075376884425e-05, - "loss": 5.3644, - "step": 39901 - }, - { - "epoch": 20.809387222946544, - "grad_norm": 1.5253602266311646, - "learning_rate": 6.04140703517588e-05, - "loss": 4.8209, - "step": 39902 - }, - { - "epoch": 20.809908735332463, - "grad_norm": 1.4498074054718018, - "learning_rate": 6.041306532663317e-05, - "loss": 5.8008, - "step": 39903 - }, - { - "epoch": 20.810430247718383, - "grad_norm": 1.45956289768219, - "learning_rate": 6.0412060301507545e-05, - "loss": 5.6856, - "step": 39904 - }, - { - "epoch": 20.810951760104302, - "grad_norm": 1.5131890773773193, - "learning_rate": 6.041105527638191e-05, - "loss": 5.4971, - "step": 39905 - }, - { - "epoch": 20.811473272490222, - "grad_norm": 1.450623631477356, - "learning_rate": 6.041005025125629e-05, - "loss": 5.3875, - "step": 39906 - }, - { - "epoch": 20.81199478487614, - "grad_norm": 1.482239007949829, - "learning_rate": 6.040904522613066e-05, - "loss": 5.0981, - "step": 39907 - }, - { - "epoch": 20.81251629726206, - "grad_norm": 1.5412629842758179, - "learning_rate": 6.040804020100502e-05, - "loss": 5.45, - "step": 39908 - }, - { - "epoch": 20.81303780964798, - "grad_norm": 1.5341947078704834, - "learning_rate": 6.04070351758794e-05, - "loss": 5.4021, - "step": 39909 - }, - { - "epoch": 20.8135593220339, - "grad_norm": 1.4398082494735718, - "learning_rate": 6.0406030150753765e-05, - "loss": 4.7041, - "step": 39910 - }, - { - "epoch": 20.81408083441982, - "grad_norm": 1.5265710353851318, - "learning_rate": 6.040502512562814e-05, - "loss": 5.397, - "step": 39911 - }, - { - "epoch": 20.814602346805735, - "grad_norm": 1.4480386972427368, - "learning_rate": 6.0404020100502513e-05, - "loss": 5.1887, - "step": 39912 - }, - { - "epoch": 20.815123859191655, - "grad_norm": 1.6141515970230103, - "learning_rate": 6.040301507537689e-05, - "loss": 4.79, - "step": 39913 - }, - { - "epoch": 20.815645371577574, - "grad_norm": 1.4284404516220093, - "learning_rate": 6.0402010050251256e-05, - "loss": 5.6329, - "step": 39914 - }, - { - "epoch": 20.816166883963493, - "grad_norm": 1.6170051097869873, - "learning_rate": 6.0401005025125633e-05, - "loss": 4.7098, - "step": 39915 - }, - { - "epoch": 20.816688396349413, - "grad_norm": 1.4958391189575195, - "learning_rate": 6.04e-05, - "loss": 5.4194, - "step": 39916 - }, - { - "epoch": 20.817209908735332, - "grad_norm": 1.4733611345291138, - "learning_rate": 6.0398994974874376e-05, - "loss": 4.8584, - "step": 39917 - }, - { - "epoch": 20.817731421121252, - "grad_norm": 1.4669216871261597, - "learning_rate": 6.0397989949748747e-05, - "loss": 5.0529, - "step": 39918 - }, - { - "epoch": 20.81825293350717, - "grad_norm": 1.3859708309173584, - "learning_rate": 6.0396984924623124e-05, - "loss": 5.6833, - "step": 39919 - }, - { - "epoch": 20.81877444589309, - "grad_norm": 1.476605772972107, - "learning_rate": 6.039597989949749e-05, - "loss": 5.5159, - "step": 39920 - }, - { - "epoch": 20.81929595827901, - "grad_norm": 1.3268121480941772, - "learning_rate": 6.039497487437186e-05, - "loss": 5.0258, - "step": 39921 - }, - { - "epoch": 20.81981747066493, - "grad_norm": 1.4842336177825928, - "learning_rate": 6.039396984924624e-05, - "loss": 5.4764, - "step": 39922 - }, - { - "epoch": 20.820338983050846, - "grad_norm": 1.4540526866912842, - "learning_rate": 6.03929648241206e-05, - "loss": 5.2722, - "step": 39923 - }, - { - "epoch": 20.820860495436765, - "grad_norm": 1.599642276763916, - "learning_rate": 6.039195979899498e-05, - "loss": 5.1843, - "step": 39924 - }, - { - "epoch": 20.821382007822685, - "grad_norm": 1.591845154762268, - "learning_rate": 6.0390954773869344e-05, - "loss": 5.0244, - "step": 39925 - }, - { - "epoch": 20.821903520208604, - "grad_norm": 1.504915714263916, - "learning_rate": 6.038994974874372e-05, - "loss": 5.058, - "step": 39926 - }, - { - "epoch": 20.822425032594523, - "grad_norm": 1.4430899620056152, - "learning_rate": 6.038894472361809e-05, - "loss": 5.4913, - "step": 39927 - }, - { - "epoch": 20.822946544980443, - "grad_norm": 1.511671543121338, - "learning_rate": 6.038793969849247e-05, - "loss": 5.2106, - "step": 39928 - }, - { - "epoch": 20.823468057366362, - "grad_norm": 1.545095443725586, - "learning_rate": 6.0386934673366835e-05, - "loss": 4.6872, - "step": 39929 - }, - { - "epoch": 20.823989569752282, - "grad_norm": 1.5059067010879517, - "learning_rate": 6.038592964824121e-05, - "loss": 5.1071, - "step": 39930 - }, - { - "epoch": 20.8245110821382, - "grad_norm": 1.4671326875686646, - "learning_rate": 6.038492462311558e-05, - "loss": 4.4065, - "step": 39931 - }, - { - "epoch": 20.82503259452412, - "grad_norm": 1.5128850936889648, - "learning_rate": 6.0383919597989955e-05, - "loss": 5.0276, - "step": 39932 - }, - { - "epoch": 20.82555410691004, - "grad_norm": 1.4708514213562012, - "learning_rate": 6.0382914572864326e-05, - "loss": 5.5917, - "step": 39933 - }, - { - "epoch": 20.82607561929596, - "grad_norm": 1.5712562799453735, - "learning_rate": 6.038190954773869e-05, - "loss": 5.2616, - "step": 39934 - }, - { - "epoch": 20.82659713168188, - "grad_norm": 1.564740777015686, - "learning_rate": 6.038090452261307e-05, - "loss": 4.8635, - "step": 39935 - }, - { - "epoch": 20.827118644067795, - "grad_norm": 1.4187517166137695, - "learning_rate": 6.037989949748743e-05, - "loss": 5.3705, - "step": 39936 - }, - { - "epoch": 20.827640156453715, - "grad_norm": 1.506408929824829, - "learning_rate": 6.037889447236181e-05, - "loss": 5.403, - "step": 39937 - }, - { - "epoch": 20.828161668839634, - "grad_norm": 1.4963879585266113, - "learning_rate": 6.037788944723618e-05, - "loss": 5.3444, - "step": 39938 - }, - { - "epoch": 20.828683181225554, - "grad_norm": 1.688828945159912, - "learning_rate": 6.037688442211056e-05, - "loss": 4.7799, - "step": 39939 - }, - { - "epoch": 20.829204693611473, - "grad_norm": 1.4276642799377441, - "learning_rate": 6.037587939698492e-05, - "loss": 5.6716, - "step": 39940 - }, - { - "epoch": 20.829726205997392, - "grad_norm": 1.593644142150879, - "learning_rate": 6.03748743718593e-05, - "loss": 5.4766, - "step": 39941 - }, - { - "epoch": 20.830247718383312, - "grad_norm": 1.44269859790802, - "learning_rate": 6.037386934673367e-05, - "loss": 5.5356, - "step": 39942 - }, - { - "epoch": 20.83076923076923, - "grad_norm": 1.70134699344635, - "learning_rate": 6.037286432160805e-05, - "loss": 4.5554, - "step": 39943 - }, - { - "epoch": 20.83129074315515, - "grad_norm": 1.62839937210083, - "learning_rate": 6.0371859296482414e-05, - "loss": 5.496, - "step": 39944 - }, - { - "epoch": 20.83181225554107, - "grad_norm": 1.4503368139266968, - "learning_rate": 6.037085427135679e-05, - "loss": 5.4414, - "step": 39945 - }, - { - "epoch": 20.83233376792699, - "grad_norm": 1.4764113426208496, - "learning_rate": 6.0369849246231156e-05, - "loss": 5.5379, - "step": 39946 - }, - { - "epoch": 20.832855280312906, - "grad_norm": 1.439281702041626, - "learning_rate": 6.0368844221105534e-05, - "loss": 5.0078, - "step": 39947 - }, - { - "epoch": 20.833376792698825, - "grad_norm": 1.5833196640014648, - "learning_rate": 6.0367839195979905e-05, - "loss": 5.0277, - "step": 39948 - }, - { - "epoch": 20.833898305084745, - "grad_norm": 1.518999457359314, - "learning_rate": 6.036683417085427e-05, - "loss": 5.6431, - "step": 39949 - }, - { - "epoch": 20.834419817470664, - "grad_norm": 1.6284846067428589, - "learning_rate": 6.036582914572865e-05, - "loss": 4.7729, - "step": 39950 - }, - { - "epoch": 20.834941329856584, - "grad_norm": 1.5679675340652466, - "learning_rate": 6.036482412060301e-05, - "loss": 4.313, - "step": 39951 - }, - { - "epoch": 20.835462842242503, - "grad_norm": 1.4857149124145508, - "learning_rate": 6.036381909547739e-05, - "loss": 5.4421, - "step": 39952 - }, - { - "epoch": 20.835984354628422, - "grad_norm": 1.5602260828018188, - "learning_rate": 6.036281407035176e-05, - "loss": 5.374, - "step": 39953 - }, - { - "epoch": 20.836505867014342, - "grad_norm": 1.5773767232894897, - "learning_rate": 6.036180904522614e-05, - "loss": 5.0935, - "step": 39954 - }, - { - "epoch": 20.83702737940026, - "grad_norm": 1.648660659790039, - "learning_rate": 6.03608040201005e-05, - "loss": 5.1309, - "step": 39955 - }, - { - "epoch": 20.83754889178618, - "grad_norm": 1.6436294317245483, - "learning_rate": 6.035979899497488e-05, - "loss": 4.7443, - "step": 39956 - }, - { - "epoch": 20.8380704041721, - "grad_norm": 1.491809606552124, - "learning_rate": 6.0358793969849245e-05, - "loss": 5.4018, - "step": 39957 - }, - { - "epoch": 20.83859191655802, - "grad_norm": 1.5448087453842163, - "learning_rate": 6.035778894472362e-05, - "loss": 4.9539, - "step": 39958 - }, - { - "epoch": 20.839113428943936, - "grad_norm": 1.5586057901382446, - "learning_rate": 6.0356783919597994e-05, - "loss": 5.0509, - "step": 39959 - }, - { - "epoch": 20.839634941329855, - "grad_norm": 1.5103428363800049, - "learning_rate": 6.035577889447237e-05, - "loss": 4.9019, - "step": 39960 - }, - { - "epoch": 20.840156453715775, - "grad_norm": 1.4897074699401855, - "learning_rate": 6.0354773869346736e-05, - "loss": 4.7142, - "step": 39961 - }, - { - "epoch": 20.840677966101694, - "grad_norm": 1.5955137014389038, - "learning_rate": 6.035376884422111e-05, - "loss": 5.1867, - "step": 39962 - }, - { - "epoch": 20.841199478487614, - "grad_norm": 1.4793031215667725, - "learning_rate": 6.0352763819095485e-05, - "loss": 5.6091, - "step": 39963 - }, - { - "epoch": 20.841720990873533, - "grad_norm": 1.517891526222229, - "learning_rate": 6.035175879396985e-05, - "loss": 4.7439, - "step": 39964 - }, - { - "epoch": 20.842242503259452, - "grad_norm": 1.4355560541152954, - "learning_rate": 6.035075376884423e-05, - "loss": 5.569, - "step": 39965 - }, - { - "epoch": 20.842764015645372, - "grad_norm": 1.3943548202514648, - "learning_rate": 6.034974874371859e-05, - "loss": 5.3957, - "step": 39966 - }, - { - "epoch": 20.84328552803129, - "grad_norm": 1.5681556463241577, - "learning_rate": 6.034874371859297e-05, - "loss": 4.3901, - "step": 39967 - }, - { - "epoch": 20.84380704041721, - "grad_norm": 1.5573011636734009, - "learning_rate": 6.034773869346734e-05, - "loss": 5.3652, - "step": 39968 - }, - { - "epoch": 20.84432855280313, - "grad_norm": 1.5746870040893555, - "learning_rate": 6.034673366834172e-05, - "loss": 5.527, - "step": 39969 - }, - { - "epoch": 20.84485006518905, - "grad_norm": 1.4059549570083618, - "learning_rate": 6.034572864321608e-05, - "loss": 5.6607, - "step": 39970 - }, - { - "epoch": 20.845371577574966, - "grad_norm": 1.5632845163345337, - "learning_rate": 6.034472361809046e-05, - "loss": 5.184, - "step": 39971 - }, - { - "epoch": 20.845893089960885, - "grad_norm": 1.5040395259857178, - "learning_rate": 6.0343718592964824e-05, - "loss": 5.4614, - "step": 39972 - }, - { - "epoch": 20.846414602346805, - "grad_norm": 1.5227994918823242, - "learning_rate": 6.03427135678392e-05, - "loss": 5.2957, - "step": 39973 - }, - { - "epoch": 20.846936114732724, - "grad_norm": 1.4963380098342896, - "learning_rate": 6.034170854271357e-05, - "loss": 5.071, - "step": 39974 - }, - { - "epoch": 20.847457627118644, - "grad_norm": 1.522970199584961, - "learning_rate": 6.034070351758794e-05, - "loss": 5.3396, - "step": 39975 - }, - { - "epoch": 20.847979139504563, - "grad_norm": 1.5952558517456055, - "learning_rate": 6.0339698492462315e-05, - "loss": 5.5151, - "step": 39976 - }, - { - "epoch": 20.848500651890483, - "grad_norm": 1.6090325117111206, - "learning_rate": 6.033869346733668e-05, - "loss": 5.1836, - "step": 39977 - }, - { - "epoch": 20.849022164276402, - "grad_norm": 1.5110201835632324, - "learning_rate": 6.033768844221106e-05, - "loss": 5.2427, - "step": 39978 - }, - { - "epoch": 20.84954367666232, - "grad_norm": 1.4837230443954468, - "learning_rate": 6.033668341708543e-05, - "loss": 5.5362, - "step": 39979 - }, - { - "epoch": 20.85006518904824, - "grad_norm": 1.6052162647247314, - "learning_rate": 6.0335678391959806e-05, - "loss": 5.2376, - "step": 39980 - }, - { - "epoch": 20.85058670143416, - "grad_norm": 1.503879189491272, - "learning_rate": 6.033467336683417e-05, - "loss": 4.8504, - "step": 39981 - }, - { - "epoch": 20.85110821382008, - "grad_norm": 1.6010560989379883, - "learning_rate": 6.033366834170855e-05, - "loss": 5.2417, - "step": 39982 - }, - { - "epoch": 20.851629726205996, - "grad_norm": 1.5949069261550903, - "learning_rate": 6.033266331658292e-05, - "loss": 5.1719, - "step": 39983 - }, - { - "epoch": 20.852151238591915, - "grad_norm": 1.499493956565857, - "learning_rate": 6.03316582914573e-05, - "loss": 5.3564, - "step": 39984 - }, - { - "epoch": 20.852672750977835, - "grad_norm": 1.5797473192214966, - "learning_rate": 6.033065326633166e-05, - "loss": 5.3165, - "step": 39985 - }, - { - "epoch": 20.853194263363754, - "grad_norm": 1.549668550491333, - "learning_rate": 6.032964824120604e-05, - "loss": 5.0041, - "step": 39986 - }, - { - "epoch": 20.853715775749674, - "grad_norm": 1.4657344818115234, - "learning_rate": 6.03286432160804e-05, - "loss": 5.2258, - "step": 39987 - }, - { - "epoch": 20.854237288135593, - "grad_norm": 1.5446922779083252, - "learning_rate": 6.0327638190954774e-05, - "loss": 5.4646, - "step": 39988 - }, - { - "epoch": 20.854758800521513, - "grad_norm": 1.5638127326965332, - "learning_rate": 6.032663316582915e-05, - "loss": 4.5802, - "step": 39989 - }, - { - "epoch": 20.855280312907432, - "grad_norm": 1.4872932434082031, - "learning_rate": 6.0325628140703517e-05, - "loss": 5.5491, - "step": 39990 - }, - { - "epoch": 20.85580182529335, - "grad_norm": 1.495995044708252, - "learning_rate": 6.0324623115577894e-05, - "loss": 5.0393, - "step": 39991 - }, - { - "epoch": 20.85632333767927, - "grad_norm": 1.5403873920440674, - "learning_rate": 6.032361809045226e-05, - "loss": 4.5354, - "step": 39992 - }, - { - "epoch": 20.85684485006519, - "grad_norm": 1.4766384363174438, - "learning_rate": 6.0322613065326636e-05, - "loss": 5.6803, - "step": 39993 - }, - { - "epoch": 20.85736636245111, - "grad_norm": 1.4905648231506348, - "learning_rate": 6.032160804020101e-05, - "loss": 5.097, - "step": 39994 - }, - { - "epoch": 20.857887874837026, - "grad_norm": 1.433417558670044, - "learning_rate": 6.0320603015075385e-05, - "loss": 5.726, - "step": 39995 - }, - { - "epoch": 20.858409387222945, - "grad_norm": 2.1533966064453125, - "learning_rate": 6.031959798994975e-05, - "loss": 5.0923, - "step": 39996 - }, - { - "epoch": 20.858930899608865, - "grad_norm": 1.5557854175567627, - "learning_rate": 6.031859296482413e-05, - "loss": 4.7194, - "step": 39997 - }, - { - "epoch": 20.859452411994784, - "grad_norm": 1.5013822317123413, - "learning_rate": 6.031758793969849e-05, - "loss": 4.8325, - "step": 39998 - }, - { - "epoch": 20.859973924380704, - "grad_norm": 1.6386536359786987, - "learning_rate": 6.031658291457287e-05, - "loss": 5.0144, - "step": 39999 - }, - { - "epoch": 20.860495436766623, - "grad_norm": 1.6077555418014526, - "learning_rate": 6.031557788944724e-05, - "loss": 4.9266, - "step": 40000 - }, - { - "epoch": 20.860495436766623, - "eval_loss": 5.391127109527588, - "eval_runtime": 42.6724, - "eval_samples_per_second": 28.73, - "eval_steps_per_second": 3.609, - "step": 40000 - }, - { - "epoch": 20.861016949152543, - "grad_norm": 1.6149286031723022, - "learning_rate": 6.0314572864321605e-05, - "loss": 5.2929, - "step": 40001 - }, - { - "epoch": 20.861538461538462, - "grad_norm": 1.5909291505813599, - "learning_rate": 6.031356783919598e-05, - "loss": 4.9987, - "step": 40002 - }, - { - "epoch": 20.86205997392438, - "grad_norm": 1.5366368293762207, - "learning_rate": 6.031256281407035e-05, - "loss": 5.5527, - "step": 40003 - }, - { - "epoch": 20.8625814863103, - "grad_norm": 1.485438585281372, - "learning_rate": 6.0311557788944725e-05, - "loss": 5.507, - "step": 40004 - }, - { - "epoch": 20.86310299869622, - "grad_norm": 1.5034315586090088, - "learning_rate": 6.0310552763819096e-05, - "loss": 5.1777, - "step": 40005 - }, - { - "epoch": 20.86362451108214, - "grad_norm": 1.5249171257019043, - "learning_rate": 6.0309547738693474e-05, - "loss": 5.3676, - "step": 40006 - }, - { - "epoch": 20.864146023468056, - "grad_norm": 1.542660117149353, - "learning_rate": 6.030854271356784e-05, - "loss": 4.9075, - "step": 40007 - }, - { - "epoch": 20.864667535853975, - "grad_norm": 1.4002628326416016, - "learning_rate": 6.0307537688442216e-05, - "loss": 5.8253, - "step": 40008 - }, - { - "epoch": 20.865189048239895, - "grad_norm": 1.4901403188705444, - "learning_rate": 6.030653266331659e-05, - "loss": 5.288, - "step": 40009 - }, - { - "epoch": 20.865710560625814, - "grad_norm": 1.4464964866638184, - "learning_rate": 6.0305527638190965e-05, - "loss": 5.5196, - "step": 40010 - }, - { - "epoch": 20.866232073011734, - "grad_norm": 1.4654141664505005, - "learning_rate": 6.030452261306533e-05, - "loss": 5.2431, - "step": 40011 - }, - { - "epoch": 20.866753585397653, - "grad_norm": 1.608750343322754, - "learning_rate": 6.030351758793971e-05, - "loss": 4.9492, - "step": 40012 - }, - { - "epoch": 20.867275097783573, - "grad_norm": 1.5605833530426025, - "learning_rate": 6.030251256281407e-05, - "loss": 5.288, - "step": 40013 - }, - { - "epoch": 20.867796610169492, - "grad_norm": 1.5330029726028442, - "learning_rate": 6.030150753768844e-05, - "loss": 5.093, - "step": 40014 - }, - { - "epoch": 20.86831812255541, - "grad_norm": 1.636875867843628, - "learning_rate": 6.030050251256282e-05, - "loss": 5.5587, - "step": 40015 - }, - { - "epoch": 20.86883963494133, - "grad_norm": 1.5427441596984863, - "learning_rate": 6.0299497487437184e-05, - "loss": 5.5173, - "step": 40016 - }, - { - "epoch": 20.86936114732725, - "grad_norm": 1.5639699697494507, - "learning_rate": 6.029849246231156e-05, - "loss": 5.026, - "step": 40017 - }, - { - "epoch": 20.86988265971317, - "grad_norm": 1.4935379028320312, - "learning_rate": 6.0297487437185926e-05, - "loss": 5.3788, - "step": 40018 - }, - { - "epoch": 20.870404172099086, - "grad_norm": 1.5325037240982056, - "learning_rate": 6.0296482412060304e-05, - "loss": 5.2197, - "step": 40019 - }, - { - "epoch": 20.870925684485005, - "grad_norm": 1.5644073486328125, - "learning_rate": 6.0295477386934675e-05, - "loss": 5.2995, - "step": 40020 - }, - { - "epoch": 20.871447196870925, - "grad_norm": 1.5578187704086304, - "learning_rate": 6.029447236180905e-05, - "loss": 5.3543, - "step": 40021 - }, - { - "epoch": 20.871968709256844, - "grad_norm": 1.5423818826675415, - "learning_rate": 6.029346733668342e-05, - "loss": 5.1625, - "step": 40022 - }, - { - "epoch": 20.872490221642764, - "grad_norm": 1.5208367109298706, - "learning_rate": 6.0292462311557795e-05, - "loss": 5.2998, - "step": 40023 - }, - { - "epoch": 20.873011734028683, - "grad_norm": 1.4979146718978882, - "learning_rate": 6.029145728643216e-05, - "loss": 5.5241, - "step": 40024 - }, - { - "epoch": 20.873533246414603, - "grad_norm": 1.5274099111557007, - "learning_rate": 6.029045226130654e-05, - "loss": 5.2596, - "step": 40025 - }, - { - "epoch": 20.874054758800522, - "grad_norm": 1.5418825149536133, - "learning_rate": 6.028944723618091e-05, - "loss": 5.3175, - "step": 40026 - }, - { - "epoch": 20.87457627118644, - "grad_norm": 1.4363479614257812, - "learning_rate": 6.028844221105527e-05, - "loss": 5.4367, - "step": 40027 - }, - { - "epoch": 20.87509778357236, - "grad_norm": 1.5427542924880981, - "learning_rate": 6.028743718592965e-05, - "loss": 5.3513, - "step": 40028 - }, - { - "epoch": 20.87561929595828, - "grad_norm": 1.4335319995880127, - "learning_rate": 6.028643216080402e-05, - "loss": 5.2387, - "step": 40029 - }, - { - "epoch": 20.876140808344196, - "grad_norm": 1.3532804250717163, - "learning_rate": 6.02854271356784e-05, - "loss": 5.1876, - "step": 40030 - }, - { - "epoch": 20.876662320730116, - "grad_norm": 1.5246355533599854, - "learning_rate": 6.0284422110552764e-05, - "loss": 4.9018, - "step": 40031 - }, - { - "epoch": 20.877183833116035, - "grad_norm": 1.6411628723144531, - "learning_rate": 6.028341708542714e-05, - "loss": 4.7051, - "step": 40032 - }, - { - "epoch": 20.877705345501955, - "grad_norm": 1.4828377962112427, - "learning_rate": 6.0282412060301506e-05, - "loss": 5.5515, - "step": 40033 - }, - { - "epoch": 20.878226857887874, - "grad_norm": 1.5776827335357666, - "learning_rate": 6.0281407035175883e-05, - "loss": 5.3735, - "step": 40034 - }, - { - "epoch": 20.878748370273794, - "grad_norm": 1.5174503326416016, - "learning_rate": 6.0280402010050254e-05, - "loss": 5.1007, - "step": 40035 - }, - { - "epoch": 20.879269882659713, - "grad_norm": 1.5100367069244385, - "learning_rate": 6.027939698492463e-05, - "loss": 5.0767, - "step": 40036 - }, - { - "epoch": 20.879791395045633, - "grad_norm": 1.4190988540649414, - "learning_rate": 6.0278391959798997e-05, - "loss": 5.7459, - "step": 40037 - }, - { - "epoch": 20.880312907431552, - "grad_norm": 1.489125370979309, - "learning_rate": 6.0277386934673374e-05, - "loss": 5.5291, - "step": 40038 - }, - { - "epoch": 20.88083441981747, - "grad_norm": 1.5731769800186157, - "learning_rate": 6.027638190954774e-05, - "loss": 4.7828, - "step": 40039 - }, - { - "epoch": 20.88135593220339, - "grad_norm": 1.5434705018997192, - "learning_rate": 6.0275376884422117e-05, - "loss": 5.3044, - "step": 40040 - }, - { - "epoch": 20.88187744458931, - "grad_norm": 1.4419794082641602, - "learning_rate": 6.027437185929649e-05, - "loss": 5.3339, - "step": 40041 - }, - { - "epoch": 20.88239895697523, - "grad_norm": 1.5613213777542114, - "learning_rate": 6.027336683417085e-05, - "loss": 5.1324, - "step": 40042 - }, - { - "epoch": 20.882920469361146, - "grad_norm": 1.5402007102966309, - "learning_rate": 6.027236180904523e-05, - "loss": 4.8199, - "step": 40043 - }, - { - "epoch": 20.883441981747065, - "grad_norm": 1.4976166486740112, - "learning_rate": 6.0271356783919594e-05, - "loss": 5.3158, - "step": 40044 - }, - { - "epoch": 20.883963494132985, - "grad_norm": 1.4589430093765259, - "learning_rate": 6.027035175879397e-05, - "loss": 5.0514, - "step": 40045 - }, - { - "epoch": 20.884485006518904, - "grad_norm": 1.5237559080123901, - "learning_rate": 6.026934673366834e-05, - "loss": 5.452, - "step": 40046 - }, - { - "epoch": 20.885006518904824, - "grad_norm": 1.544539451599121, - "learning_rate": 6.026834170854272e-05, - "loss": 5.4719, - "step": 40047 - }, - { - "epoch": 20.885528031290743, - "grad_norm": 1.4824291467666626, - "learning_rate": 6.0267336683417085e-05, - "loss": 5.2715, - "step": 40048 - }, - { - "epoch": 20.886049543676663, - "grad_norm": 1.5072659254074097, - "learning_rate": 6.026633165829146e-05, - "loss": 5.2842, - "step": 40049 - }, - { - "epoch": 20.886571056062582, - "grad_norm": 1.4685763120651245, - "learning_rate": 6.0265326633165834e-05, - "loss": 5.6424, - "step": 40050 - }, - { - "epoch": 20.8870925684485, - "grad_norm": 1.4253073930740356, - "learning_rate": 6.026432160804021e-05, - "loss": 5.1098, - "step": 40051 - }, - { - "epoch": 20.88761408083442, - "grad_norm": 1.632025122642517, - "learning_rate": 6.0263316582914576e-05, - "loss": 5.1759, - "step": 40052 - }, - { - "epoch": 20.88813559322034, - "grad_norm": 1.6079111099243164, - "learning_rate": 6.0262311557788954e-05, - "loss": 5.2461, - "step": 40053 - }, - { - "epoch": 20.888657105606256, - "grad_norm": 1.416792631149292, - "learning_rate": 6.026130653266332e-05, - "loss": 5.1232, - "step": 40054 - }, - { - "epoch": 20.889178617992176, - "grad_norm": 1.4809178113937378, - "learning_rate": 6.026030150753769e-05, - "loss": 4.9761, - "step": 40055 - }, - { - "epoch": 20.889700130378095, - "grad_norm": 1.568742275238037, - "learning_rate": 6.025929648241207e-05, - "loss": 5.2317, - "step": 40056 - }, - { - "epoch": 20.890221642764015, - "grad_norm": 1.4731265306472778, - "learning_rate": 6.025829145728643e-05, - "loss": 5.6155, - "step": 40057 - }, - { - "epoch": 20.890743155149934, - "grad_norm": 1.5951759815216064, - "learning_rate": 6.025728643216081e-05, - "loss": 5.4796, - "step": 40058 - }, - { - "epoch": 20.891264667535854, - "grad_norm": 1.6033629179000854, - "learning_rate": 6.025628140703517e-05, - "loss": 5.3336, - "step": 40059 - }, - { - "epoch": 20.891786179921773, - "grad_norm": 1.514505386352539, - "learning_rate": 6.025527638190955e-05, - "loss": 5.4626, - "step": 40060 - }, - { - "epoch": 20.892307692307693, - "grad_norm": 1.7006981372833252, - "learning_rate": 6.025427135678392e-05, - "loss": 5.0514, - "step": 40061 - }, - { - "epoch": 20.892829204693612, - "grad_norm": 1.4950098991394043, - "learning_rate": 6.02532663316583e-05, - "loss": 5.4885, - "step": 40062 - }, - { - "epoch": 20.89335071707953, - "grad_norm": 1.5678675174713135, - "learning_rate": 6.0252261306532664e-05, - "loss": 5.4155, - "step": 40063 - }, - { - "epoch": 20.89387222946545, - "grad_norm": 1.4668123722076416, - "learning_rate": 6.025125628140704e-05, - "loss": 5.6335, - "step": 40064 - }, - { - "epoch": 20.89439374185137, - "grad_norm": 1.480548620223999, - "learning_rate": 6.0250251256281406e-05, - "loss": 5.3845, - "step": 40065 - }, - { - "epoch": 20.894915254237286, - "grad_norm": 1.6238813400268555, - "learning_rate": 6.0249246231155784e-05, - "loss": 4.848, - "step": 40066 - }, - { - "epoch": 20.895436766623206, - "grad_norm": 1.395859956741333, - "learning_rate": 6.0248241206030155e-05, - "loss": 5.6509, - "step": 40067 - }, - { - "epoch": 20.895958279009125, - "grad_norm": 1.474166989326477, - "learning_rate": 6.024723618090452e-05, - "loss": 5.745, - "step": 40068 - }, - { - "epoch": 20.896479791395045, - "grad_norm": 1.6041585206985474, - "learning_rate": 6.02462311557789e-05, - "loss": 4.9707, - "step": 40069 - }, - { - "epoch": 20.897001303780964, - "grad_norm": 1.6987378597259521, - "learning_rate": 6.024522613065327e-05, - "loss": 4.9491, - "step": 40070 - }, - { - "epoch": 20.897522816166884, - "grad_norm": 1.4874483346939087, - "learning_rate": 6.0244221105527646e-05, - "loss": 5.4322, - "step": 40071 - }, - { - "epoch": 20.898044328552803, - "grad_norm": 1.586398959159851, - "learning_rate": 6.024321608040201e-05, - "loss": 5.3099, - "step": 40072 - }, - { - "epoch": 20.898565840938723, - "grad_norm": 1.4801256656646729, - "learning_rate": 6.024221105527639e-05, - "loss": 5.2541, - "step": 40073 - }, - { - "epoch": 20.899087353324642, - "grad_norm": 1.5793423652648926, - "learning_rate": 6.024120603015075e-05, - "loss": 5.5438, - "step": 40074 - }, - { - "epoch": 20.89960886571056, - "grad_norm": 1.5230382680892944, - "learning_rate": 6.024020100502513e-05, - "loss": 5.3552, - "step": 40075 - }, - { - "epoch": 20.90013037809648, - "grad_norm": 1.529515266418457, - "learning_rate": 6.02391959798995e-05, - "loss": 5.3351, - "step": 40076 - }, - { - "epoch": 20.9006518904824, - "grad_norm": 1.3799694776535034, - "learning_rate": 6.023819095477388e-05, - "loss": 4.9661, - "step": 40077 - }, - { - "epoch": 20.901173402868316, - "grad_norm": 1.545832633972168, - "learning_rate": 6.0237185929648244e-05, - "loss": 5.1994, - "step": 40078 - }, - { - "epoch": 20.901694915254236, - "grad_norm": 1.4758573770523071, - "learning_rate": 6.023618090452262e-05, - "loss": 5.3581, - "step": 40079 - }, - { - "epoch": 20.902216427640155, - "grad_norm": 1.453084111213684, - "learning_rate": 6.0235175879396986e-05, - "loss": 4.898, - "step": 40080 - }, - { - "epoch": 20.902737940026075, - "grad_norm": 1.666993260383606, - "learning_rate": 6.023417085427136e-05, - "loss": 5.0067, - "step": 40081 - }, - { - "epoch": 20.903259452411994, - "grad_norm": 1.5796171426773071, - "learning_rate": 6.0233165829145735e-05, - "loss": 4.2646, - "step": 40082 - }, - { - "epoch": 20.903780964797914, - "grad_norm": 1.6471984386444092, - "learning_rate": 6.02321608040201e-05, - "loss": 5.2119, - "step": 40083 - }, - { - "epoch": 20.904302477183833, - "grad_norm": 1.579105019569397, - "learning_rate": 6.023115577889448e-05, - "loss": 5.2941, - "step": 40084 - }, - { - "epoch": 20.904823989569753, - "grad_norm": 1.777175784111023, - "learning_rate": 6.023015075376884e-05, - "loss": 5.3527, - "step": 40085 - }, - { - "epoch": 20.905345501955672, - "grad_norm": 1.5237518548965454, - "learning_rate": 6.022914572864322e-05, - "loss": 5.469, - "step": 40086 - }, - { - "epoch": 20.90586701434159, - "grad_norm": 1.5888643264770508, - "learning_rate": 6.022814070351759e-05, - "loss": 5.3022, - "step": 40087 - }, - { - "epoch": 20.90638852672751, - "grad_norm": 1.5788813829421997, - "learning_rate": 6.022713567839197e-05, - "loss": 5.6265, - "step": 40088 - }, - { - "epoch": 20.90691003911343, - "grad_norm": 1.5781879425048828, - "learning_rate": 6.022613065326633e-05, - "loss": 5.3788, - "step": 40089 - }, - { - "epoch": 20.907431551499347, - "grad_norm": 1.6271270513534546, - "learning_rate": 6.022512562814071e-05, - "loss": 4.7775, - "step": 40090 - }, - { - "epoch": 20.907953063885266, - "grad_norm": 1.5963928699493408, - "learning_rate": 6.0224120603015074e-05, - "loss": 5.0235, - "step": 40091 - }, - { - "epoch": 20.908474576271185, - "grad_norm": 1.5063369274139404, - "learning_rate": 6.022311557788945e-05, - "loss": 4.9472, - "step": 40092 - }, - { - "epoch": 20.908996088657105, - "grad_norm": 1.6844631433486938, - "learning_rate": 6.022211055276382e-05, - "loss": 4.9336, - "step": 40093 - }, - { - "epoch": 20.909517601043024, - "grad_norm": 1.438459873199463, - "learning_rate": 6.022110552763819e-05, - "loss": 5.439, - "step": 40094 - }, - { - "epoch": 20.910039113428944, - "grad_norm": 1.4677313566207886, - "learning_rate": 6.0220100502512565e-05, - "loss": 5.3671, - "step": 40095 - }, - { - "epoch": 20.910560625814863, - "grad_norm": 1.5824062824249268, - "learning_rate": 6.0219095477386936e-05, - "loss": 5.3408, - "step": 40096 - }, - { - "epoch": 20.911082138200783, - "grad_norm": 1.68326997756958, - "learning_rate": 6.0218090452261314e-05, - "loss": 5.1346, - "step": 40097 - }, - { - "epoch": 20.911603650586702, - "grad_norm": 1.5087056159973145, - "learning_rate": 6.021708542713568e-05, - "loss": 4.9556, - "step": 40098 - }, - { - "epoch": 20.91212516297262, - "grad_norm": 1.4743117094039917, - "learning_rate": 6.0216080402010056e-05, - "loss": 5.5545, - "step": 40099 - }, - { - "epoch": 20.91264667535854, - "grad_norm": 1.4857059717178345, - "learning_rate": 6.021507537688442e-05, - "loss": 5.0298, - "step": 40100 - }, - { - "epoch": 20.91316818774446, - "grad_norm": 1.5393195152282715, - "learning_rate": 6.02140703517588e-05, - "loss": 5.0775, - "step": 40101 - }, - { - "epoch": 20.913689700130377, - "grad_norm": 1.5671625137329102, - "learning_rate": 6.021306532663317e-05, - "loss": 4.6823, - "step": 40102 - }, - { - "epoch": 20.914211212516296, - "grad_norm": 1.5019588470458984, - "learning_rate": 6.021206030150755e-05, - "loss": 5.2135, - "step": 40103 - }, - { - "epoch": 20.914732724902215, - "grad_norm": 1.4813984632492065, - "learning_rate": 6.021105527638191e-05, - "loss": 5.5481, - "step": 40104 - }, - { - "epoch": 20.915254237288135, - "grad_norm": 1.5667246580123901, - "learning_rate": 6.021005025125629e-05, - "loss": 5.4134, - "step": 40105 - }, - { - "epoch": 20.915775749674054, - "grad_norm": 1.424500823020935, - "learning_rate": 6.020904522613065e-05, - "loss": 5.5741, - "step": 40106 - }, - { - "epoch": 20.916297262059974, - "grad_norm": 1.5366369485855103, - "learning_rate": 6.0208040201005024e-05, - "loss": 5.1487, - "step": 40107 - }, - { - "epoch": 20.916818774445893, - "grad_norm": 1.5908422470092773, - "learning_rate": 6.02070351758794e-05, - "loss": 4.9716, - "step": 40108 - }, - { - "epoch": 20.917340286831813, - "grad_norm": 1.495580792427063, - "learning_rate": 6.0206030150753767e-05, - "loss": 5.1251, - "step": 40109 - }, - { - "epoch": 20.917861799217732, - "grad_norm": 1.63461172580719, - "learning_rate": 6.0205025125628144e-05, - "loss": 4.99, - "step": 40110 - }, - { - "epoch": 20.91838331160365, - "grad_norm": 1.5443072319030762, - "learning_rate": 6.020402010050251e-05, - "loss": 5.5153, - "step": 40111 - }, - { - "epoch": 20.91890482398957, - "grad_norm": 1.5649410486221313, - "learning_rate": 6.0203015075376886e-05, - "loss": 5.2708, - "step": 40112 - }, - { - "epoch": 20.919426336375487, - "grad_norm": 1.5051201581954956, - "learning_rate": 6.020201005025126e-05, - "loss": 4.9989, - "step": 40113 - }, - { - "epoch": 20.919947848761407, - "grad_norm": 1.5114772319793701, - "learning_rate": 6.0201005025125635e-05, - "loss": 5.2138, - "step": 40114 - }, - { - "epoch": 20.920469361147326, - "grad_norm": 1.5812599658966064, - "learning_rate": 6.02e-05, - "loss": 5.666, - "step": 40115 - }, - { - "epoch": 20.920990873533245, - "grad_norm": 1.653073787689209, - "learning_rate": 6.019899497487438e-05, - "loss": 5.2132, - "step": 40116 - }, - { - "epoch": 20.921512385919165, - "grad_norm": 1.5771384239196777, - "learning_rate": 6.019798994974875e-05, - "loss": 5.0369, - "step": 40117 - }, - { - "epoch": 20.922033898305084, - "grad_norm": 1.4368616342544556, - "learning_rate": 6.0196984924623126e-05, - "loss": 5.1551, - "step": 40118 - }, - { - "epoch": 20.922555410691004, - "grad_norm": 1.5372655391693115, - "learning_rate": 6.019597989949749e-05, - "loss": 4.8848, - "step": 40119 - }, - { - "epoch": 20.923076923076923, - "grad_norm": 1.7004553079605103, - "learning_rate": 6.0194974874371855e-05, - "loss": 5.3764, - "step": 40120 - }, - { - "epoch": 20.923598435462843, - "grad_norm": 1.6447502374649048, - "learning_rate": 6.019396984924623e-05, - "loss": 5.3838, - "step": 40121 - }, - { - "epoch": 20.924119947848762, - "grad_norm": 1.6129873991012573, - "learning_rate": 6.0192964824120604e-05, - "loss": 5.2763, - "step": 40122 - }, - { - "epoch": 20.92464146023468, - "grad_norm": 1.7042375802993774, - "learning_rate": 6.019195979899498e-05, - "loss": 4.6105, - "step": 40123 - }, - { - "epoch": 20.9251629726206, - "grad_norm": 1.54409658908844, - "learning_rate": 6.0190954773869346e-05, - "loss": 5.0121, - "step": 40124 - }, - { - "epoch": 20.92568448500652, - "grad_norm": 1.4965101480484009, - "learning_rate": 6.0189949748743724e-05, - "loss": 5.1868, - "step": 40125 - }, - { - "epoch": 20.926205997392437, - "grad_norm": 1.6124176979064941, - "learning_rate": 6.018894472361809e-05, - "loss": 5.3203, - "step": 40126 - }, - { - "epoch": 20.926727509778356, - "grad_norm": 1.590895414352417, - "learning_rate": 6.0187939698492466e-05, - "loss": 5.5363, - "step": 40127 - }, - { - "epoch": 20.927249022164276, - "grad_norm": 1.6438887119293213, - "learning_rate": 6.018693467336684e-05, - "loss": 4.9561, - "step": 40128 - }, - { - "epoch": 20.927770534550195, - "grad_norm": 1.4365217685699463, - "learning_rate": 6.0185929648241215e-05, - "loss": 5.3499, - "step": 40129 - }, - { - "epoch": 20.928292046936114, - "grad_norm": 1.5837492942810059, - "learning_rate": 6.018492462311558e-05, - "loss": 5.1114, - "step": 40130 - }, - { - "epoch": 20.928813559322034, - "grad_norm": 1.5927616357803345, - "learning_rate": 6.018391959798996e-05, - "loss": 5.5207, - "step": 40131 - }, - { - "epoch": 20.929335071707953, - "grad_norm": 1.4584507942199707, - "learning_rate": 6.018291457286432e-05, - "loss": 5.6504, - "step": 40132 - }, - { - "epoch": 20.929856584093873, - "grad_norm": 1.5015032291412354, - "learning_rate": 6.01819095477387e-05, - "loss": 5.5236, - "step": 40133 - }, - { - "epoch": 20.930378096479792, - "grad_norm": 1.580650806427002, - "learning_rate": 6.018090452261307e-05, - "loss": 5.0829, - "step": 40134 - }, - { - "epoch": 20.93089960886571, - "grad_norm": 1.4846616983413696, - "learning_rate": 6.0179899497487434e-05, - "loss": 5.2264, - "step": 40135 - }, - { - "epoch": 20.93142112125163, - "grad_norm": 1.5967588424682617, - "learning_rate": 6.017889447236181e-05, - "loss": 5.0715, - "step": 40136 - }, - { - "epoch": 20.931942633637547, - "grad_norm": 1.533526062965393, - "learning_rate": 6.017788944723618e-05, - "loss": 5.0155, - "step": 40137 - }, - { - "epoch": 20.932464146023467, - "grad_norm": 1.5463529825210571, - "learning_rate": 6.017688442211056e-05, - "loss": 5.3019, - "step": 40138 - }, - { - "epoch": 20.932985658409386, - "grad_norm": 1.6065436601638794, - "learning_rate": 6.0175879396984925e-05, - "loss": 5.3666, - "step": 40139 - }, - { - "epoch": 20.933507170795306, - "grad_norm": 1.5459471940994263, - "learning_rate": 6.01748743718593e-05, - "loss": 5.6053, - "step": 40140 - }, - { - "epoch": 20.934028683181225, - "grad_norm": 1.467740774154663, - "learning_rate": 6.017386934673367e-05, - "loss": 5.6041, - "step": 40141 - }, - { - "epoch": 20.934550195567144, - "grad_norm": 1.5223225355148315, - "learning_rate": 6.0172864321608045e-05, - "loss": 5.4651, - "step": 40142 - }, - { - "epoch": 20.935071707953064, - "grad_norm": 1.5120021104812622, - "learning_rate": 6.0171859296482416e-05, - "loss": 5.4131, - "step": 40143 - }, - { - "epoch": 20.935593220338983, - "grad_norm": 1.5661296844482422, - "learning_rate": 6.0170854271356794e-05, - "loss": 5.1209, - "step": 40144 - }, - { - "epoch": 20.936114732724903, - "grad_norm": 1.5404590368270874, - "learning_rate": 6.016984924623116e-05, - "loss": 4.9498, - "step": 40145 - }, - { - "epoch": 20.936636245110822, - "grad_norm": 1.4349257946014404, - "learning_rate": 6.0168844221105536e-05, - "loss": 5.6255, - "step": 40146 - }, - { - "epoch": 20.937157757496742, - "grad_norm": 1.528143048286438, - "learning_rate": 6.01678391959799e-05, - "loss": 5.5342, - "step": 40147 - }, - { - "epoch": 20.93767926988266, - "grad_norm": 1.3296117782592773, - "learning_rate": 6.016683417085427e-05, - "loss": 5.5537, - "step": 40148 - }, - { - "epoch": 20.938200782268577, - "grad_norm": 1.451151967048645, - "learning_rate": 6.016582914572865e-05, - "loss": 4.9141, - "step": 40149 - }, - { - "epoch": 20.938722294654497, - "grad_norm": 1.4518226385116577, - "learning_rate": 6.0164824120603014e-05, - "loss": 5.6612, - "step": 40150 - }, - { - "epoch": 20.939243807040416, - "grad_norm": 1.462399959564209, - "learning_rate": 6.016381909547739e-05, - "loss": 5.1106, - "step": 40151 - }, - { - "epoch": 20.939765319426336, - "grad_norm": 1.4577128887176514, - "learning_rate": 6.0162814070351756e-05, - "loss": 5.1552, - "step": 40152 - }, - { - "epoch": 20.940286831812255, - "grad_norm": 1.551796555519104, - "learning_rate": 6.0161809045226133e-05, - "loss": 4.8739, - "step": 40153 - }, - { - "epoch": 20.940808344198174, - "grad_norm": 1.4434036016464233, - "learning_rate": 6.0160804020100505e-05, - "loss": 5.5439, - "step": 40154 - }, - { - "epoch": 20.941329856584094, - "grad_norm": 1.5747034549713135, - "learning_rate": 6.015979899497488e-05, - "loss": 5.4643, - "step": 40155 - }, - { - "epoch": 20.941851368970013, - "grad_norm": 1.5744515657424927, - "learning_rate": 6.0158793969849247e-05, - "loss": 4.9625, - "step": 40156 - }, - { - "epoch": 20.942372881355933, - "grad_norm": 1.5187890529632568, - "learning_rate": 6.0157788944723624e-05, - "loss": 5.1006, - "step": 40157 - }, - { - "epoch": 20.942894393741852, - "grad_norm": 1.6071891784667969, - "learning_rate": 6.015678391959799e-05, - "loss": 5.3865, - "step": 40158 - }, - { - "epoch": 20.943415906127772, - "grad_norm": 1.5806126594543457, - "learning_rate": 6.0155778894472367e-05, - "loss": 4.5366, - "step": 40159 - }, - { - "epoch": 20.94393741851369, - "grad_norm": 1.518614649772644, - "learning_rate": 6.015477386934674e-05, - "loss": 5.5511, - "step": 40160 - }, - { - "epoch": 20.944458930899607, - "grad_norm": 1.5267651081085205, - "learning_rate": 6.01537688442211e-05, - "loss": 5.4554, - "step": 40161 - }, - { - "epoch": 20.944980443285527, - "grad_norm": 1.4261300563812256, - "learning_rate": 6.015276381909548e-05, - "loss": 5.1089, - "step": 40162 - }, - { - "epoch": 20.945501955671446, - "grad_norm": 1.571205735206604, - "learning_rate": 6.015175879396985e-05, - "loss": 5.0308, - "step": 40163 - }, - { - "epoch": 20.946023468057366, - "grad_norm": 1.557940125465393, - "learning_rate": 6.015075376884423e-05, - "loss": 5.1122, - "step": 40164 - }, - { - "epoch": 20.946544980443285, - "grad_norm": 1.5680867433547974, - "learning_rate": 6.014974874371859e-05, - "loss": 5.3282, - "step": 40165 - }, - { - "epoch": 20.947066492829205, - "grad_norm": 1.5002192258834839, - "learning_rate": 6.014874371859297e-05, - "loss": 5.4396, - "step": 40166 - }, - { - "epoch": 20.947588005215124, - "grad_norm": 1.5569390058517456, - "learning_rate": 6.0147738693467335e-05, - "loss": 5.0725, - "step": 40167 - }, - { - "epoch": 20.948109517601043, - "grad_norm": 1.3556677103042603, - "learning_rate": 6.014673366834171e-05, - "loss": 5.8337, - "step": 40168 - }, - { - "epoch": 20.948631029986963, - "grad_norm": 1.4943475723266602, - "learning_rate": 6.0145728643216084e-05, - "loss": 5.2591, - "step": 40169 - }, - { - "epoch": 20.949152542372882, - "grad_norm": 1.454598307609558, - "learning_rate": 6.014472361809046e-05, - "loss": 4.9122, - "step": 40170 - }, - { - "epoch": 20.949674054758802, - "grad_norm": 1.5308911800384521, - "learning_rate": 6.0143718592964826e-05, - "loss": 5.2948, - "step": 40171 - }, - { - "epoch": 20.95019556714472, - "grad_norm": 1.5228846073150635, - "learning_rate": 6.0142713567839204e-05, - "loss": 4.532, - "step": 40172 - }, - { - "epoch": 20.950717079530637, - "grad_norm": 1.5819575786590576, - "learning_rate": 6.014170854271357e-05, - "loss": 5.5765, - "step": 40173 - }, - { - "epoch": 20.951238591916557, - "grad_norm": 1.5619804859161377, - "learning_rate": 6.014070351758794e-05, - "loss": 5.2392, - "step": 40174 - }, - { - "epoch": 20.951760104302476, - "grad_norm": 1.698674201965332, - "learning_rate": 6.013969849246232e-05, - "loss": 5.2774, - "step": 40175 - }, - { - "epoch": 20.952281616688396, - "grad_norm": 1.613785743713379, - "learning_rate": 6.013869346733668e-05, - "loss": 5.2368, - "step": 40176 - }, - { - "epoch": 20.952803129074315, - "grad_norm": 1.4727815389633179, - "learning_rate": 6.013768844221106e-05, - "loss": 5.43, - "step": 40177 - }, - { - "epoch": 20.953324641460235, - "grad_norm": 1.5783919095993042, - "learning_rate": 6.013668341708542e-05, - "loss": 4.9756, - "step": 40178 - }, - { - "epoch": 20.953846153846154, - "grad_norm": 1.5336049795150757, - "learning_rate": 6.01356783919598e-05, - "loss": 5.6235, - "step": 40179 - }, - { - "epoch": 20.954367666232073, - "grad_norm": 1.4777024984359741, - "learning_rate": 6.013467336683417e-05, - "loss": 5.5426, - "step": 40180 - }, - { - "epoch": 20.954889178617993, - "grad_norm": 1.597744107246399, - "learning_rate": 6.013366834170855e-05, - "loss": 4.9515, - "step": 40181 - }, - { - "epoch": 20.955410691003912, - "grad_norm": 1.4273394346237183, - "learning_rate": 6.0132663316582914e-05, - "loss": 5.4339, - "step": 40182 - }, - { - "epoch": 20.955932203389832, - "grad_norm": 1.486538052558899, - "learning_rate": 6.013165829145729e-05, - "loss": 5.5648, - "step": 40183 - }, - { - "epoch": 20.95645371577575, - "grad_norm": 1.4847687482833862, - "learning_rate": 6.013065326633166e-05, - "loss": 5.1431, - "step": 40184 - }, - { - "epoch": 20.956975228161667, - "grad_norm": 1.5765632390975952, - "learning_rate": 6.012964824120604e-05, - "loss": 5.2142, - "step": 40185 - }, - { - "epoch": 20.957496740547587, - "grad_norm": 1.5786784887313843, - "learning_rate": 6.0128643216080405e-05, - "loss": 4.8707, - "step": 40186 - }, - { - "epoch": 20.958018252933506, - "grad_norm": 1.5200437307357788, - "learning_rate": 6.012763819095477e-05, - "loss": 5.3054, - "step": 40187 - }, - { - "epoch": 20.958539765319426, - "grad_norm": 1.5504025220870972, - "learning_rate": 6.012663316582915e-05, - "loss": 5.5163, - "step": 40188 - }, - { - "epoch": 20.959061277705345, - "grad_norm": 1.545135498046875, - "learning_rate": 6.012562814070352e-05, - "loss": 5.623, - "step": 40189 - }, - { - "epoch": 20.959582790091265, - "grad_norm": 1.5906264781951904, - "learning_rate": 6.0124623115577896e-05, - "loss": 5.4274, - "step": 40190 - }, - { - "epoch": 20.960104302477184, - "grad_norm": 1.453804850578308, - "learning_rate": 6.012361809045226e-05, - "loss": 5.2976, - "step": 40191 - }, - { - "epoch": 20.960625814863103, - "grad_norm": 1.5727537870407104, - "learning_rate": 6.012261306532664e-05, - "loss": 5.2347, - "step": 40192 - }, - { - "epoch": 20.961147327249023, - "grad_norm": 1.548128604888916, - "learning_rate": 6.0121608040201e-05, - "loss": 5.3548, - "step": 40193 - }, - { - "epoch": 20.961668839634942, - "grad_norm": 1.5605583190917969, - "learning_rate": 6.012060301507538e-05, - "loss": 5.481, - "step": 40194 - }, - { - "epoch": 20.962190352020862, - "grad_norm": 1.5379910469055176, - "learning_rate": 6.011959798994975e-05, - "loss": 5.6524, - "step": 40195 - }, - { - "epoch": 20.96271186440678, - "grad_norm": 1.6494733095169067, - "learning_rate": 6.011859296482413e-05, - "loss": 5.2571, - "step": 40196 - }, - { - "epoch": 20.963233376792697, - "grad_norm": 1.5354130268096924, - "learning_rate": 6.0117587939698494e-05, - "loss": 5.0124, - "step": 40197 - }, - { - "epoch": 20.963754889178617, - "grad_norm": 1.546015977859497, - "learning_rate": 6.011658291457287e-05, - "loss": 5.2774, - "step": 40198 - }, - { - "epoch": 20.964276401564536, - "grad_norm": 1.5150935649871826, - "learning_rate": 6.0115577889447236e-05, - "loss": 5.215, - "step": 40199 - }, - { - "epoch": 20.964797913950456, - "grad_norm": 1.6140588521957397, - "learning_rate": 6.011457286432161e-05, - "loss": 5.0133, - "step": 40200 - }, - { - "epoch": 20.965319426336375, - "grad_norm": 1.518580436706543, - "learning_rate": 6.0113567839195985e-05, - "loss": 5.2849, - "step": 40201 - }, - { - "epoch": 20.965840938722295, - "grad_norm": 1.620582938194275, - "learning_rate": 6.011256281407035e-05, - "loss": 4.7791, - "step": 40202 - }, - { - "epoch": 20.966362451108214, - "grad_norm": 1.541845440864563, - "learning_rate": 6.011155778894473e-05, - "loss": 5.3425, - "step": 40203 - }, - { - "epoch": 20.966883963494134, - "grad_norm": 1.657497525215149, - "learning_rate": 6.01105527638191e-05, - "loss": 5.6106, - "step": 40204 - }, - { - "epoch": 20.967405475880053, - "grad_norm": 1.4770286083221436, - "learning_rate": 6.0109547738693476e-05, - "loss": 5.6313, - "step": 40205 - }, - { - "epoch": 20.967926988265972, - "grad_norm": 1.4619706869125366, - "learning_rate": 6.010854271356784e-05, - "loss": 5.228, - "step": 40206 - }, - { - "epoch": 20.968448500651892, - "grad_norm": 1.4926683902740479, - "learning_rate": 6.010753768844222e-05, - "loss": 5.41, - "step": 40207 - }, - { - "epoch": 20.96897001303781, - "grad_norm": 1.5441648960113525, - "learning_rate": 6.010653266331658e-05, - "loss": 5.5308, - "step": 40208 - }, - { - "epoch": 20.969491525423727, - "grad_norm": 1.4317371845245361, - "learning_rate": 6.010552763819096e-05, - "loss": 5.3666, - "step": 40209 - }, - { - "epoch": 20.970013037809647, - "grad_norm": 1.6250205039978027, - "learning_rate": 6.010452261306533e-05, - "loss": 5.25, - "step": 40210 - }, - { - "epoch": 20.970534550195566, - "grad_norm": 1.6558005809783936, - "learning_rate": 6.010351758793971e-05, - "loss": 4.9811, - "step": 40211 - }, - { - "epoch": 20.971056062581486, - "grad_norm": 1.4917068481445312, - "learning_rate": 6.010251256281407e-05, - "loss": 5.4748, - "step": 40212 - }, - { - "epoch": 20.971577574967405, - "grad_norm": 1.6469484567642212, - "learning_rate": 6.010150753768844e-05, - "loss": 5.1952, - "step": 40213 - }, - { - "epoch": 20.972099087353325, - "grad_norm": 1.5276374816894531, - "learning_rate": 6.0100502512562815e-05, - "loss": 5.3595, - "step": 40214 - }, - { - "epoch": 20.972620599739244, - "grad_norm": 1.4749454259872437, - "learning_rate": 6.0099497487437186e-05, - "loss": 4.772, - "step": 40215 - }, - { - "epoch": 20.973142112125164, - "grad_norm": 1.6171891689300537, - "learning_rate": 6.0098492462311564e-05, - "loss": 5.2312, - "step": 40216 - }, - { - "epoch": 20.973663624511083, - "grad_norm": 1.4953112602233887, - "learning_rate": 6.009748743718593e-05, - "loss": 5.4778, - "step": 40217 - }, - { - "epoch": 20.974185136897002, - "grad_norm": 1.4792813062667847, - "learning_rate": 6.0096482412060306e-05, - "loss": 5.6235, - "step": 40218 - }, - { - "epoch": 20.974706649282922, - "grad_norm": 1.4887871742248535, - "learning_rate": 6.009547738693467e-05, - "loss": 5.4614, - "step": 40219 - }, - { - "epoch": 20.975228161668838, - "grad_norm": 1.5162588357925415, - "learning_rate": 6.009447236180905e-05, - "loss": 5.1471, - "step": 40220 - }, - { - "epoch": 20.975749674054757, - "grad_norm": 1.6672980785369873, - "learning_rate": 6.009346733668342e-05, - "loss": 5.1384, - "step": 40221 - }, - { - "epoch": 20.976271186440677, - "grad_norm": 1.4811428785324097, - "learning_rate": 6.00924623115578e-05, - "loss": 5.3882, - "step": 40222 - }, - { - "epoch": 20.976792698826596, - "grad_norm": 1.4637203216552734, - "learning_rate": 6.009145728643216e-05, - "loss": 4.81, - "step": 40223 - }, - { - "epoch": 20.977314211212516, - "grad_norm": 1.538106083869934, - "learning_rate": 6.009045226130654e-05, - "loss": 5.2851, - "step": 40224 - }, - { - "epoch": 20.977835723598435, - "grad_norm": 1.6831083297729492, - "learning_rate": 6.008944723618091e-05, - "loss": 5.1094, - "step": 40225 - }, - { - "epoch": 20.978357235984355, - "grad_norm": 1.6933670043945312, - "learning_rate": 6.008844221105529e-05, - "loss": 4.8109, - "step": 40226 - }, - { - "epoch": 20.978878748370274, - "grad_norm": 1.7358567714691162, - "learning_rate": 6.008743718592965e-05, - "loss": 5.0023, - "step": 40227 - }, - { - "epoch": 20.979400260756194, - "grad_norm": 1.5748038291931152, - "learning_rate": 6.0086432160804017e-05, - "loss": 5.2449, - "step": 40228 - }, - { - "epoch": 20.979921773142113, - "grad_norm": 1.4807603359222412, - "learning_rate": 6.0085427135678394e-05, - "loss": 5.4888, - "step": 40229 - }, - { - "epoch": 20.980443285528033, - "grad_norm": 1.5149019956588745, - "learning_rate": 6.0084422110552765e-05, - "loss": 5.4159, - "step": 40230 - }, - { - "epoch": 20.980964797913952, - "grad_norm": 1.6893212795257568, - "learning_rate": 6.008341708542714e-05, - "loss": 5.2402, - "step": 40231 - }, - { - "epoch": 20.98148631029987, - "grad_norm": 1.4182393550872803, - "learning_rate": 6.008241206030151e-05, - "loss": 5.3413, - "step": 40232 - }, - { - "epoch": 20.982007822685787, - "grad_norm": 1.4657871723175049, - "learning_rate": 6.0081407035175885e-05, - "loss": 5.4494, - "step": 40233 - }, - { - "epoch": 20.982529335071707, - "grad_norm": 1.7487273216247559, - "learning_rate": 6.008040201005025e-05, - "loss": 4.4625, - "step": 40234 - }, - { - "epoch": 20.983050847457626, - "grad_norm": 1.4355273246765137, - "learning_rate": 6.007939698492463e-05, - "loss": 5.463, - "step": 40235 - }, - { - "epoch": 20.983572359843546, - "grad_norm": 1.5604431629180908, - "learning_rate": 6.0078391959799e-05, - "loss": 5.0324, - "step": 40236 - }, - { - "epoch": 20.984093872229465, - "grad_norm": 1.652076005935669, - "learning_rate": 6.0077386934673376e-05, - "loss": 4.6932, - "step": 40237 - }, - { - "epoch": 20.984615384615385, - "grad_norm": 1.5230931043624878, - "learning_rate": 6.007638190954774e-05, - "loss": 5.2558, - "step": 40238 - }, - { - "epoch": 20.985136897001304, - "grad_norm": 1.5501974821090698, - "learning_rate": 6.007537688442212e-05, - "loss": 4.6476, - "step": 40239 - }, - { - "epoch": 20.985658409387224, - "grad_norm": 1.4960957765579224, - "learning_rate": 6.007437185929648e-05, - "loss": 5.3601, - "step": 40240 - }, - { - "epoch": 20.986179921773143, - "grad_norm": 1.4244786500930786, - "learning_rate": 6.0073366834170854e-05, - "loss": 5.2403, - "step": 40241 - }, - { - "epoch": 20.986701434159063, - "grad_norm": 1.503225326538086, - "learning_rate": 6.007236180904523e-05, - "loss": 5.4623, - "step": 40242 - }, - { - "epoch": 20.987222946544982, - "grad_norm": 1.591659426689148, - "learning_rate": 6.0071356783919596e-05, - "loss": 5.2805, - "step": 40243 - }, - { - "epoch": 20.987744458930898, - "grad_norm": 1.465066909790039, - "learning_rate": 6.0070351758793974e-05, - "loss": 5.2604, - "step": 40244 - }, - { - "epoch": 20.988265971316817, - "grad_norm": 1.5989128351211548, - "learning_rate": 6.006934673366834e-05, - "loss": 5.1662, - "step": 40245 - }, - { - "epoch": 20.988787483702737, - "grad_norm": 1.4269365072250366, - "learning_rate": 6.0068341708542716e-05, - "loss": 5.4351, - "step": 40246 - }, - { - "epoch": 20.989308996088656, - "grad_norm": 1.6657384634017944, - "learning_rate": 6.006733668341709e-05, - "loss": 5.5203, - "step": 40247 - }, - { - "epoch": 20.989830508474576, - "grad_norm": 1.4499242305755615, - "learning_rate": 6.0066331658291465e-05, - "loss": 5.2816, - "step": 40248 - }, - { - "epoch": 20.990352020860495, - "grad_norm": 1.4440535306930542, - "learning_rate": 6.006532663316583e-05, - "loss": 5.416, - "step": 40249 - }, - { - "epoch": 20.990873533246415, - "grad_norm": 1.5198662281036377, - "learning_rate": 6.006432160804021e-05, - "loss": 5.6567, - "step": 40250 - }, - { - "epoch": 20.991395045632334, - "grad_norm": 1.4505279064178467, - "learning_rate": 6.006331658291458e-05, - "loss": 5.2816, - "step": 40251 - }, - { - "epoch": 20.991916558018254, - "grad_norm": 1.5252782106399536, - "learning_rate": 6.0062311557788956e-05, - "loss": 5.2111, - "step": 40252 - }, - { - "epoch": 20.992438070404173, - "grad_norm": 1.4900864362716675, - "learning_rate": 6.006130653266332e-05, - "loss": 5.5018, - "step": 40253 - }, - { - "epoch": 20.992959582790093, - "grad_norm": 1.50275719165802, - "learning_rate": 6.0060301507537684e-05, - "loss": 5.7173, - "step": 40254 - }, - { - "epoch": 20.993481095176012, - "grad_norm": 1.359324336051941, - "learning_rate": 6.005929648241206e-05, - "loss": 5.7325, - "step": 40255 - }, - { - "epoch": 20.994002607561928, - "grad_norm": 1.518571376800537, - "learning_rate": 6.005829145728643e-05, - "loss": 5.1995, - "step": 40256 - }, - { - "epoch": 20.994524119947847, - "grad_norm": 1.4319403171539307, - "learning_rate": 6.005728643216081e-05, - "loss": 5.2674, - "step": 40257 - }, - { - "epoch": 20.995045632333767, - "grad_norm": 1.4090073108673096, - "learning_rate": 6.0056281407035175e-05, - "loss": 5.3663, - "step": 40258 - }, - { - "epoch": 20.995567144719686, - "grad_norm": 1.5255705118179321, - "learning_rate": 6.005527638190955e-05, - "loss": 5.1647, - "step": 40259 - }, - { - "epoch": 20.996088657105606, - "grad_norm": 1.588913083076477, - "learning_rate": 6.005427135678392e-05, - "loss": 5.0062, - "step": 40260 - }, - { - "epoch": 20.996610169491525, - "grad_norm": 1.45968496799469, - "learning_rate": 6.0053266331658295e-05, - "loss": 4.8246, - "step": 40261 - }, - { - "epoch": 20.997131681877445, - "grad_norm": 1.59174644947052, - "learning_rate": 6.0052261306532666e-05, - "loss": 5.0051, - "step": 40262 - }, - { - "epoch": 20.997653194263364, - "grad_norm": 1.6332316398620605, - "learning_rate": 6.0051256281407044e-05, - "loss": 5.4038, - "step": 40263 - }, - { - "epoch": 20.998174706649284, - "grad_norm": 1.4970576763153076, - "learning_rate": 6.005025125628141e-05, - "loss": 5.2375, - "step": 40264 - }, - { - "epoch": 20.998696219035203, - "grad_norm": 1.5869061946868896, - "learning_rate": 6.0049246231155786e-05, - "loss": 5.453, - "step": 40265 - }, - { - "epoch": 20.999217731421123, - "grad_norm": 1.5531926155090332, - "learning_rate": 6.004824120603015e-05, - "loss": 4.8579, - "step": 40266 - }, - { - "epoch": 20.999739243807042, - "grad_norm": 1.4642184972763062, - "learning_rate": 6.004723618090452e-05, - "loss": 5.4726, - "step": 40267 - }, - { - "epoch": 21.000260756192958, - "grad_norm": 1.6982650756835938, - "learning_rate": 6.00462311557789e-05, - "loss": 5.3608, - "step": 40268 - }, - { - "epoch": 21.000782268578877, - "grad_norm": 1.4933518171310425, - "learning_rate": 6.0045226130653264e-05, - "loss": 5.3215, - "step": 40269 - }, - { - "epoch": 21.001303780964797, - "grad_norm": 1.5109684467315674, - "learning_rate": 6.004422110552764e-05, - "loss": 5.4334, - "step": 40270 - }, - { - "epoch": 21.001825293350716, - "grad_norm": 1.6125102043151855, - "learning_rate": 6.004321608040201e-05, - "loss": 5.0635, - "step": 40271 - }, - { - "epoch": 21.002346805736636, - "grad_norm": 1.5816333293914795, - "learning_rate": 6.004221105527639e-05, - "loss": 5.1659, - "step": 40272 - }, - { - "epoch": 21.002868318122555, - "grad_norm": 1.4574201107025146, - "learning_rate": 6.0041206030150755e-05, - "loss": 5.4722, - "step": 40273 - }, - { - "epoch": 21.003389830508475, - "grad_norm": 1.635692834854126, - "learning_rate": 6.004020100502513e-05, - "loss": 5.3357, - "step": 40274 - }, - { - "epoch": 21.003911342894394, - "grad_norm": 1.554926872253418, - "learning_rate": 6.00391959798995e-05, - "loss": 4.4909, - "step": 40275 - }, - { - "epoch": 21.004432855280314, - "grad_norm": 1.556113839149475, - "learning_rate": 6.0038190954773874e-05, - "loss": 5.5364, - "step": 40276 - }, - { - "epoch": 21.004954367666233, - "grad_norm": 1.5638800859451294, - "learning_rate": 6.0037185929648246e-05, - "loss": 5.4427, - "step": 40277 - }, - { - "epoch": 21.005475880052153, - "grad_norm": 1.4955930709838867, - "learning_rate": 6.003618090452262e-05, - "loss": 5.5536, - "step": 40278 - }, - { - "epoch": 21.005997392438072, - "grad_norm": 1.482633352279663, - "learning_rate": 6.003517587939699e-05, - "loss": 5.4127, - "step": 40279 - }, - { - "epoch": 21.006518904823988, - "grad_norm": 1.4531344175338745, - "learning_rate": 6.003417085427135e-05, - "loss": 5.7888, - "step": 40280 - }, - { - "epoch": 21.007040417209907, - "grad_norm": 1.6312295198440552, - "learning_rate": 6.003316582914573e-05, - "loss": 4.6033, - "step": 40281 - }, - { - "epoch": 21.007561929595827, - "grad_norm": 1.5481932163238525, - "learning_rate": 6.00321608040201e-05, - "loss": 5.3524, - "step": 40282 - }, - { - "epoch": 21.008083441981746, - "grad_norm": 1.5337562561035156, - "learning_rate": 6.003115577889448e-05, - "loss": 5.1111, - "step": 40283 - }, - { - "epoch": 21.008604954367666, - "grad_norm": 1.5550724267959595, - "learning_rate": 6.003015075376884e-05, - "loss": 5.1641, - "step": 40284 - }, - { - "epoch": 21.009126466753585, - "grad_norm": 1.4227324724197388, - "learning_rate": 6.002914572864322e-05, - "loss": 5.7504, - "step": 40285 - }, - { - "epoch": 21.009647979139505, - "grad_norm": 1.5139414072036743, - "learning_rate": 6.0028140703517585e-05, - "loss": 5.7246, - "step": 40286 - }, - { - "epoch": 21.010169491525424, - "grad_norm": 1.5168286561965942, - "learning_rate": 6.002713567839196e-05, - "loss": 5.5099, - "step": 40287 - }, - { - "epoch": 21.010691003911344, - "grad_norm": 1.5149667263031006, - "learning_rate": 6.0026130653266334e-05, - "loss": 5.1914, - "step": 40288 - }, - { - "epoch": 21.011212516297263, - "grad_norm": 1.5573232173919678, - "learning_rate": 6.002512562814071e-05, - "loss": 4.9893, - "step": 40289 - }, - { - "epoch": 21.011734028683183, - "grad_norm": 1.5841418504714966, - "learning_rate": 6.0024120603015076e-05, - "loss": 5.3715, - "step": 40290 - }, - { - "epoch": 21.012255541069102, - "grad_norm": 1.4941086769104004, - "learning_rate": 6.0023115577889454e-05, - "loss": 5.5093, - "step": 40291 - }, - { - "epoch": 21.012777053455018, - "grad_norm": 1.5442582368850708, - "learning_rate": 6.0022110552763825e-05, - "loss": 5.2348, - "step": 40292 - }, - { - "epoch": 21.013298565840937, - "grad_norm": 1.6026268005371094, - "learning_rate": 6.002110552763819e-05, - "loss": 5.1517, - "step": 40293 - }, - { - "epoch": 21.013820078226857, - "grad_norm": 1.547715663909912, - "learning_rate": 6.002010050251257e-05, - "loss": 5.3268, - "step": 40294 - }, - { - "epoch": 21.014341590612776, - "grad_norm": 1.587480068206787, - "learning_rate": 6.001909547738693e-05, - "loss": 5.2388, - "step": 40295 - }, - { - "epoch": 21.014863102998696, - "grad_norm": 1.482025146484375, - "learning_rate": 6.001809045226131e-05, - "loss": 5.2134, - "step": 40296 - }, - { - "epoch": 21.015384615384615, - "grad_norm": 1.5048632621765137, - "learning_rate": 6.001708542713568e-05, - "loss": 5.4017, - "step": 40297 - }, - { - "epoch": 21.015906127770535, - "grad_norm": 1.4276080131530762, - "learning_rate": 6.001608040201006e-05, - "loss": 5.334, - "step": 40298 - }, - { - "epoch": 21.016427640156454, - "grad_norm": 1.504310131072998, - "learning_rate": 6.001507537688442e-05, - "loss": 5.4576, - "step": 40299 - }, - { - "epoch": 21.016949152542374, - "grad_norm": 1.5477471351623535, - "learning_rate": 6.00140703517588e-05, - "loss": 5.3124, - "step": 40300 - }, - { - "epoch": 21.017470664928293, - "grad_norm": 1.5822176933288574, - "learning_rate": 6.0013065326633164e-05, - "loss": 5.3247, - "step": 40301 - }, - { - "epoch": 21.017992177314213, - "grad_norm": 1.561895728111267, - "learning_rate": 6.001206030150754e-05, - "loss": 5.1954, - "step": 40302 - }, - { - "epoch": 21.018513689700132, - "grad_norm": 1.594109296798706, - "learning_rate": 6.001105527638191e-05, - "loss": 5.2637, - "step": 40303 - }, - { - "epoch": 21.019035202086048, - "grad_norm": 1.5589921474456787, - "learning_rate": 6.001005025125629e-05, - "loss": 5.0376, - "step": 40304 - }, - { - "epoch": 21.019556714471967, - "grad_norm": 1.4877195358276367, - "learning_rate": 6.0009045226130655e-05, - "loss": 5.2987, - "step": 40305 - }, - { - "epoch": 21.020078226857887, - "grad_norm": 1.6553051471710205, - "learning_rate": 6.000804020100503e-05, - "loss": 4.8251, - "step": 40306 - }, - { - "epoch": 21.020599739243806, - "grad_norm": 1.6016215085983276, - "learning_rate": 6.00070351758794e-05, - "loss": 5.4391, - "step": 40307 - }, - { - "epoch": 21.021121251629726, - "grad_norm": 1.4515763521194458, - "learning_rate": 6.000603015075377e-05, - "loss": 5.2177, - "step": 40308 - }, - { - "epoch": 21.021642764015645, - "grad_norm": 1.7168132066726685, - "learning_rate": 6.0005025125628146e-05, - "loss": 4.6944, - "step": 40309 - }, - { - "epoch": 21.022164276401565, - "grad_norm": 1.453839659690857, - "learning_rate": 6.000402010050251e-05, - "loss": 4.7506, - "step": 40310 - }, - { - "epoch": 21.022685788787484, - "grad_norm": 1.4824360609054565, - "learning_rate": 6.000301507537689e-05, - "loss": 4.8908, - "step": 40311 - }, - { - "epoch": 21.023207301173404, - "grad_norm": 1.4710594415664673, - "learning_rate": 6.000201005025126e-05, - "loss": 5.3352, - "step": 40312 - }, - { - "epoch": 21.023728813559323, - "grad_norm": 1.490584135055542, - "learning_rate": 6.000100502512564e-05, - "loss": 4.9869, - "step": 40313 - }, - { - "epoch": 21.024250325945243, - "grad_norm": 1.477036714553833, - "learning_rate": 6e-05, - "loss": 5.0558, - "step": 40314 - }, - { - "epoch": 21.02477183833116, - "grad_norm": 1.438130259513855, - "learning_rate": 5.999899497487438e-05, - "loss": 5.3332, - "step": 40315 - }, - { - "epoch": 21.025293350717078, - "grad_norm": 1.4845020771026611, - "learning_rate": 5.9997989949748744e-05, - "loss": 5.0556, - "step": 40316 - }, - { - "epoch": 21.025814863102998, - "grad_norm": 1.4898744821548462, - "learning_rate": 5.999698492462312e-05, - "loss": 4.9486, - "step": 40317 - }, - { - "epoch": 21.026336375488917, - "grad_norm": 1.5747170448303223, - "learning_rate": 5.999597989949749e-05, - "loss": 4.8809, - "step": 40318 - }, - { - "epoch": 21.026857887874836, - "grad_norm": 1.4557090997695923, - "learning_rate": 5.999497487437187e-05, - "loss": 5.5055, - "step": 40319 - }, - { - "epoch": 21.027379400260756, - "grad_norm": 1.5053032636642456, - "learning_rate": 5.9993969849246235e-05, - "loss": 5.508, - "step": 40320 - }, - { - "epoch": 21.027900912646675, - "grad_norm": 1.5862587690353394, - "learning_rate": 5.99929648241206e-05, - "loss": 5.3276, - "step": 40321 - }, - { - "epoch": 21.028422425032595, - "grad_norm": 1.4656000137329102, - "learning_rate": 5.999195979899498e-05, - "loss": 5.8587, - "step": 40322 - }, - { - "epoch": 21.028943937418514, - "grad_norm": 1.5410146713256836, - "learning_rate": 5.999095477386935e-05, - "loss": 5.2779, - "step": 40323 - }, - { - "epoch": 21.029465449804434, - "grad_norm": 1.547837257385254, - "learning_rate": 5.9989949748743726e-05, - "loss": 5.1354, - "step": 40324 - }, - { - "epoch": 21.029986962190353, - "grad_norm": 1.4998126029968262, - "learning_rate": 5.998894472361809e-05, - "loss": 5.5004, - "step": 40325 - }, - { - "epoch": 21.030508474576273, - "grad_norm": 1.572510838508606, - "learning_rate": 5.998793969849247e-05, - "loss": 5.2196, - "step": 40326 - }, - { - "epoch": 21.03102998696219, - "grad_norm": 1.5417941808700562, - "learning_rate": 5.998693467336683e-05, - "loss": 5.5647, - "step": 40327 - }, - { - "epoch": 21.031551499348108, - "grad_norm": 1.535940170288086, - "learning_rate": 5.998592964824121e-05, - "loss": 5.064, - "step": 40328 - }, - { - "epoch": 21.032073011734028, - "grad_norm": 1.4961856603622437, - "learning_rate": 5.998492462311558e-05, - "loss": 5.323, - "step": 40329 - }, - { - "epoch": 21.032594524119947, - "grad_norm": 1.4591492414474487, - "learning_rate": 5.998391959798996e-05, - "loss": 5.6348, - "step": 40330 - }, - { - "epoch": 21.033116036505866, - "grad_norm": 1.413661241531372, - "learning_rate": 5.998291457286432e-05, - "loss": 5.5023, - "step": 40331 - }, - { - "epoch": 21.033637548891786, - "grad_norm": 1.5521583557128906, - "learning_rate": 5.99819095477387e-05, - "loss": 5.208, - "step": 40332 - }, - { - "epoch": 21.034159061277705, - "grad_norm": 1.5993579626083374, - "learning_rate": 5.9980904522613065e-05, - "loss": 5.2177, - "step": 40333 - }, - { - "epoch": 21.034680573663625, - "grad_norm": 1.5065300464630127, - "learning_rate": 5.9979899497487436e-05, - "loss": 5.1744, - "step": 40334 - }, - { - "epoch": 21.035202086049544, - "grad_norm": 1.5574893951416016, - "learning_rate": 5.9978894472361814e-05, - "loss": 4.7886, - "step": 40335 - }, - { - "epoch": 21.035723598435464, - "grad_norm": 1.5106351375579834, - "learning_rate": 5.997788944723618e-05, - "loss": 4.9583, - "step": 40336 - }, - { - "epoch": 21.036245110821383, - "grad_norm": 1.5090610980987549, - "learning_rate": 5.9976884422110556e-05, - "loss": 5.4181, - "step": 40337 - }, - { - "epoch": 21.036766623207303, - "grad_norm": 1.668508529663086, - "learning_rate": 5.997587939698493e-05, - "loss": 5.013, - "step": 40338 - }, - { - "epoch": 21.03728813559322, - "grad_norm": 1.5508103370666504, - "learning_rate": 5.9974874371859305e-05, - "loss": 4.9643, - "step": 40339 - }, - { - "epoch": 21.037809647979138, - "grad_norm": 1.6395087242126465, - "learning_rate": 5.997386934673367e-05, - "loss": 4.7375, - "step": 40340 - }, - { - "epoch": 21.038331160365058, - "grad_norm": 1.4180349111557007, - "learning_rate": 5.997286432160805e-05, - "loss": 4.5332, - "step": 40341 - }, - { - "epoch": 21.038852672750977, - "grad_norm": 1.4339677095413208, - "learning_rate": 5.997185929648241e-05, - "loss": 5.5386, - "step": 40342 - }, - { - "epoch": 21.039374185136897, - "grad_norm": 1.3930926322937012, - "learning_rate": 5.997085427135679e-05, - "loss": 5.6717, - "step": 40343 - }, - { - "epoch": 21.039895697522816, - "grad_norm": 1.4621506929397583, - "learning_rate": 5.996984924623116e-05, - "loss": 5.3347, - "step": 40344 - }, - { - "epoch": 21.040417209908735, - "grad_norm": 1.5797785520553589, - "learning_rate": 5.996884422110554e-05, - "loss": 5.4533, - "step": 40345 - }, - { - "epoch": 21.040938722294655, - "grad_norm": 1.4994438886642456, - "learning_rate": 5.99678391959799e-05, - "loss": 5.3379, - "step": 40346 - }, - { - "epoch": 21.041460234680574, - "grad_norm": 1.4864157438278198, - "learning_rate": 5.9966834170854267e-05, - "loss": 5.2781, - "step": 40347 - }, - { - "epoch": 21.041981747066494, - "grad_norm": 1.4713951349258423, - "learning_rate": 5.9965829145728644e-05, - "loss": 5.1767, - "step": 40348 - }, - { - "epoch": 21.042503259452413, - "grad_norm": 1.4549949169158936, - "learning_rate": 5.9964824120603015e-05, - "loss": 5.1135, - "step": 40349 - }, - { - "epoch": 21.043024771838333, - "grad_norm": 1.5170758962631226, - "learning_rate": 5.996381909547739e-05, - "loss": 5.5312, - "step": 40350 - }, - { - "epoch": 21.04354628422425, - "grad_norm": 1.5293550491333008, - "learning_rate": 5.996281407035176e-05, - "loss": 4.8149, - "step": 40351 - }, - { - "epoch": 21.044067796610168, - "grad_norm": 1.6020863056182861, - "learning_rate": 5.9961809045226135e-05, - "loss": 5.4051, - "step": 40352 - }, - { - "epoch": 21.044589308996088, - "grad_norm": 1.5399811267852783, - "learning_rate": 5.99608040201005e-05, - "loss": 5.0463, - "step": 40353 - }, - { - "epoch": 21.045110821382007, - "grad_norm": 1.4660260677337646, - "learning_rate": 5.995979899497488e-05, - "loss": 5.3122, - "step": 40354 - }, - { - "epoch": 21.045632333767927, - "grad_norm": 1.5565156936645508, - "learning_rate": 5.995879396984925e-05, - "loss": 4.9425, - "step": 40355 - }, - { - "epoch": 21.046153846153846, - "grad_norm": 1.5840297937393188, - "learning_rate": 5.9957788944723626e-05, - "loss": 5.2442, - "step": 40356 - }, - { - "epoch": 21.046675358539765, - "grad_norm": 1.4347575902938843, - "learning_rate": 5.995678391959799e-05, - "loss": 5.764, - "step": 40357 - }, - { - "epoch": 21.047196870925685, - "grad_norm": 1.4967448711395264, - "learning_rate": 5.995577889447237e-05, - "loss": 5.4049, - "step": 40358 - }, - { - "epoch": 21.047718383311604, - "grad_norm": 1.7114477157592773, - "learning_rate": 5.995477386934674e-05, - "loss": 5.2523, - "step": 40359 - }, - { - "epoch": 21.048239895697524, - "grad_norm": 1.5145765542984009, - "learning_rate": 5.9953768844221104e-05, - "loss": 5.3668, - "step": 40360 - }, - { - "epoch": 21.048761408083443, - "grad_norm": 1.6865406036376953, - "learning_rate": 5.995276381909548e-05, - "loss": 4.7117, - "step": 40361 - }, - { - "epoch": 21.049282920469363, - "grad_norm": 1.5630327463150024, - "learning_rate": 5.9951758793969846e-05, - "loss": 5.0823, - "step": 40362 - }, - { - "epoch": 21.04980443285528, - "grad_norm": 1.5027399063110352, - "learning_rate": 5.9950753768844224e-05, - "loss": 5.0845, - "step": 40363 - }, - { - "epoch": 21.050325945241198, - "grad_norm": 1.4975191354751587, - "learning_rate": 5.9949748743718595e-05, - "loss": 5.6971, - "step": 40364 - }, - { - "epoch": 21.050847457627118, - "grad_norm": 1.5459964275360107, - "learning_rate": 5.994874371859297e-05, - "loss": 5.1648, - "step": 40365 - }, - { - "epoch": 21.051368970013037, - "grad_norm": 1.6261237859725952, - "learning_rate": 5.994773869346734e-05, - "loss": 5.1223, - "step": 40366 - }, - { - "epoch": 21.051890482398957, - "grad_norm": 1.5345220565795898, - "learning_rate": 5.9946733668341715e-05, - "loss": 5.3707, - "step": 40367 - }, - { - "epoch": 21.052411994784876, - "grad_norm": 1.645136833190918, - "learning_rate": 5.994572864321608e-05, - "loss": 4.6298, - "step": 40368 - }, - { - "epoch": 21.052933507170795, - "grad_norm": 1.6052868366241455, - "learning_rate": 5.994472361809046e-05, - "loss": 4.5733, - "step": 40369 - }, - { - "epoch": 21.053455019556715, - "grad_norm": 1.4566707611083984, - "learning_rate": 5.994371859296483e-05, - "loss": 5.5273, - "step": 40370 - }, - { - "epoch": 21.053976531942634, - "grad_norm": 1.6116071939468384, - "learning_rate": 5.9942713567839206e-05, - "loss": 4.9594, - "step": 40371 - }, - { - "epoch": 21.054498044328554, - "grad_norm": 1.4607092142105103, - "learning_rate": 5.994170854271357e-05, - "loss": 5.3639, - "step": 40372 - }, - { - "epoch": 21.055019556714473, - "grad_norm": 1.4738078117370605, - "learning_rate": 5.9940703517587934e-05, - "loss": 5.3527, - "step": 40373 - }, - { - "epoch": 21.055541069100393, - "grad_norm": 1.5473918914794922, - "learning_rate": 5.993969849246231e-05, - "loss": 5.5514, - "step": 40374 - }, - { - "epoch": 21.05606258148631, - "grad_norm": 1.5012526512145996, - "learning_rate": 5.993869346733668e-05, - "loss": 5.1012, - "step": 40375 - }, - { - "epoch": 21.056584093872228, - "grad_norm": 1.4310847520828247, - "learning_rate": 5.993768844221106e-05, - "loss": 5.6591, - "step": 40376 - }, - { - "epoch": 21.057105606258148, - "grad_norm": 1.6541026830673218, - "learning_rate": 5.9936683417085425e-05, - "loss": 5.0254, - "step": 40377 - }, - { - "epoch": 21.057627118644067, - "grad_norm": 1.4835400581359863, - "learning_rate": 5.99356783919598e-05, - "loss": 5.4378, - "step": 40378 - }, - { - "epoch": 21.058148631029987, - "grad_norm": 1.5869765281677246, - "learning_rate": 5.9934673366834174e-05, - "loss": 5.2637, - "step": 40379 - }, - { - "epoch": 21.058670143415906, - "grad_norm": 1.5224345922470093, - "learning_rate": 5.993366834170855e-05, - "loss": 5.3463, - "step": 40380 - }, - { - "epoch": 21.059191655801826, - "grad_norm": 1.5414509773254395, - "learning_rate": 5.9932663316582916e-05, - "loss": 4.9937, - "step": 40381 - }, - { - "epoch": 21.059713168187745, - "grad_norm": 1.5173695087432861, - "learning_rate": 5.9931658291457294e-05, - "loss": 5.2013, - "step": 40382 - }, - { - "epoch": 21.060234680573664, - "grad_norm": 1.549431324005127, - "learning_rate": 5.993065326633166e-05, - "loss": 4.7318, - "step": 40383 - }, - { - "epoch": 21.060756192959584, - "grad_norm": 1.4791259765625, - "learning_rate": 5.9929648241206036e-05, - "loss": 5.2644, - "step": 40384 - }, - { - "epoch": 21.061277705345503, - "grad_norm": 1.5860217809677124, - "learning_rate": 5.992864321608041e-05, - "loss": 5.5841, - "step": 40385 - }, - { - "epoch": 21.061799217731423, - "grad_norm": 1.5154565572738647, - "learning_rate": 5.992763819095477e-05, - "loss": 5.2084, - "step": 40386 - }, - { - "epoch": 21.06232073011734, - "grad_norm": 1.5342824459075928, - "learning_rate": 5.992663316582915e-05, - "loss": 4.9991, - "step": 40387 - }, - { - "epoch": 21.062842242503258, - "grad_norm": 1.4464399814605713, - "learning_rate": 5.9925628140703514e-05, - "loss": 5.5001, - "step": 40388 - }, - { - "epoch": 21.063363754889178, - "grad_norm": 1.519986867904663, - "learning_rate": 5.992462311557789e-05, - "loss": 4.7058, - "step": 40389 - }, - { - "epoch": 21.063885267275097, - "grad_norm": 1.4727811813354492, - "learning_rate": 5.992361809045226e-05, - "loss": 5.2539, - "step": 40390 - }, - { - "epoch": 21.064406779661017, - "grad_norm": 1.5164682865142822, - "learning_rate": 5.992261306532664e-05, - "loss": 5.1668, - "step": 40391 - }, - { - "epoch": 21.064928292046936, - "grad_norm": 1.5629678964614868, - "learning_rate": 5.9921608040201005e-05, - "loss": 5.4169, - "step": 40392 - }, - { - "epoch": 21.065449804432856, - "grad_norm": 1.4999115467071533, - "learning_rate": 5.992060301507538e-05, - "loss": 5.0804, - "step": 40393 - }, - { - "epoch": 21.065971316818775, - "grad_norm": 1.4797821044921875, - "learning_rate": 5.991959798994975e-05, - "loss": 5.5736, - "step": 40394 - }, - { - "epoch": 21.066492829204694, - "grad_norm": 1.550120234489441, - "learning_rate": 5.9918592964824124e-05, - "loss": 5.4076, - "step": 40395 - }, - { - "epoch": 21.067014341590614, - "grad_norm": 1.390349268913269, - "learning_rate": 5.9917587939698496e-05, - "loss": 5.3583, - "step": 40396 - }, - { - "epoch": 21.067535853976533, - "grad_norm": 1.5997527837753296, - "learning_rate": 5.991658291457287e-05, - "loss": 5.1002, - "step": 40397 - }, - { - "epoch": 21.068057366362453, - "grad_norm": 1.5309362411499023, - "learning_rate": 5.991557788944724e-05, - "loss": 5.4891, - "step": 40398 - }, - { - "epoch": 21.06857887874837, - "grad_norm": 1.5490130186080933, - "learning_rate": 5.9914572864321615e-05, - "loss": 5.1167, - "step": 40399 - }, - { - "epoch": 21.06910039113429, - "grad_norm": 1.6302458047866821, - "learning_rate": 5.9913567839195987e-05, - "loss": 5.3459, - "step": 40400 - }, - { - "epoch": 21.069621903520208, - "grad_norm": 1.5515536069869995, - "learning_rate": 5.991256281407035e-05, - "loss": 5.4387, - "step": 40401 - }, - { - "epoch": 21.070143415906127, - "grad_norm": 1.499293565750122, - "learning_rate": 5.991155778894473e-05, - "loss": 5.2539, - "step": 40402 - }, - { - "epoch": 21.070664928292047, - "grad_norm": 1.5099488496780396, - "learning_rate": 5.991055276381909e-05, - "loss": 5.2149, - "step": 40403 - }, - { - "epoch": 21.071186440677966, - "grad_norm": 1.4873926639556885, - "learning_rate": 5.990954773869347e-05, - "loss": 5.1351, - "step": 40404 - }, - { - "epoch": 21.071707953063886, - "grad_norm": 1.5114779472351074, - "learning_rate": 5.990854271356784e-05, - "loss": 5.2743, - "step": 40405 - }, - { - "epoch": 21.072229465449805, - "grad_norm": 1.443228840827942, - "learning_rate": 5.990753768844222e-05, - "loss": 5.7163, - "step": 40406 - }, - { - "epoch": 21.072750977835724, - "grad_norm": 1.435282826423645, - "learning_rate": 5.9906532663316584e-05, - "loss": 5.6784, - "step": 40407 - }, - { - "epoch": 21.073272490221644, - "grad_norm": 1.4752062559127808, - "learning_rate": 5.990552763819096e-05, - "loss": 5.2758, - "step": 40408 - }, - { - "epoch": 21.073794002607563, - "grad_norm": 1.5468318462371826, - "learning_rate": 5.9904522613065326e-05, - "loss": 5.4254, - "step": 40409 - }, - { - "epoch": 21.07431551499348, - "grad_norm": 1.4777799844741821, - "learning_rate": 5.9903517587939704e-05, - "loss": 5.0548, - "step": 40410 - }, - { - "epoch": 21.0748370273794, - "grad_norm": 1.5729069709777832, - "learning_rate": 5.9902512562814075e-05, - "loss": 5.4661, - "step": 40411 - }, - { - "epoch": 21.07535853976532, - "grad_norm": 1.5045760869979858, - "learning_rate": 5.990150753768845e-05, - "loss": 5.2886, - "step": 40412 - }, - { - "epoch": 21.075880052151238, - "grad_norm": 1.5258417129516602, - "learning_rate": 5.990050251256282e-05, - "loss": 5.3664, - "step": 40413 - }, - { - "epoch": 21.076401564537157, - "grad_norm": 1.539175271987915, - "learning_rate": 5.989949748743718e-05, - "loss": 5.2761, - "step": 40414 - }, - { - "epoch": 21.076923076923077, - "grad_norm": 1.436463475227356, - "learning_rate": 5.989849246231156e-05, - "loss": 5.2673, - "step": 40415 - }, - { - "epoch": 21.077444589308996, - "grad_norm": 1.5854564905166626, - "learning_rate": 5.989748743718593e-05, - "loss": 5.0315, - "step": 40416 - }, - { - "epoch": 21.077966101694916, - "grad_norm": 1.5790973901748657, - "learning_rate": 5.989648241206031e-05, - "loss": 5.1532, - "step": 40417 - }, - { - "epoch": 21.078487614080835, - "grad_norm": 1.5370365381240845, - "learning_rate": 5.989547738693467e-05, - "loss": 5.1437, - "step": 40418 - }, - { - "epoch": 21.079009126466755, - "grad_norm": 1.5858910083770752, - "learning_rate": 5.989447236180905e-05, - "loss": 5.1078, - "step": 40419 - }, - { - "epoch": 21.079530638852674, - "grad_norm": 1.5030497312545776, - "learning_rate": 5.9893467336683414e-05, - "loss": 5.1256, - "step": 40420 - }, - { - "epoch": 21.080052151238593, - "grad_norm": 1.722938060760498, - "learning_rate": 5.989246231155779e-05, - "loss": 5.1341, - "step": 40421 - }, - { - "epoch": 21.08057366362451, - "grad_norm": 1.5431077480316162, - "learning_rate": 5.989145728643216e-05, - "loss": 5.3995, - "step": 40422 - }, - { - "epoch": 21.08109517601043, - "grad_norm": 1.5121657848358154, - "learning_rate": 5.989045226130654e-05, - "loss": 5.4958, - "step": 40423 - }, - { - "epoch": 21.08161668839635, - "grad_norm": 1.6048474311828613, - "learning_rate": 5.9889447236180905e-05, - "loss": 5.5061, - "step": 40424 - }, - { - "epoch": 21.082138200782268, - "grad_norm": 1.537729024887085, - "learning_rate": 5.988844221105528e-05, - "loss": 4.9886, - "step": 40425 - }, - { - "epoch": 21.082659713168187, - "grad_norm": 1.6699798107147217, - "learning_rate": 5.9887437185929654e-05, - "loss": 5.0797, - "step": 40426 - }, - { - "epoch": 21.083181225554107, - "grad_norm": 1.454174280166626, - "learning_rate": 5.988643216080402e-05, - "loss": 5.0666, - "step": 40427 - }, - { - "epoch": 21.083702737940026, - "grad_norm": 1.600732684135437, - "learning_rate": 5.9885427135678396e-05, - "loss": 5.1711, - "step": 40428 - }, - { - "epoch": 21.084224250325946, - "grad_norm": 1.4605021476745605, - "learning_rate": 5.988442211055276e-05, - "loss": 5.4381, - "step": 40429 - }, - { - "epoch": 21.084745762711865, - "grad_norm": 1.3942335844039917, - "learning_rate": 5.988341708542714e-05, - "loss": 5.7274, - "step": 40430 - }, - { - "epoch": 21.085267275097785, - "grad_norm": 1.6235052347183228, - "learning_rate": 5.988241206030151e-05, - "loss": 4.9994, - "step": 40431 - }, - { - "epoch": 21.085788787483704, - "grad_norm": 1.50924551486969, - "learning_rate": 5.988140703517589e-05, - "loss": 5.5725, - "step": 40432 - }, - { - "epoch": 21.086310299869623, - "grad_norm": 1.4373935461044312, - "learning_rate": 5.988040201005025e-05, - "loss": 5.3389, - "step": 40433 - }, - { - "epoch": 21.08683181225554, - "grad_norm": 1.537576675415039, - "learning_rate": 5.987939698492463e-05, - "loss": 4.7413, - "step": 40434 - }, - { - "epoch": 21.08735332464146, - "grad_norm": 1.4725618362426758, - "learning_rate": 5.9878391959798994e-05, - "loss": 5.555, - "step": 40435 - }, - { - "epoch": 21.08787483702738, - "grad_norm": 1.5339632034301758, - "learning_rate": 5.987738693467337e-05, - "loss": 5.0229, - "step": 40436 - }, - { - "epoch": 21.088396349413298, - "grad_norm": 1.5626211166381836, - "learning_rate": 5.987638190954774e-05, - "loss": 5.4258, - "step": 40437 - }, - { - "epoch": 21.088917861799217, - "grad_norm": 1.5049450397491455, - "learning_rate": 5.987537688442212e-05, - "loss": 5.2395, - "step": 40438 - }, - { - "epoch": 21.089439374185137, - "grad_norm": 1.7836427688598633, - "learning_rate": 5.9874371859296485e-05, - "loss": 4.8023, - "step": 40439 - }, - { - "epoch": 21.089960886571056, - "grad_norm": 1.5529954433441162, - "learning_rate": 5.987336683417085e-05, - "loss": 5.3701, - "step": 40440 - }, - { - "epoch": 21.090482398956976, - "grad_norm": 1.4089865684509277, - "learning_rate": 5.987236180904523e-05, - "loss": 5.3612, - "step": 40441 - }, - { - "epoch": 21.091003911342895, - "grad_norm": 1.4377624988555908, - "learning_rate": 5.98713567839196e-05, - "loss": 5.4074, - "step": 40442 - }, - { - "epoch": 21.091525423728815, - "grad_norm": 1.4648789167404175, - "learning_rate": 5.9870351758793976e-05, - "loss": 5.3629, - "step": 40443 - }, - { - "epoch": 21.092046936114734, - "grad_norm": 1.5598760843276978, - "learning_rate": 5.986934673366834e-05, - "loss": 5.3171, - "step": 40444 - }, - { - "epoch": 21.092568448500653, - "grad_norm": 1.4527603387832642, - "learning_rate": 5.986834170854272e-05, - "loss": 5.2719, - "step": 40445 - }, - { - "epoch": 21.09308996088657, - "grad_norm": 1.5831148624420166, - "learning_rate": 5.986733668341709e-05, - "loss": 5.2809, - "step": 40446 - }, - { - "epoch": 21.09361147327249, - "grad_norm": 1.6403896808624268, - "learning_rate": 5.9866331658291467e-05, - "loss": 4.6864, - "step": 40447 - }, - { - "epoch": 21.09413298565841, - "grad_norm": 1.5236560106277466, - "learning_rate": 5.986532663316583e-05, - "loss": 5.6539, - "step": 40448 - }, - { - "epoch": 21.094654498044328, - "grad_norm": 1.5617449283599854, - "learning_rate": 5.986432160804021e-05, - "loss": 5.5145, - "step": 40449 - }, - { - "epoch": 21.095176010430247, - "grad_norm": 1.4581414461135864, - "learning_rate": 5.986331658291457e-05, - "loss": 5.2671, - "step": 40450 - }, - { - "epoch": 21.095697522816167, - "grad_norm": 1.586154580116272, - "learning_rate": 5.986231155778895e-05, - "loss": 5.3387, - "step": 40451 - }, - { - "epoch": 21.096219035202086, - "grad_norm": 1.5706770420074463, - "learning_rate": 5.986130653266332e-05, - "loss": 5.0477, - "step": 40452 - }, - { - "epoch": 21.096740547588006, - "grad_norm": 1.4694738388061523, - "learning_rate": 5.9860301507537686e-05, - "loss": 5.4925, - "step": 40453 - }, - { - "epoch": 21.097262059973925, - "grad_norm": 1.4194875955581665, - "learning_rate": 5.9859296482412064e-05, - "loss": 4.8194, - "step": 40454 - }, - { - "epoch": 21.097783572359845, - "grad_norm": 1.522809386253357, - "learning_rate": 5.985829145728643e-05, - "loss": 5.121, - "step": 40455 - }, - { - "epoch": 21.098305084745764, - "grad_norm": 1.4670060873031616, - "learning_rate": 5.9857286432160806e-05, - "loss": 5.4332, - "step": 40456 - }, - { - "epoch": 21.098826597131684, - "grad_norm": 1.4306670427322388, - "learning_rate": 5.985628140703518e-05, - "loss": 5.2564, - "step": 40457 - }, - { - "epoch": 21.0993481095176, - "grad_norm": 1.5054138898849487, - "learning_rate": 5.9855276381909555e-05, - "loss": 5.3078, - "step": 40458 - }, - { - "epoch": 21.09986962190352, - "grad_norm": 1.5119010210037231, - "learning_rate": 5.985427135678392e-05, - "loss": 5.1843, - "step": 40459 - }, - { - "epoch": 21.10039113428944, - "grad_norm": 1.596413016319275, - "learning_rate": 5.98532663316583e-05, - "loss": 4.879, - "step": 40460 - }, - { - "epoch": 21.100912646675358, - "grad_norm": 1.6137243509292603, - "learning_rate": 5.985226130653266e-05, - "loss": 5.172, - "step": 40461 - }, - { - "epoch": 21.101434159061277, - "grad_norm": 1.4975477457046509, - "learning_rate": 5.985125628140704e-05, - "loss": 5.2918, - "step": 40462 - }, - { - "epoch": 21.101955671447197, - "grad_norm": 1.5288376808166504, - "learning_rate": 5.985025125628141e-05, - "loss": 5.2361, - "step": 40463 - }, - { - "epoch": 21.102477183833116, - "grad_norm": 1.6010289192199707, - "learning_rate": 5.984924623115579e-05, - "loss": 5.0082, - "step": 40464 - }, - { - "epoch": 21.102998696219036, - "grad_norm": 1.6673272848129272, - "learning_rate": 5.984824120603015e-05, - "loss": 5.4964, - "step": 40465 - }, - { - "epoch": 21.103520208604955, - "grad_norm": 1.6450858116149902, - "learning_rate": 5.984723618090452e-05, - "loss": 5.022, - "step": 40466 - }, - { - "epoch": 21.104041720990875, - "grad_norm": 1.566383719444275, - "learning_rate": 5.98462311557789e-05, - "loss": 5.6457, - "step": 40467 - }, - { - "epoch": 21.104563233376794, - "grad_norm": 1.6621265411376953, - "learning_rate": 5.9845226130653265e-05, - "loss": 5.1344, - "step": 40468 - }, - { - "epoch": 21.105084745762714, - "grad_norm": 1.4125103950500488, - "learning_rate": 5.984422110552764e-05, - "loss": 5.1382, - "step": 40469 - }, - { - "epoch": 21.10560625814863, - "grad_norm": 1.476352572441101, - "learning_rate": 5.984321608040201e-05, - "loss": 5.0588, - "step": 40470 - }, - { - "epoch": 21.10612777053455, - "grad_norm": 1.5042837858200073, - "learning_rate": 5.9842211055276385e-05, - "loss": 5.4934, - "step": 40471 - }, - { - "epoch": 21.10664928292047, - "grad_norm": 1.451910376548767, - "learning_rate": 5.9841206030150756e-05, - "loss": 5.1945, - "step": 40472 - }, - { - "epoch": 21.107170795306388, - "grad_norm": 1.6424407958984375, - "learning_rate": 5.9840201005025134e-05, - "loss": 5.3011, - "step": 40473 - }, - { - "epoch": 21.107692307692307, - "grad_norm": 1.587241530418396, - "learning_rate": 5.98391959798995e-05, - "loss": 5.1846, - "step": 40474 - }, - { - "epoch": 21.108213820078227, - "grad_norm": 1.4831129312515259, - "learning_rate": 5.9838190954773876e-05, - "loss": 5.4498, - "step": 40475 - }, - { - "epoch": 21.108735332464146, - "grad_norm": 1.5142009258270264, - "learning_rate": 5.983718592964824e-05, - "loss": 5.1398, - "step": 40476 - }, - { - "epoch": 21.109256844850066, - "grad_norm": 1.4450006484985352, - "learning_rate": 5.983618090452262e-05, - "loss": 5.6326, - "step": 40477 - }, - { - "epoch": 21.109778357235985, - "grad_norm": 1.4935756921768188, - "learning_rate": 5.983517587939699e-05, - "loss": 5.6283, - "step": 40478 - }, - { - "epoch": 21.110299869621905, - "grad_norm": 1.6060832738876343, - "learning_rate": 5.9834170854271354e-05, - "loss": 5.2143, - "step": 40479 - }, - { - "epoch": 21.110821382007824, - "grad_norm": 1.5077277421951294, - "learning_rate": 5.983316582914573e-05, - "loss": 5.3397, - "step": 40480 - }, - { - "epoch": 21.111342894393744, - "grad_norm": 1.5558044910430908, - "learning_rate": 5.9832160804020096e-05, - "loss": 5.5193, - "step": 40481 - }, - { - "epoch": 21.11186440677966, - "grad_norm": 1.5218029022216797, - "learning_rate": 5.9831155778894474e-05, - "loss": 4.7618, - "step": 40482 - }, - { - "epoch": 21.11238591916558, - "grad_norm": 1.5532203912734985, - "learning_rate": 5.9830150753768845e-05, - "loss": 5.1237, - "step": 40483 - }, - { - "epoch": 21.1129074315515, - "grad_norm": 1.5586994886398315, - "learning_rate": 5.982914572864322e-05, - "loss": 5.4746, - "step": 40484 - }, - { - "epoch": 21.113428943937418, - "grad_norm": 1.4544874429702759, - "learning_rate": 5.982814070351759e-05, - "loss": 4.9114, - "step": 40485 - }, - { - "epoch": 21.113950456323337, - "grad_norm": 1.5348527431488037, - "learning_rate": 5.9827135678391965e-05, - "loss": 5.3531, - "step": 40486 - }, - { - "epoch": 21.114471968709257, - "grad_norm": 1.4761649370193481, - "learning_rate": 5.982613065326633e-05, - "loss": 5.8766, - "step": 40487 - }, - { - "epoch": 21.114993481095176, - "grad_norm": 1.4431233406066895, - "learning_rate": 5.982512562814071e-05, - "loss": 5.8608, - "step": 40488 - }, - { - "epoch": 21.115514993481096, - "grad_norm": 1.4149786233901978, - "learning_rate": 5.982412060301508e-05, - "loss": 4.7492, - "step": 40489 - }, - { - "epoch": 21.116036505867015, - "grad_norm": 1.5748789310455322, - "learning_rate": 5.9823115577889456e-05, - "loss": 4.7643, - "step": 40490 - }, - { - "epoch": 21.116558018252935, - "grad_norm": 1.5708955526351929, - "learning_rate": 5.982211055276382e-05, - "loss": 5.1629, - "step": 40491 - }, - { - "epoch": 21.117079530638854, - "grad_norm": 1.4050750732421875, - "learning_rate": 5.98211055276382e-05, - "loss": 5.7844, - "step": 40492 - }, - { - "epoch": 21.117601043024774, - "grad_norm": 1.5531952381134033, - "learning_rate": 5.982010050251257e-05, - "loss": 4.6696, - "step": 40493 - }, - { - "epoch": 21.11812255541069, - "grad_norm": 1.5518747568130493, - "learning_rate": 5.981909547738693e-05, - "loss": 5.2633, - "step": 40494 - }, - { - "epoch": 21.11864406779661, - "grad_norm": 1.50668466091156, - "learning_rate": 5.981809045226131e-05, - "loss": 5.5256, - "step": 40495 - }, - { - "epoch": 21.11916558018253, - "grad_norm": 1.5196616649627686, - "learning_rate": 5.9817085427135675e-05, - "loss": 5.666, - "step": 40496 - }, - { - "epoch": 21.119687092568448, - "grad_norm": 1.5376895666122437, - "learning_rate": 5.981608040201005e-05, - "loss": 5.1744, - "step": 40497 - }, - { - "epoch": 21.120208604954367, - "grad_norm": 1.5256839990615845, - "learning_rate": 5.9815075376884424e-05, - "loss": 5.6403, - "step": 40498 - }, - { - "epoch": 21.120730117340287, - "grad_norm": 1.4935588836669922, - "learning_rate": 5.98140703517588e-05, - "loss": 5.5343, - "step": 40499 - }, - { - "epoch": 21.121251629726206, - "grad_norm": 1.5131702423095703, - "learning_rate": 5.9813065326633166e-05, - "loss": 5.5956, - "step": 40500 - }, - { - "epoch": 21.121773142112126, - "grad_norm": 1.5082676410675049, - "learning_rate": 5.9812060301507544e-05, - "loss": 4.5166, - "step": 40501 - }, - { - "epoch": 21.122294654498045, - "grad_norm": 1.4995970726013184, - "learning_rate": 5.981105527638191e-05, - "loss": 5.2458, - "step": 40502 - }, - { - "epoch": 21.122816166883965, - "grad_norm": 1.5533170700073242, - "learning_rate": 5.9810050251256286e-05, - "loss": 5.2451, - "step": 40503 - }, - { - "epoch": 21.123337679269884, - "grad_norm": 1.5492286682128906, - "learning_rate": 5.980904522613066e-05, - "loss": 5.0129, - "step": 40504 - }, - { - "epoch": 21.1238591916558, - "grad_norm": 1.49004065990448, - "learning_rate": 5.9808040201005035e-05, - "loss": 5.2184, - "step": 40505 - }, - { - "epoch": 21.12438070404172, - "grad_norm": 1.5308761596679688, - "learning_rate": 5.98070351758794e-05, - "loss": 5.4461, - "step": 40506 - }, - { - "epoch": 21.12490221642764, - "grad_norm": 1.4886794090270996, - "learning_rate": 5.9806030150753764e-05, - "loss": 4.7275, - "step": 40507 - }, - { - "epoch": 21.12542372881356, - "grad_norm": 1.5160338878631592, - "learning_rate": 5.980502512562814e-05, - "loss": 5.4562, - "step": 40508 - }, - { - "epoch": 21.125945241199478, - "grad_norm": 1.5570695400238037, - "learning_rate": 5.980402010050251e-05, - "loss": 5.5712, - "step": 40509 - }, - { - "epoch": 21.126466753585397, - "grad_norm": 1.5468798875808716, - "learning_rate": 5.980301507537689e-05, - "loss": 5.1783, - "step": 40510 - }, - { - "epoch": 21.126988265971317, - "grad_norm": 1.6814110279083252, - "learning_rate": 5.9802010050251255e-05, - "loss": 5.2683, - "step": 40511 - }, - { - "epoch": 21.127509778357236, - "grad_norm": 1.6233142614364624, - "learning_rate": 5.980100502512563e-05, - "loss": 4.8768, - "step": 40512 - }, - { - "epoch": 21.128031290743156, - "grad_norm": 1.4384679794311523, - "learning_rate": 5.9800000000000003e-05, - "loss": 5.3773, - "step": 40513 - }, - { - "epoch": 21.128552803129075, - "grad_norm": 1.5343406200408936, - "learning_rate": 5.979899497487438e-05, - "loss": 5.1732, - "step": 40514 - }, - { - "epoch": 21.129074315514995, - "grad_norm": 1.6052615642547607, - "learning_rate": 5.9797989949748746e-05, - "loss": 4.64, - "step": 40515 - }, - { - "epoch": 21.129595827900914, - "grad_norm": 1.455440878868103, - "learning_rate": 5.979698492462312e-05, - "loss": 5.7678, - "step": 40516 - }, - { - "epoch": 21.13011734028683, - "grad_norm": 1.512150764465332, - "learning_rate": 5.979597989949749e-05, - "loss": 5.1306, - "step": 40517 - }, - { - "epoch": 21.13063885267275, - "grad_norm": 1.604032039642334, - "learning_rate": 5.9794974874371865e-05, - "loss": 4.9549, - "step": 40518 - }, - { - "epoch": 21.13116036505867, - "grad_norm": 1.4354671239852905, - "learning_rate": 5.9793969849246237e-05, - "loss": 5.6652, - "step": 40519 - }, - { - "epoch": 21.13168187744459, - "grad_norm": 1.5312098264694214, - "learning_rate": 5.97929648241206e-05, - "loss": 4.7124, - "step": 40520 - }, - { - "epoch": 21.132203389830508, - "grad_norm": 1.5357425212860107, - "learning_rate": 5.979195979899498e-05, - "loss": 4.8568, - "step": 40521 - }, - { - "epoch": 21.132724902216427, - "grad_norm": 1.5877001285552979, - "learning_rate": 5.979095477386934e-05, - "loss": 5.4052, - "step": 40522 - }, - { - "epoch": 21.133246414602347, - "grad_norm": 1.447835087776184, - "learning_rate": 5.978994974874372e-05, - "loss": 5.7247, - "step": 40523 - }, - { - "epoch": 21.133767926988266, - "grad_norm": 1.463950753211975, - "learning_rate": 5.978894472361809e-05, - "loss": 5.0692, - "step": 40524 - }, - { - "epoch": 21.134289439374186, - "grad_norm": 1.4584983587265015, - "learning_rate": 5.978793969849247e-05, - "loss": 5.164, - "step": 40525 - }, - { - "epoch": 21.134810951760105, - "grad_norm": 1.4880266189575195, - "learning_rate": 5.9786934673366834e-05, - "loss": 5.4359, - "step": 40526 - }, - { - "epoch": 21.135332464146025, - "grad_norm": 1.5281604528427124, - "learning_rate": 5.978592964824121e-05, - "loss": 5.3851, - "step": 40527 - }, - { - "epoch": 21.135853976531944, - "grad_norm": 1.4921480417251587, - "learning_rate": 5.9784924623115576e-05, - "loss": 5.1255, - "step": 40528 - }, - { - "epoch": 21.13637548891786, - "grad_norm": 1.4789621829986572, - "learning_rate": 5.9783919597989954e-05, - "loss": 5.2672, - "step": 40529 - }, - { - "epoch": 21.13689700130378, - "grad_norm": 1.551680088043213, - "learning_rate": 5.9782914572864325e-05, - "loss": 5.704, - "step": 40530 - }, - { - "epoch": 21.1374185136897, - "grad_norm": 1.4833424091339111, - "learning_rate": 5.97819095477387e-05, - "loss": 4.8105, - "step": 40531 - }, - { - "epoch": 21.13794002607562, - "grad_norm": 1.5106178522109985, - "learning_rate": 5.978090452261307e-05, - "loss": 5.2242, - "step": 40532 - }, - { - "epoch": 21.138461538461538, - "grad_norm": 1.4758743047714233, - "learning_rate": 5.977989949748744e-05, - "loss": 5.1175, - "step": 40533 - }, - { - "epoch": 21.138983050847457, - "grad_norm": 1.6112853288650513, - "learning_rate": 5.9778894472361816e-05, - "loss": 5.3677, - "step": 40534 - }, - { - "epoch": 21.139504563233377, - "grad_norm": 1.6271826028823853, - "learning_rate": 5.977788944723618e-05, - "loss": 5.2227, - "step": 40535 - }, - { - "epoch": 21.140026075619296, - "grad_norm": 1.5717853307724, - "learning_rate": 5.977688442211056e-05, - "loss": 5.2404, - "step": 40536 - }, - { - "epoch": 21.140547588005216, - "grad_norm": 1.5730926990509033, - "learning_rate": 5.977587939698492e-05, - "loss": 5.2007, - "step": 40537 - }, - { - "epoch": 21.141069100391135, - "grad_norm": 1.4699945449829102, - "learning_rate": 5.97748743718593e-05, - "loss": 5.3718, - "step": 40538 - }, - { - "epoch": 21.141590612777055, - "grad_norm": 1.454193353652954, - "learning_rate": 5.977386934673367e-05, - "loss": 5.6283, - "step": 40539 - }, - { - "epoch": 21.142112125162974, - "grad_norm": 1.621497392654419, - "learning_rate": 5.977286432160805e-05, - "loss": 4.8496, - "step": 40540 - }, - { - "epoch": 21.14263363754889, - "grad_norm": 1.419335126876831, - "learning_rate": 5.977185929648241e-05, - "loss": 5.5454, - "step": 40541 - }, - { - "epoch": 21.14315514993481, - "grad_norm": 1.5860153436660767, - "learning_rate": 5.977085427135679e-05, - "loss": 5.375, - "step": 40542 - }, - { - "epoch": 21.14367666232073, - "grad_norm": 1.5255674123764038, - "learning_rate": 5.9769849246231155e-05, - "loss": 5.2427, - "step": 40543 - }, - { - "epoch": 21.14419817470665, - "grad_norm": 1.5108267068862915, - "learning_rate": 5.976884422110553e-05, - "loss": 4.6561, - "step": 40544 - }, - { - "epoch": 21.144719687092568, - "grad_norm": 1.5271291732788086, - "learning_rate": 5.9767839195979904e-05, - "loss": 5.6177, - "step": 40545 - }, - { - "epoch": 21.145241199478487, - "grad_norm": 1.4991004467010498, - "learning_rate": 5.976683417085427e-05, - "loss": 5.7441, - "step": 40546 - }, - { - "epoch": 21.145762711864407, - "grad_norm": 1.5990164279937744, - "learning_rate": 5.9765829145728646e-05, - "loss": 5.1927, - "step": 40547 - }, - { - "epoch": 21.146284224250326, - "grad_norm": 1.6530146598815918, - "learning_rate": 5.976482412060301e-05, - "loss": 4.975, - "step": 40548 - }, - { - "epoch": 21.146805736636246, - "grad_norm": 1.5724226236343384, - "learning_rate": 5.976381909547739e-05, - "loss": 5.4426, - "step": 40549 - }, - { - "epoch": 21.147327249022165, - "grad_norm": 1.5909117460250854, - "learning_rate": 5.976281407035176e-05, - "loss": 5.5623, - "step": 40550 - }, - { - "epoch": 21.147848761408085, - "grad_norm": 1.4992179870605469, - "learning_rate": 5.976180904522614e-05, - "loss": 5.6115, - "step": 40551 - }, - { - "epoch": 21.148370273794004, - "grad_norm": 1.6057852506637573, - "learning_rate": 5.97608040201005e-05, - "loss": 4.8385, - "step": 40552 - }, - { - "epoch": 21.14889178617992, - "grad_norm": 1.5973511934280396, - "learning_rate": 5.975979899497488e-05, - "loss": 5.0595, - "step": 40553 - }, - { - "epoch": 21.14941329856584, - "grad_norm": 1.4936127662658691, - "learning_rate": 5.975879396984925e-05, - "loss": 5.4076, - "step": 40554 - }, - { - "epoch": 21.14993481095176, - "grad_norm": 1.4421006441116333, - "learning_rate": 5.975778894472363e-05, - "loss": 5.6904, - "step": 40555 - }, - { - "epoch": 21.15045632333768, - "grad_norm": 1.5135644674301147, - "learning_rate": 5.975678391959799e-05, - "loss": 5.0872, - "step": 40556 - }, - { - "epoch": 21.150977835723598, - "grad_norm": 1.6418724060058594, - "learning_rate": 5.975577889447237e-05, - "loss": 4.8918, - "step": 40557 - }, - { - "epoch": 21.151499348109517, - "grad_norm": 1.429557204246521, - "learning_rate": 5.9754773869346735e-05, - "loss": 5.7187, - "step": 40558 - }, - { - "epoch": 21.152020860495437, - "grad_norm": 1.525814175605774, - "learning_rate": 5.9753768844221106e-05, - "loss": 5.2657, - "step": 40559 - }, - { - "epoch": 21.152542372881356, - "grad_norm": 1.494579553604126, - "learning_rate": 5.9752763819095484e-05, - "loss": 5.3456, - "step": 40560 - }, - { - "epoch": 21.153063885267276, - "grad_norm": 1.578065037727356, - "learning_rate": 5.975175879396985e-05, - "loss": 5.3397, - "step": 40561 - }, - { - "epoch": 21.153585397653195, - "grad_norm": 1.457777976989746, - "learning_rate": 5.9750753768844226e-05, - "loss": 5.3574, - "step": 40562 - }, - { - "epoch": 21.154106910039115, - "grad_norm": 1.5035675764083862, - "learning_rate": 5.974974874371859e-05, - "loss": 5.3374, - "step": 40563 - }, - { - "epoch": 21.154628422425034, - "grad_norm": 1.5819815397262573, - "learning_rate": 5.974874371859297e-05, - "loss": 5.4262, - "step": 40564 - }, - { - "epoch": 21.15514993481095, - "grad_norm": 1.4558929204940796, - "learning_rate": 5.974773869346734e-05, - "loss": 5.7661, - "step": 40565 - }, - { - "epoch": 21.15567144719687, - "grad_norm": 1.6401617527008057, - "learning_rate": 5.9746733668341717e-05, - "loss": 5.178, - "step": 40566 - }, - { - "epoch": 21.15619295958279, - "grad_norm": 1.592689871788025, - "learning_rate": 5.974572864321608e-05, - "loss": 5.3349, - "step": 40567 - }, - { - "epoch": 21.15671447196871, - "grad_norm": 1.5307620763778687, - "learning_rate": 5.974472361809046e-05, - "loss": 5.1594, - "step": 40568 - }, - { - "epoch": 21.157235984354628, - "grad_norm": 1.5390926599502563, - "learning_rate": 5.974371859296482e-05, - "loss": 5.2129, - "step": 40569 - }, - { - "epoch": 21.157757496740548, - "grad_norm": 1.5479118824005127, - "learning_rate": 5.97427135678392e-05, - "loss": 5.3018, - "step": 40570 - }, - { - "epoch": 21.158279009126467, - "grad_norm": 1.535560131072998, - "learning_rate": 5.974170854271357e-05, - "loss": 5.3338, - "step": 40571 - }, - { - "epoch": 21.158800521512386, - "grad_norm": 1.65035879611969, - "learning_rate": 5.9740703517587936e-05, - "loss": 5.0294, - "step": 40572 - }, - { - "epoch": 21.159322033898306, - "grad_norm": 1.452447533607483, - "learning_rate": 5.9739698492462314e-05, - "loss": 5.6015, - "step": 40573 - }, - { - "epoch": 21.159843546284225, - "grad_norm": 1.6139521598815918, - "learning_rate": 5.973869346733668e-05, - "loss": 5.1903, - "step": 40574 - }, - { - "epoch": 21.160365058670145, - "grad_norm": 1.4652280807495117, - "learning_rate": 5.9737688442211056e-05, - "loss": 5.1274, - "step": 40575 - }, - { - "epoch": 21.160886571056064, - "grad_norm": 1.5154614448547363, - "learning_rate": 5.973668341708543e-05, - "loss": 4.8997, - "step": 40576 - }, - { - "epoch": 21.16140808344198, - "grad_norm": 1.41560697555542, - "learning_rate": 5.9735678391959805e-05, - "loss": 5.4255, - "step": 40577 - }, - { - "epoch": 21.1619295958279, - "grad_norm": 1.496713638305664, - "learning_rate": 5.973467336683417e-05, - "loss": 5.6983, - "step": 40578 - }, - { - "epoch": 21.16245110821382, - "grad_norm": 1.449817180633545, - "learning_rate": 5.973366834170855e-05, - "loss": 5.304, - "step": 40579 - }, - { - "epoch": 21.16297262059974, - "grad_norm": 1.5906455516815186, - "learning_rate": 5.973266331658292e-05, - "loss": 5.1318, - "step": 40580 - }, - { - "epoch": 21.163494132985658, - "grad_norm": 1.5979536771774292, - "learning_rate": 5.9731658291457296e-05, - "loss": 5.091, - "step": 40581 - }, - { - "epoch": 21.164015645371578, - "grad_norm": 1.5023746490478516, - "learning_rate": 5.973065326633166e-05, - "loss": 4.8425, - "step": 40582 - }, - { - "epoch": 21.164537157757497, - "grad_norm": 1.5801231861114502, - "learning_rate": 5.972964824120604e-05, - "loss": 5.3994, - "step": 40583 - }, - { - "epoch": 21.165058670143416, - "grad_norm": 1.469386339187622, - "learning_rate": 5.97286432160804e-05, - "loss": 5.5612, - "step": 40584 - }, - { - "epoch": 21.165580182529336, - "grad_norm": 1.4849580526351929, - "learning_rate": 5.972763819095478e-05, - "loss": 5.3121, - "step": 40585 - }, - { - "epoch": 21.166101694915255, - "grad_norm": 1.4124133586883545, - "learning_rate": 5.972663316582915e-05, - "loss": 5.6649, - "step": 40586 - }, - { - "epoch": 21.166623207301175, - "grad_norm": 1.5104923248291016, - "learning_rate": 5.9725628140703515e-05, - "loss": 5.2213, - "step": 40587 - }, - { - "epoch": 21.167144719687094, - "grad_norm": 1.5204596519470215, - "learning_rate": 5.972462311557789e-05, - "loss": 5.4067, - "step": 40588 - }, - { - "epoch": 21.16766623207301, - "grad_norm": 1.5049691200256348, - "learning_rate": 5.972361809045226e-05, - "loss": 5.2132, - "step": 40589 - }, - { - "epoch": 21.16818774445893, - "grad_norm": 1.4714080095291138, - "learning_rate": 5.9722613065326635e-05, - "loss": 5.4587, - "step": 40590 - }, - { - "epoch": 21.16870925684485, - "grad_norm": 1.5509520769119263, - "learning_rate": 5.9721608040201006e-05, - "loss": 5.5615, - "step": 40591 - }, - { - "epoch": 21.16923076923077, - "grad_norm": 1.6020852327346802, - "learning_rate": 5.9720603015075384e-05, - "loss": 5.4443, - "step": 40592 - }, - { - "epoch": 21.169752281616688, - "grad_norm": 1.6457250118255615, - "learning_rate": 5.971959798994975e-05, - "loss": 4.787, - "step": 40593 - }, - { - "epoch": 21.170273794002608, - "grad_norm": 1.6122372150421143, - "learning_rate": 5.9718592964824126e-05, - "loss": 4.9118, - "step": 40594 - }, - { - "epoch": 21.170795306388527, - "grad_norm": 1.5317331552505493, - "learning_rate": 5.971758793969849e-05, - "loss": 5.3112, - "step": 40595 - }, - { - "epoch": 21.171316818774446, - "grad_norm": 1.6187509298324585, - "learning_rate": 5.971658291457287e-05, - "loss": 4.6214, - "step": 40596 - }, - { - "epoch": 21.171838331160366, - "grad_norm": 1.607183575630188, - "learning_rate": 5.971557788944724e-05, - "loss": 4.893, - "step": 40597 - }, - { - "epoch": 21.172359843546285, - "grad_norm": 1.5166726112365723, - "learning_rate": 5.971457286432162e-05, - "loss": 5.3135, - "step": 40598 - }, - { - "epoch": 21.172881355932205, - "grad_norm": 1.5715097188949585, - "learning_rate": 5.971356783919598e-05, - "loss": 5.2823, - "step": 40599 - }, - { - "epoch": 21.17340286831812, - "grad_norm": 1.5899711847305298, - "learning_rate": 5.971256281407035e-05, - "loss": 5.0503, - "step": 40600 - }, - { - "epoch": 21.17392438070404, - "grad_norm": 1.6250227689743042, - "learning_rate": 5.971155778894473e-05, - "loss": 4.9744, - "step": 40601 - }, - { - "epoch": 21.17444589308996, - "grad_norm": 1.425146460533142, - "learning_rate": 5.9710552763819095e-05, - "loss": 5.1954, - "step": 40602 - }, - { - "epoch": 21.17496740547588, - "grad_norm": 1.4517695903778076, - "learning_rate": 5.970954773869347e-05, - "loss": 5.1685, - "step": 40603 - }, - { - "epoch": 21.1754889178618, - "grad_norm": 1.5541102886199951, - "learning_rate": 5.970854271356784e-05, - "loss": 5.2239, - "step": 40604 - }, - { - "epoch": 21.176010430247718, - "grad_norm": 1.6134741306304932, - "learning_rate": 5.9707537688442215e-05, - "loss": 5.0682, - "step": 40605 - }, - { - "epoch": 21.176531942633638, - "grad_norm": 1.4248137474060059, - "learning_rate": 5.9706532663316586e-05, - "loss": 5.6403, - "step": 40606 - }, - { - "epoch": 21.177053455019557, - "grad_norm": 1.6588658094406128, - "learning_rate": 5.9705527638190964e-05, - "loss": 4.6825, - "step": 40607 - }, - { - "epoch": 21.177574967405477, - "grad_norm": 1.5338305234909058, - "learning_rate": 5.970452261306533e-05, - "loss": 5.3615, - "step": 40608 - }, - { - "epoch": 21.178096479791396, - "grad_norm": 1.6117602586746216, - "learning_rate": 5.9703517587939706e-05, - "loss": 5.0227, - "step": 40609 - }, - { - "epoch": 21.178617992177315, - "grad_norm": 1.490416407585144, - "learning_rate": 5.970251256281407e-05, - "loss": 4.9704, - "step": 40610 - }, - { - "epoch": 21.179139504563235, - "grad_norm": 1.5879480838775635, - "learning_rate": 5.970150753768845e-05, - "loss": 5.0938, - "step": 40611 - }, - { - "epoch": 21.17966101694915, - "grad_norm": 1.4804872274398804, - "learning_rate": 5.970050251256282e-05, - "loss": 5.3505, - "step": 40612 - }, - { - "epoch": 21.18018252933507, - "grad_norm": 1.462067723274231, - "learning_rate": 5.969949748743718e-05, - "loss": 5.4705, - "step": 40613 - }, - { - "epoch": 21.18070404172099, - "grad_norm": 1.5462214946746826, - "learning_rate": 5.969849246231156e-05, - "loss": 5.2449, - "step": 40614 - }, - { - "epoch": 21.18122555410691, - "grad_norm": 1.567888855934143, - "learning_rate": 5.9697487437185925e-05, - "loss": 5.6053, - "step": 40615 - }, - { - "epoch": 21.18174706649283, - "grad_norm": 1.5803216695785522, - "learning_rate": 5.96964824120603e-05, - "loss": 5.4036, - "step": 40616 - }, - { - "epoch": 21.182268578878748, - "grad_norm": 1.4339674711227417, - "learning_rate": 5.9695477386934674e-05, - "loss": 5.7128, - "step": 40617 - }, - { - "epoch": 21.182790091264668, - "grad_norm": 1.3847391605377197, - "learning_rate": 5.969447236180905e-05, - "loss": 5.4878, - "step": 40618 - }, - { - "epoch": 21.183311603650587, - "grad_norm": 1.6508862972259521, - "learning_rate": 5.9693467336683416e-05, - "loss": 4.5501, - "step": 40619 - }, - { - "epoch": 21.183833116036507, - "grad_norm": 1.540549874305725, - "learning_rate": 5.9692462311557794e-05, - "loss": 5.4801, - "step": 40620 - }, - { - "epoch": 21.184354628422426, - "grad_norm": 1.4584678411483765, - "learning_rate": 5.9691457286432165e-05, - "loss": 5.3343, - "step": 40621 - }, - { - "epoch": 21.184876140808345, - "grad_norm": 1.6501909494400024, - "learning_rate": 5.969045226130654e-05, - "loss": 4.759, - "step": 40622 - }, - { - "epoch": 21.185397653194265, - "grad_norm": 1.5307040214538574, - "learning_rate": 5.968944723618091e-05, - "loss": 5.3919, - "step": 40623 - }, - { - "epoch": 21.18591916558018, - "grad_norm": 1.576462984085083, - "learning_rate": 5.9688442211055285e-05, - "loss": 5.4487, - "step": 40624 - }, - { - "epoch": 21.1864406779661, - "grad_norm": 1.4717215299606323, - "learning_rate": 5.968743718592965e-05, - "loss": 5.4381, - "step": 40625 - }, - { - "epoch": 21.18696219035202, - "grad_norm": 1.44826078414917, - "learning_rate": 5.968643216080402e-05, - "loss": 5.453, - "step": 40626 - }, - { - "epoch": 21.18748370273794, - "grad_norm": 1.5031006336212158, - "learning_rate": 5.96854271356784e-05, - "loss": 5.5376, - "step": 40627 - }, - { - "epoch": 21.18800521512386, - "grad_norm": 1.656994104385376, - "learning_rate": 5.968442211055276e-05, - "loss": 5.1949, - "step": 40628 - }, - { - "epoch": 21.188526727509778, - "grad_norm": 1.4495015144348145, - "learning_rate": 5.968341708542714e-05, - "loss": 4.555, - "step": 40629 - }, - { - "epoch": 21.189048239895698, - "grad_norm": 1.5009620189666748, - "learning_rate": 5.9682412060301505e-05, - "loss": 5.3268, - "step": 40630 - }, - { - "epoch": 21.189569752281617, - "grad_norm": 1.581083059310913, - "learning_rate": 5.968140703517588e-05, - "loss": 5.1681, - "step": 40631 - }, - { - "epoch": 21.190091264667537, - "grad_norm": 1.4545484781265259, - "learning_rate": 5.9680402010050253e-05, - "loss": 5.9612, - "step": 40632 - }, - { - "epoch": 21.190612777053456, - "grad_norm": 1.434973955154419, - "learning_rate": 5.967939698492463e-05, - "loss": 5.623, - "step": 40633 - }, - { - "epoch": 21.191134289439375, - "grad_norm": 1.6205551624298096, - "learning_rate": 5.9678391959798996e-05, - "loss": 5.0967, - "step": 40634 - }, - { - "epoch": 21.191655801825295, - "grad_norm": 1.5244802236557007, - "learning_rate": 5.967738693467337e-05, - "loss": 4.9634, - "step": 40635 - }, - { - "epoch": 21.19217731421121, - "grad_norm": 1.5559725761413574, - "learning_rate": 5.967638190954774e-05, - "loss": 5.0815, - "step": 40636 - }, - { - "epoch": 21.19269882659713, - "grad_norm": 1.438287377357483, - "learning_rate": 5.9675376884422115e-05, - "loss": 5.0869, - "step": 40637 - }, - { - "epoch": 21.19322033898305, - "grad_norm": 1.5143187046051025, - "learning_rate": 5.9674371859296487e-05, - "loss": 4.9993, - "step": 40638 - }, - { - "epoch": 21.19374185136897, - "grad_norm": 1.6917345523834229, - "learning_rate": 5.967336683417085e-05, - "loss": 4.8606, - "step": 40639 - }, - { - "epoch": 21.19426336375489, - "grad_norm": 1.5854580402374268, - "learning_rate": 5.967236180904523e-05, - "loss": 5.0629, - "step": 40640 - }, - { - "epoch": 21.194784876140808, - "grad_norm": 1.5831925868988037, - "learning_rate": 5.96713567839196e-05, - "loss": 5.2907, - "step": 40641 - }, - { - "epoch": 21.195306388526728, - "grad_norm": 1.62052583694458, - "learning_rate": 5.967035175879398e-05, - "loss": 4.5758, - "step": 40642 - }, - { - "epoch": 21.195827900912647, - "grad_norm": 1.4569064378738403, - "learning_rate": 5.966934673366834e-05, - "loss": 5.7979, - "step": 40643 - }, - { - "epoch": 21.196349413298567, - "grad_norm": 1.736686110496521, - "learning_rate": 5.966834170854272e-05, - "loss": 4.8302, - "step": 40644 - }, - { - "epoch": 21.196870925684486, - "grad_norm": 1.4086229801177979, - "learning_rate": 5.9667336683417084e-05, - "loss": 5.8459, - "step": 40645 - }, - { - "epoch": 21.197392438070406, - "grad_norm": 1.435834288597107, - "learning_rate": 5.966633165829146e-05, - "loss": 5.5059, - "step": 40646 - }, - { - "epoch": 21.197913950456325, - "grad_norm": 1.5013762712478638, - "learning_rate": 5.966532663316583e-05, - "loss": 5.4594, - "step": 40647 - }, - { - "epoch": 21.19843546284224, - "grad_norm": 1.6662782430648804, - "learning_rate": 5.966432160804021e-05, - "loss": 5.1555, - "step": 40648 - }, - { - "epoch": 21.19895697522816, - "grad_norm": 1.5300575494766235, - "learning_rate": 5.9663316582914575e-05, - "loss": 4.824, - "step": 40649 - }, - { - "epoch": 21.19947848761408, - "grad_norm": 1.4639308452606201, - "learning_rate": 5.966231155778895e-05, - "loss": 5.4297, - "step": 40650 - }, - { - "epoch": 21.2, - "grad_norm": 1.5549782514572144, - "learning_rate": 5.966130653266332e-05, - "loss": 5.2158, - "step": 40651 - }, - { - "epoch": 21.20052151238592, - "grad_norm": 1.417754054069519, - "learning_rate": 5.966030150753769e-05, - "loss": 5.7853, - "step": 40652 - }, - { - "epoch": 21.201043024771838, - "grad_norm": 1.5551081895828247, - "learning_rate": 5.9659296482412066e-05, - "loss": 5.0077, - "step": 40653 - }, - { - "epoch": 21.201564537157758, - "grad_norm": 1.4925522804260254, - "learning_rate": 5.965829145728643e-05, - "loss": 4.8515, - "step": 40654 - }, - { - "epoch": 21.202086049543677, - "grad_norm": 1.4862408638000488, - "learning_rate": 5.965728643216081e-05, - "loss": 5.2133, - "step": 40655 - }, - { - "epoch": 21.202607561929597, - "grad_norm": 1.3617701530456543, - "learning_rate": 5.965628140703517e-05, - "loss": 5.7975, - "step": 40656 - }, - { - "epoch": 21.203129074315516, - "grad_norm": 1.461024284362793, - "learning_rate": 5.965527638190955e-05, - "loss": 5.6379, - "step": 40657 - }, - { - "epoch": 21.203650586701436, - "grad_norm": 1.647446870803833, - "learning_rate": 5.965427135678392e-05, - "loss": 4.6717, - "step": 40658 - }, - { - "epoch": 21.204172099087355, - "grad_norm": 1.418860673904419, - "learning_rate": 5.96532663316583e-05, - "loss": 4.6865, - "step": 40659 - }, - { - "epoch": 21.20469361147327, - "grad_norm": 1.5359752178192139, - "learning_rate": 5.965226130653266e-05, - "loss": 5.2944, - "step": 40660 - }, - { - "epoch": 21.20521512385919, - "grad_norm": 1.468827247619629, - "learning_rate": 5.965125628140704e-05, - "loss": 5.2504, - "step": 40661 - }, - { - "epoch": 21.20573663624511, - "grad_norm": 1.582008957862854, - "learning_rate": 5.9650251256281405e-05, - "loss": 5.3548, - "step": 40662 - }, - { - "epoch": 21.20625814863103, - "grad_norm": 1.504395604133606, - "learning_rate": 5.964924623115578e-05, - "loss": 5.1717, - "step": 40663 - }, - { - "epoch": 21.20677966101695, - "grad_norm": 1.5606064796447754, - "learning_rate": 5.9648241206030154e-05, - "loss": 4.8401, - "step": 40664 - }, - { - "epoch": 21.20730117340287, - "grad_norm": 1.5171400308609009, - "learning_rate": 5.964723618090452e-05, - "loss": 5.0575, - "step": 40665 - }, - { - "epoch": 21.207822685788788, - "grad_norm": 1.61776864528656, - "learning_rate": 5.9646231155778896e-05, - "loss": 4.9886, - "step": 40666 - }, - { - "epoch": 21.208344198174707, - "grad_norm": 1.5356152057647705, - "learning_rate": 5.964522613065327e-05, - "loss": 5.4236, - "step": 40667 - }, - { - "epoch": 21.208865710560627, - "grad_norm": 1.5440709590911865, - "learning_rate": 5.9644221105527645e-05, - "loss": 5.1622, - "step": 40668 - }, - { - "epoch": 21.209387222946546, - "grad_norm": 1.65593421459198, - "learning_rate": 5.964321608040201e-05, - "loss": 4.5791, - "step": 40669 - }, - { - "epoch": 21.209908735332466, - "grad_norm": 1.571873426437378, - "learning_rate": 5.964221105527639e-05, - "loss": 4.9211, - "step": 40670 - }, - { - "epoch": 21.210430247718385, - "grad_norm": 1.574280023574829, - "learning_rate": 5.964120603015075e-05, - "loss": 5.2429, - "step": 40671 - }, - { - "epoch": 21.2109517601043, - "grad_norm": 1.3971047401428223, - "learning_rate": 5.964020100502513e-05, - "loss": 5.6198, - "step": 40672 - }, - { - "epoch": 21.21147327249022, - "grad_norm": 1.462854027748108, - "learning_rate": 5.96391959798995e-05, - "loss": 5.2688, - "step": 40673 - }, - { - "epoch": 21.21199478487614, - "grad_norm": 1.5383580923080444, - "learning_rate": 5.963819095477388e-05, - "loss": 4.2754, - "step": 40674 - }, - { - "epoch": 21.21251629726206, - "grad_norm": 1.6117019653320312, - "learning_rate": 5.963718592964824e-05, - "loss": 5.115, - "step": 40675 - }, - { - "epoch": 21.21303780964798, - "grad_norm": 1.5221608877182007, - "learning_rate": 5.963618090452262e-05, - "loss": 5.2615, - "step": 40676 - }, - { - "epoch": 21.2135593220339, - "grad_norm": 1.4906824827194214, - "learning_rate": 5.9635175879396985e-05, - "loss": 5.3335, - "step": 40677 - }, - { - "epoch": 21.214080834419818, - "grad_norm": 1.5931057929992676, - "learning_rate": 5.963417085427136e-05, - "loss": 5.1498, - "step": 40678 - }, - { - "epoch": 21.214602346805737, - "grad_norm": 1.52841317653656, - "learning_rate": 5.9633165829145734e-05, - "loss": 5.2862, - "step": 40679 - }, - { - "epoch": 21.215123859191657, - "grad_norm": 1.6055434942245483, - "learning_rate": 5.96321608040201e-05, - "loss": 5.5136, - "step": 40680 - }, - { - "epoch": 21.215645371577576, - "grad_norm": 1.7348253726959229, - "learning_rate": 5.9631155778894476e-05, - "loss": 5.2745, - "step": 40681 - }, - { - "epoch": 21.216166883963496, - "grad_norm": 1.6879568099975586, - "learning_rate": 5.963015075376884e-05, - "loss": 5.0558, - "step": 40682 - }, - { - "epoch": 21.216688396349415, - "grad_norm": 1.5645301342010498, - "learning_rate": 5.962914572864322e-05, - "loss": 4.4842, - "step": 40683 - }, - { - "epoch": 21.21720990873533, - "grad_norm": 1.5281503200531006, - "learning_rate": 5.962814070351759e-05, - "loss": 5.3088, - "step": 40684 - }, - { - "epoch": 21.21773142112125, - "grad_norm": 1.4888880252838135, - "learning_rate": 5.962713567839197e-05, - "loss": 5.4383, - "step": 40685 - }, - { - "epoch": 21.21825293350717, - "grad_norm": 1.561076283454895, - "learning_rate": 5.962613065326633e-05, - "loss": 5.1468, - "step": 40686 - }, - { - "epoch": 21.21877444589309, - "grad_norm": 1.4732894897460938, - "learning_rate": 5.962512562814071e-05, - "loss": 5.5201, - "step": 40687 - }, - { - "epoch": 21.21929595827901, - "grad_norm": 1.6400636434555054, - "learning_rate": 5.962412060301508e-05, - "loss": 4.476, - "step": 40688 - }, - { - "epoch": 21.21981747066493, - "grad_norm": 1.4940588474273682, - "learning_rate": 5.962311557788946e-05, - "loss": 4.7539, - "step": 40689 - }, - { - "epoch": 21.220338983050848, - "grad_norm": 1.4909626245498657, - "learning_rate": 5.962211055276382e-05, - "loss": 5.4266, - "step": 40690 - }, - { - "epoch": 21.220860495436767, - "grad_norm": 1.6058056354522705, - "learning_rate": 5.96211055276382e-05, - "loss": 4.961, - "step": 40691 - }, - { - "epoch": 21.221382007822687, - "grad_norm": 1.4971870183944702, - "learning_rate": 5.9620100502512564e-05, - "loss": 5.6193, - "step": 40692 - }, - { - "epoch": 21.221903520208606, - "grad_norm": 1.44918692111969, - "learning_rate": 5.9619095477386935e-05, - "loss": 5.4614, - "step": 40693 - }, - { - "epoch": 21.222425032594526, - "grad_norm": 1.5328431129455566, - "learning_rate": 5.961809045226131e-05, - "loss": 5.2643, - "step": 40694 - }, - { - "epoch": 21.22294654498044, - "grad_norm": 1.4669524431228638, - "learning_rate": 5.961708542713568e-05, - "loss": 5.0707, - "step": 40695 - }, - { - "epoch": 21.22346805736636, - "grad_norm": 1.4946417808532715, - "learning_rate": 5.9616080402010055e-05, - "loss": 5.5828, - "step": 40696 - }, - { - "epoch": 21.22398956975228, - "grad_norm": 1.4900370836257935, - "learning_rate": 5.961507537688442e-05, - "loss": 5.2675, - "step": 40697 - }, - { - "epoch": 21.2245110821382, - "grad_norm": 1.5576834678649902, - "learning_rate": 5.96140703517588e-05, - "loss": 4.7376, - "step": 40698 - }, - { - "epoch": 21.22503259452412, - "grad_norm": 1.4590167999267578, - "learning_rate": 5.961306532663317e-05, - "loss": 5.3745, - "step": 40699 - }, - { - "epoch": 21.22555410691004, - "grad_norm": 1.6524975299835205, - "learning_rate": 5.9612060301507546e-05, - "loss": 4.8418, - "step": 40700 - }, - { - "epoch": 21.22607561929596, - "grad_norm": 1.517913818359375, - "learning_rate": 5.961105527638191e-05, - "loss": 5.3512, - "step": 40701 - }, - { - "epoch": 21.226597131681878, - "grad_norm": 1.5073001384735107, - "learning_rate": 5.961005025125629e-05, - "loss": 4.8733, - "step": 40702 - }, - { - "epoch": 21.227118644067797, - "grad_norm": 1.6129887104034424, - "learning_rate": 5.960904522613065e-05, - "loss": 5.4667, - "step": 40703 - }, - { - "epoch": 21.227640156453717, - "grad_norm": 1.4693940877914429, - "learning_rate": 5.960804020100503e-05, - "loss": 5.4461, - "step": 40704 - }, - { - "epoch": 21.228161668839636, - "grad_norm": 1.5384207963943481, - "learning_rate": 5.96070351758794e-05, - "loss": 5.2487, - "step": 40705 - }, - { - "epoch": 21.228683181225556, - "grad_norm": 1.626287579536438, - "learning_rate": 5.9606030150753765e-05, - "loss": 4.7199, - "step": 40706 - }, - { - "epoch": 21.22920469361147, - "grad_norm": 1.6779847145080566, - "learning_rate": 5.960502512562814e-05, - "loss": 5.2069, - "step": 40707 - }, - { - "epoch": 21.22972620599739, - "grad_norm": 1.5597922801971436, - "learning_rate": 5.9604020100502514e-05, - "loss": 5.4751, - "step": 40708 - }, - { - "epoch": 21.23024771838331, - "grad_norm": 1.5380358695983887, - "learning_rate": 5.960301507537689e-05, - "loss": 5.369, - "step": 40709 - }, - { - "epoch": 21.23076923076923, - "grad_norm": 1.4512758255004883, - "learning_rate": 5.9602010050251256e-05, - "loss": 5.2023, - "step": 40710 - }, - { - "epoch": 21.23129074315515, - "grad_norm": 1.5846806764602661, - "learning_rate": 5.9601005025125634e-05, - "loss": 5.0814, - "step": 40711 - }, - { - "epoch": 21.23181225554107, - "grad_norm": 1.4970588684082031, - "learning_rate": 5.96e-05, - "loss": 5.3202, - "step": 40712 - }, - { - "epoch": 21.23233376792699, - "grad_norm": 1.4731444120407104, - "learning_rate": 5.9598994974874376e-05, - "loss": 5.5008, - "step": 40713 - }, - { - "epoch": 21.232855280312908, - "grad_norm": 1.372088074684143, - "learning_rate": 5.959798994974875e-05, - "loss": 5.5865, - "step": 40714 - }, - { - "epoch": 21.233376792698827, - "grad_norm": 1.712204098701477, - "learning_rate": 5.9596984924623125e-05, - "loss": 5.1757, - "step": 40715 - }, - { - "epoch": 21.233898305084747, - "grad_norm": 1.4219629764556885, - "learning_rate": 5.959597989949749e-05, - "loss": 5.6423, - "step": 40716 - }, - { - "epoch": 21.234419817470666, - "grad_norm": 1.51162588596344, - "learning_rate": 5.959497487437187e-05, - "loss": 5.495, - "step": 40717 - }, - { - "epoch": 21.234941329856586, - "grad_norm": 1.4844706058502197, - "learning_rate": 5.959396984924623e-05, - "loss": 5.5553, - "step": 40718 - }, - { - "epoch": 21.2354628422425, - "grad_norm": 1.5182784795761108, - "learning_rate": 5.95929648241206e-05, - "loss": 5.2003, - "step": 40719 - }, - { - "epoch": 21.23598435462842, - "grad_norm": 1.5879640579223633, - "learning_rate": 5.959195979899498e-05, - "loss": 5.3074, - "step": 40720 - }, - { - "epoch": 21.23650586701434, - "grad_norm": 1.5592247247695923, - "learning_rate": 5.9590954773869345e-05, - "loss": 5.2428, - "step": 40721 - }, - { - "epoch": 21.23702737940026, - "grad_norm": 1.5055763721466064, - "learning_rate": 5.958994974874372e-05, - "loss": 5.3936, - "step": 40722 - }, - { - "epoch": 21.23754889178618, - "grad_norm": 1.5267512798309326, - "learning_rate": 5.958894472361809e-05, - "loss": 5.0857, - "step": 40723 - }, - { - "epoch": 21.2380704041721, - "grad_norm": 1.5841096639633179, - "learning_rate": 5.9587939698492465e-05, - "loss": 4.9565, - "step": 40724 - }, - { - "epoch": 21.23859191655802, - "grad_norm": 1.5224006175994873, - "learning_rate": 5.9586934673366836e-05, - "loss": 5.169, - "step": 40725 - }, - { - "epoch": 21.239113428943938, - "grad_norm": 1.5191453695297241, - "learning_rate": 5.9585929648241214e-05, - "loss": 5.613, - "step": 40726 - }, - { - "epoch": 21.239634941329857, - "grad_norm": 1.5786255598068237, - "learning_rate": 5.958492462311558e-05, - "loss": 5.3386, - "step": 40727 - }, - { - "epoch": 21.240156453715777, - "grad_norm": 1.4854495525360107, - "learning_rate": 5.9583919597989956e-05, - "loss": 5.1638, - "step": 40728 - }, - { - "epoch": 21.240677966101696, - "grad_norm": 1.646871566772461, - "learning_rate": 5.958291457286433e-05, - "loss": 4.7628, - "step": 40729 - }, - { - "epoch": 21.241199478487616, - "grad_norm": 1.3875795602798462, - "learning_rate": 5.9581909547738705e-05, - "loss": 5.5777, - "step": 40730 - }, - { - "epoch": 21.24172099087353, - "grad_norm": 1.3808659315109253, - "learning_rate": 5.958090452261307e-05, - "loss": 5.5746, - "step": 40731 - }, - { - "epoch": 21.24224250325945, - "grad_norm": 1.5761308670043945, - "learning_rate": 5.957989949748743e-05, - "loss": 4.9748, - "step": 40732 - }, - { - "epoch": 21.24276401564537, - "grad_norm": 1.5349422693252563, - "learning_rate": 5.957889447236181e-05, - "loss": 5.3548, - "step": 40733 - }, - { - "epoch": 21.24328552803129, - "grad_norm": 1.5793297290802002, - "learning_rate": 5.957788944723618e-05, - "loss": 5.2812, - "step": 40734 - }, - { - "epoch": 21.24380704041721, - "grad_norm": 1.4175671339035034, - "learning_rate": 5.957688442211056e-05, - "loss": 5.2258, - "step": 40735 - }, - { - "epoch": 21.24432855280313, - "grad_norm": 1.5572582483291626, - "learning_rate": 5.9575879396984924e-05, - "loss": 5.0401, - "step": 40736 - }, - { - "epoch": 21.24485006518905, - "grad_norm": 1.4599263668060303, - "learning_rate": 5.95748743718593e-05, - "loss": 5.3435, - "step": 40737 - }, - { - "epoch": 21.245371577574968, - "grad_norm": 1.5179498195648193, - "learning_rate": 5.9573869346733666e-05, - "loss": 5.486, - "step": 40738 - }, - { - "epoch": 21.245893089960887, - "grad_norm": 1.5965720415115356, - "learning_rate": 5.9572864321608044e-05, - "loss": 4.8161, - "step": 40739 - }, - { - "epoch": 21.246414602346807, - "grad_norm": 1.508539080619812, - "learning_rate": 5.9571859296482415e-05, - "loss": 5.2906, - "step": 40740 - }, - { - "epoch": 21.246936114732726, - "grad_norm": 1.4128646850585938, - "learning_rate": 5.957085427135679e-05, - "loss": 5.0294, - "step": 40741 - }, - { - "epoch": 21.247457627118646, - "grad_norm": 1.4843413829803467, - "learning_rate": 5.956984924623116e-05, - "loss": 4.9894, - "step": 40742 - }, - { - "epoch": 21.24797913950456, - "grad_norm": 1.5948389768600464, - "learning_rate": 5.9568844221105535e-05, - "loss": 5.2611, - "step": 40743 - }, - { - "epoch": 21.24850065189048, - "grad_norm": 1.5166685581207275, - "learning_rate": 5.95678391959799e-05, - "loss": 5.1153, - "step": 40744 - }, - { - "epoch": 21.2490221642764, - "grad_norm": 1.7201374769210815, - "learning_rate": 5.956683417085427e-05, - "loss": 5.2824, - "step": 40745 - }, - { - "epoch": 21.24954367666232, - "grad_norm": 1.5264537334442139, - "learning_rate": 5.956582914572865e-05, - "loss": 5.0989, - "step": 40746 - }, - { - "epoch": 21.25006518904824, - "grad_norm": 1.4438689947128296, - "learning_rate": 5.956482412060301e-05, - "loss": 5.3879, - "step": 40747 - }, - { - "epoch": 21.25058670143416, - "grad_norm": 1.4175055027008057, - "learning_rate": 5.956381909547739e-05, - "loss": 4.7027, - "step": 40748 - }, - { - "epoch": 21.25110821382008, - "grad_norm": 1.570082187652588, - "learning_rate": 5.9562814070351755e-05, - "loss": 5.1995, - "step": 40749 - }, - { - "epoch": 21.251629726205998, - "grad_norm": 1.6001746654510498, - "learning_rate": 5.956180904522613e-05, - "loss": 5.2271, - "step": 40750 - }, - { - "epoch": 21.252151238591917, - "grad_norm": 1.5747109651565552, - "learning_rate": 5.9560804020100503e-05, - "loss": 5.2623, - "step": 40751 - }, - { - "epoch": 21.252672750977837, - "grad_norm": 1.5383111238479614, - "learning_rate": 5.955979899497488e-05, - "loss": 5.575, - "step": 40752 - }, - { - "epoch": 21.253194263363756, - "grad_norm": 1.471304178237915, - "learning_rate": 5.9558793969849246e-05, - "loss": 5.3038, - "step": 40753 - }, - { - "epoch": 21.253715775749676, - "grad_norm": 1.5575268268585205, - "learning_rate": 5.9557788944723623e-05, - "loss": 4.7148, - "step": 40754 - }, - { - "epoch": 21.25423728813559, - "grad_norm": 1.4256460666656494, - "learning_rate": 5.9556783919597994e-05, - "loss": 5.6964, - "step": 40755 - }, - { - "epoch": 21.25475880052151, - "grad_norm": 1.5142194032669067, - "learning_rate": 5.955577889447237e-05, - "loss": 5.5685, - "step": 40756 - }, - { - "epoch": 21.25528031290743, - "grad_norm": 1.657931923866272, - "learning_rate": 5.9554773869346737e-05, - "loss": 5.0898, - "step": 40757 - }, - { - "epoch": 21.25580182529335, - "grad_norm": 1.5456980466842651, - "learning_rate": 5.9553768844221114e-05, - "loss": 5.1292, - "step": 40758 - }, - { - "epoch": 21.25632333767927, - "grad_norm": 1.5730788707733154, - "learning_rate": 5.955276381909548e-05, - "loss": 5.281, - "step": 40759 - }, - { - "epoch": 21.25684485006519, - "grad_norm": 1.5308345556259155, - "learning_rate": 5.955175879396985e-05, - "loss": 5.2778, - "step": 40760 - }, - { - "epoch": 21.25736636245111, - "grad_norm": 1.5675803422927856, - "learning_rate": 5.955075376884423e-05, - "loss": 4.955, - "step": 40761 - }, - { - "epoch": 21.257887874837028, - "grad_norm": 1.5283371210098267, - "learning_rate": 5.954974874371859e-05, - "loss": 4.9722, - "step": 40762 - }, - { - "epoch": 21.258409387222947, - "grad_norm": 1.522217035293579, - "learning_rate": 5.954874371859297e-05, - "loss": 5.3346, - "step": 40763 - }, - { - "epoch": 21.258930899608867, - "grad_norm": 1.6428052186965942, - "learning_rate": 5.9547738693467334e-05, - "loss": 5.3357, - "step": 40764 - }, - { - "epoch": 21.259452411994786, - "grad_norm": 1.5825231075286865, - "learning_rate": 5.954673366834171e-05, - "loss": 4.8384, - "step": 40765 - }, - { - "epoch": 21.259973924380706, - "grad_norm": 1.5588725805282593, - "learning_rate": 5.954572864321608e-05, - "loss": 5.3867, - "step": 40766 - }, - { - "epoch": 21.26049543676662, - "grad_norm": 1.54362952709198, - "learning_rate": 5.954472361809046e-05, - "loss": 5.1471, - "step": 40767 - }, - { - "epoch": 21.26101694915254, - "grad_norm": 1.5852717161178589, - "learning_rate": 5.9543718592964825e-05, - "loss": 5.2636, - "step": 40768 - }, - { - "epoch": 21.26153846153846, - "grad_norm": 1.5657789707183838, - "learning_rate": 5.95427135678392e-05, - "loss": 4.725, - "step": 40769 - }, - { - "epoch": 21.26205997392438, - "grad_norm": 1.561194658279419, - "learning_rate": 5.954170854271357e-05, - "loss": 5.0246, - "step": 40770 - }, - { - "epoch": 21.2625814863103, - "grad_norm": 1.5646576881408691, - "learning_rate": 5.9540703517587945e-05, - "loss": 4.9327, - "step": 40771 - }, - { - "epoch": 21.26310299869622, - "grad_norm": 1.6228318214416504, - "learning_rate": 5.9539698492462316e-05, - "loss": 5.09, - "step": 40772 - }, - { - "epoch": 21.26362451108214, - "grad_norm": 1.5183427333831787, - "learning_rate": 5.953869346733668e-05, - "loss": 5.1868, - "step": 40773 - }, - { - "epoch": 21.264146023468058, - "grad_norm": 1.6428048610687256, - "learning_rate": 5.953768844221106e-05, - "loss": 5.0283, - "step": 40774 - }, - { - "epoch": 21.264667535853977, - "grad_norm": 1.564725637435913, - "learning_rate": 5.953668341708543e-05, - "loss": 4.6594, - "step": 40775 - }, - { - "epoch": 21.265189048239897, - "grad_norm": 1.5947381258010864, - "learning_rate": 5.953567839195981e-05, - "loss": 5.4537, - "step": 40776 - }, - { - "epoch": 21.265710560625816, - "grad_norm": 1.53286874294281, - "learning_rate": 5.953467336683417e-05, - "loss": 4.7313, - "step": 40777 - }, - { - "epoch": 21.266232073011736, - "grad_norm": 1.7175650596618652, - "learning_rate": 5.953366834170855e-05, - "loss": 5.0876, - "step": 40778 - }, - { - "epoch": 21.26675358539765, - "grad_norm": 1.606264591217041, - "learning_rate": 5.953266331658291e-05, - "loss": 5.0314, - "step": 40779 - }, - { - "epoch": 21.26727509778357, - "grad_norm": 1.5979820489883423, - "learning_rate": 5.953165829145729e-05, - "loss": 5.2411, - "step": 40780 - }, - { - "epoch": 21.26779661016949, - "grad_norm": 1.6378285884857178, - "learning_rate": 5.953065326633166e-05, - "loss": 5.0771, - "step": 40781 - }, - { - "epoch": 21.26831812255541, - "grad_norm": 1.4904508590698242, - "learning_rate": 5.952964824120604e-05, - "loss": 5.5173, - "step": 40782 - }, - { - "epoch": 21.26883963494133, - "grad_norm": 1.6446096897125244, - "learning_rate": 5.9528643216080404e-05, - "loss": 4.5363, - "step": 40783 - }, - { - "epoch": 21.26936114732725, - "grad_norm": 1.5241581201553345, - "learning_rate": 5.952763819095478e-05, - "loss": 5.1654, - "step": 40784 - }, - { - "epoch": 21.26988265971317, - "grad_norm": 1.547930121421814, - "learning_rate": 5.9526633165829146e-05, - "loss": 5.085, - "step": 40785 - }, - { - "epoch": 21.270404172099088, - "grad_norm": 1.5604771375656128, - "learning_rate": 5.952562814070352e-05, - "loss": 5.3727, - "step": 40786 - }, - { - "epoch": 21.270925684485007, - "grad_norm": 1.662916660308838, - "learning_rate": 5.9524623115577895e-05, - "loss": 5.2492, - "step": 40787 - }, - { - "epoch": 21.271447196870927, - "grad_norm": 1.7025134563446045, - "learning_rate": 5.952361809045226e-05, - "loss": 5.3127, - "step": 40788 - }, - { - "epoch": 21.271968709256846, - "grad_norm": 1.5748717784881592, - "learning_rate": 5.952261306532664e-05, - "loss": 4.988, - "step": 40789 - }, - { - "epoch": 21.272490221642762, - "grad_norm": 1.5459505319595337, - "learning_rate": 5.9521608040201e-05, - "loss": 5.5061, - "step": 40790 - }, - { - "epoch": 21.27301173402868, - "grad_norm": 1.4504777193069458, - "learning_rate": 5.952060301507538e-05, - "loss": 5.3292, - "step": 40791 - }, - { - "epoch": 21.2735332464146, - "grad_norm": 1.5916770696640015, - "learning_rate": 5.951959798994975e-05, - "loss": 4.8519, - "step": 40792 - }, - { - "epoch": 21.27405475880052, - "grad_norm": 1.5238193273544312, - "learning_rate": 5.951859296482413e-05, - "loss": 5.6013, - "step": 40793 - }, - { - "epoch": 21.27457627118644, - "grad_norm": 1.4957349300384521, - "learning_rate": 5.951758793969849e-05, - "loss": 5.3082, - "step": 40794 - }, - { - "epoch": 21.27509778357236, - "grad_norm": 1.524200201034546, - "learning_rate": 5.951658291457287e-05, - "loss": 5.6481, - "step": 40795 - }, - { - "epoch": 21.27561929595828, - "grad_norm": 1.4160727262496948, - "learning_rate": 5.951557788944724e-05, - "loss": 5.6878, - "step": 40796 - }, - { - "epoch": 21.2761408083442, - "grad_norm": 1.5115456581115723, - "learning_rate": 5.951457286432162e-05, - "loss": 5.4487, - "step": 40797 - }, - { - "epoch": 21.276662320730118, - "grad_norm": 1.7243092060089111, - "learning_rate": 5.9513567839195984e-05, - "loss": 4.4869, - "step": 40798 - }, - { - "epoch": 21.277183833116037, - "grad_norm": 1.5576086044311523, - "learning_rate": 5.951256281407035e-05, - "loss": 5.4606, - "step": 40799 - }, - { - "epoch": 21.277705345501957, - "grad_norm": 1.5223690271377563, - "learning_rate": 5.9511557788944726e-05, - "loss": 5.3064, - "step": 40800 - }, - { - "epoch": 21.278226857887876, - "grad_norm": 1.7975701093673706, - "learning_rate": 5.95105527638191e-05, - "loss": 5.0122, - "step": 40801 - }, - { - "epoch": 21.278748370273792, - "grad_norm": Infinity, - "learning_rate": 5.95105527638191e-05, - "loss": 4.0356, - "step": 40802 - }, - { - "epoch": 21.27926988265971, - "grad_norm": 1.5575697422027588, - "learning_rate": 5.9509547738693475e-05, - "loss": 5.5336, - "step": 40803 - }, - { - "epoch": 21.27979139504563, - "grad_norm": 1.4135459661483765, - "learning_rate": 5.950854271356784e-05, - "loss": 5.486, - "step": 40804 - }, - { - "epoch": 21.28031290743155, - "grad_norm": 1.5902893543243408, - "learning_rate": 5.950753768844222e-05, - "loss": 5.3169, - "step": 40805 - }, - { - "epoch": 21.28083441981747, - "grad_norm": 1.6225979328155518, - "learning_rate": 5.950653266331658e-05, - "loss": 5.1091, - "step": 40806 - }, - { - "epoch": 21.28135593220339, - "grad_norm": 1.4464935064315796, - "learning_rate": 5.950552763819096e-05, - "loss": 5.198, - "step": 40807 - }, - { - "epoch": 21.28187744458931, - "grad_norm": 1.5664358139038086, - "learning_rate": 5.950452261306533e-05, - "loss": 5.302, - "step": 40808 - }, - { - "epoch": 21.28239895697523, - "grad_norm": 1.519441843032837, - "learning_rate": 5.950351758793971e-05, - "loss": 5.065, - "step": 40809 - }, - { - "epoch": 21.282920469361148, - "grad_norm": 1.554803729057312, - "learning_rate": 5.950251256281407e-05, - "loss": 5.3305, - "step": 40810 - }, - { - "epoch": 21.283441981747067, - "grad_norm": 1.404586911201477, - "learning_rate": 5.950150753768845e-05, - "loss": 5.2685, - "step": 40811 - }, - { - "epoch": 21.283963494132987, - "grad_norm": 1.5561023950576782, - "learning_rate": 5.9500502512562814e-05, - "loss": 5.1981, - "step": 40812 - }, - { - "epoch": 21.284485006518906, - "grad_norm": 1.5188193321228027, - "learning_rate": 5.9499497487437185e-05, - "loss": 4.8598, - "step": 40813 - }, - { - "epoch": 21.285006518904822, - "grad_norm": 1.563275933265686, - "learning_rate": 5.949849246231156e-05, - "loss": 5.1793, - "step": 40814 - }, - { - "epoch": 21.285528031290742, - "grad_norm": 1.5542573928833008, - "learning_rate": 5.949748743718593e-05, - "loss": 5.2132, - "step": 40815 - }, - { - "epoch": 21.28604954367666, - "grad_norm": 1.497840166091919, - "learning_rate": 5.9496482412060305e-05, - "loss": 5.6925, - "step": 40816 - }, - { - "epoch": 21.28657105606258, - "grad_norm": 1.5876822471618652, - "learning_rate": 5.9495477386934676e-05, - "loss": 5.0219, - "step": 40817 - }, - { - "epoch": 21.2870925684485, - "grad_norm": 1.4898223876953125, - "learning_rate": 5.9494472361809054e-05, - "loss": 5.3229, - "step": 40818 - }, - { - "epoch": 21.28761408083442, - "grad_norm": 1.5026066303253174, - "learning_rate": 5.949346733668342e-05, - "loss": 5.6066, - "step": 40819 - }, - { - "epoch": 21.28813559322034, - "grad_norm": 1.5082387924194336, - "learning_rate": 5.9492462311557796e-05, - "loss": 5.3428, - "step": 40820 - }, - { - "epoch": 21.28865710560626, - "grad_norm": 1.5716331005096436, - "learning_rate": 5.949145728643216e-05, - "loss": 5.5041, - "step": 40821 - }, - { - "epoch": 21.289178617992178, - "grad_norm": 1.5413674116134644, - "learning_rate": 5.949045226130654e-05, - "loss": 5.262, - "step": 40822 - }, - { - "epoch": 21.289700130378097, - "grad_norm": 1.451149582862854, - "learning_rate": 5.948944723618091e-05, - "loss": 5.272, - "step": 40823 - }, - { - "epoch": 21.290221642764017, - "grad_norm": 1.5008817911148071, - "learning_rate": 5.948844221105529e-05, - "loss": 5.4996, - "step": 40824 - }, - { - "epoch": 21.290743155149936, - "grad_norm": 1.447507619857788, - "learning_rate": 5.948743718592965e-05, - "loss": 5.5659, - "step": 40825 - }, - { - "epoch": 21.291264667535852, - "grad_norm": 1.4228202104568481, - "learning_rate": 5.9486432160804016e-05, - "loss": 5.6322, - "step": 40826 - }, - { - "epoch": 21.291786179921772, - "grad_norm": 1.4734055995941162, - "learning_rate": 5.948542713567839e-05, - "loss": 5.6801, - "step": 40827 - }, - { - "epoch": 21.29230769230769, - "grad_norm": 1.5387847423553467, - "learning_rate": 5.9484422110552764e-05, - "loss": 5.5591, - "step": 40828 - }, - { - "epoch": 21.29282920469361, - "grad_norm": 1.406144380569458, - "learning_rate": 5.948341708542714e-05, - "loss": 5.366, - "step": 40829 - }, - { - "epoch": 21.29335071707953, - "grad_norm": 1.406234860420227, - "learning_rate": 5.9482412060301506e-05, - "loss": 5.1303, - "step": 40830 - }, - { - "epoch": 21.29387222946545, - "grad_norm": 1.4289017915725708, - "learning_rate": 5.9481407035175884e-05, - "loss": 5.1968, - "step": 40831 - }, - { - "epoch": 21.29439374185137, - "grad_norm": 1.585181474685669, - "learning_rate": 5.948040201005025e-05, - "loss": 5.4074, - "step": 40832 - }, - { - "epoch": 21.29491525423729, - "grad_norm": 1.6468982696533203, - "learning_rate": 5.9479396984924626e-05, - "loss": 5.1316, - "step": 40833 - }, - { - "epoch": 21.295436766623208, - "grad_norm": 1.5228610038757324, - "learning_rate": 5.9478391959799e-05, - "loss": 5.5807, - "step": 40834 - }, - { - "epoch": 21.295958279009128, - "grad_norm": 1.491750955581665, - "learning_rate": 5.9477386934673375e-05, - "loss": 5.3337, - "step": 40835 - }, - { - "epoch": 21.296479791395047, - "grad_norm": 1.4969621896743774, - "learning_rate": 5.947638190954774e-05, - "loss": 5.1844, - "step": 40836 - }, - { - "epoch": 21.297001303780966, - "grad_norm": 1.5636826753616333, - "learning_rate": 5.947537688442212e-05, - "loss": 5.4384, - "step": 40837 - }, - { - "epoch": 21.297522816166882, - "grad_norm": 1.674810528755188, - "learning_rate": 5.947437185929648e-05, - "loss": 5.2384, - "step": 40838 - }, - { - "epoch": 21.298044328552802, - "grad_norm": 1.5583528280258179, - "learning_rate": 5.947336683417085e-05, - "loss": 5.404, - "step": 40839 - }, - { - "epoch": 21.29856584093872, - "grad_norm": 1.571091890335083, - "learning_rate": 5.947236180904523e-05, - "loss": 5.2897, - "step": 40840 - }, - { - "epoch": 21.29908735332464, - "grad_norm": 1.5122441053390503, - "learning_rate": 5.9471356783919595e-05, - "loss": 4.8536, - "step": 40841 - }, - { - "epoch": 21.29960886571056, - "grad_norm": 1.4769506454467773, - "learning_rate": 5.947035175879397e-05, - "loss": 5.6909, - "step": 40842 - }, - { - "epoch": 21.30013037809648, - "grad_norm": 1.4571459293365479, - "learning_rate": 5.9469346733668344e-05, - "loss": 5.4572, - "step": 40843 - }, - { - "epoch": 21.3006518904824, - "grad_norm": 1.543034315109253, - "learning_rate": 5.946834170854272e-05, - "loss": 5.3452, - "step": 40844 - }, - { - "epoch": 21.30117340286832, - "grad_norm": 1.587611436843872, - "learning_rate": 5.9467336683417086e-05, - "loss": 5.0673, - "step": 40845 - }, - { - "epoch": 21.301694915254238, - "grad_norm": 1.6007832288742065, - "learning_rate": 5.9466331658291464e-05, - "loss": 5.0559, - "step": 40846 - }, - { - "epoch": 21.302216427640158, - "grad_norm": 1.5647269487380981, - "learning_rate": 5.946532663316583e-05, - "loss": 5.4144, - "step": 40847 - }, - { - "epoch": 21.302737940026077, - "grad_norm": 1.5859144926071167, - "learning_rate": 5.9464321608040206e-05, - "loss": 5.229, - "step": 40848 - }, - { - "epoch": 21.303259452411996, - "grad_norm": 1.5060439109802246, - "learning_rate": 5.946331658291458e-05, - "loss": 5.4049, - "step": 40849 - }, - { - "epoch": 21.303780964797912, - "grad_norm": 1.6064549684524536, - "learning_rate": 5.9462311557788955e-05, - "loss": 5.3078, - "step": 40850 - }, - { - "epoch": 21.304302477183832, - "grad_norm": 1.5979022979736328, - "learning_rate": 5.946130653266332e-05, - "loss": 5.0798, - "step": 40851 - }, - { - "epoch": 21.30482398956975, - "grad_norm": 1.4101814031600952, - "learning_rate": 5.94603015075377e-05, - "loss": 5.1673, - "step": 40852 - }, - { - "epoch": 21.30534550195567, - "grad_norm": 1.4803450107574463, - "learning_rate": 5.945929648241206e-05, - "loss": 5.685, - "step": 40853 - }, - { - "epoch": 21.30586701434159, - "grad_norm": 1.4151355028152466, - "learning_rate": 5.945829145728643e-05, - "loss": 5.6212, - "step": 40854 - }, - { - "epoch": 21.30638852672751, - "grad_norm": 1.4481624364852905, - "learning_rate": 5.945728643216081e-05, - "loss": 5.6481, - "step": 40855 - }, - { - "epoch": 21.30691003911343, - "grad_norm": 1.4457807540893555, - "learning_rate": 5.9456281407035174e-05, - "loss": 5.6049, - "step": 40856 - }, - { - "epoch": 21.30743155149935, - "grad_norm": 1.7257094383239746, - "learning_rate": 5.945527638190955e-05, - "loss": 4.7543, - "step": 40857 - }, - { - "epoch": 21.307953063885268, - "grad_norm": 1.5033897161483765, - "learning_rate": 5.9454271356783916e-05, - "loss": 5.2218, - "step": 40858 - }, - { - "epoch": 21.308474576271188, - "grad_norm": 1.4415249824523926, - "learning_rate": 5.9453266331658294e-05, - "loss": 5.4551, - "step": 40859 - }, - { - "epoch": 21.308996088657107, - "grad_norm": 1.6185505390167236, - "learning_rate": 5.9452261306532665e-05, - "loss": 5.0613, - "step": 40860 - }, - { - "epoch": 21.309517601043027, - "grad_norm": 1.5475696325302124, - "learning_rate": 5.945125628140704e-05, - "loss": 5.5256, - "step": 40861 - }, - { - "epoch": 21.310039113428942, - "grad_norm": 1.5712594985961914, - "learning_rate": 5.945025125628141e-05, - "loss": 4.9915, - "step": 40862 - }, - { - "epoch": 21.310560625814862, - "grad_norm": 1.6783169507980347, - "learning_rate": 5.9449246231155785e-05, - "loss": 5.3569, - "step": 40863 - }, - { - "epoch": 21.31108213820078, - "grad_norm": 1.5205625295639038, - "learning_rate": 5.9448241206030156e-05, - "loss": 4.2032, - "step": 40864 - }, - { - "epoch": 21.3116036505867, - "grad_norm": 1.5024465322494507, - "learning_rate": 5.9447236180904534e-05, - "loss": 5.2259, - "step": 40865 - }, - { - "epoch": 21.31212516297262, - "grad_norm": 1.5990606546401978, - "learning_rate": 5.94462311557789e-05, - "loss": 5.2908, - "step": 40866 - }, - { - "epoch": 21.31264667535854, - "grad_norm": 1.7005352973937988, - "learning_rate": 5.944522613065326e-05, - "loss": 4.8477, - "step": 40867 - }, - { - "epoch": 21.31316818774446, - "grad_norm": 1.4687269926071167, - "learning_rate": 5.944422110552764e-05, - "loss": 4.8357, - "step": 40868 - }, - { - "epoch": 21.31368970013038, - "grad_norm": 1.5487514734268188, - "learning_rate": 5.944321608040201e-05, - "loss": 5.3452, - "step": 40869 - }, - { - "epoch": 21.314211212516298, - "grad_norm": 1.5928713083267212, - "learning_rate": 5.944221105527639e-05, - "loss": 4.9163, - "step": 40870 - }, - { - "epoch": 21.314732724902218, - "grad_norm": 1.4816498756408691, - "learning_rate": 5.9441206030150753e-05, - "loss": 5.5664, - "step": 40871 - }, - { - "epoch": 21.315254237288137, - "grad_norm": 1.4892343282699585, - "learning_rate": 5.944020100502513e-05, - "loss": 5.2443, - "step": 40872 - }, - { - "epoch": 21.315775749674053, - "grad_norm": 1.5128345489501953, - "learning_rate": 5.9439195979899496e-05, - "loss": 5.6021, - "step": 40873 - }, - { - "epoch": 21.316297262059972, - "grad_norm": 1.555304765701294, - "learning_rate": 5.9438190954773873e-05, - "loss": 5.4058, - "step": 40874 - }, - { - "epoch": 21.316818774445892, - "grad_norm": 1.5632262229919434, - "learning_rate": 5.9437185929648244e-05, - "loss": 4.8621, - "step": 40875 - }, - { - "epoch": 21.31734028683181, - "grad_norm": 1.4406673908233643, - "learning_rate": 5.943618090452262e-05, - "loss": 5.6848, - "step": 40876 - }, - { - "epoch": 21.31786179921773, - "grad_norm": 1.511650562286377, - "learning_rate": 5.9435175879396987e-05, - "loss": 5.5256, - "step": 40877 - }, - { - "epoch": 21.31838331160365, - "grad_norm": 1.4770852327346802, - "learning_rate": 5.9434170854271364e-05, - "loss": 5.0751, - "step": 40878 - }, - { - "epoch": 21.31890482398957, - "grad_norm": 1.649117350578308, - "learning_rate": 5.943316582914573e-05, - "loss": 5.2123, - "step": 40879 - }, - { - "epoch": 21.31942633637549, - "grad_norm": 1.532670021057129, - "learning_rate": 5.94321608040201e-05, - "loss": 5.3957, - "step": 40880 - }, - { - "epoch": 21.31994784876141, - "grad_norm": 1.4848772287368774, - "learning_rate": 5.943115577889448e-05, - "loss": 5.0417, - "step": 40881 - }, - { - "epoch": 21.320469361147328, - "grad_norm": 1.581044316291809, - "learning_rate": 5.943015075376884e-05, - "loss": 5.2071, - "step": 40882 - }, - { - "epoch": 21.320990873533248, - "grad_norm": 1.6283496618270874, - "learning_rate": 5.942914572864322e-05, - "loss": 4.9686, - "step": 40883 - }, - { - "epoch": 21.321512385919167, - "grad_norm": 1.5617624521255493, - "learning_rate": 5.942814070351759e-05, - "loss": 4.9491, - "step": 40884 - }, - { - "epoch": 21.322033898305083, - "grad_norm": 1.5271189212799072, - "learning_rate": 5.942713567839197e-05, - "loss": 5.5983, - "step": 40885 - }, - { - "epoch": 21.322555410691002, - "grad_norm": 1.740585207939148, - "learning_rate": 5.942613065326633e-05, - "loss": 4.5544, - "step": 40886 - }, - { - "epoch": 21.323076923076922, - "grad_norm": 1.5167793035507202, - "learning_rate": 5.942512562814071e-05, - "loss": 5.5948, - "step": 40887 - }, - { - "epoch": 21.32359843546284, - "grad_norm": 1.4684667587280273, - "learning_rate": 5.9424120603015075e-05, - "loss": 5.2968, - "step": 40888 - }, - { - "epoch": 21.32411994784876, - "grad_norm": 1.5451933145523071, - "learning_rate": 5.942311557788945e-05, - "loss": 5.3635, - "step": 40889 - }, - { - "epoch": 21.32464146023468, - "grad_norm": 1.4969037771224976, - "learning_rate": 5.9422110552763824e-05, - "loss": 5.3454, - "step": 40890 - }, - { - "epoch": 21.3251629726206, - "grad_norm": 1.4507194757461548, - "learning_rate": 5.94211055276382e-05, - "loss": 4.9199, - "step": 40891 - }, - { - "epoch": 21.32568448500652, - "grad_norm": 1.4298386573791504, - "learning_rate": 5.9420100502512566e-05, - "loss": 5.5899, - "step": 40892 - }, - { - "epoch": 21.32620599739244, - "grad_norm": 1.4388476610183716, - "learning_rate": 5.941909547738693e-05, - "loss": 5.3227, - "step": 40893 - }, - { - "epoch": 21.326727509778358, - "grad_norm": 1.6280312538146973, - "learning_rate": 5.941809045226131e-05, - "loss": 5.4076, - "step": 40894 - }, - { - "epoch": 21.327249022164278, - "grad_norm": 1.4186466932296753, - "learning_rate": 5.941708542713568e-05, - "loss": 5.6743, - "step": 40895 - }, - { - "epoch": 21.327770534550197, - "grad_norm": 1.4409352540969849, - "learning_rate": 5.941608040201006e-05, - "loss": 4.8889, - "step": 40896 - }, - { - "epoch": 21.328292046936113, - "grad_norm": 1.5117113590240479, - "learning_rate": 5.941507537688442e-05, - "loss": 5.5685, - "step": 40897 - }, - { - "epoch": 21.328813559322032, - "grad_norm": 1.615553617477417, - "learning_rate": 5.94140703517588e-05, - "loss": 5.2513, - "step": 40898 - }, - { - "epoch": 21.329335071707952, - "grad_norm": 1.4416154623031616, - "learning_rate": 5.941306532663316e-05, - "loss": 5.7036, - "step": 40899 - }, - { - "epoch": 21.32985658409387, - "grad_norm": 1.512376070022583, - "learning_rate": 5.941206030150754e-05, - "loss": 5.4488, - "step": 40900 - }, - { - "epoch": 21.33037809647979, - "grad_norm": 1.5365761518478394, - "learning_rate": 5.941105527638191e-05, - "loss": 5.0196, - "step": 40901 - }, - { - "epoch": 21.33089960886571, - "grad_norm": 1.58588445186615, - "learning_rate": 5.941005025125629e-05, - "loss": 5.0691, - "step": 40902 - }, - { - "epoch": 21.33142112125163, - "grad_norm": 1.6771491765975952, - "learning_rate": 5.9409045226130654e-05, - "loss": 5.1472, - "step": 40903 - }, - { - "epoch": 21.33194263363755, - "grad_norm": 1.4331022500991821, - "learning_rate": 5.940804020100503e-05, - "loss": 5.3881, - "step": 40904 - }, - { - "epoch": 21.33246414602347, - "grad_norm": 1.6036385297775269, - "learning_rate": 5.9407035175879396e-05, - "loss": 5.1325, - "step": 40905 - }, - { - "epoch": 21.332985658409388, - "grad_norm": 1.7574537992477417, - "learning_rate": 5.940603015075377e-05, - "loss": 5.3321, - "step": 40906 - }, - { - "epoch": 21.333507170795308, - "grad_norm": 1.5995945930480957, - "learning_rate": 5.9405025125628145e-05, - "loss": 4.899, - "step": 40907 - }, - { - "epoch": 21.334028683181227, - "grad_norm": 1.6289172172546387, - "learning_rate": 5.940402010050251e-05, - "loss": 4.5806, - "step": 40908 - }, - { - "epoch": 21.334550195567143, - "grad_norm": 1.5385991334915161, - "learning_rate": 5.940301507537689e-05, - "loss": 5.793, - "step": 40909 - }, - { - "epoch": 21.335071707953063, - "grad_norm": 1.5260945558547974, - "learning_rate": 5.940201005025126e-05, - "loss": 5.7182, - "step": 40910 - }, - { - "epoch": 21.335593220338982, - "grad_norm": 1.5184907913208008, - "learning_rate": 5.9401005025125636e-05, - "loss": 5.3771, - "step": 40911 - }, - { - "epoch": 21.3361147327249, - "grad_norm": 1.4863297939300537, - "learning_rate": 5.94e-05, - "loss": 5.5571, - "step": 40912 - }, - { - "epoch": 21.33663624511082, - "grad_norm": 1.484132170677185, - "learning_rate": 5.939899497487438e-05, - "loss": 5.0135, - "step": 40913 - }, - { - "epoch": 21.33715775749674, - "grad_norm": 1.5695160627365112, - "learning_rate": 5.939798994974874e-05, - "loss": 4.8249, - "step": 40914 - }, - { - "epoch": 21.33767926988266, - "grad_norm": 1.5659550428390503, - "learning_rate": 5.939698492462312e-05, - "loss": 5.3158, - "step": 40915 - }, - { - "epoch": 21.33820078226858, - "grad_norm": 1.445351243019104, - "learning_rate": 5.939597989949749e-05, - "loss": 5.4378, - "step": 40916 - }, - { - "epoch": 21.3387222946545, - "grad_norm": 1.3961231708526611, - "learning_rate": 5.939497487437187e-05, - "loss": 5.5526, - "step": 40917 - }, - { - "epoch": 21.33924380704042, - "grad_norm": 1.6317304372787476, - "learning_rate": 5.9393969849246234e-05, - "loss": 4.9291, - "step": 40918 - }, - { - "epoch": 21.339765319426338, - "grad_norm": 1.5887010097503662, - "learning_rate": 5.93929648241206e-05, - "loss": 5.3342, - "step": 40919 - }, - { - "epoch": 21.340286831812257, - "grad_norm": 1.512373447418213, - "learning_rate": 5.9391959798994976e-05, - "loss": 4.9451, - "step": 40920 - }, - { - "epoch": 21.340808344198173, - "grad_norm": 1.599632740020752, - "learning_rate": 5.939095477386935e-05, - "loss": 4.7479, - "step": 40921 - }, - { - "epoch": 21.341329856584093, - "grad_norm": 1.5299012660980225, - "learning_rate": 5.9389949748743725e-05, - "loss": 4.8165, - "step": 40922 - }, - { - "epoch": 21.341851368970012, - "grad_norm": 1.5663541555404663, - "learning_rate": 5.938894472361809e-05, - "loss": 5.1421, - "step": 40923 - }, - { - "epoch": 21.34237288135593, - "grad_norm": 1.5739582777023315, - "learning_rate": 5.938793969849247e-05, - "loss": 4.9402, - "step": 40924 - }, - { - "epoch": 21.34289439374185, - "grad_norm": 1.4704631567001343, - "learning_rate": 5.938693467336683e-05, - "loss": 5.1615, - "step": 40925 - }, - { - "epoch": 21.34341590612777, - "grad_norm": 1.584706425666809, - "learning_rate": 5.938592964824121e-05, - "loss": 5.3041, - "step": 40926 - }, - { - "epoch": 21.34393741851369, - "grad_norm": 1.586729884147644, - "learning_rate": 5.938492462311558e-05, - "loss": 5.0961, - "step": 40927 - }, - { - "epoch": 21.34445893089961, - "grad_norm": 1.4849932193756104, - "learning_rate": 5.938391959798996e-05, - "loss": 5.3816, - "step": 40928 - }, - { - "epoch": 21.34498044328553, - "grad_norm": 1.4750339984893799, - "learning_rate": 5.938291457286432e-05, - "loss": 4.7413, - "step": 40929 - }, - { - "epoch": 21.34550195567145, - "grad_norm": 1.4762884378433228, - "learning_rate": 5.93819095477387e-05, - "loss": 5.4931, - "step": 40930 - }, - { - "epoch": 21.346023468057368, - "grad_norm": 1.5032840967178345, - "learning_rate": 5.938090452261307e-05, - "loss": 5.1127, - "step": 40931 - }, - { - "epoch": 21.346544980443287, - "grad_norm": 1.4938700199127197, - "learning_rate": 5.9379899497487435e-05, - "loss": 5.6202, - "step": 40932 - }, - { - "epoch": 21.347066492829203, - "grad_norm": 1.4965375661849976, - "learning_rate": 5.937889447236181e-05, - "loss": 5.1272, - "step": 40933 - }, - { - "epoch": 21.347588005215123, - "grad_norm": 1.555314540863037, - "learning_rate": 5.937788944723618e-05, - "loss": 5.4571, - "step": 40934 - }, - { - "epoch": 21.348109517601042, - "grad_norm": 1.571028470993042, - "learning_rate": 5.9376884422110555e-05, - "loss": 5.4981, - "step": 40935 - }, - { - "epoch": 21.34863102998696, - "grad_norm": 1.4949557781219482, - "learning_rate": 5.9375879396984926e-05, - "loss": 5.3821, - "step": 40936 - }, - { - "epoch": 21.34915254237288, - "grad_norm": 1.583286166191101, - "learning_rate": 5.9374874371859304e-05, - "loss": 5.1934, - "step": 40937 - }, - { - "epoch": 21.3496740547588, - "grad_norm": 1.495627999305725, - "learning_rate": 5.937386934673367e-05, - "loss": 5.0903, - "step": 40938 - }, - { - "epoch": 21.35019556714472, - "grad_norm": 1.6588165760040283, - "learning_rate": 5.9372864321608046e-05, - "loss": 4.8647, - "step": 40939 - }, - { - "epoch": 21.35071707953064, - "grad_norm": 1.5746252536773682, - "learning_rate": 5.937185929648241e-05, - "loss": 5.019, - "step": 40940 - }, - { - "epoch": 21.35123859191656, - "grad_norm": 1.5444270372390747, - "learning_rate": 5.937085427135679e-05, - "loss": 5.4598, - "step": 40941 - }, - { - "epoch": 21.35176010430248, - "grad_norm": 1.5286359786987305, - "learning_rate": 5.936984924623116e-05, - "loss": 5.3243, - "step": 40942 - }, - { - "epoch": 21.352281616688398, - "grad_norm": 1.552099347114563, - "learning_rate": 5.936884422110554e-05, - "loss": 5.4579, - "step": 40943 - }, - { - "epoch": 21.352803129074317, - "grad_norm": 1.4719291925430298, - "learning_rate": 5.93678391959799e-05, - "loss": 5.0688, - "step": 40944 - }, - { - "epoch": 21.353324641460233, - "grad_norm": 1.4922274351119995, - "learning_rate": 5.936683417085428e-05, - "loss": 4.3699, - "step": 40945 - }, - { - "epoch": 21.353846153846153, - "grad_norm": 1.474350929260254, - "learning_rate": 5.936582914572864e-05, - "loss": 5.5728, - "step": 40946 - }, - { - "epoch": 21.354367666232072, - "grad_norm": 1.575473427772522, - "learning_rate": 5.9364824120603014e-05, - "loss": 5.5566, - "step": 40947 - }, - { - "epoch": 21.35488917861799, - "grad_norm": 1.4146181344985962, - "learning_rate": 5.936381909547739e-05, - "loss": 5.8061, - "step": 40948 - }, - { - "epoch": 21.35541069100391, - "grad_norm": 1.403114914894104, - "learning_rate": 5.9362814070351757e-05, - "loss": 5.0211, - "step": 40949 - }, - { - "epoch": 21.35593220338983, - "grad_norm": 1.4499248266220093, - "learning_rate": 5.9361809045226134e-05, - "loss": 5.5439, - "step": 40950 - }, - { - "epoch": 21.35645371577575, - "grad_norm": 1.723794937133789, - "learning_rate": 5.9360804020100505e-05, - "loss": 4.7494, - "step": 40951 - }, - { - "epoch": 21.35697522816167, - "grad_norm": 1.460012435913086, - "learning_rate": 5.935979899497488e-05, - "loss": 5.63, - "step": 40952 - }, - { - "epoch": 21.35749674054759, - "grad_norm": 1.6020454168319702, - "learning_rate": 5.935879396984925e-05, - "loss": 5.2717, - "step": 40953 - }, - { - "epoch": 21.35801825293351, - "grad_norm": 1.6452161073684692, - "learning_rate": 5.9357788944723625e-05, - "loss": 5.3466, - "step": 40954 - }, - { - "epoch": 21.358539765319428, - "grad_norm": 1.7217261791229248, - "learning_rate": 5.935678391959799e-05, - "loss": 5.0425, - "step": 40955 - }, - { - "epoch": 21.359061277705347, - "grad_norm": 1.4887254238128662, - "learning_rate": 5.935577889447237e-05, - "loss": 5.2533, - "step": 40956 - }, - { - "epoch": 21.359582790091263, - "grad_norm": 1.5125799179077148, - "learning_rate": 5.935477386934674e-05, - "loss": 5.7371, - "step": 40957 - }, - { - "epoch": 21.360104302477183, - "grad_norm": 1.4952157735824585, - "learning_rate": 5.9353768844221116e-05, - "loss": 5.3257, - "step": 40958 - }, - { - "epoch": 21.360625814863102, - "grad_norm": 1.5983624458312988, - "learning_rate": 5.935276381909548e-05, - "loss": 5.343, - "step": 40959 - }, - { - "epoch": 21.36114732724902, - "grad_norm": 1.6501446962356567, - "learning_rate": 5.9351758793969845e-05, - "loss": 4.8979, - "step": 40960 - }, - { - "epoch": 21.36166883963494, - "grad_norm": 1.5375128984451294, - "learning_rate": 5.935075376884422e-05, - "loss": 5.6032, - "step": 40961 - }, - { - "epoch": 21.36219035202086, - "grad_norm": 1.4957034587860107, - "learning_rate": 5.9349748743718594e-05, - "loss": 5.6672, - "step": 40962 - }, - { - "epoch": 21.36271186440678, - "grad_norm": 1.6168785095214844, - "learning_rate": 5.934874371859297e-05, - "loss": 5.0255, - "step": 40963 - }, - { - "epoch": 21.3632333767927, - "grad_norm": 1.4650394916534424, - "learning_rate": 5.9347738693467336e-05, - "loss": 5.611, - "step": 40964 - }, - { - "epoch": 21.36375488917862, - "grad_norm": 1.4465699195861816, - "learning_rate": 5.9346733668341714e-05, - "loss": 5.5756, - "step": 40965 - }, - { - "epoch": 21.36427640156454, - "grad_norm": 1.4711436033248901, - "learning_rate": 5.934572864321608e-05, - "loss": 5.3214, - "step": 40966 - }, - { - "epoch": 21.364797913950458, - "grad_norm": 1.452285647392273, - "learning_rate": 5.9344723618090456e-05, - "loss": 5.3879, - "step": 40967 - }, - { - "epoch": 21.365319426336377, - "grad_norm": 1.4728838205337524, - "learning_rate": 5.934371859296483e-05, - "loss": 5.107, - "step": 40968 - }, - { - "epoch": 21.365840938722293, - "grad_norm": 1.64299738407135, - "learning_rate": 5.9342713567839205e-05, - "loss": 5.0141, - "step": 40969 - }, - { - "epoch": 21.366362451108213, - "grad_norm": 1.570068597793579, - "learning_rate": 5.934170854271357e-05, - "loss": 5.2134, - "step": 40970 - }, - { - "epoch": 21.366883963494132, - "grad_norm": 1.563784122467041, - "learning_rate": 5.934070351758795e-05, - "loss": 5.4986, - "step": 40971 - }, - { - "epoch": 21.36740547588005, - "grad_norm": 1.534156084060669, - "learning_rate": 5.933969849246232e-05, - "loss": 5.0891, - "step": 40972 - }, - { - "epoch": 21.36792698826597, - "grad_norm": 1.6588236093521118, - "learning_rate": 5.933869346733668e-05, - "loss": 4.5432, - "step": 40973 - }, - { - "epoch": 21.36844850065189, - "grad_norm": 1.6133077144622803, - "learning_rate": 5.933768844221106e-05, - "loss": 4.9522, - "step": 40974 - }, - { - "epoch": 21.36897001303781, - "grad_norm": 1.6876251697540283, - "learning_rate": 5.9336683417085424e-05, - "loss": 4.6777, - "step": 40975 - }, - { - "epoch": 21.36949152542373, - "grad_norm": 1.5592303276062012, - "learning_rate": 5.93356783919598e-05, - "loss": 5.25, - "step": 40976 - }, - { - "epoch": 21.37001303780965, - "grad_norm": 1.4913634061813354, - "learning_rate": 5.933467336683417e-05, - "loss": 5.4922, - "step": 40977 - }, - { - "epoch": 21.37053455019557, - "grad_norm": 1.6158907413482666, - "learning_rate": 5.933366834170855e-05, - "loss": 5.0464, - "step": 40978 - }, - { - "epoch": 21.371056062581488, - "grad_norm": 1.4802371263504028, - "learning_rate": 5.9332663316582915e-05, - "loss": 5.655, - "step": 40979 - }, - { - "epoch": 21.371577574967404, - "grad_norm": 1.5686805248260498, - "learning_rate": 5.933165829145729e-05, - "loss": 5.0281, - "step": 40980 - }, - { - "epoch": 21.372099087353323, - "grad_norm": 1.5785152912139893, - "learning_rate": 5.933065326633166e-05, - "loss": 5.3534, - "step": 40981 - }, - { - "epoch": 21.372620599739243, - "grad_norm": 1.6537035703659058, - "learning_rate": 5.9329648241206035e-05, - "loss": 4.4954, - "step": 40982 - }, - { - "epoch": 21.373142112125162, - "grad_norm": 1.52735435962677, - "learning_rate": 5.9328643216080406e-05, - "loss": 5.5233, - "step": 40983 - }, - { - "epoch": 21.37366362451108, - "grad_norm": 1.357649564743042, - "learning_rate": 5.9327638190954784e-05, - "loss": 5.7687, - "step": 40984 - }, - { - "epoch": 21.374185136897, - "grad_norm": 1.472169041633606, - "learning_rate": 5.932663316582915e-05, - "loss": 5.3882, - "step": 40985 - }, - { - "epoch": 21.37470664928292, - "grad_norm": 1.5623241662979126, - "learning_rate": 5.932562814070351e-05, - "loss": 5.0453, - "step": 40986 - }, - { - "epoch": 21.37522816166884, - "grad_norm": 1.563472867012024, - "learning_rate": 5.932462311557789e-05, - "loss": 5.1863, - "step": 40987 - }, - { - "epoch": 21.37574967405476, - "grad_norm": 1.6141726970672607, - "learning_rate": 5.932361809045226e-05, - "loss": 5.3087, - "step": 40988 - }, - { - "epoch": 21.37627118644068, - "grad_norm": 1.5981614589691162, - "learning_rate": 5.932261306532664e-05, - "loss": 5.1248, - "step": 40989 - }, - { - "epoch": 21.3767926988266, - "grad_norm": 1.6629509925842285, - "learning_rate": 5.9321608040201004e-05, - "loss": 5.3344, - "step": 40990 - }, - { - "epoch": 21.377314211212518, - "grad_norm": 1.5006574392318726, - "learning_rate": 5.932060301507538e-05, - "loss": 4.8448, - "step": 40991 - }, - { - "epoch": 21.377835723598434, - "grad_norm": 1.757929801940918, - "learning_rate": 5.9319597989949746e-05, - "loss": 5.2148, - "step": 40992 - }, - { - "epoch": 21.378357235984353, - "grad_norm": 1.7399604320526123, - "learning_rate": 5.9318592964824123e-05, - "loss": 5.3898, - "step": 40993 - }, - { - "epoch": 21.378878748370273, - "grad_norm": 1.5058708190917969, - "learning_rate": 5.9317587939698494e-05, - "loss": 5.2788, - "step": 40994 - }, - { - "epoch": 21.379400260756192, - "grad_norm": 1.5097706317901611, - "learning_rate": 5.931658291457287e-05, - "loss": 4.9801, - "step": 40995 - }, - { - "epoch": 21.37992177314211, - "grad_norm": 1.58722722530365, - "learning_rate": 5.9315577889447237e-05, - "loss": 5.5879, - "step": 40996 - }, - { - "epoch": 21.38044328552803, - "grad_norm": 1.5102382898330688, - "learning_rate": 5.9314572864321614e-05, - "loss": 4.9375, - "step": 40997 - }, - { - "epoch": 21.38096479791395, - "grad_norm": 1.5648524761199951, - "learning_rate": 5.9313567839195985e-05, - "loss": 5.1801, - "step": 40998 - }, - { - "epoch": 21.38148631029987, - "grad_norm": 1.5839422941207886, - "learning_rate": 5.931256281407035e-05, - "loss": 4.4988, - "step": 40999 - }, - { - "epoch": 21.38200782268579, - "grad_norm": 1.633887529373169, - "learning_rate": 5.931155778894473e-05, - "loss": 5.37, - "step": 41000 - }, - { - "epoch": 21.38252933507171, - "grad_norm": 1.614801049232483, - "learning_rate": 5.931055276381909e-05, - "loss": 5.5887, - "step": 41001 - }, - { - "epoch": 21.38305084745763, - "grad_norm": 1.4197735786437988, - "learning_rate": 5.930954773869347e-05, - "loss": 5.7268, - "step": 41002 - }, - { - "epoch": 21.383572359843548, - "grad_norm": 1.737823486328125, - "learning_rate": 5.930854271356784e-05, - "loss": 4.3521, - "step": 41003 - }, - { - "epoch": 21.384093872229464, - "grad_norm": 1.5161893367767334, - "learning_rate": 5.930753768844222e-05, - "loss": 5.1994, - "step": 41004 - }, - { - "epoch": 21.384615384615383, - "grad_norm": 1.593271255493164, - "learning_rate": 5.930653266331658e-05, - "loss": 5.1088, - "step": 41005 - }, - { - "epoch": 21.385136897001303, - "grad_norm": 1.5263408422470093, - "learning_rate": 5.930552763819096e-05, - "loss": 5.4207, - "step": 41006 - }, - { - "epoch": 21.385658409387222, - "grad_norm": 1.4992153644561768, - "learning_rate": 5.9304522613065325e-05, - "loss": 5.2577, - "step": 41007 - }, - { - "epoch": 21.38617992177314, - "grad_norm": 1.6267746686935425, - "learning_rate": 5.93035175879397e-05, - "loss": 5.3733, - "step": 41008 - }, - { - "epoch": 21.38670143415906, - "grad_norm": 1.5455204248428345, - "learning_rate": 5.9302512562814074e-05, - "loss": 5.4886, - "step": 41009 - }, - { - "epoch": 21.38722294654498, - "grad_norm": 1.4201644659042358, - "learning_rate": 5.930150753768845e-05, - "loss": 5.5605, - "step": 41010 - }, - { - "epoch": 21.3877444589309, - "grad_norm": 1.6348481178283691, - "learning_rate": 5.9300502512562816e-05, - "loss": 4.6502, - "step": 41011 - }, - { - "epoch": 21.38826597131682, - "grad_norm": 1.5664865970611572, - "learning_rate": 5.929949748743718e-05, - "loss": 5.1563, - "step": 41012 - }, - { - "epoch": 21.38878748370274, - "grad_norm": 1.602364182472229, - "learning_rate": 5.929849246231156e-05, - "loss": 4.9187, - "step": 41013 - }, - { - "epoch": 21.38930899608866, - "grad_norm": 1.5807092189788818, - "learning_rate": 5.929748743718593e-05, - "loss": 5.0818, - "step": 41014 - }, - { - "epoch": 21.389830508474578, - "grad_norm": 1.5047962665557861, - "learning_rate": 5.929648241206031e-05, - "loss": 4.9442, - "step": 41015 - }, - { - "epoch": 21.390352020860494, - "grad_norm": 1.5893853902816772, - "learning_rate": 5.929547738693467e-05, - "loss": 5.1936, - "step": 41016 - }, - { - "epoch": 21.390873533246413, - "grad_norm": 1.511821985244751, - "learning_rate": 5.929447236180905e-05, - "loss": 5.561, - "step": 41017 - }, - { - "epoch": 21.391395045632333, - "grad_norm": 1.5792101621627808, - "learning_rate": 5.929346733668342e-05, - "loss": 5.1699, - "step": 41018 - }, - { - "epoch": 21.391916558018252, - "grad_norm": 1.629119873046875, - "learning_rate": 5.92924623115578e-05, - "loss": 4.8822, - "step": 41019 - }, - { - "epoch": 21.39243807040417, - "grad_norm": 1.5236436128616333, - "learning_rate": 5.929145728643216e-05, - "loss": 5.2727, - "step": 41020 - }, - { - "epoch": 21.39295958279009, - "grad_norm": 1.5177249908447266, - "learning_rate": 5.929045226130654e-05, - "loss": 5.7773, - "step": 41021 - }, - { - "epoch": 21.39348109517601, - "grad_norm": 1.5594468116760254, - "learning_rate": 5.9289447236180904e-05, - "loss": 5.3569, - "step": 41022 - }, - { - "epoch": 21.39400260756193, - "grad_norm": 1.6227952241897583, - "learning_rate": 5.928844221105528e-05, - "loss": 5.2662, - "step": 41023 - }, - { - "epoch": 21.39452411994785, - "grad_norm": 1.5346007347106934, - "learning_rate": 5.928743718592965e-05, - "loss": 5.0805, - "step": 41024 - }, - { - "epoch": 21.39504563233377, - "grad_norm": 1.4589375257492065, - "learning_rate": 5.928643216080402e-05, - "loss": 5.4739, - "step": 41025 - }, - { - "epoch": 21.39556714471969, - "grad_norm": 1.450156569480896, - "learning_rate": 5.9285427135678395e-05, - "loss": 5.5263, - "step": 41026 - }, - { - "epoch": 21.396088657105608, - "grad_norm": 1.507494330406189, - "learning_rate": 5.928442211055276e-05, - "loss": 5.6013, - "step": 41027 - }, - { - "epoch": 21.396610169491524, - "grad_norm": 1.4497063159942627, - "learning_rate": 5.928341708542714e-05, - "loss": 5.4827, - "step": 41028 - }, - { - "epoch": 21.397131681877443, - "grad_norm": 1.56150221824646, - "learning_rate": 5.928241206030151e-05, - "loss": 5.373, - "step": 41029 - }, - { - "epoch": 21.397653194263363, - "grad_norm": 1.4473379850387573, - "learning_rate": 5.9281407035175886e-05, - "loss": 5.4087, - "step": 41030 - }, - { - "epoch": 21.398174706649282, - "grad_norm": 1.510840654373169, - "learning_rate": 5.928040201005025e-05, - "loss": 5.1225, - "step": 41031 - }, - { - "epoch": 21.3986962190352, - "grad_norm": 1.6419967412948608, - "learning_rate": 5.927939698492463e-05, - "loss": 5.3574, - "step": 41032 - }, - { - "epoch": 21.39921773142112, - "grad_norm": 1.5846710205078125, - "learning_rate": 5.927839195979899e-05, - "loss": 5.1445, - "step": 41033 - }, - { - "epoch": 21.39973924380704, - "grad_norm": 1.5163947343826294, - "learning_rate": 5.927738693467337e-05, - "loss": 5.1643, - "step": 41034 - }, - { - "epoch": 21.40026075619296, - "grad_norm": 1.4968881607055664, - "learning_rate": 5.927638190954774e-05, - "loss": 5.4778, - "step": 41035 - }, - { - "epoch": 21.40078226857888, - "grad_norm": 1.5700329542160034, - "learning_rate": 5.927537688442212e-05, - "loss": 5.0708, - "step": 41036 - }, - { - "epoch": 21.4013037809648, - "grad_norm": 1.6675209999084473, - "learning_rate": 5.9274371859296484e-05, - "loss": 4.9417, - "step": 41037 - }, - { - "epoch": 21.40182529335072, - "grad_norm": 1.5523279905319214, - "learning_rate": 5.927336683417086e-05, - "loss": 5.3075, - "step": 41038 - }, - { - "epoch": 21.402346805736638, - "grad_norm": 1.484291434288025, - "learning_rate": 5.927236180904523e-05, - "loss": 4.4498, - "step": 41039 - }, - { - "epoch": 21.402868318122554, - "grad_norm": 1.5777686834335327, - "learning_rate": 5.92713567839196e-05, - "loss": 4.9492, - "step": 41040 - }, - { - "epoch": 21.403389830508473, - "grad_norm": 1.4950445890426636, - "learning_rate": 5.9270351758793975e-05, - "loss": 5.5504, - "step": 41041 - }, - { - "epoch": 21.403911342894393, - "grad_norm": 1.4731183052062988, - "learning_rate": 5.926934673366834e-05, - "loss": 5.6877, - "step": 41042 - }, - { - "epoch": 21.404432855280312, - "grad_norm": 1.510014533996582, - "learning_rate": 5.926834170854272e-05, - "loss": 5.124, - "step": 41043 - }, - { - "epoch": 21.40495436766623, - "grad_norm": 1.5817887783050537, - "learning_rate": 5.926733668341709e-05, - "loss": 5.2589, - "step": 41044 - }, - { - "epoch": 21.40547588005215, - "grad_norm": 1.5144833326339722, - "learning_rate": 5.9266331658291466e-05, - "loss": 5.3821, - "step": 41045 - }, - { - "epoch": 21.40599739243807, - "grad_norm": 1.4214595556259155, - "learning_rate": 5.926532663316583e-05, - "loss": 5.3311, - "step": 41046 - }, - { - "epoch": 21.40651890482399, - "grad_norm": 1.6286941766738892, - "learning_rate": 5.926432160804021e-05, - "loss": 5.1227, - "step": 41047 - }, - { - "epoch": 21.40704041720991, - "grad_norm": 1.5133235454559326, - "learning_rate": 5.926331658291457e-05, - "loss": 5.1979, - "step": 41048 - }, - { - "epoch": 21.40756192959583, - "grad_norm": 2.3715803623199463, - "learning_rate": 5.926231155778895e-05, - "loss": 4.8061, - "step": 41049 - }, - { - "epoch": 21.40808344198175, - "grad_norm": 1.6216412782669067, - "learning_rate": 5.926130653266332e-05, - "loss": 5.5482, - "step": 41050 - }, - { - "epoch": 21.408604954367668, - "grad_norm": 1.4913182258605957, - "learning_rate": 5.92603015075377e-05, - "loss": 5.3412, - "step": 41051 - }, - { - "epoch": 21.409126466753584, - "grad_norm": 1.6990469694137573, - "learning_rate": 5.925929648241206e-05, - "loss": 4.9918, - "step": 41052 - }, - { - "epoch": 21.409647979139503, - "grad_norm": 1.4792122840881348, - "learning_rate": 5.925829145728643e-05, - "loss": 5.4069, - "step": 41053 - }, - { - "epoch": 21.410169491525423, - "grad_norm": 1.4729372262954712, - "learning_rate": 5.9257286432160805e-05, - "loss": 5.4093, - "step": 41054 - }, - { - "epoch": 21.410691003911342, - "grad_norm": 1.496396780014038, - "learning_rate": 5.9256281407035176e-05, - "loss": 5.3636, - "step": 41055 - }, - { - "epoch": 21.41121251629726, - "grad_norm": 1.4755038022994995, - "learning_rate": 5.9255276381909554e-05, - "loss": 4.7169, - "step": 41056 - }, - { - "epoch": 21.41173402868318, - "grad_norm": 1.4520140886306763, - "learning_rate": 5.925427135678392e-05, - "loss": 5.4506, - "step": 41057 - }, - { - "epoch": 21.4122555410691, - "grad_norm": 1.5533554553985596, - "learning_rate": 5.9253266331658296e-05, - "loss": 5.0492, - "step": 41058 - }, - { - "epoch": 21.41277705345502, - "grad_norm": 1.3886674642562866, - "learning_rate": 5.925226130653267e-05, - "loss": 5.724, - "step": 41059 - }, - { - "epoch": 21.41329856584094, - "grad_norm": 1.717076301574707, - "learning_rate": 5.9251256281407045e-05, - "loss": 5.2265, - "step": 41060 - }, - { - "epoch": 21.41382007822686, - "grad_norm": 1.3786736726760864, - "learning_rate": 5.925025125628141e-05, - "loss": 4.8147, - "step": 41061 - }, - { - "epoch": 21.41434159061278, - "grad_norm": 1.5504847764968872, - "learning_rate": 5.924924623115579e-05, - "loss": 5.3173, - "step": 41062 - }, - { - "epoch": 21.414863102998694, - "grad_norm": 1.4121050834655762, - "learning_rate": 5.924824120603015e-05, - "loss": 5.3676, - "step": 41063 - }, - { - "epoch": 21.415384615384614, - "grad_norm": 1.6170237064361572, - "learning_rate": 5.924723618090453e-05, - "loss": 4.8045, - "step": 41064 - }, - { - "epoch": 21.415906127770533, - "grad_norm": 1.4522677659988403, - "learning_rate": 5.92462311557789e-05, - "loss": 5.5759, - "step": 41065 - }, - { - "epoch": 21.416427640156453, - "grad_norm": 1.5410972833633423, - "learning_rate": 5.9245226130653264e-05, - "loss": 5.2174, - "step": 41066 - }, - { - "epoch": 21.416949152542372, - "grad_norm": 1.5329581499099731, - "learning_rate": 5.924422110552764e-05, - "loss": 3.7775, - "step": 41067 - }, - { - "epoch": 21.41747066492829, - "grad_norm": 1.652663230895996, - "learning_rate": 5.9243216080402007e-05, - "loss": 4.9956, - "step": 41068 - }, - { - "epoch": 21.41799217731421, - "grad_norm": 1.514403223991394, - "learning_rate": 5.9242211055276384e-05, - "loss": 5.4549, - "step": 41069 - }, - { - "epoch": 21.41851368970013, - "grad_norm": 1.7414703369140625, - "learning_rate": 5.9241206030150755e-05, - "loss": 4.9867, - "step": 41070 - }, - { - "epoch": 21.41903520208605, - "grad_norm": 1.6054807901382446, - "learning_rate": 5.924020100502513e-05, - "loss": 5.0059, - "step": 41071 - }, - { - "epoch": 21.41955671447197, - "grad_norm": 1.5161943435668945, - "learning_rate": 5.92391959798995e-05, - "loss": 5.6421, - "step": 41072 - }, - { - "epoch": 21.42007822685789, - "grad_norm": 1.4522037506103516, - "learning_rate": 5.9238190954773875e-05, - "loss": 5.8762, - "step": 41073 - }, - { - "epoch": 21.42059973924381, - "grad_norm": 2.422438144683838, - "learning_rate": 5.923718592964824e-05, - "loss": 4.9055, - "step": 41074 - }, - { - "epoch": 21.421121251629724, - "grad_norm": 1.5230520963668823, - "learning_rate": 5.923618090452262e-05, - "loss": 4.7994, - "step": 41075 - }, - { - "epoch": 21.421642764015644, - "grad_norm": 1.567209005355835, - "learning_rate": 5.923517587939699e-05, - "loss": 5.1228, - "step": 41076 - }, - { - "epoch": 21.422164276401563, - "grad_norm": 1.48659348487854, - "learning_rate": 5.9234170854271366e-05, - "loss": 5.208, - "step": 41077 - }, - { - "epoch": 21.422685788787483, - "grad_norm": 1.6859955787658691, - "learning_rate": 5.923316582914573e-05, - "loss": 4.7495, - "step": 41078 - }, - { - "epoch": 21.423207301173402, - "grad_norm": 1.4938021898269653, - "learning_rate": 5.9232160804020095e-05, - "loss": 5.488, - "step": 41079 - }, - { - "epoch": 21.423728813559322, - "grad_norm": 1.5396283864974976, - "learning_rate": 5.923115577889447e-05, - "loss": 5.1227, - "step": 41080 - }, - { - "epoch": 21.42425032594524, - "grad_norm": 1.5156424045562744, - "learning_rate": 5.9230150753768844e-05, - "loss": 5.3561, - "step": 41081 - }, - { - "epoch": 21.42477183833116, - "grad_norm": 1.6391777992248535, - "learning_rate": 5.922914572864322e-05, - "loss": 4.9924, - "step": 41082 - }, - { - "epoch": 21.42529335071708, - "grad_norm": 1.520200490951538, - "learning_rate": 5.9228140703517586e-05, - "loss": 5.0437, - "step": 41083 - }, - { - "epoch": 21.425814863103, - "grad_norm": 1.6187902688980103, - "learning_rate": 5.9227135678391964e-05, - "loss": 5.0699, - "step": 41084 - }, - { - "epoch": 21.42633637548892, - "grad_norm": 1.7063201665878296, - "learning_rate": 5.9226130653266335e-05, - "loss": 5.1236, - "step": 41085 - }, - { - "epoch": 21.42685788787484, - "grad_norm": 1.551129937171936, - "learning_rate": 5.922512562814071e-05, - "loss": 5.3837, - "step": 41086 - }, - { - "epoch": 21.427379400260754, - "grad_norm": 1.7293967008590698, - "learning_rate": 5.922412060301508e-05, - "loss": 4.9043, - "step": 41087 - }, - { - "epoch": 21.427900912646674, - "grad_norm": 1.5616123676300049, - "learning_rate": 5.9223115577889455e-05, - "loss": 4.9117, - "step": 41088 - }, - { - "epoch": 21.428422425032593, - "grad_norm": 1.9327290058135986, - "learning_rate": 5.922211055276382e-05, - "loss": 5.4518, - "step": 41089 - }, - { - "epoch": 21.428943937418513, - "grad_norm": 1.5790907144546509, - "learning_rate": 5.92211055276382e-05, - "loss": 4.5076, - "step": 41090 - }, - { - "epoch": 21.429465449804432, - "grad_norm": 1.6041349172592163, - "learning_rate": 5.922010050251257e-05, - "loss": 4.966, - "step": 41091 - }, - { - "epoch": 21.429986962190352, - "grad_norm": 1.5374449491500854, - "learning_rate": 5.921909547738693e-05, - "loss": 4.8887, - "step": 41092 - }, - { - "epoch": 21.43050847457627, - "grad_norm": 1.623756766319275, - "learning_rate": 5.921809045226131e-05, - "loss": 5.4154, - "step": 41093 - }, - { - "epoch": 21.43102998696219, - "grad_norm": 1.5386654138565063, - "learning_rate": 5.9217085427135674e-05, - "loss": 4.9311, - "step": 41094 - }, - { - "epoch": 21.43155149934811, - "grad_norm": 1.716391682624817, - "learning_rate": 5.921608040201005e-05, - "loss": 5.122, - "step": 41095 - }, - { - "epoch": 21.43207301173403, - "grad_norm": 1.5140138864517212, - "learning_rate": 5.921507537688442e-05, - "loss": 4.8581, - "step": 41096 - }, - { - "epoch": 21.43259452411995, - "grad_norm": 1.5096015930175781, - "learning_rate": 5.92140703517588e-05, - "loss": 5.2939, - "step": 41097 - }, - { - "epoch": 21.43311603650587, - "grad_norm": 1.6007031202316284, - "learning_rate": 5.9213065326633165e-05, - "loss": 5.1037, - "step": 41098 - }, - { - "epoch": 21.433637548891785, - "grad_norm": 1.56480872631073, - "learning_rate": 5.921206030150754e-05, - "loss": 5.0266, - "step": 41099 - }, - { - "epoch": 21.434159061277704, - "grad_norm": 1.545104742050171, - "learning_rate": 5.921105527638191e-05, - "loss": 5.3377, - "step": 41100 - }, - { - "epoch": 21.434680573663623, - "grad_norm": 1.5894389152526855, - "learning_rate": 5.9210050251256285e-05, - "loss": 4.8444, - "step": 41101 - }, - { - "epoch": 21.435202086049543, - "grad_norm": 1.5818474292755127, - "learning_rate": 5.9209045226130656e-05, - "loss": 5.2247, - "step": 41102 - }, - { - "epoch": 21.435723598435462, - "grad_norm": 1.6359614133834839, - "learning_rate": 5.9208040201005034e-05, - "loss": 5.1891, - "step": 41103 - }, - { - "epoch": 21.436245110821382, - "grad_norm": 1.4823541641235352, - "learning_rate": 5.92070351758794e-05, - "loss": 4.9077, - "step": 41104 - }, - { - "epoch": 21.4367666232073, - "grad_norm": 1.4793648719787598, - "learning_rate": 5.920603015075377e-05, - "loss": 5.2415, - "step": 41105 - }, - { - "epoch": 21.43728813559322, - "grad_norm": 1.5936557054519653, - "learning_rate": 5.920502512562815e-05, - "loss": 5.1797, - "step": 41106 - }, - { - "epoch": 21.43780964797914, - "grad_norm": 1.4779069423675537, - "learning_rate": 5.920402010050251e-05, - "loss": 5.1808, - "step": 41107 - }, - { - "epoch": 21.43833116036506, - "grad_norm": 1.5176036357879639, - "learning_rate": 5.920301507537689e-05, - "loss": 5.0645, - "step": 41108 - }, - { - "epoch": 21.43885267275098, - "grad_norm": 1.4553302526474, - "learning_rate": 5.9202010050251254e-05, - "loss": 4.9093, - "step": 41109 - }, - { - "epoch": 21.4393741851369, - "grad_norm": 1.455739140510559, - "learning_rate": 5.920100502512563e-05, - "loss": 5.5781, - "step": 41110 - }, - { - "epoch": 21.439895697522815, - "grad_norm": 1.4968539476394653, - "learning_rate": 5.92e-05, - "loss": 5.5818, - "step": 41111 - }, - { - "epoch": 21.440417209908734, - "grad_norm": 1.5650750398635864, - "learning_rate": 5.919899497487438e-05, - "loss": 5.0588, - "step": 41112 - }, - { - "epoch": 21.440938722294653, - "grad_norm": 1.4428352117538452, - "learning_rate": 5.9197989949748745e-05, - "loss": 5.3827, - "step": 41113 - }, - { - "epoch": 21.441460234680573, - "grad_norm": 1.5384511947631836, - "learning_rate": 5.919698492462312e-05, - "loss": 5.256, - "step": 41114 - }, - { - "epoch": 21.441981747066492, - "grad_norm": 1.6068804264068604, - "learning_rate": 5.9195979899497487e-05, - "loss": 5.3996, - "step": 41115 - }, - { - "epoch": 21.442503259452412, - "grad_norm": 1.5617308616638184, - "learning_rate": 5.9194974874371864e-05, - "loss": 4.982, - "step": 41116 - }, - { - "epoch": 21.44302477183833, - "grad_norm": 1.4507203102111816, - "learning_rate": 5.9193969849246235e-05, - "loss": 5.2052, - "step": 41117 - }, - { - "epoch": 21.44354628422425, - "grad_norm": 1.5799821615219116, - "learning_rate": 5.919296482412061e-05, - "loss": 4.9293, - "step": 41118 - }, - { - "epoch": 21.44406779661017, - "grad_norm": 1.4922505617141724, - "learning_rate": 5.919195979899498e-05, - "loss": 5.1822, - "step": 41119 - }, - { - "epoch": 21.44458930899609, - "grad_norm": 1.4315698146820068, - "learning_rate": 5.919095477386934e-05, - "loss": 5.3661, - "step": 41120 - }, - { - "epoch": 21.44511082138201, - "grad_norm": 1.5785952806472778, - "learning_rate": 5.918994974874372e-05, - "loss": 5.0197, - "step": 41121 - }, - { - "epoch": 21.44563233376793, - "grad_norm": 1.4807422161102295, - "learning_rate": 5.918894472361809e-05, - "loss": 5.5114, - "step": 41122 - }, - { - "epoch": 21.446153846153845, - "grad_norm": 1.4588969945907593, - "learning_rate": 5.918793969849247e-05, - "loss": 5.4579, - "step": 41123 - }, - { - "epoch": 21.446675358539764, - "grad_norm": 1.5385782718658447, - "learning_rate": 5.918693467336683e-05, - "loss": 5.071, - "step": 41124 - }, - { - "epoch": 21.447196870925683, - "grad_norm": 1.5653952360153198, - "learning_rate": 5.918592964824121e-05, - "loss": 4.7828, - "step": 41125 - }, - { - "epoch": 21.447718383311603, - "grad_norm": 1.605215072631836, - "learning_rate": 5.918492462311558e-05, - "loss": 5.1829, - "step": 41126 - }, - { - "epoch": 21.448239895697522, - "grad_norm": 1.592134714126587, - "learning_rate": 5.918391959798996e-05, - "loss": 4.7724, - "step": 41127 - }, - { - "epoch": 21.448761408083442, - "grad_norm": 1.565037727355957, - "learning_rate": 5.9182914572864324e-05, - "loss": 5.0003, - "step": 41128 - }, - { - "epoch": 21.44928292046936, - "grad_norm": 1.5050938129425049, - "learning_rate": 5.91819095477387e-05, - "loss": 5.1437, - "step": 41129 - }, - { - "epoch": 21.44980443285528, - "grad_norm": 1.5295896530151367, - "learning_rate": 5.9180904522613066e-05, - "loss": 4.923, - "step": 41130 - }, - { - "epoch": 21.4503259452412, - "grad_norm": 1.5047335624694824, - "learning_rate": 5.9179899497487444e-05, - "loss": 5.7705, - "step": 41131 - }, - { - "epoch": 21.45084745762712, - "grad_norm": 1.5995969772338867, - "learning_rate": 5.9178894472361815e-05, - "loss": 5.0854, - "step": 41132 - }, - { - "epoch": 21.45136897001304, - "grad_norm": 1.4608933925628662, - "learning_rate": 5.917788944723618e-05, - "loss": 4.8073, - "step": 41133 - }, - { - "epoch": 21.45189048239896, - "grad_norm": 1.5143768787384033, - "learning_rate": 5.917688442211056e-05, - "loss": 5.3879, - "step": 41134 - }, - { - "epoch": 21.452411994784875, - "grad_norm": 1.5135085582733154, - "learning_rate": 5.917587939698492e-05, - "loss": 5.0996, - "step": 41135 - }, - { - "epoch": 21.452933507170794, - "grad_norm": 1.4530612230300903, - "learning_rate": 5.91748743718593e-05, - "loss": 4.8794, - "step": 41136 - }, - { - "epoch": 21.453455019556714, - "grad_norm": 1.4465845823287964, - "learning_rate": 5.917386934673367e-05, - "loss": 5.2073, - "step": 41137 - }, - { - "epoch": 21.453976531942633, - "grad_norm": 1.470965027809143, - "learning_rate": 5.917286432160805e-05, - "loss": 4.5187, - "step": 41138 - }, - { - "epoch": 21.454498044328552, - "grad_norm": 1.5436632633209229, - "learning_rate": 5.917185929648241e-05, - "loss": 5.2408, - "step": 41139 - }, - { - "epoch": 21.455019556714472, - "grad_norm": 1.5198332071304321, - "learning_rate": 5.917085427135679e-05, - "loss": 5.4214, - "step": 41140 - }, - { - "epoch": 21.45554106910039, - "grad_norm": 1.4951589107513428, - "learning_rate": 5.9169849246231154e-05, - "loss": 5.3258, - "step": 41141 - }, - { - "epoch": 21.45606258148631, - "grad_norm": 1.559004783630371, - "learning_rate": 5.916884422110553e-05, - "loss": 4.5089, - "step": 41142 - }, - { - "epoch": 21.45658409387223, - "grad_norm": 1.5241467952728271, - "learning_rate": 5.91678391959799e-05, - "loss": 5.4665, - "step": 41143 - }, - { - "epoch": 21.45710560625815, - "grad_norm": 1.4609414339065552, - "learning_rate": 5.916683417085428e-05, - "loss": 5.4369, - "step": 41144 - }, - { - "epoch": 21.45762711864407, - "grad_norm": 1.4938504695892334, - "learning_rate": 5.9165829145728645e-05, - "loss": 5.3748, - "step": 41145 - }, - { - "epoch": 21.45814863102999, - "grad_norm": 1.5268255472183228, - "learning_rate": 5.9164824120603016e-05, - "loss": 5.405, - "step": 41146 - }, - { - "epoch": 21.458670143415905, - "grad_norm": 1.5430594682693481, - "learning_rate": 5.9163819095477394e-05, - "loss": 5.4704, - "step": 41147 - }, - { - "epoch": 21.459191655801824, - "grad_norm": 1.4992681741714478, - "learning_rate": 5.916281407035176e-05, - "loss": 5.1042, - "step": 41148 - }, - { - "epoch": 21.459713168187744, - "grad_norm": 1.674988865852356, - "learning_rate": 5.9161809045226136e-05, - "loss": 5.0209, - "step": 41149 - }, - { - "epoch": 21.460234680573663, - "grad_norm": 1.5126831531524658, - "learning_rate": 5.91608040201005e-05, - "loss": 5.5444, - "step": 41150 - }, - { - "epoch": 21.460756192959582, - "grad_norm": 1.5453957319259644, - "learning_rate": 5.915979899497488e-05, - "loss": 4.7466, - "step": 41151 - }, - { - "epoch": 21.461277705345502, - "grad_norm": 1.556870460510254, - "learning_rate": 5.915879396984925e-05, - "loss": 5.3538, - "step": 41152 - }, - { - "epoch": 21.46179921773142, - "grad_norm": 1.5816092491149902, - "learning_rate": 5.915778894472363e-05, - "loss": 5.5292, - "step": 41153 - }, - { - "epoch": 21.46232073011734, - "grad_norm": 1.503212809562683, - "learning_rate": 5.915678391959799e-05, - "loss": 5.5807, - "step": 41154 - }, - { - "epoch": 21.46284224250326, - "grad_norm": 1.5021730661392212, - "learning_rate": 5.915577889447237e-05, - "loss": 5.4327, - "step": 41155 - }, - { - "epoch": 21.46336375488918, - "grad_norm": 1.5607396364212036, - "learning_rate": 5.9154773869346734e-05, - "loss": 5.505, - "step": 41156 - }, - { - "epoch": 21.4638852672751, - "grad_norm": 1.5450488328933716, - "learning_rate": 5.915376884422111e-05, - "loss": 5.0243, - "step": 41157 - }, - { - "epoch": 21.46440677966102, - "grad_norm": 1.5298491716384888, - "learning_rate": 5.915276381909548e-05, - "loss": 5.6584, - "step": 41158 - }, - { - "epoch": 21.464928292046935, - "grad_norm": 1.5874018669128418, - "learning_rate": 5.915175879396985e-05, - "loss": 5.5467, - "step": 41159 - }, - { - "epoch": 21.465449804432854, - "grad_norm": 1.4667235612869263, - "learning_rate": 5.9150753768844225e-05, - "loss": 5.333, - "step": 41160 - }, - { - "epoch": 21.465971316818774, - "grad_norm": 1.4391800165176392, - "learning_rate": 5.914974874371859e-05, - "loss": 4.4899, - "step": 41161 - }, - { - "epoch": 21.466492829204693, - "grad_norm": 1.5391337871551514, - "learning_rate": 5.914874371859297e-05, - "loss": 4.8772, - "step": 41162 - }, - { - "epoch": 21.467014341590613, - "grad_norm": 1.576309323310852, - "learning_rate": 5.914773869346734e-05, - "loss": 5.2822, - "step": 41163 - }, - { - "epoch": 21.467535853976532, - "grad_norm": 1.5789377689361572, - "learning_rate": 5.9146733668341716e-05, - "loss": 5.1393, - "step": 41164 - }, - { - "epoch": 21.46805736636245, - "grad_norm": 1.4880632162094116, - "learning_rate": 5.914572864321608e-05, - "loss": 5.1259, - "step": 41165 - }, - { - "epoch": 21.46857887874837, - "grad_norm": 1.5775562524795532, - "learning_rate": 5.914472361809046e-05, - "loss": 5.0851, - "step": 41166 - }, - { - "epoch": 21.46910039113429, - "grad_norm": 1.5226553678512573, - "learning_rate": 5.914371859296482e-05, - "loss": 5.3097, - "step": 41167 - }, - { - "epoch": 21.46962190352021, - "grad_norm": 1.601026177406311, - "learning_rate": 5.91427135678392e-05, - "loss": 4.9864, - "step": 41168 - }, - { - "epoch": 21.47014341590613, - "grad_norm": 1.5087001323699951, - "learning_rate": 5.914170854271357e-05, - "loss": 5.3757, - "step": 41169 - }, - { - "epoch": 21.470664928292045, - "grad_norm": 1.5159602165222168, - "learning_rate": 5.914070351758795e-05, - "loss": 5.4397, - "step": 41170 - }, - { - "epoch": 21.471186440677965, - "grad_norm": 1.506822109222412, - "learning_rate": 5.913969849246231e-05, - "loss": 5.7466, - "step": 41171 - }, - { - "epoch": 21.471707953063884, - "grad_norm": 1.566779375076294, - "learning_rate": 5.9138693467336684e-05, - "loss": 5.6096, - "step": 41172 - }, - { - "epoch": 21.472229465449804, - "grad_norm": 1.5106900930404663, - "learning_rate": 5.913768844221106e-05, - "loss": 5.332, - "step": 41173 - }, - { - "epoch": 21.472750977835723, - "grad_norm": 1.556606411933899, - "learning_rate": 5.9136683417085426e-05, - "loss": 5.6151, - "step": 41174 - }, - { - "epoch": 21.473272490221643, - "grad_norm": 1.449682354927063, - "learning_rate": 5.9135678391959804e-05, - "loss": 4.7145, - "step": 41175 - }, - { - "epoch": 21.473794002607562, - "grad_norm": 1.4287097454071045, - "learning_rate": 5.913467336683417e-05, - "loss": 5.5583, - "step": 41176 - }, - { - "epoch": 21.47431551499348, - "grad_norm": 1.6095407009124756, - "learning_rate": 5.9133668341708546e-05, - "loss": 5.0712, - "step": 41177 - }, - { - "epoch": 21.4748370273794, - "grad_norm": 1.4746060371398926, - "learning_rate": 5.913266331658292e-05, - "loss": 5.2952, - "step": 41178 - }, - { - "epoch": 21.47535853976532, - "grad_norm": 1.5001964569091797, - "learning_rate": 5.9131658291457295e-05, - "loss": 5.2824, - "step": 41179 - }, - { - "epoch": 21.47588005215124, - "grad_norm": 1.5815166234970093, - "learning_rate": 5.913065326633166e-05, - "loss": 4.8536, - "step": 41180 - }, - { - "epoch": 21.47640156453716, - "grad_norm": 1.5653260946273804, - "learning_rate": 5.912964824120604e-05, - "loss": 4.7497, - "step": 41181 - }, - { - "epoch": 21.476923076923075, - "grad_norm": 1.4771090745925903, - "learning_rate": 5.91286432160804e-05, - "loss": 5.5982, - "step": 41182 - }, - { - "epoch": 21.477444589308995, - "grad_norm": 1.4325979948043823, - "learning_rate": 5.912763819095478e-05, - "loss": 5.4012, - "step": 41183 - }, - { - "epoch": 21.477966101694914, - "grad_norm": 1.557810664176941, - "learning_rate": 5.912663316582915e-05, - "loss": 5.3068, - "step": 41184 - }, - { - "epoch": 21.478487614080834, - "grad_norm": 1.3967316150665283, - "learning_rate": 5.9125628140703514e-05, - "loss": 5.626, - "step": 41185 - }, - { - "epoch": 21.479009126466753, - "grad_norm": 1.5346946716308594, - "learning_rate": 5.912462311557789e-05, - "loss": 5.5987, - "step": 41186 - }, - { - "epoch": 21.479530638852673, - "grad_norm": 1.6300305128097534, - "learning_rate": 5.9123618090452257e-05, - "loss": 4.9651, - "step": 41187 - }, - { - "epoch": 21.480052151238592, - "grad_norm": 1.4661213159561157, - "learning_rate": 5.9122613065326634e-05, - "loss": 5.3466, - "step": 41188 - }, - { - "epoch": 21.48057366362451, - "grad_norm": 1.6083879470825195, - "learning_rate": 5.9121608040201005e-05, - "loss": 4.7983, - "step": 41189 - }, - { - "epoch": 21.48109517601043, - "grad_norm": 1.4596834182739258, - "learning_rate": 5.912060301507538e-05, - "loss": 5.6572, - "step": 41190 - }, - { - "epoch": 21.48161668839635, - "grad_norm": 1.563409686088562, - "learning_rate": 5.911959798994975e-05, - "loss": 5.0891, - "step": 41191 - }, - { - "epoch": 21.48213820078227, - "grad_norm": 1.56592857837677, - "learning_rate": 5.9118592964824125e-05, - "loss": 5.3416, - "step": 41192 - }, - { - "epoch": 21.48265971316819, - "grad_norm": 1.5389330387115479, - "learning_rate": 5.9117587939698496e-05, - "loss": 4.9226, - "step": 41193 - }, - { - "epoch": 21.483181225554105, - "grad_norm": 1.5529509782791138, - "learning_rate": 5.9116582914572874e-05, - "loss": 4.3574, - "step": 41194 - }, - { - "epoch": 21.483702737940025, - "grad_norm": 1.624025583267212, - "learning_rate": 5.911557788944724e-05, - "loss": 5.0792, - "step": 41195 - }, - { - "epoch": 21.484224250325944, - "grad_norm": 1.4370590448379517, - "learning_rate": 5.9114572864321616e-05, - "loss": 5.7706, - "step": 41196 - }, - { - "epoch": 21.484745762711864, - "grad_norm": 1.5869472026824951, - "learning_rate": 5.911356783919598e-05, - "loss": 4.9356, - "step": 41197 - }, - { - "epoch": 21.485267275097783, - "grad_norm": 1.5522187948226929, - "learning_rate": 5.911256281407035e-05, - "loss": 5.1373, - "step": 41198 - }, - { - "epoch": 21.485788787483703, - "grad_norm": 1.5030452013015747, - "learning_rate": 5.911155778894473e-05, - "loss": 5.3523, - "step": 41199 - }, - { - "epoch": 21.486310299869622, - "grad_norm": 1.514507532119751, - "learning_rate": 5.9110552763819094e-05, - "loss": 5.1713, - "step": 41200 - }, - { - "epoch": 21.48683181225554, - "grad_norm": 1.3508753776550293, - "learning_rate": 5.910954773869347e-05, - "loss": 5.8561, - "step": 41201 - }, - { - "epoch": 21.48735332464146, - "grad_norm": 1.5350942611694336, - "learning_rate": 5.9108542713567836e-05, - "loss": 5.1713, - "step": 41202 - }, - { - "epoch": 21.48787483702738, - "grad_norm": 1.5217972993850708, - "learning_rate": 5.9107537688442214e-05, - "loss": 5.3972, - "step": 41203 - }, - { - "epoch": 21.4883963494133, - "grad_norm": 1.617126703262329, - "learning_rate": 5.9106532663316585e-05, - "loss": 4.8247, - "step": 41204 - }, - { - "epoch": 21.48891786179922, - "grad_norm": 1.5497868061065674, - "learning_rate": 5.910552763819096e-05, - "loss": 5.3665, - "step": 41205 - }, - { - "epoch": 21.489439374185135, - "grad_norm": 1.6398006677627563, - "learning_rate": 5.910452261306533e-05, - "loss": 4.9331, - "step": 41206 - }, - { - "epoch": 21.489960886571055, - "grad_norm": 1.4751466512680054, - "learning_rate": 5.9103517587939705e-05, - "loss": 5.6548, - "step": 41207 - }, - { - "epoch": 21.490482398956974, - "grad_norm": 1.5717668533325195, - "learning_rate": 5.910251256281407e-05, - "loss": 4.7295, - "step": 41208 - }, - { - "epoch": 21.491003911342894, - "grad_norm": 1.566636562347412, - "learning_rate": 5.910150753768845e-05, - "loss": 4.6615, - "step": 41209 - }, - { - "epoch": 21.491525423728813, - "grad_norm": 1.6807433366775513, - "learning_rate": 5.910050251256282e-05, - "loss": 5.0344, - "step": 41210 - }, - { - "epoch": 21.492046936114733, - "grad_norm": 1.6894841194152832, - "learning_rate": 5.9099497487437196e-05, - "loss": 5.3511, - "step": 41211 - }, - { - "epoch": 21.492568448500652, - "grad_norm": 1.6384341716766357, - "learning_rate": 5.909849246231156e-05, - "loss": 5.4068, - "step": 41212 - }, - { - "epoch": 21.49308996088657, - "grad_norm": 1.5998798608779907, - "learning_rate": 5.909748743718593e-05, - "loss": 5.6401, - "step": 41213 - }, - { - "epoch": 21.49361147327249, - "grad_norm": 1.5968462228775024, - "learning_rate": 5.909648241206031e-05, - "loss": 5.5511, - "step": 41214 - }, - { - "epoch": 21.49413298565841, - "grad_norm": 1.611171007156372, - "learning_rate": 5.909547738693467e-05, - "loss": 5.2913, - "step": 41215 - }, - { - "epoch": 21.49465449804433, - "grad_norm": 1.4959501028060913, - "learning_rate": 5.909447236180905e-05, - "loss": 4.6228, - "step": 41216 - }, - { - "epoch": 21.49517601043025, - "grad_norm": 1.6977726221084595, - "learning_rate": 5.9093467336683415e-05, - "loss": 4.667, - "step": 41217 - }, - { - "epoch": 21.495697522816165, - "grad_norm": 1.6247860193252563, - "learning_rate": 5.909246231155779e-05, - "loss": 5.254, - "step": 41218 - }, - { - "epoch": 21.496219035202085, - "grad_norm": 1.4898165464401245, - "learning_rate": 5.9091457286432164e-05, - "loss": 5.5982, - "step": 41219 - }, - { - "epoch": 21.496740547588004, - "grad_norm": 1.4868561029434204, - "learning_rate": 5.909045226130654e-05, - "loss": 5.6851, - "step": 41220 - }, - { - "epoch": 21.497262059973924, - "grad_norm": 1.432295322418213, - "learning_rate": 5.9089447236180906e-05, - "loss": 5.3737, - "step": 41221 - }, - { - "epoch": 21.497783572359843, - "grad_norm": 1.6012927293777466, - "learning_rate": 5.9088442211055284e-05, - "loss": 5.3639, - "step": 41222 - }, - { - "epoch": 21.498305084745763, - "grad_norm": 1.5758346319198608, - "learning_rate": 5.908743718592965e-05, - "loss": 5.6594, - "step": 41223 - }, - { - "epoch": 21.498826597131682, - "grad_norm": 1.4600738286972046, - "learning_rate": 5.9086432160804026e-05, - "loss": 5.4422, - "step": 41224 - }, - { - "epoch": 21.4993481095176, - "grad_norm": 1.4165698289871216, - "learning_rate": 5.90854271356784e-05, - "loss": 5.2058, - "step": 41225 - }, - { - "epoch": 21.49986962190352, - "grad_norm": 1.6297355890274048, - "learning_rate": 5.908442211055276e-05, - "loss": 4.8911, - "step": 41226 - }, - { - "epoch": 21.50039113428944, - "grad_norm": 1.505671501159668, - "learning_rate": 5.908341708542714e-05, - "loss": 5.4934, - "step": 41227 - }, - { - "epoch": 21.50091264667536, - "grad_norm": 1.6158794164657593, - "learning_rate": 5.9082412060301504e-05, - "loss": 5.009, - "step": 41228 - }, - { - "epoch": 21.50143415906128, - "grad_norm": 1.6683810949325562, - "learning_rate": 5.908140703517588e-05, - "loss": 5.3823, - "step": 41229 - }, - { - "epoch": 21.501955671447195, - "grad_norm": 1.4975764751434326, - "learning_rate": 5.908040201005025e-05, - "loss": 5.5457, - "step": 41230 - }, - { - "epoch": 21.502477183833115, - "grad_norm": 1.5996835231781006, - "learning_rate": 5.907939698492463e-05, - "loss": 5.4359, - "step": 41231 - }, - { - "epoch": 21.502998696219034, - "grad_norm": 1.4670857191085815, - "learning_rate": 5.9078391959798995e-05, - "loss": 5.7115, - "step": 41232 - }, - { - "epoch": 21.503520208604954, - "grad_norm": 1.6114745140075684, - "learning_rate": 5.907738693467337e-05, - "loss": 4.9153, - "step": 41233 - }, - { - "epoch": 21.504041720990873, - "grad_norm": 1.4757940769195557, - "learning_rate": 5.907638190954774e-05, - "loss": 4.8792, - "step": 41234 - }, - { - "epoch": 21.504563233376793, - "grad_norm": 1.5152256488800049, - "learning_rate": 5.9075376884422114e-05, - "loss": 4.943, - "step": 41235 - }, - { - "epoch": 21.505084745762712, - "grad_norm": 1.5824360847473145, - "learning_rate": 5.9074371859296486e-05, - "loss": 4.2691, - "step": 41236 - }, - { - "epoch": 21.50560625814863, - "grad_norm": 1.4625403881072998, - "learning_rate": 5.907336683417086e-05, - "loss": 5.1327, - "step": 41237 - }, - { - "epoch": 21.50612777053455, - "grad_norm": 1.5414425134658813, - "learning_rate": 5.907236180904523e-05, - "loss": 5.2404, - "step": 41238 - }, - { - "epoch": 21.50664928292047, - "grad_norm": 1.5251132249832153, - "learning_rate": 5.90713567839196e-05, - "loss": 5.0439, - "step": 41239 - }, - { - "epoch": 21.50717079530639, - "grad_norm": 1.4669935703277588, - "learning_rate": 5.9070351758793976e-05, - "loss": 5.3542, - "step": 41240 - }, - { - "epoch": 21.50769230769231, - "grad_norm": 1.4769057035446167, - "learning_rate": 5.906934673366834e-05, - "loss": 5.6058, - "step": 41241 - }, - { - "epoch": 21.508213820078225, - "grad_norm": 1.5450319051742554, - "learning_rate": 5.906834170854272e-05, - "loss": 4.9746, - "step": 41242 - }, - { - "epoch": 21.508735332464145, - "grad_norm": 1.526537537574768, - "learning_rate": 5.906733668341708e-05, - "loss": 5.1116, - "step": 41243 - }, - { - "epoch": 21.509256844850064, - "grad_norm": 1.5868839025497437, - "learning_rate": 5.906633165829146e-05, - "loss": 5.3606, - "step": 41244 - }, - { - "epoch": 21.509778357235984, - "grad_norm": 1.5160589218139648, - "learning_rate": 5.906532663316583e-05, - "loss": 5.3751, - "step": 41245 - }, - { - "epoch": 21.510299869621903, - "grad_norm": 1.5750348567962646, - "learning_rate": 5.906432160804021e-05, - "loss": 5.2339, - "step": 41246 - }, - { - "epoch": 21.510821382007823, - "grad_norm": 1.6170661449432373, - "learning_rate": 5.9063316582914574e-05, - "loss": 5.3353, - "step": 41247 - }, - { - "epoch": 21.511342894393742, - "grad_norm": 1.6072708368301392, - "learning_rate": 5.906231155778895e-05, - "loss": 4.995, - "step": 41248 - }, - { - "epoch": 21.51186440677966, - "grad_norm": 1.4909777641296387, - "learning_rate": 5.9061306532663316e-05, - "loss": 5.1838, - "step": 41249 - }, - { - "epoch": 21.51238591916558, - "grad_norm": 1.5419069528579712, - "learning_rate": 5.9060301507537694e-05, - "loss": 4.9955, - "step": 41250 - }, - { - "epoch": 21.5129074315515, - "grad_norm": 1.5469324588775635, - "learning_rate": 5.9059296482412065e-05, - "loss": 5.3478, - "step": 41251 - }, - { - "epoch": 21.51342894393742, - "grad_norm": 1.4407187700271606, - "learning_rate": 5.905829145728643e-05, - "loss": 5.4094, - "step": 41252 - }, - { - "epoch": 21.513950456323336, - "grad_norm": 1.500580072402954, - "learning_rate": 5.905728643216081e-05, - "loss": 5.5507, - "step": 41253 - }, - { - "epoch": 21.514471968709255, - "grad_norm": 1.5069688558578491, - "learning_rate": 5.905628140703517e-05, - "loss": 5.2306, - "step": 41254 - }, - { - "epoch": 21.514993481095175, - "grad_norm": 1.4819751977920532, - "learning_rate": 5.905527638190955e-05, - "loss": 5.1309, - "step": 41255 - }, - { - "epoch": 21.515514993481094, - "grad_norm": 1.5541964769363403, - "learning_rate": 5.905427135678392e-05, - "loss": 5.2847, - "step": 41256 - }, - { - "epoch": 21.516036505867014, - "grad_norm": 1.4017810821533203, - "learning_rate": 5.90532663316583e-05, - "loss": 5.343, - "step": 41257 - }, - { - "epoch": 21.516558018252933, - "grad_norm": 1.4409304857254028, - "learning_rate": 5.905226130653266e-05, - "loss": 5.1683, - "step": 41258 - }, - { - "epoch": 21.517079530638853, - "grad_norm": 1.569568395614624, - "learning_rate": 5.905125628140704e-05, - "loss": 5.0821, - "step": 41259 - }, - { - "epoch": 21.517601043024772, - "grad_norm": 1.5077970027923584, - "learning_rate": 5.905025125628141e-05, - "loss": 5.3681, - "step": 41260 - }, - { - "epoch": 21.51812255541069, - "grad_norm": 1.605093240737915, - "learning_rate": 5.904924623115579e-05, - "loss": 5.0028, - "step": 41261 - }, - { - "epoch": 21.51864406779661, - "grad_norm": 1.5409423112869263, - "learning_rate": 5.904824120603015e-05, - "loss": 5.3555, - "step": 41262 - }, - { - "epoch": 21.51916558018253, - "grad_norm": 1.5781704187393188, - "learning_rate": 5.904723618090453e-05, - "loss": 5.2161, - "step": 41263 - }, - { - "epoch": 21.51968709256845, - "grad_norm": 1.4119279384613037, - "learning_rate": 5.9046231155778895e-05, - "loss": 5.4529, - "step": 41264 - }, - { - "epoch": 21.52020860495437, - "grad_norm": 1.5030242204666138, - "learning_rate": 5.9045226130653266e-05, - "loss": 5.2937, - "step": 41265 - }, - { - "epoch": 21.520730117340285, - "grad_norm": 1.556138277053833, - "learning_rate": 5.9044221105527644e-05, - "loss": 5.0308, - "step": 41266 - }, - { - "epoch": 21.521251629726205, - "grad_norm": 1.6110644340515137, - "learning_rate": 5.904321608040201e-05, - "loss": 5.2451, - "step": 41267 - }, - { - "epoch": 21.521773142112124, - "grad_norm": 1.553777813911438, - "learning_rate": 5.9042211055276386e-05, - "loss": 5.1459, - "step": 41268 - }, - { - "epoch": 21.522294654498044, - "grad_norm": 1.5336902141571045, - "learning_rate": 5.904120603015075e-05, - "loss": 5.3676, - "step": 41269 - }, - { - "epoch": 21.522816166883963, - "grad_norm": 1.6905298233032227, - "learning_rate": 5.904020100502513e-05, - "loss": 5.0414, - "step": 41270 - }, - { - "epoch": 21.523337679269883, - "grad_norm": 1.4861394166946411, - "learning_rate": 5.90391959798995e-05, - "loss": 5.3022, - "step": 41271 - }, - { - "epoch": 21.523859191655802, - "grad_norm": 1.5998461246490479, - "learning_rate": 5.903819095477388e-05, - "loss": 4.3752, - "step": 41272 - }, - { - "epoch": 21.52438070404172, - "grad_norm": 1.4904248714447021, - "learning_rate": 5.903718592964824e-05, - "loss": 5.1484, - "step": 41273 - }, - { - "epoch": 21.52490221642764, - "grad_norm": 1.5549038648605347, - "learning_rate": 5.903618090452262e-05, - "loss": 5.7047, - "step": 41274 - }, - { - "epoch": 21.52542372881356, - "grad_norm": 1.4862509965896606, - "learning_rate": 5.9035175879396984e-05, - "loss": 5.0035, - "step": 41275 - }, - { - "epoch": 21.52594524119948, - "grad_norm": 1.575300931930542, - "learning_rate": 5.903417085427136e-05, - "loss": 5.1487, - "step": 41276 - }, - { - "epoch": 21.526466753585396, - "grad_norm": 1.49616277217865, - "learning_rate": 5.903316582914573e-05, - "loss": 5.6388, - "step": 41277 - }, - { - "epoch": 21.526988265971315, - "grad_norm": 1.5445679426193237, - "learning_rate": 5.90321608040201e-05, - "loss": 5.0637, - "step": 41278 - }, - { - "epoch": 21.527509778357235, - "grad_norm": 1.5683672428131104, - "learning_rate": 5.9031155778894475e-05, - "loss": 5.0367, - "step": 41279 - }, - { - "epoch": 21.528031290743154, - "grad_norm": 1.6211491823196411, - "learning_rate": 5.9030150753768846e-05, - "loss": 4.2479, - "step": 41280 - }, - { - "epoch": 21.528552803129074, - "grad_norm": 1.5930051803588867, - "learning_rate": 5.9029145728643223e-05, - "loss": 5.1962, - "step": 41281 - }, - { - "epoch": 21.529074315514993, - "grad_norm": 1.5900702476501465, - "learning_rate": 5.902814070351759e-05, - "loss": 5.1131, - "step": 41282 - }, - { - "epoch": 21.529595827900913, - "grad_norm": 1.7908767461776733, - "learning_rate": 5.9027135678391966e-05, - "loss": 5.0271, - "step": 41283 - }, - { - "epoch": 21.530117340286832, - "grad_norm": 1.503699779510498, - "learning_rate": 5.902613065326633e-05, - "loss": 5.4777, - "step": 41284 - }, - { - "epoch": 21.53063885267275, - "grad_norm": 1.3899527788162231, - "learning_rate": 5.902512562814071e-05, - "loss": 5.297, - "step": 41285 - }, - { - "epoch": 21.53116036505867, - "grad_norm": 1.7177962064743042, - "learning_rate": 5.902412060301508e-05, - "loss": 5.026, - "step": 41286 - }, - { - "epoch": 21.53168187744459, - "grad_norm": 1.455560564994812, - "learning_rate": 5.9023115577889457e-05, - "loss": 5.5653, - "step": 41287 - }, - { - "epoch": 21.53220338983051, - "grad_norm": 1.5129448175430298, - "learning_rate": 5.902211055276382e-05, - "loss": 5.5221, - "step": 41288 - }, - { - "epoch": 21.532724902216426, - "grad_norm": 1.4738863706588745, - "learning_rate": 5.90211055276382e-05, - "loss": 5.4537, - "step": 41289 - }, - { - "epoch": 21.533246414602345, - "grad_norm": 1.5757029056549072, - "learning_rate": 5.902010050251256e-05, - "loss": 5.483, - "step": 41290 - }, - { - "epoch": 21.533767926988265, - "grad_norm": 1.5015990734100342, - "learning_rate": 5.9019095477386934e-05, - "loss": 5.2347, - "step": 41291 - }, - { - "epoch": 21.534289439374184, - "grad_norm": 1.533408522605896, - "learning_rate": 5.901809045226131e-05, - "loss": 5.534, - "step": 41292 - }, - { - "epoch": 21.534810951760104, - "grad_norm": 1.5703332424163818, - "learning_rate": 5.9017085427135676e-05, - "loss": 5.3702, - "step": 41293 - }, - { - "epoch": 21.535332464146023, - "grad_norm": 1.5454661846160889, - "learning_rate": 5.9016080402010054e-05, - "loss": 5.2517, - "step": 41294 - }, - { - "epoch": 21.535853976531943, - "grad_norm": 1.4862533807754517, - "learning_rate": 5.901507537688442e-05, - "loss": 5.6052, - "step": 41295 - }, - { - "epoch": 21.536375488917862, - "grad_norm": 1.6355597972869873, - "learning_rate": 5.9014070351758796e-05, - "loss": 5.1414, - "step": 41296 - }, - { - "epoch": 21.53689700130378, - "grad_norm": 1.6251059770584106, - "learning_rate": 5.901306532663317e-05, - "loss": 5.0651, - "step": 41297 - }, - { - "epoch": 21.5374185136897, - "grad_norm": 1.4672293663024902, - "learning_rate": 5.9012060301507545e-05, - "loss": 4.8981, - "step": 41298 - }, - { - "epoch": 21.53794002607562, - "grad_norm": 1.5937309265136719, - "learning_rate": 5.901105527638191e-05, - "loss": 5.0438, - "step": 41299 - }, - { - "epoch": 21.53846153846154, - "grad_norm": 1.5574020147323608, - "learning_rate": 5.901005025125629e-05, - "loss": 5.3621, - "step": 41300 - }, - { - "epoch": 21.538983050847456, - "grad_norm": 1.6874536275863647, - "learning_rate": 5.900904522613066e-05, - "loss": 5.1124, - "step": 41301 - }, - { - "epoch": 21.539504563233375, - "grad_norm": 1.4630059003829956, - "learning_rate": 5.9008040201005036e-05, - "loss": 4.9967, - "step": 41302 - }, - { - "epoch": 21.540026075619295, - "grad_norm": 1.4255181550979614, - "learning_rate": 5.90070351758794e-05, - "loss": 5.541, - "step": 41303 - }, - { - "epoch": 21.540547588005214, - "grad_norm": 1.4596010446548462, - "learning_rate": 5.900603015075378e-05, - "loss": 5.226, - "step": 41304 - }, - { - "epoch": 21.541069100391134, - "grad_norm": 1.4685242176055908, - "learning_rate": 5.900502512562814e-05, - "loss": 5.2777, - "step": 41305 - }, - { - "epoch": 21.541590612777053, - "grad_norm": 1.469725251197815, - "learning_rate": 5.900402010050251e-05, - "loss": 5.4477, - "step": 41306 - }, - { - "epoch": 21.542112125162973, - "grad_norm": 1.4878336191177368, - "learning_rate": 5.900301507537689e-05, - "loss": 5.4303, - "step": 41307 - }, - { - "epoch": 21.542633637548892, - "grad_norm": 1.538132905960083, - "learning_rate": 5.9002010050251255e-05, - "loss": 5.1373, - "step": 41308 - }, - { - "epoch": 21.54315514993481, - "grad_norm": 1.6088814735412598, - "learning_rate": 5.900100502512563e-05, - "loss": 5.1346, - "step": 41309 - }, - { - "epoch": 21.54367666232073, - "grad_norm": 1.4650967121124268, - "learning_rate": 5.9e-05, - "loss": 5.4505, - "step": 41310 - }, - { - "epoch": 21.54419817470665, - "grad_norm": 1.6112236976623535, - "learning_rate": 5.8998994974874375e-05, - "loss": 5.1138, - "step": 41311 - }, - { - "epoch": 21.54471968709257, - "grad_norm": 1.6032872200012207, - "learning_rate": 5.8997989949748746e-05, - "loss": 5.239, - "step": 41312 - }, - { - "epoch": 21.545241199478486, - "grad_norm": 1.429436445236206, - "learning_rate": 5.8996984924623124e-05, - "loss": 5.3193, - "step": 41313 - }, - { - "epoch": 21.545762711864406, - "grad_norm": 1.4618483781814575, - "learning_rate": 5.899597989949749e-05, - "loss": 5.4398, - "step": 41314 - }, - { - "epoch": 21.546284224250325, - "grad_norm": 1.5813241004943848, - "learning_rate": 5.8994974874371866e-05, - "loss": 5.142, - "step": 41315 - }, - { - "epoch": 21.546805736636244, - "grad_norm": 1.4722589254379272, - "learning_rate": 5.899396984924623e-05, - "loss": 5.3079, - "step": 41316 - }, - { - "epoch": 21.547327249022164, - "grad_norm": 1.4642257690429688, - "learning_rate": 5.899296482412061e-05, - "loss": 5.3665, - "step": 41317 - }, - { - "epoch": 21.547848761408083, - "grad_norm": 1.525261640548706, - "learning_rate": 5.899195979899498e-05, - "loss": 5.2653, - "step": 41318 - }, - { - "epoch": 21.548370273794003, - "grad_norm": 1.459911823272705, - "learning_rate": 5.8990954773869344e-05, - "loss": 5.3607, - "step": 41319 - }, - { - "epoch": 21.548891786179922, - "grad_norm": 1.5273574590682983, - "learning_rate": 5.898994974874372e-05, - "loss": 5.5016, - "step": 41320 - }, - { - "epoch": 21.54941329856584, - "grad_norm": 1.5402766466140747, - "learning_rate": 5.8988944723618086e-05, - "loss": 5.3229, - "step": 41321 - }, - { - "epoch": 21.54993481095176, - "grad_norm": 1.4661905765533447, - "learning_rate": 5.8987939698492464e-05, - "loss": 5.4789, - "step": 41322 - }, - { - "epoch": 21.55045632333768, - "grad_norm": 1.509519100189209, - "learning_rate": 5.8986934673366835e-05, - "loss": 5.215, - "step": 41323 - }, - { - "epoch": 21.5509778357236, - "grad_norm": 1.557570219039917, - "learning_rate": 5.898592964824121e-05, - "loss": 5.5647, - "step": 41324 - }, - { - "epoch": 21.551499348109516, - "grad_norm": 1.5032492876052856, - "learning_rate": 5.898492462311558e-05, - "loss": 5.5752, - "step": 41325 - }, - { - "epoch": 21.552020860495436, - "grad_norm": 1.4643694162368774, - "learning_rate": 5.8983919597989955e-05, - "loss": 5.6583, - "step": 41326 - }, - { - "epoch": 21.552542372881355, - "grad_norm": 1.4774478673934937, - "learning_rate": 5.8982914572864326e-05, - "loss": 5.3805, - "step": 41327 - }, - { - "epoch": 21.553063885267274, - "grad_norm": 1.580359697341919, - "learning_rate": 5.8981909547738704e-05, - "loss": 4.2801, - "step": 41328 - }, - { - "epoch": 21.553585397653194, - "grad_norm": 1.5588352680206299, - "learning_rate": 5.898090452261307e-05, - "loss": 5.2692, - "step": 41329 - }, - { - "epoch": 21.554106910039113, - "grad_norm": 1.5803859233856201, - "learning_rate": 5.8979899497487446e-05, - "loss": 5.2465, - "step": 41330 - }, - { - "epoch": 21.554628422425033, - "grad_norm": 1.4927420616149902, - "learning_rate": 5.897889447236181e-05, - "loss": 5.4267, - "step": 41331 - }, - { - "epoch": 21.555149934810952, - "grad_norm": 1.5084747076034546, - "learning_rate": 5.897788944723618e-05, - "loss": 5.1386, - "step": 41332 - }, - { - "epoch": 21.555671447196872, - "grad_norm": 1.533982276916504, - "learning_rate": 5.897688442211056e-05, - "loss": 5.1234, - "step": 41333 - }, - { - "epoch": 21.55619295958279, - "grad_norm": 1.5401102304458618, - "learning_rate": 5.897587939698492e-05, - "loss": 5.1858, - "step": 41334 - }, - { - "epoch": 21.55671447196871, - "grad_norm": 1.6077932119369507, - "learning_rate": 5.89748743718593e-05, - "loss": 5.2746, - "step": 41335 - }, - { - "epoch": 21.557235984354627, - "grad_norm": 1.6266292333602905, - "learning_rate": 5.8973869346733665e-05, - "loss": 4.8293, - "step": 41336 - }, - { - "epoch": 21.557757496740546, - "grad_norm": 1.6232482194900513, - "learning_rate": 5.897286432160804e-05, - "loss": 4.933, - "step": 41337 - }, - { - "epoch": 21.558279009126466, - "grad_norm": 1.4842878580093384, - "learning_rate": 5.8971859296482414e-05, - "loss": 5.3509, - "step": 41338 - }, - { - "epoch": 21.558800521512385, - "grad_norm": 1.4475979804992676, - "learning_rate": 5.897085427135679e-05, - "loss": 5.7354, - "step": 41339 - }, - { - "epoch": 21.559322033898304, - "grad_norm": 1.5124280452728271, - "learning_rate": 5.8969849246231156e-05, - "loss": 5.2072, - "step": 41340 - }, - { - "epoch": 21.559843546284224, - "grad_norm": 1.508368730545044, - "learning_rate": 5.8968844221105534e-05, - "loss": 5.2261, - "step": 41341 - }, - { - "epoch": 21.560365058670143, - "grad_norm": 1.5105043649673462, - "learning_rate": 5.89678391959799e-05, - "loss": 5.5376, - "step": 41342 - }, - { - "epoch": 21.560886571056063, - "grad_norm": 1.4866056442260742, - "learning_rate": 5.8966834170854276e-05, - "loss": 5.5118, - "step": 41343 - }, - { - "epoch": 21.561408083441982, - "grad_norm": 1.5670864582061768, - "learning_rate": 5.896582914572865e-05, - "loss": 5.3386, - "step": 41344 - }, - { - "epoch": 21.561929595827902, - "grad_norm": 1.5481849908828735, - "learning_rate": 5.896482412060301e-05, - "loss": 4.9978, - "step": 41345 - }, - { - "epoch": 21.56245110821382, - "grad_norm": 1.557443618774414, - "learning_rate": 5.896381909547739e-05, - "loss": 5.3842, - "step": 41346 - }, - { - "epoch": 21.56297262059974, - "grad_norm": 1.6475753784179688, - "learning_rate": 5.896281407035176e-05, - "loss": 5.0819, - "step": 41347 - }, - { - "epoch": 21.56349413298566, - "grad_norm": 1.558729887008667, - "learning_rate": 5.896180904522614e-05, - "loss": 5.0014, - "step": 41348 - }, - { - "epoch": 21.564015645371576, - "grad_norm": 1.4555788040161133, - "learning_rate": 5.89608040201005e-05, - "loss": 5.4457, - "step": 41349 - }, - { - "epoch": 21.564537157757496, - "grad_norm": 1.5509744882583618, - "learning_rate": 5.895979899497488e-05, - "loss": 5.2223, - "step": 41350 - }, - { - "epoch": 21.565058670143415, - "grad_norm": 1.5769461393356323, - "learning_rate": 5.8958793969849245e-05, - "loss": 4.6862, - "step": 41351 - }, - { - "epoch": 21.565580182529335, - "grad_norm": 1.5209707021713257, - "learning_rate": 5.895778894472362e-05, - "loss": 5.4294, - "step": 41352 - }, - { - "epoch": 21.566101694915254, - "grad_norm": 1.5281062126159668, - "learning_rate": 5.8956783919597993e-05, - "loss": 5.2457, - "step": 41353 - }, - { - "epoch": 21.566623207301173, - "grad_norm": 1.636772632598877, - "learning_rate": 5.895577889447237e-05, - "loss": 5.0229, - "step": 41354 - }, - { - "epoch": 21.567144719687093, - "grad_norm": 1.528497338294983, - "learning_rate": 5.8954773869346736e-05, - "loss": 5.5249, - "step": 41355 - }, - { - "epoch": 21.567666232073012, - "grad_norm": 1.5780446529388428, - "learning_rate": 5.895376884422111e-05, - "loss": 5.4808, - "step": 41356 - }, - { - "epoch": 21.568187744458932, - "grad_norm": 1.536303162574768, - "learning_rate": 5.895276381909548e-05, - "loss": 5.3068, - "step": 41357 - }, - { - "epoch": 21.56870925684485, - "grad_norm": 1.5733270645141602, - "learning_rate": 5.895175879396985e-05, - "loss": 5.2632, - "step": 41358 - }, - { - "epoch": 21.56923076923077, - "grad_norm": 1.6079787015914917, - "learning_rate": 5.8950753768844227e-05, - "loss": 5.4695, - "step": 41359 - }, - { - "epoch": 21.569752281616687, - "grad_norm": 1.4912632703781128, - "learning_rate": 5.894974874371859e-05, - "loss": 5.6474, - "step": 41360 - }, - { - "epoch": 21.570273794002606, - "grad_norm": 1.4878822565078735, - "learning_rate": 5.894874371859297e-05, - "loss": 5.4958, - "step": 41361 - }, - { - "epoch": 21.570795306388526, - "grad_norm": 1.5711787939071655, - "learning_rate": 5.894773869346733e-05, - "loss": 4.7963, - "step": 41362 - }, - { - "epoch": 21.571316818774445, - "grad_norm": 1.536095380783081, - "learning_rate": 5.894673366834171e-05, - "loss": 5.3576, - "step": 41363 - }, - { - "epoch": 21.571838331160365, - "grad_norm": 1.5319575071334839, - "learning_rate": 5.894572864321608e-05, - "loss": 5.2181, - "step": 41364 - }, - { - "epoch": 21.572359843546284, - "grad_norm": 1.606568694114685, - "learning_rate": 5.894472361809046e-05, - "loss": 5.1574, - "step": 41365 - }, - { - "epoch": 21.572881355932203, - "grad_norm": 1.537639856338501, - "learning_rate": 5.8943718592964824e-05, - "loss": 4.8327, - "step": 41366 - }, - { - "epoch": 21.573402868318123, - "grad_norm": 1.6431291103363037, - "learning_rate": 5.89427135678392e-05, - "loss": 5.5306, - "step": 41367 - }, - { - "epoch": 21.573924380704042, - "grad_norm": 1.5570900440216064, - "learning_rate": 5.894170854271357e-05, - "loss": 4.7533, - "step": 41368 - }, - { - "epoch": 21.574445893089962, - "grad_norm": 1.518526315689087, - "learning_rate": 5.894070351758795e-05, - "loss": 5.4788, - "step": 41369 - }, - { - "epoch": 21.57496740547588, - "grad_norm": 1.5892174243927002, - "learning_rate": 5.8939698492462315e-05, - "loss": 5.2001, - "step": 41370 - }, - { - "epoch": 21.5754889178618, - "grad_norm": 1.501483678817749, - "learning_rate": 5.893869346733668e-05, - "loss": 5.3999, - "step": 41371 - }, - { - "epoch": 21.576010430247717, - "grad_norm": 1.5541528463363647, - "learning_rate": 5.893768844221106e-05, - "loss": 4.5509, - "step": 41372 - }, - { - "epoch": 21.576531942633636, - "grad_norm": 1.4519033432006836, - "learning_rate": 5.893668341708543e-05, - "loss": 5.6925, - "step": 41373 - }, - { - "epoch": 21.577053455019556, - "grad_norm": 1.6237998008728027, - "learning_rate": 5.8935678391959806e-05, - "loss": 4.3589, - "step": 41374 - }, - { - "epoch": 21.577574967405475, - "grad_norm": 1.4941470623016357, - "learning_rate": 5.893467336683417e-05, - "loss": 5.3427, - "step": 41375 - }, - { - "epoch": 21.578096479791395, - "grad_norm": 1.5373578071594238, - "learning_rate": 5.893366834170855e-05, - "loss": 5.1249, - "step": 41376 - }, - { - "epoch": 21.578617992177314, - "grad_norm": 1.5348938703536987, - "learning_rate": 5.893266331658291e-05, - "loss": 5.2206, - "step": 41377 - }, - { - "epoch": 21.579139504563233, - "grad_norm": 1.5524977445602417, - "learning_rate": 5.893165829145729e-05, - "loss": 5.5673, - "step": 41378 - }, - { - "epoch": 21.579661016949153, - "grad_norm": 1.5716183185577393, - "learning_rate": 5.893065326633166e-05, - "loss": 5.4244, - "step": 41379 - }, - { - "epoch": 21.580182529335072, - "grad_norm": 1.5389695167541504, - "learning_rate": 5.892964824120604e-05, - "loss": 5.5832, - "step": 41380 - }, - { - "epoch": 21.580704041720992, - "grad_norm": 1.5567045211791992, - "learning_rate": 5.89286432160804e-05, - "loss": 5.4572, - "step": 41381 - }, - { - "epoch": 21.58122555410691, - "grad_norm": 1.499077320098877, - "learning_rate": 5.892763819095478e-05, - "loss": 5.3614, - "step": 41382 - }, - { - "epoch": 21.58174706649283, - "grad_norm": 1.4536120891571045, - "learning_rate": 5.8926633165829145e-05, - "loss": 5.4685, - "step": 41383 - }, - { - "epoch": 21.582268578878747, - "grad_norm": 1.468278169631958, - "learning_rate": 5.8925628140703516e-05, - "loss": 5.4499, - "step": 41384 - }, - { - "epoch": 21.582790091264666, - "grad_norm": 1.6011974811553955, - "learning_rate": 5.8924623115577894e-05, - "loss": 5.2173, - "step": 41385 - }, - { - "epoch": 21.583311603650586, - "grad_norm": 1.5428211688995361, - "learning_rate": 5.892361809045226e-05, - "loss": 4.6602, - "step": 41386 - }, - { - "epoch": 21.583833116036505, - "grad_norm": 1.6843336820602417, - "learning_rate": 5.8922613065326636e-05, - "loss": 5.0611, - "step": 41387 - }, - { - "epoch": 21.584354628422425, - "grad_norm": 1.5177103281021118, - "learning_rate": 5.892160804020101e-05, - "loss": 5.2427, - "step": 41388 - }, - { - "epoch": 21.584876140808344, - "grad_norm": 1.7024086713790894, - "learning_rate": 5.8920603015075385e-05, - "loss": 4.8055, - "step": 41389 - }, - { - "epoch": 21.585397653194264, - "grad_norm": 1.453627347946167, - "learning_rate": 5.891959798994975e-05, - "loss": 5.4637, - "step": 41390 - }, - { - "epoch": 21.585919165580183, - "grad_norm": 1.5850744247436523, - "learning_rate": 5.891859296482413e-05, - "loss": 5.5119, - "step": 41391 - }, - { - "epoch": 21.586440677966102, - "grad_norm": 1.5077663660049438, - "learning_rate": 5.891758793969849e-05, - "loss": 5.1874, - "step": 41392 - }, - { - "epoch": 21.586962190352022, - "grad_norm": 1.5064185857772827, - "learning_rate": 5.891658291457287e-05, - "loss": 5.3789, - "step": 41393 - }, - { - "epoch": 21.58748370273794, - "grad_norm": 1.5786540508270264, - "learning_rate": 5.891557788944724e-05, - "loss": 5.0895, - "step": 41394 - }, - { - "epoch": 21.58800521512386, - "grad_norm": 1.5935633182525635, - "learning_rate": 5.891457286432162e-05, - "loss": 5.5072, - "step": 41395 - }, - { - "epoch": 21.588526727509777, - "grad_norm": 1.4772313833236694, - "learning_rate": 5.891356783919598e-05, - "loss": 5.3167, - "step": 41396 - }, - { - "epoch": 21.589048239895696, - "grad_norm": 1.5155328512191772, - "learning_rate": 5.891256281407036e-05, - "loss": 5.3329, - "step": 41397 - }, - { - "epoch": 21.589569752281616, - "grad_norm": 1.4700196981430054, - "learning_rate": 5.8911557788944725e-05, - "loss": 5.0588, - "step": 41398 - }, - { - "epoch": 21.590091264667535, - "grad_norm": 1.6144379377365112, - "learning_rate": 5.8910552763819096e-05, - "loss": 5.1268, - "step": 41399 - }, - { - "epoch": 21.590612777053455, - "grad_norm": 1.4258018732070923, - "learning_rate": 5.8909547738693474e-05, - "loss": 5.3086, - "step": 41400 - }, - { - "epoch": 21.591134289439374, - "grad_norm": 1.5602774620056152, - "learning_rate": 5.890854271356784e-05, - "loss": 5.1959, - "step": 41401 - }, - { - "epoch": 21.591655801825294, - "grad_norm": 1.5990005731582642, - "learning_rate": 5.8907537688442216e-05, - "loss": 4.9382, - "step": 41402 - }, - { - "epoch": 21.592177314211213, - "grad_norm": 1.5364447832107544, - "learning_rate": 5.890653266331658e-05, - "loss": 5.4361, - "step": 41403 - }, - { - "epoch": 21.592698826597132, - "grad_norm": 1.6290539503097534, - "learning_rate": 5.890552763819096e-05, - "loss": 5.3129, - "step": 41404 - }, - { - "epoch": 21.593220338983052, - "grad_norm": 1.6921759843826294, - "learning_rate": 5.890452261306533e-05, - "loss": 5.1706, - "step": 41405 - }, - { - "epoch": 21.59374185136897, - "grad_norm": 1.4879244565963745, - "learning_rate": 5.8903517587939707e-05, - "loss": 5.1502, - "step": 41406 - }, - { - "epoch": 21.59426336375489, - "grad_norm": 1.53236722946167, - "learning_rate": 5.890251256281407e-05, - "loss": 5.3556, - "step": 41407 - }, - { - "epoch": 21.594784876140807, - "grad_norm": 1.4980614185333252, - "learning_rate": 5.890150753768845e-05, - "loss": 5.6075, - "step": 41408 - }, - { - "epoch": 21.595306388526726, - "grad_norm": 1.5190949440002441, - "learning_rate": 5.890050251256281e-05, - "loss": 5.1779, - "step": 41409 - }, - { - "epoch": 21.595827900912646, - "grad_norm": 1.4911565780639648, - "learning_rate": 5.889949748743719e-05, - "loss": 5.2891, - "step": 41410 - }, - { - "epoch": 21.596349413298565, - "grad_norm": 1.500127911567688, - "learning_rate": 5.889849246231156e-05, - "loss": 5.3406, - "step": 41411 - }, - { - "epoch": 21.596870925684485, - "grad_norm": 1.4777846336364746, - "learning_rate": 5.8897487437185926e-05, - "loss": 5.5058, - "step": 41412 - }, - { - "epoch": 21.597392438070404, - "grad_norm": 1.6216222047805786, - "learning_rate": 5.8896482412060304e-05, - "loss": 5.24, - "step": 41413 - }, - { - "epoch": 21.597913950456324, - "grad_norm": 1.4594931602478027, - "learning_rate": 5.8895477386934675e-05, - "loss": 5.4462, - "step": 41414 - }, - { - "epoch": 21.598435462842243, - "grad_norm": 1.5225138664245605, - "learning_rate": 5.889447236180905e-05, - "loss": 4.7676, - "step": 41415 - }, - { - "epoch": 21.598956975228162, - "grad_norm": 1.5399303436279297, - "learning_rate": 5.889346733668342e-05, - "loss": 4.5742, - "step": 41416 - }, - { - "epoch": 21.599478487614082, - "grad_norm": 1.567804217338562, - "learning_rate": 5.8892462311557795e-05, - "loss": 5.1795, - "step": 41417 - }, - { - "epoch": 21.6, - "grad_norm": 1.497216820716858, - "learning_rate": 5.889145728643216e-05, - "loss": 5.6878, - "step": 41418 - }, - { - "epoch": 21.60052151238592, - "grad_norm": 1.544082760810852, - "learning_rate": 5.889045226130654e-05, - "loss": 5.3631, - "step": 41419 - }, - { - "epoch": 21.601043024771837, - "grad_norm": 1.9596598148345947, - "learning_rate": 5.888944723618091e-05, - "loss": 4.3475, - "step": 41420 - }, - { - "epoch": 21.601564537157756, - "grad_norm": 1.464604139328003, - "learning_rate": 5.8888442211055286e-05, - "loss": 5.4788, - "step": 41421 - }, - { - "epoch": 21.602086049543676, - "grad_norm": 1.601670742034912, - "learning_rate": 5.888743718592965e-05, - "loss": 5.1724, - "step": 41422 - }, - { - "epoch": 21.602607561929595, - "grad_norm": 1.5380523204803467, - "learning_rate": 5.888643216080403e-05, - "loss": 5.2525, - "step": 41423 - }, - { - "epoch": 21.603129074315515, - "grad_norm": 1.5500893592834473, - "learning_rate": 5.888542713567839e-05, - "loss": 5.4176, - "step": 41424 - }, - { - "epoch": 21.603650586701434, - "grad_norm": 1.5062679052352905, - "learning_rate": 5.888442211055276e-05, - "loss": 5.262, - "step": 41425 - }, - { - "epoch": 21.604172099087354, - "grad_norm": 1.4823168516159058, - "learning_rate": 5.888341708542714e-05, - "loss": 4.8063, - "step": 41426 - }, - { - "epoch": 21.604693611473273, - "grad_norm": 1.592042326927185, - "learning_rate": 5.8882412060301505e-05, - "loss": 5.2058, - "step": 41427 - }, - { - "epoch": 21.605215123859193, - "grad_norm": 1.8300832509994507, - "learning_rate": 5.888140703517588e-05, - "loss": 4.9953, - "step": 41428 - }, - { - "epoch": 21.605736636245112, - "grad_norm": 1.5570449829101562, - "learning_rate": 5.888040201005025e-05, - "loss": 4.8435, - "step": 41429 - }, - { - "epoch": 21.60625814863103, - "grad_norm": 1.4779289960861206, - "learning_rate": 5.8879396984924625e-05, - "loss": 5.4483, - "step": 41430 - }, - { - "epoch": 21.60677966101695, - "grad_norm": 1.5822534561157227, - "learning_rate": 5.8878391959798996e-05, - "loss": 5.1704, - "step": 41431 - }, - { - "epoch": 21.607301173402867, - "grad_norm": 1.5070769786834717, - "learning_rate": 5.8877386934673374e-05, - "loss": 5.4858, - "step": 41432 - }, - { - "epoch": 21.607822685788786, - "grad_norm": 1.536942958831787, - "learning_rate": 5.887638190954774e-05, - "loss": 5.4964, - "step": 41433 - }, - { - "epoch": 21.608344198174706, - "grad_norm": 1.6103627681732178, - "learning_rate": 5.8875376884422116e-05, - "loss": 5.0719, - "step": 41434 - }, - { - "epoch": 21.608865710560625, - "grad_norm": 1.5022697448730469, - "learning_rate": 5.887437185929649e-05, - "loss": 5.3846, - "step": 41435 - }, - { - "epoch": 21.609387222946545, - "grad_norm": 1.4528095722198486, - "learning_rate": 5.8873366834170865e-05, - "loss": 5.0784, - "step": 41436 - }, - { - "epoch": 21.609908735332464, - "grad_norm": 1.4652621746063232, - "learning_rate": 5.887236180904523e-05, - "loss": 5.4985, - "step": 41437 - }, - { - "epoch": 21.610430247718384, - "grad_norm": 1.5103555917739868, - "learning_rate": 5.8871356783919594e-05, - "loss": 5.4056, - "step": 41438 - }, - { - "epoch": 21.610951760104303, - "grad_norm": 1.4960732460021973, - "learning_rate": 5.887035175879397e-05, - "loss": 4.8726, - "step": 41439 - }, - { - "epoch": 21.611473272490223, - "grad_norm": 1.5556795597076416, - "learning_rate": 5.886934673366834e-05, - "loss": 5.5837, - "step": 41440 - }, - { - "epoch": 21.611994784876142, - "grad_norm": 1.7180513143539429, - "learning_rate": 5.886834170854272e-05, - "loss": 5.3557, - "step": 41441 - }, - { - "epoch": 21.61251629726206, - "grad_norm": 1.6356369256973267, - "learning_rate": 5.8867336683417085e-05, - "loss": 5.3709, - "step": 41442 - }, - { - "epoch": 21.613037809647977, - "grad_norm": 1.7187151908874512, - "learning_rate": 5.886633165829146e-05, - "loss": 4.6471, - "step": 41443 - }, - { - "epoch": 21.613559322033897, - "grad_norm": 1.5300993919372559, - "learning_rate": 5.886532663316583e-05, - "loss": 5.6507, - "step": 41444 - }, - { - "epoch": 21.614080834419816, - "grad_norm": 1.6551614999771118, - "learning_rate": 5.8864321608040205e-05, - "loss": 5.4529, - "step": 41445 - }, - { - "epoch": 21.614602346805736, - "grad_norm": 1.5432894229888916, - "learning_rate": 5.8863316582914576e-05, - "loss": 5.5323, - "step": 41446 - }, - { - "epoch": 21.615123859191655, - "grad_norm": 1.4930585622787476, - "learning_rate": 5.8862311557788954e-05, - "loss": 5.5351, - "step": 41447 - }, - { - "epoch": 21.615645371577575, - "grad_norm": 1.5499262809753418, - "learning_rate": 5.886130653266332e-05, - "loss": 5.2547, - "step": 41448 - }, - { - "epoch": 21.616166883963494, - "grad_norm": 1.5132684707641602, - "learning_rate": 5.8860301507537696e-05, - "loss": 5.5468, - "step": 41449 - }, - { - "epoch": 21.616688396349414, - "grad_norm": 1.5655758380889893, - "learning_rate": 5.885929648241206e-05, - "loss": 4.9281, - "step": 41450 - }, - { - "epoch": 21.617209908735333, - "grad_norm": 1.4573997259140015, - "learning_rate": 5.885829145728643e-05, - "loss": 5.5674, - "step": 41451 - }, - { - "epoch": 21.617731421121253, - "grad_norm": 1.5619337558746338, - "learning_rate": 5.885728643216081e-05, - "loss": 5.366, - "step": 41452 - }, - { - "epoch": 21.618252933507172, - "grad_norm": 1.6491507291793823, - "learning_rate": 5.885628140703517e-05, - "loss": 5.1505, - "step": 41453 - }, - { - "epoch": 21.61877444589309, - "grad_norm": 1.6573599576950073, - "learning_rate": 5.885527638190955e-05, - "loss": 5.1203, - "step": 41454 - }, - { - "epoch": 21.619295958279007, - "grad_norm": 1.5246413946151733, - "learning_rate": 5.885427135678392e-05, - "loss": 5.501, - "step": 41455 - }, - { - "epoch": 21.619817470664927, - "grad_norm": 1.5017472505569458, - "learning_rate": 5.88532663316583e-05, - "loss": 5.2805, - "step": 41456 - }, - { - "epoch": 21.620338983050846, - "grad_norm": 1.4843274354934692, - "learning_rate": 5.8852261306532664e-05, - "loss": 5.6728, - "step": 41457 - }, - { - "epoch": 21.620860495436766, - "grad_norm": 1.503167748451233, - "learning_rate": 5.885125628140704e-05, - "loss": 5.271, - "step": 41458 - }, - { - "epoch": 21.621382007822685, - "grad_norm": 1.516230583190918, - "learning_rate": 5.8850251256281406e-05, - "loss": 5.2359, - "step": 41459 - }, - { - "epoch": 21.621903520208605, - "grad_norm": 1.5255297422409058, - "learning_rate": 5.8849246231155784e-05, - "loss": 5.5463, - "step": 41460 - }, - { - "epoch": 21.622425032594524, - "grad_norm": 1.4131778478622437, - "learning_rate": 5.8848241206030155e-05, - "loss": 5.7039, - "step": 41461 - }, - { - "epoch": 21.622946544980444, - "grad_norm": 1.4790581464767456, - "learning_rate": 5.884723618090453e-05, - "loss": 5.4384, - "step": 41462 - }, - { - "epoch": 21.623468057366363, - "grad_norm": 1.5140877962112427, - "learning_rate": 5.88462311557789e-05, - "loss": 5.2616, - "step": 41463 - }, - { - "epoch": 21.623989569752283, - "grad_norm": 1.440026879310608, - "learning_rate": 5.884522613065326e-05, - "loss": 5.756, - "step": 41464 - }, - { - "epoch": 21.624511082138202, - "grad_norm": 1.4127751588821411, - "learning_rate": 5.884422110552764e-05, - "loss": 5.8806, - "step": 41465 - }, - { - "epoch": 21.62503259452412, - "grad_norm": 1.499885082244873, - "learning_rate": 5.884321608040201e-05, - "loss": 5.6502, - "step": 41466 - }, - { - "epoch": 21.625554106910037, - "grad_norm": 1.549926996231079, - "learning_rate": 5.884221105527639e-05, - "loss": 5.5028, - "step": 41467 - }, - { - "epoch": 21.626075619295957, - "grad_norm": 1.4875452518463135, - "learning_rate": 5.884120603015075e-05, - "loss": 5.2259, - "step": 41468 - }, - { - "epoch": 21.626597131681876, - "grad_norm": 1.4460562467575073, - "learning_rate": 5.884020100502513e-05, - "loss": 5.5204, - "step": 41469 - }, - { - "epoch": 21.627118644067796, - "grad_norm": 1.6393229961395264, - "learning_rate": 5.8839195979899495e-05, - "loss": 5.0958, - "step": 41470 - }, - { - "epoch": 21.627640156453715, - "grad_norm": 1.4342387914657593, - "learning_rate": 5.883819095477387e-05, - "loss": 5.5004, - "step": 41471 - }, - { - "epoch": 21.628161668839635, - "grad_norm": 1.622225284576416, - "learning_rate": 5.8837185929648243e-05, - "loss": 4.9858, - "step": 41472 - }, - { - "epoch": 21.628683181225554, - "grad_norm": 1.5272003412246704, - "learning_rate": 5.883618090452262e-05, - "loss": 5.5978, - "step": 41473 - }, - { - "epoch": 21.629204693611474, - "grad_norm": 1.5245200395584106, - "learning_rate": 5.8835175879396986e-05, - "loss": 4.8239, - "step": 41474 - }, - { - "epoch": 21.629726205997393, - "grad_norm": 1.5967583656311035, - "learning_rate": 5.883417085427136e-05, - "loss": 4.9959, - "step": 41475 - }, - { - "epoch": 21.630247718383313, - "grad_norm": 1.5394117832183838, - "learning_rate": 5.8833165829145734e-05, - "loss": 5.088, - "step": 41476 - }, - { - "epoch": 21.630769230769232, - "grad_norm": 1.521960973739624, - "learning_rate": 5.88321608040201e-05, - "loss": 5.3275, - "step": 41477 - }, - { - "epoch": 21.63129074315515, - "grad_norm": 1.5794556140899658, - "learning_rate": 5.8831155778894477e-05, - "loss": 5.0139, - "step": 41478 - }, - { - "epoch": 21.631812255541067, - "grad_norm": 1.5904173851013184, - "learning_rate": 5.883015075376884e-05, - "loss": 4.9991, - "step": 41479 - }, - { - "epoch": 21.632333767926987, - "grad_norm": 1.6401469707489014, - "learning_rate": 5.882914572864322e-05, - "loss": 4.8051, - "step": 41480 - }, - { - "epoch": 21.632855280312906, - "grad_norm": 1.5257781744003296, - "learning_rate": 5.882814070351759e-05, - "loss": 5.6436, - "step": 41481 - }, - { - "epoch": 21.633376792698826, - "grad_norm": 1.5611255168914795, - "learning_rate": 5.882713567839197e-05, - "loss": 5.3355, - "step": 41482 - }, - { - "epoch": 21.633898305084745, - "grad_norm": 1.5504740476608276, - "learning_rate": 5.882613065326633e-05, - "loss": 5.2409, - "step": 41483 - }, - { - "epoch": 21.634419817470665, - "grad_norm": 1.5472609996795654, - "learning_rate": 5.882512562814071e-05, - "loss": 5.0568, - "step": 41484 - }, - { - "epoch": 21.634941329856584, - "grad_norm": 1.612432599067688, - "learning_rate": 5.8824120603015074e-05, - "loss": 4.7758, - "step": 41485 - }, - { - "epoch": 21.635462842242504, - "grad_norm": 1.4828989505767822, - "learning_rate": 5.882311557788945e-05, - "loss": 5.2986, - "step": 41486 - }, - { - "epoch": 21.635984354628423, - "grad_norm": 1.5615484714508057, - "learning_rate": 5.882211055276382e-05, - "loss": 5.2926, - "step": 41487 - }, - { - "epoch": 21.636505867014343, - "grad_norm": 1.6252198219299316, - "learning_rate": 5.88211055276382e-05, - "loss": 5.1225, - "step": 41488 - }, - { - "epoch": 21.637027379400262, - "grad_norm": 1.58849036693573, - "learning_rate": 5.8820100502512565e-05, - "loss": 5.5416, - "step": 41489 - }, - { - "epoch": 21.63754889178618, - "grad_norm": 1.4664409160614014, - "learning_rate": 5.881909547738694e-05, - "loss": 5.5028, - "step": 41490 - }, - { - "epoch": 21.638070404172097, - "grad_norm": 1.5577236413955688, - "learning_rate": 5.881809045226131e-05, - "loss": 5.353, - "step": 41491 - }, - { - "epoch": 21.638591916558017, - "grad_norm": 1.4853779077529907, - "learning_rate": 5.881708542713568e-05, - "loss": 5.2698, - "step": 41492 - }, - { - "epoch": 21.639113428943936, - "grad_norm": 1.490096926689148, - "learning_rate": 5.8816080402010056e-05, - "loss": 5.2485, - "step": 41493 - }, - { - "epoch": 21.639634941329856, - "grad_norm": 1.630578637123108, - "learning_rate": 5.881507537688442e-05, - "loss": 4.6888, - "step": 41494 - }, - { - "epoch": 21.640156453715775, - "grad_norm": 1.5800950527191162, - "learning_rate": 5.88140703517588e-05, - "loss": 4.942, - "step": 41495 - }, - { - "epoch": 21.640677966101695, - "grad_norm": 1.5310500860214233, - "learning_rate": 5.881306532663316e-05, - "loss": 5.3328, - "step": 41496 - }, - { - "epoch": 21.641199478487614, - "grad_norm": 1.4441509246826172, - "learning_rate": 5.881206030150754e-05, - "loss": 5.2632, - "step": 41497 - }, - { - "epoch": 21.641720990873534, - "grad_norm": 1.4731454849243164, - "learning_rate": 5.881105527638191e-05, - "loss": 5.4027, - "step": 41498 - }, - { - "epoch": 21.642242503259453, - "grad_norm": 1.4906498193740845, - "learning_rate": 5.881005025125629e-05, - "loss": 5.4936, - "step": 41499 - }, - { - "epoch": 21.642764015645373, - "grad_norm": 1.5505999326705933, - "learning_rate": 5.880904522613065e-05, - "loss": 4.9277, - "step": 41500 - }, - { - "epoch": 21.643285528031292, - "grad_norm": 1.5863078832626343, - "learning_rate": 5.880804020100503e-05, - "loss": 5.4625, - "step": 41501 - }, - { - "epoch": 21.64380704041721, - "grad_norm": 1.4908655881881714, - "learning_rate": 5.88070351758794e-05, - "loss": 5.541, - "step": 41502 - }, - { - "epoch": 21.644328552803128, - "grad_norm": 1.560986042022705, - "learning_rate": 5.880603015075378e-05, - "loss": 5.3541, - "step": 41503 - }, - { - "epoch": 21.644850065189047, - "grad_norm": 1.5521982908248901, - "learning_rate": 5.8805025125628144e-05, - "loss": 5.1449, - "step": 41504 - }, - { - "epoch": 21.645371577574966, - "grad_norm": 1.5278199911117554, - "learning_rate": 5.880402010050251e-05, - "loss": 5.0264, - "step": 41505 - }, - { - "epoch": 21.645893089960886, - "grad_norm": 1.6114189624786377, - "learning_rate": 5.8803015075376886e-05, - "loss": 4.7129, - "step": 41506 - }, - { - "epoch": 21.646414602346805, - "grad_norm": 1.5774400234222412, - "learning_rate": 5.880201005025126e-05, - "loss": 5.4961, - "step": 41507 - }, - { - "epoch": 21.646936114732725, - "grad_norm": 1.488282561302185, - "learning_rate": 5.8801005025125635e-05, - "loss": 5.2712, - "step": 41508 - }, - { - "epoch": 21.647457627118644, - "grad_norm": 1.5817421674728394, - "learning_rate": 5.88e-05, - "loss": 4.9787, - "step": 41509 - }, - { - "epoch": 21.647979139504564, - "grad_norm": 1.458275318145752, - "learning_rate": 5.879899497487438e-05, - "loss": 4.8619, - "step": 41510 - }, - { - "epoch": 21.648500651890483, - "grad_norm": 1.4415987730026245, - "learning_rate": 5.879798994974874e-05, - "loss": 5.6537, - "step": 41511 - }, - { - "epoch": 21.649022164276403, - "grad_norm": 1.614202618598938, - "learning_rate": 5.879698492462312e-05, - "loss": 4.9395, - "step": 41512 - }, - { - "epoch": 21.649543676662322, - "grad_norm": 1.5437053442001343, - "learning_rate": 5.879597989949749e-05, - "loss": 5.349, - "step": 41513 - }, - { - "epoch": 21.65006518904824, - "grad_norm": 1.5099420547485352, - "learning_rate": 5.879497487437187e-05, - "loss": 5.148, - "step": 41514 - }, - { - "epoch": 21.650586701434158, - "grad_norm": 1.5536085367202759, - "learning_rate": 5.879396984924623e-05, - "loss": 5.5164, - "step": 41515 - }, - { - "epoch": 21.651108213820077, - "grad_norm": 1.562759518623352, - "learning_rate": 5.879296482412061e-05, - "loss": 4.7856, - "step": 41516 - }, - { - "epoch": 21.651629726205996, - "grad_norm": 1.6061336994171143, - "learning_rate": 5.8791959798994975e-05, - "loss": 5.3023, - "step": 41517 - }, - { - "epoch": 21.652151238591916, - "grad_norm": 1.6152944564819336, - "learning_rate": 5.8790954773869346e-05, - "loss": 5.3574, - "step": 41518 - }, - { - "epoch": 21.652672750977835, - "grad_norm": 1.4823570251464844, - "learning_rate": 5.8789949748743724e-05, - "loss": 5.5355, - "step": 41519 - }, - { - "epoch": 21.653194263363755, - "grad_norm": 1.7027454376220703, - "learning_rate": 5.878894472361809e-05, - "loss": 5.1259, - "step": 41520 - }, - { - "epoch": 21.653715775749674, - "grad_norm": 1.5516915321350098, - "learning_rate": 5.8787939698492466e-05, - "loss": 5.3549, - "step": 41521 - }, - { - "epoch": 21.654237288135594, - "grad_norm": 1.4874484539031982, - "learning_rate": 5.878693467336684e-05, - "loss": 5.3714, - "step": 41522 - }, - { - "epoch": 21.654758800521513, - "grad_norm": 1.5007872581481934, - "learning_rate": 5.8785929648241215e-05, - "loss": 5.3796, - "step": 41523 - }, - { - "epoch": 21.655280312907433, - "grad_norm": 1.6526179313659668, - "learning_rate": 5.878492462311558e-05, - "loss": 5.0339, - "step": 41524 - }, - { - "epoch": 21.655801825293352, - "grad_norm": 1.4845941066741943, - "learning_rate": 5.878391959798996e-05, - "loss": 5.3272, - "step": 41525 - }, - { - "epoch": 21.656323337679268, - "grad_norm": 1.5386086702346802, - "learning_rate": 5.878291457286432e-05, - "loss": 5.3645, - "step": 41526 - }, - { - "epoch": 21.656844850065188, - "grad_norm": 1.5495390892028809, - "learning_rate": 5.87819095477387e-05, - "loss": 5.5022, - "step": 41527 - }, - { - "epoch": 21.657366362451107, - "grad_norm": 1.5641697645187378, - "learning_rate": 5.878090452261307e-05, - "loss": 5.1136, - "step": 41528 - }, - { - "epoch": 21.657887874837026, - "grad_norm": 1.6152046918869019, - "learning_rate": 5.877989949748745e-05, - "loss": 5.3907, - "step": 41529 - }, - { - "epoch": 21.658409387222946, - "grad_norm": 1.566256046295166, - "learning_rate": 5.877889447236181e-05, - "loss": 4.9571, - "step": 41530 - }, - { - "epoch": 21.658930899608865, - "grad_norm": 1.5805747509002686, - "learning_rate": 5.8777889447236176e-05, - "loss": 5.4289, - "step": 41531 - }, - { - "epoch": 21.659452411994785, - "grad_norm": 1.4535826444625854, - "learning_rate": 5.8776884422110554e-05, - "loss": 5.2906, - "step": 41532 - }, - { - "epoch": 21.659973924380704, - "grad_norm": 1.5007268190383911, - "learning_rate": 5.8775879396984925e-05, - "loss": 5.5228, - "step": 41533 - }, - { - "epoch": 21.660495436766624, - "grad_norm": 1.4440490007400513, - "learning_rate": 5.87748743718593e-05, - "loss": 5.5366, - "step": 41534 - }, - { - "epoch": 21.661016949152543, - "grad_norm": 1.4966466426849365, - "learning_rate": 5.877386934673367e-05, - "loss": 5.1826, - "step": 41535 - }, - { - "epoch": 21.661538461538463, - "grad_norm": 1.5930553674697876, - "learning_rate": 5.8772864321608045e-05, - "loss": 5.0775, - "step": 41536 - }, - { - "epoch": 21.662059973924382, - "grad_norm": 1.4895614385604858, - "learning_rate": 5.877185929648241e-05, - "loss": 5.1094, - "step": 41537 - }, - { - "epoch": 21.6625814863103, - "grad_norm": 1.3979547023773193, - "learning_rate": 5.877085427135679e-05, - "loss": 5.7944, - "step": 41538 - }, - { - "epoch": 21.663102998696218, - "grad_norm": 1.588274359703064, - "learning_rate": 5.876984924623116e-05, - "loss": 4.9553, - "step": 41539 - }, - { - "epoch": 21.663624511082137, - "grad_norm": 1.3533085584640503, - "learning_rate": 5.8768844221105536e-05, - "loss": 4.8717, - "step": 41540 - }, - { - "epoch": 21.664146023468057, - "grad_norm": 1.5132156610488892, - "learning_rate": 5.87678391959799e-05, - "loss": 5.631, - "step": 41541 - }, - { - "epoch": 21.664667535853976, - "grad_norm": 1.5336025953292847, - "learning_rate": 5.876683417085428e-05, - "loss": 5.3086, - "step": 41542 - }, - { - "epoch": 21.665189048239895, - "grad_norm": 1.6147290468215942, - "learning_rate": 5.876582914572865e-05, - "loss": 5.1867, - "step": 41543 - }, - { - "epoch": 21.665710560625815, - "grad_norm": 1.463538408279419, - "learning_rate": 5.876482412060301e-05, - "loss": 5.3344, - "step": 41544 - }, - { - "epoch": 21.666232073011734, - "grad_norm": 1.5623269081115723, - "learning_rate": 5.876381909547739e-05, - "loss": 5.3158, - "step": 41545 - }, - { - "epoch": 21.666753585397654, - "grad_norm": 1.6242740154266357, - "learning_rate": 5.8762814070351755e-05, - "loss": 5.3291, - "step": 41546 - }, - { - "epoch": 21.667275097783573, - "grad_norm": 1.5234125852584839, - "learning_rate": 5.876180904522613e-05, - "loss": 4.8262, - "step": 41547 - }, - { - "epoch": 21.667796610169493, - "grad_norm": 1.5326701402664185, - "learning_rate": 5.8760804020100504e-05, - "loss": 4.5862, - "step": 41548 - }, - { - "epoch": 21.668318122555412, - "grad_norm": 1.6132596731185913, - "learning_rate": 5.875979899497488e-05, - "loss": 5.0954, - "step": 41549 - }, - { - "epoch": 21.668839634941328, - "grad_norm": 1.5785390138626099, - "learning_rate": 5.8758793969849246e-05, - "loss": 5.1461, - "step": 41550 - }, - { - "epoch": 21.669361147327248, - "grad_norm": 1.5764044523239136, - "learning_rate": 5.8757788944723624e-05, - "loss": 5.2634, - "step": 41551 - }, - { - "epoch": 21.669882659713167, - "grad_norm": 1.587061882019043, - "learning_rate": 5.875678391959799e-05, - "loss": 4.8855, - "step": 41552 - }, - { - "epoch": 21.670404172099087, - "grad_norm": 1.5338783264160156, - "learning_rate": 5.8755778894472366e-05, - "loss": 4.9377, - "step": 41553 - }, - { - "epoch": 21.670925684485006, - "grad_norm": 1.5670100450515747, - "learning_rate": 5.875477386934674e-05, - "loss": 5.1425, - "step": 41554 - }, - { - "epoch": 21.671447196870925, - "grad_norm": 1.5008275508880615, - "learning_rate": 5.8753768844221115e-05, - "loss": 5.2058, - "step": 41555 - }, - { - "epoch": 21.671968709256845, - "grad_norm": 1.5660088062286377, - "learning_rate": 5.875276381909548e-05, - "loss": 4.8861, - "step": 41556 - }, - { - "epoch": 21.672490221642764, - "grad_norm": 1.652713656425476, - "learning_rate": 5.8751758793969844e-05, - "loss": 4.2282, - "step": 41557 - }, - { - "epoch": 21.673011734028684, - "grad_norm": 1.5557433366775513, - "learning_rate": 5.875075376884422e-05, - "loss": 5.0507, - "step": 41558 - }, - { - "epoch": 21.673533246414603, - "grad_norm": 1.505203366279602, - "learning_rate": 5.874974874371859e-05, - "loss": 5.2477, - "step": 41559 - }, - { - "epoch": 21.674054758800523, - "grad_norm": 1.6316397190093994, - "learning_rate": 5.874874371859297e-05, - "loss": 5.0936, - "step": 41560 - }, - { - "epoch": 21.674576271186442, - "grad_norm": 1.431463360786438, - "learning_rate": 5.8747738693467335e-05, - "loss": 5.566, - "step": 41561 - }, - { - "epoch": 21.675097783572358, - "grad_norm": 1.481339454650879, - "learning_rate": 5.874673366834171e-05, - "loss": 4.68, - "step": 41562 - }, - { - "epoch": 21.675619295958278, - "grad_norm": 1.456303358078003, - "learning_rate": 5.874572864321608e-05, - "loss": 5.2258, - "step": 41563 - }, - { - "epoch": 21.676140808344197, - "grad_norm": 1.4916270971298218, - "learning_rate": 5.8744723618090455e-05, - "loss": 5.338, - "step": 41564 - }, - { - "epoch": 21.676662320730117, - "grad_norm": 1.5957539081573486, - "learning_rate": 5.8743718592964826e-05, - "loss": 4.7849, - "step": 41565 - }, - { - "epoch": 21.677183833116036, - "grad_norm": 1.6977516412734985, - "learning_rate": 5.8742713567839204e-05, - "loss": 5.4997, - "step": 41566 - }, - { - "epoch": 21.677705345501955, - "grad_norm": 1.4743386507034302, - "learning_rate": 5.874170854271357e-05, - "loss": 5.1304, - "step": 41567 - }, - { - "epoch": 21.678226857887875, - "grad_norm": 1.4511635303497314, - "learning_rate": 5.8740703517587946e-05, - "loss": 5.2283, - "step": 41568 - }, - { - "epoch": 21.678748370273794, - "grad_norm": 1.4672003984451294, - "learning_rate": 5.873969849246232e-05, - "loss": 5.5206, - "step": 41569 - }, - { - "epoch": 21.679269882659714, - "grad_norm": 1.6245176792144775, - "learning_rate": 5.8738693467336695e-05, - "loss": 4.8676, - "step": 41570 - }, - { - "epoch": 21.679791395045633, - "grad_norm": 1.6278373003005981, - "learning_rate": 5.873768844221106e-05, - "loss": 5.0047, - "step": 41571 - }, - { - "epoch": 21.680312907431553, - "grad_norm": 1.533719778060913, - "learning_rate": 5.873668341708542e-05, - "loss": 5.4471, - "step": 41572 - }, - { - "epoch": 21.680834419817472, - "grad_norm": 1.5350322723388672, - "learning_rate": 5.87356783919598e-05, - "loss": 5.7294, - "step": 41573 - }, - { - "epoch": 21.681355932203388, - "grad_norm": 1.6318467855453491, - "learning_rate": 5.873467336683417e-05, - "loss": 5.2185, - "step": 41574 - }, - { - "epoch": 21.681877444589308, - "grad_norm": 1.5409212112426758, - "learning_rate": 5.873366834170855e-05, - "loss": 5.0666, - "step": 41575 - }, - { - "epoch": 21.682398956975227, - "grad_norm": 1.5142617225646973, - "learning_rate": 5.8732663316582914e-05, - "loss": 5.1279, - "step": 41576 - }, - { - "epoch": 21.682920469361147, - "grad_norm": 1.5297150611877441, - "learning_rate": 5.873165829145729e-05, - "loss": 5.4343, - "step": 41577 - }, - { - "epoch": 21.683441981747066, - "grad_norm": 1.5091995000839233, - "learning_rate": 5.8730653266331656e-05, - "loss": 5.4554, - "step": 41578 - }, - { - "epoch": 21.683963494132986, - "grad_norm": 1.5110188722610474, - "learning_rate": 5.8729648241206034e-05, - "loss": 5.3325, - "step": 41579 - }, - { - "epoch": 21.684485006518905, - "grad_norm": 1.6137596368789673, - "learning_rate": 5.8728643216080405e-05, - "loss": 4.6743, - "step": 41580 - }, - { - "epoch": 21.685006518904824, - "grad_norm": 1.5791412591934204, - "learning_rate": 5.872763819095478e-05, - "loss": 5.3418, - "step": 41581 - }, - { - "epoch": 21.685528031290744, - "grad_norm": 1.5611743927001953, - "learning_rate": 5.872663316582915e-05, - "loss": 5.1687, - "step": 41582 - }, - { - "epoch": 21.686049543676663, - "grad_norm": 1.37284517288208, - "learning_rate": 5.8725628140703525e-05, - "loss": 5.5268, - "step": 41583 - }, - { - "epoch": 21.686571056062583, - "grad_norm": 1.5076850652694702, - "learning_rate": 5.872462311557789e-05, - "loss": 5.4081, - "step": 41584 - }, - { - "epoch": 21.687092568448502, - "grad_norm": 1.4455592632293701, - "learning_rate": 5.872361809045226e-05, - "loss": 5.8342, - "step": 41585 - }, - { - "epoch": 21.687614080834418, - "grad_norm": 1.5059542655944824, - "learning_rate": 5.872261306532664e-05, - "loss": 5.2419, - "step": 41586 - }, - { - "epoch": 21.688135593220338, - "grad_norm": 1.5242027044296265, - "learning_rate": 5.8721608040201e-05, - "loss": 5.2147, - "step": 41587 - }, - { - "epoch": 21.688657105606257, - "grad_norm": 1.5656870603561401, - "learning_rate": 5.872060301507538e-05, - "loss": 4.9617, - "step": 41588 - }, - { - "epoch": 21.689178617992177, - "grad_norm": 1.5875540971755981, - "learning_rate": 5.871959798994975e-05, - "loss": 5.3399, - "step": 41589 - }, - { - "epoch": 21.689700130378096, - "grad_norm": 1.5428742170333862, - "learning_rate": 5.871859296482413e-05, - "loss": 5.0035, - "step": 41590 - }, - { - "epoch": 21.690221642764016, - "grad_norm": 1.5891075134277344, - "learning_rate": 5.8717587939698493e-05, - "loss": 5.0829, - "step": 41591 - }, - { - "epoch": 21.690743155149935, - "grad_norm": 1.4274256229400635, - "learning_rate": 5.871658291457287e-05, - "loss": 5.3249, - "step": 41592 - }, - { - "epoch": 21.691264667535854, - "grad_norm": 1.5292534828186035, - "learning_rate": 5.8715577889447236e-05, - "loss": 5.4152, - "step": 41593 - }, - { - "epoch": 21.691786179921774, - "grad_norm": 1.5141563415527344, - "learning_rate": 5.8714572864321613e-05, - "loss": 5.2902, - "step": 41594 - }, - { - "epoch": 21.692307692307693, - "grad_norm": 1.4516974687576294, - "learning_rate": 5.8713567839195984e-05, - "loss": 5.1744, - "step": 41595 - }, - { - "epoch": 21.692829204693613, - "grad_norm": 1.49988853931427, - "learning_rate": 5.871256281407036e-05, - "loss": 5.1353, - "step": 41596 - }, - { - "epoch": 21.693350717079532, - "grad_norm": 1.5363038778305054, - "learning_rate": 5.8711557788944727e-05, - "loss": 5.1725, - "step": 41597 - }, - { - "epoch": 21.69387222946545, - "grad_norm": 1.5775610208511353, - "learning_rate": 5.871055276381909e-05, - "loss": 5.1296, - "step": 41598 - }, - { - "epoch": 21.694393741851368, - "grad_norm": 1.363779902458191, - "learning_rate": 5.870954773869347e-05, - "loss": 4.7818, - "step": 41599 - }, - { - "epoch": 21.694915254237287, - "grad_norm": 1.5950227975845337, - "learning_rate": 5.870854271356784e-05, - "loss": 5.2163, - "step": 41600 - }, - { - "epoch": 21.695436766623207, - "grad_norm": 1.561263918876648, - "learning_rate": 5.870753768844222e-05, - "loss": 5.6703, - "step": 41601 - }, - { - "epoch": 21.695958279009126, - "grad_norm": 1.5532898902893066, - "learning_rate": 5.870653266331658e-05, - "loss": 5.2946, - "step": 41602 - }, - { - "epoch": 21.696479791395046, - "grad_norm": 1.4461994171142578, - "learning_rate": 5.870552763819096e-05, - "loss": 4.5124, - "step": 41603 - }, - { - "epoch": 21.697001303780965, - "grad_norm": 1.554764986038208, - "learning_rate": 5.8704522613065324e-05, - "loss": 5.2938, - "step": 41604 - }, - { - "epoch": 21.697522816166884, - "grad_norm": 1.4569101333618164, - "learning_rate": 5.87035175879397e-05, - "loss": 5.3154, - "step": 41605 - }, - { - "epoch": 21.698044328552804, - "grad_norm": 1.532118320465088, - "learning_rate": 5.870251256281407e-05, - "loss": 5.3158, - "step": 41606 - }, - { - "epoch": 21.698565840938723, - "grad_norm": 1.5417871475219727, - "learning_rate": 5.870150753768845e-05, - "loss": 5.3223, - "step": 41607 - }, - { - "epoch": 21.699087353324643, - "grad_norm": 1.534792423248291, - "learning_rate": 5.8700502512562815e-05, - "loss": 5.2436, - "step": 41608 - }, - { - "epoch": 21.69960886571056, - "grad_norm": 1.5698204040527344, - "learning_rate": 5.869949748743719e-05, - "loss": 5.2673, - "step": 41609 - }, - { - "epoch": 21.70013037809648, - "grad_norm": 1.601309061050415, - "learning_rate": 5.8698492462311564e-05, - "loss": 5.0187, - "step": 41610 - }, - { - "epoch": 21.700651890482398, - "grad_norm": 1.5326530933380127, - "learning_rate": 5.869748743718593e-05, - "loss": 5.0773, - "step": 41611 - }, - { - "epoch": 21.701173402868317, - "grad_norm": 1.4165189266204834, - "learning_rate": 5.8696482412060306e-05, - "loss": 5.7365, - "step": 41612 - }, - { - "epoch": 21.701694915254237, - "grad_norm": 1.4711717367172241, - "learning_rate": 5.869547738693467e-05, - "loss": 5.0257, - "step": 41613 - }, - { - "epoch": 21.702216427640156, - "grad_norm": 1.440752625465393, - "learning_rate": 5.869447236180905e-05, - "loss": 5.2014, - "step": 41614 - }, - { - "epoch": 21.702737940026076, - "grad_norm": 1.5573344230651855, - "learning_rate": 5.869346733668342e-05, - "loss": 4.7664, - "step": 41615 - }, - { - "epoch": 21.703259452411995, - "grad_norm": 1.523794412612915, - "learning_rate": 5.86924623115578e-05, - "loss": 5.0491, - "step": 41616 - }, - { - "epoch": 21.703780964797915, - "grad_norm": 1.498833417892456, - "learning_rate": 5.869145728643216e-05, - "loss": 5.6219, - "step": 41617 - }, - { - "epoch": 21.704302477183834, - "grad_norm": 1.5770301818847656, - "learning_rate": 5.869045226130654e-05, - "loss": 5.2613, - "step": 41618 - }, - { - "epoch": 21.704823989569753, - "grad_norm": 1.5487691164016724, - "learning_rate": 5.86894472361809e-05, - "loss": 5.4751, - "step": 41619 - }, - { - "epoch": 21.705345501955673, - "grad_norm": 1.5106180906295776, - "learning_rate": 5.868844221105528e-05, - "loss": 5.3268, - "step": 41620 - }, - { - "epoch": 21.705867014341592, - "grad_norm": 1.4114280939102173, - "learning_rate": 5.868743718592965e-05, - "loss": 5.1584, - "step": 41621 - }, - { - "epoch": 21.70638852672751, - "grad_norm": 1.5481749773025513, - "learning_rate": 5.868643216080403e-05, - "loss": 5.5984, - "step": 41622 - }, - { - "epoch": 21.706910039113428, - "grad_norm": 1.5671601295471191, - "learning_rate": 5.8685427135678394e-05, - "loss": 5.1909, - "step": 41623 - }, - { - "epoch": 21.707431551499347, - "grad_norm": 1.5201764106750488, - "learning_rate": 5.868442211055276e-05, - "loss": 5.2519, - "step": 41624 - }, - { - "epoch": 21.707953063885267, - "grad_norm": 1.474036693572998, - "learning_rate": 5.8683417085427136e-05, - "loss": 5.602, - "step": 41625 - }, - { - "epoch": 21.708474576271186, - "grad_norm": 1.5074152946472168, - "learning_rate": 5.868241206030151e-05, - "loss": 5.5343, - "step": 41626 - }, - { - "epoch": 21.708996088657106, - "grad_norm": 1.5228127241134644, - "learning_rate": 5.8681407035175885e-05, - "loss": 5.1297, - "step": 41627 - }, - { - "epoch": 21.709517601043025, - "grad_norm": 1.5907186269760132, - "learning_rate": 5.868040201005025e-05, - "loss": 5.2322, - "step": 41628 - }, - { - "epoch": 21.710039113428945, - "grad_norm": 1.4514117240905762, - "learning_rate": 5.867939698492463e-05, - "loss": 5.5359, - "step": 41629 - }, - { - "epoch": 21.710560625814864, - "grad_norm": 1.6561555862426758, - "learning_rate": 5.8678391959799e-05, - "loss": 4.887, - "step": 41630 - }, - { - "epoch": 21.711082138200783, - "grad_norm": 1.5670137405395508, - "learning_rate": 5.8677386934673376e-05, - "loss": 5.1571, - "step": 41631 - }, - { - "epoch": 21.711603650586703, - "grad_norm": 1.5550730228424072, - "learning_rate": 5.867638190954774e-05, - "loss": 4.9928, - "step": 41632 - }, - { - "epoch": 21.71212516297262, - "grad_norm": 1.4351286888122559, - "learning_rate": 5.867537688442212e-05, - "loss": 5.2484, - "step": 41633 - }, - { - "epoch": 21.71264667535854, - "grad_norm": 2.0400471687316895, - "learning_rate": 5.867437185929648e-05, - "loss": 5.1955, - "step": 41634 - }, - { - "epoch": 21.713168187744458, - "grad_norm": 1.6497867107391357, - "learning_rate": 5.867336683417086e-05, - "loss": 5.0971, - "step": 41635 - }, - { - "epoch": 21.713689700130377, - "grad_norm": 1.5154831409454346, - "learning_rate": 5.867236180904523e-05, - "loss": 5.1647, - "step": 41636 - }, - { - "epoch": 21.714211212516297, - "grad_norm": 1.4534724950790405, - "learning_rate": 5.8671356783919596e-05, - "loss": 5.7118, - "step": 41637 - }, - { - "epoch": 21.714732724902216, - "grad_norm": 1.5982604026794434, - "learning_rate": 5.8670351758793974e-05, - "loss": 4.9627, - "step": 41638 - }, - { - "epoch": 21.715254237288136, - "grad_norm": 1.5213985443115234, - "learning_rate": 5.866934673366834e-05, - "loss": 5.1192, - "step": 41639 - }, - { - "epoch": 21.715775749674055, - "grad_norm": 1.5826442241668701, - "learning_rate": 5.8668341708542716e-05, - "loss": 5.801, - "step": 41640 - }, - { - "epoch": 21.716297262059975, - "grad_norm": 1.467716097831726, - "learning_rate": 5.866733668341709e-05, - "loss": 5.3388, - "step": 41641 - }, - { - "epoch": 21.716818774445894, - "grad_norm": 1.5774949789047241, - "learning_rate": 5.8666331658291465e-05, - "loss": 5.0351, - "step": 41642 - }, - { - "epoch": 21.717340286831814, - "grad_norm": 1.4917237758636475, - "learning_rate": 5.866532663316583e-05, - "loss": 5.4311, - "step": 41643 - }, - { - "epoch": 21.717861799217733, - "grad_norm": 1.6009315252304077, - "learning_rate": 5.866432160804021e-05, - "loss": 4.9347, - "step": 41644 - }, - { - "epoch": 21.71838331160365, - "grad_norm": 1.579338550567627, - "learning_rate": 5.866331658291457e-05, - "loss": 4.8105, - "step": 41645 - }, - { - "epoch": 21.71890482398957, - "grad_norm": 1.5788494348526, - "learning_rate": 5.866231155778895e-05, - "loss": 5.0322, - "step": 41646 - }, - { - "epoch": 21.719426336375488, - "grad_norm": 1.458644151687622, - "learning_rate": 5.866130653266332e-05, - "loss": 4.8138, - "step": 41647 - }, - { - "epoch": 21.719947848761407, - "grad_norm": 1.5361034870147705, - "learning_rate": 5.86603015075377e-05, - "loss": 5.7437, - "step": 41648 - }, - { - "epoch": 21.720469361147327, - "grad_norm": 1.492159366607666, - "learning_rate": 5.865929648241206e-05, - "loss": 5.298, - "step": 41649 - }, - { - "epoch": 21.720990873533246, - "grad_norm": 1.527204990386963, - "learning_rate": 5.8658291457286426e-05, - "loss": 5.2036, - "step": 41650 - }, - { - "epoch": 21.721512385919166, - "grad_norm": 1.6302096843719482, - "learning_rate": 5.8657286432160804e-05, - "loss": 4.7824, - "step": 41651 - }, - { - "epoch": 21.722033898305085, - "grad_norm": 1.4915475845336914, - "learning_rate": 5.8656281407035175e-05, - "loss": 5.1224, - "step": 41652 - }, - { - "epoch": 21.722555410691005, - "grad_norm": 1.4843460321426392, - "learning_rate": 5.865527638190955e-05, - "loss": 4.8146, - "step": 41653 - }, - { - "epoch": 21.723076923076924, - "grad_norm": 1.5194110870361328, - "learning_rate": 5.865427135678392e-05, - "loss": 5.3773, - "step": 41654 - }, - { - "epoch": 21.723598435462844, - "grad_norm": 1.4112035036087036, - "learning_rate": 5.8653266331658295e-05, - "loss": 5.3597, - "step": 41655 - }, - { - "epoch": 21.724119947848763, - "grad_norm": 1.5047193765640259, - "learning_rate": 5.8652261306532666e-05, - "loss": 4.9178, - "step": 41656 - }, - { - "epoch": 21.72464146023468, - "grad_norm": 1.525019645690918, - "learning_rate": 5.8651256281407044e-05, - "loss": 5.6815, - "step": 41657 - }, - { - "epoch": 21.7251629726206, - "grad_norm": 1.4826734066009521, - "learning_rate": 5.865025125628141e-05, - "loss": 5.7038, - "step": 41658 - }, - { - "epoch": 21.725684485006518, - "grad_norm": 1.6101466417312622, - "learning_rate": 5.8649246231155786e-05, - "loss": 4.978, - "step": 41659 - }, - { - "epoch": 21.726205997392437, - "grad_norm": 1.5250028371810913, - "learning_rate": 5.864824120603015e-05, - "loss": 4.9208, - "step": 41660 - }, - { - "epoch": 21.726727509778357, - "grad_norm": 1.823823094367981, - "learning_rate": 5.864723618090453e-05, - "loss": 5.2502, - "step": 41661 - }, - { - "epoch": 21.727249022164276, - "grad_norm": 1.5998808145523071, - "learning_rate": 5.86462311557789e-05, - "loss": 5.2485, - "step": 41662 - }, - { - "epoch": 21.727770534550196, - "grad_norm": 1.6679456233978271, - "learning_rate": 5.864522613065328e-05, - "loss": 4.9546, - "step": 41663 - }, - { - "epoch": 21.728292046936115, - "grad_norm": 1.6930371522903442, - "learning_rate": 5.864422110552764e-05, - "loss": 5.2218, - "step": 41664 - }, - { - "epoch": 21.728813559322035, - "grad_norm": 1.6375067234039307, - "learning_rate": 5.8643216080402005e-05, - "loss": 5.2093, - "step": 41665 - }, - { - "epoch": 21.729335071707954, - "grad_norm": 1.603919267654419, - "learning_rate": 5.864221105527638e-05, - "loss": 4.7537, - "step": 41666 - }, - { - "epoch": 21.729856584093874, - "grad_norm": 1.5303230285644531, - "learning_rate": 5.8641206030150754e-05, - "loss": 5.3871, - "step": 41667 - }, - { - "epoch": 21.730378096479793, - "grad_norm": 1.5369701385498047, - "learning_rate": 5.864020100502513e-05, - "loss": 5.4282, - "step": 41668 - }, - { - "epoch": 21.73089960886571, - "grad_norm": 1.7466238737106323, - "learning_rate": 5.8639195979899496e-05, - "loss": 5.4601, - "step": 41669 - }, - { - "epoch": 21.73142112125163, - "grad_norm": 1.6249661445617676, - "learning_rate": 5.8638190954773874e-05, - "loss": 5.259, - "step": 41670 - }, - { - "epoch": 21.731942633637548, - "grad_norm": 1.6299608945846558, - "learning_rate": 5.863718592964824e-05, - "loss": 5.1786, - "step": 41671 - }, - { - "epoch": 21.732464146023467, - "grad_norm": 1.4250259399414062, - "learning_rate": 5.8636180904522616e-05, - "loss": 5.5702, - "step": 41672 - }, - { - "epoch": 21.732985658409387, - "grad_norm": 1.6248235702514648, - "learning_rate": 5.863517587939699e-05, - "loss": 5.1458, - "step": 41673 - }, - { - "epoch": 21.733507170795306, - "grad_norm": 1.634889006614685, - "learning_rate": 5.8634170854271365e-05, - "loss": 5.1358, - "step": 41674 - }, - { - "epoch": 21.734028683181226, - "grad_norm": 1.544450283050537, - "learning_rate": 5.863316582914573e-05, - "loss": 5.3923, - "step": 41675 - }, - { - "epoch": 21.734550195567145, - "grad_norm": 1.513262152671814, - "learning_rate": 5.863216080402011e-05, - "loss": 5.5082, - "step": 41676 - }, - { - "epoch": 21.735071707953065, - "grad_norm": 1.4429874420166016, - "learning_rate": 5.863115577889448e-05, - "loss": 4.7651, - "step": 41677 - }, - { - "epoch": 21.735593220338984, - "grad_norm": 1.4627509117126465, - "learning_rate": 5.863015075376884e-05, - "loss": 5.2569, - "step": 41678 - }, - { - "epoch": 21.736114732724904, - "grad_norm": 1.4974406957626343, - "learning_rate": 5.862914572864322e-05, - "loss": 5.3774, - "step": 41679 - }, - { - "epoch": 21.736636245110823, - "grad_norm": 1.583622932434082, - "learning_rate": 5.8628140703517585e-05, - "loss": 5.1374, - "step": 41680 - }, - { - "epoch": 21.73715775749674, - "grad_norm": 1.51862633228302, - "learning_rate": 5.862713567839196e-05, - "loss": 5.6368, - "step": 41681 - }, - { - "epoch": 21.73767926988266, - "grad_norm": 1.5752471685409546, - "learning_rate": 5.8626130653266334e-05, - "loss": 5.3291, - "step": 41682 - }, - { - "epoch": 21.738200782268578, - "grad_norm": 1.560415506362915, - "learning_rate": 5.862512562814071e-05, - "loss": 5.5238, - "step": 41683 - }, - { - "epoch": 21.738722294654497, - "grad_norm": 1.452353596687317, - "learning_rate": 5.8624120603015076e-05, - "loss": 5.1288, - "step": 41684 - }, - { - "epoch": 21.739243807040417, - "grad_norm": 1.4789149761199951, - "learning_rate": 5.8623115577889454e-05, - "loss": 5.412, - "step": 41685 - }, - { - "epoch": 21.739765319426336, - "grad_norm": 1.5746650695800781, - "learning_rate": 5.862211055276382e-05, - "loss": 5.1628, - "step": 41686 - }, - { - "epoch": 21.740286831812256, - "grad_norm": 1.5910027027130127, - "learning_rate": 5.8621105527638196e-05, - "loss": 5.2831, - "step": 41687 - }, - { - "epoch": 21.740808344198175, - "grad_norm": 1.7910445928573608, - "learning_rate": 5.862010050251257e-05, - "loss": 5.447, - "step": 41688 - }, - { - "epoch": 21.741329856584095, - "grad_norm": 1.575778603553772, - "learning_rate": 5.8619095477386945e-05, - "loss": 5.6634, - "step": 41689 - }, - { - "epoch": 21.741851368970014, - "grad_norm": 1.575588583946228, - "learning_rate": 5.861809045226131e-05, - "loss": 5.1386, - "step": 41690 - }, - { - "epoch": 21.742372881355934, - "grad_norm": 1.5298939943313599, - "learning_rate": 5.861708542713567e-05, - "loss": 5.3302, - "step": 41691 - }, - { - "epoch": 21.742894393741853, - "grad_norm": 1.5256588459014893, - "learning_rate": 5.861608040201005e-05, - "loss": 5.2342, - "step": 41692 - }, - { - "epoch": 21.74341590612777, - "grad_norm": 1.5763205289840698, - "learning_rate": 5.861507537688442e-05, - "loss": 5.6516, - "step": 41693 - }, - { - "epoch": 21.74393741851369, - "grad_norm": 1.7419191598892212, - "learning_rate": 5.86140703517588e-05, - "loss": 5.0366, - "step": 41694 - }, - { - "epoch": 21.744458930899608, - "grad_norm": 1.5829273462295532, - "learning_rate": 5.8613065326633164e-05, - "loss": 5.3145, - "step": 41695 - }, - { - "epoch": 21.744980443285527, - "grad_norm": 1.4798948764801025, - "learning_rate": 5.861206030150754e-05, - "loss": 5.1799, - "step": 41696 - }, - { - "epoch": 21.745501955671447, - "grad_norm": 1.4848198890686035, - "learning_rate": 5.861105527638191e-05, - "loss": 5.2231, - "step": 41697 - }, - { - "epoch": 21.746023468057366, - "grad_norm": 1.4181134700775146, - "learning_rate": 5.861005025125629e-05, - "loss": 5.4051, - "step": 41698 - }, - { - "epoch": 21.746544980443286, - "grad_norm": 1.524461269378662, - "learning_rate": 5.8609045226130655e-05, - "loss": 5.2468, - "step": 41699 - }, - { - "epoch": 21.747066492829205, - "grad_norm": 1.5203258991241455, - "learning_rate": 5.860804020100503e-05, - "loss": 5.2497, - "step": 41700 - }, - { - "epoch": 21.747588005215125, - "grad_norm": 1.4700255393981934, - "learning_rate": 5.86070351758794e-05, - "loss": 5.3482, - "step": 41701 - }, - { - "epoch": 21.748109517601044, - "grad_norm": 1.5455230474472046, - "learning_rate": 5.8606030150753775e-05, - "loss": 5.3132, - "step": 41702 - }, - { - "epoch": 21.748631029986964, - "grad_norm": 1.4175063371658325, - "learning_rate": 5.8605025125628146e-05, - "loss": 5.5718, - "step": 41703 - }, - { - "epoch": 21.749152542372883, - "grad_norm": 1.5279626846313477, - "learning_rate": 5.860402010050251e-05, - "loss": 5.5703, - "step": 41704 - }, - { - "epoch": 21.7496740547588, - "grad_norm": 1.7537732124328613, - "learning_rate": 5.860301507537689e-05, - "loss": 4.9383, - "step": 41705 - }, - { - "epoch": 21.75019556714472, - "grad_norm": 1.39093017578125, - "learning_rate": 5.860201005025125e-05, - "loss": 5.4712, - "step": 41706 - }, - { - "epoch": 21.750717079530638, - "grad_norm": 1.5120582580566406, - "learning_rate": 5.860100502512563e-05, - "loss": 5.1623, - "step": 41707 - }, - { - "epoch": 21.751238591916557, - "grad_norm": 1.5481622219085693, - "learning_rate": 5.86e-05, - "loss": 5.3943, - "step": 41708 - }, - { - "epoch": 21.751760104302477, - "grad_norm": 1.6215606927871704, - "learning_rate": 5.859899497487438e-05, - "loss": 4.9913, - "step": 41709 - }, - { - "epoch": 21.752281616688396, - "grad_norm": 1.5443366765975952, - "learning_rate": 5.8597989949748743e-05, - "loss": 5.0037, - "step": 41710 - }, - { - "epoch": 21.752803129074316, - "grad_norm": 1.4347741603851318, - "learning_rate": 5.859698492462312e-05, - "loss": 5.7168, - "step": 41711 - }, - { - "epoch": 21.753324641460235, - "grad_norm": 1.6700955629348755, - "learning_rate": 5.8595979899497486e-05, - "loss": 4.9891, - "step": 41712 - }, - { - "epoch": 21.753846153846155, - "grad_norm": 1.4156585931777954, - "learning_rate": 5.8594974874371863e-05, - "loss": 5.5044, - "step": 41713 - }, - { - "epoch": 21.754367666232074, - "grad_norm": 1.514775276184082, - "learning_rate": 5.8593969849246234e-05, - "loss": 5.3508, - "step": 41714 - }, - { - "epoch": 21.754889178617994, - "grad_norm": 1.5144778490066528, - "learning_rate": 5.859296482412061e-05, - "loss": 5.0875, - "step": 41715 - }, - { - "epoch": 21.75541069100391, - "grad_norm": 1.5499054193496704, - "learning_rate": 5.8591959798994977e-05, - "loss": 5.0937, - "step": 41716 - }, - { - "epoch": 21.75593220338983, - "grad_norm": 1.5318212509155273, - "learning_rate": 5.859095477386935e-05, - "loss": 5.3987, - "step": 41717 - }, - { - "epoch": 21.75645371577575, - "grad_norm": Infinity, - "learning_rate": 5.859095477386935e-05, - "loss": 5.3748, - "step": 41718 - }, - { - "epoch": 21.756975228161668, - "grad_norm": 1.549851417541504, - "learning_rate": 5.8589949748743725e-05, - "loss": 5.5775, - "step": 41719 - }, - { - "epoch": 21.757496740547587, - "grad_norm": 1.5160008668899536, - "learning_rate": 5.858894472361809e-05, - "loss": 5.1359, - "step": 41720 - }, - { - "epoch": 21.758018252933507, - "grad_norm": 1.4741359949111938, - "learning_rate": 5.858793969849247e-05, - "loss": 5.502, - "step": 41721 - }, - { - "epoch": 21.758539765319426, - "grad_norm": 1.5032916069030762, - "learning_rate": 5.858693467336683e-05, - "loss": 5.5692, - "step": 41722 - }, - { - "epoch": 21.759061277705346, - "grad_norm": 1.6086719036102295, - "learning_rate": 5.858592964824121e-05, - "loss": 5.2595, - "step": 41723 - }, - { - "epoch": 21.759582790091265, - "grad_norm": 1.532251000404358, - "learning_rate": 5.858492462311558e-05, - "loss": 5.2012, - "step": 41724 - }, - { - "epoch": 21.760104302477185, - "grad_norm": 1.4939624071121216, - "learning_rate": 5.858391959798996e-05, - "loss": 5.3989, - "step": 41725 - }, - { - "epoch": 21.760625814863104, - "grad_norm": 1.3975390195846558, - "learning_rate": 5.858291457286432e-05, - "loss": 5.0917, - "step": 41726 - }, - { - "epoch": 21.761147327249024, - "grad_norm": 1.6127427816390991, - "learning_rate": 5.85819095477387e-05, - "loss": 4.5582, - "step": 41727 - }, - { - "epoch": 21.761668839634943, - "grad_norm": 1.4647825956344604, - "learning_rate": 5.8580904522613065e-05, - "loss": 5.7997, - "step": 41728 - }, - { - "epoch": 21.76219035202086, - "grad_norm": 1.5685114860534668, - "learning_rate": 5.857989949748744e-05, - "loss": 5.3508, - "step": 41729 - }, - { - "epoch": 21.76271186440678, - "grad_norm": 1.6191972494125366, - "learning_rate": 5.8578894472361814e-05, - "loss": 5.1166, - "step": 41730 - }, - { - "epoch": 21.763233376792698, - "grad_norm": 1.5419520139694214, - "learning_rate": 5.857788944723618e-05, - "loss": 5.7086, - "step": 41731 - }, - { - "epoch": 21.763754889178617, - "grad_norm": 1.4646605253219604, - "learning_rate": 5.8576884422110556e-05, - "loss": 5.236, - "step": 41732 - }, - { - "epoch": 21.764276401564537, - "grad_norm": 1.4417520761489868, - "learning_rate": 5.857587939698492e-05, - "loss": 5.6661, - "step": 41733 - }, - { - "epoch": 21.764797913950456, - "grad_norm": 1.8630716800689697, - "learning_rate": 5.85748743718593e-05, - "loss": 4.6237, - "step": 41734 - }, - { - "epoch": 21.765319426336376, - "grad_norm": 1.4803338050842285, - "learning_rate": 5.857386934673367e-05, - "loss": 5.6184, - "step": 41735 - }, - { - "epoch": 21.765840938722295, - "grad_norm": 1.5654864311218262, - "learning_rate": 5.857286432160805e-05, - "loss": 5.0281, - "step": 41736 - }, - { - "epoch": 21.766362451108215, - "grad_norm": 1.6804496049880981, - "learning_rate": 5.857185929648241e-05, - "loss": 4.8331, - "step": 41737 - }, - { - "epoch": 21.766883963494134, - "grad_norm": 1.4900615215301514, - "learning_rate": 5.857085427135679e-05, - "loss": 5.4467, - "step": 41738 - }, - { - "epoch": 21.767405475880054, - "grad_norm": 1.652434229850769, - "learning_rate": 5.856984924623115e-05, - "loss": 4.9778, - "step": 41739 - }, - { - "epoch": 21.76792698826597, - "grad_norm": 1.5790510177612305, - "learning_rate": 5.856884422110553e-05, - "loss": 5.1748, - "step": 41740 - }, - { - "epoch": 21.76844850065189, - "grad_norm": 1.6165881156921387, - "learning_rate": 5.85678391959799e-05, - "loss": 5.1735, - "step": 41741 - }, - { - "epoch": 21.76897001303781, - "grad_norm": 1.461808681488037, - "learning_rate": 5.856683417085428e-05, - "loss": 5.5187, - "step": 41742 - }, - { - "epoch": 21.769491525423728, - "grad_norm": 1.5976723432540894, - "learning_rate": 5.8565829145728644e-05, - "loss": 5.355, - "step": 41743 - }, - { - "epoch": 21.770013037809647, - "grad_norm": 1.521122694015503, - "learning_rate": 5.8564824120603015e-05, - "loss": 5.38, - "step": 41744 - }, - { - "epoch": 21.770534550195567, - "grad_norm": 1.4872664213180542, - "learning_rate": 5.856381909547739e-05, - "loss": 5.4462, - "step": 41745 - }, - { - "epoch": 21.771056062581486, - "grad_norm": 1.6282308101654053, - "learning_rate": 5.856281407035176e-05, - "loss": 4.6676, - "step": 41746 - }, - { - "epoch": 21.771577574967406, - "grad_norm": 1.6099826097488403, - "learning_rate": 5.8561809045226135e-05, - "loss": 5.2836, - "step": 41747 - }, - { - "epoch": 21.772099087353325, - "grad_norm": 1.5733752250671387, - "learning_rate": 5.85608040201005e-05, - "loss": 5.0979, - "step": 41748 - }, - { - "epoch": 21.772620599739245, - "grad_norm": 1.5208629369735718, - "learning_rate": 5.855979899497488e-05, - "loss": 5.2863, - "step": 41749 - }, - { - "epoch": 21.773142112125164, - "grad_norm": 1.5577448606491089, - "learning_rate": 5.855879396984925e-05, - "loss": 5.0803, - "step": 41750 - }, - { - "epoch": 21.773663624511084, - "grad_norm": 1.6274619102478027, - "learning_rate": 5.8557788944723626e-05, - "loss": 5.3288, - "step": 41751 - }, - { - "epoch": 21.774185136897, - "grad_norm": 1.6342273950576782, - "learning_rate": 5.855678391959799e-05, - "loss": 4.7541, - "step": 41752 - }, - { - "epoch": 21.77470664928292, - "grad_norm": 1.561427116394043, - "learning_rate": 5.855577889447237e-05, - "loss": 5.3606, - "step": 41753 - }, - { - "epoch": 21.77522816166884, - "grad_norm": 1.557403802871704, - "learning_rate": 5.855477386934673e-05, - "loss": 5.6472, - "step": 41754 - }, - { - "epoch": 21.775749674054758, - "grad_norm": 1.5467591285705566, - "learning_rate": 5.855376884422111e-05, - "loss": 5.3031, - "step": 41755 - }, - { - "epoch": 21.776271186440677, - "grad_norm": 1.5626713037490845, - "learning_rate": 5.855276381909548e-05, - "loss": 5.5909, - "step": 41756 - }, - { - "epoch": 21.776792698826597, - "grad_norm": 1.5318406820297241, - "learning_rate": 5.855175879396986e-05, - "loss": 5.1521, - "step": 41757 - }, - { - "epoch": 21.777314211212516, - "grad_norm": 1.5830122232437134, - "learning_rate": 5.8550753768844224e-05, - "loss": 4.9899, - "step": 41758 - }, - { - "epoch": 21.777835723598436, - "grad_norm": 1.5559252500534058, - "learning_rate": 5.854974874371859e-05, - "loss": 5.3508, - "step": 41759 - }, - { - "epoch": 21.778357235984355, - "grad_norm": 1.5764858722686768, - "learning_rate": 5.8548743718592966e-05, - "loss": 5.2253, - "step": 41760 - }, - { - "epoch": 21.778878748370275, - "grad_norm": 1.6277918815612793, - "learning_rate": 5.854773869346734e-05, - "loss": 5.2671, - "step": 41761 - }, - { - "epoch": 21.779400260756194, - "grad_norm": 1.4668973684310913, - "learning_rate": 5.8546733668341715e-05, - "loss": 5.3443, - "step": 41762 - }, - { - "epoch": 21.779921773142114, - "grad_norm": 1.5415254831314087, - "learning_rate": 5.854572864321608e-05, - "loss": 5.6908, - "step": 41763 - }, - { - "epoch": 21.78044328552803, - "grad_norm": 1.3588570356369019, - "learning_rate": 5.854472361809046e-05, - "loss": 4.9589, - "step": 41764 - }, - { - "epoch": 21.78096479791395, - "grad_norm": 1.6194422245025635, - "learning_rate": 5.854371859296483e-05, - "loss": 5.2047, - "step": 41765 - }, - { - "epoch": 21.78148631029987, - "grad_norm": 1.505468726158142, - "learning_rate": 5.8542713567839206e-05, - "loss": 5.5038, - "step": 41766 - }, - { - "epoch": 21.782007822685788, - "grad_norm": 1.5702511072158813, - "learning_rate": 5.854170854271357e-05, - "loss": 5.4089, - "step": 41767 - }, - { - "epoch": 21.782529335071708, - "grad_norm": 1.5578513145446777, - "learning_rate": 5.854070351758795e-05, - "loss": 4.8612, - "step": 41768 - }, - { - "epoch": 21.783050847457627, - "grad_norm": 1.5202736854553223, - "learning_rate": 5.853969849246231e-05, - "loss": 5.6449, - "step": 41769 - }, - { - "epoch": 21.783572359843546, - "grad_norm": 1.527382493019104, - "learning_rate": 5.853869346733669e-05, - "loss": 5.5107, - "step": 41770 - }, - { - "epoch": 21.784093872229466, - "grad_norm": 1.585921049118042, - "learning_rate": 5.853768844221106e-05, - "loss": 5.1988, - "step": 41771 - }, - { - "epoch": 21.784615384615385, - "grad_norm": 1.4409370422363281, - "learning_rate": 5.8536683417085425e-05, - "loss": 5.2969, - "step": 41772 - }, - { - "epoch": 21.785136897001305, - "grad_norm": 1.6492187976837158, - "learning_rate": 5.85356783919598e-05, - "loss": 5.5865, - "step": 41773 - }, - { - "epoch": 21.785658409387224, - "grad_norm": 1.593255877494812, - "learning_rate": 5.853467336683417e-05, - "loss": 4.968, - "step": 41774 - }, - { - "epoch": 21.786179921773144, - "grad_norm": 1.4729552268981934, - "learning_rate": 5.8533668341708545e-05, - "loss": 5.3996, - "step": 41775 - }, - { - "epoch": 21.78670143415906, - "grad_norm": 1.4222331047058105, - "learning_rate": 5.8532663316582916e-05, - "loss": 5.4531, - "step": 41776 - }, - { - "epoch": 21.78722294654498, - "grad_norm": 1.3882203102111816, - "learning_rate": 5.8531658291457294e-05, - "loss": 5.3101, - "step": 41777 - }, - { - "epoch": 21.7877444589309, - "grad_norm": 1.4726414680480957, - "learning_rate": 5.853065326633166e-05, - "loss": 5.57, - "step": 41778 - }, - { - "epoch": 21.788265971316818, - "grad_norm": 1.4402600526809692, - "learning_rate": 5.8529648241206036e-05, - "loss": 5.2553, - "step": 41779 - }, - { - "epoch": 21.788787483702738, - "grad_norm": 1.489721655845642, - "learning_rate": 5.85286432160804e-05, - "loss": 5.7216, - "step": 41780 - }, - { - "epoch": 21.789308996088657, - "grad_norm": 1.547945499420166, - "learning_rate": 5.852763819095478e-05, - "loss": 5.2273, - "step": 41781 - }, - { - "epoch": 21.789830508474576, - "grad_norm": 1.4512337446212769, - "learning_rate": 5.852663316582915e-05, - "loss": 5.5932, - "step": 41782 - }, - { - "epoch": 21.790352020860496, - "grad_norm": 1.5749917030334473, - "learning_rate": 5.852562814070353e-05, - "loss": 5.2333, - "step": 41783 - }, - { - "epoch": 21.790873533246415, - "grad_norm": 1.603283405303955, - "learning_rate": 5.852462311557789e-05, - "loss": 4.8951, - "step": 41784 - }, - { - "epoch": 21.791395045632335, - "grad_norm": 1.5073927640914917, - "learning_rate": 5.852361809045226e-05, - "loss": 5.0157, - "step": 41785 - }, - { - "epoch": 21.791916558018254, - "grad_norm": 1.5076488256454468, - "learning_rate": 5.852261306532664e-05, - "loss": 5.5838, - "step": 41786 - }, - { - "epoch": 21.792438070404174, - "grad_norm": 1.5056546926498413, - "learning_rate": 5.8521608040201004e-05, - "loss": 5.1899, - "step": 41787 - }, - { - "epoch": 21.79295958279009, - "grad_norm": 1.6219654083251953, - "learning_rate": 5.852060301507538e-05, - "loss": 4.816, - "step": 41788 - }, - { - "epoch": 21.79348109517601, - "grad_norm": 1.5256848335266113, - "learning_rate": 5.8519597989949746e-05, - "loss": 5.3018, - "step": 41789 - }, - { - "epoch": 21.79400260756193, - "grad_norm": 1.5361566543579102, - "learning_rate": 5.8518592964824124e-05, - "loss": 5.1922, - "step": 41790 - }, - { - "epoch": 21.794524119947848, - "grad_norm": 1.4824978113174438, - "learning_rate": 5.8517587939698495e-05, - "loss": 5.4743, - "step": 41791 - }, - { - "epoch": 21.795045632333768, - "grad_norm": 1.5215915441513062, - "learning_rate": 5.851658291457287e-05, - "loss": 5.2588, - "step": 41792 - }, - { - "epoch": 21.795567144719687, - "grad_norm": 1.543323040008545, - "learning_rate": 5.851557788944724e-05, - "loss": 5.4174, - "step": 41793 - }, - { - "epoch": 21.796088657105607, - "grad_norm": 1.539798617362976, - "learning_rate": 5.8514572864321615e-05, - "loss": 4.834, - "step": 41794 - }, - { - "epoch": 21.796610169491526, - "grad_norm": 1.5915992259979248, - "learning_rate": 5.851356783919598e-05, - "loss": 4.8217, - "step": 41795 - }, - { - "epoch": 21.797131681877445, - "grad_norm": 1.5297062397003174, - "learning_rate": 5.851256281407036e-05, - "loss": 5.4361, - "step": 41796 - }, - { - "epoch": 21.797653194263365, - "grad_norm": 1.612471580505371, - "learning_rate": 5.851155778894473e-05, - "loss": 5.1828, - "step": 41797 - }, - { - "epoch": 21.798174706649284, - "grad_norm": 1.528226613998413, - "learning_rate": 5.851055276381909e-05, - "loss": 5.2382, - "step": 41798 - }, - { - "epoch": 21.7986962190352, - "grad_norm": 1.4575625658035278, - "learning_rate": 5.850954773869347e-05, - "loss": 4.9815, - "step": 41799 - }, - { - "epoch": 21.79921773142112, - "grad_norm": 1.635818600654602, - "learning_rate": 5.8508542713567835e-05, - "loss": 4.4711, - "step": 41800 - }, - { - "epoch": 21.79973924380704, - "grad_norm": 1.6312175989151, - "learning_rate": 5.850753768844221e-05, - "loss": 5.1643, - "step": 41801 - }, - { - "epoch": 21.80026075619296, - "grad_norm": 1.6046547889709473, - "learning_rate": 5.8506532663316584e-05, - "loss": 5.0013, - "step": 41802 - }, - { - "epoch": 21.800782268578878, - "grad_norm": 1.6288723945617676, - "learning_rate": 5.850552763819096e-05, - "loss": 5.5005, - "step": 41803 - }, - { - "epoch": 21.801303780964798, - "grad_norm": 1.47405207157135, - "learning_rate": 5.8504522613065326e-05, - "loss": 5.5191, - "step": 41804 - }, - { - "epoch": 21.801825293350717, - "grad_norm": 1.4922550916671753, - "learning_rate": 5.8503517587939704e-05, - "loss": 5.2235, - "step": 41805 - }, - { - "epoch": 21.802346805736637, - "grad_norm": 1.5171229839324951, - "learning_rate": 5.8502512562814075e-05, - "loss": 5.0464, - "step": 41806 - }, - { - "epoch": 21.802868318122556, - "grad_norm": 1.509813666343689, - "learning_rate": 5.850150753768845e-05, - "loss": 4.7567, - "step": 41807 - }, - { - "epoch": 21.803389830508475, - "grad_norm": 1.5275622606277466, - "learning_rate": 5.850050251256282e-05, - "loss": 5.3084, - "step": 41808 - }, - { - "epoch": 21.803911342894395, - "grad_norm": 1.542248010635376, - "learning_rate": 5.8499497487437195e-05, - "loss": 4.9625, - "step": 41809 - }, - { - "epoch": 21.804432855280314, - "grad_norm": 1.425857424736023, - "learning_rate": 5.849849246231156e-05, - "loss": 5.0354, - "step": 41810 - }, - { - "epoch": 21.804954367666234, - "grad_norm": 1.5100351572036743, - "learning_rate": 5.849748743718593e-05, - "loss": 5.6914, - "step": 41811 - }, - { - "epoch": 21.80547588005215, - "grad_norm": 1.5530920028686523, - "learning_rate": 5.849648241206031e-05, - "loss": 5.3472, - "step": 41812 - }, - { - "epoch": 21.80599739243807, - "grad_norm": 1.6062617301940918, - "learning_rate": 5.849547738693467e-05, - "loss": 5.0467, - "step": 41813 - }, - { - "epoch": 21.80651890482399, - "grad_norm": 1.8850963115692139, - "learning_rate": 5.849447236180905e-05, - "loss": 4.9317, - "step": 41814 - }, - { - "epoch": 21.807040417209908, - "grad_norm": 1.5544040203094482, - "learning_rate": 5.8493467336683414e-05, - "loss": 5.3197, - "step": 41815 - }, - { - "epoch": 21.807561929595828, - "grad_norm": 1.486678123474121, - "learning_rate": 5.849246231155779e-05, - "loss": 5.5676, - "step": 41816 - }, - { - "epoch": 21.808083441981747, - "grad_norm": 1.5463550090789795, - "learning_rate": 5.849145728643216e-05, - "loss": 4.6434, - "step": 41817 - }, - { - "epoch": 21.808604954367667, - "grad_norm": 1.6071466207504272, - "learning_rate": 5.849045226130654e-05, - "loss": 5.2877, - "step": 41818 - }, - { - "epoch": 21.809126466753586, - "grad_norm": 1.6218366622924805, - "learning_rate": 5.8489447236180905e-05, - "loss": 4.7233, - "step": 41819 - }, - { - "epoch": 21.809647979139505, - "grad_norm": 1.5549222230911255, - "learning_rate": 5.848844221105528e-05, - "loss": 5.3702, - "step": 41820 - }, - { - "epoch": 21.810169491525425, - "grad_norm": 1.5836418867111206, - "learning_rate": 5.848743718592965e-05, - "loss": 5.2021, - "step": 41821 - }, - { - "epoch": 21.810691003911344, - "grad_norm": 1.544958472251892, - "learning_rate": 5.8486432160804025e-05, - "loss": 5.4492, - "step": 41822 - }, - { - "epoch": 21.81121251629726, - "grad_norm": 1.5403763055801392, - "learning_rate": 5.8485427135678396e-05, - "loss": 5.3777, - "step": 41823 - }, - { - "epoch": 21.81173402868318, - "grad_norm": 1.48047935962677, - "learning_rate": 5.848442211055276e-05, - "loss": 4.9786, - "step": 41824 - }, - { - "epoch": 21.8122555410691, - "grad_norm": 1.5669288635253906, - "learning_rate": 5.848341708542714e-05, - "loss": 4.8041, - "step": 41825 - }, - { - "epoch": 21.81277705345502, - "grad_norm": 1.5604220628738403, - "learning_rate": 5.84824120603015e-05, - "loss": 5.4154, - "step": 41826 - }, - { - "epoch": 21.813298565840938, - "grad_norm": 1.5233486890792847, - "learning_rate": 5.848140703517588e-05, - "loss": 5.4174, - "step": 41827 - }, - { - "epoch": 21.813820078226858, - "grad_norm": 1.5509045124053955, - "learning_rate": 5.848040201005025e-05, - "loss": 4.9884, - "step": 41828 - }, - { - "epoch": 21.814341590612777, - "grad_norm": 1.485085129737854, - "learning_rate": 5.847939698492463e-05, - "loss": 5.3725, - "step": 41829 - }, - { - "epoch": 21.814863102998697, - "grad_norm": 1.5500649213790894, - "learning_rate": 5.8478391959798993e-05, - "loss": 5.2316, - "step": 41830 - }, - { - "epoch": 21.815384615384616, - "grad_norm": 1.6023967266082764, - "learning_rate": 5.847738693467337e-05, - "loss": 4.9724, - "step": 41831 - }, - { - "epoch": 21.815906127770536, - "grad_norm": 1.6323894262313843, - "learning_rate": 5.847638190954774e-05, - "loss": 5.0212, - "step": 41832 - }, - { - "epoch": 21.816427640156455, - "grad_norm": 1.4577659368515015, - "learning_rate": 5.847537688442212e-05, - "loss": 5.4725, - "step": 41833 - }, - { - "epoch": 21.816949152542374, - "grad_norm": 1.5011835098266602, - "learning_rate": 5.8474371859296484e-05, - "loss": 5.0388, - "step": 41834 - }, - { - "epoch": 21.81747066492829, - "grad_norm": 1.6251546144485474, - "learning_rate": 5.847336683417086e-05, - "loss": 5.2904, - "step": 41835 - }, - { - "epoch": 21.81799217731421, - "grad_norm": 1.5147134065628052, - "learning_rate": 5.8472361809045227e-05, - "loss": 4.9802, - "step": 41836 - }, - { - "epoch": 21.81851368970013, - "grad_norm": 1.5521677732467651, - "learning_rate": 5.84713567839196e-05, - "loss": 4.7293, - "step": 41837 - }, - { - "epoch": 21.81903520208605, - "grad_norm": 1.462471604347229, - "learning_rate": 5.8470351758793975e-05, - "loss": 5.4529, - "step": 41838 - }, - { - "epoch": 21.819556714471968, - "grad_norm": 1.6528427600860596, - "learning_rate": 5.846934673366834e-05, - "loss": 5.0635, - "step": 41839 - }, - { - "epoch": 21.820078226857888, - "grad_norm": 1.6585314273834229, - "learning_rate": 5.846834170854272e-05, - "loss": 4.7191, - "step": 41840 - }, - { - "epoch": 21.820599739243807, - "grad_norm": 1.5294302701950073, - "learning_rate": 5.846733668341708e-05, - "loss": 4.8398, - "step": 41841 - }, - { - "epoch": 21.821121251629727, - "grad_norm": 1.616568684577942, - "learning_rate": 5.846633165829146e-05, - "loss": 5.236, - "step": 41842 - }, - { - "epoch": 21.821642764015646, - "grad_norm": 1.4857842922210693, - "learning_rate": 5.846532663316583e-05, - "loss": 5.4738, - "step": 41843 - }, - { - "epoch": 21.822164276401566, - "grad_norm": 1.6349114179611206, - "learning_rate": 5.846432160804021e-05, - "loss": 4.6649, - "step": 41844 - }, - { - "epoch": 21.822685788787485, - "grad_norm": 1.538591980934143, - "learning_rate": 5.846331658291457e-05, - "loss": 5.3922, - "step": 41845 - }, - { - "epoch": 21.823207301173404, - "grad_norm": 1.5729013681411743, - "learning_rate": 5.846231155778895e-05, - "loss": 5.5455, - "step": 41846 - }, - { - "epoch": 21.82372881355932, - "grad_norm": 1.5337170362472534, - "learning_rate": 5.8461306532663315e-05, - "loss": 5.6652, - "step": 41847 - }, - { - "epoch": 21.82425032594524, - "grad_norm": 1.5598108768463135, - "learning_rate": 5.846030150753769e-05, - "loss": 5.4816, - "step": 41848 - }, - { - "epoch": 21.82477183833116, - "grad_norm": 1.6860698461532593, - "learning_rate": 5.8459296482412064e-05, - "loss": 4.9557, - "step": 41849 - }, - { - "epoch": 21.82529335071708, - "grad_norm": 1.5731563568115234, - "learning_rate": 5.845829145728644e-05, - "loss": 5.12, - "step": 41850 - }, - { - "epoch": 21.825814863103, - "grad_norm": 1.5078023672103882, - "learning_rate": 5.8457286432160806e-05, - "loss": 5.2813, - "step": 41851 - }, - { - "epoch": 21.826336375488918, - "grad_norm": 1.6467094421386719, - "learning_rate": 5.845628140703518e-05, - "loss": 5.095, - "step": 41852 - }, - { - "epoch": 21.826857887874837, - "grad_norm": 1.5297105312347412, - "learning_rate": 5.8455276381909555e-05, - "loss": 5.0524, - "step": 41853 - }, - { - "epoch": 21.827379400260757, - "grad_norm": 1.4620307683944702, - "learning_rate": 5.845427135678392e-05, - "loss": 5.3246, - "step": 41854 - }, - { - "epoch": 21.827900912646676, - "grad_norm": 1.5144662857055664, - "learning_rate": 5.84532663316583e-05, - "loss": 5.2477, - "step": 41855 - }, - { - "epoch": 21.828422425032596, - "grad_norm": 1.4711079597473145, - "learning_rate": 5.845226130653266e-05, - "loss": 5.6519, - "step": 41856 - }, - { - "epoch": 21.828943937418515, - "grad_norm": 1.588218331336975, - "learning_rate": 5.845125628140704e-05, - "loss": 5.0603, - "step": 41857 - }, - { - "epoch": 21.829465449804434, - "grad_norm": 1.49828040599823, - "learning_rate": 5.845025125628141e-05, - "loss": 5.4423, - "step": 41858 - }, - { - "epoch": 21.82998696219035, - "grad_norm": 1.5939992666244507, - "learning_rate": 5.844924623115579e-05, - "loss": 5.0538, - "step": 41859 - }, - { - "epoch": 21.83050847457627, - "grad_norm": 1.5635906457901, - "learning_rate": 5.844824120603015e-05, - "loss": 4.9217, - "step": 41860 - }, - { - "epoch": 21.83102998696219, - "grad_norm": 1.6150535345077515, - "learning_rate": 5.844723618090453e-05, - "loss": 5.4034, - "step": 41861 - }, - { - "epoch": 21.83155149934811, - "grad_norm": 1.5982779264450073, - "learning_rate": 5.8446231155778894e-05, - "loss": 5.0582, - "step": 41862 - }, - { - "epoch": 21.83207301173403, - "grad_norm": 1.5209678411483765, - "learning_rate": 5.844522613065327e-05, - "loss": 5.0012, - "step": 41863 - }, - { - "epoch": 21.832594524119948, - "grad_norm": 1.7833702564239502, - "learning_rate": 5.844422110552764e-05, - "loss": 4.9613, - "step": 41864 - }, - { - "epoch": 21.833116036505867, - "grad_norm": 1.5641496181488037, - "learning_rate": 5.844321608040201e-05, - "loss": 5.5788, - "step": 41865 - }, - { - "epoch": 21.833637548891787, - "grad_norm": 1.5508756637573242, - "learning_rate": 5.8442211055276385e-05, - "loss": 5.5326, - "step": 41866 - }, - { - "epoch": 21.834159061277706, - "grad_norm": 1.498386025428772, - "learning_rate": 5.844120603015075e-05, - "loss": 5.4402, - "step": 41867 - }, - { - "epoch": 21.834680573663626, - "grad_norm": 1.5959192514419556, - "learning_rate": 5.844020100502513e-05, - "loss": 5.0828, - "step": 41868 - }, - { - "epoch": 21.835202086049545, - "grad_norm": 1.4415475130081177, - "learning_rate": 5.84391959798995e-05, - "loss": 5.1377, - "step": 41869 - }, - { - "epoch": 21.835723598435465, - "grad_norm": 1.5473214387893677, - "learning_rate": 5.8438190954773876e-05, - "loss": 5.2678, - "step": 41870 - }, - { - "epoch": 21.83624511082138, - "grad_norm": 1.5427993535995483, - "learning_rate": 5.843718592964824e-05, - "loss": 5.2679, - "step": 41871 - }, - { - "epoch": 21.8367666232073, - "grad_norm": 1.4379045963287354, - "learning_rate": 5.843618090452262e-05, - "loss": 5.2339, - "step": 41872 - }, - { - "epoch": 21.83728813559322, - "grad_norm": 1.5980710983276367, - "learning_rate": 5.843517587939699e-05, - "loss": 5.5609, - "step": 41873 - }, - { - "epoch": 21.83780964797914, - "grad_norm": 1.4317413568496704, - "learning_rate": 5.843417085427137e-05, - "loss": 5.3896, - "step": 41874 - }, - { - "epoch": 21.83833116036506, - "grad_norm": 1.6057835817337036, - "learning_rate": 5.843316582914573e-05, - "loss": 5.4141, - "step": 41875 - }, - { - "epoch": 21.838852672750978, - "grad_norm": 1.4632275104522705, - "learning_rate": 5.843216080402011e-05, - "loss": 5.4878, - "step": 41876 - }, - { - "epoch": 21.839374185136897, - "grad_norm": 1.455889105796814, - "learning_rate": 5.8431155778894474e-05, - "loss": 5.5863, - "step": 41877 - }, - { - "epoch": 21.839895697522817, - "grad_norm": 1.5341012477874756, - "learning_rate": 5.8430150753768845e-05, - "loss": 5.1537, - "step": 41878 - }, - { - "epoch": 21.840417209908736, - "grad_norm": 1.4913326501846313, - "learning_rate": 5.842914572864322e-05, - "loss": 5.1791, - "step": 41879 - }, - { - "epoch": 21.840938722294656, - "grad_norm": 1.5357085466384888, - "learning_rate": 5.842814070351759e-05, - "loss": 5.0442, - "step": 41880 - }, - { - "epoch": 21.841460234680575, - "grad_norm": 1.6094989776611328, - "learning_rate": 5.8427135678391965e-05, - "loss": 5.2962, - "step": 41881 - }, - { - "epoch": 21.841981747066495, - "grad_norm": 1.5590009689331055, - "learning_rate": 5.842613065326633e-05, - "loss": 5.3653, - "step": 41882 - }, - { - "epoch": 21.84250325945241, - "grad_norm": 1.5385627746582031, - "learning_rate": 5.842512562814071e-05, - "loss": 4.9555, - "step": 41883 - }, - { - "epoch": 21.84302477183833, - "grad_norm": 1.57291841506958, - "learning_rate": 5.842412060301508e-05, - "loss": 5.1792, - "step": 41884 - }, - { - "epoch": 21.84354628422425, - "grad_norm": 1.5229089260101318, - "learning_rate": 5.8423115577889456e-05, - "loss": 5.4457, - "step": 41885 - }, - { - "epoch": 21.84406779661017, - "grad_norm": 1.3921809196472168, - "learning_rate": 5.842211055276382e-05, - "loss": 5.1839, - "step": 41886 - }, - { - "epoch": 21.84458930899609, - "grad_norm": 1.4522595405578613, - "learning_rate": 5.84211055276382e-05, - "loss": 5.5781, - "step": 41887 - }, - { - "epoch": 21.845110821382008, - "grad_norm": 1.5894471406936646, - "learning_rate": 5.842010050251256e-05, - "loss": 4.8582, - "step": 41888 - }, - { - "epoch": 21.845632333767927, - "grad_norm": 1.4380908012390137, - "learning_rate": 5.841909547738694e-05, - "loss": 5.6892, - "step": 41889 - }, - { - "epoch": 21.846153846153847, - "grad_norm": 1.4884122610092163, - "learning_rate": 5.841809045226131e-05, - "loss": 5.4317, - "step": 41890 - }, - { - "epoch": 21.846675358539766, - "grad_norm": 1.496809959411621, - "learning_rate": 5.8417085427135675e-05, - "loss": 5.4385, - "step": 41891 - }, - { - "epoch": 21.847196870925686, - "grad_norm": 1.6301608085632324, - "learning_rate": 5.841608040201005e-05, - "loss": 5.1881, - "step": 41892 - }, - { - "epoch": 21.847718383311605, - "grad_norm": 1.483633041381836, - "learning_rate": 5.841507537688442e-05, - "loss": 5.3472, - "step": 41893 - }, - { - "epoch": 21.848239895697525, - "grad_norm": 1.599779486656189, - "learning_rate": 5.8414070351758795e-05, - "loss": 4.5516, - "step": 41894 - }, - { - "epoch": 21.84876140808344, - "grad_norm": 1.4741852283477783, - "learning_rate": 5.8413065326633166e-05, - "loss": 5.4519, - "step": 41895 - }, - { - "epoch": 21.84928292046936, - "grad_norm": 1.520350694656372, - "learning_rate": 5.8412060301507544e-05, - "loss": 5.2032, - "step": 41896 - }, - { - "epoch": 21.84980443285528, - "grad_norm": 1.6136078834533691, - "learning_rate": 5.841105527638191e-05, - "loss": 4.9613, - "step": 41897 - }, - { - "epoch": 21.8503259452412, - "grad_norm": 1.4915363788604736, - "learning_rate": 5.8410050251256286e-05, - "loss": 5.0082, - "step": 41898 - }, - { - "epoch": 21.85084745762712, - "grad_norm": 1.452694058418274, - "learning_rate": 5.840904522613066e-05, - "loss": 5.5215, - "step": 41899 - }, - { - "epoch": 21.851368970013038, - "grad_norm": 1.4307173490524292, - "learning_rate": 5.8408040201005035e-05, - "loss": 5.1033, - "step": 41900 - }, - { - "epoch": 21.851890482398957, - "grad_norm": 1.4931042194366455, - "learning_rate": 5.84070351758794e-05, - "loss": 5.4082, - "step": 41901 - }, - { - "epoch": 21.852411994784877, - "grad_norm": 1.4555625915527344, - "learning_rate": 5.840603015075378e-05, - "loss": 5.0541, - "step": 41902 - }, - { - "epoch": 21.852933507170796, - "grad_norm": 1.7087438106536865, - "learning_rate": 5.840502512562814e-05, - "loss": 4.6929, - "step": 41903 - }, - { - "epoch": 21.853455019556716, - "grad_norm": 1.40498685836792, - "learning_rate": 5.840402010050251e-05, - "loss": 5.364, - "step": 41904 - }, - { - "epoch": 21.853976531942635, - "grad_norm": 1.659467339515686, - "learning_rate": 5.840301507537689e-05, - "loss": 5.1238, - "step": 41905 - }, - { - "epoch": 21.85449804432855, - "grad_norm": 1.6358180046081543, - "learning_rate": 5.8402010050251254e-05, - "loss": 4.7675, - "step": 41906 - }, - { - "epoch": 21.85501955671447, - "grad_norm": 1.4843775033950806, - "learning_rate": 5.840100502512563e-05, - "loss": 5.4466, - "step": 41907 - }, - { - "epoch": 21.85554106910039, - "grad_norm": 1.5180821418762207, - "learning_rate": 5.8399999999999997e-05, - "loss": 5.36, - "step": 41908 - }, - { - "epoch": 21.85606258148631, - "grad_norm": 1.4790209531784058, - "learning_rate": 5.8398994974874374e-05, - "loss": 5.6131, - "step": 41909 - }, - { - "epoch": 21.85658409387223, - "grad_norm": 1.6452873945236206, - "learning_rate": 5.8397989949748745e-05, - "loss": 4.8184, - "step": 41910 - }, - { - "epoch": 21.85710560625815, - "grad_norm": 1.5504568815231323, - "learning_rate": 5.839698492462312e-05, - "loss": 4.8807, - "step": 41911 - }, - { - "epoch": 21.857627118644068, - "grad_norm": 1.5455732345581055, - "learning_rate": 5.839597989949749e-05, - "loss": 5.6304, - "step": 41912 - }, - { - "epoch": 21.858148631029987, - "grad_norm": 1.582024335861206, - "learning_rate": 5.8394974874371865e-05, - "loss": 5.0792, - "step": 41913 - }, - { - "epoch": 21.858670143415907, - "grad_norm": 1.5457736253738403, - "learning_rate": 5.839396984924623e-05, - "loss": 5.1021, - "step": 41914 - }, - { - "epoch": 21.859191655801826, - "grad_norm": 1.5175983905792236, - "learning_rate": 5.839296482412061e-05, - "loss": 5.4904, - "step": 41915 - }, - { - "epoch": 21.859713168187746, - "grad_norm": 1.5347245931625366, - "learning_rate": 5.839195979899498e-05, - "loss": 4.8764, - "step": 41916 - }, - { - "epoch": 21.860234680573665, - "grad_norm": 1.5013045072555542, - "learning_rate": 5.839095477386934e-05, - "loss": 5.6277, - "step": 41917 - }, - { - "epoch": 21.860756192959585, - "grad_norm": 1.5162074565887451, - "learning_rate": 5.838994974874372e-05, - "loss": 5.4623, - "step": 41918 - }, - { - "epoch": 21.8612777053455, - "grad_norm": 1.5441786050796509, - "learning_rate": 5.838894472361809e-05, - "loss": 5.4825, - "step": 41919 - }, - { - "epoch": 21.86179921773142, - "grad_norm": 1.509523630142212, - "learning_rate": 5.838793969849247e-05, - "loss": 5.2309, - "step": 41920 - }, - { - "epoch": 21.86232073011734, - "grad_norm": 1.5717930793762207, - "learning_rate": 5.8386934673366834e-05, - "loss": 5.3303, - "step": 41921 - }, - { - "epoch": 21.86284224250326, - "grad_norm": 1.4411300420761108, - "learning_rate": 5.838592964824121e-05, - "loss": 5.3709, - "step": 41922 - }, - { - "epoch": 21.86336375488918, - "grad_norm": 1.5654906034469604, - "learning_rate": 5.8384924623115576e-05, - "loss": 5.6031, - "step": 41923 - }, - { - "epoch": 21.863885267275098, - "grad_norm": 1.5137501955032349, - "learning_rate": 5.8383919597989954e-05, - "loss": 5.4535, - "step": 41924 - }, - { - "epoch": 21.864406779661017, - "grad_norm": 1.5513395071029663, - "learning_rate": 5.8382914572864325e-05, - "loss": 5.543, - "step": 41925 - }, - { - "epoch": 21.864928292046937, - "grad_norm": 1.488561749458313, - "learning_rate": 5.83819095477387e-05, - "loss": 5.7962, - "step": 41926 - }, - { - "epoch": 21.865449804432856, - "grad_norm": 1.5669893026351929, - "learning_rate": 5.838090452261307e-05, - "loss": 5.074, - "step": 41927 - }, - { - "epoch": 21.865971316818776, - "grad_norm": 1.4754858016967773, - "learning_rate": 5.8379899497487445e-05, - "loss": 5.0765, - "step": 41928 - }, - { - "epoch": 21.866492829204695, - "grad_norm": 1.5180875062942505, - "learning_rate": 5.837889447236181e-05, - "loss": 5.2106, - "step": 41929 - }, - { - "epoch": 21.86701434159061, - "grad_norm": 1.5075641870498657, - "learning_rate": 5.837788944723619e-05, - "loss": 5.3933, - "step": 41930 - }, - { - "epoch": 21.86753585397653, - "grad_norm": 1.6527711153030396, - "learning_rate": 5.837688442211056e-05, - "loss": 5.3742, - "step": 41931 - }, - { - "epoch": 21.86805736636245, - "grad_norm": 1.514183521270752, - "learning_rate": 5.837587939698492e-05, - "loss": 5.3399, - "step": 41932 - }, - { - "epoch": 21.86857887874837, - "grad_norm": 1.5060375928878784, - "learning_rate": 5.83748743718593e-05, - "loss": 5.5315, - "step": 41933 - }, - { - "epoch": 21.86910039113429, - "grad_norm": 1.5075675249099731, - "learning_rate": 5.8373869346733664e-05, - "loss": 5.5403, - "step": 41934 - }, - { - "epoch": 21.86962190352021, - "grad_norm": 1.5204569101333618, - "learning_rate": 5.837286432160804e-05, - "loss": 5.2979, - "step": 41935 - }, - { - "epoch": 21.870143415906128, - "grad_norm": 1.596657633781433, - "learning_rate": 5.837185929648241e-05, - "loss": 5.1186, - "step": 41936 - }, - { - "epoch": 21.870664928292047, - "grad_norm": 1.5224225521087646, - "learning_rate": 5.837085427135679e-05, - "loss": 5.4516, - "step": 41937 - }, - { - "epoch": 21.871186440677967, - "grad_norm": 1.5894834995269775, - "learning_rate": 5.8369849246231155e-05, - "loss": 5.1063, - "step": 41938 - }, - { - "epoch": 21.871707953063886, - "grad_norm": 1.538542628288269, - "learning_rate": 5.836884422110553e-05, - "loss": 5.3148, - "step": 41939 - }, - { - "epoch": 21.872229465449806, - "grad_norm": 1.4963231086730957, - "learning_rate": 5.8367839195979904e-05, - "loss": 5.4986, - "step": 41940 - }, - { - "epoch": 21.872750977835725, - "grad_norm": 1.485551357269287, - "learning_rate": 5.836683417085428e-05, - "loss": 5.5251, - "step": 41941 - }, - { - "epoch": 21.87327249022164, - "grad_norm": 1.6024152040481567, - "learning_rate": 5.8365829145728646e-05, - "loss": 4.663, - "step": 41942 - }, - { - "epoch": 21.87379400260756, - "grad_norm": 1.5055773258209229, - "learning_rate": 5.8364824120603024e-05, - "loss": 5.2487, - "step": 41943 - }, - { - "epoch": 21.87431551499348, - "grad_norm": 1.6701358556747437, - "learning_rate": 5.836381909547739e-05, - "loss": 4.6191, - "step": 41944 - }, - { - "epoch": 21.8748370273794, - "grad_norm": 1.6039685010910034, - "learning_rate": 5.836281407035176e-05, - "loss": 5.546, - "step": 41945 - }, - { - "epoch": 21.87535853976532, - "grad_norm": 1.561792254447937, - "learning_rate": 5.836180904522614e-05, - "loss": 5.427, - "step": 41946 - }, - { - "epoch": 21.87588005215124, - "grad_norm": 1.5535634756088257, - "learning_rate": 5.83608040201005e-05, - "loss": 5.4515, - "step": 41947 - }, - { - "epoch": 21.876401564537158, - "grad_norm": 1.5093504190444946, - "learning_rate": 5.835979899497488e-05, - "loss": 5.5186, - "step": 41948 - }, - { - "epoch": 21.876923076923077, - "grad_norm": 1.546369194984436, - "learning_rate": 5.8358793969849244e-05, - "loss": 5.3234, - "step": 41949 - }, - { - "epoch": 21.877444589308997, - "grad_norm": 1.66780686378479, - "learning_rate": 5.835778894472362e-05, - "loss": 4.4648, - "step": 41950 - }, - { - "epoch": 21.877966101694916, - "grad_norm": 1.4737446308135986, - "learning_rate": 5.835678391959799e-05, - "loss": 5.5348, - "step": 41951 - }, - { - "epoch": 21.878487614080836, - "grad_norm": 1.50537109375, - "learning_rate": 5.835577889447237e-05, - "loss": 5.7937, - "step": 41952 - }, - { - "epoch": 21.879009126466755, - "grad_norm": 1.4941002130508423, - "learning_rate": 5.8354773869346734e-05, - "loss": 5.0339, - "step": 41953 - }, - { - "epoch": 21.87953063885267, - "grad_norm": 1.4653023481369019, - "learning_rate": 5.835376884422111e-05, - "loss": 5.3883, - "step": 41954 - }, - { - "epoch": 21.88005215123859, - "grad_norm": 1.513156533241272, - "learning_rate": 5.8352763819095477e-05, - "loss": 5.4429, - "step": 41955 - }, - { - "epoch": 21.88057366362451, - "grad_norm": 1.4504368305206299, - "learning_rate": 5.8351758793969854e-05, - "loss": 5.4428, - "step": 41956 - }, - { - "epoch": 21.88109517601043, - "grad_norm": 1.4611902236938477, - "learning_rate": 5.8350753768844225e-05, - "loss": 5.6856, - "step": 41957 - }, - { - "epoch": 21.88161668839635, - "grad_norm": 1.5420540571212769, - "learning_rate": 5.834974874371859e-05, - "loss": 5.0313, - "step": 41958 - }, - { - "epoch": 21.88213820078227, - "grad_norm": 1.3872066736221313, - "learning_rate": 5.834874371859297e-05, - "loss": 4.8624, - "step": 41959 - }, - { - "epoch": 21.882659713168188, - "grad_norm": 1.564996600151062, - "learning_rate": 5.834773869346734e-05, - "loss": 4.9693, - "step": 41960 - }, - { - "epoch": 21.883181225554107, - "grad_norm": 1.514563798904419, - "learning_rate": 5.8346733668341716e-05, - "loss": 5.3859, - "step": 41961 - }, - { - "epoch": 21.883702737940027, - "grad_norm": 1.595097303390503, - "learning_rate": 5.834572864321608e-05, - "loss": 5.466, - "step": 41962 - }, - { - "epoch": 21.884224250325946, - "grad_norm": 1.6685523986816406, - "learning_rate": 5.834472361809046e-05, - "loss": 4.9853, - "step": 41963 - }, - { - "epoch": 21.884745762711866, - "grad_norm": 1.5359693765640259, - "learning_rate": 5.834371859296482e-05, - "loss": 5.0661, - "step": 41964 - }, - { - "epoch": 21.885267275097785, - "grad_norm": 1.4945323467254639, - "learning_rate": 5.83427135678392e-05, - "loss": 5.0705, - "step": 41965 - }, - { - "epoch": 21.8857887874837, - "grad_norm": 1.497876763343811, - "learning_rate": 5.834170854271357e-05, - "loss": 5.5896, - "step": 41966 - }, - { - "epoch": 21.88631029986962, - "grad_norm": 1.4886724948883057, - "learning_rate": 5.834070351758795e-05, - "loss": 5.5292, - "step": 41967 - }, - { - "epoch": 21.88683181225554, - "grad_norm": 1.4681150913238525, - "learning_rate": 5.8339698492462314e-05, - "loss": 5.4734, - "step": 41968 - }, - { - "epoch": 21.88735332464146, - "grad_norm": 1.5266600847244263, - "learning_rate": 5.833869346733669e-05, - "loss": 5.1259, - "step": 41969 - }, - { - "epoch": 21.88787483702738, - "grad_norm": 1.4793592691421509, - "learning_rate": 5.8337688442211056e-05, - "loss": 5.1573, - "step": 41970 - }, - { - "epoch": 21.8883963494133, - "grad_norm": 1.5315974950790405, - "learning_rate": 5.833668341708543e-05, - "loss": 5.2563, - "step": 41971 - }, - { - "epoch": 21.888917861799218, - "grad_norm": 1.5838834047317505, - "learning_rate": 5.8335678391959805e-05, - "loss": 4.9376, - "step": 41972 - }, - { - "epoch": 21.889439374185137, - "grad_norm": 1.538482666015625, - "learning_rate": 5.833467336683417e-05, - "loss": 5.3885, - "step": 41973 - }, - { - "epoch": 21.889960886571057, - "grad_norm": 1.5373525619506836, - "learning_rate": 5.833366834170855e-05, - "loss": 5.2494, - "step": 41974 - }, - { - "epoch": 21.890482398956976, - "grad_norm": 1.649280309677124, - "learning_rate": 5.833266331658291e-05, - "loss": 4.7732, - "step": 41975 - }, - { - "epoch": 21.891003911342896, - "grad_norm": 1.5088695287704468, - "learning_rate": 5.833165829145729e-05, - "loss": 5.2788, - "step": 41976 - }, - { - "epoch": 21.891525423728815, - "grad_norm": 1.5766627788543701, - "learning_rate": 5.833065326633166e-05, - "loss": 5.3025, - "step": 41977 - }, - { - "epoch": 21.89204693611473, - "grad_norm": 1.5534403324127197, - "learning_rate": 5.832964824120604e-05, - "loss": 4.9831, - "step": 41978 - }, - { - "epoch": 21.89256844850065, - "grad_norm": 1.5899075269699097, - "learning_rate": 5.83286432160804e-05, - "loss": 4.8409, - "step": 41979 - }, - { - "epoch": 21.89308996088657, - "grad_norm": 1.5571039915084839, - "learning_rate": 5.832763819095478e-05, - "loss": 5.4544, - "step": 41980 - }, - { - "epoch": 21.89361147327249, - "grad_norm": 1.5878766775131226, - "learning_rate": 5.8326633165829144e-05, - "loss": 5.1214, - "step": 41981 - }, - { - "epoch": 21.89413298565841, - "grad_norm": 1.527906060218811, - "learning_rate": 5.832562814070352e-05, - "loss": 5.5225, - "step": 41982 - }, - { - "epoch": 21.89465449804433, - "grad_norm": 1.4888696670532227, - "learning_rate": 5.832462311557789e-05, - "loss": 5.7317, - "step": 41983 - }, - { - "epoch": 21.895176010430248, - "grad_norm": 1.4556025266647339, - "learning_rate": 5.832361809045226e-05, - "loss": 5.2562, - "step": 41984 - }, - { - "epoch": 21.895697522816167, - "grad_norm": 1.4777439832687378, - "learning_rate": 5.8322613065326635e-05, - "loss": 5.6589, - "step": 41985 - }, - { - "epoch": 21.896219035202087, - "grad_norm": 1.5699248313903809, - "learning_rate": 5.8321608040201006e-05, - "loss": 4.9895, - "step": 41986 - }, - { - "epoch": 21.896740547588006, - "grad_norm": 1.5966968536376953, - "learning_rate": 5.8320603015075384e-05, - "loss": 5.2519, - "step": 41987 - }, - { - "epoch": 21.897262059973926, - "grad_norm": 1.5805176496505737, - "learning_rate": 5.831959798994975e-05, - "loss": 4.756, - "step": 41988 - }, - { - "epoch": 21.89778357235984, - "grad_norm": 1.4405996799468994, - "learning_rate": 5.8318592964824126e-05, - "loss": 5.5167, - "step": 41989 - }, - { - "epoch": 21.89830508474576, - "grad_norm": 1.5649290084838867, - "learning_rate": 5.831758793969849e-05, - "loss": 5.4918, - "step": 41990 - }, - { - "epoch": 21.89882659713168, - "grad_norm": 1.5600794553756714, - "learning_rate": 5.831658291457287e-05, - "loss": 5.5562, - "step": 41991 - }, - { - "epoch": 21.8993481095176, - "grad_norm": 1.4807024002075195, - "learning_rate": 5.831557788944724e-05, - "loss": 5.2046, - "step": 41992 - }, - { - "epoch": 21.89986962190352, - "grad_norm": 1.6025186777114868, - "learning_rate": 5.831457286432162e-05, - "loss": 5.3162, - "step": 41993 - }, - { - "epoch": 21.90039113428944, - "grad_norm": 1.5062063932418823, - "learning_rate": 5.831356783919598e-05, - "loss": 5.7683, - "step": 41994 - }, - { - "epoch": 21.90091264667536, - "grad_norm": 1.5127612352371216, - "learning_rate": 5.831256281407036e-05, - "loss": 5.3821, - "step": 41995 - }, - { - "epoch": 21.901434159061278, - "grad_norm": 1.4498271942138672, - "learning_rate": 5.8311557788944724e-05, - "loss": 5.2588, - "step": 41996 - }, - { - "epoch": 21.901955671447197, - "grad_norm": 1.6701335906982422, - "learning_rate": 5.8310552763819095e-05, - "loss": 5.0367, - "step": 41997 - }, - { - "epoch": 21.902477183833117, - "grad_norm": 1.5532876253128052, - "learning_rate": 5.830954773869347e-05, - "loss": 5.3337, - "step": 41998 - }, - { - "epoch": 21.902998696219036, - "grad_norm": 1.5312162637710571, - "learning_rate": 5.830854271356784e-05, - "loss": 5.3679, - "step": 41999 - }, - { - "epoch": 21.903520208604956, - "grad_norm": 1.5049115419387817, - "learning_rate": 5.8307537688442215e-05, - "loss": 5.4632, - "step": 42000 - }, - { - "epoch": 21.904041720990875, - "grad_norm": 1.5623483657836914, - "learning_rate": 5.830653266331658e-05, - "loss": 5.4609, - "step": 42001 - }, - { - "epoch": 21.90456323337679, - "grad_norm": 1.5191283226013184, - "learning_rate": 5.830552763819096e-05, - "loss": 5.6238, - "step": 42002 - }, - { - "epoch": 21.90508474576271, - "grad_norm": 1.4985300302505493, - "learning_rate": 5.830452261306533e-05, - "loss": 5.3175, - "step": 42003 - }, - { - "epoch": 21.90560625814863, - "grad_norm": 1.3812922239303589, - "learning_rate": 5.8303517587939706e-05, - "loss": 5.7412, - "step": 42004 - }, - { - "epoch": 21.90612777053455, - "grad_norm": 1.5603723526000977, - "learning_rate": 5.830251256281407e-05, - "loss": 5.0244, - "step": 42005 - }, - { - "epoch": 21.90664928292047, - "grad_norm": 1.5485970973968506, - "learning_rate": 5.830150753768845e-05, - "loss": 5.1907, - "step": 42006 - }, - { - "epoch": 21.90717079530639, - "grad_norm": 1.5613195896148682, - "learning_rate": 5.830050251256282e-05, - "loss": 5.0657, - "step": 42007 - }, - { - "epoch": 21.907692307692308, - "grad_norm": 1.5981497764587402, - "learning_rate": 5.8299497487437197e-05, - "loss": 5.0002, - "step": 42008 - }, - { - "epoch": 21.908213820078227, - "grad_norm": 1.5079877376556396, - "learning_rate": 5.829849246231156e-05, - "loss": 5.6621, - "step": 42009 - }, - { - "epoch": 21.908735332464147, - "grad_norm": 1.6818382740020752, - "learning_rate": 5.8297487437185925e-05, - "loss": 4.1537, - "step": 42010 - }, - { - "epoch": 21.909256844850066, - "grad_norm": 1.4978188276290894, - "learning_rate": 5.82964824120603e-05, - "loss": 5.5116, - "step": 42011 - }, - { - "epoch": 21.909778357235986, - "grad_norm": 1.5603281259536743, - "learning_rate": 5.8295477386934674e-05, - "loss": 5.4766, - "step": 42012 - }, - { - "epoch": 21.910299869621902, - "grad_norm": 1.6450597047805786, - "learning_rate": 5.829447236180905e-05, - "loss": 4.7945, - "step": 42013 - }, - { - "epoch": 21.91082138200782, - "grad_norm": 1.583295464515686, - "learning_rate": 5.8293467336683416e-05, - "loss": 5.1988, - "step": 42014 - }, - { - "epoch": 21.91134289439374, - "grad_norm": 1.3789119720458984, - "learning_rate": 5.8292462311557794e-05, - "loss": 5.756, - "step": 42015 - }, - { - "epoch": 21.91186440677966, - "grad_norm": 1.475592851638794, - "learning_rate": 5.829145728643216e-05, - "loss": 5.5703, - "step": 42016 - }, - { - "epoch": 21.91238591916558, - "grad_norm": 1.6182458400726318, - "learning_rate": 5.8290452261306536e-05, - "loss": 5.2822, - "step": 42017 - }, - { - "epoch": 21.9129074315515, - "grad_norm": 1.365044116973877, - "learning_rate": 5.828944723618091e-05, - "loss": 4.9048, - "step": 42018 - }, - { - "epoch": 21.91342894393742, - "grad_norm": 1.7405434846878052, - "learning_rate": 5.8288442211055285e-05, - "loss": 4.6075, - "step": 42019 - }, - { - "epoch": 21.913950456323338, - "grad_norm": 1.5701165199279785, - "learning_rate": 5.828743718592965e-05, - "loss": 5.5114, - "step": 42020 - }, - { - "epoch": 21.914471968709258, - "grad_norm": 1.6888223886489868, - "learning_rate": 5.828643216080403e-05, - "loss": 4.8161, - "step": 42021 - }, - { - "epoch": 21.914993481095177, - "grad_norm": 1.5084335803985596, - "learning_rate": 5.828542713567839e-05, - "loss": 5.3072, - "step": 42022 - }, - { - "epoch": 21.915514993481096, - "grad_norm": 1.5703974962234497, - "learning_rate": 5.828442211055277e-05, - "loss": 5.0759, - "step": 42023 - }, - { - "epoch": 21.916036505867016, - "grad_norm": 1.4951527118682861, - "learning_rate": 5.828341708542714e-05, - "loss": 5.2994, - "step": 42024 - }, - { - "epoch": 21.916558018252932, - "grad_norm": 1.608962059020996, - "learning_rate": 5.8282412060301504e-05, - "loss": 5.3088, - "step": 42025 - }, - { - "epoch": 21.91707953063885, - "grad_norm": 1.5105503797531128, - "learning_rate": 5.828140703517588e-05, - "loss": 5.4178, - "step": 42026 - }, - { - "epoch": 21.91760104302477, - "grad_norm": 1.4586607217788696, - "learning_rate": 5.828040201005025e-05, - "loss": 5.5626, - "step": 42027 - }, - { - "epoch": 21.91812255541069, - "grad_norm": 1.542940616607666, - "learning_rate": 5.827939698492463e-05, - "loss": 5.2173, - "step": 42028 - }, - { - "epoch": 21.91864406779661, - "grad_norm": 1.457555890083313, - "learning_rate": 5.8278391959798995e-05, - "loss": 5.4643, - "step": 42029 - }, - { - "epoch": 21.91916558018253, - "grad_norm": 1.5745370388031006, - "learning_rate": 5.827738693467337e-05, - "loss": 5.224, - "step": 42030 - }, - { - "epoch": 21.91968709256845, - "grad_norm": 1.5205106735229492, - "learning_rate": 5.827638190954774e-05, - "loss": 5.596, - "step": 42031 - }, - { - "epoch": 21.920208604954368, - "grad_norm": 1.482153058052063, - "learning_rate": 5.8275376884422115e-05, - "loss": 5.3695, - "step": 42032 - }, - { - "epoch": 21.920730117340288, - "grad_norm": 1.5774900913238525, - "learning_rate": 5.8274371859296486e-05, - "loss": 5.3925, - "step": 42033 - }, - { - "epoch": 21.921251629726207, - "grad_norm": 1.6781002283096313, - "learning_rate": 5.8273366834170864e-05, - "loss": 4.7475, - "step": 42034 - }, - { - "epoch": 21.921773142112126, - "grad_norm": 1.580794095993042, - "learning_rate": 5.827236180904523e-05, - "loss": 4.872, - "step": 42035 - }, - { - "epoch": 21.922294654498046, - "grad_norm": 1.5266492366790771, - "learning_rate": 5.8271356783919606e-05, - "loss": 5.3525, - "step": 42036 - }, - { - "epoch": 21.922816166883962, - "grad_norm": 1.5178310871124268, - "learning_rate": 5.827035175879397e-05, - "loss": 5.4739, - "step": 42037 - }, - { - "epoch": 21.92333767926988, - "grad_norm": 1.4860700368881226, - "learning_rate": 5.826934673366834e-05, - "loss": 5.1177, - "step": 42038 - }, - { - "epoch": 21.9238591916558, - "grad_norm": 1.5002357959747314, - "learning_rate": 5.826834170854272e-05, - "loss": 5.6885, - "step": 42039 - }, - { - "epoch": 21.92438070404172, - "grad_norm": 1.6520437002182007, - "learning_rate": 5.8267336683417084e-05, - "loss": 5.4964, - "step": 42040 - }, - { - "epoch": 21.92490221642764, - "grad_norm": 1.4138195514678955, - "learning_rate": 5.826633165829146e-05, - "loss": 5.1667, - "step": 42041 - }, - { - "epoch": 21.92542372881356, - "grad_norm": 1.5630048513412476, - "learning_rate": 5.8265326633165826e-05, - "loss": 5.5006, - "step": 42042 - }, - { - "epoch": 21.92594524119948, - "grad_norm": 1.5247585773468018, - "learning_rate": 5.8264321608040204e-05, - "loss": 5.3403, - "step": 42043 - }, - { - "epoch": 21.926466753585398, - "grad_norm": 1.5392122268676758, - "learning_rate": 5.8263316582914575e-05, - "loss": 5.3917, - "step": 42044 - }, - { - "epoch": 21.926988265971318, - "grad_norm": 1.6335647106170654, - "learning_rate": 5.826231155778895e-05, - "loss": 5.1803, - "step": 42045 - }, - { - "epoch": 21.927509778357237, - "grad_norm": 1.5000687837600708, - "learning_rate": 5.826130653266332e-05, - "loss": 5.1532, - "step": 42046 - }, - { - "epoch": 21.928031290743156, - "grad_norm": 1.5084764957427979, - "learning_rate": 5.8260301507537695e-05, - "loss": 5.581, - "step": 42047 - }, - { - "epoch": 21.928552803129076, - "grad_norm": 1.50753653049469, - "learning_rate": 5.8259296482412066e-05, - "loss": 4.9312, - "step": 42048 - }, - { - "epoch": 21.929074315514992, - "grad_norm": 1.632819414138794, - "learning_rate": 5.8258291457286444e-05, - "loss": 4.6885, - "step": 42049 - }, - { - "epoch": 21.92959582790091, - "grad_norm": 1.549473524093628, - "learning_rate": 5.825728643216081e-05, - "loss": 4.8301, - "step": 42050 - }, - { - "epoch": 21.93011734028683, - "grad_norm": 1.3727803230285645, - "learning_rate": 5.825628140703517e-05, - "loss": 5.4922, - "step": 42051 - }, - { - "epoch": 21.93063885267275, - "grad_norm": 1.435546875, - "learning_rate": 5.825527638190955e-05, - "loss": 4.8336, - "step": 42052 - }, - { - "epoch": 21.93116036505867, - "grad_norm": 1.588252305984497, - "learning_rate": 5.825427135678392e-05, - "loss": 5.195, - "step": 42053 - }, - { - "epoch": 21.93168187744459, - "grad_norm": 1.6049964427947998, - "learning_rate": 5.82532663316583e-05, - "loss": 5.123, - "step": 42054 - }, - { - "epoch": 21.93220338983051, - "grad_norm": 1.5056451559066772, - "learning_rate": 5.825226130653266e-05, - "loss": 5.436, - "step": 42055 - }, - { - "epoch": 21.932724902216428, - "grad_norm": 1.5290497541427612, - "learning_rate": 5.825125628140704e-05, - "loss": 4.9612, - "step": 42056 - }, - { - "epoch": 21.933246414602348, - "grad_norm": 1.5625461339950562, - "learning_rate": 5.8250251256281405e-05, - "loss": 5.5316, - "step": 42057 - }, - { - "epoch": 21.933767926988267, - "grad_norm": 1.5629171133041382, - "learning_rate": 5.824924623115578e-05, - "loss": 5.2947, - "step": 42058 - }, - { - "epoch": 21.934289439374187, - "grad_norm": 1.436341404914856, - "learning_rate": 5.8248241206030154e-05, - "loss": 5.5308, - "step": 42059 - }, - { - "epoch": 21.934810951760106, - "grad_norm": 1.532678484916687, - "learning_rate": 5.824723618090453e-05, - "loss": 5.4107, - "step": 42060 - }, - { - "epoch": 21.935332464146022, - "grad_norm": 1.4010720252990723, - "learning_rate": 5.8246231155778896e-05, - "loss": 5.826, - "step": 42061 - }, - { - "epoch": 21.93585397653194, - "grad_norm": 1.6664141416549683, - "learning_rate": 5.8245226130653274e-05, - "loss": 4.8218, - "step": 42062 - }, - { - "epoch": 21.93637548891786, - "grad_norm": 1.524713397026062, - "learning_rate": 5.824422110552764e-05, - "loss": 5.5256, - "step": 42063 - }, - { - "epoch": 21.93689700130378, - "grad_norm": 1.5958356857299805, - "learning_rate": 5.824321608040201e-05, - "loss": 5.3986, - "step": 42064 - }, - { - "epoch": 21.9374185136897, - "grad_norm": 1.4752930402755737, - "learning_rate": 5.824221105527639e-05, - "loss": 4.4377, - "step": 42065 - }, - { - "epoch": 21.93794002607562, - "grad_norm": 1.6368935108184814, - "learning_rate": 5.824120603015075e-05, - "loss": 5.0611, - "step": 42066 - }, - { - "epoch": 21.93846153846154, - "grad_norm": 1.5998173952102661, - "learning_rate": 5.824020100502513e-05, - "loss": 5.4908, - "step": 42067 - }, - { - "epoch": 21.938983050847458, - "grad_norm": 1.5348546504974365, - "learning_rate": 5.8239195979899494e-05, - "loss": 5.052, - "step": 42068 - }, - { - "epoch": 21.939504563233378, - "grad_norm": 1.6235400438308716, - "learning_rate": 5.823819095477387e-05, - "loss": 5.3374, - "step": 42069 - }, - { - "epoch": 21.940026075619297, - "grad_norm": 1.466383934020996, - "learning_rate": 5.823718592964824e-05, - "loss": 5.4586, - "step": 42070 - }, - { - "epoch": 21.940547588005217, - "grad_norm": 1.57613205909729, - "learning_rate": 5.823618090452262e-05, - "loss": 5.1459, - "step": 42071 - }, - { - "epoch": 21.941069100391136, - "grad_norm": 1.4336999654769897, - "learning_rate": 5.8235175879396985e-05, - "loss": 5.2142, - "step": 42072 - }, - { - "epoch": 21.941590612777052, - "grad_norm": 1.4654468297958374, - "learning_rate": 5.823417085427136e-05, - "loss": 5.4257, - "step": 42073 - }, - { - "epoch": 21.94211212516297, - "grad_norm": 1.5660783052444458, - "learning_rate": 5.8233165829145733e-05, - "loss": 5.0364, - "step": 42074 - }, - { - "epoch": 21.94263363754889, - "grad_norm": 1.640815019607544, - "learning_rate": 5.823216080402011e-05, - "loss": 5.5139, - "step": 42075 - }, - { - "epoch": 21.94315514993481, - "grad_norm": 1.4631438255310059, - "learning_rate": 5.8231155778894475e-05, - "loss": 5.2812, - "step": 42076 - }, - { - "epoch": 21.94367666232073, - "grad_norm": 1.5351910591125488, - "learning_rate": 5.823015075376884e-05, - "loss": 5.5789, - "step": 42077 - }, - { - "epoch": 21.94419817470665, - "grad_norm": 1.5925915241241455, - "learning_rate": 5.822914572864322e-05, - "loss": 4.9249, - "step": 42078 - }, - { - "epoch": 21.94471968709257, - "grad_norm": 1.5787886381149292, - "learning_rate": 5.822814070351759e-05, - "loss": 4.9654, - "step": 42079 - }, - { - "epoch": 21.945241199478488, - "grad_norm": 1.529506802558899, - "learning_rate": 5.8227135678391966e-05, - "loss": 4.9347, - "step": 42080 - }, - { - "epoch": 21.945762711864408, - "grad_norm": 1.4680368900299072, - "learning_rate": 5.822613065326633e-05, - "loss": 5.5374, - "step": 42081 - }, - { - "epoch": 21.946284224250327, - "grad_norm": 1.707362174987793, - "learning_rate": 5.822512562814071e-05, - "loss": 4.9373, - "step": 42082 - }, - { - "epoch": 21.946805736636247, - "grad_norm": 1.4404871463775635, - "learning_rate": 5.822412060301507e-05, - "loss": 5.635, - "step": 42083 - }, - { - "epoch": 21.947327249022166, - "grad_norm": 1.4904073476791382, - "learning_rate": 5.822311557788945e-05, - "loss": 5.3745, - "step": 42084 - }, - { - "epoch": 21.947848761408082, - "grad_norm": 1.474326491355896, - "learning_rate": 5.822211055276382e-05, - "loss": 5.5313, - "step": 42085 - }, - { - "epoch": 21.948370273794, - "grad_norm": 1.4067562818527222, - "learning_rate": 5.82211055276382e-05, - "loss": 5.4885, - "step": 42086 - }, - { - "epoch": 21.94889178617992, - "grad_norm": 1.5355733633041382, - "learning_rate": 5.8220100502512564e-05, - "loss": 5.1048, - "step": 42087 - }, - { - "epoch": 21.94941329856584, - "grad_norm": 1.4649490118026733, - "learning_rate": 5.821909547738694e-05, - "loss": 5.4616, - "step": 42088 - }, - { - "epoch": 21.94993481095176, - "grad_norm": 1.4604824781417847, - "learning_rate": 5.8218090452261306e-05, - "loss": 4.8086, - "step": 42089 - }, - { - "epoch": 21.95045632333768, - "grad_norm": 1.4685108661651611, - "learning_rate": 5.821708542713568e-05, - "loss": 4.9491, - "step": 42090 - }, - { - "epoch": 21.9509778357236, - "grad_norm": 1.5075209140777588, - "learning_rate": 5.8216080402010055e-05, - "loss": 5.3008, - "step": 42091 - }, - { - "epoch": 21.951499348109518, - "grad_norm": 1.4777847528457642, - "learning_rate": 5.821507537688442e-05, - "loss": 5.4415, - "step": 42092 - }, - { - "epoch": 21.952020860495438, - "grad_norm": 1.5505176782608032, - "learning_rate": 5.82140703517588e-05, - "loss": 5.1612, - "step": 42093 - }, - { - "epoch": 21.952542372881357, - "grad_norm": 1.5828884840011597, - "learning_rate": 5.821306532663317e-05, - "loss": 5.2586, - "step": 42094 - }, - { - "epoch": 21.953063885267277, - "grad_norm": 1.4432294368743896, - "learning_rate": 5.8212060301507546e-05, - "loss": 5.6862, - "step": 42095 - }, - { - "epoch": 21.953585397653193, - "grad_norm": 1.4948519468307495, - "learning_rate": 5.821105527638191e-05, - "loss": 4.6054, - "step": 42096 - }, - { - "epoch": 21.954106910039112, - "grad_norm": 1.5245412588119507, - "learning_rate": 5.821005025125629e-05, - "loss": 5.5406, - "step": 42097 - }, - { - "epoch": 21.95462842242503, - "grad_norm": 1.5176786184310913, - "learning_rate": 5.820904522613065e-05, - "loss": 5.3078, - "step": 42098 - }, - { - "epoch": 21.95514993481095, - "grad_norm": 1.5956100225448608, - "learning_rate": 5.820804020100503e-05, - "loss": 4.9958, - "step": 42099 - }, - { - "epoch": 21.95567144719687, - "grad_norm": 1.531893014907837, - "learning_rate": 5.82070351758794e-05, - "loss": 5.36, - "step": 42100 - }, - { - "epoch": 21.95619295958279, - "grad_norm": 1.5048855543136597, - "learning_rate": 5.820603015075378e-05, - "loss": 5.5331, - "step": 42101 - }, - { - "epoch": 21.95671447196871, - "grad_norm": 1.5997244119644165, - "learning_rate": 5.820502512562814e-05, - "loss": 4.6256, - "step": 42102 - }, - { - "epoch": 21.95723598435463, - "grad_norm": 1.5563286542892456, - "learning_rate": 5.820402010050251e-05, - "loss": 5.346, - "step": 42103 - }, - { - "epoch": 21.957757496740548, - "grad_norm": 1.5403648614883423, - "learning_rate": 5.8203015075376885e-05, - "loss": 5.3513, - "step": 42104 - }, - { - "epoch": 21.958279009126468, - "grad_norm": 1.4803470373153687, - "learning_rate": 5.8202010050251256e-05, - "loss": 5.3674, - "step": 42105 - }, - { - "epoch": 21.958800521512387, - "grad_norm": 1.6556289196014404, - "learning_rate": 5.8201005025125634e-05, - "loss": 4.6369, - "step": 42106 - }, - { - "epoch": 21.959322033898307, - "grad_norm": 1.5434647798538208, - "learning_rate": 5.82e-05, - "loss": 4.8886, - "step": 42107 - }, - { - "epoch": 21.959843546284226, - "grad_norm": 1.7631635665893555, - "learning_rate": 5.8198994974874376e-05, - "loss": 4.9237, - "step": 42108 - }, - { - "epoch": 21.960365058670142, - "grad_norm": 1.4906246662139893, - "learning_rate": 5.819798994974874e-05, - "loss": 5.5423, - "step": 42109 - }, - { - "epoch": 21.96088657105606, - "grad_norm": 1.560929536819458, - "learning_rate": 5.819698492462312e-05, - "loss": 5.2586, - "step": 42110 - }, - { - "epoch": 21.96140808344198, - "grad_norm": 1.5039037466049194, - "learning_rate": 5.819597989949749e-05, - "loss": 5.6529, - "step": 42111 - }, - { - "epoch": 21.9619295958279, - "grad_norm": 1.5942096710205078, - "learning_rate": 5.819497487437187e-05, - "loss": 5.1574, - "step": 42112 - }, - { - "epoch": 21.96245110821382, - "grad_norm": 1.391998529434204, - "learning_rate": 5.819396984924623e-05, - "loss": 4.5104, - "step": 42113 - }, - { - "epoch": 21.96297262059974, - "grad_norm": 1.4537588357925415, - "learning_rate": 5.819296482412061e-05, - "loss": 5.476, - "step": 42114 - }, - { - "epoch": 21.96349413298566, - "grad_norm": 1.5042388439178467, - "learning_rate": 5.819195979899498e-05, - "loss": 5.2225, - "step": 42115 - }, - { - "epoch": 21.96401564537158, - "grad_norm": 1.5018008947372437, - "learning_rate": 5.819095477386936e-05, - "loss": 4.6879, - "step": 42116 - }, - { - "epoch": 21.964537157757498, - "grad_norm": 1.5898692607879639, - "learning_rate": 5.818994974874372e-05, - "loss": 5.5802, - "step": 42117 - }, - { - "epoch": 21.965058670143417, - "grad_norm": 1.5287984609603882, - "learning_rate": 5.818894472361809e-05, - "loss": 5.3689, - "step": 42118 - }, - { - "epoch": 21.965580182529337, - "grad_norm": 1.5139858722686768, - "learning_rate": 5.8187939698492465e-05, - "loss": 5.1533, - "step": 42119 - }, - { - "epoch": 21.966101694915253, - "grad_norm": 1.5237902402877808, - "learning_rate": 5.8186934673366836e-05, - "loss": 5.5651, - "step": 42120 - }, - { - "epoch": 21.966623207301172, - "grad_norm": 1.4550294876098633, - "learning_rate": 5.8185929648241213e-05, - "loss": 5.0271, - "step": 42121 - }, - { - "epoch": 21.96714471968709, - "grad_norm": 1.4642835855484009, - "learning_rate": 5.818492462311558e-05, - "loss": 5.4201, - "step": 42122 - }, - { - "epoch": 21.96766623207301, - "grad_norm": 1.4589670896530151, - "learning_rate": 5.8183919597989956e-05, - "loss": 5.6589, - "step": 42123 - }, - { - "epoch": 21.96818774445893, - "grad_norm": 1.5269198417663574, - "learning_rate": 5.818291457286432e-05, - "loss": 5.346, - "step": 42124 - }, - { - "epoch": 21.96870925684485, - "grad_norm": 1.5818287134170532, - "learning_rate": 5.81819095477387e-05, - "loss": 5.2311, - "step": 42125 - }, - { - "epoch": 21.96923076923077, - "grad_norm": 1.3386163711547852, - "learning_rate": 5.818090452261307e-05, - "loss": 5.6011, - "step": 42126 - }, - { - "epoch": 21.96975228161669, - "grad_norm": 1.5470058917999268, - "learning_rate": 5.8179899497487447e-05, - "loss": 5.1538, - "step": 42127 - }, - { - "epoch": 21.97027379400261, - "grad_norm": 1.731455683708191, - "learning_rate": 5.817889447236181e-05, - "loss": 4.7328, - "step": 42128 - }, - { - "epoch": 21.970795306388528, - "grad_norm": 1.4202128648757935, - "learning_rate": 5.817788944723619e-05, - "loss": 5.0677, - "step": 42129 - }, - { - "epoch": 21.971316818774447, - "grad_norm": 1.843341588973999, - "learning_rate": 5.817688442211055e-05, - "loss": 4.9524, - "step": 42130 - }, - { - "epoch": 21.971838331160367, - "grad_norm": 1.4760949611663818, - "learning_rate": 5.8175879396984924e-05, - "loss": 5.499, - "step": 42131 - }, - { - "epoch": 21.972359843546283, - "grad_norm": 1.572011947631836, - "learning_rate": 5.81748743718593e-05, - "loss": 4.9848, - "step": 42132 - }, - { - "epoch": 21.972881355932202, - "grad_norm": 1.5579276084899902, - "learning_rate": 5.8173869346733666e-05, - "loss": 5.1625, - "step": 42133 - }, - { - "epoch": 21.97340286831812, - "grad_norm": 1.6083675622940063, - "learning_rate": 5.8172864321608044e-05, - "loss": 5.3133, - "step": 42134 - }, - { - "epoch": 21.97392438070404, - "grad_norm": 1.4813382625579834, - "learning_rate": 5.8171859296482415e-05, - "loss": 4.9943, - "step": 42135 - }, - { - "epoch": 21.97444589308996, - "grad_norm": 1.5064232349395752, - "learning_rate": 5.817085427135679e-05, - "loss": 4.9173, - "step": 42136 - }, - { - "epoch": 21.97496740547588, - "grad_norm": 1.5259687900543213, - "learning_rate": 5.816984924623116e-05, - "loss": 5.3921, - "step": 42137 - }, - { - "epoch": 21.9754889178618, - "grad_norm": 1.6212314367294312, - "learning_rate": 5.8168844221105535e-05, - "loss": 4.6806, - "step": 42138 - }, - { - "epoch": 21.97601043024772, - "grad_norm": 1.5363695621490479, - "learning_rate": 5.81678391959799e-05, - "loss": 5.3511, - "step": 42139 - }, - { - "epoch": 21.97653194263364, - "grad_norm": 1.5135668516159058, - "learning_rate": 5.816683417085428e-05, - "loss": 5.4472, - "step": 42140 - }, - { - "epoch": 21.977053455019558, - "grad_norm": 1.5170632600784302, - "learning_rate": 5.816582914572865e-05, - "loss": 5.3477, - "step": 42141 - }, - { - "epoch": 21.977574967405477, - "grad_norm": 1.4819536209106445, - "learning_rate": 5.8164824120603026e-05, - "loss": 4.5059, - "step": 42142 - }, - { - "epoch": 21.978096479791397, - "grad_norm": 1.5759046077728271, - "learning_rate": 5.816381909547739e-05, - "loss": 4.8937, - "step": 42143 - }, - { - "epoch": 21.978617992177313, - "grad_norm": 1.5767240524291992, - "learning_rate": 5.8162814070351754e-05, - "loss": 5.2315, - "step": 42144 - }, - { - "epoch": 21.979139504563232, - "grad_norm": 1.4092458486557007, - "learning_rate": 5.816180904522613e-05, - "loss": 5.6344, - "step": 42145 - }, - { - "epoch": 21.97966101694915, - "grad_norm": 1.664224624633789, - "learning_rate": 5.81608040201005e-05, - "loss": 4.7398, - "step": 42146 - }, - { - "epoch": 21.98018252933507, - "grad_norm": 1.536131501197815, - "learning_rate": 5.815979899497488e-05, - "loss": 5.0595, - "step": 42147 - }, - { - "epoch": 21.98070404172099, - "grad_norm": 1.4872055053710938, - "learning_rate": 5.8158793969849245e-05, - "loss": 5.4025, - "step": 42148 - }, - { - "epoch": 21.98122555410691, - "grad_norm": 1.5688204765319824, - "learning_rate": 5.815778894472362e-05, - "loss": 4.8293, - "step": 42149 - }, - { - "epoch": 21.98174706649283, - "grad_norm": 1.4652063846588135, - "learning_rate": 5.815678391959799e-05, - "loss": 5.4027, - "step": 42150 - }, - { - "epoch": 21.98226857887875, - "grad_norm": 1.6321640014648438, - "learning_rate": 5.8155778894472365e-05, - "loss": 4.9994, - "step": 42151 - }, - { - "epoch": 21.98279009126467, - "grad_norm": 1.4840806722640991, - "learning_rate": 5.8154773869346736e-05, - "loss": 5.3424, - "step": 42152 - }, - { - "epoch": 21.983311603650588, - "grad_norm": 1.4152530431747437, - "learning_rate": 5.8153768844221114e-05, - "loss": 5.5168, - "step": 42153 - }, - { - "epoch": 21.983833116036507, - "grad_norm": 1.4785317182540894, - "learning_rate": 5.815276381909548e-05, - "loss": 5.4903, - "step": 42154 - }, - { - "epoch": 21.984354628422427, - "grad_norm": 1.5743228197097778, - "learning_rate": 5.8151758793969856e-05, - "loss": 5.2291, - "step": 42155 - }, - { - "epoch": 21.984876140808343, - "grad_norm": 1.4972797632217407, - "learning_rate": 5.815075376884422e-05, - "loss": 5.7381, - "step": 42156 - }, - { - "epoch": 21.985397653194262, - "grad_norm": 1.5571222305297852, - "learning_rate": 5.814974874371859e-05, - "loss": 5.0386, - "step": 42157 - }, - { - "epoch": 21.98591916558018, - "grad_norm": 1.4645054340362549, - "learning_rate": 5.814874371859297e-05, - "loss": 5.4694, - "step": 42158 - }, - { - "epoch": 21.9864406779661, - "grad_norm": 1.5832356214523315, - "learning_rate": 5.8147738693467334e-05, - "loss": 5.1461, - "step": 42159 - }, - { - "epoch": 21.98696219035202, - "grad_norm": 1.4825469255447388, - "learning_rate": 5.814673366834171e-05, - "loss": 4.9455, - "step": 42160 - }, - { - "epoch": 21.98748370273794, - "grad_norm": 1.5109059810638428, - "learning_rate": 5.814572864321608e-05, - "loss": 5.174, - "step": 42161 - }, - { - "epoch": 21.98800521512386, - "grad_norm": 1.4460844993591309, - "learning_rate": 5.814472361809046e-05, - "loss": 5.5148, - "step": 42162 - }, - { - "epoch": 21.98852672750978, - "grad_norm": 1.9474440813064575, - "learning_rate": 5.8143718592964825e-05, - "loss": 5.2287, - "step": 42163 - }, - { - "epoch": 21.9890482398957, - "grad_norm": 1.4937024116516113, - "learning_rate": 5.81427135678392e-05, - "loss": 5.2602, - "step": 42164 - }, - { - "epoch": 21.989569752281618, - "grad_norm": 1.4989162683486938, - "learning_rate": 5.814170854271357e-05, - "loss": 5.3758, - "step": 42165 - }, - { - "epoch": 21.990091264667537, - "grad_norm": 1.4958611726760864, - "learning_rate": 5.8140703517587945e-05, - "loss": 5.5151, - "step": 42166 - }, - { - "epoch": 21.990612777053457, - "grad_norm": 1.544307827949524, - "learning_rate": 5.8139698492462316e-05, - "loss": 5.607, - "step": 42167 - }, - { - "epoch": 21.991134289439373, - "grad_norm": 1.561173915863037, - "learning_rate": 5.8138693467336694e-05, - "loss": 5.0536, - "step": 42168 - }, - { - "epoch": 21.991655801825292, - "grad_norm": 1.5484232902526855, - "learning_rate": 5.813768844221106e-05, - "loss": 5.3116, - "step": 42169 - }, - { - "epoch": 21.99217731421121, - "grad_norm": 1.6643389463424683, - "learning_rate": 5.813668341708542e-05, - "loss": 4.7715, - "step": 42170 - }, - { - "epoch": 21.99269882659713, - "grad_norm": 1.4491355419158936, - "learning_rate": 5.81356783919598e-05, - "loss": 5.2348, - "step": 42171 - }, - { - "epoch": 21.99322033898305, - "grad_norm": 1.5861461162567139, - "learning_rate": 5.813467336683417e-05, - "loss": 5.0803, - "step": 42172 - }, - { - "epoch": 21.99374185136897, - "grad_norm": 1.5061039924621582, - "learning_rate": 5.813366834170855e-05, - "loss": 4.7567, - "step": 42173 - }, - { - "epoch": 21.99426336375489, - "grad_norm": 1.4513672590255737, - "learning_rate": 5.813266331658291e-05, - "loss": 5.5294, - "step": 42174 - }, - { - "epoch": 21.99478487614081, - "grad_norm": 1.5401742458343506, - "learning_rate": 5.813165829145729e-05, - "loss": 4.9539, - "step": 42175 - }, - { - "epoch": 21.99530638852673, - "grad_norm": 1.579932689666748, - "learning_rate": 5.8130653266331655e-05, - "loss": 4.8499, - "step": 42176 - }, - { - "epoch": 21.995827900912648, - "grad_norm": 1.570725917816162, - "learning_rate": 5.812964824120603e-05, - "loss": 5.3096, - "step": 42177 - }, - { - "epoch": 21.996349413298567, - "grad_norm": 1.5353684425354004, - "learning_rate": 5.8128643216080404e-05, - "loss": 5.2757, - "step": 42178 - }, - { - "epoch": 21.996870925684483, - "grad_norm": 1.4778517484664917, - "learning_rate": 5.812763819095478e-05, - "loss": 4.4032, - "step": 42179 - }, - { - "epoch": 21.997392438070403, - "grad_norm": 1.5564367771148682, - "learning_rate": 5.8126633165829146e-05, - "loss": 5.4146, - "step": 42180 - }, - { - "epoch": 21.997913950456322, - "grad_norm": 1.5175681114196777, - "learning_rate": 5.8125628140703524e-05, - "loss": 5.2948, - "step": 42181 - }, - { - "epoch": 21.99843546284224, - "grad_norm": 1.4250667095184326, - "learning_rate": 5.8124623115577895e-05, - "loss": 5.5665, - "step": 42182 - }, - { - "epoch": 21.99895697522816, - "grad_norm": 1.6407155990600586, - "learning_rate": 5.812361809045226e-05, - "loss": 5.2181, - "step": 42183 - }, - { - "epoch": 21.99947848761408, - "grad_norm": 1.6010212898254395, - "learning_rate": 5.812261306532664e-05, - "loss": 4.9045, - "step": 42184 - }, - { - "epoch": 22.0, - "grad_norm": 1.7898919582366943, - "learning_rate": 5.8121608040201e-05, - "loss": 5.1907, - "step": 42185 - }, - { - "epoch": 22.00052151238592, - "grad_norm": 1.5473474264144897, - "learning_rate": 5.812060301507538e-05, - "loss": 5.0991, - "step": 42186 - }, - { - "epoch": 22.00104302477184, - "grad_norm": 1.5148394107818604, - "learning_rate": 5.811959798994975e-05, - "loss": 5.2961, - "step": 42187 - }, - { - "epoch": 22.00156453715776, - "grad_norm": 1.4671710729599, - "learning_rate": 5.811859296482413e-05, - "loss": 5.5163, - "step": 42188 - }, - { - "epoch": 22.002086049543678, - "grad_norm": 1.3937492370605469, - "learning_rate": 5.811758793969849e-05, - "loss": 5.8544, - "step": 42189 - }, - { - "epoch": 22.002607561929597, - "grad_norm": 1.4880857467651367, - "learning_rate": 5.811658291457287e-05, - "loss": 5.5973, - "step": 42190 - }, - { - "epoch": 22.003129074315513, - "grad_norm": 1.5444488525390625, - "learning_rate": 5.8115577889447235e-05, - "loss": 4.7218, - "step": 42191 - }, - { - "epoch": 22.003650586701433, - "grad_norm": 1.5536354780197144, - "learning_rate": 5.811457286432161e-05, - "loss": 5.0955, - "step": 42192 - }, - { - "epoch": 22.004172099087352, - "grad_norm": 1.5018465518951416, - "learning_rate": 5.8113567839195983e-05, - "loss": 4.915, - "step": 42193 - }, - { - "epoch": 22.00469361147327, - "grad_norm": 1.4450932741165161, - "learning_rate": 5.811256281407036e-05, - "loss": 5.2797, - "step": 42194 - }, - { - "epoch": 22.00521512385919, - "grad_norm": 1.5485340356826782, - "learning_rate": 5.8111557788944726e-05, - "loss": 5.3059, - "step": 42195 - }, - { - "epoch": 22.00573663624511, - "grad_norm": 1.5409448146820068, - "learning_rate": 5.811055276381909e-05, - "loss": 5.0839, - "step": 42196 - }, - { - "epoch": 22.00625814863103, - "grad_norm": 1.5119385719299316, - "learning_rate": 5.810954773869347e-05, - "loss": 5.6625, - "step": 42197 - }, - { - "epoch": 22.00677966101695, - "grad_norm": 1.4794385433197021, - "learning_rate": 5.810854271356784e-05, - "loss": 5.6239, - "step": 42198 - }, - { - "epoch": 22.00730117340287, - "grad_norm": 1.6015331745147705, - "learning_rate": 5.8107537688442216e-05, - "loss": 5.4303, - "step": 42199 - }, - { - "epoch": 22.00782268578879, - "grad_norm": 1.5086472034454346, - "learning_rate": 5.810653266331658e-05, - "loss": 4.7496, - "step": 42200 - }, - { - "epoch": 22.008344198174708, - "grad_norm": 1.500246524810791, - "learning_rate": 5.810552763819096e-05, - "loss": 5.4023, - "step": 42201 - }, - { - "epoch": 22.008865710560627, - "grad_norm": 1.6118499040603638, - "learning_rate": 5.810452261306533e-05, - "loss": 4.6176, - "step": 42202 - }, - { - "epoch": 22.009387222946543, - "grad_norm": 1.5325058698654175, - "learning_rate": 5.810351758793971e-05, - "loss": 5.085, - "step": 42203 - }, - { - "epoch": 22.009908735332463, - "grad_norm": 1.4773192405700684, - "learning_rate": 5.810251256281407e-05, - "loss": 5.2834, - "step": 42204 - }, - { - "epoch": 22.010430247718382, - "grad_norm": 1.5281156301498413, - "learning_rate": 5.810150753768845e-05, - "loss": 4.7989, - "step": 42205 - }, - { - "epoch": 22.0109517601043, - "grad_norm": 1.4840501546859741, - "learning_rate": 5.8100502512562814e-05, - "loss": 5.3223, - "step": 42206 - }, - { - "epoch": 22.01147327249022, - "grad_norm": 1.6088289022445679, - "learning_rate": 5.809949748743719e-05, - "loss": 5.3307, - "step": 42207 - }, - { - "epoch": 22.01199478487614, - "grad_norm": 1.4916146993637085, - "learning_rate": 5.809849246231156e-05, - "loss": 5.5598, - "step": 42208 - }, - { - "epoch": 22.01251629726206, - "grad_norm": 1.659359097480774, - "learning_rate": 5.809748743718594e-05, - "loss": 5.3009, - "step": 42209 - }, - { - "epoch": 22.01303780964798, - "grad_norm": 1.6196494102478027, - "learning_rate": 5.8096482412060305e-05, - "loss": 5.0247, - "step": 42210 - }, - { - "epoch": 22.0135593220339, - "grad_norm": 1.5432448387145996, - "learning_rate": 5.809547738693467e-05, - "loss": 5.2458, - "step": 42211 - }, - { - "epoch": 22.01408083441982, - "grad_norm": 1.5713915824890137, - "learning_rate": 5.809447236180905e-05, - "loss": 5.0024, - "step": 42212 - }, - { - "epoch": 22.014602346805738, - "grad_norm": 1.5571273565292358, - "learning_rate": 5.809346733668342e-05, - "loss": 5.2121, - "step": 42213 - }, - { - "epoch": 22.015123859191657, - "grad_norm": 1.5246895551681519, - "learning_rate": 5.8092462311557796e-05, - "loss": 5.6447, - "step": 42214 - }, - { - "epoch": 22.015645371577573, - "grad_norm": 1.535440444946289, - "learning_rate": 5.809145728643216e-05, - "loss": 4.9861, - "step": 42215 - }, - { - "epoch": 22.016166883963493, - "grad_norm": 1.5461304187774658, - "learning_rate": 5.809045226130654e-05, - "loss": 5.3143, - "step": 42216 - }, - { - "epoch": 22.016688396349412, - "grad_norm": 1.6400445699691772, - "learning_rate": 5.80894472361809e-05, - "loss": 5.1612, - "step": 42217 - }, - { - "epoch": 22.01720990873533, - "grad_norm": 1.642970323562622, - "learning_rate": 5.808844221105528e-05, - "loss": 4.7964, - "step": 42218 - }, - { - "epoch": 22.01773142112125, - "grad_norm": 1.5452964305877686, - "learning_rate": 5.808743718592965e-05, - "loss": 5.1517, - "step": 42219 - }, - { - "epoch": 22.01825293350717, - "grad_norm": 1.5313557386398315, - "learning_rate": 5.808643216080403e-05, - "loss": 5.5918, - "step": 42220 - }, - { - "epoch": 22.01877444589309, - "grad_norm": 1.5130937099456787, - "learning_rate": 5.808542713567839e-05, - "loss": 5.1508, - "step": 42221 - }, - { - "epoch": 22.01929595827901, - "grad_norm": 1.5912034511566162, - "learning_rate": 5.808442211055277e-05, - "loss": 5.1843, - "step": 42222 - }, - { - "epoch": 22.01981747066493, - "grad_norm": 1.4688340425491333, - "learning_rate": 5.8083417085427135e-05, - "loss": 5.1524, - "step": 42223 - }, - { - "epoch": 22.02033898305085, - "grad_norm": 1.627386212348938, - "learning_rate": 5.8082412060301506e-05, - "loss": 5.142, - "step": 42224 - }, - { - "epoch": 22.020860495436768, - "grad_norm": 1.5849157571792603, - "learning_rate": 5.8081407035175884e-05, - "loss": 4.9926, - "step": 42225 - }, - { - "epoch": 22.021382007822687, - "grad_norm": 1.4365702867507935, - "learning_rate": 5.808040201005025e-05, - "loss": 5.5992, - "step": 42226 - }, - { - "epoch": 22.021903520208603, - "grad_norm": 1.5197229385375977, - "learning_rate": 5.8079396984924626e-05, - "loss": 5.2509, - "step": 42227 - }, - { - "epoch": 22.022425032594523, - "grad_norm": 1.437827229499817, - "learning_rate": 5.8078391959799e-05, - "loss": 5.3122, - "step": 42228 - }, - { - "epoch": 22.022946544980442, - "grad_norm": 1.5956943035125732, - "learning_rate": 5.8077386934673375e-05, - "loss": 5.0064, - "step": 42229 - }, - { - "epoch": 22.02346805736636, - "grad_norm": 1.535682201385498, - "learning_rate": 5.807638190954774e-05, - "loss": 5.527, - "step": 42230 - }, - { - "epoch": 22.02398956975228, - "grad_norm": 1.4262722730636597, - "learning_rate": 5.807537688442212e-05, - "loss": 5.5058, - "step": 42231 - }, - { - "epoch": 22.0245110821382, - "grad_norm": 1.5825973749160767, - "learning_rate": 5.807437185929648e-05, - "loss": 5.3274, - "step": 42232 - }, - { - "epoch": 22.02503259452412, - "grad_norm": 1.5673924684524536, - "learning_rate": 5.807336683417086e-05, - "loss": 5.4373, - "step": 42233 - }, - { - "epoch": 22.02555410691004, - "grad_norm": 1.5093003511428833, - "learning_rate": 5.807236180904523e-05, - "loss": 4.931, - "step": 42234 - }, - { - "epoch": 22.02607561929596, - "grad_norm": 1.5039019584655762, - "learning_rate": 5.807135678391961e-05, - "loss": 5.2337, - "step": 42235 - }, - { - "epoch": 22.02659713168188, - "grad_norm": 1.6816078424453735, - "learning_rate": 5.807035175879397e-05, - "loss": 5.3646, - "step": 42236 - }, - { - "epoch": 22.027118644067798, - "grad_norm": 1.4858564138412476, - "learning_rate": 5.806934673366834e-05, - "loss": 5.2436, - "step": 42237 - }, - { - "epoch": 22.027640156453717, - "grad_norm": 1.5412591695785522, - "learning_rate": 5.8068341708542715e-05, - "loss": 5.0854, - "step": 42238 - }, - { - "epoch": 22.028161668839633, - "grad_norm": 1.5451936721801758, - "learning_rate": 5.8067336683417086e-05, - "loss": 5.2861, - "step": 42239 - }, - { - "epoch": 22.028683181225553, - "grad_norm": 1.5802855491638184, - "learning_rate": 5.8066331658291463e-05, - "loss": 5.1191, - "step": 42240 - }, - { - "epoch": 22.029204693611472, - "grad_norm": 1.5186986923217773, - "learning_rate": 5.806532663316583e-05, - "loss": 5.4269, - "step": 42241 - }, - { - "epoch": 22.02972620599739, - "grad_norm": 1.4904438257217407, - "learning_rate": 5.8064321608040206e-05, - "loss": 5.2085, - "step": 42242 - }, - { - "epoch": 22.03024771838331, - "grad_norm": 1.6654895544052124, - "learning_rate": 5.806331658291457e-05, - "loss": 5.2702, - "step": 42243 - }, - { - "epoch": 22.03076923076923, - "grad_norm": 1.5423372983932495, - "learning_rate": 5.806231155778895e-05, - "loss": 5.1416, - "step": 42244 - }, - { - "epoch": 22.03129074315515, - "grad_norm": 1.5702077150344849, - "learning_rate": 5.806130653266332e-05, - "loss": 5.2851, - "step": 42245 - }, - { - "epoch": 22.03181225554107, - "grad_norm": 1.480294942855835, - "learning_rate": 5.8060301507537697e-05, - "loss": 5.287, - "step": 42246 - }, - { - "epoch": 22.03233376792699, - "grad_norm": 1.5242141485214233, - "learning_rate": 5.805929648241206e-05, - "loss": 5.3468, - "step": 42247 - }, - { - "epoch": 22.03285528031291, - "grad_norm": 1.4922040700912476, - "learning_rate": 5.805829145728644e-05, - "loss": 5.282, - "step": 42248 - }, - { - "epoch": 22.033376792698828, - "grad_norm": 1.5157889127731323, - "learning_rate": 5.805728643216081e-05, - "loss": 5.1929, - "step": 42249 - }, - { - "epoch": 22.033898305084747, - "grad_norm": 1.5766901969909668, - "learning_rate": 5.8056281407035174e-05, - "loss": 5.112, - "step": 42250 - }, - { - "epoch": 22.034419817470663, - "grad_norm": 1.5137531757354736, - "learning_rate": 5.805527638190955e-05, - "loss": 5.2829, - "step": 42251 - }, - { - "epoch": 22.034941329856583, - "grad_norm": 1.5672322511672974, - "learning_rate": 5.8054271356783916e-05, - "loss": 4.947, - "step": 42252 - }, - { - "epoch": 22.035462842242502, - "grad_norm": 1.6018668413162231, - "learning_rate": 5.8053266331658294e-05, - "loss": 5.4892, - "step": 42253 - }, - { - "epoch": 22.03598435462842, - "grad_norm": 1.5134884119033813, - "learning_rate": 5.8052261306532665e-05, - "loss": 5.3792, - "step": 42254 - }, - { - "epoch": 22.03650586701434, - "grad_norm": 1.6152496337890625, - "learning_rate": 5.805125628140704e-05, - "loss": 5.0169, - "step": 42255 - }, - { - "epoch": 22.03702737940026, - "grad_norm": 1.490946650505066, - "learning_rate": 5.805025125628141e-05, - "loss": 5.5264, - "step": 42256 - }, - { - "epoch": 22.03754889178618, - "grad_norm": 1.5038586854934692, - "learning_rate": 5.8049246231155785e-05, - "loss": 5.1481, - "step": 42257 - }, - { - "epoch": 22.0380704041721, - "grad_norm": 1.5374336242675781, - "learning_rate": 5.804824120603015e-05, - "loss": 5.2445, - "step": 42258 - }, - { - "epoch": 22.03859191655802, - "grad_norm": 1.5626789331436157, - "learning_rate": 5.804723618090453e-05, - "loss": 5.3559, - "step": 42259 - }, - { - "epoch": 22.03911342894394, - "grad_norm": 1.6444525718688965, - "learning_rate": 5.80462311557789e-05, - "loss": 5.5492, - "step": 42260 - }, - { - "epoch": 22.039634941329858, - "grad_norm": 1.637704610824585, - "learning_rate": 5.8045226130653276e-05, - "loss": 5.387, - "step": 42261 - }, - { - "epoch": 22.040156453715777, - "grad_norm": 1.4715656042099, - "learning_rate": 5.804422110552764e-05, - "loss": 5.633, - "step": 42262 - }, - { - "epoch": 22.040677966101693, - "grad_norm": 1.4672647714614868, - "learning_rate": 5.8043216080402004e-05, - "loss": 5.3676, - "step": 42263 - }, - { - "epoch": 22.041199478487613, - "grad_norm": 1.4865983724594116, - "learning_rate": 5.804221105527638e-05, - "loss": 4.928, - "step": 42264 - }, - { - "epoch": 22.041720990873532, - "grad_norm": 1.5633963346481323, - "learning_rate": 5.804120603015075e-05, - "loss": 5.281, - "step": 42265 - }, - { - "epoch": 22.042242503259452, - "grad_norm": 1.6120134592056274, - "learning_rate": 5.804020100502513e-05, - "loss": 5.5161, - "step": 42266 - }, - { - "epoch": 22.04276401564537, - "grad_norm": 1.4965249300003052, - "learning_rate": 5.8039195979899495e-05, - "loss": 5.0576, - "step": 42267 - }, - { - "epoch": 22.04328552803129, - "grad_norm": 1.5903080701828003, - "learning_rate": 5.803819095477387e-05, - "loss": 5.3705, - "step": 42268 - }, - { - "epoch": 22.04380704041721, - "grad_norm": 1.5209394693374634, - "learning_rate": 5.8037185929648244e-05, - "loss": 5.2117, - "step": 42269 - }, - { - "epoch": 22.04432855280313, - "grad_norm": 1.5541586875915527, - "learning_rate": 5.803618090452262e-05, - "loss": 5.4584, - "step": 42270 - }, - { - "epoch": 22.04485006518905, - "grad_norm": 1.6272467374801636, - "learning_rate": 5.8035175879396986e-05, - "loss": 4.9546, - "step": 42271 - }, - { - "epoch": 22.04537157757497, - "grad_norm": 1.5151827335357666, - "learning_rate": 5.8034170854271364e-05, - "loss": 4.986, - "step": 42272 - }, - { - "epoch": 22.045893089960888, - "grad_norm": 1.4931079149246216, - "learning_rate": 5.803316582914573e-05, - "loss": 5.475, - "step": 42273 - }, - { - "epoch": 22.046414602346807, - "grad_norm": 1.543350100517273, - "learning_rate": 5.8032160804020106e-05, - "loss": 5.2002, - "step": 42274 - }, - { - "epoch": 22.046936114732723, - "grad_norm": 1.544380784034729, - "learning_rate": 5.803115577889448e-05, - "loss": 5.4718, - "step": 42275 - }, - { - "epoch": 22.047457627118643, - "grad_norm": 1.4293986558914185, - "learning_rate": 5.803015075376884e-05, - "loss": 5.5374, - "step": 42276 - }, - { - "epoch": 22.047979139504562, - "grad_norm": 1.3892316818237305, - "learning_rate": 5.802914572864322e-05, - "loss": 5.156, - "step": 42277 - }, - { - "epoch": 22.048500651890482, - "grad_norm": 1.507064700126648, - "learning_rate": 5.8028140703517584e-05, - "loss": 5.5612, - "step": 42278 - }, - { - "epoch": 22.0490221642764, - "grad_norm": 1.5701760053634644, - "learning_rate": 5.802713567839196e-05, - "loss": 5.5168, - "step": 42279 - }, - { - "epoch": 22.04954367666232, - "grad_norm": 1.6472729444503784, - "learning_rate": 5.802613065326633e-05, - "loss": 4.9352, - "step": 42280 - }, - { - "epoch": 22.05006518904824, - "grad_norm": 1.5006089210510254, - "learning_rate": 5.802512562814071e-05, - "loss": 5.5991, - "step": 42281 - }, - { - "epoch": 22.05058670143416, - "grad_norm": 1.6083143949508667, - "learning_rate": 5.8024120603015075e-05, - "loss": 5.6905, - "step": 42282 - }, - { - "epoch": 22.05110821382008, - "grad_norm": 1.5426721572875977, - "learning_rate": 5.802311557788945e-05, - "loss": 5.4596, - "step": 42283 - }, - { - "epoch": 22.051629726206, - "grad_norm": 1.5273357629776, - "learning_rate": 5.802211055276382e-05, - "loss": 5.4297, - "step": 42284 - }, - { - "epoch": 22.052151238591918, - "grad_norm": 1.4488849639892578, - "learning_rate": 5.8021105527638195e-05, - "loss": 5.8062, - "step": 42285 - }, - { - "epoch": 22.052672750977834, - "grad_norm": 1.6520737409591675, - "learning_rate": 5.8020100502512566e-05, - "loss": 5.0421, - "step": 42286 - }, - { - "epoch": 22.053194263363753, - "grad_norm": 1.4769448041915894, - "learning_rate": 5.8019095477386944e-05, - "loss": 5.0619, - "step": 42287 - }, - { - "epoch": 22.053715775749673, - "grad_norm": 1.5856926441192627, - "learning_rate": 5.801809045226131e-05, - "loss": 5.3888, - "step": 42288 - }, - { - "epoch": 22.054237288135592, - "grad_norm": 1.8263765573501587, - "learning_rate": 5.801708542713568e-05, - "loss": 5.1289, - "step": 42289 - }, - { - "epoch": 22.054758800521512, - "grad_norm": 1.525852918624878, - "learning_rate": 5.801608040201006e-05, - "loss": 5.2605, - "step": 42290 - }, - { - "epoch": 22.05528031290743, - "grad_norm": 1.5276423692703247, - "learning_rate": 5.801507537688442e-05, - "loss": 4.9702, - "step": 42291 - }, - { - "epoch": 22.05580182529335, - "grad_norm": 1.549009084701538, - "learning_rate": 5.80140703517588e-05, - "loss": 5.2373, - "step": 42292 - }, - { - "epoch": 22.05632333767927, - "grad_norm": 1.575695514678955, - "learning_rate": 5.801306532663316e-05, - "loss": 5.1719, - "step": 42293 - }, - { - "epoch": 22.05684485006519, - "grad_norm": 1.4412477016448975, - "learning_rate": 5.801206030150754e-05, - "loss": 5.718, - "step": 42294 - }, - { - "epoch": 22.05736636245111, - "grad_norm": 1.4487104415893555, - "learning_rate": 5.801105527638191e-05, - "loss": 5.2456, - "step": 42295 - }, - { - "epoch": 22.05788787483703, - "grad_norm": 1.4973044395446777, - "learning_rate": 5.801005025125629e-05, - "loss": 4.6319, - "step": 42296 - }, - { - "epoch": 22.058409387222948, - "grad_norm": 1.4930790662765503, - "learning_rate": 5.8009045226130654e-05, - "loss": 5.3642, - "step": 42297 - }, - { - "epoch": 22.058930899608864, - "grad_norm": 1.5872652530670166, - "learning_rate": 5.800804020100503e-05, - "loss": 5.2263, - "step": 42298 - }, - { - "epoch": 22.059452411994783, - "grad_norm": 1.6044197082519531, - "learning_rate": 5.8007035175879396e-05, - "loss": 5.4276, - "step": 42299 - }, - { - "epoch": 22.059973924380703, - "grad_norm": 1.6059513092041016, - "learning_rate": 5.8006030150753774e-05, - "loss": 5.1901, - "step": 42300 - }, - { - "epoch": 22.060495436766622, - "grad_norm": 1.5164756774902344, - "learning_rate": 5.8005025125628145e-05, - "loss": 5.5163, - "step": 42301 - }, - { - "epoch": 22.061016949152542, - "grad_norm": 1.5234034061431885, - "learning_rate": 5.800402010050252e-05, - "loss": 5.4376, - "step": 42302 - }, - { - "epoch": 22.06153846153846, - "grad_norm": 1.716391921043396, - "learning_rate": 5.800301507537689e-05, - "loss": 5.3651, - "step": 42303 - }, - { - "epoch": 22.06205997392438, - "grad_norm": 1.5376743078231812, - "learning_rate": 5.800201005025125e-05, - "loss": 5.5506, - "step": 42304 - }, - { - "epoch": 22.0625814863103, - "grad_norm": 1.5157427787780762, - "learning_rate": 5.800100502512563e-05, - "loss": 5.1742, - "step": 42305 - }, - { - "epoch": 22.06310299869622, - "grad_norm": 1.6346800327301025, - "learning_rate": 5.8e-05, - "loss": 4.662, - "step": 42306 - }, - { - "epoch": 22.06362451108214, - "grad_norm": 1.6226364374160767, - "learning_rate": 5.799899497487438e-05, - "loss": 5.0782, - "step": 42307 - }, - { - "epoch": 22.06414602346806, - "grad_norm": 1.505082368850708, - "learning_rate": 5.799798994974874e-05, - "loss": 5.4002, - "step": 42308 - }, - { - "epoch": 22.064667535853978, - "grad_norm": 1.552036166191101, - "learning_rate": 5.799698492462312e-05, - "loss": 5.013, - "step": 42309 - }, - { - "epoch": 22.065189048239894, - "grad_norm": 1.4929243326187134, - "learning_rate": 5.7995979899497485e-05, - "loss": 4.7207, - "step": 42310 - }, - { - "epoch": 22.065710560625813, - "grad_norm": 1.5077801942825317, - "learning_rate": 5.799497487437186e-05, - "loss": 4.8949, - "step": 42311 - }, - { - "epoch": 22.066232073011733, - "grad_norm": 1.5349164009094238, - "learning_rate": 5.7993969849246233e-05, - "loss": 5.5173, - "step": 42312 - }, - { - "epoch": 22.066753585397652, - "grad_norm": 1.5867209434509277, - "learning_rate": 5.799296482412061e-05, - "loss": 4.9735, - "step": 42313 - }, - { - "epoch": 22.067275097783572, - "grad_norm": 1.4702298641204834, - "learning_rate": 5.7991959798994976e-05, - "loss": 5.6164, - "step": 42314 - }, - { - "epoch": 22.06779661016949, - "grad_norm": 1.6182284355163574, - "learning_rate": 5.799095477386935e-05, - "loss": 4.7971, - "step": 42315 - }, - { - "epoch": 22.06831812255541, - "grad_norm": 1.5292675495147705, - "learning_rate": 5.7989949748743724e-05, - "loss": 5.3853, - "step": 42316 - }, - { - "epoch": 22.06883963494133, - "grad_norm": 1.476227879524231, - "learning_rate": 5.798894472361809e-05, - "loss": 4.7682, - "step": 42317 - }, - { - "epoch": 22.06936114732725, - "grad_norm": 1.5176769495010376, - "learning_rate": 5.7987939698492467e-05, - "loss": 5.385, - "step": 42318 - }, - { - "epoch": 22.06988265971317, - "grad_norm": 1.5595006942749023, - "learning_rate": 5.798693467336683e-05, - "loss": 4.7278, - "step": 42319 - }, - { - "epoch": 22.07040417209909, - "grad_norm": 1.5096850395202637, - "learning_rate": 5.798592964824121e-05, - "loss": 4.5756, - "step": 42320 - }, - { - "epoch": 22.070925684485008, - "grad_norm": 1.6761773824691772, - "learning_rate": 5.798492462311558e-05, - "loss": 5.5695, - "step": 42321 - }, - { - "epoch": 22.071447196870924, - "grad_norm": 1.6459242105484009, - "learning_rate": 5.798391959798996e-05, - "loss": 4.791, - "step": 42322 - }, - { - "epoch": 22.071968709256844, - "grad_norm": 1.6878302097320557, - "learning_rate": 5.798291457286432e-05, - "loss": 5.3555, - "step": 42323 - }, - { - "epoch": 22.072490221642763, - "grad_norm": 1.4874836206436157, - "learning_rate": 5.79819095477387e-05, - "loss": 5.4683, - "step": 42324 - }, - { - "epoch": 22.073011734028682, - "grad_norm": 1.672272801399231, - "learning_rate": 5.7980904522613064e-05, - "loss": 5.577, - "step": 42325 - }, - { - "epoch": 22.073533246414602, - "grad_norm": 1.6163147687911987, - "learning_rate": 5.797989949748744e-05, - "loss": 5.3418, - "step": 42326 - }, - { - "epoch": 22.07405475880052, - "grad_norm": 1.4997661113739014, - "learning_rate": 5.797889447236181e-05, - "loss": 5.092, - "step": 42327 - }, - { - "epoch": 22.07457627118644, - "grad_norm": 1.5598405599594116, - "learning_rate": 5.797788944723619e-05, - "loss": 5.1498, - "step": 42328 - }, - { - "epoch": 22.07509778357236, - "grad_norm": 1.5226545333862305, - "learning_rate": 5.7976884422110555e-05, - "loss": 5.3728, - "step": 42329 - }, - { - "epoch": 22.07561929595828, - "grad_norm": 1.5446579456329346, - "learning_rate": 5.797587939698492e-05, - "loss": 5.27, - "step": 42330 - }, - { - "epoch": 22.0761408083442, - "grad_norm": 1.5285747051239014, - "learning_rate": 5.79748743718593e-05, - "loss": 5.4291, - "step": 42331 - }, - { - "epoch": 22.07666232073012, - "grad_norm": 1.5519615411758423, - "learning_rate": 5.797386934673367e-05, - "loss": 5.5362, - "step": 42332 - }, - { - "epoch": 22.077183833116038, - "grad_norm": 1.5340614318847656, - "learning_rate": 5.7972864321608046e-05, - "loss": 5.1594, - "step": 42333 - }, - { - "epoch": 22.077705345501954, - "grad_norm": 1.4453030824661255, - "learning_rate": 5.797185929648241e-05, - "loss": 5.1403, - "step": 42334 - }, - { - "epoch": 22.078226857887874, - "grad_norm": 1.57541024684906, - "learning_rate": 5.797085427135679e-05, - "loss": 4.6054, - "step": 42335 - }, - { - "epoch": 22.078748370273793, - "grad_norm": 1.6429253816604614, - "learning_rate": 5.796984924623116e-05, - "loss": 5.4739, - "step": 42336 - }, - { - "epoch": 22.079269882659712, - "grad_norm": 1.4531382322311401, - "learning_rate": 5.796884422110554e-05, - "loss": 5.2818, - "step": 42337 - }, - { - "epoch": 22.079791395045632, - "grad_norm": 1.5491150617599487, - "learning_rate": 5.79678391959799e-05, - "loss": 5.4018, - "step": 42338 - }, - { - "epoch": 22.08031290743155, - "grad_norm": 1.4766401052474976, - "learning_rate": 5.796683417085428e-05, - "loss": 5.4083, - "step": 42339 - }, - { - "epoch": 22.08083441981747, - "grad_norm": 1.4444125890731812, - "learning_rate": 5.796582914572864e-05, - "loss": 5.4281, - "step": 42340 - }, - { - "epoch": 22.08135593220339, - "grad_norm": 1.5357054471969604, - "learning_rate": 5.796482412060302e-05, - "loss": 5.4772, - "step": 42341 - }, - { - "epoch": 22.08187744458931, - "grad_norm": 1.655051350593567, - "learning_rate": 5.796381909547739e-05, - "loss": 5.0499, - "step": 42342 - }, - { - "epoch": 22.08239895697523, - "grad_norm": 1.5851609706878662, - "learning_rate": 5.7962814070351756e-05, - "loss": 5.1557, - "step": 42343 - }, - { - "epoch": 22.08292046936115, - "grad_norm": 1.523876428604126, - "learning_rate": 5.7961809045226134e-05, - "loss": 5.3931, - "step": 42344 - }, - { - "epoch": 22.083441981747068, - "grad_norm": 1.5812326669692993, - "learning_rate": 5.79608040201005e-05, - "loss": 4.8676, - "step": 42345 - }, - { - "epoch": 22.083963494132984, - "grad_norm": 1.4729214906692505, - "learning_rate": 5.7959798994974876e-05, - "loss": 5.6163, - "step": 42346 - }, - { - "epoch": 22.084485006518904, - "grad_norm": 1.4948015213012695, - "learning_rate": 5.795879396984925e-05, - "loss": 4.8288, - "step": 42347 - }, - { - "epoch": 22.085006518904823, - "grad_norm": 1.4991923570632935, - "learning_rate": 5.7957788944723625e-05, - "loss": 5.0738, - "step": 42348 - }, - { - "epoch": 22.085528031290742, - "grad_norm": 1.5286527872085571, - "learning_rate": 5.795678391959799e-05, - "loss": 5.3468, - "step": 42349 - }, - { - "epoch": 22.086049543676662, - "grad_norm": 1.5830589532852173, - "learning_rate": 5.795577889447237e-05, - "loss": 4.9178, - "step": 42350 - }, - { - "epoch": 22.08657105606258, - "grad_norm": 1.6079641580581665, - "learning_rate": 5.795477386934673e-05, - "loss": 5.4443, - "step": 42351 - }, - { - "epoch": 22.0870925684485, - "grad_norm": 1.6402677297592163, - "learning_rate": 5.795376884422111e-05, - "loss": 5.287, - "step": 42352 - }, - { - "epoch": 22.08761408083442, - "grad_norm": 1.485399603843689, - "learning_rate": 5.795276381909548e-05, - "loss": 5.5949, - "step": 42353 - }, - { - "epoch": 22.08813559322034, - "grad_norm": 1.5588369369506836, - "learning_rate": 5.795175879396986e-05, - "loss": 4.6217, - "step": 42354 - }, - { - "epoch": 22.08865710560626, - "grad_norm": 1.5767165422439575, - "learning_rate": 5.795075376884422e-05, - "loss": 4.9396, - "step": 42355 - }, - { - "epoch": 22.08917861799218, - "grad_norm": 1.529052734375, - "learning_rate": 5.7949748743718594e-05, - "loss": 5.5324, - "step": 42356 - }, - { - "epoch": 22.089700130378098, - "grad_norm": 1.5543524026870728, - "learning_rate": 5.794874371859297e-05, - "loss": 5.13, - "step": 42357 - }, - { - "epoch": 22.090221642764014, - "grad_norm": 1.684630036354065, - "learning_rate": 5.7947738693467336e-05, - "loss": 4.9293, - "step": 42358 - }, - { - "epoch": 22.090743155149934, - "grad_norm": 1.4738095998764038, - "learning_rate": 5.7946733668341714e-05, - "loss": 5.0208, - "step": 42359 - }, - { - "epoch": 22.091264667535853, - "grad_norm": 1.5739991664886475, - "learning_rate": 5.794572864321608e-05, - "loss": 5.2874, - "step": 42360 - }, - { - "epoch": 22.091786179921773, - "grad_norm": 1.4648410081863403, - "learning_rate": 5.7944723618090456e-05, - "loss": 5.3737, - "step": 42361 - }, - { - "epoch": 22.092307692307692, - "grad_norm": 1.4977666139602661, - "learning_rate": 5.794371859296483e-05, - "loss": 5.5731, - "step": 42362 - }, - { - "epoch": 22.09282920469361, - "grad_norm": 1.5609028339385986, - "learning_rate": 5.7942713567839204e-05, - "loss": 5.3642, - "step": 42363 - }, - { - "epoch": 22.09335071707953, - "grad_norm": 1.626520037651062, - "learning_rate": 5.794170854271357e-05, - "loss": 4.8408, - "step": 42364 - }, - { - "epoch": 22.09387222946545, - "grad_norm": 1.6557269096374512, - "learning_rate": 5.7940703517587947e-05, - "loss": 5.3467, - "step": 42365 - }, - { - "epoch": 22.09439374185137, - "grad_norm": 1.462580680847168, - "learning_rate": 5.793969849246231e-05, - "loss": 5.4928, - "step": 42366 - }, - { - "epoch": 22.09491525423729, - "grad_norm": 1.534935474395752, - "learning_rate": 5.793869346733669e-05, - "loss": 5.4451, - "step": 42367 - }, - { - "epoch": 22.09543676662321, - "grad_norm": 1.5024702548980713, - "learning_rate": 5.793768844221106e-05, - "loss": 5.3948, - "step": 42368 - }, - { - "epoch": 22.09595827900913, - "grad_norm": 1.5085101127624512, - "learning_rate": 5.7936683417085424e-05, - "loss": 5.4895, - "step": 42369 - }, - { - "epoch": 22.096479791395044, - "grad_norm": 1.580207109451294, - "learning_rate": 5.79356783919598e-05, - "loss": 5.1627, - "step": 42370 - }, - { - "epoch": 22.097001303780964, - "grad_norm": 1.4655929803848267, - "learning_rate": 5.7934673366834166e-05, - "loss": 5.6526, - "step": 42371 - }, - { - "epoch": 22.097522816166883, - "grad_norm": 1.5333974361419678, - "learning_rate": 5.7933668341708544e-05, - "loss": 5.0227, - "step": 42372 - }, - { - "epoch": 22.098044328552803, - "grad_norm": 1.520455002784729, - "learning_rate": 5.7932663316582915e-05, - "loss": 5.4336, - "step": 42373 - }, - { - "epoch": 22.098565840938722, - "grad_norm": 1.5777113437652588, - "learning_rate": 5.793165829145729e-05, - "loss": 5.3191, - "step": 42374 - }, - { - "epoch": 22.09908735332464, - "grad_norm": 1.527712106704712, - "learning_rate": 5.793065326633166e-05, - "loss": 5.528, - "step": 42375 - }, - { - "epoch": 22.09960886571056, - "grad_norm": 1.4661518335342407, - "learning_rate": 5.7929648241206035e-05, - "loss": 5.3659, - "step": 42376 - }, - { - "epoch": 22.10013037809648, - "grad_norm": 1.5879225730895996, - "learning_rate": 5.7928643216080406e-05, - "loss": 5.1932, - "step": 42377 - }, - { - "epoch": 22.1006518904824, - "grad_norm": 1.7491592168807983, - "learning_rate": 5.7927638190954784e-05, - "loss": 4.8636, - "step": 42378 - }, - { - "epoch": 22.10117340286832, - "grad_norm": 1.456304907798767, - "learning_rate": 5.792663316582915e-05, - "loss": 5.1481, - "step": 42379 - }, - { - "epoch": 22.10169491525424, - "grad_norm": 1.5472043752670288, - "learning_rate": 5.7925628140703526e-05, - "loss": 5.1285, - "step": 42380 - }, - { - "epoch": 22.102216427640155, - "grad_norm": 1.5230553150177002, - "learning_rate": 5.792462311557789e-05, - "loss": 5.2753, - "step": 42381 - }, - { - "epoch": 22.102737940026074, - "grad_norm": 1.587855339050293, - "learning_rate": 5.792361809045227e-05, - "loss": 5.0918, - "step": 42382 - }, - { - "epoch": 22.103259452411994, - "grad_norm": 1.5250290632247925, - "learning_rate": 5.792261306532664e-05, - "loss": 5.3373, - "step": 42383 - }, - { - "epoch": 22.103780964797913, - "grad_norm": 1.7338262796401978, - "learning_rate": 5.7921608040201e-05, - "loss": 4.9569, - "step": 42384 - }, - { - "epoch": 22.104302477183833, - "grad_norm": 1.4724820852279663, - "learning_rate": 5.792060301507538e-05, - "loss": 5.3365, - "step": 42385 - }, - { - "epoch": 22.104823989569752, - "grad_norm": 1.5225813388824463, - "learning_rate": 5.7919597989949745e-05, - "loss": 5.0477, - "step": 42386 - }, - { - "epoch": 22.10534550195567, - "grad_norm": 1.4993162155151367, - "learning_rate": 5.791859296482412e-05, - "loss": 5.3746, - "step": 42387 - }, - { - "epoch": 22.10586701434159, - "grad_norm": 1.568951964378357, - "learning_rate": 5.7917587939698494e-05, - "loss": 5.5184, - "step": 42388 - }, - { - "epoch": 22.10638852672751, - "grad_norm": 1.534873366355896, - "learning_rate": 5.791658291457287e-05, - "loss": 5.3923, - "step": 42389 - }, - { - "epoch": 22.10691003911343, - "grad_norm": 1.561029076576233, - "learning_rate": 5.7915577889447236e-05, - "loss": 4.8038, - "step": 42390 - }, - { - "epoch": 22.10743155149935, - "grad_norm": 1.5880491733551025, - "learning_rate": 5.7914572864321614e-05, - "loss": 5.0686, - "step": 42391 - }, - { - "epoch": 22.10795306388527, - "grad_norm": 1.5562140941619873, - "learning_rate": 5.791356783919598e-05, - "loss": 5.1571, - "step": 42392 - }, - { - "epoch": 22.108474576271185, - "grad_norm": 1.558252215385437, - "learning_rate": 5.7912562814070356e-05, - "loss": 5.3734, - "step": 42393 - }, - { - "epoch": 22.108996088657104, - "grad_norm": 1.6999043226242065, - "learning_rate": 5.791155778894473e-05, - "loss": 5.3371, - "step": 42394 - }, - { - "epoch": 22.109517601043024, - "grad_norm": 1.5452244281768799, - "learning_rate": 5.7910552763819105e-05, - "loss": 5.3142, - "step": 42395 - }, - { - "epoch": 22.110039113428943, - "grad_norm": 1.4969416856765747, - "learning_rate": 5.790954773869347e-05, - "loss": 5.5275, - "step": 42396 - }, - { - "epoch": 22.110560625814863, - "grad_norm": 1.7184135913848877, - "learning_rate": 5.7908542713567834e-05, - "loss": 5.2162, - "step": 42397 - }, - { - "epoch": 22.111082138200782, - "grad_norm": 1.5630230903625488, - "learning_rate": 5.790753768844221e-05, - "loss": 5.4283, - "step": 42398 - }, - { - "epoch": 22.1116036505867, - "grad_norm": 1.5013071298599243, - "learning_rate": 5.790653266331658e-05, - "loss": 5.5491, - "step": 42399 - }, - { - "epoch": 22.11212516297262, - "grad_norm": 1.5905548334121704, - "learning_rate": 5.790552763819096e-05, - "loss": 5.1892, - "step": 42400 - }, - { - "epoch": 22.11264667535854, - "grad_norm": 1.5060958862304688, - "learning_rate": 5.7904522613065325e-05, - "loss": 5.6362, - "step": 42401 - }, - { - "epoch": 22.11316818774446, - "grad_norm": 1.455564022064209, - "learning_rate": 5.79035175879397e-05, - "loss": 5.1936, - "step": 42402 - }, - { - "epoch": 22.11368970013038, - "grad_norm": 1.4664674997329712, - "learning_rate": 5.7902512562814074e-05, - "loss": 5.5896, - "step": 42403 - }, - { - "epoch": 22.1142112125163, - "grad_norm": 1.4814037084579468, - "learning_rate": 5.790150753768845e-05, - "loss": 5.2531, - "step": 42404 - }, - { - "epoch": 22.114732724902215, - "grad_norm": 1.4196617603302002, - "learning_rate": 5.7900502512562816e-05, - "loss": 5.4499, - "step": 42405 - }, - { - "epoch": 22.115254237288134, - "grad_norm": 1.360715389251709, - "learning_rate": 5.7899497487437194e-05, - "loss": 4.9149, - "step": 42406 - }, - { - "epoch": 22.115775749674054, - "grad_norm": 1.5204967260360718, - "learning_rate": 5.789849246231156e-05, - "loss": 5.1063, - "step": 42407 - }, - { - "epoch": 22.116297262059973, - "grad_norm": 1.4832125902175903, - "learning_rate": 5.7897487437185936e-05, - "loss": 5.2975, - "step": 42408 - }, - { - "epoch": 22.116818774445893, - "grad_norm": 1.7650363445281982, - "learning_rate": 5.789648241206031e-05, - "loss": 4.9583, - "step": 42409 - }, - { - "epoch": 22.117340286831812, - "grad_norm": 1.5806444883346558, - "learning_rate": 5.789547738693467e-05, - "loss": 5.29, - "step": 42410 - }, - { - "epoch": 22.11786179921773, - "grad_norm": 1.5122441053390503, - "learning_rate": 5.789447236180905e-05, - "loss": 5.4857, - "step": 42411 - }, - { - "epoch": 22.11838331160365, - "grad_norm": 1.6182904243469238, - "learning_rate": 5.789346733668341e-05, - "loss": 4.6349, - "step": 42412 - }, - { - "epoch": 22.11890482398957, - "grad_norm": 1.5898643732070923, - "learning_rate": 5.789246231155779e-05, - "loss": 4.9678, - "step": 42413 - }, - { - "epoch": 22.11942633637549, - "grad_norm": 1.505020022392273, - "learning_rate": 5.789145728643216e-05, - "loss": 5.2337, - "step": 42414 - }, - { - "epoch": 22.11994784876141, - "grad_norm": 1.5221680402755737, - "learning_rate": 5.789045226130654e-05, - "loss": 5.1032, - "step": 42415 - }, - { - "epoch": 22.12046936114733, - "grad_norm": 1.5927999019622803, - "learning_rate": 5.7889447236180904e-05, - "loss": 4.9481, - "step": 42416 - }, - { - "epoch": 22.120990873533245, - "grad_norm": 1.4744617938995361, - "learning_rate": 5.788844221105528e-05, - "loss": 5.5805, - "step": 42417 - }, - { - "epoch": 22.121512385919164, - "grad_norm": 1.4752416610717773, - "learning_rate": 5.7887437185929646e-05, - "loss": 5.1436, - "step": 42418 - }, - { - "epoch": 22.122033898305084, - "grad_norm": 1.4079784154891968, - "learning_rate": 5.7886432160804024e-05, - "loss": 5.5562, - "step": 42419 - }, - { - "epoch": 22.122555410691003, - "grad_norm": 1.5233315229415894, - "learning_rate": 5.7885427135678395e-05, - "loss": 5.5424, - "step": 42420 - }, - { - "epoch": 22.123076923076923, - "grad_norm": 1.612548828125, - "learning_rate": 5.788442211055277e-05, - "loss": 4.9846, - "step": 42421 - }, - { - "epoch": 22.123598435462842, - "grad_norm": 1.4921735525131226, - "learning_rate": 5.788341708542714e-05, - "loss": 5.2717, - "step": 42422 - }, - { - "epoch": 22.12411994784876, - "grad_norm": 1.5684130191802979, - "learning_rate": 5.788241206030151e-05, - "loss": 5.1035, - "step": 42423 - }, - { - "epoch": 22.12464146023468, - "grad_norm": 1.5260653495788574, - "learning_rate": 5.7881407035175886e-05, - "loss": 5.1471, - "step": 42424 - }, - { - "epoch": 22.1251629726206, - "grad_norm": 1.4931695461273193, - "learning_rate": 5.788040201005025e-05, - "loss": 5.3753, - "step": 42425 - }, - { - "epoch": 22.12568448500652, - "grad_norm": 1.5880813598632812, - "learning_rate": 5.787939698492463e-05, - "loss": 5.3603, - "step": 42426 - }, - { - "epoch": 22.12620599739244, - "grad_norm": 1.4524049758911133, - "learning_rate": 5.787839195979899e-05, - "loss": 5.2705, - "step": 42427 - }, - { - "epoch": 22.12672750977836, - "grad_norm": 1.5040369033813477, - "learning_rate": 5.787738693467337e-05, - "loss": 5.0453, - "step": 42428 - }, - { - "epoch": 22.127249022164275, - "grad_norm": 1.5458515882492065, - "learning_rate": 5.787638190954774e-05, - "loss": 5.2505, - "step": 42429 - }, - { - "epoch": 22.127770534550194, - "grad_norm": 1.498842477798462, - "learning_rate": 5.787537688442212e-05, - "loss": 5.5789, - "step": 42430 - }, - { - "epoch": 22.128292046936114, - "grad_norm": 1.5059565305709839, - "learning_rate": 5.7874371859296483e-05, - "loss": 5.3404, - "step": 42431 - }, - { - "epoch": 22.128813559322033, - "grad_norm": 1.5272843837738037, - "learning_rate": 5.787336683417086e-05, - "loss": 5.4054, - "step": 42432 - }, - { - "epoch": 22.129335071707953, - "grad_norm": 1.566743016242981, - "learning_rate": 5.7872361809045226e-05, - "loss": 5.2445, - "step": 42433 - }, - { - "epoch": 22.129856584093872, - "grad_norm": 1.5728050470352173, - "learning_rate": 5.78713567839196e-05, - "loss": 5.0965, - "step": 42434 - }, - { - "epoch": 22.13037809647979, - "grad_norm": 1.5137262344360352, - "learning_rate": 5.7870351758793974e-05, - "loss": 5.4532, - "step": 42435 - }, - { - "epoch": 22.13089960886571, - "grad_norm": 1.5174181461334229, - "learning_rate": 5.786934673366834e-05, - "loss": 5.4427, - "step": 42436 - }, - { - "epoch": 22.13142112125163, - "grad_norm": 1.4334295988082886, - "learning_rate": 5.7868341708542717e-05, - "loss": 5.9383, - "step": 42437 - }, - { - "epoch": 22.13194263363755, - "grad_norm": 1.5448453426361084, - "learning_rate": 5.786733668341708e-05, - "loss": 5.3783, - "step": 42438 - }, - { - "epoch": 22.13246414602347, - "grad_norm": 1.5559437274932861, - "learning_rate": 5.786633165829146e-05, - "loss": 5.0697, - "step": 42439 - }, - { - "epoch": 22.13298565840939, - "grad_norm": 1.5171972513198853, - "learning_rate": 5.786532663316583e-05, - "loss": 5.3929, - "step": 42440 - }, - { - "epoch": 22.133507170795305, - "grad_norm": 1.522678256034851, - "learning_rate": 5.786432160804021e-05, - "loss": 5.4585, - "step": 42441 - }, - { - "epoch": 22.134028683181224, - "grad_norm": 1.4943335056304932, - "learning_rate": 5.786331658291457e-05, - "loss": 5.5293, - "step": 42442 - }, - { - "epoch": 22.134550195567144, - "grad_norm": 1.5469905138015747, - "learning_rate": 5.786231155778895e-05, - "loss": 5.137, - "step": 42443 - }, - { - "epoch": 22.135071707953063, - "grad_norm": 1.6245616674423218, - "learning_rate": 5.786130653266332e-05, - "loss": 5.1763, - "step": 42444 - }, - { - "epoch": 22.135593220338983, - "grad_norm": 1.5286318063735962, - "learning_rate": 5.78603015075377e-05, - "loss": 4.6862, - "step": 42445 - }, - { - "epoch": 22.136114732724902, - "grad_norm": 1.614904761314392, - "learning_rate": 5.785929648241206e-05, - "loss": 5.5257, - "step": 42446 - }, - { - "epoch": 22.13663624511082, - "grad_norm": 1.5107609033584595, - "learning_rate": 5.785829145728644e-05, - "loss": 5.3874, - "step": 42447 - }, - { - "epoch": 22.13715775749674, - "grad_norm": 1.6361048221588135, - "learning_rate": 5.7857286432160805e-05, - "loss": 5.2898, - "step": 42448 - }, - { - "epoch": 22.13767926988266, - "grad_norm": 1.6439523696899414, - "learning_rate": 5.7856281407035176e-05, - "loss": 4.8607, - "step": 42449 - }, - { - "epoch": 22.13820078226858, - "grad_norm": 1.4825921058654785, - "learning_rate": 5.7855276381909554e-05, - "loss": 5.1329, - "step": 42450 - }, - { - "epoch": 22.1387222946545, - "grad_norm": 1.5446991920471191, - "learning_rate": 5.785427135678392e-05, - "loss": 5.2465, - "step": 42451 - }, - { - "epoch": 22.13924380704042, - "grad_norm": 1.4345476627349854, - "learning_rate": 5.7853266331658296e-05, - "loss": 4.8215, - "step": 42452 - }, - { - "epoch": 22.139765319426335, - "grad_norm": 1.669278860092163, - "learning_rate": 5.785226130653266e-05, - "loss": 4.9634, - "step": 42453 - }, - { - "epoch": 22.140286831812254, - "grad_norm": 1.4580302238464355, - "learning_rate": 5.785125628140704e-05, - "loss": 5.333, - "step": 42454 - }, - { - "epoch": 22.140808344198174, - "grad_norm": 1.5962154865264893, - "learning_rate": 5.785025125628141e-05, - "loss": 5.0291, - "step": 42455 - }, - { - "epoch": 22.141329856584093, - "grad_norm": 1.5726583003997803, - "learning_rate": 5.784924623115579e-05, - "loss": 5.2439, - "step": 42456 - }, - { - "epoch": 22.141851368970013, - "grad_norm": 1.5028910636901855, - "learning_rate": 5.784824120603015e-05, - "loss": 5.4363, - "step": 42457 - }, - { - "epoch": 22.142372881355932, - "grad_norm": 1.581496238708496, - "learning_rate": 5.784723618090453e-05, - "loss": 5.0561, - "step": 42458 - }, - { - "epoch": 22.14289439374185, - "grad_norm": 1.6305019855499268, - "learning_rate": 5.784623115577889e-05, - "loss": 5.0364, - "step": 42459 - }, - { - "epoch": 22.14341590612777, - "grad_norm": 1.952868103981018, - "learning_rate": 5.784522613065327e-05, - "loss": 4.9313, - "step": 42460 - }, - { - "epoch": 22.14393741851369, - "grad_norm": 1.5616897344589233, - "learning_rate": 5.784422110552764e-05, - "loss": 5.5213, - "step": 42461 - }, - { - "epoch": 22.14445893089961, - "grad_norm": 1.5195074081420898, - "learning_rate": 5.7843216080402006e-05, - "loss": 5.3328, - "step": 42462 - }, - { - "epoch": 22.14498044328553, - "grad_norm": 1.4279369115829468, - "learning_rate": 5.7842211055276384e-05, - "loss": 5.736, - "step": 42463 - }, - { - "epoch": 22.14550195567145, - "grad_norm": 1.4495587348937988, - "learning_rate": 5.7841206030150755e-05, - "loss": 4.7346, - "step": 42464 - }, - { - "epoch": 22.146023468057365, - "grad_norm": 1.4699784517288208, - "learning_rate": 5.784020100502513e-05, - "loss": 5.0945, - "step": 42465 - }, - { - "epoch": 22.146544980443284, - "grad_norm": 1.706677794456482, - "learning_rate": 5.78391959798995e-05, - "loss": 4.8237, - "step": 42466 - }, - { - "epoch": 22.147066492829204, - "grad_norm": 1.6133407354354858, - "learning_rate": 5.7838190954773875e-05, - "loss": 4.9414, - "step": 42467 - }, - { - "epoch": 22.147588005215123, - "grad_norm": 1.4920588731765747, - "learning_rate": 5.783718592964824e-05, - "loss": 5.4182, - "step": 42468 - }, - { - "epoch": 22.148109517601043, - "grad_norm": 1.6906228065490723, - "learning_rate": 5.783618090452262e-05, - "loss": 5.1222, - "step": 42469 - }, - { - "epoch": 22.148631029986962, - "grad_norm": 1.5719300508499146, - "learning_rate": 5.783517587939699e-05, - "loss": 5.4841, - "step": 42470 - }, - { - "epoch": 22.14915254237288, - "grad_norm": 1.4884006977081299, - "learning_rate": 5.7834170854271366e-05, - "loss": 5.3088, - "step": 42471 - }, - { - "epoch": 22.1496740547588, - "grad_norm": 1.5142745971679688, - "learning_rate": 5.783316582914573e-05, - "loss": 4.8832, - "step": 42472 - }, - { - "epoch": 22.15019556714472, - "grad_norm": 1.527806043624878, - "learning_rate": 5.783216080402011e-05, - "loss": 5.2662, - "step": 42473 - }, - { - "epoch": 22.15071707953064, - "grad_norm": 1.5068389177322388, - "learning_rate": 5.783115577889447e-05, - "loss": 5.0321, - "step": 42474 - }, - { - "epoch": 22.15123859191656, - "grad_norm": 1.7204368114471436, - "learning_rate": 5.783015075376885e-05, - "loss": 4.2287, - "step": 42475 - }, - { - "epoch": 22.151760104302475, - "grad_norm": 1.623386025428772, - "learning_rate": 5.782914572864322e-05, - "loss": 5.3817, - "step": 42476 - }, - { - "epoch": 22.152281616688395, - "grad_norm": 1.5028835535049438, - "learning_rate": 5.7828140703517586e-05, - "loss": 5.2626, - "step": 42477 - }, - { - "epoch": 22.152803129074314, - "grad_norm": 1.5511829853057861, - "learning_rate": 5.7827135678391964e-05, - "loss": 5.147, - "step": 42478 - }, - { - "epoch": 22.153324641460234, - "grad_norm": 1.5281428098678589, - "learning_rate": 5.782613065326633e-05, - "loss": 5.6311, - "step": 42479 - }, - { - "epoch": 22.153846153846153, - "grad_norm": 1.6268402338027954, - "learning_rate": 5.7825125628140706e-05, - "loss": 5.0836, - "step": 42480 - }, - { - "epoch": 22.154367666232073, - "grad_norm": 1.614466667175293, - "learning_rate": 5.782412060301508e-05, - "loss": 5.2388, - "step": 42481 - }, - { - "epoch": 22.154889178617992, - "grad_norm": 1.6218923330307007, - "learning_rate": 5.7823115577889455e-05, - "loss": 5.4546, - "step": 42482 - }, - { - "epoch": 22.15541069100391, - "grad_norm": 1.5170239210128784, - "learning_rate": 5.782211055276382e-05, - "loss": 5.5063, - "step": 42483 - }, - { - "epoch": 22.15593220338983, - "grad_norm": 1.614268183708191, - "learning_rate": 5.78211055276382e-05, - "loss": 4.7181, - "step": 42484 - }, - { - "epoch": 22.15645371577575, - "grad_norm": 1.5421935319900513, - "learning_rate": 5.782010050251256e-05, - "loss": 4.39, - "step": 42485 - }, - { - "epoch": 22.15697522816167, - "grad_norm": 1.5646377801895142, - "learning_rate": 5.781909547738694e-05, - "loss": 4.9358, - "step": 42486 - }, - { - "epoch": 22.15749674054759, - "grad_norm": 1.472159504890442, - "learning_rate": 5.781809045226131e-05, - "loss": 5.5702, - "step": 42487 - }, - { - "epoch": 22.158018252933505, - "grad_norm": 1.6557201147079468, - "learning_rate": 5.781708542713569e-05, - "loss": 5.433, - "step": 42488 - }, - { - "epoch": 22.158539765319425, - "grad_norm": 1.5124895572662354, - "learning_rate": 5.781608040201005e-05, - "loss": 5.2726, - "step": 42489 - }, - { - "epoch": 22.159061277705344, - "grad_norm": 1.5945518016815186, - "learning_rate": 5.781507537688442e-05, - "loss": 5.4863, - "step": 42490 - }, - { - "epoch": 22.159582790091264, - "grad_norm": 1.6163686513900757, - "learning_rate": 5.78140703517588e-05, - "loss": 4.9146, - "step": 42491 - }, - { - "epoch": 22.160104302477183, - "grad_norm": 1.4189941883087158, - "learning_rate": 5.7813065326633165e-05, - "loss": 5.5887, - "step": 42492 - }, - { - "epoch": 22.160625814863103, - "grad_norm": 1.5492609739303589, - "learning_rate": 5.781206030150754e-05, - "loss": 5.2678, - "step": 42493 - }, - { - "epoch": 22.161147327249022, - "grad_norm": 1.5212125778198242, - "learning_rate": 5.781105527638191e-05, - "loss": 5.5067, - "step": 42494 - }, - { - "epoch": 22.16166883963494, - "grad_norm": 1.6353012323379517, - "learning_rate": 5.7810050251256285e-05, - "loss": 5.1052, - "step": 42495 - }, - { - "epoch": 22.16219035202086, - "grad_norm": 1.5542573928833008, - "learning_rate": 5.7809045226130656e-05, - "loss": 4.6794, - "step": 42496 - }, - { - "epoch": 22.16271186440678, - "grad_norm": 1.530464768409729, - "learning_rate": 5.7808040201005034e-05, - "loss": 5.2556, - "step": 42497 - }, - { - "epoch": 22.1632333767927, - "grad_norm": 1.4862868785858154, - "learning_rate": 5.78070351758794e-05, - "loss": 5.3835, - "step": 42498 - }, - { - "epoch": 22.16375488917862, - "grad_norm": 1.6068871021270752, - "learning_rate": 5.7806030150753776e-05, - "loss": 5.1598, - "step": 42499 - }, - { - "epoch": 22.164276401564535, - "grad_norm": 1.404327154159546, - "learning_rate": 5.780502512562814e-05, - "loss": 5.7232, - "step": 42500 - }, - { - "epoch": 22.164797913950455, - "grad_norm": 1.6383136510849, - "learning_rate": 5.780402010050252e-05, - "loss": 4.8621, - "step": 42501 - }, - { - "epoch": 22.165319426336374, - "grad_norm": 1.65328049659729, - "learning_rate": 5.780301507537689e-05, - "loss": 4.988, - "step": 42502 - }, - { - "epoch": 22.165840938722294, - "grad_norm": 1.4483566284179688, - "learning_rate": 5.780201005025125e-05, - "loss": 5.5363, - "step": 42503 - }, - { - "epoch": 22.166362451108213, - "grad_norm": 1.7229372262954712, - "learning_rate": 5.780100502512563e-05, - "loss": 4.9346, - "step": 42504 - }, - { - "epoch": 22.166883963494133, - "grad_norm": 1.5034925937652588, - "learning_rate": 5.7799999999999995e-05, - "loss": 4.5823, - "step": 42505 - }, - { - "epoch": 22.167405475880052, - "grad_norm": 1.5120712518692017, - "learning_rate": 5.779899497487437e-05, - "loss": 5.554, - "step": 42506 - }, - { - "epoch": 22.16792698826597, - "grad_norm": 1.5640324354171753, - "learning_rate": 5.7797989949748744e-05, - "loss": 5.0052, - "step": 42507 - }, - { - "epoch": 22.16844850065189, - "grad_norm": 1.5640054941177368, - "learning_rate": 5.779698492462312e-05, - "loss": 5.2134, - "step": 42508 - }, - { - "epoch": 22.16897001303781, - "grad_norm": 1.4938898086547852, - "learning_rate": 5.7795979899497486e-05, - "loss": 5.2013, - "step": 42509 - }, - { - "epoch": 22.16949152542373, - "grad_norm": 1.4905893802642822, - "learning_rate": 5.7794974874371864e-05, - "loss": 5.0494, - "step": 42510 - }, - { - "epoch": 22.17001303780965, - "grad_norm": 1.5026718378067017, - "learning_rate": 5.7793969849246235e-05, - "loss": 5.2788, - "step": 42511 - }, - { - "epoch": 22.170534550195566, - "grad_norm": 1.4981063604354858, - "learning_rate": 5.779296482412061e-05, - "loss": 4.6325, - "step": 42512 - }, - { - "epoch": 22.171056062581485, - "grad_norm": 1.6805537939071655, - "learning_rate": 5.779195979899498e-05, - "loss": 5.4506, - "step": 42513 - }, - { - "epoch": 22.171577574967404, - "grad_norm": 1.5829294919967651, - "learning_rate": 5.7790954773869355e-05, - "loss": 5.3093, - "step": 42514 - }, - { - "epoch": 22.172099087353324, - "grad_norm": 1.5502252578735352, - "learning_rate": 5.778994974874372e-05, - "loss": 5.1075, - "step": 42515 - }, - { - "epoch": 22.172620599739243, - "grad_norm": 1.6039518117904663, - "learning_rate": 5.778894472361809e-05, - "loss": 4.8972, - "step": 42516 - }, - { - "epoch": 22.173142112125163, - "grad_norm": 1.592633605003357, - "learning_rate": 5.778793969849247e-05, - "loss": 5.474, - "step": 42517 - }, - { - "epoch": 22.173663624511082, - "grad_norm": 1.5572813749313354, - "learning_rate": 5.778693467336683e-05, - "loss": 5.1167, - "step": 42518 - }, - { - "epoch": 22.174185136897, - "grad_norm": 1.5340052843093872, - "learning_rate": 5.778592964824121e-05, - "loss": 5.2024, - "step": 42519 - }, - { - "epoch": 22.17470664928292, - "grad_norm": 1.4748904705047607, - "learning_rate": 5.7784924623115575e-05, - "loss": 5.2721, - "step": 42520 - }, - { - "epoch": 22.17522816166884, - "grad_norm": 1.5882571935653687, - "learning_rate": 5.778391959798995e-05, - "loss": 5.1149, - "step": 42521 - }, - { - "epoch": 22.17574967405476, - "grad_norm": 1.6267693042755127, - "learning_rate": 5.7782914572864324e-05, - "loss": 5.2073, - "step": 42522 - }, - { - "epoch": 22.17627118644068, - "grad_norm": 1.5906232595443726, - "learning_rate": 5.77819095477387e-05, - "loss": 5.3144, - "step": 42523 - }, - { - "epoch": 22.176792698826596, - "grad_norm": 1.5212279558181763, - "learning_rate": 5.7780904522613066e-05, - "loss": 5.3769, - "step": 42524 - }, - { - "epoch": 22.177314211212515, - "grad_norm": 1.5507404804229736, - "learning_rate": 5.7779899497487444e-05, - "loss": 5.3783, - "step": 42525 - }, - { - "epoch": 22.177835723598434, - "grad_norm": 1.4369686841964722, - "learning_rate": 5.777889447236181e-05, - "loss": 5.501, - "step": 42526 - }, - { - "epoch": 22.178357235984354, - "grad_norm": 1.5280250310897827, - "learning_rate": 5.7777889447236186e-05, - "loss": 5.0346, - "step": 42527 - }, - { - "epoch": 22.178878748370273, - "grad_norm": 1.5499906539916992, - "learning_rate": 5.777688442211056e-05, - "loss": 5.246, - "step": 42528 - }, - { - "epoch": 22.179400260756193, - "grad_norm": 1.5179470777511597, - "learning_rate": 5.777587939698492e-05, - "loss": 5.1357, - "step": 42529 - }, - { - "epoch": 22.179921773142112, - "grad_norm": 1.5376241207122803, - "learning_rate": 5.77748743718593e-05, - "loss": 5.0403, - "step": 42530 - }, - { - "epoch": 22.180443285528032, - "grad_norm": 1.707153081893921, - "learning_rate": 5.777386934673367e-05, - "loss": 5.0533, - "step": 42531 - }, - { - "epoch": 22.18096479791395, - "grad_norm": 1.514082908630371, - "learning_rate": 5.777286432160805e-05, - "loss": 5.1297, - "step": 42532 - }, - { - "epoch": 22.18148631029987, - "grad_norm": 1.6012250185012817, - "learning_rate": 5.777185929648241e-05, - "loss": 5.0699, - "step": 42533 - }, - { - "epoch": 22.18200782268579, - "grad_norm": 1.5024631023406982, - "learning_rate": 5.777085427135679e-05, - "loss": 4.3468, - "step": 42534 - }, - { - "epoch": 22.18252933507171, - "grad_norm": 1.494377613067627, - "learning_rate": 5.7769849246231154e-05, - "loss": 5.3719, - "step": 42535 - }, - { - "epoch": 22.183050847457626, - "grad_norm": 1.5651891231536865, - "learning_rate": 5.776884422110553e-05, - "loss": 5.5365, - "step": 42536 - }, - { - "epoch": 22.183572359843545, - "grad_norm": 1.612901210784912, - "learning_rate": 5.77678391959799e-05, - "loss": 4.9181, - "step": 42537 - }, - { - "epoch": 22.184093872229464, - "grad_norm": 1.442598819732666, - "learning_rate": 5.776683417085428e-05, - "loss": 5.2512, - "step": 42538 - }, - { - "epoch": 22.184615384615384, - "grad_norm": 1.598738431930542, - "learning_rate": 5.7765829145728645e-05, - "loss": 4.9234, - "step": 42539 - }, - { - "epoch": 22.185136897001303, - "grad_norm": 2.189188003540039, - "learning_rate": 5.776482412060302e-05, - "loss": 4.7128, - "step": 42540 - }, - { - "epoch": 22.185658409387223, - "grad_norm": 1.5383086204528809, - "learning_rate": 5.776381909547739e-05, - "loss": 4.9109, - "step": 42541 - }, - { - "epoch": 22.186179921773142, - "grad_norm": 1.6215639114379883, - "learning_rate": 5.776281407035176e-05, - "loss": 5.2811, - "step": 42542 - }, - { - "epoch": 22.186701434159062, - "grad_norm": 1.4468625783920288, - "learning_rate": 5.7761809045226136e-05, - "loss": 5.3306, - "step": 42543 - }, - { - "epoch": 22.18722294654498, - "grad_norm": 1.5875988006591797, - "learning_rate": 5.77608040201005e-05, - "loss": 5.2358, - "step": 42544 - }, - { - "epoch": 22.1877444589309, - "grad_norm": 1.4534822702407837, - "learning_rate": 5.775979899497488e-05, - "loss": 5.481, - "step": 42545 - }, - { - "epoch": 22.18826597131682, - "grad_norm": 1.495900273323059, - "learning_rate": 5.775879396984924e-05, - "loss": 5.312, - "step": 42546 - }, - { - "epoch": 22.18878748370274, - "grad_norm": 1.4406330585479736, - "learning_rate": 5.775778894472362e-05, - "loss": 5.6595, - "step": 42547 - }, - { - "epoch": 22.189308996088656, - "grad_norm": 1.5420492887496948, - "learning_rate": 5.775678391959799e-05, - "loss": 4.8101, - "step": 42548 - }, - { - "epoch": 22.189830508474575, - "grad_norm": 1.567585825920105, - "learning_rate": 5.775577889447237e-05, - "loss": 4.9244, - "step": 42549 - }, - { - "epoch": 22.190352020860495, - "grad_norm": 1.5246533155441284, - "learning_rate": 5.7754773869346733e-05, - "loss": 5.5939, - "step": 42550 - }, - { - "epoch": 22.190873533246414, - "grad_norm": 1.62144935131073, - "learning_rate": 5.775376884422111e-05, - "loss": 5.1667, - "step": 42551 - }, - { - "epoch": 22.191395045632333, - "grad_norm": 1.5452485084533691, - "learning_rate": 5.7752763819095476e-05, - "loss": 4.8752, - "step": 42552 - }, - { - "epoch": 22.191916558018253, - "grad_norm": 1.4873158931732178, - "learning_rate": 5.7751758793969853e-05, - "loss": 5.4687, - "step": 42553 - }, - { - "epoch": 22.192438070404172, - "grad_norm": 1.5990431308746338, - "learning_rate": 5.7750753768844224e-05, - "loss": 5.3453, - "step": 42554 - }, - { - "epoch": 22.192959582790092, - "grad_norm": 1.5737054347991943, - "learning_rate": 5.774974874371859e-05, - "loss": 5.4789, - "step": 42555 - }, - { - "epoch": 22.19348109517601, - "grad_norm": 1.5673563480377197, - "learning_rate": 5.7748743718592967e-05, - "loss": 5.342, - "step": 42556 - }, - { - "epoch": 22.19400260756193, - "grad_norm": 1.882723093032837, - "learning_rate": 5.774773869346734e-05, - "loss": 5.2589, - "step": 42557 - }, - { - "epoch": 22.19452411994785, - "grad_norm": 1.5392907857894897, - "learning_rate": 5.7746733668341715e-05, - "loss": 4.9383, - "step": 42558 - }, - { - "epoch": 22.195045632333766, - "grad_norm": 1.4543898105621338, - "learning_rate": 5.774572864321608e-05, - "loss": 5.2059, - "step": 42559 - }, - { - "epoch": 22.195567144719686, - "grad_norm": 1.4797441959381104, - "learning_rate": 5.774472361809046e-05, - "loss": 5.4585, - "step": 42560 - }, - { - "epoch": 22.196088657105605, - "grad_norm": 1.5450032949447632, - "learning_rate": 5.774371859296482e-05, - "loss": 5.3872, - "step": 42561 - }, - { - "epoch": 22.196610169491525, - "grad_norm": 1.5037400722503662, - "learning_rate": 5.77427135678392e-05, - "loss": 5.4688, - "step": 42562 - }, - { - "epoch": 22.197131681877444, - "grad_norm": 1.5387507677078247, - "learning_rate": 5.774170854271357e-05, - "loss": 5.0315, - "step": 42563 - }, - { - "epoch": 22.197653194263363, - "grad_norm": 1.5901215076446533, - "learning_rate": 5.774070351758795e-05, - "loss": 5.0382, - "step": 42564 - }, - { - "epoch": 22.198174706649283, - "grad_norm": 1.4452202320098877, - "learning_rate": 5.773969849246231e-05, - "loss": 5.4021, - "step": 42565 - }, - { - "epoch": 22.198696219035202, - "grad_norm": 1.4574350118637085, - "learning_rate": 5.773869346733669e-05, - "loss": 5.4893, - "step": 42566 - }, - { - "epoch": 22.199217731421122, - "grad_norm": 1.4673196077346802, - "learning_rate": 5.7737688442211055e-05, - "loss": 5.0766, - "step": 42567 - }, - { - "epoch": 22.19973924380704, - "grad_norm": 1.6169793605804443, - "learning_rate": 5.773668341708543e-05, - "loss": 5.4718, - "step": 42568 - }, - { - "epoch": 22.20026075619296, - "grad_norm": 1.5590616464614868, - "learning_rate": 5.7735678391959804e-05, - "loss": 5.3002, - "step": 42569 - }, - { - "epoch": 22.20078226857888, - "grad_norm": 1.5284812450408936, - "learning_rate": 5.773467336683417e-05, - "loss": 5.4893, - "step": 42570 - }, - { - "epoch": 22.201303780964796, - "grad_norm": 1.5278363227844238, - "learning_rate": 5.7733668341708546e-05, - "loss": 5.0048, - "step": 42571 - }, - { - "epoch": 22.201825293350716, - "grad_norm": 1.552660346031189, - "learning_rate": 5.773266331658291e-05, - "loss": 5.176, - "step": 42572 - }, - { - "epoch": 22.202346805736635, - "grad_norm": 1.583893060684204, - "learning_rate": 5.773165829145729e-05, - "loss": 5.2547, - "step": 42573 - }, - { - "epoch": 22.202868318122555, - "grad_norm": 1.5119929313659668, - "learning_rate": 5.773065326633166e-05, - "loss": 5.3093, - "step": 42574 - }, - { - "epoch": 22.203389830508474, - "grad_norm": 1.5216015577316284, - "learning_rate": 5.772964824120604e-05, - "loss": 5.152, - "step": 42575 - }, - { - "epoch": 22.203911342894393, - "grad_norm": 1.648738145828247, - "learning_rate": 5.77286432160804e-05, - "loss": 5.1119, - "step": 42576 - }, - { - "epoch": 22.204432855280313, - "grad_norm": 1.555527925491333, - "learning_rate": 5.772763819095478e-05, - "loss": 5.3321, - "step": 42577 - }, - { - "epoch": 22.204954367666232, - "grad_norm": 1.6664870977401733, - "learning_rate": 5.772663316582915e-05, - "loss": 5.1898, - "step": 42578 - }, - { - "epoch": 22.205475880052152, - "grad_norm": 1.571927785873413, - "learning_rate": 5.772562814070353e-05, - "loss": 5.311, - "step": 42579 - }, - { - "epoch": 22.20599739243807, - "grad_norm": 1.555804967880249, - "learning_rate": 5.772462311557789e-05, - "loss": 4.9896, - "step": 42580 - }, - { - "epoch": 22.20651890482399, - "grad_norm": 1.4492287635803223, - "learning_rate": 5.772361809045227e-05, - "loss": 4.875, - "step": 42581 - }, - { - "epoch": 22.20704041720991, - "grad_norm": 1.5096968412399292, - "learning_rate": 5.7722613065326634e-05, - "loss": 5.338, - "step": 42582 - }, - { - "epoch": 22.207561929595826, - "grad_norm": 1.5618921518325806, - "learning_rate": 5.7721608040201005e-05, - "loss": 5.4177, - "step": 42583 - }, - { - "epoch": 22.208083441981746, - "grad_norm": 1.5084766149520874, - "learning_rate": 5.772060301507538e-05, - "loss": 5.2551, - "step": 42584 - }, - { - "epoch": 22.208604954367665, - "grad_norm": 1.511855125427246, - "learning_rate": 5.771959798994975e-05, - "loss": 5.4964, - "step": 42585 - }, - { - "epoch": 22.209126466753585, - "grad_norm": 1.546643614768982, - "learning_rate": 5.7718592964824125e-05, - "loss": 5.4038, - "step": 42586 - }, - { - "epoch": 22.209647979139504, - "grad_norm": 1.3998074531555176, - "learning_rate": 5.771758793969849e-05, - "loss": 4.7129, - "step": 42587 - }, - { - "epoch": 22.210169491525424, - "grad_norm": 1.4719446897506714, - "learning_rate": 5.771658291457287e-05, - "loss": 5.4134, - "step": 42588 - }, - { - "epoch": 22.210691003911343, - "grad_norm": 1.4947519302368164, - "learning_rate": 5.771557788944724e-05, - "loss": 5.0808, - "step": 42589 - }, - { - "epoch": 22.211212516297262, - "grad_norm": 1.574833869934082, - "learning_rate": 5.7714572864321616e-05, - "loss": 5.4438, - "step": 42590 - }, - { - "epoch": 22.211734028683182, - "grad_norm": 1.5651153326034546, - "learning_rate": 5.771356783919598e-05, - "loss": 5.1063, - "step": 42591 - }, - { - "epoch": 22.2122555410691, - "grad_norm": 1.568068504333496, - "learning_rate": 5.771256281407036e-05, - "loss": 4.8297, - "step": 42592 - }, - { - "epoch": 22.21277705345502, - "grad_norm": 1.5472921133041382, - "learning_rate": 5.771155778894472e-05, - "loss": 5.2556, - "step": 42593 - }, - { - "epoch": 22.21329856584094, - "grad_norm": 1.4602279663085938, - "learning_rate": 5.77105527638191e-05, - "loss": 5.339, - "step": 42594 - }, - { - "epoch": 22.213820078226856, - "grad_norm": 1.5650819540023804, - "learning_rate": 5.770954773869347e-05, - "loss": 5.4447, - "step": 42595 - }, - { - "epoch": 22.214341590612776, - "grad_norm": 1.503077507019043, - "learning_rate": 5.7708542713567836e-05, - "loss": 5.2636, - "step": 42596 - }, - { - "epoch": 22.214863102998695, - "grad_norm": 1.4853368997573853, - "learning_rate": 5.7707537688442214e-05, - "loss": 5.2138, - "step": 42597 - }, - { - "epoch": 22.215384615384615, - "grad_norm": 1.4757829904556274, - "learning_rate": 5.7706532663316585e-05, - "loss": 5.1226, - "step": 42598 - }, - { - "epoch": 22.215906127770534, - "grad_norm": 1.415135383605957, - "learning_rate": 5.770552763819096e-05, - "loss": 5.7026, - "step": 42599 - }, - { - "epoch": 22.216427640156454, - "grad_norm": 1.472921371459961, - "learning_rate": 5.770452261306533e-05, - "loss": 5.4842, - "step": 42600 - }, - { - "epoch": 22.216949152542373, - "grad_norm": 1.436500072479248, - "learning_rate": 5.7703517587939705e-05, - "loss": 5.5856, - "step": 42601 - }, - { - "epoch": 22.217470664928292, - "grad_norm": 1.537264108657837, - "learning_rate": 5.770251256281407e-05, - "loss": 5.0244, - "step": 42602 - }, - { - "epoch": 22.217992177314212, - "grad_norm": 1.4670379161834717, - "learning_rate": 5.770150753768845e-05, - "loss": 5.5084, - "step": 42603 - }, - { - "epoch": 22.21851368970013, - "grad_norm": 1.5485451221466064, - "learning_rate": 5.770050251256282e-05, - "loss": 5.2092, - "step": 42604 - }, - { - "epoch": 22.21903520208605, - "grad_norm": 1.590876579284668, - "learning_rate": 5.7699497487437196e-05, - "loss": 5.1619, - "step": 42605 - }, - { - "epoch": 22.21955671447197, - "grad_norm": 1.6168969869613647, - "learning_rate": 5.769849246231156e-05, - "loss": 5.2726, - "step": 42606 - }, - { - "epoch": 22.220078226857886, - "grad_norm": 1.643803358078003, - "learning_rate": 5.769748743718594e-05, - "loss": 5.1866, - "step": 42607 - }, - { - "epoch": 22.220599739243806, - "grad_norm": 1.4727168083190918, - "learning_rate": 5.76964824120603e-05, - "loss": 5.0001, - "step": 42608 - }, - { - "epoch": 22.221121251629725, - "grad_norm": 1.6031196117401123, - "learning_rate": 5.769547738693467e-05, - "loss": 5.1832, - "step": 42609 - }, - { - "epoch": 22.221642764015645, - "grad_norm": 1.6368236541748047, - "learning_rate": 5.769447236180905e-05, - "loss": 5.5941, - "step": 42610 - }, - { - "epoch": 22.222164276401564, - "grad_norm": 1.5174440145492554, - "learning_rate": 5.7693467336683415e-05, - "loss": 5.2497, - "step": 42611 - }, - { - "epoch": 22.222685788787484, - "grad_norm": 1.575860857963562, - "learning_rate": 5.769246231155779e-05, - "loss": 5.3528, - "step": 42612 - }, - { - "epoch": 22.223207301173403, - "grad_norm": 1.6122685670852661, - "learning_rate": 5.769145728643216e-05, - "loss": 4.8929, - "step": 42613 - }, - { - "epoch": 22.223728813559323, - "grad_norm": 1.5797749757766724, - "learning_rate": 5.7690452261306535e-05, - "loss": 5.2694, - "step": 42614 - }, - { - "epoch": 22.224250325945242, - "grad_norm": 1.57999587059021, - "learning_rate": 5.7689447236180906e-05, - "loss": 5.6052, - "step": 42615 - }, - { - "epoch": 22.22477183833116, - "grad_norm": 1.6453219652175903, - "learning_rate": 5.7688442211055284e-05, - "loss": 5.2249, - "step": 42616 - }, - { - "epoch": 22.22529335071708, - "grad_norm": 1.5031291246414185, - "learning_rate": 5.768743718592965e-05, - "loss": 5.2297, - "step": 42617 - }, - { - "epoch": 22.225814863103, - "grad_norm": 1.5103729963302612, - "learning_rate": 5.7686432160804026e-05, - "loss": 5.1614, - "step": 42618 - }, - { - "epoch": 22.226336375488916, - "grad_norm": 1.5939828157424927, - "learning_rate": 5.76854271356784e-05, - "loss": 5.4588, - "step": 42619 - }, - { - "epoch": 22.226857887874836, - "grad_norm": 1.5063965320587158, - "learning_rate": 5.7684422110552775e-05, - "loss": 5.1008, - "step": 42620 - }, - { - "epoch": 22.227379400260755, - "grad_norm": 1.5316174030303955, - "learning_rate": 5.768341708542714e-05, - "loss": 5.2719, - "step": 42621 - }, - { - "epoch": 22.227900912646675, - "grad_norm": 1.5397471189498901, - "learning_rate": 5.7682412060301503e-05, - "loss": 4.8511, - "step": 42622 - }, - { - "epoch": 22.228422425032594, - "grad_norm": 1.4139792919158936, - "learning_rate": 5.768140703517588e-05, - "loss": 5.6913, - "step": 42623 - }, - { - "epoch": 22.228943937418514, - "grad_norm": 1.5370490550994873, - "learning_rate": 5.768040201005025e-05, - "loss": 5.4794, - "step": 42624 - }, - { - "epoch": 22.229465449804433, - "grad_norm": 1.4744714498519897, - "learning_rate": 5.767939698492463e-05, - "loss": 5.7551, - "step": 42625 - }, - { - "epoch": 22.229986962190353, - "grad_norm": 1.560343623161316, - "learning_rate": 5.7678391959798994e-05, - "loss": 5.1483, - "step": 42626 - }, - { - "epoch": 22.230508474576272, - "grad_norm": 1.6559511423110962, - "learning_rate": 5.767738693467337e-05, - "loss": 4.7718, - "step": 42627 - }, - { - "epoch": 22.23102998696219, - "grad_norm": 1.5244826078414917, - "learning_rate": 5.7676381909547736e-05, - "loss": 5.1453, - "step": 42628 - }, - { - "epoch": 22.23155149934811, - "grad_norm": 1.5855116844177246, - "learning_rate": 5.7675376884422114e-05, - "loss": 5.5052, - "step": 42629 - }, - { - "epoch": 22.23207301173403, - "grad_norm": 1.6794129610061646, - "learning_rate": 5.7674371859296485e-05, - "loss": 5.1415, - "step": 42630 - }, - { - "epoch": 22.232594524119946, - "grad_norm": 1.4752428531646729, - "learning_rate": 5.767336683417086e-05, - "loss": 5.4253, - "step": 42631 - }, - { - "epoch": 22.233116036505866, - "grad_norm": 1.6020498275756836, - "learning_rate": 5.767236180904523e-05, - "loss": 4.6551, - "step": 42632 - }, - { - "epoch": 22.233637548891785, - "grad_norm": 1.5850929021835327, - "learning_rate": 5.7671356783919605e-05, - "loss": 5.1329, - "step": 42633 - }, - { - "epoch": 22.234159061277705, - "grad_norm": 1.4890735149383545, - "learning_rate": 5.767035175879397e-05, - "loss": 5.2005, - "step": 42634 - }, - { - "epoch": 22.234680573663624, - "grad_norm": 1.5829156637191772, - "learning_rate": 5.766934673366834e-05, - "loss": 5.3421, - "step": 42635 - }, - { - "epoch": 22.235202086049544, - "grad_norm": 1.5378198623657227, - "learning_rate": 5.766834170854272e-05, - "loss": 5.2257, - "step": 42636 - }, - { - "epoch": 22.235723598435463, - "grad_norm": 1.5660229921340942, - "learning_rate": 5.766733668341708e-05, - "loss": 4.8735, - "step": 42637 - }, - { - "epoch": 22.236245110821383, - "grad_norm": 1.567365050315857, - "learning_rate": 5.766633165829146e-05, - "loss": 5.6089, - "step": 42638 - }, - { - "epoch": 22.236766623207302, - "grad_norm": 1.5342856645584106, - "learning_rate": 5.7665326633165825e-05, - "loss": 5.2709, - "step": 42639 - }, - { - "epoch": 22.23728813559322, - "grad_norm": 1.562403917312622, - "learning_rate": 5.76643216080402e-05, - "loss": 5.3583, - "step": 42640 - }, - { - "epoch": 22.23780964797914, - "grad_norm": 1.5080257654190063, - "learning_rate": 5.7663316582914574e-05, - "loss": 5.4751, - "step": 42641 - }, - { - "epoch": 22.23833116036506, - "grad_norm": 1.5445575714111328, - "learning_rate": 5.766231155778895e-05, - "loss": 5.1796, - "step": 42642 - }, - { - "epoch": 22.238852672750976, - "grad_norm": 1.5741121768951416, - "learning_rate": 5.7661306532663316e-05, - "loss": 5.0369, - "step": 42643 - }, - { - "epoch": 22.239374185136896, - "grad_norm": 1.522300362586975, - "learning_rate": 5.7660301507537694e-05, - "loss": 4.8851, - "step": 42644 - }, - { - "epoch": 22.239895697522815, - "grad_norm": 1.6120563745498657, - "learning_rate": 5.7659296482412065e-05, - "loss": 5.1793, - "step": 42645 - }, - { - "epoch": 22.240417209908735, - "grad_norm": 1.6236398220062256, - "learning_rate": 5.765829145728644e-05, - "loss": 5.3586, - "step": 42646 - }, - { - "epoch": 22.240938722294654, - "grad_norm": 1.5386542081832886, - "learning_rate": 5.765728643216081e-05, - "loss": 5.2244, - "step": 42647 - }, - { - "epoch": 22.241460234680574, - "grad_norm": 1.5857760906219482, - "learning_rate": 5.765628140703517e-05, - "loss": 4.3847, - "step": 42648 - }, - { - "epoch": 22.241981747066493, - "grad_norm": 1.465994119644165, - "learning_rate": 5.765527638190955e-05, - "loss": 5.6288, - "step": 42649 - }, - { - "epoch": 22.242503259452413, - "grad_norm": 1.6312357187271118, - "learning_rate": 5.765427135678392e-05, - "loss": 5.0444, - "step": 42650 - }, - { - "epoch": 22.243024771838332, - "grad_norm": 1.6125133037567139, - "learning_rate": 5.76532663316583e-05, - "loss": 5.4235, - "step": 42651 - }, - { - "epoch": 22.24354628422425, - "grad_norm": 1.491666316986084, - "learning_rate": 5.765226130653266e-05, - "loss": 5.6472, - "step": 42652 - }, - { - "epoch": 22.24406779661017, - "grad_norm": 1.4415994882583618, - "learning_rate": 5.765125628140704e-05, - "loss": 5.4673, - "step": 42653 - }, - { - "epoch": 22.24458930899609, - "grad_norm": 1.5354139804840088, - "learning_rate": 5.7650251256281404e-05, - "loss": 5.0521, - "step": 42654 - }, - { - "epoch": 22.245110821382006, - "grad_norm": 1.6082944869995117, - "learning_rate": 5.764924623115578e-05, - "loss": 5.0043, - "step": 42655 - }, - { - "epoch": 22.245632333767926, - "grad_norm": 1.551696538925171, - "learning_rate": 5.764824120603015e-05, - "loss": 5.1367, - "step": 42656 - }, - { - "epoch": 22.246153846153845, - "grad_norm": 1.6853907108306885, - "learning_rate": 5.764723618090453e-05, - "loss": 4.8054, - "step": 42657 - }, - { - "epoch": 22.246675358539765, - "grad_norm": 1.5112788677215576, - "learning_rate": 5.7646231155778895e-05, - "loss": 5.517, - "step": 42658 - }, - { - "epoch": 22.247196870925684, - "grad_norm": 1.5705713033676147, - "learning_rate": 5.764522613065327e-05, - "loss": 5.3934, - "step": 42659 - }, - { - "epoch": 22.247718383311604, - "grad_norm": 1.591688632965088, - "learning_rate": 5.764422110552764e-05, - "loss": 5.2686, - "step": 42660 - }, - { - "epoch": 22.248239895697523, - "grad_norm": 1.5254555940628052, - "learning_rate": 5.7643216080402015e-05, - "loss": 5.2984, - "step": 42661 - }, - { - "epoch": 22.248761408083443, - "grad_norm": 1.5278242826461792, - "learning_rate": 5.7642211055276386e-05, - "loss": 5.0187, - "step": 42662 - }, - { - "epoch": 22.249282920469362, - "grad_norm": 1.6001027822494507, - "learning_rate": 5.764120603015075e-05, - "loss": 5.2682, - "step": 42663 - }, - { - "epoch": 22.24980443285528, - "grad_norm": 1.5511822700500488, - "learning_rate": 5.764020100502513e-05, - "loss": 5.4129, - "step": 42664 - }, - { - "epoch": 22.2503259452412, - "grad_norm": 1.6065582036972046, - "learning_rate": 5.76391959798995e-05, - "loss": 5.0766, - "step": 42665 - }, - { - "epoch": 22.250847457627117, - "grad_norm": 1.474036693572998, - "learning_rate": 5.763819095477388e-05, - "loss": 5.4186, - "step": 42666 - }, - { - "epoch": 22.251368970013036, - "grad_norm": 1.562574863433838, - "learning_rate": 5.763718592964824e-05, - "loss": 5.2641, - "step": 42667 - }, - { - "epoch": 22.251890482398956, - "grad_norm": 1.5029574632644653, - "learning_rate": 5.763618090452262e-05, - "loss": 5.4474, - "step": 42668 - }, - { - "epoch": 22.252411994784875, - "grad_norm": 1.4710605144500732, - "learning_rate": 5.7635175879396983e-05, - "loss": 5.5298, - "step": 42669 - }, - { - "epoch": 22.252933507170795, - "grad_norm": 1.6180781126022339, - "learning_rate": 5.763417085427136e-05, - "loss": 5.1952, - "step": 42670 - }, - { - "epoch": 22.253455019556714, - "grad_norm": 1.5945063829421997, - "learning_rate": 5.763316582914573e-05, - "loss": 4.74, - "step": 42671 - }, - { - "epoch": 22.253976531942634, - "grad_norm": 1.4723384380340576, - "learning_rate": 5.763216080402011e-05, - "loss": 5.239, - "step": 42672 - }, - { - "epoch": 22.254498044328553, - "grad_norm": 1.5497794151306152, - "learning_rate": 5.7631155778894474e-05, - "loss": 4.9599, - "step": 42673 - }, - { - "epoch": 22.255019556714473, - "grad_norm": 1.659677505493164, - "learning_rate": 5.763015075376885e-05, - "loss": 4.9014, - "step": 42674 - }, - { - "epoch": 22.255541069100392, - "grad_norm": 1.5408847332000732, - "learning_rate": 5.7629145728643217e-05, - "loss": 5.1412, - "step": 42675 - }, - { - "epoch": 22.25606258148631, - "grad_norm": 1.638092041015625, - "learning_rate": 5.762814070351759e-05, - "loss": 4.9378, - "step": 42676 - }, - { - "epoch": 22.25658409387223, - "grad_norm": 1.5986945629119873, - "learning_rate": 5.7627135678391965e-05, - "loss": 5.2219, - "step": 42677 - }, - { - "epoch": 22.257105606258147, - "grad_norm": 1.5849286317825317, - "learning_rate": 5.762613065326633e-05, - "loss": 4.862, - "step": 42678 - }, - { - "epoch": 22.257627118644066, - "grad_norm": 1.5108551979064941, - "learning_rate": 5.762512562814071e-05, - "loss": 4.6383, - "step": 42679 - }, - { - "epoch": 22.258148631029986, - "grad_norm": 1.5310474634170532, - "learning_rate": 5.762412060301507e-05, - "loss": 5.1257, - "step": 42680 - }, - { - "epoch": 22.258670143415905, - "grad_norm": 1.5359526872634888, - "learning_rate": 5.762311557788945e-05, - "loss": 5.2523, - "step": 42681 - }, - { - "epoch": 22.259191655801825, - "grad_norm": 1.6780747175216675, - "learning_rate": 5.762211055276382e-05, - "loss": 4.8469, - "step": 42682 - }, - { - "epoch": 22.259713168187744, - "grad_norm": 1.5906333923339844, - "learning_rate": 5.76211055276382e-05, - "loss": 5.0673, - "step": 42683 - }, - { - "epoch": 22.260234680573664, - "grad_norm": 1.6442816257476807, - "learning_rate": 5.762010050251256e-05, - "loss": 4.4878, - "step": 42684 - }, - { - "epoch": 22.260756192959583, - "grad_norm": 1.498566746711731, - "learning_rate": 5.761909547738694e-05, - "loss": 5.1336, - "step": 42685 - }, - { - "epoch": 22.261277705345503, - "grad_norm": 1.5446151494979858, - "learning_rate": 5.761809045226131e-05, - "loss": 4.9948, - "step": 42686 - }, - { - "epoch": 22.261799217731422, - "grad_norm": 1.5894014835357666, - "learning_rate": 5.761708542713569e-05, - "loss": 4.9815, - "step": 42687 - }, - { - "epoch": 22.26232073011734, - "grad_norm": 1.5603742599487305, - "learning_rate": 5.7616080402010054e-05, - "loss": 5.0045, - "step": 42688 - }, - { - "epoch": 22.26284224250326, - "grad_norm": 1.611146092414856, - "learning_rate": 5.761507537688442e-05, - "loss": 4.7783, - "step": 42689 - }, - { - "epoch": 22.263363754889177, - "grad_norm": 1.7263976335525513, - "learning_rate": 5.7614070351758796e-05, - "loss": 4.9833, - "step": 42690 - }, - { - "epoch": 22.263885267275096, - "grad_norm": 1.6046302318572998, - "learning_rate": 5.761306532663317e-05, - "loss": 4.9028, - "step": 42691 - }, - { - "epoch": 22.264406779661016, - "grad_norm": 1.5123279094696045, - "learning_rate": 5.7612060301507545e-05, - "loss": 5.3621, - "step": 42692 - }, - { - "epoch": 22.264928292046935, - "grad_norm": 1.5057975053787231, - "learning_rate": 5.761105527638191e-05, - "loss": 5.5031, - "step": 42693 - }, - { - "epoch": 22.265449804432855, - "grad_norm": 1.5535962581634521, - "learning_rate": 5.761005025125629e-05, - "loss": 5.0217, - "step": 42694 - }, - { - "epoch": 22.265971316818774, - "grad_norm": 1.497111439704895, - "learning_rate": 5.760904522613065e-05, - "loss": 5.2261, - "step": 42695 - }, - { - "epoch": 22.266492829204694, - "grad_norm": 1.4009497165679932, - "learning_rate": 5.760804020100503e-05, - "loss": 5.7061, - "step": 42696 - }, - { - "epoch": 22.267014341590613, - "grad_norm": 1.5643246173858643, - "learning_rate": 5.76070351758794e-05, - "loss": 5.2161, - "step": 42697 - }, - { - "epoch": 22.267535853976533, - "grad_norm": 1.478999137878418, - "learning_rate": 5.760603015075378e-05, - "loss": 5.6338, - "step": 42698 - }, - { - "epoch": 22.268057366362452, - "grad_norm": 1.4726502895355225, - "learning_rate": 5.760502512562814e-05, - "loss": 5.2562, - "step": 42699 - }, - { - "epoch": 22.26857887874837, - "grad_norm": 1.5999311208724976, - "learning_rate": 5.760402010050252e-05, - "loss": 5.2911, - "step": 42700 - }, - { - "epoch": 22.26910039113429, - "grad_norm": 1.487545132637024, - "learning_rate": 5.7603015075376884e-05, - "loss": 5.6426, - "step": 42701 - }, - { - "epoch": 22.269621903520207, - "grad_norm": 1.4669175148010254, - "learning_rate": 5.7602010050251255e-05, - "loss": 5.429, - "step": 42702 - }, - { - "epoch": 22.270143415906126, - "grad_norm": 1.6137542724609375, - "learning_rate": 5.760100502512563e-05, - "loss": 4.8515, - "step": 42703 - }, - { - "epoch": 22.270664928292046, - "grad_norm": 1.616709589958191, - "learning_rate": 5.76e-05, - "loss": 4.9934, - "step": 42704 - }, - { - "epoch": 22.271186440677965, - "grad_norm": 1.5681886672973633, - "learning_rate": 5.7598994974874375e-05, - "loss": 5.2474, - "step": 42705 - }, - { - "epoch": 22.271707953063885, - "grad_norm": 1.469547986984253, - "learning_rate": 5.7597989949748746e-05, - "loss": 5.6365, - "step": 42706 - }, - { - "epoch": 22.272229465449804, - "grad_norm": 1.513516902923584, - "learning_rate": 5.7596984924623124e-05, - "loss": 5.1288, - "step": 42707 - }, - { - "epoch": 22.272750977835724, - "grad_norm": 1.6259602308273315, - "learning_rate": 5.759597989949749e-05, - "loss": 5.0155, - "step": 42708 - }, - { - "epoch": 22.273272490221643, - "grad_norm": 1.5893603563308716, - "learning_rate": 5.7594974874371866e-05, - "loss": 5.6015, - "step": 42709 - }, - { - "epoch": 22.273794002607563, - "grad_norm": 1.6439648866653442, - "learning_rate": 5.759396984924623e-05, - "loss": 5.1084, - "step": 42710 - }, - { - "epoch": 22.274315514993482, - "grad_norm": 1.5470153093338013, - "learning_rate": 5.759296482412061e-05, - "loss": 5.6569, - "step": 42711 - }, - { - "epoch": 22.2748370273794, - "grad_norm": 1.5094358921051025, - "learning_rate": 5.759195979899498e-05, - "loss": 5.4451, - "step": 42712 - }, - { - "epoch": 22.27535853976532, - "grad_norm": 1.486604928970337, - "learning_rate": 5.759095477386936e-05, - "loss": 5.7262, - "step": 42713 - }, - { - "epoch": 22.275880052151237, - "grad_norm": 1.4622840881347656, - "learning_rate": 5.758994974874372e-05, - "loss": 5.562, - "step": 42714 - }, - { - "epoch": 22.276401564537156, - "grad_norm": 1.4377809762954712, - "learning_rate": 5.7588944723618086e-05, - "loss": 5.4962, - "step": 42715 - }, - { - "epoch": 22.276923076923076, - "grad_norm": 1.5447652339935303, - "learning_rate": 5.7587939698492464e-05, - "loss": 5.1716, - "step": 42716 - }, - { - "epoch": 22.277444589308995, - "grad_norm": 1.5258245468139648, - "learning_rate": 5.7586934673366835e-05, - "loss": 5.4097, - "step": 42717 - }, - { - "epoch": 22.277966101694915, - "grad_norm": 1.539490818977356, - "learning_rate": 5.758592964824121e-05, - "loss": 5.4347, - "step": 42718 - }, - { - "epoch": 22.278487614080834, - "grad_norm": 1.4843858480453491, - "learning_rate": 5.758492462311558e-05, - "loss": 5.5729, - "step": 42719 - }, - { - "epoch": 22.279009126466754, - "grad_norm": 1.6736210584640503, - "learning_rate": 5.7583919597989955e-05, - "loss": 5.3521, - "step": 42720 - }, - { - "epoch": 22.279530638852673, - "grad_norm": 1.5727450847625732, - "learning_rate": 5.758291457286432e-05, - "loss": 5.2599, - "step": 42721 - }, - { - "epoch": 22.280052151238593, - "grad_norm": 1.6118577718734741, - "learning_rate": 5.75819095477387e-05, - "loss": 5.2005, - "step": 42722 - }, - { - "epoch": 22.280573663624512, - "grad_norm": 1.5826573371887207, - "learning_rate": 5.758090452261307e-05, - "loss": 5.1751, - "step": 42723 - }, - { - "epoch": 22.28109517601043, - "grad_norm": 1.535997748374939, - "learning_rate": 5.7579899497487446e-05, - "loss": 5.227, - "step": 42724 - }, - { - "epoch": 22.28161668839635, - "grad_norm": 1.5772911310195923, - "learning_rate": 5.757889447236181e-05, - "loss": 5.4234, - "step": 42725 - }, - { - "epoch": 22.282138200782267, - "grad_norm": 1.594077467918396, - "learning_rate": 5.757788944723619e-05, - "loss": 5.0254, - "step": 42726 - }, - { - "epoch": 22.282659713168186, - "grad_norm": 1.4820584058761597, - "learning_rate": 5.757688442211055e-05, - "loss": 5.6656, - "step": 42727 - }, - { - "epoch": 22.283181225554106, - "grad_norm": 1.6719586849212646, - "learning_rate": 5.757587939698492e-05, - "loss": 5.105, - "step": 42728 - }, - { - "epoch": 22.283702737940025, - "grad_norm": 1.520719289779663, - "learning_rate": 5.75748743718593e-05, - "loss": 5.7191, - "step": 42729 - }, - { - "epoch": 22.284224250325945, - "grad_norm": 1.4987751245498657, - "learning_rate": 5.7573869346733665e-05, - "loss": 5.6759, - "step": 42730 - }, - { - "epoch": 22.284745762711864, - "grad_norm": 1.5564543008804321, - "learning_rate": 5.757286432160804e-05, - "loss": 5.1286, - "step": 42731 - }, - { - "epoch": 22.285267275097784, - "grad_norm": 1.5780175924301147, - "learning_rate": 5.7571859296482414e-05, - "loss": 5.3732, - "step": 42732 - }, - { - "epoch": 22.285788787483703, - "grad_norm": 1.6035034656524658, - "learning_rate": 5.757085427135679e-05, - "loss": 5.1144, - "step": 42733 - }, - { - "epoch": 22.286310299869623, - "grad_norm": 1.526979923248291, - "learning_rate": 5.7569849246231156e-05, - "loss": 5.4491, - "step": 42734 - }, - { - "epoch": 22.286831812255542, - "grad_norm": 1.6305540800094604, - "learning_rate": 5.7568844221105534e-05, - "loss": 4.5524, - "step": 42735 - }, - { - "epoch": 22.28735332464146, - "grad_norm": 1.720597743988037, - "learning_rate": 5.75678391959799e-05, - "loss": 4.7765, - "step": 42736 - }, - { - "epoch": 22.28787483702738, - "grad_norm": 1.5495737791061401, - "learning_rate": 5.7566834170854276e-05, - "loss": 5.6303, - "step": 42737 - }, - { - "epoch": 22.288396349413297, - "grad_norm": 1.6587685346603394, - "learning_rate": 5.756582914572865e-05, - "loss": 4.6155, - "step": 42738 - }, - { - "epoch": 22.288917861799217, - "grad_norm": 1.630443811416626, - "learning_rate": 5.7564824120603025e-05, - "loss": 4.7045, - "step": 42739 - }, - { - "epoch": 22.289439374185136, - "grad_norm": 1.5973398685455322, - "learning_rate": 5.756381909547739e-05, - "loss": 5.0058, - "step": 42740 - }, - { - "epoch": 22.289960886571055, - "grad_norm": 1.4852455854415894, - "learning_rate": 5.756281407035177e-05, - "loss": 5.102, - "step": 42741 - }, - { - "epoch": 22.290482398956975, - "grad_norm": 1.6017577648162842, - "learning_rate": 5.756180904522613e-05, - "loss": 5.1057, - "step": 42742 - }, - { - "epoch": 22.291003911342894, - "grad_norm": 1.5645556449890137, - "learning_rate": 5.75608040201005e-05, - "loss": 5.118, - "step": 42743 - }, - { - "epoch": 22.291525423728814, - "grad_norm": 1.639432668685913, - "learning_rate": 5.755979899497488e-05, - "loss": 5.255, - "step": 42744 - }, - { - "epoch": 22.292046936114733, - "grad_norm": 1.4921025037765503, - "learning_rate": 5.7558793969849244e-05, - "loss": 4.6801, - "step": 42745 - }, - { - "epoch": 22.292568448500653, - "grad_norm": 1.5907570123672485, - "learning_rate": 5.755778894472362e-05, - "loss": 5.1928, - "step": 42746 - }, - { - "epoch": 22.293089960886572, - "grad_norm": 1.5050814151763916, - "learning_rate": 5.7556783919597986e-05, - "loss": 5.2712, - "step": 42747 - }, - { - "epoch": 22.29361147327249, - "grad_norm": 1.6353517770767212, - "learning_rate": 5.7555778894472364e-05, - "loss": 4.9787, - "step": 42748 - }, - { - "epoch": 22.294132985658408, - "grad_norm": 1.5008080005645752, - "learning_rate": 5.7554773869346735e-05, - "loss": 4.942, - "step": 42749 - }, - { - "epoch": 22.294654498044327, - "grad_norm": 1.474360466003418, - "learning_rate": 5.755376884422111e-05, - "loss": 5.0111, - "step": 42750 - }, - { - "epoch": 22.295176010430247, - "grad_norm": 1.6336030960083008, - "learning_rate": 5.755276381909548e-05, - "loss": 4.8312, - "step": 42751 - }, - { - "epoch": 22.295697522816166, - "grad_norm": 1.5718523263931274, - "learning_rate": 5.7551758793969855e-05, - "loss": 5.3217, - "step": 42752 - }, - { - "epoch": 22.296219035202085, - "grad_norm": 1.4818423986434937, - "learning_rate": 5.7550753768844226e-05, - "loss": 4.8477, - "step": 42753 - }, - { - "epoch": 22.296740547588005, - "grad_norm": 1.5473694801330566, - "learning_rate": 5.7549748743718604e-05, - "loss": 5.1857, - "step": 42754 - }, - { - "epoch": 22.297262059973924, - "grad_norm": 1.4837837219238281, - "learning_rate": 5.754874371859297e-05, - "loss": 5.1828, - "step": 42755 - }, - { - "epoch": 22.297783572359844, - "grad_norm": 1.4898433685302734, - "learning_rate": 5.754773869346733e-05, - "loss": 4.9015, - "step": 42756 - }, - { - "epoch": 22.298305084745763, - "grad_norm": 1.5580596923828125, - "learning_rate": 5.754673366834171e-05, - "loss": 5.2694, - "step": 42757 - }, - { - "epoch": 22.298826597131683, - "grad_norm": 1.4999011754989624, - "learning_rate": 5.754572864321608e-05, - "loss": 5.658, - "step": 42758 - }, - { - "epoch": 22.299348109517602, - "grad_norm": 1.586396336555481, - "learning_rate": 5.754472361809046e-05, - "loss": 5.2743, - "step": 42759 - }, - { - "epoch": 22.29986962190352, - "grad_norm": 1.470942735671997, - "learning_rate": 5.7543718592964824e-05, - "loss": 5.637, - "step": 42760 - }, - { - "epoch": 22.300391134289438, - "grad_norm": 1.5429136753082275, - "learning_rate": 5.75427135678392e-05, - "loss": 5.474, - "step": 42761 - }, - { - "epoch": 22.300912646675357, - "grad_norm": 1.4912092685699463, - "learning_rate": 5.7541708542713566e-05, - "loss": 5.4072, - "step": 42762 - }, - { - "epoch": 22.301434159061277, - "grad_norm": 1.5132808685302734, - "learning_rate": 5.7540703517587944e-05, - "loss": 5.3368, - "step": 42763 - }, - { - "epoch": 22.301955671447196, - "grad_norm": 1.5622069835662842, - "learning_rate": 5.7539698492462315e-05, - "loss": 5.487, - "step": 42764 - }, - { - "epoch": 22.302477183833116, - "grad_norm": 1.437434434890747, - "learning_rate": 5.753869346733669e-05, - "loss": 5.7006, - "step": 42765 - }, - { - "epoch": 22.302998696219035, - "grad_norm": 1.803671956062317, - "learning_rate": 5.753768844221106e-05, - "loss": 4.1344, - "step": 42766 - }, - { - "epoch": 22.303520208604954, - "grad_norm": 1.652134895324707, - "learning_rate": 5.7536683417085435e-05, - "loss": 5.193, - "step": 42767 - }, - { - "epoch": 22.304041720990874, - "grad_norm": 1.446913242340088, - "learning_rate": 5.75356783919598e-05, - "loss": 5.5676, - "step": 42768 - }, - { - "epoch": 22.304563233376793, - "grad_norm": 1.8638867139816284, - "learning_rate": 5.753467336683417e-05, - "loss": 5.3321, - "step": 42769 - }, - { - "epoch": 22.305084745762713, - "grad_norm": 1.418784499168396, - "learning_rate": 5.753366834170855e-05, - "loss": 5.5671, - "step": 42770 - }, - { - "epoch": 22.305606258148632, - "grad_norm": 1.6472268104553223, - "learning_rate": 5.753266331658291e-05, - "loss": 5.1672, - "step": 42771 - }, - { - "epoch": 22.30612777053455, - "grad_norm": 1.4837148189544678, - "learning_rate": 5.753165829145729e-05, - "loss": 5.6534, - "step": 42772 - }, - { - "epoch": 22.306649282920468, - "grad_norm": 1.597639560699463, - "learning_rate": 5.753065326633166e-05, - "loss": 4.6664, - "step": 42773 - }, - { - "epoch": 22.307170795306387, - "grad_norm": 1.5298717021942139, - "learning_rate": 5.752964824120604e-05, - "loss": 5.1607, - "step": 42774 - }, - { - "epoch": 22.307692307692307, - "grad_norm": 1.553389310836792, - "learning_rate": 5.75286432160804e-05, - "loss": 4.8918, - "step": 42775 - }, - { - "epoch": 22.308213820078226, - "grad_norm": 1.7909811735153198, - "learning_rate": 5.752763819095478e-05, - "loss": 4.8344, - "step": 42776 - }, - { - "epoch": 22.308735332464146, - "grad_norm": 1.8073606491088867, - "learning_rate": 5.7526633165829145e-05, - "loss": 4.7238, - "step": 42777 - }, - { - "epoch": 22.309256844850065, - "grad_norm": 1.4976232051849365, - "learning_rate": 5.752562814070352e-05, - "loss": 5.3658, - "step": 42778 - }, - { - "epoch": 22.309778357235984, - "grad_norm": 1.501645803451538, - "learning_rate": 5.7524623115577894e-05, - "loss": 5.6423, - "step": 42779 - }, - { - "epoch": 22.310299869621904, - "grad_norm": 1.4912315607070923, - "learning_rate": 5.752361809045227e-05, - "loss": 5.471, - "step": 42780 - }, - { - "epoch": 22.310821382007823, - "grad_norm": 1.384901762008667, - "learning_rate": 5.7522613065326636e-05, - "loss": 5.7815, - "step": 42781 - }, - { - "epoch": 22.311342894393743, - "grad_norm": 1.4566527605056763, - "learning_rate": 5.7521608040201e-05, - "loss": 5.3961, - "step": 42782 - }, - { - "epoch": 22.311864406779662, - "grad_norm": 1.528397798538208, - "learning_rate": 5.752060301507538e-05, - "loss": 5.2963, - "step": 42783 - }, - { - "epoch": 22.312385919165582, - "grad_norm": 1.603379726409912, - "learning_rate": 5.751959798994975e-05, - "loss": 5.0302, - "step": 42784 - }, - { - "epoch": 22.312907431551498, - "grad_norm": 1.5466679334640503, - "learning_rate": 5.751859296482413e-05, - "loss": 5.2818, - "step": 42785 - }, - { - "epoch": 22.313428943937417, - "grad_norm": 1.4719234704971313, - "learning_rate": 5.751758793969849e-05, - "loss": 5.57, - "step": 42786 - }, - { - "epoch": 22.313950456323337, - "grad_norm": 1.5690820217132568, - "learning_rate": 5.751658291457287e-05, - "loss": 4.9966, - "step": 42787 - }, - { - "epoch": 22.314471968709256, - "grad_norm": 1.5730342864990234, - "learning_rate": 5.7515577889447233e-05, - "loss": 5.2985, - "step": 42788 - }, - { - "epoch": 22.314993481095176, - "grad_norm": 1.6106051206588745, - "learning_rate": 5.751457286432161e-05, - "loss": 5.0798, - "step": 42789 - }, - { - "epoch": 22.315514993481095, - "grad_norm": 1.557536005973816, - "learning_rate": 5.751356783919598e-05, - "loss": 4.9497, - "step": 42790 - }, - { - "epoch": 22.316036505867014, - "grad_norm": 1.4732604026794434, - "learning_rate": 5.751256281407036e-05, - "loss": 5.569, - "step": 42791 - }, - { - "epoch": 22.316558018252934, - "grad_norm": 1.564284086227417, - "learning_rate": 5.7511557788944724e-05, - "loss": 5.6254, - "step": 42792 - }, - { - "epoch": 22.317079530638853, - "grad_norm": 1.4350491762161255, - "learning_rate": 5.75105527638191e-05, - "loss": 5.6616, - "step": 42793 - }, - { - "epoch": 22.317601043024773, - "grad_norm": 1.528190016746521, - "learning_rate": 5.750954773869347e-05, - "loss": 5.2763, - "step": 42794 - }, - { - "epoch": 22.318122555410692, - "grad_norm": 1.4811400175094604, - "learning_rate": 5.750854271356784e-05, - "loss": 5.3562, - "step": 42795 - }, - { - "epoch": 22.318644067796612, - "grad_norm": 1.5091955661773682, - "learning_rate": 5.7507537688442215e-05, - "loss": 5.4477, - "step": 42796 - }, - { - "epoch": 22.319165580182528, - "grad_norm": 1.545815110206604, - "learning_rate": 5.750653266331658e-05, - "loss": 5.258, - "step": 42797 - }, - { - "epoch": 22.319687092568447, - "grad_norm": 1.5791497230529785, - "learning_rate": 5.750552763819096e-05, - "loss": 5.1381, - "step": 42798 - }, - { - "epoch": 22.320208604954367, - "grad_norm": 1.5134024620056152, - "learning_rate": 5.750452261306533e-05, - "loss": 5.3597, - "step": 42799 - }, - { - "epoch": 22.320730117340286, - "grad_norm": 1.4704854488372803, - "learning_rate": 5.7503517587939706e-05, - "loss": 5.4095, - "step": 42800 - }, - { - "epoch": 22.321251629726206, - "grad_norm": 1.592573881149292, - "learning_rate": 5.750251256281407e-05, - "loss": 4.9296, - "step": 42801 - }, - { - "epoch": 22.321773142112125, - "grad_norm": 1.4801677465438843, - "learning_rate": 5.750150753768845e-05, - "loss": 5.5423, - "step": 42802 - }, - { - "epoch": 22.322294654498045, - "grad_norm": 1.5843042135238647, - "learning_rate": 5.750050251256281e-05, - "loss": 5.2979, - "step": 42803 - }, - { - "epoch": 22.322816166883964, - "grad_norm": 1.4889599084854126, - "learning_rate": 5.749949748743719e-05, - "loss": 4.9618, - "step": 42804 - }, - { - "epoch": 22.323337679269883, - "grad_norm": 1.6002346277236938, - "learning_rate": 5.749849246231156e-05, - "loss": 5.064, - "step": 42805 - }, - { - "epoch": 22.323859191655803, - "grad_norm": 1.5704405307769775, - "learning_rate": 5.749748743718594e-05, - "loss": 5.2321, - "step": 42806 - }, - { - "epoch": 22.324380704041722, - "grad_norm": 1.6929042339324951, - "learning_rate": 5.7496482412060304e-05, - "loss": 5.0314, - "step": 42807 - }, - { - "epoch": 22.324902216427642, - "grad_norm": 1.650964617729187, - "learning_rate": 5.749547738693467e-05, - "loss": 4.8591, - "step": 42808 - }, - { - "epoch": 22.325423728813558, - "grad_norm": 1.540054202079773, - "learning_rate": 5.7494472361809046e-05, - "loss": 4.9632, - "step": 42809 - }, - { - "epoch": 22.325945241199477, - "grad_norm": 1.5971612930297852, - "learning_rate": 5.749346733668342e-05, - "loss": 5.1937, - "step": 42810 - }, - { - "epoch": 22.326466753585397, - "grad_norm": 1.6140475273132324, - "learning_rate": 5.7492462311557795e-05, - "loss": 5.2626, - "step": 42811 - }, - { - "epoch": 22.326988265971316, - "grad_norm": 1.4521375894546509, - "learning_rate": 5.749145728643216e-05, - "loss": 5.4056, - "step": 42812 - }, - { - "epoch": 22.327509778357236, - "grad_norm": 1.5422048568725586, - "learning_rate": 5.749045226130654e-05, - "loss": 5.6258, - "step": 42813 - }, - { - "epoch": 22.328031290743155, - "grad_norm": 1.5375878810882568, - "learning_rate": 5.74894472361809e-05, - "loss": 5.2817, - "step": 42814 - }, - { - "epoch": 22.328552803129075, - "grad_norm": 1.488816261291504, - "learning_rate": 5.748844221105528e-05, - "loss": 5.0139, - "step": 42815 - }, - { - "epoch": 22.329074315514994, - "grad_norm": 1.6023046970367432, - "learning_rate": 5.748743718592965e-05, - "loss": 4.9498, - "step": 42816 - }, - { - "epoch": 22.329595827900913, - "grad_norm": 1.5314183235168457, - "learning_rate": 5.748643216080403e-05, - "loss": 5.1151, - "step": 42817 - }, - { - "epoch": 22.330117340286833, - "grad_norm": 1.526774287223816, - "learning_rate": 5.748542713567839e-05, - "loss": 5.2484, - "step": 42818 - }, - { - "epoch": 22.330638852672752, - "grad_norm": 1.4831304550170898, - "learning_rate": 5.748442211055277e-05, - "loss": 5.437, - "step": 42819 - }, - { - "epoch": 22.331160365058672, - "grad_norm": 1.6507471799850464, - "learning_rate": 5.748341708542714e-05, - "loss": 5.0606, - "step": 42820 - }, - { - "epoch": 22.331681877444588, - "grad_norm": 1.602585792541504, - "learning_rate": 5.7482412060301505e-05, - "loss": 5.513, - "step": 42821 - }, - { - "epoch": 22.332203389830507, - "grad_norm": 1.504675030708313, - "learning_rate": 5.748140703517588e-05, - "loss": 5.3852, - "step": 42822 - }, - { - "epoch": 22.332724902216427, - "grad_norm": 1.4387164115905762, - "learning_rate": 5.748040201005025e-05, - "loss": 5.8431, - "step": 42823 - }, - { - "epoch": 22.333246414602346, - "grad_norm": 1.5318255424499512, - "learning_rate": 5.7479396984924625e-05, - "loss": 5.3473, - "step": 42824 - }, - { - "epoch": 22.333767926988266, - "grad_norm": 1.6261584758758545, - "learning_rate": 5.7478391959798996e-05, - "loss": 5.0366, - "step": 42825 - }, - { - "epoch": 22.334289439374185, - "grad_norm": 1.4903706312179565, - "learning_rate": 5.7477386934673374e-05, - "loss": 5.3913, - "step": 42826 - }, - { - "epoch": 22.334810951760105, - "grad_norm": 1.565327763557434, - "learning_rate": 5.747638190954774e-05, - "loss": 4.9777, - "step": 42827 - }, - { - "epoch": 22.335332464146024, - "grad_norm": 1.6317617893218994, - "learning_rate": 5.7475376884422116e-05, - "loss": 4.3646, - "step": 42828 - }, - { - "epoch": 22.335853976531943, - "grad_norm": 1.4455677270889282, - "learning_rate": 5.747437185929648e-05, - "loss": 5.6147, - "step": 42829 - }, - { - "epoch": 22.336375488917863, - "grad_norm": 1.4496139287948608, - "learning_rate": 5.747336683417086e-05, - "loss": 5.3208, - "step": 42830 - }, - { - "epoch": 22.336897001303782, - "grad_norm": 1.5573583841323853, - "learning_rate": 5.747236180904523e-05, - "loss": 5.3901, - "step": 42831 - }, - { - "epoch": 22.3374185136897, - "grad_norm": 1.6755626201629639, - "learning_rate": 5.747135678391961e-05, - "loss": 5.0111, - "step": 42832 - }, - { - "epoch": 22.337940026075618, - "grad_norm": 1.5682693719863892, - "learning_rate": 5.747035175879397e-05, - "loss": 4.9562, - "step": 42833 - }, - { - "epoch": 22.338461538461537, - "grad_norm": 1.5217403173446655, - "learning_rate": 5.746934673366835e-05, - "loss": 5.3804, - "step": 42834 - }, - { - "epoch": 22.338983050847457, - "grad_norm": 1.5286898612976074, - "learning_rate": 5.7468341708542714e-05, - "loss": 5.297, - "step": 42835 - }, - { - "epoch": 22.339504563233376, - "grad_norm": 1.5463917255401611, - "learning_rate": 5.7467336683417085e-05, - "loss": 5.2, - "step": 42836 - }, - { - "epoch": 22.340026075619296, - "grad_norm": 1.484580636024475, - "learning_rate": 5.746633165829146e-05, - "loss": 4.3636, - "step": 42837 - }, - { - "epoch": 22.340547588005215, - "grad_norm": 1.5628561973571777, - "learning_rate": 5.746532663316583e-05, - "loss": 5.5169, - "step": 42838 - }, - { - "epoch": 22.341069100391135, - "grad_norm": 1.4254413843154907, - "learning_rate": 5.7464321608040205e-05, - "loss": 5.0598, - "step": 42839 - }, - { - "epoch": 22.341590612777054, - "grad_norm": 1.5039458274841309, - "learning_rate": 5.7463316582914576e-05, - "loss": 5.3165, - "step": 42840 - }, - { - "epoch": 22.342112125162974, - "grad_norm": 1.5416696071624756, - "learning_rate": 5.7462311557788953e-05, - "loss": 5.2756, - "step": 42841 - }, - { - "epoch": 22.342633637548893, - "grad_norm": 1.6005851030349731, - "learning_rate": 5.746130653266332e-05, - "loss": 5.2146, - "step": 42842 - }, - { - "epoch": 22.343155149934812, - "grad_norm": 1.5308830738067627, - "learning_rate": 5.7460301507537696e-05, - "loss": 5.7603, - "step": 42843 - }, - { - "epoch": 22.343676662320732, - "grad_norm": 1.510621190071106, - "learning_rate": 5.745929648241206e-05, - "loss": 5.3567, - "step": 42844 - }, - { - "epoch": 22.344198174706648, - "grad_norm": 1.4573752880096436, - "learning_rate": 5.745829145728644e-05, - "loss": 5.563, - "step": 42845 - }, - { - "epoch": 22.344719687092567, - "grad_norm": 1.5491293668746948, - "learning_rate": 5.745728643216081e-05, - "loss": 5.2459, - "step": 42846 - }, - { - "epoch": 22.345241199478487, - "grad_norm": 1.5386322736740112, - "learning_rate": 5.7456281407035187e-05, - "loss": 4.7565, - "step": 42847 - }, - { - "epoch": 22.345762711864406, - "grad_norm": 1.685471773147583, - "learning_rate": 5.745527638190955e-05, - "loss": 5.2997, - "step": 42848 - }, - { - "epoch": 22.346284224250326, - "grad_norm": 1.5531773567199707, - "learning_rate": 5.7454271356783915e-05, - "loss": 5.0679, - "step": 42849 - }, - { - "epoch": 22.346805736636245, - "grad_norm": 1.622462511062622, - "learning_rate": 5.745326633165829e-05, - "loss": 5.4753, - "step": 42850 - }, - { - "epoch": 22.347327249022165, - "grad_norm": 1.5963082313537598, - "learning_rate": 5.7452261306532664e-05, - "loss": 5.0217, - "step": 42851 - }, - { - "epoch": 22.347848761408084, - "grad_norm": 1.4886051416397095, - "learning_rate": 5.745125628140704e-05, - "loss": 5.4631, - "step": 42852 - }, - { - "epoch": 22.348370273794004, - "grad_norm": 1.5990064144134521, - "learning_rate": 5.7450251256281406e-05, - "loss": 5.1362, - "step": 42853 - }, - { - "epoch": 22.348891786179923, - "grad_norm": 1.575565218925476, - "learning_rate": 5.7449246231155784e-05, - "loss": 5.2738, - "step": 42854 - }, - { - "epoch": 22.349413298565842, - "grad_norm": 1.4949923753738403, - "learning_rate": 5.744824120603015e-05, - "loss": 5.3391, - "step": 42855 - }, - { - "epoch": 22.34993481095176, - "grad_norm": 1.6351250410079956, - "learning_rate": 5.7447236180904526e-05, - "loss": 4.8036, - "step": 42856 - }, - { - "epoch": 22.350456323337678, - "grad_norm": 1.4721981287002563, - "learning_rate": 5.74462311557789e-05, - "loss": 5.2592, - "step": 42857 - }, - { - "epoch": 22.350977835723597, - "grad_norm": 1.525246024131775, - "learning_rate": 5.7445226130653275e-05, - "loss": 4.9373, - "step": 42858 - }, - { - "epoch": 22.351499348109517, - "grad_norm": 1.607305645942688, - "learning_rate": 5.744422110552764e-05, - "loss": 5.5217, - "step": 42859 - }, - { - "epoch": 22.352020860495436, - "grad_norm": 1.5342620611190796, - "learning_rate": 5.744321608040202e-05, - "loss": 5.37, - "step": 42860 - }, - { - "epoch": 22.352542372881356, - "grad_norm": 1.6928009986877441, - "learning_rate": 5.744221105527639e-05, - "loss": 5.5065, - "step": 42861 - }, - { - "epoch": 22.353063885267275, - "grad_norm": 1.533974289894104, - "learning_rate": 5.744120603015075e-05, - "loss": 5.3807, - "step": 42862 - }, - { - "epoch": 22.353585397653195, - "grad_norm": 1.5266495943069458, - "learning_rate": 5.744020100502513e-05, - "loss": 5.4767, - "step": 42863 - }, - { - "epoch": 22.354106910039114, - "grad_norm": 1.6541231870651245, - "learning_rate": 5.7439195979899494e-05, - "loss": 4.9945, - "step": 42864 - }, - { - "epoch": 22.354628422425034, - "grad_norm": 1.5179107189178467, - "learning_rate": 5.743819095477387e-05, - "loss": 5.4464, - "step": 42865 - }, - { - "epoch": 22.355149934810953, - "grad_norm": 1.6335253715515137, - "learning_rate": 5.743718592964824e-05, - "loss": 4.8241, - "step": 42866 - }, - { - "epoch": 22.355671447196872, - "grad_norm": 1.492908239364624, - "learning_rate": 5.743618090452262e-05, - "loss": 5.2071, - "step": 42867 - }, - { - "epoch": 22.35619295958279, - "grad_norm": 1.5183640718460083, - "learning_rate": 5.7435175879396985e-05, - "loss": 5.0362, - "step": 42868 - }, - { - "epoch": 22.356714471968708, - "grad_norm": 1.4831368923187256, - "learning_rate": 5.743417085427136e-05, - "loss": 5.014, - "step": 42869 - }, - { - "epoch": 22.357235984354627, - "grad_norm": 1.5525263547897339, - "learning_rate": 5.743316582914573e-05, - "loss": 5.4015, - "step": 42870 - }, - { - "epoch": 22.357757496740547, - "grad_norm": 1.5319117307662964, - "learning_rate": 5.7432160804020105e-05, - "loss": 5.3297, - "step": 42871 - }, - { - "epoch": 22.358279009126466, - "grad_norm": 1.4968235492706299, - "learning_rate": 5.7431155778894476e-05, - "loss": 5.3102, - "step": 42872 - }, - { - "epoch": 22.358800521512386, - "grad_norm": 1.566992163658142, - "learning_rate": 5.7430150753768854e-05, - "loss": 4.7126, - "step": 42873 - }, - { - "epoch": 22.359322033898305, - "grad_norm": 1.46524977684021, - "learning_rate": 5.742914572864322e-05, - "loss": 5.1122, - "step": 42874 - }, - { - "epoch": 22.359843546284225, - "grad_norm": 1.51374351978302, - "learning_rate": 5.742814070351758e-05, - "loss": 5.1658, - "step": 42875 - }, - { - "epoch": 22.360365058670144, - "grad_norm": 1.5765234231948853, - "learning_rate": 5.742713567839196e-05, - "loss": 5.3228, - "step": 42876 - }, - { - "epoch": 22.360886571056064, - "grad_norm": 1.4952105283737183, - "learning_rate": 5.742613065326633e-05, - "loss": 5.4397, - "step": 42877 - }, - { - "epoch": 22.361408083441983, - "grad_norm": 1.5856044292449951, - "learning_rate": 5.742512562814071e-05, - "loss": 5.305, - "step": 42878 - }, - { - "epoch": 22.361929595827903, - "grad_norm": 1.5251072645187378, - "learning_rate": 5.7424120603015074e-05, - "loss": 4.8119, - "step": 42879 - }, - { - "epoch": 22.36245110821382, - "grad_norm": 1.482258677482605, - "learning_rate": 5.742311557788945e-05, - "loss": 5.3897, - "step": 42880 - }, - { - "epoch": 22.362972620599738, - "grad_norm": 1.4763643741607666, - "learning_rate": 5.742211055276382e-05, - "loss": 5.4899, - "step": 42881 - }, - { - "epoch": 22.363494132985657, - "grad_norm": 1.5505344867706299, - "learning_rate": 5.74211055276382e-05, - "loss": 4.772, - "step": 42882 - }, - { - "epoch": 22.364015645371577, - "grad_norm": 1.6290355920791626, - "learning_rate": 5.7420100502512565e-05, - "loss": 5.4754, - "step": 42883 - }, - { - "epoch": 22.364537157757496, - "grad_norm": 1.5098071098327637, - "learning_rate": 5.741909547738694e-05, - "loss": 5.3604, - "step": 42884 - }, - { - "epoch": 22.365058670143416, - "grad_norm": 1.5537015199661255, - "learning_rate": 5.741809045226131e-05, - "loss": 4.7007, - "step": 42885 - }, - { - "epoch": 22.365580182529335, - "grad_norm": 1.5832258462905884, - "learning_rate": 5.7417085427135685e-05, - "loss": 4.6154, - "step": 42886 - }, - { - "epoch": 22.366101694915255, - "grad_norm": 1.550389289855957, - "learning_rate": 5.7416080402010056e-05, - "loss": 5.1796, - "step": 42887 - }, - { - "epoch": 22.366623207301174, - "grad_norm": 1.5595495700836182, - "learning_rate": 5.741507537688442e-05, - "loss": 5.2152, - "step": 42888 - }, - { - "epoch": 22.367144719687094, - "grad_norm": 1.5427892208099365, - "learning_rate": 5.74140703517588e-05, - "loss": 5.2097, - "step": 42889 - }, - { - "epoch": 22.367666232073013, - "grad_norm": 1.5164943933486938, - "learning_rate": 5.741306532663316e-05, - "loss": 5.34, - "step": 42890 - }, - { - "epoch": 22.368187744458933, - "grad_norm": 1.6050875186920166, - "learning_rate": 5.741206030150754e-05, - "loss": 5.0682, - "step": 42891 - }, - { - "epoch": 22.36870925684485, - "grad_norm": 1.515204906463623, - "learning_rate": 5.741105527638191e-05, - "loss": 5.0972, - "step": 42892 - }, - { - "epoch": 22.369230769230768, - "grad_norm": 1.5969642400741577, - "learning_rate": 5.741005025125629e-05, - "loss": 4.9931, - "step": 42893 - }, - { - "epoch": 22.369752281616687, - "grad_norm": 1.5290542840957642, - "learning_rate": 5.740904522613065e-05, - "loss": 5.3126, - "step": 42894 - }, - { - "epoch": 22.370273794002607, - "grad_norm": 1.4441026449203491, - "learning_rate": 5.740804020100503e-05, - "loss": 5.3445, - "step": 42895 - }, - { - "epoch": 22.370795306388526, - "grad_norm": 1.6258403062820435, - "learning_rate": 5.7407035175879395e-05, - "loss": 5.2518, - "step": 42896 - }, - { - "epoch": 22.371316818774446, - "grad_norm": 1.612577199935913, - "learning_rate": 5.740603015075377e-05, - "loss": 5.3864, - "step": 42897 - }, - { - "epoch": 22.371838331160365, - "grad_norm": 1.6264102458953857, - "learning_rate": 5.7405025125628144e-05, - "loss": 5.149, - "step": 42898 - }, - { - "epoch": 22.372359843546285, - "grad_norm": 1.521488904953003, - "learning_rate": 5.740402010050252e-05, - "loss": 5.5002, - "step": 42899 - }, - { - "epoch": 22.372881355932204, - "grad_norm": 1.628991961479187, - "learning_rate": 5.7403015075376886e-05, - "loss": 5.3411, - "step": 42900 - }, - { - "epoch": 22.373402868318124, - "grad_norm": 1.5440915822982788, - "learning_rate": 5.740201005025125e-05, - "loss": 5.3417, - "step": 42901 - }, - { - "epoch": 22.373924380704043, - "grad_norm": 1.5288612842559814, - "learning_rate": 5.740100502512563e-05, - "loss": 4.8967, - "step": 42902 - }, - { - "epoch": 22.374445893089963, - "grad_norm": 1.5228627920150757, - "learning_rate": 5.74e-05, - "loss": 5.184, - "step": 42903 - }, - { - "epoch": 22.37496740547588, - "grad_norm": 1.5160425901412964, - "learning_rate": 5.739899497487438e-05, - "loss": 5.5055, - "step": 42904 - }, - { - "epoch": 22.375488917861798, - "grad_norm": 1.5593863725662231, - "learning_rate": 5.739798994974874e-05, - "loss": 4.9926, - "step": 42905 - }, - { - "epoch": 22.376010430247717, - "grad_norm": 1.5880602598190308, - "learning_rate": 5.739698492462312e-05, - "loss": 5.5848, - "step": 42906 - }, - { - "epoch": 22.376531942633637, - "grad_norm": 1.6088818311691284, - "learning_rate": 5.739597989949749e-05, - "loss": 5.259, - "step": 42907 - }, - { - "epoch": 22.377053455019556, - "grad_norm": 1.5144789218902588, - "learning_rate": 5.739497487437187e-05, - "loss": 5.4824, - "step": 42908 - }, - { - "epoch": 22.377574967405476, - "grad_norm": 1.4692318439483643, - "learning_rate": 5.739396984924623e-05, - "loss": 5.177, - "step": 42909 - }, - { - "epoch": 22.378096479791395, - "grad_norm": 1.5088121891021729, - "learning_rate": 5.739296482412061e-05, - "loss": 4.8333, - "step": 42910 - }, - { - "epoch": 22.378617992177315, - "grad_norm": 1.4833545684814453, - "learning_rate": 5.7391959798994974e-05, - "loss": 5.33, - "step": 42911 - }, - { - "epoch": 22.379139504563234, - "grad_norm": 1.6378998756408691, - "learning_rate": 5.739095477386935e-05, - "loss": 5.0495, - "step": 42912 - }, - { - "epoch": 22.379661016949154, - "grad_norm": 1.4974472522735596, - "learning_rate": 5.738994974874372e-05, - "loss": 5.2579, - "step": 42913 - }, - { - "epoch": 22.380182529335073, - "grad_norm": 1.5015650987625122, - "learning_rate": 5.738894472361809e-05, - "loss": 5.3076, - "step": 42914 - }, - { - "epoch": 22.380704041720993, - "grad_norm": 1.544075608253479, - "learning_rate": 5.7387939698492465e-05, - "loss": 5.154, - "step": 42915 - }, - { - "epoch": 22.38122555410691, - "grad_norm": 1.5891059637069702, - "learning_rate": 5.738693467336683e-05, - "loss": 5.5685, - "step": 42916 - }, - { - "epoch": 22.381747066492828, - "grad_norm": 1.5550227165222168, - "learning_rate": 5.738592964824121e-05, - "loss": 4.8232, - "step": 42917 - }, - { - "epoch": 22.382268578878747, - "grad_norm": 1.5777685642242432, - "learning_rate": 5.738492462311558e-05, - "loss": 4.8894, - "step": 42918 - }, - { - "epoch": 22.382790091264667, - "grad_norm": 1.4557856321334839, - "learning_rate": 5.7383919597989956e-05, - "loss": 5.3715, - "step": 42919 - }, - { - "epoch": 22.383311603650586, - "grad_norm": 1.5824731588363647, - "learning_rate": 5.738291457286432e-05, - "loss": 5.6409, - "step": 42920 - }, - { - "epoch": 22.383833116036506, - "grad_norm": 1.6751708984375, - "learning_rate": 5.73819095477387e-05, - "loss": 4.6927, - "step": 42921 - }, - { - "epoch": 22.384354628422425, - "grad_norm": 1.4853863716125488, - "learning_rate": 5.738090452261306e-05, - "loss": 5.4917, - "step": 42922 - }, - { - "epoch": 22.384876140808345, - "grad_norm": 1.5500450134277344, - "learning_rate": 5.737989949748744e-05, - "loss": 5.5968, - "step": 42923 - }, - { - "epoch": 22.385397653194264, - "grad_norm": 1.614044427871704, - "learning_rate": 5.737889447236181e-05, - "loss": 5.1832, - "step": 42924 - }, - { - "epoch": 22.385919165580184, - "grad_norm": 1.578064203262329, - "learning_rate": 5.737788944723619e-05, - "loss": 4.5597, - "step": 42925 - }, - { - "epoch": 22.386440677966103, - "grad_norm": 1.491530179977417, - "learning_rate": 5.7376884422110554e-05, - "loss": 5.5475, - "step": 42926 - }, - { - "epoch": 22.386962190352023, - "grad_norm": 1.5222409963607788, - "learning_rate": 5.737587939698493e-05, - "loss": 5.2512, - "step": 42927 - }, - { - "epoch": 22.38748370273794, - "grad_norm": 1.5096936225891113, - "learning_rate": 5.73748743718593e-05, - "loss": 5.493, - "step": 42928 - }, - { - "epoch": 22.388005215123858, - "grad_norm": 1.3845131397247314, - "learning_rate": 5.737386934673367e-05, - "loss": 4.8905, - "step": 42929 - }, - { - "epoch": 22.388526727509777, - "grad_norm": 1.4581518173217773, - "learning_rate": 5.7372864321608045e-05, - "loss": 4.7885, - "step": 42930 - }, - { - "epoch": 22.389048239895697, - "grad_norm": 1.5288833379745483, - "learning_rate": 5.737185929648241e-05, - "loss": 5.0267, - "step": 42931 - }, - { - "epoch": 22.389569752281616, - "grad_norm": 1.5153896808624268, - "learning_rate": 5.737085427135679e-05, - "loss": 5.0125, - "step": 42932 - }, - { - "epoch": 22.390091264667536, - "grad_norm": 1.4864826202392578, - "learning_rate": 5.736984924623116e-05, - "loss": 5.3303, - "step": 42933 - }, - { - "epoch": 22.390612777053455, - "grad_norm": 1.44159734249115, - "learning_rate": 5.7368844221105536e-05, - "loss": 5.3555, - "step": 42934 - }, - { - "epoch": 22.391134289439375, - "grad_norm": 1.5137139558792114, - "learning_rate": 5.73678391959799e-05, - "loss": 4.8672, - "step": 42935 - }, - { - "epoch": 22.391655801825294, - "grad_norm": 1.5595335960388184, - "learning_rate": 5.736683417085428e-05, - "loss": 5.2208, - "step": 42936 - }, - { - "epoch": 22.392177314211214, - "grad_norm": 1.5753421783447266, - "learning_rate": 5.736582914572864e-05, - "loss": 4.6185, - "step": 42937 - }, - { - "epoch": 22.392698826597133, - "grad_norm": 1.4661716222763062, - "learning_rate": 5.736482412060302e-05, - "loss": 5.1462, - "step": 42938 - }, - { - "epoch": 22.39322033898305, - "grad_norm": 1.6038013696670532, - "learning_rate": 5.736381909547739e-05, - "loss": 5.2655, - "step": 42939 - }, - { - "epoch": 22.39374185136897, - "grad_norm": 1.5865533351898193, - "learning_rate": 5.736281407035177e-05, - "loss": 5.2245, - "step": 42940 - }, - { - "epoch": 22.394263363754888, - "grad_norm": 1.623478889465332, - "learning_rate": 5.736180904522613e-05, - "loss": 5.2023, - "step": 42941 - }, - { - "epoch": 22.394784876140807, - "grad_norm": 1.6189968585968018, - "learning_rate": 5.73608040201005e-05, - "loss": 4.8899, - "step": 42942 - }, - { - "epoch": 22.395306388526727, - "grad_norm": 1.5673750638961792, - "learning_rate": 5.7359798994974875e-05, - "loss": 5.3843, - "step": 42943 - }, - { - "epoch": 22.395827900912646, - "grad_norm": 1.4984862804412842, - "learning_rate": 5.7358793969849246e-05, - "loss": 5.6647, - "step": 42944 - }, - { - "epoch": 22.396349413298566, - "grad_norm": 1.6237870454788208, - "learning_rate": 5.7357788944723624e-05, - "loss": 4.4427, - "step": 42945 - }, - { - "epoch": 22.396870925684485, - "grad_norm": 1.6366217136383057, - "learning_rate": 5.735678391959799e-05, - "loss": 4.952, - "step": 42946 - }, - { - "epoch": 22.397392438070405, - "grad_norm": 1.5805312395095825, - "learning_rate": 5.7355778894472366e-05, - "loss": 5.1587, - "step": 42947 - }, - { - "epoch": 22.397913950456324, - "grad_norm": 1.5899157524108887, - "learning_rate": 5.735477386934674e-05, - "loss": 4.9688, - "step": 42948 - }, - { - "epoch": 22.398435462842244, - "grad_norm": 1.6820751428604126, - "learning_rate": 5.7353768844221115e-05, - "loss": 5.0211, - "step": 42949 - }, - { - "epoch": 22.398956975228163, - "grad_norm": 1.5500439405441284, - "learning_rate": 5.735276381909548e-05, - "loss": 5.35, - "step": 42950 - }, - { - "epoch": 22.39947848761408, - "grad_norm": 1.4282385110855103, - "learning_rate": 5.735175879396986e-05, - "loss": 5.0786, - "step": 42951 - }, - { - "epoch": 22.4, - "grad_norm": 1.5523983240127563, - "learning_rate": 5.735075376884422e-05, - "loss": 5.4911, - "step": 42952 - }, - { - "epoch": 22.400521512385918, - "grad_norm": 1.5324007272720337, - "learning_rate": 5.73497487437186e-05, - "loss": 5.3575, - "step": 42953 - }, - { - "epoch": 22.401043024771838, - "grad_norm": 1.5103315114974976, - "learning_rate": 5.734874371859297e-05, - "loss": 5.5573, - "step": 42954 - }, - { - "epoch": 22.401564537157757, - "grad_norm": 1.4810384511947632, - "learning_rate": 5.7347738693467335e-05, - "loss": 5.2345, - "step": 42955 - }, - { - "epoch": 22.402086049543676, - "grad_norm": 1.564318060874939, - "learning_rate": 5.734673366834171e-05, - "loss": 4.9503, - "step": 42956 - }, - { - "epoch": 22.402607561929596, - "grad_norm": 1.509112000465393, - "learning_rate": 5.734572864321608e-05, - "loss": 5.2697, - "step": 42957 - }, - { - "epoch": 22.403129074315515, - "grad_norm": 1.582565426826477, - "learning_rate": 5.7344723618090455e-05, - "loss": 5.5892, - "step": 42958 - }, - { - "epoch": 22.403650586701435, - "grad_norm": 1.498915672302246, - "learning_rate": 5.7343718592964826e-05, - "loss": 5.299, - "step": 42959 - }, - { - "epoch": 22.404172099087354, - "grad_norm": 1.4977563619613647, - "learning_rate": 5.7342713567839203e-05, - "loss": 5.4458, - "step": 42960 - }, - { - "epoch": 22.404693611473274, - "grad_norm": 1.560768961906433, - "learning_rate": 5.734170854271357e-05, - "loss": 5.3105, - "step": 42961 - }, - { - "epoch": 22.405215123859193, - "grad_norm": 1.5477086305618286, - "learning_rate": 5.7340703517587946e-05, - "loss": 5.4601, - "step": 42962 - }, - { - "epoch": 22.40573663624511, - "grad_norm": 1.5421011447906494, - "learning_rate": 5.733969849246231e-05, - "loss": 5.4691, - "step": 42963 - }, - { - "epoch": 22.40625814863103, - "grad_norm": 1.4226195812225342, - "learning_rate": 5.733869346733669e-05, - "loss": 5.7444, - "step": 42964 - }, - { - "epoch": 22.406779661016948, - "grad_norm": 1.4875434637069702, - "learning_rate": 5.733768844221106e-05, - "loss": 5.3218, - "step": 42965 - }, - { - "epoch": 22.407301173402868, - "grad_norm": 1.6513762474060059, - "learning_rate": 5.7336683417085437e-05, - "loss": 5.2812, - "step": 42966 - }, - { - "epoch": 22.407822685788787, - "grad_norm": 1.5039398670196533, - "learning_rate": 5.73356783919598e-05, - "loss": 5.3846, - "step": 42967 - }, - { - "epoch": 22.408344198174706, - "grad_norm": 1.6373463869094849, - "learning_rate": 5.7334673366834165e-05, - "loss": 5.2182, - "step": 42968 - }, - { - "epoch": 22.408865710560626, - "grad_norm": 1.5168533325195312, - "learning_rate": 5.733366834170854e-05, - "loss": 5.1719, - "step": 42969 - }, - { - "epoch": 22.409387222946545, - "grad_norm": 1.510921597480774, - "learning_rate": 5.7332663316582914e-05, - "loss": 5.241, - "step": 42970 - }, - { - "epoch": 22.409908735332465, - "grad_norm": 1.5986089706420898, - "learning_rate": 5.733165829145729e-05, - "loss": 5.2137, - "step": 42971 - }, - { - "epoch": 22.410430247718384, - "grad_norm": 1.586984634399414, - "learning_rate": 5.7330653266331656e-05, - "loss": 4.6787, - "step": 42972 - }, - { - "epoch": 22.410951760104304, - "grad_norm": 1.56332266330719, - "learning_rate": 5.7329648241206034e-05, - "loss": 4.9515, - "step": 42973 - }, - { - "epoch": 22.411473272490223, - "grad_norm": 1.6697046756744385, - "learning_rate": 5.7328643216080405e-05, - "loss": 4.6514, - "step": 42974 - }, - { - "epoch": 22.41199478487614, - "grad_norm": 1.5866037607192993, - "learning_rate": 5.732763819095478e-05, - "loss": 5.0813, - "step": 42975 - }, - { - "epoch": 22.41251629726206, - "grad_norm": 1.5401126146316528, - "learning_rate": 5.732663316582915e-05, - "loss": 5.526, - "step": 42976 - }, - { - "epoch": 22.413037809647978, - "grad_norm": 1.5919543504714966, - "learning_rate": 5.7325628140703525e-05, - "loss": 5.5045, - "step": 42977 - }, - { - "epoch": 22.413559322033898, - "grad_norm": 1.5107676982879639, - "learning_rate": 5.732462311557789e-05, - "loss": 5.4299, - "step": 42978 - }, - { - "epoch": 22.414080834419817, - "grad_norm": 1.5541752576828003, - "learning_rate": 5.732361809045227e-05, - "loss": 5.1719, - "step": 42979 - }, - { - "epoch": 22.414602346805736, - "grad_norm": 1.5348137617111206, - "learning_rate": 5.732261306532664e-05, - "loss": 5.1652, - "step": 42980 - }, - { - "epoch": 22.415123859191656, - "grad_norm": 1.5209633111953735, - "learning_rate": 5.7321608040201e-05, - "loss": 5.2305, - "step": 42981 - }, - { - "epoch": 22.415645371577575, - "grad_norm": 1.4900665283203125, - "learning_rate": 5.732060301507538e-05, - "loss": 5.0474, - "step": 42982 - }, - { - "epoch": 22.416166883963495, - "grad_norm": 1.4426689147949219, - "learning_rate": 5.7319597989949744e-05, - "loss": 5.84, - "step": 42983 - }, - { - "epoch": 22.416688396349414, - "grad_norm": 1.5433100461959839, - "learning_rate": 5.731859296482412e-05, - "loss": 5.419, - "step": 42984 - }, - { - "epoch": 22.417209908735334, - "grad_norm": 1.5430413484573364, - "learning_rate": 5.731758793969849e-05, - "loss": 5.3476, - "step": 42985 - }, - { - "epoch": 22.417731421121253, - "grad_norm": 1.5598622560501099, - "learning_rate": 5.731658291457287e-05, - "loss": 5.0451, - "step": 42986 - }, - { - "epoch": 22.41825293350717, - "grad_norm": 1.5985435247421265, - "learning_rate": 5.7315577889447235e-05, - "loss": 5.3728, - "step": 42987 - }, - { - "epoch": 22.41877444589309, - "grad_norm": 1.5666905641555786, - "learning_rate": 5.731457286432161e-05, - "loss": 5.452, - "step": 42988 - }, - { - "epoch": 22.419295958279008, - "grad_norm": 1.497247338294983, - "learning_rate": 5.731356783919598e-05, - "loss": 5.391, - "step": 42989 - }, - { - "epoch": 22.419817470664928, - "grad_norm": 1.470530390739441, - "learning_rate": 5.7312562814070355e-05, - "loss": 5.3001, - "step": 42990 - }, - { - "epoch": 22.420338983050847, - "grad_norm": 1.6626722812652588, - "learning_rate": 5.7311557788944726e-05, - "loss": 5.0425, - "step": 42991 - }, - { - "epoch": 22.420860495436767, - "grad_norm": 1.5917035341262817, - "learning_rate": 5.7310552763819104e-05, - "loss": 5.3316, - "step": 42992 - }, - { - "epoch": 22.421382007822686, - "grad_norm": 1.630299687385559, - "learning_rate": 5.730954773869347e-05, - "loss": 5.0101, - "step": 42993 - }, - { - "epoch": 22.421903520208605, - "grad_norm": 1.5384160280227661, - "learning_rate": 5.730854271356784e-05, - "loss": 5.2237, - "step": 42994 - }, - { - "epoch": 22.422425032594525, - "grad_norm": 1.5404452085494995, - "learning_rate": 5.730753768844222e-05, - "loss": 5.5181, - "step": 42995 - }, - { - "epoch": 22.422946544980444, - "grad_norm": 1.5374116897583008, - "learning_rate": 5.730653266331658e-05, - "loss": 5.3683, - "step": 42996 - }, - { - "epoch": 22.423468057366364, - "grad_norm": 1.521562099456787, - "learning_rate": 5.730552763819096e-05, - "loss": 4.9144, - "step": 42997 - }, - { - "epoch": 22.423989569752283, - "grad_norm": 1.6409480571746826, - "learning_rate": 5.7304522613065324e-05, - "loss": 5.4403, - "step": 42998 - }, - { - "epoch": 22.4245110821382, - "grad_norm": 1.4521045684814453, - "learning_rate": 5.73035175879397e-05, - "loss": 5.3833, - "step": 42999 - }, - { - "epoch": 22.42503259452412, - "grad_norm": 1.6786061525344849, - "learning_rate": 5.730251256281407e-05, - "loss": 4.5115, - "step": 43000 - }, - { - "epoch": 22.425554106910038, - "grad_norm": 1.4974408149719238, - "learning_rate": 5.730150753768845e-05, - "loss": 5.3547, - "step": 43001 - }, - { - "epoch": 22.426075619295958, - "grad_norm": 1.3582251071929932, - "learning_rate": 5.7300502512562815e-05, - "loss": 5.0592, - "step": 43002 - }, - { - "epoch": 22.426597131681877, - "grad_norm": 1.5153381824493408, - "learning_rate": 5.729949748743719e-05, - "loss": 5.1461, - "step": 43003 - }, - { - "epoch": 22.427118644067797, - "grad_norm": 1.5432884693145752, - "learning_rate": 5.729849246231156e-05, - "loss": 5.5511, - "step": 43004 - }, - { - "epoch": 22.427640156453716, - "grad_norm": 1.4955365657806396, - "learning_rate": 5.7297487437185935e-05, - "loss": 5.46, - "step": 43005 - }, - { - "epoch": 22.428161668839635, - "grad_norm": 1.6270861625671387, - "learning_rate": 5.7296482412060306e-05, - "loss": 5.0261, - "step": 43006 - }, - { - "epoch": 22.428683181225555, - "grad_norm": 1.5490330457687378, - "learning_rate": 5.729547738693467e-05, - "loss": 5.3966, - "step": 43007 - }, - { - "epoch": 22.429204693611474, - "grad_norm": 1.567928433418274, - "learning_rate": 5.729447236180905e-05, - "loss": 5.3603, - "step": 43008 - }, - { - "epoch": 22.429726205997394, - "grad_norm": 1.6198670864105225, - "learning_rate": 5.729346733668341e-05, - "loss": 4.7273, - "step": 43009 - }, - { - "epoch": 22.430247718383313, - "grad_norm": 1.5863292217254639, - "learning_rate": 5.729246231155779e-05, - "loss": 5.4659, - "step": 43010 - }, - { - "epoch": 22.43076923076923, - "grad_norm": 1.5651425123214722, - "learning_rate": 5.729145728643216e-05, - "loss": 5.0377, - "step": 43011 - }, - { - "epoch": 22.43129074315515, - "grad_norm": 1.5528302192687988, - "learning_rate": 5.729045226130654e-05, - "loss": 5.4088, - "step": 43012 - }, - { - "epoch": 22.431812255541068, - "grad_norm": 1.4889565706253052, - "learning_rate": 5.72894472361809e-05, - "loss": 5.467, - "step": 43013 - }, - { - "epoch": 22.432333767926988, - "grad_norm": 1.5110816955566406, - "learning_rate": 5.728844221105528e-05, - "loss": 5.2857, - "step": 43014 - }, - { - "epoch": 22.432855280312907, - "grad_norm": 1.5241477489471436, - "learning_rate": 5.728743718592965e-05, - "loss": 5.3819, - "step": 43015 - }, - { - "epoch": 22.433376792698827, - "grad_norm": 1.5583972930908203, - "learning_rate": 5.728643216080403e-05, - "loss": 5.3085, - "step": 43016 - }, - { - "epoch": 22.433898305084746, - "grad_norm": 1.5643582344055176, - "learning_rate": 5.7285427135678394e-05, - "loss": 5.4384, - "step": 43017 - }, - { - "epoch": 22.434419817470665, - "grad_norm": 1.5266704559326172, - "learning_rate": 5.728442211055277e-05, - "loss": 5.442, - "step": 43018 - }, - { - "epoch": 22.434941329856585, - "grad_norm": 1.509541392326355, - "learning_rate": 5.7283417085427136e-05, - "loss": 5.2518, - "step": 43019 - }, - { - "epoch": 22.435462842242504, - "grad_norm": 1.5320398807525635, - "learning_rate": 5.7282412060301514e-05, - "loss": 5.1012, - "step": 43020 - }, - { - "epoch": 22.435984354628424, - "grad_norm": 1.4879629611968994, - "learning_rate": 5.7281407035175885e-05, - "loss": 5.4898, - "step": 43021 - }, - { - "epoch": 22.43650586701434, - "grad_norm": 1.6283085346221924, - "learning_rate": 5.728040201005025e-05, - "loss": 4.9876, - "step": 43022 - }, - { - "epoch": 22.43702737940026, - "grad_norm": 1.5218249559402466, - "learning_rate": 5.727939698492463e-05, - "loss": 5.3664, - "step": 43023 - }, - { - "epoch": 22.43754889178618, - "grad_norm": 1.5896350145339966, - "learning_rate": 5.727839195979899e-05, - "loss": 5.276, - "step": 43024 - }, - { - "epoch": 22.438070404172098, - "grad_norm": 1.5657119750976562, - "learning_rate": 5.727738693467337e-05, - "loss": 5.306, - "step": 43025 - }, - { - "epoch": 22.438591916558018, - "grad_norm": 1.5224825143814087, - "learning_rate": 5.727638190954774e-05, - "loss": 5.342, - "step": 43026 - }, - { - "epoch": 22.439113428943937, - "grad_norm": 1.5480314493179321, - "learning_rate": 5.727537688442212e-05, - "loss": 3.8716, - "step": 43027 - }, - { - "epoch": 22.439634941329857, - "grad_norm": 1.5204529762268066, - "learning_rate": 5.727437185929648e-05, - "loss": 5.4225, - "step": 43028 - }, - { - "epoch": 22.440156453715776, - "grad_norm": 1.6223480701446533, - "learning_rate": 5.727336683417086e-05, - "loss": 5.0503, - "step": 43029 - }, - { - "epoch": 22.440677966101696, - "grad_norm": 1.5132330656051636, - "learning_rate": 5.7272361809045225e-05, - "loss": 5.0694, - "step": 43030 - }, - { - "epoch": 22.441199478487615, - "grad_norm": 1.5760234594345093, - "learning_rate": 5.72713567839196e-05, - "loss": 4.8916, - "step": 43031 - }, - { - "epoch": 22.441720990873534, - "grad_norm": 1.4590411186218262, - "learning_rate": 5.7270351758793973e-05, - "loss": 4.6364, - "step": 43032 - }, - { - "epoch": 22.442242503259454, - "grad_norm": 1.6879725456237793, - "learning_rate": 5.726934673366835e-05, - "loss": 5.4472, - "step": 43033 - }, - { - "epoch": 22.442764015645373, - "grad_norm": 1.487238883972168, - "learning_rate": 5.7268341708542715e-05, - "loss": 4.9121, - "step": 43034 - }, - { - "epoch": 22.44328552803129, - "grad_norm": 1.806837797164917, - "learning_rate": 5.7267336683417087e-05, - "loss": 5.123, - "step": 43035 - }, - { - "epoch": 22.44380704041721, - "grad_norm": 1.6022913455963135, - "learning_rate": 5.7266331658291464e-05, - "loss": 4.8379, - "step": 43036 - }, - { - "epoch": 22.444328552803128, - "grad_norm": 1.5299046039581299, - "learning_rate": 5.726532663316583e-05, - "loss": 5.0465, - "step": 43037 - }, - { - "epoch": 22.444850065189048, - "grad_norm": 1.4908334016799927, - "learning_rate": 5.7264321608040206e-05, - "loss": 5.672, - "step": 43038 - }, - { - "epoch": 22.445371577574967, - "grad_norm": 1.4566712379455566, - "learning_rate": 5.726331658291457e-05, - "loss": 5.5562, - "step": 43039 - }, - { - "epoch": 22.445893089960887, - "grad_norm": 1.5142180919647217, - "learning_rate": 5.726231155778895e-05, - "loss": 5.3831, - "step": 43040 - }, - { - "epoch": 22.446414602346806, - "grad_norm": 1.5296202898025513, - "learning_rate": 5.726130653266332e-05, - "loss": 5.6536, - "step": 43041 - }, - { - "epoch": 22.446936114732726, - "grad_norm": 1.5888808965682983, - "learning_rate": 5.72603015075377e-05, - "loss": 4.6286, - "step": 43042 - }, - { - "epoch": 22.447457627118645, - "grad_norm": 1.6591140031814575, - "learning_rate": 5.725929648241206e-05, - "loss": 4.5574, - "step": 43043 - }, - { - "epoch": 22.447979139504564, - "grad_norm": 1.4406663179397583, - "learning_rate": 5.725829145728644e-05, - "loss": 5.1034, - "step": 43044 - }, - { - "epoch": 22.448500651890484, - "grad_norm": 1.486299753189087, - "learning_rate": 5.7257286432160804e-05, - "loss": 5.3618, - "step": 43045 - }, - { - "epoch": 22.4490221642764, - "grad_norm": 1.5371942520141602, - "learning_rate": 5.725628140703518e-05, - "loss": 5.0789, - "step": 43046 - }, - { - "epoch": 22.44954367666232, - "grad_norm": 1.413948655128479, - "learning_rate": 5.725527638190955e-05, - "loss": 5.255, - "step": 43047 - }, - { - "epoch": 22.45006518904824, - "grad_norm": 1.5162546634674072, - "learning_rate": 5.725427135678392e-05, - "loss": 4.6927, - "step": 43048 - }, - { - "epoch": 22.45058670143416, - "grad_norm": 1.53749680519104, - "learning_rate": 5.7253266331658295e-05, - "loss": 5.3948, - "step": 43049 - }, - { - "epoch": 22.451108213820078, - "grad_norm": 1.3882874250411987, - "learning_rate": 5.725226130653266e-05, - "loss": 5.6269, - "step": 43050 - }, - { - "epoch": 22.451629726205997, - "grad_norm": 1.4944310188293457, - "learning_rate": 5.725125628140704e-05, - "loss": 5.1606, - "step": 43051 - }, - { - "epoch": 22.452151238591917, - "grad_norm": 1.5099520683288574, - "learning_rate": 5.725025125628141e-05, - "loss": 5.3241, - "step": 43052 - }, - { - "epoch": 22.452672750977836, - "grad_norm": 1.5625684261322021, - "learning_rate": 5.7249246231155786e-05, - "loss": 5.1926, - "step": 43053 - }, - { - "epoch": 22.453194263363756, - "grad_norm": 1.5217818021774292, - "learning_rate": 5.724824120603015e-05, - "loss": 5.3976, - "step": 43054 - }, - { - "epoch": 22.453715775749675, - "grad_norm": 1.533125877380371, - "learning_rate": 5.724723618090453e-05, - "loss": 5.1846, - "step": 43055 - }, - { - "epoch": 22.454237288135594, - "grad_norm": 1.3770337104797363, - "learning_rate": 5.724623115577889e-05, - "loss": 5.3023, - "step": 43056 - }, - { - "epoch": 22.454758800521514, - "grad_norm": 1.5092167854309082, - "learning_rate": 5.724522613065327e-05, - "loss": 5.5155, - "step": 43057 - }, - { - "epoch": 22.45528031290743, - "grad_norm": 1.472259759902954, - "learning_rate": 5.724422110552764e-05, - "loss": 5.2524, - "step": 43058 - }, - { - "epoch": 22.45580182529335, - "grad_norm": 1.5265469551086426, - "learning_rate": 5.724321608040202e-05, - "loss": 5.2307, - "step": 43059 - }, - { - "epoch": 22.45632333767927, - "grad_norm": 1.4518202543258667, - "learning_rate": 5.724221105527638e-05, - "loss": 4.85, - "step": 43060 - }, - { - "epoch": 22.45684485006519, - "grad_norm": 1.7922356128692627, - "learning_rate": 5.7241206030150754e-05, - "loss": 5.3117, - "step": 43061 - }, - { - "epoch": 22.457366362451108, - "grad_norm": 1.5231406688690186, - "learning_rate": 5.724020100502513e-05, - "loss": 5.3571, - "step": 43062 - }, - { - "epoch": 22.457887874837027, - "grad_norm": 1.5115879774093628, - "learning_rate": 5.7239195979899496e-05, - "loss": 4.6838, - "step": 43063 - }, - { - "epoch": 22.458409387222947, - "grad_norm": 1.5500391721725464, - "learning_rate": 5.7238190954773874e-05, - "loss": 5.6006, - "step": 43064 - }, - { - "epoch": 22.458930899608866, - "grad_norm": 1.4544562101364136, - "learning_rate": 5.723718592964824e-05, - "loss": 5.5626, - "step": 43065 - }, - { - "epoch": 22.459452411994786, - "grad_norm": 1.4498063325881958, - "learning_rate": 5.7236180904522616e-05, - "loss": 5.2381, - "step": 43066 - }, - { - "epoch": 22.459973924380705, - "grad_norm": 1.4926817417144775, - "learning_rate": 5.723517587939699e-05, - "loss": 5.476, - "step": 43067 - }, - { - "epoch": 22.460495436766625, - "grad_norm": 1.4490236043930054, - "learning_rate": 5.7234170854271365e-05, - "loss": 5.1975, - "step": 43068 - }, - { - "epoch": 22.461016949152544, - "grad_norm": 1.6239135265350342, - "learning_rate": 5.723316582914573e-05, - "loss": 5.3905, - "step": 43069 - }, - { - "epoch": 22.46153846153846, - "grad_norm": 1.4835656881332397, - "learning_rate": 5.723216080402011e-05, - "loss": 5.5078, - "step": 43070 - }, - { - "epoch": 22.46205997392438, - "grad_norm": 1.5136959552764893, - "learning_rate": 5.723115577889447e-05, - "loss": 4.8098, - "step": 43071 - }, - { - "epoch": 22.4625814863103, - "grad_norm": 1.4116190671920776, - "learning_rate": 5.723015075376885e-05, - "loss": 5.7298, - "step": 43072 - }, - { - "epoch": 22.46310299869622, - "grad_norm": 1.6326230764389038, - "learning_rate": 5.722914572864322e-05, - "loss": 4.9614, - "step": 43073 - }, - { - "epoch": 22.463624511082138, - "grad_norm": 1.6463686227798462, - "learning_rate": 5.7228140703517585e-05, - "loss": 4.9892, - "step": 43074 - }, - { - "epoch": 22.464146023468057, - "grad_norm": 1.4874809980392456, - "learning_rate": 5.722713567839196e-05, - "loss": 5.3407, - "step": 43075 - }, - { - "epoch": 22.464667535853977, - "grad_norm": 1.5444296598434448, - "learning_rate": 5.722613065326633e-05, - "loss": 5.3499, - "step": 43076 - }, - { - "epoch": 22.465189048239896, - "grad_norm": 1.4579188823699951, - "learning_rate": 5.7225125628140705e-05, - "loss": 5.1586, - "step": 43077 - }, - { - "epoch": 22.465710560625816, - "grad_norm": 1.607480764389038, - "learning_rate": 5.7224120603015076e-05, - "loss": 4.5511, - "step": 43078 - }, - { - "epoch": 22.466232073011735, - "grad_norm": 1.5057421922683716, - "learning_rate": 5.7223115577889453e-05, - "loss": 5.3737, - "step": 43079 - }, - { - "epoch": 22.466753585397655, - "grad_norm": 1.5903584957122803, - "learning_rate": 5.722211055276382e-05, - "loss": 5.0403, - "step": 43080 - }, - { - "epoch": 22.467275097783574, - "grad_norm": 1.4893826246261597, - "learning_rate": 5.7221105527638196e-05, - "loss": 5.484, - "step": 43081 - }, - { - "epoch": 22.46779661016949, - "grad_norm": 1.4427158832550049, - "learning_rate": 5.722010050251257e-05, - "loss": 5.5129, - "step": 43082 - }, - { - "epoch": 22.46831812255541, - "grad_norm": 1.5021981000900269, - "learning_rate": 5.7219095477386944e-05, - "loss": 4.8702, - "step": 43083 - }, - { - "epoch": 22.46883963494133, - "grad_norm": 1.5542888641357422, - "learning_rate": 5.721809045226131e-05, - "loss": 5.2173, - "step": 43084 - }, - { - "epoch": 22.46936114732725, - "grad_norm": 1.600624918937683, - "learning_rate": 5.7217085427135687e-05, - "loss": 4.5454, - "step": 43085 - }, - { - "epoch": 22.469882659713168, - "grad_norm": 1.5310766696929932, - "learning_rate": 5.721608040201005e-05, - "loss": 4.563, - "step": 43086 - }, - { - "epoch": 22.470404172099087, - "grad_norm": 1.5951251983642578, - "learning_rate": 5.721507537688442e-05, - "loss": 5.1721, - "step": 43087 - }, - { - "epoch": 22.470925684485007, - "grad_norm": 1.4577196836471558, - "learning_rate": 5.72140703517588e-05, - "loss": 5.6319, - "step": 43088 - }, - { - "epoch": 22.471447196870926, - "grad_norm": 1.5762759447097778, - "learning_rate": 5.7213065326633164e-05, - "loss": 4.8252, - "step": 43089 - }, - { - "epoch": 22.471968709256846, - "grad_norm": 1.5954216718673706, - "learning_rate": 5.721206030150754e-05, - "loss": 4.9435, - "step": 43090 - }, - { - "epoch": 22.472490221642765, - "grad_norm": 1.5123989582061768, - "learning_rate": 5.7211055276381906e-05, - "loss": 4.5009, - "step": 43091 - }, - { - "epoch": 22.473011734028685, - "grad_norm": 1.5634748935699463, - "learning_rate": 5.7210050251256284e-05, - "loss": 4.8732, - "step": 43092 - }, - { - "epoch": 22.473533246414604, - "grad_norm": 1.5364056825637817, - "learning_rate": 5.7209045226130655e-05, - "loss": 5.4201, - "step": 43093 - }, - { - "epoch": 22.47405475880052, - "grad_norm": 1.5165671110153198, - "learning_rate": 5.720804020100503e-05, - "loss": 5.5555, - "step": 43094 - }, - { - "epoch": 22.47457627118644, - "grad_norm": 1.589097023010254, - "learning_rate": 5.72070351758794e-05, - "loss": 5.1722, - "step": 43095 - }, - { - "epoch": 22.47509778357236, - "grad_norm": 1.7934550046920776, - "learning_rate": 5.7206030150753775e-05, - "loss": 4.9193, - "step": 43096 - }, - { - "epoch": 22.47561929595828, - "grad_norm": 1.5561072826385498, - "learning_rate": 5.720502512562814e-05, - "loss": 5.3533, - "step": 43097 - }, - { - "epoch": 22.476140808344198, - "grad_norm": 1.523532748222351, - "learning_rate": 5.720402010050252e-05, - "loss": 5.2988, - "step": 43098 - }, - { - "epoch": 22.476662320730117, - "grad_norm": 1.6269596815109253, - "learning_rate": 5.720301507537689e-05, - "loss": 5.1066, - "step": 43099 - }, - { - "epoch": 22.477183833116037, - "grad_norm": 1.5015316009521484, - "learning_rate": 5.720201005025125e-05, - "loss": 5.4576, - "step": 43100 - }, - { - "epoch": 22.477705345501956, - "grad_norm": 1.5973457098007202, - "learning_rate": 5.720100502512563e-05, - "loss": 5.3419, - "step": 43101 - }, - { - "epoch": 22.478226857887876, - "grad_norm": 1.6131362915039062, - "learning_rate": 5.72e-05, - "loss": 4.7468, - "step": 43102 - }, - { - "epoch": 22.478748370273795, - "grad_norm": 1.5770792961120605, - "learning_rate": 5.719899497487438e-05, - "loss": 5.3632, - "step": 43103 - }, - { - "epoch": 22.479269882659715, - "grad_norm": 1.5619451999664307, - "learning_rate": 5.719798994974874e-05, - "loss": 5.6035, - "step": 43104 - }, - { - "epoch": 22.479791395045634, - "grad_norm": 1.5660187005996704, - "learning_rate": 5.719698492462312e-05, - "loss": 5.2898, - "step": 43105 - }, - { - "epoch": 22.48031290743155, - "grad_norm": 1.5590558052062988, - "learning_rate": 5.7195979899497485e-05, - "loss": 5.0141, - "step": 43106 - }, - { - "epoch": 22.48083441981747, - "grad_norm": 1.6828688383102417, - "learning_rate": 5.719497487437186e-05, - "loss": 5.0394, - "step": 43107 - }, - { - "epoch": 22.48135593220339, - "grad_norm": 1.4853003025054932, - "learning_rate": 5.7193969849246234e-05, - "loss": 5.1917, - "step": 43108 - }, - { - "epoch": 22.48187744458931, - "grad_norm": 1.5906245708465576, - "learning_rate": 5.719296482412061e-05, - "loss": 4.9636, - "step": 43109 - }, - { - "epoch": 22.482398956975228, - "grad_norm": 1.4996970891952515, - "learning_rate": 5.7191959798994976e-05, - "loss": 5.4102, - "step": 43110 - }, - { - "epoch": 22.482920469361147, - "grad_norm": 1.5439682006835938, - "learning_rate": 5.7190954773869354e-05, - "loss": 5.3798, - "step": 43111 - }, - { - "epoch": 22.483441981747067, - "grad_norm": 1.5278795957565308, - "learning_rate": 5.718994974874372e-05, - "loss": 5.3671, - "step": 43112 - }, - { - "epoch": 22.483963494132986, - "grad_norm": 1.5425803661346436, - "learning_rate": 5.7188944723618096e-05, - "loss": 5.0125, - "step": 43113 - }, - { - "epoch": 22.484485006518906, - "grad_norm": 1.456473469734192, - "learning_rate": 5.718793969849247e-05, - "loss": 4.7902, - "step": 43114 - }, - { - "epoch": 22.485006518904825, - "grad_norm": 1.4851598739624023, - "learning_rate": 5.718693467336683e-05, - "loss": 5.6178, - "step": 43115 - }, - { - "epoch": 22.485528031290745, - "grad_norm": 1.537054419517517, - "learning_rate": 5.718592964824121e-05, - "loss": 4.928, - "step": 43116 - }, - { - "epoch": 22.486049543676664, - "grad_norm": 1.537340760231018, - "learning_rate": 5.7184924623115574e-05, - "loss": 4.874, - "step": 43117 - }, - { - "epoch": 22.48657105606258, - "grad_norm": 1.6599732637405396, - "learning_rate": 5.718391959798995e-05, - "loss": 5.2564, - "step": 43118 - }, - { - "epoch": 22.4870925684485, - "grad_norm": 1.506908893585205, - "learning_rate": 5.718291457286432e-05, - "loss": 5.1215, - "step": 43119 - }, - { - "epoch": 22.48761408083442, - "grad_norm": 1.5170080661773682, - "learning_rate": 5.71819095477387e-05, - "loss": 5.3622, - "step": 43120 - }, - { - "epoch": 22.48813559322034, - "grad_norm": 1.5598951578140259, - "learning_rate": 5.7180904522613065e-05, - "loss": 5.3573, - "step": 43121 - }, - { - "epoch": 22.488657105606258, - "grad_norm": 1.4889683723449707, - "learning_rate": 5.717989949748744e-05, - "loss": 5.1189, - "step": 43122 - }, - { - "epoch": 22.489178617992177, - "grad_norm": 1.6903903484344482, - "learning_rate": 5.7178894472361814e-05, - "loss": 4.7277, - "step": 43123 - }, - { - "epoch": 22.489700130378097, - "grad_norm": 1.5174801349639893, - "learning_rate": 5.717788944723619e-05, - "loss": 5.0754, - "step": 43124 - }, - { - "epoch": 22.490221642764016, - "grad_norm": 1.4573112726211548, - "learning_rate": 5.7176884422110556e-05, - "loss": 5.1322, - "step": 43125 - }, - { - "epoch": 22.490743155149936, - "grad_norm": 1.7390567064285278, - "learning_rate": 5.7175879396984934e-05, - "loss": 4.3391, - "step": 43126 - }, - { - "epoch": 22.491264667535855, - "grad_norm": 1.5083625316619873, - "learning_rate": 5.71748743718593e-05, - "loss": 5.3055, - "step": 43127 - }, - { - "epoch": 22.491786179921775, - "grad_norm": 1.455742597579956, - "learning_rate": 5.717386934673367e-05, - "loss": 4.8702, - "step": 43128 - }, - { - "epoch": 22.49230769230769, - "grad_norm": 1.4063392877578735, - "learning_rate": 5.717286432160805e-05, - "loss": 5.7432, - "step": 43129 - }, - { - "epoch": 22.49282920469361, - "grad_norm": 1.5482995510101318, - "learning_rate": 5.717185929648241e-05, - "loss": 5.4364, - "step": 43130 - }, - { - "epoch": 22.49335071707953, - "grad_norm": 1.5649389028549194, - "learning_rate": 5.717085427135679e-05, - "loss": 5.2203, - "step": 43131 - }, - { - "epoch": 22.49387222946545, - "grad_norm": 1.4924832582473755, - "learning_rate": 5.716984924623115e-05, - "loss": 5.2333, - "step": 43132 - }, - { - "epoch": 22.49439374185137, - "grad_norm": 1.5364832878112793, - "learning_rate": 5.716884422110553e-05, - "loss": 5.7237, - "step": 43133 - }, - { - "epoch": 22.494915254237288, - "grad_norm": 1.5215322971343994, - "learning_rate": 5.71678391959799e-05, - "loss": 5.149, - "step": 43134 - }, - { - "epoch": 22.495436766623207, - "grad_norm": 1.5609171390533447, - "learning_rate": 5.716683417085428e-05, - "loss": 5.1574, - "step": 43135 - }, - { - "epoch": 22.495958279009127, - "grad_norm": 1.614754319190979, - "learning_rate": 5.7165829145728644e-05, - "loss": 4.8837, - "step": 43136 - }, - { - "epoch": 22.496479791395046, - "grad_norm": 1.4762547016143799, - "learning_rate": 5.716482412060302e-05, - "loss": 5.4924, - "step": 43137 - }, - { - "epoch": 22.497001303780966, - "grad_norm": 1.4839476346969604, - "learning_rate": 5.7163819095477386e-05, - "loss": 4.8394, - "step": 43138 - }, - { - "epoch": 22.497522816166885, - "grad_norm": 1.523445725440979, - "learning_rate": 5.7162814070351764e-05, - "loss": 5.5008, - "step": 43139 - }, - { - "epoch": 22.498044328552805, - "grad_norm": 1.4341411590576172, - "learning_rate": 5.7161809045226135e-05, - "loss": 5.5009, - "step": 43140 - }, - { - "epoch": 22.49856584093872, - "grad_norm": 1.42341947555542, - "learning_rate": 5.71608040201005e-05, - "loss": 5.2516, - "step": 43141 - }, - { - "epoch": 22.49908735332464, - "grad_norm": 1.481767177581787, - "learning_rate": 5.715979899497488e-05, - "loss": 5.57, - "step": 43142 - }, - { - "epoch": 22.49960886571056, - "grad_norm": 1.5396596193313599, - "learning_rate": 5.715879396984924e-05, - "loss": 4.9598, - "step": 43143 - }, - { - "epoch": 22.50013037809648, - "grad_norm": 1.5281959772109985, - "learning_rate": 5.715778894472362e-05, - "loss": 4.8319, - "step": 43144 - }, - { - "epoch": 22.5006518904824, - "grad_norm": 1.525223970413208, - "learning_rate": 5.715678391959799e-05, - "loss": 5.4895, - "step": 43145 - }, - { - "epoch": 22.501173402868318, - "grad_norm": 1.5709556341171265, - "learning_rate": 5.715577889447237e-05, - "loss": 4.3263, - "step": 43146 - }, - { - "epoch": 22.501694915254237, - "grad_norm": 1.6439722776412964, - "learning_rate": 5.715477386934673e-05, - "loss": 5.2131, - "step": 43147 - }, - { - "epoch": 22.502216427640157, - "grad_norm": 1.4806957244873047, - "learning_rate": 5.715376884422111e-05, - "loss": 5.4224, - "step": 43148 - }, - { - "epoch": 22.502737940026076, - "grad_norm": 1.5501161813735962, - "learning_rate": 5.715276381909548e-05, - "loss": 5.5685, - "step": 43149 - }, - { - "epoch": 22.503259452411996, - "grad_norm": 1.6109122037887573, - "learning_rate": 5.715175879396986e-05, - "loss": 5.4455, - "step": 43150 - }, - { - "epoch": 22.503780964797915, - "grad_norm": 1.4691832065582275, - "learning_rate": 5.7150753768844223e-05, - "loss": 5.0062, - "step": 43151 - }, - { - "epoch": 22.504302477183835, - "grad_norm": 1.500876545906067, - "learning_rate": 5.71497487437186e-05, - "loss": 5.4606, - "step": 43152 - }, - { - "epoch": 22.50482398956975, - "grad_norm": 1.5278596878051758, - "learning_rate": 5.7148743718592966e-05, - "loss": 4.8661, - "step": 43153 - }, - { - "epoch": 22.50534550195567, - "grad_norm": 1.6564992666244507, - "learning_rate": 5.7147738693467337e-05, - "loss": 5.6592, - "step": 43154 - }, - { - "epoch": 22.50586701434159, - "grad_norm": 1.6532273292541504, - "learning_rate": 5.7146733668341714e-05, - "loss": 5.337, - "step": 43155 - }, - { - "epoch": 22.50638852672751, - "grad_norm": 1.554994821548462, - "learning_rate": 5.714572864321608e-05, - "loss": 4.941, - "step": 43156 - }, - { - "epoch": 22.50691003911343, - "grad_norm": 1.61863374710083, - "learning_rate": 5.7144723618090456e-05, - "loss": 4.9318, - "step": 43157 - }, - { - "epoch": 22.507431551499348, - "grad_norm": 1.517767071723938, - "learning_rate": 5.714371859296482e-05, - "loss": 5.3932, - "step": 43158 - }, - { - "epoch": 22.507953063885267, - "grad_norm": 1.6253814697265625, - "learning_rate": 5.71427135678392e-05, - "loss": 5.2172, - "step": 43159 - }, - { - "epoch": 22.508474576271187, - "grad_norm": 1.5596435070037842, - "learning_rate": 5.714170854271357e-05, - "loss": 5.3367, - "step": 43160 - }, - { - "epoch": 22.508996088657106, - "grad_norm": 1.6528273820877075, - "learning_rate": 5.714070351758795e-05, - "loss": 4.3917, - "step": 43161 - }, - { - "epoch": 22.509517601043026, - "grad_norm": 1.5229084491729736, - "learning_rate": 5.713969849246231e-05, - "loss": 5.387, - "step": 43162 - }, - { - "epoch": 22.510039113428945, - "grad_norm": 1.491413950920105, - "learning_rate": 5.713869346733669e-05, - "loss": 5.2887, - "step": 43163 - }, - { - "epoch": 22.510560625814865, - "grad_norm": 1.642224669456482, - "learning_rate": 5.7137688442211054e-05, - "loss": 5.1975, - "step": 43164 - }, - { - "epoch": 22.51108213820078, - "grad_norm": 1.4375232458114624, - "learning_rate": 5.713668341708543e-05, - "loss": 5.5921, - "step": 43165 - }, - { - "epoch": 22.5116036505867, - "grad_norm": 1.6763157844543457, - "learning_rate": 5.71356783919598e-05, - "loss": 4.9597, - "step": 43166 - }, - { - "epoch": 22.51212516297262, - "grad_norm": 1.5744147300720215, - "learning_rate": 5.713467336683417e-05, - "loss": 5.1265, - "step": 43167 - }, - { - "epoch": 22.51264667535854, - "grad_norm": 1.6696281433105469, - "learning_rate": 5.7133668341708545e-05, - "loss": 5.2229, - "step": 43168 - }, - { - "epoch": 22.51316818774446, - "grad_norm": 1.6615289449691772, - "learning_rate": 5.7132663316582916e-05, - "loss": 5.0917, - "step": 43169 - }, - { - "epoch": 22.513689700130378, - "grad_norm": 1.5789470672607422, - "learning_rate": 5.7131658291457294e-05, - "loss": 5.4439, - "step": 43170 - }, - { - "epoch": 22.514211212516297, - "grad_norm": 1.71641206741333, - "learning_rate": 5.713065326633166e-05, - "loss": 4.9477, - "step": 43171 - }, - { - "epoch": 22.514732724902217, - "grad_norm": 1.5849716663360596, - "learning_rate": 5.7129648241206036e-05, - "loss": 5.3681, - "step": 43172 - }, - { - "epoch": 22.515254237288136, - "grad_norm": 1.6634455919265747, - "learning_rate": 5.71286432160804e-05, - "loss": 4.9376, - "step": 43173 - }, - { - "epoch": 22.515775749674056, - "grad_norm": 1.6493333578109741, - "learning_rate": 5.712763819095478e-05, - "loss": 4.6737, - "step": 43174 - }, - { - "epoch": 22.516297262059975, - "grad_norm": 1.5191906690597534, - "learning_rate": 5.712663316582915e-05, - "loss": 5.2476, - "step": 43175 - }, - { - "epoch": 22.516818774445895, - "grad_norm": 1.4210448265075684, - "learning_rate": 5.712562814070353e-05, - "loss": 5.3542, - "step": 43176 - }, - { - "epoch": 22.51734028683181, - "grad_norm": 1.5321118831634521, - "learning_rate": 5.712462311557789e-05, - "loss": 5.2545, - "step": 43177 - }, - { - "epoch": 22.51786179921773, - "grad_norm": 1.5318512916564941, - "learning_rate": 5.712361809045227e-05, - "loss": 5.1407, - "step": 43178 - }, - { - "epoch": 22.51838331160365, - "grad_norm": 1.5893195867538452, - "learning_rate": 5.712261306532663e-05, - "loss": 4.7959, - "step": 43179 - }, - { - "epoch": 22.51890482398957, - "grad_norm": 1.54738187789917, - "learning_rate": 5.7121608040201004e-05, - "loss": 5.4886, - "step": 43180 - }, - { - "epoch": 22.51942633637549, - "grad_norm": 1.490206241607666, - "learning_rate": 5.712060301507538e-05, - "loss": 5.1809, - "step": 43181 - }, - { - "epoch": 22.519947848761408, - "grad_norm": 1.5779560804367065, - "learning_rate": 5.7119597989949746e-05, - "loss": 5.2665, - "step": 43182 - }, - { - "epoch": 22.520469361147327, - "grad_norm": 1.5259690284729004, - "learning_rate": 5.7118592964824124e-05, - "loss": 5.4031, - "step": 43183 - }, - { - "epoch": 22.520990873533247, - "grad_norm": 1.6987825632095337, - "learning_rate": 5.711758793969849e-05, - "loss": 5.1216, - "step": 43184 - }, - { - "epoch": 22.521512385919166, - "grad_norm": 1.5258303880691528, - "learning_rate": 5.7116582914572866e-05, - "loss": 5.3339, - "step": 43185 - }, - { - "epoch": 22.522033898305086, - "grad_norm": 1.5030094385147095, - "learning_rate": 5.711557788944724e-05, - "loss": 5.479, - "step": 43186 - }, - { - "epoch": 22.522555410691005, - "grad_norm": 1.5304136276245117, - "learning_rate": 5.7114572864321615e-05, - "loss": 5.7803, - "step": 43187 - }, - { - "epoch": 22.523076923076925, - "grad_norm": 1.5807774066925049, - "learning_rate": 5.711356783919598e-05, - "loss": 5.3349, - "step": 43188 - }, - { - "epoch": 22.52359843546284, - "grad_norm": 1.551010251045227, - "learning_rate": 5.711256281407036e-05, - "loss": 5.6706, - "step": 43189 - }, - { - "epoch": 22.52411994784876, - "grad_norm": 1.4996862411499023, - "learning_rate": 5.711155778894473e-05, - "loss": 5.402, - "step": 43190 - }, - { - "epoch": 22.52464146023468, - "grad_norm": 1.4900217056274414, - "learning_rate": 5.7110552763819106e-05, - "loss": 5.647, - "step": 43191 - }, - { - "epoch": 22.5251629726206, - "grad_norm": 1.6283787488937378, - "learning_rate": 5.710954773869347e-05, - "loss": 5.2732, - "step": 43192 - }, - { - "epoch": 22.52568448500652, - "grad_norm": 1.5593897104263306, - "learning_rate": 5.710854271356785e-05, - "loss": 5.4187, - "step": 43193 - }, - { - "epoch": 22.526205997392438, - "grad_norm": 1.5734307765960693, - "learning_rate": 5.710753768844221e-05, - "loss": 5.4, - "step": 43194 - }, - { - "epoch": 22.526727509778357, - "grad_norm": 1.4677157402038574, - "learning_rate": 5.7106532663316584e-05, - "loss": 4.5543, - "step": 43195 - }, - { - "epoch": 22.527249022164277, - "grad_norm": 1.5963696241378784, - "learning_rate": 5.710552763819096e-05, - "loss": 5.2349, - "step": 43196 - }, - { - "epoch": 22.527770534550196, - "grad_norm": 1.5544975996017456, - "learning_rate": 5.7104522613065326e-05, - "loss": 5.1337, - "step": 43197 - }, - { - "epoch": 22.528292046936116, - "grad_norm": 1.642124891281128, - "learning_rate": 5.7103517587939703e-05, - "loss": 5.4498, - "step": 43198 - }, - { - "epoch": 22.528813559322035, - "grad_norm": 1.6113452911376953, - "learning_rate": 5.710251256281407e-05, - "loss": 5.2065, - "step": 43199 - }, - { - "epoch": 22.529335071707955, - "grad_norm": 1.5183161497116089, - "learning_rate": 5.7101507537688446e-05, - "loss": 5.1414, - "step": 43200 - }, - { - "epoch": 22.52985658409387, - "grad_norm": 1.6247782707214355, - "learning_rate": 5.710050251256282e-05, - "loss": 5.1617, - "step": 43201 - }, - { - "epoch": 22.53037809647979, - "grad_norm": 1.5843122005462646, - "learning_rate": 5.7099497487437194e-05, - "loss": 5.3948, - "step": 43202 - }, - { - "epoch": 22.53089960886571, - "grad_norm": 1.4967448711395264, - "learning_rate": 5.709849246231156e-05, - "loss": 5.5053, - "step": 43203 - }, - { - "epoch": 22.53142112125163, - "grad_norm": 1.552748680114746, - "learning_rate": 5.7097487437185937e-05, - "loss": 5.6544, - "step": 43204 - }, - { - "epoch": 22.53194263363755, - "grad_norm": 1.5219686031341553, - "learning_rate": 5.70964824120603e-05, - "loss": 4.9539, - "step": 43205 - }, - { - "epoch": 22.532464146023468, - "grad_norm": 1.48931086063385, - "learning_rate": 5.709547738693468e-05, - "loss": 5.2648, - "step": 43206 - }, - { - "epoch": 22.532985658409387, - "grad_norm": 1.6440725326538086, - "learning_rate": 5.709447236180905e-05, - "loss": 5.2661, - "step": 43207 - }, - { - "epoch": 22.533507170795307, - "grad_norm": 1.608794927597046, - "learning_rate": 5.7093467336683414e-05, - "loss": 4.9503, - "step": 43208 - }, - { - "epoch": 22.534028683181226, - "grad_norm": 1.5316861867904663, - "learning_rate": 5.709246231155779e-05, - "loss": 5.509, - "step": 43209 - }, - { - "epoch": 22.534550195567146, - "grad_norm": 1.587078332901001, - "learning_rate": 5.709145728643216e-05, - "loss": 5.3329, - "step": 43210 - }, - { - "epoch": 22.535071707953065, - "grad_norm": 1.5029144287109375, - "learning_rate": 5.709045226130654e-05, - "loss": 5.4065, - "step": 43211 - }, - { - "epoch": 22.53559322033898, - "grad_norm": 1.5604274272918701, - "learning_rate": 5.7089447236180905e-05, - "loss": 4.9827, - "step": 43212 - }, - { - "epoch": 22.5361147327249, - "grad_norm": 1.6063064336776733, - "learning_rate": 5.708844221105528e-05, - "loss": 5.5832, - "step": 43213 - }, - { - "epoch": 22.53663624511082, - "grad_norm": 1.5680820941925049, - "learning_rate": 5.708743718592965e-05, - "loss": 5.0059, - "step": 43214 - }, - { - "epoch": 22.53715775749674, - "grad_norm": 1.5184288024902344, - "learning_rate": 5.7086432160804025e-05, - "loss": 4.5837, - "step": 43215 - }, - { - "epoch": 22.53767926988266, - "grad_norm": 1.5021497011184692, - "learning_rate": 5.7085427135678396e-05, - "loss": 4.8913, - "step": 43216 - }, - { - "epoch": 22.53820078226858, - "grad_norm": 1.8004798889160156, - "learning_rate": 5.7084422110552774e-05, - "loss": 4.8703, - "step": 43217 - }, - { - "epoch": 22.538722294654498, - "grad_norm": 1.599009394645691, - "learning_rate": 5.708341708542714e-05, - "loss": 5.3061, - "step": 43218 - }, - { - "epoch": 22.539243807040418, - "grad_norm": 1.5108356475830078, - "learning_rate": 5.7082412060301516e-05, - "loss": 5.5498, - "step": 43219 - }, - { - "epoch": 22.539765319426337, - "grad_norm": 1.545542597770691, - "learning_rate": 5.708140703517588e-05, - "loss": 4.9423, - "step": 43220 - }, - { - "epoch": 22.540286831812256, - "grad_norm": 1.5357881784439087, - "learning_rate": 5.708040201005025e-05, - "loss": 5.3218, - "step": 43221 - }, - { - "epoch": 22.540808344198176, - "grad_norm": 1.4995262622833252, - "learning_rate": 5.707939698492463e-05, - "loss": 5.5552, - "step": 43222 - }, - { - "epoch": 22.541329856584095, - "grad_norm": 1.522287130355835, - "learning_rate": 5.707839195979899e-05, - "loss": 5.4942, - "step": 43223 - }, - { - "epoch": 22.541851368970015, - "grad_norm": 1.590205192565918, - "learning_rate": 5.707738693467337e-05, - "loss": 4.6225, - "step": 43224 - }, - { - "epoch": 22.54237288135593, - "grad_norm": 1.5339124202728271, - "learning_rate": 5.7076381909547735e-05, - "loss": 4.895, - "step": 43225 - }, - { - "epoch": 22.54289439374185, - "grad_norm": 1.4556055068969727, - "learning_rate": 5.707537688442211e-05, - "loss": 5.2244, - "step": 43226 - }, - { - "epoch": 22.54341590612777, - "grad_norm": 1.5607589483261108, - "learning_rate": 5.7074371859296484e-05, - "loss": 5.1324, - "step": 43227 - }, - { - "epoch": 22.54393741851369, - "grad_norm": 1.5456149578094482, - "learning_rate": 5.707336683417086e-05, - "loss": 5.144, - "step": 43228 - }, - { - "epoch": 22.54445893089961, - "grad_norm": 1.6929330825805664, - "learning_rate": 5.7072361809045226e-05, - "loss": 5.1076, - "step": 43229 - }, - { - "epoch": 22.544980443285528, - "grad_norm": 1.5696947574615479, - "learning_rate": 5.7071356783919604e-05, - "loss": 5.37, - "step": 43230 - }, - { - "epoch": 22.545501955671448, - "grad_norm": 1.5648627281188965, - "learning_rate": 5.707035175879397e-05, - "loss": 5.3787, - "step": 43231 - }, - { - "epoch": 22.546023468057367, - "grad_norm": 1.5468299388885498, - "learning_rate": 5.7069346733668346e-05, - "loss": 4.8519, - "step": 43232 - }, - { - "epoch": 22.546544980443286, - "grad_norm": 1.586899995803833, - "learning_rate": 5.706834170854272e-05, - "loss": 5.3725, - "step": 43233 - }, - { - "epoch": 22.547066492829206, - "grad_norm": 1.4647307395935059, - "learning_rate": 5.706733668341708e-05, - "loss": 5.1812, - "step": 43234 - }, - { - "epoch": 22.547588005215125, - "grad_norm": 1.4945837259292603, - "learning_rate": 5.706633165829146e-05, - "loss": 5.4369, - "step": 43235 - }, - { - "epoch": 22.54810951760104, - "grad_norm": 1.4614646434783936, - "learning_rate": 5.706532663316583e-05, - "loss": 5.7362, - "step": 43236 - }, - { - "epoch": 22.54863102998696, - "grad_norm": 1.6803538799285889, - "learning_rate": 5.706432160804021e-05, - "loss": 5.319, - "step": 43237 - }, - { - "epoch": 22.54915254237288, - "grad_norm": 1.5274139642715454, - "learning_rate": 5.706331658291457e-05, - "loss": 5.2676, - "step": 43238 - }, - { - "epoch": 22.5496740547588, - "grad_norm": 1.5153086185455322, - "learning_rate": 5.706231155778895e-05, - "loss": 4.9862, - "step": 43239 - }, - { - "epoch": 22.55019556714472, - "grad_norm": 1.463563323020935, - "learning_rate": 5.7061306532663315e-05, - "loss": 5.6523, - "step": 43240 - }, - { - "epoch": 22.55071707953064, - "grad_norm": 1.5825313329696655, - "learning_rate": 5.706030150753769e-05, - "loss": 5.0187, - "step": 43241 - }, - { - "epoch": 22.551238591916558, - "grad_norm": 1.6053706407546997, - "learning_rate": 5.7059296482412064e-05, - "loss": 5.4993, - "step": 43242 - }, - { - "epoch": 22.551760104302478, - "grad_norm": 1.5472259521484375, - "learning_rate": 5.705829145728644e-05, - "loss": 5.1998, - "step": 43243 - }, - { - "epoch": 22.552281616688397, - "grad_norm": 1.5163027048110962, - "learning_rate": 5.7057286432160806e-05, - "loss": 4.5642, - "step": 43244 - }, - { - "epoch": 22.552803129074317, - "grad_norm": 1.5083364248275757, - "learning_rate": 5.7056281407035184e-05, - "loss": 5.2114, - "step": 43245 - }, - { - "epoch": 22.553324641460236, - "grad_norm": 1.5288965702056885, - "learning_rate": 5.705527638190955e-05, - "loss": 5.0589, - "step": 43246 - }, - { - "epoch": 22.553846153846155, - "grad_norm": 1.5227463245391846, - "learning_rate": 5.705427135678392e-05, - "loss": 5.4135, - "step": 43247 - }, - { - "epoch": 22.55436766623207, - "grad_norm": 1.4686754941940308, - "learning_rate": 5.70532663316583e-05, - "loss": 5.6706, - "step": 43248 - }, - { - "epoch": 22.55488917861799, - "grad_norm": 1.6276915073394775, - "learning_rate": 5.705226130653266e-05, - "loss": 5.0946, - "step": 43249 - }, - { - "epoch": 22.55541069100391, - "grad_norm": 1.4741880893707275, - "learning_rate": 5.705125628140704e-05, - "loss": 5.0679, - "step": 43250 - }, - { - "epoch": 22.55593220338983, - "grad_norm": 1.4882673025131226, - "learning_rate": 5.70502512562814e-05, - "loss": 5.0204, - "step": 43251 - }, - { - "epoch": 22.55645371577575, - "grad_norm": 1.5444152355194092, - "learning_rate": 5.704924623115578e-05, - "loss": 4.809, - "step": 43252 - }, - { - "epoch": 22.55697522816167, - "grad_norm": 1.5663658380508423, - "learning_rate": 5.704824120603015e-05, - "loss": 5.0445, - "step": 43253 - }, - { - "epoch": 22.557496740547588, - "grad_norm": 1.4032074213027954, - "learning_rate": 5.704723618090453e-05, - "loss": 5.7648, - "step": 43254 - }, - { - "epoch": 22.558018252933508, - "grad_norm": 1.657432198524475, - "learning_rate": 5.7046231155778894e-05, - "loss": 5.3118, - "step": 43255 - }, - { - "epoch": 22.558539765319427, - "grad_norm": 1.3873841762542725, - "learning_rate": 5.704522613065327e-05, - "loss": 5.4019, - "step": 43256 - }, - { - "epoch": 22.559061277705347, - "grad_norm": 1.4490346908569336, - "learning_rate": 5.704422110552764e-05, - "loss": 4.9548, - "step": 43257 - }, - { - "epoch": 22.559582790091266, - "grad_norm": 1.5590282678604126, - "learning_rate": 5.704321608040202e-05, - "loss": 5.5496, - "step": 43258 - }, - { - "epoch": 22.560104302477185, - "grad_norm": 1.5294862985610962, - "learning_rate": 5.7042211055276385e-05, - "loss": 4.7322, - "step": 43259 - }, - { - "epoch": 22.5606258148631, - "grad_norm": 1.4792033433914185, - "learning_rate": 5.704120603015075e-05, - "loss": 5.7083, - "step": 43260 - }, - { - "epoch": 22.56114732724902, - "grad_norm": 1.58607017993927, - "learning_rate": 5.704020100502513e-05, - "loss": 5.1641, - "step": 43261 - }, - { - "epoch": 22.56166883963494, - "grad_norm": 1.5614137649536133, - "learning_rate": 5.70391959798995e-05, - "loss": 4.8442, - "step": 43262 - }, - { - "epoch": 22.56219035202086, - "grad_norm": 1.601027488708496, - "learning_rate": 5.7038190954773876e-05, - "loss": 5.186, - "step": 43263 - }, - { - "epoch": 22.56271186440678, - "grad_norm": 1.5703307390213013, - "learning_rate": 5.703718592964824e-05, - "loss": 5.3353, - "step": 43264 - }, - { - "epoch": 22.5632333767927, - "grad_norm": 1.5101289749145508, - "learning_rate": 5.703618090452262e-05, - "loss": 4.6302, - "step": 43265 - }, - { - "epoch": 22.563754889178618, - "grad_norm": 1.6383060216903687, - "learning_rate": 5.703517587939698e-05, - "loss": 5.0555, - "step": 43266 - }, - { - "epoch": 22.564276401564538, - "grad_norm": 1.725376844406128, - "learning_rate": 5.703417085427136e-05, - "loss": 5.076, - "step": 43267 - }, - { - "epoch": 22.564797913950457, - "grad_norm": 1.5537540912628174, - "learning_rate": 5.703316582914573e-05, - "loss": 5.5138, - "step": 43268 - }, - { - "epoch": 22.565319426336377, - "grad_norm": 1.530961036682129, - "learning_rate": 5.703216080402011e-05, - "loss": 5.296, - "step": 43269 - }, - { - "epoch": 22.565840938722296, - "grad_norm": 1.5897762775421143, - "learning_rate": 5.7031155778894473e-05, - "loss": 5.2919, - "step": 43270 - }, - { - "epoch": 22.566362451108215, - "grad_norm": 1.5545247793197632, - "learning_rate": 5.703015075376885e-05, - "loss": 5.5083, - "step": 43271 - }, - { - "epoch": 22.56688396349413, - "grad_norm": 1.4740970134735107, - "learning_rate": 5.7029145728643216e-05, - "loss": 4.9061, - "step": 43272 - }, - { - "epoch": 22.56740547588005, - "grad_norm": 1.5766983032226562, - "learning_rate": 5.7028140703517587e-05, - "loss": 5.5919, - "step": 43273 - }, - { - "epoch": 22.56792698826597, - "grad_norm": 1.6207754611968994, - "learning_rate": 5.7027135678391964e-05, - "loss": 4.7018, - "step": 43274 - }, - { - "epoch": 22.56844850065189, - "grad_norm": 1.4908854961395264, - "learning_rate": 5.702613065326633e-05, - "loss": 5.1436, - "step": 43275 - }, - { - "epoch": 22.56897001303781, - "grad_norm": 1.617387056350708, - "learning_rate": 5.7025125628140707e-05, - "loss": 5.4443, - "step": 43276 - }, - { - "epoch": 22.56949152542373, - "grad_norm": 1.6835154294967651, - "learning_rate": 5.702412060301508e-05, - "loss": 5.3108, - "step": 43277 - }, - { - "epoch": 22.570013037809648, - "grad_norm": 1.5083848237991333, - "learning_rate": 5.7023115577889455e-05, - "loss": 5.4245, - "step": 43278 - }, - { - "epoch": 22.570534550195568, - "grad_norm": 1.4063540697097778, - "learning_rate": 5.702211055276382e-05, - "loss": 5.6357, - "step": 43279 - }, - { - "epoch": 22.571056062581487, - "grad_norm": 1.603441834449768, - "learning_rate": 5.70211055276382e-05, - "loss": 5.1902, - "step": 43280 - }, - { - "epoch": 22.571577574967407, - "grad_norm": 1.493664264678955, - "learning_rate": 5.702010050251256e-05, - "loss": 5.3887, - "step": 43281 - }, - { - "epoch": 22.572099087353326, - "grad_norm": 1.6283291578292847, - "learning_rate": 5.701909547738694e-05, - "loss": 4.7791, - "step": 43282 - }, - { - "epoch": 22.572620599739246, - "grad_norm": 1.5124313831329346, - "learning_rate": 5.701809045226131e-05, - "loss": 5.3175, - "step": 43283 - }, - { - "epoch": 22.57314211212516, - "grad_norm": 1.4233287572860718, - "learning_rate": 5.701708542713569e-05, - "loss": 5.6461, - "step": 43284 - }, - { - "epoch": 22.57366362451108, - "grad_norm": 1.551692008972168, - "learning_rate": 5.701608040201005e-05, - "loss": 5.4017, - "step": 43285 - }, - { - "epoch": 22.574185136897, - "grad_norm": 1.4999221563339233, - "learning_rate": 5.701507537688443e-05, - "loss": 5.7175, - "step": 43286 - }, - { - "epoch": 22.57470664928292, - "grad_norm": 1.5960990190505981, - "learning_rate": 5.7014070351758795e-05, - "loss": 4.5643, - "step": 43287 - }, - { - "epoch": 22.57522816166884, - "grad_norm": 1.4346483945846558, - "learning_rate": 5.7013065326633166e-05, - "loss": 5.6836, - "step": 43288 - }, - { - "epoch": 22.57574967405476, - "grad_norm": 1.56142258644104, - "learning_rate": 5.7012060301507544e-05, - "loss": 5.1943, - "step": 43289 - }, - { - "epoch": 22.576271186440678, - "grad_norm": 1.4480229616165161, - "learning_rate": 5.701105527638191e-05, - "loss": 5.0704, - "step": 43290 - }, - { - "epoch": 22.576792698826598, - "grad_norm": 1.407570242881775, - "learning_rate": 5.7010050251256286e-05, - "loss": 5.6288, - "step": 43291 - }, - { - "epoch": 22.577314211212517, - "grad_norm": 1.5344690084457397, - "learning_rate": 5.700904522613065e-05, - "loss": 5.5939, - "step": 43292 - }, - { - "epoch": 22.577835723598437, - "grad_norm": 1.677284598350525, - "learning_rate": 5.700804020100503e-05, - "loss": 4.953, - "step": 43293 - }, - { - "epoch": 22.578357235984356, - "grad_norm": 1.5601017475128174, - "learning_rate": 5.70070351758794e-05, - "loss": 5.1063, - "step": 43294 - }, - { - "epoch": 22.578878748370272, - "grad_norm": 1.4369345903396606, - "learning_rate": 5.700603015075378e-05, - "loss": 5.7115, - "step": 43295 - }, - { - "epoch": 22.57940026075619, - "grad_norm": 1.4899661540985107, - "learning_rate": 5.700502512562814e-05, - "loss": 5.4828, - "step": 43296 - }, - { - "epoch": 22.57992177314211, - "grad_norm": 1.593403697013855, - "learning_rate": 5.700402010050252e-05, - "loss": 5.3735, - "step": 43297 - }, - { - "epoch": 22.58044328552803, - "grad_norm": 1.502347707748413, - "learning_rate": 5.700301507537688e-05, - "loss": 5.5141, - "step": 43298 - }, - { - "epoch": 22.58096479791395, - "grad_norm": 1.551188588142395, - "learning_rate": 5.700201005025126e-05, - "loss": 5.4645, - "step": 43299 - }, - { - "epoch": 22.58148631029987, - "grad_norm": 1.475858211517334, - "learning_rate": 5.700100502512563e-05, - "loss": 5.4393, - "step": 43300 - }, - { - "epoch": 22.58200782268579, - "grad_norm": 1.5221503973007202, - "learning_rate": 5.6999999999999996e-05, - "loss": 5.3587, - "step": 43301 - }, - { - "epoch": 22.58252933507171, - "grad_norm": 1.4866483211517334, - "learning_rate": 5.6998994974874374e-05, - "loss": 5.3347, - "step": 43302 - }, - { - "epoch": 22.583050847457628, - "grad_norm": 1.4972871541976929, - "learning_rate": 5.6997989949748745e-05, - "loss": 5.1276, - "step": 43303 - }, - { - "epoch": 22.583572359843547, - "grad_norm": 1.4681146144866943, - "learning_rate": 5.699698492462312e-05, - "loss": 5.1675, - "step": 43304 - }, - { - "epoch": 22.584093872229467, - "grad_norm": 1.4607173204421997, - "learning_rate": 5.699597989949749e-05, - "loss": 5.4636, - "step": 43305 - }, - { - "epoch": 22.584615384615386, - "grad_norm": 1.6550503969192505, - "learning_rate": 5.6994974874371865e-05, - "loss": 4.2732, - "step": 43306 - }, - { - "epoch": 22.585136897001306, - "grad_norm": 1.6234793663024902, - "learning_rate": 5.699396984924623e-05, - "loss": 5.1332, - "step": 43307 - }, - { - "epoch": 22.58565840938722, - "grad_norm": 1.5409679412841797, - "learning_rate": 5.699296482412061e-05, - "loss": 5.4075, - "step": 43308 - }, - { - "epoch": 22.58617992177314, - "grad_norm": 1.5418065786361694, - "learning_rate": 5.699195979899498e-05, - "loss": 5.3218, - "step": 43309 - }, - { - "epoch": 22.58670143415906, - "grad_norm": 1.5750799179077148, - "learning_rate": 5.6990954773869356e-05, - "loss": 5.3465, - "step": 43310 - }, - { - "epoch": 22.58722294654498, - "grad_norm": 2.0511574745178223, - "learning_rate": 5.698994974874372e-05, - "loss": 4.5436, - "step": 43311 - }, - { - "epoch": 22.5877444589309, - "grad_norm": 1.7447589635849, - "learning_rate": 5.69889447236181e-05, - "loss": 5.212, - "step": 43312 - }, - { - "epoch": 22.58826597131682, - "grad_norm": 1.5185309648513794, - "learning_rate": 5.698793969849246e-05, - "loss": 5.4494, - "step": 43313 - }, - { - "epoch": 22.58878748370274, - "grad_norm": 1.567466139793396, - "learning_rate": 5.6986934673366834e-05, - "loss": 5.2042, - "step": 43314 - }, - { - "epoch": 22.589308996088658, - "grad_norm": 1.5263041257858276, - "learning_rate": 5.698592964824121e-05, - "loss": 5.116, - "step": 43315 - }, - { - "epoch": 22.589830508474577, - "grad_norm": 1.5116162300109863, - "learning_rate": 5.6984924623115576e-05, - "loss": 5.2255, - "step": 43316 - }, - { - "epoch": 22.590352020860497, - "grad_norm": 1.4477955102920532, - "learning_rate": 5.6983919597989954e-05, - "loss": 5.507, - "step": 43317 - }, - { - "epoch": 22.590873533246416, - "grad_norm": 1.481142520904541, - "learning_rate": 5.698291457286432e-05, - "loss": 5.4643, - "step": 43318 - }, - { - "epoch": 22.591395045632332, - "grad_norm": 1.499234914779663, - "learning_rate": 5.6981909547738696e-05, - "loss": 5.2349, - "step": 43319 - }, - { - "epoch": 22.59191655801825, - "grad_norm": 1.4883562326431274, - "learning_rate": 5.698090452261307e-05, - "loss": 5.5232, - "step": 43320 - }, - { - "epoch": 22.59243807040417, - "grad_norm": 1.7106218338012695, - "learning_rate": 5.6979899497487445e-05, - "loss": 5.412, - "step": 43321 - }, - { - "epoch": 22.59295958279009, - "grad_norm": 1.4643250703811646, - "learning_rate": 5.697889447236181e-05, - "loss": 5.6347, - "step": 43322 - }, - { - "epoch": 22.59348109517601, - "grad_norm": 1.4930530786514282, - "learning_rate": 5.6977889447236187e-05, - "loss": 4.5618, - "step": 43323 - }, - { - "epoch": 22.59400260756193, - "grad_norm": 1.5150835514068604, - "learning_rate": 5.697688442211056e-05, - "loss": 5.4472, - "step": 43324 - }, - { - "epoch": 22.59452411994785, - "grad_norm": 1.5299272537231445, - "learning_rate": 5.6975879396984935e-05, - "loss": 5.2204, - "step": 43325 - }, - { - "epoch": 22.59504563233377, - "grad_norm": 1.5457578897476196, - "learning_rate": 5.69748743718593e-05, - "loss": 4.6898, - "step": 43326 - }, - { - "epoch": 22.595567144719688, - "grad_norm": 1.4869896173477173, - "learning_rate": 5.6973869346733664e-05, - "loss": 5.5308, - "step": 43327 - }, - { - "epoch": 22.596088657105607, - "grad_norm": 1.5084805488586426, - "learning_rate": 5.697286432160804e-05, - "loss": 5.5239, - "step": 43328 - }, - { - "epoch": 22.596610169491527, - "grad_norm": 1.5118868350982666, - "learning_rate": 5.697185929648241e-05, - "loss": 5.3679, - "step": 43329 - }, - { - "epoch": 22.597131681877446, - "grad_norm": 1.5370091199874878, - "learning_rate": 5.697085427135679e-05, - "loss": 5.1151, - "step": 43330 - }, - { - "epoch": 22.597653194263362, - "grad_norm": 1.5238187313079834, - "learning_rate": 5.6969849246231155e-05, - "loss": 5.3439, - "step": 43331 - }, - { - "epoch": 22.59817470664928, - "grad_norm": 1.5585284233093262, - "learning_rate": 5.696884422110553e-05, - "loss": 4.9867, - "step": 43332 - }, - { - "epoch": 22.5986962190352, - "grad_norm": 1.588065266609192, - "learning_rate": 5.69678391959799e-05, - "loss": 5.1078, - "step": 43333 - }, - { - "epoch": 22.59921773142112, - "grad_norm": 1.561334490776062, - "learning_rate": 5.6966834170854275e-05, - "loss": 5.4776, - "step": 43334 - }, - { - "epoch": 22.59973924380704, - "grad_norm": 1.4545437097549438, - "learning_rate": 5.6965829145728646e-05, - "loss": 5.6543, - "step": 43335 - }, - { - "epoch": 22.60026075619296, - "grad_norm": 1.490424633026123, - "learning_rate": 5.6964824120603024e-05, - "loss": 5.4052, - "step": 43336 - }, - { - "epoch": 22.60078226857888, - "grad_norm": 1.4967079162597656, - "learning_rate": 5.696381909547739e-05, - "loss": 5.2738, - "step": 43337 - }, - { - "epoch": 22.6013037809648, - "grad_norm": 1.5988097190856934, - "learning_rate": 5.6962814070351766e-05, - "loss": 5.1009, - "step": 43338 - }, - { - "epoch": 22.601825293350718, - "grad_norm": 1.5766143798828125, - "learning_rate": 5.696180904522613e-05, - "loss": 5.6338, - "step": 43339 - }, - { - "epoch": 22.602346805736637, - "grad_norm": 1.5173710584640503, - "learning_rate": 5.69608040201005e-05, - "loss": 5.3059, - "step": 43340 - }, - { - "epoch": 22.602868318122557, - "grad_norm": 1.589680552482605, - "learning_rate": 5.695979899497488e-05, - "loss": 4.6069, - "step": 43341 - }, - { - "epoch": 22.603389830508476, - "grad_norm": 1.4725210666656494, - "learning_rate": 5.695879396984924e-05, - "loss": 5.4619, - "step": 43342 - }, - { - "epoch": 22.603911342894392, - "grad_norm": 1.5072097778320312, - "learning_rate": 5.695778894472362e-05, - "loss": 5.273, - "step": 43343 - }, - { - "epoch": 22.60443285528031, - "grad_norm": 1.6802442073822021, - "learning_rate": 5.695678391959799e-05, - "loss": 5.3098, - "step": 43344 - }, - { - "epoch": 22.60495436766623, - "grad_norm": 1.5687181949615479, - "learning_rate": 5.695577889447237e-05, - "loss": 5.3782, - "step": 43345 - }, - { - "epoch": 22.60547588005215, - "grad_norm": 1.557298183441162, - "learning_rate": 5.6954773869346734e-05, - "loss": 4.2172, - "step": 43346 - }, - { - "epoch": 22.60599739243807, - "grad_norm": 1.4804941415786743, - "learning_rate": 5.695376884422111e-05, - "loss": 5.5081, - "step": 43347 - }, - { - "epoch": 22.60651890482399, - "grad_norm": 1.5630719661712646, - "learning_rate": 5.6952763819095476e-05, - "loss": 5.2766, - "step": 43348 - }, - { - "epoch": 22.60704041720991, - "grad_norm": 1.5567946434020996, - "learning_rate": 5.6951758793969854e-05, - "loss": 5.383, - "step": 43349 - }, - { - "epoch": 22.60756192959583, - "grad_norm": 1.6519356966018677, - "learning_rate": 5.6950753768844225e-05, - "loss": 4.7608, - "step": 43350 - }, - { - "epoch": 22.608083441981748, - "grad_norm": 1.5048415660858154, - "learning_rate": 5.69497487437186e-05, - "loss": 5.087, - "step": 43351 - }, - { - "epoch": 22.608604954367667, - "grad_norm": 1.5025955438613892, - "learning_rate": 5.694874371859297e-05, - "loss": 5.6412, - "step": 43352 - }, - { - "epoch": 22.609126466753587, - "grad_norm": 1.4400184154510498, - "learning_rate": 5.694773869346733e-05, - "loss": 5.3645, - "step": 43353 - }, - { - "epoch": 22.609647979139506, - "grad_norm": 1.5571807622909546, - "learning_rate": 5.694673366834171e-05, - "loss": 4.4673, - "step": 43354 - }, - { - "epoch": 22.610169491525422, - "grad_norm": 1.501349925994873, - "learning_rate": 5.694572864321608e-05, - "loss": 5.3425, - "step": 43355 - }, - { - "epoch": 22.61069100391134, - "grad_norm": 1.6236090660095215, - "learning_rate": 5.694472361809046e-05, - "loss": 4.658, - "step": 43356 - }, - { - "epoch": 22.61121251629726, - "grad_norm": 1.526179313659668, - "learning_rate": 5.694371859296482e-05, - "loss": 4.9504, - "step": 43357 - }, - { - "epoch": 22.61173402868318, - "grad_norm": 1.5293267965316772, - "learning_rate": 5.69427135678392e-05, - "loss": 5.1592, - "step": 43358 - }, - { - "epoch": 22.6122555410691, - "grad_norm": 1.6158748865127563, - "learning_rate": 5.6941708542713565e-05, - "loss": 5.191, - "step": 43359 - }, - { - "epoch": 22.61277705345502, - "grad_norm": 1.4765465259552002, - "learning_rate": 5.694070351758794e-05, - "loss": 5.3882, - "step": 43360 - }, - { - "epoch": 22.61329856584094, - "grad_norm": 1.545562505722046, - "learning_rate": 5.6939698492462314e-05, - "loss": 4.7515, - "step": 43361 - }, - { - "epoch": 22.61382007822686, - "grad_norm": 1.5682399272918701, - "learning_rate": 5.693869346733669e-05, - "loss": 5.7196, - "step": 43362 - }, - { - "epoch": 22.614341590612778, - "grad_norm": 1.6733752489089966, - "learning_rate": 5.6937688442211056e-05, - "loss": 4.8292, - "step": 43363 - }, - { - "epoch": 22.614863102998697, - "grad_norm": 1.580146074295044, - "learning_rate": 5.6936683417085434e-05, - "loss": 5.0742, - "step": 43364 - }, - { - "epoch": 22.615384615384617, - "grad_norm": 1.4860153198242188, - "learning_rate": 5.6935678391959805e-05, - "loss": 5.4281, - "step": 43365 - }, - { - "epoch": 22.615906127770536, - "grad_norm": 1.531622290611267, - "learning_rate": 5.693467336683417e-05, - "loss": 5.33, - "step": 43366 - }, - { - "epoch": 22.616427640156452, - "grad_norm": 1.4644767045974731, - "learning_rate": 5.693366834170855e-05, - "loss": 5.4932, - "step": 43367 - }, - { - "epoch": 22.61694915254237, - "grad_norm": 1.5520808696746826, - "learning_rate": 5.693266331658291e-05, - "loss": 4.9282, - "step": 43368 - }, - { - "epoch": 22.61747066492829, - "grad_norm": 1.4997872114181519, - "learning_rate": 5.693165829145729e-05, - "loss": 5.1316, - "step": 43369 - }, - { - "epoch": 22.61799217731421, - "grad_norm": 1.5395225286483765, - "learning_rate": 5.693065326633166e-05, - "loss": 5.3594, - "step": 43370 - }, - { - "epoch": 22.61851368970013, - "grad_norm": 1.6091073751449585, - "learning_rate": 5.692964824120604e-05, - "loss": 4.8329, - "step": 43371 - }, - { - "epoch": 22.61903520208605, - "grad_norm": 1.6259477138519287, - "learning_rate": 5.69286432160804e-05, - "loss": 4.9108, - "step": 43372 - }, - { - "epoch": 22.61955671447197, - "grad_norm": 1.5601993799209595, - "learning_rate": 5.692763819095478e-05, - "loss": 5.1551, - "step": 43373 - }, - { - "epoch": 22.62007822685789, - "grad_norm": 1.6736907958984375, - "learning_rate": 5.6926633165829144e-05, - "loss": 4.7729, - "step": 43374 - }, - { - "epoch": 22.620599739243808, - "grad_norm": 1.617360234260559, - "learning_rate": 5.692562814070352e-05, - "loss": 5.4219, - "step": 43375 - }, - { - "epoch": 22.621121251629727, - "grad_norm": 1.5513502359390259, - "learning_rate": 5.692462311557789e-05, - "loss": 5.0617, - "step": 43376 - }, - { - "epoch": 22.621642764015647, - "grad_norm": 1.5137771368026733, - "learning_rate": 5.692361809045227e-05, - "loss": 5.2299, - "step": 43377 - }, - { - "epoch": 22.622164276401566, - "grad_norm": 1.4652163982391357, - "learning_rate": 5.6922613065326635e-05, - "loss": 5.5083, - "step": 43378 - }, - { - "epoch": 22.622685788787482, - "grad_norm": 1.447622537612915, - "learning_rate": 5.692160804020101e-05, - "loss": 5.6127, - "step": 43379 - }, - { - "epoch": 22.6232073011734, - "grad_norm": 1.6096493005752563, - "learning_rate": 5.692060301507538e-05, - "loss": 4.89, - "step": 43380 - }, - { - "epoch": 22.62372881355932, - "grad_norm": 1.4738688468933105, - "learning_rate": 5.691959798994975e-05, - "loss": 5.2871, - "step": 43381 - }, - { - "epoch": 22.62425032594524, - "grad_norm": 1.5074107646942139, - "learning_rate": 5.6918592964824126e-05, - "loss": 5.5065, - "step": 43382 - }, - { - "epoch": 22.62477183833116, - "grad_norm": 1.4599087238311768, - "learning_rate": 5.691758793969849e-05, - "loss": 5.5128, - "step": 43383 - }, - { - "epoch": 22.62529335071708, - "grad_norm": 1.5737236738204956, - "learning_rate": 5.691658291457287e-05, - "loss": 5.1804, - "step": 43384 - }, - { - "epoch": 22.625814863103, - "grad_norm": 1.5290586948394775, - "learning_rate": 5.691557788944723e-05, - "loss": 5.443, - "step": 43385 - }, - { - "epoch": 22.62633637548892, - "grad_norm": 1.628075361251831, - "learning_rate": 5.691457286432161e-05, - "loss": 5.3524, - "step": 43386 - }, - { - "epoch": 22.626857887874838, - "grad_norm": 1.5058979988098145, - "learning_rate": 5.691356783919598e-05, - "loss": 5.5274, - "step": 43387 - }, - { - "epoch": 22.627379400260757, - "grad_norm": 1.4948521852493286, - "learning_rate": 5.691256281407036e-05, - "loss": 5.2274, - "step": 43388 - }, - { - "epoch": 22.627900912646677, - "grad_norm": 1.455642580986023, - "learning_rate": 5.6911557788944723e-05, - "loss": 4.8948, - "step": 43389 - }, - { - "epoch": 22.628422425032596, - "grad_norm": 1.4949259757995605, - "learning_rate": 5.69105527638191e-05, - "loss": 5.6025, - "step": 43390 - }, - { - "epoch": 22.628943937418512, - "grad_norm": 1.5313342809677124, - "learning_rate": 5.690954773869347e-05, - "loss": 5.6891, - "step": 43391 - }, - { - "epoch": 22.62946544980443, - "grad_norm": 1.5686628818511963, - "learning_rate": 5.690854271356785e-05, - "loss": 5.3456, - "step": 43392 - }, - { - "epoch": 22.62998696219035, - "grad_norm": 1.6155208349227905, - "learning_rate": 5.6907537688442214e-05, - "loss": 4.7524, - "step": 43393 - }, - { - "epoch": 22.63050847457627, - "grad_norm": 1.396476149559021, - "learning_rate": 5.690653266331658e-05, - "loss": 5.3261, - "step": 43394 - }, - { - "epoch": 22.63102998696219, - "grad_norm": 1.5512588024139404, - "learning_rate": 5.6905527638190957e-05, - "loss": 4.4608, - "step": 43395 - }, - { - "epoch": 22.63155149934811, - "grad_norm": 1.4881954193115234, - "learning_rate": 5.690452261306533e-05, - "loss": 5.3872, - "step": 43396 - }, - { - "epoch": 22.63207301173403, - "grad_norm": 1.5852798223495483, - "learning_rate": 5.6903517587939705e-05, - "loss": 5.2358, - "step": 43397 - }, - { - "epoch": 22.63259452411995, - "grad_norm": 1.5538225173950195, - "learning_rate": 5.690251256281407e-05, - "loss": 5.3084, - "step": 43398 - }, - { - "epoch": 22.633116036505868, - "grad_norm": 1.6098660230636597, - "learning_rate": 5.690150753768845e-05, - "loss": 5.207, - "step": 43399 - }, - { - "epoch": 22.633637548891787, - "grad_norm": 1.544649600982666, - "learning_rate": 5.690050251256281e-05, - "loss": 5.0862, - "step": 43400 - }, - { - "epoch": 22.634159061277707, - "grad_norm": 1.5251119136810303, - "learning_rate": 5.689949748743719e-05, - "loss": 4.37, - "step": 43401 - }, - { - "epoch": 22.634680573663623, - "grad_norm": 1.5308527946472168, - "learning_rate": 5.689849246231156e-05, - "loss": 5.4535, - "step": 43402 - }, - { - "epoch": 22.635202086049542, - "grad_norm": 1.5559757947921753, - "learning_rate": 5.689748743718594e-05, - "loss": 5.4594, - "step": 43403 - }, - { - "epoch": 22.63572359843546, - "grad_norm": 1.6279411315917969, - "learning_rate": 5.68964824120603e-05, - "loss": 5.1286, - "step": 43404 - }, - { - "epoch": 22.63624511082138, - "grad_norm": 1.6169337034225464, - "learning_rate": 5.689547738693468e-05, - "loss": 5.5433, - "step": 43405 - }, - { - "epoch": 22.6367666232073, - "grad_norm": 1.470681071281433, - "learning_rate": 5.6894472361809045e-05, - "loss": 5.3321, - "step": 43406 - }, - { - "epoch": 22.63728813559322, - "grad_norm": 1.7545480728149414, - "learning_rate": 5.6893467336683416e-05, - "loss": 4.5339, - "step": 43407 - }, - { - "epoch": 22.63780964797914, - "grad_norm": 1.5468413829803467, - "learning_rate": 5.6892462311557794e-05, - "loss": 5.5344, - "step": 43408 - }, - { - "epoch": 22.63833116036506, - "grad_norm": 1.5201261043548584, - "learning_rate": 5.689145728643216e-05, - "loss": 5.2349, - "step": 43409 - }, - { - "epoch": 22.63885267275098, - "grad_norm": 1.591415286064148, - "learning_rate": 5.6890452261306536e-05, - "loss": 5.4789, - "step": 43410 - }, - { - "epoch": 22.639374185136898, - "grad_norm": 1.5852774381637573, - "learning_rate": 5.688944723618091e-05, - "loss": 4.9207, - "step": 43411 - }, - { - "epoch": 22.639895697522817, - "grad_norm": 1.5791733264923096, - "learning_rate": 5.6888442211055285e-05, - "loss": 5.2969, - "step": 43412 - }, - { - "epoch": 22.640417209908737, - "grad_norm": 1.498121976852417, - "learning_rate": 5.688743718592965e-05, - "loss": 5.5087, - "step": 43413 - }, - { - "epoch": 22.640938722294656, - "grad_norm": 1.600346565246582, - "learning_rate": 5.688643216080403e-05, - "loss": 5.0717, - "step": 43414 - }, - { - "epoch": 22.641460234680572, - "grad_norm": 1.5682787895202637, - "learning_rate": 5.688542713567839e-05, - "loss": 5.4815, - "step": 43415 - }, - { - "epoch": 22.64198174706649, - "grad_norm": 1.5074329376220703, - "learning_rate": 5.688442211055277e-05, - "loss": 5.5268, - "step": 43416 - }, - { - "epoch": 22.64250325945241, - "grad_norm": 1.5524739027023315, - "learning_rate": 5.688341708542714e-05, - "loss": 5.5597, - "step": 43417 - }, - { - "epoch": 22.64302477183833, - "grad_norm": 1.5351697206497192, - "learning_rate": 5.688241206030152e-05, - "loss": 5.0179, - "step": 43418 - }, - { - "epoch": 22.64354628422425, - "grad_norm": 1.496140480041504, - "learning_rate": 5.688140703517588e-05, - "loss": 5.3386, - "step": 43419 - }, - { - "epoch": 22.64406779661017, - "grad_norm": 1.448280692100525, - "learning_rate": 5.6880402010050246e-05, - "loss": 5.4146, - "step": 43420 - }, - { - "epoch": 22.64458930899609, - "grad_norm": 1.6069144010543823, - "learning_rate": 5.6879396984924624e-05, - "loss": 5.3766, - "step": 43421 - }, - { - "epoch": 22.64511082138201, - "grad_norm": 1.459661841392517, - "learning_rate": 5.6878391959798995e-05, - "loss": 4.4722, - "step": 43422 - }, - { - "epoch": 22.645632333767928, - "grad_norm": 1.5729111433029175, - "learning_rate": 5.687738693467337e-05, - "loss": 5.4468, - "step": 43423 - }, - { - "epoch": 22.646153846153847, - "grad_norm": 1.602390170097351, - "learning_rate": 5.687638190954774e-05, - "loss": 5.2264, - "step": 43424 - }, - { - "epoch": 22.646675358539767, - "grad_norm": 1.680341124534607, - "learning_rate": 5.6875376884422115e-05, - "loss": 5.0955, - "step": 43425 - }, - { - "epoch": 22.647196870925683, - "grad_norm": 1.6376413106918335, - "learning_rate": 5.687437185929648e-05, - "loss": 4.767, - "step": 43426 - }, - { - "epoch": 22.647718383311602, - "grad_norm": 1.5398024320602417, - "learning_rate": 5.687336683417086e-05, - "loss": 5.6833, - "step": 43427 - }, - { - "epoch": 22.64823989569752, - "grad_norm": 1.5230238437652588, - "learning_rate": 5.687236180904523e-05, - "loss": 5.3886, - "step": 43428 - }, - { - "epoch": 22.64876140808344, - "grad_norm": 1.5326732397079468, - "learning_rate": 5.6871356783919606e-05, - "loss": 5.5722, - "step": 43429 - }, - { - "epoch": 22.64928292046936, - "grad_norm": 1.5411186218261719, - "learning_rate": 5.687035175879397e-05, - "loss": 5.2122, - "step": 43430 - }, - { - "epoch": 22.64980443285528, - "grad_norm": 1.5744762420654297, - "learning_rate": 5.686934673366835e-05, - "loss": 4.9478, - "step": 43431 - }, - { - "epoch": 22.6503259452412, - "grad_norm": 1.6029685735702515, - "learning_rate": 5.686834170854272e-05, - "loss": 4.4127, - "step": 43432 - }, - { - "epoch": 22.65084745762712, - "grad_norm": 1.4619553089141846, - "learning_rate": 5.6867336683417084e-05, - "loss": 5.5804, - "step": 43433 - }, - { - "epoch": 22.65136897001304, - "grad_norm": 1.495229959487915, - "learning_rate": 5.686633165829146e-05, - "loss": 5.3774, - "step": 43434 - }, - { - "epoch": 22.651890482398958, - "grad_norm": 1.512961983680725, - "learning_rate": 5.6865326633165826e-05, - "loss": 5.2747, - "step": 43435 - }, - { - "epoch": 22.652411994784877, - "grad_norm": 1.6186894178390503, - "learning_rate": 5.6864321608040204e-05, - "loss": 4.6514, - "step": 43436 - }, - { - "epoch": 22.652933507170797, - "grad_norm": 1.6435412168502808, - "learning_rate": 5.6863316582914575e-05, - "loss": 5.2517, - "step": 43437 - }, - { - "epoch": 22.653455019556713, - "grad_norm": 1.561131238937378, - "learning_rate": 5.686231155778895e-05, - "loss": 5.1358, - "step": 43438 - }, - { - "epoch": 22.653976531942632, - "grad_norm": 1.530440092086792, - "learning_rate": 5.686130653266332e-05, - "loss": 5.0956, - "step": 43439 - }, - { - "epoch": 22.65449804432855, - "grad_norm": 1.5329136848449707, - "learning_rate": 5.6860301507537695e-05, - "loss": 5.2689, - "step": 43440 - }, - { - "epoch": 22.65501955671447, - "grad_norm": 1.511812448501587, - "learning_rate": 5.685929648241206e-05, - "loss": 5.2321, - "step": 43441 - }, - { - "epoch": 22.65554106910039, - "grad_norm": 1.5152184963226318, - "learning_rate": 5.685829145728644e-05, - "loss": 4.9315, - "step": 43442 - }, - { - "epoch": 22.65606258148631, - "grad_norm": 1.6392571926116943, - "learning_rate": 5.685728643216081e-05, - "loss": 5.094, - "step": 43443 - }, - { - "epoch": 22.65658409387223, - "grad_norm": 1.3999741077423096, - "learning_rate": 5.6856281407035186e-05, - "loss": 5.3789, - "step": 43444 - }, - { - "epoch": 22.65710560625815, - "grad_norm": 1.5350711345672607, - "learning_rate": 5.685527638190955e-05, - "loss": 5.225, - "step": 43445 - }, - { - "epoch": 22.65762711864407, - "grad_norm": 1.5857648849487305, - "learning_rate": 5.6854271356783914e-05, - "loss": 5.2556, - "step": 43446 - }, - { - "epoch": 22.658148631029988, - "grad_norm": 1.46377694606781, - "learning_rate": 5.685326633165829e-05, - "loss": 5.6548, - "step": 43447 - }, - { - "epoch": 22.658670143415907, - "grad_norm": 1.5281000137329102, - "learning_rate": 5.685226130653266e-05, - "loss": 5.6763, - "step": 43448 - }, - { - "epoch": 22.659191655801827, - "grad_norm": 1.4469417333602905, - "learning_rate": 5.685125628140704e-05, - "loss": 5.4109, - "step": 43449 - }, - { - "epoch": 22.659713168187743, - "grad_norm": 1.5894666910171509, - "learning_rate": 5.6850251256281405e-05, - "loss": 5.0664, - "step": 43450 - }, - { - "epoch": 22.660234680573662, - "grad_norm": 1.5560327768325806, - "learning_rate": 5.684924623115578e-05, - "loss": 5.498, - "step": 43451 - }, - { - "epoch": 22.66075619295958, - "grad_norm": 1.506399393081665, - "learning_rate": 5.6848241206030154e-05, - "loss": 5.3683, - "step": 43452 - }, - { - "epoch": 22.6612777053455, - "grad_norm": 1.5730129480361938, - "learning_rate": 5.684723618090453e-05, - "loss": 5.2027, - "step": 43453 - }, - { - "epoch": 22.66179921773142, - "grad_norm": 1.5869864225387573, - "learning_rate": 5.6846231155778896e-05, - "loss": 4.9663, - "step": 43454 - }, - { - "epoch": 22.66232073011734, - "grad_norm": 1.370853066444397, - "learning_rate": 5.6845226130653274e-05, - "loss": 5.8782, - "step": 43455 - }, - { - "epoch": 22.66284224250326, - "grad_norm": 1.5110903978347778, - "learning_rate": 5.684422110552764e-05, - "loss": 5.4926, - "step": 43456 - }, - { - "epoch": 22.66336375488918, - "grad_norm": 1.5697135925292969, - "learning_rate": 5.6843216080402016e-05, - "loss": 5.2669, - "step": 43457 - }, - { - "epoch": 22.6638852672751, - "grad_norm": 1.5262030363082886, - "learning_rate": 5.684221105527639e-05, - "loss": 5.1738, - "step": 43458 - }, - { - "epoch": 22.664406779661018, - "grad_norm": 1.6138776540756226, - "learning_rate": 5.684120603015075e-05, - "loss": 4.9495, - "step": 43459 - }, - { - "epoch": 22.664928292046937, - "grad_norm": 1.5255464315414429, - "learning_rate": 5.684020100502513e-05, - "loss": 5.8651, - "step": 43460 - }, - { - "epoch": 22.665449804432857, - "grad_norm": 1.4810463190078735, - "learning_rate": 5.683919597989949e-05, - "loss": 5.5218, - "step": 43461 - }, - { - "epoch": 22.665971316818773, - "grad_norm": 1.510490894317627, - "learning_rate": 5.683819095477387e-05, - "loss": 5.3512, - "step": 43462 - }, - { - "epoch": 22.666492829204692, - "grad_norm": 1.6599408388137817, - "learning_rate": 5.683718592964824e-05, - "loss": 4.6698, - "step": 43463 - }, - { - "epoch": 22.667014341590612, - "grad_norm": 1.5557852983474731, - "learning_rate": 5.683618090452262e-05, - "loss": 5.1996, - "step": 43464 - }, - { - "epoch": 22.66753585397653, - "grad_norm": 1.4641211032867432, - "learning_rate": 5.6835175879396984e-05, - "loss": 5.4641, - "step": 43465 - }, - { - "epoch": 22.66805736636245, - "grad_norm": 1.5515233278274536, - "learning_rate": 5.683417085427136e-05, - "loss": 4.9672, - "step": 43466 - }, - { - "epoch": 22.66857887874837, - "grad_norm": 1.4964193105697632, - "learning_rate": 5.6833165829145726e-05, - "loss": 5.4476, - "step": 43467 - }, - { - "epoch": 22.66910039113429, - "grad_norm": 1.560524582862854, - "learning_rate": 5.6832160804020104e-05, - "loss": 5.0456, - "step": 43468 - }, - { - "epoch": 22.66962190352021, - "grad_norm": 1.5053558349609375, - "learning_rate": 5.6831155778894475e-05, - "loss": 5.7724, - "step": 43469 - }, - { - "epoch": 22.67014341590613, - "grad_norm": 1.4117577075958252, - "learning_rate": 5.683015075376885e-05, - "loss": 5.0519, - "step": 43470 - }, - { - "epoch": 22.670664928292048, - "grad_norm": 1.4025828838348389, - "learning_rate": 5.682914572864322e-05, - "loss": 4.9893, - "step": 43471 - }, - { - "epoch": 22.671186440677968, - "grad_norm": 1.6143975257873535, - "learning_rate": 5.6828140703517595e-05, - "loss": 5.1728, - "step": 43472 - }, - { - "epoch": 22.671707953063887, - "grad_norm": 1.624085783958435, - "learning_rate": 5.682713567839196e-05, - "loss": 4.8093, - "step": 43473 - }, - { - "epoch": 22.672229465449803, - "grad_norm": 1.5144000053405762, - "learning_rate": 5.682613065326633e-05, - "loss": 5.3705, - "step": 43474 - }, - { - "epoch": 22.672750977835722, - "grad_norm": 1.5274183750152588, - "learning_rate": 5.682512562814071e-05, - "loss": 5.3755, - "step": 43475 - }, - { - "epoch": 22.673272490221642, - "grad_norm": 1.468226432800293, - "learning_rate": 5.682412060301507e-05, - "loss": 5.4724, - "step": 43476 - }, - { - "epoch": 22.67379400260756, - "grad_norm": 1.4956412315368652, - "learning_rate": 5.682311557788945e-05, - "loss": 5.2382, - "step": 43477 - }, - { - "epoch": 22.67431551499348, - "grad_norm": 1.5511806011199951, - "learning_rate": 5.682211055276382e-05, - "loss": 5.3287, - "step": 43478 - }, - { - "epoch": 22.6748370273794, - "grad_norm": 1.6077880859375, - "learning_rate": 5.68211055276382e-05, - "loss": 4.9851, - "step": 43479 - }, - { - "epoch": 22.67535853976532, - "grad_norm": 1.5828624963760376, - "learning_rate": 5.6820100502512564e-05, - "loss": 5.4475, - "step": 43480 - }, - { - "epoch": 22.67588005215124, - "grad_norm": 1.4829890727996826, - "learning_rate": 5.681909547738694e-05, - "loss": 5.6844, - "step": 43481 - }, - { - "epoch": 22.67640156453716, - "grad_norm": 1.503535270690918, - "learning_rate": 5.6818090452261306e-05, - "loss": 4.952, - "step": 43482 - }, - { - "epoch": 22.676923076923078, - "grad_norm": 1.7117077112197876, - "learning_rate": 5.6817085427135684e-05, - "loss": 5.4892, - "step": 43483 - }, - { - "epoch": 22.677444589308998, - "grad_norm": 1.5806976556777954, - "learning_rate": 5.6816080402010055e-05, - "loss": 5.5147, - "step": 43484 - }, - { - "epoch": 22.677966101694913, - "grad_norm": 1.714493989944458, - "learning_rate": 5.681507537688443e-05, - "loss": 5.1543, - "step": 43485 - }, - { - "epoch": 22.678487614080833, - "grad_norm": 1.5772976875305176, - "learning_rate": 5.68140703517588e-05, - "loss": 5.0268, - "step": 43486 - }, - { - "epoch": 22.679009126466752, - "grad_norm": 1.585015892982483, - "learning_rate": 5.681306532663316e-05, - "loss": 5.7949, - "step": 43487 - }, - { - "epoch": 22.679530638852672, - "grad_norm": 1.4531642198562622, - "learning_rate": 5.681206030150754e-05, - "loss": 5.3754, - "step": 43488 - }, - { - "epoch": 22.68005215123859, - "grad_norm": 1.4830397367477417, - "learning_rate": 5.681105527638191e-05, - "loss": 4.8818, - "step": 43489 - }, - { - "epoch": 22.68057366362451, - "grad_norm": 1.594510793685913, - "learning_rate": 5.681005025125629e-05, - "loss": 4.9427, - "step": 43490 - }, - { - "epoch": 22.68109517601043, - "grad_norm": 1.6483129262924194, - "learning_rate": 5.680904522613065e-05, - "loss": 5.4484, - "step": 43491 - }, - { - "epoch": 22.68161668839635, - "grad_norm": 1.7799484729766846, - "learning_rate": 5.680804020100503e-05, - "loss": 4.49, - "step": 43492 - }, - { - "epoch": 22.68213820078227, - "grad_norm": 1.4736367464065552, - "learning_rate": 5.6807035175879394e-05, - "loss": 5.2339, - "step": 43493 - }, - { - "epoch": 22.68265971316819, - "grad_norm": 1.5133734941482544, - "learning_rate": 5.680603015075377e-05, - "loss": 5.3411, - "step": 43494 - }, - { - "epoch": 22.683181225554108, - "grad_norm": 1.626164436340332, - "learning_rate": 5.680502512562814e-05, - "loss": 4.5177, - "step": 43495 - }, - { - "epoch": 22.683702737940028, - "grad_norm": 1.632803201675415, - "learning_rate": 5.680402010050252e-05, - "loss": 4.6958, - "step": 43496 - }, - { - "epoch": 22.684224250325947, - "grad_norm": 1.4988389015197754, - "learning_rate": 5.6803015075376885e-05, - "loss": 5.4149, - "step": 43497 - }, - { - "epoch": 22.684745762711863, - "grad_norm": 1.5293842554092407, - "learning_rate": 5.680201005025126e-05, - "loss": 5.531, - "step": 43498 - }, - { - "epoch": 22.685267275097782, - "grad_norm": 1.604077696800232, - "learning_rate": 5.6801005025125634e-05, - "loss": 5.2417, - "step": 43499 - }, - { - "epoch": 22.685788787483702, - "grad_norm": 1.4824504852294922, - "learning_rate": 5.68e-05, - "loss": 5.3253, - "step": 43500 - }, - { - "epoch": 22.68631029986962, - "grad_norm": 1.5647428035736084, - "learning_rate": 5.6798994974874376e-05, - "loss": 5.5283, - "step": 43501 - }, - { - "epoch": 22.68683181225554, - "grad_norm": 1.624413251876831, - "learning_rate": 5.679798994974874e-05, - "loss": 4.9395, - "step": 43502 - }, - { - "epoch": 22.68735332464146, - "grad_norm": 1.4406791925430298, - "learning_rate": 5.679698492462312e-05, - "loss": 5.2668, - "step": 43503 - }, - { - "epoch": 22.68787483702738, - "grad_norm": 1.4966073036193848, - "learning_rate": 5.679597989949749e-05, - "loss": 5.357, - "step": 43504 - }, - { - "epoch": 22.6883963494133, - "grad_norm": 1.5166990756988525, - "learning_rate": 5.679497487437187e-05, - "loss": 5.5096, - "step": 43505 - }, - { - "epoch": 22.68891786179922, - "grad_norm": 1.568264126777649, - "learning_rate": 5.679396984924623e-05, - "loss": 5.3761, - "step": 43506 - }, - { - "epoch": 22.689439374185138, - "grad_norm": 1.5365759134292603, - "learning_rate": 5.679296482412061e-05, - "loss": 5.2755, - "step": 43507 - }, - { - "epoch": 22.689960886571058, - "grad_norm": 1.5792359113693237, - "learning_rate": 5.6791959798994973e-05, - "loss": 5.0718, - "step": 43508 - }, - { - "epoch": 22.690482398956973, - "grad_norm": 1.488196849822998, - "learning_rate": 5.679095477386935e-05, - "loss": 5.6517, - "step": 43509 - }, - { - "epoch": 22.691003911342893, - "grad_norm": 1.5041993856430054, - "learning_rate": 5.678994974874372e-05, - "loss": 5.0485, - "step": 43510 - }, - { - "epoch": 22.691525423728812, - "grad_norm": 1.5619808435440063, - "learning_rate": 5.67889447236181e-05, - "loss": 4.5185, - "step": 43511 - }, - { - "epoch": 22.692046936114732, - "grad_norm": 1.5983145236968994, - "learning_rate": 5.6787939698492464e-05, - "loss": 5.2646, - "step": 43512 - }, - { - "epoch": 22.69256844850065, - "grad_norm": 1.5716911554336548, - "learning_rate": 5.678693467336683e-05, - "loss": 5.2386, - "step": 43513 - }, - { - "epoch": 22.69308996088657, - "grad_norm": 1.590446949005127, - "learning_rate": 5.6785929648241207e-05, - "loss": 5.2132, - "step": 43514 - }, - { - "epoch": 22.69361147327249, - "grad_norm": 1.6289048194885254, - "learning_rate": 5.678492462311558e-05, - "loss": 5.3967, - "step": 43515 - }, - { - "epoch": 22.69413298565841, - "grad_norm": 1.5387171506881714, - "learning_rate": 5.6783919597989955e-05, - "loss": 5.2391, - "step": 43516 - }, - { - "epoch": 22.69465449804433, - "grad_norm": 1.7043304443359375, - "learning_rate": 5.678291457286432e-05, - "loss": 5.0614, - "step": 43517 - }, - { - "epoch": 22.69517601043025, - "grad_norm": 1.5253762006759644, - "learning_rate": 5.67819095477387e-05, - "loss": 5.6708, - "step": 43518 - }, - { - "epoch": 22.695697522816168, - "grad_norm": 1.5298278331756592, - "learning_rate": 5.678090452261307e-05, - "loss": 5.2562, - "step": 43519 - }, - { - "epoch": 22.696219035202088, - "grad_norm": 1.509393334388733, - "learning_rate": 5.6779899497487446e-05, - "loss": 5.2055, - "step": 43520 - }, - { - "epoch": 22.696740547588004, - "grad_norm": 1.4686716794967651, - "learning_rate": 5.677889447236181e-05, - "loss": 5.4163, - "step": 43521 - }, - { - "epoch": 22.697262059973923, - "grad_norm": 1.708779215812683, - "learning_rate": 5.677788944723619e-05, - "loss": 5.1745, - "step": 43522 - }, - { - "epoch": 22.697783572359842, - "grad_norm": 1.5766597986221313, - "learning_rate": 5.677688442211055e-05, - "loss": 5.1826, - "step": 43523 - }, - { - "epoch": 22.698305084745762, - "grad_norm": 1.630409598350525, - "learning_rate": 5.677587939698493e-05, - "loss": 5.0573, - "step": 43524 - }, - { - "epoch": 22.69882659713168, - "grad_norm": 1.6618056297302246, - "learning_rate": 5.67748743718593e-05, - "loss": 4.916, - "step": 43525 - }, - { - "epoch": 22.6993481095176, - "grad_norm": 1.4817008972167969, - "learning_rate": 5.6773869346733666e-05, - "loss": 5.1963, - "step": 43526 - }, - { - "epoch": 22.69986962190352, - "grad_norm": 1.5734859704971313, - "learning_rate": 5.6772864321608044e-05, - "loss": 5.0859, - "step": 43527 - }, - { - "epoch": 22.70039113428944, - "grad_norm": 1.5953967571258545, - "learning_rate": 5.677185929648241e-05, - "loss": 5.3767, - "step": 43528 - }, - { - "epoch": 22.70091264667536, - "grad_norm": 1.5312923192977905, - "learning_rate": 5.6770854271356786e-05, - "loss": 5.2412, - "step": 43529 - }, - { - "epoch": 22.70143415906128, - "grad_norm": 1.4814808368682861, - "learning_rate": 5.676984924623116e-05, - "loss": 5.6486, - "step": 43530 - }, - { - "epoch": 22.701955671447198, - "grad_norm": 1.40824294090271, - "learning_rate": 5.6768844221105535e-05, - "loss": 5.5594, - "step": 43531 - }, - { - "epoch": 22.702477183833118, - "grad_norm": 1.492762804031372, - "learning_rate": 5.67678391959799e-05, - "loss": 4.9764, - "step": 43532 - }, - { - "epoch": 22.702998696219034, - "grad_norm": 1.5337538719177246, - "learning_rate": 5.676683417085428e-05, - "loss": 5.7506, - "step": 43533 - }, - { - "epoch": 22.703520208604953, - "grad_norm": 1.5257457494735718, - "learning_rate": 5.676582914572864e-05, - "loss": 5.2241, - "step": 43534 - }, - { - "epoch": 22.704041720990872, - "grad_norm": 1.679463267326355, - "learning_rate": 5.676482412060302e-05, - "loss": 4.7901, - "step": 43535 - }, - { - "epoch": 22.704563233376792, - "grad_norm": 1.5392781496047974, - "learning_rate": 5.676381909547739e-05, - "loss": 5.0232, - "step": 43536 - }, - { - "epoch": 22.70508474576271, - "grad_norm": 1.5237269401550293, - "learning_rate": 5.676281407035177e-05, - "loss": 5.5379, - "step": 43537 - }, - { - "epoch": 22.70560625814863, - "grad_norm": 1.4810911417007446, - "learning_rate": 5.676180904522613e-05, - "loss": 5.6565, - "step": 43538 - }, - { - "epoch": 22.70612777053455, - "grad_norm": 1.4005436897277832, - "learning_rate": 5.67608040201005e-05, - "loss": 5.393, - "step": 43539 - }, - { - "epoch": 22.70664928292047, - "grad_norm": 1.6453535556793213, - "learning_rate": 5.675979899497488e-05, - "loss": 5.1728, - "step": 43540 - }, - { - "epoch": 22.70717079530639, - "grad_norm": 1.5412641763687134, - "learning_rate": 5.6758793969849245e-05, - "loss": 5.409, - "step": 43541 - }, - { - "epoch": 22.70769230769231, - "grad_norm": 1.6259503364562988, - "learning_rate": 5.675778894472362e-05, - "loss": 4.4565, - "step": 43542 - }, - { - "epoch": 22.708213820078228, - "grad_norm": 1.551547884941101, - "learning_rate": 5.675678391959799e-05, - "loss": 5.1626, - "step": 43543 - }, - { - "epoch": 22.708735332464148, - "grad_norm": 1.6154043674468994, - "learning_rate": 5.6755778894472365e-05, - "loss": 4.972, - "step": 43544 - }, - { - "epoch": 22.709256844850064, - "grad_norm": 1.4810090065002441, - "learning_rate": 5.6754773869346736e-05, - "loss": 4.8992, - "step": 43545 - }, - { - "epoch": 22.709778357235983, - "grad_norm": 1.4425731897354126, - "learning_rate": 5.6753768844221114e-05, - "loss": 5.3868, - "step": 43546 - }, - { - "epoch": 22.710299869621903, - "grad_norm": 1.5424957275390625, - "learning_rate": 5.675276381909548e-05, - "loss": 5.0856, - "step": 43547 - }, - { - "epoch": 22.710821382007822, - "grad_norm": 1.5184839963912964, - "learning_rate": 5.6751758793969856e-05, - "loss": 5.228, - "step": 43548 - }, - { - "epoch": 22.71134289439374, - "grad_norm": 1.5284631252288818, - "learning_rate": 5.675075376884422e-05, - "loss": 5.183, - "step": 43549 - }, - { - "epoch": 22.71186440677966, - "grad_norm": 1.5117998123168945, - "learning_rate": 5.67497487437186e-05, - "loss": 5.2011, - "step": 43550 - }, - { - "epoch": 22.71238591916558, - "grad_norm": 1.5185693502426147, - "learning_rate": 5.674874371859297e-05, - "loss": 5.0064, - "step": 43551 - }, - { - "epoch": 22.7129074315515, - "grad_norm": 1.5872184038162231, - "learning_rate": 5.674773869346735e-05, - "loss": 4.9059, - "step": 43552 - }, - { - "epoch": 22.71342894393742, - "grad_norm": 1.5759347677230835, - "learning_rate": 5.674673366834171e-05, - "loss": 5.5491, - "step": 43553 - }, - { - "epoch": 22.71395045632334, - "grad_norm": 1.7255204916000366, - "learning_rate": 5.6745728643216076e-05, - "loss": 4.7667, - "step": 43554 - }, - { - "epoch": 22.714471968709258, - "grad_norm": 1.613344669342041, - "learning_rate": 5.6744723618090454e-05, - "loss": 4.965, - "step": 43555 - }, - { - "epoch": 22.714993481095178, - "grad_norm": 1.6315727233886719, - "learning_rate": 5.6743718592964825e-05, - "loss": 5.1913, - "step": 43556 - }, - { - "epoch": 22.715514993481094, - "grad_norm": 1.446747899055481, - "learning_rate": 5.67427135678392e-05, - "loss": 5.3358, - "step": 43557 - }, - { - "epoch": 22.716036505867013, - "grad_norm": 1.5313606262207031, - "learning_rate": 5.674170854271357e-05, - "loss": 4.5914, - "step": 43558 - }, - { - "epoch": 22.716558018252933, - "grad_norm": 1.5648066997528076, - "learning_rate": 5.6740703517587945e-05, - "loss": 5.1248, - "step": 43559 - }, - { - "epoch": 22.717079530638852, - "grad_norm": 1.560665249824524, - "learning_rate": 5.673969849246231e-05, - "loss": 5.186, - "step": 43560 - }, - { - "epoch": 22.71760104302477, - "grad_norm": 1.577545404434204, - "learning_rate": 5.673869346733669e-05, - "loss": 4.8958, - "step": 43561 - }, - { - "epoch": 22.71812255541069, - "grad_norm": 1.577321171760559, - "learning_rate": 5.673768844221106e-05, - "loss": 5.4797, - "step": 43562 - }, - { - "epoch": 22.71864406779661, - "grad_norm": 1.5836830139160156, - "learning_rate": 5.6736683417085436e-05, - "loss": 5.1977, - "step": 43563 - }, - { - "epoch": 22.71916558018253, - "grad_norm": 1.5391831398010254, - "learning_rate": 5.67356783919598e-05, - "loss": 5.2764, - "step": 43564 - }, - { - "epoch": 22.71968709256845, - "grad_norm": 1.477423071861267, - "learning_rate": 5.673467336683418e-05, - "loss": 5.5323, - "step": 43565 - }, - { - "epoch": 22.72020860495437, - "grad_norm": 1.504712700843811, - "learning_rate": 5.673366834170855e-05, - "loss": 5.3355, - "step": 43566 - }, - { - "epoch": 22.72073011734029, - "grad_norm": 1.5606032609939575, - "learning_rate": 5.673266331658291e-05, - "loss": 5.0343, - "step": 43567 - }, - { - "epoch": 22.721251629726208, - "grad_norm": 1.583422303199768, - "learning_rate": 5.673165829145729e-05, - "loss": 4.6195, - "step": 43568 - }, - { - "epoch": 22.721773142112124, - "grad_norm": 1.5231785774230957, - "learning_rate": 5.6730653266331655e-05, - "loss": 5.2844, - "step": 43569 - }, - { - "epoch": 22.722294654498043, - "grad_norm": 1.5133657455444336, - "learning_rate": 5.672964824120603e-05, - "loss": 4.6609, - "step": 43570 - }, - { - "epoch": 22.722816166883963, - "grad_norm": 1.4149539470672607, - "learning_rate": 5.6728643216080404e-05, - "loss": 5.6956, - "step": 43571 - }, - { - "epoch": 22.723337679269882, - "grad_norm": 1.5304936170578003, - "learning_rate": 5.672763819095478e-05, - "loss": 4.742, - "step": 43572 - }, - { - "epoch": 22.7238591916558, - "grad_norm": 1.5497275590896606, - "learning_rate": 5.6726633165829146e-05, - "loss": 5.1553, - "step": 43573 - }, - { - "epoch": 22.72438070404172, - "grad_norm": 1.5889712572097778, - "learning_rate": 5.6725628140703524e-05, - "loss": 4.9864, - "step": 43574 - }, - { - "epoch": 22.72490221642764, - "grad_norm": 1.5712617635726929, - "learning_rate": 5.672462311557789e-05, - "loss": 5.3212, - "step": 43575 - }, - { - "epoch": 22.72542372881356, - "grad_norm": 1.6376333236694336, - "learning_rate": 5.6723618090452266e-05, - "loss": 5.1262, - "step": 43576 - }, - { - "epoch": 22.72594524119948, - "grad_norm": 1.70259690284729, - "learning_rate": 5.672261306532664e-05, - "loss": 4.6448, - "step": 43577 - }, - { - "epoch": 22.7264667535854, - "grad_norm": 1.5344536304473877, - "learning_rate": 5.6721608040201015e-05, - "loss": 5.5073, - "step": 43578 - }, - { - "epoch": 22.72698826597132, - "grad_norm": 1.509818196296692, - "learning_rate": 5.672060301507538e-05, - "loss": 5.6096, - "step": 43579 - }, - { - "epoch": 22.727509778357238, - "grad_norm": 1.4955521821975708, - "learning_rate": 5.6719597989949743e-05, - "loss": 5.2584, - "step": 43580 - }, - { - "epoch": 22.728031290743154, - "grad_norm": 1.5107946395874023, - "learning_rate": 5.671859296482412e-05, - "loss": 5.6334, - "step": 43581 - }, - { - "epoch": 22.728552803129073, - "grad_norm": 1.4461408853530884, - "learning_rate": 5.671758793969849e-05, - "loss": 5.7721, - "step": 43582 - }, - { - "epoch": 22.729074315514993, - "grad_norm": 1.4458807706832886, - "learning_rate": 5.671658291457287e-05, - "loss": 5.1345, - "step": 43583 - }, - { - "epoch": 22.729595827900912, - "grad_norm": 1.6711066961288452, - "learning_rate": 5.6715577889447234e-05, - "loss": 5.0777, - "step": 43584 - }, - { - "epoch": 22.73011734028683, - "grad_norm": 1.4790852069854736, - "learning_rate": 5.671457286432161e-05, - "loss": 4.7271, - "step": 43585 - }, - { - "epoch": 22.73063885267275, - "grad_norm": 1.621751308441162, - "learning_rate": 5.671356783919598e-05, - "loss": 5.2924, - "step": 43586 - }, - { - "epoch": 22.73116036505867, - "grad_norm": 1.5565738677978516, - "learning_rate": 5.671256281407036e-05, - "loss": 5.1618, - "step": 43587 - }, - { - "epoch": 22.73168187744459, - "grad_norm": 1.584689974784851, - "learning_rate": 5.6711557788944725e-05, - "loss": 5.5078, - "step": 43588 - }, - { - "epoch": 22.73220338983051, - "grad_norm": 1.4949774742126465, - "learning_rate": 5.67105527638191e-05, - "loss": 5.4743, - "step": 43589 - }, - { - "epoch": 22.73272490221643, - "grad_norm": 1.613179326057434, - "learning_rate": 5.670954773869347e-05, - "loss": 5.5513, - "step": 43590 - }, - { - "epoch": 22.73324641460235, - "grad_norm": 1.5137758255004883, - "learning_rate": 5.6708542713567845e-05, - "loss": 5.2433, - "step": 43591 - }, - { - "epoch": 22.733767926988264, - "grad_norm": 1.6054010391235352, - "learning_rate": 5.6707537688442216e-05, - "loss": 4.5095, - "step": 43592 - }, - { - "epoch": 22.734289439374184, - "grad_norm": 1.4707938432693481, - "learning_rate": 5.670653266331658e-05, - "loss": 5.2525, - "step": 43593 - }, - { - "epoch": 22.734810951760103, - "grad_norm": 1.4152129888534546, - "learning_rate": 5.670552763819096e-05, - "loss": 5.447, - "step": 43594 - }, - { - "epoch": 22.735332464146023, - "grad_norm": 1.513128399848938, - "learning_rate": 5.670452261306532e-05, - "loss": 5.4371, - "step": 43595 - }, - { - "epoch": 22.735853976531942, - "grad_norm": 1.4943568706512451, - "learning_rate": 5.67035175879397e-05, - "loss": 5.5873, - "step": 43596 - }, - { - "epoch": 22.73637548891786, - "grad_norm": 1.648838996887207, - "learning_rate": 5.670251256281407e-05, - "loss": 4.5718, - "step": 43597 - }, - { - "epoch": 22.73689700130378, - "grad_norm": 1.4185198545455933, - "learning_rate": 5.670150753768845e-05, - "loss": 4.454, - "step": 43598 - }, - { - "epoch": 22.7374185136897, - "grad_norm": 1.6395576000213623, - "learning_rate": 5.6700502512562814e-05, - "loss": 5.5014, - "step": 43599 - }, - { - "epoch": 22.73794002607562, - "grad_norm": 1.5954861640930176, - "learning_rate": 5.669949748743719e-05, - "loss": 5.1225, - "step": 43600 - }, - { - "epoch": 22.73846153846154, - "grad_norm": 1.5396878719329834, - "learning_rate": 5.6698492462311556e-05, - "loss": 4.7217, - "step": 43601 - }, - { - "epoch": 22.73898305084746, - "grad_norm": 1.638129711151123, - "learning_rate": 5.6697487437185934e-05, - "loss": 4.7643, - "step": 43602 - }, - { - "epoch": 22.73950456323338, - "grad_norm": 1.5255956649780273, - "learning_rate": 5.6696482412060305e-05, - "loss": 5.3576, - "step": 43603 - }, - { - "epoch": 22.740026075619298, - "grad_norm": 1.4064580202102661, - "learning_rate": 5.669547738693468e-05, - "loss": 5.1621, - "step": 43604 - }, - { - "epoch": 22.740547588005214, - "grad_norm": 1.5537289381027222, - "learning_rate": 5.669447236180905e-05, - "loss": 5.1802, - "step": 43605 - }, - { - "epoch": 22.741069100391133, - "grad_norm": 1.5225088596343994, - "learning_rate": 5.669346733668342e-05, - "loss": 5.1985, - "step": 43606 - }, - { - "epoch": 22.741590612777053, - "grad_norm": 1.5258302688598633, - "learning_rate": 5.6692462311557796e-05, - "loss": 5.0387, - "step": 43607 - }, - { - "epoch": 22.742112125162972, - "grad_norm": 1.5152956247329712, - "learning_rate": 5.669145728643216e-05, - "loss": 5.3807, - "step": 43608 - }, - { - "epoch": 22.74263363754889, - "grad_norm": 1.544751763343811, - "learning_rate": 5.669045226130654e-05, - "loss": 4.9786, - "step": 43609 - }, - { - "epoch": 22.74315514993481, - "grad_norm": 1.5272228717803955, - "learning_rate": 5.66894472361809e-05, - "loss": 5.3058, - "step": 43610 - }, - { - "epoch": 22.74367666232073, - "grad_norm": 1.5379263162612915, - "learning_rate": 5.668844221105528e-05, - "loss": 5.6403, - "step": 43611 - }, - { - "epoch": 22.74419817470665, - "grad_norm": 1.498295545578003, - "learning_rate": 5.668743718592965e-05, - "loss": 5.4049, - "step": 43612 - }, - { - "epoch": 22.74471968709257, - "grad_norm": 1.626025676727295, - "learning_rate": 5.668643216080403e-05, - "loss": 4.6974, - "step": 43613 - }, - { - "epoch": 22.74524119947849, - "grad_norm": 1.5138968229293823, - "learning_rate": 5.668542713567839e-05, - "loss": 4.8628, - "step": 43614 - }, - { - "epoch": 22.74576271186441, - "grad_norm": 1.4963243007659912, - "learning_rate": 5.668442211055277e-05, - "loss": 5.403, - "step": 43615 - }, - { - "epoch": 22.746284224250324, - "grad_norm": 1.6218912601470947, - "learning_rate": 5.6683417085427135e-05, - "loss": 4.9604, - "step": 43616 - }, - { - "epoch": 22.746805736636244, - "grad_norm": 1.5605095624923706, - "learning_rate": 5.668241206030151e-05, - "loss": 5.2476, - "step": 43617 - }, - { - "epoch": 22.747327249022163, - "grad_norm": 1.641647458076477, - "learning_rate": 5.6681407035175884e-05, - "loss": 5.1801, - "step": 43618 - }, - { - "epoch": 22.747848761408083, - "grad_norm": 1.6971056461334229, - "learning_rate": 5.668040201005025e-05, - "loss": 4.7762, - "step": 43619 - }, - { - "epoch": 22.748370273794002, - "grad_norm": 1.4995695352554321, - "learning_rate": 5.6679396984924626e-05, - "loss": 5.3086, - "step": 43620 - }, - { - "epoch": 22.74889178617992, - "grad_norm": 1.5065323114395142, - "learning_rate": 5.667839195979899e-05, - "loss": 5.2079, - "step": 43621 - }, - { - "epoch": 22.74941329856584, - "grad_norm": 1.5797990560531616, - "learning_rate": 5.667738693467337e-05, - "loss": 4.6228, - "step": 43622 - }, - { - "epoch": 22.74993481095176, - "grad_norm": 1.6195157766342163, - "learning_rate": 5.667638190954774e-05, - "loss": 4.7835, - "step": 43623 - }, - { - "epoch": 22.75045632333768, - "grad_norm": 1.5439677238464355, - "learning_rate": 5.667537688442212e-05, - "loss": 5.42, - "step": 43624 - }, - { - "epoch": 22.7509778357236, - "grad_norm": 1.4853371381759644, - "learning_rate": 5.667437185929648e-05, - "loss": 5.4518, - "step": 43625 - }, - { - "epoch": 22.75149934810952, - "grad_norm": 1.675188422203064, - "learning_rate": 5.667336683417086e-05, - "loss": 4.5801, - "step": 43626 - }, - { - "epoch": 22.75202086049544, - "grad_norm": 1.5656832456588745, - "learning_rate": 5.6672361809045223e-05, - "loss": 4.7563, - "step": 43627 - }, - { - "epoch": 22.752542372881354, - "grad_norm": 1.5228904485702515, - "learning_rate": 5.66713567839196e-05, - "loss": 5.4426, - "step": 43628 - }, - { - "epoch": 22.753063885267274, - "grad_norm": 1.4836459159851074, - "learning_rate": 5.667035175879397e-05, - "loss": 5.2775, - "step": 43629 - }, - { - "epoch": 22.753585397653193, - "grad_norm": 1.4909400939941406, - "learning_rate": 5.666934673366835e-05, - "loss": 5.5038, - "step": 43630 - }, - { - "epoch": 22.754106910039113, - "grad_norm": 1.6502809524536133, - "learning_rate": 5.6668341708542714e-05, - "loss": 4.5246, - "step": 43631 - }, - { - "epoch": 22.754628422425032, - "grad_norm": 1.5173877477645874, - "learning_rate": 5.6667336683417086e-05, - "loss": 5.4248, - "step": 43632 - }, - { - "epoch": 22.75514993481095, - "grad_norm": 1.4898436069488525, - "learning_rate": 5.666633165829146e-05, - "loss": 5.3518, - "step": 43633 - }, - { - "epoch": 22.75567144719687, - "grad_norm": 1.4200117588043213, - "learning_rate": 5.666532663316583e-05, - "loss": 5.3838, - "step": 43634 - }, - { - "epoch": 22.75619295958279, - "grad_norm": 1.4129799604415894, - "learning_rate": 5.6664321608040205e-05, - "loss": 5.588, - "step": 43635 - }, - { - "epoch": 22.75671447196871, - "grad_norm": 1.5679535865783691, - "learning_rate": 5.666331658291457e-05, - "loss": 5.3686, - "step": 43636 - }, - { - "epoch": 22.75723598435463, - "grad_norm": 1.5806092023849487, - "learning_rate": 5.666231155778895e-05, - "loss": 5.3805, - "step": 43637 - }, - { - "epoch": 22.75775749674055, - "grad_norm": 1.495985507965088, - "learning_rate": 5.666130653266332e-05, - "loss": 5.2669, - "step": 43638 - }, - { - "epoch": 22.75827900912647, - "grad_norm": 1.5262519121170044, - "learning_rate": 5.6660301507537696e-05, - "loss": 5.2356, - "step": 43639 - }, - { - "epoch": 22.758800521512384, - "grad_norm": 1.499624252319336, - "learning_rate": 5.665929648241206e-05, - "loss": 5.5129, - "step": 43640 - }, - { - "epoch": 22.759322033898304, - "grad_norm": 1.5143710374832153, - "learning_rate": 5.665829145728644e-05, - "loss": 5.2874, - "step": 43641 - }, - { - "epoch": 22.759843546284223, - "grad_norm": 1.472136378288269, - "learning_rate": 5.66572864321608e-05, - "loss": 5.5729, - "step": 43642 - }, - { - "epoch": 22.760365058670143, - "grad_norm": 1.537054181098938, - "learning_rate": 5.665628140703518e-05, - "loss": 5.208, - "step": 43643 - }, - { - "epoch": 22.760886571056062, - "grad_norm": 1.6284784078598022, - "learning_rate": 5.665527638190955e-05, - "loss": 5.0598, - "step": 43644 - }, - { - "epoch": 22.76140808344198, - "grad_norm": 1.519278645515442, - "learning_rate": 5.665427135678393e-05, - "loss": 5.0009, - "step": 43645 - }, - { - "epoch": 22.7619295958279, - "grad_norm": 1.576280117034912, - "learning_rate": 5.6653266331658294e-05, - "loss": 5.037, - "step": 43646 - }, - { - "epoch": 22.76245110821382, - "grad_norm": 1.5863081216812134, - "learning_rate": 5.665226130653266e-05, - "loss": 5.021, - "step": 43647 - }, - { - "epoch": 22.76297262059974, - "grad_norm": 1.430732250213623, - "learning_rate": 5.6651256281407036e-05, - "loss": 5.5501, - "step": 43648 - }, - { - "epoch": 22.76349413298566, - "grad_norm": 1.6200342178344727, - "learning_rate": 5.665025125628141e-05, - "loss": 5.0242, - "step": 43649 - }, - { - "epoch": 22.76401564537158, - "grad_norm": 1.5858516693115234, - "learning_rate": 5.6649246231155785e-05, - "loss": 4.6684, - "step": 43650 - }, - { - "epoch": 22.7645371577575, - "grad_norm": 1.5747095346450806, - "learning_rate": 5.664824120603015e-05, - "loss": 5.3528, - "step": 43651 - }, - { - "epoch": 22.765058670143414, - "grad_norm": 1.570696234703064, - "learning_rate": 5.664723618090453e-05, - "loss": 5.0764, - "step": 43652 - }, - { - "epoch": 22.765580182529334, - "grad_norm": 1.5419296026229858, - "learning_rate": 5.66462311557789e-05, - "loss": 5.2731, - "step": 43653 - }, - { - "epoch": 22.766101694915253, - "grad_norm": 1.4512871503829956, - "learning_rate": 5.6645226130653276e-05, - "loss": 5.3827, - "step": 43654 - }, - { - "epoch": 22.766623207301173, - "grad_norm": 1.6451237201690674, - "learning_rate": 5.664422110552764e-05, - "loss": 5.1082, - "step": 43655 - }, - { - "epoch": 22.767144719687092, - "grad_norm": 1.506790280342102, - "learning_rate": 5.664321608040202e-05, - "loss": 4.8938, - "step": 43656 - }, - { - "epoch": 22.76766623207301, - "grad_norm": 1.4181106090545654, - "learning_rate": 5.664221105527638e-05, - "loss": 5.2883, - "step": 43657 - }, - { - "epoch": 22.76818774445893, - "grad_norm": 1.5718648433685303, - "learning_rate": 5.664120603015076e-05, - "loss": 5.6015, - "step": 43658 - }, - { - "epoch": 22.76870925684485, - "grad_norm": 1.5471014976501465, - "learning_rate": 5.664020100502513e-05, - "loss": 5.6134, - "step": 43659 - }, - { - "epoch": 22.76923076923077, - "grad_norm": 1.61711847782135, - "learning_rate": 5.6639195979899495e-05, - "loss": 5.2856, - "step": 43660 - }, - { - "epoch": 22.76975228161669, - "grad_norm": 1.5757273435592651, - "learning_rate": 5.663819095477387e-05, - "loss": 5.446, - "step": 43661 - }, - { - "epoch": 22.77027379400261, - "grad_norm": 1.520330786705017, - "learning_rate": 5.663718592964824e-05, - "loss": 5.016, - "step": 43662 - }, - { - "epoch": 22.77079530638853, - "grad_norm": 1.4801751375198364, - "learning_rate": 5.6636180904522615e-05, - "loss": 5.6165, - "step": 43663 - }, - { - "epoch": 22.771316818774444, - "grad_norm": 1.459517240524292, - "learning_rate": 5.6635175879396986e-05, - "loss": 5.5898, - "step": 43664 - }, - { - "epoch": 22.771838331160364, - "grad_norm": 1.4061470031738281, - "learning_rate": 5.6634170854271364e-05, - "loss": 4.627, - "step": 43665 - }, - { - "epoch": 22.772359843546283, - "grad_norm": 1.5996545553207397, - "learning_rate": 5.663316582914573e-05, - "loss": 5.1153, - "step": 43666 - }, - { - "epoch": 22.772881355932203, - "grad_norm": 1.5378884077072144, - "learning_rate": 5.6632160804020106e-05, - "loss": 5.0596, - "step": 43667 - }, - { - "epoch": 22.773402868318122, - "grad_norm": 1.655340552330017, - "learning_rate": 5.663115577889447e-05, - "loss": 5.1982, - "step": 43668 - }, - { - "epoch": 22.77392438070404, - "grad_norm": 1.4504578113555908, - "learning_rate": 5.663015075376885e-05, - "loss": 5.1665, - "step": 43669 - }, - { - "epoch": 22.77444589308996, - "grad_norm": 1.5471187829971313, - "learning_rate": 5.662914572864322e-05, - "loss": 5.3317, - "step": 43670 - }, - { - "epoch": 22.77496740547588, - "grad_norm": 1.6404417753219604, - "learning_rate": 5.66281407035176e-05, - "loss": 5.0919, - "step": 43671 - }, - { - "epoch": 22.7754889178618, - "grad_norm": 1.596103549003601, - "learning_rate": 5.662713567839196e-05, - "loss": 5.4668, - "step": 43672 - }, - { - "epoch": 22.77601043024772, - "grad_norm": 1.7144513130187988, - "learning_rate": 5.662613065326633e-05, - "loss": 4.876, - "step": 43673 - }, - { - "epoch": 22.77653194263364, - "grad_norm": 1.5253721475601196, - "learning_rate": 5.662512562814071e-05, - "loss": 5.3484, - "step": 43674 - }, - { - "epoch": 22.777053455019555, - "grad_norm": 1.4473891258239746, - "learning_rate": 5.6624120603015075e-05, - "loss": 5.5389, - "step": 43675 - }, - { - "epoch": 22.777574967405474, - "grad_norm": 1.6750437021255493, - "learning_rate": 5.662311557788945e-05, - "loss": 4.8897, - "step": 43676 - }, - { - "epoch": 22.778096479791394, - "grad_norm": 1.4849916696548462, - "learning_rate": 5.662211055276382e-05, - "loss": 4.7024, - "step": 43677 - }, - { - "epoch": 22.778617992177313, - "grad_norm": 1.4258695840835571, - "learning_rate": 5.6621105527638195e-05, - "loss": 5.734, - "step": 43678 - }, - { - "epoch": 22.779139504563233, - "grad_norm": 1.505090355873108, - "learning_rate": 5.6620100502512566e-05, - "loss": 5.2473, - "step": 43679 - }, - { - "epoch": 22.779661016949152, - "grad_norm": 1.5054527521133423, - "learning_rate": 5.6619095477386943e-05, - "loss": 5.366, - "step": 43680 - }, - { - "epoch": 22.78018252933507, - "grad_norm": 1.4825023412704468, - "learning_rate": 5.661809045226131e-05, - "loss": 5.4887, - "step": 43681 - }, - { - "epoch": 22.78070404172099, - "grad_norm": 1.4821866750717163, - "learning_rate": 5.6617085427135686e-05, - "loss": 5.0848, - "step": 43682 - }, - { - "epoch": 22.78122555410691, - "grad_norm": 1.5326170921325684, - "learning_rate": 5.661608040201005e-05, - "loss": 5.0806, - "step": 43683 - }, - { - "epoch": 22.78174706649283, - "grad_norm": 1.5626336336135864, - "learning_rate": 5.661507537688443e-05, - "loss": 5.3584, - "step": 43684 - }, - { - "epoch": 22.78226857887875, - "grad_norm": 1.6134955883026123, - "learning_rate": 5.66140703517588e-05, - "loss": 5.2807, - "step": 43685 - }, - { - "epoch": 22.78279009126467, - "grad_norm": 1.5278500318527222, - "learning_rate": 5.661306532663316e-05, - "loss": 5.6251, - "step": 43686 - }, - { - "epoch": 22.78331160365059, - "grad_norm": 1.418513298034668, - "learning_rate": 5.661206030150754e-05, - "loss": 5.3588, - "step": 43687 - }, - { - "epoch": 22.783833116036504, - "grad_norm": 1.490968108177185, - "learning_rate": 5.6611055276381905e-05, - "loss": 5.4776, - "step": 43688 - }, - { - "epoch": 22.784354628422424, - "grad_norm": 1.5030300617218018, - "learning_rate": 5.661005025125628e-05, - "loss": 5.7577, - "step": 43689 - }, - { - "epoch": 22.784876140808343, - "grad_norm": 1.8143651485443115, - "learning_rate": 5.6609045226130654e-05, - "loss": 4.2511, - "step": 43690 - }, - { - "epoch": 22.785397653194263, - "grad_norm": 1.4392682313919067, - "learning_rate": 5.660804020100503e-05, - "loss": 5.5684, - "step": 43691 - }, - { - "epoch": 22.785919165580182, - "grad_norm": 1.6126751899719238, - "learning_rate": 5.6607035175879396e-05, - "loss": 5.2665, - "step": 43692 - }, - { - "epoch": 22.7864406779661, - "grad_norm": 1.6236506700515747, - "learning_rate": 5.6606030150753774e-05, - "loss": 4.7849, - "step": 43693 - }, - { - "epoch": 22.78696219035202, - "grad_norm": 1.595902442932129, - "learning_rate": 5.6605025125628145e-05, - "loss": 5.1276, - "step": 43694 - }, - { - "epoch": 22.78748370273794, - "grad_norm": 1.487149953842163, - "learning_rate": 5.660402010050252e-05, - "loss": 5.5149, - "step": 43695 - }, - { - "epoch": 22.78800521512386, - "grad_norm": 1.501965880393982, - "learning_rate": 5.660301507537689e-05, - "loss": 5.1254, - "step": 43696 - }, - { - "epoch": 22.78852672750978, - "grad_norm": 1.4730650186538696, - "learning_rate": 5.6602010050251265e-05, - "loss": 5.3045, - "step": 43697 - }, - { - "epoch": 22.7890482398957, - "grad_norm": 1.5798925161361694, - "learning_rate": 5.660100502512563e-05, - "loss": 5.0667, - "step": 43698 - }, - { - "epoch": 22.789569752281615, - "grad_norm": 1.4892210960388184, - "learning_rate": 5.66e-05, - "loss": 5.5592, - "step": 43699 - }, - { - "epoch": 22.790091264667534, - "grad_norm": 1.6364331245422363, - "learning_rate": 5.659899497487438e-05, - "loss": 4.9781, - "step": 43700 - }, - { - "epoch": 22.790612777053454, - "grad_norm": 1.6389548778533936, - "learning_rate": 5.659798994974874e-05, - "loss": 5.5653, - "step": 43701 - }, - { - "epoch": 22.791134289439373, - "grad_norm": 1.5353292226791382, - "learning_rate": 5.659698492462312e-05, - "loss": 5.4065, - "step": 43702 - }, - { - "epoch": 22.791655801825293, - "grad_norm": 1.5009474754333496, - "learning_rate": 5.6595979899497484e-05, - "loss": 5.3319, - "step": 43703 - }, - { - "epoch": 22.792177314211212, - "grad_norm": 1.518190860748291, - "learning_rate": 5.659497487437186e-05, - "loss": 5.2438, - "step": 43704 - }, - { - "epoch": 22.79269882659713, - "grad_norm": 1.6329461336135864, - "learning_rate": 5.659396984924623e-05, - "loss": 4.9717, - "step": 43705 - }, - { - "epoch": 22.79322033898305, - "grad_norm": 1.4716770648956299, - "learning_rate": 5.659296482412061e-05, - "loss": 5.648, - "step": 43706 - }, - { - "epoch": 22.79374185136897, - "grad_norm": 1.5126428604125977, - "learning_rate": 5.6591959798994975e-05, - "loss": 5.3561, - "step": 43707 - }, - { - "epoch": 22.79426336375489, - "grad_norm": 1.528153657913208, - "learning_rate": 5.659095477386935e-05, - "loss": 4.6661, - "step": 43708 - }, - { - "epoch": 22.79478487614081, - "grad_norm": 1.6828837394714355, - "learning_rate": 5.658994974874372e-05, - "loss": 4.9548, - "step": 43709 - }, - { - "epoch": 22.79530638852673, - "grad_norm": 1.4357631206512451, - "learning_rate": 5.6588944723618095e-05, - "loss": 4.8507, - "step": 43710 - }, - { - "epoch": 22.795827900912645, - "grad_norm": 1.5777868032455444, - "learning_rate": 5.6587939698492466e-05, - "loss": 4.9381, - "step": 43711 - }, - { - "epoch": 22.796349413298564, - "grad_norm": 1.623633861541748, - "learning_rate": 5.658693467336683e-05, - "loss": 5.3188, - "step": 43712 - }, - { - "epoch": 22.796870925684484, - "grad_norm": 1.5988550186157227, - "learning_rate": 5.658592964824121e-05, - "loss": 5.5118, - "step": 43713 - }, - { - "epoch": 22.797392438070403, - "grad_norm": 1.6397204399108887, - "learning_rate": 5.658492462311557e-05, - "loss": 4.7462, - "step": 43714 - }, - { - "epoch": 22.797913950456323, - "grad_norm": 1.6496988534927368, - "learning_rate": 5.658391959798995e-05, - "loss": 5.0277, - "step": 43715 - }, - { - "epoch": 22.798435462842242, - "grad_norm": 1.6283150911331177, - "learning_rate": 5.658291457286432e-05, - "loss": 5.1489, - "step": 43716 - }, - { - "epoch": 22.798956975228162, - "grad_norm": 1.6909691095352173, - "learning_rate": 5.65819095477387e-05, - "loss": 5.1794, - "step": 43717 - }, - { - "epoch": 22.79947848761408, - "grad_norm": 1.5947465896606445, - "learning_rate": 5.6580904522613064e-05, - "loss": 4.8501, - "step": 43718 - }, - { - "epoch": 22.8, - "grad_norm": 1.5666594505310059, - "learning_rate": 5.657989949748744e-05, - "loss": 5.3232, - "step": 43719 - }, - { - "epoch": 22.80052151238592, - "grad_norm": 1.61076819896698, - "learning_rate": 5.657889447236181e-05, - "loss": 5.2836, - "step": 43720 - }, - { - "epoch": 22.80104302477184, - "grad_norm": 1.5377427339553833, - "learning_rate": 5.657788944723619e-05, - "loss": 5.4385, - "step": 43721 - }, - { - "epoch": 22.80156453715776, - "grad_norm": 1.6580294370651245, - "learning_rate": 5.6576884422110555e-05, - "loss": 5.3174, - "step": 43722 - }, - { - "epoch": 22.802086049543675, - "grad_norm": 1.5692241191864014, - "learning_rate": 5.657587939698493e-05, - "loss": 5.3678, - "step": 43723 - }, - { - "epoch": 22.802607561929594, - "grad_norm": 1.5294373035430908, - "learning_rate": 5.65748743718593e-05, - "loss": 5.4117, - "step": 43724 - }, - { - "epoch": 22.803129074315514, - "grad_norm": 1.6298224925994873, - "learning_rate": 5.657386934673367e-05, - "loss": 4.9828, - "step": 43725 - }, - { - "epoch": 22.803650586701433, - "grad_norm": 1.4418774843215942, - "learning_rate": 5.6572864321608046e-05, - "loss": 5.5381, - "step": 43726 - }, - { - "epoch": 22.804172099087353, - "grad_norm": 1.491703748703003, - "learning_rate": 5.657185929648241e-05, - "loss": 5.377, - "step": 43727 - }, - { - "epoch": 22.804693611473272, - "grad_norm": 1.4799867868423462, - "learning_rate": 5.657085427135679e-05, - "loss": 5.2699, - "step": 43728 - }, - { - "epoch": 22.805215123859192, - "grad_norm": 1.6676127910614014, - "learning_rate": 5.656984924623115e-05, - "loss": 4.9483, - "step": 43729 - }, - { - "epoch": 22.80573663624511, - "grad_norm": 1.5966219902038574, - "learning_rate": 5.656884422110553e-05, - "loss": 4.9256, - "step": 43730 - }, - { - "epoch": 22.80625814863103, - "grad_norm": 1.5287697315216064, - "learning_rate": 5.65678391959799e-05, - "loss": 4.6827, - "step": 43731 - }, - { - "epoch": 22.80677966101695, - "grad_norm": 1.5581183433532715, - "learning_rate": 5.656683417085428e-05, - "loss": 4.9505, - "step": 43732 - }, - { - "epoch": 22.80730117340287, - "grad_norm": 1.5147632360458374, - "learning_rate": 5.656582914572864e-05, - "loss": 5.4739, - "step": 43733 - }, - { - "epoch": 22.80782268578879, - "grad_norm": 1.501495361328125, - "learning_rate": 5.656482412060302e-05, - "loss": 5.2612, - "step": 43734 - }, - { - "epoch": 22.808344198174705, - "grad_norm": 1.5001682043075562, - "learning_rate": 5.6563819095477385e-05, - "loss": 5.5923, - "step": 43735 - }, - { - "epoch": 22.808865710560625, - "grad_norm": 1.4991084337234497, - "learning_rate": 5.656281407035176e-05, - "loss": 5.6115, - "step": 43736 - }, - { - "epoch": 22.809387222946544, - "grad_norm": 1.5743961334228516, - "learning_rate": 5.6561809045226134e-05, - "loss": 5.5638, - "step": 43737 - }, - { - "epoch": 22.809908735332463, - "grad_norm": 1.4118157625198364, - "learning_rate": 5.656080402010051e-05, - "loss": 5.5267, - "step": 43738 - }, - { - "epoch": 22.810430247718383, - "grad_norm": 1.4575284719467163, - "learning_rate": 5.6559798994974876e-05, - "loss": 5.3721, - "step": 43739 - }, - { - "epoch": 22.810951760104302, - "grad_norm": 1.5031465291976929, - "learning_rate": 5.655879396984925e-05, - "loss": 5.5379, - "step": 43740 - }, - { - "epoch": 22.811473272490222, - "grad_norm": 1.505236268043518, - "learning_rate": 5.6557788944723625e-05, - "loss": 5.6081, - "step": 43741 - }, - { - "epoch": 22.81199478487614, - "grad_norm": 1.6393318176269531, - "learning_rate": 5.655678391959799e-05, - "loss": 4.4834, - "step": 43742 - }, - { - "epoch": 22.81251629726206, - "grad_norm": 1.5184906721115112, - "learning_rate": 5.655577889447237e-05, - "loss": 5.1623, - "step": 43743 - }, - { - "epoch": 22.81303780964798, - "grad_norm": 1.4365428686141968, - "learning_rate": 5.655477386934673e-05, - "loss": 4.9359, - "step": 43744 - }, - { - "epoch": 22.8135593220339, - "grad_norm": 1.4715070724487305, - "learning_rate": 5.655376884422111e-05, - "loss": 5.5884, - "step": 43745 - }, - { - "epoch": 22.81408083441982, - "grad_norm": 1.5084565877914429, - "learning_rate": 5.655276381909548e-05, - "loss": 5.5499, - "step": 43746 - }, - { - "epoch": 22.814602346805735, - "grad_norm": 1.6595224142074585, - "learning_rate": 5.655175879396986e-05, - "loss": 4.6722, - "step": 43747 - }, - { - "epoch": 22.815123859191655, - "grad_norm": 1.5206230878829956, - "learning_rate": 5.655075376884422e-05, - "loss": 5.0612, - "step": 43748 - }, - { - "epoch": 22.815645371577574, - "grad_norm": 1.5778412818908691, - "learning_rate": 5.65497487437186e-05, - "loss": 5.3522, - "step": 43749 - }, - { - "epoch": 22.816166883963493, - "grad_norm": 1.5016103982925415, - "learning_rate": 5.6548743718592964e-05, - "loss": 4.654, - "step": 43750 - }, - { - "epoch": 22.816688396349413, - "grad_norm": 1.651724934577942, - "learning_rate": 5.654773869346734e-05, - "loss": 5.2623, - "step": 43751 - }, - { - "epoch": 22.817209908735332, - "grad_norm": 1.5032416582107544, - "learning_rate": 5.654673366834171e-05, - "loss": 4.9326, - "step": 43752 - }, - { - "epoch": 22.817731421121252, - "grad_norm": 1.735308051109314, - "learning_rate": 5.654572864321608e-05, - "loss": 4.4942, - "step": 43753 - }, - { - "epoch": 22.81825293350717, - "grad_norm": 1.5778172016143799, - "learning_rate": 5.6544723618090455e-05, - "loss": 5.4034, - "step": 43754 - }, - { - "epoch": 22.81877444589309, - "grad_norm": 1.4365259408950806, - "learning_rate": 5.654371859296482e-05, - "loss": 5.8424, - "step": 43755 - }, - { - "epoch": 22.81929595827901, - "grad_norm": 1.5548961162567139, - "learning_rate": 5.65427135678392e-05, - "loss": 5.5016, - "step": 43756 - }, - { - "epoch": 22.81981747066493, - "grad_norm": 1.593336582183838, - "learning_rate": 5.654170854271357e-05, - "loss": 5.1074, - "step": 43757 - }, - { - "epoch": 22.820338983050846, - "grad_norm": 1.566320538520813, - "learning_rate": 5.6540703517587946e-05, - "loss": 5.4542, - "step": 43758 - }, - { - "epoch": 22.820860495436765, - "grad_norm": 1.5194801092147827, - "learning_rate": 5.653969849246231e-05, - "loss": 5.8924, - "step": 43759 - }, - { - "epoch": 22.821382007822685, - "grad_norm": 1.6163641214370728, - "learning_rate": 5.653869346733669e-05, - "loss": 5.0887, - "step": 43760 - }, - { - "epoch": 22.821903520208604, - "grad_norm": 1.512014389038086, - "learning_rate": 5.653768844221106e-05, - "loss": 5.5713, - "step": 43761 - }, - { - "epoch": 22.822425032594523, - "grad_norm": 1.5809484720230103, - "learning_rate": 5.653668341708544e-05, - "loss": 5.3977, - "step": 43762 - }, - { - "epoch": 22.822946544980443, - "grad_norm": 1.5775283575057983, - "learning_rate": 5.65356783919598e-05, - "loss": 5.2115, - "step": 43763 - }, - { - "epoch": 22.823468057366362, - "grad_norm": 1.5577479600906372, - "learning_rate": 5.653467336683418e-05, - "loss": 5.2705, - "step": 43764 - }, - { - "epoch": 22.823989569752282, - "grad_norm": 1.497727870941162, - "learning_rate": 5.6533668341708544e-05, - "loss": 5.6414, - "step": 43765 - }, - { - "epoch": 22.8245110821382, - "grad_norm": 1.556006908416748, - "learning_rate": 5.6532663316582915e-05, - "loss": 5.4465, - "step": 43766 - }, - { - "epoch": 22.82503259452412, - "grad_norm": 1.4713867902755737, - "learning_rate": 5.653165829145729e-05, - "loss": 5.4795, - "step": 43767 - }, - { - "epoch": 22.82555410691004, - "grad_norm": 1.454753041267395, - "learning_rate": 5.653065326633166e-05, - "loss": 5.5017, - "step": 43768 - }, - { - "epoch": 22.82607561929596, - "grad_norm": 1.5429636240005493, - "learning_rate": 5.6529648241206035e-05, - "loss": 5.4768, - "step": 43769 - }, - { - "epoch": 22.82659713168188, - "grad_norm": 1.5849156379699707, - "learning_rate": 5.65286432160804e-05, - "loss": 5.3157, - "step": 43770 - }, - { - "epoch": 22.827118644067795, - "grad_norm": 1.5864230394363403, - "learning_rate": 5.652763819095478e-05, - "loss": 5.4186, - "step": 43771 - }, - { - "epoch": 22.827640156453715, - "grad_norm": 1.614202857017517, - "learning_rate": 5.652663316582915e-05, - "loss": 5.3282, - "step": 43772 - }, - { - "epoch": 22.828161668839634, - "grad_norm": 1.6227835416793823, - "learning_rate": 5.6525628140703526e-05, - "loss": 5.0294, - "step": 43773 - }, - { - "epoch": 22.828683181225554, - "grad_norm": 1.5998501777648926, - "learning_rate": 5.652462311557789e-05, - "loss": 5.3034, - "step": 43774 - }, - { - "epoch": 22.829204693611473, - "grad_norm": 1.5297520160675049, - "learning_rate": 5.652361809045227e-05, - "loss": 5.4418, - "step": 43775 - }, - { - "epoch": 22.829726205997392, - "grad_norm": 1.5936472415924072, - "learning_rate": 5.652261306532663e-05, - "loss": 4.991, - "step": 43776 - }, - { - "epoch": 22.830247718383312, - "grad_norm": 1.5370672941207886, - "learning_rate": 5.652160804020101e-05, - "loss": 4.7693, - "step": 43777 - }, - { - "epoch": 22.83076923076923, - "grad_norm": 1.5695016384124756, - "learning_rate": 5.652060301507538e-05, - "loss": 4.9633, - "step": 43778 - }, - { - "epoch": 22.83129074315515, - "grad_norm": 1.5994375944137573, - "learning_rate": 5.6519597989949745e-05, - "loss": 4.5993, - "step": 43779 - }, - { - "epoch": 22.83181225554107, - "grad_norm": 1.573333501815796, - "learning_rate": 5.651859296482412e-05, - "loss": 5.1097, - "step": 43780 - }, - { - "epoch": 22.83233376792699, - "grad_norm": 1.539019227027893, - "learning_rate": 5.6517587939698494e-05, - "loss": 4.6058, - "step": 43781 - }, - { - "epoch": 22.832855280312906, - "grad_norm": 1.5624301433563232, - "learning_rate": 5.651658291457287e-05, - "loss": 5.1848, - "step": 43782 - }, - { - "epoch": 22.833376792698825, - "grad_norm": 1.649697184562683, - "learning_rate": 5.6515577889447236e-05, - "loss": 5.0209, - "step": 43783 - }, - { - "epoch": 22.833898305084745, - "grad_norm": 1.5021121501922607, - "learning_rate": 5.6514572864321614e-05, - "loss": 5.3816, - "step": 43784 - }, - { - "epoch": 22.834419817470664, - "grad_norm": 1.494035005569458, - "learning_rate": 5.651356783919598e-05, - "loss": 5.485, - "step": 43785 - }, - { - "epoch": 22.834941329856584, - "grad_norm": 1.4962831735610962, - "learning_rate": 5.6512562814070356e-05, - "loss": 5.7114, - "step": 43786 - }, - { - "epoch": 22.835462842242503, - "grad_norm": 1.634596347808838, - "learning_rate": 5.651155778894473e-05, - "loss": 4.941, - "step": 43787 - }, - { - "epoch": 22.835984354628422, - "grad_norm": 1.51726496219635, - "learning_rate": 5.6510552763819105e-05, - "loss": 5.5643, - "step": 43788 - }, - { - "epoch": 22.836505867014342, - "grad_norm": 1.649924397468567, - "learning_rate": 5.650954773869347e-05, - "loss": 4.7783, - "step": 43789 - }, - { - "epoch": 22.83702737940026, - "grad_norm": 1.5116080045700073, - "learning_rate": 5.650854271356785e-05, - "loss": 5.5925, - "step": 43790 - }, - { - "epoch": 22.83754889178618, - "grad_norm": 1.5655758380889893, - "learning_rate": 5.650753768844221e-05, - "loss": 5.1, - "step": 43791 - }, - { - "epoch": 22.8380704041721, - "grad_norm": 1.6500061750411987, - "learning_rate": 5.650653266331658e-05, - "loss": 5.2737, - "step": 43792 - }, - { - "epoch": 22.83859191655802, - "grad_norm": 1.5976057052612305, - "learning_rate": 5.650552763819096e-05, - "loss": 4.888, - "step": 43793 - }, - { - "epoch": 22.839113428943936, - "grad_norm": 1.5810602903366089, - "learning_rate": 5.6504522613065325e-05, - "loss": 5.3982, - "step": 43794 - }, - { - "epoch": 22.839634941329855, - "grad_norm": 1.641796350479126, - "learning_rate": 5.65035175879397e-05, - "loss": 4.8946, - "step": 43795 - }, - { - "epoch": 22.840156453715775, - "grad_norm": 1.6535438299179077, - "learning_rate": 5.650251256281407e-05, - "loss": 5.1788, - "step": 43796 - }, - { - "epoch": 22.840677966101694, - "grad_norm": 1.4792399406433105, - "learning_rate": 5.6501507537688445e-05, - "loss": 5.6275, - "step": 43797 - }, - { - "epoch": 22.841199478487614, - "grad_norm": 1.5986177921295166, - "learning_rate": 5.6500502512562816e-05, - "loss": 5.2438, - "step": 43798 - }, - { - "epoch": 22.841720990873533, - "grad_norm": 1.6343178749084473, - "learning_rate": 5.6499497487437193e-05, - "loss": 5.2472, - "step": 43799 - }, - { - "epoch": 22.842242503259452, - "grad_norm": 1.5091255903244019, - "learning_rate": 5.649849246231156e-05, - "loss": 5.3155, - "step": 43800 - }, - { - "epoch": 22.842764015645372, - "grad_norm": 1.4162400960922241, - "learning_rate": 5.6497487437185936e-05, - "loss": 4.9852, - "step": 43801 - }, - { - "epoch": 22.84328552803129, - "grad_norm": 1.543320655822754, - "learning_rate": 5.64964824120603e-05, - "loss": 5.1768, - "step": 43802 - }, - { - "epoch": 22.84380704041721, - "grad_norm": 1.5972241163253784, - "learning_rate": 5.649547738693468e-05, - "loss": 4.4758, - "step": 43803 - }, - { - "epoch": 22.84432855280313, - "grad_norm": 1.4737979173660278, - "learning_rate": 5.649447236180905e-05, - "loss": 5.127, - "step": 43804 - }, - { - "epoch": 22.84485006518905, - "grad_norm": 1.430006742477417, - "learning_rate": 5.649346733668341e-05, - "loss": 5.5857, - "step": 43805 - }, - { - "epoch": 22.845371577574966, - "grad_norm": 1.587385654449463, - "learning_rate": 5.649246231155779e-05, - "loss": 5.192, - "step": 43806 - }, - { - "epoch": 22.845893089960885, - "grad_norm": 1.4399969577789307, - "learning_rate": 5.649145728643216e-05, - "loss": 5.5704, - "step": 43807 - }, - { - "epoch": 22.846414602346805, - "grad_norm": 1.637709140777588, - "learning_rate": 5.649045226130654e-05, - "loss": 5.2747, - "step": 43808 - }, - { - "epoch": 22.846936114732724, - "grad_norm": 1.5029748678207397, - "learning_rate": 5.6489447236180904e-05, - "loss": 5.0861, - "step": 43809 - }, - { - "epoch": 22.847457627118644, - "grad_norm": 1.5541677474975586, - "learning_rate": 5.648844221105528e-05, - "loss": 5.6417, - "step": 43810 - }, - { - "epoch": 22.847979139504563, - "grad_norm": 1.4661240577697754, - "learning_rate": 5.6487437185929646e-05, - "loss": 5.4102, - "step": 43811 - }, - { - "epoch": 22.848500651890483, - "grad_norm": 1.5125446319580078, - "learning_rate": 5.6486432160804024e-05, - "loss": 5.3428, - "step": 43812 - }, - { - "epoch": 22.849022164276402, - "grad_norm": 1.5155744552612305, - "learning_rate": 5.6485427135678395e-05, - "loss": 5.3729, - "step": 43813 - }, - { - "epoch": 22.84954367666232, - "grad_norm": 1.488637924194336, - "learning_rate": 5.648442211055277e-05, - "loss": 4.7946, - "step": 43814 - }, - { - "epoch": 22.85006518904824, - "grad_norm": 1.4626681804656982, - "learning_rate": 5.648341708542714e-05, - "loss": 5.5681, - "step": 43815 - }, - { - "epoch": 22.85058670143416, - "grad_norm": 1.6661564111709595, - "learning_rate": 5.6482412060301515e-05, - "loss": 5.1619, - "step": 43816 - }, - { - "epoch": 22.85110821382008, - "grad_norm": 1.4850658178329468, - "learning_rate": 5.648140703517588e-05, - "loss": 4.9396, - "step": 43817 - }, - { - "epoch": 22.851629726205996, - "grad_norm": 1.4683902263641357, - "learning_rate": 5.648040201005025e-05, - "loss": 5.4609, - "step": 43818 - }, - { - "epoch": 22.852151238591915, - "grad_norm": 1.5296398401260376, - "learning_rate": 5.647939698492463e-05, - "loss": 5.1031, - "step": 43819 - }, - { - "epoch": 22.852672750977835, - "grad_norm": 1.5345362424850464, - "learning_rate": 5.647839195979899e-05, - "loss": 5.4742, - "step": 43820 - }, - { - "epoch": 22.853194263363754, - "grad_norm": 1.5610917806625366, - "learning_rate": 5.647738693467337e-05, - "loss": 4.8429, - "step": 43821 - }, - { - "epoch": 22.853715775749674, - "grad_norm": 1.4760565757751465, - "learning_rate": 5.6476381909547734e-05, - "loss": 5.7243, - "step": 43822 - }, - { - "epoch": 22.854237288135593, - "grad_norm": 1.6025846004486084, - "learning_rate": 5.647537688442211e-05, - "loss": 5.3551, - "step": 43823 - }, - { - "epoch": 22.854758800521513, - "grad_norm": 1.5385838747024536, - "learning_rate": 5.647437185929648e-05, - "loss": 5.5729, - "step": 43824 - }, - { - "epoch": 22.855280312907432, - "grad_norm": 1.5963371992111206, - "learning_rate": 5.647336683417086e-05, - "loss": 5.3661, - "step": 43825 - }, - { - "epoch": 22.85580182529335, - "grad_norm": 1.447227120399475, - "learning_rate": 5.6472361809045225e-05, - "loss": 5.2161, - "step": 43826 - }, - { - "epoch": 22.85632333767927, - "grad_norm": 1.5842950344085693, - "learning_rate": 5.64713567839196e-05, - "loss": 5.4134, - "step": 43827 - }, - { - "epoch": 22.85684485006519, - "grad_norm": 1.5333112478256226, - "learning_rate": 5.6470351758793974e-05, - "loss": 4.8267, - "step": 43828 - }, - { - "epoch": 22.85736636245111, - "grad_norm": 1.6053402423858643, - "learning_rate": 5.646934673366835e-05, - "loss": 5.1661, - "step": 43829 - }, - { - "epoch": 22.857887874837026, - "grad_norm": 1.5621650218963623, - "learning_rate": 5.6468341708542716e-05, - "loss": 5.1015, - "step": 43830 - }, - { - "epoch": 22.858409387222945, - "grad_norm": 1.551958441734314, - "learning_rate": 5.6467336683417094e-05, - "loss": 5.4141, - "step": 43831 - }, - { - "epoch": 22.858930899608865, - "grad_norm": 1.679864525794983, - "learning_rate": 5.646633165829146e-05, - "loss": 4.7649, - "step": 43832 - }, - { - "epoch": 22.859452411994784, - "grad_norm": 1.5776410102844238, - "learning_rate": 5.646532663316583e-05, - "loss": 5.2124, - "step": 43833 - }, - { - "epoch": 22.859973924380704, - "grad_norm": 1.4428001642227173, - "learning_rate": 5.646432160804021e-05, - "loss": 5.602, - "step": 43834 - }, - { - "epoch": 22.860495436766623, - "grad_norm": 1.5516902208328247, - "learning_rate": 5.646331658291457e-05, - "loss": 5.3725, - "step": 43835 - }, - { - "epoch": 22.861016949152543, - "grad_norm": 1.505524754524231, - "learning_rate": 5.646231155778895e-05, - "loss": 5.2809, - "step": 43836 - }, - { - "epoch": 22.861538461538462, - "grad_norm": 1.484533429145813, - "learning_rate": 5.6461306532663314e-05, - "loss": 5.2707, - "step": 43837 - }, - { - "epoch": 22.86205997392438, - "grad_norm": 1.579887866973877, - "learning_rate": 5.646030150753769e-05, - "loss": 5.341, - "step": 43838 - }, - { - "epoch": 22.8625814863103, - "grad_norm": 1.4718706607818604, - "learning_rate": 5.645929648241206e-05, - "loss": 5.0232, - "step": 43839 - }, - { - "epoch": 22.86310299869622, - "grad_norm": 1.5966250896453857, - "learning_rate": 5.645829145728644e-05, - "loss": 5.0821, - "step": 43840 - }, - { - "epoch": 22.86362451108214, - "grad_norm": 1.5993702411651611, - "learning_rate": 5.6457286432160805e-05, - "loss": 5.2378, - "step": 43841 - }, - { - "epoch": 22.864146023468056, - "grad_norm": 1.5641024112701416, - "learning_rate": 5.645628140703518e-05, - "loss": 5.5057, - "step": 43842 - }, - { - "epoch": 22.864667535853975, - "grad_norm": 1.4936962127685547, - "learning_rate": 5.645527638190955e-05, - "loss": 5.0434, - "step": 43843 - }, - { - "epoch": 22.865189048239895, - "grad_norm": 1.6132298707962036, - "learning_rate": 5.6454271356783925e-05, - "loss": 4.9867, - "step": 43844 - }, - { - "epoch": 22.865710560625814, - "grad_norm": 1.5181260108947754, - "learning_rate": 5.6453266331658296e-05, - "loss": 4.7652, - "step": 43845 - }, - { - "epoch": 22.866232073011734, - "grad_norm": 1.5254075527191162, - "learning_rate": 5.645226130653266e-05, - "loss": 5.4906, - "step": 43846 - }, - { - "epoch": 22.866753585397653, - "grad_norm": 1.46632981300354, - "learning_rate": 5.645125628140704e-05, - "loss": 5.3874, - "step": 43847 - }, - { - "epoch": 22.867275097783573, - "grad_norm": 1.5208100080490112, - "learning_rate": 5.645025125628141e-05, - "loss": 5.1825, - "step": 43848 - }, - { - "epoch": 22.867796610169492, - "grad_norm": 1.5796253681182861, - "learning_rate": 5.644924623115579e-05, - "loss": 5.1691, - "step": 43849 - }, - { - "epoch": 22.86831812255541, - "grad_norm": 1.5706462860107422, - "learning_rate": 5.644824120603015e-05, - "loss": 5.3786, - "step": 43850 - }, - { - "epoch": 22.86883963494133, - "grad_norm": 1.5800679922103882, - "learning_rate": 5.644723618090453e-05, - "loss": 4.4353, - "step": 43851 - }, - { - "epoch": 22.86936114732725, - "grad_norm": 1.5463582277297974, - "learning_rate": 5.644623115577889e-05, - "loss": 5.4958, - "step": 43852 - }, - { - "epoch": 22.86988265971317, - "grad_norm": 1.491467833518982, - "learning_rate": 5.644522613065327e-05, - "loss": 5.4322, - "step": 43853 - }, - { - "epoch": 22.870404172099086, - "grad_norm": 1.4935115575790405, - "learning_rate": 5.644422110552764e-05, - "loss": 5.5245, - "step": 43854 - }, - { - "epoch": 22.870925684485005, - "grad_norm": 1.5543060302734375, - "learning_rate": 5.644321608040202e-05, - "loss": 5.0588, - "step": 43855 - }, - { - "epoch": 22.871447196870925, - "grad_norm": 1.5403547286987305, - "learning_rate": 5.6442211055276384e-05, - "loss": 4.8794, - "step": 43856 - }, - { - "epoch": 22.871968709256844, - "grad_norm": 1.495591640472412, - "learning_rate": 5.644120603015076e-05, - "loss": 4.8834, - "step": 43857 - }, - { - "epoch": 22.872490221642764, - "grad_norm": 1.5178899765014648, - "learning_rate": 5.6440201005025126e-05, - "loss": 5.5146, - "step": 43858 - }, - { - "epoch": 22.873011734028683, - "grad_norm": 1.4939416646957397, - "learning_rate": 5.64391959798995e-05, - "loss": 5.2657, - "step": 43859 - }, - { - "epoch": 22.873533246414603, - "grad_norm": 1.6456714868545532, - "learning_rate": 5.6438190954773875e-05, - "loss": 4.96, - "step": 43860 - }, - { - "epoch": 22.874054758800522, - "grad_norm": 1.5073128938674927, - "learning_rate": 5.643718592964824e-05, - "loss": 5.442, - "step": 43861 - }, - { - "epoch": 22.87457627118644, - "grad_norm": 1.436726450920105, - "learning_rate": 5.643618090452262e-05, - "loss": 5.3891, - "step": 43862 - }, - { - "epoch": 22.87509778357236, - "grad_norm": 1.4376827478408813, - "learning_rate": 5.643517587939698e-05, - "loss": 5.5528, - "step": 43863 - }, - { - "epoch": 22.87561929595828, - "grad_norm": 1.5143874883651733, - "learning_rate": 5.643417085427136e-05, - "loss": 5.5992, - "step": 43864 - }, - { - "epoch": 22.876140808344196, - "grad_norm": 1.540245532989502, - "learning_rate": 5.643316582914573e-05, - "loss": 5.3623, - "step": 43865 - }, - { - "epoch": 22.876662320730116, - "grad_norm": 1.5702929496765137, - "learning_rate": 5.643216080402011e-05, - "loss": 5.0962, - "step": 43866 - }, - { - "epoch": 22.877183833116035, - "grad_norm": 1.481192946434021, - "learning_rate": 5.643115577889447e-05, - "loss": 5.0792, - "step": 43867 - }, - { - "epoch": 22.877705345501955, - "grad_norm": 1.585952639579773, - "learning_rate": 5.643015075376885e-05, - "loss": 5.0503, - "step": 43868 - }, - { - "epoch": 22.878226857887874, - "grad_norm": 1.4856120347976685, - "learning_rate": 5.642914572864322e-05, - "loss": 5.5093, - "step": 43869 - }, - { - "epoch": 22.878748370273794, - "grad_norm": 1.5142191648483276, - "learning_rate": 5.64281407035176e-05, - "loss": 4.9326, - "step": 43870 - }, - { - "epoch": 22.879269882659713, - "grad_norm": 1.5204355716705322, - "learning_rate": 5.642713567839196e-05, - "loss": 5.3328, - "step": 43871 - }, - { - "epoch": 22.879791395045633, - "grad_norm": 1.5076828002929688, - "learning_rate": 5.642613065326633e-05, - "loss": 5.5636, - "step": 43872 - }, - { - "epoch": 22.880312907431552, - "grad_norm": 1.4129658937454224, - "learning_rate": 5.6425125628140705e-05, - "loss": 5.5727, - "step": 43873 - }, - { - "epoch": 22.88083441981747, - "grad_norm": 1.5949684381484985, - "learning_rate": 5.6424120603015077e-05, - "loss": 5.3183, - "step": 43874 - }, - { - "epoch": 22.88135593220339, - "grad_norm": 1.3961563110351562, - "learning_rate": 5.6423115577889454e-05, - "loss": 5.6692, - "step": 43875 - }, - { - "epoch": 22.88187744458931, - "grad_norm": 1.5556163787841797, - "learning_rate": 5.642211055276382e-05, - "loss": 4.9951, - "step": 43876 - }, - { - "epoch": 22.88239895697523, - "grad_norm": 1.5672740936279297, - "learning_rate": 5.6421105527638196e-05, - "loss": 4.9473, - "step": 43877 - }, - { - "epoch": 22.882920469361146, - "grad_norm": 1.604331135749817, - "learning_rate": 5.642010050251256e-05, - "loss": 5.255, - "step": 43878 - }, - { - "epoch": 22.883441981747065, - "grad_norm": 1.6034636497497559, - "learning_rate": 5.641909547738694e-05, - "loss": 4.9468, - "step": 43879 - }, - { - "epoch": 22.883963494132985, - "grad_norm": 1.5386732816696167, - "learning_rate": 5.641809045226131e-05, - "loss": 5.6785, - "step": 43880 - }, - { - "epoch": 22.884485006518904, - "grad_norm": 1.488684892654419, - "learning_rate": 5.641708542713569e-05, - "loss": 5.2526, - "step": 43881 - }, - { - "epoch": 22.885006518904824, - "grad_norm": 1.5124902725219727, - "learning_rate": 5.641608040201005e-05, - "loss": 5.2503, - "step": 43882 - }, - { - "epoch": 22.885528031290743, - "grad_norm": 1.6074758768081665, - "learning_rate": 5.641507537688443e-05, - "loss": 5.2478, - "step": 43883 - }, - { - "epoch": 22.886049543676663, - "grad_norm": 1.5183216333389282, - "learning_rate": 5.6414070351758794e-05, - "loss": 5.7435, - "step": 43884 - }, - { - "epoch": 22.886571056062582, - "grad_norm": 1.5744880437850952, - "learning_rate": 5.6413065326633165e-05, - "loss": 5.2411, - "step": 43885 - }, - { - "epoch": 22.8870925684485, - "grad_norm": 1.7396072149276733, - "learning_rate": 5.641206030150754e-05, - "loss": 5.1458, - "step": 43886 - }, - { - "epoch": 22.88761408083442, - "grad_norm": 1.6306849718093872, - "learning_rate": 5.641105527638191e-05, - "loss": 5.313, - "step": 43887 - }, - { - "epoch": 22.88813559322034, - "grad_norm": 1.620241403579712, - "learning_rate": 5.6410050251256285e-05, - "loss": 5.2111, - "step": 43888 - }, - { - "epoch": 22.888657105606256, - "grad_norm": 1.5316517353057861, - "learning_rate": 5.640904522613065e-05, - "loss": 5.7275, - "step": 43889 - }, - { - "epoch": 22.889178617992176, - "grad_norm": 1.5592721700668335, - "learning_rate": 5.640804020100503e-05, - "loss": 5.3795, - "step": 43890 - }, - { - "epoch": 22.889700130378095, - "grad_norm": 1.5572409629821777, - "learning_rate": 5.64070351758794e-05, - "loss": 5.2895, - "step": 43891 - }, - { - "epoch": 22.890221642764015, - "grad_norm": 1.583119511604309, - "learning_rate": 5.6406030150753776e-05, - "loss": 5.2647, - "step": 43892 - }, - { - "epoch": 22.890743155149934, - "grad_norm": 1.5672944784164429, - "learning_rate": 5.640502512562814e-05, - "loss": 5.0594, - "step": 43893 - }, - { - "epoch": 22.891264667535854, - "grad_norm": 1.6076546907424927, - "learning_rate": 5.640402010050252e-05, - "loss": 5.2662, - "step": 43894 - }, - { - "epoch": 22.891786179921773, - "grad_norm": 1.592577576637268, - "learning_rate": 5.640301507537689e-05, - "loss": 5.4113, - "step": 43895 - }, - { - "epoch": 22.892307692307693, - "grad_norm": 1.5166155099868774, - "learning_rate": 5.640201005025127e-05, - "loss": 5.7303, - "step": 43896 - }, - { - "epoch": 22.892829204693612, - "grad_norm": 1.6370842456817627, - "learning_rate": 5.640100502512563e-05, - "loss": 5.0385, - "step": 43897 - }, - { - "epoch": 22.89335071707953, - "grad_norm": 1.5847370624542236, - "learning_rate": 5.6399999999999995e-05, - "loss": 5.3656, - "step": 43898 - }, - { - "epoch": 22.89387222946545, - "grad_norm": 1.5062470436096191, - "learning_rate": 5.639899497487437e-05, - "loss": 5.2647, - "step": 43899 - }, - { - "epoch": 22.89439374185137, - "grad_norm": 1.5968453884124756, - "learning_rate": 5.6397989949748744e-05, - "loss": 5.1529, - "step": 43900 - }, - { - "epoch": 22.894915254237286, - "grad_norm": 1.5363004207611084, - "learning_rate": 5.639698492462312e-05, - "loss": 5.5286, - "step": 43901 - }, - { - "epoch": 22.895436766623206, - "grad_norm": 1.585430383682251, - "learning_rate": 5.6395979899497486e-05, - "loss": 5.0544, - "step": 43902 - }, - { - "epoch": 22.895958279009125, - "grad_norm": 1.5483131408691406, - "learning_rate": 5.6394974874371864e-05, - "loss": 5.1722, - "step": 43903 - }, - { - "epoch": 22.896479791395045, - "grad_norm": 1.5975139141082764, - "learning_rate": 5.639396984924623e-05, - "loss": 4.9969, - "step": 43904 - }, - { - "epoch": 22.897001303780964, - "grad_norm": 1.6602686643600464, - "learning_rate": 5.6392964824120606e-05, - "loss": 4.9658, - "step": 43905 - }, - { - "epoch": 22.897522816166884, - "grad_norm": 1.5871829986572266, - "learning_rate": 5.639195979899498e-05, - "loss": 4.8261, - "step": 43906 - }, - { - "epoch": 22.898044328552803, - "grad_norm": 1.5655676126480103, - "learning_rate": 5.6390954773869355e-05, - "loss": 4.9143, - "step": 43907 - }, - { - "epoch": 22.898565840938723, - "grad_norm": 1.5854569673538208, - "learning_rate": 5.638994974874372e-05, - "loss": 5.7372, - "step": 43908 - }, - { - "epoch": 22.899087353324642, - "grad_norm": 1.5272400379180908, - "learning_rate": 5.63889447236181e-05, - "loss": 5.1663, - "step": 43909 - }, - { - "epoch": 22.89960886571056, - "grad_norm": 1.4531322717666626, - "learning_rate": 5.638793969849246e-05, - "loss": 5.0429, - "step": 43910 - }, - { - "epoch": 22.90013037809648, - "grad_norm": 1.4522771835327148, - "learning_rate": 5.638693467336683e-05, - "loss": 5.7825, - "step": 43911 - }, - { - "epoch": 22.9006518904824, - "grad_norm": 1.5248713493347168, - "learning_rate": 5.638592964824121e-05, - "loss": 5.5188, - "step": 43912 - }, - { - "epoch": 22.901173402868316, - "grad_norm": 1.6154943704605103, - "learning_rate": 5.6384924623115575e-05, - "loss": 5.5475, - "step": 43913 - }, - { - "epoch": 22.901694915254236, - "grad_norm": 1.6890860795974731, - "learning_rate": 5.638391959798995e-05, - "loss": 4.4913, - "step": 43914 - }, - { - "epoch": 22.902216427640155, - "grad_norm": 1.8384783267974854, - "learning_rate": 5.6382914572864324e-05, - "loss": 4.8023, - "step": 43915 - }, - { - "epoch": 22.902737940026075, - "grad_norm": 1.6328279972076416, - "learning_rate": 5.63819095477387e-05, - "loss": 4.7488, - "step": 43916 - }, - { - "epoch": 22.903259452411994, - "grad_norm": 1.5609350204467773, - "learning_rate": 5.6380904522613066e-05, - "loss": 5.459, - "step": 43917 - }, - { - "epoch": 22.903780964797914, - "grad_norm": 1.6764976978302002, - "learning_rate": 5.6379899497487443e-05, - "loss": 5.4223, - "step": 43918 - }, - { - "epoch": 22.904302477183833, - "grad_norm": 1.5950367450714111, - "learning_rate": 5.637889447236181e-05, - "loss": 5.0014, - "step": 43919 - }, - { - "epoch": 22.904823989569753, - "grad_norm": 1.7364130020141602, - "learning_rate": 5.6377889447236186e-05, - "loss": 4.8181, - "step": 43920 - }, - { - "epoch": 22.905345501955672, - "grad_norm": 1.6436392068862915, - "learning_rate": 5.637688442211056e-05, - "loss": 4.9525, - "step": 43921 - }, - { - "epoch": 22.90586701434159, - "grad_norm": 1.6760200262069702, - "learning_rate": 5.6375879396984934e-05, - "loss": 5.0236, - "step": 43922 - }, - { - "epoch": 22.90638852672751, - "grad_norm": 1.6810145378112793, - "learning_rate": 5.63748743718593e-05, - "loss": 5.2108, - "step": 43923 - }, - { - "epoch": 22.90691003911343, - "grad_norm": 1.5254544019699097, - "learning_rate": 5.6373869346733677e-05, - "loss": 5.1979, - "step": 43924 - }, - { - "epoch": 22.907431551499347, - "grad_norm": 1.5837843418121338, - "learning_rate": 5.637286432160804e-05, - "loss": 5.0538, - "step": 43925 - }, - { - "epoch": 22.907953063885266, - "grad_norm": 1.6617910861968994, - "learning_rate": 5.637185929648241e-05, - "loss": 5.2948, - "step": 43926 - }, - { - "epoch": 22.908474576271185, - "grad_norm": 1.5291924476623535, - "learning_rate": 5.637085427135679e-05, - "loss": 5.3043, - "step": 43927 - }, - { - "epoch": 22.908996088657105, - "grad_norm": 1.5804210901260376, - "learning_rate": 5.6369849246231154e-05, - "loss": 5.1193, - "step": 43928 - }, - { - "epoch": 22.909517601043024, - "grad_norm": 1.6330556869506836, - "learning_rate": 5.636884422110553e-05, - "loss": 4.9202, - "step": 43929 - }, - { - "epoch": 22.910039113428944, - "grad_norm": 1.4870970249176025, - "learning_rate": 5.6367839195979896e-05, - "loss": 5.3474, - "step": 43930 - }, - { - "epoch": 22.910560625814863, - "grad_norm": 1.579213261604309, - "learning_rate": 5.6366834170854274e-05, - "loss": 5.1351, - "step": 43931 - }, - { - "epoch": 22.911082138200783, - "grad_norm": 1.581527829170227, - "learning_rate": 5.6365829145728645e-05, - "loss": 4.9337, - "step": 43932 - }, - { - "epoch": 22.911603650586702, - "grad_norm": 1.6781792640686035, - "learning_rate": 5.636482412060302e-05, - "loss": 5.3971, - "step": 43933 - }, - { - "epoch": 22.91212516297262, - "grad_norm": 1.5742840766906738, - "learning_rate": 5.636381909547739e-05, - "loss": 4.7718, - "step": 43934 - }, - { - "epoch": 22.91264667535854, - "grad_norm": 1.512369155883789, - "learning_rate": 5.6362814070351765e-05, - "loss": 4.5918, - "step": 43935 - }, - { - "epoch": 22.91316818774446, - "grad_norm": 1.4804518222808838, - "learning_rate": 5.6361809045226136e-05, - "loss": 4.9479, - "step": 43936 - }, - { - "epoch": 22.913689700130377, - "grad_norm": 1.6695044040679932, - "learning_rate": 5.6360804020100514e-05, - "loss": 4.3998, - "step": 43937 - }, - { - "epoch": 22.914211212516296, - "grad_norm": 1.6352910995483398, - "learning_rate": 5.635979899497488e-05, - "loss": 4.7448, - "step": 43938 - }, - { - "epoch": 22.914732724902215, - "grad_norm": 1.617153286933899, - "learning_rate": 5.635879396984924e-05, - "loss": 5.0969, - "step": 43939 - }, - { - "epoch": 22.915254237288135, - "grad_norm": 1.6170361042022705, - "learning_rate": 5.635778894472362e-05, - "loss": 4.6339, - "step": 43940 - }, - { - "epoch": 22.915775749674054, - "grad_norm": 1.722010850906372, - "learning_rate": 5.635678391959799e-05, - "loss": 4.8016, - "step": 43941 - }, - { - "epoch": 22.916297262059974, - "grad_norm": 1.5365633964538574, - "learning_rate": 5.635577889447237e-05, - "loss": 5.1739, - "step": 43942 - }, - { - "epoch": 22.916818774445893, - "grad_norm": 1.5213146209716797, - "learning_rate": 5.635477386934673e-05, - "loss": 5.5579, - "step": 43943 - }, - { - "epoch": 22.917340286831813, - "grad_norm": 1.475122094154358, - "learning_rate": 5.635376884422111e-05, - "loss": 5.6066, - "step": 43944 - }, - { - "epoch": 22.917861799217732, - "grad_norm": 1.663979411125183, - "learning_rate": 5.6352763819095475e-05, - "loss": 4.6066, - "step": 43945 - }, - { - "epoch": 22.91838331160365, - "grad_norm": 1.420501708984375, - "learning_rate": 5.635175879396985e-05, - "loss": 5.5343, - "step": 43946 - }, - { - "epoch": 22.91890482398957, - "grad_norm": 1.6265175342559814, - "learning_rate": 5.6350753768844224e-05, - "loss": 5.3059, - "step": 43947 - }, - { - "epoch": 22.919426336375487, - "grad_norm": 1.5129711627960205, - "learning_rate": 5.63497487437186e-05, - "loss": 4.9467, - "step": 43948 - }, - { - "epoch": 22.919947848761407, - "grad_norm": 1.6144676208496094, - "learning_rate": 5.6348743718592966e-05, - "loss": 4.7979, - "step": 43949 - }, - { - "epoch": 22.920469361147326, - "grad_norm": 1.5220717191696167, - "learning_rate": 5.6347738693467344e-05, - "loss": 5.3558, - "step": 43950 - }, - { - "epoch": 22.920990873533245, - "grad_norm": 1.6656690835952759, - "learning_rate": 5.634673366834171e-05, - "loss": 5.1728, - "step": 43951 - }, - { - "epoch": 22.921512385919165, - "grad_norm": 1.6289803981781006, - "learning_rate": 5.634572864321608e-05, - "loss": 5.3415, - "step": 43952 - }, - { - "epoch": 22.922033898305084, - "grad_norm": 1.6315346956253052, - "learning_rate": 5.634472361809046e-05, - "loss": 4.6387, - "step": 43953 - }, - { - "epoch": 22.922555410691004, - "grad_norm": 1.6612775325775146, - "learning_rate": 5.634371859296482e-05, - "loss": 5.735, - "step": 43954 - }, - { - "epoch": 22.923076923076923, - "grad_norm": 1.5979325771331787, - "learning_rate": 5.63427135678392e-05, - "loss": 5.1228, - "step": 43955 - }, - { - "epoch": 22.923598435462843, - "grad_norm": 1.5397437810897827, - "learning_rate": 5.6341708542713564e-05, - "loss": 5.4637, - "step": 43956 - }, - { - "epoch": 22.924119947848762, - "grad_norm": 1.5566164255142212, - "learning_rate": 5.634070351758794e-05, - "loss": 5.0734, - "step": 43957 - }, - { - "epoch": 22.92464146023468, - "grad_norm": 1.628625512123108, - "learning_rate": 5.633969849246231e-05, - "loss": 4.4375, - "step": 43958 - }, - { - "epoch": 22.9251629726206, - "grad_norm": 1.5245404243469238, - "learning_rate": 5.633869346733669e-05, - "loss": 5.4129, - "step": 43959 - }, - { - "epoch": 22.92568448500652, - "grad_norm": 1.636980652809143, - "learning_rate": 5.6337688442211055e-05, - "loss": 5.0094, - "step": 43960 - }, - { - "epoch": 22.926205997392437, - "grad_norm": 1.50666081905365, - "learning_rate": 5.633668341708543e-05, - "loss": 5.3941, - "step": 43961 - }, - { - "epoch": 22.926727509778356, - "grad_norm": 1.6201801300048828, - "learning_rate": 5.6335678391959804e-05, - "loss": 4.8278, - "step": 43962 - }, - { - "epoch": 22.927249022164276, - "grad_norm": 1.4927394390106201, - "learning_rate": 5.633467336683418e-05, - "loss": 5.0763, - "step": 43963 - }, - { - "epoch": 22.927770534550195, - "grad_norm": 1.4961588382720947, - "learning_rate": 5.6333668341708546e-05, - "loss": 5.1818, - "step": 43964 - }, - { - "epoch": 22.928292046936114, - "grad_norm": 1.429048776626587, - "learning_rate": 5.633266331658291e-05, - "loss": 5.0859, - "step": 43965 - }, - { - "epoch": 22.928813559322034, - "grad_norm": 1.5219444036483765, - "learning_rate": 5.633165829145729e-05, - "loss": 5.1233, - "step": 43966 - }, - { - "epoch": 22.929335071707953, - "grad_norm": 1.5636223554611206, - "learning_rate": 5.633065326633166e-05, - "loss": 5.2895, - "step": 43967 - }, - { - "epoch": 22.929856584093873, - "grad_norm": 1.5056648254394531, - "learning_rate": 5.632964824120604e-05, - "loss": 5.181, - "step": 43968 - }, - { - "epoch": 22.930378096479792, - "grad_norm": 1.5227999687194824, - "learning_rate": 5.63286432160804e-05, - "loss": 5.3637, - "step": 43969 - }, - { - "epoch": 22.93089960886571, - "grad_norm": 1.4446873664855957, - "learning_rate": 5.632763819095478e-05, - "loss": 5.3276, - "step": 43970 - }, - { - "epoch": 22.93142112125163, - "grad_norm": 1.3778117895126343, - "learning_rate": 5.632663316582914e-05, - "loss": 5.653, - "step": 43971 - }, - { - "epoch": 22.931942633637547, - "grad_norm": 1.5520516633987427, - "learning_rate": 5.632562814070352e-05, - "loss": 5.1753, - "step": 43972 - }, - { - "epoch": 22.932464146023467, - "grad_norm": 1.7097194194793701, - "learning_rate": 5.632462311557789e-05, - "loss": 4.4798, - "step": 43973 - }, - { - "epoch": 22.932985658409386, - "grad_norm": 1.4768521785736084, - "learning_rate": 5.632361809045227e-05, - "loss": 4.8343, - "step": 43974 - }, - { - "epoch": 22.933507170795306, - "grad_norm": 1.5331894159317017, - "learning_rate": 5.6322613065326634e-05, - "loss": 5.5059, - "step": 43975 - }, - { - "epoch": 22.934028683181225, - "grad_norm": 1.5519486665725708, - "learning_rate": 5.632160804020101e-05, - "loss": 4.948, - "step": 43976 - }, - { - "epoch": 22.934550195567144, - "grad_norm": 1.53237783908844, - "learning_rate": 5.6320603015075376e-05, - "loss": 5.1662, - "step": 43977 - }, - { - "epoch": 22.935071707953064, - "grad_norm": 1.5506590604782104, - "learning_rate": 5.631959798994975e-05, - "loss": 5.3458, - "step": 43978 - }, - { - "epoch": 22.935593220338983, - "grad_norm": 1.5153002738952637, - "learning_rate": 5.6318592964824125e-05, - "loss": 5.4112, - "step": 43979 - }, - { - "epoch": 22.936114732724903, - "grad_norm": 1.4981226921081543, - "learning_rate": 5.631758793969849e-05, - "loss": 5.6101, - "step": 43980 - }, - { - "epoch": 22.936636245110822, - "grad_norm": 1.5827105045318604, - "learning_rate": 5.631658291457287e-05, - "loss": 5.3776, - "step": 43981 - }, - { - "epoch": 22.937157757496742, - "grad_norm": 1.5604897737503052, - "learning_rate": 5.631557788944724e-05, - "loss": 5.4678, - "step": 43982 - }, - { - "epoch": 22.93767926988266, - "grad_norm": 1.5260343551635742, - "learning_rate": 5.6314572864321616e-05, - "loss": 5.2747, - "step": 43983 - }, - { - "epoch": 22.938200782268577, - "grad_norm": 1.5876272916793823, - "learning_rate": 5.631356783919598e-05, - "loss": 4.8567, - "step": 43984 - }, - { - "epoch": 22.938722294654497, - "grad_norm": 1.5781205892562866, - "learning_rate": 5.631256281407036e-05, - "loss": 5.289, - "step": 43985 - }, - { - "epoch": 22.939243807040416, - "grad_norm": 1.4727349281311035, - "learning_rate": 5.631155778894472e-05, - "loss": 5.378, - "step": 43986 - }, - { - "epoch": 22.939765319426336, - "grad_norm": 1.6080297231674194, - "learning_rate": 5.63105527638191e-05, - "loss": 5.2689, - "step": 43987 - }, - { - "epoch": 22.940286831812255, - "grad_norm": 1.4950214624404907, - "learning_rate": 5.630954773869347e-05, - "loss": 5.6667, - "step": 43988 - }, - { - "epoch": 22.940808344198174, - "grad_norm": 1.5363363027572632, - "learning_rate": 5.630854271356785e-05, - "loss": 5.3952, - "step": 43989 - }, - { - "epoch": 22.941329856584094, - "grad_norm": 1.5760457515716553, - "learning_rate": 5.6307537688442213e-05, - "loss": 5.0277, - "step": 43990 - }, - { - "epoch": 22.941851368970013, - "grad_norm": 1.6175216436386108, - "learning_rate": 5.630653266331658e-05, - "loss": 5.2345, - "step": 43991 - }, - { - "epoch": 22.942372881355933, - "grad_norm": 1.4629230499267578, - "learning_rate": 5.6305527638190956e-05, - "loss": 5.3791, - "step": 43992 - }, - { - "epoch": 22.942894393741852, - "grad_norm": 1.660480260848999, - "learning_rate": 5.6304522613065327e-05, - "loss": 5.3354, - "step": 43993 - }, - { - "epoch": 22.943415906127772, - "grad_norm": 1.625388264656067, - "learning_rate": 5.6303517587939704e-05, - "loss": 5.3221, - "step": 43994 - }, - { - "epoch": 22.94393741851369, - "grad_norm": 1.4898459911346436, - "learning_rate": 5.630251256281407e-05, - "loss": 5.582, - "step": 43995 - }, - { - "epoch": 22.944458930899607, - "grad_norm": 1.5073834657669067, - "learning_rate": 5.6301507537688446e-05, - "loss": 5.1166, - "step": 43996 - }, - { - "epoch": 22.944980443285527, - "grad_norm": 1.6113460063934326, - "learning_rate": 5.630050251256281e-05, - "loss": 5.3444, - "step": 43997 - }, - { - "epoch": 22.945501955671446, - "grad_norm": 1.4706408977508545, - "learning_rate": 5.629949748743719e-05, - "loss": 5.4544, - "step": 43998 - }, - { - "epoch": 22.946023468057366, - "grad_norm": 1.4544531106948853, - "learning_rate": 5.629849246231156e-05, - "loss": 5.5088, - "step": 43999 - }, - { - "epoch": 22.946544980443285, - "grad_norm": 1.4736666679382324, - "learning_rate": 5.629748743718594e-05, - "loss": 5.1489, - "step": 44000 - }, - { - "epoch": 22.947066492829205, - "grad_norm": 1.663905143737793, - "learning_rate": 5.62964824120603e-05, - "loss": 5.3924, - "step": 44001 - }, - { - "epoch": 22.947588005215124, - "grad_norm": 1.5415337085723877, - "learning_rate": 5.629547738693468e-05, - "loss": 4.9444, - "step": 44002 - }, - { - "epoch": 22.948109517601043, - "grad_norm": 1.5413964986801147, - "learning_rate": 5.629447236180905e-05, - "loss": 4.9241, - "step": 44003 - }, - { - "epoch": 22.948631029986963, - "grad_norm": 1.537776231765747, - "learning_rate": 5.629346733668343e-05, - "loss": 5.6537, - "step": 44004 - }, - { - "epoch": 22.949152542372882, - "grad_norm": 1.559747576713562, - "learning_rate": 5.629246231155779e-05, - "loss": 5.4506, - "step": 44005 - }, - { - "epoch": 22.949674054758802, - "grad_norm": 1.6596547365188599, - "learning_rate": 5.629145728643216e-05, - "loss": 4.7549, - "step": 44006 - }, - { - "epoch": 22.95019556714472, - "grad_norm": 1.6191202402114868, - "learning_rate": 5.6290452261306535e-05, - "loss": 4.9306, - "step": 44007 - }, - { - "epoch": 22.950717079530637, - "grad_norm": 1.6380170583724976, - "learning_rate": 5.6289447236180906e-05, - "loss": 5.3569, - "step": 44008 - }, - { - "epoch": 22.951238591916557, - "grad_norm": 1.566148042678833, - "learning_rate": 5.6288442211055284e-05, - "loss": 5.3534, - "step": 44009 - }, - { - "epoch": 22.951760104302476, - "grad_norm": 1.5120725631713867, - "learning_rate": 5.628743718592965e-05, - "loss": 5.1883, - "step": 44010 - }, - { - "epoch": 22.952281616688396, - "grad_norm": 1.590471625328064, - "learning_rate": 5.6286432160804026e-05, - "loss": 4.9957, - "step": 44011 - }, - { - "epoch": 22.952803129074315, - "grad_norm": 1.6497929096221924, - "learning_rate": 5.628542713567839e-05, - "loss": 5.25, - "step": 44012 - }, - { - "epoch": 22.953324641460235, - "grad_norm": 1.5450183153152466, - "learning_rate": 5.628442211055277e-05, - "loss": 5.407, - "step": 44013 - }, - { - "epoch": 22.953846153846154, - "grad_norm": 1.41344153881073, - "learning_rate": 5.628341708542714e-05, - "loss": 5.5918, - "step": 44014 - }, - { - "epoch": 22.954367666232073, - "grad_norm": 1.5517079830169678, - "learning_rate": 5.628241206030152e-05, - "loss": 5.2776, - "step": 44015 - }, - { - "epoch": 22.954889178617993, - "grad_norm": 1.4785363674163818, - "learning_rate": 5.628140703517588e-05, - "loss": 4.9469, - "step": 44016 - }, - { - "epoch": 22.955410691003912, - "grad_norm": 1.6513373851776123, - "learning_rate": 5.628040201005026e-05, - "loss": 5.0082, - "step": 44017 - }, - { - "epoch": 22.955932203389832, - "grad_norm": 1.4752473831176758, - "learning_rate": 5.627939698492462e-05, - "loss": 5.1943, - "step": 44018 - }, - { - "epoch": 22.95645371577575, - "grad_norm": 1.589176893234253, - "learning_rate": 5.6278391959798994e-05, - "loss": 5.4167, - "step": 44019 - }, - { - "epoch": 22.956975228161667, - "grad_norm": 1.8575412034988403, - "learning_rate": 5.627738693467337e-05, - "loss": 4.9391, - "step": 44020 - }, - { - "epoch": 22.957496740547587, - "grad_norm": 1.5082495212554932, - "learning_rate": 5.6276381909547736e-05, - "loss": 5.5104, - "step": 44021 - }, - { - "epoch": 22.958018252933506, - "grad_norm": 1.4279236793518066, - "learning_rate": 5.6275376884422114e-05, - "loss": 5.4063, - "step": 44022 - }, - { - "epoch": 22.958539765319426, - "grad_norm": 1.474502682685852, - "learning_rate": 5.6274371859296485e-05, - "loss": 5.5541, - "step": 44023 - }, - { - "epoch": 22.959061277705345, - "grad_norm": 1.583530306816101, - "learning_rate": 5.627336683417086e-05, - "loss": 4.8739, - "step": 44024 - }, - { - "epoch": 22.959582790091265, - "grad_norm": 1.576289176940918, - "learning_rate": 5.627236180904523e-05, - "loss": 5.1191, - "step": 44025 - }, - { - "epoch": 22.960104302477184, - "grad_norm": 1.4606496095657349, - "learning_rate": 5.6271356783919605e-05, - "loss": 5.3671, - "step": 44026 - }, - { - "epoch": 22.960625814863103, - "grad_norm": 1.463059425354004, - "learning_rate": 5.627035175879397e-05, - "loss": 5.4798, - "step": 44027 - }, - { - "epoch": 22.961147327249023, - "grad_norm": 1.6371541023254395, - "learning_rate": 5.626934673366835e-05, - "loss": 5.1206, - "step": 44028 - }, - { - "epoch": 22.961668839634942, - "grad_norm": 1.532850742340088, - "learning_rate": 5.626834170854272e-05, - "loss": 4.8538, - "step": 44029 - }, - { - "epoch": 22.962190352020862, - "grad_norm": 1.5197837352752686, - "learning_rate": 5.6267336683417096e-05, - "loss": 4.9883, - "step": 44030 - }, - { - "epoch": 22.96271186440678, - "grad_norm": 1.6664855480194092, - "learning_rate": 5.626633165829146e-05, - "loss": 5.0164, - "step": 44031 - }, - { - "epoch": 22.963233376792697, - "grad_norm": 1.4934005737304688, - "learning_rate": 5.6265326633165825e-05, - "loss": 4.6812, - "step": 44032 - }, - { - "epoch": 22.963754889178617, - "grad_norm": 1.5279492139816284, - "learning_rate": 5.62643216080402e-05, - "loss": 5.1764, - "step": 44033 - }, - { - "epoch": 22.964276401564536, - "grad_norm": 1.513526439666748, - "learning_rate": 5.6263316582914574e-05, - "loss": 5.1018, - "step": 44034 - }, - { - "epoch": 22.964797913950456, - "grad_norm": 1.7110165357589722, - "learning_rate": 5.626231155778895e-05, - "loss": 5.056, - "step": 44035 - }, - { - "epoch": 22.965319426336375, - "grad_norm": 1.5855284929275513, - "learning_rate": 5.6261306532663316e-05, - "loss": 5.5769, - "step": 44036 - }, - { - "epoch": 22.965840938722295, - "grad_norm": 1.6046127080917358, - "learning_rate": 5.6260301507537693e-05, - "loss": 4.9911, - "step": 44037 - }, - { - "epoch": 22.966362451108214, - "grad_norm": 1.5545600652694702, - "learning_rate": 5.625929648241206e-05, - "loss": 4.9362, - "step": 44038 - }, - { - "epoch": 22.966883963494134, - "grad_norm": 1.5197006464004517, - "learning_rate": 5.6258291457286436e-05, - "loss": 5.2668, - "step": 44039 - }, - { - "epoch": 22.967405475880053, - "grad_norm": 1.4803029298782349, - "learning_rate": 5.625728643216081e-05, - "loss": 5.4607, - "step": 44040 - }, - { - "epoch": 22.967926988265972, - "grad_norm": 1.5844285488128662, - "learning_rate": 5.6256281407035184e-05, - "loss": 5.3269, - "step": 44041 - }, - { - "epoch": 22.968448500651892, - "grad_norm": 1.5247690677642822, - "learning_rate": 5.625527638190955e-05, - "loss": 5.4722, - "step": 44042 - }, - { - "epoch": 22.96897001303781, - "grad_norm": 1.5167207717895508, - "learning_rate": 5.6254271356783927e-05, - "loss": 5.169, - "step": 44043 - }, - { - "epoch": 22.969491525423727, - "grad_norm": 1.4671735763549805, - "learning_rate": 5.625326633165829e-05, - "loss": 5.0933, - "step": 44044 - }, - { - "epoch": 22.970013037809647, - "grad_norm": 1.6494500637054443, - "learning_rate": 5.625226130653266e-05, - "loss": 5.6151, - "step": 44045 - }, - { - "epoch": 22.970534550195566, - "grad_norm": 1.4672765731811523, - "learning_rate": 5.625125628140704e-05, - "loss": 5.037, - "step": 44046 - }, - { - "epoch": 22.971056062581486, - "grad_norm": 1.50511634349823, - "learning_rate": 5.6250251256281404e-05, - "loss": 5.5448, - "step": 44047 - }, - { - "epoch": 22.971577574967405, - "grad_norm": 1.507594108581543, - "learning_rate": 5.624924623115578e-05, - "loss": 4.9921, - "step": 44048 - }, - { - "epoch": 22.972099087353325, - "grad_norm": 1.5252268314361572, - "learning_rate": 5.624824120603015e-05, - "loss": 5.2267, - "step": 44049 - }, - { - "epoch": 22.972620599739244, - "grad_norm": 1.4307701587677002, - "learning_rate": 5.624723618090453e-05, - "loss": 4.8045, - "step": 44050 - }, - { - "epoch": 22.973142112125164, - "grad_norm": 1.5661019086837769, - "learning_rate": 5.6246231155778895e-05, - "loss": 4.5491, - "step": 44051 - }, - { - "epoch": 22.973663624511083, - "grad_norm": 1.4553507566452026, - "learning_rate": 5.624522613065327e-05, - "loss": 5.6224, - "step": 44052 - }, - { - "epoch": 22.974185136897002, - "grad_norm": 1.5095007419586182, - "learning_rate": 5.624422110552764e-05, - "loss": 5.3017, - "step": 44053 - }, - { - "epoch": 22.974706649282922, - "grad_norm": 1.5380406379699707, - "learning_rate": 5.6243216080402015e-05, - "loss": 5.3244, - "step": 44054 - }, - { - "epoch": 22.975228161668838, - "grad_norm": 1.5967761278152466, - "learning_rate": 5.6242211055276386e-05, - "loss": 4.918, - "step": 44055 - }, - { - "epoch": 22.975749674054757, - "grad_norm": 1.4848142862319946, - "learning_rate": 5.6241206030150764e-05, - "loss": 5.5117, - "step": 44056 - }, - { - "epoch": 22.976271186440677, - "grad_norm": 1.523545503616333, - "learning_rate": 5.624020100502513e-05, - "loss": 5.4724, - "step": 44057 - }, - { - "epoch": 22.976792698826596, - "grad_norm": 1.503098487854004, - "learning_rate": 5.623919597989949e-05, - "loss": 5.0057, - "step": 44058 - }, - { - "epoch": 22.977314211212516, - "grad_norm": 1.584805965423584, - "learning_rate": 5.623819095477387e-05, - "loss": 5.3204, - "step": 44059 - }, - { - "epoch": 22.977835723598435, - "grad_norm": 1.4231570959091187, - "learning_rate": 5.623718592964824e-05, - "loss": 5.4983, - "step": 44060 - }, - { - "epoch": 22.978357235984355, - "grad_norm": 1.4428253173828125, - "learning_rate": 5.623618090452262e-05, - "loss": 5.5572, - "step": 44061 - }, - { - "epoch": 22.978878748370274, - "grad_norm": 1.4895906448364258, - "learning_rate": 5.623517587939698e-05, - "loss": 5.6394, - "step": 44062 - }, - { - "epoch": 22.979400260756194, - "grad_norm": 1.5029640197753906, - "learning_rate": 5.623417085427136e-05, - "loss": 5.6583, - "step": 44063 - }, - { - "epoch": 22.979921773142113, - "grad_norm": 1.5950064659118652, - "learning_rate": 5.6233165829145725e-05, - "loss": 5.5935, - "step": 44064 - }, - { - "epoch": 22.980443285528033, - "grad_norm": 1.480576992034912, - "learning_rate": 5.62321608040201e-05, - "loss": 5.1865, - "step": 44065 - }, - { - "epoch": 22.980964797913952, - "grad_norm": 1.4549455642700195, - "learning_rate": 5.6231155778894474e-05, - "loss": 5.5733, - "step": 44066 - }, - { - "epoch": 22.98148631029987, - "grad_norm": 1.456506371498108, - "learning_rate": 5.623015075376885e-05, - "loss": 5.3811, - "step": 44067 - }, - { - "epoch": 22.982007822685787, - "grad_norm": 1.4077600240707397, - "learning_rate": 5.6229145728643216e-05, - "loss": 5.7994, - "step": 44068 - }, - { - "epoch": 22.982529335071707, - "grad_norm": 1.627037763595581, - "learning_rate": 5.6228140703517594e-05, - "loss": 5.3829, - "step": 44069 - }, - { - "epoch": 22.983050847457626, - "grad_norm": 1.5012351274490356, - "learning_rate": 5.6227135678391965e-05, - "loss": 5.0029, - "step": 44070 - }, - { - "epoch": 22.983572359843546, - "grad_norm": 1.4944490194320679, - "learning_rate": 5.622613065326633e-05, - "loss": 5.1808, - "step": 44071 - }, - { - "epoch": 22.984093872229465, - "grad_norm": 1.4852514266967773, - "learning_rate": 5.622512562814071e-05, - "loss": 5.3792, - "step": 44072 - }, - { - "epoch": 22.984615384615385, - "grad_norm": 1.4140602350234985, - "learning_rate": 5.622412060301507e-05, - "loss": 5.3983, - "step": 44073 - }, - { - "epoch": 22.985136897001304, - "grad_norm": 1.6279915571212769, - "learning_rate": 5.622311557788945e-05, - "loss": 5.0152, - "step": 44074 - }, - { - "epoch": 22.985658409387224, - "grad_norm": 1.5198726654052734, - "learning_rate": 5.622211055276382e-05, - "loss": 5.588, - "step": 44075 - }, - { - "epoch": 22.986179921773143, - "grad_norm": 1.6081055402755737, - "learning_rate": 5.62211055276382e-05, - "loss": 4.7823, - "step": 44076 - }, - { - "epoch": 22.986701434159063, - "grad_norm": 1.6730215549468994, - "learning_rate": 5.622010050251256e-05, - "loss": 4.7682, - "step": 44077 - }, - { - "epoch": 22.987222946544982, - "grad_norm": 1.5244570970535278, - "learning_rate": 5.621909547738694e-05, - "loss": 5.2134, - "step": 44078 - }, - { - "epoch": 22.987744458930898, - "grad_norm": 1.636431336402893, - "learning_rate": 5.6218090452261305e-05, - "loss": 5.1304, - "step": 44079 - }, - { - "epoch": 22.988265971316817, - "grad_norm": 1.498121976852417, - "learning_rate": 5.621708542713568e-05, - "loss": 5.4388, - "step": 44080 - }, - { - "epoch": 22.988787483702737, - "grad_norm": 1.5559110641479492, - "learning_rate": 5.6216080402010054e-05, - "loss": 5.2462, - "step": 44081 - }, - { - "epoch": 22.989308996088656, - "grad_norm": 1.574946403503418, - "learning_rate": 5.621507537688443e-05, - "loss": 5.5067, - "step": 44082 - }, - { - "epoch": 22.989830508474576, - "grad_norm": 1.4182963371276855, - "learning_rate": 5.6214070351758796e-05, - "loss": 5.4216, - "step": 44083 - }, - { - "epoch": 22.990352020860495, - "grad_norm": 1.535606861114502, - "learning_rate": 5.621306532663316e-05, - "loss": 5.4884, - "step": 44084 - }, - { - "epoch": 22.990873533246415, - "grad_norm": 1.5597097873687744, - "learning_rate": 5.621206030150754e-05, - "loss": 5.729, - "step": 44085 - }, - { - "epoch": 22.991395045632334, - "grad_norm": 1.6042708158493042, - "learning_rate": 5.621105527638191e-05, - "loss": 5.0583, - "step": 44086 - }, - { - "epoch": 22.991916558018254, - "grad_norm": 1.4815045595169067, - "learning_rate": 5.621005025125629e-05, - "loss": 5.5103, - "step": 44087 - }, - { - "epoch": 22.992438070404173, - "grad_norm": 1.559187412261963, - "learning_rate": 5.620904522613065e-05, - "loss": 5.3178, - "step": 44088 - }, - { - "epoch": 22.992959582790093, - "grad_norm": 1.653410792350769, - "learning_rate": 5.620804020100503e-05, - "loss": 4.3553, - "step": 44089 - }, - { - "epoch": 22.993481095176012, - "grad_norm": 1.5962815284729004, - "learning_rate": 5.62070351758794e-05, - "loss": 4.8565, - "step": 44090 - }, - { - "epoch": 22.994002607561928, - "grad_norm": 1.5313242673873901, - "learning_rate": 5.620603015075378e-05, - "loss": 5.213, - "step": 44091 - }, - { - "epoch": 22.994524119947847, - "grad_norm": 1.4439717531204224, - "learning_rate": 5.620502512562814e-05, - "loss": 4.7648, - "step": 44092 - }, - { - "epoch": 22.995045632333767, - "grad_norm": 1.6210914850234985, - "learning_rate": 5.620402010050252e-05, - "loss": 5.4878, - "step": 44093 - }, - { - "epoch": 22.995567144719686, - "grad_norm": 1.5509672164916992, - "learning_rate": 5.6203015075376884e-05, - "loss": 5.1722, - "step": 44094 - }, - { - "epoch": 22.996088657105606, - "grad_norm": 1.5761781930923462, - "learning_rate": 5.620201005025126e-05, - "loss": 5.049, - "step": 44095 - }, - { - "epoch": 22.996610169491525, - "grad_norm": 1.4706286191940308, - "learning_rate": 5.620100502512563e-05, - "loss": 5.0305, - "step": 44096 - }, - { - "epoch": 22.997131681877445, - "grad_norm": 1.5995993614196777, - "learning_rate": 5.620000000000001e-05, - "loss": 5.2825, - "step": 44097 - }, - { - "epoch": 22.997653194263364, - "grad_norm": 1.561200499534607, - "learning_rate": 5.6198994974874375e-05, - "loss": 5.629, - "step": 44098 - }, - { - "epoch": 22.998174706649284, - "grad_norm": 1.61214280128479, - "learning_rate": 5.619798994974874e-05, - "loss": 5.1613, - "step": 44099 - }, - { - "epoch": 22.998696219035203, - "grad_norm": 1.5085150003433228, - "learning_rate": 5.619698492462312e-05, - "loss": 5.3224, - "step": 44100 - }, - { - "epoch": 22.999217731421123, - "grad_norm": 1.4961739778518677, - "learning_rate": 5.619597989949749e-05, - "loss": 5.1694, - "step": 44101 - }, - { - "epoch": 22.999739243807042, - "grad_norm": 1.4383876323699951, - "learning_rate": 5.6194974874371866e-05, - "loss": 5.6287, - "step": 44102 - }, - { - "epoch": 23.000260756192958, - "grad_norm": 1.7588353157043457, - "learning_rate": 5.619396984924623e-05, - "loss": 5.1551, - "step": 44103 - }, - { - "epoch": 23.000782268578877, - "grad_norm": 1.5699337720870972, - "learning_rate": 5.619296482412061e-05, - "loss": 5.1483, - "step": 44104 - }, - { - "epoch": 23.001303780964797, - "grad_norm": 1.5176074504852295, - "learning_rate": 5.619195979899497e-05, - "loss": 5.2755, - "step": 44105 - }, - { - "epoch": 23.001825293350716, - "grad_norm": 1.4847779273986816, - "learning_rate": 5.619095477386935e-05, - "loss": 5.2375, - "step": 44106 - }, - { - "epoch": 23.002346805736636, - "grad_norm": 1.5303823947906494, - "learning_rate": 5.618994974874372e-05, - "loss": 4.9822, - "step": 44107 - }, - { - "epoch": 23.002868318122555, - "grad_norm": 1.5105905532836914, - "learning_rate": 5.61889447236181e-05, - "loss": 5.5286, - "step": 44108 - }, - { - "epoch": 23.003389830508475, - "grad_norm": 1.4539484977722168, - "learning_rate": 5.6187939698492463e-05, - "loss": 5.0467, - "step": 44109 - }, - { - "epoch": 23.003911342894394, - "grad_norm": 1.5218104124069214, - "learning_rate": 5.618693467336684e-05, - "loss": 4.9173, - "step": 44110 - }, - { - "epoch": 23.004432855280314, - "grad_norm": 1.5706593990325928, - "learning_rate": 5.618592964824121e-05, - "loss": 5.3231, - "step": 44111 - }, - { - "epoch": 23.004954367666233, - "grad_norm": 1.58208429813385, - "learning_rate": 5.6184924623115577e-05, - "loss": 5.1382, - "step": 44112 - }, - { - "epoch": 23.005475880052153, - "grad_norm": 1.5139538049697876, - "learning_rate": 5.6183919597989954e-05, - "loss": 4.7019, - "step": 44113 - }, - { - "epoch": 23.005997392438072, - "grad_norm": 1.5079801082611084, - "learning_rate": 5.618291457286432e-05, - "loss": 5.1059, - "step": 44114 - }, - { - "epoch": 23.006518904823988, - "grad_norm": 1.4938979148864746, - "learning_rate": 5.6181909547738697e-05, - "loss": 5.3422, - "step": 44115 - }, - { - "epoch": 23.007040417209907, - "grad_norm": 1.5331518650054932, - "learning_rate": 5.618090452261307e-05, - "loss": 5.3532, - "step": 44116 - }, - { - "epoch": 23.007561929595827, - "grad_norm": 1.5456804037094116, - "learning_rate": 5.6179899497487445e-05, - "loss": 5.2898, - "step": 44117 - }, - { - "epoch": 23.008083441981746, - "grad_norm": 1.5163803100585938, - "learning_rate": 5.617889447236181e-05, - "loss": 5.0099, - "step": 44118 - }, - { - "epoch": 23.008604954367666, - "grad_norm": 1.524611473083496, - "learning_rate": 5.617788944723619e-05, - "loss": 5.3774, - "step": 44119 - }, - { - "epoch": 23.009126466753585, - "grad_norm": 1.5253468751907349, - "learning_rate": 5.617688442211055e-05, - "loss": 5.2014, - "step": 44120 - }, - { - "epoch": 23.009647979139505, - "grad_norm": 1.6026999950408936, - "learning_rate": 5.617587939698493e-05, - "loss": 4.7867, - "step": 44121 - }, - { - "epoch": 23.010169491525424, - "grad_norm": 1.4861558675765991, - "learning_rate": 5.61748743718593e-05, - "loss": 5.4878, - "step": 44122 - }, - { - "epoch": 23.010691003911344, - "grad_norm": 1.6254584789276123, - "learning_rate": 5.617386934673368e-05, - "loss": 5.2507, - "step": 44123 - }, - { - "epoch": 23.011212516297263, - "grad_norm": 1.6275973320007324, - "learning_rate": 5.617286432160804e-05, - "loss": 4.9022, - "step": 44124 - }, - { - "epoch": 23.011734028683183, - "grad_norm": 1.5329769849777222, - "learning_rate": 5.617185929648241e-05, - "loss": 5.0549, - "step": 44125 - }, - { - "epoch": 23.012255541069102, - "grad_norm": 1.4493244886398315, - "learning_rate": 5.6170854271356785e-05, - "loss": 5.6359, - "step": 44126 - }, - { - "epoch": 23.012777053455018, - "grad_norm": 1.471423625946045, - "learning_rate": 5.6169849246231156e-05, - "loss": 5.1138, - "step": 44127 - }, - { - "epoch": 23.013298565840937, - "grad_norm": 1.5144530534744263, - "learning_rate": 5.6168844221105534e-05, - "loss": 4.8477, - "step": 44128 - }, - { - "epoch": 23.013820078226857, - "grad_norm": 1.5249199867248535, - "learning_rate": 5.61678391959799e-05, - "loss": 5.191, - "step": 44129 - }, - { - "epoch": 23.014341590612776, - "grad_norm": 1.5001307725906372, - "learning_rate": 5.6166834170854276e-05, - "loss": 4.9884, - "step": 44130 - }, - { - "epoch": 23.014863102998696, - "grad_norm": 1.423403024673462, - "learning_rate": 5.616582914572864e-05, - "loss": 5.8198, - "step": 44131 - }, - { - "epoch": 23.015384615384615, - "grad_norm": 1.530036211013794, - "learning_rate": 5.616482412060302e-05, - "loss": 5.6179, - "step": 44132 - }, - { - "epoch": 23.015906127770535, - "grad_norm": 1.5245168209075928, - "learning_rate": 5.616381909547739e-05, - "loss": 4.6743, - "step": 44133 - }, - { - "epoch": 23.016427640156454, - "grad_norm": 1.4966111183166504, - "learning_rate": 5.616281407035177e-05, - "loss": 4.8368, - "step": 44134 - }, - { - "epoch": 23.016949152542374, - "grad_norm": 1.5477303266525269, - "learning_rate": 5.616180904522613e-05, - "loss": 5.7182, - "step": 44135 - }, - { - "epoch": 23.017470664928293, - "grad_norm": 1.4636286497116089, - "learning_rate": 5.616080402010051e-05, - "loss": 5.3811, - "step": 44136 - }, - { - "epoch": 23.017992177314213, - "grad_norm": 1.540224313735962, - "learning_rate": 5.615979899497488e-05, - "loss": 4.7569, - "step": 44137 - }, - { - "epoch": 23.018513689700132, - "grad_norm": 1.5466463565826416, - "learning_rate": 5.6158793969849244e-05, - "loss": 5.5355, - "step": 44138 - }, - { - "epoch": 23.019035202086048, - "grad_norm": 1.4627565145492554, - "learning_rate": 5.615778894472362e-05, - "loss": 5.3994, - "step": 44139 - }, - { - "epoch": 23.019556714471967, - "grad_norm": 1.5818090438842773, - "learning_rate": 5.6156783919597986e-05, - "loss": 5.1401, - "step": 44140 - }, - { - "epoch": 23.020078226857887, - "grad_norm": 1.4703141450881958, - "learning_rate": 5.6155778894472364e-05, - "loss": 5.6937, - "step": 44141 - }, - { - "epoch": 23.020599739243806, - "grad_norm": 1.574371576309204, - "learning_rate": 5.6154773869346735e-05, - "loss": 4.7369, - "step": 44142 - }, - { - "epoch": 23.021121251629726, - "grad_norm": 1.6514657735824585, - "learning_rate": 5.615376884422111e-05, - "loss": 4.8413, - "step": 44143 - }, - { - "epoch": 23.021642764015645, - "grad_norm": 1.6309975385665894, - "learning_rate": 5.615276381909548e-05, - "loss": 4.756, - "step": 44144 - }, - { - "epoch": 23.022164276401565, - "grad_norm": 1.5601089000701904, - "learning_rate": 5.6151758793969855e-05, - "loss": 5.2113, - "step": 44145 - }, - { - "epoch": 23.022685788787484, - "grad_norm": 1.6040171384811401, - "learning_rate": 5.615075376884422e-05, - "loss": 4.9641, - "step": 44146 - }, - { - "epoch": 23.023207301173404, - "grad_norm": 1.5096713304519653, - "learning_rate": 5.61497487437186e-05, - "loss": 5.2857, - "step": 44147 - }, - { - "epoch": 23.023728813559323, - "grad_norm": 1.5144857168197632, - "learning_rate": 5.614874371859297e-05, - "loss": 5.4664, - "step": 44148 - }, - { - "epoch": 23.024250325945243, - "grad_norm": 1.6723887920379639, - "learning_rate": 5.6147738693467346e-05, - "loss": 5.3033, - "step": 44149 - }, - { - "epoch": 23.02477183833116, - "grad_norm": 1.5519890785217285, - "learning_rate": 5.614673366834171e-05, - "loss": 5.5244, - "step": 44150 - }, - { - "epoch": 23.025293350717078, - "grad_norm": 1.5825716257095337, - "learning_rate": 5.6145728643216075e-05, - "loss": 5.413, - "step": 44151 - }, - { - "epoch": 23.025814863102998, - "grad_norm": 1.5205601453781128, - "learning_rate": 5.614472361809045e-05, - "loss": 5.36, - "step": 44152 - }, - { - "epoch": 23.026336375488917, - "grad_norm": 1.6226478815078735, - "learning_rate": 5.6143718592964824e-05, - "loss": 4.679, - "step": 44153 - }, - { - "epoch": 23.026857887874836, - "grad_norm": 1.5495645999908447, - "learning_rate": 5.61427135678392e-05, - "loss": 5.2031, - "step": 44154 - }, - { - "epoch": 23.027379400260756, - "grad_norm": 1.6152679920196533, - "learning_rate": 5.6141708542713566e-05, - "loss": 5.5204, - "step": 44155 - }, - { - "epoch": 23.027900912646675, - "grad_norm": 1.5449550151824951, - "learning_rate": 5.6140703517587944e-05, - "loss": 5.2636, - "step": 44156 - }, - { - "epoch": 23.028422425032595, - "grad_norm": 1.5193078517913818, - "learning_rate": 5.6139698492462315e-05, - "loss": 5.3413, - "step": 44157 - }, - { - "epoch": 23.028943937418514, - "grad_norm": 1.4681317806243896, - "learning_rate": 5.613869346733669e-05, - "loss": 5.2517, - "step": 44158 - }, - { - "epoch": 23.029465449804434, - "grad_norm": 1.672638177871704, - "learning_rate": 5.613768844221106e-05, - "loss": 5.1265, - "step": 44159 - }, - { - "epoch": 23.029986962190353, - "grad_norm": 1.6082420349121094, - "learning_rate": 5.6136683417085434e-05, - "loss": 5.2193, - "step": 44160 - }, - { - "epoch": 23.030508474576273, - "grad_norm": 1.5401055812835693, - "learning_rate": 5.61356783919598e-05, - "loss": 5.4234, - "step": 44161 - }, - { - "epoch": 23.03102998696219, - "grad_norm": 1.536392092704773, - "learning_rate": 5.6134673366834177e-05, - "loss": 4.9333, - "step": 44162 - }, - { - "epoch": 23.031551499348108, - "grad_norm": 1.5998786687850952, - "learning_rate": 5.613366834170855e-05, - "loss": 4.7861, - "step": 44163 - }, - { - "epoch": 23.032073011734028, - "grad_norm": 1.6624573469161987, - "learning_rate": 5.613266331658291e-05, - "loss": 5.2664, - "step": 44164 - }, - { - "epoch": 23.032594524119947, - "grad_norm": 1.5876541137695312, - "learning_rate": 5.613165829145729e-05, - "loss": 5.0848, - "step": 44165 - }, - { - "epoch": 23.033116036505866, - "grad_norm": 1.6187584400177002, - "learning_rate": 5.6130653266331654e-05, - "loss": 5.4199, - "step": 44166 - }, - { - "epoch": 23.033637548891786, - "grad_norm": 1.64701509475708, - "learning_rate": 5.612964824120603e-05, - "loss": 4.9288, - "step": 44167 - }, - { - "epoch": 23.034159061277705, - "grad_norm": 1.4942572116851807, - "learning_rate": 5.61286432160804e-05, - "loss": 4.9982, - "step": 44168 - }, - { - "epoch": 23.034680573663625, - "grad_norm": 1.4706209897994995, - "learning_rate": 5.612763819095478e-05, - "loss": 5.4747, - "step": 44169 - }, - { - "epoch": 23.035202086049544, - "grad_norm": 1.558253526687622, - "learning_rate": 5.6126633165829145e-05, - "loss": 5.3631, - "step": 44170 - }, - { - "epoch": 23.035723598435464, - "grad_norm": 1.670030951499939, - "learning_rate": 5.612562814070352e-05, - "loss": 5.1758, - "step": 44171 - }, - { - "epoch": 23.036245110821383, - "grad_norm": 1.5509953498840332, - "learning_rate": 5.612462311557789e-05, - "loss": 5.2983, - "step": 44172 - }, - { - "epoch": 23.036766623207303, - "grad_norm": 1.565963625907898, - "learning_rate": 5.6123618090452265e-05, - "loss": 5.4427, - "step": 44173 - }, - { - "epoch": 23.03728813559322, - "grad_norm": 1.510735273361206, - "learning_rate": 5.6122613065326636e-05, - "loss": 5.3611, - "step": 44174 - }, - { - "epoch": 23.037809647979138, - "grad_norm": 1.6084152460098267, - "learning_rate": 5.6121608040201014e-05, - "loss": 5.1011, - "step": 44175 - }, - { - "epoch": 23.038331160365058, - "grad_norm": 1.625915288925171, - "learning_rate": 5.612060301507538e-05, - "loss": 5.1559, - "step": 44176 - }, - { - "epoch": 23.038852672750977, - "grad_norm": 1.5887471437454224, - "learning_rate": 5.611959798994975e-05, - "loss": 5.2114, - "step": 44177 - }, - { - "epoch": 23.039374185136897, - "grad_norm": 1.6323591470718384, - "learning_rate": 5.611859296482413e-05, - "loss": 5.0946, - "step": 44178 - }, - { - "epoch": 23.039895697522816, - "grad_norm": 1.5416592359542847, - "learning_rate": 5.611758793969849e-05, - "loss": 5.5736, - "step": 44179 - }, - { - "epoch": 23.040417209908735, - "grad_norm": 1.55796480178833, - "learning_rate": 5.611658291457287e-05, - "loss": 5.2377, - "step": 44180 - }, - { - "epoch": 23.040938722294655, - "grad_norm": 1.5152515172958374, - "learning_rate": 5.611557788944723e-05, - "loss": 4.8527, - "step": 44181 - }, - { - "epoch": 23.041460234680574, - "grad_norm": 1.5090440511703491, - "learning_rate": 5.611457286432161e-05, - "loss": 5.3446, - "step": 44182 - }, - { - "epoch": 23.041981747066494, - "grad_norm": 1.5127770900726318, - "learning_rate": 5.611356783919598e-05, - "loss": 5.0866, - "step": 44183 - }, - { - "epoch": 23.042503259452413, - "grad_norm": 1.6319607496261597, - "learning_rate": 5.611256281407036e-05, - "loss": 5.1327, - "step": 44184 - }, - { - "epoch": 23.043024771838333, - "grad_norm": 1.5124109983444214, - "learning_rate": 5.6111557788944724e-05, - "loss": 5.4293, - "step": 44185 - }, - { - "epoch": 23.04354628422425, - "grad_norm": 1.5612196922302246, - "learning_rate": 5.61105527638191e-05, - "loss": 5.3042, - "step": 44186 - }, - { - "epoch": 23.044067796610168, - "grad_norm": 1.5254265069961548, - "learning_rate": 5.6109547738693466e-05, - "loss": 5.3866, - "step": 44187 - }, - { - "epoch": 23.044589308996088, - "grad_norm": 1.4587345123291016, - "learning_rate": 5.6108542713567844e-05, - "loss": 5.2136, - "step": 44188 - }, - { - "epoch": 23.045110821382007, - "grad_norm": 1.529735803604126, - "learning_rate": 5.6107537688442215e-05, - "loss": 5.1422, - "step": 44189 - }, - { - "epoch": 23.045632333767927, - "grad_norm": 1.5934793949127197, - "learning_rate": 5.610653266331659e-05, - "loss": 5.3534, - "step": 44190 - }, - { - "epoch": 23.046153846153846, - "grad_norm": 1.532606840133667, - "learning_rate": 5.610552763819096e-05, - "loss": 5.3825, - "step": 44191 - }, - { - "epoch": 23.046675358539765, - "grad_norm": 1.5712941884994507, - "learning_rate": 5.610452261306532e-05, - "loss": 4.9236, - "step": 44192 - }, - { - "epoch": 23.047196870925685, - "grad_norm": 1.6492092609405518, - "learning_rate": 5.61035175879397e-05, - "loss": 5.4112, - "step": 44193 - }, - { - "epoch": 23.047718383311604, - "grad_norm": 1.476919412612915, - "learning_rate": 5.610251256281407e-05, - "loss": 5.5795, - "step": 44194 - }, - { - "epoch": 23.048239895697524, - "grad_norm": 1.5565747022628784, - "learning_rate": 5.610150753768845e-05, - "loss": 4.9406, - "step": 44195 - }, - { - "epoch": 23.048761408083443, - "grad_norm": 1.516579031944275, - "learning_rate": 5.610050251256281e-05, - "loss": 5.4549, - "step": 44196 - }, - { - "epoch": 23.049282920469363, - "grad_norm": 1.5050814151763916, - "learning_rate": 5.609949748743719e-05, - "loss": 5.3274, - "step": 44197 - }, - { - "epoch": 23.04980443285528, - "grad_norm": 1.6160473823547363, - "learning_rate": 5.609849246231156e-05, - "loss": 5.3057, - "step": 44198 - }, - { - "epoch": 23.050325945241198, - "grad_norm": 1.6080831289291382, - "learning_rate": 5.609748743718594e-05, - "loss": 5.1603, - "step": 44199 - }, - { - "epoch": 23.050847457627118, - "grad_norm": 1.5089322328567505, - "learning_rate": 5.6096482412060304e-05, - "loss": 5.4709, - "step": 44200 - }, - { - "epoch": 23.051368970013037, - "grad_norm": 1.5144751071929932, - "learning_rate": 5.609547738693468e-05, - "loss": 5.4352, - "step": 44201 - }, - { - "epoch": 23.051890482398957, - "grad_norm": 1.647049069404602, - "learning_rate": 5.6094472361809046e-05, - "loss": 5.0064, - "step": 44202 - }, - { - "epoch": 23.052411994784876, - "grad_norm": 1.4825514554977417, - "learning_rate": 5.6093467336683424e-05, - "loss": 5.3918, - "step": 44203 - }, - { - "epoch": 23.052933507170795, - "grad_norm": 1.544574499130249, - "learning_rate": 5.6092462311557795e-05, - "loss": 4.9544, - "step": 44204 - }, - { - "epoch": 23.053455019556715, - "grad_norm": 1.49387526512146, - "learning_rate": 5.609145728643216e-05, - "loss": 5.1768, - "step": 44205 - }, - { - "epoch": 23.053976531942634, - "grad_norm": 1.5668026208877563, - "learning_rate": 5.609045226130654e-05, - "loss": 5.1151, - "step": 44206 - }, - { - "epoch": 23.054498044328554, - "grad_norm": 1.5981688499450684, - "learning_rate": 5.60894472361809e-05, - "loss": 4.9203, - "step": 44207 - }, - { - "epoch": 23.055019556714473, - "grad_norm": 1.5040651559829712, - "learning_rate": 5.608844221105528e-05, - "loss": 5.7794, - "step": 44208 - }, - { - "epoch": 23.055541069100393, - "grad_norm": 1.5354117155075073, - "learning_rate": 5.608743718592965e-05, - "loss": 4.9094, - "step": 44209 - }, - { - "epoch": 23.05606258148631, - "grad_norm": 1.5024762153625488, - "learning_rate": 5.608643216080403e-05, - "loss": 5.2287, - "step": 44210 - }, - { - "epoch": 23.056584093872228, - "grad_norm": 1.5759599208831787, - "learning_rate": 5.608542713567839e-05, - "loss": 5.2285, - "step": 44211 - }, - { - "epoch": 23.057105606258148, - "grad_norm": 1.6399372816085815, - "learning_rate": 5.608442211055277e-05, - "loss": 5.271, - "step": 44212 - }, - { - "epoch": 23.057627118644067, - "grad_norm": 1.7120574712753296, - "learning_rate": 5.6083417085427134e-05, - "loss": 4.9534, - "step": 44213 - }, - { - "epoch": 23.058148631029987, - "grad_norm": 1.4270570278167725, - "learning_rate": 5.608241206030151e-05, - "loss": 5.1419, - "step": 44214 - }, - { - "epoch": 23.058670143415906, - "grad_norm": 1.4865541458129883, - "learning_rate": 5.608140703517588e-05, - "loss": 5.0917, - "step": 44215 - }, - { - "epoch": 23.059191655801826, - "grad_norm": 1.5296885967254639, - "learning_rate": 5.608040201005026e-05, - "loss": 5.4378, - "step": 44216 - }, - { - "epoch": 23.059713168187745, - "grad_norm": 1.5675642490386963, - "learning_rate": 5.6079396984924625e-05, - "loss": 5.3041, - "step": 44217 - }, - { - "epoch": 23.060234680573664, - "grad_norm": 1.6579577922821045, - "learning_rate": 5.607839195979899e-05, - "loss": 5.0469, - "step": 44218 - }, - { - "epoch": 23.060756192959584, - "grad_norm": 1.663954734802246, - "learning_rate": 5.607738693467337e-05, - "loss": 4.8878, - "step": 44219 - }, - { - "epoch": 23.061277705345503, - "grad_norm": 1.6589109897613525, - "learning_rate": 5.607638190954774e-05, - "loss": 5.3474, - "step": 44220 - }, - { - "epoch": 23.061799217731423, - "grad_norm": 1.5335230827331543, - "learning_rate": 5.6075376884422116e-05, - "loss": 5.5685, - "step": 44221 - }, - { - "epoch": 23.06232073011734, - "grad_norm": 1.5106971263885498, - "learning_rate": 5.607437185929648e-05, - "loss": 5.7113, - "step": 44222 - }, - { - "epoch": 23.062842242503258, - "grad_norm": 1.7632415294647217, - "learning_rate": 5.607336683417086e-05, - "loss": 4.7541, - "step": 44223 - }, - { - "epoch": 23.063363754889178, - "grad_norm": 1.6437299251556396, - "learning_rate": 5.607236180904523e-05, - "loss": 5.0254, - "step": 44224 - }, - { - "epoch": 23.063885267275097, - "grad_norm": 1.583186388015747, - "learning_rate": 5.607135678391961e-05, - "loss": 5.4276, - "step": 44225 - }, - { - "epoch": 23.064406779661017, - "grad_norm": 1.5530083179473877, - "learning_rate": 5.607035175879397e-05, - "loss": 5.0412, - "step": 44226 - }, - { - "epoch": 23.064928292046936, - "grad_norm": 1.4764927625656128, - "learning_rate": 5.606934673366835e-05, - "loss": 5.7177, - "step": 44227 - }, - { - "epoch": 23.065449804432856, - "grad_norm": 1.50925612449646, - "learning_rate": 5.6068341708542713e-05, - "loss": 5.4734, - "step": 44228 - }, - { - "epoch": 23.065971316818775, - "grad_norm": 1.6366883516311646, - "learning_rate": 5.606733668341709e-05, - "loss": 5.053, - "step": 44229 - }, - { - "epoch": 23.066492829204694, - "grad_norm": 1.5717936754226685, - "learning_rate": 5.606633165829146e-05, - "loss": 5.4273, - "step": 44230 - }, - { - "epoch": 23.067014341590614, - "grad_norm": 1.6765187978744507, - "learning_rate": 5.6065326633165827e-05, - "loss": 4.8675, - "step": 44231 - }, - { - "epoch": 23.067535853976533, - "grad_norm": 1.4949336051940918, - "learning_rate": 5.6064321608040204e-05, - "loss": 5.5861, - "step": 44232 - }, - { - "epoch": 23.068057366362453, - "grad_norm": 1.5550392866134644, - "learning_rate": 5.606331658291457e-05, - "loss": 5.509, - "step": 44233 - }, - { - "epoch": 23.06857887874837, - "grad_norm": 1.5104155540466309, - "learning_rate": 5.6062311557788947e-05, - "loss": 5.6239, - "step": 44234 - }, - { - "epoch": 23.06910039113429, - "grad_norm": 1.46573007106781, - "learning_rate": 5.606130653266332e-05, - "loss": 5.6563, - "step": 44235 - }, - { - "epoch": 23.069621903520208, - "grad_norm": 1.550082802772522, - "learning_rate": 5.6060301507537695e-05, - "loss": 5.3968, - "step": 44236 - }, - { - "epoch": 23.070143415906127, - "grad_norm": 1.489371418952942, - "learning_rate": 5.605929648241206e-05, - "loss": 4.8999, - "step": 44237 - }, - { - "epoch": 23.070664928292047, - "grad_norm": 1.4827178716659546, - "learning_rate": 5.605829145728644e-05, - "loss": 5.733, - "step": 44238 - }, - { - "epoch": 23.071186440677966, - "grad_norm": 1.6949259042739868, - "learning_rate": 5.60572864321608e-05, - "loss": 5.2909, - "step": 44239 - }, - { - "epoch": 23.071707953063886, - "grad_norm": 1.5821691751480103, - "learning_rate": 5.605628140703518e-05, - "loss": 4.9813, - "step": 44240 - }, - { - "epoch": 23.072229465449805, - "grad_norm": 1.587161898612976, - "learning_rate": 5.605527638190955e-05, - "loss": 5.0957, - "step": 44241 - }, - { - "epoch": 23.072750977835724, - "grad_norm": 1.546392798423767, - "learning_rate": 5.605427135678393e-05, - "loss": 5.3123, - "step": 44242 - }, - { - "epoch": 23.073272490221644, - "grad_norm": 1.6802620887756348, - "learning_rate": 5.605326633165829e-05, - "loss": 5.2688, - "step": 44243 - }, - { - "epoch": 23.073794002607563, - "grad_norm": 1.464930534362793, - "learning_rate": 5.6052261306532664e-05, - "loss": 4.9244, - "step": 44244 - }, - { - "epoch": 23.07431551499348, - "grad_norm": 1.5040459632873535, - "learning_rate": 5.605125628140704e-05, - "loss": 5.1904, - "step": 44245 - }, - { - "epoch": 23.0748370273794, - "grad_norm": 1.5453670024871826, - "learning_rate": 5.6050251256281406e-05, - "loss": 5.2639, - "step": 44246 - }, - { - "epoch": 23.07535853976532, - "grad_norm": 1.4370310306549072, - "learning_rate": 5.6049246231155784e-05, - "loss": 5.4263, - "step": 44247 - }, - { - "epoch": 23.075880052151238, - "grad_norm": 1.5279301404953003, - "learning_rate": 5.604824120603015e-05, - "loss": 5.1857, - "step": 44248 - }, - { - "epoch": 23.076401564537157, - "grad_norm": 1.5973496437072754, - "learning_rate": 5.6047236180904526e-05, - "loss": 5.5631, - "step": 44249 - }, - { - "epoch": 23.076923076923077, - "grad_norm": 1.5487520694732666, - "learning_rate": 5.60462311557789e-05, - "loss": 5.6208, - "step": 44250 - }, - { - "epoch": 23.077444589308996, - "grad_norm": 1.5417556762695312, - "learning_rate": 5.6045226130653275e-05, - "loss": 5.2185, - "step": 44251 - }, - { - "epoch": 23.077966101694916, - "grad_norm": 1.5724568367004395, - "learning_rate": 5.604422110552764e-05, - "loss": 5.3911, - "step": 44252 - }, - { - "epoch": 23.078487614080835, - "grad_norm": 1.4482685327529907, - "learning_rate": 5.604321608040202e-05, - "loss": 5.4551, - "step": 44253 - }, - { - "epoch": 23.079009126466755, - "grad_norm": 1.545122504234314, - "learning_rate": 5.604221105527638e-05, - "loss": 5.6441, - "step": 44254 - }, - { - "epoch": 23.079530638852674, - "grad_norm": 1.5829100608825684, - "learning_rate": 5.604120603015076e-05, - "loss": 5.2844, - "step": 44255 - }, - { - "epoch": 23.080052151238593, - "grad_norm": 1.4594438076019287, - "learning_rate": 5.604020100502513e-05, - "loss": 5.5087, - "step": 44256 - }, - { - "epoch": 23.08057366362451, - "grad_norm": 1.5534204244613647, - "learning_rate": 5.6039195979899494e-05, - "loss": 4.8756, - "step": 44257 - }, - { - "epoch": 23.08109517601043, - "grad_norm": 1.5856492519378662, - "learning_rate": 5.603819095477387e-05, - "loss": 4.669, - "step": 44258 - }, - { - "epoch": 23.08161668839635, - "grad_norm": 1.5068422555923462, - "learning_rate": 5.6037185929648236e-05, - "loss": 5.4423, - "step": 44259 - }, - { - "epoch": 23.082138200782268, - "grad_norm": 1.5549417734146118, - "learning_rate": 5.6036180904522614e-05, - "loss": 4.6242, - "step": 44260 - }, - { - "epoch": 23.082659713168187, - "grad_norm": 1.528660774230957, - "learning_rate": 5.6035175879396985e-05, - "loss": 4.8343, - "step": 44261 - }, - { - "epoch": 23.083181225554107, - "grad_norm": 1.5528937578201294, - "learning_rate": 5.603417085427136e-05, - "loss": 5.4004, - "step": 44262 - }, - { - "epoch": 23.083702737940026, - "grad_norm": 1.5217008590698242, - "learning_rate": 5.603316582914573e-05, - "loss": 5.17, - "step": 44263 - }, - { - "epoch": 23.084224250325946, - "grad_norm": 1.6222697496414185, - "learning_rate": 5.6032160804020105e-05, - "loss": 4.9701, - "step": 44264 - }, - { - "epoch": 23.084745762711865, - "grad_norm": 1.460544466972351, - "learning_rate": 5.6031155778894476e-05, - "loss": 5.5013, - "step": 44265 - }, - { - "epoch": 23.085267275097785, - "grad_norm": 1.5549968481063843, - "learning_rate": 5.6030150753768854e-05, - "loss": 4.955, - "step": 44266 - }, - { - "epoch": 23.085788787483704, - "grad_norm": 1.558011531829834, - "learning_rate": 5.602914572864322e-05, - "loss": 4.9515, - "step": 44267 - }, - { - "epoch": 23.086310299869623, - "grad_norm": 1.5533313751220703, - "learning_rate": 5.6028140703517596e-05, - "loss": 4.9929, - "step": 44268 - }, - { - "epoch": 23.08683181225554, - "grad_norm": 1.4526426792144775, - "learning_rate": 5.602713567839196e-05, - "loss": 5.3377, - "step": 44269 - }, - { - "epoch": 23.08735332464146, - "grad_norm": 1.5141342878341675, - "learning_rate": 5.602613065326633e-05, - "loss": 5.3245, - "step": 44270 - }, - { - "epoch": 23.08787483702738, - "grad_norm": 1.5991771221160889, - "learning_rate": 5.602512562814071e-05, - "loss": 5.368, - "step": 44271 - }, - { - "epoch": 23.088396349413298, - "grad_norm": 1.588348388671875, - "learning_rate": 5.6024120603015074e-05, - "loss": 4.8774, - "step": 44272 - }, - { - "epoch": 23.088917861799217, - "grad_norm": 1.6172094345092773, - "learning_rate": 5.602311557788945e-05, - "loss": 5.0733, - "step": 44273 - }, - { - "epoch": 23.089439374185137, - "grad_norm": 1.5130037069320679, - "learning_rate": 5.6022110552763816e-05, - "loss": 5.5688, - "step": 44274 - }, - { - "epoch": 23.089960886571056, - "grad_norm": 1.4715625047683716, - "learning_rate": 5.6021105527638194e-05, - "loss": 5.3636, - "step": 44275 - }, - { - "epoch": 23.090482398956976, - "grad_norm": 1.522261381149292, - "learning_rate": 5.6020100502512565e-05, - "loss": 5.5567, - "step": 44276 - }, - { - "epoch": 23.091003911342895, - "grad_norm": 1.6823046207427979, - "learning_rate": 5.601909547738694e-05, - "loss": 5.0217, - "step": 44277 - }, - { - "epoch": 23.091525423728815, - "grad_norm": 1.5491161346435547, - "learning_rate": 5.601809045226131e-05, - "loss": 5.3432, - "step": 44278 - }, - { - "epoch": 23.092046936114734, - "grad_norm": 1.6071226596832275, - "learning_rate": 5.6017085427135685e-05, - "loss": 5.5206, - "step": 44279 - }, - { - "epoch": 23.092568448500653, - "grad_norm": 1.6054600477218628, - "learning_rate": 5.601608040201005e-05, - "loss": 4.6726, - "step": 44280 - }, - { - "epoch": 23.09308996088657, - "grad_norm": 1.7696481943130493, - "learning_rate": 5.6015075376884427e-05, - "loss": 5.1434, - "step": 44281 - }, - { - "epoch": 23.09361147327249, - "grad_norm": 1.606623649597168, - "learning_rate": 5.60140703517588e-05, - "loss": 5.4651, - "step": 44282 - }, - { - "epoch": 23.09413298565841, - "grad_norm": 1.6091371774673462, - "learning_rate": 5.6013065326633175e-05, - "loss": 5.0726, - "step": 44283 - }, - { - "epoch": 23.094654498044328, - "grad_norm": 1.4765387773513794, - "learning_rate": 5.601206030150754e-05, - "loss": 5.6324, - "step": 44284 - }, - { - "epoch": 23.095176010430247, - "grad_norm": 1.5956339836120605, - "learning_rate": 5.6011055276381904e-05, - "loss": 5.245, - "step": 44285 - }, - { - "epoch": 23.095697522816167, - "grad_norm": 1.5123881101608276, - "learning_rate": 5.601005025125628e-05, - "loss": 5.4311, - "step": 44286 - }, - { - "epoch": 23.096219035202086, - "grad_norm": 1.603606104850769, - "learning_rate": 5.600904522613065e-05, - "loss": 5.2715, - "step": 44287 - }, - { - "epoch": 23.096740547588006, - "grad_norm": 1.5447793006896973, - "learning_rate": 5.600804020100503e-05, - "loss": 4.6607, - "step": 44288 - }, - { - "epoch": 23.097262059973925, - "grad_norm": 1.4804331064224243, - "learning_rate": 5.6007035175879395e-05, - "loss": 5.4718, - "step": 44289 - }, - { - "epoch": 23.097783572359845, - "grad_norm": 1.5432261228561401, - "learning_rate": 5.600603015075377e-05, - "loss": 5.5115, - "step": 44290 - }, - { - "epoch": 23.098305084745764, - "grad_norm": 1.531092643737793, - "learning_rate": 5.6005025125628144e-05, - "loss": 5.2517, - "step": 44291 - }, - { - "epoch": 23.098826597131684, - "grad_norm": 1.518325924873352, - "learning_rate": 5.600402010050252e-05, - "loss": 5.6896, - "step": 44292 - }, - { - "epoch": 23.0993481095176, - "grad_norm": 1.5546977519989014, - "learning_rate": 5.6003015075376886e-05, - "loss": 5.3904, - "step": 44293 - }, - { - "epoch": 23.09986962190352, - "grad_norm": 1.5527348518371582, - "learning_rate": 5.6002010050251264e-05, - "loss": 4.9812, - "step": 44294 - }, - { - "epoch": 23.10039113428944, - "grad_norm": 1.6087510585784912, - "learning_rate": 5.600100502512563e-05, - "loss": 5.2385, - "step": 44295 - }, - { - "epoch": 23.100912646675358, - "grad_norm": 1.5110647678375244, - "learning_rate": 5.6000000000000006e-05, - "loss": 5.4561, - "step": 44296 - }, - { - "epoch": 23.101434159061277, - "grad_norm": 1.5034502744674683, - "learning_rate": 5.599899497487438e-05, - "loss": 5.268, - "step": 44297 - }, - { - "epoch": 23.101955671447197, - "grad_norm": 1.7065238952636719, - "learning_rate": 5.599798994974874e-05, - "loss": 4.1829, - "step": 44298 - }, - { - "epoch": 23.102477183833116, - "grad_norm": 1.6263313293457031, - "learning_rate": 5.599698492462312e-05, - "loss": 5.1696, - "step": 44299 - }, - { - "epoch": 23.102998696219036, - "grad_norm": 1.4984503984451294, - "learning_rate": 5.599597989949748e-05, - "loss": 5.3902, - "step": 44300 - }, - { - "epoch": 23.103520208604955, - "grad_norm": 1.7102168798446655, - "learning_rate": 5.599497487437186e-05, - "loss": 4.8354, - "step": 44301 - }, - { - "epoch": 23.104041720990875, - "grad_norm": 1.6121397018432617, - "learning_rate": 5.599396984924623e-05, - "loss": 5.2388, - "step": 44302 - }, - { - "epoch": 23.104563233376794, - "grad_norm": 1.5432770252227783, - "learning_rate": 5.599296482412061e-05, - "loss": 4.2624, - "step": 44303 - }, - { - "epoch": 23.105084745762714, - "grad_norm": 1.5199192762374878, - "learning_rate": 5.5991959798994974e-05, - "loss": 4.9791, - "step": 44304 - }, - { - "epoch": 23.10560625814863, - "grad_norm": 1.6821264028549194, - "learning_rate": 5.599095477386935e-05, - "loss": 5.0596, - "step": 44305 - }, - { - "epoch": 23.10612777053455, - "grad_norm": 1.3521559238433838, - "learning_rate": 5.5989949748743716e-05, - "loss": 4.6266, - "step": 44306 - }, - { - "epoch": 23.10664928292047, - "grad_norm": 1.5250223875045776, - "learning_rate": 5.5988944723618094e-05, - "loss": 5.4424, - "step": 44307 - }, - { - "epoch": 23.107170795306388, - "grad_norm": 1.5268884897232056, - "learning_rate": 5.5987939698492465e-05, - "loss": 5.427, - "step": 44308 - }, - { - "epoch": 23.107692307692307, - "grad_norm": 1.6352870464324951, - "learning_rate": 5.598693467336684e-05, - "loss": 5.1343, - "step": 44309 - }, - { - "epoch": 23.108213820078227, - "grad_norm": 1.48247230052948, - "learning_rate": 5.598592964824121e-05, - "loss": 5.5253, - "step": 44310 - }, - { - "epoch": 23.108735332464146, - "grad_norm": 1.6788885593414307, - "learning_rate": 5.598492462311558e-05, - "loss": 5.1573, - "step": 44311 - }, - { - "epoch": 23.109256844850066, - "grad_norm": 1.485263705253601, - "learning_rate": 5.5983919597989956e-05, - "loss": 5.0941, - "step": 44312 - }, - { - "epoch": 23.109778357235985, - "grad_norm": 1.5086501836776733, - "learning_rate": 5.598291457286432e-05, - "loss": 4.968, - "step": 44313 - }, - { - "epoch": 23.110299869621905, - "grad_norm": 1.558056116104126, - "learning_rate": 5.59819095477387e-05, - "loss": 5.2162, - "step": 44314 - }, - { - "epoch": 23.110821382007824, - "grad_norm": 1.5805771350860596, - "learning_rate": 5.598090452261306e-05, - "loss": 5.5491, - "step": 44315 - }, - { - "epoch": 23.111342894393744, - "grad_norm": 1.5781910419464111, - "learning_rate": 5.597989949748744e-05, - "loss": 5.2424, - "step": 44316 - }, - { - "epoch": 23.11186440677966, - "grad_norm": 1.5647194385528564, - "learning_rate": 5.597889447236181e-05, - "loss": 5.5491, - "step": 44317 - }, - { - "epoch": 23.11238591916558, - "grad_norm": 1.6656441688537598, - "learning_rate": 5.597788944723619e-05, - "loss": 4.8497, - "step": 44318 - }, - { - "epoch": 23.1129074315515, - "grad_norm": 1.639399528503418, - "learning_rate": 5.5976884422110554e-05, - "loss": 4.903, - "step": 44319 - }, - { - "epoch": 23.113428943937418, - "grad_norm": 1.6096041202545166, - "learning_rate": 5.597587939698493e-05, - "loss": 4.9045, - "step": 44320 - }, - { - "epoch": 23.113950456323337, - "grad_norm": 1.515893578529358, - "learning_rate": 5.5974874371859296e-05, - "loss": 5.266, - "step": 44321 - }, - { - "epoch": 23.114471968709257, - "grad_norm": 1.492468237876892, - "learning_rate": 5.5973869346733674e-05, - "loss": 5.3952, - "step": 44322 - }, - { - "epoch": 23.114993481095176, - "grad_norm": 1.536942720413208, - "learning_rate": 5.5972864321608045e-05, - "loss": 4.7623, - "step": 44323 - }, - { - "epoch": 23.115514993481096, - "grad_norm": 1.6108567714691162, - "learning_rate": 5.597185929648241e-05, - "loss": 5.3394, - "step": 44324 - }, - { - "epoch": 23.116036505867015, - "grad_norm": 1.5863429307937622, - "learning_rate": 5.597085427135679e-05, - "loss": 5.2572, - "step": 44325 - }, - { - "epoch": 23.116558018252935, - "grad_norm": 1.5053595304489136, - "learning_rate": 5.596984924623115e-05, - "loss": 5.3792, - "step": 44326 - }, - { - "epoch": 23.117079530638854, - "grad_norm": 1.5470268726348877, - "learning_rate": 5.596884422110553e-05, - "loss": 5.3229, - "step": 44327 - }, - { - "epoch": 23.117601043024774, - "grad_norm": 1.5217698812484741, - "learning_rate": 5.59678391959799e-05, - "loss": 5.2699, - "step": 44328 - }, - { - "epoch": 23.11812255541069, - "grad_norm": 1.4709482192993164, - "learning_rate": 5.596683417085428e-05, - "loss": 5.4574, - "step": 44329 - }, - { - "epoch": 23.11864406779661, - "grad_norm": 1.493037223815918, - "learning_rate": 5.596582914572864e-05, - "loss": 5.3327, - "step": 44330 - }, - { - "epoch": 23.11916558018253, - "grad_norm": 1.6582324504852295, - "learning_rate": 5.596482412060302e-05, - "loss": 5.0363, - "step": 44331 - }, - { - "epoch": 23.119687092568448, - "grad_norm": 1.5751782655715942, - "learning_rate": 5.596381909547739e-05, - "loss": 5.4138, - "step": 44332 - }, - { - "epoch": 23.120208604954367, - "grad_norm": 1.4760597944259644, - "learning_rate": 5.596281407035177e-05, - "loss": 5.0749, - "step": 44333 - }, - { - "epoch": 23.120730117340287, - "grad_norm": 1.7289230823516846, - "learning_rate": 5.596180904522613e-05, - "loss": 5.1463, - "step": 44334 - }, - { - "epoch": 23.121251629726206, - "grad_norm": 1.5487769842147827, - "learning_rate": 5.596080402010051e-05, - "loss": 5.2902, - "step": 44335 - }, - { - "epoch": 23.121773142112126, - "grad_norm": 1.5186022520065308, - "learning_rate": 5.5959798994974875e-05, - "loss": 4.9507, - "step": 44336 - }, - { - "epoch": 23.122294654498045, - "grad_norm": 1.5960909128189087, - "learning_rate": 5.5958793969849246e-05, - "loss": 5.5566, - "step": 44337 - }, - { - "epoch": 23.122816166883965, - "grad_norm": 1.5690075159072876, - "learning_rate": 5.5957788944723624e-05, - "loss": 4.8766, - "step": 44338 - }, - { - "epoch": 23.123337679269884, - "grad_norm": 1.534613847732544, - "learning_rate": 5.595678391959799e-05, - "loss": 5.4237, - "step": 44339 - }, - { - "epoch": 23.1238591916558, - "grad_norm": 1.5109068155288696, - "learning_rate": 5.5955778894472366e-05, - "loss": 5.0691, - "step": 44340 - }, - { - "epoch": 23.12438070404172, - "grad_norm": 1.4922438859939575, - "learning_rate": 5.595477386934673e-05, - "loss": 5.1997, - "step": 44341 - }, - { - "epoch": 23.12490221642764, - "grad_norm": 1.500375747680664, - "learning_rate": 5.595376884422111e-05, - "loss": 5.5567, - "step": 44342 - }, - { - "epoch": 23.12542372881356, - "grad_norm": 1.5292985439300537, - "learning_rate": 5.595276381909548e-05, - "loss": 4.749, - "step": 44343 - }, - { - "epoch": 23.125945241199478, - "grad_norm": 1.5737888813018799, - "learning_rate": 5.595175879396986e-05, - "loss": 5.1188, - "step": 44344 - }, - { - "epoch": 23.126466753585397, - "grad_norm": 1.8988252878189087, - "learning_rate": 5.595075376884422e-05, - "loss": 4.609, - "step": 44345 - }, - { - "epoch": 23.126988265971317, - "grad_norm": 1.5698645114898682, - "learning_rate": 5.59497487437186e-05, - "loss": 5.2377, - "step": 44346 - }, - { - "epoch": 23.127509778357236, - "grad_norm": 1.6264585256576538, - "learning_rate": 5.5948743718592963e-05, - "loss": 5.4621, - "step": 44347 - }, - { - "epoch": 23.128031290743156, - "grad_norm": 1.5163516998291016, - "learning_rate": 5.594773869346734e-05, - "loss": 5.6817, - "step": 44348 - }, - { - "epoch": 23.128552803129075, - "grad_norm": 1.5212162733078003, - "learning_rate": 5.594673366834171e-05, - "loss": 5.4772, - "step": 44349 - }, - { - "epoch": 23.129074315514995, - "grad_norm": 1.754381775856018, - "learning_rate": 5.5945728643216077e-05, - "loss": 4.4816, - "step": 44350 - }, - { - "epoch": 23.129595827900914, - "grad_norm": 1.5987505912780762, - "learning_rate": 5.5944723618090454e-05, - "loss": 5.1579, - "step": 44351 - }, - { - "epoch": 23.13011734028683, - "grad_norm": 1.5706615447998047, - "learning_rate": 5.5943718592964825e-05, - "loss": 5.1844, - "step": 44352 - }, - { - "epoch": 23.13063885267275, - "grad_norm": 1.5856157541275024, - "learning_rate": 5.59427135678392e-05, - "loss": 4.2906, - "step": 44353 - }, - { - "epoch": 23.13116036505867, - "grad_norm": 1.4615163803100586, - "learning_rate": 5.594170854271357e-05, - "loss": 4.9227, - "step": 44354 - }, - { - "epoch": 23.13168187744459, - "grad_norm": 1.497867465019226, - "learning_rate": 5.5940703517587945e-05, - "loss": 5.5688, - "step": 44355 - }, - { - "epoch": 23.132203389830508, - "grad_norm": 1.5673774480819702, - "learning_rate": 5.593969849246231e-05, - "loss": 5.1855, - "step": 44356 - }, - { - "epoch": 23.132724902216427, - "grad_norm": 1.4880610704421997, - "learning_rate": 5.593869346733669e-05, - "loss": 5.284, - "step": 44357 - }, - { - "epoch": 23.133246414602347, - "grad_norm": 1.5867726802825928, - "learning_rate": 5.593768844221106e-05, - "loss": 4.5313, - "step": 44358 - }, - { - "epoch": 23.133767926988266, - "grad_norm": 1.5662530660629272, - "learning_rate": 5.5936683417085436e-05, - "loss": 5.3711, - "step": 44359 - }, - { - "epoch": 23.134289439374186, - "grad_norm": 1.525652289390564, - "learning_rate": 5.59356783919598e-05, - "loss": 4.8821, - "step": 44360 - }, - { - "epoch": 23.134810951760105, - "grad_norm": 1.5443443059921265, - "learning_rate": 5.593467336683418e-05, - "loss": 4.9643, - "step": 44361 - }, - { - "epoch": 23.135332464146025, - "grad_norm": 1.5732898712158203, - "learning_rate": 5.593366834170854e-05, - "loss": 5.506, - "step": 44362 - }, - { - "epoch": 23.135853976531944, - "grad_norm": 1.6634172201156616, - "learning_rate": 5.593266331658292e-05, - "loss": 5.2097, - "step": 44363 - }, - { - "epoch": 23.13637548891786, - "grad_norm": 1.4624149799346924, - "learning_rate": 5.593165829145729e-05, - "loss": 5.2691, - "step": 44364 - }, - { - "epoch": 23.13689700130378, - "grad_norm": 1.4564409255981445, - "learning_rate": 5.5930653266331656e-05, - "loss": 5.4651, - "step": 44365 - }, - { - "epoch": 23.1374185136897, - "grad_norm": 1.6802165508270264, - "learning_rate": 5.5929648241206034e-05, - "loss": 5.1465, - "step": 44366 - }, - { - "epoch": 23.13794002607562, - "grad_norm": 1.6761503219604492, - "learning_rate": 5.59286432160804e-05, - "loss": 4.6668, - "step": 44367 - }, - { - "epoch": 23.138461538461538, - "grad_norm": 1.4565293788909912, - "learning_rate": 5.5927638190954776e-05, - "loss": 5.5904, - "step": 44368 - }, - { - "epoch": 23.138983050847457, - "grad_norm": 1.6015913486480713, - "learning_rate": 5.592663316582915e-05, - "loss": 5.3259, - "step": 44369 - }, - { - "epoch": 23.139504563233377, - "grad_norm": 1.6245182752609253, - "learning_rate": 5.5925628140703525e-05, - "loss": 4.7964, - "step": 44370 - }, - { - "epoch": 23.140026075619296, - "grad_norm": 1.6014807224273682, - "learning_rate": 5.592462311557789e-05, - "loss": 5.4318, - "step": 44371 - }, - { - "epoch": 23.140547588005216, - "grad_norm": 1.4273537397384644, - "learning_rate": 5.592361809045227e-05, - "loss": 5.0628, - "step": 44372 - }, - { - "epoch": 23.141069100391135, - "grad_norm": 1.4863674640655518, - "learning_rate": 5.592261306532663e-05, - "loss": 5.4974, - "step": 44373 - }, - { - "epoch": 23.141590612777055, - "grad_norm": 1.6562001705169678, - "learning_rate": 5.592160804020101e-05, - "loss": 5.4295, - "step": 44374 - }, - { - "epoch": 23.142112125162974, - "grad_norm": 1.5794895887374878, - "learning_rate": 5.592060301507538e-05, - "loss": 5.3571, - "step": 44375 - }, - { - "epoch": 23.14263363754889, - "grad_norm": 1.5250059366226196, - "learning_rate": 5.591959798994976e-05, - "loss": 5.3526, - "step": 44376 - }, - { - "epoch": 23.14315514993481, - "grad_norm": 1.545509934425354, - "learning_rate": 5.591859296482412e-05, - "loss": 5.358, - "step": 44377 - }, - { - "epoch": 23.14367666232073, - "grad_norm": 1.5436042547225952, - "learning_rate": 5.591758793969849e-05, - "loss": 5.2048, - "step": 44378 - }, - { - "epoch": 23.14419817470665, - "grad_norm": 1.5172964334487915, - "learning_rate": 5.591658291457287e-05, - "loss": 5.4697, - "step": 44379 - }, - { - "epoch": 23.144719687092568, - "grad_norm": 1.674127221107483, - "learning_rate": 5.5915577889447235e-05, - "loss": 5.1954, - "step": 44380 - }, - { - "epoch": 23.145241199478487, - "grad_norm": 1.5467638969421387, - "learning_rate": 5.591457286432161e-05, - "loss": 5.3445, - "step": 44381 - }, - { - "epoch": 23.145762711864407, - "grad_norm": 1.5212962627410889, - "learning_rate": 5.591356783919598e-05, - "loss": 5.3549, - "step": 44382 - }, - { - "epoch": 23.146284224250326, - "grad_norm": 1.5430058240890503, - "learning_rate": 5.5912562814070355e-05, - "loss": 5.4836, - "step": 44383 - }, - { - "epoch": 23.146805736636246, - "grad_norm": 1.554516077041626, - "learning_rate": 5.5911557788944726e-05, - "loss": 4.7715, - "step": 44384 - }, - { - "epoch": 23.147327249022165, - "grad_norm": 1.5024774074554443, - "learning_rate": 5.5910552763819104e-05, - "loss": 5.3976, - "step": 44385 - }, - { - "epoch": 23.147848761408085, - "grad_norm": 1.5544353723526, - "learning_rate": 5.590954773869347e-05, - "loss": 4.6706, - "step": 44386 - }, - { - "epoch": 23.148370273794004, - "grad_norm": 1.6513954401016235, - "learning_rate": 5.5908542713567846e-05, - "loss": 5.4086, - "step": 44387 - }, - { - "epoch": 23.14889178617992, - "grad_norm": 1.8536309003829956, - "learning_rate": 5.590753768844221e-05, - "loss": 5.1364, - "step": 44388 - }, - { - "epoch": 23.14941329856584, - "grad_norm": 1.5676305294036865, - "learning_rate": 5.590653266331659e-05, - "loss": 5.2804, - "step": 44389 - }, - { - "epoch": 23.14993481095176, - "grad_norm": 1.4657251834869385, - "learning_rate": 5.590552763819096e-05, - "loss": 4.6826, - "step": 44390 - }, - { - "epoch": 23.15045632333768, - "grad_norm": 1.5766276121139526, - "learning_rate": 5.5904522613065324e-05, - "loss": 5.4326, - "step": 44391 - }, - { - "epoch": 23.150977835723598, - "grad_norm": 1.4632548093795776, - "learning_rate": 5.59035175879397e-05, - "loss": 4.9899, - "step": 44392 - }, - { - "epoch": 23.151499348109517, - "grad_norm": 1.5896002054214478, - "learning_rate": 5.5902512562814066e-05, - "loss": 5.1503, - "step": 44393 - }, - { - "epoch": 23.152020860495437, - "grad_norm": 1.4700225591659546, - "learning_rate": 5.5901507537688444e-05, - "loss": 5.0012, - "step": 44394 - }, - { - "epoch": 23.152542372881356, - "grad_norm": 1.5365785360336304, - "learning_rate": 5.5900502512562815e-05, - "loss": 5.3897, - "step": 44395 - }, - { - "epoch": 23.153063885267276, - "grad_norm": 1.547487497329712, - "learning_rate": 5.589949748743719e-05, - "loss": 5.2639, - "step": 44396 - }, - { - "epoch": 23.153585397653195, - "grad_norm": 1.551567792892456, - "learning_rate": 5.589849246231156e-05, - "loss": 5.1781, - "step": 44397 - }, - { - "epoch": 23.154106910039115, - "grad_norm": 1.5515793561935425, - "learning_rate": 5.5897487437185935e-05, - "loss": 5.0279, - "step": 44398 - }, - { - "epoch": 23.154628422425034, - "grad_norm": 1.5107226371765137, - "learning_rate": 5.5896482412060306e-05, - "loss": 5.2351, - "step": 44399 - }, - { - "epoch": 23.15514993481095, - "grad_norm": 1.6052024364471436, - "learning_rate": 5.5895477386934683e-05, - "loss": 4.8875, - "step": 44400 - }, - { - "epoch": 23.15567144719687, - "grad_norm": 1.7481673955917358, - "learning_rate": 5.589447236180905e-05, - "loss": 4.5725, - "step": 44401 - }, - { - "epoch": 23.15619295958279, - "grad_norm": 1.544325351715088, - "learning_rate": 5.5893467336683426e-05, - "loss": 5.1971, - "step": 44402 - }, - { - "epoch": 23.15671447196871, - "grad_norm": 1.5476394891738892, - "learning_rate": 5.589246231155779e-05, - "loss": 4.8267, - "step": 44403 - }, - { - "epoch": 23.157235984354628, - "grad_norm": 1.4956297874450684, - "learning_rate": 5.589145728643216e-05, - "loss": 5.4084, - "step": 44404 - }, - { - "epoch": 23.157757496740548, - "grad_norm": 1.6533668041229248, - "learning_rate": 5.589045226130654e-05, - "loss": 5.2848, - "step": 44405 - }, - { - "epoch": 23.158279009126467, - "grad_norm": 1.4833753108978271, - "learning_rate": 5.58894472361809e-05, - "loss": 5.2977, - "step": 44406 - }, - { - "epoch": 23.158800521512386, - "grad_norm": 1.5922671556472778, - "learning_rate": 5.588844221105528e-05, - "loss": 5.2135, - "step": 44407 - }, - { - "epoch": 23.159322033898306, - "grad_norm": 1.5757970809936523, - "learning_rate": 5.5887437185929645e-05, - "loss": 5.3662, - "step": 44408 - }, - { - "epoch": 23.159843546284225, - "grad_norm": 1.6806377172470093, - "learning_rate": 5.588643216080402e-05, - "loss": 4.703, - "step": 44409 - }, - { - "epoch": 23.160365058670145, - "grad_norm": 1.5572504997253418, - "learning_rate": 5.5885427135678394e-05, - "loss": 4.8505, - "step": 44410 - }, - { - "epoch": 23.160886571056064, - "grad_norm": 1.5185115337371826, - "learning_rate": 5.588442211055277e-05, - "loss": 5.1827, - "step": 44411 - }, - { - "epoch": 23.16140808344198, - "grad_norm": 1.4492722749710083, - "learning_rate": 5.5883417085427136e-05, - "loss": 5.7801, - "step": 44412 - }, - { - "epoch": 23.1619295958279, - "grad_norm": 1.540126085281372, - "learning_rate": 5.5882412060301514e-05, - "loss": 4.996, - "step": 44413 - }, - { - "epoch": 23.16245110821382, - "grad_norm": 1.493816614151001, - "learning_rate": 5.588140703517588e-05, - "loss": 5.26, - "step": 44414 - }, - { - "epoch": 23.16297262059974, - "grad_norm": 1.6985938549041748, - "learning_rate": 5.5880402010050256e-05, - "loss": 4.8891, - "step": 44415 - }, - { - "epoch": 23.163494132985658, - "grad_norm": 1.5733305215835571, - "learning_rate": 5.587939698492463e-05, - "loss": 5.2268, - "step": 44416 - }, - { - "epoch": 23.164015645371578, - "grad_norm": 1.4618334770202637, - "learning_rate": 5.587839195979899e-05, - "loss": 5.803, - "step": 44417 - }, - { - "epoch": 23.164537157757497, - "grad_norm": 1.5179309844970703, - "learning_rate": 5.587738693467337e-05, - "loss": 5.3328, - "step": 44418 - }, - { - "epoch": 23.165058670143416, - "grad_norm": 1.5752873420715332, - "learning_rate": 5.587638190954774e-05, - "loss": 5.2079, - "step": 44419 - }, - { - "epoch": 23.165580182529336, - "grad_norm": 1.6247870922088623, - "learning_rate": 5.587537688442212e-05, - "loss": 5.001, - "step": 44420 - }, - { - "epoch": 23.166101694915255, - "grad_norm": 1.529240608215332, - "learning_rate": 5.587437185929648e-05, - "loss": 5.4824, - "step": 44421 - }, - { - "epoch": 23.166623207301175, - "grad_norm": 1.5531929731369019, - "learning_rate": 5.587336683417086e-05, - "loss": 5.0454, - "step": 44422 - }, - { - "epoch": 23.167144719687094, - "grad_norm": 1.4715605974197388, - "learning_rate": 5.5872361809045224e-05, - "loss": 5.4019, - "step": 44423 - }, - { - "epoch": 23.16766623207301, - "grad_norm": 1.606982946395874, - "learning_rate": 5.58713567839196e-05, - "loss": 4.6081, - "step": 44424 - }, - { - "epoch": 23.16818774445893, - "grad_norm": 1.568732500076294, - "learning_rate": 5.587035175879397e-05, - "loss": 5.0155, - "step": 44425 - }, - { - "epoch": 23.16870925684485, - "grad_norm": 1.5307775735855103, - "learning_rate": 5.586934673366835e-05, - "loss": 5.669, - "step": 44426 - }, - { - "epoch": 23.16923076923077, - "grad_norm": 1.5937563180923462, - "learning_rate": 5.5868341708542715e-05, - "loss": 5.6476, - "step": 44427 - }, - { - "epoch": 23.169752281616688, - "grad_norm": 1.5149341821670532, - "learning_rate": 5.586733668341709e-05, - "loss": 4.7297, - "step": 44428 - }, - { - "epoch": 23.170273794002608, - "grad_norm": 1.595287561416626, - "learning_rate": 5.586633165829146e-05, - "loss": 5.3566, - "step": 44429 - }, - { - "epoch": 23.170795306388527, - "grad_norm": 1.510013222694397, - "learning_rate": 5.586532663316583e-05, - "loss": 5.151, - "step": 44430 - }, - { - "epoch": 23.171316818774446, - "grad_norm": 1.5315250158309937, - "learning_rate": 5.5864321608040206e-05, - "loss": 5.2568, - "step": 44431 - }, - { - "epoch": 23.171838331160366, - "grad_norm": 1.5053879022598267, - "learning_rate": 5.586331658291457e-05, - "loss": 5.1245, - "step": 44432 - }, - { - "epoch": 23.172359843546285, - "grad_norm": 1.530985713005066, - "learning_rate": 5.586231155778895e-05, - "loss": 5.2794, - "step": 44433 - }, - { - "epoch": 23.172881355932205, - "grad_norm": 1.506066918373108, - "learning_rate": 5.586130653266331e-05, - "loss": 5.4207, - "step": 44434 - }, - { - "epoch": 23.17340286831812, - "grad_norm": 1.517920970916748, - "learning_rate": 5.586030150753769e-05, - "loss": 5.116, - "step": 44435 - }, - { - "epoch": 23.17392438070404, - "grad_norm": 1.6365300416946411, - "learning_rate": 5.585929648241206e-05, - "loss": 5.2444, - "step": 44436 - }, - { - "epoch": 23.17444589308996, - "grad_norm": 1.533740758895874, - "learning_rate": 5.585829145728644e-05, - "loss": 5.4651, - "step": 44437 - }, - { - "epoch": 23.17496740547588, - "grad_norm": 1.5531818866729736, - "learning_rate": 5.5857286432160804e-05, - "loss": 5.0552, - "step": 44438 - }, - { - "epoch": 23.1754889178618, - "grad_norm": 1.5670596361160278, - "learning_rate": 5.585628140703518e-05, - "loss": 5.2571, - "step": 44439 - }, - { - "epoch": 23.176010430247718, - "grad_norm": 1.482662320137024, - "learning_rate": 5.585527638190955e-05, - "loss": 5.3953, - "step": 44440 - }, - { - "epoch": 23.176531942633638, - "grad_norm": 1.5225999355316162, - "learning_rate": 5.585427135678393e-05, - "loss": 5.5203, - "step": 44441 - }, - { - "epoch": 23.177053455019557, - "grad_norm": 1.5714598894119263, - "learning_rate": 5.5853266331658295e-05, - "loss": 5.2719, - "step": 44442 - }, - { - "epoch": 23.177574967405477, - "grad_norm": 1.7030725479125977, - "learning_rate": 5.585226130653266e-05, - "loss": 5.0575, - "step": 44443 - }, - { - "epoch": 23.178096479791396, - "grad_norm": 1.5519673824310303, - "learning_rate": 5.585125628140704e-05, - "loss": 4.963, - "step": 44444 - }, - { - "epoch": 23.178617992177315, - "grad_norm": 1.4521359205245972, - "learning_rate": 5.585025125628141e-05, - "loss": 5.2794, - "step": 44445 - }, - { - "epoch": 23.179139504563235, - "grad_norm": 1.5054372549057007, - "learning_rate": 5.5849246231155786e-05, - "loss": 5.0013, - "step": 44446 - }, - { - "epoch": 23.17966101694915, - "grad_norm": 1.4701169729232788, - "learning_rate": 5.584824120603015e-05, - "loss": 4.7161, - "step": 44447 - }, - { - "epoch": 23.18018252933507, - "grad_norm": 1.5844577550888062, - "learning_rate": 5.584723618090453e-05, - "loss": 5.6168, - "step": 44448 - }, - { - "epoch": 23.18070404172099, - "grad_norm": 1.6560114622116089, - "learning_rate": 5.584623115577889e-05, - "loss": 5.2664, - "step": 44449 - }, - { - "epoch": 23.18122555410691, - "grad_norm": 1.6769298315048218, - "learning_rate": 5.584522613065327e-05, - "loss": 5.5789, - "step": 44450 - }, - { - "epoch": 23.18174706649283, - "grad_norm": 1.581703782081604, - "learning_rate": 5.584422110552764e-05, - "loss": 5.6421, - "step": 44451 - }, - { - "epoch": 23.182268578878748, - "grad_norm": 1.4715690612792969, - "learning_rate": 5.584321608040202e-05, - "loss": 5.4105, - "step": 44452 - }, - { - "epoch": 23.182790091264668, - "grad_norm": 1.5141658782958984, - "learning_rate": 5.584221105527638e-05, - "loss": 4.9959, - "step": 44453 - }, - { - "epoch": 23.183311603650587, - "grad_norm": 1.4889776706695557, - "learning_rate": 5.584120603015076e-05, - "loss": 4.7765, - "step": 44454 - }, - { - "epoch": 23.183833116036507, - "grad_norm": 1.7164026498794556, - "learning_rate": 5.5840201005025125e-05, - "loss": 4.8498, - "step": 44455 - }, - { - "epoch": 23.184354628422426, - "grad_norm": 1.5603432655334473, - "learning_rate": 5.58391959798995e-05, - "loss": 4.6686, - "step": 44456 - }, - { - "epoch": 23.184876140808345, - "grad_norm": 1.697520136833191, - "learning_rate": 5.5838190954773874e-05, - "loss": 4.1408, - "step": 44457 - }, - { - "epoch": 23.185397653194265, - "grad_norm": 1.5019550323486328, - "learning_rate": 5.583718592964824e-05, - "loss": 5.6475, - "step": 44458 - }, - { - "epoch": 23.18591916558018, - "grad_norm": 1.5064529180526733, - "learning_rate": 5.5836180904522616e-05, - "loss": 5.7946, - "step": 44459 - }, - { - "epoch": 23.1864406779661, - "grad_norm": 1.7036986351013184, - "learning_rate": 5.583517587939698e-05, - "loss": 5.3093, - "step": 44460 - }, - { - "epoch": 23.18696219035202, - "grad_norm": 1.6460987329483032, - "learning_rate": 5.583417085427136e-05, - "loss": 5.4136, - "step": 44461 - }, - { - "epoch": 23.18748370273794, - "grad_norm": 1.5598304271697998, - "learning_rate": 5.583316582914573e-05, - "loss": 5.3218, - "step": 44462 - }, - { - "epoch": 23.18800521512386, - "grad_norm": 1.5708069801330566, - "learning_rate": 5.583216080402011e-05, - "loss": 4.8947, - "step": 44463 - }, - { - "epoch": 23.188526727509778, - "grad_norm": 1.5182583332061768, - "learning_rate": 5.583115577889447e-05, - "loss": 5.3932, - "step": 44464 - }, - { - "epoch": 23.189048239895698, - "grad_norm": 1.5008070468902588, - "learning_rate": 5.583015075376885e-05, - "loss": 5.3346, - "step": 44465 - }, - { - "epoch": 23.189569752281617, - "grad_norm": 1.5545766353607178, - "learning_rate": 5.582914572864322e-05, - "loss": 4.873, - "step": 44466 - }, - { - "epoch": 23.190091264667537, - "grad_norm": 1.611024022102356, - "learning_rate": 5.58281407035176e-05, - "loss": 5.224, - "step": 44467 - }, - { - "epoch": 23.190612777053456, - "grad_norm": 1.59691321849823, - "learning_rate": 5.582713567839196e-05, - "loss": 5.3982, - "step": 44468 - }, - { - "epoch": 23.191134289439375, - "grad_norm": 1.4951989650726318, - "learning_rate": 5.582613065326634e-05, - "loss": 5.0871, - "step": 44469 - }, - { - "epoch": 23.191655801825295, - "grad_norm": 1.46759033203125, - "learning_rate": 5.5825125628140704e-05, - "loss": 4.6307, - "step": 44470 - }, - { - "epoch": 23.19217731421121, - "grad_norm": 1.5732370615005493, - "learning_rate": 5.5824120603015076e-05, - "loss": 4.9385, - "step": 44471 - }, - { - "epoch": 23.19269882659713, - "grad_norm": 1.5035287141799927, - "learning_rate": 5.582311557788945e-05, - "loss": 5.1477, - "step": 44472 - }, - { - "epoch": 23.19322033898305, - "grad_norm": 1.554451584815979, - "learning_rate": 5.582211055276382e-05, - "loss": 5.5065, - "step": 44473 - }, - { - "epoch": 23.19374185136897, - "grad_norm": 1.5065127611160278, - "learning_rate": 5.5821105527638195e-05, - "loss": 5.5938, - "step": 44474 - }, - { - "epoch": 23.19426336375489, - "grad_norm": 1.661399483680725, - "learning_rate": 5.582010050251256e-05, - "loss": 5.2313, - "step": 44475 - }, - { - "epoch": 23.194784876140808, - "grad_norm": 1.7264972925186157, - "learning_rate": 5.581909547738694e-05, - "loss": 4.737, - "step": 44476 - }, - { - "epoch": 23.195306388526728, - "grad_norm": 1.5699737071990967, - "learning_rate": 5.581809045226131e-05, - "loss": 4.8192, - "step": 44477 - }, - { - "epoch": 23.195827900912647, - "grad_norm": 1.6524354219436646, - "learning_rate": 5.5817085427135686e-05, - "loss": 5.2304, - "step": 44478 - }, - { - "epoch": 23.196349413298567, - "grad_norm": 1.584173321723938, - "learning_rate": 5.581608040201005e-05, - "loss": 4.9043, - "step": 44479 - }, - { - "epoch": 23.196870925684486, - "grad_norm": 1.4571678638458252, - "learning_rate": 5.581507537688443e-05, - "loss": 4.0244, - "step": 44480 - }, - { - "epoch": 23.197392438070406, - "grad_norm": 1.5093358755111694, - "learning_rate": 5.581407035175879e-05, - "loss": 4.8153, - "step": 44481 - }, - { - "epoch": 23.197913950456325, - "grad_norm": 1.523886799812317, - "learning_rate": 5.581306532663317e-05, - "loss": 5.4279, - "step": 44482 - }, - { - "epoch": 23.19843546284224, - "grad_norm": 1.587496042251587, - "learning_rate": 5.581206030150754e-05, - "loss": 4.7321, - "step": 44483 - }, - { - "epoch": 23.19895697522816, - "grad_norm": 1.7043652534484863, - "learning_rate": 5.5811055276381906e-05, - "loss": 4.759, - "step": 44484 - }, - { - "epoch": 23.19947848761408, - "grad_norm": 1.701104760169983, - "learning_rate": 5.5810050251256284e-05, - "loss": 4.3673, - "step": 44485 - }, - { - "epoch": 23.2, - "grad_norm": 1.7615993022918701, - "learning_rate": 5.5809045226130655e-05, - "loss": 5.1632, - "step": 44486 - }, - { - "epoch": 23.20052151238592, - "grad_norm": 1.6056160926818848, - "learning_rate": 5.580804020100503e-05, - "loss": 4.8911, - "step": 44487 - }, - { - "epoch": 23.201043024771838, - "grad_norm": 1.561873197555542, - "learning_rate": 5.58070351758794e-05, - "loss": 5.2888, - "step": 44488 - }, - { - "epoch": 23.201564537157758, - "grad_norm": 1.5198965072631836, - "learning_rate": 5.5806030150753775e-05, - "loss": 4.9964, - "step": 44489 - }, - { - "epoch": 23.202086049543677, - "grad_norm": 1.4538987874984741, - "learning_rate": 5.580502512562814e-05, - "loss": 5.2676, - "step": 44490 - }, - { - "epoch": 23.202607561929597, - "grad_norm": 1.5355035066604614, - "learning_rate": 5.580402010050252e-05, - "loss": 5.211, - "step": 44491 - }, - { - "epoch": 23.203129074315516, - "grad_norm": 1.4796754121780396, - "learning_rate": 5.580301507537689e-05, - "loss": 5.4725, - "step": 44492 - }, - { - "epoch": 23.203650586701436, - "grad_norm": 1.5519839525222778, - "learning_rate": 5.5802010050251266e-05, - "loss": 5.4003, - "step": 44493 - }, - { - "epoch": 23.204172099087355, - "grad_norm": 1.517124056816101, - "learning_rate": 5.580100502512563e-05, - "loss": 5.5417, - "step": 44494 - }, - { - "epoch": 23.20469361147327, - "grad_norm": 1.506738305091858, - "learning_rate": 5.580000000000001e-05, - "loss": 5.5064, - "step": 44495 - }, - { - "epoch": 23.20521512385919, - "grad_norm": 1.531636357307434, - "learning_rate": 5.579899497487437e-05, - "loss": 5.1309, - "step": 44496 - }, - { - "epoch": 23.20573663624511, - "grad_norm": 1.5367642641067505, - "learning_rate": 5.579798994974874e-05, - "loss": 5.1897, - "step": 44497 - }, - { - "epoch": 23.20625814863103, - "grad_norm": 1.6637266874313354, - "learning_rate": 5.579698492462312e-05, - "loss": 4.9355, - "step": 44498 - }, - { - "epoch": 23.20677966101695, - "grad_norm": 1.6399122476577759, - "learning_rate": 5.5795979899497485e-05, - "loss": 5.1819, - "step": 44499 - }, - { - "epoch": 23.20730117340287, - "grad_norm": 1.6781072616577148, - "learning_rate": 5.579497487437186e-05, - "loss": 4.8043, - "step": 44500 - }, - { - "epoch": 23.207822685788788, - "grad_norm": 1.6894584894180298, - "learning_rate": 5.579396984924623e-05, - "loss": 4.621, - "step": 44501 - }, - { - "epoch": 23.208344198174707, - "grad_norm": 1.5262281894683838, - "learning_rate": 5.5792964824120605e-05, - "loss": 4.9033, - "step": 44502 - }, - { - "epoch": 23.208865710560627, - "grad_norm": 1.6599876880645752, - "learning_rate": 5.5791959798994976e-05, - "loss": 5.0329, - "step": 44503 - }, - { - "epoch": 23.209387222946546, - "grad_norm": 1.5819079875946045, - "learning_rate": 5.5790954773869354e-05, - "loss": 5.4085, - "step": 44504 - }, - { - "epoch": 23.209908735332466, - "grad_norm": 1.7141226530075073, - "learning_rate": 5.578994974874372e-05, - "loss": 5.1057, - "step": 44505 - }, - { - "epoch": 23.210430247718385, - "grad_norm": 1.5411040782928467, - "learning_rate": 5.5788944723618096e-05, - "loss": 5.3083, - "step": 44506 - }, - { - "epoch": 23.2109517601043, - "grad_norm": 1.556691288948059, - "learning_rate": 5.578793969849247e-05, - "loss": 5.7389, - "step": 44507 - }, - { - "epoch": 23.21147327249022, - "grad_norm": 1.6186749935150146, - "learning_rate": 5.5786934673366845e-05, - "loss": 4.9765, - "step": 44508 - }, - { - "epoch": 23.21199478487614, - "grad_norm": 1.490837574005127, - "learning_rate": 5.578592964824121e-05, - "loss": 5.3557, - "step": 44509 - }, - { - "epoch": 23.21251629726206, - "grad_norm": 1.619564175605774, - "learning_rate": 5.5784924623115574e-05, - "loss": 5.4258, - "step": 44510 - }, - { - "epoch": 23.21303780964798, - "grad_norm": 1.5919963121414185, - "learning_rate": 5.578391959798995e-05, - "loss": 5.5385, - "step": 44511 - }, - { - "epoch": 23.2135593220339, - "grad_norm": 1.6380077600479126, - "learning_rate": 5.578291457286432e-05, - "loss": 5.3102, - "step": 44512 - }, - { - "epoch": 23.214080834419818, - "grad_norm": 1.5933871269226074, - "learning_rate": 5.57819095477387e-05, - "loss": 4.7929, - "step": 44513 - }, - { - "epoch": 23.214602346805737, - "grad_norm": 1.5525047779083252, - "learning_rate": 5.5780904522613065e-05, - "loss": 5.0254, - "step": 44514 - }, - { - "epoch": 23.215123859191657, - "grad_norm": 1.5207644701004028, - "learning_rate": 5.577989949748744e-05, - "loss": 5.4492, - "step": 44515 - }, - { - "epoch": 23.215645371577576, - "grad_norm": 1.5767146348953247, - "learning_rate": 5.577889447236181e-05, - "loss": 5.7119, - "step": 44516 - }, - { - "epoch": 23.216166883963496, - "grad_norm": 1.5263234376907349, - "learning_rate": 5.5777889447236185e-05, - "loss": 5.3596, - "step": 44517 - }, - { - "epoch": 23.216688396349415, - "grad_norm": 1.5918166637420654, - "learning_rate": 5.5776884422110556e-05, - "loss": 5.184, - "step": 44518 - }, - { - "epoch": 23.21720990873533, - "grad_norm": 1.643988013267517, - "learning_rate": 5.5775879396984933e-05, - "loss": 4.4758, - "step": 44519 - }, - { - "epoch": 23.21773142112125, - "grad_norm": 1.5149116516113281, - "learning_rate": 5.57748743718593e-05, - "loss": 5.7059, - "step": 44520 - }, - { - "epoch": 23.21825293350717, - "grad_norm": 1.594844937324524, - "learning_rate": 5.5773869346733676e-05, - "loss": 5.2617, - "step": 44521 - }, - { - "epoch": 23.21877444589309, - "grad_norm": 1.43764066696167, - "learning_rate": 5.577286432160804e-05, - "loss": 5.2321, - "step": 44522 - }, - { - "epoch": 23.21929595827901, - "grad_norm": 1.481082797050476, - "learning_rate": 5.577185929648241e-05, - "loss": 5.6576, - "step": 44523 - }, - { - "epoch": 23.21981747066493, - "grad_norm": 1.6023286581039429, - "learning_rate": 5.577085427135679e-05, - "loss": 4.7092, - "step": 44524 - }, - { - "epoch": 23.220338983050848, - "grad_norm": 1.5849754810333252, - "learning_rate": 5.576984924623115e-05, - "loss": 5.287, - "step": 44525 - }, - { - "epoch": 23.220860495436767, - "grad_norm": 1.6785475015640259, - "learning_rate": 5.576884422110553e-05, - "loss": 5.2086, - "step": 44526 - }, - { - "epoch": 23.221382007822687, - "grad_norm": 1.5969887971878052, - "learning_rate": 5.57678391959799e-05, - "loss": 5.2268, - "step": 44527 - }, - { - "epoch": 23.221903520208606, - "grad_norm": 1.5361576080322266, - "learning_rate": 5.576683417085428e-05, - "loss": 5.1579, - "step": 44528 - }, - { - "epoch": 23.222425032594526, - "grad_norm": 1.6871178150177002, - "learning_rate": 5.5765829145728644e-05, - "loss": 4.6787, - "step": 44529 - }, - { - "epoch": 23.22294654498044, - "grad_norm": 1.7713544368743896, - "learning_rate": 5.576482412060302e-05, - "loss": 4.4551, - "step": 44530 - }, - { - "epoch": 23.22346805736636, - "grad_norm": 1.6521567106246948, - "learning_rate": 5.5763819095477386e-05, - "loss": 5.434, - "step": 44531 - }, - { - "epoch": 23.22398956975228, - "grad_norm": 1.587735652923584, - "learning_rate": 5.5762814070351764e-05, - "loss": 4.6837, - "step": 44532 - }, - { - "epoch": 23.2245110821382, - "grad_norm": 1.5835282802581787, - "learning_rate": 5.5761809045226135e-05, - "loss": 5.4976, - "step": 44533 - }, - { - "epoch": 23.22503259452412, - "grad_norm": 1.5470564365386963, - "learning_rate": 5.576080402010051e-05, - "loss": 4.704, - "step": 44534 - }, - { - "epoch": 23.22555410691004, - "grad_norm": 1.5559953451156616, - "learning_rate": 5.575979899497488e-05, - "loss": 5.1195, - "step": 44535 - }, - { - "epoch": 23.22607561929596, - "grad_norm": 1.6401594877243042, - "learning_rate": 5.575879396984924e-05, - "loss": 5.3782, - "step": 44536 - }, - { - "epoch": 23.226597131681878, - "grad_norm": 1.5583429336547852, - "learning_rate": 5.575778894472362e-05, - "loss": 5.1234, - "step": 44537 - }, - { - "epoch": 23.227118644067797, - "grad_norm": 1.5059813261032104, - "learning_rate": 5.575678391959799e-05, - "loss": 4.9452, - "step": 44538 - }, - { - "epoch": 23.227640156453717, - "grad_norm": 1.5531635284423828, - "learning_rate": 5.575577889447237e-05, - "loss": 5.2235, - "step": 44539 - }, - { - "epoch": 23.228161668839636, - "grad_norm": 1.48867666721344, - "learning_rate": 5.575477386934673e-05, - "loss": 5.2885, - "step": 44540 - }, - { - "epoch": 23.228683181225556, - "grad_norm": 1.5812493562698364, - "learning_rate": 5.575376884422111e-05, - "loss": 5.0846, - "step": 44541 - }, - { - "epoch": 23.22920469361147, - "grad_norm": 1.6124461889266968, - "learning_rate": 5.5752763819095474e-05, - "loss": 5.5369, - "step": 44542 - }, - { - "epoch": 23.22972620599739, - "grad_norm": 1.521266222000122, - "learning_rate": 5.575175879396985e-05, - "loss": 5.0622, - "step": 44543 - }, - { - "epoch": 23.23024771838331, - "grad_norm": 1.5308573246002197, - "learning_rate": 5.575075376884422e-05, - "loss": 5.4341, - "step": 44544 - }, - { - "epoch": 23.23076923076923, - "grad_norm": 1.633890151977539, - "learning_rate": 5.57497487437186e-05, - "loss": 5.1463, - "step": 44545 - }, - { - "epoch": 23.23129074315515, - "grad_norm": 1.4661734104156494, - "learning_rate": 5.5748743718592965e-05, - "loss": 5.1726, - "step": 44546 - }, - { - "epoch": 23.23181225554107, - "grad_norm": 1.6232037544250488, - "learning_rate": 5.574773869346734e-05, - "loss": 5.1109, - "step": 44547 - }, - { - "epoch": 23.23233376792699, - "grad_norm": 1.5435011386871338, - "learning_rate": 5.574673366834171e-05, - "loss": 5.6018, - "step": 44548 - }, - { - "epoch": 23.232855280312908, - "grad_norm": 1.5316187143325806, - "learning_rate": 5.5745728643216085e-05, - "loss": 4.92, - "step": 44549 - }, - { - "epoch": 23.233376792698827, - "grad_norm": 1.7112205028533936, - "learning_rate": 5.5744723618090456e-05, - "loss": 5.3565, - "step": 44550 - }, - { - "epoch": 23.233898305084747, - "grad_norm": 1.6411887407302856, - "learning_rate": 5.574371859296482e-05, - "loss": 4.9356, - "step": 44551 - }, - { - "epoch": 23.234419817470666, - "grad_norm": 1.5089813470840454, - "learning_rate": 5.57427135678392e-05, - "loss": 4.9833, - "step": 44552 - }, - { - "epoch": 23.234941329856586, - "grad_norm": 1.590040922164917, - "learning_rate": 5.574170854271357e-05, - "loss": 4.9826, - "step": 44553 - }, - { - "epoch": 23.2354628422425, - "grad_norm": 1.4724701642990112, - "learning_rate": 5.574070351758795e-05, - "loss": 5.2234, - "step": 44554 - }, - { - "epoch": 23.23598435462842, - "grad_norm": 1.4800435304641724, - "learning_rate": 5.573969849246231e-05, - "loss": 5.4086, - "step": 44555 - }, - { - "epoch": 23.23650586701434, - "grad_norm": 1.6484026908874512, - "learning_rate": 5.573869346733669e-05, - "loss": 5.2167, - "step": 44556 - }, - { - "epoch": 23.23702737940026, - "grad_norm": 1.492709755897522, - "learning_rate": 5.5737688442211054e-05, - "loss": 5.3841, - "step": 44557 - }, - { - "epoch": 23.23754889178618, - "grad_norm": 1.666269063949585, - "learning_rate": 5.573668341708543e-05, - "loss": 4.9408, - "step": 44558 - }, - { - "epoch": 23.2380704041721, - "grad_norm": 1.4974778890609741, - "learning_rate": 5.57356783919598e-05, - "loss": 5.5913, - "step": 44559 - }, - { - "epoch": 23.23859191655802, - "grad_norm": 1.5328329801559448, - "learning_rate": 5.573467336683418e-05, - "loss": 5.6629, - "step": 44560 - }, - { - "epoch": 23.239113428943938, - "grad_norm": 1.5449854135513306, - "learning_rate": 5.5733668341708545e-05, - "loss": 5.3747, - "step": 44561 - }, - { - "epoch": 23.239634941329857, - "grad_norm": 1.439741611480713, - "learning_rate": 5.573266331658292e-05, - "loss": 5.5506, - "step": 44562 - }, - { - "epoch": 23.240156453715777, - "grad_norm": 1.5875928401947021, - "learning_rate": 5.573165829145729e-05, - "loss": 5.5345, - "step": 44563 - }, - { - "epoch": 23.240677966101696, - "grad_norm": 1.6554032564163208, - "learning_rate": 5.573065326633166e-05, - "loss": 5.0586, - "step": 44564 - }, - { - "epoch": 23.241199478487616, - "grad_norm": 1.599313497543335, - "learning_rate": 5.5729648241206036e-05, - "loss": 5.2891, - "step": 44565 - }, - { - "epoch": 23.24172099087353, - "grad_norm": 1.510029911994934, - "learning_rate": 5.57286432160804e-05, - "loss": 5.3181, - "step": 44566 - }, - { - "epoch": 23.24224250325945, - "grad_norm": 1.562924861907959, - "learning_rate": 5.572763819095478e-05, - "loss": 5.3408, - "step": 44567 - }, - { - "epoch": 23.24276401564537, - "grad_norm": 1.5319468975067139, - "learning_rate": 5.572663316582914e-05, - "loss": 5.6065, - "step": 44568 - }, - { - "epoch": 23.24328552803129, - "grad_norm": 1.5445629358291626, - "learning_rate": 5.572562814070352e-05, - "loss": 4.9126, - "step": 44569 - }, - { - "epoch": 23.24380704041721, - "grad_norm": 1.4859198331832886, - "learning_rate": 5.572462311557789e-05, - "loss": 5.2278, - "step": 44570 - }, - { - "epoch": 23.24432855280313, - "grad_norm": 1.520655870437622, - "learning_rate": 5.572361809045227e-05, - "loss": 5.4659, - "step": 44571 - }, - { - "epoch": 23.24485006518905, - "grad_norm": 1.532092809677124, - "learning_rate": 5.572261306532663e-05, - "loss": 5.0158, - "step": 44572 - }, - { - "epoch": 23.245371577574968, - "grad_norm": 1.5513187646865845, - "learning_rate": 5.572160804020101e-05, - "loss": 5.4661, - "step": 44573 - }, - { - "epoch": 23.245893089960887, - "grad_norm": 1.5081251859664917, - "learning_rate": 5.572060301507538e-05, - "loss": 5.2861, - "step": 44574 - }, - { - "epoch": 23.246414602346807, - "grad_norm": 1.651164174079895, - "learning_rate": 5.571959798994976e-05, - "loss": 5.1333, - "step": 44575 - }, - { - "epoch": 23.246936114732726, - "grad_norm": 1.5426654815673828, - "learning_rate": 5.5718592964824124e-05, - "loss": 5.3585, - "step": 44576 - }, - { - "epoch": 23.247457627118646, - "grad_norm": 1.5087411403656006, - "learning_rate": 5.571758793969849e-05, - "loss": 5.3314, - "step": 44577 - }, - { - "epoch": 23.24797913950456, - "grad_norm": 1.5931810140609741, - "learning_rate": 5.5716582914572866e-05, - "loss": 5.3147, - "step": 44578 - }, - { - "epoch": 23.24850065189048, - "grad_norm": 1.5942541360855103, - "learning_rate": 5.571557788944724e-05, - "loss": 4.936, - "step": 44579 - }, - { - "epoch": 23.2490221642764, - "grad_norm": 1.5566232204437256, - "learning_rate": 5.5714572864321615e-05, - "loss": 5.4375, - "step": 44580 - }, - { - "epoch": 23.24954367666232, - "grad_norm": 1.5772749185562134, - "learning_rate": 5.571356783919598e-05, - "loss": 5.3146, - "step": 44581 - }, - { - "epoch": 23.25006518904824, - "grad_norm": 1.6166068315505981, - "learning_rate": 5.571256281407036e-05, - "loss": 4.1991, - "step": 44582 - }, - { - "epoch": 23.25058670143416, - "grad_norm": 1.603832483291626, - "learning_rate": 5.571155778894472e-05, - "loss": 5.1745, - "step": 44583 - }, - { - "epoch": 23.25110821382008, - "grad_norm": 1.4461263418197632, - "learning_rate": 5.57105527638191e-05, - "loss": 5.3029, - "step": 44584 - }, - { - "epoch": 23.251629726205998, - "grad_norm": 1.4724007844924927, - "learning_rate": 5.570954773869347e-05, - "loss": 5.3787, - "step": 44585 - }, - { - "epoch": 23.252151238591917, - "grad_norm": 1.7117221355438232, - "learning_rate": 5.570854271356785e-05, - "loss": 4.7482, - "step": 44586 - }, - { - "epoch": 23.252672750977837, - "grad_norm": 1.4404044151306152, - "learning_rate": 5.570753768844221e-05, - "loss": 5.7402, - "step": 44587 - }, - { - "epoch": 23.253194263363756, - "grad_norm": 1.5791313648223877, - "learning_rate": 5.570653266331659e-05, - "loss": 5.2975, - "step": 44588 - }, - { - "epoch": 23.253715775749676, - "grad_norm": 1.4962552785873413, - "learning_rate": 5.5705527638190954e-05, - "loss": 4.9357, - "step": 44589 - }, - { - "epoch": 23.25423728813559, - "grad_norm": 1.474942684173584, - "learning_rate": 5.5704522613065326e-05, - "loss": 5.0679, - "step": 44590 - }, - { - "epoch": 23.25475880052151, - "grad_norm": 1.4585527181625366, - "learning_rate": 5.57035175879397e-05, - "loss": 5.331, - "step": 44591 - }, - { - "epoch": 23.25528031290743, - "grad_norm": 1.5089999437332153, - "learning_rate": 5.570251256281407e-05, - "loss": 5.6188, - "step": 44592 - }, - { - "epoch": 23.25580182529335, - "grad_norm": 1.4410741329193115, - "learning_rate": 5.5701507537688445e-05, - "loss": 5.8025, - "step": 44593 - }, - { - "epoch": 23.25632333767927, - "grad_norm": 1.4977535009384155, - "learning_rate": 5.5700502512562817e-05, - "loss": 5.4563, - "step": 44594 - }, - { - "epoch": 23.25684485006519, - "grad_norm": 1.5469807386398315, - "learning_rate": 5.5699497487437194e-05, - "loss": 5.3378, - "step": 44595 - }, - { - "epoch": 23.25736636245111, - "grad_norm": 1.6360353231430054, - "learning_rate": 5.569849246231156e-05, - "loss": 5.3596, - "step": 44596 - }, - { - "epoch": 23.257887874837028, - "grad_norm": 1.6265379190444946, - "learning_rate": 5.5697487437185936e-05, - "loss": 5.3563, - "step": 44597 - }, - { - "epoch": 23.258409387222947, - "grad_norm": 1.667091965675354, - "learning_rate": 5.56964824120603e-05, - "loss": 4.49, - "step": 44598 - }, - { - "epoch": 23.258930899608867, - "grad_norm": 1.6011992692947388, - "learning_rate": 5.569547738693468e-05, - "loss": 4.8326, - "step": 44599 - }, - { - "epoch": 23.259452411994786, - "grad_norm": 1.5255705118179321, - "learning_rate": 5.569447236180905e-05, - "loss": 5.4961, - "step": 44600 - }, - { - "epoch": 23.259973924380706, - "grad_norm": 1.6424628496170044, - "learning_rate": 5.569346733668343e-05, - "loss": 4.845, - "step": 44601 - }, - { - "epoch": 23.26049543676662, - "grad_norm": 1.6299266815185547, - "learning_rate": 5.569246231155779e-05, - "loss": 4.9694, - "step": 44602 - }, - { - "epoch": 23.26101694915254, - "grad_norm": 1.5316828489303589, - "learning_rate": 5.5691457286432156e-05, - "loss": 5.757, - "step": 44603 - }, - { - "epoch": 23.26153846153846, - "grad_norm": 1.5627888441085815, - "learning_rate": 5.5690452261306534e-05, - "loss": 4.6872, - "step": 44604 - }, - { - "epoch": 23.26205997392438, - "grad_norm": 1.5855426788330078, - "learning_rate": 5.5689447236180905e-05, - "loss": 5.2008, - "step": 44605 - }, - { - "epoch": 23.2625814863103, - "grad_norm": 1.5990697145462036, - "learning_rate": 5.568844221105528e-05, - "loss": 4.9874, - "step": 44606 - }, - { - "epoch": 23.26310299869622, - "grad_norm": 1.4784404039382935, - "learning_rate": 5.568743718592965e-05, - "loss": 5.4411, - "step": 44607 - }, - { - "epoch": 23.26362451108214, - "grad_norm": 1.7317475080490112, - "learning_rate": 5.5686432160804025e-05, - "loss": 4.9679, - "step": 44608 - }, - { - "epoch": 23.264146023468058, - "grad_norm": 1.6227879524230957, - "learning_rate": 5.568542713567839e-05, - "loss": 4.8079, - "step": 44609 - }, - { - "epoch": 23.264667535853977, - "grad_norm": 1.6126012802124023, - "learning_rate": 5.568442211055277e-05, - "loss": 4.8223, - "step": 44610 - }, - { - "epoch": 23.265189048239897, - "grad_norm": 1.4931504726409912, - "learning_rate": 5.568341708542714e-05, - "loss": 5.1269, - "step": 44611 - }, - { - "epoch": 23.265710560625816, - "grad_norm": 1.596244215965271, - "learning_rate": 5.5682412060301516e-05, - "loss": 5.1975, - "step": 44612 - }, - { - "epoch": 23.266232073011736, - "grad_norm": 1.5338290929794312, - "learning_rate": 5.568140703517588e-05, - "loss": 4.8642, - "step": 44613 - }, - { - "epoch": 23.26675358539765, - "grad_norm": 1.5319815874099731, - "learning_rate": 5.568040201005026e-05, - "loss": 5.4331, - "step": 44614 - }, - { - "epoch": 23.26727509778357, - "grad_norm": 1.5527443885803223, - "learning_rate": 5.567939698492462e-05, - "loss": 4.3405, - "step": 44615 - }, - { - "epoch": 23.26779661016949, - "grad_norm": 1.6191926002502441, - "learning_rate": 5.567839195979899e-05, - "loss": 5.5103, - "step": 44616 - }, - { - "epoch": 23.26831812255541, - "grad_norm": 1.5476422309875488, - "learning_rate": 5.567738693467337e-05, - "loss": 5.1155, - "step": 44617 - }, - { - "epoch": 23.26883963494133, - "grad_norm": 1.5693473815917969, - "learning_rate": 5.5676381909547735e-05, - "loss": 5.1467, - "step": 44618 - }, - { - "epoch": 23.26936114732725, - "grad_norm": 1.503140926361084, - "learning_rate": 5.567537688442211e-05, - "loss": 5.2797, - "step": 44619 - }, - { - "epoch": 23.26988265971317, - "grad_norm": 1.5679562091827393, - "learning_rate": 5.5674371859296484e-05, - "loss": 5.2887, - "step": 44620 - }, - { - "epoch": 23.270404172099088, - "grad_norm": 1.5854188203811646, - "learning_rate": 5.567336683417086e-05, - "loss": 5.3159, - "step": 44621 - }, - { - "epoch": 23.270925684485007, - "grad_norm": 1.772110104560852, - "learning_rate": 5.5672361809045226e-05, - "loss": 5.1824, - "step": 44622 - }, - { - "epoch": 23.271447196870927, - "grad_norm": 1.6210100650787354, - "learning_rate": 5.5671356783919604e-05, - "loss": 5.0699, - "step": 44623 - }, - { - "epoch": 23.271968709256846, - "grad_norm": 1.5166345834732056, - "learning_rate": 5.567035175879397e-05, - "loss": 5.3716, - "step": 44624 - }, - { - "epoch": 23.272490221642762, - "grad_norm": 1.5097739696502686, - "learning_rate": 5.5669346733668346e-05, - "loss": 5.1122, - "step": 44625 - }, - { - "epoch": 23.27301173402868, - "grad_norm": 1.5546506643295288, - "learning_rate": 5.566834170854272e-05, - "loss": 5.1947, - "step": 44626 - }, - { - "epoch": 23.2735332464146, - "grad_norm": 1.5289373397827148, - "learning_rate": 5.5667336683417095e-05, - "loss": 4.4029, - "step": 44627 - }, - { - "epoch": 23.27405475880052, - "grad_norm": 1.5476421117782593, - "learning_rate": 5.566633165829146e-05, - "loss": 5.4249, - "step": 44628 - }, - { - "epoch": 23.27457627118644, - "grad_norm": 1.5300356149673462, - "learning_rate": 5.5665326633165824e-05, - "loss": 5.2788, - "step": 44629 - }, - { - "epoch": 23.27509778357236, - "grad_norm": 1.485335350036621, - "learning_rate": 5.56643216080402e-05, - "loss": 5.0492, - "step": 44630 - }, - { - "epoch": 23.27561929595828, - "grad_norm": 1.605517029762268, - "learning_rate": 5.566331658291457e-05, - "loss": 4.7811, - "step": 44631 - }, - { - "epoch": 23.2761408083442, - "grad_norm": 1.5763812065124512, - "learning_rate": 5.566231155778895e-05, - "loss": 5.2471, - "step": 44632 - }, - { - "epoch": 23.276662320730118, - "grad_norm": 1.4646650552749634, - "learning_rate": 5.5661306532663315e-05, - "loss": 4.9819, - "step": 44633 - }, - { - "epoch": 23.277183833116037, - "grad_norm": 1.566563367843628, - "learning_rate": 5.566030150753769e-05, - "loss": 5.1293, - "step": 44634 - }, - { - "epoch": 23.277705345501957, - "grad_norm": 1.5506607294082642, - "learning_rate": 5.565929648241206e-05, - "loss": 5.1751, - "step": 44635 - }, - { - "epoch": 23.278226857887876, - "grad_norm": 1.5204654932022095, - "learning_rate": 5.5658291457286435e-05, - "loss": 5.2847, - "step": 44636 - }, - { - "epoch": 23.278748370273792, - "grad_norm": 1.6279391050338745, - "learning_rate": 5.5657286432160806e-05, - "loss": 5.4812, - "step": 44637 - }, - { - "epoch": 23.27926988265971, - "grad_norm": 1.4877694845199585, - "learning_rate": 5.5656281407035183e-05, - "loss": 5.377, - "step": 44638 - }, - { - "epoch": 23.27979139504563, - "grad_norm": 1.5620474815368652, - "learning_rate": 5.565527638190955e-05, - "loss": 5.6881, - "step": 44639 - }, - { - "epoch": 23.28031290743155, - "grad_norm": 1.641066551208496, - "learning_rate": 5.5654271356783926e-05, - "loss": 5.1048, - "step": 44640 - }, - { - "epoch": 23.28083441981747, - "grad_norm": 1.6184463500976562, - "learning_rate": 5.5653266331658297e-05, - "loss": 5.1682, - "step": 44641 - }, - { - "epoch": 23.28135593220339, - "grad_norm": 1.4685239791870117, - "learning_rate": 5.5652261306532674e-05, - "loss": 5.3044, - "step": 44642 - }, - { - "epoch": 23.28187744458931, - "grad_norm": 1.4906600713729858, - "learning_rate": 5.565125628140704e-05, - "loss": 5.6824, - "step": 44643 - }, - { - "epoch": 23.28239895697523, - "grad_norm": 1.5627871751785278, - "learning_rate": 5.56502512562814e-05, - "loss": 5.3077, - "step": 44644 - }, - { - "epoch": 23.282920469361148, - "grad_norm": 1.6152735948562622, - "learning_rate": 5.564924623115578e-05, - "loss": 4.8679, - "step": 44645 - }, - { - "epoch": 23.283441981747067, - "grad_norm": 1.6646863222122192, - "learning_rate": 5.564824120603015e-05, - "loss": 4.7536, - "step": 44646 - }, - { - "epoch": 23.283963494132987, - "grad_norm": 1.6920599937438965, - "learning_rate": 5.564723618090453e-05, - "loss": 4.7927, - "step": 44647 - }, - { - "epoch": 23.284485006518906, - "grad_norm": 1.538078784942627, - "learning_rate": 5.5646231155778894e-05, - "loss": 5.5078, - "step": 44648 - }, - { - "epoch": 23.285006518904822, - "grad_norm": 1.4506512880325317, - "learning_rate": 5.564522613065327e-05, - "loss": 5.5572, - "step": 44649 - }, - { - "epoch": 23.285528031290742, - "grad_norm": 1.5637023448944092, - "learning_rate": 5.5644221105527636e-05, - "loss": 4.9838, - "step": 44650 - }, - { - "epoch": 23.28604954367666, - "grad_norm": 1.6314842700958252, - "learning_rate": 5.5643216080402014e-05, - "loss": 5.2783, - "step": 44651 - }, - { - "epoch": 23.28657105606258, - "grad_norm": 1.5503811836242676, - "learning_rate": 5.5642211055276385e-05, - "loss": 5.4445, - "step": 44652 - }, - { - "epoch": 23.2870925684485, - "grad_norm": 1.6422772407531738, - "learning_rate": 5.564120603015076e-05, - "loss": 5.585, - "step": 44653 - }, - { - "epoch": 23.28761408083442, - "grad_norm": 1.581883430480957, - "learning_rate": 5.564020100502513e-05, - "loss": 5.385, - "step": 44654 - }, - { - "epoch": 23.28813559322034, - "grad_norm": 1.5608153343200684, - "learning_rate": 5.5639195979899505e-05, - "loss": 4.7424, - "step": 44655 - }, - { - "epoch": 23.28865710560626, - "grad_norm": 1.4820151329040527, - "learning_rate": 5.563819095477387e-05, - "loss": 5.5058, - "step": 44656 - }, - { - "epoch": 23.289178617992178, - "grad_norm": 1.5099780559539795, - "learning_rate": 5.563718592964824e-05, - "loss": 5.4671, - "step": 44657 - }, - { - "epoch": 23.289700130378097, - "grad_norm": 1.8262561559677124, - "learning_rate": 5.563618090452262e-05, - "loss": 4.6794, - "step": 44658 - }, - { - "epoch": 23.290221642764017, - "grad_norm": 1.477083444595337, - "learning_rate": 5.563517587939698e-05, - "loss": 5.5808, - "step": 44659 - }, - { - "epoch": 23.290743155149936, - "grad_norm": 1.5138734579086304, - "learning_rate": 5.563417085427136e-05, - "loss": 5.489, - "step": 44660 - }, - { - "epoch": 23.291264667535852, - "grad_norm": 1.5622485876083374, - "learning_rate": 5.563316582914573e-05, - "loss": 5.0328, - "step": 44661 - }, - { - "epoch": 23.291786179921772, - "grad_norm": 1.637935996055603, - "learning_rate": 5.563216080402011e-05, - "loss": 5.0845, - "step": 44662 - }, - { - "epoch": 23.29230769230769, - "grad_norm": 1.5235352516174316, - "learning_rate": 5.563115577889447e-05, - "loss": 5.4494, - "step": 44663 - }, - { - "epoch": 23.29282920469361, - "grad_norm": 1.6391088962554932, - "learning_rate": 5.563015075376885e-05, - "loss": 5.3723, - "step": 44664 - }, - { - "epoch": 23.29335071707953, - "grad_norm": 1.5789251327514648, - "learning_rate": 5.5629145728643215e-05, - "loss": 5.5773, - "step": 44665 - }, - { - "epoch": 23.29387222946545, - "grad_norm": 1.515793800354004, - "learning_rate": 5.562814070351759e-05, - "loss": 5.2731, - "step": 44666 - }, - { - "epoch": 23.29439374185137, - "grad_norm": 1.5366852283477783, - "learning_rate": 5.5627135678391964e-05, - "loss": 4.8907, - "step": 44667 - }, - { - "epoch": 23.29491525423729, - "grad_norm": 1.5618301630020142, - "learning_rate": 5.562613065326634e-05, - "loss": 5.3766, - "step": 44668 - }, - { - "epoch": 23.295436766623208, - "grad_norm": 1.4979588985443115, - "learning_rate": 5.5625125628140706e-05, - "loss": 5.4347, - "step": 44669 - }, - { - "epoch": 23.295958279009128, - "grad_norm": 1.5338482856750488, - "learning_rate": 5.562412060301507e-05, - "loss": 5.4531, - "step": 44670 - }, - { - "epoch": 23.296479791395047, - "grad_norm": 1.5686256885528564, - "learning_rate": 5.562311557788945e-05, - "loss": 5.1434, - "step": 44671 - }, - { - "epoch": 23.297001303780966, - "grad_norm": 1.5425318479537964, - "learning_rate": 5.562211055276382e-05, - "loss": 5.2741, - "step": 44672 - }, - { - "epoch": 23.297522816166882, - "grad_norm": 1.6043047904968262, - "learning_rate": 5.56211055276382e-05, - "loss": 5.4325, - "step": 44673 - }, - { - "epoch": 23.298044328552802, - "grad_norm": 1.3888049125671387, - "learning_rate": 5.562010050251256e-05, - "loss": 5.5591, - "step": 44674 - }, - { - "epoch": 23.29856584093872, - "grad_norm": 1.5566939115524292, - "learning_rate": 5.561909547738694e-05, - "loss": 5.1854, - "step": 44675 - }, - { - "epoch": 23.29908735332464, - "grad_norm": 1.5312343835830688, - "learning_rate": 5.5618090452261304e-05, - "loss": 5.6062, - "step": 44676 - }, - { - "epoch": 23.29960886571056, - "grad_norm": 1.7183352708816528, - "learning_rate": 5.561708542713568e-05, - "loss": 4.8673, - "step": 44677 - }, - { - "epoch": 23.30013037809648, - "grad_norm": 1.5134196281433105, - "learning_rate": 5.561608040201005e-05, - "loss": 5.7735, - "step": 44678 - }, - { - "epoch": 23.3006518904824, - "grad_norm": 1.5826233625411987, - "learning_rate": 5.561507537688443e-05, - "loss": 5.0083, - "step": 44679 - }, - { - "epoch": 23.30117340286832, - "grad_norm": 1.5253071784973145, - "learning_rate": 5.5614070351758795e-05, - "loss": 4.9844, - "step": 44680 - }, - { - "epoch": 23.301694915254238, - "grad_norm": 1.4610686302185059, - "learning_rate": 5.561306532663317e-05, - "loss": 5.1329, - "step": 44681 - }, - { - "epoch": 23.302216427640158, - "grad_norm": 1.5844380855560303, - "learning_rate": 5.5612060301507544e-05, - "loss": 5.3277, - "step": 44682 - }, - { - "epoch": 23.302737940026077, - "grad_norm": 1.5018259286880493, - "learning_rate": 5.561105527638191e-05, - "loss": 5.5396, - "step": 44683 - }, - { - "epoch": 23.303259452411996, - "grad_norm": 1.47543466091156, - "learning_rate": 5.5610050251256286e-05, - "loss": 5.3904, - "step": 44684 - }, - { - "epoch": 23.303780964797912, - "grad_norm": 1.67115318775177, - "learning_rate": 5.560904522613065e-05, - "loss": 4.7273, - "step": 44685 - }, - { - "epoch": 23.304302477183832, - "grad_norm": 1.5946059226989746, - "learning_rate": 5.560804020100503e-05, - "loss": 5.6025, - "step": 44686 - }, - { - "epoch": 23.30482398956975, - "grad_norm": 1.4989362955093384, - "learning_rate": 5.56070351758794e-05, - "loss": 5.676, - "step": 44687 - }, - { - "epoch": 23.30534550195567, - "grad_norm": 1.6304326057434082, - "learning_rate": 5.560603015075378e-05, - "loss": 5.4481, - "step": 44688 - }, - { - "epoch": 23.30586701434159, - "grad_norm": 1.501067876815796, - "learning_rate": 5.560502512562814e-05, - "loss": 5.7041, - "step": 44689 - }, - { - "epoch": 23.30638852672751, - "grad_norm": 1.65953528881073, - "learning_rate": 5.560402010050252e-05, - "loss": 5.273, - "step": 44690 - }, - { - "epoch": 23.30691003911343, - "grad_norm": 1.5365334749221802, - "learning_rate": 5.560301507537688e-05, - "loss": 5.1931, - "step": 44691 - }, - { - "epoch": 23.30743155149935, - "grad_norm": 1.5538578033447266, - "learning_rate": 5.560201005025126e-05, - "loss": 5.3223, - "step": 44692 - }, - { - "epoch": 23.307953063885268, - "grad_norm": 1.6225897073745728, - "learning_rate": 5.560100502512563e-05, - "loss": 5.1897, - "step": 44693 - }, - { - "epoch": 23.308474576271188, - "grad_norm": 1.5602236986160278, - "learning_rate": 5.560000000000001e-05, - "loss": 5.1927, - "step": 44694 - }, - { - "epoch": 23.308996088657107, - "grad_norm": 1.4370789527893066, - "learning_rate": 5.5598994974874374e-05, - "loss": 5.9174, - "step": 44695 - }, - { - "epoch": 23.309517601043027, - "grad_norm": 1.5780892372131348, - "learning_rate": 5.559798994974874e-05, - "loss": 5.2164, - "step": 44696 - }, - { - "epoch": 23.310039113428942, - "grad_norm": 1.5019656419754028, - "learning_rate": 5.5596984924623116e-05, - "loss": 5.6995, - "step": 44697 - }, - { - "epoch": 23.310560625814862, - "grad_norm": 1.533696174621582, - "learning_rate": 5.559597989949749e-05, - "loss": 5.4244, - "step": 44698 - }, - { - "epoch": 23.31108213820078, - "grad_norm": 1.6366585493087769, - "learning_rate": 5.5594974874371865e-05, - "loss": 5.4108, - "step": 44699 - }, - { - "epoch": 23.3116036505867, - "grad_norm": 1.603880524635315, - "learning_rate": 5.559396984924623e-05, - "loss": 5.401, - "step": 44700 - }, - { - "epoch": 23.31212516297262, - "grad_norm": 1.5485318899154663, - "learning_rate": 5.559296482412061e-05, - "loss": 5.1255, - "step": 44701 - }, - { - "epoch": 23.31264667535854, - "grad_norm": 1.4589794874191284, - "learning_rate": 5.559195979899497e-05, - "loss": 5.0422, - "step": 44702 - }, - { - "epoch": 23.31316818774446, - "grad_norm": 1.582326889038086, - "learning_rate": 5.559095477386935e-05, - "loss": 5.1916, - "step": 44703 - }, - { - "epoch": 23.31368970013038, - "grad_norm": 1.6801213026046753, - "learning_rate": 5.558994974874372e-05, - "loss": 4.6877, - "step": 44704 - }, - { - "epoch": 23.314211212516298, - "grad_norm": 1.6560171842575073, - "learning_rate": 5.55889447236181e-05, - "loss": 5.0668, - "step": 44705 - }, - { - "epoch": 23.314732724902218, - "grad_norm": 1.5631471872329712, - "learning_rate": 5.558793969849246e-05, - "loss": 4.9834, - "step": 44706 - }, - { - "epoch": 23.315254237288137, - "grad_norm": 1.5147922039031982, - "learning_rate": 5.558693467336684e-05, - "loss": 5.2403, - "step": 44707 - }, - { - "epoch": 23.315775749674053, - "grad_norm": 1.5578421354293823, - "learning_rate": 5.558592964824121e-05, - "loss": 5.0552, - "step": 44708 - }, - { - "epoch": 23.316297262059972, - "grad_norm": 1.614723563194275, - "learning_rate": 5.5584924623115576e-05, - "loss": 5.0896, - "step": 44709 - }, - { - "epoch": 23.316818774445892, - "grad_norm": 1.5920286178588867, - "learning_rate": 5.558391959798995e-05, - "loss": 5.5287, - "step": 44710 - }, - { - "epoch": 23.31734028683181, - "grad_norm": 1.552654504776001, - "learning_rate": 5.558291457286432e-05, - "loss": 4.8938, - "step": 44711 - }, - { - "epoch": 23.31786179921773, - "grad_norm": 1.6547778844833374, - "learning_rate": 5.5581909547738695e-05, - "loss": 5.0849, - "step": 44712 - }, - { - "epoch": 23.31838331160365, - "grad_norm": 1.6074649095535278, - "learning_rate": 5.5580904522613067e-05, - "loss": 4.9143, - "step": 44713 - }, - { - "epoch": 23.31890482398957, - "grad_norm": 1.5663902759552002, - "learning_rate": 5.5579899497487444e-05, - "loss": 5.0095, - "step": 44714 - }, - { - "epoch": 23.31942633637549, - "grad_norm": 1.5822302103042603, - "learning_rate": 5.557889447236181e-05, - "loss": 4.7694, - "step": 44715 - }, - { - "epoch": 23.31994784876141, - "grad_norm": 1.638737678527832, - "learning_rate": 5.5577889447236186e-05, - "loss": 4.7813, - "step": 44716 - }, - { - "epoch": 23.320469361147328, - "grad_norm": 1.5123647451400757, - "learning_rate": 5.557688442211055e-05, - "loss": 5.1303, - "step": 44717 - }, - { - "epoch": 23.320990873533248, - "grad_norm": 1.5520777702331543, - "learning_rate": 5.557587939698493e-05, - "loss": 5.4999, - "step": 44718 - }, - { - "epoch": 23.321512385919167, - "grad_norm": 1.6192748546600342, - "learning_rate": 5.55748743718593e-05, - "loss": 4.9425, - "step": 44719 - }, - { - "epoch": 23.322033898305083, - "grad_norm": 1.6600195169448853, - "learning_rate": 5.557386934673368e-05, - "loss": 4.7403, - "step": 44720 - }, - { - "epoch": 23.322555410691002, - "grad_norm": 1.6267646551132202, - "learning_rate": 5.557286432160804e-05, - "loss": 5.0642, - "step": 44721 - }, - { - "epoch": 23.323076923076922, - "grad_norm": 1.507372498512268, - "learning_rate": 5.5571859296482406e-05, - "loss": 5.4397, - "step": 44722 - }, - { - "epoch": 23.32359843546284, - "grad_norm": 1.5454317331314087, - "learning_rate": 5.5570854271356784e-05, - "loss": 5.1344, - "step": 44723 - }, - { - "epoch": 23.32411994784876, - "grad_norm": 1.5941543579101562, - "learning_rate": 5.5569849246231155e-05, - "loss": 5.2755, - "step": 44724 - }, - { - "epoch": 23.32464146023468, - "grad_norm": 1.4954109191894531, - "learning_rate": 5.556884422110553e-05, - "loss": 5.3311, - "step": 44725 - }, - { - "epoch": 23.3251629726206, - "grad_norm": 1.505150556564331, - "learning_rate": 5.55678391959799e-05, - "loss": 5.3077, - "step": 44726 - }, - { - "epoch": 23.32568448500652, - "grad_norm": 1.5327328443527222, - "learning_rate": 5.5566834170854275e-05, - "loss": 5.3264, - "step": 44727 - }, - { - "epoch": 23.32620599739244, - "grad_norm": 1.5768507719039917, - "learning_rate": 5.5565829145728646e-05, - "loss": 5.5773, - "step": 44728 - }, - { - "epoch": 23.326727509778358, - "grad_norm": 1.6567356586456299, - "learning_rate": 5.5564824120603024e-05, - "loss": 5.0229, - "step": 44729 - }, - { - "epoch": 23.327249022164278, - "grad_norm": 1.5159434080123901, - "learning_rate": 5.556381909547739e-05, - "loss": 5.1724, - "step": 44730 - }, - { - "epoch": 23.327770534550197, - "grad_norm": 1.5891367197036743, - "learning_rate": 5.5562814070351766e-05, - "loss": 5.1333, - "step": 44731 - }, - { - "epoch": 23.328292046936113, - "grad_norm": 1.633366584777832, - "learning_rate": 5.556180904522613e-05, - "loss": 5.0104, - "step": 44732 - }, - { - "epoch": 23.328813559322032, - "grad_norm": 1.709047794342041, - "learning_rate": 5.556080402010051e-05, - "loss": 5.0638, - "step": 44733 - }, - { - "epoch": 23.329335071707952, - "grad_norm": 1.5495436191558838, - "learning_rate": 5.555979899497488e-05, - "loss": 5.3463, - "step": 44734 - }, - { - "epoch": 23.32985658409387, - "grad_norm": 1.558257818222046, - "learning_rate": 5.555879396984926e-05, - "loss": 5.55, - "step": 44735 - }, - { - "epoch": 23.33037809647979, - "grad_norm": 1.5250074863433838, - "learning_rate": 5.555778894472362e-05, - "loss": 5.6959, - "step": 44736 - }, - { - "epoch": 23.33089960886571, - "grad_norm": 1.485108733177185, - "learning_rate": 5.5556783919597985e-05, - "loss": 5.5015, - "step": 44737 - }, - { - "epoch": 23.33142112125163, - "grad_norm": 1.6164953708648682, - "learning_rate": 5.555577889447236e-05, - "loss": 4.6788, - "step": 44738 - }, - { - "epoch": 23.33194263363755, - "grad_norm": 1.5041263103485107, - "learning_rate": 5.5554773869346734e-05, - "loss": 5.1798, - "step": 44739 - }, - { - "epoch": 23.33246414602347, - "grad_norm": 1.634890079498291, - "learning_rate": 5.555376884422111e-05, - "loss": 4.4108, - "step": 44740 - }, - { - "epoch": 23.332985658409388, - "grad_norm": 1.5192242860794067, - "learning_rate": 5.5552763819095476e-05, - "loss": 4.9664, - "step": 44741 - }, - { - "epoch": 23.333507170795308, - "grad_norm": 1.644005298614502, - "learning_rate": 5.5551758793969854e-05, - "loss": 4.7573, - "step": 44742 - }, - { - "epoch": 23.334028683181227, - "grad_norm": 1.6740736961364746, - "learning_rate": 5.555075376884422e-05, - "loss": 4.898, - "step": 44743 - }, - { - "epoch": 23.334550195567143, - "grad_norm": 1.5707627534866333, - "learning_rate": 5.5549748743718596e-05, - "loss": 5.1425, - "step": 44744 - }, - { - "epoch": 23.335071707953063, - "grad_norm": 1.692326545715332, - "learning_rate": 5.554874371859297e-05, - "loss": 5.1639, - "step": 44745 - }, - { - "epoch": 23.335593220338982, - "grad_norm": 1.54416024684906, - "learning_rate": 5.5547738693467345e-05, - "loss": 5.3127, - "step": 44746 - }, - { - "epoch": 23.3361147327249, - "grad_norm": 1.5275280475616455, - "learning_rate": 5.554673366834171e-05, - "loss": 5.4431, - "step": 44747 - }, - { - "epoch": 23.33663624511082, - "grad_norm": 1.5547983646392822, - "learning_rate": 5.554572864321609e-05, - "loss": 5.414, - "step": 44748 - }, - { - "epoch": 23.33715775749674, - "grad_norm": 1.6379680633544922, - "learning_rate": 5.554472361809046e-05, - "loss": 5.1368, - "step": 44749 - }, - { - "epoch": 23.33767926988266, - "grad_norm": 1.561330795288086, - "learning_rate": 5.554371859296482e-05, - "loss": 5.2506, - "step": 44750 - }, - { - "epoch": 23.33820078226858, - "grad_norm": 1.6030808687210083, - "learning_rate": 5.55427135678392e-05, - "loss": 5.1105, - "step": 44751 - }, - { - "epoch": 23.3387222946545, - "grad_norm": 1.5966078042984009, - "learning_rate": 5.5541708542713565e-05, - "loss": 5.507, - "step": 44752 - }, - { - "epoch": 23.33924380704042, - "grad_norm": 1.5918004512786865, - "learning_rate": 5.554070351758794e-05, - "loss": 4.7794, - "step": 44753 - }, - { - "epoch": 23.339765319426338, - "grad_norm": 1.7015801668167114, - "learning_rate": 5.5539698492462314e-05, - "loss": 4.5018, - "step": 44754 - }, - { - "epoch": 23.340286831812257, - "grad_norm": 1.5586298704147339, - "learning_rate": 5.553869346733669e-05, - "loss": 5.1593, - "step": 44755 - }, - { - "epoch": 23.340808344198173, - "grad_norm": 1.5368242263793945, - "learning_rate": 5.5537688442211056e-05, - "loss": 5.4901, - "step": 44756 - }, - { - "epoch": 23.341329856584093, - "grad_norm": 1.598795771598816, - "learning_rate": 5.5536683417085433e-05, - "loss": 5.4654, - "step": 44757 - }, - { - "epoch": 23.341851368970012, - "grad_norm": 1.5153985023498535, - "learning_rate": 5.55356783919598e-05, - "loss": 5.317, - "step": 44758 - }, - { - "epoch": 23.34237288135593, - "grad_norm": 1.5620619058609009, - "learning_rate": 5.5534673366834176e-05, - "loss": 5.0553, - "step": 44759 - }, - { - "epoch": 23.34289439374185, - "grad_norm": 1.44981849193573, - "learning_rate": 5.5533668341708547e-05, - "loss": 5.4125, - "step": 44760 - }, - { - "epoch": 23.34341590612777, - "grad_norm": 1.5409274101257324, - "learning_rate": 5.5532663316582924e-05, - "loss": 5.447, - "step": 44761 - }, - { - "epoch": 23.34393741851369, - "grad_norm": 1.5615460872650146, - "learning_rate": 5.553165829145729e-05, - "loss": 4.8259, - "step": 44762 - }, - { - "epoch": 23.34445893089961, - "grad_norm": 1.555101990699768, - "learning_rate": 5.553065326633165e-05, - "loss": 5.1489, - "step": 44763 - }, - { - "epoch": 23.34498044328553, - "grad_norm": 1.532172679901123, - "learning_rate": 5.552964824120603e-05, - "loss": 5.1653, - "step": 44764 - }, - { - "epoch": 23.34550195567145, - "grad_norm": 1.624721884727478, - "learning_rate": 5.55286432160804e-05, - "loss": 5.1131, - "step": 44765 - }, - { - "epoch": 23.346023468057368, - "grad_norm": 1.4859825372695923, - "learning_rate": 5.552763819095478e-05, - "loss": 5.6562, - "step": 44766 - }, - { - "epoch": 23.346544980443287, - "grad_norm": 1.4772056341171265, - "learning_rate": 5.5526633165829144e-05, - "loss": 5.4459, - "step": 44767 - }, - { - "epoch": 23.347066492829203, - "grad_norm": 1.6053836345672607, - "learning_rate": 5.552562814070352e-05, - "loss": 4.8979, - "step": 44768 - }, - { - "epoch": 23.347588005215123, - "grad_norm": 1.481070637702942, - "learning_rate": 5.552462311557789e-05, - "loss": 5.5769, - "step": 44769 - }, - { - "epoch": 23.348109517601042, - "grad_norm": 1.5127042531967163, - "learning_rate": 5.552361809045227e-05, - "loss": 5.5403, - "step": 44770 - }, - { - "epoch": 23.34863102998696, - "grad_norm": 1.5336209535598755, - "learning_rate": 5.5522613065326635e-05, - "loss": 5.3403, - "step": 44771 - }, - { - "epoch": 23.34915254237288, - "grad_norm": 1.5843722820281982, - "learning_rate": 5.552160804020101e-05, - "loss": 5.3198, - "step": 44772 - }, - { - "epoch": 23.3496740547588, - "grad_norm": 1.5614718198776245, - "learning_rate": 5.552060301507538e-05, - "loss": 5.2318, - "step": 44773 - }, - { - "epoch": 23.35019556714472, - "grad_norm": 1.413976788520813, - "learning_rate": 5.5519597989949755e-05, - "loss": 4.9059, - "step": 44774 - }, - { - "epoch": 23.35071707953064, - "grad_norm": 1.6387206315994263, - "learning_rate": 5.5518592964824126e-05, - "loss": 5.1424, - "step": 44775 - }, - { - "epoch": 23.35123859191656, - "grad_norm": 1.5690563917160034, - "learning_rate": 5.551758793969849e-05, - "loss": 5.3351, - "step": 44776 - }, - { - "epoch": 23.35176010430248, - "grad_norm": 1.5309547185897827, - "learning_rate": 5.551658291457287e-05, - "loss": 5.595, - "step": 44777 - }, - { - "epoch": 23.352281616688398, - "grad_norm": 1.5046021938323975, - "learning_rate": 5.551557788944723e-05, - "loss": 5.2392, - "step": 44778 - }, - { - "epoch": 23.352803129074317, - "grad_norm": 1.491847276687622, - "learning_rate": 5.551457286432161e-05, - "loss": 5.5747, - "step": 44779 - }, - { - "epoch": 23.353324641460233, - "grad_norm": 1.5293476581573486, - "learning_rate": 5.551356783919598e-05, - "loss": 5.036, - "step": 44780 - }, - { - "epoch": 23.353846153846153, - "grad_norm": 1.5138404369354248, - "learning_rate": 5.551256281407036e-05, - "loss": 4.9775, - "step": 44781 - }, - { - "epoch": 23.354367666232072, - "grad_norm": 1.4660429954528809, - "learning_rate": 5.551155778894472e-05, - "loss": 5.4673, - "step": 44782 - }, - { - "epoch": 23.35488917861799, - "grad_norm": 1.5658761262893677, - "learning_rate": 5.55105527638191e-05, - "loss": 5.4376, - "step": 44783 - }, - { - "epoch": 23.35541069100391, - "grad_norm": 1.5566550493240356, - "learning_rate": 5.5509547738693465e-05, - "loss": 4.3535, - "step": 44784 - }, - { - "epoch": 23.35593220338983, - "grad_norm": 1.5932620763778687, - "learning_rate": 5.550854271356784e-05, - "loss": 5.4128, - "step": 44785 - }, - { - "epoch": 23.35645371577575, - "grad_norm": 1.4946129322052002, - "learning_rate": 5.5507537688442214e-05, - "loss": 5.4542, - "step": 44786 - }, - { - "epoch": 23.35697522816167, - "grad_norm": 1.4808534383773804, - "learning_rate": 5.550653266331659e-05, - "loss": 5.4266, - "step": 44787 - }, - { - "epoch": 23.35749674054759, - "grad_norm": 1.5076165199279785, - "learning_rate": 5.5505527638190956e-05, - "loss": 5.3677, - "step": 44788 - }, - { - "epoch": 23.35801825293351, - "grad_norm": 1.6001607179641724, - "learning_rate": 5.550452261306532e-05, - "loss": 5.1336, - "step": 44789 - }, - { - "epoch": 23.358539765319428, - "grad_norm": 1.5280483961105347, - "learning_rate": 5.55035175879397e-05, - "loss": 5.3155, - "step": 44790 - }, - { - "epoch": 23.359061277705347, - "grad_norm": 1.580587387084961, - "learning_rate": 5.550251256281407e-05, - "loss": 5.2879, - "step": 44791 - }, - { - "epoch": 23.359582790091263, - "grad_norm": 1.5784662961959839, - "learning_rate": 5.550150753768845e-05, - "loss": 5.6049, - "step": 44792 - }, - { - "epoch": 23.360104302477183, - "grad_norm": 1.668789029121399, - "learning_rate": 5.550050251256281e-05, - "loss": 4.3553, - "step": 44793 - }, - { - "epoch": 23.360625814863102, - "grad_norm": 1.6764118671417236, - "learning_rate": 5.549949748743719e-05, - "loss": 4.9711, - "step": 44794 - }, - { - "epoch": 23.36114732724902, - "grad_norm": 1.6770001649856567, - "learning_rate": 5.549849246231156e-05, - "loss": 5.4995, - "step": 44795 - }, - { - "epoch": 23.36166883963494, - "grad_norm": 1.498279333114624, - "learning_rate": 5.549748743718594e-05, - "loss": 5.1829, - "step": 44796 - }, - { - "epoch": 23.36219035202086, - "grad_norm": 1.5488457679748535, - "learning_rate": 5.54964824120603e-05, - "loss": 4.6627, - "step": 44797 - }, - { - "epoch": 23.36271186440678, - "grad_norm": 1.5784153938293457, - "learning_rate": 5.549547738693468e-05, - "loss": 5.2863, - "step": 44798 - }, - { - "epoch": 23.3632333767927, - "grad_norm": 1.44713294506073, - "learning_rate": 5.5494472361809045e-05, - "loss": 5.6199, - "step": 44799 - }, - { - "epoch": 23.36375488917862, - "grad_norm": 1.566400170326233, - "learning_rate": 5.549346733668342e-05, - "loss": 4.9808, - "step": 44800 - }, - { - "epoch": 23.36427640156454, - "grad_norm": 1.4777164459228516, - "learning_rate": 5.5492462311557794e-05, - "loss": 5.6646, - "step": 44801 - }, - { - "epoch": 23.364797913950458, - "grad_norm": 1.4942063093185425, - "learning_rate": 5.549145728643216e-05, - "loss": 5.1504, - "step": 44802 - }, - { - "epoch": 23.365319426336377, - "grad_norm": 1.614269495010376, - "learning_rate": 5.5490452261306536e-05, - "loss": 5.4345, - "step": 44803 - }, - { - "epoch": 23.365840938722293, - "grad_norm": 1.5554661750793457, - "learning_rate": 5.54894472361809e-05, - "loss": 4.8948, - "step": 44804 - }, - { - "epoch": 23.366362451108213, - "grad_norm": 1.6437816619873047, - "learning_rate": 5.548844221105528e-05, - "loss": 5.1617, - "step": 44805 - }, - { - "epoch": 23.366883963494132, - "grad_norm": 1.6021653413772583, - "learning_rate": 5.548743718592965e-05, - "loss": 4.676, - "step": 44806 - }, - { - "epoch": 23.36740547588005, - "grad_norm": 1.5827009677886963, - "learning_rate": 5.548643216080403e-05, - "loss": 4.9689, - "step": 44807 - }, - { - "epoch": 23.36792698826597, - "grad_norm": 1.528337001800537, - "learning_rate": 5.548542713567839e-05, - "loss": 5.04, - "step": 44808 - }, - { - "epoch": 23.36844850065189, - "grad_norm": 1.5188783407211304, - "learning_rate": 5.548442211055277e-05, - "loss": 5.1398, - "step": 44809 - }, - { - "epoch": 23.36897001303781, - "grad_norm": 1.4692344665527344, - "learning_rate": 5.548341708542713e-05, - "loss": 5.4519, - "step": 44810 - }, - { - "epoch": 23.36949152542373, - "grad_norm": 1.5242841243743896, - "learning_rate": 5.548241206030151e-05, - "loss": 5.2651, - "step": 44811 - }, - { - "epoch": 23.37001303780965, - "grad_norm": 1.4630486965179443, - "learning_rate": 5.548140703517588e-05, - "loss": 4.091, - "step": 44812 - }, - { - "epoch": 23.37053455019557, - "grad_norm": 1.5034470558166504, - "learning_rate": 5.548040201005026e-05, - "loss": 4.1589, - "step": 44813 - }, - { - "epoch": 23.371056062581488, - "grad_norm": 1.5538959503173828, - "learning_rate": 5.5479396984924624e-05, - "loss": 5.5367, - "step": 44814 - }, - { - "epoch": 23.371577574967404, - "grad_norm": 1.5861825942993164, - "learning_rate": 5.5478391959799e-05, - "loss": 5.4655, - "step": 44815 - }, - { - "epoch": 23.372099087353323, - "grad_norm": 1.6194576025009155, - "learning_rate": 5.547738693467337e-05, - "loss": 5.1032, - "step": 44816 - }, - { - "epoch": 23.372620599739243, - "grad_norm": 1.559667944908142, - "learning_rate": 5.547638190954774e-05, - "loss": 5.166, - "step": 44817 - }, - { - "epoch": 23.373142112125162, - "grad_norm": 1.4487583637237549, - "learning_rate": 5.5475376884422115e-05, - "loss": 5.6305, - "step": 44818 - }, - { - "epoch": 23.37366362451108, - "grad_norm": 1.5856473445892334, - "learning_rate": 5.547437185929648e-05, - "loss": 5.189, - "step": 44819 - }, - { - "epoch": 23.374185136897, - "grad_norm": 1.4817148447036743, - "learning_rate": 5.547336683417086e-05, - "loss": 5.6641, - "step": 44820 - }, - { - "epoch": 23.37470664928292, - "grad_norm": 1.5517505407333374, - "learning_rate": 5.547236180904523e-05, - "loss": 5.3899, - "step": 44821 - }, - { - "epoch": 23.37522816166884, - "grad_norm": 1.56109619140625, - "learning_rate": 5.5471356783919606e-05, - "loss": 5.2616, - "step": 44822 - }, - { - "epoch": 23.37574967405476, - "grad_norm": 1.5524286031723022, - "learning_rate": 5.547035175879397e-05, - "loss": 5.4948, - "step": 44823 - }, - { - "epoch": 23.37627118644068, - "grad_norm": 1.6954971551895142, - "learning_rate": 5.546934673366835e-05, - "loss": 5.1275, - "step": 44824 - }, - { - "epoch": 23.3767926988266, - "grad_norm": 1.6407297849655151, - "learning_rate": 5.546834170854271e-05, - "loss": 4.9815, - "step": 44825 - }, - { - "epoch": 23.377314211212518, - "grad_norm": 1.6697989702224731, - "learning_rate": 5.546733668341709e-05, - "loss": 5.6146, - "step": 44826 - }, - { - "epoch": 23.377835723598434, - "grad_norm": 1.619157075881958, - "learning_rate": 5.546633165829146e-05, - "loss": 4.8785, - "step": 44827 - }, - { - "epoch": 23.378357235984353, - "grad_norm": 1.7644245624542236, - "learning_rate": 5.546532663316584e-05, - "loss": 4.9313, - "step": 44828 - }, - { - "epoch": 23.378878748370273, - "grad_norm": 1.6359869241714478, - "learning_rate": 5.54643216080402e-05, - "loss": 5.0221, - "step": 44829 - }, - { - "epoch": 23.379400260756192, - "grad_norm": 1.5231208801269531, - "learning_rate": 5.546331658291457e-05, - "loss": 5.6047, - "step": 44830 - }, - { - "epoch": 23.37992177314211, - "grad_norm": 1.4292277097702026, - "learning_rate": 5.5462311557788945e-05, - "loss": 5.4968, - "step": 44831 - }, - { - "epoch": 23.38044328552803, - "grad_norm": 1.549662470817566, - "learning_rate": 5.5461306532663317e-05, - "loss": 5.24, - "step": 44832 - }, - { - "epoch": 23.38096479791395, - "grad_norm": 1.6458088159561157, - "learning_rate": 5.5460301507537694e-05, - "loss": 5.3427, - "step": 44833 - }, - { - "epoch": 23.38148631029987, - "grad_norm": 1.5357705354690552, - "learning_rate": 5.545929648241206e-05, - "loss": 5.49, - "step": 44834 - }, - { - "epoch": 23.38200782268579, - "grad_norm": 1.533947229385376, - "learning_rate": 5.5458291457286436e-05, - "loss": 5.5063, - "step": 44835 - }, - { - "epoch": 23.38252933507171, - "grad_norm": 1.6017277240753174, - "learning_rate": 5.545728643216081e-05, - "loss": 5.2308, - "step": 44836 - }, - { - "epoch": 23.38305084745763, - "grad_norm": 1.5580233335494995, - "learning_rate": 5.5456281407035185e-05, - "loss": 5.2376, - "step": 44837 - }, - { - "epoch": 23.383572359843548, - "grad_norm": 1.5221823453903198, - "learning_rate": 5.545527638190955e-05, - "loss": 4.8828, - "step": 44838 - }, - { - "epoch": 23.384093872229464, - "grad_norm": 1.576883316040039, - "learning_rate": 5.545427135678393e-05, - "loss": 4.9445, - "step": 44839 - }, - { - "epoch": 23.384615384615383, - "grad_norm": 1.6334574222564697, - "learning_rate": 5.545326633165829e-05, - "loss": 5.1537, - "step": 44840 - }, - { - "epoch": 23.385136897001303, - "grad_norm": 1.5723347663879395, - "learning_rate": 5.545226130653267e-05, - "loss": 5.4723, - "step": 44841 - }, - { - "epoch": 23.385658409387222, - "grad_norm": 1.570426106452942, - "learning_rate": 5.545125628140704e-05, - "loss": 5.3406, - "step": 44842 - }, - { - "epoch": 23.38617992177314, - "grad_norm": 1.5493618249893188, - "learning_rate": 5.5450251256281405e-05, - "loss": 5.657, - "step": 44843 - }, - { - "epoch": 23.38670143415906, - "grad_norm": 1.5081101655960083, - "learning_rate": 5.544924623115578e-05, - "loss": 5.3488, - "step": 44844 - }, - { - "epoch": 23.38722294654498, - "grad_norm": 1.4700024127960205, - "learning_rate": 5.544824120603015e-05, - "loss": 5.6245, - "step": 44845 - }, - { - "epoch": 23.3877444589309, - "grad_norm": 1.5843256711959839, - "learning_rate": 5.5447236180904525e-05, - "loss": 5.1503, - "step": 44846 - }, - { - "epoch": 23.38826597131682, - "grad_norm": 1.538255214691162, - "learning_rate": 5.5446231155778896e-05, - "loss": 5.1317, - "step": 44847 - }, - { - "epoch": 23.38878748370274, - "grad_norm": 1.5703628063201904, - "learning_rate": 5.5445226130653274e-05, - "loss": 5.4458, - "step": 44848 - }, - { - "epoch": 23.38930899608866, - "grad_norm": 1.4976208209991455, - "learning_rate": 5.544422110552764e-05, - "loss": 5.5471, - "step": 44849 - }, - { - "epoch": 23.389830508474578, - "grad_norm": 1.5688999891281128, - "learning_rate": 5.5443216080402016e-05, - "loss": 5.437, - "step": 44850 - }, - { - "epoch": 23.390352020860494, - "grad_norm": 1.506462812423706, - "learning_rate": 5.544221105527638e-05, - "loss": 4.8639, - "step": 44851 - }, - { - "epoch": 23.390873533246413, - "grad_norm": 1.6201246976852417, - "learning_rate": 5.544120603015076e-05, - "loss": 4.8506, - "step": 44852 - }, - { - "epoch": 23.391395045632333, - "grad_norm": 1.6703269481658936, - "learning_rate": 5.544020100502513e-05, - "loss": 5.0466, - "step": 44853 - }, - { - "epoch": 23.391916558018252, - "grad_norm": 1.523182988166809, - "learning_rate": 5.543919597989951e-05, - "loss": 5.3471, - "step": 44854 - }, - { - "epoch": 23.39243807040417, - "grad_norm": 1.5678662061691284, - "learning_rate": 5.543819095477387e-05, - "loss": 5.1039, - "step": 44855 - }, - { - "epoch": 23.39295958279009, - "grad_norm": 1.5313336849212646, - "learning_rate": 5.543718592964824e-05, - "loss": 5.2902, - "step": 44856 - }, - { - "epoch": 23.39348109517601, - "grad_norm": 1.5380008220672607, - "learning_rate": 5.543618090452262e-05, - "loss": 5.0938, - "step": 44857 - }, - { - "epoch": 23.39400260756193, - "grad_norm": 1.4714242219924927, - "learning_rate": 5.5435175879396984e-05, - "loss": 4.9877, - "step": 44858 - }, - { - "epoch": 23.39452411994785, - "grad_norm": 1.5945611000061035, - "learning_rate": 5.543417085427136e-05, - "loss": 5.496, - "step": 44859 - }, - { - "epoch": 23.39504563233377, - "grad_norm": 1.4515812397003174, - "learning_rate": 5.5433165829145726e-05, - "loss": 5.3086, - "step": 44860 - }, - { - "epoch": 23.39556714471969, - "grad_norm": 1.5526423454284668, - "learning_rate": 5.5432160804020104e-05, - "loss": 5.4301, - "step": 44861 - }, - { - "epoch": 23.396088657105608, - "grad_norm": 1.5716462135314941, - "learning_rate": 5.5431155778894475e-05, - "loss": 4.1672, - "step": 44862 - }, - { - "epoch": 23.396610169491524, - "grad_norm": 1.457054853439331, - "learning_rate": 5.543015075376885e-05, - "loss": 5.6789, - "step": 44863 - }, - { - "epoch": 23.397131681877443, - "grad_norm": 1.7080209255218506, - "learning_rate": 5.542914572864322e-05, - "loss": 4.794, - "step": 44864 - }, - { - "epoch": 23.397653194263363, - "grad_norm": 1.5621246099472046, - "learning_rate": 5.5428140703517595e-05, - "loss": 5.1312, - "step": 44865 - }, - { - "epoch": 23.398174706649282, - "grad_norm": 1.541440725326538, - "learning_rate": 5.542713567839196e-05, - "loss": 5.048, - "step": 44866 - }, - { - "epoch": 23.3986962190352, - "grad_norm": 1.558325171470642, - "learning_rate": 5.542613065326634e-05, - "loss": 5.2787, - "step": 44867 - }, - { - "epoch": 23.39921773142112, - "grad_norm": 1.4823945760726929, - "learning_rate": 5.542512562814071e-05, - "loss": 4.3879, - "step": 44868 - }, - { - "epoch": 23.39973924380704, - "grad_norm": 1.4838858842849731, - "learning_rate": 5.542412060301507e-05, - "loss": 5.6677, - "step": 44869 - }, - { - "epoch": 23.40026075619296, - "grad_norm": 1.4814224243164062, - "learning_rate": 5.542311557788945e-05, - "loss": 5.4327, - "step": 44870 - }, - { - "epoch": 23.40078226857888, - "grad_norm": 1.5469328165054321, - "learning_rate": 5.5422110552763815e-05, - "loss": 5.2989, - "step": 44871 - }, - { - "epoch": 23.4013037809648, - "grad_norm": 1.5777119398117065, - "learning_rate": 5.542110552763819e-05, - "loss": 5.4819, - "step": 44872 - }, - { - "epoch": 23.40182529335072, - "grad_norm": 1.6021257638931274, - "learning_rate": 5.5420100502512564e-05, - "loss": 5.397, - "step": 44873 - }, - { - "epoch": 23.402346805736638, - "grad_norm": 1.5289041996002197, - "learning_rate": 5.541909547738694e-05, - "loss": 5.1859, - "step": 44874 - }, - { - "epoch": 23.402868318122554, - "grad_norm": 1.5088396072387695, - "learning_rate": 5.5418090452261306e-05, - "loss": 5.2348, - "step": 44875 - }, - { - "epoch": 23.403389830508473, - "grad_norm": 1.5135935544967651, - "learning_rate": 5.5417085427135683e-05, - "loss": 5.3577, - "step": 44876 - }, - { - "epoch": 23.403911342894393, - "grad_norm": 1.5188326835632324, - "learning_rate": 5.541608040201005e-05, - "loss": 5.3038, - "step": 44877 - }, - { - "epoch": 23.404432855280312, - "grad_norm": 1.6530377864837646, - "learning_rate": 5.5415075376884426e-05, - "loss": 4.2765, - "step": 44878 - }, - { - "epoch": 23.40495436766623, - "grad_norm": 1.6188114881515503, - "learning_rate": 5.54140703517588e-05, - "loss": 5.3767, - "step": 44879 - }, - { - "epoch": 23.40547588005215, - "grad_norm": 1.609874963760376, - "learning_rate": 5.5413065326633174e-05, - "loss": 5.063, - "step": 44880 - }, - { - "epoch": 23.40599739243807, - "grad_norm": 1.7933354377746582, - "learning_rate": 5.541206030150754e-05, - "loss": 4.5534, - "step": 44881 - }, - { - "epoch": 23.40651890482399, - "grad_norm": 1.5888760089874268, - "learning_rate": 5.541105527638191e-05, - "loss": 5.1161, - "step": 44882 - }, - { - "epoch": 23.40704041720991, - "grad_norm": 1.5926038026809692, - "learning_rate": 5.541005025125629e-05, - "loss": 5.0075, - "step": 44883 - }, - { - "epoch": 23.40756192959583, - "grad_norm": 1.3859573602676392, - "learning_rate": 5.540904522613065e-05, - "loss": 5.6684, - "step": 44884 - }, - { - "epoch": 23.40808344198175, - "grad_norm": 1.602033257484436, - "learning_rate": 5.540804020100503e-05, - "loss": 5.3442, - "step": 44885 - }, - { - "epoch": 23.408604954367668, - "grad_norm": 1.546629548072815, - "learning_rate": 5.5407035175879394e-05, - "loss": 5.4811, - "step": 44886 - }, - { - "epoch": 23.409126466753584, - "grad_norm": 1.4871773719787598, - "learning_rate": 5.540603015075377e-05, - "loss": 5.7262, - "step": 44887 - }, - { - "epoch": 23.409647979139503, - "grad_norm": 1.4014136791229248, - "learning_rate": 5.540502512562814e-05, - "loss": 5.4074, - "step": 44888 - }, - { - "epoch": 23.410169491525423, - "grad_norm": 1.6141948699951172, - "learning_rate": 5.540402010050252e-05, - "loss": 5.3655, - "step": 44889 - }, - { - "epoch": 23.410691003911342, - "grad_norm": 1.480941653251648, - "learning_rate": 5.5403015075376885e-05, - "loss": 5.3766, - "step": 44890 - }, - { - "epoch": 23.41121251629726, - "grad_norm": 1.6376103162765503, - "learning_rate": 5.540201005025126e-05, - "loss": 5.0988, - "step": 44891 - }, - { - "epoch": 23.41173402868318, - "grad_norm": 1.6026805639266968, - "learning_rate": 5.540100502512563e-05, - "loss": 5.0585, - "step": 44892 - }, - { - "epoch": 23.4122555410691, - "grad_norm": 1.6583120822906494, - "learning_rate": 5.5400000000000005e-05, - "loss": 5.2839, - "step": 44893 - }, - { - "epoch": 23.41277705345502, - "grad_norm": 1.5726120471954346, - "learning_rate": 5.5398994974874376e-05, - "loss": 4.922, - "step": 44894 - }, - { - "epoch": 23.41329856584094, - "grad_norm": 1.5792877674102783, - "learning_rate": 5.539798994974874e-05, - "loss": 4.9949, - "step": 44895 - }, - { - "epoch": 23.41382007822686, - "grad_norm": 1.4660223722457886, - "learning_rate": 5.539698492462312e-05, - "loss": 5.633, - "step": 44896 - }, - { - "epoch": 23.41434159061278, - "grad_norm": 1.4152697324752808, - "learning_rate": 5.539597989949748e-05, - "loss": 5.31, - "step": 44897 - }, - { - "epoch": 23.414863102998694, - "grad_norm": 1.4570657014846802, - "learning_rate": 5.539497487437186e-05, - "loss": 5.4195, - "step": 44898 - }, - { - "epoch": 23.415384615384614, - "grad_norm": 1.5786123275756836, - "learning_rate": 5.539396984924623e-05, - "loss": 5.2348, - "step": 44899 - }, - { - "epoch": 23.415906127770533, - "grad_norm": 1.586459994316101, - "learning_rate": 5.539296482412061e-05, - "loss": 5.3063, - "step": 44900 - }, - { - "epoch": 23.416427640156453, - "grad_norm": 1.5395164489746094, - "learning_rate": 5.539195979899497e-05, - "loss": 5.0393, - "step": 44901 - }, - { - "epoch": 23.416949152542372, - "grad_norm": 1.4962353706359863, - "learning_rate": 5.539095477386935e-05, - "loss": 5.3445, - "step": 44902 - }, - { - "epoch": 23.41747066492829, - "grad_norm": 1.5453811883926392, - "learning_rate": 5.538994974874372e-05, - "loss": 5.334, - "step": 44903 - }, - { - "epoch": 23.41799217731421, - "grad_norm": 1.6115304231643677, - "learning_rate": 5.53889447236181e-05, - "loss": 5.1814, - "step": 44904 - }, - { - "epoch": 23.41851368970013, - "grad_norm": 1.5551193952560425, - "learning_rate": 5.5387939698492464e-05, - "loss": 5.1218, - "step": 44905 - }, - { - "epoch": 23.41903520208605, - "grad_norm": 1.5897161960601807, - "learning_rate": 5.538693467336684e-05, - "loss": 4.8642, - "step": 44906 - }, - { - "epoch": 23.41955671447197, - "grad_norm": 1.6551594734191895, - "learning_rate": 5.5385929648241206e-05, - "loss": 4.8146, - "step": 44907 - }, - { - "epoch": 23.42007822685789, - "grad_norm": 2.199145793914795, - "learning_rate": 5.5384924623115584e-05, - "loss": 4.8774, - "step": 44908 - }, - { - "epoch": 23.42059973924381, - "grad_norm": 1.5411579608917236, - "learning_rate": 5.5383919597989955e-05, - "loss": 5.3269, - "step": 44909 - }, - { - "epoch": 23.421121251629724, - "grad_norm": 1.4980368614196777, - "learning_rate": 5.538291457286432e-05, - "loss": 5.4747, - "step": 44910 - }, - { - "epoch": 23.421642764015644, - "grad_norm": 1.6050505638122559, - "learning_rate": 5.53819095477387e-05, - "loss": 5.1229, - "step": 44911 - }, - { - "epoch": 23.422164276401563, - "grad_norm": 1.4569580554962158, - "learning_rate": 5.538090452261306e-05, - "loss": 5.8128, - "step": 44912 - }, - { - "epoch": 23.422685788787483, - "grad_norm": 1.4727782011032104, - "learning_rate": 5.537989949748744e-05, - "loss": 5.0906, - "step": 44913 - }, - { - "epoch": 23.423207301173402, - "grad_norm": 1.7206674814224243, - "learning_rate": 5.537889447236181e-05, - "loss": 4.9645, - "step": 44914 - }, - { - "epoch": 23.423728813559322, - "grad_norm": 1.4313329458236694, - "learning_rate": 5.537788944723619e-05, - "loss": 5.3439, - "step": 44915 - }, - { - "epoch": 23.42425032594524, - "grad_norm": 1.613979458808899, - "learning_rate": 5.537688442211055e-05, - "loss": 5.2015, - "step": 44916 - }, - { - "epoch": 23.42477183833116, - "grad_norm": 1.6038111448287964, - "learning_rate": 5.537587939698493e-05, - "loss": 5.3326, - "step": 44917 - }, - { - "epoch": 23.42529335071708, - "grad_norm": 1.5882784128189087, - "learning_rate": 5.5374874371859295e-05, - "loss": 5.2954, - "step": 44918 - }, - { - "epoch": 23.425814863103, - "grad_norm": 1.6646360158920288, - "learning_rate": 5.537386934673367e-05, - "loss": 5.2949, - "step": 44919 - }, - { - "epoch": 23.42633637548892, - "grad_norm": 1.6462442874908447, - "learning_rate": 5.5372864321608044e-05, - "loss": 5.1533, - "step": 44920 - }, - { - "epoch": 23.42685788787484, - "grad_norm": 1.4788990020751953, - "learning_rate": 5.537185929648242e-05, - "loss": 4.9927, - "step": 44921 - }, - { - "epoch": 23.427379400260754, - "grad_norm": 1.533339500427246, - "learning_rate": 5.5370854271356786e-05, - "loss": 5.2571, - "step": 44922 - }, - { - "epoch": 23.427900912646674, - "grad_norm": 1.6141910552978516, - "learning_rate": 5.536984924623116e-05, - "loss": 5.3332, - "step": 44923 - }, - { - "epoch": 23.428422425032593, - "grad_norm": 1.5784156322479248, - "learning_rate": 5.5368844221105535e-05, - "loss": 5.0632, - "step": 44924 - }, - { - "epoch": 23.428943937418513, - "grad_norm": 1.5659290552139282, - "learning_rate": 5.53678391959799e-05, - "loss": 5.513, - "step": 44925 - }, - { - "epoch": 23.429465449804432, - "grad_norm": 1.7315040826797485, - "learning_rate": 5.536683417085428e-05, - "loss": 4.5922, - "step": 44926 - }, - { - "epoch": 23.429986962190352, - "grad_norm": 1.5244708061218262, - "learning_rate": 5.536582914572864e-05, - "loss": 5.3048, - "step": 44927 - }, - { - "epoch": 23.43050847457627, - "grad_norm": 1.5128982067108154, - "learning_rate": 5.536482412060302e-05, - "loss": 5.6634, - "step": 44928 - }, - { - "epoch": 23.43102998696219, - "grad_norm": 1.599971055984497, - "learning_rate": 5.536381909547739e-05, - "loss": 5.2492, - "step": 44929 - }, - { - "epoch": 23.43155149934811, - "grad_norm": 1.4794501066207886, - "learning_rate": 5.536281407035177e-05, - "loss": 5.0591, - "step": 44930 - }, - { - "epoch": 23.43207301173403, - "grad_norm": 1.4518438577651978, - "learning_rate": 5.536180904522613e-05, - "loss": 5.6221, - "step": 44931 - }, - { - "epoch": 23.43259452411995, - "grad_norm": 1.4881998300552368, - "learning_rate": 5.536080402010051e-05, - "loss": 5.3812, - "step": 44932 - }, - { - "epoch": 23.43311603650587, - "grad_norm": 1.5835877656936646, - "learning_rate": 5.5359798994974874e-05, - "loss": 5.0213, - "step": 44933 - }, - { - "epoch": 23.433637548891785, - "grad_norm": 1.540120005607605, - "learning_rate": 5.535879396984925e-05, - "loss": 5.3484, - "step": 44934 - }, - { - "epoch": 23.434159061277704, - "grad_norm": 1.5184276103973389, - "learning_rate": 5.535778894472362e-05, - "loss": 5.7223, - "step": 44935 - }, - { - "epoch": 23.434680573663623, - "grad_norm": 1.4848181009292603, - "learning_rate": 5.535678391959799e-05, - "loss": 5.2341, - "step": 44936 - }, - { - "epoch": 23.435202086049543, - "grad_norm": 1.6670907735824585, - "learning_rate": 5.5355778894472365e-05, - "loss": 4.8004, - "step": 44937 - }, - { - "epoch": 23.435723598435462, - "grad_norm": 1.458491325378418, - "learning_rate": 5.535477386934673e-05, - "loss": 5.253, - "step": 44938 - }, - { - "epoch": 23.436245110821382, - "grad_norm": 1.4633561372756958, - "learning_rate": 5.535376884422111e-05, - "loss": 4.8008, - "step": 44939 - }, - { - "epoch": 23.4367666232073, - "grad_norm": 1.5619767904281616, - "learning_rate": 5.535276381909548e-05, - "loss": 5.4754, - "step": 44940 - }, - { - "epoch": 23.43728813559322, - "grad_norm": 1.618003487586975, - "learning_rate": 5.5351758793969856e-05, - "loss": 5.3342, - "step": 44941 - }, - { - "epoch": 23.43780964797914, - "grad_norm": 1.5485447645187378, - "learning_rate": 5.535075376884422e-05, - "loss": 5.4219, - "step": 44942 - }, - { - "epoch": 23.43833116036506, - "grad_norm": 1.600870132446289, - "learning_rate": 5.53497487437186e-05, - "loss": 5.4811, - "step": 44943 - }, - { - "epoch": 23.43885267275098, - "grad_norm": 1.4615960121154785, - "learning_rate": 5.534874371859297e-05, - "loss": 5.1631, - "step": 44944 - }, - { - "epoch": 23.4393741851369, - "grad_norm": 1.6294506788253784, - "learning_rate": 5.534773869346735e-05, - "loss": 5.3379, - "step": 44945 - }, - { - "epoch": 23.439895697522815, - "grad_norm": 1.5913339853286743, - "learning_rate": 5.534673366834171e-05, - "loss": 5.2392, - "step": 44946 - }, - { - "epoch": 23.440417209908734, - "grad_norm": 1.5544298887252808, - "learning_rate": 5.534572864321609e-05, - "loss": 5.2093, - "step": 44947 - }, - { - "epoch": 23.440938722294653, - "grad_norm": 1.5079365968704224, - "learning_rate": 5.5344723618090453e-05, - "loss": 5.2516, - "step": 44948 - }, - { - "epoch": 23.441460234680573, - "grad_norm": 1.5498720407485962, - "learning_rate": 5.5343718592964824e-05, - "loss": 5.1318, - "step": 44949 - }, - { - "epoch": 23.441981747066492, - "grad_norm": 1.5529274940490723, - "learning_rate": 5.53427135678392e-05, - "loss": 5.0902, - "step": 44950 - }, - { - "epoch": 23.442503259452412, - "grad_norm": 1.494611144065857, - "learning_rate": 5.5341708542713567e-05, - "loss": 5.0036, - "step": 44951 - }, - { - "epoch": 23.44302477183833, - "grad_norm": 1.6769306659698486, - "learning_rate": 5.5340703517587944e-05, - "loss": 4.7166, - "step": 44952 - }, - { - "epoch": 23.44354628422425, - "grad_norm": 1.5726009607315063, - "learning_rate": 5.533969849246231e-05, - "loss": 5.4001, - "step": 44953 - }, - { - "epoch": 23.44406779661017, - "grad_norm": 1.5431960821151733, - "learning_rate": 5.5338693467336686e-05, - "loss": 4.9114, - "step": 44954 - }, - { - "epoch": 23.44458930899609, - "grad_norm": 1.4167211055755615, - "learning_rate": 5.533768844221106e-05, - "loss": 5.3506, - "step": 44955 - }, - { - "epoch": 23.44511082138201, - "grad_norm": 1.4785369634628296, - "learning_rate": 5.5336683417085435e-05, - "loss": 5.5445, - "step": 44956 - }, - { - "epoch": 23.44563233376793, - "grad_norm": 1.662868618965149, - "learning_rate": 5.53356783919598e-05, - "loss": 5.1304, - "step": 44957 - }, - { - "epoch": 23.446153846153845, - "grad_norm": 1.7496687173843384, - "learning_rate": 5.533467336683418e-05, - "loss": 4.8252, - "step": 44958 - }, - { - "epoch": 23.446675358539764, - "grad_norm": 1.65021550655365, - "learning_rate": 5.533366834170854e-05, - "loss": 5.2008, - "step": 44959 - }, - { - "epoch": 23.447196870925683, - "grad_norm": 1.4982399940490723, - "learning_rate": 5.533266331658292e-05, - "loss": 5.496, - "step": 44960 - }, - { - "epoch": 23.447718383311603, - "grad_norm": 1.6222468614578247, - "learning_rate": 5.533165829145729e-05, - "loss": 4.6989, - "step": 44961 - }, - { - "epoch": 23.448239895697522, - "grad_norm": 1.6078394651412964, - "learning_rate": 5.5330653266331655e-05, - "loss": 5.3305, - "step": 44962 - }, - { - "epoch": 23.448761408083442, - "grad_norm": 1.5726734399795532, - "learning_rate": 5.532964824120603e-05, - "loss": 5.0791, - "step": 44963 - }, - { - "epoch": 23.44928292046936, - "grad_norm": 1.512732982635498, - "learning_rate": 5.53286432160804e-05, - "loss": 5.3907, - "step": 44964 - }, - { - "epoch": 23.44980443285528, - "grad_norm": 1.6080949306488037, - "learning_rate": 5.5327638190954775e-05, - "loss": 5.2398, - "step": 44965 - }, - { - "epoch": 23.4503259452412, - "grad_norm": 1.5002926588058472, - "learning_rate": 5.5326633165829146e-05, - "loss": 5.185, - "step": 44966 - }, - { - "epoch": 23.45084745762712, - "grad_norm": 1.5301555395126343, - "learning_rate": 5.5325628140703524e-05, - "loss": 5.0533, - "step": 44967 - }, - { - "epoch": 23.45136897001304, - "grad_norm": 1.6135509014129639, - "learning_rate": 5.532462311557789e-05, - "loss": 5.3513, - "step": 44968 - }, - { - "epoch": 23.45189048239896, - "grad_norm": 1.5998921394348145, - "learning_rate": 5.5323618090452266e-05, - "loss": 5.1526, - "step": 44969 - }, - { - "epoch": 23.452411994784875, - "grad_norm": 1.5436983108520508, - "learning_rate": 5.532261306532664e-05, - "loss": 5.2006, - "step": 44970 - }, - { - "epoch": 23.452933507170794, - "grad_norm": 1.5689977407455444, - "learning_rate": 5.5321608040201015e-05, - "loss": 5.2237, - "step": 44971 - }, - { - "epoch": 23.453455019556714, - "grad_norm": 1.4503297805786133, - "learning_rate": 5.532060301507538e-05, - "loss": 5.4209, - "step": 44972 - }, - { - "epoch": 23.453976531942633, - "grad_norm": 1.5644619464874268, - "learning_rate": 5.531959798994976e-05, - "loss": 5.4503, - "step": 44973 - }, - { - "epoch": 23.454498044328552, - "grad_norm": 1.5319979190826416, - "learning_rate": 5.531859296482412e-05, - "loss": 5.168, - "step": 44974 - }, - { - "epoch": 23.455019556714472, - "grad_norm": 1.4716757535934448, - "learning_rate": 5.531758793969849e-05, - "loss": 5.3626, - "step": 44975 - }, - { - "epoch": 23.45554106910039, - "grad_norm": 1.5932561159133911, - "learning_rate": 5.531658291457287e-05, - "loss": 5.0097, - "step": 44976 - }, - { - "epoch": 23.45606258148631, - "grad_norm": 1.7147753238677979, - "learning_rate": 5.5315577889447234e-05, - "loss": 4.615, - "step": 44977 - }, - { - "epoch": 23.45658409387223, - "grad_norm": 1.7185699939727783, - "learning_rate": 5.531457286432161e-05, - "loss": 5.266, - "step": 44978 - }, - { - "epoch": 23.45710560625815, - "grad_norm": 1.6241304874420166, - "learning_rate": 5.5313567839195976e-05, - "loss": 5.5261, - "step": 44979 - }, - { - "epoch": 23.45762711864407, - "grad_norm": 1.5530451536178589, - "learning_rate": 5.5312562814070354e-05, - "loss": 5.4027, - "step": 44980 - }, - { - "epoch": 23.45814863102999, - "grad_norm": 1.5205875635147095, - "learning_rate": 5.5311557788944725e-05, - "loss": 5.5387, - "step": 44981 - }, - { - "epoch": 23.458670143415905, - "grad_norm": 1.4860084056854248, - "learning_rate": 5.53105527638191e-05, - "loss": 5.5759, - "step": 44982 - }, - { - "epoch": 23.459191655801824, - "grad_norm": 1.6300199031829834, - "learning_rate": 5.530954773869347e-05, - "loss": 5.1469, - "step": 44983 - }, - { - "epoch": 23.459713168187744, - "grad_norm": 1.537933349609375, - "learning_rate": 5.5308542713567845e-05, - "loss": 5.3099, - "step": 44984 - }, - { - "epoch": 23.460234680573663, - "grad_norm": 1.581310749053955, - "learning_rate": 5.530753768844221e-05, - "loss": 5.4324, - "step": 44985 - }, - { - "epoch": 23.460756192959582, - "grad_norm": 1.4879015684127808, - "learning_rate": 5.530653266331659e-05, - "loss": 5.1618, - "step": 44986 - }, - { - "epoch": 23.461277705345502, - "grad_norm": 1.5952553749084473, - "learning_rate": 5.530552763819096e-05, - "loss": 4.6894, - "step": 44987 - }, - { - "epoch": 23.46179921773142, - "grad_norm": 1.5920072793960571, - "learning_rate": 5.530452261306532e-05, - "loss": 5.4025, - "step": 44988 - }, - { - "epoch": 23.46232073011734, - "grad_norm": 1.7405831813812256, - "learning_rate": 5.53035175879397e-05, - "loss": 4.6019, - "step": 44989 - }, - { - "epoch": 23.46284224250326, - "grad_norm": 1.5043755769729614, - "learning_rate": 5.530251256281407e-05, - "loss": 5.6957, - "step": 44990 - }, - { - "epoch": 23.46336375488918, - "grad_norm": 1.6279869079589844, - "learning_rate": 5.530150753768845e-05, - "loss": 4.9453, - "step": 44991 - }, - { - "epoch": 23.4638852672751, - "grad_norm": 1.621447205543518, - "learning_rate": 5.5300502512562814e-05, - "loss": 5.2583, - "step": 44992 - }, - { - "epoch": 23.46440677966102, - "grad_norm": 1.4669862985610962, - "learning_rate": 5.529949748743719e-05, - "loss": 5.5507, - "step": 44993 - }, - { - "epoch": 23.464928292046935, - "grad_norm": 1.5908455848693848, - "learning_rate": 5.5298492462311556e-05, - "loss": 5.604, - "step": 44994 - }, - { - "epoch": 23.465449804432854, - "grad_norm": 1.5916928052902222, - "learning_rate": 5.5297487437185933e-05, - "loss": 5.2786, - "step": 44995 - }, - { - "epoch": 23.465971316818774, - "grad_norm": 1.626563310623169, - "learning_rate": 5.5296482412060305e-05, - "loss": 5.1868, - "step": 44996 - }, - { - "epoch": 23.466492829204693, - "grad_norm": 1.466921329498291, - "learning_rate": 5.529547738693468e-05, - "loss": 5.3598, - "step": 44997 - }, - { - "epoch": 23.467014341590613, - "grad_norm": 1.5370981693267822, - "learning_rate": 5.529447236180905e-05, - "loss": 5.4535, - "step": 44998 - }, - { - "epoch": 23.467535853976532, - "grad_norm": 1.565901279449463, - "learning_rate": 5.5293467336683424e-05, - "loss": 5.3905, - "step": 44999 - }, - { - "epoch": 23.46805736636245, - "grad_norm": 1.5909582376480103, - "learning_rate": 5.529246231155779e-05, - "loss": 5.1209, - "step": 45000 - }, - { - "epoch": 23.46805736636245, - "eval_loss": 5.347715377807617, - "eval_runtime": 42.6716, - "eval_samples_per_second": 28.731, - "eval_steps_per_second": 3.609, - "step": 45000 - }, - { - "epoch": 23.46857887874837, - "grad_norm": 1.4612352848052979, - "learning_rate": 5.5291457286432167e-05, - "loss": 5.1561, - "step": 45001 - }, - { - "epoch": 23.46910039113429, - "grad_norm": 1.4771522283554077, - "learning_rate": 5.529045226130654e-05, - "loss": 5.3964, - "step": 45002 - }, - { - "epoch": 23.46962190352021, - "grad_norm": 1.610545039176941, - "learning_rate": 5.52894472361809e-05, - "loss": 5.1105, - "step": 45003 - }, - { - "epoch": 23.47014341590613, - "grad_norm": 1.508809208869934, - "learning_rate": 5.528844221105528e-05, - "loss": 5.336, - "step": 45004 - }, - { - "epoch": 23.470664928292045, - "grad_norm": 1.5830525159835815, - "learning_rate": 5.5287437185929644e-05, - "loss": 5.4074, - "step": 45005 - }, - { - "epoch": 23.471186440677965, - "grad_norm": 1.5725648403167725, - "learning_rate": 5.528643216080402e-05, - "loss": 5.3925, - "step": 45006 - }, - { - "epoch": 23.471707953063884, - "grad_norm": 1.500412940979004, - "learning_rate": 5.528542713567839e-05, - "loss": 5.5655, - "step": 45007 - }, - { - "epoch": 23.472229465449804, - "grad_norm": 1.565407156944275, - "learning_rate": 5.528442211055277e-05, - "loss": 5.4518, - "step": 45008 - }, - { - "epoch": 23.472750977835723, - "grad_norm": 1.591524600982666, - "learning_rate": 5.5283417085427135e-05, - "loss": 5.4515, - "step": 45009 - }, - { - "epoch": 23.473272490221643, - "grad_norm": 1.6261792182922363, - "learning_rate": 5.528241206030151e-05, - "loss": 5.3386, - "step": 45010 - }, - { - "epoch": 23.473794002607562, - "grad_norm": 1.4288294315338135, - "learning_rate": 5.5281407035175884e-05, - "loss": 5.7177, - "step": 45011 - }, - { - "epoch": 23.47431551499348, - "grad_norm": 1.6563650369644165, - "learning_rate": 5.528040201005026e-05, - "loss": 5.2406, - "step": 45012 - }, - { - "epoch": 23.4748370273794, - "grad_norm": 1.48397696018219, - "learning_rate": 5.5279396984924626e-05, - "loss": 5.659, - "step": 45013 - }, - { - "epoch": 23.47535853976532, - "grad_norm": 1.5655341148376465, - "learning_rate": 5.5278391959799004e-05, - "loss": 5.0201, - "step": 45014 - }, - { - "epoch": 23.47588005215124, - "grad_norm": 1.4763957262039185, - "learning_rate": 5.527738693467337e-05, - "loss": 5.6983, - "step": 45015 - }, - { - "epoch": 23.47640156453716, - "grad_norm": 1.5843526124954224, - "learning_rate": 5.527638190954774e-05, - "loss": 4.9945, - "step": 45016 - }, - { - "epoch": 23.476923076923075, - "grad_norm": 1.4974619150161743, - "learning_rate": 5.527537688442212e-05, - "loss": 5.3289, - "step": 45017 - }, - { - "epoch": 23.477444589308995, - "grad_norm": 1.598821997642517, - "learning_rate": 5.527437185929648e-05, - "loss": 5.2562, - "step": 45018 - }, - { - "epoch": 23.477966101694914, - "grad_norm": 1.6421034336090088, - "learning_rate": 5.527336683417086e-05, - "loss": 5.0628, - "step": 45019 - }, - { - "epoch": 23.478487614080834, - "grad_norm": 1.3977463245391846, - "learning_rate": 5.527236180904522e-05, - "loss": 4.9353, - "step": 45020 - }, - { - "epoch": 23.479009126466753, - "grad_norm": 1.5751514434814453, - "learning_rate": 5.52713567839196e-05, - "loss": 4.7121, - "step": 45021 - }, - { - "epoch": 23.479530638852673, - "grad_norm": 1.5127168893814087, - "learning_rate": 5.527035175879397e-05, - "loss": 5.3457, - "step": 45022 - }, - { - "epoch": 23.480052151238592, - "grad_norm": 1.47398841381073, - "learning_rate": 5.526934673366835e-05, - "loss": 5.1318, - "step": 45023 - }, - { - "epoch": 23.48057366362451, - "grad_norm": 1.4608360528945923, - "learning_rate": 5.5268341708542714e-05, - "loss": 5.4417, - "step": 45024 - }, - { - "epoch": 23.48109517601043, - "grad_norm": 1.483828067779541, - "learning_rate": 5.526733668341709e-05, - "loss": 5.4418, - "step": 45025 - }, - { - "epoch": 23.48161668839635, - "grad_norm": 1.7154289484024048, - "learning_rate": 5.5266331658291456e-05, - "loss": 5.4141, - "step": 45026 - }, - { - "epoch": 23.48213820078227, - "grad_norm": 1.5350192785263062, - "learning_rate": 5.5265326633165834e-05, - "loss": 4.5566, - "step": 45027 - }, - { - "epoch": 23.48265971316819, - "grad_norm": 1.5952281951904297, - "learning_rate": 5.5264321608040205e-05, - "loss": 5.2085, - "step": 45028 - }, - { - "epoch": 23.483181225554105, - "grad_norm": 1.4713040590286255, - "learning_rate": 5.526331658291457e-05, - "loss": 5.3471, - "step": 45029 - }, - { - "epoch": 23.483702737940025, - "grad_norm": 1.6902929544448853, - "learning_rate": 5.526231155778895e-05, - "loss": 5.2995, - "step": 45030 - }, - { - "epoch": 23.484224250325944, - "grad_norm": 1.4889923334121704, - "learning_rate": 5.526130653266331e-05, - "loss": 5.0452, - "step": 45031 - }, - { - "epoch": 23.484745762711864, - "grad_norm": 1.5931788682937622, - "learning_rate": 5.526030150753769e-05, - "loss": 5.1785, - "step": 45032 - }, - { - "epoch": 23.485267275097783, - "grad_norm": 1.524707555770874, - "learning_rate": 5.525929648241206e-05, - "loss": 5.0066, - "step": 45033 - }, - { - "epoch": 23.485788787483703, - "grad_norm": 1.4234238862991333, - "learning_rate": 5.525829145728644e-05, - "loss": 5.0855, - "step": 45034 - }, - { - "epoch": 23.486310299869622, - "grad_norm": 1.5294870138168335, - "learning_rate": 5.52572864321608e-05, - "loss": 5.1467, - "step": 45035 - }, - { - "epoch": 23.48683181225554, - "grad_norm": 1.5130001306533813, - "learning_rate": 5.525628140703518e-05, - "loss": 4.8101, - "step": 45036 - }, - { - "epoch": 23.48735332464146, - "grad_norm": 1.5002493858337402, - "learning_rate": 5.525527638190955e-05, - "loss": 5.3782, - "step": 45037 - }, - { - "epoch": 23.48787483702738, - "grad_norm": 1.7037707567214966, - "learning_rate": 5.525427135678393e-05, - "loss": 4.7603, - "step": 45038 - }, - { - "epoch": 23.4883963494133, - "grad_norm": 1.521496057510376, - "learning_rate": 5.5253266331658294e-05, - "loss": 5.4269, - "step": 45039 - }, - { - "epoch": 23.48891786179922, - "grad_norm": 1.6229301691055298, - "learning_rate": 5.525226130653267e-05, - "loss": 4.833, - "step": 45040 - }, - { - "epoch": 23.489439374185135, - "grad_norm": 1.658880591392517, - "learning_rate": 5.5251256281407036e-05, - "loss": 5.1882, - "step": 45041 - }, - { - "epoch": 23.489960886571055, - "grad_norm": 1.529630184173584, - "learning_rate": 5.525025125628141e-05, - "loss": 5.1141, - "step": 45042 - }, - { - "epoch": 23.490482398956974, - "grad_norm": 1.714160442352295, - "learning_rate": 5.5249246231155785e-05, - "loss": 4.7222, - "step": 45043 - }, - { - "epoch": 23.491003911342894, - "grad_norm": 1.4787331819534302, - "learning_rate": 5.524824120603015e-05, - "loss": 4.9673, - "step": 45044 - }, - { - "epoch": 23.491525423728813, - "grad_norm": 1.556222915649414, - "learning_rate": 5.524723618090453e-05, - "loss": 5.4708, - "step": 45045 - }, - { - "epoch": 23.492046936114733, - "grad_norm": 1.5868175029754639, - "learning_rate": 5.524623115577889e-05, - "loss": 5.1471, - "step": 45046 - }, - { - "epoch": 23.492568448500652, - "grad_norm": 1.5689142942428589, - "learning_rate": 5.524522613065327e-05, - "loss": 5.2815, - "step": 45047 - }, - { - "epoch": 23.49308996088657, - "grad_norm": 1.530940294265747, - "learning_rate": 5.524422110552764e-05, - "loss": 5.8015, - "step": 45048 - }, - { - "epoch": 23.49361147327249, - "grad_norm": 1.564393162727356, - "learning_rate": 5.524321608040202e-05, - "loss": 5.6875, - "step": 45049 - }, - { - "epoch": 23.49413298565841, - "grad_norm": 1.6032979488372803, - "learning_rate": 5.524221105527638e-05, - "loss": 4.9147, - "step": 45050 - }, - { - "epoch": 23.49465449804433, - "grad_norm": 1.3761777877807617, - "learning_rate": 5.524120603015076e-05, - "loss": 4.7668, - "step": 45051 - }, - { - "epoch": 23.49517601043025, - "grad_norm": 1.7062867879867554, - "learning_rate": 5.5240201005025124e-05, - "loss": 5.1074, - "step": 45052 - }, - { - "epoch": 23.495697522816165, - "grad_norm": 1.5078603029251099, - "learning_rate": 5.52391959798995e-05, - "loss": 5.0677, - "step": 45053 - }, - { - "epoch": 23.496219035202085, - "grad_norm": 1.5129741430282593, - "learning_rate": 5.523819095477387e-05, - "loss": 5.3398, - "step": 45054 - }, - { - "epoch": 23.496740547588004, - "grad_norm": 1.5182323455810547, - "learning_rate": 5.523718592964824e-05, - "loss": 5.4624, - "step": 45055 - }, - { - "epoch": 23.497262059973924, - "grad_norm": 1.4875986576080322, - "learning_rate": 5.5236180904522615e-05, - "loss": 5.1147, - "step": 45056 - }, - { - "epoch": 23.497783572359843, - "grad_norm": 1.7591707706451416, - "learning_rate": 5.5235175879396986e-05, - "loss": 4.9003, - "step": 45057 - }, - { - "epoch": 23.498305084745763, - "grad_norm": 1.5873141288757324, - "learning_rate": 5.5234170854271364e-05, - "loss": 4.5909, - "step": 45058 - }, - { - "epoch": 23.498826597131682, - "grad_norm": 1.6183271408081055, - "learning_rate": 5.523316582914573e-05, - "loss": 5.0348, - "step": 45059 - }, - { - "epoch": 23.4993481095176, - "grad_norm": 1.5475101470947266, - "learning_rate": 5.5232160804020106e-05, - "loss": 5.3695, - "step": 45060 - }, - { - "epoch": 23.49986962190352, - "grad_norm": 1.5247535705566406, - "learning_rate": 5.523115577889447e-05, - "loss": 5.2003, - "step": 45061 - }, - { - "epoch": 23.50039113428944, - "grad_norm": 1.6140753030776978, - "learning_rate": 5.523015075376885e-05, - "loss": 5.2165, - "step": 45062 - }, - { - "epoch": 23.50091264667536, - "grad_norm": 1.5212832689285278, - "learning_rate": 5.522914572864322e-05, - "loss": 5.3545, - "step": 45063 - }, - { - "epoch": 23.50143415906128, - "grad_norm": 1.5814288854599, - "learning_rate": 5.52281407035176e-05, - "loss": 5.6908, - "step": 45064 - }, - { - "epoch": 23.501955671447195, - "grad_norm": 1.5545016527175903, - "learning_rate": 5.522713567839196e-05, - "loss": 5.0501, - "step": 45065 - }, - { - "epoch": 23.502477183833115, - "grad_norm": 1.5836246013641357, - "learning_rate": 5.522613065326634e-05, - "loss": 5.3706, - "step": 45066 - }, - { - "epoch": 23.502998696219034, - "grad_norm": 1.5750166177749634, - "learning_rate": 5.5225125628140703e-05, - "loss": 5.3314, - "step": 45067 - }, - { - "epoch": 23.503520208604954, - "grad_norm": 1.5846303701400757, - "learning_rate": 5.5224120603015074e-05, - "loss": 4.9436, - "step": 45068 - }, - { - "epoch": 23.504041720990873, - "grad_norm": 1.5784651041030884, - "learning_rate": 5.522311557788945e-05, - "loss": 5.355, - "step": 45069 - }, - { - "epoch": 23.504563233376793, - "grad_norm": 1.619275689125061, - "learning_rate": 5.5222110552763817e-05, - "loss": 5.189, - "step": 45070 - }, - { - "epoch": 23.505084745762712, - "grad_norm": 1.5252935886383057, - "learning_rate": 5.5221105527638194e-05, - "loss": 5.2501, - "step": 45071 - }, - { - "epoch": 23.50560625814863, - "grad_norm": 1.4414507150650024, - "learning_rate": 5.522010050251256e-05, - "loss": 5.4781, - "step": 45072 - }, - { - "epoch": 23.50612777053455, - "grad_norm": 1.5977143049240112, - "learning_rate": 5.5219095477386937e-05, - "loss": 5.4054, - "step": 45073 - }, - { - "epoch": 23.50664928292047, - "grad_norm": 1.5444453954696655, - "learning_rate": 5.521809045226131e-05, - "loss": 5.1981, - "step": 45074 - }, - { - "epoch": 23.50717079530639, - "grad_norm": 1.5270720720291138, - "learning_rate": 5.5217085427135685e-05, - "loss": 5.0579, - "step": 45075 - }, - { - "epoch": 23.50769230769231, - "grad_norm": 1.548493504524231, - "learning_rate": 5.521608040201005e-05, - "loss": 5.0191, - "step": 45076 - }, - { - "epoch": 23.508213820078225, - "grad_norm": 1.5735608339309692, - "learning_rate": 5.521507537688443e-05, - "loss": 4.6551, - "step": 45077 - }, - { - "epoch": 23.508735332464145, - "grad_norm": 1.5660817623138428, - "learning_rate": 5.52140703517588e-05, - "loss": 4.7702, - "step": 45078 - }, - { - "epoch": 23.509256844850064, - "grad_norm": 1.5818474292755127, - "learning_rate": 5.5213065326633176e-05, - "loss": 5.3364, - "step": 45079 - }, - { - "epoch": 23.509778357235984, - "grad_norm": 1.510961651802063, - "learning_rate": 5.521206030150754e-05, - "loss": 5.3769, - "step": 45080 - }, - { - "epoch": 23.510299869621903, - "grad_norm": 1.5668984651565552, - "learning_rate": 5.5211055276381905e-05, - "loss": 5.5767, - "step": 45081 - }, - { - "epoch": 23.510821382007823, - "grad_norm": 1.5317301750183105, - "learning_rate": 5.521005025125628e-05, - "loss": 5.0237, - "step": 45082 - }, - { - "epoch": 23.511342894393742, - "grad_norm": 1.5393564701080322, - "learning_rate": 5.5209045226130654e-05, - "loss": 5.335, - "step": 45083 - }, - { - "epoch": 23.51186440677966, - "grad_norm": 1.5804204940795898, - "learning_rate": 5.520804020100503e-05, - "loss": 5.4783, - "step": 45084 - }, - { - "epoch": 23.51238591916558, - "grad_norm": 1.6279453039169312, - "learning_rate": 5.5207035175879396e-05, - "loss": 5.363, - "step": 45085 - }, - { - "epoch": 23.5129074315515, - "grad_norm": 1.6443127393722534, - "learning_rate": 5.5206030150753774e-05, - "loss": 5.3165, - "step": 45086 - }, - { - "epoch": 23.51342894393742, - "grad_norm": 1.684666633605957, - "learning_rate": 5.520502512562814e-05, - "loss": 5.1392, - "step": 45087 - }, - { - "epoch": 23.513950456323336, - "grad_norm": 1.5392242670059204, - "learning_rate": 5.5204020100502516e-05, - "loss": 5.2445, - "step": 45088 - }, - { - "epoch": 23.514471968709255, - "grad_norm": 1.5514860153198242, - "learning_rate": 5.520301507537689e-05, - "loss": 5.5353, - "step": 45089 - }, - { - "epoch": 23.514993481095175, - "grad_norm": 1.5972399711608887, - "learning_rate": 5.5202010050251265e-05, - "loss": 5.3071, - "step": 45090 - }, - { - "epoch": 23.515514993481094, - "grad_norm": 1.5658745765686035, - "learning_rate": 5.520100502512563e-05, - "loss": 5.1803, - "step": 45091 - }, - { - "epoch": 23.516036505867014, - "grad_norm": 1.5933376550674438, - "learning_rate": 5.520000000000001e-05, - "loss": 5.4945, - "step": 45092 - }, - { - "epoch": 23.516558018252933, - "grad_norm": 1.4929301738739014, - "learning_rate": 5.519899497487437e-05, - "loss": 5.3845, - "step": 45093 - }, - { - "epoch": 23.517079530638853, - "grad_norm": 1.5012513399124146, - "learning_rate": 5.519798994974875e-05, - "loss": 5.2765, - "step": 45094 - }, - { - "epoch": 23.517601043024772, - "grad_norm": 1.498672604560852, - "learning_rate": 5.519698492462312e-05, - "loss": 4.9528, - "step": 45095 - }, - { - "epoch": 23.51812255541069, - "grad_norm": 1.7148864269256592, - "learning_rate": 5.5195979899497484e-05, - "loss": 5.6454, - "step": 45096 - }, - { - "epoch": 23.51864406779661, - "grad_norm": 1.685937762260437, - "learning_rate": 5.519497487437186e-05, - "loss": 4.9297, - "step": 45097 - }, - { - "epoch": 23.51916558018253, - "grad_norm": 1.542743444442749, - "learning_rate": 5.519396984924623e-05, - "loss": 5.0165, - "step": 45098 - }, - { - "epoch": 23.51968709256845, - "grad_norm": 1.6755121946334839, - "learning_rate": 5.519296482412061e-05, - "loss": 5.0977, - "step": 45099 - }, - { - "epoch": 23.52020860495437, - "grad_norm": 1.4921413660049438, - "learning_rate": 5.5191959798994975e-05, - "loss": 5.3896, - "step": 45100 - }, - { - "epoch": 23.520730117340285, - "grad_norm": 1.6777423620224, - "learning_rate": 5.519095477386935e-05, - "loss": 5.4064, - "step": 45101 - }, - { - "epoch": 23.521251629726205, - "grad_norm": 1.5083500146865845, - "learning_rate": 5.518994974874372e-05, - "loss": 5.7307, - "step": 45102 - }, - { - "epoch": 23.521773142112124, - "grad_norm": 1.5942606925964355, - "learning_rate": 5.5188944723618095e-05, - "loss": 5.262, - "step": 45103 - }, - { - "epoch": 23.522294654498044, - "grad_norm": 1.5755467414855957, - "learning_rate": 5.5187939698492466e-05, - "loss": 5.2819, - "step": 45104 - }, - { - "epoch": 23.522816166883963, - "grad_norm": 1.5891109704971313, - "learning_rate": 5.5186934673366844e-05, - "loss": 4.9078, - "step": 45105 - }, - { - "epoch": 23.523337679269883, - "grad_norm": 1.529007911682129, - "learning_rate": 5.518592964824121e-05, - "loss": 5.5188, - "step": 45106 - }, - { - "epoch": 23.523859191655802, - "grad_norm": 1.4872251749038696, - "learning_rate": 5.5184924623115586e-05, - "loss": 5.2645, - "step": 45107 - }, - { - "epoch": 23.52438070404172, - "grad_norm": 1.6716161966323853, - "learning_rate": 5.518391959798995e-05, - "loss": 5.3739, - "step": 45108 - }, - { - "epoch": 23.52490221642764, - "grad_norm": 1.503151774406433, - "learning_rate": 5.518291457286432e-05, - "loss": 5.3105, - "step": 45109 - }, - { - "epoch": 23.52542372881356, - "grad_norm": 1.6200631856918335, - "learning_rate": 5.51819095477387e-05, - "loss": 4.7666, - "step": 45110 - }, - { - "epoch": 23.52594524119948, - "grad_norm": 1.6448620557785034, - "learning_rate": 5.5180904522613064e-05, - "loss": 5.4608, - "step": 45111 - }, - { - "epoch": 23.526466753585396, - "grad_norm": 1.600652813911438, - "learning_rate": 5.517989949748744e-05, - "loss": 4.8478, - "step": 45112 - }, - { - "epoch": 23.526988265971315, - "grad_norm": 1.551236867904663, - "learning_rate": 5.5178894472361806e-05, - "loss": 5.4102, - "step": 45113 - }, - { - "epoch": 23.527509778357235, - "grad_norm": 1.4383474588394165, - "learning_rate": 5.5177889447236184e-05, - "loss": 4.9946, - "step": 45114 - }, - { - "epoch": 23.528031290743154, - "grad_norm": 1.4939708709716797, - "learning_rate": 5.5176884422110555e-05, - "loss": 5.5127, - "step": 45115 - }, - { - "epoch": 23.528552803129074, - "grad_norm": 1.6286665201187134, - "learning_rate": 5.517587939698493e-05, - "loss": 4.9197, - "step": 45116 - }, - { - "epoch": 23.529074315514993, - "grad_norm": 1.4824366569519043, - "learning_rate": 5.51748743718593e-05, - "loss": 4.561, - "step": 45117 - }, - { - "epoch": 23.529595827900913, - "grad_norm": 1.524134635925293, - "learning_rate": 5.5173869346733674e-05, - "loss": 5.4218, - "step": 45118 - }, - { - "epoch": 23.530117340286832, - "grad_norm": 1.425180435180664, - "learning_rate": 5.517286432160804e-05, - "loss": 5.5388, - "step": 45119 - }, - { - "epoch": 23.53063885267275, - "grad_norm": 1.4767742156982422, - "learning_rate": 5.5171859296482417e-05, - "loss": 5.2861, - "step": 45120 - }, - { - "epoch": 23.53116036505867, - "grad_norm": 1.5365070104599, - "learning_rate": 5.517085427135679e-05, - "loss": 5.191, - "step": 45121 - }, - { - "epoch": 23.53168187744459, - "grad_norm": 1.491297721862793, - "learning_rate": 5.516984924623115e-05, - "loss": 5.0401, - "step": 45122 - }, - { - "epoch": 23.53220338983051, - "grad_norm": 1.701664686203003, - "learning_rate": 5.516884422110553e-05, - "loss": 4.8309, - "step": 45123 - }, - { - "epoch": 23.532724902216426, - "grad_norm": 1.5275886058807373, - "learning_rate": 5.51678391959799e-05, - "loss": 4.7821, - "step": 45124 - }, - { - "epoch": 23.533246414602345, - "grad_norm": 1.6087068319320679, - "learning_rate": 5.516683417085428e-05, - "loss": 4.9402, - "step": 45125 - }, - { - "epoch": 23.533767926988265, - "grad_norm": 1.6394647359848022, - "learning_rate": 5.516582914572864e-05, - "loss": 5.5365, - "step": 45126 - }, - { - "epoch": 23.534289439374184, - "grad_norm": 1.5398077964782715, - "learning_rate": 5.516482412060302e-05, - "loss": 5.1148, - "step": 45127 - }, - { - "epoch": 23.534810951760104, - "grad_norm": 1.6146302223205566, - "learning_rate": 5.5163819095477385e-05, - "loss": 4.9711, - "step": 45128 - }, - { - "epoch": 23.535332464146023, - "grad_norm": 1.5116279125213623, - "learning_rate": 5.516281407035176e-05, - "loss": 5.5458, - "step": 45129 - }, - { - "epoch": 23.535853976531943, - "grad_norm": 1.5122876167297363, - "learning_rate": 5.5161809045226134e-05, - "loss": 5.4928, - "step": 45130 - }, - { - "epoch": 23.536375488917862, - "grad_norm": 1.5672675371170044, - "learning_rate": 5.516080402010051e-05, - "loss": 5.2158, - "step": 45131 - }, - { - "epoch": 23.53689700130378, - "grad_norm": 1.6747959852218628, - "learning_rate": 5.5159798994974876e-05, - "loss": 4.941, - "step": 45132 - }, - { - "epoch": 23.5374185136897, - "grad_norm": 1.7433459758758545, - "learning_rate": 5.5158793969849254e-05, - "loss": 5.0159, - "step": 45133 - }, - { - "epoch": 23.53794002607562, - "grad_norm": 1.521287202835083, - "learning_rate": 5.515778894472362e-05, - "loss": 4.9901, - "step": 45134 - }, - { - "epoch": 23.53846153846154, - "grad_norm": 1.5506750345230103, - "learning_rate": 5.515678391959799e-05, - "loss": 5.0103, - "step": 45135 - }, - { - "epoch": 23.538983050847456, - "grad_norm": 1.6222010850906372, - "learning_rate": 5.515577889447237e-05, - "loss": 5.2147, - "step": 45136 - }, - { - "epoch": 23.539504563233375, - "grad_norm": 1.597408413887024, - "learning_rate": 5.515477386934673e-05, - "loss": 5.2873, - "step": 45137 - }, - { - "epoch": 23.540026075619295, - "grad_norm": 1.4901565313339233, - "learning_rate": 5.515376884422111e-05, - "loss": 5.3233, - "step": 45138 - }, - { - "epoch": 23.540547588005214, - "grad_norm": 1.472495675086975, - "learning_rate": 5.515276381909547e-05, - "loss": 5.1347, - "step": 45139 - }, - { - "epoch": 23.541069100391134, - "grad_norm": 1.5472133159637451, - "learning_rate": 5.515175879396985e-05, - "loss": 5.4053, - "step": 45140 - }, - { - "epoch": 23.541590612777053, - "grad_norm": 1.3495067358016968, - "learning_rate": 5.515075376884422e-05, - "loss": 5.862, - "step": 45141 - }, - { - "epoch": 23.542112125162973, - "grad_norm": 1.5349332094192505, - "learning_rate": 5.51497487437186e-05, - "loss": 4.9914, - "step": 45142 - }, - { - "epoch": 23.542633637548892, - "grad_norm": 1.5606087446212769, - "learning_rate": 5.5148743718592964e-05, - "loss": 4.4232, - "step": 45143 - }, - { - "epoch": 23.54315514993481, - "grad_norm": 1.6113998889923096, - "learning_rate": 5.514773869346734e-05, - "loss": 4.8711, - "step": 45144 - }, - { - "epoch": 23.54367666232073, - "grad_norm": 1.5672622919082642, - "learning_rate": 5.514673366834171e-05, - "loss": 5.2802, - "step": 45145 - }, - { - "epoch": 23.54419817470665, - "grad_norm": 1.5892664194107056, - "learning_rate": 5.514572864321609e-05, - "loss": 4.989, - "step": 45146 - }, - { - "epoch": 23.54471968709257, - "grad_norm": 1.6486588716506958, - "learning_rate": 5.5144723618090455e-05, - "loss": 5.1183, - "step": 45147 - }, - { - "epoch": 23.545241199478486, - "grad_norm": 1.488864541053772, - "learning_rate": 5.514371859296482e-05, - "loss": 5.4434, - "step": 45148 - }, - { - "epoch": 23.545762711864406, - "grad_norm": 1.4675776958465576, - "learning_rate": 5.51427135678392e-05, - "loss": 5.5661, - "step": 45149 - }, - { - "epoch": 23.546284224250325, - "grad_norm": 1.5645157098770142, - "learning_rate": 5.514170854271357e-05, - "loss": 5.0815, - "step": 45150 - }, - { - "epoch": 23.546805736636244, - "grad_norm": 1.6107739210128784, - "learning_rate": 5.5140703517587946e-05, - "loss": 5.3177, - "step": 45151 - }, - { - "epoch": 23.547327249022164, - "grad_norm": 1.6017814874649048, - "learning_rate": 5.513969849246231e-05, - "loss": 5.4182, - "step": 45152 - }, - { - "epoch": 23.547848761408083, - "grad_norm": 1.5589812994003296, - "learning_rate": 5.513869346733669e-05, - "loss": 5.1569, - "step": 45153 - }, - { - "epoch": 23.548370273794003, - "grad_norm": 1.6451574563980103, - "learning_rate": 5.513768844221105e-05, - "loss": 4.7715, - "step": 45154 - }, - { - "epoch": 23.548891786179922, - "grad_norm": 1.6286566257476807, - "learning_rate": 5.513668341708543e-05, - "loss": 5.1163, - "step": 45155 - }, - { - "epoch": 23.54941329856584, - "grad_norm": 1.5601731538772583, - "learning_rate": 5.51356783919598e-05, - "loss": 5.158, - "step": 45156 - }, - { - "epoch": 23.54993481095176, - "grad_norm": 1.6185295581817627, - "learning_rate": 5.513467336683418e-05, - "loss": 4.8593, - "step": 45157 - }, - { - "epoch": 23.55045632333768, - "grad_norm": 1.5209765434265137, - "learning_rate": 5.5133668341708544e-05, - "loss": 5.1588, - "step": 45158 - }, - { - "epoch": 23.5509778357236, - "grad_norm": 1.5844625234603882, - "learning_rate": 5.513266331658292e-05, - "loss": 4.7454, - "step": 45159 - }, - { - "epoch": 23.551499348109516, - "grad_norm": 1.56330144405365, - "learning_rate": 5.5131658291457286e-05, - "loss": 5.3782, - "step": 45160 - }, - { - "epoch": 23.552020860495436, - "grad_norm": 1.5634639263153076, - "learning_rate": 5.513065326633166e-05, - "loss": 5.2309, - "step": 45161 - }, - { - "epoch": 23.552542372881355, - "grad_norm": 1.5007258653640747, - "learning_rate": 5.5129648241206035e-05, - "loss": 4.598, - "step": 45162 - }, - { - "epoch": 23.553063885267274, - "grad_norm": 1.4781451225280762, - "learning_rate": 5.51286432160804e-05, - "loss": 5.658, - "step": 45163 - }, - { - "epoch": 23.553585397653194, - "grad_norm": 1.5620771646499634, - "learning_rate": 5.512763819095478e-05, - "loss": 5.4203, - "step": 45164 - }, - { - "epoch": 23.554106910039113, - "grad_norm": 1.6020227670669556, - "learning_rate": 5.512663316582915e-05, - "loss": 5.4699, - "step": 45165 - }, - { - "epoch": 23.554628422425033, - "grad_norm": 1.7678658962249756, - "learning_rate": 5.5125628140703526e-05, - "loss": 4.9287, - "step": 45166 - }, - { - "epoch": 23.555149934810952, - "grad_norm": 1.5473146438598633, - "learning_rate": 5.512462311557789e-05, - "loss": 5.2885, - "step": 45167 - }, - { - "epoch": 23.555671447196872, - "grad_norm": 1.6008638143539429, - "learning_rate": 5.512361809045227e-05, - "loss": 5.3295, - "step": 45168 - }, - { - "epoch": 23.55619295958279, - "grad_norm": 1.7466163635253906, - "learning_rate": 5.512261306532663e-05, - "loss": 4.7897, - "step": 45169 - }, - { - "epoch": 23.55671447196871, - "grad_norm": 1.585847020149231, - "learning_rate": 5.512160804020101e-05, - "loss": 5.5803, - "step": 45170 - }, - { - "epoch": 23.557235984354627, - "grad_norm": 1.5720826387405396, - "learning_rate": 5.512060301507538e-05, - "loss": 5.1498, - "step": 45171 - }, - { - "epoch": 23.557757496740546, - "grad_norm": 1.5221236944198608, - "learning_rate": 5.511959798994976e-05, - "loss": 5.3903, - "step": 45172 - }, - { - "epoch": 23.558279009126466, - "grad_norm": 1.53538978099823, - "learning_rate": 5.511859296482412e-05, - "loss": 5.5028, - "step": 45173 - }, - { - "epoch": 23.558800521512385, - "grad_norm": 1.60988187789917, - "learning_rate": 5.51175879396985e-05, - "loss": 4.1883, - "step": 45174 - }, - { - "epoch": 23.559322033898304, - "grad_norm": 1.6525676250457764, - "learning_rate": 5.5116582914572865e-05, - "loss": 4.9079, - "step": 45175 - }, - { - "epoch": 23.559843546284224, - "grad_norm": 1.5581003427505493, - "learning_rate": 5.5115577889447236e-05, - "loss": 5.1303, - "step": 45176 - }, - { - "epoch": 23.560365058670143, - "grad_norm": 1.4955047369003296, - "learning_rate": 5.5114572864321614e-05, - "loss": 5.5612, - "step": 45177 - }, - { - "epoch": 23.560886571056063, - "grad_norm": 1.4893156290054321, - "learning_rate": 5.511356783919598e-05, - "loss": 4.9962, - "step": 45178 - }, - { - "epoch": 23.561408083441982, - "grad_norm": 1.5251617431640625, - "learning_rate": 5.5112562814070356e-05, - "loss": 4.8926, - "step": 45179 - }, - { - "epoch": 23.561929595827902, - "grad_norm": 1.574378252029419, - "learning_rate": 5.511155778894472e-05, - "loss": 5.4265, - "step": 45180 - }, - { - "epoch": 23.56245110821382, - "grad_norm": 1.6475176811218262, - "learning_rate": 5.51105527638191e-05, - "loss": 4.9328, - "step": 45181 - }, - { - "epoch": 23.56297262059974, - "grad_norm": 1.612644076347351, - "learning_rate": 5.510954773869347e-05, - "loss": 5.1772, - "step": 45182 - }, - { - "epoch": 23.56349413298566, - "grad_norm": 1.5410295724868774, - "learning_rate": 5.510854271356785e-05, - "loss": 5.4076, - "step": 45183 - }, - { - "epoch": 23.564015645371576, - "grad_norm": 1.5774060487747192, - "learning_rate": 5.510753768844221e-05, - "loss": 5.2228, - "step": 45184 - }, - { - "epoch": 23.564537157757496, - "grad_norm": 1.6848657131195068, - "learning_rate": 5.510653266331659e-05, - "loss": 4.6848, - "step": 45185 - }, - { - "epoch": 23.565058670143415, - "grad_norm": 1.668009638786316, - "learning_rate": 5.510552763819096e-05, - "loss": 4.8957, - "step": 45186 - }, - { - "epoch": 23.565580182529335, - "grad_norm": 1.5051920413970947, - "learning_rate": 5.510452261306534e-05, - "loss": 5.4218, - "step": 45187 - }, - { - "epoch": 23.566101694915254, - "grad_norm": 1.5622782707214355, - "learning_rate": 5.51035175879397e-05, - "loss": 5.2783, - "step": 45188 - }, - { - "epoch": 23.566623207301173, - "grad_norm": 1.5067251920700073, - "learning_rate": 5.5102512562814067e-05, - "loss": 5.3043, - "step": 45189 - }, - { - "epoch": 23.567144719687093, - "grad_norm": 1.4599759578704834, - "learning_rate": 5.5101507537688444e-05, - "loss": 5.302, - "step": 45190 - }, - { - "epoch": 23.567666232073012, - "grad_norm": 1.6299688816070557, - "learning_rate": 5.5100502512562815e-05, - "loss": 5.2553, - "step": 45191 - }, - { - "epoch": 23.568187744458932, - "grad_norm": 1.6544501781463623, - "learning_rate": 5.509949748743719e-05, - "loss": 4.8659, - "step": 45192 - }, - { - "epoch": 23.56870925684485, - "grad_norm": 1.585411548614502, - "learning_rate": 5.509849246231156e-05, - "loss": 5.0501, - "step": 45193 - }, - { - "epoch": 23.56923076923077, - "grad_norm": 1.687155842781067, - "learning_rate": 5.5097487437185935e-05, - "loss": 5.2617, - "step": 45194 - }, - { - "epoch": 23.569752281616687, - "grad_norm": 1.5199084281921387, - "learning_rate": 5.50964824120603e-05, - "loss": 5.4283, - "step": 45195 - }, - { - "epoch": 23.570273794002606, - "grad_norm": 1.6087290048599243, - "learning_rate": 5.509547738693468e-05, - "loss": 5.3532, - "step": 45196 - }, - { - "epoch": 23.570795306388526, - "grad_norm": 1.5891603231430054, - "learning_rate": 5.509447236180905e-05, - "loss": 4.9642, - "step": 45197 - }, - { - "epoch": 23.571316818774445, - "grad_norm": 1.5311102867126465, - "learning_rate": 5.5093467336683426e-05, - "loss": 5.4067, - "step": 45198 - }, - { - "epoch": 23.571838331160365, - "grad_norm": 1.4934756755828857, - "learning_rate": 5.509246231155779e-05, - "loss": 5.3914, - "step": 45199 - }, - { - "epoch": 23.572359843546284, - "grad_norm": 1.5320544242858887, - "learning_rate": 5.509145728643217e-05, - "loss": 5.5151, - "step": 45200 - }, - { - "epoch": 23.572881355932203, - "grad_norm": 1.7711284160614014, - "learning_rate": 5.509045226130653e-05, - "loss": 4.9506, - "step": 45201 - }, - { - "epoch": 23.573402868318123, - "grad_norm": 1.6130386590957642, - "learning_rate": 5.5089447236180904e-05, - "loss": 4.8548, - "step": 45202 - }, - { - "epoch": 23.573924380704042, - "grad_norm": 1.5604774951934814, - "learning_rate": 5.508844221105528e-05, - "loss": 5.5504, - "step": 45203 - }, - { - "epoch": 23.574445893089962, - "grad_norm": 1.4515950679779053, - "learning_rate": 5.5087437185929646e-05, - "loss": 5.43, - "step": 45204 - }, - { - "epoch": 23.57496740547588, - "grad_norm": 1.5292539596557617, - "learning_rate": 5.5086432160804024e-05, - "loss": 5.2935, - "step": 45205 - }, - { - "epoch": 23.5754889178618, - "grad_norm": 1.5900356769561768, - "learning_rate": 5.508542713567839e-05, - "loss": 5.4402, - "step": 45206 - }, - { - "epoch": 23.576010430247717, - "grad_norm": 1.5139657258987427, - "learning_rate": 5.5084422110552766e-05, - "loss": 4.8596, - "step": 45207 - }, - { - "epoch": 23.576531942633636, - "grad_norm": 1.4952867031097412, - "learning_rate": 5.508341708542714e-05, - "loss": 5.3075, - "step": 45208 - }, - { - "epoch": 23.577053455019556, - "grad_norm": 1.5425843000411987, - "learning_rate": 5.5082412060301515e-05, - "loss": 5.5149, - "step": 45209 - }, - { - "epoch": 23.577574967405475, - "grad_norm": 1.5261116027832031, - "learning_rate": 5.508140703517588e-05, - "loss": 5.3478, - "step": 45210 - }, - { - "epoch": 23.578096479791395, - "grad_norm": 1.403017520904541, - "learning_rate": 5.508040201005026e-05, - "loss": 4.1932, - "step": 45211 - }, - { - "epoch": 23.578617992177314, - "grad_norm": 1.5294585227966309, - "learning_rate": 5.507939698492463e-05, - "loss": 5.5274, - "step": 45212 - }, - { - "epoch": 23.579139504563233, - "grad_norm": 1.707960844039917, - "learning_rate": 5.5078391959799006e-05, - "loss": 5.0986, - "step": 45213 - }, - { - "epoch": 23.579661016949153, - "grad_norm": 1.4405442476272583, - "learning_rate": 5.507738693467337e-05, - "loss": 5.4452, - "step": 45214 - }, - { - "epoch": 23.580182529335072, - "grad_norm": 1.6236084699630737, - "learning_rate": 5.5076381909547734e-05, - "loss": 5.2728, - "step": 45215 - }, - { - "epoch": 23.580704041720992, - "grad_norm": 1.5963010787963867, - "learning_rate": 5.507537688442211e-05, - "loss": 5.5195, - "step": 45216 - }, - { - "epoch": 23.58122555410691, - "grad_norm": 1.5697970390319824, - "learning_rate": 5.507437185929648e-05, - "loss": 5.6127, - "step": 45217 - }, - { - "epoch": 23.58174706649283, - "grad_norm": 1.5806407928466797, - "learning_rate": 5.507336683417086e-05, - "loss": 5.1282, - "step": 45218 - }, - { - "epoch": 23.582268578878747, - "grad_norm": 1.4499099254608154, - "learning_rate": 5.5072361809045225e-05, - "loss": 5.722, - "step": 45219 - }, - { - "epoch": 23.582790091264666, - "grad_norm": 1.5740374326705933, - "learning_rate": 5.50713567839196e-05, - "loss": 5.1852, - "step": 45220 - }, - { - "epoch": 23.583311603650586, - "grad_norm": 1.7124232053756714, - "learning_rate": 5.507035175879397e-05, - "loss": 5.2668, - "step": 45221 - }, - { - "epoch": 23.583833116036505, - "grad_norm": 1.5908503532409668, - "learning_rate": 5.5069346733668345e-05, - "loss": 5.0748, - "step": 45222 - }, - { - "epoch": 23.584354628422425, - "grad_norm": 1.5473947525024414, - "learning_rate": 5.5068341708542716e-05, - "loss": 5.4818, - "step": 45223 - }, - { - "epoch": 23.584876140808344, - "grad_norm": 1.558558702468872, - "learning_rate": 5.5067336683417094e-05, - "loss": 5.2866, - "step": 45224 - }, - { - "epoch": 23.585397653194264, - "grad_norm": 1.521040678024292, - "learning_rate": 5.506633165829146e-05, - "loss": 5.6871, - "step": 45225 - }, - { - "epoch": 23.585919165580183, - "grad_norm": 1.5595734119415283, - "learning_rate": 5.5065326633165836e-05, - "loss": 5.4927, - "step": 45226 - }, - { - "epoch": 23.586440677966102, - "grad_norm": 1.522033929824829, - "learning_rate": 5.50643216080402e-05, - "loss": 4.9662, - "step": 45227 - }, - { - "epoch": 23.586962190352022, - "grad_norm": 1.548729419708252, - "learning_rate": 5.506331658291457e-05, - "loss": 5.2696, - "step": 45228 - }, - { - "epoch": 23.58748370273794, - "grad_norm": 1.5914177894592285, - "learning_rate": 5.506231155778895e-05, - "loss": 4.8992, - "step": 45229 - }, - { - "epoch": 23.58800521512386, - "grad_norm": 1.6128554344177246, - "learning_rate": 5.5061306532663314e-05, - "loss": 5.3303, - "step": 45230 - }, - { - "epoch": 23.588526727509777, - "grad_norm": 1.4604445695877075, - "learning_rate": 5.506030150753769e-05, - "loss": 5.7304, - "step": 45231 - }, - { - "epoch": 23.589048239895696, - "grad_norm": 1.4736802577972412, - "learning_rate": 5.505929648241206e-05, - "loss": 5.51, - "step": 45232 - }, - { - "epoch": 23.589569752281616, - "grad_norm": 1.5698902606964111, - "learning_rate": 5.505829145728644e-05, - "loss": 5.2221, - "step": 45233 - }, - { - "epoch": 23.590091264667535, - "grad_norm": 1.5999317169189453, - "learning_rate": 5.5057286432160805e-05, - "loss": 5.3865, - "step": 45234 - }, - { - "epoch": 23.590612777053455, - "grad_norm": 1.528164029121399, - "learning_rate": 5.505628140703518e-05, - "loss": 5.2398, - "step": 45235 - }, - { - "epoch": 23.591134289439374, - "grad_norm": 1.5013551712036133, - "learning_rate": 5.505527638190955e-05, - "loss": 5.7581, - "step": 45236 - }, - { - "epoch": 23.591655801825294, - "grad_norm": 1.5219188928604126, - "learning_rate": 5.5054271356783925e-05, - "loss": 4.9211, - "step": 45237 - }, - { - "epoch": 23.592177314211213, - "grad_norm": 1.4756724834442139, - "learning_rate": 5.5053266331658296e-05, - "loss": 5.5095, - "step": 45238 - }, - { - "epoch": 23.592698826597132, - "grad_norm": 1.670495867729187, - "learning_rate": 5.5052261306532673e-05, - "loss": 5.1948, - "step": 45239 - }, - { - "epoch": 23.593220338983052, - "grad_norm": 1.6014080047607422, - "learning_rate": 5.505125628140704e-05, - "loss": 4.8507, - "step": 45240 - }, - { - "epoch": 23.59374185136897, - "grad_norm": 1.5810050964355469, - "learning_rate": 5.50502512562814e-05, - "loss": 5.1633, - "step": 45241 - }, - { - "epoch": 23.59426336375489, - "grad_norm": 1.573981523513794, - "learning_rate": 5.504924623115578e-05, - "loss": 5.4471, - "step": 45242 - }, - { - "epoch": 23.594784876140807, - "grad_norm": 1.6828585863113403, - "learning_rate": 5.504824120603015e-05, - "loss": 5.1566, - "step": 45243 - }, - { - "epoch": 23.595306388526726, - "grad_norm": 1.6379529237747192, - "learning_rate": 5.504723618090453e-05, - "loss": 4.7593, - "step": 45244 - }, - { - "epoch": 23.595827900912646, - "grad_norm": 1.6080615520477295, - "learning_rate": 5.504623115577889e-05, - "loss": 5.4472, - "step": 45245 - }, - { - "epoch": 23.596349413298565, - "grad_norm": 1.5610448122024536, - "learning_rate": 5.504522613065327e-05, - "loss": 5.5119, - "step": 45246 - }, - { - "epoch": 23.596870925684485, - "grad_norm": 1.6764482259750366, - "learning_rate": 5.5044221105527635e-05, - "loss": 4.5919, - "step": 45247 - }, - { - "epoch": 23.597392438070404, - "grad_norm": 1.6057460308074951, - "learning_rate": 5.504321608040201e-05, - "loss": 5.1253, - "step": 45248 - }, - { - "epoch": 23.597913950456324, - "grad_norm": 1.4662604331970215, - "learning_rate": 5.5042211055276384e-05, - "loss": 4.9188, - "step": 45249 - }, - { - "epoch": 23.598435462842243, - "grad_norm": 1.4419028759002686, - "learning_rate": 5.504120603015076e-05, - "loss": 5.2653, - "step": 45250 - }, - { - "epoch": 23.598956975228162, - "grad_norm": 1.4595078229904175, - "learning_rate": 5.5040201005025126e-05, - "loss": 5.5243, - "step": 45251 - }, - { - "epoch": 23.599478487614082, - "grad_norm": 1.6463961601257324, - "learning_rate": 5.5039195979899504e-05, - "loss": 4.919, - "step": 45252 - }, - { - "epoch": 23.6, - "grad_norm": 1.5563658475875854, - "learning_rate": 5.5038190954773875e-05, - "loss": 5.0708, - "step": 45253 - }, - { - "epoch": 23.60052151238592, - "grad_norm": 1.5095690488815308, - "learning_rate": 5.503718592964824e-05, - "loss": 4.7389, - "step": 45254 - }, - { - "epoch": 23.601043024771837, - "grad_norm": 1.6362158060073853, - "learning_rate": 5.503618090452262e-05, - "loss": 4.465, - "step": 45255 - }, - { - "epoch": 23.601564537157756, - "grad_norm": 1.6024861335754395, - "learning_rate": 5.503517587939698e-05, - "loss": 4.7564, - "step": 45256 - }, - { - "epoch": 23.602086049543676, - "grad_norm": 1.6377919912338257, - "learning_rate": 5.503417085427136e-05, - "loss": 4.8566, - "step": 45257 - }, - { - "epoch": 23.602607561929595, - "grad_norm": 1.4983742237091064, - "learning_rate": 5.503316582914573e-05, - "loss": 5.1027, - "step": 45258 - }, - { - "epoch": 23.603129074315515, - "grad_norm": 1.578943133354187, - "learning_rate": 5.503216080402011e-05, - "loss": 5.241, - "step": 45259 - }, - { - "epoch": 23.603650586701434, - "grad_norm": 1.545652151107788, - "learning_rate": 5.503115577889447e-05, - "loss": 5.4391, - "step": 45260 - }, - { - "epoch": 23.604172099087354, - "grad_norm": 1.4662399291992188, - "learning_rate": 5.503015075376885e-05, - "loss": 5.0974, - "step": 45261 - }, - { - "epoch": 23.604693611473273, - "grad_norm": 1.5108139514923096, - "learning_rate": 5.5029145728643214e-05, - "loss": 5.7098, - "step": 45262 - }, - { - "epoch": 23.605215123859193, - "grad_norm": 1.4686954021453857, - "learning_rate": 5.502814070351759e-05, - "loss": 5.3942, - "step": 45263 - }, - { - "epoch": 23.605736636245112, - "grad_norm": 1.624500036239624, - "learning_rate": 5.502713567839196e-05, - "loss": 4.8201, - "step": 45264 - }, - { - "epoch": 23.60625814863103, - "grad_norm": 1.5154461860656738, - "learning_rate": 5.502613065326634e-05, - "loss": 5.428, - "step": 45265 - }, - { - "epoch": 23.60677966101695, - "grad_norm": 1.572263479232788, - "learning_rate": 5.5025125628140705e-05, - "loss": 5.1764, - "step": 45266 - }, - { - "epoch": 23.607301173402867, - "grad_norm": 1.5162533521652222, - "learning_rate": 5.502412060301508e-05, - "loss": 4.6596, - "step": 45267 - }, - { - "epoch": 23.607822685788786, - "grad_norm": 1.5757859945297241, - "learning_rate": 5.502311557788945e-05, - "loss": 5.2003, - "step": 45268 - }, - { - "epoch": 23.608344198174706, - "grad_norm": 1.7127867937088013, - "learning_rate": 5.502211055276382e-05, - "loss": 5.3312, - "step": 45269 - }, - { - "epoch": 23.608865710560625, - "grad_norm": 1.5678132772445679, - "learning_rate": 5.5021105527638196e-05, - "loss": 5.4483, - "step": 45270 - }, - { - "epoch": 23.609387222946545, - "grad_norm": 1.6114519834518433, - "learning_rate": 5.502010050251256e-05, - "loss": 5.6719, - "step": 45271 - }, - { - "epoch": 23.609908735332464, - "grad_norm": 1.5737437009811401, - "learning_rate": 5.501909547738694e-05, - "loss": 5.0614, - "step": 45272 - }, - { - "epoch": 23.610430247718384, - "grad_norm": 1.5928925275802612, - "learning_rate": 5.501809045226131e-05, - "loss": 5.3108, - "step": 45273 - }, - { - "epoch": 23.610951760104303, - "grad_norm": 1.560725212097168, - "learning_rate": 5.501708542713569e-05, - "loss": 5.5557, - "step": 45274 - }, - { - "epoch": 23.611473272490223, - "grad_norm": 1.6528434753417969, - "learning_rate": 5.501608040201005e-05, - "loss": 5.1965, - "step": 45275 - }, - { - "epoch": 23.611994784876142, - "grad_norm": 1.6121461391448975, - "learning_rate": 5.501507537688443e-05, - "loss": 5.1921, - "step": 45276 - }, - { - "epoch": 23.61251629726206, - "grad_norm": 1.55232572555542, - "learning_rate": 5.5014070351758794e-05, - "loss": 5.2445, - "step": 45277 - }, - { - "epoch": 23.613037809647977, - "grad_norm": 1.5917562246322632, - "learning_rate": 5.501306532663317e-05, - "loss": 5.3772, - "step": 45278 - }, - { - "epoch": 23.613559322033897, - "grad_norm": 1.5021917819976807, - "learning_rate": 5.501206030150754e-05, - "loss": 4.7516, - "step": 45279 - }, - { - "epoch": 23.614080834419816, - "grad_norm": 1.5773768424987793, - "learning_rate": 5.501105527638192e-05, - "loss": 5.1406, - "step": 45280 - }, - { - "epoch": 23.614602346805736, - "grad_norm": 1.5924891233444214, - "learning_rate": 5.5010050251256285e-05, - "loss": 5.1268, - "step": 45281 - }, - { - "epoch": 23.615123859191655, - "grad_norm": 1.5497431755065918, - "learning_rate": 5.500904522613065e-05, - "loss": 5.1177, - "step": 45282 - }, - { - "epoch": 23.615645371577575, - "grad_norm": 1.475248098373413, - "learning_rate": 5.500804020100503e-05, - "loss": 4.9263, - "step": 45283 - }, - { - "epoch": 23.616166883963494, - "grad_norm": 1.4931033849716187, - "learning_rate": 5.50070351758794e-05, - "loss": 5.0478, - "step": 45284 - }, - { - "epoch": 23.616688396349414, - "grad_norm": 1.5530096292495728, - "learning_rate": 5.5006030150753776e-05, - "loss": 4.8942, - "step": 45285 - }, - { - "epoch": 23.617209908735333, - "grad_norm": 1.5607755184173584, - "learning_rate": 5.500502512562814e-05, - "loss": 5.4289, - "step": 45286 - }, - { - "epoch": 23.617731421121253, - "grad_norm": 1.6028773784637451, - "learning_rate": 5.500402010050252e-05, - "loss": 5.3057, - "step": 45287 - }, - { - "epoch": 23.618252933507172, - "grad_norm": 1.6737390756607056, - "learning_rate": 5.500301507537688e-05, - "loss": 5.149, - "step": 45288 - }, - { - "epoch": 23.61877444589309, - "grad_norm": 1.4922083616256714, - "learning_rate": 5.500201005025126e-05, - "loss": 5.5795, - "step": 45289 - }, - { - "epoch": 23.619295958279007, - "grad_norm": 1.7001910209655762, - "learning_rate": 5.500100502512563e-05, - "loss": 5.0932, - "step": 45290 - }, - { - "epoch": 23.619817470664927, - "grad_norm": 1.5723458528518677, - "learning_rate": 5.500000000000001e-05, - "loss": 5.5213, - "step": 45291 - }, - { - "epoch": 23.620338983050846, - "grad_norm": 1.5070350170135498, - "learning_rate": 5.499899497487437e-05, - "loss": 4.6884, - "step": 45292 - }, - { - "epoch": 23.620860495436766, - "grad_norm": 1.519818663597107, - "learning_rate": 5.499798994974875e-05, - "loss": 5.2898, - "step": 45293 - }, - { - "epoch": 23.621382007822685, - "grad_norm": 1.549261212348938, - "learning_rate": 5.4996984924623115e-05, - "loss": 5.2018, - "step": 45294 - }, - { - "epoch": 23.621903520208605, - "grad_norm": 1.4388186931610107, - "learning_rate": 5.4995979899497486e-05, - "loss": 5.5087, - "step": 45295 - }, - { - "epoch": 23.622425032594524, - "grad_norm": 1.5441579818725586, - "learning_rate": 5.4994974874371864e-05, - "loss": 5.6334, - "step": 45296 - }, - { - "epoch": 23.622946544980444, - "grad_norm": 1.3835536241531372, - "learning_rate": 5.499396984924623e-05, - "loss": 5.3155, - "step": 45297 - }, - { - "epoch": 23.623468057366363, - "grad_norm": 1.5017846822738647, - "learning_rate": 5.4992964824120606e-05, - "loss": 5.3502, - "step": 45298 - }, - { - "epoch": 23.623989569752283, - "grad_norm": 1.60231614112854, - "learning_rate": 5.499195979899498e-05, - "loss": 4.7698, - "step": 45299 - }, - { - "epoch": 23.624511082138202, - "grad_norm": 1.6030080318450928, - "learning_rate": 5.4990954773869355e-05, - "loss": 4.6274, - "step": 45300 - }, - { - "epoch": 23.62503259452412, - "grad_norm": 1.4322714805603027, - "learning_rate": 5.498994974874372e-05, - "loss": 5.4673, - "step": 45301 - }, - { - "epoch": 23.625554106910037, - "grad_norm": 1.6278003454208374, - "learning_rate": 5.49889447236181e-05, - "loss": 5.3381, - "step": 45302 - }, - { - "epoch": 23.626075619295957, - "grad_norm": 1.6379565000534058, - "learning_rate": 5.498793969849246e-05, - "loss": 5.3837, - "step": 45303 - }, - { - "epoch": 23.626597131681876, - "grad_norm": 1.5679290294647217, - "learning_rate": 5.498693467336684e-05, - "loss": 5.177, - "step": 45304 - }, - { - "epoch": 23.627118644067796, - "grad_norm": 1.6396254301071167, - "learning_rate": 5.498592964824121e-05, - "loss": 4.9183, - "step": 45305 - }, - { - "epoch": 23.627640156453715, - "grad_norm": 1.5953056812286377, - "learning_rate": 5.498492462311559e-05, - "loss": 4.9769, - "step": 45306 - }, - { - "epoch": 23.628161668839635, - "grad_norm": 1.5951595306396484, - "learning_rate": 5.498391959798995e-05, - "loss": 5.0724, - "step": 45307 - }, - { - "epoch": 23.628683181225554, - "grad_norm": 1.5496015548706055, - "learning_rate": 5.498291457286432e-05, - "loss": 5.2458, - "step": 45308 - }, - { - "epoch": 23.629204693611474, - "grad_norm": 1.6966696977615356, - "learning_rate": 5.4981909547738694e-05, - "loss": 4.9441, - "step": 45309 - }, - { - "epoch": 23.629726205997393, - "grad_norm": 1.5768994092941284, - "learning_rate": 5.4980904522613065e-05, - "loss": 5.39, - "step": 45310 - }, - { - "epoch": 23.630247718383313, - "grad_norm": 1.5399878025054932, - "learning_rate": 5.497989949748744e-05, - "loss": 5.3583, - "step": 45311 - }, - { - "epoch": 23.630769230769232, - "grad_norm": 1.5750014781951904, - "learning_rate": 5.497889447236181e-05, - "loss": 5.2349, - "step": 45312 - }, - { - "epoch": 23.63129074315515, - "grad_norm": 1.5129845142364502, - "learning_rate": 5.4977889447236185e-05, - "loss": 5.4352, - "step": 45313 - }, - { - "epoch": 23.631812255541067, - "grad_norm": 1.4325238466262817, - "learning_rate": 5.497688442211055e-05, - "loss": 5.6206, - "step": 45314 - }, - { - "epoch": 23.632333767926987, - "grad_norm": 1.631861686706543, - "learning_rate": 5.497587939698493e-05, - "loss": 4.8691, - "step": 45315 - }, - { - "epoch": 23.632855280312906, - "grad_norm": 1.5593675374984741, - "learning_rate": 5.49748743718593e-05, - "loss": 4.968, - "step": 45316 - }, - { - "epoch": 23.633376792698826, - "grad_norm": 1.6038849353790283, - "learning_rate": 5.4973869346733676e-05, - "loss": 4.9043, - "step": 45317 - }, - { - "epoch": 23.633898305084745, - "grad_norm": 1.5373280048370361, - "learning_rate": 5.497286432160804e-05, - "loss": 4.8561, - "step": 45318 - }, - { - "epoch": 23.634419817470665, - "grad_norm": 1.5237919092178345, - "learning_rate": 5.497185929648242e-05, - "loss": 5.4643, - "step": 45319 - }, - { - "epoch": 23.634941329856584, - "grad_norm": 1.6127339601516724, - "learning_rate": 5.497085427135679e-05, - "loss": 5.1082, - "step": 45320 - }, - { - "epoch": 23.635462842242504, - "grad_norm": 1.5417441129684448, - "learning_rate": 5.4969849246231154e-05, - "loss": 5.3815, - "step": 45321 - }, - { - "epoch": 23.635984354628423, - "grad_norm": 1.8096040487289429, - "learning_rate": 5.496884422110553e-05, - "loss": 5.1914, - "step": 45322 - }, - { - "epoch": 23.636505867014343, - "grad_norm": 1.4839853048324585, - "learning_rate": 5.4967839195979896e-05, - "loss": 5.5535, - "step": 45323 - }, - { - "epoch": 23.637027379400262, - "grad_norm": 1.5061789751052856, - "learning_rate": 5.4966834170854274e-05, - "loss": 5.2739, - "step": 45324 - }, - { - "epoch": 23.63754889178618, - "grad_norm": 1.6278361082077026, - "learning_rate": 5.4965829145728645e-05, - "loss": 5.0997, - "step": 45325 - }, - { - "epoch": 23.638070404172097, - "grad_norm": 1.5376230478286743, - "learning_rate": 5.496482412060302e-05, - "loss": 5.1841, - "step": 45326 - }, - { - "epoch": 23.638591916558017, - "grad_norm": 1.5682841539382935, - "learning_rate": 5.496381909547739e-05, - "loss": 5.4658, - "step": 45327 - }, - { - "epoch": 23.639113428943936, - "grad_norm": 1.4551482200622559, - "learning_rate": 5.4962814070351765e-05, - "loss": 5.3131, - "step": 45328 - }, - { - "epoch": 23.639634941329856, - "grad_norm": 1.5070785284042358, - "learning_rate": 5.496180904522613e-05, - "loss": 4.858, - "step": 45329 - }, - { - "epoch": 23.640156453715775, - "grad_norm": 1.4977874755859375, - "learning_rate": 5.496080402010051e-05, - "loss": 4.9099, - "step": 45330 - }, - { - "epoch": 23.640677966101695, - "grad_norm": 1.4867523908615112, - "learning_rate": 5.495979899497488e-05, - "loss": 5.5148, - "step": 45331 - }, - { - "epoch": 23.641199478487614, - "grad_norm": 1.4235230684280396, - "learning_rate": 5.4958793969849256e-05, - "loss": 5.4484, - "step": 45332 - }, - { - "epoch": 23.641720990873534, - "grad_norm": 1.65019953250885, - "learning_rate": 5.495778894472362e-05, - "loss": 5.0744, - "step": 45333 - }, - { - "epoch": 23.642242503259453, - "grad_norm": 1.4573196172714233, - "learning_rate": 5.4956783919597984e-05, - "loss": 5.8144, - "step": 45334 - }, - { - "epoch": 23.642764015645373, - "grad_norm": 1.5646789073944092, - "learning_rate": 5.495577889447236e-05, - "loss": 5.2421, - "step": 45335 - }, - { - "epoch": 23.643285528031292, - "grad_norm": 1.6756519079208374, - "learning_rate": 5.495477386934673e-05, - "loss": 5.4637, - "step": 45336 - }, - { - "epoch": 23.64380704041721, - "grad_norm": 1.5998197793960571, - "learning_rate": 5.495376884422111e-05, - "loss": 5.3628, - "step": 45337 - }, - { - "epoch": 23.644328552803128, - "grad_norm": 1.7166519165039062, - "learning_rate": 5.4952763819095475e-05, - "loss": 4.4805, - "step": 45338 - }, - { - "epoch": 23.644850065189047, - "grad_norm": 1.544224500656128, - "learning_rate": 5.495175879396985e-05, - "loss": 4.9427, - "step": 45339 - }, - { - "epoch": 23.645371577574966, - "grad_norm": 1.5469862222671509, - "learning_rate": 5.4950753768844224e-05, - "loss": 5.0593, - "step": 45340 - }, - { - "epoch": 23.645893089960886, - "grad_norm": 1.5188933610916138, - "learning_rate": 5.49497487437186e-05, - "loss": 5.5464, - "step": 45341 - }, - { - "epoch": 23.646414602346805, - "grad_norm": 1.5509384870529175, - "learning_rate": 5.4948743718592966e-05, - "loss": 4.7517, - "step": 45342 - }, - { - "epoch": 23.646936114732725, - "grad_norm": 1.5761544704437256, - "learning_rate": 5.4947738693467344e-05, - "loss": 5.3398, - "step": 45343 - }, - { - "epoch": 23.647457627118644, - "grad_norm": 1.5717899799346924, - "learning_rate": 5.494673366834171e-05, - "loss": 5.5484, - "step": 45344 - }, - { - "epoch": 23.647979139504564, - "grad_norm": 1.6187279224395752, - "learning_rate": 5.4945728643216086e-05, - "loss": 5.2785, - "step": 45345 - }, - { - "epoch": 23.648500651890483, - "grad_norm": 1.5319993495941162, - "learning_rate": 5.494472361809046e-05, - "loss": 4.7357, - "step": 45346 - }, - { - "epoch": 23.649022164276403, - "grad_norm": 1.7056225538253784, - "learning_rate": 5.494371859296482e-05, - "loss": 5.0886, - "step": 45347 - }, - { - "epoch": 23.649543676662322, - "grad_norm": 1.5748392343521118, - "learning_rate": 5.49427135678392e-05, - "loss": 5.1046, - "step": 45348 - }, - { - "epoch": 23.65006518904824, - "grad_norm": 1.5663747787475586, - "learning_rate": 5.4941708542713564e-05, - "loss": 4.9925, - "step": 45349 - }, - { - "epoch": 23.650586701434158, - "grad_norm": 1.730271577835083, - "learning_rate": 5.494070351758794e-05, - "loss": 4.9597, - "step": 45350 - }, - { - "epoch": 23.651108213820077, - "grad_norm": 1.636501669883728, - "learning_rate": 5.493969849246231e-05, - "loss": 5.3739, - "step": 45351 - }, - { - "epoch": 23.651629726205996, - "grad_norm": 1.6183212995529175, - "learning_rate": 5.493869346733669e-05, - "loss": 5.1531, - "step": 45352 - }, - { - "epoch": 23.652151238591916, - "grad_norm": 1.5543594360351562, - "learning_rate": 5.4937688442211055e-05, - "loss": 5.5124, - "step": 45353 - }, - { - "epoch": 23.652672750977835, - "grad_norm": 1.4713082313537598, - "learning_rate": 5.493668341708543e-05, - "loss": 5.6816, - "step": 45354 - }, - { - "epoch": 23.653194263363755, - "grad_norm": 1.432661771774292, - "learning_rate": 5.49356783919598e-05, - "loss": 5.6227, - "step": 45355 - }, - { - "epoch": 23.653715775749674, - "grad_norm": 1.4964239597320557, - "learning_rate": 5.4934673366834175e-05, - "loss": 5.4864, - "step": 45356 - }, - { - "epoch": 23.654237288135594, - "grad_norm": 1.6548165082931519, - "learning_rate": 5.4933668341708546e-05, - "loss": 4.9732, - "step": 45357 - }, - { - "epoch": 23.654758800521513, - "grad_norm": 1.5675036907196045, - "learning_rate": 5.4932663316582923e-05, - "loss": 5.2138, - "step": 45358 - }, - { - "epoch": 23.655280312907433, - "grad_norm": 1.6943219900131226, - "learning_rate": 5.493165829145729e-05, - "loss": 4.9591, - "step": 45359 - }, - { - "epoch": 23.655801825293352, - "grad_norm": 1.5690582990646362, - "learning_rate": 5.4930653266331666e-05, - "loss": 5.6462, - "step": 45360 - }, - { - "epoch": 23.656323337679268, - "grad_norm": 1.67183256149292, - "learning_rate": 5.492964824120603e-05, - "loss": 5.0037, - "step": 45361 - }, - { - "epoch": 23.656844850065188, - "grad_norm": 1.4693117141723633, - "learning_rate": 5.49286432160804e-05, - "loss": 5.654, - "step": 45362 - }, - { - "epoch": 23.657366362451107, - "grad_norm": 1.5728647708892822, - "learning_rate": 5.492763819095478e-05, - "loss": 4.91, - "step": 45363 - }, - { - "epoch": 23.657887874837026, - "grad_norm": 1.5370826721191406, - "learning_rate": 5.492663316582914e-05, - "loss": 5.1949, - "step": 45364 - }, - { - "epoch": 23.658409387222946, - "grad_norm": 1.6270078420639038, - "learning_rate": 5.492562814070352e-05, - "loss": 4.7613, - "step": 45365 - }, - { - "epoch": 23.658930899608865, - "grad_norm": 1.4937224388122559, - "learning_rate": 5.492462311557789e-05, - "loss": 4.8513, - "step": 45366 - }, - { - "epoch": 23.659452411994785, - "grad_norm": 1.5443580150604248, - "learning_rate": 5.492361809045227e-05, - "loss": 4.3389, - "step": 45367 - }, - { - "epoch": 23.659973924380704, - "grad_norm": 1.4476935863494873, - "learning_rate": 5.4922613065326634e-05, - "loss": 5.473, - "step": 45368 - }, - { - "epoch": 23.660495436766624, - "grad_norm": 1.6652225255966187, - "learning_rate": 5.492160804020101e-05, - "loss": 5.1673, - "step": 45369 - }, - { - "epoch": 23.661016949152543, - "grad_norm": 1.5816748142242432, - "learning_rate": 5.4920603015075376e-05, - "loss": 4.8805, - "step": 45370 - }, - { - "epoch": 23.661538461538463, - "grad_norm": 1.3939203023910522, - "learning_rate": 5.4919597989949754e-05, - "loss": 4.869, - "step": 45371 - }, - { - "epoch": 23.662059973924382, - "grad_norm": 1.4879183769226074, - "learning_rate": 5.4918592964824125e-05, - "loss": 5.3605, - "step": 45372 - }, - { - "epoch": 23.6625814863103, - "grad_norm": 1.5105429887771606, - "learning_rate": 5.49175879396985e-05, - "loss": 5.5926, - "step": 45373 - }, - { - "epoch": 23.663102998696218, - "grad_norm": 1.5205044746398926, - "learning_rate": 5.491658291457287e-05, - "loss": 5.1191, - "step": 45374 - }, - { - "epoch": 23.663624511082137, - "grad_norm": 1.5895963907241821, - "learning_rate": 5.491557788944723e-05, - "loss": 5.2034, - "step": 45375 - }, - { - "epoch": 23.664146023468057, - "grad_norm": 1.7297574281692505, - "learning_rate": 5.491457286432161e-05, - "loss": 4.3387, - "step": 45376 - }, - { - "epoch": 23.664667535853976, - "grad_norm": 1.525249719619751, - "learning_rate": 5.491356783919598e-05, - "loss": 5.5687, - "step": 45377 - }, - { - "epoch": 23.665189048239895, - "grad_norm": 1.5461193323135376, - "learning_rate": 5.491256281407036e-05, - "loss": 5.5893, - "step": 45378 - }, - { - "epoch": 23.665710560625815, - "grad_norm": 1.559857726097107, - "learning_rate": 5.491155778894472e-05, - "loss": 5.55, - "step": 45379 - }, - { - "epoch": 23.666232073011734, - "grad_norm": 1.5784025192260742, - "learning_rate": 5.49105527638191e-05, - "loss": 5.3672, - "step": 45380 - }, - { - "epoch": 23.666753585397654, - "grad_norm": 1.5045815706253052, - "learning_rate": 5.4909547738693464e-05, - "loss": 5.3037, - "step": 45381 - }, - { - "epoch": 23.667275097783573, - "grad_norm": 1.5105022192001343, - "learning_rate": 5.490854271356784e-05, - "loss": 5.4813, - "step": 45382 - }, - { - "epoch": 23.667796610169493, - "grad_norm": 1.4519191980361938, - "learning_rate": 5.490753768844221e-05, - "loss": 5.3916, - "step": 45383 - }, - { - "epoch": 23.668318122555412, - "grad_norm": 1.5912586450576782, - "learning_rate": 5.490653266331659e-05, - "loss": 5.3732, - "step": 45384 - }, - { - "epoch": 23.668839634941328, - "grad_norm": 1.4389643669128418, - "learning_rate": 5.4905527638190955e-05, - "loss": 5.4426, - "step": 45385 - }, - { - "epoch": 23.669361147327248, - "grad_norm": 1.6408048868179321, - "learning_rate": 5.490452261306533e-05, - "loss": 5.0375, - "step": 45386 - }, - { - "epoch": 23.669882659713167, - "grad_norm": 1.5693790912628174, - "learning_rate": 5.4903517587939704e-05, - "loss": 5.0791, - "step": 45387 - }, - { - "epoch": 23.670404172099087, - "grad_norm": 1.5060327053070068, - "learning_rate": 5.490251256281407e-05, - "loss": 5.101, - "step": 45388 - }, - { - "epoch": 23.670925684485006, - "grad_norm": 1.512671947479248, - "learning_rate": 5.4901507537688446e-05, - "loss": 4.958, - "step": 45389 - }, - { - "epoch": 23.671447196870925, - "grad_norm": 1.6752002239227295, - "learning_rate": 5.490050251256281e-05, - "loss": 4.775, - "step": 45390 - }, - { - "epoch": 23.671968709256845, - "grad_norm": 1.4634467363357544, - "learning_rate": 5.489949748743719e-05, - "loss": 5.2737, - "step": 45391 - }, - { - "epoch": 23.672490221642764, - "grad_norm": 1.5844084024429321, - "learning_rate": 5.489849246231156e-05, - "loss": 4.9158, - "step": 45392 - }, - { - "epoch": 23.673011734028684, - "grad_norm": 1.5587491989135742, - "learning_rate": 5.489748743718594e-05, - "loss": 5.4425, - "step": 45393 - }, - { - "epoch": 23.673533246414603, - "grad_norm": 1.5607906579971313, - "learning_rate": 5.48964824120603e-05, - "loss": 5.3201, - "step": 45394 - }, - { - "epoch": 23.674054758800523, - "grad_norm": 1.5762008428573608, - "learning_rate": 5.489547738693468e-05, - "loss": 5.5178, - "step": 45395 - }, - { - "epoch": 23.674576271186442, - "grad_norm": 1.585372805595398, - "learning_rate": 5.4894472361809044e-05, - "loss": 5.1511, - "step": 45396 - }, - { - "epoch": 23.675097783572358, - "grad_norm": 1.546237587928772, - "learning_rate": 5.489346733668342e-05, - "loss": 5.2264, - "step": 45397 - }, - { - "epoch": 23.675619295958278, - "grad_norm": 1.5854939222335815, - "learning_rate": 5.489246231155779e-05, - "loss": 4.8491, - "step": 45398 - }, - { - "epoch": 23.676140808344197, - "grad_norm": 1.575784683227539, - "learning_rate": 5.489145728643217e-05, - "loss": 5.2168, - "step": 45399 - }, - { - "epoch": 23.676662320730117, - "grad_norm": 1.493667721748352, - "learning_rate": 5.4890452261306535e-05, - "loss": 5.4677, - "step": 45400 - }, - { - "epoch": 23.677183833116036, - "grad_norm": 1.5314258337020874, - "learning_rate": 5.48894472361809e-05, - "loss": 5.3516, - "step": 45401 - }, - { - "epoch": 23.677705345501955, - "grad_norm": 1.573110580444336, - "learning_rate": 5.488844221105528e-05, - "loss": 5.2845, - "step": 45402 - }, - { - "epoch": 23.678226857887875, - "grad_norm": 1.5564453601837158, - "learning_rate": 5.488743718592965e-05, - "loss": 5.282, - "step": 45403 - }, - { - "epoch": 23.678748370273794, - "grad_norm": 1.6481062173843384, - "learning_rate": 5.4886432160804026e-05, - "loss": 5.2711, - "step": 45404 - }, - { - "epoch": 23.679269882659714, - "grad_norm": 1.561964750289917, - "learning_rate": 5.488542713567839e-05, - "loss": 5.1987, - "step": 45405 - }, - { - "epoch": 23.679791395045633, - "grad_norm": 1.559525489807129, - "learning_rate": 5.488442211055277e-05, - "loss": 5.3966, - "step": 45406 - }, - { - "epoch": 23.680312907431553, - "grad_norm": 1.5913983583450317, - "learning_rate": 5.488341708542714e-05, - "loss": 5.4745, - "step": 45407 - }, - { - "epoch": 23.680834419817472, - "grad_norm": 1.5240859985351562, - "learning_rate": 5.488241206030152e-05, - "loss": 5.4063, - "step": 45408 - }, - { - "epoch": 23.681355932203388, - "grad_norm": 1.517305612564087, - "learning_rate": 5.488140703517588e-05, - "loss": 5.3938, - "step": 45409 - }, - { - "epoch": 23.681877444589308, - "grad_norm": 1.6201436519622803, - "learning_rate": 5.488040201005026e-05, - "loss": 5.1827, - "step": 45410 - }, - { - "epoch": 23.682398956975227, - "grad_norm": 1.5816996097564697, - "learning_rate": 5.487939698492462e-05, - "loss": 4.7747, - "step": 45411 - }, - { - "epoch": 23.682920469361147, - "grad_norm": 1.4409162998199463, - "learning_rate": 5.4878391959799e-05, - "loss": 4.351, - "step": 45412 - }, - { - "epoch": 23.683441981747066, - "grad_norm": 1.5661442279815674, - "learning_rate": 5.487738693467337e-05, - "loss": 4.9628, - "step": 45413 - }, - { - "epoch": 23.683963494132986, - "grad_norm": 1.5423375368118286, - "learning_rate": 5.4876381909547736e-05, - "loss": 5.4432, - "step": 45414 - }, - { - "epoch": 23.684485006518905, - "grad_norm": 1.5334928035736084, - "learning_rate": 5.4875376884422114e-05, - "loss": 5.2809, - "step": 45415 - }, - { - "epoch": 23.685006518904824, - "grad_norm": 1.5452947616577148, - "learning_rate": 5.487437185929648e-05, - "loss": 5.4066, - "step": 45416 - }, - { - "epoch": 23.685528031290744, - "grad_norm": 1.4427522420883179, - "learning_rate": 5.4873366834170856e-05, - "loss": 5.6173, - "step": 45417 - }, - { - "epoch": 23.686049543676663, - "grad_norm": 1.6459593772888184, - "learning_rate": 5.487236180904523e-05, - "loss": 5.114, - "step": 45418 - }, - { - "epoch": 23.686571056062583, - "grad_norm": 1.6027591228485107, - "learning_rate": 5.4871356783919605e-05, - "loss": 5.3219, - "step": 45419 - }, - { - "epoch": 23.687092568448502, - "grad_norm": 1.5153411626815796, - "learning_rate": 5.487035175879397e-05, - "loss": 4.7753, - "step": 45420 - }, - { - "epoch": 23.687614080834418, - "grad_norm": 1.5862008333206177, - "learning_rate": 5.486934673366835e-05, - "loss": 4.8958, - "step": 45421 - }, - { - "epoch": 23.688135593220338, - "grad_norm": 1.5831340551376343, - "learning_rate": 5.486834170854271e-05, - "loss": 5.1558, - "step": 45422 - }, - { - "epoch": 23.688657105606257, - "grad_norm": 1.5397521257400513, - "learning_rate": 5.486733668341709e-05, - "loss": 5.1662, - "step": 45423 - }, - { - "epoch": 23.689178617992177, - "grad_norm": 1.6853231191635132, - "learning_rate": 5.486633165829146e-05, - "loss": 4.8772, - "step": 45424 - }, - { - "epoch": 23.689700130378096, - "grad_norm": 1.5655403137207031, - "learning_rate": 5.486532663316584e-05, - "loss": 5.305, - "step": 45425 - }, - { - "epoch": 23.690221642764016, - "grad_norm": 1.588850498199463, - "learning_rate": 5.48643216080402e-05, - "loss": 5.2129, - "step": 45426 - }, - { - "epoch": 23.690743155149935, - "grad_norm": 1.681321620941162, - "learning_rate": 5.4863316582914573e-05, - "loss": 4.908, - "step": 45427 - }, - { - "epoch": 23.691264667535854, - "grad_norm": 1.4943418502807617, - "learning_rate": 5.486231155778895e-05, - "loss": 5.6367, - "step": 45428 - }, - { - "epoch": 23.691786179921774, - "grad_norm": 1.5288238525390625, - "learning_rate": 5.4861306532663316e-05, - "loss": 4.7913, - "step": 45429 - }, - { - "epoch": 23.692307692307693, - "grad_norm": 1.54961097240448, - "learning_rate": 5.486030150753769e-05, - "loss": 5.1195, - "step": 45430 - }, - { - "epoch": 23.692829204693613, - "grad_norm": 1.470288634300232, - "learning_rate": 5.485929648241206e-05, - "loss": 5.7391, - "step": 45431 - }, - { - "epoch": 23.693350717079532, - "grad_norm": 1.9362585544586182, - "learning_rate": 5.4858291457286435e-05, - "loss": 5.0007, - "step": 45432 - }, - { - "epoch": 23.69387222946545, - "grad_norm": 1.7284375429153442, - "learning_rate": 5.4857286432160806e-05, - "loss": 5.1278, - "step": 45433 - }, - { - "epoch": 23.694393741851368, - "grad_norm": 1.6771650314331055, - "learning_rate": 5.4856281407035184e-05, - "loss": 4.8117, - "step": 45434 - }, - { - "epoch": 23.694915254237287, - "grad_norm": 1.537010908126831, - "learning_rate": 5.485527638190955e-05, - "loss": 5.3271, - "step": 45435 - }, - { - "epoch": 23.695436766623207, - "grad_norm": 1.5736531019210815, - "learning_rate": 5.4854271356783926e-05, - "loss": 4.7159, - "step": 45436 - }, - { - "epoch": 23.695958279009126, - "grad_norm": 1.5919716358184814, - "learning_rate": 5.485326633165829e-05, - "loss": 5.2741, - "step": 45437 - }, - { - "epoch": 23.696479791395046, - "grad_norm": 1.5706818103790283, - "learning_rate": 5.485226130653267e-05, - "loss": 5.3578, - "step": 45438 - }, - { - "epoch": 23.697001303780965, - "grad_norm": 1.6277132034301758, - "learning_rate": 5.485125628140704e-05, - "loss": 5.1102, - "step": 45439 - }, - { - "epoch": 23.697522816166884, - "grad_norm": 1.588730812072754, - "learning_rate": 5.4850251256281404e-05, - "loss": 5.1082, - "step": 45440 - }, - { - "epoch": 23.698044328552804, - "grad_norm": 1.6546802520751953, - "learning_rate": 5.484924623115578e-05, - "loss": 5.1389, - "step": 45441 - }, - { - "epoch": 23.698565840938723, - "grad_norm": 1.4681358337402344, - "learning_rate": 5.4848241206030146e-05, - "loss": 4.9354, - "step": 45442 - }, - { - "epoch": 23.699087353324643, - "grad_norm": 1.7487012147903442, - "learning_rate": 5.4847236180904524e-05, - "loss": 5.1995, - "step": 45443 - }, - { - "epoch": 23.69960886571056, - "grad_norm": 1.5226659774780273, - "learning_rate": 5.4846231155778895e-05, - "loss": 4.7945, - "step": 45444 - }, - { - "epoch": 23.70013037809648, - "grad_norm": 1.52105712890625, - "learning_rate": 5.484522613065327e-05, - "loss": 5.3399, - "step": 45445 - }, - { - "epoch": 23.700651890482398, - "grad_norm": 1.5224363803863525, - "learning_rate": 5.484422110552764e-05, - "loss": 4.7415, - "step": 45446 - }, - { - "epoch": 23.701173402868317, - "grad_norm": 1.4834359884262085, - "learning_rate": 5.4843216080402015e-05, - "loss": 5.3466, - "step": 45447 - }, - { - "epoch": 23.701694915254237, - "grad_norm": 1.655357837677002, - "learning_rate": 5.484221105527638e-05, - "loss": 4.6303, - "step": 45448 - }, - { - "epoch": 23.702216427640156, - "grad_norm": 1.557248830795288, - "learning_rate": 5.484120603015076e-05, - "loss": 5.2264, - "step": 45449 - }, - { - "epoch": 23.702737940026076, - "grad_norm": 1.5627228021621704, - "learning_rate": 5.484020100502513e-05, - "loss": 5.4834, - "step": 45450 - }, - { - "epoch": 23.703259452411995, - "grad_norm": 1.855590581893921, - "learning_rate": 5.4839195979899506e-05, - "loss": 4.911, - "step": 45451 - }, - { - "epoch": 23.703780964797915, - "grad_norm": 1.6151762008666992, - "learning_rate": 5.483819095477387e-05, - "loss": 5.2705, - "step": 45452 - }, - { - "epoch": 23.704302477183834, - "grad_norm": 1.5826822519302368, - "learning_rate": 5.483718592964825e-05, - "loss": 4.9398, - "step": 45453 - }, - { - "epoch": 23.704823989569753, - "grad_norm": 1.6045047044754028, - "learning_rate": 5.483618090452262e-05, - "loss": 4.8894, - "step": 45454 - }, - { - "epoch": 23.705345501955673, - "grad_norm": 1.5597285032272339, - "learning_rate": 5.483517587939698e-05, - "loss": 5.552, - "step": 45455 - }, - { - "epoch": 23.705867014341592, - "grad_norm": 1.6307579278945923, - "learning_rate": 5.483417085427136e-05, - "loss": 5.2149, - "step": 45456 - }, - { - "epoch": 23.70638852672751, - "grad_norm": 1.6405891180038452, - "learning_rate": 5.4833165829145725e-05, - "loss": 5.451, - "step": 45457 - }, - { - "epoch": 23.706910039113428, - "grad_norm": 1.5551711320877075, - "learning_rate": 5.48321608040201e-05, - "loss": 5.2975, - "step": 45458 - }, - { - "epoch": 23.707431551499347, - "grad_norm": 1.5141947269439697, - "learning_rate": 5.4831155778894474e-05, - "loss": 5.1663, - "step": 45459 - }, - { - "epoch": 23.707953063885267, - "grad_norm": 1.542397379875183, - "learning_rate": 5.483015075376885e-05, - "loss": 4.888, - "step": 45460 - }, - { - "epoch": 23.708474576271186, - "grad_norm": 1.5886623859405518, - "learning_rate": 5.4829145728643216e-05, - "loss": 5.5378, - "step": 45461 - }, - { - "epoch": 23.708996088657106, - "grad_norm": 1.6089768409729004, - "learning_rate": 5.4828140703517594e-05, - "loss": 4.5744, - "step": 45462 - }, - { - "epoch": 23.709517601043025, - "grad_norm": 1.6160433292388916, - "learning_rate": 5.482713567839196e-05, - "loss": 5.2513, - "step": 45463 - }, - { - "epoch": 23.710039113428945, - "grad_norm": 1.475787878036499, - "learning_rate": 5.4826130653266336e-05, - "loss": 5.4489, - "step": 45464 - }, - { - "epoch": 23.710560625814864, - "grad_norm": 1.5249608755111694, - "learning_rate": 5.482512562814071e-05, - "loss": 5.453, - "step": 45465 - }, - { - "epoch": 23.711082138200783, - "grad_norm": 1.67307710647583, - "learning_rate": 5.4824120603015085e-05, - "loss": 5.2043, - "step": 45466 - }, - { - "epoch": 23.711603650586703, - "grad_norm": 1.6462039947509766, - "learning_rate": 5.482311557788945e-05, - "loss": 5.3003, - "step": 45467 - }, - { - "epoch": 23.71212516297262, - "grad_norm": 1.5233888626098633, - "learning_rate": 5.4822110552763814e-05, - "loss": 5.4387, - "step": 45468 - }, - { - "epoch": 23.71264667535854, - "grad_norm": 1.471303105354309, - "learning_rate": 5.482110552763819e-05, - "loss": 4.8486, - "step": 45469 - }, - { - "epoch": 23.713168187744458, - "grad_norm": 1.535362720489502, - "learning_rate": 5.482010050251256e-05, - "loss": 5.4574, - "step": 45470 - }, - { - "epoch": 23.713689700130377, - "grad_norm": 1.5225898027420044, - "learning_rate": 5.481909547738694e-05, - "loss": 5.0967, - "step": 45471 - }, - { - "epoch": 23.714211212516297, - "grad_norm": 1.5924068689346313, - "learning_rate": 5.4818090452261305e-05, - "loss": 4.9581, - "step": 45472 - }, - { - "epoch": 23.714732724902216, - "grad_norm": 1.5639668703079224, - "learning_rate": 5.481708542713568e-05, - "loss": 5.152, - "step": 45473 - }, - { - "epoch": 23.715254237288136, - "grad_norm": 1.5610477924346924, - "learning_rate": 5.4816080402010053e-05, - "loss": 5.6806, - "step": 45474 - }, - { - "epoch": 23.715775749674055, - "grad_norm": 1.5582925081253052, - "learning_rate": 5.481507537688443e-05, - "loss": 5.4608, - "step": 45475 - }, - { - "epoch": 23.716297262059975, - "grad_norm": 1.5266163349151611, - "learning_rate": 5.4814070351758796e-05, - "loss": 5.4823, - "step": 45476 - }, - { - "epoch": 23.716818774445894, - "grad_norm": 1.6936920881271362, - "learning_rate": 5.4813065326633173e-05, - "loss": 5.1694, - "step": 45477 - }, - { - "epoch": 23.717340286831814, - "grad_norm": 1.6649174690246582, - "learning_rate": 5.481206030150754e-05, - "loss": 5.0661, - "step": 45478 - }, - { - "epoch": 23.717861799217733, - "grad_norm": 1.5385949611663818, - "learning_rate": 5.4811055276381916e-05, - "loss": 5.3011, - "step": 45479 - }, - { - "epoch": 23.71838331160365, - "grad_norm": 1.5461658239364624, - "learning_rate": 5.4810050251256287e-05, - "loss": 5.0898, - "step": 45480 - }, - { - "epoch": 23.71890482398957, - "grad_norm": 1.5695945024490356, - "learning_rate": 5.480904522613065e-05, - "loss": 4.8006, - "step": 45481 - }, - { - "epoch": 23.719426336375488, - "grad_norm": 1.5563256740570068, - "learning_rate": 5.480804020100503e-05, - "loss": 5.7115, - "step": 45482 - }, - { - "epoch": 23.719947848761407, - "grad_norm": 1.721711277961731, - "learning_rate": 5.480703517587939e-05, - "loss": 5.0098, - "step": 45483 - }, - { - "epoch": 23.720469361147327, - "grad_norm": 1.4925111532211304, - "learning_rate": 5.480603015075377e-05, - "loss": 5.543, - "step": 45484 - }, - { - "epoch": 23.720990873533246, - "grad_norm": 1.6744385957717896, - "learning_rate": 5.480502512562814e-05, - "loss": 4.4586, - "step": 45485 - }, - { - "epoch": 23.721512385919166, - "grad_norm": 1.5619972944259644, - "learning_rate": 5.480402010050252e-05, - "loss": 5.3269, - "step": 45486 - }, - { - "epoch": 23.722033898305085, - "grad_norm": 1.5236601829528809, - "learning_rate": 5.4803015075376884e-05, - "loss": 5.5697, - "step": 45487 - }, - { - "epoch": 23.722555410691005, - "grad_norm": 1.4542951583862305, - "learning_rate": 5.480201005025126e-05, - "loss": 4.5981, - "step": 45488 - }, - { - "epoch": 23.723076923076924, - "grad_norm": 1.4952211380004883, - "learning_rate": 5.4801005025125626e-05, - "loss": 5.5162, - "step": 45489 - }, - { - "epoch": 23.723598435462844, - "grad_norm": 1.6091101169586182, - "learning_rate": 5.4800000000000004e-05, - "loss": 5.4687, - "step": 45490 - }, - { - "epoch": 23.724119947848763, - "grad_norm": 1.7095035314559937, - "learning_rate": 5.4798994974874375e-05, - "loss": 5.1773, - "step": 45491 - }, - { - "epoch": 23.72464146023468, - "grad_norm": 1.5495740175247192, - "learning_rate": 5.479798994974875e-05, - "loss": 5.3442, - "step": 45492 - }, - { - "epoch": 23.7251629726206, - "grad_norm": 1.51512610912323, - "learning_rate": 5.479698492462312e-05, - "loss": 5.3729, - "step": 45493 - }, - { - "epoch": 23.725684485006518, - "grad_norm": 1.5282114744186401, - "learning_rate": 5.479597989949749e-05, - "loss": 5.5463, - "step": 45494 - }, - { - "epoch": 23.726205997392437, - "grad_norm": 1.5310368537902832, - "learning_rate": 5.4794974874371866e-05, - "loss": 5.4015, - "step": 45495 - }, - { - "epoch": 23.726727509778357, - "grad_norm": 1.4125651121139526, - "learning_rate": 5.479396984924623e-05, - "loss": 5.5265, - "step": 45496 - }, - { - "epoch": 23.727249022164276, - "grad_norm": 1.5056511163711548, - "learning_rate": 5.479296482412061e-05, - "loss": 5.1968, - "step": 45497 - }, - { - "epoch": 23.727770534550196, - "grad_norm": 1.4995546340942383, - "learning_rate": 5.479195979899497e-05, - "loss": 5.0752, - "step": 45498 - }, - { - "epoch": 23.728292046936115, - "grad_norm": 1.5345330238342285, - "learning_rate": 5.479095477386935e-05, - "loss": 5.1321, - "step": 45499 - }, - { - "epoch": 23.728813559322035, - "grad_norm": 1.6317113637924194, - "learning_rate": 5.478994974874372e-05, - "loss": 4.4865, - "step": 45500 - }, - { - "epoch": 23.729335071707954, - "grad_norm": 1.471787691116333, - "learning_rate": 5.47889447236181e-05, - "loss": 4.4581, - "step": 45501 - }, - { - "epoch": 23.729856584093874, - "grad_norm": 1.5323214530944824, - "learning_rate": 5.478793969849246e-05, - "loss": 5.2919, - "step": 45502 - }, - { - "epoch": 23.730378096479793, - "grad_norm": 1.5690193176269531, - "learning_rate": 5.478693467336684e-05, - "loss": 5.0818, - "step": 45503 - }, - { - "epoch": 23.73089960886571, - "grad_norm": 1.6435353755950928, - "learning_rate": 5.4785929648241205e-05, - "loss": 4.9514, - "step": 45504 - }, - { - "epoch": 23.73142112125163, - "grad_norm": 1.5845906734466553, - "learning_rate": 5.478492462311558e-05, - "loss": 5.1475, - "step": 45505 - }, - { - "epoch": 23.731942633637548, - "grad_norm": 1.4370909929275513, - "learning_rate": 5.4783919597989954e-05, - "loss": 5.6316, - "step": 45506 - }, - { - "epoch": 23.732464146023467, - "grad_norm": 1.515535593032837, - "learning_rate": 5.478291457286432e-05, - "loss": 5.3143, - "step": 45507 - }, - { - "epoch": 23.732985658409387, - "grad_norm": 1.5976325273513794, - "learning_rate": 5.4781909547738696e-05, - "loss": 4.8167, - "step": 45508 - }, - { - "epoch": 23.733507170795306, - "grad_norm": 1.4917724132537842, - "learning_rate": 5.478090452261306e-05, - "loss": 5.2286, - "step": 45509 - }, - { - "epoch": 23.734028683181226, - "grad_norm": 1.5505422353744507, - "learning_rate": 5.477989949748744e-05, - "loss": 4.9157, - "step": 45510 - }, - { - "epoch": 23.734550195567145, - "grad_norm": 1.5433779954910278, - "learning_rate": 5.477889447236181e-05, - "loss": 5.599, - "step": 45511 - }, - { - "epoch": 23.735071707953065, - "grad_norm": 1.5939396619796753, - "learning_rate": 5.477788944723619e-05, - "loss": 5.3695, - "step": 45512 - }, - { - "epoch": 23.735593220338984, - "grad_norm": 1.619171142578125, - "learning_rate": 5.477688442211055e-05, - "loss": 4.9266, - "step": 45513 - }, - { - "epoch": 23.736114732724904, - "grad_norm": 1.67328679561615, - "learning_rate": 5.477587939698493e-05, - "loss": 4.8544, - "step": 45514 - }, - { - "epoch": 23.736636245110823, - "grad_norm": 1.4480946063995361, - "learning_rate": 5.47748743718593e-05, - "loss": 5.7374, - "step": 45515 - }, - { - "epoch": 23.73715775749674, - "grad_norm": 1.5192031860351562, - "learning_rate": 5.477386934673368e-05, - "loss": 5.6697, - "step": 45516 - }, - { - "epoch": 23.73767926988266, - "grad_norm": 1.531114101409912, - "learning_rate": 5.477286432160804e-05, - "loss": 4.9454, - "step": 45517 - }, - { - "epoch": 23.738200782268578, - "grad_norm": 1.5330220460891724, - "learning_rate": 5.477185929648242e-05, - "loss": 5.4402, - "step": 45518 - }, - { - "epoch": 23.738722294654497, - "grad_norm": 1.4450418949127197, - "learning_rate": 5.4770854271356785e-05, - "loss": 5.4586, - "step": 45519 - }, - { - "epoch": 23.739243807040417, - "grad_norm": 1.5722280740737915, - "learning_rate": 5.4769849246231156e-05, - "loss": 5.0428, - "step": 45520 - }, - { - "epoch": 23.739765319426336, - "grad_norm": 1.5784227848052979, - "learning_rate": 5.4768844221105534e-05, - "loss": 4.9772, - "step": 45521 - }, - { - "epoch": 23.740286831812256, - "grad_norm": 1.465787649154663, - "learning_rate": 5.47678391959799e-05, - "loss": 5.3156, - "step": 45522 - }, - { - "epoch": 23.740808344198175, - "grad_norm": 1.4775776863098145, - "learning_rate": 5.4766834170854276e-05, - "loss": 5.7627, - "step": 45523 - }, - { - "epoch": 23.741329856584095, - "grad_norm": 1.5245797634124756, - "learning_rate": 5.476582914572864e-05, - "loss": 5.0894, - "step": 45524 - }, - { - "epoch": 23.741851368970014, - "grad_norm": 1.5634026527404785, - "learning_rate": 5.476482412060302e-05, - "loss": 5.0069, - "step": 45525 - }, - { - "epoch": 23.742372881355934, - "grad_norm": 1.5170888900756836, - "learning_rate": 5.476381909547739e-05, - "loss": 5.4412, - "step": 45526 - }, - { - "epoch": 23.742894393741853, - "grad_norm": 1.6286404132843018, - "learning_rate": 5.476281407035177e-05, - "loss": 5.2049, - "step": 45527 - }, - { - "epoch": 23.74341590612777, - "grad_norm": 1.634924292564392, - "learning_rate": 5.476180904522613e-05, - "loss": 5.1179, - "step": 45528 - }, - { - "epoch": 23.74393741851369, - "grad_norm": 1.6193797588348389, - "learning_rate": 5.476080402010051e-05, - "loss": 5.3293, - "step": 45529 - }, - { - "epoch": 23.744458930899608, - "grad_norm": 1.631040096282959, - "learning_rate": 5.475979899497487e-05, - "loss": 4.5779, - "step": 45530 - }, - { - "epoch": 23.744980443285527, - "grad_norm": 1.5833927392959595, - "learning_rate": 5.475879396984925e-05, - "loss": 4.867, - "step": 45531 - }, - { - "epoch": 23.745501955671447, - "grad_norm": 1.459072470664978, - "learning_rate": 5.475778894472362e-05, - "loss": 5.2162, - "step": 45532 - }, - { - "epoch": 23.746023468057366, - "grad_norm": 1.5365707874298096, - "learning_rate": 5.4756783919597986e-05, - "loss": 5.2657, - "step": 45533 - }, - { - "epoch": 23.746544980443286, - "grad_norm": 1.5581051111221313, - "learning_rate": 5.4755778894472364e-05, - "loss": 5.3734, - "step": 45534 - }, - { - "epoch": 23.747066492829205, - "grad_norm": 1.4761043787002563, - "learning_rate": 5.475477386934673e-05, - "loss": 5.4168, - "step": 45535 - }, - { - "epoch": 23.747588005215125, - "grad_norm": 1.5213805437088013, - "learning_rate": 5.4753768844221106e-05, - "loss": 5.2026, - "step": 45536 - }, - { - "epoch": 23.748109517601044, - "grad_norm": 1.5675749778747559, - "learning_rate": 5.475276381909548e-05, - "loss": 5.2758, - "step": 45537 - }, - { - "epoch": 23.748631029986964, - "grad_norm": 1.5312714576721191, - "learning_rate": 5.4751758793969855e-05, - "loss": 4.8544, - "step": 45538 - }, - { - "epoch": 23.749152542372883, - "grad_norm": 1.5608341693878174, - "learning_rate": 5.475075376884422e-05, - "loss": 5.5526, - "step": 45539 - }, - { - "epoch": 23.7496740547588, - "grad_norm": 1.5707173347473145, - "learning_rate": 5.47497487437186e-05, - "loss": 5.0062, - "step": 45540 - }, - { - "epoch": 23.75019556714472, - "grad_norm": 1.5798749923706055, - "learning_rate": 5.474874371859297e-05, - "loss": 5.3482, - "step": 45541 - }, - { - "epoch": 23.750717079530638, - "grad_norm": 1.6384940147399902, - "learning_rate": 5.4747738693467346e-05, - "loss": 4.9713, - "step": 45542 - }, - { - "epoch": 23.751238591916557, - "grad_norm": 1.6084184646606445, - "learning_rate": 5.474673366834171e-05, - "loss": 5.5291, - "step": 45543 - }, - { - "epoch": 23.751760104302477, - "grad_norm": 1.5509833097457886, - "learning_rate": 5.474572864321609e-05, - "loss": 5.0936, - "step": 45544 - }, - { - "epoch": 23.752281616688396, - "grad_norm": 1.5234421491622925, - "learning_rate": 5.474472361809045e-05, - "loss": 4.8785, - "step": 45545 - }, - { - "epoch": 23.752803129074316, - "grad_norm": 1.5684385299682617, - "learning_rate": 5.474371859296483e-05, - "loss": 5.442, - "step": 45546 - }, - { - "epoch": 23.753324641460235, - "grad_norm": 1.5926090478897095, - "learning_rate": 5.47427135678392e-05, - "loss": 4.8603, - "step": 45547 - }, - { - "epoch": 23.753846153846155, - "grad_norm": 1.6869227886199951, - "learning_rate": 5.4741708542713566e-05, - "loss": 4.8772, - "step": 45548 - }, - { - "epoch": 23.754367666232074, - "grad_norm": 1.5697087049484253, - "learning_rate": 5.474070351758794e-05, - "loss": 4.9237, - "step": 45549 - }, - { - "epoch": 23.754889178617994, - "grad_norm": 1.5976415872573853, - "learning_rate": 5.473969849246231e-05, - "loss": 5.4077, - "step": 45550 - }, - { - "epoch": 23.75541069100391, - "grad_norm": 1.5596749782562256, - "learning_rate": 5.4738693467336685e-05, - "loss": 5.501, - "step": 45551 - }, - { - "epoch": 23.75593220338983, - "grad_norm": 1.537361741065979, - "learning_rate": 5.4737688442211057e-05, - "loss": 5.2404, - "step": 45552 - }, - { - "epoch": 23.75645371577575, - "grad_norm": 1.53136146068573, - "learning_rate": 5.4736683417085434e-05, - "loss": 5.2777, - "step": 45553 - }, - { - "epoch": 23.756975228161668, - "grad_norm": 1.6369484663009644, - "learning_rate": 5.47356783919598e-05, - "loss": 5.2479, - "step": 45554 - }, - { - "epoch": 23.757496740547587, - "grad_norm": 1.6579148769378662, - "learning_rate": 5.4734673366834176e-05, - "loss": 5.2407, - "step": 45555 - }, - { - "epoch": 23.758018252933507, - "grad_norm": 1.6201000213623047, - "learning_rate": 5.473366834170854e-05, - "loss": 5.4951, - "step": 45556 - }, - { - "epoch": 23.758539765319426, - "grad_norm": 1.5092310905456543, - "learning_rate": 5.473266331658292e-05, - "loss": 5.5003, - "step": 45557 - }, - { - "epoch": 23.759061277705346, - "grad_norm": 1.520350694656372, - "learning_rate": 5.473165829145729e-05, - "loss": 5.1771, - "step": 45558 - }, - { - "epoch": 23.759582790091265, - "grad_norm": 1.580060362815857, - "learning_rate": 5.473065326633167e-05, - "loss": 5.0116, - "step": 45559 - }, - { - "epoch": 23.760104302477185, - "grad_norm": 1.5320247411727905, - "learning_rate": 5.472964824120603e-05, - "loss": 5.1429, - "step": 45560 - }, - { - "epoch": 23.760625814863104, - "grad_norm": 1.60117506980896, - "learning_rate": 5.47286432160804e-05, - "loss": 5.0238, - "step": 45561 - }, - { - "epoch": 23.761147327249024, - "grad_norm": 1.4952518939971924, - "learning_rate": 5.472763819095478e-05, - "loss": 5.4559, - "step": 45562 - }, - { - "epoch": 23.761668839634943, - "grad_norm": 1.5346426963806152, - "learning_rate": 5.4726633165829145e-05, - "loss": 5.5317, - "step": 45563 - }, - { - "epoch": 23.76219035202086, - "grad_norm": 1.591434359550476, - "learning_rate": 5.472562814070352e-05, - "loss": 5.2117, - "step": 45564 - }, - { - "epoch": 23.76271186440678, - "grad_norm": 1.5169031620025635, - "learning_rate": 5.472462311557789e-05, - "loss": 5.1653, - "step": 45565 - }, - { - "epoch": 23.763233376792698, - "grad_norm": 1.6451448202133179, - "learning_rate": 5.4723618090452265e-05, - "loss": 5.3259, - "step": 45566 - }, - { - "epoch": 23.763754889178617, - "grad_norm": 1.5538430213928223, - "learning_rate": 5.4722613065326636e-05, - "loss": 5.5687, - "step": 45567 - }, - { - "epoch": 23.764276401564537, - "grad_norm": 1.5899289846420288, - "learning_rate": 5.4721608040201014e-05, - "loss": 5.3915, - "step": 45568 - }, - { - "epoch": 23.764797913950456, - "grad_norm": 1.5633107423782349, - "learning_rate": 5.472060301507538e-05, - "loss": 5.1921, - "step": 45569 - }, - { - "epoch": 23.765319426336376, - "grad_norm": 1.5991624593734741, - "learning_rate": 5.4719597989949756e-05, - "loss": 5.5313, - "step": 45570 - }, - { - "epoch": 23.765840938722295, - "grad_norm": 1.6889333724975586, - "learning_rate": 5.471859296482412e-05, - "loss": 5.1803, - "step": 45571 - }, - { - "epoch": 23.766362451108215, - "grad_norm": 1.661473035812378, - "learning_rate": 5.47175879396985e-05, - "loss": 4.9393, - "step": 45572 - }, - { - "epoch": 23.766883963494134, - "grad_norm": 1.5111528635025024, - "learning_rate": 5.471658291457287e-05, - "loss": 5.6494, - "step": 45573 - }, - { - "epoch": 23.767405475880054, - "grad_norm": 1.5687949657440186, - "learning_rate": 5.471557788944723e-05, - "loss": 5.0417, - "step": 45574 - }, - { - "epoch": 23.76792698826597, - "grad_norm": 1.5879193544387817, - "learning_rate": 5.471457286432161e-05, - "loss": 4.5277, - "step": 45575 - }, - { - "epoch": 23.76844850065189, - "grad_norm": 1.6603033542633057, - "learning_rate": 5.4713567839195975e-05, - "loss": 5.1101, - "step": 45576 - }, - { - "epoch": 23.76897001303781, - "grad_norm": 1.4515095949172974, - "learning_rate": 5.471256281407035e-05, - "loss": 5.3845, - "step": 45577 - }, - { - "epoch": 23.769491525423728, - "grad_norm": 1.6455930471420288, - "learning_rate": 5.4711557788944724e-05, - "loss": 5.2771, - "step": 45578 - }, - { - "epoch": 23.770013037809647, - "grad_norm": 1.5000885725021362, - "learning_rate": 5.47105527638191e-05, - "loss": 5.6286, - "step": 45579 - }, - { - "epoch": 23.770534550195567, - "grad_norm": 1.471977710723877, - "learning_rate": 5.4709547738693466e-05, - "loss": 5.4358, - "step": 45580 - }, - { - "epoch": 23.771056062581486, - "grad_norm": 1.4891124963760376, - "learning_rate": 5.4708542713567844e-05, - "loss": 5.4437, - "step": 45581 - }, - { - "epoch": 23.771577574967406, - "grad_norm": 1.5085984468460083, - "learning_rate": 5.4707537688442215e-05, - "loss": 5.5172, - "step": 45582 - }, - { - "epoch": 23.772099087353325, - "grad_norm": 1.5924235582351685, - "learning_rate": 5.470653266331659e-05, - "loss": 5.3704, - "step": 45583 - }, - { - "epoch": 23.772620599739245, - "grad_norm": 1.5920051336288452, - "learning_rate": 5.470552763819096e-05, - "loss": 5.1687, - "step": 45584 - }, - { - "epoch": 23.773142112125164, - "grad_norm": 1.5796808004379272, - "learning_rate": 5.4704522613065335e-05, - "loss": 5.5073, - "step": 45585 - }, - { - "epoch": 23.773663624511084, - "grad_norm": 1.583308458328247, - "learning_rate": 5.47035175879397e-05, - "loss": 5.4615, - "step": 45586 - }, - { - "epoch": 23.774185136897, - "grad_norm": 1.4598954916000366, - "learning_rate": 5.470251256281407e-05, - "loss": 5.2191, - "step": 45587 - }, - { - "epoch": 23.77470664928292, - "grad_norm": 1.5341423749923706, - "learning_rate": 5.470150753768845e-05, - "loss": 5.1383, - "step": 45588 - }, - { - "epoch": 23.77522816166884, - "grad_norm": 1.5312010049819946, - "learning_rate": 5.470050251256281e-05, - "loss": 5.5275, - "step": 45589 - }, - { - "epoch": 23.775749674054758, - "grad_norm": 1.5399258136749268, - "learning_rate": 5.469949748743719e-05, - "loss": 5.0674, - "step": 45590 - }, - { - "epoch": 23.776271186440677, - "grad_norm": 1.561847448348999, - "learning_rate": 5.4698492462311555e-05, - "loss": 5.0787, - "step": 45591 - }, - { - "epoch": 23.776792698826597, - "grad_norm": 1.6160967350006104, - "learning_rate": 5.469748743718593e-05, - "loss": 5.1464, - "step": 45592 - }, - { - "epoch": 23.777314211212516, - "grad_norm": 1.6874266862869263, - "learning_rate": 5.4696482412060304e-05, - "loss": 4.8269, - "step": 45593 - }, - { - "epoch": 23.777835723598436, - "grad_norm": 1.64320707321167, - "learning_rate": 5.469547738693468e-05, - "loss": 5.3372, - "step": 45594 - }, - { - "epoch": 23.778357235984355, - "grad_norm": 1.5179800987243652, - "learning_rate": 5.4694472361809046e-05, - "loss": 5.0057, - "step": 45595 - }, - { - "epoch": 23.778878748370275, - "grad_norm": 1.5386121273040771, - "learning_rate": 5.4693467336683423e-05, - "loss": 5.4595, - "step": 45596 - }, - { - "epoch": 23.779400260756194, - "grad_norm": 1.5363490581512451, - "learning_rate": 5.469246231155779e-05, - "loss": 5.23, - "step": 45597 - }, - { - "epoch": 23.779921773142114, - "grad_norm": 1.564741849899292, - "learning_rate": 5.4691457286432166e-05, - "loss": 5.337, - "step": 45598 - }, - { - "epoch": 23.78044328552803, - "grad_norm": 1.522445797920227, - "learning_rate": 5.4690452261306537e-05, - "loss": 4.8393, - "step": 45599 - }, - { - "epoch": 23.78096479791395, - "grad_norm": 1.7139244079589844, - "learning_rate": 5.46894472361809e-05, - "loss": 5.0846, - "step": 45600 - }, - { - "epoch": 23.78148631029987, - "grad_norm": 1.5847437381744385, - "learning_rate": 5.468844221105528e-05, - "loss": 5.1546, - "step": 45601 - }, - { - "epoch": 23.782007822685788, - "grad_norm": 1.4729623794555664, - "learning_rate": 5.468743718592965e-05, - "loss": 5.3434, - "step": 45602 - }, - { - "epoch": 23.782529335071708, - "grad_norm": 1.5097603797912598, - "learning_rate": 5.468643216080403e-05, - "loss": 5.3511, - "step": 45603 - }, - { - "epoch": 23.783050847457627, - "grad_norm": 1.4371808767318726, - "learning_rate": 5.468542713567839e-05, - "loss": 5.2706, - "step": 45604 - }, - { - "epoch": 23.783572359843546, - "grad_norm": 1.5548120737075806, - "learning_rate": 5.468442211055277e-05, - "loss": 5.1314, - "step": 45605 - }, - { - "epoch": 23.784093872229466, - "grad_norm": 1.6140737533569336, - "learning_rate": 5.4683417085427134e-05, - "loss": 4.4126, - "step": 45606 - }, - { - "epoch": 23.784615384615385, - "grad_norm": 1.5003042221069336, - "learning_rate": 5.468241206030151e-05, - "loss": 5.1528, - "step": 45607 - }, - { - "epoch": 23.785136897001305, - "grad_norm": 1.5096466541290283, - "learning_rate": 5.468140703517588e-05, - "loss": 5.626, - "step": 45608 - }, - { - "epoch": 23.785658409387224, - "grad_norm": 1.5922995805740356, - "learning_rate": 5.468040201005026e-05, - "loss": 5.0398, - "step": 45609 - }, - { - "epoch": 23.786179921773144, - "grad_norm": 1.5243654251098633, - "learning_rate": 5.4679396984924625e-05, - "loss": 5.2806, - "step": 45610 - }, - { - "epoch": 23.78670143415906, - "grad_norm": 1.5414623022079468, - "learning_rate": 5.4678391959799e-05, - "loss": 5.2246, - "step": 45611 - }, - { - "epoch": 23.78722294654498, - "grad_norm": 1.5485609769821167, - "learning_rate": 5.467738693467337e-05, - "loss": 5.0763, - "step": 45612 - }, - { - "epoch": 23.7877444589309, - "grad_norm": 1.4935978651046753, - "learning_rate": 5.467638190954774e-05, - "loss": 5.3177, - "step": 45613 - }, - { - "epoch": 23.788265971316818, - "grad_norm": 1.5225372314453125, - "learning_rate": 5.4675376884422116e-05, - "loss": 5.1687, - "step": 45614 - }, - { - "epoch": 23.788787483702738, - "grad_norm": 1.5105504989624023, - "learning_rate": 5.467437185929648e-05, - "loss": 5.3346, - "step": 45615 - }, - { - "epoch": 23.789308996088657, - "grad_norm": 1.578044056892395, - "learning_rate": 5.467336683417086e-05, - "loss": 5.3649, - "step": 45616 - }, - { - "epoch": 23.789830508474576, - "grad_norm": 1.566383719444275, - "learning_rate": 5.467236180904522e-05, - "loss": 5.363, - "step": 45617 - }, - { - "epoch": 23.790352020860496, - "grad_norm": 1.5391912460327148, - "learning_rate": 5.46713567839196e-05, - "loss": 5.4415, - "step": 45618 - }, - { - "epoch": 23.790873533246415, - "grad_norm": 1.4659645557403564, - "learning_rate": 5.467035175879397e-05, - "loss": 5.426, - "step": 45619 - }, - { - "epoch": 23.791395045632335, - "grad_norm": 1.5128706693649292, - "learning_rate": 5.466934673366835e-05, - "loss": 4.6104, - "step": 45620 - }, - { - "epoch": 23.791916558018254, - "grad_norm": 1.4967721700668335, - "learning_rate": 5.466834170854271e-05, - "loss": 5.2518, - "step": 45621 - }, - { - "epoch": 23.792438070404174, - "grad_norm": 1.5066938400268555, - "learning_rate": 5.466733668341709e-05, - "loss": 5.5306, - "step": 45622 - }, - { - "epoch": 23.79295958279009, - "grad_norm": 1.6101067066192627, - "learning_rate": 5.4666331658291455e-05, - "loss": 4.4855, - "step": 45623 - }, - { - "epoch": 23.79348109517601, - "grad_norm": 1.513290524482727, - "learning_rate": 5.466532663316583e-05, - "loss": 5.6409, - "step": 45624 - }, - { - "epoch": 23.79400260756193, - "grad_norm": 1.5240391492843628, - "learning_rate": 5.4664321608040204e-05, - "loss": 5.3027, - "step": 45625 - }, - { - "epoch": 23.794524119947848, - "grad_norm": 1.6967500448226929, - "learning_rate": 5.466331658291458e-05, - "loss": 5.3196, - "step": 45626 - }, - { - "epoch": 23.795045632333768, - "grad_norm": 1.4742120504379272, - "learning_rate": 5.4662311557788946e-05, - "loss": 5.4384, - "step": 45627 - }, - { - "epoch": 23.795567144719687, - "grad_norm": 1.6050699949264526, - "learning_rate": 5.466130653266332e-05, - "loss": 5.3646, - "step": 45628 - }, - { - "epoch": 23.796088657105607, - "grad_norm": 1.526998519897461, - "learning_rate": 5.4660301507537695e-05, - "loss": 5.3179, - "step": 45629 - }, - { - "epoch": 23.796610169491526, - "grad_norm": 1.6623457670211792, - "learning_rate": 5.465929648241206e-05, - "loss": 5.2424, - "step": 45630 - }, - { - "epoch": 23.797131681877445, - "grad_norm": 1.6069891452789307, - "learning_rate": 5.465829145728644e-05, - "loss": 5.4848, - "step": 45631 - }, - { - "epoch": 23.797653194263365, - "grad_norm": 1.5887254476547241, - "learning_rate": 5.46572864321608e-05, - "loss": 5.6769, - "step": 45632 - }, - { - "epoch": 23.798174706649284, - "grad_norm": 1.463175892829895, - "learning_rate": 5.465628140703518e-05, - "loss": 5.4114, - "step": 45633 - }, - { - "epoch": 23.7986962190352, - "grad_norm": 1.4851170778274536, - "learning_rate": 5.465527638190955e-05, - "loss": 5.2941, - "step": 45634 - }, - { - "epoch": 23.79921773142112, - "grad_norm": 1.5653049945831299, - "learning_rate": 5.465427135678393e-05, - "loss": 5.2004, - "step": 45635 - }, - { - "epoch": 23.79973924380704, - "grad_norm": 1.502803921699524, - "learning_rate": 5.465326633165829e-05, - "loss": 5.1226, - "step": 45636 - }, - { - "epoch": 23.80026075619296, - "grad_norm": 1.555456280708313, - "learning_rate": 5.465226130653267e-05, - "loss": 5.6344, - "step": 45637 - }, - { - "epoch": 23.800782268578878, - "grad_norm": 1.564927577972412, - "learning_rate": 5.4651256281407035e-05, - "loss": 4.7929, - "step": 45638 - }, - { - "epoch": 23.801303780964798, - "grad_norm": 1.5598373413085938, - "learning_rate": 5.465025125628141e-05, - "loss": 4.9837, - "step": 45639 - }, - { - "epoch": 23.801825293350717, - "grad_norm": 1.5646661520004272, - "learning_rate": 5.4649246231155784e-05, - "loss": 5.3377, - "step": 45640 - }, - { - "epoch": 23.802346805736637, - "grad_norm": 1.4906476736068726, - "learning_rate": 5.464824120603015e-05, - "loss": 5.4458, - "step": 45641 - }, - { - "epoch": 23.802868318122556, - "grad_norm": 1.566502571105957, - "learning_rate": 5.4647236180904526e-05, - "loss": 5.2145, - "step": 45642 - }, - { - "epoch": 23.803389830508475, - "grad_norm": 1.5425204038619995, - "learning_rate": 5.464623115577889e-05, - "loss": 5.169, - "step": 45643 - }, - { - "epoch": 23.803911342894395, - "grad_norm": 1.5663312673568726, - "learning_rate": 5.464522613065327e-05, - "loss": 5.2089, - "step": 45644 - }, - { - "epoch": 23.804432855280314, - "grad_norm": 1.6487261056900024, - "learning_rate": 5.464422110552764e-05, - "loss": 5.1783, - "step": 45645 - }, - { - "epoch": 23.804954367666234, - "grad_norm": 1.7251083850860596, - "learning_rate": 5.464321608040202e-05, - "loss": 5.4514, - "step": 45646 - }, - { - "epoch": 23.80547588005215, - "grad_norm": 1.53765070438385, - "learning_rate": 5.464221105527638e-05, - "loss": 5.0057, - "step": 45647 - }, - { - "epoch": 23.80599739243807, - "grad_norm": 1.654558777809143, - "learning_rate": 5.464120603015076e-05, - "loss": 5.5086, - "step": 45648 - }, - { - "epoch": 23.80651890482399, - "grad_norm": 1.5266557931900024, - "learning_rate": 5.464020100502513e-05, - "loss": 5.03, - "step": 45649 - }, - { - "epoch": 23.807040417209908, - "grad_norm": 1.5755503177642822, - "learning_rate": 5.463919597989951e-05, - "loss": 5.2452, - "step": 45650 - }, - { - "epoch": 23.807561929595828, - "grad_norm": 1.5978163480758667, - "learning_rate": 5.463819095477387e-05, - "loss": 5.3655, - "step": 45651 - }, - { - "epoch": 23.808083441981747, - "grad_norm": 1.6048599481582642, - "learning_rate": 5.463718592964825e-05, - "loss": 4.978, - "step": 45652 - }, - { - "epoch": 23.808604954367667, - "grad_norm": 1.467402696609497, - "learning_rate": 5.4636180904522614e-05, - "loss": 5.325, - "step": 45653 - }, - { - "epoch": 23.809126466753586, - "grad_norm": 1.495947241783142, - "learning_rate": 5.4635175879396985e-05, - "loss": 5.4659, - "step": 45654 - }, - { - "epoch": 23.809647979139505, - "grad_norm": 1.57086181640625, - "learning_rate": 5.463417085427136e-05, - "loss": 5.1865, - "step": 45655 - }, - { - "epoch": 23.810169491525425, - "grad_norm": 1.5319677591323853, - "learning_rate": 5.463316582914573e-05, - "loss": 5.5961, - "step": 45656 - }, - { - "epoch": 23.810691003911344, - "grad_norm": 1.6074886322021484, - "learning_rate": 5.4632160804020105e-05, - "loss": 5.2843, - "step": 45657 - }, - { - "epoch": 23.81121251629726, - "grad_norm": 1.5399596691131592, - "learning_rate": 5.463115577889447e-05, - "loss": 4.7537, - "step": 45658 - }, - { - "epoch": 23.81173402868318, - "grad_norm": 1.5703710317611694, - "learning_rate": 5.463015075376885e-05, - "loss": 5.2689, - "step": 45659 - }, - { - "epoch": 23.8122555410691, - "grad_norm": 1.573734164237976, - "learning_rate": 5.462914572864322e-05, - "loss": 5.2427, - "step": 45660 - }, - { - "epoch": 23.81277705345502, - "grad_norm": 1.5512616634368896, - "learning_rate": 5.4628140703517596e-05, - "loss": 5.505, - "step": 45661 - }, - { - "epoch": 23.813298565840938, - "grad_norm": 1.673316240310669, - "learning_rate": 5.462713567839196e-05, - "loss": 5.3981, - "step": 45662 - }, - { - "epoch": 23.813820078226858, - "grad_norm": 1.6535769701004028, - "learning_rate": 5.462613065326634e-05, - "loss": 5.2198, - "step": 45663 - }, - { - "epoch": 23.814341590612777, - "grad_norm": 1.7144695520401, - "learning_rate": 5.46251256281407e-05, - "loss": 5.4355, - "step": 45664 - }, - { - "epoch": 23.814863102998697, - "grad_norm": 1.5820033550262451, - "learning_rate": 5.462412060301508e-05, - "loss": 5.0149, - "step": 45665 - }, - { - "epoch": 23.815384615384616, - "grad_norm": 1.4936332702636719, - "learning_rate": 5.462311557788945e-05, - "loss": 5.1354, - "step": 45666 - }, - { - "epoch": 23.815906127770536, - "grad_norm": 1.5168439149856567, - "learning_rate": 5.4622110552763816e-05, - "loss": 5.3773, - "step": 45667 - }, - { - "epoch": 23.816427640156455, - "grad_norm": 1.6429424285888672, - "learning_rate": 5.462110552763819e-05, - "loss": 5.0809, - "step": 45668 - }, - { - "epoch": 23.816949152542374, - "grad_norm": 2.0559117794036865, - "learning_rate": 5.4620100502512564e-05, - "loss": 4.6248, - "step": 45669 - }, - { - "epoch": 23.81747066492829, - "grad_norm": 1.6072907447814941, - "learning_rate": 5.461909547738694e-05, - "loss": 4.6996, - "step": 45670 - }, - { - "epoch": 23.81799217731421, - "grad_norm": 1.5074412822723389, - "learning_rate": 5.4618090452261307e-05, - "loss": 5.1692, - "step": 45671 - }, - { - "epoch": 23.81851368970013, - "grad_norm": 1.5723017454147339, - "learning_rate": 5.4617085427135684e-05, - "loss": 5.4292, - "step": 45672 - }, - { - "epoch": 23.81903520208605, - "grad_norm": 1.530968427658081, - "learning_rate": 5.461608040201005e-05, - "loss": 5.3773, - "step": 45673 - }, - { - "epoch": 23.819556714471968, - "grad_norm": 1.6167012453079224, - "learning_rate": 5.4615075376884426e-05, - "loss": 5.3367, - "step": 45674 - }, - { - "epoch": 23.820078226857888, - "grad_norm": 1.6550828218460083, - "learning_rate": 5.46140703517588e-05, - "loss": 5.2219, - "step": 45675 - }, - { - "epoch": 23.820599739243807, - "grad_norm": 1.5960091352462769, - "learning_rate": 5.4613065326633175e-05, - "loss": 4.8715, - "step": 45676 - }, - { - "epoch": 23.821121251629727, - "grad_norm": 1.5195292234420776, - "learning_rate": 5.461206030150754e-05, - "loss": 5.294, - "step": 45677 - }, - { - "epoch": 23.821642764015646, - "grad_norm": 1.543068289756775, - "learning_rate": 5.461105527638192e-05, - "loss": 5.0974, - "step": 45678 - }, - { - "epoch": 23.822164276401566, - "grad_norm": 1.5892126560211182, - "learning_rate": 5.461005025125628e-05, - "loss": 4.8178, - "step": 45679 - }, - { - "epoch": 23.822685788787485, - "grad_norm": 1.594329595565796, - "learning_rate": 5.460904522613065e-05, - "loss": 5.1244, - "step": 45680 - }, - { - "epoch": 23.823207301173404, - "grad_norm": 1.6278249025344849, - "learning_rate": 5.460804020100503e-05, - "loss": 5.0369, - "step": 45681 - }, - { - "epoch": 23.82372881355932, - "grad_norm": 1.6407253742218018, - "learning_rate": 5.4607035175879395e-05, - "loss": 5.4542, - "step": 45682 - }, - { - "epoch": 23.82425032594524, - "grad_norm": 1.4998034238815308, - "learning_rate": 5.460603015075377e-05, - "loss": 4.4948, - "step": 45683 - }, - { - "epoch": 23.82477183833116, - "grad_norm": 1.5681369304656982, - "learning_rate": 5.460502512562814e-05, - "loss": 5.03, - "step": 45684 - }, - { - "epoch": 23.82529335071708, - "grad_norm": 1.566685676574707, - "learning_rate": 5.4604020100502515e-05, - "loss": 5.4447, - "step": 45685 - }, - { - "epoch": 23.825814863103, - "grad_norm": 1.536383867263794, - "learning_rate": 5.4603015075376886e-05, - "loss": 5.0774, - "step": 45686 - }, - { - "epoch": 23.826336375488918, - "grad_norm": 1.5938314199447632, - "learning_rate": 5.4602010050251264e-05, - "loss": 5.4869, - "step": 45687 - }, - { - "epoch": 23.826857887874837, - "grad_norm": 1.5806763172149658, - "learning_rate": 5.460100502512563e-05, - "loss": 5.3257, - "step": 45688 - }, - { - "epoch": 23.827379400260757, - "grad_norm": 1.4978464841842651, - "learning_rate": 5.4600000000000006e-05, - "loss": 5.0467, - "step": 45689 - }, - { - "epoch": 23.827900912646676, - "grad_norm": 1.521727442741394, - "learning_rate": 5.459899497487437e-05, - "loss": 5.4827, - "step": 45690 - }, - { - "epoch": 23.828422425032596, - "grad_norm": 1.478752851486206, - "learning_rate": 5.459798994974875e-05, - "loss": 4.9442, - "step": 45691 - }, - { - "epoch": 23.828943937418515, - "grad_norm": 1.5004795789718628, - "learning_rate": 5.459698492462312e-05, - "loss": 5.3536, - "step": 45692 - }, - { - "epoch": 23.829465449804434, - "grad_norm": 1.4914175271987915, - "learning_rate": 5.459597989949748e-05, - "loss": 5.4205, - "step": 45693 - }, - { - "epoch": 23.82998696219035, - "grad_norm": 1.5124778747558594, - "learning_rate": 5.459497487437186e-05, - "loss": 5.4302, - "step": 45694 - }, - { - "epoch": 23.83050847457627, - "grad_norm": 1.5605061054229736, - "learning_rate": 5.459396984924623e-05, - "loss": 5.3654, - "step": 45695 - }, - { - "epoch": 23.83102998696219, - "grad_norm": 1.5202621221542358, - "learning_rate": 5.459296482412061e-05, - "loss": 5.2558, - "step": 45696 - }, - { - "epoch": 23.83155149934811, - "grad_norm": 1.5348337888717651, - "learning_rate": 5.4591959798994974e-05, - "loss": 5.3585, - "step": 45697 - }, - { - "epoch": 23.83207301173403, - "grad_norm": 1.4834576845169067, - "learning_rate": 5.459095477386935e-05, - "loss": 5.7349, - "step": 45698 - }, - { - "epoch": 23.832594524119948, - "grad_norm": 1.663902759552002, - "learning_rate": 5.4589949748743716e-05, - "loss": 4.9678, - "step": 45699 - }, - { - "epoch": 23.833116036505867, - "grad_norm": 1.4950532913208008, - "learning_rate": 5.4588944723618094e-05, - "loss": 5.215, - "step": 45700 - }, - { - "epoch": 23.833637548891787, - "grad_norm": 1.5009773969650269, - "learning_rate": 5.4587939698492465e-05, - "loss": 5.5561, - "step": 45701 - }, - { - "epoch": 23.834159061277706, - "grad_norm": 1.5879104137420654, - "learning_rate": 5.458693467336684e-05, - "loss": 5.0261, - "step": 45702 - }, - { - "epoch": 23.834680573663626, - "grad_norm": 1.5260707139968872, - "learning_rate": 5.458592964824121e-05, - "loss": 5.1565, - "step": 45703 - }, - { - "epoch": 23.835202086049545, - "grad_norm": 1.5117911100387573, - "learning_rate": 5.4584924623115585e-05, - "loss": 5.14, - "step": 45704 - }, - { - "epoch": 23.835723598435465, - "grad_norm": 1.5996063947677612, - "learning_rate": 5.458391959798995e-05, - "loss": 5.0497, - "step": 45705 - }, - { - "epoch": 23.83624511082138, - "grad_norm": 1.6874775886535645, - "learning_rate": 5.458291457286432e-05, - "loss": 4.8702, - "step": 45706 - }, - { - "epoch": 23.8367666232073, - "grad_norm": 1.5974851846694946, - "learning_rate": 5.45819095477387e-05, - "loss": 5.5712, - "step": 45707 - }, - { - "epoch": 23.83728813559322, - "grad_norm": 1.5320372581481934, - "learning_rate": 5.458090452261306e-05, - "loss": 5.0822, - "step": 45708 - }, - { - "epoch": 23.83780964797914, - "grad_norm": 1.510243535041809, - "learning_rate": 5.457989949748744e-05, - "loss": 5.4158, - "step": 45709 - }, - { - "epoch": 23.83833116036506, - "grad_norm": 1.4690251350402832, - "learning_rate": 5.4578894472361805e-05, - "loss": 5.1816, - "step": 45710 - }, - { - "epoch": 23.838852672750978, - "grad_norm": 1.6103930473327637, - "learning_rate": 5.457788944723618e-05, - "loss": 5.0808, - "step": 45711 - }, - { - "epoch": 23.839374185136897, - "grad_norm": 1.5905827283859253, - "learning_rate": 5.4576884422110554e-05, - "loss": 5.3192, - "step": 45712 - }, - { - "epoch": 23.839895697522817, - "grad_norm": 1.5647701025009155, - "learning_rate": 5.457587939698493e-05, - "loss": 5.3459, - "step": 45713 - }, - { - "epoch": 23.840417209908736, - "grad_norm": 1.488312005996704, - "learning_rate": 5.4574874371859296e-05, - "loss": 5.1826, - "step": 45714 - }, - { - "epoch": 23.840938722294656, - "grad_norm": 1.6420440673828125, - "learning_rate": 5.4573869346733673e-05, - "loss": 5.2485, - "step": 45715 - }, - { - "epoch": 23.841460234680575, - "grad_norm": 1.7368983030319214, - "learning_rate": 5.4572864321608045e-05, - "loss": 4.6659, - "step": 45716 - }, - { - "epoch": 23.841981747066495, - "grad_norm": 1.5079989433288574, - "learning_rate": 5.457185929648242e-05, - "loss": 5.4024, - "step": 45717 - }, - { - "epoch": 23.84250325945241, - "grad_norm": 1.5000303983688354, - "learning_rate": 5.457085427135679e-05, - "loss": 5.3031, - "step": 45718 - }, - { - "epoch": 23.84302477183833, - "grad_norm": 1.5231084823608398, - "learning_rate": 5.4569849246231164e-05, - "loss": 5.1191, - "step": 45719 - }, - { - "epoch": 23.84354628422425, - "grad_norm": 1.5415656566619873, - "learning_rate": 5.456884422110553e-05, - "loss": 5.1228, - "step": 45720 - }, - { - "epoch": 23.84406779661017, - "grad_norm": 1.5478147268295288, - "learning_rate": 5.45678391959799e-05, - "loss": 5.3524, - "step": 45721 - }, - { - "epoch": 23.84458930899609, - "grad_norm": 1.5636478662490845, - "learning_rate": 5.456683417085428e-05, - "loss": 5.2231, - "step": 45722 - }, - { - "epoch": 23.845110821382008, - "grad_norm": 1.559726595878601, - "learning_rate": 5.456582914572864e-05, - "loss": 5.0369, - "step": 45723 - }, - { - "epoch": 23.845632333767927, - "grad_norm": 1.5596660375595093, - "learning_rate": 5.456482412060302e-05, - "loss": 5.2007, - "step": 45724 - }, - { - "epoch": 23.846153846153847, - "grad_norm": 1.5686088800430298, - "learning_rate": 5.4563819095477384e-05, - "loss": 5.1237, - "step": 45725 - }, - { - "epoch": 23.846675358539766, - "grad_norm": 1.5370724201202393, - "learning_rate": 5.456281407035176e-05, - "loss": 5.3505, - "step": 45726 - }, - { - "epoch": 23.847196870925686, - "grad_norm": 1.808107614517212, - "learning_rate": 5.456180904522613e-05, - "loss": 5.0098, - "step": 45727 - }, - { - "epoch": 23.847718383311605, - "grad_norm": 1.6387404203414917, - "learning_rate": 5.456080402010051e-05, - "loss": 5.1394, - "step": 45728 - }, - { - "epoch": 23.848239895697525, - "grad_norm": 1.5786752700805664, - "learning_rate": 5.4559798994974875e-05, - "loss": 4.8787, - "step": 45729 - }, - { - "epoch": 23.84876140808344, - "grad_norm": 1.5562694072723389, - "learning_rate": 5.455879396984925e-05, - "loss": 5.3608, - "step": 45730 - }, - { - "epoch": 23.84928292046936, - "grad_norm": 1.571564793586731, - "learning_rate": 5.455778894472362e-05, - "loss": 5.3569, - "step": 45731 - }, - { - "epoch": 23.84980443285528, - "grad_norm": 1.517909288406372, - "learning_rate": 5.4556783919597995e-05, - "loss": 5.233, - "step": 45732 - }, - { - "epoch": 23.8503259452412, - "grad_norm": 1.5479347705841064, - "learning_rate": 5.4555778894472366e-05, - "loss": 5.1617, - "step": 45733 - }, - { - "epoch": 23.85084745762712, - "grad_norm": 1.5677481889724731, - "learning_rate": 5.455477386934673e-05, - "loss": 4.386, - "step": 45734 - }, - { - "epoch": 23.851368970013038, - "grad_norm": 1.4351811408996582, - "learning_rate": 5.455376884422111e-05, - "loss": 4.9599, - "step": 45735 - }, - { - "epoch": 23.851890482398957, - "grad_norm": 1.6870310306549072, - "learning_rate": 5.455276381909548e-05, - "loss": 5.3797, - "step": 45736 - }, - { - "epoch": 23.852411994784877, - "grad_norm": 1.5158500671386719, - "learning_rate": 5.455175879396986e-05, - "loss": 4.9093, - "step": 45737 - }, - { - "epoch": 23.852933507170796, - "grad_norm": 1.5210378170013428, - "learning_rate": 5.455075376884422e-05, - "loss": 5.2741, - "step": 45738 - }, - { - "epoch": 23.853455019556716, - "grad_norm": 1.5630955696105957, - "learning_rate": 5.45497487437186e-05, - "loss": 4.6626, - "step": 45739 - }, - { - "epoch": 23.853976531942635, - "grad_norm": 1.6099202632904053, - "learning_rate": 5.454874371859296e-05, - "loss": 5.2313, - "step": 45740 - }, - { - "epoch": 23.85449804432855, - "grad_norm": 1.541640043258667, - "learning_rate": 5.454773869346734e-05, - "loss": 5.5699, - "step": 45741 - }, - { - "epoch": 23.85501955671447, - "grad_norm": 1.5464141368865967, - "learning_rate": 5.454673366834171e-05, - "loss": 4.3741, - "step": 45742 - }, - { - "epoch": 23.85554106910039, - "grad_norm": 1.5519157648086548, - "learning_rate": 5.454572864321609e-05, - "loss": 4.9094, - "step": 45743 - }, - { - "epoch": 23.85606258148631, - "grad_norm": 1.5708643198013306, - "learning_rate": 5.4544723618090454e-05, - "loss": 5.0217, - "step": 45744 - }, - { - "epoch": 23.85658409387223, - "grad_norm": 1.517678141593933, - "learning_rate": 5.454371859296483e-05, - "loss": 5.6667, - "step": 45745 - }, - { - "epoch": 23.85710560625815, - "grad_norm": 1.5696351528167725, - "learning_rate": 5.4542713567839196e-05, - "loss": 4.7892, - "step": 45746 - }, - { - "epoch": 23.857627118644068, - "grad_norm": 1.5545626878738403, - "learning_rate": 5.454170854271357e-05, - "loss": 5.1106, - "step": 45747 - }, - { - "epoch": 23.858148631029987, - "grad_norm": 1.3935390710830688, - "learning_rate": 5.4540703517587945e-05, - "loss": 5.6974, - "step": 45748 - }, - { - "epoch": 23.858670143415907, - "grad_norm": 1.4122824668884277, - "learning_rate": 5.453969849246231e-05, - "loss": 5.413, - "step": 45749 - }, - { - "epoch": 23.859191655801826, - "grad_norm": 1.5244327783584595, - "learning_rate": 5.453869346733669e-05, - "loss": 5.6904, - "step": 45750 - }, - { - "epoch": 23.859713168187746, - "grad_norm": 1.5274535417556763, - "learning_rate": 5.453768844221105e-05, - "loss": 5.0018, - "step": 45751 - }, - { - "epoch": 23.860234680573665, - "grad_norm": 1.535589575767517, - "learning_rate": 5.453668341708543e-05, - "loss": 4.7691, - "step": 45752 - }, - { - "epoch": 23.860756192959585, - "grad_norm": 1.4804866313934326, - "learning_rate": 5.45356783919598e-05, - "loss": 5.453, - "step": 45753 - }, - { - "epoch": 23.8612777053455, - "grad_norm": 1.4854577779769897, - "learning_rate": 5.453467336683418e-05, - "loss": 5.4004, - "step": 45754 - }, - { - "epoch": 23.86179921773142, - "grad_norm": 1.5883245468139648, - "learning_rate": 5.453366834170854e-05, - "loss": 5.6003, - "step": 45755 - }, - { - "epoch": 23.86232073011734, - "grad_norm": 1.4717642068862915, - "learning_rate": 5.453266331658292e-05, - "loss": 5.5304, - "step": 45756 - }, - { - "epoch": 23.86284224250326, - "grad_norm": 1.5408867597579956, - "learning_rate": 5.453165829145729e-05, - "loss": 5.6797, - "step": 45757 - }, - { - "epoch": 23.86336375488918, - "grad_norm": 1.5372098684310913, - "learning_rate": 5.453065326633167e-05, - "loss": 5.5028, - "step": 45758 - }, - { - "epoch": 23.863885267275098, - "grad_norm": 1.5802035331726074, - "learning_rate": 5.4529648241206034e-05, - "loss": 5.249, - "step": 45759 - }, - { - "epoch": 23.864406779661017, - "grad_norm": 1.6118299961090088, - "learning_rate": 5.45286432160804e-05, - "loss": 5.283, - "step": 45760 - }, - { - "epoch": 23.864928292046937, - "grad_norm": 1.4433091878890991, - "learning_rate": 5.4527638190954776e-05, - "loss": 5.4359, - "step": 45761 - }, - { - "epoch": 23.865449804432856, - "grad_norm": 1.5282405614852905, - "learning_rate": 5.452663316582915e-05, - "loss": 4.7196, - "step": 45762 - }, - { - "epoch": 23.865971316818776, - "grad_norm": 1.6439448595046997, - "learning_rate": 5.4525628140703525e-05, - "loss": 5.1954, - "step": 45763 - }, - { - "epoch": 23.866492829204695, - "grad_norm": 1.5624064207077026, - "learning_rate": 5.452462311557789e-05, - "loss": 5.1915, - "step": 45764 - }, - { - "epoch": 23.86701434159061, - "grad_norm": 1.5416712760925293, - "learning_rate": 5.452361809045227e-05, - "loss": 4.7749, - "step": 45765 - }, - { - "epoch": 23.86753585397653, - "grad_norm": 1.4940491914749146, - "learning_rate": 5.452261306532663e-05, - "loss": 5.2536, - "step": 45766 - }, - { - "epoch": 23.86805736636245, - "grad_norm": 1.5270013809204102, - "learning_rate": 5.452160804020101e-05, - "loss": 5.591, - "step": 45767 - }, - { - "epoch": 23.86857887874837, - "grad_norm": 1.53765070438385, - "learning_rate": 5.452060301507538e-05, - "loss": 5.3056, - "step": 45768 - }, - { - "epoch": 23.86910039113429, - "grad_norm": 1.4766361713409424, - "learning_rate": 5.451959798994976e-05, - "loss": 4.7719, - "step": 45769 - }, - { - "epoch": 23.86962190352021, - "grad_norm": 1.6917356252670288, - "learning_rate": 5.451859296482412e-05, - "loss": 4.4614, - "step": 45770 - }, - { - "epoch": 23.870143415906128, - "grad_norm": 1.5858254432678223, - "learning_rate": 5.45175879396985e-05, - "loss": 5.4034, - "step": 45771 - }, - { - "epoch": 23.870664928292047, - "grad_norm": 1.6354995965957642, - "learning_rate": 5.4516582914572864e-05, - "loss": 5.2476, - "step": 45772 - }, - { - "epoch": 23.871186440677967, - "grad_norm": 1.6223936080932617, - "learning_rate": 5.4515577889447235e-05, - "loss": 5.1587, - "step": 45773 - }, - { - "epoch": 23.871707953063886, - "grad_norm": 1.551016092300415, - "learning_rate": 5.451457286432161e-05, - "loss": 5.4821, - "step": 45774 - }, - { - "epoch": 23.872229465449806, - "grad_norm": 1.6104533672332764, - "learning_rate": 5.451356783919598e-05, - "loss": 4.8969, - "step": 45775 - }, - { - "epoch": 23.872750977835725, - "grad_norm": 1.5861245393753052, - "learning_rate": 5.4512562814070355e-05, - "loss": 4.5855, - "step": 45776 - }, - { - "epoch": 23.87327249022164, - "grad_norm": 1.6187744140625, - "learning_rate": 5.451155778894472e-05, - "loss": 4.8712, - "step": 45777 - }, - { - "epoch": 23.87379400260756, - "grad_norm": 1.448129415512085, - "learning_rate": 5.45105527638191e-05, - "loss": 5.8031, - "step": 45778 - }, - { - "epoch": 23.87431551499348, - "grad_norm": 1.6637630462646484, - "learning_rate": 5.450954773869347e-05, - "loss": 5.1826, - "step": 45779 - }, - { - "epoch": 23.8748370273794, - "grad_norm": 1.6522270441055298, - "learning_rate": 5.4508542713567846e-05, - "loss": 4.9361, - "step": 45780 - }, - { - "epoch": 23.87535853976532, - "grad_norm": 1.5531680583953857, - "learning_rate": 5.450753768844221e-05, - "loss": 5.3039, - "step": 45781 - }, - { - "epoch": 23.87588005215124, - "grad_norm": 1.481773853302002, - "learning_rate": 5.450653266331659e-05, - "loss": 5.2235, - "step": 45782 - }, - { - "epoch": 23.876401564537158, - "grad_norm": 1.5604844093322754, - "learning_rate": 5.450552763819096e-05, - "loss": 4.8617, - "step": 45783 - }, - { - "epoch": 23.876923076923077, - "grad_norm": 1.5179202556610107, - "learning_rate": 5.450452261306534e-05, - "loss": 5.0294, - "step": 45784 - }, - { - "epoch": 23.877444589308997, - "grad_norm": 1.4755017757415771, - "learning_rate": 5.45035175879397e-05, - "loss": 5.2671, - "step": 45785 - }, - { - "epoch": 23.877966101694916, - "grad_norm": 1.504150629043579, - "learning_rate": 5.4502512562814066e-05, - "loss": 5.3906, - "step": 45786 - }, - { - "epoch": 23.878487614080836, - "grad_norm": 1.4744025468826294, - "learning_rate": 5.4501507537688443e-05, - "loss": 5.1405, - "step": 45787 - }, - { - "epoch": 23.879009126466755, - "grad_norm": 1.6154359579086304, - "learning_rate": 5.4500502512562814e-05, - "loss": 4.7258, - "step": 45788 - }, - { - "epoch": 23.87953063885267, - "grad_norm": 1.512521743774414, - "learning_rate": 5.449949748743719e-05, - "loss": 5.4261, - "step": 45789 - }, - { - "epoch": 23.88005215123859, - "grad_norm": 1.576574683189392, - "learning_rate": 5.4498492462311557e-05, - "loss": 5.1451, - "step": 45790 - }, - { - "epoch": 23.88057366362451, - "grad_norm": 1.3990269899368286, - "learning_rate": 5.4497487437185934e-05, - "loss": 5.0301, - "step": 45791 - }, - { - "epoch": 23.88109517601043, - "grad_norm": 1.4903937578201294, - "learning_rate": 5.44964824120603e-05, - "loss": 4.9783, - "step": 45792 - }, - { - "epoch": 23.88161668839635, - "grad_norm": 1.4282925128936768, - "learning_rate": 5.4495477386934676e-05, - "loss": 5.7536, - "step": 45793 - }, - { - "epoch": 23.88213820078227, - "grad_norm": 1.5289151668548584, - "learning_rate": 5.449447236180905e-05, - "loss": 5.3803, - "step": 45794 - }, - { - "epoch": 23.882659713168188, - "grad_norm": 1.4598572254180908, - "learning_rate": 5.4493467336683425e-05, - "loss": 5.6005, - "step": 45795 - }, - { - "epoch": 23.883181225554107, - "grad_norm": 1.5788817405700684, - "learning_rate": 5.449246231155779e-05, - "loss": 5.1587, - "step": 45796 - }, - { - "epoch": 23.883702737940027, - "grad_norm": 1.5729764699935913, - "learning_rate": 5.449145728643217e-05, - "loss": 5.1848, - "step": 45797 - }, - { - "epoch": 23.884224250325946, - "grad_norm": 1.5400643348693848, - "learning_rate": 5.449045226130653e-05, - "loss": 5.5227, - "step": 45798 - }, - { - "epoch": 23.884745762711866, - "grad_norm": 1.490918517112732, - "learning_rate": 5.44894472361809e-05, - "loss": 5.4188, - "step": 45799 - }, - { - "epoch": 23.885267275097785, - "grad_norm": 1.5947996377944946, - "learning_rate": 5.448844221105528e-05, - "loss": 5.3103, - "step": 45800 - }, - { - "epoch": 23.8857887874837, - "grad_norm": 1.5015416145324707, - "learning_rate": 5.4487437185929645e-05, - "loss": 5.5447, - "step": 45801 - }, - { - "epoch": 23.88631029986962, - "grad_norm": 1.51035475730896, - "learning_rate": 5.448643216080402e-05, - "loss": 5.4498, - "step": 45802 - }, - { - "epoch": 23.88683181225554, - "grad_norm": 1.5182982683181763, - "learning_rate": 5.4485427135678394e-05, - "loss": 5.1971, - "step": 45803 - }, - { - "epoch": 23.88735332464146, - "grad_norm": 1.5741297006607056, - "learning_rate": 5.448442211055277e-05, - "loss": 5.2922, - "step": 45804 - }, - { - "epoch": 23.88787483702738, - "grad_norm": 1.5525071620941162, - "learning_rate": 5.4483417085427136e-05, - "loss": 5.4244, - "step": 45805 - }, - { - "epoch": 23.8883963494133, - "grad_norm": 1.5326189994812012, - "learning_rate": 5.4482412060301514e-05, - "loss": 4.7822, - "step": 45806 - }, - { - "epoch": 23.888917861799218, - "grad_norm": 1.6522706747055054, - "learning_rate": 5.448140703517588e-05, - "loss": 5.2093, - "step": 45807 - }, - { - "epoch": 23.889439374185137, - "grad_norm": 1.5380135774612427, - "learning_rate": 5.4480402010050256e-05, - "loss": 5.3221, - "step": 45808 - }, - { - "epoch": 23.889960886571057, - "grad_norm": 1.541497826576233, - "learning_rate": 5.447939698492463e-05, - "loss": 4.6299, - "step": 45809 - }, - { - "epoch": 23.890482398956976, - "grad_norm": 1.5378525257110596, - "learning_rate": 5.4478391959799005e-05, - "loss": 5.1103, - "step": 45810 - }, - { - "epoch": 23.891003911342896, - "grad_norm": 1.5975524187088013, - "learning_rate": 5.447738693467337e-05, - "loss": 5.1851, - "step": 45811 - }, - { - "epoch": 23.891525423728815, - "grad_norm": 1.6927083730697632, - "learning_rate": 5.447638190954775e-05, - "loss": 4.4792, - "step": 45812 - }, - { - "epoch": 23.89204693611473, - "grad_norm": 1.562325358390808, - "learning_rate": 5.447537688442211e-05, - "loss": 5.3374, - "step": 45813 - }, - { - "epoch": 23.89256844850065, - "grad_norm": 1.5862953662872314, - "learning_rate": 5.447437185929648e-05, - "loss": 5.5624, - "step": 45814 - }, - { - "epoch": 23.89308996088657, - "grad_norm": 1.595808744430542, - "learning_rate": 5.447336683417086e-05, - "loss": 5.2084, - "step": 45815 - }, - { - "epoch": 23.89361147327249, - "grad_norm": 1.6932799816131592, - "learning_rate": 5.4472361809045224e-05, - "loss": 5.1516, - "step": 45816 - }, - { - "epoch": 23.89413298565841, - "grad_norm": 1.4883548021316528, - "learning_rate": 5.44713567839196e-05, - "loss": 5.1434, - "step": 45817 - }, - { - "epoch": 23.89465449804433, - "grad_norm": 1.5288604497909546, - "learning_rate": 5.4470351758793966e-05, - "loss": 5.2264, - "step": 45818 - }, - { - "epoch": 23.895176010430248, - "grad_norm": 1.6474590301513672, - "learning_rate": 5.4469346733668344e-05, - "loss": 5.1152, - "step": 45819 - }, - { - "epoch": 23.895697522816167, - "grad_norm": 1.639183521270752, - "learning_rate": 5.4468341708542715e-05, - "loss": 5.4581, - "step": 45820 - }, - { - "epoch": 23.896219035202087, - "grad_norm": 1.6723036766052246, - "learning_rate": 5.446733668341709e-05, - "loss": 5.4748, - "step": 45821 - }, - { - "epoch": 23.896740547588006, - "grad_norm": 1.7622416019439697, - "learning_rate": 5.446633165829146e-05, - "loss": 5.0331, - "step": 45822 - }, - { - "epoch": 23.897262059973926, - "grad_norm": 1.594859004020691, - "learning_rate": 5.4465326633165835e-05, - "loss": 5.2682, - "step": 45823 - }, - { - "epoch": 23.89778357235984, - "grad_norm": 1.5091334581375122, - "learning_rate": 5.4464321608040206e-05, - "loss": 5.3948, - "step": 45824 - }, - { - "epoch": 23.89830508474576, - "grad_norm": 1.4702675342559814, - "learning_rate": 5.4463316582914584e-05, - "loss": 5.348, - "step": 45825 - }, - { - "epoch": 23.89882659713168, - "grad_norm": 1.536076307296753, - "learning_rate": 5.446231155778895e-05, - "loss": 5.3346, - "step": 45826 - }, - { - "epoch": 23.8993481095176, - "grad_norm": 1.4801017045974731, - "learning_rate": 5.446130653266331e-05, - "loss": 5.5954, - "step": 45827 - }, - { - "epoch": 23.89986962190352, - "grad_norm": 1.4148688316345215, - "learning_rate": 5.446030150753769e-05, - "loss": 5.4358, - "step": 45828 - }, - { - "epoch": 23.90039113428944, - "grad_norm": 1.5147449970245361, - "learning_rate": 5.445929648241206e-05, - "loss": 5.5276, - "step": 45829 - }, - { - "epoch": 23.90091264667536, - "grad_norm": 1.5393526554107666, - "learning_rate": 5.445829145728644e-05, - "loss": 5.3064, - "step": 45830 - }, - { - "epoch": 23.901434159061278, - "grad_norm": 1.4648455381393433, - "learning_rate": 5.4457286432160804e-05, - "loss": 5.0436, - "step": 45831 - }, - { - "epoch": 23.901955671447197, - "grad_norm": 1.5096162557601929, - "learning_rate": 5.445628140703518e-05, - "loss": 4.9541, - "step": 45832 - }, - { - "epoch": 23.902477183833117, - "grad_norm": 1.5777872800827026, - "learning_rate": 5.4455276381909546e-05, - "loss": 5.452, - "step": 45833 - }, - { - "epoch": 23.902998696219036, - "grad_norm": 1.6403920650482178, - "learning_rate": 5.4454271356783923e-05, - "loss": 4.8935, - "step": 45834 - }, - { - "epoch": 23.903520208604956, - "grad_norm": 1.5506229400634766, - "learning_rate": 5.4453266331658295e-05, - "loss": 5.1948, - "step": 45835 - }, - { - "epoch": 23.904041720990875, - "grad_norm": 1.551715612411499, - "learning_rate": 5.445226130653267e-05, - "loss": 4.6453, - "step": 45836 - }, - { - "epoch": 23.90456323337679, - "grad_norm": 1.545270562171936, - "learning_rate": 5.445125628140704e-05, - "loss": 5.0619, - "step": 45837 - }, - { - "epoch": 23.90508474576271, - "grad_norm": 1.6402338743209839, - "learning_rate": 5.4450251256281414e-05, - "loss": 5.1712, - "step": 45838 - }, - { - "epoch": 23.90560625814863, - "grad_norm": 1.5181512832641602, - "learning_rate": 5.444924623115578e-05, - "loss": 5.1897, - "step": 45839 - }, - { - "epoch": 23.90612777053455, - "grad_norm": 1.585888147354126, - "learning_rate": 5.444824120603015e-05, - "loss": 4.7403, - "step": 45840 - }, - { - "epoch": 23.90664928292047, - "grad_norm": 1.6686553955078125, - "learning_rate": 5.444723618090453e-05, - "loss": 5.1283, - "step": 45841 - }, - { - "epoch": 23.90717079530639, - "grad_norm": 1.4981985092163086, - "learning_rate": 5.444623115577889e-05, - "loss": 5.5001, - "step": 45842 - }, - { - "epoch": 23.907692307692308, - "grad_norm": 1.551850438117981, - "learning_rate": 5.444522613065327e-05, - "loss": 5.2693, - "step": 45843 - }, - { - "epoch": 23.908213820078227, - "grad_norm": 1.5057674646377563, - "learning_rate": 5.444422110552764e-05, - "loss": 5.1421, - "step": 45844 - }, - { - "epoch": 23.908735332464147, - "grad_norm": 1.686669111251831, - "learning_rate": 5.444321608040202e-05, - "loss": 4.8524, - "step": 45845 - }, - { - "epoch": 23.909256844850066, - "grad_norm": 1.644380807876587, - "learning_rate": 5.444221105527638e-05, - "loss": 4.4684, - "step": 45846 - }, - { - "epoch": 23.909778357235986, - "grad_norm": 1.5846244096755981, - "learning_rate": 5.444120603015076e-05, - "loss": 5.4292, - "step": 45847 - }, - { - "epoch": 23.910299869621902, - "grad_norm": 1.5941330194473267, - "learning_rate": 5.4440201005025125e-05, - "loss": 5.2438, - "step": 45848 - }, - { - "epoch": 23.91082138200782, - "grad_norm": 1.5942860841751099, - "learning_rate": 5.44391959798995e-05, - "loss": 5.2057, - "step": 45849 - }, - { - "epoch": 23.91134289439374, - "grad_norm": 1.5306389331817627, - "learning_rate": 5.4438190954773874e-05, - "loss": 5.6217, - "step": 45850 - }, - { - "epoch": 23.91186440677966, - "grad_norm": 1.7005889415740967, - "learning_rate": 5.443718592964825e-05, - "loss": 4.7101, - "step": 45851 - }, - { - "epoch": 23.91238591916558, - "grad_norm": 1.5270657539367676, - "learning_rate": 5.4436180904522616e-05, - "loss": 4.7201, - "step": 45852 - }, - { - "epoch": 23.9129074315515, - "grad_norm": 1.5456143617630005, - "learning_rate": 5.443517587939698e-05, - "loss": 5.1935, - "step": 45853 - }, - { - "epoch": 23.91342894393742, - "grad_norm": 1.4218398332595825, - "learning_rate": 5.443417085427136e-05, - "loss": 5.7742, - "step": 45854 - }, - { - "epoch": 23.913950456323338, - "grad_norm": 1.5944900512695312, - "learning_rate": 5.443316582914573e-05, - "loss": 5.5022, - "step": 45855 - }, - { - "epoch": 23.914471968709258, - "grad_norm": 1.5528451204299927, - "learning_rate": 5.443216080402011e-05, - "loss": 4.9247, - "step": 45856 - }, - { - "epoch": 23.914993481095177, - "grad_norm": 1.5426291227340698, - "learning_rate": 5.443115577889447e-05, - "loss": 5.251, - "step": 45857 - }, - { - "epoch": 23.915514993481096, - "grad_norm": 1.589393973350525, - "learning_rate": 5.443015075376885e-05, - "loss": 5.4151, - "step": 45858 - }, - { - "epoch": 23.916036505867016, - "grad_norm": 1.58730947971344, - "learning_rate": 5.442914572864321e-05, - "loss": 5.6786, - "step": 45859 - }, - { - "epoch": 23.916558018252932, - "grad_norm": 1.4604380130767822, - "learning_rate": 5.442814070351759e-05, - "loss": 5.7695, - "step": 45860 - }, - { - "epoch": 23.91707953063885, - "grad_norm": 1.6044037342071533, - "learning_rate": 5.442713567839196e-05, - "loss": 5.1913, - "step": 45861 - }, - { - "epoch": 23.91760104302477, - "grad_norm": 1.4892586469650269, - "learning_rate": 5.442613065326634e-05, - "loss": 4.5983, - "step": 45862 - }, - { - "epoch": 23.91812255541069, - "grad_norm": 1.6101680994033813, - "learning_rate": 5.4425125628140704e-05, - "loss": 4.62, - "step": 45863 - }, - { - "epoch": 23.91864406779661, - "grad_norm": 1.5110687017440796, - "learning_rate": 5.442412060301508e-05, - "loss": 5.2401, - "step": 45864 - }, - { - "epoch": 23.91916558018253, - "grad_norm": 1.5721936225891113, - "learning_rate": 5.4423115577889446e-05, - "loss": 5.088, - "step": 45865 - }, - { - "epoch": 23.91968709256845, - "grad_norm": 1.5956507921218872, - "learning_rate": 5.442211055276382e-05, - "loss": 5.1531, - "step": 45866 - }, - { - "epoch": 23.920208604954368, - "grad_norm": 1.5419212579727173, - "learning_rate": 5.4421105527638195e-05, - "loss": 5.0128, - "step": 45867 - }, - { - "epoch": 23.920730117340288, - "grad_norm": 1.4806262254714966, - "learning_rate": 5.442010050251256e-05, - "loss": 4.7288, - "step": 45868 - }, - { - "epoch": 23.921251629726207, - "grad_norm": 1.5445619821548462, - "learning_rate": 5.441909547738694e-05, - "loss": 5.5026, - "step": 45869 - }, - { - "epoch": 23.921773142112126, - "grad_norm": 1.4414212703704834, - "learning_rate": 5.441809045226131e-05, - "loss": 5.7686, - "step": 45870 - }, - { - "epoch": 23.922294654498046, - "grad_norm": 1.581486463546753, - "learning_rate": 5.4417085427135686e-05, - "loss": 4.7736, - "step": 45871 - }, - { - "epoch": 23.922816166883962, - "grad_norm": 1.5704773664474487, - "learning_rate": 5.441608040201005e-05, - "loss": 5.4808, - "step": 45872 - }, - { - "epoch": 23.92333767926988, - "grad_norm": 1.507904291152954, - "learning_rate": 5.441507537688443e-05, - "loss": 5.1613, - "step": 45873 - }, - { - "epoch": 23.9238591916558, - "grad_norm": 1.6657450199127197, - "learning_rate": 5.441407035175879e-05, - "loss": 4.7872, - "step": 45874 - }, - { - "epoch": 23.92438070404172, - "grad_norm": 1.533423662185669, - "learning_rate": 5.441306532663317e-05, - "loss": 5.3441, - "step": 45875 - }, - { - "epoch": 23.92490221642764, - "grad_norm": 1.4304890632629395, - "learning_rate": 5.441206030150754e-05, - "loss": 5.471, - "step": 45876 - }, - { - "epoch": 23.92542372881356, - "grad_norm": 1.5482558012008667, - "learning_rate": 5.441105527638192e-05, - "loss": 5.0198, - "step": 45877 - }, - { - "epoch": 23.92594524119948, - "grad_norm": 1.5618070363998413, - "learning_rate": 5.4410050251256284e-05, - "loss": 5.3508, - "step": 45878 - }, - { - "epoch": 23.926466753585398, - "grad_norm": 1.5809727907180786, - "learning_rate": 5.440904522613065e-05, - "loss": 5.2821, - "step": 45879 - }, - { - "epoch": 23.926988265971318, - "grad_norm": 1.7704592943191528, - "learning_rate": 5.4408040201005026e-05, - "loss": 4.9802, - "step": 45880 - }, - { - "epoch": 23.927509778357237, - "grad_norm": 1.6758694648742676, - "learning_rate": 5.44070351758794e-05, - "loss": 5.5108, - "step": 45881 - }, - { - "epoch": 23.928031290743156, - "grad_norm": 1.5804835557937622, - "learning_rate": 5.4406030150753775e-05, - "loss": 4.8507, - "step": 45882 - }, - { - "epoch": 23.928552803129076, - "grad_norm": 1.5148566961288452, - "learning_rate": 5.440502512562814e-05, - "loss": 5.3239, - "step": 45883 - }, - { - "epoch": 23.929074315514992, - "grad_norm": 1.539462924003601, - "learning_rate": 5.440402010050252e-05, - "loss": 5.3225, - "step": 45884 - }, - { - "epoch": 23.92959582790091, - "grad_norm": 1.6392443180084229, - "learning_rate": 5.440301507537688e-05, - "loss": 5.3838, - "step": 45885 - }, - { - "epoch": 23.93011734028683, - "grad_norm": 1.5067055225372314, - "learning_rate": 5.440201005025126e-05, - "loss": 5.6791, - "step": 45886 - }, - { - "epoch": 23.93063885267275, - "grad_norm": 1.731990933418274, - "learning_rate": 5.440100502512563e-05, - "loss": 5.2506, - "step": 45887 - }, - { - "epoch": 23.93116036505867, - "grad_norm": 1.6715755462646484, - "learning_rate": 5.440000000000001e-05, - "loss": 5.1057, - "step": 45888 - }, - { - "epoch": 23.93168187744459, - "grad_norm": 1.5228095054626465, - "learning_rate": 5.439899497487437e-05, - "loss": 5.3782, - "step": 45889 - }, - { - "epoch": 23.93220338983051, - "grad_norm": 1.5447797775268555, - "learning_rate": 5.439798994974875e-05, - "loss": 5.081, - "step": 45890 - }, - { - "epoch": 23.932724902216428, - "grad_norm": 1.530352234840393, - "learning_rate": 5.439698492462312e-05, - "loss": 5.4885, - "step": 45891 - }, - { - "epoch": 23.933246414602348, - "grad_norm": 1.4631054401397705, - "learning_rate": 5.4395979899497485e-05, - "loss": 5.2858, - "step": 45892 - }, - { - "epoch": 23.933767926988267, - "grad_norm": 1.5115141868591309, - "learning_rate": 5.439497487437186e-05, - "loss": 5.4553, - "step": 45893 - }, - { - "epoch": 23.934289439374187, - "grad_norm": 1.616053819656372, - "learning_rate": 5.439396984924623e-05, - "loss": 5.0099, - "step": 45894 - }, - { - "epoch": 23.934810951760106, - "grad_norm": 1.5843645334243774, - "learning_rate": 5.4392964824120605e-05, - "loss": 5.3652, - "step": 45895 - }, - { - "epoch": 23.935332464146022, - "grad_norm": 1.5625044107437134, - "learning_rate": 5.4391959798994976e-05, - "loss": 5.8486, - "step": 45896 - }, - { - "epoch": 23.93585397653194, - "grad_norm": 1.5380901098251343, - "learning_rate": 5.4390954773869354e-05, - "loss": 5.1604, - "step": 45897 - }, - { - "epoch": 23.93637548891786, - "grad_norm": 1.5540616512298584, - "learning_rate": 5.438994974874372e-05, - "loss": 5.2653, - "step": 45898 - }, - { - "epoch": 23.93689700130378, - "grad_norm": 1.6385149955749512, - "learning_rate": 5.4388944723618096e-05, - "loss": 5.0696, - "step": 45899 - }, - { - "epoch": 23.9374185136897, - "grad_norm": 1.659623384475708, - "learning_rate": 5.438793969849246e-05, - "loss": 5.3515, - "step": 45900 - }, - { - "epoch": 23.93794002607562, - "grad_norm": 1.5500977039337158, - "learning_rate": 5.438693467336684e-05, - "loss": 5.2196, - "step": 45901 - }, - { - "epoch": 23.93846153846154, - "grad_norm": 1.5647515058517456, - "learning_rate": 5.438592964824121e-05, - "loss": 4.5635, - "step": 45902 - }, - { - "epoch": 23.938983050847458, - "grad_norm": 1.6098263263702393, - "learning_rate": 5.438492462311559e-05, - "loss": 5.4984, - "step": 45903 - }, - { - "epoch": 23.939504563233378, - "grad_norm": 1.624824047088623, - "learning_rate": 5.438391959798995e-05, - "loss": 5.6292, - "step": 45904 - }, - { - "epoch": 23.940026075619297, - "grad_norm": 1.4059901237487793, - "learning_rate": 5.438291457286433e-05, - "loss": 5.4482, - "step": 45905 - }, - { - "epoch": 23.940547588005217, - "grad_norm": 1.4479320049285889, - "learning_rate": 5.4381909547738693e-05, - "loss": 5.2356, - "step": 45906 - }, - { - "epoch": 23.941069100391136, - "grad_norm": 1.651581048965454, - "learning_rate": 5.4380904522613064e-05, - "loss": 5.3036, - "step": 45907 - }, - { - "epoch": 23.941590612777052, - "grad_norm": 1.38026762008667, - "learning_rate": 5.437989949748744e-05, - "loss": 4.7287, - "step": 45908 - }, - { - "epoch": 23.94211212516297, - "grad_norm": 1.5506731271743774, - "learning_rate": 5.4378894472361807e-05, - "loss": 4.3538, - "step": 45909 - }, - { - "epoch": 23.94263363754889, - "grad_norm": 1.7149958610534668, - "learning_rate": 5.4377889447236184e-05, - "loss": 4.998, - "step": 45910 - }, - { - "epoch": 23.94315514993481, - "grad_norm": 1.5911314487457275, - "learning_rate": 5.4376884422110555e-05, - "loss": 5.5602, - "step": 45911 - }, - { - "epoch": 23.94367666232073, - "grad_norm": 1.52681303024292, - "learning_rate": 5.437587939698493e-05, - "loss": 4.8873, - "step": 45912 - }, - { - "epoch": 23.94419817470665, - "grad_norm": 1.5438066720962524, - "learning_rate": 5.43748743718593e-05, - "loss": 5.5888, - "step": 45913 - }, - { - "epoch": 23.94471968709257, - "grad_norm": 1.5009058713912964, - "learning_rate": 5.4373869346733675e-05, - "loss": 5.2002, - "step": 45914 - }, - { - "epoch": 23.945241199478488, - "grad_norm": 1.5067570209503174, - "learning_rate": 5.437286432160804e-05, - "loss": 5.2242, - "step": 45915 - }, - { - "epoch": 23.945762711864408, - "grad_norm": 1.6327003240585327, - "learning_rate": 5.437185929648242e-05, - "loss": 4.9546, - "step": 45916 - }, - { - "epoch": 23.946284224250327, - "grad_norm": 1.5817492008209229, - "learning_rate": 5.437085427135679e-05, - "loss": 5.449, - "step": 45917 - }, - { - "epoch": 23.946805736636247, - "grad_norm": 1.6199629306793213, - "learning_rate": 5.4369849246231166e-05, - "loss": 5.3517, - "step": 45918 - }, - { - "epoch": 23.947327249022166, - "grad_norm": 1.4944099187850952, - "learning_rate": 5.436884422110553e-05, - "loss": 4.7674, - "step": 45919 - }, - { - "epoch": 23.947848761408082, - "grad_norm": 1.4839332103729248, - "learning_rate": 5.4367839195979895e-05, - "loss": 5.1236, - "step": 45920 - }, - { - "epoch": 23.948370273794, - "grad_norm": 1.6466569900512695, - "learning_rate": 5.436683417085427e-05, - "loss": 4.8823, - "step": 45921 - }, - { - "epoch": 23.94889178617992, - "grad_norm": 1.5306611061096191, - "learning_rate": 5.4365829145728644e-05, - "loss": 5.1877, - "step": 45922 - }, - { - "epoch": 23.94941329856584, - "grad_norm": 1.474365472793579, - "learning_rate": 5.436482412060302e-05, - "loss": 5.4896, - "step": 45923 - }, - { - "epoch": 23.94993481095176, - "grad_norm": 1.5926676988601685, - "learning_rate": 5.4363819095477386e-05, - "loss": 4.9817, - "step": 45924 - }, - { - "epoch": 23.95045632333768, - "grad_norm": 1.6341049671173096, - "learning_rate": 5.4362814070351764e-05, - "loss": 4.8117, - "step": 45925 - }, - { - "epoch": 23.9509778357236, - "grad_norm": 1.6057682037353516, - "learning_rate": 5.436180904522613e-05, - "loss": 5.1572, - "step": 45926 - }, - { - "epoch": 23.951499348109518, - "grad_norm": 1.5786569118499756, - "learning_rate": 5.4360804020100506e-05, - "loss": 5.1309, - "step": 45927 - }, - { - "epoch": 23.952020860495438, - "grad_norm": 1.499778151512146, - "learning_rate": 5.435979899497488e-05, - "loss": 5.5189, - "step": 45928 - }, - { - "epoch": 23.952542372881357, - "grad_norm": 1.5492783784866333, - "learning_rate": 5.4358793969849255e-05, - "loss": 5.2531, - "step": 45929 - }, - { - "epoch": 23.953063885267277, - "grad_norm": 1.6237252950668335, - "learning_rate": 5.435778894472362e-05, - "loss": 5.0436, - "step": 45930 - }, - { - "epoch": 23.953585397653193, - "grad_norm": 1.3664621114730835, - "learning_rate": 5.4356783919598e-05, - "loss": 5.6392, - "step": 45931 - }, - { - "epoch": 23.954106910039112, - "grad_norm": 1.502312183380127, - "learning_rate": 5.435577889447237e-05, - "loss": 5.2975, - "step": 45932 - }, - { - "epoch": 23.95462842242503, - "grad_norm": 1.543453574180603, - "learning_rate": 5.435477386934673e-05, - "loss": 5.3388, - "step": 45933 - }, - { - "epoch": 23.95514993481095, - "grad_norm": 1.5563774108886719, - "learning_rate": 5.435376884422111e-05, - "loss": 5.3539, - "step": 45934 - }, - { - "epoch": 23.95567144719687, - "grad_norm": 1.46745765209198, - "learning_rate": 5.4352763819095474e-05, - "loss": 4.8749, - "step": 45935 - }, - { - "epoch": 23.95619295958279, - "grad_norm": 1.5346966981887817, - "learning_rate": 5.435175879396985e-05, - "loss": 5.385, - "step": 45936 - }, - { - "epoch": 23.95671447196871, - "grad_norm": 1.5657808780670166, - "learning_rate": 5.435075376884422e-05, - "loss": 5.0163, - "step": 45937 - }, - { - "epoch": 23.95723598435463, - "grad_norm": 1.5875946283340454, - "learning_rate": 5.43497487437186e-05, - "loss": 5.1982, - "step": 45938 - }, - { - "epoch": 23.957757496740548, - "grad_norm": 1.5437651872634888, - "learning_rate": 5.4348743718592965e-05, - "loss": 5.0069, - "step": 45939 - }, - { - "epoch": 23.958279009126468, - "grad_norm": 1.4974595308303833, - "learning_rate": 5.434773869346734e-05, - "loss": 5.324, - "step": 45940 - }, - { - "epoch": 23.958800521512387, - "grad_norm": 1.5313531160354614, - "learning_rate": 5.434673366834171e-05, - "loss": 5.6401, - "step": 45941 - }, - { - "epoch": 23.959322033898307, - "grad_norm": 1.545339584350586, - "learning_rate": 5.4345728643216085e-05, - "loss": 5.5891, - "step": 45942 - }, - { - "epoch": 23.959843546284226, - "grad_norm": 1.5711708068847656, - "learning_rate": 5.4344723618090456e-05, - "loss": 5.4524, - "step": 45943 - }, - { - "epoch": 23.960365058670142, - "grad_norm": 1.5513815879821777, - "learning_rate": 5.4343718592964834e-05, - "loss": 5.2216, - "step": 45944 - }, - { - "epoch": 23.96088657105606, - "grad_norm": 1.4729284048080444, - "learning_rate": 5.43427135678392e-05, - "loss": 5.4566, - "step": 45945 - }, - { - "epoch": 23.96140808344198, - "grad_norm": 1.5640850067138672, - "learning_rate": 5.434170854271356e-05, - "loss": 5.3657, - "step": 45946 - }, - { - "epoch": 23.9619295958279, - "grad_norm": 1.5616397857666016, - "learning_rate": 5.434070351758794e-05, - "loss": 4.732, - "step": 45947 - }, - { - "epoch": 23.96245110821382, - "grad_norm": 1.4964392185211182, - "learning_rate": 5.433969849246231e-05, - "loss": 5.3881, - "step": 45948 - }, - { - "epoch": 23.96297262059974, - "grad_norm": 1.5051445960998535, - "learning_rate": 5.433869346733669e-05, - "loss": 5.6716, - "step": 45949 - }, - { - "epoch": 23.96349413298566, - "grad_norm": 1.5547480583190918, - "learning_rate": 5.4337688442211054e-05, - "loss": 4.6809, - "step": 45950 - }, - { - "epoch": 23.96401564537158, - "grad_norm": 1.636846661567688, - "learning_rate": 5.433668341708543e-05, - "loss": 4.8921, - "step": 45951 - }, - { - "epoch": 23.964537157757498, - "grad_norm": 1.6491202116012573, - "learning_rate": 5.4335678391959796e-05, - "loss": 4.9124, - "step": 45952 - }, - { - "epoch": 23.965058670143417, - "grad_norm": 1.6907470226287842, - "learning_rate": 5.4334673366834173e-05, - "loss": 5.2623, - "step": 45953 - }, - { - "epoch": 23.965580182529337, - "grad_norm": 1.6396926641464233, - "learning_rate": 5.4333668341708545e-05, - "loss": 5.6804, - "step": 45954 - }, - { - "epoch": 23.966101694915253, - "grad_norm": 1.702321171760559, - "learning_rate": 5.433266331658292e-05, - "loss": 5.0645, - "step": 45955 - }, - { - "epoch": 23.966623207301172, - "grad_norm": 1.6079583168029785, - "learning_rate": 5.433165829145729e-05, - "loss": 5.0371, - "step": 45956 - }, - { - "epoch": 23.96714471968709, - "grad_norm": 1.6777218580245972, - "learning_rate": 5.4330653266331664e-05, - "loss": 5.171, - "step": 45957 - }, - { - "epoch": 23.96766623207301, - "grad_norm": 1.6809300184249878, - "learning_rate": 5.4329648241206036e-05, - "loss": 4.8227, - "step": 45958 - }, - { - "epoch": 23.96818774445893, - "grad_norm": 1.5920047760009766, - "learning_rate": 5.43286432160804e-05, - "loss": 5.1429, - "step": 45959 - }, - { - "epoch": 23.96870925684485, - "grad_norm": 1.508156418800354, - "learning_rate": 5.432763819095478e-05, - "loss": 5.3045, - "step": 45960 - }, - { - "epoch": 23.96923076923077, - "grad_norm": 1.5615688562393188, - "learning_rate": 5.432663316582914e-05, - "loss": 5.2084, - "step": 45961 - }, - { - "epoch": 23.96975228161669, - "grad_norm": 1.5932196378707886, - "learning_rate": 5.432562814070352e-05, - "loss": 5.135, - "step": 45962 - }, - { - "epoch": 23.97027379400261, - "grad_norm": 1.6053498983383179, - "learning_rate": 5.432462311557789e-05, - "loss": 4.8316, - "step": 45963 - }, - { - "epoch": 23.970795306388528, - "grad_norm": 1.4911683797836304, - "learning_rate": 5.432361809045227e-05, - "loss": 5.6475, - "step": 45964 - }, - { - "epoch": 23.971316818774447, - "grad_norm": 1.5564721822738647, - "learning_rate": 5.432261306532663e-05, - "loss": 5.4389, - "step": 45965 - }, - { - "epoch": 23.971838331160367, - "grad_norm": 1.5763745307922363, - "learning_rate": 5.432160804020101e-05, - "loss": 5.0623, - "step": 45966 - }, - { - "epoch": 23.972359843546283, - "grad_norm": 1.5209274291992188, - "learning_rate": 5.4320603015075375e-05, - "loss": 5.4824, - "step": 45967 - }, - { - "epoch": 23.972881355932202, - "grad_norm": 1.5195144414901733, - "learning_rate": 5.431959798994975e-05, - "loss": 4.6157, - "step": 45968 - }, - { - "epoch": 23.97340286831812, - "grad_norm": 1.5060853958129883, - "learning_rate": 5.4318592964824124e-05, - "loss": 5.596, - "step": 45969 - }, - { - "epoch": 23.97392438070404, - "grad_norm": 1.469939947128296, - "learning_rate": 5.43175879396985e-05, - "loss": 5.6327, - "step": 45970 - }, - { - "epoch": 23.97444589308996, - "grad_norm": 1.5959091186523438, - "learning_rate": 5.4316582914572866e-05, - "loss": 5.4911, - "step": 45971 - }, - { - "epoch": 23.97496740547588, - "grad_norm": 1.6032130718231201, - "learning_rate": 5.431557788944723e-05, - "loss": 5.2095, - "step": 45972 - }, - { - "epoch": 23.9754889178618, - "grad_norm": 1.549489140510559, - "learning_rate": 5.431457286432161e-05, - "loss": 5.634, - "step": 45973 - }, - { - "epoch": 23.97601043024772, - "grad_norm": 1.576242446899414, - "learning_rate": 5.431356783919598e-05, - "loss": 4.6322, - "step": 45974 - }, - { - "epoch": 23.97653194263364, - "grad_norm": 1.7056081295013428, - "learning_rate": 5.431256281407036e-05, - "loss": 4.8858, - "step": 45975 - }, - { - "epoch": 23.977053455019558, - "grad_norm": 1.6119083166122437, - "learning_rate": 5.431155778894472e-05, - "loss": 4.8088, - "step": 45976 - }, - { - "epoch": 23.977574967405477, - "grad_norm": 1.5760382413864136, - "learning_rate": 5.43105527638191e-05, - "loss": 5.564, - "step": 45977 - }, - { - "epoch": 23.978096479791397, - "grad_norm": 1.648408055305481, - "learning_rate": 5.430954773869347e-05, - "loss": 5.2894, - "step": 45978 - }, - { - "epoch": 23.978617992177313, - "grad_norm": 1.579911231994629, - "learning_rate": 5.430854271356785e-05, - "loss": 5.1021, - "step": 45979 - }, - { - "epoch": 23.979139504563232, - "grad_norm": 1.5487580299377441, - "learning_rate": 5.430753768844221e-05, - "loss": 5.6105, - "step": 45980 - }, - { - "epoch": 23.97966101694915, - "grad_norm": 1.5298149585723877, - "learning_rate": 5.430653266331659e-05, - "loss": 5.5857, - "step": 45981 - }, - { - "epoch": 23.98018252933507, - "grad_norm": 1.604411244392395, - "learning_rate": 5.4305527638190954e-05, - "loss": 5.2439, - "step": 45982 - }, - { - "epoch": 23.98070404172099, - "grad_norm": 1.6002827882766724, - "learning_rate": 5.430452261306533e-05, - "loss": 5.3309, - "step": 45983 - }, - { - "epoch": 23.98122555410691, - "grad_norm": 1.6413325071334839, - "learning_rate": 5.43035175879397e-05, - "loss": 5.5603, - "step": 45984 - }, - { - "epoch": 23.98174706649283, - "grad_norm": 1.4506750106811523, - "learning_rate": 5.430251256281408e-05, - "loss": 5.654, - "step": 45985 - }, - { - "epoch": 23.98226857887875, - "grad_norm": 1.4530068635940552, - "learning_rate": 5.4301507537688445e-05, - "loss": 4.8243, - "step": 45986 - }, - { - "epoch": 23.98279009126467, - "grad_norm": 1.5964878797531128, - "learning_rate": 5.430050251256281e-05, - "loss": 4.9618, - "step": 45987 - }, - { - "epoch": 23.983311603650588, - "grad_norm": 1.5209447145462036, - "learning_rate": 5.429949748743719e-05, - "loss": 5.3976, - "step": 45988 - }, - { - "epoch": 23.983833116036507, - "grad_norm": Infinity, - "learning_rate": 5.429949748743719e-05, - "loss": 4.8725, - "step": 45989 - }, - { - "epoch": 23.984354628422427, - "grad_norm": 1.5815855264663696, - "learning_rate": 5.429849246231156e-05, - "loss": 5.3473, - "step": 45990 - }, - { - "epoch": 23.984876140808343, - "grad_norm": 1.5126343965530396, - "learning_rate": 5.4297487437185936e-05, - "loss": 5.3729, - "step": 45991 - }, - { - "epoch": 23.985397653194262, - "grad_norm": 1.6722396612167358, - "learning_rate": 5.42964824120603e-05, - "loss": 5.3587, - "step": 45992 - }, - { - "epoch": 23.98591916558018, - "grad_norm": 1.5411133766174316, - "learning_rate": 5.429547738693468e-05, - "loss": 5.5628, - "step": 45993 - }, - { - "epoch": 23.9864406779661, - "grad_norm": 1.5605889558792114, - "learning_rate": 5.429447236180904e-05, - "loss": 4.8569, - "step": 45994 - }, - { - "epoch": 23.98696219035202, - "grad_norm": 1.5758171081542969, - "learning_rate": 5.429346733668342e-05, - "loss": 4.7693, - "step": 45995 - }, - { - "epoch": 23.98748370273794, - "grad_norm": 1.5309958457946777, - "learning_rate": 5.429246231155779e-05, - "loss": 4.937, - "step": 45996 - }, - { - "epoch": 23.98800521512386, - "grad_norm": 1.4814704656600952, - "learning_rate": 5.429145728643217e-05, - "loss": 5.3306, - "step": 45997 - }, - { - "epoch": 23.98852672750978, - "grad_norm": 1.511664867401123, - "learning_rate": 5.4290452261306534e-05, - "loss": 5.3207, - "step": 45998 - }, - { - "epoch": 23.9890482398957, - "grad_norm": 1.4380537271499634, - "learning_rate": 5.428944723618091e-05, - "loss": 5.5652, - "step": 45999 - }, - { - "epoch": 23.989569752281618, - "grad_norm": 1.4643508195877075, - "learning_rate": 5.428844221105528e-05, - "loss": 5.5917, - "step": 46000 - }, - { - "epoch": 23.990091264667537, - "grad_norm": 1.6133898496627808, - "learning_rate": 5.428743718592965e-05, - "loss": 5.2042, - "step": 46001 - }, - { - "epoch": 23.990612777053457, - "grad_norm": 1.6193649768829346, - "learning_rate": 5.4286432160804025e-05, - "loss": 5.175, - "step": 46002 - }, - { - "epoch": 23.991134289439373, - "grad_norm": 1.4771482944488525, - "learning_rate": 5.428542713567839e-05, - "loss": 5.3236, - "step": 46003 - }, - { - "epoch": 23.991655801825292, - "grad_norm": 1.6050174236297607, - "learning_rate": 5.428442211055277e-05, - "loss": 4.4981, - "step": 46004 - }, - { - "epoch": 23.99217731421121, - "grad_norm": 1.7049267292022705, - "learning_rate": 5.428341708542714e-05, - "loss": 5.0193, - "step": 46005 - }, - { - "epoch": 23.99269882659713, - "grad_norm": 1.4995810985565186, - "learning_rate": 5.4282412060301516e-05, - "loss": 5.6079, - "step": 46006 - }, - { - "epoch": 23.99322033898305, - "grad_norm": 1.4873191118240356, - "learning_rate": 5.428140703517588e-05, - "loss": 5.1858, - "step": 46007 - }, - { - "epoch": 23.99374185136897, - "grad_norm": 1.5495026111602783, - "learning_rate": 5.428040201005026e-05, - "loss": 5.0814, - "step": 46008 - }, - { - "epoch": 23.99426336375489, - "grad_norm": 1.4731147289276123, - "learning_rate": 5.427939698492462e-05, - "loss": 5.3038, - "step": 46009 - }, - { - "epoch": 23.99478487614081, - "grad_norm": 1.5070608854293823, - "learning_rate": 5.4278391959799e-05, - "loss": 4.8748, - "step": 46010 - }, - { - "epoch": 23.99530638852673, - "grad_norm": 1.5443145036697388, - "learning_rate": 5.427738693467337e-05, - "loss": 5.2913, - "step": 46011 - }, - { - "epoch": 23.995827900912648, - "grad_norm": 1.5552198886871338, - "learning_rate": 5.427638190954775e-05, - "loss": 4.9417, - "step": 46012 - }, - { - "epoch": 23.996349413298567, - "grad_norm": 1.669577717781067, - "learning_rate": 5.427537688442211e-05, - "loss": 5.5687, - "step": 46013 - }, - { - "epoch": 23.996870925684483, - "grad_norm": 1.8017690181732178, - "learning_rate": 5.427437185929648e-05, - "loss": 5.1779, - "step": 46014 - }, - { - "epoch": 23.997392438070403, - "grad_norm": 1.5378084182739258, - "learning_rate": 5.4273366834170855e-05, - "loss": 5.3497, - "step": 46015 - }, - { - "epoch": 23.997913950456322, - "grad_norm": 1.5156443119049072, - "learning_rate": 5.4272361809045226e-05, - "loss": 5.1203, - "step": 46016 - }, - { - "epoch": 23.99843546284224, - "grad_norm": 1.5367891788482666, - "learning_rate": 5.4271356783919604e-05, - "loss": 5.5455, - "step": 46017 - }, - { - "epoch": 23.99895697522816, - "grad_norm": 1.4522937536239624, - "learning_rate": 5.427035175879397e-05, - "loss": 5.6112, - "step": 46018 - }, - { - "epoch": 23.99947848761408, - "grad_norm": 1.613660216331482, - "learning_rate": 5.4269346733668346e-05, - "loss": 4.9531, - "step": 46019 - }, - { - "epoch": 24.0, - "grad_norm": 1.6710158586502075, - "learning_rate": 5.426834170854271e-05, - "loss": 5.321, - "step": 46020 - }, - { - "epoch": 24.00052151238592, - "grad_norm": 1.4684683084487915, - "learning_rate": 5.426733668341709e-05, - "loss": 5.3245, - "step": 46021 - }, - { - "epoch": 24.00104302477184, - "grad_norm": 1.606246829032898, - "learning_rate": 5.426633165829146e-05, - "loss": 5.2038, - "step": 46022 - }, - { - "epoch": 24.00156453715776, - "grad_norm": 1.5719938278198242, - "learning_rate": 5.426532663316584e-05, - "loss": 5.137, - "step": 46023 - }, - { - "epoch": 24.002086049543678, - "grad_norm": 1.5940433740615845, - "learning_rate": 5.42643216080402e-05, - "loss": 5.0891, - "step": 46024 - }, - { - "epoch": 24.002607561929597, - "grad_norm": 1.5087535381317139, - "learning_rate": 5.426331658291458e-05, - "loss": 5.1106, - "step": 46025 - }, - { - "epoch": 24.003129074315513, - "grad_norm": 1.52946937084198, - "learning_rate": 5.426231155778895e-05, - "loss": 4.906, - "step": 46026 - }, - { - "epoch": 24.003650586701433, - "grad_norm": 1.617351770401001, - "learning_rate": 5.4261306532663314e-05, - "loss": 4.8505, - "step": 46027 - }, - { - "epoch": 24.004172099087352, - "grad_norm": 1.5695254802703857, - "learning_rate": 5.426030150753769e-05, - "loss": 4.6777, - "step": 46028 - }, - { - "epoch": 24.00469361147327, - "grad_norm": 1.6444867849349976, - "learning_rate": 5.4259296482412057e-05, - "loss": 5.2649, - "step": 46029 - }, - { - "epoch": 24.00521512385919, - "grad_norm": 1.4997265338897705, - "learning_rate": 5.4258291457286434e-05, - "loss": 5.5159, - "step": 46030 - }, - { - "epoch": 24.00573663624511, - "grad_norm": 1.588230013847351, - "learning_rate": 5.4257286432160805e-05, - "loss": 4.848, - "step": 46031 - }, - { - "epoch": 24.00625814863103, - "grad_norm": 1.5247836112976074, - "learning_rate": 5.425628140703518e-05, - "loss": 5.714, - "step": 46032 - }, - { - "epoch": 24.00677966101695, - "grad_norm": 1.7232493162155151, - "learning_rate": 5.425527638190955e-05, - "loss": 4.978, - "step": 46033 - }, - { - "epoch": 24.00730117340287, - "grad_norm": 1.5294561386108398, - "learning_rate": 5.4254271356783925e-05, - "loss": 5.3366, - "step": 46034 - }, - { - "epoch": 24.00782268578879, - "grad_norm": 1.461788296699524, - "learning_rate": 5.425326633165829e-05, - "loss": 4.8345, - "step": 46035 - }, - { - "epoch": 24.008344198174708, - "grad_norm": 1.569153070449829, - "learning_rate": 5.425226130653267e-05, - "loss": 4.7699, - "step": 46036 - }, - { - "epoch": 24.008865710560627, - "grad_norm": 1.569054126739502, - "learning_rate": 5.425125628140704e-05, - "loss": 5.3337, - "step": 46037 - }, - { - "epoch": 24.009387222946543, - "grad_norm": 1.6185903549194336, - "learning_rate": 5.4250251256281416e-05, - "loss": 5.5041, - "step": 46038 - }, - { - "epoch": 24.009908735332463, - "grad_norm": 1.5332311391830444, - "learning_rate": 5.424924623115578e-05, - "loss": 5.4248, - "step": 46039 - }, - { - "epoch": 24.010430247718382, - "grad_norm": 1.5594563484191895, - "learning_rate": 5.4248241206030145e-05, - "loss": 5.3164, - "step": 46040 - }, - { - "epoch": 24.0109517601043, - "grad_norm": 1.5309391021728516, - "learning_rate": 5.424723618090452e-05, - "loss": 5.5143, - "step": 46041 - }, - { - "epoch": 24.01147327249022, - "grad_norm": 1.5855588912963867, - "learning_rate": 5.4246231155778894e-05, - "loss": 5.2378, - "step": 46042 - }, - { - "epoch": 24.01199478487614, - "grad_norm": 1.496659517288208, - "learning_rate": 5.424522613065327e-05, - "loss": 5.6601, - "step": 46043 - }, - { - "epoch": 24.01251629726206, - "grad_norm": 1.6735243797302246, - "learning_rate": 5.4244221105527636e-05, - "loss": 4.9908, - "step": 46044 - }, - { - "epoch": 24.01303780964798, - "grad_norm": 1.645653247833252, - "learning_rate": 5.4243216080402014e-05, - "loss": 5.0656, - "step": 46045 - }, - { - "epoch": 24.0135593220339, - "grad_norm": 1.6388884782791138, - "learning_rate": 5.4242211055276385e-05, - "loss": 5.1619, - "step": 46046 - }, - { - "epoch": 24.01408083441982, - "grad_norm": 1.6696414947509766, - "learning_rate": 5.424120603015076e-05, - "loss": 4.9574, - "step": 46047 - }, - { - "epoch": 24.014602346805738, - "grad_norm": 1.5975017547607422, - "learning_rate": 5.424020100502513e-05, - "loss": 5.12, - "step": 46048 - }, - { - "epoch": 24.015123859191657, - "grad_norm": 1.532721757888794, - "learning_rate": 5.4239195979899505e-05, - "loss": 5.4742, - "step": 46049 - }, - { - "epoch": 24.015645371577573, - "grad_norm": 1.5162363052368164, - "learning_rate": 5.423819095477387e-05, - "loss": 4.8136, - "step": 46050 - }, - { - "epoch": 24.016166883963493, - "grad_norm": 2.0477590560913086, - "learning_rate": 5.423718592964825e-05, - "loss": 4.7733, - "step": 46051 - }, - { - "epoch": 24.016688396349412, - "grad_norm": 1.5923370122909546, - "learning_rate": 5.423618090452262e-05, - "loss": 5.1871, - "step": 46052 - }, - { - "epoch": 24.01720990873533, - "grad_norm": 1.4261493682861328, - "learning_rate": 5.423517587939698e-05, - "loss": 5.7367, - "step": 46053 - }, - { - "epoch": 24.01773142112125, - "grad_norm": 1.600920557975769, - "learning_rate": 5.423417085427136e-05, - "loss": 4.4217, - "step": 46054 - }, - { - "epoch": 24.01825293350717, - "grad_norm": 1.651667594909668, - "learning_rate": 5.4233165829145724e-05, - "loss": 5.0547, - "step": 46055 - }, - { - "epoch": 24.01877444589309, - "grad_norm": 1.4112615585327148, - "learning_rate": 5.42321608040201e-05, - "loss": 5.3478, - "step": 46056 - }, - { - "epoch": 24.01929595827901, - "grad_norm": 1.5659563541412354, - "learning_rate": 5.423115577889447e-05, - "loss": 5.4329, - "step": 46057 - }, - { - "epoch": 24.01981747066493, - "grad_norm": 1.51419997215271, - "learning_rate": 5.423015075376885e-05, - "loss": 5.4193, - "step": 46058 - }, - { - "epoch": 24.02033898305085, - "grad_norm": 1.546014666557312, - "learning_rate": 5.4229145728643215e-05, - "loss": 5.276, - "step": 46059 - }, - { - "epoch": 24.020860495436768, - "grad_norm": 1.5950777530670166, - "learning_rate": 5.422814070351759e-05, - "loss": 5.1339, - "step": 46060 - }, - { - "epoch": 24.021382007822687, - "grad_norm": 1.4555310010910034, - "learning_rate": 5.422713567839196e-05, - "loss": 5.6, - "step": 46061 - }, - { - "epoch": 24.021903520208603, - "grad_norm": 1.5049126148223877, - "learning_rate": 5.4226130653266335e-05, - "loss": 5.5152, - "step": 46062 - }, - { - "epoch": 24.022425032594523, - "grad_norm": 1.4997739791870117, - "learning_rate": 5.4225125628140706e-05, - "loss": 5.4713, - "step": 46063 - }, - { - "epoch": 24.022946544980442, - "grad_norm": 1.556372880935669, - "learning_rate": 5.4224120603015084e-05, - "loss": 5.0858, - "step": 46064 - }, - { - "epoch": 24.02346805736636, - "grad_norm": 1.4993529319763184, - "learning_rate": 5.422311557788945e-05, - "loss": 5.0031, - "step": 46065 - }, - { - "epoch": 24.02398956975228, - "grad_norm": 1.5561082363128662, - "learning_rate": 5.422211055276382e-05, - "loss": 5.1331, - "step": 46066 - }, - { - "epoch": 24.0245110821382, - "grad_norm": 1.4982510805130005, - "learning_rate": 5.42211055276382e-05, - "loss": 5.4972, - "step": 46067 - }, - { - "epoch": 24.02503259452412, - "grad_norm": 1.620223879814148, - "learning_rate": 5.422010050251256e-05, - "loss": 5.1852, - "step": 46068 - }, - { - "epoch": 24.02555410691004, - "grad_norm": 1.6083669662475586, - "learning_rate": 5.421909547738694e-05, - "loss": 5.1749, - "step": 46069 - }, - { - "epoch": 24.02607561929596, - "grad_norm": 1.60512375831604, - "learning_rate": 5.4218090452261304e-05, - "loss": 5.1214, - "step": 46070 - }, - { - "epoch": 24.02659713168188, - "grad_norm": 1.50897216796875, - "learning_rate": 5.421708542713568e-05, - "loss": 5.4374, - "step": 46071 - }, - { - "epoch": 24.027118644067798, - "grad_norm": 1.5528723001480103, - "learning_rate": 5.421608040201005e-05, - "loss": 5.4749, - "step": 46072 - }, - { - "epoch": 24.027640156453717, - "grad_norm": 1.4743549823760986, - "learning_rate": 5.421507537688443e-05, - "loss": 5.5223, - "step": 46073 - }, - { - "epoch": 24.028161668839633, - "grad_norm": 1.5961159467697144, - "learning_rate": 5.4214070351758795e-05, - "loss": 5.4454, - "step": 46074 - }, - { - "epoch": 24.028683181225553, - "grad_norm": 1.793000340461731, - "learning_rate": 5.421306532663317e-05, - "loss": 4.731, - "step": 46075 - }, - { - "epoch": 24.029204693611472, - "grad_norm": 1.6856452226638794, - "learning_rate": 5.421206030150754e-05, - "loss": 5.3187, - "step": 46076 - }, - { - "epoch": 24.02972620599739, - "grad_norm": 1.454793930053711, - "learning_rate": 5.4211055276381914e-05, - "loss": 5.7701, - "step": 46077 - }, - { - "epoch": 24.03024771838331, - "grad_norm": 1.481123924255371, - "learning_rate": 5.4210050251256286e-05, - "loss": 5.3467, - "step": 46078 - }, - { - "epoch": 24.03076923076923, - "grad_norm": 1.4650630950927734, - "learning_rate": 5.420904522613066e-05, - "loss": 4.8783, - "step": 46079 - }, - { - "epoch": 24.03129074315515, - "grad_norm": 1.5515016317367554, - "learning_rate": 5.420804020100503e-05, - "loss": 5.5613, - "step": 46080 - }, - { - "epoch": 24.03181225554107, - "grad_norm": 1.511457085609436, - "learning_rate": 5.420703517587939e-05, - "loss": 5.2557, - "step": 46081 - }, - { - "epoch": 24.03233376792699, - "grad_norm": 1.6389484405517578, - "learning_rate": 5.420603015075377e-05, - "loss": 5.1918, - "step": 46082 - }, - { - "epoch": 24.03285528031291, - "grad_norm": 1.5714011192321777, - "learning_rate": 5.420502512562814e-05, - "loss": 5.4691, - "step": 46083 - }, - { - "epoch": 24.033376792698828, - "grad_norm": 1.618318796157837, - "learning_rate": 5.420402010050252e-05, - "loss": 4.9458, - "step": 46084 - }, - { - "epoch": 24.033898305084747, - "grad_norm": 1.6347686052322388, - "learning_rate": 5.420301507537688e-05, - "loss": 5.3703, - "step": 46085 - }, - { - "epoch": 24.034419817470663, - "grad_norm": 1.5274754762649536, - "learning_rate": 5.420201005025126e-05, - "loss": 5.6552, - "step": 46086 - }, - { - "epoch": 24.034941329856583, - "grad_norm": 1.522687315940857, - "learning_rate": 5.420100502512563e-05, - "loss": 5.4275, - "step": 46087 - }, - { - "epoch": 24.035462842242502, - "grad_norm": 1.539440631866455, - "learning_rate": 5.420000000000001e-05, - "loss": 4.6405, - "step": 46088 - }, - { - "epoch": 24.03598435462842, - "grad_norm": 1.5528409481048584, - "learning_rate": 5.4198994974874374e-05, - "loss": 5.4346, - "step": 46089 - }, - { - "epoch": 24.03650586701434, - "grad_norm": 1.5965327024459839, - "learning_rate": 5.419798994974875e-05, - "loss": 4.638, - "step": 46090 - }, - { - "epoch": 24.03702737940026, - "grad_norm": 1.5039515495300293, - "learning_rate": 5.4196984924623116e-05, - "loss": 5.6826, - "step": 46091 - }, - { - "epoch": 24.03754889178618, - "grad_norm": 1.6296741962432861, - "learning_rate": 5.4195979899497494e-05, - "loss": 4.9458, - "step": 46092 - }, - { - "epoch": 24.0380704041721, - "grad_norm": 1.5898256301879883, - "learning_rate": 5.4194974874371865e-05, - "loss": 5.0443, - "step": 46093 - }, - { - "epoch": 24.03859191655802, - "grad_norm": 1.5694644451141357, - "learning_rate": 5.419396984924623e-05, - "loss": 5.3816, - "step": 46094 - }, - { - "epoch": 24.03911342894394, - "grad_norm": 1.4784035682678223, - "learning_rate": 5.419296482412061e-05, - "loss": 4.9497, - "step": 46095 - }, - { - "epoch": 24.039634941329858, - "grad_norm": 1.5374153852462769, - "learning_rate": 5.419195979899497e-05, - "loss": 5.3113, - "step": 46096 - }, - { - "epoch": 24.040156453715777, - "grad_norm": 1.671799659729004, - "learning_rate": 5.419095477386935e-05, - "loss": 5.1469, - "step": 46097 - }, - { - "epoch": 24.040677966101693, - "grad_norm": 1.7802436351776123, - "learning_rate": 5.418994974874372e-05, - "loss": 5.039, - "step": 46098 - }, - { - "epoch": 24.041199478487613, - "grad_norm": 1.7207837104797363, - "learning_rate": 5.41889447236181e-05, - "loss": 4.6694, - "step": 46099 - }, - { - "epoch": 24.041720990873532, - "grad_norm": 1.5498486757278442, - "learning_rate": 5.418793969849246e-05, - "loss": 4.9114, - "step": 46100 - }, - { - "epoch": 24.042242503259452, - "grad_norm": 1.6191529035568237, - "learning_rate": 5.418693467336684e-05, - "loss": 5.2036, - "step": 46101 - }, - { - "epoch": 24.04276401564537, - "grad_norm": 1.580148696899414, - "learning_rate": 5.4185929648241204e-05, - "loss": 5.053, - "step": 46102 - }, - { - "epoch": 24.04328552803129, - "grad_norm": 1.4993054866790771, - "learning_rate": 5.418492462311558e-05, - "loss": 5.4512, - "step": 46103 - }, - { - "epoch": 24.04380704041721, - "grad_norm": 1.642574667930603, - "learning_rate": 5.418391959798995e-05, - "loss": 5.1678, - "step": 46104 - }, - { - "epoch": 24.04432855280313, - "grad_norm": 1.6004410982131958, - "learning_rate": 5.418291457286433e-05, - "loss": 4.4682, - "step": 46105 - }, - { - "epoch": 24.04485006518905, - "grad_norm": 1.6212694644927979, - "learning_rate": 5.4181909547738695e-05, - "loss": 5.0306, - "step": 46106 - }, - { - "epoch": 24.04537157757497, - "grad_norm": 1.5393000841140747, - "learning_rate": 5.418090452261306e-05, - "loss": 5.5827, - "step": 46107 - }, - { - "epoch": 24.045893089960888, - "grad_norm": 1.6398674249649048, - "learning_rate": 5.417989949748744e-05, - "loss": 4.8379, - "step": 46108 - }, - { - "epoch": 24.046414602346807, - "grad_norm": 1.5472393035888672, - "learning_rate": 5.417889447236181e-05, - "loss": 5.0969, - "step": 46109 - }, - { - "epoch": 24.046936114732723, - "grad_norm": 1.607438564300537, - "learning_rate": 5.4177889447236186e-05, - "loss": 5.1359, - "step": 46110 - }, - { - "epoch": 24.047457627118643, - "grad_norm": 1.4798245429992676, - "learning_rate": 5.417688442211055e-05, - "loss": 5.4496, - "step": 46111 - }, - { - "epoch": 24.047979139504562, - "grad_norm": 1.615877389907837, - "learning_rate": 5.417587939698493e-05, - "loss": 5.3755, - "step": 46112 - }, - { - "epoch": 24.048500651890482, - "grad_norm": 1.4993796348571777, - "learning_rate": 5.41748743718593e-05, - "loss": 5.2106, - "step": 46113 - }, - { - "epoch": 24.0490221642764, - "grad_norm": 1.5948331356048584, - "learning_rate": 5.417386934673368e-05, - "loss": 4.8748, - "step": 46114 - }, - { - "epoch": 24.04954367666232, - "grad_norm": 1.5158321857452393, - "learning_rate": 5.417286432160804e-05, - "loss": 5.172, - "step": 46115 - }, - { - "epoch": 24.05006518904824, - "grad_norm": 1.513336181640625, - "learning_rate": 5.417185929648242e-05, - "loss": 5.1024, - "step": 46116 - }, - { - "epoch": 24.05058670143416, - "grad_norm": 1.5061228275299072, - "learning_rate": 5.4170854271356784e-05, - "loss": 5.2831, - "step": 46117 - }, - { - "epoch": 24.05110821382008, - "grad_norm": 1.5956271886825562, - "learning_rate": 5.416984924623116e-05, - "loss": 4.9538, - "step": 46118 - }, - { - "epoch": 24.051629726206, - "grad_norm": 1.5196985006332397, - "learning_rate": 5.416884422110553e-05, - "loss": 5.1045, - "step": 46119 - }, - { - "epoch": 24.052151238591918, - "grad_norm": 1.6203173398971558, - "learning_rate": 5.41678391959799e-05, - "loss": 5.3808, - "step": 46120 - }, - { - "epoch": 24.052672750977834, - "grad_norm": 1.612718105316162, - "learning_rate": 5.4166834170854275e-05, - "loss": 5.2778, - "step": 46121 - }, - { - "epoch": 24.053194263363753, - "grad_norm": 1.5709731578826904, - "learning_rate": 5.416582914572864e-05, - "loss": 5.3889, - "step": 46122 - }, - { - "epoch": 24.053715775749673, - "grad_norm": 1.4780852794647217, - "learning_rate": 5.416482412060302e-05, - "loss": 5.0213, - "step": 46123 - }, - { - "epoch": 24.054237288135592, - "grad_norm": 1.5251973867416382, - "learning_rate": 5.416381909547739e-05, - "loss": 4.9747, - "step": 46124 - }, - { - "epoch": 24.054758800521512, - "grad_norm": 1.4882943630218506, - "learning_rate": 5.4162814070351766e-05, - "loss": 5.4213, - "step": 46125 - }, - { - "epoch": 24.05528031290743, - "grad_norm": 1.6323121786117554, - "learning_rate": 5.416180904522613e-05, - "loss": 4.8621, - "step": 46126 - }, - { - "epoch": 24.05580182529335, - "grad_norm": 1.6088634729385376, - "learning_rate": 5.416080402010051e-05, - "loss": 5.0845, - "step": 46127 - }, - { - "epoch": 24.05632333767927, - "grad_norm": 1.7182953357696533, - "learning_rate": 5.415979899497487e-05, - "loss": 4.815, - "step": 46128 - }, - { - "epoch": 24.05684485006519, - "grad_norm": 1.5520579814910889, - "learning_rate": 5.415879396984925e-05, - "loss": 5.4736, - "step": 46129 - }, - { - "epoch": 24.05736636245111, - "grad_norm": 1.624547004699707, - "learning_rate": 5.415778894472362e-05, - "loss": 5.0882, - "step": 46130 - }, - { - "epoch": 24.05788787483703, - "grad_norm": 1.6157820224761963, - "learning_rate": 5.4156783919598e-05, - "loss": 5.4171, - "step": 46131 - }, - { - "epoch": 24.058409387222948, - "grad_norm": 1.5797287225723267, - "learning_rate": 5.415577889447236e-05, - "loss": 5.019, - "step": 46132 - }, - { - "epoch": 24.058930899608864, - "grad_norm": 1.559063196182251, - "learning_rate": 5.4154773869346734e-05, - "loss": 4.7679, - "step": 46133 - }, - { - "epoch": 24.059452411994783, - "grad_norm": 1.62010657787323, - "learning_rate": 5.415376884422111e-05, - "loss": 5.3454, - "step": 46134 - }, - { - "epoch": 24.059973924380703, - "grad_norm": 1.639583706855774, - "learning_rate": 5.4152763819095476e-05, - "loss": 5.1264, - "step": 46135 - }, - { - "epoch": 24.060495436766622, - "grad_norm": 1.5523625612258911, - "learning_rate": 5.4151758793969854e-05, - "loss": 5.1476, - "step": 46136 - }, - { - "epoch": 24.061016949152542, - "grad_norm": 1.6235785484313965, - "learning_rate": 5.415075376884422e-05, - "loss": 4.9372, - "step": 46137 - }, - { - "epoch": 24.06153846153846, - "grad_norm": 1.5566412210464478, - "learning_rate": 5.4149748743718596e-05, - "loss": 5.5051, - "step": 46138 - }, - { - "epoch": 24.06205997392438, - "grad_norm": 1.6164720058441162, - "learning_rate": 5.414874371859297e-05, - "loss": 4.5743, - "step": 46139 - }, - { - "epoch": 24.0625814863103, - "grad_norm": 1.5199687480926514, - "learning_rate": 5.4147738693467345e-05, - "loss": 5.4282, - "step": 46140 - }, - { - "epoch": 24.06310299869622, - "grad_norm": 1.6363195180892944, - "learning_rate": 5.414673366834171e-05, - "loss": 4.3532, - "step": 46141 - }, - { - "epoch": 24.06362451108214, - "grad_norm": 1.5296297073364258, - "learning_rate": 5.414572864321609e-05, - "loss": 5.3846, - "step": 46142 - }, - { - "epoch": 24.06414602346806, - "grad_norm": 1.6112593412399292, - "learning_rate": 5.414472361809045e-05, - "loss": 5.0458, - "step": 46143 - }, - { - "epoch": 24.064667535853978, - "grad_norm": 1.5586239099502563, - "learning_rate": 5.414371859296483e-05, - "loss": 5.0657, - "step": 46144 - }, - { - "epoch": 24.065189048239894, - "grad_norm": 1.5466039180755615, - "learning_rate": 5.41427135678392e-05, - "loss": 5.5197, - "step": 46145 - }, - { - "epoch": 24.065710560625813, - "grad_norm": 1.5270720720291138, - "learning_rate": 5.4141708542713564e-05, - "loss": 5.4398, - "step": 46146 - }, - { - "epoch": 24.066232073011733, - "grad_norm": 1.4853025674819946, - "learning_rate": 5.414070351758794e-05, - "loss": 5.2081, - "step": 46147 - }, - { - "epoch": 24.066753585397652, - "grad_norm": 1.4869545698165894, - "learning_rate": 5.4139698492462307e-05, - "loss": 5.0266, - "step": 46148 - }, - { - "epoch": 24.067275097783572, - "grad_norm": 1.5332838296890259, - "learning_rate": 5.4138693467336684e-05, - "loss": 5.5649, - "step": 46149 - }, - { - "epoch": 24.06779661016949, - "grad_norm": 1.5586276054382324, - "learning_rate": 5.4137688442211055e-05, - "loss": 4.9503, - "step": 46150 - }, - { - "epoch": 24.06831812255541, - "grad_norm": 1.4693835973739624, - "learning_rate": 5.413668341708543e-05, - "loss": 5.2637, - "step": 46151 - }, - { - "epoch": 24.06883963494133, - "grad_norm": 1.6159244775772095, - "learning_rate": 5.41356783919598e-05, - "loss": 4.9665, - "step": 46152 - }, - { - "epoch": 24.06936114732725, - "grad_norm": 1.6912510395050049, - "learning_rate": 5.4134673366834175e-05, - "loss": 4.9367, - "step": 46153 - }, - { - "epoch": 24.06988265971317, - "grad_norm": 1.5656617879867554, - "learning_rate": 5.4133668341708546e-05, - "loss": 4.9491, - "step": 46154 - }, - { - "epoch": 24.07040417209909, - "grad_norm": 1.546600580215454, - "learning_rate": 5.4132663316582924e-05, - "loss": 5.5664, - "step": 46155 - }, - { - "epoch": 24.070925684485008, - "grad_norm": 1.6194109916687012, - "learning_rate": 5.413165829145729e-05, - "loss": 4.3685, - "step": 46156 - }, - { - "epoch": 24.071447196870924, - "grad_norm": 1.514235019683838, - "learning_rate": 5.4130653266331666e-05, - "loss": 5.4297, - "step": 46157 - }, - { - "epoch": 24.071968709256844, - "grad_norm": 1.4718817472457886, - "learning_rate": 5.412964824120603e-05, - "loss": 5.6158, - "step": 46158 - }, - { - "epoch": 24.072490221642763, - "grad_norm": 1.4517574310302734, - "learning_rate": 5.41286432160804e-05, - "loss": 5.5131, - "step": 46159 - }, - { - "epoch": 24.073011734028682, - "grad_norm": 1.551672101020813, - "learning_rate": 5.412763819095478e-05, - "loss": 5.3943, - "step": 46160 - }, - { - "epoch": 24.073533246414602, - "grad_norm": 1.5712032318115234, - "learning_rate": 5.4126633165829144e-05, - "loss": 4.7555, - "step": 46161 - }, - { - "epoch": 24.07405475880052, - "grad_norm": 1.4879182577133179, - "learning_rate": 5.412562814070352e-05, - "loss": 5.353, - "step": 46162 - }, - { - "epoch": 24.07457627118644, - "grad_norm": 1.5015766620635986, - "learning_rate": 5.4124623115577886e-05, - "loss": 5.0462, - "step": 46163 - }, - { - "epoch": 24.07509778357236, - "grad_norm": 1.5358426570892334, - "learning_rate": 5.4123618090452264e-05, - "loss": 4.8209, - "step": 46164 - }, - { - "epoch": 24.07561929595828, - "grad_norm": 1.445813536643982, - "learning_rate": 5.4122613065326635e-05, - "loss": 5.7537, - "step": 46165 - }, - { - "epoch": 24.0761408083442, - "grad_norm": 1.541701316833496, - "learning_rate": 5.412160804020101e-05, - "loss": 5.4983, - "step": 46166 - }, - { - "epoch": 24.07666232073012, - "grad_norm": 1.4989402294158936, - "learning_rate": 5.412060301507538e-05, - "loss": 5.1591, - "step": 46167 - }, - { - "epoch": 24.077183833116038, - "grad_norm": 1.4399510622024536, - "learning_rate": 5.4119597989949755e-05, - "loss": 5.603, - "step": 46168 - }, - { - "epoch": 24.077705345501954, - "grad_norm": 1.5259425640106201, - "learning_rate": 5.411859296482412e-05, - "loss": 5.2099, - "step": 46169 - }, - { - "epoch": 24.078226857887874, - "grad_norm": 1.7379735708236694, - "learning_rate": 5.41175879396985e-05, - "loss": 5.1335, - "step": 46170 - }, - { - "epoch": 24.078748370273793, - "grad_norm": 1.5350654125213623, - "learning_rate": 5.411658291457287e-05, - "loss": 5.1806, - "step": 46171 - }, - { - "epoch": 24.079269882659712, - "grad_norm": 1.5073529481887817, - "learning_rate": 5.4115577889447246e-05, - "loss": 5.2096, - "step": 46172 - }, - { - "epoch": 24.079791395045632, - "grad_norm": 1.676622748374939, - "learning_rate": 5.411457286432161e-05, - "loss": 4.8476, - "step": 46173 - }, - { - "epoch": 24.08031290743155, - "grad_norm": 1.5904616117477417, - "learning_rate": 5.411356783919598e-05, - "loss": 4.9586, - "step": 46174 - }, - { - "epoch": 24.08083441981747, - "grad_norm": 1.6008751392364502, - "learning_rate": 5.411256281407036e-05, - "loss": 4.9652, - "step": 46175 - }, - { - "epoch": 24.08135593220339, - "grad_norm": 1.6593527793884277, - "learning_rate": 5.411155778894472e-05, - "loss": 5.2798, - "step": 46176 - }, - { - "epoch": 24.08187744458931, - "grad_norm": 1.4933072328567505, - "learning_rate": 5.41105527638191e-05, - "loss": 5.5527, - "step": 46177 - }, - { - "epoch": 24.08239895697523, - "grad_norm": 1.5341237783432007, - "learning_rate": 5.4109547738693465e-05, - "loss": 4.9895, - "step": 46178 - }, - { - "epoch": 24.08292046936115, - "grad_norm": 1.5785399675369263, - "learning_rate": 5.410854271356784e-05, - "loss": 5.2123, - "step": 46179 - }, - { - "epoch": 24.083441981747068, - "grad_norm": 1.5738776922225952, - "learning_rate": 5.4107537688442214e-05, - "loss": 4.0493, - "step": 46180 - }, - { - "epoch": 24.083963494132984, - "grad_norm": 1.4765321016311646, - "learning_rate": 5.410653266331659e-05, - "loss": 5.4741, - "step": 46181 - }, - { - "epoch": 24.084485006518904, - "grad_norm": 1.6573847532272339, - "learning_rate": 5.4105527638190956e-05, - "loss": 5.1741, - "step": 46182 - }, - { - "epoch": 24.085006518904823, - "grad_norm": 1.5710257291793823, - "learning_rate": 5.4104522613065334e-05, - "loss": 5.0465, - "step": 46183 - }, - { - "epoch": 24.085528031290742, - "grad_norm": 1.5279206037521362, - "learning_rate": 5.41035175879397e-05, - "loss": 5.1994, - "step": 46184 - }, - { - "epoch": 24.086049543676662, - "grad_norm": 1.4894112348556519, - "learning_rate": 5.4102512562814076e-05, - "loss": 5.095, - "step": 46185 - }, - { - "epoch": 24.08657105606258, - "grad_norm": 1.6499632596969604, - "learning_rate": 5.410150753768845e-05, - "loss": 4.7079, - "step": 46186 - }, - { - "epoch": 24.0870925684485, - "grad_norm": 1.558045506477356, - "learning_rate": 5.410050251256281e-05, - "loss": 5.2951, - "step": 46187 - }, - { - "epoch": 24.08761408083442, - "grad_norm": 1.6158416271209717, - "learning_rate": 5.409949748743719e-05, - "loss": 5.0308, - "step": 46188 - }, - { - "epoch": 24.08813559322034, - "grad_norm": 1.5349894762039185, - "learning_rate": 5.4098492462311554e-05, - "loss": 4.6311, - "step": 46189 - }, - { - "epoch": 24.08865710560626, - "grad_norm": 1.6273276805877686, - "learning_rate": 5.409748743718593e-05, - "loss": 5.1533, - "step": 46190 - }, - { - "epoch": 24.08917861799218, - "grad_norm": 1.6878405809402466, - "learning_rate": 5.40964824120603e-05, - "loss": 5.1714, - "step": 46191 - }, - { - "epoch": 24.089700130378098, - "grad_norm": 1.6074060201644897, - "learning_rate": 5.409547738693468e-05, - "loss": 4.9237, - "step": 46192 - }, - { - "epoch": 24.090221642764014, - "grad_norm": 1.5225616693496704, - "learning_rate": 5.4094472361809045e-05, - "loss": 4.9143, - "step": 46193 - }, - { - "epoch": 24.090743155149934, - "grad_norm": 1.6235140562057495, - "learning_rate": 5.409346733668342e-05, - "loss": 5.3798, - "step": 46194 - }, - { - "epoch": 24.091264667535853, - "grad_norm": 1.5328065156936646, - "learning_rate": 5.409246231155779e-05, - "loss": 5.1268, - "step": 46195 - }, - { - "epoch": 24.091786179921773, - "grad_norm": 1.5106827020645142, - "learning_rate": 5.4091457286432165e-05, - "loss": 5.3365, - "step": 46196 - }, - { - "epoch": 24.092307692307692, - "grad_norm": 1.5720000267028809, - "learning_rate": 5.4090452261306536e-05, - "loss": 5.1118, - "step": 46197 - }, - { - "epoch": 24.09282920469361, - "grad_norm": 1.6293867826461792, - "learning_rate": 5.4089447236180913e-05, - "loss": 4.8491, - "step": 46198 - }, - { - "epoch": 24.09335071707953, - "grad_norm": 1.4419485330581665, - "learning_rate": 5.408844221105528e-05, - "loss": 5.4614, - "step": 46199 - }, - { - "epoch": 24.09387222946545, - "grad_norm": 1.5313202142715454, - "learning_rate": 5.408743718592965e-05, - "loss": 5.6681, - "step": 46200 - }, - { - "epoch": 24.09439374185137, - "grad_norm": 1.5183393955230713, - "learning_rate": 5.4086432160804027e-05, - "loss": 5.4697, - "step": 46201 - }, - { - "epoch": 24.09491525423729, - "grad_norm": 1.4516030550003052, - "learning_rate": 5.408542713567839e-05, - "loss": 5.5997, - "step": 46202 - }, - { - "epoch": 24.09543676662321, - "grad_norm": 1.6628334522247314, - "learning_rate": 5.408442211055277e-05, - "loss": 5.2474, - "step": 46203 - }, - { - "epoch": 24.09595827900913, - "grad_norm": 1.6739684343338013, - "learning_rate": 5.408341708542713e-05, - "loss": 4.8565, - "step": 46204 - }, - { - "epoch": 24.096479791395044, - "grad_norm": 1.6147239208221436, - "learning_rate": 5.408241206030151e-05, - "loss": 4.6339, - "step": 46205 - }, - { - "epoch": 24.097001303780964, - "grad_norm": 1.5889710187911987, - "learning_rate": 5.408140703517588e-05, - "loss": 4.8023, - "step": 46206 - }, - { - "epoch": 24.097522816166883, - "grad_norm": 1.5924924612045288, - "learning_rate": 5.408040201005026e-05, - "loss": 5.337, - "step": 46207 - }, - { - "epoch": 24.098044328552803, - "grad_norm": 1.4940680265426636, - "learning_rate": 5.4079396984924624e-05, - "loss": 5.545, - "step": 46208 - }, - { - "epoch": 24.098565840938722, - "grad_norm": 1.614648461341858, - "learning_rate": 5.4078391959799e-05, - "loss": 5.076, - "step": 46209 - }, - { - "epoch": 24.09908735332464, - "grad_norm": 1.5647549629211426, - "learning_rate": 5.4077386934673366e-05, - "loss": 5.2867, - "step": 46210 - }, - { - "epoch": 24.09960886571056, - "grad_norm": 1.614320993423462, - "learning_rate": 5.4076381909547744e-05, - "loss": 4.7765, - "step": 46211 - }, - { - "epoch": 24.10013037809648, - "grad_norm": 1.981935739517212, - "learning_rate": 5.4075376884422115e-05, - "loss": 5.3463, - "step": 46212 - }, - { - "epoch": 24.1006518904824, - "grad_norm": 1.599616527557373, - "learning_rate": 5.407437185929648e-05, - "loss": 5.1578, - "step": 46213 - }, - { - "epoch": 24.10117340286832, - "grad_norm": 1.4636040925979614, - "learning_rate": 5.407336683417086e-05, - "loss": 5.2045, - "step": 46214 - }, - { - "epoch": 24.10169491525424, - "grad_norm": 1.4532266855239868, - "learning_rate": 5.407236180904522e-05, - "loss": 5.7606, - "step": 46215 - }, - { - "epoch": 24.102216427640155, - "grad_norm": 1.483828067779541, - "learning_rate": 5.40713567839196e-05, - "loss": 5.6761, - "step": 46216 - }, - { - "epoch": 24.102737940026074, - "grad_norm": 1.6140031814575195, - "learning_rate": 5.407035175879397e-05, - "loss": 4.9897, - "step": 46217 - }, - { - "epoch": 24.103259452411994, - "grad_norm": 1.6210917234420776, - "learning_rate": 5.406934673366835e-05, - "loss": 4.5014, - "step": 46218 - }, - { - "epoch": 24.103780964797913, - "grad_norm": 1.536994218826294, - "learning_rate": 5.406834170854271e-05, - "loss": 5.0039, - "step": 46219 - }, - { - "epoch": 24.104302477183833, - "grad_norm": 1.5318061113357544, - "learning_rate": 5.406733668341709e-05, - "loss": 5.2248, - "step": 46220 - }, - { - "epoch": 24.104823989569752, - "grad_norm": 1.5586416721343994, - "learning_rate": 5.406633165829146e-05, - "loss": 5.1045, - "step": 46221 - }, - { - "epoch": 24.10534550195567, - "grad_norm": 1.5919859409332275, - "learning_rate": 5.406532663316584e-05, - "loss": 5.5821, - "step": 46222 - }, - { - "epoch": 24.10586701434159, - "grad_norm": 1.4573848247528076, - "learning_rate": 5.40643216080402e-05, - "loss": 4.99, - "step": 46223 - }, - { - "epoch": 24.10638852672751, - "grad_norm": 1.5790798664093018, - "learning_rate": 5.406331658291458e-05, - "loss": 5.1041, - "step": 46224 - }, - { - "epoch": 24.10691003911343, - "grad_norm": 1.5711389780044556, - "learning_rate": 5.4062311557788945e-05, - "loss": 5.1393, - "step": 46225 - }, - { - "epoch": 24.10743155149935, - "grad_norm": 1.6000858545303345, - "learning_rate": 5.4061306532663316e-05, - "loss": 4.9326, - "step": 46226 - }, - { - "epoch": 24.10795306388527, - "grad_norm": 1.4644230604171753, - "learning_rate": 5.4060301507537694e-05, - "loss": 5.8591, - "step": 46227 - }, - { - "epoch": 24.108474576271185, - "grad_norm": 1.475772500038147, - "learning_rate": 5.405929648241206e-05, - "loss": 5.3003, - "step": 46228 - }, - { - "epoch": 24.108996088657104, - "grad_norm": 1.5789530277252197, - "learning_rate": 5.4058291457286436e-05, - "loss": 4.184, - "step": 46229 - }, - { - "epoch": 24.109517601043024, - "grad_norm": 1.614521861076355, - "learning_rate": 5.40572864321608e-05, - "loss": 4.7985, - "step": 46230 - }, - { - "epoch": 24.110039113428943, - "grad_norm": 1.458031177520752, - "learning_rate": 5.405628140703518e-05, - "loss": 5.5268, - "step": 46231 - }, - { - "epoch": 24.110560625814863, - "grad_norm": 1.5837531089782715, - "learning_rate": 5.405527638190955e-05, - "loss": 4.8697, - "step": 46232 - }, - { - "epoch": 24.111082138200782, - "grad_norm": 1.5353851318359375, - "learning_rate": 5.405427135678393e-05, - "loss": 4.8964, - "step": 46233 - }, - { - "epoch": 24.1116036505867, - "grad_norm": 1.5475854873657227, - "learning_rate": 5.405326633165829e-05, - "loss": 5.51, - "step": 46234 - }, - { - "epoch": 24.11212516297262, - "grad_norm": 1.4811782836914062, - "learning_rate": 5.405226130653267e-05, - "loss": 5.159, - "step": 46235 - }, - { - "epoch": 24.11264667535854, - "grad_norm": 1.460315227508545, - "learning_rate": 5.4051256281407034e-05, - "loss": 5.527, - "step": 46236 - }, - { - "epoch": 24.11316818774446, - "grad_norm": 1.568555474281311, - "learning_rate": 5.405025125628141e-05, - "loss": 5.1229, - "step": 46237 - }, - { - "epoch": 24.11368970013038, - "grad_norm": 1.5880444049835205, - "learning_rate": 5.404924623115578e-05, - "loss": 5.2115, - "step": 46238 - }, - { - "epoch": 24.1142112125163, - "grad_norm": 1.5320460796356201, - "learning_rate": 5.404824120603015e-05, - "loss": 4.9427, - "step": 46239 - }, - { - "epoch": 24.114732724902215, - "grad_norm": 1.5335010290145874, - "learning_rate": 5.4047236180904525e-05, - "loss": 5.3671, - "step": 46240 - }, - { - "epoch": 24.115254237288134, - "grad_norm": 1.556148886680603, - "learning_rate": 5.4046231155778896e-05, - "loss": 5.3116, - "step": 46241 - }, - { - "epoch": 24.115775749674054, - "grad_norm": 1.4792262315750122, - "learning_rate": 5.4045226130653274e-05, - "loss": 4.9028, - "step": 46242 - }, - { - "epoch": 24.116297262059973, - "grad_norm": 1.5185506343841553, - "learning_rate": 5.404422110552764e-05, - "loss": 5.2637, - "step": 46243 - }, - { - "epoch": 24.116818774445893, - "grad_norm": 1.428697109222412, - "learning_rate": 5.4043216080402016e-05, - "loss": 5.7535, - "step": 46244 - }, - { - "epoch": 24.117340286831812, - "grad_norm": 1.5568270683288574, - "learning_rate": 5.404221105527638e-05, - "loss": 5.3336, - "step": 46245 - }, - { - "epoch": 24.11786179921773, - "grad_norm": 1.5282292366027832, - "learning_rate": 5.404120603015076e-05, - "loss": 5.4707, - "step": 46246 - }, - { - "epoch": 24.11838331160365, - "grad_norm": 1.4758955240249634, - "learning_rate": 5.404020100502513e-05, - "loss": 5.6032, - "step": 46247 - }, - { - "epoch": 24.11890482398957, - "grad_norm": 1.6480367183685303, - "learning_rate": 5.403919597989951e-05, - "loss": 5.0456, - "step": 46248 - }, - { - "epoch": 24.11942633637549, - "grad_norm": 1.4414865970611572, - "learning_rate": 5.403819095477387e-05, - "loss": 5.4307, - "step": 46249 - }, - { - "epoch": 24.11994784876141, - "grad_norm": 1.6009303331375122, - "learning_rate": 5.403718592964825e-05, - "loss": 4.8828, - "step": 46250 - }, - { - "epoch": 24.12046936114733, - "grad_norm": 1.6142356395721436, - "learning_rate": 5.403618090452261e-05, - "loss": 5.1711, - "step": 46251 - }, - { - "epoch": 24.120990873533245, - "grad_norm": 1.5713139772415161, - "learning_rate": 5.4035175879396984e-05, - "loss": 4.7361, - "step": 46252 - }, - { - "epoch": 24.121512385919164, - "grad_norm": 1.5003362894058228, - "learning_rate": 5.403417085427136e-05, - "loss": 4.7122, - "step": 46253 - }, - { - "epoch": 24.122033898305084, - "grad_norm": 1.5010409355163574, - "learning_rate": 5.4033165829145726e-05, - "loss": 5.7927, - "step": 46254 - }, - { - "epoch": 24.122555410691003, - "grad_norm": 1.4410560131072998, - "learning_rate": 5.4032160804020104e-05, - "loss": 5.218, - "step": 46255 - }, - { - "epoch": 24.123076923076923, - "grad_norm": 1.5949164628982544, - "learning_rate": 5.403115577889447e-05, - "loss": 4.7966, - "step": 46256 - }, - { - "epoch": 24.123598435462842, - "grad_norm": 1.7690244913101196, - "learning_rate": 5.4030150753768846e-05, - "loss": 4.9925, - "step": 46257 - }, - { - "epoch": 24.12411994784876, - "grad_norm": 1.5081486701965332, - "learning_rate": 5.402914572864322e-05, - "loss": 5.5065, - "step": 46258 - }, - { - "epoch": 24.12464146023468, - "grad_norm": 1.5617952346801758, - "learning_rate": 5.4028140703517595e-05, - "loss": 5.4693, - "step": 46259 - }, - { - "epoch": 24.1251629726206, - "grad_norm": 1.5824449062347412, - "learning_rate": 5.402713567839196e-05, - "loss": 5.2092, - "step": 46260 - }, - { - "epoch": 24.12568448500652, - "grad_norm": 1.5250182151794434, - "learning_rate": 5.402613065326634e-05, - "loss": 5.5775, - "step": 46261 - }, - { - "epoch": 24.12620599739244, - "grad_norm": 1.5588918924331665, - "learning_rate": 5.402512562814071e-05, - "loss": 4.7993, - "step": 46262 - }, - { - "epoch": 24.12672750977836, - "grad_norm": 1.4875248670578003, - "learning_rate": 5.4024120603015086e-05, - "loss": 5.0728, - "step": 46263 - }, - { - "epoch": 24.127249022164275, - "grad_norm": 1.5333912372589111, - "learning_rate": 5.402311557788945e-05, - "loss": 5.4437, - "step": 46264 - }, - { - "epoch": 24.127770534550194, - "grad_norm": 1.6242471933364868, - "learning_rate": 5.402211055276383e-05, - "loss": 4.8073, - "step": 46265 - }, - { - "epoch": 24.128292046936114, - "grad_norm": 1.5310648679733276, - "learning_rate": 5.402110552763819e-05, - "loss": 5.73, - "step": 46266 - }, - { - "epoch": 24.128813559322033, - "grad_norm": 1.5524113178253174, - "learning_rate": 5.4020100502512563e-05, - "loss": 5.2066, - "step": 46267 - }, - { - "epoch": 24.129335071707953, - "grad_norm": 1.5742655992507935, - "learning_rate": 5.401909547738694e-05, - "loss": 5.1954, - "step": 46268 - }, - { - "epoch": 24.129856584093872, - "grad_norm": 1.5146701335906982, - "learning_rate": 5.4018090452261305e-05, - "loss": 5.2763, - "step": 46269 - }, - { - "epoch": 24.13037809647979, - "grad_norm": 1.447706699371338, - "learning_rate": 5.401708542713568e-05, - "loss": 5.3817, - "step": 46270 - }, - { - "epoch": 24.13089960886571, - "grad_norm": 1.6465013027191162, - "learning_rate": 5.401608040201005e-05, - "loss": 4.3353, - "step": 46271 - }, - { - "epoch": 24.13142112125163, - "grad_norm": 1.6284083127975464, - "learning_rate": 5.4015075376884425e-05, - "loss": 4.835, - "step": 46272 - }, - { - "epoch": 24.13194263363755, - "grad_norm": 1.5540298223495483, - "learning_rate": 5.4014070351758796e-05, - "loss": 4.9539, - "step": 46273 - }, - { - "epoch": 24.13246414602347, - "grad_norm": 1.6565953493118286, - "learning_rate": 5.4013065326633174e-05, - "loss": 4.8887, - "step": 46274 - }, - { - "epoch": 24.13298565840939, - "grad_norm": 1.5295827388763428, - "learning_rate": 5.401206030150754e-05, - "loss": 5.5523, - "step": 46275 - }, - { - "epoch": 24.133507170795305, - "grad_norm": 1.649246096611023, - "learning_rate": 5.4011055276381916e-05, - "loss": 4.9998, - "step": 46276 - }, - { - "epoch": 24.134028683181224, - "grad_norm": 1.4382654428482056, - "learning_rate": 5.401005025125628e-05, - "loss": 5.6922, - "step": 46277 - }, - { - "epoch": 24.134550195567144, - "grad_norm": 1.6374479532241821, - "learning_rate": 5.400904522613066e-05, - "loss": 5.0115, - "step": 46278 - }, - { - "epoch": 24.135071707953063, - "grad_norm": 1.6577038764953613, - "learning_rate": 5.400804020100503e-05, - "loss": 4.8425, - "step": 46279 - }, - { - "epoch": 24.135593220338983, - "grad_norm": 1.469879150390625, - "learning_rate": 5.4007035175879394e-05, - "loss": 5.7147, - "step": 46280 - }, - { - "epoch": 24.136114732724902, - "grad_norm": 1.6344963312149048, - "learning_rate": 5.400603015075377e-05, - "loss": 4.824, - "step": 46281 - }, - { - "epoch": 24.13663624511082, - "grad_norm": 1.4712461233139038, - "learning_rate": 5.4005025125628136e-05, - "loss": 5.3909, - "step": 46282 - }, - { - "epoch": 24.13715775749674, - "grad_norm": 1.5455083847045898, - "learning_rate": 5.4004020100502514e-05, - "loss": 5.1313, - "step": 46283 - }, - { - "epoch": 24.13767926988266, - "grad_norm": 1.5361002683639526, - "learning_rate": 5.4003015075376885e-05, - "loss": 4.8464, - "step": 46284 - }, - { - "epoch": 24.13820078226858, - "grad_norm": 1.6290690898895264, - "learning_rate": 5.400201005025126e-05, - "loss": 4.7786, - "step": 46285 - }, - { - "epoch": 24.1387222946545, - "grad_norm": 1.540895938873291, - "learning_rate": 5.400100502512563e-05, - "loss": 5.3719, - "step": 46286 - }, - { - "epoch": 24.13924380704042, - "grad_norm": 1.4879103899002075, - "learning_rate": 5.4000000000000005e-05, - "loss": 5.0299, - "step": 46287 - }, - { - "epoch": 24.139765319426335, - "grad_norm": 1.6355894804000854, - "learning_rate": 5.3998994974874376e-05, - "loss": 5.6305, - "step": 46288 - }, - { - "epoch": 24.140286831812254, - "grad_norm": 1.5325566530227661, - "learning_rate": 5.3997989949748754e-05, - "loss": 5.5452, - "step": 46289 - }, - { - "epoch": 24.140808344198174, - "grad_norm": 1.5134621858596802, - "learning_rate": 5.399698492462312e-05, - "loss": 5.4308, - "step": 46290 - }, - { - "epoch": 24.141329856584093, - "grad_norm": 1.4936336278915405, - "learning_rate": 5.3995979899497496e-05, - "loss": 5.3204, - "step": 46291 - }, - { - "epoch": 24.141851368970013, - "grad_norm": 1.5561835765838623, - "learning_rate": 5.399497487437186e-05, - "loss": 4.6903, - "step": 46292 - }, - { - "epoch": 24.142372881355932, - "grad_norm": 1.465512990951538, - "learning_rate": 5.399396984924623e-05, - "loss": 5.803, - "step": 46293 - }, - { - "epoch": 24.14289439374185, - "grad_norm": 1.5997511148452759, - "learning_rate": 5.399296482412061e-05, - "loss": 5.2638, - "step": 46294 - }, - { - "epoch": 24.14341590612777, - "grad_norm": 1.7532638311386108, - "learning_rate": 5.399195979899497e-05, - "loss": 4.8933, - "step": 46295 - }, - { - "epoch": 24.14393741851369, - "grad_norm": 1.5372366905212402, - "learning_rate": 5.399095477386935e-05, - "loss": 5.2893, - "step": 46296 - }, - { - "epoch": 24.14445893089961, - "grad_norm": 1.59455144405365, - "learning_rate": 5.3989949748743715e-05, - "loss": 5.3657, - "step": 46297 - }, - { - "epoch": 24.14498044328553, - "grad_norm": 1.5500688552856445, - "learning_rate": 5.398894472361809e-05, - "loss": 4.9478, - "step": 46298 - }, - { - "epoch": 24.14550195567145, - "grad_norm": 1.5390684604644775, - "learning_rate": 5.3987939698492464e-05, - "loss": 5.3699, - "step": 46299 - }, - { - "epoch": 24.146023468057365, - "grad_norm": 1.537137746810913, - "learning_rate": 5.398693467336684e-05, - "loss": 4.9374, - "step": 46300 - }, - { - "epoch": 24.146544980443284, - "grad_norm": 1.5696587562561035, - "learning_rate": 5.3985929648241206e-05, - "loss": 5.4639, - "step": 46301 - }, - { - "epoch": 24.147066492829204, - "grad_norm": 1.6008490324020386, - "learning_rate": 5.3984924623115584e-05, - "loss": 4.2608, - "step": 46302 - }, - { - "epoch": 24.147588005215123, - "grad_norm": 1.6336522102355957, - "learning_rate": 5.398391959798995e-05, - "loss": 4.8231, - "step": 46303 - }, - { - "epoch": 24.148109517601043, - "grad_norm": 1.5787789821624756, - "learning_rate": 5.3982914572864326e-05, - "loss": 5.1621, - "step": 46304 - }, - { - "epoch": 24.148631029986962, - "grad_norm": 1.6193997859954834, - "learning_rate": 5.39819095477387e-05, - "loss": 4.949, - "step": 46305 - }, - { - "epoch": 24.14915254237288, - "grad_norm": 1.558630108833313, - "learning_rate": 5.398090452261306e-05, - "loss": 5.5907, - "step": 46306 - }, - { - "epoch": 24.1496740547588, - "grad_norm": 1.5566539764404297, - "learning_rate": 5.397989949748744e-05, - "loss": 4.7883, - "step": 46307 - }, - { - "epoch": 24.15019556714472, - "grad_norm": 1.4818211793899536, - "learning_rate": 5.397889447236181e-05, - "loss": 5.2953, - "step": 46308 - }, - { - "epoch": 24.15071707953064, - "grad_norm": 1.5886698961257935, - "learning_rate": 5.397788944723619e-05, - "loss": 5.3193, - "step": 46309 - }, - { - "epoch": 24.15123859191656, - "grad_norm": 1.498441219329834, - "learning_rate": 5.397688442211055e-05, - "loss": 4.8786, - "step": 46310 - }, - { - "epoch": 24.151760104302475, - "grad_norm": 1.6078786849975586, - "learning_rate": 5.397587939698493e-05, - "loss": 5.5112, - "step": 46311 - }, - { - "epoch": 24.152281616688395, - "grad_norm": 1.5618846416473389, - "learning_rate": 5.3974874371859295e-05, - "loss": 5.2562, - "step": 46312 - }, - { - "epoch": 24.152803129074314, - "grad_norm": 1.5169811248779297, - "learning_rate": 5.397386934673367e-05, - "loss": 5.2605, - "step": 46313 - }, - { - "epoch": 24.153324641460234, - "grad_norm": 1.534814715385437, - "learning_rate": 5.3972864321608043e-05, - "loss": 5.3092, - "step": 46314 - }, - { - "epoch": 24.153846153846153, - "grad_norm": 1.4739606380462646, - "learning_rate": 5.397185929648242e-05, - "loss": 4.8877, - "step": 46315 - }, - { - "epoch": 24.154367666232073, - "grad_norm": 1.7033811807632446, - "learning_rate": 5.3970854271356786e-05, - "loss": 4.6204, - "step": 46316 - }, - { - "epoch": 24.154889178617992, - "grad_norm": 1.3983705043792725, - "learning_rate": 5.3969849246231163e-05, - "loss": 5.5582, - "step": 46317 - }, - { - "epoch": 24.15541069100391, - "grad_norm": 1.5880297422409058, - "learning_rate": 5.396884422110553e-05, - "loss": 4.8663, - "step": 46318 - }, - { - "epoch": 24.15593220338983, - "grad_norm": 1.6449909210205078, - "learning_rate": 5.39678391959799e-05, - "loss": 5.05, - "step": 46319 - }, - { - "epoch": 24.15645371577575, - "grad_norm": 1.5571874380111694, - "learning_rate": 5.3966834170854277e-05, - "loss": 5.3514, - "step": 46320 - }, - { - "epoch": 24.15697522816167, - "grad_norm": 1.6356078386306763, - "learning_rate": 5.396582914572864e-05, - "loss": 4.5578, - "step": 46321 - }, - { - "epoch": 24.15749674054759, - "grad_norm": 1.519140362739563, - "learning_rate": 5.396482412060302e-05, - "loss": 4.9288, - "step": 46322 - }, - { - "epoch": 24.158018252933505, - "grad_norm": 1.564781904220581, - "learning_rate": 5.396381909547738e-05, - "loss": 4.9108, - "step": 46323 - }, - { - "epoch": 24.158539765319425, - "grad_norm": 1.5291354656219482, - "learning_rate": 5.396281407035176e-05, - "loss": 5.234, - "step": 46324 - }, - { - "epoch": 24.159061277705344, - "grad_norm": 1.5344220399856567, - "learning_rate": 5.396180904522613e-05, - "loss": 5.3909, - "step": 46325 - }, - { - "epoch": 24.159582790091264, - "grad_norm": 1.4855977296829224, - "learning_rate": 5.396080402010051e-05, - "loss": 5.3035, - "step": 46326 - }, - { - "epoch": 24.160104302477183, - "grad_norm": 1.4897464513778687, - "learning_rate": 5.3959798994974874e-05, - "loss": 5.3991, - "step": 46327 - }, - { - "epoch": 24.160625814863103, - "grad_norm": 1.5425324440002441, - "learning_rate": 5.395879396984925e-05, - "loss": 5.5859, - "step": 46328 - }, - { - "epoch": 24.161147327249022, - "grad_norm": 1.7401981353759766, - "learning_rate": 5.395778894472362e-05, - "loss": 5.0742, - "step": 46329 - }, - { - "epoch": 24.16166883963494, - "grad_norm": 1.5870991945266724, - "learning_rate": 5.3956783919598e-05, - "loss": 5.1538, - "step": 46330 - }, - { - "epoch": 24.16219035202086, - "grad_norm": 1.567669153213501, - "learning_rate": 5.3955778894472365e-05, - "loss": 5.1611, - "step": 46331 - }, - { - "epoch": 24.16271186440678, - "grad_norm": 1.5696760416030884, - "learning_rate": 5.395477386934673e-05, - "loss": 5.2454, - "step": 46332 - }, - { - "epoch": 24.1632333767927, - "grad_norm": 1.686755657196045, - "learning_rate": 5.395376884422111e-05, - "loss": 5.1962, - "step": 46333 - }, - { - "epoch": 24.16375488917862, - "grad_norm": 1.598188877105713, - "learning_rate": 5.395276381909548e-05, - "loss": 5.2327, - "step": 46334 - }, - { - "epoch": 24.164276401564535, - "grad_norm": 1.5255699157714844, - "learning_rate": 5.3951758793969856e-05, - "loss": 5.5893, - "step": 46335 - }, - { - "epoch": 24.164797913950455, - "grad_norm": 1.7398474216461182, - "learning_rate": 5.395075376884422e-05, - "loss": 5.2375, - "step": 46336 - }, - { - "epoch": 24.165319426336374, - "grad_norm": 1.4708869457244873, - "learning_rate": 5.39497487437186e-05, - "loss": 5.3966, - "step": 46337 - }, - { - "epoch": 24.165840938722294, - "grad_norm": 1.7286466360092163, - "learning_rate": 5.394874371859296e-05, - "loss": 5.0607, - "step": 46338 - }, - { - "epoch": 24.166362451108213, - "grad_norm": 1.5741547346115112, - "learning_rate": 5.394773869346734e-05, - "loss": 5.0348, - "step": 46339 - }, - { - "epoch": 24.166883963494133, - "grad_norm": 1.5429822206497192, - "learning_rate": 5.394673366834171e-05, - "loss": 5.6848, - "step": 46340 - }, - { - "epoch": 24.167405475880052, - "grad_norm": 1.378768801689148, - "learning_rate": 5.394572864321609e-05, - "loss": 4.9461, - "step": 46341 - }, - { - "epoch": 24.16792698826597, - "grad_norm": 1.6272180080413818, - "learning_rate": 5.394472361809045e-05, - "loss": 5.0955, - "step": 46342 - }, - { - "epoch": 24.16844850065189, - "grad_norm": 1.5709151029586792, - "learning_rate": 5.394371859296483e-05, - "loss": 4.9627, - "step": 46343 - }, - { - "epoch": 24.16897001303781, - "grad_norm": 1.6599886417388916, - "learning_rate": 5.3942713567839195e-05, - "loss": 4.9125, - "step": 46344 - }, - { - "epoch": 24.16949152542373, - "grad_norm": 1.4783902168273926, - "learning_rate": 5.3941708542713566e-05, - "loss": 5.5028, - "step": 46345 - }, - { - "epoch": 24.17001303780965, - "grad_norm": 1.7009825706481934, - "learning_rate": 5.3940703517587944e-05, - "loss": 5.2658, - "step": 46346 - }, - { - "epoch": 24.170534550195566, - "grad_norm": 1.6247358322143555, - "learning_rate": 5.393969849246231e-05, - "loss": 4.6903, - "step": 46347 - }, - { - "epoch": 24.171056062581485, - "grad_norm": 1.4544612169265747, - "learning_rate": 5.3938693467336686e-05, - "loss": 4.3902, - "step": 46348 - }, - { - "epoch": 24.171577574967404, - "grad_norm": 1.6079128980636597, - "learning_rate": 5.393768844221105e-05, - "loss": 4.8316, - "step": 46349 - }, - { - "epoch": 24.172099087353324, - "grad_norm": 1.5350109338760376, - "learning_rate": 5.393668341708543e-05, - "loss": 5.4547, - "step": 46350 - }, - { - "epoch": 24.172620599739243, - "grad_norm": 1.517871618270874, - "learning_rate": 5.39356783919598e-05, - "loss": 5.4808, - "step": 46351 - }, - { - "epoch": 24.173142112125163, - "grad_norm": 1.6508053541183472, - "learning_rate": 5.393467336683418e-05, - "loss": 4.9221, - "step": 46352 - }, - { - "epoch": 24.173663624511082, - "grad_norm": 1.4982821941375732, - "learning_rate": 5.393366834170854e-05, - "loss": 5.5118, - "step": 46353 - }, - { - "epoch": 24.174185136897, - "grad_norm": 1.518595576286316, - "learning_rate": 5.393266331658292e-05, - "loss": 5.7856, - "step": 46354 - }, - { - "epoch": 24.17470664928292, - "grad_norm": 1.6157419681549072, - "learning_rate": 5.393165829145729e-05, - "loss": 5.2395, - "step": 46355 - }, - { - "epoch": 24.17522816166884, - "grad_norm": 1.5971299409866333, - "learning_rate": 5.393065326633167e-05, - "loss": 4.818, - "step": 46356 - }, - { - "epoch": 24.17574967405476, - "grad_norm": 1.585545539855957, - "learning_rate": 5.392964824120603e-05, - "loss": 5.2571, - "step": 46357 - }, - { - "epoch": 24.17627118644068, - "grad_norm": 1.50228750705719, - "learning_rate": 5.392864321608041e-05, - "loss": 5.3955, - "step": 46358 - }, - { - "epoch": 24.176792698826596, - "grad_norm": 1.5857266187667847, - "learning_rate": 5.3927638190954775e-05, - "loss": 4.9964, - "step": 46359 - }, - { - "epoch": 24.177314211212515, - "grad_norm": 1.5820868015289307, - "learning_rate": 5.3926633165829146e-05, - "loss": 4.7745, - "step": 46360 - }, - { - "epoch": 24.177835723598434, - "grad_norm": 1.5530641078948975, - "learning_rate": 5.3925628140703524e-05, - "loss": 5.4098, - "step": 46361 - }, - { - "epoch": 24.178357235984354, - "grad_norm": 1.6045013666152954, - "learning_rate": 5.392462311557789e-05, - "loss": 5.2229, - "step": 46362 - }, - { - "epoch": 24.178878748370273, - "grad_norm": 1.6189947128295898, - "learning_rate": 5.3923618090452266e-05, - "loss": 4.9864, - "step": 46363 - }, - { - "epoch": 24.179400260756193, - "grad_norm": 1.69171941280365, - "learning_rate": 5.392261306532663e-05, - "loss": 4.4985, - "step": 46364 - }, - { - "epoch": 24.179921773142112, - "grad_norm": 1.6774797439575195, - "learning_rate": 5.392160804020101e-05, - "loss": 5.2277, - "step": 46365 - }, - { - "epoch": 24.180443285528032, - "grad_norm": 1.603043556213379, - "learning_rate": 5.392060301507538e-05, - "loss": 5.6007, - "step": 46366 - }, - { - "epoch": 24.18096479791395, - "grad_norm": 1.5529862642288208, - "learning_rate": 5.391959798994976e-05, - "loss": 5.1274, - "step": 46367 - }, - { - "epoch": 24.18148631029987, - "grad_norm": 1.6332619190216064, - "learning_rate": 5.391859296482412e-05, - "loss": 5.043, - "step": 46368 - }, - { - "epoch": 24.18200782268579, - "grad_norm": 1.625966191291809, - "learning_rate": 5.39175879396985e-05, - "loss": 5.7316, - "step": 46369 - }, - { - "epoch": 24.18252933507171, - "grad_norm": 1.6669849157333374, - "learning_rate": 5.391658291457286e-05, - "loss": 5.3203, - "step": 46370 - }, - { - "epoch": 24.183050847457626, - "grad_norm": 1.6942963600158691, - "learning_rate": 5.391557788944724e-05, - "loss": 4.5723, - "step": 46371 - }, - { - "epoch": 24.183572359843545, - "grad_norm": 1.5928059816360474, - "learning_rate": 5.391457286432161e-05, - "loss": 4.968, - "step": 46372 - }, - { - "epoch": 24.184093872229464, - "grad_norm": 1.6965670585632324, - "learning_rate": 5.3913567839195976e-05, - "loss": 4.8172, - "step": 46373 - }, - { - "epoch": 24.184615384615384, - "grad_norm": 1.5861245393753052, - "learning_rate": 5.3912562814070354e-05, - "loss": 5.1711, - "step": 46374 - }, - { - "epoch": 24.185136897001303, - "grad_norm": 1.5411503314971924, - "learning_rate": 5.3911557788944725e-05, - "loss": 5.3123, - "step": 46375 - }, - { - "epoch": 24.185658409387223, - "grad_norm": 1.5282597541809082, - "learning_rate": 5.39105527638191e-05, - "loss": 5.5071, - "step": 46376 - }, - { - "epoch": 24.186179921773142, - "grad_norm": 1.6094508171081543, - "learning_rate": 5.390954773869347e-05, - "loss": 4.9529, - "step": 46377 - }, - { - "epoch": 24.186701434159062, - "grad_norm": 1.552862524986267, - "learning_rate": 5.3908542713567845e-05, - "loss": 5.11, - "step": 46378 - }, - { - "epoch": 24.18722294654498, - "grad_norm": 1.5963221788406372, - "learning_rate": 5.390753768844221e-05, - "loss": 5.5186, - "step": 46379 - }, - { - "epoch": 24.1877444589309, - "grad_norm": 1.5504820346832275, - "learning_rate": 5.390653266331659e-05, - "loss": 5.1927, - "step": 46380 - }, - { - "epoch": 24.18826597131682, - "grad_norm": 1.5412381887435913, - "learning_rate": 5.390552763819096e-05, - "loss": 5.5401, - "step": 46381 - }, - { - "epoch": 24.18878748370274, - "grad_norm": 1.6276432275772095, - "learning_rate": 5.3904522613065336e-05, - "loss": 5.2388, - "step": 46382 - }, - { - "epoch": 24.189308996088656, - "grad_norm": 1.8161855936050415, - "learning_rate": 5.39035175879397e-05, - "loss": 4.5698, - "step": 46383 - }, - { - "epoch": 24.189830508474575, - "grad_norm": 1.5865027904510498, - "learning_rate": 5.390251256281408e-05, - "loss": 5.5825, - "step": 46384 - }, - { - "epoch": 24.190352020860495, - "grad_norm": 1.48198664188385, - "learning_rate": 5.390150753768844e-05, - "loss": 5.2726, - "step": 46385 - }, - { - "epoch": 24.190873533246414, - "grad_norm": 1.586185097694397, - "learning_rate": 5.3900502512562813e-05, - "loss": 5.1222, - "step": 46386 - }, - { - "epoch": 24.191395045632333, - "grad_norm": 1.6536496877670288, - "learning_rate": 5.389949748743719e-05, - "loss": 5.2579, - "step": 46387 - }, - { - "epoch": 24.191916558018253, - "grad_norm": 1.5486581325531006, - "learning_rate": 5.3898492462311556e-05, - "loss": 5.2792, - "step": 46388 - }, - { - "epoch": 24.192438070404172, - "grad_norm": 1.5157922506332397, - "learning_rate": 5.389748743718593e-05, - "loss": 5.3727, - "step": 46389 - }, - { - "epoch": 24.192959582790092, - "grad_norm": 1.6127725839614868, - "learning_rate": 5.38964824120603e-05, - "loss": 5.0878, - "step": 46390 - }, - { - "epoch": 24.19348109517601, - "grad_norm": 1.6107511520385742, - "learning_rate": 5.3895477386934675e-05, - "loss": 5.1785, - "step": 46391 - }, - { - "epoch": 24.19400260756193, - "grad_norm": 1.5164779424667358, - "learning_rate": 5.3894472361809046e-05, - "loss": 5.3219, - "step": 46392 - }, - { - "epoch": 24.19452411994785, - "grad_norm": 1.4946221113204956, - "learning_rate": 5.3893467336683424e-05, - "loss": 5.1246, - "step": 46393 - }, - { - "epoch": 24.195045632333766, - "grad_norm": 1.4637478590011597, - "learning_rate": 5.389246231155779e-05, - "loss": 5.2091, - "step": 46394 - }, - { - "epoch": 24.195567144719686, - "grad_norm": 1.597446084022522, - "learning_rate": 5.3891457286432166e-05, - "loss": 4.578, - "step": 46395 - }, - { - "epoch": 24.196088657105605, - "grad_norm": 1.6217025518417358, - "learning_rate": 5.389045226130654e-05, - "loss": 5.4475, - "step": 46396 - }, - { - "epoch": 24.196610169491525, - "grad_norm": 1.4581788778305054, - "learning_rate": 5.3889447236180915e-05, - "loss": 5.1782, - "step": 46397 - }, - { - "epoch": 24.197131681877444, - "grad_norm": 1.4902821779251099, - "learning_rate": 5.388844221105528e-05, - "loss": 5.6112, - "step": 46398 - }, - { - "epoch": 24.197653194263363, - "grad_norm": 1.575924277305603, - "learning_rate": 5.3887437185929644e-05, - "loss": 5.1034, - "step": 46399 - }, - { - "epoch": 24.198174706649283, - "grad_norm": 1.569297194480896, - "learning_rate": 5.388643216080402e-05, - "loss": 5.4409, - "step": 46400 - }, - { - "epoch": 24.198696219035202, - "grad_norm": 1.7097095251083374, - "learning_rate": 5.388542713567839e-05, - "loss": 5.2224, - "step": 46401 - }, - { - "epoch": 24.199217731421122, - "grad_norm": 1.5452251434326172, - "learning_rate": 5.388442211055277e-05, - "loss": 5.4659, - "step": 46402 - }, - { - "epoch": 24.19973924380704, - "grad_norm": 1.5139878988265991, - "learning_rate": 5.3883417085427135e-05, - "loss": 5.6455, - "step": 46403 - }, - { - "epoch": 24.20026075619296, - "grad_norm": 1.5644828081130981, - "learning_rate": 5.388241206030151e-05, - "loss": 5.2003, - "step": 46404 - }, - { - "epoch": 24.20078226857888, - "grad_norm": 1.5589828491210938, - "learning_rate": 5.388140703517588e-05, - "loss": 5.0927, - "step": 46405 - }, - { - "epoch": 24.201303780964796, - "grad_norm": 1.557769536972046, - "learning_rate": 5.3880402010050255e-05, - "loss": 4.8181, - "step": 46406 - }, - { - "epoch": 24.201825293350716, - "grad_norm": 1.5085036754608154, - "learning_rate": 5.3879396984924626e-05, - "loss": 5.2977, - "step": 46407 - }, - { - "epoch": 24.202346805736635, - "grad_norm": 1.5682294368743896, - "learning_rate": 5.3878391959799004e-05, - "loss": 4.6436, - "step": 46408 - }, - { - "epoch": 24.202868318122555, - "grad_norm": 1.6205408573150635, - "learning_rate": 5.387738693467337e-05, - "loss": 5.397, - "step": 46409 - }, - { - "epoch": 24.203389830508474, - "grad_norm": 1.5650115013122559, - "learning_rate": 5.3876381909547746e-05, - "loss": 5.3839, - "step": 46410 - }, - { - "epoch": 24.203911342894393, - "grad_norm": 1.536582589149475, - "learning_rate": 5.387537688442211e-05, - "loss": 5.0777, - "step": 46411 - }, - { - "epoch": 24.204432855280313, - "grad_norm": 1.6016658544540405, - "learning_rate": 5.387437185929648e-05, - "loss": 5.1283, - "step": 46412 - }, - { - "epoch": 24.204954367666232, - "grad_norm": 1.5407428741455078, - "learning_rate": 5.387336683417086e-05, - "loss": 5.3833, - "step": 46413 - }, - { - "epoch": 24.205475880052152, - "grad_norm": 1.4931832551956177, - "learning_rate": 5.387236180904522e-05, - "loss": 5.2158, - "step": 46414 - }, - { - "epoch": 24.20599739243807, - "grad_norm": 1.5440089702606201, - "learning_rate": 5.38713567839196e-05, - "loss": 5.5148, - "step": 46415 - }, - { - "epoch": 24.20651890482399, - "grad_norm": 1.483192801475525, - "learning_rate": 5.387035175879397e-05, - "loss": 5.3218, - "step": 46416 - }, - { - "epoch": 24.20704041720991, - "grad_norm": 1.5212008953094482, - "learning_rate": 5.386934673366835e-05, - "loss": 5.4369, - "step": 46417 - }, - { - "epoch": 24.207561929595826, - "grad_norm": 1.668747901916504, - "learning_rate": 5.3868341708542714e-05, - "loss": 5.1273, - "step": 46418 - }, - { - "epoch": 24.208083441981746, - "grad_norm": 1.6282336711883545, - "learning_rate": 5.386733668341709e-05, - "loss": 5.1495, - "step": 46419 - }, - { - "epoch": 24.208604954367665, - "grad_norm": 1.5587127208709717, - "learning_rate": 5.3866331658291456e-05, - "loss": 5.0995, - "step": 46420 - }, - { - "epoch": 24.209126466753585, - "grad_norm": 1.5617177486419678, - "learning_rate": 5.3865326633165834e-05, - "loss": 4.9212, - "step": 46421 - }, - { - "epoch": 24.209647979139504, - "grad_norm": 1.6100897789001465, - "learning_rate": 5.3864321608040205e-05, - "loss": 4.9351, - "step": 46422 - }, - { - "epoch": 24.210169491525424, - "grad_norm": 1.535861611366272, - "learning_rate": 5.386331658291458e-05, - "loss": 5.3917, - "step": 46423 - }, - { - "epoch": 24.210691003911343, - "grad_norm": 1.662685751914978, - "learning_rate": 5.386231155778895e-05, - "loss": 5.2119, - "step": 46424 - }, - { - "epoch": 24.211212516297262, - "grad_norm": 1.5320243835449219, - "learning_rate": 5.386130653266331e-05, - "loss": 5.2787, - "step": 46425 - }, - { - "epoch": 24.211734028683182, - "grad_norm": 1.4730128049850464, - "learning_rate": 5.386030150753769e-05, - "loss": 5.4908, - "step": 46426 - }, - { - "epoch": 24.2122555410691, - "grad_norm": 1.5467630624771118, - "learning_rate": 5.385929648241206e-05, - "loss": 5.4555, - "step": 46427 - }, - { - "epoch": 24.21277705345502, - "grad_norm": 1.5134776830673218, - "learning_rate": 5.385829145728644e-05, - "loss": 5.5516, - "step": 46428 - }, - { - "epoch": 24.21329856584094, - "grad_norm": 1.5399893522262573, - "learning_rate": 5.38572864321608e-05, - "loss": 4.8632, - "step": 46429 - }, - { - "epoch": 24.213820078226856, - "grad_norm": 1.6998852491378784, - "learning_rate": 5.385628140703518e-05, - "loss": 5.0927, - "step": 46430 - }, - { - "epoch": 24.214341590612776, - "grad_norm": 1.5616800785064697, - "learning_rate": 5.3855276381909545e-05, - "loss": 5.4029, - "step": 46431 - }, - { - "epoch": 24.214863102998695, - "grad_norm": 1.5536319017410278, - "learning_rate": 5.385427135678392e-05, - "loss": 4.8738, - "step": 46432 - }, - { - "epoch": 24.215384615384615, - "grad_norm": 1.5367240905761719, - "learning_rate": 5.3853266331658293e-05, - "loss": 5.1782, - "step": 46433 - }, - { - "epoch": 24.215906127770534, - "grad_norm": 1.7184617519378662, - "learning_rate": 5.385226130653267e-05, - "loss": 4.8078, - "step": 46434 - }, - { - "epoch": 24.216427640156454, - "grad_norm": 1.5036720037460327, - "learning_rate": 5.3851256281407036e-05, - "loss": 5.4638, - "step": 46435 - }, - { - "epoch": 24.216949152542373, - "grad_norm": 1.6218678951263428, - "learning_rate": 5.3850251256281413e-05, - "loss": 4.955, - "step": 46436 - }, - { - "epoch": 24.217470664928292, - "grad_norm": 1.5789510011672974, - "learning_rate": 5.384924623115578e-05, - "loss": 5.3132, - "step": 46437 - }, - { - "epoch": 24.217992177314212, - "grad_norm": 1.6274701356887817, - "learning_rate": 5.3848241206030156e-05, - "loss": 5.077, - "step": 46438 - }, - { - "epoch": 24.21851368970013, - "grad_norm": 1.6282942295074463, - "learning_rate": 5.3847236180904527e-05, - "loss": 5.1151, - "step": 46439 - }, - { - "epoch": 24.21903520208605, - "grad_norm": 1.5768746137619019, - "learning_rate": 5.384623115577889e-05, - "loss": 4.2336, - "step": 46440 - }, - { - "epoch": 24.21955671447197, - "grad_norm": 1.5219004154205322, - "learning_rate": 5.384522613065327e-05, - "loss": 5.3608, - "step": 46441 - }, - { - "epoch": 24.220078226857886, - "grad_norm": 1.5060242414474487, - "learning_rate": 5.384422110552764e-05, - "loss": 5.3703, - "step": 46442 - }, - { - "epoch": 24.220599739243806, - "grad_norm": 1.5818642377853394, - "learning_rate": 5.384321608040202e-05, - "loss": 5.349, - "step": 46443 - }, - { - "epoch": 24.221121251629725, - "grad_norm": 1.4774409532546997, - "learning_rate": 5.384221105527638e-05, - "loss": 5.2047, - "step": 46444 - }, - { - "epoch": 24.221642764015645, - "grad_norm": 1.5908737182617188, - "learning_rate": 5.384120603015076e-05, - "loss": 4.992, - "step": 46445 - }, - { - "epoch": 24.222164276401564, - "grad_norm": 1.5936996936798096, - "learning_rate": 5.3840201005025124e-05, - "loss": 4.9241, - "step": 46446 - }, - { - "epoch": 24.222685788787484, - "grad_norm": 1.633881688117981, - "learning_rate": 5.38391959798995e-05, - "loss": 4.6818, - "step": 46447 - }, - { - "epoch": 24.223207301173403, - "grad_norm": 1.5220927000045776, - "learning_rate": 5.383819095477387e-05, - "loss": 5.5926, - "step": 46448 - }, - { - "epoch": 24.223728813559323, - "grad_norm": 1.9805582761764526, - "learning_rate": 5.383718592964825e-05, - "loss": 4.2995, - "step": 46449 - }, - { - "epoch": 24.224250325945242, - "grad_norm": 1.5875651836395264, - "learning_rate": 5.3836180904522615e-05, - "loss": 5.7699, - "step": 46450 - }, - { - "epoch": 24.22477183833116, - "grad_norm": 1.5394421815872192, - "learning_rate": 5.383517587939699e-05, - "loss": 4.8898, - "step": 46451 - }, - { - "epoch": 24.22529335071708, - "grad_norm": 1.648266077041626, - "learning_rate": 5.383417085427136e-05, - "loss": 4.8772, - "step": 46452 - }, - { - "epoch": 24.225814863103, - "grad_norm": 1.443962812423706, - "learning_rate": 5.383316582914573e-05, - "loss": 5.7601, - "step": 46453 - }, - { - "epoch": 24.226336375488916, - "grad_norm": 1.5913971662521362, - "learning_rate": 5.3832160804020106e-05, - "loss": 4.7301, - "step": 46454 - }, - { - "epoch": 24.226857887874836, - "grad_norm": 1.5389988422393799, - "learning_rate": 5.383115577889447e-05, - "loss": 4.9215, - "step": 46455 - }, - { - "epoch": 24.227379400260755, - "grad_norm": 1.6058571338653564, - "learning_rate": 5.383015075376885e-05, - "loss": 4.9166, - "step": 46456 - }, - { - "epoch": 24.227900912646675, - "grad_norm": 1.5236693620681763, - "learning_rate": 5.382914572864321e-05, - "loss": 5.2888, - "step": 46457 - }, - { - "epoch": 24.228422425032594, - "grad_norm": 1.5186322927474976, - "learning_rate": 5.382814070351759e-05, - "loss": 5.3366, - "step": 46458 - }, - { - "epoch": 24.228943937418514, - "grad_norm": 1.619195818901062, - "learning_rate": 5.382713567839196e-05, - "loss": 4.7162, - "step": 46459 - }, - { - "epoch": 24.229465449804433, - "grad_norm": 1.6159449815750122, - "learning_rate": 5.382613065326634e-05, - "loss": 5.1724, - "step": 46460 - }, - { - "epoch": 24.229986962190353, - "grad_norm": 1.5176600217819214, - "learning_rate": 5.38251256281407e-05, - "loss": 5.188, - "step": 46461 - }, - { - "epoch": 24.230508474576272, - "grad_norm": 1.5957595109939575, - "learning_rate": 5.382412060301508e-05, - "loss": 5.3252, - "step": 46462 - }, - { - "epoch": 24.23102998696219, - "grad_norm": 1.6791824102401733, - "learning_rate": 5.382311557788945e-05, - "loss": 5.0806, - "step": 46463 - }, - { - "epoch": 24.23155149934811, - "grad_norm": 1.5334595441818237, - "learning_rate": 5.382211055276383e-05, - "loss": 5.2478, - "step": 46464 - }, - { - "epoch": 24.23207301173403, - "grad_norm": 1.6755985021591187, - "learning_rate": 5.3821105527638194e-05, - "loss": 5.2174, - "step": 46465 - }, - { - "epoch": 24.232594524119946, - "grad_norm": 1.5232495069503784, - "learning_rate": 5.382010050251256e-05, - "loss": 4.8776, - "step": 46466 - }, - { - "epoch": 24.233116036505866, - "grad_norm": 1.4794540405273438, - "learning_rate": 5.3819095477386936e-05, - "loss": 5.3278, - "step": 46467 - }, - { - "epoch": 24.233637548891785, - "grad_norm": 1.651078462600708, - "learning_rate": 5.381809045226131e-05, - "loss": 4.7419, - "step": 46468 - }, - { - "epoch": 24.234159061277705, - "grad_norm": 1.504529356956482, - "learning_rate": 5.3817085427135685e-05, - "loss": 5.0214, - "step": 46469 - }, - { - "epoch": 24.234680573663624, - "grad_norm": 1.5812921524047852, - "learning_rate": 5.381608040201005e-05, - "loss": 5.558, - "step": 46470 - }, - { - "epoch": 24.235202086049544, - "grad_norm": 1.766411304473877, - "learning_rate": 5.381507537688443e-05, - "loss": 4.633, - "step": 46471 - }, - { - "epoch": 24.235723598435463, - "grad_norm": 1.5372164249420166, - "learning_rate": 5.381407035175879e-05, - "loss": 5.0309, - "step": 46472 - }, - { - "epoch": 24.236245110821383, - "grad_norm": 1.4685935974121094, - "learning_rate": 5.381306532663317e-05, - "loss": 4.728, - "step": 46473 - }, - { - "epoch": 24.236766623207302, - "grad_norm": 1.6498103141784668, - "learning_rate": 5.381206030150754e-05, - "loss": 5.2901, - "step": 46474 - }, - { - "epoch": 24.23728813559322, - "grad_norm": 1.551617980003357, - "learning_rate": 5.381105527638192e-05, - "loss": 5.4879, - "step": 46475 - }, - { - "epoch": 24.23780964797914, - "grad_norm": 1.6688379049301147, - "learning_rate": 5.381005025125628e-05, - "loss": 4.9153, - "step": 46476 - }, - { - "epoch": 24.23833116036506, - "grad_norm": 1.5804507732391357, - "learning_rate": 5.380904522613066e-05, - "loss": 5.2222, - "step": 46477 - }, - { - "epoch": 24.238852672750976, - "grad_norm": 1.4998183250427246, - "learning_rate": 5.3808040201005025e-05, - "loss": 5.1247, - "step": 46478 - }, - { - "epoch": 24.239374185136896, - "grad_norm": 1.4556199312210083, - "learning_rate": 5.3807035175879396e-05, - "loss": 5.5402, - "step": 46479 - }, - { - "epoch": 24.239895697522815, - "grad_norm": 1.4645341634750366, - "learning_rate": 5.3806030150753774e-05, - "loss": 5.5644, - "step": 46480 - }, - { - "epoch": 24.240417209908735, - "grad_norm": 1.6064203977584839, - "learning_rate": 5.380502512562814e-05, - "loss": 4.8966, - "step": 46481 - }, - { - "epoch": 24.240938722294654, - "grad_norm": 1.659401297569275, - "learning_rate": 5.3804020100502516e-05, - "loss": 4.7466, - "step": 46482 - }, - { - "epoch": 24.241460234680574, - "grad_norm": 1.527489185333252, - "learning_rate": 5.380301507537689e-05, - "loss": 5.5247, - "step": 46483 - }, - { - "epoch": 24.241981747066493, - "grad_norm": 1.4719699621200562, - "learning_rate": 5.3802010050251265e-05, - "loss": 5.4873, - "step": 46484 - }, - { - "epoch": 24.242503259452413, - "grad_norm": 1.5057063102722168, - "learning_rate": 5.380100502512563e-05, - "loss": 5.1073, - "step": 46485 - }, - { - "epoch": 24.243024771838332, - "grad_norm": 1.6750850677490234, - "learning_rate": 5.380000000000001e-05, - "loss": 5.1175, - "step": 46486 - }, - { - "epoch": 24.24354628422425, - "grad_norm": 1.6372199058532715, - "learning_rate": 5.379899497487437e-05, - "loss": 4.9759, - "step": 46487 - }, - { - "epoch": 24.24406779661017, - "grad_norm": 1.6231390237808228, - "learning_rate": 5.379798994974875e-05, - "loss": 5.3047, - "step": 46488 - }, - { - "epoch": 24.24458930899609, - "grad_norm": 1.5272561311721802, - "learning_rate": 5.379698492462312e-05, - "loss": 5.3407, - "step": 46489 - }, - { - "epoch": 24.245110821382006, - "grad_norm": 1.548416018486023, - "learning_rate": 5.37959798994975e-05, - "loss": 5.126, - "step": 46490 - }, - { - "epoch": 24.245632333767926, - "grad_norm": 1.6990758180618286, - "learning_rate": 5.379497487437186e-05, - "loss": 4.5233, - "step": 46491 - }, - { - "epoch": 24.246153846153845, - "grad_norm": 1.5591294765472412, - "learning_rate": 5.3793969849246226e-05, - "loss": 5.5398, - "step": 46492 - }, - { - "epoch": 24.246675358539765, - "grad_norm": 1.5645341873168945, - "learning_rate": 5.3792964824120604e-05, - "loss": 5.5628, - "step": 46493 - }, - { - "epoch": 24.247196870925684, - "grad_norm": 1.5691587924957275, - "learning_rate": 5.3791959798994975e-05, - "loss": 5.3879, - "step": 46494 - }, - { - "epoch": 24.247718383311604, - "grad_norm": 1.6278003454208374, - "learning_rate": 5.379095477386935e-05, - "loss": 4.6923, - "step": 46495 - }, - { - "epoch": 24.248239895697523, - "grad_norm": 1.6052556037902832, - "learning_rate": 5.378994974874372e-05, - "loss": 5.4894, - "step": 46496 - }, - { - "epoch": 24.248761408083443, - "grad_norm": 1.5084013938903809, - "learning_rate": 5.3788944723618095e-05, - "loss": 5.5154, - "step": 46497 - }, - { - "epoch": 24.249282920469362, - "grad_norm": 1.6078141927719116, - "learning_rate": 5.378793969849246e-05, - "loss": 5.2446, - "step": 46498 - }, - { - "epoch": 24.24980443285528, - "grad_norm": 1.5285695791244507, - "learning_rate": 5.378693467336684e-05, - "loss": 5.4987, - "step": 46499 - }, - { - "epoch": 24.2503259452412, - "grad_norm": 1.524895429611206, - "learning_rate": 5.378592964824121e-05, - "loss": 5.1031, - "step": 46500 - }, - { - "epoch": 24.250847457627117, - "grad_norm": 1.5889954566955566, - "learning_rate": 5.3784924623115586e-05, - "loss": 4.9945, - "step": 46501 - }, - { - "epoch": 24.251368970013036, - "grad_norm": 1.5890040397644043, - "learning_rate": 5.378391959798995e-05, - "loss": 5.0514, - "step": 46502 - }, - { - "epoch": 24.251890482398956, - "grad_norm": 1.4910718202590942, - "learning_rate": 5.378291457286433e-05, - "loss": 5.0523, - "step": 46503 - }, - { - "epoch": 24.252411994784875, - "grad_norm": 1.5374865531921387, - "learning_rate": 5.37819095477387e-05, - "loss": 5.0909, - "step": 46504 - }, - { - "epoch": 24.252933507170795, - "grad_norm": 1.4775243997573853, - "learning_rate": 5.3780904522613063e-05, - "loss": 5.1644, - "step": 46505 - }, - { - "epoch": 24.253455019556714, - "grad_norm": 1.6279429197311401, - "learning_rate": 5.377989949748744e-05, - "loss": 5.4946, - "step": 46506 - }, - { - "epoch": 24.253976531942634, - "grad_norm": 1.4730029106140137, - "learning_rate": 5.3778894472361806e-05, - "loss": 5.4121, - "step": 46507 - }, - { - "epoch": 24.254498044328553, - "grad_norm": 1.51401948928833, - "learning_rate": 5.377788944723618e-05, - "loss": 5.1287, - "step": 46508 - }, - { - "epoch": 24.255019556714473, - "grad_norm": 1.630916953086853, - "learning_rate": 5.3776884422110554e-05, - "loss": 4.7815, - "step": 46509 - }, - { - "epoch": 24.255541069100392, - "grad_norm": 1.7061710357666016, - "learning_rate": 5.377587939698493e-05, - "loss": 4.9695, - "step": 46510 - }, - { - "epoch": 24.25606258148631, - "grad_norm": 1.7191863059997559, - "learning_rate": 5.3774874371859297e-05, - "loss": 4.8089, - "step": 46511 - }, - { - "epoch": 24.25658409387223, - "grad_norm": 1.546297311782837, - "learning_rate": 5.3773869346733674e-05, - "loss": 5.2331, - "step": 46512 - }, - { - "epoch": 24.257105606258147, - "grad_norm": 1.5563076734542847, - "learning_rate": 5.377286432160804e-05, - "loss": 5.3063, - "step": 46513 - }, - { - "epoch": 24.257627118644066, - "grad_norm": 1.601966142654419, - "learning_rate": 5.3771859296482416e-05, - "loss": 4.7513, - "step": 46514 - }, - { - "epoch": 24.258148631029986, - "grad_norm": 1.479156494140625, - "learning_rate": 5.377085427135679e-05, - "loss": 5.3641, - "step": 46515 - }, - { - "epoch": 24.258670143415905, - "grad_norm": 1.8991185426712036, - "learning_rate": 5.3769849246231165e-05, - "loss": 5.3831, - "step": 46516 - }, - { - "epoch": 24.259191655801825, - "grad_norm": 1.662251353263855, - "learning_rate": 5.376884422110553e-05, - "loss": 4.8394, - "step": 46517 - }, - { - "epoch": 24.259713168187744, - "grad_norm": 1.5951800346374512, - "learning_rate": 5.3767839195979894e-05, - "loss": 5.3197, - "step": 46518 - }, - { - "epoch": 24.260234680573664, - "grad_norm": 1.5974843502044678, - "learning_rate": 5.376683417085427e-05, - "loss": 5.0499, - "step": 46519 - }, - { - "epoch": 24.260756192959583, - "grad_norm": 1.5448158979415894, - "learning_rate": 5.376582914572864e-05, - "loss": 5.2154, - "step": 46520 - }, - { - "epoch": 24.261277705345503, - "grad_norm": 1.5328088998794556, - "learning_rate": 5.376482412060302e-05, - "loss": 5.4032, - "step": 46521 - }, - { - "epoch": 24.261799217731422, - "grad_norm": 1.557132601737976, - "learning_rate": 5.3763819095477385e-05, - "loss": 5.2291, - "step": 46522 - }, - { - "epoch": 24.26232073011734, - "grad_norm": 1.569765329360962, - "learning_rate": 5.376281407035176e-05, - "loss": 5.0498, - "step": 46523 - }, - { - "epoch": 24.26284224250326, - "grad_norm": 1.536246418952942, - "learning_rate": 5.376180904522613e-05, - "loss": 4.6585, - "step": 46524 - }, - { - "epoch": 24.263363754889177, - "grad_norm": 1.5787760019302368, - "learning_rate": 5.3760804020100505e-05, - "loss": 5.4537, - "step": 46525 - }, - { - "epoch": 24.263885267275096, - "grad_norm": 1.798959493637085, - "learning_rate": 5.3759798994974876e-05, - "loss": 5.227, - "step": 46526 - }, - { - "epoch": 24.264406779661016, - "grad_norm": 1.5277239084243774, - "learning_rate": 5.3758793969849254e-05, - "loss": 5.5021, - "step": 46527 - }, - { - "epoch": 24.264928292046935, - "grad_norm": 1.596158742904663, - "learning_rate": 5.375778894472362e-05, - "loss": 4.7899, - "step": 46528 - }, - { - "epoch": 24.265449804432855, - "grad_norm": 1.546547770500183, - "learning_rate": 5.3756783919597996e-05, - "loss": 4.9555, - "step": 46529 - }, - { - "epoch": 24.265971316818774, - "grad_norm": 1.5198386907577515, - "learning_rate": 5.375577889447237e-05, - "loss": 5.18, - "step": 46530 - }, - { - "epoch": 24.266492829204694, - "grad_norm": 1.5776407718658447, - "learning_rate": 5.3754773869346745e-05, - "loss": 5.1132, - "step": 46531 - }, - { - "epoch": 24.267014341590613, - "grad_norm": 1.5499372482299805, - "learning_rate": 5.375376884422111e-05, - "loss": 5.4776, - "step": 46532 - }, - { - "epoch": 24.267535853976533, - "grad_norm": 1.4727933406829834, - "learning_rate": 5.375276381909547e-05, - "loss": 5.1737, - "step": 46533 - }, - { - "epoch": 24.268057366362452, - "grad_norm": 1.5558862686157227, - "learning_rate": 5.375175879396985e-05, - "loss": 4.9744, - "step": 46534 - }, - { - "epoch": 24.26857887874837, - "grad_norm": 1.5459965467453003, - "learning_rate": 5.375075376884422e-05, - "loss": 5.5401, - "step": 46535 - }, - { - "epoch": 24.26910039113429, - "grad_norm": 1.41295325756073, - "learning_rate": 5.37497487437186e-05, - "loss": 5.1173, - "step": 46536 - }, - { - "epoch": 24.269621903520207, - "grad_norm": 1.6529011726379395, - "learning_rate": 5.3748743718592964e-05, - "loss": 5.5341, - "step": 46537 - }, - { - "epoch": 24.270143415906126, - "grad_norm": 1.530126929283142, - "learning_rate": 5.374773869346734e-05, - "loss": 4.4887, - "step": 46538 - }, - { - "epoch": 24.270664928292046, - "grad_norm": 1.585639238357544, - "learning_rate": 5.3746733668341706e-05, - "loss": 4.7209, - "step": 46539 - }, - { - "epoch": 24.271186440677965, - "grad_norm": 1.591572880744934, - "learning_rate": 5.3745728643216084e-05, - "loss": 4.658, - "step": 46540 - }, - { - "epoch": 24.271707953063885, - "grad_norm": 1.478265643119812, - "learning_rate": 5.3744723618090455e-05, - "loss": 4.9451, - "step": 46541 - }, - { - "epoch": 24.272229465449804, - "grad_norm": 1.565230369567871, - "learning_rate": 5.374371859296483e-05, - "loss": 5.2774, - "step": 46542 - }, - { - "epoch": 24.272750977835724, - "grad_norm": 1.4534052610397339, - "learning_rate": 5.37427135678392e-05, - "loss": 5.164, - "step": 46543 - }, - { - "epoch": 24.273272490221643, - "grad_norm": 1.60517418384552, - "learning_rate": 5.3741708542713575e-05, - "loss": 5.0051, - "step": 46544 - }, - { - "epoch": 24.273794002607563, - "grad_norm": 1.5081274509429932, - "learning_rate": 5.374070351758794e-05, - "loss": 4.9418, - "step": 46545 - }, - { - "epoch": 24.274315514993482, - "grad_norm": 1.5396013259887695, - "learning_rate": 5.373969849246231e-05, - "loss": 5.3833, - "step": 46546 - }, - { - "epoch": 24.2748370273794, - "grad_norm": 1.5851737260818481, - "learning_rate": 5.373869346733669e-05, - "loss": 5.0194, - "step": 46547 - }, - { - "epoch": 24.27535853976532, - "grad_norm": 1.6919569969177246, - "learning_rate": 5.373768844221105e-05, - "loss": 5.1377, - "step": 46548 - }, - { - "epoch": 24.275880052151237, - "grad_norm": 1.5245801210403442, - "learning_rate": 5.373668341708543e-05, - "loss": 5.4671, - "step": 46549 - }, - { - "epoch": 24.276401564537156, - "grad_norm": 1.650509238243103, - "learning_rate": 5.37356783919598e-05, - "loss": 5.125, - "step": 46550 - }, - { - "epoch": 24.276923076923076, - "grad_norm": 1.6186389923095703, - "learning_rate": 5.373467336683418e-05, - "loss": 5.0391, - "step": 46551 - }, - { - "epoch": 24.277444589308995, - "grad_norm": 1.5488924980163574, - "learning_rate": 5.3733668341708544e-05, - "loss": 5.4188, - "step": 46552 - }, - { - "epoch": 24.277966101694915, - "grad_norm": 1.623411774635315, - "learning_rate": 5.373266331658292e-05, - "loss": 5.4039, - "step": 46553 - }, - { - "epoch": 24.278487614080834, - "grad_norm": 1.573259949684143, - "learning_rate": 5.3731658291457286e-05, - "loss": 5.5853, - "step": 46554 - }, - { - "epoch": 24.279009126466754, - "grad_norm": 1.6182838678359985, - "learning_rate": 5.3730653266331663e-05, - "loss": 5.2829, - "step": 46555 - }, - { - "epoch": 24.279530638852673, - "grad_norm": 1.6129504442214966, - "learning_rate": 5.3729648241206034e-05, - "loss": 5.3202, - "step": 46556 - }, - { - "epoch": 24.280052151238593, - "grad_norm": 1.6232938766479492, - "learning_rate": 5.372864321608041e-05, - "loss": 5.0764, - "step": 46557 - }, - { - "epoch": 24.280573663624512, - "grad_norm": 1.7150017023086548, - "learning_rate": 5.3727638190954777e-05, - "loss": 4.3729, - "step": 46558 - }, - { - "epoch": 24.28109517601043, - "grad_norm": 1.663986325263977, - "learning_rate": 5.372663316582914e-05, - "loss": 5.3287, - "step": 46559 - }, - { - "epoch": 24.28161668839635, - "grad_norm": 1.5687485933303833, - "learning_rate": 5.372562814070352e-05, - "loss": 5.5443, - "step": 46560 - }, - { - "epoch": 24.282138200782267, - "grad_norm": 1.5422264337539673, - "learning_rate": 5.372462311557789e-05, - "loss": 4.3648, - "step": 46561 - }, - { - "epoch": 24.282659713168186, - "grad_norm": 1.5882489681243896, - "learning_rate": 5.372361809045227e-05, - "loss": 4.4674, - "step": 46562 - }, - { - "epoch": 24.283181225554106, - "grad_norm": 1.628889799118042, - "learning_rate": 5.372261306532663e-05, - "loss": 4.7572, - "step": 46563 - }, - { - "epoch": 24.283702737940025, - "grad_norm": 1.552642583847046, - "learning_rate": 5.372160804020101e-05, - "loss": 5.3804, - "step": 46564 - }, - { - "epoch": 24.284224250325945, - "grad_norm": 1.535701036453247, - "learning_rate": 5.3720603015075374e-05, - "loss": 5.547, - "step": 46565 - }, - { - "epoch": 24.284745762711864, - "grad_norm": 1.5851973295211792, - "learning_rate": 5.371959798994975e-05, - "loss": 5.3646, - "step": 46566 - }, - { - "epoch": 24.285267275097784, - "grad_norm": 1.5524189472198486, - "learning_rate": 5.371859296482412e-05, - "loss": 5.4781, - "step": 46567 - }, - { - "epoch": 24.285788787483703, - "grad_norm": 1.6161832809448242, - "learning_rate": 5.37175879396985e-05, - "loss": 5.0334, - "step": 46568 - }, - { - "epoch": 24.286310299869623, - "grad_norm": 1.5426084995269775, - "learning_rate": 5.3716582914572865e-05, - "loss": 5.3827, - "step": 46569 - }, - { - "epoch": 24.286831812255542, - "grad_norm": 1.6541011333465576, - "learning_rate": 5.371557788944724e-05, - "loss": 4.8641, - "step": 46570 - }, - { - "epoch": 24.28735332464146, - "grad_norm": 1.6597360372543335, - "learning_rate": 5.3714572864321614e-05, - "loss": 5.084, - "step": 46571 - }, - { - "epoch": 24.28787483702738, - "grad_norm": 1.5408251285552979, - "learning_rate": 5.371356783919598e-05, - "loss": 5.1376, - "step": 46572 - }, - { - "epoch": 24.288396349413297, - "grad_norm": 1.583795428276062, - "learning_rate": 5.3712562814070356e-05, - "loss": 5.1934, - "step": 46573 - }, - { - "epoch": 24.288917861799217, - "grad_norm": 1.534959316253662, - "learning_rate": 5.371155778894472e-05, - "loss": 5.6932, - "step": 46574 - }, - { - "epoch": 24.289439374185136, - "grad_norm": 1.569667100906372, - "learning_rate": 5.37105527638191e-05, - "loss": 5.257, - "step": 46575 - }, - { - "epoch": 24.289960886571055, - "grad_norm": 1.5968492031097412, - "learning_rate": 5.370954773869347e-05, - "loss": 4.7974, - "step": 46576 - }, - { - "epoch": 24.290482398956975, - "grad_norm": 1.7246915102005005, - "learning_rate": 5.370854271356785e-05, - "loss": 4.9212, - "step": 46577 - }, - { - "epoch": 24.291003911342894, - "grad_norm": 1.6612392663955688, - "learning_rate": 5.370753768844221e-05, - "loss": 5.1836, - "step": 46578 - }, - { - "epoch": 24.291525423728814, - "grad_norm": 1.5125454664230347, - "learning_rate": 5.370653266331659e-05, - "loss": 5.4666, - "step": 46579 - }, - { - "epoch": 24.292046936114733, - "grad_norm": 1.5160938501358032, - "learning_rate": 5.370552763819095e-05, - "loss": 5.2386, - "step": 46580 - }, - { - "epoch": 24.292568448500653, - "grad_norm": 1.6713393926620483, - "learning_rate": 5.370452261306533e-05, - "loss": 5.1998, - "step": 46581 - }, - { - "epoch": 24.293089960886572, - "grad_norm": 1.5227710008621216, - "learning_rate": 5.37035175879397e-05, - "loss": 4.9512, - "step": 46582 - }, - { - "epoch": 24.29361147327249, - "grad_norm": 1.5369834899902344, - "learning_rate": 5.370251256281408e-05, - "loss": 5.2184, - "step": 46583 - }, - { - "epoch": 24.294132985658408, - "grad_norm": 1.5856119394302368, - "learning_rate": 5.3701507537688444e-05, - "loss": 5.275, - "step": 46584 - }, - { - "epoch": 24.294654498044327, - "grad_norm": 1.6110239028930664, - "learning_rate": 5.370050251256281e-05, - "loss": 5.4654, - "step": 46585 - }, - { - "epoch": 24.295176010430247, - "grad_norm": 1.589174509048462, - "learning_rate": 5.3699497487437186e-05, - "loss": 5.0054, - "step": 46586 - }, - { - "epoch": 24.295697522816166, - "grad_norm": 1.4716191291809082, - "learning_rate": 5.369849246231156e-05, - "loss": 5.4205, - "step": 46587 - }, - { - "epoch": 24.296219035202085, - "grad_norm": 1.5405246019363403, - "learning_rate": 5.3697487437185935e-05, - "loss": 5.5069, - "step": 46588 - }, - { - "epoch": 24.296740547588005, - "grad_norm": 1.6221007108688354, - "learning_rate": 5.36964824120603e-05, - "loss": 4.8461, - "step": 46589 - }, - { - "epoch": 24.297262059973924, - "grad_norm": 1.5237624645233154, - "learning_rate": 5.369547738693468e-05, - "loss": 4.2474, - "step": 46590 - }, - { - "epoch": 24.297783572359844, - "grad_norm": 1.6060062646865845, - "learning_rate": 5.369447236180905e-05, - "loss": 5.299, - "step": 46591 - }, - { - "epoch": 24.298305084745763, - "grad_norm": 1.5928664207458496, - "learning_rate": 5.3693467336683426e-05, - "loss": 5.1822, - "step": 46592 - }, - { - "epoch": 24.298826597131683, - "grad_norm": 1.6345442533493042, - "learning_rate": 5.369246231155779e-05, - "loss": 4.9443, - "step": 46593 - }, - { - "epoch": 24.299348109517602, - "grad_norm": 1.4771085977554321, - "learning_rate": 5.369145728643217e-05, - "loss": 5.2035, - "step": 46594 - }, - { - "epoch": 24.29986962190352, - "grad_norm": 1.6538231372833252, - "learning_rate": 5.369045226130653e-05, - "loss": 4.7779, - "step": 46595 - }, - { - "epoch": 24.300391134289438, - "grad_norm": 1.5676822662353516, - "learning_rate": 5.368944723618091e-05, - "loss": 5.1694, - "step": 46596 - }, - { - "epoch": 24.300912646675357, - "grad_norm": 1.4690879583358765, - "learning_rate": 5.368844221105528e-05, - "loss": 5.0034, - "step": 46597 - }, - { - "epoch": 24.301434159061277, - "grad_norm": 1.5651763677597046, - "learning_rate": 5.3687437185929646e-05, - "loss": 5.2618, - "step": 46598 - }, - { - "epoch": 24.301955671447196, - "grad_norm": 1.5007787942886353, - "learning_rate": 5.3686432160804024e-05, - "loss": 5.2824, - "step": 46599 - }, - { - "epoch": 24.302477183833116, - "grad_norm": 1.588273525238037, - "learning_rate": 5.368542713567839e-05, - "loss": 5.2891, - "step": 46600 - }, - { - "epoch": 24.302998696219035, - "grad_norm": 1.5493247509002686, - "learning_rate": 5.3684422110552766e-05, - "loss": 4.4648, - "step": 46601 - }, - { - "epoch": 24.303520208604954, - "grad_norm": 1.62213933467865, - "learning_rate": 5.368341708542714e-05, - "loss": 4.064, - "step": 46602 - }, - { - "epoch": 24.304041720990874, - "grad_norm": 1.5776082277297974, - "learning_rate": 5.3682412060301515e-05, - "loss": 5.3707, - "step": 46603 - }, - { - "epoch": 24.304563233376793, - "grad_norm": 1.6238863468170166, - "learning_rate": 5.368140703517588e-05, - "loss": 5.1448, - "step": 46604 - }, - { - "epoch": 24.305084745762713, - "grad_norm": 1.517470121383667, - "learning_rate": 5.368040201005026e-05, - "loss": 4.9888, - "step": 46605 - }, - { - "epoch": 24.305606258148632, - "grad_norm": 1.5910130739212036, - "learning_rate": 5.367939698492462e-05, - "loss": 5.2658, - "step": 46606 - }, - { - "epoch": 24.30612777053455, - "grad_norm": 1.6356123685836792, - "learning_rate": 5.3678391959799e-05, - "loss": 5.0292, - "step": 46607 - }, - { - "epoch": 24.306649282920468, - "grad_norm": 1.5970298051834106, - "learning_rate": 5.367738693467337e-05, - "loss": 5.6171, - "step": 46608 - }, - { - "epoch": 24.307170795306387, - "grad_norm": 1.726518154144287, - "learning_rate": 5.367638190954775e-05, - "loss": 5.0374, - "step": 46609 - }, - { - "epoch": 24.307692307692307, - "grad_norm": 1.4900022745132446, - "learning_rate": 5.367537688442211e-05, - "loss": 5.4899, - "step": 46610 - }, - { - "epoch": 24.308213820078226, - "grad_norm": 1.4991257190704346, - "learning_rate": 5.3674371859296476e-05, - "loss": 5.125, - "step": 46611 - }, - { - "epoch": 24.308735332464146, - "grad_norm": 1.5483914613723755, - "learning_rate": 5.3673366834170854e-05, - "loss": 4.646, - "step": 46612 - }, - { - "epoch": 24.309256844850065, - "grad_norm": 1.6165622472763062, - "learning_rate": 5.3672361809045225e-05, - "loss": 5.1141, - "step": 46613 - }, - { - "epoch": 24.309778357235984, - "grad_norm": 1.48163902759552, - "learning_rate": 5.36713567839196e-05, - "loss": 5.1583, - "step": 46614 - }, - { - "epoch": 24.310299869621904, - "grad_norm": 1.6403732299804688, - "learning_rate": 5.367035175879397e-05, - "loss": 5.4645, - "step": 46615 - }, - { - "epoch": 24.310821382007823, - "grad_norm": 1.5265711545944214, - "learning_rate": 5.3669346733668345e-05, - "loss": 5.3366, - "step": 46616 - }, - { - "epoch": 24.311342894393743, - "grad_norm": 1.5642144680023193, - "learning_rate": 5.3668341708542716e-05, - "loss": 5.106, - "step": 46617 - }, - { - "epoch": 24.311864406779662, - "grad_norm": 1.5955440998077393, - "learning_rate": 5.3667336683417094e-05, - "loss": 5.2983, - "step": 46618 - }, - { - "epoch": 24.312385919165582, - "grad_norm": 1.602898120880127, - "learning_rate": 5.366633165829146e-05, - "loss": 5.3281, - "step": 46619 - }, - { - "epoch": 24.312907431551498, - "grad_norm": 1.6177289485931396, - "learning_rate": 5.3665326633165836e-05, - "loss": 5.1293, - "step": 46620 - }, - { - "epoch": 24.313428943937417, - "grad_norm": 2.3731181621551514, - "learning_rate": 5.36643216080402e-05, - "loss": 4.4469, - "step": 46621 - }, - { - "epoch": 24.313950456323337, - "grad_norm": 1.5811498165130615, - "learning_rate": 5.366331658291458e-05, - "loss": 5.0165, - "step": 46622 - }, - { - "epoch": 24.314471968709256, - "grad_norm": 1.5065430402755737, - "learning_rate": 5.366231155778895e-05, - "loss": 5.3461, - "step": 46623 - }, - { - "epoch": 24.314993481095176, - "grad_norm": 1.5695322751998901, - "learning_rate": 5.366130653266333e-05, - "loss": 5.3073, - "step": 46624 - }, - { - "epoch": 24.315514993481095, - "grad_norm": 1.6277281045913696, - "learning_rate": 5.366030150753769e-05, - "loss": 5.0665, - "step": 46625 - }, - { - "epoch": 24.316036505867014, - "grad_norm": 1.5736382007598877, - "learning_rate": 5.3659296482412056e-05, - "loss": 5.0317, - "step": 46626 - }, - { - "epoch": 24.316558018252934, - "grad_norm": 1.6100175380706787, - "learning_rate": 5.365829145728643e-05, - "loss": 5.1464, - "step": 46627 - }, - { - "epoch": 24.317079530638853, - "grad_norm": 1.522641897201538, - "learning_rate": 5.3657286432160804e-05, - "loss": 5.4654, - "step": 46628 - }, - { - "epoch": 24.317601043024773, - "grad_norm": 1.547913908958435, - "learning_rate": 5.365628140703518e-05, - "loss": 5.3363, - "step": 46629 - }, - { - "epoch": 24.318122555410692, - "grad_norm": 1.5869567394256592, - "learning_rate": 5.3655276381909547e-05, - "loss": 5.2518, - "step": 46630 - }, - { - "epoch": 24.318644067796612, - "grad_norm": 1.5204222202301025, - "learning_rate": 5.3654271356783924e-05, - "loss": 5.4275, - "step": 46631 - }, - { - "epoch": 24.319165580182528, - "grad_norm": 1.5300172567367554, - "learning_rate": 5.365326633165829e-05, - "loss": 5.4153, - "step": 46632 - }, - { - "epoch": 24.319687092568447, - "grad_norm": 1.5143780708312988, - "learning_rate": 5.3652261306532666e-05, - "loss": 4.8942, - "step": 46633 - }, - { - "epoch": 24.320208604954367, - "grad_norm": 1.5416815280914307, - "learning_rate": 5.365125628140704e-05, - "loss": 5.5277, - "step": 46634 - }, - { - "epoch": 24.320730117340286, - "grad_norm": 1.4537303447723389, - "learning_rate": 5.3650251256281415e-05, - "loss": 5.6494, - "step": 46635 - }, - { - "epoch": 24.321251629726206, - "grad_norm": 1.4255781173706055, - "learning_rate": 5.364924623115578e-05, - "loss": 5.7643, - "step": 46636 - }, - { - "epoch": 24.321773142112125, - "grad_norm": 1.5455092191696167, - "learning_rate": 5.364824120603016e-05, - "loss": 4.8143, - "step": 46637 - }, - { - "epoch": 24.322294654498045, - "grad_norm": 1.591746211051941, - "learning_rate": 5.364723618090453e-05, - "loss": 5.2743, - "step": 46638 - }, - { - "epoch": 24.322816166883964, - "grad_norm": 1.55540132522583, - "learning_rate": 5.364623115577889e-05, - "loss": 4.9435, - "step": 46639 - }, - { - "epoch": 24.323337679269883, - "grad_norm": 1.6163493394851685, - "learning_rate": 5.364522613065327e-05, - "loss": 5.2061, - "step": 46640 - }, - { - "epoch": 24.323859191655803, - "grad_norm": 1.462853193283081, - "learning_rate": 5.3644221105527635e-05, - "loss": 5.3553, - "step": 46641 - }, - { - "epoch": 24.324380704041722, - "grad_norm": 1.5892717838287354, - "learning_rate": 5.364321608040201e-05, - "loss": 4.9348, - "step": 46642 - }, - { - "epoch": 24.324902216427642, - "grad_norm": 1.5374174118041992, - "learning_rate": 5.3642211055276384e-05, - "loss": 5.2828, - "step": 46643 - }, - { - "epoch": 24.325423728813558, - "grad_norm": 1.5996121168136597, - "learning_rate": 5.364120603015076e-05, - "loss": 5.1828, - "step": 46644 - }, - { - "epoch": 24.325945241199477, - "grad_norm": 1.515793800354004, - "learning_rate": 5.3640201005025126e-05, - "loss": 5.3633, - "step": 46645 - }, - { - "epoch": 24.326466753585397, - "grad_norm": 1.637360692024231, - "learning_rate": 5.3639195979899504e-05, - "loss": 5.3676, - "step": 46646 - }, - { - "epoch": 24.326988265971316, - "grad_norm": 1.577966570854187, - "learning_rate": 5.363819095477387e-05, - "loss": 5.0286, - "step": 46647 - }, - { - "epoch": 24.327509778357236, - "grad_norm": 1.678457260131836, - "learning_rate": 5.3637185929648246e-05, - "loss": 4.3423, - "step": 46648 - }, - { - "epoch": 24.328031290743155, - "grad_norm": 1.600327968597412, - "learning_rate": 5.363618090452262e-05, - "loss": 5.3261, - "step": 46649 - }, - { - "epoch": 24.328552803129075, - "grad_norm": 1.6013729572296143, - "learning_rate": 5.3635175879396995e-05, - "loss": 5.2041, - "step": 46650 - }, - { - "epoch": 24.329074315514994, - "grad_norm": 1.5849155187606812, - "learning_rate": 5.363417085427136e-05, - "loss": 5.3744, - "step": 46651 - }, - { - "epoch": 24.329595827900913, - "grad_norm": 1.6084797382354736, - "learning_rate": 5.363316582914572e-05, - "loss": 5.318, - "step": 46652 - }, - { - "epoch": 24.330117340286833, - "grad_norm": 1.536217451095581, - "learning_rate": 5.36321608040201e-05, - "loss": 5.5239, - "step": 46653 - }, - { - "epoch": 24.330638852672752, - "grad_norm": 1.59565269947052, - "learning_rate": 5.363115577889447e-05, - "loss": 5.2355, - "step": 46654 - }, - { - "epoch": 24.331160365058672, - "grad_norm": 1.504576325416565, - "learning_rate": 5.363015075376885e-05, - "loss": 5.3442, - "step": 46655 - }, - { - "epoch": 24.331681877444588, - "grad_norm": 1.6171420812606812, - "learning_rate": 5.3629145728643214e-05, - "loss": 4.9218, - "step": 46656 - }, - { - "epoch": 24.332203389830507, - "grad_norm": 1.454347014427185, - "learning_rate": 5.362814070351759e-05, - "loss": 5.6167, - "step": 46657 - }, - { - "epoch": 24.332724902216427, - "grad_norm": 1.7542415857315063, - "learning_rate": 5.362713567839196e-05, - "loss": 5.0358, - "step": 46658 - }, - { - "epoch": 24.333246414602346, - "grad_norm": 1.6092952489852905, - "learning_rate": 5.362613065326634e-05, - "loss": 5.2362, - "step": 46659 - }, - { - "epoch": 24.333767926988266, - "grad_norm": 1.4792824983596802, - "learning_rate": 5.3625125628140705e-05, - "loss": 5.07, - "step": 46660 - }, - { - "epoch": 24.334289439374185, - "grad_norm": 1.492871880531311, - "learning_rate": 5.362412060301508e-05, - "loss": 5.0638, - "step": 46661 - }, - { - "epoch": 24.334810951760105, - "grad_norm": 1.620306372642517, - "learning_rate": 5.362311557788945e-05, - "loss": 4.8931, - "step": 46662 - }, - { - "epoch": 24.335332464146024, - "grad_norm": 1.4903837442398071, - "learning_rate": 5.3622110552763825e-05, - "loss": 5.5703, - "step": 46663 - }, - { - "epoch": 24.335853976531943, - "grad_norm": 1.5272018909454346, - "learning_rate": 5.3621105527638196e-05, - "loss": 5.2872, - "step": 46664 - }, - { - "epoch": 24.336375488917863, - "grad_norm": 1.5871680974960327, - "learning_rate": 5.362010050251256e-05, - "loss": 5.4374, - "step": 46665 - }, - { - "epoch": 24.336897001303782, - "grad_norm": 1.5554306507110596, - "learning_rate": 5.361909547738694e-05, - "loss": 5.5505, - "step": 46666 - }, - { - "epoch": 24.3374185136897, - "grad_norm": 1.4673306941986084, - "learning_rate": 5.36180904522613e-05, - "loss": 5.7145, - "step": 46667 - }, - { - "epoch": 24.337940026075618, - "grad_norm": 1.4878606796264648, - "learning_rate": 5.361708542713568e-05, - "loss": 5.4413, - "step": 46668 - }, - { - "epoch": 24.338461538461537, - "grad_norm": 1.541118860244751, - "learning_rate": 5.361608040201005e-05, - "loss": 5.0377, - "step": 46669 - }, - { - "epoch": 24.338983050847457, - "grad_norm": 1.6206892728805542, - "learning_rate": 5.361507537688443e-05, - "loss": 5.2933, - "step": 46670 - }, - { - "epoch": 24.339504563233376, - "grad_norm": 1.446699857711792, - "learning_rate": 5.3614070351758794e-05, - "loss": 5.3192, - "step": 46671 - }, - { - "epoch": 24.340026075619296, - "grad_norm": 1.4445785284042358, - "learning_rate": 5.361306532663317e-05, - "loss": 5.5648, - "step": 46672 - }, - { - "epoch": 24.340547588005215, - "grad_norm": 1.6103436946868896, - "learning_rate": 5.3612060301507536e-05, - "loss": 4.9791, - "step": 46673 - }, - { - "epoch": 24.341069100391135, - "grad_norm": 1.6132456064224243, - "learning_rate": 5.3611055276381913e-05, - "loss": 5.0591, - "step": 46674 - }, - { - "epoch": 24.341590612777054, - "grad_norm": 1.5596740245819092, - "learning_rate": 5.3610050251256285e-05, - "loss": 5.0358, - "step": 46675 - }, - { - "epoch": 24.342112125162974, - "grad_norm": 1.5720112323760986, - "learning_rate": 5.360904522613066e-05, - "loss": 4.9, - "step": 46676 - }, - { - "epoch": 24.342633637548893, - "grad_norm": 1.4962886571884155, - "learning_rate": 5.360804020100503e-05, - "loss": 5.2838, - "step": 46677 - }, - { - "epoch": 24.343155149934812, - "grad_norm": 1.487217903137207, - "learning_rate": 5.360703517587939e-05, - "loss": 5.8337, - "step": 46678 - }, - { - "epoch": 24.343676662320732, - "grad_norm": 1.4660305976867676, - "learning_rate": 5.360603015075377e-05, - "loss": 5.3055, - "step": 46679 - }, - { - "epoch": 24.344198174706648, - "grad_norm": 1.5468034744262695, - "learning_rate": 5.360502512562814e-05, - "loss": 5.5191, - "step": 46680 - }, - { - "epoch": 24.344719687092567, - "grad_norm": 1.5067448616027832, - "learning_rate": 5.360402010050252e-05, - "loss": 5.4391, - "step": 46681 - }, - { - "epoch": 24.345241199478487, - "grad_norm": 1.5891979932785034, - "learning_rate": 5.360301507537688e-05, - "loss": 4.8512, - "step": 46682 - }, - { - "epoch": 24.345762711864406, - "grad_norm": 1.4967737197875977, - "learning_rate": 5.360201005025126e-05, - "loss": 5.8709, - "step": 46683 - }, - { - "epoch": 24.346284224250326, - "grad_norm": 1.5120757818222046, - "learning_rate": 5.360100502512563e-05, - "loss": 5.2026, - "step": 46684 - }, - { - "epoch": 24.346805736636245, - "grad_norm": 1.6322177648544312, - "learning_rate": 5.360000000000001e-05, - "loss": 4.5448, - "step": 46685 - }, - { - "epoch": 24.347327249022165, - "grad_norm": 1.665804147720337, - "learning_rate": 5.359899497487437e-05, - "loss": 5.0943, - "step": 46686 - }, - { - "epoch": 24.347848761408084, - "grad_norm": 1.5306041240692139, - "learning_rate": 5.359798994974875e-05, - "loss": 5.3209, - "step": 46687 - }, - { - "epoch": 24.348370273794004, - "grad_norm": 1.536742091178894, - "learning_rate": 5.3596984924623115e-05, - "loss": 5.2572, - "step": 46688 - }, - { - "epoch": 24.348891786179923, - "grad_norm": 1.5175718069076538, - "learning_rate": 5.359597989949749e-05, - "loss": 5.0965, - "step": 46689 - }, - { - "epoch": 24.349413298565842, - "grad_norm": 1.5491081476211548, - "learning_rate": 5.3594974874371864e-05, - "loss": 5.6037, - "step": 46690 - }, - { - "epoch": 24.34993481095176, - "grad_norm": 1.6356558799743652, - "learning_rate": 5.359396984924623e-05, - "loss": 4.9422, - "step": 46691 - }, - { - "epoch": 24.350456323337678, - "grad_norm": 1.551336407661438, - "learning_rate": 5.3592964824120606e-05, - "loss": 4.9636, - "step": 46692 - }, - { - "epoch": 24.350977835723597, - "grad_norm": 1.4739060401916504, - "learning_rate": 5.359195979899497e-05, - "loss": 4.7283, - "step": 46693 - }, - { - "epoch": 24.351499348109517, - "grad_norm": 1.6668118238449097, - "learning_rate": 5.359095477386935e-05, - "loss": 5.2958, - "step": 46694 - }, - { - "epoch": 24.352020860495436, - "grad_norm": 1.5188933610916138, - "learning_rate": 5.358994974874372e-05, - "loss": 5.5809, - "step": 46695 - }, - { - "epoch": 24.352542372881356, - "grad_norm": 1.5982838869094849, - "learning_rate": 5.35889447236181e-05, - "loss": 5.3575, - "step": 46696 - }, - { - "epoch": 24.353063885267275, - "grad_norm": 1.6466450691223145, - "learning_rate": 5.358793969849246e-05, - "loss": 5.3003, - "step": 46697 - }, - { - "epoch": 24.353585397653195, - "grad_norm": 1.6770554780960083, - "learning_rate": 5.358693467336684e-05, - "loss": 5.5338, - "step": 46698 - }, - { - "epoch": 24.354106910039114, - "grad_norm": 1.539605736732483, - "learning_rate": 5.35859296482412e-05, - "loss": 5.3957, - "step": 46699 - }, - { - "epoch": 24.354628422425034, - "grad_norm": 1.5029267072677612, - "learning_rate": 5.358492462311558e-05, - "loss": 4.8365, - "step": 46700 - }, - { - "epoch": 24.355149934810953, - "grad_norm": 1.5314891338348389, - "learning_rate": 5.358391959798995e-05, - "loss": 5.1098, - "step": 46701 - }, - { - "epoch": 24.355671447196872, - "grad_norm": 1.4592232704162598, - "learning_rate": 5.358291457286433e-05, - "loss": 5.0325, - "step": 46702 - }, - { - "epoch": 24.35619295958279, - "grad_norm": 1.5268797874450684, - "learning_rate": 5.3581909547738694e-05, - "loss": 5.1898, - "step": 46703 - }, - { - "epoch": 24.356714471968708, - "grad_norm": 1.5811846256256104, - "learning_rate": 5.3580904522613065e-05, - "loss": 4.6398, - "step": 46704 - }, - { - "epoch": 24.357235984354627, - "grad_norm": 1.4974379539489746, - "learning_rate": 5.357989949748744e-05, - "loss": 5.4097, - "step": 46705 - }, - { - "epoch": 24.357757496740547, - "grad_norm": 1.5028765201568604, - "learning_rate": 5.357889447236181e-05, - "loss": 4.9995, - "step": 46706 - }, - { - "epoch": 24.358279009126466, - "grad_norm": 1.5281312465667725, - "learning_rate": 5.3577889447236185e-05, - "loss": 5.5609, - "step": 46707 - }, - { - "epoch": 24.358800521512386, - "grad_norm": 1.6143425703048706, - "learning_rate": 5.357688442211055e-05, - "loss": 5.4462, - "step": 46708 - }, - { - "epoch": 24.359322033898305, - "grad_norm": 1.517938494682312, - "learning_rate": 5.357587939698493e-05, - "loss": 5.5515, - "step": 46709 - }, - { - "epoch": 24.359843546284225, - "grad_norm": 1.488358974456787, - "learning_rate": 5.35748743718593e-05, - "loss": 5.7042, - "step": 46710 - }, - { - "epoch": 24.360365058670144, - "grad_norm": 1.5919100046157837, - "learning_rate": 5.3573869346733676e-05, - "loss": 5.364, - "step": 46711 - }, - { - "epoch": 24.360886571056064, - "grad_norm": 1.5082749128341675, - "learning_rate": 5.357286432160804e-05, - "loss": 5.7376, - "step": 46712 - }, - { - "epoch": 24.361408083441983, - "grad_norm": 1.468997836112976, - "learning_rate": 5.357185929648242e-05, - "loss": 5.2231, - "step": 46713 - }, - { - "epoch": 24.361929595827903, - "grad_norm": 1.6588627099990845, - "learning_rate": 5.357085427135678e-05, - "loss": 4.8492, - "step": 46714 - }, - { - "epoch": 24.36245110821382, - "grad_norm": 1.5386675596237183, - "learning_rate": 5.356984924623116e-05, - "loss": 5.6983, - "step": 46715 - }, - { - "epoch": 24.362972620599738, - "grad_norm": 1.9397474527359009, - "learning_rate": 5.356884422110553e-05, - "loss": 5.226, - "step": 46716 - }, - { - "epoch": 24.363494132985657, - "grad_norm": 1.6140294075012207, - "learning_rate": 5.356783919597991e-05, - "loss": 5.1547, - "step": 46717 - }, - { - "epoch": 24.364015645371577, - "grad_norm": 1.6452679634094238, - "learning_rate": 5.3566834170854274e-05, - "loss": 4.864, - "step": 46718 - }, - { - "epoch": 24.364537157757496, - "grad_norm": 1.611659288406372, - "learning_rate": 5.356582914572864e-05, - "loss": 4.9852, - "step": 46719 - }, - { - "epoch": 24.365058670143416, - "grad_norm": 1.5865225791931152, - "learning_rate": 5.3564824120603016e-05, - "loss": 4.9751, - "step": 46720 - }, - { - "epoch": 24.365580182529335, - "grad_norm": 1.5311130285263062, - "learning_rate": 5.356381909547739e-05, - "loss": 5.5341, - "step": 46721 - }, - { - "epoch": 24.366101694915255, - "grad_norm": 1.6892858743667603, - "learning_rate": 5.3562814070351765e-05, - "loss": 4.8011, - "step": 46722 - }, - { - "epoch": 24.366623207301174, - "grad_norm": 1.5774376392364502, - "learning_rate": 5.356180904522613e-05, - "loss": 4.9398, - "step": 46723 - }, - { - "epoch": 24.367144719687094, - "grad_norm": 1.680722951889038, - "learning_rate": 5.356080402010051e-05, - "loss": 5.0496, - "step": 46724 - }, - { - "epoch": 24.367666232073013, - "grad_norm": 1.5784188508987427, - "learning_rate": 5.355979899497488e-05, - "loss": 5.0779, - "step": 46725 - }, - { - "epoch": 24.368187744458933, - "grad_norm": 1.5683897733688354, - "learning_rate": 5.3558793969849256e-05, - "loss": 5.136, - "step": 46726 - }, - { - "epoch": 24.36870925684485, - "grad_norm": 1.6975905895233154, - "learning_rate": 5.355778894472362e-05, - "loss": 4.8463, - "step": 46727 - }, - { - "epoch": 24.369230769230768, - "grad_norm": 1.5479676723480225, - "learning_rate": 5.3556783919598e-05, - "loss": 5.259, - "step": 46728 - }, - { - "epoch": 24.369752281616687, - "grad_norm": 1.5467514991760254, - "learning_rate": 5.355577889447236e-05, - "loss": 5.3925, - "step": 46729 - }, - { - "epoch": 24.370273794002607, - "grad_norm": 1.4710688591003418, - "learning_rate": 5.355477386934674e-05, - "loss": 5.5907, - "step": 46730 - }, - { - "epoch": 24.370795306388526, - "grad_norm": 1.5912089347839355, - "learning_rate": 5.355376884422111e-05, - "loss": 5.2704, - "step": 46731 - }, - { - "epoch": 24.371316818774446, - "grad_norm": 1.6285303831100464, - "learning_rate": 5.3552763819095475e-05, - "loss": 5.2124, - "step": 46732 - }, - { - "epoch": 24.371838331160365, - "grad_norm": 1.572697639465332, - "learning_rate": 5.355175879396985e-05, - "loss": 5.1687, - "step": 46733 - }, - { - "epoch": 24.372359843546285, - "grad_norm": 1.6056311130523682, - "learning_rate": 5.355075376884422e-05, - "loss": 5.2608, - "step": 46734 - }, - { - "epoch": 24.372881355932204, - "grad_norm": 1.653411626815796, - "learning_rate": 5.3549748743718595e-05, - "loss": 4.6762, - "step": 46735 - }, - { - "epoch": 24.373402868318124, - "grad_norm": 1.5580683946609497, - "learning_rate": 5.3548743718592966e-05, - "loss": 5.5959, - "step": 46736 - }, - { - "epoch": 24.373924380704043, - "grad_norm": 1.6278222799301147, - "learning_rate": 5.3547738693467344e-05, - "loss": 5.4653, - "step": 46737 - }, - { - "epoch": 24.374445893089963, - "grad_norm": 1.4319900274276733, - "learning_rate": 5.354673366834171e-05, - "loss": 5.5572, - "step": 46738 - }, - { - "epoch": 24.37496740547588, - "grad_norm": 1.564563512802124, - "learning_rate": 5.3545728643216086e-05, - "loss": 5.212, - "step": 46739 - }, - { - "epoch": 24.375488917861798, - "grad_norm": 1.5445950031280518, - "learning_rate": 5.354472361809045e-05, - "loss": 5.4503, - "step": 46740 - }, - { - "epoch": 24.376010430247717, - "grad_norm": 1.5167509317398071, - "learning_rate": 5.354371859296483e-05, - "loss": 5.4314, - "step": 46741 - }, - { - "epoch": 24.376531942633637, - "grad_norm": 1.472383737564087, - "learning_rate": 5.35427135678392e-05, - "loss": 5.4351, - "step": 46742 - }, - { - "epoch": 24.377053455019556, - "grad_norm": 1.536608338356018, - "learning_rate": 5.354170854271358e-05, - "loss": 5.0218, - "step": 46743 - }, - { - "epoch": 24.377574967405476, - "grad_norm": 1.5551252365112305, - "learning_rate": 5.354070351758794e-05, - "loss": 5.4413, - "step": 46744 - }, - { - "epoch": 24.378096479791395, - "grad_norm": 1.5637702941894531, - "learning_rate": 5.353969849246231e-05, - "loss": 5.1231, - "step": 46745 - }, - { - "epoch": 24.378617992177315, - "grad_norm": 1.5864009857177734, - "learning_rate": 5.353869346733669e-05, - "loss": 5.0547, - "step": 46746 - }, - { - "epoch": 24.379139504563234, - "grad_norm": 1.7575881481170654, - "learning_rate": 5.3537688442211054e-05, - "loss": 5.3276, - "step": 46747 - }, - { - "epoch": 24.379661016949154, - "grad_norm": 1.548041582107544, - "learning_rate": 5.353668341708543e-05, - "loss": 4.6297, - "step": 46748 - }, - { - "epoch": 24.380182529335073, - "grad_norm": 1.608337640762329, - "learning_rate": 5.3535678391959797e-05, - "loss": 5.34, - "step": 46749 - }, - { - "epoch": 24.380704041720993, - "grad_norm": 1.4815932512283325, - "learning_rate": 5.3534673366834174e-05, - "loss": 5.3316, - "step": 46750 - }, - { - "epoch": 24.38122555410691, - "grad_norm": 1.62247633934021, - "learning_rate": 5.3533668341708545e-05, - "loss": 4.6363, - "step": 46751 - }, - { - "epoch": 24.381747066492828, - "grad_norm": 1.492600679397583, - "learning_rate": 5.353266331658292e-05, - "loss": 5.578, - "step": 46752 - }, - { - "epoch": 24.382268578878747, - "grad_norm": 1.534112572669983, - "learning_rate": 5.353165829145729e-05, - "loss": 5.3305, - "step": 46753 - }, - { - "epoch": 24.382790091264667, - "grad_norm": 1.5693928003311157, - "learning_rate": 5.3530653266331665e-05, - "loss": 5.1411, - "step": 46754 - }, - { - "epoch": 24.383311603650586, - "grad_norm": 1.5709365606307983, - "learning_rate": 5.352964824120603e-05, - "loss": 5.1967, - "step": 46755 - }, - { - "epoch": 24.383833116036506, - "grad_norm": 1.5470030307769775, - "learning_rate": 5.352864321608041e-05, - "loss": 5.3642, - "step": 46756 - }, - { - "epoch": 24.384354628422425, - "grad_norm": NaN, - "learning_rate": 5.352864321608041e-05, - "loss": 5.4593, - "step": 46757 - }, - { - "epoch": 24.384876140808345, - "grad_norm": 1.6188031435012817, - "learning_rate": 5.352763819095478e-05, - "loss": 5.2543, - "step": 46758 - }, - { - "epoch": 24.385397653194264, - "grad_norm": 1.4927293062210083, - "learning_rate": 5.352663316582914e-05, - "loss": 4.9877, - "step": 46759 - }, - { - "epoch": 24.385919165580184, - "grad_norm": 1.559382677078247, - "learning_rate": 5.352562814070352e-05, - "loss": 5.026, - "step": 46760 - }, - { - "epoch": 24.386440677966103, - "grad_norm": 1.5425792932510376, - "learning_rate": 5.3524623115577885e-05, - "loss": 5.2035, - "step": 46761 - }, - { - "epoch": 24.386962190352023, - "grad_norm": 1.5445119142532349, - "learning_rate": 5.352361809045226e-05, - "loss": 5.2641, - "step": 46762 - }, - { - "epoch": 24.38748370273794, - "grad_norm": 1.7350221872329712, - "learning_rate": 5.3522613065326634e-05, - "loss": 4.8422, - "step": 46763 - }, - { - "epoch": 24.388005215123858, - "grad_norm": 1.4480572938919067, - "learning_rate": 5.352160804020101e-05, - "loss": 4.9186, - "step": 46764 - }, - { - "epoch": 24.388526727509777, - "grad_norm": 1.5679616928100586, - "learning_rate": 5.3520603015075376e-05, - "loss": 5.4042, - "step": 46765 - }, - { - "epoch": 24.389048239895697, - "grad_norm": 1.5019046068191528, - "learning_rate": 5.3519597989949754e-05, - "loss": 4.8916, - "step": 46766 - }, - { - "epoch": 24.389569752281616, - "grad_norm": 1.5769399404525757, - "learning_rate": 5.351859296482412e-05, - "loss": 4.7467, - "step": 46767 - }, - { - "epoch": 24.390091264667536, - "grad_norm": 1.6964269876480103, - "learning_rate": 5.3517587939698496e-05, - "loss": 4.539, - "step": 46768 - }, - { - "epoch": 24.390612777053455, - "grad_norm": 1.490473985671997, - "learning_rate": 5.351658291457287e-05, - "loss": 5.5297, - "step": 46769 - }, - { - "epoch": 24.391134289439375, - "grad_norm": 1.5852210521697998, - "learning_rate": 5.3515577889447245e-05, - "loss": 5.0051, - "step": 46770 - }, - { - "epoch": 24.391655801825294, - "grad_norm": 1.552804946899414, - "learning_rate": 5.351457286432161e-05, - "loss": 5.042, - "step": 46771 - }, - { - "epoch": 24.392177314211214, - "grad_norm": 1.603550910949707, - "learning_rate": 5.351356783919598e-05, - "loss": 4.9104, - "step": 46772 - }, - { - "epoch": 24.392698826597133, - "grad_norm": 1.5778529644012451, - "learning_rate": 5.351256281407036e-05, - "loss": 5.4691, - "step": 46773 - }, - { - "epoch": 24.39322033898305, - "grad_norm": 1.6180814504623413, - "learning_rate": 5.351155778894472e-05, - "loss": 5.0149, - "step": 46774 - }, - { - "epoch": 24.39374185136897, - "grad_norm": 1.4708882570266724, - "learning_rate": 5.35105527638191e-05, - "loss": 5.7194, - "step": 46775 - }, - { - "epoch": 24.394263363754888, - "grad_norm": 1.4812366962432861, - "learning_rate": 5.3509547738693464e-05, - "loss": 5.3904, - "step": 46776 - }, - { - "epoch": 24.394784876140807, - "grad_norm": 1.4794608354568481, - "learning_rate": 5.350854271356784e-05, - "loss": 5.6902, - "step": 46777 - }, - { - "epoch": 24.395306388526727, - "grad_norm": 1.5230708122253418, - "learning_rate": 5.350753768844221e-05, - "loss": 5.4786, - "step": 46778 - }, - { - "epoch": 24.395827900912646, - "grad_norm": 1.5241135358810425, - "learning_rate": 5.350653266331659e-05, - "loss": 4.6597, - "step": 46779 - }, - { - "epoch": 24.396349413298566, - "grad_norm": 1.5163906812667847, - "learning_rate": 5.3505527638190955e-05, - "loss": 5.1006, - "step": 46780 - }, - { - "epoch": 24.396870925684485, - "grad_norm": 1.4672882556915283, - "learning_rate": 5.350452261306533e-05, - "loss": 5.3952, - "step": 46781 - }, - { - "epoch": 24.397392438070405, - "grad_norm": 1.5129591226577759, - "learning_rate": 5.35035175879397e-05, - "loss": 5.6294, - "step": 46782 - }, - { - "epoch": 24.397913950456324, - "grad_norm": 1.5973355770111084, - "learning_rate": 5.3502512562814075e-05, - "loss": 5.1503, - "step": 46783 - }, - { - "epoch": 24.398435462842244, - "grad_norm": 1.5719927549362183, - "learning_rate": 5.3501507537688446e-05, - "loss": 5.143, - "step": 46784 - }, - { - "epoch": 24.398956975228163, - "grad_norm": 1.5816539525985718, - "learning_rate": 5.350050251256281e-05, - "loss": 5.2509, - "step": 46785 - }, - { - "epoch": 24.39947848761408, - "grad_norm": 1.4487128257751465, - "learning_rate": 5.349949748743719e-05, - "loss": 5.4624, - "step": 46786 - }, - { - "epoch": 24.4, - "grad_norm": 1.5997023582458496, - "learning_rate": 5.349849246231155e-05, - "loss": 5.0839, - "step": 46787 - }, - { - "epoch": 24.400521512385918, - "grad_norm": 1.605520248413086, - "learning_rate": 5.349748743718593e-05, - "loss": 4.9188, - "step": 46788 - }, - { - "epoch": 24.401043024771838, - "grad_norm": 1.49250066280365, - "learning_rate": 5.34964824120603e-05, - "loss": 5.2519, - "step": 46789 - }, - { - "epoch": 24.401564537157757, - "grad_norm": 1.5662202835083008, - "learning_rate": 5.349547738693468e-05, - "loss": 4.9463, - "step": 46790 - }, - { - "epoch": 24.402086049543676, - "grad_norm": 1.58832848072052, - "learning_rate": 5.3494472361809044e-05, - "loss": 4.8115, - "step": 46791 - }, - { - "epoch": 24.402607561929596, - "grad_norm": 1.5292195081710815, - "learning_rate": 5.349346733668342e-05, - "loss": 5.5499, - "step": 46792 - }, - { - "epoch": 24.403129074315515, - "grad_norm": 1.6421623229980469, - "learning_rate": 5.349246231155779e-05, - "loss": 5.3064, - "step": 46793 - }, - { - "epoch": 24.403650586701435, - "grad_norm": 1.4181722402572632, - "learning_rate": 5.349145728643217e-05, - "loss": 5.5014, - "step": 46794 - }, - { - "epoch": 24.404172099087354, - "grad_norm": 1.5616059303283691, - "learning_rate": 5.3490452261306535e-05, - "loss": 4.7792, - "step": 46795 - }, - { - "epoch": 24.404693611473274, - "grad_norm": 1.5391825437545776, - "learning_rate": 5.348944723618091e-05, - "loss": 5.0982, - "step": 46796 - }, - { - "epoch": 24.405215123859193, - "grad_norm": 1.5276191234588623, - "learning_rate": 5.348844221105528e-05, - "loss": 5.2672, - "step": 46797 - }, - { - "epoch": 24.40573663624511, - "grad_norm": 1.4793869256973267, - "learning_rate": 5.348743718592965e-05, - "loss": 5.2786, - "step": 46798 - }, - { - "epoch": 24.40625814863103, - "grad_norm": 1.496817708015442, - "learning_rate": 5.3486432160804026e-05, - "loss": 4.3457, - "step": 46799 - }, - { - "epoch": 24.406779661016948, - "grad_norm": 1.4432053565979004, - "learning_rate": 5.348542713567839e-05, - "loss": 5.3139, - "step": 46800 - }, - { - "epoch": 24.407301173402868, - "grad_norm": 1.6009502410888672, - "learning_rate": 5.348442211055277e-05, - "loss": 5.1683, - "step": 46801 - }, - { - "epoch": 24.407822685788787, - "grad_norm": 1.520005464553833, - "learning_rate": 5.348341708542713e-05, - "loss": 5.5065, - "step": 46802 - }, - { - "epoch": 24.408344198174706, - "grad_norm": 1.528971552848816, - "learning_rate": 5.348241206030151e-05, - "loss": 5.2842, - "step": 46803 - }, - { - "epoch": 24.408865710560626, - "grad_norm": 1.4042320251464844, - "learning_rate": 5.348140703517588e-05, - "loss": 4.8447, - "step": 46804 - }, - { - "epoch": 24.409387222946545, - "grad_norm": 1.5521961450576782, - "learning_rate": 5.348040201005026e-05, - "loss": 5.4643, - "step": 46805 - }, - { - "epoch": 24.409908735332465, - "grad_norm": 1.5985453128814697, - "learning_rate": 5.347939698492462e-05, - "loss": 5.1848, - "step": 46806 - }, - { - "epoch": 24.410430247718384, - "grad_norm": 1.6031867265701294, - "learning_rate": 5.3478391959799e-05, - "loss": 5.4451, - "step": 46807 - }, - { - "epoch": 24.410951760104304, - "grad_norm": 1.4890942573547363, - "learning_rate": 5.3477386934673365e-05, - "loss": 5.2139, - "step": 46808 - }, - { - "epoch": 24.411473272490223, - "grad_norm": 1.5691941976547241, - "learning_rate": 5.347638190954774e-05, - "loss": 5.3449, - "step": 46809 - }, - { - "epoch": 24.41199478487614, - "grad_norm": 1.612863540649414, - "learning_rate": 5.3475376884422114e-05, - "loss": 5.031, - "step": 46810 - }, - { - "epoch": 24.41251629726206, - "grad_norm": 1.4622020721435547, - "learning_rate": 5.347437185929649e-05, - "loss": 5.2702, - "step": 46811 - }, - { - "epoch": 24.413037809647978, - "grad_norm": 1.6028493642807007, - "learning_rate": 5.3473366834170856e-05, - "loss": 5.0679, - "step": 46812 - }, - { - "epoch": 24.413559322033898, - "grad_norm": 1.6639914512634277, - "learning_rate": 5.347236180904523e-05, - "loss": 5.3367, - "step": 46813 - }, - { - "epoch": 24.414080834419817, - "grad_norm": 1.5464037656784058, - "learning_rate": 5.3471356783919605e-05, - "loss": 5.5064, - "step": 46814 - }, - { - "epoch": 24.414602346805736, - "grad_norm": 1.5480848550796509, - "learning_rate": 5.347035175879397e-05, - "loss": 5.196, - "step": 46815 - }, - { - "epoch": 24.415123859191656, - "grad_norm": 1.489405870437622, - "learning_rate": 5.346934673366835e-05, - "loss": 5.8414, - "step": 46816 - }, - { - "epoch": 24.415645371577575, - "grad_norm": 1.534005880355835, - "learning_rate": 5.346834170854271e-05, - "loss": 5.4964, - "step": 46817 - }, - { - "epoch": 24.416166883963495, - "grad_norm": 1.5526580810546875, - "learning_rate": 5.346733668341709e-05, - "loss": 5.5629, - "step": 46818 - }, - { - "epoch": 24.416688396349414, - "grad_norm": 1.5449583530426025, - "learning_rate": 5.346633165829146e-05, - "loss": 5.1884, - "step": 46819 - }, - { - "epoch": 24.417209908735334, - "grad_norm": 1.6475977897644043, - "learning_rate": 5.346532663316584e-05, - "loss": 5.2614, - "step": 46820 - }, - { - "epoch": 24.417731421121253, - "grad_norm": 1.705411672592163, - "learning_rate": 5.34643216080402e-05, - "loss": 5.2128, - "step": 46821 - }, - { - "epoch": 24.41825293350717, - "grad_norm": 1.534668207168579, - "learning_rate": 5.346331658291458e-05, - "loss": 5.0137, - "step": 46822 - }, - { - "epoch": 24.41877444589309, - "grad_norm": 1.5202434062957764, - "learning_rate": 5.3462311557788944e-05, - "loss": 5.5161, - "step": 46823 - }, - { - "epoch": 24.419295958279008, - "grad_norm": 1.5040980577468872, - "learning_rate": 5.346130653266332e-05, - "loss": 5.4303, - "step": 46824 - }, - { - "epoch": 24.419817470664928, - "grad_norm": 1.5516712665557861, - "learning_rate": 5.346030150753769e-05, - "loss": 5.5705, - "step": 46825 - }, - { - "epoch": 24.420338983050847, - "grad_norm": 1.6989881992340088, - "learning_rate": 5.345929648241206e-05, - "loss": 5.2943, - "step": 46826 - }, - { - "epoch": 24.420860495436767, - "grad_norm": 1.4926403760910034, - "learning_rate": 5.3458291457286435e-05, - "loss": 5.4415, - "step": 46827 - }, - { - "epoch": 24.421382007822686, - "grad_norm": 1.5819379091262817, - "learning_rate": 5.34572864321608e-05, - "loss": 4.8879, - "step": 46828 - }, - { - "epoch": 24.421903520208605, - "grad_norm": 1.5946037769317627, - "learning_rate": 5.345628140703518e-05, - "loss": 5.3622, - "step": 46829 - }, - { - "epoch": 24.422425032594525, - "grad_norm": 1.4274275302886963, - "learning_rate": 5.345527638190955e-05, - "loss": 5.4451, - "step": 46830 - }, - { - "epoch": 24.422946544980444, - "grad_norm": 1.4762616157531738, - "learning_rate": 5.3454271356783926e-05, - "loss": 5.5169, - "step": 46831 - }, - { - "epoch": 24.423468057366364, - "grad_norm": 1.5574125051498413, - "learning_rate": 5.345326633165829e-05, - "loss": 5.2595, - "step": 46832 - }, - { - "epoch": 24.423989569752283, - "grad_norm": 1.628781795501709, - "learning_rate": 5.345226130653267e-05, - "loss": 5.3586, - "step": 46833 - }, - { - "epoch": 24.4245110821382, - "grad_norm": 1.5415176153182983, - "learning_rate": 5.345125628140704e-05, - "loss": 5.0242, - "step": 46834 - }, - { - "epoch": 24.42503259452412, - "grad_norm": 1.6040716171264648, - "learning_rate": 5.345025125628142e-05, - "loss": 5.0274, - "step": 46835 - }, - { - "epoch": 24.425554106910038, - "grad_norm": 1.6169217824935913, - "learning_rate": 5.344924623115578e-05, - "loss": 5.2575, - "step": 46836 - }, - { - "epoch": 24.426075619295958, - "grad_norm": 1.4125640392303467, - "learning_rate": 5.344824120603016e-05, - "loss": 5.4518, - "step": 46837 - }, - { - "epoch": 24.426597131681877, - "grad_norm": 1.5681970119476318, - "learning_rate": 5.3447236180904524e-05, - "loss": 5.2223, - "step": 46838 - }, - { - "epoch": 24.427118644067797, - "grad_norm": 1.7159295082092285, - "learning_rate": 5.3446231155778895e-05, - "loss": 5.001, - "step": 46839 - }, - { - "epoch": 24.427640156453716, - "grad_norm": 1.4049465656280518, - "learning_rate": 5.344522613065327e-05, - "loss": 5.6541, - "step": 46840 - }, - { - "epoch": 24.428161668839635, - "grad_norm": 1.5568429231643677, - "learning_rate": 5.344422110552764e-05, - "loss": 5.3861, - "step": 46841 - }, - { - "epoch": 24.428683181225555, - "grad_norm": 1.460735559463501, - "learning_rate": 5.3443216080402015e-05, - "loss": 5.464, - "step": 46842 - }, - { - "epoch": 24.429204693611474, - "grad_norm": 1.6145219802856445, - "learning_rate": 5.344221105527638e-05, - "loss": 5.4572, - "step": 46843 - }, - { - "epoch": 24.429726205997394, - "grad_norm": 1.638950228691101, - "learning_rate": 5.344120603015076e-05, - "loss": 4.7213, - "step": 46844 - }, - { - "epoch": 24.430247718383313, - "grad_norm": 1.4625078439712524, - "learning_rate": 5.344020100502513e-05, - "loss": 5.6535, - "step": 46845 - }, - { - "epoch": 24.43076923076923, - "grad_norm": 1.4761309623718262, - "learning_rate": 5.3439195979899506e-05, - "loss": 5.3702, - "step": 46846 - }, - { - "epoch": 24.43129074315515, - "grad_norm": 1.4490272998809814, - "learning_rate": 5.343819095477387e-05, - "loss": 5.4501, - "step": 46847 - }, - { - "epoch": 24.431812255541068, - "grad_norm": 1.5501612424850464, - "learning_rate": 5.343718592964825e-05, - "loss": 5.2548, - "step": 46848 - }, - { - "epoch": 24.432333767926988, - "grad_norm": 1.5925376415252686, - "learning_rate": 5.343618090452261e-05, - "loss": 4.67, - "step": 46849 - }, - { - "epoch": 24.432855280312907, - "grad_norm": 1.6174564361572266, - "learning_rate": 5.343517587939699e-05, - "loss": 5.0246, - "step": 46850 - }, - { - "epoch": 24.433376792698827, - "grad_norm": 1.4380359649658203, - "learning_rate": 5.343417085427136e-05, - "loss": 5.0052, - "step": 46851 - }, - { - "epoch": 24.433898305084746, - "grad_norm": 1.535888671875, - "learning_rate": 5.3433165829145725e-05, - "loss": 5.1546, - "step": 46852 - }, - { - "epoch": 24.434419817470665, - "grad_norm": 1.5637187957763672, - "learning_rate": 5.34321608040201e-05, - "loss": 5.2682, - "step": 46853 - }, - { - "epoch": 24.434941329856585, - "grad_norm": 1.5596562623977661, - "learning_rate": 5.343115577889447e-05, - "loss": 5.0149, - "step": 46854 - }, - { - "epoch": 24.435462842242504, - "grad_norm": 1.5106682777404785, - "learning_rate": 5.3430150753768845e-05, - "loss": 5.5416, - "step": 46855 - }, - { - "epoch": 24.435984354628424, - "grad_norm": 1.4926294088363647, - "learning_rate": 5.3429145728643216e-05, - "loss": 5.4728, - "step": 46856 - }, - { - "epoch": 24.43650586701434, - "grad_norm": 1.5117021799087524, - "learning_rate": 5.3428140703517594e-05, - "loss": 5.2663, - "step": 46857 - }, - { - "epoch": 24.43702737940026, - "grad_norm": 1.6741130352020264, - "learning_rate": 5.342713567839196e-05, - "loss": 5.19, - "step": 46858 - }, - { - "epoch": 24.43754889178618, - "grad_norm": 1.6343400478363037, - "learning_rate": 5.3426130653266336e-05, - "loss": 5.0055, - "step": 46859 - }, - { - "epoch": 24.438070404172098, - "grad_norm": 1.450929045677185, - "learning_rate": 5.342512562814071e-05, - "loss": 5.5479, - "step": 46860 - }, - { - "epoch": 24.438591916558018, - "grad_norm": 1.5731208324432373, - "learning_rate": 5.3424120603015085e-05, - "loss": 5.2433, - "step": 46861 - }, - { - "epoch": 24.439113428943937, - "grad_norm": 1.558265209197998, - "learning_rate": 5.342311557788945e-05, - "loss": 5.6118, - "step": 46862 - }, - { - "epoch": 24.439634941329857, - "grad_norm": 1.4933276176452637, - "learning_rate": 5.342211055276383e-05, - "loss": 5.5118, - "step": 46863 - }, - { - "epoch": 24.440156453715776, - "grad_norm": 1.555484414100647, - "learning_rate": 5.342110552763819e-05, - "loss": 5.1801, - "step": 46864 - }, - { - "epoch": 24.440677966101696, - "grad_norm": 1.7526689767837524, - "learning_rate": 5.342010050251256e-05, - "loss": 4.8368, - "step": 46865 - }, - { - "epoch": 24.441199478487615, - "grad_norm": 1.6551331281661987, - "learning_rate": 5.341909547738694e-05, - "loss": 4.9357, - "step": 46866 - }, - { - "epoch": 24.441720990873534, - "grad_norm": 1.5546497106552124, - "learning_rate": 5.3418090452261304e-05, - "loss": 5.3863, - "step": 46867 - }, - { - "epoch": 24.442242503259454, - "grad_norm": 1.5204728841781616, - "learning_rate": 5.341708542713568e-05, - "loss": 5.3732, - "step": 46868 - }, - { - "epoch": 24.442764015645373, - "grad_norm": 1.4931373596191406, - "learning_rate": 5.3416080402010047e-05, - "loss": 5.4782, - "step": 46869 - }, - { - "epoch": 24.44328552803129, - "grad_norm": 1.579802393913269, - "learning_rate": 5.3415075376884424e-05, - "loss": 4.8919, - "step": 46870 - }, - { - "epoch": 24.44380704041721, - "grad_norm": 1.5080891847610474, - "learning_rate": 5.3414070351758795e-05, - "loss": 5.0857, - "step": 46871 - }, - { - "epoch": 24.444328552803128, - "grad_norm": 1.5446221828460693, - "learning_rate": 5.341306532663317e-05, - "loss": 5.5737, - "step": 46872 - }, - { - "epoch": 24.444850065189048, - "grad_norm": 1.5653200149536133, - "learning_rate": 5.341206030150754e-05, - "loss": 5.3569, - "step": 46873 - }, - { - "epoch": 24.445371577574967, - "grad_norm": 1.5013320446014404, - "learning_rate": 5.3411055276381915e-05, - "loss": 5.2634, - "step": 46874 - }, - { - "epoch": 24.445893089960887, - "grad_norm": 1.560054898262024, - "learning_rate": 5.341005025125628e-05, - "loss": 5.2686, - "step": 46875 - }, - { - "epoch": 24.446414602346806, - "grad_norm": 1.5474975109100342, - "learning_rate": 5.340904522613066e-05, - "loss": 5.3209, - "step": 46876 - }, - { - "epoch": 24.446936114732726, - "grad_norm": 1.6157587766647339, - "learning_rate": 5.340804020100503e-05, - "loss": 4.9168, - "step": 46877 - }, - { - "epoch": 24.447457627118645, - "grad_norm": 1.5772098302841187, - "learning_rate": 5.340703517587939e-05, - "loss": 4.9507, - "step": 46878 - }, - { - "epoch": 24.447979139504564, - "grad_norm": 1.6372140645980835, - "learning_rate": 5.340603015075377e-05, - "loss": 4.8728, - "step": 46879 - }, - { - "epoch": 24.448500651890484, - "grad_norm": 1.5911861658096313, - "learning_rate": 5.340502512562814e-05, - "loss": 5.1891, - "step": 46880 - }, - { - "epoch": 24.4490221642764, - "grad_norm": 1.6129380464553833, - "learning_rate": 5.340402010050252e-05, - "loss": 5.1805, - "step": 46881 - }, - { - "epoch": 24.44954367666232, - "grad_norm": 1.608136773109436, - "learning_rate": 5.3403015075376884e-05, - "loss": 5.1125, - "step": 46882 - }, - { - "epoch": 24.45006518904824, - "grad_norm": 1.6620815992355347, - "learning_rate": 5.340201005025126e-05, - "loss": 4.864, - "step": 46883 - }, - { - "epoch": 24.45058670143416, - "grad_norm": 1.5843944549560547, - "learning_rate": 5.3401005025125626e-05, - "loss": 4.8988, - "step": 46884 - }, - { - "epoch": 24.451108213820078, - "grad_norm": 1.5106269121170044, - "learning_rate": 5.3400000000000004e-05, - "loss": 5.4836, - "step": 46885 - }, - { - "epoch": 24.451629726205997, - "grad_norm": 1.4851598739624023, - "learning_rate": 5.3398994974874375e-05, - "loss": 4.9493, - "step": 46886 - }, - { - "epoch": 24.452151238591917, - "grad_norm": 1.4578101634979248, - "learning_rate": 5.339798994974875e-05, - "loss": 5.5683, - "step": 46887 - }, - { - "epoch": 24.452672750977836, - "grad_norm": 1.584368109703064, - "learning_rate": 5.339698492462312e-05, - "loss": 5.3785, - "step": 46888 - }, - { - "epoch": 24.453194263363756, - "grad_norm": 1.4908372163772583, - "learning_rate": 5.3395979899497495e-05, - "loss": 5.4653, - "step": 46889 - }, - { - "epoch": 24.453715775749675, - "grad_norm": 1.5987317562103271, - "learning_rate": 5.339497487437186e-05, - "loss": 5.2023, - "step": 46890 - }, - { - "epoch": 24.454237288135594, - "grad_norm": 1.4295525550842285, - "learning_rate": 5.339396984924624e-05, - "loss": 5.553, - "step": 46891 - }, - { - "epoch": 24.454758800521514, - "grad_norm": 1.5404844284057617, - "learning_rate": 5.339296482412061e-05, - "loss": 5.0238, - "step": 46892 - }, - { - "epoch": 24.45528031290743, - "grad_norm": 1.549845576286316, - "learning_rate": 5.339195979899497e-05, - "loss": 4.8637, - "step": 46893 - }, - { - "epoch": 24.45580182529335, - "grad_norm": 1.541495442390442, - "learning_rate": 5.339095477386935e-05, - "loss": 5.3051, - "step": 46894 - }, - { - "epoch": 24.45632333767927, - "grad_norm": 1.4559043645858765, - "learning_rate": 5.3389949748743714e-05, - "loss": 4.6441, - "step": 46895 - }, - { - "epoch": 24.45684485006519, - "grad_norm": 1.6433387994766235, - "learning_rate": 5.338894472361809e-05, - "loss": 4.6646, - "step": 46896 - }, - { - "epoch": 24.457366362451108, - "grad_norm": 1.5289877653121948, - "learning_rate": 5.338793969849246e-05, - "loss": 5.2818, - "step": 46897 - }, - { - "epoch": 24.457887874837027, - "grad_norm": 1.5622875690460205, - "learning_rate": 5.338693467336684e-05, - "loss": 4.9651, - "step": 46898 - }, - { - "epoch": 24.458409387222947, - "grad_norm": 1.4806311130523682, - "learning_rate": 5.3385929648241205e-05, - "loss": 5.5735, - "step": 46899 - }, - { - "epoch": 24.458930899608866, - "grad_norm": 1.6699057817459106, - "learning_rate": 5.338492462311558e-05, - "loss": 5.2577, - "step": 46900 - }, - { - "epoch": 24.459452411994786, - "grad_norm": 1.5258805751800537, - "learning_rate": 5.3383919597989954e-05, - "loss": 4.7491, - "step": 46901 - }, - { - "epoch": 24.459973924380705, - "grad_norm": 1.5841914415359497, - "learning_rate": 5.338291457286433e-05, - "loss": 5.2938, - "step": 46902 - }, - { - "epoch": 24.460495436766625, - "grad_norm": 1.5252264738082886, - "learning_rate": 5.3381909547738696e-05, - "loss": 5.6679, - "step": 46903 - }, - { - "epoch": 24.461016949152544, - "grad_norm": 1.502155065536499, - "learning_rate": 5.3380904522613074e-05, - "loss": 5.5332, - "step": 46904 - }, - { - "epoch": 24.46153846153846, - "grad_norm": 1.5513098239898682, - "learning_rate": 5.337989949748744e-05, - "loss": 5.1876, - "step": 46905 - }, - { - "epoch": 24.46205997392438, - "grad_norm": 1.6216298341751099, - "learning_rate": 5.337889447236181e-05, - "loss": 4.5304, - "step": 46906 - }, - { - "epoch": 24.4625814863103, - "grad_norm": 1.6283618211746216, - "learning_rate": 5.337788944723619e-05, - "loss": 4.919, - "step": 46907 - }, - { - "epoch": 24.46310299869622, - "grad_norm": 1.5621066093444824, - "learning_rate": 5.337688442211055e-05, - "loss": 5.4725, - "step": 46908 - }, - { - "epoch": 24.463624511082138, - "grad_norm": 1.5952681303024292, - "learning_rate": 5.337587939698493e-05, - "loss": 5.4109, - "step": 46909 - }, - { - "epoch": 24.464146023468057, - "grad_norm": 1.5695478916168213, - "learning_rate": 5.3374874371859294e-05, - "loss": 5.464, - "step": 46910 - }, - { - "epoch": 24.464667535853977, - "grad_norm": 1.5776935815811157, - "learning_rate": 5.337386934673367e-05, - "loss": 5.5859, - "step": 46911 - }, - { - "epoch": 24.465189048239896, - "grad_norm": 1.7134361267089844, - "learning_rate": 5.337286432160804e-05, - "loss": 4.9267, - "step": 46912 - }, - { - "epoch": 24.465710560625816, - "grad_norm": 1.5193932056427002, - "learning_rate": 5.337185929648242e-05, - "loss": 5.3485, - "step": 46913 - }, - { - "epoch": 24.466232073011735, - "grad_norm": 1.6123600006103516, - "learning_rate": 5.3370854271356785e-05, - "loss": 5.5001, - "step": 46914 - }, - { - "epoch": 24.466753585397655, - "grad_norm": 1.5536575317382812, - "learning_rate": 5.336984924623116e-05, - "loss": 5.5449, - "step": 46915 - }, - { - "epoch": 24.467275097783574, - "grad_norm": 1.539025902748108, - "learning_rate": 5.336884422110553e-05, - "loss": 5.2784, - "step": 46916 - }, - { - "epoch": 24.46779661016949, - "grad_norm": 1.5732706785202026, - "learning_rate": 5.3367839195979904e-05, - "loss": 4.9103, - "step": 46917 - }, - { - "epoch": 24.46831812255541, - "grad_norm": 1.5040841102600098, - "learning_rate": 5.3366834170854276e-05, - "loss": 4.89, - "step": 46918 - }, - { - "epoch": 24.46883963494133, - "grad_norm": 1.6116715669631958, - "learning_rate": 5.336582914572864e-05, - "loss": 5.4721, - "step": 46919 - }, - { - "epoch": 24.46936114732725, - "grad_norm": 1.5664962530136108, - "learning_rate": 5.336482412060302e-05, - "loss": 5.4139, - "step": 46920 - }, - { - "epoch": 24.469882659713168, - "grad_norm": 1.6272259950637817, - "learning_rate": 5.336381909547739e-05, - "loss": 5.4449, - "step": 46921 - }, - { - "epoch": 24.470404172099087, - "grad_norm": 1.6525672674179077, - "learning_rate": 5.3362814070351767e-05, - "loss": 4.6873, - "step": 46922 - }, - { - "epoch": 24.470925684485007, - "grad_norm": 1.5854500532150269, - "learning_rate": 5.336180904522613e-05, - "loss": 5.1608, - "step": 46923 - }, - { - "epoch": 24.471447196870926, - "grad_norm": 1.4764102697372437, - "learning_rate": 5.336080402010051e-05, - "loss": 4.9864, - "step": 46924 - }, - { - "epoch": 24.471968709256846, - "grad_norm": 1.5040793418884277, - "learning_rate": 5.335979899497487e-05, - "loss": 5.4397, - "step": 46925 - }, - { - "epoch": 24.472490221642765, - "grad_norm": 1.5976850986480713, - "learning_rate": 5.335879396984925e-05, - "loss": 5.228, - "step": 46926 - }, - { - "epoch": 24.473011734028685, - "grad_norm": 1.5677300691604614, - "learning_rate": 5.335778894472362e-05, - "loss": 5.0082, - "step": 46927 - }, - { - "epoch": 24.473533246414604, - "grad_norm": 1.5198653936386108, - "learning_rate": 5.3356783919598e-05, - "loss": 5.0508, - "step": 46928 - }, - { - "epoch": 24.47405475880052, - "grad_norm": 1.603180170059204, - "learning_rate": 5.3355778894472364e-05, - "loss": 4.8876, - "step": 46929 - }, - { - "epoch": 24.47457627118644, - "grad_norm": 1.5647199153900146, - "learning_rate": 5.335477386934674e-05, - "loss": 5.1215, - "step": 46930 - }, - { - "epoch": 24.47509778357236, - "grad_norm": 1.5535557270050049, - "learning_rate": 5.3353768844221106e-05, - "loss": 4.9975, - "step": 46931 - }, - { - "epoch": 24.47561929595828, - "grad_norm": 1.6753103733062744, - "learning_rate": 5.335276381909548e-05, - "loss": 5.1001, - "step": 46932 - }, - { - "epoch": 24.476140808344198, - "grad_norm": 1.4759730100631714, - "learning_rate": 5.3351758793969855e-05, - "loss": 5.3686, - "step": 46933 - }, - { - "epoch": 24.476662320730117, - "grad_norm": 1.4556455612182617, - "learning_rate": 5.335075376884422e-05, - "loss": 5.6415, - "step": 46934 - }, - { - "epoch": 24.477183833116037, - "grad_norm": 1.5684616565704346, - "learning_rate": 5.33497487437186e-05, - "loss": 5.4931, - "step": 46935 - }, - { - "epoch": 24.477705345501956, - "grad_norm": 1.6388766765594482, - "learning_rate": 5.334874371859296e-05, - "loss": 5.2926, - "step": 46936 - }, - { - "epoch": 24.478226857887876, - "grad_norm": 1.498582124710083, - "learning_rate": 5.334773869346734e-05, - "loss": 4.901, - "step": 46937 - }, - { - "epoch": 24.478748370273795, - "grad_norm": 1.5016286373138428, - "learning_rate": 5.334673366834171e-05, - "loss": 5.4452, - "step": 46938 - }, - { - "epoch": 24.479269882659715, - "grad_norm": 1.6363579034805298, - "learning_rate": 5.334572864321609e-05, - "loss": 5.0962, - "step": 46939 - }, - { - "epoch": 24.479791395045634, - "grad_norm": 1.7017638683319092, - "learning_rate": 5.334472361809045e-05, - "loss": 5.2828, - "step": 46940 - }, - { - "epoch": 24.48031290743155, - "grad_norm": 1.5281842947006226, - "learning_rate": 5.334371859296483e-05, - "loss": 5.2497, - "step": 46941 - }, - { - "epoch": 24.48083441981747, - "grad_norm": 1.6567151546478271, - "learning_rate": 5.3342713567839194e-05, - "loss": 4.6082, - "step": 46942 - }, - { - "epoch": 24.48135593220339, - "grad_norm": 1.4884926080703735, - "learning_rate": 5.334170854271357e-05, - "loss": 5.4225, - "step": 46943 - }, - { - "epoch": 24.48187744458931, - "grad_norm": 1.4401594400405884, - "learning_rate": 5.334070351758794e-05, - "loss": 5.8562, - "step": 46944 - }, - { - "epoch": 24.482398956975228, - "grad_norm": 1.5681110620498657, - "learning_rate": 5.333969849246231e-05, - "loss": 5.2935, - "step": 46945 - }, - { - "epoch": 24.482920469361147, - "grad_norm": 1.611291766166687, - "learning_rate": 5.3338693467336685e-05, - "loss": 5.1855, - "step": 46946 - }, - { - "epoch": 24.483441981747067, - "grad_norm": 1.60612154006958, - "learning_rate": 5.3337688442211056e-05, - "loss": 5.4226, - "step": 46947 - }, - { - "epoch": 24.483963494132986, - "grad_norm": 1.5588703155517578, - "learning_rate": 5.3336683417085434e-05, - "loss": 5.514, - "step": 46948 - }, - { - "epoch": 24.484485006518906, - "grad_norm": 1.473008632659912, - "learning_rate": 5.33356783919598e-05, - "loss": 5.6299, - "step": 46949 - }, - { - "epoch": 24.485006518904825, - "grad_norm": 1.603568434715271, - "learning_rate": 5.3334673366834176e-05, - "loss": 5.1993, - "step": 46950 - }, - { - "epoch": 24.485528031290745, - "grad_norm": 1.6604666709899902, - "learning_rate": 5.333366834170854e-05, - "loss": 4.9037, - "step": 46951 - }, - { - "epoch": 24.486049543676664, - "grad_norm": 1.6940096616744995, - "learning_rate": 5.333266331658292e-05, - "loss": 5.1333, - "step": 46952 - }, - { - "epoch": 24.48657105606258, - "grad_norm": 1.576972484588623, - "learning_rate": 5.333165829145729e-05, - "loss": 5.0299, - "step": 46953 - }, - { - "epoch": 24.4870925684485, - "grad_norm": 1.527289628982544, - "learning_rate": 5.333065326633167e-05, - "loss": 5.2628, - "step": 46954 - }, - { - "epoch": 24.48761408083442, - "grad_norm": 1.542615532875061, - "learning_rate": 5.332964824120603e-05, - "loss": 5.3624, - "step": 46955 - }, - { - "epoch": 24.48813559322034, - "grad_norm": 1.6338961124420166, - "learning_rate": 5.332864321608041e-05, - "loss": 4.6834, - "step": 46956 - }, - { - "epoch": 24.488657105606258, - "grad_norm": 1.4894274473190308, - "learning_rate": 5.3327638190954774e-05, - "loss": 5.6548, - "step": 46957 - }, - { - "epoch": 24.489178617992177, - "grad_norm": 1.579152226448059, - "learning_rate": 5.3326633165829145e-05, - "loss": 5.3461, - "step": 46958 - }, - { - "epoch": 24.489700130378097, - "grad_norm": 1.503467321395874, - "learning_rate": 5.332562814070352e-05, - "loss": 5.6224, - "step": 46959 - }, - { - "epoch": 24.490221642764016, - "grad_norm": 1.669542908668518, - "learning_rate": 5.332462311557789e-05, - "loss": 5.3379, - "step": 46960 - }, - { - "epoch": 24.490743155149936, - "grad_norm": 1.5572022199630737, - "learning_rate": 5.3323618090452265e-05, - "loss": 5.4071, - "step": 46961 - }, - { - "epoch": 24.491264667535855, - "grad_norm": 1.5719079971313477, - "learning_rate": 5.332261306532663e-05, - "loss": 5.2748, - "step": 46962 - }, - { - "epoch": 24.491786179921775, - "grad_norm": 1.5602941513061523, - "learning_rate": 5.332160804020101e-05, - "loss": 4.9338, - "step": 46963 - }, - { - "epoch": 24.49230769230769, - "grad_norm": 1.5654107332229614, - "learning_rate": 5.332060301507538e-05, - "loss": 5.4114, - "step": 46964 - }, - { - "epoch": 24.49282920469361, - "grad_norm": 1.576171636581421, - "learning_rate": 5.3319597989949756e-05, - "loss": 4.6189, - "step": 46965 - }, - { - "epoch": 24.49335071707953, - "grad_norm": 1.5497735738754272, - "learning_rate": 5.331859296482412e-05, - "loss": 5.1549, - "step": 46966 - }, - { - "epoch": 24.49387222946545, - "grad_norm": 1.4493952989578247, - "learning_rate": 5.33175879396985e-05, - "loss": 5.6189, - "step": 46967 - }, - { - "epoch": 24.49439374185137, - "grad_norm": 1.5733602046966553, - "learning_rate": 5.331658291457287e-05, - "loss": 4.4805, - "step": 46968 - }, - { - "epoch": 24.494915254237288, - "grad_norm": 1.5480234622955322, - "learning_rate": 5.3315577889447247e-05, - "loss": 5.1361, - "step": 46969 - }, - { - "epoch": 24.495436766623207, - "grad_norm": 1.4671661853790283, - "learning_rate": 5.331457286432161e-05, - "loss": 5.3632, - "step": 46970 - }, - { - "epoch": 24.495958279009127, - "grad_norm": 2.0938878059387207, - "learning_rate": 5.3313567839195975e-05, - "loss": 4.5117, - "step": 46971 - }, - { - "epoch": 24.496479791395046, - "grad_norm": 1.5747873783111572, - "learning_rate": 5.331256281407035e-05, - "loss": 5.4546, - "step": 46972 - }, - { - "epoch": 24.497001303780966, - "grad_norm": 1.6440975666046143, - "learning_rate": 5.3311557788944724e-05, - "loss": 4.8502, - "step": 46973 - }, - { - "epoch": 24.497522816166885, - "grad_norm": 1.472338080406189, - "learning_rate": 5.33105527638191e-05, - "loss": 5.4147, - "step": 46974 - }, - { - "epoch": 24.498044328552805, - "grad_norm": 1.6443449258804321, - "learning_rate": 5.3309547738693466e-05, - "loss": 3.8802, - "step": 46975 - }, - { - "epoch": 24.49856584093872, - "grad_norm": 1.599511742591858, - "learning_rate": 5.3308542713567844e-05, - "loss": 5.0704, - "step": 46976 - }, - { - "epoch": 24.49908735332464, - "grad_norm": 1.6179391145706177, - "learning_rate": 5.330753768844221e-05, - "loss": 5.1508, - "step": 46977 - }, - { - "epoch": 24.49960886571056, - "grad_norm": 1.5943717956542969, - "learning_rate": 5.3306532663316586e-05, - "loss": 4.8495, - "step": 46978 - }, - { - "epoch": 24.50013037809648, - "grad_norm": 1.5914806127548218, - "learning_rate": 5.330552763819096e-05, - "loss": 4.9521, - "step": 46979 - }, - { - "epoch": 24.5006518904824, - "grad_norm": 1.4852319955825806, - "learning_rate": 5.3304522613065335e-05, - "loss": 5.5935, - "step": 46980 - }, - { - "epoch": 24.501173402868318, - "grad_norm": 1.5313832759857178, - "learning_rate": 5.33035175879397e-05, - "loss": 5.438, - "step": 46981 - }, - { - "epoch": 24.501694915254237, - "grad_norm": 1.5226150751113892, - "learning_rate": 5.330251256281408e-05, - "loss": 5.165, - "step": 46982 - }, - { - "epoch": 24.502216427640157, - "grad_norm": 1.6395223140716553, - "learning_rate": 5.330150753768844e-05, - "loss": 5.1003, - "step": 46983 - }, - { - "epoch": 24.502737940026076, - "grad_norm": 1.5655437707901, - "learning_rate": 5.330050251256282e-05, - "loss": 5.1513, - "step": 46984 - }, - { - "epoch": 24.503259452411996, - "grad_norm": 1.6374090909957886, - "learning_rate": 5.329949748743719e-05, - "loss": 5.2454, - "step": 46985 - }, - { - "epoch": 24.503780964797915, - "grad_norm": 1.6206214427947998, - "learning_rate": 5.3298492462311554e-05, - "loss": 5.2173, - "step": 46986 - }, - { - "epoch": 24.504302477183835, - "grad_norm": 1.5722882747650146, - "learning_rate": 5.329748743718593e-05, - "loss": 5.0491, - "step": 46987 - }, - { - "epoch": 24.50482398956975, - "grad_norm": 1.55980384349823, - "learning_rate": 5.32964824120603e-05, - "loss": 5.5175, - "step": 46988 - }, - { - "epoch": 24.50534550195567, - "grad_norm": 1.5735328197479248, - "learning_rate": 5.329547738693468e-05, - "loss": 4.7047, - "step": 46989 - }, - { - "epoch": 24.50586701434159, - "grad_norm": 1.4001905918121338, - "learning_rate": 5.3294472361809045e-05, - "loss": 5.4998, - "step": 46990 - }, - { - "epoch": 24.50638852672751, - "grad_norm": 1.5956040620803833, - "learning_rate": 5.329346733668342e-05, - "loss": 5.2431, - "step": 46991 - }, - { - "epoch": 24.50691003911343, - "grad_norm": 1.4089510440826416, - "learning_rate": 5.329246231155779e-05, - "loss": 4.8503, - "step": 46992 - }, - { - "epoch": 24.507431551499348, - "grad_norm": 1.5464413166046143, - "learning_rate": 5.3291457286432165e-05, - "loss": 5.271, - "step": 46993 - }, - { - "epoch": 24.507953063885267, - "grad_norm": 1.5495258569717407, - "learning_rate": 5.3290452261306536e-05, - "loss": 5.0857, - "step": 46994 - }, - { - "epoch": 24.508474576271187, - "grad_norm": 1.4467421770095825, - "learning_rate": 5.3289447236180914e-05, - "loss": 5.4756, - "step": 46995 - }, - { - "epoch": 24.508996088657106, - "grad_norm": 1.5310806035995483, - "learning_rate": 5.328844221105528e-05, - "loss": 5.2246, - "step": 46996 - }, - { - "epoch": 24.509517601043026, - "grad_norm": 1.5783644914627075, - "learning_rate": 5.3287437185929656e-05, - "loss": 5.3798, - "step": 46997 - }, - { - "epoch": 24.510039113428945, - "grad_norm": 1.5423732995986938, - "learning_rate": 5.328643216080402e-05, - "loss": 5.4275, - "step": 46998 - }, - { - "epoch": 24.510560625814865, - "grad_norm": 1.4872562885284424, - "learning_rate": 5.328542713567839e-05, - "loss": 5.5408, - "step": 46999 - }, - { - "epoch": 24.51108213820078, - "grad_norm": 1.6110937595367432, - "learning_rate": 5.328442211055277e-05, - "loss": 5.2964, - "step": 47000 - }, - { - "epoch": 24.5116036505867, - "grad_norm": 1.4966665506362915, - "learning_rate": 5.3283417085427134e-05, - "loss": 4.9393, - "step": 47001 - }, - { - "epoch": 24.51212516297262, - "grad_norm": 1.6254503726959229, - "learning_rate": 5.328241206030151e-05, - "loss": 4.9367, - "step": 47002 - }, - { - "epoch": 24.51264667535854, - "grad_norm": 1.672582745552063, - "learning_rate": 5.3281407035175876e-05, - "loss": 5.302, - "step": 47003 - }, - { - "epoch": 24.51316818774446, - "grad_norm": 1.5385371446609497, - "learning_rate": 5.3280402010050254e-05, - "loss": 5.0354, - "step": 47004 - }, - { - "epoch": 24.513689700130378, - "grad_norm": 1.5774885416030884, - "learning_rate": 5.3279396984924625e-05, - "loss": 5.3679, - "step": 47005 - }, - { - "epoch": 24.514211212516297, - "grad_norm": 1.4294776916503906, - "learning_rate": 5.3278391959799e-05, - "loss": 4.8923, - "step": 47006 - }, - { - "epoch": 24.514732724902217, - "grad_norm": 1.561663269996643, - "learning_rate": 5.327738693467337e-05, - "loss": 5.4225, - "step": 47007 - }, - { - "epoch": 24.515254237288136, - "grad_norm": 1.5611156225204468, - "learning_rate": 5.3276381909547745e-05, - "loss": 4.7216, - "step": 47008 - }, - { - "epoch": 24.515775749674056, - "grad_norm": 1.597525715827942, - "learning_rate": 5.327537688442211e-05, - "loss": 5.2043, - "step": 47009 - }, - { - "epoch": 24.516297262059975, - "grad_norm": 1.53656005859375, - "learning_rate": 5.327437185929649e-05, - "loss": 5.1759, - "step": 47010 - }, - { - "epoch": 24.516818774445895, - "grad_norm": 1.551531195640564, - "learning_rate": 5.327336683417086e-05, - "loss": 5.0734, - "step": 47011 - }, - { - "epoch": 24.51734028683181, - "grad_norm": 1.57008957862854, - "learning_rate": 5.327236180904522e-05, - "loss": 5.32, - "step": 47012 - }, - { - "epoch": 24.51786179921773, - "grad_norm": 1.5695199966430664, - "learning_rate": 5.32713567839196e-05, - "loss": 5.2148, - "step": 47013 - }, - { - "epoch": 24.51838331160365, - "grad_norm": 1.904607892036438, - "learning_rate": 5.327035175879397e-05, - "loss": 4.9105, - "step": 47014 - }, - { - "epoch": 24.51890482398957, - "grad_norm": 1.6498414278030396, - "learning_rate": 5.326934673366835e-05, - "loss": 4.965, - "step": 47015 - }, - { - "epoch": 24.51942633637549, - "grad_norm": 1.5195074081420898, - "learning_rate": 5.326834170854271e-05, - "loss": 5.2548, - "step": 47016 - }, - { - "epoch": 24.519947848761408, - "grad_norm": 1.530997395515442, - "learning_rate": 5.326733668341709e-05, - "loss": 5.5183, - "step": 47017 - }, - { - "epoch": 24.520469361147327, - "grad_norm": 1.640350580215454, - "learning_rate": 5.3266331658291455e-05, - "loss": 4.511, - "step": 47018 - }, - { - "epoch": 24.520990873533247, - "grad_norm": 1.6884610652923584, - "learning_rate": 5.326532663316583e-05, - "loss": 4.744, - "step": 47019 - }, - { - "epoch": 24.521512385919166, - "grad_norm": 1.608212947845459, - "learning_rate": 5.3264321608040204e-05, - "loss": 4.7739, - "step": 47020 - }, - { - "epoch": 24.522033898305086, - "grad_norm": 1.5452059507369995, - "learning_rate": 5.326331658291458e-05, - "loss": 5.4324, - "step": 47021 - }, - { - "epoch": 24.522555410691005, - "grad_norm": 1.6619318723678589, - "learning_rate": 5.3262311557788946e-05, - "loss": 5.1558, - "step": 47022 - }, - { - "epoch": 24.523076923076925, - "grad_norm": 1.5675123929977417, - "learning_rate": 5.3261306532663324e-05, - "loss": 5.4826, - "step": 47023 - }, - { - "epoch": 24.52359843546284, - "grad_norm": 1.5339269638061523, - "learning_rate": 5.326030150753769e-05, - "loss": 5.4646, - "step": 47024 - }, - { - "epoch": 24.52411994784876, - "grad_norm": 1.5070695877075195, - "learning_rate": 5.325929648241206e-05, - "loss": 5.389, - "step": 47025 - }, - { - "epoch": 24.52464146023468, - "grad_norm": 1.5428792238235474, - "learning_rate": 5.325829145728644e-05, - "loss": 5.4542, - "step": 47026 - }, - { - "epoch": 24.5251629726206, - "grad_norm": 1.47152841091156, - "learning_rate": 5.32572864321608e-05, - "loss": 5.2133, - "step": 47027 - }, - { - "epoch": 24.52568448500652, - "grad_norm": 1.6174442768096924, - "learning_rate": 5.325628140703518e-05, - "loss": 5.3646, - "step": 47028 - }, - { - "epoch": 24.526205997392438, - "grad_norm": 1.4819834232330322, - "learning_rate": 5.3255276381909544e-05, - "loss": 5.4802, - "step": 47029 - }, - { - "epoch": 24.526727509778357, - "grad_norm": 1.5882633924484253, - "learning_rate": 5.325427135678392e-05, - "loss": 5.0458, - "step": 47030 - }, - { - "epoch": 24.527249022164277, - "grad_norm": 1.5190577507019043, - "learning_rate": 5.325326633165829e-05, - "loss": 4.7818, - "step": 47031 - }, - { - "epoch": 24.527770534550196, - "grad_norm": 1.5614604949951172, - "learning_rate": 5.325226130653267e-05, - "loss": 4.9924, - "step": 47032 - }, - { - "epoch": 24.528292046936116, - "grad_norm": 1.5994915962219238, - "learning_rate": 5.3251256281407035e-05, - "loss": 5.3276, - "step": 47033 - }, - { - "epoch": 24.528813559322035, - "grad_norm": 1.6464288234710693, - "learning_rate": 5.325025125628141e-05, - "loss": 4.6116, - "step": 47034 - }, - { - "epoch": 24.529335071707955, - "grad_norm": 1.488739013671875, - "learning_rate": 5.3249246231155783e-05, - "loss": 5.2183, - "step": 47035 - }, - { - "epoch": 24.52985658409387, - "grad_norm": 1.572550892829895, - "learning_rate": 5.324824120603016e-05, - "loss": 5.3221, - "step": 47036 - }, - { - "epoch": 24.53037809647979, - "grad_norm": 1.6037341356277466, - "learning_rate": 5.3247236180904526e-05, - "loss": 5.4404, - "step": 47037 - }, - { - "epoch": 24.53089960886571, - "grad_norm": 1.5635998249053955, - "learning_rate": 5.324623115577889e-05, - "loss": 5.3136, - "step": 47038 - }, - { - "epoch": 24.53142112125163, - "grad_norm": 1.643830418586731, - "learning_rate": 5.324522613065327e-05, - "loss": 5.7225, - "step": 47039 - }, - { - "epoch": 24.53194263363755, - "grad_norm": 1.6241387128829956, - "learning_rate": 5.324422110552764e-05, - "loss": 4.8485, - "step": 47040 - }, - { - "epoch": 24.532464146023468, - "grad_norm": 1.5629453659057617, - "learning_rate": 5.3243216080402017e-05, - "loss": 5.4526, - "step": 47041 - }, - { - "epoch": 24.532985658409387, - "grad_norm": 1.5443285703659058, - "learning_rate": 5.324221105527638e-05, - "loss": 5.0476, - "step": 47042 - }, - { - "epoch": 24.533507170795307, - "grad_norm": 1.6248217821121216, - "learning_rate": 5.324120603015076e-05, - "loss": 5.2419, - "step": 47043 - }, - { - "epoch": 24.534028683181226, - "grad_norm": 1.6189706325531006, - "learning_rate": 5.324020100502512e-05, - "loss": 5.2651, - "step": 47044 - }, - { - "epoch": 24.534550195567146, - "grad_norm": 1.5307016372680664, - "learning_rate": 5.32391959798995e-05, - "loss": 5.1806, - "step": 47045 - }, - { - "epoch": 24.535071707953065, - "grad_norm": 1.5677441358566284, - "learning_rate": 5.323819095477387e-05, - "loss": 5.1639, - "step": 47046 - }, - { - "epoch": 24.53559322033898, - "grad_norm": 1.5195235013961792, - "learning_rate": 5.323718592964825e-05, - "loss": 5.376, - "step": 47047 - }, - { - "epoch": 24.5361147327249, - "grad_norm": 1.6116783618927002, - "learning_rate": 5.3236180904522614e-05, - "loss": 4.935, - "step": 47048 - }, - { - "epoch": 24.53663624511082, - "grad_norm": 1.5301162004470825, - "learning_rate": 5.323517587939699e-05, - "loss": 5.4768, - "step": 47049 - }, - { - "epoch": 24.53715775749674, - "grad_norm": 1.6664265394210815, - "learning_rate": 5.3234170854271356e-05, - "loss": 5.2695, - "step": 47050 - }, - { - "epoch": 24.53767926988266, - "grad_norm": 1.705824613571167, - "learning_rate": 5.323316582914573e-05, - "loss": 4.9167, - "step": 47051 - }, - { - "epoch": 24.53820078226858, - "grad_norm": 1.6121060848236084, - "learning_rate": 5.3232160804020105e-05, - "loss": 5.4096, - "step": 47052 - }, - { - "epoch": 24.538722294654498, - "grad_norm": 1.6011855602264404, - "learning_rate": 5.323115577889447e-05, - "loss": 5.1289, - "step": 47053 - }, - { - "epoch": 24.539243807040418, - "grad_norm": 1.5259665250778198, - "learning_rate": 5.323015075376885e-05, - "loss": 5.5757, - "step": 47054 - }, - { - "epoch": 24.539765319426337, - "grad_norm": 1.5274022817611694, - "learning_rate": 5.322914572864322e-05, - "loss": 5.1348, - "step": 47055 - }, - { - "epoch": 24.540286831812256, - "grad_norm": 1.5356996059417725, - "learning_rate": 5.3228140703517596e-05, - "loss": 5.4456, - "step": 47056 - }, - { - "epoch": 24.540808344198176, - "grad_norm": 1.562803030014038, - "learning_rate": 5.322713567839196e-05, - "loss": 5.1263, - "step": 47057 - }, - { - "epoch": 24.541329856584095, - "grad_norm": 1.5661804676055908, - "learning_rate": 5.322613065326634e-05, - "loss": 5.6693, - "step": 47058 - }, - { - "epoch": 24.541851368970015, - "grad_norm": 1.4674652814865112, - "learning_rate": 5.32251256281407e-05, - "loss": 5.1049, - "step": 47059 - }, - { - "epoch": 24.54237288135593, - "grad_norm": 1.8363059759140015, - "learning_rate": 5.322412060301508e-05, - "loss": 4.9358, - "step": 47060 - }, - { - "epoch": 24.54289439374185, - "grad_norm": 1.4799633026123047, - "learning_rate": 5.322311557788945e-05, - "loss": 5.1808, - "step": 47061 - }, - { - "epoch": 24.54341590612777, - "grad_norm": 1.532515287399292, - "learning_rate": 5.322211055276383e-05, - "loss": 5.0203, - "step": 47062 - }, - { - "epoch": 24.54393741851369, - "grad_norm": 1.574811577796936, - "learning_rate": 5.322110552763819e-05, - "loss": 5.1823, - "step": 47063 - }, - { - "epoch": 24.54445893089961, - "grad_norm": 1.5390830039978027, - "learning_rate": 5.322010050251256e-05, - "loss": 5.4646, - "step": 47064 - }, - { - "epoch": 24.544980443285528, - "grad_norm": 1.6037873029708862, - "learning_rate": 5.3219095477386935e-05, - "loss": 4.8655, - "step": 47065 - }, - { - "epoch": 24.545501955671448, - "grad_norm": 1.6616650819778442, - "learning_rate": 5.3218090452261306e-05, - "loss": 5.3843, - "step": 47066 - }, - { - "epoch": 24.546023468057367, - "grad_norm": 1.6359963417053223, - "learning_rate": 5.3217085427135684e-05, - "loss": 4.9845, - "step": 47067 - }, - { - "epoch": 24.546544980443286, - "grad_norm": 1.556873083114624, - "learning_rate": 5.321608040201005e-05, - "loss": 5.5559, - "step": 47068 - }, - { - "epoch": 24.547066492829206, - "grad_norm": 1.5560230016708374, - "learning_rate": 5.3215075376884426e-05, - "loss": 5.5444, - "step": 47069 - }, - { - "epoch": 24.547588005215125, - "grad_norm": 1.5889887809753418, - "learning_rate": 5.321407035175879e-05, - "loss": 5.1498, - "step": 47070 - }, - { - "epoch": 24.54810951760104, - "grad_norm": 1.5852190256118774, - "learning_rate": 5.321306532663317e-05, - "loss": 4.8279, - "step": 47071 - }, - { - "epoch": 24.54863102998696, - "grad_norm": 1.4976025819778442, - "learning_rate": 5.321206030150754e-05, - "loss": 5.5543, - "step": 47072 - }, - { - "epoch": 24.54915254237288, - "grad_norm": 1.601387619972229, - "learning_rate": 5.321105527638192e-05, - "loss": 5.1481, - "step": 47073 - }, - { - "epoch": 24.5496740547588, - "grad_norm": 1.5435199737548828, - "learning_rate": 5.321005025125628e-05, - "loss": 5.4969, - "step": 47074 - }, - { - "epoch": 24.55019556714472, - "grad_norm": 1.5487394332885742, - "learning_rate": 5.320904522613066e-05, - "loss": 4.6123, - "step": 47075 - }, - { - "epoch": 24.55071707953064, - "grad_norm": 1.6220908164978027, - "learning_rate": 5.320804020100503e-05, - "loss": 5.0785, - "step": 47076 - }, - { - "epoch": 24.551238591916558, - "grad_norm": 1.6250853538513184, - "learning_rate": 5.320703517587941e-05, - "loss": 5.0159, - "step": 47077 - }, - { - "epoch": 24.551760104302478, - "grad_norm": 1.5931196212768555, - "learning_rate": 5.320603015075377e-05, - "loss": 5.1938, - "step": 47078 - }, - { - "epoch": 24.552281616688397, - "grad_norm": 1.4731366634368896, - "learning_rate": 5.320502512562814e-05, - "loss": 5.3705, - "step": 47079 - }, - { - "epoch": 24.552803129074317, - "grad_norm": 1.5003907680511475, - "learning_rate": 5.3204020100502515e-05, - "loss": 5.0554, - "step": 47080 - }, - { - "epoch": 24.553324641460236, - "grad_norm": 1.6551129817962646, - "learning_rate": 5.3203015075376886e-05, - "loss": 5.0101, - "step": 47081 - }, - { - "epoch": 24.553846153846155, - "grad_norm": 1.5449728965759277, - "learning_rate": 5.3202010050251264e-05, - "loss": 5.5557, - "step": 47082 - }, - { - "epoch": 24.55436766623207, - "grad_norm": 1.7259401082992554, - "learning_rate": 5.320100502512563e-05, - "loss": 4.6966, - "step": 47083 - }, - { - "epoch": 24.55488917861799, - "grad_norm": 1.4797067642211914, - "learning_rate": 5.3200000000000006e-05, - "loss": 5.3778, - "step": 47084 - }, - { - "epoch": 24.55541069100391, - "grad_norm": 1.5345033407211304, - "learning_rate": 5.319899497487437e-05, - "loss": 5.3239, - "step": 47085 - }, - { - "epoch": 24.55593220338983, - "grad_norm": 1.6992316246032715, - "learning_rate": 5.319798994974875e-05, - "loss": 5.1966, - "step": 47086 - }, - { - "epoch": 24.55645371577575, - "grad_norm": 1.568390130996704, - "learning_rate": 5.319698492462312e-05, - "loss": 5.369, - "step": 47087 - }, - { - "epoch": 24.55697522816167, - "grad_norm": 1.5634956359863281, - "learning_rate": 5.31959798994975e-05, - "loss": 5.3814, - "step": 47088 - }, - { - "epoch": 24.557496740547588, - "grad_norm": 1.561781644821167, - "learning_rate": 5.319497487437186e-05, - "loss": 5.3529, - "step": 47089 - }, - { - "epoch": 24.558018252933508, - "grad_norm": 1.6695263385772705, - "learning_rate": 5.319396984924624e-05, - "loss": 5.0248, - "step": 47090 - }, - { - "epoch": 24.558539765319427, - "grad_norm": 1.7321109771728516, - "learning_rate": 5.31929648241206e-05, - "loss": 5.1224, - "step": 47091 - }, - { - "epoch": 24.559061277705347, - "grad_norm": 1.6410990953445435, - "learning_rate": 5.3191959798994974e-05, - "loss": 5.2466, - "step": 47092 - }, - { - "epoch": 24.559582790091266, - "grad_norm": 1.5850460529327393, - "learning_rate": 5.319095477386935e-05, - "loss": 5.0406, - "step": 47093 - }, - { - "epoch": 24.560104302477185, - "grad_norm": 1.6447933912277222, - "learning_rate": 5.3189949748743716e-05, - "loss": 4.9844, - "step": 47094 - }, - { - "epoch": 24.5606258148631, - "grad_norm": 1.5462090969085693, - "learning_rate": 5.3188944723618094e-05, - "loss": 5.3696, - "step": 47095 - }, - { - "epoch": 24.56114732724902, - "grad_norm": 1.6251468658447266, - "learning_rate": 5.318793969849246e-05, - "loss": 5.3425, - "step": 47096 - }, - { - "epoch": 24.56166883963494, - "grad_norm": 1.639073371887207, - "learning_rate": 5.3186934673366836e-05, - "loss": 5.0156, - "step": 47097 - }, - { - "epoch": 24.56219035202086, - "grad_norm": 1.6349979639053345, - "learning_rate": 5.318592964824121e-05, - "loss": 5.0279, - "step": 47098 - }, - { - "epoch": 24.56271186440678, - "grad_norm": 1.6422098875045776, - "learning_rate": 5.3184924623115585e-05, - "loss": 5.1782, - "step": 47099 - }, - { - "epoch": 24.5632333767927, - "grad_norm": 1.6538772583007812, - "learning_rate": 5.318391959798995e-05, - "loss": 5.0991, - "step": 47100 - }, - { - "epoch": 24.563754889178618, - "grad_norm": 1.6344274282455444, - "learning_rate": 5.318291457286433e-05, - "loss": 5.2534, - "step": 47101 - }, - { - "epoch": 24.564276401564538, - "grad_norm": 1.5293092727661133, - "learning_rate": 5.31819095477387e-05, - "loss": 4.9854, - "step": 47102 - }, - { - "epoch": 24.564797913950457, - "grad_norm": 1.5208077430725098, - "learning_rate": 5.3180904522613076e-05, - "loss": 5.4248, - "step": 47103 - }, - { - "epoch": 24.565319426336377, - "grad_norm": 1.5405877828598022, - "learning_rate": 5.317989949748744e-05, - "loss": 5.321, - "step": 47104 - }, - { - "epoch": 24.565840938722296, - "grad_norm": 1.4832463264465332, - "learning_rate": 5.3178894472361805e-05, - "loss": 5.2499, - "step": 47105 - }, - { - "epoch": 24.566362451108215, - "grad_norm": 1.541253924369812, - "learning_rate": 5.317788944723618e-05, - "loss": 5.3368, - "step": 47106 - }, - { - "epoch": 24.56688396349413, - "grad_norm": 1.5816291570663452, - "learning_rate": 5.317688442211055e-05, - "loss": 4.772, - "step": 47107 - }, - { - "epoch": 24.56740547588005, - "grad_norm": 1.488073706626892, - "learning_rate": 5.317587939698493e-05, - "loss": 4.7429, - "step": 47108 - }, - { - "epoch": 24.56792698826597, - "grad_norm": 1.603676438331604, - "learning_rate": 5.3174874371859295e-05, - "loss": 5.2272, - "step": 47109 - }, - { - "epoch": 24.56844850065189, - "grad_norm": 1.685495138168335, - "learning_rate": 5.317386934673367e-05, - "loss": 4.9442, - "step": 47110 - }, - { - "epoch": 24.56897001303781, - "grad_norm": 1.5859094858169556, - "learning_rate": 5.317286432160804e-05, - "loss": 5.2428, - "step": 47111 - }, - { - "epoch": 24.56949152542373, - "grad_norm": 1.5397531986236572, - "learning_rate": 5.3171859296482415e-05, - "loss": 5.2452, - "step": 47112 - }, - { - "epoch": 24.570013037809648, - "grad_norm": 1.5330406427383423, - "learning_rate": 5.3170854271356786e-05, - "loss": 5.435, - "step": 47113 - }, - { - "epoch": 24.570534550195568, - "grad_norm": 1.594334602355957, - "learning_rate": 5.3169849246231164e-05, - "loss": 5.2751, - "step": 47114 - }, - { - "epoch": 24.571056062581487, - "grad_norm": 1.499260663986206, - "learning_rate": 5.316884422110553e-05, - "loss": 5.4131, - "step": 47115 - }, - { - "epoch": 24.571577574967407, - "grad_norm": 1.531864881515503, - "learning_rate": 5.3167839195979906e-05, - "loss": 4.636, - "step": 47116 - }, - { - "epoch": 24.572099087353326, - "grad_norm": 1.588198184967041, - "learning_rate": 5.316683417085427e-05, - "loss": 4.982, - "step": 47117 - }, - { - "epoch": 24.572620599739246, - "grad_norm": 1.59525465965271, - "learning_rate": 5.316582914572864e-05, - "loss": 5.087, - "step": 47118 - }, - { - "epoch": 24.57314211212516, - "grad_norm": 1.6753292083740234, - "learning_rate": 5.316482412060302e-05, - "loss": 5.3368, - "step": 47119 - }, - { - "epoch": 24.57366362451108, - "grad_norm": 1.641148328781128, - "learning_rate": 5.3163819095477384e-05, - "loss": 4.9193, - "step": 47120 - }, - { - "epoch": 24.574185136897, - "grad_norm": 1.5460728406906128, - "learning_rate": 5.316281407035176e-05, - "loss": 5.2461, - "step": 47121 - }, - { - "epoch": 24.57470664928292, - "grad_norm": 1.5380750894546509, - "learning_rate": 5.316180904522613e-05, - "loss": 5.2758, - "step": 47122 - }, - { - "epoch": 24.57522816166884, - "grad_norm": 1.5853967666625977, - "learning_rate": 5.316080402010051e-05, - "loss": 4.9809, - "step": 47123 - }, - { - "epoch": 24.57574967405476, - "grad_norm": 1.4907805919647217, - "learning_rate": 5.3159798994974875e-05, - "loss": 4.5194, - "step": 47124 - }, - { - "epoch": 24.576271186440678, - "grad_norm": 1.5488293170928955, - "learning_rate": 5.315879396984925e-05, - "loss": 5.256, - "step": 47125 - }, - { - "epoch": 24.576792698826598, - "grad_norm": 1.6093873977661133, - "learning_rate": 5.315778894472362e-05, - "loss": 5.623, - "step": 47126 - }, - { - "epoch": 24.577314211212517, - "grad_norm": 1.5698741674423218, - "learning_rate": 5.3156783919597995e-05, - "loss": 5.1486, - "step": 47127 - }, - { - "epoch": 24.577835723598437, - "grad_norm": 1.5810225009918213, - "learning_rate": 5.3155778894472366e-05, - "loss": 5.387, - "step": 47128 - }, - { - "epoch": 24.578357235984356, - "grad_norm": 1.5035818815231323, - "learning_rate": 5.3154773869346744e-05, - "loss": 4.7217, - "step": 47129 - }, - { - "epoch": 24.578878748370272, - "grad_norm": 1.5102927684783936, - "learning_rate": 5.315376884422111e-05, - "loss": 5.1896, - "step": 47130 - }, - { - "epoch": 24.57940026075619, - "grad_norm": 1.4402732849121094, - "learning_rate": 5.315276381909547e-05, - "loss": 5.6598, - "step": 47131 - }, - { - "epoch": 24.57992177314211, - "grad_norm": 1.523047685623169, - "learning_rate": 5.315175879396985e-05, - "loss": 5.1284, - "step": 47132 - }, - { - "epoch": 24.58044328552803, - "grad_norm": 1.5802021026611328, - "learning_rate": 5.315075376884422e-05, - "loss": 5.1942, - "step": 47133 - }, - { - "epoch": 24.58096479791395, - "grad_norm": 1.6296401023864746, - "learning_rate": 5.31497487437186e-05, - "loss": 5.2271, - "step": 47134 - }, - { - "epoch": 24.58148631029987, - "grad_norm": 1.530971884727478, - "learning_rate": 5.314874371859296e-05, - "loss": 5.0256, - "step": 47135 - }, - { - "epoch": 24.58200782268579, - "grad_norm": 1.6363250017166138, - "learning_rate": 5.314773869346734e-05, - "loss": 5.3741, - "step": 47136 - }, - { - "epoch": 24.58252933507171, - "grad_norm": 1.5470699071884155, - "learning_rate": 5.3146733668341705e-05, - "loss": 5.9234, - "step": 47137 - }, - { - "epoch": 24.583050847457628, - "grad_norm": 1.463914394378662, - "learning_rate": 5.314572864321608e-05, - "loss": 5.5869, - "step": 47138 - }, - { - "epoch": 24.583572359843547, - "grad_norm": 1.4803458452224731, - "learning_rate": 5.3144723618090454e-05, - "loss": 5.4209, - "step": 47139 - }, - { - "epoch": 24.584093872229467, - "grad_norm": 1.5428608655929565, - "learning_rate": 5.314371859296483e-05, - "loss": 5.3396, - "step": 47140 - }, - { - "epoch": 24.584615384615386, - "grad_norm": 1.56388521194458, - "learning_rate": 5.3142713567839196e-05, - "loss": 5.0289, - "step": 47141 - }, - { - "epoch": 24.585136897001306, - "grad_norm": 1.6431182622909546, - "learning_rate": 5.3141708542713574e-05, - "loss": 5.4726, - "step": 47142 - }, - { - "epoch": 24.58565840938722, - "grad_norm": 1.6031723022460938, - "learning_rate": 5.3140703517587945e-05, - "loss": 5.2736, - "step": 47143 - }, - { - "epoch": 24.58617992177314, - "grad_norm": 1.5232685804367065, - "learning_rate": 5.313969849246231e-05, - "loss": 5.3398, - "step": 47144 - }, - { - "epoch": 24.58670143415906, - "grad_norm": 1.6355420351028442, - "learning_rate": 5.313869346733669e-05, - "loss": 5.0655, - "step": 47145 - }, - { - "epoch": 24.58722294654498, - "grad_norm": 1.623210072517395, - "learning_rate": 5.313768844221105e-05, - "loss": 5.1625, - "step": 47146 - }, - { - "epoch": 24.5877444589309, - "grad_norm": 1.5997458696365356, - "learning_rate": 5.313668341708543e-05, - "loss": 4.9867, - "step": 47147 - }, - { - "epoch": 24.58826597131682, - "grad_norm": 1.5831611156463623, - "learning_rate": 5.31356783919598e-05, - "loss": 5.1526, - "step": 47148 - }, - { - "epoch": 24.58878748370274, - "grad_norm": 1.4715105295181274, - "learning_rate": 5.313467336683418e-05, - "loss": 5.5879, - "step": 47149 - }, - { - "epoch": 24.589308996088658, - "grad_norm": 1.566053867340088, - "learning_rate": 5.313366834170854e-05, - "loss": 4.9439, - "step": 47150 - }, - { - "epoch": 24.589830508474577, - "grad_norm": 1.6109719276428223, - "learning_rate": 5.313266331658292e-05, - "loss": 5.1987, - "step": 47151 - }, - { - "epoch": 24.590352020860497, - "grad_norm": 1.4973137378692627, - "learning_rate": 5.3131658291457285e-05, - "loss": 5.5718, - "step": 47152 - }, - { - "epoch": 24.590873533246416, - "grad_norm": 1.6253700256347656, - "learning_rate": 5.313065326633166e-05, - "loss": 4.3589, - "step": 47153 - }, - { - "epoch": 24.591395045632332, - "grad_norm": 1.5874892473220825, - "learning_rate": 5.3129648241206033e-05, - "loss": 5.2369, - "step": 47154 - }, - { - "epoch": 24.59191655801825, - "grad_norm": 1.5949761867523193, - "learning_rate": 5.312864321608041e-05, - "loss": 5.3937, - "step": 47155 - }, - { - "epoch": 24.59243807040417, - "grad_norm": 1.5417932271957397, - "learning_rate": 5.3127638190954776e-05, - "loss": 5.4357, - "step": 47156 - }, - { - "epoch": 24.59295958279009, - "grad_norm": 1.5004431009292603, - "learning_rate": 5.312663316582914e-05, - "loss": 5.2604, - "step": 47157 - }, - { - "epoch": 24.59348109517601, - "grad_norm": 1.5038899183273315, - "learning_rate": 5.312562814070352e-05, - "loss": 5.3881, - "step": 47158 - }, - { - "epoch": 24.59400260756193, - "grad_norm": 1.653290033340454, - "learning_rate": 5.312462311557789e-05, - "loss": 4.9258, - "step": 47159 - }, - { - "epoch": 24.59452411994785, - "grad_norm": 1.5257371664047241, - "learning_rate": 5.3123618090452267e-05, - "loss": 5.2646, - "step": 47160 - }, - { - "epoch": 24.59504563233377, - "grad_norm": 1.5981290340423584, - "learning_rate": 5.312261306532663e-05, - "loss": 5.4452, - "step": 47161 - }, - { - "epoch": 24.595567144719688, - "grad_norm": 1.5279515981674194, - "learning_rate": 5.312160804020101e-05, - "loss": 5.4959, - "step": 47162 - }, - { - "epoch": 24.596088657105607, - "grad_norm": 1.5072613954544067, - "learning_rate": 5.312060301507538e-05, - "loss": 5.453, - "step": 47163 - }, - { - "epoch": 24.596610169491527, - "grad_norm": 1.6152540445327759, - "learning_rate": 5.311959798994976e-05, - "loss": 5.4195, - "step": 47164 - }, - { - "epoch": 24.597131681877446, - "grad_norm": 1.6595503091812134, - "learning_rate": 5.311859296482412e-05, - "loss": 5.0994, - "step": 47165 - }, - { - "epoch": 24.597653194263362, - "grad_norm": 1.5254706144332886, - "learning_rate": 5.31175879396985e-05, - "loss": 5.2007, - "step": 47166 - }, - { - "epoch": 24.59817470664928, - "grad_norm": 1.5437471866607666, - "learning_rate": 5.3116582914572864e-05, - "loss": 5.2794, - "step": 47167 - }, - { - "epoch": 24.5986962190352, - "grad_norm": 1.5850486755371094, - "learning_rate": 5.311557788944724e-05, - "loss": 5.0089, - "step": 47168 - }, - { - "epoch": 24.59921773142112, - "grad_norm": 1.5807647705078125, - "learning_rate": 5.311457286432161e-05, - "loss": 5.0487, - "step": 47169 - }, - { - "epoch": 24.59973924380704, - "grad_norm": 1.6075592041015625, - "learning_rate": 5.311356783919599e-05, - "loss": 5.2585, - "step": 47170 - }, - { - "epoch": 24.60026075619296, - "grad_norm": 1.6043047904968262, - "learning_rate": 5.3112562814070355e-05, - "loss": 5.1537, - "step": 47171 - }, - { - "epoch": 24.60078226857888, - "grad_norm": 1.6553950309753418, - "learning_rate": 5.311155778894472e-05, - "loss": 5.244, - "step": 47172 - }, - { - "epoch": 24.6013037809648, - "grad_norm": 1.560715913772583, - "learning_rate": 5.31105527638191e-05, - "loss": 4.8832, - "step": 47173 - }, - { - "epoch": 24.601825293350718, - "grad_norm": 1.5508004426956177, - "learning_rate": 5.310954773869347e-05, - "loss": 5.155, - "step": 47174 - }, - { - "epoch": 24.602346805736637, - "grad_norm": 1.567783236503601, - "learning_rate": 5.3108542713567846e-05, - "loss": 5.4905, - "step": 47175 - }, - { - "epoch": 24.602868318122557, - "grad_norm": 1.4671134948730469, - "learning_rate": 5.310753768844221e-05, - "loss": 4.9243, - "step": 47176 - }, - { - "epoch": 24.603389830508476, - "grad_norm": 1.534091830253601, - "learning_rate": 5.310653266331659e-05, - "loss": 5.3797, - "step": 47177 - }, - { - "epoch": 24.603911342894392, - "grad_norm": 1.4277266263961792, - "learning_rate": 5.310552763819095e-05, - "loss": 5.4755, - "step": 47178 - }, - { - "epoch": 24.60443285528031, - "grad_norm": 1.4792784452438354, - "learning_rate": 5.310452261306533e-05, - "loss": 4.5365, - "step": 47179 - }, - { - "epoch": 24.60495436766623, - "grad_norm": 1.489902377128601, - "learning_rate": 5.31035175879397e-05, - "loss": 5.4915, - "step": 47180 - }, - { - "epoch": 24.60547588005215, - "grad_norm": 1.5528783798217773, - "learning_rate": 5.310251256281408e-05, - "loss": 5.3994, - "step": 47181 - }, - { - "epoch": 24.60599739243807, - "grad_norm": 1.6299142837524414, - "learning_rate": 5.310150753768844e-05, - "loss": 5.4266, - "step": 47182 - }, - { - "epoch": 24.60651890482399, - "grad_norm": 1.475597620010376, - "learning_rate": 5.310050251256282e-05, - "loss": 5.3714, - "step": 47183 - }, - { - "epoch": 24.60704041720991, - "grad_norm": 1.4581830501556396, - "learning_rate": 5.3099497487437185e-05, - "loss": 5.4902, - "step": 47184 - }, - { - "epoch": 24.60756192959583, - "grad_norm": 1.5175524950027466, - "learning_rate": 5.3098492462311556e-05, - "loss": 5.2732, - "step": 47185 - }, - { - "epoch": 24.608083441981748, - "grad_norm": 1.531785249710083, - "learning_rate": 5.3097487437185934e-05, - "loss": 5.3204, - "step": 47186 - }, - { - "epoch": 24.608604954367667, - "grad_norm": 1.693729281425476, - "learning_rate": 5.30964824120603e-05, - "loss": 4.7665, - "step": 47187 - }, - { - "epoch": 24.609126466753587, - "grad_norm": 1.577835202217102, - "learning_rate": 5.3095477386934676e-05, - "loss": 5.3026, - "step": 47188 - }, - { - "epoch": 24.609647979139506, - "grad_norm": 1.558287262916565, - "learning_rate": 5.309447236180905e-05, - "loss": 5.197, - "step": 47189 - }, - { - "epoch": 24.610169491525422, - "grad_norm": 1.5376355648040771, - "learning_rate": 5.3093467336683425e-05, - "loss": 5.0287, - "step": 47190 - }, - { - "epoch": 24.61069100391134, - "grad_norm": 1.4702706336975098, - "learning_rate": 5.309246231155779e-05, - "loss": 5.0335, - "step": 47191 - }, - { - "epoch": 24.61121251629726, - "grad_norm": 1.7605355978012085, - "learning_rate": 5.309145728643217e-05, - "loss": 5.0939, - "step": 47192 - }, - { - "epoch": 24.61173402868318, - "grad_norm": 1.7096079587936401, - "learning_rate": 5.309045226130653e-05, - "loss": 4.9587, - "step": 47193 - }, - { - "epoch": 24.6122555410691, - "grad_norm": 1.6876126527786255, - "learning_rate": 5.308944723618091e-05, - "loss": 5.319, - "step": 47194 - }, - { - "epoch": 24.61277705345502, - "grad_norm": 1.523714303970337, - "learning_rate": 5.308844221105528e-05, - "loss": 5.2504, - "step": 47195 - }, - { - "epoch": 24.61329856584094, - "grad_norm": 1.5173941850662231, - "learning_rate": 5.308743718592966e-05, - "loss": 5.0652, - "step": 47196 - }, - { - "epoch": 24.61382007822686, - "grad_norm": 1.5298278331756592, - "learning_rate": 5.308643216080402e-05, - "loss": 5.2781, - "step": 47197 - }, - { - "epoch": 24.614341590612778, - "grad_norm": 1.6060129404067993, - "learning_rate": 5.308542713567839e-05, - "loss": 4.8044, - "step": 47198 - }, - { - "epoch": 24.614863102998697, - "grad_norm": 1.4899954795837402, - "learning_rate": 5.3084422110552765e-05, - "loss": 4.6245, - "step": 47199 - }, - { - "epoch": 24.615384615384617, - "grad_norm": 1.5288375616073608, - "learning_rate": 5.3083417085427136e-05, - "loss": 5.355, - "step": 47200 - }, - { - "epoch": 24.615906127770536, - "grad_norm": 1.6281635761260986, - "learning_rate": 5.3082412060301514e-05, - "loss": 4.9669, - "step": 47201 - }, - { - "epoch": 24.616427640156452, - "grad_norm": 1.6428015232086182, - "learning_rate": 5.308140703517588e-05, - "loss": 5.2924, - "step": 47202 - }, - { - "epoch": 24.61694915254237, - "grad_norm": 1.6521916389465332, - "learning_rate": 5.3080402010050256e-05, - "loss": 5.2068, - "step": 47203 - }, - { - "epoch": 24.61747066492829, - "grad_norm": 1.627158761024475, - "learning_rate": 5.307939698492462e-05, - "loss": 5.3274, - "step": 47204 - }, - { - "epoch": 24.61799217731421, - "grad_norm": 1.4687442779541016, - "learning_rate": 5.3078391959799e-05, - "loss": 5.6754, - "step": 47205 - }, - { - "epoch": 24.61851368970013, - "grad_norm": 1.5469396114349365, - "learning_rate": 5.307738693467337e-05, - "loss": 5.406, - "step": 47206 - }, - { - "epoch": 24.61903520208605, - "grad_norm": 1.6229835748672485, - "learning_rate": 5.307638190954775e-05, - "loss": 5.2833, - "step": 47207 - }, - { - "epoch": 24.61955671447197, - "grad_norm": 1.4443403482437134, - "learning_rate": 5.307537688442211e-05, - "loss": 5.4767, - "step": 47208 - }, - { - "epoch": 24.62007822685789, - "grad_norm": 1.721652626991272, - "learning_rate": 5.307437185929649e-05, - "loss": 5.2897, - "step": 47209 - }, - { - "epoch": 24.620599739243808, - "grad_norm": 1.523107647895813, - "learning_rate": 5.307336683417086e-05, - "loss": 5.4533, - "step": 47210 - }, - { - "epoch": 24.621121251629727, - "grad_norm": 1.5523746013641357, - "learning_rate": 5.3072361809045224e-05, - "loss": 5.2791, - "step": 47211 - }, - { - "epoch": 24.621642764015647, - "grad_norm": 1.5223809480667114, - "learning_rate": 5.30713567839196e-05, - "loss": 5.2408, - "step": 47212 - }, - { - "epoch": 24.622164276401566, - "grad_norm": 1.5237787961959839, - "learning_rate": 5.3070351758793966e-05, - "loss": 5.4868, - "step": 47213 - }, - { - "epoch": 24.622685788787482, - "grad_norm": 1.4946768283843994, - "learning_rate": 5.3069346733668344e-05, - "loss": 5.4956, - "step": 47214 - }, - { - "epoch": 24.6232073011734, - "grad_norm": 1.7775529623031616, - "learning_rate": 5.3068341708542715e-05, - "loss": 5.4592, - "step": 47215 - }, - { - "epoch": 24.62372881355932, - "grad_norm": 1.7927982807159424, - "learning_rate": 5.306733668341709e-05, - "loss": 5.2087, - "step": 47216 - }, - { - "epoch": 24.62425032594524, - "grad_norm": 1.5073119401931763, - "learning_rate": 5.306633165829146e-05, - "loss": 5.3644, - "step": 47217 - }, - { - "epoch": 24.62477183833116, - "grad_norm": 1.648723840713501, - "learning_rate": 5.3065326633165835e-05, - "loss": 5.2285, - "step": 47218 - }, - { - "epoch": 24.62529335071708, - "grad_norm": 1.5113118886947632, - "learning_rate": 5.30643216080402e-05, - "loss": 5.626, - "step": 47219 - }, - { - "epoch": 24.625814863103, - "grad_norm": 1.5916692018508911, - "learning_rate": 5.306331658291458e-05, - "loss": 4.2101, - "step": 47220 - }, - { - "epoch": 24.62633637548892, - "grad_norm": 1.5326213836669922, - "learning_rate": 5.306231155778895e-05, - "loss": 4.7042, - "step": 47221 - }, - { - "epoch": 24.626857887874838, - "grad_norm": 1.599189043045044, - "learning_rate": 5.3061306532663326e-05, - "loss": 5.1577, - "step": 47222 - }, - { - "epoch": 24.627379400260757, - "grad_norm": 1.583525538444519, - "learning_rate": 5.306030150753769e-05, - "loss": 5.2441, - "step": 47223 - }, - { - "epoch": 24.627900912646677, - "grad_norm": 1.5252430438995361, - "learning_rate": 5.3059296482412055e-05, - "loss": 5.708, - "step": 47224 - }, - { - "epoch": 24.628422425032596, - "grad_norm": 1.4827157258987427, - "learning_rate": 5.305829145728643e-05, - "loss": 5.1982, - "step": 47225 - }, - { - "epoch": 24.628943937418512, - "grad_norm": 1.5864847898483276, - "learning_rate": 5.3057286432160803e-05, - "loss": 5.2547, - "step": 47226 - }, - { - "epoch": 24.62946544980443, - "grad_norm": 1.5038081407546997, - "learning_rate": 5.305628140703518e-05, - "loss": 4.8134, - "step": 47227 - }, - { - "epoch": 24.62998696219035, - "grad_norm": 1.6411970853805542, - "learning_rate": 5.3055276381909546e-05, - "loss": 4.5142, - "step": 47228 - }, - { - "epoch": 24.63050847457627, - "grad_norm": 1.5785592794418335, - "learning_rate": 5.305427135678392e-05, - "loss": 4.997, - "step": 47229 - }, - { - "epoch": 24.63102998696219, - "grad_norm": 1.6743130683898926, - "learning_rate": 5.3053266331658294e-05, - "loss": 5.2298, - "step": 47230 - }, - { - "epoch": 24.63155149934811, - "grad_norm": 1.6261507272720337, - "learning_rate": 5.305226130653267e-05, - "loss": 5.495, - "step": 47231 - }, - { - "epoch": 24.63207301173403, - "grad_norm": 1.5224404335021973, - "learning_rate": 5.3051256281407036e-05, - "loss": 4.9609, - "step": 47232 - }, - { - "epoch": 24.63259452411995, - "grad_norm": 1.4874696731567383, - "learning_rate": 5.3050251256281414e-05, - "loss": 5.5073, - "step": 47233 - }, - { - "epoch": 24.633116036505868, - "grad_norm": 1.6006276607513428, - "learning_rate": 5.304924623115578e-05, - "loss": 4.5828, - "step": 47234 - }, - { - "epoch": 24.633637548891787, - "grad_norm": 1.6693974733352661, - "learning_rate": 5.3048241206030156e-05, - "loss": 4.8457, - "step": 47235 - }, - { - "epoch": 24.634159061277707, - "grad_norm": 1.506693720817566, - "learning_rate": 5.304723618090453e-05, - "loss": 5.1673, - "step": 47236 - }, - { - "epoch": 24.634680573663623, - "grad_norm": 1.5398550033569336, - "learning_rate": 5.304623115577889e-05, - "loss": 4.24, - "step": 47237 - }, - { - "epoch": 24.635202086049542, - "grad_norm": 1.5314178466796875, - "learning_rate": 5.304522613065327e-05, - "loss": 5.4885, - "step": 47238 - }, - { - "epoch": 24.63572359843546, - "grad_norm": 1.5919532775878906, - "learning_rate": 5.3044221105527634e-05, - "loss": 4.9412, - "step": 47239 - }, - { - "epoch": 24.63624511082138, - "grad_norm": 1.5714800357818604, - "learning_rate": 5.304321608040201e-05, - "loss": 5.1706, - "step": 47240 - }, - { - "epoch": 24.6367666232073, - "grad_norm": 1.5271435976028442, - "learning_rate": 5.304221105527638e-05, - "loss": 5.5402, - "step": 47241 - }, - { - "epoch": 24.63728813559322, - "grad_norm": 1.5567569732666016, - "learning_rate": 5.304120603015076e-05, - "loss": 5.1546, - "step": 47242 - }, - { - "epoch": 24.63780964797914, - "grad_norm": 1.656408429145813, - "learning_rate": 5.3040201005025125e-05, - "loss": 5.3986, - "step": 47243 - }, - { - "epoch": 24.63833116036506, - "grad_norm": 1.6177750825881958, - "learning_rate": 5.30391959798995e-05, - "loss": 5.27, - "step": 47244 - }, - { - "epoch": 24.63885267275098, - "grad_norm": 1.5970677137374878, - "learning_rate": 5.303819095477387e-05, - "loss": 5.2944, - "step": 47245 - }, - { - "epoch": 24.639374185136898, - "grad_norm": 1.4506051540374756, - "learning_rate": 5.3037185929648245e-05, - "loss": 5.5297, - "step": 47246 - }, - { - "epoch": 24.639895697522817, - "grad_norm": 1.6511797904968262, - "learning_rate": 5.3036180904522616e-05, - "loss": 4.8308, - "step": 47247 - }, - { - "epoch": 24.640417209908737, - "grad_norm": 1.5718356370925903, - "learning_rate": 5.3035175879396994e-05, - "loss": 5.1075, - "step": 47248 - }, - { - "epoch": 24.640938722294656, - "grad_norm": 1.768456220626831, - "learning_rate": 5.303417085427136e-05, - "loss": 5.164, - "step": 47249 - }, - { - "epoch": 24.641460234680572, - "grad_norm": 1.6422250270843506, - "learning_rate": 5.3033165829145736e-05, - "loss": 5.2793, - "step": 47250 - }, - { - "epoch": 24.64198174706649, - "grad_norm": 1.6708883047103882, - "learning_rate": 5.303216080402011e-05, - "loss": 5.0487, - "step": 47251 - }, - { - "epoch": 24.64250325945241, - "grad_norm": 1.6016300916671753, - "learning_rate": 5.303115577889447e-05, - "loss": 5.2212, - "step": 47252 - }, - { - "epoch": 24.64302477183833, - "grad_norm": 1.5365461111068726, - "learning_rate": 5.303015075376885e-05, - "loss": 5.5075, - "step": 47253 - }, - { - "epoch": 24.64354628422425, - "grad_norm": 1.605668306350708, - "learning_rate": 5.302914572864321e-05, - "loss": 5.0429, - "step": 47254 - }, - { - "epoch": 24.64406779661017, - "grad_norm": 1.4853752851486206, - "learning_rate": 5.302814070351759e-05, - "loss": 5.3686, - "step": 47255 - }, - { - "epoch": 24.64458930899609, - "grad_norm": 1.6953967809677124, - "learning_rate": 5.302713567839196e-05, - "loss": 5.1381, - "step": 47256 - }, - { - "epoch": 24.64511082138201, - "grad_norm": 1.5825589895248413, - "learning_rate": 5.302613065326634e-05, - "loss": 5.2158, - "step": 47257 - }, - { - "epoch": 24.645632333767928, - "grad_norm": 1.7015743255615234, - "learning_rate": 5.3025125628140704e-05, - "loss": 4.8415, - "step": 47258 - }, - { - "epoch": 24.646153846153847, - "grad_norm": 1.5502547025680542, - "learning_rate": 5.302412060301508e-05, - "loss": 5.2807, - "step": 47259 - }, - { - "epoch": 24.646675358539767, - "grad_norm": 2.1508536338806152, - "learning_rate": 5.3023115577889446e-05, - "loss": 5.3073, - "step": 47260 - }, - { - "epoch": 24.647196870925683, - "grad_norm": 1.6809701919555664, - "learning_rate": 5.3022110552763824e-05, - "loss": 5.1485, - "step": 47261 - }, - { - "epoch": 24.647718383311602, - "grad_norm": 1.5775620937347412, - "learning_rate": 5.3021105527638195e-05, - "loss": 5.0698, - "step": 47262 - }, - { - "epoch": 24.64823989569752, - "grad_norm": 1.5737042427062988, - "learning_rate": 5.302010050251257e-05, - "loss": 5.5859, - "step": 47263 - }, - { - "epoch": 24.64876140808344, - "grad_norm": 1.551084041595459, - "learning_rate": 5.301909547738694e-05, - "loss": 5.1445, - "step": 47264 - }, - { - "epoch": 24.64928292046936, - "grad_norm": 1.7412413358688354, - "learning_rate": 5.30180904522613e-05, - "loss": 4.7423, - "step": 47265 - }, - { - "epoch": 24.64980443285528, - "grad_norm": 1.522901177406311, - "learning_rate": 5.301708542713568e-05, - "loss": 5.4326, - "step": 47266 - }, - { - "epoch": 24.6503259452412, - "grad_norm": 1.7019038200378418, - "learning_rate": 5.301608040201005e-05, - "loss": 5.111, - "step": 47267 - }, - { - "epoch": 24.65084745762712, - "grad_norm": 1.5494191646575928, - "learning_rate": 5.301507537688443e-05, - "loss": 5.064, - "step": 47268 - }, - { - "epoch": 24.65136897001304, - "grad_norm": 1.7355725765228271, - "learning_rate": 5.301407035175879e-05, - "loss": 5.16, - "step": 47269 - }, - { - "epoch": 24.651890482398958, - "grad_norm": 1.4748327732086182, - "learning_rate": 5.301306532663317e-05, - "loss": 5.4889, - "step": 47270 - }, - { - "epoch": 24.652411994784877, - "grad_norm": 1.5693140029907227, - "learning_rate": 5.3012060301507535e-05, - "loss": 4.6313, - "step": 47271 - }, - { - "epoch": 24.652933507170797, - "grad_norm": 1.4056451320648193, - "learning_rate": 5.301105527638191e-05, - "loss": 5.9091, - "step": 47272 - }, - { - "epoch": 24.653455019556713, - "grad_norm": 1.6304889917373657, - "learning_rate": 5.3010050251256283e-05, - "loss": 4.8851, - "step": 47273 - }, - { - "epoch": 24.653976531942632, - "grad_norm": 1.7449266910552979, - "learning_rate": 5.300904522613066e-05, - "loss": 4.3523, - "step": 47274 - }, - { - "epoch": 24.65449804432855, - "grad_norm": 1.5028966665267944, - "learning_rate": 5.3008040201005026e-05, - "loss": 5.5223, - "step": 47275 - }, - { - "epoch": 24.65501955671447, - "grad_norm": 1.5807825326919556, - "learning_rate": 5.3007035175879403e-05, - "loss": 4.7024, - "step": 47276 - }, - { - "epoch": 24.65554106910039, - "grad_norm": 1.5285805463790894, - "learning_rate": 5.3006030150753774e-05, - "loss": 5.6849, - "step": 47277 - }, - { - "epoch": 24.65606258148631, - "grad_norm": 1.6249452829360962, - "learning_rate": 5.300502512562814e-05, - "loss": 4.5854, - "step": 47278 - }, - { - "epoch": 24.65658409387223, - "grad_norm": 1.557190179824829, - "learning_rate": 5.3004020100502517e-05, - "loss": 4.777, - "step": 47279 - }, - { - "epoch": 24.65710560625815, - "grad_norm": 1.5841377973556519, - "learning_rate": 5.300301507537688e-05, - "loss": 5.116, - "step": 47280 - }, - { - "epoch": 24.65762711864407, - "grad_norm": 1.4651930332183838, - "learning_rate": 5.300201005025126e-05, - "loss": 5.4987, - "step": 47281 - }, - { - "epoch": 24.658148631029988, - "grad_norm": 1.5852582454681396, - "learning_rate": 5.300100502512563e-05, - "loss": 5.2137, - "step": 47282 - }, - { - "epoch": 24.658670143415907, - "grad_norm": 1.6034446954727173, - "learning_rate": 5.300000000000001e-05, - "loss": 5.2335, - "step": 47283 - }, - { - "epoch": 24.659191655801827, - "grad_norm": 1.6141154766082764, - "learning_rate": 5.299899497487437e-05, - "loss": 5.2183, - "step": 47284 - }, - { - "epoch": 24.659713168187743, - "grad_norm": 1.5855337381362915, - "learning_rate": 5.299798994974875e-05, - "loss": 5.192, - "step": 47285 - }, - { - "epoch": 24.660234680573662, - "grad_norm": 1.5944453477859497, - "learning_rate": 5.2996984924623114e-05, - "loss": 5.5681, - "step": 47286 - }, - { - "epoch": 24.66075619295958, - "grad_norm": 1.455283284187317, - "learning_rate": 5.299597989949749e-05, - "loss": 5.0151, - "step": 47287 - }, - { - "epoch": 24.6612777053455, - "grad_norm": 1.498062252998352, - "learning_rate": 5.299497487437186e-05, - "loss": 5.4247, - "step": 47288 - }, - { - "epoch": 24.66179921773142, - "grad_norm": 1.5763134956359863, - "learning_rate": 5.299396984924624e-05, - "loss": 5.3298, - "step": 47289 - }, - { - "epoch": 24.66232073011734, - "grad_norm": 1.475059151649475, - "learning_rate": 5.2992964824120605e-05, - "loss": 5.477, - "step": 47290 - }, - { - "epoch": 24.66284224250326, - "grad_norm": 1.4729453325271606, - "learning_rate": 5.299195979899497e-05, - "loss": 5.5025, - "step": 47291 - }, - { - "epoch": 24.66336375488918, - "grad_norm": 1.4951444864273071, - "learning_rate": 5.299095477386935e-05, - "loss": 5.3979, - "step": 47292 - }, - { - "epoch": 24.6638852672751, - "grad_norm": 1.543735146522522, - "learning_rate": 5.298994974874372e-05, - "loss": 5.8062, - "step": 47293 - }, - { - "epoch": 24.664406779661018, - "grad_norm": 1.5200599431991577, - "learning_rate": 5.2988944723618096e-05, - "loss": 5.5263, - "step": 47294 - }, - { - "epoch": 24.664928292046937, - "grad_norm": 1.6580661535263062, - "learning_rate": 5.298793969849246e-05, - "loss": 5.0338, - "step": 47295 - }, - { - "epoch": 24.665449804432857, - "grad_norm": 1.5120186805725098, - "learning_rate": 5.298693467336684e-05, - "loss": 5.3792, - "step": 47296 - }, - { - "epoch": 24.665971316818773, - "grad_norm": 1.4971370697021484, - "learning_rate": 5.298592964824121e-05, - "loss": 5.6788, - "step": 47297 - }, - { - "epoch": 24.666492829204692, - "grad_norm": 1.5689584016799927, - "learning_rate": 5.298492462311559e-05, - "loss": 4.8034, - "step": 47298 - }, - { - "epoch": 24.667014341590612, - "grad_norm": 1.4943147897720337, - "learning_rate": 5.298391959798995e-05, - "loss": 5.5322, - "step": 47299 - }, - { - "epoch": 24.66753585397653, - "grad_norm": 1.5954899787902832, - "learning_rate": 5.298291457286433e-05, - "loss": 5.2418, - "step": 47300 - }, - { - "epoch": 24.66805736636245, - "grad_norm": 1.4898053407669067, - "learning_rate": 5.298190954773869e-05, - "loss": 5.6953, - "step": 47301 - }, - { - "epoch": 24.66857887874837, - "grad_norm": 1.5356955528259277, - "learning_rate": 5.298090452261307e-05, - "loss": 5.3747, - "step": 47302 - }, - { - "epoch": 24.66910039113429, - "grad_norm": 1.5907671451568604, - "learning_rate": 5.297989949748744e-05, - "loss": 5.3825, - "step": 47303 - }, - { - "epoch": 24.66962190352021, - "grad_norm": 1.6802881956100464, - "learning_rate": 5.2978894472361806e-05, - "loss": 5.0529, - "step": 47304 - }, - { - "epoch": 24.67014341590613, - "grad_norm": 1.4547600746154785, - "learning_rate": 5.2977889447236184e-05, - "loss": 5.3847, - "step": 47305 - }, - { - "epoch": 24.670664928292048, - "grad_norm": 1.559444785118103, - "learning_rate": 5.297688442211055e-05, - "loss": 5.1646, - "step": 47306 - }, - { - "epoch": 24.671186440677968, - "grad_norm": 1.5914020538330078, - "learning_rate": 5.2975879396984926e-05, - "loss": 5.335, - "step": 47307 - }, - { - "epoch": 24.671707953063887, - "grad_norm": 1.5467071533203125, - "learning_rate": 5.29748743718593e-05, - "loss": 5.14, - "step": 47308 - }, - { - "epoch": 24.672229465449803, - "grad_norm": 1.5258315801620483, - "learning_rate": 5.2973869346733675e-05, - "loss": 5.2126, - "step": 47309 - }, - { - "epoch": 24.672750977835722, - "grad_norm": 1.467491865158081, - "learning_rate": 5.297286432160804e-05, - "loss": 5.2486, - "step": 47310 - }, - { - "epoch": 24.673272490221642, - "grad_norm": 1.5919041633605957, - "learning_rate": 5.297185929648242e-05, - "loss": 5.4162, - "step": 47311 - }, - { - "epoch": 24.67379400260756, - "grad_norm": 1.4914984703063965, - "learning_rate": 5.297085427135678e-05, - "loss": 5.1323, - "step": 47312 - }, - { - "epoch": 24.67431551499348, - "grad_norm": 1.4902338981628418, - "learning_rate": 5.296984924623116e-05, - "loss": 5.2009, - "step": 47313 - }, - { - "epoch": 24.6748370273794, - "grad_norm": 1.5726829767227173, - "learning_rate": 5.296884422110553e-05, - "loss": 5.6046, - "step": 47314 - }, - { - "epoch": 24.67535853976532, - "grad_norm": 1.523964524269104, - "learning_rate": 5.296783919597991e-05, - "loss": 5.4268, - "step": 47315 - }, - { - "epoch": 24.67588005215124, - "grad_norm": 1.6206296682357788, - "learning_rate": 5.296683417085427e-05, - "loss": 4.8415, - "step": 47316 - }, - { - "epoch": 24.67640156453716, - "grad_norm": 1.713107705116272, - "learning_rate": 5.2965829145728644e-05, - "loss": 4.5193, - "step": 47317 - }, - { - "epoch": 24.676923076923078, - "grad_norm": 1.5280061960220337, - "learning_rate": 5.296482412060302e-05, - "loss": 5.6748, - "step": 47318 - }, - { - "epoch": 24.677444589308998, - "grad_norm": 1.6032605171203613, - "learning_rate": 5.2963819095477386e-05, - "loss": 4.3322, - "step": 47319 - }, - { - "epoch": 24.677966101694913, - "grad_norm": 1.5340440273284912, - "learning_rate": 5.2962814070351764e-05, - "loss": 5.2519, - "step": 47320 - }, - { - "epoch": 24.678487614080833, - "grad_norm": 1.4873908758163452, - "learning_rate": 5.296180904522613e-05, - "loss": 5.535, - "step": 47321 - }, - { - "epoch": 24.679009126466752, - "grad_norm": 1.5279886722564697, - "learning_rate": 5.2960804020100506e-05, - "loss": 5.643, - "step": 47322 - }, - { - "epoch": 24.679530638852672, - "grad_norm": 1.5305421352386475, - "learning_rate": 5.295979899497488e-05, - "loss": 5.2924, - "step": 47323 - }, - { - "epoch": 24.68005215123859, - "grad_norm": 1.594450831413269, - "learning_rate": 5.2958793969849255e-05, - "loss": 5.1636, - "step": 47324 - }, - { - "epoch": 24.68057366362451, - "grad_norm": 1.5831859111785889, - "learning_rate": 5.295778894472362e-05, - "loss": 4.9946, - "step": 47325 - }, - { - "epoch": 24.68109517601043, - "grad_norm": 1.548529863357544, - "learning_rate": 5.2956783919598e-05, - "loss": 5.3552, - "step": 47326 - }, - { - "epoch": 24.68161668839635, - "grad_norm": 1.7135651111602783, - "learning_rate": 5.295577889447236e-05, - "loss": 5.1989, - "step": 47327 - }, - { - "epoch": 24.68213820078227, - "grad_norm": 1.6396974325180054, - "learning_rate": 5.295477386934674e-05, - "loss": 5.0598, - "step": 47328 - }, - { - "epoch": 24.68265971316819, - "grad_norm": 1.5008044242858887, - "learning_rate": 5.295376884422111e-05, - "loss": 4.7358, - "step": 47329 - }, - { - "epoch": 24.683181225554108, - "grad_norm": 1.467037558555603, - "learning_rate": 5.2952763819095474e-05, - "loss": 5.6756, - "step": 47330 - }, - { - "epoch": 24.683702737940028, - "grad_norm": 1.5959055423736572, - "learning_rate": 5.295175879396985e-05, - "loss": 4.8113, - "step": 47331 - }, - { - "epoch": 24.684224250325947, - "grad_norm": 1.6172267198562622, - "learning_rate": 5.2950753768844216e-05, - "loss": 5.3932, - "step": 47332 - }, - { - "epoch": 24.684745762711863, - "grad_norm": 1.6276932954788208, - "learning_rate": 5.2949748743718594e-05, - "loss": 4.9819, - "step": 47333 - }, - { - "epoch": 24.685267275097782, - "grad_norm": 1.6367923021316528, - "learning_rate": 5.2948743718592965e-05, - "loss": 5.1476, - "step": 47334 - }, - { - "epoch": 24.685788787483702, - "grad_norm": 1.6635050773620605, - "learning_rate": 5.294773869346734e-05, - "loss": 4.9992, - "step": 47335 - }, - { - "epoch": 24.68631029986962, - "grad_norm": 1.5734236240386963, - "learning_rate": 5.294673366834171e-05, - "loss": 5.5752, - "step": 47336 - }, - { - "epoch": 24.68683181225554, - "grad_norm": 1.6350886821746826, - "learning_rate": 5.2945728643216085e-05, - "loss": 5.3038, - "step": 47337 - }, - { - "epoch": 24.68735332464146, - "grad_norm": 1.6359999179840088, - "learning_rate": 5.2944723618090456e-05, - "loss": 5.2646, - "step": 47338 - }, - { - "epoch": 24.68787483702738, - "grad_norm": 1.5551235675811768, - "learning_rate": 5.2943718592964834e-05, - "loss": 5.1918, - "step": 47339 - }, - { - "epoch": 24.6883963494133, - "grad_norm": 1.5145343542099, - "learning_rate": 5.29427135678392e-05, - "loss": 5.2244, - "step": 47340 - }, - { - "epoch": 24.68891786179922, - "grad_norm": 1.5511668920516968, - "learning_rate": 5.2941708542713576e-05, - "loss": 5.1972, - "step": 47341 - }, - { - "epoch": 24.689439374185138, - "grad_norm": 1.638988971710205, - "learning_rate": 5.294070351758794e-05, - "loss": 5.0287, - "step": 47342 - }, - { - "epoch": 24.689960886571058, - "grad_norm": 1.612149953842163, - "learning_rate": 5.293969849246232e-05, - "loss": 5.2798, - "step": 47343 - }, - { - "epoch": 24.690482398956973, - "grad_norm": 1.4347255229949951, - "learning_rate": 5.293869346733669e-05, - "loss": 5.2653, - "step": 47344 - }, - { - "epoch": 24.691003911342893, - "grad_norm": 1.5032137632369995, - "learning_rate": 5.2937688442211053e-05, - "loss": 5.1911, - "step": 47345 - }, - { - "epoch": 24.691525423728812, - "grad_norm": 1.573765516281128, - "learning_rate": 5.293668341708543e-05, - "loss": 5.5309, - "step": 47346 - }, - { - "epoch": 24.692046936114732, - "grad_norm": 1.4932886362075806, - "learning_rate": 5.2935678391959796e-05, - "loss": 4.6991, - "step": 47347 - }, - { - "epoch": 24.69256844850065, - "grad_norm": 1.6029568910598755, - "learning_rate": 5.293467336683417e-05, - "loss": 5.0425, - "step": 47348 - }, - { - "epoch": 24.69308996088657, - "grad_norm": 1.5358147621154785, - "learning_rate": 5.2933668341708544e-05, - "loss": 4.9356, - "step": 47349 - }, - { - "epoch": 24.69361147327249, - "grad_norm": 1.6393566131591797, - "learning_rate": 5.293266331658292e-05, - "loss": 5.4375, - "step": 47350 - }, - { - "epoch": 24.69413298565841, - "grad_norm": 1.4459666013717651, - "learning_rate": 5.2931658291457287e-05, - "loss": 5.2593, - "step": 47351 - }, - { - "epoch": 24.69465449804433, - "grad_norm": 1.4607338905334473, - "learning_rate": 5.2930653266331664e-05, - "loss": 5.2711, - "step": 47352 - }, - { - "epoch": 24.69517601043025, - "grad_norm": 1.5448263883590698, - "learning_rate": 5.292964824120603e-05, - "loss": 5.3287, - "step": 47353 - }, - { - "epoch": 24.695697522816168, - "grad_norm": 1.6037408113479614, - "learning_rate": 5.2928643216080406e-05, - "loss": 5.1141, - "step": 47354 - }, - { - "epoch": 24.696219035202088, - "grad_norm": 1.7563648223876953, - "learning_rate": 5.292763819095478e-05, - "loss": 5.1192, - "step": 47355 - }, - { - "epoch": 24.696740547588004, - "grad_norm": 1.6039814949035645, - "learning_rate": 5.2926633165829155e-05, - "loss": 5.2887, - "step": 47356 - }, - { - "epoch": 24.697262059973923, - "grad_norm": 1.928030014038086, - "learning_rate": 5.292562814070352e-05, - "loss": 4.884, - "step": 47357 - }, - { - "epoch": 24.697783572359842, - "grad_norm": 1.5670113563537598, - "learning_rate": 5.2924623115577884e-05, - "loss": 5.5185, - "step": 47358 - }, - { - "epoch": 24.698305084745762, - "grad_norm": 1.605115532875061, - "learning_rate": 5.292361809045226e-05, - "loss": 4.869, - "step": 47359 - }, - { - "epoch": 24.69882659713168, - "grad_norm": 1.5162445306777954, - "learning_rate": 5.292261306532663e-05, - "loss": 5.5156, - "step": 47360 - }, - { - "epoch": 24.6993481095176, - "grad_norm": 1.5317503213882446, - "learning_rate": 5.292160804020101e-05, - "loss": 5.4146, - "step": 47361 - }, - { - "epoch": 24.69986962190352, - "grad_norm": 1.465466022491455, - "learning_rate": 5.2920603015075375e-05, - "loss": 5.2047, - "step": 47362 - }, - { - "epoch": 24.70039113428944, - "grad_norm": 1.6061033010482788, - "learning_rate": 5.291959798994975e-05, - "loss": 5.0021, - "step": 47363 - }, - { - "epoch": 24.70091264667536, - "grad_norm": 1.519797682762146, - "learning_rate": 5.2918592964824124e-05, - "loss": 5.1277, - "step": 47364 - }, - { - "epoch": 24.70143415906128, - "grad_norm": 1.4747980833053589, - "learning_rate": 5.29175879396985e-05, - "loss": 5.4958, - "step": 47365 - }, - { - "epoch": 24.701955671447198, - "grad_norm": 1.5383646488189697, - "learning_rate": 5.2916582914572866e-05, - "loss": 5.0771, - "step": 47366 - }, - { - "epoch": 24.702477183833118, - "grad_norm": 1.6111302375793457, - "learning_rate": 5.2915577889447244e-05, - "loss": 5.3315, - "step": 47367 - }, - { - "epoch": 24.702998696219034, - "grad_norm": 1.63905930519104, - "learning_rate": 5.291457286432161e-05, - "loss": 5.225, - "step": 47368 - }, - { - "epoch": 24.703520208604953, - "grad_norm": 1.5803592205047607, - "learning_rate": 5.2913567839195986e-05, - "loss": 5.2988, - "step": 47369 - }, - { - "epoch": 24.704041720990872, - "grad_norm": 1.485611915588379, - "learning_rate": 5.291256281407036e-05, - "loss": 5.7067, - "step": 47370 - }, - { - "epoch": 24.704563233376792, - "grad_norm": 1.5183179378509521, - "learning_rate": 5.291155778894472e-05, - "loss": 4.9675, - "step": 47371 - }, - { - "epoch": 24.70508474576271, - "grad_norm": 1.4715124368667603, - "learning_rate": 5.29105527638191e-05, - "loss": 5.4153, - "step": 47372 - }, - { - "epoch": 24.70560625814863, - "grad_norm": 1.5149403810501099, - "learning_rate": 5.290954773869346e-05, - "loss": 5.6242, - "step": 47373 - }, - { - "epoch": 24.70612777053455, - "grad_norm": 1.5118502378463745, - "learning_rate": 5.290854271356784e-05, - "loss": 5.2936, - "step": 47374 - }, - { - "epoch": 24.70664928292047, - "grad_norm": 1.4715086221694946, - "learning_rate": 5.290753768844221e-05, - "loss": 5.7607, - "step": 47375 - }, - { - "epoch": 24.70717079530639, - "grad_norm": 1.602860927581787, - "learning_rate": 5.290653266331659e-05, - "loss": 5.1513, - "step": 47376 - }, - { - "epoch": 24.70769230769231, - "grad_norm": 1.5714776515960693, - "learning_rate": 5.2905527638190954e-05, - "loss": 4.7246, - "step": 47377 - }, - { - "epoch": 24.708213820078228, - "grad_norm": 1.4496737718582153, - "learning_rate": 5.290452261306533e-05, - "loss": 5.2328, - "step": 47378 - }, - { - "epoch": 24.708735332464148, - "grad_norm": 1.4641375541687012, - "learning_rate": 5.2903517587939696e-05, - "loss": 5.5792, - "step": 47379 - }, - { - "epoch": 24.709256844850064, - "grad_norm": 1.5336039066314697, - "learning_rate": 5.2902512562814074e-05, - "loss": 5.0133, - "step": 47380 - }, - { - "epoch": 24.709778357235983, - "grad_norm": 1.5651122331619263, - "learning_rate": 5.2901507537688445e-05, - "loss": 5.0655, - "step": 47381 - }, - { - "epoch": 24.710299869621903, - "grad_norm": 1.4556572437286377, - "learning_rate": 5.290050251256282e-05, - "loss": 5.4127, - "step": 47382 - }, - { - "epoch": 24.710821382007822, - "grad_norm": 1.6332452297210693, - "learning_rate": 5.289949748743719e-05, - "loss": 5.1906, - "step": 47383 - }, - { - "epoch": 24.71134289439374, - "grad_norm": 1.5959571599960327, - "learning_rate": 5.289849246231156e-05, - "loss": 4.6574, - "step": 47384 - }, - { - "epoch": 24.71186440677966, - "grad_norm": 1.610870361328125, - "learning_rate": 5.2897487437185936e-05, - "loss": 5.4142, - "step": 47385 - }, - { - "epoch": 24.71238591916558, - "grad_norm": 1.5060276985168457, - "learning_rate": 5.28964824120603e-05, - "loss": 4.9912, - "step": 47386 - }, - { - "epoch": 24.7129074315515, - "grad_norm": 1.5834510326385498, - "learning_rate": 5.289547738693468e-05, - "loss": 5.4094, - "step": 47387 - }, - { - "epoch": 24.71342894393742, - "grad_norm": 1.5250613689422607, - "learning_rate": 5.289447236180904e-05, - "loss": 5.1351, - "step": 47388 - }, - { - "epoch": 24.71395045632334, - "grad_norm": 1.5975128412246704, - "learning_rate": 5.289346733668342e-05, - "loss": 5.2784, - "step": 47389 - }, - { - "epoch": 24.714471968709258, - "grad_norm": 1.6757081747055054, - "learning_rate": 5.289246231155779e-05, - "loss": 4.8205, - "step": 47390 - }, - { - "epoch": 24.714993481095178, - "grad_norm": 1.6876505613327026, - "learning_rate": 5.289145728643217e-05, - "loss": 5.0773, - "step": 47391 - }, - { - "epoch": 24.715514993481094, - "grad_norm": 1.541818618774414, - "learning_rate": 5.2890452261306534e-05, - "loss": 5.0922, - "step": 47392 - }, - { - "epoch": 24.716036505867013, - "grad_norm": 1.5823266506195068, - "learning_rate": 5.288944723618091e-05, - "loss": 5.3117, - "step": 47393 - }, - { - "epoch": 24.716558018252933, - "grad_norm": 1.6786820888519287, - "learning_rate": 5.2888442211055276e-05, - "loss": 5.1552, - "step": 47394 - }, - { - "epoch": 24.717079530638852, - "grad_norm": 1.5583157539367676, - "learning_rate": 5.2887437185929653e-05, - "loss": 5.3845, - "step": 47395 - }, - { - "epoch": 24.71760104302477, - "grad_norm": 1.6496175527572632, - "learning_rate": 5.2886432160804024e-05, - "loss": 4.6893, - "step": 47396 - }, - { - "epoch": 24.71812255541069, - "grad_norm": 1.4989069700241089, - "learning_rate": 5.288542713567839e-05, - "loss": 5.4424, - "step": 47397 - }, - { - "epoch": 24.71864406779661, - "grad_norm": 1.5996272563934326, - "learning_rate": 5.2884422110552767e-05, - "loss": 5.203, - "step": 47398 - }, - { - "epoch": 24.71916558018253, - "grad_norm": 1.6613653898239136, - "learning_rate": 5.288341708542713e-05, - "loss": 5.5652, - "step": 47399 - }, - { - "epoch": 24.71968709256845, - "grad_norm": 1.5515891313552856, - "learning_rate": 5.288241206030151e-05, - "loss": 5.3419, - "step": 47400 - }, - { - "epoch": 24.72020860495437, - "grad_norm": 1.5039900541305542, - "learning_rate": 5.288140703517588e-05, - "loss": 5.0322, - "step": 47401 - }, - { - "epoch": 24.72073011734029, - "grad_norm": 1.5610084533691406, - "learning_rate": 5.288040201005026e-05, - "loss": 5.0695, - "step": 47402 - }, - { - "epoch": 24.721251629726208, - "grad_norm": 1.6099997758865356, - "learning_rate": 5.287939698492462e-05, - "loss": 5.2438, - "step": 47403 - }, - { - "epoch": 24.721773142112124, - "grad_norm": 1.5810344219207764, - "learning_rate": 5.2878391959799e-05, - "loss": 5.3501, - "step": 47404 - }, - { - "epoch": 24.722294654498043, - "grad_norm": 1.600861668586731, - "learning_rate": 5.287738693467337e-05, - "loss": 5.0323, - "step": 47405 - }, - { - "epoch": 24.722816166883963, - "grad_norm": 1.4808549880981445, - "learning_rate": 5.287638190954775e-05, - "loss": 4.6499, - "step": 47406 - }, - { - "epoch": 24.723337679269882, - "grad_norm": 1.7229527235031128, - "learning_rate": 5.287537688442211e-05, - "loss": 5.0125, - "step": 47407 - }, - { - "epoch": 24.7238591916558, - "grad_norm": 1.6062817573547363, - "learning_rate": 5.287437185929649e-05, - "loss": 5.3669, - "step": 47408 - }, - { - "epoch": 24.72438070404172, - "grad_norm": 1.6355072259902954, - "learning_rate": 5.2873366834170855e-05, - "loss": 5.568, - "step": 47409 - }, - { - "epoch": 24.72490221642764, - "grad_norm": 1.5378187894821167, - "learning_rate": 5.2872361809045226e-05, - "loss": 4.9895, - "step": 47410 - }, - { - "epoch": 24.72542372881356, - "grad_norm": 1.5877232551574707, - "learning_rate": 5.2871356783919604e-05, - "loss": 4.8618, - "step": 47411 - }, - { - "epoch": 24.72594524119948, - "grad_norm": 1.6423448324203491, - "learning_rate": 5.287035175879397e-05, - "loss": 4.8321, - "step": 47412 - }, - { - "epoch": 24.7264667535854, - "grad_norm": 1.5903444290161133, - "learning_rate": 5.2869346733668346e-05, - "loss": 5.0025, - "step": 47413 - }, - { - "epoch": 24.72698826597132, - "grad_norm": 1.5387601852416992, - "learning_rate": 5.286834170854271e-05, - "loss": 5.5535, - "step": 47414 - }, - { - "epoch": 24.727509778357238, - "grad_norm": 1.5133532285690308, - "learning_rate": 5.286733668341709e-05, - "loss": 5.4693, - "step": 47415 - }, - { - "epoch": 24.728031290743154, - "grad_norm": 1.6344048976898193, - "learning_rate": 5.286633165829146e-05, - "loss": 4.4188, - "step": 47416 - }, - { - "epoch": 24.728552803129073, - "grad_norm": 1.5636427402496338, - "learning_rate": 5.286532663316584e-05, - "loss": 3.7235, - "step": 47417 - }, - { - "epoch": 24.729074315514993, - "grad_norm": 1.5532876253128052, - "learning_rate": 5.28643216080402e-05, - "loss": 5.5252, - "step": 47418 - }, - { - "epoch": 24.729595827900912, - "grad_norm": 1.4795598983764648, - "learning_rate": 5.286331658291458e-05, - "loss": 5.1144, - "step": 47419 - }, - { - "epoch": 24.73011734028683, - "grad_norm": 1.466006875038147, - "learning_rate": 5.286231155778894e-05, - "loss": 5.118, - "step": 47420 - }, - { - "epoch": 24.73063885267275, - "grad_norm": 1.5018320083618164, - "learning_rate": 5.286130653266332e-05, - "loss": 5.4393, - "step": 47421 - }, - { - "epoch": 24.73116036505867, - "grad_norm": 1.6700994968414307, - "learning_rate": 5.286030150753769e-05, - "loss": 4.8299, - "step": 47422 - }, - { - "epoch": 24.73168187744459, - "grad_norm": 1.5098687410354614, - "learning_rate": 5.2859296482412056e-05, - "loss": 5.2341, - "step": 47423 - }, - { - "epoch": 24.73220338983051, - "grad_norm": 1.6486495733261108, - "learning_rate": 5.2858291457286434e-05, - "loss": 4.9364, - "step": 47424 - }, - { - "epoch": 24.73272490221643, - "grad_norm": 1.5243858098983765, - "learning_rate": 5.28572864321608e-05, - "loss": 5.3591, - "step": 47425 - }, - { - "epoch": 24.73324641460235, - "grad_norm": 1.586220145225525, - "learning_rate": 5.2856281407035176e-05, - "loss": 5.3367, - "step": 47426 - }, - { - "epoch": 24.733767926988264, - "grad_norm": 1.4399850368499756, - "learning_rate": 5.285527638190955e-05, - "loss": 5.6983, - "step": 47427 - }, - { - "epoch": 24.734289439374184, - "grad_norm": 1.555681586265564, - "learning_rate": 5.2854271356783925e-05, - "loss": 5.2923, - "step": 47428 - }, - { - "epoch": 24.734810951760103, - "grad_norm": 1.5340088605880737, - "learning_rate": 5.285326633165829e-05, - "loss": 5.5594, - "step": 47429 - }, - { - "epoch": 24.735332464146023, - "grad_norm": 1.5374293327331543, - "learning_rate": 5.285226130653267e-05, - "loss": 5.1602, - "step": 47430 - }, - { - "epoch": 24.735853976531942, - "grad_norm": 1.5442519187927246, - "learning_rate": 5.285125628140704e-05, - "loss": 5.2001, - "step": 47431 - }, - { - "epoch": 24.73637548891786, - "grad_norm": 1.5805193185806274, - "learning_rate": 5.2850251256281416e-05, - "loss": 5.2318, - "step": 47432 - }, - { - "epoch": 24.73689700130378, - "grad_norm": 1.5123268365859985, - "learning_rate": 5.284924623115578e-05, - "loss": 5.5972, - "step": 47433 - }, - { - "epoch": 24.7374185136897, - "grad_norm": 1.5266926288604736, - "learning_rate": 5.284824120603016e-05, - "loss": 5.4148, - "step": 47434 - }, - { - "epoch": 24.73794002607562, - "grad_norm": 1.5206873416900635, - "learning_rate": 5.284723618090452e-05, - "loss": 4.8806, - "step": 47435 - }, - { - "epoch": 24.73846153846154, - "grad_norm": 1.669305443763733, - "learning_rate": 5.28462311557789e-05, - "loss": 5.1472, - "step": 47436 - }, - { - "epoch": 24.73898305084746, - "grad_norm": 1.6166794300079346, - "learning_rate": 5.284522613065327e-05, - "loss": 5.4533, - "step": 47437 - }, - { - "epoch": 24.73950456323338, - "grad_norm": 1.558632493019104, - "learning_rate": 5.2844221105527636e-05, - "loss": 5.2843, - "step": 47438 - }, - { - "epoch": 24.740026075619298, - "grad_norm": 1.566023826599121, - "learning_rate": 5.2843216080402014e-05, - "loss": 5.0458, - "step": 47439 - }, - { - "epoch": 24.740547588005214, - "grad_norm": 1.5679587125778198, - "learning_rate": 5.284221105527638e-05, - "loss": 5.2118, - "step": 47440 - }, - { - "epoch": 24.741069100391133, - "grad_norm": 1.5457262992858887, - "learning_rate": 5.2841206030150756e-05, - "loss": 5.5065, - "step": 47441 - }, - { - "epoch": 24.741590612777053, - "grad_norm": 1.6512668132781982, - "learning_rate": 5.284020100502513e-05, - "loss": 4.8574, - "step": 47442 - }, - { - "epoch": 24.742112125162972, - "grad_norm": 1.5250366926193237, - "learning_rate": 5.2839195979899505e-05, - "loss": 5.2422, - "step": 47443 - }, - { - "epoch": 24.74263363754889, - "grad_norm": 1.5479857921600342, - "learning_rate": 5.283819095477387e-05, - "loss": 5.7012, - "step": 47444 - }, - { - "epoch": 24.74315514993481, - "grad_norm": 1.6642109155654907, - "learning_rate": 5.283718592964825e-05, - "loss": 4.9788, - "step": 47445 - }, - { - "epoch": 24.74367666232073, - "grad_norm": 1.4878603219985962, - "learning_rate": 5.283618090452261e-05, - "loss": 5.5579, - "step": 47446 - }, - { - "epoch": 24.74419817470665, - "grad_norm": 1.5626896619796753, - "learning_rate": 5.283517587939699e-05, - "loss": 5.2341, - "step": 47447 - }, - { - "epoch": 24.74471968709257, - "grad_norm": 1.5615829229354858, - "learning_rate": 5.283417085427136e-05, - "loss": 5.2907, - "step": 47448 - }, - { - "epoch": 24.74524119947849, - "grad_norm": 1.681559443473816, - "learning_rate": 5.283316582914574e-05, - "loss": 4.7711, - "step": 47449 - }, - { - "epoch": 24.74576271186441, - "grad_norm": 1.6382852792739868, - "learning_rate": 5.28321608040201e-05, - "loss": 5.1967, - "step": 47450 - }, - { - "epoch": 24.746284224250324, - "grad_norm": 1.4390983581542969, - "learning_rate": 5.283115577889447e-05, - "loss": 5.6046, - "step": 47451 - }, - { - "epoch": 24.746805736636244, - "grad_norm": 1.5192053318023682, - "learning_rate": 5.283015075376885e-05, - "loss": 5.3251, - "step": 47452 - }, - { - "epoch": 24.747327249022163, - "grad_norm": 1.6994962692260742, - "learning_rate": 5.2829145728643215e-05, - "loss": 5.1813, - "step": 47453 - }, - { - "epoch": 24.747848761408083, - "grad_norm": 1.5553594827651978, - "learning_rate": 5.282814070351759e-05, - "loss": 5.435, - "step": 47454 - }, - { - "epoch": 24.748370273794002, - "grad_norm": 1.5389695167541504, - "learning_rate": 5.282713567839196e-05, - "loss": 4.9239, - "step": 47455 - }, - { - "epoch": 24.74889178617992, - "grad_norm": 1.481982946395874, - "learning_rate": 5.2826130653266335e-05, - "loss": 5.416, - "step": 47456 - }, - { - "epoch": 24.74941329856584, - "grad_norm": 1.5305616855621338, - "learning_rate": 5.2825125628140706e-05, - "loss": 5.438, - "step": 47457 - }, - { - "epoch": 24.74993481095176, - "grad_norm": 1.6072165966033936, - "learning_rate": 5.2824120603015084e-05, - "loss": 5.1031, - "step": 47458 - }, - { - "epoch": 24.75045632333768, - "grad_norm": 1.5590033531188965, - "learning_rate": 5.282311557788945e-05, - "loss": 5.2212, - "step": 47459 - }, - { - "epoch": 24.7509778357236, - "grad_norm": 1.6058114767074585, - "learning_rate": 5.2822110552763826e-05, - "loss": 5.3505, - "step": 47460 - }, - { - "epoch": 24.75149934810952, - "grad_norm": 1.4668052196502686, - "learning_rate": 5.282110552763819e-05, - "loss": 5.2932, - "step": 47461 - }, - { - "epoch": 24.75202086049544, - "grad_norm": 1.5334019660949707, - "learning_rate": 5.282010050251257e-05, - "loss": 4.9809, - "step": 47462 - }, - { - "epoch": 24.752542372881354, - "grad_norm": 1.5113006830215454, - "learning_rate": 5.281909547738694e-05, - "loss": 5.6508, - "step": 47463 - }, - { - "epoch": 24.753063885267274, - "grad_norm": 1.5373015403747559, - "learning_rate": 5.2818090452261303e-05, - "loss": 5.5241, - "step": 47464 - }, - { - "epoch": 24.753585397653193, - "grad_norm": 1.5907377004623413, - "learning_rate": 5.281708542713568e-05, - "loss": 5.1051, - "step": 47465 - }, - { - "epoch": 24.754106910039113, - "grad_norm": 1.5339654684066772, - "learning_rate": 5.2816080402010046e-05, - "loss": 5.4189, - "step": 47466 - }, - { - "epoch": 24.754628422425032, - "grad_norm": 1.5293375253677368, - "learning_rate": 5.281507537688442e-05, - "loss": 5.2817, - "step": 47467 - }, - { - "epoch": 24.75514993481095, - "grad_norm": 1.4435278177261353, - "learning_rate": 5.2814070351758794e-05, - "loss": 5.5082, - "step": 47468 - }, - { - "epoch": 24.75567144719687, - "grad_norm": 1.6221555471420288, - "learning_rate": 5.281306532663317e-05, - "loss": 5.0163, - "step": 47469 - }, - { - "epoch": 24.75619295958279, - "grad_norm": 1.630165457725525, - "learning_rate": 5.2812060301507537e-05, - "loss": 4.9543, - "step": 47470 - }, - { - "epoch": 24.75671447196871, - "grad_norm": 1.4313089847564697, - "learning_rate": 5.2811055276381914e-05, - "loss": 4.9606, - "step": 47471 - }, - { - "epoch": 24.75723598435463, - "grad_norm": 1.469399333000183, - "learning_rate": 5.2810050251256285e-05, - "loss": 4.9652, - "step": 47472 - }, - { - "epoch": 24.75775749674055, - "grad_norm": 1.615159511566162, - "learning_rate": 5.280904522613066e-05, - "loss": 5.1887, - "step": 47473 - }, - { - "epoch": 24.75827900912647, - "grad_norm": 1.5276683568954468, - "learning_rate": 5.280804020100503e-05, - "loss": 5.4537, - "step": 47474 - }, - { - "epoch": 24.758800521512384, - "grad_norm": 1.577805519104004, - "learning_rate": 5.2807035175879405e-05, - "loss": 5.3674, - "step": 47475 - }, - { - "epoch": 24.759322033898304, - "grad_norm": 1.6209670305252075, - "learning_rate": 5.280603015075377e-05, - "loss": 5.3706, - "step": 47476 - }, - { - "epoch": 24.759843546284223, - "grad_norm": 1.5125975608825684, - "learning_rate": 5.280502512562814e-05, - "loss": 5.5455, - "step": 47477 - }, - { - "epoch": 24.760365058670143, - "grad_norm": 1.537079095840454, - "learning_rate": 5.280402010050252e-05, - "loss": 5.1048, - "step": 47478 - }, - { - "epoch": 24.760886571056062, - "grad_norm": 1.6048425436019897, - "learning_rate": 5.280301507537688e-05, - "loss": 5.1865, - "step": 47479 - }, - { - "epoch": 24.76140808344198, - "grad_norm": 1.6852270364761353, - "learning_rate": 5.280201005025126e-05, - "loss": 4.6009, - "step": 47480 - }, - { - "epoch": 24.7619295958279, - "grad_norm": 1.6097460985183716, - "learning_rate": 5.2801005025125625e-05, - "loss": 5.4573, - "step": 47481 - }, - { - "epoch": 24.76245110821382, - "grad_norm": 1.5865166187286377, - "learning_rate": 5.28e-05, - "loss": 5.4706, - "step": 47482 - }, - { - "epoch": 24.76297262059974, - "grad_norm": 1.6382147073745728, - "learning_rate": 5.2798994974874374e-05, - "loss": 4.2004, - "step": 47483 - }, - { - "epoch": 24.76349413298566, - "grad_norm": 1.54300057888031, - "learning_rate": 5.279798994974875e-05, - "loss": 4.8358, - "step": 47484 - }, - { - "epoch": 24.76401564537158, - "grad_norm": 1.6016281843185425, - "learning_rate": 5.2796984924623116e-05, - "loss": 5.0162, - "step": 47485 - }, - { - "epoch": 24.7645371577575, - "grad_norm": 1.6335874795913696, - "learning_rate": 5.2795979899497494e-05, - "loss": 5.2313, - "step": 47486 - }, - { - "epoch": 24.765058670143414, - "grad_norm": 1.5735015869140625, - "learning_rate": 5.279497487437186e-05, - "loss": 5.3067, - "step": 47487 - }, - { - "epoch": 24.765580182529334, - "grad_norm": 1.5651882886886597, - "learning_rate": 5.2793969849246236e-05, - "loss": 5.0935, - "step": 47488 - }, - { - "epoch": 24.766101694915253, - "grad_norm": 1.5960276126861572, - "learning_rate": 5.279296482412061e-05, - "loss": 5.1305, - "step": 47489 - }, - { - "epoch": 24.766623207301173, - "grad_norm": 1.575652003288269, - "learning_rate": 5.279195979899497e-05, - "loss": 5.1764, - "step": 47490 - }, - { - "epoch": 24.767144719687092, - "grad_norm": 1.5699725151062012, - "learning_rate": 5.279095477386935e-05, - "loss": 5.5052, - "step": 47491 - }, - { - "epoch": 24.76766623207301, - "grad_norm": 1.4981679916381836, - "learning_rate": 5.278994974874372e-05, - "loss": 4.8892, - "step": 47492 - }, - { - "epoch": 24.76818774445893, - "grad_norm": 1.5606601238250732, - "learning_rate": 5.27889447236181e-05, - "loss": 4.9481, - "step": 47493 - }, - { - "epoch": 24.76870925684485, - "grad_norm": 1.6086260080337524, - "learning_rate": 5.278793969849246e-05, - "loss": 4.9859, - "step": 47494 - }, - { - "epoch": 24.76923076923077, - "grad_norm": 1.5214101076126099, - "learning_rate": 5.278693467336684e-05, - "loss": 5.1028, - "step": 47495 - }, - { - "epoch": 24.76975228161669, - "grad_norm": 1.5095444917678833, - "learning_rate": 5.2785929648241204e-05, - "loss": 5.1418, - "step": 47496 - }, - { - "epoch": 24.77027379400261, - "grad_norm": 1.5300822257995605, - "learning_rate": 5.278492462311558e-05, - "loss": 5.2729, - "step": 47497 - }, - { - "epoch": 24.77079530638853, - "grad_norm": 1.5844762325286865, - "learning_rate": 5.278391959798995e-05, - "loss": 4.9532, - "step": 47498 - }, - { - "epoch": 24.771316818774444, - "grad_norm": 1.529147982597351, - "learning_rate": 5.278291457286433e-05, - "loss": 5.1491, - "step": 47499 - }, - { - "epoch": 24.771838331160364, - "grad_norm": 1.6927844285964966, - "learning_rate": 5.2781909547738695e-05, - "loss": 5.3014, - "step": 47500 - }, - { - "epoch": 24.772359843546283, - "grad_norm": 1.595672369003296, - "learning_rate": 5.278090452261307e-05, - "loss": 5.5442, - "step": 47501 - }, - { - "epoch": 24.772881355932203, - "grad_norm": 1.5376272201538086, - "learning_rate": 5.277989949748744e-05, - "loss": 5.1765, - "step": 47502 - }, - { - "epoch": 24.773402868318122, - "grad_norm": 1.589247703552246, - "learning_rate": 5.277889447236181e-05, - "loss": 5.2062, - "step": 47503 - }, - { - "epoch": 24.77392438070404, - "grad_norm": 1.5780093669891357, - "learning_rate": 5.2777889447236186e-05, - "loss": 5.51, - "step": 47504 - }, - { - "epoch": 24.77444589308996, - "grad_norm": 1.6503119468688965, - "learning_rate": 5.277688442211055e-05, - "loss": 5.1063, - "step": 47505 - }, - { - "epoch": 24.77496740547588, - "grad_norm": 1.4889123439788818, - "learning_rate": 5.277587939698493e-05, - "loss": 5.1427, - "step": 47506 - }, - { - "epoch": 24.7754889178618, - "grad_norm": 1.5205193758010864, - "learning_rate": 5.277487437185929e-05, - "loss": 5.0561, - "step": 47507 - }, - { - "epoch": 24.77601043024772, - "grad_norm": 1.6262480020523071, - "learning_rate": 5.277386934673367e-05, - "loss": 5.0302, - "step": 47508 - }, - { - "epoch": 24.77653194263364, - "grad_norm": 1.7596312761306763, - "learning_rate": 5.277286432160804e-05, - "loss": 5.0836, - "step": 47509 - }, - { - "epoch": 24.777053455019555, - "grad_norm": 1.4933420419692993, - "learning_rate": 5.277185929648242e-05, - "loss": 5.1002, - "step": 47510 - }, - { - "epoch": 24.777574967405474, - "grad_norm": 1.4821689128875732, - "learning_rate": 5.2770854271356784e-05, - "loss": 5.5749, - "step": 47511 - }, - { - "epoch": 24.778096479791394, - "grad_norm": 1.4744006395339966, - "learning_rate": 5.276984924623116e-05, - "loss": 5.5574, - "step": 47512 - }, - { - "epoch": 24.778617992177313, - "grad_norm": 1.543318271636963, - "learning_rate": 5.2768844221105526e-05, - "loss": 5.1245, - "step": 47513 - }, - { - "epoch": 24.779139504563233, - "grad_norm": 1.4699589014053345, - "learning_rate": 5.2767839195979903e-05, - "loss": 5.3484, - "step": 47514 - }, - { - "epoch": 24.779661016949152, - "grad_norm": 1.5591673851013184, - "learning_rate": 5.2766834170854275e-05, - "loss": 5.2485, - "step": 47515 - }, - { - "epoch": 24.78018252933507, - "grad_norm": 1.528746247291565, - "learning_rate": 5.276582914572864e-05, - "loss": 5.1657, - "step": 47516 - }, - { - "epoch": 24.78070404172099, - "grad_norm": 1.5867305994033813, - "learning_rate": 5.2764824120603017e-05, - "loss": 5.3466, - "step": 47517 - }, - { - "epoch": 24.78122555410691, - "grad_norm": 1.4916069507598877, - "learning_rate": 5.276381909547739e-05, - "loss": 5.0848, - "step": 47518 - }, - { - "epoch": 24.78174706649283, - "grad_norm": 1.4861212968826294, - "learning_rate": 5.2762814070351765e-05, - "loss": 5.4499, - "step": 47519 - }, - { - "epoch": 24.78226857887875, - "grad_norm": 1.6560192108154297, - "learning_rate": 5.276180904522613e-05, - "loss": 5.2978, - "step": 47520 - }, - { - "epoch": 24.78279009126467, - "grad_norm": 1.5494730472564697, - "learning_rate": 5.276080402010051e-05, - "loss": 5.4394, - "step": 47521 - }, - { - "epoch": 24.78331160365059, - "grad_norm": 1.5607541799545288, - "learning_rate": 5.275979899497487e-05, - "loss": 5.0194, - "step": 47522 - }, - { - "epoch": 24.783833116036504, - "grad_norm": 1.470194935798645, - "learning_rate": 5.275879396984925e-05, - "loss": 5.3755, - "step": 47523 - }, - { - "epoch": 24.784354628422424, - "grad_norm": 1.6068567037582397, - "learning_rate": 5.275778894472362e-05, - "loss": 5.2849, - "step": 47524 - }, - { - "epoch": 24.784876140808343, - "grad_norm": 1.6193372011184692, - "learning_rate": 5.2756783919598e-05, - "loss": 5.2435, - "step": 47525 - }, - { - "epoch": 24.785397653194263, - "grad_norm": 1.548972487449646, - "learning_rate": 5.275577889447236e-05, - "loss": 5.2963, - "step": 47526 - }, - { - "epoch": 24.785919165580182, - "grad_norm": 1.4897874593734741, - "learning_rate": 5.275477386934674e-05, - "loss": 5.4222, - "step": 47527 - }, - { - "epoch": 24.7864406779661, - "grad_norm": 1.5287671089172363, - "learning_rate": 5.2753768844221105e-05, - "loss": 5.038, - "step": 47528 - }, - { - "epoch": 24.78696219035202, - "grad_norm": 1.6856635808944702, - "learning_rate": 5.275276381909548e-05, - "loss": 5.2468, - "step": 47529 - }, - { - "epoch": 24.78748370273794, - "grad_norm": 1.577754020690918, - "learning_rate": 5.2751758793969854e-05, - "loss": 5.3736, - "step": 47530 - }, - { - "epoch": 24.78800521512386, - "grad_norm": 1.5250321626663208, - "learning_rate": 5.275075376884422e-05, - "loss": 5.3523, - "step": 47531 - }, - { - "epoch": 24.78852672750978, - "grad_norm": 1.534523367881775, - "learning_rate": 5.2749748743718596e-05, - "loss": 4.9717, - "step": 47532 - }, - { - "epoch": 24.7890482398957, - "grad_norm": 1.5770007371902466, - "learning_rate": 5.274874371859296e-05, - "loss": 5.2463, - "step": 47533 - }, - { - "epoch": 24.789569752281615, - "grad_norm": 1.489088535308838, - "learning_rate": 5.274773869346734e-05, - "loss": 5.6112, - "step": 47534 - }, - { - "epoch": 24.790091264667534, - "grad_norm": 1.5553216934204102, - "learning_rate": 5.274673366834171e-05, - "loss": 4.7451, - "step": 47535 - }, - { - "epoch": 24.790612777053454, - "grad_norm": 1.5538774728775024, - "learning_rate": 5.274572864321609e-05, - "loss": 5.2696, - "step": 47536 - }, - { - "epoch": 24.791134289439373, - "grad_norm": 1.5648025274276733, - "learning_rate": 5.274472361809045e-05, - "loss": 5.2637, - "step": 47537 - }, - { - "epoch": 24.791655801825293, - "grad_norm": 1.5613423585891724, - "learning_rate": 5.274371859296483e-05, - "loss": 5.1972, - "step": 47538 - }, - { - "epoch": 24.792177314211212, - "grad_norm": 1.4599372148513794, - "learning_rate": 5.27427135678392e-05, - "loss": 5.6729, - "step": 47539 - }, - { - "epoch": 24.79269882659713, - "grad_norm": 1.601422667503357, - "learning_rate": 5.274170854271358e-05, - "loss": 4.898, - "step": 47540 - }, - { - "epoch": 24.79322033898305, - "grad_norm": 1.4956389665603638, - "learning_rate": 5.274070351758794e-05, - "loss": 5.2339, - "step": 47541 - }, - { - "epoch": 24.79374185136897, - "grad_norm": 1.7296587228775024, - "learning_rate": 5.273969849246232e-05, - "loss": 4.6253, - "step": 47542 - }, - { - "epoch": 24.79426336375489, - "grad_norm": 1.6556662321090698, - "learning_rate": 5.2738693467336684e-05, - "loss": 5.2285, - "step": 47543 - }, - { - "epoch": 24.79478487614081, - "grad_norm": 1.6297032833099365, - "learning_rate": 5.2737688442211055e-05, - "loss": 5.0284, - "step": 47544 - }, - { - "epoch": 24.79530638852673, - "grad_norm": 1.6926029920578003, - "learning_rate": 5.273668341708543e-05, - "loss": 4.4239, - "step": 47545 - }, - { - "epoch": 24.795827900912645, - "grad_norm": 1.6048219203948975, - "learning_rate": 5.27356783919598e-05, - "loss": 5.3311, - "step": 47546 - }, - { - "epoch": 24.796349413298564, - "grad_norm": 1.5725462436676025, - "learning_rate": 5.2734673366834175e-05, - "loss": 5.465, - "step": 47547 - }, - { - "epoch": 24.796870925684484, - "grad_norm": 1.666869878768921, - "learning_rate": 5.273366834170854e-05, - "loss": 5.4491, - "step": 47548 - }, - { - "epoch": 24.797392438070403, - "grad_norm": 1.6140059232711792, - "learning_rate": 5.273266331658292e-05, - "loss": 5.3708, - "step": 47549 - }, - { - "epoch": 24.797913950456323, - "grad_norm": 1.5744291543960571, - "learning_rate": 5.273165829145729e-05, - "loss": 5.4412, - "step": 47550 - }, - { - "epoch": 24.798435462842242, - "grad_norm": 1.435795783996582, - "learning_rate": 5.2730653266331666e-05, - "loss": 5.4963, - "step": 47551 - }, - { - "epoch": 24.798956975228162, - "grad_norm": 1.4892674684524536, - "learning_rate": 5.272964824120603e-05, - "loss": 5.3157, - "step": 47552 - }, - { - "epoch": 24.79947848761408, - "grad_norm": 1.5376384258270264, - "learning_rate": 5.272864321608041e-05, - "loss": 4.2826, - "step": 47553 - }, - { - "epoch": 24.8, - "grad_norm": 1.6269644498825073, - "learning_rate": 5.272763819095477e-05, - "loss": 5.0349, - "step": 47554 - }, - { - "epoch": 24.80052151238592, - "grad_norm": 1.512412428855896, - "learning_rate": 5.272663316582915e-05, - "loss": 5.5586, - "step": 47555 - }, - { - "epoch": 24.80104302477184, - "grad_norm": 1.5930720567703247, - "learning_rate": 5.272562814070352e-05, - "loss": 5.0954, - "step": 47556 - }, - { - "epoch": 24.80156453715776, - "grad_norm": 1.6046547889709473, - "learning_rate": 5.2724623115577886e-05, - "loss": 4.8148, - "step": 47557 - }, - { - "epoch": 24.802086049543675, - "grad_norm": 1.5422947406768799, - "learning_rate": 5.2723618090452264e-05, - "loss": 5.4638, - "step": 47558 - }, - { - "epoch": 24.802607561929594, - "grad_norm": 1.632067084312439, - "learning_rate": 5.2722613065326635e-05, - "loss": 4.7168, - "step": 47559 - }, - { - "epoch": 24.803129074315514, - "grad_norm": 1.5230306386947632, - "learning_rate": 5.272160804020101e-05, - "loss": 5.4069, - "step": 47560 - }, - { - "epoch": 24.803650586701433, - "grad_norm": 1.592128038406372, - "learning_rate": 5.272060301507538e-05, - "loss": 5.4452, - "step": 47561 - }, - { - "epoch": 24.804172099087353, - "grad_norm": 1.5460057258605957, - "learning_rate": 5.2719597989949755e-05, - "loss": 5.4909, - "step": 47562 - }, - { - "epoch": 24.804693611473272, - "grad_norm": 1.5914688110351562, - "learning_rate": 5.271859296482412e-05, - "loss": 5.4366, - "step": 47563 - }, - { - "epoch": 24.805215123859192, - "grad_norm": 1.5336439609527588, - "learning_rate": 5.27175879396985e-05, - "loss": 5.3584, - "step": 47564 - }, - { - "epoch": 24.80573663624511, - "grad_norm": 1.4945257902145386, - "learning_rate": 5.271658291457287e-05, - "loss": 5.483, - "step": 47565 - }, - { - "epoch": 24.80625814863103, - "grad_norm": 1.542188048362732, - "learning_rate": 5.2715577889447246e-05, - "loss": 5.4905, - "step": 47566 - }, - { - "epoch": 24.80677966101695, - "grad_norm": 1.49358332157135, - "learning_rate": 5.271457286432161e-05, - "loss": 5.387, - "step": 47567 - }, - { - "epoch": 24.80730117340287, - "grad_norm": 1.5079996585845947, - "learning_rate": 5.271356783919599e-05, - "loss": 5.5255, - "step": 47568 - }, - { - "epoch": 24.80782268578879, - "grad_norm": 1.6331822872161865, - "learning_rate": 5.271256281407035e-05, - "loss": 5.0497, - "step": 47569 - }, - { - "epoch": 24.808344198174705, - "grad_norm": 1.538796305656433, - "learning_rate": 5.271155778894472e-05, - "loss": 5.3896, - "step": 47570 - }, - { - "epoch": 24.808865710560625, - "grad_norm": 1.538104772567749, - "learning_rate": 5.27105527638191e-05, - "loss": 5.333, - "step": 47571 - }, - { - "epoch": 24.809387222946544, - "grad_norm": 1.5917664766311646, - "learning_rate": 5.2709547738693465e-05, - "loss": 5.1779, - "step": 47572 - }, - { - "epoch": 24.809908735332463, - "grad_norm": 1.5296070575714111, - "learning_rate": 5.270854271356784e-05, - "loss": 5.2235, - "step": 47573 - }, - { - "epoch": 24.810430247718383, - "grad_norm": 1.6117219924926758, - "learning_rate": 5.270753768844221e-05, - "loss": 5.5542, - "step": 47574 - }, - { - "epoch": 24.810951760104302, - "grad_norm": 1.553123950958252, - "learning_rate": 5.2706532663316585e-05, - "loss": 5.2958, - "step": 47575 - }, - { - "epoch": 24.811473272490222, - "grad_norm": 1.5403311252593994, - "learning_rate": 5.2705527638190956e-05, - "loss": 5.4147, - "step": 47576 - }, - { - "epoch": 24.81199478487614, - "grad_norm": 1.5323834419250488, - "learning_rate": 5.2704522613065334e-05, - "loss": 5.2087, - "step": 47577 - }, - { - "epoch": 24.81251629726206, - "grad_norm": 1.5729739665985107, - "learning_rate": 5.27035175879397e-05, - "loss": 5.4178, - "step": 47578 - }, - { - "epoch": 24.81303780964798, - "grad_norm": 1.5241584777832031, - "learning_rate": 5.2702512562814076e-05, - "loss": 5.4662, - "step": 47579 - }, - { - "epoch": 24.8135593220339, - "grad_norm": 1.6713162660598755, - "learning_rate": 5.270150753768845e-05, - "loss": 4.9369, - "step": 47580 - }, - { - "epoch": 24.81408083441982, - "grad_norm": 1.700795292854309, - "learning_rate": 5.2700502512562825e-05, - "loss": 4.717, - "step": 47581 - }, - { - "epoch": 24.814602346805735, - "grad_norm": 1.6582183837890625, - "learning_rate": 5.269949748743719e-05, - "loss": 5.3538, - "step": 47582 - }, - { - "epoch": 24.815123859191655, - "grad_norm": 1.5852854251861572, - "learning_rate": 5.2698492462311553e-05, - "loss": 5.37, - "step": 47583 - }, - { - "epoch": 24.815645371577574, - "grad_norm": 1.545393705368042, - "learning_rate": 5.269748743718593e-05, - "loss": 5.7418, - "step": 47584 - }, - { - "epoch": 24.816166883963493, - "grad_norm": 1.5864192247390747, - "learning_rate": 5.26964824120603e-05, - "loss": 5.1175, - "step": 47585 - }, - { - "epoch": 24.816688396349413, - "grad_norm": 1.4018276929855347, - "learning_rate": 5.269547738693468e-05, - "loss": 5.059, - "step": 47586 - }, - { - "epoch": 24.817209908735332, - "grad_norm": 1.6075929403305054, - "learning_rate": 5.2694472361809044e-05, - "loss": 5.4799, - "step": 47587 - }, - { - "epoch": 24.817731421121252, - "grad_norm": 1.8427879810333252, - "learning_rate": 5.269346733668342e-05, - "loss": 4.8626, - "step": 47588 - }, - { - "epoch": 24.81825293350717, - "grad_norm": 1.5658609867095947, - "learning_rate": 5.2692462311557787e-05, - "loss": 5.422, - "step": 47589 - }, - { - "epoch": 24.81877444589309, - "grad_norm": 1.5325922966003418, - "learning_rate": 5.2691457286432164e-05, - "loss": 4.907, - "step": 47590 - }, - { - "epoch": 24.81929595827901, - "grad_norm": 1.5844037532806396, - "learning_rate": 5.2690452261306535e-05, - "loss": 5.2854, - "step": 47591 - }, - { - "epoch": 24.81981747066493, - "grad_norm": 1.5087043046951294, - "learning_rate": 5.268944723618091e-05, - "loss": 4.4148, - "step": 47592 - }, - { - "epoch": 24.820338983050846, - "grad_norm": 1.6013150215148926, - "learning_rate": 5.268844221105528e-05, - "loss": 4.9726, - "step": 47593 - }, - { - "epoch": 24.820860495436765, - "grad_norm": 1.706227421760559, - "learning_rate": 5.2687437185929655e-05, - "loss": 4.8196, - "step": 47594 - }, - { - "epoch": 24.821382007822685, - "grad_norm": 1.5933588743209839, - "learning_rate": 5.268643216080402e-05, - "loss": 5.3539, - "step": 47595 - }, - { - "epoch": 24.821903520208604, - "grad_norm": 1.5665749311447144, - "learning_rate": 5.268542713567839e-05, - "loss": 5.0776, - "step": 47596 - }, - { - "epoch": 24.822425032594523, - "grad_norm": 1.5875684022903442, - "learning_rate": 5.268442211055277e-05, - "loss": 4.8961, - "step": 47597 - }, - { - "epoch": 24.822946544980443, - "grad_norm": 1.6239757537841797, - "learning_rate": 5.268341708542713e-05, - "loss": 5.1489, - "step": 47598 - }, - { - "epoch": 24.823468057366362, - "grad_norm": 1.5420277118682861, - "learning_rate": 5.268241206030151e-05, - "loss": 5.5853, - "step": 47599 - }, - { - "epoch": 24.823989569752282, - "grad_norm": 1.571365237236023, - "learning_rate": 5.2681407035175875e-05, - "loss": 5.3365, - "step": 47600 - }, - { - "epoch": 24.8245110821382, - "grad_norm": 1.5884332656860352, - "learning_rate": 5.268040201005025e-05, - "loss": 5.5718, - "step": 47601 - }, - { - "epoch": 24.82503259452412, - "grad_norm": 1.6147994995117188, - "learning_rate": 5.2679396984924624e-05, - "loss": 4.8736, - "step": 47602 - }, - { - "epoch": 24.82555410691004, - "grad_norm": 1.5263615846633911, - "learning_rate": 5.2678391959799e-05, - "loss": 5.5705, - "step": 47603 - }, - { - "epoch": 24.82607561929596, - "grad_norm": 1.6066384315490723, - "learning_rate": 5.2677386934673366e-05, - "loss": 5.1235, - "step": 47604 - }, - { - "epoch": 24.82659713168188, - "grad_norm": 1.5310699939727783, - "learning_rate": 5.2676381909547744e-05, - "loss": 5.273, - "step": 47605 - }, - { - "epoch": 24.827118644067795, - "grad_norm": 1.6136231422424316, - "learning_rate": 5.2675376884422115e-05, - "loss": 5.1345, - "step": 47606 - }, - { - "epoch": 24.827640156453715, - "grad_norm": 1.5032501220703125, - "learning_rate": 5.267437185929649e-05, - "loss": 5.5692, - "step": 47607 - }, - { - "epoch": 24.828161668839634, - "grad_norm": 1.6017577648162842, - "learning_rate": 5.267336683417086e-05, - "loss": 4.8972, - "step": 47608 - }, - { - "epoch": 24.828683181225554, - "grad_norm": 1.604788899421692, - "learning_rate": 5.267236180904522e-05, - "loss": 5.3332, - "step": 47609 - }, - { - "epoch": 24.829204693611473, - "grad_norm": 1.4956939220428467, - "learning_rate": 5.26713567839196e-05, - "loss": 5.3561, - "step": 47610 - }, - { - "epoch": 24.829726205997392, - "grad_norm": 1.6135096549987793, - "learning_rate": 5.267035175879397e-05, - "loss": 4.7716, - "step": 47611 - }, - { - "epoch": 24.830247718383312, - "grad_norm": 1.5763779878616333, - "learning_rate": 5.266934673366835e-05, - "loss": 5.2531, - "step": 47612 - }, - { - "epoch": 24.83076923076923, - "grad_norm": 1.5152612924575806, - "learning_rate": 5.266834170854271e-05, - "loss": 5.0948, - "step": 47613 - }, - { - "epoch": 24.83129074315515, - "grad_norm": 1.4788919687271118, - "learning_rate": 5.266733668341709e-05, - "loss": 4.9009, - "step": 47614 - }, - { - "epoch": 24.83181225554107, - "grad_norm": 1.542481541633606, - "learning_rate": 5.2666331658291454e-05, - "loss": 5.4679, - "step": 47615 - }, - { - "epoch": 24.83233376792699, - "grad_norm": 1.6971243619918823, - "learning_rate": 5.266532663316583e-05, - "loss": 4.5892, - "step": 47616 - }, - { - "epoch": 24.832855280312906, - "grad_norm": 1.5795730352401733, - "learning_rate": 5.26643216080402e-05, - "loss": 5.5169, - "step": 47617 - }, - { - "epoch": 24.833376792698825, - "grad_norm": 1.6188507080078125, - "learning_rate": 5.266331658291458e-05, - "loss": 5.0766, - "step": 47618 - }, - { - "epoch": 24.833898305084745, - "grad_norm": 1.555129051208496, - "learning_rate": 5.2662311557788945e-05, - "loss": 4.7554, - "step": 47619 - }, - { - "epoch": 24.834419817470664, - "grad_norm": 1.565237283706665, - "learning_rate": 5.266130653266332e-05, - "loss": 4.9967, - "step": 47620 - }, - { - "epoch": 24.834941329856584, - "grad_norm": 1.4539389610290527, - "learning_rate": 5.266030150753769e-05, - "loss": 4.9517, - "step": 47621 - }, - { - "epoch": 24.835462842242503, - "grad_norm": 1.577468991279602, - "learning_rate": 5.2659296482412065e-05, - "loss": 5.1513, - "step": 47622 - }, - { - "epoch": 24.835984354628422, - "grad_norm": 1.58712899684906, - "learning_rate": 5.2658291457286436e-05, - "loss": 5.3448, - "step": 47623 - }, - { - "epoch": 24.836505867014342, - "grad_norm": 1.4740113019943237, - "learning_rate": 5.26572864321608e-05, - "loss": 5.3956, - "step": 47624 - }, - { - "epoch": 24.83702737940026, - "grad_norm": 1.5789072513580322, - "learning_rate": 5.265628140703518e-05, - "loss": 5.4184, - "step": 47625 - }, - { - "epoch": 24.83754889178618, - "grad_norm": 1.563567042350769, - "learning_rate": 5.265527638190955e-05, - "loss": 5.2468, - "step": 47626 - }, - { - "epoch": 24.8380704041721, - "grad_norm": 1.6770111322402954, - "learning_rate": 5.265427135678393e-05, - "loss": 4.9138, - "step": 47627 - }, - { - "epoch": 24.83859191655802, - "grad_norm": 1.5910348892211914, - "learning_rate": 5.265326633165829e-05, - "loss": 5.3359, - "step": 47628 - }, - { - "epoch": 24.839113428943936, - "grad_norm": 1.5392276048660278, - "learning_rate": 5.265226130653267e-05, - "loss": 5.3807, - "step": 47629 - }, - { - "epoch": 24.839634941329855, - "grad_norm": 1.5421252250671387, - "learning_rate": 5.2651256281407034e-05, - "loss": 5.2895, - "step": 47630 - }, - { - "epoch": 24.840156453715775, - "grad_norm": 1.5147168636322021, - "learning_rate": 5.265025125628141e-05, - "loss": 5.6014, - "step": 47631 - }, - { - "epoch": 24.840677966101694, - "grad_norm": 1.5462509393692017, - "learning_rate": 5.264924623115578e-05, - "loss": 5.1455, - "step": 47632 - }, - { - "epoch": 24.841199478487614, - "grad_norm": 1.4600012302398682, - "learning_rate": 5.264824120603016e-05, - "loss": 5.2715, - "step": 47633 - }, - { - "epoch": 24.841720990873533, - "grad_norm": 1.5341097116470337, - "learning_rate": 5.2647236180904525e-05, - "loss": 5.6873, - "step": 47634 - }, - { - "epoch": 24.842242503259452, - "grad_norm": 1.7246912717819214, - "learning_rate": 5.26462311557789e-05, - "loss": 4.8405, - "step": 47635 - }, - { - "epoch": 24.842764015645372, - "grad_norm": 1.5018664598464966, - "learning_rate": 5.264522613065327e-05, - "loss": 5.1872, - "step": 47636 - }, - { - "epoch": 24.84328552803129, - "grad_norm": 1.5575308799743652, - "learning_rate": 5.264422110552764e-05, - "loss": 5.1835, - "step": 47637 - }, - { - "epoch": 24.84380704041721, - "grad_norm": 1.5339595079421997, - "learning_rate": 5.2643216080402016e-05, - "loss": 5.3441, - "step": 47638 - }, - { - "epoch": 24.84432855280313, - "grad_norm": 1.4532644748687744, - "learning_rate": 5.264221105527638e-05, - "loss": 5.6701, - "step": 47639 - }, - { - "epoch": 24.84485006518905, - "grad_norm": 1.7795687913894653, - "learning_rate": 5.264120603015076e-05, - "loss": 4.6195, - "step": 47640 - }, - { - "epoch": 24.845371577574966, - "grad_norm": 1.5197502374649048, - "learning_rate": 5.264020100502512e-05, - "loss": 5.1521, - "step": 47641 - }, - { - "epoch": 24.845893089960885, - "grad_norm": 1.5420690774917603, - "learning_rate": 5.26391959798995e-05, - "loss": 5.3525, - "step": 47642 - }, - { - "epoch": 24.846414602346805, - "grad_norm": 1.5969605445861816, - "learning_rate": 5.263819095477387e-05, - "loss": 5.4385, - "step": 47643 - }, - { - "epoch": 24.846936114732724, - "grad_norm": 1.6672930717468262, - "learning_rate": 5.263718592964825e-05, - "loss": 5.2916, - "step": 47644 - }, - { - "epoch": 24.847457627118644, - "grad_norm": 1.4754692316055298, - "learning_rate": 5.263618090452261e-05, - "loss": 5.265, - "step": 47645 - }, - { - "epoch": 24.847979139504563, - "grad_norm": 1.5418092012405396, - "learning_rate": 5.263517587939699e-05, - "loss": 5.2349, - "step": 47646 - }, - { - "epoch": 24.848500651890483, - "grad_norm": 1.5092413425445557, - "learning_rate": 5.263417085427136e-05, - "loss": 5.4792, - "step": 47647 - }, - { - "epoch": 24.849022164276402, - "grad_norm": 1.5439201593399048, - "learning_rate": 5.263316582914574e-05, - "loss": 5.7572, - "step": 47648 - }, - { - "epoch": 24.84954367666232, - "grad_norm": 1.547318696975708, - "learning_rate": 5.2632160804020104e-05, - "loss": 5.2754, - "step": 47649 - }, - { - "epoch": 24.85006518904824, - "grad_norm": 1.562667727470398, - "learning_rate": 5.263115577889447e-05, - "loss": 5.3841, - "step": 47650 - }, - { - "epoch": 24.85058670143416, - "grad_norm": 1.667661428451538, - "learning_rate": 5.2630150753768846e-05, - "loss": 5.0221, - "step": 47651 - }, - { - "epoch": 24.85110821382008, - "grad_norm": 1.6626861095428467, - "learning_rate": 5.262914572864322e-05, - "loss": 5.0782, - "step": 47652 - }, - { - "epoch": 24.851629726205996, - "grad_norm": 1.5533127784729004, - "learning_rate": 5.2628140703517595e-05, - "loss": 5.2047, - "step": 47653 - }, - { - "epoch": 24.852151238591915, - "grad_norm": 1.532079815864563, - "learning_rate": 5.262713567839196e-05, - "loss": 5.6026, - "step": 47654 - }, - { - "epoch": 24.852672750977835, - "grad_norm": 1.5754894018173218, - "learning_rate": 5.262613065326634e-05, - "loss": 5.182, - "step": 47655 - }, - { - "epoch": 24.853194263363754, - "grad_norm": 1.5834249258041382, - "learning_rate": 5.26251256281407e-05, - "loss": 4.8145, - "step": 47656 - }, - { - "epoch": 24.853715775749674, - "grad_norm": 1.4557965993881226, - "learning_rate": 5.262412060301508e-05, - "loss": 5.4316, - "step": 47657 - }, - { - "epoch": 24.854237288135593, - "grad_norm": 1.5624897480010986, - "learning_rate": 5.262311557788945e-05, - "loss": 5.2327, - "step": 47658 - }, - { - "epoch": 24.854758800521513, - "grad_norm": 1.5320183038711548, - "learning_rate": 5.262211055276383e-05, - "loss": 5.1006, - "step": 47659 - }, - { - "epoch": 24.855280312907432, - "grad_norm": 1.5800933837890625, - "learning_rate": 5.262110552763819e-05, - "loss": 5.0254, - "step": 47660 - }, - { - "epoch": 24.85580182529335, - "grad_norm": 1.6376460790634155, - "learning_rate": 5.262010050251257e-05, - "loss": 5.4482, - "step": 47661 - }, - { - "epoch": 24.85632333767927, - "grad_norm": 1.5008492469787598, - "learning_rate": 5.2619095477386934e-05, - "loss": 5.2549, - "step": 47662 - }, - { - "epoch": 24.85684485006519, - "grad_norm": 1.4822336435317993, - "learning_rate": 5.2618090452261305e-05, - "loss": 5.4814, - "step": 47663 - }, - { - "epoch": 24.85736636245111, - "grad_norm": 1.501574993133545, - "learning_rate": 5.261708542713568e-05, - "loss": 4.6984, - "step": 47664 - }, - { - "epoch": 24.857887874837026, - "grad_norm": 1.503880262374878, - "learning_rate": 5.261608040201005e-05, - "loss": 5.1773, - "step": 47665 - }, - { - "epoch": 24.858409387222945, - "grad_norm": 1.5224677324295044, - "learning_rate": 5.2615075376884425e-05, - "loss": 5.6097, - "step": 47666 - }, - { - "epoch": 24.858930899608865, - "grad_norm": 1.4554351568222046, - "learning_rate": 5.2614070351758796e-05, - "loss": 5.2694, - "step": 47667 - }, - { - "epoch": 24.859452411994784, - "grad_norm": 1.5911177396774292, - "learning_rate": 5.2613065326633174e-05, - "loss": 5.5906, - "step": 47668 - }, - { - "epoch": 24.859973924380704, - "grad_norm": 1.5939325094223022, - "learning_rate": 5.261206030150754e-05, - "loss": 5.5919, - "step": 47669 - }, - { - "epoch": 24.860495436766623, - "grad_norm": 1.3965059518814087, - "learning_rate": 5.2611055276381916e-05, - "loss": 5.4885, - "step": 47670 - }, - { - "epoch": 24.861016949152543, - "grad_norm": 1.6884772777557373, - "learning_rate": 5.261005025125628e-05, - "loss": 5.425, - "step": 47671 - }, - { - "epoch": 24.861538461538462, - "grad_norm": 1.5756975412368774, - "learning_rate": 5.260904522613066e-05, - "loss": 4.6471, - "step": 47672 - }, - { - "epoch": 24.86205997392438, - "grad_norm": 1.567591905593872, - "learning_rate": 5.260804020100503e-05, - "loss": 5.3167, - "step": 47673 - }, - { - "epoch": 24.8625814863103, - "grad_norm": 1.510602355003357, - "learning_rate": 5.260703517587941e-05, - "loss": 5.2839, - "step": 47674 - }, - { - "epoch": 24.86310299869622, - "grad_norm": 1.507896900177002, - "learning_rate": 5.260603015075377e-05, - "loss": 5.4122, - "step": 47675 - }, - { - "epoch": 24.86362451108214, - "grad_norm": 1.570114016532898, - "learning_rate": 5.2605025125628136e-05, - "loss": 5.1531, - "step": 47676 - }, - { - "epoch": 24.864146023468056, - "grad_norm": 1.5529382228851318, - "learning_rate": 5.2604020100502514e-05, - "loss": 5.0554, - "step": 47677 - }, - { - "epoch": 24.864667535853975, - "grad_norm": 1.5655509233474731, - "learning_rate": 5.2603015075376885e-05, - "loss": 5.7001, - "step": 47678 - }, - { - "epoch": 24.865189048239895, - "grad_norm": 1.515552282333374, - "learning_rate": 5.260201005025126e-05, - "loss": 5.5979, - "step": 47679 - }, - { - "epoch": 24.865710560625814, - "grad_norm": 1.6701767444610596, - "learning_rate": 5.260100502512563e-05, - "loss": 4.4712, - "step": 47680 - }, - { - "epoch": 24.866232073011734, - "grad_norm": 1.6492589712142944, - "learning_rate": 5.2600000000000005e-05, - "loss": 5.0752, - "step": 47681 - }, - { - "epoch": 24.866753585397653, - "grad_norm": 1.5632976293563843, - "learning_rate": 5.259899497487437e-05, - "loss": 5.6909, - "step": 47682 - }, - { - "epoch": 24.867275097783573, - "grad_norm": 1.4932217597961426, - "learning_rate": 5.259798994974875e-05, - "loss": 4.872, - "step": 47683 - }, - { - "epoch": 24.867796610169492, - "grad_norm": 1.4718550443649292, - "learning_rate": 5.259698492462312e-05, - "loss": 5.4693, - "step": 47684 - }, - { - "epoch": 24.86831812255541, - "grad_norm": 1.6471961736679077, - "learning_rate": 5.2595979899497496e-05, - "loss": 5.541, - "step": 47685 - }, - { - "epoch": 24.86883963494133, - "grad_norm": 1.6098086833953857, - "learning_rate": 5.259497487437186e-05, - "loss": 5.155, - "step": 47686 - }, - { - "epoch": 24.86936114732725, - "grad_norm": 1.603498935699463, - "learning_rate": 5.259396984924624e-05, - "loss": 5.2024, - "step": 47687 - }, - { - "epoch": 24.86988265971317, - "grad_norm": 1.5907758474349976, - "learning_rate": 5.25929648241206e-05, - "loss": 5.1382, - "step": 47688 - }, - { - "epoch": 24.870404172099086, - "grad_norm": 1.70477294921875, - "learning_rate": 5.259195979899497e-05, - "loss": 4.7903, - "step": 47689 - }, - { - "epoch": 24.870925684485005, - "grad_norm": 1.5717171430587769, - "learning_rate": 5.259095477386935e-05, - "loss": 5.168, - "step": 47690 - }, - { - "epoch": 24.871447196870925, - "grad_norm": 1.671024203300476, - "learning_rate": 5.2589949748743715e-05, - "loss": 5.0703, - "step": 47691 - }, - { - "epoch": 24.871968709256844, - "grad_norm": 1.630667805671692, - "learning_rate": 5.258894472361809e-05, - "loss": 5.5467, - "step": 47692 - }, - { - "epoch": 24.872490221642764, - "grad_norm": 1.5529078245162964, - "learning_rate": 5.2587939698492464e-05, - "loss": 5.2816, - "step": 47693 - }, - { - "epoch": 24.873011734028683, - "grad_norm": 1.6567729711532593, - "learning_rate": 5.258693467336684e-05, - "loss": 4.4639, - "step": 47694 - }, - { - "epoch": 24.873533246414603, - "grad_norm": 1.6699813604354858, - "learning_rate": 5.2585929648241206e-05, - "loss": 4.678, - "step": 47695 - }, - { - "epoch": 24.874054758800522, - "grad_norm": 1.5209894180297852, - "learning_rate": 5.2584924623115584e-05, - "loss": 5.5606, - "step": 47696 - }, - { - "epoch": 24.87457627118644, - "grad_norm": 1.7648942470550537, - "learning_rate": 5.258391959798995e-05, - "loss": 4.9322, - "step": 47697 - }, - { - "epoch": 24.87509778357236, - "grad_norm": 1.5355969667434692, - "learning_rate": 5.2582914572864326e-05, - "loss": 5.3695, - "step": 47698 - }, - { - "epoch": 24.87561929595828, - "grad_norm": 1.6538645029067993, - "learning_rate": 5.25819095477387e-05, - "loss": 4.7162, - "step": 47699 - }, - { - "epoch": 24.876140808344196, - "grad_norm": 1.6203669309616089, - "learning_rate": 5.2580904522613075e-05, - "loss": 5.0451, - "step": 47700 - }, - { - "epoch": 24.876662320730116, - "grad_norm": 1.5546715259552002, - "learning_rate": 5.257989949748744e-05, - "loss": 5.3306, - "step": 47701 - }, - { - "epoch": 24.877183833116035, - "grad_norm": 1.4750310182571411, - "learning_rate": 5.257889447236182e-05, - "loss": 5.5758, - "step": 47702 - }, - { - "epoch": 24.877705345501955, - "grad_norm": 1.5209474563598633, - "learning_rate": 5.257788944723618e-05, - "loss": 5.0943, - "step": 47703 - }, - { - "epoch": 24.878226857887874, - "grad_norm": 1.5052242279052734, - "learning_rate": 5.257688442211055e-05, - "loss": 4.8533, - "step": 47704 - }, - { - "epoch": 24.878748370273794, - "grad_norm": 1.4948164224624634, - "learning_rate": 5.257587939698493e-05, - "loss": 5.4799, - "step": 47705 - }, - { - "epoch": 24.879269882659713, - "grad_norm": 1.5050519704818726, - "learning_rate": 5.2574874371859294e-05, - "loss": 5.3991, - "step": 47706 - }, - { - "epoch": 24.879791395045633, - "grad_norm": 1.632494568824768, - "learning_rate": 5.257386934673367e-05, - "loss": 4.6097, - "step": 47707 - }, - { - "epoch": 24.880312907431552, - "grad_norm": 1.5771527290344238, - "learning_rate": 5.2572864321608037e-05, - "loss": 4.8484, - "step": 47708 - }, - { - "epoch": 24.88083441981747, - "grad_norm": 1.603529691696167, - "learning_rate": 5.2571859296482414e-05, - "loss": 5.4611, - "step": 47709 - }, - { - "epoch": 24.88135593220339, - "grad_norm": 1.5545287132263184, - "learning_rate": 5.2570854271356785e-05, - "loss": 5.251, - "step": 47710 - }, - { - "epoch": 24.88187744458931, - "grad_norm": 1.5102593898773193, - "learning_rate": 5.256984924623116e-05, - "loss": 4.9732, - "step": 47711 - }, - { - "epoch": 24.88239895697523, - "grad_norm": 1.7157131433486938, - "learning_rate": 5.256884422110553e-05, - "loss": 4.9523, - "step": 47712 - }, - { - "epoch": 24.882920469361146, - "grad_norm": 1.5542281866073608, - "learning_rate": 5.2567839195979905e-05, - "loss": 5.516, - "step": 47713 - }, - { - "epoch": 24.883441981747065, - "grad_norm": 1.7762529850006104, - "learning_rate": 5.2566834170854276e-05, - "loss": 5.1571, - "step": 47714 - }, - { - "epoch": 24.883963494132985, - "grad_norm": 1.6818736791610718, - "learning_rate": 5.2565829145728654e-05, - "loss": 4.8064, - "step": 47715 - }, - { - "epoch": 24.884485006518904, - "grad_norm": 1.5859419107437134, - "learning_rate": 5.256482412060302e-05, - "loss": 4.8007, - "step": 47716 - }, - { - "epoch": 24.885006518904824, - "grad_norm": 1.5227797031402588, - "learning_rate": 5.256381909547738e-05, - "loss": 5.5076, - "step": 47717 - }, - { - "epoch": 24.885528031290743, - "grad_norm": 1.6248798370361328, - "learning_rate": 5.256281407035176e-05, - "loss": 5.464, - "step": 47718 - }, - { - "epoch": 24.886049543676663, - "grad_norm": 1.5068378448486328, - "learning_rate": 5.256180904522613e-05, - "loss": 4.8819, - "step": 47719 - }, - { - "epoch": 24.886571056062582, - "grad_norm": 1.5771691799163818, - "learning_rate": 5.256080402010051e-05, - "loss": 5.1556, - "step": 47720 - }, - { - "epoch": 24.8870925684485, - "grad_norm": 1.6090469360351562, - "learning_rate": 5.2559798994974874e-05, - "loss": 5.3581, - "step": 47721 - }, - { - "epoch": 24.88761408083442, - "grad_norm": 1.485993504524231, - "learning_rate": 5.255879396984925e-05, - "loss": 5.5114, - "step": 47722 - }, - { - "epoch": 24.88813559322034, - "grad_norm": 1.6352717876434326, - "learning_rate": 5.2557788944723616e-05, - "loss": 4.8617, - "step": 47723 - }, - { - "epoch": 24.888657105606256, - "grad_norm": 1.5232905149459839, - "learning_rate": 5.2556783919597994e-05, - "loss": 5.1275, - "step": 47724 - }, - { - "epoch": 24.889178617992176, - "grad_norm": 1.6271828413009644, - "learning_rate": 5.2555778894472365e-05, - "loss": 4.7737, - "step": 47725 - }, - { - "epoch": 24.889700130378095, - "grad_norm": 1.543155550956726, - "learning_rate": 5.255477386934674e-05, - "loss": 5.5462, - "step": 47726 - }, - { - "epoch": 24.890221642764015, - "grad_norm": 1.6370857954025269, - "learning_rate": 5.255376884422111e-05, - "loss": 5.2907, - "step": 47727 - }, - { - "epoch": 24.890743155149934, - "grad_norm": 1.6214169263839722, - "learning_rate": 5.2552763819095485e-05, - "loss": 4.706, - "step": 47728 - }, - { - "epoch": 24.891264667535854, - "grad_norm": 1.5547282695770264, - "learning_rate": 5.255175879396985e-05, - "loss": 5.1151, - "step": 47729 - }, - { - "epoch": 24.891786179921773, - "grad_norm": 1.5775498151779175, - "learning_rate": 5.255075376884422e-05, - "loss": 5.2578, - "step": 47730 - }, - { - "epoch": 24.892307692307693, - "grad_norm": 1.5966057777404785, - "learning_rate": 5.25497487437186e-05, - "loss": 5.2258, - "step": 47731 - }, - { - "epoch": 24.892829204693612, - "grad_norm": 1.6480427980422974, - "learning_rate": 5.254874371859296e-05, - "loss": 5.1445, - "step": 47732 - }, - { - "epoch": 24.89335071707953, - "grad_norm": 1.6053638458251953, - "learning_rate": 5.254773869346734e-05, - "loss": 4.6525, - "step": 47733 - }, - { - "epoch": 24.89387222946545, - "grad_norm": 1.5487043857574463, - "learning_rate": 5.254673366834171e-05, - "loss": 5.0401, - "step": 47734 - }, - { - "epoch": 24.89439374185137, - "grad_norm": 1.4264172315597534, - "learning_rate": 5.254572864321609e-05, - "loss": 5.3292, - "step": 47735 - }, - { - "epoch": 24.894915254237286, - "grad_norm": 1.5879815816879272, - "learning_rate": 5.254472361809045e-05, - "loss": 4.9973, - "step": 47736 - }, - { - "epoch": 24.895436766623206, - "grad_norm": 1.603525996208191, - "learning_rate": 5.254371859296483e-05, - "loss": 5.4693, - "step": 47737 - }, - { - "epoch": 24.895958279009125, - "grad_norm": 1.5971696376800537, - "learning_rate": 5.2542713567839195e-05, - "loss": 5.2515, - "step": 47738 - }, - { - "epoch": 24.896479791395045, - "grad_norm": 1.7853882312774658, - "learning_rate": 5.254170854271357e-05, - "loss": 5.0559, - "step": 47739 - }, - { - "epoch": 24.897001303780964, - "grad_norm": 1.5833179950714111, - "learning_rate": 5.2540703517587944e-05, - "loss": 5.64, - "step": 47740 - }, - { - "epoch": 24.897522816166884, - "grad_norm": 1.564195156097412, - "learning_rate": 5.253969849246232e-05, - "loss": 5.0239, - "step": 47741 - }, - { - "epoch": 24.898044328552803, - "grad_norm": 1.623983383178711, - "learning_rate": 5.2538693467336686e-05, - "loss": 4.668, - "step": 47742 - }, - { - "epoch": 24.898565840938723, - "grad_norm": 1.4910074472427368, - "learning_rate": 5.253768844221105e-05, - "loss": 5.4858, - "step": 47743 - }, - { - "epoch": 24.899087353324642, - "grad_norm": 1.6191582679748535, - "learning_rate": 5.253668341708543e-05, - "loss": 5.0819, - "step": 47744 - }, - { - "epoch": 24.89960886571056, - "grad_norm": 1.6309188604354858, - "learning_rate": 5.25356783919598e-05, - "loss": 4.4997, - "step": 47745 - }, - { - "epoch": 24.90013037809648, - "grad_norm": 1.7423293590545654, - "learning_rate": 5.253467336683418e-05, - "loss": 5.0749, - "step": 47746 - }, - { - "epoch": 24.9006518904824, - "grad_norm": 1.5687360763549805, - "learning_rate": 5.253366834170854e-05, - "loss": 5.1844, - "step": 47747 - }, - { - "epoch": 24.901173402868316, - "grad_norm": 1.5586190223693848, - "learning_rate": 5.253266331658292e-05, - "loss": 5.2014, - "step": 47748 - }, - { - "epoch": 24.901694915254236, - "grad_norm": 1.6881275177001953, - "learning_rate": 5.2531658291457284e-05, - "loss": 5.3192, - "step": 47749 - }, - { - "epoch": 24.902216427640155, - "grad_norm": 1.6396729946136475, - "learning_rate": 5.253065326633166e-05, - "loss": 5.1135, - "step": 47750 - }, - { - "epoch": 24.902737940026075, - "grad_norm": 1.5733470916748047, - "learning_rate": 5.252964824120603e-05, - "loss": 5.3723, - "step": 47751 - }, - { - "epoch": 24.903259452411994, - "grad_norm": 1.5795012712478638, - "learning_rate": 5.252864321608041e-05, - "loss": 4.9149, - "step": 47752 - }, - { - "epoch": 24.903780964797914, - "grad_norm": 1.8583464622497559, - "learning_rate": 5.2527638190954775e-05, - "loss": 4.7027, - "step": 47753 - }, - { - "epoch": 24.904302477183833, - "grad_norm": 1.6588373184204102, - "learning_rate": 5.252663316582915e-05, - "loss": 4.9412, - "step": 47754 - }, - { - "epoch": 24.904823989569753, - "grad_norm": 1.5213452577590942, - "learning_rate": 5.252562814070352e-05, - "loss": 4.8114, - "step": 47755 - }, - { - "epoch": 24.905345501955672, - "grad_norm": 1.6171296834945679, - "learning_rate": 5.252462311557789e-05, - "loss": 5.2567, - "step": 47756 - }, - { - "epoch": 24.90586701434159, - "grad_norm": 1.5120751857757568, - "learning_rate": 5.2523618090452266e-05, - "loss": 5.2787, - "step": 47757 - }, - { - "epoch": 24.90638852672751, - "grad_norm": 1.57416570186615, - "learning_rate": 5.252261306532663e-05, - "loss": 5.0991, - "step": 47758 - }, - { - "epoch": 24.90691003911343, - "grad_norm": 1.5638071298599243, - "learning_rate": 5.252160804020101e-05, - "loss": 5.2184, - "step": 47759 - }, - { - "epoch": 24.907431551499347, - "grad_norm": 1.546887755393982, - "learning_rate": 5.252060301507538e-05, - "loss": 5.0591, - "step": 47760 - }, - { - "epoch": 24.907953063885266, - "grad_norm": 1.6743097305297852, - "learning_rate": 5.2519597989949757e-05, - "loss": 4.2793, - "step": 47761 - }, - { - "epoch": 24.908474576271185, - "grad_norm": 1.5390578508377075, - "learning_rate": 5.251859296482412e-05, - "loss": 5.1922, - "step": 47762 - }, - { - "epoch": 24.908996088657105, - "grad_norm": 1.5575982332229614, - "learning_rate": 5.25175879396985e-05, - "loss": 5.2024, - "step": 47763 - }, - { - "epoch": 24.909517601043024, - "grad_norm": 1.5341124534606934, - "learning_rate": 5.251658291457286e-05, - "loss": 4.7451, - "step": 47764 - }, - { - "epoch": 24.910039113428944, - "grad_norm": 1.5733764171600342, - "learning_rate": 5.251557788944724e-05, - "loss": 5.5622, - "step": 47765 - }, - { - "epoch": 24.910560625814863, - "grad_norm": 1.5478358268737793, - "learning_rate": 5.251457286432161e-05, - "loss": 4.9181, - "step": 47766 - }, - { - "epoch": 24.911082138200783, - "grad_norm": 1.4321041107177734, - "learning_rate": 5.251356783919599e-05, - "loss": 5.6996, - "step": 47767 - }, - { - "epoch": 24.911603650586702, - "grad_norm": 1.5911256074905396, - "learning_rate": 5.2512562814070354e-05, - "loss": 5.3327, - "step": 47768 - }, - { - "epoch": 24.91212516297262, - "grad_norm": 1.4958876371383667, - "learning_rate": 5.251155778894472e-05, - "loss": 5.6893, - "step": 47769 - }, - { - "epoch": 24.91264667535854, - "grad_norm": 1.538758635520935, - "learning_rate": 5.2510552763819096e-05, - "loss": 5.6016, - "step": 47770 - }, - { - "epoch": 24.91316818774446, - "grad_norm": 1.5784504413604736, - "learning_rate": 5.250954773869347e-05, - "loss": 4.855, - "step": 47771 - }, - { - "epoch": 24.913689700130377, - "grad_norm": 1.6337946653366089, - "learning_rate": 5.2508542713567845e-05, - "loss": 5.1146, - "step": 47772 - }, - { - "epoch": 24.914211212516296, - "grad_norm": 1.522896647453308, - "learning_rate": 5.250753768844221e-05, - "loss": 5.3469, - "step": 47773 - }, - { - "epoch": 24.914732724902215, - "grad_norm": 1.5711334943771362, - "learning_rate": 5.250653266331659e-05, - "loss": 4.9305, - "step": 47774 - }, - { - "epoch": 24.915254237288135, - "grad_norm": 1.6397209167480469, - "learning_rate": 5.250552763819095e-05, - "loss": 5.0464, - "step": 47775 - }, - { - "epoch": 24.915775749674054, - "grad_norm": 1.6840505599975586, - "learning_rate": 5.250452261306533e-05, - "loss": 5.4156, - "step": 47776 - }, - { - "epoch": 24.916297262059974, - "grad_norm": 1.6195839643478394, - "learning_rate": 5.25035175879397e-05, - "loss": 5.41, - "step": 47777 - }, - { - "epoch": 24.916818774445893, - "grad_norm": 1.568685531616211, - "learning_rate": 5.250251256281408e-05, - "loss": 5.4221, - "step": 47778 - }, - { - "epoch": 24.917340286831813, - "grad_norm": 1.6174300909042358, - "learning_rate": 5.250150753768844e-05, - "loss": 4.7464, - "step": 47779 - }, - { - "epoch": 24.917861799217732, - "grad_norm": 1.5490334033966064, - "learning_rate": 5.250050251256282e-05, - "loss": 5.3887, - "step": 47780 - }, - { - "epoch": 24.91838331160365, - "grad_norm": 1.7119126319885254, - "learning_rate": 5.249949748743719e-05, - "loss": 4.7146, - "step": 47781 - }, - { - "epoch": 24.91890482398957, - "grad_norm": 1.6385048627853394, - "learning_rate": 5.2498492462311555e-05, - "loss": 4.9601, - "step": 47782 - }, - { - "epoch": 24.919426336375487, - "grad_norm": 1.5603028535842896, - "learning_rate": 5.249748743718593e-05, - "loss": 5.252, - "step": 47783 - }, - { - "epoch": 24.919947848761407, - "grad_norm": 1.5985231399536133, - "learning_rate": 5.24964824120603e-05, - "loss": 5.3915, - "step": 47784 - }, - { - "epoch": 24.920469361147326, - "grad_norm": 1.4744200706481934, - "learning_rate": 5.2495477386934675e-05, - "loss": 5.5178, - "step": 47785 - }, - { - "epoch": 24.920990873533245, - "grad_norm": 1.6509729623794556, - "learning_rate": 5.2494472361809046e-05, - "loss": 5.1761, - "step": 47786 - }, - { - "epoch": 24.921512385919165, - "grad_norm": 1.5562658309936523, - "learning_rate": 5.2493467336683424e-05, - "loss": 5.3523, - "step": 47787 - }, - { - "epoch": 24.922033898305084, - "grad_norm": 1.6325427293777466, - "learning_rate": 5.249246231155779e-05, - "loss": 4.9509, - "step": 47788 - }, - { - "epoch": 24.922555410691004, - "grad_norm": 1.4898924827575684, - "learning_rate": 5.2491457286432166e-05, - "loss": 5.384, - "step": 47789 - }, - { - "epoch": 24.923076923076923, - "grad_norm": 1.4454082250595093, - "learning_rate": 5.249045226130653e-05, - "loss": 5.1173, - "step": 47790 - }, - { - "epoch": 24.923598435462843, - "grad_norm": 1.522719144821167, - "learning_rate": 5.248944723618091e-05, - "loss": 5.2724, - "step": 47791 - }, - { - "epoch": 24.924119947848762, - "grad_norm": 1.6397840976715088, - "learning_rate": 5.248844221105528e-05, - "loss": 5.1661, - "step": 47792 - }, - { - "epoch": 24.92464146023468, - "grad_norm": 1.563125491142273, - "learning_rate": 5.248743718592966e-05, - "loss": 4.8694, - "step": 47793 - }, - { - "epoch": 24.9251629726206, - "grad_norm": 1.501402497291565, - "learning_rate": 5.248643216080402e-05, - "loss": 5.5893, - "step": 47794 - }, - { - "epoch": 24.92568448500652, - "grad_norm": 1.580520510673523, - "learning_rate": 5.24854271356784e-05, - "loss": 5.2237, - "step": 47795 - }, - { - "epoch": 24.926205997392437, - "grad_norm": 1.56383216381073, - "learning_rate": 5.2484422110552764e-05, - "loss": 4.8392, - "step": 47796 - }, - { - "epoch": 24.926727509778356, - "grad_norm": 1.5567612648010254, - "learning_rate": 5.2483417085427135e-05, - "loss": 5.2373, - "step": 47797 - }, - { - "epoch": 24.927249022164276, - "grad_norm": 1.5055768489837646, - "learning_rate": 5.248241206030151e-05, - "loss": 4.8063, - "step": 47798 - }, - { - "epoch": 24.927770534550195, - "grad_norm": 1.5126721858978271, - "learning_rate": 5.248140703517588e-05, - "loss": 5.4846, - "step": 47799 - }, - { - "epoch": 24.928292046936114, - "grad_norm": 1.479546070098877, - "learning_rate": 5.2480402010050255e-05, - "loss": 5.2124, - "step": 47800 - }, - { - "epoch": 24.928813559322034, - "grad_norm": 1.5349624156951904, - "learning_rate": 5.2479396984924626e-05, - "loss": 5.1559, - "step": 47801 - }, - { - "epoch": 24.929335071707953, - "grad_norm": 1.575971245765686, - "learning_rate": 5.2478391959799004e-05, - "loss": 5.4469, - "step": 47802 - }, - { - "epoch": 24.929856584093873, - "grad_norm": 1.5864543914794922, - "learning_rate": 5.247738693467337e-05, - "loss": 5.2331, - "step": 47803 - }, - { - "epoch": 24.930378096479792, - "grad_norm": 1.5096724033355713, - "learning_rate": 5.2476381909547746e-05, - "loss": 4.6205, - "step": 47804 - }, - { - "epoch": 24.93089960886571, - "grad_norm": 1.5665311813354492, - "learning_rate": 5.247537688442211e-05, - "loss": 4.6509, - "step": 47805 - }, - { - "epoch": 24.93142112125163, - "grad_norm": 1.529343605041504, - "learning_rate": 5.247437185929649e-05, - "loss": 5.2868, - "step": 47806 - }, - { - "epoch": 24.931942633637547, - "grad_norm": 1.5825896263122559, - "learning_rate": 5.247336683417086e-05, - "loss": 5.3469, - "step": 47807 - }, - { - "epoch": 24.932464146023467, - "grad_norm": 1.6180617809295654, - "learning_rate": 5.2472361809045237e-05, - "loss": 5.3011, - "step": 47808 - }, - { - "epoch": 24.932985658409386, - "grad_norm": 1.525445580482483, - "learning_rate": 5.24713567839196e-05, - "loss": 4.9811, - "step": 47809 - }, - { - "epoch": 24.933507170795306, - "grad_norm": 1.46649968624115, - "learning_rate": 5.2470351758793965e-05, - "loss": 4.7676, - "step": 47810 - }, - { - "epoch": 24.934028683181225, - "grad_norm": 1.604508638381958, - "learning_rate": 5.246934673366834e-05, - "loss": 5.3004, - "step": 47811 - }, - { - "epoch": 24.934550195567144, - "grad_norm": 1.587953805923462, - "learning_rate": 5.2468341708542714e-05, - "loss": 4.6854, - "step": 47812 - }, - { - "epoch": 24.935071707953064, - "grad_norm": 1.629517674446106, - "learning_rate": 5.246733668341709e-05, - "loss": 5.1924, - "step": 47813 - }, - { - "epoch": 24.935593220338983, - "grad_norm": 1.5708280801773071, - "learning_rate": 5.2466331658291456e-05, - "loss": 5.1146, - "step": 47814 - }, - { - "epoch": 24.936114732724903, - "grad_norm": 1.6082669496536255, - "learning_rate": 5.2465326633165834e-05, - "loss": 4.7022, - "step": 47815 - }, - { - "epoch": 24.936636245110822, - "grad_norm": 1.5975440740585327, - "learning_rate": 5.24643216080402e-05, - "loss": 5.44, - "step": 47816 - }, - { - "epoch": 24.937157757496742, - "grad_norm": 1.5923064947128296, - "learning_rate": 5.2463316582914576e-05, - "loss": 5.5848, - "step": 47817 - }, - { - "epoch": 24.93767926988266, - "grad_norm": 1.4620832204818726, - "learning_rate": 5.246231155778895e-05, - "loss": 5.2175, - "step": 47818 - }, - { - "epoch": 24.938200782268577, - "grad_norm": 1.6335848569869995, - "learning_rate": 5.2461306532663325e-05, - "loss": 5.4373, - "step": 47819 - }, - { - "epoch": 24.938722294654497, - "grad_norm": 1.5892499685287476, - "learning_rate": 5.246030150753769e-05, - "loss": 4.8808, - "step": 47820 - }, - { - "epoch": 24.939243807040416, - "grad_norm": 1.4610767364501953, - "learning_rate": 5.245929648241207e-05, - "loss": 5.3893, - "step": 47821 - }, - { - "epoch": 24.939765319426336, - "grad_norm": 1.6223840713500977, - "learning_rate": 5.245829145728644e-05, - "loss": 4.9274, - "step": 47822 - }, - { - "epoch": 24.940286831812255, - "grad_norm": 1.655417561531067, - "learning_rate": 5.24572864321608e-05, - "loss": 4.8352, - "step": 47823 - }, - { - "epoch": 24.940808344198174, - "grad_norm": 1.625246524810791, - "learning_rate": 5.245628140703518e-05, - "loss": 5.2717, - "step": 47824 - }, - { - "epoch": 24.941329856584094, - "grad_norm": 1.5081887245178223, - "learning_rate": 5.2455276381909544e-05, - "loss": 5.1119, - "step": 47825 - }, - { - "epoch": 24.941851368970013, - "grad_norm": 1.5487219095230103, - "learning_rate": 5.245427135678392e-05, - "loss": 4.9596, - "step": 47826 - }, - { - "epoch": 24.942372881355933, - "grad_norm": 1.5525285005569458, - "learning_rate": 5.245326633165829e-05, - "loss": 5.1782, - "step": 47827 - }, - { - "epoch": 24.942894393741852, - "grad_norm": 1.570200800895691, - "learning_rate": 5.245226130653267e-05, - "loss": 5.1889, - "step": 47828 - }, - { - "epoch": 24.943415906127772, - "grad_norm": 1.6159007549285889, - "learning_rate": 5.2451256281407035e-05, - "loss": 5.2351, - "step": 47829 - }, - { - "epoch": 24.94393741851369, - "grad_norm": 1.6352791786193848, - "learning_rate": 5.245025125628141e-05, - "loss": 5.5035, - "step": 47830 - }, - { - "epoch": 24.944458930899607, - "grad_norm": 1.5439342260360718, - "learning_rate": 5.244924623115578e-05, - "loss": 5.227, - "step": 47831 - }, - { - "epoch": 24.944980443285527, - "grad_norm": 1.6885194778442383, - "learning_rate": 5.2448241206030155e-05, - "loss": 4.7509, - "step": 47832 - }, - { - "epoch": 24.945501955671446, - "grad_norm": 1.5482327938079834, - "learning_rate": 5.2447236180904526e-05, - "loss": 5.0549, - "step": 47833 - }, - { - "epoch": 24.946023468057366, - "grad_norm": 1.6050736904144287, - "learning_rate": 5.2446231155778904e-05, - "loss": 5.2309, - "step": 47834 - }, - { - "epoch": 24.946544980443285, - "grad_norm": 1.650611162185669, - "learning_rate": 5.244522613065327e-05, - "loss": 5.2114, - "step": 47835 - }, - { - "epoch": 24.947066492829205, - "grad_norm": 1.5977762937545776, - "learning_rate": 5.244422110552763e-05, - "loss": 5.3857, - "step": 47836 - }, - { - "epoch": 24.947588005215124, - "grad_norm": 1.5972933769226074, - "learning_rate": 5.244321608040201e-05, - "loss": 4.8976, - "step": 47837 - }, - { - "epoch": 24.948109517601043, - "grad_norm": 1.5562890768051147, - "learning_rate": 5.244221105527638e-05, - "loss": 4.9802, - "step": 47838 - }, - { - "epoch": 24.948631029986963, - "grad_norm": 1.4937858581542969, - "learning_rate": 5.244120603015076e-05, - "loss": 5.3736, - "step": 47839 - }, - { - "epoch": 24.949152542372882, - "grad_norm": 1.6591356992721558, - "learning_rate": 5.2440201005025124e-05, - "loss": 5.1913, - "step": 47840 - }, - { - "epoch": 24.949674054758802, - "grad_norm": 1.650553584098816, - "learning_rate": 5.24391959798995e-05, - "loss": 5.2942, - "step": 47841 - }, - { - "epoch": 24.95019556714472, - "grad_norm": 1.531273365020752, - "learning_rate": 5.2438190954773866e-05, - "loss": 5.4793, - "step": 47842 - }, - { - "epoch": 24.950717079530637, - "grad_norm": 1.4849472045898438, - "learning_rate": 5.2437185929648244e-05, - "loss": 5.0029, - "step": 47843 - }, - { - "epoch": 24.951238591916557, - "grad_norm": 1.550865888595581, - "learning_rate": 5.2436180904522615e-05, - "loss": 5.1235, - "step": 47844 - }, - { - "epoch": 24.951760104302476, - "grad_norm": 1.67410409450531, - "learning_rate": 5.243517587939699e-05, - "loss": 5.028, - "step": 47845 - }, - { - "epoch": 24.952281616688396, - "grad_norm": 1.5550713539123535, - "learning_rate": 5.243417085427136e-05, - "loss": 5.3094, - "step": 47846 - }, - { - "epoch": 24.952803129074315, - "grad_norm": 1.4926759004592896, - "learning_rate": 5.2433165829145735e-05, - "loss": 4.7864, - "step": 47847 - }, - { - "epoch": 24.953324641460235, - "grad_norm": 1.5091379880905151, - "learning_rate": 5.2432160804020106e-05, - "loss": 5.639, - "step": 47848 - }, - { - "epoch": 24.953846153846154, - "grad_norm": 1.5771652460098267, - "learning_rate": 5.243115577889447e-05, - "loss": 5.3105, - "step": 47849 - }, - { - "epoch": 24.954367666232073, - "grad_norm": 1.619673490524292, - "learning_rate": 5.243015075376885e-05, - "loss": 4.6853, - "step": 47850 - }, - { - "epoch": 24.954889178617993, - "grad_norm": 1.643508791923523, - "learning_rate": 5.242914572864321e-05, - "loss": 5.1724, - "step": 47851 - }, - { - "epoch": 24.955410691003912, - "grad_norm": 1.526640772819519, - "learning_rate": 5.242814070351759e-05, - "loss": 5.337, - "step": 47852 - }, - { - "epoch": 24.955932203389832, - "grad_norm": 1.5601602792739868, - "learning_rate": 5.242713567839196e-05, - "loss": 5.4956, - "step": 47853 - }, - { - "epoch": 24.95645371577575, - "grad_norm": 1.5568257570266724, - "learning_rate": 5.242613065326634e-05, - "loss": 5.6838, - "step": 47854 - }, - { - "epoch": 24.956975228161667, - "grad_norm": 1.599039077758789, - "learning_rate": 5.24251256281407e-05, - "loss": 5.2723, - "step": 47855 - }, - { - "epoch": 24.957496740547587, - "grad_norm": 1.5869622230529785, - "learning_rate": 5.242412060301508e-05, - "loss": 5.1557, - "step": 47856 - }, - { - "epoch": 24.958018252933506, - "grad_norm": 1.547155499458313, - "learning_rate": 5.2423115577889445e-05, - "loss": 5.3211, - "step": 47857 - }, - { - "epoch": 24.958539765319426, - "grad_norm": 1.4963464736938477, - "learning_rate": 5.242211055276382e-05, - "loss": 5.4319, - "step": 47858 - }, - { - "epoch": 24.959061277705345, - "grad_norm": 1.5818794965744019, - "learning_rate": 5.2421105527638194e-05, - "loss": 4.9564, - "step": 47859 - }, - { - "epoch": 24.959582790091265, - "grad_norm": 1.60922372341156, - "learning_rate": 5.242010050251257e-05, - "loss": 5.6372, - "step": 47860 - }, - { - "epoch": 24.960104302477184, - "grad_norm": 1.5624961853027344, - "learning_rate": 5.2419095477386936e-05, - "loss": 5.4488, - "step": 47861 - }, - { - "epoch": 24.960625814863103, - "grad_norm": 1.547555685043335, - "learning_rate": 5.24180904522613e-05, - "loss": 5.3291, - "step": 47862 - }, - { - "epoch": 24.961147327249023, - "grad_norm": 1.5541396141052246, - "learning_rate": 5.241708542713568e-05, - "loss": 5.0084, - "step": 47863 - }, - { - "epoch": 24.961668839634942, - "grad_norm": 1.5431007146835327, - "learning_rate": 5.241608040201005e-05, - "loss": 5.3004, - "step": 47864 - }, - { - "epoch": 24.962190352020862, - "grad_norm": 1.6425081491470337, - "learning_rate": 5.241507537688443e-05, - "loss": 5.0095, - "step": 47865 - }, - { - "epoch": 24.96271186440678, - "grad_norm": 1.5993643999099731, - "learning_rate": 5.241407035175879e-05, - "loss": 5.2458, - "step": 47866 - }, - { - "epoch": 24.963233376792697, - "grad_norm": 1.5550777912139893, - "learning_rate": 5.241306532663317e-05, - "loss": 5.1539, - "step": 47867 - }, - { - "epoch": 24.963754889178617, - "grad_norm": 1.5212130546569824, - "learning_rate": 5.241206030150754e-05, - "loss": 4.9883, - "step": 47868 - }, - { - "epoch": 24.964276401564536, - "grad_norm": 1.6037548780441284, - "learning_rate": 5.241105527638192e-05, - "loss": 5.3218, - "step": 47869 - }, - { - "epoch": 24.964797913950456, - "grad_norm": 1.4925531148910522, - "learning_rate": 5.241005025125628e-05, - "loss": 5.5152, - "step": 47870 - }, - { - "epoch": 24.965319426336375, - "grad_norm": 1.5641018152236938, - "learning_rate": 5.240904522613066e-05, - "loss": 5.1541, - "step": 47871 - }, - { - "epoch": 24.965840938722295, - "grad_norm": 1.5720890760421753, - "learning_rate": 5.2408040201005025e-05, - "loss": 5.2101, - "step": 47872 - }, - { - "epoch": 24.966362451108214, - "grad_norm": 1.4760297536849976, - "learning_rate": 5.24070351758794e-05, - "loss": 5.6326, - "step": 47873 - }, - { - "epoch": 24.966883963494134, - "grad_norm": 1.6819473505020142, - "learning_rate": 5.2406030150753773e-05, - "loss": 5.0774, - "step": 47874 - }, - { - "epoch": 24.967405475880053, - "grad_norm": 1.536594033241272, - "learning_rate": 5.240502512562814e-05, - "loss": 5.234, - "step": 47875 - }, - { - "epoch": 24.967926988265972, - "grad_norm": 1.520174264907837, - "learning_rate": 5.2404020100502516e-05, - "loss": 5.253, - "step": 47876 - }, - { - "epoch": 24.968448500651892, - "grad_norm": 1.5528056621551514, - "learning_rate": 5.240301507537688e-05, - "loss": 5.6703, - "step": 47877 - }, - { - "epoch": 24.96897001303781, - "grad_norm": 1.610087513923645, - "learning_rate": 5.240201005025126e-05, - "loss": 5.4435, - "step": 47878 - }, - { - "epoch": 24.969491525423727, - "grad_norm": 1.4586291313171387, - "learning_rate": 5.240100502512563e-05, - "loss": 5.2981, - "step": 47879 - }, - { - "epoch": 24.970013037809647, - "grad_norm": 1.5283790826797485, - "learning_rate": 5.2400000000000007e-05, - "loss": 5.3911, - "step": 47880 - }, - { - "epoch": 24.970534550195566, - "grad_norm": 1.5742734670639038, - "learning_rate": 5.239899497487437e-05, - "loss": 5.6052, - "step": 47881 - }, - { - "epoch": 24.971056062581486, - "grad_norm": 1.5516984462738037, - "learning_rate": 5.239798994974875e-05, - "loss": 5.2629, - "step": 47882 - }, - { - "epoch": 24.971577574967405, - "grad_norm": 1.622444748878479, - "learning_rate": 5.239698492462311e-05, - "loss": 5.3749, - "step": 47883 - }, - { - "epoch": 24.972099087353325, - "grad_norm": 1.4948434829711914, - "learning_rate": 5.239597989949749e-05, - "loss": 5.6435, - "step": 47884 - }, - { - "epoch": 24.972620599739244, - "grad_norm": 1.5284301042556763, - "learning_rate": 5.239497487437186e-05, - "loss": 4.8262, - "step": 47885 - }, - { - "epoch": 24.973142112125164, - "grad_norm": 1.5983543395996094, - "learning_rate": 5.239396984924624e-05, - "loss": 4.7585, - "step": 47886 - }, - { - "epoch": 24.973663624511083, - "grad_norm": 1.4761754274368286, - "learning_rate": 5.2392964824120604e-05, - "loss": 5.2941, - "step": 47887 - }, - { - "epoch": 24.974185136897002, - "grad_norm": 1.4496870040893555, - "learning_rate": 5.239195979899498e-05, - "loss": 5.711, - "step": 47888 - }, - { - "epoch": 24.974706649282922, - "grad_norm": 1.530072569847107, - "learning_rate": 5.239095477386935e-05, - "loss": 5.3934, - "step": 47889 - }, - { - "epoch": 24.975228161668838, - "grad_norm": 1.5856667757034302, - "learning_rate": 5.238994974874372e-05, - "loss": 4.9854, - "step": 47890 - }, - { - "epoch": 24.975749674054757, - "grad_norm": 1.494483470916748, - "learning_rate": 5.2388944723618095e-05, - "loss": 5.0581, - "step": 47891 - }, - { - "epoch": 24.976271186440677, - "grad_norm": 1.594369649887085, - "learning_rate": 5.238793969849246e-05, - "loss": 5.1751, - "step": 47892 - }, - { - "epoch": 24.976792698826596, - "grad_norm": 1.4724606275558472, - "learning_rate": 5.238693467336684e-05, - "loss": 4.5484, - "step": 47893 - }, - { - "epoch": 24.977314211212516, - "grad_norm": 1.499943494796753, - "learning_rate": 5.238592964824121e-05, - "loss": 5.4197, - "step": 47894 - }, - { - "epoch": 24.977835723598435, - "grad_norm": 1.6119844913482666, - "learning_rate": 5.2384924623115586e-05, - "loss": 5.2106, - "step": 47895 - }, - { - "epoch": 24.978357235984355, - "grad_norm": 1.5109329223632812, - "learning_rate": 5.238391959798995e-05, - "loss": 5.4181, - "step": 47896 - }, - { - "epoch": 24.978878748370274, - "grad_norm": 1.5986223220825195, - "learning_rate": 5.238291457286433e-05, - "loss": 4.9645, - "step": 47897 - }, - { - "epoch": 24.979400260756194, - "grad_norm": 1.4698710441589355, - "learning_rate": 5.238190954773869e-05, - "loss": 5.4356, - "step": 47898 - }, - { - "epoch": 24.979921773142113, - "grad_norm": 1.4777920246124268, - "learning_rate": 5.238090452261307e-05, - "loss": 5.2425, - "step": 47899 - }, - { - "epoch": 24.980443285528033, - "grad_norm": 1.4898412227630615, - "learning_rate": 5.237989949748744e-05, - "loss": 5.556, - "step": 47900 - }, - { - "epoch": 24.980964797913952, - "grad_norm": 1.624388337135315, - "learning_rate": 5.237889447236182e-05, - "loss": 4.7454, - "step": 47901 - }, - { - "epoch": 24.98148631029987, - "grad_norm": 1.5074251890182495, - "learning_rate": 5.237788944723618e-05, - "loss": 5.4303, - "step": 47902 - }, - { - "epoch": 24.982007822685787, - "grad_norm": 1.5527795553207397, - "learning_rate": 5.237688442211055e-05, - "loss": 5.2367, - "step": 47903 - }, - { - "epoch": 24.982529335071707, - "grad_norm": 1.563732385635376, - "learning_rate": 5.2375879396984925e-05, - "loss": 5.2297, - "step": 47904 - }, - { - "epoch": 24.983050847457626, - "grad_norm": 1.6776899099349976, - "learning_rate": 5.2374874371859296e-05, - "loss": 4.5675, - "step": 47905 - }, - { - "epoch": 24.983572359843546, - "grad_norm": 1.5880287885665894, - "learning_rate": 5.2373869346733674e-05, - "loss": 5.2222, - "step": 47906 - }, - { - "epoch": 24.984093872229465, - "grad_norm": 1.6778610944747925, - "learning_rate": 5.237286432160804e-05, - "loss": 4.8532, - "step": 47907 - }, - { - "epoch": 24.984615384615385, - "grad_norm": 1.7292981147766113, - "learning_rate": 5.2371859296482416e-05, - "loss": 5.0938, - "step": 47908 - }, - { - "epoch": 24.985136897001304, - "grad_norm": 1.70377516746521, - "learning_rate": 5.237085427135679e-05, - "loss": 5.3753, - "step": 47909 - }, - { - "epoch": 24.985658409387224, - "grad_norm": 1.7160298824310303, - "learning_rate": 5.2369849246231165e-05, - "loss": 4.6394, - "step": 47910 - }, - { - "epoch": 24.986179921773143, - "grad_norm": 1.576291799545288, - "learning_rate": 5.236884422110553e-05, - "loss": 5.343, - "step": 47911 - }, - { - "epoch": 24.986701434159063, - "grad_norm": 1.6506407260894775, - "learning_rate": 5.236783919597991e-05, - "loss": 5.13, - "step": 47912 - }, - { - "epoch": 24.987222946544982, - "grad_norm": 1.6022146940231323, - "learning_rate": 5.236683417085427e-05, - "loss": 5.4724, - "step": 47913 - }, - { - "epoch": 24.987744458930898, - "grad_norm": 1.5101474523544312, - "learning_rate": 5.236582914572865e-05, - "loss": 5.2945, - "step": 47914 - }, - { - "epoch": 24.988265971316817, - "grad_norm": 1.5308173894882202, - "learning_rate": 5.236482412060302e-05, - "loss": 5.2781, - "step": 47915 - }, - { - "epoch": 24.988787483702737, - "grad_norm": 1.5754930973052979, - "learning_rate": 5.2363819095477385e-05, - "loss": 5.2611, - "step": 47916 - }, - { - "epoch": 24.989308996088656, - "grad_norm": 1.6063728332519531, - "learning_rate": 5.236281407035176e-05, - "loss": 5.3116, - "step": 47917 - }, - { - "epoch": 24.989830508474576, - "grad_norm": 1.6133534908294678, - "learning_rate": 5.236180904522613e-05, - "loss": 4.9285, - "step": 47918 - }, - { - "epoch": 24.990352020860495, - "grad_norm": 1.5047926902770996, - "learning_rate": 5.2360804020100505e-05, - "loss": 5.5971, - "step": 47919 - }, - { - "epoch": 24.990873533246415, - "grad_norm": 1.448617696762085, - "learning_rate": 5.2359798994974876e-05, - "loss": 5.3678, - "step": 47920 - }, - { - "epoch": 24.991395045632334, - "grad_norm": 1.6302272081375122, - "learning_rate": 5.2358793969849254e-05, - "loss": 5.0403, - "step": 47921 - }, - { - "epoch": 24.991916558018254, - "grad_norm": 1.6421219110488892, - "learning_rate": 5.235778894472362e-05, - "loss": 5.3084, - "step": 47922 - }, - { - "epoch": 24.992438070404173, - "grad_norm": 1.4586857557296753, - "learning_rate": 5.2356783919597996e-05, - "loss": 5.3428, - "step": 47923 - }, - { - "epoch": 24.992959582790093, - "grad_norm": 1.4806804656982422, - "learning_rate": 5.235577889447236e-05, - "loss": 5.5212, - "step": 47924 - }, - { - "epoch": 24.993481095176012, - "grad_norm": 1.6511995792388916, - "learning_rate": 5.235477386934674e-05, - "loss": 5.1964, - "step": 47925 - }, - { - "epoch": 24.994002607561928, - "grad_norm": 1.5205432176589966, - "learning_rate": 5.235376884422111e-05, - "loss": 5.3313, - "step": 47926 - }, - { - "epoch": 24.994524119947847, - "grad_norm": 1.5951286554336548, - "learning_rate": 5.2352763819095487e-05, - "loss": 5.2459, - "step": 47927 - }, - { - "epoch": 24.995045632333767, - "grad_norm": 1.6039103269577026, - "learning_rate": 5.235175879396985e-05, - "loss": 5.2013, - "step": 47928 - }, - { - "epoch": 24.995567144719686, - "grad_norm": 1.5081759691238403, - "learning_rate": 5.2350753768844215e-05, - "loss": 4.9872, - "step": 47929 - }, - { - "epoch": 24.996088657105606, - "grad_norm": 1.6342885494232178, - "learning_rate": 5.234974874371859e-05, - "loss": 4.9709, - "step": 47930 - }, - { - "epoch": 24.996610169491525, - "grad_norm": 1.6218138933181763, - "learning_rate": 5.2348743718592964e-05, - "loss": 5.3819, - "step": 47931 - }, - { - "epoch": 24.997131681877445, - "grad_norm": 1.5713235139846802, - "learning_rate": 5.234773869346734e-05, - "loss": 5.328, - "step": 47932 - }, - { - "epoch": 24.997653194263364, - "grad_norm": 1.6546761989593506, - "learning_rate": 5.2346733668341706e-05, - "loss": 5.2006, - "step": 47933 - }, - { - "epoch": 24.998174706649284, - "grad_norm": 1.5714281797409058, - "learning_rate": 5.2345728643216084e-05, - "loss": 5.452, - "step": 47934 - }, - { - "epoch": 24.998696219035203, - "grad_norm": 1.4970617294311523, - "learning_rate": 5.2344723618090455e-05, - "loss": 5.2018, - "step": 47935 - }, - { - "epoch": 24.999217731421123, - "grad_norm": 1.5740658044815063, - "learning_rate": 5.234371859296483e-05, - "loss": 5.5606, - "step": 47936 - }, - { - "epoch": 24.999739243807042, - "grad_norm": 1.5823925733566284, - "learning_rate": 5.23427135678392e-05, - "loss": 5.2419, - "step": 47937 - }, - { - "epoch": 25.000260756192958, - "grad_norm": 1.6449239253997803, - "learning_rate": 5.2341708542713575e-05, - "loss": 5.2363, - "step": 47938 - }, - { - "epoch": 25.000782268578877, - "grad_norm": 1.5797241926193237, - "learning_rate": 5.234070351758794e-05, - "loss": 5.0465, - "step": 47939 - }, - { - "epoch": 25.001303780964797, - "grad_norm": 1.5703134536743164, - "learning_rate": 5.233969849246232e-05, - "loss": 5.2689, - "step": 47940 - }, - { - "epoch": 25.001825293350716, - "grad_norm": 1.567196249961853, - "learning_rate": 5.233869346733669e-05, - "loss": 5.2132, - "step": 47941 - }, - { - "epoch": 25.002346805736636, - "grad_norm": 1.5825436115264893, - "learning_rate": 5.233768844221105e-05, - "loss": 5.0222, - "step": 47942 - }, - { - "epoch": 25.002868318122555, - "grad_norm": 1.5517722368240356, - "learning_rate": 5.233668341708543e-05, - "loss": 5.3922, - "step": 47943 - }, - { - "epoch": 25.003389830508475, - "grad_norm": 1.5709059238433838, - "learning_rate": 5.2335678391959794e-05, - "loss": 5.3029, - "step": 47944 - }, - { - "epoch": 25.003911342894394, - "grad_norm": 1.5589113235473633, - "learning_rate": 5.233467336683417e-05, - "loss": 5.2594, - "step": 47945 - }, - { - "epoch": 25.004432855280314, - "grad_norm": 1.5097631216049194, - "learning_rate": 5.233366834170854e-05, - "loss": 4.9391, - "step": 47946 - }, - { - "epoch": 25.004954367666233, - "grad_norm": 1.5377240180969238, - "learning_rate": 5.233266331658292e-05, - "loss": 5.5488, - "step": 47947 - }, - { - "epoch": 25.005475880052153, - "grad_norm": 1.5296876430511475, - "learning_rate": 5.2331658291457285e-05, - "loss": 5.0043, - "step": 47948 - }, - { - "epoch": 25.005997392438072, - "grad_norm": 1.6083464622497559, - "learning_rate": 5.233065326633166e-05, - "loss": 5.0694, - "step": 47949 - }, - { - "epoch": 25.006518904823988, - "grad_norm": 1.5442899465560913, - "learning_rate": 5.232964824120603e-05, - "loss": 5.4865, - "step": 47950 - }, - { - "epoch": 25.007040417209907, - "grad_norm": 1.6254340410232544, - "learning_rate": 5.2328643216080405e-05, - "loss": 5.4321, - "step": 47951 - }, - { - "epoch": 25.007561929595827, - "grad_norm": 1.6587506532669067, - "learning_rate": 5.2327638190954776e-05, - "loss": 5.0631, - "step": 47952 - }, - { - "epoch": 25.008083441981746, - "grad_norm": 1.5868816375732422, - "learning_rate": 5.2326633165829154e-05, - "loss": 5.3961, - "step": 47953 - }, - { - "epoch": 25.008604954367666, - "grad_norm": 1.4974640607833862, - "learning_rate": 5.232562814070352e-05, - "loss": 5.1597, - "step": 47954 - }, - { - "epoch": 25.009126466753585, - "grad_norm": 1.5757125616073608, - "learning_rate": 5.232462311557789e-05, - "loss": 5.1841, - "step": 47955 - }, - { - "epoch": 25.009647979139505, - "grad_norm": 1.4377646446228027, - "learning_rate": 5.232361809045227e-05, - "loss": 4.8552, - "step": 47956 - }, - { - "epoch": 25.010169491525424, - "grad_norm": 1.6160539388656616, - "learning_rate": 5.232261306532663e-05, - "loss": 5.178, - "step": 47957 - }, - { - "epoch": 25.010691003911344, - "grad_norm": 1.6632559299468994, - "learning_rate": 5.232160804020101e-05, - "loss": 5.0398, - "step": 47958 - }, - { - "epoch": 25.011212516297263, - "grad_norm": 1.5708813667297363, - "learning_rate": 5.2320603015075374e-05, - "loss": 5.4516, - "step": 47959 - }, - { - "epoch": 25.011734028683183, - "grad_norm": 1.4331462383270264, - "learning_rate": 5.231959798994975e-05, - "loss": 4.6523, - "step": 47960 - }, - { - "epoch": 25.012255541069102, - "grad_norm": 1.5590027570724487, - "learning_rate": 5.231859296482412e-05, - "loss": 4.9238, - "step": 47961 - }, - { - "epoch": 25.012777053455018, - "grad_norm": 1.491186261177063, - "learning_rate": 5.23175879396985e-05, - "loss": 5.4107, - "step": 47962 - }, - { - "epoch": 25.013298565840937, - "grad_norm": 1.4307039976119995, - "learning_rate": 5.2316582914572865e-05, - "loss": 5.2987, - "step": 47963 - }, - { - "epoch": 25.013820078226857, - "grad_norm": 1.5206658840179443, - "learning_rate": 5.231557788944724e-05, - "loss": 4.6962, - "step": 47964 - }, - { - "epoch": 25.014341590612776, - "grad_norm": 1.5465837717056274, - "learning_rate": 5.231457286432161e-05, - "loss": 5.3571, - "step": 47965 - }, - { - "epoch": 25.014863102998696, - "grad_norm": 1.5505754947662354, - "learning_rate": 5.2313567839195985e-05, - "loss": 5.3454, - "step": 47966 - }, - { - "epoch": 25.015384615384615, - "grad_norm": 1.50552237033844, - "learning_rate": 5.2312562814070356e-05, - "loss": 5.5509, - "step": 47967 - }, - { - "epoch": 25.015906127770535, - "grad_norm": 1.5563350915908813, - "learning_rate": 5.231155778894472e-05, - "loss": 5.2532, - "step": 47968 - }, - { - "epoch": 25.016427640156454, - "grad_norm": 1.5838203430175781, - "learning_rate": 5.23105527638191e-05, - "loss": 4.7039, - "step": 47969 - }, - { - "epoch": 25.016949152542374, - "grad_norm": 1.5300307273864746, - "learning_rate": 5.230954773869346e-05, - "loss": 5.2034, - "step": 47970 - }, - { - "epoch": 25.017470664928293, - "grad_norm": 1.5971177816390991, - "learning_rate": 5.230854271356784e-05, - "loss": 5.2748, - "step": 47971 - }, - { - "epoch": 25.017992177314213, - "grad_norm": 1.8589894771575928, - "learning_rate": 5.230753768844221e-05, - "loss": 4.8425, - "step": 47972 - }, - { - "epoch": 25.018513689700132, - "grad_norm": 1.5234215259552002, - "learning_rate": 5.230653266331659e-05, - "loss": 4.8835, - "step": 47973 - }, - { - "epoch": 25.019035202086048, - "grad_norm": 1.5195035934448242, - "learning_rate": 5.230552763819095e-05, - "loss": 5.4634, - "step": 47974 - }, - { - "epoch": 25.019556714471967, - "grad_norm": 1.7104145288467407, - "learning_rate": 5.230452261306533e-05, - "loss": 4.8629, - "step": 47975 - }, - { - "epoch": 25.020078226857887, - "grad_norm": 1.6063058376312256, - "learning_rate": 5.23035175879397e-05, - "loss": 5.003, - "step": 47976 - }, - { - "epoch": 25.020599739243806, - "grad_norm": 1.5604350566864014, - "learning_rate": 5.230251256281408e-05, - "loss": 4.911, - "step": 47977 - }, - { - "epoch": 25.021121251629726, - "grad_norm": 1.4927552938461304, - "learning_rate": 5.2301507537688444e-05, - "loss": 4.872, - "step": 47978 - }, - { - "epoch": 25.021642764015645, - "grad_norm": 1.5293821096420288, - "learning_rate": 5.230050251256282e-05, - "loss": 4.1729, - "step": 47979 - }, - { - "epoch": 25.022164276401565, - "grad_norm": 1.5910650491714478, - "learning_rate": 5.2299497487437186e-05, - "loss": 5.6085, - "step": 47980 - }, - { - "epoch": 25.022685788787484, - "grad_norm": 1.5396956205368042, - "learning_rate": 5.2298492462311564e-05, - "loss": 5.2807, - "step": 47981 - }, - { - "epoch": 25.023207301173404, - "grad_norm": 1.6444637775421143, - "learning_rate": 5.2297487437185935e-05, - "loss": 4.7798, - "step": 47982 - }, - { - "epoch": 25.023728813559323, - "grad_norm": 1.522187352180481, - "learning_rate": 5.22964824120603e-05, - "loss": 5.1165, - "step": 47983 - }, - { - "epoch": 25.024250325945243, - "grad_norm": 1.5728025436401367, - "learning_rate": 5.229547738693468e-05, - "loss": 4.8963, - "step": 47984 - }, - { - "epoch": 25.02477183833116, - "grad_norm": 1.5792328119277954, - "learning_rate": 5.229447236180904e-05, - "loss": 5.4274, - "step": 47985 - }, - { - "epoch": 25.025293350717078, - "grad_norm": 1.446913242340088, - "learning_rate": 5.229346733668342e-05, - "loss": 5.3593, - "step": 47986 - }, - { - "epoch": 25.025814863102998, - "grad_norm": 1.5294749736785889, - "learning_rate": 5.229246231155779e-05, - "loss": 5.4693, - "step": 47987 - }, - { - "epoch": 25.026336375488917, - "grad_norm": 1.6596821546554565, - "learning_rate": 5.229145728643217e-05, - "loss": 4.7274, - "step": 47988 - }, - { - "epoch": 25.026857887874836, - "grad_norm": 1.578134536743164, - "learning_rate": 5.229045226130653e-05, - "loss": 5.6329, - "step": 47989 - }, - { - "epoch": 25.027379400260756, - "grad_norm": 1.6660449504852295, - "learning_rate": 5.228944723618091e-05, - "loss": 4.8747, - "step": 47990 - }, - { - "epoch": 25.027900912646675, - "grad_norm": 1.5761691331863403, - "learning_rate": 5.2288442211055275e-05, - "loss": 5.011, - "step": 47991 - }, - { - "epoch": 25.028422425032595, - "grad_norm": 1.6081013679504395, - "learning_rate": 5.228743718592965e-05, - "loss": 5.2206, - "step": 47992 - }, - { - "epoch": 25.028943937418514, - "grad_norm": 1.6231704950332642, - "learning_rate": 5.2286432160804023e-05, - "loss": 5.2324, - "step": 47993 - }, - { - "epoch": 25.029465449804434, - "grad_norm": 1.5783947706222534, - "learning_rate": 5.22854271356784e-05, - "loss": 4.8951, - "step": 47994 - }, - { - "epoch": 25.029986962190353, - "grad_norm": 1.5610227584838867, - "learning_rate": 5.2284422110552766e-05, - "loss": 5.3224, - "step": 47995 - }, - { - "epoch": 25.030508474576273, - "grad_norm": 1.556490421295166, - "learning_rate": 5.2283417085427137e-05, - "loss": 5.1734, - "step": 47996 - }, - { - "epoch": 25.03102998696219, - "grad_norm": 1.512485384941101, - "learning_rate": 5.2282412060301514e-05, - "loss": 4.9708, - "step": 47997 - }, - { - "epoch": 25.031551499348108, - "grad_norm": 1.656604290008545, - "learning_rate": 5.228140703517588e-05, - "loss": 4.9955, - "step": 47998 - }, - { - "epoch": 25.032073011734028, - "grad_norm": 1.631657361984253, - "learning_rate": 5.2280402010050257e-05, - "loss": 5.3694, - "step": 47999 - }, - { - "epoch": 25.032594524119947, - "grad_norm": 1.5367200374603271, - "learning_rate": 5.227939698492462e-05, - "loss": 4.9226, - "step": 48000 - }, - { - "epoch": 25.033116036505866, - "grad_norm": 1.576295018196106, - "learning_rate": 5.2278391959799e-05, - "loss": 5.4135, - "step": 48001 - }, - { - "epoch": 25.033637548891786, - "grad_norm": 1.7286040782928467, - "learning_rate": 5.227738693467337e-05, - "loss": 4.6489, - "step": 48002 - }, - { - "epoch": 25.034159061277705, - "grad_norm": 1.515144944190979, - "learning_rate": 5.227638190954775e-05, - "loss": 5.5637, - "step": 48003 - }, - { - "epoch": 25.034680573663625, - "grad_norm": 1.460913062095642, - "learning_rate": 5.227537688442211e-05, - "loss": 5.2815, - "step": 48004 - }, - { - "epoch": 25.035202086049544, - "grad_norm": 1.5246466398239136, - "learning_rate": 5.227437185929649e-05, - "loss": 5.2387, - "step": 48005 - }, - { - "epoch": 25.035723598435464, - "grad_norm": 1.6306599378585815, - "learning_rate": 5.2273366834170854e-05, - "loss": 4.8912, - "step": 48006 - }, - { - "epoch": 25.036245110821383, - "grad_norm": 1.699123740196228, - "learning_rate": 5.227236180904523e-05, - "loss": 4.6782, - "step": 48007 - }, - { - "epoch": 25.036766623207303, - "grad_norm": 1.6518476009368896, - "learning_rate": 5.22713567839196e-05, - "loss": 4.9387, - "step": 48008 - }, - { - "epoch": 25.03728813559322, - "grad_norm": 1.6386560201644897, - "learning_rate": 5.227035175879397e-05, - "loss": 5.2649, - "step": 48009 - }, - { - "epoch": 25.037809647979138, - "grad_norm": 1.6140928268432617, - "learning_rate": 5.2269346733668345e-05, - "loss": 5.479, - "step": 48010 - }, - { - "epoch": 25.038331160365058, - "grad_norm": 1.5281693935394287, - "learning_rate": 5.226834170854271e-05, - "loss": 5.3758, - "step": 48011 - }, - { - "epoch": 25.038852672750977, - "grad_norm": 1.594447135925293, - "learning_rate": 5.226733668341709e-05, - "loss": 4.5486, - "step": 48012 - }, - { - "epoch": 25.039374185136897, - "grad_norm": 1.635145664215088, - "learning_rate": 5.226633165829146e-05, - "loss": 4.9519, - "step": 48013 - }, - { - "epoch": 25.039895697522816, - "grad_norm": 1.6227941513061523, - "learning_rate": 5.2265326633165836e-05, - "loss": 5.1203, - "step": 48014 - }, - { - "epoch": 25.040417209908735, - "grad_norm": 1.4908031225204468, - "learning_rate": 5.22643216080402e-05, - "loss": 5.2972, - "step": 48015 - }, - { - "epoch": 25.040938722294655, - "grad_norm": 1.6482861042022705, - "learning_rate": 5.226331658291458e-05, - "loss": 5.3533, - "step": 48016 - }, - { - "epoch": 25.041460234680574, - "grad_norm": 1.5024847984313965, - "learning_rate": 5.226231155778894e-05, - "loss": 5.3597, - "step": 48017 - }, - { - "epoch": 25.041981747066494, - "grad_norm": 1.4999306201934814, - "learning_rate": 5.226130653266332e-05, - "loss": 5.2932, - "step": 48018 - }, - { - "epoch": 25.042503259452413, - "grad_norm": 1.5252189636230469, - "learning_rate": 5.226030150753769e-05, - "loss": 5.3634, - "step": 48019 - }, - { - "epoch": 25.043024771838333, - "grad_norm": 1.650327205657959, - "learning_rate": 5.225929648241207e-05, - "loss": 5.2756, - "step": 48020 - }, - { - "epoch": 25.04354628422425, - "grad_norm": 1.5264599323272705, - "learning_rate": 5.225829145728643e-05, - "loss": 5.585, - "step": 48021 - }, - { - "epoch": 25.044067796610168, - "grad_norm": 1.5873053073883057, - "learning_rate": 5.2257286432160804e-05, - "loss": 5.0785, - "step": 48022 - }, - { - "epoch": 25.044589308996088, - "grad_norm": 1.6064082384109497, - "learning_rate": 5.225628140703518e-05, - "loss": 4.4133, - "step": 48023 - }, - { - "epoch": 25.045110821382007, - "grad_norm": 1.493614912033081, - "learning_rate": 5.2255276381909546e-05, - "loss": 5.232, - "step": 48024 - }, - { - "epoch": 25.045632333767927, - "grad_norm": 1.5414810180664062, - "learning_rate": 5.2254271356783924e-05, - "loss": 5.4797, - "step": 48025 - }, - { - "epoch": 25.046153846153846, - "grad_norm": 1.613867163658142, - "learning_rate": 5.225326633165829e-05, - "loss": 4.8193, - "step": 48026 - }, - { - "epoch": 25.046675358539765, - "grad_norm": 1.5164532661437988, - "learning_rate": 5.2252261306532666e-05, - "loss": 5.6607, - "step": 48027 - }, - { - "epoch": 25.047196870925685, - "grad_norm": 1.5713435411453247, - "learning_rate": 5.225125628140704e-05, - "loss": 4.9202, - "step": 48028 - }, - { - "epoch": 25.047718383311604, - "grad_norm": 1.6441740989685059, - "learning_rate": 5.2250251256281415e-05, - "loss": 5.5079, - "step": 48029 - }, - { - "epoch": 25.048239895697524, - "grad_norm": 1.7035290002822876, - "learning_rate": 5.224924623115578e-05, - "loss": 4.906, - "step": 48030 - }, - { - "epoch": 25.048761408083443, - "grad_norm": 2.085284471511841, - "learning_rate": 5.224824120603016e-05, - "loss": 5.0533, - "step": 48031 - }, - { - "epoch": 25.049282920469363, - "grad_norm": 1.619373083114624, - "learning_rate": 5.224723618090452e-05, - "loss": 5.522, - "step": 48032 - }, - { - "epoch": 25.04980443285528, - "grad_norm": 1.6298762559890747, - "learning_rate": 5.22462311557789e-05, - "loss": 5.5061, - "step": 48033 - }, - { - "epoch": 25.050325945241198, - "grad_norm": 1.6265641450881958, - "learning_rate": 5.224522613065327e-05, - "loss": 5.2259, - "step": 48034 - }, - { - "epoch": 25.050847457627118, - "grad_norm": 1.6626091003417969, - "learning_rate": 5.2244221105527635e-05, - "loss": 4.6286, - "step": 48035 - }, - { - "epoch": 25.051368970013037, - "grad_norm": 1.5121263265609741, - "learning_rate": 5.224321608040201e-05, - "loss": 4.7779, - "step": 48036 - }, - { - "epoch": 25.051890482398957, - "grad_norm": 1.5903284549713135, - "learning_rate": 5.224221105527638e-05, - "loss": 4.7092, - "step": 48037 - }, - { - "epoch": 25.052411994784876, - "grad_norm": 1.613397240638733, - "learning_rate": 5.2241206030150755e-05, - "loss": 5.0417, - "step": 48038 - }, - { - "epoch": 25.052933507170795, - "grad_norm": 1.5699125528335571, - "learning_rate": 5.2240201005025126e-05, - "loss": 5.5418, - "step": 48039 - }, - { - "epoch": 25.053455019556715, - "grad_norm": 1.6471492052078247, - "learning_rate": 5.2239195979899504e-05, - "loss": 5.1907, - "step": 48040 - }, - { - "epoch": 25.053976531942634, - "grad_norm": 1.6034982204437256, - "learning_rate": 5.223819095477387e-05, - "loss": 5.0717, - "step": 48041 - }, - { - "epoch": 25.054498044328554, - "grad_norm": 1.6490715742111206, - "learning_rate": 5.2237185929648246e-05, - "loss": 5.4298, - "step": 48042 - }, - { - "epoch": 25.055019556714473, - "grad_norm": 1.5352113246917725, - "learning_rate": 5.223618090452262e-05, - "loss": 5.1596, - "step": 48043 - }, - { - "epoch": 25.055541069100393, - "grad_norm": 1.549985647201538, - "learning_rate": 5.2235175879396995e-05, - "loss": 5.3552, - "step": 48044 - }, - { - "epoch": 25.05606258148631, - "grad_norm": 1.4620240926742554, - "learning_rate": 5.223417085427136e-05, - "loss": 5.5961, - "step": 48045 - }, - { - "epoch": 25.056584093872228, - "grad_norm": 1.5992226600646973, - "learning_rate": 5.223316582914574e-05, - "loss": 5.5577, - "step": 48046 - }, - { - "epoch": 25.057105606258148, - "grad_norm": 1.5935360193252563, - "learning_rate": 5.22321608040201e-05, - "loss": 5.3526, - "step": 48047 - }, - { - "epoch": 25.057627118644067, - "grad_norm": 1.5498789548873901, - "learning_rate": 5.223115577889447e-05, - "loss": 5.0973, - "step": 48048 - }, - { - "epoch": 25.058148631029987, - "grad_norm": 1.6259602308273315, - "learning_rate": 5.223015075376885e-05, - "loss": 5.0978, - "step": 48049 - }, - { - "epoch": 25.058670143415906, - "grad_norm": 1.574906349182129, - "learning_rate": 5.2229145728643214e-05, - "loss": 5.318, - "step": 48050 - }, - { - "epoch": 25.059191655801826, - "grad_norm": 1.5509297847747803, - "learning_rate": 5.222814070351759e-05, - "loss": 4.9509, - "step": 48051 - }, - { - "epoch": 25.059713168187745, - "grad_norm": 1.718526840209961, - "learning_rate": 5.2227135678391956e-05, - "loss": 5.4511, - "step": 48052 - }, - { - "epoch": 25.060234680573664, - "grad_norm": 1.6014487743377686, - "learning_rate": 5.2226130653266334e-05, - "loss": 5.4742, - "step": 48053 - }, - { - "epoch": 25.060756192959584, - "grad_norm": 1.7435474395751953, - "learning_rate": 5.2225125628140705e-05, - "loss": 5.337, - "step": 48054 - }, - { - "epoch": 25.061277705345503, - "grad_norm": 1.615799069404602, - "learning_rate": 5.222412060301508e-05, - "loss": 5.3563, - "step": 48055 - }, - { - "epoch": 25.061799217731423, - "grad_norm": 1.6639987230300903, - "learning_rate": 5.222311557788945e-05, - "loss": 4.7516, - "step": 48056 - }, - { - "epoch": 25.06232073011734, - "grad_norm": 1.520599365234375, - "learning_rate": 5.2222110552763825e-05, - "loss": 5.2604, - "step": 48057 - }, - { - "epoch": 25.062842242503258, - "grad_norm": 1.6133095026016235, - "learning_rate": 5.222110552763819e-05, - "loss": 4.711, - "step": 48058 - }, - { - "epoch": 25.063363754889178, - "grad_norm": 1.5816247463226318, - "learning_rate": 5.222010050251257e-05, - "loss": 5.128, - "step": 48059 - }, - { - "epoch": 25.063885267275097, - "grad_norm": 1.5816177129745483, - "learning_rate": 5.221909547738694e-05, - "loss": 5.1499, - "step": 48060 - }, - { - "epoch": 25.064406779661017, - "grad_norm": 1.5256105661392212, - "learning_rate": 5.2218090452261316e-05, - "loss": 5.1567, - "step": 48061 - }, - { - "epoch": 25.064928292046936, - "grad_norm": 1.6246858835220337, - "learning_rate": 5.221708542713568e-05, - "loss": 4.9064, - "step": 48062 - }, - { - "epoch": 25.065449804432856, - "grad_norm": 1.4724870920181274, - "learning_rate": 5.221608040201005e-05, - "loss": 5.6264, - "step": 48063 - }, - { - "epoch": 25.065971316818775, - "grad_norm": 1.6764525175094604, - "learning_rate": 5.221507537688443e-05, - "loss": 5.1681, - "step": 48064 - }, - { - "epoch": 25.066492829204694, - "grad_norm": 1.6687190532684326, - "learning_rate": 5.221407035175879e-05, - "loss": 4.8345, - "step": 48065 - }, - { - "epoch": 25.067014341590614, - "grad_norm": 1.5673115253448486, - "learning_rate": 5.221306532663317e-05, - "loss": 5.4683, - "step": 48066 - }, - { - "epoch": 25.067535853976533, - "grad_norm": 1.6957436800003052, - "learning_rate": 5.2212060301507535e-05, - "loss": 5.0998, - "step": 48067 - }, - { - "epoch": 25.068057366362453, - "grad_norm": 1.5277578830718994, - "learning_rate": 5.221105527638191e-05, - "loss": 5.4145, - "step": 48068 - }, - { - "epoch": 25.06857887874837, - "grad_norm": 1.6020982265472412, - "learning_rate": 5.2210050251256284e-05, - "loss": 5.3476, - "step": 48069 - }, - { - "epoch": 25.06910039113429, - "grad_norm": 1.6135144233703613, - "learning_rate": 5.220904522613066e-05, - "loss": 5.521, - "step": 48070 - }, - { - "epoch": 25.069621903520208, - "grad_norm": 1.6405580043792725, - "learning_rate": 5.2208040201005026e-05, - "loss": 5.0008, - "step": 48071 - }, - { - "epoch": 25.070143415906127, - "grad_norm": 1.5178943872451782, - "learning_rate": 5.2207035175879404e-05, - "loss": 5.1607, - "step": 48072 - }, - { - "epoch": 25.070664928292047, - "grad_norm": 1.576015591621399, - "learning_rate": 5.220603015075377e-05, - "loss": 4.7903, - "step": 48073 - }, - { - "epoch": 25.071186440677966, - "grad_norm": 1.5297528505325317, - "learning_rate": 5.2205025125628146e-05, - "loss": 5.6894, - "step": 48074 - }, - { - "epoch": 25.071707953063886, - "grad_norm": 1.5205433368682861, - "learning_rate": 5.220402010050252e-05, - "loss": 5.253, - "step": 48075 - }, - { - "epoch": 25.072229465449805, - "grad_norm": 1.5397348403930664, - "learning_rate": 5.220301507537688e-05, - "loss": 5.1561, - "step": 48076 - }, - { - "epoch": 25.072750977835724, - "grad_norm": 1.538046956062317, - "learning_rate": 5.220201005025126e-05, - "loss": 5.0638, - "step": 48077 - }, - { - "epoch": 25.073272490221644, - "grad_norm": 1.4839677810668945, - "learning_rate": 5.2201005025125624e-05, - "loss": 5.3218, - "step": 48078 - }, - { - "epoch": 25.073794002607563, - "grad_norm": 1.6622732877731323, - "learning_rate": 5.22e-05, - "loss": 5.5276, - "step": 48079 - }, - { - "epoch": 25.07431551499348, - "grad_norm": 1.6353181600570679, - "learning_rate": 5.219899497487437e-05, - "loss": 4.9584, - "step": 48080 - }, - { - "epoch": 25.0748370273794, - "grad_norm": 1.5373274087905884, - "learning_rate": 5.219798994974875e-05, - "loss": 5.3617, - "step": 48081 - }, - { - "epoch": 25.07535853976532, - "grad_norm": 1.6091219186782837, - "learning_rate": 5.2196984924623115e-05, - "loss": 5.3374, - "step": 48082 - }, - { - "epoch": 25.075880052151238, - "grad_norm": 1.6382367610931396, - "learning_rate": 5.219597989949749e-05, - "loss": 5.423, - "step": 48083 - }, - { - "epoch": 25.076401564537157, - "grad_norm": 1.4987819194793701, - "learning_rate": 5.219497487437186e-05, - "loss": 5.4526, - "step": 48084 - }, - { - "epoch": 25.076923076923077, - "grad_norm": 1.533179759979248, - "learning_rate": 5.2193969849246235e-05, - "loss": 5.0797, - "step": 48085 - }, - { - "epoch": 25.077444589308996, - "grad_norm": 1.604422688484192, - "learning_rate": 5.2192964824120606e-05, - "loss": 5.56, - "step": 48086 - }, - { - "epoch": 25.077966101694916, - "grad_norm": 1.6459507942199707, - "learning_rate": 5.2191959798994984e-05, - "loss": 5.1896, - "step": 48087 - }, - { - "epoch": 25.078487614080835, - "grad_norm": 1.5721250772476196, - "learning_rate": 5.219095477386935e-05, - "loss": 5.3563, - "step": 48088 - }, - { - "epoch": 25.079009126466755, - "grad_norm": 1.4425777196884155, - "learning_rate": 5.218994974874372e-05, - "loss": 5.6419, - "step": 48089 - }, - { - "epoch": 25.079530638852674, - "grad_norm": 1.5747184753417969, - "learning_rate": 5.21889447236181e-05, - "loss": 5.0476, - "step": 48090 - }, - { - "epoch": 25.080052151238593, - "grad_norm": 1.6852716207504272, - "learning_rate": 5.218793969849246e-05, - "loss": 4.1987, - "step": 48091 - }, - { - "epoch": 25.08057366362451, - "grad_norm": 1.590915322303772, - "learning_rate": 5.218693467336684e-05, - "loss": 5.1722, - "step": 48092 - }, - { - "epoch": 25.08109517601043, - "grad_norm": 1.5987131595611572, - "learning_rate": 5.21859296482412e-05, - "loss": 4.9057, - "step": 48093 - }, - { - "epoch": 25.08161668839635, - "grad_norm": 1.5028268098831177, - "learning_rate": 5.218492462311558e-05, - "loss": 5.0129, - "step": 48094 - }, - { - "epoch": 25.082138200782268, - "grad_norm": 1.4478603601455688, - "learning_rate": 5.218391959798995e-05, - "loss": 5.5567, - "step": 48095 - }, - { - "epoch": 25.082659713168187, - "grad_norm": 1.6364659070968628, - "learning_rate": 5.218291457286433e-05, - "loss": 3.9887, - "step": 48096 - }, - { - "epoch": 25.083181225554107, - "grad_norm": 1.6034468412399292, - "learning_rate": 5.2181909547738694e-05, - "loss": 5.2996, - "step": 48097 - }, - { - "epoch": 25.083702737940026, - "grad_norm": 1.4357606172561646, - "learning_rate": 5.218090452261307e-05, - "loss": 4.8466, - "step": 48098 - }, - { - "epoch": 25.084224250325946, - "grad_norm": 1.6267223358154297, - "learning_rate": 5.2179899497487436e-05, - "loss": 4.9795, - "step": 48099 - }, - { - "epoch": 25.084745762711865, - "grad_norm": 1.4889978170394897, - "learning_rate": 5.2178894472361814e-05, - "loss": 5.2792, - "step": 48100 - }, - { - "epoch": 25.085267275097785, - "grad_norm": 1.4590140581130981, - "learning_rate": 5.2177889447236185e-05, - "loss": 5.2074, - "step": 48101 - }, - { - "epoch": 25.085788787483704, - "grad_norm": 1.5663119554519653, - "learning_rate": 5.217688442211055e-05, - "loss": 5.3289, - "step": 48102 - }, - { - "epoch": 25.086310299869623, - "grad_norm": 1.6344408988952637, - "learning_rate": 5.217587939698493e-05, - "loss": 5.0389, - "step": 48103 - }, - { - "epoch": 25.08683181225554, - "grad_norm": 1.743761420249939, - "learning_rate": 5.217487437185929e-05, - "loss": 5.2697, - "step": 48104 - }, - { - "epoch": 25.08735332464146, - "grad_norm": 1.6246193647384644, - "learning_rate": 5.217386934673367e-05, - "loss": 4.9479, - "step": 48105 - }, - { - "epoch": 25.08787483702738, - "grad_norm": 1.5213711261749268, - "learning_rate": 5.217286432160804e-05, - "loss": 5.5987, - "step": 48106 - }, - { - "epoch": 25.088396349413298, - "grad_norm": 1.6578574180603027, - "learning_rate": 5.217185929648242e-05, - "loss": 5.1465, - "step": 48107 - }, - { - "epoch": 25.088917861799217, - "grad_norm": 1.609294056892395, - "learning_rate": 5.217085427135678e-05, - "loss": 5.3002, - "step": 48108 - }, - { - "epoch": 25.089439374185137, - "grad_norm": 1.5202021598815918, - "learning_rate": 5.216984924623116e-05, - "loss": 5.4344, - "step": 48109 - }, - { - "epoch": 25.089960886571056, - "grad_norm": 1.6063722372055054, - "learning_rate": 5.216884422110553e-05, - "loss": 5.2788, - "step": 48110 - }, - { - "epoch": 25.090482398956976, - "grad_norm": 1.7403017282485962, - "learning_rate": 5.216783919597991e-05, - "loss": 4.7151, - "step": 48111 - }, - { - "epoch": 25.091003911342895, - "grad_norm": 1.585898518562317, - "learning_rate": 5.2166834170854273e-05, - "loss": 5.3997, - "step": 48112 - }, - { - "epoch": 25.091525423728815, - "grad_norm": 1.6215565204620361, - "learning_rate": 5.216582914572865e-05, - "loss": 5.1765, - "step": 48113 - }, - { - "epoch": 25.092046936114734, - "grad_norm": 1.5886249542236328, - "learning_rate": 5.2164824120603016e-05, - "loss": 5.1362, - "step": 48114 - }, - { - "epoch": 25.092568448500653, - "grad_norm": 1.6035633087158203, - "learning_rate": 5.216381909547739e-05, - "loss": 5.3149, - "step": 48115 - }, - { - "epoch": 25.09308996088657, - "grad_norm": 1.7099478244781494, - "learning_rate": 5.2162814070351764e-05, - "loss": 5.0732, - "step": 48116 - }, - { - "epoch": 25.09361147327249, - "grad_norm": 1.5410817861557007, - "learning_rate": 5.216180904522613e-05, - "loss": 5.4971, - "step": 48117 - }, - { - "epoch": 25.09413298565841, - "grad_norm": 1.6006975173950195, - "learning_rate": 5.2160804020100507e-05, - "loss": 5.043, - "step": 48118 - }, - { - "epoch": 25.094654498044328, - "grad_norm": 1.6049522161483765, - "learning_rate": 5.215979899497487e-05, - "loss": 5.2449, - "step": 48119 - }, - { - "epoch": 25.095176010430247, - "grad_norm": 1.5726654529571533, - "learning_rate": 5.215879396984925e-05, - "loss": 5.4627, - "step": 48120 - }, - { - "epoch": 25.095697522816167, - "grad_norm": 1.6766661405563354, - "learning_rate": 5.215778894472362e-05, - "loss": 5.0462, - "step": 48121 - }, - { - "epoch": 25.096219035202086, - "grad_norm": 1.5718541145324707, - "learning_rate": 5.2156783919598e-05, - "loss": 5.0668, - "step": 48122 - }, - { - "epoch": 25.096740547588006, - "grad_norm": 1.5774974822998047, - "learning_rate": 5.215577889447236e-05, - "loss": 4.9374, - "step": 48123 - }, - { - "epoch": 25.097262059973925, - "grad_norm": 1.630448818206787, - "learning_rate": 5.215477386934674e-05, - "loss": 4.718, - "step": 48124 - }, - { - "epoch": 25.097783572359845, - "grad_norm": 1.6294201612472534, - "learning_rate": 5.2153768844221104e-05, - "loss": 4.81, - "step": 48125 - }, - { - "epoch": 25.098305084745764, - "grad_norm": 1.5562546253204346, - "learning_rate": 5.215276381909548e-05, - "loss": 4.9449, - "step": 48126 - }, - { - "epoch": 25.098826597131684, - "grad_norm": 1.608944058418274, - "learning_rate": 5.215175879396985e-05, - "loss": 5.3152, - "step": 48127 - }, - { - "epoch": 25.0993481095176, - "grad_norm": 1.5664875507354736, - "learning_rate": 5.215075376884422e-05, - "loss": 5.4298, - "step": 48128 - }, - { - "epoch": 25.09986962190352, - "grad_norm": 1.5402615070343018, - "learning_rate": 5.2149748743718595e-05, - "loss": 5.6319, - "step": 48129 - }, - { - "epoch": 25.10039113428944, - "grad_norm": 1.552189826965332, - "learning_rate": 5.2148743718592966e-05, - "loss": 5.4691, - "step": 48130 - }, - { - "epoch": 25.100912646675358, - "grad_norm": 1.6753851175308228, - "learning_rate": 5.2147738693467344e-05, - "loss": 5.0115, - "step": 48131 - }, - { - "epoch": 25.101434159061277, - "grad_norm": 1.532462239265442, - "learning_rate": 5.214673366834171e-05, - "loss": 5.4271, - "step": 48132 - }, - { - "epoch": 25.101955671447197, - "grad_norm": 1.635480284690857, - "learning_rate": 5.2145728643216086e-05, - "loss": 5.0256, - "step": 48133 - }, - { - "epoch": 25.102477183833116, - "grad_norm": 1.5448393821716309, - "learning_rate": 5.214472361809045e-05, - "loss": 5.1738, - "step": 48134 - }, - { - "epoch": 25.102998696219036, - "grad_norm": 1.5636242628097534, - "learning_rate": 5.214371859296483e-05, - "loss": 5.381, - "step": 48135 - }, - { - "epoch": 25.103520208604955, - "grad_norm": 1.501422643661499, - "learning_rate": 5.21427135678392e-05, - "loss": 3.8809, - "step": 48136 - }, - { - "epoch": 25.104041720990875, - "grad_norm": 1.5404072999954224, - "learning_rate": 5.214170854271358e-05, - "loss": 5.1133, - "step": 48137 - }, - { - "epoch": 25.104563233376794, - "grad_norm": 1.6260484457015991, - "learning_rate": 5.214070351758794e-05, - "loss": 5.1955, - "step": 48138 - }, - { - "epoch": 25.105084745762714, - "grad_norm": 1.5609612464904785, - "learning_rate": 5.213969849246232e-05, - "loss": 5.0974, - "step": 48139 - }, - { - "epoch": 25.10560625814863, - "grad_norm": 1.5654407739639282, - "learning_rate": 5.213869346733668e-05, - "loss": 5.0046, - "step": 48140 - }, - { - "epoch": 25.10612777053455, - "grad_norm": 1.618502140045166, - "learning_rate": 5.2137688442211054e-05, - "loss": 5.3164, - "step": 48141 - }, - { - "epoch": 25.10664928292047, - "grad_norm": 1.4652643203735352, - "learning_rate": 5.213668341708543e-05, - "loss": 4.6613, - "step": 48142 - }, - { - "epoch": 25.107170795306388, - "grad_norm": 1.6721347570419312, - "learning_rate": 5.2135678391959796e-05, - "loss": 5.2065, - "step": 48143 - }, - { - "epoch": 25.107692307692307, - "grad_norm": 1.497338056564331, - "learning_rate": 5.2134673366834174e-05, - "loss": 5.3773, - "step": 48144 - }, - { - "epoch": 25.108213820078227, - "grad_norm": 1.6226699352264404, - "learning_rate": 5.213366834170854e-05, - "loss": 5.4894, - "step": 48145 - }, - { - "epoch": 25.108735332464146, - "grad_norm": 1.6273462772369385, - "learning_rate": 5.2132663316582916e-05, - "loss": 5.1295, - "step": 48146 - }, - { - "epoch": 25.109256844850066, - "grad_norm": 1.575742483139038, - "learning_rate": 5.213165829145729e-05, - "loss": 5.021, - "step": 48147 - }, - { - "epoch": 25.109778357235985, - "grad_norm": 1.5621916055679321, - "learning_rate": 5.2130653266331665e-05, - "loss": 5.3857, - "step": 48148 - }, - { - "epoch": 25.110299869621905, - "grad_norm": 1.4941315650939941, - "learning_rate": 5.212964824120603e-05, - "loss": 5.2961, - "step": 48149 - }, - { - "epoch": 25.110821382007824, - "grad_norm": 1.5661746263504028, - "learning_rate": 5.212864321608041e-05, - "loss": 4.9118, - "step": 48150 - }, - { - "epoch": 25.111342894393744, - "grad_norm": 1.6174672842025757, - "learning_rate": 5.212763819095478e-05, - "loss": 4.8483, - "step": 48151 - }, - { - "epoch": 25.11186440677966, - "grad_norm": 1.5959138870239258, - "learning_rate": 5.2126633165829156e-05, - "loss": 5.1718, - "step": 48152 - }, - { - "epoch": 25.11238591916558, - "grad_norm": 1.4583885669708252, - "learning_rate": 5.212562814070352e-05, - "loss": 5.2302, - "step": 48153 - }, - { - "epoch": 25.1129074315515, - "grad_norm": 1.5151971578598022, - "learning_rate": 5.21246231155779e-05, - "loss": 5.2939, - "step": 48154 - }, - { - "epoch": 25.113428943937418, - "grad_norm": 1.5206096172332764, - "learning_rate": 5.212361809045226e-05, - "loss": 4.7663, - "step": 48155 - }, - { - "epoch": 25.113950456323337, - "grad_norm": 1.446768879890442, - "learning_rate": 5.2122613065326634e-05, - "loss": 5.5966, - "step": 48156 - }, - { - "epoch": 25.114471968709257, - "grad_norm": 1.590512990951538, - "learning_rate": 5.212160804020101e-05, - "loss": 5.0482, - "step": 48157 - }, - { - "epoch": 25.114993481095176, - "grad_norm": 1.5574973821640015, - "learning_rate": 5.2120603015075376e-05, - "loss": 5.0848, - "step": 48158 - }, - { - "epoch": 25.115514993481096, - "grad_norm": 1.5858995914459229, - "learning_rate": 5.2119597989949754e-05, - "loss": 5.0175, - "step": 48159 - }, - { - "epoch": 25.116036505867015, - "grad_norm": 1.6092939376831055, - "learning_rate": 5.211859296482412e-05, - "loss": 5.2783, - "step": 48160 - }, - { - "epoch": 25.116558018252935, - "grad_norm": 1.4780962467193604, - "learning_rate": 5.2117587939698496e-05, - "loss": 5.604, - "step": 48161 - }, - { - "epoch": 25.117079530638854, - "grad_norm": 1.6225965023040771, - "learning_rate": 5.211658291457287e-05, - "loss": 5.3382, - "step": 48162 - }, - { - "epoch": 25.117601043024774, - "grad_norm": 1.6012520790100098, - "learning_rate": 5.2115577889447245e-05, - "loss": 5.3415, - "step": 48163 - }, - { - "epoch": 25.11812255541069, - "grad_norm": 1.6274304389953613, - "learning_rate": 5.211457286432161e-05, - "loss": 4.8036, - "step": 48164 - }, - { - "epoch": 25.11864406779661, - "grad_norm": 1.5565248727798462, - "learning_rate": 5.211356783919599e-05, - "loss": 5.2834, - "step": 48165 - }, - { - "epoch": 25.11916558018253, - "grad_norm": 1.6478276252746582, - "learning_rate": 5.211256281407035e-05, - "loss": 4.9939, - "step": 48166 - }, - { - "epoch": 25.119687092568448, - "grad_norm": 1.5891567468643188, - "learning_rate": 5.211155778894473e-05, - "loss": 5.1703, - "step": 48167 - }, - { - "epoch": 25.120208604954367, - "grad_norm": 1.660111904144287, - "learning_rate": 5.21105527638191e-05, - "loss": 5.0909, - "step": 48168 - }, - { - "epoch": 25.120730117340287, - "grad_norm": 1.5375797748565674, - "learning_rate": 5.2109547738693464e-05, - "loss": 5.3287, - "step": 48169 - }, - { - "epoch": 25.121251629726206, - "grad_norm": 1.6365638971328735, - "learning_rate": 5.210854271356784e-05, - "loss": 5.0463, - "step": 48170 - }, - { - "epoch": 25.121773142112126, - "grad_norm": 1.5868256092071533, - "learning_rate": 5.2107537688442206e-05, - "loss": 5.3186, - "step": 48171 - }, - { - "epoch": 25.122294654498045, - "grad_norm": 1.5055153369903564, - "learning_rate": 5.2106532663316584e-05, - "loss": 5.7097, - "step": 48172 - }, - { - "epoch": 25.122816166883965, - "grad_norm": 1.635202407836914, - "learning_rate": 5.2105527638190955e-05, - "loss": 4.7157, - "step": 48173 - }, - { - "epoch": 25.123337679269884, - "grad_norm": 1.6698713302612305, - "learning_rate": 5.210452261306533e-05, - "loss": 5.2708, - "step": 48174 - }, - { - "epoch": 25.1238591916558, - "grad_norm": 1.646334171295166, - "learning_rate": 5.21035175879397e-05, - "loss": 5.097, - "step": 48175 - }, - { - "epoch": 25.12438070404172, - "grad_norm": 1.5443772077560425, - "learning_rate": 5.2102512562814075e-05, - "loss": 5.03, - "step": 48176 - }, - { - "epoch": 25.12490221642764, - "grad_norm": 1.578948736190796, - "learning_rate": 5.2101507537688446e-05, - "loss": 5.3146, - "step": 48177 - }, - { - "epoch": 25.12542372881356, - "grad_norm": 1.5742299556732178, - "learning_rate": 5.2100502512562824e-05, - "loss": 5.589, - "step": 48178 - }, - { - "epoch": 25.125945241199478, - "grad_norm": 1.5790657997131348, - "learning_rate": 5.209949748743719e-05, - "loss": 5.4131, - "step": 48179 - }, - { - "epoch": 25.126466753585397, - "grad_norm": 1.502204179763794, - "learning_rate": 5.2098492462311566e-05, - "loss": 5.5255, - "step": 48180 - }, - { - "epoch": 25.126988265971317, - "grad_norm": 1.6068446636199951, - "learning_rate": 5.209748743718593e-05, - "loss": 5.4598, - "step": 48181 - }, - { - "epoch": 25.127509778357236, - "grad_norm": 1.5747489929199219, - "learning_rate": 5.20964824120603e-05, - "loss": 4.9203, - "step": 48182 - }, - { - "epoch": 25.128031290743156, - "grad_norm": 1.4774532318115234, - "learning_rate": 5.209547738693468e-05, - "loss": 5.2928, - "step": 48183 - }, - { - "epoch": 25.128552803129075, - "grad_norm": 1.6008639335632324, - "learning_rate": 5.2094472361809043e-05, - "loss": 5.1, - "step": 48184 - }, - { - "epoch": 25.129074315514995, - "grad_norm": 1.4689936637878418, - "learning_rate": 5.209346733668342e-05, - "loss": 5.2165, - "step": 48185 - }, - { - "epoch": 25.129595827900914, - "grad_norm": 1.4817838668823242, - "learning_rate": 5.2092462311557786e-05, - "loss": 5.1851, - "step": 48186 - }, - { - "epoch": 25.13011734028683, - "grad_norm": 1.6935149431228638, - "learning_rate": 5.209145728643216e-05, - "loss": 4.9495, - "step": 48187 - }, - { - "epoch": 25.13063885267275, - "grad_norm": 1.4817986488342285, - "learning_rate": 5.2090452261306534e-05, - "loss": 5.4332, - "step": 48188 - }, - { - "epoch": 25.13116036505867, - "grad_norm": 1.4891773462295532, - "learning_rate": 5.208944723618091e-05, - "loss": 5.1221, - "step": 48189 - }, - { - "epoch": 25.13168187744459, - "grad_norm": 1.561461329460144, - "learning_rate": 5.2088442211055276e-05, - "loss": 5.2946, - "step": 48190 - }, - { - "epoch": 25.132203389830508, - "grad_norm": 1.5797206163406372, - "learning_rate": 5.2087437185929654e-05, - "loss": 5.0065, - "step": 48191 - }, - { - "epoch": 25.132724902216427, - "grad_norm": 1.6771870851516724, - "learning_rate": 5.208643216080402e-05, - "loss": 4.7235, - "step": 48192 - }, - { - "epoch": 25.133246414602347, - "grad_norm": 1.5815049409866333, - "learning_rate": 5.2085427135678396e-05, - "loss": 5.4588, - "step": 48193 - }, - { - "epoch": 25.133767926988266, - "grad_norm": 1.6302151679992676, - "learning_rate": 5.208442211055277e-05, - "loss": 4.9791, - "step": 48194 - }, - { - "epoch": 25.134289439374186, - "grad_norm": 1.4869463443756104, - "learning_rate": 5.208341708542713e-05, - "loss": 5.1828, - "step": 48195 - }, - { - "epoch": 25.134810951760105, - "grad_norm": 1.5574650764465332, - "learning_rate": 5.208241206030151e-05, - "loss": 5.0029, - "step": 48196 - }, - { - "epoch": 25.135332464146025, - "grad_norm": 1.5514665842056274, - "learning_rate": 5.208140703517588e-05, - "loss": 4.9966, - "step": 48197 - }, - { - "epoch": 25.135853976531944, - "grad_norm": 1.5175729990005493, - "learning_rate": 5.208040201005026e-05, - "loss": 5.2739, - "step": 48198 - }, - { - "epoch": 25.13637548891786, - "grad_norm": 1.5870496034622192, - "learning_rate": 5.207939698492462e-05, - "loss": 5.0509, - "step": 48199 - }, - { - "epoch": 25.13689700130378, - "grad_norm": 1.6089013814926147, - "learning_rate": 5.2078391959799e-05, - "loss": 4.8098, - "step": 48200 - }, - { - "epoch": 25.1374185136897, - "grad_norm": 1.5417670011520386, - "learning_rate": 5.2077386934673365e-05, - "loss": 5.2898, - "step": 48201 - }, - { - "epoch": 25.13794002607562, - "grad_norm": 1.742838978767395, - "learning_rate": 5.207638190954774e-05, - "loss": 5.0464, - "step": 48202 - }, - { - "epoch": 25.138461538461538, - "grad_norm": 1.5392214059829712, - "learning_rate": 5.2075376884422114e-05, - "loss": 4.871, - "step": 48203 - }, - { - "epoch": 25.138983050847457, - "grad_norm": 1.560907244682312, - "learning_rate": 5.207437185929649e-05, - "loss": 5.5693, - "step": 48204 - }, - { - "epoch": 25.139504563233377, - "grad_norm": 1.5184104442596436, - "learning_rate": 5.2073366834170856e-05, - "loss": 5.5042, - "step": 48205 - }, - { - "epoch": 25.140026075619296, - "grad_norm": 1.57496976852417, - "learning_rate": 5.2072361809045234e-05, - "loss": 5.4034, - "step": 48206 - }, - { - "epoch": 25.140547588005216, - "grad_norm": 1.626519799232483, - "learning_rate": 5.20713567839196e-05, - "loss": 5.2947, - "step": 48207 - }, - { - "epoch": 25.141069100391135, - "grad_norm": 1.5329514741897583, - "learning_rate": 5.207035175879397e-05, - "loss": 5.3089, - "step": 48208 - }, - { - "epoch": 25.141590612777055, - "grad_norm": 1.5630195140838623, - "learning_rate": 5.206934673366835e-05, - "loss": 5.2724, - "step": 48209 - }, - { - "epoch": 25.142112125162974, - "grad_norm": 1.5848133563995361, - "learning_rate": 5.206834170854271e-05, - "loss": 5.1734, - "step": 48210 - }, - { - "epoch": 25.14263363754889, - "grad_norm": 1.55434250831604, - "learning_rate": 5.206733668341709e-05, - "loss": 5.6921, - "step": 48211 - }, - { - "epoch": 25.14315514993481, - "grad_norm": 1.5273516178131104, - "learning_rate": 5.206633165829145e-05, - "loss": 5.3113, - "step": 48212 - }, - { - "epoch": 25.14367666232073, - "grad_norm": 1.5964916944503784, - "learning_rate": 5.206532663316583e-05, - "loss": 5.146, - "step": 48213 - }, - { - "epoch": 25.14419817470665, - "grad_norm": 1.6248948574066162, - "learning_rate": 5.20643216080402e-05, - "loss": 5.3516, - "step": 48214 - }, - { - "epoch": 25.144719687092568, - "grad_norm": 1.7536388635635376, - "learning_rate": 5.206331658291458e-05, - "loss": 5.2284, - "step": 48215 - }, - { - "epoch": 25.145241199478487, - "grad_norm": 1.6195341348648071, - "learning_rate": 5.2062311557788944e-05, - "loss": 5.1896, - "step": 48216 - }, - { - "epoch": 25.145762711864407, - "grad_norm": 1.5441358089447021, - "learning_rate": 5.206130653266332e-05, - "loss": 5.228, - "step": 48217 - }, - { - "epoch": 25.146284224250326, - "grad_norm": 1.6368287801742554, - "learning_rate": 5.206030150753769e-05, - "loss": 5.5487, - "step": 48218 - }, - { - "epoch": 25.146805736636246, - "grad_norm": 1.5946929454803467, - "learning_rate": 5.205929648241207e-05, - "loss": 4.5835, - "step": 48219 - }, - { - "epoch": 25.147327249022165, - "grad_norm": 1.424033522605896, - "learning_rate": 5.2058291457286435e-05, - "loss": 5.5432, - "step": 48220 - }, - { - "epoch": 25.147848761408085, - "grad_norm": 1.4372049570083618, - "learning_rate": 5.20572864321608e-05, - "loss": 5.0455, - "step": 48221 - }, - { - "epoch": 25.148370273794004, - "grad_norm": 1.5308092832565308, - "learning_rate": 5.205628140703518e-05, - "loss": 5.0259, - "step": 48222 - }, - { - "epoch": 25.14889178617992, - "grad_norm": 1.7394767999649048, - "learning_rate": 5.205527638190955e-05, - "loss": 4.9047, - "step": 48223 - }, - { - "epoch": 25.14941329856584, - "grad_norm": 1.9454396963119507, - "learning_rate": 5.2054271356783926e-05, - "loss": 4.8059, - "step": 48224 - }, - { - "epoch": 25.14993481095176, - "grad_norm": 1.652451992034912, - "learning_rate": 5.205326633165829e-05, - "loss": 5.2775, - "step": 48225 - }, - { - "epoch": 25.15045632333768, - "grad_norm": 1.5885117053985596, - "learning_rate": 5.205226130653267e-05, - "loss": 5.2052, - "step": 48226 - }, - { - "epoch": 25.150977835723598, - "grad_norm": 1.6662418842315674, - "learning_rate": 5.205125628140703e-05, - "loss": 4.8089, - "step": 48227 - }, - { - "epoch": 25.151499348109517, - "grad_norm": 1.6505861282348633, - "learning_rate": 5.205025125628141e-05, - "loss": 4.9137, - "step": 48228 - }, - { - "epoch": 25.152020860495437, - "grad_norm": 1.67071533203125, - "learning_rate": 5.204924623115578e-05, - "loss": 4.6297, - "step": 48229 - }, - { - "epoch": 25.152542372881356, - "grad_norm": 1.5707292556762695, - "learning_rate": 5.204824120603016e-05, - "loss": 5.2695, - "step": 48230 - }, - { - "epoch": 25.153063885267276, - "grad_norm": 1.7561732530593872, - "learning_rate": 5.2047236180904523e-05, - "loss": 4.7642, - "step": 48231 - }, - { - "epoch": 25.153585397653195, - "grad_norm": 1.5681991577148438, - "learning_rate": 5.20462311557789e-05, - "loss": 4.9484, - "step": 48232 - }, - { - "epoch": 25.154106910039115, - "grad_norm": 1.605025291442871, - "learning_rate": 5.2045226130653266e-05, - "loss": 5.0805, - "step": 48233 - }, - { - "epoch": 25.154628422425034, - "grad_norm": 1.622686743736267, - "learning_rate": 5.204422110552764e-05, - "loss": 4.8012, - "step": 48234 - }, - { - "epoch": 25.15514993481095, - "grad_norm": 1.5964213609695435, - "learning_rate": 5.2043216080402014e-05, - "loss": 5.1172, - "step": 48235 - }, - { - "epoch": 25.15567144719687, - "grad_norm": 1.6251388788223267, - "learning_rate": 5.204221105527638e-05, - "loss": 4.179, - "step": 48236 - }, - { - "epoch": 25.15619295958279, - "grad_norm": 1.4505014419555664, - "learning_rate": 5.2041206030150757e-05, - "loss": 5.2209, - "step": 48237 - }, - { - "epoch": 25.15671447196871, - "grad_norm": 1.5651741027832031, - "learning_rate": 5.204020100502513e-05, - "loss": 4.9761, - "step": 48238 - }, - { - "epoch": 25.157235984354628, - "grad_norm": 1.6765464544296265, - "learning_rate": 5.2039195979899505e-05, - "loss": 5.4433, - "step": 48239 - }, - { - "epoch": 25.157757496740548, - "grad_norm": 1.5783615112304688, - "learning_rate": 5.203819095477387e-05, - "loss": 4.7876, - "step": 48240 - }, - { - "epoch": 25.158279009126467, - "grad_norm": 1.5194097757339478, - "learning_rate": 5.203718592964825e-05, - "loss": 5.4855, - "step": 48241 - }, - { - "epoch": 25.158800521512386, - "grad_norm": 1.5367183685302734, - "learning_rate": 5.203618090452261e-05, - "loss": 4.9585, - "step": 48242 - }, - { - "epoch": 25.159322033898306, - "grad_norm": 1.6741039752960205, - "learning_rate": 5.203517587939699e-05, - "loss": 5.2096, - "step": 48243 - }, - { - "epoch": 25.159843546284225, - "grad_norm": 1.6310003995895386, - "learning_rate": 5.203417085427136e-05, - "loss": 5.1442, - "step": 48244 - }, - { - "epoch": 25.160365058670145, - "grad_norm": 1.4548531770706177, - "learning_rate": 5.203316582914574e-05, - "loss": 5.1668, - "step": 48245 - }, - { - "epoch": 25.160886571056064, - "grad_norm": 1.549142599105835, - "learning_rate": 5.20321608040201e-05, - "loss": 5.2885, - "step": 48246 - }, - { - "epoch": 25.16140808344198, - "grad_norm": 1.691920518875122, - "learning_rate": 5.203115577889448e-05, - "loss": 5.0742, - "step": 48247 - }, - { - "epoch": 25.1619295958279, - "grad_norm": 1.5953315496444702, - "learning_rate": 5.2030150753768845e-05, - "loss": 4.9535, - "step": 48248 - }, - { - "epoch": 25.16245110821382, - "grad_norm": 1.5538278818130493, - "learning_rate": 5.2029145728643216e-05, - "loss": 5.0225, - "step": 48249 - }, - { - "epoch": 25.16297262059974, - "grad_norm": 1.59413743019104, - "learning_rate": 5.2028140703517594e-05, - "loss": 5.1177, - "step": 48250 - }, - { - "epoch": 25.163494132985658, - "grad_norm": 1.7710264921188354, - "learning_rate": 5.202713567839196e-05, - "loss": 4.5543, - "step": 48251 - }, - { - "epoch": 25.164015645371578, - "grad_norm": 1.609657645225525, - "learning_rate": 5.2026130653266336e-05, - "loss": 5.361, - "step": 48252 - }, - { - "epoch": 25.164537157757497, - "grad_norm": 1.5920768976211548, - "learning_rate": 5.20251256281407e-05, - "loss": 5.3722, - "step": 48253 - }, - { - "epoch": 25.165058670143416, - "grad_norm": 1.5664012432098389, - "learning_rate": 5.202412060301508e-05, - "loss": 5.0528, - "step": 48254 - }, - { - "epoch": 25.165580182529336, - "grad_norm": 1.5830000638961792, - "learning_rate": 5.202311557788945e-05, - "loss": 5.2396, - "step": 48255 - }, - { - "epoch": 25.166101694915255, - "grad_norm": 1.5106652975082397, - "learning_rate": 5.202211055276383e-05, - "loss": 4.7429, - "step": 48256 - }, - { - "epoch": 25.166623207301175, - "grad_norm": 1.5451288223266602, - "learning_rate": 5.202110552763819e-05, - "loss": 5.763, - "step": 48257 - }, - { - "epoch": 25.167144719687094, - "grad_norm": 1.5612925291061401, - "learning_rate": 5.202010050251257e-05, - "loss": 4.5147, - "step": 48258 - }, - { - "epoch": 25.16766623207301, - "grad_norm": 1.46934175491333, - "learning_rate": 5.201909547738693e-05, - "loss": 5.4977, - "step": 48259 - }, - { - "epoch": 25.16818774445893, - "grad_norm": 1.5998698472976685, - "learning_rate": 5.201809045226131e-05, - "loss": 5.2459, - "step": 48260 - }, - { - "epoch": 25.16870925684485, - "grad_norm": 1.6068105697631836, - "learning_rate": 5.201708542713568e-05, - "loss": 5.2279, - "step": 48261 - }, - { - "epoch": 25.16923076923077, - "grad_norm": 1.5778433084487915, - "learning_rate": 5.2016080402010046e-05, - "loss": 5.5873, - "step": 48262 - }, - { - "epoch": 25.169752281616688, - "grad_norm": 1.5922578573226929, - "learning_rate": 5.2015075376884424e-05, - "loss": 5.4446, - "step": 48263 - }, - { - "epoch": 25.170273794002608, - "grad_norm": 1.5329680442810059, - "learning_rate": 5.2014070351758795e-05, - "loss": 4.7624, - "step": 48264 - }, - { - "epoch": 25.170795306388527, - "grad_norm": 1.5803509950637817, - "learning_rate": 5.201306532663317e-05, - "loss": 4.9062, - "step": 48265 - }, - { - "epoch": 25.171316818774446, - "grad_norm": 1.4776437282562256, - "learning_rate": 5.201206030150754e-05, - "loss": 5.4162, - "step": 48266 - }, - { - "epoch": 25.171838331160366, - "grad_norm": 1.5642379522323608, - "learning_rate": 5.2011055276381915e-05, - "loss": 5.4002, - "step": 48267 - }, - { - "epoch": 25.172359843546285, - "grad_norm": 1.5841513872146606, - "learning_rate": 5.201005025125628e-05, - "loss": 5.218, - "step": 48268 - }, - { - "epoch": 25.172881355932205, - "grad_norm": 1.5213850736618042, - "learning_rate": 5.200904522613066e-05, - "loss": 5.09, - "step": 48269 - }, - { - "epoch": 25.17340286831812, - "grad_norm": 1.6134103536605835, - "learning_rate": 5.200804020100503e-05, - "loss": 4.5599, - "step": 48270 - }, - { - "epoch": 25.17392438070404, - "grad_norm": 1.5928481817245483, - "learning_rate": 5.2007035175879406e-05, - "loss": 5.1158, - "step": 48271 - }, - { - "epoch": 25.17444589308996, - "grad_norm": 1.5425201654434204, - "learning_rate": 5.200603015075377e-05, - "loss": 5.5117, - "step": 48272 - }, - { - "epoch": 25.17496740547588, - "grad_norm": 1.50590181350708, - "learning_rate": 5.200502512562815e-05, - "loss": 4.9966, - "step": 48273 - }, - { - "epoch": 25.1754889178618, - "grad_norm": 1.586564064025879, - "learning_rate": 5.200402010050251e-05, - "loss": 5.1931, - "step": 48274 - }, - { - "epoch": 25.176010430247718, - "grad_norm": 1.6813656091690063, - "learning_rate": 5.2003015075376884e-05, - "loss": 5.3193, - "step": 48275 - }, - { - "epoch": 25.176531942633638, - "grad_norm": 1.583972692489624, - "learning_rate": 5.200201005025126e-05, - "loss": 5.2776, - "step": 48276 - }, - { - "epoch": 25.177053455019557, - "grad_norm": 1.694563627243042, - "learning_rate": 5.2001005025125626e-05, - "loss": 5.3477, - "step": 48277 - }, - { - "epoch": 25.177574967405477, - "grad_norm": 1.6316635608673096, - "learning_rate": 5.2000000000000004e-05, - "loss": 5.2086, - "step": 48278 - }, - { - "epoch": 25.178096479791396, - "grad_norm": 1.5485838651657104, - "learning_rate": 5.199899497487437e-05, - "loss": 5.5242, - "step": 48279 - }, - { - "epoch": 25.178617992177315, - "grad_norm": 1.649080514907837, - "learning_rate": 5.1997989949748746e-05, - "loss": 4.9274, - "step": 48280 - }, - { - "epoch": 25.179139504563235, - "grad_norm": 1.5844050645828247, - "learning_rate": 5.199698492462312e-05, - "loss": 5.1668, - "step": 48281 - }, - { - "epoch": 25.17966101694915, - "grad_norm": 1.5307279825210571, - "learning_rate": 5.1995979899497495e-05, - "loss": 5.2937, - "step": 48282 - }, - { - "epoch": 25.18018252933507, - "grad_norm": 1.5992871522903442, - "learning_rate": 5.199497487437186e-05, - "loss": 5.3289, - "step": 48283 - }, - { - "epoch": 25.18070404172099, - "grad_norm": 1.5622804164886475, - "learning_rate": 5.199396984924624e-05, - "loss": 5.1153, - "step": 48284 - }, - { - "epoch": 25.18122555410691, - "grad_norm": 1.6254280805587769, - "learning_rate": 5.199296482412061e-05, - "loss": 4.7735, - "step": 48285 - }, - { - "epoch": 25.18174706649283, - "grad_norm": 1.4742189645767212, - "learning_rate": 5.1991959798994986e-05, - "loss": 5.1951, - "step": 48286 - }, - { - "epoch": 25.182268578878748, - "grad_norm": 1.7222024202346802, - "learning_rate": 5.199095477386935e-05, - "loss": 5.3222, - "step": 48287 - }, - { - "epoch": 25.182790091264668, - "grad_norm": 1.5868488550186157, - "learning_rate": 5.1989949748743714e-05, - "loss": 5.3154, - "step": 48288 - }, - { - "epoch": 25.183311603650587, - "grad_norm": 1.6669461727142334, - "learning_rate": 5.198894472361809e-05, - "loss": 5.4858, - "step": 48289 - }, - { - "epoch": 25.183833116036507, - "grad_norm": 1.6729753017425537, - "learning_rate": 5.198793969849246e-05, - "loss": 5.2408, - "step": 48290 - }, - { - "epoch": 25.184354628422426, - "grad_norm": 1.4920904636383057, - "learning_rate": 5.198693467336684e-05, - "loss": 5.2494, - "step": 48291 - }, - { - "epoch": 25.184876140808345, - "grad_norm": 1.6071747541427612, - "learning_rate": 5.1985929648241205e-05, - "loss": 5.4232, - "step": 48292 - }, - { - "epoch": 25.185397653194265, - "grad_norm": 1.6151361465454102, - "learning_rate": 5.198492462311558e-05, - "loss": 5.3767, - "step": 48293 - }, - { - "epoch": 25.18591916558018, - "grad_norm": 1.5583058595657349, - "learning_rate": 5.198391959798995e-05, - "loss": 5.2987, - "step": 48294 - }, - { - "epoch": 25.1864406779661, - "grad_norm": 1.5590896606445312, - "learning_rate": 5.1982914572864325e-05, - "loss": 5.4547, - "step": 48295 - }, - { - "epoch": 25.18696219035202, - "grad_norm": 1.6528080701828003, - "learning_rate": 5.1981909547738696e-05, - "loss": 5.197, - "step": 48296 - }, - { - "epoch": 25.18748370273794, - "grad_norm": 1.5021320581436157, - "learning_rate": 5.1980904522613074e-05, - "loss": 4.9096, - "step": 48297 - }, - { - "epoch": 25.18800521512386, - "grad_norm": 1.5672109127044678, - "learning_rate": 5.197989949748744e-05, - "loss": 5.0116, - "step": 48298 - }, - { - "epoch": 25.188526727509778, - "grad_norm": 1.4901870489120483, - "learning_rate": 5.1978894472361816e-05, - "loss": 5.2673, - "step": 48299 - }, - { - "epoch": 25.189048239895698, - "grad_norm": 1.591065764427185, - "learning_rate": 5.197788944723618e-05, - "loss": 5.1592, - "step": 48300 - }, - { - "epoch": 25.189569752281617, - "grad_norm": 1.6320013999938965, - "learning_rate": 5.197688442211055e-05, - "loss": 5.55, - "step": 48301 - }, - { - "epoch": 25.190091264667537, - "grad_norm": 1.6306010484695435, - "learning_rate": 5.197587939698493e-05, - "loss": 5.1082, - "step": 48302 - }, - { - "epoch": 25.190612777053456, - "grad_norm": 1.5763851404190063, - "learning_rate": 5.1974874371859293e-05, - "loss": 5.0773, - "step": 48303 - }, - { - "epoch": 25.191134289439375, - "grad_norm": 1.5967589616775513, - "learning_rate": 5.197386934673367e-05, - "loss": 5.2321, - "step": 48304 - }, - { - "epoch": 25.191655801825295, - "grad_norm": 1.5414631366729736, - "learning_rate": 5.197286432160804e-05, - "loss": 5.1967, - "step": 48305 - }, - { - "epoch": 25.19217731421121, - "grad_norm": 1.4847177267074585, - "learning_rate": 5.197185929648242e-05, - "loss": 5.2536, - "step": 48306 - }, - { - "epoch": 25.19269882659713, - "grad_norm": 1.6727932691574097, - "learning_rate": 5.1970854271356784e-05, - "loss": 5.6573, - "step": 48307 - }, - { - "epoch": 25.19322033898305, - "grad_norm": 1.5710893869400024, - "learning_rate": 5.196984924623116e-05, - "loss": 5.1313, - "step": 48308 - }, - { - "epoch": 25.19374185136897, - "grad_norm": 1.6223132610321045, - "learning_rate": 5.1968844221105527e-05, - "loss": 5.1001, - "step": 48309 - }, - { - "epoch": 25.19426336375489, - "grad_norm": 1.5251421928405762, - "learning_rate": 5.1967839195979904e-05, - "loss": 5.0532, - "step": 48310 - }, - { - "epoch": 25.194784876140808, - "grad_norm": 1.5213109254837036, - "learning_rate": 5.1966834170854275e-05, - "loss": 5.0112, - "step": 48311 - }, - { - "epoch": 25.195306388526728, - "grad_norm": 1.608641505241394, - "learning_rate": 5.196582914572865e-05, - "loss": 5.616, - "step": 48312 - }, - { - "epoch": 25.195827900912647, - "grad_norm": 1.5241128206253052, - "learning_rate": 5.196482412060302e-05, - "loss": 5.3145, - "step": 48313 - }, - { - "epoch": 25.196349413298567, - "grad_norm": 1.6935138702392578, - "learning_rate": 5.196381909547738e-05, - "loss": 4.6836, - "step": 48314 - }, - { - "epoch": 25.196870925684486, - "grad_norm": 1.6507019996643066, - "learning_rate": 5.196281407035176e-05, - "loss": 5.0499, - "step": 48315 - }, - { - "epoch": 25.197392438070406, - "grad_norm": 1.596078872680664, - "learning_rate": 5.196180904522613e-05, - "loss": 4.6802, - "step": 48316 - }, - { - "epoch": 25.197913950456325, - "grad_norm": 1.546017050743103, - "learning_rate": 5.196080402010051e-05, - "loss": 4.7077, - "step": 48317 - }, - { - "epoch": 25.19843546284224, - "grad_norm": 1.629560112953186, - "learning_rate": 5.195979899497487e-05, - "loss": 5.0823, - "step": 48318 - }, - { - "epoch": 25.19895697522816, - "grad_norm": 1.6828365325927734, - "learning_rate": 5.195879396984925e-05, - "loss": 5.5038, - "step": 48319 - }, - { - "epoch": 25.19947848761408, - "grad_norm": 1.5470513105392456, - "learning_rate": 5.1957788944723615e-05, - "loss": 5.5871, - "step": 48320 - }, - { - "epoch": 25.2, - "grad_norm": 1.5760334730148315, - "learning_rate": 5.195678391959799e-05, - "loss": 5.1638, - "step": 48321 - }, - { - "epoch": 25.20052151238592, - "grad_norm": 1.4575612545013428, - "learning_rate": 5.1955778894472364e-05, - "loss": 5.5824, - "step": 48322 - }, - { - "epoch": 25.201043024771838, - "grad_norm": 1.734055519104004, - "learning_rate": 5.195477386934674e-05, - "loss": 4.7759, - "step": 48323 - }, - { - "epoch": 25.201564537157758, - "grad_norm": 1.6148805618286133, - "learning_rate": 5.1953768844221106e-05, - "loss": 5.0881, - "step": 48324 - }, - { - "epoch": 25.202086049543677, - "grad_norm": 1.574838399887085, - "learning_rate": 5.1952763819095484e-05, - "loss": 5.1639, - "step": 48325 - }, - { - "epoch": 25.202607561929597, - "grad_norm": 1.6231328248977661, - "learning_rate": 5.1951758793969855e-05, - "loss": 5.0672, - "step": 48326 - }, - { - "epoch": 25.203129074315516, - "grad_norm": 1.6548125743865967, - "learning_rate": 5.195075376884422e-05, - "loss": 5.4811, - "step": 48327 - }, - { - "epoch": 25.203650586701436, - "grad_norm": 1.6284483671188354, - "learning_rate": 5.19497487437186e-05, - "loss": 5.1359, - "step": 48328 - }, - { - "epoch": 25.204172099087355, - "grad_norm": 1.5638811588287354, - "learning_rate": 5.194874371859296e-05, - "loss": 5.5895, - "step": 48329 - }, - { - "epoch": 25.20469361147327, - "grad_norm": 1.6855058670043945, - "learning_rate": 5.194773869346734e-05, - "loss": 4.8557, - "step": 48330 - }, - { - "epoch": 25.20521512385919, - "grad_norm": 1.4649975299835205, - "learning_rate": 5.194673366834171e-05, - "loss": 5.2772, - "step": 48331 - }, - { - "epoch": 25.20573663624511, - "grad_norm": 1.6855249404907227, - "learning_rate": 5.194572864321609e-05, - "loss": 4.7377, - "step": 48332 - }, - { - "epoch": 25.20625814863103, - "grad_norm": 1.5740643739700317, - "learning_rate": 5.194472361809045e-05, - "loss": 5.3692, - "step": 48333 - }, - { - "epoch": 25.20677966101695, - "grad_norm": 1.50991690158844, - "learning_rate": 5.194371859296483e-05, - "loss": 4.9255, - "step": 48334 - }, - { - "epoch": 25.20730117340287, - "grad_norm": 1.4853595495224, - "learning_rate": 5.1942713567839194e-05, - "loss": 5.0932, - "step": 48335 - }, - { - "epoch": 25.207822685788788, - "grad_norm": 1.937727689743042, - "learning_rate": 5.194170854271357e-05, - "loss": 5.0133, - "step": 48336 - }, - { - "epoch": 25.208344198174707, - "grad_norm": 1.5737426280975342, - "learning_rate": 5.194070351758794e-05, - "loss": 5.6157, - "step": 48337 - }, - { - "epoch": 25.208865710560627, - "grad_norm": 1.6499024629592896, - "learning_rate": 5.193969849246232e-05, - "loss": 5.1248, - "step": 48338 - }, - { - "epoch": 25.209387222946546, - "grad_norm": 1.5909255743026733, - "learning_rate": 5.1938693467336685e-05, - "loss": 5.5741, - "step": 48339 - }, - { - "epoch": 25.209908735332466, - "grad_norm": 1.5819053649902344, - "learning_rate": 5.193768844221106e-05, - "loss": 4.8089, - "step": 48340 - }, - { - "epoch": 25.210430247718385, - "grad_norm": 1.6573951244354248, - "learning_rate": 5.193668341708543e-05, - "loss": 5.1161, - "step": 48341 - }, - { - "epoch": 25.2109517601043, - "grad_norm": 1.462242603302002, - "learning_rate": 5.19356783919598e-05, - "loss": 5.606, - "step": 48342 - }, - { - "epoch": 25.21147327249022, - "grad_norm": 1.5756440162658691, - "learning_rate": 5.1934673366834176e-05, - "loss": 5.0698, - "step": 48343 - }, - { - "epoch": 25.21199478487614, - "grad_norm": 1.5252902507781982, - "learning_rate": 5.193366834170854e-05, - "loss": 5.4856, - "step": 48344 - }, - { - "epoch": 25.21251629726206, - "grad_norm": 1.5936272144317627, - "learning_rate": 5.193266331658292e-05, - "loss": 5.1274, - "step": 48345 - }, - { - "epoch": 25.21303780964798, - "grad_norm": 1.6166918277740479, - "learning_rate": 5.193165829145728e-05, - "loss": 5.0241, - "step": 48346 - }, - { - "epoch": 25.2135593220339, - "grad_norm": 1.5729111433029175, - "learning_rate": 5.193065326633166e-05, - "loss": 5.1316, - "step": 48347 - }, - { - "epoch": 25.214080834419818, - "grad_norm": 1.5887471437454224, - "learning_rate": 5.192964824120603e-05, - "loss": 4.6782, - "step": 48348 - }, - { - "epoch": 25.214602346805737, - "grad_norm": 1.5337222814559937, - "learning_rate": 5.192864321608041e-05, - "loss": 5.0075, - "step": 48349 - }, - { - "epoch": 25.215123859191657, - "grad_norm": 1.489263653755188, - "learning_rate": 5.1927638190954774e-05, - "loss": 5.3088, - "step": 48350 - }, - { - "epoch": 25.215645371577576, - "grad_norm": 1.4759835004806519, - "learning_rate": 5.192663316582915e-05, - "loss": 5.4859, - "step": 48351 - }, - { - "epoch": 25.216166883963496, - "grad_norm": 1.6131857633590698, - "learning_rate": 5.192562814070352e-05, - "loss": 5.2151, - "step": 48352 - }, - { - "epoch": 25.216688396349415, - "grad_norm": 1.465651512145996, - "learning_rate": 5.19246231155779e-05, - "loss": 5.3619, - "step": 48353 - }, - { - "epoch": 25.21720990873533, - "grad_norm": 1.4825241565704346, - "learning_rate": 5.1923618090452264e-05, - "loss": 5.7105, - "step": 48354 - }, - { - "epoch": 25.21773142112125, - "grad_norm": 1.6847013235092163, - "learning_rate": 5.192261306532663e-05, - "loss": 4.9435, - "step": 48355 - }, - { - "epoch": 25.21825293350717, - "grad_norm": 1.5060125589370728, - "learning_rate": 5.1921608040201007e-05, - "loss": 5.6148, - "step": 48356 - }, - { - "epoch": 25.21877444589309, - "grad_norm": 1.5414056777954102, - "learning_rate": 5.192060301507538e-05, - "loss": 5.5711, - "step": 48357 - }, - { - "epoch": 25.21929595827901, - "grad_norm": 1.555281400680542, - "learning_rate": 5.1919597989949755e-05, - "loss": 5.485, - "step": 48358 - }, - { - "epoch": 25.21981747066493, - "grad_norm": 1.6709152460098267, - "learning_rate": 5.191859296482412e-05, - "loss": 5.1475, - "step": 48359 - }, - { - "epoch": 25.220338983050848, - "grad_norm": 1.4349151849746704, - "learning_rate": 5.19175879396985e-05, - "loss": 5.5852, - "step": 48360 - }, - { - "epoch": 25.220860495436767, - "grad_norm": 1.6444754600524902, - "learning_rate": 5.191658291457286e-05, - "loss": 5.2679, - "step": 48361 - }, - { - "epoch": 25.221382007822687, - "grad_norm": 1.5464152097702026, - "learning_rate": 5.191557788944724e-05, - "loss": 5.53, - "step": 48362 - }, - { - "epoch": 25.221903520208606, - "grad_norm": 1.6001051664352417, - "learning_rate": 5.191457286432161e-05, - "loss": 5.5129, - "step": 48363 - }, - { - "epoch": 25.222425032594526, - "grad_norm": 1.5279535055160522, - "learning_rate": 5.191356783919599e-05, - "loss": 5.6252, - "step": 48364 - }, - { - "epoch": 25.22294654498044, - "grad_norm": 1.611327052116394, - "learning_rate": 5.191256281407035e-05, - "loss": 5.5115, - "step": 48365 - }, - { - "epoch": 25.22346805736636, - "grad_norm": 1.472090721130371, - "learning_rate": 5.191155778894473e-05, - "loss": 5.1272, - "step": 48366 - }, - { - "epoch": 25.22398956975228, - "grad_norm": 1.6048035621643066, - "learning_rate": 5.1910552763819095e-05, - "loss": 5.2369, - "step": 48367 - }, - { - "epoch": 25.2245110821382, - "grad_norm": 1.4507238864898682, - "learning_rate": 5.1909547738693466e-05, - "loss": 4.7864, - "step": 48368 - }, - { - "epoch": 25.22503259452412, - "grad_norm": 1.5571866035461426, - "learning_rate": 5.1908542713567844e-05, - "loss": 5.579, - "step": 48369 - }, - { - "epoch": 25.22555410691004, - "grad_norm": 1.5456912517547607, - "learning_rate": 5.190753768844221e-05, - "loss": 5.1095, - "step": 48370 - }, - { - "epoch": 25.22607561929596, - "grad_norm": 1.6036648750305176, - "learning_rate": 5.1906532663316586e-05, - "loss": 4.9969, - "step": 48371 - }, - { - "epoch": 25.226597131681878, - "grad_norm": 1.5595334768295288, - "learning_rate": 5.190552763819096e-05, - "loss": 5.2629, - "step": 48372 - }, - { - "epoch": 25.227118644067797, - "grad_norm": 1.6687716245651245, - "learning_rate": 5.1904522613065335e-05, - "loss": 5.3684, - "step": 48373 - }, - { - "epoch": 25.227640156453717, - "grad_norm": 1.4662269353866577, - "learning_rate": 5.19035175879397e-05, - "loss": 5.8045, - "step": 48374 - }, - { - "epoch": 25.228161668839636, - "grad_norm": 1.5624916553497314, - "learning_rate": 5.190251256281408e-05, - "loss": 5.7236, - "step": 48375 - }, - { - "epoch": 25.228683181225556, - "grad_norm": 1.5957906246185303, - "learning_rate": 5.190150753768844e-05, - "loss": 5.6514, - "step": 48376 - }, - { - "epoch": 25.22920469361147, - "grad_norm": 1.6163588762283325, - "learning_rate": 5.190050251256282e-05, - "loss": 5.049, - "step": 48377 - }, - { - "epoch": 25.22972620599739, - "grad_norm": 1.4608922004699707, - "learning_rate": 5.189949748743719e-05, - "loss": 5.3884, - "step": 48378 - }, - { - "epoch": 25.23024771838331, - "grad_norm": 1.5689085721969604, - "learning_rate": 5.189849246231157e-05, - "loss": 5.5234, - "step": 48379 - }, - { - "epoch": 25.23076923076923, - "grad_norm": 1.5280333757400513, - "learning_rate": 5.189748743718593e-05, - "loss": 5.4396, - "step": 48380 - }, - { - "epoch": 25.23129074315515, - "grad_norm": 1.6102114915847778, - "learning_rate": 5.1896482412060296e-05, - "loss": 4.5859, - "step": 48381 - }, - { - "epoch": 25.23181225554107, - "grad_norm": 1.5372856855392456, - "learning_rate": 5.1895477386934674e-05, - "loss": 5.5929, - "step": 48382 - }, - { - "epoch": 25.23233376792699, - "grad_norm": 1.6287471055984497, - "learning_rate": 5.1894472361809045e-05, - "loss": 5.2827, - "step": 48383 - }, - { - "epoch": 25.232855280312908, - "grad_norm": 1.5909171104431152, - "learning_rate": 5.189346733668342e-05, - "loss": 5.3572, - "step": 48384 - }, - { - "epoch": 25.233376792698827, - "grad_norm": 1.5753047466278076, - "learning_rate": 5.189246231155779e-05, - "loss": 5.0252, - "step": 48385 - }, - { - "epoch": 25.233898305084747, - "grad_norm": 1.6671885251998901, - "learning_rate": 5.1891457286432165e-05, - "loss": 5.1712, - "step": 48386 - }, - { - "epoch": 25.234419817470666, - "grad_norm": 1.5014052391052246, - "learning_rate": 5.189045226130653e-05, - "loss": 4.9431, - "step": 48387 - }, - { - "epoch": 25.234941329856586, - "grad_norm": 1.730507493019104, - "learning_rate": 5.188944723618091e-05, - "loss": 5.2172, - "step": 48388 - }, - { - "epoch": 25.2354628422425, - "grad_norm": 1.5905430316925049, - "learning_rate": 5.188844221105528e-05, - "loss": 5.2787, - "step": 48389 - }, - { - "epoch": 25.23598435462842, - "grad_norm": 1.6713426113128662, - "learning_rate": 5.1887437185929656e-05, - "loss": 5.012, - "step": 48390 - }, - { - "epoch": 25.23650586701434, - "grad_norm": 1.545896291732788, - "learning_rate": 5.188643216080402e-05, - "loss": 4.9777, - "step": 48391 - }, - { - "epoch": 25.23702737940026, - "grad_norm": 1.4265631437301636, - "learning_rate": 5.18854271356784e-05, - "loss": 5.7121, - "step": 48392 - }, - { - "epoch": 25.23754889178618, - "grad_norm": 1.5716664791107178, - "learning_rate": 5.188442211055277e-05, - "loss": 5.1689, - "step": 48393 - }, - { - "epoch": 25.2380704041721, - "grad_norm": 1.7393176555633545, - "learning_rate": 5.1883417085427134e-05, - "loss": 4.2449, - "step": 48394 - }, - { - "epoch": 25.23859191655802, - "grad_norm": 1.6336759328842163, - "learning_rate": 5.188241206030151e-05, - "loss": 4.8914, - "step": 48395 - }, - { - "epoch": 25.239113428943938, - "grad_norm": 1.5827322006225586, - "learning_rate": 5.1881407035175876e-05, - "loss": 5.0529, - "step": 48396 - }, - { - "epoch": 25.239634941329857, - "grad_norm": 1.651581048965454, - "learning_rate": 5.1880402010050254e-05, - "loss": 5.2489, - "step": 48397 - }, - { - "epoch": 25.240156453715777, - "grad_norm": 1.474347710609436, - "learning_rate": 5.1879396984924625e-05, - "loss": 5.3414, - "step": 48398 - }, - { - "epoch": 25.240677966101696, - "grad_norm": 1.590214490890503, - "learning_rate": 5.1878391959799e-05, - "loss": 5.3885, - "step": 48399 - }, - { - "epoch": 25.241199478487616, - "grad_norm": 1.6012526750564575, - "learning_rate": 5.187738693467337e-05, - "loss": 5.0981, - "step": 48400 - }, - { - "epoch": 25.24172099087353, - "grad_norm": 1.6336185932159424, - "learning_rate": 5.1876381909547745e-05, - "loss": 5.2496, - "step": 48401 - }, - { - "epoch": 25.24224250325945, - "grad_norm": 1.4584846496582031, - "learning_rate": 5.187537688442211e-05, - "loss": 5.4841, - "step": 48402 - }, - { - "epoch": 25.24276401564537, - "grad_norm": 1.6554908752441406, - "learning_rate": 5.187437185929649e-05, - "loss": 4.8691, - "step": 48403 - }, - { - "epoch": 25.24328552803129, - "grad_norm": 1.5323480367660522, - "learning_rate": 5.187336683417086e-05, - "loss": 5.6671, - "step": 48404 - }, - { - "epoch": 25.24380704041721, - "grad_norm": 1.5569689273834229, - "learning_rate": 5.1872361809045236e-05, - "loss": 5.3156, - "step": 48405 - }, - { - "epoch": 25.24432855280313, - "grad_norm": 1.5573638677597046, - "learning_rate": 5.18713567839196e-05, - "loss": 5.7277, - "step": 48406 - }, - { - "epoch": 25.24485006518905, - "grad_norm": 1.5741711854934692, - "learning_rate": 5.1870351758793964e-05, - "loss": 5.4018, - "step": 48407 - }, - { - "epoch": 25.245371577574968, - "grad_norm": 1.660439372062683, - "learning_rate": 5.186934673366834e-05, - "loss": 4.9323, - "step": 48408 - }, - { - "epoch": 25.245893089960887, - "grad_norm": 1.547269344329834, - "learning_rate": 5.186834170854271e-05, - "loss": 5.0701, - "step": 48409 - }, - { - "epoch": 25.246414602346807, - "grad_norm": 1.5055551528930664, - "learning_rate": 5.186733668341709e-05, - "loss": 5.3178, - "step": 48410 - }, - { - "epoch": 25.246936114732726, - "grad_norm": 1.537407398223877, - "learning_rate": 5.1866331658291455e-05, - "loss": 4.9559, - "step": 48411 - }, - { - "epoch": 25.247457627118646, - "grad_norm": 1.5306118726730347, - "learning_rate": 5.186532663316583e-05, - "loss": 5.0451, - "step": 48412 - }, - { - "epoch": 25.24797913950456, - "grad_norm": 1.4959690570831299, - "learning_rate": 5.18643216080402e-05, - "loss": 4.9265, - "step": 48413 - }, - { - "epoch": 25.24850065189048, - "grad_norm": 1.5236923694610596, - "learning_rate": 5.1863316582914575e-05, - "loss": 4.8895, - "step": 48414 - }, - { - "epoch": 25.2490221642764, - "grad_norm": 1.6606241464614868, - "learning_rate": 5.1862311557788946e-05, - "loss": 4.8309, - "step": 48415 - }, - { - "epoch": 25.24954367666232, - "grad_norm": 1.5386115312576294, - "learning_rate": 5.1861306532663324e-05, - "loss": 4.7996, - "step": 48416 - }, - { - "epoch": 25.25006518904824, - "grad_norm": 1.6200599670410156, - "learning_rate": 5.186030150753769e-05, - "loss": 5.3818, - "step": 48417 - }, - { - "epoch": 25.25058670143416, - "grad_norm": 1.5998432636260986, - "learning_rate": 5.1859296482412066e-05, - "loss": 5.1543, - "step": 48418 - }, - { - "epoch": 25.25110821382008, - "grad_norm": 1.7899843454360962, - "learning_rate": 5.185829145728644e-05, - "loss": 4.9071, - "step": 48419 - }, - { - "epoch": 25.251629726205998, - "grad_norm": 1.6064870357513428, - "learning_rate": 5.18572864321608e-05, - "loss": 5.3772, - "step": 48420 - }, - { - "epoch": 25.252151238591917, - "grad_norm": 1.5558159351348877, - "learning_rate": 5.185628140703518e-05, - "loss": 4.9196, - "step": 48421 - }, - { - "epoch": 25.252672750977837, - "grad_norm": 1.6635066270828247, - "learning_rate": 5.1855276381909543e-05, - "loss": 5.0102, - "step": 48422 - }, - { - "epoch": 25.253194263363756, - "grad_norm": 1.5834736824035645, - "learning_rate": 5.185427135678392e-05, - "loss": 5.5086, - "step": 48423 - }, - { - "epoch": 25.253715775749676, - "grad_norm": 1.6167480945587158, - "learning_rate": 5.185326633165829e-05, - "loss": 5.1906, - "step": 48424 - }, - { - "epoch": 25.25423728813559, - "grad_norm": 1.5333375930786133, - "learning_rate": 5.185226130653267e-05, - "loss": 5.1787, - "step": 48425 - }, - { - "epoch": 25.25475880052151, - "grad_norm": 1.4776060581207275, - "learning_rate": 5.1851256281407034e-05, - "loss": 5.2085, - "step": 48426 - }, - { - "epoch": 25.25528031290743, - "grad_norm": 1.654433012008667, - "learning_rate": 5.185025125628141e-05, - "loss": 4.9291, - "step": 48427 - }, - { - "epoch": 25.25580182529335, - "grad_norm": 1.5435508489608765, - "learning_rate": 5.1849246231155777e-05, - "loss": 5.2624, - "step": 48428 - }, - { - "epoch": 25.25632333767927, - "grad_norm": 1.5785573720932007, - "learning_rate": 5.1848241206030154e-05, - "loss": 5.507, - "step": 48429 - }, - { - "epoch": 25.25684485006519, - "grad_norm": 1.5511677265167236, - "learning_rate": 5.1847236180904525e-05, - "loss": 5.3474, - "step": 48430 - }, - { - "epoch": 25.25736636245111, - "grad_norm": 1.5418386459350586, - "learning_rate": 5.18462311557789e-05, - "loss": 5.0291, - "step": 48431 - }, - { - "epoch": 25.257887874837028, - "grad_norm": 1.6104434728622437, - "learning_rate": 5.184522613065327e-05, - "loss": 4.8576, - "step": 48432 - }, - { - "epoch": 25.258409387222947, - "grad_norm": 1.6678651571273804, - "learning_rate": 5.1844221105527645e-05, - "loss": 5.2512, - "step": 48433 - }, - { - "epoch": 25.258930899608867, - "grad_norm": 1.6337233781814575, - "learning_rate": 5.184321608040201e-05, - "loss": 5.133, - "step": 48434 - }, - { - "epoch": 25.259452411994786, - "grad_norm": 1.6016641855239868, - "learning_rate": 5.184221105527638e-05, - "loss": 5.1501, - "step": 48435 - }, - { - "epoch": 25.259973924380706, - "grad_norm": 1.6046007871627808, - "learning_rate": 5.184120603015076e-05, - "loss": 5.2444, - "step": 48436 - }, - { - "epoch": 25.26049543676662, - "grad_norm": 1.5911872386932373, - "learning_rate": 5.184020100502512e-05, - "loss": 5.0116, - "step": 48437 - }, - { - "epoch": 25.26101694915254, - "grad_norm": 1.5764108896255493, - "learning_rate": 5.18391959798995e-05, - "loss": 5.4207, - "step": 48438 - }, - { - "epoch": 25.26153846153846, - "grad_norm": 1.498425841331482, - "learning_rate": 5.183819095477387e-05, - "loss": 5.3714, - "step": 48439 - }, - { - "epoch": 25.26205997392438, - "grad_norm": 1.8555607795715332, - "learning_rate": 5.183718592964825e-05, - "loss": 5.1469, - "step": 48440 - }, - { - "epoch": 25.2625814863103, - "grad_norm": 1.4443224668502808, - "learning_rate": 5.1836180904522614e-05, - "loss": 5.4355, - "step": 48441 - }, - { - "epoch": 25.26310299869622, - "grad_norm": 1.5884778499603271, - "learning_rate": 5.183517587939699e-05, - "loss": 5.3884, - "step": 48442 - }, - { - "epoch": 25.26362451108214, - "grad_norm": 1.5203505754470825, - "learning_rate": 5.1834170854271356e-05, - "loss": 5.256, - "step": 48443 - }, - { - "epoch": 25.264146023468058, - "grad_norm": 1.526780366897583, - "learning_rate": 5.1833165829145734e-05, - "loss": 5.5029, - "step": 48444 - }, - { - "epoch": 25.264667535853977, - "grad_norm": 1.5274556875228882, - "learning_rate": 5.1832160804020105e-05, - "loss": 5.3035, - "step": 48445 - }, - { - "epoch": 25.265189048239897, - "grad_norm": 1.662408471107483, - "learning_rate": 5.183115577889448e-05, - "loss": 5.3502, - "step": 48446 - }, - { - "epoch": 25.265710560625816, - "grad_norm": 1.4956525564193726, - "learning_rate": 5.183015075376885e-05, - "loss": 4.777, - "step": 48447 - }, - { - "epoch": 25.266232073011736, - "grad_norm": 1.5459717512130737, - "learning_rate": 5.182914572864321e-05, - "loss": 5.2627, - "step": 48448 - }, - { - "epoch": 25.26675358539765, - "grad_norm": 1.4863349199295044, - "learning_rate": 5.182814070351759e-05, - "loss": 5.1364, - "step": 48449 - }, - { - "epoch": 25.26727509778357, - "grad_norm": 1.6058123111724854, - "learning_rate": 5.182713567839196e-05, - "loss": 5.2122, - "step": 48450 - }, - { - "epoch": 25.26779661016949, - "grad_norm": 1.6187435388565063, - "learning_rate": 5.182613065326634e-05, - "loss": 5.5057, - "step": 48451 - }, - { - "epoch": 25.26831812255541, - "grad_norm": 1.6572860479354858, - "learning_rate": 5.18251256281407e-05, - "loss": 5.5885, - "step": 48452 - }, - { - "epoch": 25.26883963494133, - "grad_norm": 1.5660463571548462, - "learning_rate": 5.182412060301508e-05, - "loss": 5.0988, - "step": 48453 - }, - { - "epoch": 25.26936114732725, - "grad_norm": 1.6568342447280884, - "learning_rate": 5.1823115577889444e-05, - "loss": 5.0688, - "step": 48454 - }, - { - "epoch": 25.26988265971317, - "grad_norm": 1.5923150777816772, - "learning_rate": 5.182211055276382e-05, - "loss": 5.3963, - "step": 48455 - }, - { - "epoch": 25.270404172099088, - "grad_norm": 1.5179517269134521, - "learning_rate": 5.182110552763819e-05, - "loss": 5.4883, - "step": 48456 - }, - { - "epoch": 25.270925684485007, - "grad_norm": 1.649361491203308, - "learning_rate": 5.182010050251257e-05, - "loss": 5.3358, - "step": 48457 - }, - { - "epoch": 25.271447196870927, - "grad_norm": 1.7448351383209229, - "learning_rate": 5.1819095477386935e-05, - "loss": 5.2355, - "step": 48458 - }, - { - "epoch": 25.271968709256846, - "grad_norm": 1.5480570793151855, - "learning_rate": 5.181809045226131e-05, - "loss": 5.277, - "step": 48459 - }, - { - "epoch": 25.272490221642762, - "grad_norm": 1.5755810737609863, - "learning_rate": 5.1817085427135684e-05, - "loss": 5.1142, - "step": 48460 - }, - { - "epoch": 25.27301173402868, - "grad_norm": 1.6002317667007446, - "learning_rate": 5.181608040201005e-05, - "loss": 5.0888, - "step": 48461 - }, - { - "epoch": 25.2735332464146, - "grad_norm": 1.6315646171569824, - "learning_rate": 5.1815075376884426e-05, - "loss": 5.0488, - "step": 48462 - }, - { - "epoch": 25.27405475880052, - "grad_norm": 1.5270779132843018, - "learning_rate": 5.181407035175879e-05, - "loss": 4.6309, - "step": 48463 - }, - { - "epoch": 25.27457627118644, - "grad_norm": 1.5996840000152588, - "learning_rate": 5.181306532663317e-05, - "loss": 4.7386, - "step": 48464 - }, - { - "epoch": 25.27509778357236, - "grad_norm": 1.5075407028198242, - "learning_rate": 5.181206030150754e-05, - "loss": 5.0035, - "step": 48465 - }, - { - "epoch": 25.27561929595828, - "grad_norm": 1.5242680311203003, - "learning_rate": 5.181105527638192e-05, - "loss": 4.5605, - "step": 48466 - }, - { - "epoch": 25.2761408083442, - "grad_norm": 1.4950236082077026, - "learning_rate": 5.181005025125628e-05, - "loss": 5.1608, - "step": 48467 - }, - { - "epoch": 25.276662320730118, - "grad_norm": 1.6069222688674927, - "learning_rate": 5.180904522613066e-05, - "loss": 4.7771, - "step": 48468 - }, - { - "epoch": 25.277183833116037, - "grad_norm": 1.564070463180542, - "learning_rate": 5.1808040201005024e-05, - "loss": 5.6196, - "step": 48469 - }, - { - "epoch": 25.277705345501957, - "grad_norm": 1.691686987876892, - "learning_rate": 5.18070351758794e-05, - "loss": 5.0175, - "step": 48470 - }, - { - "epoch": 25.278226857887876, - "grad_norm": 1.7313019037246704, - "learning_rate": 5.180603015075377e-05, - "loss": 5.1403, - "step": 48471 - }, - { - "epoch": 25.278748370273792, - "grad_norm": 1.5979009866714478, - "learning_rate": 5.180502512562815e-05, - "loss": 5.2703, - "step": 48472 - }, - { - "epoch": 25.27926988265971, - "grad_norm": 1.7067283391952515, - "learning_rate": 5.1804020100502515e-05, - "loss": 4.9935, - "step": 48473 - }, - { - "epoch": 25.27979139504563, - "grad_norm": 1.604418158531189, - "learning_rate": 5.180301507537688e-05, - "loss": 4.9872, - "step": 48474 - }, - { - "epoch": 25.28031290743155, - "grad_norm": 1.525961995124817, - "learning_rate": 5.1802010050251257e-05, - "loss": 5.2086, - "step": 48475 - }, - { - "epoch": 25.28083441981747, - "grad_norm": 1.5795907974243164, - "learning_rate": 5.180100502512563e-05, - "loss": 4.8225, - "step": 48476 - }, - { - "epoch": 25.28135593220339, - "grad_norm": 1.6977088451385498, - "learning_rate": 5.1800000000000005e-05, - "loss": 5.0366, - "step": 48477 - }, - { - "epoch": 25.28187744458931, - "grad_norm": 1.6625896692276, - "learning_rate": 5.179899497487437e-05, - "loss": 5.3092, - "step": 48478 - }, - { - "epoch": 25.28239895697523, - "grad_norm": 1.5034955739974976, - "learning_rate": 5.179798994974875e-05, - "loss": 5.3432, - "step": 48479 - }, - { - "epoch": 25.282920469361148, - "grad_norm": 1.5685173273086548, - "learning_rate": 5.179698492462312e-05, - "loss": 5.4335, - "step": 48480 - }, - { - "epoch": 25.283441981747067, - "grad_norm": 1.5448129177093506, - "learning_rate": 5.1795979899497496e-05, - "loss": 5.5967, - "step": 48481 - }, - { - "epoch": 25.283963494132987, - "grad_norm": 1.5207968950271606, - "learning_rate": 5.179497487437186e-05, - "loss": 5.3052, - "step": 48482 - }, - { - "epoch": 25.284485006518906, - "grad_norm": 1.574852466583252, - "learning_rate": 5.179396984924624e-05, - "loss": 5.2387, - "step": 48483 - }, - { - "epoch": 25.285006518904822, - "grad_norm": 1.5602384805679321, - "learning_rate": 5.17929648241206e-05, - "loss": 4.7163, - "step": 48484 - }, - { - "epoch": 25.285528031290742, - "grad_norm": 1.8110796213150024, - "learning_rate": 5.179195979899498e-05, - "loss": 4.6229, - "step": 48485 - }, - { - "epoch": 25.28604954367666, - "grad_norm": 1.6231932640075684, - "learning_rate": 5.179095477386935e-05, - "loss": 4.975, - "step": 48486 - }, - { - "epoch": 25.28657105606258, - "grad_norm": 1.6373850107192993, - "learning_rate": 5.1789949748743716e-05, - "loss": 5.1001, - "step": 48487 - }, - { - "epoch": 25.2870925684485, - "grad_norm": 1.5988361835479736, - "learning_rate": 5.1788944723618094e-05, - "loss": 5.2723, - "step": 48488 - }, - { - "epoch": 25.28761408083442, - "grad_norm": 1.5776691436767578, - "learning_rate": 5.178793969849246e-05, - "loss": 5.5555, - "step": 48489 - }, - { - "epoch": 25.28813559322034, - "grad_norm": 1.6307975053787231, - "learning_rate": 5.1786934673366836e-05, - "loss": 5.2777, - "step": 48490 - }, - { - "epoch": 25.28865710560626, - "grad_norm": 1.6178057193756104, - "learning_rate": 5.178592964824121e-05, - "loss": 5.4523, - "step": 48491 - }, - { - "epoch": 25.289178617992178, - "grad_norm": 1.5960419178009033, - "learning_rate": 5.1784924623115585e-05, - "loss": 5.7774, - "step": 48492 - }, - { - "epoch": 25.289700130378097, - "grad_norm": 1.537475347518921, - "learning_rate": 5.178391959798995e-05, - "loss": 5.5407, - "step": 48493 - }, - { - "epoch": 25.290221642764017, - "grad_norm": 1.5164333581924438, - "learning_rate": 5.178291457286433e-05, - "loss": 5.6954, - "step": 48494 - }, - { - "epoch": 25.290743155149936, - "grad_norm": 1.5010545253753662, - "learning_rate": 5.178190954773869e-05, - "loss": 5.2889, - "step": 48495 - }, - { - "epoch": 25.291264667535852, - "grad_norm": 1.6553062200546265, - "learning_rate": 5.178090452261307e-05, - "loss": 4.7543, - "step": 48496 - }, - { - "epoch": 25.291786179921772, - "grad_norm": 1.5252145528793335, - "learning_rate": 5.177989949748744e-05, - "loss": 5.1668, - "step": 48497 - }, - { - "epoch": 25.29230769230769, - "grad_norm": 1.6622825860977173, - "learning_rate": 5.177889447236182e-05, - "loss": 4.4929, - "step": 48498 - }, - { - "epoch": 25.29282920469361, - "grad_norm": 1.5770264863967896, - "learning_rate": 5.177788944723618e-05, - "loss": 5.1255, - "step": 48499 - }, - { - "epoch": 25.29335071707953, - "grad_norm": 1.551990270614624, - "learning_rate": 5.1776884422110546e-05, - "loss": 5.7754, - "step": 48500 - }, - { - "epoch": 25.29387222946545, - "grad_norm": 1.4943437576293945, - "learning_rate": 5.1775879396984924e-05, - "loss": 5.2239, - "step": 48501 - }, - { - "epoch": 25.29439374185137, - "grad_norm": 1.563147783279419, - "learning_rate": 5.1774874371859295e-05, - "loss": 5.2623, - "step": 48502 - }, - { - "epoch": 25.29491525423729, - "grad_norm": 1.5493098497390747, - "learning_rate": 5.177386934673367e-05, - "loss": 4.8754, - "step": 48503 - }, - { - "epoch": 25.295436766623208, - "grad_norm": 1.5841995477676392, - "learning_rate": 5.177286432160804e-05, - "loss": 5.1965, - "step": 48504 - }, - { - "epoch": 25.295958279009128, - "grad_norm": 1.6192734241485596, - "learning_rate": 5.1771859296482415e-05, - "loss": 5.0682, - "step": 48505 - }, - { - "epoch": 25.296479791395047, - "grad_norm": 1.617760181427002, - "learning_rate": 5.1770854271356786e-05, - "loss": 5.0013, - "step": 48506 - }, - { - "epoch": 25.297001303780966, - "grad_norm": 1.559918999671936, - "learning_rate": 5.1769849246231164e-05, - "loss": 5.3393, - "step": 48507 - }, - { - "epoch": 25.297522816166882, - "grad_norm": 1.5646215677261353, - "learning_rate": 5.176884422110553e-05, - "loss": 5.1592, - "step": 48508 - }, - { - "epoch": 25.298044328552802, - "grad_norm": 1.565974473953247, - "learning_rate": 5.1767839195979906e-05, - "loss": 5.3576, - "step": 48509 - }, - { - "epoch": 25.29856584093872, - "grad_norm": 1.6343655586242676, - "learning_rate": 5.176683417085427e-05, - "loss": 5.0379, - "step": 48510 - }, - { - "epoch": 25.29908735332464, - "grad_norm": 1.516369104385376, - "learning_rate": 5.176582914572865e-05, - "loss": 5.1983, - "step": 48511 - }, - { - "epoch": 25.29960886571056, - "grad_norm": 1.6975189447402954, - "learning_rate": 5.176482412060302e-05, - "loss": 4.6323, - "step": 48512 - }, - { - "epoch": 25.30013037809648, - "grad_norm": 1.6835877895355225, - "learning_rate": 5.17638190954774e-05, - "loss": 4.9477, - "step": 48513 - }, - { - "epoch": 25.3006518904824, - "grad_norm": 1.602736473083496, - "learning_rate": 5.176281407035176e-05, - "loss": 5.3148, - "step": 48514 - }, - { - "epoch": 25.30117340286832, - "grad_norm": 1.5342170000076294, - "learning_rate": 5.1761809045226126e-05, - "loss": 4.976, - "step": 48515 - }, - { - "epoch": 25.301694915254238, - "grad_norm": 1.565859079360962, - "learning_rate": 5.1760804020100504e-05, - "loss": 4.8576, - "step": 48516 - }, - { - "epoch": 25.302216427640158, - "grad_norm": 1.6106871366500854, - "learning_rate": 5.1759798994974875e-05, - "loss": 4.9449, - "step": 48517 - }, - { - "epoch": 25.302737940026077, - "grad_norm": 1.6186937093734741, - "learning_rate": 5.175879396984925e-05, - "loss": 5.4573, - "step": 48518 - }, - { - "epoch": 25.303259452411996, - "grad_norm": 1.5835356712341309, - "learning_rate": 5.175778894472362e-05, - "loss": 5.0793, - "step": 48519 - }, - { - "epoch": 25.303780964797912, - "grad_norm": 1.6648069620132446, - "learning_rate": 5.1756783919597995e-05, - "loss": 4.7163, - "step": 48520 - }, - { - "epoch": 25.304302477183832, - "grad_norm": 1.529647946357727, - "learning_rate": 5.175577889447236e-05, - "loss": 5.2642, - "step": 48521 - }, - { - "epoch": 25.30482398956975, - "grad_norm": 1.667874813079834, - "learning_rate": 5.175477386934674e-05, - "loss": 5.09, - "step": 48522 - }, - { - "epoch": 25.30534550195567, - "grad_norm": 1.5039567947387695, - "learning_rate": 5.175376884422111e-05, - "loss": 4.975, - "step": 48523 - }, - { - "epoch": 25.30586701434159, - "grad_norm": 1.5900615453720093, - "learning_rate": 5.1752763819095486e-05, - "loss": 5.205, - "step": 48524 - }, - { - "epoch": 25.30638852672751, - "grad_norm": 1.6796729564666748, - "learning_rate": 5.175175879396985e-05, - "loss": 5.0242, - "step": 48525 - }, - { - "epoch": 25.30691003911343, - "grad_norm": 1.4709261655807495, - "learning_rate": 5.175075376884423e-05, - "loss": 5.6235, - "step": 48526 - }, - { - "epoch": 25.30743155149935, - "grad_norm": 1.5820796489715576, - "learning_rate": 5.17497487437186e-05, - "loss": 5.4441, - "step": 48527 - }, - { - "epoch": 25.307953063885268, - "grad_norm": 1.6090680360794067, - "learning_rate": 5.174874371859296e-05, - "loss": 5.1629, - "step": 48528 - }, - { - "epoch": 25.308474576271188, - "grad_norm": 1.615013837814331, - "learning_rate": 5.174773869346734e-05, - "loss": 5.6151, - "step": 48529 - }, - { - "epoch": 25.308996088657107, - "grad_norm": 1.6915628910064697, - "learning_rate": 5.1746733668341705e-05, - "loss": 5.1688, - "step": 48530 - }, - { - "epoch": 25.309517601043027, - "grad_norm": 1.5410512685775757, - "learning_rate": 5.174572864321608e-05, - "loss": 4.6584, - "step": 48531 - }, - { - "epoch": 25.310039113428942, - "grad_norm": 1.584467887878418, - "learning_rate": 5.1744723618090454e-05, - "loss": 5.0208, - "step": 48532 - }, - { - "epoch": 25.310560625814862, - "grad_norm": 1.6026755571365356, - "learning_rate": 5.174371859296483e-05, - "loss": 5.3615, - "step": 48533 - }, - { - "epoch": 25.31108213820078, - "grad_norm": 1.6326016187667847, - "learning_rate": 5.1742713567839196e-05, - "loss": 5.1819, - "step": 48534 - }, - { - "epoch": 25.3116036505867, - "grad_norm": 1.6311193704605103, - "learning_rate": 5.1741708542713574e-05, - "loss": 5.2053, - "step": 48535 - }, - { - "epoch": 25.31212516297262, - "grad_norm": 1.5434054136276245, - "learning_rate": 5.174070351758794e-05, - "loss": 4.8803, - "step": 48536 - }, - { - "epoch": 25.31264667535854, - "grad_norm": 1.5282293558120728, - "learning_rate": 5.1739698492462316e-05, - "loss": 5.2057, - "step": 48537 - }, - { - "epoch": 25.31316818774446, - "grad_norm": 1.5535529851913452, - "learning_rate": 5.173869346733669e-05, - "loss": 5.1293, - "step": 48538 - }, - { - "epoch": 25.31368970013038, - "grad_norm": 1.5664138793945312, - "learning_rate": 5.1737688442211065e-05, - "loss": 5.2861, - "step": 48539 - }, - { - "epoch": 25.314211212516298, - "grad_norm": 1.6479696035385132, - "learning_rate": 5.173668341708543e-05, - "loss": 5.303, - "step": 48540 - }, - { - "epoch": 25.314732724902218, - "grad_norm": 1.5793291330337524, - "learning_rate": 5.1735678391959793e-05, - "loss": 5.5506, - "step": 48541 - }, - { - "epoch": 25.315254237288137, - "grad_norm": 1.6412948369979858, - "learning_rate": 5.173467336683417e-05, - "loss": 5.4989, - "step": 48542 - }, - { - "epoch": 25.315775749674053, - "grad_norm": 1.6319665908813477, - "learning_rate": 5.173366834170854e-05, - "loss": 4.638, - "step": 48543 - }, - { - "epoch": 25.316297262059972, - "grad_norm": 1.5958175659179688, - "learning_rate": 5.173266331658292e-05, - "loss": 5.4106, - "step": 48544 - }, - { - "epoch": 25.316818774445892, - "grad_norm": 1.431693196296692, - "learning_rate": 5.1731658291457284e-05, - "loss": 5.5933, - "step": 48545 - }, - { - "epoch": 25.31734028683181, - "grad_norm": 1.4389233589172363, - "learning_rate": 5.173065326633166e-05, - "loss": 4.5911, - "step": 48546 - }, - { - "epoch": 25.31786179921773, - "grad_norm": 1.549519658088684, - "learning_rate": 5.172964824120603e-05, - "loss": 5.3768, - "step": 48547 - }, - { - "epoch": 25.31838331160365, - "grad_norm": 1.499122977256775, - "learning_rate": 5.172864321608041e-05, - "loss": 5.126, - "step": 48548 - }, - { - "epoch": 25.31890482398957, - "grad_norm": 1.6422274112701416, - "learning_rate": 5.1727638190954775e-05, - "loss": 4.5145, - "step": 48549 - }, - { - "epoch": 25.31942633637549, - "grad_norm": 1.648422122001648, - "learning_rate": 5.172663316582915e-05, - "loss": 5.189, - "step": 48550 - }, - { - "epoch": 25.31994784876141, - "grad_norm": 1.6189335584640503, - "learning_rate": 5.172562814070352e-05, - "loss": 5.2951, - "step": 48551 - }, - { - "epoch": 25.320469361147328, - "grad_norm": 1.6178325414657593, - "learning_rate": 5.1724623115577895e-05, - "loss": 4.9212, - "step": 48552 - }, - { - "epoch": 25.320990873533248, - "grad_norm": 1.6704673767089844, - "learning_rate": 5.1723618090452266e-05, - "loss": 4.8445, - "step": 48553 - }, - { - "epoch": 25.321512385919167, - "grad_norm": 1.6343128681182861, - "learning_rate": 5.172261306532663e-05, - "loss": 4.8825, - "step": 48554 - }, - { - "epoch": 25.322033898305083, - "grad_norm": 1.5836892127990723, - "learning_rate": 5.172160804020101e-05, - "loss": 5.4805, - "step": 48555 - }, - { - "epoch": 25.322555410691002, - "grad_norm": 1.576979398727417, - "learning_rate": 5.172060301507537e-05, - "loss": 4.9929, - "step": 48556 - }, - { - "epoch": 25.323076923076922, - "grad_norm": 1.5217466354370117, - "learning_rate": 5.171959798994975e-05, - "loss": 5.5212, - "step": 48557 - }, - { - "epoch": 25.32359843546284, - "grad_norm": 1.6653550863265991, - "learning_rate": 5.171859296482412e-05, - "loss": 5.0531, - "step": 48558 - }, - { - "epoch": 25.32411994784876, - "grad_norm": 1.4931222200393677, - "learning_rate": 5.17175879396985e-05, - "loss": 5.66, - "step": 48559 - }, - { - "epoch": 25.32464146023468, - "grad_norm": 1.631942629814148, - "learning_rate": 5.1716582914572864e-05, - "loss": 5.2773, - "step": 48560 - }, - { - "epoch": 25.3251629726206, - "grad_norm": 1.6180286407470703, - "learning_rate": 5.171557788944724e-05, - "loss": 5.1307, - "step": 48561 - }, - { - "epoch": 25.32568448500652, - "grad_norm": 1.460673213005066, - "learning_rate": 5.1714572864321606e-05, - "loss": 5.2143, - "step": 48562 - }, - { - "epoch": 25.32620599739244, - "grad_norm": 1.4587324857711792, - "learning_rate": 5.1713567839195984e-05, - "loss": 5.5697, - "step": 48563 - }, - { - "epoch": 25.326727509778358, - "grad_norm": 1.7429648637771606, - "learning_rate": 5.1712562814070355e-05, - "loss": 4.9627, - "step": 48564 - }, - { - "epoch": 25.327249022164278, - "grad_norm": 1.558127999305725, - "learning_rate": 5.171155778894473e-05, - "loss": 5.1761, - "step": 48565 - }, - { - "epoch": 25.327770534550197, - "grad_norm": 1.5692847967147827, - "learning_rate": 5.17105527638191e-05, - "loss": 5.736, - "step": 48566 - }, - { - "epoch": 25.328292046936113, - "grad_norm": 1.5903745889663696, - "learning_rate": 5.170954773869347e-05, - "loss": 5.5077, - "step": 48567 - }, - { - "epoch": 25.328813559322032, - "grad_norm": 1.6701359748840332, - "learning_rate": 5.1708542713567846e-05, - "loss": 5.1529, - "step": 48568 - }, - { - "epoch": 25.329335071707952, - "grad_norm": 1.5912038087844849, - "learning_rate": 5.170753768844221e-05, - "loss": 4.9469, - "step": 48569 - }, - { - "epoch": 25.32985658409387, - "grad_norm": 1.5388301610946655, - "learning_rate": 5.170653266331659e-05, - "loss": 5.1329, - "step": 48570 - }, - { - "epoch": 25.33037809647979, - "grad_norm": 1.7346123456954956, - "learning_rate": 5.170552763819095e-05, - "loss": 5.4142, - "step": 48571 - }, - { - "epoch": 25.33089960886571, - "grad_norm": 1.5514754056930542, - "learning_rate": 5.170452261306533e-05, - "loss": 5.4307, - "step": 48572 - }, - { - "epoch": 25.33142112125163, - "grad_norm": 1.4606399536132812, - "learning_rate": 5.17035175879397e-05, - "loss": 5.1033, - "step": 48573 - }, - { - "epoch": 25.33194263363755, - "grad_norm": 1.5193415880203247, - "learning_rate": 5.170251256281408e-05, - "loss": 5.5417, - "step": 48574 - }, - { - "epoch": 25.33246414602347, - "grad_norm": 1.5797133445739746, - "learning_rate": 5.170150753768844e-05, - "loss": 5.3006, - "step": 48575 - }, - { - "epoch": 25.332985658409388, - "grad_norm": 1.6541471481323242, - "learning_rate": 5.170050251256282e-05, - "loss": 5.3281, - "step": 48576 - }, - { - "epoch": 25.333507170795308, - "grad_norm": 1.4554307460784912, - "learning_rate": 5.1699497487437185e-05, - "loss": 5.784, - "step": 48577 - }, - { - "epoch": 25.334028683181227, - "grad_norm": 1.6292798519134521, - "learning_rate": 5.169849246231156e-05, - "loss": 5.016, - "step": 48578 - }, - { - "epoch": 25.334550195567143, - "grad_norm": 1.5957454442977905, - "learning_rate": 5.1697487437185934e-05, - "loss": 4.7679, - "step": 48579 - }, - { - "epoch": 25.335071707953063, - "grad_norm": 1.6116067171096802, - "learning_rate": 5.16964824120603e-05, - "loss": 5.1894, - "step": 48580 - }, - { - "epoch": 25.335593220338982, - "grad_norm": 1.5571861267089844, - "learning_rate": 5.1695477386934676e-05, - "loss": 4.4191, - "step": 48581 - }, - { - "epoch": 25.3361147327249, - "grad_norm": 1.5321491956710815, - "learning_rate": 5.169447236180904e-05, - "loss": 5.3026, - "step": 48582 - }, - { - "epoch": 25.33663624511082, - "grad_norm": 1.6711622476577759, - "learning_rate": 5.169346733668342e-05, - "loss": 5.139, - "step": 48583 - }, - { - "epoch": 25.33715775749674, - "grad_norm": 1.6178046464920044, - "learning_rate": 5.169246231155779e-05, - "loss": 5.519, - "step": 48584 - }, - { - "epoch": 25.33767926988266, - "grad_norm": 1.65463387966156, - "learning_rate": 5.169145728643217e-05, - "loss": 4.5465, - "step": 48585 - }, - { - "epoch": 25.33820078226858, - "grad_norm": 1.632018804550171, - "learning_rate": 5.169045226130653e-05, - "loss": 5.1444, - "step": 48586 - }, - { - "epoch": 25.3387222946545, - "grad_norm": 1.6084651947021484, - "learning_rate": 5.168944723618091e-05, - "loss": 5.1865, - "step": 48587 - }, - { - "epoch": 25.33924380704042, - "grad_norm": 1.5377250909805298, - "learning_rate": 5.1688442211055274e-05, - "loss": 5.5557, - "step": 48588 - }, - { - "epoch": 25.339765319426338, - "grad_norm": 1.5767958164215088, - "learning_rate": 5.168743718592965e-05, - "loss": 4.7571, - "step": 48589 - }, - { - "epoch": 25.340286831812257, - "grad_norm": 1.5704752206802368, - "learning_rate": 5.168643216080402e-05, - "loss": 5.4442, - "step": 48590 - }, - { - "epoch": 25.340808344198173, - "grad_norm": 1.6449116468429565, - "learning_rate": 5.16854271356784e-05, - "loss": 4.9448, - "step": 48591 - }, - { - "epoch": 25.341329856584093, - "grad_norm": 1.5385998487472534, - "learning_rate": 5.1684422110552765e-05, - "loss": 5.2356, - "step": 48592 - }, - { - "epoch": 25.341851368970012, - "grad_norm": 1.5015718936920166, - "learning_rate": 5.1683417085427136e-05, - "loss": 5.4391, - "step": 48593 - }, - { - "epoch": 25.34237288135593, - "grad_norm": 1.5409080982208252, - "learning_rate": 5.1682412060301513e-05, - "loss": 5.6314, - "step": 48594 - }, - { - "epoch": 25.34289439374185, - "grad_norm": 1.5694299936294556, - "learning_rate": 5.168140703517588e-05, - "loss": 5.443, - "step": 48595 - }, - { - "epoch": 25.34341590612777, - "grad_norm": 1.59146249294281, - "learning_rate": 5.1680402010050256e-05, - "loss": 4.8418, - "step": 48596 - }, - { - "epoch": 25.34393741851369, - "grad_norm": 1.5509833097457886, - "learning_rate": 5.167939698492462e-05, - "loss": 5.0286, - "step": 48597 - }, - { - "epoch": 25.34445893089961, - "grad_norm": 1.453006386756897, - "learning_rate": 5.1678391959799e-05, - "loss": 5.2511, - "step": 48598 - }, - { - "epoch": 25.34498044328553, - "grad_norm": 1.5223796367645264, - "learning_rate": 5.167738693467337e-05, - "loss": 5.1157, - "step": 48599 - }, - { - "epoch": 25.34550195567145, - "grad_norm": 1.590415120124817, - "learning_rate": 5.1676381909547746e-05, - "loss": 5.4028, - "step": 48600 - }, - { - "epoch": 25.346023468057368, - "grad_norm": 1.6210602521896362, - "learning_rate": 5.167537688442211e-05, - "loss": 5.7086, - "step": 48601 - }, - { - "epoch": 25.346544980443287, - "grad_norm": 1.5685685873031616, - "learning_rate": 5.167437185929649e-05, - "loss": 5.1974, - "step": 48602 - }, - { - "epoch": 25.347066492829203, - "grad_norm": 1.6845428943634033, - "learning_rate": 5.167336683417085e-05, - "loss": 4.6046, - "step": 48603 - }, - { - "epoch": 25.347588005215123, - "grad_norm": 1.512067437171936, - "learning_rate": 5.167236180904523e-05, - "loss": 4.8504, - "step": 48604 - }, - { - "epoch": 25.348109517601042, - "grad_norm": 1.674498438835144, - "learning_rate": 5.16713567839196e-05, - "loss": 5.2789, - "step": 48605 - }, - { - "epoch": 25.34863102998696, - "grad_norm": 1.5788142681121826, - "learning_rate": 5.167035175879398e-05, - "loss": 5.5211, - "step": 48606 - }, - { - "epoch": 25.34915254237288, - "grad_norm": 1.537176489830017, - "learning_rate": 5.1669346733668344e-05, - "loss": 5.3042, - "step": 48607 - }, - { - "epoch": 25.3496740547588, - "grad_norm": 1.532775640487671, - "learning_rate": 5.166834170854271e-05, - "loss": 5.422, - "step": 48608 - }, - { - "epoch": 25.35019556714472, - "grad_norm": 1.5452570915222168, - "learning_rate": 5.1667336683417086e-05, - "loss": 4.8775, - "step": 48609 - }, - { - "epoch": 25.35071707953064, - "grad_norm": 1.6748077869415283, - "learning_rate": 5.166633165829146e-05, - "loss": 5.4251, - "step": 48610 - }, - { - "epoch": 25.35123859191656, - "grad_norm": 1.656619906425476, - "learning_rate": 5.1665326633165835e-05, - "loss": 5.5135, - "step": 48611 - }, - { - "epoch": 25.35176010430248, - "grad_norm": 1.6418235301971436, - "learning_rate": 5.16643216080402e-05, - "loss": 5.195, - "step": 48612 - }, - { - "epoch": 25.352281616688398, - "grad_norm": 1.5956307649612427, - "learning_rate": 5.166331658291458e-05, - "loss": 5.6853, - "step": 48613 - }, - { - "epoch": 25.352803129074317, - "grad_norm": 1.624460220336914, - "learning_rate": 5.166231155778895e-05, - "loss": 5.4303, - "step": 48614 - }, - { - "epoch": 25.353324641460233, - "grad_norm": 1.624199390411377, - "learning_rate": 5.1661306532663326e-05, - "loss": 5.2319, - "step": 48615 - }, - { - "epoch": 25.353846153846153, - "grad_norm": 1.6969722509384155, - "learning_rate": 5.166030150753769e-05, - "loss": 5.3331, - "step": 48616 - }, - { - "epoch": 25.354367666232072, - "grad_norm": 1.5088402032852173, - "learning_rate": 5.165929648241207e-05, - "loss": 5.0024, - "step": 48617 - }, - { - "epoch": 25.35488917861799, - "grad_norm": 1.520193099975586, - "learning_rate": 5.165829145728643e-05, - "loss": 4.9225, - "step": 48618 - }, - { - "epoch": 25.35541069100391, - "grad_norm": 1.5763914585113525, - "learning_rate": 5.165728643216081e-05, - "loss": 5.2928, - "step": 48619 - }, - { - "epoch": 25.35593220338983, - "grad_norm": 1.597982406616211, - "learning_rate": 5.165628140703518e-05, - "loss": 4.9562, - "step": 48620 - }, - { - "epoch": 25.35645371577575, - "grad_norm": 1.5913941860198975, - "learning_rate": 5.1655276381909545e-05, - "loss": 4.9361, - "step": 48621 - }, - { - "epoch": 25.35697522816167, - "grad_norm": 1.876592993736267, - "learning_rate": 5.165427135678392e-05, - "loss": 4.678, - "step": 48622 - }, - { - "epoch": 25.35749674054759, - "grad_norm": 1.68411386013031, - "learning_rate": 5.165326633165829e-05, - "loss": 5.081, - "step": 48623 - }, - { - "epoch": 25.35801825293351, - "grad_norm": 1.5956672430038452, - "learning_rate": 5.1652261306532665e-05, - "loss": 5.5799, - "step": 48624 - }, - { - "epoch": 25.358539765319428, - "grad_norm": 1.5962228775024414, - "learning_rate": 5.1651256281407036e-05, - "loss": 5.0445, - "step": 48625 - }, - { - "epoch": 25.359061277705347, - "grad_norm": 1.6801810264587402, - "learning_rate": 5.1650251256281414e-05, - "loss": 5.3893, - "step": 48626 - }, - { - "epoch": 25.359582790091263, - "grad_norm": 1.5680267810821533, - "learning_rate": 5.164924623115578e-05, - "loss": 4.6678, - "step": 48627 - }, - { - "epoch": 25.360104302477183, - "grad_norm": 1.5068014860153198, - "learning_rate": 5.1648241206030156e-05, - "loss": 5.6945, - "step": 48628 - }, - { - "epoch": 25.360625814863102, - "grad_norm": 1.4337983131408691, - "learning_rate": 5.164723618090452e-05, - "loss": 4.8326, - "step": 48629 - }, - { - "epoch": 25.36114732724902, - "grad_norm": 1.585800290107727, - "learning_rate": 5.16462311557789e-05, - "loss": 5.0529, - "step": 48630 - }, - { - "epoch": 25.36166883963494, - "grad_norm": 1.5852980613708496, - "learning_rate": 5.164522613065327e-05, - "loss": 5.2876, - "step": 48631 - }, - { - "epoch": 25.36219035202086, - "grad_norm": 1.5891375541687012, - "learning_rate": 5.164422110552765e-05, - "loss": 4.7047, - "step": 48632 - }, - { - "epoch": 25.36271186440678, - "grad_norm": 1.5172505378723145, - "learning_rate": 5.164321608040201e-05, - "loss": 5.1684, - "step": 48633 - }, - { - "epoch": 25.3632333767927, - "grad_norm": 1.5760256052017212, - "learning_rate": 5.164221105527638e-05, - "loss": 5.2612, - "step": 48634 - }, - { - "epoch": 25.36375488917862, - "grad_norm": 1.607162594795227, - "learning_rate": 5.164120603015076e-05, - "loss": 5.3605, - "step": 48635 - }, - { - "epoch": 25.36427640156454, - "grad_norm": 1.5073364973068237, - "learning_rate": 5.1640201005025125e-05, - "loss": 5.6667, - "step": 48636 - }, - { - "epoch": 25.364797913950458, - "grad_norm": 1.5826934576034546, - "learning_rate": 5.16391959798995e-05, - "loss": 5.0118, - "step": 48637 - }, - { - "epoch": 25.365319426336377, - "grad_norm": 1.6883397102355957, - "learning_rate": 5.163819095477387e-05, - "loss": 4.5418, - "step": 48638 - }, - { - "epoch": 25.365840938722293, - "grad_norm": 1.5607984066009521, - "learning_rate": 5.1637185929648245e-05, - "loss": 4.9662, - "step": 48639 - }, - { - "epoch": 25.366362451108213, - "grad_norm": 1.5592013597488403, - "learning_rate": 5.1636180904522616e-05, - "loss": 5.1716, - "step": 48640 - }, - { - "epoch": 25.366883963494132, - "grad_norm": 1.6123462915420532, - "learning_rate": 5.1635175879396993e-05, - "loss": 5.177, - "step": 48641 - }, - { - "epoch": 25.36740547588005, - "grad_norm": 1.7034494876861572, - "learning_rate": 5.163417085427136e-05, - "loss": 4.3158, - "step": 48642 - }, - { - "epoch": 25.36792698826597, - "grad_norm": 1.6353645324707031, - "learning_rate": 5.1633165829145736e-05, - "loss": 4.8649, - "step": 48643 - }, - { - "epoch": 25.36844850065189, - "grad_norm": 1.640093207359314, - "learning_rate": 5.16321608040201e-05, - "loss": 5.1932, - "step": 48644 - }, - { - "epoch": 25.36897001303781, - "grad_norm": 1.5676757097244263, - "learning_rate": 5.163115577889448e-05, - "loss": 5.3293, - "step": 48645 - }, - { - "epoch": 25.36949152542373, - "grad_norm": 1.5381194353103638, - "learning_rate": 5.163015075376885e-05, - "loss": 5.2846, - "step": 48646 - }, - { - "epoch": 25.37001303780965, - "grad_norm": 1.5794904232025146, - "learning_rate": 5.162914572864321e-05, - "loss": 4.7776, - "step": 48647 - }, - { - "epoch": 25.37053455019557, - "grad_norm": 1.7643852233886719, - "learning_rate": 5.162814070351759e-05, - "loss": 4.8858, - "step": 48648 - }, - { - "epoch": 25.371056062581488, - "grad_norm": 1.4828307628631592, - "learning_rate": 5.1627135678391955e-05, - "loss": 5.6238, - "step": 48649 - }, - { - "epoch": 25.371577574967404, - "grad_norm": 1.583393931388855, - "learning_rate": 5.162613065326633e-05, - "loss": 4.7968, - "step": 48650 - }, - { - "epoch": 25.372099087353323, - "grad_norm": 1.5963013172149658, - "learning_rate": 5.1625125628140704e-05, - "loss": 5.3418, - "step": 48651 - }, - { - "epoch": 25.372620599739243, - "grad_norm": 1.6259045600891113, - "learning_rate": 5.162412060301508e-05, - "loss": 4.8592, - "step": 48652 - }, - { - "epoch": 25.373142112125162, - "grad_norm": 1.446448802947998, - "learning_rate": 5.1623115577889446e-05, - "loss": 5.5959, - "step": 48653 - }, - { - "epoch": 25.37366362451108, - "grad_norm": 1.5679539442062378, - "learning_rate": 5.1622110552763824e-05, - "loss": 5.1849, - "step": 48654 - }, - { - "epoch": 25.374185136897, - "grad_norm": 1.588621735572815, - "learning_rate": 5.1621105527638195e-05, - "loss": 4.7587, - "step": 48655 - }, - { - "epoch": 25.37470664928292, - "grad_norm": 1.5927817821502686, - "learning_rate": 5.162010050251257e-05, - "loss": 4.7108, - "step": 48656 - }, - { - "epoch": 25.37522816166884, - "grad_norm": 1.5864720344543457, - "learning_rate": 5.161909547738694e-05, - "loss": 5.1125, - "step": 48657 - }, - { - "epoch": 25.37574967405476, - "grad_norm": 1.5493046045303345, - "learning_rate": 5.1618090452261315e-05, - "loss": 5.3782, - "step": 48658 - }, - { - "epoch": 25.37627118644068, - "grad_norm": 1.5939159393310547, - "learning_rate": 5.161708542713568e-05, - "loss": 5.0131, - "step": 48659 - }, - { - "epoch": 25.3767926988266, - "grad_norm": 1.5327914953231812, - "learning_rate": 5.161608040201005e-05, - "loss": 5.4454, - "step": 48660 - }, - { - "epoch": 25.377314211212518, - "grad_norm": 1.4988529682159424, - "learning_rate": 5.161507537688443e-05, - "loss": 5.7728, - "step": 48661 - }, - { - "epoch": 25.377835723598434, - "grad_norm": 1.6155612468719482, - "learning_rate": 5.161407035175879e-05, - "loss": 4.9829, - "step": 48662 - }, - { - "epoch": 25.378357235984353, - "grad_norm": 1.5401945114135742, - "learning_rate": 5.161306532663317e-05, - "loss": 5.1764, - "step": 48663 - }, - { - "epoch": 25.378878748370273, - "grad_norm": 1.492969036102295, - "learning_rate": 5.1612060301507534e-05, - "loss": 5.5554, - "step": 48664 - }, - { - "epoch": 25.379400260756192, - "grad_norm": 1.6423238515853882, - "learning_rate": 5.161105527638191e-05, - "loss": 4.935, - "step": 48665 - }, - { - "epoch": 25.37992177314211, - "grad_norm": 1.6385939121246338, - "learning_rate": 5.161005025125628e-05, - "loss": 5.1219, - "step": 48666 - }, - { - "epoch": 25.38044328552803, - "grad_norm": 1.628400444984436, - "learning_rate": 5.160904522613066e-05, - "loss": 4.7691, - "step": 48667 - }, - { - "epoch": 25.38096479791395, - "grad_norm": 1.6573028564453125, - "learning_rate": 5.1608040201005025e-05, - "loss": 4.5814, - "step": 48668 - }, - { - "epoch": 25.38148631029987, - "grad_norm": 1.5827038288116455, - "learning_rate": 5.16070351758794e-05, - "loss": 5.2182, - "step": 48669 - }, - { - "epoch": 25.38200782268579, - "grad_norm": 1.596052885055542, - "learning_rate": 5.160603015075377e-05, - "loss": 5.36, - "step": 48670 - }, - { - "epoch": 25.38252933507171, - "grad_norm": 1.5833617448806763, - "learning_rate": 5.1605025125628145e-05, - "loss": 5.0968, - "step": 48671 - }, - { - "epoch": 25.38305084745763, - "grad_norm": 1.5769758224487305, - "learning_rate": 5.1604020100502516e-05, - "loss": 5.2713, - "step": 48672 - }, - { - "epoch": 25.383572359843548, - "grad_norm": 1.4942151308059692, - "learning_rate": 5.160301507537688e-05, - "loss": 5.605, - "step": 48673 - }, - { - "epoch": 25.384093872229464, - "grad_norm": 1.5421115159988403, - "learning_rate": 5.160201005025126e-05, - "loss": 5.2488, - "step": 48674 - }, - { - "epoch": 25.384615384615383, - "grad_norm": 1.4805301427841187, - "learning_rate": 5.160100502512562e-05, - "loss": 5.2521, - "step": 48675 - }, - { - "epoch": 25.385136897001303, - "grad_norm": 1.607869267463684, - "learning_rate": 5.16e-05, - "loss": 5.0921, - "step": 48676 - }, - { - "epoch": 25.385658409387222, - "grad_norm": 1.5150866508483887, - "learning_rate": 5.159899497487437e-05, - "loss": 5.1293, - "step": 48677 - }, - { - "epoch": 25.38617992177314, - "grad_norm": 1.5158594846725464, - "learning_rate": 5.159798994974875e-05, - "loss": 4.9655, - "step": 48678 - }, - { - "epoch": 25.38670143415906, - "grad_norm": 1.580691933631897, - "learning_rate": 5.1596984924623114e-05, - "loss": 5.4456, - "step": 48679 - }, - { - "epoch": 25.38722294654498, - "grad_norm": 1.4918469190597534, - "learning_rate": 5.159597989949749e-05, - "loss": 5.2154, - "step": 48680 - }, - { - "epoch": 25.3877444589309, - "grad_norm": 1.5084091424942017, - "learning_rate": 5.159497487437186e-05, - "loss": 5.5421, - "step": 48681 - }, - { - "epoch": 25.38826597131682, - "grad_norm": 1.574414849281311, - "learning_rate": 5.159396984924624e-05, - "loss": 5.4671, - "step": 48682 - }, - { - "epoch": 25.38878748370274, - "grad_norm": 1.5326447486877441, - "learning_rate": 5.1592964824120605e-05, - "loss": 5.3143, - "step": 48683 - }, - { - "epoch": 25.38930899608866, - "grad_norm": 1.6694937944412231, - "learning_rate": 5.159195979899498e-05, - "loss": 5.5988, - "step": 48684 - }, - { - "epoch": 25.389830508474578, - "grad_norm": 1.709242820739746, - "learning_rate": 5.159095477386935e-05, - "loss": 4.8631, - "step": 48685 - }, - { - "epoch": 25.390352020860494, - "grad_norm": 1.606035590171814, - "learning_rate": 5.158994974874372e-05, - "loss": 5.0814, - "step": 48686 - }, - { - "epoch": 25.390873533246413, - "grad_norm": 1.5427119731903076, - "learning_rate": 5.1588944723618096e-05, - "loss": 5.0456, - "step": 48687 - }, - { - "epoch": 25.391395045632333, - "grad_norm": 1.6057218313217163, - "learning_rate": 5.158793969849246e-05, - "loss": 4.7115, - "step": 48688 - }, - { - "epoch": 25.391916558018252, - "grad_norm": 1.5941840410232544, - "learning_rate": 5.158693467336684e-05, - "loss": 5.4703, - "step": 48689 - }, - { - "epoch": 25.39243807040417, - "grad_norm": 1.715334177017212, - "learning_rate": 5.15859296482412e-05, - "loss": 5.2313, - "step": 48690 - }, - { - "epoch": 25.39295958279009, - "grad_norm": 1.586108684539795, - "learning_rate": 5.158492462311558e-05, - "loss": 5.7314, - "step": 48691 - }, - { - "epoch": 25.39348109517601, - "grad_norm": 1.5748380422592163, - "learning_rate": 5.158391959798995e-05, - "loss": 5.3268, - "step": 48692 - }, - { - "epoch": 25.39400260756193, - "grad_norm": 1.4896392822265625, - "learning_rate": 5.158291457286433e-05, - "loss": 5.6592, - "step": 48693 - }, - { - "epoch": 25.39452411994785, - "grad_norm": 1.6265192031860352, - "learning_rate": 5.158190954773869e-05, - "loss": 4.9711, - "step": 48694 - }, - { - "epoch": 25.39504563233377, - "grad_norm": 1.4877095222473145, - "learning_rate": 5.158090452261307e-05, - "loss": 5.5476, - "step": 48695 - }, - { - "epoch": 25.39556714471969, - "grad_norm": 1.6540958881378174, - "learning_rate": 5.1579899497487435e-05, - "loss": 5.0944, - "step": 48696 - }, - { - "epoch": 25.396088657105608, - "grad_norm": 1.5424458980560303, - "learning_rate": 5.157889447236181e-05, - "loss": 5.1472, - "step": 48697 - }, - { - "epoch": 25.396610169491524, - "grad_norm": 1.6343233585357666, - "learning_rate": 5.1577889447236184e-05, - "loss": 5.2846, - "step": 48698 - }, - { - "epoch": 25.397131681877443, - "grad_norm": 1.503279447555542, - "learning_rate": 5.157688442211056e-05, - "loss": 5.6018, - "step": 48699 - }, - { - "epoch": 25.397653194263363, - "grad_norm": 1.5297571420669556, - "learning_rate": 5.1575879396984926e-05, - "loss": 5.7373, - "step": 48700 - }, - { - "epoch": 25.398174706649282, - "grad_norm": 1.5696423053741455, - "learning_rate": 5.15748743718593e-05, - "loss": 5.0616, - "step": 48701 - }, - { - "epoch": 25.3986962190352, - "grad_norm": 1.5223500728607178, - "learning_rate": 5.1573869346733675e-05, - "loss": 5.3451, - "step": 48702 - }, - { - "epoch": 25.39921773142112, - "grad_norm": 1.5900804996490479, - "learning_rate": 5.157286432160804e-05, - "loss": 5.0277, - "step": 48703 - }, - { - "epoch": 25.39973924380704, - "grad_norm": 1.7568047046661377, - "learning_rate": 5.157185929648242e-05, - "loss": 4.9208, - "step": 48704 - }, - { - "epoch": 25.40026075619296, - "grad_norm": 1.6192514896392822, - "learning_rate": 5.157085427135678e-05, - "loss": 5.2456, - "step": 48705 - }, - { - "epoch": 25.40078226857888, - "grad_norm": 1.6020219326019287, - "learning_rate": 5.156984924623116e-05, - "loss": 5.1556, - "step": 48706 - }, - { - "epoch": 25.4013037809648, - "grad_norm": 1.6122126579284668, - "learning_rate": 5.156884422110553e-05, - "loss": 5.1957, - "step": 48707 - }, - { - "epoch": 25.40182529335072, - "grad_norm": 1.5939961671829224, - "learning_rate": 5.156783919597991e-05, - "loss": 5.2139, - "step": 48708 - }, - { - "epoch": 25.402346805736638, - "grad_norm": 1.623036503791809, - "learning_rate": 5.156683417085427e-05, - "loss": 5.1474, - "step": 48709 - }, - { - "epoch": 25.402868318122554, - "grad_norm": 1.6067742109298706, - "learning_rate": 5.156582914572865e-05, - "loss": 5.1348, - "step": 48710 - }, - { - "epoch": 25.403389830508473, - "grad_norm": 1.5704131126403809, - "learning_rate": 5.1564824120603015e-05, - "loss": 4.8414, - "step": 48711 - }, - { - "epoch": 25.403911342894393, - "grad_norm": 1.625951886177063, - "learning_rate": 5.156381909547739e-05, - "loss": 5.1086, - "step": 48712 - }, - { - "epoch": 25.404432855280312, - "grad_norm": 1.5978426933288574, - "learning_rate": 5.1562814070351763e-05, - "loss": 5.4079, - "step": 48713 - }, - { - "epoch": 25.40495436766623, - "grad_norm": 1.6278952360153198, - "learning_rate": 5.156180904522613e-05, - "loss": 4.9326, - "step": 48714 - }, - { - "epoch": 25.40547588005215, - "grad_norm": 1.5088653564453125, - "learning_rate": 5.1560804020100506e-05, - "loss": 5.2265, - "step": 48715 - }, - { - "epoch": 25.40599739243807, - "grad_norm": 1.7388988733291626, - "learning_rate": 5.155979899497487e-05, - "loss": 4.9494, - "step": 48716 - }, - { - "epoch": 25.40651890482399, - "grad_norm": 1.5844447612762451, - "learning_rate": 5.155879396984925e-05, - "loss": 5.5313, - "step": 48717 - }, - { - "epoch": 25.40704041720991, - "grad_norm": 1.5349947214126587, - "learning_rate": 5.155778894472362e-05, - "loss": 5.369, - "step": 48718 - }, - { - "epoch": 25.40756192959583, - "grad_norm": 1.5794901847839355, - "learning_rate": 5.1556783919597997e-05, - "loss": 4.9558, - "step": 48719 - }, - { - "epoch": 25.40808344198175, - "grad_norm": 1.6343796253204346, - "learning_rate": 5.155577889447236e-05, - "loss": 5.2357, - "step": 48720 - }, - { - "epoch": 25.408604954367668, - "grad_norm": 1.5230633020401, - "learning_rate": 5.155477386934674e-05, - "loss": 5.2536, - "step": 48721 - }, - { - "epoch": 25.409126466753584, - "grad_norm": 1.7045761346817017, - "learning_rate": 5.155376884422111e-05, - "loss": 4.9698, - "step": 48722 - }, - { - "epoch": 25.409647979139503, - "grad_norm": 1.4593881368637085, - "learning_rate": 5.155276381909549e-05, - "loss": 5.7312, - "step": 48723 - }, - { - "epoch": 25.410169491525423, - "grad_norm": 1.5296324491500854, - "learning_rate": 5.155175879396985e-05, - "loss": 5.3096, - "step": 48724 - }, - { - "epoch": 25.410691003911342, - "grad_norm": 1.571215271949768, - "learning_rate": 5.155075376884423e-05, - "loss": 4.8248, - "step": 48725 - }, - { - "epoch": 25.41121251629726, - "grad_norm": 1.53542160987854, - "learning_rate": 5.1549748743718594e-05, - "loss": 5.5126, - "step": 48726 - }, - { - "epoch": 25.41173402868318, - "grad_norm": 1.5855152606964111, - "learning_rate": 5.1548743718592965e-05, - "loss": 5.0823, - "step": 48727 - }, - { - "epoch": 25.4122555410691, - "grad_norm": 1.618701457977295, - "learning_rate": 5.154773869346734e-05, - "loss": 5.4162, - "step": 48728 - }, - { - "epoch": 25.41277705345502, - "grad_norm": 1.708299160003662, - "learning_rate": 5.154673366834171e-05, - "loss": 5.2625, - "step": 48729 - }, - { - "epoch": 25.41329856584094, - "grad_norm": 1.601278305053711, - "learning_rate": 5.1545728643216085e-05, - "loss": 4.9366, - "step": 48730 - }, - { - "epoch": 25.41382007822686, - "grad_norm": 1.4437475204467773, - "learning_rate": 5.154472361809045e-05, - "loss": 5.4525, - "step": 48731 - }, - { - "epoch": 25.41434159061278, - "grad_norm": 1.5522147417068481, - "learning_rate": 5.154371859296483e-05, - "loss": 5.3465, - "step": 48732 - }, - { - "epoch": 25.414863102998694, - "grad_norm": 1.5948935747146606, - "learning_rate": 5.15427135678392e-05, - "loss": 4.9531, - "step": 48733 - }, - { - "epoch": 25.415384615384614, - "grad_norm": 1.5641202926635742, - "learning_rate": 5.1541708542713576e-05, - "loss": 5.5731, - "step": 48734 - }, - { - "epoch": 25.415906127770533, - "grad_norm": 1.5927212238311768, - "learning_rate": 5.154070351758794e-05, - "loss": 5.2419, - "step": 48735 - }, - { - "epoch": 25.416427640156453, - "grad_norm": 1.6920965909957886, - "learning_rate": 5.153969849246232e-05, - "loss": 5.2274, - "step": 48736 - }, - { - "epoch": 25.416949152542372, - "grad_norm": 1.56526780128479, - "learning_rate": 5.153869346733668e-05, - "loss": 4.893, - "step": 48737 - }, - { - "epoch": 25.41747066492829, - "grad_norm": 1.4940389394760132, - "learning_rate": 5.153768844221106e-05, - "loss": 5.2565, - "step": 48738 - }, - { - "epoch": 25.41799217731421, - "grad_norm": 1.5806639194488525, - "learning_rate": 5.153668341708543e-05, - "loss": 4.9654, - "step": 48739 - }, - { - "epoch": 25.41851368970013, - "grad_norm": 1.5217660665512085, - "learning_rate": 5.1535678391959795e-05, - "loss": 5.2316, - "step": 48740 - }, - { - "epoch": 25.41903520208605, - "grad_norm": 1.467848777770996, - "learning_rate": 5.153467336683417e-05, - "loss": 5.6382, - "step": 48741 - }, - { - "epoch": 25.41955671447197, - "grad_norm": 1.5235515832901, - "learning_rate": 5.153366834170854e-05, - "loss": 4.828, - "step": 48742 - }, - { - "epoch": 25.42007822685789, - "grad_norm": 1.5086933374404907, - "learning_rate": 5.1532663316582915e-05, - "loss": 5.2873, - "step": 48743 - }, - { - "epoch": 25.42059973924381, - "grad_norm": 1.7409090995788574, - "learning_rate": 5.1531658291457286e-05, - "loss": 5.2056, - "step": 48744 - }, - { - "epoch": 25.421121251629724, - "grad_norm": 1.6457839012145996, - "learning_rate": 5.1530653266331664e-05, - "loss": 5.4254, - "step": 48745 - }, - { - "epoch": 25.421642764015644, - "grad_norm": 1.5704584121704102, - "learning_rate": 5.152964824120603e-05, - "loss": 5.7799, - "step": 48746 - }, - { - "epoch": 25.422164276401563, - "grad_norm": 1.5595637559890747, - "learning_rate": 5.1528643216080406e-05, - "loss": 5.1484, - "step": 48747 - }, - { - "epoch": 25.422685788787483, - "grad_norm": 1.5638841390609741, - "learning_rate": 5.152763819095478e-05, - "loss": 5.4116, - "step": 48748 - }, - { - "epoch": 25.423207301173402, - "grad_norm": 1.4581124782562256, - "learning_rate": 5.1526633165829155e-05, - "loss": 5.3129, - "step": 48749 - }, - { - "epoch": 25.423728813559322, - "grad_norm": 1.6408509016036987, - "learning_rate": 5.152562814070352e-05, - "loss": 4.6183, - "step": 48750 - }, - { - "epoch": 25.42425032594524, - "grad_norm": 1.575932502746582, - "learning_rate": 5.15246231155779e-05, - "loss": 5.1306, - "step": 48751 - }, - { - "epoch": 25.42477183833116, - "grad_norm": 1.6156325340270996, - "learning_rate": 5.152361809045226e-05, - "loss": 5.7384, - "step": 48752 - }, - { - "epoch": 25.42529335071708, - "grad_norm": 1.6475096940994263, - "learning_rate": 5.152261306532663e-05, - "loss": 5.2152, - "step": 48753 - }, - { - "epoch": 25.425814863103, - "grad_norm": 1.6150480508804321, - "learning_rate": 5.152160804020101e-05, - "loss": 4.8916, - "step": 48754 - }, - { - "epoch": 25.42633637548892, - "grad_norm": 1.597909927368164, - "learning_rate": 5.1520603015075375e-05, - "loss": 5.1233, - "step": 48755 - }, - { - "epoch": 25.42685788787484, - "grad_norm": 1.6395503282546997, - "learning_rate": 5.151959798994975e-05, - "loss": 5.1619, - "step": 48756 - }, - { - "epoch": 25.427379400260754, - "grad_norm": 1.6429308652877808, - "learning_rate": 5.151859296482412e-05, - "loss": 4.9548, - "step": 48757 - }, - { - "epoch": 25.427900912646674, - "grad_norm": 1.670779824256897, - "learning_rate": 5.1517587939698495e-05, - "loss": 5.3706, - "step": 48758 - }, - { - "epoch": 25.428422425032593, - "grad_norm": 1.5284515619277954, - "learning_rate": 5.1516582914572866e-05, - "loss": 5.3276, - "step": 48759 - }, - { - "epoch": 25.428943937418513, - "grad_norm": 1.5489469766616821, - "learning_rate": 5.1515577889447244e-05, - "loss": 5.6399, - "step": 48760 - }, - { - "epoch": 25.429465449804432, - "grad_norm": 1.5555830001831055, - "learning_rate": 5.151457286432161e-05, - "loss": 5.4327, - "step": 48761 - }, - { - "epoch": 25.429986962190352, - "grad_norm": 1.5341514348983765, - "learning_rate": 5.1513567839195986e-05, - "loss": 5.1033, - "step": 48762 - }, - { - "epoch": 25.43050847457627, - "grad_norm": 1.5374161005020142, - "learning_rate": 5.151256281407035e-05, - "loss": 5.1961, - "step": 48763 - }, - { - "epoch": 25.43102998696219, - "grad_norm": 1.485835075378418, - "learning_rate": 5.151155778894473e-05, - "loss": 5.2652, - "step": 48764 - }, - { - "epoch": 25.43155149934811, - "grad_norm": 1.712064504623413, - "learning_rate": 5.15105527638191e-05, - "loss": 4.7791, - "step": 48765 - }, - { - "epoch": 25.43207301173403, - "grad_norm": 1.6681610345840454, - "learning_rate": 5.150954773869346e-05, - "loss": 5.7112, - "step": 48766 - }, - { - "epoch": 25.43259452411995, - "grad_norm": 1.5463600158691406, - "learning_rate": 5.150854271356784e-05, - "loss": 5.3073, - "step": 48767 - }, - { - "epoch": 25.43311603650587, - "grad_norm": 1.6510193347930908, - "learning_rate": 5.150753768844221e-05, - "loss": 5.0367, - "step": 48768 - }, - { - "epoch": 25.433637548891785, - "grad_norm": 1.556320309638977, - "learning_rate": 5.150653266331659e-05, - "loss": 5.2327, - "step": 48769 - }, - { - "epoch": 25.434159061277704, - "grad_norm": 1.535349726676941, - "learning_rate": 5.1505527638190954e-05, - "loss": 4.8515, - "step": 48770 - }, - { - "epoch": 25.434680573663623, - "grad_norm": 1.8054602146148682, - "learning_rate": 5.150452261306533e-05, - "loss": 5.0524, - "step": 48771 - }, - { - "epoch": 25.435202086049543, - "grad_norm": 1.5073297023773193, - "learning_rate": 5.1503517587939696e-05, - "loss": 5.1606, - "step": 48772 - }, - { - "epoch": 25.435723598435462, - "grad_norm": 1.5193510055541992, - "learning_rate": 5.1502512562814074e-05, - "loss": 5.6494, - "step": 48773 - }, - { - "epoch": 25.436245110821382, - "grad_norm": 1.5200436115264893, - "learning_rate": 5.1501507537688445e-05, - "loss": 5.6113, - "step": 48774 - }, - { - "epoch": 25.4367666232073, - "grad_norm": 1.5623139142990112, - "learning_rate": 5.150050251256282e-05, - "loss": 5.0449, - "step": 48775 - }, - { - "epoch": 25.43728813559322, - "grad_norm": 1.5080626010894775, - "learning_rate": 5.149949748743719e-05, - "loss": 5.5637, - "step": 48776 - }, - { - "epoch": 25.43780964797914, - "grad_norm": 1.57700514793396, - "learning_rate": 5.1498492462311565e-05, - "loss": 4.6756, - "step": 48777 - }, - { - "epoch": 25.43833116036506, - "grad_norm": 1.535646915435791, - "learning_rate": 5.149748743718593e-05, - "loss": 5.5462, - "step": 48778 - }, - { - "epoch": 25.43885267275098, - "grad_norm": 1.5468218326568604, - "learning_rate": 5.14964824120603e-05, - "loss": 5.6102, - "step": 48779 - }, - { - "epoch": 25.4393741851369, - "grad_norm": 1.6353203058242798, - "learning_rate": 5.149547738693468e-05, - "loss": 4.8514, - "step": 48780 - }, - { - "epoch": 25.439895697522815, - "grad_norm": 1.5476833581924438, - "learning_rate": 5.149447236180904e-05, - "loss": 5.2612, - "step": 48781 - }, - { - "epoch": 25.440417209908734, - "grad_norm": 1.535524606704712, - "learning_rate": 5.149346733668342e-05, - "loss": 5.2928, - "step": 48782 - }, - { - "epoch": 25.440938722294653, - "grad_norm": 1.5223387479782104, - "learning_rate": 5.1492462311557784e-05, - "loss": 5.4803, - "step": 48783 - }, - { - "epoch": 25.441460234680573, - "grad_norm": 1.6258329153060913, - "learning_rate": 5.149145728643216e-05, - "loss": 5.09, - "step": 48784 - }, - { - "epoch": 25.441981747066492, - "grad_norm": 1.6516965627670288, - "learning_rate": 5.149045226130653e-05, - "loss": 5.4866, - "step": 48785 - }, - { - "epoch": 25.442503259452412, - "grad_norm": 1.5564312934875488, - "learning_rate": 5.148944723618091e-05, - "loss": 5.2589, - "step": 48786 - }, - { - "epoch": 25.44302477183833, - "grad_norm": 1.6062028408050537, - "learning_rate": 5.1488442211055275e-05, - "loss": 5.1347, - "step": 48787 - }, - { - "epoch": 25.44354628422425, - "grad_norm": 1.5328680276870728, - "learning_rate": 5.148743718592965e-05, - "loss": 4.8408, - "step": 48788 - }, - { - "epoch": 25.44406779661017, - "grad_norm": 1.5672683715820312, - "learning_rate": 5.1486432160804024e-05, - "loss": 5.0368, - "step": 48789 - }, - { - "epoch": 25.44458930899609, - "grad_norm": 1.568207859992981, - "learning_rate": 5.14854271356784e-05, - "loss": 5.3468, - "step": 48790 - }, - { - "epoch": 25.44511082138201, - "grad_norm": 1.531510829925537, - "learning_rate": 5.1484422110552766e-05, - "loss": 5.5942, - "step": 48791 - }, - { - "epoch": 25.44563233376793, - "grad_norm": 1.593832015991211, - "learning_rate": 5.1483417085427144e-05, - "loss": 5.2321, - "step": 48792 - }, - { - "epoch": 25.446153846153845, - "grad_norm": 1.5372546911239624, - "learning_rate": 5.148241206030151e-05, - "loss": 4.9021, - "step": 48793 - }, - { - "epoch": 25.446675358539764, - "grad_norm": 1.5231353044509888, - "learning_rate": 5.148140703517588e-05, - "loss": 5.4177, - "step": 48794 - }, - { - "epoch": 25.447196870925683, - "grad_norm": 1.7047935724258423, - "learning_rate": 5.148040201005026e-05, - "loss": 5.1001, - "step": 48795 - }, - { - "epoch": 25.447718383311603, - "grad_norm": 1.6195977926254272, - "learning_rate": 5.147939698492462e-05, - "loss": 4.7889, - "step": 48796 - }, - { - "epoch": 25.448239895697522, - "grad_norm": 1.5864826440811157, - "learning_rate": 5.1478391959799e-05, - "loss": 5.2971, - "step": 48797 - }, - { - "epoch": 25.448761408083442, - "grad_norm": 1.6397333145141602, - "learning_rate": 5.1477386934673364e-05, - "loss": 5.5575, - "step": 48798 - }, - { - "epoch": 25.44928292046936, - "grad_norm": 1.5663326978683472, - "learning_rate": 5.147638190954774e-05, - "loss": 5.3391, - "step": 48799 - }, - { - "epoch": 25.44980443285528, - "grad_norm": 1.5343658924102783, - "learning_rate": 5.147537688442211e-05, - "loss": 5.3795, - "step": 48800 - }, - { - "epoch": 25.4503259452412, - "grad_norm": 1.7041765451431274, - "learning_rate": 5.147437185929649e-05, - "loss": 4.8936, - "step": 48801 - }, - { - "epoch": 25.45084745762712, - "grad_norm": 1.674311876296997, - "learning_rate": 5.1473366834170855e-05, - "loss": 5.0903, - "step": 48802 - }, - { - "epoch": 25.45136897001304, - "grad_norm": 1.5441570281982422, - "learning_rate": 5.147236180904523e-05, - "loss": 5.112, - "step": 48803 - }, - { - "epoch": 25.45189048239896, - "grad_norm": 1.6969025135040283, - "learning_rate": 5.14713567839196e-05, - "loss": 4.6042, - "step": 48804 - }, - { - "epoch": 25.452411994784875, - "grad_norm": 1.6273289918899536, - "learning_rate": 5.1470351758793975e-05, - "loss": 5.0513, - "step": 48805 - }, - { - "epoch": 25.452933507170794, - "grad_norm": 1.5250355005264282, - "learning_rate": 5.1469346733668346e-05, - "loss": 5.3076, - "step": 48806 - }, - { - "epoch": 25.453455019556714, - "grad_norm": 1.5515892505645752, - "learning_rate": 5.146834170854271e-05, - "loss": 5.4651, - "step": 48807 - }, - { - "epoch": 25.453976531942633, - "grad_norm": 1.5210728645324707, - "learning_rate": 5.146733668341709e-05, - "loss": 5.1738, - "step": 48808 - }, - { - "epoch": 25.454498044328552, - "grad_norm": 1.7820396423339844, - "learning_rate": 5.146633165829146e-05, - "loss": 5.1028, - "step": 48809 - }, - { - "epoch": 25.455019556714472, - "grad_norm": 1.7052854299545288, - "learning_rate": 5.146532663316584e-05, - "loss": 4.5929, - "step": 48810 - }, - { - "epoch": 25.45554106910039, - "grad_norm": 1.5416651964187622, - "learning_rate": 5.14643216080402e-05, - "loss": 4.1768, - "step": 48811 - }, - { - "epoch": 25.45606258148631, - "grad_norm": 1.703073501586914, - "learning_rate": 5.146331658291458e-05, - "loss": 5.0261, - "step": 48812 - }, - { - "epoch": 25.45658409387223, - "grad_norm": 1.718988060951233, - "learning_rate": 5.146231155778894e-05, - "loss": 5.4606, - "step": 48813 - }, - { - "epoch": 25.45710560625815, - "grad_norm": 1.723810076713562, - "learning_rate": 5.146130653266332e-05, - "loss": 5.2196, - "step": 48814 - }, - { - "epoch": 25.45762711864407, - "grad_norm": 1.5199912786483765, - "learning_rate": 5.146030150753769e-05, - "loss": 5.2068, - "step": 48815 - }, - { - "epoch": 25.45814863102999, - "grad_norm": 1.5404404401779175, - "learning_rate": 5.145929648241207e-05, - "loss": 5.2157, - "step": 48816 - }, - { - "epoch": 25.458670143415905, - "grad_norm": 1.4743837118148804, - "learning_rate": 5.1458291457286434e-05, - "loss": 5.4207, - "step": 48817 - }, - { - "epoch": 25.459191655801824, - "grad_norm": 1.652788519859314, - "learning_rate": 5.145728643216081e-05, - "loss": 4.7403, - "step": 48818 - }, - { - "epoch": 25.459713168187744, - "grad_norm": 1.6875842809677124, - "learning_rate": 5.1456281407035176e-05, - "loss": 4.6103, - "step": 48819 - }, - { - "epoch": 25.460234680573663, - "grad_norm": 1.47905695438385, - "learning_rate": 5.145527638190955e-05, - "loss": 5.6414, - "step": 48820 - }, - { - "epoch": 25.460756192959582, - "grad_norm": 1.5418161153793335, - "learning_rate": 5.1454271356783925e-05, - "loss": 5.4784, - "step": 48821 - }, - { - "epoch": 25.461277705345502, - "grad_norm": 1.518918514251709, - "learning_rate": 5.145326633165829e-05, - "loss": 5.5744, - "step": 48822 - }, - { - "epoch": 25.46179921773142, - "grad_norm": 1.6102043390274048, - "learning_rate": 5.145226130653267e-05, - "loss": 4.8869, - "step": 48823 - }, - { - "epoch": 25.46232073011734, - "grad_norm": 1.6451234817504883, - "learning_rate": 5.145125628140703e-05, - "loss": 5.0642, - "step": 48824 - }, - { - "epoch": 25.46284224250326, - "grad_norm": 1.584173560142517, - "learning_rate": 5.145025125628141e-05, - "loss": 5.187, - "step": 48825 - }, - { - "epoch": 25.46336375488918, - "grad_norm": 1.5857398509979248, - "learning_rate": 5.144924623115578e-05, - "loss": 5.4063, - "step": 48826 - }, - { - "epoch": 25.4638852672751, - "grad_norm": 1.4840774536132812, - "learning_rate": 5.144824120603016e-05, - "loss": 5.5332, - "step": 48827 - }, - { - "epoch": 25.46440677966102, - "grad_norm": 1.5834085941314697, - "learning_rate": 5.144723618090452e-05, - "loss": 5.0523, - "step": 48828 - }, - { - "epoch": 25.464928292046935, - "grad_norm": 1.6352564096450806, - "learning_rate": 5.14462311557789e-05, - "loss": 5.0238, - "step": 48829 - }, - { - "epoch": 25.465449804432854, - "grad_norm": 1.6033756732940674, - "learning_rate": 5.1445226130653265e-05, - "loss": 4.2107, - "step": 48830 - }, - { - "epoch": 25.465971316818774, - "grad_norm": 1.5955867767333984, - "learning_rate": 5.144422110552764e-05, - "loss": 4.9774, - "step": 48831 - }, - { - "epoch": 25.466492829204693, - "grad_norm": 1.5744930505752563, - "learning_rate": 5.1443216080402013e-05, - "loss": 5.0291, - "step": 48832 - }, - { - "epoch": 25.467014341590613, - "grad_norm": 1.5444729328155518, - "learning_rate": 5.144221105527638e-05, - "loss": 5.4331, - "step": 48833 - }, - { - "epoch": 25.467535853976532, - "grad_norm": 1.5675379037857056, - "learning_rate": 5.1441206030150756e-05, - "loss": 5.2658, - "step": 48834 - }, - { - "epoch": 25.46805736636245, - "grad_norm": 1.5810344219207764, - "learning_rate": 5.1440201005025127e-05, - "loss": 4.6379, - "step": 48835 - }, - { - "epoch": 25.46857887874837, - "grad_norm": 1.7068331241607666, - "learning_rate": 5.1439195979899504e-05, - "loss": 4.8198, - "step": 48836 - }, - { - "epoch": 25.46910039113429, - "grad_norm": 1.5384563207626343, - "learning_rate": 5.143819095477387e-05, - "loss": 5.3832, - "step": 48837 - }, - { - "epoch": 25.46962190352021, - "grad_norm": 1.6042972803115845, - "learning_rate": 5.1437185929648247e-05, - "loss": 5.042, - "step": 48838 - }, - { - "epoch": 25.47014341590613, - "grad_norm": 1.580981969833374, - "learning_rate": 5.143618090452261e-05, - "loss": 5.1976, - "step": 48839 - }, - { - "epoch": 25.470664928292045, - "grad_norm": 1.6592692136764526, - "learning_rate": 5.143517587939699e-05, - "loss": 5.0191, - "step": 48840 - }, - { - "epoch": 25.471186440677965, - "grad_norm": 1.6729260683059692, - "learning_rate": 5.143417085427136e-05, - "loss": 4.86, - "step": 48841 - }, - { - "epoch": 25.471707953063884, - "grad_norm": 1.6502329111099243, - "learning_rate": 5.143316582914574e-05, - "loss": 5.5569, - "step": 48842 - }, - { - "epoch": 25.472229465449804, - "grad_norm": 1.5887800455093384, - "learning_rate": 5.14321608040201e-05, - "loss": 5.4833, - "step": 48843 - }, - { - "epoch": 25.472750977835723, - "grad_norm": 1.618137240409851, - "learning_rate": 5.143115577889448e-05, - "loss": 5.0205, - "step": 48844 - }, - { - "epoch": 25.473272490221643, - "grad_norm": 1.6214656829833984, - "learning_rate": 5.1430150753768844e-05, - "loss": 4.6935, - "step": 48845 - }, - { - "epoch": 25.473794002607562, - "grad_norm": 1.5549969673156738, - "learning_rate": 5.1429145728643215e-05, - "loss": 5.1297, - "step": 48846 - }, - { - "epoch": 25.47431551499348, - "grad_norm": 1.525326132774353, - "learning_rate": 5.142814070351759e-05, - "loss": 5.1739, - "step": 48847 - }, - { - "epoch": 25.4748370273794, - "grad_norm": 1.5185112953186035, - "learning_rate": 5.142713567839196e-05, - "loss": 4.9045, - "step": 48848 - }, - { - "epoch": 25.47535853976532, - "grad_norm": 1.5881303548812866, - "learning_rate": 5.1426130653266335e-05, - "loss": 5.0601, - "step": 48849 - }, - { - "epoch": 25.47588005215124, - "grad_norm": 1.5866230726242065, - "learning_rate": 5.14251256281407e-05, - "loss": 4.9786, - "step": 48850 - }, - { - "epoch": 25.47640156453716, - "grad_norm": 1.5859715938568115, - "learning_rate": 5.142412060301508e-05, - "loss": 4.9209, - "step": 48851 - }, - { - "epoch": 25.476923076923075, - "grad_norm": 1.5441240072250366, - "learning_rate": 5.142311557788945e-05, - "loss": 4.9609, - "step": 48852 - }, - { - "epoch": 25.477444589308995, - "grad_norm": 1.5736846923828125, - "learning_rate": 5.1422110552763826e-05, - "loss": 5.4321, - "step": 48853 - }, - { - "epoch": 25.477966101694914, - "grad_norm": 1.6335018873214722, - "learning_rate": 5.142110552763819e-05, - "loss": 4.9046, - "step": 48854 - }, - { - "epoch": 25.478487614080834, - "grad_norm": 1.6391160488128662, - "learning_rate": 5.142010050251257e-05, - "loss": 4.9707, - "step": 48855 - }, - { - "epoch": 25.479009126466753, - "grad_norm": 1.573941707611084, - "learning_rate": 5.141909547738694e-05, - "loss": 5.1651, - "step": 48856 - }, - { - "epoch": 25.479530638852673, - "grad_norm": 1.5641443729400635, - "learning_rate": 5.141809045226132e-05, - "loss": 4.8327, - "step": 48857 - }, - { - "epoch": 25.480052151238592, - "grad_norm": 1.561559796333313, - "learning_rate": 5.141708542713568e-05, - "loss": 5.1975, - "step": 48858 - }, - { - "epoch": 25.48057366362451, - "grad_norm": 1.516135573387146, - "learning_rate": 5.1416080402010045e-05, - "loss": 5.1197, - "step": 48859 - }, - { - "epoch": 25.48109517601043, - "grad_norm": 1.566941261291504, - "learning_rate": 5.141507537688442e-05, - "loss": 5.0972, - "step": 48860 - }, - { - "epoch": 25.48161668839635, - "grad_norm": 1.4774816036224365, - "learning_rate": 5.1414070351758794e-05, - "loss": 5.2628, - "step": 48861 - }, - { - "epoch": 25.48213820078227, - "grad_norm": 1.5867723226547241, - "learning_rate": 5.141306532663317e-05, - "loss": 5.0021, - "step": 48862 - }, - { - "epoch": 25.48265971316819, - "grad_norm": 1.6100220680236816, - "learning_rate": 5.1412060301507536e-05, - "loss": 5.3488, - "step": 48863 - }, - { - "epoch": 25.483181225554105, - "grad_norm": 1.53816556930542, - "learning_rate": 5.1411055276381914e-05, - "loss": 5.724, - "step": 48864 - }, - { - "epoch": 25.483702737940025, - "grad_norm": 1.6329442262649536, - "learning_rate": 5.141005025125628e-05, - "loss": 4.896, - "step": 48865 - }, - { - "epoch": 25.484224250325944, - "grad_norm": 1.5224145650863647, - "learning_rate": 5.1409045226130656e-05, - "loss": 5.4182, - "step": 48866 - }, - { - "epoch": 25.484745762711864, - "grad_norm": 1.5675692558288574, - "learning_rate": 5.140804020100503e-05, - "loss": 5.4212, - "step": 48867 - }, - { - "epoch": 25.485267275097783, - "grad_norm": 1.588382601737976, - "learning_rate": 5.1407035175879405e-05, - "loss": 4.6441, - "step": 48868 - }, - { - "epoch": 25.485788787483703, - "grad_norm": 1.6388295888900757, - "learning_rate": 5.140603015075377e-05, - "loss": 5.2317, - "step": 48869 - }, - { - "epoch": 25.486310299869622, - "grad_norm": 1.5081381797790527, - "learning_rate": 5.140502512562815e-05, - "loss": 5.5675, - "step": 48870 - }, - { - "epoch": 25.48683181225554, - "grad_norm": 1.542486548423767, - "learning_rate": 5.140402010050251e-05, - "loss": 5.2376, - "step": 48871 - }, - { - "epoch": 25.48735332464146, - "grad_norm": 1.683075189590454, - "learning_rate": 5.140301507537689e-05, - "loss": 5.0428, - "step": 48872 - }, - { - "epoch": 25.48787483702738, - "grad_norm": 1.5263593196868896, - "learning_rate": 5.140201005025126e-05, - "loss": 4.9742, - "step": 48873 - }, - { - "epoch": 25.4883963494133, - "grad_norm": 1.5141197443008423, - "learning_rate": 5.1401005025125625e-05, - "loss": 4.8841, - "step": 48874 - }, - { - "epoch": 25.48891786179922, - "grad_norm": 1.7149105072021484, - "learning_rate": 5.14e-05, - "loss": 4.5746, - "step": 48875 - }, - { - "epoch": 25.489439374185135, - "grad_norm": 1.5876408815383911, - "learning_rate": 5.1398994974874374e-05, - "loss": 5.0164, - "step": 48876 - }, - { - "epoch": 25.489960886571055, - "grad_norm": 1.4524147510528564, - "learning_rate": 5.139798994974875e-05, - "loss": 5.1354, - "step": 48877 - }, - { - "epoch": 25.490482398956974, - "grad_norm": 1.5720962285995483, - "learning_rate": 5.1396984924623116e-05, - "loss": 5.2437, - "step": 48878 - }, - { - "epoch": 25.491003911342894, - "grad_norm": 1.5880882740020752, - "learning_rate": 5.1395979899497494e-05, - "loss": 5.0448, - "step": 48879 - }, - { - "epoch": 25.491525423728813, - "grad_norm": 1.5590746402740479, - "learning_rate": 5.139497487437186e-05, - "loss": 5.35, - "step": 48880 - }, - { - "epoch": 25.492046936114733, - "grad_norm": 1.5714714527130127, - "learning_rate": 5.1393969849246236e-05, - "loss": 5.4067, - "step": 48881 - }, - { - "epoch": 25.492568448500652, - "grad_norm": 1.4915602207183838, - "learning_rate": 5.139296482412061e-05, - "loss": 5.4174, - "step": 48882 - }, - { - "epoch": 25.49308996088657, - "grad_norm": 1.5792478322982788, - "learning_rate": 5.1391959798994985e-05, - "loss": 5.1946, - "step": 48883 - }, - { - "epoch": 25.49361147327249, - "grad_norm": 1.5718097686767578, - "learning_rate": 5.139095477386935e-05, - "loss": 5.1431, - "step": 48884 - }, - { - "epoch": 25.49413298565841, - "grad_norm": 1.6049352884292603, - "learning_rate": 5.138994974874373e-05, - "loss": 5.1031, - "step": 48885 - }, - { - "epoch": 25.49465449804433, - "grad_norm": 1.562516212463379, - "learning_rate": 5.138894472361809e-05, - "loss": 5.2154, - "step": 48886 - }, - { - "epoch": 25.49517601043025, - "grad_norm": 1.6091045141220093, - "learning_rate": 5.138793969849246e-05, - "loss": 4.7574, - "step": 48887 - }, - { - "epoch": 25.495697522816165, - "grad_norm": 1.6490730047225952, - "learning_rate": 5.138693467336684e-05, - "loss": 5.3149, - "step": 48888 - }, - { - "epoch": 25.496219035202085, - "grad_norm": 1.4985495805740356, - "learning_rate": 5.1385929648241204e-05, - "loss": 5.5153, - "step": 48889 - }, - { - "epoch": 25.496740547588004, - "grad_norm": 1.5908024311065674, - "learning_rate": 5.138492462311558e-05, - "loss": 5.3134, - "step": 48890 - }, - { - "epoch": 25.497262059973924, - "grad_norm": 1.5000015497207642, - "learning_rate": 5.1383919597989946e-05, - "loss": 4.9861, - "step": 48891 - }, - { - "epoch": 25.497783572359843, - "grad_norm": 1.577677607536316, - "learning_rate": 5.1382914572864324e-05, - "loss": 5.1195, - "step": 48892 - }, - { - "epoch": 25.498305084745763, - "grad_norm": 1.6747487783432007, - "learning_rate": 5.1381909547738695e-05, - "loss": 5.0455, - "step": 48893 - }, - { - "epoch": 25.498826597131682, - "grad_norm": 1.5834234952926636, - "learning_rate": 5.138090452261307e-05, - "loss": 5.1386, - "step": 48894 - }, - { - "epoch": 25.4993481095176, - "grad_norm": 1.6298543214797974, - "learning_rate": 5.137989949748744e-05, - "loss": 5.1563, - "step": 48895 - }, - { - "epoch": 25.49986962190352, - "grad_norm": 1.681318998336792, - "learning_rate": 5.1378894472361815e-05, - "loss": 5.2451, - "step": 48896 - }, - { - "epoch": 25.50039113428944, - "grad_norm": 1.511142373085022, - "learning_rate": 5.1377889447236186e-05, - "loss": 5.0406, - "step": 48897 - }, - { - "epoch": 25.50091264667536, - "grad_norm": 1.4955912828445435, - "learning_rate": 5.1376884422110564e-05, - "loss": 4.6995, - "step": 48898 - }, - { - "epoch": 25.50143415906128, - "grad_norm": 1.5270886421203613, - "learning_rate": 5.137587939698493e-05, - "loss": 5.1926, - "step": 48899 - }, - { - "epoch": 25.501955671447195, - "grad_norm": 1.5147688388824463, - "learning_rate": 5.137487437185929e-05, - "loss": 5.3912, - "step": 48900 - }, - { - "epoch": 25.502477183833115, - "grad_norm": 1.709395408630371, - "learning_rate": 5.137386934673367e-05, - "loss": 4.8436, - "step": 48901 - }, - { - "epoch": 25.502998696219034, - "grad_norm": 1.6178325414657593, - "learning_rate": 5.137286432160804e-05, - "loss": 5.4644, - "step": 48902 - }, - { - "epoch": 25.503520208604954, - "grad_norm": 1.632393717765808, - "learning_rate": 5.137185929648242e-05, - "loss": 5.1787, - "step": 48903 - }, - { - "epoch": 25.504041720990873, - "grad_norm": 1.6363539695739746, - "learning_rate": 5.137085427135678e-05, - "loss": 5.6988, - "step": 48904 - }, - { - "epoch": 25.504563233376793, - "grad_norm": 1.653057336807251, - "learning_rate": 5.136984924623116e-05, - "loss": 5.354, - "step": 48905 - }, - { - "epoch": 25.505084745762712, - "grad_norm": 1.5960164070129395, - "learning_rate": 5.1368844221105525e-05, - "loss": 5.2744, - "step": 48906 - }, - { - "epoch": 25.50560625814863, - "grad_norm": 1.5465582609176636, - "learning_rate": 5.13678391959799e-05, - "loss": 5.8128, - "step": 48907 - }, - { - "epoch": 25.50612777053455, - "grad_norm": 1.63881254196167, - "learning_rate": 5.1366834170854274e-05, - "loss": 4.86, - "step": 48908 - }, - { - "epoch": 25.50664928292047, - "grad_norm": 1.6568875312805176, - "learning_rate": 5.136582914572865e-05, - "loss": 4.8478, - "step": 48909 - }, - { - "epoch": 25.50717079530639, - "grad_norm": 1.5548490285873413, - "learning_rate": 5.1364824120603016e-05, - "loss": 5.1717, - "step": 48910 - }, - { - "epoch": 25.50769230769231, - "grad_norm": 1.6086336374282837, - "learning_rate": 5.1363819095477394e-05, - "loss": 4.8986, - "step": 48911 - }, - { - "epoch": 25.508213820078225, - "grad_norm": 1.631535291671753, - "learning_rate": 5.136281407035176e-05, - "loss": 5.3861, - "step": 48912 - }, - { - "epoch": 25.508735332464145, - "grad_norm": 1.6927114725112915, - "learning_rate": 5.136180904522613e-05, - "loss": 5.2206, - "step": 48913 - }, - { - "epoch": 25.509256844850064, - "grad_norm": 1.5992997884750366, - "learning_rate": 5.136080402010051e-05, - "loss": 5.4473, - "step": 48914 - }, - { - "epoch": 25.509778357235984, - "grad_norm": 1.5967971086502075, - "learning_rate": 5.135979899497487e-05, - "loss": 4.9, - "step": 48915 - }, - { - "epoch": 25.510299869621903, - "grad_norm": 1.7133796215057373, - "learning_rate": 5.135879396984925e-05, - "loss": 5.0104, - "step": 48916 - }, - { - "epoch": 25.510821382007823, - "grad_norm": 1.5564383268356323, - "learning_rate": 5.1357788944723614e-05, - "loss": 5.3011, - "step": 48917 - }, - { - "epoch": 25.511342894393742, - "grad_norm": 1.6722098588943481, - "learning_rate": 5.135678391959799e-05, - "loss": 4.6525, - "step": 48918 - }, - { - "epoch": 25.51186440677966, - "grad_norm": 1.6176867485046387, - "learning_rate": 5.135577889447236e-05, - "loss": 4.7227, - "step": 48919 - }, - { - "epoch": 25.51238591916558, - "grad_norm": 1.4190634489059448, - "learning_rate": 5.135477386934674e-05, - "loss": 5.121, - "step": 48920 - }, - { - "epoch": 25.5129074315515, - "grad_norm": 1.57921302318573, - "learning_rate": 5.1353768844221105e-05, - "loss": 4.9851, - "step": 48921 - }, - { - "epoch": 25.51342894393742, - "grad_norm": 1.644585371017456, - "learning_rate": 5.135276381909548e-05, - "loss": 5.2725, - "step": 48922 - }, - { - "epoch": 25.513950456323336, - "grad_norm": 1.5846506357192993, - "learning_rate": 5.1351758793969854e-05, - "loss": 5.2033, - "step": 48923 - }, - { - "epoch": 25.514471968709255, - "grad_norm": 1.4948312044143677, - "learning_rate": 5.135075376884423e-05, - "loss": 5.336, - "step": 48924 - }, - { - "epoch": 25.514993481095175, - "grad_norm": 1.679413914680481, - "learning_rate": 5.1349748743718596e-05, - "loss": 5.0964, - "step": 48925 - }, - { - "epoch": 25.515514993481094, - "grad_norm": 1.7198975086212158, - "learning_rate": 5.134874371859296e-05, - "loss": 5.0451, - "step": 48926 - }, - { - "epoch": 25.516036505867014, - "grad_norm": 1.4578702449798584, - "learning_rate": 5.134773869346734e-05, - "loss": 5.3018, - "step": 48927 - }, - { - "epoch": 25.516558018252933, - "grad_norm": 1.6058918237686157, - "learning_rate": 5.134673366834171e-05, - "loss": 5.5306, - "step": 48928 - }, - { - "epoch": 25.517079530638853, - "grad_norm": 1.4843854904174805, - "learning_rate": 5.134572864321609e-05, - "loss": 5.2767, - "step": 48929 - }, - { - "epoch": 25.517601043024772, - "grad_norm": 1.6361196041107178, - "learning_rate": 5.134472361809045e-05, - "loss": 4.9591, - "step": 48930 - }, - { - "epoch": 25.51812255541069, - "grad_norm": 1.5310431718826294, - "learning_rate": 5.134371859296483e-05, - "loss": 5.4363, - "step": 48931 - }, - { - "epoch": 25.51864406779661, - "grad_norm": 1.5858047008514404, - "learning_rate": 5.134271356783919e-05, - "loss": 5.3242, - "step": 48932 - }, - { - "epoch": 25.51916558018253, - "grad_norm": 1.4427858591079712, - "learning_rate": 5.134170854271357e-05, - "loss": 4.874, - "step": 48933 - }, - { - "epoch": 25.51968709256845, - "grad_norm": 1.6017920970916748, - "learning_rate": 5.134070351758794e-05, - "loss": 5.4163, - "step": 48934 - }, - { - "epoch": 25.52020860495437, - "grad_norm": 1.5858510732650757, - "learning_rate": 5.133969849246232e-05, - "loss": 5.0746, - "step": 48935 - }, - { - "epoch": 25.520730117340285, - "grad_norm": 1.6703598499298096, - "learning_rate": 5.1338693467336684e-05, - "loss": 5.26, - "step": 48936 - }, - { - "epoch": 25.521251629726205, - "grad_norm": 1.5491387844085693, - "learning_rate": 5.133768844221106e-05, - "loss": 4.7892, - "step": 48937 - }, - { - "epoch": 25.521773142112124, - "grad_norm": 1.5607072114944458, - "learning_rate": 5.1336683417085426e-05, - "loss": 5.6935, - "step": 48938 - }, - { - "epoch": 25.522294654498044, - "grad_norm": 1.698013424873352, - "learning_rate": 5.13356783919598e-05, - "loss": 5.3044, - "step": 48939 - }, - { - "epoch": 25.522816166883963, - "grad_norm": 1.5756579637527466, - "learning_rate": 5.1334673366834175e-05, - "loss": 5.1928, - "step": 48940 - }, - { - "epoch": 25.523337679269883, - "grad_norm": 1.5580360889434814, - "learning_rate": 5.133366834170854e-05, - "loss": 5.1655, - "step": 48941 - }, - { - "epoch": 25.523859191655802, - "grad_norm": 1.5623502731323242, - "learning_rate": 5.133266331658292e-05, - "loss": 5.0914, - "step": 48942 - }, - { - "epoch": 25.52438070404172, - "grad_norm": 1.6716681718826294, - "learning_rate": 5.133165829145729e-05, - "loss": 4.9984, - "step": 48943 - }, - { - "epoch": 25.52490221642764, - "grad_norm": 1.5713835954666138, - "learning_rate": 5.1330653266331666e-05, - "loss": 5.2237, - "step": 48944 - }, - { - "epoch": 25.52542372881356, - "grad_norm": 1.6077550649642944, - "learning_rate": 5.132964824120603e-05, - "loss": 5.1262, - "step": 48945 - }, - { - "epoch": 25.52594524119948, - "grad_norm": 1.474477767944336, - "learning_rate": 5.132864321608041e-05, - "loss": 5.0913, - "step": 48946 - }, - { - "epoch": 25.526466753585396, - "grad_norm": 1.5703102350234985, - "learning_rate": 5.132763819095477e-05, - "loss": 5.2446, - "step": 48947 - }, - { - "epoch": 25.526988265971315, - "grad_norm": 1.5775409936904907, - "learning_rate": 5.132663316582915e-05, - "loss": 5.0805, - "step": 48948 - }, - { - "epoch": 25.527509778357235, - "grad_norm": 1.5041606426239014, - "learning_rate": 5.132562814070352e-05, - "loss": 5.6556, - "step": 48949 - }, - { - "epoch": 25.528031290743154, - "grad_norm": 1.6099536418914795, - "learning_rate": 5.13246231155779e-05, - "loss": 5.1549, - "step": 48950 - }, - { - "epoch": 25.528552803129074, - "grad_norm": 1.5015314817428589, - "learning_rate": 5.1323618090452263e-05, - "loss": 5.4397, - "step": 48951 - }, - { - "epoch": 25.529074315514993, - "grad_norm": 1.7202929258346558, - "learning_rate": 5.132261306532663e-05, - "loss": 4.9111, - "step": 48952 - }, - { - "epoch": 25.529595827900913, - "grad_norm": 1.563065528869629, - "learning_rate": 5.1321608040201006e-05, - "loss": 5.0855, - "step": 48953 - }, - { - "epoch": 25.530117340286832, - "grad_norm": 1.7508015632629395, - "learning_rate": 5.132060301507538e-05, - "loss": 5.2483, - "step": 48954 - }, - { - "epoch": 25.53063885267275, - "grad_norm": 1.4619090557098389, - "learning_rate": 5.1319597989949754e-05, - "loss": 5.3557, - "step": 48955 - }, - { - "epoch": 25.53116036505867, - "grad_norm": 1.5642729997634888, - "learning_rate": 5.131859296482412e-05, - "loss": 5.4751, - "step": 48956 - }, - { - "epoch": 25.53168187744459, - "grad_norm": 1.4821125268936157, - "learning_rate": 5.1317587939698497e-05, - "loss": 5.8119, - "step": 48957 - }, - { - "epoch": 25.53220338983051, - "grad_norm": 2.3511264324188232, - "learning_rate": 5.131658291457286e-05, - "loss": 4.7796, - "step": 48958 - }, - { - "epoch": 25.532724902216426, - "grad_norm": 1.6256109476089478, - "learning_rate": 5.131557788944724e-05, - "loss": 5.5253, - "step": 48959 - }, - { - "epoch": 25.533246414602345, - "grad_norm": 1.6675409078598022, - "learning_rate": 5.131457286432161e-05, - "loss": 5.393, - "step": 48960 - }, - { - "epoch": 25.533767926988265, - "grad_norm": 1.6966451406478882, - "learning_rate": 5.131356783919599e-05, - "loss": 4.7059, - "step": 48961 - }, - { - "epoch": 25.534289439374184, - "grad_norm": 1.6025164127349854, - "learning_rate": 5.131256281407035e-05, - "loss": 4.961, - "step": 48962 - }, - { - "epoch": 25.534810951760104, - "grad_norm": 1.642952561378479, - "learning_rate": 5.131155778894473e-05, - "loss": 5.2535, - "step": 48963 - }, - { - "epoch": 25.535332464146023, - "grad_norm": 1.6073254346847534, - "learning_rate": 5.13105527638191e-05, - "loss": 4.9832, - "step": 48964 - }, - { - "epoch": 25.535853976531943, - "grad_norm": 1.545487403869629, - "learning_rate": 5.130954773869348e-05, - "loss": 5.4233, - "step": 48965 - }, - { - "epoch": 25.536375488917862, - "grad_norm": 1.62546968460083, - "learning_rate": 5.130854271356784e-05, - "loss": 5.2636, - "step": 48966 - }, - { - "epoch": 25.53689700130378, - "grad_norm": 1.645876169204712, - "learning_rate": 5.130753768844221e-05, - "loss": 4.544, - "step": 48967 - }, - { - "epoch": 25.5374185136897, - "grad_norm": 1.5837656259536743, - "learning_rate": 5.1306532663316585e-05, - "loss": 5.3983, - "step": 48968 - }, - { - "epoch": 25.53794002607562, - "grad_norm": 1.5915272235870361, - "learning_rate": 5.1305527638190956e-05, - "loss": 4.7639, - "step": 48969 - }, - { - "epoch": 25.53846153846154, - "grad_norm": 1.5823966264724731, - "learning_rate": 5.1304522613065334e-05, - "loss": 5.3679, - "step": 48970 - }, - { - "epoch": 25.538983050847456, - "grad_norm": 1.5275877714157104, - "learning_rate": 5.13035175879397e-05, - "loss": 5.5351, - "step": 48971 - }, - { - "epoch": 25.539504563233375, - "grad_norm": 1.6859833002090454, - "learning_rate": 5.1302512562814076e-05, - "loss": 5.3203, - "step": 48972 - }, - { - "epoch": 25.540026075619295, - "grad_norm": 1.5309557914733887, - "learning_rate": 5.130150753768844e-05, - "loss": 5.6007, - "step": 48973 - }, - { - "epoch": 25.540547588005214, - "grad_norm": 1.564135193824768, - "learning_rate": 5.130050251256282e-05, - "loss": 5.0097, - "step": 48974 - }, - { - "epoch": 25.541069100391134, - "grad_norm": 1.503042459487915, - "learning_rate": 5.129949748743719e-05, - "loss": 5.3741, - "step": 48975 - }, - { - "epoch": 25.541590612777053, - "grad_norm": 1.605412244796753, - "learning_rate": 5.129849246231157e-05, - "loss": 4.6807, - "step": 48976 - }, - { - "epoch": 25.542112125162973, - "grad_norm": 1.5730574131011963, - "learning_rate": 5.129748743718593e-05, - "loss": 4.9591, - "step": 48977 - }, - { - "epoch": 25.542633637548892, - "grad_norm": 1.6715433597564697, - "learning_rate": 5.129648241206031e-05, - "loss": 5.0996, - "step": 48978 - }, - { - "epoch": 25.54315514993481, - "grad_norm": 1.5096261501312256, - "learning_rate": 5.129547738693467e-05, - "loss": 4.9399, - "step": 48979 - }, - { - "epoch": 25.54367666232073, - "grad_norm": 1.5704644918441772, - "learning_rate": 5.1294472361809044e-05, - "loss": 5.0699, - "step": 48980 - }, - { - "epoch": 25.54419817470665, - "grad_norm": 1.5576554536819458, - "learning_rate": 5.129346733668342e-05, - "loss": 5.112, - "step": 48981 - }, - { - "epoch": 25.54471968709257, - "grad_norm": 1.5142595767974854, - "learning_rate": 5.1292462311557786e-05, - "loss": 5.5454, - "step": 48982 - }, - { - "epoch": 25.545241199478486, - "grad_norm": 1.6365838050842285, - "learning_rate": 5.1291457286432164e-05, - "loss": 4.8897, - "step": 48983 - }, - { - "epoch": 25.545762711864406, - "grad_norm": 1.6057697534561157, - "learning_rate": 5.1290452261306535e-05, - "loss": 5.2623, - "step": 48984 - }, - { - "epoch": 25.546284224250325, - "grad_norm": 1.733473300933838, - "learning_rate": 5.128944723618091e-05, - "loss": 5.2683, - "step": 48985 - }, - { - "epoch": 25.546805736636244, - "grad_norm": 1.6063380241394043, - "learning_rate": 5.128844221105528e-05, - "loss": 5.2876, - "step": 48986 - }, - { - "epoch": 25.547327249022164, - "grad_norm": 1.5926600694656372, - "learning_rate": 5.1287437185929655e-05, - "loss": 4.9494, - "step": 48987 - }, - { - "epoch": 25.547848761408083, - "grad_norm": 1.4451466798782349, - "learning_rate": 5.128643216080402e-05, - "loss": 5.3811, - "step": 48988 - }, - { - "epoch": 25.548370273794003, - "grad_norm": 1.4686737060546875, - "learning_rate": 5.12854271356784e-05, - "loss": 5.2443, - "step": 48989 - }, - { - "epoch": 25.548891786179922, - "grad_norm": 1.7466670274734497, - "learning_rate": 5.128442211055277e-05, - "loss": 4.7747, - "step": 48990 - }, - { - "epoch": 25.54941329856584, - "grad_norm": 1.5338099002838135, - "learning_rate": 5.1283417085427146e-05, - "loss": 5.4289, - "step": 48991 - }, - { - "epoch": 25.54993481095176, - "grad_norm": 1.6174393892288208, - "learning_rate": 5.128241206030151e-05, - "loss": 4.9862, - "step": 48992 - }, - { - "epoch": 25.55045632333768, - "grad_norm": 1.59501314163208, - "learning_rate": 5.1281407035175875e-05, - "loss": 5.3261, - "step": 48993 - }, - { - "epoch": 25.5509778357236, - "grad_norm": 1.598372220993042, - "learning_rate": 5.128040201005025e-05, - "loss": 5.0401, - "step": 48994 - }, - { - "epoch": 25.551499348109516, - "grad_norm": 1.6180822849273682, - "learning_rate": 5.1279396984924624e-05, - "loss": 5.3037, - "step": 48995 - }, - { - "epoch": 25.552020860495436, - "grad_norm": 1.5147316455841064, - "learning_rate": 5.1278391959799e-05, - "loss": 5.7119, - "step": 48996 - }, - { - "epoch": 25.552542372881355, - "grad_norm": 1.6093584299087524, - "learning_rate": 5.1277386934673366e-05, - "loss": 4.8476, - "step": 48997 - }, - { - "epoch": 25.553063885267274, - "grad_norm": 1.6142923831939697, - "learning_rate": 5.1276381909547744e-05, - "loss": 5.0304, - "step": 48998 - }, - { - "epoch": 25.553585397653194, - "grad_norm": 1.555130124092102, - "learning_rate": 5.127537688442211e-05, - "loss": 4.9458, - "step": 48999 - }, - { - "epoch": 25.554106910039113, - "grad_norm": 1.5466177463531494, - "learning_rate": 5.1274371859296486e-05, - "loss": 5.0993, - "step": 49000 - }, - { - "epoch": 25.554628422425033, - "grad_norm": 1.4153964519500732, - "learning_rate": 5.127336683417086e-05, - "loss": 5.7253, - "step": 49001 - }, - { - "epoch": 25.555149934810952, - "grad_norm": 1.541817307472229, - "learning_rate": 5.1272361809045235e-05, - "loss": 5.2991, - "step": 49002 - }, - { - "epoch": 25.555671447196872, - "grad_norm": 1.7319304943084717, - "learning_rate": 5.12713567839196e-05, - "loss": 4.9992, - "step": 49003 - }, - { - "epoch": 25.55619295958279, - "grad_norm": 1.7234410047531128, - "learning_rate": 5.127035175879398e-05, - "loss": 5.1853, - "step": 49004 - }, - { - "epoch": 25.55671447196871, - "grad_norm": 1.7207286357879639, - "learning_rate": 5.126934673366834e-05, - "loss": 5.0319, - "step": 49005 - }, - { - "epoch": 25.557235984354627, - "grad_norm": 1.4930751323699951, - "learning_rate": 5.126834170854271e-05, - "loss": 5.4419, - "step": 49006 - }, - { - "epoch": 25.557757496740546, - "grad_norm": 1.4706486463546753, - "learning_rate": 5.126733668341709e-05, - "loss": 5.2305, - "step": 49007 - }, - { - "epoch": 25.558279009126466, - "grad_norm": 1.5037871599197388, - "learning_rate": 5.1266331658291454e-05, - "loss": 5.3633, - "step": 49008 - }, - { - "epoch": 25.558800521512385, - "grad_norm": 1.6010459661483765, - "learning_rate": 5.126532663316583e-05, - "loss": 5.5258, - "step": 49009 - }, - { - "epoch": 25.559322033898304, - "grad_norm": 1.5585507154464722, - "learning_rate": 5.12643216080402e-05, - "loss": 4.9245, - "step": 49010 - }, - { - "epoch": 25.559843546284224, - "grad_norm": 1.5710432529449463, - "learning_rate": 5.126331658291458e-05, - "loss": 5.3101, - "step": 49011 - }, - { - "epoch": 25.560365058670143, - "grad_norm": 1.6948398351669312, - "learning_rate": 5.1262311557788945e-05, - "loss": 5.1777, - "step": 49012 - }, - { - "epoch": 25.560886571056063, - "grad_norm": 1.6445525884628296, - "learning_rate": 5.126130653266332e-05, - "loss": 5.2708, - "step": 49013 - }, - { - "epoch": 25.561408083441982, - "grad_norm": 1.7027512788772583, - "learning_rate": 5.126030150753769e-05, - "loss": 4.9081, - "step": 49014 - }, - { - "epoch": 25.561929595827902, - "grad_norm": 1.6997045278549194, - "learning_rate": 5.1259296482412065e-05, - "loss": 4.7336, - "step": 49015 - }, - { - "epoch": 25.56245110821382, - "grad_norm": 1.702027440071106, - "learning_rate": 5.1258291457286436e-05, - "loss": 5.2344, - "step": 49016 - }, - { - "epoch": 25.56297262059974, - "grad_norm": 1.5749287605285645, - "learning_rate": 5.1257286432160814e-05, - "loss": 5.3817, - "step": 49017 - }, - { - "epoch": 25.56349413298566, - "grad_norm": 1.6183366775512695, - "learning_rate": 5.125628140703518e-05, - "loss": 5.1276, - "step": 49018 - }, - { - "epoch": 25.564015645371576, - "grad_norm": 1.6305228471755981, - "learning_rate": 5.125527638190954e-05, - "loss": 5.3911, - "step": 49019 - }, - { - "epoch": 25.564537157757496, - "grad_norm": 1.6615885496139526, - "learning_rate": 5.125427135678392e-05, - "loss": 4.9578, - "step": 49020 - }, - { - "epoch": 25.565058670143415, - "grad_norm": 1.440714716911316, - "learning_rate": 5.125326633165829e-05, - "loss": 5.2327, - "step": 49021 - }, - { - "epoch": 25.565580182529335, - "grad_norm": 1.645450234413147, - "learning_rate": 5.125226130653267e-05, - "loss": 5.0039, - "step": 49022 - }, - { - "epoch": 25.566101694915254, - "grad_norm": 1.5840418338775635, - "learning_rate": 5.1251256281407033e-05, - "loss": 4.8198, - "step": 49023 - }, - { - "epoch": 25.566623207301173, - "grad_norm": 1.5111219882965088, - "learning_rate": 5.125025125628141e-05, - "loss": 5.809, - "step": 49024 - }, - { - "epoch": 25.567144719687093, - "grad_norm": 1.5315684080123901, - "learning_rate": 5.1249246231155775e-05, - "loss": 5.1731, - "step": 49025 - }, - { - "epoch": 25.567666232073012, - "grad_norm": 1.6320276260375977, - "learning_rate": 5.124824120603015e-05, - "loss": 5.4076, - "step": 49026 - }, - { - "epoch": 25.568187744458932, - "grad_norm": 1.5277915000915527, - "learning_rate": 5.1247236180904524e-05, - "loss": 5.2828, - "step": 49027 - }, - { - "epoch": 25.56870925684485, - "grad_norm": 1.5580967664718628, - "learning_rate": 5.12462311557789e-05, - "loss": 5.1776, - "step": 49028 - }, - { - "epoch": 25.56923076923077, - "grad_norm": 1.580306053161621, - "learning_rate": 5.1245226130653266e-05, - "loss": 5.1172, - "step": 49029 - }, - { - "epoch": 25.569752281616687, - "grad_norm": 1.7054685354232788, - "learning_rate": 5.1244221105527644e-05, - "loss": 4.6018, - "step": 49030 - }, - { - "epoch": 25.570273794002606, - "grad_norm": 1.6213661432266235, - "learning_rate": 5.1243216080402015e-05, - "loss": 5.0899, - "step": 49031 - }, - { - "epoch": 25.570795306388526, - "grad_norm": 1.5690258741378784, - "learning_rate": 5.124221105527638e-05, - "loss": 4.5694, - "step": 49032 - }, - { - "epoch": 25.571316818774445, - "grad_norm": 1.5594779253005981, - "learning_rate": 5.124120603015076e-05, - "loss": 5.0868, - "step": 49033 - }, - { - "epoch": 25.571838331160365, - "grad_norm": 1.7052491903305054, - "learning_rate": 5.124020100502512e-05, - "loss": 4.7867, - "step": 49034 - }, - { - "epoch": 25.572359843546284, - "grad_norm": 1.5800230503082275, - "learning_rate": 5.12391959798995e-05, - "loss": 5.4971, - "step": 49035 - }, - { - "epoch": 25.572881355932203, - "grad_norm": 1.5323662757873535, - "learning_rate": 5.123819095477387e-05, - "loss": 4.637, - "step": 49036 - }, - { - "epoch": 25.573402868318123, - "grad_norm": 1.5504729747772217, - "learning_rate": 5.123718592964825e-05, - "loss": 5.3233, - "step": 49037 - }, - { - "epoch": 25.573924380704042, - "grad_norm": 1.491260051727295, - "learning_rate": 5.123618090452261e-05, - "loss": 5.0176, - "step": 49038 - }, - { - "epoch": 25.574445893089962, - "grad_norm": 1.3582632541656494, - "learning_rate": 5.123517587939699e-05, - "loss": 4.8532, - "step": 49039 - }, - { - "epoch": 25.57496740547588, - "grad_norm": 1.5719590187072754, - "learning_rate": 5.1234170854271355e-05, - "loss": 4.8876, - "step": 49040 - }, - { - "epoch": 25.5754889178618, - "grad_norm": 1.6891262531280518, - "learning_rate": 5.123316582914573e-05, - "loss": 5.2117, - "step": 49041 - }, - { - "epoch": 25.576010430247717, - "grad_norm": 1.5597712993621826, - "learning_rate": 5.1232160804020104e-05, - "loss": 5.3418, - "step": 49042 - }, - { - "epoch": 25.576531942633636, - "grad_norm": 1.5350569486618042, - "learning_rate": 5.123115577889448e-05, - "loss": 5.3075, - "step": 49043 - }, - { - "epoch": 25.577053455019556, - "grad_norm": 1.5435817241668701, - "learning_rate": 5.1230150753768846e-05, - "loss": 5.0219, - "step": 49044 - }, - { - "epoch": 25.577574967405475, - "grad_norm": 1.5865895748138428, - "learning_rate": 5.122914572864321e-05, - "loss": 4.7047, - "step": 49045 - }, - { - "epoch": 25.578096479791395, - "grad_norm": 1.695312261581421, - "learning_rate": 5.122814070351759e-05, - "loss": 4.928, - "step": 49046 - }, - { - "epoch": 25.578617992177314, - "grad_norm": 1.5961538553237915, - "learning_rate": 5.122713567839196e-05, - "loss": 5.2791, - "step": 49047 - }, - { - "epoch": 25.579139504563233, - "grad_norm": 1.632523536682129, - "learning_rate": 5.122613065326634e-05, - "loss": 5.7174, - "step": 49048 - }, - { - "epoch": 25.579661016949153, - "grad_norm": 1.7493418455123901, - "learning_rate": 5.12251256281407e-05, - "loss": 4.821, - "step": 49049 - }, - { - "epoch": 25.580182529335072, - "grad_norm": 1.600051999092102, - "learning_rate": 5.122412060301508e-05, - "loss": 5.0546, - "step": 49050 - }, - { - "epoch": 25.580704041720992, - "grad_norm": 1.5523103475570679, - "learning_rate": 5.122311557788945e-05, - "loss": 5.2628, - "step": 49051 - }, - { - "epoch": 25.58122555410691, - "grad_norm": 1.5774461030960083, - "learning_rate": 5.122211055276383e-05, - "loss": 5.5472, - "step": 49052 - }, - { - "epoch": 25.58174706649283, - "grad_norm": 1.658156156539917, - "learning_rate": 5.122110552763819e-05, - "loss": 4.5366, - "step": 49053 - }, - { - "epoch": 25.582268578878747, - "grad_norm": 1.5598790645599365, - "learning_rate": 5.122010050251257e-05, - "loss": 5.0243, - "step": 49054 - }, - { - "epoch": 25.582790091264666, - "grad_norm": 1.5811610221862793, - "learning_rate": 5.1219095477386934e-05, - "loss": 5.2329, - "step": 49055 - }, - { - "epoch": 25.583311603650586, - "grad_norm": 1.669939398765564, - "learning_rate": 5.121809045226131e-05, - "loss": 5.4969, - "step": 49056 - }, - { - "epoch": 25.583833116036505, - "grad_norm": 1.7163331508636475, - "learning_rate": 5.121708542713568e-05, - "loss": 5.3208, - "step": 49057 - }, - { - "epoch": 25.584354628422425, - "grad_norm": 1.539265513420105, - "learning_rate": 5.121608040201006e-05, - "loss": 4.7531, - "step": 49058 - }, - { - "epoch": 25.584876140808344, - "grad_norm": 1.5464622974395752, - "learning_rate": 5.1215075376884425e-05, - "loss": 5.112, - "step": 49059 - }, - { - "epoch": 25.585397653194264, - "grad_norm": 1.5035585165023804, - "learning_rate": 5.121407035175879e-05, - "loss": 5.2482, - "step": 49060 - }, - { - "epoch": 25.585919165580183, - "grad_norm": 1.3709064722061157, - "learning_rate": 5.121306532663317e-05, - "loss": 4.9723, - "step": 49061 - }, - { - "epoch": 25.586440677966102, - "grad_norm": 1.5009701251983643, - "learning_rate": 5.121206030150754e-05, - "loss": 5.7504, - "step": 49062 - }, - { - "epoch": 25.586962190352022, - "grad_norm": 1.676430344581604, - "learning_rate": 5.1211055276381916e-05, - "loss": 5.1274, - "step": 49063 - }, - { - "epoch": 25.58748370273794, - "grad_norm": 1.5884846448898315, - "learning_rate": 5.121005025125628e-05, - "loss": 5.1782, - "step": 49064 - }, - { - "epoch": 25.58800521512386, - "grad_norm": 1.6321966648101807, - "learning_rate": 5.120904522613066e-05, - "loss": 5.4647, - "step": 49065 - }, - { - "epoch": 25.588526727509777, - "grad_norm": 1.6178611516952515, - "learning_rate": 5.120804020100502e-05, - "loss": 5.1295, - "step": 49066 - }, - { - "epoch": 25.589048239895696, - "grad_norm": 1.561342716217041, - "learning_rate": 5.12070351758794e-05, - "loss": 5.2864, - "step": 49067 - }, - { - "epoch": 25.589569752281616, - "grad_norm": 1.5370728969573975, - "learning_rate": 5.120603015075377e-05, - "loss": 5.5172, - "step": 49068 - }, - { - "epoch": 25.590091264667535, - "grad_norm": 1.52692711353302, - "learning_rate": 5.120502512562815e-05, - "loss": 5.5754, - "step": 49069 - }, - { - "epoch": 25.590612777053455, - "grad_norm": 1.580615520477295, - "learning_rate": 5.1204020100502513e-05, - "loss": 4.9255, - "step": 49070 - }, - { - "epoch": 25.591134289439374, - "grad_norm": 1.5649259090423584, - "learning_rate": 5.120301507537689e-05, - "loss": 5.3427, - "step": 49071 - }, - { - "epoch": 25.591655801825294, - "grad_norm": 1.5840065479278564, - "learning_rate": 5.1202010050251256e-05, - "loss": 5.4155, - "step": 49072 - }, - { - "epoch": 25.592177314211213, - "grad_norm": 1.5581624507904053, - "learning_rate": 5.120100502512563e-05, - "loss": 5.1219, - "step": 49073 - }, - { - "epoch": 25.592698826597132, - "grad_norm": 1.5416855812072754, - "learning_rate": 5.1200000000000004e-05, - "loss": 5.3425, - "step": 49074 - }, - { - "epoch": 25.593220338983052, - "grad_norm": 1.780268669128418, - "learning_rate": 5.119899497487437e-05, - "loss": 4.9414, - "step": 49075 - }, - { - "epoch": 25.59374185136897, - "grad_norm": 1.577286958694458, - "learning_rate": 5.1197989949748747e-05, - "loss": 5.0229, - "step": 49076 - }, - { - "epoch": 25.59426336375489, - "grad_norm": 1.6103581190109253, - "learning_rate": 5.119698492462312e-05, - "loss": 5.0205, - "step": 49077 - }, - { - "epoch": 25.594784876140807, - "grad_norm": 1.492685079574585, - "learning_rate": 5.1195979899497495e-05, - "loss": 4.9954, - "step": 49078 - }, - { - "epoch": 25.595306388526726, - "grad_norm": 1.5804518461227417, - "learning_rate": 5.119497487437186e-05, - "loss": 5.5262, - "step": 49079 - }, - { - "epoch": 25.595827900912646, - "grad_norm": 1.6918801069259644, - "learning_rate": 5.119396984924624e-05, - "loss": 4.8448, - "step": 49080 - }, - { - "epoch": 25.596349413298565, - "grad_norm": 1.5641834735870361, - "learning_rate": 5.11929648241206e-05, - "loss": 5.2087, - "step": 49081 - }, - { - "epoch": 25.596870925684485, - "grad_norm": 1.488663911819458, - "learning_rate": 5.119195979899498e-05, - "loss": 5.3457, - "step": 49082 - }, - { - "epoch": 25.597392438070404, - "grad_norm": 1.5597506761550903, - "learning_rate": 5.119095477386935e-05, - "loss": 5.1572, - "step": 49083 - }, - { - "epoch": 25.597913950456324, - "grad_norm": 1.6166141033172607, - "learning_rate": 5.118994974874373e-05, - "loss": 5.11, - "step": 49084 - }, - { - "epoch": 25.598435462842243, - "grad_norm": 1.5621421337127686, - "learning_rate": 5.118894472361809e-05, - "loss": 5.6174, - "step": 49085 - }, - { - "epoch": 25.598956975228162, - "grad_norm": 1.5743974447250366, - "learning_rate": 5.118793969849246e-05, - "loss": 5.1747, - "step": 49086 - }, - { - "epoch": 25.599478487614082, - "grad_norm": 1.5802388191223145, - "learning_rate": 5.1186934673366835e-05, - "loss": 5.5929, - "step": 49087 - }, - { - "epoch": 25.6, - "grad_norm": 1.668918251991272, - "learning_rate": 5.1185929648241206e-05, - "loss": 5.3279, - "step": 49088 - }, - { - "epoch": 25.60052151238592, - "grad_norm": 1.5416659116744995, - "learning_rate": 5.1184924623115584e-05, - "loss": 4.5311, - "step": 49089 - }, - { - "epoch": 25.601043024771837, - "grad_norm": 1.5511246919631958, - "learning_rate": 5.118391959798995e-05, - "loss": 5.4148, - "step": 49090 - }, - { - "epoch": 25.601564537157756, - "grad_norm": 1.540688157081604, - "learning_rate": 5.1182914572864326e-05, - "loss": 5.1984, - "step": 49091 - }, - { - "epoch": 25.602086049543676, - "grad_norm": 1.6104117631912231, - "learning_rate": 5.118190954773869e-05, - "loss": 5.1352, - "step": 49092 - }, - { - "epoch": 25.602607561929595, - "grad_norm": 1.432166337966919, - "learning_rate": 5.118090452261307e-05, - "loss": 5.6438, - "step": 49093 - }, - { - "epoch": 25.603129074315515, - "grad_norm": 1.6237026453018188, - "learning_rate": 5.117989949748744e-05, - "loss": 4.7466, - "step": 49094 - }, - { - "epoch": 25.603650586701434, - "grad_norm": 1.6028341054916382, - "learning_rate": 5.117889447236182e-05, - "loss": 5.0335, - "step": 49095 - }, - { - "epoch": 25.604172099087354, - "grad_norm": 1.744069218635559, - "learning_rate": 5.117788944723618e-05, - "loss": 5.1671, - "step": 49096 - }, - { - "epoch": 25.604693611473273, - "grad_norm": 1.6247950792312622, - "learning_rate": 5.117688442211056e-05, - "loss": 4.9478, - "step": 49097 - }, - { - "epoch": 25.605215123859193, - "grad_norm": 1.5563956499099731, - "learning_rate": 5.117587939698493e-05, - "loss": 5.121, - "step": 49098 - }, - { - "epoch": 25.605736636245112, - "grad_norm": 1.4922857284545898, - "learning_rate": 5.1174874371859294e-05, - "loss": 5.1336, - "step": 49099 - }, - { - "epoch": 25.60625814863103, - "grad_norm": 1.568471908569336, - "learning_rate": 5.117386934673367e-05, - "loss": 5.4451, - "step": 49100 - }, - { - "epoch": 25.60677966101695, - "grad_norm": 1.521238923072815, - "learning_rate": 5.1172864321608036e-05, - "loss": 5.2977, - "step": 49101 - }, - { - "epoch": 25.607301173402867, - "grad_norm": 1.558831810951233, - "learning_rate": 5.1171859296482414e-05, - "loss": 5.2558, - "step": 49102 - }, - { - "epoch": 25.607822685788786, - "grad_norm": 1.6216282844543457, - "learning_rate": 5.1170854271356785e-05, - "loss": 5.3554, - "step": 49103 - }, - { - "epoch": 25.608344198174706, - "grad_norm": 1.4886577129364014, - "learning_rate": 5.116984924623116e-05, - "loss": 5.3471, - "step": 49104 - }, - { - "epoch": 25.608865710560625, - "grad_norm": 1.645517110824585, - "learning_rate": 5.116884422110553e-05, - "loss": 5.3122, - "step": 49105 - }, - { - "epoch": 25.609387222946545, - "grad_norm": 1.727455735206604, - "learning_rate": 5.1167839195979905e-05, - "loss": 5.1367, - "step": 49106 - }, - { - "epoch": 25.609908735332464, - "grad_norm": 1.5247913599014282, - "learning_rate": 5.116683417085427e-05, - "loss": 5.4192, - "step": 49107 - }, - { - "epoch": 25.610430247718384, - "grad_norm": 1.5219420194625854, - "learning_rate": 5.116582914572865e-05, - "loss": 5.2749, - "step": 49108 - }, - { - "epoch": 25.610951760104303, - "grad_norm": 1.5105136632919312, - "learning_rate": 5.116482412060302e-05, - "loss": 5.6621, - "step": 49109 - }, - { - "epoch": 25.611473272490223, - "grad_norm": 1.5334784984588623, - "learning_rate": 5.1163819095477396e-05, - "loss": 5.0583, - "step": 49110 - }, - { - "epoch": 25.611994784876142, - "grad_norm": 1.5742101669311523, - "learning_rate": 5.116281407035176e-05, - "loss": 5.1638, - "step": 49111 - }, - { - "epoch": 25.61251629726206, - "grad_norm": 1.5286507606506348, - "learning_rate": 5.1161809045226125e-05, - "loss": 5.1399, - "step": 49112 - }, - { - "epoch": 25.613037809647977, - "grad_norm": 1.89841628074646, - "learning_rate": 5.11608040201005e-05, - "loss": 4.994, - "step": 49113 - }, - { - "epoch": 25.613559322033897, - "grad_norm": 1.4501591920852661, - "learning_rate": 5.1159798994974874e-05, - "loss": 5.6411, - "step": 49114 - }, - { - "epoch": 25.614080834419816, - "grad_norm": 1.6618890762329102, - "learning_rate": 5.115879396984925e-05, - "loss": 5.0869, - "step": 49115 - }, - { - "epoch": 25.614602346805736, - "grad_norm": 1.5400587320327759, - "learning_rate": 5.1157788944723616e-05, - "loss": 5.2713, - "step": 49116 - }, - { - "epoch": 25.615123859191655, - "grad_norm": 1.5222609043121338, - "learning_rate": 5.1156783919597994e-05, - "loss": 5.3277, - "step": 49117 - }, - { - "epoch": 25.615645371577575, - "grad_norm": 1.5732358694076538, - "learning_rate": 5.1155778894472365e-05, - "loss": 5.3679, - "step": 49118 - }, - { - "epoch": 25.616166883963494, - "grad_norm": 1.581425428390503, - "learning_rate": 5.115477386934674e-05, - "loss": 5.1489, - "step": 49119 - }, - { - "epoch": 25.616688396349414, - "grad_norm": 1.5363318920135498, - "learning_rate": 5.115376884422111e-05, - "loss": 5.1545, - "step": 49120 - }, - { - "epoch": 25.617209908735333, - "grad_norm": 1.531980037689209, - "learning_rate": 5.1152763819095485e-05, - "loss": 4.8717, - "step": 49121 - }, - { - "epoch": 25.617731421121253, - "grad_norm": 1.6081993579864502, - "learning_rate": 5.115175879396985e-05, - "loss": 5.2685, - "step": 49122 - }, - { - "epoch": 25.618252933507172, - "grad_norm": 1.6733735799789429, - "learning_rate": 5.115075376884423e-05, - "loss": 4.4109, - "step": 49123 - }, - { - "epoch": 25.61877444589309, - "grad_norm": 1.524556279182434, - "learning_rate": 5.11497487437186e-05, - "loss": 5.662, - "step": 49124 - }, - { - "epoch": 25.619295958279007, - "grad_norm": 1.5400904417037964, - "learning_rate": 5.114874371859296e-05, - "loss": 4.976, - "step": 49125 - }, - { - "epoch": 25.619817470664927, - "grad_norm": 1.5070056915283203, - "learning_rate": 5.114773869346734e-05, - "loss": 5.5631, - "step": 49126 - }, - { - "epoch": 25.620338983050846, - "grad_norm": 1.5775527954101562, - "learning_rate": 5.1146733668341704e-05, - "loss": 4.8431, - "step": 49127 - }, - { - "epoch": 25.620860495436766, - "grad_norm": 1.6206878423690796, - "learning_rate": 5.114572864321608e-05, - "loss": 4.9052, - "step": 49128 - }, - { - "epoch": 25.621382007822685, - "grad_norm": 1.5241345167160034, - "learning_rate": 5.114472361809045e-05, - "loss": 5.4531, - "step": 49129 - }, - { - "epoch": 25.621903520208605, - "grad_norm": 1.5432389974594116, - "learning_rate": 5.114371859296483e-05, - "loss": 4.9168, - "step": 49130 - }, - { - "epoch": 25.622425032594524, - "grad_norm": 1.493800401687622, - "learning_rate": 5.1142713567839195e-05, - "loss": 5.4643, - "step": 49131 - }, - { - "epoch": 25.622946544980444, - "grad_norm": 1.5379667282104492, - "learning_rate": 5.114170854271357e-05, - "loss": 5.679, - "step": 49132 - }, - { - "epoch": 25.623468057366363, - "grad_norm": 1.4993019104003906, - "learning_rate": 5.114070351758794e-05, - "loss": 5.4243, - "step": 49133 - }, - { - "epoch": 25.623989569752283, - "grad_norm": 1.5834985971450806, - "learning_rate": 5.1139698492462315e-05, - "loss": 4.7397, - "step": 49134 - }, - { - "epoch": 25.624511082138202, - "grad_norm": 1.5567588806152344, - "learning_rate": 5.1138693467336686e-05, - "loss": 5.179, - "step": 49135 - }, - { - "epoch": 25.62503259452412, - "grad_norm": 1.523097276687622, - "learning_rate": 5.1137688442211064e-05, - "loss": 4.7582, - "step": 49136 - }, - { - "epoch": 25.625554106910037, - "grad_norm": 1.6018332242965698, - "learning_rate": 5.113668341708543e-05, - "loss": 5.4773, - "step": 49137 - }, - { - "epoch": 25.626075619295957, - "grad_norm": 1.4554353952407837, - "learning_rate": 5.11356783919598e-05, - "loss": 5.6108, - "step": 49138 - }, - { - "epoch": 25.626597131681876, - "grad_norm": 1.4911459684371948, - "learning_rate": 5.113467336683418e-05, - "loss": 5.3634, - "step": 49139 - }, - { - "epoch": 25.627118644067796, - "grad_norm": 1.5685687065124512, - "learning_rate": 5.113366834170854e-05, - "loss": 5.3543, - "step": 49140 - }, - { - "epoch": 25.627640156453715, - "grad_norm": 1.5878912210464478, - "learning_rate": 5.113266331658292e-05, - "loss": 5.427, - "step": 49141 - }, - { - "epoch": 25.628161668839635, - "grad_norm": 1.536974310874939, - "learning_rate": 5.1131658291457283e-05, - "loss": 5.0973, - "step": 49142 - }, - { - "epoch": 25.628683181225554, - "grad_norm": 1.5840245485305786, - "learning_rate": 5.113065326633166e-05, - "loss": 5.2464, - "step": 49143 - }, - { - "epoch": 25.629204693611474, - "grad_norm": 1.6162400245666504, - "learning_rate": 5.112964824120603e-05, - "loss": 5.2495, - "step": 49144 - }, - { - "epoch": 25.629726205997393, - "grad_norm": 1.5294398069381714, - "learning_rate": 5.112864321608041e-05, - "loss": 5.3281, - "step": 49145 - }, - { - "epoch": 25.630247718383313, - "grad_norm": 1.6995820999145508, - "learning_rate": 5.1127638190954774e-05, - "loss": 5.0571, - "step": 49146 - }, - { - "epoch": 25.630769230769232, - "grad_norm": 1.5836718082427979, - "learning_rate": 5.112663316582915e-05, - "loss": 5.3949, - "step": 49147 - }, - { - "epoch": 25.63129074315515, - "grad_norm": 1.5894503593444824, - "learning_rate": 5.1125628140703516e-05, - "loss": 5.0718, - "step": 49148 - }, - { - "epoch": 25.631812255541067, - "grad_norm": 1.5298786163330078, - "learning_rate": 5.1124623115577894e-05, - "loss": 5.1053, - "step": 49149 - }, - { - "epoch": 25.632333767926987, - "grad_norm": 1.5406190156936646, - "learning_rate": 5.1123618090452265e-05, - "loss": 5.3833, - "step": 49150 - }, - { - "epoch": 25.632855280312906, - "grad_norm": 1.5842901468276978, - "learning_rate": 5.112261306532664e-05, - "loss": 5.3909, - "step": 49151 - }, - { - "epoch": 25.633376792698826, - "grad_norm": 1.5715540647506714, - "learning_rate": 5.112160804020101e-05, - "loss": 5.0765, - "step": 49152 - }, - { - "epoch": 25.633898305084745, - "grad_norm": 1.6718087196350098, - "learning_rate": 5.112060301507537e-05, - "loss": 5.5125, - "step": 49153 - }, - { - "epoch": 25.634419817470665, - "grad_norm": 1.6007423400878906, - "learning_rate": 5.111959798994975e-05, - "loss": 5.24, - "step": 49154 - }, - { - "epoch": 25.634941329856584, - "grad_norm": 1.5500690937042236, - "learning_rate": 5.111859296482412e-05, - "loss": 5.3468, - "step": 49155 - }, - { - "epoch": 25.635462842242504, - "grad_norm": 1.5863889455795288, - "learning_rate": 5.11175879396985e-05, - "loss": 5.0517, - "step": 49156 - }, - { - "epoch": 25.635984354628423, - "grad_norm": 1.5894200801849365, - "learning_rate": 5.111658291457286e-05, - "loss": 5.7581, - "step": 49157 - }, - { - "epoch": 25.636505867014343, - "grad_norm": 1.7194513082504272, - "learning_rate": 5.111557788944724e-05, - "loss": 4.7074, - "step": 49158 - }, - { - "epoch": 25.637027379400262, - "grad_norm": 1.5938149690628052, - "learning_rate": 5.1114572864321605e-05, - "loss": 5.3489, - "step": 49159 - }, - { - "epoch": 25.63754889178618, - "grad_norm": 1.579986333847046, - "learning_rate": 5.111356783919598e-05, - "loss": 4.9928, - "step": 49160 - }, - { - "epoch": 25.638070404172097, - "grad_norm": 1.594711422920227, - "learning_rate": 5.1112562814070354e-05, - "loss": 5.1172, - "step": 49161 - }, - { - "epoch": 25.638591916558017, - "grad_norm": 1.5814934968948364, - "learning_rate": 5.111155778894473e-05, - "loss": 5.2272, - "step": 49162 - }, - { - "epoch": 25.639113428943936, - "grad_norm": 1.6510212421417236, - "learning_rate": 5.1110552763819096e-05, - "loss": 5.0588, - "step": 49163 - }, - { - "epoch": 25.639634941329856, - "grad_norm": 1.510820984840393, - "learning_rate": 5.1109547738693474e-05, - "loss": 4.7249, - "step": 49164 - }, - { - "epoch": 25.640156453715775, - "grad_norm": 1.5110193490982056, - "learning_rate": 5.1108542713567845e-05, - "loss": 4.4376, - "step": 49165 - }, - { - "epoch": 25.640677966101695, - "grad_norm": 1.6780357360839844, - "learning_rate": 5.110753768844221e-05, - "loss": 4.9861, - "step": 49166 - }, - { - "epoch": 25.641199478487614, - "grad_norm": 1.5355644226074219, - "learning_rate": 5.110653266331659e-05, - "loss": 5.076, - "step": 49167 - }, - { - "epoch": 25.641720990873534, - "grad_norm": 1.5135924816131592, - "learning_rate": 5.110552763819095e-05, - "loss": 5.2787, - "step": 49168 - }, - { - "epoch": 25.642242503259453, - "grad_norm": 1.5608631372451782, - "learning_rate": 5.110452261306533e-05, - "loss": 5.1104, - "step": 49169 - }, - { - "epoch": 25.642764015645373, - "grad_norm": 1.4812742471694946, - "learning_rate": 5.11035175879397e-05, - "loss": 5.6825, - "step": 49170 - }, - { - "epoch": 25.643285528031292, - "grad_norm": 1.6270685195922852, - "learning_rate": 5.110251256281408e-05, - "loss": 4.8272, - "step": 49171 - }, - { - "epoch": 25.64380704041721, - "grad_norm": 1.569156289100647, - "learning_rate": 5.110150753768844e-05, - "loss": 5.1153, - "step": 49172 - }, - { - "epoch": 25.644328552803128, - "grad_norm": 1.4872324466705322, - "learning_rate": 5.110050251256282e-05, - "loss": 5.4806, - "step": 49173 - }, - { - "epoch": 25.644850065189047, - "grad_norm": 1.523077130317688, - "learning_rate": 5.1099497487437184e-05, - "loss": 5.1265, - "step": 49174 - }, - { - "epoch": 25.645371577574966, - "grad_norm": 1.5932987928390503, - "learning_rate": 5.109849246231156e-05, - "loss": 4.9285, - "step": 49175 - }, - { - "epoch": 25.645893089960886, - "grad_norm": 1.534177303314209, - "learning_rate": 5.109748743718593e-05, - "loss": 5.0235, - "step": 49176 - }, - { - "epoch": 25.646414602346805, - "grad_norm": 1.5331041812896729, - "learning_rate": 5.109648241206031e-05, - "loss": 5.4574, - "step": 49177 - }, - { - "epoch": 25.646936114732725, - "grad_norm": 1.614147663116455, - "learning_rate": 5.1095477386934675e-05, - "loss": 5.0047, - "step": 49178 - }, - { - "epoch": 25.647457627118644, - "grad_norm": 1.5699708461761475, - "learning_rate": 5.109447236180904e-05, - "loss": 5.3624, - "step": 49179 - }, - { - "epoch": 25.647979139504564, - "grad_norm": 1.574995994567871, - "learning_rate": 5.109346733668342e-05, - "loss": 5.2963, - "step": 49180 - }, - { - "epoch": 25.648500651890483, - "grad_norm": 1.6028552055358887, - "learning_rate": 5.109246231155779e-05, - "loss": 5.0963, - "step": 49181 - }, - { - "epoch": 25.649022164276403, - "grad_norm": 1.5327364206314087, - "learning_rate": 5.1091457286432166e-05, - "loss": 5.4057, - "step": 49182 - }, - { - "epoch": 25.649543676662322, - "grad_norm": 1.5549194812774658, - "learning_rate": 5.109045226130653e-05, - "loss": 5.1443, - "step": 49183 - }, - { - "epoch": 25.65006518904824, - "grad_norm": 1.5881567001342773, - "learning_rate": 5.108944723618091e-05, - "loss": 4.735, - "step": 49184 - }, - { - "epoch": 25.650586701434158, - "grad_norm": 1.5192891359329224, - "learning_rate": 5.108844221105528e-05, - "loss": 5.1489, - "step": 49185 - }, - { - "epoch": 25.651108213820077, - "grad_norm": 1.6969565153121948, - "learning_rate": 5.108743718592966e-05, - "loss": 5.359, - "step": 49186 - }, - { - "epoch": 25.651629726205996, - "grad_norm": 1.5812264680862427, - "learning_rate": 5.108643216080402e-05, - "loss": 5.2244, - "step": 49187 - }, - { - "epoch": 25.652151238591916, - "grad_norm": 1.5947718620300293, - "learning_rate": 5.10854271356784e-05, - "loss": 4.9289, - "step": 49188 - }, - { - "epoch": 25.652672750977835, - "grad_norm": 1.5899145603179932, - "learning_rate": 5.1084422110552763e-05, - "loss": 5.3391, - "step": 49189 - }, - { - "epoch": 25.653194263363755, - "grad_norm": 1.5251857042312622, - "learning_rate": 5.108341708542714e-05, - "loss": 5.6224, - "step": 49190 - }, - { - "epoch": 25.653715775749674, - "grad_norm": 1.5458552837371826, - "learning_rate": 5.108241206030151e-05, - "loss": 5.5962, - "step": 49191 - }, - { - "epoch": 25.654237288135594, - "grad_norm": 1.5834414958953857, - "learning_rate": 5.108140703517588e-05, - "loss": 5.2672, - "step": 49192 - }, - { - "epoch": 25.654758800521513, - "grad_norm": 1.6412686109542847, - "learning_rate": 5.1080402010050254e-05, - "loss": 5.5993, - "step": 49193 - }, - { - "epoch": 25.655280312907433, - "grad_norm": 1.5350762605667114, - "learning_rate": 5.107939698492462e-05, - "loss": 5.3073, - "step": 49194 - }, - { - "epoch": 25.655801825293352, - "grad_norm": 1.5967094898223877, - "learning_rate": 5.1078391959798997e-05, - "loss": 5.0799, - "step": 49195 - }, - { - "epoch": 25.656323337679268, - "grad_norm": 1.48600435256958, - "learning_rate": 5.107738693467337e-05, - "loss": 5.3525, - "step": 49196 - }, - { - "epoch": 25.656844850065188, - "grad_norm": 1.5038576126098633, - "learning_rate": 5.1076381909547745e-05, - "loss": 4.816, - "step": 49197 - }, - { - "epoch": 25.657366362451107, - "grad_norm": 1.6242690086364746, - "learning_rate": 5.107537688442211e-05, - "loss": 5.2313, - "step": 49198 - }, - { - "epoch": 25.657887874837026, - "grad_norm": 1.5811368227005005, - "learning_rate": 5.107437185929649e-05, - "loss": 5.3565, - "step": 49199 - }, - { - "epoch": 25.658409387222946, - "grad_norm": 1.5848184823989868, - "learning_rate": 5.107336683417085e-05, - "loss": 5.3558, - "step": 49200 - }, - { - "epoch": 25.658930899608865, - "grad_norm": 1.653534173965454, - "learning_rate": 5.107236180904523e-05, - "loss": 5.3303, - "step": 49201 - }, - { - "epoch": 25.659452411994785, - "grad_norm": 1.584797978401184, - "learning_rate": 5.10713567839196e-05, - "loss": 5.2902, - "step": 49202 - }, - { - "epoch": 25.659973924380704, - "grad_norm": 1.5770349502563477, - "learning_rate": 5.107035175879398e-05, - "loss": 5.2156, - "step": 49203 - }, - { - "epoch": 25.660495436766624, - "grad_norm": 1.6067333221435547, - "learning_rate": 5.106934673366834e-05, - "loss": 4.8372, - "step": 49204 - }, - { - "epoch": 25.661016949152543, - "grad_norm": 1.5497162342071533, - "learning_rate": 5.1068341708542714e-05, - "loss": 5.4877, - "step": 49205 - }, - { - "epoch": 25.661538461538463, - "grad_norm": 1.6151719093322754, - "learning_rate": 5.106733668341709e-05, - "loss": 5.2687, - "step": 49206 - }, - { - "epoch": 25.662059973924382, - "grad_norm": 1.49131178855896, - "learning_rate": 5.1066331658291456e-05, - "loss": 5.1805, - "step": 49207 - }, - { - "epoch": 25.6625814863103, - "grad_norm": 1.5304248332977295, - "learning_rate": 5.1065326633165834e-05, - "loss": 5.3188, - "step": 49208 - }, - { - "epoch": 25.663102998696218, - "grad_norm": 1.476938009262085, - "learning_rate": 5.10643216080402e-05, - "loss": 5.4825, - "step": 49209 - }, - { - "epoch": 25.663624511082137, - "grad_norm": 1.5041245222091675, - "learning_rate": 5.1063316582914576e-05, - "loss": 5.2277, - "step": 49210 - }, - { - "epoch": 25.664146023468057, - "grad_norm": 1.539077877998352, - "learning_rate": 5.106231155778895e-05, - "loss": 5.6029, - "step": 49211 - }, - { - "epoch": 25.664667535853976, - "grad_norm": 1.5351275205612183, - "learning_rate": 5.1061306532663325e-05, - "loss": 5.2239, - "step": 49212 - }, - { - "epoch": 25.665189048239895, - "grad_norm": 1.7225120067596436, - "learning_rate": 5.106030150753769e-05, - "loss": 4.6062, - "step": 49213 - }, - { - "epoch": 25.665710560625815, - "grad_norm": 1.564483880996704, - "learning_rate": 5.105929648241207e-05, - "loss": 4.9629, - "step": 49214 - }, - { - "epoch": 25.666232073011734, - "grad_norm": 1.5136295557022095, - "learning_rate": 5.105829145728643e-05, - "loss": 5.5324, - "step": 49215 - }, - { - "epoch": 25.666753585397654, - "grad_norm": 1.5076289176940918, - "learning_rate": 5.105728643216081e-05, - "loss": 5.2321, - "step": 49216 - }, - { - "epoch": 25.667275097783573, - "grad_norm": 1.5511009693145752, - "learning_rate": 5.105628140703518e-05, - "loss": 5.0414, - "step": 49217 - }, - { - "epoch": 25.667796610169493, - "grad_norm": 1.5190536975860596, - "learning_rate": 5.1055276381909544e-05, - "loss": 5.32, - "step": 49218 - }, - { - "epoch": 25.668318122555412, - "grad_norm": 1.8434364795684814, - "learning_rate": 5.105427135678392e-05, - "loss": 5.1263, - "step": 49219 - }, - { - "epoch": 25.668839634941328, - "grad_norm": 1.657619833946228, - "learning_rate": 5.1053266331658286e-05, - "loss": 5.1775, - "step": 49220 - }, - { - "epoch": 25.669361147327248, - "grad_norm": 1.6549525260925293, - "learning_rate": 5.1052261306532664e-05, - "loss": 5.4529, - "step": 49221 - }, - { - "epoch": 25.669882659713167, - "grad_norm": 1.5634465217590332, - "learning_rate": 5.1051256281407035e-05, - "loss": 5.4136, - "step": 49222 - }, - { - "epoch": 25.670404172099087, - "grad_norm": 1.6824818849563599, - "learning_rate": 5.105025125628141e-05, - "loss": 5.2232, - "step": 49223 - }, - { - "epoch": 25.670925684485006, - "grad_norm": 1.5213370323181152, - "learning_rate": 5.104924623115578e-05, - "loss": 5.1193, - "step": 49224 - }, - { - "epoch": 25.671447196870925, - "grad_norm": 1.5512503385543823, - "learning_rate": 5.1048241206030155e-05, - "loss": 4.8926, - "step": 49225 - }, - { - "epoch": 25.671968709256845, - "grad_norm": 1.5930274724960327, - "learning_rate": 5.1047236180904526e-05, - "loss": 5.1097, - "step": 49226 - }, - { - "epoch": 25.672490221642764, - "grad_norm": 1.6112669706344604, - "learning_rate": 5.1046231155778904e-05, - "loss": 5.1282, - "step": 49227 - }, - { - "epoch": 25.673011734028684, - "grad_norm": 1.6454752683639526, - "learning_rate": 5.104522613065327e-05, - "loss": 5.0288, - "step": 49228 - }, - { - "epoch": 25.673533246414603, - "grad_norm": 1.6691042184829712, - "learning_rate": 5.1044221105527646e-05, - "loss": 4.5209, - "step": 49229 - }, - { - "epoch": 25.674054758800523, - "grad_norm": 1.5245823860168457, - "learning_rate": 5.104321608040201e-05, - "loss": 5.488, - "step": 49230 - }, - { - "epoch": 25.674576271186442, - "grad_norm": 1.5637465715408325, - "learning_rate": 5.104221105527638e-05, - "loss": 5.309, - "step": 49231 - }, - { - "epoch": 25.675097783572358, - "grad_norm": 1.588206171989441, - "learning_rate": 5.104120603015076e-05, - "loss": 4.6802, - "step": 49232 - }, - { - "epoch": 25.675619295958278, - "grad_norm": 1.5431079864501953, - "learning_rate": 5.1040201005025124e-05, - "loss": 5.2082, - "step": 49233 - }, - { - "epoch": 25.676140808344197, - "grad_norm": 1.7240742444992065, - "learning_rate": 5.10391959798995e-05, - "loss": 4.9943, - "step": 49234 - }, - { - "epoch": 25.676662320730117, - "grad_norm": 1.576658844947815, - "learning_rate": 5.1038190954773866e-05, - "loss": 4.9839, - "step": 49235 - }, - { - "epoch": 25.677183833116036, - "grad_norm": 1.6010979413986206, - "learning_rate": 5.1037185929648244e-05, - "loss": 5.3563, - "step": 49236 - }, - { - "epoch": 25.677705345501955, - "grad_norm": 1.7214113473892212, - "learning_rate": 5.1036180904522615e-05, - "loss": 4.9439, - "step": 49237 - }, - { - "epoch": 25.678226857887875, - "grad_norm": 1.6144274473190308, - "learning_rate": 5.103517587939699e-05, - "loss": 5.1714, - "step": 49238 - }, - { - "epoch": 25.678748370273794, - "grad_norm": 1.608327031135559, - "learning_rate": 5.103417085427136e-05, - "loss": 5.3308, - "step": 49239 - }, - { - "epoch": 25.679269882659714, - "grad_norm": 1.7126433849334717, - "learning_rate": 5.1033165829145735e-05, - "loss": 5.3201, - "step": 49240 - }, - { - "epoch": 25.679791395045633, - "grad_norm": 1.636181116104126, - "learning_rate": 5.10321608040201e-05, - "loss": 5.397, - "step": 49241 - }, - { - "epoch": 25.680312907431553, - "grad_norm": 1.6679613590240479, - "learning_rate": 5.103115577889448e-05, - "loss": 5.5405, - "step": 49242 - }, - { - "epoch": 25.680834419817472, - "grad_norm": 1.5550286769866943, - "learning_rate": 5.103015075376885e-05, - "loss": 5.0277, - "step": 49243 - }, - { - "epoch": 25.681355932203388, - "grad_norm": 1.411195993423462, - "learning_rate": 5.1029145728643226e-05, - "loss": 5.2612, - "step": 49244 - }, - { - "epoch": 25.681877444589308, - "grad_norm": 1.5721213817596436, - "learning_rate": 5.102814070351759e-05, - "loss": 5.5007, - "step": 49245 - }, - { - "epoch": 25.682398956975227, - "grad_norm": 1.6363822221755981, - "learning_rate": 5.1027135678391954e-05, - "loss": 5.1863, - "step": 49246 - }, - { - "epoch": 25.682920469361147, - "grad_norm": 1.6044268608093262, - "learning_rate": 5.102613065326633e-05, - "loss": 5.3254, - "step": 49247 - }, - { - "epoch": 25.683441981747066, - "grad_norm": 1.667681097984314, - "learning_rate": 5.10251256281407e-05, - "loss": 5.1161, - "step": 49248 - }, - { - "epoch": 25.683963494132986, - "grad_norm": 1.6071640253067017, - "learning_rate": 5.102412060301508e-05, - "loss": 5.3012, - "step": 49249 - }, - { - "epoch": 25.684485006518905, - "grad_norm": 1.5835165977478027, - "learning_rate": 5.1023115577889445e-05, - "loss": 5.187, - "step": 49250 - }, - { - "epoch": 25.685006518904824, - "grad_norm": 1.6036936044692993, - "learning_rate": 5.102211055276382e-05, - "loss": 5.2875, - "step": 49251 - }, - { - "epoch": 25.685528031290744, - "grad_norm": 1.6675140857696533, - "learning_rate": 5.1021105527638194e-05, - "loss": 5.0991, - "step": 49252 - }, - { - "epoch": 25.686049543676663, - "grad_norm": 1.5969549417495728, - "learning_rate": 5.102010050251257e-05, - "loss": 5.628, - "step": 49253 - }, - { - "epoch": 25.686571056062583, - "grad_norm": 1.5670088529586792, - "learning_rate": 5.1019095477386936e-05, - "loss": 5.4903, - "step": 49254 - }, - { - "epoch": 25.687092568448502, - "grad_norm": 1.6088773012161255, - "learning_rate": 5.1018090452261314e-05, - "loss": 5.344, - "step": 49255 - }, - { - "epoch": 25.687614080834418, - "grad_norm": 1.523258924484253, - "learning_rate": 5.101708542713568e-05, - "loss": 4.9579, - "step": 49256 - }, - { - "epoch": 25.688135593220338, - "grad_norm": 1.677573323249817, - "learning_rate": 5.1016080402010056e-05, - "loss": 5.1265, - "step": 49257 - }, - { - "epoch": 25.688657105606257, - "grad_norm": 1.619982123374939, - "learning_rate": 5.101507537688443e-05, - "loss": 5.2179, - "step": 49258 - }, - { - "epoch": 25.689178617992177, - "grad_norm": 1.6546483039855957, - "learning_rate": 5.101407035175879e-05, - "loss": 4.5367, - "step": 49259 - }, - { - "epoch": 25.689700130378096, - "grad_norm": 1.5715004205703735, - "learning_rate": 5.101306532663317e-05, - "loss": 5.1798, - "step": 49260 - }, - { - "epoch": 25.690221642764016, - "grad_norm": 1.6097177267074585, - "learning_rate": 5.1012060301507533e-05, - "loss": 5.3305, - "step": 49261 - }, - { - "epoch": 25.690743155149935, - "grad_norm": 1.515420913696289, - "learning_rate": 5.101105527638191e-05, - "loss": 5.3069, - "step": 49262 - }, - { - "epoch": 25.691264667535854, - "grad_norm": 1.5719488859176636, - "learning_rate": 5.101005025125628e-05, - "loss": 5.5418, - "step": 49263 - }, - { - "epoch": 25.691786179921774, - "grad_norm": 1.5730029344558716, - "learning_rate": 5.100904522613066e-05, - "loss": 5.2989, - "step": 49264 - }, - { - "epoch": 25.692307692307693, - "grad_norm": 1.6662899255752563, - "learning_rate": 5.1008040201005024e-05, - "loss": 4.9136, - "step": 49265 - }, - { - "epoch": 25.692829204693613, - "grad_norm": 1.5487481355667114, - "learning_rate": 5.10070351758794e-05, - "loss": 5.2022, - "step": 49266 - }, - { - "epoch": 25.693350717079532, - "grad_norm": 1.5404555797576904, - "learning_rate": 5.1006030150753767e-05, - "loss": 4.4817, - "step": 49267 - }, - { - "epoch": 25.69387222946545, - "grad_norm": 1.6130956411361694, - "learning_rate": 5.1005025125628144e-05, - "loss": 5.3488, - "step": 49268 - }, - { - "epoch": 25.694393741851368, - "grad_norm": 1.6026216745376587, - "learning_rate": 5.1004020100502515e-05, - "loss": 4.6538, - "step": 49269 - }, - { - "epoch": 25.694915254237287, - "grad_norm": 1.5152256488800049, - "learning_rate": 5.100301507537689e-05, - "loss": 5.4396, - "step": 49270 - }, - { - "epoch": 25.695436766623207, - "grad_norm": 1.5680166482925415, - "learning_rate": 5.100201005025126e-05, - "loss": 5.2168, - "step": 49271 - }, - { - "epoch": 25.695958279009126, - "grad_norm": 1.552886962890625, - "learning_rate": 5.100100502512563e-05, - "loss": 5.3511, - "step": 49272 - }, - { - "epoch": 25.696479791395046, - "grad_norm": 1.5223517417907715, - "learning_rate": 5.1000000000000006e-05, - "loss": 5.2843, - "step": 49273 - }, - { - "epoch": 25.697001303780965, - "grad_norm": 1.5725103616714478, - "learning_rate": 5.099899497487437e-05, - "loss": 4.9953, - "step": 49274 - }, - { - "epoch": 25.697522816166884, - "grad_norm": 1.5718297958374023, - "learning_rate": 5.099798994974875e-05, - "loss": 4.644, - "step": 49275 - }, - { - "epoch": 25.698044328552804, - "grad_norm": 1.5226248502731323, - "learning_rate": 5.099698492462311e-05, - "loss": 5.3806, - "step": 49276 - }, - { - "epoch": 25.698565840938723, - "grad_norm": 1.4396837949752808, - "learning_rate": 5.099597989949749e-05, - "loss": 5.3608, - "step": 49277 - }, - { - "epoch": 25.699087353324643, - "grad_norm": 1.6610798835754395, - "learning_rate": 5.099497487437186e-05, - "loss": 4.7569, - "step": 49278 - }, - { - "epoch": 25.69960886571056, - "grad_norm": 1.5905038118362427, - "learning_rate": 5.099396984924624e-05, - "loss": 4.7241, - "step": 49279 - }, - { - "epoch": 25.70013037809648, - "grad_norm": 1.6128963232040405, - "learning_rate": 5.0992964824120604e-05, - "loss": 5.1978, - "step": 49280 - }, - { - "epoch": 25.700651890482398, - "grad_norm": 1.58334481716156, - "learning_rate": 5.099195979899498e-05, - "loss": 4.8412, - "step": 49281 - }, - { - "epoch": 25.701173402868317, - "grad_norm": 1.5157861709594727, - "learning_rate": 5.0990954773869346e-05, - "loss": 5.6653, - "step": 49282 - }, - { - "epoch": 25.701694915254237, - "grad_norm": 1.5605593919754028, - "learning_rate": 5.0989949748743724e-05, - "loss": 4.7357, - "step": 49283 - }, - { - "epoch": 25.702216427640156, - "grad_norm": 1.4497307538986206, - "learning_rate": 5.0988944723618095e-05, - "loss": 5.7749, - "step": 49284 - }, - { - "epoch": 25.702737940026076, - "grad_norm": 1.531130313873291, - "learning_rate": 5.098793969849246e-05, - "loss": 5.1743, - "step": 49285 - }, - { - "epoch": 25.703259452411995, - "grad_norm": 1.5210529565811157, - "learning_rate": 5.098693467336684e-05, - "loss": 4.9848, - "step": 49286 - }, - { - "epoch": 25.703780964797915, - "grad_norm": 1.5862457752227783, - "learning_rate": 5.09859296482412e-05, - "loss": 5.0022, - "step": 49287 - }, - { - "epoch": 25.704302477183834, - "grad_norm": 1.5462348461151123, - "learning_rate": 5.098492462311558e-05, - "loss": 5.583, - "step": 49288 - }, - { - "epoch": 25.704823989569753, - "grad_norm": 1.523478627204895, - "learning_rate": 5.098391959798995e-05, - "loss": 5.0313, - "step": 49289 - }, - { - "epoch": 25.705345501955673, - "grad_norm": 1.4469002485275269, - "learning_rate": 5.098291457286433e-05, - "loss": 5.3035, - "step": 49290 - }, - { - "epoch": 25.705867014341592, - "grad_norm": 1.4993956089019775, - "learning_rate": 5.098190954773869e-05, - "loss": 5.1864, - "step": 49291 - }, - { - "epoch": 25.70638852672751, - "grad_norm": 1.6559021472930908, - "learning_rate": 5.098090452261307e-05, - "loss": 5.181, - "step": 49292 - }, - { - "epoch": 25.706910039113428, - "grad_norm": 1.5400997400283813, - "learning_rate": 5.097989949748744e-05, - "loss": 5.3143, - "step": 49293 - }, - { - "epoch": 25.707431551499347, - "grad_norm": 1.551695704460144, - "learning_rate": 5.097889447236182e-05, - "loss": 5.2553, - "step": 49294 - }, - { - "epoch": 25.707953063885267, - "grad_norm": 1.7338887453079224, - "learning_rate": 5.097788944723618e-05, - "loss": 4.8879, - "step": 49295 - }, - { - "epoch": 25.708474576271186, - "grad_norm": 1.631761074066162, - "learning_rate": 5.097688442211056e-05, - "loss": 5.3042, - "step": 49296 - }, - { - "epoch": 25.708996088657106, - "grad_norm": 1.5082178115844727, - "learning_rate": 5.0975879396984925e-05, - "loss": 5.3269, - "step": 49297 - }, - { - "epoch": 25.709517601043025, - "grad_norm": 1.5709047317504883, - "learning_rate": 5.0974874371859296e-05, - "loss": 5.0901, - "step": 49298 - }, - { - "epoch": 25.710039113428945, - "grad_norm": 1.5274094343185425, - "learning_rate": 5.0973869346733674e-05, - "loss": 5.1424, - "step": 49299 - }, - { - "epoch": 25.710560625814864, - "grad_norm": 1.521639108657837, - "learning_rate": 5.097286432160804e-05, - "loss": 5.1964, - "step": 49300 - }, - { - "epoch": 25.711082138200783, - "grad_norm": 1.6231335401535034, - "learning_rate": 5.0971859296482416e-05, - "loss": 5.1485, - "step": 49301 - }, - { - "epoch": 25.711603650586703, - "grad_norm": 1.670088291168213, - "learning_rate": 5.097085427135678e-05, - "loss": 4.681, - "step": 49302 - }, - { - "epoch": 25.71212516297262, - "grad_norm": 1.6490533351898193, - "learning_rate": 5.096984924623116e-05, - "loss": 5.0684, - "step": 49303 - }, - { - "epoch": 25.71264667535854, - "grad_norm": 1.5283091068267822, - "learning_rate": 5.096884422110553e-05, - "loss": 4.6461, - "step": 49304 - }, - { - "epoch": 25.713168187744458, - "grad_norm": 1.5674279928207397, - "learning_rate": 5.096783919597991e-05, - "loss": 5.1307, - "step": 49305 - }, - { - "epoch": 25.713689700130377, - "grad_norm": 1.5542749166488647, - "learning_rate": 5.096683417085427e-05, - "loss": 5.5638, - "step": 49306 - }, - { - "epoch": 25.714211212516297, - "grad_norm": 1.4839057922363281, - "learning_rate": 5.096582914572865e-05, - "loss": 4.7605, - "step": 49307 - }, - { - "epoch": 25.714732724902216, - "grad_norm": 1.5267364978790283, - "learning_rate": 5.0964824120603014e-05, - "loss": 5.4467, - "step": 49308 - }, - { - "epoch": 25.715254237288136, - "grad_norm": 1.527262568473816, - "learning_rate": 5.096381909547739e-05, - "loss": 5.5758, - "step": 49309 - }, - { - "epoch": 25.715775749674055, - "grad_norm": 1.6077589988708496, - "learning_rate": 5.096281407035176e-05, - "loss": 5.3501, - "step": 49310 - }, - { - "epoch": 25.716297262059975, - "grad_norm": 1.5534435510635376, - "learning_rate": 5.096180904522613e-05, - "loss": 5.0382, - "step": 49311 - }, - { - "epoch": 25.716818774445894, - "grad_norm": 1.5131340026855469, - "learning_rate": 5.0960804020100504e-05, - "loss": 5.4865, - "step": 49312 - }, - { - "epoch": 25.717340286831814, - "grad_norm": 1.4838604927062988, - "learning_rate": 5.0959798994974876e-05, - "loss": 5.6405, - "step": 49313 - }, - { - "epoch": 25.717861799217733, - "grad_norm": 1.5507867336273193, - "learning_rate": 5.095879396984925e-05, - "loss": 5.3115, - "step": 49314 - }, - { - "epoch": 25.71838331160365, - "grad_norm": 1.541771411895752, - "learning_rate": 5.095778894472362e-05, - "loss": 5.7158, - "step": 49315 - }, - { - "epoch": 25.71890482398957, - "grad_norm": 1.6661646366119385, - "learning_rate": 5.0956783919597995e-05, - "loss": 4.4535, - "step": 49316 - }, - { - "epoch": 25.719426336375488, - "grad_norm": 1.5948138236999512, - "learning_rate": 5.095577889447236e-05, - "loss": 5.1337, - "step": 49317 - }, - { - "epoch": 25.719947848761407, - "grad_norm": 1.5485633611679077, - "learning_rate": 5.095477386934674e-05, - "loss": 4.8621, - "step": 49318 - }, - { - "epoch": 25.720469361147327, - "grad_norm": 1.4856048822402954, - "learning_rate": 5.095376884422111e-05, - "loss": 5.4332, - "step": 49319 - }, - { - "epoch": 25.720990873533246, - "grad_norm": 1.6084976196289062, - "learning_rate": 5.0952763819095486e-05, - "loss": 5.257, - "step": 49320 - }, - { - "epoch": 25.721512385919166, - "grad_norm": 1.6129834651947021, - "learning_rate": 5.095175879396985e-05, - "loss": 5.2827, - "step": 49321 - }, - { - "epoch": 25.722033898305085, - "grad_norm": 1.510401725769043, - "learning_rate": 5.095075376884423e-05, - "loss": 5.5475, - "step": 49322 - }, - { - "epoch": 25.722555410691005, - "grad_norm": 1.5679363012313843, - "learning_rate": 5.094974874371859e-05, - "loss": 5.511, - "step": 49323 - }, - { - "epoch": 25.723076923076924, - "grad_norm": 1.4712754487991333, - "learning_rate": 5.094874371859297e-05, - "loss": 5.1872, - "step": 49324 - }, - { - "epoch": 25.723598435462844, - "grad_norm": 1.581040859222412, - "learning_rate": 5.094773869346734e-05, - "loss": 5.4413, - "step": 49325 - }, - { - "epoch": 25.724119947848763, - "grad_norm": 1.5893595218658447, - "learning_rate": 5.0946733668341706e-05, - "loss": 5.3012, - "step": 49326 - }, - { - "epoch": 25.72464146023468, - "grad_norm": 1.5263031721115112, - "learning_rate": 5.0945728643216084e-05, - "loss": 5.4571, - "step": 49327 - }, - { - "epoch": 25.7251629726206, - "grad_norm": 1.4815030097961426, - "learning_rate": 5.094472361809045e-05, - "loss": 5.1848, - "step": 49328 - }, - { - "epoch": 25.725684485006518, - "grad_norm": 1.5153950452804565, - "learning_rate": 5.0943718592964826e-05, - "loss": 5.6504, - "step": 49329 - }, - { - "epoch": 25.726205997392437, - "grad_norm": 1.6310980319976807, - "learning_rate": 5.09427135678392e-05, - "loss": 5.3064, - "step": 49330 - }, - { - "epoch": 25.726727509778357, - "grad_norm": 1.5234180688858032, - "learning_rate": 5.0941708542713575e-05, - "loss": 5.4569, - "step": 49331 - }, - { - "epoch": 25.727249022164276, - "grad_norm": 1.5557876825332642, - "learning_rate": 5.094070351758794e-05, - "loss": 5.5143, - "step": 49332 - }, - { - "epoch": 25.727770534550196, - "grad_norm": 1.514122486114502, - "learning_rate": 5.093969849246232e-05, - "loss": 4.9582, - "step": 49333 - }, - { - "epoch": 25.728292046936115, - "grad_norm": 1.5339465141296387, - "learning_rate": 5.093869346733668e-05, - "loss": 5.192, - "step": 49334 - }, - { - "epoch": 25.728813559322035, - "grad_norm": 1.6598092317581177, - "learning_rate": 5.093768844221106e-05, - "loss": 4.3742, - "step": 49335 - }, - { - "epoch": 25.729335071707954, - "grad_norm": 1.528059959411621, - "learning_rate": 5.093668341708543e-05, - "loss": 5.2403, - "step": 49336 - }, - { - "epoch": 25.729856584093874, - "grad_norm": 1.552580714225769, - "learning_rate": 5.093567839195981e-05, - "loss": 5.5117, - "step": 49337 - }, - { - "epoch": 25.730378096479793, - "grad_norm": 1.5830333232879639, - "learning_rate": 5.093467336683417e-05, - "loss": 5.3754, - "step": 49338 - }, - { - "epoch": 25.73089960886571, - "grad_norm": 1.5579842329025269, - "learning_rate": 5.093366834170854e-05, - "loss": 5.4018, - "step": 49339 - }, - { - "epoch": 25.73142112125163, - "grad_norm": 1.5273287296295166, - "learning_rate": 5.093266331658292e-05, - "loss": 5.4341, - "step": 49340 - }, - { - "epoch": 25.731942633637548, - "grad_norm": 1.565125584602356, - "learning_rate": 5.0931658291457285e-05, - "loss": 4.9918, - "step": 49341 - }, - { - "epoch": 25.732464146023467, - "grad_norm": 1.5764261484146118, - "learning_rate": 5.093065326633166e-05, - "loss": 5.2748, - "step": 49342 - }, - { - "epoch": 25.732985658409387, - "grad_norm": 1.723837971687317, - "learning_rate": 5.092964824120603e-05, - "loss": 5.5267, - "step": 49343 - }, - { - "epoch": 25.733507170795306, - "grad_norm": 1.5930277109146118, - "learning_rate": 5.0928643216080405e-05, - "loss": 5.1642, - "step": 49344 - }, - { - "epoch": 25.734028683181226, - "grad_norm": 1.5656791925430298, - "learning_rate": 5.0927638190954776e-05, - "loss": 5.4041, - "step": 49345 - }, - { - "epoch": 25.734550195567145, - "grad_norm": 1.5205588340759277, - "learning_rate": 5.0926633165829154e-05, - "loss": 5.2768, - "step": 49346 - }, - { - "epoch": 25.735071707953065, - "grad_norm": 1.5250924825668335, - "learning_rate": 5.092562814070352e-05, - "loss": 5.5712, - "step": 49347 - }, - { - "epoch": 25.735593220338984, - "grad_norm": 1.518324613571167, - "learning_rate": 5.0924623115577896e-05, - "loss": 4.9638, - "step": 49348 - }, - { - "epoch": 25.736114732724904, - "grad_norm": 1.685738444328308, - "learning_rate": 5.092361809045226e-05, - "loss": 5.2608, - "step": 49349 - }, - { - "epoch": 25.736636245110823, - "grad_norm": 1.5388789176940918, - "learning_rate": 5.092261306532664e-05, - "loss": 5.3668, - "step": 49350 - }, - { - "epoch": 25.73715775749674, - "grad_norm": 1.4986523389816284, - "learning_rate": 5.092160804020101e-05, - "loss": 5.5643, - "step": 49351 - }, - { - "epoch": 25.73767926988266, - "grad_norm": 1.687217116355896, - "learning_rate": 5.0920603015075374e-05, - "loss": 5.1522, - "step": 49352 - }, - { - "epoch": 25.738200782268578, - "grad_norm": 1.6430494785308838, - "learning_rate": 5.091959798994975e-05, - "loss": 5.1228, - "step": 49353 - }, - { - "epoch": 25.738722294654497, - "grad_norm": 1.6552033424377441, - "learning_rate": 5.0918592964824116e-05, - "loss": 5.2693, - "step": 49354 - }, - { - "epoch": 25.739243807040417, - "grad_norm": 1.5962262153625488, - "learning_rate": 5.0917587939698494e-05, - "loss": 5.4971, - "step": 49355 - }, - { - "epoch": 25.739765319426336, - "grad_norm": 1.607325792312622, - "learning_rate": 5.0916582914572865e-05, - "loss": 5.137, - "step": 49356 - }, - { - "epoch": 25.740286831812256, - "grad_norm": 1.5405610799789429, - "learning_rate": 5.091557788944724e-05, - "loss": 5.4557, - "step": 49357 - }, - { - "epoch": 25.740808344198175, - "grad_norm": 1.6379045248031616, - "learning_rate": 5.091457286432161e-05, - "loss": 3.7919, - "step": 49358 - }, - { - "epoch": 25.741329856584095, - "grad_norm": 1.7428029775619507, - "learning_rate": 5.0913567839195985e-05, - "loss": 4.7714, - "step": 49359 - }, - { - "epoch": 25.741851368970014, - "grad_norm": 1.6489003896713257, - "learning_rate": 5.0912562814070356e-05, - "loss": 5.1461, - "step": 49360 - }, - { - "epoch": 25.742372881355934, - "grad_norm": 1.676252841949463, - "learning_rate": 5.0911557788944733e-05, - "loss": 4.8913, - "step": 49361 - }, - { - "epoch": 25.742894393741853, - "grad_norm": 1.6986085176467896, - "learning_rate": 5.09105527638191e-05, - "loss": 4.8659, - "step": 49362 - }, - { - "epoch": 25.74341590612777, - "grad_norm": 1.727142333984375, - "learning_rate": 5.0909547738693476e-05, - "loss": 5.1141, - "step": 49363 - }, - { - "epoch": 25.74393741851369, - "grad_norm": 1.5111390352249146, - "learning_rate": 5.090854271356784e-05, - "loss": 5.5886, - "step": 49364 - }, - { - "epoch": 25.744458930899608, - "grad_norm": 1.5994421243667603, - "learning_rate": 5.090753768844221e-05, - "loss": 5.2149, - "step": 49365 - }, - { - "epoch": 25.744980443285527, - "grad_norm": 1.60080885887146, - "learning_rate": 5.090653266331659e-05, - "loss": 4.8966, - "step": 49366 - }, - { - "epoch": 25.745501955671447, - "grad_norm": 1.5894638299942017, - "learning_rate": 5.090552763819095e-05, - "loss": 5.0134, - "step": 49367 - }, - { - "epoch": 25.746023468057366, - "grad_norm": 1.634663701057434, - "learning_rate": 5.090452261306533e-05, - "loss": 5.0327, - "step": 49368 - }, - { - "epoch": 25.746544980443286, - "grad_norm": 1.5257906913757324, - "learning_rate": 5.0903517587939695e-05, - "loss": 5.4684, - "step": 49369 - }, - { - "epoch": 25.747066492829205, - "grad_norm": 1.4024096727371216, - "learning_rate": 5.090251256281407e-05, - "loss": 5.702, - "step": 49370 - }, - { - "epoch": 25.747588005215125, - "grad_norm": 1.5542651414871216, - "learning_rate": 5.0901507537688444e-05, - "loss": 5.4471, - "step": 49371 - }, - { - "epoch": 25.748109517601044, - "grad_norm": 1.5993911027908325, - "learning_rate": 5.090050251256282e-05, - "loss": 5.2184, - "step": 49372 - }, - { - "epoch": 25.748631029986964, - "grad_norm": 1.7226794958114624, - "learning_rate": 5.0899497487437186e-05, - "loss": 4.8263, - "step": 49373 - }, - { - "epoch": 25.749152542372883, - "grad_norm": 1.4643621444702148, - "learning_rate": 5.0898492462311564e-05, - "loss": 4.6592, - "step": 49374 - }, - { - "epoch": 25.7496740547588, - "grad_norm": 1.573024868965149, - "learning_rate": 5.089748743718593e-05, - "loss": 5.2439, - "step": 49375 - }, - { - "epoch": 25.75019556714472, - "grad_norm": 1.5033069849014282, - "learning_rate": 5.0896482412060306e-05, - "loss": 5.2823, - "step": 49376 - }, - { - "epoch": 25.750717079530638, - "grad_norm": 1.782434105873108, - "learning_rate": 5.089547738693468e-05, - "loss": 5.0652, - "step": 49377 - }, - { - "epoch": 25.751238591916557, - "grad_norm": 1.5558956861495972, - "learning_rate": 5.089447236180904e-05, - "loss": 5.077, - "step": 49378 - }, - { - "epoch": 25.751760104302477, - "grad_norm": 1.5077533721923828, - "learning_rate": 5.089346733668342e-05, - "loss": 5.8387, - "step": 49379 - }, - { - "epoch": 25.752281616688396, - "grad_norm": 1.5379624366760254, - "learning_rate": 5.089246231155779e-05, - "loss": 5.4914, - "step": 49380 - }, - { - "epoch": 25.752803129074316, - "grad_norm": 1.5976415872573853, - "learning_rate": 5.089145728643217e-05, - "loss": 5.0191, - "step": 49381 - }, - { - "epoch": 25.753324641460235, - "grad_norm": 1.7827743291854858, - "learning_rate": 5.089045226130653e-05, - "loss": 4.6834, - "step": 49382 - }, - { - "epoch": 25.753846153846155, - "grad_norm": 1.5357930660247803, - "learning_rate": 5.088944723618091e-05, - "loss": 5.3996, - "step": 49383 - }, - { - "epoch": 25.754367666232074, - "grad_norm": 1.4840511083602905, - "learning_rate": 5.0888442211055274e-05, - "loss": 5.1559, - "step": 49384 - }, - { - "epoch": 25.754889178617994, - "grad_norm": 1.6122114658355713, - "learning_rate": 5.088743718592965e-05, - "loss": 5.2721, - "step": 49385 - }, - { - "epoch": 25.75541069100391, - "grad_norm": 1.7136075496673584, - "learning_rate": 5.088643216080402e-05, - "loss": 5.0306, - "step": 49386 - }, - { - "epoch": 25.75593220338983, - "grad_norm": 1.6198806762695312, - "learning_rate": 5.08854271356784e-05, - "loss": 4.4853, - "step": 49387 - }, - { - "epoch": 25.75645371577575, - "grad_norm": 1.5914064645767212, - "learning_rate": 5.0884422110552765e-05, - "loss": 5.349, - "step": 49388 - }, - { - "epoch": 25.756975228161668, - "grad_norm": 1.6038780212402344, - "learning_rate": 5.088341708542714e-05, - "loss": 5.6565, - "step": 49389 - }, - { - "epoch": 25.757496740547587, - "grad_norm": 1.5908387899398804, - "learning_rate": 5.088241206030151e-05, - "loss": 5.5372, - "step": 49390 - }, - { - "epoch": 25.758018252933507, - "grad_norm": 1.495862364768982, - "learning_rate": 5.088140703517588e-05, - "loss": 5.2625, - "step": 49391 - }, - { - "epoch": 25.758539765319426, - "grad_norm": 1.5612355470657349, - "learning_rate": 5.0880402010050256e-05, - "loss": 5.1864, - "step": 49392 - }, - { - "epoch": 25.759061277705346, - "grad_norm": 1.7252806425094604, - "learning_rate": 5.087939698492462e-05, - "loss": 5.2858, - "step": 49393 - }, - { - "epoch": 25.759582790091265, - "grad_norm": 1.6472508907318115, - "learning_rate": 5.0878391959799e-05, - "loss": 5.2819, - "step": 49394 - }, - { - "epoch": 25.760104302477185, - "grad_norm": 1.55177640914917, - "learning_rate": 5.087738693467336e-05, - "loss": 5.2607, - "step": 49395 - }, - { - "epoch": 25.760625814863104, - "grad_norm": 1.6208715438842773, - "learning_rate": 5.087638190954774e-05, - "loss": 5.5314, - "step": 49396 - }, - { - "epoch": 25.761147327249024, - "grad_norm": 1.532757043838501, - "learning_rate": 5.087537688442211e-05, - "loss": 5.5742, - "step": 49397 - }, - { - "epoch": 25.761668839634943, - "grad_norm": 1.6950056552886963, - "learning_rate": 5.087437185929649e-05, - "loss": 4.7683, - "step": 49398 - }, - { - "epoch": 25.76219035202086, - "grad_norm": 1.5801416635513306, - "learning_rate": 5.0873366834170854e-05, - "loss": 5.1743, - "step": 49399 - }, - { - "epoch": 25.76271186440678, - "grad_norm": 1.553388237953186, - "learning_rate": 5.087236180904523e-05, - "loss": 5.2229, - "step": 49400 - }, - { - "epoch": 25.763233376792698, - "grad_norm": 1.6794772148132324, - "learning_rate": 5.08713567839196e-05, - "loss": 4.9229, - "step": 49401 - }, - { - "epoch": 25.763754889178617, - "grad_norm": 1.532986044883728, - "learning_rate": 5.087035175879398e-05, - "loss": 5.2419, - "step": 49402 - }, - { - "epoch": 25.764276401564537, - "grad_norm": 1.5179522037506104, - "learning_rate": 5.0869346733668345e-05, - "loss": 5.4521, - "step": 49403 - }, - { - "epoch": 25.764797913950456, - "grad_norm": 1.5609605312347412, - "learning_rate": 5.086834170854271e-05, - "loss": 5.3162, - "step": 49404 - }, - { - "epoch": 25.765319426336376, - "grad_norm": 1.583979606628418, - "learning_rate": 5.086733668341709e-05, - "loss": 5.3165, - "step": 49405 - }, - { - "epoch": 25.765840938722295, - "grad_norm": 1.5300724506378174, - "learning_rate": 5.086633165829146e-05, - "loss": 5.0535, - "step": 49406 - }, - { - "epoch": 25.766362451108215, - "grad_norm": 1.5345032215118408, - "learning_rate": 5.0865326633165836e-05, - "loss": 5.3729, - "step": 49407 - }, - { - "epoch": 25.766883963494134, - "grad_norm": 1.46602201461792, - "learning_rate": 5.08643216080402e-05, - "loss": 5.4618, - "step": 49408 - }, - { - "epoch": 25.767405475880054, - "grad_norm": 1.5750476121902466, - "learning_rate": 5.086331658291458e-05, - "loss": 4.7539, - "step": 49409 - }, - { - "epoch": 25.76792698826597, - "grad_norm": 1.50304114818573, - "learning_rate": 5.086231155778894e-05, - "loss": 5.1944, - "step": 49410 - }, - { - "epoch": 25.76844850065189, - "grad_norm": 1.5158883333206177, - "learning_rate": 5.086130653266332e-05, - "loss": 5.4726, - "step": 49411 - }, - { - "epoch": 25.76897001303781, - "grad_norm": 1.5553834438323975, - "learning_rate": 5.086030150753769e-05, - "loss": 5.383, - "step": 49412 - }, - { - "epoch": 25.769491525423728, - "grad_norm": 1.523966908454895, - "learning_rate": 5.085929648241207e-05, - "loss": 5.3523, - "step": 49413 - }, - { - "epoch": 25.770013037809647, - "grad_norm": 1.6457067728042603, - "learning_rate": 5.085829145728643e-05, - "loss": 5.3854, - "step": 49414 - }, - { - "epoch": 25.770534550195567, - "grad_norm": 1.6466647386550903, - "learning_rate": 5.085728643216081e-05, - "loss": 5.3926, - "step": 49415 - }, - { - "epoch": 25.771056062581486, - "grad_norm": 1.518704891204834, - "learning_rate": 5.0856281407035175e-05, - "loss": 5.1084, - "step": 49416 - }, - { - "epoch": 25.771577574967406, - "grad_norm": 1.682518482208252, - "learning_rate": 5.085527638190955e-05, - "loss": 4.8017, - "step": 49417 - }, - { - "epoch": 25.772099087353325, - "grad_norm": 1.5554362535476685, - "learning_rate": 5.0854271356783924e-05, - "loss": 5.4385, - "step": 49418 - }, - { - "epoch": 25.772620599739245, - "grad_norm": 1.6435552835464478, - "learning_rate": 5.085326633165829e-05, - "loss": 4.4241, - "step": 49419 - }, - { - "epoch": 25.773142112125164, - "grad_norm": 1.587552547454834, - "learning_rate": 5.0852261306532666e-05, - "loss": 5.6095, - "step": 49420 - }, - { - "epoch": 25.773663624511084, - "grad_norm": 1.6835442781448364, - "learning_rate": 5.085125628140703e-05, - "loss": 5.2674, - "step": 49421 - }, - { - "epoch": 25.774185136897, - "grad_norm": 1.6397761106491089, - "learning_rate": 5.085025125628141e-05, - "loss": 5.3686, - "step": 49422 - }, - { - "epoch": 25.77470664928292, - "grad_norm": 1.5409375429153442, - "learning_rate": 5.084924623115578e-05, - "loss": 5.4077, - "step": 49423 - }, - { - "epoch": 25.77522816166884, - "grad_norm": 1.6750065088272095, - "learning_rate": 5.084824120603016e-05, - "loss": 5.0674, - "step": 49424 - }, - { - "epoch": 25.775749674054758, - "grad_norm": 1.5270531177520752, - "learning_rate": 5.084723618090452e-05, - "loss": 5.1853, - "step": 49425 - }, - { - "epoch": 25.776271186440677, - "grad_norm": 1.5520479679107666, - "learning_rate": 5.08462311557789e-05, - "loss": 5.2444, - "step": 49426 - }, - { - "epoch": 25.776792698826597, - "grad_norm": 1.4951086044311523, - "learning_rate": 5.084522613065327e-05, - "loss": 5.3754, - "step": 49427 - }, - { - "epoch": 25.777314211212516, - "grad_norm": 1.6303112506866455, - "learning_rate": 5.084422110552765e-05, - "loss": 5.1761, - "step": 49428 - }, - { - "epoch": 25.777835723598436, - "grad_norm": 1.4683022499084473, - "learning_rate": 5.084321608040201e-05, - "loss": 5.3622, - "step": 49429 - }, - { - "epoch": 25.778357235984355, - "grad_norm": 1.515688419342041, - "learning_rate": 5.084221105527639e-05, - "loss": 5.0638, - "step": 49430 - }, - { - "epoch": 25.778878748370275, - "grad_norm": 1.6373214721679688, - "learning_rate": 5.0841206030150755e-05, - "loss": 4.8379, - "step": 49431 - }, - { - "epoch": 25.779400260756194, - "grad_norm": 1.5292974710464478, - "learning_rate": 5.0840201005025126e-05, - "loss": 5.3852, - "step": 49432 - }, - { - "epoch": 25.779921773142114, - "grad_norm": 1.5218408107757568, - "learning_rate": 5.0839195979899503e-05, - "loss": 5.6581, - "step": 49433 - }, - { - "epoch": 25.78044328552803, - "grad_norm": 1.6062564849853516, - "learning_rate": 5.083819095477387e-05, - "loss": 5.5268, - "step": 49434 - }, - { - "epoch": 25.78096479791395, - "grad_norm": 1.570929765701294, - "learning_rate": 5.0837185929648245e-05, - "loss": 4.6978, - "step": 49435 - }, - { - "epoch": 25.78148631029987, - "grad_norm": 1.5691919326782227, - "learning_rate": 5.083618090452261e-05, - "loss": 4.9259, - "step": 49436 - }, - { - "epoch": 25.782007822685788, - "grad_norm": 1.4936182498931885, - "learning_rate": 5.083517587939699e-05, - "loss": 4.8499, - "step": 49437 - }, - { - "epoch": 25.782529335071708, - "grad_norm": 1.7058517932891846, - "learning_rate": 5.083417085427136e-05, - "loss": 4.7133, - "step": 49438 - }, - { - "epoch": 25.783050847457627, - "grad_norm": 1.6431388854980469, - "learning_rate": 5.0833165829145736e-05, - "loss": 4.9613, - "step": 49439 - }, - { - "epoch": 25.783572359843546, - "grad_norm": 1.534572720527649, - "learning_rate": 5.08321608040201e-05, - "loss": 5.5145, - "step": 49440 - }, - { - "epoch": 25.784093872229466, - "grad_norm": 1.6392868757247925, - "learning_rate": 5.083115577889448e-05, - "loss": 5.3342, - "step": 49441 - }, - { - "epoch": 25.784615384615385, - "grad_norm": 1.4516806602478027, - "learning_rate": 5.083015075376884e-05, - "loss": 5.1834, - "step": 49442 - }, - { - "epoch": 25.785136897001305, - "grad_norm": 1.6050617694854736, - "learning_rate": 5.082914572864322e-05, - "loss": 5.2445, - "step": 49443 - }, - { - "epoch": 25.785658409387224, - "grad_norm": 1.630660057067871, - "learning_rate": 5.082814070351759e-05, - "loss": 5.2186, - "step": 49444 - }, - { - "epoch": 25.786179921773144, - "grad_norm": 1.5883435010910034, - "learning_rate": 5.0827135678391956e-05, - "loss": 5.4551, - "step": 49445 - }, - { - "epoch": 25.78670143415906, - "grad_norm": 1.6089329719543457, - "learning_rate": 5.0826130653266334e-05, - "loss": 5.1317, - "step": 49446 - }, - { - "epoch": 25.78722294654498, - "grad_norm": 1.5228828191757202, - "learning_rate": 5.0825125628140705e-05, - "loss": 5.1683, - "step": 49447 - }, - { - "epoch": 25.7877444589309, - "grad_norm": 1.561089038848877, - "learning_rate": 5.082412060301508e-05, - "loss": 4.3452, - "step": 49448 - }, - { - "epoch": 25.788265971316818, - "grad_norm": 1.614431619644165, - "learning_rate": 5.082311557788945e-05, - "loss": 4.9129, - "step": 49449 - }, - { - "epoch": 25.788787483702738, - "grad_norm": 1.6357392072677612, - "learning_rate": 5.0822110552763825e-05, - "loss": 4.9095, - "step": 49450 - }, - { - "epoch": 25.789308996088657, - "grad_norm": 1.6077940464019775, - "learning_rate": 5.082110552763819e-05, - "loss": 4.8337, - "step": 49451 - }, - { - "epoch": 25.789830508474576, - "grad_norm": 1.560876727104187, - "learning_rate": 5.082010050251257e-05, - "loss": 5.3804, - "step": 49452 - }, - { - "epoch": 25.790352020860496, - "grad_norm": 1.5736517906188965, - "learning_rate": 5.081909547738694e-05, - "loss": 5.3139, - "step": 49453 - }, - { - "epoch": 25.790873533246415, - "grad_norm": 1.4973363876342773, - "learning_rate": 5.0818090452261316e-05, - "loss": 5.2046, - "step": 49454 - }, - { - "epoch": 25.791395045632335, - "grad_norm": 1.6135526895523071, - "learning_rate": 5.081708542713568e-05, - "loss": 5.061, - "step": 49455 - }, - { - "epoch": 25.791916558018254, - "grad_norm": 1.437915325164795, - "learning_rate": 5.081608040201006e-05, - "loss": 5.4908, - "step": 49456 - }, - { - "epoch": 25.792438070404174, - "grad_norm": 1.6477243900299072, - "learning_rate": 5.081507537688442e-05, - "loss": 5.3331, - "step": 49457 - }, - { - "epoch": 25.79295958279009, - "grad_norm": 1.5716592073440552, - "learning_rate": 5.081407035175879e-05, - "loss": 5.3689, - "step": 49458 - }, - { - "epoch": 25.79348109517601, - "grad_norm": 1.5989402532577515, - "learning_rate": 5.081306532663317e-05, - "loss": 5.1507, - "step": 49459 - }, - { - "epoch": 25.79400260756193, - "grad_norm": 1.685147762298584, - "learning_rate": 5.0812060301507535e-05, - "loss": 5.3705, - "step": 49460 - }, - { - "epoch": 25.794524119947848, - "grad_norm": 1.5592409372329712, - "learning_rate": 5.081105527638191e-05, - "loss": 5.3501, - "step": 49461 - }, - { - "epoch": 25.795045632333768, - "grad_norm": 1.6644999980926514, - "learning_rate": 5.081005025125628e-05, - "loss": 4.8858, - "step": 49462 - }, - { - "epoch": 25.795567144719687, - "grad_norm": 1.5945544242858887, - "learning_rate": 5.0809045226130655e-05, - "loss": 4.9049, - "step": 49463 - }, - { - "epoch": 25.796088657105607, - "grad_norm": 1.6035552024841309, - "learning_rate": 5.0808040201005026e-05, - "loss": 5.0885, - "step": 49464 - }, - { - "epoch": 25.796610169491526, - "grad_norm": 1.6711628437042236, - "learning_rate": 5.0807035175879404e-05, - "loss": 5.4894, - "step": 49465 - }, - { - "epoch": 25.797131681877445, - "grad_norm": 1.5728657245635986, - "learning_rate": 5.080603015075377e-05, - "loss": 5.2985, - "step": 49466 - }, - { - "epoch": 25.797653194263365, - "grad_norm": 1.6542669534683228, - "learning_rate": 5.0805025125628146e-05, - "loss": 4.7953, - "step": 49467 - }, - { - "epoch": 25.798174706649284, - "grad_norm": 1.5409718751907349, - "learning_rate": 5.080402010050252e-05, - "loss": 4.8169, - "step": 49468 - }, - { - "epoch": 25.7986962190352, - "grad_norm": 1.5437167882919312, - "learning_rate": 5.0803015075376895e-05, - "loss": 5.1603, - "step": 49469 - }, - { - "epoch": 25.79921773142112, - "grad_norm": 1.553315281867981, - "learning_rate": 5.080201005025126e-05, - "loss": 4.8439, - "step": 49470 - }, - { - "epoch": 25.79973924380704, - "grad_norm": 1.5841330289840698, - "learning_rate": 5.0801005025125624e-05, - "loss": 4.9966, - "step": 49471 - }, - { - "epoch": 25.80026075619296, - "grad_norm": 1.597845196723938, - "learning_rate": 5.08e-05, - "loss": 4.8417, - "step": 49472 - }, - { - "epoch": 25.800782268578878, - "grad_norm": 1.504225254058838, - "learning_rate": 5.079899497487437e-05, - "loss": 5.1945, - "step": 49473 - }, - { - "epoch": 25.801303780964798, - "grad_norm": 1.521648645401001, - "learning_rate": 5.079798994974875e-05, - "loss": 5.0905, - "step": 49474 - }, - { - "epoch": 25.801825293350717, - "grad_norm": 1.565415620803833, - "learning_rate": 5.0796984924623115e-05, - "loss": 5.4848, - "step": 49475 - }, - { - "epoch": 25.802346805736637, - "grad_norm": 1.434044361114502, - "learning_rate": 5.079597989949749e-05, - "loss": 5.308, - "step": 49476 - }, - { - "epoch": 25.802868318122556, - "grad_norm": 1.4445827007293701, - "learning_rate": 5.079497487437186e-05, - "loss": 5.3758, - "step": 49477 - }, - { - "epoch": 25.803389830508475, - "grad_norm": 1.7290375232696533, - "learning_rate": 5.0793969849246235e-05, - "loss": 4.9254, - "step": 49478 - }, - { - "epoch": 25.803911342894395, - "grad_norm": 1.5250073671340942, - "learning_rate": 5.0792964824120606e-05, - "loss": 4.9081, - "step": 49479 - }, - { - "epoch": 25.804432855280314, - "grad_norm": 1.5946053266525269, - "learning_rate": 5.0791959798994983e-05, - "loss": 5.5218, - "step": 49480 - }, - { - "epoch": 25.804954367666234, - "grad_norm": 1.5173187255859375, - "learning_rate": 5.079095477386935e-05, - "loss": 5.3331, - "step": 49481 - }, - { - "epoch": 25.80547588005215, - "grad_norm": 1.496277093887329, - "learning_rate": 5.0789949748743726e-05, - "loss": 5.4589, - "step": 49482 - }, - { - "epoch": 25.80599739243807, - "grad_norm": 1.5747895240783691, - "learning_rate": 5.078894472361809e-05, - "loss": 4.696, - "step": 49483 - }, - { - "epoch": 25.80651890482399, - "grad_norm": 1.5228025913238525, - "learning_rate": 5.078793969849246e-05, - "loss": 5.1654, - "step": 49484 - }, - { - "epoch": 25.807040417209908, - "grad_norm": 1.5916661024093628, - "learning_rate": 5.078693467336684e-05, - "loss": 4.9558, - "step": 49485 - }, - { - "epoch": 25.807561929595828, - "grad_norm": 1.5315215587615967, - "learning_rate": 5.07859296482412e-05, - "loss": 5.2467, - "step": 49486 - }, - { - "epoch": 25.808083441981747, - "grad_norm": 1.5918775796890259, - "learning_rate": 5.078492462311558e-05, - "loss": 5.3183, - "step": 49487 - }, - { - "epoch": 25.808604954367667, - "grad_norm": 1.5715746879577637, - "learning_rate": 5.0783919597989945e-05, - "loss": 5.0498, - "step": 49488 - }, - { - "epoch": 25.809126466753586, - "grad_norm": 1.623359203338623, - "learning_rate": 5.078291457286432e-05, - "loss": 5.3226, - "step": 49489 - }, - { - "epoch": 25.809647979139505, - "grad_norm": 1.5779606103897095, - "learning_rate": 5.0781909547738694e-05, - "loss": 5.3027, - "step": 49490 - }, - { - "epoch": 25.810169491525425, - "grad_norm": 1.5456421375274658, - "learning_rate": 5.078090452261307e-05, - "loss": 4.9652, - "step": 49491 - }, - { - "epoch": 25.810691003911344, - "grad_norm": 1.5808297395706177, - "learning_rate": 5.0779899497487436e-05, - "loss": 4.9155, - "step": 49492 - }, - { - "epoch": 25.81121251629726, - "grad_norm": 1.6394097805023193, - "learning_rate": 5.0778894472361814e-05, - "loss": 5.1293, - "step": 49493 - }, - { - "epoch": 25.81173402868318, - "grad_norm": 1.595434308052063, - "learning_rate": 5.0777889447236185e-05, - "loss": 5.1263, - "step": 49494 - }, - { - "epoch": 25.8122555410691, - "grad_norm": 1.592504620552063, - "learning_rate": 5.077688442211056e-05, - "loss": 5.6836, - "step": 49495 - }, - { - "epoch": 25.81277705345502, - "grad_norm": 1.6243627071380615, - "learning_rate": 5.077587939698493e-05, - "loss": 4.4058, - "step": 49496 - }, - { - "epoch": 25.813298565840938, - "grad_norm": 1.5522271394729614, - "learning_rate": 5.077487437185929e-05, - "loss": 5.4176, - "step": 49497 - }, - { - "epoch": 25.813820078226858, - "grad_norm": 1.5830094814300537, - "learning_rate": 5.077386934673367e-05, - "loss": 5.4552, - "step": 49498 - }, - { - "epoch": 25.814341590612777, - "grad_norm": 1.4778363704681396, - "learning_rate": 5.077286432160804e-05, - "loss": 5.4711, - "step": 49499 - }, - { - "epoch": 25.814863102998697, - "grad_norm": 1.703542709350586, - "learning_rate": 5.077185929648242e-05, - "loss": 4.8205, - "step": 49500 - }, - { - "epoch": 25.815384615384616, - "grad_norm": 1.4930484294891357, - "learning_rate": 5.077085427135678e-05, - "loss": 5.5047, - "step": 49501 - }, - { - "epoch": 25.815906127770536, - "grad_norm": 1.5209585428237915, - "learning_rate": 5.076984924623116e-05, - "loss": 4.7171, - "step": 49502 - }, - { - "epoch": 25.816427640156455, - "grad_norm": 1.6047848463058472, - "learning_rate": 5.0768844221105524e-05, - "loss": 5.2137, - "step": 49503 - }, - { - "epoch": 25.816949152542374, - "grad_norm": 1.4573417901992798, - "learning_rate": 5.07678391959799e-05, - "loss": 5.3798, - "step": 49504 - }, - { - "epoch": 25.81747066492829, - "grad_norm": 1.5448285341262817, - "learning_rate": 5.076683417085427e-05, - "loss": 5.1724, - "step": 49505 - }, - { - "epoch": 25.81799217731421, - "grad_norm": 1.590221881866455, - "learning_rate": 5.076582914572865e-05, - "loss": 5.6442, - "step": 49506 - }, - { - "epoch": 25.81851368970013, - "grad_norm": 1.5802807807922363, - "learning_rate": 5.0764824120603015e-05, - "loss": 5.4314, - "step": 49507 - }, - { - "epoch": 25.81903520208605, - "grad_norm": 1.6349058151245117, - "learning_rate": 5.076381909547739e-05, - "loss": 4.8415, - "step": 49508 - }, - { - "epoch": 25.819556714471968, - "grad_norm": 1.5726505517959595, - "learning_rate": 5.076281407035176e-05, - "loss": 5.3082, - "step": 49509 - }, - { - "epoch": 25.820078226857888, - "grad_norm": 1.5084272623062134, - "learning_rate": 5.0761809045226135e-05, - "loss": 5.1957, - "step": 49510 - }, - { - "epoch": 25.820599739243807, - "grad_norm": 1.6754173040390015, - "learning_rate": 5.0760804020100506e-05, - "loss": 5.366, - "step": 49511 - }, - { - "epoch": 25.821121251629727, - "grad_norm": 1.5665489435195923, - "learning_rate": 5.075979899497487e-05, - "loss": 5.4624, - "step": 49512 - }, - { - "epoch": 25.821642764015646, - "grad_norm": 1.5872730016708374, - "learning_rate": 5.075879396984925e-05, - "loss": 5.1742, - "step": 49513 - }, - { - "epoch": 25.822164276401566, - "grad_norm": 1.5471688508987427, - "learning_rate": 5.075778894472362e-05, - "loss": 5.532, - "step": 49514 - }, - { - "epoch": 25.822685788787485, - "grad_norm": 1.4969478845596313, - "learning_rate": 5.0756783919598e-05, - "loss": 5.3935, - "step": 49515 - }, - { - "epoch": 25.823207301173404, - "grad_norm": 1.6961020231246948, - "learning_rate": 5.075577889447236e-05, - "loss": 4.5413, - "step": 49516 - }, - { - "epoch": 25.82372881355932, - "grad_norm": 1.6403387784957886, - "learning_rate": 5.075477386934674e-05, - "loss": 5.3438, - "step": 49517 - }, - { - "epoch": 25.82425032594524, - "grad_norm": 1.5857869386672974, - "learning_rate": 5.0753768844221104e-05, - "loss": 4.8563, - "step": 49518 - }, - { - "epoch": 25.82477183833116, - "grad_norm": 1.6066441535949707, - "learning_rate": 5.075276381909548e-05, - "loss": 5.4583, - "step": 49519 - }, - { - "epoch": 25.82529335071708, - "grad_norm": 1.4666965007781982, - "learning_rate": 5.075175879396985e-05, - "loss": 4.2803, - "step": 49520 - }, - { - "epoch": 25.825814863103, - "grad_norm": 1.5224428176879883, - "learning_rate": 5.075075376884423e-05, - "loss": 4.6428, - "step": 49521 - }, - { - "epoch": 25.826336375488918, - "grad_norm": 1.567661166191101, - "learning_rate": 5.0749748743718595e-05, - "loss": 4.8531, - "step": 49522 - }, - { - "epoch": 25.826857887874837, - "grad_norm": 1.5749965906143188, - "learning_rate": 5.074874371859297e-05, - "loss": 5.6642, - "step": 49523 - }, - { - "epoch": 25.827379400260757, - "grad_norm": 1.5790988206863403, - "learning_rate": 5.074773869346734e-05, - "loss": 4.9828, - "step": 49524 - }, - { - "epoch": 25.827900912646676, - "grad_norm": 1.537879467010498, - "learning_rate": 5.074673366834171e-05, - "loss": 5.6543, - "step": 49525 - }, - { - "epoch": 25.828422425032596, - "grad_norm": 1.5758427381515503, - "learning_rate": 5.0745728643216086e-05, - "loss": 5.0475, - "step": 49526 - }, - { - "epoch": 25.828943937418515, - "grad_norm": 1.5924466848373413, - "learning_rate": 5.074472361809045e-05, - "loss": 5.1402, - "step": 49527 - }, - { - "epoch": 25.829465449804434, - "grad_norm": 1.5065189599990845, - "learning_rate": 5.074371859296483e-05, - "loss": 5.0869, - "step": 49528 - }, - { - "epoch": 25.82998696219035, - "grad_norm": 1.7503803968429565, - "learning_rate": 5.074271356783919e-05, - "loss": 4.9759, - "step": 49529 - }, - { - "epoch": 25.83050847457627, - "grad_norm": 1.5792039632797241, - "learning_rate": 5.074170854271357e-05, - "loss": 5.3077, - "step": 49530 - }, - { - "epoch": 25.83102998696219, - "grad_norm": 1.5053184032440186, - "learning_rate": 5.074070351758794e-05, - "loss": 5.3391, - "step": 49531 - }, - { - "epoch": 25.83155149934811, - "grad_norm": 1.6212868690490723, - "learning_rate": 5.073969849246232e-05, - "loss": 5.2022, - "step": 49532 - }, - { - "epoch": 25.83207301173403, - "grad_norm": 1.5017002820968628, - "learning_rate": 5.073869346733668e-05, - "loss": 5.2441, - "step": 49533 - }, - { - "epoch": 25.832594524119948, - "grad_norm": 1.5462223291397095, - "learning_rate": 5.073768844221106e-05, - "loss": 5.4444, - "step": 49534 - }, - { - "epoch": 25.833116036505867, - "grad_norm": 1.5560812950134277, - "learning_rate": 5.073668341708543e-05, - "loss": 5.6249, - "step": 49535 - }, - { - "epoch": 25.833637548891787, - "grad_norm": 1.5451829433441162, - "learning_rate": 5.073567839195981e-05, - "loss": 5.6649, - "step": 49536 - }, - { - "epoch": 25.834159061277706, - "grad_norm": 1.5800509452819824, - "learning_rate": 5.0734673366834174e-05, - "loss": 5.065, - "step": 49537 - }, - { - "epoch": 25.834680573663626, - "grad_norm": 1.554251790046692, - "learning_rate": 5.073366834170854e-05, - "loss": 5.3505, - "step": 49538 - }, - { - "epoch": 25.835202086049545, - "grad_norm": 1.6748102903366089, - "learning_rate": 5.0732663316582916e-05, - "loss": 4.7725, - "step": 49539 - }, - { - "epoch": 25.835723598435465, - "grad_norm": 1.510166883468628, - "learning_rate": 5.073165829145729e-05, - "loss": 5.3833, - "step": 49540 - }, - { - "epoch": 25.83624511082138, - "grad_norm": 1.521899938583374, - "learning_rate": 5.0730653266331665e-05, - "loss": 5.1791, - "step": 49541 - }, - { - "epoch": 25.8367666232073, - "grad_norm": 1.718515157699585, - "learning_rate": 5.072964824120603e-05, - "loss": 4.7595, - "step": 49542 - }, - { - "epoch": 25.83728813559322, - "grad_norm": 1.657780647277832, - "learning_rate": 5.072864321608041e-05, - "loss": 4.8947, - "step": 49543 - }, - { - "epoch": 25.83780964797914, - "grad_norm": 1.6749402284622192, - "learning_rate": 5.072763819095477e-05, - "loss": 4.6637, - "step": 49544 - }, - { - "epoch": 25.83833116036506, - "grad_norm": 1.5060797929763794, - "learning_rate": 5.072663316582915e-05, - "loss": 4.8567, - "step": 49545 - }, - { - "epoch": 25.838852672750978, - "grad_norm": 1.4830248355865479, - "learning_rate": 5.072562814070352e-05, - "loss": 5.3516, - "step": 49546 - }, - { - "epoch": 25.839374185136897, - "grad_norm": 1.5191891193389893, - "learning_rate": 5.07246231155779e-05, - "loss": 5.3817, - "step": 49547 - }, - { - "epoch": 25.839895697522817, - "grad_norm": 1.9443784952163696, - "learning_rate": 5.072361809045226e-05, - "loss": 4.8821, - "step": 49548 - }, - { - "epoch": 25.840417209908736, - "grad_norm": 1.5723484754562378, - "learning_rate": 5.072261306532664e-05, - "loss": 4.9877, - "step": 49549 - }, - { - "epoch": 25.840938722294656, - "grad_norm": 1.6439976692199707, - "learning_rate": 5.0721608040201005e-05, - "loss": 5.1718, - "step": 49550 - }, - { - "epoch": 25.841460234680575, - "grad_norm": 1.6379692554473877, - "learning_rate": 5.0720603015075376e-05, - "loss": 4.8394, - "step": 49551 - }, - { - "epoch": 25.841981747066495, - "grad_norm": 1.6673640012741089, - "learning_rate": 5.0719597989949753e-05, - "loss": 4.7225, - "step": 49552 - }, - { - "epoch": 25.84250325945241, - "grad_norm": 1.5845872163772583, - "learning_rate": 5.071859296482412e-05, - "loss": 5.0996, - "step": 49553 - }, - { - "epoch": 25.84302477183833, - "grad_norm": 1.6790130138397217, - "learning_rate": 5.0717587939698496e-05, - "loss": 5.0704, - "step": 49554 - }, - { - "epoch": 25.84354628422425, - "grad_norm": 1.574972152709961, - "learning_rate": 5.0716582914572867e-05, - "loss": 5.0735, - "step": 49555 - }, - { - "epoch": 25.84406779661017, - "grad_norm": 1.6345430612564087, - "learning_rate": 5.0715577889447244e-05, - "loss": 5.5593, - "step": 49556 - }, - { - "epoch": 25.84458930899609, - "grad_norm": 1.7090246677398682, - "learning_rate": 5.071457286432161e-05, - "loss": 4.7014, - "step": 49557 - }, - { - "epoch": 25.845110821382008, - "grad_norm": 1.5257865190505981, - "learning_rate": 5.0713567839195986e-05, - "loss": 5.5454, - "step": 49558 - }, - { - "epoch": 25.845632333767927, - "grad_norm": 1.5447801351547241, - "learning_rate": 5.071256281407035e-05, - "loss": 5.4407, - "step": 49559 - }, - { - "epoch": 25.846153846153847, - "grad_norm": 1.5845047235488892, - "learning_rate": 5.071155778894473e-05, - "loss": 4.9534, - "step": 49560 - }, - { - "epoch": 25.846675358539766, - "grad_norm": 1.511177897453308, - "learning_rate": 5.07105527638191e-05, - "loss": 5.4336, - "step": 49561 - }, - { - "epoch": 25.847196870925686, - "grad_norm": 1.6541502475738525, - "learning_rate": 5.070954773869348e-05, - "loss": 4.9978, - "step": 49562 - }, - { - "epoch": 25.847718383311605, - "grad_norm": 1.5122650861740112, - "learning_rate": 5.070854271356784e-05, - "loss": 5.0489, - "step": 49563 - }, - { - "epoch": 25.848239895697525, - "grad_norm": 1.5937185287475586, - "learning_rate": 5.0707537688442206e-05, - "loss": 5.2806, - "step": 49564 - }, - { - "epoch": 25.84876140808344, - "grad_norm": 1.6221301555633545, - "learning_rate": 5.0706532663316584e-05, - "loss": 5.0985, - "step": 49565 - }, - { - "epoch": 25.84928292046936, - "grad_norm": 1.639850378036499, - "learning_rate": 5.0705527638190955e-05, - "loss": 5.1789, - "step": 49566 - }, - { - "epoch": 25.84980443285528, - "grad_norm": 1.6068971157073975, - "learning_rate": 5.070452261306533e-05, - "loss": 5.0583, - "step": 49567 - }, - { - "epoch": 25.8503259452412, - "grad_norm": 1.5830589532852173, - "learning_rate": 5.07035175879397e-05, - "loss": 5.1855, - "step": 49568 - }, - { - "epoch": 25.85084745762712, - "grad_norm": 1.5380163192749023, - "learning_rate": 5.0702512562814075e-05, - "loss": 5.1835, - "step": 49569 - }, - { - "epoch": 25.851368970013038, - "grad_norm": 1.6589494943618774, - "learning_rate": 5.070150753768844e-05, - "loss": 5.4719, - "step": 49570 - }, - { - "epoch": 25.851890482398957, - "grad_norm": 1.7805697917938232, - "learning_rate": 5.070050251256282e-05, - "loss": 4.4424, - "step": 49571 - }, - { - "epoch": 25.852411994784877, - "grad_norm": 1.515160083770752, - "learning_rate": 5.069949748743719e-05, - "loss": 5.3538, - "step": 49572 - }, - { - "epoch": 25.852933507170796, - "grad_norm": 1.621850848197937, - "learning_rate": 5.0698492462311566e-05, - "loss": 5.3807, - "step": 49573 - }, - { - "epoch": 25.853455019556716, - "grad_norm": 1.5697494745254517, - "learning_rate": 5.069748743718593e-05, - "loss": 5.2471, - "step": 49574 - }, - { - "epoch": 25.853976531942635, - "grad_norm": 1.5473352670669556, - "learning_rate": 5.069648241206031e-05, - "loss": 5.519, - "step": 49575 - }, - { - "epoch": 25.85449804432855, - "grad_norm": 1.619220495223999, - "learning_rate": 5.069547738693467e-05, - "loss": 5.0582, - "step": 49576 - }, - { - "epoch": 25.85501955671447, - "grad_norm": 1.5892395973205566, - "learning_rate": 5.069447236180904e-05, - "loss": 5.558, - "step": 49577 - }, - { - "epoch": 25.85554106910039, - "grad_norm": 1.5641698837280273, - "learning_rate": 5.069346733668342e-05, - "loss": 4.6808, - "step": 49578 - }, - { - "epoch": 25.85606258148631, - "grad_norm": 1.5560139417648315, - "learning_rate": 5.0692462311557785e-05, - "loss": 5.5736, - "step": 49579 - }, - { - "epoch": 25.85658409387223, - "grad_norm": 1.6315935850143433, - "learning_rate": 5.069145728643216e-05, - "loss": 5.2588, - "step": 49580 - }, - { - "epoch": 25.85710560625815, - "grad_norm": 1.50831139087677, - "learning_rate": 5.0690452261306534e-05, - "loss": 4.7513, - "step": 49581 - }, - { - "epoch": 25.857627118644068, - "grad_norm": 1.5721049308776855, - "learning_rate": 5.068944723618091e-05, - "loss": 5.3292, - "step": 49582 - }, - { - "epoch": 25.858148631029987, - "grad_norm": 1.5985435247421265, - "learning_rate": 5.0688442211055276e-05, - "loss": 5.5423, - "step": 49583 - }, - { - "epoch": 25.858670143415907, - "grad_norm": 1.6277351379394531, - "learning_rate": 5.0687437185929654e-05, - "loss": 5.101, - "step": 49584 - }, - { - "epoch": 25.859191655801826, - "grad_norm": 1.6405960321426392, - "learning_rate": 5.068643216080402e-05, - "loss": 5.3736, - "step": 49585 - }, - { - "epoch": 25.859713168187746, - "grad_norm": 1.5931187868118286, - "learning_rate": 5.0685427135678396e-05, - "loss": 5.1179, - "step": 49586 - }, - { - "epoch": 25.860234680573665, - "grad_norm": 1.5884819030761719, - "learning_rate": 5.068442211055277e-05, - "loss": 4.4604, - "step": 49587 - }, - { - "epoch": 25.860756192959585, - "grad_norm": 1.5796113014221191, - "learning_rate": 5.0683417085427145e-05, - "loss": 5.3554, - "step": 49588 - }, - { - "epoch": 25.8612777053455, - "grad_norm": 1.6691526174545288, - "learning_rate": 5.068241206030151e-05, - "loss": 5.0553, - "step": 49589 - }, - { - "epoch": 25.86179921773142, - "grad_norm": 1.6295133829116821, - "learning_rate": 5.0681407035175874e-05, - "loss": 5.5442, - "step": 49590 - }, - { - "epoch": 25.86232073011734, - "grad_norm": 1.6541332006454468, - "learning_rate": 5.068040201005025e-05, - "loss": 5.1741, - "step": 49591 - }, - { - "epoch": 25.86284224250326, - "grad_norm": 1.626213550567627, - "learning_rate": 5.067939698492462e-05, - "loss": 5.3281, - "step": 49592 - }, - { - "epoch": 25.86336375488918, - "grad_norm": 1.5253450870513916, - "learning_rate": 5.0678391959799e-05, - "loss": 5.7736, - "step": 49593 - }, - { - "epoch": 25.863885267275098, - "grad_norm": 1.6202216148376465, - "learning_rate": 5.0677386934673365e-05, - "loss": 5.2085, - "step": 49594 - }, - { - "epoch": 25.864406779661017, - "grad_norm": 1.5051106214523315, - "learning_rate": 5.067638190954774e-05, - "loss": 5.3637, - "step": 49595 - }, - { - "epoch": 25.864928292046937, - "grad_norm": 1.5268385410308838, - "learning_rate": 5.067537688442211e-05, - "loss": 5.0723, - "step": 49596 - }, - { - "epoch": 25.865449804432856, - "grad_norm": 1.5474015474319458, - "learning_rate": 5.0674371859296485e-05, - "loss": 5.389, - "step": 49597 - }, - { - "epoch": 25.865971316818776, - "grad_norm": 1.705034613609314, - "learning_rate": 5.0673366834170856e-05, - "loss": 5.271, - "step": 49598 - }, - { - "epoch": 25.866492829204695, - "grad_norm": 1.6580655574798584, - "learning_rate": 5.0672361809045233e-05, - "loss": 5.2843, - "step": 49599 - }, - { - "epoch": 25.86701434159061, - "grad_norm": 1.6960854530334473, - "learning_rate": 5.06713567839196e-05, - "loss": 5.2925, - "step": 49600 - }, - { - "epoch": 25.86753585397653, - "grad_norm": 1.4683719873428345, - "learning_rate": 5.0670351758793976e-05, - "loss": 4.6469, - "step": 49601 - }, - { - "epoch": 25.86805736636245, - "grad_norm": 1.6177430152893066, - "learning_rate": 5.066934673366835e-05, - "loss": 5.3442, - "step": 49602 - }, - { - "epoch": 25.86857887874837, - "grad_norm": 1.542026400566101, - "learning_rate": 5.0668341708542724e-05, - "loss": 5.36, - "step": 49603 - }, - { - "epoch": 25.86910039113429, - "grad_norm": 1.4495600461959839, - "learning_rate": 5.066733668341709e-05, - "loss": 5.1327, - "step": 49604 - }, - { - "epoch": 25.86962190352021, - "grad_norm": 1.596571445465088, - "learning_rate": 5.066633165829145e-05, - "loss": 4.9321, - "step": 49605 - }, - { - "epoch": 25.870143415906128, - "grad_norm": 1.484727382659912, - "learning_rate": 5.066532663316583e-05, - "loss": 4.9196, - "step": 49606 - }, - { - "epoch": 25.870664928292047, - "grad_norm": 1.60414719581604, - "learning_rate": 5.06643216080402e-05, - "loss": 4.8008, - "step": 49607 - }, - { - "epoch": 25.871186440677967, - "grad_norm": 1.5871433019638062, - "learning_rate": 5.066331658291458e-05, - "loss": 5.045, - "step": 49608 - }, - { - "epoch": 25.871707953063886, - "grad_norm": 1.554621696472168, - "learning_rate": 5.0662311557788944e-05, - "loss": 4.867, - "step": 49609 - }, - { - "epoch": 25.872229465449806, - "grad_norm": 1.4861476421356201, - "learning_rate": 5.066130653266332e-05, - "loss": 5.5229, - "step": 49610 - }, - { - "epoch": 25.872750977835725, - "grad_norm": 1.4941426515579224, - "learning_rate": 5.0660301507537686e-05, - "loss": 5.2716, - "step": 49611 - }, - { - "epoch": 25.87327249022164, - "grad_norm": 1.6487493515014648, - "learning_rate": 5.0659296482412064e-05, - "loss": 4.7105, - "step": 49612 - }, - { - "epoch": 25.87379400260756, - "grad_norm": 1.5879939794540405, - "learning_rate": 5.0658291457286435e-05, - "loss": 4.8758, - "step": 49613 - }, - { - "epoch": 25.87431551499348, - "grad_norm": 1.650282382965088, - "learning_rate": 5.065728643216081e-05, - "loss": 5.5628, - "step": 49614 - }, - { - "epoch": 25.8748370273794, - "grad_norm": 1.5934847593307495, - "learning_rate": 5.065628140703518e-05, - "loss": 5.2864, - "step": 49615 - }, - { - "epoch": 25.87535853976532, - "grad_norm": 1.8107908964157104, - "learning_rate": 5.0655276381909555e-05, - "loss": 5.0097, - "step": 49616 - }, - { - "epoch": 25.87588005215124, - "grad_norm": 1.6516040563583374, - "learning_rate": 5.065427135678392e-05, - "loss": 5.2388, - "step": 49617 - }, - { - "epoch": 25.876401564537158, - "grad_norm": 1.605051875114441, - "learning_rate": 5.065326633165829e-05, - "loss": 5.4741, - "step": 49618 - }, - { - "epoch": 25.876923076923077, - "grad_norm": 1.514475703239441, - "learning_rate": 5.065226130653267e-05, - "loss": 4.9164, - "step": 49619 - }, - { - "epoch": 25.877444589308997, - "grad_norm": 1.6271687746047974, - "learning_rate": 5.065125628140703e-05, - "loss": 5.1857, - "step": 49620 - }, - { - "epoch": 25.877966101694916, - "grad_norm": 1.5166915655136108, - "learning_rate": 5.065025125628141e-05, - "loss": 5.5638, - "step": 49621 - }, - { - "epoch": 25.878487614080836, - "grad_norm": 1.6739660501480103, - "learning_rate": 5.064924623115578e-05, - "loss": 4.8131, - "step": 49622 - }, - { - "epoch": 25.879009126466755, - "grad_norm": 1.5725338459014893, - "learning_rate": 5.064824120603016e-05, - "loss": 5.4286, - "step": 49623 - }, - { - "epoch": 25.87953063885267, - "grad_norm": 1.4778566360473633, - "learning_rate": 5.064723618090452e-05, - "loss": 5.46, - "step": 49624 - }, - { - "epoch": 25.88005215123859, - "grad_norm": 1.5577316284179688, - "learning_rate": 5.06462311557789e-05, - "loss": 5.5285, - "step": 49625 - }, - { - "epoch": 25.88057366362451, - "grad_norm": 1.6236218214035034, - "learning_rate": 5.0645226130653265e-05, - "loss": 5.229, - "step": 49626 - }, - { - "epoch": 25.88109517601043, - "grad_norm": 1.7519948482513428, - "learning_rate": 5.064422110552764e-05, - "loss": 5.1624, - "step": 49627 - }, - { - "epoch": 25.88161668839635, - "grad_norm": 1.4752305746078491, - "learning_rate": 5.0643216080402014e-05, - "loss": 5.5079, - "step": 49628 - }, - { - "epoch": 25.88213820078227, - "grad_norm": 1.456191062927246, - "learning_rate": 5.064221105527639e-05, - "loss": 5.6835, - "step": 49629 - }, - { - "epoch": 25.882659713168188, - "grad_norm": 1.532139778137207, - "learning_rate": 5.0641206030150756e-05, - "loss": 5.3627, - "step": 49630 - }, - { - "epoch": 25.883181225554107, - "grad_norm": 1.4621633291244507, - "learning_rate": 5.064020100502512e-05, - "loss": 4.7263, - "step": 49631 - }, - { - "epoch": 25.883702737940027, - "grad_norm": 1.5841830968856812, - "learning_rate": 5.06391959798995e-05, - "loss": 5.3692, - "step": 49632 - }, - { - "epoch": 25.884224250325946, - "grad_norm": 1.4491742849349976, - "learning_rate": 5.063819095477387e-05, - "loss": 5.6551, - "step": 49633 - }, - { - "epoch": 25.884745762711866, - "grad_norm": 1.5217739343643188, - "learning_rate": 5.063718592964825e-05, - "loss": 5.4256, - "step": 49634 - }, - { - "epoch": 25.885267275097785, - "grad_norm": 1.5143941640853882, - "learning_rate": 5.063618090452261e-05, - "loss": 4.2643, - "step": 49635 - }, - { - "epoch": 25.8857887874837, - "grad_norm": 1.5677764415740967, - "learning_rate": 5.063517587939699e-05, - "loss": 4.901, - "step": 49636 - }, - { - "epoch": 25.88631029986962, - "grad_norm": 1.5307576656341553, - "learning_rate": 5.0634170854271354e-05, - "loss": 5.0591, - "step": 49637 - }, - { - "epoch": 25.88683181225554, - "grad_norm": 1.4900758266448975, - "learning_rate": 5.063316582914573e-05, - "loss": 5.6119, - "step": 49638 - }, - { - "epoch": 25.88735332464146, - "grad_norm": 1.6042336225509644, - "learning_rate": 5.06321608040201e-05, - "loss": 5.2681, - "step": 49639 - }, - { - "epoch": 25.88787483702738, - "grad_norm": 1.5299960374832153, - "learning_rate": 5.063115577889448e-05, - "loss": 5.3143, - "step": 49640 - }, - { - "epoch": 25.8883963494133, - "grad_norm": 1.6244276762008667, - "learning_rate": 5.0630150753768845e-05, - "loss": 5.3325, - "step": 49641 - }, - { - "epoch": 25.888917861799218, - "grad_norm": 1.699157476425171, - "learning_rate": 5.062914572864322e-05, - "loss": 4.7789, - "step": 49642 - }, - { - "epoch": 25.889439374185137, - "grad_norm": 1.4542511701583862, - "learning_rate": 5.0628140703517594e-05, - "loss": 5.3077, - "step": 49643 - }, - { - "epoch": 25.889960886571057, - "grad_norm": 1.4602171182632446, - "learning_rate": 5.062713567839196e-05, - "loss": 5.4686, - "step": 49644 - }, - { - "epoch": 25.890482398956976, - "grad_norm": 1.582260012626648, - "learning_rate": 5.0626130653266336e-05, - "loss": 5.0797, - "step": 49645 - }, - { - "epoch": 25.891003911342896, - "grad_norm": 1.5520819425582886, - "learning_rate": 5.06251256281407e-05, - "loss": 5.6351, - "step": 49646 - }, - { - "epoch": 25.891525423728815, - "grad_norm": 1.5767394304275513, - "learning_rate": 5.062412060301508e-05, - "loss": 5.147, - "step": 49647 - }, - { - "epoch": 25.89204693611473, - "grad_norm": 1.568686842918396, - "learning_rate": 5.062311557788945e-05, - "loss": 4.9463, - "step": 49648 - }, - { - "epoch": 25.89256844850065, - "grad_norm": 1.5151571035385132, - "learning_rate": 5.062211055276383e-05, - "loss": 5.5886, - "step": 49649 - }, - { - "epoch": 25.89308996088657, - "grad_norm": 1.6559059619903564, - "learning_rate": 5.062110552763819e-05, - "loss": 5.3048, - "step": 49650 - }, - { - "epoch": 25.89361147327249, - "grad_norm": 1.7158441543579102, - "learning_rate": 5.062010050251257e-05, - "loss": 5.0173, - "step": 49651 - }, - { - "epoch": 25.89413298565841, - "grad_norm": 1.6177597045898438, - "learning_rate": 5.061909547738693e-05, - "loss": 5.4282, - "step": 49652 - }, - { - "epoch": 25.89465449804433, - "grad_norm": 1.5130457878112793, - "learning_rate": 5.061809045226131e-05, - "loss": 5.3224, - "step": 49653 - }, - { - "epoch": 25.895176010430248, - "grad_norm": 1.5841517448425293, - "learning_rate": 5.061708542713568e-05, - "loss": 5.1208, - "step": 49654 - }, - { - "epoch": 25.895697522816167, - "grad_norm": 1.5145801305770874, - "learning_rate": 5.061608040201006e-05, - "loss": 5.3165, - "step": 49655 - }, - { - "epoch": 25.896219035202087, - "grad_norm": 1.5488249063491821, - "learning_rate": 5.0615075376884424e-05, - "loss": 5.2356, - "step": 49656 - }, - { - "epoch": 25.896740547588006, - "grad_norm": 1.5108458995819092, - "learning_rate": 5.061407035175879e-05, - "loss": 5.8774, - "step": 49657 - }, - { - "epoch": 25.897262059973926, - "grad_norm": 1.5750727653503418, - "learning_rate": 5.0613065326633166e-05, - "loss": 5.3238, - "step": 49658 - }, - { - "epoch": 25.89778357235984, - "grad_norm": 1.619541883468628, - "learning_rate": 5.061206030150754e-05, - "loss": 5.33, - "step": 49659 - }, - { - "epoch": 25.89830508474576, - "grad_norm": 1.6741149425506592, - "learning_rate": 5.0611055276381915e-05, - "loss": 4.9091, - "step": 49660 - }, - { - "epoch": 25.89882659713168, - "grad_norm": 1.6016485691070557, - "learning_rate": 5.061005025125628e-05, - "loss": 4.6525, - "step": 49661 - }, - { - "epoch": 25.8993481095176, - "grad_norm": 1.70486319065094, - "learning_rate": 5.060904522613066e-05, - "loss": 5.4695, - "step": 49662 - }, - { - "epoch": 25.89986962190352, - "grad_norm": 1.6090404987335205, - "learning_rate": 5.060804020100502e-05, - "loss": 4.7666, - "step": 49663 - }, - { - "epoch": 25.90039113428944, - "grad_norm": 1.538678526878357, - "learning_rate": 5.06070351758794e-05, - "loss": 4.9463, - "step": 49664 - }, - { - "epoch": 25.90091264667536, - "grad_norm": 1.5059574842453003, - "learning_rate": 5.060603015075377e-05, - "loss": 4.8128, - "step": 49665 - }, - { - "epoch": 25.901434159061278, - "grad_norm": 1.604464054107666, - "learning_rate": 5.060502512562815e-05, - "loss": 5.1057, - "step": 49666 - }, - { - "epoch": 25.901955671447197, - "grad_norm": 1.5495764017105103, - "learning_rate": 5.060402010050251e-05, - "loss": 5.0951, - "step": 49667 - }, - { - "epoch": 25.902477183833117, - "grad_norm": 1.6324249505996704, - "learning_rate": 5.060301507537689e-05, - "loss": 4.887, - "step": 49668 - }, - { - "epoch": 25.902998696219036, - "grad_norm": 1.4892849922180176, - "learning_rate": 5.060201005025126e-05, - "loss": 5.2126, - "step": 49669 - }, - { - "epoch": 25.903520208604956, - "grad_norm": 1.588434100151062, - "learning_rate": 5.0601005025125626e-05, - "loss": 5.2475, - "step": 49670 - }, - { - "epoch": 25.904041720990875, - "grad_norm": 1.5233299732208252, - "learning_rate": 5.0600000000000003e-05, - "loss": 5.771, - "step": 49671 - }, - { - "epoch": 25.90456323337679, - "grad_norm": 1.6560766696929932, - "learning_rate": 5.059899497487437e-05, - "loss": 5.0398, - "step": 49672 - }, - { - "epoch": 25.90508474576271, - "grad_norm": 1.53208589553833, - "learning_rate": 5.0597989949748746e-05, - "loss": 5.3525, - "step": 49673 - }, - { - "epoch": 25.90560625814863, - "grad_norm": 1.5583393573760986, - "learning_rate": 5.0596984924623117e-05, - "loss": 5.3723, - "step": 49674 - }, - { - "epoch": 25.90612777053455, - "grad_norm": 1.5406908988952637, - "learning_rate": 5.0595979899497494e-05, - "loss": 5.3343, - "step": 49675 - }, - { - "epoch": 25.90664928292047, - "grad_norm": 1.4924697875976562, - "learning_rate": 5.059497487437186e-05, - "loss": 5.134, - "step": 49676 - }, - { - "epoch": 25.90717079530639, - "grad_norm": 1.5244537591934204, - "learning_rate": 5.0593969849246237e-05, - "loss": 5.0286, - "step": 49677 - }, - { - "epoch": 25.907692307692308, - "grad_norm": 1.6429951190948486, - "learning_rate": 5.05929648241206e-05, - "loss": 5.1592, - "step": 49678 - }, - { - "epoch": 25.908213820078227, - "grad_norm": 1.4395991563796997, - "learning_rate": 5.059195979899498e-05, - "loss": 5.3489, - "step": 49679 - }, - { - "epoch": 25.908735332464147, - "grad_norm": 1.572118878364563, - "learning_rate": 5.059095477386935e-05, - "loss": 4.4116, - "step": 49680 - }, - { - "epoch": 25.909256844850066, - "grad_norm": 1.59121835231781, - "learning_rate": 5.058994974874373e-05, - "loss": 5.4703, - "step": 49681 - }, - { - "epoch": 25.909778357235986, - "grad_norm": 1.5811995267868042, - "learning_rate": 5.058894472361809e-05, - "loss": 5.1028, - "step": 49682 - }, - { - "epoch": 25.910299869621902, - "grad_norm": 1.6827082633972168, - "learning_rate": 5.058793969849247e-05, - "loss": 4.8138, - "step": 49683 - }, - { - "epoch": 25.91082138200782, - "grad_norm": 1.6263772249221802, - "learning_rate": 5.0586934673366834e-05, - "loss": 4.7933, - "step": 49684 - }, - { - "epoch": 25.91134289439374, - "grad_norm": 1.5300601720809937, - "learning_rate": 5.0585929648241205e-05, - "loss": 5.6238, - "step": 49685 - }, - { - "epoch": 25.91186440677966, - "grad_norm": 1.553573489189148, - "learning_rate": 5.058492462311558e-05, - "loss": 5.4116, - "step": 49686 - }, - { - "epoch": 25.91238591916558, - "grad_norm": 1.5374358892440796, - "learning_rate": 5.058391959798995e-05, - "loss": 5.2711, - "step": 49687 - }, - { - "epoch": 25.9129074315515, - "grad_norm": 1.6005572080612183, - "learning_rate": 5.0582914572864325e-05, - "loss": 4.8762, - "step": 49688 - }, - { - "epoch": 25.91342894393742, - "grad_norm": 1.5607010126113892, - "learning_rate": 5.0581909547738696e-05, - "loss": 5.2806, - "step": 49689 - }, - { - "epoch": 25.913950456323338, - "grad_norm": 1.5925556421279907, - "learning_rate": 5.0580904522613074e-05, - "loss": 5.0532, - "step": 49690 - }, - { - "epoch": 25.914471968709258, - "grad_norm": 1.4775947332382202, - "learning_rate": 5.057989949748744e-05, - "loss": 5.6018, - "step": 49691 - }, - { - "epoch": 25.914993481095177, - "grad_norm": 1.5478781461715698, - "learning_rate": 5.0578894472361816e-05, - "loss": 5.6898, - "step": 49692 - }, - { - "epoch": 25.915514993481096, - "grad_norm": 1.533357858657837, - "learning_rate": 5.057788944723618e-05, - "loss": 5.3855, - "step": 49693 - }, - { - "epoch": 25.916036505867016, - "grad_norm": 1.7544904947280884, - "learning_rate": 5.057688442211056e-05, - "loss": 5.0734, - "step": 49694 - }, - { - "epoch": 25.916558018252932, - "grad_norm": 1.5242489576339722, - "learning_rate": 5.057587939698493e-05, - "loss": 5.3787, - "step": 49695 - }, - { - "epoch": 25.91707953063885, - "grad_norm": 1.6481091976165771, - "learning_rate": 5.057487437185931e-05, - "loss": 5.131, - "step": 49696 - }, - { - "epoch": 25.91760104302477, - "grad_norm": 1.5271209478378296, - "learning_rate": 5.057386934673367e-05, - "loss": 5.122, - "step": 49697 - }, - { - "epoch": 25.91812255541069, - "grad_norm": 1.630136489868164, - "learning_rate": 5.0572864321608035e-05, - "loss": 4.5078, - "step": 49698 - }, - { - "epoch": 25.91864406779661, - "grad_norm": 1.5832562446594238, - "learning_rate": 5.057185929648241e-05, - "loss": 5.2866, - "step": 49699 - }, - { - "epoch": 25.91916558018253, - "grad_norm": 1.7158887386322021, - "learning_rate": 5.0570854271356784e-05, - "loss": 5.1356, - "step": 49700 - }, - { - "epoch": 25.91968709256845, - "grad_norm": 1.6296045780181885, - "learning_rate": 5.056984924623116e-05, - "loss": 5.0555, - "step": 49701 - }, - { - "epoch": 25.920208604954368, - "grad_norm": 1.5664219856262207, - "learning_rate": 5.0568844221105526e-05, - "loss": 5.17, - "step": 49702 - }, - { - "epoch": 25.920730117340288, - "grad_norm": 1.585598349571228, - "learning_rate": 5.0567839195979904e-05, - "loss": 5.295, - "step": 49703 - }, - { - "epoch": 25.921251629726207, - "grad_norm": 1.599936842918396, - "learning_rate": 5.056683417085427e-05, - "loss": 5.3111, - "step": 49704 - }, - { - "epoch": 25.921773142112126, - "grad_norm": 1.583478569984436, - "learning_rate": 5.0565829145728646e-05, - "loss": 5.1241, - "step": 49705 - }, - { - "epoch": 25.922294654498046, - "grad_norm": 1.6150143146514893, - "learning_rate": 5.056482412060302e-05, - "loss": 5.037, - "step": 49706 - }, - { - "epoch": 25.922816166883962, - "grad_norm": 1.567314863204956, - "learning_rate": 5.0563819095477395e-05, - "loss": 4.809, - "step": 49707 - }, - { - "epoch": 25.92333767926988, - "grad_norm": 1.5850931406021118, - "learning_rate": 5.056281407035176e-05, - "loss": 5.2791, - "step": 49708 - }, - { - "epoch": 25.9238591916558, - "grad_norm": 1.5912948846817017, - "learning_rate": 5.056180904522614e-05, - "loss": 5.1318, - "step": 49709 - }, - { - "epoch": 25.92438070404172, - "grad_norm": 1.5874093770980835, - "learning_rate": 5.056080402010051e-05, - "loss": 4.8861, - "step": 49710 - }, - { - "epoch": 25.92490221642764, - "grad_norm": 1.5220916271209717, - "learning_rate": 5.055979899497487e-05, - "loss": 5.2674, - "step": 49711 - }, - { - "epoch": 25.92542372881356, - "grad_norm": 1.5719192028045654, - "learning_rate": 5.055879396984925e-05, - "loss": 5.2808, - "step": 49712 - }, - { - "epoch": 25.92594524119948, - "grad_norm": 1.5864789485931396, - "learning_rate": 5.0557788944723615e-05, - "loss": 5.4356, - "step": 49713 - }, - { - "epoch": 25.926466753585398, - "grad_norm": 1.6272556781768799, - "learning_rate": 5.055678391959799e-05, - "loss": 5.2329, - "step": 49714 - }, - { - "epoch": 25.926988265971318, - "grad_norm": 1.5981441736221313, - "learning_rate": 5.0555778894472364e-05, - "loss": 4.7477, - "step": 49715 - }, - { - "epoch": 25.927509778357237, - "grad_norm": 1.6219923496246338, - "learning_rate": 5.055477386934674e-05, - "loss": 5.0067, - "step": 49716 - }, - { - "epoch": 25.928031290743156, - "grad_norm": 1.595299482345581, - "learning_rate": 5.0553768844221106e-05, - "loss": 5.4259, - "step": 49717 - }, - { - "epoch": 25.928552803129076, - "grad_norm": 1.6182856559753418, - "learning_rate": 5.0552763819095484e-05, - "loss": 5.199, - "step": 49718 - }, - { - "epoch": 25.929074315514992, - "grad_norm": 1.6206239461898804, - "learning_rate": 5.055175879396985e-05, - "loss": 5.3385, - "step": 49719 - }, - { - "epoch": 25.92959582790091, - "grad_norm": 1.5726951360702515, - "learning_rate": 5.0550753768844226e-05, - "loss": 5.7093, - "step": 49720 - }, - { - "epoch": 25.93011734028683, - "grad_norm": 1.5933152437210083, - "learning_rate": 5.05497487437186e-05, - "loss": 4.9724, - "step": 49721 - }, - { - "epoch": 25.93063885267275, - "grad_norm": 1.5899559259414673, - "learning_rate": 5.0548743718592974e-05, - "loss": 4.9525, - "step": 49722 - }, - { - "epoch": 25.93116036505867, - "grad_norm": 1.8017326593399048, - "learning_rate": 5.054773869346734e-05, - "loss": 4.62, - "step": 49723 - }, - { - "epoch": 25.93168187744459, - "grad_norm": 1.50344979763031, - "learning_rate": 5.05467336683417e-05, - "loss": 5.4385, - "step": 49724 - }, - { - "epoch": 25.93220338983051, - "grad_norm": 1.588431715965271, - "learning_rate": 5.054572864321608e-05, - "loss": 4.9762, - "step": 49725 - }, - { - "epoch": 25.932724902216428, - "grad_norm": 1.6521419286727905, - "learning_rate": 5.054472361809045e-05, - "loss": 5.5268, - "step": 49726 - }, - { - "epoch": 25.933246414602348, - "grad_norm": 1.5612716674804688, - "learning_rate": 5.054371859296483e-05, - "loss": 5.0163, - "step": 49727 - }, - { - "epoch": 25.933767926988267, - "grad_norm": 1.5705504417419434, - "learning_rate": 5.0542713567839194e-05, - "loss": 5.1319, - "step": 49728 - }, - { - "epoch": 25.934289439374187, - "grad_norm": 1.5617585182189941, - "learning_rate": 5.054170854271357e-05, - "loss": 5.2808, - "step": 49729 - }, - { - "epoch": 25.934810951760106, - "grad_norm": 1.469019889831543, - "learning_rate": 5.054070351758794e-05, - "loss": 5.7607, - "step": 49730 - }, - { - "epoch": 25.935332464146022, - "grad_norm": 1.6591640710830688, - "learning_rate": 5.053969849246232e-05, - "loss": 5.4767, - "step": 49731 - }, - { - "epoch": 25.93585397653194, - "grad_norm": 1.5485382080078125, - "learning_rate": 5.0538693467336685e-05, - "loss": 5.4298, - "step": 49732 - }, - { - "epoch": 25.93637548891786, - "grad_norm": 1.4884490966796875, - "learning_rate": 5.053768844221106e-05, - "loss": 5.3236, - "step": 49733 - }, - { - "epoch": 25.93689700130378, - "grad_norm": 1.6683859825134277, - "learning_rate": 5.053668341708543e-05, - "loss": 4.5523, - "step": 49734 - }, - { - "epoch": 25.9374185136897, - "grad_norm": 1.5376648902893066, - "learning_rate": 5.0535678391959805e-05, - "loss": 5.3399, - "step": 49735 - }, - { - "epoch": 25.93794002607562, - "grad_norm": 1.5781092643737793, - "learning_rate": 5.0534673366834176e-05, - "loss": 5.0177, - "step": 49736 - }, - { - "epoch": 25.93846153846154, - "grad_norm": 1.507241129875183, - "learning_rate": 5.053366834170854e-05, - "loss": 5.4279, - "step": 49737 - }, - { - "epoch": 25.938983050847458, - "grad_norm": 1.5954539775848389, - "learning_rate": 5.053266331658292e-05, - "loss": 5.5577, - "step": 49738 - }, - { - "epoch": 25.939504563233378, - "grad_norm": 1.5570346117019653, - "learning_rate": 5.053165829145728e-05, - "loss": 5.4836, - "step": 49739 - }, - { - "epoch": 25.940026075619297, - "grad_norm": 1.5413252115249634, - "learning_rate": 5.053065326633166e-05, - "loss": 5.2395, - "step": 49740 - }, - { - "epoch": 25.940547588005217, - "grad_norm": 1.6190102100372314, - "learning_rate": 5.052964824120603e-05, - "loss": 5.1257, - "step": 49741 - }, - { - "epoch": 25.941069100391136, - "grad_norm": 1.6561888456344604, - "learning_rate": 5.052864321608041e-05, - "loss": 4.9659, - "step": 49742 - }, - { - "epoch": 25.941590612777052, - "grad_norm": 1.714999794960022, - "learning_rate": 5.052763819095477e-05, - "loss": 4.6342, - "step": 49743 - }, - { - "epoch": 25.94211212516297, - "grad_norm": 1.5849323272705078, - "learning_rate": 5.052663316582915e-05, - "loss": 5.3327, - "step": 49744 - }, - { - "epoch": 25.94263363754889, - "grad_norm": 1.6554206609725952, - "learning_rate": 5.0525628140703515e-05, - "loss": 5.2266, - "step": 49745 - }, - { - "epoch": 25.94315514993481, - "grad_norm": 1.5512250661849976, - "learning_rate": 5.052462311557789e-05, - "loss": 4.9699, - "step": 49746 - }, - { - "epoch": 25.94367666232073, - "grad_norm": 1.5667290687561035, - "learning_rate": 5.0523618090452264e-05, - "loss": 5.4505, - "step": 49747 - }, - { - "epoch": 25.94419817470665, - "grad_norm": 1.5546510219573975, - "learning_rate": 5.052261306532664e-05, - "loss": 5.2626, - "step": 49748 - }, - { - "epoch": 25.94471968709257, - "grad_norm": 1.4921232461929321, - "learning_rate": 5.0521608040201006e-05, - "loss": 4.6246, - "step": 49749 - }, - { - "epoch": 25.945241199478488, - "grad_norm": 1.4779304265975952, - "learning_rate": 5.052060301507537e-05, - "loss": 5.5724, - "step": 49750 - }, - { - "epoch": 25.945762711864408, - "grad_norm": 1.5938775539398193, - "learning_rate": 5.051959798994975e-05, - "loss": 5.4145, - "step": 49751 - }, - { - "epoch": 25.946284224250327, - "grad_norm": 1.446546196937561, - "learning_rate": 5.051859296482412e-05, - "loss": 5.5248, - "step": 49752 - }, - { - "epoch": 25.946805736636247, - "grad_norm": 1.6441422700881958, - "learning_rate": 5.05175879396985e-05, - "loss": 5.0558, - "step": 49753 - }, - { - "epoch": 25.947327249022166, - "grad_norm": 1.4974286556243896, - "learning_rate": 5.051658291457286e-05, - "loss": 5.5311, - "step": 49754 - }, - { - "epoch": 25.947848761408082, - "grad_norm": 1.5930880308151245, - "learning_rate": 5.051557788944724e-05, - "loss": 5.0789, - "step": 49755 - }, - { - "epoch": 25.948370273794, - "grad_norm": 1.7435616254806519, - "learning_rate": 5.051457286432161e-05, - "loss": 4.4324, - "step": 49756 - }, - { - "epoch": 25.94889178617992, - "grad_norm": 1.5853358507156372, - "learning_rate": 5.051356783919599e-05, - "loss": 5.3403, - "step": 49757 - }, - { - "epoch": 25.94941329856584, - "grad_norm": 1.5644216537475586, - "learning_rate": 5.051256281407035e-05, - "loss": 5.7999, - "step": 49758 - }, - { - "epoch": 25.94993481095176, - "grad_norm": 1.669603705406189, - "learning_rate": 5.051155778894473e-05, - "loss": 5.3256, - "step": 49759 - }, - { - "epoch": 25.95045632333768, - "grad_norm": 1.6117825508117676, - "learning_rate": 5.0510552763819095e-05, - "loss": 4.8755, - "step": 49760 - }, - { - "epoch": 25.9509778357236, - "grad_norm": 1.4320485591888428, - "learning_rate": 5.050954773869347e-05, - "loss": 4.9666, - "step": 49761 - }, - { - "epoch": 25.951499348109518, - "grad_norm": 1.5288912057876587, - "learning_rate": 5.0508542713567844e-05, - "loss": 5.5408, - "step": 49762 - }, - { - "epoch": 25.952020860495438, - "grad_norm": 1.6417725086212158, - "learning_rate": 5.050753768844221e-05, - "loss": 5.0686, - "step": 49763 - }, - { - "epoch": 25.952542372881357, - "grad_norm": 1.555238127708435, - "learning_rate": 5.0506532663316586e-05, - "loss": 5.5991, - "step": 49764 - }, - { - "epoch": 25.953063885267277, - "grad_norm": 1.4612008333206177, - "learning_rate": 5.050552763819095e-05, - "loss": 5.5105, - "step": 49765 - }, - { - "epoch": 25.953585397653193, - "grad_norm": 1.6306742429733276, - "learning_rate": 5.050452261306533e-05, - "loss": 5.3811, - "step": 49766 - }, - { - "epoch": 25.954106910039112, - "grad_norm": 1.57932710647583, - "learning_rate": 5.05035175879397e-05, - "loss": 5.5904, - "step": 49767 - }, - { - "epoch": 25.95462842242503, - "grad_norm": 1.5359058380126953, - "learning_rate": 5.050251256281408e-05, - "loss": 5.6282, - "step": 49768 - }, - { - "epoch": 25.95514993481095, - "grad_norm": 1.7532477378845215, - "learning_rate": 5.050150753768844e-05, - "loss": 5.0519, - "step": 49769 - }, - { - "epoch": 25.95567144719687, - "grad_norm": 1.6247059106826782, - "learning_rate": 5.050050251256282e-05, - "loss": 5.3184, - "step": 49770 - }, - { - "epoch": 25.95619295958279, - "grad_norm": 1.581208348274231, - "learning_rate": 5.049949748743718e-05, - "loss": 5.1828, - "step": 49771 - }, - { - "epoch": 25.95671447196871, - "grad_norm": 1.633225440979004, - "learning_rate": 5.049849246231156e-05, - "loss": 4.7107, - "step": 49772 - }, - { - "epoch": 25.95723598435463, - "grad_norm": 2.315793991088867, - "learning_rate": 5.049748743718593e-05, - "loss": 4.9035, - "step": 49773 - }, - { - "epoch": 25.957757496740548, - "grad_norm": 1.6242378950119019, - "learning_rate": 5.049648241206031e-05, - "loss": 5.0585, - "step": 49774 - }, - { - "epoch": 25.958279009126468, - "grad_norm": 1.5733084678649902, - "learning_rate": 5.0495477386934674e-05, - "loss": 5.1664, - "step": 49775 - }, - { - "epoch": 25.958800521512387, - "grad_norm": 1.532844066619873, - "learning_rate": 5.049447236180905e-05, - "loss": 4.9759, - "step": 49776 - }, - { - "epoch": 25.959322033898307, - "grad_norm": 1.6117173433303833, - "learning_rate": 5.049346733668342e-05, - "loss": 5.1307, - "step": 49777 - }, - { - "epoch": 25.959843546284226, - "grad_norm": 1.569705843925476, - "learning_rate": 5.049246231155779e-05, - "loss": 5.0656, - "step": 49778 - }, - { - "epoch": 25.960365058670142, - "grad_norm": 1.6132324934005737, - "learning_rate": 5.0491457286432165e-05, - "loss": 4.7158, - "step": 49779 - }, - { - "epoch": 25.96088657105606, - "grad_norm": 1.5446442365646362, - "learning_rate": 5.049045226130653e-05, - "loss": 4.8318, - "step": 49780 - }, - { - "epoch": 25.96140808344198, - "grad_norm": 1.5413283109664917, - "learning_rate": 5.048944723618091e-05, - "loss": 5.5106, - "step": 49781 - }, - { - "epoch": 25.9619295958279, - "grad_norm": 1.4867891073226929, - "learning_rate": 5.048844221105528e-05, - "loss": 5.4121, - "step": 49782 - }, - { - "epoch": 25.96245110821382, - "grad_norm": 1.549892544746399, - "learning_rate": 5.0487437185929656e-05, - "loss": 5.2531, - "step": 49783 - }, - { - "epoch": 25.96297262059974, - "grad_norm": 1.5599087476730347, - "learning_rate": 5.048643216080402e-05, - "loss": 4.9882, - "step": 49784 - }, - { - "epoch": 25.96349413298566, - "grad_norm": 1.617982029914856, - "learning_rate": 5.04854271356784e-05, - "loss": 5.0741, - "step": 49785 - }, - { - "epoch": 25.96401564537158, - "grad_norm": 1.493056058883667, - "learning_rate": 5.048442211055276e-05, - "loss": 5.3912, - "step": 49786 - }, - { - "epoch": 25.964537157757498, - "grad_norm": 1.6201637983322144, - "learning_rate": 5.048341708542714e-05, - "loss": 4.8395, - "step": 49787 - }, - { - "epoch": 25.965058670143417, - "grad_norm": 1.5247822999954224, - "learning_rate": 5.048241206030151e-05, - "loss": 5.6184, - "step": 49788 - }, - { - "epoch": 25.965580182529337, - "grad_norm": 1.662615180015564, - "learning_rate": 5.048140703517589e-05, - "loss": 4.8433, - "step": 49789 - }, - { - "epoch": 25.966101694915253, - "grad_norm": 1.5597120523452759, - "learning_rate": 5.0480402010050253e-05, - "loss": 4.9448, - "step": 49790 - }, - { - "epoch": 25.966623207301172, - "grad_norm": 1.6713917255401611, - "learning_rate": 5.047939698492462e-05, - "loss": 4.8546, - "step": 49791 - }, - { - "epoch": 25.96714471968709, - "grad_norm": 1.5485515594482422, - "learning_rate": 5.0478391959798996e-05, - "loss": 5.513, - "step": 49792 - }, - { - "epoch": 25.96766623207301, - "grad_norm": 1.492374300956726, - "learning_rate": 5.0477386934673367e-05, - "loss": 5.5201, - "step": 49793 - }, - { - "epoch": 25.96818774445893, - "grad_norm": 1.6300476789474487, - "learning_rate": 5.0476381909547744e-05, - "loss": 4.9398, - "step": 49794 - }, - { - "epoch": 25.96870925684485, - "grad_norm": 1.5074901580810547, - "learning_rate": 5.047537688442211e-05, - "loss": 5.2326, - "step": 49795 - }, - { - "epoch": 25.96923076923077, - "grad_norm": 1.5828238725662231, - "learning_rate": 5.0474371859296487e-05, - "loss": 5.1619, - "step": 49796 - }, - { - "epoch": 25.96975228161669, - "grad_norm": 1.511972427368164, - "learning_rate": 5.047336683417086e-05, - "loss": 5.2499, - "step": 49797 - }, - { - "epoch": 25.97027379400261, - "grad_norm": 1.630491852760315, - "learning_rate": 5.0472361809045235e-05, - "loss": 5.0824, - "step": 49798 - }, - { - "epoch": 25.970795306388528, - "grad_norm": 1.5411858558654785, - "learning_rate": 5.04713567839196e-05, - "loss": 5.3288, - "step": 49799 - }, - { - "epoch": 25.971316818774447, - "grad_norm": 1.5976699590682983, - "learning_rate": 5.047035175879398e-05, - "loss": 5.0371, - "step": 49800 - }, - { - "epoch": 25.971838331160367, - "grad_norm": 1.6064398288726807, - "learning_rate": 5.046934673366834e-05, - "loss": 5.2998, - "step": 49801 - }, - { - "epoch": 25.972359843546283, - "grad_norm": 1.6003201007843018, - "learning_rate": 5.046834170854272e-05, - "loss": 5.0896, - "step": 49802 - }, - { - "epoch": 25.972881355932202, - "grad_norm": 1.499881386756897, - "learning_rate": 5.046733668341709e-05, - "loss": 5.2199, - "step": 49803 - }, - { - "epoch": 25.97340286831812, - "grad_norm": 1.4750078916549683, - "learning_rate": 5.0466331658291455e-05, - "loss": 5.2687, - "step": 49804 - }, - { - "epoch": 25.97392438070404, - "grad_norm": 1.4993540048599243, - "learning_rate": 5.046532663316583e-05, - "loss": 4.7765, - "step": 49805 - }, - { - "epoch": 25.97444589308996, - "grad_norm": 1.5658129453659058, - "learning_rate": 5.04643216080402e-05, - "loss": 4.8905, - "step": 49806 - }, - { - "epoch": 25.97496740547588, - "grad_norm": 1.5606919527053833, - "learning_rate": 5.0463316582914575e-05, - "loss": 5.5872, - "step": 49807 - }, - { - "epoch": 25.9754889178618, - "grad_norm": 1.5068895816802979, - "learning_rate": 5.0462311557788946e-05, - "loss": 5.4762, - "step": 49808 - }, - { - "epoch": 25.97601043024772, - "grad_norm": 1.6147211790084839, - "learning_rate": 5.0461306532663324e-05, - "loss": 5.2261, - "step": 49809 - }, - { - "epoch": 25.97653194263364, - "grad_norm": 1.5323469638824463, - "learning_rate": 5.046030150753769e-05, - "loss": 4.9664, - "step": 49810 - }, - { - "epoch": 25.977053455019558, - "grad_norm": 1.5205397605895996, - "learning_rate": 5.0459296482412066e-05, - "loss": 5.7251, - "step": 49811 - }, - { - "epoch": 25.977574967405477, - "grad_norm": 1.521767497062683, - "learning_rate": 5.045829145728643e-05, - "loss": 5.3711, - "step": 49812 - }, - { - "epoch": 25.978096479791397, - "grad_norm": 1.5131927728652954, - "learning_rate": 5.045728643216081e-05, - "loss": 5.24, - "step": 49813 - }, - { - "epoch": 25.978617992177313, - "grad_norm": 1.6066757440567017, - "learning_rate": 5.045628140703518e-05, - "loss": 5.4673, - "step": 49814 - }, - { - "epoch": 25.979139504563232, - "grad_norm": 1.592763900756836, - "learning_rate": 5.045527638190956e-05, - "loss": 4.8468, - "step": 49815 - }, - { - "epoch": 25.97966101694915, - "grad_norm": 1.5876563787460327, - "learning_rate": 5.045427135678392e-05, - "loss": 5.2206, - "step": 49816 - }, - { - "epoch": 25.98018252933507, - "grad_norm": 1.4939838647842407, - "learning_rate": 5.0453266331658285e-05, - "loss": 5.3974, - "step": 49817 - }, - { - "epoch": 25.98070404172099, - "grad_norm": 1.644418716430664, - "learning_rate": 5.045226130653266e-05, - "loss": 4.5166, - "step": 49818 - }, - { - "epoch": 25.98122555410691, - "grad_norm": 1.58175790309906, - "learning_rate": 5.0451256281407034e-05, - "loss": 5.2525, - "step": 49819 - }, - { - "epoch": 25.98174706649283, - "grad_norm": 1.5747029781341553, - "learning_rate": 5.045025125628141e-05, - "loss": 5.6392, - "step": 49820 - }, - { - "epoch": 25.98226857887875, - "grad_norm": 1.5908254384994507, - "learning_rate": 5.0449246231155776e-05, - "loss": 5.4578, - "step": 49821 - }, - { - "epoch": 25.98279009126467, - "grad_norm": 1.5626230239868164, - "learning_rate": 5.0448241206030154e-05, - "loss": 4.9507, - "step": 49822 - }, - { - "epoch": 25.983311603650588, - "grad_norm": 1.56340491771698, - "learning_rate": 5.0447236180904525e-05, - "loss": 5.2032, - "step": 49823 - }, - { - "epoch": 25.983833116036507, - "grad_norm": 1.4311556816101074, - "learning_rate": 5.04462311557789e-05, - "loss": 5.4195, - "step": 49824 - }, - { - "epoch": 25.984354628422427, - "grad_norm": 1.5117915868759155, - "learning_rate": 5.044522613065327e-05, - "loss": 5.13, - "step": 49825 - }, - { - "epoch": 25.984876140808343, - "grad_norm": 1.454552173614502, - "learning_rate": 5.0444221105527645e-05, - "loss": 5.6917, - "step": 49826 - }, - { - "epoch": 25.985397653194262, - "grad_norm": 1.4178123474121094, - "learning_rate": 5.044321608040201e-05, - "loss": 5.3776, - "step": 49827 - }, - { - "epoch": 25.98591916558018, - "grad_norm": 1.4790244102478027, - "learning_rate": 5.044221105527639e-05, - "loss": 5.2721, - "step": 49828 - }, - { - "epoch": 25.9864406779661, - "grad_norm": 1.6627033948898315, - "learning_rate": 5.044120603015076e-05, - "loss": 5.0514, - "step": 49829 - }, - { - "epoch": 25.98696219035202, - "grad_norm": 1.549363374710083, - "learning_rate": 5.044020100502512e-05, - "loss": 5.1919, - "step": 49830 - }, - { - "epoch": 25.98748370273794, - "grad_norm": 1.6241368055343628, - "learning_rate": 5.04391959798995e-05, - "loss": 4.5678, - "step": 49831 - }, - { - "epoch": 25.98800521512386, - "grad_norm": 1.5540589094161987, - "learning_rate": 5.0438190954773865e-05, - "loss": 4.867, - "step": 49832 - }, - { - "epoch": 25.98852672750978, - "grad_norm": 1.490044116973877, - "learning_rate": 5.043718592964824e-05, - "loss": 5.0171, - "step": 49833 - }, - { - "epoch": 25.9890482398957, - "grad_norm": 1.5490013360977173, - "learning_rate": 5.0436180904522614e-05, - "loss": 5.3056, - "step": 49834 - }, - { - "epoch": 25.989569752281618, - "grad_norm": 1.5932077169418335, - "learning_rate": 5.043517587939699e-05, - "loss": 4.9279, - "step": 49835 - }, - { - "epoch": 25.990091264667537, - "grad_norm": 1.5124351978302002, - "learning_rate": 5.0434170854271356e-05, - "loss": 5.3837, - "step": 49836 - }, - { - "epoch": 25.990612777053457, - "grad_norm": 1.550491213798523, - "learning_rate": 5.0433165829145734e-05, - "loss": 5.5344, - "step": 49837 - }, - { - "epoch": 25.991134289439373, - "grad_norm": 1.5916805267333984, - "learning_rate": 5.04321608040201e-05, - "loss": 5.3522, - "step": 49838 - }, - { - "epoch": 25.991655801825292, - "grad_norm": 1.6321598291397095, - "learning_rate": 5.0431155778894476e-05, - "loss": 5.6047, - "step": 49839 - }, - { - "epoch": 25.99217731421121, - "grad_norm": 1.5555574893951416, - "learning_rate": 5.043015075376885e-05, - "loss": 5.3387, - "step": 49840 - }, - { - "epoch": 25.99269882659713, - "grad_norm": 1.5286091566085815, - "learning_rate": 5.0429145728643225e-05, - "loss": 5.3712, - "step": 49841 - }, - { - "epoch": 25.99322033898305, - "grad_norm": 1.5893055200576782, - "learning_rate": 5.042814070351759e-05, - "loss": 4.9381, - "step": 49842 - }, - { - "epoch": 25.99374185136897, - "grad_norm": 1.4439986944198608, - "learning_rate": 5.042713567839196e-05, - "loss": 4.7925, - "step": 49843 - }, - { - "epoch": 25.99426336375489, - "grad_norm": 1.6688432693481445, - "learning_rate": 5.042613065326634e-05, - "loss": 4.8472, - "step": 49844 - }, - { - "epoch": 25.99478487614081, - "grad_norm": 1.6317939758300781, - "learning_rate": 5.04251256281407e-05, - "loss": 5.2285, - "step": 49845 - }, - { - "epoch": 25.99530638852673, - "grad_norm": 1.5751293897628784, - "learning_rate": 5.042412060301508e-05, - "loss": 5.4734, - "step": 49846 - }, - { - "epoch": 25.995827900912648, - "grad_norm": 1.6407735347747803, - "learning_rate": 5.0423115577889444e-05, - "loss": 5.2709, - "step": 49847 - }, - { - "epoch": 25.996349413298567, - "grad_norm": 1.6169683933258057, - "learning_rate": 5.042211055276382e-05, - "loss": 4.7377, - "step": 49848 - }, - { - "epoch": 25.996870925684483, - "grad_norm": 1.5979669094085693, - "learning_rate": 5.042110552763819e-05, - "loss": 4.9615, - "step": 49849 - }, - { - "epoch": 25.997392438070403, - "grad_norm": 1.5826510190963745, - "learning_rate": 5.042010050251257e-05, - "loss": 5.1735, - "step": 49850 - }, - { - "epoch": 25.997913950456322, - "grad_norm": 1.5629518032073975, - "learning_rate": 5.0419095477386935e-05, - "loss": 4.8396, - "step": 49851 - }, - { - "epoch": 25.99843546284224, - "grad_norm": 1.5789201259613037, - "learning_rate": 5.041809045226131e-05, - "loss": 4.9036, - "step": 49852 - }, - { - "epoch": 25.99895697522816, - "grad_norm": 1.6691685914993286, - "learning_rate": 5.041708542713568e-05, - "loss": 4.7955, - "step": 49853 - }, - { - "epoch": 25.99947848761408, - "grad_norm": 1.629495620727539, - "learning_rate": 5.0416080402010055e-05, - "loss": 5.1245, - "step": 49854 - }, - { - "epoch": 26.0, - "grad_norm": 1.7617546319961548, - "learning_rate": 5.0415075376884426e-05, - "loss": 5.7405, - "step": 49855 - }, - { - "epoch": 26.00052151238592, - "grad_norm": 1.630406141281128, - "learning_rate": 5.041407035175879e-05, - "loss": 5.0202, - "step": 49856 - }, - { - "epoch": 26.00104302477184, - "grad_norm": 1.5694475173950195, - "learning_rate": 5.041306532663317e-05, - "loss": 5.447, - "step": 49857 - }, - { - "epoch": 26.00156453715776, - "grad_norm": 1.4947025775909424, - "learning_rate": 5.041206030150753e-05, - "loss": 5.4293, - "step": 49858 - }, - { - "epoch": 26.002086049543678, - "grad_norm": 1.593772530555725, - "learning_rate": 5.041105527638191e-05, - "loss": 5.2391, - "step": 49859 - }, - { - "epoch": 26.002607561929597, - "grad_norm": 1.6247615814208984, - "learning_rate": 5.041005025125628e-05, - "loss": 5.3606, - "step": 49860 - }, - { - "epoch": 26.003129074315513, - "grad_norm": 1.6541873216629028, - "learning_rate": 5.040904522613066e-05, - "loss": 5.3258, - "step": 49861 - }, - { - "epoch": 26.003650586701433, - "grad_norm": 1.590934157371521, - "learning_rate": 5.040804020100502e-05, - "loss": 5.338, - "step": 49862 - }, - { - "epoch": 26.004172099087352, - "grad_norm": 1.5482243299484253, - "learning_rate": 5.04070351758794e-05, - "loss": 5.0853, - "step": 49863 - }, - { - "epoch": 26.00469361147327, - "grad_norm": 1.5913010835647583, - "learning_rate": 5.040603015075377e-05, - "loss": 5.1561, - "step": 49864 - }, - { - "epoch": 26.00521512385919, - "grad_norm": 1.5768563747406006, - "learning_rate": 5.040502512562815e-05, - "loss": 5.3283, - "step": 49865 - }, - { - "epoch": 26.00573663624511, - "grad_norm": 1.5579520463943481, - "learning_rate": 5.0404020100502514e-05, - "loss": 5.5463, - "step": 49866 - }, - { - "epoch": 26.00625814863103, - "grad_norm": 1.5443542003631592, - "learning_rate": 5.040301507537689e-05, - "loss": 5.646, - "step": 49867 - }, - { - "epoch": 26.00677966101695, - "grad_norm": 1.5288281440734863, - "learning_rate": 5.0402010050251256e-05, - "loss": 5.4475, - "step": 49868 - }, - { - "epoch": 26.00730117340287, - "grad_norm": 1.6905237436294556, - "learning_rate": 5.0401005025125634e-05, - "loss": 4.9491, - "step": 49869 - }, - { - "epoch": 26.00782268578879, - "grad_norm": 1.602318525314331, - "learning_rate": 5.0400000000000005e-05, - "loss": 5.0166, - "step": 49870 - }, - { - "epoch": 26.008344198174708, - "grad_norm": 1.5511987209320068, - "learning_rate": 5.039899497487437e-05, - "loss": 5.4206, - "step": 49871 - }, - { - "epoch": 26.008865710560627, - "grad_norm": 1.6969717741012573, - "learning_rate": 5.039798994974875e-05, - "loss": 5.086, - "step": 49872 - }, - { - "epoch": 26.009387222946543, - "grad_norm": 1.6044172048568726, - "learning_rate": 5.039698492462311e-05, - "loss": 5.1749, - "step": 49873 - }, - { - "epoch": 26.009908735332463, - "grad_norm": 1.5165537595748901, - "learning_rate": 5.039597989949749e-05, - "loss": 5.0529, - "step": 49874 - }, - { - "epoch": 26.010430247718382, - "grad_norm": 1.5432796478271484, - "learning_rate": 5.039497487437186e-05, - "loss": 5.6221, - "step": 49875 - }, - { - "epoch": 26.0109517601043, - "grad_norm": 1.7155026197433472, - "learning_rate": 5.039396984924624e-05, - "loss": 4.8293, - "step": 49876 - }, - { - "epoch": 26.01147327249022, - "grad_norm": 1.5145021677017212, - "learning_rate": 5.03929648241206e-05, - "loss": 5.574, - "step": 49877 - }, - { - "epoch": 26.01199478487614, - "grad_norm": 1.5249179601669312, - "learning_rate": 5.039195979899498e-05, - "loss": 5.472, - "step": 49878 - }, - { - "epoch": 26.01251629726206, - "grad_norm": 1.6135358810424805, - "learning_rate": 5.0390954773869345e-05, - "loss": 5.0177, - "step": 49879 - }, - { - "epoch": 26.01303780964798, - "grad_norm": 1.4863982200622559, - "learning_rate": 5.038994974874372e-05, - "loss": 5.1835, - "step": 49880 - }, - { - "epoch": 26.0135593220339, - "grad_norm": 1.5394470691680908, - "learning_rate": 5.0388944723618094e-05, - "loss": 5.284, - "step": 49881 - }, - { - "epoch": 26.01408083441982, - "grad_norm": 1.5694096088409424, - "learning_rate": 5.038793969849247e-05, - "loss": 5.092, - "step": 49882 - }, - { - "epoch": 26.014602346805738, - "grad_norm": 1.5273618698120117, - "learning_rate": 5.0386934673366836e-05, - "loss": 5.4781, - "step": 49883 - }, - { - "epoch": 26.015123859191657, - "grad_norm": 1.5585932731628418, - "learning_rate": 5.038592964824121e-05, - "loss": 5.2642, - "step": 49884 - }, - { - "epoch": 26.015645371577573, - "grad_norm": 1.6852264404296875, - "learning_rate": 5.0384924623115585e-05, - "loss": 5.2391, - "step": 49885 - }, - { - "epoch": 26.016166883963493, - "grad_norm": 1.617247223854065, - "learning_rate": 5.038391959798995e-05, - "loss": 4.9123, - "step": 49886 - }, - { - "epoch": 26.016688396349412, - "grad_norm": 1.5400272607803345, - "learning_rate": 5.038291457286433e-05, - "loss": 4.8747, - "step": 49887 - }, - { - "epoch": 26.01720990873533, - "grad_norm": 1.579487919807434, - "learning_rate": 5.038190954773869e-05, - "loss": 5.4667, - "step": 49888 - }, - { - "epoch": 26.01773142112125, - "grad_norm": 1.7204338312149048, - "learning_rate": 5.038090452261307e-05, - "loss": 4.4433, - "step": 49889 - }, - { - "epoch": 26.01825293350717, - "grad_norm": 1.4713044166564941, - "learning_rate": 5.037989949748744e-05, - "loss": 4.8028, - "step": 49890 - }, - { - "epoch": 26.01877444589309, - "grad_norm": 1.571448564529419, - "learning_rate": 5.037889447236182e-05, - "loss": 5.4677, - "step": 49891 - }, - { - "epoch": 26.01929595827901, - "grad_norm": 1.6490477323532104, - "learning_rate": 5.037788944723618e-05, - "loss": 5.4673, - "step": 49892 - }, - { - "epoch": 26.01981747066493, - "grad_norm": 1.478212594985962, - "learning_rate": 5.037688442211056e-05, - "loss": 5.3408, - "step": 49893 - }, - { - "epoch": 26.02033898305085, - "grad_norm": 1.6048938035964966, - "learning_rate": 5.0375879396984924e-05, - "loss": 5.084, - "step": 49894 - }, - { - "epoch": 26.020860495436768, - "grad_norm": 1.617571234703064, - "learning_rate": 5.03748743718593e-05, - "loss": 4.6362, - "step": 49895 - }, - { - "epoch": 26.021382007822687, - "grad_norm": 1.7693649530410767, - "learning_rate": 5.037386934673367e-05, - "loss": 4.2675, - "step": 49896 - }, - { - "epoch": 26.021903520208603, - "grad_norm": 1.514443278312683, - "learning_rate": 5.037286432160804e-05, - "loss": 5.3821, - "step": 49897 - }, - { - "epoch": 26.022425032594523, - "grad_norm": 1.5993214845657349, - "learning_rate": 5.0371859296482415e-05, - "loss": 4.6702, - "step": 49898 - }, - { - "epoch": 26.022946544980442, - "grad_norm": 1.5937093496322632, - "learning_rate": 5.037085427135678e-05, - "loss": 4.6762, - "step": 49899 - }, - { - "epoch": 26.02346805736636, - "grad_norm": 1.6407947540283203, - "learning_rate": 5.036984924623116e-05, - "loss": 4.9777, - "step": 49900 - }, - { - "epoch": 26.02398956975228, - "grad_norm": 1.5661416053771973, - "learning_rate": 5.036884422110553e-05, - "loss": 4.5352, - "step": 49901 - }, - { - "epoch": 26.0245110821382, - "grad_norm": 1.5704617500305176, - "learning_rate": 5.0367839195979906e-05, - "loss": 5.1742, - "step": 49902 - }, - { - "epoch": 26.02503259452412, - "grad_norm": 1.611840844154358, - "learning_rate": 5.036683417085427e-05, - "loss": 4.9322, - "step": 49903 - }, - { - "epoch": 26.02555410691004, - "grad_norm": 1.6492494344711304, - "learning_rate": 5.036582914572865e-05, - "loss": 5.1859, - "step": 49904 - }, - { - "epoch": 26.02607561929596, - "grad_norm": 1.602911114692688, - "learning_rate": 5.036482412060301e-05, - "loss": 4.9235, - "step": 49905 - }, - { - "epoch": 26.02659713168188, - "grad_norm": 1.6575928926467896, - "learning_rate": 5.036381909547739e-05, - "loss": 4.5271, - "step": 49906 - }, - { - "epoch": 26.027118644067798, - "grad_norm": 1.7428065538406372, - "learning_rate": 5.036281407035176e-05, - "loss": 4.9101, - "step": 49907 - }, - { - "epoch": 26.027640156453717, - "grad_norm": 1.5835801362991333, - "learning_rate": 5.036180904522614e-05, - "loss": 5.0906, - "step": 49908 - }, - { - "epoch": 26.028161668839633, - "grad_norm": 1.53526771068573, - "learning_rate": 5.0360804020100503e-05, - "loss": 5.176, - "step": 49909 - }, - { - "epoch": 26.028683181225553, - "grad_norm": 1.3856509923934937, - "learning_rate": 5.0359798994974875e-05, - "loss": 5.5657, - "step": 49910 - }, - { - "epoch": 26.029204693611472, - "grad_norm": 1.554999828338623, - "learning_rate": 5.035879396984925e-05, - "loss": 5.3563, - "step": 49911 - }, - { - "epoch": 26.02972620599739, - "grad_norm": 1.6727173328399658, - "learning_rate": 5.035778894472362e-05, - "loss": 4.6031, - "step": 49912 - }, - { - "epoch": 26.03024771838331, - "grad_norm": 1.5232245922088623, - "learning_rate": 5.0356783919597994e-05, - "loss": 5.3846, - "step": 49913 - }, - { - "epoch": 26.03076923076923, - "grad_norm": 1.6969258785247803, - "learning_rate": 5.035577889447236e-05, - "loss": 5.0519, - "step": 49914 - }, - { - "epoch": 26.03129074315515, - "grad_norm": 1.5561836957931519, - "learning_rate": 5.0354773869346737e-05, - "loss": 5.3353, - "step": 49915 - }, - { - "epoch": 26.03181225554107, - "grad_norm": 1.5368386507034302, - "learning_rate": 5.035376884422111e-05, - "loss": 5.6348, - "step": 49916 - }, - { - "epoch": 26.03233376792699, - "grad_norm": 1.5277912616729736, - "learning_rate": 5.0352763819095485e-05, - "loss": 5.4047, - "step": 49917 - }, - { - "epoch": 26.03285528031291, - "grad_norm": 1.5470176935195923, - "learning_rate": 5.035175879396985e-05, - "loss": 5.4136, - "step": 49918 - }, - { - "epoch": 26.033376792698828, - "grad_norm": 1.5946000814437866, - "learning_rate": 5.035075376884423e-05, - "loss": 5.3486, - "step": 49919 - }, - { - "epoch": 26.033898305084747, - "grad_norm": 1.4964848756790161, - "learning_rate": 5.034974874371859e-05, - "loss": 5.0878, - "step": 49920 - }, - { - "epoch": 26.034419817470663, - "grad_norm": 1.5625120401382446, - "learning_rate": 5.034874371859297e-05, - "loss": 5.255, - "step": 49921 - }, - { - "epoch": 26.034941329856583, - "grad_norm": 1.6137350797653198, - "learning_rate": 5.034773869346734e-05, - "loss": 5.1863, - "step": 49922 - }, - { - "epoch": 26.035462842242502, - "grad_norm": 1.4832732677459717, - "learning_rate": 5.0346733668341705e-05, - "loss": 5.6676, - "step": 49923 - }, - { - "epoch": 26.03598435462842, - "grad_norm": 1.523400902748108, - "learning_rate": 5.034572864321608e-05, - "loss": 5.3586, - "step": 49924 - }, - { - "epoch": 26.03650586701434, - "grad_norm": 1.5644515752792358, - "learning_rate": 5.034472361809045e-05, - "loss": 5.2654, - "step": 49925 - }, - { - "epoch": 26.03702737940026, - "grad_norm": 1.4728585481643677, - "learning_rate": 5.0343718592964825e-05, - "loss": 5.6056, - "step": 49926 - }, - { - "epoch": 26.03754889178618, - "grad_norm": 1.6028105020523071, - "learning_rate": 5.0342713567839196e-05, - "loss": 4.6443, - "step": 49927 - }, - { - "epoch": 26.0380704041721, - "grad_norm": 1.5580484867095947, - "learning_rate": 5.0341708542713574e-05, - "loss": 5.1627, - "step": 49928 - }, - { - "epoch": 26.03859191655802, - "grad_norm": 1.5718873739242554, - "learning_rate": 5.034070351758794e-05, - "loss": 5.3933, - "step": 49929 - }, - { - "epoch": 26.03911342894394, - "grad_norm": 1.5662686824798584, - "learning_rate": 5.0339698492462316e-05, - "loss": 5.1158, - "step": 49930 - }, - { - "epoch": 26.039634941329858, - "grad_norm": 1.5889496803283691, - "learning_rate": 5.033869346733669e-05, - "loss": 4.9535, - "step": 49931 - }, - { - "epoch": 26.040156453715777, - "grad_norm": 1.4810431003570557, - "learning_rate": 5.0337688442211065e-05, - "loss": 5.682, - "step": 49932 - }, - { - "epoch": 26.040677966101693, - "grad_norm": 1.5462135076522827, - "learning_rate": 5.033668341708543e-05, - "loss": 4.6778, - "step": 49933 - }, - { - "epoch": 26.041199478487613, - "grad_norm": 1.5868403911590576, - "learning_rate": 5.033567839195981e-05, - "loss": 4.976, - "step": 49934 - }, - { - "epoch": 26.041720990873532, - "grad_norm": 1.655432939529419, - "learning_rate": 5.033467336683417e-05, - "loss": 4.4422, - "step": 49935 - }, - { - "epoch": 26.042242503259452, - "grad_norm": 1.539458990097046, - "learning_rate": 5.033366834170854e-05, - "loss": 5.0, - "step": 49936 - }, - { - "epoch": 26.04276401564537, - "grad_norm": 1.5608280897140503, - "learning_rate": 5.033266331658292e-05, - "loss": 5.3505, - "step": 49937 - }, - { - "epoch": 26.04328552803129, - "grad_norm": 1.8141158819198608, - "learning_rate": 5.0331658291457284e-05, - "loss": 4.8851, - "step": 49938 - }, - { - "epoch": 26.04380704041721, - "grad_norm": 1.4827202558517456, - "learning_rate": 5.033065326633166e-05, - "loss": 5.0209, - "step": 49939 - }, - { - "epoch": 26.04432855280313, - "grad_norm": 1.5417380332946777, - "learning_rate": 5.0329648241206026e-05, - "loss": 5.4906, - "step": 49940 - }, - { - "epoch": 26.04485006518905, - "grad_norm": 1.6341389417648315, - "learning_rate": 5.0328643216080404e-05, - "loss": 5.3053, - "step": 49941 - }, - { - "epoch": 26.04537157757497, - "grad_norm": 1.5426335334777832, - "learning_rate": 5.0327638190954775e-05, - "loss": 5.4556, - "step": 49942 - }, - { - "epoch": 26.045893089960888, - "grad_norm": 1.5502915382385254, - "learning_rate": 5.032663316582915e-05, - "loss": 4.8803, - "step": 49943 - }, - { - "epoch": 26.046414602346807, - "grad_norm": 1.6502996683120728, - "learning_rate": 5.032562814070352e-05, - "loss": 4.9702, - "step": 49944 - }, - { - "epoch": 26.046936114732723, - "grad_norm": 1.5974037647247314, - "learning_rate": 5.0324623115577895e-05, - "loss": 5.3433, - "step": 49945 - }, - { - "epoch": 26.047457627118643, - "grad_norm": 1.6698493957519531, - "learning_rate": 5.032361809045226e-05, - "loss": 4.9863, - "step": 49946 - }, - { - "epoch": 26.047979139504562, - "grad_norm": 1.6427046060562134, - "learning_rate": 5.032261306532664e-05, - "loss": 5.4131, - "step": 49947 - }, - { - "epoch": 26.048500651890482, - "grad_norm": 1.656296968460083, - "learning_rate": 5.032160804020101e-05, - "loss": 5.1012, - "step": 49948 - }, - { - "epoch": 26.0490221642764, - "grad_norm": 1.7394770383834839, - "learning_rate": 5.032060301507537e-05, - "loss": 4.7086, - "step": 49949 - }, - { - "epoch": 26.04954367666232, - "grad_norm": 1.6759474277496338, - "learning_rate": 5.031959798994975e-05, - "loss": 5.0219, - "step": 49950 - }, - { - "epoch": 26.05006518904824, - "grad_norm": 1.6022017002105713, - "learning_rate": 5.031859296482412e-05, - "loss": 4.9726, - "step": 49951 - }, - { - "epoch": 26.05058670143416, - "grad_norm": 1.5920095443725586, - "learning_rate": 5.03175879396985e-05, - "loss": 5.0485, - "step": 49952 - }, - { - "epoch": 26.05110821382008, - "grad_norm": 1.5178791284561157, - "learning_rate": 5.0316582914572864e-05, - "loss": 5.3107, - "step": 49953 - }, - { - "epoch": 26.051629726206, - "grad_norm": 1.543992519378662, - "learning_rate": 5.031557788944724e-05, - "loss": 5.4051, - "step": 49954 - }, - { - "epoch": 26.052151238591918, - "grad_norm": 1.6477265357971191, - "learning_rate": 5.0314572864321606e-05, - "loss": 5.0291, - "step": 49955 - }, - { - "epoch": 26.052672750977834, - "grad_norm": 1.6444275379180908, - "learning_rate": 5.0313567839195984e-05, - "loss": 4.9704, - "step": 49956 - }, - { - "epoch": 26.053194263363753, - "grad_norm": 1.5968772172927856, - "learning_rate": 5.0312562814070355e-05, - "loss": 5.5423, - "step": 49957 - }, - { - "epoch": 26.053715775749673, - "grad_norm": 1.5367923974990845, - "learning_rate": 5.031155778894473e-05, - "loss": 4.5087, - "step": 49958 - }, - { - "epoch": 26.054237288135592, - "grad_norm": 1.595871925354004, - "learning_rate": 5.03105527638191e-05, - "loss": 5.184, - "step": 49959 - }, - { - "epoch": 26.054758800521512, - "grad_norm": 1.5525952577590942, - "learning_rate": 5.0309547738693475e-05, - "loss": 5.2862, - "step": 49960 - }, - { - "epoch": 26.05528031290743, - "grad_norm": 1.664811372756958, - "learning_rate": 5.030854271356784e-05, - "loss": 4.5795, - "step": 49961 - }, - { - "epoch": 26.05580182529335, - "grad_norm": 1.6122732162475586, - "learning_rate": 5.030753768844222e-05, - "loss": 5.1343, - "step": 49962 - }, - { - "epoch": 26.05632333767927, - "grad_norm": 1.5548722743988037, - "learning_rate": 5.030653266331659e-05, - "loss": 5.494, - "step": 49963 - }, - { - "epoch": 26.05684485006519, - "grad_norm": 1.527498722076416, - "learning_rate": 5.030552763819095e-05, - "loss": 5.5403, - "step": 49964 - }, - { - "epoch": 26.05736636245111, - "grad_norm": 1.6688705682754517, - "learning_rate": 5.030452261306533e-05, - "loss": 5.0659, - "step": 49965 - }, - { - "epoch": 26.05788787483703, - "grad_norm": 1.6740771532058716, - "learning_rate": 5.0303517587939694e-05, - "loss": 4.4977, - "step": 49966 - }, - { - "epoch": 26.058409387222948, - "grad_norm": 1.6127047538757324, - "learning_rate": 5.030251256281407e-05, - "loss": 5.3266, - "step": 49967 - }, - { - "epoch": 26.058930899608864, - "grad_norm": 1.5834952592849731, - "learning_rate": 5.030150753768844e-05, - "loss": 5.0688, - "step": 49968 - }, - { - "epoch": 26.059452411994783, - "grad_norm": 1.5595929622650146, - "learning_rate": 5.030050251256282e-05, - "loss": 5.6848, - "step": 49969 - }, - { - "epoch": 26.059973924380703, - "grad_norm": 1.527309775352478, - "learning_rate": 5.0299497487437185e-05, - "loss": 5.0097, - "step": 49970 - }, - { - "epoch": 26.060495436766622, - "grad_norm": 1.6456427574157715, - "learning_rate": 5.029849246231156e-05, - "loss": 4.9512, - "step": 49971 - }, - { - "epoch": 26.061016949152542, - "grad_norm": 1.631182312965393, - "learning_rate": 5.0297487437185934e-05, - "loss": 5.2295, - "step": 49972 - }, - { - "epoch": 26.06153846153846, - "grad_norm": 1.5534121990203857, - "learning_rate": 5.029648241206031e-05, - "loss": 5.4364, - "step": 49973 - }, - { - "epoch": 26.06205997392438, - "grad_norm": 1.657080888748169, - "learning_rate": 5.0295477386934676e-05, - "loss": 4.8284, - "step": 49974 - }, - { - "epoch": 26.0625814863103, - "grad_norm": 1.6267520189285278, - "learning_rate": 5.0294472361809054e-05, - "loss": 4.9535, - "step": 49975 - }, - { - "epoch": 26.06310299869622, - "grad_norm": 1.4828643798828125, - "learning_rate": 5.029346733668342e-05, - "loss": 5.6494, - "step": 49976 - }, - { - "epoch": 26.06362451108214, - "grad_norm": 1.5096355676651, - "learning_rate": 5.029246231155779e-05, - "loss": 5.5209, - "step": 49977 - }, - { - "epoch": 26.06414602346806, - "grad_norm": 1.4697860479354858, - "learning_rate": 5.029145728643217e-05, - "loss": 5.5019, - "step": 49978 - }, - { - "epoch": 26.064667535853978, - "grad_norm": 1.6198960542678833, - "learning_rate": 5.029045226130653e-05, - "loss": 5.5207, - "step": 49979 - }, - { - "epoch": 26.065189048239894, - "grad_norm": 1.6579374074935913, - "learning_rate": 5.028944723618091e-05, - "loss": 5.009, - "step": 49980 - }, - { - "epoch": 26.065710560625813, - "grad_norm": 1.5776405334472656, - "learning_rate": 5.0288442211055273e-05, - "loss": 5.0292, - "step": 49981 - }, - { - "epoch": 26.066232073011733, - "grad_norm": 1.5483795404434204, - "learning_rate": 5.028743718592965e-05, - "loss": 5.5258, - "step": 49982 - }, - { - "epoch": 26.066753585397652, - "grad_norm": 1.6215063333511353, - "learning_rate": 5.028643216080402e-05, - "loss": 4.8534, - "step": 49983 - }, - { - "epoch": 26.067275097783572, - "grad_norm": 1.5702906847000122, - "learning_rate": 5.02854271356784e-05, - "loss": 5.2473, - "step": 49984 - }, - { - "epoch": 26.06779661016949, - "grad_norm": 1.6264228820800781, - "learning_rate": 5.0284422110552764e-05, - "loss": 4.7399, - "step": 49985 - }, - { - "epoch": 26.06831812255541, - "grad_norm": 1.598844289779663, - "learning_rate": 5.028341708542714e-05, - "loss": 5.193, - "step": 49986 - }, - { - "epoch": 26.06883963494133, - "grad_norm": 1.5737974643707275, - "learning_rate": 5.0282412060301506e-05, - "loss": 4.6594, - "step": 49987 - }, - { - "epoch": 26.06936114732725, - "grad_norm": 1.5599888563156128, - "learning_rate": 5.0281407035175884e-05, - "loss": 5.1594, - "step": 49988 - }, - { - "epoch": 26.06988265971317, - "grad_norm": 1.6480402946472168, - "learning_rate": 5.0280402010050255e-05, - "loss": 5.381, - "step": 49989 - }, - { - "epoch": 26.07040417209909, - "grad_norm": 1.5630629062652588, - "learning_rate": 5.027939698492462e-05, - "loss": 5.5411, - "step": 49990 - }, - { - "epoch": 26.070925684485008, - "grad_norm": 1.4982770681381226, - "learning_rate": 5.0278391959799e-05, - "loss": 5.3476, - "step": 49991 - }, - { - "epoch": 26.071447196870924, - "grad_norm": 1.6861292123794556, - "learning_rate": 5.027738693467336e-05, - "loss": 5.1003, - "step": 49992 - }, - { - "epoch": 26.071968709256844, - "grad_norm": 1.559387445449829, - "learning_rate": 5.027638190954774e-05, - "loss": 5.1098, - "step": 49993 - }, - { - "epoch": 26.072490221642763, - "grad_norm": 1.601373314857483, - "learning_rate": 5.027537688442211e-05, - "loss": 5.2072, - "step": 49994 - }, - { - "epoch": 26.073011734028682, - "grad_norm": 1.5824064016342163, - "learning_rate": 5.027437185929649e-05, - "loss": 5.2241, - "step": 49995 - }, - { - "epoch": 26.073533246414602, - "grad_norm": 1.641309142112732, - "learning_rate": 5.027336683417085e-05, - "loss": 5.6711, - "step": 49996 - }, - { - "epoch": 26.07405475880052, - "grad_norm": 1.5635870695114136, - "learning_rate": 5.027236180904523e-05, - "loss": 5.247, - "step": 49997 - }, - { - "epoch": 26.07457627118644, - "grad_norm": 1.541756510734558, - "learning_rate": 5.02713567839196e-05, - "loss": 5.4775, - "step": 49998 - }, - { - "epoch": 26.07509778357236, - "grad_norm": 1.5502567291259766, - "learning_rate": 5.027035175879398e-05, - "loss": 5.4107, - "step": 49999 - }, - { - "epoch": 26.07561929595828, - "grad_norm": 1.6434838771820068, - "learning_rate": 5.0269346733668344e-05, - "loss": 4.7404, - "step": 50000 - }, - { - "epoch": 26.07561929595828, - "eval_loss": 5.325130939483643, - "eval_runtime": 42.6665, - "eval_samples_per_second": 28.734, - "eval_steps_per_second": 3.609, - "step": 50000 - } - ], - "logging_steps": 1, - "max_steps": 100000, - "num_input_tokens_seen": 0, - "num_train_epochs": 53, - "save_steps": 5000, - "total_flos": 7.454840432225157e+17, - "train_batch_size": 4, - "trial_name": null, - "trial_params": null -}