diff --git "a/checkpoint-2000/trainer_state.json" "b/checkpoint-2000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-2000/trainer_state.json" @@ -0,0 +1,12019 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9995002498750625, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 2.205, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 2.1741, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-05, + "loss": 2.3915, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.2188, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 2.2271, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.4e-05, + "loss": 2.1674, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.0138, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 1.8433, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-05, + "loss": 2.0383, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 2.1195, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.1695, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 4.8e-05, + "loss": 1.8903, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 5.2000000000000004e-05, + "loss": 1.729, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.0379, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 1.9539, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 6.400000000000001e-05, + "loss": 2.0113, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 6.800000000000001e-05, + "loss": 1.6799, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 7.2e-05, + "loss": 2.0281, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 7.6e-05, + "loss": 1.8322, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.8084, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8.4e-05, + "loss": 1.9343, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 8.800000000000001e-05, + "loss": 1.9195, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 9.200000000000001e-05, + "loss": 1.8495, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 9.6e-05, + "loss": 1.8587, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.814, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010400000000000001, + "loss": 1.824, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010800000000000001, + "loss": 1.7464, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 0.00011200000000000001, + "loss": 1.8182, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 0.000116, + "loss": 1.6595, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 1.8015, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 0.000124, + "loss": 1.825, + "step": 31 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012800000000000002, + "loss": 1.8051, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 0.000132, + "loss": 1.7185, + "step": 33 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013600000000000003, + "loss": 1.7766, + "step": 34 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 1.7529, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 0.000144, + "loss": 1.6643, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 0.000148, + "loss": 1.8265, + "step": 37 + }, + { + "epoch": 0.02, + "learning_rate": 0.000152, + "loss": 1.6598, + "step": 38 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015600000000000002, + "loss": 1.5992, + "step": 39 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 1.9037, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 0.000164, + "loss": 1.7461, + "step": 41 + }, + { + "epoch": 0.02, + "learning_rate": 0.000168, + "loss": 1.7369, + "step": 42 + }, + { + "epoch": 0.02, + "learning_rate": 0.000172, + "loss": 1.78, + "step": 43 + }, + { + "epoch": 0.02, + "learning_rate": 0.00017600000000000002, + "loss": 1.8073, + "step": 44 + }, + { + "epoch": 0.02, + "learning_rate": 0.00018, + "loss": 1.6786, + "step": 45 + }, + { + "epoch": 0.02, + "learning_rate": 0.00018400000000000003, + "loss": 1.8868, + "step": 46 + }, + { + "epoch": 0.02, + "learning_rate": 0.000188, + "loss": 1.9341, + "step": 47 + }, + { + "epoch": 0.02, + "learning_rate": 0.000192, + "loss": 1.5922, + "step": 48 + }, + { + "epoch": 0.02, + "learning_rate": 0.000196, + "loss": 1.8224, + "step": 49 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 1.8133, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 0.00020400000000000003, + "loss": 1.8373, + "step": 51 + }, + { + "epoch": 0.03, + "learning_rate": 0.00020800000000000001, + "loss": 1.5649, + "step": 52 + }, + { + "epoch": 0.03, + "learning_rate": 0.00021200000000000003, + "loss": 1.7079, + "step": 53 + }, + { + "epoch": 0.03, + "learning_rate": 0.00021600000000000002, + "loss": 1.7249, + "step": 54 + }, + { + "epoch": 0.03, + "learning_rate": 0.00022000000000000003, + "loss": 1.7299, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 0.00022400000000000002, + "loss": 1.7068, + "step": 56 + }, + { + "epoch": 0.03, + "learning_rate": 0.00022799999999999999, + "loss": 1.8436, + "step": 57 + }, + { + "epoch": 0.03, + "learning_rate": 0.000232, + "loss": 1.7897, + "step": 58 + }, + { + "epoch": 0.03, + "learning_rate": 0.000236, + "loss": 1.8142, + "step": 59 + }, + { + "epoch": 0.03, + "learning_rate": 0.00024, + "loss": 1.8176, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 0.000244, + "loss": 1.7742, + "step": 61 + }, + { + "epoch": 0.03, + "learning_rate": 0.000248, + "loss": 1.6512, + "step": 62 + }, + { + "epoch": 0.03, + "learning_rate": 0.000252, + "loss": 1.806, + "step": 63 + }, + { + "epoch": 0.03, + "learning_rate": 0.00025600000000000004, + "loss": 1.8776, + "step": 64 + }, + { + "epoch": 0.03, + "learning_rate": 0.00026000000000000003, + "loss": 1.8309, + "step": 65 + }, + { + "epoch": 0.03, + "learning_rate": 0.000264, + "loss": 1.7217, + "step": 66 + }, + { + "epoch": 0.03, + "learning_rate": 0.000268, + "loss": 1.7578, + "step": 67 + }, + { + "epoch": 0.03, + "learning_rate": 0.00027200000000000005, + "loss": 1.7367, + "step": 68 + }, + { + "epoch": 0.03, + "learning_rate": 0.000276, + "loss": 1.4613, + "step": 69 + }, + { + "epoch": 0.03, + "learning_rate": 0.00028, + "loss": 1.85, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 0.000284, + "loss": 1.7672, + "step": 71 + }, + { + "epoch": 0.04, + "learning_rate": 0.000288, + "loss": 1.6758, + "step": 72 + }, + { + "epoch": 0.04, + "learning_rate": 0.000292, + "loss": 1.6693, + "step": 73 + }, + { + "epoch": 0.04, + "learning_rate": 0.000296, + "loss": 1.6831, + "step": 74 + }, + { + "epoch": 0.04, + "learning_rate": 0.00030000000000000003, + "loss": 1.7338, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 0.000304, + "loss": 1.6858, + "step": 76 + }, + { + "epoch": 0.04, + "learning_rate": 0.000308, + "loss": 1.7453, + "step": 77 + }, + { + "epoch": 0.04, + "learning_rate": 0.00031200000000000005, + "loss": 1.6362, + "step": 78 + }, + { + "epoch": 0.04, + "learning_rate": 0.00031600000000000004, + "loss": 1.875, + "step": 79 + }, + { + "epoch": 0.04, + "learning_rate": 0.00032, + "loss": 1.7411, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 0.000324, + "loss": 1.7638, + "step": 81 + }, + { + "epoch": 0.04, + "learning_rate": 0.000328, + "loss": 1.7488, + "step": 82 + }, + { + "epoch": 0.04, + "learning_rate": 0.000332, + "loss": 1.5624, + "step": 83 + }, + { + "epoch": 0.04, + "learning_rate": 0.000336, + "loss": 1.8976, + "step": 84 + }, + { + "epoch": 0.04, + "learning_rate": 0.00034, + "loss": 1.7016, + "step": 85 + }, + { + "epoch": 0.04, + "learning_rate": 0.000344, + "loss": 1.6986, + "step": 86 + }, + { + "epoch": 0.04, + "learning_rate": 0.000348, + "loss": 1.8508, + "step": 87 + }, + { + "epoch": 0.04, + "learning_rate": 0.00035200000000000005, + "loss": 1.7049, + "step": 88 + }, + { + "epoch": 0.04, + "learning_rate": 0.00035600000000000003, + "loss": 1.7912, + "step": 89 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036, + "loss": 1.6776, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 0.000364, + "loss": 1.7725, + "step": 91 + }, + { + "epoch": 0.05, + "learning_rate": 0.00036800000000000005, + "loss": 1.734, + "step": 92 + }, + { + "epoch": 0.05, + "learning_rate": 0.00037200000000000004, + "loss": 1.8326, + "step": 93 + }, + { + "epoch": 0.05, + "learning_rate": 0.000376, + "loss": 1.7755, + "step": 94 + }, + { + "epoch": 0.05, + "learning_rate": 0.00038, + "loss": 1.5972, + "step": 95 + }, + { + "epoch": 0.05, + "learning_rate": 0.000384, + "loss": 1.7307, + "step": 96 + }, + { + "epoch": 0.05, + "learning_rate": 0.000388, + "loss": 1.7856, + "step": 97 + }, + { + "epoch": 0.05, + "learning_rate": 0.000392, + "loss": 1.7529, + "step": 98 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039600000000000003, + "loss": 1.5194, + "step": 99 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004, + "loss": 1.7358, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039999972689137685, + "loss": 1.8458, + "step": 101 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039999890756625326, + "loss": 1.7214, + "step": 102 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999975420268669, + "loss": 1.9563, + "step": 103 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999956302769471, + "loss": 1.9295, + "step": 104 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999931723217151, + "loss": 1.7522, + "step": 105 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999901681678838, + "loss": 1.6478, + "step": 106 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039998661782365765, + "loss": 1.7303, + "step": 107 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039998252129873314, + "loss": 1.7452, + "step": 108 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039997787860429813, + "loss": 1.6586, + "step": 109 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999726897530322, + "loss": 1.5898, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999669547591067, + "loss": 1.5924, + "step": 111 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999606736381842, + "loss": 1.5336, + "step": 112 + }, + { + "epoch": 0.06, + "learning_rate": 0.000399953846407419, + "loss": 1.6407, + "step": 113 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999464730854571, + "loss": 1.5072, + "step": 114 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039993855369243534, + "loss": 1.8297, + "step": 115 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039993008824998246, + "loss": 1.8099, + "step": 116 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999210767812183, + "loss": 1.5861, + "step": 117 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999115193107539, + "loss": 1.7694, + "step": 118 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999014158646916, + "loss": 1.8015, + "step": 119 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039989076647062473, + "loss": 1.7514, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998795711576378, + "loss": 1.8225, + "step": 121 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039986782995630603, + "loss": 1.7047, + "step": 122 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039985554289869574, + "loss": 1.5168, + "step": 123 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039984271001836395, + "loss": 1.8128, + "step": 124 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998293313503583, + "loss": 1.8507, + "step": 125 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039981540693121716, + "loss": 1.6675, + "step": 126 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998009367989693, + "loss": 1.6252, + "step": 127 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039978592099313386, + "loss": 1.75, + "step": 128 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039977035955472034, + "loss": 1.4685, + "step": 129 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997542525262284, + "loss": 1.6629, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997375999516476, + "loss": 1.8121, + "step": 131 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997204018764577, + "loss": 1.8056, + "step": 132 + }, + { + "epoch": 0.07, + "learning_rate": 0.000399702658347628, + "loss": 1.6775, + "step": 133 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039968436941361773, + "loss": 1.6369, + "step": 134 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996655351243755, + "loss": 1.6055, + "step": 135 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996461555313393, + "loss": 1.636, + "step": 136 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996262306874366, + "loss": 1.739, + "step": 137 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996057606470837, + "loss": 1.561, + "step": 138 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039958474546618626, + "loss": 1.6892, + "step": 139 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039956318520213837, + "loss": 1.8357, + "step": 140 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003995410799138231, + "loss": 1.8147, + "step": 141 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039951842966161176, + "loss": 1.6856, + "step": 142 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994952345073643, + "loss": 1.6191, + "step": 143 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994714945144286, + "loss": 1.5254, + "step": 144 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994472097476406, + "loss": 1.5864, + "step": 145 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994223802733241, + "loss": 1.7604, + "step": 146 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003993970061592906, + "loss": 1.7338, + "step": 147 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039937108747483893, + "loss": 1.8078, + "step": 148 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003993446242907553, + "loss": 1.6927, + "step": 149 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039931761667931287, + "loss": 1.6732, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039929006471427187, + "loss": 1.5369, + "step": 151 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039926196847087905, + "loss": 1.5529, + "step": 152 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003992333280258676, + "loss": 1.6886, + "step": 153 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003992041434574572, + "loss": 1.7383, + "step": 154 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003991744148453532, + "loss": 1.7943, + "step": 155 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003991441422707472, + "loss": 1.6811, + "step": 156 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039911332581631613, + "loss": 1.7442, + "step": 157 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003990819655662224, + "loss": 1.5712, + "step": 158 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039905006160611357, + "loss": 1.5422, + "step": 159 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039901761402312205, + "loss": 1.6689, + "step": 160 + }, + { + "epoch": 0.08, + "learning_rate": 0.000398984622905865, + "loss": 1.8412, + "step": 161 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039895108834444405, + "loss": 1.7318, + "step": 162 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039891701043044496, + "loss": 1.4819, + "step": 163 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003988823892569375, + "loss": 1.4612, + "step": 164 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039884722491847504, + "loss": 1.5737, + "step": 165 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003988115175110943, + "loss": 1.7666, + "step": 166 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003987752671323155, + "loss": 1.4879, + "step": 167 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003987384738811414, + "loss": 1.7151, + "step": 168 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003987011378580576, + "loss": 1.751, + "step": 169 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039866325916503205, + "loss": 1.7216, + "step": 170 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003986248379055146, + "loss": 1.5863, + "step": 171 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039858587418443715, + "loss": 1.793, + "step": 172 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039854636810821286, + "loss": 1.6011, + "step": 173 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003985063197847363, + "loss": 1.7418, + "step": 174 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003984657293233829, + "loss": 1.6862, + "step": 175 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003984245968350087, + "loss": 1.6857, + "step": 176 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039838292243195013, + "loss": 1.7442, + "step": 177 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003983407062280234, + "loss": 1.9183, + "step": 178 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003982979483385249, + "loss": 1.592, + "step": 179 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003982546488802298, + "loss": 1.777, + "step": 180 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039821080797139283, + "loss": 1.7078, + "step": 181 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003981664257317472, + "loss": 1.7803, + "step": 182 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039812150228250474, + "loss": 1.7864, + "step": 183 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003980760377463552, + "loss": 1.7185, + "step": 184 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003980300322474662, + "loss": 1.7885, + "step": 185 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039798348591148263, + "loss": 1.8994, + "step": 186 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039793639886552665, + "loss": 1.713, + "step": 187 + }, + { + "epoch": 0.09, + "learning_rate": 0.000397888771238197, + "loss": 1.5899, + "step": 188 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003978406031595688, + "loss": 1.6652, + "step": 189 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003977918947611932, + "loss": 1.5935, + "step": 190 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003977426461760972, + "loss": 1.6655, + "step": 191 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039769285753878277, + "loss": 1.8054, + "step": 192 + }, + { + "epoch": 0.1, + "learning_rate": 0.000397642528985227, + "loss": 1.6275, + "step": 193 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039759166065288167, + "loss": 1.7446, + "step": 194 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039754025268067233, + "loss": 1.6488, + "step": 195 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039748830520899874, + "loss": 1.7123, + "step": 196 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003974358183797339, + "loss": 1.6823, + "step": 197 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039738279233622386, + "loss": 1.4115, + "step": 198 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039732922722328725, + "loss": 1.3705, + "step": 199 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039727512318721514, + "loss": 1.776, + "step": 200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039722048037577024, + "loss": 1.7096, + "step": 201 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003971652989381868, + "loss": 1.5852, + "step": 202 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003971095790251701, + "loss": 1.8677, + "step": 203 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039705332078889596, + "loss": 1.4359, + "step": 204 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039699652438301053, + "loss": 1.7743, + "step": 205 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039693918996262974, + "loss": 1.6298, + "step": 206 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003968813176843387, + "loss": 1.7088, + "step": 207 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003968229077061917, + "loss": 1.4698, + "step": 208 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039676396018771147, + "loss": 1.6022, + "step": 209 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003967044752898886, + "loss": 1.6729, + "step": 210 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003966444531751816, + "loss": 1.5742, + "step": 211 + }, + { + "epoch": 0.11, + "learning_rate": 0.000396583894007516, + "loss": 1.437, + "step": 212 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003965227979522842, + "loss": 1.615, + "step": 213 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039646116517634463, + "loss": 1.6582, + "step": 214 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039639899584802184, + "loss": 1.5987, + "step": 215 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039633629013710554, + "loss": 1.8452, + "step": 216 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039627304821485056, + "loss": 1.6249, + "step": 217 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003962092702539759, + "loss": 1.7399, + "step": 218 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003961449564286648, + "loss": 1.5969, + "step": 219 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039608010691456367, + "loss": 1.5766, + "step": 220 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039601472188878235, + "loss": 1.6199, + "step": 221 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003959488015298929, + "loss": 1.773, + "step": 222 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039588234601792944, + "loss": 1.8759, + "step": 223 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003958153555343878, + "loss": 1.7133, + "step": 224 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039574783026222475, + "loss": 1.5905, + "step": 225 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039567977038585756, + "loss": 1.7087, + "step": 226 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003956111760911637, + "loss": 1.5135, + "step": 227 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039554204756548, + "loss": 1.4835, + "step": 228 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039547238499760255, + "loss": 1.7274, + "step": 229 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039540218857778576, + "loss": 1.568, + "step": 230 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003953314584977421, + "loss": 1.5526, + "step": 231 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039526019495064155, + "loss": 1.7895, + "step": 232 + }, + { + "epoch": 0.12, + "learning_rate": 0.000395188398131111, + "loss": 1.6583, + "step": 233 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039511606823523375, + "loss": 1.7053, + "step": 234 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039504320546054894, + "loss": 1.709, + "step": 235 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039496981000605117, + "loss": 1.6779, + "step": 236 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003948958820721897, + "loss": 1.6184, + "step": 237 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003948214218608681, + "loss": 1.6131, + "step": 238 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039474642957544365, + "loss": 1.6341, + "step": 239 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003946709054207267, + "loss": 1.6489, + "step": 240 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039459484960298026, + "loss": 1.6761, + "step": 241 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039451826232991935, + "loss": 1.5096, + "step": 242 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003944411438107104, + "loss": 1.736, + "step": 243 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003943634942559708, + "loss": 1.7403, + "step": 244 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039428531387776804, + "loss": 1.554, + "step": 245 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003942066028896195, + "loss": 1.8338, + "step": 246 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003941273615064918, + "loss": 1.7811, + "step": 247 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039404758994479984, + "loss": 1.6816, + "step": 248 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039396728842240673, + "loss": 1.6188, + "step": 249 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003938864571586229, + "loss": 1.6212, + "step": 250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039380509637420533, + "loss": 1.8131, + "step": 251 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003937232062913575, + "loss": 1.7621, + "step": 252 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039364078713372816, + "loss": 1.7201, + "step": 253 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039355783912641126, + "loss": 1.6273, + "step": 254 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003934743624959449, + "loss": 1.812, + "step": 255 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003933903574703109, + "loss": 1.7049, + "step": 256 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003933058242789344, + "loss": 1.7132, + "step": 257 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039322076315268266, + "loss": 1.5422, + "step": 258 + }, + { + "epoch": 0.13, + "learning_rate": 0.000393135174323865, + "loss": 1.6279, + "step": 259 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003930490580262319, + "loss": 1.5107, + "step": 260 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039296241449497443, + "loss": 1.6808, + "step": 261 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039287524396672345, + "loss": 1.8189, + "step": 262 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039278754667954936, + "loss": 1.6716, + "step": 263 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039269932287296083, + "loss": 1.7371, + "step": 264 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039261057278790483, + "loss": 1.5354, + "step": 265 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003925212966667654, + "loss": 1.5848, + "step": 266 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003924314947533633, + "loss": 1.6857, + "step": 267 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039234116729295536, + "loss": 1.8029, + "step": 268 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039225031453223367, + "loss": 1.6986, + "step": 269 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039215893671932497, + "loss": 1.5044, + "step": 270 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003920670341037899, + "loss": 1.6712, + "step": 271 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039197460693662245, + "loss": 1.5513, + "step": 272 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039188165547024916, + "loss": 1.4925, + "step": 273 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039178817995852856, + "loss": 1.6916, + "step": 274 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039169418065675024, + "loss": 1.7875, + "step": 275 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039159965782163453, + "loss": 1.5011, + "step": 276 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039150461171133126, + "loss": 1.6323, + "step": 277 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003914090425854197, + "loss": 1.6161, + "step": 278 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039131295070490727, + "loss": 1.7276, + "step": 279 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003912163363322293, + "loss": 1.6082, + "step": 280 + }, + { + "epoch": 0.14, + "learning_rate": 0.000391119199731248, + "loss": 1.732, + "step": 281 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003910215411672516, + "loss": 1.6163, + "step": 282 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003909233609069542, + "loss": 1.8471, + "step": 283 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003908246592184946, + "loss": 1.6584, + "step": 284 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003907254363714355, + "loss": 1.6216, + "step": 285 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039062569263676307, + "loss": 1.6343, + "step": 286 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003905254282868861, + "loss": 1.682, + "step": 287 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039042464359563523, + "loss": 1.7296, + "step": 288 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039032333883826206, + "loss": 1.5631, + "step": 289 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039022151429143865, + "loss": 1.7181, + "step": 290 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039011917023325655, + "loss": 1.7264, + "step": 291 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003900163069432263, + "loss": 1.815, + "step": 292 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038991292470227636, + "loss": 1.5588, + "step": 293 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038980902379275257, + "loss": 1.7129, + "step": 294 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038970460449841725, + "loss": 1.6191, + "step": 295 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003895996671044485, + "loss": 1.6827, + "step": 296 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003894942118974394, + "loss": 1.5777, + "step": 297 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003893882391653973, + "loss": 1.664, + "step": 298 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003892817491977427, + "loss": 1.7806, + "step": 299 + }, + { + "epoch": 0.15, + "learning_rate": 0.000389174742285309, + "loss": 1.6249, + "step": 300 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003890672187203413, + "loss": 1.7223, + "step": 301 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003889591787964957, + "loss": 1.6471, + "step": 302 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003888506228088385, + "loss": 1.5887, + "step": 303 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003887415510538456, + "loss": 1.7015, + "step": 304 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038863196382940123, + "loss": 1.6102, + "step": 305 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038852186143479764, + "loss": 1.5874, + "step": 306 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003884112441707339, + "loss": 1.6151, + "step": 307 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038830011233931526, + "loss": 1.739, + "step": 308 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003881884662440525, + "loss": 1.7366, + "step": 309 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038807630618986063, + "loss": 1.6522, + "step": 310 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003879636324830584, + "loss": 1.71, + "step": 311 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003878504454313675, + "loss": 1.7334, + "step": 312 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038773674534391144, + "loss": 1.613, + "step": 313 + }, + { + "epoch": 0.16, + "learning_rate": 0.000387622532531215, + "loss": 1.7322, + "step": 314 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038750780730520325, + "loss": 1.5234, + "step": 315 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038739256997920063, + "loss": 1.5423, + "step": 316 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003872768208679302, + "loss": 1.6155, + "step": 317 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038716056028751284, + "loss": 1.6176, + "step": 318 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038704378855546615, + "loss": 1.696, + "step": 319 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038692650599070393, + "loss": 1.5135, + "step": 320 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003868087129135348, + "loss": 1.53, + "step": 321 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003866904096456619, + "loss": 1.6036, + "step": 322 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038657159651018163, + "loss": 1.8304, + "step": 323 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003864522738315829, + "loss": 1.6235, + "step": 324 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003863324419357463, + "loss": 1.5371, + "step": 325 + }, + { + "epoch": 0.16, + "learning_rate": 0.000386212101149943, + "loss": 1.6708, + "step": 326 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038609125180283414, + "loss": 1.6137, + "step": 327 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038596989422446954, + "loss": 1.546, + "step": 328 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003858480287462874, + "loss": 1.5953, + "step": 329 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038572565570111283, + "loss": 1.7084, + "step": 330 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003856027754231571, + "loss": 1.6816, + "step": 331 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038547938824801684, + "loss": 1.5167, + "step": 332 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038535549451267315, + "loss": 1.4679, + "step": 333 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003852310945554904, + "loss": 1.6548, + "step": 334 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003851061887162156, + "loss": 1.9461, + "step": 335 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003849807773359774, + "loss": 1.782, + "step": 336 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003848548607572852, + "loss": 1.7077, + "step": 337 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003847284393240279, + "loss": 1.5953, + "step": 338 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038460151338147333, + "loss": 1.5885, + "step": 339 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038447408327626733, + "loss": 1.5709, + "step": 340 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003843461493564323, + "loss": 1.7463, + "step": 341 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038421771197136696, + "loss": 1.7452, + "step": 342 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038408877147184483, + "loss": 1.4542, + "step": 343 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038395932821001354, + "loss": 1.4835, + "step": 344 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038382938253939385, + "loss": 1.6311, + "step": 345 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038369893481487847, + "loss": 1.6245, + "step": 346 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038356798539273146, + "loss": 1.6264, + "step": 347 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038343653463058705, + "loss": 1.5778, + "step": 348 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003833045828874485, + "loss": 1.6097, + "step": 349 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038317213052368744, + "loss": 1.67, + "step": 350 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038303917790104264, + "loss": 1.5424, + "step": 351 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038290572538261927, + "loss": 1.607, + "step": 352 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038277177333288765, + "loss": 1.6334, + "step": 353 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003826373221176823, + "loss": 1.5178, + "step": 354 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003825023721042012, + "loss": 1.5779, + "step": 355 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003823669236610044, + "loss": 1.5685, + "step": 356 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003822309771580132, + "loss": 1.6973, + "step": 357 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038209453296650944, + "loss": 1.8553, + "step": 358 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003819575914591338, + "loss": 1.6619, + "step": 359 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003818201530098853, + "loss": 1.7002, + "step": 360 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003816822179941204, + "loss": 1.7676, + "step": 361 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003815437867885514, + "loss": 1.5902, + "step": 362 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003814048597712458, + "loss": 1.7975, + "step": 363 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003812654373216254, + "loss": 1.5581, + "step": 364 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038112551982046484, + "loss": 1.7062, + "step": 365 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038098510764989087, + "loss": 1.7171, + "step": 366 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003808442011933814, + "loss": 1.5299, + "step": 367 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003807028008357638, + "loss": 1.5733, + "step": 368 + }, + { + "epoch": 0.18, + "learning_rate": 0.000380560906963215, + "loss": 1.8065, + "step": 369 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003804185199632591, + "loss": 1.4458, + "step": 370 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003802756402247674, + "loss": 1.5408, + "step": 371 + }, + { + "epoch": 0.19, + "learning_rate": 0.00038013226813795686, + "loss": 1.476, + "step": 372 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003799884040943889, + "loss": 1.5259, + "step": 373 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037984404848696873, + "loss": 1.6524, + "step": 374 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003796992017099438, + "loss": 1.6925, + "step": 375 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003795538641589033, + "loss": 1.5929, + "step": 376 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003794080362307766, + "loss": 1.5452, + "step": 377 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037926171832383226, + "loss": 1.6099, + "step": 378 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037911491083767715, + "loss": 1.5971, + "step": 379 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037896761417325524, + "loss": 1.8214, + "step": 380 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003788198287328463, + "loss": 1.4613, + "step": 381 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037867155492006516, + "loss": 1.6455, + "step": 382 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037852279313986044, + "loss": 1.4235, + "step": 383 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003783735437985133, + "loss": 1.6516, + "step": 384 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003782238073036367, + "loss": 1.5719, + "step": 385 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037807358406417374, + "loss": 1.5233, + "step": 386 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037792287449039713, + "loss": 1.8381, + "step": 387 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037777167899390776, + "loss": 1.5918, + "step": 388 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003776199979876335, + "loss": 1.7216, + "step": 389 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037746783188582827, + "loss": 1.8514, + "step": 390 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037731518110407084, + "loss": 1.5811, + "step": 391 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037716204605926367, + "loss": 1.6584, + "step": 392 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003770084271696317, + "loss": 1.5818, + "step": 393 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037685432485472145, + "loss": 1.7268, + "step": 394 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003766997395353995, + "loss": 1.6397, + "step": 395 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003765446716338518, + "loss": 1.7954, + "step": 396 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037638912157358223, + "loss": 1.6724, + "step": 397 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037623308977941124, + "loss": 1.5138, + "step": 398 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037607657667747523, + "loss": 1.6065, + "step": 399 + }, + { + "epoch": 0.2, + "learning_rate": 0.000375919582695225, + "loss": 1.6297, + "step": 400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003757621082614245, + "loss": 1.5817, + "step": 401 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037560415380615014, + "loss": 1.5774, + "step": 402 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037544571976078913, + "loss": 1.6676, + "step": 403 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003752868065580384, + "loss": 1.7233, + "step": 404 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037512741463190374, + "loss": 1.6582, + "step": 405 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003749675444176983, + "loss": 1.648, + "step": 406 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003748071963520412, + "loss": 1.581, + "step": 407 + }, + { + "epoch": 0.2, + "learning_rate": 0.000374646370872857, + "loss": 1.7211, + "step": 408 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037448506841937393, + "loss": 1.567, + "step": 409 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003743232894321229, + "loss": 1.4671, + "step": 410 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037416103435293616, + "loss": 1.6647, + "step": 411 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003739983036249465, + "loss": 1.5772, + "step": 412 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003738350976925854, + "loss": 1.5484, + "step": 413 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037367141700158247, + "loss": 1.5484, + "step": 414 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037350726199896384, + "loss": 1.6113, + "step": 415 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037334263313305074, + "loss": 1.5764, + "step": 416 + }, + { + "epoch": 0.21, + "learning_rate": 0.000373177530853459, + "loss": 1.6048, + "step": 417 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003730119556110971, + "loss": 1.6771, + "step": 418 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037284590785816534, + "loss": 1.6426, + "step": 419 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037267938804815443, + "loss": 1.6309, + "step": 420 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003725123966358444, + "loss": 1.6995, + "step": 421 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037234493407730307, + "loss": 1.8452, + "step": 422 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003721770008298852, + "loss": 1.6109, + "step": 423 + }, + { + "epoch": 0.21, + "learning_rate": 0.000372008597352231, + "loss": 1.6494, + "step": 424 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037183972410426483, + "loss": 1.7364, + "step": 425 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003716703815471942, + "loss": 1.6014, + "step": 426 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037150057014350796, + "loss": 1.6395, + "step": 427 + }, + { + "epoch": 0.21, + "learning_rate": 0.000371330290356976, + "loss": 1.6497, + "step": 428 + }, + { + "epoch": 0.21, + "learning_rate": 0.000371159542652647, + "loss": 1.4915, + "step": 429 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037098832749684767, + "loss": 1.4835, + "step": 430 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003708166453571813, + "loss": 1.8236, + "step": 431 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003706444967025267, + "loss": 1.5515, + "step": 432 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003704718820030366, + "loss": 1.4684, + "step": 433 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003702988017301368, + "loss": 1.8934, + "step": 434 + }, + { + "epoch": 0.22, + "learning_rate": 0.00037012525635652424, + "loss": 1.7834, + "step": 435 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003699512463561664, + "loss": 1.6533, + "step": 436 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036977677220429963, + "loss": 1.5941, + "step": 437 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036960183437742783, + "loss": 1.6756, + "step": 438 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036942643335332134, + "loss": 1.5123, + "step": 439 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036925056961101537, + "loss": 1.5916, + "step": 440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003690742436308091, + "loss": 1.4561, + "step": 441 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003688974558942639, + "loss": 1.7021, + "step": 442 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003687202068842024, + "loss": 1.4809, + "step": 443 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036854249708470686, + "loss": 1.4896, + "step": 444 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036836432698111806, + "loss": 1.5072, + "step": 445 + }, + { + "epoch": 0.22, + "learning_rate": 0.000368185697060034, + "loss": 1.5797, + "step": 446 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036800660780930835, + "loss": 1.5629, + "step": 447 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036782705971804923, + "loss": 1.64, + "step": 448 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036764705327661806, + "loss": 1.6466, + "step": 449 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036746658897662793, + "loss": 1.6695, + "step": 450 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036728566731094236, + "loss": 1.8699, + "step": 451 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003671042887736741, + "loss": 1.7128, + "step": 452 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036692245386018353, + "loss": 1.6109, + "step": 453 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003667401630670774, + "loss": 1.5786, + "step": 454 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003665574168922077, + "loss": 1.8162, + "step": 455 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036637421583466995, + "loss": 1.7225, + "step": 456 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003661905603948021, + "loss": 1.7744, + "step": 457 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003660064510741829, + "loss": 1.7345, + "step": 458 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003658218883756308, + "loss": 1.6193, + "step": 459 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036563687280320245, + "loss": 1.5931, + "step": 460 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036545140486219133, + "loss": 1.6595, + "step": 461 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003652654850591264, + "loss": 1.7491, + "step": 462 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003650791139017707, + "loss": 1.4703, + "step": 463 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036489229189911985, + "loss": 1.615, + "step": 464 + }, + { + "epoch": 0.23, + "learning_rate": 0.000364705019561401, + "loss": 1.6676, + "step": 465 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036451729740007084, + "loss": 1.5476, + "step": 466 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003643291259278149, + "loss": 1.4557, + "step": 467 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036414050565854574, + "loss": 1.7099, + "step": 468 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036395143710740143, + "loss": 1.7883, + "step": 469 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003637619207907447, + "loss": 1.4658, + "step": 470 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003635719572261608, + "loss": 1.6454, + "step": 471 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003633815469324566, + "loss": 1.6372, + "step": 472 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003631906904296591, + "loss": 1.4782, + "step": 473 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003629993882390139, + "loss": 1.6954, + "step": 474 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003628076408829836, + "loss": 1.7706, + "step": 475 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036261544888524695, + "loss": 1.7135, + "step": 476 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003624228127706968, + "loss": 1.6762, + "step": 477 + }, + { + "epoch": 0.24, + "learning_rate": 0.000362229733065439, + "loss": 1.697, + "step": 478 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003620362102967909, + "loss": 1.5524, + "step": 479 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036184224499327976, + "loss": 1.488, + "step": 480 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003616478376846417, + "loss": 1.5274, + "step": 481 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003614529889018197, + "loss": 1.437, + "step": 482 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003612576991769627, + "loss": 1.6498, + "step": 483 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036106196904342377, + "loss": 1.6727, + "step": 484 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036086579903575866, + "loss": 1.5969, + "step": 485 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003606691896897248, + "loss": 1.6462, + "step": 486 + }, + { + "epoch": 0.24, + "learning_rate": 0.000360472141542279, + "loss": 1.5607, + "step": 487 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003602746551315769, + "loss": 1.687, + "step": 488 + }, + { + "epoch": 0.24, + "learning_rate": 0.000360076730996971, + "loss": 1.3288, + "step": 489 + }, + { + "epoch": 0.24, + "learning_rate": 0.000359878369679009, + "loss": 1.6054, + "step": 490 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003596795717194328, + "loss": 1.6685, + "step": 491 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035948033766117687, + "loss": 1.5, + "step": 492 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035928066804836653, + "loss": 1.7705, + "step": 493 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003590805634263167, + "loss": 1.4625, + "step": 494 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003588800243415304, + "loss": 1.438, + "step": 495 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035867905134169716, + "loss": 1.7041, + "step": 496 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003584776449756915, + "loss": 1.5696, + "step": 497 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003582758057935717, + "loss": 1.571, + "step": 498 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003580735343465778, + "loss": 1.66, + "step": 499 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003578708311871308, + "loss": 1.4366, + "step": 500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003576676968688303, + "loss": 1.5916, + "step": 501 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003574641319464537, + "loss": 1.5619, + "step": 502 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003572601369759544, + "loss": 1.7142, + "step": 503 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003570557125144602, + "loss": 1.7435, + "step": 504 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035685085912027197, + "loss": 1.5314, + "step": 505 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035664557735286197, + "loss": 1.8234, + "step": 506 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003564398677728724, + "loss": 1.7898, + "step": 507 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003562337309421139, + "loss": 1.5523, + "step": 508 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035602716742356397, + "loss": 1.5225, + "step": 509 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003558201777813653, + "loss": 1.784, + "step": 510 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035561276258082444, + "loss": 1.5623, + "step": 511 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035540492238841025, + "loss": 1.6344, + "step": 512 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003551966577717522, + "loss": 1.7194, + "step": 513 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035498796929963895, + "loss": 1.4878, + "step": 514 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035477885754201666, + "loss": 1.5972, + "step": 515 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035456932306998765, + "loss": 1.4085, + "step": 516 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035435936645580846, + "loss": 1.4897, + "step": 517 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003541489882728889, + "loss": 1.5127, + "step": 518 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035393818909578985, + "loss": 1.6159, + "step": 519 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003537269695002221, + "loss": 1.6481, + "step": 520 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003535153300630444, + "loss": 1.6057, + "step": 521 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003533032713622625, + "loss": 1.6588, + "step": 522 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003530907939770269, + "loss": 1.6406, + "step": 523 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035287789848763166, + "loss": 1.4532, + "step": 524 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003526645854755128, + "loss": 1.4354, + "step": 525 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003524508555232464, + "loss": 1.6292, + "step": 526 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035223670921454757, + "loss": 1.7245, + "step": 527 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003520221471342682, + "loss": 1.5872, + "step": 528 + }, + { + "epoch": 0.26, + "learning_rate": 0.000351807169868396, + "loss": 1.5689, + "step": 529 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003515917780040522, + "loss": 1.719, + "step": 530 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003513759721294907, + "loss": 1.6705, + "step": 531 + }, + { + "epoch": 0.27, + "learning_rate": 0.00035115975283409593, + "loss": 1.6617, + "step": 532 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003509431207083814, + "loss": 1.6712, + "step": 533 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003507260763439882, + "loss": 1.5884, + "step": 534 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003505086203336831, + "loss": 1.7172, + "step": 535 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003502907532713573, + "loss": 1.6537, + "step": 536 + }, + { + "epoch": 0.27, + "learning_rate": 0.00035007247575202446, + "loss": 1.5581, + "step": 537 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003498537883718194, + "loss": 1.632, + "step": 538 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034963469172799615, + "loss": 1.5551, + "step": 539 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003494151864189266, + "loss": 1.7205, + "step": 540 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034919527304409857, + "loss": 1.5633, + "step": 541 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003489749522041145, + "loss": 1.5343, + "step": 542 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034875422450068963, + "loss": 1.6079, + "step": 543 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003485330905366503, + "loss": 1.4977, + "step": 544 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003483115509159325, + "loss": 1.6487, + "step": 545 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034808960624358, + "loss": 1.6665, + "step": 546 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034786725712574287, + "loss": 1.6199, + "step": 547 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003476445041696757, + "loss": 1.5882, + "step": 548 + }, + { + "epoch": 0.27, + "learning_rate": 0.000347421347983736, + "loss": 1.6632, + "step": 549 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034719778917738256, + "loss": 1.4553, + "step": 550 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003469738283611738, + "loss": 1.5907, + "step": 551 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034674946614676597, + "loss": 1.7215, + "step": 552 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003465247031469117, + "loss": 1.6233, + "step": 553 + }, + { + "epoch": 0.28, + "learning_rate": 0.000346299539975458, + "loss": 1.6185, + "step": 554 + }, + { + "epoch": 0.28, + "learning_rate": 0.000346073977247345, + "loss": 1.4927, + "step": 555 + }, + { + "epoch": 0.28, + "learning_rate": 0.000345848015578604, + "loss": 1.6307, + "step": 556 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034562165558635577, + "loss": 1.6981, + "step": 557 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034539489788880883, + "loss": 1.5469, + "step": 558 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003451677431052582, + "loss": 1.6071, + "step": 559 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003449401918560831, + "loss": 1.658, + "step": 560 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003447122447627456, + "loss": 1.6857, + "step": 561 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003444839024477889, + "loss": 1.5205, + "step": 562 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003442551655348355, + "loss": 1.5646, + "step": 563 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034402603464858564, + "loss": 1.7056, + "step": 564 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003437965104148156, + "loss": 1.6276, + "step": 565 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034356659346037585, + "loss": 1.6262, + "step": 566 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034333628441318936, + "loss": 1.6572, + "step": 567 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034310558390225, + "loss": 1.6792, + "step": 568 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003428744925576208, + "loss": 1.4858, + "step": 569 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003426430110104321, + "loss": 1.318, + "step": 570 + }, + { + "epoch": 0.29, + "learning_rate": 0.00034241113989288003, + "loss": 1.3952, + "step": 571 + }, + { + "epoch": 0.29, + "learning_rate": 0.00034217887983822463, + "loss": 1.7068, + "step": 572 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003419462314807879, + "loss": 1.7115, + "step": 573 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003417131954559529, + "loss": 1.4274, + "step": 574 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003414797724001609, + "loss": 1.4477, + "step": 575 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003412459629509105, + "loss": 1.4958, + "step": 576 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003410117677467553, + "loss": 1.513, + "step": 577 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003407771874273028, + "loss": 1.6229, + "step": 578 + }, + { + "epoch": 0.29, + "learning_rate": 0.00034054222263321194, + "loss": 1.5662, + "step": 579 + }, + { + "epoch": 0.29, + "learning_rate": 0.000340306874006192, + "loss": 1.6515, + "step": 580 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003400711421890001, + "loss": 1.762, + "step": 581 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033983502782544044, + "loss": 1.4794, + "step": 582 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003395985315603615, + "loss": 1.5187, + "step": 583 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033936165403965516, + "loss": 1.5355, + "step": 584 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003391243959102542, + "loss": 1.6659, + "step": 585 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033888675782013113, + "loss": 1.5111, + "step": 586 + }, + { + "epoch": 0.29, + "learning_rate": 0.000338648740418296, + "loss": 1.7099, + "step": 587 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003384103443547948, + "loss": 1.6258, + "step": 588 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003381715702807079, + "loss": 1.5812, + "step": 589 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033793241884814783, + "loss": 1.5988, + "step": 590 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003376928907102578, + "loss": 1.4556, + "step": 591 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003374529865212097, + "loss": 1.4119, + "step": 592 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033721270693620254, + "loss": 1.8002, + "step": 593 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033697205261146076, + "loss": 1.6446, + "step": 594 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033673102420423193, + "loss": 1.5731, + "step": 595 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003364896223727855, + "loss": 1.5963, + "step": 596 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033624784777641067, + "loss": 1.603, + "step": 597 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033600570107541463, + "loss": 1.7701, + "step": 598 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033576318293112103, + "loss": 1.4142, + "step": 599 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033552029400586773, + "loss": 1.6056, + "step": 600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033527703496300535, + "loss": 1.6508, + "step": 601 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033503340646689534, + "loss": 1.6636, + "step": 602 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033478940918290815, + "loss": 1.6291, + "step": 603 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033454504377742136, + "loss": 1.4496, + "step": 604 + }, + { + "epoch": 0.3, + "learning_rate": 0.000334300310917818, + "loss": 1.5698, + "step": 605 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003340552112724845, + "loss": 1.6541, + "step": 606 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003338097455108093, + "loss": 1.5061, + "step": 607 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033356391430318047, + "loss": 1.4854, + "step": 608 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003333177183209842, + "loss": 1.6627, + "step": 609 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003330711582366031, + "loss": 1.535, + "step": 610 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033282423472341384, + "loss": 1.6511, + "step": 611 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003325769484557859, + "loss": 1.5739, + "step": 612 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003323293001090795, + "loss": 1.3858, + "step": 613 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003320812903596434, + "loss": 1.646, + "step": 614 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003318329198848138, + "loss": 1.7639, + "step": 615 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003315841893629118, + "loss": 1.447, + "step": 616 + }, + { + "epoch": 0.31, + "learning_rate": 0.000331335099473242, + "loss": 1.6622, + "step": 617 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033108565089609034, + "loss": 1.5356, + "step": 618 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033083584431272225, + "loss": 1.4241, + "step": 619 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003305856804053812, + "loss": 1.8722, + "step": 620 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033033515985728633, + "loss": 1.4449, + "step": 621 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003300842833526309, + "loss": 1.8373, + "step": 622 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032983305157658027, + "loss": 1.6631, + "step": 623 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032958146521527, + "loss": 1.771, + "step": 624 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032932952495580425, + "loss": 1.6418, + "step": 625 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032907723148625355, + "loss": 1.4662, + "step": 626 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003288245854956531, + "loss": 1.655, + "step": 627 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032857158767400104, + "loss": 1.4315, + "step": 628 + }, + { + "epoch": 0.31, + "learning_rate": 0.000328318238712256, + "loss": 1.716, + "step": 629 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032806453930233595, + "loss": 1.6393, + "step": 630 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003278104901371159, + "loss": 1.7138, + "step": 631 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003275560919104259, + "loss": 1.6158, + "step": 632 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003273013453170496, + "loss": 1.5414, + "step": 633 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003270462510527218, + "loss": 1.5223, + "step": 634 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003267908098141271, + "loss": 1.6761, + "step": 635 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003265350222988972, + "loss": 1.443, + "step": 636 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032627888920561024, + "loss": 1.756, + "step": 637 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003260224112337876, + "loss": 1.506, + "step": 638 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003257655890838927, + "loss": 1.6524, + "step": 639 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032550842345732917, + "loss": 1.6236, + "step": 640 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032525091505643825, + "loss": 1.578, + "step": 641 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003249930645844978, + "loss": 1.2744, + "step": 642 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032473487274571963, + "loss": 1.7205, + "step": 643 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032447634024524786, + "loss": 1.6393, + "step": 644 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003242174677891571, + "loss": 1.5814, + "step": 645 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003239582560844503, + "loss": 1.4664, + "step": 646 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003236987058390571, + "loss": 1.6065, + "step": 647 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003234388177618314, + "loss": 1.5789, + "step": 648 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032317859256255016, + "loss": 1.4482, + "step": 649 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032291803095191074, + "loss": 1.5858, + "step": 650 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032265713364152933, + "loss": 1.6917, + "step": 651 + }, + { + "epoch": 0.33, + "learning_rate": 0.000322395901343939, + "loss": 1.5473, + "step": 652 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032213433477258776, + "loss": 1.574, + "step": 653 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003218724346418364, + "loss": 1.5813, + "step": 654 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003216102016669568, + "loss": 1.4462, + "step": 655 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003213476365641298, + "loss": 1.5667, + "step": 656 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032108474005044325, + "loss": 1.6149, + "step": 657 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003208215128438904, + "loss": 1.5657, + "step": 658 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003205579556633673, + "loss": 1.5153, + "step": 659 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003202940692286714, + "loss": 1.5941, + "step": 660 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032002985426049925, + "loss": 1.6282, + "step": 661 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031976531148044475, + "loss": 1.478, + "step": 662 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003195004416109971, + "loss": 1.4623, + "step": 663 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031923524537553864, + "loss": 1.5935, + "step": 664 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003189697234983432, + "loss": 1.5293, + "step": 665 + }, + { + "epoch": 0.33, + "learning_rate": 0.000318703876704574, + "loss": 1.4841, + "step": 666 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031843770572028145, + "loss": 1.4985, + "step": 667 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003181712112724015, + "loss": 1.604, + "step": 668 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003179043940887535, + "loss": 1.5311, + "step": 669 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003176372548980381, + "loss": 1.4669, + "step": 670 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031736979442983557, + "loss": 1.4565, + "step": 671 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003171020134146035, + "loss": 1.6435, + "step": 672 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031683391258367484, + "loss": 1.6284, + "step": 673 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031656549266925613, + "loss": 1.4404, + "step": 674 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031629675440442536, + "loss": 1.5517, + "step": 675 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031602769852312983, + "loss": 1.6271, + "step": 676 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031575832576018437, + "loss": 1.6055, + "step": 677 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031548863685126926, + "loss": 1.7491, + "step": 678 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031521863253292814, + "loss": 1.6137, + "step": 679 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031494831354256605, + "loss": 1.5847, + "step": 680 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031467768061844753, + "loss": 1.3082, + "step": 681 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003144067344996944, + "loss": 1.6252, + "step": 682 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003141354759262839, + "loss": 1.5987, + "step": 683 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003138639056390465, + "loss": 1.4824, + "step": 684 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003135920243796641, + "loss": 1.3903, + "step": 685 + }, + { + "epoch": 0.34, + "learning_rate": 0.000313319832890668, + "loss": 1.5826, + "step": 686 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003130473319154365, + "loss": 1.5808, + "step": 687 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031277452219819325, + "loss": 1.6244, + "step": 688 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003125014044840051, + "loss": 1.5069, + "step": 689 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031222797951878026, + "loss": 1.6237, + "step": 690 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031195424804926567, + "loss": 1.4301, + "step": 691 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031168021082304565, + "loss": 1.5819, + "step": 692 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003114058685885396, + "loss": 1.8096, + "step": 693 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003111312220949996, + "loss": 1.4657, + "step": 694 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031085627209250915, + "loss": 1.6379, + "step": 695 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031058101933198023, + "loss": 1.4865, + "step": 696 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031030546456515195, + "loss": 1.3837, + "step": 697 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003100296085445881, + "loss": 1.6494, + "step": 698 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003097534520236754, + "loss": 1.7428, + "step": 699 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030947699575662087, + "loss": 1.5992, + "step": 700 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003092002404984506, + "loss": 1.6187, + "step": 701 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030892318700500703, + "loss": 1.7533, + "step": 702 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003086458360329471, + "loss": 1.6341, + "step": 703 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003083681883397403, + "loss": 1.6211, + "step": 704 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030809024468366635, + "loss": 1.5643, + "step": 705 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030781200582381336, + "loss": 1.5116, + "step": 706 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003075334725200757, + "loss": 1.627, + "step": 707 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030725464553315186, + "loss": 1.8024, + "step": 708 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030697552562454223, + "loss": 1.4348, + "step": 709 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030669611355654743, + "loss": 1.4995, + "step": 710 + }, + { + "epoch": 0.36, + "learning_rate": 0.000306416410092266, + "loss": 1.6125, + "step": 711 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003061364159955921, + "loss": 1.6559, + "step": 712 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003058561320312139, + "loss": 1.469, + "step": 713 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030557555896461086, + "loss": 1.404, + "step": 714 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003052946975620524, + "loss": 1.6919, + "step": 715 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003050135485905951, + "loss": 1.5993, + "step": 716 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003047321128180812, + "loss": 1.5386, + "step": 717 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030445039101313593, + "loss": 1.472, + "step": 718 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030416838394516587, + "loss": 1.413, + "step": 719 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003038860923843567, + "loss": 1.6552, + "step": 720 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030360351710167094, + "loss": 1.4909, + "step": 721 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003033206588688461, + "loss": 1.4127, + "step": 722 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003030375184583923, + "loss": 1.4671, + "step": 723 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030275409664359056, + "loss": 1.6795, + "step": 724 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030247039419849025, + "loss": 1.6729, + "step": 725 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003021864118979071, + "loss": 1.4749, + "step": 726 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003019021505174215, + "loss": 1.6014, + "step": 727 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003016176108333756, + "loss": 1.6341, + "step": 728 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030133279362287187, + "loss": 1.823, + "step": 729 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003010476996637706, + "loss": 1.5576, + "step": 730 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003007623297346881, + "loss": 1.729, + "step": 731 + }, + { + "epoch": 0.37, + "learning_rate": 0.00030047668461499413, + "loss": 1.4666, + "step": 732 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003001907650848103, + "loss": 1.5468, + "step": 733 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002999045719250074, + "loss": 1.6001, + "step": 734 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029961810591720364, + "loss": 1.6967, + "step": 735 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029933136784376235, + "loss": 1.5668, + "step": 736 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029904435848779, + "loss": 1.5286, + "step": 737 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002987570786331339, + "loss": 1.6096, + "step": 738 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029846952906438, + "loss": 1.5456, + "step": 739 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029818171056685103, + "loss": 1.6902, + "step": 740 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002978936239266042, + "loss": 1.7138, + "step": 741 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029760526993042886, + "loss": 1.5856, + "step": 742 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029731664936584473, + "loss": 1.464, + "step": 743 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029702776302109943, + "loss": 1.4777, + "step": 744 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029673861168516634, + "loss": 1.6905, + "step": 745 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002964491961477429, + "loss": 1.54, + "step": 746 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029615951719924783, + "loss": 1.6324, + "step": 747 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029586957563081925, + "loss": 1.5705, + "step": 748 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002955793722343127, + "loss": 1.7081, + "step": 749 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002952889078022985, + "loss": 1.5256, + "step": 750 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002949981831280602, + "loss": 1.4953, + "step": 751 + }, + { + "epoch": 0.38, + "learning_rate": 0.000294707199005592, + "loss": 1.51, + "step": 752 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029441595622959656, + "loss": 1.5811, + "step": 753 + }, + { + "epoch": 0.38, + "learning_rate": 0.000294124455595483, + "loss": 1.5375, + "step": 754 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029383269789936466, + "loss": 1.7053, + "step": 755 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002935406839380571, + "loss": 1.4679, + "step": 756 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002932484145090755, + "loss": 1.6812, + "step": 757 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002929558904106329, + "loss": 1.5965, + "step": 758 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029266311244163784, + "loss": 1.7395, + "step": 759 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029237008140169227, + "loss": 1.5782, + "step": 760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002920767980910891, + "loss": 1.6442, + "step": 761 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029178326331081043, + "loss": 1.5455, + "step": 762 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002914894778625251, + "loss": 1.3559, + "step": 763 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029119544254858643, + "loss": 1.4975, + "step": 764 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002909011581720302, + "loss": 1.6059, + "step": 765 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002906066255365724, + "loss": 1.6428, + "step": 766 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029031184544660717, + "loss": 1.6501, + "step": 767 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029001681870720434, + "loss": 1.4806, + "step": 768 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002897215461241072, + "loss": 1.4095, + "step": 769 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028942602850373086, + "loss": 1.6663, + "step": 770 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002891302666531592, + "loss": 1.8152, + "step": 771 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002888342613801436, + "loss": 1.7046, + "step": 772 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028853801349309983, + "loss": 1.6848, + "step": 773 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028824152380110645, + "loss": 1.6858, + "step": 774 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002879447931139023, + "loss": 1.5137, + "step": 775 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028764782224188453, + "loss": 1.6079, + "step": 776 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028735061199610626, + "loss": 1.7438, + "step": 777 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002870531631882742, + "loss": 1.6403, + "step": 778 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002867554766307468, + "loss": 1.6697, + "step": 779 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002864575531365316, + "loss": 1.513, + "step": 780 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028615939351928337, + "loss": 1.5807, + "step": 781 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028586099859330183, + "loss": 1.5632, + "step": 782 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028556236917352926, + "loss": 1.7058, + "step": 783 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028526350607554823, + "loss": 1.5961, + "step": 784 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002849644101155797, + "loss": 1.6368, + "step": 785 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002846650821104805, + "loss": 1.5342, + "step": 786 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002843655228777413, + "loss": 1.6549, + "step": 787 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002840657332354841, + "loss": 1.5317, + "step": 788 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028376571400246035, + "loss": 1.687, + "step": 789 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002834654659980484, + "loss": 1.421, + "step": 790 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002831649900422514, + "loss": 1.639, + "step": 791 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028286428695569523, + "loss": 1.62, + "step": 792 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028256335755962584, + "loss": 1.5643, + "step": 793 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002822622026759074, + "loss": 1.6884, + "step": 794 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028196082312701977, + "loss": 1.7462, + "step": 795 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002816592197360566, + "loss": 1.6372, + "step": 796 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028135739332672274, + "loss": 1.5019, + "step": 797 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002810553447233321, + "loss": 1.5365, + "step": 798 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002807530747508056, + "loss": 1.6709, + "step": 799 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002804505842346684, + "loss": 1.5888, + "step": 800 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028014787400104825, + "loss": 1.6876, + "step": 801 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027984494487667296, + "loss": 1.5555, + "step": 802 + }, + { + "epoch": 0.4, + "learning_rate": 0.000279541797688868, + "loss": 1.762, + "step": 803 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027923843326555463, + "loss": 1.5098, + "step": 804 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027893485243524706, + "loss": 1.7013, + "step": 805 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002786310560270509, + "loss": 1.5521, + "step": 806 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002783270448706601, + "loss": 1.6102, + "step": 807 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027802281979635564, + "loss": 1.5667, + "step": 808 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027771838163500223, + "loss": 1.3938, + "step": 809 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027741373121804684, + "loss": 1.5571, + "step": 810 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002771088693775159, + "loss": 1.4687, + "step": 811 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002768037969460135, + "loss": 1.6164, + "step": 812 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002764985147567187, + "loss": 1.5342, + "step": 813 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002761930236433836, + "loss": 1.675, + "step": 814 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027588732444033066, + "loss": 1.5154, + "step": 815 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027558141798245074, + "loss": 1.6015, + "step": 816 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002752753051052007, + "loss": 1.6213, + "step": 817 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002749689866446015, + "loss": 1.7362, + "step": 818 + }, + { + "epoch": 0.41, + "learning_rate": 0.000274662463437235, + "loss": 1.5406, + "step": 819 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002743557363202427, + "loss": 1.5625, + "step": 820 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002740488061313225, + "loss": 1.7225, + "step": 821 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027374167370872746, + "loss": 1.5959, + "step": 822 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027343433989126273, + "loss": 1.5027, + "step": 823 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027312680551828337, + "loss": 1.5802, + "step": 824 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002728190714296923, + "loss": 1.6822, + "step": 825 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027251113846593785, + "loss": 1.5289, + "step": 826 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002722030074680114, + "loss": 1.6296, + "step": 827 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002718946792774455, + "loss": 1.6557, + "step": 828 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002715861547363109, + "loss": 1.5834, + "step": 829 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027127743468721466, + "loss": 1.4457, + "step": 830 + }, + { + "epoch": 0.42, + "learning_rate": 0.00027096851997329794, + "loss": 1.5919, + "step": 831 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002706594114382335, + "loss": 1.7794, + "step": 832 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002703501099262233, + "loss": 1.5044, + "step": 833 + }, + { + "epoch": 0.42, + "learning_rate": 0.00027004061628199645, + "loss": 1.4793, + "step": 834 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026973093135080684, + "loss": 1.4537, + "step": 835 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026942105597843076, + "loss": 1.5793, + "step": 836 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026911099101116444, + "loss": 1.5592, + "step": 837 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026880073729582213, + "loss": 1.4737, + "step": 838 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002684902956797335, + "loss": 1.5163, + "step": 839 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002681796670107413, + "loss": 1.3649, + "step": 840 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002678688521371993, + "loss": 1.6616, + "step": 841 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026755785190796965, + "loss": 1.5122, + "step": 842 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002672466671724208, + "loss": 1.6713, + "step": 843 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002669352987804251, + "loss": 1.594, + "step": 844 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026662374758235655, + "loss": 1.5373, + "step": 845 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002663120144290883, + "loss": 1.3893, + "step": 846 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002660001001719904, + "loss": 1.6124, + "step": 847 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026568800566292763, + "loss": 1.5687, + "step": 848 + }, + { + "epoch": 0.42, + "learning_rate": 0.000265375731754257, + "loss": 1.2931, + "step": 849 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002650632792988255, + "loss": 1.5906, + "step": 850 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026475064914996773, + "loss": 1.5324, + "step": 851 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002644378421615036, + "loss": 1.6167, + "step": 852 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026412485918773595, + "loss": 1.6344, + "step": 853 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026381170108344827, + "loss": 1.5958, + "step": 854 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026349836870390235, + "loss": 1.4789, + "step": 855 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026318486290483593, + "loss": 1.7105, + "step": 856 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026287118454246033, + "loss": 1.4852, + "step": 857 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026255733447345833, + "loss": 1.6808, + "step": 858 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002622433135549814, + "loss": 1.5825, + "step": 859 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026192912264464785, + "loss": 1.6492, + "step": 860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026161476260054014, + "loss": 1.5621, + "step": 861 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002613002342812026, + "loss": 1.6808, + "step": 862 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026098553854563916, + "loss": 1.5637, + "step": 863 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026067067625331117, + "loss": 1.6629, + "step": 864 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002603556482641345, + "loss": 1.3583, + "step": 865 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026004045543847796, + "loss": 1.6123, + "step": 866 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025972509863716016, + "loss": 1.4788, + "step": 867 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002594095787214478, + "loss": 1.5373, + "step": 868 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025909389655305305, + "loss": 1.5852, + "step": 869 + }, + { + "epoch": 0.43, + "learning_rate": 0.000258778052994131, + "loss": 1.5569, + "step": 870 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002584620489072777, + "loss": 1.54, + "step": 871 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025814588515552753, + "loss": 1.4899, + "step": 872 + }, + { + "epoch": 0.44, + "learning_rate": 0.000257829562602351, + "loss": 1.4598, + "step": 873 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025751308211165223, + "loss": 1.5688, + "step": 874 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002571964445477668, + "loss": 1.4195, + "step": 875 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002568796507754592, + "loss": 1.6566, + "step": 876 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002565627016599205, + "loss": 1.406, + "step": 877 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025624559806676603, + "loss": 1.827, + "step": 878 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025592834086203315, + "loss": 1.4952, + "step": 879 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002556109309121786, + "loss": 1.4737, + "step": 880 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002552933690840762, + "loss": 1.4676, + "step": 881 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002549756562450149, + "loss": 1.5686, + "step": 882 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002546577932626957, + "loss": 1.639, + "step": 883 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002543397810052299, + "loss": 1.6325, + "step": 884 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025402162034113637, + "loss": 1.4458, + "step": 885 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025370331213933926, + "loss": 1.4646, + "step": 886 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002533848572691658, + "loss": 1.7105, + "step": 887 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025306625660034365, + "loss": 1.6741, + "step": 888 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002527475110029988, + "loss": 1.6431, + "step": 889 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002524286213476529, + "loss": 1.7294, + "step": 890 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025210958850522104, + "loss": 1.5165, + "step": 891 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002517904133470095, + "loss": 1.5219, + "step": 892 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025147109674471317, + "loss": 1.3967, + "step": 893 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002511516395704132, + "loss": 1.6214, + "step": 894 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025083204269657467, + "loss": 1.6539, + "step": 895 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002505123069960442, + "loss": 1.5878, + "step": 896 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002501924333420475, + "loss": 1.6149, + "step": 897 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002498724226081872, + "loss": 1.6648, + "step": 898 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002495522756684402, + "loss": 1.4552, + "step": 899 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024923199339715543, + "loss": 1.531, + "step": 900 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002489115766690513, + "loss": 1.6431, + "step": 901 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002485910263592135, + "loss": 1.5006, + "step": 902 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024827034334309265, + "loss": 1.4951, + "step": 903 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024794952849650174, + "loss": 1.3887, + "step": 904 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002476285826956138, + "loss": 1.593, + "step": 905 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002473075068169593, + "loss": 1.4254, + "step": 906 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024698630173742436, + "loss": 1.5903, + "step": 907 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002466649683342477, + "loss": 1.5224, + "step": 908 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002466649683342477, + "loss": 1.6739, + "step": 909 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002463435074850184, + "loss": 1.4916, + "step": 910 + }, + { + "epoch": 0.46, + "learning_rate": 0.000246021920067674, + "loss": 1.3713, + "step": 911 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002457002069604973, + "loss": 1.3699, + "step": 912 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002453783690421146, + "loss": 1.3908, + "step": 913 + }, + { + "epoch": 0.46, + "learning_rate": 0.000245056407191493, + "loss": 1.4784, + "step": 914 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024473432228793807, + "loss": 1.5448, + "step": 915 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002444121152110915, + "loss": 1.4938, + "step": 916 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024408978684092847, + "loss": 1.5958, + "step": 917 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024376733805775574, + "loss": 1.5453, + "step": 918 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024344476974220855, + "loss": 1.4917, + "step": 919 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024312208277524892, + "loss": 1.4726, + "step": 920 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024279927803816276, + "loss": 1.7581, + "step": 921 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024247635641255766, + "loss": 1.517, + "step": 922 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024215331878036037, + "loss": 1.5761, + "step": 923 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024183016602381447, + "loss": 1.3919, + "step": 924 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024150689902547811, + "loss": 1.6481, + "step": 925 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024118351866822137, + "loss": 1.5396, + "step": 926 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024086002583522382, + "loss": 1.6246, + "step": 927 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024053642140997225, + "loss": 1.4913, + "step": 928 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024021270627625825, + "loss": 1.6974, + "step": 929 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023988888131817583, + "loss": 1.727, + "step": 930 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002395649474201189, + "loss": 1.4335, + "step": 931 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002392409054667788, + "loss": 1.7088, + "step": 932 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023891675634314202, + "loss": 1.6817, + "step": 933 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023859250093448783, + "loss": 1.7091, + "step": 934 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023826814012638568, + "loss": 1.5044, + "step": 935 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023794367480469295, + "loss": 1.6067, + "step": 936 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002376191058555524, + "loss": 1.6023, + "step": 937 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023729443416538982, + "loss": 1.5024, + "step": 938 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023696966062091148, + "loss": 1.6559, + "step": 939 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023664478610910207, + "loss": 1.6554, + "step": 940 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002363198115172219, + "loss": 1.5379, + "step": 941 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023599473773280454, + "loss": 1.4307, + "step": 942 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002356695656436546, + "loss": 1.6144, + "step": 943 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023534429613784497, + "loss": 1.6197, + "step": 944 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023501893010371476, + "loss": 1.6309, + "step": 945 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023469346842986677, + "loss": 1.5075, + "step": 946 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002343679120051648, + "loss": 1.6614, + "step": 947 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023404226171873157, + "loss": 1.4659, + "step": 948 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023371651845994603, + "loss": 1.3864, + "step": 949 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023339068311844114, + "loss": 1.3556, + "step": 950 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002330647565841013, + "loss": 1.5859, + "step": 951 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002327387397470601, + "loss": 1.6599, + "step": 952 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023241263349769748, + "loss": 1.5984, + "step": 953 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002320864387266378, + "loss": 1.5987, + "step": 954 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023176015632474703, + "loss": 1.727, + "step": 955 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023143378718313066, + "loss": 1.4925, + "step": 956 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023110733219313087, + "loss": 1.5061, + "step": 957 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002307807922463245, + "loss": 1.6809, + "step": 958 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023045416823452023, + "loss": 1.5185, + "step": 959 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023012746104975632, + "loss": 1.5064, + "step": 960 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022980067158429832, + "loss": 1.4743, + "step": 961 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022947380073063656, + "loss": 1.5773, + "step": 962 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022914684938148342, + "loss": 1.3633, + "step": 963 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022881981842977117, + "loss": 1.6052, + "step": 964 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022849270876864965, + "loss": 1.5255, + "step": 965 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022816552129148354, + "loss": 1.7143, + "step": 966 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022783825689184998, + "loss": 1.5166, + "step": 967 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022751091646353632, + "loss": 1.5318, + "step": 968 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022718350090053752, + "loss": 1.512, + "step": 969 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022685601109705364, + "loss": 1.5758, + "step": 970 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022652844794748765, + "loss": 1.4, + "step": 971 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002262008123464427, + "loss": 1.5296, + "step": 972 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002258731051887199, + "loss": 1.7131, + "step": 973 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022554532736931577, + "loss": 1.5665, + "step": 974 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022521747978341972, + "loss": 1.4738, + "step": 975 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022488956332641192, + "loss": 1.6389, + "step": 976 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022456157889386033, + "loss": 1.6043, + "step": 977 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022423352738151886, + "loss": 1.5159, + "step": 978 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022390540968532442, + "loss": 1.5425, + "step": 979 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002235772267013947, + "loss": 1.5404, + "step": 980 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022324897932602574, + "loss": 1.6195, + "step": 981 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002229206684556895, + "loss": 1.5286, + "step": 982 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002225922949870311, + "loss": 1.4833, + "step": 983 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022226385981686706, + "loss": 1.5749, + "step": 984 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022193536384218195, + "loss": 1.718, + "step": 985 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022160680796012665, + "loss": 1.4774, + "step": 986 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022127819306801567, + "loss": 1.4944, + "step": 987 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022094952006332453, + "loss": 1.6471, + "step": 988 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022062078984368756, + "loss": 1.6706, + "step": 989 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022029200330689545, + "loss": 1.5164, + "step": 990 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021996316135089239, + "loss": 1.6634, + "step": 991 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021963426487377433, + "loss": 1.7244, + "step": 992 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021930531477378572, + "loss": 1.4838, + "step": 993 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002189763119493178, + "loss": 1.6398, + "step": 994 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021864725729890555, + "loss": 1.3957, + "step": 995 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002183181517212256, + "loss": 1.6115, + "step": 996 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021798899611509377, + "loss": 1.7738, + "step": 997 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021765979137946233, + "loss": 1.6396, + "step": 998 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021733053841341775, + "loss": 1.5969, + "step": 999 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021700123811617834, + "loss": 1.4756, + "step": 1000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002166718913870916, + "loss": 1.5806, + "step": 1001 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002163424991256318, + "loss": 1.5961, + "step": 1002 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002160130622313977, + "loss": 1.5734, + "step": 1003 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002156835816041098, + "loss": 1.4678, + "step": 1004 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002153540581436081, + "loss": 1.587, + "step": 1005 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021502449274984957, + "loss": 1.6823, + "step": 1006 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021469488632290583, + "loss": 1.5939, + "step": 1007 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021436523976296038, + "loss": 1.6331, + "step": 1008 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021403555397030632, + "loss": 1.4015, + "step": 1009 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021370582984534412, + "loss": 1.3894, + "step": 1010 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021337606828857872, + "loss": 1.5989, + "step": 1011 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021304627020061738, + "loss": 1.444, + "step": 1012 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021271643648216717, + "loss": 1.5686, + "step": 1013 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002123865680340323, + "loss": 1.6826, + "step": 1014 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021205666575711205, + "loss": 1.4933, + "step": 1015 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021172673055239795, + "loss": 1.503, + "step": 1016 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021139676332097146, + "loss": 1.6633, + "step": 1017 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021106676496400164, + "loss": 1.6478, + "step": 1018 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021073673638274238, + "loss": 1.4642, + "step": 1019 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021040667847853017, + "loss": 1.6525, + "step": 1020 + }, + { + "epoch": 0.51, + "learning_rate": 0.00021007659215278174, + "loss": 1.4527, + "step": 1021 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020974647830699112, + "loss": 1.4418, + "step": 1022 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020941633784272786, + "loss": 1.3624, + "step": 1023 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020908617166163392, + "loss": 1.4555, + "step": 1024 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020875598066542165, + "loss": 1.6097, + "step": 1025 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002084257657558712, + "loss": 1.4663, + "step": 1026 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002080955278348278, + "loss": 1.5117, + "step": 1027 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002077652678041999, + "loss": 1.4675, + "step": 1028 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020743498656595598, + "loss": 1.5064, + "step": 1029 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020710468502212263, + "loss": 1.5412, + "step": 1030 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002067743640747818, + "loss": 1.4103, + "step": 1031 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002064440246260685, + "loss": 1.4971, + "step": 1032 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020611366757816835, + "loss": 1.5548, + "step": 1033 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002057832938333148, + "loss": 1.4879, + "step": 1034 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002054529042937871, + "loss": 1.441, + "step": 1035 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020512249986190755, + "loss": 1.5259, + "step": 1036 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020479208144003912, + "loss": 1.6095, + "step": 1037 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020446164993058309, + "loss": 1.3883, + "step": 1038 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002041312062359764, + "loss": 1.5599, + "step": 1039 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020380075125868926, + "loss": 1.5393, + "step": 1040 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020347028590122257, + "loss": 1.5833, + "step": 1041 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002031398110661059, + "loss": 1.5744, + "step": 1042 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020280932765589444, + "loss": 1.5294, + "step": 1043 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020247883657316696, + "loss": 1.6453, + "step": 1044 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020214833872052301, + "loss": 1.5872, + "step": 1045 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002018178350005807, + "loss": 1.5812, + "step": 1046 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020148732631597427, + "loss": 1.3474, + "step": 1047 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020115681356935148, + "loss": 1.6486, + "step": 1048 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020082629766337103, + "loss": 1.4995, + "step": 1049 + }, + { + "epoch": 0.52, + "learning_rate": 0.00020049577950070045, + "loss": 1.6424, + "step": 1050 + }, + { + "epoch": 0.53, + "learning_rate": 0.00020016525998401327, + "loss": 1.4996, + "step": 1051 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019983474001598677, + "loss": 1.6066, + "step": 1052 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019950422049929965, + "loss": 1.5163, + "step": 1053 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019917370233662898, + "loss": 1.4323, + "step": 1054 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019884318643064854, + "loss": 1.5404, + "step": 1055 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019851267368402577, + "loss": 1.5302, + "step": 1056 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019818216499941935, + "loss": 1.7073, + "step": 1057 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019785166127947709, + "loss": 1.6111, + "step": 1058 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001975211634268331, + "loss": 1.4342, + "step": 1059 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019719067234410555, + "loss": 1.5892, + "step": 1060 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019686018893389417, + "loss": 1.3523, + "step": 1061 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019652971409877748, + "loss": 1.4861, + "step": 1062 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019619924874131087, + "loss": 1.5304, + "step": 1063 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019586879376402362, + "loss": 1.7805, + "step": 1064 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019553835006941688, + "loss": 1.4986, + "step": 1065 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001952079185599609, + "loss": 1.5767, + "step": 1066 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001948775001380925, + "loss": 1.6053, + "step": 1067 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019454709570621297, + "loss": 1.6215, + "step": 1068 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019421670616668522, + "loss": 1.5673, + "step": 1069 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019388633242183175, + "loss": 1.5943, + "step": 1070 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019355597537393154, + "loss": 1.5343, + "step": 1071 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019322563592521824, + "loss": 1.5878, + "step": 1072 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019289531497787744, + "loss": 1.4234, + "step": 1073 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019256501343404406, + "loss": 1.4744, + "step": 1074 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001922347321958002, + "loss": 1.5477, + "step": 1075 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019190447216517223, + "loss": 1.421, + "step": 1076 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019157423424412886, + "loss": 1.709, + "step": 1077 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019124401933457837, + "loss": 1.6079, + "step": 1078 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001909138283383661, + "loss": 1.4921, + "step": 1079 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019058366215727227, + "loss": 1.6009, + "step": 1080 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019025352169300895, + "loss": 1.4151, + "step": 1081 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018992340784721834, + "loss": 1.5145, + "step": 1082 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018959332152146987, + "loss": 1.7787, + "step": 1083 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018926326361725767, + "loss": 1.6729, + "step": 1084 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018893323503599844, + "loss": 1.8148, + "step": 1085 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018860323667902856, + "loss": 1.6266, + "step": 1086 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001882732694476021, + "loss": 1.5139, + "step": 1087 + }, + { + "epoch": 0.54, + "learning_rate": 0.000187943334242888, + "loss": 1.5239, + "step": 1088 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001876134319659677, + "loss": 1.6728, + "step": 1089 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001872835635178329, + "loss": 1.7159, + "step": 1090 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018695372979938264, + "loss": 1.5613, + "step": 1091 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001866239317114213, + "loss": 1.7827, + "step": 1092 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018629417015465592, + "loss": 1.4987, + "step": 1093 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018596444602969367, + "loss": 1.599, + "step": 1094 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018563476023703972, + "loss": 1.3636, + "step": 1095 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001853051136770942, + "loss": 1.6464, + "step": 1096 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001849755072501504, + "loss": 1.451, + "step": 1097 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018464594185639194, + "loss": 1.4646, + "step": 1098 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018431641839589021, + "loss": 1.6525, + "step": 1099 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018398693776860235, + "loss": 1.4996, + "step": 1100 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001836575008743682, + "loss": 1.483, + "step": 1101 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018332810861290848, + "loss": 1.4097, + "step": 1102 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001829987618838217, + "loss": 1.5803, + "step": 1103 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018266946158658227, + "loss": 1.6121, + "step": 1104 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018234020862053775, + "loss": 1.6006, + "step": 1105 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018201100388490627, + "loss": 1.627, + "step": 1106 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018168184827877442, + "loss": 1.7058, + "step": 1107 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018135274270109447, + "loss": 1.673, + "step": 1108 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001810236880506822, + "loss": 1.9456, + "step": 1109 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018069468522621433, + "loss": 1.6382, + "step": 1110 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018036573512622572, + "loss": 1.4667, + "step": 1111 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018003683864910763, + "loss": 1.4617, + "step": 1112 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001797079966931046, + "loss": 1.4715, + "step": 1113 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001793792101563124, + "loss": 1.5749, + "step": 1114 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017905047993667554, + "loss": 1.6088, + "step": 1115 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001787218069319844, + "loss": 1.6457, + "step": 1116 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001783931920398734, + "loss": 1.6885, + "step": 1117 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017806463615781807, + "loss": 1.4638, + "step": 1118 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017773614018313293, + "loss": 1.5279, + "step": 1119 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001774077050129689, + "loss": 1.3872, + "step": 1120 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017707933154431055, + "loss": 1.6311, + "step": 1121 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017675102067397436, + "loss": 1.4838, + "step": 1122 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017642277329860532, + "loss": 1.5995, + "step": 1123 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001760945903146756, + "loss": 1.6014, + "step": 1124 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017576647261848119, + "loss": 1.55, + "step": 1125 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017543842110613966, + "loss": 1.6189, + "step": 1126 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017511043667358818, + "loss": 1.5004, + "step": 1127 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001747825202165803, + "loss": 1.5344, + "step": 1128 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017445467263068422, + "loss": 1.4693, + "step": 1129 + }, + { + "epoch": 0.56, + "learning_rate": 0.00017412689481128015, + "loss": 1.5528, + "step": 1130 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001737991876535573, + "loss": 1.5298, + "step": 1131 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017347155205251245, + "loss": 1.4865, + "step": 1132 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001731439889029464, + "loss": 1.4381, + "step": 1133 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017281649909946256, + "loss": 1.4918, + "step": 1134 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017248908353646372, + "loss": 1.7483, + "step": 1135 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017216174310815004, + "loss": 1.5986, + "step": 1136 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017183447870851653, + "loss": 1.6334, + "step": 1137 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001715072912313504, + "loss": 1.6603, + "step": 1138 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017118018157022887, + "loss": 1.5052, + "step": 1139 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017085315061851665, + "loss": 1.4917, + "step": 1140 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017052619926936346, + "loss": 1.4266, + "step": 1141 + }, + { + "epoch": 0.57, + "learning_rate": 0.00017019932841570173, + "loss": 1.4824, + "step": 1142 + }, + { + "epoch": 0.57, + "learning_rate": 0.00016987253895024376, + "loss": 1.553, + "step": 1143 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001695458317654799, + "loss": 1.5831, + "step": 1144 + }, + { + "epoch": 0.57, + "learning_rate": 0.00016921920775367556, + "loss": 1.5165, + "step": 1145 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001688926678068691, + "loss": 1.6343, + "step": 1146 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001685662128168694, + "loss": 1.4093, + "step": 1147 + }, + { + "epoch": 0.57, + "learning_rate": 0.00016823984367525302, + "loss": 1.5443, + "step": 1148 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001679135612733623, + "loss": 1.5261, + "step": 1149 + }, + { + "epoch": 0.57, + "learning_rate": 0.00016758736650230257, + "loss": 1.7538, + "step": 1150 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016726126025293992, + "loss": 1.5514, + "step": 1151 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001669352434158987, + "loss": 1.3497, + "step": 1152 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001666093168815589, + "loss": 1.4964, + "step": 1153 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016628348154005404, + "loss": 1.4952, + "step": 1154 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016595773828126847, + "loss": 1.4553, + "step": 1155 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016563208799483522, + "loss": 1.4294, + "step": 1156 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016530653157013328, + "loss": 1.5523, + "step": 1157 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016498106989628526, + "loss": 1.3405, + "step": 1158 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016465570386215513, + "loss": 1.5908, + "step": 1159 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016433043435634548, + "loss": 1.4411, + "step": 1160 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016400526226719543, + "loss": 1.3684, + "step": 1161 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016368018848277816, + "loss": 1.4479, + "step": 1162 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016335521389089792, + "loss": 1.5124, + "step": 1163 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016303033937908854, + "loss": 1.6517, + "step": 1164 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016270556583461023, + "loss": 1.4531, + "step": 1165 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016238089414444758, + "loss": 1.3264, + "step": 1166 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016205632519530707, + "loss": 1.5187, + "step": 1167 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016173185987361432, + "loss": 1.5387, + "step": 1168 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001614074990655122, + "loss": 1.4165, + "step": 1169 + }, + { + "epoch": 0.58, + "learning_rate": 0.000161083243656858, + "loss": 1.4233, + "step": 1170 + }, + { + "epoch": 0.59, + "learning_rate": 0.00016075909453322132, + "loss": 1.5269, + "step": 1171 + }, + { + "epoch": 0.59, + "learning_rate": 0.00016043505257988115, + "loss": 1.5829, + "step": 1172 + }, + { + "epoch": 0.59, + "learning_rate": 0.00016011111868182416, + "loss": 1.6682, + "step": 1173 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015978729372374177, + "loss": 1.4563, + "step": 1174 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001594635785900278, + "loss": 1.4737, + "step": 1175 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015913997416477628, + "loss": 1.572, + "step": 1176 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015881648133177868, + "loss": 1.5574, + "step": 1177 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015849310097452185, + "loss": 1.4618, + "step": 1178 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015816983397618555, + "loss": 1.6466, + "step": 1179 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015784668121963968, + "loss": 1.4817, + "step": 1180 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015752364358744244, + "loss": 1.5657, + "step": 1181 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001572007219618373, + "loss": 1.7042, + "step": 1182 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001568779172247511, + "loss": 1.5432, + "step": 1183 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001565552302577915, + "loss": 1.7243, + "step": 1184 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015623266194224434, + "loss": 1.6929, + "step": 1185 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001559102131590716, + "loss": 1.58, + "step": 1186 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001555878847889086, + "loss": 1.4482, + "step": 1187 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015526567771206195, + "loss": 1.4163, + "step": 1188 + }, + { + "epoch": 0.59, + "learning_rate": 0.00015494359280850703, + "loss": 1.4849, + "step": 1189 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001546216309578854, + "loss": 1.6257, + "step": 1190 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015429979303950277, + "loss": 1.4019, + "step": 1191 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015397807993232604, + "loss": 1.6258, + "step": 1192 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015365649251498158, + "loss": 1.5102, + "step": 1193 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015333503166575237, + "loss": 1.7245, + "step": 1194 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015301369826257563, + "loss": 1.4299, + "step": 1195 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015269249318304072, + "loss": 1.4183, + "step": 1196 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015237141730438627, + "loss": 1.5347, + "step": 1197 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015205047150349823, + "loss": 1.6355, + "step": 1198 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015172965665690737, + "loss": 1.6188, + "step": 1199 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015140897364078647, + "loss": 1.3618, + "step": 1200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001510884233309488, + "loss": 1.5299, + "step": 1201 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015076800660284462, + "loss": 1.5383, + "step": 1202 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015044772433155985, + "loss": 1.5368, + "step": 1203 + }, + { + "epoch": 0.6, + "learning_rate": 0.00015012757739181284, + "loss": 1.6522, + "step": 1204 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014980756665795253, + "loss": 1.5328, + "step": 1205 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014948769300395588, + "loss": 1.4371, + "step": 1206 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014916795730342538, + "loss": 1.5072, + "step": 1207 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001488483604295869, + "loss": 1.5613, + "step": 1208 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014852890325528688, + "loss": 1.5922, + "step": 1209 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014820958665299049, + "loss": 1.5502, + "step": 1210 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014789041149477897, + "loss": 1.3874, + "step": 1211 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014757137865234712, + "loss": 1.6309, + "step": 1212 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014725248899700126, + "loss": 1.5276, + "step": 1213 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014693374339965637, + "loss": 1.6013, + "step": 1214 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014661514273083424, + "loss": 1.4085, + "step": 1215 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001462966878606608, + "loss": 1.3623, + "step": 1216 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001459783796588637, + "loss": 1.5691, + "step": 1217 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014566021899477022, + "loss": 1.557, + "step": 1218 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014534220673730438, + "loss": 1.6516, + "step": 1219 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014502434375498514, + "loss": 1.4433, + "step": 1220 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014470663091592381, + "loss": 1.5922, + "step": 1221 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014438906908782149, + "loss": 1.4993, + "step": 1222 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014407165913796693, + "loss": 1.5018, + "step": 1223 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014375440193323402, + "loss": 1.4906, + "step": 1224 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014343729834007953, + "loss": 1.5737, + "step": 1225 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014312034922454086, + "loss": 1.4981, + "step": 1226 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001428035554522332, + "loss": 1.6424, + "step": 1227 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014248691788834784, + "loss": 1.4925, + "step": 1228 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014217043739764908, + "loss": 1.5765, + "step": 1229 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014185411484447252, + "loss": 1.5934, + "step": 1230 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014153795109272238, + "loss": 1.4563, + "step": 1231 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014122194700586904, + "loss": 1.5703, + "step": 1232 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014090610344694702, + "loss": 1.5285, + "step": 1233 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014059042127855217, + "loss": 1.4793, + "step": 1234 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014027490136283988, + "loss": 1.7095, + "step": 1235 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013995954456152206, + "loss": 1.5333, + "step": 1236 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013964435173586547, + "loss": 1.4531, + "step": 1237 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013932932374668893, + "loss": 1.5855, + "step": 1238 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013901446145436083, + "loss": 1.3752, + "step": 1239 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013869976571879747, + "loss": 1.5541, + "step": 1240 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001383852373994599, + "loss": 1.7656, + "step": 1241 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013807087735535214, + "loss": 1.5334, + "step": 1242 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013775668644501864, + "loss": 1.4591, + "step": 1243 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013744266552654172, + "loss": 1.6527, + "step": 1244 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013712881545753972, + "loss": 1.5402, + "step": 1245 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013681513709516414, + "loss": 1.4721, + "step": 1246 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013650163129609767, + "loss": 1.451, + "step": 1247 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001361882989165518, + "loss": 1.5583, + "step": 1248 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001358751408122641, + "loss": 1.535, + "step": 1249 + }, + { + "epoch": 0.62, + "learning_rate": 0.00013556215783849644, + "loss": 1.2767, + "step": 1250 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001352493508500323, + "loss": 1.5292, + "step": 1251 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001349367207011745, + "loss": 1.4486, + "step": 1252 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013462426824574307, + "loss": 1.4923, + "step": 1253 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013431199433707245, + "loss": 1.5831, + "step": 1254 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001339998998280097, + "loss": 1.5556, + "step": 1255 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013368798557091176, + "loss": 1.3624, + "step": 1256 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013337625241764345, + "loss": 1.5453, + "step": 1257 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013306470121957493, + "loss": 1.7471, + "step": 1258 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001327533328275792, + "loss": 1.4559, + "step": 1259 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013244214809203042, + "loss": 1.2641, + "step": 1260 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013213114786280074, + "loss": 1.4312, + "step": 1261 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001318203329892587, + "loss": 1.4512, + "step": 1262 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013150970432026656, + "loss": 1.4561, + "step": 1263 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001311992627041779, + "loss": 1.5522, + "step": 1264 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013088900898883564, + "loss": 1.5752, + "step": 1265 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001305789440215693, + "loss": 1.7763, + "step": 1266 + }, + { + "epoch": 0.63, + "learning_rate": 0.00013026906864919323, + "loss": 1.6178, + "step": 1267 + }, + { + "epoch": 0.63, + "learning_rate": 0.00012995938371800363, + "loss": 1.5093, + "step": 1268 + }, + { + "epoch": 0.63, + "learning_rate": 0.00012964989007377677, + "loss": 1.4769, + "step": 1269 + }, + { + "epoch": 0.63, + "learning_rate": 0.00012934058856176663, + "loss": 1.4533, + "step": 1270 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012903148002670205, + "loss": 1.5406, + "step": 1271 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001287225653127854, + "loss": 1.4527, + "step": 1272 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001284138452636892, + "loss": 1.1498, + "step": 1273 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012810532072255452, + "loss": 1.3571, + "step": 1274 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012779699253198863, + "loss": 1.5416, + "step": 1275 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012748886153406223, + "loss": 1.4866, + "step": 1276 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001271809285703078, + "loss": 1.5418, + "step": 1277 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012687319448171668, + "loss": 1.6024, + "step": 1278 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012656566010873726, + "loss": 1.3701, + "step": 1279 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012625832629127256, + "loss": 1.5769, + "step": 1280 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012595119386867753, + "loss": 1.7299, + "step": 1281 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012564426367975745, + "loss": 1.5873, + "step": 1282 + }, + { + "epoch": 0.64, + "learning_rate": 0.000125337536562765, + "loss": 1.5423, + "step": 1283 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001250310133553985, + "loss": 1.5658, + "step": 1284 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012472469489479928, + "loss": 1.5692, + "step": 1285 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012441858201754934, + "loss": 1.5028, + "step": 1286 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012411267555966946, + "loss": 1.4544, + "step": 1287 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012380697635661645, + "loss": 1.4032, + "step": 1288 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012350148524328125, + "loss": 1.6898, + "step": 1289 + }, + { + "epoch": 0.64, + "learning_rate": 0.00012319620305398655, + "loss": 1.4942, + "step": 1290 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012289113062248413, + "loss": 1.575, + "step": 1291 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012258626878195326, + "loss": 1.6769, + "step": 1292 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001222816183649978, + "loss": 1.4289, + "step": 1293 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012197718020364437, + "loss": 1.7212, + "step": 1294 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012167295512933991, + "loss": 1.3705, + "step": 1295 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001213689439729492, + "loss": 1.4692, + "step": 1296 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012106514756475297, + "loss": 1.5929, + "step": 1297 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012076156673444546, + "loss": 1.7073, + "step": 1298 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012045820231113207, + "loss": 1.5234, + "step": 1299 + }, + { + "epoch": 0.65, + "learning_rate": 0.00012015505512332712, + "loss": 1.5092, + "step": 1300 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001198521259989518, + "loss": 1.4455, + "step": 1301 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011954941576533167, + "loss": 1.5459, + "step": 1302 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011924692524919447, + "loss": 1.3827, + "step": 1303 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011894465527666793, + "loss": 1.5077, + "step": 1304 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001186426066732773, + "loss": 1.3971, + "step": 1305 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011834078026394339, + "loss": 1.6192, + "step": 1306 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011803917687298028, + "loss": 1.3832, + "step": 1307 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011773779732409264, + "loss": 1.7161, + "step": 1308 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011743664244037425, + "loss": 1.6524, + "step": 1309 + }, + { + "epoch": 0.65, + "learning_rate": 0.00011713571304430479, + "loss": 1.514, + "step": 1310 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011683500995774856, + "loss": 1.5347, + "step": 1311 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011653453400195169, + "loss": 1.3202, + "step": 1312 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011623428599753966, + "loss": 1.5273, + "step": 1313 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011593426676451593, + "loss": 1.4826, + "step": 1314 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011563447712225875, + "loss": 1.4646, + "step": 1315 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011533491788951947, + "loss": 1.6094, + "step": 1316 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001150355898844204, + "loss": 1.5485, + "step": 1317 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011473649392445183, + "loss": 1.4593, + "step": 1318 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011443763082647081, + "loss": 1.6859, + "step": 1319 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001141390014066982, + "loss": 1.6198, + "step": 1320 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001138406064807166, + "loss": 1.4567, + "step": 1321 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011354244686346852, + "loss": 1.4479, + "step": 1322 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011324452336925327, + "loss": 1.5005, + "step": 1323 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011294683681172581, + "loss": 1.438, + "step": 1324 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011264938800389382, + "loss": 1.5529, + "step": 1325 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011235217775811544, + "loss": 1.4482, + "step": 1326 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011205520688609778, + "loss": 1.5138, + "step": 1327 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011175847619889359, + "loss": 1.4835, + "step": 1328 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011146198650690021, + "loss": 1.3981, + "step": 1329 + }, + { + "epoch": 0.66, + "learning_rate": 0.00011116573861985642, + "loss": 1.4917, + "step": 1330 + }, + { + "epoch": 0.67, + "learning_rate": 0.00011086973334684078, + "loss": 1.3993, + "step": 1331 + }, + { + "epoch": 0.67, + "learning_rate": 0.00011057397149626924, + "loss": 1.6514, + "step": 1332 + }, + { + "epoch": 0.67, + "learning_rate": 0.00011027845387589279, + "loss": 1.4756, + "step": 1333 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010998318129279575, + "loss": 1.5535, + "step": 1334 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010968815455339285, + "loss": 1.4985, + "step": 1335 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010939337446342763, + "loss": 1.4418, + "step": 1336 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001090988418279699, + "loss": 1.4817, + "step": 1337 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001088045574514136, + "loss": 1.5932, + "step": 1338 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010851052213747493, + "loss": 1.6614, + "step": 1339 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010821673668918957, + "loss": 1.6368, + "step": 1340 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010792320190891093, + "loss": 1.4143, + "step": 1341 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001076299185983078, + "loss": 1.5895, + "step": 1342 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010733688755836215, + "loss": 1.6284, + "step": 1343 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010704410958936719, + "loss": 1.5215, + "step": 1344 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010675158549092455, + "loss": 1.4819, + "step": 1345 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010645931606194297, + "loss": 1.4669, + "step": 1346 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010616730210063536, + "loss": 1.4592, + "step": 1347 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010587554440451702, + "loss": 1.4238, + "step": 1348 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010558404377040352, + "loss": 1.5082, + "step": 1349 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010529280099440799, + "loss": 1.4766, + "step": 1350 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010500181687193977, + "loss": 1.5275, + "step": 1351 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010471109219770155, + "loss": 1.5614, + "step": 1352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010442062776568735, + "loss": 1.517, + "step": 1353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010413042436918084, + "loss": 1.4797, + "step": 1354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001038404828007522, + "loss": 1.6415, + "step": 1355 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010355080385225713, + "loss": 1.5257, + "step": 1356 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010326138831483367, + "loss": 1.5953, + "step": 1357 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010297223697890062, + "loss": 1.5818, + "step": 1358 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010268335063415536, + "loss": 1.4314, + "step": 1359 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010239473006957112, + "loss": 1.3441, + "step": 1360 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010210637607339584, + "loss": 1.2736, + "step": 1361 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010181828943314899, + "loss": 1.3509, + "step": 1362 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010153047093562009, + "loss": 1.5463, + "step": 1363 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010124292136686624, + "loss": 1.5247, + "step": 1364 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010095564151221002, + "loss": 1.4775, + "step": 1365 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001006686321562377, + "loss": 1.5266, + "step": 1366 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010038189408279643, + "loss": 1.3547, + "step": 1367 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010009542807499266, + "loss": 1.6053, + "step": 1368 + }, + { + "epoch": 0.68, + "learning_rate": 9.980923491518976e-05, + "loss": 1.4517, + "step": 1369 + }, + { + "epoch": 0.68, + "learning_rate": 9.952331538500583e-05, + "loss": 1.6798, + "step": 1370 + }, + { + "epoch": 0.69, + "learning_rate": 9.923767026531195e-05, + "loss": 1.4975, + "step": 1371 + }, + { + "epoch": 0.69, + "learning_rate": 9.895230033622945e-05, + "loss": 1.492, + "step": 1372 + }, + { + "epoch": 0.69, + "learning_rate": 9.866720637712823e-05, + "loss": 1.5128, + "step": 1373 + }, + { + "epoch": 0.69, + "learning_rate": 9.83823891666245e-05, + "loss": 1.3858, + "step": 1374 + }, + { + "epoch": 0.69, + "learning_rate": 9.809784948257852e-05, + "loss": 1.612, + "step": 1375 + }, + { + "epoch": 0.69, + "learning_rate": 9.781358810209287e-05, + "loss": 1.3599, + "step": 1376 + }, + { + "epoch": 0.69, + "learning_rate": 9.752960580150981e-05, + "loss": 1.6477, + "step": 1377 + }, + { + "epoch": 0.69, + "learning_rate": 9.724590335640949e-05, + "loss": 1.5782, + "step": 1378 + }, + { + "epoch": 0.69, + "learning_rate": 9.696248154160776e-05, + "loss": 1.6439, + "step": 1379 + }, + { + "epoch": 0.69, + "learning_rate": 9.667934113115395e-05, + "loss": 1.7892, + "step": 1380 + }, + { + "epoch": 0.69, + "learning_rate": 9.63964828983291e-05, + "loss": 1.6616, + "step": 1381 + }, + { + "epoch": 0.69, + "learning_rate": 9.611390761564333e-05, + "loss": 1.7806, + "step": 1382 + }, + { + "epoch": 0.69, + "learning_rate": 9.583161605483415e-05, + "loss": 1.4184, + "step": 1383 + }, + { + "epoch": 0.69, + "learning_rate": 9.554960898686411e-05, + "loss": 1.4113, + "step": 1384 + }, + { + "epoch": 0.69, + "learning_rate": 9.526788718191881e-05, + "loss": 1.4594, + "step": 1385 + }, + { + "epoch": 0.69, + "learning_rate": 9.498645140940488e-05, + "loss": 1.4846, + "step": 1386 + }, + { + "epoch": 0.69, + "learning_rate": 9.470530243794765e-05, + "loss": 1.3407, + "step": 1387 + }, + { + "epoch": 0.69, + "learning_rate": 9.442444103538919e-05, + "loss": 1.4245, + "step": 1388 + }, + { + "epoch": 0.69, + "learning_rate": 9.414386796878621e-05, + "loss": 1.6035, + "step": 1389 + }, + { + "epoch": 0.69, + "learning_rate": 9.38635840044079e-05, + "loss": 1.5466, + "step": 1390 + }, + { + "epoch": 0.7, + "learning_rate": 9.358358990773402e-05, + "loss": 1.4481, + "step": 1391 + }, + { + "epoch": 0.7, + "learning_rate": 9.330388644345259e-05, + "loss": 1.5167, + "step": 1392 + }, + { + "epoch": 0.7, + "learning_rate": 9.302447437545784e-05, + "loss": 1.4248, + "step": 1393 + }, + { + "epoch": 0.7, + "learning_rate": 9.274535446684827e-05, + "loss": 1.7327, + "step": 1394 + }, + { + "epoch": 0.7, + "learning_rate": 9.246652747992429e-05, + "loss": 1.3955, + "step": 1395 + }, + { + "epoch": 0.7, + "learning_rate": 9.218799417618662e-05, + "loss": 1.7074, + "step": 1396 + }, + { + "epoch": 0.7, + "learning_rate": 9.190975531633368e-05, + "loss": 1.5225, + "step": 1397 + }, + { + "epoch": 0.7, + "learning_rate": 9.163181166025976e-05, + "loss": 1.587, + "step": 1398 + }, + { + "epoch": 0.7, + "learning_rate": 9.135416396705294e-05, + "loss": 1.3871, + "step": 1399 + }, + { + "epoch": 0.7, + "learning_rate": 9.107681299499304e-05, + "loss": 1.5438, + "step": 1400 + }, + { + "epoch": 0.7, + "learning_rate": 9.079975950154942e-05, + "loss": 1.6617, + "step": 1401 + }, + { + "epoch": 0.7, + "learning_rate": 9.052300424337917e-05, + "loss": 1.4866, + "step": 1402 + }, + { + "epoch": 0.7, + "learning_rate": 9.02465479763247e-05, + "loss": 1.4448, + "step": 1403 + }, + { + "epoch": 0.7, + "learning_rate": 8.997039145541193e-05, + "loss": 1.5473, + "step": 1404 + }, + { + "epoch": 0.7, + "learning_rate": 8.969453543484811e-05, + "loss": 1.6725, + "step": 1405 + }, + { + "epoch": 0.7, + "learning_rate": 8.941898066801979e-05, + "loss": 1.5296, + "step": 1406 + }, + { + "epoch": 0.7, + "learning_rate": 8.91437279074909e-05, + "loss": 1.5941, + "step": 1407 + }, + { + "epoch": 0.7, + "learning_rate": 8.886877790500042e-05, + "loss": 1.5012, + "step": 1408 + }, + { + "epoch": 0.7, + "learning_rate": 8.859413141146049e-05, + "loss": 1.5981, + "step": 1409 + }, + { + "epoch": 0.7, + "learning_rate": 8.831978917695438e-05, + "loss": 1.3714, + "step": 1410 + }, + { + "epoch": 0.71, + "learning_rate": 8.804575195073435e-05, + "loss": 1.4957, + "step": 1411 + }, + { + "epoch": 0.71, + "learning_rate": 8.777202048121977e-05, + "loss": 1.5818, + "step": 1412 + }, + { + "epoch": 0.71, + "learning_rate": 8.749859551599488e-05, + "loss": 1.5382, + "step": 1413 + }, + { + "epoch": 0.71, + "learning_rate": 8.72254778018068e-05, + "loss": 1.3759, + "step": 1414 + }, + { + "epoch": 0.71, + "learning_rate": 8.69526680845636e-05, + "loss": 1.4997, + "step": 1415 + }, + { + "epoch": 0.71, + "learning_rate": 8.6680167109332e-05, + "loss": 1.5585, + "step": 1416 + }, + { + "epoch": 0.71, + "learning_rate": 8.640797562033586e-05, + "loss": 1.5791, + "step": 1417 + }, + { + "epoch": 0.71, + "learning_rate": 8.613609436095353e-05, + "loss": 1.5011, + "step": 1418 + }, + { + "epoch": 0.71, + "learning_rate": 8.586452407371619e-05, + "loss": 1.4529, + "step": 1419 + }, + { + "epoch": 0.71, + "learning_rate": 8.559326550030567e-05, + "loss": 1.385, + "step": 1420 + }, + { + "epoch": 0.71, + "learning_rate": 8.532231938155253e-05, + "loss": 1.6166, + "step": 1421 + }, + { + "epoch": 0.71, + "learning_rate": 8.505168645743393e-05, + "loss": 1.433, + "step": 1422 + }, + { + "epoch": 0.71, + "learning_rate": 8.478136746707188e-05, + "loss": 1.526, + "step": 1423 + }, + { + "epoch": 0.71, + "learning_rate": 8.451136314873076e-05, + "loss": 1.5146, + "step": 1424 + }, + { + "epoch": 0.71, + "learning_rate": 8.424167423981565e-05, + "loss": 1.4147, + "step": 1425 + }, + { + "epoch": 0.71, + "learning_rate": 8.397230147687023e-05, + "loss": 1.6492, + "step": 1426 + }, + { + "epoch": 0.71, + "learning_rate": 8.370324559557463e-05, + "loss": 1.6118, + "step": 1427 + }, + { + "epoch": 0.71, + "learning_rate": 8.343450733074388e-05, + "loss": 1.6142, + "step": 1428 + }, + { + "epoch": 0.71, + "learning_rate": 8.316608741632519e-05, + "loss": 1.5937, + "step": 1429 + }, + { + "epoch": 0.71, + "learning_rate": 8.289798658539658e-05, + "loss": 1.3966, + "step": 1430 + }, + { + "epoch": 0.72, + "learning_rate": 8.263020557016447e-05, + "loss": 1.5064, + "step": 1431 + }, + { + "epoch": 0.72, + "learning_rate": 8.236274510196196e-05, + "loss": 1.4262, + "step": 1432 + }, + { + "epoch": 0.72, + "learning_rate": 8.209560591124654e-05, + "loss": 1.2924, + "step": 1433 + }, + { + "epoch": 0.72, + "learning_rate": 8.182878872759854e-05, + "loss": 1.6428, + "step": 1434 + }, + { + "epoch": 0.72, + "learning_rate": 8.15622942797186e-05, + "loss": 1.4808, + "step": 1435 + }, + { + "epoch": 0.72, + "learning_rate": 8.129612329542609e-05, + "loss": 1.5953, + "step": 1436 + }, + { + "epoch": 0.72, + "learning_rate": 8.103027650165685e-05, + "loss": 1.682, + "step": 1437 + }, + { + "epoch": 0.72, + "learning_rate": 8.076475462446138e-05, + "loss": 1.5173, + "step": 1438 + }, + { + "epoch": 0.72, + "learning_rate": 8.049955838900294e-05, + "loss": 1.6062, + "step": 1439 + }, + { + "epoch": 0.72, + "learning_rate": 8.023468851955527e-05, + "loss": 1.5373, + "step": 1440 + }, + { + "epoch": 0.72, + "learning_rate": 7.997014573950078e-05, + "loss": 1.5742, + "step": 1441 + }, + { + "epoch": 0.72, + "learning_rate": 7.970593077132869e-05, + "loss": 1.6135, + "step": 1442 + }, + { + "epoch": 0.72, + "learning_rate": 7.944204433663269e-05, + "loss": 1.4727, + "step": 1443 + }, + { + "epoch": 0.72, + "learning_rate": 7.917848715610962e-05, + "loss": 1.4902, + "step": 1444 + }, + { + "epoch": 0.72, + "learning_rate": 7.891525994955675e-05, + "loss": 1.7681, + "step": 1445 + }, + { + "epoch": 0.72, + "learning_rate": 7.865236343587028e-05, + "loss": 1.3804, + "step": 1446 + }, + { + "epoch": 0.72, + "learning_rate": 7.838979833304328e-05, + "loss": 1.6395, + "step": 1447 + }, + { + "epoch": 0.72, + "learning_rate": 7.812756535816358e-05, + "loss": 1.4848, + "step": 1448 + }, + { + "epoch": 0.72, + "learning_rate": 7.786566522741226e-05, + "loss": 1.5668, + "step": 1449 + }, + { + "epoch": 0.72, + "learning_rate": 7.760409865606102e-05, + "loss": 1.5328, + "step": 1450 + }, + { + "epoch": 0.73, + "learning_rate": 7.734286635847073e-05, + "loss": 1.4281, + "step": 1451 + }, + { + "epoch": 0.73, + "learning_rate": 7.708196904808935e-05, + "loss": 1.5234, + "step": 1452 + }, + { + "epoch": 0.73, + "learning_rate": 7.682140743744986e-05, + "loss": 1.4104, + "step": 1453 + }, + { + "epoch": 0.73, + "learning_rate": 7.65611822381686e-05, + "loss": 1.3889, + "step": 1454 + }, + { + "epoch": 0.73, + "learning_rate": 7.630129416094296e-05, + "loss": 1.5667, + "step": 1455 + }, + { + "epoch": 0.73, + "learning_rate": 7.604174391554973e-05, + "loss": 1.4768, + "step": 1456 + }, + { + "epoch": 0.73, + "learning_rate": 7.578253221084297e-05, + "loss": 1.6435, + "step": 1457 + }, + { + "epoch": 0.73, + "learning_rate": 7.552365975475217e-05, + "loss": 1.5696, + "step": 1458 + }, + { + "epoch": 0.73, + "learning_rate": 7.526512725428043e-05, + "loss": 1.7266, + "step": 1459 + }, + { + "epoch": 0.73, + "learning_rate": 7.500693541550224e-05, + "loss": 1.4111, + "step": 1460 + }, + { + "epoch": 0.73, + "learning_rate": 7.474908494356179e-05, + "loss": 1.5795, + "step": 1461 + }, + { + "epoch": 0.73, + "learning_rate": 7.449157654267093e-05, + "loss": 1.5585, + "step": 1462 + }, + { + "epoch": 0.73, + "learning_rate": 7.423441091610727e-05, + "loss": 1.3697, + "step": 1463 + }, + { + "epoch": 0.73, + "learning_rate": 7.39775887662125e-05, + "loss": 1.5309, + "step": 1464 + }, + { + "epoch": 0.73, + "learning_rate": 7.37211107943898e-05, + "loss": 1.5493, + "step": 1465 + }, + { + "epoch": 0.73, + "learning_rate": 7.346497770110282e-05, + "loss": 1.4651, + "step": 1466 + }, + { + "epoch": 0.73, + "learning_rate": 7.320919018587304e-05, + "loss": 1.4814, + "step": 1467 + }, + { + "epoch": 0.73, + "learning_rate": 7.295374894727818e-05, + "loss": 1.3825, + "step": 1468 + }, + { + "epoch": 0.73, + "learning_rate": 7.269865468295047e-05, + "loss": 1.4483, + "step": 1469 + }, + { + "epoch": 0.73, + "learning_rate": 7.244390808957409e-05, + "loss": 1.4974, + "step": 1470 + }, + { + "epoch": 0.74, + "learning_rate": 7.218950986288415e-05, + "loss": 1.418, + "step": 1471 + }, + { + "epoch": 0.74, + "learning_rate": 7.193546069766411e-05, + "loss": 1.3873, + "step": 1472 + }, + { + "epoch": 0.74, + "learning_rate": 7.168176128774399e-05, + "loss": 1.5701, + "step": 1473 + }, + { + "epoch": 0.74, + "learning_rate": 7.142841232599906e-05, + "loss": 1.5859, + "step": 1474 + }, + { + "epoch": 0.74, + "learning_rate": 7.117541450434686e-05, + "loss": 1.7759, + "step": 1475 + }, + { + "epoch": 0.74, + "learning_rate": 7.092276851374647e-05, + "loss": 1.6146, + "step": 1476 + }, + { + "epoch": 0.74, + "learning_rate": 7.067047504419579e-05, + "loss": 1.5228, + "step": 1477 + }, + { + "epoch": 0.74, + "learning_rate": 7.041853478473e-05, + "loss": 1.6275, + "step": 1478 + }, + { + "epoch": 0.74, + "learning_rate": 7.016694842341984e-05, + "loss": 1.6343, + "step": 1479 + }, + { + "epoch": 0.74, + "learning_rate": 6.991571664736916e-05, + "loss": 1.3821, + "step": 1480 + }, + { + "epoch": 0.74, + "learning_rate": 6.966484014271373e-05, + "loss": 1.494, + "step": 1481 + }, + { + "epoch": 0.74, + "learning_rate": 6.941431959461889e-05, + "loss": 1.4935, + "step": 1482 + }, + { + "epoch": 0.74, + "learning_rate": 6.916415568727777e-05, + "loss": 1.4673, + "step": 1483 + }, + { + "epoch": 0.74, + "learning_rate": 6.891434910390979e-05, + "loss": 1.5045, + "step": 1484 + }, + { + "epoch": 0.74, + "learning_rate": 6.8664900526758e-05, + "loss": 1.5553, + "step": 1485 + }, + { + "epoch": 0.74, + "learning_rate": 6.841581063708819e-05, + "loss": 1.3835, + "step": 1486 + }, + { + "epoch": 0.74, + "learning_rate": 6.816708011518623e-05, + "loss": 1.4642, + "step": 1487 + }, + { + "epoch": 0.74, + "learning_rate": 6.791870964035658e-05, + "loss": 1.7471, + "step": 1488 + }, + { + "epoch": 0.74, + "learning_rate": 6.76706998909206e-05, + "loss": 1.5005, + "step": 1489 + }, + { + "epoch": 0.74, + "learning_rate": 6.742305154421408e-05, + "loss": 1.7341, + "step": 1490 + }, + { + "epoch": 0.75, + "learning_rate": 6.717576527658618e-05, + "loss": 1.6091, + "step": 1491 + }, + { + "epoch": 0.75, + "learning_rate": 6.692884176339694e-05, + "loss": 1.3284, + "step": 1492 + }, + { + "epoch": 0.75, + "learning_rate": 6.668228167901573e-05, + "loss": 1.4199, + "step": 1493 + }, + { + "epoch": 0.75, + "learning_rate": 6.643608569681958e-05, + "loss": 1.6035, + "step": 1494 + }, + { + "epoch": 0.75, + "learning_rate": 6.61902544891907e-05, + "loss": 1.4549, + "step": 1495 + }, + { + "epoch": 0.75, + "learning_rate": 6.594478872751555e-05, + "loss": 1.4991, + "step": 1496 + }, + { + "epoch": 0.75, + "learning_rate": 6.569968908218207e-05, + "loss": 1.6751, + "step": 1497 + }, + { + "epoch": 0.75, + "learning_rate": 6.545495622257869e-05, + "loss": 1.4197, + "step": 1498 + }, + { + "epoch": 0.75, + "learning_rate": 6.521059081709191e-05, + "loss": 1.4976, + "step": 1499 + }, + { + "epoch": 0.75, + "learning_rate": 6.496659353310466e-05, + "loss": 1.3893, + "step": 1500 + }, + { + "epoch": 0.75, + "learning_rate": 6.472296503699473e-05, + "loss": 1.5723, + "step": 1501 + }, + { + "epoch": 0.75, + "learning_rate": 6.447970599413231e-05, + "loss": 1.4579, + "step": 1502 + }, + { + "epoch": 0.75, + "learning_rate": 6.423681706887903e-05, + "loss": 1.5235, + "step": 1503 + }, + { + "epoch": 0.75, + "learning_rate": 6.399429892458541e-05, + "loss": 1.3772, + "step": 1504 + }, + { + "epoch": 0.75, + "learning_rate": 6.375215222358935e-05, + "loss": 1.4735, + "step": 1505 + }, + { + "epoch": 0.75, + "learning_rate": 6.351037762721457e-05, + "loss": 1.5303, + "step": 1506 + }, + { + "epoch": 0.75, + "learning_rate": 6.326897579576807e-05, + "loss": 1.3491, + "step": 1507 + }, + { + "epoch": 0.75, + "learning_rate": 6.302794738853927e-05, + "loss": 1.5621, + "step": 1508 + }, + { + "epoch": 0.75, + "learning_rate": 6.27872930637975e-05, + "loss": 1.5875, + "step": 1509 + }, + { + "epoch": 0.75, + "learning_rate": 6.254701347879035e-05, + "loss": 1.4622, + "step": 1510 + }, + { + "epoch": 0.76, + "learning_rate": 6.230710928974232e-05, + "loss": 1.6829, + "step": 1511 + }, + { + "epoch": 0.76, + "learning_rate": 6.206758115185216e-05, + "loss": 1.5249, + "step": 1512 + }, + { + "epoch": 0.76, + "learning_rate": 6.18284297192921e-05, + "loss": 1.5523, + "step": 1513 + }, + { + "epoch": 0.76, + "learning_rate": 6.15896556452052e-05, + "loss": 1.4929, + "step": 1514 + }, + { + "epoch": 0.76, + "learning_rate": 6.135125958170405e-05, + "loss": 1.2468, + "step": 1515 + }, + { + "epoch": 0.76, + "learning_rate": 6.111324217986898e-05, + "loss": 1.4509, + "step": 1516 + }, + { + "epoch": 0.76, + "learning_rate": 6.087560408974584e-05, + "loss": 1.4516, + "step": 1517 + }, + { + "epoch": 0.76, + "learning_rate": 6.06383459603449e-05, + "loss": 1.6536, + "step": 1518 + }, + { + "epoch": 0.76, + "learning_rate": 6.040146843963852e-05, + "loss": 1.5844, + "step": 1519 + }, + { + "epoch": 0.76, + "learning_rate": 6.0164972174559586e-05, + "loss": 1.3181, + "step": 1520 + }, + { + "epoch": 0.76, + "learning_rate": 5.992885781099997e-05, + "loss": 1.4771, + "step": 1521 + }, + { + "epoch": 0.76, + "learning_rate": 5.96931259938081e-05, + "loss": 1.4503, + "step": 1522 + }, + { + "epoch": 0.76, + "learning_rate": 5.9457777366788084e-05, + "loss": 1.5759, + "step": 1523 + }, + { + "epoch": 0.76, + "learning_rate": 5.922281257269726e-05, + "loss": 1.3737, + "step": 1524 + }, + { + "epoch": 0.76, + "learning_rate": 5.8988232253244704e-05, + "loss": 1.5489, + "step": 1525 + }, + { + "epoch": 0.76, + "learning_rate": 5.875403704908964e-05, + "loss": 1.7822, + "step": 1526 + }, + { + "epoch": 0.76, + "learning_rate": 5.852022759983913e-05, + "loss": 1.3443, + "step": 1527 + }, + { + "epoch": 0.76, + "learning_rate": 5.828680454404714e-05, + "loss": 1.3229, + "step": 1528 + }, + { + "epoch": 0.76, + "learning_rate": 5.805376851921212e-05, + "loss": 1.4909, + "step": 1529 + }, + { + "epoch": 0.76, + "learning_rate": 5.7821120161775435e-05, + "loss": 1.5012, + "step": 1530 + }, + { + "epoch": 0.77, + "learning_rate": 5.758886010712003e-05, + "loss": 1.4465, + "step": 1531 + }, + { + "epoch": 0.77, + "learning_rate": 5.73569889895679e-05, + "loss": 1.3189, + "step": 1532 + }, + { + "epoch": 0.77, + "learning_rate": 5.712550744237923e-05, + "loss": 1.5343, + "step": 1533 + }, + { + "epoch": 0.77, + "learning_rate": 5.6894416097750034e-05, + "loss": 1.4563, + "step": 1534 + }, + { + "epoch": 0.77, + "learning_rate": 5.666371558681063e-05, + "loss": 1.4985, + "step": 1535 + }, + { + "epoch": 0.77, + "learning_rate": 5.643340653962421e-05, + "loss": 1.3521, + "step": 1536 + }, + { + "epoch": 0.77, + "learning_rate": 5.6203489585184375e-05, + "loss": 1.3999, + "step": 1537 + }, + { + "epoch": 0.77, + "learning_rate": 5.5973965351414346e-05, + "loss": 1.3275, + "step": 1538 + }, + { + "epoch": 0.77, + "learning_rate": 5.5744834465164564e-05, + "loss": 1.511, + "step": 1539 + }, + { + "epoch": 0.77, + "learning_rate": 5.551609755221112e-05, + "loss": 1.3924, + "step": 1540 + }, + { + "epoch": 0.77, + "learning_rate": 5.528775523725449e-05, + "loss": 1.5096, + "step": 1541 + }, + { + "epoch": 0.77, + "learning_rate": 5.5059808143916935e-05, + "loss": 1.5209, + "step": 1542 + }, + { + "epoch": 0.77, + "learning_rate": 5.483225689474183e-05, + "loss": 1.4124, + "step": 1543 + }, + { + "epoch": 0.77, + "learning_rate": 5.460510211119119e-05, + "loss": 1.5415, + "step": 1544 + }, + { + "epoch": 0.77, + "learning_rate": 5.4378344413644265e-05, + "loss": 1.4963, + "step": 1545 + }, + { + "epoch": 0.77, + "learning_rate": 5.415198442139604e-05, + "loss": 1.3565, + "step": 1546 + }, + { + "epoch": 0.77, + "learning_rate": 5.392602275265497e-05, + "loss": 1.6187, + "step": 1547 + }, + { + "epoch": 0.77, + "learning_rate": 5.370046002454201e-05, + "loss": 1.387, + "step": 1548 + }, + { + "epoch": 0.77, + "learning_rate": 5.347529685308836e-05, + "loss": 1.5391, + "step": 1549 + }, + { + "epoch": 0.77, + "learning_rate": 5.325053385323402e-05, + "loss": 1.5568, + "step": 1550 + }, + { + "epoch": 0.78, + "learning_rate": 5.302617163882626e-05, + "loss": 1.4682, + "step": 1551 + }, + { + "epoch": 0.78, + "learning_rate": 5.280221082261745e-05, + "loss": 1.4907, + "step": 1552 + }, + { + "epoch": 0.78, + "learning_rate": 5.257865201626404e-05, + "loss": 1.531, + "step": 1553 + }, + { + "epoch": 0.78, + "learning_rate": 5.235549583032435e-05, + "loss": 1.5039, + "step": 1554 + }, + { + "epoch": 0.78, + "learning_rate": 5.213274287425711e-05, + "loss": 1.5405, + "step": 1555 + }, + { + "epoch": 0.78, + "learning_rate": 5.1910393756420017e-05, + "loss": 1.5242, + "step": 1556 + }, + { + "epoch": 0.78, + "learning_rate": 5.168844908406749e-05, + "loss": 1.4804, + "step": 1557 + }, + { + "epoch": 0.78, + "learning_rate": 5.146690946334969e-05, + "loss": 1.4277, + "step": 1558 + }, + { + "epoch": 0.78, + "learning_rate": 5.12457754993104e-05, + "loss": 1.3221, + "step": 1559 + }, + { + "epoch": 0.78, + "learning_rate": 5.102504779588555e-05, + "loss": 1.6861, + "step": 1560 + }, + { + "epoch": 0.78, + "learning_rate": 5.0804726955901505e-05, + "loss": 1.5792, + "step": 1561 + }, + { + "epoch": 0.78, + "learning_rate": 5.058481358107343e-05, + "loss": 1.4429, + "step": 1562 + }, + { + "epoch": 0.78, + "learning_rate": 5.036530827200392e-05, + "loss": 1.6149, + "step": 1563 + }, + { + "epoch": 0.78, + "learning_rate": 5.014621162818061e-05, + "loss": 1.4497, + "step": 1564 + }, + { + "epoch": 0.78, + "learning_rate": 4.9927524247975555e-05, + "loss": 1.4262, + "step": 1565 + }, + { + "epoch": 0.78, + "learning_rate": 4.9709246728642747e-05, + "loss": 1.4911, + "step": 1566 + }, + { + "epoch": 0.78, + "learning_rate": 4.94913796663169e-05, + "loss": 1.4118, + "step": 1567 + }, + { + "epoch": 0.78, + "learning_rate": 4.927392365601189e-05, + "loss": 1.5412, + "step": 1568 + }, + { + "epoch": 0.78, + "learning_rate": 4.905687929161862e-05, + "loss": 1.5675, + "step": 1569 + }, + { + "epoch": 0.78, + "learning_rate": 4.884024716590412e-05, + "loss": 1.5406, + "step": 1570 + }, + { + "epoch": 0.79, + "learning_rate": 4.862402787050935e-05, + "loss": 1.5776, + "step": 1571 + }, + { + "epoch": 0.79, + "learning_rate": 4.840822199594781e-05, + "loss": 1.5056, + "step": 1572 + }, + { + "epoch": 0.79, + "learning_rate": 4.819283013160412e-05, + "loss": 1.6191, + "step": 1573 + }, + { + "epoch": 0.79, + "learning_rate": 4.79778528657318e-05, + "loss": 1.3349, + "step": 1574 + }, + { + "epoch": 0.79, + "learning_rate": 4.7763290785452476e-05, + "loss": 1.4504, + "step": 1575 + }, + { + "epoch": 0.79, + "learning_rate": 4.754914447675363e-05, + "loss": 1.5173, + "step": 1576 + }, + { + "epoch": 0.79, + "learning_rate": 4.7335414524487245e-05, + "loss": 1.604, + "step": 1577 + }, + { + "epoch": 0.79, + "learning_rate": 4.7122101512368424e-05, + "loss": 1.4894, + "step": 1578 + }, + { + "epoch": 0.79, + "learning_rate": 4.690920602297315e-05, + "loss": 1.5064, + "step": 1579 + }, + { + "epoch": 0.79, + "learning_rate": 4.669672863773758e-05, + "loss": 1.4289, + "step": 1580 + }, + { + "epoch": 0.79, + "learning_rate": 4.648466993695566e-05, + "loss": 1.3812, + "step": 1581 + }, + { + "epoch": 0.79, + "learning_rate": 4.627303049977796e-05, + "loss": 1.6637, + "step": 1582 + }, + { + "epoch": 0.79, + "learning_rate": 4.60618109042102e-05, + "loss": 1.4851, + "step": 1583 + }, + { + "epoch": 0.79, + "learning_rate": 4.58510117271111e-05, + "loss": 1.467, + "step": 1584 + }, + { + "epoch": 0.79, + "learning_rate": 4.5640633544191545e-05, + "loss": 1.5849, + "step": 1585 + }, + { + "epoch": 0.79, + "learning_rate": 4.5430676930012414e-05, + "loss": 1.4256, + "step": 1586 + }, + { + "epoch": 0.79, + "learning_rate": 4.522114245798332e-05, + "loss": 1.5615, + "step": 1587 + }, + { + "epoch": 0.79, + "learning_rate": 4.5012030700361085e-05, + "loss": 1.5331, + "step": 1588 + }, + { + "epoch": 0.79, + "learning_rate": 4.480334222824778e-05, + "loss": 1.3991, + "step": 1589 + }, + { + "epoch": 0.79, + "learning_rate": 4.4595077611589765e-05, + "loss": 1.5719, + "step": 1590 + }, + { + "epoch": 0.8, + "learning_rate": 4.438723741917559e-05, + "loss": 1.3425, + "step": 1591 + }, + { + "epoch": 0.8, + "learning_rate": 4.417982221863473e-05, + "loss": 1.6173, + "step": 1592 + }, + { + "epoch": 0.8, + "learning_rate": 4.3972832576436096e-05, + "loss": 1.5314, + "step": 1593 + }, + { + "epoch": 0.8, + "learning_rate": 4.376626905788608e-05, + "loss": 1.4374, + "step": 1594 + }, + { + "epoch": 0.8, + "learning_rate": 4.3560132227127606e-05, + "loss": 1.5889, + "step": 1595 + }, + { + "epoch": 0.8, + "learning_rate": 4.335442264713807e-05, + "loss": 1.3955, + "step": 1596 + }, + { + "epoch": 0.8, + "learning_rate": 4.314914087972808e-05, + "loss": 1.5982, + "step": 1597 + }, + { + "epoch": 0.8, + "learning_rate": 4.294428748553987e-05, + "loss": 1.4567, + "step": 1598 + }, + { + "epoch": 0.8, + "learning_rate": 4.2739863024045625e-05, + "loss": 1.7278, + "step": 1599 + }, + { + "epoch": 0.8, + "learning_rate": 4.2535868053546326e-05, + "loss": 1.4764, + "step": 1600 + }, + { + "epoch": 0.8, + "learning_rate": 4.2332303131169756e-05, + "loss": 1.4648, + "step": 1601 + }, + { + "epoch": 0.8, + "learning_rate": 4.212916881286926e-05, + "loss": 1.6258, + "step": 1602 + }, + { + "epoch": 0.8, + "learning_rate": 4.19264656534222e-05, + "loss": 1.6907, + "step": 1603 + }, + { + "epoch": 0.8, + "learning_rate": 4.1724194206428326e-05, + "loss": 1.5267, + "step": 1604 + }, + { + "epoch": 0.8, + "learning_rate": 4.15223550243085e-05, + "loss": 1.2853, + "step": 1605 + }, + { + "epoch": 0.8, + "learning_rate": 4.132094865830289e-05, + "loss": 1.3756, + "step": 1606 + }, + { + "epoch": 0.8, + "learning_rate": 4.1119975658469635e-05, + "loss": 1.5751, + "step": 1607 + }, + { + "epoch": 0.8, + "learning_rate": 4.091943657368333e-05, + "loss": 1.6317, + "step": 1608 + }, + { + "epoch": 0.8, + "learning_rate": 4.0719331951633486e-05, + "loss": 1.5201, + "step": 1609 + }, + { + "epoch": 0.8, + "learning_rate": 4.051966233882314e-05, + "loss": 1.539, + "step": 1610 + }, + { + "epoch": 0.81, + "learning_rate": 4.0320428280567213e-05, + "loss": 1.4511, + "step": 1611 + }, + { + "epoch": 0.81, + "learning_rate": 4.012163032099106e-05, + "loss": 1.5473, + "step": 1612 + }, + { + "epoch": 0.81, + "learning_rate": 3.992326900302907e-05, + "loss": 1.4899, + "step": 1613 + }, + { + "epoch": 0.81, + "learning_rate": 3.9725344868423054e-05, + "loss": 1.6457, + "step": 1614 + }, + { + "epoch": 0.81, + "learning_rate": 3.9527858457721e-05, + "loss": 1.5946, + "step": 1615 + }, + { + "epoch": 0.81, + "learning_rate": 3.933081031027528e-05, + "loss": 1.645, + "step": 1616 + }, + { + "epoch": 0.81, + "learning_rate": 3.9134200964241344e-05, + "loss": 1.2908, + "step": 1617 + }, + { + "epoch": 0.81, + "learning_rate": 3.893803095657631e-05, + "loss": 1.5992, + "step": 1618 + }, + { + "epoch": 0.81, + "learning_rate": 3.874230082303729e-05, + "loss": 1.3276, + "step": 1619 + }, + { + "epoch": 0.81, + "learning_rate": 3.8547011098180306e-05, + "loss": 1.441, + "step": 1620 + }, + { + "epoch": 0.81, + "learning_rate": 3.835216231535834e-05, + "loss": 1.4115, + "step": 1621 + }, + { + "epoch": 0.81, + "learning_rate": 3.815775500672028e-05, + "loss": 1.4233, + "step": 1622 + }, + { + "epoch": 0.81, + "learning_rate": 3.796378970320917e-05, + "loss": 1.5179, + "step": 1623 + }, + { + "epoch": 0.81, + "learning_rate": 3.7770266934561e-05, + "loss": 1.5091, + "step": 1624 + }, + { + "epoch": 0.81, + "learning_rate": 3.757718722930321e-05, + "loss": 1.5677, + "step": 1625 + }, + { + "epoch": 0.81, + "learning_rate": 3.7384551114753076e-05, + "loss": 1.5057, + "step": 1626 + }, + { + "epoch": 0.81, + "learning_rate": 3.719235911701642e-05, + "loss": 1.4919, + "step": 1627 + }, + { + "epoch": 0.81, + "learning_rate": 3.7000611760986193e-05, + "loss": 1.3167, + "step": 1628 + }, + { + "epoch": 0.81, + "learning_rate": 3.6809309570340946e-05, + "loss": 1.5101, + "step": 1629 + }, + { + "epoch": 0.81, + "learning_rate": 3.661845306754341e-05, + "loss": 1.4361, + "step": 1630 + }, + { + "epoch": 0.82, + "learning_rate": 3.6428042773839244e-05, + "loss": 1.5722, + "step": 1631 + }, + { + "epoch": 0.82, + "learning_rate": 3.6238079209255346e-05, + "loss": 1.3537, + "step": 1632 + }, + { + "epoch": 0.82, + "learning_rate": 3.604856289259857e-05, + "loss": 1.4854, + "step": 1633 + }, + { + "epoch": 0.82, + "learning_rate": 3.5859494341454344e-05, + "loss": 1.4207, + "step": 1634 + }, + { + "epoch": 0.82, + "learning_rate": 3.56708740721851e-05, + "loss": 1.3087, + "step": 1635 + }, + { + "epoch": 0.82, + "learning_rate": 3.548270259992921e-05, + "loss": 1.4631, + "step": 1636 + }, + { + "epoch": 0.82, + "learning_rate": 3.5294980438599066e-05, + "loss": 1.5242, + "step": 1637 + }, + { + "epoch": 0.82, + "learning_rate": 3.510770810088015e-05, + "loss": 1.5441, + "step": 1638 + }, + { + "epoch": 0.82, + "learning_rate": 3.492088609822934e-05, + "loss": 1.5334, + "step": 1639 + }, + { + "epoch": 0.82, + "learning_rate": 3.473451494087363e-05, + "loss": 1.3592, + "step": 1640 + }, + { + "epoch": 0.82, + "learning_rate": 3.454859513780868e-05, + "loss": 1.4713, + "step": 1641 + }, + { + "epoch": 0.82, + "learning_rate": 3.436312719679757e-05, + "loss": 1.4357, + "step": 1642 + }, + { + "epoch": 0.82, + "learning_rate": 3.417811162436926e-05, + "loss": 1.3486, + "step": 1643 + }, + { + "epoch": 0.82, + "learning_rate": 3.3993548925817166e-05, + "loss": 1.3834, + "step": 1644 + }, + { + "epoch": 0.82, + "learning_rate": 3.3809439605197955e-05, + "loss": 1.312, + "step": 1645 + }, + { + "epoch": 0.82, + "learning_rate": 3.362578416533002e-05, + "loss": 1.4625, + "step": 1646 + }, + { + "epoch": 0.82, + "learning_rate": 3.344258310779229e-05, + "loss": 1.4697, + "step": 1647 + }, + { + "epoch": 0.82, + "learning_rate": 3.3259836932922605e-05, + "loss": 1.4243, + "step": 1648 + }, + { + "epoch": 0.82, + "learning_rate": 3.307754613981655e-05, + "loss": 1.6532, + "step": 1649 + }, + { + "epoch": 0.82, + "learning_rate": 3.289571122632595e-05, + "loss": 1.5521, + "step": 1650 + }, + { + "epoch": 0.83, + "learning_rate": 3.271433268905764e-05, + "loss": 1.5757, + "step": 1651 + }, + { + "epoch": 0.83, + "learning_rate": 3.2533411023372085e-05, + "loss": 1.4505, + "step": 1652 + }, + { + "epoch": 0.83, + "learning_rate": 3.235294672338196e-05, + "loss": 1.5972, + "step": 1653 + }, + { + "epoch": 0.83, + "learning_rate": 3.2172940281950794e-05, + "loss": 1.4879, + "step": 1654 + }, + { + "epoch": 0.83, + "learning_rate": 3.199339219069173e-05, + "loss": 1.4165, + "step": 1655 + }, + { + "epoch": 0.83, + "learning_rate": 3.181430293996601e-05, + "loss": 1.3632, + "step": 1656 + }, + { + "epoch": 0.83, + "learning_rate": 3.163567301888195e-05, + "loss": 1.5905, + "step": 1657 + }, + { + "epoch": 0.83, + "learning_rate": 3.145750291529319e-05, + "loss": 1.4664, + "step": 1658 + }, + { + "epoch": 0.83, + "learning_rate": 3.127979311579765e-05, + "loss": 1.6243, + "step": 1659 + }, + { + "epoch": 0.83, + "learning_rate": 3.110254410573614e-05, + "loss": 1.3478, + "step": 1660 + }, + { + "epoch": 0.83, + "learning_rate": 3.092575636919097e-05, + "loss": 1.7581, + "step": 1661 + }, + { + "epoch": 0.83, + "learning_rate": 3.074943038898464e-05, + "loss": 1.4673, + "step": 1662 + }, + { + "epoch": 0.83, + "learning_rate": 3.057356664667872e-05, + "loss": 1.5611, + "step": 1663 + }, + { + "epoch": 0.83, + "learning_rate": 3.0398165622572205e-05, + "loss": 1.3356, + "step": 1664 + }, + { + "epoch": 0.83, + "learning_rate": 3.0223227795700416e-05, + "loss": 1.4599, + "step": 1665 + }, + { + "epoch": 0.83, + "learning_rate": 3.0048753643833637e-05, + "loss": 1.5329, + "step": 1666 + }, + { + "epoch": 0.83, + "learning_rate": 2.9874743643475776e-05, + "loss": 1.4561, + "step": 1667 + }, + { + "epoch": 0.83, + "learning_rate": 2.970119826986326e-05, + "loss": 1.497, + "step": 1668 + }, + { + "epoch": 0.83, + "learning_rate": 2.95281179969634e-05, + "loss": 1.505, + "step": 1669 + }, + { + "epoch": 0.83, + "learning_rate": 2.935550329747334e-05, + "loss": 1.3881, + "step": 1670 + }, + { + "epoch": 0.84, + "learning_rate": 2.9183354642818738e-05, + "loss": 1.5118, + "step": 1671 + }, + { + "epoch": 0.84, + "learning_rate": 2.9011672503152355e-05, + "loss": 1.5736, + "step": 1672 + }, + { + "epoch": 0.84, + "learning_rate": 2.884045734735301e-05, + "loss": 1.5678, + "step": 1673 + }, + { + "epoch": 0.84, + "learning_rate": 2.8669709643024e-05, + "loss": 1.4046, + "step": 1674 + }, + { + "epoch": 0.84, + "learning_rate": 2.8499429856492055e-05, + "loss": 1.5102, + "step": 1675 + }, + { + "epoch": 0.84, + "learning_rate": 2.8329618452805928e-05, + "loss": 1.4656, + "step": 1676 + }, + { + "epoch": 0.84, + "learning_rate": 2.8160275895735177e-05, + "loss": 1.6555, + "step": 1677 + }, + { + "epoch": 0.84, + "learning_rate": 2.7991402647769026e-05, + "loss": 1.5405, + "step": 1678 + }, + { + "epoch": 0.84, + "learning_rate": 2.7822999170114816e-05, + "loss": 1.5596, + "step": 1679 + }, + { + "epoch": 0.84, + "learning_rate": 2.7655065922696954e-05, + "loss": 1.5635, + "step": 1680 + }, + { + "epoch": 0.84, + "learning_rate": 2.7487603364155655e-05, + "loss": 1.3659, + "step": 1681 + }, + { + "epoch": 0.84, + "learning_rate": 2.7320611951845543e-05, + "loss": 1.3942, + "step": 1682 + }, + { + "epoch": 0.84, + "learning_rate": 2.7154092141834664e-05, + "loss": 1.5691, + "step": 1683 + }, + { + "epoch": 0.84, + "learning_rate": 2.6988044388902923e-05, + "loss": 1.5243, + "step": 1684 + }, + { + "epoch": 0.84, + "learning_rate": 2.6822469146541028e-05, + "loss": 1.3723, + "step": 1685 + }, + { + "epoch": 0.84, + "learning_rate": 2.6657366866949308e-05, + "loss": 1.454, + "step": 1686 + }, + { + "epoch": 0.84, + "learning_rate": 2.649273800103622e-05, + "loss": 1.567, + "step": 1687 + }, + { + "epoch": 0.84, + "learning_rate": 2.6328582998417516e-05, + "loss": 1.3523, + "step": 1688 + }, + { + "epoch": 0.84, + "learning_rate": 2.6164902307414596e-05, + "loss": 1.4375, + "step": 1689 + }, + { + "epoch": 0.84, + "learning_rate": 2.6001696375053563e-05, + "loss": 1.434, + "step": 1690 + }, + { + "epoch": 0.85, + "learning_rate": 2.5838965647063873e-05, + "loss": 1.4511, + "step": 1691 + }, + { + "epoch": 0.85, + "learning_rate": 2.5676710567877148e-05, + "loss": 1.4878, + "step": 1692 + }, + { + "epoch": 0.85, + "learning_rate": 2.5514931580626144e-05, + "loss": 1.5302, + "step": 1693 + }, + { + "epoch": 0.85, + "learning_rate": 2.535362912714301e-05, + "loss": 1.633, + "step": 1694 + }, + { + "epoch": 0.85, + "learning_rate": 2.5192803647958817e-05, + "loss": 1.4009, + "step": 1695 + }, + { + "epoch": 0.85, + "learning_rate": 2.5032455582301763e-05, + "loss": 1.4321, + "step": 1696 + }, + { + "epoch": 0.85, + "learning_rate": 2.4872585368096225e-05, + "loss": 1.4418, + "step": 1697 + }, + { + "epoch": 0.85, + "learning_rate": 2.4713193441961636e-05, + "loss": 1.479, + "step": 1698 + }, + { + "epoch": 0.85, + "learning_rate": 2.4554280239210915e-05, + "loss": 1.3998, + "step": 1699 + }, + { + "epoch": 0.85, + "learning_rate": 2.439584619384989e-05, + "loss": 1.4941, + "step": 1700 + }, + { + "epoch": 0.85, + "learning_rate": 2.4237891738575534e-05, + "loss": 1.5128, + "step": 1701 + }, + { + "epoch": 0.85, + "learning_rate": 2.408041730477506e-05, + "loss": 1.1936, + "step": 1702 + }, + { + "epoch": 0.85, + "learning_rate": 2.3923423322524818e-05, + "loss": 1.4058, + "step": 1703 + }, + { + "epoch": 0.85, + "learning_rate": 2.3766910220588767e-05, + "loss": 1.6485, + "step": 1704 + }, + { + "epoch": 0.85, + "learning_rate": 2.3610878426417803e-05, + "loss": 1.3647, + "step": 1705 + }, + { + "epoch": 0.85, + "learning_rate": 2.3455328366148188e-05, + "loss": 1.5596, + "step": 1706 + }, + { + "epoch": 0.85, + "learning_rate": 2.3300260464600497e-05, + "loss": 1.6531, + "step": 1707 + }, + { + "epoch": 0.85, + "learning_rate": 2.3145675145278655e-05, + "loss": 1.5407, + "step": 1708 + }, + { + "epoch": 0.85, + "learning_rate": 2.2991572830368325e-05, + "loss": 1.4708, + "step": 1709 + }, + { + "epoch": 0.85, + "learning_rate": 2.283795394073638e-05, + "loss": 1.5763, + "step": 1710 + }, + { + "epoch": 0.86, + "learning_rate": 2.2684818895929194e-05, + "loss": 1.5763, + "step": 1711 + }, + { + "epoch": 0.86, + "learning_rate": 2.2532168114171757e-05, + "loss": 1.4242, + "step": 1712 + }, + { + "epoch": 0.86, + "learning_rate": 2.238000201236654e-05, + "loss": 1.6482, + "step": 1713 + }, + { + "epoch": 0.86, + "learning_rate": 2.2228321006092247e-05, + "loss": 1.5563, + "step": 1714 + }, + { + "epoch": 0.86, + "learning_rate": 2.207712550960286e-05, + "loss": 1.641, + "step": 1715 + }, + { + "epoch": 0.86, + "learning_rate": 2.1926415935826295e-05, + "loss": 1.5114, + "step": 1716 + }, + { + "epoch": 0.86, + "learning_rate": 2.1776192696363372e-05, + "loss": 1.5597, + "step": 1717 + }, + { + "epoch": 0.86, + "learning_rate": 2.1626456201486712e-05, + "loss": 1.581, + "step": 1718 + }, + { + "epoch": 0.86, + "learning_rate": 2.1477206860139564e-05, + "loss": 1.4744, + "step": 1719 + }, + { + "epoch": 0.86, + "learning_rate": 2.1328445079934835e-05, + "loss": 1.4128, + "step": 1720 + }, + { + "epoch": 0.86, + "learning_rate": 2.1180171267153726e-05, + "loss": 1.5995, + "step": 1721 + }, + { + "epoch": 0.86, + "learning_rate": 2.1032385826744805e-05, + "loss": 1.3006, + "step": 1722 + }, + { + "epoch": 0.86, + "learning_rate": 2.0885089162322857e-05, + "loss": 1.7836, + "step": 1723 + }, + { + "epoch": 0.86, + "learning_rate": 2.0738281676167736e-05, + "loss": 1.4046, + "step": 1724 + }, + { + "epoch": 0.86, + "learning_rate": 2.059196376922348e-05, + "loss": 1.3634, + "step": 1725 + }, + { + "epoch": 0.86, + "learning_rate": 2.0446135841096713e-05, + "loss": 1.3388, + "step": 1726 + }, + { + "epoch": 0.86, + "learning_rate": 2.0300798290056223e-05, + "loss": 1.4948, + "step": 1727 + }, + { + "epoch": 0.86, + "learning_rate": 2.0155951513031357e-05, + "loss": 1.456, + "step": 1728 + }, + { + "epoch": 0.86, + "learning_rate": 2.0011595905611103e-05, + "loss": 1.3373, + "step": 1729 + }, + { + "epoch": 0.86, + "learning_rate": 1.9867731862043206e-05, + "loss": 1.7381, + "step": 1730 + }, + { + "epoch": 0.87, + "learning_rate": 1.9724359775232592e-05, + "loss": 1.5819, + "step": 1731 + }, + { + "epoch": 0.87, + "learning_rate": 1.958148003674094e-05, + "loss": 1.6322, + "step": 1732 + }, + { + "epoch": 0.87, + "learning_rate": 1.9439093036785105e-05, + "loss": 1.42, + "step": 1733 + }, + { + "epoch": 0.87, + "learning_rate": 1.9297199164236178e-05, + "loss": 1.5373, + "step": 1734 + }, + { + "epoch": 0.87, + "learning_rate": 1.915579880661873e-05, + "loss": 1.7121, + "step": 1735 + }, + { + "epoch": 0.87, + "learning_rate": 1.9014892350109113e-05, + "loss": 1.5765, + "step": 1736 + }, + { + "epoch": 0.87, + "learning_rate": 1.887448017953519e-05, + "loss": 1.7314, + "step": 1737 + }, + { + "epoch": 0.87, + "learning_rate": 1.8734562678374635e-05, + "loss": 1.3345, + "step": 1738 + }, + { + "epoch": 0.87, + "learning_rate": 1.8595140228754194e-05, + "loss": 1.3661, + "step": 1739 + }, + { + "epoch": 0.87, + "learning_rate": 1.845621321144866e-05, + "loss": 1.548, + "step": 1740 + }, + { + "epoch": 0.87, + "learning_rate": 1.8317782005879615e-05, + "loss": 1.433, + "step": 1741 + }, + { + "epoch": 0.87, + "learning_rate": 1.817984699011468e-05, + "loss": 1.3648, + "step": 1742 + }, + { + "epoch": 0.87, + "learning_rate": 1.8042408540866272e-05, + "loss": 1.4532, + "step": 1743 + }, + { + "epoch": 0.87, + "learning_rate": 1.7905467033490585e-05, + "loss": 1.5611, + "step": 1744 + }, + { + "epoch": 0.87, + "learning_rate": 1.7769022841986805e-05, + "loss": 1.6119, + "step": 1745 + }, + { + "epoch": 0.87, + "learning_rate": 1.7633076338995647e-05, + "loss": 1.5655, + "step": 1746 + }, + { + "epoch": 0.87, + "learning_rate": 1.749762789579883e-05, + "loss": 1.536, + "step": 1747 + }, + { + "epoch": 0.87, + "learning_rate": 1.7362677882317713e-05, + "loss": 1.423, + "step": 1748 + }, + { + "epoch": 0.87, + "learning_rate": 1.7228226667112368e-05, + "loss": 1.4794, + "step": 1749 + }, + { + "epoch": 0.87, + "learning_rate": 1.7094274617380755e-05, + "loss": 1.4831, + "step": 1750 + }, + { + "epoch": 0.88, + "learning_rate": 1.6960822098957374e-05, + "loss": 1.405, + "step": 1751 + }, + { + "epoch": 0.88, + "learning_rate": 1.6827869476312607e-05, + "loss": 1.45, + "step": 1752 + }, + { + "epoch": 0.88, + "learning_rate": 1.669541711255156e-05, + "loss": 1.6667, + "step": 1753 + }, + { + "epoch": 0.88, + "learning_rate": 1.656346536941298e-05, + "loss": 1.631, + "step": 1754 + }, + { + "epoch": 0.88, + "learning_rate": 1.6432014607268555e-05, + "loss": 1.4798, + "step": 1755 + }, + { + "epoch": 0.88, + "learning_rate": 1.6301065185121554e-05, + "loss": 1.4823, + "step": 1756 + }, + { + "epoch": 0.88, + "learning_rate": 1.6170617460606197e-05, + "loss": 1.486, + "step": 1757 + }, + { + "epoch": 0.88, + "learning_rate": 1.6040671789986472e-05, + "loss": 1.3193, + "step": 1758 + }, + { + "epoch": 0.88, + "learning_rate": 1.5911228528155164e-05, + "loss": 1.4193, + "step": 1759 + }, + { + "epoch": 0.88, + "learning_rate": 1.5782288028633085e-05, + "loss": 1.5819, + "step": 1760 + }, + { + "epoch": 0.88, + "learning_rate": 1.5653850643567725e-05, + "loss": 1.6686, + "step": 1761 + }, + { + "epoch": 0.88, + "learning_rate": 1.5525916723732737e-05, + "loss": 1.4264, + "step": 1762 + }, + { + "epoch": 0.88, + "learning_rate": 1.5398486618526674e-05, + "loss": 1.4875, + "step": 1763 + }, + { + "epoch": 0.88, + "learning_rate": 1.5271560675972128e-05, + "loss": 1.5749, + "step": 1764 + }, + { + "epoch": 0.88, + "learning_rate": 1.5145139242714857e-05, + "loss": 1.4587, + "step": 1765 + }, + { + "epoch": 0.88, + "learning_rate": 1.501922266402258e-05, + "loss": 1.4071, + "step": 1766 + }, + { + "epoch": 0.88, + "learning_rate": 1.4893811283784443e-05, + "loss": 1.5193, + "step": 1767 + }, + { + "epoch": 0.88, + "learning_rate": 1.4768905444509684e-05, + "loss": 1.5266, + "step": 1768 + }, + { + "epoch": 0.88, + "learning_rate": 1.4644505487326898e-05, + "loss": 1.658, + "step": 1769 + }, + { + "epoch": 0.88, + "learning_rate": 1.4520611751983182e-05, + "loss": 1.5443, + "step": 1770 + }, + { + "epoch": 0.89, + "learning_rate": 1.4397224576842916e-05, + "loss": 1.4865, + "step": 1771 + }, + { + "epoch": 0.89, + "learning_rate": 1.4274344298887166e-05, + "loss": 1.7879, + "step": 1772 + }, + { + "epoch": 0.89, + "learning_rate": 1.415197125371257e-05, + "loss": 1.4343, + "step": 1773 + }, + { + "epoch": 0.89, + "learning_rate": 1.403010577553041e-05, + "loss": 1.5379, + "step": 1774 + }, + { + "epoch": 0.89, + "learning_rate": 1.3908748197165944e-05, + "loss": 1.3095, + "step": 1775 + }, + { + "epoch": 0.89, + "learning_rate": 1.3787898850057024e-05, + "loss": 1.3313, + "step": 1776 + }, + { + "epoch": 0.89, + "learning_rate": 1.3667558064253727e-05, + "loss": 1.5196, + "step": 1777 + }, + { + "epoch": 0.89, + "learning_rate": 1.354772616841713e-05, + "loss": 1.4472, + "step": 1778 + }, + { + "epoch": 0.89, + "learning_rate": 1.3428403489818398e-05, + "loss": 1.3119, + "step": 1779 + }, + { + "epoch": 0.89, + "learning_rate": 1.3309590354338164e-05, + "loss": 1.5333, + "step": 1780 + }, + { + "epoch": 0.89, + "learning_rate": 1.3191287086465243e-05, + "loss": 1.6633, + "step": 1781 + }, + { + "epoch": 0.89, + "learning_rate": 1.3073494009296139e-05, + "loss": 1.362, + "step": 1782 + }, + { + "epoch": 0.89, + "learning_rate": 1.2956211444533827e-05, + "loss": 1.3867, + "step": 1783 + }, + { + "epoch": 0.89, + "learning_rate": 1.2839439712487133e-05, + "loss": 1.3955, + "step": 1784 + }, + { + "epoch": 0.89, + "learning_rate": 1.2723179132069817e-05, + "loss": 1.5394, + "step": 1785 + }, + { + "epoch": 0.89, + "learning_rate": 1.2607430020799405e-05, + "loss": 1.4558, + "step": 1786 + }, + { + "epoch": 0.89, + "learning_rate": 1.2492192694796822e-05, + "loss": 1.4158, + "step": 1787 + }, + { + "epoch": 0.89, + "learning_rate": 1.237746746878503e-05, + "loss": 1.4332, + "step": 1788 + }, + { + "epoch": 0.89, + "learning_rate": 1.2263254656088618e-05, + "loss": 1.4337, + "step": 1789 + }, + { + "epoch": 0.89, + "learning_rate": 1.214955456863256e-05, + "loss": 1.4629, + "step": 1790 + }, + { + "epoch": 0.9, + "learning_rate": 1.2036367516941594e-05, + "loss": 1.4211, + "step": 1791 + }, + { + "epoch": 0.9, + "learning_rate": 1.1923693810139424e-05, + "loss": 1.3978, + "step": 1792 + }, + { + "epoch": 0.9, + "learning_rate": 1.1811533755947502e-05, + "loss": 1.5042, + "step": 1793 + }, + { + "epoch": 0.9, + "learning_rate": 1.1699887660684705e-05, + "loss": 1.6496, + "step": 1794 + }, + { + "epoch": 0.9, + "learning_rate": 1.1588755829266152e-05, + "loss": 1.4824, + "step": 1795 + }, + { + "epoch": 0.9, + "learning_rate": 1.147813856520239e-05, + "loss": 1.5267, + "step": 1796 + }, + { + "epoch": 0.9, + "learning_rate": 1.1368036170598828e-05, + "loss": 1.491, + "step": 1797 + }, + { + "epoch": 0.9, + "learning_rate": 1.1258448946154444e-05, + "loss": 1.4966, + "step": 1798 + }, + { + "epoch": 0.9, + "learning_rate": 1.1149377191161491e-05, + "loss": 1.555, + "step": 1799 + }, + { + "epoch": 0.9, + "learning_rate": 1.1040821203504336e-05, + "loss": 1.4457, + "step": 1800 + }, + { + "epoch": 0.9, + "learning_rate": 1.093278127965871e-05, + "loss": 1.5241, + "step": 1801 + }, + { + "epoch": 0.9, + "learning_rate": 1.0825257714691029e-05, + "loss": 1.357, + "step": 1802 + }, + { + "epoch": 0.9, + "learning_rate": 1.0718250802257302e-05, + "loss": 1.3904, + "step": 1803 + }, + { + "epoch": 0.9, + "learning_rate": 1.0611760834602757e-05, + "loss": 1.3859, + "step": 1804 + }, + { + "epoch": 0.9, + "learning_rate": 1.0505788102560598e-05, + "loss": 1.2844, + "step": 1805 + }, + { + "epoch": 0.9, + "learning_rate": 1.0400332895551513e-05, + "loss": 1.5293, + "step": 1806 + }, + { + "epoch": 0.9, + "learning_rate": 1.0295395501582805e-05, + "loss": 1.5423, + "step": 1807 + }, + { + "epoch": 0.9, + "learning_rate": 1.0190976207247472e-05, + "loss": 1.511, + "step": 1808 + }, + { + "epoch": 0.9, + "learning_rate": 1.008707529772368e-05, + "loss": 1.3079, + "step": 1809 + }, + { + "epoch": 0.9, + "learning_rate": 9.983693056773757e-06, + "loss": 1.5143, + "step": 1810 + }, + { + "epoch": 0.91, + "learning_rate": 9.880829766743471e-06, + "loss": 1.6085, + "step": 1811 + }, + { + "epoch": 0.91, + "learning_rate": 9.778485708561436e-06, + "loss": 1.4507, + "step": 1812 + }, + { + "epoch": 0.91, + "learning_rate": 9.676661161737977e-06, + "loss": 1.4321, + "step": 1813 + }, + { + "epoch": 0.91, + "learning_rate": 9.575356404364799e-06, + "loss": 1.5854, + "step": 1814 + }, + { + "epoch": 0.91, + "learning_rate": 9.4745717131139e-06, + "loss": 1.721, + "step": 1815 + }, + { + "epoch": 0.91, + "learning_rate": 9.374307363236946e-06, + "loss": 1.2434, + "step": 1816 + }, + { + "epoch": 0.91, + "learning_rate": 9.274563628564581e-06, + "loss": 1.498, + "step": 1817 + }, + { + "epoch": 0.91, + "learning_rate": 9.17534078150546e-06, + "loss": 1.68, + "step": 1818 + }, + { + "epoch": 0.91, + "learning_rate": 9.076639093045814e-06, + "loss": 1.4945, + "step": 1819 + }, + { + "epoch": 0.91, + "learning_rate": 8.978458832748437e-06, + "loss": 1.4391, + "step": 1820 + }, + { + "epoch": 0.91, + "learning_rate": 8.880800268752043e-06, + "loss": 1.4764, + "step": 1821 + }, + { + "epoch": 0.91, + "learning_rate": 8.783663667770703e-06, + "loss": 1.6106, + "step": 1822 + }, + { + "epoch": 0.91, + "learning_rate": 8.687049295092698e-06, + "loss": 1.5949, + "step": 1823 + }, + { + "epoch": 0.91, + "learning_rate": 8.590957414580336e-06, + "loss": 1.5353, + "step": 1824 + }, + { + "epoch": 0.91, + "learning_rate": 8.495388288668782e-06, + "loss": 1.5349, + "step": 1825 + }, + { + "epoch": 0.91, + "learning_rate": 8.400342178365562e-06, + "loss": 1.3926, + "step": 1826 + }, + { + "epoch": 0.91, + "learning_rate": 8.305819343249788e-06, + "loss": 1.7055, + "step": 1827 + }, + { + "epoch": 0.91, + "learning_rate": 8.211820041471475e-06, + "loss": 1.5366, + "step": 1828 + }, + { + "epoch": 0.91, + "learning_rate": 8.118344529750843e-06, + "loss": 1.4641, + "step": 1829 + }, + { + "epoch": 0.91, + "learning_rate": 8.025393063377574e-06, + "loss": 1.4215, + "step": 1830 + }, + { + "epoch": 0.92, + "learning_rate": 7.932965896210109e-06, + "loss": 1.4099, + "step": 1831 + }, + { + "epoch": 0.92, + "learning_rate": 7.841063280675042e-06, + "loss": 1.5401, + "step": 1832 + }, + { + "epoch": 0.92, + "learning_rate": 7.74968546776631e-06, + "loss": 1.4222, + "step": 1833 + }, + { + "epoch": 0.92, + "learning_rate": 7.658832707044638e-06, + "loss": 1.5288, + "step": 1834 + }, + { + "epoch": 0.92, + "learning_rate": 7.568505246636748e-06, + "loss": 1.634, + "step": 1835 + }, + { + "epoch": 0.92, + "learning_rate": 7.4787033332346825e-06, + "loss": 1.2635, + "step": 1836 + }, + { + "epoch": 0.92, + "learning_rate": 7.389427212095234e-06, + "loss": 1.526, + "step": 1837 + }, + { + "epoch": 0.92, + "learning_rate": 7.3006771270391685e-06, + "loss": 1.5969, + "step": 1838 + }, + { + "epoch": 0.92, + "learning_rate": 7.212453320450685e-06, + "loss": 1.4708, + "step": 1839 + }, + { + "epoch": 0.92, + "learning_rate": 7.124756033276536e-06, + "loss": 1.4838, + "step": 1840 + }, + { + "epoch": 0.92, + "learning_rate": 7.037585505025601e-06, + "loss": 1.5253, + "step": 1841 + }, + { + "epoch": 0.92, + "learning_rate": 6.950941973768132e-06, + "loss": 1.4627, + "step": 1842 + }, + { + "epoch": 0.92, + "learning_rate": 6.8648256761350224e-06, + "loss": 1.5297, + "step": 1843 + }, + { + "epoch": 0.92, + "learning_rate": 6.779236847317383e-06, + "loss": 1.5432, + "step": 1844 + }, + { + "epoch": 0.92, + "learning_rate": 6.694175721065632e-06, + "loss": 1.5786, + "step": 1845 + }, + { + "epoch": 0.92, + "learning_rate": 6.6096425296890755e-06, + "loss": 1.3737, + "step": 1846 + }, + { + "epoch": 0.92, + "learning_rate": 6.525637504055127e-06, + "loss": 1.5205, + "step": 1847 + }, + { + "epoch": 0.92, + "learning_rate": 6.442160873588732e-06, + "loss": 1.5186, + "step": 1848 + }, + { + "epoch": 0.92, + "learning_rate": 6.359212866271836e-06, + "loss": 1.4775, + "step": 1849 + }, + { + "epoch": 0.92, + "learning_rate": 6.2767937086425366e-06, + "loss": 1.4804, + "step": 1850 + }, + { + "epoch": 0.93, + "learning_rate": 6.19490362579469e-06, + "loss": 1.5721, + "step": 1851 + }, + { + "epoch": 0.93, + "learning_rate": 6.1135428413771515e-06, + "loss": 1.5261, + "step": 1852 + }, + { + "epoch": 0.93, + "learning_rate": 6.032711577593264e-06, + "loss": 1.4337, + "step": 1853 + }, + { + "epoch": 0.93, + "learning_rate": 5.952410055200153e-06, + "loss": 1.5172, + "step": 1854 + }, + { + "epoch": 0.93, + "learning_rate": 5.872638493508231e-06, + "loss": 1.4595, + "step": 1855 + }, + { + "epoch": 0.93, + "learning_rate": 5.793397110380516e-06, + "loss": 1.4478, + "step": 1856 + }, + { + "epoch": 0.93, + "learning_rate": 5.714686122232027e-06, + "loss": 1.4076, + "step": 1857 + }, + { + "epoch": 0.93, + "learning_rate": 5.636505744029252e-06, + "loss": 1.6518, + "step": 1858 + }, + { + "epoch": 0.93, + "learning_rate": 5.558856189289596e-06, + "loss": 1.3948, + "step": 1859 + }, + { + "epoch": 0.93, + "learning_rate": 5.481737670080622e-06, + "loss": 1.5249, + "step": 1860 + }, + { + "epoch": 0.93, + "learning_rate": 5.4051503970197205e-06, + "loss": 1.2953, + "step": 1861 + }, + { + "epoch": 0.93, + "learning_rate": 5.329094579273308e-06, + "loss": 1.3525, + "step": 1862 + }, + { + "epoch": 0.93, + "learning_rate": 5.2535704245563866e-06, + "loss": 1.5081, + "step": 1863 + }, + { + "epoch": 0.93, + "learning_rate": 5.178578139131918e-06, + "loss": 1.6195, + "step": 1864 + }, + { + "epoch": 0.93, + "learning_rate": 5.104117927810314e-06, + "loss": 1.5943, + "step": 1865 + }, + { + "epoch": 0.93, + "learning_rate": 5.030189993948841e-06, + "loss": 1.2921, + "step": 1866 + }, + { + "epoch": 0.93, + "learning_rate": 4.95679453945106e-06, + "loss": 1.5332, + "step": 1867 + }, + { + "epoch": 0.93, + "learning_rate": 4.883931764766292e-06, + "loss": 1.4457, + "step": 1868 + }, + { + "epoch": 0.93, + "learning_rate": 4.8116018688890265e-06, + "loss": 1.4234, + "step": 1869 + }, + { + "epoch": 0.93, + "learning_rate": 4.73980504935847e-06, + "loss": 1.5769, + "step": 1870 + }, + { + "epoch": 0.94, + "learning_rate": 4.6685415022579286e-06, + "loss": 1.5136, + "step": 1871 + }, + { + "epoch": 0.94, + "learning_rate": 4.597811422214293e-06, + "loss": 1.6175, + "step": 1872 + }, + { + "epoch": 0.94, + "learning_rate": 4.527615002397489e-06, + "loss": 1.3397, + "step": 1873 + }, + { + "epoch": 0.94, + "learning_rate": 4.4579524345200295e-06, + "loss": 1.6076, + "step": 1874 + }, + { + "epoch": 0.94, + "learning_rate": 4.388823908836348e-06, + "loss": 1.5147, + "step": 1875 + }, + { + "epoch": 0.94, + "learning_rate": 4.320229614142468e-06, + "loss": 1.571, + "step": 1876 + }, + { + "epoch": 0.94, + "learning_rate": 4.252169737775291e-06, + "loss": 1.8309, + "step": 1877 + }, + { + "epoch": 0.94, + "learning_rate": 4.184644465612198e-06, + "loss": 1.5114, + "step": 1878 + }, + { + "epoch": 0.94, + "learning_rate": 4.1176539820705575e-06, + "loss": 1.5716, + "step": 1879 + }, + { + "epoch": 0.94, + "learning_rate": 4.051198470107109e-06, + "loss": 1.496, + "step": 1880 + }, + { + "epoch": 0.94, + "learning_rate": 3.98527811121765e-06, + "loss": 1.4383, + "step": 1881 + }, + { + "epoch": 0.94, + "learning_rate": 3.919893085436322e-06, + "loss": 1.4402, + "step": 1882 + }, + { + "epoch": 0.94, + "learning_rate": 3.855043571335282e-06, + "loss": 1.4978, + "step": 1883 + }, + { + "epoch": 0.94, + "learning_rate": 3.7907297460241465e-06, + "loss": 1.3977, + "step": 1884 + }, + { + "epoch": 0.94, + "learning_rate": 3.7269517851494797e-06, + "loss": 1.5101, + "step": 1885 + }, + { + "epoch": 0.94, + "learning_rate": 3.6637098628944598e-06, + "loss": 1.776, + "step": 1886 + }, + { + "epoch": 0.94, + "learning_rate": 3.6010041519782157e-06, + "loss": 1.4289, + "step": 1887 + }, + { + "epoch": 0.94, + "learning_rate": 3.5388348236554016e-06, + "loss": 1.5133, + "step": 1888 + }, + { + "epoch": 0.94, + "learning_rate": 3.4772020477158663e-06, + "loss": 1.3617, + "step": 1889 + }, + { + "epoch": 0.94, + "learning_rate": 3.416105992484009e-06, + "loss": 1.3404, + "step": 1890 + }, + { + "epoch": 0.95, + "learning_rate": 3.3555468248184006e-06, + "loss": 1.4797, + "step": 1891 + }, + { + "epoch": 0.95, + "learning_rate": 3.2955247101114082e-06, + "loss": 1.539, + "step": 1892 + }, + { + "epoch": 0.95, + "learning_rate": 3.236039812288594e-06, + "loss": 1.4868, + "step": 1893 + }, + { + "epoch": 0.95, + "learning_rate": 3.1770922938082926e-06, + "loss": 1.4764, + "step": 1894 + }, + { + "epoch": 0.95, + "learning_rate": 3.1186823156613254e-06, + "loss": 1.3227, + "step": 1895 + }, + { + "epoch": 0.95, + "learning_rate": 3.0608100373703095e-06, + "loss": 1.6015, + "step": 1896 + }, + { + "epoch": 0.95, + "learning_rate": 3.0034756169894816e-06, + "loss": 1.5561, + "step": 1897 + }, + { + "epoch": 0.95, + "learning_rate": 2.946679211104053e-06, + "loss": 1.5038, + "step": 1898 + }, + { + "epoch": 0.95, + "learning_rate": 2.8904209748299436e-06, + "loss": 1.3569, + "step": 1899 + }, + { + "epoch": 0.95, + "learning_rate": 2.8347010618132275e-06, + "loss": 1.6143, + "step": 1900 + }, + { + "epoch": 0.95, + "learning_rate": 2.779519624229776e-06, + "loss": 1.3514, + "step": 1901 + }, + { + "epoch": 0.95, + "learning_rate": 2.7248768127848824e-06, + "loss": 1.549, + "step": 1902 + }, + { + "epoch": 0.95, + "learning_rate": 2.670772776712771e-06, + "loss": 1.4779, + "step": 1903 + }, + { + "epoch": 0.95, + "learning_rate": 2.6172076637762e-06, + "loss": 1.599, + "step": 1904 + }, + { + "epoch": 0.95, + "learning_rate": 2.5641816202661485e-06, + "loss": 1.401, + "step": 1905 + }, + { + "epoch": 0.95, + "learning_rate": 2.5116947910012844e-06, + "loss": 1.7322, + "step": 1906 + }, + { + "epoch": 0.95, + "learning_rate": 2.4597473193276986e-06, + "loss": 1.4035, + "step": 1907 + }, + { + "epoch": 0.95, + "learning_rate": 2.4083393471183936e-06, + "loss": 1.6523, + "step": 1908 + }, + { + "epoch": 0.95, + "learning_rate": 2.3574710147729717e-06, + "loss": 1.4911, + "step": 1909 + }, + { + "epoch": 0.95, + "learning_rate": 2.30714246121726e-06, + "loss": 1.4443, + "step": 1910 + }, + { + "epoch": 0.96, + "learning_rate": 2.257353823902819e-06, + "loss": 1.5876, + "step": 1911 + }, + { + "epoch": 0.96, + "learning_rate": 2.2081052388067902e-06, + "loss": 1.4541, + "step": 1912 + }, + { + "epoch": 0.96, + "learning_rate": 2.159396840431249e-06, + "loss": 1.4649, + "step": 1913 + }, + { + "epoch": 0.96, + "learning_rate": 2.1112287618030523e-06, + "loss": 1.4844, + "step": 1914 + }, + { + "epoch": 0.96, + "learning_rate": 2.0636011344733696e-06, + "loss": 1.5646, + "step": 1915 + }, + { + "epoch": 0.96, + "learning_rate": 2.0165140885173516e-06, + "loss": 1.2648, + "step": 1916 + }, + { + "epoch": 0.96, + "learning_rate": 1.969967752533819e-06, + "loss": 1.5677, + "step": 1917 + }, + { + "epoch": 0.96, + "learning_rate": 1.9239622536447953e-06, + "loss": 1.4764, + "step": 1918 + }, + { + "epoch": 0.96, + "learning_rate": 1.8784977174952646e-06, + "loss": 1.51, + "step": 1919 + }, + { + "epoch": 0.96, + "learning_rate": 1.8335742682527912e-06, + "loss": 1.4352, + "step": 1920 + }, + { + "epoch": 0.96, + "learning_rate": 1.7891920286072117e-06, + "loss": 1.5023, + "step": 1921 + }, + { + "epoch": 0.96, + "learning_rate": 1.7453511197702554e-06, + "loss": 1.4443, + "step": 1922 + }, + { + "epoch": 0.96, + "learning_rate": 1.702051661475168e-06, + "loss": 1.3801, + "step": 1923 + }, + { + "epoch": 0.96, + "learning_rate": 1.6592937719765778e-06, + "loss": 1.5101, + "step": 1924 + }, + { + "epoch": 0.96, + "learning_rate": 1.6170775680499183e-06, + "loss": 1.323, + "step": 1925 + }, + { + "epoch": 0.96, + "learning_rate": 1.575403164991296e-06, + "loss": 1.1511, + "step": 1926 + }, + { + "epoch": 0.96, + "learning_rate": 1.5342706766171112e-06, + "loss": 1.4834, + "step": 1927 + }, + { + "epoch": 0.96, + "learning_rate": 1.4936802152637264e-06, + "loss": 1.3955, + "step": 1928 + }, + { + "epoch": 0.96, + "learning_rate": 1.4536318917871773e-06, + "loss": 1.4266, + "step": 1929 + }, + { + "epoch": 0.96, + "learning_rate": 1.4141258155629278e-06, + "loss": 1.4888, + "step": 1930 + }, + { + "epoch": 0.97, + "learning_rate": 1.375162094485427e-06, + "loss": 1.4385, + "step": 1931 + }, + { + "epoch": 0.97, + "learning_rate": 1.33674083496802e-06, + "loss": 1.6035, + "step": 1932 + }, + { + "epoch": 0.97, + "learning_rate": 1.2988621419424141e-06, + "loss": 1.3936, + "step": 1933 + }, + { + "epoch": 0.97, + "learning_rate": 1.2615261188586136e-06, + "loss": 1.53, + "step": 1934 + }, + { + "epoch": 0.97, + "learning_rate": 1.2247328676845415e-06, + "loss": 1.5621, + "step": 1935 + }, + { + "epoch": 0.97, + "learning_rate": 1.1884824889057066e-06, + "loss": 1.3635, + "step": 1936 + }, + { + "epoch": 0.97, + "learning_rate": 1.1527750815250481e-06, + "loss": 1.2758, + "step": 1937 + }, + { + "epoch": 0.97, + "learning_rate": 1.117610743062536e-06, + "loss": 1.518, + "step": 1938 + }, + { + "epoch": 0.97, + "learning_rate": 1.0829895695550374e-06, + "loss": 1.4032, + "step": 1939 + }, + { + "epoch": 0.97, + "learning_rate": 1.048911655555962e-06, + "loss": 1.7788, + "step": 1940 + }, + { + "epoch": 0.97, + "learning_rate": 1.0153770941350172e-06, + "loss": 1.4456, + "step": 1941 + }, + { + "epoch": 0.97, + "learning_rate": 9.823859768780086e-07, + "loss": 1.4624, + "step": 1942 + }, + { + "epoch": 0.97, + "learning_rate": 9.499383938864847e-07, + "loss": 1.3749, + "step": 1943 + }, + { + "epoch": 0.97, + "learning_rate": 9.180344337776259e-07, + "loss": 1.4968, + "step": 1944 + }, + { + "epoch": 0.97, + "learning_rate": 8.866741836838888e-07, + "loss": 1.6657, + "step": 1945 + }, + { + "epoch": 0.97, + "learning_rate": 8.558577292528069e-07, + "loss": 1.6438, + "step": 1946 + }, + { + "epoch": 0.97, + "learning_rate": 8.255851546467908e-07, + "loss": 1.3037, + "step": 1947 + }, + { + "epoch": 0.97, + "learning_rate": 7.958565425428388e-07, + "loss": 1.6018, + "step": 1948 + }, + { + "epoch": 0.97, + "learning_rate": 7.666719741323824e-07, + "loss": 1.392, + "step": 1949 + }, + { + "epoch": 0.97, + "learning_rate": 7.380315291209528e-07, + "loss": 1.5374, + "step": 1950 + }, + { + "epoch": 0.98, + "learning_rate": 7.099352857281138e-07, + "loss": 1.3997, + "step": 1951 + }, + { + "epoch": 0.98, + "learning_rate": 6.823833206871077e-07, + "loss": 1.4063, + "step": 1952 + }, + { + "epoch": 0.98, + "learning_rate": 6.55375709244721e-07, + "loss": 1.5195, + "step": 1953 + }, + { + "epoch": 0.98, + "learning_rate": 6.289125251610851e-07, + "loss": 1.5045, + "step": 1954 + }, + { + "epoch": 0.98, + "learning_rate": 6.029938407094316e-07, + "loss": 1.532, + "step": 1955 + }, + { + "epoch": 0.98, + "learning_rate": 5.776197266759376e-07, + "loss": 1.6174, + "step": 1956 + }, + { + "epoch": 0.98, + "learning_rate": 5.527902523594586e-07, + "loss": 1.4325, + "step": 1957 + }, + { + "epoch": 0.98, + "learning_rate": 5.285054855714622e-07, + "loss": 1.4119, + "step": 1958 + }, + { + "epoch": 0.98, + "learning_rate": 5.047654926357392e-07, + "loss": 1.4863, + "step": 1959 + }, + { + "epoch": 0.98, + "learning_rate": 4.815703383882264e-07, + "loss": 1.4364, + "step": 1960 + }, + { + "epoch": 0.98, + "learning_rate": 4.589200861769394e-07, + "loss": 1.5309, + "step": 1961 + }, + { + "epoch": 0.98, + "learning_rate": 4.368147978616399e-07, + "loss": 1.5563, + "step": 1962 + }, + { + "epoch": 0.98, + "learning_rate": 4.152545338137692e-07, + "loss": 1.5772, + "step": 1963 + }, + { + "epoch": 0.98, + "learning_rate": 3.9423935291629244e-07, + "loss": 1.2974, + "step": 1964 + }, + { + "epoch": 0.98, + "learning_rate": 3.737693125634545e-07, + "loss": 1.5887, + "step": 1965 + }, + { + "epoch": 0.98, + "learning_rate": 3.538444686607356e-07, + "loss": 1.3868, + "step": 1966 + }, + { + "epoch": 0.98, + "learning_rate": 3.3446487562456276e-07, + "loss": 1.4866, + "step": 1967 + }, + { + "epoch": 0.98, + "learning_rate": 3.1563058638230947e-07, + "loss": 1.4536, + "step": 1968 + }, + { + "epoch": 0.98, + "learning_rate": 2.973416523720296e-07, + "loss": 1.398, + "step": 1969 + }, + { + "epoch": 0.98, + "learning_rate": 2.7959812354234615e-07, + "loss": 1.4143, + "step": 1970 + }, + { + "epoch": 0.99, + "learning_rate": 2.6240004835242916e-07, + "loss": 1.4668, + "step": 1971 + }, + { + "epoch": 0.99, + "learning_rate": 2.457474737716625e-07, + "loss": 1.5373, + "step": 1972 + }, + { + "epoch": 0.99, + "learning_rate": 2.2964044527966633e-07, + "loss": 1.3918, + "step": 1973 + }, + { + "epoch": 0.99, + "learning_rate": 2.1407900686616355e-07, + "loss": 1.5909, + "step": 1974 + }, + { + "epoch": 0.99, + "learning_rate": 1.9906320103071363e-07, + "loss": 1.5743, + "step": 1975 + }, + { + "epoch": 0.99, + "learning_rate": 1.8459306878284566e-07, + "loss": 1.5593, + "step": 1976 + }, + { + "epoch": 0.99, + "learning_rate": 1.7066864964170314e-07, + "loss": 1.1698, + "step": 1977 + }, + { + "epoch": 0.99, + "learning_rate": 1.5728998163608843e-07, + "loss": 1.374, + "step": 1978 + }, + { + "epoch": 0.99, + "learning_rate": 1.4445710130428502e-07, + "loss": 1.4814, + "step": 1979 + }, + { + "epoch": 0.99, + "learning_rate": 1.32170043693991e-07, + "loss": 1.4802, + "step": 1980 + }, + { + "epoch": 0.99, + "learning_rate": 1.2042884236223018e-07, + "loss": 1.6539, + "step": 1981 + }, + { + "epoch": 0.99, + "learning_rate": 1.0923352937526332e-07, + "loss": 1.3322, + "step": 1982 + }, + { + "epoch": 0.99, + "learning_rate": 9.858413530841048e-08, + "loss": 1.545, + "step": 1983 + }, + { + "epoch": 0.99, + "learning_rate": 8.848068924611763e-08, + "loss": 1.5501, + "step": 1984 + }, + { + "epoch": 0.99, + "learning_rate": 7.892321878173459e-08, + "loss": 1.4385, + "step": 1985 + }, + { + "epoch": 0.99, + "learning_rate": 6.991175001755945e-08, + "loss": 1.3686, + "step": 1986 + }, + { + "epoch": 0.99, + "learning_rate": 6.144630756470537e-08, + "loss": 1.3645, + "step": 1987 + }, + { + "epoch": 0.99, + "learning_rate": 5.3526914542989524e-08, + "loss": 1.2885, + "step": 1988 + }, + { + "epoch": 0.99, + "learning_rate": 4.615359258097751e-08, + "loss": 1.6891, + "step": 1989 + }, + { + "epoch": 0.99, + "learning_rate": 3.932636181585014e-08, + "loss": 1.5689, + "step": 1990 + }, + { + "epoch": 1.0, + "learning_rate": 3.304524089335903e-08, + "loss": 1.3357, + "step": 1991 + }, + { + "epoch": 1.0, + "learning_rate": 2.7310246967782172e-08, + "loss": 1.5573, + "step": 1992 + }, + { + "epoch": 1.0, + "learning_rate": 2.2121395701901747e-08, + "loss": 1.6638, + "step": 1993 + }, + { + "epoch": 1.0, + "learning_rate": 1.7478701266893105e-08, + "loss": 1.4344, + "step": 1994 + }, + { + "epoch": 1.0, + "learning_rate": 1.3382176342346953e-08, + "loss": 1.3828, + "step": 1995 + }, + { + "epoch": 1.0, + "learning_rate": 9.831832116269368e-09, + "loss": 1.5397, + "step": 1996 + }, + { + "epoch": 1.0, + "learning_rate": 6.827678284926365e-09, + "loss": 1.5922, + "step": 1997 + }, + { + "epoch": 1.0, + "learning_rate": 4.369723052910502e-09, + "loss": 1.5328, + "step": 1998 + }, + { + "epoch": 1.0, + "learning_rate": 2.4579731331408896e-09, + "loss": 1.5408, + "step": 1999 + }, + { + "epoch": 1.0, + "learning_rate": 1.092433746752164e-09, + "loss": 1.4483, + "step": 2000 + } + ], + "logging_steps": 1, + "max_steps": 2001, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 3.8712881787070054e+17, + "trial_name": null, + "trial_params": null +}