diff --git "a/checkpoint-2000/trainer_state.json" "b/checkpoint-2000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-2000/trainer_state.json" @@ -0,0 +1,12019 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.513248219670238, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 1.9189, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 1.8871, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-05, + "loss": 2.09, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.0244, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.9614, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.4e-05, + "loss": 2.0378, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.0336, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 1.8758, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-05, + "loss": 1.9557, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.8357, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.4000000000000006e-05, + "loss": 1.8175, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 4.8e-05, + "loss": 1.8704, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 5.2000000000000004e-05, + "loss": 1.8078, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 5.6000000000000006e-05, + "loss": 1.7974, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 6e-05, + "loss": 1.7014, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 6.400000000000001e-05, + "loss": 1.7933, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 6.800000000000001e-05, + "loss": 1.899, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 7.2e-05, + "loss": 1.8202, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-05, + "loss": 1.5872, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.6469, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8.4e-05, + "loss": 1.852, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 8.800000000000001e-05, + "loss": 1.7968, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 9.200000000000001e-05, + "loss": 1.8162, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 9.6e-05, + "loss": 1.796, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.7156, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010400000000000001, + "loss": 1.9844, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010800000000000001, + "loss": 1.5315, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 0.00011200000000000001, + "loss": 1.6643, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 0.000116, + "loss": 1.8375, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 2.052, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 0.000124, + "loss": 1.7656, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012800000000000002, + "loss": 1.8665, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 0.000132, + "loss": 1.7569, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 0.00013600000000000003, + "loss": 1.7503, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014, + "loss": 1.6777, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 0.000144, + "loss": 1.7534, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 0.000148, + "loss": 1.8081, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 0.000152, + "loss": 1.6412, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 0.00015600000000000002, + "loss": 1.7198, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016, + "loss": 1.6977, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.000164, + "loss": 1.8538, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 0.000168, + "loss": 1.8738, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 0.000172, + "loss": 1.8707, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017600000000000002, + "loss": 1.7029, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018, + "loss": 1.6269, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018400000000000003, + "loss": 1.7867, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 0.000188, + "loss": 1.6741, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 0.000192, + "loss": 1.8431, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 0.000196, + "loss": 1.7986, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 1.6245, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020400000000000003, + "loss": 1.8149, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020800000000000001, + "loss": 1.831, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021200000000000003, + "loss": 1.6757, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021600000000000002, + "loss": 1.6475, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022000000000000003, + "loss": 1.7828, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022400000000000002, + "loss": 1.8008, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022799999999999999, + "loss": 1.6928, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 0.000232, + "loss": 1.6002, + "step": 58 + }, + { + "epoch": 0.02, + "learning_rate": 0.000236, + "loss": 1.7098, + "step": 59 + }, + { + "epoch": 0.02, + "learning_rate": 0.00024, + "loss": 1.6816, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.000244, + "loss": 1.6427, + "step": 61 + }, + { + "epoch": 0.02, + "learning_rate": 0.000248, + "loss": 1.9189, + "step": 62 + }, + { + "epoch": 0.02, + "learning_rate": 0.000252, + "loss": 1.7065, + "step": 63 + }, + { + "epoch": 0.02, + "learning_rate": 0.00025600000000000004, + "loss": 1.7845, + "step": 64 + }, + { + "epoch": 0.02, + "learning_rate": 0.00026000000000000003, + "loss": 1.7162, + "step": 65 + }, + { + "epoch": 0.02, + "learning_rate": 0.000264, + "loss": 1.8621, + "step": 66 + }, + { + "epoch": 0.02, + "learning_rate": 0.000268, + "loss": 1.7064, + "step": 67 + }, + { + "epoch": 0.02, + "learning_rate": 0.00027200000000000005, + "loss": 1.6587, + "step": 68 + }, + { + "epoch": 0.02, + "learning_rate": 0.000276, + "loss": 1.6538, + "step": 69 + }, + { + "epoch": 0.02, + "learning_rate": 0.00028, + "loss": 1.788, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 0.000284, + "loss": 1.8786, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 0.000288, + "loss": 1.6963, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 0.000292, + "loss": 1.7997, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 0.000296, + "loss": 1.6258, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 0.00030000000000000003, + "loss": 1.5675, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 0.000304, + "loss": 1.8501, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 0.000308, + "loss": 1.7404, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 0.00031200000000000005, + "loss": 1.6924, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 0.00031600000000000004, + "loss": 1.4889, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 0.00032, + "loss": 1.6161, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.000324, + "loss": 1.7842, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 0.000328, + "loss": 1.7691, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 0.000332, + "loss": 1.6911, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 0.000336, + "loss": 1.6236, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 0.00034, + "loss": 1.5516, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 0.000344, + "loss": 1.702, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 0.000348, + "loss": 1.7039, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 0.00035200000000000005, + "loss": 1.7233, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 0.00035600000000000003, + "loss": 1.7784, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036, + "loss": 1.7119, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 0.000364, + "loss": 1.7628, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036800000000000005, + "loss": 1.7374, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 0.00037200000000000004, + "loss": 1.5799, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 0.000376, + "loss": 1.7434, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 0.00038, + "loss": 1.8045, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 0.000384, + "loss": 1.7939, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 0.000388, + "loss": 1.7483, + "step": 97 + }, + { + "epoch": 0.03, + "learning_rate": 0.000392, + "loss": 1.6368, + "step": 98 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039600000000000003, + "loss": 1.6036, + "step": 99 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004, + "loss": 1.6768, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999999315068227, + "loss": 1.7205, + "step": 101 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999997260273377, + "loss": 1.6885, + "step": 102 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999993835616856, + "loss": 1.7387, + "step": 103 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039999890411010117, + "loss": 1.7599, + "step": 104 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999982876729127, + "loss": 1.7746, + "step": 105 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039999753425054243, + "loss": 1.7643, + "step": 106 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999966438435064, + "loss": 1.6161, + "step": 107 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999956164524145, + "loss": 1.749, + "step": 108 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999944520779703, + "loss": 1.7299, + "step": 109 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999931507209715, + "loss": 1.6908, + "step": 110 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039999171238230927, + "loss": 1.7822, + "step": 111 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999901370629689, + "loss": 1.7854, + "step": 112 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039998842476402934, + "loss": 1.644, + "step": 113 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999865754866634, + "loss": 1.7075, + "step": 114 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999845892321377, + "loss": 1.8361, + "step": 115 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039998246600181267, + "loss": 1.6325, + "step": 116 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999802057971426, + "loss": 1.5407, + "step": 117 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039997780861967554, + "loss": 1.6327, + "step": 118 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999752744710534, + "loss": 1.8547, + "step": 119 + }, + { + "epoch": 0.03, + "learning_rate": 0.000399972603353012, + "loss": 1.6868, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999697952673808, + "loss": 1.5997, + "step": 121 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999668502160831, + "loss": 1.8115, + "step": 122 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039996376820113615, + "loss": 1.7017, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999605492246508, + "loss": 1.716, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039995719328883204, + "loss": 1.7195, + "step": 125 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039995370039597823, + "loss": 1.5336, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999500705484818, + "loss": 1.5242, + "step": 127 + }, + { + "epoch": 0.03, + "learning_rate": 0.000399946303748829, + "loss": 1.7638, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039994239999959993, + "loss": 1.6565, + "step": 129 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999383593034683, + "loss": 1.719, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039993418166320164, + "loss": 1.782, + "step": 131 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039992986708166145, + "loss": 1.6487, + "step": 132 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039992541556180283, + "loss": 1.4585, + "step": 133 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999208271066749, + "loss": 1.7476, + "step": 134 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999161017194203, + "loss": 1.665, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999112394032757, + "loss": 1.8235, + "step": 136 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039990624016157135, + "loss": 1.7151, + "step": 137 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003999011039977315, + "loss": 1.636, + "step": 138 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998958309152741, + "loss": 1.7582, + "step": 139 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998904209178107, + "loss": 1.8764, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039988487400904687, + "loss": 1.6772, + "step": 141 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039987919019278184, + "loss": 1.7488, + "step": 142 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998733694729087, + "loss": 1.6507, + "step": 143 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039986741185341415, + "loss": 1.6778, + "step": 144 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998613173383788, + "loss": 1.6078, + "step": 145 + }, + { + "epoch": 0.04, + "learning_rate": 0.000399855085931977, + "loss": 1.7432, + "step": 146 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998487176384768, + "loss": 1.6079, + "step": 147 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039984221246224005, + "loss": 1.5346, + "step": 148 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998355704077224, + "loss": 1.4795, + "step": 149 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039982879147947313, + "loss": 1.7697, + "step": 150 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998218756821354, + "loss": 1.616, + "step": 151 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039981482302044603, + "loss": 1.5246, + "step": 152 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998076334992356, + "loss": 1.6524, + "step": 153 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998003071234285, + "loss": 1.5095, + "step": 154 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003997928438980427, + "loss": 1.5523, + "step": 155 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003997852438281901, + "loss": 1.905, + "step": 156 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039977750691907614, + "loss": 1.6569, + "step": 157 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003997696331760002, + "loss": 1.6851, + "step": 158 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039976162260435517, + "loss": 1.4957, + "step": 159 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039975347520962776, + "loss": 1.6837, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039974519099739836, + "loss": 1.6918, + "step": 161 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039973676997334104, + "loss": 1.7119, + "step": 162 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003997282121432238, + "loss": 1.6749, + "step": 163 + }, + { + "epoch": 0.04, + "learning_rate": 0.000399719517512908, + "loss": 1.9509, + "step": 164 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003997106860883489, + "loss": 1.7915, + "step": 165 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039970171787559553, + "loss": 1.554, + "step": 166 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003996926128807904, + "loss": 1.7785, + "step": 167 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039968337111016984, + "loss": 1.6035, + "step": 168 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003996739925700638, + "loss": 1.6602, + "step": 169 + }, + { + "epoch": 0.04, + "learning_rate": 0.000399664477266896, + "loss": 1.6416, + "step": 170 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003996548252071837, + "loss": 1.7058, + "step": 171 + }, + { + "epoch": 0.04, + "learning_rate": 0.000399645036397538, + "loss": 1.7482, + "step": 172 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039963511084466354, + "loss": 1.6613, + "step": 173 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039962504855535854, + "loss": 1.553, + "step": 174 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003996148495365151, + "loss": 1.793, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039960451379511883, + "loss": 1.547, + "step": 176 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039959404133824893, + "loss": 1.6133, + "step": 177 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003995834321730785, + "loss": 1.6491, + "step": 178 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039957268630687386, + "loss": 1.6654, + "step": 179 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039956180374699533, + "loss": 1.6804, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003995507845008967, + "loss": 1.5704, + "step": 181 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003995396285761254, + "loss": 1.7928, + "step": 182 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003995283359803225, + "loss": 1.8712, + "step": 183 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039951690672122267, + "loss": 1.6735, + "step": 184 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039950534080665414, + "loss": 1.6005, + "step": 185 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039949363824453866, + "loss": 1.5399, + "step": 186 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003994817990428919, + "loss": 1.6491, + "step": 187 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039946982320982274, + "loss": 1.6356, + "step": 188 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039945771075353397, + "loss": 1.7414, + "step": 189 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003994454616823216, + "loss": 1.7381, + "step": 190 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003994330760045756, + "loss": 1.5481, + "step": 191 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039942055372877913, + "loss": 1.7216, + "step": 192 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039940789486350925, + "loss": 1.7587, + "step": 193 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039939509941743634, + "loss": 1.5533, + "step": 194 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003993821673993244, + "loss": 1.6301, + "step": 195 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039936909881803097, + "loss": 1.8665, + "step": 196 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039935589368250723, + "loss": 1.5792, + "step": 197 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003993425520017977, + "loss": 1.8631, + "step": 198 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003993290737850406, + "loss": 1.5977, + "step": 199 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003993154590414675, + "loss": 1.638, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039930170778040364, + "loss": 1.772, + "step": 201 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039928782001126767, + "loss": 1.8287, + "step": 202 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039927379574357174, + "loss": 1.6486, + "step": 203 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039925963498692156, + "loss": 1.8322, + "step": 204 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003992453377510163, + "loss": 1.5798, + "step": 205 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003992309040456485, + "loss": 1.8043, + "step": 206 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039921633388070437, + "loss": 1.7151, + "step": 207 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003992016272661634, + "loss": 1.7066, + "step": 208 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039918678421209863, + "loss": 1.602, + "step": 209 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003991718047286765, + "loss": 1.6753, + "step": 210 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039915668882615715, + "loss": 1.7632, + "step": 211 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039914143651489367, + "loss": 1.7793, + "step": 212 + }, + { + "epoch": 0.05, + "learning_rate": 0.000399126047805333, + "loss": 1.6199, + "step": 213 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003991105227080153, + "loss": 1.658, + "step": 214 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039909486123357427, + "loss": 1.6492, + "step": 215 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003990790633927368, + "loss": 1.6837, + "step": 216 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039906312919632357, + "loss": 1.6572, + "step": 217 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039904705865524817, + "loss": 1.6667, + "step": 218 + }, + { + "epoch": 0.06, + "learning_rate": 0.000399030851780518, + "loss": 1.6789, + "step": 219 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003990145085832336, + "loss": 1.692, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003989980290745889, + "loss": 1.8569, + "step": 221 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039898141326587136, + "loss": 1.6396, + "step": 222 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003989646611684616, + "loss": 1.7412, + "step": 223 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039894777279383356, + "loss": 1.7172, + "step": 224 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039893074815355476, + "loss": 1.7643, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039891358725928594, + "loss": 1.6603, + "step": 226 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003988962901227811, + "loss": 1.7529, + "step": 227 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003988788567558875, + "loss": 1.6443, + "step": 228 + }, + { + "epoch": 0.06, + "learning_rate": 0.000398861287170546, + "loss": 1.6134, + "step": 229 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003988435813787904, + "loss": 1.6467, + "step": 230 + }, + { + "epoch": 0.06, + "learning_rate": 0.000398825739392748, + "loss": 1.6458, + "step": 231 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003988077612246394, + "loss": 1.6152, + "step": 232 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003987896468867784, + "loss": 1.6081, + "step": 233 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039877139639157204, + "loss": 1.6888, + "step": 234 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003987530097515207, + "loss": 1.6167, + "step": 235 + }, + { + "epoch": 0.06, + "learning_rate": 0.000398734486979218, + "loss": 1.756, + "step": 236 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003987158280873507, + "loss": 1.5726, + "step": 237 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003986970330886989, + "loss": 1.702, + "step": 238 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003986781019961359, + "loss": 1.5532, + "step": 239 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003986590348226282, + "loss": 1.7834, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039863983158123557, + "loss": 1.7149, + "step": 241 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039862049228511087, + "loss": 1.7413, + "step": 242 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039860101694750015, + "loss": 1.7384, + "step": 243 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039858140558174274, + "loss": 1.523, + "step": 244 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039856165820127107, + "loss": 1.5346, + "step": 245 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003985417748196108, + "loss": 1.7447, + "step": 246 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003985217554503807, + "loss": 1.6395, + "step": 247 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003985016001072925, + "loss": 1.7433, + "step": 248 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003984813088041515, + "loss": 1.6971, + "step": 249 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003984608815548556, + "loss": 1.6549, + "step": 250 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039844031837339624, + "loss": 1.6979, + "step": 251 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039841961927385773, + "loss": 1.7413, + "step": 252 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003983987842704176, + "loss": 1.6538, + "step": 253 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039837781337734627, + "loss": 1.7508, + "step": 254 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039835670660900757, + "loss": 1.669, + "step": 255 + }, + { + "epoch": 0.07, + "learning_rate": 0.000398335463979858, + "loss": 1.4171, + "step": 256 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039831408550444747, + "loss": 1.6787, + "step": 257 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003982925711974187, + "loss": 1.5834, + "step": 258 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003982709210735075, + "loss": 1.5367, + "step": 259 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003982491351475428, + "loss": 1.5698, + "step": 260 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039822721343444635, + "loss": 1.5576, + "step": 261 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003982051559492332, + "loss": 1.7985, + "step": 262 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003981829627070111, + "loss": 1.8064, + "step": 263 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003981606337229809, + "loss": 1.4714, + "step": 264 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039813816901243645, + "loss": 1.6162, + "step": 265 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003981155685907647, + "loss": 1.5121, + "step": 266 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039809283247344517, + "loss": 1.8091, + "step": 267 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003980699606760506, + "loss": 1.72, + "step": 268 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003980469532142467, + "loss": 1.5372, + "step": 269 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039802381010379194, + "loss": 1.7935, + "step": 270 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039800053136053784, + "loss": 1.4957, + "step": 271 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003979771170004287, + "loss": 1.4728, + "step": 272 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003979535670395017, + "loss": 1.6232, + "step": 273 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039792988149388704, + "loss": 1.7514, + "step": 274 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003979060603798077, + "loss": 1.5872, + "step": 275 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003978821037135795, + "loss": 1.4762, + "step": 276 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003978580115116112, + "loss": 1.6576, + "step": 277 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003978337837904041, + "loss": 1.8939, + "step": 278 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003978094205665527, + "loss": 1.5578, + "step": 279 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039778492185674414, + "loss": 1.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003977602876777584, + "loss": 1.508, + "step": 281 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003977355180464681, + "loss": 1.6372, + "step": 282 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039771061297983883, + "loss": 1.5009, + "step": 283 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003976855724949288, + "loss": 1.7411, + "step": 284 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039766039660888905, + "loss": 1.7966, + "step": 285 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003976350853389634, + "loss": 1.8018, + "step": 286 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039760963870248835, + "loss": 1.6935, + "step": 287 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039758405671689304, + "loss": 1.6938, + "step": 288 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003975583393996994, + "loss": 1.7298, + "step": 289 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039753248676852206, + "loss": 1.6988, + "step": 290 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003975064988410682, + "loss": 1.6438, + "step": 291 + }, + { + "epoch": 0.07, + "learning_rate": 0.000397480375635138, + "loss": 1.6497, + "step": 292 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003974541171686239, + "loss": 1.8625, + "step": 293 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039742772345951116, + "loss": 1.6466, + "step": 294 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003974011945258778, + "loss": 1.5883, + "step": 295 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003973745303858942, + "loss": 1.6464, + "step": 296 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039734773105782353, + "loss": 1.7586, + "step": 297 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039732079656002145, + "loss": 1.6374, + "step": 298 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039729372691093634, + "loss": 1.7246, + "step": 299 + }, + { + "epoch": 0.08, + "learning_rate": 0.000397266522129109, + "loss": 1.5848, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003972391822331729, + "loss": 1.8049, + "step": 301 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039721170724185396, + "loss": 1.7783, + "step": 302 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003971840971739707, + "loss": 1.5078, + "step": 303 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003971563520484341, + "loss": 1.7298, + "step": 304 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003971284718842477, + "loss": 1.8349, + "step": 305 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003971004567005075, + "loss": 1.8207, + "step": 306 + }, + { + "epoch": 0.08, + "learning_rate": 0.000397072306516402, + "loss": 1.713, + "step": 307 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039704402135121213, + "loss": 1.6281, + "step": 308 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039701560122431135, + "loss": 1.6325, + "step": 309 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039698704615516553, + "loss": 1.7483, + "step": 310 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039695835616333276, + "loss": 1.489, + "step": 311 + }, + { + "epoch": 0.08, + "learning_rate": 0.000396929531268464, + "loss": 1.7287, + "step": 312 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039690057149030213, + "loss": 1.7216, + "step": 313 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039687147684868264, + "loss": 1.7696, + "step": 314 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003968422473635335, + "loss": 1.7033, + "step": 315 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039681288305487485, + "loss": 1.677, + "step": 316 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039678338394281916, + "loss": 1.5235, + "step": 317 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039675375004757143, + "loss": 1.6583, + "step": 318 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039672398138942874, + "loss": 1.5286, + "step": 319 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039669407798878064, + "loss": 1.5743, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.000396664039866109, + "loss": 1.6344, + "step": 321 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003966338670419878, + "loss": 1.5803, + "step": 322 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039660355953708337, + "loss": 1.6756, + "step": 323 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039657311737215426, + "loss": 1.5601, + "step": 324 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003965425405680513, + "loss": 1.6068, + "step": 325 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003965118291457176, + "loss": 1.6917, + "step": 326 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039648098312618826, + "loss": 1.5022, + "step": 327 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039645000253059077, + "loss": 1.7106, + "step": 328 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003964188873801447, + "loss": 1.6934, + "step": 329 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039638763769616177, + "loss": 1.6792, + "step": 330 + }, + { + "epoch": 0.08, + "learning_rate": 0.000396356253500046, + "loss": 1.5621, + "step": 331 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039632473481329326, + "loss": 1.6579, + "step": 332 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039629308165749184, + "loss": 1.6869, + "step": 333 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003962612940543219, + "loss": 1.5827, + "step": 334 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003962293720255558, + "loss": 1.7115, + "step": 335 + }, + { + "epoch": 0.09, + "learning_rate": 0.000396197315593058, + "loss": 1.7004, + "step": 336 + }, + { + "epoch": 0.09, + "learning_rate": 0.000396165124778785, + "loss": 1.673, + "step": 337 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039613279960478516, + "loss": 1.5426, + "step": 338 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003961003400931992, + "loss": 1.5172, + "step": 339 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003960677462662594, + "loss": 1.7333, + "step": 340 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003960350181462906, + "loss": 1.4797, + "step": 341 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039600215575570923, + "loss": 1.7813, + "step": 342 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003959691591170238, + "loss": 1.6425, + "step": 343 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003959360282528347, + "loss": 1.5172, + "step": 344 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003959027631858343, + "loss": 1.5473, + "step": 345 + }, + { + "epoch": 0.09, + "learning_rate": 0.000395869363938807, + "loss": 1.5006, + "step": 346 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003958358305346289, + "loss": 1.725, + "step": 347 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003958021629962681, + "loss": 1.618, + "step": 348 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003957683613467847, + "loss": 1.7454, + "step": 349 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039573442560933035, + "loss": 1.4735, + "step": 350 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003957003558071488, + "loss": 1.689, + "step": 351 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039566615196357565, + "loss": 1.6993, + "step": 352 + }, + { + "epoch": 0.09, + "learning_rate": 0.000395631814102038, + "loss": 1.5591, + "step": 353 + }, + { + "epoch": 0.09, + "learning_rate": 0.000395597342246055, + "loss": 1.7427, + "step": 354 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039556273641923757, + "loss": 1.7445, + "step": 355 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039552799664528833, + "loss": 1.6865, + "step": 356 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039549312294800157, + "loss": 1.5481, + "step": 357 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039545811535126354, + "loss": 1.6455, + "step": 358 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039542297387905193, + "loss": 1.6712, + "step": 359 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003953876985554364, + "loss": 1.7702, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039535228940457796, + "loss": 1.8498, + "step": 361 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003953167464507295, + "loss": 1.769, + "step": 362 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003952810697182356, + "loss": 1.5027, + "step": 363 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003952452592315324, + "loss": 1.5972, + "step": 364 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039520931501514753, + "loss": 1.6795, + "step": 365 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039517323709370043, + "loss": 1.7897, + "step": 366 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039513702549190193, + "loss": 1.5261, + "step": 367 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039510068023455457, + "loss": 1.5994, + "step": 368 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003950642013465523, + "loss": 1.6287, + "step": 369 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003950275888528808, + "loss": 1.7588, + "step": 370 + }, + { + "epoch": 0.1, + "learning_rate": 0.000394990842778617, + "loss": 1.6664, + "step": 371 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039495396314892956, + "loss": 1.8034, + "step": 372 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003949169499890784, + "loss": 1.6904, + "step": 373 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039487980332441496, + "loss": 1.604, + "step": 374 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039484252318038244, + "loss": 1.6644, + "step": 375 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003948051095825149, + "loss": 1.7549, + "step": 376 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039476756255643836, + "loss": 1.6544, + "step": 377 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039472988212786974, + "loss": 1.6161, + "step": 378 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039469206832261767, + "loss": 1.8024, + "step": 379 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003946541211665821, + "loss": 1.655, + "step": 380 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039461604068575406, + "loss": 1.5149, + "step": 381 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039457782690621626, + "loss": 1.5712, + "step": 382 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003945394798541424, + "loss": 1.7293, + "step": 383 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039450099955579766, + "loss": 1.7491, + "step": 384 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039446238603753846, + "loss": 1.7214, + "step": 385 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039442363932581226, + "loss": 1.556, + "step": 386 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003943847594471581, + "loss": 1.7184, + "step": 387 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039434574642820597, + "loss": 1.7057, + "step": 388 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003943066002956772, + "loss": 1.5008, + "step": 389 + }, + { + "epoch": 0.1, + "learning_rate": 0.000394267321076384, + "loss": 1.6692, + "step": 390 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003942279087972302, + "loss": 1.5715, + "step": 391 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003941883634852104, + "loss": 1.6528, + "step": 392 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039414868516741053, + "loss": 1.6049, + "step": 393 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003941088738710074, + "loss": 1.5922, + "step": 394 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039406892962326907, + "loss": 1.5338, + "step": 395 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039402885245155475, + "loss": 1.5255, + "step": 396 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003939886423833144, + "loss": 1.7363, + "step": 397 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039394829944608927, + "loss": 1.6415, + "step": 398 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039390782366751146, + "loss": 1.591, + "step": 399 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003938672150753042, + "loss": 1.6248, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003938264736972815, + "loss": 1.684, + "step": 401 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003937855995613485, + "loss": 1.6973, + "step": 402 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039374459269550126, + "loss": 1.632, + "step": 403 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039370345312782646, + "loss": 1.4909, + "step": 404 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039366218088650215, + "loss": 1.6167, + "step": 405 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003936207759997968, + "loss": 1.6814, + "step": 406 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003935792384960701, + "loss": 1.7036, + "step": 407 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003935375684037723, + "loss": 1.5609, + "step": 408 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039349576575144457, + "loss": 1.5405, + "step": 409 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003934538305677189, + "loss": 1.6685, + "step": 410 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003934117628813181, + "loss": 1.7846, + "step": 411 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003933695627210555, + "loss": 1.599, + "step": 412 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003933272301158355, + "loss": 1.637, + "step": 413 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003932847650946529, + "loss": 1.7403, + "step": 414 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003932421676865935, + "loss": 1.6465, + "step": 415 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003931994379208335, + "loss": 1.7376, + "step": 416 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039315657582663987, + "loss": 1.5708, + "step": 417 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003931135814333703, + "loss": 1.6891, + "step": 418 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039307045477047294, + "loss": 1.5743, + "step": 419 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003930271958674867, + "loss": 1.5618, + "step": 420 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003929838047540408, + "loss": 1.5229, + "step": 421 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039294028145985547, + "loss": 1.6628, + "step": 422 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039289662601474097, + "loss": 1.6669, + "step": 423 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003928528384485984, + "loss": 1.6269, + "step": 424 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039280891879141916, + "loss": 1.7944, + "step": 425 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003927648670732853, + "loss": 1.5369, + "step": 426 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003927206833243693, + "loss": 1.6071, + "step": 427 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003926763675749339, + "loss": 1.4575, + "step": 428 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039263191985533237, + "loss": 1.7105, + "step": 429 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003925873401960084, + "loss": 1.5246, + "step": 430 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003925426286274961, + "loss": 1.5319, + "step": 431 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003924977851804197, + "loss": 1.6269, + "step": 432 + }, + { + "epoch": 0.11, + "learning_rate": 0.000392452809885494, + "loss": 1.6812, + "step": 433 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039240770277352394, + "loss": 1.6768, + "step": 434 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003923624638754048, + "loss": 1.734, + "step": 435 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003923170932221223, + "loss": 1.6455, + "step": 436 + }, + { + "epoch": 0.11, + "learning_rate": 0.000392271590844752, + "loss": 1.8286, + "step": 437 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003922259567744602, + "loss": 1.6616, + "step": 438 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039218019104250284, + "loss": 1.6312, + "step": 439 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039213429368022655, + "loss": 1.7211, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039208826471906776, + "loss": 1.7451, + "step": 441 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003920421041905532, + "loss": 1.558, + "step": 442 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039199581212629966, + "loss": 1.8128, + "step": 443 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003919493885580141, + "loss": 1.6611, + "step": 444 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003919028335174935, + "loss": 1.5493, + "step": 445 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003918561470366249, + "loss": 1.7107, + "step": 446 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003918093291473852, + "loss": 1.6758, + "step": 447 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003917623798818417, + "loss": 1.7284, + "step": 448 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003917152992721512, + "loss": 1.5784, + "step": 449 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003916680873505609, + "loss": 1.6436, + "step": 450 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039162074414940764, + "loss": 1.5409, + "step": 451 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039157326970111836, + "loss": 1.549, + "step": 452 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003915256640382097, + "loss": 1.7267, + "step": 453 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003914779271932884, + "loss": 1.5716, + "step": 454 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003914300591990509, + "loss": 1.6648, + "step": 455 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003913820600882835, + "loss": 1.7487, + "step": 456 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003913339298938623, + "loss": 1.4822, + "step": 457 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039128566864875323, + "loss": 1.6741, + "step": 458 + }, + { + "epoch": 0.12, + "learning_rate": 0.000391237276386012, + "loss": 1.6325, + "step": 459 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003911887531387839, + "loss": 1.7415, + "step": 460 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003911400989403042, + "loss": 1.6301, + "step": 461 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003910913138238975, + "loss": 1.5065, + "step": 462 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003910423978229784, + "loss": 1.6894, + "step": 463 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003909933509710511, + "loss": 1.8452, + "step": 464 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039094417330170914, + "loss": 1.551, + "step": 465 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039089486484863604, + "loss": 1.5928, + "step": 466 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039084542564560466, + "loss": 1.6541, + "step": 467 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039079585572647746, + "loss": 1.5192, + "step": 468 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003907461551252065, + "loss": 1.8088, + "step": 469 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039069632387583327, + "loss": 1.6331, + "step": 470 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003906463620124888, + "loss": 1.6655, + "step": 471 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039059626956939355, + "loss": 1.5915, + "step": 472 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003905460465808574, + "loss": 1.6448, + "step": 473 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039049569308127974, + "loss": 1.5309, + "step": 474 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039044520910514916, + "loss": 1.739, + "step": 475 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039039459468704393, + "loss": 1.4387, + "step": 476 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003903438498616313, + "loss": 1.6392, + "step": 477 + }, + { + "epoch": 0.12, + "learning_rate": 0.000390292974663668, + "loss": 1.566, + "step": 478 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039024196912800023, + "loss": 1.8407, + "step": 479 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039019083328956317, + "loss": 1.803, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039013956718338144, + "loss": 1.6142, + "step": 481 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003900881708445688, + "loss": 1.5239, + "step": 482 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003900366443083282, + "loss": 1.675, + "step": 483 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003899849876099519, + "loss": 1.7271, + "step": 484 + }, + { + "epoch": 0.12, + "learning_rate": 0.00038993320078482114, + "loss": 1.6, + "step": 485 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003898812838684064, + "loss": 1.4937, + "step": 486 + }, + { + "epoch": 0.12, + "learning_rate": 0.00038982923689626715, + "loss": 1.7205, + "step": 487 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038977705990405216, + "loss": 1.5731, + "step": 488 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003897247529274989, + "loss": 1.5392, + "step": 489 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003896723160024343, + "loss": 1.5091, + "step": 490 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038961974916477396, + "loss": 1.6609, + "step": 491 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003895670524505226, + "loss": 1.5858, + "step": 492 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038951422589577386, + "loss": 1.604, + "step": 493 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003894612695367103, + "loss": 1.6486, + "step": 494 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038940818340960347, + "loss": 1.6754, + "step": 495 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038935496755081373, + "loss": 1.6549, + "step": 496 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003893016219967903, + "loss": 1.6725, + "step": 497 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003892481467840712, + "loss": 1.593, + "step": 498 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003891945419492834, + "loss": 1.613, + "step": 499 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038914080752914257, + "loss": 1.5978, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 0.000389086943560453, + "loss": 1.758, + "step": 501 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038903295008010783, + "loss": 1.7143, + "step": 502 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038897882712508907, + "loss": 1.7088, + "step": 503 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038892457473246704, + "loss": 1.7524, + "step": 504 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003888701929394012, + "loss": 1.6681, + "step": 505 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003888156817831391, + "loss": 1.7366, + "step": 506 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038876104130101724, + "loss": 1.6069, + "step": 507 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003887062715304607, + "loss": 1.7214, + "step": 508 + }, + { + "epoch": 0.13, + "learning_rate": 0.000388651372508983, + "loss": 1.7612, + "step": 509 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003885963442741862, + "loss": 1.6513, + "step": 510 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038854118686376096, + "loss": 1.6339, + "step": 511 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003884859003154862, + "loss": 1.5985, + "step": 512 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003884304846672296, + "loss": 1.6362, + "step": 513 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003883749399569469, + "loss": 1.8127, + "step": 514 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038831926622268266, + "loss": 1.7264, + "step": 515 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038826346350256946, + "loss": 1.7378, + "step": 516 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038820753183482835, + "loss": 1.5021, + "step": 517 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003881514712577687, + "loss": 1.653, + "step": 518 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038809528180978826, + "loss": 1.5079, + "step": 519 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003880389635293729, + "loss": 1.706, + "step": 520 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003879825164550969, + "loss": 1.554, + "step": 521 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003879259406256225, + "loss": 1.6374, + "step": 522 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003878692360797004, + "loss": 1.6435, + "step": 523 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038781240285616923, + "loss": 1.7432, + "step": 524 + }, + { + "epoch": 0.13, + "learning_rate": 0.000387755440993956, + "loss": 1.6771, + "step": 525 + }, + { + "epoch": 0.13, + "learning_rate": 0.00038769835053207566, + "loss": 1.5618, + "step": 526 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003876411315096313, + "loss": 1.654, + "step": 527 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003875837839658139, + "loss": 1.6184, + "step": 528 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003875263079399028, + "loss": 1.5148, + "step": 529 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003874687034712651, + "loss": 1.5511, + "step": 530 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038741097059935586, + "loss": 1.6583, + "step": 531 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003873531093637182, + "loss": 1.6299, + "step": 532 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003872951198039832, + "loss": 1.5602, + "step": 533 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038723700195986973, + "loss": 1.8269, + "step": 534 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038717875587118445, + "loss": 1.691, + "step": 535 + }, + { + "epoch": 0.14, + "learning_rate": 0.000387120381577822, + "loss": 1.7941, + "step": 536 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038706187911976474, + "loss": 1.6985, + "step": 537 + }, + { + "epoch": 0.14, + "learning_rate": 0.000387003248537083, + "loss": 1.5676, + "step": 538 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003869444898699346, + "loss": 1.6688, + "step": 539 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003868856031585653, + "loss": 1.7335, + "step": 540 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038682658844330846, + "loss": 1.6613, + "step": 541 + }, + { + "epoch": 0.14, + "learning_rate": 0.000386767445764585, + "loss": 1.6753, + "step": 542 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003867081751629039, + "loss": 1.5821, + "step": 543 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038664877667886127, + "loss": 1.6122, + "step": 544 + }, + { + "epoch": 0.14, + "learning_rate": 0.000386589250353141, + "loss": 1.5739, + "step": 545 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003865295962265147, + "loss": 1.5059, + "step": 546 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003864698143398413, + "loss": 1.7605, + "step": 547 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003864099047340673, + "loss": 1.6589, + "step": 548 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003863498674502267, + "loss": 1.7321, + "step": 549 + }, + { + "epoch": 0.14, + "learning_rate": 0.000386289702529441, + "loss": 1.7304, + "step": 550 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038622941001291894, + "loss": 1.6749, + "step": 551 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038616898994195696, + "loss": 1.6343, + "step": 552 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003861084423579385, + "loss": 1.6321, + "step": 553 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038604776730233467, + "loss": 1.7723, + "step": 554 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038598696481670364, + "loss": 1.5751, + "step": 555 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003859260349426911, + "loss": 1.6355, + "step": 556 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003858649777220296, + "loss": 1.4967, + "step": 557 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003858037931965395, + "loss": 1.4851, + "step": 558 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003857424814081278, + "loss": 1.5698, + "step": 559 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038568104239878893, + "loss": 1.5247, + "step": 560 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038561947621060445, + "loss": 1.6362, + "step": 561 + }, + { + "epoch": 0.14, + "learning_rate": 0.000385557782885743, + "loss": 1.6348, + "step": 562 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003854959624664603, + "loss": 1.6125, + "step": 563 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003854340149950991, + "loss": 1.6978, + "step": 564 + }, + { + "epoch": 0.14, + "learning_rate": 0.00038537194051408926, + "loss": 1.6874, + "step": 565 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003853097390659474, + "loss": 1.6011, + "step": 566 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003852474106932775, + "loss": 1.6312, + "step": 567 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038518495543877, + "loss": 1.7431, + "step": 568 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038512237334520263, + "loss": 1.436, + "step": 569 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003850596644554399, + "loss": 1.7691, + "step": 570 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038499682881243294, + "loss": 1.5807, + "step": 571 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038493386645922003, + "loss": 1.6818, + "step": 572 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038487077743892605, + "loss": 1.5128, + "step": 573 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003848075617947627, + "loss": 1.6996, + "step": 574 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038474421957002835, + "loss": 1.644, + "step": 575 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038468075080810805, + "loss": 1.6346, + "step": 576 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003846171555524737, + "loss": 1.5746, + "step": 577 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003845534338466836, + "loss": 1.6314, + "step": 578 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038448958573438283, + "loss": 1.8287, + "step": 579 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003844256112593029, + "loss": 1.5512, + "step": 580 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003843615104652621, + "loss": 1.7027, + "step": 581 + }, + { + "epoch": 0.15, + "learning_rate": 0.000384297283396165, + "loss": 1.5607, + "step": 582 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003842329300960028, + "loss": 1.6542, + "step": 583 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003841684506088531, + "loss": 1.5433, + "step": 584 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038410384497888, + "loss": 1.5978, + "step": 585 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038403911325033383, + "loss": 1.6236, + "step": 586 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038397425546755155, + "loss": 1.6, + "step": 587 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003839092716749563, + "loss": 1.574, + "step": 588 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003838441619170574, + "loss": 1.4845, + "step": 589 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003837789262384507, + "loss": 1.6633, + "step": 590 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038371356468381825, + "loss": 1.653, + "step": 591 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038364807729792815, + "loss": 1.5522, + "step": 592 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003835824641256348, + "loss": 1.6637, + "step": 593 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038351672521187875, + "loss": 1.6192, + "step": 594 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003834508606016868, + "loss": 1.8062, + "step": 595 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003833848703401715, + "loss": 1.5102, + "step": 596 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003833187544725318, + "loss": 1.5951, + "step": 597 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038325251304405255, + "loss": 1.5905, + "step": 598 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003831861461001046, + "loss": 1.7055, + "step": 599 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003831196536861448, + "loss": 1.6382, + "step": 600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038305303584771586, + "loss": 1.6842, + "step": 601 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003829862926304465, + "loss": 1.6128, + "step": 602 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003829194240800513, + "loss": 1.5902, + "step": 603 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038285243024233057, + "loss": 1.603, + "step": 604 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003827853111631706, + "loss": 1.6249, + "step": 605 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003827180668885433, + "loss": 1.3987, + "step": 606 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038265069746450646, + "loss": 1.5676, + "step": 607 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003825832029372036, + "loss": 1.8549, + "step": 608 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038251558335286373, + "loss": 1.5502, + "step": 609 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038244783875780177, + "loss": 1.6703, + "step": 610 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038237996919841805, + "loss": 1.7345, + "step": 611 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038231197472119864, + "loss": 1.7552, + "step": 612 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038224385537271514, + "loss": 1.6106, + "step": 613 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003821756111996246, + "loss": 1.7377, + "step": 614 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038210724224866966, + "loss": 1.5222, + "step": 615 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003820387485666784, + "loss": 1.6635, + "step": 616 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038197013020056423, + "loss": 1.4375, + "step": 617 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038190138719732615, + "loss": 1.6819, + "step": 618 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003818325196040484, + "loss": 1.6796, + "step": 619 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003817635274679006, + "loss": 1.6895, + "step": 620 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038169441083613765, + "loss": 1.7996, + "step": 621 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003816251697560996, + "loss": 1.4623, + "step": 622 + }, + { + "epoch": 0.16, + "learning_rate": 0.000381555804275212, + "loss": 1.6559, + "step": 623 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038148631444098556, + "loss": 1.5815, + "step": 624 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003814167003010159, + "loss": 1.5663, + "step": 625 + }, + { + "epoch": 0.16, + "learning_rate": 0.000381346961902984, + "loss": 1.6197, + "step": 626 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038127709929465595, + "loss": 1.5806, + "step": 627 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003812071125238828, + "loss": 1.648, + "step": 628 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003811370016386008, + "loss": 1.5886, + "step": 629 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038106676668683105, + "loss": 1.4344, + "step": 630 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038099640771667974, + "loss": 1.6881, + "step": 631 + }, + { + "epoch": 0.16, + "learning_rate": 0.000380925924776338, + "loss": 1.733, + "step": 632 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003808553179140817, + "loss": 1.4853, + "step": 633 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038078458717827183, + "loss": 1.6185, + "step": 634 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003807137326173541, + "loss": 1.6862, + "step": 635 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003806427542798591, + "loss": 1.6541, + "step": 636 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003805716522144021, + "loss": 1.5254, + "step": 637 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003805004264696831, + "loss": 1.5116, + "step": 638 + }, + { + "epoch": 0.16, + "learning_rate": 0.000380429077094487, + "loss": 1.6079, + "step": 639 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038035760413768314, + "loss": 1.5912, + "step": 640 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003802860076482257, + "loss": 1.6562, + "step": 641 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003802142876751533, + "loss": 1.7789, + "step": 642 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003801424442675894, + "loss": 1.6394, + "step": 643 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038007047747474164, + "loss": 1.7994, + "step": 644 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037999838734590247, + "loss": 1.7582, + "step": 645 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037992617393044865, + "loss": 1.6606, + "step": 646 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037985383727784155, + "loss": 1.8093, + "step": 647 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003797813774376267, + "loss": 1.7377, + "step": 648 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003797087944594343, + "loss": 1.6768, + "step": 649 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003796360883929786, + "loss": 1.5083, + "step": 650 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037956325928805833, + "loss": 1.7675, + "step": 651 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037949030719455654, + "loss": 1.678, + "step": 652 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003794172321624403, + "loss": 1.689, + "step": 653 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003793440342417611, + "loss": 1.5533, + "step": 654 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003792707134826546, + "loss": 1.6934, + "step": 655 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003791972699353404, + "loss": 1.6117, + "step": 656 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003791237036501223, + "loss": 1.6952, + "step": 657 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003790500146773883, + "loss": 1.6012, + "step": 658 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037897620306761025, + "loss": 1.6057, + "step": 659 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037890226887134405, + "loss": 1.5526, + "step": 660 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037882821213922963, + "loss": 1.5555, + "step": 661 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037875403292199074, + "loss": 1.4534, + "step": 662 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037867973127043514, + "loss": 1.6554, + "step": 663 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003786053072354544, + "loss": 1.7754, + "step": 664 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037853076086802383, + "loss": 1.8038, + "step": 665 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003784560922192027, + "loss": 1.6227, + "step": 666 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037838130134013384, + "loss": 1.5376, + "step": 667 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037830638828204396, + "loss": 1.7412, + "step": 668 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037823135309624336, + "loss": 1.5911, + "step": 669 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003781561958341261, + "loss": 1.9199, + "step": 670 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037808091654716963, + "loss": 1.5819, + "step": 671 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037800551528693535, + "loss": 1.5266, + "step": 672 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003779299921050678, + "loss": 1.4461, + "step": 673 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037785434705329523, + "loss": 1.5974, + "step": 674 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003777785801834294, + "loss": 1.5486, + "step": 675 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003777026915473654, + "loss": 1.6846, + "step": 676 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037762668119708175, + "loss": 1.4414, + "step": 677 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003775505491846404, + "loss": 1.455, + "step": 678 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003774742955621866, + "loss": 1.6806, + "step": 679 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003773979203819488, + "loss": 1.6142, + "step": 680 + }, + { + "epoch": 0.17, + "learning_rate": 0.00037732142369623886, + "loss": 1.6237, + "step": 681 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037724480555745174, + "loss": 1.6837, + "step": 682 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003771680660180657, + "loss": 1.5569, + "step": 683 + }, + { + "epoch": 0.18, + "learning_rate": 0.000377091205130642, + "loss": 1.7432, + "step": 684 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037701422294782514, + "loss": 1.6136, + "step": 685 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037693711952234275, + "loss": 1.6606, + "step": 686 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037685989490700533, + "loss": 1.8267, + "step": 687 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003767825491547065, + "loss": 1.6405, + "step": 688 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037670508231842277, + "loss": 1.7604, + "step": 689 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003766274944512137, + "loss": 1.7139, + "step": 690 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003765497856062216, + "loss": 1.6443, + "step": 691 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003764719558366719, + "loss": 1.6801, + "step": 692 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037639400519587243, + "loss": 1.5054, + "step": 693 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037631593373721434, + "loss": 1.6751, + "step": 694 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037623774151417105, + "loss": 1.7075, + "step": 695 + }, + { + "epoch": 0.18, + "learning_rate": 0.000376159428580299, + "loss": 1.6365, + "step": 696 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003760809949892371, + "loss": 1.6795, + "step": 697 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037600244079470717, + "loss": 1.754, + "step": 698 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003759237660505133, + "loss": 1.8013, + "step": 699 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037584497081054243, + "loss": 1.7363, + "step": 700 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037576605512876395, + "loss": 1.5492, + "step": 701 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003756870190592296, + "loss": 1.6954, + "step": 702 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037560786265607377, + "loss": 1.5803, + "step": 703 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003755285859735132, + "loss": 1.5493, + "step": 704 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037544918906584694, + "loss": 1.6476, + "step": 705 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003753696719874566, + "loss": 1.6574, + "step": 706 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037529003479280586, + "loss": 1.7977, + "step": 707 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003752102775364407, + "loss": 1.5634, + "step": 708 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003751304002729895, + "loss": 1.7496, + "step": 709 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003750504030571627, + "loss": 1.7538, + "step": 710 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037497028594375297, + "loss": 1.5127, + "step": 711 + }, + { + "epoch": 0.18, + "learning_rate": 0.000374890048987635, + "loss": 1.7468, + "step": 712 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037480969224376566, + "loss": 1.7455, + "step": 713 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003747292157671839, + "loss": 1.7747, + "step": 714 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037464861961301047, + "loss": 1.5947, + "step": 715 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037456790383644836, + "loss": 1.6351, + "step": 716 + }, + { + "epoch": 0.18, + "learning_rate": 0.00037448706849278236, + "loss": 1.66, + "step": 717 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003744061136373791, + "loss": 1.5671, + "step": 718 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003743250393256872, + "loss": 1.5424, + "step": 719 + }, + { + "epoch": 0.18, + "learning_rate": 0.000374243845613237, + "loss": 1.6223, + "step": 720 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037416253255564063, + "loss": 1.6559, + "step": 721 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003740811002085921, + "loss": 1.6577, + "step": 722 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003739995486278668, + "loss": 1.6025, + "step": 723 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003739178778693222, + "loss": 1.5758, + "step": 724 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037383608798889715, + "loss": 1.7676, + "step": 725 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003737541790426121, + "loss": 1.7539, + "step": 726 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037367215108656906, + "loss": 1.5668, + "step": 727 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003735900041769516, + "loss": 1.668, + "step": 728 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003735077383700248, + "loss": 1.8307, + "step": 729 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003734253537221351, + "loss": 1.7394, + "step": 730 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003733428502897104, + "loss": 1.8586, + "step": 731 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037326022812925983, + "loss": 1.6289, + "step": 732 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037317748729737404, + "loss": 1.7485, + "step": 733 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037309462785072473, + "loss": 1.5752, + "step": 734 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003730116498460651, + "loss": 1.5966, + "step": 735 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037292855334022934, + "loss": 1.536, + "step": 736 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003728453383901329, + "loss": 1.5516, + "step": 737 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037276200505277233, + "loss": 1.7783, + "step": 738 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003726785533852254, + "loss": 1.6644, + "step": 739 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003725949834446506, + "loss": 1.5799, + "step": 740 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003725112952882878, + "loss": 1.726, + "step": 741 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003724274889734575, + "loss": 1.6901, + "step": 742 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003723435645575616, + "loss": 1.9424, + "step": 743 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003722595220980824, + "loss": 1.6959, + "step": 744 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003721753616525832, + "loss": 1.5724, + "step": 745 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003720910832787083, + "loss": 1.6969, + "step": 746 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037200668703418256, + "loss": 1.6828, + "step": 747 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037192217297681176, + "loss": 1.6534, + "step": 748 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037183754116448207, + "loss": 1.6635, + "step": 749 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037175279165516063, + "loss": 1.6784, + "step": 750 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003716679245068951, + "loss": 1.4693, + "step": 751 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037158293977781353, + "loss": 1.9277, + "step": 752 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003714978375261248, + "loss": 1.8126, + "step": 753 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003714126178101182, + "loss": 1.649, + "step": 754 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003713272806881633, + "loss": 1.6612, + "step": 755 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003712418262187103, + "loss": 1.626, + "step": 756 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003711562544602895, + "loss": 1.7009, + "step": 757 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037107056547151194, + "loss": 1.4813, + "step": 758 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003709847593110686, + "loss": 1.6061, + "step": 759 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037089883603773084, + "loss": 1.7086, + "step": 760 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037081279571035035, + "loss": 1.6072, + "step": 761 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037072663838785873, + "loss": 1.5301, + "step": 762 + }, + { + "epoch": 0.2, + "learning_rate": 0.000370640364129268, + "loss": 1.7812, + "step": 763 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037055397299367015, + "loss": 1.5838, + "step": 764 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037046746504023703, + "loss": 1.656, + "step": 765 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003703808403282209, + "loss": 1.4833, + "step": 766 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037029409891695364, + "loss": 1.6497, + "step": 767 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003702072408658473, + "loss": 1.7795, + "step": 768 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037012026623439357, + "loss": 1.776, + "step": 769 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037003317508216434, + "loss": 1.5213, + "step": 770 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003699459674688109, + "loss": 1.6342, + "step": 771 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003698586434540647, + "loss": 1.6278, + "step": 772 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003697712030977366, + "loss": 1.5861, + "step": 773 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036968364645971735, + "loss": 1.6302, + "step": 774 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036959597359997724, + "loss": 1.6089, + "step": 775 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036950818457856624, + "loss": 1.7833, + "step": 776 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036942027945561384, + "loss": 1.7146, + "step": 777 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003693322582913289, + "loss": 1.4829, + "step": 778 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003692441211460001, + "loss": 1.7925, + "step": 779 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036915586807999534, + "loss": 1.7343, + "step": 780 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003690674991537618, + "loss": 1.5837, + "step": 781 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036897901442782636, + "loss": 1.6729, + "step": 782 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036889041396279495, + "loss": 1.6381, + "step": 783 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003688016978193528, + "loss": 1.6378, + "step": 784 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003687128660582644, + "loss": 1.6915, + "step": 785 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003686239187403736, + "loss": 1.8582, + "step": 786 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036853485592660304, + "loss": 1.4633, + "step": 787 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036844567767795484, + "loss": 1.5704, + "step": 788 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003683563840555099, + "loss": 1.6925, + "step": 789 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036826697512042827, + "loss": 1.6668, + "step": 790 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036817745093394906, + "loss": 1.8282, + "step": 791 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036808781155739015, + "loss": 1.607, + "step": 792 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036799805705214846, + "loss": 1.6484, + "step": 793 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003679081874796996, + "loss": 1.5758, + "step": 794 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003678182029015982, + "loss": 1.6793, + "step": 795 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003677281033794775, + "loss": 1.5645, + "step": 796 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036763788897504953, + "loss": 1.6874, + "step": 797 + }, + { + "epoch": 0.2, + "learning_rate": 0.000367547559750105, + "loss": 1.5802, + "step": 798 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003674571157665133, + "loss": 1.5528, + "step": 799 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003673665570862224, + "loss": 1.5414, + "step": 800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003672758837712587, + "loss": 1.6786, + "step": 801 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003671850958837274, + "loss": 1.5255, + "step": 802 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003670941934858119, + "loss": 1.5688, + "step": 803 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003670031766397742, + "loss": 1.5886, + "step": 804 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003669120454079545, + "loss": 1.89, + "step": 805 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036682079985277165, + "loss": 1.5677, + "step": 806 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036672944003672255, + "loss": 1.6519, + "step": 807 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036663796602238243, + "loss": 1.7055, + "step": 808 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036654637787240474, + "loss": 1.5883, + "step": 809 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036645467564952114, + "loss": 1.61, + "step": 810 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003663628594165414, + "loss": 1.6775, + "step": 811 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003662709292363534, + "loss": 1.4352, + "step": 812 + }, + { + "epoch": 0.21, + "learning_rate": 0.000366178885171923, + "loss": 1.5655, + "step": 813 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003660867272862941, + "loss": 1.5255, + "step": 814 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036599445564258857, + "loss": 1.6361, + "step": 815 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003659020703040062, + "loss": 1.4347, + "step": 816 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003658095713338247, + "loss": 1.5647, + "step": 817 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003657169587953994, + "loss": 1.7996, + "step": 818 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036562423275216373, + "loss": 1.5662, + "step": 819 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036553139326762863, + "loss": 1.7122, + "step": 820 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003654384404053828, + "loss": 1.6529, + "step": 821 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036534537422909264, + "loss": 1.5574, + "step": 822 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036525219480250214, + "loss": 1.6851, + "step": 823 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003651589021894328, + "loss": 1.7735, + "step": 824 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036506549645378377, + "loss": 1.711, + "step": 825 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003649719776595315, + "loss": 1.7488, + "step": 826 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036487834587073007, + "loss": 1.6816, + "step": 827 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036478460115151084, + "loss": 1.7147, + "step": 828 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003646907435660826, + "loss": 1.6348, + "step": 829 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003645967731787313, + "loss": 1.5697, + "step": 830 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003645026900538203, + "loss": 1.756, + "step": 831 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003644084942557901, + "loss": 1.5116, + "step": 832 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003643141858491584, + "loss": 1.6375, + "step": 833 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036421976489852006, + "loss": 1.6181, + "step": 834 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003641252314685469, + "loss": 1.4172, + "step": 835 + }, + { + "epoch": 0.21, + "learning_rate": 0.000364030585623988, + "loss": 1.544, + "step": 836 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003639358274296692, + "loss": 1.6789, + "step": 837 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036384095695049333, + "loss": 1.6391, + "step": 838 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036374597425144044, + "loss": 1.6257, + "step": 839 + }, + { + "epoch": 0.22, + "learning_rate": 0.000363650879397567, + "loss": 1.5339, + "step": 840 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036355567245400654, + "loss": 1.7024, + "step": 841 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036346035348596936, + "loss": 1.5804, + "step": 842 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036336492255874237, + "loss": 1.4643, + "step": 843 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003632693797376893, + "loss": 1.4877, + "step": 844 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036317372508825056, + "loss": 1.4627, + "step": 845 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003630779586759429, + "loss": 1.7036, + "step": 846 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036298208056635976, + "loss": 1.6852, + "step": 847 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036288609082517126, + "loss": 1.6766, + "step": 848 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036278998951812377, + "loss": 1.5677, + "step": 849 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003626937767110401, + "loss": 1.4498, + "step": 850 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036259745246981935, + "loss": 1.6757, + "step": 851 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003625010168604373, + "loss": 1.5938, + "step": 852 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036240446994894557, + "loss": 1.7743, + "step": 853 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003623078118014722, + "loss": 1.7202, + "step": 854 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003622110424842216, + "loss": 1.627, + "step": 855 + }, + { + "epoch": 0.22, + "learning_rate": 0.000362114162063474, + "loss": 1.568, + "step": 856 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003620171706055859, + "loss": 1.676, + "step": 857 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003619200681769899, + "loss": 1.6625, + "step": 858 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036182285484419443, + "loss": 1.6611, + "step": 859 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003617255306737841, + "loss": 1.8309, + "step": 860 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036162809573241924, + "loss": 1.5852, + "step": 861 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003615305500868362, + "loss": 1.7406, + "step": 862 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036143289380384703, + "loss": 1.4455, + "step": 863 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003613351269503397, + "loss": 1.601, + "step": 864 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003612372495932777, + "loss": 1.7066, + "step": 865 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003611392617997006, + "loss": 1.6644, + "step": 866 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036104116363672303, + "loss": 1.5774, + "step": 867 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003609429551715358, + "loss": 1.6126, + "step": 868 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036084463647140485, + "loss": 1.6447, + "step": 869 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036074620760367183, + "loss": 1.5225, + "step": 870 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003606476686357538, + "loss": 1.541, + "step": 871 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003605490196351433, + "loss": 1.4021, + "step": 872 + }, + { + "epoch": 0.22, + "learning_rate": 0.000360450260669408, + "loss": 1.6662, + "step": 873 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036035139180619124, + "loss": 1.6792, + "step": 874 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003602524131132113, + "loss": 1.5839, + "step": 875 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003601533246582619, + "loss": 1.8402, + "step": 876 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003600541265092119, + "loss": 1.556, + "step": 877 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003599548187340052, + "loss": 1.6817, + "step": 878 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035985540140066083, + "loss": 1.7084, + "step": 879 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035975587457727303, + "loss": 1.7476, + "step": 880 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003596562383320107, + "loss": 1.6483, + "step": 881 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035955649273311795, + "loss": 1.6566, + "step": 882 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035945663784891374, + "loss": 1.6778, + "step": 883 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003593566737477918, + "loss": 1.7335, + "step": 884 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003592566004982207, + "loss": 1.6062, + "step": 885 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035915641816874395, + "loss": 1.8055, + "step": 886 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035905612682797945, + "loss": 1.7148, + "step": 887 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003589557265446199, + "loss": 1.5053, + "step": 888 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035885521738743275, + "loss": 1.5616, + "step": 889 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035875459942525984, + "loss": 1.6662, + "step": 890 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003586538727270177, + "loss": 1.6985, + "step": 891 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003585530373616972, + "loss": 1.8217, + "step": 892 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003584520933983636, + "loss": 1.4381, + "step": 893 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035835104090615674, + "loss": 1.6774, + "step": 894 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003582498799542907, + "loss": 1.5872, + "step": 895 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035814861061205373, + "loss": 1.5814, + "step": 896 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003580472329488084, + "loss": 1.7271, + "step": 897 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003579457470339917, + "loss": 1.7893, + "step": 898 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003578441529371143, + "loss": 1.6335, + "step": 899 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035774245072776146, + "loss": 1.4715, + "step": 900 + }, + { + "epoch": 0.23, + "learning_rate": 0.000357640640475592, + "loss": 1.5212, + "step": 901 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035753872225033925, + "loss": 1.4646, + "step": 902 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035743669612181003, + "loss": 1.6283, + "step": 903 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035733456215988543, + "loss": 1.5013, + "step": 904 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003572323204345202, + "loss": 1.6665, + "step": 905 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003571299710157429, + "loss": 1.6076, + "step": 906 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003570275139736559, + "loss": 1.8144, + "step": 907 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035692494937843535, + "loss": 1.6256, + "step": 908 + }, + { + "epoch": 0.23, + "learning_rate": 0.000356822277300331, + "loss": 1.7083, + "step": 909 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003567194978096661, + "loss": 1.6267, + "step": 910 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003566166109768378, + "loss": 1.6438, + "step": 911 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035651361687231634, + "loss": 1.5485, + "step": 912 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003564105155666458, + "loss": 1.4371, + "step": 913 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003563073071304434, + "loss": 1.5437, + "step": 914 + }, + { + "epoch": 0.23, + "learning_rate": 0.00035620399163440006, + "loss": 1.5017, + "step": 915 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003561005691492797, + "loss": 1.6389, + "step": 916 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003559970397459197, + "loss": 1.464, + "step": 917 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035589340349523075, + "loss": 1.6925, + "step": 918 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003557896604681965, + "loss": 1.7354, + "step": 919 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035568581073587373, + "loss": 1.5807, + "step": 920 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035558185436939267, + "loss": 1.638, + "step": 921 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003554777914399562, + "loss": 1.4431, + "step": 922 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003553736220188403, + "loss": 1.4802, + "step": 923 + }, + { + "epoch": 0.24, + "learning_rate": 0.000355269346177394, + "loss": 1.6591, + "step": 924 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003551649639870391, + "loss": 1.7172, + "step": 925 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003550604755192703, + "loss": 1.456, + "step": 926 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003549558808456551, + "loss": 1.7723, + "step": 927 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003548511800378336, + "loss": 1.7289, + "step": 928 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035474637316751877, + "loss": 1.7105, + "step": 929 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003546414603064962, + "loss": 1.6791, + "step": 930 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035453644152662393, + "loss": 1.6817, + "step": 931 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035443131689983287, + "loss": 1.7588, + "step": 932 + }, + { + "epoch": 0.24, + "learning_rate": 0.000354326086498126, + "loss": 1.6242, + "step": 933 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003542207503935791, + "loss": 1.6501, + "step": 934 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003541153086583402, + "loss": 1.6213, + "step": 935 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003540097613646296, + "loss": 1.7623, + "step": 936 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035390410858474015, + "loss": 1.6942, + "step": 937 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035379835039103666, + "loss": 1.7283, + "step": 938 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003536924868559563, + "loss": 1.7197, + "step": 939 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003535865180520084, + "loss": 1.5329, + "step": 940 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035348044405177434, + "loss": 1.6563, + "step": 941 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003533742649279076, + "loss": 1.5709, + "step": 942 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003532679807531336, + "loss": 1.7297, + "step": 943 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035316159160024986, + "loss": 1.5328, + "step": 944 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003530550975421255, + "loss": 1.6089, + "step": 945 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035294849865170186, + "loss": 1.5144, + "step": 946 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003528417950019918, + "loss": 1.6538, + "step": 947 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035273498666608006, + "loss": 1.6211, + "step": 948 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003526280737171231, + "loss": 1.5184, + "step": 949 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035252105622834897, + "loss": 1.6793, + "step": 950 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003524139342730573, + "loss": 1.6752, + "step": 951 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035230670792461946, + "loss": 1.5023, + "step": 952 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035219937725647804, + "loss": 1.6061, + "step": 953 + }, + { + "epoch": 0.24, + "learning_rate": 0.00035209194234214725, + "loss": 1.6007, + "step": 954 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035198440325521276, + "loss": 1.6313, + "step": 955 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003518767600693314, + "loss": 1.6372, + "step": 956 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003517690128582315, + "loss": 1.6819, + "step": 957 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003516611616957125, + "loss": 1.6247, + "step": 958 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003515532066556451, + "loss": 1.4948, + "step": 959 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035144514781197114, + "loss": 1.5885, + "step": 960 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035133698523870355, + "loss": 1.531, + "step": 961 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035122871900992634, + "loss": 1.6559, + "step": 962 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003511203491997944, + "loss": 1.6803, + "step": 963 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035101187588253374, + "loss": 1.6133, + "step": 964 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035090329913244116, + "loss": 1.5852, + "step": 965 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003507946190238843, + "loss": 1.6007, + "step": 966 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035068583563130167, + "loss": 1.6832, + "step": 967 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035057694902920246, + "loss": 1.6597, + "step": 968 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003504679592921666, + "loss": 1.664, + "step": 969 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003503588664948445, + "loss": 1.5242, + "step": 970 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035024967071195734, + "loss": 1.7077, + "step": 971 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003501403720182969, + "loss": 1.6798, + "step": 972 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035003097048872517, + "loss": 1.71, + "step": 973 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034992146619817474, + "loss": 1.5513, + "step": 974 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003498118592216487, + "loss": 1.7077, + "step": 975 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034970214963422024, + "loss": 1.7366, + "step": 976 + }, + { + "epoch": 0.25, + "learning_rate": 0.000349592337511033, + "loss": 1.7018, + "step": 977 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034948242292730074, + "loss": 1.7332, + "step": 978 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003493724059583075, + "loss": 1.8433, + "step": 979 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034926228667940746, + "loss": 1.6216, + "step": 980 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034915206516602463, + "loss": 1.6092, + "step": 981 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003490417414936534, + "loss": 1.722, + "step": 982 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034893131573785787, + "loss": 1.5727, + "step": 983 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034882078797427216, + "loss": 1.5996, + "step": 984 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034871015827860026, + "loss": 1.5437, + "step": 985 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034859942672661596, + "loss": 1.6642, + "step": 986 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003484885933941628, + "loss": 1.5446, + "step": 987 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034837765835715414, + "loss": 1.4511, + "step": 988 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034826662169157275, + "loss": 1.5277, + "step": 989 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034815548347347133, + "loss": 1.7051, + "step": 990 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003480442437789718, + "loss": 1.8108, + "step": 991 + }, + { + "epoch": 0.25, + "learning_rate": 0.00034793290268426595, + "loss": 1.8663, + "step": 992 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003478214602656148, + "loss": 1.6647, + "step": 993 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003477099165993487, + "loss": 1.7646, + "step": 994 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003475982717618675, + "loss": 1.5646, + "step": 995 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034748652582964034, + "loss": 1.5051, + "step": 996 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003473746788792055, + "loss": 1.7521, + "step": 997 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034726273098717057, + "loss": 1.7792, + "step": 998 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003471506822302121, + "loss": 1.6175, + "step": 999 + }, + { + "epoch": 0.26, + "learning_rate": 0.000347038532685076, + "loss": 1.6643, + "step": 1000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003469262824285769, + "loss": 1.613, + "step": 1001 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003468139315375987, + "loss": 1.6862, + "step": 1002 + }, + { + "epoch": 0.26, + "learning_rate": 0.000346701480089094, + "loss": 1.6329, + "step": 1003 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003465889281600845, + "loss": 1.5296, + "step": 1004 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003464762758276605, + "loss": 1.6299, + "step": 1005 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003463635231689811, + "loss": 1.5426, + "step": 1006 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034625067026127426, + "loss": 1.5857, + "step": 1007 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034613771718183654, + "loss": 1.7032, + "step": 1008 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003460246640080331, + "loss": 1.7083, + "step": 1009 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034591151081729755, + "loss": 1.6932, + "step": 1010 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034579825768713213, + "loss": 1.6534, + "step": 1011 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034568490469510764, + "loss": 1.5892, + "step": 1012 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034557145191886293, + "loss": 1.6582, + "step": 1013 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034545789943610564, + "loss": 1.5769, + "step": 1014 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034534424732461134, + "loss": 1.5182, + "step": 1015 + }, + { + "epoch": 0.26, + "learning_rate": 0.000345230495662224, + "loss": 1.6186, + "step": 1016 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003451166445268558, + "loss": 1.745, + "step": 1017 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003450026939964868, + "loss": 1.3753, + "step": 1018 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034488864414916555, + "loss": 1.5549, + "step": 1019 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034477449506300844, + "loss": 1.5743, + "step": 1020 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034466024681619964, + "loss": 1.5706, + "step": 1021 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003445458994869915, + "loss": 1.4817, + "step": 1022 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003444314531537041, + "loss": 1.7491, + "step": 1023 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034431690789472533, + "loss": 1.538, + "step": 1024 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034420226378851105, + "loss": 1.497, + "step": 1025 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003440875209135845, + "loss": 1.6419, + "step": 1026 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034397267934853674, + "loss": 1.6232, + "step": 1027 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003438577391720264, + "loss": 1.4944, + "step": 1028 + }, + { + "epoch": 0.26, + "learning_rate": 0.00034374270046277965, + "loss": 1.4733, + "step": 1029 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003436275632995903, + "loss": 1.703, + "step": 1030 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003435123277613193, + "loss": 1.6112, + "step": 1031 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003433969939268951, + "loss": 1.6088, + "step": 1032 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034328156187531366, + "loss": 1.6745, + "step": 1033 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034316603168563795, + "loss": 1.6352, + "step": 1034 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034305040343699834, + "loss": 1.7108, + "step": 1035 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034293467720859224, + "loss": 1.5367, + "step": 1036 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003428188530796842, + "loss": 1.7411, + "step": 1037 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034270293112960583, + "loss": 1.6243, + "step": 1038 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034258691143775586, + "loss": 1.8486, + "step": 1039 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003424707940835998, + "loss": 1.4878, + "step": 1040 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034235457914667014, + "loss": 1.8384, + "step": 1041 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003422382667065662, + "loss": 1.7218, + "step": 1042 + }, + { + "epoch": 0.27, + "learning_rate": 0.000342121856842954, + "loss": 1.5259, + "step": 1043 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003420053496355663, + "loss": 1.7574, + "step": 1044 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003418887451642028, + "loss": 1.6499, + "step": 1045 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034177204350872934, + "loss": 1.6151, + "step": 1046 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003416552447490788, + "loss": 1.5587, + "step": 1047 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034153834896525037, + "loss": 1.564, + "step": 1048 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003414213562373095, + "loss": 1.5364, + "step": 1049 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003413042666453884, + "loss": 1.6243, + "step": 1050 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034118708026968533, + "loss": 1.764, + "step": 1051 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003410697971904651, + "loss": 1.6464, + "step": 1052 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003409524174880585, + "loss": 1.5382, + "step": 1053 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034083494124286266, + "loss": 1.5711, + "step": 1054 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003407173685353408, + "loss": 1.668, + "step": 1055 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003405996994460221, + "loss": 1.4977, + "step": 1056 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034048193405550206, + "loss": 1.5744, + "step": 1057 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003403640724444418, + "loss": 1.5391, + "step": 1058 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003402461146935685, + "loss": 1.6653, + "step": 1059 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003401280608836752, + "loss": 1.6873, + "step": 1060 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003400099110956206, + "loss": 1.8143, + "step": 1061 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003398916654103294, + "loss": 1.6314, + "step": 1062 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003397733239087918, + "loss": 1.5244, + "step": 1063 + }, + { + "epoch": 0.27, + "learning_rate": 0.00033965488667206357, + "loss": 1.5605, + "step": 1064 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003395363537812662, + "loss": 1.6605, + "step": 1065 + }, + { + "epoch": 0.27, + "learning_rate": 0.00033941772531758654, + "loss": 1.5791, + "step": 1066 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003392990013622771, + "loss": 1.6512, + "step": 1067 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003391801819966557, + "loss": 1.4507, + "step": 1068 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003390612673021054, + "loss": 1.3856, + "step": 1069 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003389422573600747, + "loss": 1.476, + "step": 1070 + }, + { + "epoch": 0.27, + "learning_rate": 0.00033882315225207735, + "loss": 1.4502, + "step": 1071 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003387039520596921, + "loss": 1.4469, + "step": 1072 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033858465686456313, + "loss": 1.6909, + "step": 1073 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003384652667483993, + "loss": 1.6034, + "step": 1074 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003383457817929748, + "loss": 1.6907, + "step": 1075 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033822620208012863, + "loss": 1.4227, + "step": 1076 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033810652769176487, + "loss": 1.703, + "step": 1077 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033798675870985213, + "loss": 1.4151, + "step": 1078 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033786689521642414, + "loss": 1.8087, + "step": 1079 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003377469372935791, + "loss": 1.6159, + "step": 1080 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033762688502348006, + "loss": 1.5592, + "step": 1081 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003375067384883546, + "loss": 1.5487, + "step": 1082 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003373864977704949, + "loss": 1.6412, + "step": 1083 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033726616295225777, + "loss": 1.5919, + "step": 1084 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033714573411606423, + "loss": 1.5356, + "step": 1085 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003370252113443998, + "loss": 1.5686, + "step": 1086 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033690459471981437, + "loss": 1.5398, + "step": 1087 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033678388432492216, + "loss": 1.5646, + "step": 1088 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003366630802424015, + "loss": 1.7746, + "step": 1089 + }, + { + "epoch": 0.28, + "learning_rate": 0.000336542182554995, + "loss": 1.5381, + "step": 1090 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003364211913455092, + "loss": 1.6791, + "step": 1091 + }, + { + "epoch": 0.28, + "learning_rate": 0.000336300106696815, + "loss": 1.6803, + "step": 1092 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033617892869184697, + "loss": 1.5346, + "step": 1093 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003360576574136038, + "loss": 1.6946, + "step": 1094 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033593629294514823, + "loss": 1.5399, + "step": 1095 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003358148353696064, + "loss": 1.5099, + "step": 1096 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033569328477016857, + "loss": 1.4843, + "step": 1097 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003355716412300886, + "loss": 1.6703, + "step": 1098 + }, + { + "epoch": 0.28, + "learning_rate": 0.000335449904832684, + "loss": 1.6933, + "step": 1099 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033532807566133595, + "loss": 1.6817, + "step": 1100 + }, + { + "epoch": 0.28, + "learning_rate": 0.000335206153799489, + "loss": 1.631, + "step": 1101 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033508413933065143, + "loss": 1.7545, + "step": 1102 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003349620323383948, + "loss": 1.5482, + "step": 1103 + }, + { + "epoch": 0.28, + "learning_rate": 0.000334839832906354, + "loss": 1.5616, + "step": 1104 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003347175411182274, + "loss": 1.6343, + "step": 1105 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033459515705777643, + "loss": 1.5963, + "step": 1106 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033447268080882586, + "loss": 1.5286, + "step": 1107 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033435011245526366, + "loss": 1.5815, + "step": 1108 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003342274520810406, + "loss": 1.5509, + "step": 1109 + }, + { + "epoch": 0.28, + "learning_rate": 0.00033410469977017084, + "loss": 1.5919, + "step": 1110 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003339818556067313, + "loss": 1.523, + "step": 1111 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033385891967486185, + "loss": 1.5834, + "step": 1112 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033373589205876506, + "loss": 1.3742, + "step": 1113 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003336127728427067, + "loss": 1.5829, + "step": 1114 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033348956211101487, + "loss": 1.7769, + "step": 1115 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003333662599480806, + "loss": 1.5555, + "step": 1116 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003332428664383574, + "loss": 1.5663, + "step": 1117 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033311938166636143, + "loss": 1.682, + "step": 1118 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033299580571667127, + "loss": 1.6078, + "step": 1119 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003328721386739282, + "loss": 1.5645, + "step": 1120 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003327483806228355, + "loss": 1.6554, + "step": 1121 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033262453164815904, + "loss": 1.647, + "step": 1122 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033250059183472706, + "loss": 1.6142, + "step": 1123 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033237656126742967, + "loss": 1.3371, + "step": 1124 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033225244003121947, + "loss": 1.7582, + "step": 1125 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033212822821111104, + "loss": 1.5812, + "step": 1126 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003320039258921809, + "loss": 1.6319, + "step": 1127 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003318795331595678, + "loss": 1.6679, + "step": 1128 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003317550500984722, + "loss": 1.5731, + "step": 1129 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003316304767941564, + "loss": 1.7698, + "step": 1130 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033150581333194493, + "loss": 1.6733, + "step": 1131 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003313810597972234, + "loss": 1.4349, + "step": 1132 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033125621627543966, + "loss": 1.5253, + "step": 1133 + }, + { + "epoch": 0.29, + "learning_rate": 0.000331131282852103, + "loss": 1.7064, + "step": 1134 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003310062596127842, + "loss": 1.6397, + "step": 1135 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003308811466431157, + "loss": 1.8502, + "step": 1136 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003307559440287914, + "loss": 1.5914, + "step": 1137 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033063065185556654, + "loss": 1.6453, + "step": 1138 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003305052702092576, + "loss": 1.5936, + "step": 1139 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003303797991757425, + "loss": 1.5098, + "step": 1140 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033025423884096045, + "loss": 1.5962, + "step": 1141 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003301285892909116, + "loss": 1.6867, + "step": 1142 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003300028506116573, + "loss": 1.5216, + "step": 1143 + }, + { + "epoch": 0.29, + "learning_rate": 0.00032987702288932, + "loss": 1.6307, + "step": 1144 + }, + { + "epoch": 0.29, + "learning_rate": 0.00032975110621008324, + "loss": 1.7512, + "step": 1145 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003296251006601911, + "loss": 1.607, + "step": 1146 + }, + { + "epoch": 0.29, + "learning_rate": 0.00032949900632594896, + "loss": 1.6309, + "step": 1147 + }, + { + "epoch": 0.29, + "learning_rate": 0.00032937282329372283, + "loss": 1.6502, + "step": 1148 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003292465516499394, + "loss": 1.5231, + "step": 1149 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032912019148108616, + "loss": 1.5893, + "step": 1150 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032899374287371125, + "loss": 1.5379, + "step": 1151 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032886720591442326, + "loss": 1.5696, + "step": 1152 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032874058068989146, + "loss": 1.5037, + "step": 1153 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032861386728684543, + "loss": 1.608, + "step": 1154 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032848706579207527, + "loss": 1.5589, + "step": 1155 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003283601762924312, + "loss": 1.5463, + "step": 1156 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003282331988748241, + "loss": 1.5777, + "step": 1157 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003281061336262246, + "loss": 1.6662, + "step": 1158 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003279789806336639, + "loss": 1.5816, + "step": 1159 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003278517399842331, + "loss": 1.5934, + "step": 1160 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032772441176508323, + "loss": 1.641, + "step": 1161 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003275969960634256, + "loss": 1.6322, + "step": 1162 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003274694929665312, + "loss": 1.5589, + "step": 1163 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003273419025617309, + "loss": 1.6211, + "step": 1164 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003272142249364155, + "loss": 1.734, + "step": 1165 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003270864601780355, + "loss": 1.7946, + "step": 1166 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003269586083741009, + "loss": 1.4866, + "step": 1167 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032683066961218163, + "loss": 1.5883, + "step": 1168 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003267026439799069, + "loss": 1.6302, + "step": 1169 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003265745315649656, + "loss": 1.461, + "step": 1170 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032644633245510586, + "loss": 1.7097, + "step": 1171 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032631804673813547, + "loss": 1.4899, + "step": 1172 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003261896745019214, + "loss": 1.5518, + "step": 1173 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032606121583438974, + "loss": 1.5662, + "step": 1174 + }, + { + "epoch": 0.3, + "learning_rate": 0.000325932670823526, + "loss": 1.5803, + "step": 1175 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003258040395573747, + "loss": 1.6027, + "step": 1176 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032567532212403956, + "loss": 1.636, + "step": 1177 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003255465186116832, + "loss": 1.6611, + "step": 1178 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003254176291085272, + "loss": 1.5739, + "step": 1179 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032528865370285207, + "loss": 1.6826, + "step": 1180 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003251595924829973, + "loss": 1.542, + "step": 1181 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003250304455373608, + "loss": 1.4313, + "step": 1182 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032490121295439966, + "loss": 1.7157, + "step": 1183 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003247718948226292, + "loss": 1.4181, + "step": 1184 + }, + { + "epoch": 0.3, + "learning_rate": 0.00032464249123062355, + "loss": 1.6829, + "step": 1185 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003245130022670154, + "loss": 1.5138, + "step": 1186 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003243834280204958, + "loss": 1.4924, + "step": 1187 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003242537685798143, + "loss": 1.6462, + "step": 1188 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032412402403377883, + "loss": 1.5998, + "step": 1189 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003239941944712554, + "loss": 1.7109, + "step": 1190 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003238642799811685, + "loss": 1.5108, + "step": 1191 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003237342806525007, + "loss": 1.6894, + "step": 1192 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032360419657429266, + "loss": 1.7361, + "step": 1193 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003234740278356431, + "loss": 1.7535, + "step": 1194 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003233437745257087, + "loss": 1.5421, + "step": 1195 + }, + { + "epoch": 0.31, + "learning_rate": 0.000323213436733704, + "loss": 1.7633, + "step": 1196 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003230830145489017, + "loss": 1.5741, + "step": 1197 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032295250806063197, + "loss": 1.6461, + "step": 1198 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032282191735828286, + "loss": 1.5867, + "step": 1199 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032269124253130017, + "loss": 1.6033, + "step": 1200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003225604836691871, + "loss": 1.5716, + "step": 1201 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032242964086150465, + "loss": 1.6491, + "step": 1202 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032229871419787125, + "loss": 1.6562, + "step": 1203 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032216770376796263, + "loss": 1.5501, + "step": 1204 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003220366096615121, + "loss": 1.7118, + "step": 1205 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032190543196831007, + "loss": 1.6797, + "step": 1206 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032177417077820434, + "loss": 1.5592, + "step": 1207 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032164282618109995, + "loss": 1.63, + "step": 1208 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032151139826695893, + "loss": 1.5523, + "step": 1209 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032137988712580034, + "loss": 1.554, + "step": 1210 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003212482928477005, + "loss": 1.5144, + "step": 1211 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032111661552279244, + "loss": 1.6007, + "step": 1212 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032098485524126617, + "loss": 1.6295, + "step": 1213 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003208530120933684, + "loss": 1.7251, + "step": 1214 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003207210861694028, + "loss": 1.4659, + "step": 1215 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003205890775597296, + "loss": 1.7953, + "step": 1216 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003204569863547657, + "loss": 1.5445, + "step": 1217 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003203248126449845, + "loss": 1.6421, + "step": 1218 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032019255652091606, + "loss": 1.5815, + "step": 1219 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003200602180731467, + "loss": 1.6216, + "step": 1220 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003199277973923194, + "loss": 1.5336, + "step": 1221 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003197952945691331, + "loss": 1.494, + "step": 1222 + }, + { + "epoch": 0.31, + "learning_rate": 0.00031966270969434324, + "loss": 1.5711, + "step": 1223 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003195300428587615, + "loss": 1.589, + "step": 1224 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003193972941532556, + "loss": 1.5672, + "step": 1225 + }, + { + "epoch": 0.31, + "learning_rate": 0.00031926446366874916, + "loss": 1.5589, + "step": 1226 + }, + { + "epoch": 0.31, + "learning_rate": 0.00031913155149622223, + "loss": 1.5723, + "step": 1227 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031899855772671043, + "loss": 1.4558, + "step": 1228 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031886548245130557, + "loss": 1.543, + "step": 1229 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031873232576115504, + "loss": 1.7061, + "step": 1230 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031859908774746205, + "loss": 1.6974, + "step": 1231 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003184657685014856, + "loss": 1.6647, + "step": 1232 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003183323681145402, + "loss": 1.5667, + "step": 1233 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003181988866779962, + "loss": 1.4718, + "step": 1234 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003180653242832791, + "loss": 1.6761, + "step": 1235 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003179316810218701, + "loss": 1.5363, + "step": 1236 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003177979569853057, + "loss": 1.6697, + "step": 1237 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003176641522651778, + "loss": 1.7792, + "step": 1238 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003175302669531333, + "loss": 1.6021, + "step": 1239 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031739630114087483, + "loss": 1.6237, + "step": 1240 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003172622549201596, + "loss": 1.3089, + "step": 1241 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003171281283828001, + "loss": 1.6997, + "step": 1242 + }, + { + "epoch": 0.32, + "learning_rate": 0.000316993921620664, + "loss": 1.6001, + "step": 1243 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003168596347256737, + "loss": 1.7336, + "step": 1244 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031672526778980657, + "loss": 1.6224, + "step": 1245 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003165908209050948, + "loss": 1.5767, + "step": 1246 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003164562941636254, + "loss": 1.6763, + "step": 1247 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031632168765753985, + "loss": 1.5773, + "step": 1248 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031618700147903456, + "loss": 1.5524, + "step": 1249 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031605223572036027, + "loss": 1.7016, + "step": 1250 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003159173904738224, + "loss": 1.642, + "step": 1251 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003157824658317808, + "loss": 1.4636, + "step": 1252 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003156474618866495, + "loss": 1.6121, + "step": 1253 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031551237873089706, + "loss": 1.5131, + "step": 1254 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031537721645704634, + "loss": 1.6786, + "step": 1255 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003152419751576741, + "loss": 1.6942, + "step": 1256 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031510665492541143, + "loss": 1.5515, + "step": 1257 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031497125585294353, + "loss": 1.5231, + "step": 1258 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031483577803300946, + "loss": 1.7488, + "step": 1259 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003147002215584023, + "loss": 1.4791, + "step": 1260 + }, + { + "epoch": 0.32, + "learning_rate": 0.00031456458652196915, + "loss": 1.7005, + "step": 1261 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003144288730166105, + "loss": 1.7452, + "step": 1262 + }, + { + "epoch": 0.32, + "learning_rate": 0.000314293081135281, + "loss": 1.8606, + "step": 1263 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003141572109709887, + "loss": 1.6484, + "step": 1264 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003140212626167955, + "loss": 1.7995, + "step": 1265 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003138852361658167, + "loss": 1.604, + "step": 1266 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003137491317112212, + "loss": 1.7919, + "step": 1267 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031361294934623114, + "loss": 1.6045, + "step": 1268 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031347668916412234, + "loss": 1.7168, + "step": 1269 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003133403512582235, + "loss": 1.5723, + "step": 1270 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031320393572191696, + "loss": 1.589, + "step": 1271 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031306744264863806, + "loss": 1.4084, + "step": 1272 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003129308721318751, + "loss": 1.6013, + "step": 1273 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003127942242651696, + "loss": 1.6407, + "step": 1274 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003126574991421162, + "loss": 1.6853, + "step": 1275 + }, + { + "epoch": 0.33, + "learning_rate": 0.000312520696856362, + "loss": 1.4918, + "step": 1276 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031238381750160754, + "loss": 1.6117, + "step": 1277 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031224686117160556, + "loss": 1.5732, + "step": 1278 + }, + { + "epoch": 0.33, + "learning_rate": 0.000312109827960162, + "loss": 1.6469, + "step": 1279 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003119727179611351, + "loss": 1.5207, + "step": 1280 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031183553126843603, + "loss": 1.6142, + "step": 1281 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031169826797602815, + "loss": 1.666, + "step": 1282 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031156092817792753, + "loss": 1.6464, + "step": 1283 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003114235119682026, + "loss": 1.7152, + "step": 1284 + }, + { + "epoch": 0.33, + "learning_rate": 0.000311286019440974, + "loss": 1.6024, + "step": 1285 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003111484506904148, + "loss": 1.5294, + "step": 1286 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003110108058107502, + "loss": 1.839, + "step": 1287 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003108730848962575, + "loss": 1.5395, + "step": 1288 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031073528804126616, + "loss": 1.5586, + "step": 1289 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003105974153401577, + "loss": 1.6043, + "step": 1290 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003104594668873653, + "loss": 1.621, + "step": 1291 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003103214427773745, + "loss": 1.643, + "step": 1292 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031018334310472227, + "loss": 1.5992, + "step": 1293 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003100451679639974, + "loss": 1.6329, + "step": 1294 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003099069174498406, + "loss": 1.4865, + "step": 1295 + }, + { + "epoch": 0.33, + "learning_rate": 0.00030976859165694385, + "loss": 1.6112, + "step": 1296 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003096301906800511, + "loss": 1.4441, + "step": 1297 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003094917146139573, + "loss": 1.6196, + "step": 1298 + }, + { + "epoch": 0.33, + "learning_rate": 0.00030935316355350934, + "loss": 1.6713, + "step": 1299 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003092145375936052, + "loss": 1.6121, + "step": 1300 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003090758368291941, + "loss": 1.6813, + "step": 1301 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003089370613552767, + "loss": 1.6353, + "step": 1302 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003087982112669047, + "loss": 1.4749, + "step": 1303 + }, + { + "epoch": 0.33, + "learning_rate": 0.00030865928665918096, + "loss": 1.5434, + "step": 1304 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003085202876272593, + "loss": 1.7776, + "step": 1305 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030838121426634467, + "loss": 1.6476, + "step": 1306 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003082420666716927, + "loss": 1.5497, + "step": 1307 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030810284493861006, + "loss": 1.6757, + "step": 1308 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030796354916245425, + "loss": 1.5378, + "step": 1309 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030782417943863317, + "loss": 1.6727, + "step": 1310 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030768473586260573, + "loss": 1.6966, + "step": 1311 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003075452185298812, + "loss": 1.6096, + "step": 1312 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003074056275360194, + "loss": 1.8107, + "step": 1313 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030726596297663076, + "loss": 1.5413, + "step": 1314 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030712622494737584, + "loss": 1.6315, + "step": 1315 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030698641354396576, + "loss": 1.5972, + "step": 1316 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030684652886216174, + "loss": 1.5292, + "step": 1317 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030670657099777517, + "loss": 1.6484, + "step": 1318 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003065665400466678, + "loss": 1.624, + "step": 1319 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030642643610475116, + "loss": 1.4635, + "step": 1320 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003062862592679869, + "loss": 1.5574, + "step": 1321 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030614600963238666, + "loss": 1.6276, + "step": 1322 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003060056872940118, + "loss": 1.5693, + "step": 1323 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003058652923489736, + "loss": 1.5445, + "step": 1324 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030572482489343297, + "loss": 1.5803, + "step": 1325 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030558428502360066, + "loss": 1.6336, + "step": 1326 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030544367283573664, + "loss": 1.5629, + "step": 1327 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003053029884261509, + "loss": 1.5731, + "step": 1328 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003051622318912025, + "loss": 1.6852, + "step": 1329 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003050214033273002, + "loss": 1.5451, + "step": 1330 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003048805028309019, + "loss": 1.5027, + "step": 1331 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030473953049851485, + "loss": 1.7529, + "step": 1332 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030459848642669536, + "loss": 1.581, + "step": 1333 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003044573707120491, + "loss": 1.5565, + "step": 1334 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003043161834512308, + "loss": 1.4594, + "step": 1335 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003041749247409439, + "loss": 1.6142, + "step": 1336 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003040335946779411, + "loss": 1.693, + "step": 1337 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030389219335902383, + "loss": 1.5725, + "step": 1338 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030375072088104234, + "loss": 1.6308, + "step": 1339 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030360917734089564, + "loss": 1.4691, + "step": 1340 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030346756283553136, + "loss": 1.6952, + "step": 1341 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030332587746194584, + "loss": 1.7193, + "step": 1342 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030318412131718385, + "loss": 1.4834, + "step": 1343 + }, + { + "epoch": 0.34, + "learning_rate": 0.00030304229449833864, + "loss": 1.6125, + "step": 1344 + }, + { + "epoch": 0.35, + "learning_rate": 0.000302900397102552, + "loss": 1.5938, + "step": 1345 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003027584292270138, + "loss": 1.6118, + "step": 1346 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003026163909689626, + "loss": 1.4214, + "step": 1347 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003024742824256848, + "loss": 1.6385, + "step": 1348 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003023321036945149, + "loss": 1.5173, + "step": 1349 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030218985487283584, + "loss": 1.5937, + "step": 1350 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003020475360580782, + "loss": 1.7806, + "step": 1351 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003019051473477208, + "loss": 1.5565, + "step": 1352 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030176268883929015, + "loss": 1.6108, + "step": 1353 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003016201606303605, + "loss": 1.6358, + "step": 1354 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003014775628185541, + "loss": 1.6248, + "step": 1355 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003013348955015407, + "loss": 1.6929, + "step": 1356 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030119215877703767, + "loss": 1.4244, + "step": 1357 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003010493527428098, + "loss": 1.5937, + "step": 1358 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003009064774966696, + "loss": 1.6738, + "step": 1359 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030076353313647687, + "loss": 1.4366, + "step": 1360 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003006205197601387, + "loss": 1.6663, + "step": 1361 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003004774374656095, + "loss": 1.4839, + "step": 1362 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003003342863508909, + "loss": 1.622, + "step": 1363 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030019106651403164, + "loss": 1.6411, + "step": 1364 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003000477780531275, + "loss": 1.6131, + "step": 1365 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002999044210663214, + "loss": 1.7369, + "step": 1366 + }, + { + "epoch": 0.35, + "learning_rate": 0.000299760995651803, + "loss": 1.579, + "step": 1367 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029961750190780895, + "loss": 1.6389, + "step": 1368 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002994739399326227, + "loss": 1.6553, + "step": 1369 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002993303098245743, + "loss": 1.5453, + "step": 1370 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029918661168204074, + "loss": 1.5497, + "step": 1371 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002990428456034454, + "loss": 1.5584, + "step": 1372 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029889901168725813, + "loss": 1.6085, + "step": 1373 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029875511003199545, + "loss": 1.4761, + "step": 1374 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029861114073622015, + "loss": 1.6032, + "step": 1375 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029846710389854137, + "loss": 1.5981, + "step": 1376 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002983229996176145, + "loss": 1.6601, + "step": 1377 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002981788279921412, + "loss": 1.7005, + "step": 1378 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029803458912086914, + "loss": 1.6401, + "step": 1379 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002978902831025921, + "loss": 1.5898, + "step": 1380 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002977459100361498, + "loss": 1.612, + "step": 1381 + }, + { + "epoch": 0.35, + "learning_rate": 0.00029760147002042816, + "loss": 1.8926, + "step": 1382 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002974569631543585, + "loss": 1.5778, + "step": 1383 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002973123895369183, + "loss": 1.5375, + "step": 1384 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002971677492671305, + "loss": 1.6365, + "step": 1385 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029702304244406393, + "loss": 1.577, + "step": 1386 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002968782691668329, + "loss": 1.7008, + "step": 1387 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002967334295345972, + "loss": 1.6012, + "step": 1388 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002965885236465621, + "loss": 1.6782, + "step": 1389 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002964435516019782, + "loss": 1.4459, + "step": 1390 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029629851350014146, + "loss": 1.7411, + "step": 1391 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002961534094403931, + "loss": 1.7356, + "step": 1392 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029600823952211964, + "loss": 1.6521, + "step": 1393 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002958630038447523, + "loss": 1.5344, + "step": 1394 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029571770250776786, + "loss": 1.5301, + "step": 1395 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029557233561068765, + "loss": 1.68, + "step": 1396 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029542690325307813, + "loss": 1.4368, + "step": 1397 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002952814055345506, + "loss": 1.5486, + "step": 1398 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029513584255476107, + "loss": 1.5303, + "step": 1399 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002949902144134101, + "loss": 1.7193, + "step": 1400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029484452121024315, + "loss": 1.5619, + "step": 1401 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002946987630450502, + "loss": 1.5695, + "step": 1402 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029455294001766544, + "loss": 1.6316, + "step": 1403 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002944070522279679, + "loss": 1.5912, + "step": 1404 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002942610997758806, + "loss": 1.5533, + "step": 1405 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002941150827613711, + "loss": 1.562, + "step": 1406 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002939690012844512, + "loss": 1.4581, + "step": 1407 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002938228554451765, + "loss": 1.5636, + "step": 1408 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029367664534364716, + "loss": 1.5124, + "step": 1409 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029353037108000696, + "loss": 1.6057, + "step": 1410 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002933840327544439, + "loss": 1.7397, + "step": 1411 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002932376304671896, + "loss": 1.4797, + "step": 1412 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029309116431851977, + "loss": 1.5348, + "step": 1413 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002929446344087537, + "loss": 1.6984, + "step": 1414 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002927980408382544, + "loss": 1.5099, + "step": 1415 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029265138370742846, + "loss": 1.6399, + "step": 1416 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029250466311672593, + "loss": 1.4532, + "step": 1417 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002923578791666405, + "loss": 1.6414, + "step": 1418 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002922110319577091, + "loss": 1.5307, + "step": 1419 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029206412159051216, + "loss": 1.6637, + "step": 1420 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002919171481656731, + "loss": 1.4079, + "step": 1421 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002917701117838587, + "loss": 1.5175, + "step": 1422 + }, + { + "epoch": 0.37, + "learning_rate": 0.000291623012545779, + "loss": 1.5826, + "step": 1423 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029147585055218687, + "loss": 1.7214, + "step": 1424 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029132862590387814, + "loss": 1.6212, + "step": 1425 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029118133870169175, + "loss": 1.6883, + "step": 1426 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029103398904650935, + "loss": 1.6964, + "step": 1427 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002908865770392555, + "loss": 1.5076, + "step": 1428 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029073910278089717, + "loss": 1.6896, + "step": 1429 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029059156637244437, + "loss": 1.7404, + "step": 1430 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029044396791494935, + "loss": 1.654, + "step": 1431 + }, + { + "epoch": 0.37, + "learning_rate": 0.000290296307509507, + "loss": 1.5446, + "step": 1432 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002901485852572546, + "loss": 1.529, + "step": 1433 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029000080125937195, + "loss": 1.5286, + "step": 1434 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002898529556170808, + "loss": 1.5049, + "step": 1435 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002897050484316455, + "loss": 1.635, + "step": 1436 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002895570798043723, + "loss": 1.651, + "step": 1437 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002894090498366095, + "loss": 1.4991, + "step": 1438 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002892609586297478, + "loss": 1.6107, + "step": 1439 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002891128062852194, + "loss": 1.5898, + "step": 1440 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002889645929044986, + "loss": 1.5908, + "step": 1441 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028881631858910145, + "loss": 1.7763, + "step": 1442 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028866798344058563, + "loss": 1.5483, + "step": 1443 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028851958756055077, + "loss": 1.7179, + "step": 1444 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002883711310506378, + "loss": 1.4866, + "step": 1445 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028822261401252933, + "loss": 1.6043, + "step": 1446 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028807403654794943, + "loss": 1.6282, + "step": 1447 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028792539875866353, + "loss": 1.6041, + "step": 1448 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002877767007464783, + "loss": 1.4927, + "step": 1449 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028762794261324184, + "loss": 1.8006, + "step": 1450 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028747912446084326, + "loss": 1.5735, + "step": 1451 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002873302463912128, + "loss": 1.8155, + "step": 1452 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002871813085063219, + "loss": 1.6441, + "step": 1453 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002870323109081827, + "loss": 1.5956, + "step": 1454 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002868832536988484, + "loss": 1.6078, + "step": 1455 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002867341369804132, + "loss": 1.5815, + "step": 1456 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028658496085501176, + "loss": 1.5454, + "step": 1457 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028643572542481953, + "loss": 1.6233, + "step": 1458 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002862864307920526, + "loss": 1.6219, + "step": 1459 + }, + { + "epoch": 0.37, + "learning_rate": 0.00028613707705896766, + "loss": 1.5719, + "step": 1460 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002859876643278618, + "loss": 1.6911, + "step": 1461 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002858381927010725, + "loss": 1.353, + "step": 1462 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002856886622809777, + "loss": 1.7445, + "step": 1463 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028553907316999547, + "loss": 1.6386, + "step": 1464 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002853894254705842, + "loss": 1.6864, + "step": 1465 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028523971928524233, + "loss": 1.6302, + "step": 1466 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028508995471650836, + "loss": 1.8208, + "step": 1467 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028494013186696084, + "loss": 1.5861, + "step": 1468 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002847902508392182, + "loss": 1.5903, + "step": 1469 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002846403117359387, + "loss": 1.5317, + "step": 1470 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028449031465982037, + "loss": 1.3911, + "step": 1471 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028434025971360105, + "loss": 1.7583, + "step": 1472 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002841901470000581, + "loss": 1.5217, + "step": 1473 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028403997662200845, + "loss": 1.5471, + "step": 1474 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002838897486823086, + "loss": 1.5977, + "step": 1475 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002837394632838544, + "loss": 1.4183, + "step": 1476 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028358912052958116, + "loss": 1.5011, + "step": 1477 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028343872052246337, + "loss": 1.7538, + "step": 1478 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028328826336551476, + "loss": 1.6839, + "step": 1479 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002831377491617883, + "loss": 1.5824, + "step": 1480 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002829871780143758, + "loss": 1.6135, + "step": 1481 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028283655002640844, + "loss": 1.579, + "step": 1482 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028268586530105593, + "loss": 1.5095, + "step": 1483 + }, + { + "epoch": 0.38, + "learning_rate": 0.000282535123941527, + "loss": 1.5685, + "step": 1484 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002823843260510694, + "loss": 1.5204, + "step": 1485 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002822334717329692, + "loss": 1.5992, + "step": 1486 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002820825610905514, + "loss": 1.5639, + "step": 1487 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002819315942271795, + "loss": 1.5365, + "step": 1488 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002817805712462554, + "loss": 1.5937, + "step": 1489 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002816294922512197, + "loss": 1.7917, + "step": 1490 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028147835734555114, + "loss": 1.6199, + "step": 1491 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002813271666327669, + "loss": 1.5762, + "step": 1492 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028117592021642204, + "loss": 1.6284, + "step": 1493 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002810246182001103, + "loss": 1.4866, + "step": 1494 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002808732606874631, + "loss": 1.642, + "step": 1495 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028072184778215004, + "loss": 1.6069, + "step": 1496 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002805703795878787, + "loss": 1.4589, + "step": 1497 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002804188562083944, + "loss": 1.5317, + "step": 1498 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002802672777474803, + "loss": 1.6346, + "step": 1499 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002801156443089573, + "loss": 1.6194, + "step": 1500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027996395599668404, + "loss": 1.5903, + "step": 1501 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002798122129145566, + "loss": 1.5144, + "step": 1502 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002796604151665086, + "loss": 1.638, + "step": 1503 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002795085628565112, + "loss": 1.5247, + "step": 1504 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002793566560885729, + "loss": 1.5199, + "step": 1505 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002792046949667394, + "loss": 1.6022, + "step": 1506 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002790526795950937, + "loss": 1.8061, + "step": 1507 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027890061007775606, + "loss": 1.6278, + "step": 1508 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027874848651888355, + "loss": 1.5396, + "step": 1509 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002785963090226706, + "loss": 1.6383, + "step": 1510 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027844407769334837, + "loss": 1.5914, + "step": 1511 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027829179263518493, + "loss": 1.65, + "step": 1512 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002781394539524851, + "loss": 1.467, + "step": 1513 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002779870617495905, + "loss": 1.6693, + "step": 1514 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002778346161308794, + "loss": 1.4613, + "step": 1515 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002776821172007667, + "loss": 1.7304, + "step": 1516 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027752956506370366, + "loss": 1.5541, + "step": 1517 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002773769598241782, + "loss": 1.6559, + "step": 1518 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027722430158671444, + "loss": 1.5326, + "step": 1519 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002770715904558729, + "loss": 1.372, + "step": 1520 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027691882653625016, + "loss": 1.6196, + "step": 1521 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027676600993247917, + "loss": 1.4266, + "step": 1522 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027661314074922894, + "loss": 1.6936, + "step": 1523 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027646021909120435, + "loss": 1.6493, + "step": 1524 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002763072450631463, + "loss": 1.8153, + "step": 1525 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002761542187698316, + "loss": 1.6097, + "step": 1526 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002760011403160728, + "loss": 1.5904, + "step": 1527 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002758480098067182, + "loss": 1.7151, + "step": 1528 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027569482734665174, + "loss": 1.7408, + "step": 1529 + }, + { + "epoch": 0.39, + "learning_rate": 0.000275541593040793, + "loss": 1.7196, + "step": 1530 + }, + { + "epoch": 0.39, + "learning_rate": 0.000275388306994097, + "loss": 1.721, + "step": 1531 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002752349693115541, + "loss": 1.5782, + "step": 1532 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027508158009819034, + "loss": 1.4519, + "step": 1533 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002749281394590668, + "loss": 1.61, + "step": 1534 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027477464749927987, + "loss": 1.4476, + "step": 1535 + }, + { + "epoch": 0.39, + "learning_rate": 0.000274621104323961, + "loss": 1.5957, + "step": 1536 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027446751003827683, + "loss": 1.5382, + "step": 1537 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002743138647474289, + "loss": 1.4716, + "step": 1538 + }, + { + "epoch": 0.39, + "learning_rate": 0.00027416016855665395, + "loss": 1.5748, + "step": 1539 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027400642157122317, + "loss": 1.6772, + "step": 1540 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002738526238964428, + "loss": 1.507, + "step": 1541 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027369877563765383, + "loss": 1.6087, + "step": 1542 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027354487690023176, + "loss": 1.5503, + "step": 1543 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027339092778958684, + "loss": 1.7127, + "step": 1544 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027323692841116354, + "loss": 1.6009, + "step": 1545 + }, + { + "epoch": 0.4, + "learning_rate": 0.000273082878870441, + "loss": 1.648, + "step": 1546 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002729287792729327, + "loss": 1.6575, + "step": 1547 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002727746297241862, + "loss": 1.4992, + "step": 1548 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002726204303297836, + "loss": 1.5329, + "step": 1549 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027246618119534087, + "loss": 1.6748, + "step": 1550 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027231188242650813, + "loss": 1.655, + "step": 1551 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027215753412896955, + "loss": 1.6493, + "step": 1552 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002720031364084432, + "loss": 1.6357, + "step": 1553 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002718486893706809, + "loss": 1.6375, + "step": 1554 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002716941931214684, + "loss": 1.5255, + "step": 1555 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002715396477666252, + "loss": 1.5969, + "step": 1556 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027138505341200406, + "loss": 1.5466, + "step": 1557 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027123041016349173, + "loss": 1.6043, + "step": 1558 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027107571812700823, + "loss": 1.6583, + "step": 1559 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002709209774085071, + "loss": 1.5855, + "step": 1560 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002707661881139752, + "loss": 1.4998, + "step": 1561 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002706113503494326, + "loss": 1.5773, + "step": 1562 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027045646422093256, + "loss": 1.6863, + "step": 1563 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027030152983456156, + "loss": 1.5917, + "step": 1564 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002701465472964391, + "loss": 1.502, + "step": 1565 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026999151671271767, + "loss": 1.6378, + "step": 1566 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002698364381895825, + "loss": 1.6121, + "step": 1567 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002696813118332519, + "loss": 1.5451, + "step": 1568 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002695261377499768, + "loss": 1.5322, + "step": 1569 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026937091604604095, + "loss": 1.5837, + "step": 1570 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002692156468277606, + "loss": 1.621, + "step": 1571 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026906033020148443, + "loss": 1.6575, + "step": 1572 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002689049662735939, + "loss": 1.5257, + "step": 1573 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002687495551505026, + "loss": 1.7193, + "step": 1574 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026859409693865663, + "loss": 1.6435, + "step": 1575 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002684385917445342, + "loss": 1.7418, + "step": 1576 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002682830396746457, + "loss": 1.6475, + "step": 1577 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002681274408355339, + "loss": 1.5798, + "step": 1578 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002679717953337731, + "loss": 1.6148, + "step": 1579 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002678161032759701, + "loss": 1.6599, + "step": 1580 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002676603647687632, + "loss": 1.3501, + "step": 1581 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026750457991882266, + "loss": 1.6746, + "step": 1582 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026734874883285053, + "loss": 1.7309, + "step": 1583 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026719287161758044, + "loss": 1.7311, + "step": 1584 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002670369483797776, + "loss": 1.5443, + "step": 1585 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026688097922623893, + "loss": 1.6867, + "step": 1586 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026672496426379254, + "loss": 1.6587, + "step": 1587 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026656890359929805, + "loss": 1.441, + "step": 1588 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026641279733964634, + "loss": 1.4815, + "step": 1589 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002662566455917596, + "loss": 1.6149, + "step": 1590 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026610044846259114, + "loss": 1.5729, + "step": 1591 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002659442060591252, + "loss": 1.6241, + "step": 1592 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026578791848837734, + "loss": 1.4792, + "step": 1593 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026563158585739385, + "loss": 1.8221, + "step": 1594 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002654752082732519, + "loss": 1.6555, + "step": 1595 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002653187858430594, + "loss": 1.6838, + "step": 1596 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002651623186739551, + "loss": 1.5765, + "step": 1597 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002650058068731084, + "loss": 1.5523, + "step": 1598 + }, + { + "epoch": 0.41, + "learning_rate": 0.000264849250547719, + "loss": 1.6508, + "step": 1599 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002646926498050175, + "loss": 1.7745, + "step": 1600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026453600475226467, + "loss": 1.7415, + "step": 1601 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002643793154967517, + "loss": 1.59, + "step": 1602 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026422258214579993, + "loss": 1.5654, + "step": 1603 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002640658048067611, + "loss": 1.5435, + "step": 1604 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026390898358701704, + "loss": 1.59, + "step": 1605 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002637521185939795, + "loss": 1.6232, + "step": 1606 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026359520993509035, + "loss": 1.5684, + "step": 1607 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026343825771782127, + "loss": 1.3872, + "step": 1608 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002632812620496738, + "loss": 1.531, + "step": 1609 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026312422303817934, + "loss": 1.6953, + "step": 1610 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026296714079089887, + "loss": 1.7234, + "step": 1611 + }, + { + "epoch": 0.41, + "learning_rate": 0.000262810015415423, + "loss": 1.4666, + "step": 1612 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002626528470193719, + "loss": 1.4896, + "step": 1613 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002624956357103952, + "loss": 1.8093, + "step": 1614 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002623383815961719, + "loss": 1.6931, + "step": 1615 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002621810847844104, + "loss": 1.585, + "step": 1616 + }, + { + "epoch": 0.41, + "learning_rate": 0.00026202374538284817, + "loss": 1.6679, + "step": 1617 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026186636349925216, + "loss": 1.4948, + "step": 1618 + }, + { + "epoch": 0.42, + "learning_rate": 0.000261708939241418, + "loss": 1.619, + "step": 1619 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002615514727171707, + "loss": 1.6404, + "step": 1620 + }, + { + "epoch": 0.42, + "learning_rate": 0.000261393964034364, + "loss": 1.4362, + "step": 1621 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002612364133008807, + "loss": 1.5449, + "step": 1622 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026107882062463225, + "loss": 1.3076, + "step": 1623 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026092118611355887, + "loss": 1.5811, + "step": 1624 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026076350987562947, + "loss": 1.5561, + "step": 1625 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002606057920188415, + "loss": 1.4853, + "step": 1626 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002604480326512209, + "loss": 1.6639, + "step": 1627 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002602902318808222, + "loss": 1.4128, + "step": 1628 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026013238981572803, + "loss": 1.6599, + "step": 1629 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025997450656404944, + "loss": 1.6062, + "step": 1630 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025981658223392574, + "loss": 1.6728, + "step": 1631 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002596586169335243, + "loss": 1.6177, + "step": 1632 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025950061077104064, + "loss": 1.7149, + "step": 1633 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025934256385469804, + "loss": 1.5334, + "step": 1634 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025918447629274806, + "loss": 1.7066, + "step": 1635 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002590263481934697, + "loss": 1.498, + "step": 1636 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025886817966517005, + "loss": 1.4971, + "step": 1637 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002587099708161837, + "loss": 1.6817, + "step": 1638 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025855172175487303, + "loss": 1.5568, + "step": 1639 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025839343258962763, + "loss": 1.6308, + "step": 1640 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025823510342886497, + "loss": 1.5502, + "step": 1641 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002580767343810296, + "loss": 1.4908, + "step": 1642 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002579183255545936, + "loss": 1.7024, + "step": 1643 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002577598770580562, + "loss": 1.5162, + "step": 1644 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002576013889999437, + "loss": 1.7059, + "step": 1645 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002574428614888098, + "loss": 1.6551, + "step": 1646 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025728429463323484, + "loss": 1.6692, + "step": 1647 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025712568854182645, + "loss": 1.4655, + "step": 1648 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002569670433232189, + "loss": 1.5454, + "step": 1649 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002568083590860734, + "loss": 1.6795, + "step": 1650 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002566496359390777, + "loss": 1.6098, + "step": 1651 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025649087399094646, + "loss": 1.6315, + "step": 1652 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002563320733504207, + "loss": 1.6027, + "step": 1653 + }, + { + "epoch": 0.42, + "learning_rate": 0.000256173234126268, + "loss": 1.6203, + "step": 1654 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002560143564272825, + "loss": 1.6865, + "step": 1655 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025585544036228454, + "loss": 1.5802, + "step": 1656 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002556964860401208, + "loss": 1.4745, + "step": 1657 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025553749356966405, + "loss": 1.5728, + "step": 1658 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002553784630598134, + "loss": 1.4487, + "step": 1659 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025521939461949387, + "loss": 1.5949, + "step": 1660 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025506028835765637, + "loss": 1.4604, + "step": 1661 + }, + { + "epoch": 0.43, + "learning_rate": 0.000254901144383278, + "loss": 1.5229, + "step": 1662 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025474196280536143, + "loss": 1.5213, + "step": 1663 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002545827437329352, + "loss": 1.3546, + "step": 1664 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002544234872750535, + "loss": 1.6626, + "step": 1665 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025426419354079627, + "loss": 1.6668, + "step": 1666 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002541048626392686, + "loss": 1.5568, + "step": 1667 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025394549467960153, + "loss": 1.4723, + "step": 1668 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002537860897709511, + "loss": 1.5312, + "step": 1669 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025362664802249885, + "loss": 1.7142, + "step": 1670 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002534671695434515, + "loss": 1.7922, + "step": 1671 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002533076544430408, + "loss": 1.53, + "step": 1672 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002531481028305239, + "loss": 1.4422, + "step": 1673 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025298851481518267, + "loss": 1.6576, + "step": 1674 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025282889050632403, + "loss": 1.6241, + "step": 1675 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025266923001327976, + "loss": 1.5965, + "step": 1676 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025250953344540633, + "loss": 1.6764, + "step": 1677 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002523498009120851, + "loss": 1.6505, + "step": 1678 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002521900325227219, + "loss": 1.7018, + "step": 1679 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025203022838674716, + "loss": 1.7698, + "step": 1680 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025187038861361585, + "loss": 1.5424, + "step": 1681 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002517105133128073, + "loss": 1.5567, + "step": 1682 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025155060259382517, + "loss": 1.5819, + "step": 1683 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025139065656619734, + "loss": 1.5281, + "step": 1684 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025123067533947604, + "loss": 1.5636, + "step": 1685 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002510706590232374, + "loss": 1.7444, + "step": 1686 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025091060772708173, + "loss": 1.6398, + "step": 1687 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002507505215606333, + "loss": 1.6995, + "step": 1688 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002505904006335401, + "loss": 1.4023, + "step": 1689 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025043024505547407, + "loss": 1.7183, + "step": 1690 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002502700549361309, + "loss": 1.7195, + "step": 1691 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025010983038522986, + "loss": 1.6163, + "step": 1692 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002499495715125138, + "loss": 1.687, + "step": 1693 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002497892784277492, + "loss": 1.5803, + "step": 1694 + }, + { + "epoch": 0.43, + "learning_rate": 0.00024962895124072584, + "loss": 1.697, + "step": 1695 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024946859006125694, + "loss": 1.6095, + "step": 1696 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002493081949991788, + "loss": 1.7301, + "step": 1697 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024914776616435133, + "loss": 1.6211, + "step": 1698 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024898730366665724, + "loss": 1.5837, + "step": 1699 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024882680761600235, + "loss": 1.6459, + "step": 1700 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002486662781223156, + "loss": 1.6072, + "step": 1701 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002485057152955486, + "loss": 1.8014, + "step": 1702 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024834511924567607, + "loss": 1.6211, + "step": 1703 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002481844900826952, + "loss": 1.6319, + "step": 1704 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002480238279166262, + "loss": 1.7683, + "step": 1705 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002478631328575116, + "loss": 1.6965, + "step": 1706 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002477024050154165, + "loss": 1.6643, + "step": 1707 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002475416445004285, + "loss": 1.5239, + "step": 1708 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002473808514226577, + "loss": 1.6224, + "step": 1709 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024722002589223625, + "loss": 1.7401, + "step": 1710 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024705916801931883, + "loss": 1.4518, + "step": 1711 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024689827791408195, + "loss": 1.6527, + "step": 1712 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002467373556867245, + "loss": 1.6216, + "step": 1713 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024657640144746704, + "loss": 1.5725, + "step": 1714 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002464154153065524, + "loss": 1.6144, + "step": 1715 + }, + { + "epoch": 0.44, + "learning_rate": 0.000246254397374245, + "loss": 1.6282, + "step": 1716 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002460933477608312, + "loss": 1.5999, + "step": 1717 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024593226657661904, + "loss": 1.547, + "step": 1718 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002457711539319381, + "loss": 1.5498, + "step": 1719 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002456100099371395, + "loss": 1.4785, + "step": 1720 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024544883470259594, + "loss": 1.7352, + "step": 1721 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024528762833870145, + "loss": 1.7357, + "step": 1722 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002451263909558713, + "loss": 1.5262, + "step": 1723 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024496512266454224, + "loss": 1.5849, + "step": 1724 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024480382357517196, + "loss": 1.5754, + "step": 1725 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002446424937982394, + "loss": 1.8073, + "step": 1726 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002444811334442443, + "loss": 1.5518, + "step": 1727 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002443197426237077, + "loss": 1.5512, + "step": 1728 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024415832144717114, + "loss": 1.5013, + "step": 1729 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002439968700251972, + "loss": 1.4438, + "step": 1730 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024383538846836896, + "loss": 1.57, + "step": 1731 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002436738768872905, + "loss": 1.5665, + "step": 1732 + }, + { + "epoch": 0.44, + "learning_rate": 0.00024351233539258605, + "loss": 1.6741, + "step": 1733 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002433507640949006, + "loss": 1.5487, + "step": 1734 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002431891631048994, + "loss": 1.4817, + "step": 1735 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024302753253326812, + "loss": 1.418, + "step": 1736 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002428658724907127, + "loss": 1.5073, + "step": 1737 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024270418308795923, + "loss": 1.6726, + "step": 1738 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002425424644357539, + "loss": 1.5927, + "step": 1739 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024238071664486297, + "loss": 1.5454, + "step": 1740 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024221893982607267, + "loss": 1.687, + "step": 1741 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024205713409018902, + "loss": 1.6985, + "step": 1742 + }, + { + "epoch": 0.45, + "learning_rate": 0.000241895299548038, + "loss": 1.5363, + "step": 1743 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024173343631046513, + "loss": 1.608, + "step": 1744 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002415715444883357, + "loss": 1.8188, + "step": 1745 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024140962419253465, + "loss": 1.6473, + "step": 1746 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024124767553396618, + "loss": 1.5638, + "step": 1747 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002410856986235542, + "loss": 1.6095, + "step": 1748 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024092369357224183, + "loss": 1.5733, + "step": 1749 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024076166049099147, + "loss": 1.3559, + "step": 1750 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024059959949078468, + "loss": 1.6353, + "step": 1751 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024043751068262225, + "loss": 1.533, + "step": 1752 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024027539417752398, + "loss": 1.5335, + "step": 1753 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002401132500865285, + "loss": 1.4568, + "step": 1754 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002399510785206935, + "loss": 1.5689, + "step": 1755 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023978887959109546, + "loss": 1.6723, + "step": 1756 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023962665340882954, + "loss": 1.5459, + "step": 1757 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023946440008500966, + "loss": 1.7471, + "step": 1758 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023930211973076823, + "loss": 1.5413, + "step": 1759 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023913981245725629, + "loss": 1.4954, + "step": 1760 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002389774783756432, + "loss": 1.5438, + "step": 1761 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023881511759711667, + "loss": 1.4055, + "step": 1762 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023865273023288287, + "loss": 1.7164, + "step": 1763 + }, + { + "epoch": 0.45, + "learning_rate": 0.000238490316394166, + "loss": 1.6785, + "step": 1764 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023832787619220853, + "loss": 1.6515, + "step": 1765 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023816540973827075, + "loss": 1.6134, + "step": 1766 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023800291714363124, + "loss": 1.5449, + "step": 1767 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023784039851958632, + "loss": 1.521, + "step": 1768 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023767785397745014, + "loss": 1.521, + "step": 1769 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002375152836285546, + "loss": 1.6915, + "step": 1770 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023735268758424938, + "loss": 1.5852, + "step": 1771 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002371900659559016, + "loss": 1.5411, + "step": 1772 + }, + { + "epoch": 0.45, + "learning_rate": 0.00023702741885489598, + "loss": 1.6783, + "step": 1773 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023686474639263475, + "loss": 1.6284, + "step": 1774 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002367020486805374, + "loss": 1.7522, + "step": 1775 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002365393258300407, + "loss": 1.5532, + "step": 1776 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002363765779525988, + "loss": 1.658, + "step": 1777 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002362138051596829, + "loss": 1.7023, + "step": 1778 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002360510075627812, + "loss": 1.6038, + "step": 1779 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023588818527339896, + "loss": 1.7266, + "step": 1780 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002357253384030583, + "loss": 1.4659, + "step": 1781 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002355624670632983, + "loss": 1.5802, + "step": 1782 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023539957136567465, + "loss": 1.6673, + "step": 1783 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023523665142175987, + "loss": 1.6417, + "step": 1784 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002350737073431429, + "loss": 1.6511, + "step": 1785 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023491073924142942, + "loss": 1.5847, + "step": 1786 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023474774722824133, + "loss": 1.5309, + "step": 1787 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023458473141521719, + "loss": 1.6332, + "step": 1788 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002344216919140116, + "loss": 1.5327, + "step": 1789 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023425862883629553, + "loss": 1.4415, + "step": 1790 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023409554229375607, + "loss": 1.7203, + "step": 1791 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002339324323980964, + "loss": 1.4104, + "step": 1792 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002337692992610356, + "loss": 1.7439, + "step": 1793 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023360614299430878, + "loss": 1.532, + "step": 1794 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023344296370966686, + "loss": 1.657, + "step": 1795 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023327976151887655, + "loss": 1.5843, + "step": 1796 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002331165365337201, + "loss": 1.7402, + "step": 1797 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002329532888659956, + "loss": 1.5051, + "step": 1798 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023279001862751652, + "loss": 1.5654, + "step": 1799 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023262672593011178, + "loss": 1.5687, + "step": 1800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023246341088562584, + "loss": 1.6106, + "step": 1801 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023230007360591835, + "loss": 1.6117, + "step": 1802 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023213671420286414, + "loss": 1.5951, + "step": 1803 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023197333278835328, + "loss": 1.4736, + "step": 1804 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023180992947429094, + "loss": 1.5587, + "step": 1805 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002316465043725972, + "loss": 1.4072, + "step": 1806 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023148305759520708, + "loss": 1.6154, + "step": 1807 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002313195892540705, + "loss": 1.5905, + "step": 1808 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002311560994611521, + "loss": 1.4672, + "step": 1809 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023099258832843123, + "loss": 1.6173, + "step": 1810 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002308290559679019, + "loss": 1.5939, + "step": 1811 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023066550249157262, + "loss": 1.511, + "step": 1812 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023050192801146632, + "loss": 1.554, + "step": 1813 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002303383326396204, + "loss": 1.6823, + "step": 1814 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023017471648808653, + "loss": 1.6714, + "step": 1815 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023001107966893054, + "loss": 1.5731, + "step": 1816 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022984742229423258, + "loss": 1.5146, + "step": 1817 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022968374447608674, + "loss": 1.5385, + "step": 1818 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022952004632660112, + "loss": 1.3938, + "step": 1819 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022952004632660112, + "loss": 1.4386, + "step": 1820 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022935632795789785, + "loss": 1.6079, + "step": 1821 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022919258948211283, + "loss": 1.5689, + "step": 1822 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022902883101139573, + "loss": 1.6072, + "step": 1823 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002288650526579099, + "loss": 1.4643, + "step": 1824 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022870125453383242, + "loss": 1.677, + "step": 1825 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022853743675135376, + "loss": 1.5363, + "step": 1826 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022837359942267797, + "loss": 1.6787, + "step": 1827 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002282097426600224, + "loss": 1.7226, + "step": 1828 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022804586657561773, + "loss": 1.5577, + "step": 1829 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022788197128170796, + "loss": 1.6419, + "step": 1830 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022771805689055015, + "loss": 1.4039, + "step": 1831 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022755412351441447, + "loss": 1.5384, + "step": 1832 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022739017126558416, + "loss": 1.5469, + "step": 1833 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022722620025635525, + "loss": 1.5433, + "step": 1834 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002270622105990367, + "loss": 1.58, + "step": 1835 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022689820240595027, + "loss": 1.6223, + "step": 1836 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022673417578943043, + "loss": 1.7288, + "step": 1837 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022657013086182414, + "loss": 1.5495, + "step": 1838 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022640606773549092, + "loss": 1.744, + "step": 1839 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022624198652280297, + "loss": 1.7007, + "step": 1840 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022607788733614465, + "loss": 1.5063, + "step": 1841 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022591377028791275, + "loss": 1.4925, + "step": 1842 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002257496354905162, + "loss": 1.599, + "step": 1843 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002255854830563761, + "loss": 1.6931, + "step": 1844 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022542131309792577, + "loss": 1.5116, + "step": 1845 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022525712572761037, + "loss": 1.5694, + "step": 1846 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022509292105788706, + "loss": 1.5558, + "step": 1847 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022492869920122486, + "loss": 1.564, + "step": 1848 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022476446027010455, + "loss": 1.6279, + "step": 1849 + }, + { + "epoch": 0.47, + "learning_rate": 0.00022460020437701847, + "loss": 1.7296, + "step": 1850 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022443593163447086, + "loss": 1.6083, + "step": 1851 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022427164215497718, + "loss": 1.6426, + "step": 1852 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002241073360510646, + "loss": 1.6725, + "step": 1853 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022394301343527166, + "loss": 1.5463, + "step": 1854 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022377867442014804, + "loss": 1.644, + "step": 1855 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002236143191182548, + "loss": 1.6266, + "step": 1856 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022344994764216406, + "loss": 1.6824, + "step": 1857 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002232855601044592, + "loss": 1.5898, + "step": 1858 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022312115661773425, + "loss": 1.6571, + "step": 1859 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022295673729459456, + "loss": 1.6367, + "step": 1860 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022279230224765608, + "loss": 1.7015, + "step": 1861 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002226278515895456, + "loss": 1.4783, + "step": 1862 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022246338543290063, + "loss": 1.5272, + "step": 1863 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022229890389036921, + "loss": 1.4977, + "step": 1864 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022213440707461, + "loss": 1.4683, + "step": 1865 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022196989509829212, + "loss": 1.6906, + "step": 1866 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022180536807409501, + "loss": 1.6675, + "step": 1867 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022164082611470854, + "loss": 1.5075, + "step": 1868 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022147626933283264, + "loss": 1.5927, + "step": 1869 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022131169784117747, + "loss": 1.5544, + "step": 1870 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022114711175246336, + "loss": 1.6529, + "step": 1871 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022098251117942045, + "loss": 1.5055, + "step": 1872 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022081789623478896, + "loss": 1.559, + "step": 1873 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022065326703131892, + "loss": 1.5936, + "step": 1874 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022048862368177007, + "loss": 1.5641, + "step": 1875 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022032396629891187, + "loss": 1.6861, + "step": 1876 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022015929499552333, + "loss": 1.6423, + "step": 1877 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002199946098843932, + "loss": 1.64, + "step": 1878 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021982991107831948, + "loss": 1.5091, + "step": 1879 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021966519869010958, + "loss": 1.4667, + "step": 1880 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021950047283258024, + "loss": 1.6449, + "step": 1881 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021933573361855748, + "loss": 1.6666, + "step": 1882 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021917098116087643, + "loss": 1.6089, + "step": 1883 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021900621557238123, + "loss": 1.6387, + "step": 1884 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021884143696592511, + "loss": 1.6716, + "step": 1885 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021867664545437013, + "loss": 1.5598, + "step": 1886 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021851184115058726, + "loss": 1.5382, + "step": 1887 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021834702416745626, + "loss": 1.6058, + "step": 1888 + }, + { + "epoch": 0.48, + "learning_rate": 0.00021818219461786545, + "loss": 1.3473, + "step": 1889 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021801735261471183, + "loss": 1.7165, + "step": 1890 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021785249827090093, + "loss": 1.6397, + "step": 1891 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002176876316993467, + "loss": 1.7583, + "step": 1892 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021752275301297158, + "loss": 1.6599, + "step": 1893 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021735786232470616, + "loss": 1.6129, + "step": 1894 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021719295974748933, + "loss": 1.6891, + "step": 1895 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021702804539426804, + "loss": 1.6068, + "step": 1896 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021686311937799748, + "loss": 1.5649, + "step": 1897 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021669818181164064, + "loss": 1.5479, + "step": 1898 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021653323280816853, + "loss": 1.6326, + "step": 1899 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021636827248055994, + "loss": 1.5672, + "step": 1900 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002162033009418015, + "loss": 1.7149, + "step": 1901 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021603831830488738, + "loss": 1.7113, + "step": 1902 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021587332468281947, + "loss": 1.5232, + "step": 1903 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002157083201886071, + "loss": 1.6994, + "step": 1904 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021554330493526712, + "loss": 1.4806, + "step": 1905 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002153782790358238, + "loss": 1.6832, + "step": 1906 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021521324260330852, + "loss": 1.4143, + "step": 1907 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021504819575076, + "loss": 1.6276, + "step": 1908 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021488313859122413, + "loss": 1.5076, + "step": 1909 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002147180712377537, + "loss": 1.5225, + "step": 1910 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021455299380340867, + "loss": 1.514, + "step": 1911 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002143879064012558, + "loss": 1.6415, + "step": 1912 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021422280914436866, + "loss": 1.6178, + "step": 1913 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021405770214582765, + "loss": 1.5234, + "step": 1914 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021389258551871975, + "loss": 1.6361, + "step": 1915 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021372745937613865, + "loss": 1.6212, + "step": 1916 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021356232383118443, + "loss": 1.6064, + "step": 1917 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021339717899696373, + "loss": 1.5562, + "step": 1918 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021323202498658942, + "loss": 1.5651, + "step": 1919 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002130668619131808, + "loss": 1.731, + "step": 1920 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002129016898898633, + "loss": 1.502, + "step": 1921 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021273650902976841, + "loss": 1.4079, + "step": 1922 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021257131944603388, + "loss": 1.7191, + "step": 1923 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021240612125180314, + "loss": 1.7518, + "step": 1924 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021224091456022585, + "loss": 1.7236, + "step": 1925 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021207569948445722, + "loss": 1.7341, + "step": 1926 + }, + { + "epoch": 0.49, + "learning_rate": 0.00021191047613765832, + "loss": 1.4978, + "step": 1927 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002117452446329959, + "loss": 1.5701, + "step": 1928 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021158000508364225, + "loss": 1.6809, + "step": 1929 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021141475760277513, + "loss": 1.5072, + "step": 1930 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002112495023035779, + "loss": 1.7324, + "step": 1931 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021108423929923914, + "loss": 1.519, + "step": 1932 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021091896870295264, + "loss": 1.5575, + "step": 1933 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021075369062791759, + "loss": 1.6364, + "step": 1934 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021058840518733813, + "loss": 1.7163, + "step": 1935 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002104231124944235, + "loss": 1.5619, + "step": 1936 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021025781266238798, + "loss": 1.4483, + "step": 1937 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021009250580445064, + "loss": 1.6906, + "step": 1938 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020992719203383536, + "loss": 1.3692, + "step": 1939 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020976187146377084, + "loss": 1.5346, + "step": 1940 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020959654420749048, + "loss": 1.587, + "step": 1941 + }, + { + "epoch": 0.5, + "learning_rate": 0.000209431210378232, + "loss": 1.4805, + "step": 1942 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020926587008923795, + "loss": 1.7107, + "step": 1943 + }, + { + "epoch": 0.5, + "learning_rate": 0.000209100523453755, + "loss": 1.5559, + "step": 1944 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020893517058503437, + "loss": 1.6289, + "step": 1945 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020876981159633154, + "loss": 1.5435, + "step": 1946 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002086044466009061, + "loss": 1.7036, + "step": 1947 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020843907571202182, + "loss": 1.6263, + "step": 1948 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020827369904294642, + "loss": 1.6056, + "step": 1949 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020810831670695166, + "loss": 1.5738, + "step": 1950 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020794292881731315, + "loss": 1.6017, + "step": 1951 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020777753548731034, + "loss": 1.6065, + "step": 1952 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002076121368302264, + "loss": 1.4788, + "step": 1953 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020744673295934805, + "loss": 1.4585, + "step": 1954 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020728132398796566, + "loss": 1.6326, + "step": 1955 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002071159100293731, + "loss": 1.5254, + "step": 1956 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002069504911968677, + "loss": 1.5967, + "step": 1957 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020678506760375004, + "loss": 1.6305, + "step": 1958 + }, + { + "epoch": 0.5, + "learning_rate": 0.000206619639363324, + "loss": 1.6563, + "step": 1959 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020645420658889664, + "loss": 1.7254, + "step": 1960 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002062887693937781, + "loss": 1.6261, + "step": 1961 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020612332789128157, + "loss": 1.6886, + "step": 1962 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020595788219472327, + "loss": 1.6884, + "step": 1963 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020579243241742213, + "loss": 1.6986, + "step": 1964 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020562697867269997, + "loss": 1.5229, + "step": 1965 + }, + { + "epoch": 0.5, + "learning_rate": 0.00020546152107388133, + "loss": 1.5998, + "step": 1966 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002052960597342934, + "loss": 1.6359, + "step": 1967 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020513059476726583, + "loss": 1.462, + "step": 1968 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020496512628613093, + "loss": 1.5815, + "step": 1969 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020479965440422325, + "loss": 1.4841, + "step": 1970 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020463417923487982, + "loss": 1.666, + "step": 1971 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020446870089143974, + "loss": 1.5831, + "step": 1972 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020430321948724446, + "loss": 1.6392, + "step": 1973 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002041377351356374, + "loss": 1.5278, + "step": 1974 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020397224794996413, + "loss": 1.694, + "step": 1975 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020380675804357198, + "loss": 1.6439, + "step": 1976 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020364126552981028, + "loss": 1.776, + "step": 1977 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020347577052203019, + "loss": 1.6475, + "step": 1978 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020331027313358438, + "loss": 1.5769, + "step": 1979 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020314477347782735, + "loss": 1.6306, + "step": 1980 + }, + { + "epoch": 0.51, + "learning_rate": 0.000202979271668115, + "loss": 1.5303, + "step": 1981 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020281376781780484, + "loss": 1.6994, + "step": 1982 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020264826204025575, + "loss": 1.5276, + "step": 1983 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020248275444882788, + "loss": 1.4642, + "step": 1984 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002023172451568825, + "loss": 1.5447, + "step": 1985 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020215173427778235, + "loss": 1.5562, + "step": 1986 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020198622192489095, + "loss": 1.612, + "step": 1987 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020182070821157312, + "loss": 1.5208, + "step": 1988 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020165519325119436, + "loss": 1.6424, + "step": 1989 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020148967715712113, + "loss": 1.4241, + "step": 1990 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020132416004272065, + "loss": 1.4109, + "step": 1991 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020115864202136093, + "loss": 1.626, + "step": 1992 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002009931232064105, + "loss": 1.5725, + "step": 1993 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020082760371123844, + "loss": 1.5214, + "step": 1994 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020066208364921426, + "loss": 1.6411, + "step": 1995 + }, + { + "epoch": 0.51, + "learning_rate": 0.000200496563133708, + "loss": 1.5666, + "step": 1996 + }, + { + "epoch": 0.51, + "learning_rate": 0.00020033104227808982, + "loss": 1.8666, + "step": 1997 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002001655211957303, + "loss": 1.4865, + "step": 1998 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002, + "loss": 1.5759, + "step": 1999 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019983447880426972, + "loss": 1.6067, + "step": 2000 + } + ], + "logging_steps": 1, + "max_steps": 3896, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 7.066888272839967e+17, + "trial_name": null, + "trial_params": null +}