{ "best_metric": null, "best_model_checkpoint": null, "epoch": 500.0, "global_step": 31000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 40.06241989135742, "eval_runtime": 1.547, "eval_samples_per_second": 79.509, "eval_steps_per_second": 10.343, "step": 62 }, { "epoch": 2.0, "eval_loss": 39.472923278808594, "eval_runtime": 1.5508, "eval_samples_per_second": 79.312, "eval_steps_per_second": 10.317, "step": 124 }, { "epoch": 3.0, "eval_loss": 39.0081672668457, "eval_runtime": 1.546, "eval_samples_per_second": 79.558, "eval_steps_per_second": 10.349, "step": 186 }, { "epoch": 4.0, "eval_loss": 38.20319366455078, "eval_runtime": 1.5507, "eval_samples_per_second": 79.317, "eval_steps_per_second": 10.318, "step": 248 }, { "epoch": 5.0, "eval_loss": 37.33580017089844, "eval_runtime": 1.5485, "eval_samples_per_second": 79.431, "eval_steps_per_second": 10.332, "step": 310 }, { "epoch": 6.0, "eval_loss": 36.15742111206055, "eval_runtime": 1.5522, "eval_samples_per_second": 79.244, "eval_steps_per_second": 10.308, "step": 372 }, { "epoch": 7.0, "eval_loss": 35.14997863769531, "eval_runtime": 1.5454, "eval_samples_per_second": 79.589, "eval_steps_per_second": 10.353, "step": 434 }, { "epoch": 8.0, "eval_loss": 33.69432067871094, "eval_runtime": 1.5477, "eval_samples_per_second": 79.472, "eval_steps_per_second": 10.338, "step": 496 }, { "epoch": 8.06, "learning_rate": 6.887096774193549e-08, "loss": 39.2118, "step": 500 }, { "epoch": 9.0, "eval_loss": 32.3968391418457, "eval_runtime": 1.5552, "eval_samples_per_second": 79.09, "eval_steps_per_second": 10.288, "step": 558 }, { "epoch": 10.0, "eval_loss": 30.730754852294922, "eval_runtime": 1.5637, "eval_samples_per_second": 78.658, "eval_steps_per_second": 10.232, "step": 620 }, { "epoch": 11.0, "eval_loss": 29.227901458740234, "eval_runtime": 1.553, "eval_samples_per_second": 79.199, "eval_steps_per_second": 10.302, "step": 682 }, { "epoch": 12.0, "eval_loss": 27.82735824584961, "eval_runtime": 1.5637, "eval_samples_per_second": 78.659, "eval_steps_per_second": 10.232, "step": 744 }, { "epoch": 13.0, "eval_loss": 26.49405288696289, "eval_runtime": 1.5536, "eval_samples_per_second": 79.17, "eval_steps_per_second": 10.298, "step": 806 }, { "epoch": 14.0, "eval_loss": 24.659284591674805, "eval_runtime": 1.5457, "eval_samples_per_second": 79.576, "eval_steps_per_second": 10.351, "step": 868 }, { "epoch": 15.0, "eval_loss": 23.44048309326172, "eval_runtime": 1.5478, "eval_samples_per_second": 79.467, "eval_steps_per_second": 10.337, "step": 930 }, { "epoch": 16.0, "eval_loss": 21.999380111694336, "eval_runtime": 1.5447, "eval_samples_per_second": 79.626, "eval_steps_per_second": 10.358, "step": 992 }, { "epoch": 16.13, "learning_rate": 6.774193548387097e-08, "loss": 27.5423, "step": 1000 }, { "epoch": 17.0, "eval_loss": 21.027385711669922, "eval_runtime": 1.5545, "eval_samples_per_second": 79.127, "eval_steps_per_second": 10.293, "step": 1054 }, { "epoch": 18.0, "eval_loss": 19.89065170288086, "eval_runtime": 1.565, "eval_samples_per_second": 78.592, "eval_steps_per_second": 10.223, "step": 1116 }, { "epoch": 19.0, "eval_loss": 19.33472442626953, "eval_runtime": 1.5542, "eval_samples_per_second": 79.141, "eval_steps_per_second": 10.295, "step": 1178 }, { "epoch": 20.0, "eval_loss": 18.324054718017578, "eval_runtime": 1.5611, "eval_samples_per_second": 78.793, "eval_steps_per_second": 10.249, "step": 1240 }, { "epoch": 21.0, "eval_loss": 17.521472930908203, "eval_runtime": 1.5648, "eval_samples_per_second": 78.603, "eval_steps_per_second": 10.225, "step": 1302 }, { "epoch": 22.0, "eval_loss": 16.96886444091797, "eval_runtime": 1.5541, "eval_samples_per_second": 79.144, "eval_steps_per_second": 10.295, "step": 1364 }, { "epoch": 23.0, "eval_loss": 16.298294067382812, "eval_runtime": 1.5454, "eval_samples_per_second": 79.59, "eval_steps_per_second": 10.353, "step": 1426 }, { "epoch": 24.0, "eval_loss": 15.870747566223145, "eval_runtime": 1.5451, "eval_samples_per_second": 79.609, "eval_steps_per_second": 10.356, "step": 1488 }, { "epoch": 24.19, "learning_rate": 6.661290322580646e-08, "loss": 18.4542, "step": 1500 }, { "epoch": 25.0, "eval_loss": 15.465510368347168, "eval_runtime": 1.5532, "eval_samples_per_second": 79.191, "eval_steps_per_second": 10.301, "step": 1550 }, { "epoch": 26.0, "eval_loss": 15.184890747070312, "eval_runtime": 1.5516, "eval_samples_per_second": 79.271, "eval_steps_per_second": 10.312, "step": 1612 }, { "epoch": 27.0, "eval_loss": 14.780122756958008, "eval_runtime": 1.5538, "eval_samples_per_second": 79.159, "eval_steps_per_second": 10.297, "step": 1674 }, { "epoch": 28.0, "eval_loss": 14.557552337646484, "eval_runtime": 1.5541, "eval_samples_per_second": 79.147, "eval_steps_per_second": 10.296, "step": 1736 }, { "epoch": 29.0, "eval_loss": 14.421510696411133, "eval_runtime": 1.5516, "eval_samples_per_second": 79.275, "eval_steps_per_second": 10.312, "step": 1798 }, { "epoch": 30.0, "eval_loss": 13.931785583496094, "eval_runtime": 1.5492, "eval_samples_per_second": 79.395, "eval_steps_per_second": 10.328, "step": 1860 }, { "epoch": 31.0, "eval_loss": 13.980083465576172, "eval_runtime": 1.5454, "eval_samples_per_second": 79.593, "eval_steps_per_second": 10.354, "step": 1922 }, { "epoch": 32.0, "eval_loss": 13.709578514099121, "eval_runtime": 1.5464, "eval_samples_per_second": 79.537, "eval_steps_per_second": 10.346, "step": 1984 }, { "epoch": 32.26, "learning_rate": 6.548387096774194e-08, "loss": 13.9556, "step": 2000 }, { "epoch": 33.0, "eval_loss": 13.463862419128418, "eval_runtime": 1.5524, "eval_samples_per_second": 79.233, "eval_steps_per_second": 10.307, "step": 2046 }, { "epoch": 34.0, "eval_loss": 13.298738479614258, "eval_runtime": 1.5554, "eval_samples_per_second": 79.08, "eval_steps_per_second": 10.287, "step": 2108 }, { "epoch": 35.0, "eval_loss": 13.23029613494873, "eval_runtime": 1.5584, "eval_samples_per_second": 78.926, "eval_steps_per_second": 10.267, "step": 2170 }, { "epoch": 36.0, "eval_loss": 13.24497127532959, "eval_runtime": 1.5624, "eval_samples_per_second": 78.724, "eval_steps_per_second": 10.24, "step": 2232 }, { "epoch": 37.0, "eval_loss": 13.057371139526367, "eval_runtime": 1.5518, "eval_samples_per_second": 79.261, "eval_steps_per_second": 10.31, "step": 2294 }, { "epoch": 38.0, "eval_loss": 12.902210235595703, "eval_runtime": 1.5456, "eval_samples_per_second": 79.583, "eval_steps_per_second": 10.352, "step": 2356 }, { "epoch": 39.0, "eval_loss": 12.819963455200195, "eval_runtime": 1.5458, "eval_samples_per_second": 79.571, "eval_steps_per_second": 10.351, "step": 2418 }, { "epoch": 40.0, "eval_loss": 12.729530334472656, "eval_runtime": 1.5456, "eval_samples_per_second": 79.582, "eval_steps_per_second": 10.352, "step": 2480 }, { "epoch": 40.32, "learning_rate": 6.435483870967743e-08, "loss": 12.111, "step": 2500 }, { "epoch": 41.0, "eval_loss": 12.840241432189941, "eval_runtime": 1.5515, "eval_samples_per_second": 79.277, "eval_steps_per_second": 10.312, "step": 2542 }, { "epoch": 42.0, "eval_loss": 12.690611839294434, "eval_runtime": 1.5538, "eval_samples_per_second": 79.163, "eval_steps_per_second": 10.298, "step": 2604 }, { "epoch": 43.0, "eval_loss": 12.54220199584961, "eval_runtime": 1.554, "eval_samples_per_second": 79.15, "eval_steps_per_second": 10.296, "step": 2666 }, { "epoch": 44.0, "eval_loss": 12.503222465515137, "eval_runtime": 1.5553, "eval_samples_per_second": 79.086, "eval_steps_per_second": 10.288, "step": 2728 }, { "epoch": 45.0, "eval_loss": 12.296753883361816, "eval_runtime": 1.5516, "eval_samples_per_second": 79.275, "eval_steps_per_second": 10.312, "step": 2790 }, { "epoch": 46.0, "eval_loss": 12.332414627075195, "eval_runtime": 1.5484, "eval_samples_per_second": 79.436, "eval_steps_per_second": 10.333, "step": 2852 }, { "epoch": 47.0, "eval_loss": 12.254257202148438, "eval_runtime": 1.551, "eval_samples_per_second": 79.301, "eval_steps_per_second": 10.316, "step": 2914 }, { "epoch": 48.0, "eval_loss": 12.24339485168457, "eval_runtime": 1.5518, "eval_samples_per_second": 79.26, "eval_steps_per_second": 10.31, "step": 2976 }, { "epoch": 48.39, "learning_rate": 6.322580645161291e-08, "loss": 11.3304, "step": 3000 }, { "epoch": 49.0, "eval_loss": 12.251891136169434, "eval_runtime": 1.5529, "eval_samples_per_second": 79.207, "eval_steps_per_second": 10.303, "step": 3038 }, { "epoch": 50.0, "eval_loss": 12.112042427062988, "eval_runtime": 1.56, "eval_samples_per_second": 78.845, "eval_steps_per_second": 10.256, "step": 3100 }, { "epoch": 51.0, "eval_loss": 12.196945190429688, "eval_runtime": 1.5565, "eval_samples_per_second": 79.023, "eval_steps_per_second": 10.279, "step": 3162 }, { "epoch": 52.0, "eval_loss": 11.9977388381958, "eval_runtime": 1.5836, "eval_samples_per_second": 77.673, "eval_steps_per_second": 10.104, "step": 3224 }, { "epoch": 53.0, "eval_loss": 12.175936698913574, "eval_runtime": 1.5513, "eval_samples_per_second": 79.287, "eval_steps_per_second": 10.314, "step": 3286 }, { "epoch": 54.0, "eval_loss": 11.980809211730957, "eval_runtime": 1.546, "eval_samples_per_second": 79.559, "eval_steps_per_second": 10.349, "step": 3348 }, { "epoch": 55.0, "eval_loss": 12.149847030639648, "eval_runtime": 1.5462, "eval_samples_per_second": 79.548, "eval_steps_per_second": 10.348, "step": 3410 }, { "epoch": 56.0, "eval_loss": 11.985103607177734, "eval_runtime": 1.5457, "eval_samples_per_second": 79.577, "eval_steps_per_second": 10.351, "step": 3472 }, { "epoch": 56.45, "learning_rate": 6.20967741935484e-08, "loss": 10.9471, "step": 3500 }, { "epoch": 57.0, "eval_loss": 12.080038070678711, "eval_runtime": 1.5551, "eval_samples_per_second": 79.095, "eval_steps_per_second": 10.289, "step": 3534 }, { "epoch": 58.0, "eval_loss": 11.881911277770996, "eval_runtime": 1.5543, "eval_samples_per_second": 79.133, "eval_steps_per_second": 10.294, "step": 3596 }, { "epoch": 59.0, "eval_loss": 12.009904861450195, "eval_runtime": 1.5505, "eval_samples_per_second": 79.328, "eval_steps_per_second": 10.319, "step": 3658 }, { "epoch": 60.0, "eval_loss": 11.861567497253418, "eval_runtime": 1.5654, "eval_samples_per_second": 78.574, "eval_steps_per_second": 10.221, "step": 3720 }, { "epoch": 61.0, "eval_loss": 11.89500617980957, "eval_runtime": 1.5515, "eval_samples_per_second": 79.279, "eval_steps_per_second": 10.313, "step": 3782 }, { "epoch": 62.0, "eval_loss": 11.830110549926758, "eval_runtime": 1.5521, "eval_samples_per_second": 79.249, "eval_steps_per_second": 10.309, "step": 3844 }, { "epoch": 63.0, "eval_loss": 12.096732139587402, "eval_runtime": 1.5464, "eval_samples_per_second": 79.542, "eval_steps_per_second": 10.347, "step": 3906 }, { "epoch": 64.0, "eval_loss": 11.837874412536621, "eval_runtime": 1.5533, "eval_samples_per_second": 79.184, "eval_steps_per_second": 10.3, "step": 3968 }, { "epoch": 64.52, "learning_rate": 6.096774193548388e-08, "loss": 10.6841, "step": 4000 }, { "epoch": 65.0, "eval_loss": 11.909323692321777, "eval_runtime": 1.5534, "eval_samples_per_second": 79.181, "eval_steps_per_second": 10.3, "step": 4030 }, { "epoch": 66.0, "eval_loss": 11.91748332977295, "eval_runtime": 1.5598, "eval_samples_per_second": 78.854, "eval_steps_per_second": 10.257, "step": 4092 }, { "epoch": 67.0, "eval_loss": 11.73180103302002, "eval_runtime": 1.5498, "eval_samples_per_second": 79.365, "eval_steps_per_second": 10.324, "step": 4154 }, { "epoch": 68.0, "eval_loss": 11.830022811889648, "eval_runtime": 1.5461, "eval_samples_per_second": 79.557, "eval_steps_per_second": 10.349, "step": 4216 }, { "epoch": 69.0, "eval_loss": 11.599220275878906, "eval_runtime": 1.5555, "eval_samples_per_second": 79.074, "eval_steps_per_second": 10.286, "step": 4278 }, { "epoch": 70.0, "eval_loss": 11.604393005371094, "eval_runtime": 1.5547, "eval_samples_per_second": 79.114, "eval_steps_per_second": 10.291, "step": 4340 }, { "epoch": 71.0, "eval_loss": 11.693653106689453, "eval_runtime": 1.5528, "eval_samples_per_second": 79.212, "eval_steps_per_second": 10.304, "step": 4402 }, { "epoch": 72.0, "eval_loss": 11.596038818359375, "eval_runtime": 1.5565, "eval_samples_per_second": 79.023, "eval_steps_per_second": 10.279, "step": 4464 }, { "epoch": 72.58, "learning_rate": 5.983870967741936e-08, "loss": 10.542, "step": 4500 }, { "epoch": 73.0, "eval_loss": 11.545083045959473, "eval_runtime": 1.5539, "eval_samples_per_second": 79.157, "eval_steps_per_second": 10.297, "step": 4526 }, { "epoch": 74.0, "eval_loss": 11.403264999389648, "eval_runtime": 1.5513, "eval_samples_per_second": 79.291, "eval_steps_per_second": 10.314, "step": 4588 }, { "epoch": 75.0, "eval_loss": 11.480517387390137, "eval_runtime": 1.5515, "eval_samples_per_second": 79.28, "eval_steps_per_second": 10.313, "step": 4650 }, { "epoch": 76.0, "eval_loss": 11.539589881896973, "eval_runtime": 1.552, "eval_samples_per_second": 79.252, "eval_steps_per_second": 10.309, "step": 4712 }, { "epoch": 77.0, "eval_loss": 11.492401123046875, "eval_runtime": 1.5522, "eval_samples_per_second": 79.245, "eval_steps_per_second": 10.308, "step": 4774 }, { "epoch": 78.0, "eval_loss": 11.428875923156738, "eval_runtime": 1.5583, "eval_samples_per_second": 78.931, "eval_steps_per_second": 10.268, "step": 4836 }, { "epoch": 79.0, "eval_loss": 11.364572525024414, "eval_runtime": 1.5553, "eval_samples_per_second": 79.085, "eval_steps_per_second": 10.287, "step": 4898 }, { "epoch": 80.0, "eval_loss": 11.362411499023438, "eval_runtime": 1.5451, "eval_samples_per_second": 79.605, "eval_steps_per_second": 10.355, "step": 4960 }, { "epoch": 80.65, "learning_rate": 5.870967741935484e-08, "loss": 10.4457, "step": 5000 }, { "epoch": 81.0, "eval_loss": 11.462297439575195, "eval_runtime": 1.5542, "eval_samples_per_second": 79.139, "eval_steps_per_second": 10.294, "step": 5022 }, { "epoch": 82.0, "eval_loss": 11.522425651550293, "eval_runtime": 1.5538, "eval_samples_per_second": 79.159, "eval_steps_per_second": 10.297, "step": 5084 }, { "epoch": 83.0, "eval_loss": 11.419597625732422, "eval_runtime": 1.5519, "eval_samples_per_second": 79.26, "eval_steps_per_second": 10.31, "step": 5146 }, { "epoch": 84.0, "eval_loss": 11.435912132263184, "eval_runtime": 1.572, "eval_samples_per_second": 78.243, "eval_steps_per_second": 10.178, "step": 5208 }, { "epoch": 85.0, "eval_loss": 11.434619903564453, "eval_runtime": 1.5634, "eval_samples_per_second": 78.674, "eval_steps_per_second": 10.234, "step": 5270 }, { "epoch": 86.0, "eval_loss": 11.463889122009277, "eval_runtime": 1.5521, "eval_samples_per_second": 79.248, "eval_steps_per_second": 10.309, "step": 5332 }, { "epoch": 87.0, "eval_loss": 11.230652809143066, "eval_runtime": 1.5468, "eval_samples_per_second": 79.521, "eval_steps_per_second": 10.344, "step": 5394 }, { "epoch": 88.0, "eval_loss": 11.526171684265137, "eval_runtime": 1.5455, "eval_samples_per_second": 79.584, "eval_steps_per_second": 10.352, "step": 5456 }, { "epoch": 88.71, "learning_rate": 5.758064516129033e-08, "loss": 10.3854, "step": 5500 }, { "epoch": 89.0, "eval_loss": 11.665349006652832, "eval_runtime": 1.5505, "eval_samples_per_second": 79.329, "eval_steps_per_second": 10.319, "step": 5518 }, { "epoch": 90.0, "eval_loss": 11.3471040725708, "eval_runtime": 1.5506, "eval_samples_per_second": 79.325, "eval_steps_per_second": 10.319, "step": 5580 }, { "epoch": 91.0, "eval_loss": 11.304780960083008, "eval_runtime": 1.5529, "eval_samples_per_second": 79.209, "eval_steps_per_second": 10.304, "step": 5642 }, { "epoch": 92.0, "eval_loss": 11.36330509185791, "eval_runtime": 1.5593, "eval_samples_per_second": 78.88, "eval_steps_per_second": 10.261, "step": 5704 }, { "epoch": 93.0, "eval_loss": 11.343398094177246, "eval_runtime": 1.5672, "eval_samples_per_second": 78.483, "eval_steps_per_second": 10.209, "step": 5766 }, { "epoch": 94.0, "eval_loss": 11.257966041564941, "eval_runtime": 1.5518, "eval_samples_per_second": 79.263, "eval_steps_per_second": 10.311, "step": 5828 }, { "epoch": 95.0, "eval_loss": 11.270854949951172, "eval_runtime": 1.5445, "eval_samples_per_second": 79.635, "eval_steps_per_second": 10.359, "step": 5890 }, { "epoch": 96.0, "eval_loss": 11.198323249816895, "eval_runtime": 1.5471, "eval_samples_per_second": 79.503, "eval_steps_per_second": 10.342, "step": 5952 }, { "epoch": 96.77, "learning_rate": 5.6451612903225805e-08, "loss": 10.324, "step": 6000 }, { "epoch": 97.0, "eval_loss": 11.22177791595459, "eval_runtime": 1.5536, "eval_samples_per_second": 79.171, "eval_steps_per_second": 10.299, "step": 6014 }, { "epoch": 98.0, "eval_loss": 11.330657005310059, "eval_runtime": 1.5691, "eval_samples_per_second": 78.388, "eval_steps_per_second": 10.197, "step": 6076 }, { "epoch": 99.0, "eval_loss": 11.203608512878418, "eval_runtime": 1.5575, "eval_samples_per_second": 78.972, "eval_steps_per_second": 10.273, "step": 6138 }, { "epoch": 100.0, "eval_loss": 11.372391700744629, "eval_runtime": 1.5527, "eval_samples_per_second": 79.216, "eval_steps_per_second": 10.305, "step": 6200 }, { "epoch": 101.0, "eval_loss": 11.344269752502441, "eval_runtime": 1.5528, "eval_samples_per_second": 79.21, "eval_steps_per_second": 10.304, "step": 6262 }, { "epoch": 102.0, "eval_loss": 11.264634132385254, "eval_runtime": 1.5533, "eval_samples_per_second": 79.188, "eval_steps_per_second": 10.301, "step": 6324 }, { "epoch": 103.0, "eval_loss": 11.27165699005127, "eval_runtime": 1.549, "eval_samples_per_second": 79.408, "eval_steps_per_second": 10.33, "step": 6386 }, { "epoch": 104.0, "eval_loss": 11.310558319091797, "eval_runtime": 1.5464, "eval_samples_per_second": 79.542, "eval_steps_per_second": 10.347, "step": 6448 }, { "epoch": 104.84, "learning_rate": 5.532258064516129e-08, "loss": 10.2981, "step": 6500 }, { "epoch": 105.0, "eval_loss": 11.243507385253906, "eval_runtime": 1.5536, "eval_samples_per_second": 79.17, "eval_steps_per_second": 10.298, "step": 6510 }, { "epoch": 106.0, "eval_loss": 11.249150276184082, "eval_runtime": 1.5518, "eval_samples_per_second": 79.261, "eval_steps_per_second": 10.31, "step": 6572 }, { "epoch": 107.0, "eval_loss": 11.111257553100586, "eval_runtime": 1.5527, "eval_samples_per_second": 79.218, "eval_steps_per_second": 10.305, "step": 6634 }, { "epoch": 108.0, "eval_loss": 11.210649490356445, "eval_runtime": 1.557, "eval_samples_per_second": 78.999, "eval_steps_per_second": 10.276, "step": 6696 }, { "epoch": 109.0, "eval_loss": 11.214300155639648, "eval_runtime": 1.5747, "eval_samples_per_second": 78.112, "eval_steps_per_second": 10.161, "step": 6758 }, { "epoch": 110.0, "eval_loss": 11.015155792236328, "eval_runtime": 1.5516, "eval_samples_per_second": 79.274, "eval_steps_per_second": 10.312, "step": 6820 }, { "epoch": 111.0, "eval_loss": 11.08711051940918, "eval_runtime": 1.5448, "eval_samples_per_second": 79.621, "eval_steps_per_second": 10.357, "step": 6882 }, { "epoch": 112.0, "eval_loss": 11.098098754882812, "eval_runtime": 1.5449, "eval_samples_per_second": 79.617, "eval_steps_per_second": 10.357, "step": 6944 }, { "epoch": 112.9, "learning_rate": 5.4193548387096774e-08, "loss": 10.2536, "step": 7000 }, { "epoch": 113.0, "eval_loss": 11.243887901306152, "eval_runtime": 1.5475, "eval_samples_per_second": 79.483, "eval_steps_per_second": 10.339, "step": 7006 }, { "epoch": 114.0, "eval_loss": 11.156224250793457, "eval_runtime": 1.5505, "eval_samples_per_second": 79.327, "eval_steps_per_second": 10.319, "step": 7068 }, { "epoch": 115.0, "eval_loss": 11.118559837341309, "eval_runtime": 1.5498, "eval_samples_per_second": 79.363, "eval_steps_per_second": 10.324, "step": 7130 }, { "epoch": 116.0, "eval_loss": 11.2282133102417, "eval_runtime": 1.562, "eval_samples_per_second": 78.744, "eval_steps_per_second": 10.243, "step": 7192 }, { "epoch": 117.0, "eval_loss": 11.289180755615234, "eval_runtime": 1.5484, "eval_samples_per_second": 79.437, "eval_steps_per_second": 10.333, "step": 7254 }, { "epoch": 118.0, "eval_loss": 11.165923118591309, "eval_runtime": 1.5518, "eval_samples_per_second": 79.263, "eval_steps_per_second": 10.311, "step": 7316 }, { "epoch": 119.0, "eval_loss": 11.34392261505127, "eval_runtime": 1.545, "eval_samples_per_second": 79.613, "eval_steps_per_second": 10.356, "step": 7378 }, { "epoch": 120.0, "eval_loss": 11.199868202209473, "eval_runtime": 1.5455, "eval_samples_per_second": 79.586, "eval_steps_per_second": 10.353, "step": 7440 }, { "epoch": 120.97, "learning_rate": 5.306451612903226e-08, "loss": 10.2336, "step": 7500 }, { "epoch": 121.0, "eval_loss": 11.250391960144043, "eval_runtime": 1.5504, "eval_samples_per_second": 79.337, "eval_steps_per_second": 10.32, "step": 7502 }, { "epoch": 122.0, "eval_loss": 11.289613723754883, "eval_runtime": 1.5534, "eval_samples_per_second": 79.18, "eval_steps_per_second": 10.3, "step": 7564 }, { "epoch": 123.0, "eval_loss": 11.068822860717773, "eval_runtime": 1.5538, "eval_samples_per_second": 79.158, "eval_steps_per_second": 10.297, "step": 7626 }, { "epoch": 124.0, "eval_loss": 11.16740894317627, "eval_runtime": 1.5519, "eval_samples_per_second": 79.258, "eval_steps_per_second": 10.31, "step": 7688 }, { "epoch": 125.0, "eval_loss": 11.207221984863281, "eval_runtime": 1.5529, "eval_samples_per_second": 79.204, "eval_steps_per_second": 10.303, "step": 7750 }, { "epoch": 126.0, "eval_loss": 11.035326957702637, "eval_runtime": 1.552, "eval_samples_per_second": 79.253, "eval_steps_per_second": 10.309, "step": 7812 }, { "epoch": 127.0, "eval_loss": 11.048843383789062, "eval_runtime": 1.548, "eval_samples_per_second": 79.46, "eval_steps_per_second": 10.336, "step": 7874 }, { "epoch": 128.0, "eval_loss": 11.093210220336914, "eval_runtime": 1.5454, "eval_samples_per_second": 79.59, "eval_steps_per_second": 10.353, "step": 7936 }, { "epoch": 129.0, "eval_loss": 11.070518493652344, "eval_runtime": 1.5446, "eval_samples_per_second": 79.63, "eval_steps_per_second": 10.358, "step": 7998 }, { "epoch": 129.03, "learning_rate": 5.193548387096775e-08, "loss": 10.2101, "step": 8000 }, { "epoch": 130.0, "eval_loss": 11.219257354736328, "eval_runtime": 1.5539, "eval_samples_per_second": 79.157, "eval_steps_per_second": 10.297, "step": 8060 }, { "epoch": 131.0, "eval_loss": 11.227781295776367, "eval_runtime": 1.5509, "eval_samples_per_second": 79.307, "eval_steps_per_second": 10.316, "step": 8122 }, { "epoch": 132.0, "eval_loss": 11.136541366577148, "eval_runtime": 1.5783, "eval_samples_per_second": 77.932, "eval_steps_per_second": 10.137, "step": 8184 }, { "epoch": 133.0, "eval_loss": 11.035323143005371, "eval_runtime": 1.5538, "eval_samples_per_second": 79.159, "eval_steps_per_second": 10.297, "step": 8246 }, { "epoch": 134.0, "eval_loss": 11.014593124389648, "eval_runtime": 1.5521, "eval_samples_per_second": 79.246, "eval_steps_per_second": 10.308, "step": 8308 }, { "epoch": 135.0, "eval_loss": 11.237153053283691, "eval_runtime": 1.5457, "eval_samples_per_second": 79.575, "eval_steps_per_second": 10.351, "step": 8370 }, { "epoch": 136.0, "eval_loss": 11.104418754577637, "eval_runtime": 1.5452, "eval_samples_per_second": 79.6, "eval_steps_per_second": 10.355, "step": 8432 }, { "epoch": 137.0, "eval_loss": 11.134021759033203, "eval_runtime": 1.5476, "eval_samples_per_second": 79.478, "eval_steps_per_second": 10.339, "step": 8494 }, { "epoch": 137.1, "learning_rate": 5.0806451612903234e-08, "loss": 10.1991, "step": 8500 }, { "epoch": 138.0, "eval_loss": 11.075273513793945, "eval_runtime": 1.5484, "eval_samples_per_second": 79.436, "eval_steps_per_second": 10.333, "step": 8556 }, { "epoch": 139.0, "eval_loss": 11.021109580993652, "eval_runtime": 1.5493, "eval_samples_per_second": 79.391, "eval_steps_per_second": 10.327, "step": 8618 }, { "epoch": 140.0, "eval_loss": 10.930947303771973, "eval_runtime": 1.5551, "eval_samples_per_second": 79.096, "eval_steps_per_second": 10.289, "step": 8680 }, { "epoch": 141.0, "eval_loss": 10.977023124694824, "eval_runtime": 1.5518, "eval_samples_per_second": 79.263, "eval_steps_per_second": 10.311, "step": 8742 }, { "epoch": 142.0, "eval_loss": 11.079144477844238, "eval_runtime": 1.5523, "eval_samples_per_second": 79.238, "eval_steps_per_second": 10.307, "step": 8804 }, { "epoch": 143.0, "eval_loss": 11.116453170776367, "eval_runtime": 1.5541, "eval_samples_per_second": 79.147, "eval_steps_per_second": 10.296, "step": 8866 }, { "epoch": 144.0, "eval_loss": 10.936894416809082, "eval_runtime": 1.5532, "eval_samples_per_second": 79.19, "eval_steps_per_second": 10.301, "step": 8928 }, { "epoch": 145.0, "eval_loss": 11.121102333068848, "eval_runtime": 1.5444, "eval_samples_per_second": 79.641, "eval_steps_per_second": 10.36, "step": 8990 }, { "epoch": 145.16, "learning_rate": 4.967741935483872e-08, "loss": 10.1925, "step": 9000 }, { "epoch": 146.0, "eval_loss": 11.074503898620605, "eval_runtime": 1.5533, "eval_samples_per_second": 79.185, "eval_steps_per_second": 10.301, "step": 9052 }, { "epoch": 147.0, "eval_loss": 11.104596138000488, "eval_runtime": 1.553, "eval_samples_per_second": 79.201, "eval_steps_per_second": 10.303, "step": 9114 }, { "epoch": 148.0, "eval_loss": 11.025848388671875, "eval_runtime": 1.5524, "eval_samples_per_second": 79.234, "eval_steps_per_second": 10.307, "step": 9176 }, { "epoch": 149.0, "eval_loss": 11.02661418914795, "eval_runtime": 1.5531, "eval_samples_per_second": 79.198, "eval_steps_per_second": 10.302, "step": 9238 }, { "epoch": 150.0, "eval_loss": 11.253996849060059, "eval_runtime": 1.5526, "eval_samples_per_second": 79.224, "eval_steps_per_second": 10.306, "step": 9300 }, { "epoch": 151.0, "eval_loss": 10.970392227172852, "eval_runtime": 1.5474, "eval_samples_per_second": 79.487, "eval_steps_per_second": 10.34, "step": 9362 }, { "epoch": 152.0, "eval_loss": 11.034109115600586, "eval_runtime": 1.5453, "eval_samples_per_second": 79.599, "eval_steps_per_second": 10.354, "step": 9424 }, { "epoch": 153.0, "eval_loss": 11.111385345458984, "eval_runtime": 1.5448, "eval_samples_per_second": 79.622, "eval_steps_per_second": 10.357, "step": 9486 }, { "epoch": 153.23, "learning_rate": 4.85483870967742e-08, "loss": 10.1538, "step": 9500 }, { "epoch": 154.0, "eval_loss": 11.022419929504395, "eval_runtime": 1.5528, "eval_samples_per_second": 79.213, "eval_steps_per_second": 10.304, "step": 9548 }, { "epoch": 155.0, "eval_loss": 10.915043830871582, "eval_runtime": 1.6252, "eval_samples_per_second": 75.682, "eval_steps_per_second": 9.845, "step": 9610 }, { "epoch": 156.0, "eval_loss": 10.930474281311035, "eval_runtime": 1.5606, "eval_samples_per_second": 78.816, "eval_steps_per_second": 10.252, "step": 9672 }, { "epoch": 157.0, "eval_loss": 11.069900512695312, "eval_runtime": 1.5514, "eval_samples_per_second": 79.281, "eval_steps_per_second": 10.313, "step": 9734 }, { "epoch": 158.0, "eval_loss": 10.990084648132324, "eval_runtime": 1.5524, "eval_samples_per_second": 79.23, "eval_steps_per_second": 10.306, "step": 9796 }, { "epoch": 159.0, "eval_loss": 11.068836212158203, "eval_runtime": 1.5493, "eval_samples_per_second": 79.392, "eval_steps_per_second": 10.327, "step": 9858 }, { "epoch": 160.0, "eval_loss": 10.933384895324707, "eval_runtime": 1.5461, "eval_samples_per_second": 79.553, "eval_steps_per_second": 10.348, "step": 9920 }, { "epoch": 161.0, "eval_loss": 10.985239028930664, "eval_runtime": 1.5514, "eval_samples_per_second": 79.283, "eval_steps_per_second": 10.313, "step": 9982 }, { "epoch": 161.29, "learning_rate": 4.741935483870968e-08, "loss": 10.1494, "step": 10000 }, { "epoch": 162.0, "eval_loss": 10.903247833251953, "eval_runtime": 1.5599, "eval_samples_per_second": 78.85, "eval_steps_per_second": 10.257, "step": 10044 }, { "epoch": 163.0, "eval_loss": 10.96179485321045, "eval_runtime": 1.5509, "eval_samples_per_second": 79.309, "eval_steps_per_second": 10.317, "step": 10106 }, { "epoch": 164.0, "eval_loss": 10.947345733642578, "eval_runtime": 1.5514, "eval_samples_per_second": 79.286, "eval_steps_per_second": 10.314, "step": 10168 }, { "epoch": 165.0, "eval_loss": 11.153131484985352, "eval_runtime": 1.5559, "eval_samples_per_second": 79.053, "eval_steps_per_second": 10.283, "step": 10230 }, { "epoch": 166.0, "eval_loss": 10.846189498901367, "eval_runtime": 1.5522, "eval_samples_per_second": 79.244, "eval_steps_per_second": 10.308, "step": 10292 }, { "epoch": 167.0, "eval_loss": 10.957653045654297, "eval_runtime": 1.5541, "eval_samples_per_second": 79.147, "eval_steps_per_second": 10.296, "step": 10354 }, { "epoch": 168.0, "eval_loss": 10.935074806213379, "eval_runtime": 1.5492, "eval_samples_per_second": 79.394, "eval_steps_per_second": 10.328, "step": 10416 }, { "epoch": 169.0, "eval_loss": 11.212529182434082, "eval_runtime": 1.545, "eval_samples_per_second": 79.61, "eval_steps_per_second": 10.356, "step": 10478 }, { "epoch": 169.35, "learning_rate": 4.6290322580645165e-08, "loss": 10.1424, "step": 10500 }, { "epoch": 170.0, "eval_loss": 11.054618835449219, "eval_runtime": 1.5612, "eval_samples_per_second": 78.784, "eval_steps_per_second": 10.248, "step": 10540 }, { "epoch": 171.0, "eval_loss": 10.993365287780762, "eval_runtime": 1.5547, "eval_samples_per_second": 79.116, "eval_steps_per_second": 10.292, "step": 10602 }, { "epoch": 172.0, "eval_loss": 10.891836166381836, "eval_runtime": 1.5523, "eval_samples_per_second": 79.24, "eval_steps_per_second": 10.308, "step": 10664 }, { "epoch": 173.0, "eval_loss": 10.886784553527832, "eval_runtime": 1.5513, "eval_samples_per_second": 79.29, "eval_steps_per_second": 10.314, "step": 10726 }, { "epoch": 174.0, "eval_loss": 11.039985656738281, "eval_runtime": 1.6601, "eval_samples_per_second": 74.092, "eval_steps_per_second": 9.638, "step": 10788 }, { "epoch": 175.0, "eval_loss": 11.119784355163574, "eval_runtime": 1.5449, "eval_samples_per_second": 79.617, "eval_steps_per_second": 10.357, "step": 10850 }, { "epoch": 176.0, "eval_loss": 11.159457206726074, "eval_runtime": 1.5467, "eval_samples_per_second": 79.525, "eval_steps_per_second": 10.345, "step": 10912 }, { "epoch": 177.0, "eval_loss": 11.113701820373535, "eval_runtime": 1.5449, "eval_samples_per_second": 79.616, "eval_steps_per_second": 10.357, "step": 10974 }, { "epoch": 177.42, "learning_rate": 4.516129032258065e-08, "loss": 10.1181, "step": 11000 }, { "epoch": 178.0, "eval_loss": 11.199760437011719, "eval_runtime": 1.5538, "eval_samples_per_second": 79.159, "eval_steps_per_second": 10.297, "step": 11036 }, { "epoch": 179.0, "eval_loss": 11.049378395080566, "eval_runtime": 1.5527, "eval_samples_per_second": 79.215, "eval_steps_per_second": 10.304, "step": 11098 }, { "epoch": 180.0, "eval_loss": 10.948933601379395, "eval_runtime": 1.5527, "eval_samples_per_second": 79.216, "eval_steps_per_second": 10.305, "step": 11160 }, { "epoch": 181.0, "eval_loss": 11.02480411529541, "eval_runtime": 1.5509, "eval_samples_per_second": 79.306, "eval_steps_per_second": 10.316, "step": 11222 }, { "epoch": 182.0, "eval_loss": 10.991153717041016, "eval_runtime": 1.5561, "eval_samples_per_second": 79.045, "eval_steps_per_second": 10.282, "step": 11284 }, { "epoch": 183.0, "eval_loss": 10.922146797180176, "eval_runtime": 1.5488, "eval_samples_per_second": 79.416, "eval_steps_per_second": 10.331, "step": 11346 }, { "epoch": 184.0, "eval_loss": 11.055255889892578, "eval_runtime": 1.547, "eval_samples_per_second": 79.508, "eval_steps_per_second": 10.343, "step": 11408 }, { "epoch": 185.0, "eval_loss": 11.005462646484375, "eval_runtime": 1.5452, "eval_samples_per_second": 79.6, "eval_steps_per_second": 10.354, "step": 11470 }, { "epoch": 185.48, "learning_rate": 4.4032258064516134e-08, "loss": 10.1165, "step": 11500 }, { "epoch": 186.0, "eval_loss": 11.112872123718262, "eval_runtime": 1.5624, "eval_samples_per_second": 78.724, "eval_steps_per_second": 10.24, "step": 11532 }, { "epoch": 187.0, "eval_loss": 11.082246780395508, "eval_runtime": 1.5529, "eval_samples_per_second": 79.209, "eval_steps_per_second": 10.304, "step": 11594 }, { "epoch": 188.0, "eval_loss": 11.050854682922363, "eval_runtime": 1.5513, "eval_samples_per_second": 79.287, "eval_steps_per_second": 10.314, "step": 11656 }, { "epoch": 189.0, "eval_loss": 11.088144302368164, "eval_runtime": 1.5522, "eval_samples_per_second": 79.24, "eval_steps_per_second": 10.308, "step": 11718 }, { "epoch": 190.0, "eval_loss": 10.890948295593262, "eval_runtime": 1.5518, "eval_samples_per_second": 79.263, "eval_steps_per_second": 10.311, "step": 11780 }, { "epoch": 191.0, "eval_loss": 11.048895835876465, "eval_runtime": 1.5439, "eval_samples_per_second": 79.668, "eval_steps_per_second": 10.363, "step": 11842 }, { "epoch": 192.0, "eval_loss": 10.934896469116211, "eval_runtime": 1.548, "eval_samples_per_second": 79.46, "eval_steps_per_second": 10.336, "step": 11904 }, { "epoch": 193.0, "eval_loss": 10.882064819335938, "eval_runtime": 1.5473, "eval_samples_per_second": 79.492, "eval_steps_per_second": 10.34, "step": 11966 }, { "epoch": 193.55, "learning_rate": 4.290322580645162e-08, "loss": 10.1021, "step": 12000 }, { "epoch": 194.0, "eval_loss": 11.055244445800781, "eval_runtime": 1.5566, "eval_samples_per_second": 79.017, "eval_steps_per_second": 10.279, "step": 12028 }, { "epoch": 195.0, "eval_loss": 11.152359962463379, "eval_runtime": 1.5527, "eval_samples_per_second": 79.217, "eval_steps_per_second": 10.305, "step": 12090 }, { "epoch": 196.0, "eval_loss": 11.008437156677246, "eval_runtime": 1.5538, "eval_samples_per_second": 79.158, "eval_steps_per_second": 10.297, "step": 12152 }, { "epoch": 197.0, "eval_loss": 11.086997032165527, "eval_runtime": 1.5634, "eval_samples_per_second": 78.677, "eval_steps_per_second": 10.234, "step": 12214 }, { "epoch": 198.0, "eval_loss": 10.842860221862793, "eval_runtime": 1.5635, "eval_samples_per_second": 78.671, "eval_steps_per_second": 10.234, "step": 12276 }, { "epoch": 199.0, "eval_loss": 11.010416030883789, "eval_runtime": 1.5454, "eval_samples_per_second": 79.593, "eval_steps_per_second": 10.354, "step": 12338 }, { "epoch": 200.0, "eval_loss": 10.92686939239502, "eval_runtime": 1.545, "eval_samples_per_second": 79.613, "eval_steps_per_second": 10.356, "step": 12400 }, { "epoch": 201.0, "eval_loss": 10.987348556518555, "eval_runtime": 1.548, "eval_samples_per_second": 79.458, "eval_steps_per_second": 10.336, "step": 12462 }, { "epoch": 201.61, "learning_rate": 4.17741935483871e-08, "loss": 10.1022, "step": 12500 }, { "epoch": 202.0, "eval_loss": 10.98776912689209, "eval_runtime": 1.57, "eval_samples_per_second": 78.342, "eval_steps_per_second": 10.191, "step": 12524 }, { "epoch": 203.0, "eval_loss": 10.882826805114746, "eval_runtime": 1.5543, "eval_samples_per_second": 79.135, "eval_steps_per_second": 10.294, "step": 12586 }, { "epoch": 204.0, "eval_loss": 11.043395042419434, "eval_runtime": 1.5636, "eval_samples_per_second": 78.662, "eval_steps_per_second": 10.233, "step": 12648 }, { "epoch": 205.0, "eval_loss": 10.953902244567871, "eval_runtime": 1.5575, "eval_samples_per_second": 78.972, "eval_steps_per_second": 10.273, "step": 12710 }, { "epoch": 206.0, "eval_loss": 10.684782028198242, "eval_runtime": 1.5525, "eval_samples_per_second": 79.228, "eval_steps_per_second": 10.306, "step": 12772 }, { "epoch": 207.0, "eval_loss": 11.177257537841797, "eval_runtime": 1.5483, "eval_samples_per_second": 79.441, "eval_steps_per_second": 10.334, "step": 12834 }, { "epoch": 208.0, "eval_loss": 10.996722221374512, "eval_runtime": 1.5543, "eval_samples_per_second": 79.138, "eval_steps_per_second": 10.294, "step": 12896 }, { "epoch": 209.0, "eval_loss": 10.998943328857422, "eval_runtime": 1.5532, "eval_samples_per_second": 79.19, "eval_steps_per_second": 10.301, "step": 12958 }, { "epoch": 209.68, "learning_rate": 4.064516129032259e-08, "loss": 10.0962, "step": 13000 }, { "epoch": 210.0, "eval_loss": 11.20012378692627, "eval_runtime": 1.5557, "eval_samples_per_second": 79.063, "eval_steps_per_second": 10.285, "step": 13020 }, { "epoch": 211.0, "eval_loss": 10.965839385986328, "eval_runtime": 1.554, "eval_samples_per_second": 79.148, "eval_steps_per_second": 10.296, "step": 13082 }, { "epoch": 212.0, "eval_loss": 11.009934425354004, "eval_runtime": 1.5719, "eval_samples_per_second": 78.252, "eval_steps_per_second": 10.179, "step": 13144 }, { "epoch": 213.0, "eval_loss": 11.053404808044434, "eval_runtime": 1.5544, "eval_samples_per_second": 79.129, "eval_steps_per_second": 10.293, "step": 13206 }, { "epoch": 214.0, "eval_loss": 11.185773849487305, "eval_runtime": 1.5545, "eval_samples_per_second": 79.125, "eval_steps_per_second": 10.293, "step": 13268 }, { "epoch": 215.0, "eval_loss": 11.044096946716309, "eval_runtime": 1.5449, "eval_samples_per_second": 79.615, "eval_steps_per_second": 10.356, "step": 13330 }, { "epoch": 216.0, "eval_loss": 11.181927680969238, "eval_runtime": 1.5461, "eval_samples_per_second": 79.553, "eval_steps_per_second": 10.348, "step": 13392 }, { "epoch": 217.0, "eval_loss": 11.01004695892334, "eval_runtime": 1.5451, "eval_samples_per_second": 79.607, "eval_steps_per_second": 10.355, "step": 13454 }, { "epoch": 217.74, "learning_rate": 3.951612903225807e-08, "loss": 10.0861, "step": 13500 }, { "epoch": 218.0, "eval_loss": 11.007074356079102, "eval_runtime": 1.5582, "eval_samples_per_second": 78.937, "eval_steps_per_second": 10.268, "step": 13516 }, { "epoch": 219.0, "eval_loss": 11.145269393920898, "eval_runtime": 1.5533, "eval_samples_per_second": 79.188, "eval_steps_per_second": 10.301, "step": 13578 }, { "epoch": 220.0, "eval_loss": 10.985397338867188, "eval_runtime": 1.5653, "eval_samples_per_second": 78.577, "eval_steps_per_second": 10.221, "step": 13640 }, { "epoch": 221.0, "eval_loss": 11.120607376098633, "eval_runtime": 1.5535, "eval_samples_per_second": 79.174, "eval_steps_per_second": 10.299, "step": 13702 }, { "epoch": 222.0, "eval_loss": 11.069469451904297, "eval_runtime": 1.5538, "eval_samples_per_second": 79.159, "eval_steps_per_second": 10.297, "step": 13764 }, { "epoch": 223.0, "eval_loss": 11.070048332214355, "eval_runtime": 1.5473, "eval_samples_per_second": 79.495, "eval_steps_per_second": 10.341, "step": 13826 }, { "epoch": 224.0, "eval_loss": 10.968293190002441, "eval_runtime": 1.547, "eval_samples_per_second": 79.509, "eval_steps_per_second": 10.343, "step": 13888 }, { "epoch": 225.0, "eval_loss": 10.9319486618042, "eval_runtime": 1.5452, "eval_samples_per_second": 79.599, "eval_steps_per_second": 10.354, "step": 13950 }, { "epoch": 225.81, "learning_rate": 3.838709677419355e-08, "loss": 10.0808, "step": 14000 }, { "epoch": 226.0, "eval_loss": 10.993424415588379, "eval_runtime": 1.57, "eval_samples_per_second": 78.345, "eval_steps_per_second": 10.191, "step": 14012 }, { "epoch": 227.0, "eval_loss": 10.878746032714844, "eval_runtime": 1.5608, "eval_samples_per_second": 78.805, "eval_steps_per_second": 10.251, "step": 14074 }, { "epoch": 228.0, "eval_loss": 10.905098915100098, "eval_runtime": 1.5539, "eval_samples_per_second": 79.155, "eval_steps_per_second": 10.297, "step": 14136 }, { "epoch": 229.0, "eval_loss": 11.037513732910156, "eval_runtime": 1.5551, "eval_samples_per_second": 79.093, "eval_steps_per_second": 10.289, "step": 14198 }, { "epoch": 230.0, "eval_loss": 11.025934219360352, "eval_runtime": 1.5538, "eval_samples_per_second": 79.161, "eval_steps_per_second": 10.297, "step": 14260 }, { "epoch": 231.0, "eval_loss": 11.063486099243164, "eval_runtime": 1.5562, "eval_samples_per_second": 79.041, "eval_steps_per_second": 10.282, "step": 14322 }, { "epoch": 232.0, "eval_loss": 10.88222885131836, "eval_runtime": 1.5487, "eval_samples_per_second": 79.422, "eval_steps_per_second": 10.331, "step": 14384 }, { "epoch": 233.0, "eval_loss": 10.89100170135498, "eval_runtime": 1.5457, "eval_samples_per_second": 79.576, "eval_steps_per_second": 10.351, "step": 14446 }, { "epoch": 233.87, "learning_rate": 3.7258064516129034e-08, "loss": 10.0768, "step": 14500 }, { "epoch": 234.0, "eval_loss": 10.974419593811035, "eval_runtime": 1.5538, "eval_samples_per_second": 79.16, "eval_steps_per_second": 10.297, "step": 14508 }, { "epoch": 235.0, "eval_loss": 11.042181015014648, "eval_runtime": 1.58, "eval_samples_per_second": 77.85, "eval_steps_per_second": 10.127, "step": 14570 }, { "epoch": 236.0, "eval_loss": 11.03979778289795, "eval_runtime": 1.5648, "eval_samples_per_second": 78.605, "eval_steps_per_second": 10.225, "step": 14632 }, { "epoch": 237.0, "eval_loss": 10.850703239440918, "eval_runtime": 1.5587, "eval_samples_per_second": 78.914, "eval_steps_per_second": 10.265, "step": 14694 }, { "epoch": 238.0, "eval_loss": 11.022680282592773, "eval_runtime": 1.5543, "eval_samples_per_second": 79.135, "eval_steps_per_second": 10.294, "step": 14756 }, { "epoch": 239.0, "eval_loss": 11.027267456054688, "eval_runtime": 1.5884, "eval_samples_per_second": 77.438, "eval_steps_per_second": 10.073, "step": 14818 }, { "epoch": 240.0, "eval_loss": 11.07776165008545, "eval_runtime": 1.5492, "eval_samples_per_second": 79.395, "eval_steps_per_second": 10.328, "step": 14880 }, { "epoch": 241.0, "eval_loss": 11.077691078186035, "eval_runtime": 1.5493, "eval_samples_per_second": 79.393, "eval_steps_per_second": 10.328, "step": 14942 }, { "epoch": 241.94, "learning_rate": 3.612903225806452e-08, "loss": 10.0792, "step": 15000 }, { "epoch": 242.0, "eval_loss": 10.942319869995117, "eval_runtime": 1.5569, "eval_samples_per_second": 79.005, "eval_steps_per_second": 10.277, "step": 15004 }, { "epoch": 243.0, "eval_loss": 11.226703643798828, "eval_runtime": 1.5593, "eval_samples_per_second": 78.88, "eval_steps_per_second": 10.261, "step": 15066 }, { "epoch": 244.0, "eval_loss": 10.85287094116211, "eval_runtime": 1.564, "eval_samples_per_second": 78.644, "eval_steps_per_second": 10.23, "step": 15128 }, { "epoch": 245.0, "eval_loss": 11.056612968444824, "eval_runtime": 1.5825, "eval_samples_per_second": 77.725, "eval_steps_per_second": 10.111, "step": 15190 }, { "epoch": 246.0, "eval_loss": 10.887572288513184, "eval_runtime": 1.5637, "eval_samples_per_second": 78.661, "eval_steps_per_second": 10.232, "step": 15252 }, { "epoch": 247.0, "eval_loss": 11.144104957580566, "eval_runtime": 1.5574, "eval_samples_per_second": 78.978, "eval_steps_per_second": 10.274, "step": 15314 }, { "epoch": 248.0, "eval_loss": 10.909028053283691, "eval_runtime": 1.5501, "eval_samples_per_second": 79.35, "eval_steps_per_second": 10.322, "step": 15376 }, { "epoch": 249.0, "eval_loss": 10.97555923461914, "eval_runtime": 1.5502, "eval_samples_per_second": 79.347, "eval_steps_per_second": 10.322, "step": 15438 }, { "epoch": 250.0, "learning_rate": 3.5e-08, "loss": 10.0832, "step": 15500 }, { "epoch": 250.0, "eval_loss": 10.953834533691406, "eval_runtime": 1.5699, "eval_samples_per_second": 78.347, "eval_steps_per_second": 10.192, "step": 15500 }, { "epoch": 251.0, "eval_loss": 10.842375755310059, "eval_runtime": 1.5575, "eval_samples_per_second": 78.974, "eval_steps_per_second": 10.273, "step": 15562 }, { "epoch": 252.0, "eval_loss": 10.767086029052734, "eval_runtime": 1.5579, "eval_samples_per_second": 78.953, "eval_steps_per_second": 10.27, "step": 15624 }, { "epoch": 253.0, "eval_loss": 10.963540077209473, "eval_runtime": 1.5571, "eval_samples_per_second": 78.992, "eval_steps_per_second": 10.275, "step": 15686 }, { "epoch": 254.0, "eval_loss": 10.849014282226562, "eval_runtime": 1.5574, "eval_samples_per_second": 78.976, "eval_steps_per_second": 10.273, "step": 15748 }, { "epoch": 255.0, "eval_loss": 11.134928703308105, "eval_runtime": 1.5686, "eval_samples_per_second": 78.416, "eval_steps_per_second": 10.2, "step": 15810 }, { "epoch": 256.0, "eval_loss": 10.884407043457031, "eval_runtime": 1.5495, "eval_samples_per_second": 79.381, "eval_steps_per_second": 10.326, "step": 15872 }, { "epoch": 257.0, "eval_loss": 10.865640640258789, "eval_runtime": 1.552, "eval_samples_per_second": 79.251, "eval_steps_per_second": 10.309, "step": 15934 }, { "epoch": 258.0, "eval_loss": 10.823393821716309, "eval_runtime": 1.5505, "eval_samples_per_second": 79.33, "eval_steps_per_second": 10.319, "step": 15996 }, { "epoch": 258.06, "learning_rate": 3.387096774193549e-08, "loss": 10.0747, "step": 16000 }, { "epoch": 259.0, "eval_loss": 10.950251579284668, "eval_runtime": 1.5601, "eval_samples_per_second": 78.843, "eval_steps_per_second": 10.256, "step": 16058 }, { "epoch": 260.0, "eval_loss": 10.885615348815918, "eval_runtime": 1.5592, "eval_samples_per_second": 78.886, "eval_steps_per_second": 10.262, "step": 16120 }, { "epoch": 261.0, "eval_loss": 11.051911354064941, "eval_runtime": 1.5755, "eval_samples_per_second": 78.069, "eval_steps_per_second": 10.155, "step": 16182 }, { "epoch": 262.0, "eval_loss": 10.886160850524902, "eval_runtime": 1.557, "eval_samples_per_second": 79.0, "eval_steps_per_second": 10.276, "step": 16244 }, { "epoch": 263.0, "eval_loss": 10.897393226623535, "eval_runtime": 1.5641, "eval_samples_per_second": 78.64, "eval_steps_per_second": 10.23, "step": 16306 }, { "epoch": 264.0, "eval_loss": 10.84638500213623, "eval_runtime": 1.5498, "eval_samples_per_second": 79.364, "eval_steps_per_second": 10.324, "step": 16368 }, { "epoch": 265.0, "eval_loss": 10.965924263000488, "eval_runtime": 1.5489, "eval_samples_per_second": 79.411, "eval_steps_per_second": 10.33, "step": 16430 }, { "epoch": 266.0, "eval_loss": 10.742626190185547, "eval_runtime": 1.55, "eval_samples_per_second": 79.357, "eval_steps_per_second": 10.323, "step": 16492 }, { "epoch": 266.13, "learning_rate": 3.274193548387097e-08, "loss": 10.0678, "step": 16500 }, { "epoch": 267.0, "eval_loss": 10.985177040100098, "eval_runtime": 1.5698, "eval_samples_per_second": 78.356, "eval_steps_per_second": 10.193, "step": 16554 }, { "epoch": 268.0, "eval_loss": 11.049816131591797, "eval_runtime": 1.557, "eval_samples_per_second": 79.0, "eval_steps_per_second": 10.276, "step": 16616 }, { "epoch": 269.0, "eval_loss": 10.7893648147583, "eval_runtime": 1.5584, "eval_samples_per_second": 78.929, "eval_steps_per_second": 10.267, "step": 16678 }, { "epoch": 270.0, "eval_loss": 11.10730266571045, "eval_runtime": 1.5594, "eval_samples_per_second": 78.878, "eval_steps_per_second": 10.261, "step": 16740 }, { "epoch": 271.0, "eval_loss": 10.826376914978027, "eval_runtime": 1.5624, "eval_samples_per_second": 78.727, "eval_steps_per_second": 10.241, "step": 16802 }, { "epoch": 272.0, "eval_loss": 10.931463241577148, "eval_runtime": 1.5486, "eval_samples_per_second": 79.427, "eval_steps_per_second": 10.332, "step": 16864 }, { "epoch": 273.0, "eval_loss": 10.775986671447754, "eval_runtime": 1.5509, "eval_samples_per_second": 79.307, "eval_steps_per_second": 10.316, "step": 16926 }, { "epoch": 274.0, "eval_loss": 10.972262382507324, "eval_runtime": 1.566, "eval_samples_per_second": 78.543, "eval_steps_per_second": 10.217, "step": 16988 }, { "epoch": 274.19, "learning_rate": 3.1612903225806456e-08, "loss": 10.0631, "step": 17000 }, { "epoch": 275.0, "eval_loss": 10.748165130615234, "eval_runtime": 1.5567, "eval_samples_per_second": 79.014, "eval_steps_per_second": 10.278, "step": 17050 }, { "epoch": 276.0, "eval_loss": 11.049092292785645, "eval_runtime": 1.5587, "eval_samples_per_second": 78.912, "eval_steps_per_second": 10.265, "step": 17112 }, { "epoch": 277.0, "eval_loss": 10.751622200012207, "eval_runtime": 1.5568, "eval_samples_per_second": 79.007, "eval_steps_per_second": 10.277, "step": 17174 }, { "epoch": 278.0, "eval_loss": 10.950490951538086, "eval_runtime": 1.5586, "eval_samples_per_second": 78.916, "eval_steps_per_second": 10.266, "step": 17236 }, { "epoch": 279.0, "eval_loss": 10.889892578125, "eval_runtime": 1.5568, "eval_samples_per_second": 79.01, "eval_steps_per_second": 10.278, "step": 17298 }, { "epoch": 280.0, "eval_loss": 10.786404609680176, "eval_runtime": 1.557, "eval_samples_per_second": 78.998, "eval_steps_per_second": 10.276, "step": 17360 }, { "epoch": 281.0, "eval_loss": 10.865982055664062, "eval_runtime": 1.5496, "eval_samples_per_second": 79.377, "eval_steps_per_second": 10.326, "step": 17422 }, { "epoch": 282.0, "eval_loss": 10.97549819946289, "eval_runtime": 1.5507, "eval_samples_per_second": 79.318, "eval_steps_per_second": 10.318, "step": 17484 }, { "epoch": 282.26, "learning_rate": 3.048387096774194e-08, "loss": 10.0464, "step": 17500 }, { "epoch": 283.0, "eval_loss": 10.970197677612305, "eval_runtime": 1.5595, "eval_samples_per_second": 78.873, "eval_steps_per_second": 10.26, "step": 17546 }, { "epoch": 284.0, "eval_loss": 10.966419219970703, "eval_runtime": 1.5592, "eval_samples_per_second": 78.887, "eval_steps_per_second": 10.262, "step": 17608 }, { "epoch": 285.0, "eval_loss": 10.941322326660156, "eval_runtime": 1.5575, "eval_samples_per_second": 78.971, "eval_steps_per_second": 10.273, "step": 17670 }, { "epoch": 286.0, "eval_loss": 10.966172218322754, "eval_runtime": 1.5626, "eval_samples_per_second": 78.714, "eval_steps_per_second": 10.239, "step": 17732 }, { "epoch": 287.0, "eval_loss": 10.951107025146484, "eval_runtime": 1.5766, "eval_samples_per_second": 78.017, "eval_steps_per_second": 10.149, "step": 17794 }, { "epoch": 288.0, "eval_loss": 10.864799499511719, "eval_runtime": 1.5527, "eval_samples_per_second": 79.216, "eval_steps_per_second": 10.305, "step": 17856 }, { "epoch": 289.0, "eval_loss": 10.88668441772461, "eval_runtime": 1.5518, "eval_samples_per_second": 79.261, "eval_steps_per_second": 10.31, "step": 17918 }, { "epoch": 290.0, "eval_loss": 11.037867546081543, "eval_runtime": 1.5494, "eval_samples_per_second": 79.386, "eval_steps_per_second": 10.327, "step": 17980 }, { "epoch": 290.32, "learning_rate": 2.935483870967742e-08, "loss": 10.059, "step": 18000 }, { "epoch": 291.0, "eval_loss": 10.958696365356445, "eval_runtime": 1.5631, "eval_samples_per_second": 78.692, "eval_steps_per_second": 10.236, "step": 18042 }, { "epoch": 292.0, "eval_loss": 10.871495246887207, "eval_runtime": 1.5627, "eval_samples_per_second": 78.71, "eval_steps_per_second": 10.239, "step": 18104 }, { "epoch": 293.0, "eval_loss": 11.032896041870117, "eval_runtime": 1.5647, "eval_samples_per_second": 78.608, "eval_steps_per_second": 10.225, "step": 18166 }, { "epoch": 294.0, "eval_loss": 10.993353843688965, "eval_runtime": 1.557, "eval_samples_per_second": 78.998, "eval_steps_per_second": 10.276, "step": 18228 }, { "epoch": 295.0, "eval_loss": 11.031813621520996, "eval_runtime": 1.5556, "eval_samples_per_second": 79.072, "eval_steps_per_second": 10.286, "step": 18290 }, { "epoch": 296.0, "eval_loss": 10.949562072753906, "eval_runtime": 1.5493, "eval_samples_per_second": 79.389, "eval_steps_per_second": 10.327, "step": 18352 }, { "epoch": 297.0, "eval_loss": 10.985105514526367, "eval_runtime": 1.5496, "eval_samples_per_second": 79.373, "eval_steps_per_second": 10.325, "step": 18414 }, { "epoch": 298.0, "eval_loss": 10.904229164123535, "eval_runtime": 1.5499, "eval_samples_per_second": 79.359, "eval_steps_per_second": 10.323, "step": 18476 }, { "epoch": 298.39, "learning_rate": 2.8225806451612902e-08, "loss": 10.0478, "step": 18500 }, { "epoch": 299.0, "eval_loss": 10.848288536071777, "eval_runtime": 1.5652, "eval_samples_per_second": 78.585, "eval_steps_per_second": 10.222, "step": 18538 }, { "epoch": 300.0, "eval_loss": 11.128362655639648, "eval_runtime": 1.5589, "eval_samples_per_second": 78.903, "eval_steps_per_second": 10.264, "step": 18600 }, { "epoch": 301.0, "eval_loss": 10.965646743774414, "eval_runtime": 1.5837, "eval_samples_per_second": 77.668, "eval_steps_per_second": 10.103, "step": 18662 }, { "epoch": 302.0, "eval_loss": 10.962158203125, "eval_runtime": 1.5581, "eval_samples_per_second": 78.944, "eval_steps_per_second": 10.269, "step": 18724 }, { "epoch": 303.0, "eval_loss": 11.022586822509766, "eval_runtime": 1.5559, "eval_samples_per_second": 79.056, "eval_steps_per_second": 10.284, "step": 18786 }, { "epoch": 304.0, "eval_loss": 10.998231887817383, "eval_runtime": 1.5516, "eval_samples_per_second": 79.272, "eval_steps_per_second": 10.312, "step": 18848 }, { "epoch": 305.0, "eval_loss": 10.964576721191406, "eval_runtime": 1.5886, "eval_samples_per_second": 77.425, "eval_steps_per_second": 10.072, "step": 18910 }, { "epoch": 306.0, "eval_loss": 11.094855308532715, "eval_runtime": 1.549, "eval_samples_per_second": 79.406, "eval_steps_per_second": 10.329, "step": 18972 }, { "epoch": 306.45, "learning_rate": 2.7096774193548387e-08, "loss": 10.0513, "step": 19000 }, { "epoch": 307.0, "eval_loss": 11.073990821838379, "eval_runtime": 1.5592, "eval_samples_per_second": 78.885, "eval_steps_per_second": 10.261, "step": 19034 }, { "epoch": 308.0, "eval_loss": 10.888773918151855, "eval_runtime": 1.5638, "eval_samples_per_second": 78.652, "eval_steps_per_second": 10.231, "step": 19096 }, { "epoch": 309.0, "eval_loss": 11.247567176818848, "eval_runtime": 1.5616, "eval_samples_per_second": 78.763, "eval_steps_per_second": 10.246, "step": 19158 }, { "epoch": 310.0, "eval_loss": 10.999629020690918, "eval_runtime": 1.5902, "eval_samples_per_second": 77.348, "eval_steps_per_second": 10.062, "step": 19220 }, { "epoch": 311.0, "eval_loss": 10.804391860961914, "eval_runtime": 1.5586, "eval_samples_per_second": 78.919, "eval_steps_per_second": 10.266, "step": 19282 }, { "epoch": 312.0, "eval_loss": 10.949257850646973, "eval_runtime": 1.5493, "eval_samples_per_second": 79.391, "eval_steps_per_second": 10.327, "step": 19344 }, { "epoch": 313.0, "eval_loss": 10.94599723815918, "eval_runtime": 1.5498, "eval_samples_per_second": 79.367, "eval_steps_per_second": 10.324, "step": 19406 }, { "epoch": 314.0, "eval_loss": 10.966387748718262, "eval_runtime": 1.5504, "eval_samples_per_second": 79.336, "eval_steps_per_second": 10.32, "step": 19468 }, { "epoch": 314.52, "learning_rate": 2.5967741935483875e-08, "loss": 10.0363, "step": 19500 }, { "epoch": 315.0, "eval_loss": 10.91958999633789, "eval_runtime": 1.5704, "eval_samples_per_second": 78.324, "eval_steps_per_second": 10.188, "step": 19530 }, { "epoch": 316.0, "eval_loss": 10.921772956848145, "eval_runtime": 1.5577, "eval_samples_per_second": 78.964, "eval_steps_per_second": 10.272, "step": 19592 }, { "epoch": 317.0, "eval_loss": 10.800854682922363, "eval_runtime": 1.5565, "eval_samples_per_second": 79.024, "eval_steps_per_second": 10.28, "step": 19654 }, { "epoch": 318.0, "eval_loss": 11.121504783630371, "eval_runtime": 1.5574, "eval_samples_per_second": 78.979, "eval_steps_per_second": 10.274, "step": 19716 }, { "epoch": 319.0, "eval_loss": 10.96510124206543, "eval_runtime": 1.5619, "eval_samples_per_second": 78.751, "eval_steps_per_second": 10.244, "step": 19778 }, { "epoch": 320.0, "eval_loss": 10.995678901672363, "eval_runtime": 1.5505, "eval_samples_per_second": 79.33, "eval_steps_per_second": 10.319, "step": 19840 }, { "epoch": 321.0, "eval_loss": 11.144912719726562, "eval_runtime": 1.5521, "eval_samples_per_second": 79.247, "eval_steps_per_second": 10.309, "step": 19902 }, { "epoch": 322.0, "eval_loss": 11.01318645477295, "eval_runtime": 1.555, "eval_samples_per_second": 79.101, "eval_steps_per_second": 10.289, "step": 19964 }, { "epoch": 322.58, "learning_rate": 2.483870967741936e-08, "loss": 10.0503, "step": 20000 }, { "epoch": 323.0, "eval_loss": 11.119362831115723, "eval_runtime": 1.5655, "eval_samples_per_second": 78.571, "eval_steps_per_second": 10.221, "step": 20026 }, { "epoch": 324.0, "eval_loss": 10.964983940124512, "eval_runtime": 1.5692, "eval_samples_per_second": 78.382, "eval_steps_per_second": 10.196, "step": 20088 }, { "epoch": 325.0, "eval_loss": 10.841401100158691, "eval_runtime": 1.5595, "eval_samples_per_second": 78.871, "eval_steps_per_second": 10.26, "step": 20150 }, { "epoch": 326.0, "eval_loss": 10.86677360534668, "eval_runtime": 1.5533, "eval_samples_per_second": 79.184, "eval_steps_per_second": 10.3, "step": 20212 }, { "epoch": 327.0, "eval_loss": 11.009995460510254, "eval_runtime": 1.5673, "eval_samples_per_second": 78.478, "eval_steps_per_second": 10.209, "step": 20274 }, { "epoch": 328.0, "eval_loss": 10.875937461853027, "eval_runtime": 1.5499, "eval_samples_per_second": 79.359, "eval_steps_per_second": 10.323, "step": 20336 }, { "epoch": 329.0, "eval_loss": 10.965596199035645, "eval_runtime": 1.562, "eval_samples_per_second": 78.744, "eval_steps_per_second": 10.243, "step": 20398 }, { "epoch": 330.0, "eval_loss": 11.007844924926758, "eval_runtime": 1.5553, "eval_samples_per_second": 79.085, "eval_steps_per_second": 10.287, "step": 20460 }, { "epoch": 330.65, "learning_rate": 2.370967741935484e-08, "loss": 10.0223, "step": 20500 }, { "epoch": 331.0, "eval_loss": 11.0198335647583, "eval_runtime": 1.5833, "eval_samples_per_second": 77.688, "eval_steps_per_second": 10.106, "step": 20522 }, { "epoch": 332.0, "eval_loss": 11.088841438293457, "eval_runtime": 1.5649, "eval_samples_per_second": 78.601, "eval_steps_per_second": 10.224, "step": 20584 }, { "epoch": 333.0, "eval_loss": 11.105833053588867, "eval_runtime": 1.5609, "eval_samples_per_second": 78.798, "eval_steps_per_second": 10.25, "step": 20646 }, { "epoch": 334.0, "eval_loss": 10.942315101623535, "eval_runtime": 1.5599, "eval_samples_per_second": 78.853, "eval_steps_per_second": 10.257, "step": 20708 }, { "epoch": 335.0, "eval_loss": 11.103010177612305, "eval_runtime": 1.5806, "eval_samples_per_second": 77.817, "eval_steps_per_second": 10.123, "step": 20770 }, { "epoch": 336.0, "eval_loss": 11.081936836242676, "eval_runtime": 1.5498, "eval_samples_per_second": 79.366, "eval_steps_per_second": 10.324, "step": 20832 }, { "epoch": 337.0, "eval_loss": 10.90597152709961, "eval_runtime": 1.55, "eval_samples_per_second": 79.356, "eval_steps_per_second": 10.323, "step": 20894 }, { "epoch": 338.0, "eval_loss": 11.074986457824707, "eval_runtime": 1.5577, "eval_samples_per_second": 78.962, "eval_steps_per_second": 10.271, "step": 20956 }, { "epoch": 338.71, "learning_rate": 2.2580645161290325e-08, "loss": 10.0321, "step": 21000 }, { "epoch": 339.0, "eval_loss": 11.110125541687012, "eval_runtime": 1.5561, "eval_samples_per_second": 79.046, "eval_steps_per_second": 10.282, "step": 21018 }, { "epoch": 340.0, "eval_loss": 10.971090316772461, "eval_runtime": 1.5625, "eval_samples_per_second": 78.722, "eval_steps_per_second": 10.24, "step": 21080 }, { "epoch": 341.0, "eval_loss": 10.92870044708252, "eval_runtime": 1.5635, "eval_samples_per_second": 78.668, "eval_steps_per_second": 10.233, "step": 21142 }, { "epoch": 342.0, "eval_loss": 10.796195983886719, "eval_runtime": 1.5574, "eval_samples_per_second": 78.978, "eval_steps_per_second": 10.273, "step": 21204 }, { "epoch": 343.0, "eval_loss": 11.136199951171875, "eval_runtime": 1.5574, "eval_samples_per_second": 78.976, "eval_steps_per_second": 10.273, "step": 21266 }, { "epoch": 344.0, "eval_loss": 11.102249145507812, "eval_runtime": 1.551, "eval_samples_per_second": 79.302, "eval_steps_per_second": 10.316, "step": 21328 }, { "epoch": 345.0, "eval_loss": 10.999671936035156, "eval_runtime": 1.5493, "eval_samples_per_second": 79.392, "eval_steps_per_second": 10.327, "step": 21390 }, { "epoch": 346.0, "eval_loss": 10.876097679138184, "eval_runtime": 1.5498, "eval_samples_per_second": 79.366, "eval_steps_per_second": 10.324, "step": 21452 }, { "epoch": 346.77, "learning_rate": 2.145161290322581e-08, "loss": 10.0226, "step": 21500 }, { "epoch": 347.0, "eval_loss": 10.934317588806152, "eval_runtime": 1.5612, "eval_samples_per_second": 78.786, "eval_steps_per_second": 10.249, "step": 21514 }, { "epoch": 348.0, "eval_loss": 10.910962104797363, "eval_runtime": 1.5564, "eval_samples_per_second": 79.028, "eval_steps_per_second": 10.28, "step": 21576 }, { "epoch": 349.0, "eval_loss": 11.034477233886719, "eval_runtime": 1.5584, "eval_samples_per_second": 78.929, "eval_steps_per_second": 10.267, "step": 21638 }, { "epoch": 350.0, "eval_loss": 10.88716983795166, "eval_runtime": 1.5754, "eval_samples_per_second": 78.075, "eval_steps_per_second": 10.156, "step": 21700 }, { "epoch": 351.0, "eval_loss": 10.766716957092285, "eval_runtime": 1.5893, "eval_samples_per_second": 77.394, "eval_steps_per_second": 10.067, "step": 21762 }, { "epoch": 352.0, "eval_loss": 11.056756973266602, "eval_runtime": 1.5503, "eval_samples_per_second": 79.339, "eval_steps_per_second": 10.321, "step": 21824 }, { "epoch": 353.0, "eval_loss": 11.028831481933594, "eval_runtime": 1.5494, "eval_samples_per_second": 79.385, "eval_steps_per_second": 10.326, "step": 21886 }, { "epoch": 354.0, "eval_loss": 11.066434860229492, "eval_runtime": 1.5562, "eval_samples_per_second": 79.039, "eval_steps_per_second": 10.281, "step": 21948 }, { "epoch": 354.84, "learning_rate": 2.0322580645161293e-08, "loss": 10.033, "step": 22000 }, { "epoch": 355.0, "eval_loss": 10.975112915039062, "eval_runtime": 1.5594, "eval_samples_per_second": 78.876, "eval_steps_per_second": 10.26, "step": 22010 }, { "epoch": 356.0, "eval_loss": 10.856056213378906, "eval_runtime": 1.5565, "eval_samples_per_second": 79.022, "eval_steps_per_second": 10.279, "step": 22072 }, { "epoch": 357.0, "eval_loss": 10.960885047912598, "eval_runtime": 1.5772, "eval_samples_per_second": 77.988, "eval_steps_per_second": 10.145, "step": 22134 }, { "epoch": 358.0, "eval_loss": 11.000205993652344, "eval_runtime": 1.5685, "eval_samples_per_second": 78.418, "eval_steps_per_second": 10.201, "step": 22196 }, { "epoch": 359.0, "eval_loss": 11.056760787963867, "eval_runtime": 1.5597, "eval_samples_per_second": 78.862, "eval_steps_per_second": 10.259, "step": 22258 }, { "epoch": 360.0, "eval_loss": 10.958597183227539, "eval_runtime": 1.5568, "eval_samples_per_second": 79.009, "eval_steps_per_second": 10.278, "step": 22320 }, { "epoch": 361.0, "eval_loss": 10.925392150878906, "eval_runtime": 1.5568, "eval_samples_per_second": 79.007, "eval_steps_per_second": 10.277, "step": 22382 }, { "epoch": 362.0, "eval_loss": 11.065186500549316, "eval_runtime": 1.5511, "eval_samples_per_second": 79.297, "eval_steps_per_second": 10.315, "step": 22444 }, { "epoch": 362.9, "learning_rate": 1.9193548387096775e-08, "loss": 10.0084, "step": 22500 }, { "epoch": 363.0, "eval_loss": 10.965627670288086, "eval_runtime": 1.5573, "eval_samples_per_second": 78.985, "eval_steps_per_second": 10.274, "step": 22506 }, { "epoch": 364.0, "eval_loss": 11.061145782470703, "eval_runtime": 1.5616, "eval_samples_per_second": 78.765, "eval_steps_per_second": 10.246, "step": 22568 }, { "epoch": 365.0, "eval_loss": 10.833497047424316, "eval_runtime": 1.5633, "eval_samples_per_second": 78.678, "eval_steps_per_second": 10.235, "step": 22630 }, { "epoch": 366.0, "eval_loss": 10.980859756469727, "eval_runtime": 1.5593, "eval_samples_per_second": 78.883, "eval_steps_per_second": 10.261, "step": 22692 }, { "epoch": 367.0, "eval_loss": 11.020894050598145, "eval_runtime": 1.5581, "eval_samples_per_second": 78.94, "eval_steps_per_second": 10.269, "step": 22754 }, { "epoch": 368.0, "eval_loss": 10.91490364074707, "eval_runtime": 1.574, "eval_samples_per_second": 78.142, "eval_steps_per_second": 10.165, "step": 22816 }, { "epoch": 369.0, "eval_loss": 11.020668029785156, "eval_runtime": 1.5554, "eval_samples_per_second": 79.078, "eval_steps_per_second": 10.287, "step": 22878 }, { "epoch": 370.0, "eval_loss": 10.877435684204102, "eval_runtime": 1.5496, "eval_samples_per_second": 79.374, "eval_steps_per_second": 10.325, "step": 22940 }, { "epoch": 370.97, "learning_rate": 1.806451612903226e-08, "loss": 10.024, "step": 23000 }, { "epoch": 371.0, "eval_loss": 10.985451698303223, "eval_runtime": 1.5536, "eval_samples_per_second": 79.173, "eval_steps_per_second": 10.299, "step": 23002 }, { "epoch": 372.0, "eval_loss": 10.90639877319336, "eval_runtime": 1.5566, "eval_samples_per_second": 79.018, "eval_steps_per_second": 10.279, "step": 23064 }, { "epoch": 373.0, "eval_loss": 10.95183277130127, "eval_runtime": 1.5635, "eval_samples_per_second": 78.669, "eval_steps_per_second": 10.233, "step": 23126 }, { "epoch": 374.0, "eval_loss": 10.877464294433594, "eval_runtime": 1.5564, "eval_samples_per_second": 79.031, "eval_steps_per_second": 10.28, "step": 23188 }, { "epoch": 375.0, "eval_loss": 10.91928768157959, "eval_runtime": 1.5575, "eval_samples_per_second": 78.971, "eval_steps_per_second": 10.273, "step": 23250 }, { "epoch": 376.0, "eval_loss": 11.109807968139648, "eval_runtime": 1.5563, "eval_samples_per_second": 79.033, "eval_steps_per_second": 10.281, "step": 23312 }, { "epoch": 377.0, "eval_loss": 11.148626327514648, "eval_runtime": 1.5493, "eval_samples_per_second": 79.392, "eval_steps_per_second": 10.327, "step": 23374 }, { "epoch": 378.0, "eval_loss": 11.089497566223145, "eval_runtime": 1.5491, "eval_samples_per_second": 79.4, "eval_steps_per_second": 10.329, "step": 23436 }, { "epoch": 379.0, "eval_loss": 10.915903091430664, "eval_runtime": 1.5528, "eval_samples_per_second": 79.214, "eval_steps_per_second": 10.304, "step": 23498 }, { "epoch": 379.03, "learning_rate": 1.6935483870967743e-08, "loss": 10.0052, "step": 23500 }, { "epoch": 380.0, "eval_loss": 11.109511375427246, "eval_runtime": 1.5576, "eval_samples_per_second": 78.966, "eval_steps_per_second": 10.272, "step": 23560 }, { "epoch": 381.0, "eval_loss": 11.094901084899902, "eval_runtime": 1.5561, "eval_samples_per_second": 79.044, "eval_steps_per_second": 10.282, "step": 23622 }, { "epoch": 382.0, "eval_loss": 10.983734130859375, "eval_runtime": 1.5557, "eval_samples_per_second": 79.066, "eval_steps_per_second": 10.285, "step": 23684 }, { "epoch": 383.0, "eval_loss": 10.943252563476562, "eval_runtime": 1.577, "eval_samples_per_second": 77.998, "eval_steps_per_second": 10.146, "step": 23746 }, { "epoch": 384.0, "eval_loss": 10.910370826721191, "eval_runtime": 1.5682, "eval_samples_per_second": 78.434, "eval_steps_per_second": 10.203, "step": 23808 }, { "epoch": 385.0, "eval_loss": 10.95122241973877, "eval_runtime": 1.5515, "eval_samples_per_second": 79.28, "eval_steps_per_second": 10.313, "step": 23870 }, { "epoch": 386.0, "eval_loss": 11.107484817504883, "eval_runtime": 1.5487, "eval_samples_per_second": 79.422, "eval_steps_per_second": 10.331, "step": 23932 }, { "epoch": 387.0, "eval_loss": 10.865962982177734, "eval_runtime": 1.5535, "eval_samples_per_second": 79.176, "eval_steps_per_second": 10.299, "step": 23994 }, { "epoch": 387.1, "learning_rate": 1.5806451612903228e-08, "loss": 10.0218, "step": 24000 }, { "epoch": 388.0, "eval_loss": 11.052102088928223, "eval_runtime": 1.5572, "eval_samples_per_second": 78.988, "eval_steps_per_second": 10.275, "step": 24056 }, { "epoch": 389.0, "eval_loss": 10.898940086364746, "eval_runtime": 1.5627, "eval_samples_per_second": 78.712, "eval_steps_per_second": 10.239, "step": 24118 }, { "epoch": 390.0, "eval_loss": 10.913924217224121, "eval_runtime": 1.5799, "eval_samples_per_second": 77.852, "eval_steps_per_second": 10.127, "step": 24180 }, { "epoch": 391.0, "eval_loss": 11.10788345336914, "eval_runtime": 1.5783, "eval_samples_per_second": 77.934, "eval_steps_per_second": 10.138, "step": 24242 }, { "epoch": 392.0, "eval_loss": 10.906310081481934, "eval_runtime": 1.5568, "eval_samples_per_second": 79.006, "eval_steps_per_second": 10.277, "step": 24304 }, { "epoch": 393.0, "eval_loss": 11.085139274597168, "eval_runtime": 1.5494, "eval_samples_per_second": 79.384, "eval_steps_per_second": 10.326, "step": 24366 }, { "epoch": 394.0, "eval_loss": 10.980916023254395, "eval_runtime": 1.5522, "eval_samples_per_second": 79.244, "eval_steps_per_second": 10.308, "step": 24428 }, { "epoch": 395.0, "eval_loss": 10.932045936584473, "eval_runtime": 1.5493, "eval_samples_per_second": 79.389, "eval_steps_per_second": 10.327, "step": 24490 }, { "epoch": 395.16, "learning_rate": 1.467741935483871e-08, "loss": 10.0039, "step": 24500 }, { "epoch": 396.0, "eval_loss": 11.052530288696289, "eval_runtime": 1.5595, "eval_samples_per_second": 78.871, "eval_steps_per_second": 10.26, "step": 24552 }, { "epoch": 397.0, "eval_loss": 11.037784576416016, "eval_runtime": 1.5581, "eval_samples_per_second": 78.943, "eval_steps_per_second": 10.269, "step": 24614 }, { "epoch": 398.0, "eval_loss": 11.000636100769043, "eval_runtime": 1.5574, "eval_samples_per_second": 78.979, "eval_steps_per_second": 10.274, "step": 24676 }, { "epoch": 399.0, "eval_loss": 11.028385162353516, "eval_runtime": 1.5601, "eval_samples_per_second": 78.842, "eval_steps_per_second": 10.256, "step": 24738 }, { "epoch": 400.0, "eval_loss": 11.005521774291992, "eval_runtime": 1.5704, "eval_samples_per_second": 78.326, "eval_steps_per_second": 10.189, "step": 24800 }, { "epoch": 401.0, "eval_loss": 11.010418891906738, "eval_runtime": 1.55, "eval_samples_per_second": 79.356, "eval_steps_per_second": 10.323, "step": 24862 }, { "epoch": 402.0, "eval_loss": 11.11683464050293, "eval_runtime": 1.5496, "eval_samples_per_second": 79.377, "eval_steps_per_second": 10.325, "step": 24924 }, { "epoch": 403.0, "eval_loss": 10.831143379211426, "eval_runtime": 1.5514, "eval_samples_per_second": 79.284, "eval_steps_per_second": 10.313, "step": 24986 }, { "epoch": 403.23, "learning_rate": 1.3548387096774193e-08, "loss": 10.0186, "step": 25000 }, { "epoch": 404.0, "eval_loss": 10.890425682067871, "eval_runtime": 1.5613, "eval_samples_per_second": 78.78, "eval_steps_per_second": 10.248, "step": 25048 }, { "epoch": 405.0, "eval_loss": 10.861490249633789, "eval_runtime": 1.5685, "eval_samples_per_second": 78.418, "eval_steps_per_second": 10.201, "step": 25110 }, { "epoch": 406.0, "eval_loss": 10.951952934265137, "eval_runtime": 1.5597, "eval_samples_per_second": 78.863, "eval_steps_per_second": 10.259, "step": 25172 }, { "epoch": 407.0, "eval_loss": 10.911187171936035, "eval_runtime": 1.5579, "eval_samples_per_second": 78.953, "eval_steps_per_second": 10.27, "step": 25234 }, { "epoch": 408.0, "eval_loss": 11.027958869934082, "eval_runtime": 1.5537, "eval_samples_per_second": 79.165, "eval_steps_per_second": 10.298, "step": 25296 }, { "epoch": 409.0, "eval_loss": 10.828099250793457, "eval_runtime": 1.5453, "eval_samples_per_second": 79.597, "eval_steps_per_second": 10.354, "step": 25358 }, { "epoch": 410.0, "eval_loss": 11.187531471252441, "eval_runtime": 1.5496, "eval_samples_per_second": 79.375, "eval_steps_per_second": 10.325, "step": 25420 }, { "epoch": 411.0, "eval_loss": 11.00891399383545, "eval_runtime": 1.5524, "eval_samples_per_second": 79.231, "eval_steps_per_second": 10.306, "step": 25482 }, { "epoch": 411.29, "learning_rate": 1.241935483870968e-08, "loss": 10.0146, "step": 25500 }, { "epoch": 412.0, "eval_loss": 11.038575172424316, "eval_runtime": 1.5665, "eval_samples_per_second": 78.518, "eval_steps_per_second": 10.214, "step": 25544 }, { "epoch": 413.0, "eval_loss": 10.815169334411621, "eval_runtime": 1.558, "eval_samples_per_second": 78.948, "eval_steps_per_second": 10.27, "step": 25606 }, { "epoch": 414.0, "eval_loss": 10.946043014526367, "eval_runtime": 1.5604, "eval_samples_per_second": 78.824, "eval_steps_per_second": 10.254, "step": 25668 }, { "epoch": 415.0, "eval_loss": 10.9921875, "eval_runtime": 1.5586, "eval_samples_per_second": 78.917, "eval_steps_per_second": 10.266, "step": 25730 }, { "epoch": 416.0, "eval_loss": 11.085003852844238, "eval_runtime": 1.5606, "eval_samples_per_second": 78.817, "eval_steps_per_second": 10.253, "step": 25792 }, { "epoch": 417.0, "eval_loss": 11.002412796020508, "eval_runtime": 1.5515, "eval_samples_per_second": 79.276, "eval_steps_per_second": 10.312, "step": 25854 }, { "epoch": 418.0, "eval_loss": 11.093355178833008, "eval_runtime": 1.5531, "eval_samples_per_second": 79.196, "eval_steps_per_second": 10.302, "step": 25916 }, { "epoch": 419.0, "eval_loss": 11.095279693603516, "eval_runtime": 1.5495, "eval_samples_per_second": 79.379, "eval_steps_per_second": 10.326, "step": 25978 }, { "epoch": 419.35, "learning_rate": 1.1290322580645162e-08, "loss": 10.0251, "step": 26000 }, { "epoch": 420.0, "eval_loss": 10.89963150024414, "eval_runtime": 1.5606, "eval_samples_per_second": 78.815, "eval_steps_per_second": 10.252, "step": 26040 }, { "epoch": 421.0, "eval_loss": 10.983254432678223, "eval_runtime": 1.5556, "eval_samples_per_second": 79.068, "eval_steps_per_second": 10.285, "step": 26102 }, { "epoch": 422.0, "eval_loss": 10.754561424255371, "eval_runtime": 1.558, "eval_samples_per_second": 78.947, "eval_steps_per_second": 10.27, "step": 26164 }, { "epoch": 423.0, "eval_loss": 10.976980209350586, "eval_runtime": 1.577, "eval_samples_per_second": 77.995, "eval_steps_per_second": 10.146, "step": 26226 }, { "epoch": 424.0, "eval_loss": 11.063197135925293, "eval_runtime": 1.5683, "eval_samples_per_second": 78.427, "eval_steps_per_second": 10.202, "step": 26288 }, { "epoch": 425.0, "eval_loss": 10.924360275268555, "eval_runtime": 1.5498, "eval_samples_per_second": 79.364, "eval_steps_per_second": 10.324, "step": 26350 }, { "epoch": 426.0, "eval_loss": 10.937601089477539, "eval_runtime": 1.5495, "eval_samples_per_second": 79.379, "eval_steps_per_second": 10.326, "step": 26412 }, { "epoch": 427.0, "eval_loss": 11.044069290161133, "eval_runtime": 1.5501, "eval_samples_per_second": 79.351, "eval_steps_per_second": 10.322, "step": 26474 }, { "epoch": 427.42, "learning_rate": 1.0161290322580647e-08, "loss": 9.9776, "step": 26500 }, { "epoch": 428.0, "eval_loss": 11.067927360534668, "eval_runtime": 1.5604, "eval_samples_per_second": 78.827, "eval_steps_per_second": 10.254, "step": 26536 }, { "epoch": 429.0, "eval_loss": 11.066262245178223, "eval_runtime": 1.5875, "eval_samples_per_second": 77.482, "eval_steps_per_second": 10.079, "step": 26598 }, { "epoch": 430.0, "eval_loss": 10.988542556762695, "eval_runtime": 1.5716, "eval_samples_per_second": 78.264, "eval_steps_per_second": 10.181, "step": 26660 }, { "epoch": 431.0, "eval_loss": 10.956550598144531, "eval_runtime": 1.558, "eval_samples_per_second": 78.949, "eval_steps_per_second": 10.27, "step": 26722 }, { "epoch": 432.0, "eval_loss": 11.156039237976074, "eval_runtime": 1.5566, "eval_samples_per_second": 79.019, "eval_steps_per_second": 10.279, "step": 26784 }, { "epoch": 433.0, "eval_loss": 11.059048652648926, "eval_runtime": 1.5499, "eval_samples_per_second": 79.358, "eval_steps_per_second": 10.323, "step": 26846 }, { "epoch": 434.0, "eval_loss": 11.01535415649414, "eval_runtime": 1.5524, "eval_samples_per_second": 79.233, "eval_steps_per_second": 10.307, "step": 26908 }, { "epoch": 435.0, "eval_loss": 10.964756965637207, "eval_runtime": 1.553, "eval_samples_per_second": 79.204, "eval_steps_per_second": 10.303, "step": 26970 }, { "epoch": 435.48, "learning_rate": 9.03225806451613e-09, "loss": 10.0079, "step": 27000 }, { "epoch": 436.0, "eval_loss": 11.084016799926758, "eval_runtime": 1.559, "eval_samples_per_second": 78.895, "eval_steps_per_second": 10.263, "step": 27032 }, { "epoch": 437.0, "eval_loss": 10.810328483581543, "eval_runtime": 1.5654, "eval_samples_per_second": 78.574, "eval_steps_per_second": 10.221, "step": 27094 }, { "epoch": 438.0, "eval_loss": 10.89944839477539, "eval_runtime": 1.557, "eval_samples_per_second": 79.0, "eval_steps_per_second": 10.276, "step": 27156 }, { "epoch": 439.0, "eval_loss": 11.036003112792969, "eval_runtime": 1.5567, "eval_samples_per_second": 79.015, "eval_steps_per_second": 10.278, "step": 27218 }, { "epoch": 440.0, "eval_loss": 10.909425735473633, "eval_runtime": 1.5567, "eval_samples_per_second": 79.013, "eval_steps_per_second": 10.278, "step": 27280 }, { "epoch": 441.0, "eval_loss": 10.940203666687012, "eval_runtime": 1.5862, "eval_samples_per_second": 77.542, "eval_steps_per_second": 10.087, "step": 27342 }, { "epoch": 442.0, "eval_loss": 10.877068519592285, "eval_runtime": 1.5493, "eval_samples_per_second": 79.388, "eval_steps_per_second": 10.327, "step": 27404 }, { "epoch": 443.0, "eval_loss": 11.093377113342285, "eval_runtime": 1.5504, "eval_samples_per_second": 79.335, "eval_steps_per_second": 10.32, "step": 27466 }, { "epoch": 443.55, "learning_rate": 7.903225806451614e-09, "loss": 10.0049, "step": 27500 }, { "epoch": 444.0, "eval_loss": 11.089668273925781, "eval_runtime": 1.5636, "eval_samples_per_second": 78.663, "eval_steps_per_second": 10.233, "step": 27528 }, { "epoch": 445.0, "eval_loss": 11.049309730529785, "eval_runtime": 1.5562, "eval_samples_per_second": 79.038, "eval_steps_per_second": 10.281, "step": 27590 }, { "epoch": 446.0, "eval_loss": 11.039472579956055, "eval_runtime": 1.5633, "eval_samples_per_second": 78.678, "eval_steps_per_second": 10.235, "step": 27652 }, { "epoch": 447.0, "eval_loss": 10.898978233337402, "eval_runtime": 1.5657, "eval_samples_per_second": 78.558, "eval_steps_per_second": 10.219, "step": 27714 }, { "epoch": 448.0, "eval_loss": 11.16734504699707, "eval_runtime": 1.5575, "eval_samples_per_second": 78.975, "eval_steps_per_second": 10.273, "step": 27776 }, { "epoch": 449.0, "eval_loss": 11.158203125, "eval_runtime": 1.5524, "eval_samples_per_second": 79.235, "eval_steps_per_second": 10.307, "step": 27838 }, { "epoch": 450.0, "eval_loss": 10.85914421081543, "eval_runtime": 1.5491, "eval_samples_per_second": 79.399, "eval_steps_per_second": 10.328, "step": 27900 }, { "epoch": 451.0, "eval_loss": 11.079207420349121, "eval_runtime": 1.5491, "eval_samples_per_second": 79.398, "eval_steps_per_second": 10.328, "step": 27962 }, { "epoch": 451.61, "learning_rate": 6.774193548387097e-09, "loss": 9.9924, "step": 28000 }, { "epoch": 452.0, "eval_loss": 11.144965171813965, "eval_runtime": 1.5578, "eval_samples_per_second": 78.958, "eval_steps_per_second": 10.271, "step": 28024 }, { "epoch": 453.0, "eval_loss": 10.981874465942383, "eval_runtime": 1.5581, "eval_samples_per_second": 78.94, "eval_steps_per_second": 10.269, "step": 28086 }, { "epoch": 454.0, "eval_loss": 10.887094497680664, "eval_runtime": 1.5515, "eval_samples_per_second": 79.28, "eval_steps_per_second": 10.313, "step": 28148 }, { "epoch": 455.0, "eval_loss": 10.99148178100586, "eval_runtime": 1.558, "eval_samples_per_second": 78.947, "eval_steps_per_second": 10.27, "step": 28210 }, { "epoch": 456.0, "eval_loss": 11.03730583190918, "eval_runtime": 1.5584, "eval_samples_per_second": 78.926, "eval_steps_per_second": 10.267, "step": 28272 }, { "epoch": 457.0, "eval_loss": 10.938337326049805, "eval_runtime": 1.5696, "eval_samples_per_second": 78.366, "eval_steps_per_second": 10.194, "step": 28334 }, { "epoch": 458.0, "eval_loss": 11.129241943359375, "eval_runtime": 1.5563, "eval_samples_per_second": 79.032, "eval_steps_per_second": 10.281, "step": 28396 }, { "epoch": 459.0, "eval_loss": 11.168819427490234, "eval_runtime": 1.5502, "eval_samples_per_second": 79.343, "eval_steps_per_second": 10.321, "step": 28458 }, { "epoch": 459.68, "learning_rate": 5.645161290322581e-09, "loss": 9.9918, "step": 28500 }, { "epoch": 460.0, "eval_loss": 11.043048858642578, "eval_runtime": 1.5622, "eval_samples_per_second": 78.736, "eval_steps_per_second": 10.242, "step": 28520 }, { "epoch": 461.0, "eval_loss": 11.071016311645508, "eval_runtime": 1.5582, "eval_samples_per_second": 78.938, "eval_steps_per_second": 10.268, "step": 28582 }, { "epoch": 462.0, "eval_loss": 11.011009216308594, "eval_runtime": 1.554, "eval_samples_per_second": 79.151, "eval_steps_per_second": 10.296, "step": 28644 }, { "epoch": 463.0, "eval_loss": 11.087599754333496, "eval_runtime": 1.5566, "eval_samples_per_second": 79.02, "eval_steps_per_second": 10.279, "step": 28706 }, { "epoch": 464.0, "eval_loss": 11.0982027053833, "eval_runtime": 1.5598, "eval_samples_per_second": 78.856, "eval_steps_per_second": 10.258, "step": 28768 }, { "epoch": 465.0, "eval_loss": 10.962496757507324, "eval_runtime": 1.5771, "eval_samples_per_second": 77.992, "eval_steps_per_second": 10.145, "step": 28830 }, { "epoch": 466.0, "eval_loss": 10.964188575744629, "eval_runtime": 1.5492, "eval_samples_per_second": 79.395, "eval_steps_per_second": 10.328, "step": 28892 }, { "epoch": 467.0, "eval_loss": 10.798909187316895, "eval_runtime": 1.5512, "eval_samples_per_second": 79.296, "eval_steps_per_second": 10.315, "step": 28954 }, { "epoch": 467.74, "learning_rate": 4.516129032258065e-09, "loss": 9.9799, "step": 29000 }, { "epoch": 468.0, "eval_loss": 11.00378131866455, "eval_runtime": 1.562, "eval_samples_per_second": 78.745, "eval_steps_per_second": 10.243, "step": 29016 }, { "epoch": 469.0, "eval_loss": 11.124489784240723, "eval_runtime": 1.5579, "eval_samples_per_second": 78.951, "eval_steps_per_second": 10.27, "step": 29078 }, { "epoch": 470.0, "eval_loss": 10.913480758666992, "eval_runtime": 1.5763, "eval_samples_per_second": 78.031, "eval_steps_per_second": 10.15, "step": 29140 }, { "epoch": 471.0, "eval_loss": 10.943198204040527, "eval_runtime": 1.5563, "eval_samples_per_second": 79.031, "eval_steps_per_second": 10.281, "step": 29202 }, { "epoch": 472.0, "eval_loss": 10.829460144042969, "eval_runtime": 1.5569, "eval_samples_per_second": 79.005, "eval_steps_per_second": 10.277, "step": 29264 }, { "epoch": 473.0, "eval_loss": 10.927714347839355, "eval_runtime": 1.5542, "eval_samples_per_second": 79.141, "eval_steps_per_second": 10.295, "step": 29326 }, { "epoch": 474.0, "eval_loss": 11.094922065734863, "eval_runtime": 1.5516, "eval_samples_per_second": 79.274, "eval_steps_per_second": 10.312, "step": 29388 }, { "epoch": 475.0, "eval_loss": 11.037424087524414, "eval_runtime": 1.5502, "eval_samples_per_second": 79.343, "eval_steps_per_second": 10.321, "step": 29450 }, { "epoch": 475.81, "learning_rate": 3.3870967741935484e-09, "loss": 9.9961, "step": 29500 }, { "epoch": 476.0, "eval_loss": 10.958832740783691, "eval_runtime": 1.5596, "eval_samples_per_second": 78.865, "eval_steps_per_second": 10.259, "step": 29512 }, { "epoch": 477.0, "eval_loss": 11.034053802490234, "eval_runtime": 1.5537, "eval_samples_per_second": 79.168, "eval_steps_per_second": 10.298, "step": 29574 }, { "epoch": 478.0, "eval_loss": 10.954147338867188, "eval_runtime": 1.563, "eval_samples_per_second": 78.695, "eval_steps_per_second": 10.237, "step": 29636 }, { "epoch": 479.0, "eval_loss": 10.934306144714355, "eval_runtime": 1.5578, "eval_samples_per_second": 78.957, "eval_steps_per_second": 10.271, "step": 29698 }, { "epoch": 480.0, "eval_loss": 10.959942817687988, "eval_runtime": 1.5525, "eval_samples_per_second": 79.225, "eval_steps_per_second": 10.306, "step": 29760 }, { "epoch": 481.0, "eval_loss": 10.95332145690918, "eval_runtime": 1.5572, "eval_samples_per_second": 78.988, "eval_steps_per_second": 10.275, "step": 29822 }, { "epoch": 482.0, "eval_loss": 11.204573631286621, "eval_runtime": 1.5776, "eval_samples_per_second": 77.964, "eval_steps_per_second": 10.142, "step": 29884 }, { "epoch": 483.0, "eval_loss": 10.935157775878906, "eval_runtime": 1.5461, "eval_samples_per_second": 79.557, "eval_steps_per_second": 10.349, "step": 29946 }, { "epoch": 483.87, "learning_rate": 2.2580645161290324e-09, "loss": 10.006, "step": 30000 }, { "epoch": 484.0, "eval_loss": 11.172720909118652, "eval_runtime": 1.5619, "eval_samples_per_second": 78.751, "eval_steps_per_second": 10.244, "step": 30008 }, { "epoch": 485.0, "eval_loss": 10.99561595916748, "eval_runtime": 1.5552, "eval_samples_per_second": 79.09, "eval_steps_per_second": 10.288, "step": 30070 }, { "epoch": 486.0, "eval_loss": 11.166374206542969, "eval_runtime": 1.5713, "eval_samples_per_second": 78.281, "eval_steps_per_second": 10.183, "step": 30132 }, { "epoch": 487.0, "eval_loss": 11.094931602478027, "eval_runtime": 1.553, "eval_samples_per_second": 79.2, "eval_steps_per_second": 10.302, "step": 30194 }, { "epoch": 488.0, "eval_loss": 10.948943138122559, "eval_runtime": 1.5549, "eval_samples_per_second": 79.102, "eval_steps_per_second": 10.29, "step": 30256 }, { "epoch": 489.0, "eval_loss": 11.004921913146973, "eval_runtime": 1.5546, "eval_samples_per_second": 79.121, "eval_steps_per_second": 10.292, "step": 30318 }, { "epoch": 490.0, "eval_loss": 11.18515396118164, "eval_runtime": 1.5529, "eval_samples_per_second": 79.207, "eval_steps_per_second": 10.303, "step": 30380 }, { "epoch": 491.0, "eval_loss": 11.10179615020752, "eval_runtime": 1.5524, "eval_samples_per_second": 79.232, "eval_steps_per_second": 10.307, "step": 30442 }, { "epoch": 491.94, "learning_rate": 1.1290322580645162e-09, "loss": 10.0083, "step": 30500 }, { "epoch": 492.0, "eval_loss": 10.913029670715332, "eval_runtime": 1.5534, "eval_samples_per_second": 79.18, "eval_steps_per_second": 10.3, "step": 30504 }, { "epoch": 493.0, "eval_loss": 10.935208320617676, "eval_runtime": 1.5558, "eval_samples_per_second": 79.058, "eval_steps_per_second": 10.284, "step": 30566 }, { "epoch": 494.0, "eval_loss": 10.9716157913208, "eval_runtime": 1.5545, "eval_samples_per_second": 79.127, "eval_steps_per_second": 10.293, "step": 30628 }, { "epoch": 495.0, "eval_loss": 11.111549377441406, "eval_runtime": 1.5613, "eval_samples_per_second": 78.778, "eval_steps_per_second": 10.248, "step": 30690 }, { "epoch": 496.0, "eval_loss": 10.980956077575684, "eval_runtime": 1.5565, "eval_samples_per_second": 79.022, "eval_steps_per_second": 10.279, "step": 30752 }, { "epoch": 497.0, "eval_loss": 10.970294952392578, "eval_runtime": 1.5649, "eval_samples_per_second": 78.597, "eval_steps_per_second": 10.224, "step": 30814 }, { "epoch": 498.0, "eval_loss": 10.939518928527832, "eval_runtime": 1.547, "eval_samples_per_second": 79.51, "eval_steps_per_second": 10.343, "step": 30876 }, { "epoch": 499.0, "eval_loss": 11.018895149230957, "eval_runtime": 1.5496, "eval_samples_per_second": 79.375, "eval_steps_per_second": 10.325, "step": 30938 }, { "epoch": 500.0, "learning_rate": 0.0, "loss": 10.0106, "step": 31000 }, { "epoch": 500.0, "eval_loss": 10.97592544555664, "eval_runtime": 1.5827, "eval_samples_per_second": 77.713, "eval_steps_per_second": 10.109, "step": 31000 }, { "epoch": 500.0, "step": 31000, "total_flos": 1.6136618439168e+16, "train_loss": 11.119525689894154, "train_runtime": 17411.7991, "train_samples_per_second": 14.042, "train_steps_per_second": 1.78 } ], "max_steps": 31000, "num_train_epochs": 500, "total_flos": 1.6136618439168e+16, "trial_name": null, "trial_params": null }