|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 500.0, |
|
"global_step": 31000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 40.06241989135742, |
|
"eval_runtime": 1.547, |
|
"eval_samples_per_second": 79.509, |
|
"eval_steps_per_second": 10.343, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 39.472923278808594, |
|
"eval_runtime": 1.5508, |
|
"eval_samples_per_second": 79.312, |
|
"eval_steps_per_second": 10.317, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 39.0081672668457, |
|
"eval_runtime": 1.546, |
|
"eval_samples_per_second": 79.558, |
|
"eval_steps_per_second": 10.349, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 38.20319366455078, |
|
"eval_runtime": 1.5507, |
|
"eval_samples_per_second": 79.317, |
|
"eval_steps_per_second": 10.318, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 37.33580017089844, |
|
"eval_runtime": 1.5485, |
|
"eval_samples_per_second": 79.431, |
|
"eval_steps_per_second": 10.332, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 36.15742111206055, |
|
"eval_runtime": 1.5522, |
|
"eval_samples_per_second": 79.244, |
|
"eval_steps_per_second": 10.308, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 35.14997863769531, |
|
"eval_runtime": 1.5454, |
|
"eval_samples_per_second": 79.589, |
|
"eval_steps_per_second": 10.353, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 33.69432067871094, |
|
"eval_runtime": 1.5477, |
|
"eval_samples_per_second": 79.472, |
|
"eval_steps_per_second": 10.338, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 6.887096774193549e-08, |
|
"loss": 39.2118, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 32.3968391418457, |
|
"eval_runtime": 1.5552, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 10.288, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 30.730754852294922, |
|
"eval_runtime": 1.5637, |
|
"eval_samples_per_second": 78.658, |
|
"eval_steps_per_second": 10.232, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 29.227901458740234, |
|
"eval_runtime": 1.553, |
|
"eval_samples_per_second": 79.199, |
|
"eval_steps_per_second": 10.302, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 27.82735824584961, |
|
"eval_runtime": 1.5637, |
|
"eval_samples_per_second": 78.659, |
|
"eval_steps_per_second": 10.232, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 26.49405288696289, |
|
"eval_runtime": 1.5536, |
|
"eval_samples_per_second": 79.17, |
|
"eval_steps_per_second": 10.298, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 24.659284591674805, |
|
"eval_runtime": 1.5457, |
|
"eval_samples_per_second": 79.576, |
|
"eval_steps_per_second": 10.351, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 23.44048309326172, |
|
"eval_runtime": 1.5478, |
|
"eval_samples_per_second": 79.467, |
|
"eval_steps_per_second": 10.337, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 21.999380111694336, |
|
"eval_runtime": 1.5447, |
|
"eval_samples_per_second": 79.626, |
|
"eval_steps_per_second": 10.358, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 6.774193548387097e-08, |
|
"loss": 27.5423, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 21.027385711669922, |
|
"eval_runtime": 1.5545, |
|
"eval_samples_per_second": 79.127, |
|
"eval_steps_per_second": 10.293, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 19.89065170288086, |
|
"eval_runtime": 1.565, |
|
"eval_samples_per_second": 78.592, |
|
"eval_steps_per_second": 10.223, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 19.33472442626953, |
|
"eval_runtime": 1.5542, |
|
"eval_samples_per_second": 79.141, |
|
"eval_steps_per_second": 10.295, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 18.324054718017578, |
|
"eval_runtime": 1.5611, |
|
"eval_samples_per_second": 78.793, |
|
"eval_steps_per_second": 10.249, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 17.521472930908203, |
|
"eval_runtime": 1.5648, |
|
"eval_samples_per_second": 78.603, |
|
"eval_steps_per_second": 10.225, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 16.96886444091797, |
|
"eval_runtime": 1.5541, |
|
"eval_samples_per_second": 79.144, |
|
"eval_steps_per_second": 10.295, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 16.298294067382812, |
|
"eval_runtime": 1.5454, |
|
"eval_samples_per_second": 79.59, |
|
"eval_steps_per_second": 10.353, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 15.870747566223145, |
|
"eval_runtime": 1.5451, |
|
"eval_samples_per_second": 79.609, |
|
"eval_steps_per_second": 10.356, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"learning_rate": 6.661290322580646e-08, |
|
"loss": 18.4542, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 15.465510368347168, |
|
"eval_runtime": 1.5532, |
|
"eval_samples_per_second": 79.191, |
|
"eval_steps_per_second": 10.301, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 15.184890747070312, |
|
"eval_runtime": 1.5516, |
|
"eval_samples_per_second": 79.271, |
|
"eval_steps_per_second": 10.312, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 14.780122756958008, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.159, |
|
"eval_steps_per_second": 10.297, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 14.557552337646484, |
|
"eval_runtime": 1.5541, |
|
"eval_samples_per_second": 79.147, |
|
"eval_steps_per_second": 10.296, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 14.421510696411133, |
|
"eval_runtime": 1.5516, |
|
"eval_samples_per_second": 79.275, |
|
"eval_steps_per_second": 10.312, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 13.931785583496094, |
|
"eval_runtime": 1.5492, |
|
"eval_samples_per_second": 79.395, |
|
"eval_steps_per_second": 10.328, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 13.980083465576172, |
|
"eval_runtime": 1.5454, |
|
"eval_samples_per_second": 79.593, |
|
"eval_steps_per_second": 10.354, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 13.709578514099121, |
|
"eval_runtime": 1.5464, |
|
"eval_samples_per_second": 79.537, |
|
"eval_steps_per_second": 10.346, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 6.548387096774194e-08, |
|
"loss": 13.9556, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 13.463862419128418, |
|
"eval_runtime": 1.5524, |
|
"eval_samples_per_second": 79.233, |
|
"eval_steps_per_second": 10.307, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 13.298738479614258, |
|
"eval_runtime": 1.5554, |
|
"eval_samples_per_second": 79.08, |
|
"eval_steps_per_second": 10.287, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 13.23029613494873, |
|
"eval_runtime": 1.5584, |
|
"eval_samples_per_second": 78.926, |
|
"eval_steps_per_second": 10.267, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 13.24497127532959, |
|
"eval_runtime": 1.5624, |
|
"eval_samples_per_second": 78.724, |
|
"eval_steps_per_second": 10.24, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 13.057371139526367, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.261, |
|
"eval_steps_per_second": 10.31, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 12.902210235595703, |
|
"eval_runtime": 1.5456, |
|
"eval_samples_per_second": 79.583, |
|
"eval_steps_per_second": 10.352, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 12.819963455200195, |
|
"eval_runtime": 1.5458, |
|
"eval_samples_per_second": 79.571, |
|
"eval_steps_per_second": 10.351, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 12.729530334472656, |
|
"eval_runtime": 1.5456, |
|
"eval_samples_per_second": 79.582, |
|
"eval_steps_per_second": 10.352, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 6.435483870967743e-08, |
|
"loss": 12.111, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 12.840241432189941, |
|
"eval_runtime": 1.5515, |
|
"eval_samples_per_second": 79.277, |
|
"eval_steps_per_second": 10.312, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 12.690611839294434, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.163, |
|
"eval_steps_per_second": 10.298, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 12.54220199584961, |
|
"eval_runtime": 1.554, |
|
"eval_samples_per_second": 79.15, |
|
"eval_steps_per_second": 10.296, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 12.503222465515137, |
|
"eval_runtime": 1.5553, |
|
"eval_samples_per_second": 79.086, |
|
"eval_steps_per_second": 10.288, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 12.296753883361816, |
|
"eval_runtime": 1.5516, |
|
"eval_samples_per_second": 79.275, |
|
"eval_steps_per_second": 10.312, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 12.332414627075195, |
|
"eval_runtime": 1.5484, |
|
"eval_samples_per_second": 79.436, |
|
"eval_steps_per_second": 10.333, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 12.254257202148438, |
|
"eval_runtime": 1.551, |
|
"eval_samples_per_second": 79.301, |
|
"eval_steps_per_second": 10.316, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 12.24339485168457, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.26, |
|
"eval_steps_per_second": 10.31, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 48.39, |
|
"learning_rate": 6.322580645161291e-08, |
|
"loss": 11.3304, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 12.251891136169434, |
|
"eval_runtime": 1.5529, |
|
"eval_samples_per_second": 79.207, |
|
"eval_steps_per_second": 10.303, |
|
"step": 3038 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 12.112042427062988, |
|
"eval_runtime": 1.56, |
|
"eval_samples_per_second": 78.845, |
|
"eval_steps_per_second": 10.256, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 12.196945190429688, |
|
"eval_runtime": 1.5565, |
|
"eval_samples_per_second": 79.023, |
|
"eval_steps_per_second": 10.279, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 11.9977388381958, |
|
"eval_runtime": 1.5836, |
|
"eval_samples_per_second": 77.673, |
|
"eval_steps_per_second": 10.104, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 12.175936698913574, |
|
"eval_runtime": 1.5513, |
|
"eval_samples_per_second": 79.287, |
|
"eval_steps_per_second": 10.314, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 11.980809211730957, |
|
"eval_runtime": 1.546, |
|
"eval_samples_per_second": 79.559, |
|
"eval_steps_per_second": 10.349, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 12.149847030639648, |
|
"eval_runtime": 1.5462, |
|
"eval_samples_per_second": 79.548, |
|
"eval_steps_per_second": 10.348, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 11.985103607177734, |
|
"eval_runtime": 1.5457, |
|
"eval_samples_per_second": 79.577, |
|
"eval_steps_per_second": 10.351, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 56.45, |
|
"learning_rate": 6.20967741935484e-08, |
|
"loss": 10.9471, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 12.080038070678711, |
|
"eval_runtime": 1.5551, |
|
"eval_samples_per_second": 79.095, |
|
"eval_steps_per_second": 10.289, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 11.881911277770996, |
|
"eval_runtime": 1.5543, |
|
"eval_samples_per_second": 79.133, |
|
"eval_steps_per_second": 10.294, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 12.009904861450195, |
|
"eval_runtime": 1.5505, |
|
"eval_samples_per_second": 79.328, |
|
"eval_steps_per_second": 10.319, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 11.861567497253418, |
|
"eval_runtime": 1.5654, |
|
"eval_samples_per_second": 78.574, |
|
"eval_steps_per_second": 10.221, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_loss": 11.89500617980957, |
|
"eval_runtime": 1.5515, |
|
"eval_samples_per_second": 79.279, |
|
"eval_steps_per_second": 10.313, |
|
"step": 3782 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 11.830110549926758, |
|
"eval_runtime": 1.5521, |
|
"eval_samples_per_second": 79.249, |
|
"eval_steps_per_second": 10.309, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_loss": 12.096732139587402, |
|
"eval_runtime": 1.5464, |
|
"eval_samples_per_second": 79.542, |
|
"eval_steps_per_second": 10.347, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 11.837874412536621, |
|
"eval_runtime": 1.5533, |
|
"eval_samples_per_second": 79.184, |
|
"eval_steps_per_second": 10.3, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 64.52, |
|
"learning_rate": 6.096774193548388e-08, |
|
"loss": 10.6841, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 11.909323692321777, |
|
"eval_runtime": 1.5534, |
|
"eval_samples_per_second": 79.181, |
|
"eval_steps_per_second": 10.3, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 11.91748332977295, |
|
"eval_runtime": 1.5598, |
|
"eval_samples_per_second": 78.854, |
|
"eval_steps_per_second": 10.257, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_loss": 11.73180103302002, |
|
"eval_runtime": 1.5498, |
|
"eval_samples_per_second": 79.365, |
|
"eval_steps_per_second": 10.324, |
|
"step": 4154 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 11.830022811889648, |
|
"eval_runtime": 1.5461, |
|
"eval_samples_per_second": 79.557, |
|
"eval_steps_per_second": 10.349, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_loss": 11.599220275878906, |
|
"eval_runtime": 1.5555, |
|
"eval_samples_per_second": 79.074, |
|
"eval_steps_per_second": 10.286, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 11.604393005371094, |
|
"eval_runtime": 1.5547, |
|
"eval_samples_per_second": 79.114, |
|
"eval_steps_per_second": 10.291, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_loss": 11.693653106689453, |
|
"eval_runtime": 1.5528, |
|
"eval_samples_per_second": 79.212, |
|
"eval_steps_per_second": 10.304, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 11.596038818359375, |
|
"eval_runtime": 1.5565, |
|
"eval_samples_per_second": 79.023, |
|
"eval_steps_per_second": 10.279, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 72.58, |
|
"learning_rate": 5.983870967741936e-08, |
|
"loss": 10.542, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_loss": 11.545083045959473, |
|
"eval_runtime": 1.5539, |
|
"eval_samples_per_second": 79.157, |
|
"eval_steps_per_second": 10.297, |
|
"step": 4526 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 11.403264999389648, |
|
"eval_runtime": 1.5513, |
|
"eval_samples_per_second": 79.291, |
|
"eval_steps_per_second": 10.314, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 11.480517387390137, |
|
"eval_runtime": 1.5515, |
|
"eval_samples_per_second": 79.28, |
|
"eval_steps_per_second": 10.313, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 11.539589881896973, |
|
"eval_runtime": 1.552, |
|
"eval_samples_per_second": 79.252, |
|
"eval_steps_per_second": 10.309, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_loss": 11.492401123046875, |
|
"eval_runtime": 1.5522, |
|
"eval_samples_per_second": 79.245, |
|
"eval_steps_per_second": 10.308, |
|
"step": 4774 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 11.428875923156738, |
|
"eval_runtime": 1.5583, |
|
"eval_samples_per_second": 78.931, |
|
"eval_steps_per_second": 10.268, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_loss": 11.364572525024414, |
|
"eval_runtime": 1.5553, |
|
"eval_samples_per_second": 79.085, |
|
"eval_steps_per_second": 10.287, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 11.362411499023438, |
|
"eval_runtime": 1.5451, |
|
"eval_samples_per_second": 79.605, |
|
"eval_steps_per_second": 10.355, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 80.65, |
|
"learning_rate": 5.870967741935484e-08, |
|
"loss": 10.4457, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_loss": 11.462297439575195, |
|
"eval_runtime": 1.5542, |
|
"eval_samples_per_second": 79.139, |
|
"eval_steps_per_second": 10.294, |
|
"step": 5022 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 11.522425651550293, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.159, |
|
"eval_steps_per_second": 10.297, |
|
"step": 5084 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_loss": 11.419597625732422, |
|
"eval_runtime": 1.5519, |
|
"eval_samples_per_second": 79.26, |
|
"eval_steps_per_second": 10.31, |
|
"step": 5146 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 11.435912132263184, |
|
"eval_runtime": 1.572, |
|
"eval_samples_per_second": 78.243, |
|
"eval_steps_per_second": 10.178, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 11.434619903564453, |
|
"eval_runtime": 1.5634, |
|
"eval_samples_per_second": 78.674, |
|
"eval_steps_per_second": 10.234, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 11.463889122009277, |
|
"eval_runtime": 1.5521, |
|
"eval_samples_per_second": 79.248, |
|
"eval_steps_per_second": 10.309, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_loss": 11.230652809143066, |
|
"eval_runtime": 1.5468, |
|
"eval_samples_per_second": 79.521, |
|
"eval_steps_per_second": 10.344, |
|
"step": 5394 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 11.526171684265137, |
|
"eval_runtime": 1.5455, |
|
"eval_samples_per_second": 79.584, |
|
"eval_steps_per_second": 10.352, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 88.71, |
|
"learning_rate": 5.758064516129033e-08, |
|
"loss": 10.3854, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_loss": 11.665349006652832, |
|
"eval_runtime": 1.5505, |
|
"eval_samples_per_second": 79.329, |
|
"eval_steps_per_second": 10.319, |
|
"step": 5518 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 11.3471040725708, |
|
"eval_runtime": 1.5506, |
|
"eval_samples_per_second": 79.325, |
|
"eval_steps_per_second": 10.319, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_loss": 11.304780960083008, |
|
"eval_runtime": 1.5529, |
|
"eval_samples_per_second": 79.209, |
|
"eval_steps_per_second": 10.304, |
|
"step": 5642 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_loss": 11.36330509185791, |
|
"eval_runtime": 1.5593, |
|
"eval_samples_per_second": 78.88, |
|
"eval_steps_per_second": 10.261, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_loss": 11.343398094177246, |
|
"eval_runtime": 1.5672, |
|
"eval_samples_per_second": 78.483, |
|
"eval_steps_per_second": 10.209, |
|
"step": 5766 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_loss": 11.257966041564941, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.263, |
|
"eval_steps_per_second": 10.311, |
|
"step": 5828 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 11.270854949951172, |
|
"eval_runtime": 1.5445, |
|
"eval_samples_per_second": 79.635, |
|
"eval_steps_per_second": 10.359, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 11.198323249816895, |
|
"eval_runtime": 1.5471, |
|
"eval_samples_per_second": 79.503, |
|
"eval_steps_per_second": 10.342, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 96.77, |
|
"learning_rate": 5.6451612903225805e-08, |
|
"loss": 10.324, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_loss": 11.22177791595459, |
|
"eval_runtime": 1.5536, |
|
"eval_samples_per_second": 79.171, |
|
"eval_steps_per_second": 10.299, |
|
"step": 6014 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_loss": 11.330657005310059, |
|
"eval_runtime": 1.5691, |
|
"eval_samples_per_second": 78.388, |
|
"eval_steps_per_second": 10.197, |
|
"step": 6076 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_loss": 11.203608512878418, |
|
"eval_runtime": 1.5575, |
|
"eval_samples_per_second": 78.972, |
|
"eval_steps_per_second": 10.273, |
|
"step": 6138 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 11.372391700744629, |
|
"eval_runtime": 1.5527, |
|
"eval_samples_per_second": 79.216, |
|
"eval_steps_per_second": 10.305, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_loss": 11.344269752502441, |
|
"eval_runtime": 1.5528, |
|
"eval_samples_per_second": 79.21, |
|
"eval_steps_per_second": 10.304, |
|
"step": 6262 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_loss": 11.264634132385254, |
|
"eval_runtime": 1.5533, |
|
"eval_samples_per_second": 79.188, |
|
"eval_steps_per_second": 10.301, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_loss": 11.27165699005127, |
|
"eval_runtime": 1.549, |
|
"eval_samples_per_second": 79.408, |
|
"eval_steps_per_second": 10.33, |
|
"step": 6386 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_loss": 11.310558319091797, |
|
"eval_runtime": 1.5464, |
|
"eval_samples_per_second": 79.542, |
|
"eval_steps_per_second": 10.347, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 104.84, |
|
"learning_rate": 5.532258064516129e-08, |
|
"loss": 10.2981, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_loss": 11.243507385253906, |
|
"eval_runtime": 1.5536, |
|
"eval_samples_per_second": 79.17, |
|
"eval_steps_per_second": 10.298, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_loss": 11.249150276184082, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.261, |
|
"eval_steps_per_second": 10.31, |
|
"step": 6572 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_loss": 11.111257553100586, |
|
"eval_runtime": 1.5527, |
|
"eval_samples_per_second": 79.218, |
|
"eval_steps_per_second": 10.305, |
|
"step": 6634 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_loss": 11.210649490356445, |
|
"eval_runtime": 1.557, |
|
"eval_samples_per_second": 78.999, |
|
"eval_steps_per_second": 10.276, |
|
"step": 6696 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_loss": 11.214300155639648, |
|
"eval_runtime": 1.5747, |
|
"eval_samples_per_second": 78.112, |
|
"eval_steps_per_second": 10.161, |
|
"step": 6758 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_loss": 11.015155792236328, |
|
"eval_runtime": 1.5516, |
|
"eval_samples_per_second": 79.274, |
|
"eval_steps_per_second": 10.312, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_loss": 11.08711051940918, |
|
"eval_runtime": 1.5448, |
|
"eval_samples_per_second": 79.621, |
|
"eval_steps_per_second": 10.357, |
|
"step": 6882 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_loss": 11.098098754882812, |
|
"eval_runtime": 1.5449, |
|
"eval_samples_per_second": 79.617, |
|
"eval_steps_per_second": 10.357, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 112.9, |
|
"learning_rate": 5.4193548387096774e-08, |
|
"loss": 10.2536, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_loss": 11.243887901306152, |
|
"eval_runtime": 1.5475, |
|
"eval_samples_per_second": 79.483, |
|
"eval_steps_per_second": 10.339, |
|
"step": 7006 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_loss": 11.156224250793457, |
|
"eval_runtime": 1.5505, |
|
"eval_samples_per_second": 79.327, |
|
"eval_steps_per_second": 10.319, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_loss": 11.118559837341309, |
|
"eval_runtime": 1.5498, |
|
"eval_samples_per_second": 79.363, |
|
"eval_steps_per_second": 10.324, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_loss": 11.2282133102417, |
|
"eval_runtime": 1.562, |
|
"eval_samples_per_second": 78.744, |
|
"eval_steps_per_second": 10.243, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_loss": 11.289180755615234, |
|
"eval_runtime": 1.5484, |
|
"eval_samples_per_second": 79.437, |
|
"eval_steps_per_second": 10.333, |
|
"step": 7254 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_loss": 11.165923118591309, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.263, |
|
"eval_steps_per_second": 10.311, |
|
"step": 7316 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_loss": 11.34392261505127, |
|
"eval_runtime": 1.545, |
|
"eval_samples_per_second": 79.613, |
|
"eval_steps_per_second": 10.356, |
|
"step": 7378 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 11.199868202209473, |
|
"eval_runtime": 1.5455, |
|
"eval_samples_per_second": 79.586, |
|
"eval_steps_per_second": 10.353, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 120.97, |
|
"learning_rate": 5.306451612903226e-08, |
|
"loss": 10.2336, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_loss": 11.250391960144043, |
|
"eval_runtime": 1.5504, |
|
"eval_samples_per_second": 79.337, |
|
"eval_steps_per_second": 10.32, |
|
"step": 7502 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_loss": 11.289613723754883, |
|
"eval_runtime": 1.5534, |
|
"eval_samples_per_second": 79.18, |
|
"eval_steps_per_second": 10.3, |
|
"step": 7564 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_loss": 11.068822860717773, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.158, |
|
"eval_steps_per_second": 10.297, |
|
"step": 7626 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_loss": 11.16740894317627, |
|
"eval_runtime": 1.5519, |
|
"eval_samples_per_second": 79.258, |
|
"eval_steps_per_second": 10.31, |
|
"step": 7688 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_loss": 11.207221984863281, |
|
"eval_runtime": 1.5529, |
|
"eval_samples_per_second": 79.204, |
|
"eval_steps_per_second": 10.303, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_loss": 11.035326957702637, |
|
"eval_runtime": 1.552, |
|
"eval_samples_per_second": 79.253, |
|
"eval_steps_per_second": 10.309, |
|
"step": 7812 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_loss": 11.048843383789062, |
|
"eval_runtime": 1.548, |
|
"eval_samples_per_second": 79.46, |
|
"eval_steps_per_second": 10.336, |
|
"step": 7874 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_loss": 11.093210220336914, |
|
"eval_runtime": 1.5454, |
|
"eval_samples_per_second": 79.59, |
|
"eval_steps_per_second": 10.353, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_loss": 11.070518493652344, |
|
"eval_runtime": 1.5446, |
|
"eval_samples_per_second": 79.63, |
|
"eval_steps_per_second": 10.358, |
|
"step": 7998 |
|
}, |
|
{ |
|
"epoch": 129.03, |
|
"learning_rate": 5.193548387096775e-08, |
|
"loss": 10.2101, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_loss": 11.219257354736328, |
|
"eval_runtime": 1.5539, |
|
"eval_samples_per_second": 79.157, |
|
"eval_steps_per_second": 10.297, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_loss": 11.227781295776367, |
|
"eval_runtime": 1.5509, |
|
"eval_samples_per_second": 79.307, |
|
"eval_steps_per_second": 10.316, |
|
"step": 8122 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_loss": 11.136541366577148, |
|
"eval_runtime": 1.5783, |
|
"eval_samples_per_second": 77.932, |
|
"eval_steps_per_second": 10.137, |
|
"step": 8184 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_loss": 11.035323143005371, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.159, |
|
"eval_steps_per_second": 10.297, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_loss": 11.014593124389648, |
|
"eval_runtime": 1.5521, |
|
"eval_samples_per_second": 79.246, |
|
"eval_steps_per_second": 10.308, |
|
"step": 8308 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_loss": 11.237153053283691, |
|
"eval_runtime": 1.5457, |
|
"eval_samples_per_second": 79.575, |
|
"eval_steps_per_second": 10.351, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_loss": 11.104418754577637, |
|
"eval_runtime": 1.5452, |
|
"eval_samples_per_second": 79.6, |
|
"eval_steps_per_second": 10.355, |
|
"step": 8432 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_loss": 11.134021759033203, |
|
"eval_runtime": 1.5476, |
|
"eval_samples_per_second": 79.478, |
|
"eval_steps_per_second": 10.339, |
|
"step": 8494 |
|
}, |
|
{ |
|
"epoch": 137.1, |
|
"learning_rate": 5.0806451612903234e-08, |
|
"loss": 10.1991, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_loss": 11.075273513793945, |
|
"eval_runtime": 1.5484, |
|
"eval_samples_per_second": 79.436, |
|
"eval_steps_per_second": 10.333, |
|
"step": 8556 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_loss": 11.021109580993652, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.391, |
|
"eval_steps_per_second": 10.327, |
|
"step": 8618 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_loss": 10.930947303771973, |
|
"eval_runtime": 1.5551, |
|
"eval_samples_per_second": 79.096, |
|
"eval_steps_per_second": 10.289, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_loss": 10.977023124694824, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.263, |
|
"eval_steps_per_second": 10.311, |
|
"step": 8742 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_loss": 11.079144477844238, |
|
"eval_runtime": 1.5523, |
|
"eval_samples_per_second": 79.238, |
|
"eval_steps_per_second": 10.307, |
|
"step": 8804 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_loss": 11.116453170776367, |
|
"eval_runtime": 1.5541, |
|
"eval_samples_per_second": 79.147, |
|
"eval_steps_per_second": 10.296, |
|
"step": 8866 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_loss": 10.936894416809082, |
|
"eval_runtime": 1.5532, |
|
"eval_samples_per_second": 79.19, |
|
"eval_steps_per_second": 10.301, |
|
"step": 8928 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_loss": 11.121102333068848, |
|
"eval_runtime": 1.5444, |
|
"eval_samples_per_second": 79.641, |
|
"eval_steps_per_second": 10.36, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 145.16, |
|
"learning_rate": 4.967741935483872e-08, |
|
"loss": 10.1925, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_loss": 11.074503898620605, |
|
"eval_runtime": 1.5533, |
|
"eval_samples_per_second": 79.185, |
|
"eval_steps_per_second": 10.301, |
|
"step": 9052 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_loss": 11.104596138000488, |
|
"eval_runtime": 1.553, |
|
"eval_samples_per_second": 79.201, |
|
"eval_steps_per_second": 10.303, |
|
"step": 9114 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_loss": 11.025848388671875, |
|
"eval_runtime": 1.5524, |
|
"eval_samples_per_second": 79.234, |
|
"eval_steps_per_second": 10.307, |
|
"step": 9176 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_loss": 11.02661418914795, |
|
"eval_runtime": 1.5531, |
|
"eval_samples_per_second": 79.198, |
|
"eval_steps_per_second": 10.302, |
|
"step": 9238 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_loss": 11.253996849060059, |
|
"eval_runtime": 1.5526, |
|
"eval_samples_per_second": 79.224, |
|
"eval_steps_per_second": 10.306, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_loss": 10.970392227172852, |
|
"eval_runtime": 1.5474, |
|
"eval_samples_per_second": 79.487, |
|
"eval_steps_per_second": 10.34, |
|
"step": 9362 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_loss": 11.034109115600586, |
|
"eval_runtime": 1.5453, |
|
"eval_samples_per_second": 79.599, |
|
"eval_steps_per_second": 10.354, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_loss": 11.111385345458984, |
|
"eval_runtime": 1.5448, |
|
"eval_samples_per_second": 79.622, |
|
"eval_steps_per_second": 10.357, |
|
"step": 9486 |
|
}, |
|
{ |
|
"epoch": 153.23, |
|
"learning_rate": 4.85483870967742e-08, |
|
"loss": 10.1538, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_loss": 11.022419929504395, |
|
"eval_runtime": 1.5528, |
|
"eval_samples_per_second": 79.213, |
|
"eval_steps_per_second": 10.304, |
|
"step": 9548 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_loss": 10.915043830871582, |
|
"eval_runtime": 1.6252, |
|
"eval_samples_per_second": 75.682, |
|
"eval_steps_per_second": 9.845, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_loss": 10.930474281311035, |
|
"eval_runtime": 1.5606, |
|
"eval_samples_per_second": 78.816, |
|
"eval_steps_per_second": 10.252, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_loss": 11.069900512695312, |
|
"eval_runtime": 1.5514, |
|
"eval_samples_per_second": 79.281, |
|
"eval_steps_per_second": 10.313, |
|
"step": 9734 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_loss": 10.990084648132324, |
|
"eval_runtime": 1.5524, |
|
"eval_samples_per_second": 79.23, |
|
"eval_steps_per_second": 10.306, |
|
"step": 9796 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_loss": 11.068836212158203, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.392, |
|
"eval_steps_per_second": 10.327, |
|
"step": 9858 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_loss": 10.933384895324707, |
|
"eval_runtime": 1.5461, |
|
"eval_samples_per_second": 79.553, |
|
"eval_steps_per_second": 10.348, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_loss": 10.985239028930664, |
|
"eval_runtime": 1.5514, |
|
"eval_samples_per_second": 79.283, |
|
"eval_steps_per_second": 10.313, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 161.29, |
|
"learning_rate": 4.741935483870968e-08, |
|
"loss": 10.1494, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_loss": 10.903247833251953, |
|
"eval_runtime": 1.5599, |
|
"eval_samples_per_second": 78.85, |
|
"eval_steps_per_second": 10.257, |
|
"step": 10044 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_loss": 10.96179485321045, |
|
"eval_runtime": 1.5509, |
|
"eval_samples_per_second": 79.309, |
|
"eval_steps_per_second": 10.317, |
|
"step": 10106 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_loss": 10.947345733642578, |
|
"eval_runtime": 1.5514, |
|
"eval_samples_per_second": 79.286, |
|
"eval_steps_per_second": 10.314, |
|
"step": 10168 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_loss": 11.153131484985352, |
|
"eval_runtime": 1.5559, |
|
"eval_samples_per_second": 79.053, |
|
"eval_steps_per_second": 10.283, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_loss": 10.846189498901367, |
|
"eval_runtime": 1.5522, |
|
"eval_samples_per_second": 79.244, |
|
"eval_steps_per_second": 10.308, |
|
"step": 10292 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_loss": 10.957653045654297, |
|
"eval_runtime": 1.5541, |
|
"eval_samples_per_second": 79.147, |
|
"eval_steps_per_second": 10.296, |
|
"step": 10354 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_loss": 10.935074806213379, |
|
"eval_runtime": 1.5492, |
|
"eval_samples_per_second": 79.394, |
|
"eval_steps_per_second": 10.328, |
|
"step": 10416 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_loss": 11.212529182434082, |
|
"eval_runtime": 1.545, |
|
"eval_samples_per_second": 79.61, |
|
"eval_steps_per_second": 10.356, |
|
"step": 10478 |
|
}, |
|
{ |
|
"epoch": 169.35, |
|
"learning_rate": 4.6290322580645165e-08, |
|
"loss": 10.1424, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_loss": 11.054618835449219, |
|
"eval_runtime": 1.5612, |
|
"eval_samples_per_second": 78.784, |
|
"eval_steps_per_second": 10.248, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_loss": 10.993365287780762, |
|
"eval_runtime": 1.5547, |
|
"eval_samples_per_second": 79.116, |
|
"eval_steps_per_second": 10.292, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_loss": 10.891836166381836, |
|
"eval_runtime": 1.5523, |
|
"eval_samples_per_second": 79.24, |
|
"eval_steps_per_second": 10.308, |
|
"step": 10664 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_loss": 10.886784553527832, |
|
"eval_runtime": 1.5513, |
|
"eval_samples_per_second": 79.29, |
|
"eval_steps_per_second": 10.314, |
|
"step": 10726 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_loss": 11.039985656738281, |
|
"eval_runtime": 1.6601, |
|
"eval_samples_per_second": 74.092, |
|
"eval_steps_per_second": 9.638, |
|
"step": 10788 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_loss": 11.119784355163574, |
|
"eval_runtime": 1.5449, |
|
"eval_samples_per_second": 79.617, |
|
"eval_steps_per_second": 10.357, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_loss": 11.159457206726074, |
|
"eval_runtime": 1.5467, |
|
"eval_samples_per_second": 79.525, |
|
"eval_steps_per_second": 10.345, |
|
"step": 10912 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_loss": 11.113701820373535, |
|
"eval_runtime": 1.5449, |
|
"eval_samples_per_second": 79.616, |
|
"eval_steps_per_second": 10.357, |
|
"step": 10974 |
|
}, |
|
{ |
|
"epoch": 177.42, |
|
"learning_rate": 4.516129032258065e-08, |
|
"loss": 10.1181, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_loss": 11.199760437011719, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.159, |
|
"eval_steps_per_second": 10.297, |
|
"step": 11036 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_loss": 11.049378395080566, |
|
"eval_runtime": 1.5527, |
|
"eval_samples_per_second": 79.215, |
|
"eval_steps_per_second": 10.304, |
|
"step": 11098 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_loss": 10.948933601379395, |
|
"eval_runtime": 1.5527, |
|
"eval_samples_per_second": 79.216, |
|
"eval_steps_per_second": 10.305, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_loss": 11.02480411529541, |
|
"eval_runtime": 1.5509, |
|
"eval_samples_per_second": 79.306, |
|
"eval_steps_per_second": 10.316, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_loss": 10.991153717041016, |
|
"eval_runtime": 1.5561, |
|
"eval_samples_per_second": 79.045, |
|
"eval_steps_per_second": 10.282, |
|
"step": 11284 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_loss": 10.922146797180176, |
|
"eval_runtime": 1.5488, |
|
"eval_samples_per_second": 79.416, |
|
"eval_steps_per_second": 10.331, |
|
"step": 11346 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_loss": 11.055255889892578, |
|
"eval_runtime": 1.547, |
|
"eval_samples_per_second": 79.508, |
|
"eval_steps_per_second": 10.343, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_loss": 11.005462646484375, |
|
"eval_runtime": 1.5452, |
|
"eval_samples_per_second": 79.6, |
|
"eval_steps_per_second": 10.354, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 185.48, |
|
"learning_rate": 4.4032258064516134e-08, |
|
"loss": 10.1165, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_loss": 11.112872123718262, |
|
"eval_runtime": 1.5624, |
|
"eval_samples_per_second": 78.724, |
|
"eval_steps_per_second": 10.24, |
|
"step": 11532 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_loss": 11.082246780395508, |
|
"eval_runtime": 1.5529, |
|
"eval_samples_per_second": 79.209, |
|
"eval_steps_per_second": 10.304, |
|
"step": 11594 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_loss": 11.050854682922363, |
|
"eval_runtime": 1.5513, |
|
"eval_samples_per_second": 79.287, |
|
"eval_steps_per_second": 10.314, |
|
"step": 11656 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_loss": 11.088144302368164, |
|
"eval_runtime": 1.5522, |
|
"eval_samples_per_second": 79.24, |
|
"eval_steps_per_second": 10.308, |
|
"step": 11718 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_loss": 10.890948295593262, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.263, |
|
"eval_steps_per_second": 10.311, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_loss": 11.048895835876465, |
|
"eval_runtime": 1.5439, |
|
"eval_samples_per_second": 79.668, |
|
"eval_steps_per_second": 10.363, |
|
"step": 11842 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_loss": 10.934896469116211, |
|
"eval_runtime": 1.548, |
|
"eval_samples_per_second": 79.46, |
|
"eval_steps_per_second": 10.336, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_loss": 10.882064819335938, |
|
"eval_runtime": 1.5473, |
|
"eval_samples_per_second": 79.492, |
|
"eval_steps_per_second": 10.34, |
|
"step": 11966 |
|
}, |
|
{ |
|
"epoch": 193.55, |
|
"learning_rate": 4.290322580645162e-08, |
|
"loss": 10.1021, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_loss": 11.055244445800781, |
|
"eval_runtime": 1.5566, |
|
"eval_samples_per_second": 79.017, |
|
"eval_steps_per_second": 10.279, |
|
"step": 12028 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"eval_loss": 11.152359962463379, |
|
"eval_runtime": 1.5527, |
|
"eval_samples_per_second": 79.217, |
|
"eval_steps_per_second": 10.305, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_loss": 11.008437156677246, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.158, |
|
"eval_steps_per_second": 10.297, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"eval_loss": 11.086997032165527, |
|
"eval_runtime": 1.5634, |
|
"eval_samples_per_second": 78.677, |
|
"eval_steps_per_second": 10.234, |
|
"step": 12214 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_loss": 10.842860221862793, |
|
"eval_runtime": 1.5635, |
|
"eval_samples_per_second": 78.671, |
|
"eval_steps_per_second": 10.234, |
|
"step": 12276 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"eval_loss": 11.010416030883789, |
|
"eval_runtime": 1.5454, |
|
"eval_samples_per_second": 79.593, |
|
"eval_steps_per_second": 10.354, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 10.92686939239502, |
|
"eval_runtime": 1.545, |
|
"eval_samples_per_second": 79.613, |
|
"eval_steps_per_second": 10.356, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"eval_loss": 10.987348556518555, |
|
"eval_runtime": 1.548, |
|
"eval_samples_per_second": 79.458, |
|
"eval_steps_per_second": 10.336, |
|
"step": 12462 |
|
}, |
|
{ |
|
"epoch": 201.61, |
|
"learning_rate": 4.17741935483871e-08, |
|
"loss": 10.1022, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"eval_loss": 10.98776912689209, |
|
"eval_runtime": 1.57, |
|
"eval_samples_per_second": 78.342, |
|
"eval_steps_per_second": 10.191, |
|
"step": 12524 |
|
}, |
|
{ |
|
"epoch": 203.0, |
|
"eval_loss": 10.882826805114746, |
|
"eval_runtime": 1.5543, |
|
"eval_samples_per_second": 79.135, |
|
"eval_steps_per_second": 10.294, |
|
"step": 12586 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_loss": 11.043395042419434, |
|
"eval_runtime": 1.5636, |
|
"eval_samples_per_second": 78.662, |
|
"eval_steps_per_second": 10.233, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 205.0, |
|
"eval_loss": 10.953902244567871, |
|
"eval_runtime": 1.5575, |
|
"eval_samples_per_second": 78.972, |
|
"eval_steps_per_second": 10.273, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"eval_loss": 10.684782028198242, |
|
"eval_runtime": 1.5525, |
|
"eval_samples_per_second": 79.228, |
|
"eval_steps_per_second": 10.306, |
|
"step": 12772 |
|
}, |
|
{ |
|
"epoch": 207.0, |
|
"eval_loss": 11.177257537841797, |
|
"eval_runtime": 1.5483, |
|
"eval_samples_per_second": 79.441, |
|
"eval_steps_per_second": 10.334, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_loss": 10.996722221374512, |
|
"eval_runtime": 1.5543, |
|
"eval_samples_per_second": 79.138, |
|
"eval_steps_per_second": 10.294, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 209.0, |
|
"eval_loss": 10.998943328857422, |
|
"eval_runtime": 1.5532, |
|
"eval_samples_per_second": 79.19, |
|
"eval_steps_per_second": 10.301, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 209.68, |
|
"learning_rate": 4.064516129032259e-08, |
|
"loss": 10.0962, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"eval_loss": 11.20012378692627, |
|
"eval_runtime": 1.5557, |
|
"eval_samples_per_second": 79.063, |
|
"eval_steps_per_second": 10.285, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 211.0, |
|
"eval_loss": 10.965839385986328, |
|
"eval_runtime": 1.554, |
|
"eval_samples_per_second": 79.148, |
|
"eval_steps_per_second": 10.296, |
|
"step": 13082 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_loss": 11.009934425354004, |
|
"eval_runtime": 1.5719, |
|
"eval_samples_per_second": 78.252, |
|
"eval_steps_per_second": 10.179, |
|
"step": 13144 |
|
}, |
|
{ |
|
"epoch": 213.0, |
|
"eval_loss": 11.053404808044434, |
|
"eval_runtime": 1.5544, |
|
"eval_samples_per_second": 79.129, |
|
"eval_steps_per_second": 10.293, |
|
"step": 13206 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"eval_loss": 11.185773849487305, |
|
"eval_runtime": 1.5545, |
|
"eval_samples_per_second": 79.125, |
|
"eval_steps_per_second": 10.293, |
|
"step": 13268 |
|
}, |
|
{ |
|
"epoch": 215.0, |
|
"eval_loss": 11.044096946716309, |
|
"eval_runtime": 1.5449, |
|
"eval_samples_per_second": 79.615, |
|
"eval_steps_per_second": 10.356, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_loss": 11.181927680969238, |
|
"eval_runtime": 1.5461, |
|
"eval_samples_per_second": 79.553, |
|
"eval_steps_per_second": 10.348, |
|
"step": 13392 |
|
}, |
|
{ |
|
"epoch": 217.0, |
|
"eval_loss": 11.01004695892334, |
|
"eval_runtime": 1.5451, |
|
"eval_samples_per_second": 79.607, |
|
"eval_steps_per_second": 10.355, |
|
"step": 13454 |
|
}, |
|
{ |
|
"epoch": 217.74, |
|
"learning_rate": 3.951612903225807e-08, |
|
"loss": 10.0861, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"eval_loss": 11.007074356079102, |
|
"eval_runtime": 1.5582, |
|
"eval_samples_per_second": 78.937, |
|
"eval_steps_per_second": 10.268, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 219.0, |
|
"eval_loss": 11.145269393920898, |
|
"eval_runtime": 1.5533, |
|
"eval_samples_per_second": 79.188, |
|
"eval_steps_per_second": 10.301, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_loss": 10.985397338867188, |
|
"eval_runtime": 1.5653, |
|
"eval_samples_per_second": 78.577, |
|
"eval_steps_per_second": 10.221, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 221.0, |
|
"eval_loss": 11.120607376098633, |
|
"eval_runtime": 1.5535, |
|
"eval_samples_per_second": 79.174, |
|
"eval_steps_per_second": 10.299, |
|
"step": 13702 |
|
}, |
|
{ |
|
"epoch": 222.0, |
|
"eval_loss": 11.069469451904297, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.159, |
|
"eval_steps_per_second": 10.297, |
|
"step": 13764 |
|
}, |
|
{ |
|
"epoch": 223.0, |
|
"eval_loss": 11.070048332214355, |
|
"eval_runtime": 1.5473, |
|
"eval_samples_per_second": 79.495, |
|
"eval_steps_per_second": 10.341, |
|
"step": 13826 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_loss": 10.968293190002441, |
|
"eval_runtime": 1.547, |
|
"eval_samples_per_second": 79.509, |
|
"eval_steps_per_second": 10.343, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"eval_loss": 10.9319486618042, |
|
"eval_runtime": 1.5452, |
|
"eval_samples_per_second": 79.599, |
|
"eval_steps_per_second": 10.354, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 225.81, |
|
"learning_rate": 3.838709677419355e-08, |
|
"loss": 10.0808, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"eval_loss": 10.993424415588379, |
|
"eval_runtime": 1.57, |
|
"eval_samples_per_second": 78.345, |
|
"eval_steps_per_second": 10.191, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 227.0, |
|
"eval_loss": 10.878746032714844, |
|
"eval_runtime": 1.5608, |
|
"eval_samples_per_second": 78.805, |
|
"eval_steps_per_second": 10.251, |
|
"step": 14074 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_loss": 10.905098915100098, |
|
"eval_runtime": 1.5539, |
|
"eval_samples_per_second": 79.155, |
|
"eval_steps_per_second": 10.297, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 229.0, |
|
"eval_loss": 11.037513732910156, |
|
"eval_runtime": 1.5551, |
|
"eval_samples_per_second": 79.093, |
|
"eval_steps_per_second": 10.289, |
|
"step": 14198 |
|
}, |
|
{ |
|
"epoch": 230.0, |
|
"eval_loss": 11.025934219360352, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.161, |
|
"eval_steps_per_second": 10.297, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 231.0, |
|
"eval_loss": 11.063486099243164, |
|
"eval_runtime": 1.5562, |
|
"eval_samples_per_second": 79.041, |
|
"eval_steps_per_second": 10.282, |
|
"step": 14322 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_loss": 10.88222885131836, |
|
"eval_runtime": 1.5487, |
|
"eval_samples_per_second": 79.422, |
|
"eval_steps_per_second": 10.331, |
|
"step": 14384 |
|
}, |
|
{ |
|
"epoch": 233.0, |
|
"eval_loss": 10.89100170135498, |
|
"eval_runtime": 1.5457, |
|
"eval_samples_per_second": 79.576, |
|
"eval_steps_per_second": 10.351, |
|
"step": 14446 |
|
}, |
|
{ |
|
"epoch": 233.87, |
|
"learning_rate": 3.7258064516129034e-08, |
|
"loss": 10.0768, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 234.0, |
|
"eval_loss": 10.974419593811035, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 79.16, |
|
"eval_steps_per_second": 10.297, |
|
"step": 14508 |
|
}, |
|
{ |
|
"epoch": 235.0, |
|
"eval_loss": 11.042181015014648, |
|
"eval_runtime": 1.58, |
|
"eval_samples_per_second": 77.85, |
|
"eval_steps_per_second": 10.127, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_loss": 11.03979778289795, |
|
"eval_runtime": 1.5648, |
|
"eval_samples_per_second": 78.605, |
|
"eval_steps_per_second": 10.225, |
|
"step": 14632 |
|
}, |
|
{ |
|
"epoch": 237.0, |
|
"eval_loss": 10.850703239440918, |
|
"eval_runtime": 1.5587, |
|
"eval_samples_per_second": 78.914, |
|
"eval_steps_per_second": 10.265, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 238.0, |
|
"eval_loss": 11.022680282592773, |
|
"eval_runtime": 1.5543, |
|
"eval_samples_per_second": 79.135, |
|
"eval_steps_per_second": 10.294, |
|
"step": 14756 |
|
}, |
|
{ |
|
"epoch": 239.0, |
|
"eval_loss": 11.027267456054688, |
|
"eval_runtime": 1.5884, |
|
"eval_samples_per_second": 77.438, |
|
"eval_steps_per_second": 10.073, |
|
"step": 14818 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_loss": 11.07776165008545, |
|
"eval_runtime": 1.5492, |
|
"eval_samples_per_second": 79.395, |
|
"eval_steps_per_second": 10.328, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"eval_loss": 11.077691078186035, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.393, |
|
"eval_steps_per_second": 10.328, |
|
"step": 14942 |
|
}, |
|
{ |
|
"epoch": 241.94, |
|
"learning_rate": 3.612903225806452e-08, |
|
"loss": 10.0792, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 242.0, |
|
"eval_loss": 10.942319869995117, |
|
"eval_runtime": 1.5569, |
|
"eval_samples_per_second": 79.005, |
|
"eval_steps_per_second": 10.277, |
|
"step": 15004 |
|
}, |
|
{ |
|
"epoch": 243.0, |
|
"eval_loss": 11.226703643798828, |
|
"eval_runtime": 1.5593, |
|
"eval_samples_per_second": 78.88, |
|
"eval_steps_per_second": 10.261, |
|
"step": 15066 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_loss": 10.85287094116211, |
|
"eval_runtime": 1.564, |
|
"eval_samples_per_second": 78.644, |
|
"eval_steps_per_second": 10.23, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 245.0, |
|
"eval_loss": 11.056612968444824, |
|
"eval_runtime": 1.5825, |
|
"eval_samples_per_second": 77.725, |
|
"eval_steps_per_second": 10.111, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 246.0, |
|
"eval_loss": 10.887572288513184, |
|
"eval_runtime": 1.5637, |
|
"eval_samples_per_second": 78.661, |
|
"eval_steps_per_second": 10.232, |
|
"step": 15252 |
|
}, |
|
{ |
|
"epoch": 247.0, |
|
"eval_loss": 11.144104957580566, |
|
"eval_runtime": 1.5574, |
|
"eval_samples_per_second": 78.978, |
|
"eval_steps_per_second": 10.274, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_loss": 10.909028053283691, |
|
"eval_runtime": 1.5501, |
|
"eval_samples_per_second": 79.35, |
|
"eval_steps_per_second": 10.322, |
|
"step": 15376 |
|
}, |
|
{ |
|
"epoch": 249.0, |
|
"eval_loss": 10.97555923461914, |
|
"eval_runtime": 1.5502, |
|
"eval_samples_per_second": 79.347, |
|
"eval_steps_per_second": 10.322, |
|
"step": 15438 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 3.5e-08, |
|
"loss": 10.0832, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_loss": 10.953834533691406, |
|
"eval_runtime": 1.5699, |
|
"eval_samples_per_second": 78.347, |
|
"eval_steps_per_second": 10.192, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"eval_loss": 10.842375755310059, |
|
"eval_runtime": 1.5575, |
|
"eval_samples_per_second": 78.974, |
|
"eval_steps_per_second": 10.273, |
|
"step": 15562 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_loss": 10.767086029052734, |
|
"eval_runtime": 1.5579, |
|
"eval_samples_per_second": 78.953, |
|
"eval_steps_per_second": 10.27, |
|
"step": 15624 |
|
}, |
|
{ |
|
"epoch": 253.0, |
|
"eval_loss": 10.963540077209473, |
|
"eval_runtime": 1.5571, |
|
"eval_samples_per_second": 78.992, |
|
"eval_steps_per_second": 10.275, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 254.0, |
|
"eval_loss": 10.849014282226562, |
|
"eval_runtime": 1.5574, |
|
"eval_samples_per_second": 78.976, |
|
"eval_steps_per_second": 10.273, |
|
"step": 15748 |
|
}, |
|
{ |
|
"epoch": 255.0, |
|
"eval_loss": 11.134928703308105, |
|
"eval_runtime": 1.5686, |
|
"eval_samples_per_second": 78.416, |
|
"eval_steps_per_second": 10.2, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_loss": 10.884407043457031, |
|
"eval_runtime": 1.5495, |
|
"eval_samples_per_second": 79.381, |
|
"eval_steps_per_second": 10.326, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 257.0, |
|
"eval_loss": 10.865640640258789, |
|
"eval_runtime": 1.552, |
|
"eval_samples_per_second": 79.251, |
|
"eval_steps_per_second": 10.309, |
|
"step": 15934 |
|
}, |
|
{ |
|
"epoch": 258.0, |
|
"eval_loss": 10.823393821716309, |
|
"eval_runtime": 1.5505, |
|
"eval_samples_per_second": 79.33, |
|
"eval_steps_per_second": 10.319, |
|
"step": 15996 |
|
}, |
|
{ |
|
"epoch": 258.06, |
|
"learning_rate": 3.387096774193549e-08, |
|
"loss": 10.0747, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"eval_loss": 10.950251579284668, |
|
"eval_runtime": 1.5601, |
|
"eval_samples_per_second": 78.843, |
|
"eval_steps_per_second": 10.256, |
|
"step": 16058 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_loss": 10.885615348815918, |
|
"eval_runtime": 1.5592, |
|
"eval_samples_per_second": 78.886, |
|
"eval_steps_per_second": 10.262, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 261.0, |
|
"eval_loss": 11.051911354064941, |
|
"eval_runtime": 1.5755, |
|
"eval_samples_per_second": 78.069, |
|
"eval_steps_per_second": 10.155, |
|
"step": 16182 |
|
}, |
|
{ |
|
"epoch": 262.0, |
|
"eval_loss": 10.886160850524902, |
|
"eval_runtime": 1.557, |
|
"eval_samples_per_second": 79.0, |
|
"eval_steps_per_second": 10.276, |
|
"step": 16244 |
|
}, |
|
{ |
|
"epoch": 263.0, |
|
"eval_loss": 10.897393226623535, |
|
"eval_runtime": 1.5641, |
|
"eval_samples_per_second": 78.64, |
|
"eval_steps_per_second": 10.23, |
|
"step": 16306 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_loss": 10.84638500213623, |
|
"eval_runtime": 1.5498, |
|
"eval_samples_per_second": 79.364, |
|
"eval_steps_per_second": 10.324, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 265.0, |
|
"eval_loss": 10.965924263000488, |
|
"eval_runtime": 1.5489, |
|
"eval_samples_per_second": 79.411, |
|
"eval_steps_per_second": 10.33, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 266.0, |
|
"eval_loss": 10.742626190185547, |
|
"eval_runtime": 1.55, |
|
"eval_samples_per_second": 79.357, |
|
"eval_steps_per_second": 10.323, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 266.13, |
|
"learning_rate": 3.274193548387097e-08, |
|
"loss": 10.0678, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 267.0, |
|
"eval_loss": 10.985177040100098, |
|
"eval_runtime": 1.5698, |
|
"eval_samples_per_second": 78.356, |
|
"eval_steps_per_second": 10.193, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_loss": 11.049816131591797, |
|
"eval_runtime": 1.557, |
|
"eval_samples_per_second": 79.0, |
|
"eval_steps_per_second": 10.276, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 269.0, |
|
"eval_loss": 10.7893648147583, |
|
"eval_runtime": 1.5584, |
|
"eval_samples_per_second": 78.929, |
|
"eval_steps_per_second": 10.267, |
|
"step": 16678 |
|
}, |
|
{ |
|
"epoch": 270.0, |
|
"eval_loss": 11.10730266571045, |
|
"eval_runtime": 1.5594, |
|
"eval_samples_per_second": 78.878, |
|
"eval_steps_per_second": 10.261, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 271.0, |
|
"eval_loss": 10.826376914978027, |
|
"eval_runtime": 1.5624, |
|
"eval_samples_per_second": 78.727, |
|
"eval_steps_per_second": 10.241, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_loss": 10.931463241577148, |
|
"eval_runtime": 1.5486, |
|
"eval_samples_per_second": 79.427, |
|
"eval_steps_per_second": 10.332, |
|
"step": 16864 |
|
}, |
|
{ |
|
"epoch": 273.0, |
|
"eval_loss": 10.775986671447754, |
|
"eval_runtime": 1.5509, |
|
"eval_samples_per_second": 79.307, |
|
"eval_steps_per_second": 10.316, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 274.0, |
|
"eval_loss": 10.972262382507324, |
|
"eval_runtime": 1.566, |
|
"eval_samples_per_second": 78.543, |
|
"eval_steps_per_second": 10.217, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 274.19, |
|
"learning_rate": 3.1612903225806456e-08, |
|
"loss": 10.0631, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"eval_loss": 10.748165130615234, |
|
"eval_runtime": 1.5567, |
|
"eval_samples_per_second": 79.014, |
|
"eval_steps_per_second": 10.278, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_loss": 11.049092292785645, |
|
"eval_runtime": 1.5587, |
|
"eval_samples_per_second": 78.912, |
|
"eval_steps_per_second": 10.265, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 277.0, |
|
"eval_loss": 10.751622200012207, |
|
"eval_runtime": 1.5568, |
|
"eval_samples_per_second": 79.007, |
|
"eval_steps_per_second": 10.277, |
|
"step": 17174 |
|
}, |
|
{ |
|
"epoch": 278.0, |
|
"eval_loss": 10.950490951538086, |
|
"eval_runtime": 1.5586, |
|
"eval_samples_per_second": 78.916, |
|
"eval_steps_per_second": 10.266, |
|
"step": 17236 |
|
}, |
|
{ |
|
"epoch": 279.0, |
|
"eval_loss": 10.889892578125, |
|
"eval_runtime": 1.5568, |
|
"eval_samples_per_second": 79.01, |
|
"eval_steps_per_second": 10.278, |
|
"step": 17298 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_loss": 10.786404609680176, |
|
"eval_runtime": 1.557, |
|
"eval_samples_per_second": 78.998, |
|
"eval_steps_per_second": 10.276, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 281.0, |
|
"eval_loss": 10.865982055664062, |
|
"eval_runtime": 1.5496, |
|
"eval_samples_per_second": 79.377, |
|
"eval_steps_per_second": 10.326, |
|
"step": 17422 |
|
}, |
|
{ |
|
"epoch": 282.0, |
|
"eval_loss": 10.97549819946289, |
|
"eval_runtime": 1.5507, |
|
"eval_samples_per_second": 79.318, |
|
"eval_steps_per_second": 10.318, |
|
"step": 17484 |
|
}, |
|
{ |
|
"epoch": 282.26, |
|
"learning_rate": 3.048387096774194e-08, |
|
"loss": 10.0464, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 283.0, |
|
"eval_loss": 10.970197677612305, |
|
"eval_runtime": 1.5595, |
|
"eval_samples_per_second": 78.873, |
|
"eval_steps_per_second": 10.26, |
|
"step": 17546 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_loss": 10.966419219970703, |
|
"eval_runtime": 1.5592, |
|
"eval_samples_per_second": 78.887, |
|
"eval_steps_per_second": 10.262, |
|
"step": 17608 |
|
}, |
|
{ |
|
"epoch": 285.0, |
|
"eval_loss": 10.941322326660156, |
|
"eval_runtime": 1.5575, |
|
"eval_samples_per_second": 78.971, |
|
"eval_steps_per_second": 10.273, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 286.0, |
|
"eval_loss": 10.966172218322754, |
|
"eval_runtime": 1.5626, |
|
"eval_samples_per_second": 78.714, |
|
"eval_steps_per_second": 10.239, |
|
"step": 17732 |
|
}, |
|
{ |
|
"epoch": 287.0, |
|
"eval_loss": 10.951107025146484, |
|
"eval_runtime": 1.5766, |
|
"eval_samples_per_second": 78.017, |
|
"eval_steps_per_second": 10.149, |
|
"step": 17794 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_loss": 10.864799499511719, |
|
"eval_runtime": 1.5527, |
|
"eval_samples_per_second": 79.216, |
|
"eval_steps_per_second": 10.305, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 289.0, |
|
"eval_loss": 10.88668441772461, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 79.261, |
|
"eval_steps_per_second": 10.31, |
|
"step": 17918 |
|
}, |
|
{ |
|
"epoch": 290.0, |
|
"eval_loss": 11.037867546081543, |
|
"eval_runtime": 1.5494, |
|
"eval_samples_per_second": 79.386, |
|
"eval_steps_per_second": 10.327, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 290.32, |
|
"learning_rate": 2.935483870967742e-08, |
|
"loss": 10.059, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 291.0, |
|
"eval_loss": 10.958696365356445, |
|
"eval_runtime": 1.5631, |
|
"eval_samples_per_second": 78.692, |
|
"eval_steps_per_second": 10.236, |
|
"step": 18042 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_loss": 10.871495246887207, |
|
"eval_runtime": 1.5627, |
|
"eval_samples_per_second": 78.71, |
|
"eval_steps_per_second": 10.239, |
|
"step": 18104 |
|
}, |
|
{ |
|
"epoch": 293.0, |
|
"eval_loss": 11.032896041870117, |
|
"eval_runtime": 1.5647, |
|
"eval_samples_per_second": 78.608, |
|
"eval_steps_per_second": 10.225, |
|
"step": 18166 |
|
}, |
|
{ |
|
"epoch": 294.0, |
|
"eval_loss": 10.993353843688965, |
|
"eval_runtime": 1.557, |
|
"eval_samples_per_second": 78.998, |
|
"eval_steps_per_second": 10.276, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 295.0, |
|
"eval_loss": 11.031813621520996, |
|
"eval_runtime": 1.5556, |
|
"eval_samples_per_second": 79.072, |
|
"eval_steps_per_second": 10.286, |
|
"step": 18290 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_loss": 10.949562072753906, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.389, |
|
"eval_steps_per_second": 10.327, |
|
"step": 18352 |
|
}, |
|
{ |
|
"epoch": 297.0, |
|
"eval_loss": 10.985105514526367, |
|
"eval_runtime": 1.5496, |
|
"eval_samples_per_second": 79.373, |
|
"eval_steps_per_second": 10.325, |
|
"step": 18414 |
|
}, |
|
{ |
|
"epoch": 298.0, |
|
"eval_loss": 10.904229164123535, |
|
"eval_runtime": 1.5499, |
|
"eval_samples_per_second": 79.359, |
|
"eval_steps_per_second": 10.323, |
|
"step": 18476 |
|
}, |
|
{ |
|
"epoch": 298.39, |
|
"learning_rate": 2.8225806451612902e-08, |
|
"loss": 10.0478, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 299.0, |
|
"eval_loss": 10.848288536071777, |
|
"eval_runtime": 1.5652, |
|
"eval_samples_per_second": 78.585, |
|
"eval_steps_per_second": 10.222, |
|
"step": 18538 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_loss": 11.128362655639648, |
|
"eval_runtime": 1.5589, |
|
"eval_samples_per_second": 78.903, |
|
"eval_steps_per_second": 10.264, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 301.0, |
|
"eval_loss": 10.965646743774414, |
|
"eval_runtime": 1.5837, |
|
"eval_samples_per_second": 77.668, |
|
"eval_steps_per_second": 10.103, |
|
"step": 18662 |
|
}, |
|
{ |
|
"epoch": 302.0, |
|
"eval_loss": 10.962158203125, |
|
"eval_runtime": 1.5581, |
|
"eval_samples_per_second": 78.944, |
|
"eval_steps_per_second": 10.269, |
|
"step": 18724 |
|
}, |
|
{ |
|
"epoch": 303.0, |
|
"eval_loss": 11.022586822509766, |
|
"eval_runtime": 1.5559, |
|
"eval_samples_per_second": 79.056, |
|
"eval_steps_per_second": 10.284, |
|
"step": 18786 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_loss": 10.998231887817383, |
|
"eval_runtime": 1.5516, |
|
"eval_samples_per_second": 79.272, |
|
"eval_steps_per_second": 10.312, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 305.0, |
|
"eval_loss": 10.964576721191406, |
|
"eval_runtime": 1.5886, |
|
"eval_samples_per_second": 77.425, |
|
"eval_steps_per_second": 10.072, |
|
"step": 18910 |
|
}, |
|
{ |
|
"epoch": 306.0, |
|
"eval_loss": 11.094855308532715, |
|
"eval_runtime": 1.549, |
|
"eval_samples_per_second": 79.406, |
|
"eval_steps_per_second": 10.329, |
|
"step": 18972 |
|
}, |
|
{ |
|
"epoch": 306.45, |
|
"learning_rate": 2.7096774193548387e-08, |
|
"loss": 10.0513, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 307.0, |
|
"eval_loss": 11.073990821838379, |
|
"eval_runtime": 1.5592, |
|
"eval_samples_per_second": 78.885, |
|
"eval_steps_per_second": 10.261, |
|
"step": 19034 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_loss": 10.888773918151855, |
|
"eval_runtime": 1.5638, |
|
"eval_samples_per_second": 78.652, |
|
"eval_steps_per_second": 10.231, |
|
"step": 19096 |
|
}, |
|
{ |
|
"epoch": 309.0, |
|
"eval_loss": 11.247567176818848, |
|
"eval_runtime": 1.5616, |
|
"eval_samples_per_second": 78.763, |
|
"eval_steps_per_second": 10.246, |
|
"step": 19158 |
|
}, |
|
{ |
|
"epoch": 310.0, |
|
"eval_loss": 10.999629020690918, |
|
"eval_runtime": 1.5902, |
|
"eval_samples_per_second": 77.348, |
|
"eval_steps_per_second": 10.062, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 311.0, |
|
"eval_loss": 10.804391860961914, |
|
"eval_runtime": 1.5586, |
|
"eval_samples_per_second": 78.919, |
|
"eval_steps_per_second": 10.266, |
|
"step": 19282 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_loss": 10.949257850646973, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.391, |
|
"eval_steps_per_second": 10.327, |
|
"step": 19344 |
|
}, |
|
{ |
|
"epoch": 313.0, |
|
"eval_loss": 10.94599723815918, |
|
"eval_runtime": 1.5498, |
|
"eval_samples_per_second": 79.367, |
|
"eval_steps_per_second": 10.324, |
|
"step": 19406 |
|
}, |
|
{ |
|
"epoch": 314.0, |
|
"eval_loss": 10.966387748718262, |
|
"eval_runtime": 1.5504, |
|
"eval_samples_per_second": 79.336, |
|
"eval_steps_per_second": 10.32, |
|
"step": 19468 |
|
}, |
|
{ |
|
"epoch": 314.52, |
|
"learning_rate": 2.5967741935483875e-08, |
|
"loss": 10.0363, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 315.0, |
|
"eval_loss": 10.91958999633789, |
|
"eval_runtime": 1.5704, |
|
"eval_samples_per_second": 78.324, |
|
"eval_steps_per_second": 10.188, |
|
"step": 19530 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_loss": 10.921772956848145, |
|
"eval_runtime": 1.5577, |
|
"eval_samples_per_second": 78.964, |
|
"eval_steps_per_second": 10.272, |
|
"step": 19592 |
|
}, |
|
{ |
|
"epoch": 317.0, |
|
"eval_loss": 10.800854682922363, |
|
"eval_runtime": 1.5565, |
|
"eval_samples_per_second": 79.024, |
|
"eval_steps_per_second": 10.28, |
|
"step": 19654 |
|
}, |
|
{ |
|
"epoch": 318.0, |
|
"eval_loss": 11.121504783630371, |
|
"eval_runtime": 1.5574, |
|
"eval_samples_per_second": 78.979, |
|
"eval_steps_per_second": 10.274, |
|
"step": 19716 |
|
}, |
|
{ |
|
"epoch": 319.0, |
|
"eval_loss": 10.96510124206543, |
|
"eval_runtime": 1.5619, |
|
"eval_samples_per_second": 78.751, |
|
"eval_steps_per_second": 10.244, |
|
"step": 19778 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_loss": 10.995678901672363, |
|
"eval_runtime": 1.5505, |
|
"eval_samples_per_second": 79.33, |
|
"eval_steps_per_second": 10.319, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 321.0, |
|
"eval_loss": 11.144912719726562, |
|
"eval_runtime": 1.5521, |
|
"eval_samples_per_second": 79.247, |
|
"eval_steps_per_second": 10.309, |
|
"step": 19902 |
|
}, |
|
{ |
|
"epoch": 322.0, |
|
"eval_loss": 11.01318645477295, |
|
"eval_runtime": 1.555, |
|
"eval_samples_per_second": 79.101, |
|
"eval_steps_per_second": 10.289, |
|
"step": 19964 |
|
}, |
|
{ |
|
"epoch": 322.58, |
|
"learning_rate": 2.483870967741936e-08, |
|
"loss": 10.0503, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 323.0, |
|
"eval_loss": 11.119362831115723, |
|
"eval_runtime": 1.5655, |
|
"eval_samples_per_second": 78.571, |
|
"eval_steps_per_second": 10.221, |
|
"step": 20026 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_loss": 10.964983940124512, |
|
"eval_runtime": 1.5692, |
|
"eval_samples_per_second": 78.382, |
|
"eval_steps_per_second": 10.196, |
|
"step": 20088 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"eval_loss": 10.841401100158691, |
|
"eval_runtime": 1.5595, |
|
"eval_samples_per_second": 78.871, |
|
"eval_steps_per_second": 10.26, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 326.0, |
|
"eval_loss": 10.86677360534668, |
|
"eval_runtime": 1.5533, |
|
"eval_samples_per_second": 79.184, |
|
"eval_steps_per_second": 10.3, |
|
"step": 20212 |
|
}, |
|
{ |
|
"epoch": 327.0, |
|
"eval_loss": 11.009995460510254, |
|
"eval_runtime": 1.5673, |
|
"eval_samples_per_second": 78.478, |
|
"eval_steps_per_second": 10.209, |
|
"step": 20274 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_loss": 10.875937461853027, |
|
"eval_runtime": 1.5499, |
|
"eval_samples_per_second": 79.359, |
|
"eval_steps_per_second": 10.323, |
|
"step": 20336 |
|
}, |
|
{ |
|
"epoch": 329.0, |
|
"eval_loss": 10.965596199035645, |
|
"eval_runtime": 1.562, |
|
"eval_samples_per_second": 78.744, |
|
"eval_steps_per_second": 10.243, |
|
"step": 20398 |
|
}, |
|
{ |
|
"epoch": 330.0, |
|
"eval_loss": 11.007844924926758, |
|
"eval_runtime": 1.5553, |
|
"eval_samples_per_second": 79.085, |
|
"eval_steps_per_second": 10.287, |
|
"step": 20460 |
|
}, |
|
{ |
|
"epoch": 330.65, |
|
"learning_rate": 2.370967741935484e-08, |
|
"loss": 10.0223, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 331.0, |
|
"eval_loss": 11.0198335647583, |
|
"eval_runtime": 1.5833, |
|
"eval_samples_per_second": 77.688, |
|
"eval_steps_per_second": 10.106, |
|
"step": 20522 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_loss": 11.088841438293457, |
|
"eval_runtime": 1.5649, |
|
"eval_samples_per_second": 78.601, |
|
"eval_steps_per_second": 10.224, |
|
"step": 20584 |
|
}, |
|
{ |
|
"epoch": 333.0, |
|
"eval_loss": 11.105833053588867, |
|
"eval_runtime": 1.5609, |
|
"eval_samples_per_second": 78.798, |
|
"eval_steps_per_second": 10.25, |
|
"step": 20646 |
|
}, |
|
{ |
|
"epoch": 334.0, |
|
"eval_loss": 10.942315101623535, |
|
"eval_runtime": 1.5599, |
|
"eval_samples_per_second": 78.853, |
|
"eval_steps_per_second": 10.257, |
|
"step": 20708 |
|
}, |
|
{ |
|
"epoch": 335.0, |
|
"eval_loss": 11.103010177612305, |
|
"eval_runtime": 1.5806, |
|
"eval_samples_per_second": 77.817, |
|
"eval_steps_per_second": 10.123, |
|
"step": 20770 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_loss": 11.081936836242676, |
|
"eval_runtime": 1.5498, |
|
"eval_samples_per_second": 79.366, |
|
"eval_steps_per_second": 10.324, |
|
"step": 20832 |
|
}, |
|
{ |
|
"epoch": 337.0, |
|
"eval_loss": 10.90597152709961, |
|
"eval_runtime": 1.55, |
|
"eval_samples_per_second": 79.356, |
|
"eval_steps_per_second": 10.323, |
|
"step": 20894 |
|
}, |
|
{ |
|
"epoch": 338.0, |
|
"eval_loss": 11.074986457824707, |
|
"eval_runtime": 1.5577, |
|
"eval_samples_per_second": 78.962, |
|
"eval_steps_per_second": 10.271, |
|
"step": 20956 |
|
}, |
|
{ |
|
"epoch": 338.71, |
|
"learning_rate": 2.2580645161290325e-08, |
|
"loss": 10.0321, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 339.0, |
|
"eval_loss": 11.110125541687012, |
|
"eval_runtime": 1.5561, |
|
"eval_samples_per_second": 79.046, |
|
"eval_steps_per_second": 10.282, |
|
"step": 21018 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_loss": 10.971090316772461, |
|
"eval_runtime": 1.5625, |
|
"eval_samples_per_second": 78.722, |
|
"eval_steps_per_second": 10.24, |
|
"step": 21080 |
|
}, |
|
{ |
|
"epoch": 341.0, |
|
"eval_loss": 10.92870044708252, |
|
"eval_runtime": 1.5635, |
|
"eval_samples_per_second": 78.668, |
|
"eval_steps_per_second": 10.233, |
|
"step": 21142 |
|
}, |
|
{ |
|
"epoch": 342.0, |
|
"eval_loss": 10.796195983886719, |
|
"eval_runtime": 1.5574, |
|
"eval_samples_per_second": 78.978, |
|
"eval_steps_per_second": 10.273, |
|
"step": 21204 |
|
}, |
|
{ |
|
"epoch": 343.0, |
|
"eval_loss": 11.136199951171875, |
|
"eval_runtime": 1.5574, |
|
"eval_samples_per_second": 78.976, |
|
"eval_steps_per_second": 10.273, |
|
"step": 21266 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_loss": 11.102249145507812, |
|
"eval_runtime": 1.551, |
|
"eval_samples_per_second": 79.302, |
|
"eval_steps_per_second": 10.316, |
|
"step": 21328 |
|
}, |
|
{ |
|
"epoch": 345.0, |
|
"eval_loss": 10.999671936035156, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.392, |
|
"eval_steps_per_second": 10.327, |
|
"step": 21390 |
|
}, |
|
{ |
|
"epoch": 346.0, |
|
"eval_loss": 10.876097679138184, |
|
"eval_runtime": 1.5498, |
|
"eval_samples_per_second": 79.366, |
|
"eval_steps_per_second": 10.324, |
|
"step": 21452 |
|
}, |
|
{ |
|
"epoch": 346.77, |
|
"learning_rate": 2.145161290322581e-08, |
|
"loss": 10.0226, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 347.0, |
|
"eval_loss": 10.934317588806152, |
|
"eval_runtime": 1.5612, |
|
"eval_samples_per_second": 78.786, |
|
"eval_steps_per_second": 10.249, |
|
"step": 21514 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_loss": 10.910962104797363, |
|
"eval_runtime": 1.5564, |
|
"eval_samples_per_second": 79.028, |
|
"eval_steps_per_second": 10.28, |
|
"step": 21576 |
|
}, |
|
{ |
|
"epoch": 349.0, |
|
"eval_loss": 11.034477233886719, |
|
"eval_runtime": 1.5584, |
|
"eval_samples_per_second": 78.929, |
|
"eval_steps_per_second": 10.267, |
|
"step": 21638 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"eval_loss": 10.88716983795166, |
|
"eval_runtime": 1.5754, |
|
"eval_samples_per_second": 78.075, |
|
"eval_steps_per_second": 10.156, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 351.0, |
|
"eval_loss": 10.766716957092285, |
|
"eval_runtime": 1.5893, |
|
"eval_samples_per_second": 77.394, |
|
"eval_steps_per_second": 10.067, |
|
"step": 21762 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_loss": 11.056756973266602, |
|
"eval_runtime": 1.5503, |
|
"eval_samples_per_second": 79.339, |
|
"eval_steps_per_second": 10.321, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 353.0, |
|
"eval_loss": 11.028831481933594, |
|
"eval_runtime": 1.5494, |
|
"eval_samples_per_second": 79.385, |
|
"eval_steps_per_second": 10.326, |
|
"step": 21886 |
|
}, |
|
{ |
|
"epoch": 354.0, |
|
"eval_loss": 11.066434860229492, |
|
"eval_runtime": 1.5562, |
|
"eval_samples_per_second": 79.039, |
|
"eval_steps_per_second": 10.281, |
|
"step": 21948 |
|
}, |
|
{ |
|
"epoch": 354.84, |
|
"learning_rate": 2.0322580645161293e-08, |
|
"loss": 10.033, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 355.0, |
|
"eval_loss": 10.975112915039062, |
|
"eval_runtime": 1.5594, |
|
"eval_samples_per_second": 78.876, |
|
"eval_steps_per_second": 10.26, |
|
"step": 22010 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_loss": 10.856056213378906, |
|
"eval_runtime": 1.5565, |
|
"eval_samples_per_second": 79.022, |
|
"eval_steps_per_second": 10.279, |
|
"step": 22072 |
|
}, |
|
{ |
|
"epoch": 357.0, |
|
"eval_loss": 10.960885047912598, |
|
"eval_runtime": 1.5772, |
|
"eval_samples_per_second": 77.988, |
|
"eval_steps_per_second": 10.145, |
|
"step": 22134 |
|
}, |
|
{ |
|
"epoch": 358.0, |
|
"eval_loss": 11.000205993652344, |
|
"eval_runtime": 1.5685, |
|
"eval_samples_per_second": 78.418, |
|
"eval_steps_per_second": 10.201, |
|
"step": 22196 |
|
}, |
|
{ |
|
"epoch": 359.0, |
|
"eval_loss": 11.056760787963867, |
|
"eval_runtime": 1.5597, |
|
"eval_samples_per_second": 78.862, |
|
"eval_steps_per_second": 10.259, |
|
"step": 22258 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_loss": 10.958597183227539, |
|
"eval_runtime": 1.5568, |
|
"eval_samples_per_second": 79.009, |
|
"eval_steps_per_second": 10.278, |
|
"step": 22320 |
|
}, |
|
{ |
|
"epoch": 361.0, |
|
"eval_loss": 10.925392150878906, |
|
"eval_runtime": 1.5568, |
|
"eval_samples_per_second": 79.007, |
|
"eval_steps_per_second": 10.277, |
|
"step": 22382 |
|
}, |
|
{ |
|
"epoch": 362.0, |
|
"eval_loss": 11.065186500549316, |
|
"eval_runtime": 1.5511, |
|
"eval_samples_per_second": 79.297, |
|
"eval_steps_per_second": 10.315, |
|
"step": 22444 |
|
}, |
|
{ |
|
"epoch": 362.9, |
|
"learning_rate": 1.9193548387096775e-08, |
|
"loss": 10.0084, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 363.0, |
|
"eval_loss": 10.965627670288086, |
|
"eval_runtime": 1.5573, |
|
"eval_samples_per_second": 78.985, |
|
"eval_steps_per_second": 10.274, |
|
"step": 22506 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_loss": 11.061145782470703, |
|
"eval_runtime": 1.5616, |
|
"eval_samples_per_second": 78.765, |
|
"eval_steps_per_second": 10.246, |
|
"step": 22568 |
|
}, |
|
{ |
|
"epoch": 365.0, |
|
"eval_loss": 10.833497047424316, |
|
"eval_runtime": 1.5633, |
|
"eval_samples_per_second": 78.678, |
|
"eval_steps_per_second": 10.235, |
|
"step": 22630 |
|
}, |
|
{ |
|
"epoch": 366.0, |
|
"eval_loss": 10.980859756469727, |
|
"eval_runtime": 1.5593, |
|
"eval_samples_per_second": 78.883, |
|
"eval_steps_per_second": 10.261, |
|
"step": 22692 |
|
}, |
|
{ |
|
"epoch": 367.0, |
|
"eval_loss": 11.020894050598145, |
|
"eval_runtime": 1.5581, |
|
"eval_samples_per_second": 78.94, |
|
"eval_steps_per_second": 10.269, |
|
"step": 22754 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_loss": 10.91490364074707, |
|
"eval_runtime": 1.574, |
|
"eval_samples_per_second": 78.142, |
|
"eval_steps_per_second": 10.165, |
|
"step": 22816 |
|
}, |
|
{ |
|
"epoch": 369.0, |
|
"eval_loss": 11.020668029785156, |
|
"eval_runtime": 1.5554, |
|
"eval_samples_per_second": 79.078, |
|
"eval_steps_per_second": 10.287, |
|
"step": 22878 |
|
}, |
|
{ |
|
"epoch": 370.0, |
|
"eval_loss": 10.877435684204102, |
|
"eval_runtime": 1.5496, |
|
"eval_samples_per_second": 79.374, |
|
"eval_steps_per_second": 10.325, |
|
"step": 22940 |
|
}, |
|
{ |
|
"epoch": 370.97, |
|
"learning_rate": 1.806451612903226e-08, |
|
"loss": 10.024, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 371.0, |
|
"eval_loss": 10.985451698303223, |
|
"eval_runtime": 1.5536, |
|
"eval_samples_per_second": 79.173, |
|
"eval_steps_per_second": 10.299, |
|
"step": 23002 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_loss": 10.90639877319336, |
|
"eval_runtime": 1.5566, |
|
"eval_samples_per_second": 79.018, |
|
"eval_steps_per_second": 10.279, |
|
"step": 23064 |
|
}, |
|
{ |
|
"epoch": 373.0, |
|
"eval_loss": 10.95183277130127, |
|
"eval_runtime": 1.5635, |
|
"eval_samples_per_second": 78.669, |
|
"eval_steps_per_second": 10.233, |
|
"step": 23126 |
|
}, |
|
{ |
|
"epoch": 374.0, |
|
"eval_loss": 10.877464294433594, |
|
"eval_runtime": 1.5564, |
|
"eval_samples_per_second": 79.031, |
|
"eval_steps_per_second": 10.28, |
|
"step": 23188 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"eval_loss": 10.91928768157959, |
|
"eval_runtime": 1.5575, |
|
"eval_samples_per_second": 78.971, |
|
"eval_steps_per_second": 10.273, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_loss": 11.109807968139648, |
|
"eval_runtime": 1.5563, |
|
"eval_samples_per_second": 79.033, |
|
"eval_steps_per_second": 10.281, |
|
"step": 23312 |
|
}, |
|
{ |
|
"epoch": 377.0, |
|
"eval_loss": 11.148626327514648, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.392, |
|
"eval_steps_per_second": 10.327, |
|
"step": 23374 |
|
}, |
|
{ |
|
"epoch": 378.0, |
|
"eval_loss": 11.089497566223145, |
|
"eval_runtime": 1.5491, |
|
"eval_samples_per_second": 79.4, |
|
"eval_steps_per_second": 10.329, |
|
"step": 23436 |
|
}, |
|
{ |
|
"epoch": 379.0, |
|
"eval_loss": 10.915903091430664, |
|
"eval_runtime": 1.5528, |
|
"eval_samples_per_second": 79.214, |
|
"eval_steps_per_second": 10.304, |
|
"step": 23498 |
|
}, |
|
{ |
|
"epoch": 379.03, |
|
"learning_rate": 1.6935483870967743e-08, |
|
"loss": 10.0052, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_loss": 11.109511375427246, |
|
"eval_runtime": 1.5576, |
|
"eval_samples_per_second": 78.966, |
|
"eval_steps_per_second": 10.272, |
|
"step": 23560 |
|
}, |
|
{ |
|
"epoch": 381.0, |
|
"eval_loss": 11.094901084899902, |
|
"eval_runtime": 1.5561, |
|
"eval_samples_per_second": 79.044, |
|
"eval_steps_per_second": 10.282, |
|
"step": 23622 |
|
}, |
|
{ |
|
"epoch": 382.0, |
|
"eval_loss": 10.983734130859375, |
|
"eval_runtime": 1.5557, |
|
"eval_samples_per_second": 79.066, |
|
"eval_steps_per_second": 10.285, |
|
"step": 23684 |
|
}, |
|
{ |
|
"epoch": 383.0, |
|
"eval_loss": 10.943252563476562, |
|
"eval_runtime": 1.577, |
|
"eval_samples_per_second": 77.998, |
|
"eval_steps_per_second": 10.146, |
|
"step": 23746 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_loss": 10.910370826721191, |
|
"eval_runtime": 1.5682, |
|
"eval_samples_per_second": 78.434, |
|
"eval_steps_per_second": 10.203, |
|
"step": 23808 |
|
}, |
|
{ |
|
"epoch": 385.0, |
|
"eval_loss": 10.95122241973877, |
|
"eval_runtime": 1.5515, |
|
"eval_samples_per_second": 79.28, |
|
"eval_steps_per_second": 10.313, |
|
"step": 23870 |
|
}, |
|
{ |
|
"epoch": 386.0, |
|
"eval_loss": 11.107484817504883, |
|
"eval_runtime": 1.5487, |
|
"eval_samples_per_second": 79.422, |
|
"eval_steps_per_second": 10.331, |
|
"step": 23932 |
|
}, |
|
{ |
|
"epoch": 387.0, |
|
"eval_loss": 10.865962982177734, |
|
"eval_runtime": 1.5535, |
|
"eval_samples_per_second": 79.176, |
|
"eval_steps_per_second": 10.299, |
|
"step": 23994 |
|
}, |
|
{ |
|
"epoch": 387.1, |
|
"learning_rate": 1.5806451612903228e-08, |
|
"loss": 10.0218, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_loss": 11.052102088928223, |
|
"eval_runtime": 1.5572, |
|
"eval_samples_per_second": 78.988, |
|
"eval_steps_per_second": 10.275, |
|
"step": 24056 |
|
}, |
|
{ |
|
"epoch": 389.0, |
|
"eval_loss": 10.898940086364746, |
|
"eval_runtime": 1.5627, |
|
"eval_samples_per_second": 78.712, |
|
"eval_steps_per_second": 10.239, |
|
"step": 24118 |
|
}, |
|
{ |
|
"epoch": 390.0, |
|
"eval_loss": 10.913924217224121, |
|
"eval_runtime": 1.5799, |
|
"eval_samples_per_second": 77.852, |
|
"eval_steps_per_second": 10.127, |
|
"step": 24180 |
|
}, |
|
{ |
|
"epoch": 391.0, |
|
"eval_loss": 11.10788345336914, |
|
"eval_runtime": 1.5783, |
|
"eval_samples_per_second": 77.934, |
|
"eval_steps_per_second": 10.138, |
|
"step": 24242 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_loss": 10.906310081481934, |
|
"eval_runtime": 1.5568, |
|
"eval_samples_per_second": 79.006, |
|
"eval_steps_per_second": 10.277, |
|
"step": 24304 |
|
}, |
|
{ |
|
"epoch": 393.0, |
|
"eval_loss": 11.085139274597168, |
|
"eval_runtime": 1.5494, |
|
"eval_samples_per_second": 79.384, |
|
"eval_steps_per_second": 10.326, |
|
"step": 24366 |
|
}, |
|
{ |
|
"epoch": 394.0, |
|
"eval_loss": 10.980916023254395, |
|
"eval_runtime": 1.5522, |
|
"eval_samples_per_second": 79.244, |
|
"eval_steps_per_second": 10.308, |
|
"step": 24428 |
|
}, |
|
{ |
|
"epoch": 395.0, |
|
"eval_loss": 10.932045936584473, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.389, |
|
"eval_steps_per_second": 10.327, |
|
"step": 24490 |
|
}, |
|
{ |
|
"epoch": 395.16, |
|
"learning_rate": 1.467741935483871e-08, |
|
"loss": 10.0039, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_loss": 11.052530288696289, |
|
"eval_runtime": 1.5595, |
|
"eval_samples_per_second": 78.871, |
|
"eval_steps_per_second": 10.26, |
|
"step": 24552 |
|
}, |
|
{ |
|
"epoch": 397.0, |
|
"eval_loss": 11.037784576416016, |
|
"eval_runtime": 1.5581, |
|
"eval_samples_per_second": 78.943, |
|
"eval_steps_per_second": 10.269, |
|
"step": 24614 |
|
}, |
|
{ |
|
"epoch": 398.0, |
|
"eval_loss": 11.000636100769043, |
|
"eval_runtime": 1.5574, |
|
"eval_samples_per_second": 78.979, |
|
"eval_steps_per_second": 10.274, |
|
"step": 24676 |
|
}, |
|
{ |
|
"epoch": 399.0, |
|
"eval_loss": 11.028385162353516, |
|
"eval_runtime": 1.5601, |
|
"eval_samples_per_second": 78.842, |
|
"eval_steps_per_second": 10.256, |
|
"step": 24738 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_loss": 11.005521774291992, |
|
"eval_runtime": 1.5704, |
|
"eval_samples_per_second": 78.326, |
|
"eval_steps_per_second": 10.189, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 401.0, |
|
"eval_loss": 11.010418891906738, |
|
"eval_runtime": 1.55, |
|
"eval_samples_per_second": 79.356, |
|
"eval_steps_per_second": 10.323, |
|
"step": 24862 |
|
}, |
|
{ |
|
"epoch": 402.0, |
|
"eval_loss": 11.11683464050293, |
|
"eval_runtime": 1.5496, |
|
"eval_samples_per_second": 79.377, |
|
"eval_steps_per_second": 10.325, |
|
"step": 24924 |
|
}, |
|
{ |
|
"epoch": 403.0, |
|
"eval_loss": 10.831143379211426, |
|
"eval_runtime": 1.5514, |
|
"eval_samples_per_second": 79.284, |
|
"eval_steps_per_second": 10.313, |
|
"step": 24986 |
|
}, |
|
{ |
|
"epoch": 403.23, |
|
"learning_rate": 1.3548387096774193e-08, |
|
"loss": 10.0186, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_loss": 10.890425682067871, |
|
"eval_runtime": 1.5613, |
|
"eval_samples_per_second": 78.78, |
|
"eval_steps_per_second": 10.248, |
|
"step": 25048 |
|
}, |
|
{ |
|
"epoch": 405.0, |
|
"eval_loss": 10.861490249633789, |
|
"eval_runtime": 1.5685, |
|
"eval_samples_per_second": 78.418, |
|
"eval_steps_per_second": 10.201, |
|
"step": 25110 |
|
}, |
|
{ |
|
"epoch": 406.0, |
|
"eval_loss": 10.951952934265137, |
|
"eval_runtime": 1.5597, |
|
"eval_samples_per_second": 78.863, |
|
"eval_steps_per_second": 10.259, |
|
"step": 25172 |
|
}, |
|
{ |
|
"epoch": 407.0, |
|
"eval_loss": 10.911187171936035, |
|
"eval_runtime": 1.5579, |
|
"eval_samples_per_second": 78.953, |
|
"eval_steps_per_second": 10.27, |
|
"step": 25234 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_loss": 11.027958869934082, |
|
"eval_runtime": 1.5537, |
|
"eval_samples_per_second": 79.165, |
|
"eval_steps_per_second": 10.298, |
|
"step": 25296 |
|
}, |
|
{ |
|
"epoch": 409.0, |
|
"eval_loss": 10.828099250793457, |
|
"eval_runtime": 1.5453, |
|
"eval_samples_per_second": 79.597, |
|
"eval_steps_per_second": 10.354, |
|
"step": 25358 |
|
}, |
|
{ |
|
"epoch": 410.0, |
|
"eval_loss": 11.187531471252441, |
|
"eval_runtime": 1.5496, |
|
"eval_samples_per_second": 79.375, |
|
"eval_steps_per_second": 10.325, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 411.0, |
|
"eval_loss": 11.00891399383545, |
|
"eval_runtime": 1.5524, |
|
"eval_samples_per_second": 79.231, |
|
"eval_steps_per_second": 10.306, |
|
"step": 25482 |
|
}, |
|
{ |
|
"epoch": 411.29, |
|
"learning_rate": 1.241935483870968e-08, |
|
"loss": 10.0146, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_loss": 11.038575172424316, |
|
"eval_runtime": 1.5665, |
|
"eval_samples_per_second": 78.518, |
|
"eval_steps_per_second": 10.214, |
|
"step": 25544 |
|
}, |
|
{ |
|
"epoch": 413.0, |
|
"eval_loss": 10.815169334411621, |
|
"eval_runtime": 1.558, |
|
"eval_samples_per_second": 78.948, |
|
"eval_steps_per_second": 10.27, |
|
"step": 25606 |
|
}, |
|
{ |
|
"epoch": 414.0, |
|
"eval_loss": 10.946043014526367, |
|
"eval_runtime": 1.5604, |
|
"eval_samples_per_second": 78.824, |
|
"eval_steps_per_second": 10.254, |
|
"step": 25668 |
|
}, |
|
{ |
|
"epoch": 415.0, |
|
"eval_loss": 10.9921875, |
|
"eval_runtime": 1.5586, |
|
"eval_samples_per_second": 78.917, |
|
"eval_steps_per_second": 10.266, |
|
"step": 25730 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_loss": 11.085003852844238, |
|
"eval_runtime": 1.5606, |
|
"eval_samples_per_second": 78.817, |
|
"eval_steps_per_second": 10.253, |
|
"step": 25792 |
|
}, |
|
{ |
|
"epoch": 417.0, |
|
"eval_loss": 11.002412796020508, |
|
"eval_runtime": 1.5515, |
|
"eval_samples_per_second": 79.276, |
|
"eval_steps_per_second": 10.312, |
|
"step": 25854 |
|
}, |
|
{ |
|
"epoch": 418.0, |
|
"eval_loss": 11.093355178833008, |
|
"eval_runtime": 1.5531, |
|
"eval_samples_per_second": 79.196, |
|
"eval_steps_per_second": 10.302, |
|
"step": 25916 |
|
}, |
|
{ |
|
"epoch": 419.0, |
|
"eval_loss": 11.095279693603516, |
|
"eval_runtime": 1.5495, |
|
"eval_samples_per_second": 79.379, |
|
"eval_steps_per_second": 10.326, |
|
"step": 25978 |
|
}, |
|
{ |
|
"epoch": 419.35, |
|
"learning_rate": 1.1290322580645162e-08, |
|
"loss": 10.0251, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_loss": 10.89963150024414, |
|
"eval_runtime": 1.5606, |
|
"eval_samples_per_second": 78.815, |
|
"eval_steps_per_second": 10.252, |
|
"step": 26040 |
|
}, |
|
{ |
|
"epoch": 421.0, |
|
"eval_loss": 10.983254432678223, |
|
"eval_runtime": 1.5556, |
|
"eval_samples_per_second": 79.068, |
|
"eval_steps_per_second": 10.285, |
|
"step": 26102 |
|
}, |
|
{ |
|
"epoch": 422.0, |
|
"eval_loss": 10.754561424255371, |
|
"eval_runtime": 1.558, |
|
"eval_samples_per_second": 78.947, |
|
"eval_steps_per_second": 10.27, |
|
"step": 26164 |
|
}, |
|
{ |
|
"epoch": 423.0, |
|
"eval_loss": 10.976980209350586, |
|
"eval_runtime": 1.577, |
|
"eval_samples_per_second": 77.995, |
|
"eval_steps_per_second": 10.146, |
|
"step": 26226 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_loss": 11.063197135925293, |
|
"eval_runtime": 1.5683, |
|
"eval_samples_per_second": 78.427, |
|
"eval_steps_per_second": 10.202, |
|
"step": 26288 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"eval_loss": 10.924360275268555, |
|
"eval_runtime": 1.5498, |
|
"eval_samples_per_second": 79.364, |
|
"eval_steps_per_second": 10.324, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 426.0, |
|
"eval_loss": 10.937601089477539, |
|
"eval_runtime": 1.5495, |
|
"eval_samples_per_second": 79.379, |
|
"eval_steps_per_second": 10.326, |
|
"step": 26412 |
|
}, |
|
{ |
|
"epoch": 427.0, |
|
"eval_loss": 11.044069290161133, |
|
"eval_runtime": 1.5501, |
|
"eval_samples_per_second": 79.351, |
|
"eval_steps_per_second": 10.322, |
|
"step": 26474 |
|
}, |
|
{ |
|
"epoch": 427.42, |
|
"learning_rate": 1.0161290322580647e-08, |
|
"loss": 9.9776, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_loss": 11.067927360534668, |
|
"eval_runtime": 1.5604, |
|
"eval_samples_per_second": 78.827, |
|
"eval_steps_per_second": 10.254, |
|
"step": 26536 |
|
}, |
|
{ |
|
"epoch": 429.0, |
|
"eval_loss": 11.066262245178223, |
|
"eval_runtime": 1.5875, |
|
"eval_samples_per_second": 77.482, |
|
"eval_steps_per_second": 10.079, |
|
"step": 26598 |
|
}, |
|
{ |
|
"epoch": 430.0, |
|
"eval_loss": 10.988542556762695, |
|
"eval_runtime": 1.5716, |
|
"eval_samples_per_second": 78.264, |
|
"eval_steps_per_second": 10.181, |
|
"step": 26660 |
|
}, |
|
{ |
|
"epoch": 431.0, |
|
"eval_loss": 10.956550598144531, |
|
"eval_runtime": 1.558, |
|
"eval_samples_per_second": 78.949, |
|
"eval_steps_per_second": 10.27, |
|
"step": 26722 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_loss": 11.156039237976074, |
|
"eval_runtime": 1.5566, |
|
"eval_samples_per_second": 79.019, |
|
"eval_steps_per_second": 10.279, |
|
"step": 26784 |
|
}, |
|
{ |
|
"epoch": 433.0, |
|
"eval_loss": 11.059048652648926, |
|
"eval_runtime": 1.5499, |
|
"eval_samples_per_second": 79.358, |
|
"eval_steps_per_second": 10.323, |
|
"step": 26846 |
|
}, |
|
{ |
|
"epoch": 434.0, |
|
"eval_loss": 11.01535415649414, |
|
"eval_runtime": 1.5524, |
|
"eval_samples_per_second": 79.233, |
|
"eval_steps_per_second": 10.307, |
|
"step": 26908 |
|
}, |
|
{ |
|
"epoch": 435.0, |
|
"eval_loss": 10.964756965637207, |
|
"eval_runtime": 1.553, |
|
"eval_samples_per_second": 79.204, |
|
"eval_steps_per_second": 10.303, |
|
"step": 26970 |
|
}, |
|
{ |
|
"epoch": 435.48, |
|
"learning_rate": 9.03225806451613e-09, |
|
"loss": 10.0079, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_loss": 11.084016799926758, |
|
"eval_runtime": 1.559, |
|
"eval_samples_per_second": 78.895, |
|
"eval_steps_per_second": 10.263, |
|
"step": 27032 |
|
}, |
|
{ |
|
"epoch": 437.0, |
|
"eval_loss": 10.810328483581543, |
|
"eval_runtime": 1.5654, |
|
"eval_samples_per_second": 78.574, |
|
"eval_steps_per_second": 10.221, |
|
"step": 27094 |
|
}, |
|
{ |
|
"epoch": 438.0, |
|
"eval_loss": 10.89944839477539, |
|
"eval_runtime": 1.557, |
|
"eval_samples_per_second": 79.0, |
|
"eval_steps_per_second": 10.276, |
|
"step": 27156 |
|
}, |
|
{ |
|
"epoch": 439.0, |
|
"eval_loss": 11.036003112792969, |
|
"eval_runtime": 1.5567, |
|
"eval_samples_per_second": 79.015, |
|
"eval_steps_per_second": 10.278, |
|
"step": 27218 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"eval_loss": 10.909425735473633, |
|
"eval_runtime": 1.5567, |
|
"eval_samples_per_second": 79.013, |
|
"eval_steps_per_second": 10.278, |
|
"step": 27280 |
|
}, |
|
{ |
|
"epoch": 441.0, |
|
"eval_loss": 10.940203666687012, |
|
"eval_runtime": 1.5862, |
|
"eval_samples_per_second": 77.542, |
|
"eval_steps_per_second": 10.087, |
|
"step": 27342 |
|
}, |
|
{ |
|
"epoch": 442.0, |
|
"eval_loss": 10.877068519592285, |
|
"eval_runtime": 1.5493, |
|
"eval_samples_per_second": 79.388, |
|
"eval_steps_per_second": 10.327, |
|
"step": 27404 |
|
}, |
|
{ |
|
"epoch": 443.0, |
|
"eval_loss": 11.093377113342285, |
|
"eval_runtime": 1.5504, |
|
"eval_samples_per_second": 79.335, |
|
"eval_steps_per_second": 10.32, |
|
"step": 27466 |
|
}, |
|
{ |
|
"epoch": 443.55, |
|
"learning_rate": 7.903225806451614e-09, |
|
"loss": 10.0049, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"eval_loss": 11.089668273925781, |
|
"eval_runtime": 1.5636, |
|
"eval_samples_per_second": 78.663, |
|
"eval_steps_per_second": 10.233, |
|
"step": 27528 |
|
}, |
|
{ |
|
"epoch": 445.0, |
|
"eval_loss": 11.049309730529785, |
|
"eval_runtime": 1.5562, |
|
"eval_samples_per_second": 79.038, |
|
"eval_steps_per_second": 10.281, |
|
"step": 27590 |
|
}, |
|
{ |
|
"epoch": 446.0, |
|
"eval_loss": 11.039472579956055, |
|
"eval_runtime": 1.5633, |
|
"eval_samples_per_second": 78.678, |
|
"eval_steps_per_second": 10.235, |
|
"step": 27652 |
|
}, |
|
{ |
|
"epoch": 447.0, |
|
"eval_loss": 10.898978233337402, |
|
"eval_runtime": 1.5657, |
|
"eval_samples_per_second": 78.558, |
|
"eval_steps_per_second": 10.219, |
|
"step": 27714 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"eval_loss": 11.16734504699707, |
|
"eval_runtime": 1.5575, |
|
"eval_samples_per_second": 78.975, |
|
"eval_steps_per_second": 10.273, |
|
"step": 27776 |
|
}, |
|
{ |
|
"epoch": 449.0, |
|
"eval_loss": 11.158203125, |
|
"eval_runtime": 1.5524, |
|
"eval_samples_per_second": 79.235, |
|
"eval_steps_per_second": 10.307, |
|
"step": 27838 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"eval_loss": 10.85914421081543, |
|
"eval_runtime": 1.5491, |
|
"eval_samples_per_second": 79.399, |
|
"eval_steps_per_second": 10.328, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 451.0, |
|
"eval_loss": 11.079207420349121, |
|
"eval_runtime": 1.5491, |
|
"eval_samples_per_second": 79.398, |
|
"eval_steps_per_second": 10.328, |
|
"step": 27962 |
|
}, |
|
{ |
|
"epoch": 451.61, |
|
"learning_rate": 6.774193548387097e-09, |
|
"loss": 9.9924, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"eval_loss": 11.144965171813965, |
|
"eval_runtime": 1.5578, |
|
"eval_samples_per_second": 78.958, |
|
"eval_steps_per_second": 10.271, |
|
"step": 28024 |
|
}, |
|
{ |
|
"epoch": 453.0, |
|
"eval_loss": 10.981874465942383, |
|
"eval_runtime": 1.5581, |
|
"eval_samples_per_second": 78.94, |
|
"eval_steps_per_second": 10.269, |
|
"step": 28086 |
|
}, |
|
{ |
|
"epoch": 454.0, |
|
"eval_loss": 10.887094497680664, |
|
"eval_runtime": 1.5515, |
|
"eval_samples_per_second": 79.28, |
|
"eval_steps_per_second": 10.313, |
|
"step": 28148 |
|
}, |
|
{ |
|
"epoch": 455.0, |
|
"eval_loss": 10.99148178100586, |
|
"eval_runtime": 1.558, |
|
"eval_samples_per_second": 78.947, |
|
"eval_steps_per_second": 10.27, |
|
"step": 28210 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"eval_loss": 11.03730583190918, |
|
"eval_runtime": 1.5584, |
|
"eval_samples_per_second": 78.926, |
|
"eval_steps_per_second": 10.267, |
|
"step": 28272 |
|
}, |
|
{ |
|
"epoch": 457.0, |
|
"eval_loss": 10.938337326049805, |
|
"eval_runtime": 1.5696, |
|
"eval_samples_per_second": 78.366, |
|
"eval_steps_per_second": 10.194, |
|
"step": 28334 |
|
}, |
|
{ |
|
"epoch": 458.0, |
|
"eval_loss": 11.129241943359375, |
|
"eval_runtime": 1.5563, |
|
"eval_samples_per_second": 79.032, |
|
"eval_steps_per_second": 10.281, |
|
"step": 28396 |
|
}, |
|
{ |
|
"epoch": 459.0, |
|
"eval_loss": 11.168819427490234, |
|
"eval_runtime": 1.5502, |
|
"eval_samples_per_second": 79.343, |
|
"eval_steps_per_second": 10.321, |
|
"step": 28458 |
|
}, |
|
{ |
|
"epoch": 459.68, |
|
"learning_rate": 5.645161290322581e-09, |
|
"loss": 9.9918, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"eval_loss": 11.043048858642578, |
|
"eval_runtime": 1.5622, |
|
"eval_samples_per_second": 78.736, |
|
"eval_steps_per_second": 10.242, |
|
"step": 28520 |
|
}, |
|
{ |
|
"epoch": 461.0, |
|
"eval_loss": 11.071016311645508, |
|
"eval_runtime": 1.5582, |
|
"eval_samples_per_second": 78.938, |
|
"eval_steps_per_second": 10.268, |
|
"step": 28582 |
|
}, |
|
{ |
|
"epoch": 462.0, |
|
"eval_loss": 11.011009216308594, |
|
"eval_runtime": 1.554, |
|
"eval_samples_per_second": 79.151, |
|
"eval_steps_per_second": 10.296, |
|
"step": 28644 |
|
}, |
|
{ |
|
"epoch": 463.0, |
|
"eval_loss": 11.087599754333496, |
|
"eval_runtime": 1.5566, |
|
"eval_samples_per_second": 79.02, |
|
"eval_steps_per_second": 10.279, |
|
"step": 28706 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"eval_loss": 11.0982027053833, |
|
"eval_runtime": 1.5598, |
|
"eval_samples_per_second": 78.856, |
|
"eval_steps_per_second": 10.258, |
|
"step": 28768 |
|
}, |
|
{ |
|
"epoch": 465.0, |
|
"eval_loss": 10.962496757507324, |
|
"eval_runtime": 1.5771, |
|
"eval_samples_per_second": 77.992, |
|
"eval_steps_per_second": 10.145, |
|
"step": 28830 |
|
}, |
|
{ |
|
"epoch": 466.0, |
|
"eval_loss": 10.964188575744629, |
|
"eval_runtime": 1.5492, |
|
"eval_samples_per_second": 79.395, |
|
"eval_steps_per_second": 10.328, |
|
"step": 28892 |
|
}, |
|
{ |
|
"epoch": 467.0, |
|
"eval_loss": 10.798909187316895, |
|
"eval_runtime": 1.5512, |
|
"eval_samples_per_second": 79.296, |
|
"eval_steps_per_second": 10.315, |
|
"step": 28954 |
|
}, |
|
{ |
|
"epoch": 467.74, |
|
"learning_rate": 4.516129032258065e-09, |
|
"loss": 9.9799, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 468.0, |
|
"eval_loss": 11.00378131866455, |
|
"eval_runtime": 1.562, |
|
"eval_samples_per_second": 78.745, |
|
"eval_steps_per_second": 10.243, |
|
"step": 29016 |
|
}, |
|
{ |
|
"epoch": 469.0, |
|
"eval_loss": 11.124489784240723, |
|
"eval_runtime": 1.5579, |
|
"eval_samples_per_second": 78.951, |
|
"eval_steps_per_second": 10.27, |
|
"step": 29078 |
|
}, |
|
{ |
|
"epoch": 470.0, |
|
"eval_loss": 10.913480758666992, |
|
"eval_runtime": 1.5763, |
|
"eval_samples_per_second": 78.031, |
|
"eval_steps_per_second": 10.15, |
|
"step": 29140 |
|
}, |
|
{ |
|
"epoch": 471.0, |
|
"eval_loss": 10.943198204040527, |
|
"eval_runtime": 1.5563, |
|
"eval_samples_per_second": 79.031, |
|
"eval_steps_per_second": 10.281, |
|
"step": 29202 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"eval_loss": 10.829460144042969, |
|
"eval_runtime": 1.5569, |
|
"eval_samples_per_second": 79.005, |
|
"eval_steps_per_second": 10.277, |
|
"step": 29264 |
|
}, |
|
{ |
|
"epoch": 473.0, |
|
"eval_loss": 10.927714347839355, |
|
"eval_runtime": 1.5542, |
|
"eval_samples_per_second": 79.141, |
|
"eval_steps_per_second": 10.295, |
|
"step": 29326 |
|
}, |
|
{ |
|
"epoch": 474.0, |
|
"eval_loss": 11.094922065734863, |
|
"eval_runtime": 1.5516, |
|
"eval_samples_per_second": 79.274, |
|
"eval_steps_per_second": 10.312, |
|
"step": 29388 |
|
}, |
|
{ |
|
"epoch": 475.0, |
|
"eval_loss": 11.037424087524414, |
|
"eval_runtime": 1.5502, |
|
"eval_samples_per_second": 79.343, |
|
"eval_steps_per_second": 10.321, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 475.81, |
|
"learning_rate": 3.3870967741935484e-09, |
|
"loss": 9.9961, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 476.0, |
|
"eval_loss": 10.958832740783691, |
|
"eval_runtime": 1.5596, |
|
"eval_samples_per_second": 78.865, |
|
"eval_steps_per_second": 10.259, |
|
"step": 29512 |
|
}, |
|
{ |
|
"epoch": 477.0, |
|
"eval_loss": 11.034053802490234, |
|
"eval_runtime": 1.5537, |
|
"eval_samples_per_second": 79.168, |
|
"eval_steps_per_second": 10.298, |
|
"step": 29574 |
|
}, |
|
{ |
|
"epoch": 478.0, |
|
"eval_loss": 10.954147338867188, |
|
"eval_runtime": 1.563, |
|
"eval_samples_per_second": 78.695, |
|
"eval_steps_per_second": 10.237, |
|
"step": 29636 |
|
}, |
|
{ |
|
"epoch": 479.0, |
|
"eval_loss": 10.934306144714355, |
|
"eval_runtime": 1.5578, |
|
"eval_samples_per_second": 78.957, |
|
"eval_steps_per_second": 10.271, |
|
"step": 29698 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"eval_loss": 10.959942817687988, |
|
"eval_runtime": 1.5525, |
|
"eval_samples_per_second": 79.225, |
|
"eval_steps_per_second": 10.306, |
|
"step": 29760 |
|
}, |
|
{ |
|
"epoch": 481.0, |
|
"eval_loss": 10.95332145690918, |
|
"eval_runtime": 1.5572, |
|
"eval_samples_per_second": 78.988, |
|
"eval_steps_per_second": 10.275, |
|
"step": 29822 |
|
}, |
|
{ |
|
"epoch": 482.0, |
|
"eval_loss": 11.204573631286621, |
|
"eval_runtime": 1.5776, |
|
"eval_samples_per_second": 77.964, |
|
"eval_steps_per_second": 10.142, |
|
"step": 29884 |
|
}, |
|
{ |
|
"epoch": 483.0, |
|
"eval_loss": 10.935157775878906, |
|
"eval_runtime": 1.5461, |
|
"eval_samples_per_second": 79.557, |
|
"eval_steps_per_second": 10.349, |
|
"step": 29946 |
|
}, |
|
{ |
|
"epoch": 483.87, |
|
"learning_rate": 2.2580645161290324e-09, |
|
"loss": 10.006, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 484.0, |
|
"eval_loss": 11.172720909118652, |
|
"eval_runtime": 1.5619, |
|
"eval_samples_per_second": 78.751, |
|
"eval_steps_per_second": 10.244, |
|
"step": 30008 |
|
}, |
|
{ |
|
"epoch": 485.0, |
|
"eval_loss": 10.99561595916748, |
|
"eval_runtime": 1.5552, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 10.288, |
|
"step": 30070 |
|
}, |
|
{ |
|
"epoch": 486.0, |
|
"eval_loss": 11.166374206542969, |
|
"eval_runtime": 1.5713, |
|
"eval_samples_per_second": 78.281, |
|
"eval_steps_per_second": 10.183, |
|
"step": 30132 |
|
}, |
|
{ |
|
"epoch": 487.0, |
|
"eval_loss": 11.094931602478027, |
|
"eval_runtime": 1.553, |
|
"eval_samples_per_second": 79.2, |
|
"eval_steps_per_second": 10.302, |
|
"step": 30194 |
|
}, |
|
{ |
|
"epoch": 488.0, |
|
"eval_loss": 10.948943138122559, |
|
"eval_runtime": 1.5549, |
|
"eval_samples_per_second": 79.102, |
|
"eval_steps_per_second": 10.29, |
|
"step": 30256 |
|
}, |
|
{ |
|
"epoch": 489.0, |
|
"eval_loss": 11.004921913146973, |
|
"eval_runtime": 1.5546, |
|
"eval_samples_per_second": 79.121, |
|
"eval_steps_per_second": 10.292, |
|
"step": 30318 |
|
}, |
|
{ |
|
"epoch": 490.0, |
|
"eval_loss": 11.18515396118164, |
|
"eval_runtime": 1.5529, |
|
"eval_samples_per_second": 79.207, |
|
"eval_steps_per_second": 10.303, |
|
"step": 30380 |
|
}, |
|
{ |
|
"epoch": 491.0, |
|
"eval_loss": 11.10179615020752, |
|
"eval_runtime": 1.5524, |
|
"eval_samples_per_second": 79.232, |
|
"eval_steps_per_second": 10.307, |
|
"step": 30442 |
|
}, |
|
{ |
|
"epoch": 491.94, |
|
"learning_rate": 1.1290322580645162e-09, |
|
"loss": 10.0083, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 492.0, |
|
"eval_loss": 10.913029670715332, |
|
"eval_runtime": 1.5534, |
|
"eval_samples_per_second": 79.18, |
|
"eval_steps_per_second": 10.3, |
|
"step": 30504 |
|
}, |
|
{ |
|
"epoch": 493.0, |
|
"eval_loss": 10.935208320617676, |
|
"eval_runtime": 1.5558, |
|
"eval_samples_per_second": 79.058, |
|
"eval_steps_per_second": 10.284, |
|
"step": 30566 |
|
}, |
|
{ |
|
"epoch": 494.0, |
|
"eval_loss": 10.9716157913208, |
|
"eval_runtime": 1.5545, |
|
"eval_samples_per_second": 79.127, |
|
"eval_steps_per_second": 10.293, |
|
"step": 30628 |
|
}, |
|
{ |
|
"epoch": 495.0, |
|
"eval_loss": 11.111549377441406, |
|
"eval_runtime": 1.5613, |
|
"eval_samples_per_second": 78.778, |
|
"eval_steps_per_second": 10.248, |
|
"step": 30690 |
|
}, |
|
{ |
|
"epoch": 496.0, |
|
"eval_loss": 10.980956077575684, |
|
"eval_runtime": 1.5565, |
|
"eval_samples_per_second": 79.022, |
|
"eval_steps_per_second": 10.279, |
|
"step": 30752 |
|
}, |
|
{ |
|
"epoch": 497.0, |
|
"eval_loss": 10.970294952392578, |
|
"eval_runtime": 1.5649, |
|
"eval_samples_per_second": 78.597, |
|
"eval_steps_per_second": 10.224, |
|
"step": 30814 |
|
}, |
|
{ |
|
"epoch": 498.0, |
|
"eval_loss": 10.939518928527832, |
|
"eval_runtime": 1.547, |
|
"eval_samples_per_second": 79.51, |
|
"eval_steps_per_second": 10.343, |
|
"step": 30876 |
|
}, |
|
{ |
|
"epoch": 499.0, |
|
"eval_loss": 11.018895149230957, |
|
"eval_runtime": 1.5496, |
|
"eval_samples_per_second": 79.375, |
|
"eval_steps_per_second": 10.325, |
|
"step": 30938 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"learning_rate": 0.0, |
|
"loss": 10.0106, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"eval_loss": 10.97592544555664, |
|
"eval_runtime": 1.5827, |
|
"eval_samples_per_second": 77.713, |
|
"eval_steps_per_second": 10.109, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"step": 31000, |
|
"total_flos": 1.6136618439168e+16, |
|
"train_loss": 11.119525689894154, |
|
"train_runtime": 17411.7991, |
|
"train_samples_per_second": 14.042, |
|
"train_steps_per_second": 1.78 |
|
} |
|
], |
|
"max_steps": 31000, |
|
"num_train_epochs": 500, |
|
"total_flos": 1.6136618439168e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|