|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 292.3076923076923, |
|
"eval_steps": 100, |
|
"global_step": 19000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 47.11796188354492, |
|
"learning_rate": 9.990900000000001e-06, |
|
"loss": 3.6644, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"eval_loss": 2.4919605255126953, |
|
"eval_runtime": 12.5517, |
|
"eval_samples_per_second": 10.437, |
|
"eval_steps_per_second": 1.354, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 31.28130531311035, |
|
"learning_rate": 9.980900000000001e-06, |
|
"loss": 2.2347, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"eval_loss": 2.156316041946411, |
|
"eval_runtime": 11.1792, |
|
"eval_samples_per_second": 11.718, |
|
"eval_steps_per_second": 1.521, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 39.616390228271484, |
|
"learning_rate": 9.970900000000001e-06, |
|
"loss": 2.0254, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"eval_loss": 2.024153709411621, |
|
"eval_runtime": 11.1934, |
|
"eval_samples_per_second": 11.703, |
|
"eval_steps_per_second": 1.519, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 28.285825729370117, |
|
"learning_rate": 9.960900000000001e-06, |
|
"loss": 1.9361, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"eval_loss": 1.9094743728637695, |
|
"eval_runtime": 11.3855, |
|
"eval_samples_per_second": 11.506, |
|
"eval_steps_per_second": 1.493, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 34.14302062988281, |
|
"learning_rate": 9.950900000000002e-06, |
|
"loss": 1.8531, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"eval_loss": 1.8729331493377686, |
|
"eval_runtime": 11.2935, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.505, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 39.09531784057617, |
|
"learning_rate": 9.940900000000002e-06, |
|
"loss": 1.7669, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"eval_loss": 1.831756830215454, |
|
"eval_runtime": 11.0535, |
|
"eval_samples_per_second": 11.851, |
|
"eval_steps_per_second": 1.538, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"grad_norm": 93.24444580078125, |
|
"learning_rate": 9.930900000000002e-06, |
|
"loss": 1.7518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"eval_loss": 1.7832175493240356, |
|
"eval_runtime": 11.1684, |
|
"eval_samples_per_second": 11.729, |
|
"eval_steps_per_second": 1.522, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 32.21013641357422, |
|
"learning_rate": 9.920900000000002e-06, |
|
"loss": 1.7149, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"eval_loss": 1.7581098079681396, |
|
"eval_runtime": 11.101, |
|
"eval_samples_per_second": 11.801, |
|
"eval_steps_per_second": 1.531, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"grad_norm": 59.90657043457031, |
|
"learning_rate": 9.9109e-06, |
|
"loss": 1.6734, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"eval_loss": 1.7163844108581543, |
|
"eval_runtime": 11.1167, |
|
"eval_samples_per_second": 11.784, |
|
"eval_steps_per_second": 1.529, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 20.61592674255371, |
|
"learning_rate": 9.9009e-06, |
|
"loss": 1.6612, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"eval_loss": 1.6949567794799805, |
|
"eval_runtime": 11.0663, |
|
"eval_samples_per_second": 11.838, |
|
"eval_steps_per_second": 1.536, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"grad_norm": 17.60099220275879, |
|
"learning_rate": 9.8909e-06, |
|
"loss": 1.6199, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_loss": 1.6769332885742188, |
|
"eval_runtime": 11.0531, |
|
"eval_samples_per_second": 11.852, |
|
"eval_steps_per_second": 1.538, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 20.802692413330078, |
|
"learning_rate": 9.8809e-06, |
|
"loss": 1.6008, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"eval_loss": 1.6524990797042847, |
|
"eval_runtime": 11.0831, |
|
"eval_samples_per_second": 11.82, |
|
"eval_steps_per_second": 1.534, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 21.809823989868164, |
|
"learning_rate": 9.8709e-06, |
|
"loss": 1.5812, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.6428295373916626, |
|
"eval_runtime": 11.1093, |
|
"eval_samples_per_second": 11.792, |
|
"eval_steps_per_second": 1.53, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 46.8908576965332, |
|
"learning_rate": 9.8609e-06, |
|
"loss": 1.5419, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"eval_loss": 1.6006404161453247, |
|
"eval_runtime": 11.2393, |
|
"eval_samples_per_second": 11.655, |
|
"eval_steps_per_second": 1.513, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 23.076923076923077, |
|
"grad_norm": 27.15238380432129, |
|
"learning_rate": 9.8509e-06, |
|
"loss": 1.5374, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.076923076923077, |
|
"eval_loss": 1.5862094163894653, |
|
"eval_runtime": 11.1815, |
|
"eval_samples_per_second": 11.716, |
|
"eval_steps_per_second": 1.52, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 40.26778030395508, |
|
"learning_rate": 9.840900000000001e-06, |
|
"loss": 1.4923, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"eval_loss": 1.576373815536499, |
|
"eval_runtime": 11.1215, |
|
"eval_samples_per_second": 11.779, |
|
"eval_steps_per_second": 1.529, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 26.153846153846153, |
|
"grad_norm": 35.266971588134766, |
|
"learning_rate": 9.830900000000001e-06, |
|
"loss": 1.4989, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 26.153846153846153, |
|
"eval_loss": 1.5671430826187134, |
|
"eval_runtime": 11.1873, |
|
"eval_samples_per_second": 11.71, |
|
"eval_steps_per_second": 1.52, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 26.813480377197266, |
|
"learning_rate": 9.820900000000001e-06, |
|
"loss": 1.4711, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"eval_loss": 1.522908329963684, |
|
"eval_runtime": 11.2106, |
|
"eval_samples_per_second": 11.685, |
|
"eval_steps_per_second": 1.516, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 29.23076923076923, |
|
"grad_norm": 24.576723098754883, |
|
"learning_rate": 9.810900000000001e-06, |
|
"loss": 1.4421, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 29.23076923076923, |
|
"eval_loss": 1.5039104223251343, |
|
"eval_runtime": 11.257, |
|
"eval_samples_per_second": 11.637, |
|
"eval_steps_per_second": 1.51, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 28.480438232421875, |
|
"learning_rate": 9.800900000000001e-06, |
|
"loss": 1.4347, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"eval_loss": 1.5123459100723267, |
|
"eval_runtime": 11.187, |
|
"eval_samples_per_second": 11.71, |
|
"eval_steps_per_second": 1.52, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.30769230769231, |
|
"grad_norm": 56.582088470458984, |
|
"learning_rate": 9.790900000000001e-06, |
|
"loss": 1.4212, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 32.30769230769231, |
|
"eval_loss": 1.481844425201416, |
|
"eval_runtime": 11.2075, |
|
"eval_samples_per_second": 11.689, |
|
"eval_steps_per_second": 1.517, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 38.5254020690918, |
|
"learning_rate": 9.780900000000002e-06, |
|
"loss": 1.3908, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"eval_loss": 1.4529048204421997, |
|
"eval_runtime": 11.197, |
|
"eval_samples_per_second": 11.7, |
|
"eval_steps_per_second": 1.518, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 35.38461538461539, |
|
"grad_norm": 44.74857711791992, |
|
"learning_rate": 9.770900000000002e-06, |
|
"loss": 1.3734, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 35.38461538461539, |
|
"eval_loss": 1.4617427587509155, |
|
"eval_runtime": 11.1235, |
|
"eval_samples_per_second": 11.777, |
|
"eval_steps_per_second": 1.528, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 28.926782608032227, |
|
"learning_rate": 9.760900000000002e-06, |
|
"loss": 1.365, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_loss": 1.4297789335250854, |
|
"eval_runtime": 11.3007, |
|
"eval_samples_per_second": 11.592, |
|
"eval_steps_per_second": 1.504, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 27.750213623046875, |
|
"learning_rate": 9.7509e-06, |
|
"loss": 1.3306, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"eval_loss": 1.4345914125442505, |
|
"eval_runtime": 11.2795, |
|
"eval_samples_per_second": 11.614, |
|
"eval_steps_per_second": 1.507, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 35.288352966308594, |
|
"learning_rate": 9.7409e-06, |
|
"loss": 1.3677, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 1.447089433670044, |
|
"eval_runtime": 11.1366, |
|
"eval_samples_per_second": 11.763, |
|
"eval_steps_per_second": 1.527, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 41.53846153846154, |
|
"grad_norm": 65.49736022949219, |
|
"learning_rate": 9.7309e-06, |
|
"loss": 1.3453, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 41.53846153846154, |
|
"eval_loss": 1.405300259590149, |
|
"eval_runtime": 11.1549, |
|
"eval_samples_per_second": 11.744, |
|
"eval_steps_per_second": 1.524, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"grad_norm": 24.333518981933594, |
|
"learning_rate": 9.7209e-06, |
|
"loss": 1.3206, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"eval_loss": 1.4218717813491821, |
|
"eval_runtime": 11.3113, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.503, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 44.61538461538461, |
|
"grad_norm": 45.50777816772461, |
|
"learning_rate": 9.7109e-06, |
|
"loss": 1.3363, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 44.61538461538461, |
|
"eval_loss": 1.4220006465911865, |
|
"eval_runtime": 11.156, |
|
"eval_samples_per_second": 11.743, |
|
"eval_steps_per_second": 1.524, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"grad_norm": 25.898344039916992, |
|
"learning_rate": 9.7009e-06, |
|
"loss": 1.2995, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"eval_loss": 1.3942431211471558, |
|
"eval_runtime": 11.2282, |
|
"eval_samples_per_second": 11.667, |
|
"eval_steps_per_second": 1.514, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 47.69230769230769, |
|
"grad_norm": 56.0889892578125, |
|
"learning_rate": 9.6909e-06, |
|
"loss": 1.2994, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 47.69230769230769, |
|
"eval_loss": 1.3970586061477661, |
|
"eval_runtime": 11.1682, |
|
"eval_samples_per_second": 11.73, |
|
"eval_steps_per_second": 1.522, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"grad_norm": 58.10311508178711, |
|
"learning_rate": 9.6809e-06, |
|
"loss": 1.2761, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"eval_loss": 1.390371561050415, |
|
"eval_runtime": 11.2406, |
|
"eval_samples_per_second": 11.654, |
|
"eval_steps_per_second": 1.512, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"grad_norm": 17.050870895385742, |
|
"learning_rate": 9.670900000000001e-06, |
|
"loss": 1.2712, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"eval_loss": 1.3936753273010254, |
|
"eval_runtime": 11.2364, |
|
"eval_samples_per_second": 11.659, |
|
"eval_steps_per_second": 1.513, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"grad_norm": 232.21804809570312, |
|
"learning_rate": 9.660900000000001e-06, |
|
"loss": 1.262, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"eval_loss": 1.4037091732025146, |
|
"eval_runtime": 11.2231, |
|
"eval_samples_per_second": 11.672, |
|
"eval_steps_per_second": 1.515, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"grad_norm": 35.11832046508789, |
|
"learning_rate": 9.650900000000001e-06, |
|
"loss": 1.2788, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"eval_loss": 1.3545280694961548, |
|
"eval_runtime": 11.1609, |
|
"eval_samples_per_second": 11.737, |
|
"eval_steps_per_second": 1.523, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"grad_norm": 27.077022552490234, |
|
"learning_rate": 9.640900000000001e-06, |
|
"loss": 1.2711, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"eval_loss": 1.3528518676757812, |
|
"eval_runtime": 11.2852, |
|
"eval_samples_per_second": 11.608, |
|
"eval_steps_per_second": 1.506, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"grad_norm": 46.97712326049805, |
|
"learning_rate": 9.630900000000001e-06, |
|
"loss": 1.2492, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"eval_loss": 1.3534098863601685, |
|
"eval_runtime": 11.2368, |
|
"eval_samples_per_second": 11.658, |
|
"eval_steps_per_second": 1.513, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"grad_norm": 42.06857681274414, |
|
"learning_rate": 9.620900000000001e-06, |
|
"loss": 1.2506, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"eval_loss": 1.3613977432250977, |
|
"eval_runtime": 11.2206, |
|
"eval_samples_per_second": 11.675, |
|
"eval_steps_per_second": 1.515, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 19.298952102661133, |
|
"learning_rate": 9.610900000000001e-06, |
|
"loss": 1.2201, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 1.3586474657058716, |
|
"eval_runtime": 11.2045, |
|
"eval_samples_per_second": 11.692, |
|
"eval_steps_per_second": 1.517, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"grad_norm": 30.0198974609375, |
|
"learning_rate": 9.600900000000002e-06, |
|
"loss": 1.2086, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"eval_loss": 1.3304755687713623, |
|
"eval_runtime": 11.193, |
|
"eval_samples_per_second": 11.704, |
|
"eval_steps_per_second": 1.519, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 63.07692307692308, |
|
"grad_norm": 37.59902572631836, |
|
"learning_rate": 9.5909e-06, |
|
"loss": 1.2375, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 63.07692307692308, |
|
"eval_loss": 1.331407904624939, |
|
"eval_runtime": 10.6714, |
|
"eval_samples_per_second": 12.276, |
|
"eval_steps_per_second": 1.593, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"grad_norm": 36.82079315185547, |
|
"learning_rate": 9.5809e-06, |
|
"loss": 1.2148, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"eval_loss": 1.3441548347473145, |
|
"eval_runtime": 10.7472, |
|
"eval_samples_per_second": 12.189, |
|
"eval_steps_per_second": 1.582, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 66.15384615384616, |
|
"grad_norm": 30.974130630493164, |
|
"learning_rate": 9.5709e-06, |
|
"loss": 1.197, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 66.15384615384616, |
|
"eval_loss": 1.34512197971344, |
|
"eval_runtime": 10.7398, |
|
"eval_samples_per_second": 12.198, |
|
"eval_steps_per_second": 1.583, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"grad_norm": 20.45345115661621, |
|
"learning_rate": 9.5609e-06, |
|
"loss": 1.2361, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"eval_loss": 1.3371080160140991, |
|
"eval_runtime": 10.654, |
|
"eval_samples_per_second": 12.296, |
|
"eval_steps_per_second": 1.596, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 69.23076923076923, |
|
"grad_norm": 19.758630752563477, |
|
"learning_rate": 9.5509e-06, |
|
"loss": 1.2001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 69.23076923076923, |
|
"eval_loss": 1.3270760774612427, |
|
"eval_runtime": 10.6928, |
|
"eval_samples_per_second": 12.251, |
|
"eval_steps_per_second": 1.59, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"grad_norm": 45.2899055480957, |
|
"learning_rate": 9.5409e-06, |
|
"loss": 1.192, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"eval_loss": 1.3184590339660645, |
|
"eval_runtime": 10.7154, |
|
"eval_samples_per_second": 12.225, |
|
"eval_steps_per_second": 1.586, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 72.3076923076923, |
|
"grad_norm": 45.60734939575195, |
|
"learning_rate": 9.5309e-06, |
|
"loss": 1.2081, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 72.3076923076923, |
|
"eval_loss": 1.3107666969299316, |
|
"eval_runtime": 10.7029, |
|
"eval_samples_per_second": 12.24, |
|
"eval_steps_per_second": 1.588, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"grad_norm": 26.859603881835938, |
|
"learning_rate": 9.5209e-06, |
|
"loss": 1.1729, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"eval_loss": 1.310062289237976, |
|
"eval_runtime": 10.7544, |
|
"eval_samples_per_second": 12.181, |
|
"eval_steps_per_second": 1.581, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 75.38461538461539, |
|
"grad_norm": 19.90914535522461, |
|
"learning_rate": 9.5109e-06, |
|
"loss": 1.1899, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 75.38461538461539, |
|
"eval_loss": 1.3038017749786377, |
|
"eval_runtime": 10.6532, |
|
"eval_samples_per_second": 12.297, |
|
"eval_steps_per_second": 1.596, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 22.629934310913086, |
|
"learning_rate": 9.5009e-06, |
|
"loss": 1.1875, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"eval_loss": 1.2785409688949585, |
|
"eval_runtime": 10.6388, |
|
"eval_samples_per_second": 12.313, |
|
"eval_steps_per_second": 1.598, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 78.46153846153847, |
|
"grad_norm": 45.462059020996094, |
|
"learning_rate": 9.490900000000001e-06, |
|
"loss": 1.1717, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 78.46153846153847, |
|
"eval_loss": 1.278078317642212, |
|
"eval_runtime": 10.9666, |
|
"eval_samples_per_second": 11.945, |
|
"eval_steps_per_second": 1.55, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 34.85255432128906, |
|
"learning_rate": 9.480900000000001e-06, |
|
"loss": 1.1657, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 1.2711185216903687, |
|
"eval_runtime": 11.0211, |
|
"eval_samples_per_second": 11.886, |
|
"eval_steps_per_second": 1.543, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 81.53846153846153, |
|
"grad_norm": 19.078449249267578, |
|
"learning_rate": 9.470900000000001e-06, |
|
"loss": 1.1814, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 81.53846153846153, |
|
"eval_loss": 1.2781996726989746, |
|
"eval_runtime": 11.0663, |
|
"eval_samples_per_second": 11.838, |
|
"eval_steps_per_second": 1.536, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"grad_norm": 31.05898094177246, |
|
"learning_rate": 9.460900000000001e-06, |
|
"loss": 1.1452, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"eval_loss": 1.2848775386810303, |
|
"eval_runtime": 11.01, |
|
"eval_samples_per_second": 11.898, |
|
"eval_steps_per_second": 1.544, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 84.61538461538461, |
|
"grad_norm": 28.712461471557617, |
|
"learning_rate": 9.450900000000001e-06, |
|
"loss": 1.1465, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 84.61538461538461, |
|
"eval_loss": 1.2928494215011597, |
|
"eval_runtime": 10.9253, |
|
"eval_samples_per_second": 11.991, |
|
"eval_steps_per_second": 1.556, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"grad_norm": 19.871828079223633, |
|
"learning_rate": 9.440900000000001e-06, |
|
"loss": 1.1736, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"eval_loss": 1.2648124694824219, |
|
"eval_runtime": 11.0314, |
|
"eval_samples_per_second": 11.875, |
|
"eval_steps_per_second": 1.541, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 87.6923076923077, |
|
"grad_norm": 22.47665023803711, |
|
"learning_rate": 9.4309e-06, |
|
"loss": 1.1184, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 87.6923076923077, |
|
"eval_loss": 1.2936598062515259, |
|
"eval_runtime": 10.9322, |
|
"eval_samples_per_second": 11.983, |
|
"eval_steps_per_second": 1.555, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"grad_norm": 38.79877471923828, |
|
"learning_rate": 9.4209e-06, |
|
"loss": 1.1616, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"eval_loss": 1.2650004625320435, |
|
"eval_runtime": 10.9434, |
|
"eval_samples_per_second": 11.971, |
|
"eval_steps_per_second": 1.553, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"grad_norm": 40.851097106933594, |
|
"learning_rate": 9.4109e-06, |
|
"loss": 1.1469, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"eval_loss": 1.252148151397705, |
|
"eval_runtime": 10.9867, |
|
"eval_samples_per_second": 11.923, |
|
"eval_steps_per_second": 1.547, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"grad_norm": 21.35544204711914, |
|
"learning_rate": 9.4009e-06, |
|
"loss": 1.1489, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_loss": 1.259344220161438, |
|
"eval_runtime": 10.9649, |
|
"eval_samples_per_second": 11.947, |
|
"eval_steps_per_second": 1.55, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 93.84615384615384, |
|
"grad_norm": 33.265132904052734, |
|
"learning_rate": 9.3909e-06, |
|
"loss": 1.1315, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 93.84615384615384, |
|
"eval_loss": 1.252693772315979, |
|
"eval_runtime": 10.9852, |
|
"eval_samples_per_second": 11.925, |
|
"eval_steps_per_second": 1.548, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 95.38461538461539, |
|
"grad_norm": 23.43667221069336, |
|
"learning_rate": 9.381e-06, |
|
"loss": 1.119, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 95.38461538461539, |
|
"eval_loss": 1.254772424697876, |
|
"eval_runtime": 11.0937, |
|
"eval_samples_per_second": 11.808, |
|
"eval_steps_per_second": 1.532, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 96.92307692307692, |
|
"grad_norm": 51.93602752685547, |
|
"learning_rate": 9.371e-06, |
|
"loss": 1.1333, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 96.92307692307692, |
|
"eval_loss": 1.249408483505249, |
|
"eval_runtime": 11.0388, |
|
"eval_samples_per_second": 11.867, |
|
"eval_steps_per_second": 1.54, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 98.46153846153847, |
|
"grad_norm": 23.473421096801758, |
|
"learning_rate": 9.361e-06, |
|
"loss": 1.1164, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 98.46153846153847, |
|
"eval_loss": 1.2438522577285767, |
|
"eval_runtime": 11.1581, |
|
"eval_samples_per_second": 11.74, |
|
"eval_steps_per_second": 1.524, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 24.228403091430664, |
|
"learning_rate": 9.351e-06, |
|
"loss": 1.1333, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 1.248632788658142, |
|
"eval_runtime": 10.9675, |
|
"eval_samples_per_second": 11.944, |
|
"eval_steps_per_second": 1.55, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 101.53846153846153, |
|
"grad_norm": 18.29631996154785, |
|
"learning_rate": 9.341000000000001e-06, |
|
"loss": 1.1082, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 101.53846153846153, |
|
"eval_loss": 1.2509865760803223, |
|
"eval_runtime": 11.0693, |
|
"eval_samples_per_second": 11.834, |
|
"eval_steps_per_second": 1.536, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 103.07692307692308, |
|
"grad_norm": 42.855491638183594, |
|
"learning_rate": 9.331000000000001e-06, |
|
"loss": 1.1178, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 103.07692307692308, |
|
"eval_loss": 1.2890292406082153, |
|
"eval_runtime": 11.0366, |
|
"eval_samples_per_second": 11.87, |
|
"eval_steps_per_second": 1.54, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 104.61538461538461, |
|
"grad_norm": 46.675655364990234, |
|
"learning_rate": 9.321000000000001e-06, |
|
"loss": 1.1106, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 104.61538461538461, |
|
"eval_loss": 1.2719863653182983, |
|
"eval_runtime": 11.1266, |
|
"eval_samples_per_second": 11.774, |
|
"eval_steps_per_second": 1.528, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 106.15384615384616, |
|
"grad_norm": 26.414846420288086, |
|
"learning_rate": 9.311000000000001e-06, |
|
"loss": 1.1216, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 106.15384615384616, |
|
"eval_loss": 1.2423394918441772, |
|
"eval_runtime": 11.0197, |
|
"eval_samples_per_second": 11.888, |
|
"eval_steps_per_second": 1.543, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 107.6923076923077, |
|
"grad_norm": 31.4022274017334, |
|
"learning_rate": 9.301000000000001e-06, |
|
"loss": 1.1052, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 107.6923076923077, |
|
"eval_loss": 1.2372961044311523, |
|
"eval_runtime": 11.1127, |
|
"eval_samples_per_second": 11.788, |
|
"eval_steps_per_second": 1.53, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 109.23076923076923, |
|
"grad_norm": 23.16703987121582, |
|
"learning_rate": 9.291000000000001e-06, |
|
"loss": 1.0911, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 109.23076923076923, |
|
"eval_loss": 1.2309863567352295, |
|
"eval_runtime": 10.9572, |
|
"eval_samples_per_second": 11.956, |
|
"eval_steps_per_second": 1.551, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 110.76923076923077, |
|
"grad_norm": 21.648773193359375, |
|
"learning_rate": 9.281000000000001e-06, |
|
"loss": 1.0956, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 110.76923076923077, |
|
"eval_loss": 1.2261079549789429, |
|
"eval_runtime": 10.9351, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 1.555, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 112.3076923076923, |
|
"grad_norm": 24.5791072845459, |
|
"learning_rate": 9.271000000000002e-06, |
|
"loss": 1.0751, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 112.3076923076923, |
|
"eval_loss": 1.2161471843719482, |
|
"eval_runtime": 11.1052, |
|
"eval_samples_per_second": 11.796, |
|
"eval_steps_per_second": 1.531, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 113.84615384615384, |
|
"grad_norm": 35.867801666259766, |
|
"learning_rate": 9.261000000000002e-06, |
|
"loss": 1.086, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 113.84615384615384, |
|
"eval_loss": 1.2092362642288208, |
|
"eval_runtime": 11.2048, |
|
"eval_samples_per_second": 11.691, |
|
"eval_steps_per_second": 1.517, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 115.38461538461539, |
|
"grad_norm": 67.91041564941406, |
|
"learning_rate": 9.251000000000002e-06, |
|
"loss": 1.092, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 115.38461538461539, |
|
"eval_loss": 1.241829514503479, |
|
"eval_runtime": 10.9937, |
|
"eval_samples_per_second": 11.916, |
|
"eval_steps_per_second": 1.546, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 116.92307692307692, |
|
"grad_norm": 128.73751831054688, |
|
"learning_rate": 9.241000000000002e-06, |
|
"loss": 1.0764, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 116.92307692307692, |
|
"eval_loss": 1.2462713718414307, |
|
"eval_runtime": 10.9625, |
|
"eval_samples_per_second": 11.95, |
|
"eval_steps_per_second": 1.551, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 118.46153846153847, |
|
"grad_norm": 86.5144271850586, |
|
"learning_rate": 9.231000000000002e-06, |
|
"loss": 1.0643, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 118.46153846153847, |
|
"eval_loss": 1.2187525033950806, |
|
"eval_runtime": 10.9492, |
|
"eval_samples_per_second": 11.964, |
|
"eval_steps_per_second": 1.553, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"grad_norm": 19.4710750579834, |
|
"learning_rate": 9.221e-06, |
|
"loss": 1.0966, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 1.2282384634017944, |
|
"eval_runtime": 10.9078, |
|
"eval_samples_per_second": 12.01, |
|
"eval_steps_per_second": 1.559, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 121.53846153846153, |
|
"grad_norm": 37.673744201660156, |
|
"learning_rate": 9.211e-06, |
|
"loss": 1.0632, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 121.53846153846153, |
|
"eval_loss": 1.2206230163574219, |
|
"eval_runtime": 11.0018, |
|
"eval_samples_per_second": 11.907, |
|
"eval_steps_per_second": 1.545, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 123.07692307692308, |
|
"grad_norm": 25.10326385498047, |
|
"learning_rate": 9.201e-06, |
|
"loss": 1.0873, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 123.07692307692308, |
|
"eval_loss": 1.2137339115142822, |
|
"eval_runtime": 10.9232, |
|
"eval_samples_per_second": 11.993, |
|
"eval_steps_per_second": 1.556, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 124.61538461538461, |
|
"grad_norm": 32.02176284790039, |
|
"learning_rate": 9.191e-06, |
|
"loss": 1.0568, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 124.61538461538461, |
|
"eval_loss": 1.2065187692642212, |
|
"eval_runtime": 10.9614, |
|
"eval_samples_per_second": 11.951, |
|
"eval_steps_per_second": 1.551, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 126.15384615384616, |
|
"grad_norm": 19.97406005859375, |
|
"learning_rate": 9.181e-06, |
|
"loss": 1.065, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 126.15384615384616, |
|
"eval_loss": 1.2094841003417969, |
|
"eval_runtime": 11.0274, |
|
"eval_samples_per_second": 11.879, |
|
"eval_steps_per_second": 1.542, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 127.6923076923077, |
|
"grad_norm": 31.624399185180664, |
|
"learning_rate": 9.171e-06, |
|
"loss": 1.0805, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 127.6923076923077, |
|
"eval_loss": 1.2149733304977417, |
|
"eval_runtime": 11.2014, |
|
"eval_samples_per_second": 11.695, |
|
"eval_steps_per_second": 1.518, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 129.23076923076923, |
|
"grad_norm": 29.24848747253418, |
|
"learning_rate": 9.161000000000001e-06, |
|
"loss": 1.0463, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 129.23076923076923, |
|
"eval_loss": 1.2077099084854126, |
|
"eval_runtime": 10.9432, |
|
"eval_samples_per_second": 11.971, |
|
"eval_steps_per_second": 1.553, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 130.76923076923077, |
|
"grad_norm": 27.14787483215332, |
|
"learning_rate": 9.151000000000001e-06, |
|
"loss": 1.0607, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 130.76923076923077, |
|
"eval_loss": 1.2046644687652588, |
|
"eval_runtime": 11.001, |
|
"eval_samples_per_second": 11.908, |
|
"eval_steps_per_second": 1.545, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 132.30769230769232, |
|
"grad_norm": 32.416194915771484, |
|
"learning_rate": 9.141000000000001e-06, |
|
"loss": 1.0365, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 132.30769230769232, |
|
"eval_loss": 1.195080041885376, |
|
"eval_runtime": 11.0205, |
|
"eval_samples_per_second": 11.887, |
|
"eval_steps_per_second": 1.543, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 133.84615384615384, |
|
"grad_norm": 58.863582611083984, |
|
"learning_rate": 9.131000000000001e-06, |
|
"loss": 1.0564, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 133.84615384615384, |
|
"eval_loss": 1.1966980695724487, |
|
"eval_runtime": 10.9043, |
|
"eval_samples_per_second": 12.014, |
|
"eval_steps_per_second": 1.559, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 135.3846153846154, |
|
"grad_norm": 26.08232307434082, |
|
"learning_rate": 9.121000000000001e-06, |
|
"loss": 1.0507, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 135.3846153846154, |
|
"eval_loss": 1.201367735862732, |
|
"eval_runtime": 10.9714, |
|
"eval_samples_per_second": 11.94, |
|
"eval_steps_per_second": 1.549, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 136.92307692307693, |
|
"grad_norm": 59.368019104003906, |
|
"learning_rate": 9.111000000000001e-06, |
|
"loss": 1.0508, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 136.92307692307693, |
|
"eval_loss": 1.208795428276062, |
|
"eval_runtime": 11.0529, |
|
"eval_samples_per_second": 11.852, |
|
"eval_steps_per_second": 1.538, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 138.46153846153845, |
|
"grad_norm": 43.07460021972656, |
|
"learning_rate": 9.101000000000001e-06, |
|
"loss": 1.0359, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 138.46153846153845, |
|
"eval_loss": 1.1782392263412476, |
|
"eval_runtime": 11.1519, |
|
"eval_samples_per_second": 11.747, |
|
"eval_steps_per_second": 1.524, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"grad_norm": 22.39567756652832, |
|
"learning_rate": 9.091000000000002e-06, |
|
"loss": 1.0584, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_loss": 1.1873483657836914, |
|
"eval_runtime": 10.8732, |
|
"eval_samples_per_second": 12.048, |
|
"eval_steps_per_second": 1.563, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 141.53846153846155, |
|
"grad_norm": 22.813621520996094, |
|
"learning_rate": 9.081000000000002e-06, |
|
"loss": 1.0354, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 141.53846153846155, |
|
"eval_loss": 1.1731117963790894, |
|
"eval_runtime": 11.0847, |
|
"eval_samples_per_second": 11.818, |
|
"eval_steps_per_second": 1.534, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 143.07692307692307, |
|
"grad_norm": 31.44073486328125, |
|
"learning_rate": 9.071000000000002e-06, |
|
"loss": 1.0457, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 143.07692307692307, |
|
"eval_loss": 1.1962807178497314, |
|
"eval_runtime": 11.0668, |
|
"eval_samples_per_second": 11.837, |
|
"eval_steps_per_second": 1.536, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 144.6153846153846, |
|
"grad_norm": 18.18711280822754, |
|
"learning_rate": 9.061e-06, |
|
"loss": 1.0481, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 144.6153846153846, |
|
"eval_loss": 1.185339093208313, |
|
"eval_runtime": 10.9916, |
|
"eval_samples_per_second": 11.918, |
|
"eval_steps_per_second": 1.547, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 146.15384615384616, |
|
"grad_norm": 38.05665969848633, |
|
"learning_rate": 9.051e-06, |
|
"loss": 1.0391, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 146.15384615384616, |
|
"eval_loss": 1.1856777667999268, |
|
"eval_runtime": 11.0391, |
|
"eval_samples_per_second": 11.867, |
|
"eval_steps_per_second": 1.54, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 147.69230769230768, |
|
"grad_norm": 19.963260650634766, |
|
"learning_rate": 9.041e-06, |
|
"loss": 1.0322, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 147.69230769230768, |
|
"eval_loss": 1.1843148469924927, |
|
"eval_runtime": 11.0382, |
|
"eval_samples_per_second": 11.868, |
|
"eval_steps_per_second": 1.54, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 149.23076923076923, |
|
"grad_norm": 28.58108901977539, |
|
"learning_rate": 9.031e-06, |
|
"loss": 1.0369, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 149.23076923076923, |
|
"eval_loss": 1.1637182235717773, |
|
"eval_runtime": 11.2766, |
|
"eval_samples_per_second": 11.617, |
|
"eval_steps_per_second": 1.508, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 150.76923076923077, |
|
"grad_norm": 64.3956527709961, |
|
"learning_rate": 9.021e-06, |
|
"loss": 1.0519, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 150.76923076923077, |
|
"eval_loss": 1.1796802282333374, |
|
"eval_runtime": 11.3418, |
|
"eval_samples_per_second": 11.55, |
|
"eval_steps_per_second": 1.499, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 152.30769230769232, |
|
"grad_norm": 18.857580184936523, |
|
"learning_rate": 9.011e-06, |
|
"loss": 1.0272, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 152.30769230769232, |
|
"eval_loss": 1.1634279489517212, |
|
"eval_runtime": 11.6749, |
|
"eval_samples_per_second": 11.221, |
|
"eval_steps_per_second": 1.456, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 153.84615384615384, |
|
"grad_norm": 42.541683197021484, |
|
"learning_rate": 9.001e-06, |
|
"loss": 1.0287, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 153.84615384615384, |
|
"eval_loss": 1.1698088645935059, |
|
"eval_runtime": 11.068, |
|
"eval_samples_per_second": 11.836, |
|
"eval_steps_per_second": 1.536, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 155.3846153846154, |
|
"grad_norm": 30.52286720275879, |
|
"learning_rate": 8.991e-06, |
|
"loss": 1.0237, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 155.3846153846154, |
|
"eval_loss": 1.178311824798584, |
|
"eval_runtime": 11.1107, |
|
"eval_samples_per_second": 11.79, |
|
"eval_steps_per_second": 1.53, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 156.92307692307693, |
|
"grad_norm": 32.60612487792969, |
|
"learning_rate": 8.981000000000001e-06, |
|
"loss": 1.0362, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 156.92307692307693, |
|
"eval_loss": 1.157893180847168, |
|
"eval_runtime": 11.0484, |
|
"eval_samples_per_second": 11.857, |
|
"eval_steps_per_second": 1.539, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 158.46153846153845, |
|
"grad_norm": 26.15647315979004, |
|
"learning_rate": 8.971000000000001e-06, |
|
"loss": 0.998, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 158.46153846153845, |
|
"eval_loss": 1.1724605560302734, |
|
"eval_runtime": 11.2323, |
|
"eval_samples_per_second": 11.663, |
|
"eval_steps_per_second": 1.513, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"grad_norm": 45.76993942260742, |
|
"learning_rate": 8.961000000000001e-06, |
|
"loss": 1.0432, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_loss": 1.1688075065612793, |
|
"eval_runtime": 11.0984, |
|
"eval_samples_per_second": 11.803, |
|
"eval_steps_per_second": 1.532, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 161.53846153846155, |
|
"grad_norm": 40.384578704833984, |
|
"learning_rate": 8.951000000000001e-06, |
|
"loss": 1.0421, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 161.53846153846155, |
|
"eval_loss": 1.1621845960617065, |
|
"eval_runtime": 11.1624, |
|
"eval_samples_per_second": 11.736, |
|
"eval_steps_per_second": 1.523, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 163.07692307692307, |
|
"grad_norm": 22.886058807373047, |
|
"learning_rate": 8.941000000000001e-06, |
|
"loss": 0.9912, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 163.07692307692307, |
|
"eval_loss": 1.1695055961608887, |
|
"eval_runtime": 10.9842, |
|
"eval_samples_per_second": 11.926, |
|
"eval_steps_per_second": 1.548, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 164.6153846153846, |
|
"grad_norm": 26.940736770629883, |
|
"learning_rate": 8.931000000000001e-06, |
|
"loss": 1.011, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 164.6153846153846, |
|
"eval_loss": 1.1458157300949097, |
|
"eval_runtime": 11.025, |
|
"eval_samples_per_second": 11.882, |
|
"eval_steps_per_second": 1.542, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 166.15384615384616, |
|
"grad_norm": 26.5013484954834, |
|
"learning_rate": 8.921000000000001e-06, |
|
"loss": 0.9876, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 166.15384615384616, |
|
"eval_loss": 1.1569631099700928, |
|
"eval_runtime": 11.101, |
|
"eval_samples_per_second": 11.801, |
|
"eval_steps_per_second": 1.531, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 167.69230769230768, |
|
"grad_norm": 29.859987258911133, |
|
"learning_rate": 8.911000000000002e-06, |
|
"loss": 1.0374, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 167.69230769230768, |
|
"eval_loss": 1.149316668510437, |
|
"eval_runtime": 11.1242, |
|
"eval_samples_per_second": 11.776, |
|
"eval_steps_per_second": 1.528, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 169.23076923076923, |
|
"grad_norm": 27.777738571166992, |
|
"learning_rate": 8.901e-06, |
|
"loss": 0.985, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 169.23076923076923, |
|
"eval_loss": 1.1608328819274902, |
|
"eval_runtime": 11.2215, |
|
"eval_samples_per_second": 11.674, |
|
"eval_steps_per_second": 1.515, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 170.76923076923077, |
|
"grad_norm": 39.21344757080078, |
|
"learning_rate": 8.891e-06, |
|
"loss": 1.0049, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 170.76923076923077, |
|
"eval_loss": 1.1642228364944458, |
|
"eval_runtime": 11.1947, |
|
"eval_samples_per_second": 11.702, |
|
"eval_steps_per_second": 1.519, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 172.30769230769232, |
|
"grad_norm": 29.880149841308594, |
|
"learning_rate": 8.881e-06, |
|
"loss": 0.9843, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 172.30769230769232, |
|
"eval_loss": 1.1574000120162964, |
|
"eval_runtime": 11.2634, |
|
"eval_samples_per_second": 11.631, |
|
"eval_steps_per_second": 1.509, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 173.84615384615384, |
|
"grad_norm": 63.53031539916992, |
|
"learning_rate": 8.871e-06, |
|
"loss": 1.0354, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 173.84615384615384, |
|
"eval_loss": 1.1575734615325928, |
|
"eval_runtime": 11.0265, |
|
"eval_samples_per_second": 11.88, |
|
"eval_steps_per_second": 1.542, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 175.3846153846154, |
|
"grad_norm": 26.937786102294922, |
|
"learning_rate": 8.861e-06, |
|
"loss": 0.9964, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 175.3846153846154, |
|
"eval_loss": 1.1552445888519287, |
|
"eval_runtime": 11.0325, |
|
"eval_samples_per_second": 11.874, |
|
"eval_steps_per_second": 1.541, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 176.92307692307693, |
|
"grad_norm": 87.28536987304688, |
|
"learning_rate": 8.851e-06, |
|
"loss": 0.9932, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 176.92307692307693, |
|
"eval_loss": 1.1411677598953247, |
|
"eval_runtime": 11.1527, |
|
"eval_samples_per_second": 11.746, |
|
"eval_steps_per_second": 1.524, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 178.46153846153845, |
|
"grad_norm": 25.903568267822266, |
|
"learning_rate": 8.841e-06, |
|
"loss": 0.9768, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 178.46153846153845, |
|
"eval_loss": 1.1635726690292358, |
|
"eval_runtime": 11.123, |
|
"eval_samples_per_second": 11.777, |
|
"eval_steps_per_second": 1.528, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"grad_norm": 24.315654754638672, |
|
"learning_rate": 8.831e-06, |
|
"loss": 0.9984, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_loss": 1.1710366010665894, |
|
"eval_runtime": 11.1375, |
|
"eval_samples_per_second": 11.762, |
|
"eval_steps_per_second": 1.526, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 181.53846153846155, |
|
"grad_norm": 60.17182540893555, |
|
"learning_rate": 8.821e-06, |
|
"loss": 0.9703, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 181.53846153846155, |
|
"eval_loss": 1.1556936502456665, |
|
"eval_runtime": 11.0387, |
|
"eval_samples_per_second": 11.867, |
|
"eval_steps_per_second": 1.54, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 183.07692307692307, |
|
"grad_norm": 32.373477935791016, |
|
"learning_rate": 8.811000000000001e-06, |
|
"loss": 0.9996, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 183.07692307692307, |
|
"eval_loss": 1.146790623664856, |
|
"eval_runtime": 11.1805, |
|
"eval_samples_per_second": 11.717, |
|
"eval_steps_per_second": 1.521, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 184.6153846153846, |
|
"grad_norm": 42.578575134277344, |
|
"learning_rate": 8.801000000000001e-06, |
|
"loss": 0.9795, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 184.6153846153846, |
|
"eval_loss": 1.144544005393982, |
|
"eval_runtime": 11.0704, |
|
"eval_samples_per_second": 11.833, |
|
"eval_steps_per_second": 1.536, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 186.15384615384616, |
|
"grad_norm": 22.79789161682129, |
|
"learning_rate": 8.791000000000001e-06, |
|
"loss": 0.9905, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 186.15384615384616, |
|
"eval_loss": 1.1581685543060303, |
|
"eval_runtime": 11.272, |
|
"eval_samples_per_second": 11.622, |
|
"eval_steps_per_second": 1.508, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 187.69230769230768, |
|
"grad_norm": 52.061012268066406, |
|
"learning_rate": 8.781200000000002e-06, |
|
"loss": 0.9817, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 187.69230769230768, |
|
"eval_loss": 1.159809947013855, |
|
"eval_runtime": 11.2021, |
|
"eval_samples_per_second": 11.694, |
|
"eval_steps_per_second": 1.518, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 189.23076923076923, |
|
"grad_norm": 35.95882034301758, |
|
"learning_rate": 8.7712e-06, |
|
"loss": 1.0071, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 189.23076923076923, |
|
"eval_loss": 1.1944890022277832, |
|
"eval_runtime": 10.9263, |
|
"eval_samples_per_second": 11.989, |
|
"eval_steps_per_second": 1.556, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 190.76923076923077, |
|
"grad_norm": 213.48587036132812, |
|
"learning_rate": 8.7612e-06, |
|
"loss": 0.9997, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 190.76923076923077, |
|
"eval_loss": 1.191455602645874, |
|
"eval_runtime": 10.917, |
|
"eval_samples_per_second": 12.0, |
|
"eval_steps_per_second": 1.557, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 192.30769230769232, |
|
"grad_norm": 19.97510528564453, |
|
"learning_rate": 8.7512e-06, |
|
"loss": 1.001, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 192.30769230769232, |
|
"eval_loss": 1.167776346206665, |
|
"eval_runtime": 11.033, |
|
"eval_samples_per_second": 11.873, |
|
"eval_steps_per_second": 1.541, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 193.84615384615384, |
|
"grad_norm": 30.815828323364258, |
|
"learning_rate": 8.7412e-06, |
|
"loss": 0.9719, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 193.84615384615384, |
|
"eval_loss": 1.150451898574829, |
|
"eval_runtime": 11.2471, |
|
"eval_samples_per_second": 11.647, |
|
"eval_steps_per_second": 1.512, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 195.3846153846154, |
|
"grad_norm": 40.32701110839844, |
|
"learning_rate": 8.7312e-06, |
|
"loss": 0.9658, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 195.3846153846154, |
|
"eval_loss": 1.1517494916915894, |
|
"eval_runtime": 11.1893, |
|
"eval_samples_per_second": 11.708, |
|
"eval_steps_per_second": 1.519, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 196.92307692307693, |
|
"grad_norm": 42.11077117919922, |
|
"learning_rate": 8.7212e-06, |
|
"loss": 0.9744, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 196.92307692307693, |
|
"eval_loss": 1.1507395505905151, |
|
"eval_runtime": 11.2196, |
|
"eval_samples_per_second": 11.676, |
|
"eval_steps_per_second": 1.515, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 198.46153846153845, |
|
"grad_norm": 20.991779327392578, |
|
"learning_rate": 8.7112e-06, |
|
"loss": 0.9695, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 198.46153846153845, |
|
"eval_loss": 1.1557880640029907, |
|
"eval_runtime": 11.2158, |
|
"eval_samples_per_second": 11.68, |
|
"eval_steps_per_second": 1.516, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 22.79688835144043, |
|
"learning_rate": 8.7012e-06, |
|
"loss": 0.9652, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 1.1554670333862305, |
|
"eval_runtime": 11.1246, |
|
"eval_samples_per_second": 11.776, |
|
"eval_steps_per_second": 1.528, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 201.53846153846155, |
|
"grad_norm": 180.34512329101562, |
|
"learning_rate": 8.6912e-06, |
|
"loss": 0.9664, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 201.53846153846155, |
|
"eval_loss": 1.1403967142105103, |
|
"eval_runtime": 10.9895, |
|
"eval_samples_per_second": 11.921, |
|
"eval_steps_per_second": 1.547, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 203.07692307692307, |
|
"grad_norm": 31.583358764648438, |
|
"learning_rate": 8.6812e-06, |
|
"loss": 0.9672, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 203.07692307692307, |
|
"eval_loss": 1.1569470167160034, |
|
"eval_runtime": 11.2695, |
|
"eval_samples_per_second": 11.624, |
|
"eval_steps_per_second": 1.508, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 204.6153846153846, |
|
"grad_norm": 34.05722427368164, |
|
"learning_rate": 8.671200000000001e-06, |
|
"loss": 0.9531, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 204.6153846153846, |
|
"eval_loss": 1.1408321857452393, |
|
"eval_runtime": 11.1074, |
|
"eval_samples_per_second": 11.794, |
|
"eval_steps_per_second": 1.531, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 206.15384615384616, |
|
"grad_norm": 26.748388290405273, |
|
"learning_rate": 8.661200000000001e-06, |
|
"loss": 0.9484, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 206.15384615384616, |
|
"eval_loss": 1.151693344116211, |
|
"eval_runtime": 10.9585, |
|
"eval_samples_per_second": 11.954, |
|
"eval_steps_per_second": 1.551, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 207.69230769230768, |
|
"grad_norm": 19.531770706176758, |
|
"learning_rate": 8.651200000000001e-06, |
|
"loss": 0.971, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 207.69230769230768, |
|
"eval_loss": 1.138724684715271, |
|
"eval_runtime": 11.0296, |
|
"eval_samples_per_second": 11.877, |
|
"eval_steps_per_second": 1.541, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 209.23076923076923, |
|
"grad_norm": 23.87537384033203, |
|
"learning_rate": 8.641200000000001e-06, |
|
"loss": 0.944, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 209.23076923076923, |
|
"eval_loss": 1.1402664184570312, |
|
"eval_runtime": 11.1505, |
|
"eval_samples_per_second": 11.748, |
|
"eval_steps_per_second": 1.525, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 210.76923076923077, |
|
"grad_norm": 25.069852828979492, |
|
"learning_rate": 8.631200000000001e-06, |
|
"loss": 0.9581, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 210.76923076923077, |
|
"eval_loss": 1.1348073482513428, |
|
"eval_runtime": 11.063, |
|
"eval_samples_per_second": 11.841, |
|
"eval_steps_per_second": 1.537, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 212.30769230769232, |
|
"grad_norm": 23.794719696044922, |
|
"learning_rate": 8.621200000000001e-06, |
|
"loss": 0.957, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 212.30769230769232, |
|
"eval_loss": 1.143198013305664, |
|
"eval_runtime": 11.1422, |
|
"eval_samples_per_second": 11.757, |
|
"eval_steps_per_second": 1.526, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 213.84615384615384, |
|
"grad_norm": 26.059829711914062, |
|
"learning_rate": 8.611200000000002e-06, |
|
"loss": 0.9554, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 213.84615384615384, |
|
"eval_loss": 1.1333541870117188, |
|
"eval_runtime": 10.948, |
|
"eval_samples_per_second": 11.966, |
|
"eval_steps_per_second": 1.553, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 215.3846153846154, |
|
"grad_norm": 49.8937873840332, |
|
"learning_rate": 8.6012e-06, |
|
"loss": 0.9607, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 215.3846153846154, |
|
"eval_loss": 1.1584446430206299, |
|
"eval_runtime": 11.2488, |
|
"eval_samples_per_second": 11.646, |
|
"eval_steps_per_second": 1.511, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 216.92307692307693, |
|
"grad_norm": 17.9267520904541, |
|
"learning_rate": 8.5912e-06, |
|
"loss": 0.9444, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 216.92307692307693, |
|
"eval_loss": 1.1573532819747925, |
|
"eval_runtime": 11.143, |
|
"eval_samples_per_second": 11.756, |
|
"eval_steps_per_second": 1.526, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 218.46153846153845, |
|
"grad_norm": 27.38156509399414, |
|
"learning_rate": 8.5812e-06, |
|
"loss": 0.928, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 218.46153846153845, |
|
"eval_loss": 1.1540145874023438, |
|
"eval_runtime": 11.0475, |
|
"eval_samples_per_second": 11.858, |
|
"eval_steps_per_second": 1.539, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"grad_norm": 42.785037994384766, |
|
"learning_rate": 8.5712e-06, |
|
"loss": 0.9548, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_loss": 1.1379021406173706, |
|
"eval_runtime": 10.9412, |
|
"eval_samples_per_second": 11.973, |
|
"eval_steps_per_second": 1.554, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 221.53846153846155, |
|
"grad_norm": 39.50480270385742, |
|
"learning_rate": 8.5612e-06, |
|
"loss": 0.9583, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 221.53846153846155, |
|
"eval_loss": 1.1666078567504883, |
|
"eval_runtime": 11.2066, |
|
"eval_samples_per_second": 11.69, |
|
"eval_steps_per_second": 1.517, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 223.07692307692307, |
|
"grad_norm": 15.560932159423828, |
|
"learning_rate": 8.5512e-06, |
|
"loss": 0.9306, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 223.07692307692307, |
|
"eval_loss": 1.151904582977295, |
|
"eval_runtime": 11.2376, |
|
"eval_samples_per_second": 11.657, |
|
"eval_steps_per_second": 1.513, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 224.6153846153846, |
|
"grad_norm": 36.12020492553711, |
|
"learning_rate": 8.541400000000001e-06, |
|
"loss": 0.9668, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 224.6153846153846, |
|
"eval_loss": 1.139450192451477, |
|
"eval_runtime": 11.1643, |
|
"eval_samples_per_second": 11.734, |
|
"eval_steps_per_second": 1.523, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 226.15384615384616, |
|
"grad_norm": 31.29511070251465, |
|
"learning_rate": 8.531400000000001e-06, |
|
"loss": 0.9646, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 226.15384615384616, |
|
"eval_loss": 1.1311490535736084, |
|
"eval_runtime": 11.0, |
|
"eval_samples_per_second": 11.909, |
|
"eval_steps_per_second": 1.545, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 227.69230769230768, |
|
"grad_norm": 22.08748435974121, |
|
"learning_rate": 8.521400000000001e-06, |
|
"loss": 0.922, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 227.69230769230768, |
|
"eval_loss": 1.1504499912261963, |
|
"eval_runtime": 11.135, |
|
"eval_samples_per_second": 11.765, |
|
"eval_steps_per_second": 1.527, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 229.23076923076923, |
|
"grad_norm": 26.33457374572754, |
|
"learning_rate": 8.511400000000001e-06, |
|
"loss": 0.9306, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 229.23076923076923, |
|
"eval_loss": 1.136217713356018, |
|
"eval_runtime": 11.1465, |
|
"eval_samples_per_second": 11.753, |
|
"eval_steps_per_second": 1.525, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 230.76923076923077, |
|
"grad_norm": 49.193206787109375, |
|
"learning_rate": 8.501400000000001e-06, |
|
"loss": 0.938, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 230.76923076923077, |
|
"eval_loss": 1.1409271955490112, |
|
"eval_runtime": 11.0858, |
|
"eval_samples_per_second": 11.817, |
|
"eval_steps_per_second": 1.533, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 232.30769230769232, |
|
"grad_norm": 22.36850357055664, |
|
"learning_rate": 8.491400000000001e-06, |
|
"loss": 0.9218, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 232.30769230769232, |
|
"eval_loss": 1.131103515625, |
|
"eval_runtime": 11.0691, |
|
"eval_samples_per_second": 11.835, |
|
"eval_steps_per_second": 1.536, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 233.84615384615384, |
|
"grad_norm": 26.34011459350586, |
|
"learning_rate": 8.481400000000002e-06, |
|
"loss": 0.9617, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 233.84615384615384, |
|
"eval_loss": 1.1415542364120483, |
|
"eval_runtime": 10.9887, |
|
"eval_samples_per_second": 11.921, |
|
"eval_steps_per_second": 1.547, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 235.3846153846154, |
|
"grad_norm": 29.583358764648438, |
|
"learning_rate": 8.4714e-06, |
|
"loss": 0.9272, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 235.3846153846154, |
|
"eval_loss": 1.144914150238037, |
|
"eval_runtime": 11.2646, |
|
"eval_samples_per_second": 11.629, |
|
"eval_steps_per_second": 1.509, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 236.92307692307693, |
|
"grad_norm": 31.824247360229492, |
|
"learning_rate": 8.4614e-06, |
|
"loss": 0.9207, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 236.92307692307693, |
|
"eval_loss": 1.1387474536895752, |
|
"eval_runtime": 11.0721, |
|
"eval_samples_per_second": 11.832, |
|
"eval_steps_per_second": 1.535, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 238.46153846153845, |
|
"grad_norm": 41.94277572631836, |
|
"learning_rate": 8.4514e-06, |
|
"loss": 0.9454, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 238.46153846153845, |
|
"eval_loss": 1.1316168308258057, |
|
"eval_runtime": 11.1831, |
|
"eval_samples_per_second": 11.714, |
|
"eval_steps_per_second": 1.52, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"grad_norm": 21.150598526000977, |
|
"learning_rate": 8.4414e-06, |
|
"loss": 0.9249, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_loss": 1.1368097066879272, |
|
"eval_runtime": 11.0887, |
|
"eval_samples_per_second": 11.814, |
|
"eval_steps_per_second": 1.533, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 241.53846153846155, |
|
"grad_norm": 47.432212829589844, |
|
"learning_rate": 8.4314e-06, |
|
"loss": 0.9212, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 241.53846153846155, |
|
"eval_loss": 1.125348448753357, |
|
"eval_runtime": 11.2434, |
|
"eval_samples_per_second": 11.651, |
|
"eval_steps_per_second": 1.512, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 243.07692307692307, |
|
"grad_norm": 28.406036376953125, |
|
"learning_rate": 8.4214e-06, |
|
"loss": 0.9272, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 243.07692307692307, |
|
"eval_loss": 1.1328097581863403, |
|
"eval_runtime": 11.1097, |
|
"eval_samples_per_second": 11.791, |
|
"eval_steps_per_second": 1.53, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 244.6153846153846, |
|
"grad_norm": 53.369564056396484, |
|
"learning_rate": 8.4114e-06, |
|
"loss": 0.9174, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 244.6153846153846, |
|
"eval_loss": 1.1235861778259277, |
|
"eval_runtime": 10.9581, |
|
"eval_samples_per_second": 11.955, |
|
"eval_steps_per_second": 1.551, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 246.15384615384616, |
|
"grad_norm": 31.435935974121094, |
|
"learning_rate": 8.4014e-06, |
|
"loss": 0.9041, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 246.15384615384616, |
|
"eval_loss": 1.1266686916351318, |
|
"eval_runtime": 11.1276, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.528, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 247.69230769230768, |
|
"grad_norm": 32.799991607666016, |
|
"learning_rate": 8.3914e-06, |
|
"loss": 0.9062, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 247.69230769230768, |
|
"eval_loss": 1.1481693983078003, |
|
"eval_runtime": 11.0435, |
|
"eval_samples_per_second": 11.862, |
|
"eval_steps_per_second": 1.539, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 249.23076923076923, |
|
"grad_norm": 36.49935531616211, |
|
"learning_rate": 8.3814e-06, |
|
"loss": 0.9163, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 249.23076923076923, |
|
"eval_loss": 1.139769434928894, |
|
"eval_runtime": 11.1922, |
|
"eval_samples_per_second": 11.705, |
|
"eval_steps_per_second": 1.519, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 250.76923076923077, |
|
"grad_norm": 35.4781379699707, |
|
"learning_rate": 8.371400000000001e-06, |
|
"loss": 0.9219, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 250.76923076923077, |
|
"eval_loss": 1.1498539447784424, |
|
"eval_runtime": 11.1805, |
|
"eval_samples_per_second": 11.717, |
|
"eval_steps_per_second": 1.52, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 252.30769230769232, |
|
"grad_norm": 17.645612716674805, |
|
"learning_rate": 8.361400000000001e-06, |
|
"loss": 0.9278, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 252.30769230769232, |
|
"eval_loss": 1.1338902711868286, |
|
"eval_runtime": 11.1146, |
|
"eval_samples_per_second": 11.786, |
|
"eval_steps_per_second": 1.53, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 253.84615384615384, |
|
"grad_norm": 32.81660079956055, |
|
"learning_rate": 8.351400000000001e-06, |
|
"loss": 0.9108, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 253.84615384615384, |
|
"eval_loss": 1.1279574632644653, |
|
"eval_runtime": 11.1151, |
|
"eval_samples_per_second": 11.786, |
|
"eval_steps_per_second": 1.529, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 255.3846153846154, |
|
"grad_norm": 22.3878116607666, |
|
"learning_rate": 8.341400000000001e-06, |
|
"loss": 0.9011, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 255.3846153846154, |
|
"eval_loss": 1.1570419073104858, |
|
"eval_runtime": 11.0784, |
|
"eval_samples_per_second": 11.825, |
|
"eval_steps_per_second": 1.535, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 256.9230769230769, |
|
"grad_norm": 29.845205307006836, |
|
"learning_rate": 8.331400000000001e-06, |
|
"loss": 0.9314, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 256.9230769230769, |
|
"eval_loss": 1.1365561485290527, |
|
"eval_runtime": 11.2405, |
|
"eval_samples_per_second": 11.654, |
|
"eval_steps_per_second": 1.512, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 258.46153846153845, |
|
"grad_norm": 21.02674102783203, |
|
"learning_rate": 8.321400000000001e-06, |
|
"loss": 0.9021, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 258.46153846153845, |
|
"eval_loss": 1.164974570274353, |
|
"eval_runtime": 11.0643, |
|
"eval_samples_per_second": 11.84, |
|
"eval_steps_per_second": 1.536, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"grad_norm": 22.380117416381836, |
|
"learning_rate": 8.3114e-06, |
|
"loss": 0.912, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_loss": 1.1483317613601685, |
|
"eval_runtime": 11.1852, |
|
"eval_samples_per_second": 11.712, |
|
"eval_steps_per_second": 1.52, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 261.53846153846155, |
|
"grad_norm": 39.20146560668945, |
|
"learning_rate": 8.3014e-06, |
|
"loss": 0.9165, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 261.53846153846155, |
|
"eval_loss": 1.159449577331543, |
|
"eval_runtime": 11.4058, |
|
"eval_samples_per_second": 11.485, |
|
"eval_steps_per_second": 1.49, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 263.0769230769231, |
|
"grad_norm": 46.305389404296875, |
|
"learning_rate": 8.2914e-06, |
|
"loss": 0.916, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 263.0769230769231, |
|
"eval_loss": 1.146033525466919, |
|
"eval_runtime": 11.3638, |
|
"eval_samples_per_second": 11.528, |
|
"eval_steps_per_second": 1.496, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 264.61538461538464, |
|
"grad_norm": 33.07489776611328, |
|
"learning_rate": 8.2814e-06, |
|
"loss": 0.9147, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 264.61538461538464, |
|
"eval_loss": 1.143062710762024, |
|
"eval_runtime": 11.3544, |
|
"eval_samples_per_second": 11.537, |
|
"eval_steps_per_second": 1.497, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 266.15384615384613, |
|
"grad_norm": 35.233131408691406, |
|
"learning_rate": 8.2714e-06, |
|
"loss": 0.9151, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 266.15384615384613, |
|
"eval_loss": 1.126172661781311, |
|
"eval_runtime": 11.1185, |
|
"eval_samples_per_second": 11.782, |
|
"eval_steps_per_second": 1.529, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 267.6923076923077, |
|
"grad_norm": 32.72975540161133, |
|
"learning_rate": 8.2614e-06, |
|
"loss": 0.8881, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 267.6923076923077, |
|
"eval_loss": 1.1455607414245605, |
|
"eval_runtime": 11.0568, |
|
"eval_samples_per_second": 11.848, |
|
"eval_steps_per_second": 1.538, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 269.2307692307692, |
|
"grad_norm": 16.41983985900879, |
|
"learning_rate": 8.2514e-06, |
|
"loss": 0.9027, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 269.2307692307692, |
|
"eval_loss": 1.1283539533615112, |
|
"eval_runtime": 11.3233, |
|
"eval_samples_per_second": 11.569, |
|
"eval_steps_per_second": 1.501, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 270.7692307692308, |
|
"grad_norm": 20.32726287841797, |
|
"learning_rate": 8.2414e-06, |
|
"loss": 0.9391, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 270.7692307692308, |
|
"eval_loss": 1.124210000038147, |
|
"eval_runtime": 11.3345, |
|
"eval_samples_per_second": 11.558, |
|
"eval_steps_per_second": 1.5, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 272.3076923076923, |
|
"grad_norm": 23.14797019958496, |
|
"learning_rate": 8.2314e-06, |
|
"loss": 0.8899, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 272.3076923076923, |
|
"eval_loss": 1.1297597885131836, |
|
"eval_runtime": 11.1272, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.528, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 273.84615384615387, |
|
"grad_norm": 18.778406143188477, |
|
"learning_rate": 8.2214e-06, |
|
"loss": 0.9074, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 273.84615384615387, |
|
"eval_loss": 1.135562777519226, |
|
"eval_runtime": 11.2964, |
|
"eval_samples_per_second": 11.597, |
|
"eval_steps_per_second": 1.505, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 275.38461538461536, |
|
"grad_norm": 27.574323654174805, |
|
"learning_rate": 8.2114e-06, |
|
"loss": 0.8931, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 275.38461538461536, |
|
"eval_loss": 1.1423242092132568, |
|
"eval_runtime": 11.0992, |
|
"eval_samples_per_second": 11.803, |
|
"eval_steps_per_second": 1.532, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 276.9230769230769, |
|
"grad_norm": 26.559467315673828, |
|
"learning_rate": 8.2014e-06, |
|
"loss": 0.8913, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 276.9230769230769, |
|
"eval_loss": 1.1252741813659668, |
|
"eval_runtime": 11.2765, |
|
"eval_samples_per_second": 11.617, |
|
"eval_steps_per_second": 1.508, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 278.46153846153845, |
|
"grad_norm": 24.442596435546875, |
|
"learning_rate": 8.191400000000001e-06, |
|
"loss": 0.8993, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 278.46153846153845, |
|
"eval_loss": 1.1197646856307983, |
|
"eval_runtime": 11.0479, |
|
"eval_samples_per_second": 11.857, |
|
"eval_steps_per_second": 1.539, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"grad_norm": 42.99458694458008, |
|
"learning_rate": 8.181400000000001e-06, |
|
"loss": 0.8925, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_loss": 1.129381775856018, |
|
"eval_runtime": 11.1979, |
|
"eval_samples_per_second": 11.699, |
|
"eval_steps_per_second": 1.518, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 281.53846153846155, |
|
"grad_norm": 38.08549118041992, |
|
"learning_rate": 8.171400000000001e-06, |
|
"loss": 0.8699, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 281.53846153846155, |
|
"eval_loss": 1.1298307180404663, |
|
"eval_runtime": 11.1097, |
|
"eval_samples_per_second": 11.792, |
|
"eval_steps_per_second": 1.53, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 283.0769230769231, |
|
"grad_norm": 49.90501022338867, |
|
"learning_rate": 8.161400000000001e-06, |
|
"loss": 0.9207, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 283.0769230769231, |
|
"eval_loss": 1.1229758262634277, |
|
"eval_runtime": 11.284, |
|
"eval_samples_per_second": 11.609, |
|
"eval_steps_per_second": 1.507, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 284.61538461538464, |
|
"grad_norm": 37.63615036010742, |
|
"learning_rate": 8.1514e-06, |
|
"loss": 0.9061, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 284.61538461538464, |
|
"eval_loss": 1.1395354270935059, |
|
"eval_runtime": 11.2087, |
|
"eval_samples_per_second": 11.687, |
|
"eval_steps_per_second": 1.517, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 286.15384615384613, |
|
"grad_norm": 17.88991355895996, |
|
"learning_rate": 8.1414e-06, |
|
"loss": 0.8664, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 286.15384615384613, |
|
"eval_loss": 1.1339645385742188, |
|
"eval_runtime": 11.317, |
|
"eval_samples_per_second": 11.576, |
|
"eval_steps_per_second": 1.502, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 287.6923076923077, |
|
"grad_norm": 33.13370895385742, |
|
"learning_rate": 8.1314e-06, |
|
"loss": 0.8759, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 287.6923076923077, |
|
"eval_loss": 1.1445599794387817, |
|
"eval_runtime": 11.0472, |
|
"eval_samples_per_second": 11.858, |
|
"eval_steps_per_second": 1.539, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 289.2307692307692, |
|
"grad_norm": 22.776575088500977, |
|
"learning_rate": 8.1214e-06, |
|
"loss": 0.8889, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 289.2307692307692, |
|
"eval_loss": 1.1401453018188477, |
|
"eval_runtime": 11.2523, |
|
"eval_samples_per_second": 11.642, |
|
"eval_steps_per_second": 1.511, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 290.7692307692308, |
|
"grad_norm": 19.893653869628906, |
|
"learning_rate": 8.1114e-06, |
|
"loss": 0.8945, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 290.7692307692308, |
|
"eval_loss": 1.1185678243637085, |
|
"eval_runtime": 10.9814, |
|
"eval_samples_per_second": 11.929, |
|
"eval_steps_per_second": 1.548, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 292.3076923076923, |
|
"grad_norm": 35.09921646118164, |
|
"learning_rate": 8.1015e-06, |
|
"loss": 0.8821, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 292.3076923076923, |
|
"eval_loss": 1.1313185691833496, |
|
"eval_runtime": 11.2698, |
|
"eval_samples_per_second": 11.624, |
|
"eval_steps_per_second": 1.508, |
|
"step": 19000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 100000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1539, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.280203343597568e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|