|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.1111111111111111, |
|
"eval_loss": 1.3930704593658447, |
|
"eval_runtime": 0.7497, |
|
"eval_samples_per_second": 24.01, |
|
"eval_steps_per_second": 4.002, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.16666666666666666, |
|
"eval_loss": 1.376913070678711, |
|
"eval_runtime": 0.5755, |
|
"eval_samples_per_second": 31.277, |
|
"eval_steps_per_second": 5.213, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5555555555555556, |
|
"eval_loss": 1.3498488664627075, |
|
"eval_runtime": 0.5792, |
|
"eval_samples_per_second": 31.078, |
|
"eval_steps_per_second": 5.18, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.3331588506698608, |
|
"eval_runtime": 0.5535, |
|
"eval_samples_per_second": 32.518, |
|
"eval_steps_per_second": 5.42, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.3144757747650146, |
|
"eval_runtime": 0.5818, |
|
"eval_samples_per_second": 30.94, |
|
"eval_steps_per_second": 5.157, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.300213098526001, |
|
"eval_runtime": 0.5539, |
|
"eval_samples_per_second": 32.496, |
|
"eval_steps_per_second": 5.416, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.2916626930236816, |
|
"eval_runtime": 0.5489, |
|
"eval_samples_per_second": 32.791, |
|
"eval_steps_per_second": 5.465, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.270052194595337, |
|
"eval_runtime": 0.5746, |
|
"eval_samples_per_second": 31.328, |
|
"eval_steps_per_second": 5.221, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.2555011510849, |
|
"eval_runtime": 0.5489, |
|
"eval_samples_per_second": 32.791, |
|
"eval_steps_per_second": 5.465, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.2477619647979736, |
|
"eval_runtime": 0.5687, |
|
"eval_samples_per_second": 31.651, |
|
"eval_steps_per_second": 5.275, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.2382668256759644, |
|
"eval_runtime": 0.5502, |
|
"eval_samples_per_second": 32.716, |
|
"eval_steps_per_second": 5.453, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.223071575164795, |
|
"eval_runtime": 0.5476, |
|
"eval_samples_per_second": 32.873, |
|
"eval_steps_per_second": 5.479, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.2033451795578003, |
|
"eval_runtime": 0.5538, |
|
"eval_samples_per_second": 32.503, |
|
"eval_steps_per_second": 5.417, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.186124563217163, |
|
"eval_runtime": 0.5442, |
|
"eval_samples_per_second": 33.075, |
|
"eval_steps_per_second": 5.512, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.182234287261963, |
|
"eval_runtime": 0.5735, |
|
"eval_samples_per_second": 31.387, |
|
"eval_steps_per_second": 5.231, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.1588956117630005, |
|
"eval_runtime": 0.5533, |
|
"eval_samples_per_second": 32.534, |
|
"eval_steps_per_second": 5.422, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.1478044986724854, |
|
"eval_runtime": 0.5826, |
|
"eval_samples_per_second": 30.897, |
|
"eval_steps_per_second": 5.15, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.1322474479675293, |
|
"eval_runtime": 0.5883, |
|
"eval_samples_per_second": 30.598, |
|
"eval_steps_per_second": 5.1, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.110813021659851, |
|
"eval_runtime": 0.5594, |
|
"eval_samples_per_second": 32.177, |
|
"eval_steps_per_second": 5.363, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.1011286973953247, |
|
"eval_runtime": 0.5793, |
|
"eval_samples_per_second": 31.073, |
|
"eval_steps_per_second": 5.179, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.093163251876831, |
|
"eval_runtime": 0.5654, |
|
"eval_samples_per_second": 31.836, |
|
"eval_steps_per_second": 5.306, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.0637242794036865, |
|
"eval_runtime": 0.5679, |
|
"eval_samples_per_second": 31.695, |
|
"eval_steps_per_second": 5.283, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.0390856266021729, |
|
"eval_runtime": 0.574, |
|
"eval_samples_per_second": 31.358, |
|
"eval_steps_per_second": 5.226, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.0522559881210327, |
|
"eval_runtime": 0.6203, |
|
"eval_samples_per_second": 29.02, |
|
"eval_steps_per_second": 4.837, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.0473227500915527, |
|
"eval_runtime": 0.5963, |
|
"eval_samples_per_second": 30.186, |
|
"eval_steps_per_second": 5.031, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 0.9998855590820312, |
|
"eval_runtime": 0.6467, |
|
"eval_samples_per_second": 27.834, |
|
"eval_steps_per_second": 4.639, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.0170878171920776, |
|
"eval_runtime": 0.5994, |
|
"eval_samples_per_second": 30.032, |
|
"eval_steps_per_second": 5.005, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.028573989868164, |
|
"eval_runtime": 0.6332, |
|
"eval_samples_per_second": 28.427, |
|
"eval_steps_per_second": 4.738, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7222222222222222, |
|
"eval_loss": 1.0290330648422241, |
|
"eval_runtime": 0.5676, |
|
"eval_samples_per_second": 31.712, |
|
"eval_steps_per_second": 5.285, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.9571393132209778, |
|
"eval_runtime": 0.5753, |
|
"eval_samples_per_second": 31.286, |
|
"eval_steps_per_second": 5.214, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.9450912475585938, |
|
"eval_runtime": 0.5556, |
|
"eval_samples_per_second": 32.4, |
|
"eval_steps_per_second": 5.4, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.85059654712677, |
|
"eval_runtime": 0.5543, |
|
"eval_samples_per_second": 32.473, |
|
"eval_steps_per_second": 5.412, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.8480208516120911, |
|
"eval_runtime": 0.5762, |
|
"eval_samples_per_second": 31.241, |
|
"eval_steps_per_second": 5.207, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.8499312400817871, |
|
"eval_runtime": 0.555, |
|
"eval_samples_per_second": 32.433, |
|
"eval_steps_per_second": 5.405, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.8403282165527344, |
|
"eval_runtime": 0.5709, |
|
"eval_samples_per_second": 31.532, |
|
"eval_steps_per_second": 5.255, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.7771400809288025, |
|
"eval_runtime": 0.5569, |
|
"eval_samples_per_second": 32.319, |
|
"eval_steps_per_second": 5.387, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.7591123580932617, |
|
"eval_runtime": 0.6487, |
|
"eval_samples_per_second": 27.747, |
|
"eval_steps_per_second": 4.625, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.7476389408111572, |
|
"eval_runtime": 0.5654, |
|
"eval_samples_per_second": 31.835, |
|
"eval_steps_per_second": 5.306, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.7831047773361206, |
|
"eval_runtime": 0.5458, |
|
"eval_samples_per_second": 32.977, |
|
"eval_steps_per_second": 5.496, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.7049207091331482, |
|
"eval_runtime": 0.5872, |
|
"eval_samples_per_second": 30.653, |
|
"eval_steps_per_second": 5.109, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.6811972856521606, |
|
"eval_runtime": 0.5667, |
|
"eval_samples_per_second": 31.762, |
|
"eval_steps_per_second": 5.294, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.6736953258514404, |
|
"eval_runtime": 0.5538, |
|
"eval_samples_per_second": 32.502, |
|
"eval_steps_per_second": 5.417, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.6515324711799622, |
|
"eval_runtime": 0.5632, |
|
"eval_samples_per_second": 31.958, |
|
"eval_steps_per_second": 5.326, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.6634184122085571, |
|
"eval_runtime": 1.1806, |
|
"eval_samples_per_second": 15.246, |
|
"eval_steps_per_second": 2.541, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.6234365105628967, |
|
"eval_runtime": 0.6418, |
|
"eval_samples_per_second": 28.044, |
|
"eval_steps_per_second": 4.674, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.8482791185379028, |
|
"eval_runtime": 0.5859, |
|
"eval_samples_per_second": 30.723, |
|
"eval_steps_per_second": 5.121, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7264916896820068, |
|
"eval_runtime": 0.5835, |
|
"eval_samples_per_second": 30.847, |
|
"eval_steps_per_second": 5.141, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.7383356094360352, |
|
"eval_runtime": 0.5549, |
|
"eval_samples_per_second": 32.437, |
|
"eval_steps_per_second": 5.406, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7005217671394348, |
|
"eval_runtime": 0.5608, |
|
"eval_samples_per_second": 32.095, |
|
"eval_steps_per_second": 5.349, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5740242004394531, |
|
"eval_runtime": 0.5536, |
|
"eval_samples_per_second": 32.517, |
|
"eval_steps_per_second": 5.42, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5622536540031433, |
|
"eval_runtime": 0.5935, |
|
"eval_samples_per_second": 30.329, |
|
"eval_steps_per_second": 5.055, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.557184636592865, |
|
"eval_runtime": 0.5545, |
|
"eval_samples_per_second": 32.463, |
|
"eval_steps_per_second": 5.41, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5769361853599548, |
|
"eval_runtime": 0.5844, |
|
"eval_samples_per_second": 30.802, |
|
"eval_steps_per_second": 5.134, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.550247311592102, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 29.266, |
|
"eval_steps_per_second": 4.878, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.6281833052635193, |
|
"eval_runtime": 0.5457, |
|
"eval_samples_per_second": 32.988, |
|
"eval_steps_per_second": 5.498, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.1157, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5906974077224731, |
|
"eval_runtime": 0.5821, |
|
"eval_samples_per_second": 30.92, |
|
"eval_steps_per_second": 5.153, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.6346855163574219, |
|
"eval_runtime": 0.5542, |
|
"eval_samples_per_second": 32.481, |
|
"eval_steps_per_second": 5.414, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.6413730382919312, |
|
"eval_runtime": 0.5829, |
|
"eval_samples_per_second": 30.882, |
|
"eval_steps_per_second": 5.147, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.465614378452301, |
|
"eval_runtime": 0.5965, |
|
"eval_samples_per_second": 30.178, |
|
"eval_steps_per_second": 5.03, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.48488152027130127, |
|
"eval_runtime": 0.5621, |
|
"eval_samples_per_second": 32.02, |
|
"eval_steps_per_second": 5.337, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.8426976799964905, |
|
"eval_runtime": 0.5798, |
|
"eval_samples_per_second": 31.045, |
|
"eval_steps_per_second": 5.174, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.5708574652671814, |
|
"eval_runtime": 0.5531, |
|
"eval_samples_per_second": 32.544, |
|
"eval_steps_per_second": 5.424, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.5026788115501404, |
|
"eval_runtime": 0.5575, |
|
"eval_samples_per_second": 32.289, |
|
"eval_steps_per_second": 5.381, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.5724208354949951, |
|
"eval_runtime": 0.5481, |
|
"eval_samples_per_second": 32.844, |
|
"eval_steps_per_second": 5.474, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.5301716327667236, |
|
"eval_runtime": 0.553, |
|
"eval_samples_per_second": 32.551, |
|
"eval_steps_per_second": 5.425, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5272272825241089, |
|
"eval_runtime": 0.5566, |
|
"eval_samples_per_second": 32.34, |
|
"eval_steps_per_second": 5.39, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.5444329380989075, |
|
"eval_runtime": 0.555, |
|
"eval_samples_per_second": 32.435, |
|
"eval_steps_per_second": 5.406, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.3936518132686615, |
|
"eval_runtime": 0.6001, |
|
"eval_samples_per_second": 29.994, |
|
"eval_steps_per_second": 4.999, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.41802236437797546, |
|
"eval_runtime": 0.5658, |
|
"eval_samples_per_second": 31.816, |
|
"eval_steps_per_second": 5.303, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.5185115337371826, |
|
"eval_runtime": 0.5619, |
|
"eval_samples_per_second": 32.036, |
|
"eval_steps_per_second": 5.339, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.39606520533561707, |
|
"eval_runtime": 0.5983, |
|
"eval_samples_per_second": 30.085, |
|
"eval_steps_per_second": 5.014, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.3859682083129883, |
|
"eval_runtime": 0.5757, |
|
"eval_samples_per_second": 31.268, |
|
"eval_steps_per_second": 5.211, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.39656686782836914, |
|
"eval_runtime": 0.6158, |
|
"eval_samples_per_second": 29.228, |
|
"eval_steps_per_second": 4.871, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.39676183462142944, |
|
"eval_runtime": 0.6324, |
|
"eval_samples_per_second": 28.462, |
|
"eval_steps_per_second": 4.744, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.4546321630477905, |
|
"eval_runtime": 0.5605, |
|
"eval_samples_per_second": 32.114, |
|
"eval_steps_per_second": 5.352, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.4021334648132324, |
|
"eval_runtime": 0.553, |
|
"eval_samples_per_second": 32.55, |
|
"eval_steps_per_second": 5.425, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.414422869682312, |
|
"eval_runtime": 0.5759, |
|
"eval_samples_per_second": 31.256, |
|
"eval_steps_per_second": 5.209, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.35500773787498474, |
|
"eval_runtime": 0.5802, |
|
"eval_samples_per_second": 31.024, |
|
"eval_steps_per_second": 5.171, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.3838707506656647, |
|
"eval_runtime": 0.5616, |
|
"eval_samples_per_second": 32.052, |
|
"eval_steps_per_second": 5.342, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.437086820602417, |
|
"eval_runtime": 0.554, |
|
"eval_samples_per_second": 32.49, |
|
"eval_steps_per_second": 5.415, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.36943355202674866, |
|
"eval_runtime": 0.5894, |
|
"eval_samples_per_second": 30.537, |
|
"eval_steps_per_second": 5.09, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.36648380756378174, |
|
"eval_runtime": 0.5615, |
|
"eval_samples_per_second": 32.058, |
|
"eval_steps_per_second": 5.343, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.4732191264629364, |
|
"eval_runtime": 0.5632, |
|
"eval_samples_per_second": 31.959, |
|
"eval_steps_per_second": 5.327, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.4652000665664673, |
|
"eval_runtime": 0.592, |
|
"eval_samples_per_second": 30.406, |
|
"eval_steps_per_second": 5.068, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.670432448387146, |
|
"eval_runtime": 0.5801, |
|
"eval_samples_per_second": 31.028, |
|
"eval_steps_per_second": 5.171, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7354382872581482, |
|
"eval_runtime": 0.5599, |
|
"eval_samples_per_second": 32.151, |
|
"eval_steps_per_second": 5.359, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5439589023590088, |
|
"eval_runtime": 0.5486, |
|
"eval_samples_per_second": 32.811, |
|
"eval_steps_per_second": 5.468, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.4809061586856842, |
|
"eval_runtime": 0.6101, |
|
"eval_samples_per_second": 29.505, |
|
"eval_steps_per_second": 4.917, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.6704312562942505, |
|
"eval_runtime": 0.5954, |
|
"eval_samples_per_second": 30.229, |
|
"eval_steps_per_second": 5.038, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.41327810287475586, |
|
"eval_runtime": 0.5783, |
|
"eval_samples_per_second": 31.124, |
|
"eval_steps_per_second": 5.187, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.350969523191452, |
|
"eval_runtime": 0.5507, |
|
"eval_samples_per_second": 32.686, |
|
"eval_steps_per_second": 5.448, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.39819759130477905, |
|
"eval_runtime": 0.5465, |
|
"eval_samples_per_second": 32.939, |
|
"eval_steps_per_second": 5.49, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.34932640194892883, |
|
"eval_runtime": 0.5635, |
|
"eval_samples_per_second": 31.941, |
|
"eval_steps_per_second": 5.323, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.48363664746284485, |
|
"eval_runtime": 0.5411, |
|
"eval_samples_per_second": 33.263, |
|
"eval_steps_per_second": 5.544, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.4434005320072174, |
|
"eval_runtime": 0.5964, |
|
"eval_samples_per_second": 30.181, |
|
"eval_steps_per_second": 5.03, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.4290742874145508, |
|
"eval_runtime": 0.5584, |
|
"eval_samples_per_second": 32.235, |
|
"eval_steps_per_second": 5.373, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.34131091833114624, |
|
"eval_runtime": 0.5715, |
|
"eval_samples_per_second": 31.493, |
|
"eval_steps_per_second": 5.249, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8888888888888888, |
|
"eval_loss": 0.3645610511302948, |
|
"eval_runtime": 0.5506, |
|
"eval_samples_per_second": 32.692, |
|
"eval_steps_per_second": 5.449, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.5591509938240051, |
|
"eval_runtime": 0.5908, |
|
"eval_samples_per_second": 30.468, |
|
"eval_steps_per_second": 5.078, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_loss": 0.48491573333740234, |
|
"eval_runtime": 0.5689, |
|
"eval_samples_per_second": 31.643, |
|
"eval_steps_per_second": 5.274, |
|
"step": 900 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 900, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.507976427331584e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|