{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 3100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 38.730377197265625,
      "eval_runtime": 2.8841,
      "eval_samples_per_second": 41.955,
      "eval_steps_per_second": 5.548,
      "step": 62
    },
    {
      "epoch": 2.0,
      "eval_loss": 37.18061828613281,
      "eval_runtime": 2.9537,
      "eval_samples_per_second": 40.966,
      "eval_steps_per_second": 5.417,
      "step": 124
    },
    {
      "epoch": 3.0,
      "eval_loss": 33.34883499145508,
      "eval_runtime": 3.0329,
      "eval_samples_per_second": 39.895,
      "eval_steps_per_second": 5.275,
      "step": 186
    },
    {
      "epoch": 4.0,
      "eval_loss": 28.77151870727539,
      "eval_runtime": 3.038,
      "eval_samples_per_second": 39.829,
      "eval_steps_per_second": 5.267,
      "step": 248
    },
    {
      "epoch": 5.0,
      "eval_loss": 24.89649200439453,
      "eval_runtime": 3.1068,
      "eval_samples_per_second": 38.947,
      "eval_steps_per_second": 5.15,
      "step": 310
    },
    {
      "epoch": 6.0,
      "eval_loss": 21.40663719177246,
      "eval_runtime": 3.1605,
      "eval_samples_per_second": 38.285,
      "eval_steps_per_second": 5.062,
      "step": 372
    },
    {
      "epoch": 7.0,
      "eval_loss": 19.166534423828125,
      "eval_runtime": 3.191,
      "eval_samples_per_second": 37.919,
      "eval_steps_per_second": 5.014,
      "step": 434
    },
    {
      "epoch": 8.0,
      "eval_loss": 17.308666229248047,
      "eval_runtime": 3.1957,
      "eval_samples_per_second": 37.863,
      "eval_steps_per_second": 5.007,
      "step": 496
    },
    {
      "epoch": 8.06,
      "learning_rate": 1.6774193548387096e-07,
      "loss": 29.5022,
      "step": 500
    },
    {
      "epoch": 9.0,
      "eval_loss": 15.532267570495605,
      "eval_runtime": 3.0171,
      "eval_samples_per_second": 40.104,
      "eval_steps_per_second": 5.303,
      "step": 558
    },
    {
      "epoch": 10.0,
      "eval_loss": 14.449665069580078,
      "eval_runtime": 3.0595,
      "eval_samples_per_second": 39.549,
      "eval_steps_per_second": 5.23,
      "step": 620
    },
    {
      "epoch": 11.0,
      "eval_loss": 14.162155151367188,
      "eval_runtime": 3.1079,
      "eval_samples_per_second": 38.933,
      "eval_steps_per_second": 5.148,
      "step": 682
    },
    {
      "epoch": 12.0,
      "eval_loss": 13.751199722290039,
      "eval_runtime": 3.1538,
      "eval_samples_per_second": 38.366,
      "eval_steps_per_second": 5.073,
      "step": 744
    },
    {
      "epoch": 13.0,
      "eval_loss": 13.394129753112793,
      "eval_runtime": 3.1936,
      "eval_samples_per_second": 37.888,
      "eval_steps_per_second": 5.01,
      "step": 806
    },
    {
      "epoch": 14.0,
      "eval_loss": 13.021451950073242,
      "eval_runtime": 3.2109,
      "eval_samples_per_second": 37.684,
      "eval_steps_per_second": 4.983,
      "step": 868
    },
    {
      "epoch": 15.0,
      "eval_loss": 12.83625602722168,
      "eval_runtime": 3.2002,
      "eval_samples_per_second": 37.81,
      "eval_steps_per_second": 5.0,
      "step": 930
    },
    {
      "epoch": 16.0,
      "eval_loss": 12.659578323364258,
      "eval_runtime": 3.1987,
      "eval_samples_per_second": 37.828,
      "eval_steps_per_second": 5.002,
      "step": 992
    },
    {
      "epoch": 16.13,
      "learning_rate": 1.3548387096774192e-07,
      "loss": 13.2409,
      "step": 1000
    },
    {
      "epoch": 17.0,
      "eval_loss": 12.591437339782715,
      "eval_runtime": 3.1748,
      "eval_samples_per_second": 38.113,
      "eval_steps_per_second": 5.04,
      "step": 1054
    },
    {
      "epoch": 18.0,
      "eval_loss": 12.289512634277344,
      "eval_runtime": 3.1814,
      "eval_samples_per_second": 38.033,
      "eval_steps_per_second": 5.029,
      "step": 1116
    },
    {
      "epoch": 19.0,
      "eval_loss": 12.269988059997559,
      "eval_runtime": 3.2171,
      "eval_samples_per_second": 37.611,
      "eval_steps_per_second": 4.973,
      "step": 1178
    },
    {
      "epoch": 20.0,
      "eval_loss": 12.142666816711426,
      "eval_runtime": 3.202,
      "eval_samples_per_second": 37.789,
      "eval_steps_per_second": 4.997,
      "step": 1240
    },
    {
      "epoch": 21.0,
      "eval_loss": 12.13444995880127,
      "eval_runtime": 3.1974,
      "eval_samples_per_second": 37.843,
      "eval_steps_per_second": 5.004,
      "step": 1302
    },
    {
      "epoch": 22.0,
      "eval_loss": 12.062304496765137,
      "eval_runtime": 3.1989,
      "eval_samples_per_second": 37.825,
      "eval_steps_per_second": 5.002,
      "step": 1364
    },
    {
      "epoch": 23.0,
      "eval_loss": 12.063033103942871,
      "eval_runtime": 3.2187,
      "eval_samples_per_second": 37.593,
      "eval_steps_per_second": 4.971,
      "step": 1426
    },
    {
      "epoch": 24.0,
      "eval_loss": 12.098295211791992,
      "eval_runtime": 3.1987,
      "eval_samples_per_second": 37.827,
      "eval_steps_per_second": 5.002,
      "step": 1488
    },
    {
      "epoch": 24.19,
      "learning_rate": 1.032258064516129e-07,
      "loss": 11.0242,
      "step": 1500
    },
    {
      "epoch": 25.0,
      "eval_loss": 11.790215492248535,
      "eval_runtime": 3.1841,
      "eval_samples_per_second": 38.002,
      "eval_steps_per_second": 5.025,
      "step": 1550
    },
    {
      "epoch": 26.0,
      "eval_loss": 11.862567901611328,
      "eval_runtime": 3.1927,
      "eval_samples_per_second": 37.899,
      "eval_steps_per_second": 5.011,
      "step": 1612
    },
    {
      "epoch": 27.0,
      "eval_loss": 11.915380477905273,
      "eval_runtime": 3.2213,
      "eval_samples_per_second": 37.562,
      "eval_steps_per_second": 4.967,
      "step": 1674
    },
    {
      "epoch": 28.0,
      "eval_loss": 11.648334503173828,
      "eval_runtime": 3.1958,
      "eval_samples_per_second": 37.862,
      "eval_steps_per_second": 5.007,
      "step": 1736
    },
    {
      "epoch": 29.0,
      "eval_loss": 11.861973762512207,
      "eval_runtime": 3.1992,
      "eval_samples_per_second": 37.822,
      "eval_steps_per_second": 5.001,
      "step": 1798
    },
    {
      "epoch": 30.0,
      "eval_loss": 11.598742485046387,
      "eval_runtime": 3.2,
      "eval_samples_per_second": 37.812,
      "eval_steps_per_second": 5.0,
      "step": 1860
    },
    {
      "epoch": 31.0,
      "eval_loss": 11.763324737548828,
      "eval_runtime": 3.1997,
      "eval_samples_per_second": 37.817,
      "eval_steps_per_second": 5.001,
      "step": 1922
    },
    {
      "epoch": 32.0,
      "eval_loss": 11.699971199035645,
      "eval_runtime": 3.1883,
      "eval_samples_per_second": 37.951,
      "eval_steps_per_second": 5.018,
      "step": 1984
    },
    {
      "epoch": 32.26,
      "learning_rate": 7.096774193548388e-08,
      "loss": 10.5931,
      "step": 2000
    },
    {
      "epoch": 33.0,
      "eval_loss": 11.6630859375,
      "eval_runtime": 3.1817,
      "eval_samples_per_second": 38.03,
      "eval_steps_per_second": 5.029,
      "step": 2046
    },
    {
      "epoch": 34.0,
      "eval_loss": 11.431499481201172,
      "eval_runtime": 3.1876,
      "eval_samples_per_second": 37.96,
      "eval_steps_per_second": 5.019,
      "step": 2108
    },
    {
      "epoch": 35.0,
      "eval_loss": 11.56187915802002,
      "eval_runtime": 3.1936,
      "eval_samples_per_second": 37.889,
      "eval_steps_per_second": 5.01,
      "step": 2170
    },
    {
      "epoch": 36.0,
      "eval_loss": 11.59299087524414,
      "eval_runtime": 3.1844,
      "eval_samples_per_second": 37.998,
      "eval_steps_per_second": 5.025,
      "step": 2232
    },
    {
      "epoch": 37.0,
      "eval_loss": 11.55374526977539,
      "eval_runtime": 3.2023,
      "eval_samples_per_second": 37.785,
      "eval_steps_per_second": 4.996,
      "step": 2294
    },
    {
      "epoch": 38.0,
      "eval_loss": 11.67027473449707,
      "eval_runtime": 3.1993,
      "eval_samples_per_second": 37.821,
      "eval_steps_per_second": 5.001,
      "step": 2356
    },
    {
      "epoch": 39.0,
      "eval_loss": 11.548821449279785,
      "eval_runtime": 3.1859,
      "eval_samples_per_second": 37.98,
      "eval_steps_per_second": 5.022,
      "step": 2418
    },
    {
      "epoch": 40.0,
      "eval_loss": 11.443954467773438,
      "eval_runtime": 3.1913,
      "eval_samples_per_second": 37.916,
      "eval_steps_per_second": 5.014,
      "step": 2480
    },
    {
      "epoch": 40.32,
      "learning_rate": 3.8709677419354835e-08,
      "loss": 10.4503,
      "step": 2500
    },
    {
      "epoch": 41.0,
      "eval_loss": 11.321048736572266,
      "eval_runtime": 3.1707,
      "eval_samples_per_second": 38.162,
      "eval_steps_per_second": 5.046,
      "step": 2542
    },
    {
      "epoch": 42.0,
      "eval_loss": 11.437300682067871,
      "eval_runtime": 3.1784,
      "eval_samples_per_second": 38.07,
      "eval_steps_per_second": 5.034,
      "step": 2604
    },
    {
      "epoch": 43.0,
      "eval_loss": 11.486796379089355,
      "eval_runtime": 3.187,
      "eval_samples_per_second": 37.967,
      "eval_steps_per_second": 5.02,
      "step": 2666
    },
    {
      "epoch": 44.0,
      "eval_loss": 11.38952350616455,
      "eval_runtime": 3.1899,
      "eval_samples_per_second": 37.932,
      "eval_steps_per_second": 5.016,
      "step": 2728
    },
    {
      "epoch": 45.0,
      "eval_loss": 11.409669876098633,
      "eval_runtime": 3.1917,
      "eval_samples_per_second": 37.911,
      "eval_steps_per_second": 5.013,
      "step": 2790
    },
    {
      "epoch": 46.0,
      "eval_loss": 11.556734085083008,
      "eval_runtime": 3.1934,
      "eval_samples_per_second": 37.891,
      "eval_steps_per_second": 5.01,
      "step": 2852
    },
    {
      "epoch": 47.0,
      "eval_loss": 11.35185718536377,
      "eval_runtime": 3.1869,
      "eval_samples_per_second": 37.968,
      "eval_steps_per_second": 5.021,
      "step": 2914
    },
    {
      "epoch": 48.0,
      "eval_loss": 11.326269149780273,
      "eval_runtime": 3.1877,
      "eval_samples_per_second": 37.959,
      "eval_steps_per_second": 5.019,
      "step": 2976
    },
    {
      "epoch": 48.39,
      "learning_rate": 6.451612903225806e-09,
      "loss": 10.4239,
      "step": 3000
    },
    {
      "epoch": 49.0,
      "eval_loss": 11.404916763305664,
      "eval_runtime": 3.3462,
      "eval_samples_per_second": 36.16,
      "eval_steps_per_second": 4.782,
      "step": 3038
    },
    {
      "epoch": 50.0,
      "eval_loss": 11.230060577392578,
      "eval_runtime": 3.1439,
      "eval_samples_per_second": 38.487,
      "eval_steps_per_second": 5.089,
      "step": 3100
    },
    {
      "epoch": 50.0,
      "step": 3100,
      "total_flos": 1623561609830400.0,
      "train_loss": 14.082046567855343,
      "train_runtime": 2464.2419,
      "train_samples_per_second": 9.983,
      "train_steps_per_second": 1.258
    }
  ],
  "max_steps": 3100,
  "num_train_epochs": 50,
  "total_flos": 1623561609830400.0,
  "trial_name": null,
  "trial_params": null
}