|
{ |
|
"best_metric": 28.6435, |
|
"best_model_checkpoint": "base_ox-wn_cod_15ep/checkpoint-38360", |
|
"epoch": 15.0, |
|
"global_step": 41100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 2.5645, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 11.496853096838793, |
|
"eval_loss": 2.2535109519958496, |
|
"eval_rouge1": 24.4437, |
|
"eval_rouge2": 6.4189, |
|
"eval_rougeL": 22.7949, |
|
"eval_rougeLsum": 22.7909, |
|
"eval_runtime": 92.2279, |
|
"eval_samples_per_second": 151.603, |
|
"eval_steps_per_second": 2.375, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 2.3501, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 11.71499070233157, |
|
"eval_loss": 2.1641664505004883, |
|
"eval_rouge1": 25.6642, |
|
"eval_rouge2": 7.289, |
|
"eval_rougeL": 23.8689, |
|
"eval_rougeLsum": 23.8749, |
|
"eval_runtime": 92.3758, |
|
"eval_samples_per_second": 151.36, |
|
"eval_steps_per_second": 2.371, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4e-05, |
|
"loss": 2.2516, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 11.744099556572737, |
|
"eval_loss": 2.1115779876708984, |
|
"eval_rouge1": 26.4562, |
|
"eval_rouge2": 7.8955, |
|
"eval_rougeL": 24.6275, |
|
"eval_rougeLsum": 24.6376, |
|
"eval_runtime": 91.9643, |
|
"eval_samples_per_second": 152.037, |
|
"eval_steps_per_second": 2.381, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 2.1806, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 11.793019596624232, |
|
"eval_loss": 2.073715925216675, |
|
"eval_rouge1": 27.0392, |
|
"eval_rouge2": 8.2393, |
|
"eval_rougeL": 25.1555, |
|
"eval_rougeLsum": 25.1641, |
|
"eval_runtime": 92.1881, |
|
"eval_samples_per_second": 151.668, |
|
"eval_steps_per_second": 2.376, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.1233, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 11.901444714633099, |
|
"eval_loss": 2.0459506511688232, |
|
"eval_rouge1": 27.2709, |
|
"eval_rouge2": 8.4244, |
|
"eval_rougeL": 25.3847, |
|
"eval_rougeLsum": 25.4003, |
|
"eval_runtime": 92.1849, |
|
"eval_samples_per_second": 151.674, |
|
"eval_steps_per_second": 2.376, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0765, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 11.811257330853955, |
|
"eval_loss": 2.0235939025878906, |
|
"eval_rouge1": 27.5456, |
|
"eval_rouge2": 8.6096, |
|
"eval_rougeL": 25.6321, |
|
"eval_rougeLsum": 25.6462, |
|
"eval_runtime": 92.2077, |
|
"eval_samples_per_second": 151.636, |
|
"eval_steps_per_second": 2.375, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 2.0371, |
|
"step": 19180 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 11.687455299671006, |
|
"eval_loss": 2.004668951034546, |
|
"eval_rouge1": 27.7209, |
|
"eval_rouge2": 8.7277, |
|
"eval_rougeL": 25.7871, |
|
"eval_rougeLsum": 25.8084, |
|
"eval_runtime": 92.3796, |
|
"eval_samples_per_second": 151.354, |
|
"eval_steps_per_second": 2.371, |
|
"step": 19180 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 2.0036, |
|
"step": 21920 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 11.548705478472321, |
|
"eval_loss": 1.9917967319488525, |
|
"eval_rouge1": 28.0431, |
|
"eval_rouge2": 8.9863, |
|
"eval_rougeL": 26.1072, |
|
"eval_rougeLsum": 26.1198, |
|
"eval_runtime": 92.2225, |
|
"eval_samples_per_second": 151.612, |
|
"eval_steps_per_second": 2.375, |
|
"step": 21920 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9752, |
|
"step": 24660 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 11.793949363467314, |
|
"eval_loss": 1.978798747062683, |
|
"eval_rouge1": 28.1807, |
|
"eval_rouge2": 9.0219, |
|
"eval_rougeL": 26.1692, |
|
"eval_rougeLsum": 26.1886, |
|
"eval_runtime": 92.3313, |
|
"eval_samples_per_second": 151.433, |
|
"eval_steps_per_second": 2.372, |
|
"step": 24660 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.9513, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 11.593620369045917, |
|
"eval_loss": 1.9701664447784424, |
|
"eval_rouge1": 28.3204, |
|
"eval_rouge2": 9.1572, |
|
"eval_rougeL": 26.2955, |
|
"eval_rougeLsum": 26.3029, |
|
"eval_runtime": 92.353, |
|
"eval_samples_per_second": 151.397, |
|
"eval_steps_per_second": 2.371, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.9309, |
|
"step": 30140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 11.837076240881133, |
|
"eval_loss": 1.9640249013900757, |
|
"eval_rouge1": 28.4289, |
|
"eval_rouge2": 9.2845, |
|
"eval_rougeL": 26.4006, |
|
"eval_rougeLsum": 26.418, |
|
"eval_runtime": 92.2786, |
|
"eval_samples_per_second": 151.519, |
|
"eval_steps_per_second": 2.373, |
|
"step": 30140 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1e-05, |
|
"loss": 1.9144, |
|
"step": 32880 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 11.620082963810614, |
|
"eval_loss": 1.9570931196212769, |
|
"eval_rouge1": 28.4504, |
|
"eval_rouge2": 9.3406, |
|
"eval_rougeL": 26.4273, |
|
"eval_rougeLsum": 26.4384, |
|
"eval_runtime": 143.3751, |
|
"eval_samples_per_second": 97.52, |
|
"eval_steps_per_second": 1.527, |
|
"step": 32880 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.9013, |
|
"step": 35620 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 11.706694321270204, |
|
"eval_loss": 1.954399824142456, |
|
"eval_rouge1": 28.6319, |
|
"eval_rouge2": 9.3682, |
|
"eval_rougeL": 26.605, |
|
"eval_rougeLsum": 26.613, |
|
"eval_runtime": 92.2917, |
|
"eval_samples_per_second": 151.498, |
|
"eval_steps_per_second": 2.373, |
|
"step": 35620 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.8914, |
|
"step": 38360 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 11.730653697611215, |
|
"eval_loss": 1.9511938095092773, |
|
"eval_rouge1": 28.6435, |
|
"eval_rouge2": 9.3976, |
|
"eval_rougeL": 26.5839, |
|
"eval_rougeLsum": 26.5918, |
|
"eval_runtime": 92.0528, |
|
"eval_samples_per_second": 151.891, |
|
"eval_steps_per_second": 2.379, |
|
"step": 38360 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.8866, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_gen_len": 11.74696037762838, |
|
"eval_loss": 1.9508894681930542, |
|
"eval_rouge1": 28.6111, |
|
"eval_rouge2": 9.3857, |
|
"eval_rougeL": 26.551, |
|
"eval_rougeLsum": 26.5648, |
|
"eval_runtime": 91.9698, |
|
"eval_samples_per_second": 152.028, |
|
"eval_steps_per_second": 2.381, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 41100, |
|
"total_flos": 4.436322961707786e+17, |
|
"train_loss": 2.069233327155566, |
|
"train_runtime": 14173.3385, |
|
"train_samples_per_second": 185.558, |
|
"train_steps_per_second": 2.9 |
|
} |
|
], |
|
"max_steps": 41100, |
|
"num_train_epochs": 15, |
|
"total_flos": 4.436322961707786e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|