|
{ |
|
"best_metric": 17.3273, |
|
"best_model_checkpoint": "models/mt0-xl_russian_natprompt_adafactor_updated/checkpoint-6150", |
|
"epoch": 14.999024390243903, |
|
"eval_steps": 500, |
|
"global_step": 7687, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.75e-05, |
|
"loss": 2.0388, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 16.58484100877193, |
|
"eval_loss": 1.6734575033187866, |
|
"eval_rouge1": 14.1367, |
|
"eval_rouge2": 7.0437, |
|
"eval_rougeL": 14.0625, |
|
"eval_rougeLsum": 14.0916, |
|
"eval_runtime": 270.6111, |
|
"eval_samples_per_second": 26.932, |
|
"eval_steps_per_second": 0.843, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.4995117187500005e-05, |
|
"loss": 1.7098, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 16.68050986842105, |
|
"eval_loss": 1.6203718185424805, |
|
"eval_rouge1": 15.2619, |
|
"eval_rouge2": 7.8124, |
|
"eval_rougeL": 15.159, |
|
"eval_rougeLsum": 15.2078, |
|
"eval_runtime": 276.6842, |
|
"eval_samples_per_second": 26.341, |
|
"eval_steps_per_second": 0.824, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.24951171875e-05, |
|
"loss": 1.539, |
|
"step": 1537 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 16.61417214912281, |
|
"eval_loss": 1.6058766841888428, |
|
"eval_rouge1": 15.9942, |
|
"eval_rouge2": 8.1827, |
|
"eval_rougeL": 15.872, |
|
"eval_rougeLsum": 15.9105, |
|
"eval_runtime": 263.8074, |
|
"eval_samples_per_second": 27.626, |
|
"eval_steps_per_second": 0.864, |
|
"step": 1537 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.9990234375e-05, |
|
"loss": 1.403, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 16.26343201754386, |
|
"eval_loss": 1.6041721105575562, |
|
"eval_rouge1": 16.6383, |
|
"eval_rouge2": 8.4603, |
|
"eval_rougeL": 16.5096, |
|
"eval_rougeLsum": 16.5635, |
|
"eval_runtime": 251.4581, |
|
"eval_samples_per_second": 28.983, |
|
"eval_steps_per_second": 0.907, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.7490234375e-05, |
|
"loss": 1.295, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 15.741365131578947, |
|
"eval_loss": 1.6226089000701904, |
|
"eval_rouge1": 16.9189, |
|
"eval_rouge2": 8.8384, |
|
"eval_rougeL": 16.7799, |
|
"eval_rougeLsum": 16.8258, |
|
"eval_runtime": 169.6881, |
|
"eval_samples_per_second": 42.949, |
|
"eval_steps_per_second": 1.344, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.49853515625e-05, |
|
"loss": 1.1984, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 15.888157894736842, |
|
"eval_loss": 1.6289030313491821, |
|
"eval_rouge1": 16.9788, |
|
"eval_rouge2": 8.7272, |
|
"eval_rougeL": 16.8238, |
|
"eval_rougeLsum": 16.8765, |
|
"eval_runtime": 175.0677, |
|
"eval_samples_per_second": 41.63, |
|
"eval_steps_per_second": 1.302, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.2485351562499996e-05, |
|
"loss": 1.1195, |
|
"step": 3587 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 16.23519736842105, |
|
"eval_loss": 1.6697918176651, |
|
"eval_rouge1": 17.0912, |
|
"eval_rouge2": 8.7061, |
|
"eval_rougeL": 16.9084, |
|
"eval_rougeLsum": 16.9633, |
|
"eval_runtime": 171.9395, |
|
"eval_samples_per_second": 42.387, |
|
"eval_steps_per_second": 1.326, |
|
"step": 3587 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2.998046875e-05, |
|
"loss": 1.0463, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 16.14761513157895, |
|
"eval_loss": 1.6845269203186035, |
|
"eval_rouge1": 17.201, |
|
"eval_rouge2": 8.7395, |
|
"eval_rougeL": 17.003, |
|
"eval_rougeLsum": 17.052, |
|
"eval_runtime": 252.7052, |
|
"eval_samples_per_second": 28.84, |
|
"eval_steps_per_second": 0.902, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.748046875e-05, |
|
"loss": 0.9866, |
|
"step": 4612 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 15.878837719298245, |
|
"eval_loss": 1.726230502128601, |
|
"eval_rouge1": 17.3223, |
|
"eval_rouge2": 8.8289, |
|
"eval_rougeL": 17.1413, |
|
"eval_rougeLsum": 17.1756, |
|
"eval_runtime": 182.5703, |
|
"eval_samples_per_second": 39.919, |
|
"eval_steps_per_second": 1.249, |
|
"step": 4612 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.49755859375e-05, |
|
"loss": 0.9326, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 15.797149122807017, |
|
"eval_loss": 1.7532711029052734, |
|
"eval_rouge1": 17.2655, |
|
"eval_rouge2": 8.7512, |
|
"eval_rougeL": 17.0508, |
|
"eval_rougeLsum": 17.1055, |
|
"eval_runtime": 168.7949, |
|
"eval_samples_per_second": 43.177, |
|
"eval_steps_per_second": 1.351, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.24755859375e-05, |
|
"loss": 0.8844, |
|
"step": 5637 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 16.32360197368421, |
|
"eval_loss": 1.7794246673583984, |
|
"eval_rouge1": 17.008, |
|
"eval_rouge2": 8.5404, |
|
"eval_rougeL": 16.8044, |
|
"eval_rougeLsum": 16.848, |
|
"eval_runtime": 168.6102, |
|
"eval_samples_per_second": 43.224, |
|
"eval_steps_per_second": 1.352, |
|
"step": 5637 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1.9970703125e-05, |
|
"loss": 0.8393, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 16.143092105263158, |
|
"eval_loss": 1.7995822429656982, |
|
"eval_rouge1": 17.3273, |
|
"eval_rouge2": 8.7829, |
|
"eval_rougeL": 17.097, |
|
"eval_rougeLsum": 17.1644, |
|
"eval_runtime": 171.5723, |
|
"eval_samples_per_second": 42.478, |
|
"eval_steps_per_second": 1.329, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.7470703125000003e-05, |
|
"loss": 0.8046, |
|
"step": 6662 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 16.090597587719298, |
|
"eval_loss": 1.8266295194625854, |
|
"eval_rouge1": 17.1859, |
|
"eval_rouge2": 8.6524, |
|
"eval_rougeL": 16.9605, |
|
"eval_rougeLsum": 17.0118, |
|
"eval_runtime": 259.1646, |
|
"eval_samples_per_second": 28.121, |
|
"eval_steps_per_second": 0.88, |
|
"step": 6662 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 1.49658203125e-05, |
|
"loss": 0.7682, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 16.11239035087719, |
|
"eval_loss": 1.8624775409698486, |
|
"eval_rouge1": 17.0184, |
|
"eval_rouge2": 8.5314, |
|
"eval_rougeL": 16.8019, |
|
"eval_rougeLsum": 16.847, |
|
"eval_runtime": 170.9938, |
|
"eval_samples_per_second": 42.621, |
|
"eval_steps_per_second": 1.333, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.2465820312500002e-05, |
|
"loss": 0.7419, |
|
"step": 7687 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_gen_len": 15.95751096491228, |
|
"eval_loss": 1.8779526948928833, |
|
"eval_rouge1": 17.2742, |
|
"eval_rouge2": 8.6795, |
|
"eval_rougeL": 17.0699, |
|
"eval_rougeLsum": 17.1118, |
|
"eval_runtime": 177.9916, |
|
"eval_samples_per_second": 40.946, |
|
"eval_steps_per_second": 1.281, |
|
"step": 7687 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 7687, |
|
"total_flos": 1.7085595424946913e+18, |
|
"train_loss": 1.153788715837463, |
|
"train_runtime": 20083.7121, |
|
"train_samples_per_second": 65.311, |
|
"train_steps_per_second": 0.51 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.7085595424946913e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|