liputan6-base / trainer_state.json
apwic's picture
Training in progress, epoch 1
891af9c verified
raw
history blame
2.32 kB
{
"best_metric": 35.4918,
"best_model_checkpoint": "bin/liputan6-base/checkpoint-36354",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 36354,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.8706055879592896,
"learning_rate": 0.0006666666666666666,
"loss": 2.0683,
"step": 12118
},
{
"epoch": 1.0,
"eval_gen_len": 38.11319722931098,
"eval_loss": 2.372281312942505,
"eval_rouge1": 33.423,
"eval_rouge2": 16.5875,
"eval_rougeL": 28.0723,
"eval_rougeLsum": 31.0371,
"eval_runtime": 1335.7227,
"eval_samples_per_second": 8.214,
"eval_steps_per_second": 0.129,
"step": 12118
},
{
"epoch": 2.0,
"grad_norm": 0.7888175845146179,
"learning_rate": 0.0003333333333333333,
"loss": 1.5342,
"step": 24236
},
{
"epoch": 2.0,
"eval_gen_len": 36.71764491432738,
"eval_loss": 2.139983654022217,
"eval_rouge1": 34.4775,
"eval_rouge2": 17.1757,
"eval_rougeL": 28.8546,
"eval_rougeLsum": 32.0264,
"eval_runtime": 1116.2631,
"eval_samples_per_second": 9.829,
"eval_steps_per_second": 0.154,
"step": 24236
},
{
"epoch": 3.0,
"grad_norm": 0.6657369136810303,
"learning_rate": 0.0,
"loss": 1.241,
"step": 36354
},
{
"epoch": 3.0,
"eval_gen_len": 36.4740247903755,
"eval_loss": 2.039839029312134,
"eval_rouge1": 35.4918,
"eval_rouge2": 17.885,
"eval_rougeL": 29.6804,
"eval_rougeLsum": 32.9507,
"eval_runtime": 1054.5787,
"eval_samples_per_second": 10.404,
"eval_steps_per_second": 0.163,
"step": 36354
},
{
"epoch": 3.0,
"step": 36354,
"total_flos": 3.9828842929402675e+17,
"train_loss": 1.6144965134510645,
"train_runtime": 36481.981,
"train_samples_per_second": 15.943,
"train_steps_per_second": 0.996
}
],
"logging_steps": 500,
"max_steps": 36354,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 3.9828842929402675e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}