{ "best_metric": 1.3724207878112793, "best_model_checkpoint": "./LongT5-Large-NSPCC/checkpoint-1132", "epoch": 5.997350993377483, "eval_steps": 500, "global_step": 1132, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9960264900662251, "grad_norm": 1.2397888898849487, "learning_rate": 0.00029303322414305795, "loss": 6.0401, "step": 188 }, { "epoch": 0.9960264900662251, "eval_gen_len": 151.7021, "eval_loss": 2.708937406539917, "eval_rouge1": 0.2766, "eval_rouge2": 0.0617, "eval_rougeL": 0.1655, "eval_rougeLsum": 0.1657, "eval_runtime": 1372.78, "eval_samples_per_second": 0.068, "eval_steps_per_second": 0.068, "step": 188 }, { "epoch": 1.9973509933774833, "grad_norm": 0.9210834503173828, "learning_rate": 0.00026343936751493783, "loss": 2.4805, "step": 377 }, { "epoch": 1.9973509933774833, "eval_gen_len": 211.1809, "eval_loss": 1.8809192180633545, "eval_rouge1": 0.382, "eval_rouge2": 0.1178, "eval_rougeL": 0.2092, "eval_rougeLsum": 0.2092, "eval_runtime": 1618.944, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.058, "step": 377 }, { "epoch": 2.9986754966887417, "grad_norm": 1.2795201539993286, "learning_rate": 0.0002152904846699957, "loss": 1.8093, "step": 566 }, { "epoch": 2.9986754966887417, "eval_gen_len": 246.1277, "eval_loss": 1.576943039894104, "eval_rouge1": 0.4356, "eval_rouge2": 0.1527, "eval_rougeL": 0.2409, "eval_rougeLsum": 0.2409, "eval_runtime": 1917.7123, "eval_samples_per_second": 0.049, "eval_steps_per_second": 0.049, "step": 566 }, { "epoch": 4.0, "grad_norm": 0.6965745091438293, "learning_rate": 0.00015646218178508154, "loss": 1.4653, "step": 755 }, { "epoch": 4.0, "eval_gen_len": 245.0851, "eval_loss": 1.435921311378479, "eval_rouge1": 0.4661, "eval_rouge2": 0.1722, "eval_rougeL": 0.26, "eval_rougeLsum": 0.2603, "eval_runtime": 1747.772, "eval_samples_per_second": 0.054, "eval_steps_per_second": 0.054, "step": 755 }, { "epoch": 4.9960264900662255, "grad_norm": 1.0618396997451782, "learning_rate": 9.687901331136185e-05, "loss": 1.2626, "step": 943 }, { "epoch": 4.9960264900662255, "eval_gen_len": 239.8617, "eval_loss": 1.390848994255066, "eval_rouge1": 0.4829, "eval_rouge2": 0.1931, "eval_rougeL": 0.2717, "eval_rougeLsum": 0.2717, "eval_runtime": 1739.5487, "eval_samples_per_second": 0.054, "eval_steps_per_second": 0.054, "step": 943 }, { "epoch": 5.997350993377483, "grad_norm": 0.8346843123435974, "learning_rate": 4.566183323963369e-05, "loss": 1.117, "step": 1132 }, { "epoch": 5.997350993377483, "eval_gen_len": 244.4255, "eval_loss": 1.3724207878112793, "eval_rouge1": 0.4864, "eval_rouge2": 0.1988, "eval_rougeL": 0.2804, "eval_rougeLsum": 0.2804, "eval_runtime": 1739.9152, "eval_samples_per_second": 0.054, "eval_steps_per_second": 0.054, "step": 1132 } ], "logging_steps": 500, "max_steps": 1504, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "total_flos": 3.341101062684672e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }