{ "best_metric": 0.6807650327682495, "best_model_checkpoint": "./LongT5-XLarge-NSPCC/checkpoint-566", "epoch": 3.984105960264901, "eval_steps": 500, "global_step": 752, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9960264900662251, "grad_norm": 0.6866635680198669, "learning_rate": 0.00026365056951661766, "loss": 3.6911, "step": 188 }, { "epoch": 0.9960264900662251, "eval_gen_len": 360.7021, "eval_loss": 0.7291953563690186, "eval_rouge1": 0.4665, "eval_rouge2": 0.1826, "eval_rougeL": 0.2611, "eval_rougeLsum": 0.2611, "eval_runtime": 6342.3266, "eval_samples_per_second": 0.015, "eval_steps_per_second": 0.015, "step": 188 }, { "epoch": 1.9973509933774833, "grad_norm": 0.37886831164360046, "learning_rate": 0.00015678507557860595, "loss": 0.8701, "step": 377 }, { "epoch": 1.9973509933774833, "eval_gen_len": 365.3298, "eval_loss": 0.6966970562934875, "eval_rouge1": 0.4886, "eval_rouge2": 0.2073, "eval_rougeL": 0.2805, "eval_rougeLsum": 0.2799, "eval_runtime": 6456.3838, "eval_samples_per_second": 0.015, "eval_steps_per_second": 0.015, "step": 377 }, { "epoch": 2.9986754966887417, "grad_norm": 0.5063174366950989, "learning_rate": 4.566183323963369e-05, "loss": 0.7849, "step": 566 }, { "epoch": 2.9986754966887417, "eval_gen_len": 332.3191, "eval_loss": 0.6807650327682495, "eval_rouge1": 0.5116, "eval_rouge2": 0.2302, "eval_rougeL": 0.2995, "eval_rougeLsum": 0.2997, "eval_runtime": 5946.7561, "eval_samples_per_second": 0.016, "eval_steps_per_second": 0.016, "step": 566 }, { "epoch": 3.984105960264901, "grad_norm": 0.3437921106815338, "learning_rate": 0.0, "loss": 0.7769, "step": 752 }, { "epoch": 3.984105960264901, "eval_gen_len": 337.6809, "eval_loss": 0.6843053102493286, "eval_rouge1": 0.5138, "eval_rouge2": 0.2297, "eval_rougeL": 0.2999, "eval_rougeLsum": 0.2995, "eval_runtime": 6050.9414, "eval_samples_per_second": 0.016, "eval_steps_per_second": 0.016, "step": 752 } ], "logging_steps": 500, "max_steps": 752, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 8.260168119704617e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }