{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "global_step": 39375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 7.902023616171372e-05, "loss": 2.3606, "step": 5625 }, { "epoch": 1.0, "eval_gen_len": 108.1064, "eval_loss": 2.050605297088623, "eval_rouge1": 43.1144, "eval_rouge2": 19.9792, "eval_rougeL": 29.3193, "eval_rougeLsum": 29.3219, "eval_runtime": 6630.4724, "eval_samples_per_second": 3.016, "eval_steps_per_second": 3.016, "step": 5625 }, { "epoch": 2.0, "learning_rate": 6.914534041525834e-05, "loss": 2.0883, "step": 11250 }, { "epoch": 2.0, "eval_gen_len": 111.923, "eval_loss": 1.9714523553848267, "eval_rouge1": 43.2087, "eval_rouge2": 20.0241, "eval_rougeL": 29.2716, "eval_rougeLsum": 29.2729, "eval_runtime": 6876.0309, "eval_samples_per_second": 2.909, "eval_steps_per_second": 2.909, "step": 11250 }, { "epoch": 3.0, "learning_rate": 5.9272200517975506e-05, "loss": 1.9401, "step": 16875 }, { "epoch": 3.0, "eval_gen_len": 110.7088, "eval_loss": 1.9192545413970947, "eval_rouge1": 43.7675, "eval_rouge2": 20.4219, "eval_rougeL": 29.736, "eval_rougeLsum": 29.736, "eval_runtime": 6792.0963, "eval_samples_per_second": 2.945, "eval_steps_per_second": 2.945, "step": 16875 }, { "epoch": 4.0, "learning_rate": 4.9397304771520136e-05, "loss": 1.8413, "step": 22500 }, { "epoch": 4.0, "eval_gen_len": 114.6206, "eval_loss": 1.9106097221374512, "eval_rouge1": 43.9857, "eval_rouge2": 20.6259, "eval_rougeL": 29.863, "eval_rougeLsum": 29.8645, "eval_runtime": 7084.2274, "eval_samples_per_second": 2.823, "eval_steps_per_second": 2.823, "step": 22500 }, { "epoch": 5.0, "learning_rate": 3.952240902506475e-05, "loss": 1.7632, "step": 28125 }, { "epoch": 5.0, "eval_gen_len": 116.4429, "eval_loss": 1.9056074619293213, "eval_rouge1": 44.0791, "eval_rouge2": 20.7132, "eval_rougeL": 29.9243, "eval_rougeLsum": 29.9263, "eval_runtime": 7223.7263, "eval_samples_per_second": 2.769, "eval_steps_per_second": 2.769, "step": 28125 }, { "epoch": 6.0, "learning_rate": 2.9649269127781926e-05, "loss": 1.6991, "step": 33750 }, { "epoch": 6.0, "eval_gen_len": 116.3214, "eval_loss": 1.9034677743911743, "eval_rouge1": 44.0398, "eval_rouge2": 20.6289, "eval_rougeL": 29.862, "eval_rougeLsum": 29.8651, "eval_runtime": 7200.6944, "eval_samples_per_second": 2.778, "eval_steps_per_second": 2.778, "step": 33750 }, { "epoch": 7.0, "learning_rate": 1.9777885079671656e-05, "loss": 1.6457, "step": 39375 }, { "epoch": 7.0, "eval_gen_len": 113.2878, "eval_loss": 1.9000203609466553, "eval_rouge1": 43.9966, "eval_rouge2": 20.5908, "eval_rougeL": 29.9303, "eval_rougeLsum": 29.93, "eval_runtime": 7006.6993, "eval_samples_per_second": 2.854, "eval_steps_per_second": 2.854, "step": 39375 } ], "max_steps": 50625, "num_train_epochs": 9, "total_flos": 3.353631721258291e+17, "trial_name": null, "trial_params": null }