{ "best_metric": 0.2082, "best_model_checkpoint": "mt0-xl_russian_natprompt_adafactor/checkpoint-2562", "epoch": 8.0, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.75e-05, "loss": 2.0449, "step": 512 }, { "epoch": 1.0, "eval_gen_len": 16.75809549945115, "eval_loss": 1.6754746437072754, "eval_rouge1": 0.0817, "eval_rouge2": 0.0, "eval_rougeL": 0.0778, "eval_rougeLsum": 0.0817, "eval_runtime": 196.0938, "eval_samples_per_second": 37.166, "eval_steps_per_second": 1.163, "step": 512 }, { "epoch": 2.0, "learning_rate": 4.4995117187500005e-05, "loss": 1.707, "step": 1025 }, { "epoch": 2.0, "eval_gen_len": 15.862102085620197, "eval_loss": 1.6181610822677612, "eval_rouge1": 0.096, "eval_rouge2": 0.0, "eval_rougeL": 0.097, "eval_rougeLsum": 0.1, "eval_runtime": 171.5849, "eval_samples_per_second": 42.475, "eval_steps_per_second": 1.329, "step": 1025 }, { "epoch": 3.0, "learning_rate": 4.24951171875e-05, "loss": 1.5398, "step": 1537 }, { "epoch": 3.0, "eval_gen_len": 16.47653677277717, "eval_loss": 1.6085278987884521, "eval_rouge1": 0.1394, "eval_rouge2": 0.0034, "eval_rougeL": 0.1401, "eval_rougeLsum": 0.1416, "eval_runtime": 171.9932, "eval_samples_per_second": 42.374, "eval_steps_per_second": 1.326, "step": 1537 }, { "epoch": 4.0, "learning_rate": 3.9990234375e-05, "loss": 1.4142, "step": 2050 }, { "epoch": 4.0, "eval_gen_len": 16.273188803512625, "eval_loss": 1.6016370058059692, "eval_rouge1": 0.1132, "eval_rouge2": 0.0, "eval_rougeL": 0.1132, "eval_rougeLsum": 0.1098, "eval_runtime": 171.2054, "eval_samples_per_second": 42.569, "eval_steps_per_second": 1.332, "step": 2050 }, { "epoch": 5.0, "learning_rate": 3.7490234375e-05, "loss": 1.3102, "step": 2562 }, { "epoch": 5.0, "eval_gen_len": 16.287733260153676, "eval_loss": 1.6240657567977905, "eval_rouge1": 0.2082, "eval_rouge2": 0.0034, "eval_rougeL": 0.2054, "eval_rougeLsum": 0.2061, "eval_runtime": 170.3025, "eval_samples_per_second": 42.794, "eval_steps_per_second": 1.339, "step": 2562 }, { "epoch": 6.0, "learning_rate": 3.49853515625e-05, "loss": 1.2162, "step": 3075 }, { "epoch": 6.0, "eval_gen_len": 16.158068057080133, "eval_loss": 1.6281158924102783, "eval_rouge1": 0.1549, "eval_rouge2": 0.0, "eval_rougeL": 0.1549, "eval_rougeLsum": 0.1549, "eval_runtime": 171.3659, "eval_samples_per_second": 42.529, "eval_steps_per_second": 1.33, "step": 3075 }, { "epoch": 7.0, "learning_rate": 3.2485351562499996e-05, "loss": 1.1364, "step": 3587 }, { "epoch": 7.0, "eval_gen_len": 15.992453347969265, "eval_loss": 1.6622037887573242, "eval_rouge1": 0.1583, "eval_rouge2": 0.0, "eval_rougeL": 0.1575, "eval_rougeLsum": 0.1589, "eval_runtime": 254.3332, "eval_samples_per_second": 28.655, "eval_steps_per_second": 0.896, "step": 3587 }, { "epoch": 8.0, "learning_rate": 2.998046875e-05, "loss": 1.0649, "step": 4100 }, { "epoch": 8.0, "eval_gen_len": 16.509879253567508, "eval_loss": 1.6811630725860596, "eval_rouge1": 0.2033, "eval_rouge2": 0.0137, "eval_rougeL": 0.2012, "eval_rougeLsum": 0.2027, "eval_runtime": 173.1353, "eval_samples_per_second": 42.094, "eval_steps_per_second": 1.317, "step": 4100 }, { "epoch": 8.0, "step": 4100, "total_flos": 9.102827646479237e+17, "train_loss": 1.429130665848895, "train_runtime": 9768.8261, "train_samples_per_second": 134.272, "train_steps_per_second": 1.048 } ], "max_steps": 10240, "num_train_epochs": 20, "total_flos": 9.102827646479237e+17, "trial_name": null, "trial_params": null }