{ "best_metric": 2.234222650527954, "best_model_checkpoint": "saves/ChineseLLaMA2-7B-Chat/lora/SFT/checkpoint-200", "epoch": 0.273972602739726, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.000999995369868095, "loss": 4.0099, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.0009999670749281082, "loss": 2.887, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.000999913058797528, "loss": 2.6742, "step": 15 }, { "epoch": 0.03, "learning_rate": 0.0009998333242552556, "loss": 2.7939, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.0009997278754032958, "loss": 2.6062, "step": 25 }, { "epoch": 0.04, "learning_rate": 0.0009995967176665461, "loss": 2.7508, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.0009994398577925167, "loss": 2.547, "step": 35 }, { "epoch": 0.05, "learning_rate": 0.0009992573038509849, "loss": 2.5281, "step": 40 }, { "epoch": 0.06, "learning_rate": 0.0009990490652335784, "loss": 2.6397, "step": 45 }, { "epoch": 0.07, "learning_rate": 0.000998815152653293, "loss": 2.608, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.000998555578143941, "loss": 2.4076, "step": 55 }, { "epoch": 0.08, "learning_rate": 0.0009982703550595329, "loss": 2.5914, "step": 60 }, { "epoch": 0.09, "learning_rate": 0.0009979594980735896, "loss": 2.3517, "step": 65 }, { "epoch": 0.1, "learning_rate": 0.0009976230231783876, "loss": 2.4019, "step": 70 }, { "epoch": 0.1, "learning_rate": 0.0009972609476841367, "loss": 2.4481, "step": 75 }, { "epoch": 0.11, "learning_rate": 0.000996873290218089, "loss": 2.3244, "step": 80 }, { "epoch": 0.12, "learning_rate": 0.000996460070723581, "loss": 2.4821, "step": 85 }, { "epoch": 0.12, "learning_rate": 0.0009960213104590074, "loss": 2.438, "step": 90 }, { "epoch": 0.13, "learning_rate": 0.0009955570319967273, "loss": 2.2925, "step": 95 }, { "epoch": 0.14, "learning_rate": 0.0009950672592219031, "loss": 2.3052, "step": 100 }, { "epoch": 0.14, "eval_loss": 2.3112449645996094, "eval_runtime": 8.9668, "eval_samples_per_second": 13.16, "eval_steps_per_second": 1.673, "step": 100 }, { "epoch": 0.14, "learning_rate": 0.000994552017331272, "loss": 2.5839, "step": 105 }, { "epoch": 0.15, "learning_rate": 0.0009940113328318488, "loss": 2.4983, "step": 110 }, { "epoch": 0.16, "learning_rate": 0.0009934452335395637, "loss": 2.4598, "step": 115 }, { "epoch": 0.16, "learning_rate": 0.0009928537485778299, "loss": 2.4336, "step": 120 }, { "epoch": 0.17, "learning_rate": 0.0009922369083760461, "loss": 2.3799, "step": 125 }, { "epoch": 0.18, "learning_rate": 0.0009915947446680307, "loss": 2.5443, "step": 130 }, { "epoch": 0.18, "learning_rate": 0.0009909272904903897, "loss": 2.3701, "step": 135 }, { "epoch": 0.19, "learning_rate": 0.0009902345801808161, "loss": 2.2718, "step": 140 }, { "epoch": 0.2, "learning_rate": 0.0009895166493763246, "loss": 2.3402, "step": 145 }, { "epoch": 0.21, "learning_rate": 0.0009887735350114174, "loss": 2.4072, "step": 150 }, { "epoch": 0.21, "learning_rate": 0.000988005275316184, "loss": 2.3807, "step": 155 }, { "epoch": 0.22, "learning_rate": 0.000987211909814336, "loss": 2.2149, "step": 160 }, { "epoch": 0.23, "learning_rate": 0.000986393479321171, "loss": 2.2713, "step": 165 }, { "epoch": 0.23, "learning_rate": 0.0009855500259414753, "loss": 2.08, "step": 170 }, { "epoch": 0.24, "learning_rate": 0.0009846815930673563, "loss": 2.4682, "step": 175 }, { "epoch": 0.25, "learning_rate": 0.000983788225376011, "loss": 2.3328, "step": 180 }, { "epoch": 0.25, "learning_rate": 0.0009828699688274275, "loss": 2.3671, "step": 185 }, { "epoch": 0.26, "learning_rate": 0.0009819268706620196, "loss": 2.3251, "step": 190 }, { "epoch": 0.27, "learning_rate": 0.0009809589793981978, "loss": 2.4429, "step": 195 }, { "epoch": 0.27, "learning_rate": 0.0009799663448298724, "loss": 2.2499, "step": 200 }, { "epoch": 0.27, "eval_loss": 2.234222650527954, "eval_runtime": 8.8394, "eval_samples_per_second": 13.349, "eval_steps_per_second": 1.697, "step": 200 } ], "logging_steps": 5, "max_steps": 2190, "num_train_epochs": 3, "save_steps": 100, "total_flos": 5.897476754944819e+16, "trial_name": null, "trial_params": null }