{ "best_metric": 3.144331693649292, "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_sft_1000samples_-1vocab_original-frozen/checkpoint-5000", "epoch": 144.9225225225225, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 18.12, "l1_reg_loss": 0.0, "learning_rate": 9e-05, "loss": 3.1058, "step": 2500 }, { "epoch": 36.23, "l1_reg_loss": 0.0001, "learning_rate": 8e-05, "loss": 2.3092, "step": 5000 }, { "epoch": 36.23, "eval_loss": 3.144331693649292, "eval_runtime": 516.01, "eval_samples_per_second": 9.508, "eval_steps_per_second": 4.754, "step": 5000 }, { "epoch": 54.35, "l1_reg_loss": 0.0001, "learning_rate": 7e-05, "loss": 1.854, "step": 7500 }, { "epoch": 72.46, "l1_reg_loss": 0.0001, "learning_rate": 6e-05, "loss": 1.4578, "step": 10000 }, { "epoch": 72.46, "eval_loss": 4.3338799476623535, "eval_runtime": 515.6849, "eval_samples_per_second": 9.514, "eval_steps_per_second": 4.757, "step": 10000 }, { "epoch": 90.58, "l1_reg_loss": 0.0001, "learning_rate": 5e-05, "loss": 1.121, "step": 12500 }, { "epoch": 108.69, "l1_reg_loss": 0.0001, "learning_rate": 4e-05, "loss": 0.8593, "step": 15000 }, { "epoch": 108.69, "eval_loss": 5.458686351776123, "eval_runtime": 514.7726, "eval_samples_per_second": 9.53, "eval_steps_per_second": 4.765, "step": 15000 }, { "epoch": 126.81, "l1_reg_loss": 0.0001, "learning_rate": 3e-05, "loss": 0.6664, "step": 17500 }, { "epoch": 144.92, "l1_reg_loss": 0.0001, "learning_rate": 2e-05, "loss": 0.5312, "step": 20000 }, { "epoch": 144.92, "eval_loss": 6.133657932281494, "eval_runtime": 514.9507, "eval_samples_per_second": 9.527, "eval_steps_per_second": 4.764, "step": 20000 }, { "epoch": 144.92, "step": 20000, "total_flos": 2.9879104118666035e+17, "train_loss": 1.488097003173828, "train_runtime": 49511.5153, "train_samples_per_second": 4.039, "train_steps_per_second": 0.505 } ], "max_steps": 25000, "num_train_epochs": 182, "total_flos": 2.9879104118666035e+17, "trial_name": null, "trial_params": null }