{ "best_metric": 0.5791521668434143, "best_model_checkpoint": "/home/khalid/Documents/github_rep/bigscience/data/processed/21/bloom-560m_my_continual-pretrain_100000samples_-1vocab_original/checkpoint-25000", "epoch": 0.39669788679035706, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 9e-05, "loss": 0.8854, "step": 2500 }, { "epoch": 0.08, "learning_rate": 8e-05, "loss": 0.7087, "step": 5000 }, { "epoch": 0.08, "eval_loss": 0.7005436420440674, "eval_runtime": 502.012, "eval_samples_per_second": 9.398, "eval_steps_per_second": 4.699, "step": 5000 }, { "epoch": 0.12, "learning_rate": 7e-05, "loss": 0.6665, "step": 7500 }, { "epoch": 0.16, "learning_rate": 6e-05, "loss": 0.6442, "step": 10000 }, { "epoch": 0.16, "eval_loss": 0.6457962393760681, "eval_runtime": 502.0206, "eval_samples_per_second": 9.398, "eval_steps_per_second": 4.699, "step": 10000 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 0.6229, "step": 12500 }, { "epoch": 0.24, "learning_rate": 4e-05, "loss": 0.6083, "step": 15000 }, { "epoch": 0.24, "eval_loss": 0.6155888438224792, "eval_runtime": 501.1781, "eval_samples_per_second": 9.414, "eval_steps_per_second": 4.707, "step": 15000 }, { "epoch": 0.28, "learning_rate": 3e-05, "loss": 0.5988, "step": 17500 }, { "epoch": 0.32, "learning_rate": 2e-05, "loss": 0.5859, "step": 20000 }, { "epoch": 0.32, "eval_loss": 0.5931491255760193, "eval_runtime": 501.6202, "eval_samples_per_second": 9.406, "eval_steps_per_second": 4.703, "step": 20000 }, { "epoch": 0.36, "learning_rate": 1e-05, "loss": 0.5767, "step": 22500 }, { "epoch": 0.4, "learning_rate": 0.0, "loss": 0.5705, "step": 25000 }, { "epoch": 0.4, "eval_loss": 0.5791521668434143, "eval_runtime": 501.9862, "eval_samples_per_second": 9.399, "eval_steps_per_second": 4.699, "step": 25000 }, { "epoch": 0.4, "step": 25000, "total_flos": 3.714827943936e+17, "train_loss": 0.6467895458984375, "train_runtime": 73141.1233, "train_samples_per_second": 2.734, "train_steps_per_second": 0.342 } ], "max_steps": 25000, "num_train_epochs": 1, "total_flos": 3.714827943936e+17, "trial_name": null, "trial_params": null }