{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999023320368054, "global_step": 2733, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 9.998862506715891e-06, "loss": 2.3408, "step": 100 }, { "epoch": 0.22, "learning_rate": 9.951193630803286e-06, "loss": 2.2705, "step": 200 }, { "epoch": 0.33, "learning_rate": 9.834073214196232e-06, "loss": 2.2454, "step": 300 }, { "epoch": 0.44, "learning_rate": 9.649144132125768e-06, "loss": 2.2308, "step": 400 }, { "epoch": 0.55, "learning_rate": 9.399000427690736e-06, "loss": 2.222, "step": 500 }, { "epoch": 0.55, "eval_loss": 2.1795129776000977, "eval_runtime": 134.9568, "eval_samples_per_second": 47.771, "eval_steps_per_second": 11.945, "step": 500 }, { "epoch": 0.66, "learning_rate": 9.087150924613952e-06, "loss": 2.2111, "step": 600 }, { "epoch": 0.77, "learning_rate": 8.717970008158547e-06, "loss": 2.2157, "step": 700 }, { "epoch": 0.88, "learning_rate": 8.296636264611935e-06, "loss": 2.1904, "step": 800 }, { "epoch": 0.99, "learning_rate": 7.829059840055622e-06, "loss": 2.1934, "step": 900 }, { "epoch": 1.1, "learning_rate": 7.321799537376213e-06, "loss": 2.1122, "step": 1000 }, { "epoch": 1.1, "eval_loss": 2.1545722484588623, "eval_runtime": 135.1839, "eval_samples_per_second": 47.691, "eval_steps_per_second": 11.924, "step": 1000 }, { "epoch": 1.21, "learning_rate": 6.781970814417049e-06, "loss": 2.0764, "step": 1100 }, { "epoch": 1.32, "learning_rate": 6.217145973801762e-06, "loss": 2.0787, "step": 1200 }, { "epoch": 1.43, "learning_rate": 5.635247944490186e-06, "loss": 2.0785, "step": 1300 }, { "epoch": 1.54, "learning_rate": 5.044439145017379e-06, "loss": 2.0739, "step": 1400 }, { "epoch": 1.65, "learning_rate": 4.453006987356882e-06, "loss": 2.0817, "step": 1500 }, { "epoch": 1.65, "eval_loss": 2.139988422393799, "eval_runtime": 135.0241, "eval_samples_per_second": 47.747, "eval_steps_per_second": 11.939, "step": 1500 }, { "epoch": 1.76, "learning_rate": 3.869247627472021e-06, "loss": 2.0758, "step": 1600 }, { "epoch": 1.87, "learning_rate": 3.3013495932132446e-06, "loss": 2.0758, "step": 1700 }, { "epoch": 1.97, "learning_rate": 2.7572789219399587e-06, "loss": 2.0732, "step": 1800 }, { "epoch": 2.09, "learning_rate": 2.2446674190680336e-06, "loss": 2.0324, "step": 1900 }, { "epoch": 2.2, "learning_rate": 1.7707056049662669e-06, "loss": 2.0075, "step": 2000 }, { "epoch": 2.2, "eval_loss": 2.139112949371338, "eval_runtime": 135.0198, "eval_samples_per_second": 47.749, "eval_steps_per_second": 11.939, "step": 2000 }, { "epoch": 2.3, "learning_rate": 1.3420418518604827e-06, "loss": 1.9987, "step": 2100 }, { "epoch": 2.41, "learning_rate": 9.646891255752928e-07, "loss": 2.0033, "step": 2200 }, { "epoch": 2.52, "learning_rate": 6.439406402687365e-07, "loss": 2.0071, "step": 2300 }, { "epoch": 2.63, "learning_rate": 3.8429560929037044e-07, "loss": 1.9992, "step": 2400 }, { "epoch": 2.74, "learning_rate": 1.893961336727451e-07, "loss": 2.0045, "step": 2500 }, { "epoch": 2.74, "eval_loss": 2.137310743331909, "eval_runtime": 134.9818, "eval_samples_per_second": 47.762, "eval_steps_per_second": 11.942, "step": 2500 }, { "epoch": 2.85, "learning_rate": 6.197611353601918e-08, "loss": 1.9924, "step": 2600 }, { "epoch": 2.96, "learning_rate": 3.822899037286276e-09, "loss": 1.997, "step": 2700 }, { "epoch": 3.0, "step": 2733, "total_flos": 1.3361242101108096e+18, "train_loss": 2.1056736130614966, "train_runtime": 28254.5864, "train_samples_per_second": 12.393, "train_steps_per_second": 0.097 } ], "max_steps": 2733, "num_train_epochs": 3, "total_flos": 1.3361242101108096e+18, "trial_name": null, "trial_params": null }