{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "global_step": 31250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 1.9811272141706926e-05, "loss": 5.5901, "step": 500 }, { "epoch": 0.8, "learning_rate": 1.9489855072463772e-05, "loss": 3.3533, "step": 1000 }, { "epoch": 1.2, "learning_rate": 1.916779388083736e-05, "loss": 2.693, "step": 1500 }, { "epoch": 1.6, "learning_rate": 1.8847665056360712e-05, "loss": 2.1327, "step": 2000 }, { "epoch": 2.0, "learning_rate": 1.8525603864734302e-05, "loss": 1.8894, "step": 2500 }, { "epoch": 2.4, "learning_rate": 1.8204186795491144e-05, "loss": 1.7199, "step": 3000 }, { "epoch": 2.8, "learning_rate": 1.788276972624799e-05, "loss": 1.6236, "step": 3500 }, { "epoch": 3.2, "learning_rate": 1.7561996779388084e-05, "loss": 1.5586, "step": 4000 }, { "epoch": 3.6, "learning_rate": 1.724057971014493e-05, "loss": 1.4624, "step": 4500 }, { "epoch": 4.0, "learning_rate": 1.6919162640901772e-05, "loss": 1.4674, "step": 5000 }, { "epoch": 4.4, "learning_rate": 1.6636392914653787e-05, "loss": 1.452, "step": 5500 }, { "epoch": 4.8, "learning_rate": 1.6371014492753626e-05, "loss": 1.4946, "step": 6000 }, { "epoch": 5.2, "learning_rate": 1.6048953301127216e-05, "loss": 0.0, "step": 6500 }, { "epoch": 5.6, "learning_rate": 1.5726892109500806e-05, "loss": 0.0, "step": 7000 }, { "epoch": 6.0, "learning_rate": 1.54048309178744e-05, "loss": 0.0, "step": 7500 }, { "epoch": 6.4, "learning_rate": 1.5082769726247988e-05, "loss": 0.0, "step": 8000 }, { "epoch": 6.8, "learning_rate": 1.476070853462158e-05, "loss": 0.0, "step": 8500 }, { "epoch": 7.2, "learning_rate": 1.4438647342995172e-05, "loss": 0.0, "step": 9000 }, { "epoch": 7.6, "learning_rate": 1.411658615136876e-05, "loss": 0.0, "step": 9500 }, { "epoch": 8.0, "learning_rate": 1.3794524959742352e-05, "loss": 0.0, "step": 10000 }, { "epoch": 8.4, "learning_rate": 1.3472463768115942e-05, "loss": 0.0, "step": 10500 }, { "epoch": 8.8, "learning_rate": 1.3150402576489534e-05, "loss": 0.0, "step": 11000 }, { "epoch": 9.2, "learning_rate": 1.2828341384863126e-05, "loss": 0.0, "step": 11500 }, { "epoch": 9.6, "learning_rate": 1.2506280193236716e-05, "loss": 0.0, "step": 12000 }, { "epoch": 10.0, "learning_rate": 1.2184219001610308e-05, "loss": 0.0, "step": 12500 }, { "epoch": 10.4, "learning_rate": 1.18621578099839e-05, "loss": 0.0, "step": 13000 }, { "epoch": 10.8, "learning_rate": 1.1540096618357488e-05, "loss": 0.0, "step": 13500 }, { "epoch": 11.2, "learning_rate": 1.121803542673108e-05, "loss": 0.0, "step": 14000 }, { "epoch": 11.6, "learning_rate": 1.0895974235104671e-05, "loss": 0.0, "step": 14500 }, { "epoch": 12.0, "learning_rate": 1.0573913043478262e-05, "loss": 0.0, "step": 15000 }, { "epoch": 12.4, "learning_rate": 1.0251851851851853e-05, "loss": 0.0, "step": 15500 }, { "epoch": 12.8, "learning_rate": 9.929790660225444e-06, "loss": 0.0, "step": 16000 }, { "epoch": 13.2, "learning_rate": 9.607729468599034e-06, "loss": 0.0, "step": 16500 }, { "epoch": 13.6, "learning_rate": 9.285668276972625e-06, "loss": 0.0, "step": 17000 }, { "epoch": 14.0, "learning_rate": 8.963607085346217e-06, "loss": 0.0, "step": 17500 }, { "epoch": 14.4, "learning_rate": 8.641545893719807e-06, "loss": 0.0, "step": 18000 }, { "epoch": 14.8, "learning_rate": 8.3194847020934e-06, "loss": 0.0, "step": 18500 }, { "epoch": 15.2, "learning_rate": 7.99742351046699e-06, "loss": 0.0, "step": 19000 }, { "epoch": 15.6, "learning_rate": 7.67536231884058e-06, "loss": 0.0, "step": 19500 }, { "epoch": 16.0, "learning_rate": 7.3533011272141705e-06, "loss": 0.0, "step": 20000 }, { "epoch": 16.4, "learning_rate": 7.031239935587762e-06, "loss": 0.0, "step": 20500 }, { "epoch": 16.8, "learning_rate": 6.709178743961353e-06, "loss": 0.0, "step": 21000 }, { "epoch": 17.2, "learning_rate": 6.3871175523349435e-06, "loss": 0.0, "step": 21500 }, { "epoch": 17.6, "learning_rate": 6.065056360708535e-06, "loss": 0.0, "step": 22000 }, { "epoch": 18.0, "learning_rate": 5.742995169082126e-06, "loss": 0.0, "step": 22500 }, { "epoch": 18.4, "learning_rate": 5.420933977455716e-06, "loss": 0.0, "step": 23000 }, { "epoch": 18.8, "learning_rate": 5.098872785829307e-06, "loss": 0.0, "step": 23500 }, { "epoch": 19.2, "learning_rate": 4.776811594202899e-06, "loss": 0.0, "step": 24000 }, { "epoch": 19.6, "learning_rate": 4.45475040257649e-06, "loss": 0.0, "step": 24500 }, { "epoch": 20.0, "learning_rate": 4.132689210950081e-06, "loss": 0.0, "step": 25000 }, { "epoch": 20.4, "learning_rate": 3.8106280193236717e-06, "loss": 0.0, "step": 25500 }, { "epoch": 20.8, "learning_rate": 3.4885668276972627e-06, "loss": 0.0, "step": 26000 }, { "epoch": 21.2, "learning_rate": 3.1665056360708537e-06, "loss": 0.0, "step": 26500 }, { "epoch": 21.6, "learning_rate": 2.8444444444444446e-06, "loss": 0.0, "step": 27000 }, { "epoch": 22.0, "learning_rate": 2.522383252818036e-06, "loss": 0.0, "step": 27500 }, { "epoch": 22.4, "learning_rate": 2.2003220611916266e-06, "loss": 0.0, "step": 28000 }, { "epoch": 22.8, "learning_rate": 1.8782608695652174e-06, "loss": 0.0, "step": 28500 }, { "epoch": 23.2, "learning_rate": 1.5561996779388086e-06, "loss": 0.0, "step": 29000 }, { "epoch": 23.6, "learning_rate": 1.2341384863123995e-06, "loss": 0.0, "step": 29500 }, { "epoch": 24.0, "learning_rate": 9.120772946859904e-07, "loss": 0.0, "step": 30000 }, { "epoch": 24.4, "learning_rate": 5.900161030595814e-07, "loss": 0.0, "step": 30500 }, { "epoch": 24.8, "learning_rate": 2.679549114331723e-07, "loss": 0.0, "step": 31000 }, { "epoch": 25.0, "step": 31250, "total_flos": 2.3280874488575558e+19, "train_loss": 0.42298770703125, "train_runtime": 13299.417, "train_samples_per_second": 18.798, "train_steps_per_second": 2.35 } ], "max_steps": 31250, "num_train_epochs": 25, "total_flos": 2.3280874488575558e+19, "trial_name": null, "trial_params": null }