{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.827586206896552, "eval_steps": 2, "global_step": 35, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.0001, "loss": 0.7533, "step": 1 }, { "epoch": 0.28, "learning_rate": 9.978670881475172e-05, "loss": 0.8025, "step": 2 }, { "epoch": 0.28, "eval_loss": 0.6696242094039917, "eval_runtime": 44.2051, "eval_samples_per_second": 1.968, "eval_steps_per_second": 1.968, "step": 2 }, { "epoch": 0.41, "learning_rate": 9.91486549841951e-05, "loss": 0.6626, "step": 3 }, { "epoch": 0.55, "learning_rate": 9.809128215864097e-05, "loss": 0.6912, "step": 4 }, { "epoch": 0.55, "eval_loss": 0.6283016800880432, "eval_runtime": 44.1458, "eval_samples_per_second": 1.971, "eval_steps_per_second": 1.971, "step": 4 }, { "epoch": 0.69, "learning_rate": 9.662361147021779e-05, "loss": 0.5708, "step": 5 }, { "epoch": 0.83, "learning_rate": 9.475816456775313e-05, "loss": 0.6832, "step": 6 }, { "epoch": 0.83, "eval_loss": 0.6122540235519409, "eval_runtime": 44.1996, "eval_samples_per_second": 1.968, "eval_steps_per_second": 1.968, "step": 6 }, { "epoch": 0.97, "learning_rate": 9.251085678648072e-05, "loss": 0.601, "step": 7 }, { "epoch": 1.1, "learning_rate": 8.9900861364012e-05, "loss": 0.6154, "step": 8 }, { "epoch": 1.1, "eval_loss": 0.6039846539497375, "eval_runtime": 44.163, "eval_samples_per_second": 1.97, "eval_steps_per_second": 1.97, "step": 8 }, { "epoch": 1.24, "learning_rate": 8.695044586103296e-05, "loss": 0.5479, "step": 9 }, { "epoch": 1.38, "learning_rate": 8.368478218232787e-05, "loss": 0.5438, "step": 10 }, { "epoch": 1.38, "eval_loss": 0.5960671901702881, "eval_runtime": 44.0267, "eval_samples_per_second": 1.976, "eval_steps_per_second": 1.976, "step": 10 }, { "epoch": 1.52, "learning_rate": 8.013173181896283e-05, "loss": 0.6171, "step": 11 }, { "epoch": 1.66, "learning_rate": 7.63216081438678e-05, "loss": 0.5781, "step": 12 }, { "epoch": 1.66, "eval_loss": 0.5879073143005371, "eval_runtime": 43.9085, "eval_samples_per_second": 1.981, "eval_steps_per_second": 1.981, "step": 12 }, { "epoch": 1.79, "learning_rate": 7.228691778882693e-05, "loss": 0.5811, "step": 13 }, { "epoch": 1.93, "learning_rate": 6.806208330935766e-05, "loss": 0.4899, "step": 14 }, { "epoch": 1.93, "eval_loss": 0.5821757912635803, "eval_runtime": 43.9108, "eval_samples_per_second": 1.981, "eval_steps_per_second": 1.981, "step": 14 }, { "epoch": 2.07, "learning_rate": 6.368314950360415e-05, "loss": 0.4649, "step": 15 }, { "epoch": 2.21, "learning_rate": 5.918747589082853e-05, "loss": 0.5615, "step": 16 }, { "epoch": 2.21, "eval_loss": 0.5788118839263916, "eval_runtime": 43.9641, "eval_samples_per_second": 1.979, "eval_steps_per_second": 1.979, "step": 16 }, { "epoch": 2.34, "learning_rate": 5.4613417973165106e-05, "loss": 0.4919, "step": 17 }, { "epoch": 2.48, "learning_rate": 5e-05, "loss": 0.4755, "step": 18 }, { "epoch": 2.48, "eval_loss": 0.5790432691574097, "eval_runtime": 43.9953, "eval_samples_per_second": 1.977, "eval_steps_per_second": 1.977, "step": 18 }, { "epoch": 2.62, "learning_rate": 4.5386582026834906e-05, "loss": 0.4525, "step": 19 }, { "epoch": 2.76, "learning_rate": 4.0812524109171476e-05, "loss": 0.4665, "step": 20 }, { "epoch": 2.76, "eval_loss": 0.5807910561561584, "eval_runtime": 44.0437, "eval_samples_per_second": 1.975, "eval_steps_per_second": 1.975, "step": 20 }, { "epoch": 2.9, "learning_rate": 3.631685049639586e-05, "loss": 0.5079, "step": 21 }, { "epoch": 3.03, "learning_rate": 3.1937916690642356e-05, "loss": 0.4525, "step": 22 }, { "epoch": 3.03, "eval_loss": 0.5798379182815552, "eval_runtime": 44.0127, "eval_samples_per_second": 1.977, "eval_steps_per_second": 1.977, "step": 22 }, { "epoch": 3.17, "learning_rate": 2.771308221117309e-05, "loss": 0.4697, "step": 23 }, { "epoch": 3.31, "learning_rate": 2.3678391856132204e-05, "loss": 0.4508, "step": 24 }, { "epoch": 3.31, "eval_loss": 0.5790860056877136, "eval_runtime": 44.0414, "eval_samples_per_second": 1.975, "eval_steps_per_second": 1.975, "step": 24 }, { "epoch": 3.45, "learning_rate": 1.9868268181037185e-05, "loss": 0.4566, "step": 25 }, { "epoch": 3.59, "learning_rate": 1.631521781767214e-05, "loss": 0.4934, "step": 26 }, { "epoch": 3.59, "eval_loss": 0.5790104269981384, "eval_runtime": 44.0612, "eval_samples_per_second": 1.975, "eval_steps_per_second": 1.975, "step": 26 }, { "epoch": 3.72, "learning_rate": 1.3049554138967051e-05, "loss": 0.3934, "step": 27 }, { "epoch": 3.86, "learning_rate": 1.0099138635988026e-05, "loss": 0.4291, "step": 28 }, { "epoch": 3.86, "eval_loss": 0.5794057846069336, "eval_runtime": 44.0534, "eval_samples_per_second": 1.975, "eval_steps_per_second": 1.975, "step": 28 }, { "epoch": 4.0, "learning_rate": 7.489143213519301e-06, "loss": 0.4485, "step": 29 }, { "epoch": 4.14, "learning_rate": 5.241835432246889e-06, "loss": 0.4624, "step": 30 }, { "epoch": 4.14, "eval_loss": 0.5792465806007385, "eval_runtime": 44.0238, "eval_samples_per_second": 1.976, "eval_steps_per_second": 1.976, "step": 30 }, { "epoch": 4.28, "learning_rate": 3.376388529782215e-06, "loss": 0.449, "step": 31 }, { "epoch": 4.41, "learning_rate": 1.908717841359048e-06, "loss": 0.4353, "step": 32 }, { "epoch": 4.41, "eval_loss": 0.579724907875061, "eval_runtime": 43.9242, "eval_samples_per_second": 1.981, "eval_steps_per_second": 1.981, "step": 32 }, { "epoch": 4.55, "learning_rate": 8.513450158049108e-07, "loss": 0.4108, "step": 33 }, { "epoch": 4.69, "learning_rate": 2.1329118524827662e-07, "loss": 0.4546, "step": 34 }, { "epoch": 4.69, "eval_loss": 0.5798984169960022, "eval_runtime": 43.9248, "eval_samples_per_second": 1.981, "eval_steps_per_second": 1.981, "step": 34 }, { "epoch": 4.83, "learning_rate": 0.0, "loss": 0.4189, "step": 35 }, { "epoch": 4.83, "step": 35, "total_flos": 9.3962984585429e+16, "train_loss": 0.5252525525433677, "train_runtime": 2623.1378, "train_samples_per_second": 0.66, "train_steps_per_second": 0.013 } ], "logging_steps": 1.0, "max_steps": 35, "num_train_epochs": 5, "save_steps": 4, "total_flos": 9.3962984585429e+16, "trial_name": null, "trial_params": null }