{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 5600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8955357074737549, "eval_loss": 0.33214062452316284, "eval_runtime": 7.5153, "eval_samples_per_second": 149.029, "eval_steps_per_second": 18.629, "step": 280 }, { "epoch": 1.79, "learning_rate": 2.743006199096114e-05, "loss": 0.3501, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.9142857193946838, "eval_loss": 0.29908323287963867, "eval_runtime": 7.7634, "eval_samples_per_second": 144.266, "eval_steps_per_second": 18.033, "step": 560 }, { "epoch": 3.0, "eval_accuracy": 0.9008928537368774, "eval_loss": 0.2781011164188385, "eval_runtime": 7.6341, "eval_samples_per_second": 146.71, "eval_steps_per_second": 18.339, "step": 840 }, { "epoch": 3.57, "learning_rate": 2.474084022714142e-05, "loss": 0.1794, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.9107142686843872, "eval_loss": 0.37903639674186707, "eval_runtime": 7.6349, "eval_samples_per_second": 146.696, "eval_steps_per_second": 18.337, "step": 1120 }, { "epoch": 5.0, "eval_accuracy": 0.9214285612106323, "eval_loss": 0.40712061524391174, "eval_runtime": 7.4748, "eval_samples_per_second": 149.837, "eval_steps_per_second": 18.73, "step": 1400 }, { "epoch": 5.36, "learning_rate": 2.2051618463321703e-05, "loss": 0.0871, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.9160714149475098, "eval_loss": 0.576979398727417, "eval_runtime": 7.7299, "eval_samples_per_second": 144.892, "eval_steps_per_second": 18.112, "step": 1680 }, { "epoch": 7.0, "eval_accuracy": 0.9116071462631226, "eval_loss": 0.6520564556121826, "eval_runtime": 7.614, "eval_samples_per_second": 147.098, "eval_steps_per_second": 18.387, "step": 1960 }, { "epoch": 7.14, "learning_rate": 1.9362396699501985e-05, "loss": 0.0437, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.9196428656578064, "eval_loss": 0.6204714179039001, "eval_runtime": 7.8377, "eval_samples_per_second": 142.899, "eval_steps_per_second": 17.862, "step": 2240 }, { "epoch": 8.93, "learning_rate": 1.6673174935682264e-05, "loss": 0.036, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.9169642925262451, "eval_loss": 0.5968001484870911, "eval_runtime": 7.7266, "eval_samples_per_second": 144.953, "eval_steps_per_second": 18.119, "step": 2520 }, { "epoch": 10.0, "eval_accuracy": 0.9214285612106323, "eval_loss": 0.6099338531494141, "eval_runtime": 7.5404, "eval_samples_per_second": 148.534, "eval_steps_per_second": 18.567, "step": 2800 }, { "epoch": 10.71, "learning_rate": 1.3983953171862545e-05, "loss": 0.0199, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.9169642925262451, "eval_loss": 0.6376703381538391, "eval_runtime": 7.6099, "eval_samples_per_second": 147.177, "eval_steps_per_second": 18.397, "step": 3080 }, { "epoch": 12.0, "eval_accuracy": 0.9205357432365417, "eval_loss": 0.6757161021232605, "eval_runtime": 7.6257, "eval_samples_per_second": 146.872, "eval_steps_per_second": 18.359, "step": 3360 }, { "epoch": 12.5, "learning_rate": 1.1294731408042823e-05, "loss": 0.0198, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.9205357432365417, "eval_loss": 0.6298871636390686, "eval_runtime": 7.5908, "eval_samples_per_second": 147.548, "eval_steps_per_second": 18.443, "step": 3640 }, { "epoch": 14.0, "eval_accuracy": 0.9125000238418579, "eval_loss": 0.693723201751709, "eval_runtime": 7.7616, "eval_samples_per_second": 144.301, "eval_steps_per_second": 18.038, "step": 3920 }, { "epoch": 14.29, "learning_rate": 8.605509644223102e-06, "loss": 0.0119, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.9169642925262451, "eval_loss": 0.6261208653450012, "eval_runtime": 7.7494, "eval_samples_per_second": 144.528, "eval_steps_per_second": 18.066, "step": 4200 }, { "epoch": 16.0, "eval_accuracy": 0.9160714149475098, "eval_loss": 0.7174915671348572, "eval_runtime": 7.862, "eval_samples_per_second": 142.457, "eval_steps_per_second": 17.807, "step": 4480 }, { "epoch": 16.07, "learning_rate": 5.916287880403384e-06, "loss": 0.0065, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.9169642925262451, "eval_loss": 0.7007285952568054, "eval_runtime": 7.7124, "eval_samples_per_second": 145.221, "eval_steps_per_second": 18.153, "step": 4760 }, { "epoch": 17.86, "learning_rate": 3.2270661165836636e-06, "loss": 0.0069, "step": 5000 }, { "epoch": 18.0, "eval_accuracy": 0.918749988079071, "eval_loss": 0.7041569352149963, "eval_runtime": 7.8136, "eval_samples_per_second": 143.339, "eval_steps_per_second": 17.917, "step": 5040 }, { "epoch": 19.0, "eval_accuracy": 0.9107142686843872, "eval_loss": 0.732833981513977, "eval_runtime": 7.5262, "eval_samples_per_second": 148.814, "eval_steps_per_second": 18.602, "step": 5320 }, { "epoch": 19.64, "learning_rate": 5.378443527639439e-07, "loss": 0.0034, "step": 5500 }, { "epoch": 20.0, "eval_accuracy": 0.9116071462631226, "eval_loss": 0.7286208271980286, "eval_runtime": 7.6221, "eval_samples_per_second": 146.941, "eval_steps_per_second": 18.368, "step": 5600 }, { "epoch": 20.0, "step": 5600, "total_flos": 5893793336524800.0, "train_loss": 0.06829224105924368, "train_runtime": 4007.6255, "train_samples_per_second": 22.357, "train_steps_per_second": 1.397 } ], "max_steps": 5600, "num_train_epochs": 20, "total_flos": 5893793336524800.0, "trial_name": null, "trial_params": null }