{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 1689, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0002, "loss": 0.7526, "step": 1 }, { "epoch": 0.09, "eval_f1": 0.7867768595041322, "eval_loss": 0.501287043094635, "eval_runtime": 111.669, "eval_samples_per_second": 8.955, "eval_steps_per_second": 1.119, "step": 48 }, { "epoch": 0.17, "eval_f1": 0.731527093596059, "eval_loss": 0.45942792296409607, "eval_runtime": 14.4836, "eval_samples_per_second": 69.044, "eval_steps_per_second": 8.63, "step": 96 }, { "epoch": 0.26, "eval_f1": 0.768503937007874, "eval_loss": 0.6004758477210999, "eval_runtime": 13.7722, "eval_samples_per_second": 72.61, "eval_steps_per_second": 9.076, "step": 144 }, { "epoch": 0.34, "eval_f1": 0.8773784355179703, "eval_loss": 0.293561190366745, "eval_runtime": 16.0256, "eval_samples_per_second": 62.4, "eval_steps_per_second": 7.8, "step": 192 }, { "epoch": 0.43, "eval_f1": 0.8644444444444445, "eval_loss": 0.3103949725627899, "eval_runtime": 13.1537, "eval_samples_per_second": 76.024, "eval_steps_per_second": 9.503, "step": 240 }, { "epoch": 0.51, "eval_f1": 0.903353057199211, "eval_loss": 0.2314113825559616, "eval_runtime": 13.9609, "eval_samples_per_second": 71.629, "eval_steps_per_second": 8.954, "step": 288 }, { "epoch": 0.6, "eval_f1": 0.904950495049505, "eval_loss": 0.23260511457920074, "eval_runtime": 10.3047, "eval_samples_per_second": 97.043, "eval_steps_per_second": 12.13, "step": 336 }, { "epoch": 0.68, "eval_f1": 0.8210526315789474, "eval_loss": 0.39369526505470276, "eval_runtime": 13.155, "eval_samples_per_second": 76.017, "eval_steps_per_second": 9.502, "step": 384 }, { "epoch": 0.77, "eval_f1": 0.9147609147609148, "eval_loss": 0.20550310611724854, "eval_runtime": 45.6928, "eval_samples_per_second": 21.885, "eval_steps_per_second": 2.736, "step": 432 }, { "epoch": 0.85, "eval_f1": 0.8367816091954022, "eval_loss": 0.3017582893371582, "eval_runtime": 22.6976, "eval_samples_per_second": 44.057, "eval_steps_per_second": 5.507, "step": 480 }, { "epoch": 0.94, "eval_f1": 0.9271653543307087, "eval_loss": 0.19784249365329742, "eval_runtime": 12.076, "eval_samples_per_second": 82.809, "eval_steps_per_second": 10.351, "step": 528 }, { "epoch": 1.02, "eval_f1": 0.9368104312938816, "eval_loss": 0.17989766597747803, "eval_runtime": 49.8486, "eval_samples_per_second": 20.061, "eval_steps_per_second": 2.508, "step": 576 }, { "epoch": 1.11, "eval_f1": 0.895424836601307, "eval_loss": 0.23422278463840485, "eval_runtime": 11.6376, "eval_samples_per_second": 85.928, "eval_steps_per_second": 10.741, "step": 624 }, { "epoch": 1.19, "eval_f1": 0.9256360078277887, "eval_loss": 0.22150222957134247, "eval_runtime": 11.4494, "eval_samples_per_second": 87.341, "eval_steps_per_second": 10.918, "step": 672 }, { "epoch": 1.28, "eval_f1": 0.9051724137931034, "eval_loss": 0.22654065489768982, "eval_runtime": 11.6176, "eval_samples_per_second": 86.076, "eval_steps_per_second": 10.76, "step": 720 }, { "epoch": 1.36, "eval_f1": 0.9389389389389389, "eval_loss": 0.1583857387304306, "eval_runtime": 15.3861, "eval_samples_per_second": 64.994, "eval_steps_per_second": 8.124, "step": 768 }, { "epoch": 1.45, "eval_f1": 0.921487603305785, "eval_loss": 0.22388193011283875, "eval_runtime": 14.1559, "eval_samples_per_second": 70.642, "eval_steps_per_second": 8.83, "step": 816 }, { "epoch": 1.53, "eval_f1": 0.9282868525896415, "eval_loss": 0.1805724799633026, "eval_runtime": 11.7527, "eval_samples_per_second": 85.087, "eval_steps_per_second": 10.636, "step": 864 }, { "epoch": 1.62, "eval_f1": 0.8998899889988998, "eval_loss": 0.22186490893363953, "eval_runtime": 11.5055, "eval_samples_per_second": 86.915, "eval_steps_per_second": 10.864, "step": 912 }, { "epoch": 1.71, "eval_f1": 0.9466263846928499, "eval_loss": 0.15720757842063904, "eval_runtime": 11.3153, "eval_samples_per_second": 88.376, "eval_steps_per_second": 11.047, "step": 960 }, { "epoch": 1.79, "eval_f1": 0.9457523029682703, "eval_loss": 0.15362103283405304, "eval_runtime": 11.6626, "eval_samples_per_second": 85.744, "eval_steps_per_second": 10.718, "step": 1008 }, { "epoch": 1.88, "eval_f1": 0.9403714565004888, "eval_loss": 0.15603256225585938, "eval_runtime": 11.4304, "eval_samples_per_second": 87.486, "eval_steps_per_second": 10.936, "step": 1056 }, { "epoch": 1.96, "eval_f1": 0.9567901234567902, "eval_loss": 0.13539032638072968, "eval_runtime": 11.6426, "eval_samples_per_second": 85.891, "eval_steps_per_second": 10.736, "step": 1104 }, { "epoch": 2.0, "learning_rate": 6.714031971580817e-05, "loss": 0.2638, "step": 1125 }, { "epoch": 2.05, "eval_f1": 0.9402390438247011, "eval_loss": 0.2028977870941162, "eval_runtime": 11.4905, "eval_samples_per_second": 87.029, "eval_steps_per_second": 10.879, "step": 1152 }, { "epoch": 2.13, "eval_f1": 0.9477911646586347, "eval_loss": 0.158633291721344, "eval_runtime": 11.6696, "eval_samples_per_second": 85.693, "eval_steps_per_second": 10.712, "step": 1200 }, { "epoch": 2.22, "eval_f1": 0.9363920750782065, "eval_loss": 0.16604219377040863, "eval_runtime": 11.6366, "eval_samples_per_second": 85.936, "eval_steps_per_second": 10.742, "step": 1248 }, { "epoch": 2.3, "eval_f1": 0.946611909650924, "eval_loss": 0.16741609573364258, "eval_runtime": 11.5595, "eval_samples_per_second": 86.509, "eval_steps_per_second": 10.814, "step": 1296 }, { "epoch": 2.39, "eval_f1": 0.9566094853683148, "eval_loss": 0.15039804577827454, "eval_runtime": 11.5775, "eval_samples_per_second": 86.374, "eval_steps_per_second": 10.797, "step": 1344 }, { "epoch": 2.47, "eval_f1": 0.9386892177589852, "eval_loss": 0.1896335780620575, "eval_runtime": 11.4785, "eval_samples_per_second": 87.12, "eval_steps_per_second": 10.89, "step": 1392 }, { "epoch": 2.56, "eval_f1": 0.9516129032258065, "eval_loss": 0.1552370935678482, "eval_runtime": 11.6346, "eval_samples_per_second": 85.951, "eval_steps_per_second": 10.744, "step": 1440 }, { "epoch": 2.64, "eval_f1": 0.9494949494949494, "eval_loss": 0.17198829352855682, "eval_runtime": 11.4094, "eval_samples_per_second": 87.647, "eval_steps_per_second": 10.956, "step": 1488 }, { "epoch": 2.73, "eval_f1": 0.9596774193548386, "eval_loss": 0.14362381398677826, "eval_runtime": 13.3792, "eval_samples_per_second": 74.743, "eval_steps_per_second": 9.343, "step": 1536 }, { "epoch": 2.81, "eval_f1": 0.9530469530469531, "eval_loss": 0.15035580098628998, "eval_runtime": 12.9698, "eval_samples_per_second": 77.102, "eval_steps_per_second": 9.638, "step": 1584 }, { "epoch": 2.9, "eval_f1": 0.9538461538461538, "eval_loss": 0.1516994833946228, "eval_runtime": 11.9429, "eval_samples_per_second": 83.732, "eval_steps_per_second": 10.466, "step": 1632 }, { "epoch": 2.98, "eval_f1": 0.9538461538461538, "eval_loss": 0.15105971693992615, "eval_runtime": 13.9387, "eval_samples_per_second": 71.743, "eval_steps_per_second": 8.968, "step": 1680 }, { "epoch": 3.0, "step": 1689, "total_flos": 4.178103906557952e+18, "train_loss": 0.20243481533847807, "train_runtime": 2207.1281, "train_samples_per_second": 24.466, "train_steps_per_second": 0.765 } ], "max_steps": 1689, "num_train_epochs": 3, "total_flos": 4.178103906557952e+18, "trial_name": null, "trial_params": null }