{ "best_metric": 0.15806905925273895, "best_model_checkpoint": "./results/checkpoint-975", "epoch": 14.285714285714286, "eval_steps": 25, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.1731, "step": 25 }, { "epoch": 0.36, "eval_loss": 1.4641685485839844, "eval_runtime": 2.4526, "eval_samples_per_second": 22.833, "eval_steps_per_second": 2.854, "step": 25 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 1.3444, "step": 50 }, { "epoch": 0.71, "eval_loss": 1.161795735359192, "eval_runtime": 2.4547, "eval_samples_per_second": 22.813, "eval_steps_per_second": 2.852, "step": 50 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 1.0438, "step": 75 }, { "epoch": 1.07, "eval_loss": 0.938343346118927, "eval_runtime": 2.4539, "eval_samples_per_second": 22.821, "eval_steps_per_second": 2.853, "step": 75 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 0.9378, "step": 100 }, { "epoch": 1.43, "eval_loss": 0.8406057357788086, "eval_runtime": 2.4541, "eval_samples_per_second": 22.819, "eval_steps_per_second": 2.852, "step": 100 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 0.8852, "step": 125 }, { "epoch": 1.79, "eval_loss": 0.7779091000556946, "eval_runtime": 2.4548, "eval_samples_per_second": 22.812, "eval_steps_per_second": 2.852, "step": 125 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 0.8243, "step": 150 }, { "epoch": 2.14, "eval_loss": 0.7215237021446228, "eval_runtime": 2.4543, "eval_samples_per_second": 22.817, "eval_steps_per_second": 2.852, "step": 150 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 0.7581, "step": 175 }, { "epoch": 2.5, "eval_loss": 0.6108285188674927, "eval_runtime": 2.4539, "eval_samples_per_second": 22.821, "eval_steps_per_second": 2.853, "step": 175 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 0.6965, "step": 200 }, { "epoch": 2.86, "eval_loss": 0.5861143469810486, "eval_runtime": 2.4542, "eval_samples_per_second": 22.818, "eval_steps_per_second": 2.852, "step": 200 }, { "epoch": 3.21, "learning_rate": 0.0002, "loss": 0.6161, "step": 225 }, { "epoch": 3.21, "eval_loss": 0.5066039562225342, "eval_runtime": 2.4563, "eval_samples_per_second": 22.798, "eval_steps_per_second": 2.85, "step": 225 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 0.5444, "step": 250 }, { "epoch": 3.57, "eval_loss": 0.45327526330947876, "eval_runtime": 2.4538, "eval_samples_per_second": 22.822, "eval_steps_per_second": 2.853, "step": 250 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 0.5739, "step": 275 }, { "epoch": 3.93, "eval_loss": 0.4038705825805664, "eval_runtime": 2.4543, "eval_samples_per_second": 22.817, "eval_steps_per_second": 2.852, "step": 275 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 0.4352, "step": 300 }, { "epoch": 4.29, "eval_loss": 0.3711726665496826, "eval_runtime": 2.4536, "eval_samples_per_second": 22.824, "eval_steps_per_second": 2.853, "step": 300 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 0.4281, "step": 325 }, { "epoch": 4.64, "eval_loss": 0.3348071277141571, "eval_runtime": 2.4574, "eval_samples_per_second": 22.789, "eval_steps_per_second": 2.849, "step": 325 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 0.4371, "step": 350 }, { "epoch": 5.0, "eval_loss": 0.2986474633216858, "eval_runtime": 2.4531, "eval_samples_per_second": 22.829, "eval_steps_per_second": 2.854, "step": 350 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 0.3143, "step": 375 }, { "epoch": 5.36, "eval_loss": 0.29203733801841736, "eval_runtime": 2.454, "eval_samples_per_second": 22.82, "eval_steps_per_second": 2.853, "step": 375 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 0.3315, "step": 400 }, { "epoch": 5.71, "eval_loss": 0.26739758253097534, "eval_runtime": 2.4554, "eval_samples_per_second": 22.807, "eval_steps_per_second": 2.851, "step": 400 }, { "epoch": 6.07, "learning_rate": 0.0002, "loss": 0.3224, "step": 425 }, { "epoch": 6.07, "eval_loss": 0.2381574958562851, "eval_runtime": 2.4593, "eval_samples_per_second": 22.771, "eval_steps_per_second": 2.846, "step": 425 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 0.2582, "step": 450 }, { "epoch": 6.43, "eval_loss": 0.2326308786869049, "eval_runtime": 2.4554, "eval_samples_per_second": 22.807, "eval_steps_per_second": 2.851, "step": 450 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 0.2889, "step": 475 }, { "epoch": 6.79, "eval_loss": 0.22920013964176178, "eval_runtime": 2.4566, "eval_samples_per_second": 22.796, "eval_steps_per_second": 2.85, "step": 475 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 0.2766, "step": 500 }, { "epoch": 7.14, "eval_loss": 0.22648800909519196, "eval_runtime": 2.4544, "eval_samples_per_second": 22.816, "eval_steps_per_second": 2.852, "step": 500 }, { "epoch": 7.5, "learning_rate": 0.0002, "loss": 0.2476, "step": 525 }, { "epoch": 7.5, "eval_loss": 0.19777119159698486, "eval_runtime": 2.4535, "eval_samples_per_second": 22.824, "eval_steps_per_second": 2.853, "step": 525 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 0.2383, "step": 550 }, { "epoch": 7.86, "eval_loss": 0.1977979987859726, "eval_runtime": 2.4538, "eval_samples_per_second": 22.821, "eval_steps_per_second": 2.853, "step": 550 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 0.2318, "step": 575 }, { "epoch": 8.21, "eval_loss": 0.19671748578548431, "eval_runtime": 2.4545, "eval_samples_per_second": 22.815, "eval_steps_per_second": 2.852, "step": 575 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 0.2159, "step": 600 }, { "epoch": 8.57, "eval_loss": 0.1927504688501358, "eval_runtime": 2.4546, "eval_samples_per_second": 22.815, "eval_steps_per_second": 2.852, "step": 600 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 0.2332, "step": 625 }, { "epoch": 8.93, "eval_loss": 0.18296389281749725, "eval_runtime": 2.4543, "eval_samples_per_second": 22.817, "eval_steps_per_second": 2.852, "step": 625 }, { "epoch": 9.29, "learning_rate": 0.0002, "loss": 0.2088, "step": 650 }, { "epoch": 9.29, "eval_loss": 0.19335438311100006, "eval_runtime": 2.4553, "eval_samples_per_second": 22.808, "eval_steps_per_second": 2.851, "step": 650 }, { "epoch": 9.64, "learning_rate": 0.0002, "loss": 0.2072, "step": 675 }, { "epoch": 9.64, "eval_loss": 0.18041561543941498, "eval_runtime": 2.4547, "eval_samples_per_second": 22.813, "eval_steps_per_second": 2.852, "step": 675 }, { "epoch": 10.0, "learning_rate": 0.0002, "loss": 0.2233, "step": 700 }, { "epoch": 10.0, "eval_loss": 0.18061913549900055, "eval_runtime": 2.454, "eval_samples_per_second": 22.82, "eval_steps_per_second": 2.853, "step": 700 }, { "epoch": 10.36, "learning_rate": 0.0002, "loss": 0.1796, "step": 725 }, { "epoch": 10.36, "eval_loss": 0.17700645327568054, "eval_runtime": 2.4539, "eval_samples_per_second": 22.821, "eval_steps_per_second": 2.853, "step": 725 }, { "epoch": 10.71, "learning_rate": 0.0002, "loss": 0.215, "step": 750 }, { "epoch": 10.71, "eval_loss": 0.18451173603534698, "eval_runtime": 2.4555, "eval_samples_per_second": 22.806, "eval_steps_per_second": 2.851, "step": 750 }, { "epoch": 11.07, "learning_rate": 0.0002, "loss": 0.1894, "step": 775 }, { "epoch": 11.07, "eval_loss": 0.17726834118366241, "eval_runtime": 2.4547, "eval_samples_per_second": 22.813, "eval_steps_per_second": 2.852, "step": 775 }, { "epoch": 11.43, "learning_rate": 0.0002, "loss": 0.1899, "step": 800 }, { "epoch": 11.43, "eval_loss": 0.17982231080532074, "eval_runtime": 2.4541, "eval_samples_per_second": 22.819, "eval_steps_per_second": 2.852, "step": 800 }, { "epoch": 11.79, "learning_rate": 0.0002, "loss": 0.2009, "step": 825 }, { "epoch": 11.79, "eval_loss": 0.1710078865289688, "eval_runtime": 2.4543, "eval_samples_per_second": 22.817, "eval_steps_per_second": 2.852, "step": 825 }, { "epoch": 12.14, "learning_rate": 0.0002, "loss": 0.1859, "step": 850 }, { "epoch": 12.14, "eval_loss": 0.1884012222290039, "eval_runtime": 2.4546, "eval_samples_per_second": 22.814, "eval_steps_per_second": 2.852, "step": 850 }, { "epoch": 12.5, "learning_rate": 0.0002, "loss": 0.1854, "step": 875 }, { "epoch": 12.5, "eval_loss": 0.16743424534797668, "eval_runtime": 2.4539, "eval_samples_per_second": 22.821, "eval_steps_per_second": 2.853, "step": 875 }, { "epoch": 12.86, "learning_rate": 0.0002, "loss": 0.191, "step": 900 }, { "epoch": 12.86, "eval_loss": 0.16949959099292755, "eval_runtime": 2.4543, "eval_samples_per_second": 22.817, "eval_steps_per_second": 2.852, "step": 900 }, { "epoch": 13.21, "learning_rate": 0.0002, "loss": 0.1912, "step": 925 }, { "epoch": 13.21, "eval_loss": 0.15851029753684998, "eval_runtime": 2.4557, "eval_samples_per_second": 22.804, "eval_steps_per_second": 2.851, "step": 925 }, { "epoch": 13.57, "learning_rate": 0.0002, "loss": 0.1763, "step": 950 }, { "epoch": 13.57, "eval_loss": 0.17656584084033966, "eval_runtime": 2.4541, "eval_samples_per_second": 22.819, "eval_steps_per_second": 2.852, "step": 950 }, { "epoch": 13.93, "learning_rate": 0.0002, "loss": 0.1953, "step": 975 }, { "epoch": 13.93, "eval_loss": 0.15806905925273895, "eval_runtime": 2.455, "eval_samples_per_second": 22.811, "eval_steps_per_second": 2.851, "step": 975 }, { "epoch": 14.29, "learning_rate": 0.0002, "loss": 0.1732, "step": 1000 }, { "epoch": 14.29, "eval_loss": 0.18219564855098724, "eval_runtime": 2.4533, "eval_samples_per_second": 22.826, "eval_steps_per_second": 2.853, "step": 1000 } ], "logging_steps": 25, "max_steps": 1000, "num_train_epochs": 15, "save_steps": 25, "total_flos": 239052193136640.0, "trial_name": null, "trial_params": null }