{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4877057508636456, "eval_steps": 100, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 8.994130006901536e-06, "loss": 0.3053, "step": 10 }, { "epoch": 0.03, "learning_rate": 8.976535341747906e-06, "loss": 0.1363, "step": 20 }, { "epoch": 0.05, "learning_rate": 8.947261907011569e-06, "loss": 0.1087, "step": 30 }, { "epoch": 0.07, "learning_rate": 8.906386073741352e-06, "loss": 0.106, "step": 40 }, { "epoch": 0.08, "learning_rate": 8.85401448231912e-06, "loss": 0.0791, "step": 50 }, { "epoch": 0.1, "learning_rate": 8.790283764247187e-06, "loss": 0.0702, "step": 60 }, { "epoch": 0.11, "learning_rate": 8.715360185692326e-06, "loss": 0.0773, "step": 70 }, { "epoch": 0.13, "learning_rate": 8.629439213716327e-06, "loss": 0.0583, "step": 80 }, { "epoch": 0.15, "learning_rate": 8.532745006324751e-06, "loss": 0.0637, "step": 90 }, { "epoch": 0.16, "learning_rate": 8.425529827664287e-06, "loss": 0.0554, "step": 100 }, { "epoch": 0.16, "eval_accuracy": 0.5, "eval_loss": 7.870811641330633e-11, "eval_runtime": 51.3409, "eval_samples_per_second": 85.195, "eval_steps_per_second": 10.654, "step": 100 }, { "epoch": 0.18, "learning_rate": 8.308073389894399e-06, "loss": 0.0666, "step": 110 }, { "epoch": 0.2, "learning_rate": 8.18068212345023e-06, "loss": 0.0598, "step": 120 }, { "epoch": 0.21, "learning_rate": 8.043688377600596e-06, "loss": 0.0423, "step": 130 }, { "epoch": 0.23, "learning_rate": 7.897449553386683e-06, "loss": 0.0311, "step": 140 }, { "epoch": 0.24, "learning_rate": 7.742347171203542e-06, "loss": 0.0527, "step": 150 }, { "epoch": 0.26, "learning_rate": 7.578785875456937e-06, "loss": 0.0847, "step": 160 }, { "epoch": 0.28, "learning_rate": 7.407192378892295e-06, "loss": 0.0595, "step": 170 }, { "epoch": 0.29, "learning_rate": 7.2280143493498716e-06, "loss": 0.0327, "step": 180 }, { "epoch": 0.31, "learning_rate": 7.041719241850471e-06, "loss": 0.0877, "step": 190 }, { "epoch": 0.33, "learning_rate": 6.848793079058672e-06, "loss": 0.0356, "step": 200 }, { "epoch": 0.33, "eval_accuracy": 0.5, "eval_loss": 9.303278147854144e-08, "eval_runtime": 53.2016, "eval_samples_per_second": 82.216, "eval_steps_per_second": 10.282, "step": 200 }, { "epoch": 0.34, "learning_rate": 6.649739183305183e-06, "loss": 0.0332, "step": 210 }, { "epoch": 0.36, "learning_rate": 6.445076863476361e-06, "loss": 0.0537, "step": 220 }, { "epoch": 0.37, "learning_rate": 6.235340060196612e-06, "loss": 0.0548, "step": 230 }, { "epoch": 0.39, "learning_rate": 6.021075952838263e-06, "loss": 0.0485, "step": 240 }, { "epoch": 0.41, "learning_rate": 5.802843531993069e-06, "loss": 0.041, "step": 250 }, { "epoch": 0.42, "learning_rate": 5.581212141129573e-06, "loss": 0.0266, "step": 260 }, { "epoch": 0.44, "learning_rate": 5.3567599912410075e-06, "loss": 0.0421, "step": 270 }, { "epoch": 0.46, "learning_rate": 5.130072652358832e-06, "loss": 0.0522, "step": 280 }, { "epoch": 0.47, "learning_rate": 4.901741525867374e-06, "loss": 0.0302, "step": 290 }, { "epoch": 0.49, "learning_rate": 4.67236230160516e-06, "loss": 0.0741, "step": 300 }, { "epoch": 0.49, "eval_accuracy": 0.0, "eval_loss": 2.2148276457301108e-07, "eval_runtime": 51.7845, "eval_samples_per_second": 84.465, "eval_steps_per_second": 10.563, "step": 300 } ], "logging_steps": 10, "max_steps": 615, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "trial_name": null, "trial_params": null }