{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4050222762251924, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04050222762251924, "eval_accuracy": 0.07313909615825066, "eval_loss": 5.535554885864258, "eval_runtime": 288.6011, "eval_samples_per_second": 14.075, "eval_steps_per_second": 0.44, "step": 100 }, { "epoch": 0.08100445524503848, "eval_accuracy": 0.09321176989128148, "eval_loss": 5.312517166137695, "eval_runtime": 289.0997, "eval_samples_per_second": 14.051, "eval_steps_per_second": 0.439, "step": 200 }, { "epoch": 0.12150668286755771, "eval_accuracy": 0.1025266924717497, "eval_loss": 5.209850788116455, "eval_runtime": 288.9641, "eval_samples_per_second": 14.057, "eval_steps_per_second": 0.44, "step": 300 }, { "epoch": 0.16200891049007696, "eval_accuracy": 0.10690876214872297, "eval_loss": 5.157820224761963, "eval_runtime": 288.9416, "eval_samples_per_second": 14.058, "eval_steps_per_second": 0.44, "step": 400 }, { "epoch": 0.2025111381125962, "grad_norm": 4.206667900085449, "learning_rate": 4.6624814364790066e-05, "loss": 5.3676, "step": 500 }, { "epoch": 0.2025111381125962, "eval_accuracy": 0.11018071094865264, "eval_loss": 5.133289337158203, "eval_runtime": 288.702, "eval_samples_per_second": 14.07, "eval_steps_per_second": 0.44, "step": 500 }, { "epoch": 0.24301336573511542, "eval_accuracy": 0.1130116481766369, "eval_loss": 5.102772235870361, "eval_runtime": 288.9586, "eval_samples_per_second": 14.057, "eval_steps_per_second": 0.44, "step": 600 }, { "epoch": 0.28351559335763465, "eval_accuracy": 0.11700033155499291, "eval_loss": 5.068767070770264, "eval_runtime": 288.67, "eval_samples_per_second": 14.071, "eval_steps_per_second": 0.44, "step": 700 }, { "epoch": 0.3240178209801539, "eval_accuracy": 0.1189196062267089, "eval_loss": 5.049363136291504, "eval_runtime": 288.7628, "eval_samples_per_second": 14.067, "eval_steps_per_second": 0.44, "step": 800 }, { "epoch": 0.3645200486026732, "eval_accuracy": 0.12067047317065237, "eval_loss": 5.0373969078063965, "eval_runtime": 288.4915, "eval_samples_per_second": 14.08, "eval_steps_per_second": 0.44, "step": 900 }, { "epoch": 0.4050222762251924, "grad_norm": 2.745899200439453, "learning_rate": 4.324962872958013e-05, "loss": 5.0801, "step": 1000 }, { "epoch": 0.4050222762251924, "eval_accuracy": 0.12251942264285288, "eval_loss": 5.018136024475098, "eval_runtime": 288.8733, "eval_samples_per_second": 14.062, "eval_steps_per_second": 0.44, "step": 1000 }, { "epoch": 0.4050222762251924, "step": 1000, "total_flos": 7.03593355149312e+17, "train_loss": 5.223868896484375, "train_runtime": 10418.308, "train_samples_per_second": 22.746, "train_steps_per_second": 0.711 } ], "logging_steps": 500, "max_steps": 7407, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "total_flos": 7.03593355149312e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }