{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.995967741935484, "eval_steps": 495, "global_step": 990, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": 0.7123413681983948, "learning_rate": 0.0002, "loss": 0.9903, "step": 100 }, { "epoch": 0.4, "grad_norm": 0.5445395112037659, "learning_rate": 0.0002, "loss": 0.5847, "step": 200 }, { "epoch": 0.6, "grad_norm": 0.5527735948562622, "learning_rate": 0.0002, "loss": 0.4054, "step": 300 }, { "epoch": 0.81, "grad_norm": 0.3411148190498352, "learning_rate": 0.0002, "loss": 0.2877, "step": 400 }, { "epoch": 1.0, "eval_loss": 0.1955450177192688, "eval_runtime": 459.2044, "eval_samples_per_second": 7.681, "eval_steps_per_second": 1.921, "step": 495 }, { "epoch": 1.01, "grad_norm": 0.21418581902980804, "learning_rate": 0.0002, "loss": 0.215, "step": 500 }, { "epoch": 1.21, "grad_norm": 0.2143397480249405, "learning_rate": 0.0002, "loss": 0.1507, "step": 600 }, { "epoch": 1.41, "grad_norm": 0.20695798099040985, "learning_rate": 0.0002, "loss": 0.1356, "step": 700 }, { "epoch": 1.61, "grad_norm": 0.1783648580312729, "learning_rate": 0.0002, "loss": 0.123, "step": 800 }, { "epoch": 1.81, "grad_norm": 0.1991637796163559, "learning_rate": 0.0002, "loss": 0.1129, "step": 900 }, { "epoch": 2.0, "eval_loss": 0.11679410934448242, "eval_runtime": 436.9065, "eval_samples_per_second": 8.073, "eval_steps_per_second": 2.019, "step": 990 } ], "logging_steps": 100, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 495, "total_flos": 1.2763485559860756e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }