{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.05790622665655238, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002895311332827619, "grad_norm": 1.7338896989822388, "learning_rate": 4.985523443335862e-05, "loss": 3.1507, "step": 500 }, { "epoch": 0.005790622665655238, "grad_norm": 1.2579742670059204, "learning_rate": 4.9710468866717244e-05, "loss": 2.6357, "step": 1000 }, { "epoch": 0.008685933998482857, "grad_norm": 0.9996505975723267, "learning_rate": 4.956570330007586e-05, "loss": 2.4288, "step": 1500 }, { "epoch": 0.011581245331310476, "grad_norm": 0.9657206535339355, "learning_rate": 4.942093773343448e-05, "loss": 2.2119, "step": 2000 }, { "epoch": 0.014476556664138095, "grad_norm": 1.044640064239502, "learning_rate": 4.92761721667931e-05, "loss": 2.1225, "step": 2500 }, { "epoch": 0.017371867996965714, "grad_norm": 0.8428456783294678, "learning_rate": 4.913140660015172e-05, "loss": 2.0409, "step": 3000 }, { "epoch": 0.02026717932979333, "grad_norm": 1.2827749252319336, "learning_rate": 4.898664103351033e-05, "loss": 1.9031, "step": 3500 }, { "epoch": 0.023162490662620952, "grad_norm": 0.9291247129440308, "learning_rate": 4.884187546686895e-05, "loss": 1.8438, "step": 4000 }, { "epoch": 0.02605780199544857, "grad_norm": 0.7757951617240906, "learning_rate": 4.8697109900227575e-05, "loss": 1.7833, "step": 4500 }, { "epoch": 0.02895311332827619, "grad_norm": 1.520269751548767, "learning_rate": 4.855234433358619e-05, "loss": 1.6942, "step": 5000 }, { "epoch": 0.03184842466110381, "grad_norm": 1.2558553218841553, "learning_rate": 4.840757876694481e-05, "loss": 1.6669, "step": 5500 }, { "epoch": 0.03474373599393143, "grad_norm": 1.3526251316070557, "learning_rate": 4.826281320030343e-05, "loss": 1.6115, "step": 6000 }, { "epoch": 0.037639047326759045, "grad_norm": 2.5751099586486816, "learning_rate": 4.811804763366205e-05, "loss": 1.5629, "step": 6500 }, { "epoch": 0.04053435865958666, "grad_norm": 1.3569105863571167, "learning_rate": 4.797328206702067e-05, "loss": 1.5258, "step": 7000 }, { "epoch": 0.04342966999241429, "grad_norm": 1.7734428644180298, "learning_rate": 4.782851650037929e-05, "loss": 1.475, "step": 7500 }, { "epoch": 0.046324981325241904, "grad_norm": 0.7975415587425232, "learning_rate": 4.7683750933737905e-05, "loss": 1.437, "step": 8000 }, { "epoch": 0.04922029265806952, "grad_norm": 0.8590123057365417, "learning_rate": 4.753898536709653e-05, "loss": 1.4263, "step": 8500 }, { "epoch": 0.05211560399089714, "grad_norm": 1.1225152015686035, "learning_rate": 4.739421980045515e-05, "loss": 1.3528, "step": 9000 }, { "epoch": 0.05501091532372476, "grad_norm": 1.1342971324920654, "learning_rate": 4.7249454233813765e-05, "loss": 1.3576, "step": 9500 }, { "epoch": 0.05790622665655238, "grad_norm": 0.9445364475250244, "learning_rate": 4.710468866717238e-05, "loss": 1.3283, "step": 10000 } ], "logging_steps": 500, "max_steps": 172693, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.180672512e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }