{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0065930443382231745, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006593044338223175, "grad_norm": 2.508810520172119, "learning_rate": 2e-05, "loss": 3.5273, "step": 1 }, { "epoch": 0.0006593044338223175, "eval_loss": 3.6100175380706787, "eval_runtime": 21.0215, "eval_samples_per_second": 30.397, "eval_steps_per_second": 15.223, "step": 1 }, { "epoch": 0.001318608867644635, "grad_norm": 2.0886263847351074, "learning_rate": 4e-05, "loss": 3.5705, "step": 2 }, { "epoch": 0.0019779133014669525, "grad_norm": 1.9770153760910034, "learning_rate": 6e-05, "loss": 3.3942, "step": 3 }, { "epoch": 0.0019779133014669525, "eval_loss": 3.6000754833221436, "eval_runtime": 19.9102, "eval_samples_per_second": 32.094, "eval_steps_per_second": 16.072, "step": 3 }, { "epoch": 0.00263721773528927, "grad_norm": 2.0442955493927, "learning_rate": 8e-05, "loss": 2.895, "step": 4 }, { "epoch": 0.0032965221691115872, "grad_norm": 2.478562116622925, "learning_rate": 0.0001, "loss": 3.2157, "step": 5 }, { "epoch": 0.003955826602933905, "grad_norm": 2.527894973754883, "learning_rate": 0.00012, "loss": 3.6057, "step": 6 }, { "epoch": 0.003955826602933905, "eval_loss": 3.4499459266662598, "eval_runtime": 20.0525, "eval_samples_per_second": 31.866, "eval_steps_per_second": 15.958, "step": 6 }, { "epoch": 0.004615131036756222, "grad_norm": 2.320080280303955, "learning_rate": 0.00014, "loss": 2.7976, "step": 7 }, { "epoch": 0.00527443547057854, "grad_norm": 2.5601561069488525, "learning_rate": 0.00016, "loss": 3.1589, "step": 8 }, { "epoch": 0.005933739904400857, "grad_norm": 2.978994131088257, "learning_rate": 0.00018, "loss": 2.9887, "step": 9 }, { "epoch": 0.005933739904400857, "eval_loss": 3.153538227081299, "eval_runtime": 19.9718, "eval_samples_per_second": 31.995, "eval_steps_per_second": 16.023, "step": 9 }, { "epoch": 0.0065930443382231745, "grad_norm": 4.296704292297363, "learning_rate": 0.0002, "loss": 3.3058, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1849564248145920.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }