{ "best_metric": 1.625908613204956, "best_model_checkpoint": "qa_finetuning/run-3/checkpoint-1000", "epoch": 1.6, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "grad_norm": 10.762316703796387, "learning_rate": 4.309025425308167e-05, "loss": 3.9927, "step": 100 }, { "epoch": 0.16, "eval_loss": 3.2011678218841553, "eval_runtime": 3.8548, "eval_samples_per_second": 129.707, "eval_steps_per_second": 4.151, "step": 100 }, { "epoch": 0.32, "grad_norm": 28.462312698364258, "learning_rate": 4.066263429516157e-05, "loss": 2.9504, "step": 200 }, { "epoch": 0.32, "eval_loss": 2.258704900741577, "eval_runtime": 3.6383, "eval_samples_per_second": 137.425, "eval_steps_per_second": 4.398, "step": 200 }, { "epoch": 0.48, "grad_norm": 31.754859924316406, "learning_rate": 3.823501433724148e-05, "loss": 2.2188, "step": 300 }, { "epoch": 0.48, "eval_loss": 1.7734456062316895, "eval_runtime": 3.6015, "eval_samples_per_second": 138.831, "eval_steps_per_second": 4.443, "step": 300 }, { "epoch": 0.64, "grad_norm": 19.329771041870117, "learning_rate": 3.580739437932139e-05, "loss": 1.8494, "step": 400 }, { "epoch": 0.64, "eval_loss": 1.7346322536468506, "eval_runtime": 3.7979, "eval_samples_per_second": 131.653, "eval_steps_per_second": 4.213, "step": 400 }, { "epoch": 0.8, "grad_norm": 36.452842712402344, "learning_rate": 3.3379774421401294e-05, "loss": 1.6365, "step": 500 }, { "epoch": 0.8, "eval_loss": 1.7207331657409668, "eval_runtime": 3.6332, "eval_samples_per_second": 137.618, "eval_steps_per_second": 4.404, "step": 500 }, { "epoch": 0.96, "grad_norm": 22.892818450927734, "learning_rate": 3.09521544634812e-05, "loss": 1.5395, "step": 600 }, { "epoch": 0.96, "eval_loss": 1.6212908029556274, "eval_runtime": 3.7235, "eval_samples_per_second": 134.282, "eval_steps_per_second": 4.297, "step": 600 }, { "epoch": 1.12, "grad_norm": 13.673020362854004, "learning_rate": 2.852453450556111e-05, "loss": 1.0662, "step": 700 }, { "epoch": 1.12, "eval_loss": 1.726162075996399, "eval_runtime": 3.7463, "eval_samples_per_second": 133.467, "eval_steps_per_second": 4.271, "step": 700 }, { "epoch": 1.28, "grad_norm": 7.783051013946533, "learning_rate": 2.6096914547641013e-05, "loss": 1.0033, "step": 800 }, { "epoch": 1.28, "eval_loss": 1.6507431268692017, "eval_runtime": 3.6574, "eval_samples_per_second": 136.709, "eval_steps_per_second": 4.375, "step": 800 }, { "epoch": 1.44, "grad_norm": 11.287247657775879, "learning_rate": 2.3669294589720917e-05, "loss": 0.9921, "step": 900 }, { "epoch": 1.44, "eval_loss": 1.621005654335022, "eval_runtime": 3.664, "eval_samples_per_second": 136.464, "eval_steps_per_second": 4.367, "step": 900 }, { "epoch": 1.6, "grad_norm": 25.076732635498047, "learning_rate": 2.1241674631800824e-05, "loss": 0.9504, "step": 1000 }, { "epoch": 1.6, "eval_loss": 1.625908613204956, "eval_runtime": 3.6844, "eval_samples_per_second": 135.705, "eval_steps_per_second": 4.343, "step": 1000 } ], "logging_steps": 100, "max_steps": 1875, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 783918600192000.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 4.5517874211001764e-05, "num_train_epochs": 3, "per_device_train_batch_size": 4, "seed": 18 } }