{ "best_metric": 0.7983730389308542, "best_model_checkpoint": "./sdg-classifier/checkpoint-1076", "epoch": 5.0, "eval_steps": 500, "global_step": 1345, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.37174721189591076, "grad_norm": 3.068047046661377, "learning_rate": 1.4370370370370372e-05, "loss": 2.5931, "step": 100 }, { "epoch": 0.7434944237918215, "grad_norm": 4.889872074127197, "learning_rate": 1.897520661157025e-05, "loss": 1.2927, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.7515398024404416, "eval_loss": 0.8947181105613708, "eval_runtime": 9.4625, "eval_samples_per_second": 909.378, "eval_steps_per_second": 7.186, "step": 269 }, { "epoch": 1.1152416356877324, "grad_norm": 4.267650604248047, "learning_rate": 1.732231404958678e-05, "loss": 0.9586, "step": 300 }, { "epoch": 1.486988847583643, "grad_norm": 5.622753620147705, "learning_rate": 1.566942148760331e-05, "loss": 0.8529, "step": 400 }, { "epoch": 1.858736059479554, "grad_norm": 4.57409143447876, "learning_rate": 1.4016528925619836e-05, "loss": 0.7953, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.7795467751307379, "eval_loss": 0.7700016498565674, "eval_runtime": 9.1997, "eval_samples_per_second": 935.353, "eval_steps_per_second": 7.392, "step": 538 }, { "epoch": 2.2304832713754648, "grad_norm": 5.621212005615234, "learning_rate": 1.2363636363636364e-05, "loss": 0.7169, "step": 600 }, { "epoch": 2.6022304832713754, "grad_norm": 7.005183696746826, "learning_rate": 1.0710743801652894e-05, "loss": 0.666, "step": 700 }, { "epoch": 2.973977695167286, "grad_norm": 5.196578025817871, "learning_rate": 9.057851239669422e-06, "loss": 0.6549, "step": 800 }, { "epoch": 3.0, "eval_accuracy": 0.7937245787332946, "eval_loss": 0.7241168022155762, "eval_runtime": 9.2798, "eval_samples_per_second": 927.284, "eval_steps_per_second": 7.328, "step": 807 }, { "epoch": 3.345724907063197, "grad_norm": 4.715315341949463, "learning_rate": 7.40495867768595e-06, "loss": 0.5591, "step": 900 }, { "epoch": 3.717472118959108, "grad_norm": 7.226228713989258, "learning_rate": 5.7520661157024795e-06, "loss": 0.5658, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.7983730389308542, "eval_loss": 0.7134895920753479, "eval_runtime": 9.1183, "eval_samples_per_second": 943.703, "eval_steps_per_second": 7.458, "step": 1076 }, { "epoch": 4.089219330855019, "grad_norm": 7.13743782043457, "learning_rate": 4.099173553719009e-06, "loss": 0.542, "step": 1100 }, { "epoch": 4.4609665427509295, "grad_norm": 6.263808727264404, "learning_rate": 2.4462809917355375e-06, "loss": 0.4922, "step": 1200 }, { "epoch": 4.83271375464684, "grad_norm": 7.6533989906311035, "learning_rate": 7.933884297520662e-07, "loss": 0.4799, "step": 1300 }, { "epoch": 5.0, "eval_accuracy": 0.7940732132481115, "eval_loss": 0.7142194509506226, "eval_runtime": 9.2321, "eval_samples_per_second": 932.07, "eval_steps_per_second": 7.366, "step": 1345 }, { "epoch": 5.0, "step": 1345, "total_flos": 4.52871045169152e+16, "train_loss": 0.846687467213457, "train_runtime": 657.9188, "train_samples_per_second": 261.582, "train_steps_per_second": 2.044 } ], "logging_steps": 100, "max_steps": 1345, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 5000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.52871045169152e+16, "train_batch_size": 128, "trial_name": null, "trial_params": null }