{ "best_metric": 0.7792592592592592, "best_model_checkpoint": "sentiment-analysis-pp/checkpoint-4728", "epoch": 4.0, "eval_steps": 500, "global_step": 4728, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.42, "grad_norm": 163.66278076171875, "learning_rate": 2.7461928934010155e-05, "loss": 0.9585, "step": 500 }, { "epoch": 0.85, "grad_norm": 7.153451919555664, "learning_rate": 2.4923857868020305e-05, "loss": 0.8643, "step": 1000 }, { "epoch": 1.0, "eval_accuracy": 0.6766666666666666, "eval_loss": 0.7678444981575012, "eval_runtime": 42.3243, "eval_samples_per_second": 63.793, "eval_steps_per_second": 3.993, "step": 1182 }, { "epoch": 1.27, "grad_norm": 6.358371257781982, "learning_rate": 2.238578680203046e-05, "loss": 0.7115, "step": 1500 }, { "epoch": 1.69, "grad_norm": 6.753957748413086, "learning_rate": 1.9847715736040607e-05, "loss": 0.6343, "step": 2000 }, { "epoch": 2.0, "eval_accuracy": 0.7188888888888889, "eval_loss": 0.6581271290779114, "eval_runtime": 42.3763, "eval_samples_per_second": 63.715, "eval_steps_per_second": 3.988, "step": 2364 }, { "epoch": 2.12, "grad_norm": 12.652050971984863, "learning_rate": 1.730964467005076e-05, "loss": 0.5684, "step": 2500 }, { "epoch": 2.54, "grad_norm": 22.428882598876953, "learning_rate": 1.4771573604060913e-05, "loss": 0.4881, "step": 3000 }, { "epoch": 2.96, "grad_norm": 7.206263542175293, "learning_rate": 1.2233502538071067e-05, "loss": 0.4631, "step": 3500 }, { "epoch": 3.0, "eval_accuracy": 0.7648148148148148, "eval_loss": 0.6259744763374329, "eval_runtime": 42.4889, "eval_samples_per_second": 63.546, "eval_steps_per_second": 3.978, "step": 3546 }, { "epoch": 3.38, "grad_norm": 7.703592300415039, "learning_rate": 9.695431472081218e-06, "loss": 0.3736, "step": 4000 }, { "epoch": 3.81, "grad_norm": 11.456498146057129, "learning_rate": 7.15736040609137e-06, "loss": 0.3553, "step": 4500 }, { "epoch": 4.0, "eval_accuracy": 0.7792592592592592, "eval_loss": 0.623358964920044, "eval_runtime": 42.3946, "eval_samples_per_second": 63.687, "eval_steps_per_second": 3.986, "step": 4728 } ], "logging_steps": 500, "max_steps": 5910, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1.60063403215968e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }