{ "best_metric": 0.8440366972477065, "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-1/checkpoint-4210", "epoch": 5.0, "eval_steps": 500, "global_step": 4210, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.59, "grad_norm": 7.641239166259766, "learning_rate": 1.4914273614162126e-05, "loss": 0.5407, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.8084862385321101, "eval_loss": 0.4513384699821472, "eval_runtime": 0.9147, "eval_samples_per_second": 953.342, "eval_steps_per_second": 60.131, "step": 842 }, { "epoch": 1.19, "grad_norm": 20.01938247680664, "learning_rate": 1.2904263693116017e-05, "loss": 0.4323, "step": 1000 }, { "epoch": 1.78, "grad_norm": 2.568437337875366, "learning_rate": 1.089425377206991e-05, "loss": 0.3959, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.8256880733944955, "eval_loss": 0.45341619849205017, "eval_runtime": 1.2452, "eval_samples_per_second": 700.289, "eval_steps_per_second": 44.17, "step": 1684 }, { "epoch": 2.38, "grad_norm": 0.6975702047348022, "learning_rate": 8.884243851023799e-06, "loss": 0.361, "step": 2000 }, { "epoch": 2.97, "grad_norm": 0.5081818699836731, "learning_rate": 6.874233929977691e-06, "loss": 0.3226, "step": 2500 }, { "epoch": 3.0, "eval_accuracy": 0.8348623853211009, "eval_loss": 0.5020601153373718, "eval_runtime": 0.9128, "eval_samples_per_second": 955.316, "eval_steps_per_second": 60.255, "step": 2526 }, { "epoch": 3.56, "grad_norm": 39.4764518737793, "learning_rate": 4.864224008931583e-06, "loss": 0.2707, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.8394495412844036, "eval_loss": 0.5194000005722046, "eval_runtime": 1.1521, "eval_samples_per_second": 756.852, "eval_steps_per_second": 47.737, "step": 3368 }, { "epoch": 4.16, "grad_norm": 0.2999955117702484, "learning_rate": 2.8542140878854744e-06, "loss": 0.2608, "step": 3500 }, { "epoch": 4.75, "grad_norm": 37.41545867919922, "learning_rate": 8.442041668393656e-07, "loss": 0.2394, "step": 4000 }, { "epoch": 5.0, "eval_accuracy": 0.8440366972477065, "eval_loss": 0.5257198810577393, "eval_runtime": 0.9113, "eval_samples_per_second": 956.866, "eval_steps_per_second": 60.353, "step": 4210 } ], "logging_steps": 500, "max_steps": 4210, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 143595990102684.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 1.6924283535208234e-05, "num_train_epochs": 5, "per_device_train_batch_size": 8, "seed": 25 } }