{ "best_metric": 0.8348623853211009, "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-1/checkpoint-2526", "epoch": 3.0, "eval_steps": 500, "global_step": 2526, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.59, "grad_norm": 7.641239166259766, "learning_rate": 1.4914273614162126e-05, "loss": 0.5407, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.8084862385321101, "eval_loss": 0.4513384699821472, "eval_runtime": 0.9147, "eval_samples_per_second": 953.342, "eval_steps_per_second": 60.131, "step": 842 }, { "epoch": 1.19, "grad_norm": 20.01938247680664, "learning_rate": 1.2904263693116017e-05, "loss": 0.4323, "step": 1000 }, { "epoch": 1.78, "grad_norm": 2.568437337875366, "learning_rate": 1.089425377206991e-05, "loss": 0.3959, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.8256880733944955, "eval_loss": 0.45341619849205017, "eval_runtime": 1.2452, "eval_samples_per_second": 700.289, "eval_steps_per_second": 44.17, "step": 1684 }, { "epoch": 2.38, "grad_norm": 0.6975702047348022, "learning_rate": 8.884243851023799e-06, "loss": 0.361, "step": 2000 }, { "epoch": 2.97, "grad_norm": 0.5081818699836731, "learning_rate": 6.874233929977691e-06, "loss": 0.3226, "step": 2500 }, { "epoch": 3.0, "eval_accuracy": 0.8348623853211009, "eval_loss": 0.5020601153373718, "eval_runtime": 0.9128, "eval_samples_per_second": 955.316, "eval_steps_per_second": 60.255, "step": 2526 } ], "logging_steps": 500, "max_steps": 4210, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 90087133996608.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 1.6924283535208234e-05, "num_train_epochs": 5, "per_device_train_batch_size": 8, "seed": 25 } }