{ "best_metric": 0.8405963302752294, "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-0/checkpoint-1684", "epoch": 5.0, "eval_steps": 500, "global_step": 2105, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8119266055045872, "eval_loss": 0.4295983910560608, "eval_runtime": 1.0076, "eval_samples_per_second": 865.424, "eval_steps_per_second": 54.585, "step": 421 }, { "epoch": 1.19, "grad_norm": 8.123950004577637, "learning_rate": 1.3357804670815412e-05, "loss": 0.4991, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.823394495412844, "eval_loss": 0.39807233214378357, "eval_runtime": 1.0042, "eval_samples_per_second": 868.388, "eval_steps_per_second": 54.772, "step": 842 }, { "epoch": 2.38, "grad_norm": 5.427116870880127, "learning_rate": 9.196494804517776e-06, "loss": 0.3654, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.8337155963302753, "eval_loss": 0.40886953473091125, "eval_runtime": 0.9135, "eval_samples_per_second": 954.617, "eval_steps_per_second": 60.211, "step": 1263 }, { "epoch": 3.56, "grad_norm": 1.0015597343444824, "learning_rate": 5.03518493822014e-06, "loss": 0.2923, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.8405963302752294, "eval_loss": 0.43070515990257263, "eval_runtime": 0.931, "eval_samples_per_second": 936.614, "eval_steps_per_second": 59.075, "step": 1684 }, { "epoch": 4.75, "grad_norm": 18.156158447265625, "learning_rate": 8.738750719225037e-07, "loss": 0.2538, "step": 2000 }, { "epoch": 5.0, "eval_accuracy": 0.838302752293578, "eval_loss": 0.4588472545146942, "eval_runtime": 1.0148, "eval_samples_per_second": 859.266, "eval_steps_per_second": 54.197, "step": 2105 } ], "logging_steps": 500, "max_steps": 2105, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 170928927395856.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "learning_rate": 1.751911453711305e-05, "num_train_epochs": 5, "per_device_train_batch_size": 16, "seed": 24 } }