{
  "best_metric": 0.8405963302752294,
  "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-0/checkpoint-1684",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 2105,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8119266055045872,
      "eval_loss": 0.4295983910560608,
      "eval_runtime": 1.0076,
      "eval_samples_per_second": 865.424,
      "eval_steps_per_second": 54.585,
      "step": 421
    },
    {
      "epoch": 1.19,
      "grad_norm": 8.123950004577637,
      "learning_rate": 1.3357804670815412e-05,
      "loss": 0.4991,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.823394495412844,
      "eval_loss": 0.39807233214378357,
      "eval_runtime": 1.0042,
      "eval_samples_per_second": 868.388,
      "eval_steps_per_second": 54.772,
      "step": 842
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.427116870880127,
      "learning_rate": 9.196494804517776e-06,
      "loss": 0.3654,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8337155963302753,
      "eval_loss": 0.40886953473091125,
      "eval_runtime": 0.9135,
      "eval_samples_per_second": 954.617,
      "eval_steps_per_second": 60.211,
      "step": 1263
    },
    {
      "epoch": 3.56,
      "grad_norm": 1.0015597343444824,
      "learning_rate": 5.03518493822014e-06,
      "loss": 0.2923,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8405963302752294,
      "eval_loss": 0.43070515990257263,
      "eval_runtime": 0.931,
      "eval_samples_per_second": 936.614,
      "eval_steps_per_second": 59.075,
      "step": 1684
    },
    {
      "epoch": 4.75,
      "grad_norm": 18.156158447265625,
      "learning_rate": 8.738750719225037e-07,
      "loss": 0.2538,
      "step": 2000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.838302752293578,
      "eval_loss": 0.4588472545146942,
      "eval_runtime": 1.0148,
      "eval_samples_per_second": 859.266,
      "eval_steps_per_second": 54.197,
      "step": 2105
    }
  ],
  "logging_steps": 500,
  "max_steps": 2105,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 170928927395856.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 1.751911453711305e-05,
    "num_train_epochs": 5,
    "per_device_train_batch_size": 16,
    "seed": 24
  }
}
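
This file appears to be the trainer state that transformers.Trainer writes alongside each checkpoint during a hyperparameter-search trial (here run-0, with the best accuracy reached at checkpoint-1684). As a quick way to inspect it, below is a minimal Python sketch, assuming the JSON above has been saved locally as trainer_state.json (a hypothetical path): it loads the state, prints the best metric and checkpoint, and lists the per-epoch evaluation entries from log_history.

import json

# Minimal sketch: load a trainer state file like the one above and summarize it.
# Assumes the JSON has been saved locally as "trainer_state.json" (hypothetical path).
with open("trainer_state.json") as f:
    state = json.load(f)

print(f"Best eval accuracy: {state['best_metric']:.4f}")
print(f"Best checkpoint:    {state['best_model_checkpoint']}")

# log_history mixes training-loss entries and evaluation entries;
# the evaluation entries are the ones carrying an "eval_accuracy" key.
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f"epoch {entry['epoch']:.1f}: "
              f"accuracy={entry['eval_accuracy']:.4f}, loss={entry['eval_loss']:.4f}")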