File size: 2,398 Bytes
c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 20beda7 c71a050 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
{
"best_metric": 0.4386209168112411,
"best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-3207",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 3207,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.47,
"grad_norm": 5.35684061050415,
"learning_rate": 2.775809181053764e-06,
"loss": 0.5956,
"step": 500
},
{
"epoch": 0.94,
"grad_norm": 11.188376426696777,
"learning_rate": 2.263099690648562e-06,
"loss": 0.5375,
"step": 1000
},
{
"epoch": 1.0,
"eval_loss": 0.5379385948181152,
"eval_matthews_correlation": 0.29871168199754417,
"eval_runtime": 0.7649,
"eval_samples_per_second": 1363.614,
"eval_steps_per_second": 86.288,
"step": 1069
},
{
"epoch": 1.4,
"grad_norm": 10.391807556152344,
"learning_rate": 1.7503902002433598e-06,
"loss": 0.4937,
"step": 1500
},
{
"epoch": 1.87,
"grad_norm": 19.357559204101562,
"learning_rate": 1.2376807098381578e-06,
"loss": 0.4738,
"step": 2000
},
{
"epoch": 2.0,
"eval_loss": 0.526983916759491,
"eval_matthews_correlation": 0.41870797137315424,
"eval_runtime": 0.7468,
"eval_samples_per_second": 1396.551,
"eval_steps_per_second": 88.372,
"step": 2138
},
{
"epoch": 2.34,
"grad_norm": 11.473833084106445,
"learning_rate": 7.249712194329557e-07,
"loss": 0.4364,
"step": 2500
},
{
"epoch": 2.81,
"grad_norm": 12.574313163757324,
"learning_rate": 2.1226172902775366e-07,
"loss": 0.4349,
"step": 3000
},
{
"epoch": 3.0,
"eval_loss": 0.5435938239097595,
"eval_matthews_correlation": 0.4386209168112411,
"eval_runtime": 0.7548,
"eval_samples_per_second": 1381.78,
"eval_steps_per_second": 87.438,
"step": 3207
}
],
"logging_steps": 500,
"max_steps": 3207,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 113055491519748.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": {
"learning_rate": 3.288518671458966e-06,
"num_train_epochs": 3,
"per_device_train_batch_size": 8,
"seed": 24
}
}
|