File size: 2,454 Bytes
96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 1779fb2 96a88b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
{
"best_metric": 0.4691032179514943,
"best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-4276",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 4276,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23,
"grad_norm": 4.28505277633667,
"learning_rate": 3.0702893894484785e-06,
"loss": 0.6069,
"step": 500
},
{
"epoch": 0.47,
"grad_norm": 9.482794761657715,
"learning_rate": 2.9196373094951675e-06,
"loss": 0.5628,
"step": 1000
},
{
"epoch": 0.7,
"grad_norm": 22.521339416503906,
"learning_rate": 2.7689852295418565e-06,
"loss": 0.5565,
"step": 1500
},
{
"epoch": 0.94,
"grad_norm": 26.7753849029541,
"learning_rate": 2.6183331495885454e-06,
"loss": 0.5184,
"step": 2000
},
{
"epoch": 1.0,
"eval_loss": 0.5730993747711182,
"eval_matthews_correlation": 0.3853198145814999,
"eval_runtime": 0.7612,
"eval_samples_per_second": 1370.225,
"eval_steps_per_second": 86.706,
"step": 2138
},
{
"epoch": 1.17,
"grad_norm": 17.77669334411621,
"learning_rate": 2.4676810696352344e-06,
"loss": 0.4619,
"step": 2500
},
{
"epoch": 1.4,
"grad_norm": 37.4239387512207,
"learning_rate": 2.3170289896819234e-06,
"loss": 0.5014,
"step": 3000
},
{
"epoch": 1.64,
"grad_norm": 46.75569534301758,
"learning_rate": 2.1663769097286124e-06,
"loss": 0.492,
"step": 3500
},
{
"epoch": 1.87,
"grad_norm": 66.9134750366211,
"learning_rate": 2.0157248297753013e-06,
"loss": 0.4809,
"step": 4000
},
{
"epoch": 2.0,
"eval_loss": 0.6646500825881958,
"eval_matthews_correlation": 0.4691032179514943,
"eval_runtime": 0.8224,
"eval_samples_per_second": 1268.193,
"eval_steps_per_second": 80.25,
"step": 4276
}
],
"logging_steps": 500,
"max_steps": 10690,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 65200091402940.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": {
"learning_rate": 3.2209414694017896e-06,
"num_train_epochs": 5,
"per_device_train_batch_size": 4,
"seed": 16
}
}
|