|
{ |
|
"best_metric": 0.657294511795044, |
|
"best_model_checkpoint": "hBERTv2_new_pretrain_w_init__qnli/checkpoint-1638", |
|
"epoch": 7.0, |
|
"global_step": 5733, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.6809, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6020501555921655, |
|
"eval_loss": 0.6640335321426392, |
|
"eval_runtime": 9.3726, |
|
"eval_samples_per_second": 582.869, |
|
"eval_steps_per_second": 4.588, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 0.6539, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5958264689730917, |
|
"eval_loss": 0.657294511795044, |
|
"eval_runtime": 9.3593, |
|
"eval_samples_per_second": 583.694, |
|
"eval_steps_per_second": 4.594, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.6358, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5941790225151016, |
|
"eval_loss": 0.6694244146347046, |
|
"eval_runtime": 9.3585, |
|
"eval_samples_per_second": 583.748, |
|
"eval_steps_per_second": 4.595, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.680000000000001e-05, |
|
"loss": 0.6313, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5789859051803039, |
|
"eval_loss": 0.6897775530815125, |
|
"eval_runtime": 9.3577, |
|
"eval_samples_per_second": 583.797, |
|
"eval_steps_per_second": 4.595, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.6293, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5879553358960278, |
|
"eval_loss": 0.6801490783691406, |
|
"eval_runtime": 9.3527, |
|
"eval_samples_per_second": 584.106, |
|
"eval_steps_per_second": 4.598, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.52e-05, |
|
"loss": 0.6253, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5925315760571115, |
|
"eval_loss": 0.6828794479370117, |
|
"eval_runtime": 9.366, |
|
"eval_samples_per_second": 583.282, |
|
"eval_steps_per_second": 4.591, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.44e-05, |
|
"loss": 0.6166, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5916163280248947, |
|
"eval_loss": 0.6865497827529907, |
|
"eval_runtime": 9.3522, |
|
"eval_samples_per_second": 584.138, |
|
"eval_steps_per_second": 4.598, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5733, |
|
"total_flos": 1.0729602688011469e+17, |
|
"train_loss": 0.639016756103984, |
|
"train_runtime": 3839.7768, |
|
"train_samples_per_second": 1363.92, |
|
"train_steps_per_second": 10.665 |
|
} |
|
], |
|
"max_steps": 40950, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.0729602688011469e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|