|
{ |
|
"best_metric": 0.8245412844036697, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1054", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 2108, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.507440967713177e-05, |
|
"loss": 1.6043, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8107798165137615, |
|
"eval_loss": 1.3382474184036255, |
|
"eval_runtime": 7.7477, |
|
"eval_samples_per_second": 112.55, |
|
"eval_steps_per_second": 0.903, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.338293978475452e-05, |
|
"loss": 0.7901, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8245412844036697, |
|
"eval_loss": 1.3008999824523926, |
|
"eval_runtime": 7.5885, |
|
"eval_samples_per_second": 114.91, |
|
"eval_steps_per_second": 0.922, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.169146989237726e-05, |
|
"loss": 0.6181, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8188073394495413, |
|
"eval_loss": 1.3191856145858765, |
|
"eval_runtime": 7.916, |
|
"eval_samples_per_second": 110.157, |
|
"eval_steps_per_second": 0.884, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.5387, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.819954128440367, |
|
"eval_loss": 1.3426295518875122, |
|
"eval_runtime": 9.3241, |
|
"eval_samples_per_second": 93.521, |
|
"eval_steps_per_second": 0.751, |
|
"step": 2108 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2108, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 32344049138640.0, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.46638270752374766, |
|
"learning_rate": 0.00012676587956950903, |
|
"num_train_epochs": 4, |
|
"temperature": 20 |
|
} |
|
} |
|
|