|
{ |
|
"best_metric": 0.3467291593551636, |
|
"best_model_checkpoint": "./models/e5_weighted_3\\checkpoint-21170", |
|
"epoch": 10.0, |
|
"eval_steps": 4234, |
|
"global_step": 21170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.6739985346794128, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.4895, |
|
"step": 4234 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6055607917059378, |
|
"eval_f1_0: ": 0.0, |
|
"eval_f1_1: ": 0.7543293219841503, |
|
"eval_loss": 0.4801671504974365, |
|
"eval_precision_0: ": 0.0, |
|
"eval_precision_1: ": 0.6055607917059378, |
|
"eval_recall_0: ": 0.0, |
|
"eval_recall_1: ": 1.0, |
|
"eval_runtime": 695.7116, |
|
"eval_samples_per_second": 3.05, |
|
"eval_steps_per_second": 3.05, |
|
"step": 4234 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.520650863647461, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.474, |
|
"step": 8468 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6710650329877474, |
|
"eval_f1_0: ": 0.3102766798418972, |
|
"eval_f1_1: ": 0.7840346534653465, |
|
"eval_loss": 0.4519682228565216, |
|
"eval_precision_0: ": 0.8971428571428571, |
|
"eval_precision_1: ": 0.6507447354904982, |
|
"eval_recall_0: ": 0.1875746714456392, |
|
"eval_recall_1: ": 0.9859922178988327, |
|
"eval_runtime": 685.0371, |
|
"eval_samples_per_second": 3.098, |
|
"eval_steps_per_second": 3.098, |
|
"step": 8468 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.5096299648284912, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.4505, |
|
"step": 12702 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7445805843543827, |
|
"eval_f1_0: ": 0.5505804311774462, |
|
"eval_f1_1: ": 0.8215931533903884, |
|
"eval_loss": 0.42401960492134094, |
|
"eval_precision_0: ": 0.8997289972899729, |
|
"eval_precision_1: ": 0.7119224187107815, |
|
"eval_recall_0: ": 0.3966547192353644, |
|
"eval_recall_1: ": 0.9712062256809338, |
|
"eval_runtime": 684.6385, |
|
"eval_samples_per_second": 3.099, |
|
"eval_steps_per_second": 3.099, |
|
"step": 12702 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.8480051755905151, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.418, |
|
"step": 16936 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8044297832233742, |
|
"eval_f1_0: ": 0.7182620502376104, |
|
"eval_f1_1: ": 0.85023457235655, |
|
"eval_loss": 0.37760457396507263, |
|
"eval_precision_0: ": 0.8317610062893082, |
|
"eval_precision_1: ": 0.7927321668909825, |
|
"eval_recall_0: ": 0.6320191158900836, |
|
"eval_recall_1: ": 0.9167315175097276, |
|
"eval_runtime": 680.6302, |
|
"eval_samples_per_second": 3.118, |
|
"eval_steps_per_second": 3.118, |
|
"step": 16936 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.9994410872459412, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3911, |
|
"step": 21170 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8185673892554194, |
|
"eval_f1_0: ": 0.752411575562701, |
|
"eval_f1_1: ": 0.8568240981777613, |
|
"eval_loss": 0.3467291593551636, |
|
"eval_precision_0: ": 0.8147632311977716, |
|
"eval_precision_1: ": 0.8205128205128205, |
|
"eval_recall_0: ": 0.6989247311827957, |
|
"eval_recall_1: ": 0.8964980544747082, |
|
"eval_runtime": 673.4114, |
|
"eval_samples_per_second": 3.151, |
|
"eval_steps_per_second": 3.151, |
|
"step": 21170 |
|
} |
|
], |
|
"logging_steps": 4234, |
|
"max_steps": 42340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 4234, |
|
"total_flos": 2.448451500060672e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|