|
{ |
|
"best_metric": 0.7792592592592592, |
|
"best_model_checkpoint": "sentiment-analysis-pp/checkpoint-4728", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 4728, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 163.66278076171875, |
|
"learning_rate": 2.7461928934010155e-05, |
|
"loss": 0.9585, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 7.153451919555664, |
|
"learning_rate": 2.4923857868020305e-05, |
|
"loss": 0.8643, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6766666666666666, |
|
"eval_loss": 0.7678444981575012, |
|
"eval_runtime": 42.3243, |
|
"eval_samples_per_second": 63.793, |
|
"eval_steps_per_second": 3.993, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 6.358371257781982, |
|
"learning_rate": 2.238578680203046e-05, |
|
"loss": 0.7115, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 6.753957748413086, |
|
"learning_rate": 1.9847715736040607e-05, |
|
"loss": 0.6343, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7188888888888889, |
|
"eval_loss": 0.6581271290779114, |
|
"eval_runtime": 42.3763, |
|
"eval_samples_per_second": 63.715, |
|
"eval_steps_per_second": 3.988, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 12.652050971984863, |
|
"learning_rate": 1.730964467005076e-05, |
|
"loss": 0.5684, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 22.428882598876953, |
|
"learning_rate": 1.4771573604060913e-05, |
|
"loss": 0.4881, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 7.206263542175293, |
|
"learning_rate": 1.2233502538071067e-05, |
|
"loss": 0.4631, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7648148148148148, |
|
"eval_loss": 0.6259744763374329, |
|
"eval_runtime": 42.4889, |
|
"eval_samples_per_second": 63.546, |
|
"eval_steps_per_second": 3.978, |
|
"step": 3546 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 7.703592300415039, |
|
"learning_rate": 9.695431472081218e-06, |
|
"loss": 0.3736, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 11.456498146057129, |
|
"learning_rate": 7.15736040609137e-06, |
|
"loss": 0.3553, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7792592592592592, |
|
"eval_loss": 0.623358964920044, |
|
"eval_runtime": 42.3946, |
|
"eval_samples_per_second": 63.687, |
|
"eval_steps_per_second": 3.986, |
|
"step": 4728 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1.60063403215968e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|