|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.4050222762251924, |
|
"eval_steps": 100, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04050222762251924, |
|
"eval_accuracy": 0.07313909615825066, |
|
"eval_loss": 5.535554885864258, |
|
"eval_runtime": 288.6011, |
|
"eval_samples_per_second": 14.075, |
|
"eval_steps_per_second": 0.44, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08100445524503848, |
|
"eval_accuracy": 0.09321176989128148, |
|
"eval_loss": 5.312517166137695, |
|
"eval_runtime": 289.0997, |
|
"eval_samples_per_second": 14.051, |
|
"eval_steps_per_second": 0.439, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12150668286755771, |
|
"eval_accuracy": 0.1025266924717497, |
|
"eval_loss": 5.209850788116455, |
|
"eval_runtime": 288.9641, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 0.44, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16200891049007696, |
|
"eval_accuracy": 0.10690876214872297, |
|
"eval_loss": 5.157820224761963, |
|
"eval_runtime": 288.9416, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 0.44, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2025111381125962, |
|
"grad_norm": 4.206667900085449, |
|
"learning_rate": 4.6624814364790066e-05, |
|
"loss": 5.3676, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2025111381125962, |
|
"eval_accuracy": 0.11018071094865264, |
|
"eval_loss": 5.133289337158203, |
|
"eval_runtime": 288.702, |
|
"eval_samples_per_second": 14.07, |
|
"eval_steps_per_second": 0.44, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24301336573511542, |
|
"eval_accuracy": 0.1130116481766369, |
|
"eval_loss": 5.102772235870361, |
|
"eval_runtime": 288.9586, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 0.44, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.28351559335763465, |
|
"eval_accuracy": 0.11700033155499291, |
|
"eval_loss": 5.068767070770264, |
|
"eval_runtime": 288.67, |
|
"eval_samples_per_second": 14.071, |
|
"eval_steps_per_second": 0.44, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3240178209801539, |
|
"eval_accuracy": 0.1189196062267089, |
|
"eval_loss": 5.049363136291504, |
|
"eval_runtime": 288.7628, |
|
"eval_samples_per_second": 14.067, |
|
"eval_steps_per_second": 0.44, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3645200486026732, |
|
"eval_accuracy": 0.12067047317065237, |
|
"eval_loss": 5.0373969078063965, |
|
"eval_runtime": 288.4915, |
|
"eval_samples_per_second": 14.08, |
|
"eval_steps_per_second": 0.44, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4050222762251924, |
|
"grad_norm": 2.745899200439453, |
|
"learning_rate": 4.324962872958013e-05, |
|
"loss": 5.0801, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4050222762251924, |
|
"eval_accuracy": 0.12251942264285288, |
|
"eval_loss": 5.018136024475098, |
|
"eval_runtime": 288.8733, |
|
"eval_samples_per_second": 14.062, |
|
"eval_steps_per_second": 0.44, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4050222762251924, |
|
"step": 1000, |
|
"total_flos": 7.03593355149312e+17, |
|
"train_loss": 5.223868896484375, |
|
"train_runtime": 10418.308, |
|
"train_samples_per_second": 22.746, |
|
"train_steps_per_second": 0.711 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7407, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"total_flos": 7.03593355149312e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|