Output_llama3_80-20 / trainer_state.json
Ahatsham's picture
Model save
90ca7ab verified
{
"best_metric": 0.6927083333333334,
"best_model_checkpoint": "Output_llama3_80-20/checkpoint-1440",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 1920,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5625,
"eval_balanced_accuracy": 0.5461538461538462,
"eval_loss": 0.6857039928436279,
"eval_runtime": 73.4129,
"eval_samples_per_second": 2.615,
"eval_steps_per_second": 0.327,
"step": 96
},
{
"epoch": 2.0,
"eval_accuracy": 0.5833333333333334,
"eval_balanced_accuracy": 0.5758314855875832,
"eval_loss": 0.6683754324913025,
"eval_runtime": 104.3141,
"eval_samples_per_second": 1.841,
"eval_steps_per_second": 0.23,
"step": 192
},
{
"epoch": 3.0,
"eval_accuracy": 0.6197916666666666,
"eval_balanced_accuracy": 0.6383647798742138,
"eval_loss": 0.7166934609413147,
"eval_runtime": 110.585,
"eval_samples_per_second": 1.736,
"eval_steps_per_second": 0.217,
"step": 288
},
{
"epoch": 4.0,
"eval_accuracy": 0.625,
"eval_balanced_accuracy": 0.6178571428571429,
"eval_loss": 0.6334595084190369,
"eval_runtime": 109.4964,
"eval_samples_per_second": 1.753,
"eval_steps_per_second": 0.219,
"step": 384
},
{
"epoch": 5.0,
"eval_accuracy": 0.5989583333333334,
"eval_balanced_accuracy": 0.6296716417910448,
"eval_loss": 0.6574041843414307,
"eval_runtime": 89.6626,
"eval_samples_per_second": 2.141,
"eval_steps_per_second": 0.268,
"step": 480
},
{
"epoch": 5.208333333333333,
"grad_norm": 72.83433532714844,
"learning_rate": 7.395833333333335e-06,
"loss": 0.6776,
"step": 500
},
{
"epoch": 6.0,
"eval_accuracy": 0.625,
"eval_balanced_accuracy": 0.6168117269812186,
"eval_loss": 0.6321956515312195,
"eval_runtime": 106.1665,
"eval_samples_per_second": 1.808,
"eval_steps_per_second": 0.226,
"step": 576
},
{
"epoch": 7.0,
"eval_accuracy": 0.609375,
"eval_balanced_accuracy": 0.6114369501466275,
"eval_loss": 0.6374137997627258,
"eval_runtime": 107.701,
"eval_samples_per_second": 1.783,
"eval_steps_per_second": 0.223,
"step": 672
},
{
"epoch": 8.0,
"eval_accuracy": 0.6354166666666666,
"eval_balanced_accuracy": 0.6277777777777778,
"eval_loss": 0.6261330246925354,
"eval_runtime": 107.9832,
"eval_samples_per_second": 1.778,
"eval_steps_per_second": 0.222,
"step": 768
},
{
"epoch": 9.0,
"eval_accuracy": 0.640625,
"eval_balanced_accuracy": 0.6650742488776332,
"eval_loss": 0.6289492249488831,
"eval_runtime": 111.0235,
"eval_samples_per_second": 1.729,
"eval_steps_per_second": 0.216,
"step": 864
},
{
"epoch": 10.0,
"eval_accuracy": 0.640625,
"eval_balanced_accuracy": 0.6367950256354314,
"eval_loss": 0.6082468628883362,
"eval_runtime": 96.7719,
"eval_samples_per_second": 1.984,
"eval_steps_per_second": 0.248,
"step": 960
},
{
"epoch": 10.416666666666666,
"grad_norm": 18.0618839263916,
"learning_rate": 4.791666666666668e-06,
"loss": 0.5732,
"step": 1000
},
{
"epoch": 11.0,
"eval_accuracy": 0.6614583333333334,
"eval_balanced_accuracy": 0.6552593256413128,
"eval_loss": 0.603647768497467,
"eval_runtime": 103.3538,
"eval_samples_per_second": 1.858,
"eval_steps_per_second": 0.232,
"step": 1056
},
{
"epoch": 12.0,
"eval_accuracy": 0.6510416666666666,
"eval_balanced_accuracy": 0.6869850746268656,
"eval_loss": 0.6444854140281677,
"eval_runtime": 107.7167,
"eval_samples_per_second": 1.782,
"eval_steps_per_second": 0.223,
"step": 1152
},
{
"epoch": 13.0,
"eval_accuracy": 0.6875,
"eval_balanced_accuracy": 0.6833333333333333,
"eval_loss": 0.6093500852584839,
"eval_runtime": 101.5591,
"eval_samples_per_second": 1.891,
"eval_steps_per_second": 0.236,
"step": 1248
},
{
"epoch": 14.0,
"eval_accuracy": 0.6666666666666666,
"eval_balanced_accuracy": 0.6607142857142857,
"eval_loss": 0.6103670001029968,
"eval_runtime": 113.5782,
"eval_samples_per_second": 1.69,
"eval_steps_per_second": 0.211,
"step": 1344
},
{
"epoch": 15.0,
"eval_accuracy": 0.6927083333333334,
"eval_balanced_accuracy": 0.6959704667751221,
"eval_loss": 0.6553041338920593,
"eval_runtime": 100.5241,
"eval_samples_per_second": 1.91,
"eval_steps_per_second": 0.239,
"step": 1440
},
{
"epoch": 15.625,
"grad_norm": 14.87450885772705,
"learning_rate": 2.1875000000000002e-06,
"loss": 0.5144,
"step": 1500
},
{
"epoch": 16.0,
"eval_accuracy": 0.6510416666666666,
"eval_balanced_accuracy": 0.6603078614623419,
"eval_loss": 0.6261806488037109,
"eval_runtime": 106.7804,
"eval_samples_per_second": 1.798,
"eval_steps_per_second": 0.225,
"step": 1536
},
{
"epoch": 17.0,
"eval_accuracy": 0.6666666666666666,
"eval_balanced_accuracy": 0.6619131197893813,
"eval_loss": 0.6154211163520813,
"eval_runtime": 86.3959,
"eval_samples_per_second": 2.222,
"eval_steps_per_second": 0.278,
"step": 1632
},
{
"epoch": 18.0,
"eval_accuracy": 0.6666666666666666,
"eval_balanced_accuracy": 0.6619131197893813,
"eval_loss": 0.6210435032844543,
"eval_runtime": 88.4219,
"eval_samples_per_second": 2.171,
"eval_steps_per_second": 0.271,
"step": 1728
},
{
"epoch": 19.0,
"eval_accuracy": 0.6770833333333334,
"eval_balanced_accuracy": 0.6716186252771619,
"eval_loss": 0.6292756795883179,
"eval_runtime": 87.5484,
"eval_samples_per_second": 2.193,
"eval_steps_per_second": 0.274,
"step": 1824
},
{
"epoch": 20.0,
"eval_accuracy": 0.6614583333333334,
"eval_balanced_accuracy": 0.6563496426608026,
"eval_loss": 0.6274305582046509,
"eval_runtime": 65.5102,
"eval_samples_per_second": 2.931,
"eval_steps_per_second": 0.366,
"step": 1920
},
{
"epoch": 20.0,
"step": 1920,
"total_flos": 2.492453077307228e+17,
"train_loss": 0.5602036555608113,
"train_runtime": 27837.1599,
"train_samples_per_second": 0.552,
"train_steps_per_second": 0.069
}
],
"logging_steps": 500,
"max_steps": 1920,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.492453077307228e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}