|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 3820, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9462, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 15.453, |
|
"eval_loss": 0.2963322103023529, |
|
"eval_per": 0.3495, |
|
"eval_runtime": 122.3144, |
|
"eval_samples_per_second": 56.976, |
|
"eval_steps_per_second": 1.782, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00017777777777777779, |
|
"loss": 0.2979, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 15.6759, |
|
"eval_loss": 0.21439459919929504, |
|
"eval_per": 0.2941, |
|
"eval_runtime": 121.944, |
|
"eval_samples_per_second": 57.149, |
|
"eval_steps_per_second": 1.788, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 0.2319, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 15.7219, |
|
"eval_loss": 0.18984708189964294, |
|
"eval_per": 0.2765, |
|
"eval_runtime": 120.5224, |
|
"eval_samples_per_second": 57.823, |
|
"eval_steps_per_second": 1.809, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.2042, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 15.7235, |
|
"eval_loss": 0.1799211949110031, |
|
"eval_per": 0.2719, |
|
"eval_runtime": 119.8528, |
|
"eval_samples_per_second": 58.146, |
|
"eval_steps_per_second": 1.819, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00011111111111111112, |
|
"loss": 0.1879, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 15.7054, |
|
"eval_loss": 0.1743946522474289, |
|
"eval_per": 0.2657, |
|
"eval_runtime": 120.4255, |
|
"eval_samples_per_second": 57.87, |
|
"eval_steps_per_second": 1.81, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.1763, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 15.7406, |
|
"eval_loss": 0.1728605031967163, |
|
"eval_per": 0.2645, |
|
"eval_runtime": 119.6482, |
|
"eval_samples_per_second": 58.246, |
|
"eval_steps_per_second": 1.822, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.1677, |
|
"step": 2674 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 15.7269, |
|
"eval_loss": 0.1693088412284851, |
|
"eval_per": 0.2619, |
|
"eval_runtime": 119.4883, |
|
"eval_samples_per_second": 58.324, |
|
"eval_steps_per_second": 1.824, |
|
"step": 2674 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.1613, |
|
"step": 3056 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 15.7371, |
|
"eval_loss": 0.1667601615190506, |
|
"eval_per": 0.2604, |
|
"eval_runtime": 118.43, |
|
"eval_samples_per_second": 58.845, |
|
"eval_steps_per_second": 1.841, |
|
"step": 3056 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1559, |
|
"step": 3438 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 15.7383, |
|
"eval_loss": 0.16669157147407532, |
|
"eval_per": 0.2585, |
|
"eval_runtime": 118.8757, |
|
"eval_samples_per_second": 58.624, |
|
"eval_steps_per_second": 1.834, |
|
"step": 3438 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1534, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 15.7318, |
|
"eval_loss": 0.16682308912277222, |
|
"eval_per": 0.2588, |
|
"eval_runtime": 118.3076, |
|
"eval_samples_per_second": 58.906, |
|
"eval_steps_per_second": 1.843, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3820, |
|
"total_flos": 5.601731015983104e+16, |
|
"train_loss": 0.36827531884478026, |
|
"train_runtime": 2516.6862, |
|
"train_samples_per_second": 193.814, |
|
"train_steps_per_second": 1.518 |
|
} |
|
], |
|
"max_steps": 3820, |
|
"num_train_epochs": 10, |
|
"total_flos": 5.601731015983104e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|