|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 1100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.545454545454545e-07, |
|
"loss": 2.5611, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 2.5692, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 2.644, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"gpt4_scores": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.2810959815979004, |
|
"eval_runtime": 4.9148, |
|
"eval_samples_per_second": 4.68, |
|
"eval_steps_per_second": 1.221, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 2.3589, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 2.0614, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 1.9483, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"gpt4_scores": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.938421607017517, |
|
"eval_runtime": 4.8876, |
|
"eval_samples_per_second": 4.706, |
|
"eval_steps_per_second": 1.228, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.9987413559579636e-05, |
|
"loss": 1.7952, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.988679806432712e-05, |
|
"loss": 1.8206, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.968597221690986e-05, |
|
"loss": 1.7791, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"gpt4_scores": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.910915493965149, |
|
"eval_runtime": 4.8939, |
|
"eval_samples_per_second": 4.7, |
|
"eval_steps_per_second": 1.226, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.938574467213518e-05, |
|
"loss": 1.6577, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.898732434036244e-05, |
|
"loss": 1.6283, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.849231551964771e-05, |
|
"loss": 1.6053, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"gpt4_scores": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.9559897184371948, |
|
"eval_runtime": 4.8861, |
|
"eval_samples_per_second": 4.707, |
|
"eval_steps_per_second": 1.228, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 4.790271143580174e-05, |
|
"loss": 1.332, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 4.722088621637309e-05, |
|
"loss": 1.3206, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"gpt4_scores": 0.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.167389154434204, |
|
"eval_runtime": 4.8826, |
|
"eval_samples_per_second": 4.711, |
|
"eval_steps_per_second": 1.229, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 4.644958533087443e-05, |
|
"loss": 1.1859, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 4.559191453574582e-05, |
|
"loss": 0.9584, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 4.465132736856969e-05, |
|
"loss": 0.9722, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"gpt4_scores": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.423626661300659, |
|
"eval_runtime": 4.8846, |
|
"eval_samples_per_second": 4.709, |
|
"eval_steps_per_second": 1.228, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 4.3631611241893874e-05, |
|
"loss": 0.8037, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 4.2536872192658036e-05, |
|
"loss": 0.6705, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 4.137151834863213e-05, |
|
"loss": 0.7026, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"gpt4_scores": 0.0, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.693521499633789, |
|
"eval_runtime": 4.8871, |
|
"eval_samples_per_second": 4.706, |
|
"eval_steps_per_second": 1.228, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 4.014024217844167e-05, |
|
"loss": 0.5485, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 3.884800159665276e-05, |
|
"loss": 0.4682, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4776, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"gpt4_scores": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 3.0005459785461426, |
|
"eval_runtime": 4.8865, |
|
"eval_samples_per_second": 4.707, |
|
"eval_steps_per_second": 1.228, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 3.610166531514436e-05, |
|
"loss": 0.3172, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 3.465862814232822e-05, |
|
"loss": 0.3163, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"gpt4_scores": 0.0, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 3.270254611968994, |
|
"eval_runtime": 4.8817, |
|
"eval_samples_per_second": 4.711, |
|
"eval_steps_per_second": 1.229, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 3.3176699082935545e-05, |
|
"loss": 0.3132, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 3.166184534225087e-05, |
|
"loss": 0.213, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 3.012016670162977e-05, |
|
"loss": 0.2355, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"gpt4_scores": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.4523885250091553, |
|
"eval_runtime": 4.8835, |
|
"eval_samples_per_second": 4.71, |
|
"eval_steps_per_second": 1.229, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 2.8557870956832132e-05, |
|
"loss": 0.194, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 2.698124892141971e-05, |
|
"loss": 0.1577, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 2.5396649095870202e-05, |
|
"loss": 0.1489, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"gpt4_scores": 0.0, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 3.6804909706115723, |
|
"eval_runtime": 4.8815, |
|
"eval_samples_per_second": 4.712, |
|
"eval_steps_per_second": 1.229, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 2.3810452104406444e-05, |
|
"loss": 0.1045, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 2.222904500247473e-05, |
|
"loss": 0.0945, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 0.1179, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"gpt4_scores": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 3.7960402965545654, |
|
"eval_runtime": 4.8861, |
|
"eval_samples_per_second": 4.707, |
|
"eval_steps_per_second": 1.228, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 1.9106026612264316e-05, |
|
"loss": 0.0938, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 1.7576990616793137e-05, |
|
"loss": 0.0712, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"gpt4_scores": 0.0, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 3.971480369567871, |
|
"eval_runtime": 4.8837, |
|
"eval_samples_per_second": 4.71, |
|
"eval_steps_per_second": 1.229, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 1.6077844460203206e-05, |
|
"loss": 0.0715, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 1.4614624674952842e-05, |
|
"loss": 0.056, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 1.3193223130682936e-05, |
|
"loss": 0.0782, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"gpt4_scores": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 4.028377532958984, |
|
"eval_runtime": 4.8846, |
|
"eval_samples_per_second": 4.709, |
|
"eval_steps_per_second": 1.228, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 1.181936330973744e-05, |
|
"loss": 0.0582, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 1.049857726072005e-05, |
|
"loss": 0.0518, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 9.236183322886945e-06, |
|
"loss": 0.0691, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"gpt4_scores": 0.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 4.157599925994873, |
|
"eval_runtime": 4.8853, |
|
"eval_samples_per_second": 4.708, |
|
"eval_steps_per_second": 1.228, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 8.0372647110717e-06, |
|
"loss": 0.0483, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 6.906649047373246e-06, |
|
"loss": 0.0672, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 5.848888922025553e-06, |
|
"loss": 0.0486, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"gpt4_scores": 0.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 4.169044017791748, |
|
"eval_runtime": 4.8903, |
|
"eval_samples_per_second": 4.703, |
|
"eval_steps_per_second": 1.227, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 4.868243561723535e-06, |
|
"loss": 0.044, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 3.968661679220468e-06, |
|
"loss": 0.0639, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"gpt4_scores": 0.0, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 4.209411144256592, |
|
"eval_runtime": 4.8828, |
|
"eval_samples_per_second": 4.71, |
|
"eval_steps_per_second": 1.229, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 3.1537655732553768e-06, |
|
"loss": 0.0469, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 2.4268365428344736e-06, |
|
"loss": 0.0427, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 1.790801674598186e-06, |
|
"loss": 0.0589, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"gpt4_scores": 0.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 4.243592262268066, |
|
"eval_runtime": 4.8851, |
|
"eval_samples_per_second": 4.708, |
|
"eval_steps_per_second": 1.228, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 1.248222056476367e-06, |
|
"loss": 0.0453, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.55, |
|
"learning_rate": 8.012824650910938e-07, |
|
"loss": 0.0582, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 4.517825684323324e-07, |
|
"loss": 0.042, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"gpt4_scores": 0.0, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 4.24906063079834, |
|
"eval_runtime": 4.8836, |
|
"eval_samples_per_second": 4.71, |
|
"eval_steps_per_second": 1.229, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 2.011296792301165e-07, |
|
"loss": 0.0426, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 5.033308820289184e-08, |
|
"loss": 0.0611, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0405, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"gpt4_scores": 0.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 4.249512195587158, |
|
"eval_runtime": 4.8886, |
|
"eval_samples_per_second": 4.705, |
|
"eval_steps_per_second": 1.227, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 1100, |
|
"total_flos": 3.807078373542298e+16, |
|
"train_loss": 0.5375886155258526, |
|
"train_runtime": 2988.6301, |
|
"train_samples_per_second": 1.452, |
|
"train_steps_per_second": 0.368 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 20, |
|
"total_flos": 3.807078373542298e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|