{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 0.07732755954222084, | |
"eval_steps": 500, | |
"global_step": 2000, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 2.1645, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 1.9507, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 1.502, | |
"step": 75 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 1.7333, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 1.2266, | |
"step": 125 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.604, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.2927, | |
"step": 175 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.4849, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.2372, | |
"step": 225 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.4155, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.1745, | |
"step": 275 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.3327, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.1347, | |
"step": 325 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.2828, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.3051, | |
"step": 375 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.4646, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.2109, | |
"step": 425 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.3195, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.3475, | |
"step": 475 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.4109, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.0661, | |
"step": 525 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.2554, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.1572, | |
"step": 575 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.3693, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 0.9443, | |
"step": 625 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.3281, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.189, | |
"step": 675 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.3747, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.0517, | |
"step": 725 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.3771, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 0.9529, | |
"step": 775 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.3633, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.1759, | |
"step": 825 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.2803, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.1455, | |
"step": 875 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.2917, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 0.9807, | |
"step": 925 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.2193, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.0852, | |
"step": 975 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.2919, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.1368, | |
"step": 1025 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.2861, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.162, | |
"step": 1075 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.3554, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.0723, | |
"step": 1125 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.1713, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.258, | |
"step": 1175 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.3437, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.1127, | |
"step": 1225 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.295, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.0447, | |
"step": 1275 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.1771, | |
"step": 1300 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.1057, | |
"step": 1325 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.2026, | |
"step": 1350 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.1061, | |
"step": 1375 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.3012, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.1393, | |
"step": 1425 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.3238, | |
"step": 1450 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.1414, | |
"step": 1475 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.3037, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.1731, | |
"step": 1525 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.1238, | |
"step": 1550 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 0.9542, | |
"step": 1575 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.1471, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 0.9938, | |
"step": 1625 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.2156, | |
"step": 1650 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.0002, | |
"loss": 1.03, | |
"step": 1675 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.2215, | |
"step": 1700 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.0335, | |
"step": 1725 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.3059, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 0.9583, | |
"step": 1775 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.1925, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.0331, | |
"step": 1825 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.2639, | |
"step": 1850 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 0.9898, | |
"step": 1875 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.2714, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.049, | |
"step": 1925 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.0002, | |
"loss": 1.2586, | |
"step": 1950 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.0002, | |
"loss": 1.0331, | |
"step": 1975 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.0002, | |
"loss": 1.2745, | |
"step": 2000 | |
} | |
], | |
"logging_steps": 25, | |
"max_steps": 25864, | |
"num_train_epochs": 1, | |
"save_steps": 2000, | |
"total_flos": 1.6927075537256448e+16, | |
"trial_name": null, | |
"trial_params": null | |
} | |