|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.567398119122257, |
|
"eval_steps": 50, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.3869346733668342e-05, |
|
"loss": 0.7793, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 0.27101635932922363, |
|
"eval_runtime": 141.469, |
|
"eval_samples_per_second": 5.047, |
|
"eval_steps_per_second": 0.636, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.2613065326633167e-05, |
|
"loss": 0.2453, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 0.21987898647785187, |
|
"eval_runtime": 142.8952, |
|
"eval_samples_per_second": 4.997, |
|
"eval_steps_per_second": 0.63, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.135678391959799e-05, |
|
"loss": 0.2079, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.19277334213256836, |
|
"eval_runtime": 142.7101, |
|
"eval_samples_per_second": 5.003, |
|
"eval_steps_per_second": 0.631, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.0100502512562815e-05, |
|
"loss": 0.1836, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 0.179254949092865, |
|
"eval_runtime": 143.1949, |
|
"eval_samples_per_second": 4.986, |
|
"eval_steps_per_second": 0.629, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.884422110552764e-05, |
|
"loss": 0.1762, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 0.1739482283592224, |
|
"eval_runtime": 142.441, |
|
"eval_samples_per_second": 5.013, |
|
"eval_steps_per_second": 0.632, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7587939698492464e-05, |
|
"loss": 0.1692, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.16870561242103577, |
|
"eval_runtime": 141.3005, |
|
"eval_samples_per_second": 5.053, |
|
"eval_steps_per_second": 0.637, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6331658291457288e-05, |
|
"loss": 0.1657, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.1669251173734665, |
|
"eval_runtime": 141.02, |
|
"eval_samples_per_second": 5.063, |
|
"eval_steps_per_second": 0.638, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.507537688442211e-05, |
|
"loss": 0.1681, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 0.1648036390542984, |
|
"eval_runtime": 140.8855, |
|
"eval_samples_per_second": 5.068, |
|
"eval_steps_per_second": 0.639, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.3819095477386935e-05, |
|
"loss": 0.1576, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 0.16245362162590027, |
|
"eval_runtime": 141.4143, |
|
"eval_samples_per_second": 5.049, |
|
"eval_steps_per_second": 0.636, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2562814070351759e-05, |
|
"loss": 0.1656, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 0.1608215868473053, |
|
"eval_runtime": 141.5918, |
|
"eval_samples_per_second": 5.043, |
|
"eval_steps_per_second": 0.636, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1306532663316583e-05, |
|
"loss": 0.1517, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 0.1596228927373886, |
|
"eval_runtime": 142.7115, |
|
"eval_samples_per_second": 5.003, |
|
"eval_steps_per_second": 0.631, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0050251256281408e-05, |
|
"loss": 0.1561, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 0.15880218148231506, |
|
"eval_runtime": 143.3712, |
|
"eval_samples_per_second": 4.98, |
|
"eval_steps_per_second": 0.628, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.793969849246232e-06, |
|
"loss": 0.1473, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.15745262801647186, |
|
"eval_runtime": 143.6929, |
|
"eval_samples_per_second": 4.969, |
|
"eval_steps_per_second": 0.626, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 7.537688442211055e-06, |
|
"loss": 0.1488, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 0.1574247181415558, |
|
"eval_runtime": 142.725, |
|
"eval_samples_per_second": 5.003, |
|
"eval_steps_per_second": 0.631, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.2814070351758795e-06, |
|
"loss": 0.1468, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 0.1565464437007904, |
|
"eval_runtime": 141.7942, |
|
"eval_samples_per_second": 5.035, |
|
"eval_steps_per_second": 0.635, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.025125628140704e-06, |
|
"loss": 0.1379, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.1558983027935028, |
|
"eval_runtime": 141.5337, |
|
"eval_samples_per_second": 5.045, |
|
"eval_steps_per_second": 0.636, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.7688442211055276e-06, |
|
"loss": 0.1414, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 0.15601229667663574, |
|
"eval_runtime": 141.5594, |
|
"eval_samples_per_second": 5.044, |
|
"eval_steps_per_second": 0.636, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.512562814070352e-06, |
|
"loss": 0.1377, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.15548652410507202, |
|
"eval_runtime": 141.6005, |
|
"eval_samples_per_second": 5.042, |
|
"eval_steps_per_second": 0.636, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.256281407035176e-06, |
|
"loss": 0.1463, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 0.1545386165380478, |
|
"eval_runtime": 141.6136, |
|
"eval_samples_per_second": 5.042, |
|
"eval_steps_per_second": 0.636, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0, |
|
"loss": 0.1441, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 0.15440967679023743, |
|
"eval_runtime": 142.3851, |
|
"eval_samples_per_second": 5.015, |
|
"eval_steps_per_second": 0.632, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"total_flos": 1.7525216609776435e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|