|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.603550295857988, |
|
"eval_steps": 5, |
|
"global_step": 110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11834319526627218, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 2.6428, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.11834319526627218, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.5284, |
|
"eval_samples_per_second": 0.606, |
|
"eval_steps_per_second": 0.153, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.23668639053254437, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.992981096013517e-05, |
|
"loss": 2.6411, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.23668639053254437, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.7203, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.35502958579881655, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.964635581908359e-05, |
|
"loss": 2.6552, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.35502958579881655, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.7242, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.47337278106508873, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9151456172430186e-05, |
|
"loss": 2.6293, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.47337278106508873, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.967, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.845596003501826e-05, |
|
"loss": 2.6225, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 268.4624, |
|
"eval_samples_per_second": 0.603, |
|
"eval_steps_per_second": 0.153, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.7100591715976331, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7575112421616203e-05, |
|
"loss": 2.6365, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7100591715976331, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.8379, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8284023668639053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6528221181905217e-05, |
|
"loss": 2.6304, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.8284023668639053, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.5669, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.9467455621301775, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.533823377964791e-05, |
|
"loss": 2.6426, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9467455621301775, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.4996, |
|
"eval_samples_per_second": 0.606, |
|
"eval_steps_per_second": 0.153, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0650887573964498, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4031234292879726e-05, |
|
"loss": 2.6404, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0650887573964498, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.7725, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.183431952662722, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2635871660690677e-05, |
|
"loss": 2.6214, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.183431952662722, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.7285, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.301775147928994, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1182731709213658e-05, |
|
"loss": 2.636, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.301775147928994, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.906, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.4201183431952662, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.703666721774403e-06, |
|
"loss": 2.6574, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.4201183431952662, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.4678, |
|
"eval_samples_per_second": 0.606, |
|
"eval_steps_per_second": 0.153, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.231097248774273e-06, |
|
"loss": 2.6211, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.8967, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.6568047337278107, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.797301461371626e-06, |
|
"loss": 2.6171, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.6568047337278107, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.422, |
|
"eval_samples_per_second": 0.606, |
|
"eval_steps_per_second": 0.153, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7751479289940828, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.43370762606287e-06, |
|
"loss": 2.6375, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.7751479289940828, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 268.3368, |
|
"eval_samples_per_second": 0.604, |
|
"eval_steps_per_second": 0.153, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.893491124260355, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.170205208855281e-06, |
|
"loss": 2.6548, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.893491124260355, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.6946, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0118343195266273, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.0344897093700333e-06, |
|
"loss": 2.6684, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.0118343195266273, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.6543, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.1301775147928996, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0514555858664663e-06, |
|
"loss": 2.6305, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.1301775147928996, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.8601, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.2485207100591715, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2426505780436326e-06, |
|
"loss": 2.6421, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.2485207100591715, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.7705, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.366863905325444, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.258033886587911e-07, |
|
"loss": 2.6469, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.366863905325444, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.5145, |
|
"eval_samples_per_second": 0.606, |
|
"eval_steps_per_second": 0.153, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.485207100591716, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1443507700495968e-07, |
|
"loss": 2.6275, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.485207100591716, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.7646, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.603550295857988, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7562682356786488e-08, |
|
"loss": 2.6352, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.603550295857988, |
|
"eval_loss": 2.5045294761657715, |
|
"eval_runtime": 267.6496, |
|
"eval_samples_per_second": 0.605, |
|
"eval_steps_per_second": 0.153, |
|
"step": 110 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 112, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 5, |
|
"total_flos": 5.736198700007424e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|