|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.4984384759525295, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12492192379762648, |
|
"grad_norm": 0.27754032611846924, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1442, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24984384759525297, |
|
"grad_norm": 0.2751781642436981, |
|
"learning_rate": 2e-05, |
|
"loss": 0.088, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3747657713928795, |
|
"grad_norm": 0.23674216866493225, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0738, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.49968769519050593, |
|
"grad_norm": 0.23040202260017395, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0643, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6246096189881324, |
|
"grad_norm": 0.26208606362342834, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0584, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.749531542785759, |
|
"grad_norm": 0.2172231376171112, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0534, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8744534665833854, |
|
"grad_norm": 0.21661953628063202, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0502, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9993753903810119, |
|
"grad_norm": 0.186213880777359, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0471, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1242973141786383, |
|
"grad_norm": 0.2086647003889084, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0373, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2492192379762648, |
|
"grad_norm": 0.1900208592414856, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0351, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3741411617738915, |
|
"grad_norm": 0.21206732094287872, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0349, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.499063085571518, |
|
"grad_norm": 0.20564615726470947, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0332, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6239850093691444, |
|
"grad_norm": 0.2596442997455597, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0313, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.7489069331667708, |
|
"grad_norm": 0.18866442143917084, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0309, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8738288569643973, |
|
"grad_norm": 0.19815115630626678, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0298, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.9987507807620237, |
|
"grad_norm": 0.21096043288707733, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0281, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.12367270455965, |
|
"grad_norm": 0.1663718819618225, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0191, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.2485946283572766, |
|
"grad_norm": 0.1737908571958542, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0191, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.373516552154903, |
|
"grad_norm": 0.21257372200489044, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0191, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.4984384759525295, |
|
"grad_norm": 0.21343937516212463, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0185, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 94237318184960.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|