|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.34285714285714286, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022857142857142857, |
|
"grad_norm": 1.0595301389694214, |
|
"learning_rate": 0.002, |
|
"loss": 3.6082, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045714285714285714, |
|
"grad_norm": 0.2752091884613037, |
|
"learning_rate": 0.002, |
|
"loss": 1.5529, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06857142857142857, |
|
"grad_norm": 0.17800529301166534, |
|
"learning_rate": 0.002, |
|
"loss": 1.0928, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09142857142857143, |
|
"grad_norm": 0.21639685332775116, |
|
"learning_rate": 0.002, |
|
"loss": 0.99, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 0.2555713951587677, |
|
"learning_rate": 0.002, |
|
"loss": 0.884, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13714285714285715, |
|
"grad_norm": 0.1835678517818451, |
|
"learning_rate": 0.002, |
|
"loss": 1.2252, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.23407411575317383, |
|
"learning_rate": 0.002, |
|
"loss": 1.0325, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18285714285714286, |
|
"grad_norm": 0.20827867090702057, |
|
"learning_rate": 0.002, |
|
"loss": 0.9401, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2057142857142857, |
|
"grad_norm": 0.20146110653877258, |
|
"learning_rate": 0.002, |
|
"loss": 0.8852, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 0.2659783661365509, |
|
"learning_rate": 0.002, |
|
"loss": 0.8176, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25142857142857145, |
|
"grad_norm": 0.20099078118801117, |
|
"learning_rate": 0.002, |
|
"loss": 1.1806, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2742857142857143, |
|
"grad_norm": 0.18531735241413116, |
|
"learning_rate": 0.002, |
|
"loss": 0.9712, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29714285714285715, |
|
"grad_norm": 0.19588737189769745, |
|
"learning_rate": 0.002, |
|
"loss": 0.918, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.19109125435352325, |
|
"learning_rate": 0.002, |
|
"loss": 0.7857, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 0.2857687771320343, |
|
"learning_rate": 0.002, |
|
"loss": 0.7828, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.524734837978624e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|