|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.957983193277311, |
|
"eval_steps": 500, |
|
"global_step": 132, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22408963585434175, |
|
"grad_norm": 31.895059346549615, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0566, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4481792717086835, |
|
"grad_norm": 1.7397235388731687, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9929, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6722689075630253, |
|
"grad_norm": 24.73289602340893, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9455, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.896358543417367, |
|
"grad_norm": 15.103388080469578, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9223, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9859943977591037, |
|
"eval_loss": 0.9011093974113464, |
|
"eval_runtime": 30.3519, |
|
"eval_samples_per_second": 39.569, |
|
"eval_steps_per_second": 0.626, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.1204481792717087, |
|
"grad_norm": 1.3240129819926922, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9194, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3445378151260505, |
|
"grad_norm": 1.0063155415585696, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8587, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.5686274509803921, |
|
"grad_norm": 1.087235116946335, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8482, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7927170868347337, |
|
"grad_norm": 0.763889264805126, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8393, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.9943977591036415, |
|
"eval_loss": 0.8523734211921692, |
|
"eval_runtime": 30.796, |
|
"eval_samples_per_second": 38.999, |
|
"eval_steps_per_second": 0.617, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 2.0168067226890756, |
|
"grad_norm": 1.0022099725803806, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8585, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.2408963585434174, |
|
"grad_norm": 0.8920948411419858, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7773, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.4649859943977592, |
|
"grad_norm": 0.7868621725302428, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7786, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.689075630252101, |
|
"grad_norm": 0.7973258054338478, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7664, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.9131652661064424, |
|
"grad_norm": 0.8414953772946839, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7699, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.957983193277311, |
|
"eval_loss": 0.8426499366760254, |
|
"eval_runtime": 29.0845, |
|
"eval_samples_per_second": 41.293, |
|
"eval_steps_per_second": 0.653, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.957983193277311, |
|
"step": 132, |
|
"total_flos": 220895536742400.0, |
|
"train_loss": 0.8702321576349663, |
|
"train_runtime": 4622.8871, |
|
"train_samples_per_second": 14.805, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 132, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 220895536742400.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|