|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9907235621521338, |
|
"eval_steps": 500, |
|
"global_step": 201, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14842300556586271, |
|
"grad_norm": 8.770494049160096, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7728, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.29684601113172543, |
|
"grad_norm": 5.239180384850786, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7163, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4452690166975881, |
|
"grad_norm": 1.2130700854178535, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6781, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5936920222634509, |
|
"grad_norm": 1.0745174920296572, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6678, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7421150278293135, |
|
"grad_norm": 0.8867357173198487, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6594, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8905380333951762, |
|
"grad_norm": 1.2379194861360832, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6453, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9944341372912802, |
|
"eval_loss": 0.6419587731361389, |
|
"eval_runtime": 72.5933, |
|
"eval_samples_per_second": 25.016, |
|
"eval_steps_per_second": 0.399, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0426716141001855, |
|
"grad_norm": 1.1230378978262539, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6781, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1910946196660483, |
|
"grad_norm": 0.9315188359610128, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5789, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.339517625231911, |
|
"grad_norm": 0.7280697861125209, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5768, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4879406307977736, |
|
"grad_norm": 0.7574742425182042, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5637, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 0.639849134642834, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5671, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.784786641929499, |
|
"grad_norm": 0.6958680579149541, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5709, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.9332096474953617, |
|
"grad_norm": 0.540531330356496, |
|
"learning_rate": 5e-06, |
|
"loss": 0.568, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9925788497217067, |
|
"eval_loss": 0.6242453455924988, |
|
"eval_runtime": 73.2468, |
|
"eval_samples_per_second": 24.793, |
|
"eval_steps_per_second": 0.396, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.085343228200371, |
|
"grad_norm": 0.778875955578771, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5879, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.2337662337662336, |
|
"grad_norm": 0.756734990400302, |
|
"learning_rate": 5e-06, |
|
"loss": 0.507, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3821892393320967, |
|
"grad_norm": 0.619492041600219, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5063, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.5306122448979593, |
|
"grad_norm": 0.843944094594807, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5001, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.679035250463822, |
|
"grad_norm": 0.860609592948253, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5013, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.8274582560296846, |
|
"grad_norm": 0.7816936988629369, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5063, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.9758812615955472, |
|
"grad_norm": 0.7099567262640085, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5028, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.9907235621521338, |
|
"eval_loss": 0.6297215223312378, |
|
"eval_runtime": 71.9347, |
|
"eval_samples_per_second": 25.245, |
|
"eval_steps_per_second": 0.403, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 2.9907235621521338, |
|
"step": 201, |
|
"total_flos": 336473106677760.0, |
|
"train_loss": 0.592634052186463, |
|
"train_runtime": 12077.2754, |
|
"train_samples_per_second": 8.567, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 201, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 336473106677760.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|