{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9988649262202043,
  "eval_steps": 6,
  "global_step": 110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05448354143019296,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.073,
      "step": 6
    },
    {
      "epoch": 0.10896708286038592,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.039,
      "step": 12
    },
    {
      "epoch": 0.16345062429057888,
      "grad_norm": 0.44932249188423157,
      "learning_rate": 0.0016329931618554523,
      "loss": 1.0353,
      "step": 18
    },
    {
      "epoch": 0.21793416572077184,
      "grad_norm": 0.7056324481964111,
      "learning_rate": 0.001,
      "loss": 0.9232,
      "step": 24
    },
    {
      "epoch": 0.2724177071509648,
      "grad_norm": 0.21584172546863556,
      "learning_rate": 0.0007559289460184544,
      "loss": 0.7909,
      "step": 30
    },
    {
      "epoch": 0.32690124858115777,
      "grad_norm": 0.19453680515289307,
      "learning_rate": 0.0006324555320336759,
      "loss": 0.7352,
      "step": 36
    },
    {
      "epoch": 0.3813847900113507,
      "grad_norm": 0.19031628966331482,
      "learning_rate": 0.0005547001962252292,
      "loss": 0.7123,
      "step": 42
    },
    {
      "epoch": 0.4358683314415437,
      "grad_norm": 0.1654825061559677,
      "learning_rate": 0.0005,
      "loss": 0.6918,
      "step": 48
    },
    {
      "epoch": 0.49035187287173665,
      "grad_norm": 0.1845771223306656,
      "learning_rate": 0.0004588314677411235,
      "loss": 0.7118,
      "step": 54
    },
    {
      "epoch": 0.5448354143019296,
      "grad_norm": 0.19587676227092743,
      "learning_rate": 0.00042640143271122083,
      "loss": 0.6994,
      "step": 60
    },
    {
      "epoch": 0.5993189557321226,
      "grad_norm": 0.20122814178466797,
      "learning_rate": 0.0004,
      "loss": 0.672,
      "step": 66
    },
    {
      "epoch": 0.6538024971623155,
      "grad_norm": 0.21745193004608154,
      "learning_rate": 0.0003779644730092272,
      "loss": 0.6553,
      "step": 72
    },
    {
      "epoch": 0.7082860385925085,
      "grad_norm": 0.17879709601402283,
      "learning_rate": 0.00035921060405354985,
      "loss": 0.6807,
      "step": 78
    },
    {
      "epoch": 0.7627695800227015,
      "grad_norm": 0.18536536395549774,
      "learning_rate": 0.00034299717028501764,
      "loss": 0.6738,
      "step": 84
    },
    {
      "epoch": 0.8172531214528944,
      "grad_norm": 0.20840761065483093,
      "learning_rate": 0.0003287979746107146,
      "loss": 0.669,
      "step": 90
    },
    {
      "epoch": 0.8717366628830874,
      "grad_norm": 0.16762040555477142,
      "learning_rate": 0.00031622776601683794,
      "loss": 0.6734,
      "step": 96
    },
    {
      "epoch": 0.9262202043132803,
      "grad_norm": 0.16556385159492493,
      "learning_rate": 0.00030499714066520935,
      "loss": 0.6661,
      "step": 102
    },
    {
      "epoch": 0.9807037457434733,
      "grad_norm": 0.1867838054895401,
      "learning_rate": 0.0002948839123097943,
      "loss": 0.6784,
      "step": 108
    },
    {
      "epoch": 0.9988649262202043,
      "step": 110,
      "total_flos": 1.1172110162041242e+18,
      "train_loss": 0.7636631564660505,
      "train_runtime": 975.3131,
      "train_samples_per_second": 14.451,
      "train_steps_per_second": 0.113
    }
  ],
  "logging_steps": 6,
  "max_steps": 110,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 6,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1172110162041242e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}