{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.04387311894002545,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002193655947001272,
      "grad_norm": 3.5356061458587646,
      "learning_rate": 2.997806344052999e-05,
      "loss": 0.2274,
      "step": 50
    },
    {
      "epoch": 0.004387311894002544,
      "grad_norm": 0.4738060534000397,
      "learning_rate": 2.9956126881059976e-05,
      "loss": 0.0055,
      "step": 100
    },
    {
      "epoch": 0.006580967841003817,
      "grad_norm": 0.18818098306655884,
      "learning_rate": 2.993419032158996e-05,
      "loss": 0.0014,
      "step": 150
    },
    {
      "epoch": 0.008774623788005089,
      "grad_norm": 0.009225570596754551,
      "learning_rate": 2.991225376211995e-05,
      "loss": 0.0005,
      "step": 200
    },
    {
      "epoch": 0.010968279735006362,
      "grad_norm": 0.032771218568086624,
      "learning_rate": 2.9890317202649936e-05,
      "loss": 0.0009,
      "step": 250
    },
    {
      "epoch": 0.013161935682007634,
      "grad_norm": 0.03574460744857788,
      "learning_rate": 2.9868380643179925e-05,
      "loss": 0.0005,
      "step": 300
    },
    {
      "epoch": 0.015355591629008906,
      "grad_norm": 0.0016715412493795156,
      "learning_rate": 2.984644408370991e-05,
      "loss": 0.0001,
      "step": 350
    },
    {
      "epoch": 0.017549247576010178,
      "grad_norm": 0.0006590808043256402,
      "learning_rate": 2.9824507524239897e-05,
      "loss": 0.0001,
      "step": 400
    },
    {
      "epoch": 0.01974290352301145,
      "grad_norm": 0.00635514734312892,
      "learning_rate": 2.9802570964769886e-05,
      "loss": 0.0002,
      "step": 450
    },
    {
      "epoch": 0.021936559470012724,
      "grad_norm": 0.0005727710667997599,
      "learning_rate": 2.9780634405299875e-05,
      "loss": 0.0001,
      "step": 500
    },
    {
      "epoch": 0.024130215417013996,
      "grad_norm": 0.0005088996258564293,
      "learning_rate": 2.975869784582986e-05,
      "loss": 0.0002,
      "step": 550
    },
    {
      "epoch": 0.026323871364015268,
      "grad_norm": 0.010593525134027004,
      "learning_rate": 2.9736761286359847e-05,
      "loss": 0.0002,
      "step": 600
    },
    {
      "epoch": 0.02851752731101654,
      "grad_norm": 0.006351508665829897,
      "learning_rate": 2.9714824726889836e-05,
      "loss": 0.0001,
      "step": 650
    },
    {
      "epoch": 0.03071118325801781,
      "grad_norm": 0.0015719968359917402,
      "learning_rate": 2.969288816741982e-05,
      "loss": 0.0,
      "step": 700
    },
    {
      "epoch": 0.03290483920501908,
      "grad_norm": 0.0007212675409391522,
      "learning_rate": 2.967095160794981e-05,
      "loss": 0.0,
      "step": 750
    },
    {
      "epoch": 0.035098495152020355,
      "grad_norm": 0.00027612957637757063,
      "learning_rate": 2.96490150484798e-05,
      "loss": 0.0,
      "step": 800
    },
    {
      "epoch": 0.03729215109902163,
      "grad_norm": 0.0005350236897356808,
      "learning_rate": 2.9627078489009782e-05,
      "loss": 0.0,
      "step": 850
    },
    {
      "epoch": 0.0394858070460229,
      "grad_norm": 0.0006030969670973718,
      "learning_rate": 2.960514192953977e-05,
      "loss": 0.0,
      "step": 900
    },
    {
      "epoch": 0.04167946299302418,
      "grad_norm": 0.00019211292965337634,
      "learning_rate": 2.958320537006976e-05,
      "loss": 0.0001,
      "step": 950
    },
    {
      "epoch": 0.04387311894002545,
      "grad_norm": 0.00020958702953066677,
      "learning_rate": 2.9561268810599746e-05,
      "loss": 0.0,
      "step": 1000
    }
  ],
  "logging_steps": 50,
  "max_steps": 68379,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "total_flos": 0.0,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}