omriel1's picture
Upload 9 files
c7b84fc verified
raw
history blame
4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04387311894002545,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002193655947001272,
"grad_norm": 3.5356061458587646,
"learning_rate": 2.997806344052999e-05,
"loss": 0.2274,
"step": 50
},
{
"epoch": 0.004387311894002544,
"grad_norm": 0.4738060534000397,
"learning_rate": 2.9956126881059976e-05,
"loss": 0.0055,
"step": 100
},
{
"epoch": 0.006580967841003817,
"grad_norm": 0.18818098306655884,
"learning_rate": 2.993419032158996e-05,
"loss": 0.0014,
"step": 150
},
{
"epoch": 0.008774623788005089,
"grad_norm": 0.009225570596754551,
"learning_rate": 2.991225376211995e-05,
"loss": 0.0005,
"step": 200
},
{
"epoch": 0.010968279735006362,
"grad_norm": 0.032771218568086624,
"learning_rate": 2.9890317202649936e-05,
"loss": 0.0009,
"step": 250
},
{
"epoch": 0.013161935682007634,
"grad_norm": 0.03574460744857788,
"learning_rate": 2.9868380643179925e-05,
"loss": 0.0005,
"step": 300
},
{
"epoch": 0.015355591629008906,
"grad_norm": 0.0016715412493795156,
"learning_rate": 2.984644408370991e-05,
"loss": 0.0001,
"step": 350
},
{
"epoch": 0.017549247576010178,
"grad_norm": 0.0006590808043256402,
"learning_rate": 2.9824507524239897e-05,
"loss": 0.0001,
"step": 400
},
{
"epoch": 0.01974290352301145,
"grad_norm": 0.00635514734312892,
"learning_rate": 2.9802570964769886e-05,
"loss": 0.0002,
"step": 450
},
{
"epoch": 0.021936559470012724,
"grad_norm": 0.0005727710667997599,
"learning_rate": 2.9780634405299875e-05,
"loss": 0.0001,
"step": 500
},
{
"epoch": 0.024130215417013996,
"grad_norm": 0.0005088996258564293,
"learning_rate": 2.975869784582986e-05,
"loss": 0.0002,
"step": 550
},
{
"epoch": 0.026323871364015268,
"grad_norm": 0.010593525134027004,
"learning_rate": 2.9736761286359847e-05,
"loss": 0.0002,
"step": 600
},
{
"epoch": 0.02851752731101654,
"grad_norm": 0.006351508665829897,
"learning_rate": 2.9714824726889836e-05,
"loss": 0.0001,
"step": 650
},
{
"epoch": 0.03071118325801781,
"grad_norm": 0.0015719968359917402,
"learning_rate": 2.969288816741982e-05,
"loss": 0.0,
"step": 700
},
{
"epoch": 0.03290483920501908,
"grad_norm": 0.0007212675409391522,
"learning_rate": 2.967095160794981e-05,
"loss": 0.0,
"step": 750
},
{
"epoch": 0.035098495152020355,
"grad_norm": 0.00027612957637757063,
"learning_rate": 2.96490150484798e-05,
"loss": 0.0,
"step": 800
},
{
"epoch": 0.03729215109902163,
"grad_norm": 0.0005350236897356808,
"learning_rate": 2.9627078489009782e-05,
"loss": 0.0,
"step": 850
},
{
"epoch": 0.0394858070460229,
"grad_norm": 0.0006030969670973718,
"learning_rate": 2.960514192953977e-05,
"loss": 0.0,
"step": 900
},
{
"epoch": 0.04167946299302418,
"grad_norm": 0.00019211292965337634,
"learning_rate": 2.958320537006976e-05,
"loss": 0.0001,
"step": 950
},
{
"epoch": 0.04387311894002545,
"grad_norm": 0.00020958702953066677,
"learning_rate": 2.9561268810599746e-05,
"loss": 0.0,
"step": 1000
}
],
"logging_steps": 50,
"max_steps": 68379,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}