{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9982668977469671,
"eval_steps": 18,
"global_step": 360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04991334488734835,
"grad_norm": 0.4083130955696106,
"learning_rate": 0.002,
"loss": 1.2589,
"step": 18
},
{
"epoch": 0.0998266897746967,
"grad_norm": 0.3094785809516907,
"learning_rate": 0.0008944271909999159,
"loss": 1.0636,
"step": 36
},
{
"epoch": 0.14974003466204505,
"grad_norm": 0.37615087628364563,
"learning_rate": 0.0006488856845230502,
"loss": 0.8574,
"step": 54
},
{
"epoch": 0.1996533795493934,
"grad_norm": 0.2839493453502655,
"learning_rate": 0.0005345224838248488,
"loss": 0.8292,
"step": 72
},
{
"epoch": 0.24956672443674177,
"grad_norm": 0.2941131889820099,
"learning_rate": 0.00046499055497527714,
"loss": 0.7934,
"step": 90
},
{
"epoch": 0.2994800693240901,
"grad_norm": 0.3659161627292633,
"learning_rate": 0.0004170288281141495,
"loss": 0.7727,
"step": 108
},
{
"epoch": 0.3493934142114385,
"grad_norm": 0.3437303304672241,
"learning_rate": 0.00038138503569823694,
"loss": 0.7557,
"step": 126
},
{
"epoch": 0.3993067590987868,
"grad_norm": 0.2811639904975891,
"learning_rate": 0.00035355339059327376,
"loss": 0.7285,
"step": 144
},
{
"epoch": 0.44922010398613516,
"grad_norm": 0.35479724407196045,
"learning_rate": 0.00033104235544094716,
"loss": 0.7107,
"step": 162
},
{
"epoch": 0.49913344887348354,
"grad_norm": 0.3011772036552429,
"learning_rate": 0.0003123475237772121,
"loss": 0.7186,
"step": 180
},
{
"epoch": 0.5490467937608319,
"grad_norm": 0.29623347520828247,
"learning_rate": 0.00029649972666444046,
"loss": 0.6818,
"step": 198
},
{
"epoch": 0.5989601386481802,
"grad_norm": 0.3092997074127197,
"learning_rate": 0.000282842712474619,
"loss": 0.6701,
"step": 216
},
{
"epoch": 0.6488734835355287,
"grad_norm": 0.32858991622924805,
"learning_rate": 0.00027091418459143857,
"loss": 0.6733,
"step": 234
},
{
"epoch": 0.698786828422877,
"grad_norm": 0.3046702742576599,
"learning_rate": 0.0002603778219616478,
"loss": 0.6523,
"step": 252
},
{
"epoch": 0.7487001733102253,
"grad_norm": 0.41049444675445557,
"learning_rate": 0.00025098232205526344,
"loss": 0.6473,
"step": 270
},
{
"epoch": 0.7986135181975736,
"grad_norm": 0.35009312629699707,
"learning_rate": 0.00024253562503633296,
"loss": 0.6309,
"step": 288
},
{
"epoch": 0.848526863084922,
"grad_norm": 0.3388204276561737,
"learning_rate": 0.0002348880878058814,
"loss": 0.6264,
"step": 306
},
{
"epoch": 0.8984402079722703,
"grad_norm": 0.28809213638305664,
"learning_rate": 0.0002279211529192759,
"loss": 0.6046,
"step": 324
},
{
"epoch": 0.9483535528596188,
"grad_norm": 0.3333686292171478,
"learning_rate": 0.0002215395102486845,
"loss": 0.5891,
"step": 342
},
{
"epoch": 0.9982668977469671,
"grad_norm": 0.3479894697666168,
"learning_rate": 0.00021566554640687683,
"loss": 0.5952,
"step": 360
}
],
"logging_steps": 18,
"max_steps": 360,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 18,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.656326962122588e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}