retro_base / intensive /trainer_state.json
faori's picture
Upload 12 files
e30f3bc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 20400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24509803921568626,
"grad_norm": 26.514015197753906,
"learning_rate": 4.872549019607843e-06,
"loss": 9.1121,
"step": 500
},
{
"epoch": 0.49019607843137253,
"grad_norm": 17.301259994506836,
"learning_rate": 9.764705882352942e-06,
"loss": 4.6199,
"step": 1000
},
{
"epoch": 0.7352941176470589,
"grad_norm": 19.435855865478516,
"learning_rate": 1.4656862745098039e-05,
"loss": 3.7886,
"step": 1500
},
{
"epoch": 0.9803921568627451,
"grad_norm": 21.463775634765625,
"learning_rate": 1.954901960784314e-05,
"loss": 3.3042,
"step": 2000
},
{
"epoch": 1.0,
"eval_HasAns_exact": 41.66596479407058,
"eval_HasAns_f1": 45.75952971203001,
"eval_HasAns_total": 11873,
"eval_best_exact": 63.918133580392485,
"eval_best_exact_thresh": 17.15625,
"eval_best_f1": 68.01169849835183,
"eval_best_f1_thresh": 17.15625,
"eval_exact": 41.66596479407058,
"eval_f1": 45.75952971203001,
"eval_runtime": 84.8894,
"eval_samples_per_second": 140.995,
"eval_steps_per_second": 2.215,
"eval_total": 11873,
"step": 2040
},
{
"epoch": 1.2254901960784315,
"grad_norm": 22.533653259277344,
"learning_rate": 1.9505446623093686e-05,
"loss": 2.8594,
"step": 2500
},
{
"epoch": 1.4705882352941178,
"grad_norm": 16.711044311523438,
"learning_rate": 1.8960784313725492e-05,
"loss": 2.7032,
"step": 3000
},
{
"epoch": 1.715686274509804,
"grad_norm": 20.539369583129883,
"learning_rate": 1.84161220043573e-05,
"loss": 2.6147,
"step": 3500
},
{
"epoch": 1.9607843137254903,
"grad_norm": 17.798688888549805,
"learning_rate": 1.7871459694989108e-05,
"loss": 2.565,
"step": 4000
},
{
"epoch": 2.0,
"eval_HasAns_exact": 42.69350627474101,
"eval_HasAns_f1": 46.51650253068049,
"eval_HasAns_total": 11873,
"eval_best_exact": 72.38271708919397,
"eval_best_exact_thresh": 19.26171875,
"eval_best_f1": 76.20571334513343,
"eval_best_f1_thresh": 19.26171875,
"eval_exact": 42.69350627474101,
"eval_f1": 46.51650253068049,
"eval_runtime": 85.6719,
"eval_samples_per_second": 139.707,
"eval_steps_per_second": 2.194,
"eval_total": 11873,
"step": 4080
},
{
"epoch": 2.2058823529411766,
"grad_norm": 15.679291725158691,
"learning_rate": 1.7326797385620918e-05,
"loss": 2.2395,
"step": 4500
},
{
"epoch": 2.450980392156863,
"grad_norm": 15.367960929870605,
"learning_rate": 1.6783224400871462e-05,
"loss": 2.1524,
"step": 5000
},
{
"epoch": 2.696078431372549,
"grad_norm": 13.34710693359375,
"learning_rate": 1.623856209150327e-05,
"loss": 2.1288,
"step": 5500
},
{
"epoch": 2.9411764705882355,
"grad_norm": 30.506305694580078,
"learning_rate": 1.5693899782135078e-05,
"loss": 2.1232,
"step": 6000
},
{
"epoch": 3.0,
"eval_HasAns_exact": 42.86195569780173,
"eval_HasAns_f1": 46.30919897657347,
"eval_HasAns_total": 11873,
"eval_best_exact": 74.37884275246357,
"eval_best_exact_thresh": 22.76953125,
"eval_best_f1": 77.82608603123529,
"eval_best_f1_thresh": 22.76953125,
"eval_exact": 42.86195569780173,
"eval_f1": 46.30919897657347,
"eval_runtime": 84.8777,
"eval_samples_per_second": 141.015,
"eval_steps_per_second": 2.215,
"eval_total": 11873,
"step": 6120
},
{
"epoch": 3.186274509803922,
"grad_norm": 16.010583877563477,
"learning_rate": 1.5149237472766886e-05,
"loss": 1.8918,
"step": 6500
},
{
"epoch": 3.431372549019608,
"grad_norm": 17.6584529876709,
"learning_rate": 1.4604575163398694e-05,
"loss": 1.8567,
"step": 7000
},
{
"epoch": 3.6764705882352944,
"grad_norm": 16.007558822631836,
"learning_rate": 1.4059912854030502e-05,
"loss": 1.828,
"step": 7500
},
{
"epoch": 3.9215686274509802,
"grad_norm": 13.604976654052734,
"learning_rate": 1.3515250544662311e-05,
"loss": 1.844,
"step": 8000
},
{
"epoch": 4.0,
"eval_HasAns_exact": 41.497515371009854,
"eval_HasAns_f1": 44.91728050965404,
"eval_HasAns_total": 11873,
"eval_best_exact": 76.1728291080603,
"eval_best_exact_thresh": 23.875,
"eval_best_f1": 79.59259424670454,
"eval_best_f1_thresh": 23.875,
"eval_exact": 41.497515371009854,
"eval_f1": 44.91728050965404,
"eval_runtime": 84.9529,
"eval_samples_per_second": 140.89,
"eval_steps_per_second": 2.213,
"eval_total": 11873,
"step": 8160
},
{
"epoch": 4.166666666666667,
"grad_norm": 19.50208282470703,
"learning_rate": 1.297058823529412e-05,
"loss": 1.6876,
"step": 8500
},
{
"epoch": 4.411764705882353,
"grad_norm": 31.982032775878906,
"learning_rate": 1.2425925925925927e-05,
"loss": 1.626,
"step": 9000
},
{
"epoch": 4.6568627450980395,
"grad_norm": 28.154998779296875,
"learning_rate": 1.1881263616557735e-05,
"loss": 1.6113,
"step": 9500
},
{
"epoch": 4.901960784313726,
"grad_norm": 14.066104888916016,
"learning_rate": 1.1336601307189543e-05,
"loss": 1.6376,
"step": 10000
},
{
"epoch": 5.0,
"eval_HasAns_exact": 41.337488419102165,
"eval_HasAns_f1": 44.79650825632658,
"eval_HasAns_total": 11873,
"eval_best_exact": 76.0549145119178,
"eval_best_exact_thresh": 24.96484375,
"eval_best_f1": 79.51393434914232,
"eval_best_f1_thresh": 24.96484375,
"eval_exact": 41.337488419102165,
"eval_f1": 44.79650825632658,
"eval_runtime": 86.6553,
"eval_samples_per_second": 138.122,
"eval_steps_per_second": 2.17,
"eval_total": 11873,
"step": 10200
},
{
"epoch": 5.147058823529412,
"grad_norm": 10.620172500610352,
"learning_rate": 1.0791938997821352e-05,
"loss": 1.5461,
"step": 10500
},
{
"epoch": 5.392156862745098,
"grad_norm": 8.425840377807617,
"learning_rate": 1.024727668845316e-05,
"loss": 1.4638,
"step": 11000
},
{
"epoch": 5.637254901960784,
"grad_norm": 14.811394691467285,
"learning_rate": 9.702614379084968e-06,
"loss": 1.5059,
"step": 11500
},
{
"epoch": 5.882352941176471,
"grad_norm": 24.639976501464844,
"learning_rate": 9.157952069716776e-06,
"loss": 1.4819,
"step": 12000
},
{
"epoch": 6.0,
"eval_HasAns_exact": 41.48067042870378,
"eval_HasAns_f1": 45.146223596426786,
"eval_HasAns_total": 11873,
"eval_best_exact": 74.41253263707571,
"eval_best_exact_thresh": 26.44140625,
"eval_best_f1": 78.0780858047988,
"eval_best_f1_thresh": 26.44140625,
"eval_exact": 41.48067042870378,
"eval_f1": 45.146223596426786,
"eval_runtime": 85.4707,
"eval_samples_per_second": 140.036,
"eval_steps_per_second": 2.2,
"eval_total": 11873,
"step": 12240
},
{
"epoch": 6.127450980392156,
"grad_norm": 15.370038032531738,
"learning_rate": 8.613289760348584e-06,
"loss": 1.4085,
"step": 12500
},
{
"epoch": 6.372549019607844,
"grad_norm": 21.215662002563477,
"learning_rate": 8.069716775599129e-06,
"loss": 1.3618,
"step": 13000
},
{
"epoch": 6.617647058823529,
"grad_norm": 21.488882064819336,
"learning_rate": 7.5250544662309376e-06,
"loss": 1.3808,
"step": 13500
},
{
"epoch": 6.862745098039216,
"grad_norm": 20.903297424316406,
"learning_rate": 6.9803921568627454e-06,
"loss": 1.3846,
"step": 14000
},
{
"epoch": 7.0,
"eval_HasAns_exact": 40.59631095763497,
"eval_HasAns_f1": 44.0313300086635,
"eval_HasAns_total": 11873,
"eval_best_exact": 78.64061315589994,
"eval_best_exact_thresh": 26.69140625,
"eval_best_f1": 82.07563220692849,
"eval_best_f1_thresh": 26.69140625,
"eval_exact": 40.59631095763497,
"eval_f1": 44.0313300086635,
"eval_runtime": 88.9313,
"eval_samples_per_second": 134.587,
"eval_steps_per_second": 2.114,
"eval_total": 11873,
"step": 14280
},
{
"epoch": 7.107843137254902,
"grad_norm": 37.26835250854492,
"learning_rate": 6.435729847494554e-06,
"loss": 1.3405,
"step": 14500
},
{
"epoch": 7.352941176470588,
"grad_norm": 15.88136100769043,
"learning_rate": 5.891067538126363e-06,
"loss": 1.2833,
"step": 15000
},
{
"epoch": 7.598039215686274,
"grad_norm": 18.780271530151367,
"learning_rate": 5.34640522875817e-06,
"loss": 1.2905,
"step": 15500
},
{
"epoch": 7.8431372549019605,
"grad_norm": 9.158843040466309,
"learning_rate": 4.801742919389979e-06,
"loss": 1.3058,
"step": 16000
},
{
"epoch": 8.0,
"eval_HasAns_exact": 40.32679188073781,
"eval_HasAns_f1": 44.093437026674906,
"eval_HasAns_total": 11873,
"eval_best_exact": 77.43619978101574,
"eval_best_exact_thresh": 27.0859375,
"eval_best_f1": 81.20284492695288,
"eval_best_f1_thresh": 27.0859375,
"eval_exact": 40.32679188073781,
"eval_f1": 44.093437026674906,
"eval_runtime": 85.3184,
"eval_samples_per_second": 140.286,
"eval_steps_per_second": 2.204,
"eval_total": 11873,
"step": 16320
},
{
"epoch": 8.088235294117647,
"grad_norm": 27.426488876342773,
"learning_rate": 4.2570806100217874e-06,
"loss": 1.2616,
"step": 16500
},
{
"epoch": 8.333333333333334,
"grad_norm": 21.802486419677734,
"learning_rate": 3.7135076252723314e-06,
"loss": 1.2336,
"step": 17000
},
{
"epoch": 8.57843137254902,
"grad_norm": 24.72883415222168,
"learning_rate": 3.1699346405228758e-06,
"loss": 1.2427,
"step": 17500
},
{
"epoch": 8.823529411764707,
"grad_norm": 23.679203033447266,
"learning_rate": 2.625272331154684e-06,
"loss": 1.2367,
"step": 18000
},
{
"epoch": 9.0,
"eval_HasAns_exact": 40.25941211151352,
"eval_HasAns_f1": 43.84815270396611,
"eval_HasAns_total": 11873,
"eval_best_exact": 77.7730986271372,
"eval_best_exact_thresh": 28.12890625,
"eval_best_f1": 81.36183921958985,
"eval_best_f1_thresh": 28.12890625,
"eval_exact": 40.25941211151352,
"eval_f1": 43.84815270396611,
"eval_runtime": 85.5334,
"eval_samples_per_second": 139.934,
"eval_steps_per_second": 2.198,
"eval_total": 11873,
"step": 18360
},
{
"epoch": 9.068627450980392,
"grad_norm": 6.289318561553955,
"learning_rate": 2.0806100217864924e-06,
"loss": 1.202,
"step": 18500
},
{
"epoch": 9.313725490196079,
"grad_norm": 13.79410457611084,
"learning_rate": 1.535947712418301e-06,
"loss": 1.1927,
"step": 19000
},
{
"epoch": 9.558823529411764,
"grad_norm": 10.254155158996582,
"learning_rate": 9.91285403050109e-07,
"loss": 1.1843,
"step": 19500
},
{
"epoch": 9.803921568627452,
"grad_norm": 8.800715446472168,
"learning_rate": 4.466230936819173e-07,
"loss": 1.1861,
"step": 20000
},
{
"epoch": 10.0,
"eval_HasAns_exact": 40.50366377495157,
"eval_HasAns_f1": 44.072135007775564,
"eval_HasAns_total": 11873,
"eval_best_exact": 77.4025098964036,
"eval_best_exact_thresh": 28.15625,
"eval_best_f1": 80.97098112922765,
"eval_best_f1_thresh": 28.15625,
"eval_exact": 40.50366377495157,
"eval_f1": 44.072135007775564,
"eval_runtime": 85.0565,
"eval_samples_per_second": 140.718,
"eval_steps_per_second": 2.21,
"eval_total": 11873,
"step": 20400
},
{
"epoch": 10.0,
"step": 20400,
"total_flos": 3.4100627236540416e+17,
"train_loss": 2.000769229963714,
"train_runtime": 15111.8581,
"train_samples_per_second": 86.358,
"train_steps_per_second": 1.35
}
],
"logging_steps": 500,
"max_steps": 20400,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.4100627236540416e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}