{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9962546816479403,
"eval_steps": 500,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.049937578027465665,
"grad_norm": 29.759422123901825,
"learning_rate": 5e-06,
"loss": 1.0298,
"step": 10
},
{
"epoch": 0.09987515605493133,
"grad_norm": 1.659620518169597,
"learning_rate": 5e-06,
"loss": 0.9418,
"step": 20
},
{
"epoch": 0.149812734082397,
"grad_norm": 1.6655817038532692,
"learning_rate": 5e-06,
"loss": 0.9067,
"step": 30
},
{
"epoch": 0.19975031210986266,
"grad_norm": 3.758532618440977,
"learning_rate": 5e-06,
"loss": 0.8837,
"step": 40
},
{
"epoch": 0.24968789013732834,
"grad_norm": 1.0914089528556838,
"learning_rate": 5e-06,
"loss": 0.8683,
"step": 50
},
{
"epoch": 0.299625468164794,
"grad_norm": 0.8217188574851603,
"learning_rate": 5e-06,
"loss": 0.8554,
"step": 60
},
{
"epoch": 0.3495630461922597,
"grad_norm": 2.6152137256768913,
"learning_rate": 5e-06,
"loss": 0.8501,
"step": 70
},
{
"epoch": 0.3995006242197253,
"grad_norm": 0.6790645381435163,
"learning_rate": 5e-06,
"loss": 0.8376,
"step": 80
},
{
"epoch": 0.449438202247191,
"grad_norm": 1.050547891408586,
"learning_rate": 5e-06,
"loss": 0.8389,
"step": 90
},
{
"epoch": 0.4993757802746567,
"grad_norm": 0.61531936130051,
"learning_rate": 5e-06,
"loss": 0.8283,
"step": 100
},
{
"epoch": 0.5493133583021224,
"grad_norm": 0.7158756598167714,
"learning_rate": 5e-06,
"loss": 0.8285,
"step": 110
},
{
"epoch": 0.599250936329588,
"grad_norm": 0.6741740850175894,
"learning_rate": 5e-06,
"loss": 0.8208,
"step": 120
},
{
"epoch": 0.6491885143570537,
"grad_norm": 0.727395321147856,
"learning_rate": 5e-06,
"loss": 0.8226,
"step": 130
},
{
"epoch": 0.6991260923845194,
"grad_norm": 0.5979029940330495,
"learning_rate": 5e-06,
"loss": 0.8218,
"step": 140
},
{
"epoch": 0.7490636704119851,
"grad_norm": 0.7354628133399658,
"learning_rate": 5e-06,
"loss": 0.8146,
"step": 150
},
{
"epoch": 0.7990012484394506,
"grad_norm": 1.0797869029852383,
"learning_rate": 5e-06,
"loss": 0.811,
"step": 160
},
{
"epoch": 0.8489388264669163,
"grad_norm": 0.5732338176348897,
"learning_rate": 5e-06,
"loss": 0.8117,
"step": 170
},
{
"epoch": 0.898876404494382,
"grad_norm": 0.516647597175339,
"learning_rate": 5e-06,
"loss": 0.8099,
"step": 180
},
{
"epoch": 0.9488139825218477,
"grad_norm": 0.595235164677505,
"learning_rate": 5e-06,
"loss": 0.8062,
"step": 190
},
{
"epoch": 0.9987515605493134,
"grad_norm": 0.5491264032653016,
"learning_rate": 5e-06,
"loss": 0.7992,
"step": 200
},
{
"epoch": 0.9987515605493134,
"eval_loss": 0.8015628457069397,
"eval_runtime": 212.5855,
"eval_samples_per_second": 25.387,
"eval_steps_per_second": 0.4,
"step": 200
},
{
"epoch": 1.048689138576779,
"grad_norm": 1.1815779977106664,
"learning_rate": 5e-06,
"loss": 0.8245,
"step": 210
},
{
"epoch": 1.0986267166042447,
"grad_norm": 0.8607004375758024,
"learning_rate": 5e-06,
"loss": 0.7537,
"step": 220
},
{
"epoch": 1.1485642946317103,
"grad_norm": 0.6382531480247193,
"learning_rate": 5e-06,
"loss": 0.7617,
"step": 230
},
{
"epoch": 1.198501872659176,
"grad_norm": 0.5848998545511357,
"learning_rate": 5e-06,
"loss": 0.7635,
"step": 240
},
{
"epoch": 1.2484394506866416,
"grad_norm": 0.6856479277932508,
"learning_rate": 5e-06,
"loss": 0.7623,
"step": 250
},
{
"epoch": 1.2983770287141074,
"grad_norm": 0.7819524787327043,
"learning_rate": 5e-06,
"loss": 0.7567,
"step": 260
},
{
"epoch": 1.348314606741573,
"grad_norm": 0.6831313099201878,
"learning_rate": 5e-06,
"loss": 0.7606,
"step": 270
},
{
"epoch": 1.3982521847690386,
"grad_norm": 0.6700856388131974,
"learning_rate": 5e-06,
"loss": 0.7553,
"step": 280
},
{
"epoch": 1.4481897627965044,
"grad_norm": 0.5874295240823044,
"learning_rate": 5e-06,
"loss": 0.7575,
"step": 290
},
{
"epoch": 1.4981273408239701,
"grad_norm": 0.6100148315517313,
"learning_rate": 5e-06,
"loss": 0.7523,
"step": 300
},
{
"epoch": 1.5480649188514357,
"grad_norm": 0.6291672713518774,
"learning_rate": 5e-06,
"loss": 0.759,
"step": 310
},
{
"epoch": 1.5980024968789013,
"grad_norm": 0.7275448418797654,
"learning_rate": 5e-06,
"loss": 0.7532,
"step": 320
},
{
"epoch": 1.647940074906367,
"grad_norm": 0.6798292981346045,
"learning_rate": 5e-06,
"loss": 0.7652,
"step": 330
},
{
"epoch": 1.6978776529338329,
"grad_norm": 0.7320780258400261,
"learning_rate": 5e-06,
"loss": 0.7554,
"step": 340
},
{
"epoch": 1.7478152309612984,
"grad_norm": 0.6107676047027211,
"learning_rate": 5e-06,
"loss": 0.757,
"step": 350
},
{
"epoch": 1.797752808988764,
"grad_norm": 0.5083613384732135,
"learning_rate": 5e-06,
"loss": 0.7576,
"step": 360
},
{
"epoch": 1.8476903870162298,
"grad_norm": 0.5021025632111004,
"learning_rate": 5e-06,
"loss": 0.7584,
"step": 370
},
{
"epoch": 1.8976279650436954,
"grad_norm": 0.6593302140861815,
"learning_rate": 5e-06,
"loss": 0.755,
"step": 380
},
{
"epoch": 1.947565543071161,
"grad_norm": 0.5669868340257436,
"learning_rate": 5e-06,
"loss": 0.7478,
"step": 390
},
{
"epoch": 1.9975031210986267,
"grad_norm": 0.5745040341281294,
"learning_rate": 5e-06,
"loss": 0.7538,
"step": 400
},
{
"epoch": 1.9975031210986267,
"eval_loss": 0.7874204516410828,
"eval_runtime": 212.6782,
"eval_samples_per_second": 25.376,
"eval_steps_per_second": 0.4,
"step": 400
},
{
"epoch": 2.0474406991260925,
"grad_norm": 0.6952347883899184,
"learning_rate": 5e-06,
"loss": 0.7611,
"step": 410
},
{
"epoch": 2.097378277153558,
"grad_norm": 0.6000479994459602,
"learning_rate": 5e-06,
"loss": 0.7037,
"step": 420
},
{
"epoch": 2.1473158551810236,
"grad_norm": 0.6575266096005482,
"learning_rate": 5e-06,
"loss": 0.7089,
"step": 430
},
{
"epoch": 2.1972534332084894,
"grad_norm": 0.7384159721059136,
"learning_rate": 5e-06,
"loss": 0.7057,
"step": 440
},
{
"epoch": 2.247191011235955,
"grad_norm": 0.6735840214535883,
"learning_rate": 5e-06,
"loss": 0.706,
"step": 450
},
{
"epoch": 2.2971285892634206,
"grad_norm": 0.7153617513297972,
"learning_rate": 5e-06,
"loss": 0.7064,
"step": 460
},
{
"epoch": 2.3470661672908864,
"grad_norm": 0.7396133098853745,
"learning_rate": 5e-06,
"loss": 0.7049,
"step": 470
},
{
"epoch": 2.397003745318352,
"grad_norm": 0.6440383221784979,
"learning_rate": 5e-06,
"loss": 0.705,
"step": 480
},
{
"epoch": 2.4469413233458175,
"grad_norm": 0.5481603423583875,
"learning_rate": 5e-06,
"loss": 0.709,
"step": 490
},
{
"epoch": 2.4968789013732833,
"grad_norm": 0.6611697985224058,
"learning_rate": 5e-06,
"loss": 0.71,
"step": 500
},
{
"epoch": 2.546816479400749,
"grad_norm": 0.6252639550455323,
"learning_rate": 5e-06,
"loss": 0.7128,
"step": 510
},
{
"epoch": 2.596754057428215,
"grad_norm": 0.578764019014536,
"learning_rate": 5e-06,
"loss": 0.7116,
"step": 520
},
{
"epoch": 2.6466916354556806,
"grad_norm": 0.5718219886250622,
"learning_rate": 5e-06,
"loss": 0.711,
"step": 530
},
{
"epoch": 2.696629213483146,
"grad_norm": 0.6480951015929783,
"learning_rate": 5e-06,
"loss": 0.706,
"step": 540
},
{
"epoch": 2.746566791510612,
"grad_norm": 0.568128147930456,
"learning_rate": 5e-06,
"loss": 0.7144,
"step": 550
},
{
"epoch": 2.796504369538077,
"grad_norm": 0.7016907742592169,
"learning_rate": 5e-06,
"loss": 0.708,
"step": 560
},
{
"epoch": 2.846441947565543,
"grad_norm": 0.6718047517989062,
"learning_rate": 5e-06,
"loss": 0.7147,
"step": 570
},
{
"epoch": 2.8963795255930087,
"grad_norm": 0.7869266984488797,
"learning_rate": 5e-06,
"loss": 0.7114,
"step": 580
},
{
"epoch": 2.9463171036204745,
"grad_norm": 0.6070316014377024,
"learning_rate": 5e-06,
"loss": 0.7072,
"step": 590
},
{
"epoch": 2.9962546816479403,
"grad_norm": 0.5662761836861052,
"learning_rate": 5e-06,
"loss": 0.7158,
"step": 600
},
{
"epoch": 2.9962546816479403,
"eval_loss": 0.7859531044960022,
"eval_runtime": 213.866,
"eval_samples_per_second": 25.235,
"eval_steps_per_second": 0.397,
"step": 600
},
{
"epoch": 2.9962546816479403,
"step": 600,
"total_flos": 1004812967608320.0,
"train_loss": 0.773849273522695,
"train_runtime": 35442.7976,
"train_samples_per_second": 8.678,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 600,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1004812967608320.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}