Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9291941eb7434899dfe63259f2324c25c686bd1b70cfee2b0e6d71ef2e4fa28
 size 389074464

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ccbbc979dbcb855093c1022e67f8d8f2e7a8958695720a3fa7867d848fe8418
 size 389074464

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ec16e554db031fc41660fe2ed019398ca6cade0eade423a47c8114cf75ca54f
 size 198011700

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b8ed08c3f666cd1c5ada4049417329565e12366158b85fa2a3651cffeda4c35
 size 198011700

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63161f04815e6b18c4138379dc13b055cbf4d2901c516663d4b27628238cae41
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cab5fa0301c82455b63eeff640d32c81a2bfdff2b469e31742e154db49ae5dba
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7755893468856812,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.005317363518233572,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 23.523,
       "eval_steps_per_second": 5.881,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.2946712002966323e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7737080454826355,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.006646704397791965,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.523,
       "eval_steps_per_second": 5.881,
       "step": 400
+    },
+    {
+      "epoch": 0.005330656927029156,
+      "grad_norm": 0.40012648701667786,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 0.9604,
+      "step": 401
+    },
+    {
+      "epoch": 0.00534395033582474,
+      "grad_norm": 0.5858917832374573,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 0.8581,
+      "step": 402
+    },
+    {
+      "epoch": 0.005357243744620324,
+      "grad_norm": 0.5397459864616394,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 0.7486,
+      "step": 403
+    },
+    {
+      "epoch": 0.0053705371534159075,
+      "grad_norm": 0.5918818116188049,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.6034,
+      "step": 404
+    },
+    {
+      "epoch": 0.005383830562211492,
+      "grad_norm": 0.6741821765899658,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 0.7264,
+      "step": 405
+    },
+    {
+      "epoch": 0.005397123971007075,
+      "grad_norm": 0.6818976402282715,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 0.7604,
+      "step": 406
+    },
+    {
+      "epoch": 0.00541041737980266,
+      "grad_norm": 0.5880439281463623,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 0.8363,
+      "step": 407
+    },
+    {
+      "epoch": 0.005423710788598243,
+      "grad_norm": 0.5962891578674316,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 0.8284,
+      "step": 408
+    },
+    {
+      "epoch": 0.0054370041973938275,
+      "grad_norm": 0.6675729751586914,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 0.8829,
+      "step": 409
+    },
+    {
+      "epoch": 0.005450297606189411,
+      "grad_norm": 0.6426963210105896,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.8203,
+      "step": 410
+    },
+    {
+      "epoch": 0.005463591014984995,
+      "grad_norm": 0.6224886178970337,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 0.8401,
+      "step": 411
+    },
+    {
+      "epoch": 0.005476884423780579,
+      "grad_norm": 0.7933815717697144,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.9562,
+      "step": 412
+    },
+    {
+      "epoch": 0.005490177832576163,
+      "grad_norm": 0.6306743621826172,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 0.8244,
+      "step": 413
+    },
+    {
+      "epoch": 0.005503471241371747,
+      "grad_norm": 0.6678284406661987,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 0.9218,
+      "step": 414
+    },
+    {
+      "epoch": 0.005516764650167331,
+      "grad_norm": 0.7518947720527649,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 0.8463,
+      "step": 415
+    },
+    {
+      "epoch": 0.0055300580589629144,
+      "grad_norm": 0.7990968227386475,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 0.7836,
+      "step": 416
+    },
+    {
+      "epoch": 0.005543351467758499,
+      "grad_norm": 0.7595654129981995,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 0.784,
+      "step": 417
+    },
+    {
+      "epoch": 0.005556644876554082,
+      "grad_norm": 0.7038994431495667,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 0.799,
+      "step": 418
+    },
+    {
+      "epoch": 0.005569938285349667,
+      "grad_norm": 0.8161758780479431,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 0.7753,
+      "step": 419
+    },
+    {
+      "epoch": 0.00558323169414525,
+      "grad_norm": 0.7890415191650391,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.8616,
+      "step": 420
+    },
+    {
+      "epoch": 0.0055965251029408344,
+      "grad_norm": 0.7748125195503235,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 0.8245,
+      "step": 421
+    },
+    {
+      "epoch": 0.005609818511736418,
+      "grad_norm": 0.7969025373458862,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 0.8565,
+      "step": 422
+    },
+    {
+      "epoch": 0.005623111920532002,
+      "grad_norm": 0.8649901151657104,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 0.7626,
+      "step": 423
+    },
+    {
+      "epoch": 0.005636405329327586,
+      "grad_norm": 0.9874373078346252,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 0.8389,
+      "step": 424
+    },
+    {
+      "epoch": 0.00564969873812317,
+      "grad_norm": 1.0835766792297363,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 0.9118,
+      "step": 425
+    },
+    {
+      "epoch": 0.005662992146918754,
+      "grad_norm": 0.9118421673774719,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 0.7834,
+      "step": 426
+    },
+    {
+      "epoch": 0.005676285555714338,
+      "grad_norm": 0.8470845222473145,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 0.9175,
+      "step": 427
+    },
+    {
+      "epoch": 0.005689578964509922,
+      "grad_norm": 0.9073331952095032,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.6926,
+      "step": 428
+    },
+    {
+      "epoch": 0.005702872373305506,
+      "grad_norm": 0.8814385533332825,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 0.7417,
+      "step": 429
+    },
+    {
+      "epoch": 0.00571616578210109,
+      "grad_norm": 0.8356123566627502,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.7357,
+      "step": 430
+    },
+    {
+      "epoch": 0.005729459190896674,
+      "grad_norm": 0.9383419156074524,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 0.7568,
+      "step": 431
+    },
+    {
+      "epoch": 0.005742752599692258,
+      "grad_norm": 1.0396068096160889,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 0.8964,
+      "step": 432
+    },
+    {
+      "epoch": 0.005756046008487841,
+      "grad_norm": 0.943768322467804,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 0.8078,
+      "step": 433
+    },
+    {
+      "epoch": 0.005769339417283426,
+      "grad_norm": 1.0417028665542603,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 0.7028,
+      "step": 434
+    },
+    {
+      "epoch": 0.005782632826079009,
+      "grad_norm": 0.9930217862129211,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 0.7532,
+      "step": 435
+    },
+    {
+      "epoch": 0.005795926234874594,
+      "grad_norm": 0.96372389793396,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.778,
+      "step": 436
+    },
+    {
+      "epoch": 0.005809219643670177,
+      "grad_norm": 0.9927554726600647,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 0.7152,
+      "step": 437
+    },
+    {
+      "epoch": 0.005822513052465761,
+      "grad_norm": 1.0342813730239868,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 0.6377,
+      "step": 438
+    },
+    {
+      "epoch": 0.005835806461261345,
+      "grad_norm": 1.0414509773254395,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 0.6376,
+      "step": 439
+    },
+    {
+      "epoch": 0.005849099870056929,
+      "grad_norm": 1.107492208480835,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.6948,
+      "step": 440
+    },
+    {
+      "epoch": 0.005862393278852513,
+      "grad_norm": 1.1170679330825806,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 0.7437,
+      "step": 441
+    },
+    {
+      "epoch": 0.005875686687648097,
+      "grad_norm": 1.1103891134262085,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 0.7499,
+      "step": 442
+    },
+    {
+      "epoch": 0.0058889800964436806,
+      "grad_norm": 1.24330735206604,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 0.6334,
+      "step": 443
+    },
+    {
+      "epoch": 0.005902273505239265,
+      "grad_norm": 1.3449854850769043,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.7968,
+      "step": 444
+    },
+    {
+      "epoch": 0.005915566914034848,
+      "grad_norm": 1.3204176425933838,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 0.6635,
+      "step": 445
+    },
+    {
+      "epoch": 0.005928860322830433,
+      "grad_norm": 1.2414649724960327,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 0.7295,
+      "step": 446
+    },
+    {
+      "epoch": 0.005942153731626016,
+      "grad_norm": 1.5263049602508545,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 0.7899,
+      "step": 447
+    },
+    {
+      "epoch": 0.0059554471404216006,
+      "grad_norm": 1.4617761373519897,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 0.9122,
+      "step": 448
+    },
+    {
+      "epoch": 0.005968740549217184,
+      "grad_norm": 1.6838090419769287,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 0.64,
+      "step": 449
+    },
+    {
+      "epoch": 0.005982033958012768,
+      "grad_norm": 2.6550161838531494,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.8126,
+      "step": 450
+    },
+    {
+      "epoch": 0.005995327366808352,
+      "grad_norm": 0.37697362899780273,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 0.8715,
+      "step": 451
+    },
+    {
+      "epoch": 0.006008620775603936,
+      "grad_norm": 0.5114825963973999,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.9097,
+      "step": 452
+    },
+    {
+      "epoch": 0.00602191418439952,
+      "grad_norm": 0.9041467905044556,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 0.7524,
+      "step": 453
+    },
+    {
+      "epoch": 0.006035207593195104,
+      "grad_norm": 0.6717054843902588,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 1.0211,
+      "step": 454
+    },
+    {
+      "epoch": 0.006048501001990688,
+      "grad_norm": 0.7875092029571533,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 0.9721,
+      "step": 455
+    },
+    {
+      "epoch": 0.006061794410786272,
+      "grad_norm": 0.5727041959762573,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 0.9011,
+      "step": 456
+    },
+    {
+      "epoch": 0.006075087819581856,
+      "grad_norm": 0.7041869163513184,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 0.7107,
+      "step": 457
+    },
+    {
+      "epoch": 0.00608838122837744,
+      "grad_norm": 0.6156165599822998,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 0.9564,
+      "step": 458
+    },
+    {
+      "epoch": 0.006101674637173024,
+      "grad_norm": 0.7087550759315491,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 0.7426,
+      "step": 459
+    },
+    {
+      "epoch": 0.0061149680459686075,
+      "grad_norm": 0.6475067138671875,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.9619,
+      "step": 460
+    },
+    {
+      "epoch": 0.006128261454764192,
+      "grad_norm": 0.6087029576301575,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 0.7861,
+      "step": 461
+    },
+    {
+      "epoch": 0.006141554863559775,
+      "grad_norm": 0.7449685335159302,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 0.8359,
+      "step": 462
+    },
+    {
+      "epoch": 0.00615484827235536,
+      "grad_norm": 0.6803917288780212,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 0.8678,
+      "step": 463
+    },
+    {
+      "epoch": 0.006168141681150943,
+      "grad_norm": 0.7641089558601379,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 0.7741,
+      "step": 464
+    },
+    {
+      "epoch": 0.0061814350899465275,
+      "grad_norm": 0.6775990128517151,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 0.8723,
+      "step": 465
+    },
+    {
+      "epoch": 0.006194728498742111,
+      "grad_norm": 0.7013960480690002,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 0.773,
+      "step": 466
+    },
+    {
+      "epoch": 0.006208021907537695,
+      "grad_norm": 0.776786208152771,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 0.8559,
+      "step": 467
+    },
+    {
+      "epoch": 0.006221315316333279,
+      "grad_norm": 0.7775864601135254,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.9041,
+      "step": 468
+    },
+    {
+      "epoch": 0.006234608725128863,
+      "grad_norm": 0.7506851553916931,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 0.8435,
+      "step": 469
+    },
+    {
+      "epoch": 0.006247902133924447,
+      "grad_norm": 0.7386621236801147,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.7473,
+      "step": 470
+    },
+    {
+      "epoch": 0.006261195542720031,
+      "grad_norm": 0.7993509769439697,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 0.7489,
+      "step": 471
+    },
+    {
+      "epoch": 0.0062744889515156145,
+      "grad_norm": 0.7552006840705872,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 0.7127,
+      "step": 472
+    },
+    {
+      "epoch": 0.006287782360311199,
+      "grad_norm": 0.7693676352500916,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 0.5929,
+      "step": 473
+    },
+    {
+      "epoch": 0.006301075769106782,
+      "grad_norm": 0.815386950969696,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 0.7111,
+      "step": 474
+    },
+    {
+      "epoch": 0.006314369177902367,
+      "grad_norm": 0.8880845904350281,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 0.8529,
+      "step": 475
+    },
+    {
+      "epoch": 0.00632766258669795,
+      "grad_norm": 0.8343062400817871,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.757,
+      "step": 476
+    },
+    {
+      "epoch": 0.0063409559954935345,
+      "grad_norm": 0.8290151357650757,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 0.7969,
+      "step": 477
+    },
+    {
+      "epoch": 0.006354249404289118,
+      "grad_norm": 0.897857129573822,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 0.8043,
+      "step": 478
+    },
+    {
+      "epoch": 0.006367542813084702,
+      "grad_norm": 0.8593537211418152,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 0.7659,
+      "step": 479
+    },
+    {
+      "epoch": 0.006380836221880286,
+      "grad_norm": 0.9188363552093506,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.8651,
+      "step": 480
+    },
+    {
+      "epoch": 0.00639412963067587,
+      "grad_norm": 0.9337843060493469,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 0.6921,
+      "step": 481
+    },
+    {
+      "epoch": 0.006407423039471454,
+      "grad_norm": 1.0456016063690186,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 0.9062,
+      "step": 482
+    },
+    {
+      "epoch": 0.006420716448267038,
+      "grad_norm": 0.7936729788780212,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 0.6508,
+      "step": 483
+    },
+    {
+      "epoch": 0.006434009857062622,
+      "grad_norm": 1.1638076305389404,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.8091,
+      "step": 484
+    },
+    {
+      "epoch": 0.006447303265858206,
+      "grad_norm": 1.103006362915039,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 0.7818,
+      "step": 485
+    },
+    {
+      "epoch": 0.00646059667465379,
+      "grad_norm": 0.9680044054985046,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 0.8071,
+      "step": 486
+    },
+    {
+      "epoch": 0.006473890083449374,
+      "grad_norm": 0.9734976887702942,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 0.7186,
+      "step": 487
+    },
+    {
+      "epoch": 0.006487183492244958,
+      "grad_norm": 1.012603521347046,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 0.8422,
+      "step": 488
+    },
+    {
+      "epoch": 0.0065004769010405415,
+      "grad_norm": 1.0326621532440186,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 0.8658,
+      "step": 489
+    },
+    {
+      "epoch": 0.006513770309836126,
+      "grad_norm": 1.2042450904846191,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 1.0374,
+      "step": 490
+    },
+    {
+      "epoch": 0.006527063718631709,
+      "grad_norm": 1.1898421049118042,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 0.7595,
+      "step": 491
+    },
+    {
+      "epoch": 0.006540357127427294,
+      "grad_norm": 1.2077451944351196,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.7744,
+      "step": 492
+    },
+    {
+      "epoch": 0.006553650536222877,
+      "grad_norm": 1.251529335975647,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 0.5662,
+      "step": 493
+    },
+    {
+      "epoch": 0.0065669439450184615,
+      "grad_norm": 1.371506690979004,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 0.8208,
+      "step": 494
+    },
+    {
+      "epoch": 0.006580237353814045,
+      "grad_norm": 1.296230435371399,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 0.8259,
+      "step": 495
+    },
+    {
+      "epoch": 0.006593530762609629,
+      "grad_norm": 1.438158631324768,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 0.7091,
+      "step": 496
+    },
+    {
+      "epoch": 0.006606824171405213,
+      "grad_norm": 1.3646377325057983,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 0.934,
+      "step": 497
+    },
+    {
+      "epoch": 0.006620117580200797,
+      "grad_norm": 1.6082695722579956,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 0.8442,
+      "step": 498
+    },
+    {
+      "epoch": 0.006633410988996381,
+      "grad_norm": 1.5005607604980469,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 0.6807,
+      "step": 499
+    },
+    {
+      "epoch": 0.006646704397791965,
+      "grad_norm": 2.516221284866333,
+      "learning_rate": 0.0,
+      "loss": 0.7107,
+      "step": 500
+    },
+    {
+      "epoch": 0.006646704397791965,
+      "eval_loss": 0.7737080454826355,
+      "eval_runtime": 5387.8964,
+      "eval_samples_per_second": 23.515,
+      "eval_steps_per_second": 5.879,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.8679806819447603e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null