Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84afea606d9006fe7973537b577d8365ff5cbb4e424056c46103ab3aa396ef24
 size 226530600

 version https://git-lfs.github.com/spec/v1
+oid sha256:bcdbbe04b8a2742136d08df3160b7d141ac9f4d5eb61da9d82c8140d8d3cbd80
 size 226530600

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:732743f72395ec038a3feda70faa7db79c4deeb3aa4c460891d0629c5bb7bd58
 size 115354708

 version https://git-lfs.github.com/spec/v1
+oid sha256:b30a1bcd96c70281148f85233374c2d75344dde3f6a793ac1f187ecab2e59e1c
 size 115354708

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3ce13de4b5b2537e73b71dc2286e6f990f37876ee3171f981c1ab665e510069
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:55322e9e5517d21284e7b7e8cf29df52add4c691fc21c89f243dfcd6558cc0e4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01c5525f1d8420ca8a81a7fd2ec397a508131d03210dfd36c7ac5758b0e6313b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3e88ef6a2716260516e17223973d6a3b0a4c88bf12c72ed47e80e6f2a6782fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6108973026275635,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.011298158400180771,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 24.115,
       "eval_steps_per_second": 6.031,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.01010030723072e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.5817376375198364,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.015064211200241028,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 24.115,
       "eval_steps_per_second": 6.031,
       "step": 150
+    },
+    {
+      "epoch": 0.011373479456181976,
+      "grad_norm": 0.649183988571167,
+      "learning_rate": 2.589263157894737e-05,
+      "loss": 1.611,
+      "step": 151
+    },
+    {
+      "epoch": 0.011448800512183182,
+      "grad_norm": 0.7302289605140686,
+      "learning_rate": 2.536421052631579e-05,
+      "loss": 1.6365,
+      "step": 152
+    },
+    {
+      "epoch": 0.011524121568184387,
+      "grad_norm": 0.6303717494010925,
+      "learning_rate": 2.483578947368421e-05,
+      "loss": 1.6841,
+      "step": 153
+    },
+    {
+      "epoch": 0.011599442624185592,
+      "grad_norm": 0.617495596408844,
+      "learning_rate": 2.430736842105263e-05,
+      "loss": 1.7066,
+      "step": 154
+    },
+    {
+      "epoch": 0.011674763680186797,
+      "grad_norm": 0.5921853184700012,
+      "learning_rate": 2.3778947368421052e-05,
+      "loss": 1.6247,
+      "step": 155
+    },
+    {
+      "epoch": 0.011750084736188002,
+      "grad_norm": 0.5351122617721558,
+      "learning_rate": 2.3250526315789473e-05,
+      "loss": 1.532,
+      "step": 156
+    },
+    {
+      "epoch": 0.011825405792189207,
+      "grad_norm": 0.7010920643806458,
+      "learning_rate": 2.2722105263157894e-05,
+      "loss": 1.4932,
+      "step": 157
+    },
+    {
+      "epoch": 0.011900726848190412,
+      "grad_norm": 0.5989274382591248,
+      "learning_rate": 2.2193684210526316e-05,
+      "loss": 1.5778,
+      "step": 158
+    },
+    {
+      "epoch": 0.011976047904191617,
+      "grad_norm": 1.2539459466934204,
+      "learning_rate": 2.1665263157894737e-05,
+      "loss": 1.6146,
+      "step": 159
+    },
+    {
+      "epoch": 0.012051368960192823,
+      "grad_norm": 0.5905941724777222,
+      "learning_rate": 2.1136842105263158e-05,
+      "loss": 1.528,
+      "step": 160
+    },
+    {
+      "epoch": 0.012126690016194028,
+      "grad_norm": 0.6011009812355042,
+      "learning_rate": 2.060842105263158e-05,
+      "loss": 1.5958,
+      "step": 161
+    },
+    {
+      "epoch": 0.012202011072195233,
+      "grad_norm": 0.5911026000976562,
+      "learning_rate": 2.008e-05,
+      "loss": 1.5631,
+      "step": 162
+    },
+    {
+      "epoch": 0.012277332128196438,
+      "grad_norm": 0.6240259408950806,
+      "learning_rate": 1.9551578947368422e-05,
+      "loss": 1.5768,
+      "step": 163
+    },
+    {
+      "epoch": 0.012352653184197643,
+      "grad_norm": 0.614745557308197,
+      "learning_rate": 1.9023157894736843e-05,
+      "loss": 1.6111,
+      "step": 164
+    },
+    {
+      "epoch": 0.012427974240198848,
+      "grad_norm": 0.6425798535346985,
+      "learning_rate": 1.849473684210526e-05,
+      "loss": 1.7052,
+      "step": 165
+    },
+    {
+      "epoch": 0.012503295296200053,
+      "grad_norm": 0.6402067542076111,
+      "learning_rate": 1.7966315789473686e-05,
+      "loss": 1.5539,
+      "step": 166
+    },
+    {
+      "epoch": 0.012578616352201259,
+      "grad_norm": 0.6447589993476868,
+      "learning_rate": 1.7437894736842107e-05,
+      "loss": 1.5884,
+      "step": 167
+    },
+    {
+      "epoch": 0.012653937408202464,
+      "grad_norm": 0.6616122722625732,
+      "learning_rate": 1.6909473684210525e-05,
+      "loss": 1.4832,
+      "step": 168
+    },
+    {
+      "epoch": 0.012729258464203669,
+      "grad_norm": 0.7140440940856934,
+      "learning_rate": 1.638105263157895e-05,
+      "loss": 1.6539,
+      "step": 169
+    },
+    {
+      "epoch": 0.012804579520204874,
+      "grad_norm": 0.6964972019195557,
+      "learning_rate": 1.5852631578947368e-05,
+      "loss": 1.6509,
+      "step": 170
+    },
+    {
+      "epoch": 0.012879900576206079,
+      "grad_norm": 0.6502802968025208,
+      "learning_rate": 1.532421052631579e-05,
+      "loss": 1.498,
+      "step": 171
+    },
+    {
+      "epoch": 0.012955221632207284,
+      "grad_norm": 0.682644784450531,
+      "learning_rate": 1.4795789473684209e-05,
+      "loss": 1.5135,
+      "step": 172
+    },
+    {
+      "epoch": 0.01303054268820849,
+      "grad_norm": 0.6974450945854187,
+      "learning_rate": 1.4267368421052632e-05,
+      "loss": 1.5326,
+      "step": 173
+    },
+    {
+      "epoch": 0.013105863744209694,
+      "grad_norm": 0.7496076822280884,
+      "learning_rate": 1.3738947368421053e-05,
+      "loss": 1.5569,
+      "step": 174
+    },
+    {
+      "epoch": 0.0131811848002109,
+      "grad_norm": 0.7844842076301575,
+      "learning_rate": 1.3210526315789473e-05,
+      "loss": 1.5712,
+      "step": 175
+    },
+    {
+      "epoch": 0.013256505856212105,
+      "grad_norm": 0.7256350517272949,
+      "learning_rate": 1.2682105263157896e-05,
+      "loss": 1.5354,
+      "step": 176
+    },
+    {
+      "epoch": 0.01333182691221331,
+      "grad_norm": 0.8225219249725342,
+      "learning_rate": 1.2153684210526315e-05,
+      "loss": 1.6006,
+      "step": 177
+    },
+    {
+      "epoch": 0.013407147968214515,
+      "grad_norm": 0.8212623000144958,
+      "learning_rate": 1.1625263157894737e-05,
+      "loss": 1.6294,
+      "step": 178
+    },
+    {
+      "epoch": 0.01348246902421572,
+      "grad_norm": 0.83652263879776,
+      "learning_rate": 1.1096842105263158e-05,
+      "loss": 1.717,
+      "step": 179
+    },
+    {
+      "epoch": 0.013557790080216925,
+      "grad_norm": 0.8507919907569885,
+      "learning_rate": 1.0568421052631579e-05,
+      "loss": 1.5308,
+      "step": 180
+    },
+    {
+      "epoch": 0.01363311113621813,
+      "grad_norm": 0.852586567401886,
+      "learning_rate": 1.004e-05,
+      "loss": 1.4894,
+      "step": 181
+    },
+    {
+      "epoch": 0.013708432192219335,
+      "grad_norm": 1.0091602802276611,
+      "learning_rate": 9.511578947368422e-06,
+      "loss": 1.6264,
+      "step": 182
+    },
+    {
+      "epoch": 0.01378375324822054,
+      "grad_norm": 0.8656017184257507,
+      "learning_rate": 8.983157894736843e-06,
+      "loss": 1.3594,
+      "step": 183
+    },
+    {
+      "epoch": 0.013859074304221746,
+      "grad_norm": 1.1826636791229248,
+      "learning_rate": 8.454736842105263e-06,
+      "loss": 1.4783,
+      "step": 184
+    },
+    {
+      "epoch": 0.013934395360222951,
+      "grad_norm": 1.0532195568084717,
+      "learning_rate": 7.926315789473684e-06,
+      "loss": 1.5893,
+      "step": 185
+    },
+    {
+      "epoch": 0.014009716416224156,
+      "grad_norm": 0.9689043164253235,
+      "learning_rate": 7.397894736842104e-06,
+      "loss": 1.4995,
+      "step": 186
+    },
+    {
+      "epoch": 0.014085037472225361,
+      "grad_norm": 1.1377384662628174,
+      "learning_rate": 6.8694736842105265e-06,
+      "loss": 1.6958,
+      "step": 187
+    },
+    {
+      "epoch": 0.014160358528226566,
+      "grad_norm": 1.0540655851364136,
+      "learning_rate": 6.341052631578948e-06,
+      "loss": 1.4843,
+      "step": 188
+    },
+    {
+      "epoch": 0.014235679584227771,
+      "grad_norm": 1.1679211854934692,
+      "learning_rate": 5.812631578947368e-06,
+      "loss": 1.7684,
+      "step": 189
+    },
+    {
+      "epoch": 0.014311000640228977,
+      "grad_norm": 1.0264331102371216,
+      "learning_rate": 5.2842105263157896e-06,
+      "loss": 1.3892,
+      "step": 190
+    },
+    {
+      "epoch": 0.014386321696230182,
+      "grad_norm": 1.1910557746887207,
+      "learning_rate": 4.755789473684211e-06,
+      "loss": 1.5882,
+      "step": 191
+    },
+    {
+      "epoch": 0.014461642752231387,
+      "grad_norm": 1.1663460731506348,
+      "learning_rate": 4.227368421052631e-06,
+      "loss": 1.6217,
+      "step": 192
+    },
+    {
+      "epoch": 0.014536963808232592,
+      "grad_norm": 1.3855334520339966,
+      "learning_rate": 3.698947368421052e-06,
+      "loss": 1.7337,
+      "step": 193
+    },
+    {
+      "epoch": 0.014612284864233797,
+      "grad_norm": 1.2299613952636719,
+      "learning_rate": 3.170526315789474e-06,
+      "loss": 1.4779,
+      "step": 194
+    },
+    {
+      "epoch": 0.014687605920235002,
+      "grad_norm": 1.2253618240356445,
+      "learning_rate": 2.6421052631578948e-06,
+      "loss": 1.3989,
+      "step": 195
+    },
+    {
+      "epoch": 0.014762926976236207,
+      "grad_norm": 1.4197596311569214,
+      "learning_rate": 2.1136842105263157e-06,
+      "loss": 1.5233,
+      "step": 196
+    },
+    {
+      "epoch": 0.014838248032237412,
+      "grad_norm": 1.5346554517745972,
+      "learning_rate": 1.585263157894737e-06,
+      "loss": 1.6084,
+      "step": 197
+    },
+    {
+      "epoch": 0.014913569088238618,
+      "grad_norm": 1.5719937086105347,
+      "learning_rate": 1.0568421052631578e-06,
+      "loss": 1.5501,
+      "step": 198
+    },
+    {
+      "epoch": 0.014988890144239823,
+      "grad_norm": 1.8734774589538574,
+      "learning_rate": 5.284210526315789e-07,
+      "loss": 1.5491,
+      "step": 199
+    },
+    {
+      "epoch": 0.015064211200241028,
+      "grad_norm": 2.365112066268921,
+      "learning_rate": 0.0,
+      "loss": 1.833,
+      "step": 200
+    },
+    {
+      "epoch": 0.015064211200241028,
+      "eval_loss": 1.5817376375198364,
+      "eval_runtime": 231.6927,
+      "eval_samples_per_second": 24.127,
+      "eval_steps_per_second": 6.034,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.34680040964096e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null