Training in progress, step 2346, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +326 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2486c11cdeb92452f6569f7f4645d34f3fecf6d9804950791c482876e167dc31
 size 56662456

 version https://git-lfs.github.com/spec/v1
+oid sha256:c143e49ec80b9efa7c889526d4266a7b197596fb39eb75eb8f0c60a959dfb5b3
 size 56662456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:608405eab0f6d3208744fc1dcd08b352694f1fdb0ae36523b678ab0f05719c42
 size 29091284

 version https://git-lfs.github.com/spec/v1
+oid sha256:229ef2852a3e0ca7253143b05e4b688888a7bf5fee9127a04663067e45d8cf7d
 size 29091284

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ebc651fb234154b1f64daa74415a94e83b19956b155ddc949aefd7e730748ad
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:07d6aa011454a4748f5199ba02a257f0c3397f7d63ff5d7de731bdce7a2a6006
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de3131eebd88f15913d23a8b71b93a7cfd80886eb829bbe03936d99f0b97a408
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:82f6fb6c04e83ad8c7e7774f50cfceed3bf90e6e42ded09b4deef26723be76bc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.007140692323446274,
   "best_model_checkpoint": "miner_id_24/checkpoint-2300",
-  "epoch": 0.6836336615270295,
   "eval_steps": 100,
-  "global_step": 2300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -16299,6 +16299,328 @@
       "eval_samples_per_second": 10.677,
       "eval_steps_per_second": 2.669,
       "step": 2300
     }
   ],
   "logging_steps": 1,
@@ -16322,12 +16644,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0634494560090194e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.007140692323446274,
   "best_model_checkpoint": "miner_id_24/checkpoint-2300",
+  "epoch": 0.6973063347575701,
   "eval_steps": 100,
+  "global_step": 2346,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.677,
       "eval_steps_per_second": 2.669,
       "step": 2300
+    },
+    {
+      "epoch": 0.6839308935537805,
+      "grad_norm": 0.11718721687793732,
+      "learning_rate": 1.8306973193326084e-07,
+      "loss": 0.0639,
+      "step": 2301
+    },
+    {
+      "epoch": 0.6842281255805313,
+      "grad_norm": 0.11004548519849777,
+      "learning_rate": 1.7502605215715672e-07,
+      "loss": 0.0404,
+      "step": 2302
+    },
+    {
+      "epoch": 0.6845253576072822,
+      "grad_norm": 0.10758353769779205,
+      "learning_rate": 1.671629205999836e-07,
+      "loss": 0.0538,
+      "step": 2303
+    },
+    {
+      "epoch": 0.6848225896340331,
+      "grad_norm": 0.1499020904302597,
+      "learning_rate": 1.5948035148338757e-07,
+      "loss": 0.082,
+      "step": 2304
+    },
+    {
+      "epoch": 0.685119821660784,
+      "grad_norm": 0.10471717268228531,
+      "learning_rate": 1.5197835870242038e-07,
+      "loss": 0.049,
+      "step": 2305
+    },
+    {
+      "epoch": 0.6854170536875348,
+      "grad_norm": 0.09845124185085297,
+      "learning_rate": 1.446569558255395e-07,
+      "loss": 0.0395,
+      "step": 2306
+    },
+    {
+      "epoch": 0.6857142857142857,
+      "grad_norm": 0.10133890062570572,
+      "learning_rate": 1.375161560946081e-07,
+      "loss": 0.044,
+      "step": 2307
+    },
+    {
+      "epoch": 0.6860115177410366,
+      "grad_norm": 0.12090011686086655,
+      "learning_rate": 1.305559724248062e-07,
+      "loss": 0.0723,
+      "step": 2308
+    },
+    {
+      "epoch": 0.6863087497677874,
+      "grad_norm": 0.10536789149045944,
+      "learning_rate": 1.2377641740464187e-07,
+      "loss": 0.0536,
+      "step": 2309
+    },
+    {
+      "epoch": 0.6866059817945384,
+      "grad_norm": 0.13396191596984863,
+      "learning_rate": 1.1717750329595101e-07,
+      "loss": 0.0716,
+      "step": 2310
+    },
+    {
+      "epoch": 0.6869032138212893,
+      "grad_norm": 0.11562539637088776,
+      "learning_rate": 1.1075924203385324e-07,
+      "loss": 0.061,
+      "step": 2311
+    },
+    {
+      "epoch": 0.6872004458480401,
+      "grad_norm": 0.08731340616941452,
+      "learning_rate": 1.0452164522671837e-07,
+      "loss": 0.0424,
+      "step": 2312
+    },
+    {
+      "epoch": 0.687497677874791,
+      "grad_norm": 0.10618704557418823,
+      "learning_rate": 9.846472415615537e-08,
+      "loss": 0.0484,
+      "step": 2313
+    },
+    {
+      "epoch": 0.6877949099015419,
+      "grad_norm": 0.10936351865530014,
+      "learning_rate": 9.258848977700129e-08,
+      "loss": 0.0648,
+      "step": 2314
+    },
+    {
+      "epoch": 0.6880921419282928,
+      "grad_norm": 0.09159952402114868,
+      "learning_rate": 8.689295271729902e-08,
+      "loss": 0.0359,
+      "step": 2315
+    },
+    {
+      "epoch": 0.6883893739550436,
+      "grad_norm": 0.09538505971431732,
+      "learning_rate": 8.13781232782751e-08,
+      "loss": 0.0423,
+      "step": 2316
+    },
+    {
+      "epoch": 0.6886866059817945,
+      "grad_norm": 0.1170530915260315,
+      "learning_rate": 7.604401143430639e-08,
+      "loss": 0.0589,
+      "step": 2317
+    },
+    {
+      "epoch": 0.6889838380085455,
+      "grad_norm": 0.10562342405319214,
+      "learning_rate": 7.089062683292014e-08,
+      "loss": 0.0508,
+      "step": 2318
+    },
+    {
+      "epoch": 0.6892810700352963,
+      "grad_norm": 0.11546720564365387,
+      "learning_rate": 6.591797879478279e-08,
+      "loss": 0.0588,
+      "step": 2319
+    },
+    {
+      "epoch": 0.6895783020620472,
+      "grad_norm": 0.14187178015708923,
+      "learning_rate": 6.112607631364453e-08,
+      "loss": 0.0705,
+      "step": 2320
+    },
+    {
+      "epoch": 0.6898755340887981,
+      "grad_norm": 0.10787046700716019,
+      "learning_rate": 5.65149280563948e-08,
+      "loss": 0.0587,
+      "step": 2321
+    },
+    {
+      "epoch": 0.6901727661155489,
+      "grad_norm": 0.08616691827774048,
+      "learning_rate": 5.208454236296234e-08,
+      "loss": 0.0341,
+      "step": 2322
+    },
+    {
+      "epoch": 0.6904699981422998,
+      "grad_norm": 0.13538287580013275,
+      "learning_rate": 4.783492724635963e-08,
+      "loss": 0.0772,
+      "step": 2323
+    },
+    {
+      "epoch": 0.6907672301690507,
+      "grad_norm": 0.11333715915679932,
+      "learning_rate": 4.376609039262736e-08,
+      "loss": 0.0569,
+      "step": 2324
+    },
+    {
+      "epoch": 0.6910644621958016,
+      "grad_norm": 0.12235751003026962,
+      "learning_rate": 3.9878039160878844e-08,
+      "loss": 0.0556,
+      "step": 2325
+    },
+    {
+      "epoch": 0.6913616942225524,
+      "grad_norm": 0.10557149350643158,
+      "learning_rate": 3.617078058322232e-08,
+      "loss": 0.046,
+      "step": 2326
+    },
+    {
+      "epoch": 0.6916589262493034,
+      "grad_norm": 0.12478914111852646,
+      "learning_rate": 3.264432136478313e-08,
+      "loss": 0.0733,
+      "step": 2327
+    },
+    {
+      "epoch": 0.6919561582760543,
+      "grad_norm": 0.08897604048252106,
+      "learning_rate": 2.9298667883692622e-08,
+      "loss": 0.0388,
+      "step": 2328
+    },
+    {
+      "epoch": 0.6922533903028051,
+      "grad_norm": 0.09747358411550522,
+      "learning_rate": 2.6133826191032663e-08,
+      "loss": 0.0401,
+      "step": 2329
+    },
+    {
+      "epoch": 0.692550622329556,
+      "grad_norm": 0.09661777317523956,
+      "learning_rate": 2.3149802010913323e-08,
+      "loss": 0.0403,
+      "step": 2330
+    },
+    {
+      "epoch": 0.6928478543563069,
+      "grad_norm": 0.12822963297367096,
+      "learning_rate": 2.034660074037298e-08,
+      "loss": 0.0739,
+      "step": 2331
+    },
+    {
+      "epoch": 0.6931450863830578,
+      "grad_norm": 0.12262982130050659,
+      "learning_rate": 1.7724227449422705e-08,
+      "loss": 0.0491,
+      "step": 2332
+    },
+    {
+      "epoch": 0.6934423184098086,
+      "grad_norm": 0.11021570861339569,
+      "learning_rate": 1.5282686881001875e-08,
+      "loss": 0.0444,
+      "step": 2333
+    },
+    {
+      "epoch": 0.6937395504365595,
+      "grad_norm": 0.10723091661930084,
+      "learning_rate": 1.3021983451000364e-08,
+      "loss": 0.0525,
+      "step": 2334
+    },
+    {
+      "epoch": 0.6940367824633105,
+      "grad_norm": 0.12233065813779831,
+      "learning_rate": 1.094212124824745e-08,
+      "loss": 0.0541,
+      "step": 2335
+    },
+    {
+      "epoch": 0.6943340144900613,
+      "grad_norm": 0.12585288286209106,
+      "learning_rate": 9.043104034456295e-09,
+      "loss": 0.0507,
+      "step": 2336
+    },
+    {
+      "epoch": 0.6946312465168122,
+      "grad_norm": 0.08642668277025223,
+      "learning_rate": 7.324935244301667e-09,
+      "loss": 0.0327,
+      "step": 2337
+    },
+    {
+      "epoch": 0.6949284785435631,
+      "grad_norm": 0.10163281857967377,
+      "learning_rate": 5.78761798534222e-09,
+      "loss": 0.0378,
+      "step": 2338
+    },
+    {
+      "epoch": 0.6952257105703139,
+      "grad_norm": 0.1360352784395218,
+      "learning_rate": 4.431155038031598e-09,
+      "loss": 0.0524,
+      "step": 2339
+    },
+    {
+      "epoch": 0.6955229425970648,
+      "grad_norm": 0.12937673926353455,
+      "learning_rate": 3.255548855740642e-09,
+      "loss": 0.0615,
+      "step": 2340
+    },
+    {
+      "epoch": 0.6958201746238157,
+      "grad_norm": 0.11729301512241364,
+      "learning_rate": 2.260801564735182e-09,
+      "loss": 0.0516,
+      "step": 2341
+    },
+    {
+      "epoch": 0.6961174066505667,
+      "grad_norm": 0.1295451819896698,
+      "learning_rate": 1.4469149641538337e-09,
+      "loss": 0.0682,
+      "step": 2342
+    },
+    {
+      "epoch": 0.6964146386773175,
+      "grad_norm": 0.13985326886177063,
+      "learning_rate": 8.138905260302032e-10,
+      "loss": 0.0777,
+      "step": 2343
+    },
+    {
+      "epoch": 0.6967118707040684,
+      "grad_norm": 0.10082249343395233,
+      "learning_rate": 3.617293952817846e-10,
+      "loss": 0.0439,
+      "step": 2344
+    },
+    {
+      "epoch": 0.6970091027308193,
+      "grad_norm": 0.11734326928853989,
+      "learning_rate": 9.043238970996016e-11,
+      "loss": 0.0561,
+      "step": 2345
+    },
+    {
+      "epoch": 0.6973063347575701,
+      "grad_norm": 0.10924455523490906,
+      "learning_rate": 0.0,
+      "loss": 0.0507,
+      "step": 2346
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0845724172605194e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null