Training in progress, step 130, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:021782730cbc0dd9442791d4bbeb76ef95bd8476b8c416818e4a2325ee96f06e
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:06cb5c99fa6a855d28a17bf1c0bb727f661d2305e6ccf1eedacf257af1918190
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b0d83b35fe5deee471bfe28a6ca6ef3c994bdeaa39d567accad5569e3a08da0
 size 46057082

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4da30ad3ecaf80f0132f8c30fcf3db794bf59cdbedc0a77a47030a55fbe18e8
 size 46057082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98bbd4b4faf598bcf20005208e29928176693d773d6281a511c45efeae497be2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad4c4d2d769c6f52183fdfe62140ef02b36aa1e936b1d8050f51672d3d58fb1e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e549a35cd7e532c378c88126565a201f68fd1d73868bbbba082980ce1de2c27
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3511d75105f53c278279e3dade6f856082c8693b0424c0bf567bdcf23028dd2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6035598516464233,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.0338863395693611,
   "eval_steps": 10,
-  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -951,6 +951,84 @@
       "eval_samples_per_second": 5.554,
       "eval_steps_per_second": 5.554,
       "step": 120
     }
   ],
   "logging_steps": 1,
@@ -965,7 +1043,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -974,12 +1052,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.174564302225408e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.6035598516464233,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.03671020120014119,
   "eval_steps": 10,
+  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.554,
       "eval_steps_per_second": 5.554,
       "step": 120
+    },
+    {
+      "epoch": 0.03416872573243911,
+      "grad_norm": 1.1370112895965576,
+      "learning_rate": 0.00017572742764761055,
+      "loss": 1.9316,
+      "step": 121
+    },
+    {
+      "epoch": 0.03445111189551712,
+      "grad_norm": 1.037766456604004,
+      "learning_rate": 0.00017530714660036112,
+      "loss": 1.3929,
+      "step": 122
+    },
+    {
+      "epoch": 0.03473349805859513,
+      "grad_norm": 0.7955436706542969,
+      "learning_rate": 0.00017488376997127283,
+      "loss": 2.3555,
+      "step": 123
+    },
+    {
+      "epoch": 0.035015884221673135,
+      "grad_norm": 3.0241856575012207,
+      "learning_rate": 0.0001744573151637007,
+      "loss": 1.9409,
+      "step": 124
+    },
+    {
+      "epoch": 0.03529827038475115,
+      "grad_norm": 1.1123616695404053,
+      "learning_rate": 0.00017402779970753155,
+      "loss": 2.8273,
+      "step": 125
+    },
+    {
+      "epoch": 0.03558065654782916,
+      "grad_norm": 1.9221562147140503,
+      "learning_rate": 0.0001735952412584635,
+      "loss": 2.0479,
+      "step": 126
+    },
+    {
+      "epoch": 0.035863042710907164,
+      "grad_norm": 1.6352111101150513,
+      "learning_rate": 0.00017315965759728014,
+      "loss": 1.9576,
+      "step": 127
+    },
+    {
+      "epoch": 0.036145428873985176,
+      "grad_norm": 1.4023115634918213,
+      "learning_rate": 0.00017272106662911973,
+      "loss": 1.6282,
+      "step": 128
+    },
+    {
+      "epoch": 0.03642781503706318,
+      "grad_norm": 0.36453551054000854,
+      "learning_rate": 0.00017227948638273916,
+      "loss": 0.2846,
+      "step": 129
+    },
+    {
+      "epoch": 0.03671020120014119,
+      "grad_norm": 1.499108076095581,
+      "learning_rate": 0.00017183493500977278,
+      "loss": 2.0915,
+      "step": 130
+    },
+    {
+      "epoch": 0.03671020120014119,
+      "eval_loss": 1.625510811805725,
+      "eval_runtime": 134.291,
+      "eval_samples_per_second": 5.555,
+      "eval_steps_per_second": 5.555,
+      "step": 130
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.272444660744192e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null