Training in progress, step 120, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a24427b0f5ff1bdc81da1497dc8ac481e90d09a8968c6ece36194f71954eb10
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:021782730cbc0dd9442791d4bbeb76ef95bd8476b8c416818e4a2325ee96f06e
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f86948b5aaae310edcc8383ad15c4a0ede660d6e83aa57fa6341cec4eaefed34
 size 46057082

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b0d83b35fe5deee471bfe28a6ca6ef3c994bdeaa39d567accad5569e3a08da0
 size 46057082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d010434ee16c14958906b5bf42c930dd1be27075db7b352777346c25649d79d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:98bbd4b4faf598bcf20005208e29928176693d773d6281a511c45efeae497be2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:398198b060b9edcfe93ff59de4a929b40cbc42323ec0afb0426f8d7b821a61c1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e549a35cd7e532c378c88126565a201f68fd1d73868bbbba082980ce1de2c27
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6035598516464233,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.03106247793858101,
   "eval_steps": 10,
-  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -873,6 +873,84 @@
       "eval_samples_per_second": 5.556,
       "eval_steps_per_second": 5.556,
       "step": 110
     }
   ],
   "logging_steps": 1,
@@ -887,7 +965,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -901,7 +979,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.076683943706624e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.6035598516464233,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0338863395693611,
   "eval_steps": 10,
+  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.556,
       "eval_steps_per_second": 5.556,
       "step": 110
+    },
+    {
+      "epoch": 0.03134486410165902,
+      "grad_norm": 0.664234459400177,
+      "learning_rate": 0.00017975624426754848,
+      "loss": 1.4157,
+      "step": 111
+    },
+    {
+      "epoch": 0.03162725026473703,
+      "grad_norm": 1.8084183931350708,
+      "learning_rate": 0.00017936784788148328,
+      "loss": 1.9874,
+      "step": 112
+    },
+    {
+      "epoch": 0.03190963642781504,
+      "grad_norm": 1.104672908782959,
+      "learning_rate": 0.00017897618899405423,
+      "loss": 2.7257,
+      "step": 113
+    },
+    {
+      "epoch": 0.032192022590893044,
+      "grad_norm": 0.7887753844261169,
+      "learning_rate": 0.00017858128370482426,
+      "loss": 2.7542,
+      "step": 114
+    },
+    {
+      "epoch": 0.032474408753971055,
+      "grad_norm": 1.3737729787826538,
+      "learning_rate": 0.000178183148246803,
+      "loss": 2.0767,
+      "step": 115
+    },
+    {
+      "epoch": 0.03275679491704907,
+      "grad_norm": 0.7426419258117676,
+      "learning_rate": 0.00017778179898577973,
+      "loss": 1.4909,
+      "step": 116
+    },
+    {
+      "epoch": 0.03303918108012707,
+      "grad_norm": 2.0367558002471924,
+      "learning_rate": 0.00017737725241965069,
+      "loss": 1.6603,
+      "step": 117
+    },
+    {
+      "epoch": 0.033321567243205084,
+      "grad_norm": 0.9449915289878845,
+      "learning_rate": 0.00017696952517774062,
+      "loss": 2.323,
+      "step": 118
+    },
+    {
+      "epoch": 0.03360395340628309,
+      "grad_norm": 2.1352381706237793,
+      "learning_rate": 0.00017655863402011947,
+      "loss": 1.9585,
+      "step": 119
+    },
+    {
+      "epoch": 0.0338863395693611,
+      "grad_norm": 1.0839155912399292,
+      "learning_rate": 0.00017614459583691346,
+      "loss": 1.5273,
+      "step": 120
+    },
+    {
+      "epoch": 0.0338863395693611,
+      "eval_loss": 1.6136579513549805,
+      "eval_runtime": 134.3244,
+      "eval_samples_per_second": 5.554,
+      "eval_steps_per_second": 5.554,
+      "step": 120
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.174564302225408e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null