Training in progress, step 120, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d47375795bb95c7810811251ed890d09b08485d56f154652a654c75ae9c485c
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:7890d150186359623165f222ca4830c2a039fc03af0d56fe99e682d415fc31f6
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45dee7d2b556b85d291e06e42e7c8f845e737ce91e3c96b980ce976a2402877c
 size 46057082

 version https://git-lfs.github.com/spec/v1
+oid sha256:d30705974fe66df92448604887f4ff2019bdf74272d27933dc0a7e38b3354a13
 size 46057082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d010434ee16c14958906b5bf42c930dd1be27075db7b352777346c25649d79d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:98bbd4b4faf598bcf20005208e29928176693d773d6281a511c45efeae497be2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:398198b060b9edcfe93ff59de4a929b40cbc42323ec0afb0426f8d7b821a61c1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e549a35cd7e532c378c88126565a201f68fd1d73868bbbba082980ce1de2c27
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6011497974395752,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.03106247793858101,
   "eval_steps": 10,
-  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -873,6 +873,84 @@
       "eval_samples_per_second": 5.581,
       "eval_steps_per_second": 5.581,
       "step": 110
     }
   ],
   "logging_steps": 1,
@@ -887,7 +965,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -901,7 +979,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.076683943706624e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.6011497974395752,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0338863395693611,
   "eval_steps": 10,
+  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.581,
       "eval_steps_per_second": 5.581,
       "step": 110
+    },
+    {
+      "epoch": 0.03134486410165902,
+      "grad_norm": 0.7646064758300781,
+      "learning_rate": 0.00017975624426754848,
+      "loss": 1.6671,
+      "step": 111
+    },
+    {
+      "epoch": 0.03162725026473703,
+      "grad_norm": 2.133544445037842,
+      "learning_rate": 0.00017936784788148328,
+      "loss": 1.5909,
+      "step": 112
+    },
+    {
+      "epoch": 0.03190963642781504,
+      "grad_norm": 2.059943199157715,
+      "learning_rate": 0.00017897618899405423,
+      "loss": 1.7489,
+      "step": 113
+    },
+    {
+      "epoch": 0.032192022590893044,
+      "grad_norm": 0.8779903650283813,
+      "learning_rate": 0.00017858128370482426,
+      "loss": 1.4871,
+      "step": 114
+    },
+    {
+      "epoch": 0.032474408753971055,
+      "grad_norm": 1.5168753862380981,
+      "learning_rate": 0.000178183148246803,
+      "loss": 1.8045,
+      "step": 115
+    },
+    {
+      "epoch": 0.03275679491704907,
+      "grad_norm": 1.1241475343704224,
+      "learning_rate": 0.00017778179898577973,
+      "loss": 1.64,
+      "step": 116
+    },
+    {
+      "epoch": 0.03303918108012707,
+      "grad_norm": 9.078608512878418,
+      "learning_rate": 0.00017737725241965069,
+      "loss": 2.7736,
+      "step": 117
+    },
+    {
+      "epoch": 0.033321567243205084,
+      "grad_norm": 3.2590787410736084,
+      "learning_rate": 0.00017696952517774062,
+      "loss": 2.5064,
+      "step": 118
+    },
+    {
+      "epoch": 0.03360395340628309,
+      "grad_norm": 2.293269395828247,
+      "learning_rate": 0.00017655863402011947,
+      "loss": 2.146,
+      "step": 119
+    },
+    {
+      "epoch": 0.0338863395693611,
+      "grad_norm": 1.5803933143615723,
+      "learning_rate": 0.00017614459583691346,
+      "loss": 1.4979,
+      "step": 120
+    },
+    {
+      "epoch": 0.0338863395693611,
+      "eval_loss": 1.603722095489502,
+      "eval_runtime": 133.2987,
+      "eval_samples_per_second": 5.596,
+      "eval_steps_per_second": 5.596,
+      "step": 120
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.174564302225408e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null