Training in progress, step 2576, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff07db1019ff730ef79b24091064f8a49cdefb9061c2509b0cec41d0929caa8e
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc2136cfbd00690f3783dc17e8800db2dc8ad727fd6fe2cc8c6c953b2599e207
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ef2da130652bf6a0a5337026ef09e6a2a500d20f2fc74f97ed1de83989cd348
 size 75455810

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ea2b85354be8b703c4c2920b8b2fb21f3e0e76e72838162be0d20e1b9317205
 size 75455810

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10a3f2d483e1271610cf1160deebfe4b7cd7e846596d1240bcea8a0fb41c92d6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fd8571639a832ac3a6bdbe8b83a9d5174ee7552fcb8cdc3d2effb6c405a5866
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bf3c143616698704049e834c6a36949b23673acd7aeab20c33c0b99d558ad3f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:83a970aca6d8492ea417337203ea2e40d5dcc9a674de898354c4c9743b9beef6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9965838692637534,
   "eval_steps": 500,
-  "global_step": 2572,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -18011,6 +18011,34 @@
       "learning_rate": 2.470938929571842e-10,
       "loss": 1.4405,
       "step": 2572
     }
   ],
   "logging_steps": 1.0,
@@ -18030,7 +18058,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.5435260736329185e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9981337664165743,
   "eval_steps": 500,
+  "global_step": 2576,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.470938929571842e-10,
       "loss": 1.4405,
       "step": 2572
+    },
+    {
+      "epoch": 0.9969713435519586,
+      "grad_norm": 0.18423818051815033,
+      "learning_rate": 1.8918162699887997e-10,
+      "loss": 1.3955,
+      "step": 2573
+    },
+    {
+      "epoch": 0.9973588178401638,
+      "grad_norm": 0.17565150558948517,
+      "learning_rate": 1.3899081563906182e-10,
+      "loss": 1.3862,
+      "step": 2574
+    },
+    {
+      "epoch": 0.9977462921283691,
+      "grad_norm": 0.18880826234817505,
+      "learning_rate": 9.65215363907257e-11,
+      "loss": 1.3653,
+      "step": 2575
+    },
+    {
+      "epoch": 0.9981337664165743,
+      "grad_norm": 0.18108192086219788,
+      "learning_rate": 6.177385484029685e-11,
+      "loss": 1.3969,
+      "step": 2576
     }
   ],
   "logging_steps": 1.0,
       "attributes": {}
     }
   },
+  "total_flos": 2.547514766592221e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null