Training in progress, step 185500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2baaeb2b23d09e31adf3ef5585f3e6639b56c07f476dcaafc27f4ae5d87b820
 size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb364ce891de5f4a56d29465d8827aee9763f63c827eed28e9f08a6879a4e0e8
 size 891644712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31c9234b22f8508a6d0d4122d44d2c6d1d2e6e2fee5b0b2e55791454aea923e8
 size 1783444357

 version https://git-lfs.github.com/spec/v1
+oid sha256:13bfc74584d67284695477358d19a44d719880c08f366f3d9f3dd6557e56e613
 size 1783444357

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:823c43fb8cb66b96b5deeb2005d1b02ad22bf7e9aa812aa31a158370b966082c
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:3abb4906081313365fe9d8e0e48faa531b110753b43df2422beeff510f4869fe
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82e2c4a62066e0491348b34524e07a98c2175669eb301b7705f9a7c6d9af8189
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:c743dd89240c66051c085e23b5c9372172e564216f22be729c3e3e69fdfbbada
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.168,
   "eval_steps": 500,
-  "global_step": 182500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2562,6 +2562,48 @@
       "learning_rate": 2.081056e-05,
       "loss": 0.2611,
       "step": 182500
     }
   ],
   "logging_steps": 500,
@@ -2581,7 +2623,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.890785202176e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.1872,
   "eval_steps": 500,
+  "global_step": 185500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.081056e-05,
       "loss": 0.2611,
       "step": 182500
+    },
+    {
+      "epoch": 1.1712,
+      "grad_norm": 0.5887815356254578,
+      "learning_rate": 2.073056e-05,
+      "loss": 0.2632,
+      "step": 183000
+    },
+    {
+      "epoch": 1.1743999999999999,
+      "grad_norm": 0.6037270426750183,
+      "learning_rate": 2.0650560000000002e-05,
+      "loss": 0.2645,
+      "step": 183500
+    },
+    {
+      "epoch": 1.1776,
+      "grad_norm": 0.636946439743042,
+      "learning_rate": 2.057072e-05,
+      "loss": 0.2628,
+      "step": 184000
+    },
+    {
+      "epoch": 1.1808,
+      "grad_norm": 0.5285276770591736,
+      "learning_rate": 2.049072e-05,
+      "loss": 0.2629,
+      "step": 184500
+    },
+    {
+      "epoch": 1.184,
+      "grad_norm": 0.4634397625923157,
+      "learning_rate": 2.041072e-05,
+      "loss": 0.2615,
+      "step": 185000
+    },
+    {
+      "epoch": 1.1872,
+      "grad_norm": 0.5693604946136475,
+      "learning_rate": 2.033072e-05,
+      "loss": 0.2619,
+      "step": 185500
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 9.0369350959104e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null