Training in progress, step 2572, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b50a8963fd836f2d4a75e5d6e5108f8838240dead3cf06ed2b83dbc79100fd4d
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff07db1019ff730ef79b24091064f8a49cdefb9061c2509b0cec41d0929caa8e
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9a9e65bd995e2f2ee324878ea2d0c3f2b57d027cac2eb0f3d9cb367e065083f
 size 75455810

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ef2da130652bf6a0a5337026ef09e6a2a500d20f2fc74f97ed1de83989cd348
 size 75455810

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:947d372c2ac3d768de08720e6fe18650bc49df377d135d0776536e2ae0cb6658
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:10a3f2d483e1271610cf1160deebfe4b7cd7e846596d1240bcea8a0fb41c92d6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9314e03f72b7117ed44ca7e4c2d8792153e93080b1ee70d313c7ca3f40ea0cdc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bf3c143616698704049e834c6a36949b23673acd7aeab20c33c0b99d558ad3f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9950339721109327,
   "eval_steps": 500,
-  "global_step": 2568,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -17983,6 +17983,34 @@
       "learning_rate": 5.559555351780655e-10,
       "loss": 1.4324,
       "step": 2568
     }
   ],
   "logging_steps": 1.0,
@@ -18002,7 +18030,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.5393388307779543e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9965838692637534,
   "eval_steps": 500,
+  "global_step": 2572,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5.559555351780655e-10,
       "loss": 1.4324,
       "step": 2568
+    },
+    {
+      "epoch": 0.9954214463991379,
+      "grad_norm": 0.19253648817539215,
+      "learning_rate": 4.671584644355687e-10,
+      "loss": 1.426,
+      "step": 2569
+    },
+    {
+      "epoch": 0.9958089206873431,
+      "grad_norm": 0.18221616744995117,
+      "learning_rate": 3.8608241899940056e-10,
+      "loss": 1.3659,
+      "step": 2570
+    },
+    {
+      "epoch": 0.9961963949755482,
+      "grad_norm": 0.1748841404914856,
+      "learning_rate": 3.1272752407773834e-10,
+      "loss": 1.4263,
+      "step": 2571
+    },
+    {
+      "epoch": 0.9965838692637534,
+      "grad_norm": 0.18521229922771454,
+      "learning_rate": 2.470938929571842e-10,
+      "loss": 1.4405,
+      "step": 2572
     }
   ],
   "logging_steps": 1.0,
       "attributes": {}
     }
   },
+  "total_flos": 2.5435260736329185e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null