Training in progress, step 12, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:425cdb8f526e670631680f9ba49af2b403eb6046620ed6f981443bd12fe48aaa
 size 2964338224

 version https://git-lfs.github.com/spec/v1
+oid sha256:21a2f62efd0d71d0d134266e515dda3b88038f99e0b24c6a836737401f2339a3
 size 2964338224

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57a6657239ae296db292588667b26f492809b84e60b1581a4a132e38af548e6d
 size 1485440604

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9bb165d055492bacaa7fc7f6a4b7aa35f27f884ae2282741026ab19d8357fdb
 size 1485440604

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d79a1aac25962f14e4f652b048b5c2224eafccb90ae4f3d833b67a4a6590cbac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c200f8fdcde78c415e4babf1b6f200bbf13323757b5b248ace34314bfbe3fd44
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.45960502692998206,
   "eval_steps": 500,
-  "global_step": 8,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -63,6 +63,34 @@
       "learning_rate": 4e-05,
       "loss": 2.2126,
       "step": 8
     }
   ],
   "logging_steps": 1,
@@ -82,7 +110,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5313812434722816.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6894075403949731,
   "eval_steps": 500,
+  "global_step": 12,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4e-05,
       "loss": 2.2126,
       "step": 8
+    },
+    {
+      "epoch": 0.5170556552962298,
+      "grad_norm": 2.069558620452881,
+      "learning_rate": 4.5e-05,
+      "loss": 2.2389,
+      "step": 9
+    },
+    {
+      "epoch": 0.5745062836624776,
+      "grad_norm": 1.7339693307876587,
+      "learning_rate": 5e-05,
+      "loss": 2.1964,
+      "step": 10
+    },
+    {
+      "epoch": 0.6319569120287253,
+      "grad_norm": 1.6608643531799316,
+      "learning_rate": 4.992664502959351e-05,
+      "loss": 2.2079,
+      "step": 11
+    },
+    {
+      "epoch": 0.6894075403949731,
+      "grad_norm": 1.642751932144165,
+      "learning_rate": 4.970701059450872e-05,
+      "loss": 2.2359,
+      "step": 12
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 8132581322686464.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null