Training in progress, step 22, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fa532b40d5c0681f2baf3e51922c8aea3f5f742de78d5404e2c7fb13b9d1384
 size 27024

 version https://git-lfs.github.com/spec/v1
+oid sha256:994a6c1ec3dcc1a5db23088000de14b21b89c2b42f8184f6c205cb6e0fb92541
 size 27024

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c51b96be3eb3a7f526346cab91aa0e09347e5f1b0e2ea7802e6818d1e2a82d14
 size 63974

 version https://git-lfs.github.com/spec/v1
+oid sha256:966b5efc886d7597a65c3d0119ab91d4beaf99e8e9d77cfe0e8c341359f42811
 size 63974

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb04243bb25bf686a4dbec102fd9ca7613ad5c31a6a03dfc0541d4df84930100
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ed09bfe734c9814025ec917462737624591264937583c93cb36af9436c63d48
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f110d38baddc83b27dc701018da1279ccbbfd00f24dd48bfba755b5837c286c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1565387b96ec5f770ad7b2f4817988cb2fc09f94ceb911096ae3c468010baf3d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8545994065281899,
   "eval_steps": 6,
-  "global_step": 18,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -165,6 +165,34 @@
       "eval_samples_per_second": 464.759,
       "eval_steps_per_second": 232.379,
       "step": 18
     }
   ],
   "logging_steps": 1,
@@ -179,12 +207,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1432098078720.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0445103857566767,
   "eval_steps": 6,
+  "global_step": 22,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 464.759,
       "eval_steps_per_second": 232.379,
       "step": 18
+    },
+    {
+      "epoch": 0.9020771513353115,
+      "grad_norm": 0.10974813997745514,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 10.3813,
+      "step": 19
+    },
+    {
+      "epoch": 0.9495548961424333,
+      "grad_norm": 0.11267790198326111,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 10.3783,
+      "step": 20
+    },
+    {
+      "epoch": 0.9970326409495549,
+      "grad_norm": 0.10685980319976807,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 10.3776,
+      "step": 21
+    },
+    {
+      "epoch": 1.0445103857566767,
+      "grad_norm": 0.26596400141716003,
+      "learning_rate": 0.0,
+      "loss": 20.1393,
+      "step": 22
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1765068963840.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null