Training in progress, step 2472, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f0114b81da15b6e0edb34df7bec916aec9ae13a9b8de447966c00c46642455a
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:e011c421a60c7810ee24b763468071afa26d0387457e13695871b896b3574643
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e4b82b14943b2783fab220f436567f79afa2f07a9450944f80bd10e44ee4d56
 size 75455810

 version https://git-lfs.github.com/spec/v1
+oid sha256:2eb75a806bcf4890f062200854fe7f6686e84cf4c12a91ba62120df4c2eb5adf
 size 75455810

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ed5e4e4db4550f7bdc953fe7ff92dc2e4e1604d975edafb5fd4e3d2ea5b4c14
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d921c4c665dbdab2f06d441e60a0ada3a95bd5abf389b9ce1b6155ead82a6b9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73a898b953c5a06d67dc9a202d5e9a9060db1598ab27acaafc60405a146042dd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbe24ee8253c280cdee86e896039139243254be40cb03c9cd81a3d19b30dd4a2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9562865432904135,
   "eval_steps": 500,
-  "global_step": 2468,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -17283,6 +17283,34 @@
       "learning_rate": 4.8352667637490694e-08,
       "loss": 1.3931,
       "step": 2468
     }
   ],
   "logging_steps": 1.0,
@@ -17302,7 +17330,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.440818876968626e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9578364404432342,
   "eval_steps": 500,
+  "global_step": 2472,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.8352667637490694e-08,
       "loss": 1.3931,
       "step": 2468
+    },
+    {
+      "epoch": 0.9566740175786187,
+      "grad_norm": 0.17757678031921387,
+      "learning_rate": 4.749444525663338e-08,
+      "loss": 1.3889,
+      "step": 2469
+    },
+    {
+      "epoch": 0.9570614918668239,
+      "grad_norm": 0.19035592675209045,
+      "learning_rate": 4.6643871274521525e-08,
+      "loss": 1.4116,
+      "step": 2470
+    },
+    {
+      "epoch": 0.9574489661550291,
+      "grad_norm": 0.18089327216148376,
+      "learning_rate": 4.5800947004738806e-08,
+      "loss": 1.4567,
+      "step": 2471
+    },
+    {
+      "epoch": 0.9578364404432342,
+      "grad_norm": 0.16921097040176392,
+      "learning_rate": 4.4965673749054474e-08,
+      "loss": 1.3548,
+      "step": 2472
     }
   ],
   "logging_steps": 1.0,
       "attributes": {}
     }
   },
+  "total_flos": 2.444950205418074e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null