Training in progress, step 865, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99b129df98bdc306d1bc4565ee000cc0b871ca7381053214c2914dac7ae77608
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:82b7ea91db0b8414ee1ee370f896bbd26f3fbb47a73176b7f3f65b18d197a25b
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb976eeabc1d40bc1ef543f0d6b42e69810a3b568ef7fec526ce855dcd53f250
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:43083f322054aff43bc6930fa6998ada6fdd635d44556e37099a3e09e1044dd6
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:798b54db949c4ae9de08b62eac89d6111767c04dba8dc38518460f18e2c13d16
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d62a615819a68a35523082dd9af5336db4b9184e8270ea5a330d6ceefb606b95
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2d74cdd7773895240aff3837ca564e1fc035a5a8a0853fee30d28f2c6ee4c25
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc3d8a2771961fb52eec0bdd20d20676362c5357acba011a4424ecf88c9a6ca9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.112908443869298,
   "eval_steps": 386,
-  "global_step": 860,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6051,6 +6051,41 @@
       "learning_rate": 8.925627310699275e-05,
       "loss": 0.9271,
       "step": 860
     }
   ],
   "logging_steps": 1,
@@ -6070,7 +6105,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.613560965832376e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.1193788417987707,
   "eval_steps": 386,
+  "global_step": 865,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.925627310699275e-05,
       "loss": 0.9271,
       "step": 860
+    },
+    {
+      "epoch": 1.1142025234551924,
+      "grad_norm": 0.8622956275939941,
+      "learning_rate": 8.92307255942185e-05,
+      "loss": 0.7689,
+      "step": 861
+    },
+    {
+      "epoch": 1.115496603041087,
+      "grad_norm": 0.8629088401794434,
+      "learning_rate": 8.920515140818351e-05,
+      "loss": 0.811,
+      "step": 862
+    },
+    {
+      "epoch": 1.1167906826269816,
+      "grad_norm": 0.9130288362503052,
+      "learning_rate": 8.91795505662759e-05,
+      "loss": 0.8302,
+      "step": 863
+    },
+    {
+      "epoch": 1.118084762212876,
+      "grad_norm": 0.9185603857040405,
+      "learning_rate": 8.915392308590183e-05,
+      "loss": 0.7699,
+      "step": 864
+    },
+    {
+      "epoch": 1.1193788417987707,
+      "grad_norm": 0.8291416168212891,
+      "learning_rate": 8.912826898448561e-05,
+      "loss": 0.6884,
+      "step": 865
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9.669457824417055e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null