Training in progress, step 36000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0931cfc0502a489316d773e7e3fbc87d1bf87d22980010d8e5debce1a389ab5a
 size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:94ce7d00b3b1ef166fe747fd7b6c23182e77237f5b57569ed0937385e73e4eb0
 size 891644712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68488d7f6b13bf4345eed8b0bddb03bc654697dc1a0ce2e2c44855609a96ee79
 size 1783444357

 version https://git-lfs.github.com/spec/v1
+oid sha256:55b22c65c93511f0da0294e2300deedeb3c5ffabff2881e99b8673baaf86ca81
 size 1783444357

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2668009309a225b4528d1c2be158a46d8643edce60db33568885f84d9153d0f5
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:9aeb790f34b48d9bf3b86bc7bfcbcd5e61233f6de9cf9a2d4634826b6f96d5f0
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c659c1c793579d80447241ae65c0f0bd61aec30b0f2e6c845263cb4857f12c85
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:37bed98491ff3050ce572a36a899b02a6ec7498a5e4d6d63b97b9516bfd16419
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2208,
   "eval_steps": 500,
-  "global_step": 34500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -490,6 +490,27 @@
       "learning_rate": 4.4481920000000007e-05,
       "loss": 0.3637,
       "step": 34500
     }
   ],
   "logging_steps": 500,
@@ -509,7 +530,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6807237779456e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2304,
   "eval_steps": 500,
+  "global_step": 36000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.4481920000000007e-05,
       "loss": 0.3637,
       "step": 34500
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 0.7771899700164795,
+      "learning_rate": 4.440192e-05,
+      "loss": 0.3648,
+      "step": 35000
+    },
+    {
+      "epoch": 0.2272,
+      "grad_norm": 0.6887528300285339,
+      "learning_rate": 4.432192e-05,
+      "loss": 0.3562,
+      "step": 35500
+    },
+    {
+      "epoch": 0.2304,
+      "grad_norm": 0.7471407055854797,
+      "learning_rate": 4.424192e-05,
+      "loss": 0.3639,
+      "step": 36000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1.7537987248128e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null