Training in progress, step 42000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a853f72dbaed77b0f2b7855c598ca5e8f7205a774d5e4a484dc9618c1d703f9c
 size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:341cf52e44aee26bac163dde1d46d0da21ad4c56269b433dfffb30a288535511
 size 891644712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1425ddea2e1aa4193cc447538f36a29b778873646642b0054c31483367ff5ac8
 size 1783444357

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fa19a4f5dbef468935d63a1f85e3cfae44ec707144d148f1de907a8b462aaae
 size 1783444357

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f28c7019382eca2900417a705546cdab309d5cbea5a7dff4f774785ccae3006
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:6be41eee8b8a0220527e2dacace3c21226f82da4d47c62f24e447c03fa2152a0
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7beb525f6000838c9b12583ad1253138900b22ae2a96dcaafec662e05e58650
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8cafd6acaa1b671acd46943cc34e18e4d792ad12b254ee219f6cb062b3f2459
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2496,
   "eval_steps": 500,
-  "global_step": 39000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -553,6 +553,48 @@
       "learning_rate": 4.376224e-05,
       "loss": 0.3551,
       "step": 39000
     }
   ],
   "logging_steps": 500,
@@ -572,7 +614,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.8999486185472e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2688,
   "eval_steps": 500,
+  "global_step": 42000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.376224e-05,
       "loss": 0.3551,
       "step": 39000
+    },
+    {
+      "epoch": 0.2528,
+      "grad_norm": 0.6636275053024292,
+      "learning_rate": 4.368224e-05,
+      "loss": 0.3587,
+      "step": 39500
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 0.710564911365509,
+      "learning_rate": 4.360224e-05,
+      "loss": 0.3537,
+      "step": 40000
+    },
+    {
+      "epoch": 0.2592,
+      "grad_norm": 0.6195800304412842,
+      "learning_rate": 4.3522240000000004e-05,
+      "loss": 0.3537,
+      "step": 40500
+    },
+    {
+      "epoch": 0.2624,
+      "grad_norm": 0.7131514549255371,
+      "learning_rate": 4.34424e-05,
+      "loss": 0.3531,
+      "step": 41000
+    },
+    {
+      "epoch": 0.2656,
+      "grad_norm": 0.6594410538673401,
+      "learning_rate": 4.336256e-05,
+      "loss": 0.3518,
+      "step": 41500
+    },
+    {
+      "epoch": 0.2688,
+      "grad_norm": 0.7651230096817017,
+      "learning_rate": 4.328256e-05,
+      "loss": 0.3516,
+      "step": 42000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 2.0460985122816e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null