Training in progress, step 6500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +80 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a4009e19ac3f932b12f74b5e5961fb8a1a2da79ef0beb6278bcbe4110281f58
 size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab28ecb94b7a818a5a7fd1c75030bb7191001272b6094d37a909690e2363f392
 size 891644712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2cdc3d74d2f035f1f3fcaf7445452a3b282c680da8a9098675ce7b6aef4afd7
 size 1783444357

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f96d7c8ef3b30034ac3cc33cc9b776b72fa413fbfee63fe75839a96f5fc8817
 size 1783444357

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5460bfda360ec6bf006c4e70c03c4e6dd92ee461733554c5eae970f8ca47273c
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:89d57fdcf5f9a2ba7a66cb157a536592aea4bc319138ee95de3e9ae0ff3c98c0
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75b4b746e7e71a283788a34656838ee1f277007f7a1a22835377cbaf8957d3ae
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c8aa76edc747fa1f55c383a900f2044d0133f29781c0dba0a687e1604db66b1
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0064,
   "eval_steps": 500,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -21,6 +21,83 @@
       "learning_rate": 4.9840800000000006e-05,
       "loss": 0.7566,
       "step": 1000
     }
   ],
   "logging_steps": 500,
@@ -40,7 +117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4871663124480000.0,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0416,
   "eval_steps": 500,
+  "global_step": 6500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.9840800000000006e-05,
       "loss": 0.7566,
       "step": 1000
+    },
+    {
+      "epoch": 0.0096,
+      "grad_norm": 1.2925927639007568,
+      "learning_rate": 4.97608e-05,
+      "loss": 0.6764,
+      "step": 1500
+    },
+    {
+      "epoch": 0.0128,
+      "grad_norm": 1.286004900932312,
+      "learning_rate": 4.968080000000001e-05,
+      "loss": 0.6304,
+      "step": 2000
+    },
+    {
+      "epoch": 0.016,
+      "grad_norm": 1.2140214443206787,
+      "learning_rate": 4.96008e-05,
+      "loss": 0.5981,
+      "step": 2500
+    },
+    {
+      "epoch": 0.0192,
+      "grad_norm": 1.2525482177734375,
+      "learning_rate": 4.95208e-05,
+      "loss": 0.5767,
+      "step": 3000
+    },
+    {
+      "epoch": 0.0224,
+      "grad_norm": 1.2310410737991333,
+      "learning_rate": 4.94408e-05,
+      "loss": 0.5597,
+      "step": 3500
+    },
+    {
+      "epoch": 0.0256,
+      "grad_norm": 1.1735206842422485,
+      "learning_rate": 4.9360800000000004e-05,
+      "loss": 0.5418,
+      "step": 4000
+    },
+    {
+      "epoch": 0.0288,
+      "grad_norm": 1.114688754081726,
+      "learning_rate": 4.9280800000000004e-05,
+      "loss": 0.5335,
+      "step": 4500
+    },
+    {
+      "epoch": 0.032,
+      "grad_norm": 0.8874593377113342,
+      "learning_rate": 4.9200800000000005e-05,
+      "loss": 0.5237,
+      "step": 5000
+    },
+    {
+      "epoch": 0.0352,
+      "grad_norm": 1.1261299848556519,
+      "learning_rate": 4.91208e-05,
+      "loss": 0.5135,
+      "step": 5500
+    },
+    {
+      "epoch": 0.0384,
+      "grad_norm": 0.9994556307792664,
+      "learning_rate": 4.9040800000000007e-05,
+      "loss": 0.5059,
+      "step": 6000
+    },
+    {
+      "epoch": 0.0416,
+      "grad_norm": 1.2349673509597778,
+      "learning_rate": 4.89608e-05,
+      "loss": 0.4939,
+      "step": 6500
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 3.166581030912e+16,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null