Training in progress, step 1000, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:119ed194d1366f613f4ff5a50dc5adf9d1419dc19a0e5fdbe52a95ed44c8baaf
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:f49d74a21c9de4957df85ff3d5da7ab77e561aa8097ff2aa90937236812ddf5a
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b2d06d763f64e7867366113993b930392c5a1c1cf151c1a52e08d2baf51b2ad
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:78bc5d7f250a29484cb11644e9c6bdb27e7569607b8207b8f5e90054bdb57733
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b2d59715331ee8c33fa0d2ed55a484e9c2ac88210adb8737eec56220eee082c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f10c1e950ccc3ca6ce28644d055ede3dfe6763dec3f800e5ed80a1dc9e119762
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2738225629791895,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -42,6 +42,41 @@
       "learning_rate": 9.124087591240877e-05,
       "loss": 1.6711,
       "step": 500
     }
   ],
   "logging_steps": 100,
@@ -61,7 +96,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.4419524014322483e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.547645125958379,
   "eval_steps": 500,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.124087591240877e-05,
       "loss": 1.6711,
       "step": 500
+    },
+    {
+      "epoch": 0.32858707557502737,
+      "grad_norm": 2.219165802001953,
+      "learning_rate": 9.997255186358079e-05,
+      "loss": 1.6651,
+      "step": 600
+    },
+    {
+      "epoch": 0.3833515881708653,
+      "grad_norm": 2.6104276180267334,
+      "learning_rate": 9.976563458663239e-05,
+      "loss": 1.5924,
+      "step": 700
+    },
+    {
+      "epoch": 0.43811610076670315,
+      "grad_norm": 2.4602837562561035,
+      "learning_rate": 9.93567000457336e-05,
+      "loss": 1.5787,
+      "step": 800
+    },
+    {
+      "epoch": 0.4928806133625411,
+      "grad_norm": 2.33683705329895,
+      "learning_rate": 9.874740825864108e-05,
+      "loss": 1.5269,
+      "step": 900
+    },
+    {
+      "epoch": 0.547645125958379,
+      "grad_norm": 3.0263447761535645,
+      "learning_rate": 9.794023256786919e-05,
+      "loss": 1.5251,
+      "step": 1000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 4.845362695616594e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null