Training in progress, step 4000, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4b4b7b2eecafa10d9b9fd56b2f27ca11649ae805630c3be6b59bc780c927216
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4a4f60720b11184986674e7b4a366ad3b9d917f91ec76c2f59fba01fcd1c4ba
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bdf2aa53843cdc6f0c24817577e70f22e6f55e70f6c0d0457f3479129a5bcb5
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:8cbb52d27c2843adda090a89873589ae51928d0a9e3b2e66f311e8f9097f3aa3
 size 3497859804

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4df07c96cd5b34fe2c47040206f73bf07f974f4e6c2b7b7219d670c2d00c146
 size 14308

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb1d32a5240cd2bec4435f56208363b0d59e8bf8c99164c7cbee490171e7de00
 size 14308

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27172d5cc1af6ec98dccb94fbc3511967f5f7dd8f8c183140f65b475db95b940
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ea4fa4d8b63c46011d327d88624348ec23af27ba1a62258b0451a85ac1f2f91
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9167579408543265,
   "eval_steps": 500,
-  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -252,6 +252,41 @@
       "learning_rate": 7.747278692494825e-05,
       "loss": 1.0665,
       "step": 3500
     }
   ],
   "logging_steps": 100,
@@ -271,7 +306,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6878032741533286e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.190580503833516,
   "eval_steps": 500,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.747278692494825e-05,
       "loss": 1.0665,
       "step": 3500
+    },
+    {
+      "epoch": 1.9715224534501643,
+      "grad_norm": 2.0689291954040527,
+      "learning_rate": 7.585587866727898e-05,
+      "loss": 1.0571,
+      "step": 3600
+    },
+    {
+      "epoch": 2.026286966046002,
+      "grad_norm": 1.5938904285430908,
+      "learning_rate": 7.420118017662894e-05,
+      "loss": 0.8671,
+      "step": 3700
+    },
+    {
+      "epoch": 2.08105147864184,
+      "grad_norm": 2.3023173809051514,
+      "learning_rate": 7.251110991428034e-05,
+      "loss": 0.6557,
+      "step": 3800
+    },
+    {
+      "epoch": 2.135815991237678,
+      "grad_norm": 1.6988605260849,
+      "learning_rate": 7.07881380399129e-05,
+      "loss": 0.6836,
+      "step": 3900
+    },
+    {
+      "epoch": 2.190580503833516,
+      "grad_norm": 1.5740976333618164,
+      "learning_rate": 6.903478280128721e-05,
+      "loss": 0.6712,
+      "step": 4000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.9236155642036552e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null