Training in progress, step 3500, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1db32908db7e581c7ecbabf0ed77edff18bebedf22eb460f2ce6d0e22aadc31
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd3ba167b05ddca415d83ce674f218d12eb00f8de957403146522132701a0645
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3aef3476b538a0bb23aa7395ffb5a7f216c6c7a68ff9ec849caec4083aab07fd
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5f73c26475321619da788f02bc5dfa39e953d35292b0e44c815a1d264534b76
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ae38ac09dfc015b057282d7da3e8579b4e8795c5553605412c178b0947f614b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:948ca56b053f56e11e66d405c77732b82bb85e418d1080c5ae696bdf321e898b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.642935377875137,
   "eval_steps": 500,
-  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -217,6 +217,41 @@
       "learning_rate": 5.041420119038218e-05,
       "loss": 0.9874,
       "step": 3000
     }
   ],
   "logging_steps": 100,
@@ -236,7 +271,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4496265837006356e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9167579408543265,
   "eval_steps": 500,
+  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5.041420119038218e-05,
       "loss": 0.9874,
       "step": 3000
+    },
+    {
+      "epoch": 1.6976998904709748,
+      "grad_norm": 2.0732619762420654,
+      "learning_rate": 4.72294262542015e-05,
+      "loss": 0.984,
+      "step": 3100
+    },
+    {
+      "epoch": 1.7524644030668126,
+      "grad_norm": 1.9700407981872559,
+      "learning_rate": 4.405589810975468e-05,
+      "loss": 0.9442,
+      "step": 3200
+    },
+    {
+      "epoch": 1.8072289156626506,
+      "grad_norm": 1.8880764245986938,
+      "learning_rate": 4.090649929090541e-05,
+      "loss": 0.9511,
+      "step": 3300
+    },
+    {
+      "epoch": 1.8619934282584885,
+      "grad_norm": 1.8169358968734741,
+      "learning_rate": 3.7794014381589125e-05,
+      "loss": 0.9123,
+      "step": 3400
+    },
+    {
+      "epoch": 1.9167579408543265,
+      "grad_norm": 2.062743663787842,
+      "learning_rate": 3.473107811842055e-05,
+      "loss": 0.9308,
+      "step": 3500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.6878032741533286e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null