Training in progress, step 65, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:492f62c36a1692ce068912b1d628ba12ae76234d0c198404d8ebdcc99d481a0c
 size 125248064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ad064f22dab87d3f89ed5c3c9a6228ec9671fe6c3881dc79eaf9164c1257ffa
 size 125248064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7ff9e56c8601711ed636ec4fe19a91638ec31dae45461988c532ca4a51e34d8
 size 63350356

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7a7ba9645022e500d708b119cb844695141adf068dd664b9a374923a7589910
 size 63350356

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78da1da061288012ca67a39eca3ceec230fb3877f3be89de615db089beb7984c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad16dda5b42ed1a3750dc7c5d4d95215438c97f721863ee8d5f5fbd09779cc98
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a1b67e0370258becf640b62b2d37ad76e97fbb2bcef2aed2b85f7b9a0e5265f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:25af55feb1b9903fcca68f02f47bace5e1e3ee14f2adaa2a9b2e961c20b03a84
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.024,
   "eval_steps": 1000,
-  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -367,13 +367,43 @@
       "learning_rate": 8.247422680412371e-05,
       "loss": 1.7194,
       "step": 60
     }
   ],
   "logging_steps": 1,
   "max_steps": 100,
   "num_train_epochs": 1,
   "save_steps": 5,
-  "total_flos": 6638833090068480.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.026,
   "eval_steps": 1000,
+  "global_step": 65,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.247422680412371e-05,
       "loss": 1.7194,
       "step": 60
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 8.041237113402063e-05,
+      "loss": 1.7635,
+      "step": 61
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 7.835051546391753e-05,
+      "loss": 1.5533,
+      "step": 62
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 7.628865979381443e-05,
+      "loss": 1.4684,
+      "step": 63
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 7.422680412371135e-05,
+      "loss": 1.3388,
+      "step": 64
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 7.216494845360825e-05,
+      "loss": 1.4259,
+      "step": 65
     }
   ],
   "logging_steps": 1,
   "max_steps": 100,
   "num_train_epochs": 1,
   "save_steps": 5,
+  "total_flos": 7198794280796160.0,
   "trial_name": null,
   "trial_params": null
 }