Training in progress, step 1500, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f97955c16cbe0a0381a10ec3d2fbd26beeb4c10488d24310768aee40a561599c
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0dfe6eaecb839955989b9b58a7b228f78777b5b121a351e0934acee0f013e15
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e495e78be39261f05440b17706ebf74600ac85e9a793030da3f93c30e20447b6
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1e577473e69e778538f87d97e263a29d526a20c5bd47adb94dfccf6a7e41443
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98768c7e1a0b380d852747a1f12b21819d559a6f6a06a08ce15a829e61d8c70d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ebb46f96291f41e5c38f69006198515f805c6fdaab17d9124a8bdad701dddfb7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.547645125958379,
   "eval_steps": 500,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77,6 +77,41 @@
       "learning_rate": 9.997234258138696e-05,
       "loss": 1.5673,
       "step": 1000
     }
   ],
   "logging_steps": 100,
@@ -96,7 +131,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.845362695616594e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8214676889375685,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.997234258138696e-05,
       "loss": 1.5673,
       "step": 1000
+    },
+    {
+      "epoch": 0.6024096385542169,
+      "grad_norm": 2.3736095428466797,
+      "learning_rate": 9.987226456522884e-05,
+      "loss": 1.5324,
+      "step": 1100
+    },
+    {
+      "epoch": 0.6571741511500547,
+      "grad_norm": 2.253758192062378,
+      "learning_rate": 9.969929463456831e-05,
+      "loss": 1.52,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7119386637458927,
+      "grad_norm": 2.2543797492980957,
+      "learning_rate": 9.945368559744425e-05,
+      "loss": 1.463,
+      "step": 1300
+    },
+    {
+      "epoch": 0.7667031763417306,
+      "grad_norm": 2.094783067703247,
+      "learning_rate": 9.913579642919276e-05,
+      "loss": 1.4819,
+      "step": 1400
+    },
+    {
+      "epoch": 0.8214676889375685,
+      "grad_norm": 2.028780937194824,
+      "learning_rate": 9.874609174777887e-05,
+      "loss": 1.4738,
+      "step": 1500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 7.241672688292332e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null