Training in progress, step 24200

Browse files

Files changed (7) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +123 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83610511bf655017bef6d56a87dc4398518746f6cb634305881687d4e9150310
 size 2226478553

 version https://git-lfs.github.com/spec/v1
+oid sha256:582d50a5a8f019141e250e840870fee5833dcefaefb338a10e0a481e3b50a51c
 size 2226478553

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5939e05ea79762928ba2ffbc3aab0bbb4a9497c4028bf156702d8652c176c5c3
 size 1113252715

 version https://git-lfs.github.com/spec/v1
+oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
 size 1113252715

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee1fbec3c9acaf1c116f8c0eed78eaf363d1d6ca3fedc4eb95e1c9cc80326ac3
 size 17563

 version https://git-lfs.github.com/spec/v1
+oid sha256:88a6c9c9d9a0e90d25aefe3b1eec75c6a8d9602de8536842eb2cffe609f70cab
 size 17563

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81d62c4b0994f9b5cb61c52fa5a98a0dd932b4d2df04a8e35e3e4f5ab8129258
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:23bf027f9216fbe8b17c91852878854c7eb1a9b37ceb42d5492b73cb3332b194
 size 559

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:557da7379fb4a9930f4429bc4aeda90eb5da85c9e16fa137579aefdcef084998
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4216a163f98df4253a3d2dda8c01570c7d3175dacb0f7d8217e0ca3a27141a6
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9660751405429207,
-  "global_step": 24000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14406,11 +14406,131 @@
       "learning_rate": 1.7895276330400616e-06,
       "loss": 1.08,
       "step": 24000
     }
   ],
   "max_steps": 24414,
   "num_train_epochs": 2,
-  "total_flos": 3.242550318207621e+18,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9824590147147672,
+  "global_step": 24200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.7895276330400616e-06,
       "loss": 1.08,
       "step": 24000
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.7468181429913726e-06,
+      "loss": 1.0849,
+      "step": 24010
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.704108652942684e-06,
+      "loss": 1.0995,
+      "step": 24020
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.661399162893995e-06,
+      "loss": 1.0837,
+      "step": 24030
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.6186896728453061e-06,
+      "loss": 1.0905,
+      "step": 24040
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.5759801827966177e-06,
+      "loss": 1.0942,
+      "step": 24050
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.5332706927479286e-06,
+      "loss": 1.0924,
+      "step": 24060
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.49056120269924e-06,
+      "loss": 1.0925,
+      "step": 24070
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.447851712650551e-06,
+      "loss": 1.0886,
+      "step": 24080
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.4051422226018622e-06,
+      "loss": 1.1016,
+      "step": 24090
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 1.3624327325531735e-06,
+      "loss": 1.0909,
+      "step": 24100
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.3197232425044844e-06,
+      "loss": 1.1014,
+      "step": 24110
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.2770137524557958e-06,
+      "loss": 1.0913,
+      "step": 24120
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.234304262407107e-06,
+      "loss": 1.0932,
+      "step": 24130
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.191594772358418e-06,
+      "loss": 1.1051,
+      "step": 24140
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.1488852823097293e-06,
+      "loss": 1.0882,
+      "step": 24150
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.1061757922610405e-06,
+      "loss": 1.0917,
+      "step": 24160
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.0634663022123516e-06,
+      "loss": 1.0956,
+      "step": 24170
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 1.020756812163663e-06,
+      "loss": 1.0957,
+      "step": 24180
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 9.780473221149738e-07,
+      "loss": 1.0886,
+      "step": 24190
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 9.353378320662852e-07,
+      "loss": 1.0992,
+      "step": 24200
     }
   ],
   "max_steps": 24414,
   "num_train_epochs": 2,
+  "total_flos": 3.269571533476485e+18,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5939e05ea79762928ba2ffbc3aab0bbb4a9497c4028bf156702d8652c176c5c3
 size 1113252715

 version https://git-lfs.github.com/spec/v1
+oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
 size 1113252715