Training in progress, step 24400

Browse files

Files changed (7) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +123 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:582d50a5a8f019141e250e840870fee5833dcefaefb338a10e0a481e3b50a51c
 size 2226478553

 version https://git-lfs.github.com/spec/v1
+oid sha256:210a128933aae1983894b72b165841d8274c0d0117d155aea16bf708d2227228
 size 2226478553

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
 size 1113252715

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4015cb96f359b6403e2646b4472afdbae87d2268f8843bd956ec7adf9182921
 size 1113252715

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88a6c9c9d9a0e90d25aefe3b1eec75c6a8d9602de8536842eb2cffe609f70cab
 size 17563

 version https://git-lfs.github.com/spec/v1
+oid sha256:60e36185845bc18a656a31db5076cb45fb810dd43869bde58c051cae5a0e36a7
 size 17563

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23bf027f9216fbe8b17c91852878854c7eb1a9b37ceb42d5492b73cb3332b194
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c8213aaa5f90a2b250062d1c60bbdb54ae5dfd572df54e04a4bade191c0a7c5
 size 559

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4216a163f98df4253a3d2dda8c01570c7d3175dacb0f7d8217e0ca3a27141a6
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:a748bb88559c27203523064ee7efe8a457e4bd76047f9c37d15baf1de97eaec9
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9824590147147672,
-  "global_step": 24200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14526,11 +14526,131 @@
       "learning_rate": 9.353378320662852e-07,
       "loss": 1.0992,
       "step": 24200
     }
   ],
   "max_steps": 24414,
   "num_train_epochs": 2,
-  "total_flos": 3.269571533476485e+18,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9988428888866134,
+  "global_step": 24400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.353378320662852e-07,
       "loss": 1.0992,
       "step": 24200
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 8.968992910224651e-07,
+      "loss": 1.0964,
+      "step": 24210
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 8.541898009737764e-07,
+      "loss": 1.0921,
+      "step": 24220
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 8.114803109250876e-07,
+      "loss": 1.0858,
+      "step": 24230
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 7.687708208763988e-07,
+      "loss": 1.0917,
+      "step": 24240
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 7.260613308277099e-07,
+      "loss": 1.0885,
+      "step": 24250
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 6.833518407790211e-07,
+      "loss": 1.0898,
+      "step": 24260
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 6.406423507303323e-07,
+      "loss": 1.0852,
+      "step": 24270
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 5.979328606816435e-07,
+      "loss": 1.0954,
+      "step": 24280
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 5.552233706329546e-07,
+      "loss": 1.1051,
+      "step": 24290
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 5.125138805842659e-07,
+      "loss": 1.0875,
+      "step": 24300
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 4.6980439053557705e-07,
+      "loss": 1.0967,
+      "step": 24310
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 4.270949004868882e-07,
+      "loss": 1.099,
+      "step": 24320
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 3.843854104381994e-07,
+      "loss": 1.0952,
+      "step": 24330
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 3.4167592038951053e-07,
+      "loss": 1.0911,
+      "step": 24340
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 2.9896643034082176e-07,
+      "loss": 1.0909,
+      "step": 24350
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.5625694029213294e-07,
+      "loss": 1.0973,
+      "step": 24360
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.135474502434441e-07,
+      "loss": 1.0926,
+      "step": 24370
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 1.7083796019475527e-07,
+      "loss": 1.0924,
+      "step": 24380
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 1.2812847014606647e-07,
+      "loss": 1.0848,
+      "step": 24390
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 8.541898009737763e-08,
+      "loss": 1.1018,
+      "step": 24400
     }
   ],
   "max_steps": 24414,
   "num_train_epochs": 2,
+  "total_flos": 3.296592748745349e+18,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
 size 1113252715

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4015cb96f359b6403e2646b4472afdbae87d2268f8843bd956ec7adf9182921
 size 1113252715