Training in progress, step 430000

Browse files

Files changed (13) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb8b7da07a19b89f2cab9e7ae5ecfface2ed5a4207f59160b87fe1ed401ae453
 size 202194449

 version https://git-lfs.github.com/spec/v1
+oid sha256:415b6b8c1b4d1a9fcb0ed4ef3046c0ad8d1ace085712e9a63b9c3ad1cdd80c3b
 size 202194449

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5991ca4f8bc9f7681639a0a614f6fc9c7ec3e7b33dfe9e1afc9ff63491198511
 size 102501541

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7c87e28b6a6f33ab4f0a8c8ae59e95a04117be20f20068b2e46c87ef398e799
 size 102501541

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
 size 14503

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:f427c751ea4b109969727e0c5f2ef9ef6fd7587de8192ab50fc2201ab4ba3ed9
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.785875719378364,
-  "global_step": 420000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8406,11 +8406,211 @@
       "eval_samples_per_second": 1522.319,
       "eval_steps_per_second": 24.241,
       "step": 420000
     }
   ],
   "max_steps": 500000,
   "num_train_epochs": 12,
-  "total_flos": 1.341843735561084e+22,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 10.018872760315944,
+  "global_step": 430000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1522.319,
       "eval_steps_per_second": 24.241,
       "step": 420000
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 2.9586456905128618e-05,
+      "loss": 0.2539,
+      "step": 420500
+    },
+    {
+      "epoch": 9.81,
+      "learning_rate": 2.9346475051519687e-05,
+      "loss": 0.2539,
+      "step": 421000
+    },
+    {
+      "epoch": 9.81,
+      "eval_loss": 0.23778527975082397,
+      "eval_runtime": 1.4672,
+      "eval_samples_per_second": 1498.116,
+      "eval_steps_per_second": 23.855,
+      "step": 421000
+    },
+    {
+      "epoch": 9.82,
+      "learning_rate": 2.910786732472815e-05,
+      "loss": 0.2538,
+      "step": 421500
+    },
+    {
+      "epoch": 9.83,
+      "learning_rate": 2.887063633412981e-05,
+      "loss": 0.254,
+      "step": 422000
+    },
+    {
+      "epoch": 9.83,
+      "eval_loss": 0.23553605377674103,
+      "eval_runtime": 1.4917,
+      "eval_samples_per_second": 1473.465,
+      "eval_steps_per_second": 23.463,
+      "step": 422000
+    },
+    {
+      "epoch": 9.84,
+      "learning_rate": 2.863478467404478e-05,
+      "loss": 0.2538,
+      "step": 422500
+    },
+    {
+      "epoch": 9.86,
+      "learning_rate": 2.8400314923709112e-05,
+      "loss": 0.2537,
+      "step": 423000
+    },
+    {
+      "epoch": 9.86,
+      "eval_loss": 0.23579563200473785,
+      "eval_runtime": 1.4277,
+      "eval_samples_per_second": 1539.579,
+      "eval_steps_per_second": 24.516,
+      "step": 423000
+    },
+    {
+      "epoch": 9.87,
+      "learning_rate": 2.816722964724636e-05,
+      "loss": 0.2537,
+      "step": 423500
+    },
+    {
+      "epoch": 9.88,
+      "learning_rate": 2.793553139363981e-05,
+      "loss": 0.2536,
+      "step": 424000
+    },
+    {
+      "epoch": 9.88,
+      "eval_loss": 0.23610466718673706,
+      "eval_runtime": 1.4604,
+      "eval_samples_per_second": 1505.035,
+      "eval_steps_per_second": 23.966,
+      "step": 424000
+    },
+    {
+      "epoch": 9.89,
+      "learning_rate": 2.7705222696704366e-05,
+      "loss": 0.2536,
+      "step": 424500
+    },
+    {
+      "epoch": 9.9,
+      "learning_rate": 2.7476306075059096e-05,
+      "loss": 0.2534,
+      "step": 425000
+    },
+    {
+      "epoch": 9.9,
+      "eval_loss": 0.2384994477033615,
+      "eval_runtime": 1.4421,
+      "eval_samples_per_second": 1524.185,
+      "eval_steps_per_second": 24.27,
+      "step": 425000
+    },
+    {
+      "epoch": 9.91,
+      "learning_rate": 2.7248784032099478e-05,
+      "loss": 0.2536,
+      "step": 425500
+    },
+    {
+      "epoch": 9.93,
+      "learning_rate": 2.7022659055970144e-05,
+      "loss": 0.2534,
+      "step": 426000
+    },
+    {
+      "epoch": 9.93,
+      "eval_loss": 0.23720860481262207,
+      "eval_runtime": 1.4451,
+      "eval_samples_per_second": 1521.023,
+      "eval_steps_per_second": 24.22,
+      "step": 426000
+    },
+    {
+      "epoch": 9.94,
+      "learning_rate": 2.6797933619537604e-05,
+      "loss": 0.2534,
+      "step": 426500
+    },
+    {
+      "epoch": 9.95,
+      "learning_rate": 2.6574610180363166e-05,
+      "loss": 0.2535,
+      "step": 427000
+    },
+    {
+      "epoch": 9.95,
+      "eval_loss": 0.23618370294570923,
+      "eval_runtime": 1.4698,
+      "eval_samples_per_second": 1495.473,
+      "eval_steps_per_second": 23.813,
+      "step": 427000
+    },
+    {
+      "epoch": 9.96,
+      "learning_rate": 2.6352691180676286e-05,
+      "loss": 0.2532,
+      "step": 427500
+    },
+    {
+      "epoch": 9.97,
+      "learning_rate": 2.6132179047347505e-05,
+      "loss": 0.2535,
+      "step": 428000
+    },
+    {
+      "epoch": 9.97,
+      "eval_loss": 0.23612357676029205,
+      "eval_runtime": 1.4655,
+      "eval_samples_per_second": 1499.84,
+      "eval_steps_per_second": 23.883,
+      "step": 428000
+    },
+    {
+      "epoch": 9.98,
+      "learning_rate": 2.5913076191862238e-05,
+      "loss": 0.2534,
+      "step": 428500
+    },
+    {
+      "epoch": 10.0,
+      "learning_rate": 2.5695385010294165e-05,
+      "loss": 0.2532,
+      "step": 429000
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 0.23688232898712158,
+      "eval_runtime": 1.4807,
+      "eval_samples_per_second": 1484.441,
+      "eval_steps_per_second": 23.638,
+      "step": 429000
+    },
+    {
+      "epoch": 10.01,
+      "learning_rate": 2.5479107883279144e-05,
+      "loss": 0.2533,
+      "step": 429500
+    },
+    {
+      "epoch": 10.02,
+      "learning_rate": 2.5264247175989292e-05,
+      "loss": 0.2533,
+      "step": 430000
+    },
+    {
+      "epoch": 10.02,
+      "eval_loss": 0.2365807592868805,
+      "eval_runtime": 1.4314,
+      "eval_samples_per_second": 1535.576,
+      "eval_steps_per_second": 24.452,
+      "step": 430000
     }
   ],
   "max_steps": 500000,
   "num_train_epochs": 12,
+  "total_flos": 1.3737921997394432e+22,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5991ca4f8bc9f7681639a0a614f6fc9c7ec3e7b33dfe9e1afc9ff63491198511
 size 102501541

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7c87e28b6a6f33ab4f0a8c8ae59e95a04117be20f20068b2e46c87ef398e799
 size 102501541