Training in progress, step 400000

Browse files

Files changed (13) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3ec63277b76b09811bbdd2d480a7f996b4a3ffed8bebe328de7594fd1352f61
 size 202194449

 version https://git-lfs.github.com/spec/v1
+oid sha256:b700ebc6fc16103b0c351577b0fe7b7a9955520a42d8ff4c426cb4c02c884230
 size 202194449

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2b105f9c88c11eebbcdaeb34a846701e3b5103c2b4d7fbe6a23d3a666e3272c
 size 102501541

 version https://git-lfs.github.com/spec/v1
+oid sha256:6331caf84d401728c3e2540374852d6e489eae28c3099656d3c86fe297980cc4
 size 102501541

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3030b698db0941f5555a8625c40cb49a93fb370be875ba63b732087f65663441
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
 size 14503

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:864647684ab3694f7aa2a258c1806e10c4abf99f67ed5e54443050e485ac9436
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ba83cbca80cb672828600b248dd69c4c050beb355cdcf7faf0b56212421edca
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.086884596565623,
-  "global_step": 390000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7806,11 +7806,211 @@
       "eval_samples_per_second": 1489.497,
       "eval_steps_per_second": 23.718,
       "step": 390000
     }
   ],
   "max_steps": 500000,
   "num_train_epochs": 12,
-  "total_flos": 1.245997593928734e+22,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.319881637503205,
+  "global_step": 400000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1489.497,
       "eval_steps_per_second": 23.718,
       "step": 390000
+    },
+    {
+      "epoch": 9.1,
+      "learning_rate": 4.639259509788768e-05,
+      "loss": 0.2564,
+      "step": 390500
+    },
+    {
+      "epoch": 9.11,
+      "learning_rate": 4.60754920716572e-05,
+      "loss": 0.2566,
+      "step": 391000
+    },
+    {
+      "epoch": 9.11,
+      "eval_loss": 0.23735950887203217,
+      "eval_runtime": 1.4271,
+      "eval_samples_per_second": 1540.156,
+      "eval_steps_per_second": 24.525,
+      "step": 391000
+    },
+    {
+      "epoch": 9.12,
+      "learning_rate": 4.5759580226394167e-05,
+      "loss": 0.2566,
+      "step": 391500
+    },
+    {
+      "epoch": 9.13,
+      "learning_rate": 4.544486301685993e-05,
+      "loss": 0.2564,
+      "step": 392000
+    },
+    {
+      "epoch": 9.13,
+      "eval_loss": 0.2416989952325821,
+      "eval_runtime": 1.449,
+      "eval_samples_per_second": 1516.918,
+      "eval_steps_per_second": 24.155,
+      "step": 392000
+    },
+    {
+      "epoch": 9.15,
+      "learning_rate": 4.5131343884751484e-05,
+      "loss": 0.2562,
+      "step": 392500
+    },
+    {
+      "epoch": 9.16,
+      "learning_rate": 4.4819026258663774e-05,
+      "loss": 0.2565,
+      "step": 393000
+    },
+    {
+      "epoch": 9.16,
+      "eval_loss": 0.24084173142910004,
+      "eval_runtime": 1.456,
+      "eval_samples_per_second": 1509.619,
+      "eval_steps_per_second": 24.039,
+      "step": 393000
+    },
+    {
+      "epoch": 9.17,
+      "learning_rate": 4.450791355405234e-05,
+      "loss": 0.2565,
+      "step": 393500
+    },
+    {
+      "epoch": 9.18,
+      "learning_rate": 4.419800917319588e-05,
+      "loss": 0.2566,
+      "step": 394000
+    },
+    {
+      "epoch": 9.18,
+      "eval_loss": 0.23908619582653046,
+      "eval_runtime": 1.4522,
+      "eval_samples_per_second": 1513.612,
+      "eval_steps_per_second": 24.102,
+      "step": 394000
+    },
+    {
+      "epoch": 9.19,
+      "learning_rate": 4.3889316505159056e-05,
+      "loss": 0.2564,
+      "step": 394500
+    },
+    {
+      "epoch": 9.2,
+      "learning_rate": 4.3581838925755465e-05,
+      "loss": 0.2561,
+      "step": 395000
+    },
+    {
+      "epoch": 9.2,
+      "eval_loss": 0.23751436173915863,
+      "eval_runtime": 1.4847,
+      "eval_samples_per_second": 1480.475,
+      "eval_steps_per_second": 23.574,
+      "step": 395000
+    },
+    {
+      "epoch": 9.22,
+      "learning_rate": 4.327557979751057e-05,
+      "loss": 0.2562,
+      "step": 395500
+    },
+    {
+      "epoch": 9.23,
+      "learning_rate": 4.297054246962517e-05,
+      "loss": 0.2562,
+      "step": 396000
+    },
+    {
+      "epoch": 9.23,
+      "eval_loss": 0.23929938673973083,
+      "eval_runtime": 1.4334,
+      "eval_samples_per_second": 1533.416,
+      "eval_steps_per_second": 24.417,
+      "step": 396000
+    },
+    {
+      "epoch": 9.24,
+      "learning_rate": 4.266673027793864e-05,
+      "loss": 0.2562,
+      "step": 396500
+    },
+    {
+      "epoch": 9.25,
+      "learning_rate": 4.236414654489242e-05,
+      "loss": 0.256,
+      "step": 397000
+    },
+    {
+      "epoch": 9.25,
+      "eval_loss": 0.24186982214450836,
+      "eval_runtime": 1.4512,
+      "eval_samples_per_second": 1514.618,
+      "eval_steps_per_second": 24.118,
+      "step": 397000
+    },
+    {
+      "epoch": 9.26,
+      "learning_rate": 4.206279457949371e-05,
+      "loss": 0.2558,
+      "step": 397500
+    },
+    {
+      "epoch": 9.27,
+      "learning_rate": 4.1762677677279335e-05,
+      "loss": 0.2561,
+      "step": 398000
+    },
+    {
+      "epoch": 9.27,
+      "eval_loss": 0.24197684228420258,
+      "eval_runtime": 1.4439,
+      "eval_samples_per_second": 1522.288,
+      "eval_steps_per_second": 24.24,
+      "step": 398000
+    },
+    {
+      "epoch": 9.28,
+      "learning_rate": 4.146379912027964e-05,
+      "loss": 0.2561,
+      "step": 398500
+    },
+    {
+      "epoch": 9.3,
+      "learning_rate": 4.1166162176982664e-05,
+      "loss": 0.2554,
+      "step": 399000
+    },
+    {
+      "epoch": 9.3,
+      "eval_loss": 0.24182839691638947,
+      "eval_runtime": 1.4359,
+      "eval_samples_per_second": 1530.716,
+      "eval_steps_per_second": 24.374,
+      "step": 399000
+    },
+    {
+      "epoch": 9.31,
+      "learning_rate": 4.086977010229838e-05,
+      "loss": 0.2557,
+      "step": 399500
+    },
+    {
+      "epoch": 9.32,
+      "learning_rate": 4.057462613752294e-05,
+      "loss": 0.2559,
+      "step": 400000
+    },
+    {
+      "epoch": 9.32,
+      "eval_loss": 0.23793195188045502,
+      "eval_runtime": 1.413,
+      "eval_samples_per_second": 1555.592,
+      "eval_steps_per_second": 24.771,
+      "step": 400000
     }
   ],
   "max_steps": 500000,
   "num_train_epochs": 12,
+  "total_flos": 1.277946307806184e+22,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2b105f9c88c11eebbcdaeb34a846701e3b5103c2b4d7fbe6a23d3a666e3272c
 size 102501541

 version https://git-lfs.github.com/spec/v1
+oid sha256:6331caf84d401728c3e2540374852d6e489eae28c3099656d3c86fe297980cc4
 size 102501541