Training in progress, step 370000

Browse files

Files changed (13) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3622d2c4bf20d15081328014fbc84571924f388ee37dce9d92056a095aeebdc
 size 202194449

 version https://git-lfs.github.com/spec/v1
+oid sha256:df6d3d3f9674103740b8f59e2a1f3f36fbba555fa4f14347ef60833e8c7c8d0f
 size 202194449

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ca3e8665bb839f68d9a5ff24783ce35763f85a0cda61cb52ae923170ae03041
 size 102501541

 version https://git-lfs.github.com/spec/v1
+oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
 size 102501541

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
 size 14503

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 8.387893473752884,
-  "global_step": 360000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7206,11 +7206,211 @@
       "eval_samples_per_second": 1504.148,
       "eval_steps_per_second": 23.951,
       "step": 360000
     }
   ],
   "max_steps": 500000,
   "num_train_epochs": 12,
-  "total_flos": 1.1501517019954746e+22,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 8.620890514690464,
+  "global_step": 370000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1504.148,
       "eval_steps_per_second": 23.951,
       "step": 360000
+    },
+    {
+      "epoch": 8.4,
+      "learning_rate": 6.746049527150238e-05,
+      "loss": 0.2596,
+      "step": 360500
+    },
+    {
+      "epoch": 8.41,
+      "learning_rate": 6.707871421704209e-05,
+      "loss": 0.2596,
+      "step": 361000
+    },
+    {
+      "epoch": 8.41,
+      "eval_loss": 0.24351955950260162,
+      "eval_runtime": 1.4786,
+      "eval_samples_per_second": 1486.571,
+      "eval_steps_per_second": 23.672,
+      "step": 361000
+    },
+    {
+      "epoch": 8.42,
+      "learning_rate": 6.669789465567683e-05,
+      "loss": 0.2593,
+      "step": 361500
+    },
+    {
+      "epoch": 8.43,
+      "learning_rate": 6.631804075198838e-05,
+      "loss": 0.2595,
+      "step": 362000
+    },
+    {
+      "epoch": 8.43,
+      "eval_loss": 0.2438839077949524,
+      "eval_runtime": 1.4636,
+      "eval_samples_per_second": 1501.733,
+      "eval_steps_per_second": 23.913,
+      "step": 362000
+    },
+    {
+      "epoch": 8.45,
+      "learning_rate": 6.593915665999816e-05,
+      "loss": 0.2593,
+      "step": 362500
+    },
+    {
+      "epoch": 8.46,
+      "learning_rate": 6.55612465231219e-05,
+      "loss": 0.2593,
+      "step": 363000
+    },
+    {
+      "epoch": 8.46,
+      "eval_loss": 0.24523594975471497,
+      "eval_runtime": 1.4077,
+      "eval_samples_per_second": 1561.38,
+      "eval_steps_per_second": 24.863,
+      "step": 363000
+    },
+    {
+      "epoch": 8.47,
+      "learning_rate": 6.518431447412434e-05,
+      "loss": 0.2593,
+      "step": 363500
+    },
+    {
+      "epoch": 8.48,
+      "learning_rate": 6.480836463507392e-05,
+      "loss": 0.2593,
+      "step": 364000
+    },
+    {
+      "epoch": 8.48,
+      "eval_loss": 0.24123093485832214,
+      "eval_runtime": 1.4526,
+      "eval_samples_per_second": 1513.098,
+      "eval_steps_per_second": 24.094,
+      "step": 364000
+    },
+    {
+      "epoch": 8.49,
+      "learning_rate": 6.443340111729786e-05,
+      "loss": 0.259,
+      "step": 364500
+    },
+    {
+      "epoch": 8.5,
+      "learning_rate": 6.405942802133713e-05,
+      "loss": 0.2592,
+      "step": 365000
+    },
+    {
+      "epoch": 8.5,
+      "eval_loss": 0.24300608038902283,
+      "eval_runtime": 1.5063,
+      "eval_samples_per_second": 1459.248,
+      "eval_steps_per_second": 23.236,
+      "step": 365000
+    },
+    {
+      "epoch": 8.52,
+      "learning_rate": 6.36864494369016e-05,
+      "loss": 0.259,
+      "step": 365500
+    },
+    {
+      "epoch": 8.53,
+      "learning_rate": 6.331446944282534e-05,
+      "loss": 0.2587,
+      "step": 366000
+    },
+    {
+      "epoch": 8.53,
+      "eval_loss": 0.24051520228385925,
+      "eval_runtime": 1.4527,
+      "eval_samples_per_second": 1513.01,
+      "eval_steps_per_second": 24.093,
+      "step": 366000
+    },
+    {
+      "epoch": 8.54,
+      "learning_rate": 6.294349210702188e-05,
+      "loss": 0.2589,
+      "step": 366500
+    },
+    {
+      "epoch": 8.55,
+      "learning_rate": 6.257352148643998e-05,
+      "loss": 0.2591,
+      "step": 367000
+    },
+    {
+      "epoch": 8.55,
+      "eval_loss": 0.2446797788143158,
+      "eval_runtime": 1.4184,
+      "eval_samples_per_second": 1549.604,
+      "eval_steps_per_second": 24.675,
+      "step": 367000
+    },
+    {
+      "epoch": 8.56,
+      "learning_rate": 6.220456162701908e-05,
+      "loss": 0.2589,
+      "step": 367500
+    },
+    {
+      "epoch": 8.57,
+      "learning_rate": 6.183661656364515e-05,
+      "loss": 0.2586,
+      "step": 368000
+    },
+    {
+      "epoch": 8.57,
+      "eval_loss": 0.24278691411018372,
+      "eval_runtime": 1.4508,
+      "eval_samples_per_second": 1515.055,
+      "eval_steps_per_second": 24.125,
+      "step": 368000
+    },
+    {
+      "epoch": 8.59,
+      "learning_rate": 6.146969032010631e-05,
+      "loss": 0.2589,
+      "step": 368500
+    },
+    {
+      "epoch": 8.6,
+      "learning_rate": 6.110378690904928e-05,
+      "loss": 0.259,
+      "step": 369000
+    },
+    {
+      "epoch": 8.6,
+      "eval_loss": 0.24149444699287415,
+      "eval_runtime": 1.4509,
+      "eval_samples_per_second": 1514.912,
+      "eval_steps_per_second": 24.123,
+      "step": 369000
+    },
+    {
+      "epoch": 8.61,
+      "learning_rate": 6.073891033193507e-05,
+      "loss": 0.259,
+      "step": 369500
+    },
+    {
+      "epoch": 8.62,
+      "learning_rate": 6.037506457899553e-05,
+      "loss": 0.2588,
+      "step": 370000
+    },
+    {
+      "epoch": 8.62,
+      "eval_loss": 0.23884567618370056,
+      "eval_runtime": 1.4545,
+      "eval_samples_per_second": 1511.147,
+      "eval_steps_per_second": 24.063,
+      "step": 370000
     }
   ],
   "max_steps": 500000,
   "num_train_epochs": 12,
+  "total_flos": 1.1821004158729246e+22,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ca3e8665bb839f68d9a5ff24783ce35763f85a0cda61cb52ae923170ae03041
 size 102501541

 version https://git-lfs.github.com/spec/v1
+oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
 size 102501541