Training in progress, step 178500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +80 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d586d26b3bc11753f90bfdd44b6176d6e5f7c73010c731f933ccbacbe1da0d18
 size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f576c297c0eae7ededb3822e0fd97b204d6a332ff9c3b8d7ccd818ebf391761
 size 891644712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb31ef9b4e992bea78c8e73acaf0ac0b55d421befd26f3a71b566a864a42e688
 size 1783444357

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e4dd6b31039b01a958cd2e90946071f91e4689e9d9aca8c7e434cc905cde9ca
 size 1783444357

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9c578aa1d7020e4e5db26d4f55be115b74b2ac40930ca1256424dc52199113f
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9e95bfd6b2f51d2614c1dceffb80aed647095f9f81402665bc8a637aab51a24
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4f9081a0528121361fc8797ecc1e7c82e2cf08ea59233116797e8af7b086270
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:207898f141fbde837147332552977457f374b23313c4b963d8a62a4e111187ed
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.1072,
   "eval_steps": 500,
-  "global_step": 173000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2429,6 +2429,83 @@
       "learning_rate": 2.233024e-05,
       "loss": 0.2686,
       "step": 173000
     }
   ],
   "logging_steps": 500,
@@ -2448,7 +2525,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.4279772053504e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.1424,
   "eval_steps": 500,
+  "global_step": 178500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.233024e-05,
       "loss": 0.2686,
       "step": 173000
+    },
+    {
+      "epoch": 1.1104,
+      "grad_norm": 0.6357282996177673,
+      "learning_rate": 2.225024e-05,
+      "loss": 0.2639,
+      "step": 173500
+    },
+    {
+      "epoch": 1.1136,
+      "grad_norm": 0.5262208580970764,
+      "learning_rate": 2.2170400000000004e-05,
+      "loss": 0.2641,
+      "step": 174000
+    },
+    {
+      "epoch": 1.1168,
+      "grad_norm": 0.6878075003623962,
+      "learning_rate": 2.20904e-05,
+      "loss": 0.2654,
+      "step": 174500
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.5332186222076416,
+      "learning_rate": 2.2010400000000002e-05,
+      "loss": 0.2638,
+      "step": 175000
+    },
+    {
+      "epoch": 1.1232,
+      "grad_norm": 0.5562476515769958,
+      "learning_rate": 2.19304e-05,
+      "loss": 0.2648,
+      "step": 175500
+    },
+    {
+      "epoch": 1.1264,
+      "grad_norm": 0.5924221277236938,
+      "learning_rate": 2.18504e-05,
+      "loss": 0.2627,
+      "step": 176000
+    },
+    {
+      "epoch": 1.1296,
+      "grad_norm": 0.5250386595726013,
+      "learning_rate": 2.17704e-05,
+      "loss": 0.2619,
+      "step": 176500
+    },
+    {
+      "epoch": 1.1328,
+      "grad_norm": 0.7426069378852844,
+      "learning_rate": 2.16904e-05,
+      "loss": 0.2628,
+      "step": 177000
+    },
+    {
+      "epoch": 1.1360000000000001,
+      "grad_norm": 0.4925951063632965,
+      "learning_rate": 2.16104e-05,
+      "loss": 0.2661,
+      "step": 177500
+    },
+    {
+      "epoch": 1.1392,
+      "grad_norm": 0.5707270503044128,
+      "learning_rate": 2.15304e-05,
+      "loss": 0.2622,
+      "step": 178000
+    },
+    {
+      "epoch": 1.1424,
+      "grad_norm": 0.5793021321296692,
+      "learning_rate": 2.14504e-05,
+      "loss": 0.2671,
+      "step": 178500
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 8.6959186771968e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null