Training in progress, step 9900, checkpoint

Files changed (6) hide show

last-checkpoint/model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35ca73b672d977391aef537183758e87218bc9b1f338667515331d5bba2decd1
 size 4978139416

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5744380703f87f970b57582712646d1195547ea2deb809f1e728c012a141e65
 size 4978139416

last-checkpoint/model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41e1f8481ef4b2fa8757c7e2080dad3a9464c495f675c86fe8a2b21eb55e1af4
 size 3659223436

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1dfdf11a2a9cfabe064ab18caf2d667bf041175304c131085bc56d0b7260ec8
 size 3659223436

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72978f2549bdc36afbaafc26e6e615c11be366a830f1f901381cd993ed7e7933
 size 17241500333

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9b7d365602a80d5be1805125aa374b02c292b64a53e40e7d4a6fc4b2c32c3b5
 size 17241500333

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04636db759aef65e061d0a81cea15a49cf6becde8fb0cfb685f69da500fa6b33
 size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbe42a9a6d1a2aa6e8b3204aaff4e79e9911c21ae2de7ab694db56cf080fb011
 size 14567

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad3c21cfca6b96706a9c82298c8390f24827b562f6561d40e5e026d6ae4092b3
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:e024ad4d2c469d51bc12b39f389559da78567c0b22ee2b58dd4cac9872e0c9b3
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.083350873500316,
   "eval_steps": 100,
-  "global_step": 9700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1559,6 +1559,38 @@
       "eval_samples_per_second": 26.17,
       "eval_steps_per_second": 3.272,
       "step": 9700
     }
   ],
   "logging_steps": 100,
@@ -1566,7 +1598,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 30,
   "save_steps": 100,
-  "total_flos": 1.0629240419863537e+20,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.167543675015786,
   "eval_steps": 100,
+  "global_step": 9900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.17,
       "eval_steps_per_second": 3.272,
       "step": 9700
+    },
+    {
+      "epoch": 4.13,
+      "grad_norm": 27.368144989013672,
+      "learning_rate": 4.343462897526502e-05,
+      "loss": 1.6976,
+      "step": 9800
+    },
+    {
+      "epoch": 4.13,
+      "eval_cer": 0.4800359782566188,
+      "eval_loss": 2.5932295322418213,
+      "eval_runtime": 371.4743,
+      "eval_samples_per_second": 25.515,
+      "eval_steps_per_second": 3.19,
+      "step": 9800
+    },
+    {
+      "epoch": 4.17,
+      "grad_norm": 4.671775817871094,
+      "learning_rate": 4.336395759717315e-05,
+      "loss": 1.7712,
+      "step": 9900
+    },
+    {
+      "epoch": 4.17,
+      "eval_cer": 0.4871509718039967,
+      "eval_loss": 2.37919282913208,
+      "eval_runtime": 357.2849,
+      "eval_samples_per_second": 26.528,
+      "eval_steps_per_second": 3.317,
+      "step": 9900
     }
   ],
   "logging_steps": 100,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 30,
   "save_steps": 100,
+  "total_flos": 1.0845390616473847e+20,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null