Training in progress, step 12000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3866dd2a6475ce2b7385f181f047f2dde98048fd09569a5804ff351fd5e9912f
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:0426db5bf32594814a8c37e618d6398246b9271f06265c61e6b718a3b1d4b11f
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fa541d3192bd288c7edb3660d558e142b5e462f59453619d7373f84590e81f7
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:109669df17b5f6b02118f596a85fbd94dfbe94bfa26ddbed9e0a32a469bb37b2
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fe8dfec12d6b343eb28ee2b43b75b45c3ae2d185dfc75ecd757d9ec7c567e31
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0ddcf15b11b5d2f3a427728cce5454c9ef039ba8bfd4e438abdef47917e7a39
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2504d9c9451078f79b98032c170c402f3ffeb92d11496a90427e835bd48cd055
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff57c48e4ef354be063370e4a408d39b39e1d0cde79b3adacb1e6416a8aedc46
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.7797173732335825,
   "eval_steps": 500,
-  "global_step": 11500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -200,6 +200,13 @@
       "learning_rate": 8.89443059019119e-07,
       "loss": 0.3117,
       "step": 11500
     }
   ],
   "logging_steps": 500,
@@ -219,7 +226,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.800962713419776e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.987531172069826,
   "eval_steps": 500,
+  "global_step": 12000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.89443059019119e-07,
       "loss": 0.3117,
       "step": 11500
+    },
+    {
+      "epoch": 4.987531172069826,
+      "grad_norm": 0.9830431342124939,
+      "learning_rate": 5.8187863674147975e-08,
+      "loss": 0.3083,
+      "step": 12000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 2.922754291531776e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null