Upload checkpoint 3150

Browse files

Files changed (5) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +122 -3

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2169e58c8989a1e21145cbb3344f682c07fb03f8e1d2b3415d1fb54bf51fe94
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:57dd35bc2f6ffb39aa8b1f29e5f5a136575feb2cfea6fb25c20608557674098f
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9585e567e29098783a70a169ea5b462f55516e649fb34b65e156fb7731d93458
 size 3989163248

 version https://git-lfs.github.com/spec/v1
+oid sha256:e624a7a0172e021f4952abfbdca579a62820eee0e2b3e77971e4d3e381845328
 size 3989163248

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cf824060625e49e48aeba0c2aaf08f1341a310120ee7a5f582fade92bc82a3f
 size 17893874312

 version https://git-lfs.github.com/spec/v1
+oid sha256:6cbc9c739918e3728ff92771938e8d94644dc34caafd6417d182451fadc9bb8f
 size 17893874312

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8df9f252c6ffa775a0f749e6642c4aedea5bcba170d0101e8c19bd63ddb59f2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d016b6de510b811bd11c3b655ffc83267ff09c3e71671a7fad681fb0f1d2317d
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9193357058125742,
   "eval_steps": 500,
-  "global_step": 3100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7238,6 +7238,125 @@
       "learning_rate": 3.236257876723725e-06,
       "loss": 0.5991,
       "step": 3099
     }
   ],
   "logging_steps": 3,
@@ -7257,7 +7376,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.029418363643075e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9341637010676157,
   "eval_steps": 500,
+  "global_step": 3150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.236257876723725e-06,
       "loss": 0.5991,
       "step": 3099
+    },
+    {
+      "epoch": 0.9199288256227758,
+      "grad_norm": 0.259765625,
+      "learning_rate": 3.165897783676275e-06,
+      "loss": 0.5901,
+      "step": 3102
+    },
+    {
+      "epoch": 0.9208185053380783,
+      "grad_norm": 0.27734375,
+      "learning_rate": 3.0962986735020738e-06,
+      "loss": 0.6183,
+      "step": 3105
+    },
+    {
+      "epoch": 0.9217081850533808,
+      "grad_norm": 0.255859375,
+      "learning_rate": 3.027461093154449e-06,
+      "loss": 0.5892,
+      "step": 3108
+    },
+    {
+      "epoch": 0.9225978647686833,
+      "grad_norm": 0.2578125,
+      "learning_rate": 2.959385583602081e-06,
+      "loss": 0.6269,
+      "step": 3111
+    },
+    {
+      "epoch": 0.9234875444839857,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.8920726798248643e-06,
+      "loss": 0.5946,
+      "step": 3114
+    },
+    {
+      "epoch": 0.9243772241992882,
+      "grad_norm": 0.259765625,
+      "learning_rate": 2.8255229108096527e-06,
+      "loss": 0.6192,
+      "step": 3117
+    },
+    {
+      "epoch": 0.9252669039145908,
+      "grad_norm": 0.255859375,
+      "learning_rate": 2.7597367995461086e-06,
+      "loss": 0.6153,
+      "step": 3120
+    },
+    {
+      "epoch": 0.9261565836298933,
+      "grad_norm": 0.265625,
+      "learning_rate": 2.694714863022585e-06,
+      "loss": 0.5831,
+      "step": 3123
+    },
+    {
+      "epoch": 0.9270462633451957,
+      "grad_norm": 0.267578125,
+      "learning_rate": 2.6304576122221035e-06,
+      "loss": 0.5898,
+      "step": 3126
+    },
+    {
+      "epoch": 0.9279359430604982,
+      "grad_norm": 0.2490234375,
+      "learning_rate": 2.566965552118272e-06,
+      "loss": 0.6098,
+      "step": 3129
+    },
+    {
+      "epoch": 0.9288256227758007,
+      "grad_norm": 0.251953125,
+      "learning_rate": 2.504239181671353e-06,
+      "loss": 0.5932,
+      "step": 3132
+    },
+    {
+      "epoch": 0.9297153024911032,
+      "grad_norm": 0.259765625,
+      "learning_rate": 2.4422789938243763e-06,
+      "loss": 0.5877,
+      "step": 3135
+    },
+    {
+      "epoch": 0.9306049822064056,
+      "grad_norm": 0.279296875,
+      "learning_rate": 2.381085475499201e-06,
+      "loss": 0.5755,
+      "step": 3138
+    },
+    {
+      "epoch": 0.9314946619217082,
+      "grad_norm": 0.263671875,
+      "learning_rate": 2.3206591075927376e-06,
+      "loss": 0.5875,
+      "step": 3141
+    },
+    {
+      "epoch": 0.9323843416370107,
+      "grad_norm": 0.265625,
+      "learning_rate": 2.2610003649731092e-06,
+      "loss": 0.6113,
+      "step": 3144
+    },
+    {
+      "epoch": 0.9332740213523132,
+      "grad_norm": 0.2578125,
+      "learning_rate": 2.2021097164760085e-06,
+      "loss": 0.6035,
+      "step": 3147
+    },
+    {
+      "epoch": 0.9341637010676157,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.143987624900945e-06,
+      "loss": 0.5813,
+      "step": 3150
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 2.0621509178953826e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null