Upload checkpoint 3200

Browse files

Files changed (5) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +115 -3

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57dd35bc2f6ffb39aa8b1f29e5f5a136575feb2cfea6fb25c20608557674098f
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:e57e3bfc2074a9ad40f5be7d0742e4d85689e9cfc6394e051207e55781a96a00
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e624a7a0172e021f4952abfbdca579a62820eee0e2b3e77971e4d3e381845328
 size 3989163248

 version https://git-lfs.github.com/spec/v1
+oid sha256:6db52593f9080fddf1220f3d831f1ab34cd8350f2c5e814ac3bd9e263f00cbed
 size 3989163248

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cbc9c739918e3728ff92771938e8d94644dc34caafd6417d182451fadc9bb8f
 size 17893874312

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff165cb496fd7093d743314d1f456b971f98bc37717d074d6a47adb5ffdca3f7
 size 17893874312

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d016b6de510b811bd11c3b655ffc83267ff09c3e71671a7fad681fb0f1d2317d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:feda042eff907491a34cdbf179b85f590d010493a95828afe0b5cdb1928ddd85
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9341637010676157,
   "eval_steps": 500,
-  "global_step": 3150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7357,6 +7357,118 @@
       "learning_rate": 2.143987624900945e-06,
       "loss": 0.5813,
       "step": 3150
     }
   ],
   "logging_steps": 3,
@@ -7376,7 +7488,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.0621509178953826e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9489916963226572,
   "eval_steps": 500,
+  "global_step": 3200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.143987624900945e-06,
       "loss": 0.5813,
       "step": 3150
+    },
+    {
+      "epoch": 0.9350533807829181,
+      "grad_norm": 0.259765625,
+      "learning_rate": 2.0866345470076044e-06,
+      "loss": 0.589,
+      "step": 3153
+    },
+    {
+      "epoch": 0.9359430604982206,
+      "grad_norm": 0.279296875,
+      "learning_rate": 2.0300509335123283e-06,
+      "loss": 0.5971,
+      "step": 3156
+    },
+    {
+      "epoch": 0.9368327402135231,
+      "grad_norm": 0.255859375,
+      "learning_rate": 1.974237229084497e-06,
+      "loss": 0.5808,
+      "step": 3159
+    },
+    {
+      "epoch": 0.9377224199288257,
+      "grad_norm": 0.263671875,
+      "learning_rate": 1.9191938723430615e-06,
+      "loss": 0.6167,
+      "step": 3162
+    },
+    {
+      "epoch": 0.9386120996441281,
+      "grad_norm": 0.2578125,
+      "learning_rate": 1.8649212958531282e-06,
+      "loss": 0.6088,
+      "step": 3165
+    },
+    {
+      "epoch": 0.9395017793594306,
+      "grad_norm": 0.259765625,
+      "learning_rate": 1.8114199261224928e-06,
+      "loss": 0.5884,
+      "step": 3168
+    },
+    {
+      "epoch": 0.9403914590747331,
+      "grad_norm": 0.265625,
+      "learning_rate": 1.7586901835983437e-06,
+      "loss": 0.6122,
+      "step": 3171
+    },
+    {
+      "epoch": 0.9412811387900356,
+      "grad_norm": 0.259765625,
+      "learning_rate": 1.7067324826639419e-06,
+      "loss": 0.6036,
+      "step": 3174
+    },
+    {
+      "epoch": 0.9421708185053381,
+      "grad_norm": 0.2578125,
+      "learning_rate": 1.655547231635368e-06,
+      "loss": 0.598,
+      "step": 3177
+    },
+    {
+      "epoch": 0.9430604982206405,
+      "grad_norm": 0.26171875,
+      "learning_rate": 1.6051348327583037e-06,
+      "loss": 0.6078,
+      "step": 3180
+    },
+    {
+      "epoch": 0.943950177935943,
+      "grad_norm": 0.251953125,
+      "learning_rate": 1.5554956822048661e-06,
+      "loss": 0.5955,
+      "step": 3183
+    },
+    {
+      "epoch": 0.9448398576512456,
+      "grad_norm": 0.271484375,
+      "learning_rate": 1.5066301700705331e-06,
+      "loss": 0.589,
+      "step": 3186
+    },
+    {
+      "epoch": 0.9457295373665481,
+      "grad_norm": 0.263671875,
+      "learning_rate": 1.4585386803710021e-06,
+      "loss": 0.6035,
+      "step": 3189
+    },
+    {
+      "epoch": 0.9466192170818505,
+      "grad_norm": 0.265625,
+      "learning_rate": 1.411221591039269e-06,
+      "loss": 0.6396,
+      "step": 3192
+    },
+    {
+      "epoch": 0.947508896797153,
+      "grad_norm": 0.263671875,
+      "learning_rate": 1.3646792739225533e-06,
+      "loss": 0.577,
+      "step": 3195
+    },
+    {
+      "epoch": 0.9483985765124555,
+      "grad_norm": 0.2578125,
+      "learning_rate": 1.3189120947794897e-06,
+      "loss": 0.5983,
+      "step": 3198
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 2.0948834721476903e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null