Upload checkpoint 3250

Browse files

Files changed (5) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +122 -3

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e57e3bfc2074a9ad40f5be7d0742e4d85689e9cfc6394e051207e55781a96a00
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:fabce1a036331b94cdf566aa87c9ecf11584fa2ffb6b7be93064146a64f2230e
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6db52593f9080fddf1220f3d831f1ab34cd8350f2c5e814ac3bd9e263f00cbed
 size 3989163248

 version https://git-lfs.github.com/spec/v1
+oid sha256:206358ae8b2081de1c7b1c84dbe9df7131ea730ad25beccd39f96f180a8d2041
 size 3989163248

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff165cb496fd7093d743314d1f456b971f98bc37717d074d6a47adb5ffdca3f7
 size 17893874312

 version https://git-lfs.github.com/spec/v1
+oid sha256:1229aa48756d90df1c044eded08744a22ecacd564b9281644ebae34dbe880887
 size 17893874312

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:feda042eff907491a34cdbf179b85f590d010493a95828afe0b5cdb1928ddd85
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f19d4f8529d4151f2aad4d583c0ecd97e68c1bbdb23e8cd6eee178553e9463b1
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9489916963226572,
   "eval_steps": 500,
-  "global_step": 3200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7469,6 +7469,125 @@
       "learning_rate": 1.3189120947794897e-06,
       "loss": 0.5983,
       "step": 3198
     }
   ],
   "logging_steps": 3,
@@ -7488,7 +7607,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.0948834721476903e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9638196915776986,
   "eval_steps": 500,
+  "global_step": 3250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.3189120947794897e-06,
       "loss": 0.5983,
       "step": 3198
+    },
+    {
+      "epoch": 0.949288256227758,
+      "grad_norm": 0.263671875,
+      "learning_rate": 1.273920413277152e-06,
+      "loss": 0.6093,
+      "step": 3201
+    },
+    {
+      "epoch": 0.9501779359430605,
+      "grad_norm": 0.263671875,
+      "learning_rate": 1.2297045829882892e-06,
+      "loss": 0.5966,
+      "step": 3204
+    },
+    {
+      "epoch": 0.951067615658363,
+      "grad_norm": 0.265625,
+      "learning_rate": 1.186264951388516e-06,
+      "loss": 0.6228,
+      "step": 3207
+    },
+    {
+      "epoch": 0.9519572953736655,
+      "grad_norm": 0.26953125,
+      "learning_rate": 1.1436018598535937e-06,
+      "loss": 0.6083,
+      "step": 3210
+    },
+    {
+      "epoch": 0.952846975088968,
+      "grad_norm": 0.25390625,
+      "learning_rate": 1.1017156436567532e-06,
+      "loss": 0.5806,
+      "step": 3213
+    },
+    {
+      "epoch": 0.9537366548042705,
+      "grad_norm": 0.267578125,
+      "learning_rate": 1.0606066319660435e-06,
+      "loss": 0.579,
+      "step": 3216
+    },
+    {
+      "epoch": 0.9546263345195729,
+      "grad_norm": 0.263671875,
+      "learning_rate": 1.020275147841765e-06,
+      "loss": 0.6053,
+      "step": 3219
+    },
+    {
+      "epoch": 0.9555160142348754,
+      "grad_norm": 0.26171875,
+      "learning_rate": 9.807215082339394e-07,
+      "loss": 0.595,
+      "step": 3222
+    },
+    {
+      "epoch": 0.9564056939501779,
+      "grad_norm": 0.25,
+      "learning_rate": 9.41946023979745e-07,
+      "loss": 0.5857,
+      "step": 3225
+    },
+    {
+      "epoch": 0.9572953736654805,
+      "grad_norm": 0.265625,
+      "learning_rate": 9.039489998011852e-07,
+      "loss": 0.6189,
+      "step": 3228
+    },
+    {
+      "epoch": 0.958185053380783,
+      "grad_norm": 0.26171875,
+      "learning_rate": 8.66730734302601e-07,
+      "loss": 0.5837,
+      "step": 3231
+    },
+    {
+      "epoch": 0.9590747330960854,
+      "grad_norm": 0.25,
+      "learning_rate": 8.302915199683737e-07,
+      "loss": 0.5827,
+      "step": 3234
+    },
+    {
+      "epoch": 0.9599644128113879,
+      "grad_norm": 0.259765625,
+      "learning_rate": 7.94631643160626e-07,
+      "loss": 0.6043,
+      "step": 3237
+    },
+    {
+      "epoch": 0.9608540925266904,
+      "grad_norm": 0.26171875,
+      "learning_rate": 7.597513841169468e-07,
+      "loss": 0.5621,
+      "step": 3240
+    },
+    {
+      "epoch": 0.9617437722419929,
+      "grad_norm": 0.26953125,
+      "learning_rate": 7.256510169482034e-07,
+      "loss": 0.5886,
+      "step": 3243
+    },
+    {
+      "epoch": 0.9626334519572953,
+      "grad_norm": 0.279296875,
+      "learning_rate": 6.923308096363879e-07,
+      "loss": 0.6205,
+      "step": 3246
+    },
+    {
+      "epoch": 0.9635231316725978,
+      "grad_norm": 0.267578125,
+      "learning_rate": 6.597910240324967e-07,
+      "loss": 0.6038,
+      "step": 3249
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 2.127616026399998e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null