Upload checkpoint 3300

Browse files

Files changed (5) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +122 -3

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fabce1a036331b94cdf566aa87c9ecf11584fa2ffb6b7be93064146a64f2230e
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:447ea1baf5ce4d3011df6f9cd40e05e076cbb62e1b5b4bd59b2b44e1b3600425
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:206358ae8b2081de1c7b1c84dbe9df7131ea730ad25beccd39f96f180a8d2041
 size 3989163248

 version https://git-lfs.github.com/spec/v1
+oid sha256:649859d9b29ef3ee4a62106e3696336aee4f9cf11919324ac3cb0cd58eca6bd1
 size 3989163248

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1229aa48756d90df1c044eded08744a22ecacd564b9281644ebae34dbe880887
 size 17893874312

 version https://git-lfs.github.com/spec/v1
+oid sha256:34eabe2409758e1c785b61e49de22c99478d71b4e287056e9ac7b8e66f22d2e7
 size 17893874312

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f19d4f8529d4151f2aad4d583c0ecd97e68c1bbdb23e8cd6eee178553e9463b1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e3e5d81ec2d6c897aa97cd1ed656c7729f595b7fd89a2af10c54571be04f6c2
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9638196915776986,
   "eval_steps": 500,
-  "global_step": 3250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7588,6 +7588,125 @@
       "learning_rate": 6.597910240324967e-07,
       "loss": 0.6038,
       "step": 3249
     }
   ],
   "logging_steps": 3,
@@ -7607,7 +7726,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.127616026399998e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9786476868327402,
   "eval_steps": 500,
+  "global_step": 3300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.597910240324967e-07,
       "loss": 0.6038,
       "step": 3249
+    },
+    {
+      "epoch": 0.9644128113879004,
+      "grad_norm": 0.26171875,
+      "learning_rate": 6.280319158544989e-07,
+      "loss": 0.6301,
+      "step": 3252
+    },
+    {
+      "epoch": 0.9653024911032029,
+      "grad_norm": 0.265625,
+      "learning_rate": 5.970537346853156e-07,
+      "loss": 0.6007,
+      "step": 3255
+    },
+    {
+      "epoch": 0.9661921708185054,
+      "grad_norm": 0.2470703125,
+      "learning_rate": 5.668567239708323e-07,
+      "loss": 0.5789,
+      "step": 3258
+    },
+    {
+      "epoch": 0.9670818505338078,
+      "grad_norm": 0.265625,
+      "learning_rate": 5.374411210180341e-07,
+      "loss": 0.5964,
+      "step": 3261
+    },
+    {
+      "epoch": 0.9679715302491103,
+      "grad_norm": 0.25390625,
+      "learning_rate": 5.088071569931185e-07,
+      "loss": 0.5953,
+      "step": 3264
+    },
+    {
+      "epoch": 0.9688612099644128,
+      "grad_norm": 0.2431640625,
+      "learning_rate": 4.809550569196519e-07,
+      "loss": 0.5877,
+      "step": 3267
+    },
+    {
+      "epoch": 0.9697508896797153,
+      "grad_norm": 0.263671875,
+      "learning_rate": 4.5388503967683793e-07,
+      "loss": 0.5923,
+      "step": 3270
+    },
+    {
+      "epoch": 0.9706405693950177,
+      "grad_norm": 0.255859375,
+      "learning_rate": 4.275973179977855e-07,
+      "loss": 0.5958,
+      "step": 3273
+    },
+    {
+      "epoch": 0.9715302491103203,
+      "grad_norm": 0.271484375,
+      "learning_rate": 4.0209209846783224e-07,
+      "loss": 0.5977,
+      "step": 3276
+    },
+    {
+      "epoch": 0.9724199288256228,
+      "grad_norm": 0.265625,
+      "learning_rate": 3.773695815229239e-07,
+      "loss": 0.592,
+      "step": 3279
+    },
+    {
+      "epoch": 0.9733096085409253,
+      "grad_norm": 0.248046875,
+      "learning_rate": 3.534299614480596e-07,
+      "loss": 0.5702,
+      "step": 3282
+    },
+    {
+      "epoch": 0.9741992882562278,
+      "grad_norm": 0.255859375,
+      "learning_rate": 3.3027342637572676e-07,
+      "loss": 0.5893,
+      "step": 3285
+    },
+    {
+      "epoch": 0.9750889679715302,
+      "grad_norm": 0.251953125,
+      "learning_rate": 3.079001582844354e-07,
+      "loss": 0.6177,
+      "step": 3288
+    },
+    {
+      "epoch": 0.9759786476868327,
+      "grad_norm": 0.341796875,
+      "learning_rate": 2.8631033299730825e-07,
+      "loss": 0.6178,
+      "step": 3291
+    },
+    {
+      "epoch": 0.9768683274021353,
+      "grad_norm": 0.255859375,
+      "learning_rate": 2.655041201806707e-07,
+      "loss": 0.5924,
+      "step": 3294
+    },
+    {
+      "epoch": 0.9777580071174378,
+      "grad_norm": 0.259765625,
+      "learning_rate": 2.454816833427631e-07,
+      "loss": 0.6021,
+      "step": 3297
+    },
+    {
+      "epoch": 0.9786476868327402,
+      "grad_norm": 0.2578125,
+      "learning_rate": 2.2624317983239718e-07,
+      "loss": 0.6131,
+      "step": 3300
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 2.1603485806523056e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null