Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +0 -0
scheduler.pt +0 -0
trainer_state.json +213 -3

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0babfe95faee265d9d577fe2c56d57a9b4c5e68eb45a4725029da76c9390da22
 size 1426462208

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2ccb1201adc7ba3dce45c039a56a518750657ba48b9dc6407631be5a9ec0851
 size 1426462208

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d871bdcaf06084e3588a76936a4a0f38f2be601a3808b153e7b50db116c96d9
 size 2853107898

 version https://git-lfs.github.com/spec/v1
+oid sha256:a11c2381fc62f5a000590233a5f5371a956d9b0994b73d666ec134c93e250cb3
 size 2853107898

rng_state.pth CHANGED Viewed

Binary files a/rng_state.pth and b/rng_state.pth differ

scheduler.pt CHANGED Viewed

Binary files a/scheduler.pt and b/scheduler.pt differ

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.046627524414171784,
   "eval_steps": 500,
-  "global_step": 100000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77,6 +77,216 @@
       "learning_rate": 4.766941644720645e-05,
       "loss": 2.9423,
       "step": 100000
     }
   ],
   "logging_steps": 10000,
@@ -96,7 +306,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.465742773984821e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.18651009765668713,
   "eval_steps": 500,
+  "global_step": 400000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.766941644720645e-05,
       "loss": 2.9423,
       "step": 100000
+    },
+    {
+      "epoch": 0.051290276855588963,
+      "grad_norm": 1.2270045280456543,
+      "learning_rate": 4.7436372080184424e-05,
+      "loss": 2.9127,
+      "step": 110000
+    },
+    {
+      "epoch": 0.05595302929700614,
+      "grad_norm": 1.0300427675247192,
+      "learning_rate": 4.7203327713162395e-05,
+      "loss": 2.8823,
+      "step": 120000
+    },
+    {
+      "epoch": 0.060615781738423316,
+      "grad_norm": 0.8841068148612976,
+      "learning_rate": 4.6970283346140366e-05,
+      "loss": 2.8588,
+      "step": 130000
+    },
+    {
+      "epoch": 0.0652785341798405,
+      "grad_norm": 1.0524730682373047,
+      "learning_rate": 4.673721566535613e-05,
+      "loss": 2.8425,
+      "step": 140000
+    },
+    {
+      "epoch": 0.06994128662125768,
+      "grad_norm": 0.9874018430709839,
+      "learning_rate": 4.650417129833409e-05,
+      "loss": 2.8257,
+      "step": 150000
+    },
+    {
+      "epoch": 0.07460403906267485,
+      "grad_norm": 0.9634119272232056,
+      "learning_rate": 4.6271126931312064e-05,
+      "loss": 2.8114,
+      "step": 160000
+    },
+    {
+      "epoch": 0.07926679150409204,
+      "grad_norm": 0.885671854019165,
+      "learning_rate": 4.603808256429003e-05,
+      "loss": 2.7911,
+      "step": 170000
+    },
+    {
+      "epoch": 0.08392954394550921,
+      "grad_norm": 1.0135940313339233,
+      "learning_rate": 4.58050148835058e-05,
+      "loss": 2.7786,
+      "step": 180000
+    },
+    {
+      "epoch": 0.0885922963869264,
+      "grad_norm": 1.011932611465454,
+      "learning_rate": 4.557194720272156e-05,
+      "loss": 2.7636,
+      "step": 190000
+    },
+    {
+      "epoch": 0.09325504882834357,
+      "grad_norm": 0.7796096205711365,
+      "learning_rate": 4.533892614946173e-05,
+      "loss": 2.753,
+      "step": 200000
+    },
+    {
+      "epoch": 0.09791780126976074,
+      "grad_norm": 1.1194034814834595,
+      "learning_rate": 4.5105858468677495e-05,
+      "loss": 2.7371,
+      "step": 210000
+    },
+    {
+      "epoch": 0.10258055371117793,
+      "grad_norm": 1.1135520935058594,
+      "learning_rate": 4.4872814101655467e-05,
+      "loss": 2.7285,
+      "step": 220000
+    },
+    {
+      "epoch": 0.1072433061525951,
+      "grad_norm": 0.7772097587585449,
+      "learning_rate": 4.463976973463344e-05,
+      "loss": 2.7173,
+      "step": 230000
+    },
+    {
+      "epoch": 0.11190605859401229,
+      "grad_norm": 1.096358299255371,
+      "learning_rate": 4.44067020538492e-05,
+      "loss": 2.7141,
+      "step": 240000
+    },
+    {
+      "epoch": 0.11656881103542946,
+      "grad_norm": 0.8112640380859375,
+      "learning_rate": 4.417363437306496e-05,
+      "loss": 2.7073,
+      "step": 250000
+    },
+    {
+      "epoch": 0.12123156347684663,
+      "grad_norm": 1.0931545495986938,
+      "learning_rate": 4.394059000604293e-05,
+      "loss": 2.6931,
+      "step": 260000
+    },
+    {
+      "epoch": 0.1258943159182638,
+      "grad_norm": 1.1369918584823608,
+      "learning_rate": 4.37075456390209e-05,
+      "loss": 2.6824,
+      "step": 270000
+    },
+    {
+      "epoch": 0.130557068359681,
+      "grad_norm": 1.258300542831421,
+      "learning_rate": 4.347450127199887e-05,
+      "loss": 2.6741,
+      "step": 280000
+    },
+    {
+      "epoch": 0.13521982080109818,
+      "grad_norm": 0.9752686023712158,
+      "learning_rate": 4.324143359121463e-05,
+      "loss": 2.6645,
+      "step": 290000
+    },
+    {
+      "epoch": 0.13988257324251535,
+      "grad_norm": 1.0001367330551147,
+      "learning_rate": 4.3008412537954805e-05,
+      "loss": 2.6592,
+      "step": 300000
+    },
+    {
+      "epoch": 0.14454532568393252,
+      "grad_norm": 1.0314422845840454,
+      "learning_rate": 4.277534485717057e-05,
+      "loss": 2.6523,
+      "step": 310000
+    },
+    {
+      "epoch": 0.1492080781253497,
+      "grad_norm": 0.9287506937980652,
+      "learning_rate": 4.254230049014854e-05,
+      "loss": 2.6503,
+      "step": 320000
+    },
+    {
+      "epoch": 0.1538708305667669,
+      "grad_norm": 0.8209073543548584,
+      "learning_rate": 4.23092328093643e-05,
+      "loss": 2.6379,
+      "step": 330000
+    },
+    {
+      "epoch": 0.15853358300818407,
+      "grad_norm": 0.8727386593818665,
+      "learning_rate": 4.207618844234227e-05,
+      "loss": 2.6332,
+      "step": 340000
+    },
+    {
+      "epoch": 0.16319633544960124,
+      "grad_norm": 0.9841961860656738,
+      "learning_rate": 4.184314407532024e-05,
+      "loss": 2.6279,
+      "step": 350000
+    },
+    {
+      "epoch": 0.16785908789101842,
+      "grad_norm": 0.7831237316131592,
+      "learning_rate": 4.1610099708298214e-05,
+      "loss": 2.6237,
+      "step": 360000
+    },
+    {
+      "epoch": 0.1725218403324356,
+      "grad_norm": 0.9184048175811768,
+      "learning_rate": 4.137707865503839e-05,
+      "loss": 2.6167,
+      "step": 370000
+    },
+    {
+      "epoch": 0.1771845927738528,
+      "grad_norm": 0.9598727822303772,
+      "learning_rate": 4.114401097425415e-05,
+      "loss": 2.6082,
+      "step": 380000
+    },
+    {
+      "epoch": 0.18184734521526996,
+      "grad_norm": 0.8814136981964111,
+      "learning_rate": 4.0910966607232115e-05,
+      "loss": 2.6027,
+      "step": 390000
+    },
+    {
+      "epoch": 0.18651009765668713,
+      "grad_norm": 0.9080318212509155,
+      "learning_rate": 4.067789892644788e-05,
+      "loss": 2.6014,
+      "step": 400000
     }
   ],
   "logging_steps": 10000,
       "attributes": {}
     }
   },
+  "total_flos": 9.86304678504589e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null