Training in progress, step 224, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33d1f902086bf162f42a891580c1d8f009b186a1b247fa2e5d0f3c8b552ca438
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:8184a10ca48807e22cc06e50ec6c504f76f4078fa26a373c09b1e614591a70dd
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffbe6c82a241eec13776bec8b0245f431cae9a909fe4fa531b5a99f34a39e259
 size 240728084

 version https://git-lfs.github.com/spec/v1
+oid sha256:27c59c970af91f75ad174c54b9036732a06ac597e37b07db4c76c33a0dbe1d9b
 size 240728084

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fb5768ef05fd83332549c08f206d2683f104437b7d63ada2cc0d97372b46d74
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ef7d41b67bbb64d8f59b1b890b60e98c43be1a525871d9f2311fa3b7b48a618
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.013447469810874753,
   "eval_steps": 500,
-  "global_step": 208,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1463,6 +1463,118 @@
       "learning_rate": 9.999704387106147e-06,
       "loss": 1.3735,
       "step": 208
     }
   ],
   "logging_steps": 1,
@@ -1482,7 +1594,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.334557816141824e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.014481890565557426,
   "eval_steps": 500,
+  "global_step": 224,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.999704387106147e-06,
       "loss": 1.3735,
       "step": 208
+    },
+    {
+      "epoch": 0.013512121108042419,
+      "grad_norm": 4.388930320739746,
+      "learning_rate": 9.999693126780022e-06,
+      "loss": 1.4288,
+      "step": 209
+    },
+    {
+      "epoch": 0.013576772405210087,
+      "grad_norm": 3.7054548263549805,
+      "learning_rate": 9.999681655989203e-06,
+      "loss": 1.3322,
+      "step": 210
+    },
+    {
+      "epoch": 0.013641423702377753,
+      "grad_norm": 4.013354778289795,
+      "learning_rate": 9.999669974734172e-06,
+      "loss": 1.4192,
+      "step": 211
+    },
+    {
+      "epoch": 0.01370607499954542,
+      "grad_norm": 4.022690773010254,
+      "learning_rate": 9.999658083015423e-06,
+      "loss": 1.3474,
+      "step": 212
+    },
+    {
+      "epoch": 0.013770726296713087,
+      "grad_norm": 3.8308322429656982,
+      "learning_rate": 9.999645980833454e-06,
+      "loss": 1.3902,
+      "step": 213
+    },
+    {
+      "epoch": 0.013835377593880755,
+      "grad_norm": 4.453736305236816,
+      "learning_rate": 9.999633668188778e-06,
+      "loss": 1.4876,
+      "step": 214
+    },
+    {
+      "epoch": 0.013900028891048423,
+      "grad_norm": 4.379161834716797,
+      "learning_rate": 9.99962114508191e-06,
+      "loss": 1.369,
+      "step": 215
+    },
+    {
+      "epoch": 0.013964680188216089,
+      "grad_norm": 4.23476505279541,
+      "learning_rate": 9.999608411513378e-06,
+      "loss": 1.371,
+      "step": 216
+    },
+    {
+      "epoch": 0.014029331485383757,
+      "grad_norm": 3.8091630935668945,
+      "learning_rate": 9.999595467483719e-06,
+      "loss": 1.3317,
+      "step": 217
+    },
+    {
+      "epoch": 0.014093982782551423,
+      "grad_norm": 4.4740118980407715,
+      "learning_rate": 9.999582312993476e-06,
+      "loss": 1.3864,
+      "step": 218
+    },
+    {
+      "epoch": 0.01415863407971909,
+      "grad_norm": 3.8283002376556396,
+      "learning_rate": 9.999568948043206e-06,
+      "loss": 1.3924,
+      "step": 219
+    },
+    {
+      "epoch": 0.014223285376886757,
+      "grad_norm": 3.9413399696350098,
+      "learning_rate": 9.99955537263347e-06,
+      "loss": 1.365,
+      "step": 220
+    },
+    {
+      "epoch": 0.014287936674054424,
+      "grad_norm": 3.7700750827789307,
+      "learning_rate": 9.999541586764836e-06,
+      "loss": 1.3265,
+      "step": 221
+    },
+    {
+      "epoch": 0.01435258797122209,
+      "grad_norm": 4.468739986419678,
+      "learning_rate": 9.999527590437889e-06,
+      "loss": 1.4056,
+      "step": 222
+    },
+    {
+      "epoch": 0.014417239268389758,
+      "grad_norm": 3.847881555557251,
+      "learning_rate": 9.999513383653216e-06,
+      "loss": 1.3369,
+      "step": 223
+    },
+    {
+      "epoch": 0.014481890565557426,
+      "grad_norm": 4.515076637268066,
+      "learning_rate": 9.999498966411415e-06,
+      "loss": 1.3715,
+      "step": 224
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.437989735806894e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null