Training in progress, step 96, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd5ff5271312c471c1e5dff507f0c30a550d9a37628b9a6d4a570b4ea0de29a7
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:48183ccb0178125cc010c0512b0249661aa8bcda24238fa1ad703f3707aa9ee5
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28591169371e82e7643c9af0f37c54468a070ed22b024fbe67f162d95110bc52
 size 240728084

 version https://git-lfs.github.com/spec/v1
+oid sha256:330b34cef66bec5a4bb1e99534643208c6b0a5ea8737d041e4be17f7fef4502c
 size 240728084

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e648af67e02c768e1b8b9f76c18345c49f603e195d2b3ca0c4f4581aa1076ae
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc8a4187f1b7f884b08d389739f293db04ef58965aed4a5d8cc45be3c4d4da9c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.005172103773413366,
   "eval_steps": 500,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -567,6 +567,118 @@
       "learning_rate": 5.161290322580646e-06,
       "loss": 1.3231,
       "step": 80
     }
   ],
   "logging_steps": 1,
@@ -586,7 +698,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.178532622222131e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.00620652452809604,
   "eval_steps": 500,
+  "global_step": 96,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5.161290322580646e-06,
       "loss": 1.3231,
       "step": 80
+    },
+    {
+      "epoch": 0.005236755070581033,
+      "grad_norm": 6.0287041664123535,
+      "learning_rate": 5.2258064516129035e-06,
+      "loss": 1.4106,
+      "step": 81
+    },
+    {
+      "epoch": 0.0053014063677487,
+      "grad_norm": 5.727312088012695,
+      "learning_rate": 5.290322580645162e-06,
+      "loss": 1.52,
+      "step": 82
+    },
+    {
+      "epoch": 0.005366057664916367,
+      "grad_norm": 4.75112771987915,
+      "learning_rate": 5.35483870967742e-06,
+      "loss": 1.434,
+      "step": 83
+    },
+    {
+      "epoch": 0.005430708962084034,
+      "grad_norm": 5.614027500152588,
+      "learning_rate": 5.419354838709678e-06,
+      "loss": 1.4501,
+      "step": 84
+    },
+    {
+      "epoch": 0.005495360259251702,
+      "grad_norm": 6.246868133544922,
+      "learning_rate": 5.483870967741935e-06,
+      "loss": 1.4065,
+      "step": 85
+    },
+    {
+      "epoch": 0.005560011556419369,
+      "grad_norm": 4.8930559158325195,
+      "learning_rate": 5.548387096774194e-06,
+      "loss": 1.4059,
+      "step": 86
+    },
+    {
+      "epoch": 0.005624662853587036,
+      "grad_norm": 9.081551551818848,
+      "learning_rate": 5.612903225806452e-06,
+      "loss": 1.4045,
+      "step": 87
+    },
+    {
+      "epoch": 0.005689314150754703,
+      "grad_norm": 6.593941688537598,
+      "learning_rate": 5.677419354838711e-06,
+      "loss": 1.4229,
+      "step": 88
+    },
+    {
+      "epoch": 0.00575396544792237,
+      "grad_norm": 4.863624095916748,
+      "learning_rate": 5.7419354838709685e-06,
+      "loss": 1.4073,
+      "step": 89
+    },
+    {
+      "epoch": 0.005818616745090037,
+      "grad_norm": 5.167389392852783,
+      "learning_rate": 5.806451612903226e-06,
+      "loss": 1.5046,
+      "step": 90
+    },
+    {
+      "epoch": 0.005883268042257704,
+      "grad_norm": 4.816722869873047,
+      "learning_rate": 5.8709677419354835e-06,
+      "loss": 1.4358,
+      "step": 91
+    },
+    {
+      "epoch": 0.005947919339425371,
+      "grad_norm": 5.505555629730225,
+      "learning_rate": 5.935483870967742e-06,
+      "loss": 1.5727,
+      "step": 92
+    },
+    {
+      "epoch": 0.006012570636593038,
+      "grad_norm": 5.764698505401611,
+      "learning_rate": 6e-06,
+      "loss": 1.4799,
+      "step": 93
+    },
+    {
+      "epoch": 0.006077221933760706,
+      "grad_norm": 5.312406063079834,
+      "learning_rate": 6.064516129032259e-06,
+      "loss": 1.5233,
+      "step": 94
+    },
+    {
+      "epoch": 0.006141873230928373,
+      "grad_norm": 6.0215253829956055,
+      "learning_rate": 6.129032258064517e-06,
+      "loss": 1.3764,
+      "step": 95
+    },
+    {
+      "epoch": 0.00620652452809604,
+      "grad_norm": 6.582176208496094,
+      "learning_rate": 6.193548387096775e-06,
+      "loss": 1.3687,
+      "step": 96
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.186692359677542e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null