Training in progress, step 4500, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a9cf64b63b7766902a74424e2b9849dd20993349b621f4131a344d2c85b4630
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:035738fa0b0cf26c1523a2cbaaa821bc209024897fb82257fbb088cffe5dd105
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32cf687af0a4ab1e094756b89de6b91731e958f35e735a064e6bab3e4368a447
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:bcdfe18bd5d30faf69277fc553b17fc82cc47a083c89239fa36292531ae3105b
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9c6ca92757145af39d20b47188651ac4ae897a7257a652d8a9acf8e3ccda307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d8f12723c1cb91835f294867ed69b1fbd53831c2f03d745ef893e5654a0aed9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.190580503833516,
   "eval_steps": 500,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -287,6 +287,41 @@
       "learning_rate": 2.0584966459246906e-05,
       "loss": 0.4994,
       "step": 4000
     }
   ],
   "logging_steps": 100,
@@ -306,7 +341,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.9236155642036552e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.4644030668127055,
   "eval_steps": 500,
+  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.0584966459246906e-05,
       "loss": 0.4994,
       "step": 4000
+    },
+    {
+      "epoch": 2.245345016429354,
+      "grad_norm": 1.987844467163086,
+      "learning_rate": 1.8069917595926504e-05,
+      "loss": 0.4941,
+      "step": 4100
+    },
+    {
+      "epoch": 2.3001095290251916,
+      "grad_norm": 1.8899997472763062,
+      "learning_rate": 1.568448484096205e-05,
+      "loss": 0.4926,
+      "step": 4200
+    },
+    {
+      "epoch": 2.3548740416210294,
+      "grad_norm": 2.2514610290527344,
+      "learning_rate": 1.3438351555220874e-05,
+      "loss": 0.4809,
+      "step": 4300
+    },
+    {
+      "epoch": 2.4096385542168672,
+      "grad_norm": 1.9894077777862549,
+      "learning_rate": 1.1340635631092428e-05,
+      "loss": 0.4834,
+      "step": 4400
+    },
+    {
+      "epoch": 2.4644030668127055,
+      "grad_norm": 2.134925603866577,
+      "learning_rate": 9.399852479563775e-06,
+      "loss": 0.494,
+      "step": 4500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 2.1638170935806853e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null