Training in progress, step 2000, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14cdf63f9857c0c17b44633ed454a57ec4a30ac3853b01a99ca4717295569997
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:32df347248b5349bd6c574534672abf58d2bb982eafe6ce6c8101d7540872441
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5d28a06d64d9e70ea8356bfe22b851b559685110dfee146f5ddc6e60d3483ae
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:05a44e113261b47efcfd265b60b329863eac807f5a9d7b9a6023a3e43ec03fd1
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3a42096078057ba9ff0a08ad02fe743f141bc5106a70b702caf3aca78f6eeb9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:216cbaf423629714bf2756e789fee139a891bbb0080333ef39b536a8f622bd50
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7159904534606205,
   "eval_steps": 500,
-  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -112,6 +112,41 @@
       "learning_rate": 2.263423624322326e-05,
       "loss": 1.264,
       "step": 1500
     }
   ],
   "logging_steps": 100,
@@ -131,7 +166,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.170110002381455e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.954653937947494,
   "eval_steps": 500,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.263423624322326e-05,
       "loss": 1.264,
       "step": 1500
+    },
+    {
+      "epoch": 0.7637231503579952,
+      "grad_norm": 1.8664114475250244,
+      "learning_rate": 1.6071437899220688e-05,
+      "loss": 1.2934,
+      "step": 1600
+    },
+    {
+      "epoch": 0.8114558472553699,
+      "grad_norm": 2.2168829441070557,
+      "learning_rate": 1.0448875811934417e-05,
+      "loss": 1.2624,
+      "step": 1700
+    },
+    {
+      "epoch": 0.8591885441527446,
+      "grad_norm": 2.4389848709106445,
+      "learning_rate": 5.922363738351888e-06,
+      "loss": 1.2715,
+      "step": 1800
+    },
+    {
+      "epoch": 0.9069212410501193,
+      "grad_norm": 2.066229820251465,
+      "learning_rate": 2.6173414408598827e-06,
+      "loss": 1.2322,
+      "step": 1900
+    },
+    {
+      "epoch": 0.954653937947494,
+      "grad_norm": 1.689720869064331,
+      "learning_rate": 6.253984711796612e-07,
+      "loss": 1.2093,
+      "step": 2000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 9.602614826159309e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null