Training in progress, step 66, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4c89642d3974ac32b363375b0c99db7e653577a645e9394d16578bbd3f811ce
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:57d605d747554c71cbe250fcab62c12ab370e0b38a25c196944ddc1db3fa279f
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f08e4c00e5d14470d149eafe75d1eb465d97905745751e11615a482394b67e6
 size 23159290

 version https://git-lfs.github.com/spec/v1
+oid sha256:4757421031a41e975903d9c7b4a5ec102cd4415dc1a3026ccbca019c3edc0bb4
 size 23159290

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7eee2554b0dfe5eea88b1462af07fa6ac9ec1fbfcec19ffc8ea53ce2646989b3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c26b0b4146138aaf5b78e3b2cb250411b85c27b3b8555c34400744d1a8ac0117
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:543acdb760aa588d117c1e17e502ab560a6edcfdd5fcf35ff688beb89941889e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf1bba0d0805ef69f5b1e1b44636f294c28a67c159c0f653da5821751e943f43
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.526315789473684,
   "eval_steps": 6,
-  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -235,6 +235,28 @@
       "eval_samples_per_second": 51.847,
       "eval_steps_per_second": 6.481,
       "step": 60
     }
   ],
   "logging_steps": 3,
@@ -254,7 +276,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.161267083476992e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.7789473684210524,
   "eval_steps": 6,
+  "global_step": 66,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 51.847,
       "eval_steps_per_second": 6.481,
       "step": 60
+    },
+    {
+      "epoch": 2.6526315789473687,
+      "grad_norm": 4.04122257232666,
+      "learning_rate": 2.5548865107314607e-06,
+      "loss": 3.3633,
+      "step": 63
+    },
+    {
+      "epoch": 2.7789473684210524,
+      "grad_norm": 4.012021541595459,
+      "learning_rate": 1.1465185899987797e-06,
+      "loss": 3.1055,
+      "step": 66
+    },
+    {
+      "epoch": 2.7789473684210524,
+      "eval_loss": 3.200587749481201,
+      "eval_runtime": 0.7722,
+      "eval_samples_per_second": 51.8,
+      "eval_steps_per_second": 6.475,
+      "step": 66
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.2773937918246912e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null