Training in progress, epoch 1, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b2187cfea95a2eca680241463ddf253050b5d1c67d3f6926c4c7461e2017e6b
 size 125918320

 version https://git-lfs.github.com/spec/v1
+oid sha256:e275e42a6e1a9d58cee44c2ad4d640cdf4c9a19deda114a48cace1ba0d46c984
 size 125918320

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b11230822a821b7c8912e91766e9cf86ee2e3eaed9c397b82b4547c8d6d21bf1
 size 64684244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a24831531cd867bb57b3034524a71f4862c189ab56900454001f822fd0baa92c
 size 64684244

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39d6ba09f3ca3721d72b9f77c49ba758bdc2025a005905ac6b40e79eb7b4d2ed
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b149951a969917452b67be40996b6fadd1beb1de150bbcc21cd57e242cfb0b5d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:937b90285b79cb43fd1a74e2715c85320817d948429e0cf325617783fd5b98c7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e89df09ab5e0094d5655fa2536a552489bcab945628e8953beb9fd485709b6b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9971563981042654,
   "eval_steps": 66,
-  "global_step": 263,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -403,6 +403,14 @@
       "learning_rate": 7.208147179291192e-08,
       "loss": 1.0496,
       "step": 260
     }
   ],
   "logging_steps": 5,
@@ -417,12 +425,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.623917388229509e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0028436018957345,
   "eval_steps": 66,
+  "global_step": 264,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.208147179291192e-08,
       "loss": 1.0496,
       "step": 260
+    },
+    {
+      "epoch": 1.0028436018957345,
+      "eval_loss": 1.1479114294052124,
+      "eval_runtime": 15.1375,
+      "eval_samples_per_second": 7.333,
+      "eval_steps_per_second": 3.699,
+      "step": 264
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.631756088414044e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null