Training in progress, step 3500, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee0f7f6efa259946627904facc17872a7c399f185f2fa3c1cfc100a6322fb601
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4b4b7b2eecafa10d9b9fd56b2f27ca11649ae805630c3be6b59bc780c927216
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0539557cdcb1b310091e9a740e6565900f4359018a39a195ddb6d9bc8daf4959
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bdf2aa53843cdc6f0c24817577e70f22e6f55e70f6c0d0457f3479129a5bcb5
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd393a0ba2edec99a3162827022ce552435f0d62f37eef791559e57aae2a12ab
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:27172d5cc1af6ec98dccb94fbc3511967f5f7dd8f8c183140f65b475db95b940
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.642935377875137,
   "eval_steps": 500,
-  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -217,6 +217,41 @@
       "learning_rate": 8.490988587906137e-05,
       "loss": 1.1102,
       "step": 3000
     }
   ],
   "logging_steps": 100,
@@ -236,7 +271,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4496265837006356e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9167579408543265,
   "eval_steps": 500,
+  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.490988587906137e-05,
       "loss": 1.1102,
       "step": 3000
+    },
+    {
+      "epoch": 1.6976998904709748,
+      "grad_norm": 2.2829463481903076,
+      "learning_rate": 8.351615348978318e-05,
+      "loss": 1.1058,
+      "step": 3100
+    },
+    {
+      "epoch": 1.7524644030668126,
+      "grad_norm": 2.018911600112915,
+      "learning_rate": 8.207343482294323e-05,
+      "loss": 1.0693,
+      "step": 3200
+    },
+    {
+      "epoch": 1.8072289156626506,
+      "grad_norm": 1.948006272315979,
+      "learning_rate": 8.058383851601027e-05,
+      "loss": 1.0797,
+      "step": 3300
+    },
+    {
+      "epoch": 1.8619934282584885,
+      "grad_norm": 1.9828593730926514,
+      "learning_rate": 7.904954172150776e-05,
+      "loss": 1.0454,
+      "step": 3400
+    },
+    {
+      "epoch": 1.9167579408543265,
+      "grad_norm": 2.2209079265594482,
+      "learning_rate": 7.747278692494825e-05,
+      "loss": 1.0665,
+      "step": 3500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.6878032741533286e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null