besimray committed
Commit 58961bb
1 Parent(s): aac17e3

Training in progress, step 40, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e02bf26d5b2401ec7dc326297f3ee2388f15d11930d5efb6984ae8f6428a10f9
+ oid sha256:9258c3bf97aa3a5549055538a78ff5ecdd5a0381ae8cb44fed53bdf82e82eb7a
  size 22573704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:da94be3ae0a64e853b92443c0a2c39df2e4402a3604ed63a25872c61f1cc51db
+ oid sha256:ff4e7a95fff2b96fc767f188fcd9147729a5edc6157305cc2de75ce14a9af34f
  size 11710970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:27827d7d71d66eac185d181a061a3fc686c05fceae71aefa31bdc9f272ad8dc6
+ oid sha256:8128afb4afe4aecc26d8f0be5e4c4ed9a96e2778b2735f61e9a821ba55be2be6
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2b01233c08a586038ebf1cf3e5cbb4f41b3484fab28bfbbe42cb46fd4e382bde
+ oid sha256:68b390e57be002933c68cbb0976c807a453fcfb48626c716bc0894f16432712e
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.7271688580513,
- "best_model_checkpoint": "miner_id_24/checkpoint-30",
- "epoch": 1.7142857142857144,
+ "best_metric": 0.7075809240341187,
+ "best_model_checkpoint": "miner_id_24/checkpoint-40",
+ "epoch": 2.2857142857142856,
  "eval_steps": 10,
- "global_step": 30,
+ "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -249,6 +249,84 @@
  "eval_samples_per_second": 7.185,
  "eval_steps_per_second": 1.916,
  "step": 30
+ },
+ {
+ "epoch": 1.7714285714285714,
+ "grad_norm": 0.29218417406082153,
+ "learning_rate": 5.182610115288295e-05,
+ "loss": 0.6656,
+ "step": 31
+ },
+ {
+ "epoch": 1.8285714285714287,
+ "grad_norm": 0.30864810943603516,
+ "learning_rate": 4.817389884711705e-05,
+ "loss": 0.7106,
+ "step": 32
+ },
+ {
+ "epoch": 1.8857142857142857,
+ "grad_norm": 0.3442583382129669,
+ "learning_rate": 4.4531439581106295e-05,
+ "loss": 0.7097,
+ "step": 33
+ },
+ {
+ "epoch": 1.9428571428571428,
+ "grad_norm": 0.32284116744995117,
+ "learning_rate": 4.0918157451028185e-05,
+ "loss": 0.7053,
+ "step": 34
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 0.2941528856754303,
+ "learning_rate": 3.735333088041596e-05,
+ "loss": 0.5891,
+ "step": 35
+ },
+ {
+ "epoch": 2.057142857142857,
+ "grad_norm": 0.3290832042694092,
+ "learning_rate": 3.38559797614277e-05,
+ "loss": 0.7012,
+ "step": 36
+ },
+ {
+ "epoch": 2.1142857142857143,
+ "grad_norm": 0.29661279916763306,
+ "learning_rate": 3.0444763975492208e-05,
+ "loss": 0.6034,
+ "step": 37
+ },
+ {
+ "epoch": 2.1714285714285713,
+ "grad_norm": 0.30858200788497925,
+ "learning_rate": 2.7137883834768073e-05,
+ "loss": 0.6878,
+ "step": 38
+ },
+ {
+ "epoch": 2.2285714285714286,
+ "grad_norm": 0.2789033353328705,
+ "learning_rate": 2.3952982975603496e-05,
+ "loss": 0.7019,
+ "step": 39
+ },
+ {
+ "epoch": 2.2857142857142856,
+ "grad_norm": 0.2946871519088745,
+ "learning_rate": 2.090705422210237e-05,
+ "loss": 0.6679,
+ "step": 40
+ },
+ {
+ "epoch": 2.2857142857142856,
+ "eval_loss": 0.7075809240341187,
+ "eval_runtime": 2.087,
+ "eval_samples_per_second": 7.187,
+ "eval_steps_per_second": 1.917,
+ "step": 40
  }
  ],
  "logging_steps": 1,
@@ -277,7 +355,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.154618474102784e+16,
+ "total_flos": 1.539491298803712e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null