Training in progress, step 110, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e54408fb2a33f5c91595c277d43af9ca7442d78555f6734133f909076eb5ee0b
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d47375795bb95c7810811251ed890d09b08485d56f154652a654c75ae9c485c
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef3fcc665ceb8166c03b90dbe8b812cc65aef67e5c4040511438f0bcec2d036b
 size 46057082

 version https://git-lfs.github.com/spec/v1
+oid sha256:45dee7d2b556b85d291e06e42e7c8f845e737ce91e3c96b980ce976a2402877c
 size 46057082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e023d57ffc9febc8fefa58b1faee1161f6172e0c816bc8ad1dc30d22145a166
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d010434ee16c14958906b5bf42c930dd1be27075db7b352777346c25649d79d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5c84ec0ff3c8c6aa13b25568668096db118f67ce80a9fa015a625446606f15d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:398198b060b9edcfe93ff59de4a929b40cbc42323ec0afb0426f8d7b821a61c1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6011497974395752,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.028238616307800918,
   "eval_steps": 10,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -795,6 +795,84 @@
       "eval_samples_per_second": 5.575,
       "eval_steps_per_second": 5.575,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -809,7 +887,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -823,7 +901,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9788035851878400.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.6011497974395752,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.03106247793858101,
   "eval_steps": 10,
+  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.575,
       "eval_steps_per_second": 5.575,
       "step": 100
+    },
+    {
+      "epoch": 0.028521002470878926,
+      "grad_norm": 1.1203055381774902,
+      "learning_rate": 0.00018345732537213027,
+      "loss": 1.7475,
+      "step": 101
+    },
+    {
+      "epoch": 0.028803388633956935,
+      "grad_norm": 1.274515986442566,
+      "learning_rate": 0.00018310240965131041,
+      "loss": 2.6023,
+      "step": 102
+    },
+    {
+      "epoch": 0.029085774797034947,
+      "grad_norm": 2.5792765617370605,
+      "learning_rate": 0.00018274407791591966,
+      "loss": 1.1908,
+      "step": 103
+    },
+    {
+      "epoch": 0.029368160960112955,
+      "grad_norm": 1.466035008430481,
+      "learning_rate": 0.00018238234489557215,
+      "loss": 0.7359,
+      "step": 104
+    },
+    {
+      "epoch": 0.029650547123190964,
+      "grad_norm": 3.4681172370910645,
+      "learning_rate": 0.0001820172254596956,
+      "loss": 1.8144,
+      "step": 105
+    },
+    {
+      "epoch": 0.029932933286268972,
+      "grad_norm": 4.0510993003845215,
+      "learning_rate": 0.00018164873461691986,
+      "loss": 0.7832,
+      "step": 106
+    },
+    {
+      "epoch": 0.03021531944934698,
+      "grad_norm": 5.226031303405762,
+      "learning_rate": 0.00018127688751446027,
+      "loss": 1.7575,
+      "step": 107
+    },
+    {
+      "epoch": 0.030497705612424993,
+      "grad_norm": 1.0487242937088013,
+      "learning_rate": 0.00018090169943749476,
+      "loss": 2.077,
+      "step": 108
+    },
+    {
+      "epoch": 0.030780091775503,
+      "grad_norm": 1.5338118076324463,
+      "learning_rate": 0.0001805231858085356,
+      "loss": 1.5191,
+      "step": 109
+    },
+    {
+      "epoch": 0.03106247793858101,
+      "grad_norm": 1.2566704750061035,
+      "learning_rate": 0.00018014136218679567,
+      "loss": 1.756,
+      "step": 110
+    },
+    {
+      "epoch": 0.03106247793858101,
+      "eval_loss": 1.6045676469802856,
+      "eval_runtime": 133.6566,
+      "eval_samples_per_second": 5.581,
+      "eval_steps_per_second": 5.581,
+      "step": 110
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.076683943706624e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null