Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2db180a11a77d78f3e593b87d83a44b035440562a9d120d30d4b8671f5c0b6a6
 size 100690288

 version https://git-lfs.github.com/spec/v1
+oid sha256:efe115045ce3098b83208817be7b15f964e225849b3314c1641e3ca466a8d88c
 size 100690288

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a86b832964b8f87e25e6d8d00ddf36293ea9afd32b7885d43267570d1b84ab75
 size 51344890

 version https://git-lfs.github.com/spec/v1
+oid sha256:dcd9a4c591c4c6ff5e718585f3e79dfcd8b19bd364e519fc9fc4ae06512c890b
 size 51344890

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6965c5620d3e39226b1e45d33175dbc9ccb249fababa4515bfaf3b896d1f8c65
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8480a633bfcfa8c2d9fa818f135247c817e0614341bd17a78321f03a1149f95b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcf03b4d5469a652184718e32be2e70c8f65db8a0bf0774f129b38a7ee64ae50
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2eca4a3affde04f8094d886307f00c561b3e3a77570955ff1177732e06fbab0c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.2298707962036133,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.26881720430107525,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 22.759,
       "eval_steps_per_second": 5.714,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2414439353548800.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.2126824855804443,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.35842293906810035,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.759,
       "eval_steps_per_second": 5.714,
       "step": 150
+    },
+    {
+      "epoch": 0.27060931899641577,
+      "grad_norm": 3.4328715801239014,
+      "learning_rate": 2.6021578947368423e-05,
+      "loss": 6.4233,
+      "step": 151
+    },
+    {
+      "epoch": 0.2724014336917563,
+      "grad_norm": 3.84840989112854,
+      "learning_rate": 2.5490526315789475e-05,
+      "loss": 6.5303,
+      "step": 152
+    },
+    {
+      "epoch": 0.27419354838709675,
+      "grad_norm": 4.179206848144531,
+      "learning_rate": 2.4959473684210524e-05,
+      "loss": 6.3638,
+      "step": 153
+    },
+    {
+      "epoch": 0.27598566308243727,
+      "grad_norm": 4.336339473724365,
+      "learning_rate": 2.442842105263158e-05,
+      "loss": 6.4631,
+      "step": 154
+    },
+    {
+      "epoch": 0.2777777777777778,
+      "grad_norm": 4.317768573760986,
+      "learning_rate": 2.389736842105263e-05,
+      "loss": 6.4562,
+      "step": 155
+    },
+    {
+      "epoch": 0.27956989247311825,
+      "grad_norm": 3.927757740020752,
+      "learning_rate": 2.3366315789473685e-05,
+      "loss": 6.3095,
+      "step": 156
+    },
+    {
+      "epoch": 0.28136200716845877,
+      "grad_norm": 4.185893535614014,
+      "learning_rate": 2.2835263157894738e-05,
+      "loss": 6.3723,
+      "step": 157
+    },
+    {
+      "epoch": 0.2831541218637993,
+      "grad_norm": 3.7943179607391357,
+      "learning_rate": 2.230421052631579e-05,
+      "loss": 6.3678,
+      "step": 158
+    },
+    {
+      "epoch": 0.2849462365591398,
+      "grad_norm": 3.7967092990875244,
+      "learning_rate": 2.1773157894736843e-05,
+      "loss": 6.4141,
+      "step": 159
+    },
+    {
+      "epoch": 0.2867383512544803,
+      "grad_norm": 3.7188453674316406,
+      "learning_rate": 2.1242105263157895e-05,
+      "loss": 6.0975,
+      "step": 160
+    },
+    {
+      "epoch": 0.2885304659498208,
+      "grad_norm": 3.597055673599243,
+      "learning_rate": 2.0711052631578947e-05,
+      "loss": 6.2374,
+      "step": 161
+    },
+    {
+      "epoch": 0.2903225806451613,
+      "grad_norm": 3.9644243717193604,
+      "learning_rate": 2.018e-05,
+      "loss": 6.4148,
+      "step": 162
+    },
+    {
+      "epoch": 0.2921146953405018,
+      "grad_norm": 3.8214528560638428,
+      "learning_rate": 1.9648947368421052e-05,
+      "loss": 6.7674,
+      "step": 163
+    },
+    {
+      "epoch": 0.2939068100358423,
+      "grad_norm": 4.147940158843994,
+      "learning_rate": 1.9117894736842105e-05,
+      "loss": 6.1552,
+      "step": 164
+    },
+    {
+      "epoch": 0.2956989247311828,
+      "grad_norm": 3.685476303100586,
+      "learning_rate": 1.8586842105263157e-05,
+      "loss": 6.6526,
+      "step": 165
+    },
+    {
+      "epoch": 0.2974910394265233,
+      "grad_norm": 3.927133560180664,
+      "learning_rate": 1.805578947368421e-05,
+      "loss": 6.4206,
+      "step": 166
+    },
+    {
+      "epoch": 0.2992831541218638,
+      "grad_norm": 3.8087570667266846,
+      "learning_rate": 1.7524736842105266e-05,
+      "loss": 5.7276,
+      "step": 167
+    },
+    {
+      "epoch": 0.3010752688172043,
+      "grad_norm": 3.9403533935546875,
+      "learning_rate": 1.6993684210526315e-05,
+      "loss": 6.2683,
+      "step": 168
+    },
+    {
+      "epoch": 0.30286738351254483,
+      "grad_norm": 4.542222023010254,
+      "learning_rate": 1.646263157894737e-05,
+      "loss": 6.7371,
+      "step": 169
+    },
+    {
+      "epoch": 0.3046594982078853,
+      "grad_norm": 4.154723167419434,
+      "learning_rate": 1.593157894736842e-05,
+      "loss": 6.3052,
+      "step": 170
+    },
+    {
+      "epoch": 0.3064516129032258,
+      "grad_norm": 4.047012805938721,
+      "learning_rate": 1.5400526315789475e-05,
+      "loss": 6.2573,
+      "step": 171
+    },
+    {
+      "epoch": 0.30824372759856633,
+      "grad_norm": 3.9153900146484375,
+      "learning_rate": 1.4869473684210524e-05,
+      "loss": 6.2081,
+      "step": 172
+    },
+    {
+      "epoch": 0.3100358422939068,
+      "grad_norm": 4.141908645629883,
+      "learning_rate": 1.4338421052631579e-05,
+      "loss": 6.3796,
+      "step": 173
+    },
+    {
+      "epoch": 0.3118279569892473,
+      "grad_norm": 4.091080188751221,
+      "learning_rate": 1.3807368421052633e-05,
+      "loss": 6.4765,
+      "step": 174
+    },
+    {
+      "epoch": 0.31362007168458783,
+      "grad_norm": 4.347208499908447,
+      "learning_rate": 1.3276315789473684e-05,
+      "loss": 6.2807,
+      "step": 175
+    },
+    {
+      "epoch": 0.3154121863799283,
+      "grad_norm": 3.9828412532806396,
+      "learning_rate": 1.2745263157894738e-05,
+      "loss": 5.8736,
+      "step": 176
+    },
+    {
+      "epoch": 0.3172043010752688,
+      "grad_norm": 4.449699878692627,
+      "learning_rate": 1.221421052631579e-05,
+      "loss": 6.8993,
+      "step": 177
+    },
+    {
+      "epoch": 0.31899641577060933,
+      "grad_norm": 4.073231220245361,
+      "learning_rate": 1.1683157894736843e-05,
+      "loss": 6.0161,
+      "step": 178
+    },
+    {
+      "epoch": 0.3207885304659498,
+      "grad_norm": 3.944138526916504,
+      "learning_rate": 1.1152105263157895e-05,
+      "loss": 6.4192,
+      "step": 179
+    },
+    {
+      "epoch": 0.3225806451612903,
+      "grad_norm": 4.515273094177246,
+      "learning_rate": 1.0621052631578948e-05,
+      "loss": 6.8505,
+      "step": 180
+    },
+    {
+      "epoch": 0.32437275985663083,
+      "grad_norm": 4.274086952209473,
+      "learning_rate": 1.009e-05,
+      "loss": 6.6451,
+      "step": 181
+    },
+    {
+      "epoch": 0.32616487455197135,
+      "grad_norm": 4.1749114990234375,
+      "learning_rate": 9.558947368421052e-06,
+      "loss": 6.1817,
+      "step": 182
+    },
+    {
+      "epoch": 0.3279569892473118,
+      "grad_norm": 4.580990314483643,
+      "learning_rate": 9.027894736842105e-06,
+      "loss": 6.6129,
+      "step": 183
+    },
+    {
+      "epoch": 0.32974910394265233,
+      "grad_norm": 4.359395503997803,
+      "learning_rate": 8.496842105263157e-06,
+      "loss": 6.2116,
+      "step": 184
+    },
+    {
+      "epoch": 0.33154121863799285,
+      "grad_norm": 4.21564245223999,
+      "learning_rate": 7.96578947368421e-06,
+      "loss": 6.1489,
+      "step": 185
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 4.635838508605957,
+      "learning_rate": 7.434736842105262e-06,
+      "loss": 6.2616,
+      "step": 186
+    },
+    {
+      "epoch": 0.33512544802867383,
+      "grad_norm": 4.36469841003418,
+      "learning_rate": 6.903684210526316e-06,
+      "loss": 6.8617,
+      "step": 187
+    },
+    {
+      "epoch": 0.33691756272401435,
+      "grad_norm": 4.288735389709473,
+      "learning_rate": 6.372631578947369e-06,
+      "loss": 6.4591,
+      "step": 188
+    },
+    {
+      "epoch": 0.3387096774193548,
+      "grad_norm": 4.40194845199585,
+      "learning_rate": 5.841578947368421e-06,
+      "loss": 6.7289,
+      "step": 189
+    },
+    {
+      "epoch": 0.34050179211469533,
+      "grad_norm": 4.413331508636475,
+      "learning_rate": 5.310526315789474e-06,
+      "loss": 6.6116,
+      "step": 190
+    },
+    {
+      "epoch": 0.34229390681003585,
+      "grad_norm": 4.823085784912109,
+      "learning_rate": 4.779473684210526e-06,
+      "loss": 6.35,
+      "step": 191
+    },
+    {
+      "epoch": 0.34408602150537637,
+      "grad_norm": 5.074087142944336,
+      "learning_rate": 4.248421052631579e-06,
+      "loss": 6.8554,
+      "step": 192
+    },
+    {
+      "epoch": 0.34587813620071683,
+      "grad_norm": 4.536152362823486,
+      "learning_rate": 3.717368421052631e-06,
+      "loss": 6.1555,
+      "step": 193
+    },
+    {
+      "epoch": 0.34767025089605735,
+      "grad_norm": 4.731776714324951,
+      "learning_rate": 3.1863157894736844e-06,
+      "loss": 6.5222,
+      "step": 194
+    },
+    {
+      "epoch": 0.34946236559139787,
+      "grad_norm": 4.665464401245117,
+      "learning_rate": 2.655263157894737e-06,
+      "loss": 6.4625,
+      "step": 195
+    },
+    {
+      "epoch": 0.35125448028673834,
+      "grad_norm": 5.292746067047119,
+      "learning_rate": 2.1242105263157893e-06,
+      "loss": 6.6233,
+      "step": 196
+    },
+    {
+      "epoch": 0.35304659498207885,
+      "grad_norm": 5.008660793304443,
+      "learning_rate": 1.5931578947368422e-06,
+      "loss": 6.4388,
+      "step": 197
+    },
+    {
+      "epoch": 0.3548387096774194,
+      "grad_norm": 5.119359493255615,
+      "learning_rate": 1.0621052631578947e-06,
+      "loss": 6.3227,
+      "step": 198
+    },
+    {
+      "epoch": 0.35663082437275984,
+      "grad_norm": 5.395398139953613,
+      "learning_rate": 5.310526315789473e-07,
+      "loss": 6.3648,
+      "step": 199
+    },
+    {
+      "epoch": 0.35842293906810035,
+      "grad_norm": 6.391620635986328,
+      "learning_rate": 0.0,
+      "loss": 6.0084,
+      "step": 200
+    },
+    {
+      "epoch": 0.35842293906810035,
+      "eval_loss": 3.2126824855804443,
+      "eval_runtime": 10.1929,
+      "eval_samples_per_second": 23.055,
+      "eval_steps_per_second": 5.788,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3219252471398400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null