Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1da8e4244ee2f29a6dd33b2b3f28a7b7a5fe10e585143763c29826ce9bfeddb
 size 100690288

 version https://git-lfs.github.com/spec/v1
+oid sha256:40db45d3d9a9857f9142fd99bf12f5de7d021ad0bb07bf1624cb1fb9f89b67d6
 size 100690288

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1ea54593dd06c791a2c63a2fc1d0d901d7dd3db312a2f7aee789fcfe12cf21f
 size 51344890

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfab43f9e4c386aa90887de7268be690d2c532b1ba0c242d927c8830c5ac66ae
 size 51344890

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d47344633e98a7e34880dcd6bf08f83d6265edf432ca9c24660ea237b07925a4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4a23a0688584fbad5072f2472656b5a3972761d27e8aae70de57e12639dc777
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:faf4b7c5498ae4c4e5a296fe320aca480fc4724dd9eaa1c7badb2de02f26fafb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:36c6d837152cc4853064add4c61272c62a18db4bbfe14e332929026ad210cee2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.7836493253707886,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.5474452554744526,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 12.353,
       "eval_steps_per_second": 3.088,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3484840800288768.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.7836493253707886,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.7299270072992701,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.353,
       "eval_steps_per_second": 3.088,
       "step": 150
+    },
+    {
+      "epoch": 0.551094890510949,
+      "grad_norm": 2.173349142074585,
+      "learning_rate": 2.6073157894736845e-05,
+      "loss": 3.5216,
+      "step": 151
+    },
+    {
+      "epoch": 0.5547445255474452,
+      "grad_norm": 1.9568188190460205,
+      "learning_rate": 2.554105263157895e-05,
+      "loss": 3.414,
+      "step": 152
+    },
+    {
+      "epoch": 0.5583941605839416,
+      "grad_norm": 2.049314022064209,
+      "learning_rate": 2.5008947368421052e-05,
+      "loss": 3.1854,
+      "step": 153
+    },
+    {
+      "epoch": 0.5620437956204379,
+      "grad_norm": 1.8505172729492188,
+      "learning_rate": 2.447684210526316e-05,
+      "loss": 3.7311,
+      "step": 154
+    },
+    {
+      "epoch": 0.5656934306569343,
+      "grad_norm": 1.7153427600860596,
+      "learning_rate": 2.3944736842105262e-05,
+      "loss": 3.1837,
+      "step": 155
+    },
+    {
+      "epoch": 0.5693430656934306,
+      "grad_norm": 1.6411361694335938,
+      "learning_rate": 2.341263157894737e-05,
+      "loss": 3.521,
+      "step": 156
+    },
+    {
+      "epoch": 0.572992700729927,
+      "grad_norm": 1.6039944887161255,
+      "learning_rate": 2.2880526315789475e-05,
+      "loss": 3.4806,
+      "step": 157
+    },
+    {
+      "epoch": 0.5766423357664233,
+      "grad_norm": 1.678540587425232,
+      "learning_rate": 2.234842105263158e-05,
+      "loss": 3.1553,
+      "step": 158
+    },
+    {
+      "epoch": 0.5802919708029197,
+      "grad_norm": 1.57163405418396,
+      "learning_rate": 2.1816315789473685e-05,
+      "loss": 4.0207,
+      "step": 159
+    },
+    {
+      "epoch": 0.583941605839416,
+      "grad_norm": 1.9761027097702026,
+      "learning_rate": 2.128421052631579e-05,
+      "loss": 3.7772,
+      "step": 160
+    },
+    {
+      "epoch": 0.5875912408759124,
+      "grad_norm": 2.047546863555908,
+      "learning_rate": 2.0752105263157895e-05,
+      "loss": 3.4518,
+      "step": 161
+    },
+    {
+      "epoch": 0.5912408759124088,
+      "grad_norm": 1.9257515668869019,
+      "learning_rate": 2.0220000000000003e-05,
+      "loss": 3.3506,
+      "step": 162
+    },
+    {
+      "epoch": 0.5948905109489051,
+      "grad_norm": 2.5686001777648926,
+      "learning_rate": 1.9687894736842104e-05,
+      "loss": 3.4314,
+      "step": 163
+    },
+    {
+      "epoch": 0.5985401459854015,
+      "grad_norm": 1.6636919975280762,
+      "learning_rate": 1.9155789473684213e-05,
+      "loss": 3.6784,
+      "step": 164
+    },
+    {
+      "epoch": 0.6021897810218978,
+      "grad_norm": 1.5030348300933838,
+      "learning_rate": 1.8623684210526314e-05,
+      "loss": 3.0561,
+      "step": 165
+    },
+    {
+      "epoch": 0.6058394160583942,
+      "grad_norm": 1.5657211542129517,
+      "learning_rate": 1.8091578947368423e-05,
+      "loss": 2.7246,
+      "step": 166
+    },
+    {
+      "epoch": 0.6094890510948905,
+      "grad_norm": 1.5702214241027832,
+      "learning_rate": 1.7559473684210528e-05,
+      "loss": 2.7973,
+      "step": 167
+    },
+    {
+      "epoch": 0.6131386861313869,
+      "grad_norm": 1.8857451677322388,
+      "learning_rate": 1.7027368421052632e-05,
+      "loss": 3.6129,
+      "step": 168
+    },
+    {
+      "epoch": 0.6167883211678832,
+      "grad_norm": 1.5984008312225342,
+      "learning_rate": 1.6495263157894737e-05,
+      "loss": 3.2637,
+      "step": 169
+    },
+    {
+      "epoch": 0.6204379562043796,
+      "grad_norm": 1.8663055896759033,
+      "learning_rate": 1.5963157894736842e-05,
+      "loss": 3.5648,
+      "step": 170
+    },
+    {
+      "epoch": 0.6240875912408759,
+      "grad_norm": 2.12020206451416,
+      "learning_rate": 1.5431052631578947e-05,
+      "loss": 4.199,
+      "step": 171
+    },
+    {
+      "epoch": 0.6277372262773723,
+      "grad_norm": 1.8056983947753906,
+      "learning_rate": 1.4898947368421052e-05,
+      "loss": 3.4812,
+      "step": 172
+    },
+    {
+      "epoch": 0.6313868613138686,
+      "grad_norm": 1.6429282426834106,
+      "learning_rate": 1.4366842105263159e-05,
+      "loss": 3.5187,
+      "step": 173
+    },
+    {
+      "epoch": 0.635036496350365,
+      "grad_norm": 1.8332836627960205,
+      "learning_rate": 1.3834736842105265e-05,
+      "loss": 3.9151,
+      "step": 174
+    },
+    {
+      "epoch": 0.6386861313868614,
+      "grad_norm": 1.760815143585205,
+      "learning_rate": 1.3302631578947369e-05,
+      "loss": 3.1158,
+      "step": 175
+    },
+    {
+      "epoch": 0.6423357664233577,
+      "grad_norm": 1.793931484222412,
+      "learning_rate": 1.2770526315789475e-05,
+      "loss": 3.2062,
+      "step": 176
+    },
+    {
+      "epoch": 0.6459854014598541,
+      "grad_norm": 1.727136254310608,
+      "learning_rate": 1.223842105263158e-05,
+      "loss": 3.2026,
+      "step": 177
+    },
+    {
+      "epoch": 0.6496350364963503,
+      "grad_norm": 2.5563066005706787,
+      "learning_rate": 1.1706315789473685e-05,
+      "loss": 4.4599,
+      "step": 178
+    },
+    {
+      "epoch": 0.6532846715328468,
+      "grad_norm": 1.7797980308532715,
+      "learning_rate": 1.117421052631579e-05,
+      "loss": 3.446,
+      "step": 179
+    },
+    {
+      "epoch": 0.656934306569343,
+      "grad_norm": 1.8188791275024414,
+      "learning_rate": 1.0642105263157895e-05,
+      "loss": 3.7353,
+      "step": 180
+    },
+    {
+      "epoch": 0.6605839416058394,
+      "grad_norm": 1.6830250024795532,
+      "learning_rate": 1.0110000000000001e-05,
+      "loss": 3.3371,
+      "step": 181
+    },
+    {
+      "epoch": 0.6642335766423357,
+      "grad_norm": 2.488910675048828,
+      "learning_rate": 9.577894736842106e-06,
+      "loss": 4.4411,
+      "step": 182
+    },
+    {
+      "epoch": 0.6678832116788321,
+      "grad_norm": 1.7048317193984985,
+      "learning_rate": 9.045789473684211e-06,
+      "loss": 3.4593,
+      "step": 183
+    },
+    {
+      "epoch": 0.6715328467153284,
+      "grad_norm": 2.0829358100891113,
+      "learning_rate": 8.513684210526316e-06,
+      "loss": 4.0263,
+      "step": 184
+    },
+    {
+      "epoch": 0.6751824817518248,
+      "grad_norm": 2.3267061710357666,
+      "learning_rate": 7.981578947368421e-06,
+      "loss": 3.8797,
+      "step": 185
+    },
+    {
+      "epoch": 0.6788321167883211,
+      "grad_norm": 2.366909980773926,
+      "learning_rate": 7.449473684210526e-06,
+      "loss": 3.7547,
+      "step": 186
+    },
+    {
+      "epoch": 0.6824817518248175,
+      "grad_norm": 2.0214099884033203,
+      "learning_rate": 6.917368421052633e-06,
+      "loss": 4.1961,
+      "step": 187
+    },
+    {
+      "epoch": 0.6861313868613139,
+      "grad_norm": 3.4042675495147705,
+      "learning_rate": 6.385263157894738e-06,
+      "loss": 4.0792,
+      "step": 188
+    },
+    {
+      "epoch": 0.6897810218978102,
+      "grad_norm": 2.2859408855438232,
+      "learning_rate": 5.8531578947368425e-06,
+      "loss": 4.2271,
+      "step": 189
+    },
+    {
+      "epoch": 0.6934306569343066,
+      "grad_norm": 3.462792158126831,
+      "learning_rate": 5.321052631578947e-06,
+      "loss": 5.6922,
+      "step": 190
+    },
+    {
+      "epoch": 0.6970802919708029,
+      "grad_norm": 2.6065616607666016,
+      "learning_rate": 4.788947368421053e-06,
+      "loss": 4.1072,
+      "step": 191
+    },
+    {
+      "epoch": 0.7007299270072993,
+      "grad_norm": 2.6013782024383545,
+      "learning_rate": 4.256842105263158e-06,
+      "loss": 3.5596,
+      "step": 192
+    },
+    {
+      "epoch": 0.7043795620437956,
+      "grad_norm": 3.0235507488250732,
+      "learning_rate": 3.724736842105263e-06,
+      "loss": 3.9284,
+      "step": 193
+    },
+    {
+      "epoch": 0.708029197080292,
+      "grad_norm": 3.959134340286255,
+      "learning_rate": 3.192631578947369e-06,
+      "loss": 3.8374,
+      "step": 194
+    },
+    {
+      "epoch": 0.7116788321167883,
+      "grad_norm": 4.039380073547363,
+      "learning_rate": 2.6605263157894737e-06,
+      "loss": 4.4352,
+      "step": 195
+    },
+    {
+      "epoch": 0.7153284671532847,
+      "grad_norm": 5.668715953826904,
+      "learning_rate": 2.128421052631579e-06,
+      "loss": 3.3006,
+      "step": 196
+    },
+    {
+      "epoch": 0.718978102189781,
+      "grad_norm": 7.316230773925781,
+      "learning_rate": 1.5963157894736844e-06,
+      "loss": 3.4598,
+      "step": 197
+    },
+    {
+      "epoch": 0.7226277372262774,
+      "grad_norm": 5.929181098937988,
+      "learning_rate": 1.0642105263157895e-06,
+      "loss": 3.8279,
+      "step": 198
+    },
+    {
+      "epoch": 0.7262773722627737,
+      "grad_norm": 8.298709869384766,
+      "learning_rate": 5.321052631578948e-07,
+      "loss": 5.2993,
+      "step": 199
+    },
+    {
+      "epoch": 0.7299270072992701,
+      "grad_norm": 10.71786880493164,
+      "learning_rate": 0.0,
+      "loss": 5.1007,
+      "step": 200
+    },
+    {
+      "epoch": 0.7299270072992701,
+      "eval_loss": 1.7919894456863403,
+      "eval_runtime": 8.7871,
+      "eval_samples_per_second": 13.201,
+      "eval_steps_per_second": 3.3,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4684012345884672.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null