Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29142b2af43397ba4eb661020a07771954e4c00496a9ce872fc98564ffc502a0
 size 201353800

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e73d29622776437c0256527ddb10e4640e9bd3a8555fefd0cab122f644a85c0
 size 201353800

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cce5d4951f09ce708e039957fe4662de516402b9e792813b51a64c56a403819
 size 102462970

 version https://git-lfs.github.com/spec/v1
+oid sha256:afaed2a97b90d242fdd45df86d18a38da66b3665b11198a6a194e61f8e2312ec
 size 102462970

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:996dea7235f98847320dc87d217372999c52faed6d2e99ee338d3b9b79f96574
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6d0b025d07824687b5776aa5cc8693b176f48dc557b61da39ee8e6f5e4a6eda
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22c9dfa784729c93db12d225bfc25c64e7ae0e1e9f4be7b45dc255fae6ea42c4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e90d1a9917ccbc9819e77d754e81c093aaa1bcc13b46bc6c7bb4bcae17159bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7120175361633301,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.6342494714587738,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 17.546,
       "eval_steps_per_second": 4.387,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9281870600601600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6885719895362854,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.8456659619450317,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.546,
       "eval_steps_per_second": 4.387,
       "step": 150
+    },
+    {
+      "epoch": 0.638477801268499,
+      "grad_norm": 3.7120771408081055,
+      "learning_rate": 2.586684210526316e-05,
+      "loss": 1.6074,
+      "step": 151
+    },
+    {
+      "epoch": 0.642706131078224,
+      "grad_norm": 4.883687973022461,
+      "learning_rate": 2.5338947368421054e-05,
+      "loss": 1.7242,
+      "step": 152
+    },
+    {
+      "epoch": 0.6469344608879493,
+      "grad_norm": 3.719517707824707,
+      "learning_rate": 2.4811052631578945e-05,
+      "loss": 1.5066,
+      "step": 153
+    },
+    {
+      "epoch": 0.6511627906976745,
+      "grad_norm": 3.921480417251587,
+      "learning_rate": 2.428315789473684e-05,
+      "loss": 1.3122,
+      "step": 154
+    },
+    {
+      "epoch": 0.6553911205073996,
+      "grad_norm": 4.006931304931641,
+      "learning_rate": 2.3755263157894736e-05,
+      "loss": 1.5879,
+      "step": 155
+    },
+    {
+      "epoch": 0.6596194503171248,
+      "grad_norm": 3.2869625091552734,
+      "learning_rate": 2.322736842105263e-05,
+      "loss": 1.3118,
+      "step": 156
+    },
+    {
+      "epoch": 0.6638477801268499,
+      "grad_norm": 4.165566921234131,
+      "learning_rate": 2.2699473684210526e-05,
+      "loss": 1.816,
+      "step": 157
+    },
+    {
+      "epoch": 0.6680761099365751,
+      "grad_norm": 3.342907667160034,
+      "learning_rate": 2.217157894736842e-05,
+      "loss": 1.491,
+      "step": 158
+    },
+    {
+      "epoch": 0.6723044397463002,
+      "grad_norm": 3.698087453842163,
+      "learning_rate": 2.1643684210526316e-05,
+      "loss": 1.3681,
+      "step": 159
+    },
+    {
+      "epoch": 0.6765327695560254,
+      "grad_norm": 3.9737207889556885,
+      "learning_rate": 2.1115789473684208e-05,
+      "loss": 1.4224,
+      "step": 160
+    },
+    {
+      "epoch": 0.6807610993657506,
+      "grad_norm": 4.067697048187256,
+      "learning_rate": 2.0587894736842106e-05,
+      "loss": 1.4399,
+      "step": 161
+    },
+    {
+      "epoch": 0.6849894291754757,
+      "grad_norm": 4.039961338043213,
+      "learning_rate": 2.006e-05,
+      "loss": 1.4007,
+      "step": 162
+    },
+    {
+      "epoch": 0.6892177589852009,
+      "grad_norm": 3.65069842338562,
+      "learning_rate": 1.9532105263157893e-05,
+      "loss": 1.2616,
+      "step": 163
+    },
+    {
+      "epoch": 0.693446088794926,
+      "grad_norm": 3.4885332584381104,
+      "learning_rate": 1.900421052631579e-05,
+      "loss": 1.325,
+      "step": 164
+    },
+    {
+      "epoch": 0.6976744186046512,
+      "grad_norm": 3.8202478885650635,
+      "learning_rate": 1.8476315789473683e-05,
+      "loss": 1.4452,
+      "step": 165
+    },
+    {
+      "epoch": 0.7019027484143763,
+      "grad_norm": 3.3636627197265625,
+      "learning_rate": 1.7948421052631578e-05,
+      "loss": 1.4768,
+      "step": 166
+    },
+    {
+      "epoch": 0.7061310782241015,
+      "grad_norm": 3.3209993839263916,
+      "learning_rate": 1.7420526315789473e-05,
+      "loss": 1.5622,
+      "step": 167
+    },
+    {
+      "epoch": 0.7103594080338267,
+      "grad_norm": 3.561314105987549,
+      "learning_rate": 1.6892631578947368e-05,
+      "loss": 1.3776,
+      "step": 168
+    },
+    {
+      "epoch": 0.7145877378435518,
+      "grad_norm": 3.6763312816619873,
+      "learning_rate": 1.6364736842105263e-05,
+      "loss": 1.2834,
+      "step": 169
+    },
+    {
+      "epoch": 0.718816067653277,
+      "grad_norm": 3.0946156978607178,
+      "learning_rate": 1.5836842105263158e-05,
+      "loss": 1.1698,
+      "step": 170
+    },
+    {
+      "epoch": 0.7230443974630021,
+      "grad_norm": 3.4231629371643066,
+      "learning_rate": 1.5308947368421053e-05,
+      "loss": 1.5614,
+      "step": 171
+    },
+    {
+      "epoch": 0.7272727272727273,
+      "grad_norm": 3.3777573108673096,
+      "learning_rate": 1.4781052631578945e-05,
+      "loss": 1.3396,
+      "step": 172
+    },
+    {
+      "epoch": 0.7315010570824524,
+      "grad_norm": 3.4750330448150635,
+      "learning_rate": 1.4253157894736842e-05,
+      "loss": 1.5131,
+      "step": 173
+    },
+    {
+      "epoch": 0.7357293868921776,
+      "grad_norm": 3.9094483852386475,
+      "learning_rate": 1.3725263157894737e-05,
+      "loss": 1.0579,
+      "step": 174
+    },
+    {
+      "epoch": 0.7399577167019028,
+      "grad_norm": 5.054296016693115,
+      "learning_rate": 1.319736842105263e-05,
+      "loss": 1.5261,
+      "step": 175
+    },
+    {
+      "epoch": 0.7441860465116279,
+      "grad_norm": 3.3590009212493896,
+      "learning_rate": 1.2669473684210527e-05,
+      "loss": 0.9721,
+      "step": 176
+    },
+    {
+      "epoch": 0.7484143763213531,
+      "grad_norm": 3.868457078933716,
+      "learning_rate": 1.214157894736842e-05,
+      "loss": 1.3163,
+      "step": 177
+    },
+    {
+      "epoch": 0.7526427061310782,
+      "grad_norm": 3.910431146621704,
+      "learning_rate": 1.1613684210526315e-05,
+      "loss": 1.5807,
+      "step": 178
+    },
+    {
+      "epoch": 0.7568710359408034,
+      "grad_norm": 4.657809734344482,
+      "learning_rate": 1.108578947368421e-05,
+      "loss": 1.8147,
+      "step": 179
+    },
+    {
+      "epoch": 0.7610993657505285,
+      "grad_norm": 2.995884656906128,
+      "learning_rate": 1.0557894736842104e-05,
+      "loss": 1.1418,
+      "step": 180
+    },
+    {
+      "epoch": 0.7653276955602537,
+      "grad_norm": 3.5650057792663574,
+      "learning_rate": 1.003e-05,
+      "loss": 1.2171,
+      "step": 181
+    },
+    {
+      "epoch": 0.7695560253699789,
+      "grad_norm": 2.947834014892578,
+      "learning_rate": 9.502105263157896e-06,
+      "loss": 0.9077,
+      "step": 182
+    },
+    {
+      "epoch": 0.773784355179704,
+      "grad_norm": 4.190479755401611,
+      "learning_rate": 8.974210526315789e-06,
+      "loss": 1.3593,
+      "step": 183
+    },
+    {
+      "epoch": 0.7780126849894292,
+      "grad_norm": 4.313446998596191,
+      "learning_rate": 8.446315789473684e-06,
+      "loss": 1.4314,
+      "step": 184
+    },
+    {
+      "epoch": 0.7822410147991543,
+      "grad_norm": 3.5509700775146484,
+      "learning_rate": 7.918421052631579e-06,
+      "loss": 0.8924,
+      "step": 185
+    },
+    {
+      "epoch": 0.7864693446088795,
+      "grad_norm": 3.5839693546295166,
+      "learning_rate": 7.3905263157894725e-06,
+      "loss": 1.2589,
+      "step": 186
+    },
+    {
+      "epoch": 0.7906976744186046,
+      "grad_norm": 3.49641489982605,
+      "learning_rate": 6.862631578947368e-06,
+      "loss": 1.2195,
+      "step": 187
+    },
+    {
+      "epoch": 0.7949260042283298,
+      "grad_norm": 4.494103908538818,
+      "learning_rate": 6.3347368421052634e-06,
+      "loss": 0.9546,
+      "step": 188
+    },
+    {
+      "epoch": 0.7991543340380549,
+      "grad_norm": 4.106745719909668,
+      "learning_rate": 5.806842105263158e-06,
+      "loss": 1.3837,
+      "step": 189
+    },
+    {
+      "epoch": 0.8033826638477801,
+      "grad_norm": 3.3557074069976807,
+      "learning_rate": 5.278947368421052e-06,
+      "loss": 1.0517,
+      "step": 190
+    },
+    {
+      "epoch": 0.8076109936575053,
+      "grad_norm": 3.182860851287842,
+      "learning_rate": 4.751052631578948e-06,
+      "loss": 0.9986,
+      "step": 191
+    },
+    {
+      "epoch": 0.8118393234672304,
+      "grad_norm": 3.671976327896118,
+      "learning_rate": 4.223157894736842e-06,
+      "loss": 1.0945,
+      "step": 192
+    },
+    {
+      "epoch": 0.8160676532769556,
+      "grad_norm": 3.382333517074585,
+      "learning_rate": 3.6952631578947362e-06,
+      "loss": 1.0867,
+      "step": 193
+    },
+    {
+      "epoch": 0.8202959830866807,
+      "grad_norm": 3.876882314682007,
+      "learning_rate": 3.1673684210526317e-06,
+      "loss": 1.5588,
+      "step": 194
+    },
+    {
+      "epoch": 0.8245243128964059,
+      "grad_norm": 4.147052764892578,
+      "learning_rate": 2.639473684210526e-06,
+      "loss": 1.0961,
+      "step": 195
+    },
+    {
+      "epoch": 0.828752642706131,
+      "grad_norm": 4.046473979949951,
+      "learning_rate": 2.111578947368421e-06,
+      "loss": 1.5097,
+      "step": 196
+    },
+    {
+      "epoch": 0.8329809725158562,
+      "grad_norm": 3.203843116760254,
+      "learning_rate": 1.5836842105263159e-06,
+      "loss": 0.9054,
+      "step": 197
+    },
+    {
+      "epoch": 0.8372093023255814,
+      "grad_norm": 3.514685869216919,
+      "learning_rate": 1.0557894736842105e-06,
+      "loss": 1.2774,
+      "step": 198
+    },
+    {
+      "epoch": 0.8414376321353065,
+      "grad_norm": 4.663123607635498,
+      "learning_rate": 5.278947368421053e-07,
+      "loss": 1.5852,
+      "step": 199
+    },
+    {
+      "epoch": 0.8456659619450317,
+      "grad_norm": 3.930481433868408,
+      "learning_rate": 0.0,
+      "loss": 1.3282,
+      "step": 200
+    },
+    {
+      "epoch": 0.8456659619450317,
+      "eval_loss": 0.6885719895362854,
+      "eval_runtime": 5.7331,
+      "eval_samples_per_second": 17.443,
+      "eval_steps_per_second": 4.361,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.23758274674688e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null