Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93ebd47fab448c3357c9add32ad243b54e46456ed5b431375cef09227d38d82b
 size 2269195160

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ec90b1bef3fae36ee2e6f3e35f4d0633e742835c484b7b53c25b2fb33e55b97
 size 2269195160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72f5d7ee65f2df0f29ed381bd138f9bc38f3d75496dacde2f48ef12403614b91
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:19b76775cb19aff3bbb8b74cab7e572f060c28279af09a0e2609759aba90d1e1
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a81e7941b209259fe8969856a6bf660f98b83b595e437ef84814654ec2459f12
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:391c32b5504b892c155845f383b09ba76c17c7429f8d75818faf9576ee7599e8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.303614854812622,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.13018008244738555,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 10.28,
       "eval_steps_per_second": 1.439,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.364766999805952e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.273140788078308,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.17357344326318072,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.28,
       "eval_steps_per_second": 1.439,
       "step": 150
+    },
+    {
+      "epoch": 0.13104794966370145,
+      "grad_norm": 1.435655117034912,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 1.3466,
+      "step": 151
+    },
+    {
+      "epoch": 0.13191581688001736,
+      "grad_norm": 1.3139824867248535,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 1.3012,
+      "step": 152
+    },
+    {
+      "epoch": 0.13278368409633326,
+      "grad_norm": 1.475575566291809,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 1.341,
+      "step": 153
+    },
+    {
+      "epoch": 0.13365155131264916,
+      "grad_norm": 1.266109824180603,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 1.2559,
+      "step": 154
+    },
+    {
+      "epoch": 0.13451941852896507,
+      "grad_norm": 1.6535367965698242,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 1.2211,
+      "step": 155
+    },
+    {
+      "epoch": 0.13538728574528097,
+      "grad_norm": 1.2901335954666138,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 1.265,
+      "step": 156
+    },
+    {
+      "epoch": 0.13625515296159688,
+      "grad_norm": 1.4438883066177368,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 1.4203,
+      "step": 157
+    },
+    {
+      "epoch": 0.13712302017791278,
+      "grad_norm": 1.3088321685791016,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 1.5061,
+      "step": 158
+    },
+    {
+      "epoch": 0.13799088739422868,
+      "grad_norm": 1.4172899723052979,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 1.3196,
+      "step": 159
+    },
+    {
+      "epoch": 0.1388587546105446,
+      "grad_norm": 1.0322235822677612,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 1.1525,
+      "step": 160
+    },
+    {
+      "epoch": 0.1397266218268605,
+      "grad_norm": 1.4172779321670532,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 1.1612,
+      "step": 161
+    },
+    {
+      "epoch": 0.1405944890431764,
+      "grad_norm": 1.2472504377365112,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 1.1889,
+      "step": 162
+    },
+    {
+      "epoch": 0.1414623562594923,
+      "grad_norm": 1.3722219467163086,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 1.3959,
+      "step": 163
+    },
+    {
+      "epoch": 0.1423302234758082,
+      "grad_norm": 1.0866336822509766,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 1.1537,
+      "step": 164
+    },
+    {
+      "epoch": 0.1431980906921241,
+      "grad_norm": 1.1457860469818115,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 1.1059,
+      "step": 165
+    },
+    {
+      "epoch": 0.14406595790844,
+      "grad_norm": 1.2376954555511475,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 1.2862,
+      "step": 166
+    },
+    {
+      "epoch": 0.14493382512475592,
+      "grad_norm": 1.3723816871643066,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 1.3396,
+      "step": 167
+    },
+    {
+      "epoch": 0.14580169234107182,
+      "grad_norm": 1.3971730470657349,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.991,
+      "step": 168
+    },
+    {
+      "epoch": 0.14666955955738772,
+      "grad_norm": 1.1824660301208496,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 1.2045,
+      "step": 169
+    },
+    {
+      "epoch": 0.14753742677370363,
+      "grad_norm": 1.5620667934417725,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 1.4492,
+      "step": 170
+    },
+    {
+      "epoch": 0.14840529399001953,
+      "grad_norm": 1.6076619625091553,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 1.1662,
+      "step": 171
+    },
+    {
+      "epoch": 0.14927316120633544,
+      "grad_norm": 1.2753527164459229,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 1.0975,
+      "step": 172
+    },
+    {
+      "epoch": 0.15014102842265134,
+      "grad_norm": 1.6548813581466675,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 1.3443,
+      "step": 173
+    },
+    {
+      "epoch": 0.15100889563896724,
+      "grad_norm": 1.3361274003982544,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 1.1309,
+      "step": 174
+    },
+    {
+      "epoch": 0.15187676285528315,
+      "grad_norm": 1.3236322402954102,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 1.3047,
+      "step": 175
+    },
+    {
+      "epoch": 0.15187676285528315,
+      "eval_loss": 1.2790803909301758,
+      "eval_runtime": 4.8651,
+      "eval_samples_per_second": 10.277,
+      "eval_steps_per_second": 1.439,
+      "step": 175
+    },
+    {
+      "epoch": 0.15274463007159905,
+      "grad_norm": 1.4842597246170044,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 1.0864,
+      "step": 176
+    },
+    {
+      "epoch": 0.15361249728791496,
+      "grad_norm": 1.5153896808624268,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 1.1642,
+      "step": 177
+    },
+    {
+      "epoch": 0.15448036450423086,
+      "grad_norm": 1.2488980293273926,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 1.323,
+      "step": 178
+    },
+    {
+      "epoch": 0.15534823172054676,
+      "grad_norm": 1.6930081844329834,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 1.2604,
+      "step": 179
+    },
+    {
+      "epoch": 0.15621609893686267,
+      "grad_norm": 1.3704711198806763,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 1.2578,
+      "step": 180
+    },
+    {
+      "epoch": 0.15708396615317857,
+      "grad_norm": 1.291325330734253,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 1.2179,
+      "step": 181
+    },
+    {
+      "epoch": 0.15795183336949448,
+      "grad_norm": 1.5434905290603638,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 1.305,
+      "step": 182
+    },
+    {
+      "epoch": 0.15881970058581038,
+      "grad_norm": 1.337917447090149,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 1.0775,
+      "step": 183
+    },
+    {
+      "epoch": 0.15968756780212628,
+      "grad_norm": 1.2450789213180542,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 1.2888,
+      "step": 184
+    },
+    {
+      "epoch": 0.1605554350184422,
+      "grad_norm": 1.8470358848571777,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 1.3356,
+      "step": 185
+    },
+    {
+      "epoch": 0.1614233022347581,
+      "grad_norm": 1.937638282775879,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 1.6163,
+      "step": 186
+    },
+    {
+      "epoch": 0.162291169451074,
+      "grad_norm": 1.4495834112167358,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 1.284,
+      "step": 187
+    },
+    {
+      "epoch": 0.1631590366673899,
+      "grad_norm": 1.6046760082244873,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 1.2811,
+      "step": 188
+    },
+    {
+      "epoch": 0.1640269038837058,
+      "grad_norm": 1.4449301958084106,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 1.3251,
+      "step": 189
+    },
+    {
+      "epoch": 0.1648947711000217,
+      "grad_norm": 1.6170849800109863,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 1.0835,
+      "step": 190
+    },
+    {
+      "epoch": 0.1657626383163376,
+      "grad_norm": 1.2608143091201782,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 1.1025,
+      "step": 191
+    },
+    {
+      "epoch": 0.16663050553265352,
+      "grad_norm": 1.7908244132995605,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 1.5649,
+      "step": 192
+    },
+    {
+      "epoch": 0.16749837274896942,
+      "grad_norm": 1.5216867923736572,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.9956,
+      "step": 193
+    },
+    {
+      "epoch": 0.16836623996528532,
+      "grad_norm": 1.6887457370758057,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 1.2089,
+      "step": 194
+    },
+    {
+      "epoch": 0.16923410718160123,
+      "grad_norm": 2.0695362091064453,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 1.495,
+      "step": 195
+    },
+    {
+      "epoch": 0.1701019743979171,
+      "grad_norm": 1.8778845071792603,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 1.4961,
+      "step": 196
+    },
+    {
+      "epoch": 0.170969841614233,
+      "grad_norm": 2.050307273864746,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 1.5306,
+      "step": 197
+    },
+    {
+      "epoch": 0.1718377088305489,
+      "grad_norm": 1.506821632385254,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 1.18,
+      "step": 198
+    },
+    {
+      "epoch": 0.17270557604686482,
+      "grad_norm": 1.6801998615264893,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 1.2565,
+      "step": 199
+    },
+    {
+      "epoch": 0.17357344326318072,
+      "grad_norm": 1.8449974060058594,
+      "learning_rate": 0.0,
+      "loss": 1.4853,
+      "step": 200
+    },
+    {
+      "epoch": 0.17357344326318072,
+      "eval_loss": 1.273140788078308,
+      "eval_runtime": 4.8567,
+      "eval_samples_per_second": 10.295,
+      "eval_steps_per_second": 1.441,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.153022666407936e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null