Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a18a1b14748d8b4db7db74a142e1902d53a3e2bd03c119f29ddb6f3abbd02604
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f64519c99b2c312cb1f57d9107d425128afc361f0f2925f0060b09b555564fa
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2aadffe177a513af10bd3ba9bc1059364dd7ec579f109342e9b4b2ec194fb520
 size 320194002

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd8a1b598f31c8449eb58f8d6bc777721909d28519352e3e36317113d7a3d945
 size 320194002

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1a12c05fdd0239884b6dbc91fa003ab5c9eb48317a39524098ff9406f6c0ee2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d13f1cb76d2fd1cb8c08c6c4fdc62917f4648b9e462f5576ed11de46a9b2ac3c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9701613783836365,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.08042221663734607,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 7.141,
       "eval_steps_per_second": 7.141,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.0881258502632243e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9611303806304932,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.10722962218312809,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.141,
       "eval_steps_per_second": 7.141,
       "step": 150
+    },
+    {
+      "epoch": 0.08095836474826171,
+      "grad_norm": 5.198462009429932,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 24.5072,
+      "step": 151
+    },
+    {
+      "epoch": 0.08149451285917735,
+      "grad_norm": 4.549412250518799,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 26.2581,
+      "step": 152
+    },
+    {
+      "epoch": 0.08203066097009298,
+      "grad_norm": 4.362401485443115,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 27.6788,
+      "step": 153
+    },
+    {
+      "epoch": 0.08256680908100864,
+      "grad_norm": 5.346713066101074,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 30.5177,
+      "step": 154
+    },
+    {
+      "epoch": 0.08310295719192427,
+      "grad_norm": 4.8502702713012695,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 27.5227,
+      "step": 155
+    },
+    {
+      "epoch": 0.08363910530283991,
+      "grad_norm": 4.808586120605469,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 30.8657,
+      "step": 156
+    },
+    {
+      "epoch": 0.08417525341375555,
+      "grad_norm": 4.663094997406006,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 29.7888,
+      "step": 157
+    },
+    {
+      "epoch": 0.08471140152467119,
+      "grad_norm": 5.056007385253906,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 29.6446,
+      "step": 158
+    },
+    {
+      "epoch": 0.08524754963558683,
+      "grad_norm": 4.635434150695801,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 29.1428,
+      "step": 159
+    },
+    {
+      "epoch": 0.08578369774650248,
+      "grad_norm": 4.840639591217041,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 29.1692,
+      "step": 160
+    },
+    {
+      "epoch": 0.08631984585741811,
+      "grad_norm": 4.706408977508545,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 30.8735,
+      "step": 161
+    },
+    {
+      "epoch": 0.08685599396833375,
+      "grad_norm": 4.438312530517578,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 29.7526,
+      "step": 162
+    },
+    {
+      "epoch": 0.08739214207924939,
+      "grad_norm": 4.957245349884033,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 31.258,
+      "step": 163
+    },
+    {
+      "epoch": 0.08792829019016503,
+      "grad_norm": 4.573652267456055,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 30.8789,
+      "step": 164
+    },
+    {
+      "epoch": 0.08846443830108067,
+      "grad_norm": 4.697335243225098,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 30.2755,
+      "step": 165
+    },
+    {
+      "epoch": 0.08900058641199632,
+      "grad_norm": 4.690494060516357,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 29.9391,
+      "step": 166
+    },
+    {
+      "epoch": 0.08953673452291196,
+      "grad_norm": 4.9841694831848145,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 27.8766,
+      "step": 167
+    },
+    {
+      "epoch": 0.0900728826338276,
+      "grad_norm": 5.224708557128906,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 29.0932,
+      "step": 168
+    },
+    {
+      "epoch": 0.09060903074474323,
+      "grad_norm": 4.802592754364014,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 30.957,
+      "step": 169
+    },
+    {
+      "epoch": 0.09114517885565887,
+      "grad_norm": 4.843587398529053,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 30.3239,
+      "step": 170
+    },
+    {
+      "epoch": 0.09168132696657452,
+      "grad_norm": 4.917239665985107,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 30.4517,
+      "step": 171
+    },
+    {
+      "epoch": 0.09221747507749016,
+      "grad_norm": 4.622511863708496,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 29.1561,
+      "step": 172
+    },
+    {
+      "epoch": 0.0927536231884058,
+      "grad_norm": 5.174027919769287,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 28.5216,
+      "step": 173
+    },
+    {
+      "epoch": 0.09328977129932144,
+      "grad_norm": 5.131156921386719,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 30.5295,
+      "step": 174
+    },
+    {
+      "epoch": 0.09382591941023707,
+      "grad_norm": 5.090980052947998,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 30.1167,
+      "step": 175
+    },
+    {
+      "epoch": 0.09382591941023707,
+      "eval_loss": 0.961780846118927,
+      "eval_runtime": 7.0068,
+      "eval_samples_per_second": 7.136,
+      "eval_steps_per_second": 7.136,
+      "step": 175
+    },
+    {
+      "epoch": 0.09436206752115271,
+      "grad_norm": 5.299232006072998,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 29.9242,
+      "step": 176
+    },
+    {
+      "epoch": 0.09489821563206836,
+      "grad_norm": 5.412871837615967,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 31.6097,
+      "step": 177
+    },
+    {
+      "epoch": 0.095434363742984,
+      "grad_norm": 5.443716526031494,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 32.2806,
+      "step": 178
+    },
+    {
+      "epoch": 0.09597051185389964,
+      "grad_norm": 5.2226080894470215,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 32.0395,
+      "step": 179
+    },
+    {
+      "epoch": 0.09650665996481528,
+      "grad_norm": 5.585422039031982,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 32.5046,
+      "step": 180
+    },
+    {
+      "epoch": 0.09704280807573092,
+      "grad_norm": 5.256342887878418,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 29.7976,
+      "step": 181
+    },
+    {
+      "epoch": 0.09757895618664657,
+      "grad_norm": 5.678742408752441,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 31.166,
+      "step": 182
+    },
+    {
+      "epoch": 0.0981151042975622,
+      "grad_norm": 5.608844757080078,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 31.462,
+      "step": 183
+    },
+    {
+      "epoch": 0.09865125240847784,
+      "grad_norm": 5.735723495483398,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 32.3888,
+      "step": 184
+    },
+    {
+      "epoch": 0.09918740051939348,
+      "grad_norm": 5.686238765716553,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 32.2446,
+      "step": 185
+    },
+    {
+      "epoch": 0.09972354863030912,
+      "grad_norm": 6.456745147705078,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 31.8302,
+      "step": 186
+    },
+    {
+      "epoch": 0.10025969674122476,
+      "grad_norm": 6.2624192237854,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 32.1222,
+      "step": 187
+    },
+    {
+      "epoch": 0.10079584485214041,
+      "grad_norm": 6.031360149383545,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 30.3575,
+      "step": 188
+    },
+    {
+      "epoch": 0.10133199296305605,
+      "grad_norm": 6.787166595458984,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 33.5298,
+      "step": 189
+    },
+    {
+      "epoch": 0.10186814107397169,
+      "grad_norm": 7.786647796630859,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 33.5317,
+      "step": 190
+    },
+    {
+      "epoch": 0.10240428918488732,
+      "grad_norm": 7.0574631690979,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 30.7742,
+      "step": 191
+    },
+    {
+      "epoch": 0.10294043729580296,
+      "grad_norm": 7.725040912628174,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 31.2916,
+      "step": 192
+    },
+    {
+      "epoch": 0.1034765854067186,
+      "grad_norm": 8.722898483276367,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 33.509,
+      "step": 193
+    },
+    {
+      "epoch": 0.10401273351763425,
+      "grad_norm": 9.124086380004883,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 32.766,
+      "step": 194
+    },
+    {
+      "epoch": 0.10454888162854989,
+      "grad_norm": 12.139370918273926,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 35.1079,
+      "step": 195
+    },
+    {
+      "epoch": 0.10508502973946553,
+      "grad_norm": 13.486305236816406,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 33.708,
+      "step": 196
+    },
+    {
+      "epoch": 0.10562117785038116,
+      "grad_norm": 15.585397720336914,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 35.6728,
+      "step": 197
+    },
+    {
+      "epoch": 0.1061573259612968,
+      "grad_norm": 17.088029861450195,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 33.6193,
+      "step": 198
+    },
+    {
+      "epoch": 0.10669347407221245,
+      "grad_norm": 17.898561477661133,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 27.5111,
+      "step": 199
+    },
+    {
+      "epoch": 0.10722962218312809,
+      "grad_norm": 28.369626998901367,
+      "learning_rate": 0.0,
+      "loss": 21.8853,
+      "step": 200
+    },
+    {
+      "epoch": 0.10722962218312809,
+      "eval_loss": 0.9611303806304932,
+      "eval_runtime": 7.0035,
+      "eval_samples_per_second": 7.139,
+      "eval_steps_per_second": 7.139,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.784601741716357e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null