Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:683756f504cb1fa56e928da67b61b8a067287de838c0e5294aa2b3d09c226f9f
 size 289452128

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc8860ca7c21e90ae177e2c5652884046622522dba2350aa1a1c9006566d081d
 size 289452128

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1361b8fb02e76795dc7c2852e80fd81a9313bc37d9c0bcaaf6193cc858ea6330
 size 147359892

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a212689fca77e7ad854dae71d631e5bc5e9a23c224832c34e944cdbd0f25a7b
 size 147359892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d03322aaa92c9c0fda603f7c49b1553d5d556d38758107e73bb1f644e67b85d3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:75045824998ab58dfab9bb37ab127ea3bc28b76d0f02840d315b3a65651822f9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c16a9a2b8856e8f64eb7194578c6fcbc8625033d1caa318cc7b80ad824088880
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ba2dfce0937a08ee177ecd4ee2c0d73bc25d6c21d394bb71764724c742c652e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4480029344558716,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.11127596439169139,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 23.058,
       "eval_steps_per_second": 5.764,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.24089003933696e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.4315483570098877,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.14836795252225518,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.058,
       "eval_steps_per_second": 5.764,
       "step": 150
+    },
+    {
+      "epoch": 0.11201780415430267,
+      "grad_norm": 0.4206100106239319,
+      "learning_rate": 2.5833315789473687e-05,
+      "loss": 1.1387,
+      "step": 151
+    },
+    {
+      "epoch": 0.11275964391691394,
+      "grad_norm": 0.4989394247531891,
+      "learning_rate": 2.5306105263157895e-05,
+      "loss": 1.4917,
+      "step": 152
+    },
+    {
+      "epoch": 0.11350148367952523,
+      "grad_norm": 0.5074396729469299,
+      "learning_rate": 2.4778894736842104e-05,
+      "loss": 1.686,
+      "step": 153
+    },
+    {
+      "epoch": 0.1142433234421365,
+      "grad_norm": 0.5123466849327087,
+      "learning_rate": 2.4251684210526315e-05,
+      "loss": 1.3703,
+      "step": 154
+    },
+    {
+      "epoch": 0.11498516320474778,
+      "grad_norm": 0.5191718935966492,
+      "learning_rate": 2.3724473684210524e-05,
+      "loss": 1.4312,
+      "step": 155
+    },
+    {
+      "epoch": 0.11572700296735905,
+      "grad_norm": 0.47905129194259644,
+      "learning_rate": 2.3197263157894735e-05,
+      "loss": 1.4236,
+      "step": 156
+    },
+    {
+      "epoch": 0.11646884272997032,
+      "grad_norm": 0.4941026270389557,
+      "learning_rate": 2.2670052631578947e-05,
+      "loss": 1.2631,
+      "step": 157
+    },
+    {
+      "epoch": 0.1172106824925816,
+      "grad_norm": 0.5065926909446716,
+      "learning_rate": 2.214284210526316e-05,
+      "loss": 1.4134,
+      "step": 158
+    },
+    {
+      "epoch": 0.11795252225519288,
+      "grad_norm": 0.5101008415222168,
+      "learning_rate": 2.161563157894737e-05,
+      "loss": 1.3647,
+      "step": 159
+    },
+    {
+      "epoch": 0.11869436201780416,
+      "grad_norm": 0.5099448561668396,
+      "learning_rate": 2.108842105263158e-05,
+      "loss": 1.3093,
+      "step": 160
+    },
+    {
+      "epoch": 0.11943620178041543,
+      "grad_norm": 0.5270506143569946,
+      "learning_rate": 2.056121052631579e-05,
+      "loss": 1.4776,
+      "step": 161
+    },
+    {
+      "epoch": 0.1201780415430267,
+      "grad_norm": 0.519027829170227,
+      "learning_rate": 2.0034e-05,
+      "loss": 1.4011,
+      "step": 162
+    },
+    {
+      "epoch": 0.12091988130563798,
+      "grad_norm": 0.520959734916687,
+      "learning_rate": 1.950678947368421e-05,
+      "loss": 1.3618,
+      "step": 163
+    },
+    {
+      "epoch": 0.12166172106824925,
+      "grad_norm": 0.5643500685691833,
+      "learning_rate": 1.897957894736842e-05,
+      "loss": 1.5463,
+      "step": 164
+    },
+    {
+      "epoch": 0.12240356083086053,
+      "grad_norm": 0.5543540716171265,
+      "learning_rate": 1.845236842105263e-05,
+      "loss": 1.3988,
+      "step": 165
+    },
+    {
+      "epoch": 0.12314540059347182,
+      "grad_norm": 0.5814954042434692,
+      "learning_rate": 1.792515789473684e-05,
+      "loss": 1.5149,
+      "step": 166
+    },
+    {
+      "epoch": 0.12388724035608309,
+      "grad_norm": 0.5395814180374146,
+      "learning_rate": 1.7397947368421053e-05,
+      "loss": 1.4446,
+      "step": 167
+    },
+    {
+      "epoch": 0.12462908011869436,
+      "grad_norm": 0.5087323784828186,
+      "learning_rate": 1.687073684210526e-05,
+      "loss": 1.3358,
+      "step": 168
+    },
+    {
+      "epoch": 0.12537091988130564,
+      "grad_norm": 0.5564747452735901,
+      "learning_rate": 1.6343526315789473e-05,
+      "loss": 1.416,
+      "step": 169
+    },
+    {
+      "epoch": 0.1261127596439169,
+      "grad_norm": 0.5298991799354553,
+      "learning_rate": 1.5816315789473685e-05,
+      "loss": 1.3435,
+      "step": 170
+    },
+    {
+      "epoch": 0.12685459940652818,
+      "grad_norm": 0.554205060005188,
+      "learning_rate": 1.5289105263157896e-05,
+      "loss": 1.4239,
+      "step": 171
+    },
+    {
+      "epoch": 0.12759643916913946,
+      "grad_norm": 0.5437472462654114,
+      "learning_rate": 1.4761894736842103e-05,
+      "loss": 1.5082,
+      "step": 172
+    },
+    {
+      "epoch": 0.12833827893175073,
+      "grad_norm": 0.5654957294464111,
+      "learning_rate": 1.4234684210526314e-05,
+      "loss": 1.4074,
+      "step": 173
+    },
+    {
+      "epoch": 0.129080118694362,
+      "grad_norm": 0.5292185544967651,
+      "learning_rate": 1.3707473684210528e-05,
+      "loss": 1.4173,
+      "step": 174
+    },
+    {
+      "epoch": 0.1298219584569733,
+      "grad_norm": 0.5802279114723206,
+      "learning_rate": 1.3180263157894736e-05,
+      "loss": 1.4608,
+      "step": 175
+    },
+    {
+      "epoch": 0.13056379821958458,
+      "grad_norm": 0.5980931520462036,
+      "learning_rate": 1.2653052631578948e-05,
+      "loss": 1.6759,
+      "step": 176
+    },
+    {
+      "epoch": 0.13130563798219586,
+      "grad_norm": 0.5813962817192078,
+      "learning_rate": 1.2125842105263158e-05,
+      "loss": 1.4882,
+      "step": 177
+    },
+    {
+      "epoch": 0.13204747774480713,
+      "grad_norm": 0.6063507199287415,
+      "learning_rate": 1.1598631578947368e-05,
+      "loss": 1.518,
+      "step": 178
+    },
+    {
+      "epoch": 0.1327893175074184,
+      "grad_norm": 0.5826941132545471,
+      "learning_rate": 1.107142105263158e-05,
+      "loss": 1.4704,
+      "step": 179
+    },
+    {
+      "epoch": 0.13353115727002968,
+      "grad_norm": 0.6097956895828247,
+      "learning_rate": 1.054421052631579e-05,
+      "loss": 1.4025,
+      "step": 180
+    },
+    {
+      "epoch": 0.13427299703264095,
+      "grad_norm": 0.6548146605491638,
+      "learning_rate": 1.0017e-05,
+      "loss": 1.548,
+      "step": 181
+    },
+    {
+      "epoch": 0.13501483679525222,
+      "grad_norm": 0.663008987903595,
+      "learning_rate": 9.48978947368421e-06,
+      "loss": 1.764,
+      "step": 182
+    },
+    {
+      "epoch": 0.1357566765578635,
+      "grad_norm": 0.5668148994445801,
+      "learning_rate": 8.96257894736842e-06,
+      "loss": 1.351,
+      "step": 183
+    },
+    {
+      "epoch": 0.13649851632047477,
+      "grad_norm": 0.6421298384666443,
+      "learning_rate": 8.43536842105263e-06,
+      "loss": 1.4227,
+      "step": 184
+    },
+    {
+      "epoch": 0.13724035608308605,
+      "grad_norm": 0.6036615967750549,
+      "learning_rate": 7.908157894736842e-06,
+      "loss": 1.3897,
+      "step": 185
+    },
+    {
+      "epoch": 0.13798219584569732,
+      "grad_norm": 0.6182149052619934,
+      "learning_rate": 7.380947368421051e-06,
+      "loss": 1.7172,
+      "step": 186
+    },
+    {
+      "epoch": 0.1387240356083086,
+      "grad_norm": 0.6109817624092102,
+      "learning_rate": 6.853736842105264e-06,
+      "loss": 1.4867,
+      "step": 187
+    },
+    {
+      "epoch": 0.1394658753709199,
+      "grad_norm": 0.696526288986206,
+      "learning_rate": 6.326526315789474e-06,
+      "loss": 1.5992,
+      "step": 188
+    },
+    {
+      "epoch": 0.14020771513353117,
+      "grad_norm": 0.6494670510292053,
+      "learning_rate": 5.799315789473684e-06,
+      "loss": 1.3784,
+      "step": 189
+    },
+    {
+      "epoch": 0.14094955489614244,
+      "grad_norm": 0.6380143761634827,
+      "learning_rate": 5.272105263157895e-06,
+      "loss": 1.4366,
+      "step": 190
+    },
+    {
+      "epoch": 0.14169139465875372,
+      "grad_norm": 0.6222284436225891,
+      "learning_rate": 4.744894736842105e-06,
+      "loss": 1.379,
+      "step": 191
+    },
+    {
+      "epoch": 0.142433234421365,
+      "grad_norm": 0.6166477203369141,
+      "learning_rate": 4.217684210526315e-06,
+      "loss": 1.3242,
+      "step": 192
+    },
+    {
+      "epoch": 0.14317507418397626,
+      "grad_norm": 0.7048677206039429,
+      "learning_rate": 3.6904736842105257e-06,
+      "loss": 1.5888,
+      "step": 193
+    },
+    {
+      "epoch": 0.14391691394658754,
+      "grad_norm": 0.708202600479126,
+      "learning_rate": 3.163263157894737e-06,
+      "loss": 1.5138,
+      "step": 194
+    },
+    {
+      "epoch": 0.1446587537091988,
+      "grad_norm": 0.6921345591545105,
+      "learning_rate": 2.6360526315789473e-06,
+      "loss": 1.2819,
+      "step": 195
+    },
+    {
+      "epoch": 0.14540059347181009,
+      "grad_norm": 0.7002333998680115,
+      "learning_rate": 2.1088421052631577e-06,
+      "loss": 1.3984,
+      "step": 196
+    },
+    {
+      "epoch": 0.14614243323442136,
+      "grad_norm": 0.7143130898475647,
+      "learning_rate": 1.5816315789473685e-06,
+      "loss": 1.5338,
+      "step": 197
+    },
+    {
+      "epoch": 0.14688427299703263,
+      "grad_norm": 0.7665969133377075,
+      "learning_rate": 1.0544210526315788e-06,
+      "loss": 1.5382,
+      "step": 198
+    },
+    {
+      "epoch": 0.1476261127596439,
+      "grad_norm": 0.8401339650154114,
+      "learning_rate": 5.272105263157894e-07,
+      "loss": 1.8398,
+      "step": 199
+    },
+    {
+      "epoch": 0.14836795252225518,
+      "grad_norm": 0.8993075489997864,
+      "learning_rate": 0.0,
+      "loss": 1.6225,
+      "step": 200
+    },
+    {
+      "epoch": 0.14836795252225518,
+      "eval_loss": 1.4315483570098877,
+      "eval_runtime": 25.1319,
+      "eval_samples_per_second": 22.601,
+      "eval_steps_per_second": 5.65,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.65452005244928e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null