Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ccb8d71491a38ea0a5fe85f43ed37754b346b0412ddaefdc342328af620db55
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea17c10fc3e0d1fca2cbca97d6791fb21d0cc8b1d8b404b54a242a84018f1caf
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9aa3606bd7edc250a9043afe1a6e2ec1a1b3ac209d00eef7772f6893ec840687
 size 1279641042

 version https://git-lfs.github.com/spec/v1
+oid sha256:05b91d4ce032b17c778522c034dedf6a27c0231c044f23840f025bb1ff67354c
 size 1279641042

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e56946399aabee448fd3c422a118df88cf2bc9a4a8936d73afb8a779bd8edd2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8ecd75598e972c810cf285aaa1e527eb3cd79801e76d98021a5e8cc302bbf63
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6293469667434692,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.10542962572482868,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 7.212,
       "eval_steps_per_second": 3.607,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.63693695041536e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6188913583755493,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.14057283429977158,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.212,
       "eval_steps_per_second": 3.607,
       "step": 150
+    },
+    {
+      "epoch": 0.10613248989632754,
+      "grad_norm": 0.1557738333940506,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.2682,
+      "step": 151
+    },
+    {
+      "epoch": 0.10683535406782639,
+      "grad_norm": 0.16045674681663513,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.2497,
+      "step": 152
+    },
+    {
+      "epoch": 0.10753821823932525,
+      "grad_norm": 0.12793506681919098,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.2241,
+      "step": 153
+    },
+    {
+      "epoch": 0.10824108241082411,
+      "grad_norm": 0.14981037378311157,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.2546,
+      "step": 154
+    },
+    {
+      "epoch": 0.10894394658232297,
+      "grad_norm": 0.14852780103683472,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.2791,
+      "step": 155
+    },
+    {
+      "epoch": 0.10964681075382182,
+      "grad_norm": 0.19065548479557037,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.3471,
+      "step": 156
+    },
+    {
+      "epoch": 0.11034967492532068,
+      "grad_norm": 0.19516591727733612,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.3812,
+      "step": 157
+    },
+    {
+      "epoch": 0.11105253909681954,
+      "grad_norm": 0.19356763362884521,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.4681,
+      "step": 158
+    },
+    {
+      "epoch": 0.1117554032683184,
+      "grad_norm": 0.20971715450286865,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.5021,
+      "step": 159
+    },
+    {
+      "epoch": 0.11245826743981725,
+      "grad_norm": 0.18173177540302277,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.4129,
+      "step": 160
+    },
+    {
+      "epoch": 0.11316113161131611,
+      "grad_norm": 0.20953762531280518,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.5283,
+      "step": 161
+    },
+    {
+      "epoch": 0.11386399578281498,
+      "grad_norm": 0.2320074439048767,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.5968,
+      "step": 162
+    },
+    {
+      "epoch": 0.11456685995431383,
+      "grad_norm": 0.231634259223938,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.6878,
+      "step": 163
+    },
+    {
+      "epoch": 0.11526972412581268,
+      "grad_norm": 0.24556051194667816,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.5471,
+      "step": 164
+    },
+    {
+      "epoch": 0.11597258829731154,
+      "grad_norm": 0.2406870275735855,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.64,
+      "step": 165
+    },
+    {
+      "epoch": 0.1166754524688104,
+      "grad_norm": 0.22864454984664917,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.6471,
+      "step": 166
+    },
+    {
+      "epoch": 0.11737831664030926,
+      "grad_norm": 0.24874155223369598,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.639,
+      "step": 167
+    },
+    {
+      "epoch": 0.11808118081180811,
+      "grad_norm": 0.23163682222366333,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.5323,
+      "step": 168
+    },
+    {
+      "epoch": 0.11878404498330698,
+      "grad_norm": 0.2423689216375351,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.6043,
+      "step": 169
+    },
+    {
+      "epoch": 0.11948690915480584,
+      "grad_norm": 0.2547641396522522,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.6641,
+      "step": 170
+    },
+    {
+      "epoch": 0.12018977332630469,
+      "grad_norm": 0.23600344359874725,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.5934,
+      "step": 171
+    },
+    {
+      "epoch": 0.12089263749780355,
+      "grad_norm": 0.252996563911438,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.6689,
+      "step": 172
+    },
+    {
+      "epoch": 0.12159550166930241,
+      "grad_norm": 0.25687330961227417,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.6246,
+      "step": 173
+    },
+    {
+      "epoch": 0.12229836584080127,
+      "grad_norm": 0.2583775222301483,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.7533,
+      "step": 174
+    },
+    {
+      "epoch": 0.12300123001230012,
+      "grad_norm": 0.2819630801677704,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.7883,
+      "step": 175
+    },
+    {
+      "epoch": 0.12370409418379898,
+      "grad_norm": 0.28463077545166016,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.7116,
+      "step": 176
+    },
+    {
+      "epoch": 0.12440695835529784,
+      "grad_norm": 0.27820444107055664,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.8034,
+      "step": 177
+    },
+    {
+      "epoch": 0.1251098225267967,
+      "grad_norm": 0.27489644289016724,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.7264,
+      "step": 178
+    },
+    {
+      "epoch": 0.12581268669829557,
+      "grad_norm": 0.278911828994751,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.6959,
+      "step": 179
+    },
+    {
+      "epoch": 0.1265155508697944,
+      "grad_norm": 0.30769917368888855,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.7629,
+      "step": 180
+    },
+    {
+      "epoch": 0.12721841504129328,
+      "grad_norm": 0.34087008237838745,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.7145,
+      "step": 181
+    },
+    {
+      "epoch": 0.12792127921279212,
+      "grad_norm": 0.3040371239185333,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.6648,
+      "step": 182
+    },
+    {
+      "epoch": 0.12862414338429098,
+      "grad_norm": 0.38694098591804504,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.7641,
+      "step": 183
+    },
+    {
+      "epoch": 0.12932700755578985,
+      "grad_norm": 0.4117843210697174,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.63,
+      "step": 184
+    },
+    {
+      "epoch": 0.1300298717272887,
+      "grad_norm": 0.37666454911231995,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.6769,
+      "step": 185
+    },
+    {
+      "epoch": 0.13073273589878756,
+      "grad_norm": 0.43295755982398987,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.7073,
+      "step": 186
+    },
+    {
+      "epoch": 0.13143560007028643,
+      "grad_norm": 0.35277169942855835,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.8309,
+      "step": 187
+    },
+    {
+      "epoch": 0.13213846424178527,
+      "grad_norm": 0.33917170763015747,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.8627,
+      "step": 188
+    },
+    {
+      "epoch": 0.13284132841328414,
+      "grad_norm": 0.31541842222213745,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.7763,
+      "step": 189
+    },
+    {
+      "epoch": 0.13354419258478298,
+      "grad_norm": 0.2880040109157562,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.8386,
+      "step": 190
+    },
+    {
+      "epoch": 0.13424705675628185,
+      "grad_norm": 0.28794756531715393,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.813,
+      "step": 191
+    },
+    {
+      "epoch": 0.13494992092778071,
+      "grad_norm": 0.27827584743499756,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.7758,
+      "step": 192
+    },
+    {
+      "epoch": 0.13565278509927955,
+      "grad_norm": 0.30274316668510437,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.8663,
+      "step": 193
+    },
+    {
+      "epoch": 0.13635564927077842,
+      "grad_norm": 0.33732908964157104,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.8044,
+      "step": 194
+    },
+    {
+      "epoch": 0.1370585134422773,
+      "grad_norm": 0.34816497564315796,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.8055,
+      "step": 195
+    },
+    {
+      "epoch": 0.13776137761377613,
+      "grad_norm": 0.3468110263347626,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.8233,
+      "step": 196
+    },
+    {
+      "epoch": 0.138464241785275,
+      "grad_norm": 0.3623807430267334,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.8115,
+      "step": 197
+    },
+    {
+      "epoch": 0.13916710595677384,
+      "grad_norm": 0.3784177899360657,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.8123,
+      "step": 198
+    },
+    {
+      "epoch": 0.1398699701282727,
+      "grad_norm": 0.40762758255004883,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.8745,
+      "step": 199
+    },
+    {
+      "epoch": 0.14057283429977158,
+      "grad_norm": 0.48295310139656067,
+      "learning_rate": 0.0,
+      "loss": 0.8319,
+      "step": 200
+    },
+    {
+      "epoch": 0.14057283429977158,
+      "eval_loss": 0.6188913583755493,
+      "eval_runtime": 332.2497,
+      "eval_samples_per_second": 7.214,
+      "eval_steps_per_second": 3.609,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.83803453718528e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null