Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11e86620b98709d68cae6c127b8dcec064837aa2fb99347232d2ae21a0e7ef66
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:d78e07d9c69adb062f66f5a4fba927b6283df9e9e36960b9a515041c95fda040
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9089564b00e6064e79ae2e6b59fa5f71f0800d4b98eb30129c8ea4e4bbd0dc1
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:4cb3a657cda1bc5514c158983e809ff1e791727a253dce3c649756bde55eafd2
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a5ac7630893d9d42beb386c14aa06bc80423ddcdecefee9c473592247cac89b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc6887cf3eaa89559f853dec6e1255ee6c99a1487e3b8efcf5f0b6db778c6d2e
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:074c2b1087565bc1760e8e7e3230fae5d9964fcf585c035e148bbc0462420c24
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:127f78b23b90a55817d57cdfea7be1339295ae6ad1e28b296ac138e7c7416928
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ddb0e188cf1867b47b115cd250cd141961e477ddf3459ed239df0aec2175b8c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3f06f4c1b9fd152e9c91824606052b273341bedb130acb6aa7153e523ec3bc0
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32f0de849b47ea195299c9636ee57ebffa2fce56047411225e87ceb7116e6f4e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c69ee4e96f21a5111a8ce7e3dcdc580843529dedbb3db9774e1a908bbb9c5837
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.0591540336608887,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.0061703637429426465,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 14.298,
       "eval_steps_per_second": 3.717,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.9859068337822106e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9804583191871643,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.012340727485885293,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.298,
       "eval_steps_per_second": 3.717,
       "step": 25
+    },
+    {
+      "epoch": 0.0064171782926603525,
+      "grad_norm": 0.3678169846534729,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 1.4322,
+      "step": 26
+    },
+    {
+      "epoch": 0.0066639928423780585,
+      "grad_norm": 0.41855236887931824,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 1.3597,
+      "step": 27
+    },
+    {
+      "epoch": 0.006910807392095764,
+      "grad_norm": 0.4745751619338989,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 1.3224,
+      "step": 28
+    },
+    {
+      "epoch": 0.0071576219418134695,
+      "grad_norm": 0.559587836265564,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 1.3037,
+      "step": 29
+    },
+    {
+      "epoch": 0.0074044364915311755,
+      "grad_norm": 0.5168178081512451,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 1.3006,
+      "step": 30
+    },
+    {
+      "epoch": 0.0076512510412488815,
+      "grad_norm": 0.6175752282142639,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 1.2705,
+      "step": 31
+    },
+    {
+      "epoch": 0.007898065590966587,
+      "grad_norm": 0.6352323293685913,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 1.1419,
+      "step": 32
+    },
+    {
+      "epoch": 0.008144880140684293,
+      "grad_norm": 0.7056966423988342,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 1.1645,
+      "step": 33
+    },
+    {
+      "epoch": 0.008391694690402,
+      "grad_norm": 0.6783286929130554,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 1.104,
+      "step": 34
+    },
+    {
+      "epoch": 0.008638509240119704,
+      "grad_norm": 0.7926774024963379,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 0.9757,
+      "step": 35
+    },
+    {
+      "epoch": 0.008885323789837411,
+      "grad_norm": 1.0232789516448975,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 1.0068,
+      "step": 36
+    },
+    {
+      "epoch": 0.009132138339555116,
+      "grad_norm": 1.2049612998962402,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 1.0109,
+      "step": 37
+    },
+    {
+      "epoch": 0.009378952889272823,
+      "grad_norm": 0.2999906539916992,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 1.337,
+      "step": 38
+    },
+    {
+      "epoch": 0.009625767438990528,
+      "grad_norm": 0.2730841338634491,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 1.3385,
+      "step": 39
+    },
+    {
+      "epoch": 0.009872581988708235,
+      "grad_norm": 0.33873268961906433,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 1.3549,
+      "step": 40
+    },
+    {
+      "epoch": 0.01011939653842594,
+      "grad_norm": 0.3427824079990387,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 1.3463,
+      "step": 41
+    },
+    {
+      "epoch": 0.010366211088143645,
+      "grad_norm": 0.38400933146476746,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 1.2599,
+      "step": 42
+    },
+    {
+      "epoch": 0.010613025637861352,
+      "grad_norm": 0.4481218755245209,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 1.2069,
+      "step": 43
+    },
+    {
+      "epoch": 0.010859840187579057,
+      "grad_norm": 0.5700804591178894,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 1.2132,
+      "step": 44
+    },
+    {
+      "epoch": 0.011106654737296764,
+      "grad_norm": 0.5816468000411987,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 1.1683,
+      "step": 45
+    },
+    {
+      "epoch": 0.01135346928701447,
+      "grad_norm": 0.6591303944587708,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 1.142,
+      "step": 46
+    },
+    {
+      "epoch": 0.011600283836732176,
+      "grad_norm": 0.7625118494033813,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 0.9836,
+      "step": 47
+    },
+    {
+      "epoch": 0.011847098386449881,
+      "grad_norm": 0.9183720350265503,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 0.9474,
+      "step": 48
+    },
+    {
+      "epoch": 0.012093912936167586,
+      "grad_norm": 0.9961158037185669,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 0.9135,
+      "step": 49
+    },
+    {
+      "epoch": 0.012340727485885293,
+      "grad_norm": 1.5027074813842773,
+      "learning_rate": 1e-05,
+      "loss": 1.0364,
+      "step": 50
+    },
+    {
+      "epoch": 0.012340727485885293,
+      "eval_loss": 0.9804583191871643,
+      "eval_runtime": 3.5086,
+      "eval_samples_per_second": 14.251,
+      "eval_steps_per_second": 3.705,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.969948642440643e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null