Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab2c2ac0dfafabad99afe24d8b132ce37c40de0da6270295d971491a7a48b032
 size 432223744

 version https://git-lfs.github.com/spec/v1
+oid sha256:c349140eb7163846e7381b18150e06529ecc31a1bb0b223ae0a45f968ee7e6dc
 size 432223744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c215c919b9807da562ad4b7dffd5e3f2a37b439f8184ca74e3bdd46a24842c77
 size 864785974

 version https://git-lfs.github.com/spec/v1
+oid sha256:64150c5fa5546c8f4b5ec3cca8ac24293a932c3b248b1664438a74d0b081b2bc
 size 864785974

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ca78bde36e8b0ccd30d979b4c0d6412479a7e9c6f02c29fb910cc0b8eb76752
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c29aabfcfceff34d87b8c71583c20a189dffc4470fb7dab4fcf45450b59c663
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e82787723b39764661e61046e03144992bd1311194a08d432a90f9e97db38f6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:027d4a04e5d5ed192ec73d697075c242c266c04cf23d89e3bac84c8dccb5d79f
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:663ad8992e1e5f12ba651c1248c2333c20177a17d4231633b687dcfc06a080f2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:330350c01b4d55bdcd04d3c6e099c3628b52547da1ddb826a312e9da9d101c30
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9a026c2a5c52cef3f96a0e145dbc36faf52c3cd294f64ca6599743d3fd9ed07
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7dba49d8071d260d2d23cae05d1a2b1c56a66a388ff1935f6b29d2f34b8c964b
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.32214125990867615,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.034934497816593885,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 12.889,
       "eval_steps_per_second": 1.612,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.315690861756416e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.3099195957183838,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.06986899563318777,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.889,
       "eval_steps_per_second": 1.612,
       "step": 25
+    },
+    {
+      "epoch": 0.03633187772925764,
+      "grad_norm": 0.9103572368621826,
+      "learning_rate": 5e-05,
+      "loss": 0.2798,
+      "step": 26
+    },
+    {
+      "epoch": 0.0377292576419214,
+      "grad_norm": 1.0263386964797974,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.361,
+      "step": 27
+    },
+    {
+      "epoch": 0.039126637554585154,
+      "grad_norm": 0.8997160792350769,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.3359,
+      "step": 28
+    },
+    {
+      "epoch": 0.04052401746724891,
+      "grad_norm": 0.7481948137283325,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.3187,
+      "step": 29
+    },
+    {
+      "epoch": 0.04192139737991266,
+      "grad_norm": 0.873487114906311,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.2788,
+      "step": 30
+    },
+    {
+      "epoch": 0.04331877729257642,
+      "grad_norm": 1.0412384271621704,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.3317,
+      "step": 31
+    },
+    {
+      "epoch": 0.04471615720524017,
+      "grad_norm": 0.9272012710571289,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.2985,
+      "step": 32
+    },
+    {
+      "epoch": 0.04611353711790393,
+      "grad_norm": 1.0352059602737427,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.3057,
+      "step": 33
+    },
+    {
+      "epoch": 0.04751091703056769,
+      "grad_norm": 1.1511657238006592,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.274,
+      "step": 34
+    },
+    {
+      "epoch": 0.04890829694323144,
+      "grad_norm": 0.9687528610229492,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.2793,
+      "step": 35
+    },
+    {
+      "epoch": 0.050305676855895196,
+      "grad_norm": 1.0364540815353394,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.3161,
+      "step": 36
+    },
+    {
+      "epoch": 0.05170305676855895,
+      "grad_norm": 1.1564514636993408,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.4427,
+      "step": 37
+    },
+    {
+      "epoch": 0.053100436681222704,
+      "grad_norm": 1.0043615102767944,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.4424,
+      "step": 38
+    },
+    {
+      "epoch": 0.054497816593886465,
+      "grad_norm": 0.9488585591316223,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.2808,
+      "step": 39
+    },
+    {
+      "epoch": 0.05589519650655022,
+      "grad_norm": 0.8899033069610596,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.2916,
+      "step": 40
+    },
+    {
+      "epoch": 0.057292576419213974,
+      "grad_norm": 1.040342926979065,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.3335,
+      "step": 41
+    },
+    {
+      "epoch": 0.05868995633187773,
+      "grad_norm": 1.0307811498641968,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.2772,
+      "step": 42
+    },
+    {
+      "epoch": 0.06008733624454148,
+      "grad_norm": 1.0109599828720093,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.3653,
+      "step": 43
+    },
+    {
+      "epoch": 0.061484716157205244,
+      "grad_norm": 0.6494663953781128,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.1998,
+      "step": 44
+    },
+    {
+      "epoch": 0.062882096069869,
+      "grad_norm": 0.838367760181427,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.2945,
+      "step": 45
+    },
+    {
+      "epoch": 0.06427947598253275,
+      "grad_norm": 0.8463190197944641,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.3269,
+      "step": 46
+    },
+    {
+      "epoch": 0.0656768558951965,
+      "grad_norm": 0.968761146068573,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.2588,
+      "step": 47
+    },
+    {
+      "epoch": 0.06707423580786026,
+      "grad_norm": 0.9924123883247375,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.3954,
+      "step": 48
+    },
+    {
+      "epoch": 0.06847161572052402,
+      "grad_norm": 1.1434500217437744,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.3334,
+      "step": 49
+    },
+    {
+      "epoch": 0.06986899563318777,
+      "grad_norm": 1.3751486539840698,
+      "learning_rate": 0.0,
+      "loss": 0.4089,
+      "step": 50
+    },
+    {
+      "epoch": 0.06986899563318777,
+      "eval_loss": 0.3099195957183838,
+      "eval_runtime": 372.952,
+      "eval_samples_per_second": 12.927,
+      "eval_steps_per_second": 1.617,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.631381723512832e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null