Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1fd5f98b3e17d941f9701353b228c6d88f9e6bf9054c98df40ac5b60a55cbb9
 size 156926880

 version https://git-lfs.github.com/spec/v1
+oid sha256:b19a4bd63735bae0361d853190ee296bee73d0a33cc0d7c22ab1e35cad518e66
 size 156926880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbcc6bb23698617e22733b919c4e25478fdfcfd6761b7bbd54e8b76dbfa4f299
 size 313998650

 version https://git-lfs.github.com/spec/v1
+oid sha256:13e56754efcc24b4ae0765ef96969880d4d3a3a1ab718a63a28664447f6847aa
 size 313998650

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6307aca18a4b137baed3aad950f77018aea901f3dd02c11ef68745bcb66d1b49
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:86cfdae43b4b41e02bc98e3f426b2a0bb0fe98ce868c875487cbca03a7ff6c06
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24fc866f9c695663205aa5b763b4c4c980836ac685256c69a484fb9a2fa2db59
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:248a2419ad4e2f085c117bf78dc170d59e352fc9f2c721de65ac936e83ca9a3e
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6a625d0e9f9f101fc8f7402d671d982148de09ac70fa61e9157c1fd55c2b697
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:49c426fbb4ebae255f34d0e1ef716e2f597b1b5743cabdcf656cfc258bb3fa41
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:419c87d977f89ed6e5b701b2efa34f1f5a280538e24958f8b11739436a38bc9d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4f869da5b9ce4ddfa841dd9b950fbd4c915471ef149bd02c66fcc1baffbc88b
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6161407232284546,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.41237113402061853,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 34.056,
       "eval_steps_per_second": 4.33,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.94731890081792e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5220625400543213,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.8247422680412371,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 34.056,
       "eval_steps_per_second": 4.33,
       "step": 25
+    },
+    {
+      "epoch": 0.4288659793814433,
+      "grad_norm": 22.994813919067383,
+      "learning_rate": 5e-05,
+      "loss": 0.8178,
+      "step": 26
+    },
+    {
+      "epoch": 0.44536082474226807,
+      "grad_norm": 7.838329792022705,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.5679,
+      "step": 27
+    },
+    {
+      "epoch": 0.4618556701030928,
+      "grad_norm": 10.032183647155762,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.6159,
+      "step": 28
+    },
+    {
+      "epoch": 0.47835051546391755,
+      "grad_norm": 10.698407173156738,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.6122,
+      "step": 29
+    },
+    {
+      "epoch": 0.4948453608247423,
+      "grad_norm": 10.815820693969727,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.5697,
+      "step": 30
+    },
+    {
+      "epoch": 0.511340206185567,
+      "grad_norm": 11.5973539352417,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.582,
+      "step": 31
+    },
+    {
+      "epoch": 0.5278350515463918,
+      "grad_norm": 15.79391098022461,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.6156,
+      "step": 32
+    },
+    {
+      "epoch": 0.5443298969072164,
+      "grad_norm": 12.590981483459473,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.5863,
+      "step": 33
+    },
+    {
+      "epoch": 0.5608247422680412,
+      "grad_norm": 5.945058822631836,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.526,
+      "step": 34
+    },
+    {
+      "epoch": 0.5773195876288659,
+      "grad_norm": 5.012986183166504,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.5008,
+      "step": 35
+    },
+    {
+      "epoch": 0.5938144329896907,
+      "grad_norm": 1.1593012809753418,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.4249,
+      "step": 36
+    },
+    {
+      "epoch": 0.6103092783505155,
+      "grad_norm": 2.0200862884521484,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.4172,
+      "step": 37
+    },
+    {
+      "epoch": 0.6268041237113402,
+      "grad_norm": 8.196489334106445,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.6267,
+      "step": 38
+    },
+    {
+      "epoch": 0.643298969072165,
+      "grad_norm": 16.858001708984375,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.726,
+      "step": 39
+    },
+    {
+      "epoch": 0.6597938144329897,
+      "grad_norm": 12.409262657165527,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.6533,
+      "step": 40
+    },
+    {
+      "epoch": 0.6762886597938145,
+      "grad_norm": 12.153261184692383,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.5928,
+      "step": 41
+    },
+    {
+      "epoch": 0.6927835051546392,
+      "grad_norm": 7.903191566467285,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.5294,
+      "step": 42
+    },
+    {
+      "epoch": 0.709278350515464,
+      "grad_norm": 5.494553089141846,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.5219,
+      "step": 43
+    },
+    {
+      "epoch": 0.7257731958762886,
+      "grad_norm": 2.8553881645202637,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.5303,
+      "step": 44
+    },
+    {
+      "epoch": 0.7422680412371134,
+      "grad_norm": 6.593286037445068,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.5062,
+      "step": 45
+    },
+    {
+      "epoch": 0.7587628865979381,
+      "grad_norm": 7.074839115142822,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.531,
+      "step": 46
+    },
+    {
+      "epoch": 0.7752577319587629,
+      "grad_norm": 3.631089210510254,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.5295,
+      "step": 47
+    },
+    {
+      "epoch": 0.7917525773195876,
+      "grad_norm": 8.73243522644043,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.5199,
+      "step": 48
+    },
+    {
+      "epoch": 0.8082474226804124,
+      "grad_norm": 13.886251449584961,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.5525,
+      "step": 49
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 16.242029190063477,
+      "learning_rate": 0.0,
+      "loss": 0.6324,
+      "step": 50
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "eval_loss": 0.5220625400543213,
+      "eval_runtime": 12.0024,
+      "eval_samples_per_second": 34.076,
+      "eval_steps_per_second": 4.332,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.589463780163584e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null