Training in progress, step 100, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:393d2c945c5d602a687554d138228dd53a8117a45025e8a02dbc8674ac0a79f1
 size 524363632

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e5a3cbcfaf8ace7edd8f4262ce2ad731f55c7e6eaaebfd9931269aa391a4621
 size 524363632

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46be39a3ed370c9cca16e6446fab3c33941e2cd8f262e8079306a339425ca6c2
 size 1049049122

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfae43722b1c9d679b1312744d48e26e273949df5558fc6cb09425d26a80affb
 size 1049049122

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7053778785f125e6386d20f76a7937458427ab75dc4764291ca3c69546150ab
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:345c670b13d87f8d3f915df7b81226d1a83aa0c401d9d0d6b21af02cbc8bf96a
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cda74f9f163dce7fb466c901b271d28b633378e115bdd5df99b9e9ce265beb9
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:bec207671a1ac823cd532380a7f11afc3efda11a0ff8b8aed036ee6d2ec24c79
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea79d29c59e466d51c06f6f308488133de30228a7db23bccbfc06a411ae6f41f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6daec216da31e255cb4d94d1eb020b9def764d324afcbd1bfa26e14f998b3fe
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28e635985ee7e82960dc5f04dcbec7fa8c52ff7223a5f7d82aed9806a4d4d3ea
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbf631c2ca8f1999470ed4e1ee8bf0abad957680b38fda9c4082a6e305ed1e12
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8d5f446d81df8b8e6d5d3423a874831fec3d08bffbe4980db71b54ceb3e7bd4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8243863582611084,
-  "best_model_checkpoint": "miner_id_24/checkpoint-75",
-  "epoch": 0.06680026720106881,
   "eval_steps": 25,
-  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -564,6 +564,189 @@
       "eval_samples_per_second": 21.628,
       "eval_steps_per_second": 1.352,
       "step": 75
     }
   ],
   "logging_steps": 1,
@@ -587,12 +770,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.285825216105677e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8063623309135437,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0890670229347584,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.628,
       "eval_steps_per_second": 1.352,
       "step": 75
+    },
+    {
+      "epoch": 0.06769093743041639,
+      "grad_norm": 17.87114143371582,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 1.0139,
+      "step": 76
+    },
+    {
+      "epoch": 0.06858160765976397,
+      "grad_norm": 15.511335372924805,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 0.8656,
+      "step": 77
+    },
+    {
+      "epoch": 0.06947227788911156,
+      "grad_norm": 20.296796798706055,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 0.8372,
+      "step": 78
+    },
+    {
+      "epoch": 0.07036294811845914,
+      "grad_norm": 18.298046112060547,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 0.8844,
+      "step": 79
+    },
+    {
+      "epoch": 0.07125361834780672,
+      "grad_norm": 17.071407318115234,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 0.8669,
+      "step": 80
+    },
+    {
+      "epoch": 0.07214428857715431,
+      "grad_norm": 16.053665161132812,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.7517,
+      "step": 81
+    },
+    {
+      "epoch": 0.07303495880650189,
+      "grad_norm": 19.35648536682129,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.7972,
+      "step": 82
+    },
+    {
+      "epoch": 0.07392562903584948,
+      "grad_norm": 17.407955169677734,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.7013,
+      "step": 83
+    },
+    {
+      "epoch": 0.07481629926519706,
+      "grad_norm": 18.117521286010742,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 0.641,
+      "step": 84
+    },
+    {
+      "epoch": 0.07570696949454464,
+      "grad_norm": 18.53291893005371,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 0.7497,
+      "step": 85
+    },
+    {
+      "epoch": 0.07659763972389223,
+      "grad_norm": 21.79059410095215,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 0.829,
+      "step": 86
+    },
+    {
+      "epoch": 0.07748830995323981,
+      "grad_norm": 19.699893951416016,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 0.8702,
+      "step": 87
+    },
+    {
+      "epoch": 0.07837898018258739,
+      "grad_norm": 18.76403045654297,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 1.07,
+      "step": 88
+    },
+    {
+      "epoch": 0.07926965041193498,
+      "grad_norm": 17.980998992919922,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 1.0136,
+      "step": 89
+    },
+    {
+      "epoch": 0.08016032064128256,
+      "grad_norm": 17.479143142700195,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.7321,
+      "step": 90
+    },
+    {
+      "epoch": 0.08105099087063015,
+      "grad_norm": 16.1824951171875,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 0.7568,
+      "step": 91
+    },
+    {
+      "epoch": 0.08194166109997773,
+      "grad_norm": 17.499082565307617,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 0.8011,
+      "step": 92
+    },
+    {
+      "epoch": 0.08283233132932531,
+      "grad_norm": 17.437572479248047,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.7144,
+      "step": 93
+    },
+    {
+      "epoch": 0.0837230015586729,
+      "grad_norm": 19.064252853393555,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 0.7502,
+      "step": 94
+    },
+    {
+      "epoch": 0.08461367178802048,
+      "grad_norm": 18.436866760253906,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 0.7259,
+      "step": 95
+    },
+    {
+      "epoch": 0.08550434201736808,
+      "grad_norm": 16.668874740600586,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.764,
+      "step": 96
+    },
+    {
+      "epoch": 0.08639501224671566,
+      "grad_norm": 18.206295013427734,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 0.7036,
+      "step": 97
+    },
+    {
+      "epoch": 0.08728568247606323,
+      "grad_norm": 20.8583927154541,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.6996,
+      "step": 98
+    },
+    {
+      "epoch": 0.08817635270541083,
+      "grad_norm": 17.43104362487793,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.8025,
+      "step": 99
+    },
+    {
+      "epoch": 0.0890670229347584,
+      "grad_norm": 22.67059898376465,
+      "learning_rate": 0.0,
+      "loss": 0.8697,
+      "step": 100
+    },
+    {
+      "epoch": 0.0890670229347584,
+      "eval_loss": 0.8063623309135437,
+      "eval_runtime": 698.2614,
+      "eval_samples_per_second": 21.665,
+      "eval_steps_per_second": 1.355,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.3811002881409024e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null