Training in progress, step 44, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +137 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1894b6af04430ff9121e8f13d8e67e22f52f216a80ebc84610dcf4f5f6becbf4
 size 97728

 version https://git-lfs.github.com/spec/v1
+oid sha256:12632d90567c6fde882bded6358e6c7b4a71fee2957475178fb9c0af69fa8321
 size 97728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9425d49c8dea4513e77c415341076f8a409d0e4d458dc5447f6a674f953d93e5
 size 212298

 version https://git-lfs.github.com/spec/v1
+oid sha256:fccf5d9f789ea7ddab5d0a51cde0a20ed4c88e8b1c64fc584b79453f5925d7cd
 size 212298

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e00426f9d312d38fed4d5a7dae87d688d728cdb569d7f6c595182f9d7399a424
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e2143bcf1389d49defafe207e5f58209b31462af91b67491b2fb516ae60fcad
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:902b764ae55b23a576245e078964502cec38db947846cdef44ae392316ef4ed5
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:cab0248e66639a677f0106f20189400dd6c5f74bd38c8c9c30d4fd7a40a33ce4
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f93b92f147ef611e1352967d1926c27c56a1525be330c852425970620f438f7
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b996f258d134c6ce736e1f079e5c482d6edcc3f188922b9dd1512377e051d6d8
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:733ccd530505e32e37c5ef26efc8c5ba16646bd3cdcc1303471187a82694bbe5
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:6970a5a80bba446342ce17448005922da7ac7f5f280df59e0757d3f81ca6fd0f
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c90b2d8623eacbca7de7c3a4e7890d40417c2144f0746860a87974700b51740c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:74384ee4f4d516692f488cd2897f31d93127b8f052b6b6c3deb94eb2492306b3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.756828308105469,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.7391304347826086,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,139 @@
       "eval_samples_per_second": 230.322,
       "eval_steps_per_second": 30.868,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +354,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 81943068672000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.756828308105469,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 3.0608695652173914,
   "eval_steps": 25,
+  "global_step": 44,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 230.322,
       "eval_steps_per_second": 30.868,
       "step": 25
+    },
+    {
+      "epoch": 1.808695652173913,
+      "grad_norm": 0.1343703418970108,
+      "learning_rate": 3.887395330218429e-05,
+      "loss": 14.1249,
+      "step": 26
+    },
+    {
+      "epoch": 1.8782608695652174,
+      "grad_norm": 0.11429044604301453,
+      "learning_rate": 3.5262241279454785e-05,
+      "loss": 11.6902,
+      "step": 27
+    },
+    {
+      "epoch": 1.9478260869565216,
+      "grad_norm": 0.13081665337085724,
+      "learning_rate": 3.173294878168025e-05,
+      "loss": 12.252,
+      "step": 28
+    },
+    {
+      "epoch": 2.017391304347826,
+      "grad_norm": 0.18295542895793915,
+      "learning_rate": 2.8305813044122097e-05,
+      "loss": 19.1624,
+      "step": 29
+    },
+    {
+      "epoch": 2.0869565217391304,
+      "grad_norm": 0.11374109983444214,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 11.1943,
+      "step": 30
+    },
+    {
+      "epoch": 2.1565217391304348,
+      "grad_norm": 0.1199745312333107,
+      "learning_rate": 2.1833997096818898e-05,
+      "loss": 11.6884,
+      "step": 31
+    },
+    {
+      "epoch": 2.226086956521739,
+      "grad_norm": 0.10758765786886215,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 9.9215,
+      "step": 32
+    },
+    {
+      "epoch": 2.2956521739130435,
+      "grad_norm": 0.13169817626476288,
+      "learning_rate": 1.599136311145402e-05,
+      "loss": 13.5852,
+      "step": 33
+    },
+    {
+      "epoch": 2.365217391304348,
+      "grad_norm": 0.12861192226409912,
+      "learning_rate": 1.3347406408508695e-05,
+      "loss": 11.76,
+      "step": 34
+    },
+    {
+      "epoch": 2.4347826086956523,
+      "grad_norm": 0.12590602040290833,
+      "learning_rate": 1.090842587659851e-05,
+      "loss": 11.9853,
+      "step": 35
+    },
+    {
+      "epoch": 2.5043478260869563,
+      "grad_norm": 0.13119211792945862,
+      "learning_rate": 8.688061284200266e-06,
+      "loss": 12.1627,
+      "step": 36
+    },
+    {
+      "epoch": 2.573913043478261,
+      "grad_norm": 0.1124102994799614,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 11.1063,
+      "step": 37
+    },
+    {
+      "epoch": 2.643478260869565,
+      "grad_norm": 0.12109724432229996,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 11.8014,
+      "step": 38
+    },
+    {
+      "epoch": 2.7130434782608694,
+      "grad_norm": 0.11037920415401459,
+      "learning_rate": 3.4563125677897932e-06,
+      "loss": 9.9505,
+      "step": 39
+    },
+    {
+      "epoch": 2.782608695652174,
+      "grad_norm": 0.13785330951213837,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 13.5303,
+      "step": 40
+    },
+    {
+      "epoch": 2.8521739130434782,
+      "grad_norm": 0.1192273274064064,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 11.7184,
+      "step": 41
+    },
+    {
+      "epoch": 2.9217391304347826,
+      "grad_norm": 0.13213324546813965,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 12.025,
+      "step": 42
+    },
+    {
+      "epoch": 2.991304347826087,
+      "grad_norm": 0.17722098529338837,
+      "learning_rate": 1.3981014094099353e-07,
+      "loss": 17.698,
+      "step": 43
+    },
+    {
+      "epoch": 3.0608695652173914,
+      "grad_norm": 0.13186267018318176,
+      "learning_rate": 0.0,
+      "loss": 12.9099,
+      "step": 44
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 144219800862720.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null