Training in progress, step 128, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +200 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0349df464b4639cd3b912ec1b2ac00bffd66c29cea313e57b29682cd54e5c7e3
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d601d43dade33badf98bf0461027583958dd8667ff02874ea062b250cd9db81
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5ecb111f4b4668d7e476fa38b96190cc9a21631b7cf5344f19938dd2c19747f
 size 85723284

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6c793b1cb9548fd561c88d3ad584df110322f62b86194ea29aeedbd50f780a3
 size 85723284

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a990d0f6ec1e99435be5e64ba855d80d12af86cad358e24e7e8cc8741b51a9a7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0810b5b5d390c1667c8d6c6351c2b0f8ee6d357396bbdeb5cbca15c6b011b798
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c54d816848a62c27523c4718d987d5248a2b166cfdaf873e405061cb5285a5d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:968a6dcbda34982ad43f2af2e04e5edf94e043c521201e71b1583695497d18e0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.24994200468063354,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.7839294463498285,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,202 @@
       "eval_samples_per_second": 7.183,
       "eval_steps_per_second": 1.804,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -754,12 +950,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.627509890613248e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.24994200468063354,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.006859382655561,
   "eval_steps": 50,
+  "global_step": 128,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.183,
       "eval_steps_per_second": 1.804,
       "step": 100
+    },
+    {
+      "epoch": 0.7917687408133268,
+      "grad_norm": 4.1459736824035645,
+      "learning_rate": 2.4742923014386156e-05,
+      "loss": 4.0253,
+      "step": 101
+    },
+    {
+      "epoch": 0.7996080352768251,
+      "grad_norm": 4.166021347045898,
+      "learning_rate": 2.301660165700936e-05,
+      "loss": 4.1193,
+      "step": 102
+    },
+    {
+      "epoch": 0.8074473297403234,
+      "grad_norm": 4.5724639892578125,
+      "learning_rate": 2.1344844419735755e-05,
+      "loss": 4.1613,
+      "step": 103
+    },
+    {
+      "epoch": 0.8152866242038217,
+      "grad_norm": 3.7771105766296387,
+      "learning_rate": 1.9728836206903656e-05,
+      "loss": 3.577,
+      "step": 104
+    },
+    {
+      "epoch": 0.82312591866732,
+      "grad_norm": 4.427511215209961,
+      "learning_rate": 1.8169722409183097e-05,
+      "loss": 4.0308,
+      "step": 105
+    },
+    {
+      "epoch": 0.8309652131308183,
+      "grad_norm": 4.253128528594971,
+      "learning_rate": 1.6668608091748495e-05,
+      "loss": 4.358,
+      "step": 106
+    },
+    {
+      "epoch": 0.8388045075943165,
+      "grad_norm": 3.6124472618103027,
+      "learning_rate": 1.522655721103291e-05,
+      "loss": 3.8505,
+      "step": 107
+    },
+    {
+      "epoch": 0.8466438020578148,
+      "grad_norm": 4.112203598022461,
+      "learning_rate": 1.3844591860619383e-05,
+      "loss": 3.9511,
+      "step": 108
+    },
+    {
+      "epoch": 0.854483096521313,
+      "grad_norm": 3.7493703365325928,
+      "learning_rate": 1.2523691546803873e-05,
+      "loss": 3.8253,
+      "step": 109
+    },
+    {
+      "epoch": 0.8623223909848113,
+      "grad_norm": 4.115346431732178,
+      "learning_rate": 1.1264792494342857e-05,
+      "loss": 3.8934,
+      "step": 110
+    },
+    {
+      "epoch": 0.8701616854483096,
+      "grad_norm": 4.116607666015625,
+      "learning_rate": 1.0068786982878087e-05,
+      "loss": 3.7177,
+      "step": 111
+    },
+    {
+      "epoch": 0.8780009799118079,
+      "grad_norm": 3.9183499813079834,
+      "learning_rate": 8.936522714508678e-06,
+      "loss": 3.8943,
+      "step": 112
+    },
+    {
+      "epoch": 0.8858402743753062,
+      "grad_norm": 4.0384039878845215,
+      "learning_rate": 7.868802212958703e-06,
+      "loss": 3.4521,
+      "step": 113
+    },
+    {
+      "epoch": 0.8936795688388045,
+      "grad_norm": 4.025205612182617,
+      "learning_rate": 6.866382254766157e-06,
+      "loss": 3.8196,
+      "step": 114
+    },
+    {
+      "epoch": 0.9015188633023028,
+      "grad_norm": 4.01348352432251,
+      "learning_rate": 5.929973332896677e-06,
+      "loss": 3.9242,
+      "step": 115
+    },
+    {
+      "epoch": 0.9093581577658011,
+      "grad_norm": 4.146557331085205,
+      "learning_rate": 5.060239153161872e-06,
+      "loss": 4.0043,
+      "step": 116
+    },
+    {
+      "epoch": 0.9171974522292994,
+      "grad_norm": 4.394860744476318,
+      "learning_rate": 4.257796163799455e-06,
+      "loss": 3.8837,
+      "step": 117
+    },
+    {
+      "epoch": 0.9250367466927977,
+      "grad_norm": 4.56512451171875,
+      "learning_rate": 3.5232131185484076e-06,
+      "loss": 4.3846,
+      "step": 118
+    },
+    {
+      "epoch": 0.932876041156296,
+      "grad_norm": 3.989962577819824,
+      "learning_rate": 2.857010673529015e-06,
+      "loss": 3.3769,
+      "step": 119
+    },
+    {
+      "epoch": 0.9407153356197943,
+      "grad_norm": 4.115790843963623,
+      "learning_rate": 2.259661018213333e-06,
+      "loss": 3.7523,
+      "step": 120
+    },
+    {
+      "epoch": 0.9485546300832925,
+      "grad_norm": 4.354365348815918,
+      "learning_rate": 1.7315875407479032e-06,
+      "loss": 3.7809,
+      "step": 121
+    },
+    {
+      "epoch": 0.9563939245467908,
+      "grad_norm": 4.128818035125732,
+      "learning_rate": 1.2731645278655445e-06,
+      "loss": 3.8053,
+      "step": 122
+    },
+    {
+      "epoch": 0.964233219010289,
+      "grad_norm": 4.223034858703613,
+      "learning_rate": 8.847168995992916e-07,
+      "loss": 3.5431,
+      "step": 123
+    },
+    {
+      "epoch": 0.9720725134737873,
+      "grad_norm": 4.52358341217041,
+      "learning_rate": 5.665199789862907e-07,
+      "loss": 4.4289,
+      "step": 124
+    },
+    {
+      "epoch": 0.9799118079372856,
+      "grad_norm": 3.970877170562744,
+      "learning_rate": 3.1879929692498757e-07,
+      "loss": 3.9163,
+      "step": 125
+    },
+    {
+      "epoch": 0.9877511024007839,
+      "grad_norm": 4.14064359664917,
+      "learning_rate": 1.4173043232380557e-07,
+      "loss": 3.9214,
+      "step": 126
+    },
+    {
+      "epoch": 0.9955903968642822,
+      "grad_norm": 3.948699712753296,
+      "learning_rate": 3.5438887654737355e-08,
+      "loss": 4.2029,
+      "step": 127
+    },
+    {
+      "epoch": 1.006859382655561,
+      "grad_norm": 3.799189567565918,
+      "learning_rate": 0.0,
+      "loss": 3.387,
+      "step": 128
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.203212659984957e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null