Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcec14500c78912db43f92ea4988a2499175c417ecd13505175636110db4b091
 size 985240

 version https://git-lfs.github.com/spec/v1
+oid sha256:425ba082c9856afd0cd9d29774b2151c22e3a220ccc441e5a801d9a1ea98a89e
 size 985240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b9fc49a3d0a84e71ce888d3e72a4bfc24c054849f335bfeec3945d816e0be3b
 size 1980078

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6c9c5fa5dc2a758ffd24df8413238399ea435ace020171b437d03d54344f5ef
 size 1980078

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45d7eafd179fc78f473f0c9cfa0de0165288453578727daedea2c94bf8b0269c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:002018d787452eaa3dee44fbd4c21ec861914789e6a61001902df8148068dc88
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.678608894348145,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.07598784194528875,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 472.904,
       "eval_steps_per_second": 59.326,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6001709285376.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.644861221313477,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.1519756838905775,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 472.904,
       "eval_steps_per_second": 59.326,
       "step": 50
+    },
+    {
+      "epoch": 0.07750759878419453,
+      "grad_norm": 1.7174934148788452,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 42.8151,
+      "step": 51
+    },
+    {
+      "epoch": 0.0790273556231003,
+      "grad_norm": 1.5663914680480957,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 42.7887,
+      "step": 52
+    },
+    {
+      "epoch": 0.08054711246200608,
+      "grad_norm": 1.557456374168396,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 42.7401,
+      "step": 53
+    },
+    {
+      "epoch": 0.08206686930091185,
+      "grad_norm": 1.5269972085952759,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 42.7873,
+      "step": 54
+    },
+    {
+      "epoch": 0.08358662613981763,
+      "grad_norm": 1.5041111707687378,
+      "learning_rate": 5e-05,
+      "loss": 42.7403,
+      "step": 55
+    },
+    {
+      "epoch": 0.0851063829787234,
+      "grad_norm": 1.5063508749008179,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 42.7848,
+      "step": 56
+    },
+    {
+      "epoch": 0.08662613981762918,
+      "grad_norm": 1.389691710472107,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 42.7435,
+      "step": 57
+    },
+    {
+      "epoch": 0.08814589665653495,
+      "grad_norm": 1.4456936120986938,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 42.6884,
+      "step": 58
+    },
+    {
+      "epoch": 0.08966565349544073,
+      "grad_norm": 1.3679536581039429,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 42.6868,
+      "step": 59
+    },
+    {
+      "epoch": 0.0911854103343465,
+      "grad_norm": 1.297853946685791,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 42.7028,
+      "step": 60
+    },
+    {
+      "epoch": 0.09270516717325228,
+      "grad_norm": 1.2528061866760254,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 42.6757,
+      "step": 61
+    },
+    {
+      "epoch": 0.09422492401215805,
+      "grad_norm": 1.2463070154190063,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 42.7206,
+      "step": 62
+    },
+    {
+      "epoch": 0.09574468085106383,
+      "grad_norm": 1.2156941890716553,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 42.6653,
+      "step": 63
+    },
+    {
+      "epoch": 0.0972644376899696,
+      "grad_norm": 1.2177422046661377,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 42.6178,
+      "step": 64
+    },
+    {
+      "epoch": 0.09878419452887538,
+      "grad_norm": 1.1524884700775146,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 42.6053,
+      "step": 65
+    },
+    {
+      "epoch": 0.10030395136778116,
+      "grad_norm": 1.1051671504974365,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 42.6853,
+      "step": 66
+    },
+    {
+      "epoch": 0.10182370820668693,
+      "grad_norm": 1.069922685623169,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 42.6521,
+      "step": 67
+    },
+    {
+      "epoch": 0.1033434650455927,
+      "grad_norm": 1.0806479454040527,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 42.6595,
+      "step": 68
+    },
+    {
+      "epoch": 0.10486322188449848,
+      "grad_norm": 1.1521317958831787,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 42.6343,
+      "step": 69
+    },
+    {
+      "epoch": 0.10638297872340426,
+      "grad_norm": 1.2330914735794067,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 42.6126,
+      "step": 70
+    },
+    {
+      "epoch": 0.10790273556231003,
+      "grad_norm": 1.2084128856658936,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 42.57,
+      "step": 71
+    },
+    {
+      "epoch": 0.1094224924012158,
+      "grad_norm": 1.0930789709091187,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 42.609,
+      "step": 72
+    },
+    {
+      "epoch": 0.11094224924012158,
+      "grad_norm": 1.0117900371551514,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 42.6595,
+      "step": 73
+    },
+    {
+      "epoch": 0.11246200607902736,
+      "grad_norm": 1.211604118347168,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 42.6429,
+      "step": 74
+    },
+    {
+      "epoch": 0.11398176291793313,
+      "grad_norm": 1.430601716041565,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 42.5467,
+      "step": 75
+    },
+    {
+      "epoch": 0.11550151975683891,
+      "grad_norm": 1.2758630514144897,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 42.6039,
+      "step": 76
+    },
+    {
+      "epoch": 0.11702127659574468,
+      "grad_norm": 1.0489394664764404,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 42.5928,
+      "step": 77
+    },
+    {
+      "epoch": 0.11854103343465046,
+      "grad_norm": 1.3841549158096313,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 42.5912,
+      "step": 78
+    },
+    {
+      "epoch": 0.12006079027355623,
+      "grad_norm": 1.5512932538986206,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 42.5429,
+      "step": 79
+    },
+    {
+      "epoch": 0.12158054711246201,
+      "grad_norm": 1.3741813898086548,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 42.5487,
+      "step": 80
+    },
+    {
+      "epoch": 0.12310030395136778,
+      "grad_norm": 1.1332428455352783,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 42.6317,
+      "step": 81
+    },
+    {
+      "epoch": 0.12462006079027356,
+      "grad_norm": 1.4934285879135132,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 42.533,
+      "step": 82
+    },
+    {
+      "epoch": 0.12613981762917933,
+      "grad_norm": 1.4491100311279297,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 42.5128,
+      "step": 83
+    },
+    {
+      "epoch": 0.1276595744680851,
+      "grad_norm": 1.6558226346969604,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 42.4972,
+      "step": 84
+    },
+    {
+      "epoch": 0.12917933130699089,
+      "grad_norm": 1.6108096837997437,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 42.4374,
+      "step": 85
+    },
+    {
+      "epoch": 0.13069908814589665,
+      "grad_norm": 1.3853203058242798,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 42.5671,
+      "step": 86
+    },
+    {
+      "epoch": 0.13221884498480244,
+      "grad_norm": 1.7137513160705566,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 42.4684,
+      "step": 87
+    },
+    {
+      "epoch": 0.1337386018237082,
+      "grad_norm": 1.5326465368270874,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 42.4793,
+      "step": 88
+    },
+    {
+      "epoch": 0.135258358662614,
+      "grad_norm": 1.5756134986877441,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 42.4665,
+      "step": 89
+    },
+    {
+      "epoch": 0.13677811550151975,
+      "grad_norm": 1.8401323556900024,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 42.4228,
+      "step": 90
+    },
+    {
+      "epoch": 0.13829787234042554,
+      "grad_norm": 2.1934590339660645,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 42.5021,
+      "step": 91
+    },
+    {
+      "epoch": 0.1398176291793313,
+      "grad_norm": 4.079236030578613,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 42.6783,
+      "step": 92
+    },
+    {
+      "epoch": 0.1413373860182371,
+      "grad_norm": 4.929635047912598,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 42.678,
+      "step": 93
+    },
+    {
+      "epoch": 0.14285714285714285,
+      "grad_norm": 4.8210248947143555,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 42.6223,
+      "step": 94
+    },
+    {
+      "epoch": 0.14437689969604864,
+      "grad_norm": 4.764625549316406,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 42.6935,
+      "step": 95
+    },
+    {
+      "epoch": 0.1458966565349544,
+      "grad_norm": 4.984578609466553,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 42.5576,
+      "step": 96
+    },
+    {
+      "epoch": 0.1474164133738602,
+      "grad_norm": 5.2452263832092285,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 42.6357,
+      "step": 97
+    },
+    {
+      "epoch": 0.14893617021276595,
+      "grad_norm": 5.212264060974121,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 42.4783,
+      "step": 98
+    },
+    {
+      "epoch": 0.15045592705167174,
+      "grad_norm": 4.950127601623535,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 42.6065,
+      "step": 99
+    },
+    {
+      "epoch": 0.1519756838905775,
+      "grad_norm": 6.176753997802734,
+      "learning_rate": 0.0,
+      "loss": 42.3882,
+      "step": 100
+    },
+    {
+      "epoch": 0.1519756838905775,
+      "eval_loss": 10.644861221313477,
+      "eval_runtime": 2.3512,
+      "eval_samples_per_second": 471.254,
+      "eval_steps_per_second": 59.119,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 12003418570752.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null