Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8e23203049c5d18063f2a635c872379448686ef927b9481255e2fc3aaac1b50
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:70097065aaee7cce2f51c4d8c00e10116135a823af02692b44082a823a1dd1ec
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8cf066c18e340875252a50addf2bf6671957204750c25b0152c1dd4ef1247dad
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:f205e6c2aa483c3ffb51c9a819942b6d288d13882a8bcd3e9b205a976177f7ce
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9cb139bc76f1ac9762675e942777eeba992825192e47b629184dfcd772f640a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0daf361b742a94c3f89fb07a30f370fad356787507ba63c4649ec31f20c12b5c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ece3236edcb393fe9fe067fb4c27aaaf2d1a125595517cb84b2456d9b62475c0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.5429913997650146,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.009394964299135663,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 10.82,
       "eval_steps_per_second": 5.41,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.442888253027123e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2692981958389282,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.018789928598271326,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.82,
       "eval_steps_per_second": 5.41,
       "step": 50
+    },
+    {
+      "epoch": 0.009582863585118376,
+      "grad_norm": 12.12710189819336,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 4.1207,
+      "step": 51
+    },
+    {
+      "epoch": 0.00977076287110109,
+      "grad_norm": 6.877650260925293,
+      "learning_rate": 7.754484907260513e-05,
+      "loss": 3.854,
+      "step": 52
+    },
+    {
+      "epoch": 0.009958662157083803,
+      "grad_norm": 3.922449827194214,
+      "learning_rate": 7.564496387029532e-05,
+      "loss": 3.875,
+      "step": 53
+    },
+    {
+      "epoch": 0.010146561443066516,
+      "grad_norm": 2.6158945560455322,
+      "learning_rate": 7.369343312364993e-05,
+      "loss": 3.8383,
+      "step": 54
+    },
+    {
+      "epoch": 0.01033446072904923,
+      "grad_norm": 2.6309781074523926,
+      "learning_rate": 7.169418695587791e-05,
+      "loss": 3.3104,
+      "step": 55
+    },
+    {
+      "epoch": 0.010522360015031942,
+      "grad_norm": 2.4806551933288574,
+      "learning_rate": 6.965125158269619e-05,
+      "loss": 3.6959,
+      "step": 56
+    },
+    {
+      "epoch": 0.010710259301014656,
+      "grad_norm": 2.6647136211395264,
+      "learning_rate": 6.756874120406714e-05,
+      "loss": 4.4607,
+      "step": 57
+    },
+    {
+      "epoch": 0.010898158586997369,
+      "grad_norm": 2.6696109771728516,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 5.5749,
+      "step": 58
+    },
+    {
+      "epoch": 0.011086057872980082,
+      "grad_norm": 2.808939218521118,
+      "learning_rate": 6.330184227833376e-05,
+      "loss": 4.7221,
+      "step": 59
+    },
+    {
+      "epoch": 0.011273957158962795,
+      "grad_norm": 2.9471914768218994,
+      "learning_rate": 6.112604669781572e-05,
+      "loss": 4.6405,
+      "step": 60
+    },
+    {
+      "epoch": 0.011461856444945509,
+      "grad_norm": 2.7732815742492676,
+      "learning_rate": 5.8927844739931834e-05,
+      "loss": 4.311,
+      "step": 61
+    },
+    {
+      "epoch": 0.011649755730928222,
+      "grad_norm": 2.873124599456787,
+      "learning_rate": 5.6711663290882776e-05,
+      "loss": 4.8382,
+      "step": 62
+    },
+    {
+      "epoch": 0.011837655016910935,
+      "grad_norm": 2.967327117919922,
+      "learning_rate": 5.448196544517168e-05,
+      "loss": 4.4859,
+      "step": 63
+    },
+    {
+      "epoch": 0.012025554302893648,
+      "grad_norm": 3.0220985412597656,
+      "learning_rate": 5.2243241517525754e-05,
+      "loss": 4.8879,
+      "step": 64
+    },
+    {
+      "epoch": 0.012213453588876362,
+      "grad_norm": 5.608536720275879,
+      "learning_rate": 5e-05,
+      "loss": 4.8269,
+      "step": 65
+    },
+    {
+      "epoch": 0.012401352874859075,
+      "grad_norm": 2.988602876663208,
+      "learning_rate": 4.775675848247427e-05,
+      "loss": 5.6113,
+      "step": 66
+    },
+    {
+      "epoch": 0.012589252160841788,
+      "grad_norm": 2.927661180496216,
+      "learning_rate": 4.551803455482833e-05,
+      "loss": 4.4028,
+      "step": 67
+    },
+    {
+      "epoch": 0.012777151446824501,
+      "grad_norm": 2.921163320541382,
+      "learning_rate": 4.328833670911724e-05,
+      "loss": 4.3565,
+      "step": 68
+    },
+    {
+      "epoch": 0.012965050732807215,
+      "grad_norm": 3.0370848178863525,
+      "learning_rate": 4.107215526006817e-05,
+      "loss": 4.7969,
+      "step": 69
+    },
+    {
+      "epoch": 0.013152950018789928,
+      "grad_norm": 3.3245177268981934,
+      "learning_rate": 3.887395330218429e-05,
+      "loss": 5.602,
+      "step": 70
+    },
+    {
+      "epoch": 0.013340849304772641,
+      "grad_norm": 3.5475261211395264,
+      "learning_rate": 3.6698157721666246e-05,
+      "loss": 4.6884,
+      "step": 71
+    },
+    {
+      "epoch": 0.013528748590755355,
+      "grad_norm": 3.5381057262420654,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 5.7957,
+      "step": 72
+    },
+    {
+      "epoch": 0.013716647876738068,
+      "grad_norm": 4.085561752319336,
+      "learning_rate": 3.243125879593286e-05,
+      "loss": 5.4665,
+      "step": 73
+    },
+    {
+      "epoch": 0.013904547162720781,
+      "grad_norm": 4.82763671875,
+      "learning_rate": 3.0348748417303823e-05,
+      "loss": 5.9452,
+      "step": 74
+    },
+    {
+      "epoch": 0.014092446448703494,
+      "grad_norm": 4.237555980682373,
+      "learning_rate": 2.8305813044122097e-05,
+      "loss": 5.4789,
+      "step": 75
+    },
+    {
+      "epoch": 0.014280345734686208,
+      "grad_norm": 4.894902229309082,
+      "learning_rate": 2.630656687635007e-05,
+      "loss": 6.3016,
+      "step": 76
+    },
+    {
+      "epoch": 0.01446824502066892,
+      "grad_norm": 4.798618316650391,
+      "learning_rate": 2.43550361297047e-05,
+      "loss": 5.9612,
+      "step": 77
+    },
+    {
+      "epoch": 0.014656144306651634,
+      "grad_norm": 7.736368179321289,
+      "learning_rate": 2.245515092739488e-05,
+      "loss": 6.1193,
+      "step": 78
+    },
+    {
+      "epoch": 0.014844043592634347,
+      "grad_norm": 5.555391788482666,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 6.2544,
+      "step": 79
+    },
+    {
+      "epoch": 0.01503194287861706,
+      "grad_norm": 6.320089817047119,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 5.6853,
+      "step": 80
+    },
+    {
+      "epoch": 0.015219842164599774,
+      "grad_norm": 5.559424877166748,
+      "learning_rate": 1.7103063703014372e-05,
+      "loss": 4.8867,
+      "step": 81
+    },
+    {
+      "epoch": 0.015407741450582487,
+      "grad_norm": 5.159763813018799,
+      "learning_rate": 1.544686755065677e-05,
+      "loss": 5.3778,
+      "step": 82
+    },
+    {
+      "epoch": 0.0155956407365652,
+      "grad_norm": 5.190471649169922,
+      "learning_rate": 1.3860256808630428e-05,
+      "loss": 4.3495,
+      "step": 83
+    },
+    {
+      "epoch": 0.015783540022547914,
+      "grad_norm": 5.162275314331055,
+      "learning_rate": 1.2346426699819458e-05,
+      "loss": 4.7606,
+      "step": 84
+    },
+    {
+      "epoch": 0.01597143930853063,
+      "grad_norm": 5.0200347900390625,
+      "learning_rate": 1.090842587659851e-05,
+      "loss": 3.9641,
+      "step": 85
+    },
+    {
+      "epoch": 0.01615933859451334,
+      "grad_norm": 4.721124172210693,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 3.9436,
+      "step": 86
+    },
+    {
+      "epoch": 0.016347237880496055,
+      "grad_norm": 5.321893215179443,
+      "learning_rate": 8.271337313934869e-06,
+      "loss": 4.0494,
+      "step": 87
+    },
+    {
+      "epoch": 0.016535137166478767,
+      "grad_norm": 5.561007022857666,
+      "learning_rate": 7.077560319906695e-06,
+      "loss": 4.481,
+      "step": 88
+    },
+    {
+      "epoch": 0.01672303645246148,
+      "grad_norm": 6.137837886810303,
+      "learning_rate": 5.9702234071631e-06,
+      "loss": 4.0547,
+      "step": 89
+    },
+    {
+      "epoch": 0.016910935738444193,
+      "grad_norm": 6.851164817810059,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 5.9195,
+      "step": 90
+    },
+    {
+      "epoch": 0.017098835024426908,
+      "grad_norm": 6.387176036834717,
+      "learning_rate": 4.023611372427471e-06,
+      "loss": 6.3249,
+      "step": 91
+    },
+    {
+      "epoch": 0.01728673431040962,
+      "grad_norm": 8.208516120910645,
+      "learning_rate": 3.18825646801314e-06,
+      "loss": 7.7682,
+      "step": 92
+    },
+    {
+      "epoch": 0.017474633596392335,
+      "grad_norm": 8.880036354064941,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 7.4679,
+      "step": 93
+    },
+    {
+      "epoch": 0.017662532882375046,
+      "grad_norm": 9.490116119384766,
+      "learning_rate": 1.8018569652073381e-06,
+      "loss": 6.5789,
+      "step": 94
+    },
+    {
+      "epoch": 0.01785043216835776,
+      "grad_norm": 10.559647560119629,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 7.196,
+      "step": 95
+    },
+    {
+      "epoch": 0.018038331454340473,
+      "grad_norm": 10.684134483337402,
+      "learning_rate": 8.035205700685167e-07,
+      "loss": 5.8043,
+      "step": 96
+    },
+    {
+      "epoch": 0.018226230740323188,
+      "grad_norm": 9.896649360656738,
+      "learning_rate": 4.52511911603265e-07,
+      "loss": 5.7818,
+      "step": 97
+    },
+    {
+      "epoch": 0.0184141300263059,
+      "grad_norm": 10.521429061889648,
+      "learning_rate": 2.012853002380466e-07,
+      "loss": 5.4138,
+      "step": 98
+    },
+    {
+      "epoch": 0.018602029312288614,
+      "grad_norm": 12.07073974609375,
+      "learning_rate": 5.0346672934270534e-08,
+      "loss": 5.933,
+      "step": 99
+    },
+    {
+      "epoch": 0.018789928598271326,
+      "grad_norm": 15.48339557647705,
+      "learning_rate": 0.0,
+      "loss": 5.1382,
+      "step": 100
+    },
+    {
+      "epoch": 0.018789928598271326,
+      "eval_loss": 1.2692981958389282,
+      "eval_runtime": 828.1962,
+      "eval_samples_per_second": 10.824,
+      "eval_steps_per_second": 5.412,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.6778451655380173e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null