Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5ef3beb9448d72cca91d94dfe6a496c55f1eeb8999d0eaa8713ad4c2713c966
 size 144748392

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e1ddf20e43ee498cc289f2bad036aa9eb0970219206ea831c5581b208af2226
 size 144748392

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18cd7e8f7810a66c037ca385abba7a1212abce264c65faa9cb6e38bdb05aedd3
 size 73877972

 version https://git-lfs.github.com/spec/v1
+oid sha256:800b02184e3f2fd61d964ff1a7bb0ea20318449052b1c3837ad59923d32c1d68
 size 73877972

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38c3ff7e6b44015b9baf9a104f355f5f950f6a24cd408cba2777c3c7df222047
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f7e4d7d2b0c8aa3af21365f6f0926784e1d68c844ac2fbc1cc56728a1f7c21d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe0a47cc675a5d4ba5fe8c0d42564476e1fe842799977ab67bf2a8317adef53f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.511861264705658,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.08089387734465847,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -327,6 +327,84 @@
       "eval_samples_per_second": 25.263,
       "eval_steps_per_second": 6.316,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -355,7 +433,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.8399454524604416e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5024861097335815,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.10111734668082309,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.263,
       "eval_steps_per_second": 6.316,
       "step": 400
+    },
+    {
+      "epoch": 0.08291622427827494,
+      "grad_norm": 0.16395606100559235,
+      "learning_rate": 4.695790918802576e-05,
+      "loss": 0.6875,
+      "step": 410
+    },
+    {
+      "epoch": 0.0849385712118914,
+      "grad_norm": 0.53562331199646,
+      "learning_rate": 4.252125897855932e-05,
+      "loss": 0.5775,
+      "step": 420
+    },
+    {
+      "epoch": 0.08696091814550787,
+      "grad_norm": 0.15994961559772491,
+      "learning_rate": 3.824753850538082e-05,
+      "loss": 0.5303,
+      "step": 430
+    },
+    {
+      "epoch": 0.08898326507912432,
+      "grad_norm": 0.2935360074043274,
+      "learning_rate": 3.414886209349615e-05,
+      "loss": 0.5177,
+      "step": 440
+    },
+    {
+      "epoch": 0.09100561201274078,
+      "grad_norm": 0.4813726544380188,
+      "learning_rate": 3.0236847886501542e-05,
+      "loss": 0.2539,
+      "step": 450
+    },
+    {
+      "epoch": 0.09302795894635725,
+      "grad_norm": 0.15252335369586945,
+      "learning_rate": 2.6522584913693294e-05,
+      "loss": 0.6666,
+      "step": 460
+    },
+    {
+      "epoch": 0.0950503058799737,
+      "grad_norm": 0.3821258246898651,
+      "learning_rate": 2.301660165700936e-05,
+      "loss": 0.5569,
+      "step": 470
+    },
+    {
+      "epoch": 0.09707265281359018,
+      "grad_norm": 0.14530214667320251,
+      "learning_rate": 1.9728836206903656e-05,
+      "loss": 0.5426,
+      "step": 480
+    },
+    {
+      "epoch": 0.09909499974720663,
+      "grad_norm": 0.28957322239875793,
+      "learning_rate": 1.6668608091748495e-05,
+      "loss": 0.5386,
+      "step": 490
+    },
+    {
+      "epoch": 0.10111734668082309,
+      "grad_norm": 0.19388361275196075,
+      "learning_rate": 1.3844591860619383e-05,
+      "loss": 0.2322,
+      "step": 500
+    },
+    {
+      "epoch": 0.10111734668082309,
+      "eval_loss": 0.5024861097335815,
+      "eval_runtime": 330.0244,
+      "eval_samples_per_second": 25.234,
+      "eval_steps_per_second": 6.309,
+      "step": 500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.2973010092752896e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null