Training in progress, epoch 2

Files changed (7) hide show

logs/events.out.tfevents.1719305887.852b1e905a9a.223.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99959e7256c57c0a7615a75a705d0c04ea51d6768974b0ec0a62b7fc9de73dfc
-size 5430

 version https://git-lfs.github.com/spec/v1
+oid sha256:3563337f111b88b6da01083577e56dfa749019aaf0c65354784829fe6a3eb876
+size 5964

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa21ae55c664d183ed99418fbb9247b4b7ee6800ae3d0ed0500451e8a3bebc7d
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ffba682ed7f803f9b0e6756d1d7262c0a8ff7725ee17d25a4400fb904738bf0
 size 17549312

run-0/checkpoint-1054/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:edc4ad43f5e49b3a0c534114a93919a157d55efbdcfb062448c7795f2b5963e8
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ffba682ed7f803f9b0e6756d1d7262c0a8ff7725ee17d25a4400fb904738bf0
 size 17549312

run-0/checkpoint-1054/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:975869f9203f2c677cd6911f6da07d03212f74cc889dd914e5daa8af896878e4
 size 35123898

 version https://git-lfs.github.com/spec/v1
+oid sha256:7cc2a85d4ea840d3fcc66aea49ec835a62a0e8cec385ee7e3944eeb287a7e1d9
 size 35123898

run-0/checkpoint-1054/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:678f449fdc613a144c5634eb27c401deb6afd96d4ef05b160236c47f1a867c35
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ecce5e800ae20802be0ec9e25ab4e60fcfb940623cf20ae609f503abadbbe480
 size 1064

run-0/checkpoint-1054/trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.8107798165137615,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1054",
   "epoch": 2.0,
   "eval_steps": 500,
@@ -10,41 +10,41 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 16.55626678466797,
-      "learning_rate": 5.1383294230414005e-05,
-      "loss": 1.5907,
       "step": 527
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.8061926605504587,
-      "eval_loss": 1.2464169263839722,
-      "eval_runtime": 2.4007,
-      "eval_samples_per_second": 363.232,
-      "eval_steps_per_second": 2.916,
       "step": 527
     },
     {
       "epoch": 2.0,
-      "grad_norm": Infinity,
-      "learning_rate": 4.405675241279466e-05,
-      "loss": 0.9038,
       "step": 1054
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.8107798165137615,
-      "eval_loss": 1.1235342025756836,
-      "eval_runtime": 2.3533,
-      "eval_samples_per_second": 370.54,
-      "eval_steps_per_second": 2.975,
       "step": 1054
     }
   ],
   "logging_steps": 500,
-  "max_steps": 4216,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 8,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -62,9 +62,9 @@
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.5167874928728581,
-    "learning_rate": 5.872376483475886e-05,
-    "num_train_epochs": 8,
-    "temperature": 5
   }
 }

 {
+  "best_metric": 0.7786697247706422,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1054",
   "epoch": 2.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 5.95961856842041,
+      "learning_rate": 9.55389368279823e-06,
+      "loss": 1.5369,
       "step": 527
     },
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.7339449541284404,
+      "eval_loss": 1.2773902416229248,
+      "eval_runtime": 2.5957,
+      "eval_samples_per_second": 335.939,
+      "eval_steps_per_second": 2.697,
       "step": 527
     },
     {
       "epoch": 2.0,
+      "grad_norm": 15.252978324890137,
+      "learning_rate": 8.492349940265094e-06,
+      "loss": 1.2159,
       "step": 1054
     },
     {
       "epoch": 2.0,
+      "eval_accuracy": 0.7786697247706422,
+      "eval_loss": 1.022659182548523,
+      "eval_runtime": 2.5741,
+      "eval_samples_per_second": 338.753,
+      "eval_steps_per_second": 2.719,
       "step": 1054
     }
   ],
   "logging_steps": 500,
+  "max_steps": 5270,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.27608826195592573,
+    "learning_rate": 1.0615437425331367e-05,
+    "num_train_epochs": 10,
+    "temperature": 2
   }
 }

run-0/checkpoint-1054/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f28149fe21091b257234d7cbe1611ee6ca88e3a7cef675e40e6d90410e6fc1a6
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8b6a60f7b85b38fa45cddf1a417ee51250fe5822237403416bf2406ff2cdb84
 size 5176