Model Update

Browse files

Files changed (7) hide show

README.md +4 -4
all_results.json +9 -9
eval_results.json +5 -5
pytorch_model.bin +1 -1
train_results.json +4 -4
trainer_state.json +24 -24
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -21,7 +21,7 @@ model-index:
     metrics:
     - name: Matthews Correlation
       type: matthews_correlation
-      value: 0.6724688526255549
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,8 +31,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the GLUE COLA dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6228
-- Matthews Correlation: 0.6725
 ## Model description
@@ -51,7 +51,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 3e-06
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42

     metrics:
     - name: Matthews Correlation
       type: matthews_correlation
+      value: 0.6875144669936191
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the GLUE COLA dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.7360
+- Matthews Correlation: 0.6875
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 9e-06
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42

all_results.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
     "epoch": 5.0,
-    "eval_loss": 0.6228054165840149,
-    "eval_matthews_correlation": 0.6724688526255549,
-    "eval_runtime": 3.7036,
     "eval_samples": 1043,
-    "eval_samples_per_second": 281.615,
-    "eval_steps_per_second": 35.371,
-    "train_loss": 0.3132009479462041,
-    "train_runtime": 699.3959,
     "train_samples": 8551,
-    "train_samples_per_second": 61.131,
-    "train_steps_per_second": 7.642
 }

 {
     "epoch": 5.0,
+    "eval_loss": 0.735971987247467,
+    "eval_matthews_correlation": 0.6875144669936191,
+    "eval_runtime": 3.5686,
     "eval_samples": 1043,
+    "eval_samples_per_second": 292.27,
+    "eval_steps_per_second": 36.709,
+    "train_loss": 0.2295077174929841,
+    "train_runtime": 717.9746,
     "train_samples": 8551,
+    "train_samples_per_second": 59.549,
+    "train_steps_per_second": 7.445
 }

eval_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 5.0,
-    "eval_loss": 0.6228054165840149,
-    "eval_matthews_correlation": 0.6724688526255549,
-    "eval_runtime": 3.7036,
     "eval_samples": 1043,
-    "eval_samples_per_second": 281.615,
-    "eval_steps_per_second": 35.371
 }

 {
     "epoch": 5.0,
+    "eval_loss": 0.735971987247467,
+    "eval_matthews_correlation": 0.6875144669936191,
+    "eval_runtime": 3.5686,
     "eval_samples": 1043,
+    "eval_samples_per_second": 292.27,
+    "eval_steps_per_second": 36.709
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3764e7b1ebca2e8fbacb2703f3f8368962f19cc40b5d60417e1e1152daa3c70
 size 737766955

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6d7772ab9a6cdcc483d31c37839d8d5068d50aa17374cbc10b5f0be41078786
 size 737766955

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
-    "train_loss": 0.3132009479462041,
-    "train_runtime": 699.3959,
     "train_samples": 8551,
-    "train_samples_per_second": 61.131,
-    "train_steps_per_second": 7.642
 }

 {
     "epoch": 5.0,
+    "train_loss": 0.2295077174929841,
+    "train_runtime": 717.9746,
     "train_samples": 8551,
+    "train_samples_per_second": 59.549,
+    "train_steps_per_second": 7.445
 }

trainer_state.json CHANGED Viewed

@@ -9,72 +9,72 @@
   "log_history": [
     {
       "epoch": 0.47,
-      "learning_rate": 2.7193638914873715e-06,
-      "loss": 0.4857,
       "step": 500
     },
     {
       "epoch": 0.94,
-      "learning_rate": 2.438727782974743e-06,
-      "loss": 0.3995,
       "step": 1000
     },
     {
       "epoch": 1.4,
-      "learning_rate": 2.1580916744621143e-06,
-      "loss": 0.3319,
       "step": 1500
     },
     {
       "epoch": 1.87,
-      "learning_rate": 1.8774555659494855e-06,
-      "loss": 0.3296,
       "step": 2000
     },
     {
       "epoch": 2.34,
-      "learning_rate": 1.5968194574368567e-06,
-      "loss": 0.2956,
       "step": 2500
     },
     {
       "epoch": 2.81,
-      "learning_rate": 1.3161833489242283e-06,
-      "loss": 0.289,
       "step": 3000
     },
     {
       "epoch": 3.27,
-      "learning_rate": 1.0355472404115997e-06,
-      "loss": 0.2876,
       "step": 3500
     },
     {
       "epoch": 3.74,
-      "learning_rate": 7.549111318989711e-07,
-      "loss": 0.2735,
       "step": 4000
     },
     {
       "epoch": 4.21,
-      "learning_rate": 4.7427502338634235e-07,
-      "loss": 0.2586,
       "step": 4500
     },
     {
       "epoch": 4.68,
-      "learning_rate": 1.9363891487371378e-07,
-      "loss": 0.2505,
       "step": 5000
     },
     {
       "epoch": 5.0,
       "step": 5345,
       "total_flos": 2812378728829440.0,
-      "train_loss": 0.3132009479462041,
-      "train_runtime": 699.3959,
-      "train_samples_per_second": 61.131,
-      "train_steps_per_second": 7.642
     }
   ],
   "max_steps": 5345,

   "log_history": [
     {
       "epoch": 0.47,
+      "learning_rate": 8.158091674462115e-06,
+      "loss": 0.4466,
       "step": 500
     },
     {
       "epoch": 0.94,
+      "learning_rate": 7.316183348924229e-06,
+      "loss": 0.3772,
       "step": 1000
     },
     {
       "epoch": 1.4,
+      "learning_rate": 6.4742750233863424e-06,
+      "loss": 0.287,
       "step": 1500
     },
     {
       "epoch": 1.87,
+      "learning_rate": 5.632366697848456e-06,
+      "loss": 0.2872,
       "step": 2000
     },
     {
       "epoch": 2.34,
+      "learning_rate": 4.79045837231057e-06,
+      "loss": 0.2215,
       "step": 2500
     },
     {
       "epoch": 2.81,
+      "learning_rate": 3.948550046772685e-06,
+      "loss": 0.2127,
       "step": 3000
     },
     {
       "epoch": 3.27,
+      "learning_rate": 3.106641721234799e-06,
+      "loss": 0.1651,
       "step": 3500
     },
     {
       "epoch": 3.74,
+      "learning_rate": 2.264733395696913e-06,
+      "loss": 0.1548,
       "step": 4000
     },
     {
       "epoch": 4.21,
+      "learning_rate": 1.422825070159027e-06,
+      "loss": 0.1361,
       "step": 4500
     },
     {
       "epoch": 4.68,
+      "learning_rate": 5.809167446211413e-07,
+      "loss": 0.0992,
       "step": 5000
     },
     {
       "epoch": 5.0,
       "step": 5345,
       "total_flos": 2812378728829440.0,
+      "train_loss": 0.2295077174929841,
+      "train_runtime": 717.9746,
+      "train_samples_per_second": 59.549,
+      "train_steps_per_second": 7.445
     }
   ],
   "max_steps": 5345,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f90722bb40edfe5c1d150dd1aaeddda73c54c03f9869ce0a3d45384a72f4b4f0
 size 3375

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a1b1a0ef6256873ccd440986be20c6fde549b5a5285f6423b8facb4c03901a2
 size 3375