yiiino commited on
Commit
bf18d06
·
1 Parent(s): 1d4a7fc

Model Update

Browse files
README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
21
  metrics:
22
  - name: Matthews Correlation
23
  type: matthews_correlation
24
- value: 0.6724688526255549
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,8 +31,8 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the GLUE COLA dataset.
33
  It achieves the following results on the evaluation set:
34
- - Loss: 0.6228
35
- - Matthews Correlation: 0.6725
36
 
37
  ## Model description
38
 
@@ -51,7 +51,7 @@ More information needed
51
  ### Training hyperparameters
52
 
53
  The following hyperparameters were used during training:
54
- - learning_rate: 3e-06
55
  - train_batch_size: 8
56
  - eval_batch_size: 8
57
  - seed: 42
 
21
  metrics:
22
  - name: Matthews Correlation
23
  type: matthews_correlation
24
+ value: 0.6875144669936191
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the GLUE COLA dataset.
33
  It achieves the following results on the evaluation set:
34
+ - Loss: 0.7360
35
+ - Matthews Correlation: 0.6875
36
 
37
  ## Model description
38
 
 
51
  ### Training hyperparameters
52
 
53
  The following hyperparameters were used during training:
54
+ - learning_rate: 9e-06
55
  - train_batch_size: 8
56
  - eval_batch_size: 8
57
  - seed: 42
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 0.6228054165840149,
4
- "eval_matthews_correlation": 0.6724688526255549,
5
- "eval_runtime": 3.7036,
6
  "eval_samples": 1043,
7
- "eval_samples_per_second": 281.615,
8
- "eval_steps_per_second": 35.371,
9
- "train_loss": 0.3132009479462041,
10
- "train_runtime": 699.3959,
11
  "train_samples": 8551,
12
- "train_samples_per_second": 61.131,
13
- "train_steps_per_second": 7.642
14
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 0.735971987247467,
4
+ "eval_matthews_correlation": 0.6875144669936191,
5
+ "eval_runtime": 3.5686,
6
  "eval_samples": 1043,
7
+ "eval_samples_per_second": 292.27,
8
+ "eval_steps_per_second": 36.709,
9
+ "train_loss": 0.2295077174929841,
10
+ "train_runtime": 717.9746,
11
  "train_samples": 8551,
12
+ "train_samples_per_second": 59.549,
13
+ "train_steps_per_second": 7.445
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 0.6228054165840149,
4
- "eval_matthews_correlation": 0.6724688526255549,
5
- "eval_runtime": 3.7036,
6
  "eval_samples": 1043,
7
- "eval_samples_per_second": 281.615,
8
- "eval_steps_per_second": 35.371
9
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 0.735971987247467,
4
+ "eval_matthews_correlation": 0.6875144669936191,
5
+ "eval_runtime": 3.5686,
6
  "eval_samples": 1043,
7
+ "eval_samples_per_second": 292.27,
8
+ "eval_steps_per_second": 36.709
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3764e7b1ebca2e8fbacb2703f3f8368962f19cc40b5d60417e1e1152daa3c70
3
  size 737766955
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d7772ab9a6cdcc483d31c37839d8d5068d50aa17374cbc10b5f0be41078786
3
  size 737766955
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.3132009479462041,
4
- "train_runtime": 699.3959,
5
  "train_samples": 8551,
6
- "train_samples_per_second": 61.131,
7
- "train_steps_per_second": 7.642
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.2295077174929841,
4
+ "train_runtime": 717.9746,
5
  "train_samples": 8551,
6
+ "train_samples_per_second": 59.549,
7
+ "train_steps_per_second": 7.445
8
  }
trainer_state.json CHANGED
@@ -9,72 +9,72 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.47,
12
- "learning_rate": 2.7193638914873715e-06,
13
- "loss": 0.4857,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.94,
18
- "learning_rate": 2.438727782974743e-06,
19
- "loss": 0.3995,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.4,
24
- "learning_rate": 2.1580916744621143e-06,
25
- "loss": 0.3319,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 1.87,
30
- "learning_rate": 1.8774555659494855e-06,
31
- "loss": 0.3296,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 2.34,
36
- "learning_rate": 1.5968194574368567e-06,
37
- "loss": 0.2956,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 2.81,
42
- "learning_rate": 1.3161833489242283e-06,
43
- "loss": 0.289,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 3.27,
48
- "learning_rate": 1.0355472404115997e-06,
49
- "loss": 0.2876,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 3.74,
54
- "learning_rate": 7.549111318989711e-07,
55
- "loss": 0.2735,
56
  "step": 4000
57
  },
58
  {
59
  "epoch": 4.21,
60
- "learning_rate": 4.7427502338634235e-07,
61
- "loss": 0.2586,
62
  "step": 4500
63
  },
64
  {
65
  "epoch": 4.68,
66
- "learning_rate": 1.9363891487371378e-07,
67
- "loss": 0.2505,
68
  "step": 5000
69
  },
70
  {
71
  "epoch": 5.0,
72
  "step": 5345,
73
  "total_flos": 2812378728829440.0,
74
- "train_loss": 0.3132009479462041,
75
- "train_runtime": 699.3959,
76
- "train_samples_per_second": 61.131,
77
- "train_steps_per_second": 7.642
78
  }
79
  ],
80
  "max_steps": 5345,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.47,
12
+ "learning_rate": 8.158091674462115e-06,
13
+ "loss": 0.4466,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.94,
18
+ "learning_rate": 7.316183348924229e-06,
19
+ "loss": 0.3772,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.4,
24
+ "learning_rate": 6.4742750233863424e-06,
25
+ "loss": 0.287,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 1.87,
30
+ "learning_rate": 5.632366697848456e-06,
31
+ "loss": 0.2872,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 2.34,
36
+ "learning_rate": 4.79045837231057e-06,
37
+ "loss": 0.2215,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 2.81,
42
+ "learning_rate": 3.948550046772685e-06,
43
+ "loss": 0.2127,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 3.27,
48
+ "learning_rate": 3.106641721234799e-06,
49
+ "loss": 0.1651,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 3.74,
54
+ "learning_rate": 2.264733395696913e-06,
55
+ "loss": 0.1548,
56
  "step": 4000
57
  },
58
  {
59
  "epoch": 4.21,
60
+ "learning_rate": 1.422825070159027e-06,
61
+ "loss": 0.1361,
62
  "step": 4500
63
  },
64
  {
65
  "epoch": 4.68,
66
+ "learning_rate": 5.809167446211413e-07,
67
+ "loss": 0.0992,
68
  "step": 5000
69
  },
70
  {
71
  "epoch": 5.0,
72
  "step": 5345,
73
  "total_flos": 2812378728829440.0,
74
+ "train_loss": 0.2295077174929841,
75
+ "train_runtime": 717.9746,
76
+ "train_samples_per_second": 59.549,
77
+ "train_steps_per_second": 7.445
78
  }
79
  ],
80
  "max_steps": 5345,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f90722bb40edfe5c1d150dd1aaeddda73c54c03f9869ce0a3d45384a72f4b4f0
3
  size 3375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a1b1a0ef6256873ccd440986be20c6fde549b5a5285f6423b8facb4c03901a2
3
  size 3375