gokuls committed on
Commit
f09ce58
·
1 Parent(s): bd35a55

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -13,7 +15,7 @@ model-index:
13
  name: Text Classification
14
  type: text-classification
15
  dataset:
16
- name: glue
17
  type: glue
18
  config: cola
19
  split: validation
@@ -32,9 +34,9 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  # hBERTv1_new_pretrain_w_init__cola
34
 
35
- This model is a fine-tuned version of [gokuls/bert_12_layer_model_v1_complete_training_new_wt_init](https://huggingface.co/gokuls/bert_12_layer_model_v1_complete_training_new_wt_init) on the glue dataset.
36
  It achieves the following results on the evaluation set:
37
- - Loss: 0.6260
38
  - Matthews Correlation: 0.0
39
  - Accuracy: 0.6913
40
 
 
1
  ---
2
+ language:
3
+ - en
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
15
  name: Text Classification
16
  type: text-classification
17
  dataset:
18
+ name: GLUE COLA
19
  type: glue
20
  config: cola
21
  split: validation
 
34
 
35
  # hBERTv1_new_pretrain_w_init__cola
36
 
37
+ This model is a fine-tuned version of [gokuls/bert_12_layer_model_v1_complete_training_new_wt_init](https://huggingface.co/gokuls/bert_12_layer_model_v1_complete_training_new_wt_init) on the GLUE COLA dataset.
38
  It achieves the following results on the evaluation set:
39
+ - Loss: 0.6171
40
  - Matthews Correlation: 0.0
41
  - Accuracy: 0.6913
42
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 9.0,
3
  "eval_accuracy": 0.6912751793861389,
4
- "eval_loss": 0.6171762347221375,
5
  "eval_matthews_correlation": 0.0,
6
- "eval_runtime": 1.8974,
7
  "eval_samples": 1043,
8
- "eval_samples_per_second": 549.702,
9
- "eval_steps_per_second": 4.743,
10
- "train_loss": 0.7229372850104944,
11
- "train_runtime": 479.5868,
12
  "train_samples": 8551,
13
- "train_samples_per_second": 891.497,
14
- "train_steps_per_second": 6.985
15
  }
 
1
  {
2
  "epoch": 9.0,
3
  "eval_accuracy": 0.6912751793861389,
4
+ "eval_loss": 0.6171294450759888,
5
  "eval_matthews_correlation": 0.0,
6
+ "eval_runtime": 1.8779,
7
  "eval_samples": 1043,
8
+ "eval_samples_per_second": 555.393,
9
+ "eval_steps_per_second": 4.792,
10
+ "train_loss": 0.6151945009753479,
11
+ "train_runtime": 493.5949,
12
  "train_samples": 8551,
13
+ "train_samples_per_second": 866.196,
14
+ "train_steps_per_second": 6.787
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 9.0,
3
  "eval_accuracy": 0.6912751793861389,
4
- "eval_loss": 0.6171762347221375,
5
  "eval_matthews_correlation": 0.0,
6
- "eval_runtime": 1.8974,
7
  "eval_samples": 1043,
8
- "eval_samples_per_second": 549.702,
9
- "eval_steps_per_second": 4.743
10
  }
 
1
  {
2
  "epoch": 9.0,
3
  "eval_accuracy": 0.6912751793861389,
4
+ "eval_loss": 0.6171294450759888,
5
  "eval_matthews_correlation": 0.0,
6
+ "eval_runtime": 1.8779,
7
  "eval_samples": 1043,
8
+ "eval_samples_per_second": 555.393,
9
+ "eval_steps_per_second": 4.792
10
  }
logs/events.out.tfevents.1686033047.serv-3317.3377907.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63dc7ee9f39b634ce4eddab4e0f593a00a158d0c2233f7d784a41464f904ee4e
3
+ size 427
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.0,
3
- "train_loss": 0.7229372850104944,
4
- "train_runtime": 479.5868,
5
  "train_samples": 8551,
6
- "train_samples_per_second": 891.497,
7
- "train_steps_per_second": 6.985
8
  }
 
1
  {
2
  "epoch": 9.0,
3
+ "train_loss": 0.6151945009753479,
4
+ "train_runtime": 493.5949,
5
  "train_samples": 8551,
6
+ "train_samples_per_second": 866.196,
7
+ "train_steps_per_second": 6.787
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6171762347221375,
3
  "best_model_checkpoint": "hBERTv1_new_pretrain_w_init__cola/checkpoint-268",
4
  "epoch": 9.0,
5
  "global_step": 603,
@@ -9,156 +9,156 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 0.00049,
13
- "loss": 1.5988,
14
  "step": 67
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_accuracy": 0.6912751793861389,
19
- "eval_loss": 0.6768448948860168,
20
  "eval_matthews_correlation": 0.0,
21
- "eval_runtime": 1.8807,
22
- "eval_samples_per_second": 554.574,
23
- "eval_steps_per_second": 4.785,
24
  "step": 67
25
  },
26
  {
27
  "epoch": 2.0,
28
- "learning_rate": 0.00048,
29
- "loss": 0.6348,
30
  "step": 134
31
  },
32
  {
33
  "epoch": 2.0,
34
  "eval_accuracy": 0.6912751793861389,
35
- "eval_loss": 0.6294155716896057,
36
  "eval_matthews_correlation": 0.0,
37
- "eval_runtime": 1.89,
38
- "eval_samples_per_second": 551.843,
39
- "eval_steps_per_second": 4.762,
40
  "step": 134
41
  },
42
  {
43
  "epoch": 3.0,
44
- "learning_rate": 0.00047,
45
- "loss": 0.6135,
46
  "step": 201
47
  },
48
  {
49
  "epoch": 3.0,
50
  "eval_accuracy": 0.6912751793861389,
51
- "eval_loss": 0.6195020079612732,
52
  "eval_matthews_correlation": 0.0,
53
- "eval_runtime": 1.8864,
54
- "eval_samples_per_second": 552.896,
55
- "eval_steps_per_second": 4.771,
56
  "step": 201
57
  },
58
  {
59
  "epoch": 4.0,
60
- "learning_rate": 0.00046,
61
- "loss": 0.6111,
62
  "step": 268
63
  },
64
  {
65
  "epoch": 4.0,
66
  "eval_accuracy": 0.6912751793861389,
67
- "eval_loss": 0.6171762347221375,
68
  "eval_matthews_correlation": 0.0,
69
- "eval_runtime": 1.8924,
70
- "eval_samples_per_second": 551.137,
71
- "eval_steps_per_second": 4.756,
72
  "step": 268
73
  },
74
  {
75
  "epoch": 5.0,
76
- "learning_rate": 0.00045000000000000004,
77
- "loss": 0.6093,
78
  "step": 335
79
  },
80
  {
81
  "epoch": 5.0,
82
  "eval_accuracy": 0.6912751793861389,
83
- "eval_loss": 0.6190469861030579,
84
  "eval_matthews_correlation": 0.0,
85
- "eval_runtime": 1.8886,
86
- "eval_samples_per_second": 552.263,
87
- "eval_steps_per_second": 4.765,
88
  "step": 335
89
  },
90
  {
91
  "epoch": 6.0,
92
- "learning_rate": 0.00044,
93
- "loss": 0.6094,
94
  "step": 402
95
  },
96
  {
97
  "epoch": 6.0,
98
  "eval_accuracy": 0.6912751793861389,
99
- "eval_loss": 0.6183953285217285,
100
  "eval_matthews_correlation": 0.0,
101
- "eval_runtime": 1.8864,
102
- "eval_samples_per_second": 552.902,
103
- "eval_steps_per_second": 4.771,
104
  "step": 402
105
  },
106
  {
107
  "epoch": 7.0,
108
- "learning_rate": 0.00043,
109
- "loss": 0.6096,
110
  "step": 469
111
  },
112
  {
113
  "epoch": 7.0,
114
  "eval_accuracy": 0.6912751793861389,
115
- "eval_loss": 0.6203241348266602,
116
  "eval_matthews_correlation": 0.0,
117
- "eval_runtime": 1.8849,
118
- "eval_samples_per_second": 553.337,
119
- "eval_steps_per_second": 4.775,
120
  "step": 469
121
  },
122
  {
123
  "epoch": 8.0,
124
- "learning_rate": 0.00042,
125
- "loss": 0.6103,
126
  "step": 536
127
  },
128
  {
129
  "epoch": 8.0,
130
  "eval_accuracy": 0.6912751793861389,
131
- "eval_loss": 0.6184271574020386,
132
  "eval_matthews_correlation": 0.0,
133
- "eval_runtime": 1.8889,
134
- "eval_samples_per_second": 552.179,
135
- "eval_steps_per_second": 4.765,
136
  "step": 536
137
  },
138
  {
139
  "epoch": 9.0,
140
- "learning_rate": 0.00041,
141
- "loss": 0.6097,
142
  "step": 603
143
  },
144
  {
145
  "epoch": 9.0,
146
  "eval_accuracy": 0.6912751793861389,
147
- "eval_loss": 0.6273332834243774,
148
  "eval_matthews_correlation": 0.0,
149
- "eval_runtime": 1.886,
150
- "eval_samples_per_second": 553.026,
151
- "eval_steps_per_second": 4.772,
152
  "step": 603
153
  },
154
  {
155
  "epoch": 9.0,
156
  "step": 603,
157
  "total_flos": 1.1397379827695616e+16,
158
- "train_loss": 0.7229372850104944,
159
- "train_runtime": 479.5868,
160
- "train_samples_per_second": 891.497,
161
- "train_steps_per_second": 6.985
162
  }
163
  ],
164
  "max_steps": 3350,
 
1
  {
2
+ "best_metric": 0.6171294450759888,
3
  "best_model_checkpoint": "hBERTv1_new_pretrain_w_init__cola/checkpoint-268",
4
  "epoch": 9.0,
5
  "global_step": 603,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 3.9200000000000004e-05,
13
+ "loss": 0.6355,
14
  "step": 67
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_accuracy": 0.6912751793861389,
19
+ "eval_loss": 0.6238539814949036,
20
  "eval_matthews_correlation": 0.0,
21
+ "eval_runtime": 1.8831,
22
+ "eval_samples_per_second": 553.864,
23
+ "eval_steps_per_second": 4.779,
24
  "step": 67
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "learning_rate": 3.8400000000000005e-05,
29
+ "loss": 0.6177,
30
  "step": 134
31
  },
32
  {
33
  "epoch": 2.0,
34
  "eval_accuracy": 0.6912751793861389,
35
+ "eval_loss": 0.6210843920707703,
36
  "eval_matthews_correlation": 0.0,
37
+ "eval_runtime": 1.8854,
38
+ "eval_samples_per_second": 553.187,
39
+ "eval_steps_per_second": 4.773,
40
  "step": 134
41
  },
42
  {
43
  "epoch": 3.0,
44
+ "learning_rate": 3.76e-05,
45
+ "loss": 0.6142,
46
  "step": 201
47
  },
48
  {
49
  "epoch": 3.0,
50
  "eval_accuracy": 0.6912751793861389,
51
+ "eval_loss": 0.6231480240821838,
52
  "eval_matthews_correlation": 0.0,
53
+ "eval_runtime": 1.8881,
54
+ "eval_samples_per_second": 552.412,
55
+ "eval_steps_per_second": 4.767,
56
  "step": 201
57
  },
58
  {
59
  "epoch": 4.0,
60
+ "learning_rate": 3.680000000000001e-05,
61
+ "loss": 0.6145,
62
  "step": 268
63
  },
64
  {
65
  "epoch": 4.0,
66
  "eval_accuracy": 0.6912751793861389,
67
+ "eval_loss": 0.6171294450759888,
68
  "eval_matthews_correlation": 0.0,
69
+ "eval_runtime": 1.8901,
70
+ "eval_samples_per_second": 551.824,
71
+ "eval_steps_per_second": 4.762,
72
  "step": 268
73
  },
74
  {
75
  "epoch": 5.0,
76
+ "learning_rate": 3.6e-05,
77
+ "loss": 0.6102,
78
  "step": 335
79
  },
80
  {
81
  "epoch": 5.0,
82
  "eval_accuracy": 0.6912751793861389,
83
+ "eval_loss": 0.6199458241462708,
84
  "eval_matthews_correlation": 0.0,
85
+ "eval_runtime": 1.8879,
86
+ "eval_samples_per_second": 552.46,
87
+ "eval_steps_per_second": 4.767,
88
  "step": 335
89
  },
90
  {
91
  "epoch": 6.0,
92
+ "learning_rate": 3.52e-05,
93
+ "loss": 0.6126,
94
  "step": 402
95
  },
96
  {
97
  "epoch": 6.0,
98
  "eval_accuracy": 0.6912751793861389,
99
+ "eval_loss": 0.6183897256851196,
100
  "eval_matthews_correlation": 0.0,
101
+ "eval_runtime": 1.8897,
102
+ "eval_samples_per_second": 551.929,
103
+ "eval_steps_per_second": 4.763,
104
  "step": 402
105
  },
106
  {
107
  "epoch": 7.0,
108
+ "learning_rate": 3.44e-05,
109
+ "loss": 0.6127,
110
  "step": 469
111
  },
112
  {
113
  "epoch": 7.0,
114
  "eval_accuracy": 0.6912751793861389,
115
+ "eval_loss": 0.6205731630325317,
116
  "eval_matthews_correlation": 0.0,
117
+ "eval_runtime": 1.8922,
118
+ "eval_samples_per_second": 551.21,
119
+ "eval_steps_per_second": 4.756,
120
  "step": 469
121
  },
122
  {
123
  "epoch": 8.0,
124
+ "learning_rate": 3.3600000000000004e-05,
125
+ "loss": 0.6107,
126
  "step": 536
127
  },
128
  {
129
  "epoch": 8.0,
130
  "eval_accuracy": 0.6912751793861389,
131
+ "eval_loss": 0.618511438369751,
132
  "eval_matthews_correlation": 0.0,
133
+ "eval_runtime": 1.8868,
134
+ "eval_samples_per_second": 552.792,
135
+ "eval_steps_per_second": 4.77,
136
  "step": 536
137
  },
138
  {
139
  "epoch": 9.0,
140
+ "learning_rate": 3.28e-05,
141
+ "loss": 0.6086,
142
  "step": 603
143
  },
144
  {
145
  "epoch": 9.0,
146
  "eval_accuracy": 0.6912751793861389,
147
+ "eval_loss": 0.6259681582450867,
148
  "eval_matthews_correlation": 0.0,
149
+ "eval_runtime": 1.8926,
150
+ "eval_samples_per_second": 551.082,
151
+ "eval_steps_per_second": 4.755,
152
  "step": 603
153
  },
154
  {
155
  "epoch": 9.0,
156
  "step": 603,
157
  "total_flos": 1.1397379827695616e+16,
158
+ "train_loss": 0.6151945009753479,
159
+ "train_runtime": 493.5949,
160
+ "train_samples_per_second": 866.196,
161
+ "train_steps_per_second": 6.787
162
  }
163
  ],
164
  "max_steps": 3350,