Ahatsham committed on
Commit
90ca7ab
·
verified ·
1 Parent(s): c4fbe7e

Model save

Browse files
README.md CHANGED
@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unspecified dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.7193
22
- - Balanced Accuracy: 0.6060
23
- - Accuracy: 0.6354
24
 
25
  ## Model description
26
 
@@ -51,26 +51,26 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Balanced Accuracy | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:-----------------:|:--------:|
54
- | No log | 1.0 | 96 | 0.6971 | 0.5143 | 0.5677 |
55
- | No log | 2.0 | 192 | 0.6867 | 0.5831 | 0.5625 |
56
- | No log | 3.0 | 288 | 0.6729 | 0.5990 | 0.625 |
57
- | No log | 4.0 | 384 | 0.6751 | 0.5691 | 0.5521 |
58
- | No log | 5.0 | 480 | 0.6685 | 0.6224 | 0.6562 |
59
- | 0.6536 | 6.0 | 576 | 0.6699 | 0.6199 | 0.6562 |
60
- | 0.6536 | 7.0 | 672 | 0.6705 | 0.6219 | 0.6510 |
61
- | 0.6536 | 8.0 | 768 | 0.6747 | 0.6252 | 0.6562 |
62
- | 0.6536 | 9.0 | 864 | 0.6807 | 0.6254 | 0.6510 |
63
- | 0.6536 | 10.0 | 960 | 0.6782 | 0.6034 | 0.6302 |
64
- | 0.6132 | 11.0 | 1056 | 0.6869 | 0.5695 | 0.5885 |
65
- | 0.6132 | 12.0 | 1152 | 0.7084 | 0.5622 | 0.5469 |
66
- | 0.6132 | 13.0 | 1248 | 0.6915 | 0.6451 | 0.6771 |
67
- | 0.6132 | 14.0 | 1344 | 0.7046 | 0.5669 | 0.5781 |
68
- | 0.6132 | 15.0 | 1440 | 0.7032 | 0.6418 | 0.6719 |
69
- | 0.5715 | 16.0 | 1536 | 0.7011 | 0.6079 | 0.6354 |
70
- | 0.5715 | 17.0 | 1632 | 0.7048 | 0.6367 | 0.6667 |
71
- | 0.5715 | 18.0 | 1728 | 0.7149 | 0.6107 | 0.6406 |
72
- | 0.5715 | 19.0 | 1824 | 0.7165 | 0.5903 | 0.6146 |
73
- | 0.5715 | 20.0 | 1920 | 0.7193 | 0.6060 | 0.6354 |
74
 
75
 
76
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unspecified dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.6274
22
+ - Balanced Accuracy: 0.6563
23
+ - Accuracy: 0.6615
24
 
25
  ## Model description
26
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Balanced Accuracy | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:-----------------:|:--------:|
54
+ | No log | 1.0 | 96 | 0.6857 | 0.5462 | 0.5625 |
55
+ | No log | 2.0 | 192 | 0.6684 | 0.5758 | 0.5833 |
56
+ | No log | 3.0 | 288 | 0.7167 | 0.6384 | 0.6198 |
57
+ | No log | 4.0 | 384 | 0.6335 | 0.6179 | 0.625 |
58
+ | No log | 5.0 | 480 | 0.6574 | 0.6297 | 0.5990 |
59
+ | 0.6776 | 6.0 | 576 | 0.6322 | 0.6168 | 0.625 |
60
+ | 0.6776 | 7.0 | 672 | 0.6374 | 0.6114 | 0.6094 |
61
+ | 0.6776 | 8.0 | 768 | 0.6261 | 0.6278 | 0.6354 |
62
+ | 0.6776 | 9.0 | 864 | 0.6289 | 0.6651 | 0.6406 |
63
+ | 0.6776 | 10.0 | 960 | 0.6082 | 0.6368 | 0.6406 |
64
+ | 0.5732 | 11.0 | 1056 | 0.6036 | 0.6553 | 0.6615 |
65
+ | 0.5732 | 12.0 | 1152 | 0.6445 | 0.6870 | 0.6510 |
66
+ | 0.5732 | 13.0 | 1248 | 0.6094 | 0.6833 | 0.6875 |
67
+ | 0.5732 | 14.0 | 1344 | 0.6104 | 0.6607 | 0.6667 |
68
+ | 0.5732 | 15.0 | 1440 | 0.6553 | 0.6960 | 0.6927 |
69
+ | 0.5144 | 16.0 | 1536 | 0.6262 | 0.6603 | 0.6510 |
70
+ | 0.5144 | 17.0 | 1632 | 0.6154 | 0.6619 | 0.6667 |
71
+ | 0.5144 | 18.0 | 1728 | 0.6210 | 0.6619 | 0.6667 |
72
+ | 0.5144 | 19.0 | 1824 | 0.6293 | 0.6716 | 0.6771 |
73
+ | 0.5144 | 20.0 | 1920 | 0.6274 | 0.6563 | 0.6615 |
74
 
75
 
76
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef5e2e402a0ddf131b976809cc4ab1cbf763289e9bb43603fcbc2600db749c0
3
  size 54593240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55caaf3b258dd25f76e2edebe1001cb2019d391369eedb2700ef30b91034b3ac
3
  size 54593240
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "total_flos": 2.3164148658497126e+17,
4
- "train_loss": 0.5955550193786621,
5
- "train_runtime": 25564.1453,
6
  "train_samples": 768,
7
- "train_samples_per_second": 0.601,
8
- "train_steps_per_second": 0.075
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "total_flos": 2.492453077307228e+17,
4
+ "train_loss": 0.5602036555608113,
5
+ "train_runtime": 27837.1599,
6
  "train_samples": 768,
7
+ "train_samples_per_second": 0.552,
8
+ "train_steps_per_second": 0.069
9
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd751a4191592ee601080b532d487e2444024e3205900acca92b7c94791e29dd
3
- size 17210060
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00224c78475a2ff2e27556796824b19aed9bd853b1925d36728fd05fd9d7693b
3
+ size 17210230
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "total_flos": 2.3164148658497126e+17,
4
- "train_loss": 0.5955550193786621,
5
- "train_runtime": 25564.1453,
6
  "train_samples": 768,
7
- "train_samples_per_second": 0.601,
8
- "train_steps_per_second": 0.075
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "total_flos": 2.492453077307228e+17,
4
+ "train_loss": 0.5602036555608113,
5
+ "train_runtime": 27837.1599,
6
  "train_samples": 768,
7
+ "train_samples_per_second": 0.552,
8
+ "train_steps_per_second": 0.069
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6770833333333334,
3
- "best_model_checkpoint": "Output_llama3_80-20/checkpoint-1248",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 1920,
@@ -10,233 +10,233 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5677083333333334,
14
- "eval_balanced_accuracy": 0.5142820380854349,
15
- "eval_loss": 0.6970844864845276,
16
- "eval_runtime": 97.3238,
17
- "eval_samples_per_second": 1.973,
18
- "eval_steps_per_second": 0.247,
19
  "step": 96
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.5625,
24
- "eval_balanced_accuracy": 0.5830662097933017,
25
- "eval_loss": 0.6866571307182312,
26
- "eval_runtime": 102.2158,
27
- "eval_samples_per_second": 1.878,
28
- "eval_steps_per_second": 0.235,
29
  "step": 192
30
  },
31
  {
32
  "epoch": 3.0,
33
- "eval_accuracy": 0.625,
34
- "eval_balanced_accuracy": 0.598974358974359,
35
- "eval_loss": 0.6729380488395691,
36
- "eval_runtime": 103.7077,
37
- "eval_samples_per_second": 1.851,
38
- "eval_steps_per_second": 0.231,
39
  "step": 288
40
  },
41
  {
42
  "epoch": 4.0,
43
- "eval_accuracy": 0.5520833333333334,
44
- "eval_balanced_accuracy": 0.5691039032435404,
45
- "eval_loss": 0.6750795841217041,
46
- "eval_runtime": 89.7713,
47
- "eval_samples_per_second": 2.139,
48
- "eval_steps_per_second": 0.267,
49
  "step": 384
50
  },
51
  {
52
  "epoch": 5.0,
53
- "eval_accuracy": 0.65625,
54
- "eval_balanced_accuracy": 0.6223698781838316,
55
- "eval_loss": 0.6684662699699402,
56
- "eval_runtime": 106.6219,
57
- "eval_samples_per_second": 1.801,
58
- "eval_steps_per_second": 0.225,
59
  "step": 480
60
  },
61
  {
62
  "epoch": 5.208333333333333,
63
- "grad_norm": 3.4840712547302246,
64
  "learning_rate": 7.395833333333335e-06,
65
- "loss": 0.6536,
66
  "step": 500
67
  },
68
  {
69
  "epoch": 6.0,
70
- "eval_accuracy": 0.65625,
71
- "eval_balanced_accuracy": 0.6198547215496368,
72
- "eval_loss": 0.6698883175849915,
73
- "eval_runtime": 93.5707,
74
- "eval_samples_per_second": 2.052,
75
- "eval_steps_per_second": 0.256,
76
  "step": 576
77
  },
78
  {
79
  "epoch": 7.0,
80
- "eval_accuracy": 0.6510416666666666,
81
- "eval_balanced_accuracy": 0.6218969555035129,
82
- "eval_loss": 0.6704875826835632,
83
- "eval_runtime": 101.5333,
84
- "eval_samples_per_second": 1.891,
85
- "eval_steps_per_second": 0.236,
86
  "step": 672
87
  },
88
  {
89
  "epoch": 8.0,
90
- "eval_accuracy": 0.65625,
91
- "eval_balanced_accuracy": 0.6251940298507462,
92
- "eval_loss": 0.6747407913208008,
93
- "eval_runtime": 104.8955,
94
- "eval_samples_per_second": 1.83,
95
- "eval_steps_per_second": 0.229,
96
  "step": 768
97
  },
98
  {
99
  "epoch": 9.0,
100
- "eval_accuracy": 0.6510416666666666,
101
- "eval_balanced_accuracy": 0.6254008245533669,
102
- "eval_loss": 0.6806783676147461,
103
- "eval_runtime": 100.5578,
104
- "eval_samples_per_second": 1.909,
105
- "eval_steps_per_second": 0.239,
106
  "step": 864
107
  },
108
  {
109
  "epoch": 10.0,
110
- "eval_accuracy": 0.6302083333333334,
111
- "eval_balanced_accuracy": 0.603412734768667,
112
- "eval_loss": 0.6781837344169617,
113
- "eval_runtime": 99.3329,
114
- "eval_samples_per_second": 1.933,
115
- "eval_steps_per_second": 0.242,
116
  "step": 960
117
  },
118
  {
119
  "epoch": 10.416666666666666,
120
- "grad_norm": 31.503376007080078,
121
  "learning_rate": 4.791666666666668e-06,
122
- "loss": 0.6132,
123
  "step": 1000
124
  },
125
  {
126
  "epoch": 11.0,
127
- "eval_accuracy": 0.5885416666666666,
128
- "eval_balanced_accuracy": 0.5695121951219512,
129
- "eval_loss": 0.6868965029716492,
130
- "eval_runtime": 103.0036,
131
- "eval_samples_per_second": 1.864,
132
- "eval_steps_per_second": 0.233,
133
  "step": 1056
134
  },
135
  {
136
  "epoch": 12.0,
137
- "eval_accuracy": 0.546875,
138
- "eval_balanced_accuracy": 0.5621983326020185,
139
- "eval_loss": 0.708362877368927,
140
- "eval_runtime": 104.4672,
141
- "eval_samples_per_second": 1.838,
142
- "eval_steps_per_second": 0.23,
143
  "step": 1152
144
  },
145
  {
146
  "epoch": 13.0,
147
- "eval_accuracy": 0.6770833333333334,
148
- "eval_balanced_accuracy": 0.645100738330622,
149
- "eval_loss": 0.6914852261543274,
150
- "eval_runtime": 111.3687,
151
- "eval_samples_per_second": 1.724,
152
- "eval_steps_per_second": 0.216,
153
  "step": 1248
154
  },
155
  {
156
  "epoch": 14.0,
157
- "eval_accuracy": 0.578125,
158
- "eval_balanced_accuracy": 0.5668706293706294,
159
- "eval_loss": 0.7045938968658447,
160
- "eval_runtime": 102.8426,
161
- "eval_samples_per_second": 1.867,
162
- "eval_steps_per_second": 0.233,
163
  "step": 1344
164
  },
165
  {
166
  "epoch": 15.0,
167
- "eval_accuracy": 0.671875,
168
- "eval_balanced_accuracy": 0.6417748917748918,
169
- "eval_loss": 0.7031722068786621,
170
- "eval_runtime": 101.3122,
171
- "eval_samples_per_second": 1.895,
172
- "eval_steps_per_second": 0.237,
173
  "step": 1440
174
  },
175
  {
176
  "epoch": 15.625,
177
- "grad_norm": 10.083698272705078,
178
  "learning_rate": 2.1875000000000002e-06,
179
- "loss": 0.5715,
180
  "step": 1500
181
  },
182
  {
183
  "epoch": 16.0,
184
- "eval_accuracy": 0.6354166666666666,
185
- "eval_balanced_accuracy": 0.6079198802808794,
186
- "eval_loss": 0.7011265754699707,
187
- "eval_runtime": 100.5947,
188
- "eval_samples_per_second": 1.909,
189
- "eval_steps_per_second": 0.239,
190
  "step": 1536
191
  },
192
  {
193
  "epoch": 17.0,
194
  "eval_accuracy": 0.6666666666666666,
195
- "eval_balanced_accuracy": 0.6366567164179104,
196
- "eval_loss": 0.7047572135925293,
197
- "eval_runtime": 93.643,
198
- "eval_samples_per_second": 2.05,
199
- "eval_steps_per_second": 0.256,
200
  "step": 1632
201
  },
202
  {
203
  "epoch": 18.0,
204
- "eval_accuracy": 0.640625,
205
- "eval_balanced_accuracy": 0.610655737704918,
206
- "eval_loss": 0.7149026989936829,
207
- "eval_runtime": 78.6581,
208
- "eval_samples_per_second": 2.441,
209
- "eval_steps_per_second": 0.305,
210
  "step": 1728
211
  },
212
  {
213
  "epoch": 19.0,
214
- "eval_accuracy": 0.6145833333333334,
215
- "eval_balanced_accuracy": 0.5902879728966686,
216
- "eval_loss": 0.7165386080741882,
217
- "eval_runtime": 78.477,
218
- "eval_samples_per_second": 2.447,
219
- "eval_steps_per_second": 0.306,
220
  "step": 1824
221
  },
222
  {
223
  "epoch": 20.0,
224
- "eval_accuracy": 0.6354166666666666,
225
- "eval_balanced_accuracy": 0.6059830054708416,
226
- "eval_loss": 0.719306468963623,
227
- "eval_runtime": 79.9559,
228
- "eval_samples_per_second": 2.401,
229
- "eval_steps_per_second": 0.3,
230
  "step": 1920
231
  },
232
  {
233
  "epoch": 20.0,
234
  "step": 1920,
235
- "total_flos": 2.3164148658497126e+17,
236
- "train_loss": 0.5955550193786621,
237
- "train_runtime": 25564.1453,
238
- "train_samples_per_second": 0.601,
239
- "train_steps_per_second": 0.075
240
  }
241
  ],
242
  "logging_steps": 500,
@@ -251,7 +251,7 @@
251
  "early_stopping_threshold": 0.0
252
  },
253
  "attributes": {
254
- "early_stopping_patience_counter": 7
255
  }
256
  },
257
  "TrainerControl": {
@@ -265,7 +265,7 @@
265
  "attributes": {}
266
  }
267
  },
268
- "total_flos": 2.3164148658497126e+17,
269
  "train_batch_size": 8,
270
  "trial_name": null,
271
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6927083333333334,
3
+ "best_model_checkpoint": "Output_llama3_80-20/checkpoint-1440",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 1920,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5625,
14
+ "eval_balanced_accuracy": 0.5461538461538462,
15
+ "eval_loss": 0.6857039928436279,
16
+ "eval_runtime": 73.4129,
17
+ "eval_samples_per_second": 2.615,
18
+ "eval_steps_per_second": 0.327,
19
  "step": 96
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.5833333333333334,
24
+ "eval_balanced_accuracy": 0.5758314855875832,
25
+ "eval_loss": 0.6683754324913025,
26
+ "eval_runtime": 104.3141,
27
+ "eval_samples_per_second": 1.841,
28
+ "eval_steps_per_second": 0.23,
29
  "step": 192
30
  },
31
  {
32
  "epoch": 3.0,
33
+ "eval_accuracy": 0.6197916666666666,
34
+ "eval_balanced_accuracy": 0.6383647798742138,
35
+ "eval_loss": 0.7166934609413147,
36
+ "eval_runtime": 110.585,
37
+ "eval_samples_per_second": 1.736,
38
+ "eval_steps_per_second": 0.217,
39
  "step": 288
40
  },
41
  {
42
  "epoch": 4.0,
43
+ "eval_accuracy": 0.625,
44
+ "eval_balanced_accuracy": 0.6178571428571429,
45
+ "eval_loss": 0.6334595084190369,
46
+ "eval_runtime": 109.4964,
47
+ "eval_samples_per_second": 1.753,
48
+ "eval_steps_per_second": 0.219,
49
  "step": 384
50
  },
51
  {
52
  "epoch": 5.0,
53
+ "eval_accuracy": 0.5989583333333334,
54
+ "eval_balanced_accuracy": 0.6296716417910448,
55
+ "eval_loss": 0.6574041843414307,
56
+ "eval_runtime": 89.6626,
57
+ "eval_samples_per_second": 2.141,
58
+ "eval_steps_per_second": 0.268,
59
  "step": 480
60
  },
61
  {
62
  "epoch": 5.208333333333333,
63
+ "grad_norm": 72.83433532714844,
64
  "learning_rate": 7.395833333333335e-06,
65
+ "loss": 0.6776,
66
  "step": 500
67
  },
68
  {
69
  "epoch": 6.0,
70
+ "eval_accuracy": 0.625,
71
+ "eval_balanced_accuracy": 0.6168117269812186,
72
+ "eval_loss": 0.6321956515312195,
73
+ "eval_runtime": 106.1665,
74
+ "eval_samples_per_second": 1.808,
75
+ "eval_steps_per_second": 0.226,
76
  "step": 576
77
  },
78
  {
79
  "epoch": 7.0,
80
+ "eval_accuracy": 0.609375,
81
+ "eval_balanced_accuracy": 0.6114369501466275,
82
+ "eval_loss": 0.6374137997627258,
83
+ "eval_runtime": 107.701,
84
+ "eval_samples_per_second": 1.783,
85
+ "eval_steps_per_second": 0.223,
86
  "step": 672
87
  },
88
  {
89
  "epoch": 8.0,
90
+ "eval_accuracy": 0.6354166666666666,
91
+ "eval_balanced_accuracy": 0.6277777777777778,
92
+ "eval_loss": 0.6261330246925354,
93
+ "eval_runtime": 107.9832,
94
+ "eval_samples_per_second": 1.778,
95
+ "eval_steps_per_second": 0.222,
96
  "step": 768
97
  },
98
  {
99
  "epoch": 9.0,
100
+ "eval_accuracy": 0.640625,
101
+ "eval_balanced_accuracy": 0.6650742488776332,
102
+ "eval_loss": 0.6289492249488831,
103
+ "eval_runtime": 111.0235,
104
+ "eval_samples_per_second": 1.729,
105
+ "eval_steps_per_second": 0.216,
106
  "step": 864
107
  },
108
  {
109
  "epoch": 10.0,
110
+ "eval_accuracy": 0.640625,
111
+ "eval_balanced_accuracy": 0.6367950256354314,
112
+ "eval_loss": 0.6082468628883362,
113
+ "eval_runtime": 96.7719,
114
+ "eval_samples_per_second": 1.984,
115
+ "eval_steps_per_second": 0.248,
116
  "step": 960
117
  },
118
  {
119
  "epoch": 10.416666666666666,
120
+ "grad_norm": 18.0618839263916,
121
  "learning_rate": 4.791666666666668e-06,
122
+ "loss": 0.5732,
123
  "step": 1000
124
  },
125
  {
126
  "epoch": 11.0,
127
+ "eval_accuracy": 0.6614583333333334,
128
+ "eval_balanced_accuracy": 0.6552593256413128,
129
+ "eval_loss": 0.603647768497467,
130
+ "eval_runtime": 103.3538,
131
+ "eval_samples_per_second": 1.858,
132
+ "eval_steps_per_second": 0.232,
133
  "step": 1056
134
  },
135
  {
136
  "epoch": 12.0,
137
+ "eval_accuracy": 0.6510416666666666,
138
+ "eval_balanced_accuracy": 0.6869850746268656,
139
+ "eval_loss": 0.6444854140281677,
140
+ "eval_runtime": 107.7167,
141
+ "eval_samples_per_second": 1.782,
142
+ "eval_steps_per_second": 0.223,
143
  "step": 1152
144
  },
145
  {
146
  "epoch": 13.0,
147
+ "eval_accuracy": 0.6875,
148
+ "eval_balanced_accuracy": 0.6833333333333333,
149
+ "eval_loss": 0.6093500852584839,
150
+ "eval_runtime": 101.5591,
151
+ "eval_samples_per_second": 1.891,
152
+ "eval_steps_per_second": 0.236,
153
  "step": 1248
154
  },
155
  {
156
  "epoch": 14.0,
157
+ "eval_accuracy": 0.6666666666666666,
158
+ "eval_balanced_accuracy": 0.6607142857142857,
159
+ "eval_loss": 0.6103670001029968,
160
+ "eval_runtime": 113.5782,
161
+ "eval_samples_per_second": 1.69,
162
+ "eval_steps_per_second": 0.211,
163
  "step": 1344
164
  },
165
  {
166
  "epoch": 15.0,
167
+ "eval_accuracy": 0.6927083333333334,
168
+ "eval_balanced_accuracy": 0.6959704667751221,
169
+ "eval_loss": 0.6553041338920593,
170
+ "eval_runtime": 100.5241,
171
+ "eval_samples_per_second": 1.91,
172
+ "eval_steps_per_second": 0.239,
173
  "step": 1440
174
  },
175
  {
176
  "epoch": 15.625,
177
+ "grad_norm": 14.87450885772705,
178
  "learning_rate": 2.1875000000000002e-06,
179
+ "loss": 0.5144,
180
  "step": 1500
181
  },
182
  {
183
  "epoch": 16.0,
184
+ "eval_accuracy": 0.6510416666666666,
185
+ "eval_balanced_accuracy": 0.6603078614623419,
186
+ "eval_loss": 0.6261806488037109,
187
+ "eval_runtime": 106.7804,
188
+ "eval_samples_per_second": 1.798,
189
+ "eval_steps_per_second": 0.225,
190
  "step": 1536
191
  },
192
  {
193
  "epoch": 17.0,
194
  "eval_accuracy": 0.6666666666666666,
195
+ "eval_balanced_accuracy": 0.6619131197893813,
196
+ "eval_loss": 0.6154211163520813,
197
+ "eval_runtime": 86.3959,
198
+ "eval_samples_per_second": 2.222,
199
+ "eval_steps_per_second": 0.278,
200
  "step": 1632
201
  },
202
  {
203
  "epoch": 18.0,
204
+ "eval_accuracy": 0.6666666666666666,
205
+ "eval_balanced_accuracy": 0.6619131197893813,
206
+ "eval_loss": 0.6210435032844543,
207
+ "eval_runtime": 88.4219,
208
+ "eval_samples_per_second": 2.171,
209
+ "eval_steps_per_second": 0.271,
210
  "step": 1728
211
  },
212
  {
213
  "epoch": 19.0,
214
+ "eval_accuracy": 0.6770833333333334,
215
+ "eval_balanced_accuracy": 0.6716186252771619,
216
+ "eval_loss": 0.6292756795883179,
217
+ "eval_runtime": 87.5484,
218
+ "eval_samples_per_second": 2.193,
219
+ "eval_steps_per_second": 0.274,
220
  "step": 1824
221
  },
222
  {
223
  "epoch": 20.0,
224
+ "eval_accuracy": 0.6614583333333334,
225
+ "eval_balanced_accuracy": 0.6563496426608026,
226
+ "eval_loss": 0.6274305582046509,
227
+ "eval_runtime": 65.5102,
228
+ "eval_samples_per_second": 2.931,
229
+ "eval_steps_per_second": 0.366,
230
  "step": 1920
231
  },
232
  {
233
  "epoch": 20.0,
234
  "step": 1920,
235
+ "total_flos": 2.492453077307228e+17,
236
+ "train_loss": 0.5602036555608113,
237
+ "train_runtime": 27837.1599,
238
+ "train_samples_per_second": 0.552,
239
+ "train_steps_per_second": 0.069
240
  }
241
  ],
242
  "logging_steps": 500,
 
251
  "early_stopping_threshold": 0.0
252
  },
253
  "attributes": {
254
+ "early_stopping_patience_counter": 5
255
  }
256
  },
257
  "TrainerControl": {
 
265
  "attributes": {}
266
  }
267
  },
268
+ "total_flos": 2.492453077307228e+17,
269
  "train_batch_size": 8,
270
  "trial_name": null,
271
  "trial_params": null