zkdeng committed on
Commit
38f98de
·
1 Parent(s): d1b1b9e

End of training

Browse files
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: microsoft/resnet-50
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: resnet-50-finetuned-combinedSpiders
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # resnet-50-finetuned-combinedSpiders
15
+
16
+ This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - eval_loss: 0.3794
19
+ - eval_accuracy: 0.8996
20
+ - eval_precision: 0.8983
21
+ - eval_recall: 0.8934
22
+ - eval_f1: 0.8943
23
+ - eval_runtime: 14.9052
24
+ - eval_samples_per_second: 181.145
25
+ - eval_steps_per_second: 11.338
26
+ - step: 0
27
+
28
+ ## Model description
29
+
30
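+ A ResNet-50 image classifier (`ResNetForImageClassification`) fine-tuned from [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) for single-label classification over the 10 classes listed under `id2label` in `config.json`. More information needed.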
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-05
46
+ - train_batch_size: 16
47
+ - eval_batch_size: 16
48
+ - seed: 42
49
+ - gradient_accumulation_steps: 4
50
+ - total_train_batch_size: 64
51
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
+ - lr_scheduler_type: linear
53
+ - lr_scheduler_warmup_ratio: 0.1
54
+ - num_epochs: 4
55
+
56
+ ### Framework versions
57
+
58
+ - Transformers 4.35.0
59
+ - PyTorch 2.1.0+cu118
60
+ - Datasets 2.14.6
61
+ - Tokenizers 0.14.1
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.99,
3
+ "eval_accuracy": 0.8996296296296297,
4
+ "eval_f1": 0.894271485857943,
5
+ "eval_loss": 0.379376620054245,
6
+ "eval_precision": 0.8983431695901203,
7
+ "eval_recall": 0.8933698002925257,
8
+ "eval_runtime": 14.9052,
9
+ "eval_samples_per_second": 181.145,
10
+ "eval_steps_per_second": 11.338,
11
+ "total_flos": 1.8332979581807493e+18,
12
+ "train_loss": 1.1345372907485736,
13
+ "train_runtime": 1112.1938,
14
+ "train_samples_per_second": 77.684,
15
+ "train_steps_per_second": 1.212
16
+ }
config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/resnet-50",
3
+ "architectures": [
4
+ "ResNetForImageClassification"
5
+ ],
6
+ "depths": [
7
+ 3,
8
+ 4,
9
+ 6,
10
+ 3
11
+ ],
12
+ "downsample_in_bottleneck": false,
13
+ "downsample_in_first_stage": false,
14
+ "embedding_size": 64,
15
+ "hidden_act": "relu",
16
+ "hidden_sizes": [
17
+ 256,
18
+ 512,
19
+ 1024,
20
+ 2048
21
+ ],
22
+ "id2label": {
23
+ "0": "annual crop",
24
+ "1": "forest",
25
+ "2": "herbaceous vegetation",
26
+ "3": "highway",
27
+ "4": "industrial",
28
+ "5": "pasture",
29
+ "6": "permanent crop",
30
+ "7": "residential",
31
+ "8": "river",
32
+ "9": "sea or lake"
33
+ },
34
+ "label2id": {
35
+ "annual crop": 0,
36
+ "forest": 1,
37
+ "herbaceous vegetation": 2,
38
+ "highway": 3,
39
+ "industrial": 4,
40
+ "pasture": 5,
41
+ "permanent crop": 6,
42
+ "residential": 7,
43
+ "river": 8,
44
+ "sea or lake": 9
45
+ },
46
+ "layer_type": "bottleneck",
47
+ "model_type": "resnet",
48
+ "num_channels": 3,
49
+ "out_features": [
50
+ "stage4"
51
+ ],
52
+ "out_indices": [
53
+ 4
54
+ ],
55
+ "problem_type": "single_label_classification",
56
+ "stage_names": [
57
+ "stem",
58
+ "stage1",
59
+ "stage2",
60
+ "stage3",
61
+ "stage4"
62
+ ],
63
+ "torch_dtype": "float32",
64
+ "transformers_version": "4.35.0"
65
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.8996296296296297,
3
+ "eval_f1": 0.894271485857943,
4
+ "eval_loss": 0.379376620054245,
5
+ "eval_precision": 0.8983431695901203,
6
+ "eval_recall": 0.8933698002925257,
7
+ "eval_runtime": 14.9052,
8
+ "eval_samples_per_second": 181.145,
9
+ "eval_steps_per_second": 11.338
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d4b20265dd4e5e74dcb81e8d3f6de5ebe97a827e7374b8bf3bbc8cc149d6d6
3
+ size 94368544
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_pct": 0.875,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "shortest_edge": 224
21
+ }
22
+ }
runs/Nov13_19-50-32_d01704b0a29d/events.out.tfevents.1699905032.d01704b0a29d.625.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20cfd35552df19e0b03a69e507505efae76c09a5aef48604522d8cb318cfba0c
3
+ size 27996
runs/Nov13_19-50-32_d01704b0a29d/events.out.tfevents.1699906160.d01704b0a29d.625.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d8000e2f12341736975e0bd98ee5eb13a03a98853f48f879186ea27d4342a7
3
+ size 486
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.99,
3
+ "total_flos": 1.8332979581807493e+18,
4
+ "train_loss": 1.1345372907485736,
5
+ "train_runtime": 1112.1938,
6
+ "train_samples_per_second": 77.684,
7
+ "train_steps_per_second": 1.212
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,880 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8974074074074074,
3
+ "best_model_checkpoint": "resnet-50-finetuned-combinedSpiders/checkpoint-1348",
4
+ "epoch": 3.9940740740740743,
5
+ "eval_steps": 500,
6
+ "global_step": 1348,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 3.7037037037037037e-06,
14
+ "loss": 2.3139,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.06,
19
+ "learning_rate": 7.4074074074074075e-06,
20
+ "loss": 2.3199,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.09,
25
+ "learning_rate": 1.1111111111111112e-05,
26
+ "loss": 2.3141,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.12,
31
+ "learning_rate": 1.4814814814814815e-05,
32
+ "loss": 2.3051,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.15,
37
+ "learning_rate": 1.8518518518518518e-05,
38
+ "loss": 2.3055,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.18,
43
+ "learning_rate": 2.2222222222222223e-05,
44
+ "loss": 2.2966,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.21,
49
+ "learning_rate": 2.5925925925925925e-05,
50
+ "loss": 2.2891,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.24,
55
+ "learning_rate": 2.962962962962963e-05,
56
+ "loss": 2.2871,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.27,
61
+ "learning_rate": 3.3333333333333335e-05,
62
+ "loss": 2.2767,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.3,
67
+ "learning_rate": 3.7037037037037037e-05,
68
+ "loss": 2.2604,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.33,
73
+ "learning_rate": 4.074074074074074e-05,
74
+ "loss": 2.254,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.36,
79
+ "learning_rate": 4.4444444444444447e-05,
80
+ "loss": 2.2329,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.39,
85
+ "learning_rate": 4.814814814814815e-05,
86
+ "loss": 2.2251,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.41,
91
+ "learning_rate": 4.979389942291839e-05,
92
+ "loss": 2.2012,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 0.44,
97
+ "learning_rate": 4.9381698268755155e-05,
98
+ "loss": 2.1776,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 0.47,
103
+ "learning_rate": 4.896949711459192e-05,
104
+ "loss": 2.1594,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 0.5,
109
+ "learning_rate": 4.8557295960428687e-05,
110
+ "loss": 2.1244,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 0.53,
115
+ "learning_rate": 4.814509480626546e-05,
116
+ "loss": 2.0911,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 0.56,
121
+ "learning_rate": 4.7732893652102225e-05,
122
+ "loss": 2.071,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 0.59,
127
+ "learning_rate": 4.7320692497939e-05,
128
+ "loss": 2.0046,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 0.62,
133
+ "learning_rate": 4.6908491343775764e-05,
134
+ "loss": 1.9742,
135
+ "step": 210
136
+ },
137
+ {
138
+ "epoch": 0.65,
139
+ "learning_rate": 4.649629018961254e-05,
140
+ "loss": 1.9107,
141
+ "step": 220
142
+ },
143
+ {
144
+ "epoch": 0.68,
145
+ "learning_rate": 4.60840890354493e-05,
146
+ "loss": 1.8838,
147
+ "step": 230
148
+ },
149
+ {
150
+ "epoch": 0.71,
151
+ "learning_rate": 4.567188788128607e-05,
152
+ "loss": 1.8305,
153
+ "step": 240
154
+ },
155
+ {
156
+ "epoch": 0.74,
157
+ "learning_rate": 4.525968672712284e-05,
158
+ "loss": 1.7924,
159
+ "step": 250
160
+ },
161
+ {
162
+ "epoch": 0.77,
163
+ "learning_rate": 4.484748557295961e-05,
164
+ "loss": 1.7411,
165
+ "step": 260
166
+ },
167
+ {
168
+ "epoch": 0.8,
169
+ "learning_rate": 4.443528441879637e-05,
170
+ "loss": 1.6658,
171
+ "step": 270
172
+ },
173
+ {
174
+ "epoch": 0.83,
175
+ "learning_rate": 4.402308326463314e-05,
176
+ "loss": 1.6818,
177
+ "step": 280
178
+ },
179
+ {
180
+ "epoch": 0.86,
181
+ "learning_rate": 4.361088211046991e-05,
182
+ "loss": 1.5883,
183
+ "step": 290
184
+ },
185
+ {
186
+ "epoch": 0.89,
187
+ "learning_rate": 4.319868095630668e-05,
188
+ "loss": 1.5316,
189
+ "step": 300
190
+ },
191
+ {
192
+ "epoch": 0.92,
193
+ "learning_rate": 4.278647980214345e-05,
194
+ "loss": 1.5345,
195
+ "step": 310
196
+ },
197
+ {
198
+ "epoch": 0.95,
199
+ "learning_rate": 4.2374278647980216e-05,
200
+ "loss": 1.4529,
201
+ "step": 320
202
+ },
203
+ {
204
+ "epoch": 0.98,
205
+ "learning_rate": 4.196207749381699e-05,
206
+ "loss": 1.4418,
207
+ "step": 330
208
+ },
209
+ {
210
+ "epoch": 1.0,
211
+ "eval_accuracy": 0.6692592592592592,
212
+ "eval_f1": 0.6056123000612204,
213
+ "eval_loss": 1.2732560634613037,
214
+ "eval_precision": 0.6781768119793957,
215
+ "eval_recall": 0.6388902436275756,
216
+ "eval_runtime": 15.6445,
217
+ "eval_samples_per_second": 172.584,
218
+ "eval_steps_per_second": 10.803,
219
+ "step": 337
220
+ },
221
+ {
222
+ "epoch": 1.01,
223
+ "learning_rate": 4.1549876339653755e-05,
224
+ "loss": 1.4001,
225
+ "step": 340
226
+ },
227
+ {
228
+ "epoch": 1.04,
229
+ "learning_rate": 4.113767518549052e-05,
230
+ "loss": 1.3676,
231
+ "step": 350
232
+ },
233
+ {
234
+ "epoch": 1.07,
235
+ "learning_rate": 4.072547403132729e-05,
236
+ "loss": 1.3273,
237
+ "step": 360
238
+ },
239
+ {
240
+ "epoch": 1.1,
241
+ "learning_rate": 4.031327287716406e-05,
242
+ "loss": 1.3226,
243
+ "step": 370
244
+ },
245
+ {
246
+ "epoch": 1.13,
247
+ "learning_rate": 3.9901071723000826e-05,
248
+ "loss": 1.2902,
249
+ "step": 380
250
+ },
251
+ {
252
+ "epoch": 1.16,
253
+ "learning_rate": 3.948887056883759e-05,
254
+ "loss": 1.2565,
255
+ "step": 390
256
+ },
257
+ {
258
+ "epoch": 1.19,
259
+ "learning_rate": 3.907666941467436e-05,
260
+ "loss": 1.2411,
261
+ "step": 400
262
+ },
263
+ {
264
+ "epoch": 1.21,
265
+ "learning_rate": 3.866446826051113e-05,
266
+ "loss": 1.2337,
267
+ "step": 410
268
+ },
269
+ {
270
+ "epoch": 1.24,
271
+ "learning_rate": 3.82522671063479e-05,
272
+ "loss": 1.1719,
273
+ "step": 420
274
+ },
275
+ {
276
+ "epoch": 1.27,
277
+ "learning_rate": 3.784006595218467e-05,
278
+ "loss": 1.1746,
279
+ "step": 430
280
+ },
281
+ {
282
+ "epoch": 1.3,
283
+ "learning_rate": 3.7427864798021435e-05,
284
+ "loss": 1.1444,
285
+ "step": 440
286
+ },
287
+ {
288
+ "epoch": 1.33,
289
+ "learning_rate": 3.701566364385821e-05,
290
+ "loss": 1.1284,
291
+ "step": 450
292
+ },
293
+ {
294
+ "epoch": 1.36,
295
+ "learning_rate": 3.6603462489694974e-05,
296
+ "loss": 1.09,
297
+ "step": 460
298
+ },
299
+ {
300
+ "epoch": 1.39,
301
+ "learning_rate": 3.619126133553174e-05,
302
+ "loss": 1.1276,
303
+ "step": 470
304
+ },
305
+ {
306
+ "epoch": 1.42,
307
+ "learning_rate": 3.577906018136851e-05,
308
+ "loss": 1.0837,
309
+ "step": 480
310
+ },
311
+ {
312
+ "epoch": 1.45,
313
+ "learning_rate": 3.536685902720528e-05,
314
+ "loss": 1.0355,
315
+ "step": 490
316
+ },
317
+ {
318
+ "epoch": 1.48,
319
+ "learning_rate": 3.4954657873042044e-05,
320
+ "loss": 0.9847,
321
+ "step": 500
322
+ },
323
+ {
324
+ "epoch": 1.51,
325
+ "learning_rate": 3.454245671887881e-05,
326
+ "loss": 1.0393,
327
+ "step": 510
328
+ },
329
+ {
330
+ "epoch": 1.54,
331
+ "learning_rate": 3.413025556471558e-05,
332
+ "loss": 1.008,
333
+ "step": 520
334
+ },
335
+ {
336
+ "epoch": 1.57,
337
+ "learning_rate": 3.371805441055235e-05,
338
+ "loss": 0.977,
339
+ "step": 530
340
+ },
341
+ {
342
+ "epoch": 1.6,
343
+ "learning_rate": 3.330585325638912e-05,
344
+ "loss": 0.9954,
345
+ "step": 540
346
+ },
347
+ {
348
+ "epoch": 1.63,
349
+ "learning_rate": 3.289365210222589e-05,
350
+ "loss": 0.9811,
351
+ "step": 550
352
+ },
353
+ {
354
+ "epoch": 1.66,
355
+ "learning_rate": 3.248145094806266e-05,
356
+ "loss": 1.0519,
357
+ "step": 560
358
+ },
359
+ {
360
+ "epoch": 1.69,
361
+ "learning_rate": 3.2069249793899426e-05,
362
+ "loss": 0.9764,
363
+ "step": 570
364
+ },
365
+ {
366
+ "epoch": 1.72,
367
+ "learning_rate": 3.165704863973619e-05,
368
+ "loss": 0.9426,
369
+ "step": 580
370
+ },
371
+ {
372
+ "epoch": 1.75,
373
+ "learning_rate": 3.124484748557296e-05,
374
+ "loss": 0.9598,
375
+ "step": 590
376
+ },
377
+ {
378
+ "epoch": 1.78,
379
+ "learning_rate": 3.083264633140973e-05,
380
+ "loss": 0.9344,
381
+ "step": 600
382
+ },
383
+ {
384
+ "epoch": 1.81,
385
+ "learning_rate": 3.0420445177246497e-05,
386
+ "loss": 0.9049,
387
+ "step": 610
388
+ },
389
+ {
390
+ "epoch": 1.84,
391
+ "learning_rate": 3.0008244023083266e-05,
392
+ "loss": 0.8284,
393
+ "step": 620
394
+ },
395
+ {
396
+ "epoch": 1.87,
397
+ "learning_rate": 2.9596042868920032e-05,
398
+ "loss": 0.9119,
399
+ "step": 630
400
+ },
401
+ {
402
+ "epoch": 1.9,
403
+ "learning_rate": 2.9183841714756805e-05,
404
+ "loss": 0.8489,
405
+ "step": 640
406
+ },
407
+ {
408
+ "epoch": 1.93,
409
+ "learning_rate": 2.877164056059357e-05,
410
+ "loss": 0.8227,
411
+ "step": 650
412
+ },
413
+ {
414
+ "epoch": 1.96,
415
+ "learning_rate": 2.8359439406430337e-05,
416
+ "loss": 0.8788,
417
+ "step": 660
418
+ },
419
+ {
420
+ "epoch": 1.99,
421
+ "learning_rate": 2.7947238252267106e-05,
422
+ "loss": 0.9058,
423
+ "step": 670
424
+ },
425
+ {
426
+ "epoch": 2.0,
427
+ "eval_accuracy": 0.8525925925925926,
428
+ "eval_f1": 0.8410753034302768,
429
+ "eval_loss": 0.5913922786712646,
430
+ "eval_precision": 0.8467806416477608,
431
+ "eval_recall": 0.840061365110396,
432
+ "eval_runtime": 14.4146,
433
+ "eval_samples_per_second": 187.31,
434
+ "eval_steps_per_second": 11.724,
435
+ "step": 675
436
+ },
437
+ {
438
+ "epoch": 2.01,
439
+ "learning_rate": 2.753503709810388e-05,
440
+ "loss": 0.8246,
441
+ "step": 680
442
+ },
443
+ {
444
+ "epoch": 2.04,
445
+ "learning_rate": 2.7122835943940645e-05,
446
+ "loss": 0.8661,
447
+ "step": 690
448
+ },
449
+ {
450
+ "epoch": 2.07,
451
+ "learning_rate": 2.671063478977741e-05,
452
+ "loss": 0.9393,
453
+ "step": 700
454
+ },
455
+ {
456
+ "epoch": 2.1,
457
+ "learning_rate": 2.6298433635614183e-05,
458
+ "loss": 0.8919,
459
+ "step": 710
460
+ },
461
+ {
462
+ "epoch": 2.13,
463
+ "learning_rate": 2.588623248145095e-05,
464
+ "loss": 0.8454,
465
+ "step": 720
466
+ },
467
+ {
468
+ "epoch": 2.16,
469
+ "learning_rate": 2.547403132728772e-05,
470
+ "loss": 0.8213,
471
+ "step": 730
472
+ },
473
+ {
474
+ "epoch": 2.19,
475
+ "learning_rate": 2.5061830173124485e-05,
476
+ "loss": 0.8213,
477
+ "step": 740
478
+ },
479
+ {
480
+ "epoch": 2.22,
481
+ "learning_rate": 2.4649629018961254e-05,
482
+ "loss": 0.7937,
483
+ "step": 750
484
+ },
485
+ {
486
+ "epoch": 2.25,
487
+ "learning_rate": 2.4237427864798023e-05,
488
+ "loss": 0.7901,
489
+ "step": 760
490
+ },
491
+ {
492
+ "epoch": 2.28,
493
+ "learning_rate": 2.382522671063479e-05,
494
+ "loss": 0.8005,
495
+ "step": 770
496
+ },
497
+ {
498
+ "epoch": 2.31,
499
+ "learning_rate": 2.341302555647156e-05,
500
+ "loss": 0.776,
501
+ "step": 780
502
+ },
503
+ {
504
+ "epoch": 2.34,
505
+ "learning_rate": 2.3000824402308328e-05,
506
+ "loss": 0.776,
507
+ "step": 790
508
+ },
509
+ {
510
+ "epoch": 2.37,
511
+ "learning_rate": 2.2588623248145097e-05,
512
+ "loss": 0.7684,
513
+ "step": 800
514
+ },
515
+ {
516
+ "epoch": 2.4,
517
+ "learning_rate": 2.2176422093981863e-05,
518
+ "loss": 0.7854,
519
+ "step": 810
520
+ },
521
+ {
522
+ "epoch": 2.43,
523
+ "learning_rate": 2.1764220939818633e-05,
524
+ "loss": 0.7608,
525
+ "step": 820
526
+ },
527
+ {
528
+ "epoch": 2.46,
529
+ "learning_rate": 2.13520197856554e-05,
530
+ "loss": 0.7563,
531
+ "step": 830
532
+ },
533
+ {
534
+ "epoch": 2.49,
535
+ "learning_rate": 2.093981863149217e-05,
536
+ "loss": 0.7836,
537
+ "step": 840
538
+ },
539
+ {
540
+ "epoch": 2.52,
541
+ "learning_rate": 2.0527617477328937e-05,
542
+ "loss": 0.7936,
543
+ "step": 850
544
+ },
545
+ {
546
+ "epoch": 2.55,
547
+ "learning_rate": 2.0115416323165706e-05,
548
+ "loss": 0.7693,
549
+ "step": 860
550
+ },
551
+ {
552
+ "epoch": 2.58,
553
+ "learning_rate": 1.9703215169002472e-05,
554
+ "loss": 0.8235,
555
+ "step": 870
556
+ },
557
+ {
558
+ "epoch": 2.61,
559
+ "learning_rate": 1.9291014014839242e-05,
560
+ "loss": 0.8423,
561
+ "step": 880
562
+ },
563
+ {
564
+ "epoch": 2.64,
565
+ "learning_rate": 1.887881286067601e-05,
566
+ "loss": 0.6937,
567
+ "step": 890
568
+ },
569
+ {
570
+ "epoch": 2.67,
571
+ "learning_rate": 1.846661170651278e-05,
572
+ "loss": 0.7325,
573
+ "step": 900
574
+ },
575
+ {
576
+ "epoch": 2.7,
577
+ "learning_rate": 1.805441055234955e-05,
578
+ "loss": 0.779,
579
+ "step": 910
580
+ },
581
+ {
582
+ "epoch": 2.73,
583
+ "learning_rate": 1.7642209398186316e-05,
584
+ "loss": 0.7278,
585
+ "step": 920
586
+ },
587
+ {
588
+ "epoch": 2.76,
589
+ "learning_rate": 1.7230008244023085e-05,
590
+ "loss": 0.6911,
591
+ "step": 930
592
+ },
593
+ {
594
+ "epoch": 2.79,
595
+ "learning_rate": 1.681780708985985e-05,
596
+ "loss": 0.7127,
597
+ "step": 940
598
+ },
599
+ {
600
+ "epoch": 2.81,
601
+ "learning_rate": 1.640560593569662e-05,
602
+ "loss": 0.7116,
603
+ "step": 950
604
+ },
605
+ {
606
+ "epoch": 2.84,
607
+ "learning_rate": 1.599340478153339e-05,
608
+ "loss": 0.76,
609
+ "step": 960
610
+ },
611
+ {
612
+ "epoch": 2.87,
613
+ "learning_rate": 1.558120362737016e-05,
614
+ "loss": 0.7644,
615
+ "step": 970
616
+ },
617
+ {
618
+ "epoch": 2.9,
619
+ "learning_rate": 1.5169002473206925e-05,
620
+ "loss": 0.7876,
621
+ "step": 980
622
+ },
623
+ {
624
+ "epoch": 2.93,
625
+ "learning_rate": 1.4756801319043694e-05,
626
+ "loss": 0.6966,
627
+ "step": 990
628
+ },
629
+ {
630
+ "epoch": 2.96,
631
+ "learning_rate": 1.4344600164880462e-05,
632
+ "loss": 0.7425,
633
+ "step": 1000
634
+ },
635
+ {
636
+ "epoch": 2.99,
637
+ "learning_rate": 1.3932399010717231e-05,
638
+ "loss": 0.743,
639
+ "step": 1010
640
+ },
641
+ {
642
+ "epoch": 3.0,
643
+ "eval_accuracy": 0.8881481481481481,
644
+ "eval_f1": 0.8799732123735866,
645
+ "eval_loss": 0.4424385130405426,
646
+ "eval_precision": 0.8877465438754772,
647
+ "eval_recall": 0.8775775457617858,
648
+ "eval_runtime": 14.8388,
649
+ "eval_samples_per_second": 181.956,
650
+ "eval_steps_per_second": 11.389,
651
+ "step": 1012
652
+ },
653
+ {
654
+ "epoch": 3.02,
655
+ "learning_rate": 1.3520197856553999e-05,
656
+ "loss": 0.7802,
657
+ "step": 1020
658
+ },
659
+ {
660
+ "epoch": 3.05,
661
+ "learning_rate": 1.3107996702390768e-05,
662
+ "loss": 0.7482,
663
+ "step": 1030
664
+ },
665
+ {
666
+ "epoch": 3.08,
667
+ "learning_rate": 1.2695795548227534e-05,
668
+ "loss": 0.6815,
669
+ "step": 1040
670
+ },
671
+ {
672
+ "epoch": 3.11,
673
+ "learning_rate": 1.2283594394064305e-05,
674
+ "loss": 0.6821,
675
+ "step": 1050
676
+ },
677
+ {
678
+ "epoch": 3.14,
679
+ "learning_rate": 1.1871393239901073e-05,
680
+ "loss": 0.6976,
681
+ "step": 1060
682
+ },
683
+ {
684
+ "epoch": 3.17,
685
+ "learning_rate": 1.145919208573784e-05,
686
+ "loss": 0.7766,
687
+ "step": 1070
688
+ },
689
+ {
690
+ "epoch": 3.2,
691
+ "learning_rate": 1.104699093157461e-05,
692
+ "loss": 0.7489,
693
+ "step": 1080
694
+ },
695
+ {
696
+ "epoch": 3.23,
697
+ "learning_rate": 1.0634789777411378e-05,
698
+ "loss": 0.7489,
699
+ "step": 1090
700
+ },
701
+ {
702
+ "epoch": 3.26,
703
+ "learning_rate": 1.0222588623248145e-05,
704
+ "loss": 0.6568,
705
+ "step": 1100
706
+ },
707
+ {
708
+ "epoch": 3.29,
709
+ "learning_rate": 9.810387469084915e-06,
710
+ "loss": 0.7121,
711
+ "step": 1110
712
+ },
713
+ {
714
+ "epoch": 3.32,
715
+ "learning_rate": 9.398186314921682e-06,
716
+ "loss": 0.7013,
717
+ "step": 1120
718
+ },
719
+ {
720
+ "epoch": 3.35,
721
+ "learning_rate": 8.98598516075845e-06,
722
+ "loss": 0.667,
723
+ "step": 1130
724
+ },
725
+ {
726
+ "epoch": 3.38,
727
+ "learning_rate": 8.573784006595219e-06,
728
+ "loss": 0.715,
729
+ "step": 1140
730
+ },
731
+ {
732
+ "epoch": 3.41,
733
+ "learning_rate": 8.161582852431987e-06,
734
+ "loss": 0.6593,
735
+ "step": 1150
736
+ },
737
+ {
738
+ "epoch": 3.44,
739
+ "learning_rate": 7.749381698268756e-06,
740
+ "loss": 0.6886,
741
+ "step": 1160
742
+ },
743
+ {
744
+ "epoch": 3.47,
745
+ "learning_rate": 7.337180544105524e-06,
746
+ "loss": 0.712,
747
+ "step": 1170
748
+ },
749
+ {
750
+ "epoch": 3.5,
751
+ "learning_rate": 6.924979389942292e-06,
752
+ "loss": 0.752,
753
+ "step": 1180
754
+ },
755
+ {
756
+ "epoch": 3.53,
757
+ "learning_rate": 6.51277823577906e-06,
758
+ "loss": 0.6274,
759
+ "step": 1190
760
+ },
761
+ {
762
+ "epoch": 3.56,
763
+ "learning_rate": 6.100577081615829e-06,
764
+ "loss": 0.698,
765
+ "step": 1200
766
+ },
767
+ {
768
+ "epoch": 3.59,
769
+ "learning_rate": 5.688375927452598e-06,
770
+ "loss": 0.7475,
771
+ "step": 1210
772
+ },
773
+ {
774
+ "epoch": 3.61,
775
+ "learning_rate": 5.276174773289365e-06,
776
+ "loss": 0.7261,
777
+ "step": 1220
778
+ },
779
+ {
780
+ "epoch": 3.64,
781
+ "learning_rate": 4.863973619126134e-06,
782
+ "loss": 0.6776,
783
+ "step": 1230
784
+ },
785
+ {
786
+ "epoch": 3.67,
787
+ "learning_rate": 4.451772464962902e-06,
788
+ "loss": 0.6548,
789
+ "step": 1240
790
+ },
791
+ {
792
+ "epoch": 3.7,
793
+ "learning_rate": 4.03957131079967e-06,
794
+ "loss": 0.6526,
795
+ "step": 1250
796
+ },
797
+ {
798
+ "epoch": 3.73,
799
+ "learning_rate": 3.6273701566364385e-06,
800
+ "loss": 0.6432,
801
+ "step": 1260
802
+ },
803
+ {
804
+ "epoch": 3.76,
805
+ "learning_rate": 3.215169002473207e-06,
806
+ "loss": 0.6769,
807
+ "step": 1270
808
+ },
809
+ {
810
+ "epoch": 3.79,
811
+ "learning_rate": 2.8029678483099755e-06,
812
+ "loss": 0.6966,
813
+ "step": 1280
814
+ },
815
+ {
816
+ "epoch": 3.82,
817
+ "learning_rate": 2.390766694146744e-06,
818
+ "loss": 0.662,
819
+ "step": 1290
820
+ },
821
+ {
822
+ "epoch": 3.85,
823
+ "learning_rate": 1.978565539983512e-06,
824
+ "loss": 0.6816,
825
+ "step": 1300
826
+ },
827
+ {
828
+ "epoch": 3.88,
829
+ "learning_rate": 1.5663643858202803e-06,
830
+ "loss": 0.6373,
831
+ "step": 1310
832
+ },
833
+ {
834
+ "epoch": 3.91,
835
+ "learning_rate": 1.1541632316570488e-06,
836
+ "loss": 0.6328,
837
+ "step": 1320
838
+ },
839
+ {
840
+ "epoch": 3.94,
841
+ "learning_rate": 7.419620774938171e-07,
842
+ "loss": 0.6925,
843
+ "step": 1330
844
+ },
845
+ {
846
+ "epoch": 3.97,
847
+ "learning_rate": 3.2976092333058533e-07,
848
+ "loss": 0.7272,
849
+ "step": 1340
850
+ },
851
+ {
852
+ "epoch": 3.99,
853
+ "eval_accuracy": 0.8974074074074074,
854
+ "eval_f1": 0.8910186753180345,
855
+ "eval_loss": 0.3921002447605133,
856
+ "eval_precision": 0.8937527021895239,
857
+ "eval_recall": 0.8899884876686517,
858
+ "eval_runtime": 14.7611,
859
+ "eval_samples_per_second": 182.913,
860
+ "eval_steps_per_second": 11.449,
861
+ "step": 1348
862
+ },
863
+ {
864
+ "epoch": 3.99,
865
+ "step": 1348,
866
+ "total_flos": 1.8332979581807493e+18,
867
+ "train_loss": 1.1345372907485736,
868
+ "train_runtime": 1112.1938,
869
+ "train_samples_per_second": 77.684,
870
+ "train_steps_per_second": 1.212
871
+ }
872
+ ],
873
+ "logging_steps": 10,
874
+ "max_steps": 1348,
875
+ "num_train_epochs": 4,
876
+ "save_steps": 500,
877
+ "total_flos": 1.8332979581807493e+18,
878
+ "trial_name": null,
879
+ "trial_params": null
880
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1730a086082e0549a90b9b37204d2544c558a655e046ca2bc64ddde10a035b4f
3
+ size 4600