mykahh committed on
Commit
61273ba
·
verified ·
1 Parent(s): 459f272

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-classification
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - autotrain-2t1ic-d661v/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 0.1272398680448532
18
+
19
+ f1: 0.7869222096956031
20
+
21
+ precision: 0.7949886104783599
22
+
23
+ recall: 0.7790178571428571
24
+
25
+ auc: 0.9779004715847651
26
+
27
+ accuracy: 0.9704364148287189
checkpoint-6394/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
3
+ "_num_labels": 2,
4
+ "activation": "gelu",
5
+ "architectures": [
6
+ "DistilBertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "dim": 768,
10
+ "dropout": 0.1,
11
+ "finetuning_task": "sst-2",
12
+ "hidden_dim": 3072,
13
+ "id2label": {
14
+ "0": 0,
15
+ "1": 1
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "0": 0,
20
+ "1": 1
21
+ },
22
+ "max_position_embeddings": 512,
23
+ "model_type": "distilbert",
24
+ "n_heads": 12,
25
+ "n_layers": 6,
26
+ "output_past": true,
27
+ "pad_token_id": 0,
28
+ "problem_type": "single_label_classification",
29
+ "qa_dropout": 0.1,
30
+ "seq_classif_dropout": 0.2,
31
+ "sinusoidal_pos_embds": false,
32
+ "tie_weights_": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.38.2",
35
+ "vocab_size": 30522
36
+ }
checkpoint-6394/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df2be0aa3ca51b8edb56d84e6a8c6da754e039f6b7bf8d6c25fb5fbdaf781793
3
+ size 267832560
checkpoint-6394/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbcc84bd2a71afb387dfa0d8c57d386a5ba765080888b036563dc0f90ef1c823
3
+ size 535724410
checkpoint-6394/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30bee460298c66df58de2c372aef5d75245eec6679733a0717afce6a13f89031
3
+ size 13990
checkpoint-6394/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c907b3d4727338c781661950145481de729c08b86ec5bfe250e696d8e100d98
3
+ size 1064
checkpoint-6394/trainer_state.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.1272398680448532,
3
+ "best_model_checkpoint": "autotrain-2t1ic-d661v/checkpoint-6394",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 6394,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05,
13
+ "grad_norm": 3.1554582118988037,
14
+ "learning_rate": 8.28125e-06,
15
+ "loss": 0.937,
16
+ "step": 159
17
+ },
18
+ {
19
+ "epoch": 0.1,
20
+ "grad_norm": 0.7134726643562317,
21
+ "learning_rate": 1.65625e-05,
22
+ "loss": 0.2178,
23
+ "step": 318
24
+ },
25
+ {
26
+ "epoch": 0.15,
27
+ "grad_norm": 3.067269802093506,
28
+ "learning_rate": 2.484375e-05,
29
+ "loss": 0.2279,
30
+ "step": 477
31
+ },
32
+ {
33
+ "epoch": 0.2,
34
+ "grad_norm": 3.953152656555176,
35
+ "learning_rate": 3.3125e-05,
36
+ "loss": 0.2313,
37
+ "step": 636
38
+ },
39
+ {
40
+ "epoch": 0.25,
41
+ "grad_norm": 19.392597198486328,
42
+ "learning_rate": 4.140625e-05,
43
+ "loss": 0.1997,
44
+ "step": 795
45
+ },
46
+ {
47
+ "epoch": 0.3,
48
+ "grad_norm": 0.18459168076515198,
49
+ "learning_rate": 4.96875e-05,
50
+ "loss": 0.1585,
51
+ "step": 954
52
+ },
53
+ {
54
+ "epoch": 0.35,
55
+ "grad_norm": 0.036189883947372437,
56
+ "learning_rate": 4.9113660062565175e-05,
57
+ "loss": 0.1673,
58
+ "step": 1113
59
+ },
60
+ {
61
+ "epoch": 0.4,
62
+ "grad_norm": 20.717161178588867,
63
+ "learning_rate": 4.8192561696211335e-05,
64
+ "loss": 0.1645,
65
+ "step": 1272
66
+ },
67
+ {
68
+ "epoch": 0.45,
69
+ "grad_norm": 3.4379196166992188,
70
+ "learning_rate": 4.727146332985749e-05,
71
+ "loss": 0.1448,
72
+ "step": 1431
73
+ },
74
+ {
75
+ "epoch": 0.5,
76
+ "grad_norm": 0.0559392012655735,
77
+ "learning_rate": 4.635036496350365e-05,
78
+ "loss": 0.1412,
79
+ "step": 1590
80
+ },
81
+ {
82
+ "epoch": 0.55,
83
+ "grad_norm": 0.04697013273835182,
84
+ "learning_rate": 4.5429266597149814e-05,
85
+ "loss": 0.1501,
86
+ "step": 1749
87
+ },
88
+ {
89
+ "epoch": 0.6,
90
+ "grad_norm": 0.027782494202256203,
91
+ "learning_rate": 4.4508168230795974e-05,
92
+ "loss": 0.1596,
93
+ "step": 1908
94
+ },
95
+ {
96
+ "epoch": 0.65,
97
+ "grad_norm": 0.26669323444366455,
98
+ "learning_rate": 4.3587069864442134e-05,
99
+ "loss": 0.1686,
100
+ "step": 2067
101
+ },
102
+ {
103
+ "epoch": 0.7,
104
+ "grad_norm": 6.07758092880249,
105
+ "learning_rate": 4.266597149808829e-05,
106
+ "loss": 0.1741,
107
+ "step": 2226
108
+ },
109
+ {
110
+ "epoch": 0.75,
111
+ "grad_norm": 0.06619926542043686,
112
+ "learning_rate": 4.174487313173445e-05,
113
+ "loss": 0.1287,
114
+ "step": 2385
115
+ },
116
+ {
117
+ "epoch": 0.8,
118
+ "grad_norm": 0.1769120991230011,
119
+ "learning_rate": 4.082377476538061e-05,
120
+ "loss": 0.1629,
121
+ "step": 2544
122
+ },
123
+ {
124
+ "epoch": 0.85,
125
+ "grad_norm": 0.7576848864555359,
126
+ "learning_rate": 3.990267639902677e-05,
127
+ "loss": 0.136,
128
+ "step": 2703
129
+ },
130
+ {
131
+ "epoch": 0.9,
132
+ "grad_norm": 0.10874854773283005,
133
+ "learning_rate": 3.8981578032672927e-05,
134
+ "loss": 0.127,
135
+ "step": 2862
136
+ },
137
+ {
138
+ "epoch": 0.94,
139
+ "grad_norm": 6.004174709320068,
140
+ "learning_rate": 3.806047966631908e-05,
141
+ "loss": 0.1193,
142
+ "step": 3021
143
+ },
144
+ {
145
+ "epoch": 0.99,
146
+ "grad_norm": 0.25709450244903564,
147
+ "learning_rate": 3.713938129996524e-05,
148
+ "loss": 0.1277,
149
+ "step": 3180
150
+ },
151
+ {
152
+ "epoch": 1.0,
153
+ "eval_accuracy": 0.9488503050211169,
154
+ "eval_auc": 0.9710095518442868,
155
+ "eval_f1": 0.6929577464788733,
156
+ "eval_loss": 0.15181826055049896,
157
+ "eval_precision": 0.5980551053484603,
158
+ "eval_recall": 0.8236607142857143,
159
+ "eval_runtime": 595.045,
160
+ "eval_samples_per_second": 10.744,
161
+ "eval_steps_per_second": 0.672,
162
+ "step": 3197
163
+ },
164
+ {
165
+ "epoch": 1.04,
166
+ "grad_norm": 0.4182649850845337,
167
+ "learning_rate": 3.62182829336114e-05,
168
+ "loss": 0.1071,
169
+ "step": 3339
170
+ },
171
+ {
172
+ "epoch": 1.09,
173
+ "grad_norm": 0.09685356914997101,
174
+ "learning_rate": 3.5297184567257566e-05,
175
+ "loss": 0.0737,
176
+ "step": 3498
177
+ },
178
+ {
179
+ "epoch": 1.14,
180
+ "grad_norm": 44.25665283203125,
181
+ "learning_rate": 3.4376086200903726e-05,
182
+ "loss": 0.0923,
183
+ "step": 3657
184
+ },
185
+ {
186
+ "epoch": 1.19,
187
+ "grad_norm": 0.017466088756918907,
188
+ "learning_rate": 3.345498783454988e-05,
189
+ "loss": 0.0972,
190
+ "step": 3816
191
+ },
192
+ {
193
+ "epoch": 1.24,
194
+ "grad_norm": 0.06699241697788239,
195
+ "learning_rate": 3.253388946819604e-05,
196
+ "loss": 0.0743,
197
+ "step": 3975
198
+ },
199
+ {
200
+ "epoch": 1.29,
201
+ "grad_norm": 0.028020156547427177,
202
+ "learning_rate": 3.16127911018422e-05,
203
+ "loss": 0.0872,
204
+ "step": 4134
205
+ },
206
+ {
207
+ "epoch": 1.34,
208
+ "grad_norm": 19.901599884033203,
209
+ "learning_rate": 3.069169273548836e-05,
210
+ "loss": 0.0947,
211
+ "step": 4293
212
+ },
213
+ {
214
+ "epoch": 1.39,
215
+ "grad_norm": 0.458806574344635,
216
+ "learning_rate": 2.977059436913452e-05,
217
+ "loss": 0.0837,
218
+ "step": 4452
219
+ },
220
+ {
221
+ "epoch": 1.44,
222
+ "grad_norm": 1.4870176315307617,
223
+ "learning_rate": 2.8849496002780675e-05,
224
+ "loss": 0.0793,
225
+ "step": 4611
226
+ },
227
+ {
228
+ "epoch": 1.49,
229
+ "grad_norm": 0.014602471143007278,
230
+ "learning_rate": 2.7928397636426835e-05,
231
+ "loss": 0.0917,
232
+ "step": 4770
233
+ },
234
+ {
235
+ "epoch": 1.54,
236
+ "grad_norm": 0.010597724467515945,
237
+ "learning_rate": 2.7007299270072995e-05,
238
+ "loss": 0.0761,
239
+ "step": 4929
240
+ },
241
+ {
242
+ "epoch": 1.59,
243
+ "grad_norm": 0.01183533389121294,
244
+ "learning_rate": 2.6086200903719154e-05,
245
+ "loss": 0.0607,
246
+ "step": 5088
247
+ },
248
+ {
249
+ "epoch": 1.64,
250
+ "grad_norm": 0.050857510417699814,
251
+ "learning_rate": 2.5165102537365314e-05,
252
+ "loss": 0.0698,
253
+ "step": 5247
254
+ },
255
+ {
256
+ "epoch": 1.69,
257
+ "grad_norm": 0.059834353625774384,
258
+ "learning_rate": 2.424400417101147e-05,
259
+ "loss": 0.0907,
260
+ "step": 5406
261
+ },
262
+ {
263
+ "epoch": 1.74,
264
+ "grad_norm": 0.04689362645149231,
265
+ "learning_rate": 2.332290580465763e-05,
266
+ "loss": 0.0715,
267
+ "step": 5565
268
+ },
269
+ {
270
+ "epoch": 1.79,
271
+ "grad_norm": 0.02181609719991684,
272
+ "learning_rate": 2.240180743830379e-05,
273
+ "loss": 0.0777,
274
+ "step": 5724
275
+ },
276
+ {
277
+ "epoch": 1.84,
278
+ "grad_norm": 0.010604546405375004,
279
+ "learning_rate": 2.1480709071949947e-05,
280
+ "loss": 0.061,
281
+ "step": 5883
282
+ },
283
+ {
284
+ "epoch": 1.89,
285
+ "grad_norm": 0.044349148869514465,
286
+ "learning_rate": 2.0559610705596107e-05,
287
+ "loss": 0.0648,
288
+ "step": 6042
289
+ },
290
+ {
291
+ "epoch": 1.94,
292
+ "grad_norm": 74.56462860107422,
293
+ "learning_rate": 1.9638512339242267e-05,
294
+ "loss": 0.0816,
295
+ "step": 6201
296
+ },
297
+ {
298
+ "epoch": 1.99,
299
+ "grad_norm": 0.01588965579867363,
300
+ "learning_rate": 1.8717413972888427e-05,
301
+ "loss": 0.0619,
302
+ "step": 6360
303
+ },
304
+ {
305
+ "epoch": 2.0,
306
+ "eval_accuracy": 0.9704364148287189,
307
+ "eval_auc": 0.9779004715847651,
308
+ "eval_f1": 0.7869222096956031,
309
+ "eval_loss": 0.1272398680448532,
310
+ "eval_precision": 0.7949886104783599,
311
+ "eval_recall": 0.7790178571428571,
312
+ "eval_runtime": 543.9479,
313
+ "eval_samples_per_second": 11.753,
314
+ "eval_steps_per_second": 0.735,
315
+ "step": 6394
316
+ }
317
+ ],
318
+ "logging_steps": 159,
319
+ "max_steps": 9591,
320
+ "num_input_tokens_seen": 0,
321
+ "num_train_epochs": 3,
322
+ "save_steps": 500,
323
+ "total_flos": 1693529458117632.0,
324
+ "train_batch_size": 8,
325
+ "trial_name": null,
326
+ "trial_params": null
327
+ }
checkpoint-6394/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d739182b7976fff686e822a701fb96816beb8f24420446af5b91a50057cfb60
3
+ size 4920
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
3
+ "_num_labels": 2,
4
+ "activation": "gelu",
5
+ "architectures": [
6
+ "DistilBertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "dim": 768,
10
+ "dropout": 0.1,
11
+ "finetuning_task": "sst-2",
12
+ "hidden_dim": 3072,
13
+ "id2label": {
14
+ "0": 0,
15
+ "1": 1
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "0": 0,
20
+ "1": 1
21
+ },
22
+ "max_position_embeddings": 512,
23
+ "model_type": "distilbert",
24
+ "n_heads": 12,
25
+ "n_layers": 6,
26
+ "output_past": true,
27
+ "pad_token_id": 0,
28
+ "problem_type": "single_label_classification",
29
+ "qa_dropout": 0.1,
30
+ "seq_classif_dropout": 0.2,
31
+ "sinusoidal_pos_embds": false,
32
+ "tie_weights_": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.38.2",
35
+ "vocab_size": 30522
36
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df2be0aa3ca51b8edb56d84e6a8c6da754e039f6b7bf8d6c25fb5fbdaf781793
3
+ size 267832560
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d739182b7976fff686e822a701fb96816beb8f24420446af5b91a50057cfb60
3
+ size 4920
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-2t1ic-d661v/autotrain-data",
3
+ "model": "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 128,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-2t1ic-d661v",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": null,
23
+ "save_total_limit": 1,
24
+ "save_strategy": "epoch",
25
+ "push_to_hub": true,
26
+ "repo_id": "mykahh/autotrain-2t1ic-d661v",
27
+ "evaluation_strategy": "epoch",
28
+ "username": "mykahh",
29
+ "log": "none"
30
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff