Chernoffface committed
Commit: f0b6614
Parent: 0a24492

Add SetFit model

1_Pooling/config.json CHANGED
@@ -1,5 +1,5 @@
 {
-    "word_embedding_dimension": 768,
+    "word_embedding_dimension": 384,
     "pooling_mode_cls_token": false,
     "pooling_mode_mean_tokens": true,
     "pooling_mode_max_tokens": false,
README.md CHANGED
@@ -5,7 +5,7 @@ tags:
 - sentence-transformers
 - text-classification
 - generated_from_setfit_trainer
-base_model: sentence-transformers/paraphrase-mpnet-base-v2
+base_model: sentence-transformers/paraphrase-MiniLM-L6-v2
 metrics:
 - accuracy
 widget:
@@ -69,9 +69,9 @@ pipeline_tag: text-classification
 inference: false
 ---
 
-# SetFit with sentence-transformers/paraphrase-mpnet-base-v2
+# SetFit with sentence-transformers/paraphrase-MiniLM-L6-v2
 
-This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/paraphrase-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2) as the Sentence Transformer embedding model. A OneVsRestClassifier instance is used for classification.
+This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/paraphrase-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2) as the Sentence Transformer embedding model. A OneVsRestClassifier instance is used for classification.
 
 The model has been trained using an efficient few-shot learning technique that involves:
 
@@ -82,9 +82,9 @@ The model has been trained using an efficient few-shot learning technique that i
 
 ### Model Description
 - **Model Type:** SetFit
-- **Sentence Transformer body:** [sentence-transformers/paraphrase-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2)
+- **Sentence Transformer body:** [sentence-transformers/paraphrase-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2)
 - **Classification head:** a OneVsRestClassifier instance
-- **Maximum Sequence Length:** 512 tokens
+- **Maximum Sequence Length:** 128 tokens
 <!-- - **Number of Classes:** Unknown -->
 <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
 <!-- - **Language:** Unknown -->
@@ -170,20 +170,69 @@ preds = model("Seminar Internet Technology Das Seminar behandelt aktuelle Themen
 ### Training Results
 | Epoch | Step | Training Loss | Validation Loss |
 |:------:|:----:|:-------------:|:---------------:|
-| 0.0014 | 1 | 0.2798 | - |
-| 0.0716 | 50 | 0.2227 | - |
-| 0.1433 | 100 | 0.1775 | - |
-| 0.2149 | 150 | 0.1471 | - |
-| 0.2865 | 200 | 0.1325 | - |
-| 0.3582 | 250 | 0.1292 | - |
-| 0.4298 | 300 | 0.1067 | - |
-| 0.5014 | 350 | 0.0985 | - |
-| 0.5731 | 400 | 0.0943 | - |
-| 0.6447 | 450 | 0.091 | - |
-| 0.7163 | 500 | 0.0786 | - |
-| 0.7880 | 550 | 0.0911 | - |
-| 0.8596 | 600 | 0.0762 | - |
-| 0.9312 | 650 | 0.0775 | - |
+| 0.0014 | 1 | 0.3334 | - |
+| 0.0716 | 50 | 0.2411 | - |
+| 0.1433 | 100 | 0.2124 | - |
+| 0.2149 | 150 | 0.186 | - |
+| 0.2865 | 200 | 0.1806 | - |
+| 0.3582 | 250 | 0.1759 | - |
+| 0.4298 | 300 | 0.1705 | - |
+| 0.5014 | 350 | 0.1542 | - |
+| 0.5731 | 400 | 0.1559 | - |
+| 0.6447 | 450 | 0.1524 | - |
+| 0.7163 | 500 | 0.1438 | - |
+| 0.7880 | 550 | 0.1507 | - |
+| 0.8596 | 600 | 0.14 | - |
+| 0.9312 | 650 | 0.1466 | - |
+| 0.0006 | 1 | 0.1157 | - |
+| 0.0287 | 50 | 0.1266 | - |
+| 0.0573 | 100 | 0.1325 | - |
+| 0.0860 | 150 | 0.1237 | - |
+| 0.1147 | 200 | 0.12 | - |
+| 0.1433 | 250 | 0.1189 | - |
+| 0.1720 | 300 | 0.1094 | - |
+| 0.2007 | 350 | 0.1028 | - |
+| 0.2294 | 400 | 0.0993 | - |
+| 0.2580 | 450 | 0.1003 | - |
+| 0.2867 | 500 | 0.0898 | - |
+| 0.3154 | 550 | 0.0875 | - |
+| 0.3440 | 600 | 0.0847 | - |
+| 0.3727 | 650 | 0.0879 | - |
+| 0.4014 | 700 | 0.0801 | - |
+| 0.4300 | 750 | 0.0754 | - |
+| 0.4587 | 800 | 0.0791 | - |
+| 0.4874 | 850 | 0.0715 | - |
+| 0.5161 | 900 | 0.0781 | - |
+| 0.5447 | 950 | 0.0765 | - |
+| 0.5734 | 1000 | 0.0718 | - |
+| 0.6021 | 1050 | 0.0786 | - |
+| 0.6307 | 1100 | 0.073 | - |
+| 0.6594 | 1150 | 0.0705 | - |
+| 0.6881 | 1200 | 0.072 | - |
+| 0.7167 | 1250 | 0.0673 | - |
+| 0.7454 | 1300 | 0.066 | - |
+| 0.7741 | 1350 | 0.0671 | - |
+| 0.8028 | 1400 | 0.0631 | - |
+| 0.8314 | 1450 | 0.0673 | - |
+| 0.8601 | 1500 | 0.0638 | - |
+| 0.8888 | 1550 | 0.0674 | - |
+| 0.9174 | 1600 | 0.0613 | - |
+| 0.9461 | 1650 | 0.063 | - |
+| 0.9748 | 1700 | 0.0682 | - |
+| 0.0014 | 1 | 0.0497 | - |
+| 0.0716 | 50 | 0.0584 | - |
+| 0.1433 | 100 | 0.0663 | - |
+| 0.2149 | 150 | 0.0682 | - |
+| 0.2865 | 200 | 0.0616 | - |
+| 0.3582 | 250 | 0.0657 | - |
+| 0.4298 | 300 | 0.0593 | - |
+| 0.5014 | 350 | 0.0593 | - |
+| 0.5731 | 400 | 0.0565 | - |
+| 0.6447 | 450 | 0.0595 | - |
+| 0.7163 | 500 | 0.0589 | - |
+| 0.7880 | 550 | 0.0649 | - |
+| 0.8596 | 600 | 0.0554 | - |
+| 0.9312 | 650 | 0.0601 | - |
 
 ### Framework Versions
 - Python: 3.12.3
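The updated card keeps the standard SetFit usage pattern, visible in the `preds = model("Seminar Internet Technology ...")` fragment of the last hunk header. A hedged usage sketch, assuming the setfit package is installed; the repository id below is a placeholder, since the commit does not name the target repo:

```python
# Hedged usage sketch. "Chernoffface/setfit-model" is a placeholder repo id;
# the commit itself does not state the repository name.
from setfit import SetFitModel

model = SetFitModel.from_pretrained("Chernoffface/setfit-model")

# Single-text inference, mirroring the `preds = model(...)` call in the README.
preds = model("Seminar Internet Technology Das Seminar behandelt aktuelle Themen ...")
print(preds)
```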
config.json CHANGED
@@ -1,24 +1,26 @@
 {
-  "_name_or_path": "sentence-transformers/paraphrase-mpnet-base-v2",
+  "_name_or_path": "sentence-transformers/paraphrase-MiniLM-L6-v2",
   "architectures": [
-    "MPNetModel"
+    "BertModel"
   ],
   "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
-  "eos_token_id": 2,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
+  "hidden_size": 384,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
-  "model_type": "mpnet",
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "relative_attention_num_buckets": 32,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.43.1",
-  "vocab_size": 30527
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92972fdc0601d168d56004e78d57ba956c481075c9ea4613576d48e38177b8e9
-size 437967672
+oid sha256:db084c914530afe8da5c4c9ca8188cf25f0f6947c28be54680fb375c036b64b4
+size 90864192
model_head.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3631581829b8a144cf4ed104775763cd8f15d7c6018e93f041e2edb943bdd511
-size 39828
+oid sha256:3c4dcf0c31745b0af1e0164490cc79d5e24b8d09e028a744b5b5f2258404271f
+size 21396
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 512,
+  "max_seq_length": 128,
   "do_lower_case": false
 }
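With this change, inputs longer than 128 tokens are truncated before pooling instead of at 512. A hedged sketch of how the limit can be inspected or raised at load time, assuming sentence-transformers is installed; the value 512 is the ceiling implied by the new base model's `max_position_embeddings` in config.json above:

```python
# Hedged sketch: reading and (optionally) overriding the sequence-length limit.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
print(model.max_seq_length)  # 128, read from sentence_bert_config.json

# If longer texts must be embedded in full, the limit can be raised up to the
# transformer's position-embedding budget (512 here), at the cost of speed.
model.max_seq_length = 256
```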
special_tokens_map.json CHANGED
@@ -1,41 +1,27 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "cls_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
+    "content": "[CLS]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
+    "content": "[MASK]",
+    "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<pad>",
+    "content": "[PAD]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "sep_token": {
-    "content": "</s>",
+    "content": "[SEP]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,59 +1,57 @@
 {
   "added_tokens_decoder": {
     "0": {
-      "content": "<s>",
+      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "1": {
-      "content": "<pad>",
+    "100": {
+      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "2": {
-      "content": "</s>",
+    "101": {
+      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "104": {
-      "content": "[UNK]",
+    "102": {
+      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "30526": {
-      "content": "<mask>",
-      "lstrip": true,
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
-  "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
-  "cls_token": "<s>",
+  "cls_token": "[CLS]",
   "do_basic_tokenize": true,
   "do_lower_case": true,
-  "eos_token": "</s>",
-  "mask_token": "<mask>",
-  "model_max_length": 512,
+  "mask_token": "[MASK]",
+  "model_max_length": 128,
   "never_split": null,
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
-  "tokenizer_class": "MPNetTokenizer",
+  "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }
vocab.txt CHANGED
@@ -1,7 +1,3 @@
-<s>
-<pad>
-</s>
-<unk>
 [PAD]
 [unused0]
 [unused1]
@@ -30524,4 +30520,3 @@ necessitated
 ##:
 ##?
 ##~
-<mask>